1 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2 ; RUN: llc -mtriple=thumbv8.1m.main-arm-none-eabi -mattr=+mve,+fullfp16 -verify-machineinstrs %s -o - | FileCheck %s --check-prefix=CHECK --check-prefix=CHECK-MVE
3 ; RUN: llc -mtriple=thumbv8.1m.main-arm-none-eabi -mattr=+mve.fp -verify-machineinstrs %s -o - | FileCheck %s --check-prefix=CHECK --check-prefix=CHECK-MVEFP
; Full lane reversal of a <4 x i32> (mask 3,2,1,0). The assertions expect four
; single-lane s-register moves into q1 followed by a copy of q1 back into q0.
5 define arm_aapcs_vfpcc <4 x i32> @shuffle1_i32(<4 x i32> %src) {
6 ; CHECK-LABEL: shuffle1_i32:
7 ; CHECK: @ %bb.0: @ %entry
8 ; CHECK-NEXT: vmov.f32 s4, s3
9 ; CHECK-NEXT: vmov.f32 s5, s2
10 ; CHECK-NEXT: vmov.f32 s6, s1
11 ; CHECK-NEXT: vmov.f32 s7, s0
12 ; CHECK-NEXT: vmov q0, q1
15 %out = shufflevector <4 x i32> %src, <4 x i32> undef, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
; Identity shuffle of a <4 x i32> (mask 0,1,2,3): every result lane selects the
; same-numbered source lane.
19 define arm_aapcs_vfpcc <4 x i32> @shuffle2_i32(<4 x i32> %src) {
20 ; CHECK-LABEL: shuffle2_i32:
21 ; CHECK: @ %bb.0: @ %entry
24 %out = shufflevector <4 x i32> %src, <4 x i32> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
; Swaps lanes 0 and 3 of a <4 x i32> while keeping lanes 1 and 2 in place
; (mask 3,1,2,0). The assertions expect per-lane s-register moves via q1.
28 define arm_aapcs_vfpcc <4 x i32> @shuffle3_i32(<4 x i32> %src) {
29 ; CHECK-LABEL: shuffle3_i32:
30 ; CHECK: @ %bb.0: @ %entry
31 ; CHECK-NEXT: vmov.f32 s4, s3
32 ; CHECK-NEXT: vmov.f32 s5, s1
33 ; CHECK-NEXT: vmov.f32 s6, s2
34 ; CHECK-NEXT: vmov.f32 s7, s0
35 ; CHECK-NEXT: vmov q0, q1
38 %out = shufflevector <4 x i32> %src, <4 x i32> undef, <4 x i32> <i32 3, i32 1, i32 2, i32 0>
; Swaps the two 32-bit lanes inside each 64-bit half (mask 1,0,3,2). The
; assertions expect a single vrev64.32 into q1 plus a copy back into q0.
42 define arm_aapcs_vfpcc <4 x i32> @shuffle5_i32(<4 x i32> %src) {
43 ; CHECK-LABEL: shuffle5_i32:
44 ; CHECK: @ %bb.0: @ %entry
45 ; CHECK-NEXT: vrev64.32 q1, q0
46 ; CHECK-NEXT: vmov q0, q1
49 %out = shufflevector <4 x i32> %src, <4 x i32> undef, <4 x i32> <i32 1, i32 0, i32 3, i32 2>
; Mostly-undef mask (undef,undef,undef,3): only result lane 3 is specified, and
; it selects source lane 3, i.e. the one defined lane is already in place.
53 define arm_aapcs_vfpcc <4 x i32> @shuffle6_i32(<4 x i32> %src) {
54 ; CHECK-LABEL: shuffle6_i32:
55 ; CHECK: @ %bb.0: @ %entry
58 %out = shufflevector <4 x i32> %src, <4 x i32> undef, <4 x i32> <i32 undef, i32 undef, i32 undef, i32 3>
; Full lane reversal of an <8 x i16> (mask 7..0). The assertions expect the
; source to be copied to q1 and then each lane to be moved individually through
; r0 (vmov.u16 extract, vmov.16 insert) into q0.
62 define arm_aapcs_vfpcc <8 x i16> @shuffle1_i16(<8 x i16> %src) {
63 ; CHECK-LABEL: shuffle1_i16:
64 ; CHECK: @ %bb.0: @ %entry
65 ; CHECK-NEXT: vmov q1, q0
66 ; CHECK-NEXT: vmov.u16 r0, q0[7]
67 ; CHECK-NEXT: vmov.16 q0[0], r0
68 ; CHECK-NEXT: vmov.u16 r0, q1[6]
69 ; CHECK-NEXT: vmov.16 q0[1], r0
70 ; CHECK-NEXT: vmov.u16 r0, q1[5]
71 ; CHECK-NEXT: vmov.16 q0[2], r0
72 ; CHECK-NEXT: vmov.u16 r0, q1[4]
73 ; CHECK-NEXT: vmov.16 q0[3], r0
74 ; CHECK-NEXT: vmov.u16 r0, q1[3]
75 ; CHECK-NEXT: vmov.16 q0[4], r0
76 ; CHECK-NEXT: vmov.u16 r0, q1[2]
77 ; CHECK-NEXT: vmov.16 q0[5], r0
78 ; CHECK-NEXT: vmov.u16 r0, q1[1]
79 ; CHECK-NEXT: vmov.16 q0[6], r0
80 ; CHECK-NEXT: vmov.u16 r0, q1[0]
81 ; CHECK-NEXT: vmov.16 q0[7], r0
84 %out = shufflevector <8 x i16> %src, <8 x i16> undef, <8 x i32> <i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
; Identity shuffle of an <8 x i16> (mask 0..7): every result lane selects the
; same-numbered source lane.
88 define arm_aapcs_vfpcc <8 x i16> @shuffle2_i16(<8 x i16> %src) {
89 ; CHECK-LABEL: shuffle2_i16:
90 ; CHECK: @ %bb.0: @ %entry
93 %out = shufflevector <8 x i16> %src, <8 x i16> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
; Irregular permutation of an <8 x i16> (mask 4,5,7,6,3,1,2,0). The assertions
; expect a lane-by-lane extract/insert sequence through r0, reading from a copy
; of the source held in q1.
97 define arm_aapcs_vfpcc <8 x i16> @shuffle3_i16(<8 x i16> %src) {
98 ; CHECK-LABEL: shuffle3_i16:
99 ; CHECK: @ %bb.0: @ %entry
100 ; CHECK-NEXT: vmov q1, q0
101 ; CHECK-NEXT: vmov.u16 r0, q0[4]
102 ; CHECK-NEXT: vmov.16 q0[0], r0
103 ; CHECK-NEXT: vmov.u16 r0, q1[5]
104 ; CHECK-NEXT: vmov.16 q0[1], r0
105 ; CHECK-NEXT: vmov.u16 r0, q1[7]
106 ; CHECK-NEXT: vmov.16 q0[2], r0
107 ; CHECK-NEXT: vmov.u16 r0, q1[6]
108 ; CHECK-NEXT: vmov.16 q0[3], r0
109 ; CHECK-NEXT: vmov.u16 r0, q1[3]
110 ; CHECK-NEXT: vmov.16 q0[4], r0
111 ; CHECK-NEXT: vmov.u16 r0, q1[1]
112 ; CHECK-NEXT: vmov.16 q0[5], r0
113 ; CHECK-NEXT: vmov.u16 r0, q1[2]
114 ; CHECK-NEXT: vmov.16 q0[6], r0
115 ; CHECK-NEXT: vmov.u16 r0, q1[0]
116 ; CHECK-NEXT: vmov.16 q0[7], r0
119 %out = shufflevector <8 x i16> %src, <8 x i16> undef, <8 x i32> <i32 4, i32 5, i32 7, i32 6, i32 3, i32 1, i32 2, i32 0>
; Reverses the four 16-bit lanes inside each 64-bit half (mask 3,2,1,0,7,6,5,4).
; The assertions expect a single vrev64.16 into q1 plus a copy back into q0.
123 define arm_aapcs_vfpcc <8 x i16> @shuffle5_i16(<8 x i16> %src) {
124 ; CHECK-LABEL: shuffle5_i16:
125 ; CHECK: @ %bb.0: @ %entry
126 ; CHECK-NEXT: vrev64.16 q1, q0
127 ; CHECK-NEXT: vmov q0, q1
130 %out = shufflevector <8 x i16> %src, <8 x i16> undef, <8 x i32> <i32 3, i32 2, i32 1, i32 0, i32 7, i32 6, i32 5, i32 4>
; Swaps adjacent 16-bit lanes pairwise (mask 1,0,3,2,5,4,7,6). The assertions
; expect an in-place vrev32.16 on q0.
134 define arm_aapcs_vfpcc <8 x i16> @shuffle6_i16(<8 x i16> %src) {
135 ; CHECK-LABEL: shuffle6_i16:
136 ; CHECK: @ %bb.0: @ %entry
137 ; CHECK-NEXT: vrev32.16 q0, q0
140 %out = shufflevector <8 x i16> %src, <8 x i16> undef, <8 x i32> <i32 1, i32 0, i32 3, i32 2, i32 5, i32 4, i32 7, i32 6>
; Full lane reversal of a <16 x i8> (mask 15..0). The assertions expect the
; source to be copied to q1 and then each byte lane to be moved individually
; through r0 (vmov.u8 extract, vmov.8 insert) into q0.
144 define arm_aapcs_vfpcc <16 x i8> @shuffle1_i8(<16 x i8> %src) {
145 ; CHECK-LABEL: shuffle1_i8:
146 ; CHECK: @ %bb.0: @ %entry
147 ; CHECK-NEXT: vmov q1, q0
148 ; CHECK-NEXT: vmov.u8 r0, q0[15]
149 ; CHECK-NEXT: vmov.8 q0[0], r0
150 ; CHECK-NEXT: vmov.u8 r0, q1[14]
151 ; CHECK-NEXT: vmov.8 q0[1], r0
152 ; CHECK-NEXT: vmov.u8 r0, q1[13]
153 ; CHECK-NEXT: vmov.8 q0[2], r0
154 ; CHECK-NEXT: vmov.u8 r0, q1[12]
155 ; CHECK-NEXT: vmov.8 q0[3], r0
156 ; CHECK-NEXT: vmov.u8 r0, q1[11]
157 ; CHECK-NEXT: vmov.8 q0[4], r0
158 ; CHECK-NEXT: vmov.u8 r0, q1[10]
159 ; CHECK-NEXT: vmov.8 q0[5], r0
160 ; CHECK-NEXT: vmov.u8 r0, q1[9]
161 ; CHECK-NEXT: vmov.8 q0[6], r0
162 ; CHECK-NEXT: vmov.u8 r0, q1[8]
163 ; CHECK-NEXT: vmov.8 q0[7], r0
164 ; CHECK-NEXT: vmov.u8 r0, q1[7]
165 ; CHECK-NEXT: vmov.8 q0[8], r0
166 ; CHECK-NEXT: vmov.u8 r0, q1[6]
167 ; CHECK-NEXT: vmov.8 q0[9], r0
168 ; CHECK-NEXT: vmov.u8 r0, q1[5]
169 ; CHECK-NEXT: vmov.8 q0[10], r0
170 ; CHECK-NEXT: vmov.u8 r0, q1[4]
171 ; CHECK-NEXT: vmov.8 q0[11], r0
172 ; CHECK-NEXT: vmov.u8 r0, q1[3]
173 ; CHECK-NEXT: vmov.8 q0[12], r0
174 ; CHECK-NEXT: vmov.u8 r0, q1[2]
175 ; CHECK-NEXT: vmov.8 q0[13], r0
176 ; CHECK-NEXT: vmov.u8 r0, q1[1]
177 ; CHECK-NEXT: vmov.8 q0[14], r0
178 ; CHECK-NEXT: vmov.u8 r0, q1[0]
179 ; CHECK-NEXT: vmov.8 q0[15], r0
182 %out = shufflevector <16 x i8> %src, <16 x i8> undef, <16 x i32> <i32 15, i32 14, i32 13, i32 12, i32 11, i32 10, i32 9, i32 8, i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
; Identity shuffle of a <16 x i8> (mask 0..15): every result lane selects the
; same-numbered source lane.
186 define arm_aapcs_vfpcc <16 x i8> @shuffle2_i8(<16 x i8> %src) {
187 ; CHECK-LABEL: shuffle2_i8:
188 ; CHECK: @ %bb.0: @ %entry
191 %out = shufflevector <16 x i8> %src, <16 x i8> undef, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
; Irregular permutation of a <16 x i8> (mask 4,5,15,7,14,9,6,3,10,12,1,13,2,8,0,11).
; The assertions expect a byte-by-byte extract/insert sequence through r0,
; reading from a copy of the source held in q1.
195 define arm_aapcs_vfpcc <16 x i8> @shuffle3_i8(<16 x i8> %src) {
196 ; CHECK-LABEL: shuffle3_i8:
197 ; CHECK: @ %bb.0: @ %entry
198 ; CHECK-NEXT: vmov q1, q0
199 ; CHECK-NEXT: vmov.u8 r0, q0[4]
200 ; CHECK-NEXT: vmov.8 q0[0], r0
201 ; CHECK-NEXT: vmov.u8 r0, q1[5]
202 ; CHECK-NEXT: vmov.8 q0[1], r0
203 ; CHECK-NEXT: vmov.u8 r0, q1[15]
204 ; CHECK-NEXT: vmov.8 q0[2], r0
205 ; CHECK-NEXT: vmov.u8 r0, q1[7]
206 ; CHECK-NEXT: vmov.8 q0[3], r0
207 ; CHECK-NEXT: vmov.u8 r0, q1[14]
208 ; CHECK-NEXT: vmov.8 q0[4], r0
209 ; CHECK-NEXT: vmov.u8 r0, q1[9]
210 ; CHECK-NEXT: vmov.8 q0[5], r0
211 ; CHECK-NEXT: vmov.u8 r0, q1[6]
212 ; CHECK-NEXT: vmov.8 q0[6], r0
213 ; CHECK-NEXT: vmov.u8 r0, q1[3]
214 ; CHECK-NEXT: vmov.8 q0[7], r0
215 ; CHECK-NEXT: vmov.u8 r0, q1[10]
216 ; CHECK-NEXT: vmov.8 q0[8], r0
217 ; CHECK-NEXT: vmov.u8 r0, q1[12]
218 ; CHECK-NEXT: vmov.8 q0[9], r0
219 ; CHECK-NEXT: vmov.u8 r0, q1[1]
220 ; CHECK-NEXT: vmov.8 q0[10], r0
221 ; CHECK-NEXT: vmov.u8 r0, q1[13]
222 ; CHECK-NEXT: vmov.8 q0[11], r0
223 ; CHECK-NEXT: vmov.u8 r0, q1[2]
224 ; CHECK-NEXT: vmov.8 q0[12], r0
225 ; CHECK-NEXT: vmov.u8 r0, q1[8]
226 ; CHECK-NEXT: vmov.8 q0[13], r0
227 ; CHECK-NEXT: vmov.u8 r0, q1[0]
228 ; CHECK-NEXT: vmov.8 q0[14], r0
229 ; CHECK-NEXT: vmov.u8 r0, q1[11]
230 ; CHECK-NEXT: vmov.8 q0[15], r0
233 %out = shufflevector <16 x i8> %src, <16 x i8> undef, <16 x i32> <i32 4, i32 5, i32 15, i32 7, i32 14, i32 9, i32 6, i32 3, i32 10, i32 12, i32 1, i32 13, i32 2, i32 8, i32 0, i32 11>
; Reverses the eight byte lanes inside each 64-bit half (mask 7..0,15..8). The
; assertions expect a single vrev64.8 into q1 plus a copy back into q0.
237 define arm_aapcs_vfpcc <16 x i8> @shuffle5_i8(<16 x i8> %src) {
238 ; CHECK-LABEL: shuffle5_i8:
239 ; CHECK: @ %bb.0: @ %entry
240 ; CHECK-NEXT: vrev64.8 q1, q0
241 ; CHECK-NEXT: vmov q0, q1
244 %out = shufflevector <16 x i8> %src, <16 x i8> undef, <16 x i32> <i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0, i32 15, i32 14, i32 13, i32 12, i32 11, i32 10, i32 9, i32 8>
; Reverses the four byte lanes inside each 32-bit group (mask 3,2,1,0,7,6,5,4,...).
; The assertions expect an in-place vrev32.8 on q0.
248 define arm_aapcs_vfpcc <16 x i8> @shuffle6_i8(<16 x i8> %src) {
249 ; CHECK-LABEL: shuffle6_i8:
250 ; CHECK: @ %bb.0: @ %entry
251 ; CHECK-NEXT: vrev32.8 q0, q0
254 %out = shufflevector <16 x i8> %src, <16 x i8> undef, <16 x i32> <i32 3, i32 2, i32 1, i32 0, i32 7, i32 6, i32 5, i32 4, i32 11, i32 10, i32 9, i32 8, i32 15, i32 14, i32 13, i32 12>
; Swaps adjacent byte lanes pairwise (mask 1,0,3,2,...,15,14). The assertions
; expect an in-place vrev16.8 on q0.
258 define arm_aapcs_vfpcc <16 x i8> @shuffle7_i8(<16 x i8> %src) {
259 ; CHECK-LABEL: shuffle7_i8:
260 ; CHECK: @ %bb.0: @ %entry
261 ; CHECK-NEXT: vrev16.8 q0, q0
264 %out = shufflevector <16 x i8> %src, <16 x i8> undef, <16 x i32> <i32 1, i32 0, i32 3, i32 2, i32 5, i32 4, i32 7, i32 6, i32 9, i32 8, i32 11, i32 10, i32 13, i32 12, i32 15, i32 14>
; Identity shuffle of a <2 x i64> (mask 0,1): both result lanes select the
; same-numbered source lane.
268 define arm_aapcs_vfpcc <2 x i64> @shuffle1_i64(<2 x i64> %src) {
269 ; CHECK-LABEL: shuffle1_i64:
270 ; CHECK: @ %bb.0: @ %entry
273 %out = shufflevector <2 x i64> %src, <2 x i64> undef, <2 x i32> <i32 0, i32 1>
; Swaps the two 64-bit lanes of a <2 x i64> (mask 1,0). The assertions expect
; each 64-bit lane to be moved as two s-register copies into q1, followed by a
; copy of q1 back into q0.
277 define arm_aapcs_vfpcc <2 x i64> @shuffle2_i64(<2 x i64> %src) {
278 ; CHECK-LABEL: shuffle2_i64:
279 ; CHECK: @ %bb.0: @ %entry
280 ; CHECK-NEXT: vmov.f32 s4, s2
281 ; CHECK-NEXT: vmov.f32 s5, s3
282 ; CHECK-NEXT: vmov.f32 s6, s0
283 ; CHECK-NEXT: vmov.f32 s7, s1
284 ; CHECK-NEXT: vmov q0, q1
287 %out = shufflevector <2 x i64> %src, <2 x i64> undef, <2 x i32> <i32 1, i32 0>
; Partially-undef mask (undef,1) on a <2 x i64>: only result lane 1 is
; specified, and it selects source lane 1, so the defined lane is already in place.
291 define arm_aapcs_vfpcc <2 x i64> @shuffle3_i64(<2 x i64> %src) {
292 ; CHECK-LABEL: shuffle3_i64:
293 ; CHECK: @ %bb.0: @ %entry
296 %out = shufflevector <2 x i64> %src, <2 x i64> undef, <2 x i32> <i32 undef, i32 1>
; Full lane reversal of a <4 x float> (mask 3,2,1,0). The assertions expect four
; single-lane s-register moves into q1 followed by a copy of q1 back into q0.
300 define arm_aapcs_vfpcc <4 x float> @shuffle1_f32(<4 x float> %src) {
301 ; CHECK-LABEL: shuffle1_f32:
302 ; CHECK: @ %bb.0: @ %entry
303 ; CHECK-NEXT: vmov.f32 s4, s3
304 ; CHECK-NEXT: vmov.f32 s5, s2
305 ; CHECK-NEXT: vmov.f32 s6, s1
306 ; CHECK-NEXT: vmov.f32 s7, s0
307 ; CHECK-NEXT: vmov q0, q1
310 %out = shufflevector <4 x float> %src, <4 x float> undef, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
; Identity shuffle of a <4 x float> (mask 0,1,2,3): every result lane selects
; the same-numbered source lane.
314 define arm_aapcs_vfpcc <4 x float> @shuffle2_f32(<4 x float> %src) {
315 ; CHECK-LABEL: shuffle2_f32:
316 ; CHECK: @ %bb.0: @ %entry
319 %out = shufflevector <4 x float> %src, <4 x float> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
; Swaps lanes 0 and 3 of a <4 x float> while keeping lanes 1 and 2 in place
; (mask 3,1,2,0). The assertions expect per-lane s-register moves via q1.
323 define arm_aapcs_vfpcc <4 x float> @shuffle3_f32(<4 x float> %src) {
324 ; CHECK-LABEL: shuffle3_f32:
325 ; CHECK: @ %bb.0: @ %entry
326 ; CHECK-NEXT: vmov.f32 s4, s3
327 ; CHECK-NEXT: vmov.f32 s5, s1
328 ; CHECK-NEXT: vmov.f32 s6, s2
329 ; CHECK-NEXT: vmov.f32 s7, s0
330 ; CHECK-NEXT: vmov q0, q1
333 %out = shufflevector <4 x float> %src, <4 x float> undef, <4 x i32> <i32 3, i32 1, i32 2, i32 0>
; Swaps the two 32-bit lanes inside each 64-bit half of a <4 x float>
; (mask 1,0,3,2). The assertions expect vrev64.32 into q1 plus a copy back into q0.
337 define arm_aapcs_vfpcc <4 x float> @shuffle5_f32(<4 x float> %src) {
338 ; CHECK-LABEL: shuffle5_f32:
339 ; CHECK: @ %bb.0: @ %entry
340 ; CHECK-NEXT: vrev64.32 q1, q0
341 ; CHECK-NEXT: vmov q0, q1
344 %out = shufflevector <4 x float> %src, <4 x float> undef, <4 x i32> <i32 1, i32 0, i32 3, i32 2>
; Full lane reversal of an <8 x half> (mask 7..0). The assertions expect each
; 16-bit lane to be extracted from q0 through r0/r1 and inserted into q1, which
; is then copied back into q0.
348 define arm_aapcs_vfpcc <8 x half> @shuffle1_f16(<8 x half> %src) {
349 ; CHECK-LABEL: shuffle1_f16:
350 ; CHECK: @ %bb.0: @ %entry
351 ; CHECK-NEXT: vmov.u16 r0, q0[7]
352 ; CHECK-NEXT: vmov.u16 r1, q0[6]
353 ; CHECK-NEXT: vmov.16 q1[0], r0
354 ; CHECK-NEXT: vmov.u16 r0, q0[5]
355 ; CHECK-NEXT: vmov.16 q1[1], r1
356 ; CHECK-NEXT: vmov.16 q1[2], r0
357 ; CHECK-NEXT: vmov.u16 r0, q0[4]
358 ; CHECK-NEXT: vmov.16 q1[3], r0
359 ; CHECK-NEXT: vmov.u16 r0, q0[3]
360 ; CHECK-NEXT: vmov.16 q1[4], r0
361 ; CHECK-NEXT: vmov.u16 r0, q0[2]
362 ; CHECK-NEXT: vmov.16 q1[5], r0
363 ; CHECK-NEXT: vmov.u16 r0, q0[1]
364 ; CHECK-NEXT: vmov.16 q1[6], r0
365 ; CHECK-NEXT: vmov.u16 r0, q0[0]
366 ; CHECK-NEXT: vmov.16 q1[7], r0
367 ; CHECK-NEXT: vmov q0, q1
370 %out = shufflevector <8 x half> %src, <8 x half> undef, <8 x i32> <i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
; Identity shuffle of an <8 x half> (mask 0..7): every result lane selects the
; same-numbered source lane.
374 define arm_aapcs_vfpcc <8 x half> @shuffle2_f16(<8 x half> %src) {
375 ; CHECK-LABEL: shuffle2_f16:
376 ; CHECK: @ %bb.0: @ %entry
379 %out = shufflevector <8 x half> %src, <8 x half> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
; Irregular permutation of an <8 x half> (mask 4,5,7,6,3,1,2,0). The assertions
; expect a lane-by-lane extract from q0 and insert into q1 through r0/r1,
; followed by a copy of q1 back into q0.
383 define arm_aapcs_vfpcc <8 x half> @shuffle3_f16(<8 x half> %src) {
384 ; CHECK-LABEL: shuffle3_f16:
385 ; CHECK: @ %bb.0: @ %entry
386 ; CHECK-NEXT: vmov.u16 r0, q0[4]
387 ; CHECK-NEXT: vmov.u16 r1, q0[5]
388 ; CHECK-NEXT: vmov.16 q1[0], r0
389 ; CHECK-NEXT: vmov.u16 r0, q0[7]
390 ; CHECK-NEXT: vmov.16 q1[1], r1
391 ; CHECK-NEXT: vmov.16 q1[2], r0
392 ; CHECK-NEXT: vmov.u16 r0, q0[6]
393 ; CHECK-NEXT: vmov.16 q1[3], r0
394 ; CHECK-NEXT: vmov.u16 r0, q0[3]
395 ; CHECK-NEXT: vmov.16 q1[4], r0
396 ; CHECK-NEXT: vmov.u16 r0, q0[1]
397 ; CHECK-NEXT: vmov.16 q1[5], r0
398 ; CHECK-NEXT: vmov.u16 r0, q0[2]
399 ; CHECK-NEXT: vmov.16 q1[6], r0
400 ; CHECK-NEXT: vmov.u16 r0, q0[0]
401 ; CHECK-NEXT: vmov.16 q1[7], r0
402 ; CHECK-NEXT: vmov q0, q1
405 %out = shufflevector <8 x half> %src, <8 x half> undef, <8 x i32> <i32 4, i32 5, i32 7, i32 6, i32 3, i32 1, i32 2, i32 0>
; Reverses the four 16-bit lanes inside each 64-bit half of an <8 x half>
; (mask 3,2,1,0,7,6,5,4). The assertions expect vrev64.16 into q1 plus a copy
; back into q0.
409 define arm_aapcs_vfpcc <8 x half> @shuffle5_f16(<8 x half> %src) {
410 ; CHECK-LABEL: shuffle5_f16:
411 ; CHECK: @ %bb.0: @ %entry
412 ; CHECK-NEXT: vrev64.16 q1, q0
413 ; CHECK-NEXT: vmov q0, q1
416 %out = shufflevector <8 x half> %src, <8 x half> undef, <8 x i32> <i32 3, i32 2, i32 1, i32 0, i32 7, i32 6, i32 5, i32 4>
; Swaps adjacent 16-bit lanes pairwise in an <8 x half> (mask 1,0,3,2,5,4,7,6).
; The assertions expect an in-place vrev32.16 on q0.
420 define arm_aapcs_vfpcc <8 x half> @shuffle6_f16(<8 x half> %src) {
421 ; CHECK-LABEL: shuffle6_f16:
422 ; CHECK: @ %bb.0: @ %entry
423 ; CHECK-NEXT: vrev32.16 q0, q0
426 %out = shufflevector <8 x half> %src, <8 x half> undef, <8 x i32> <i32 1, i32 0, i32 3, i32 2, i32 5, i32 4, i32 7, i32 6>
; Identity shuffle of a <2 x double> (mask 0,1): both result lanes select the
; same-numbered source lane, and the result is returned unchanged in type.
430 define arm_aapcs_vfpcc <2 x double> @shuffle1_f64(<2 x double> %src) {
431 ; CHECK-LABEL: shuffle1_f64:
432 ; CHECK: @ %bb.0: @ %entry
435 %out = shufflevector <2 x double> %src, <2 x double> undef, <2 x i32> <i32 0, i32 1>
436 ret <2 x double> %out
; Swaps the two 64-bit lanes of a <2 x double> (mask 1,0). The assertions expect
; each double to be moved as two s-register copies into q1, followed by a copy
; of q1 back into q0.
439 define arm_aapcs_vfpcc <2 x double> @shuffle2_f64(<2 x double> %src) {
440 ; CHECK-LABEL: shuffle2_f64:
441 ; CHECK: @ %bb.0: @ %entry
442 ; CHECK-NEXT: vmov.f32 s4, s2
443 ; CHECK-NEXT: vmov.f32 s5, s3
444 ; CHECK-NEXT: vmov.f32 s6, s0
445 ; CHECK-NEXT: vmov.f32 s7, s1
446 ; CHECK-NEXT: vmov q0, q1
449 %out = shufflevector <2 x double> %src, <2 x double> undef, <2 x i32> <i32 1, i32 0>
450 ret <2 x double> %out
; Partially-undef mask (undef,1) on a <2 x double>: only result lane 1 is
; specified, and it selects source lane 1, so the defined lane is already in place.
453 define arm_aapcs_vfpcc <2 x double> @shuffle3_f64(<2 x double> %src) {
454 ; CHECK-LABEL: shuffle3_f64:
455 ; CHECK: @ %bb.0: @ %entry
458 %out = shufflevector <2 x double> %src, <2 x double> undef, <2 x i32> <i32 undef, i32 1>
459 ret <2 x double> %out
; Inserts the scalar i32 argument into lane 0 of an otherwise-undef <4 x i32>.
; The assertions expect a single vmov.32 from r0 into q0[0].
463 define arm_aapcs_vfpcc <4 x i32> @insert_i32(i32 %a) {
464 ; CHECK-LABEL: insert_i32:
465 ; CHECK: @ %bb.0: @ %entry
466 ; CHECK-NEXT: vmov.32 q0[0], r0
469 %res = insertelement <4 x i32> undef, i32 %a, i32 0
; Inserts the scalar i16 argument into lane 0 of an otherwise-undef <8 x i16>.
; The assertions expect a single vmov.16 from r0 into q0[0].
473 define arm_aapcs_vfpcc <8 x i16> @insert_i16(i16 %a) {
474 ; CHECK-LABEL: insert_i16:
475 ; CHECK: @ %bb.0: @ %entry
476 ; CHECK-NEXT: vmov.16 q0[0], r0
479 %res = insertelement <8 x i16> undef, i16 %a, i32 0
; Inserts the scalar i8 argument into lane 0 of an otherwise-undef <16 x i8>.
; The assertions expect a single vmov.8 from r0 into q0[0].
483 define arm_aapcs_vfpcc <16 x i8> @insert_i8(i8 %a) {
484 ; CHECK-LABEL: insert_i8:
485 ; CHECK: @ %bb.0: @ %entry
486 ; CHECK-NEXT: vmov.8 q0[0], r0
489 %res = insertelement <16 x i8> undef, i8 %a, i32 0
; Inserts the scalar i64 argument into lane 0 of an otherwise-undef <2 x i64>.
; The assertions expect the value to arrive in r0/r1 and be written as two
; 32-bit lane inserts (q0[0] and q0[1]).
493 define arm_aapcs_vfpcc <2 x i64> @insert_i64(i64 %a) {
494 ; CHECK-LABEL: insert_i64:
495 ; CHECK: @ %bb.0: @ %entry
496 ; CHECK-NEXT: vmov.32 q0[0], r0
497 ; CHECK-NEXT: vmov.32 q0[1], r1
500 %res = insertelement <2 x i64> undef, i64 %a, i32 0
; Inserts the scalar float argument into lane 0 of an otherwise-undef
; <4 x float>. The visible assertions show only a register-kill annotation
; ($s0 redefined as part of $q0), i.e. no move instruction is listed.
504 define arm_aapcs_vfpcc <4 x float> @insert_f32(float %a) {
505 ; CHECK-LABEL: insert_f32:
506 ; CHECK: @ %bb.0: @ %entry
507 ; CHECK-NEXT: @ kill: def $s0 killed $s0 def $q0
510 %res = insertelement <4 x float> undef, float %a, i32 0
514 ; TODO: Calling convention needs fixing to pass half types directly to functions
; Loads a half through the pointer argument and inserts it into lane 0 of an
; otherwise-undef <8 x half>. The assertions expect a vldr.16 from [r0] into s0.
515 define arm_aapcs_vfpcc <8 x half> @insert_f16(half *%aa) {
516 ; CHECK-LABEL: insert_f16:
517 ; CHECK: @ %bb.0: @ %entry
518 ; CHECK-NEXT: vldr.16 s0, [r0]
521 %a = load half, half* %aa
522 %res = insertelement <8 x half> undef, half %a, i32 0
; Inserts the scalar double argument into lane 0 of an otherwise-undef
; <2 x double>. The assertions expect the insert to go through memory: a frame
; is set up, sp is realigned by clearing its low four bits (bfc), d0 is stored
; to the stack slot, and the whole q0 is reloaded from it with vldrw.u32.
526 define arm_aapcs_vfpcc <2 x double> @insert_f64(double %a) {
527 ; CHECK-LABEL: insert_f64:
528 ; CHECK: @ %bb.0: @ %entry
529 ; CHECK-NEXT: .save {r4, r6, r7, lr}
530 ; CHECK-NEXT: push {r4, r6, r7, lr}
531 ; CHECK-NEXT: .setfp r7, sp, #8
532 ; CHECK-NEXT: add r7, sp, #8
533 ; CHECK-NEXT: .pad #16
534 ; CHECK-NEXT: sub sp, #16
535 ; CHECK-NEXT: mov r4, sp
536 ; CHECK-NEXT: bfc r4, #0, #4
537 ; CHECK-NEXT: mov sp, r4
538 ; CHECK-NEXT: sub.w r4, r7, #8
539 ; CHECK-NEXT: vstr d0, [sp]
540 ; CHECK-NEXT: mov r0, sp
541 ; CHECK-NEXT: vldrw.u32 q0, [r0]
542 ; CHECK-NEXT: mov sp, r4
543 ; CHECK-NEXT: pop {r4, r6, r7, pc}
545 %res = insertelement <2 x double> undef, double %a, i32 0
546 ret <2 x double> %res
; Builds a <4 x i16> from lane 0 of %v plus elements 1..3 of a constant vector
; (mask 0,9,10,11 selects the constants 7, 1, 9) and bitcasts it to i64. The
; assertions expect lane 0 to be inserted into q0, the remaining lanes to come
; from a constant-pool load, and the result to be narrowed to the stack with
; vstrh.32 before being reloaded into r0/r1 with ldrd.
549 define arm_aapcs_vfpcc i64 @scalar_to_vector_i32(<8 x i16> %v) {
550 ; CHECK-LABEL: scalar_to_vector_i32:
551 ; CHECK: @ %bb.0: @ %entry
552 ; CHECK-NEXT: .pad #8
553 ; CHECK-NEXT: sub sp, #8
554 ; CHECK-NEXT: adr r1, .LCPI38_0
555 ; CHECK-NEXT: vmov.u16 r0, q0[0]
556 ; CHECK-NEXT: vldrw.u32 q1, [r1]
557 ; CHECK-NEXT: vmov.32 q0[0], r0
558 ; CHECK-NEXT: mov r2, sp
559 ; CHECK-NEXT: vmov.f32 s1, s5
560 ; CHECK-NEXT: vmov.f32 s2, s6
561 ; CHECK-NEXT: vmov.f32 s3, s7
562 ; CHECK-NEXT: vstrh.32 q0, [r2]
563 ; CHECK-NEXT: ldrd r0, r1, [sp], #8
565 ; CHECK-NEXT: .p2align 4
566 ; CHECK-NEXT: @ %bb.1:
567 ; CHECK-NEXT: .LCPI38_0:
568 ; CHECK-NEXT: .zero 4
569 ; CHECK-NEXT: .long 7 @ 0x7
570 ; CHECK-NEXT: .long 1 @ 0x1
571 ; CHECK-NEXT: .long 9 @ 0x9
573 %f = shufflevector <8 x i16> %v, <8 x i16> <i16 undef, i16 7, i16 1, i16 9, i16 undef, i16 undef, i16 undef, i16 undef>, <4 x i32> <i32 0, i32 9, i32 10, i32 11>
574 %0 = bitcast <4 x i16> %f to i64
; Extracts lane 0 of a <4 x i32>. The assertions expect a single vmov from s0
; into r0.
579 define arm_aapcs_vfpcc i32 @extract_i32_0(<4 x i32> %a) {
580 ; CHECK-LABEL: extract_i32_0:
581 ; CHECK: @ %bb.0: @ %entry
582 ; CHECK-NEXT: vmov r0, s0
585 %res = extractelement <4 x i32> %a, i32 0
; Extracts lane 3 of a <4 x i32>. The assertions expect a single vmov from s3
; into r0.
589 define arm_aapcs_vfpcc i32 @extract_i32_3(<4 x i32> %a) {
590 ; CHECK-LABEL: extract_i32_3:
591 ; CHECK: @ %bb.0: @ %entry
592 ; CHECK-NEXT: vmov r0, s3
595 %res = extractelement <4 x i32> %a, i32 3
; Extracts lane 0 of an <8 x i16>. The assertions expect a vmov.u16 lane read
; from q0[0] into r0.
599 define arm_aapcs_vfpcc i16 @extract_i16_0(<8 x i16> %a) {
600 ; CHECK-LABEL: extract_i16_0:
601 ; CHECK: @ %bb.0: @ %entry
602 ; CHECK-NEXT: vmov.u16 r0, q0[0]
605 %res = extractelement <8 x i16> %a, i32 0
; Extracts lane 3 of an <8 x i16>. The assertions expect a vmov.u16 lane read
; from q0[3] into r0.
609 define arm_aapcs_vfpcc i16 @extract_i16_3(<8 x i16> %a) {
610 ; CHECK-LABEL: extract_i16_3:
611 ; CHECK: @ %bb.0: @ %entry
612 ; CHECK-NEXT: vmov.u16 r0, q0[3]
615 %res = extractelement <8 x i16> %a, i32 3
; Extracts lane 0 of a <16 x i8>. The assertions expect a vmov.u8 lane read
; from q0[0] into r0.
619 define arm_aapcs_vfpcc i8 @extract_i8_0(<16 x i8> %a) {
620 ; CHECK-LABEL: extract_i8_0:
621 ; CHECK: @ %bb.0: @ %entry
622 ; CHECK-NEXT: vmov.u8 r0, q0[0]
625 %res = extractelement <16 x i8> %a, i32 0
; Extracts lane 3 of a <16 x i8>. The assertions expect a vmov.u8 lane read
; from q0[3] into r0.
629 define arm_aapcs_vfpcc i8 @extract_i8_3(<16 x i8> %a) {
630 ; CHECK-LABEL: extract_i8_3:
631 ; CHECK: @ %bb.0: @ %entry
632 ; CHECK-NEXT: vmov.u8 r0, q0[3]
635 %res = extractelement <16 x i8> %a, i32 3
; Extracts lane 0 of a <2 x i64>. The assertions expect the 64-bit value to be
; returned as two 32-bit moves: s0 into r0 and s1 into r1.
639 define arm_aapcs_vfpcc i64 @extract_i64_0(<2 x i64> %a) {
640 ; CHECK-LABEL: extract_i64_0:
641 ; CHECK: @ %bb.0: @ %entry
642 ; CHECK-NEXT: vmov r0, s0
643 ; CHECK-NEXT: vmov r1, s1
646 %res = extractelement <2 x i64> %a, i32 0
; Extracts lane 1 of a <2 x i64>. The assertions expect the 64-bit value to be
; returned as two 32-bit moves: s2 into r0 and s3 into r1.
650 define arm_aapcs_vfpcc i64 @extract_i64_1(<2 x i64> %a) {
651 ; CHECK-LABEL: extract_i64_1:
652 ; CHECK: @ %bb.0: @ %entry
653 ; CHECK-NEXT: vmov r0, s2
654 ; CHECK-NEXT: vmov r1, s3
657 %res = extractelement <2 x i64> %a, i32 1
; Extracts lane 0 of a <4 x float>. The visible assertions show only a
; register-kill annotation ($s0 taken from $q0), i.e. no move instruction is listed.
661 define arm_aapcs_vfpcc float @extract_f32_0(<4 x float> %a) {
662 ; CHECK-LABEL: extract_f32_0:
663 ; CHECK: @ %bb.0: @ %entry
664 ; CHECK-NEXT: @ kill: def $s0 killed $s0 killed $q0
667 %res = extractelement <4 x float> %a, i32 0
; Extracts lane 3 of a <4 x float>. The assertions expect a single vmov.f32
; copying s3 into s0.
671 define arm_aapcs_vfpcc float @extract_f32_3(<4 x float> %a) {
672 ; CHECK-LABEL: extract_f32_3:
673 ; CHECK: @ %bb.0: @ %entry
674 ; CHECK-NEXT: vmov.f32 s0, s3
677 %res = extractelement <4 x float> %a, i32 3
; Extracts lane 0 of an <8 x half>. The assertions expect the lane to be read
; into r1 with vmov.u16, moved to s0, and then stored with vstr.16 to the
; address in r0.
681 define arm_aapcs_vfpcc half @extract_f16_0(<8 x half> %a) {
682 ; CHECK-LABEL: extract_f16_0:
683 ; CHECK: @ %bb.0: @ %entry
684 ; CHECK-NEXT: vmov.u16 r1, q0[0]
685 ; CHECK-NEXT: vmov s0, r1
686 ; CHECK-NEXT: vstr.16 s0, [r0]
689 %res = extractelement <8 x half> %a, i32 0
; Extracts lane 3 of an <8 x half>. The assertions expect the lane to be read
; into r1 with vmov.u16, moved to s0, and then stored with vstr.16 to the
; address in r0.
693 define arm_aapcs_vfpcc half @extract_f16_3(<8 x half> %a) {
694 ; CHECK-LABEL: extract_f16_3:
695 ; CHECK: @ %bb.0: @ %entry
696 ; CHECK-NEXT: vmov.u16 r1, q0[3]
697 ; CHECK-NEXT: vmov s0, r1
698 ; CHECK-NEXT: vstr.16 s0, [r0]
701 %res = extractelement <8 x half> %a, i32 3
; Extracts lane 0 of a <2 x double>. The visible assertions show only a
; register-kill annotation ($d0 taken from $q0), i.e. no move instruction is listed.
705 define arm_aapcs_vfpcc double @extract_f64_0(<2 x double> %a) {
706 ; CHECK-LABEL: extract_f64_0:
707 ; CHECK: @ %bb.0: @ %entry
708 ; CHECK-NEXT: @ kill: def $d0 killed $d0 killed $q0
711 %res = extractelement <2 x double> %a, i32 0
; Extracts lane 1 of a <2 x double>. The assertions expect the upper double to
; be moved down as two s-register copies: s2 into s0 and s3 into s1.
715 define arm_aapcs_vfpcc double @extract_f64_1(<2 x double> %a) {
716 ; CHECK-LABEL: extract_f64_1:
717 ; CHECK: @ %bb.0: @ %entry
718 ; CHECK-NEXT: vmov.f32 s0, s2
719 ; CHECK-NEXT: vmov.f32 s1, s3
722 %res = extractelement <2 x double> %a, i32 1