1 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2 ; RUN: llc -mtriple=thumbv8.1m.main-none-none-eabi -mattr=+mve,+fullfp16 -verify-machineinstrs %s -o - | FileCheck %s --check-prefixes=CHECK,CHECK-LE
3 ; RUN: llc -mtriple=thumbv8.1m.main-none-none-eabi -mattr=+mve.fp -verify-machineinstrs %s -o - | FileCheck %s --check-prefixes=CHECK,CHECK-LE
4 ; RUN: llc -mtriple=thumbebv8.1m.main-none-none-eabi -mattr=+mve.fp -verify-machineinstrs %s -o - | FileCheck %s --check-prefixes=CHECK,CHECK-BE
; Splat of a scalar i32 argument into all four lanes of a <4 x i32>
; (insertelement into lane 0, then shufflevector with an all-zero mask).
; Little-endian runs expect a single vdup.32 from r0 into q0; the big-endian
; run expects the vdup.32 into q1 followed by a vrev64.32 into q0.
6 define arm_aapcs_vfpcc <4 x i32> @vdup_i32(i32 %src) {
7 ; CHECK-LE-LABEL: vdup_i32:
8 ; CHECK-LE: @ %bb.0: @ %entry
9 ; CHECK-LE-NEXT: vdup.32 q0, r0
10 ; CHECK-LE-NEXT: bx lr
12 ; CHECK-BE-LABEL: vdup_i32:
13 ; CHECK-BE: @ %bb.0: @ %entry
14 ; CHECK-BE-NEXT: vdup.32 q1, r0
15 ; CHECK-BE-NEXT: vrev64.32 q0, q1
16 ; CHECK-BE-NEXT: bx lr
18 %0 = insertelement <4 x i32> undef, i32 %src, i32 0
19 %out = shufflevector <4 x i32> %0, <4 x i32> undef, <4 x i32> zeroinitializer
; Splat of a scalar i16 argument into all eight lanes of an <8 x i16>.
; Little-endian runs expect a single vdup.16 from r0; the big-endian run
; expects the vdup.16 into q1 followed by a vrev64.16 into q0.
23 define arm_aapcs_vfpcc <8 x i16> @vdup_i16(i16 %src) {
24 ; CHECK-LE-LABEL: vdup_i16:
25 ; CHECK-LE: @ %bb.0: @ %entry
26 ; CHECK-LE-NEXT: vdup.16 q0, r0
27 ; CHECK-LE-NEXT: bx lr
29 ; CHECK-BE-LABEL: vdup_i16:
30 ; CHECK-BE: @ %bb.0: @ %entry
31 ; CHECK-BE-NEXT: vdup.16 q1, r0
32 ; CHECK-BE-NEXT: vrev64.16 q0, q1
33 ; CHECK-BE-NEXT: bx lr
35 %0 = insertelement <8 x i16> undef, i16 %src, i32 0
36 %out = shufflevector <8 x i16> %0, <8 x i16> undef, <8 x i32> zeroinitializer
; Splat of a scalar i8 argument into all sixteen lanes of a <16 x i8>.
; Little-endian runs expect a single vdup.8 from r0; the big-endian run
; expects the vdup.8 into q1 followed by a vrev64.8 into q0.
40 define arm_aapcs_vfpcc <16 x i8> @vdup_i8(i8 %src) {
41 ; CHECK-LE-LABEL: vdup_i8:
42 ; CHECK-LE: @ %bb.0: @ %entry
43 ; CHECK-LE-NEXT: vdup.8 q0, r0
44 ; CHECK-LE-NEXT: bx lr
46 ; CHECK-BE-LABEL: vdup_i8:
47 ; CHECK-BE: @ %bb.0: @ %entry
48 ; CHECK-BE-NEXT: vdup.8 q1, r0
49 ; CHECK-BE-NEXT: vrev64.8 q0, q1
50 ; CHECK-BE-NEXT: bx lr
52 %0 = insertelement <16 x i8> undef, i8 %src, i32 0
53 %out = shufflevector <16 x i8> %0, <16 x i8> undef, <16 x i32> zeroinitializer
; Splat of a scalar i64 argument into both lanes of a <2 x i64>.
; No vdup is expected here: the checks pin two paired-lane vmov instructions
; that write r0 into lanes 0/2 and r1 into lanes 1/3. The big-endian run
; builds the value in q1 and adds a vrev64.32 into q0.
57 define arm_aapcs_vfpcc <2 x i64> @vdup_i64(i64 %src) {
58 ; CHECK-LE-LABEL: vdup_i64:
59 ; CHECK-LE: @ %bb.0: @ %entry
60 ; CHECK-LE-NEXT: vmov q0[2], q0[0], r0, r0
61 ; CHECK-LE-NEXT: vmov q0[3], q0[1], r1, r1
62 ; CHECK-LE-NEXT: bx lr
64 ; CHECK-BE-LABEL: vdup_i64:
65 ; CHECK-BE: @ %bb.0: @ %entry
66 ; CHECK-BE-NEXT: vmov q1[2], q1[0], r0, r0
67 ; CHECK-BE-NEXT: vmov q1[3], q1[1], r1, r1
68 ; CHECK-BE-NEXT: vrev64.32 q0, q1
69 ; CHECK-BE-NEXT: bx lr
71 %0 = insertelement <2 x i64> undef, i64 %src, i32 0
72 %out = shufflevector <2 x i64> %0, <2 x i64> undef, <2 x i32> zeroinitializer
; Splat of a scalar float argument into all four lanes of a <4 x float>.
; The expected code first moves s0 to r0 and then uses vdup.32 from r0;
; the big-endian run additionally reverses with vrev64.32.
76 define arm_aapcs_vfpcc <4 x float> @vdup_f32_1(float %src) {
77 ; CHECK-LE-LABEL: vdup_f32_1:
78 ; CHECK-LE: @ %bb.0: @ %entry
79 ; CHECK-LE-NEXT: vmov r0, s0
80 ; CHECK-LE-NEXT: vdup.32 q0, r0
81 ; CHECK-LE-NEXT: bx lr
83 ; CHECK-BE-LABEL: vdup_f32_1:
84 ; CHECK-BE: @ %bb.0: @ %entry
85 ; CHECK-BE-NEXT: vmov r0, s0
86 ; CHECK-BE-NEXT: vdup.32 q1, r0
87 ; CHECK-BE-NEXT: vrev64.32 q0, q1
88 ; CHECK-BE-NEXT: bx lr
90 %0 = insertelement <4 x float> undef, float %src, i32 0
91 %out = shufflevector <4 x float> %0, <4 x float> undef, <4 x i32> zeroinitializer
; Splat of a computed float (the fadd of the two arguments) into a
; <4 x float>. The expected code is a scalar vadd.f32, a move of s0 to r0,
; and a vdup.32 from r0 (plus vrev64.32 in the big-endian run).
95 define arm_aapcs_vfpcc <4 x float> @vdup_f32_2(float %src1, float %src2) {
96 ; CHECK-LE-LABEL: vdup_f32_2:
97 ; CHECK-LE: @ %bb.0: @ %entry
98 ; CHECK-LE-NEXT: vadd.f32 s0, s0, s1
99 ; CHECK-LE-NEXT: vmov r0, s0
100 ; CHECK-LE-NEXT: vdup.32 q0, r0
101 ; CHECK-LE-NEXT: bx lr
103 ; CHECK-BE-LABEL: vdup_f32_2:
104 ; CHECK-BE: @ %bb.0: @ %entry
105 ; CHECK-BE-NEXT: vadd.f32 s0, s0, s1
106 ; CHECK-BE-NEXT: vmov r0, s0
107 ; CHECK-BE-NEXT: vdup.32 q1, r0
108 ; CHECK-BE-NEXT: vrev64.32 q0, q1
109 ; CHECK-BE-NEXT: bx lr
111 %0 = fadd float %src1, %src2
112 %1 = insertelement <4 x float> undef, float %0, i32 0
113 %out = shufflevector <4 x float> %1, <4 x float> undef, <4 x i32> zeroinitializer
; Bitcast variant of vdup_f32_1: the float argument is bitcast to i32,
; splatted as a <4 x i32>, and the result is bitcast back to <4 x float>.
; The expected instructions match those of vdup_f32_1.
117 define arm_aapcs_vfpcc <4 x float> @vdup_f32_1bc(float %src) {
118 ; CHECK-LE-LABEL: vdup_f32_1bc:
119 ; CHECK-LE: @ %bb.0: @ %entry
120 ; CHECK-LE-NEXT: vmov r0, s0
121 ; CHECK-LE-NEXT: vdup.32 q0, r0
122 ; CHECK-LE-NEXT: bx lr
124 ; CHECK-BE-LABEL: vdup_f32_1bc:
125 ; CHECK-BE: @ %bb.0: @ %entry
126 ; CHECK-BE-NEXT: vmov r0, s0
127 ; CHECK-BE-NEXT: vdup.32 q1, r0
128 ; CHECK-BE-NEXT: vrev64.32 q0, q1
129 ; CHECK-BE-NEXT: bx lr
131 %srcbc = bitcast float %src to i32
132 %0 = insertelement <4 x i32> undef, i32 %srcbc, i32 0
133 %out = shufflevector <4 x i32> %0, <4 x i32> undef, <4 x i32> zeroinitializer
134 %outbc = bitcast <4 x i32> %out to <4 x float>
135 ret <4 x float> %outbc
; Bitcast variant of vdup_f32_2: the fadd result is bitcast to i32,
; splatted as a <4 x i32>, and bitcast back to <4 x float>.
; The expected instructions match those of vdup_f32_2.
138 define arm_aapcs_vfpcc <4 x float> @vdup_f32_2bc(float %src1, float %src2) {
139 ; CHECK-LE-LABEL: vdup_f32_2bc:
140 ; CHECK-LE: @ %bb.0: @ %entry
141 ; CHECK-LE-NEXT: vadd.f32 s0, s0, s1
142 ; CHECK-LE-NEXT: vmov r0, s0
143 ; CHECK-LE-NEXT: vdup.32 q0, r0
144 ; CHECK-LE-NEXT: bx lr
146 ; CHECK-BE-LABEL: vdup_f32_2bc:
147 ; CHECK-BE: @ %bb.0: @ %entry
148 ; CHECK-BE-NEXT: vadd.f32 s0, s0, s1
149 ; CHECK-BE-NEXT: vmov r0, s0
150 ; CHECK-BE-NEXT: vdup.32 q1, r0
151 ; CHECK-BE-NEXT: vrev64.32 q0, q1
152 ; CHECK-BE-NEXT: bx lr
154 %0 = fadd float %src1, %src2
155 %bc = bitcast float %0 to i32
156 %1 = insertelement <4 x i32> undef, i32 %bc, i32 0
157 %out = shufflevector <4 x i32> %1, <4 x i32> undef, <4 x i32> zeroinitializer
158 %outbc = bitcast <4 x i32> %out to <4 x float>
159 ret <4 x float> %outbc
; Splat of a computed half (the fadd of the two arguments) into an
; <8 x half>. The expected code is a scalar vadd.f16, a vmov.f16 of s0 to
; r0, and a vdup.16 from r0 (plus vrev64.16 in the big-endian run).
162 define arm_aapcs_vfpcc <8 x half> @vdup_f16(half %0, half %1) {
163 ; CHECK-LE-LABEL: vdup_f16:
164 ; CHECK-LE: @ %bb.0: @ %entry
165 ; CHECK-LE-NEXT: vadd.f16 s0, s0, s1
166 ; CHECK-LE-NEXT: vmov.f16 r0, s0
167 ; CHECK-LE-NEXT: vdup.16 q0, r0
168 ; CHECK-LE-NEXT: bx lr
170 ; CHECK-BE-LABEL: vdup_f16:
171 ; CHECK-BE: @ %bb.0: @ %entry
172 ; CHECK-BE-NEXT: vadd.f16 s0, s0, s1
173 ; CHECK-BE-NEXT: vmov.f16 r0, s0
174 ; CHECK-BE-NEXT: vdup.16 q1, r0
175 ; CHECK-BE-NEXT: vrev64.16 q0, q1
176 ; CHECK-BE-NEXT: bx lr
178 %2 = fadd half %0, %1
179 %3 = insertelement <8 x half> undef, half %2, i32 0
180 %out = shufflevector <8 x half> %3, <8 x half> undef, <8 x i32> zeroinitializer
; Bitcast variant of vdup_f16: the fadd result is bitcast to i16, splatted
; as an <8 x i16>, and bitcast back to <8 x half>.
; The expected instructions match those of vdup_f16.
184 define arm_aapcs_vfpcc <8 x half> @vdup_f16_bc(half %0, half %1) {
185 ; CHECK-LE-LABEL: vdup_f16_bc:
186 ; CHECK-LE: @ %bb.0: @ %entry
187 ; CHECK-LE-NEXT: vadd.f16 s0, s0, s1
188 ; CHECK-LE-NEXT: vmov.f16 r0, s0
189 ; CHECK-LE-NEXT: vdup.16 q0, r0
190 ; CHECK-LE-NEXT: bx lr
192 ; CHECK-BE-LABEL: vdup_f16_bc:
193 ; CHECK-BE: @ %bb.0: @ %entry
194 ; CHECK-BE-NEXT: vadd.f16 s0, s0, s1
195 ; CHECK-BE-NEXT: vmov.f16 r0, s0
196 ; CHECK-BE-NEXT: vdup.16 q1, r0
197 ; CHECK-BE-NEXT: vrev64.16 q0, q1
198 ; CHECK-BE-NEXT: bx lr
200 %2 = fadd half %0, %1
201 %bc = bitcast half %2 to i16
202 %3 = insertelement <8 x i16> undef, i16 %bc, i32 0
203 %out = shufflevector <8 x i16> %3, <8 x i16> undef, <8 x i32> zeroinitializer
204 %outbc = bitcast <8 x i16> %out to <8 x half>
205 ret <8 x half> %outbc
; Splat of a scalar double argument into both lanes of a <2 x double>.
; The checks use the prefix shared by all runs and expect the low half
; (s0/s1) to be copied into the high half (s2/s3) with two vmov.f32.
208 define arm_aapcs_vfpcc <2 x double> @vdup_f64(double %src) {
209 ; CHECK-LABEL: vdup_f64:
210 ; CHECK: @ %bb.0: @ %entry
211 ; CHECK-NEXT: vmov.f32 s2, s0
212 ; CHECK-NEXT: vmov.f32 s3, s1
215 %0 = insertelement <2 x double> undef, double %src, i32 0
216 %out = shufflevector <2 x double> %0, <2 x double> undef, <2 x i32> zeroinitializer
217 ret <2 x double> %out
; Splat of lane 3 of a <4 x i32> vector argument across all four lanes.
; Little-endian runs expect the lane to be read from s3 into r0 and then
; vdup.32; the big-endian run reverses the input with vrev64.32 first,
; reads s7, and reverses the result again.
222 define arm_aapcs_vfpcc <4 x i32> @vduplane_i32(<4 x i32> %src) {
223 ; CHECK-LE-LABEL: vduplane_i32:
224 ; CHECK-LE: @ %bb.0: @ %entry
225 ; CHECK-LE-NEXT: vmov r0, s3
226 ; CHECK-LE-NEXT: vdup.32 q0, r0
227 ; CHECK-LE-NEXT: bx lr
229 ; CHECK-BE-LABEL: vduplane_i32:
230 ; CHECK-BE: @ %bb.0: @ %entry
231 ; CHECK-BE-NEXT: vrev64.32 q1, q0
232 ; CHECK-BE-NEXT: vmov r0, s7
233 ; CHECK-BE-NEXT: vdup.32 q1, r0
234 ; CHECK-BE-NEXT: vrev64.32 q0, q1
235 ; CHECK-BE-NEXT: bx lr
237 %out = shufflevector <4 x i32> %src, <4 x i32> undef, <4 x i32> <i32 3, i32 3, i32 3, i32 3>
; Splat of lane 3 of an <8 x i16> vector argument across all eight lanes.
; The expected code extracts the lane with vmov.u16 into r0 and then uses
; vdup.16; the big-endian run wraps this in vrev64.16 on input and output.
241 define arm_aapcs_vfpcc <8 x i16> @vduplane_i16(<8 x i16> %src) {
242 ; CHECK-LE-LABEL: vduplane_i16:
243 ; CHECK-LE: @ %bb.0: @ %entry
244 ; CHECK-LE-NEXT: vmov.u16 r0, q0[3]
245 ; CHECK-LE-NEXT: vdup.16 q0, r0
246 ; CHECK-LE-NEXT: bx lr
248 ; CHECK-BE-LABEL: vduplane_i16:
249 ; CHECK-BE: @ %bb.0: @ %entry
250 ; CHECK-BE-NEXT: vrev64.16 q1, q0
251 ; CHECK-BE-NEXT: vmov.u16 r0, q1[3]
252 ; CHECK-BE-NEXT: vdup.16 q1, r0
253 ; CHECK-BE-NEXT: vrev64.16 q0, q1
254 ; CHECK-BE-NEXT: bx lr
256 %out = shufflevector <8 x i16> %src, <8 x i16> undef, <8 x i32> <i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3>
; Splat of lane 3 of a <16 x i8> vector argument across all sixteen lanes.
; The expected code extracts the lane with vmov.u8 into r0 and then uses
; vdup.8; the big-endian run wraps this in vrev64.8 on input and output.
260 define arm_aapcs_vfpcc <16 x i8> @vduplane_i8(<16 x i8> %src) {
261 ; CHECK-LE-LABEL: vduplane_i8:
262 ; CHECK-LE: @ %bb.0: @ %entry
263 ; CHECK-LE-NEXT: vmov.u8 r0, q0[3]
264 ; CHECK-LE-NEXT: vdup.8 q0, r0
265 ; CHECK-LE-NEXT: bx lr
267 ; CHECK-BE-LABEL: vduplane_i8:
268 ; CHECK-BE: @ %bb.0: @ %entry
269 ; CHECK-BE-NEXT: vrev64.8 q1, q0
270 ; CHECK-BE-NEXT: vmov.u8 r0, q1[3]
271 ; CHECK-BE-NEXT: vdup.8 q1, r0
272 ; CHECK-BE-NEXT: vrev64.8 q0, q1
273 ; CHECK-BE-NEXT: bx lr
275 %out = shufflevector <16 x i8> %src, <16 x i8> undef, <16 x i32> <i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3>
; Splat of lane 1 of a <2 x i64> vector argument into both lanes.
; The checks use the prefix shared by all runs and expect the high half
; (s2/s3) to be copied into the low half (s0/s1) with two vmov.f32.
279 define arm_aapcs_vfpcc <2 x i64> @vduplane_i64(<2 x i64> %src) {
280 ; CHECK-LABEL: vduplane_i64:
281 ; CHECK: @ %bb.0: @ %entry
282 ; CHECK-NEXT: vmov.f32 s0, s2
283 ; CHECK-NEXT: vmov.f32 s1, s3
286 %out = shufflevector <2 x i64> %src, <2 x i64> undef, <2 x i32> <i32 1, i32 1>
; Splat of lane 3 of a <4 x float> vector argument across all four lanes.
; The expected instructions match the integer vduplane_i32 case: read s3
; (s7 after vrev64.32 in the big-endian run) into r0, then vdup.32.
290 define arm_aapcs_vfpcc <4 x float> @vduplane_f32(<4 x float> %src) {
291 ; CHECK-LE-LABEL: vduplane_f32:
292 ; CHECK-LE: @ %bb.0: @ %entry
293 ; CHECK-LE-NEXT: vmov r0, s3
294 ; CHECK-LE-NEXT: vdup.32 q0, r0
295 ; CHECK-LE-NEXT: bx lr
297 ; CHECK-BE-LABEL: vduplane_f32:
298 ; CHECK-BE: @ %bb.0: @ %entry
299 ; CHECK-BE-NEXT: vrev64.32 q1, q0
300 ; CHECK-BE-NEXT: vmov r0, s7
301 ; CHECK-BE-NEXT: vdup.32 q1, r0
302 ; CHECK-BE-NEXT: vrev64.32 q0, q1
303 ; CHECK-BE-NEXT: bx lr
305 %out = shufflevector <4 x float> %src, <4 x float> undef, <4 x i32> <i32 3, i32 3, i32 3, i32 3>
; Splat of lane 3 of an <8 x half> vector argument across all eight lanes.
; The expected instructions match the integer vduplane_i16 case: vmov.u16
; of lane 3 into r0, then vdup.16 (wrapped in vrev64.16 for big-endian).
309 define arm_aapcs_vfpcc <8 x half> @vduplane_f16(<8 x half> %src) {
310 ; CHECK-LE-LABEL: vduplane_f16:
311 ; CHECK-LE: @ %bb.0: @ %entry
312 ; CHECK-LE-NEXT: vmov.u16 r0, q0[3]
313 ; CHECK-LE-NEXT: vdup.16 q0, r0
314 ; CHECK-LE-NEXT: bx lr
316 ; CHECK-BE-LABEL: vduplane_f16:
317 ; CHECK-BE: @ %bb.0: @ %entry
318 ; CHECK-BE-NEXT: vrev64.16 q1, q0
319 ; CHECK-BE-NEXT: vmov.u16 r0, q1[3]
320 ; CHECK-BE-NEXT: vdup.16 q1, r0
321 ; CHECK-BE-NEXT: vrev64.16 q0, q1
322 ; CHECK-BE-NEXT: bx lr
324 %out = shufflevector <8 x half> %src, <8 x half> undef, <8 x i32> <i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3>
; Splat of lane 1 of a <2 x double> vector argument into both lanes.
; As with vduplane_i64, the shared-prefix checks expect s2/s3 to be copied
; into s0/s1 with two vmov.f32.
328 define arm_aapcs_vfpcc <2 x double> @vduplane_f64(<2 x double> %src) {
329 ; CHECK-LABEL: vduplane_f64:
330 ; CHECK: @ %bb.0: @ %entry
331 ; CHECK-NEXT: vmov.f32 s0, s2
332 ; CHECK-NEXT: vmov.f32 s1, s3
335 %out = shufflevector <2 x double> %src, <2 x double> undef, <2 x i32> <i32 1, i32 1>
336 ret <2 x double> %out
; Splat followed by extract: the float argument is bitcast to i32, splatted
; as a <4 x i32>, bitcast back to <4 x float>, and lane 2 of the splat is
; extracted as a scalar float.
340 define arm_aapcs_vfpcc float @vdup_f32_extract(float %src) {
341 ; CHECK-LABEL: vdup_f32_extract:
342 ; CHECK: @ %bb.0: @ %entry
345 %srcbc = bitcast float %src to i32
346 %0 = insertelement <4 x i32> undef, i32 %srcbc, i32 0
347 %out = shufflevector <4 x i32> %0, <4 x i32> undef, <4 x i32> zeroinitializer
348 %outbc = bitcast <4 x i32> %out to <4 x float>
349 %ext = extractelement <4 x float> %outbc, i32 2
; Splat followed by extract for half: the fadd result is bitcast to i16,
; splatted as an <8 x i16>, bitcast back to <8 x half>, and lane 2 of the
; splat is extracted as a scalar half. The visible check expects the scalar
; vadd.f16 as the first instruction.
353 define arm_aapcs_vfpcc half @vdup_f16_extract(half %0, half %1) {
354 ; CHECK-LABEL: vdup_f16_extract:
355 ; CHECK: @ %bb.0: @ %entry
356 ; CHECK-NEXT: vadd.f16 s0, s0, s1
359 %2 = fadd half %0, %1
360 %bc = bitcast half %2 to i16
361 %3 = insertelement <8 x i16> undef, i16 %bc, i32 0
362 %out = shufflevector <8 x i16> %3, <8 x i16> undef, <8 x i32> zeroinitializer
363 %outbc = bitcast <8 x i16> %out to <8 x half>
364 %ext = extractelement <8 x half> %outbc, i32 2
; Splat of lane 0 of an i64 argument viewed as <4 x i16>, widened to an
; <8 x i16> result by the shuffle mask. The expected code goes through the
; stack: r0/r1 are stored with strd, reloaded with a widening vldrh.u32,
; and the first 32-bit lane (s0) is moved to r0 and fed to vdup.16.
369 define arm_aapcs_vfpcc <8 x i16> @bitcast_i64_v8i16(i64 %a) {
370 ; CHECK-LE-LABEL: bitcast_i64_v8i16:
372 ; CHECK-LE-NEXT: .pad #8
373 ; CHECK-LE-NEXT: sub sp, #8
374 ; CHECK-LE-NEXT: strd r0, r1, [sp]
375 ; CHECK-LE-NEXT: mov r0, sp
376 ; CHECK-LE-NEXT: vldrh.u32 q0, [r0]
377 ; CHECK-LE-NEXT: vmov r0, s0
378 ; CHECK-LE-NEXT: vdup.16 q0, r0
379 ; CHECK-LE-NEXT: add sp, #8
380 ; CHECK-LE-NEXT: bx lr
382 ; CHECK-BE-LABEL: bitcast_i64_v8i16:
384 ; CHECK-BE-NEXT: .pad #8
385 ; CHECK-BE-NEXT: sub sp, #8
386 ; CHECK-BE-NEXT: strd r0, r1, [sp]
387 ; CHECK-BE-NEXT: mov r0, sp
388 ; CHECK-BE-NEXT: vldrh.u32 q0, [r0]
389 ; CHECK-BE-NEXT: vmov r0, s0
390 ; CHECK-BE-NEXT: vdup.16 q1, r0
391 ; CHECK-BE-NEXT: vrev64.16 q0, q1
392 ; CHECK-BE-NEXT: add sp, #8
393 ; CHECK-BE-NEXT: bx lr
394 %b = bitcast i64 %a to <4 x i16>
395 %r = shufflevector <4 x i16> %b, <4 x i16> poison, <8 x i32> zeroinitializer
; Splat of lane 0 of an i128 argument viewed as <8 x i16>.
; Little-endian runs expect a direct vdup.16 from r0. The big-endian run
; expects r0 to be inserted into a vector lane, a vrev32.16, a vmov.u16
; extract of lane 0, the vdup.16, and a final vrev64.16.
399 define arm_aapcs_vfpcc <8 x i16> @bitcast_i128_v8i16(i128 %a) {
400 ; CHECK-LE-LABEL: bitcast_i128_v8i16:
402 ; CHECK-LE-NEXT: vdup.16 q0, r0
403 ; CHECK-LE-NEXT: bx lr
405 ; CHECK-BE-LABEL: bitcast_i128_v8i16:
407 ; CHECK-BE-NEXT: vmov.32 q0[0], r0
408 ; CHECK-BE-NEXT: vrev32.16 q0, q0
409 ; CHECK-BE-NEXT: vmov.u16 r0, q0[0]
410 ; CHECK-BE-NEXT: vdup.16 q1, r0
411 ; CHECK-BE-NEXT: vrev64.16 q0, q1
412 ; CHECK-BE-NEXT: bx lr
413 %b = bitcast i128 %a to <8 x i16>
414 %r = shufflevector <8 x i16> %b, <8 x i16> poison, <8 x i32> zeroinitializer
; Same as bitcast_i64_v8i16 but splatting lane 1 of the <4 x i16> view.
; The expected code is the same stack round-trip (strd, vldrh.u32), with
; the second 32-bit lane (s1) moved to r0 before the vdup.16.
418 define arm_aapcs_vfpcc <8 x i16> @bitcast_i64_v8i16_lane1(i64 %a) {
419 ; CHECK-LE-LABEL: bitcast_i64_v8i16_lane1:
421 ; CHECK-LE-NEXT: .pad #8
422 ; CHECK-LE-NEXT: sub sp, #8
423 ; CHECK-LE-NEXT: strd r0, r1, [sp]
424 ; CHECK-LE-NEXT: mov r0, sp
425 ; CHECK-LE-NEXT: vldrh.u32 q0, [r0]
426 ; CHECK-LE-NEXT: vmov r0, s1
427 ; CHECK-LE-NEXT: vdup.16 q0, r0
428 ; CHECK-LE-NEXT: add sp, #8
429 ; CHECK-LE-NEXT: bx lr
431 ; CHECK-BE-LABEL: bitcast_i64_v8i16_lane1:
433 ; CHECK-BE-NEXT: .pad #8
434 ; CHECK-BE-NEXT: sub sp, #8
435 ; CHECK-BE-NEXT: strd r0, r1, [sp]
436 ; CHECK-BE-NEXT: mov r0, sp
437 ; CHECK-BE-NEXT: vldrh.u32 q0, [r0]
438 ; CHECK-BE-NEXT: vmov r0, s1
439 ; CHECK-BE-NEXT: vdup.16 q1, r0
440 ; CHECK-BE-NEXT: vrev64.16 q0, q1
441 ; CHECK-BE-NEXT: add sp, #8
442 ; CHECK-BE-NEXT: bx lr
443 %b = bitcast i64 %a to <4 x i16>
444 %r = shufflevector <4 x i16> %b, <4 x i16> poison, <8 x i32> <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
; Splat of lane 0 of a double argument viewed as <4 x i16>, widened to an
; <8 x i16> result. The expected code extracts lane 0 directly from q0
; with vmov.u16 and then uses vdup.16; the big-endian run wraps this in
; vrev64.16 on input and output.
448 define arm_aapcs_vfpcc <8 x i16> @bitcast_f64_v8i16(double %a) {
449 ; CHECK-LE-LABEL: bitcast_f64_v8i16:
451 ; CHECK-LE-NEXT: vmov.u16 r0, q0[0]
452 ; CHECK-LE-NEXT: vdup.16 q0, r0
453 ; CHECK-LE-NEXT: bx lr
455 ; CHECK-BE-LABEL: bitcast_f64_v8i16:
457 ; CHECK-BE-NEXT: vrev64.16 q1, q0
458 ; CHECK-BE-NEXT: vmov.u16 r0, q1[0]
459 ; CHECK-BE-NEXT: vdup.16 q1, r0
460 ; CHECK-BE-NEXT: vrev64.16 q0, q1
461 ; CHECK-BE-NEXT: bx lr
462 %b = bitcast double %a to <4 x i16>
463 %r = shufflevector <4 x i16> %b, <4 x i16> poison, <8 x i32> zeroinitializer
; Splat of lane 0 of an i64 argument viewed as <4 x half>, widened to an
; <8 x half> result. The expected code inserts r0 into a vector lane,
; extracts 16-bit lane 0 with vmov.u16, and uses vdup.16; the big-endian
; run adds a vrev32.16 before the extract and a vrev64.16 after the vdup.
467 define arm_aapcs_vfpcc <8 x half> @bitcast_i64_v8f16(i64 %a) {
468 ; CHECK-LE-LABEL: bitcast_i64_v8f16:
470 ; CHECK-LE-NEXT: vmov.32 q0[0], r0
471 ; CHECK-LE-NEXT: vmov.u16 r0, q0[0]
472 ; CHECK-LE-NEXT: vdup.16 q0, r0
473 ; CHECK-LE-NEXT: bx lr
475 ; CHECK-BE-LABEL: bitcast_i64_v8f16:
477 ; CHECK-BE-NEXT: vmov.32 q0[0], r0
478 ; CHECK-BE-NEXT: vrev32.16 q0, q0
479 ; CHECK-BE-NEXT: vmov.u16 r0, q0[0]
480 ; CHECK-BE-NEXT: vdup.16 q1, r0
481 ; CHECK-BE-NEXT: vrev64.16 q0, q1
482 ; CHECK-BE-NEXT: bx lr
483 %b = bitcast i64 %a to <4 x half>
484 %r = shufflevector <4 x half> %b, <4 x half> poison, <8 x i32> zeroinitializer
; Splat of an i64 argument, viewed as <1 x i64>, into both lanes of a
; <2 x i64>. The expected code matches vdup_i64: two paired-lane vmov
; instructions from r0/r1, plus vrev64.32 in the big-endian run.
; NOTE(review): the function name says v2f64, but the types used here are
; i64, <1 x i64> and <2 x i64>; no floating-point type is involved.
488 define arm_aapcs_vfpcc <2 x i64> @bitcast_i64_v2f64(i64 %a) {
489 ; CHECK-LE-LABEL: bitcast_i64_v2f64:
491 ; CHECK-LE-NEXT: vmov q0[2], q0[0], r0, r0
492 ; CHECK-LE-NEXT: vmov q0[3], q0[1], r1, r1
493 ; CHECK-LE-NEXT: bx lr
495 ; CHECK-BE-LABEL: bitcast_i64_v2f64:
497 ; CHECK-BE-NEXT: vmov q1[2], q1[0], r0, r0
498 ; CHECK-BE-NEXT: vmov q1[3], q1[1], r1, r1
499 ; CHECK-BE-NEXT: vrev64.32 q0, q1
500 ; CHECK-BE-NEXT: bx lr
501 %b = bitcast i64 %a to <1 x i64>
502 %r = shufflevector <1 x i64> %b, <1 x i64> poison, <2 x i32> zeroinitializer
; Splat of lane 0 of a <2 x double> argument bitcast to <2 x i64>.
; The shared-prefix checks expect the low half (s0/s1) to be copied into
; the high half (s2/s3) with two vmov.f32.
506 define arm_aapcs_vfpcc <2 x i64> @bitcast_v2f64_v2i64(<2 x double> %a) {
507 ; CHECK-LABEL: bitcast_v2f64_v2i64:
509 ; CHECK-NEXT: vmov.f32 s2, s0
510 ; CHECK-NEXT: vmov.f32 s3, s1
512 %b = bitcast <2 x double> %a to <2 x i64>
513 %r = shufflevector <2 x i64> %b, <2 x i64> poison, <2 x i32> zeroinitializer
; Splat of lane 0 of an <8 x i16> argument bitcast to <2 x i64>.
; The shared-prefix checks expect the low half (s0/s1) to be copied into
; the high half (s2/s3) with two vmov.f32.
517 define arm_aapcs_vfpcc <2 x i64> @bitcast_v8i16_v2i64(<8 x i16> %a) {
518 ; CHECK-LABEL: bitcast_v8i16_v2i64:
520 ; CHECK-NEXT: vmov.f32 s2, s0
521 ; CHECK-NEXT: vmov.f32 s3, s1
523 %b = bitcast <8 x i16> %a to <2 x i64>
524 %r = shufflevector <2 x i64> %b, <2 x i64> poison, <2 x i32> zeroinitializer
; Splat of lane 0 of a 128-bit vector argument bitcast to <8 x i16>.
; The expected code extracts lane 0 with vmov.u16 and uses vdup.16; the
; big-endian run wraps this in vrev64.16 on input and output.
; NOTE(review): the function name says v2f64, but the argument type here
; is <2 x i64>.
528 define arm_aapcs_vfpcc <8 x i16> @bitcast_v2f64_v8i16(<2 x i64> %a) {
529 ; CHECK-LE-LABEL: bitcast_v2f64_v8i16:
531 ; CHECK-LE-NEXT: vmov.u16 r0, q0[0]
532 ; CHECK-LE-NEXT: vdup.16 q0, r0
533 ; CHECK-LE-NEXT: bx lr
535 ; CHECK-BE-LABEL: bitcast_v2f64_v8i16:
537 ; CHECK-BE-NEXT: vrev64.16 q1, q0
538 ; CHECK-BE-NEXT: vmov.u16 r0, q1[0]
539 ; CHECK-BE-NEXT: vdup.16 q1, r0
540 ; CHECK-BE-NEXT: vrev64.16 q0, q1
541 ; CHECK-BE-NEXT: bx lr
542 %b = bitcast <2 x i64> %a to <8 x i16>
543 %r = shufflevector <8 x i16> %b, <8 x i16> poison, <8 x i32> zeroinitializer
; Splat of lane 0 of an i32 argument viewed as <2 x i16>, widened to an
; <8 x i16> result by the shuffle mask. Little-endian runs expect a direct
; vdup.16 from r0; the big-endian run expects r0 to be inserted into a
; vector lane, a vrev32.16, a vmov.u16 extract of lane 0, the vdup.16, and
; a final vrev64.16.
547 define arm_aapcs_vfpcc <8 x i16> @other_max_case(i32 %blockSize) {
548 ; CHECK-LE-LABEL: other_max_case:
550 ; CHECK-LE-NEXT: vdup.16 q0, r0
551 ; CHECK-LE-NEXT: bx lr
553 ; CHECK-BE-LABEL: other_max_case:
555 ; CHECK-BE-NEXT: vmov.32 q0[0], r0
556 ; CHECK-BE-NEXT: vrev32.16 q0, q0
557 ; CHECK-BE-NEXT: vmov.u16 r0, q0[0]
558 ; CHECK-BE-NEXT: vdup.16 q1, r0
559 ; CHECK-BE-NEXT: vrev64.16 q0, q1
560 ; CHECK-BE-NEXT: bx lr
561 %vec.blockSize = bitcast i32 %blockSize to <2 x i16>
562 %.splat2 = shufflevector <2 x i16> %vec.blockSize, <2 x i16> poison, <8 x i32> zeroinitializer
563 ret <8 x i16> %.splat2