1 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2 ; RUN: llc -mtriple=thumbv8.1m.main-none-none-eabi -mattr=+mve,+fullfp16 -verify-machineinstrs %s -o - | FileCheck %s
3 ; RUN: llc -mtriple=thumbv8.1m.main-none-none-eabi -mattr=+mve.fp -verify-machineinstrs %s -o - | FileCheck %s
; Single-source shuffle of %s1 that swaps its two 4-element halves (mask 4,5,6,7,0,1,2,3); %s2 is unused. The checks expect four 32-bit s-register moves into q1, then a copy back to q0.
7 define arm_aapcs_vfpcc <8 x i16> @shuffle_i16_45670123(<8 x i16> %s1, <8 x i16> %s2) {
8 ; CHECK-LABEL: shuffle_i16_45670123:
9 ; CHECK: @ %bb.0: @ %entry
10 ; CHECK-NEXT: vmov.f32 s4, s2
11 ; CHECK-NEXT: vmov.f32 s6, s0
12 ; CHECK-NEXT: vmov.f32 s5, s3
13 ; CHECK-NEXT: vmov.f32 s7, s1
14 ; CHECK-NEXT: vmov q0, q1
17 %out = shufflevector <8 x i16> %s1, <8 x i16> %s2, <8 x i32> <i32 4, i32 5, i32 6, i32 7, i32 0, i32 1, i32 2, i32 3>
; Reverses the order of the four adjacent i16 pairs of %s1 while keeping the order inside each pair (mask 6,7,4,5,2,3,0,1); %s2 is unused. The checks expect one 32-bit s-register move per pair.
21 define arm_aapcs_vfpcc <8 x i16> @shuffle_i16_67452301(<8 x i16> %s1, <8 x i16> %s2) {
22 ; CHECK-LABEL: shuffle_i16_67452301:
23 ; CHECK: @ %bb.0: @ %entry
24 ; CHECK-NEXT: vmov.f32 s4, s3
25 ; CHECK-NEXT: vmov.f32 s5, s2
26 ; CHECK-NEXT: vmov.f32 s6, s1
27 ; CHECK-NEXT: vmov.f32 s7, s0
28 ; CHECK-NEXT: vmov q0, q1
31 %out = shufflevector <8 x i16> %s1, <8 x i16> %s2, <8 x i32> <i32 6, i32 7, i32 4, i32 5, i32 2, i32 3, i32 0, i32 1>
; Full reversal of the eight i16 elements of %s1 (mask 7..0); %s2 is unused. The checks expect vrev64.16 into q1 followed by s-register moves that exchange the two 64-bit halves back into q0.
35 define arm_aapcs_vfpcc <8 x i16> @shuffle_i16_76543210(<8 x i16> %s1, <8 x i16> %s2) {
36 ; CHECK-LABEL: shuffle_i16_76543210:
37 ; CHECK: @ %bb.0: @ %entry
38 ; CHECK-NEXT: vrev64.16 q1, q0
39 ; CHECK-NEXT: vmov.f32 s0, s6
40 ; CHECK-NEXT: vmov.f32 s1, s7
41 ; CHECK-NEXT: vmov.f32 s2, s4
42 ; CHECK-NEXT: vmov.f32 s3, s5
45 %out = shufflevector <8 x i16> %s1, <8 x i16> %s2, <8 x i32> <i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
; Identity mask over %s1 (0..7); %s2 is unused. The visible checks list no shuffle instructions after the block label.
49 define arm_aapcs_vfpcc <8 x i16> @shuffle_i16_01234567(<8 x i16> %s1, <8 x i16> %s2) {
50 ; CHECK-LABEL: shuffle_i16_01234567:
51 ; CHECK: @ %bb.0: @ %entry
54 %out = shufflevector <8 x i16> %s1, <8 x i16> %s2, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
; Two-source shuffle: result elements 0-3 come from %s1 elements 0-3, result elements 4-7 from %s2 elements 4-7 (mask indices 12-15). The checks expect two s-register moves from q1's upper half into q0's upper half.
58 define arm_aapcs_vfpcc <8 x i16> @shuffle_i16_0123cdef(<8 x i16> %s1, <8 x i16> %s2) {
59 ; CHECK-LABEL: shuffle_i16_0123cdef:
60 ; CHECK: @ %bb.0: @ %entry
61 ; CHECK-NEXT: vmov.f32 s2, s6
62 ; CHECK-NEXT: vmov.f32 s3, s7
65 %out = shufflevector <8 x i16> %s1, <8 x i16> %s2, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 12, i32 13, i32 14, i32 15>
; Odd result lanes take %s1 elements 7,5,3,1; even result lanes are undef. The expected code is the same four s-register moves as for shuffle_i16_67452301.
69 define arm_aapcs_vfpcc <8 x i16> @shuffle_i16_u7u5u3u1(<8 x i16> %s1, <8 x i16> %s2) {
70 ; CHECK-LABEL: shuffle_i16_u7u5u3u1:
71 ; CHECK: @ %bb.0: @ %entry
72 ; CHECK-NEXT: vmov.f32 s4, s3
73 ; CHECK-NEXT: vmov.f32 s5, s2
74 ; CHECK-NEXT: vmov.f32 s6, s1
75 ; CHECK-NEXT: vmov.f32 s7, s0
76 ; CHECK-NEXT: vmov q0, q1
79 %out = shufflevector <8 x i16> %s1, <8 x i16> %s2, <8 x i32> <i32 undef, i32 7, i32 undef, i32 5, i32 undef, i32 3, i32 undef, i32 1>
; Even result lanes take %s1 elements 6,4,2,0; odd result lanes are undef. The expected code is the same four s-register moves as for shuffle_i16_67452301.
83 define arm_aapcs_vfpcc <8 x i16> @shuffle_i16_6u4u2u0u(<8 x i16> %s1, <8 x i16> %s2) {
84 ; CHECK-LABEL: shuffle_i16_6u4u2u0u:
85 ; CHECK: @ %bb.0: @ %entry
86 ; CHECK-NEXT: vmov.f32 s4, s3
87 ; CHECK-NEXT: vmov.f32 s5, s2
88 ; CHECK-NEXT: vmov.f32 s6, s1
89 ; CHECK-NEXT: vmov.f32 s7, s0
90 ; CHECK-NEXT: vmov q0, q1
93 %out = shufflevector <8 x i16> %s1, <8 x i16> %s2, <8 x i32> <i32 6, i32 undef, i32 4, i32 undef, i32 2, i32 undef, i32 0, i32 undef>
; Only result lane 0 is defined and it takes %s1 element 0; all other lanes are undef. The visible checks list no shuffle instructions after the block label.
97 define arm_aapcs_vfpcc <8 x i16> @shuffle_i16_0uuuuuuu(<8 x i16> %s1, <8 x i16> %s2) {
98 ; CHECK-LABEL: shuffle_i16_0uuuuuuu:
99 ; CHECK: @ %bb.0: @ %entry
102 %out = shufflevector <8 x i16> %s1, <8 x i16> %s2, <8 x i32> <i32 0, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
; Only result lane 4 is defined and it takes %s1 element 0; all other lanes are undef. The checks expect lane 0 to be extracted to r0 and broadcast to every lane with vdup.16.
106 define arm_aapcs_vfpcc <8 x i16> @shuffle_i16_uuuu0uuu(<8 x i16> %s1, <8 x i16> %s2) {
107 ; CHECK-LABEL: shuffle_i16_uuuu0uuu:
108 ; CHECK: @ %bb.0: @ %entry
109 ; CHECK-NEXT: vmov.u16 r0, q0[0]
110 ; CHECK-NEXT: vdup.16 q0, r0
113 %out = shufflevector <8 x i16> %s1, <8 x i16> %s2, <8 x i32> <i32 undef, i32 undef, i32 undef, i32 undef, i32 0, i32 undef, i32 undef, i32 undef>
; Reverses the order of the four 4-byte groups of %s1 while keeping byte order inside each group (mask 12-15, 8-11, 4-7, 0-3); %s2 is unused. The checks expect one 32-bit s-register move per group.
120 define arm_aapcs_vfpcc <16 x i8> @shuffle_i8_cdef89ab45670123(<16 x i8> %s1, <16 x i8> %s2) {
121 ; CHECK-LABEL: shuffle_i8_cdef89ab45670123:
122 ; CHECK: @ %bb.0: @ %entry
123 ; CHECK-NEXT: vmov.f32 s4, s3
124 ; CHECK-NEXT: vmov.f32 s5, s2
125 ; CHECK-NEXT: vmov.f32 s6, s1
126 ; CHECK-NEXT: vmov.f32 s7, s0
127 ; CHECK-NEXT: vmov q0, q1
130 %out = shufflevector <16 x i8> %s1, <16 x i8> %s2, <16 x i32> <i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 4, i32 5, i32 6, i32 7, i32 0, i32 1, i32 2, i32 3>
; Reverses the order of the eight adjacent byte pairs of %s1 while keeping the order inside each pair; %s2 is unused. The checks expect q0 to be copied to q1 and the result rebuilt in q0 one byte at a time (16 extract/insert pairs); the first extract reads q0 before it is overwritten, the rest read the q1 copy.
134 define arm_aapcs_vfpcc <16 x i8> @shuffle_i8_efcdab8967452301(<16 x i8> %s1, <16 x i8> %s2) {
135 ; CHECK-LABEL: shuffle_i8_efcdab8967452301:
136 ; CHECK: @ %bb.0: @ %entry
137 ; CHECK-NEXT: vmov q1, q0
138 ; CHECK-NEXT: vmov.u8 r0, q0[14]
139 ; CHECK-NEXT: vmov.8 q0[0], r0
140 ; CHECK-NEXT: vmov.u8 r0, q1[15]
141 ; CHECK-NEXT: vmov.8 q0[1], r0
142 ; CHECK-NEXT: vmov.u8 r0, q1[12]
143 ; CHECK-NEXT: vmov.8 q0[2], r0
144 ; CHECK-NEXT: vmov.u8 r0, q1[13]
145 ; CHECK-NEXT: vmov.8 q0[3], r0
146 ; CHECK-NEXT: vmov.u8 r0, q1[10]
147 ; CHECK-NEXT: vmov.8 q0[4], r0
148 ; CHECK-NEXT: vmov.u8 r0, q1[11]
149 ; CHECK-NEXT: vmov.8 q0[5], r0
150 ; CHECK-NEXT: vmov.u8 r0, q1[8]
151 ; CHECK-NEXT: vmov.8 q0[6], r0
152 ; CHECK-NEXT: vmov.u8 r0, q1[9]
153 ; CHECK-NEXT: vmov.8 q0[7], r0
154 ; CHECK-NEXT: vmov.u8 r0, q1[6]
155 ; CHECK-NEXT: vmov.8 q0[8], r0
156 ; CHECK-NEXT: vmov.u8 r0, q1[7]
157 ; CHECK-NEXT: vmov.8 q0[9], r0
158 ; CHECK-NEXT: vmov.u8 r0, q1[4]
159 ; CHECK-NEXT: vmov.8 q0[10], r0
160 ; CHECK-NEXT: vmov.u8 r0, q1[5]
161 ; CHECK-NEXT: vmov.8 q0[11], r0
162 ; CHECK-NEXT: vmov.u8 r0, q1[2]
163 ; CHECK-NEXT: vmov.8 q0[12], r0
164 ; CHECK-NEXT: vmov.u8 r0, q1[3]
165 ; CHECK-NEXT: vmov.8 q0[13], r0
166 ; CHECK-NEXT: vmov.u8 r0, q1[0]
167 ; CHECK-NEXT: vmov.8 q0[14], r0
168 ; CHECK-NEXT: vmov.u8 r0, q1[1]
169 ; CHECK-NEXT: vmov.8 q0[15], r0
172 %out = shufflevector <16 x i8> %s1, <16 x i8> %s2, <16 x i32> <i32 14, i32 15, i32 12, i32 13, i32 10, i32 11, i32 8, i32 9, i32 6, i32 7, i32 4, i32 5, i32 2, i32 3, i32 0, i32 1>
; Full reversal of the sixteen bytes of %s1 (mask 15..0); %s2 is unused. The checks expect vrev64.8 into q1 followed by s-register moves that exchange the two 64-bit halves back into q0.
176 define arm_aapcs_vfpcc <16 x i8> @shuffle_i8_fedcba9876543210(<16 x i8> %s1, <16 x i8> %s2) {
177 ; CHECK-LABEL: shuffle_i8_fedcba9876543210:
178 ; CHECK: @ %bb.0: @ %entry
179 ; CHECK-NEXT: vrev64.8 q1, q0
180 ; CHECK-NEXT: vmov.f32 s0, s6
181 ; CHECK-NEXT: vmov.f32 s1, s7
182 ; CHECK-NEXT: vmov.f32 s2, s4
183 ; CHECK-NEXT: vmov.f32 s3, s5
186 %out = shufflevector <16 x i8> %s1, <16 x i8> %s2, <16 x i32> <i32 15, i32 14, i32 13, i32 12, i32 11, i32 10, i32 9, i32 8, i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
; Identity mask over %s1 (0..15); %s2 is unused. The visible checks list no shuffle instructions after the block label.
190 define arm_aapcs_vfpcc <16 x i8> @shuffle_i8_0123456789abcdef(<16 x i8> %s1, <16 x i8> %s2) {
191 ; CHECK-LABEL: shuffle_i8_0123456789abcdef:
192 ; CHECK: @ %bb.0: @ %entry
195 %out = shufflevector <16 x i8> %s1, <16 x i8> %s2, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
; Two-source shuffle that interleaves 4-byte groups: %s1 bytes 0-3, %s2 bytes 0-3 (mask 16-19), %s1 bytes 4-7, %s2 bytes 4-7 (mask 20-23). The checks expect four s-register moves into q2, alternating between q0 and q1 sources, then a copy to q0.
199 define arm_aapcs_vfpcc <16 x i8> @shuffle_i8_0123ghij4567klmn(<16 x i8> %s1, <16 x i8> %s2) {
200 ; CHECK-LABEL: shuffle_i8_0123ghij4567klmn:
201 ; CHECK: @ %bb.0: @ %entry
202 ; CHECK-NEXT: vmov.f32 s8, s0
203 ; CHECK-NEXT: vmov.f32 s9, s4
204 ; CHECK-NEXT: vmov.f32 s10, s1
205 ; CHECK-NEXT: vmov.f32 s11, s5
206 ; CHECK-NEXT: vmov q0, q2
209 %out = shufflevector <16 x i8> %s1, <16 x i8> %s2, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 16, i32 17, i32 18, i32 19, i32 4, i32 5, i32 6, i32 7, i32 20, i32 21, i32 22, i32 23>
; Same 4-byte group reversal of %s1 as shuffle_i8_cdef89ab45670123, but with one undef lane in each group. The expected code is the same four s-register moves.
213 define arm_aapcs_vfpcc <16 x i8> @shuffle_i8_cdeu89ub4u67u123(<16 x i8> %s1, <16 x i8> %s2) {
214 ; CHECK-LABEL: shuffle_i8_cdeu89ub4u67u123:
215 ; CHECK: @ %bb.0: @ %entry
216 ; CHECK-NEXT: vmov.f32 s4, s3
217 ; CHECK-NEXT: vmov.f32 s5, s2
218 ; CHECK-NEXT: vmov.f32 s6, s1
219 ; CHECK-NEXT: vmov.f32 s7, s0
220 ; CHECK-NEXT: vmov q0, q1
223 %out = shufflevector <16 x i8> %s1, <16 x i8> %s2, <16 x i32> <i32 12, i32 13, i32 14, i32 undef, i32 8, i32 9, i32 undef, i32 11, i32 4, i32 undef, i32 6, i32 7, i32 undef, i32 1, i32 2, i32 3>
; Same 4-byte group reversal of %s1 as shuffle_i8_cdef89ab45670123, but with two undef lanes in each group. The expected code is the same four s-register moves.
227 define arm_aapcs_vfpcc <16 x i8> @shuffle_i8_cduu8uubuu67u12u(<16 x i8> %s1, <16 x i8> %s2) {
228 ; CHECK-LABEL: shuffle_i8_cduu8uubuu67u12u:
229 ; CHECK: @ %bb.0: @ %entry
230 ; CHECK-NEXT: vmov.f32 s4, s3
231 ; CHECK-NEXT: vmov.f32 s5, s2
232 ; CHECK-NEXT: vmov.f32 s6, s1
233 ; CHECK-NEXT: vmov.f32 s7, s0
234 ; CHECK-NEXT: vmov q0, q1
237 %out = shufflevector <16 x i8> %s1, <16 x i8> %s2, <16 x i32> <i32 12, i32 13, i32 undef, i32 undef, i32 8, i32 undef, i32 undef, i32 11, i32 undef, i32 undef, i32 6, i32 7, i32 undef, i32 1, i32 2, i32 undef>
; Same 4-byte group reversal of %s1 as shuffle_i8_cdef89ab45670123, but with only one defined lane left in each group (12, 11, 6 and 2). The expected code is the same four s-register moves.
241 define arm_aapcs_vfpcc <16 x i8> @shuffle_i8_cuuuuuubuu6uuu2u(<16 x i8> %s1, <16 x i8> %s2) {
242 ; CHECK-LABEL: shuffle_i8_cuuuuuubuu6uuu2u:
243 ; CHECK: @ %bb.0: @ %entry
244 ; CHECK-NEXT: vmov.f32 s4, s3
245 ; CHECK-NEXT: vmov.f32 s5, s2
246 ; CHECK-NEXT: vmov.f32 s6, s1
247 ; CHECK-NEXT: vmov.f32 s7, s0
248 ; CHECK-NEXT: vmov q0, q1
251 %out = shufflevector <16 x i8> %s1, <16 x i8> %s2, <16 x i32> <i32 12, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 11, i32 undef, i32 undef, i32 6, i32 undef, i32 undef, i32 undef, i32 2, i32 undef>
; Variant of the 4-byte group reversal of %s1 in which the third result group (mask 4,5,undef,0) is not one contiguous source group. The checks expect per-byte inserts for result lanes 8, 9 and 11, and s-register moves for the other three groups.
255 define arm_aapcs_vfpcc <16 x i8> @shuffle_i8_cdef89ab45u700123(<16 x i8> %s1, <16 x i8> %s2) {
256 ; CHECK-LABEL: shuffle_i8_cdef89ab45u700123:
257 ; CHECK: @ %bb.0: @ %entry
258 ; CHECK-NEXT: vmov.u8 r0, q0[4]
259 ; CHECK-NEXT: vmov.8 q1[8], r0
260 ; CHECK-NEXT: vmov.u8 r0, q0[5]
261 ; CHECK-NEXT: vmov.8 q1[9], r0
262 ; CHECK-NEXT: vmov.u8 r0, q0[0]
263 ; CHECK-NEXT: vmov.8 q1[11], r0
264 ; CHECK-NEXT: vmov.f32 s4, s3
265 ; CHECK-NEXT: vmov.f32 s5, s2
266 ; CHECK-NEXT: vmov.f32 s7, s0
267 ; CHECK-NEXT: vmov q0, q1
270 %out = shufflevector <16 x i8> %s1, <16 x i8> %s2, <16 x i32> <i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 4, i32 5, i32 undef, i32 0, i32 0, i32 1, i32 2, i32 3>
; Half-precision counterpart of shuffle_i16_45670123: swaps the two 4-element halves of %s1; %s2 is unused. The checks expect four 32-bit s-register moves into q1, then a copy back to q0.
278 define arm_aapcs_vfpcc <8 x half> @shuffle_f16_45670123(<8 x half> %s1, <8 x half> %s2) {
279 ; CHECK-LABEL: shuffle_f16_45670123:
280 ; CHECK: @ %bb.0: @ %entry
281 ; CHECK-NEXT: vmov.f32 s4, s2
282 ; CHECK-NEXT: vmov.f32 s6, s0
283 ; CHECK-NEXT: vmov.f32 s5, s3
284 ; CHECK-NEXT: vmov.f32 s7, s1
285 ; CHECK-NEXT: vmov q0, q1
288 %out = shufflevector <8 x half> %s1, <8 x half> %s2, <8 x i32> <i32 4, i32 5, i32 6, i32 7, i32 0, i32 1, i32 2, i32 3>
; Half-precision counterpart of shuffle_i16_67452301: reverses the order of the four adjacent half pairs of %s1 while keeping the order inside each pair; %s2 is unused. The checks expect one 32-bit s-register move per pair.
292 define arm_aapcs_vfpcc <8 x half> @shuffle_f16_67452301(<8 x half> %s1, <8 x half> %s2) {
293 ; CHECK-LABEL: shuffle_f16_67452301:
294 ; CHECK: @ %bb.0: @ %entry
295 ; CHECK-NEXT: vmov.f32 s4, s3
296 ; CHECK-NEXT: vmov.f32 s5, s2
297 ; CHECK-NEXT: vmov.f32 s6, s1
298 ; CHECK-NEXT: vmov.f32 s7, s0
299 ; CHECK-NEXT: vmov q0, q1
302 %out = shufflevector <8 x half> %s1, <8 x half> %s2, <8 x i32> <i32 6, i32 7, i32 4, i32 5, i32 2, i32 3, i32 0, i32 1>
; Half-precision counterpart of shuffle_i16_76543210: full reversal of the eight elements of %s1; %s2 is unused. The checks expect vrev64.16 into q1 followed by s-register moves that exchange the two 64-bit halves back into q0.
306 define arm_aapcs_vfpcc <8 x half> @shuffle_f16_76543210(<8 x half> %s1, <8 x half> %s2) {
307 ; CHECK-LABEL: shuffle_f16_76543210:
308 ; CHECK: @ %bb.0: @ %entry
309 ; CHECK-NEXT: vrev64.16 q1, q0
310 ; CHECK-NEXT: vmov.f32 s0, s6
311 ; CHECK-NEXT: vmov.f32 s1, s7
312 ; CHECK-NEXT: vmov.f32 s2, s4
313 ; CHECK-NEXT: vmov.f32 s3, s5
316 %out = shufflevector <8 x half> %s1, <8 x half> %s2, <8 x i32> <i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
; Identity mask over the eight half elements of %s1; %s2 is unused. The visible checks list no shuffle instructions after the block label.
320 define arm_aapcs_vfpcc <8 x half> @shuffle_f16_01234567(<8 x half> %s1, <8 x half> %s2) {
321 ; CHECK-LABEL: shuffle_f16_01234567:
322 ; CHECK: @ %bb.0: @ %entry
325 %out = shufflevector <8 x half> %s1, <8 x half> %s2, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
; Two-source half shuffle: result elements 0-3 come from %s1 elements 0-3, result elements 4-7 from %s2 elements 4-7 (mask indices 12-15). The checks expect two s-register moves from q1's upper half into q0's upper half.
329 define arm_aapcs_vfpcc <8 x half> @shuffle_f16_0123cdef(<8 x half> %s1, <8 x half> %s2) {
330 ; CHECK-LABEL: shuffle_f16_0123cdef:
331 ; CHECK: @ %bb.0: @ %entry
332 ; CHECK-NEXT: vmov.f32 s2, s6
333 ; CHECK-NEXT: vmov.f32 s3, s7
336 %out = shufflevector <8 x half> %s1, <8 x half> %s2, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 12, i32 13, i32 14, i32 15>
; Odd result lanes take %s1 elements 7,5,3,1; even result lanes are undef. The expected code is the same four s-register moves as for shuffle_f16_67452301.
340 define arm_aapcs_vfpcc <8 x half> @shuffle_f16_u7u5u3u1(<8 x half> %s1, <8 x half> %s2) {
341 ; CHECK-LABEL: shuffle_f16_u7u5u3u1:
342 ; CHECK: @ %bb.0: @ %entry
343 ; CHECK-NEXT: vmov.f32 s4, s3
344 ; CHECK-NEXT: vmov.f32 s5, s2
345 ; CHECK-NEXT: vmov.f32 s6, s1
346 ; CHECK-NEXT: vmov.f32 s7, s0
347 ; CHECK-NEXT: vmov q0, q1
350 %out = shufflevector <8 x half> %s1, <8 x half> %s2, <8 x i32> <i32 undef, i32 7, i32 undef, i32 5, i32 undef, i32 3, i32 undef, i32 1>
; Even result lanes take %s1 elements 6,4,2,0; odd result lanes are undef. The expected code is the same four s-register moves as for shuffle_f16_67452301.
354 define arm_aapcs_vfpcc <8 x half> @shuffle_f16_6u4u2u0u(<8 x half> %s1, <8 x half> %s2) {
355 ; CHECK-LABEL: shuffle_f16_6u4u2u0u:
356 ; CHECK: @ %bb.0: @ %entry
357 ; CHECK-NEXT: vmov.f32 s4, s3
358 ; CHECK-NEXT: vmov.f32 s5, s2
359 ; CHECK-NEXT: vmov.f32 s6, s1
360 ; CHECK-NEXT: vmov.f32 s7, s0
361 ; CHECK-NEXT: vmov q0, q1
364 %out = shufflevector <8 x half> %s1, <8 x half> %s2, <8 x i32> <i32 6, i32 undef, i32 4, i32 undef, i32 2, i32 undef, i32 0, i32 undef>
; Only result lane 0 is defined and it takes %s1 element 0; all other lanes are undef. The visible checks list no shuffle instructions after the block label.
368 define arm_aapcs_vfpcc <8 x half> @shuffle_f16_0uuuuuuu(<8 x half> %s1, <8 x half> %s2) {
369 ; CHECK-LABEL: shuffle_f16_0uuuuuuu:
370 ; CHECK: @ %bb.0: @ %entry
373 %out = shufflevector <8 x half> %s1, <8 x half> %s2, <8 x i32> <i32 0, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
; Only result lane 4 is defined and it takes %s1 element 0; all other lanes are undef. The checks expect lane 0 to be extracted to r0 and broadcast to every lane with vdup.16.
377 define arm_aapcs_vfpcc <8 x half> @shuffle_f16_uuuu0uuu(<8 x half> %s1, <8 x half> %s2) {
378 ; CHECK-LABEL: shuffle_f16_uuuu0uuu:
379 ; CHECK: @ %bb.0: @ %entry
380 ; CHECK-NEXT: vmov.u16 r0, q0[0]
381 ; CHECK-NEXT: vdup.16 q0, r0
384 %out = shufflevector <8 x half> %s1, <8 x half> %s2, <8 x i32> <i32 undef, i32 undef, i32 undef, i32 undef, i32 0, i32 undef, i32 undef, i32 undef>