1 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2 ; RUN: llc -mtriple=thumbv8.1m.main-none-none-eabi -mattr=+mve -verify-machineinstrs %s -o - | FileCheck %s
; In-register sign extension from bit 0 of each i32 lane: %m is shifted left by
; 31 and then arithmetic-shifted right by 31. The assertions expect this to be
; emitted as a vshl.i32 / vshr.s32 pair operating on q0.
4 define arm_aapcs_vfpcc <4 x i32> @sext_v4i32_v4i32_v4i1(<4 x i32> %m) {
5 ; CHECK-LABEL: sext_v4i32_v4i32_v4i1:
6 ; CHECK: @ %bb.0: @ %entry
7 ; CHECK-NEXT: vshl.i32 q0, q0, #31
8 ; CHECK-NEXT: vshr.s32 q0, q0, #31
11 %shl = shl <4 x i32> %m, <i32 31, i32 31, i32 31, i32 31>
12 %shr = ashr exact <4 x i32> %shl, <i32 31, i32 31, i32 31, i32 31>
; In-register sign extension of the low 8 bits of each i32 lane (shl by 24,
; then ashr by 24). The assertions expect two widening moves instead of shifts:
; vmovlb.s8 followed by vmovlb.s16.
16 define arm_aapcs_vfpcc <4 x i32> @sext_v4i32_v4i32_v4i8(<4 x i32> %m) {
17 ; CHECK-LABEL: sext_v4i32_v4i32_v4i8:
18 ; CHECK: @ %bb.0: @ %entry
19 ; CHECK-NEXT: vmovlb.s8 q0, q0
20 ; CHECK-NEXT: vmovlb.s16 q0, q0
23 %shl = shl <4 x i32> %m, <i32 24, i32 24, i32 24, i32 24>
24 %shr = ashr exact <4 x i32> %shl, <i32 24, i32 24, i32 24, i32 24>
; In-register sign extension of the low 16 bits of each i32 lane (shl by 16,
; then ashr by 16). The assertions expect a single vmovlb.s16.
28 define arm_aapcs_vfpcc <4 x i32> @sext_v4i32_v4i32_v4i16(<4 x i32> %m) {
29 ; CHECK-LABEL: sext_v4i32_v4i32_v4i16:
30 ; CHECK: @ %bb.0: @ %entry
31 ; CHECK-NEXT: vmovlb.s16 q0, q0
34 %shl = shl <4 x i32> %m, <i32 16, i32 16, i32 16, i32 16>
35 %shr = ashr exact <4 x i32> %shl, <i32 16, i32 16, i32 16, i32 16>
; In-register sign extension of the low 8 bits of each i16 lane (shl by 8,
; then ashr by 8). The assertions expect a single vmovlb.s8.
39 define arm_aapcs_vfpcc <8 x i16> @sext_v8i16_v8i16_v8i8(<8 x i16> %m) {
40 ; CHECK-LABEL: sext_v8i16_v8i16_v8i8:
41 ; CHECK: @ %bb.0: @ %entry
42 ; CHECK-NEXT: vmovlb.s8 q0, q0
45 %shl = shl <8 x i16> %m, <i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8>
46 %shr = ashr exact <8 x i16> %shl, <i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8>
; In-register sign extension from bit 0 of each i16 lane (shl by 15, then ashr
; by 15). The assertions expect the shift pair to be kept as
; vshl.i16 / vshr.s16 with immediate #15.
50 define arm_aapcs_vfpcc <8 x i16> @sext_v8i16_v8i16_v8i1(<8 x i16> %m) {
51 ; CHECK-LABEL: sext_v8i16_v8i16_v8i1:
52 ; CHECK: @ %bb.0: @ %entry
53 ; CHECK-NEXT: vshl.i16 q0, q0, #15
54 ; CHECK-NEXT: vshr.s16 q0, q0, #15
57 %shl = shl <8 x i16> %m, <i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15>
58 %shr = ashr exact <8 x i16> %shl, <i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15>
; In-register sign extension of the low 32 bits of each i64 lane (shl by 32,
; then ashr by 32). The expected code goes through core registers: the low
; words are read from s2/s0 and written back to q0[2]/q0[0], then each is
; shifted with asrs #31 to form the high words written to q0[3]/q0[1].
62 define arm_aapcs_vfpcc <2 x i64> @sext_v2i64_v2i64_v2i32(<2 x i64> %m) {
63 ; CHECK-LABEL: sext_v2i64_v2i64_v2i32:
64 ; CHECK: @ %bb.0: @ %entry
65 ; CHECK-NEXT: vmov r0, s2
66 ; CHECK-NEXT: vmov r1, s0
67 ; CHECK-NEXT: vmov q0[2], q0[0], r1, r0
68 ; CHECK-NEXT: asrs r0, r0, #31
69 ; CHECK-NEXT: asrs r1, r1, #31
70 ; CHECK-NEXT: vmov q0[3], q0[1], r1, r0
73 %shl = shl <2 x i64> %m, <i64 32, i64 32>
74 %shr = ashr exact <2 x i64> %shl, <i64 32, i64 32>
; In-register sign extension of the low 35 bits of each i64 lane (shl by 29,
; then ashr by 29; 64 - 29 = 35). The expected code moves both halves of each
; lane to core registers, re-inserts the low words unchanged into q0[2]/q0[0],
; and builds the high words with sbfx #0, #3 (the 3 bits above the low word)
; before writing them to q0[3]/q0[1].
78 define arm_aapcs_vfpcc <2 x i64> @sext_v2i64_v2i64_v2i35(<2 x i64> %m) {
79 ; CHECK-LABEL: sext_v2i64_v2i64_v2i35:
80 ; CHECK: @ %bb.0: @ %entry
81 ; CHECK-NEXT: vmov r0, r1, d1
82 ; CHECK-NEXT: vmov r2, r3, d0
83 ; CHECK-NEXT: vmov q0[2], q0[0], r2, r0
84 ; CHECK-NEXT: sbfx r0, r1, #0, #3
85 ; CHECK-NEXT: sbfx r1, r3, #0, #3
86 ; CHECK-NEXT: vmov q0[3], q0[1], r1, r0
89 %shl = shl <2 x i64> %m, <i64 29, i64 29>
90 %shr = ashr exact <2 x i64> %shl, <i64 29, i64 29>
; sext from <8 x i8> to <8 x i16>. The assertions expect a single vmovlb.s8
; on q0.
94 define arm_aapcs_vfpcc <8 x i16> @sext_v8i8_v8i16(<8 x i8> %src) {
95 ; CHECK-LABEL: sext_v8i8_v8i16:
96 ; CHECK: @ %bb.0: @ %entry
97 ; CHECK-NEXT: vmovlb.s8 q0, q0
100 %0 = sext <8 x i8> %src to <8 x i16>
; sext from <4 x i16> to <4 x i32>. The assertions expect a single vmovlb.s16
; on q0.
104 define arm_aapcs_vfpcc <4 x i32> @sext_v4i16_v4i32(<4 x i16> %src) {
105 ; CHECK-LABEL: sext_v4i16_v4i32:
106 ; CHECK: @ %bb.0: @ %entry
107 ; CHECK-NEXT: vmovlb.s16 q0, q0
110 %0 = sext <4 x i16> %src to <4 x i32>
; sext from <4 x i8> to <4 x i32>. The assertions expect the extension to be
; done in two widening steps: vmovlb.s8 followed by vmovlb.s16.
114 define arm_aapcs_vfpcc <4 x i32> @sext_v4i8_v4i32(<4 x i8> %src) {
115 ; CHECK-LABEL: sext_v4i8_v4i32:
116 ; CHECK: @ %bb.0: @ %entry
117 ; CHECK-NEXT: vmovlb.s8 q0, q0
118 ; CHECK-NEXT: vmovlb.s16 q0, q0
121 %0 = sext <4 x i8> %src to <4 x i32>
; sext from <16 x i8> to <16 x i16>; the result spans two q registers. The
; expected code goes through a 16-byte stack slot: q0 is stored with vstrw.32,
; then reloaded with two sign-extending vldrb.s16 loads at offsets 0 and 8
; into q0 and q1.
125 define arm_aapcs_vfpcc <16 x i16> @sext_v16i8_v16i16(<16 x i8> %src) {
126 ; CHECK-LABEL: sext_v16i8_v16i16:
127 ; CHECK: @ %bb.0: @ %entry
128 ; CHECK-NEXT: .pad #16
129 ; CHECK-NEXT: sub sp, #16
130 ; CHECK-NEXT: mov r0, sp
131 ; CHECK-NEXT: vstrw.32 q0, [r0]
132 ; CHECK-NEXT: vldrb.s16 q0, [r0]
133 ; CHECK-NEXT: vldrb.s16 q1, [r0, #8]
134 ; CHECK-NEXT: add sp, #16
137 %0 = sext <16 x i8> %src to <16 x i16>
; sext from <8 x i16> to <8 x i32>; the result spans two q registers. The
; expected code goes through a 16-byte stack slot: q0 is stored with vstrw.32,
; then reloaded with two sign-extending vldrh.s32 loads at offsets 0 and 8
; into q0 and q1.
141 define arm_aapcs_vfpcc <8 x i32> @sext_v8i16_v8i32(<8 x i16> %src) {
142 ; CHECK-LABEL: sext_v8i16_v8i32:
143 ; CHECK: @ %bb.0: @ %entry
144 ; CHECK-NEXT: .pad #16
145 ; CHECK-NEXT: sub sp, #16
146 ; CHECK-NEXT: mov r0, sp
147 ; CHECK-NEXT: vstrw.32 q0, [r0]
148 ; CHECK-NEXT: vldrh.s32 q0, [r0]
149 ; CHECK-NEXT: vldrh.s32 q1, [r0, #8]
150 ; CHECK-NEXT: add sp, #16
153 %0 = sext <8 x i16> %src to <8 x i32>
; sext from <16 x i8> to <16 x i32>; the result spans four q registers. The
; expected code uses 48 bytes of stack and extends in two stages: the input is
; stored at sp, each half is reloaded with vldrb.s16 (i8 -> i16) and stored at
; sp+32 and sp+16, and those two buffers are then reloaded with four
; vldrh.s32 loads (i16 -> i32) into q0..q3.
157 define arm_aapcs_vfpcc <16 x i32> @sext_v16i8_v16i32(<16 x i8> %src) {
158 ; CHECK-LABEL: sext_v16i8_v16i32:
159 ; CHECK: @ %bb.0: @ %entry
160 ; CHECK-NEXT: .pad #48
161 ; CHECK-NEXT: sub sp, #48
162 ; CHECK-NEXT: mov r0, sp
163 ; CHECK-NEXT: add r1, sp, #32
164 ; CHECK-NEXT: vstrw.32 q0, [r0]
165 ; CHECK-NEXT: vldrb.s16 q0, [r0]
166 ; CHECK-NEXT: vstrw.32 q0, [r1]
167 ; CHECK-NEXT: vldrb.s16 q0, [r0, #8]
168 ; CHECK-NEXT: add r0, sp, #16
169 ; CHECK-NEXT: vstrw.32 q0, [r0]
170 ; CHECK-NEXT: vldrh.s32 q0, [r1]
171 ; CHECK-NEXT: vldrh.s32 q1, [r1, #8]
172 ; CHECK-NEXT: vldrh.s32 q2, [r0]
173 ; CHECK-NEXT: vldrh.s32 q3, [r0, #8]
174 ; CHECK-NEXT: add sp, #48
177 %0 = sext <16 x i8> %src to <16 x i32>
; sext from <2 x i32> to <2 x i64>. The expected code goes through core
; registers: the low words are read from s2/s0 and written to q0[2]/q0[0],
; then each is shifted with asrs #31 to form the high words written to
; q0[3]/q0[1].
181 define arm_aapcs_vfpcc <2 x i64> @sext_v2i32_v2i64(<2 x i32> %src) {
182 ; CHECK-LABEL: sext_v2i32_v2i64:
183 ; CHECK: @ %bb.0: @ %entry
184 ; CHECK-NEXT: vmov r0, s2
185 ; CHECK-NEXT: vmov r1, s0
186 ; CHECK-NEXT: vmov q0[2], q0[0], r1, r0
187 ; CHECK-NEXT: asrs r0, r0, #31
188 ; CHECK-NEXT: asrs r1, r1, #31
189 ; CHECK-NEXT: vmov q0[3], q0[1], r1, r0
192 %0 = sext <2 x i32> %src to <2 x i64>
; zext from <8 x i8> to <8 x i16>. The assertions expect a single vmovlb.u8
; on q0.
197 define arm_aapcs_vfpcc <8 x i16> @zext_v8i8_v8i16(<8 x i8> %src) {
198 ; CHECK-LABEL: zext_v8i8_v8i16:
199 ; CHECK: @ %bb.0: @ %entry
200 ; CHECK-NEXT: vmovlb.u8 q0, q0
203 %0 = zext <8 x i8> %src to <8 x i16>
; zext from <4 x i16> to <4 x i32>. The assertions expect a single vmovlb.u16
; on q0.
207 define arm_aapcs_vfpcc <4 x i32> @zext_v4i16_v4i32(<4 x i16> %src) {
208 ; CHECK-LABEL: zext_v4i16_v4i32:
209 ; CHECK: @ %bb.0: @ %entry
210 ; CHECK-NEXT: vmovlb.u16 q0, q0
213 %0 = zext <4 x i16> %src to <4 x i32>
; zext from <4 x i8> to <4 x i32>. Unlike the signed counterpart, the
; assertions expect a mask rather than widening moves: a 0xff-per-lane constant
; is materialised in q1 with vmov.i32 and applied with vand.
217 define arm_aapcs_vfpcc <4 x i32> @zext_v4i8_v4i32(<4 x i8> %src) {
218 ; CHECK-LABEL: zext_v4i8_v4i32:
219 ; CHECK: @ %bb.0: @ %entry
220 ; CHECK-NEXT: vmov.i32 q1, #0xff
221 ; CHECK-NEXT: vand q0, q0, q1
224 %0 = zext <4 x i8> %src to <4 x i32>
; zext from <16 x i8> to <16 x i16>; the result spans two q registers. The
; expected code goes through a 16-byte stack slot: q0 is stored with vstrw.32,
; then reloaded with two zero-extending vldrb.u16 loads at offsets 0 and 8
; into q0 and q1.
228 define arm_aapcs_vfpcc <16 x i16> @zext_v16i8_v16i16(<16 x i8> %src) {
229 ; CHECK-LABEL: zext_v16i8_v16i16:
230 ; CHECK: @ %bb.0: @ %entry
231 ; CHECK-NEXT: .pad #16
232 ; CHECK-NEXT: sub sp, #16
233 ; CHECK-NEXT: mov r0, sp
234 ; CHECK-NEXT: vstrw.32 q0, [r0]
235 ; CHECK-NEXT: vldrb.u16 q0, [r0]
236 ; CHECK-NEXT: vldrb.u16 q1, [r0, #8]
237 ; CHECK-NEXT: add sp, #16
240 %0 = zext <16 x i8> %src to <16 x i16>
; zext from <8 x i16> to <8 x i32>; the result spans two q registers. The
; expected code goes through a 16-byte stack slot: q0 is stored with vstrw.32,
; then reloaded with two zero-extending vldrh.u32 loads at offsets 0 and 8
; into q0 and q1.
244 define arm_aapcs_vfpcc <8 x i32> @zext_v8i16_v8i32(<8 x i16> %src) {
245 ; CHECK-LABEL: zext_v8i16_v8i32:
246 ; CHECK: @ %bb.0: @ %entry
247 ; CHECK-NEXT: .pad #16
248 ; CHECK-NEXT: sub sp, #16
249 ; CHECK-NEXT: mov r0, sp
250 ; CHECK-NEXT: vstrw.32 q0, [r0]
251 ; CHECK-NEXT: vldrh.u32 q0, [r0]
252 ; CHECK-NEXT: vldrh.u32 q1, [r0, #8]
253 ; CHECK-NEXT: add sp, #16
256 %0 = zext <8 x i16> %src to <8 x i32>
; zext from <16 x i8> to <16 x i32>; the result spans four q registers. The
; expected code uses 48 bytes of stack and extends in two stages: the input is
; stored at sp, each half is reloaded with vldrb.u16 (i8 -> i16) and stored at
; sp+32 and sp+16, and those two buffers are then reloaded with four
; vldrh.u32 loads (i16 -> i32) into q0..q3.
260 define arm_aapcs_vfpcc <16 x i32> @zext_v16i8_v16i32(<16 x i8> %src) {
261 ; CHECK-LABEL: zext_v16i8_v16i32:
262 ; CHECK: @ %bb.0: @ %entry
263 ; CHECK-NEXT: .pad #48
264 ; CHECK-NEXT: sub sp, #48
265 ; CHECK-NEXT: mov r0, sp
266 ; CHECK-NEXT: add r1, sp, #32
267 ; CHECK-NEXT: vstrw.32 q0, [r0]
268 ; CHECK-NEXT: vldrb.u16 q0, [r0]
269 ; CHECK-NEXT: vstrw.32 q0, [r1]
270 ; CHECK-NEXT: vldrb.u16 q0, [r0, #8]
271 ; CHECK-NEXT: add r0, sp, #16
272 ; CHECK-NEXT: vstrw.32 q0, [r0]
273 ; CHECK-NEXT: vldrh.u32 q0, [r1]
274 ; CHECK-NEXT: vldrh.u32 q1, [r1, #8]
275 ; CHECK-NEXT: vldrh.u32 q2, [r0]
276 ; CHECK-NEXT: vldrh.u32 q3, [r0, #8]
277 ; CHECK-NEXT: add sp, #48
280 %0 = zext <16 x i8> %src to <16 x i32>
; zext from <2 x i32> to <2 x i64>. The assertions expect a mask: a
; 0xffffffff-per-i64-lane constant is materialised in q1 with vmov.i64 and
; applied with vand, clearing the upper word of each lane.
284 define arm_aapcs_vfpcc <2 x i64> @zext_v2i32_v2i64(<2 x i32> %src) {
285 ; CHECK-LABEL: zext_v2i32_v2i64:
286 ; CHECK: @ %bb.0: @ %entry
287 ; CHECK-NEXT: vmov.i64 q1, #0xffffffff
288 ; CHECK-NEXT: vand q0, q0, q1
291 %0 = zext <2 x i32> %src to <2 x i64>
; trunc from <8 x i16> to <8 x i8>. The assertions shown for this function
; list no vector instruction after the entry-block marker.
296 define arm_aapcs_vfpcc <8 x i8> @trunc_v8i16_v8i8(<8 x i16> %src) {
297 ; CHECK-LABEL: trunc_v8i16_v8i8:
298 ; CHECK: @ %bb.0: @ %entry
301 %0 = trunc <8 x i16> %src to <8 x i8>
; trunc from <4 x i32> to <4 x i16>. The assertions shown for this function
; list no vector instruction after the entry-block marker.
305 define arm_aapcs_vfpcc <4 x i16> @trunc_v4i32_v4i16(<4 x i32> %src) {
306 ; CHECK-LABEL: trunc_v4i32_v4i16:
307 ; CHECK: @ %bb.0: @ %entry
310 %0 = trunc <4 x i32> %src to <4 x i16>
; trunc from <4 x i32> to <4 x i8>. The assertions shown for this function
; list no vector instruction after the entry-block marker.
314 define arm_aapcs_vfpcc <4 x i8> @trunc_v4i32_v4i8(<4 x i32> %src) {
315 ; CHECK-LABEL: trunc_v4i32_v4i8:
316 ; CHECK: @ %bb.0: @ %entry
319 %0 = trunc <4 x i32> %src to <4 x i8>
; trunc from <16 x i16> to <16 x i8>; the input spans q0 and q1. The expected
; code narrows through a 16-byte stack slot: q1 and q0 are written with
; truncating vstrb.16 stores at offsets 8 and 0, and the packed result is
; reloaded into q0 with vldrw.u32.
323 define arm_aapcs_vfpcc <16 x i8> @trunc_v16i16_v16i8(<16 x i16> %src) {
324 ; CHECK-LABEL: trunc_v16i16_v16i8:
325 ; CHECK: @ %bb.0: @ %entry
326 ; CHECK-NEXT: .pad #16
327 ; CHECK-NEXT: sub sp, #16
328 ; CHECK-NEXT: mov r0, sp
329 ; CHECK-NEXT: vstrb.16 q1, [r0, #8]
330 ; CHECK-NEXT: vstrb.16 q0, [r0]
331 ; CHECK-NEXT: vldrw.u32 q0, [r0]
332 ; CHECK-NEXT: add sp, #16
335 %0 = trunc <16 x i16> %src to <16 x i8>
; trunc from <8 x i32> to <8 x i16>; the input spans q0 and q1. The expected
; code narrows through a 16-byte stack slot: q1 and q0 are written with
; truncating vstrh.32 stores at offsets 8 and 0, and the packed result is
; reloaded into q0 with vldrw.u32.
339 define arm_aapcs_vfpcc <8 x i16> @trunc_v8i32_v8i16(<8 x i32> %src) {
340 ; CHECK-LABEL: trunc_v8i32_v8i16:
341 ; CHECK: @ %bb.0: @ %entry
342 ; CHECK-NEXT: .pad #16
343 ; CHECK-NEXT: sub sp, #16
344 ; CHECK-NEXT: mov r0, sp
345 ; CHECK-NEXT: vstrh.32 q1, [r0, #8]
346 ; CHECK-NEXT: vstrh.32 q0, [r0]
347 ; CHECK-NEXT: vldrw.u32 q0, [r0]
348 ; CHECK-NEXT: add sp, #16
351 %0 = trunc <8 x i32> %src to <8 x i16>
; trunc from <16 x i32> to <16 x i8>; the input spans q0..q3. The expected
; code narrows through a 16-byte stack slot: q3, q2, q1 and q0 are written
; with truncating vstrb.32 stores at offsets 12, 8, 4 and 0, and the packed
; result is reloaded into q0 with vldrw.u32.
355 define arm_aapcs_vfpcc <16 x i8> @trunc_v16i32_v16i8(<16 x i32> %src) {
356 ; CHECK-LABEL: trunc_v16i32_v16i8:
357 ; CHECK: @ %bb.0: @ %entry
358 ; CHECK-NEXT: .pad #16
359 ; CHECK-NEXT: sub sp, #16
360 ; CHECK-NEXT: mov r0, sp
361 ; CHECK-NEXT: vstrb.32 q3, [r0, #12]
362 ; CHECK-NEXT: vstrb.32 q2, [r0, #8]
363 ; CHECK-NEXT: vstrb.32 q1, [r0, #4]
364 ; CHECK-NEXT: vstrb.32 q0, [r0]
365 ; CHECK-NEXT: vldrw.u32 q0, [r0]
366 ; CHECK-NEXT: add sp, #16
369 %0 = trunc <16 x i32> %src to <16 x i8>
; trunc from <2 x i64> to <2 x i32>. The assertions shown for this function
; list no vector instruction after the entry-block marker.
373 define arm_aapcs_vfpcc <2 x i32> @trunc_v2i64_v2i32(<2 x i64> %src) {
374 ; CHECK-LABEL: trunc_v2i64_v2i32:
375 ; CHECK: @ %bb.0: @ %entry
378 %0 = trunc <2 x i64> %src to <2 x i32>