1 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2 ; RUN: llc -mtriple=thumbv8.1m.main-none-none-eabi -mattr=+mve -verify-machineinstrs %s -o - | FileCheck %s --check-prefixes=CHECK,CHECK-LE
3 ; RUN: llc -mtriple=thumbebv8.1m.main-none-none-eabi -mattr=+mve -verify-machineinstrs %s -o - | FileCheck %s --check-prefixes=CHECK,CHECK-BE
; Narrowing store, i32 -> i8, one vector: loads <4 x i32> from %src (align 4),
; truncates every lane to i8 and stores the <4 x i8> result to %dest (align 1).
; %n is not referenced by the visible instructions.
; Expected code: a single vldrw.u32 load followed by a single vstrb.32 store.
5 define void @foo_int8_int32(ptr %dest, ptr readonly %src, i32 %n) {
6 ; CHECK-LABEL: foo_int8_int32:
7 ; CHECK: @ %bb.0: @ %entry
8 ; CHECK-NEXT: vldrw.u32 q0, [r1]
9 ; CHECK-NEXT: vstrb.32 q0, [r0]
12 %wide.load = load <4 x i32>, ptr %src, align 4
13 %0 = trunc <4 x i32> %wide.load to <4 x i8>
14 store <4 x i8> %0, ptr %dest, align 1
; Narrowing store, i32 -> i16, one vector: loads <4 x i32> from %src (align 4),
; truncates every lane to i16 and stores the <4 x i16> result to %dest (align 2).
; %n is not referenced by the visible instructions.
; Expected code: a single vldrw.u32 load followed by a single vstrh.32 store.
18 define void @foo_int16_int32(ptr %dest, ptr readonly %src, i32 %n) {
19 ; CHECK-LABEL: foo_int16_int32:
20 ; CHECK: @ %bb.0: @ %entry
21 ; CHECK-NEXT: vldrw.u32 q0, [r1]
22 ; CHECK-NEXT: vstrh.32 q0, [r0]
25 %wide.load = load <4 x i32>, ptr %src, align 4
26 %0 = trunc <4 x i32> %wide.load to <4 x i16>
27 store <4 x i16> %0, ptr %dest, align 2
; Narrowing store, i16 -> i8, one vector: loads <8 x i16> from %src (align 2),
; truncates every lane to i8 and stores the <8 x i8> result to %dest (align 1).
; %n is not referenced by the visible instructions.
; Expected code: a single vldrh.u16 load followed by a single vstrb.16 store.
31 define void @foo_int8_int16(ptr %dest, ptr readonly %src, i32 %n) {
32 ; CHECK-LABEL: foo_int8_int16:
33 ; CHECK: @ %bb.0: @ %entry
34 ; CHECK-NEXT: vldrh.u16 q0, [r1]
35 ; CHECK-NEXT: vstrb.16 q0, [r0]
38 %wide.load = load <8 x i16>, ptr %src, align 2
39 %0 = trunc <8 x i16> %wide.load to <8 x i8>
40 store <8 x i8> %0, ptr %dest, align 1
; Narrowing store, i32 -> i8, two source vectors: loads <8 x i32> from %src,
; truncates every lane to i8 and stores the <8 x i8> result to %dest (align 1).
; The load is only 2-byte aligned (align 2), unlike the other i32-source tests
; in this file, which use align 4.
; Expected code differs per endianness, so separate LE/BE prefixes are used.
; Both variants narrow each half to 16-bit lanes into a 16-byte stack slot
; (vstrh.32), reload the slot and finish with one vstrb.16 store. The
; little-endian form loads with vldrh.u16 and reloads with vldrw.u32; the
; big-endian form loads with vldrb.u8 plus vrev32.8 and reloads with vldrh.u16.
45 define void @foo_int8_int32_double(ptr %dest, ptr readonly %src, i32 %n) {
46 ; CHECK-LE-LABEL: foo_int8_int32_double:
47 ; CHECK-LE: @ %bb.0: @ %entry
48 ; CHECK-LE-NEXT: .pad #16
49 ; CHECK-LE-NEXT: sub sp, #16
50 ; CHECK-LE-NEXT: vldrh.u16 q0, [r1, #16]
51 ; CHECK-LE-NEXT: mov r2, sp
52 ; CHECK-LE-NEXT: vstrh.32 q0, [r2, #8]
53 ; CHECK-LE-NEXT: vldrh.u16 q0, [r1]
54 ; CHECK-LE-NEXT: vstrh.32 q0, [r2]
55 ; CHECK-LE-NEXT: vldrw.u32 q0, [r2]
56 ; CHECK-LE-NEXT: vstrb.16 q0, [r0]
57 ; CHECK-LE-NEXT: add sp, #16
58 ; CHECK-LE-NEXT: bx lr
60 ; CHECK-BE-LABEL: foo_int8_int32_double:
61 ; CHECK-BE: @ %bb.0: @ %entry
62 ; CHECK-BE-NEXT: .pad #16
63 ; CHECK-BE-NEXT: sub sp, #16
64 ; CHECK-BE-NEXT: vldrb.u8 q0, [r1, #16]
65 ; CHECK-BE-NEXT: mov r2, sp
66 ; CHECK-BE-NEXT: vrev32.8 q0, q0
67 ; CHECK-BE-NEXT: vstrh.32 q0, [r2, #8]
68 ; CHECK-BE-NEXT: vldrb.u8 q0, [r1]
69 ; CHECK-BE-NEXT: vrev32.8 q0, q0
70 ; CHECK-BE-NEXT: vstrh.32 q0, [r2]
71 ; CHECK-BE-NEXT: vldrh.u16 q0, [r2]
72 ; CHECK-BE-NEXT: vstrb.16 q0, [r0]
73 ; CHECK-BE-NEXT: add sp, #16
74 ; CHECK-BE-NEXT: bx lr
76 %wide.load = load <8 x i32>, ptr %src, align 2
77 %0 = trunc <8 x i32> %wide.load to <8 x i8>
78 store <8 x i8> %0, ptr %dest, align 1
; Narrowing store, i32 -> i16, two source vectors: loads <8 x i32> from %src
; (align 4), truncates every lane to i16 and stores the <8 x i16> result to
; %dest (align 2). %n is not referenced by the visible instructions.
; Expected code: two vldrw.u32 loads (source offsets 0 and 16) and two vstrh.32
; stores (destination offsets 0 and 8), with no stack traffic.
82 define void @foo_int16_int32_double(ptr %dest, ptr readonly %src, i32 %n) {
83 ; CHECK-LABEL: foo_int16_int32_double:
84 ; CHECK: @ %bb.0: @ %entry
85 ; CHECK-NEXT: vldrw.u32 q0, [r1]
86 ; CHECK-NEXT: vldrw.u32 q1, [r1, #16]
87 ; CHECK-NEXT: vstrh.32 q1, [r0, #8]
88 ; CHECK-NEXT: vstrh.32 q0, [r0]
91 %wide.load = load <8 x i32>, ptr %src, align 4
92 %0 = trunc <8 x i32> %wide.load to <8 x i16>
93 store <8 x i16> %0, ptr %dest, align 2
; Narrowing store, i16 -> i8, two source vectors: loads <16 x i16> from %src
; (align 2), truncates every lane to i8 and stores the <16 x i8> result to
; %dest (align 1). %n is not referenced by the visible instructions.
; Expected code: two vldrh.u16 loads (source offsets 0 and 16) and two vstrb.16
; stores (destination offsets 0 and 8), with no stack traffic.
97 define void @foo_int8_int16_double(ptr %dest, ptr readonly %src, i32 %n) {
98 ; CHECK-LABEL: foo_int8_int16_double:
99 ; CHECK: @ %bb.0: @ %entry
100 ; CHECK-NEXT: vldrh.u16 q0, [r1]
101 ; CHECK-NEXT: vldrh.u16 q1, [r1, #16]
102 ; CHECK-NEXT: vstrb.16 q1, [r0, #8]
103 ; CHECK-NEXT: vstrb.16 q0, [r0]
106 %wide.load = load <16 x i16>, ptr %src, align 2
107 %0 = trunc <16 x i16> %wide.load to <16 x i8>
108 store <16 x i8> %0, ptr %dest, align 1
; Narrowing store, i32 -> i8, four source vectors: loads <16 x i32> from %src
; (align 4), truncates every lane to i8 and stores the <16 x i8> result to
; %dest (align 1). %n is not referenced by the visible instructions.
; Expected code: four vldrw.u32 loads (source offsets 0, 16, 32, 48) and four
; vstrb.32 stores (destination offsets 0, 4, 8, 12), with no stack traffic.
112 define void @foo_int8_int32_quad(ptr %dest, ptr readonly %src, i32 %n) {
113 ; CHECK-LABEL: foo_int8_int32_quad:
114 ; CHECK: @ %bb.0: @ %entry
115 ; CHECK-NEXT: vldrw.u32 q0, [r1]
116 ; CHECK-NEXT: vldrw.u32 q1, [r1, #16]
117 ; CHECK-NEXT: vldrw.u32 q2, [r1, #32]
118 ; CHECK-NEXT: vldrw.u32 q3, [r1, #48]
119 ; CHECK-NEXT: vstrb.32 q1, [r0, #4]
120 ; CHECK-NEXT: vstrb.32 q0, [r0]
121 ; CHECK-NEXT: vstrb.32 q3, [r0, #12]
122 ; CHECK-NEXT: vstrb.32 q2, [r0, #8]
125 %wide.load = load <16 x i32>, ptr %src, align 4
126 %0 = trunc <16 x i32> %wide.load to <16 x i8>
127 store <16 x i8> %0, ptr %dest, align 1
; Sign-extending load, i8 -> i32, one vector: loads <4 x i8> from %src
; (align 1), sign-extends every lane to i32 and stores the <4 x i32> result to
; %dest (align 4). %n is not referenced by the visible instructions.
; Expected code: a single vldrb.s32 load followed by a single vstrw.32 store.
132 define void @foo_int32_int8(ptr %dest, ptr readonly %src, i32 %n) {
133 ; CHECK-LABEL: foo_int32_int8:
134 ; CHECK: @ %bb.0: @ %entry
135 ; CHECK-NEXT: vldrb.s32 q0, [r1]
136 ; CHECK-NEXT: vstrw.32 q0, [r0]
139 %wide.load = load <4 x i8>, ptr %src, align 1
140 %0 = sext <4 x i8> %wide.load to <4 x i32>
141 store <4 x i32> %0, ptr %dest, align 4
; Sign-extending load, i8 -> i16, one vector: loads <8 x i8> from %src
; (align 1), sign-extends every lane to i16 and stores the <8 x i16> result to
; %dest (align 2). %n is not referenced by the visible instructions.
; Expected code: a single vldrb.s16 load followed by a single vstrh.16 store.
145 define void @foo_int16_int8(ptr %dest, ptr readonly %src, i32 %n) {
146 ; CHECK-LABEL: foo_int16_int8:
147 ; CHECK: @ %bb.0: @ %entry
148 ; CHECK-NEXT: vldrb.s16 q0, [r1]
149 ; CHECK-NEXT: vstrh.16 q0, [r0]
152 %wide.load = load <8 x i8>, ptr %src, align 1
153 %0 = sext <8 x i8> %wide.load to <8 x i16>
154 store <8 x i16> %0, ptr %dest, align 2
; Sign-extending load, i16 -> i32, one vector: loads <4 x i16> from %src
; (align 2), sign-extends every lane to i32 and stores the <4 x i32> result to
; %dest (align 4). %n is not referenced by the visible instructions.
; Expected code: a single vldrh.s32 load followed by a single vstrw.32 store.
158 define void @foo_int32_int16(ptr %dest, ptr readonly %src, i32 %n) {
159 ; CHECK-LABEL: foo_int32_int16:
160 ; CHECK: @ %bb.0: @ %entry
161 ; CHECK-NEXT: vldrh.s32 q0, [r1]
162 ; CHECK-NEXT: vstrw.32 q0, [r0]
165 %wide.load = load <4 x i16>, ptr %src, align 2
166 %0 = sext <4 x i16> %wide.load to <4 x i32>
167 store <4 x i32> %0, ptr %dest, align 4
; Sign-extending load, i8 -> i32, two result vectors: loads <8 x i8> from %src
; (align 1), sign-extends every lane to i32 and stores the <8 x i32> result to
; %dest (align 4). %n is not referenced by the visible instructions.
; Expected code: two vldrb.s32 loads (source offsets 0 and 4) and two vstrw.32
; stores (destination offsets 0 and 16).
171 define void @foo_int32_int8_double(ptr %dest, ptr readonly %src, i32 %n) {
172 ; CHECK-LABEL: foo_int32_int8_double:
173 ; CHECK: @ %bb.0: @ %entry
174 ; CHECK-NEXT: vldrb.s32 q0, [r1]
175 ; CHECK-NEXT: vldrb.s32 q1, [r1, #4]
176 ; CHECK-NEXT: vstrw.32 q1, [r0, #16]
177 ; CHECK-NEXT: vstrw.32 q0, [r0]
180 %wide.load = load <8 x i8>, ptr %src, align 1
181 %0 = sext <8 x i8> %wide.load to <8 x i32>
182 store <8 x i32> %0, ptr %dest, align 4
; Sign-extending load, i8 -> i16, two result vectors: loads <16 x i8> from
; %src (align 1), sign-extends every lane to i16 and stores the <16 x i16>
; result to %dest (align 2). %n is not referenced by the visible instructions.
; Expected code: two vldrb.s16 loads (source offsets 0 and 8) and two vstrh.16
; stores (destination offsets 0 and 16).
186 define void @foo_int16_int8_double(ptr %dest, ptr readonly %src, i32 %n) {
187 ; CHECK-LABEL: foo_int16_int8_double:
188 ; CHECK: @ %bb.0: @ %entry
189 ; CHECK-NEXT: vldrb.s16 q0, [r1]
190 ; CHECK-NEXT: vldrb.s16 q1, [r1, #8]
191 ; CHECK-NEXT: vstrh.16 q1, [r0, #16]
192 ; CHECK-NEXT: vstrh.16 q0, [r0]
195 %wide.load = load <16 x i8>, ptr %src, align 1
196 %0 = sext <16 x i8> %wide.load to <16 x i16>
197 store <16 x i16> %0, ptr %dest, align 2
; Sign-extending load, i16 -> i32, two result vectors: loads <8 x i16> from
; %src (align 2), sign-extends every lane to i32 and stores the <8 x i32>
; result to %dest (align 4). %n is not referenced by the visible instructions.
; Expected code: two vldrh.s32 loads (source offsets 0 and 8) and two vstrw.32
; stores (destination offsets 0 and 16).
201 define void @foo_int32_int16_double(ptr %dest, ptr readonly %src, i32 %n) {
202 ; CHECK-LABEL: foo_int32_int16_double:
203 ; CHECK: @ %bb.0: @ %entry
204 ; CHECK-NEXT: vldrh.s32 q0, [r1]
205 ; CHECK-NEXT: vldrh.s32 q1, [r1, #8]
206 ; CHECK-NEXT: vstrw.32 q1, [r0, #16]
207 ; CHECK-NEXT: vstrw.32 q0, [r0]
210 %wide.load = load <8 x i16>, ptr %src, align 2
211 %0 = sext <8 x i16> %wide.load to <8 x i32>
212 store <8 x i32> %0, ptr %dest, align 4
; Sign-extending load, i8 -> i32, four result vectors: loads <16 x i8> from
; %src (align 1), sign-extends every lane to i32 and stores the <16 x i32>
; result to %dest (align 4). %n is not referenced by the visible instructions.
; Expected code: four vldrb.s32 loads (source offsets 0, 4, 8, 12) and four
; vstrw.32 stores (destination offsets 0, 16, 32, 48).
216 define void @foo_int32_int8_quad(ptr %dest, ptr readonly %src, i32 %n) {
217 ; CHECK-LABEL: foo_int32_int8_quad:
218 ; CHECK: @ %bb.0: @ %entry
219 ; CHECK-NEXT: vldrb.s32 q0, [r1]
220 ; CHECK-NEXT: vldrb.s32 q1, [r1, #4]
221 ; CHECK-NEXT: vldrb.s32 q2, [r1, #8]
222 ; CHECK-NEXT: vldrb.s32 q3, [r1, #12]
223 ; CHECK-NEXT: vstrw.32 q1, [r0, #16]
224 ; CHECK-NEXT: vstrw.32 q0, [r0]
225 ; CHECK-NEXT: vstrw.32 q3, [r0, #48]
226 ; CHECK-NEXT: vstrw.32 q2, [r0, #32]
229 %wide.load = load <16 x i8>, ptr %src, align 1
230 %0 = sext <16 x i8> %wide.load to <16 x i32>
231 store <16 x i32> %0, ptr %dest, align 4
; Zero-extending load, i8 -> i32, one vector: loads <4 x i8> from %src
; (align 1), zero-extends every lane to i32 and stores the <4 x i32> result to
; %dest (align 4). %n is not referenced by the visible instructions.
; Expected code: a single vldrb.u32 load followed by a single vstrw.32 store.
236 define void @foo_uint32_uint8(ptr %dest, ptr readonly %src, i32 %n) {
237 ; CHECK-LABEL: foo_uint32_uint8:
238 ; CHECK: @ %bb.0: @ %entry
239 ; CHECK-NEXT: vldrb.u32 q0, [r1]
240 ; CHECK-NEXT: vstrw.32 q0, [r0]
243 %wide.load = load <4 x i8>, ptr %src, align 1
244 %0 = zext <4 x i8> %wide.load to <4 x i32>
245 store <4 x i32> %0, ptr %dest, align 4
; Zero-extending load, i8 -> i16, one vector: loads <8 x i8> from %src
; (align 1), zero-extends every lane to i16 and stores the <8 x i16> result to
; %dest (align 2). %n is not referenced by the visible instructions.
; Expected code: a single vldrb.u16 load followed by a single vstrh.16 store.
249 define void @foo_uint16_uint8(ptr %dest, ptr readonly %src, i32 %n) {
250 ; CHECK-LABEL: foo_uint16_uint8:
251 ; CHECK: @ %bb.0: @ %entry
252 ; CHECK-NEXT: vldrb.u16 q0, [r1]
253 ; CHECK-NEXT: vstrh.16 q0, [r0]
256 %wide.load = load <8 x i8>, ptr %src, align 1
257 %0 = zext <8 x i8> %wide.load to <8 x i16>
258 store <8 x i16> %0, ptr %dest, align 2
; Zero-extending load, i16 -> i32, one vector: loads <4 x i16> from %src
; (align 2), zero-extends every lane to i32 and stores the <4 x i32> result to
; %dest (align 4). %n is not referenced by the visible instructions.
; Expected code: a single vldrh.u32 load followed by a single vstrw.32 store.
262 define void @foo_uint32_uint16(ptr %dest, ptr readonly %src, i32 %n) {
263 ; CHECK-LABEL: foo_uint32_uint16:
264 ; CHECK: @ %bb.0: @ %entry
265 ; CHECK-NEXT: vldrh.u32 q0, [r1]
266 ; CHECK-NEXT: vstrw.32 q0, [r0]
269 %wide.load = load <4 x i16>, ptr %src, align 2
270 %0 = zext <4 x i16> %wide.load to <4 x i32>
271 store <4 x i32> %0, ptr %dest, align 4
; Zero-extending load, i8 -> i32, two result vectors: loads <8 x i8> from %src
; (align 1), zero-extends every lane to i32 and stores the <8 x i32> result to
; %dest (align 4). %n is not referenced by the visible instructions.
; Expected code: two vldrb.u32 loads (source offsets 0 and 4) and two vstrw.32
; stores (destination offsets 0 and 16).
276 define void @foo_uint32_uint8_double(ptr %dest, ptr readonly %src, i32 %n) {
277 ; CHECK-LABEL: foo_uint32_uint8_double:
278 ; CHECK: @ %bb.0: @ %entry
279 ; CHECK-NEXT: vldrb.u32 q0, [r1]
280 ; CHECK-NEXT: vldrb.u32 q1, [r1, #4]
281 ; CHECK-NEXT: vstrw.32 q1, [r0, #16]
282 ; CHECK-NEXT: vstrw.32 q0, [r0]
285 %wide.load = load <8 x i8>, ptr %src, align 1
286 %0 = zext <8 x i8> %wide.load to <8 x i32>
287 store <8 x i32> %0, ptr %dest, align 4
; Zero-extending load, i8 -> i16, two result vectors: loads <16 x i8> from
; %src (align 1), zero-extends every lane to i16 and stores the <16 x i16>
; result to %dest (align 2). %n is not referenced by the visible instructions.
; Expected code: two vldrb.u16 loads (source offsets 0 and 8) and two vstrh.16
; stores (destination offsets 0 and 16).
291 define void @foo_uint16_uint8_double(ptr %dest, ptr readonly %src, i32 %n) {
292 ; CHECK-LABEL: foo_uint16_uint8_double:
293 ; CHECK: @ %bb.0: @ %entry
294 ; CHECK-NEXT: vldrb.u16 q0, [r1]
295 ; CHECK-NEXT: vldrb.u16 q1, [r1, #8]
296 ; CHECK-NEXT: vstrh.16 q1, [r0, #16]
297 ; CHECK-NEXT: vstrh.16 q0, [r0]
300 %wide.load = load <16 x i8>, ptr %src, align 1
301 %0 = zext <16 x i8> %wide.load to <16 x i16>
302 store <16 x i16> %0, ptr %dest, align 2
; Zero-extending load, i16 -> i32, two result vectors: loads <8 x i16> from
; %src (align 2), zero-extends every lane to i32 and stores the <8 x i32>
; result to %dest (align 4). %n is not referenced by the visible instructions.
; Expected code: two vldrh.u32 loads (source offsets 0 and 8) and two vstrw.32
; stores (destination offsets 0 and 16).
306 define void @foo_uint32_uint16_double(ptr %dest, ptr readonly %src, i32 %n) {
307 ; CHECK-LABEL: foo_uint32_uint16_double:
308 ; CHECK: @ %bb.0: @ %entry
309 ; CHECK-NEXT: vldrh.u32 q0, [r1]
310 ; CHECK-NEXT: vldrh.u32 q1, [r1, #8]
311 ; CHECK-NEXT: vstrw.32 q1, [r0, #16]
312 ; CHECK-NEXT: vstrw.32 q0, [r0]
315 %wide.load = load <8 x i16>, ptr %src, align 2
316 %0 = zext <8 x i16> %wide.load to <8 x i32>
317 store <8 x i32> %0, ptr %dest, align 4
; Zero-extending load, i8 -> i32, four result vectors: loads <16 x i8> from
; %src (align 1), zero-extends every lane to i32 and stores the <16 x i32>
; result to %dest (align 4). %n is not referenced by the visible instructions.
; Expected code: four vldrb.u32 loads (source offsets 0, 4, 8, 12) and four
; vstrw.32 stores (destination offsets 0, 16, 32, 48).
321 define void @foo_uint32_uint8_quad(ptr %dest, ptr readonly %src, i32 %n) {
322 ; CHECK-LABEL: foo_uint32_uint8_quad:
323 ; CHECK: @ %bb.0: @ %entry
324 ; CHECK-NEXT: vldrb.u32 q0, [r1]
325 ; CHECK-NEXT: vldrb.u32 q1, [r1, #4]
326 ; CHECK-NEXT: vldrb.u32 q2, [r1, #8]
327 ; CHECK-NEXT: vldrb.u32 q3, [r1, #12]
328 ; CHECK-NEXT: vstrw.32 q1, [r0, #16]
329 ; CHECK-NEXT: vstrw.32 q0, [r0]
330 ; CHECK-NEXT: vstrw.32 q3, [r0, #48]
331 ; CHECK-NEXT: vstrw.32 q2, [r0, #32]
334 %wide.load = load <16 x i8>, ptr %src, align 1
335 %0 = zext <16 x i8> %wide.load to <16 x i32>
336 store <16 x i32> %0, ptr %dest, align 4
; Mixed two-step extension: loads <16 x i8> from %src (align 1), sign-extends
; to <16 x i16>, then zero-extends that to <16 x i32> and stores the result to
; %dest (align 4). Because the two steps use different signedness they cannot
; be expressed as one i8 -> i32 extending load.
; Expected code differs per endianness, so separate LE/BE prefixes are used.
; Both variants do the first step with two vldrb.s16 loads, spill the i16
; vectors into a 32-byte stack area, and do the second step with four
; vldrh.u32 reloads, each followed by a vstrw.32 store to the destination.
; The only difference is the spill store: vstrw.32 for little-endian versus
; vstrh.16 for big-endian.
341 define void @foo_int32_int8_both(ptr %dest, ptr readonly %src, i32 %n) {
342 ; CHECK-LE-LABEL: foo_int32_int8_both:
343 ; CHECK-LE: @ %bb.0: @ %entry
344 ; CHECK-LE-NEXT: .pad #32
345 ; CHECK-LE-NEXT: sub sp, #32
346 ; CHECK-LE-NEXT: vldrb.s16 q0, [r1, #8]
347 ; CHECK-LE-NEXT: add r2, sp, #16
348 ; CHECK-LE-NEXT: vstrw.32 q0, [r2]
349 ; CHECK-LE-NEXT: vldrb.s16 q0, [r1]
350 ; CHECK-LE-NEXT: mov r1, sp
351 ; CHECK-LE-NEXT: vstrw.32 q0, [r1]
352 ; CHECK-LE-NEXT: vldrh.u32 q0, [r2, #8]
353 ; CHECK-LE-NEXT: vstrw.32 q0, [r0, #48]
354 ; CHECK-LE-NEXT: vldrh.u32 q0, [r2]
355 ; CHECK-LE-NEXT: vstrw.32 q0, [r0, #32]
356 ; CHECK-LE-NEXT: vldrh.u32 q0, [r1, #8]
357 ; CHECK-LE-NEXT: vstrw.32 q0, [r0, #16]
358 ; CHECK-LE-NEXT: vldrh.u32 q0, [r1]
359 ; CHECK-LE-NEXT: vstrw.32 q0, [r0]
360 ; CHECK-LE-NEXT: add sp, #32
361 ; CHECK-LE-NEXT: bx lr
363 ; CHECK-BE-LABEL: foo_int32_int8_both:
364 ; CHECK-BE: @ %bb.0: @ %entry
365 ; CHECK-BE-NEXT: .pad #32
366 ; CHECK-BE-NEXT: sub sp, #32
367 ; CHECK-BE-NEXT: vldrb.s16 q0, [r1, #8]
368 ; CHECK-BE-NEXT: add r2, sp, #16
369 ; CHECK-BE-NEXT: vstrh.16 q0, [r2]
370 ; CHECK-BE-NEXT: vldrb.s16 q0, [r1]
371 ; CHECK-BE-NEXT: mov r1, sp
372 ; CHECK-BE-NEXT: vstrh.16 q0, [r1]
373 ; CHECK-BE-NEXT: vldrh.u32 q0, [r2, #8]
374 ; CHECK-BE-NEXT: vstrw.32 q0, [r0, #48]
375 ; CHECK-BE-NEXT: vldrh.u32 q0, [r2]
376 ; CHECK-BE-NEXT: vstrw.32 q0, [r0, #32]
377 ; CHECK-BE-NEXT: vldrh.u32 q0, [r1, #8]
378 ; CHECK-BE-NEXT: vstrw.32 q0, [r0, #16]
379 ; CHECK-BE-NEXT: vldrh.u32 q0, [r1]
380 ; CHECK-BE-NEXT: vstrw.32 q0, [r0]
381 ; CHECK-BE-NEXT: add sp, #32
382 ; CHECK-BE-NEXT: bx lr
384 %wide.load = load <16 x i8>, ptr %src, align 1
385 %0 = sext <16 x i8> %wide.load to <16 x i16>
386 %1 = zext <16 x i16> %0 to <16 x i32>
387 store <16 x i32> %1, ptr %dest, align 4
; Extending load from an offset address, i16 -> i32, two result vectors:
; %z points one <8 x i16> element (16 bytes) past %src; the <8 x i16> at %z
; (align 2) is sign-extended to <8 x i32> and stored to %dest (align 4).
; Expected code: the address increment is folded into the first load as a
; pre-indexed writeback ([r1, #16]!), the second half is loaded at offset 8
; from the updated base, and r1 is copied to r0 before returning.
; The return instruction is not visible in this excerpt; the final
; `mov r0, r1` suggests the advanced pointer %z is returned - TODO confirm.
; NOTE(review): the function name says "uint" but the IR uses sext and the
; expected loads are vldrh.s32; confirm whether zext was intended before
; touching this, since any change requires regenerating the assertions.
391 define ptr @foo_uint32_uint16_double_offset(ptr %dest, ptr readonly %src, i32 %n) {
392 ; CHECK-LABEL: foo_uint32_uint16_double_offset:
393 ; CHECK: @ %bb.0: @ %entry
394 ; CHECK-NEXT: vldrh.s32 q0, [r1, #16]!
395 ; CHECK-NEXT: vldrh.s32 q1, [r1, #8]
396 ; CHECK-NEXT: vstrw.32 q0, [r0]
397 ; CHECK-NEXT: vstrw.32 q1, [r0, #16]
398 ; CHECK-NEXT: mov r0, r1
401 %z = getelementptr inbounds <8 x i16>, ptr %src, i32 1
402 %wide.load = load <8 x i16>, ptr %z, align 2
403 %0 = sext <8 x i16> %wide.load to <8 x i32>
404 store <8 x i32> %0, ptr %dest, align 4
; Extending load from an offset address, i16 -> i32, four result vectors:
; %z points one <16 x i16> element (32 bytes) past %src; the <16 x i16> at %z
; (align 2) is sign-extended to <16 x i32> and stored to %dest (align 4).
; Expected code: the address increment is folded into the first load as a
; pre-indexed writeback ([r1, #32]!), the remaining three quarters are loaded
; at offsets 8, 16 and 24 from the updated base, and r1 is copied to r0 before
; returning.
; The return instruction is not visible in this excerpt; the final
; `mov r0, r1` suggests the advanced pointer %z is returned - TODO confirm.
; NOTE(review): the function name says "uint" but the IR uses sext and the
; expected loads are vldrh.s32; confirm whether zext was intended before
; touching this, since any change requires regenerating the assertions.
408 define ptr @foo_uint32_uint16_quad_offset(ptr %dest, ptr readonly %src, i32 %n) {
409 ; CHECK-LABEL: foo_uint32_uint16_quad_offset:
410 ; CHECK: @ %bb.0: @ %entry
411 ; CHECK-NEXT: vldrh.s32 q0, [r1, #32]!
412 ; CHECK-NEXT: vldrh.s32 q1, [r1, #8]
413 ; CHECK-NEXT: vldrh.s32 q2, [r1, #24]
414 ; CHECK-NEXT: vldrh.s32 q3, [r1, #16]
415 ; CHECK-NEXT: vstrw.32 q0, [r0]
416 ; CHECK-NEXT: vstrw.32 q2, [r0, #48]
417 ; CHECK-NEXT: vstrw.32 q1, [r0, #16]
418 ; CHECK-NEXT: vstrw.32 q3, [r0, #32]
419 ; CHECK-NEXT: mov r0, r1
422 %z = getelementptr inbounds <16 x i16>, ptr %src, i32 1
423 %wide.load = load <16 x i16>, ptr %z, align 2
424 %0 = sext <16 x i16> %wide.load to <16 x i32>
425 store <16 x i32> %0, ptr %dest, align 4
; Narrowing store with an under-aligned destination, i32 -> i16: loads
; <4 x i32> from %src (align 4), truncates every lane to i16 and stores the
; <4 x i16> result to %dest with only byte alignment (align 1).
; Expected code: the narrowing vstrh.32 store is not issued directly to the
; destination; it targets an 8-byte stack slot instead, and the 8 bytes are
; then copied to the destination with scalar ldrd / str / str instructions.
430 define void @foo_int16_int32_align1(ptr %dest, ptr readonly %src, i32 %n) {
431 ; CHECK-LABEL: foo_int16_int32_align1:
432 ; CHECK: @ %bb.0: @ %entry
433 ; CHECK-NEXT: .pad #8
434 ; CHECK-NEXT: sub sp, #8
435 ; CHECK-NEXT: vldrw.u32 q0, [r1]
436 ; CHECK-NEXT: mov r1, sp
437 ; CHECK-NEXT: vstrh.32 q0, [r1]
438 ; CHECK-NEXT: ldrd r1, r2, [sp]
439 ; CHECK-NEXT: str r1, [r0]
440 ; CHECK-NEXT: str r2, [r0, #4]
441 ; CHECK-NEXT: add sp, #8
444 %wide.load = load <4 x i32>, ptr %src, align 4
445 %0 = trunc <4 x i32> %wide.load to <4 x i16>
446 store <4 x i16> %0, ptr %dest, align 1
; Sign-extending load with an under-aligned source, i16 -> i32: loads
; <4 x i16> from %src with only byte alignment (align 1), sign-extends every
; lane to i32 and stores the <4 x i32> result to %dest (align 4).
; Expected code: the extending vldrh.s32 load is not issued directly on the
; source; the 8 source bytes are first copied to an 8-byte stack slot with
; scalar ldr / ldr / strd instructions, and the vector load reads that slot.
450 define void @foo_int32_int16_align1(ptr %dest, ptr readonly %src, i32 %n) {
451 ; CHECK-LABEL: foo_int32_int16_align1:
452 ; CHECK: @ %bb.0: @ %entry
453 ; CHECK-NEXT: .pad #8
454 ; CHECK-NEXT: sub sp, #8
455 ; CHECK-NEXT: ldr r2, [r1]
456 ; CHECK-NEXT: ldr r1, [r1, #4]
457 ; CHECK-NEXT: strd r2, r1, [sp]
458 ; CHECK-NEXT: mov r1, sp
459 ; CHECK-NEXT: vldrh.s32 q0, [r1]
460 ; CHECK-NEXT: vstrw.32 q0, [r0]
461 ; CHECK-NEXT: add sp, #8
464 %wide.load = load <4 x i16>, ptr %src, align 1
465 %0 = sext <4 x i16> %wide.load to <4 x i32>
466 store <4 x i32> %0, ptr %dest, align 4
; Zero-extending load with an under-aligned source, i16 -> i32: loads
; <4 x i16> from %src with only byte alignment (align 1), zero-extends every
; lane to i32 and stores the <4 x i32> result to %dest (align 4).
; Expected code: the extending vldrh.u32 load is not issued directly on the
; source; the 8 source bytes are first copied to an 8-byte stack slot with
; scalar ldr / ldr / strd instructions, and the vector load reads that slot.
470 define void @foo_uint32_uint16_align1(ptr %dest, ptr readonly %src, i32 %n) {
471 ; CHECK-LABEL: foo_uint32_uint16_align1:
472 ; CHECK: @ %bb.0: @ %entry
473 ; CHECK-NEXT: .pad #8
474 ; CHECK-NEXT: sub sp, #8
475 ; CHECK-NEXT: ldr r2, [r1]
476 ; CHECK-NEXT: ldr r1, [r1, #4]
477 ; CHECK-NEXT: strd r2, r1, [sp]
478 ; CHECK-NEXT: mov r1, sp
479 ; CHECK-NEXT: vldrh.u32 q0, [r1]
480 ; CHECK-NEXT: vstrw.32 q0, [r0]
481 ; CHECK-NEXT: add sp, #8
484 %wide.load = load <4 x i16>, ptr %src, align 1
485 %0 = zext <4 x i16> %wide.load to <4 x i32>
486 store <4 x i32> %0, ptr %dest, align 4