1 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2 ; RUN: llc -mtriple=thumbv8.1m.main-none-none-eabi -mattr=+mve -verify-machineinstrs %s -o - | FileCheck %s --check-prefixes=CHECK,CHECK-LE
3 ; RUN: llc -mtriple=thumbebv8.1m.main-none-none-eabi -mattr=+mve -verify-machineinstrs %s -o - | FileCheck %s --check-prefixes=CHECK,CHECK-BE
; Truncating store <4 x i32> -> <4 x i8>: expects a single widened load
; (vldrw.u32) paired with a narrowing byte store (vstrb.32); shared CHECK
; prefix means the lowering is identical for little- and big-endian.
5 define void @foo_int8_int32(<4 x i8>* %dest, <4 x i32>* readonly %src, i32 %n) {
6 ; CHECK-LABEL: foo_int8_int32:
7 ; CHECK: @ %bb.0: @ %entry
8 ; CHECK-NEXT: vldrw.u32 q0, [r1]
9 ; CHECK-NEXT: vstrb.32 q0, [r0]
12 %wide.load = load <4 x i32>, <4 x i32>* %src, align 4
13 %0 = trunc <4 x i32> %wide.load to <4 x i8>
14 store <4 x i8> %0, <4 x i8>* %dest, align 1
; Truncating store <4 x i32> -> <4 x i16>: vldrw.u32 load plus narrowing
; halfword store (vstrh.32), endian-independent (shared CHECK prefix).
18 define void @foo_int16_int32(<4 x i16>* %dest, <4 x i32>* readonly %src, i32 %n) {
19 ; CHECK-LABEL: foo_int16_int32:
20 ; CHECK: @ %bb.0: @ %entry
21 ; CHECK-NEXT: vldrw.u32 q0, [r1]
22 ; CHECK-NEXT: vstrh.32 q0, [r0]
25 %wide.load = load <4 x i32>, <4 x i32>* %src, align 4
26 %0 = trunc <4 x i32> %wide.load to <4 x i16>
27 store <4 x i16> %0, <4 x i16>* %dest, align 2
; Truncating store <8 x i16> -> <8 x i8>: vldrh.u16 load plus narrowing
; byte store (vstrb.16), endian-independent (shared CHECK prefix).
31 define void @foo_int8_int16(<8 x i8>* %dest, <8 x i16>* readonly %src, i32 %n) {
32 ; CHECK-LABEL: foo_int8_int16:
33 ; CHECK: @ %bb.0: @ %entry
34 ; CHECK-NEXT: vldrh.u16 q0, [r1]
35 ; CHECK-NEXT: vstrb.16 q0, [r0]
38 %wide.load = load <8 x i16>, <8 x i16>* %src, align 2
39 %0 = trunc <8 x i16> %wide.load to <8 x i8>
40 store <8 x i8> %0, <8 x i8>* %dest, align 1
; Double-width truncating store <8 x i32> -> <8 x i8> with only align 2 on the
; load. LE and BE get separate CHECK prefixes: both assemble the result via
; element-wise vmov.16 inserts, but BE loads with vldrb.u8 + vrev32.8 to get
; lane order right, where LE can use vldrh.u16 directly.
45 define void @foo_int8_int32_double(<8 x i8>* %dest, <8 x i32>* readonly %src, i32 %n) {
46 ; CHECK-LE-LABEL: foo_int8_int32_double:
47 ; CHECK-LE: @ %bb.0: @ %entry
48 ; CHECK-LE-NEXT: vldrh.u16 q1, [r1]
49 ; CHECK-LE-NEXT: vmov r2, r3, d2
50 ; CHECK-LE-NEXT: vmov.16 q0[0], r2
51 ; CHECK-LE-NEXT: vmov.16 q0[1], r3
52 ; CHECK-LE-NEXT: vmov r2, r3, d3
53 ; CHECK-LE-NEXT: vldrh.u16 q1, [r1, #16]
54 ; CHECK-LE-NEXT: vmov.16 q0[2], r2
55 ; CHECK-LE-NEXT: vmov.16 q0[3], r3
56 ; CHECK-LE-NEXT: vmov r1, r2, d2
57 ; CHECK-LE-NEXT: vmov.16 q0[4], r1
58 ; CHECK-LE-NEXT: vmov.16 q0[5], r2
59 ; CHECK-LE-NEXT: vmov r1, r2, d3
60 ; CHECK-LE-NEXT: vmov.16 q0[6], r1
61 ; CHECK-LE-NEXT: vmov.16 q0[7], r2
62 ; CHECK-LE-NEXT: vstrb.16 q0, [r0]
63 ; CHECK-LE-NEXT: bx lr
65 ; CHECK-BE-LABEL: foo_int8_int32_double:
66 ; CHECK-BE: @ %bb.0: @ %entry
67 ; CHECK-BE-NEXT: vldrb.u8 q0, [r1]
68 ; CHECK-BE-NEXT: vrev32.8 q1, q0
69 ; CHECK-BE-NEXT: vmov r2, r3, d2
70 ; CHECK-BE-NEXT: vmov.16 q0[0], r2
71 ; CHECK-BE-NEXT: vmov.16 q0[1], r3
72 ; CHECK-BE-NEXT: vmov r2, r3, d3
73 ; CHECK-BE-NEXT: vldrb.u8 q1, [r1, #16]
74 ; CHECK-BE-NEXT: vmov.16 q0[2], r2
75 ; CHECK-BE-NEXT: vmov.16 q0[3], r3
76 ; CHECK-BE-NEXT: vrev32.8 q1, q1
77 ; CHECK-BE-NEXT: vmov r1, r2, d2
78 ; CHECK-BE-NEXT: vmov.16 q0[4], r1
79 ; CHECK-BE-NEXT: vmov.16 q0[5], r2
80 ; CHECK-BE-NEXT: vmov r1, r2, d3
81 ; CHECK-BE-NEXT: vmov.16 q0[6], r1
82 ; CHECK-BE-NEXT: vmov.16 q0[7], r2
83 ; CHECK-BE-NEXT: vstrb.16 q0, [r0]
84 ; CHECK-BE-NEXT: bx lr
86 %wide.load = load <8 x i32>, <8 x i32>* %src, align 2
87 %0 = trunc <8 x i32> %wide.load to <8 x i8>
88 store <8 x i8> %0, <8 x i8>* %dest, align 1
; Double-width truncating store <8 x i32> -> <8 x i16>: splits into two
; vldrw.u32 loads and two narrowing vstrh.32 stores at offsets 0 and 8.
92 define void @foo_int16_int32_double(<8 x i16>* %dest, <8 x i32>* readonly %src, i32 %n) {
93 ; CHECK-LABEL: foo_int16_int32_double:
94 ; CHECK: @ %bb.0: @ %entry
95 ; CHECK-NEXT: vldrw.u32 q0, [r1]
96 ; CHECK-NEXT: vldrw.u32 q1, [r1, #16]
97 ; CHECK-NEXT: vstrh.32 q1, [r0, #8]
98 ; CHECK-NEXT: vstrh.32 q0, [r0]
101 %wide.load = load <8 x i32>, <8 x i32>* %src, align 4
102 %0 = trunc <8 x i32> %wide.load to <8 x i16>
103 store <8 x i16> %0, <8 x i16>* %dest, align 2
; Double-width truncating store <16 x i16> -> <16 x i8>: two vldrh.u16 loads
; plus two narrowing vstrb.16 stores at offsets 0 and 8.
107 define void @foo_int8_int16_double(<16 x i8>* %dest, <16 x i16>* readonly %src, i32 %n) {
108 ; CHECK-LABEL: foo_int8_int16_double:
109 ; CHECK: @ %bb.0: @ %entry
110 ; CHECK-NEXT: vldrh.u16 q0, [r1]
111 ; CHECK-NEXT: vldrh.u16 q1, [r1, #16]
112 ; CHECK-NEXT: vstrb.16 q1, [r0, #8]
113 ; CHECK-NEXT: vstrb.16 q0, [r0]
116 %wide.load = load <16 x i16>, <16 x i16>* %src, align 2
117 %0 = trunc <16 x i16> %wide.load to <16 x i8>
118 store <16 x i8> %0, <16 x i8>* %dest, align 1
; Quad-width truncating store <16 x i32> -> <16 x i8>: four vldrw.u32 loads
; (q0-q3) and four narrowing vstrb.32 stores at 4-byte steps of the dest.
122 define void @foo_int8_int32_quad(<16 x i8>* %dest, <16 x i32>* readonly %src, i32 %n) {
123 ; CHECK-LABEL: foo_int8_int32_quad:
124 ; CHECK: @ %bb.0: @ %entry
125 ; CHECK-NEXT: vldrw.u32 q0, [r1]
126 ; CHECK-NEXT: vldrw.u32 q1, [r1, #16]
127 ; CHECK-NEXT: vldrw.u32 q2, [r1, #32]
128 ; CHECK-NEXT: vldrw.u32 q3, [r1, #48]
129 ; CHECK-NEXT: vstrb.32 q1, [r0, #4]
130 ; CHECK-NEXT: vstrb.32 q0, [r0]
131 ; CHECK-NEXT: vstrb.32 q3, [r0, #12]
132 ; CHECK-NEXT: vstrb.32 q2, [r0, #8]
135 %wide.load = load <16 x i32>, <16 x i32>* %src, align 4
136 %0 = trunc <16 x i32> %wide.load to <16 x i8>
137 store <16 x i8> %0, <16 x i8>* %dest, align 1
; Sign-extending load <4 x i8> -> <4 x i32>: a single widening vldrb.s32
; plus a plain vstrw.32 store.
142 define void @foo_int32_int8(<4 x i32>* %dest, <4 x i8>* readonly %src, i32 %n) {
143 ; CHECK-LABEL: foo_int32_int8:
144 ; CHECK: @ %bb.0: @ %entry
145 ; CHECK-NEXT: vldrb.s32 q0, [r1]
146 ; CHECK-NEXT: vstrw.32 q0, [r0]
149 %wide.load = load <4 x i8>, <4 x i8>* %src, align 1
150 %0 = sext <4 x i8> %wide.load to <4 x i32>
151 store <4 x i32> %0, <4 x i32>* %dest, align 4
; Sign-extending load <8 x i8> -> <8 x i16>: widening vldrb.s16 plus
; vstrh.16 store.
155 define void @foo_int16_int8(<8 x i16>* %dest, <8 x i8>* readonly %src, i32 %n) {
156 ; CHECK-LABEL: foo_int16_int8:
157 ; CHECK: @ %bb.0: @ %entry
158 ; CHECK-NEXT: vldrb.s16 q0, [r1]
159 ; CHECK-NEXT: vstrh.16 q0, [r0]
162 %wide.load = load <8 x i8>, <8 x i8>* %src, align 1
163 %0 = sext <8 x i8> %wide.load to <8 x i16>
164 store <8 x i16> %0, <8 x i16>* %dest, align 2
; Sign-extending load <4 x i16> -> <4 x i32>: widening vldrh.s32 plus
; vstrw.32 store.
168 define void @foo_int32_int16(<4 x i32>* %dest, <4 x i16>* readonly %src, i32 %n) {
169 ; CHECK-LABEL: foo_int32_int16:
170 ; CHECK: @ %bb.0: @ %entry
171 ; CHECK-NEXT: vldrh.s32 q0, [r1]
172 ; CHECK-NEXT: vstrw.32 q0, [r0]
175 %wide.load = load <4 x i16>, <4 x i16>* %src, align 2
176 %0 = sext <4 x i16> %wide.load to <4 x i32>
177 store <4 x i32> %0, <4 x i32>* %dest, align 4
; Double-width sign-extending load <8 x i8> -> <8 x i32>: two vldrb.s32
; loads at source offsets 0/#4 and two vstrw.32 stores at dest 0/#16.
181 define void @foo_int32_int8_double(<8 x i32>* %dest, <8 x i8>* readonly %src, i32 %n) {
182 ; CHECK-LABEL: foo_int32_int8_double:
183 ; CHECK: @ %bb.0: @ %entry
184 ; CHECK-NEXT: vldrb.s32 q0, [r1]
185 ; CHECK-NEXT: vldrb.s32 q1, [r1, #4]
186 ; CHECK-NEXT: vstrw.32 q1, [r0, #16]
187 ; CHECK-NEXT: vstrw.32 q0, [r0]
190 %wide.load = load <8 x i8>, <8 x i8>* %src, align 1
191 %0 = sext <8 x i8> %wide.load to <8 x i32>
192 store <8 x i32> %0, <8 x i32>* %dest, align 4
; Double-width sign-extending load <16 x i8> -> <16 x i16>: two vldrb.s16
; loads at offsets 0/#8 and two vstrh.16 stores at dest 0/#16.
196 define void @foo_int16_int8_double(<16 x i16>* %dest, <16 x i8>* readonly %src, i32 %n) {
197 ; CHECK-LABEL: foo_int16_int8_double:
198 ; CHECK: @ %bb.0: @ %entry
199 ; CHECK-NEXT: vldrb.s16 q0, [r1]
200 ; CHECK-NEXT: vldrb.s16 q1, [r1, #8]
201 ; CHECK-NEXT: vstrh.16 q1, [r0, #16]
202 ; CHECK-NEXT: vstrh.16 q0, [r0]
205 %wide.load = load <16 x i8>, <16 x i8>* %src, align 1
206 %0 = sext <16 x i8> %wide.load to <16 x i16>
207 store <16 x i16> %0, <16 x i16>* %dest, align 2
; Double-width sign-extending load <8 x i16> -> <8 x i32>: two vldrh.s32
; loads at offsets 0/#8 and two vstrw.32 stores at dest 0/#16.
211 define void @foo_int32_int16_double(<8 x i32>* %dest, <8 x i16>* readonly %src, i32 %n) {
212 ; CHECK-LABEL: foo_int32_int16_double:
213 ; CHECK: @ %bb.0: @ %entry
214 ; CHECK-NEXT: vldrh.s32 q0, [r1]
215 ; CHECK-NEXT: vldrh.s32 q1, [r1, #8]
216 ; CHECK-NEXT: vstrw.32 q1, [r0, #16]
217 ; CHECK-NEXT: vstrw.32 q0, [r0]
220 %wide.load = load <8 x i16>, <8 x i16>* %src, align 2
221 %0 = sext <8 x i16> %wide.load to <8 x i32>
222 store <8 x i32> %0, <8 x i32>* %dest, align 4
; Quad-width sign-extending load <16 x i8> -> <16 x i32>: four vldrb.s32
; loads (source offsets step by 4 bytes) and four vstrw.32 stores (dest
; offsets step by 16 bytes).
226 define void @foo_int32_int8_quad(<16 x i32>* %dest, <16 x i8>* readonly %src, i32 %n) {
227 ; CHECK-LABEL: foo_int32_int8_quad:
228 ; CHECK: @ %bb.0: @ %entry
229 ; CHECK-NEXT: vldrb.s32 q0, [r1]
230 ; CHECK-NEXT: vldrb.s32 q1, [r1, #4]
231 ; CHECK-NEXT: vldrb.s32 q2, [r1, #8]
232 ; CHECK-NEXT: vldrb.s32 q3, [r1, #12]
233 ; CHECK-NEXT: vstrw.32 q1, [r0, #16]
234 ; CHECK-NEXT: vstrw.32 q0, [r0]
235 ; CHECK-NEXT: vstrw.32 q3, [r0, #48]
236 ; CHECK-NEXT: vstrw.32 q2, [r0, #32]
239 %wide.load = load <16 x i8>, <16 x i8>* %src, align 1
240 %0 = sext <16 x i8> %wide.load to <16 x i32>
241 store <16 x i32> %0, <16 x i32>* %dest, align 4
; Zero-extending load <4 x i8> -> <4 x i32>: unsigned widening vldrb.u32
; (vs .s32 in the sext tests above) plus vstrw.32 store.
246 define void @foo_uint32_uint8(<4 x i32>* %dest, <4 x i8>* readonly %src, i32 %n) {
247 ; CHECK-LABEL: foo_uint32_uint8:
248 ; CHECK: @ %bb.0: @ %entry
249 ; CHECK-NEXT: vldrb.u32 q0, [r1]
250 ; CHECK-NEXT: vstrw.32 q0, [r0]
253 %wide.load = load <4 x i8>, <4 x i8>* %src, align 1
254 %0 = zext <4 x i8> %wide.load to <4 x i32>
255 store <4 x i32> %0, <4 x i32>* %dest, align 4
; Zero-extending load <8 x i8> -> <8 x i16>: widening vldrb.u16 plus
; vstrh.16 store.
259 define void @foo_uint16_uint8(<8 x i16>* %dest, <8 x i8>* readonly %src, i32 %n) {
260 ; CHECK-LABEL: foo_uint16_uint8:
261 ; CHECK: @ %bb.0: @ %entry
262 ; CHECK-NEXT: vldrb.u16 q0, [r1]
263 ; CHECK-NEXT: vstrh.16 q0, [r0]
266 %wide.load = load <8 x i8>, <8 x i8>* %src, align 1
267 %0 = zext <8 x i8> %wide.load to <8 x i16>
268 store <8 x i16> %0, <8 x i16>* %dest, align 2
; Zero-extending load <4 x i16> -> <4 x i32>: widening vldrh.u32 plus
; vstrw.32 store.
272 define void @foo_uint32_uint16(<4 x i32>* %dest, <4 x i16>* readonly %src, i32 %n) {
273 ; CHECK-LABEL: foo_uint32_uint16:
274 ; CHECK: @ %bb.0: @ %entry
275 ; CHECK-NEXT: vldrh.u32 q0, [r1]
276 ; CHECK-NEXT: vstrw.32 q0, [r0]
279 %wide.load = load <4 x i16>, <4 x i16>* %src, align 2
280 %0 = zext <4 x i16> %wide.load to <4 x i32>
281 store <4 x i32> %0, <4 x i32>* %dest, align 4
; Double-width zero-extending load <8 x i8> -> <8 x i32>: two vldrb.u32
; loads at offsets 0/#4 and two vstrw.32 stores at dest 0/#16.
286 define void @foo_uint32_uint8_double(<8 x i32>* %dest, <8 x i8>* readonly %src, i32 %n) {
287 ; CHECK-LABEL: foo_uint32_uint8_double:
288 ; CHECK: @ %bb.0: @ %entry
289 ; CHECK-NEXT: vldrb.u32 q0, [r1]
290 ; CHECK-NEXT: vldrb.u32 q1, [r1, #4]
291 ; CHECK-NEXT: vstrw.32 q1, [r0, #16]
292 ; CHECK-NEXT: vstrw.32 q0, [r0]
295 %wide.load = load <8 x i8>, <8 x i8>* %src, align 1
296 %0 = zext <8 x i8> %wide.load to <8 x i32>
297 store <8 x i32> %0, <8 x i32>* %dest, align 4
; Double-width zero-extending load <16 x i8> -> <16 x i16>: two vldrb.u16
; loads at offsets 0/#8 and two vstrh.16 stores at dest 0/#16.
301 define void @foo_uint16_uint8_double(<16 x i16>* %dest, <16 x i8>* readonly %src, i32 %n) {
302 ; CHECK-LABEL: foo_uint16_uint8_double:
303 ; CHECK: @ %bb.0: @ %entry
304 ; CHECK-NEXT: vldrb.u16 q0, [r1]
305 ; CHECK-NEXT: vldrb.u16 q1, [r1, #8]
306 ; CHECK-NEXT: vstrh.16 q1, [r0, #16]
307 ; CHECK-NEXT: vstrh.16 q0, [r0]
310 %wide.load = load <16 x i8>, <16 x i8>* %src, align 1
311 %0 = zext <16 x i8> %wide.load to <16 x i16>
312 store <16 x i16> %0, <16 x i16>* %dest, align 2
; Double-width zero-extending load <8 x i16> -> <8 x i32>: two vldrh.u32
; loads at offsets 0/#8 and two vstrw.32 stores at dest 0/#16.
316 define void @foo_uint32_uint16_double(<8 x i32>* %dest, <8 x i16>* readonly %src, i32 %n) {
317 ; CHECK-LABEL: foo_uint32_uint16_double:
318 ; CHECK: @ %bb.0: @ %entry
319 ; CHECK-NEXT: vldrh.u32 q0, [r1]
320 ; CHECK-NEXT: vldrh.u32 q1, [r1, #8]
321 ; CHECK-NEXT: vstrw.32 q1, [r0, #16]
322 ; CHECK-NEXT: vstrw.32 q0, [r0]
325 %wide.load = load <8 x i16>, <8 x i16>* %src, align 2
326 %0 = zext <8 x i16> %wide.load to <8 x i32>
327 store <8 x i32> %0, <8 x i32>* %dest, align 4
; Quad-width zero-extending load <16 x i8> -> <16 x i32>: four vldrb.u32
; loads (source offsets step by 4) and four vstrw.32 stores (dest offsets
; step by 16) — unsigned counterpart of foo_int32_int8_quad.
331 define void @foo_uint32_uint8_quad(<16 x i32>* %dest, <16 x i8>* readonly %src, i32 %n) {
332 ; CHECK-LABEL: foo_uint32_uint8_quad:
333 ; CHECK: @ %bb.0: @ %entry
334 ; CHECK-NEXT: vldrb.u32 q0, [r1]
335 ; CHECK-NEXT: vldrb.u32 q1, [r1, #4]
336 ; CHECK-NEXT: vldrb.u32 q2, [r1, #8]
337 ; CHECK-NEXT: vldrb.u32 q3, [r1, #12]
338 ; CHECK-NEXT: vstrw.32 q1, [r0, #16]
339 ; CHECK-NEXT: vstrw.32 q0, [r0]
340 ; CHECK-NEXT: vstrw.32 q3, [r0, #48]
341 ; CHECK-NEXT: vstrw.32 q2, [r0, #32]
344 %wide.load = load <16 x i8>, <16 x i8>* %src, align 1
345 %0 = zext <16 x i8> %wide.load to <16 x i32>
346 store <16 x i32> %0, <16 x i32>* %dest, align 4
; Mixed extension chain: sext <16 x i8> -> <16 x i16>, then zext that to
; <16 x i32>. Both endiannesses spill the intermediate <16 x i16> to a
; 32-byte stack slot (vldrb.s16 in, vldrh.u32 back out); they differ only
; in the spill-store opcode — vstrw.32 for LE vs vstrh.16 for BE.
351 define void @foo_int32_int8_both(<16 x i32>* %dest, <16 x i8>* readonly %src, i32 %n) {
352 ; CHECK-LE-LABEL: foo_int32_int8_both:
353 ; CHECK-LE: @ %bb.0: @ %entry
354 ; CHECK-LE-NEXT: .pad #32
355 ; CHECK-LE-NEXT: sub sp, #32
356 ; CHECK-LE-NEXT: vldrb.s16 q0, [r1, #8]
357 ; CHECK-LE-NEXT: add r2, sp, #16
358 ; CHECK-LE-NEXT: vstrw.32 q0, [r2]
359 ; CHECK-LE-NEXT: vldrb.s16 q0, [r1]
360 ; CHECK-LE-NEXT: mov r1, sp
361 ; CHECK-LE-NEXT: vstrw.32 q0, [r1]
362 ; CHECK-LE-NEXT: vldrh.u32 q0, [r2, #8]
363 ; CHECK-LE-NEXT: vstrw.32 q0, [r0, #48]
364 ; CHECK-LE-NEXT: vldrh.u32 q0, [r2]
365 ; CHECK-LE-NEXT: vstrw.32 q0, [r0, #32]
366 ; CHECK-LE-NEXT: vldrh.u32 q0, [r1, #8]
367 ; CHECK-LE-NEXT: vstrw.32 q0, [r0, #16]
368 ; CHECK-LE-NEXT: vldrh.u32 q0, [r1]
369 ; CHECK-LE-NEXT: vstrw.32 q0, [r0]
370 ; CHECK-LE-NEXT: add sp, #32
371 ; CHECK-LE-NEXT: bx lr
373 ; CHECK-BE-LABEL: foo_int32_int8_both:
374 ; CHECK-BE: @ %bb.0: @ %entry
375 ; CHECK-BE-NEXT: .pad #32
376 ; CHECK-BE-NEXT: sub sp, #32
377 ; CHECK-BE-NEXT: vldrb.s16 q0, [r1, #8]
378 ; CHECK-BE-NEXT: add r2, sp, #16
379 ; CHECK-BE-NEXT: vstrh.16 q0, [r2]
380 ; CHECK-BE-NEXT: vldrb.s16 q0, [r1]
381 ; CHECK-BE-NEXT: mov r1, sp
382 ; CHECK-BE-NEXT: vstrh.16 q0, [r1]
383 ; CHECK-BE-NEXT: vldrh.u32 q0, [r2, #8]
384 ; CHECK-BE-NEXT: vstrw.32 q0, [r0, #48]
385 ; CHECK-BE-NEXT: vldrh.u32 q0, [r2]
386 ; CHECK-BE-NEXT: vstrw.32 q0, [r0, #32]
387 ; CHECK-BE-NEXT: vldrh.u32 q0, [r1, #8]
388 ; CHECK-BE-NEXT: vstrw.32 q0, [r0, #16]
389 ; CHECK-BE-NEXT: vldrh.u32 q0, [r1]
390 ; CHECK-BE-NEXT: vstrw.32 q0, [r0]
391 ; CHECK-BE-NEXT: add sp, #32
392 ; CHECK-BE-NEXT: bx lr
394 %wide.load = load <16 x i8>, <16 x i8>* %src, align 1
395 %0 = sext <16 x i8> %wide.load to <16 x i16>
396 %1 = zext <16 x i16> %0 to <16 x i32>
397 store <16 x i32> %1, <16 x i32>* %dest, align 4
; Offset GEP + sext load, returning the advanced pointer: the first widening
; load uses pre-increment writeback (vldrh.s32 q0, [r1, #16]!) so r1 ends up
; pointing at %z, which is returned via "mov r0, r1".
401 define <8 x i16>* @foo_uint32_uint16_double_offset(<8 x i32>* %dest, <8 x i16>* readonly %src, i32 %n) {
402 ; CHECK-LABEL: foo_uint32_uint16_double_offset:
403 ; CHECK: @ %bb.0: @ %entry
404 ; CHECK-NEXT: vldrh.s32 q0, [r1, #16]!
405 ; CHECK-NEXT: vldrh.s32 q1, [r1, #8]
406 ; CHECK-NEXT: vstrw.32 q0, [r0]
407 ; CHECK-NEXT: vstrw.32 q1, [r0, #16]
408 ; CHECK-NEXT: mov r0, r1
411 %z = getelementptr inbounds <8 x i16>, <8 x i16>* %src, i32 1
412 %wide.load = load <8 x i16>, <8 x i16>* %z, align 2
413 %0 = sext <8 x i16> %wide.load to <8 x i32>
414 store <8 x i32> %0, <8 x i32>* %dest, align 4
; Quad variant of the offset test: pre-increment writeback on the first load
; (vldrh.s32 q0, [r1, #32]!), three further loads relative to the updated r1,
; four vstrw.32 stores, and the advanced pointer returned in r0.
418 define <16 x i16>* @foo_uint32_uint16_quad_offset(<16 x i32>* %dest, <16 x i16>* readonly %src, i32 %n) {
419 ; CHECK-LABEL: foo_uint32_uint16_quad_offset:
420 ; CHECK: @ %bb.0: @ %entry
421 ; CHECK-NEXT: vldrh.s32 q0, [r1, #32]!
422 ; CHECK-NEXT: vldrh.s32 q1, [r1, #8]
423 ; CHECK-NEXT: vldrh.s32 q2, [r1, #24]
424 ; CHECK-NEXT: vldrh.s32 q3, [r1, #16]
425 ; CHECK-NEXT: vstrw.32 q0, [r0]
426 ; CHECK-NEXT: vstrw.32 q2, [r0, #48]
427 ; CHECK-NEXT: vstrw.32 q1, [r0, #16]
428 ; CHECK-NEXT: vstrw.32 q3, [r0, #32]
429 ; CHECK-NEXT: mov r0, r1
432 %z = getelementptr inbounds <16 x i16>, <16 x i16>* %src, i32 1
433 %wide.load = load <16 x i16>, <16 x i16>* %z, align 2
434 %0 = sext <16 x i16> %wide.load to <16 x i32>
435 store <16 x i32> %0, <16 x i32>* %dest, align 4
; Under-aligned destination (align 1 store of <4 x i16>): the narrowing store
; cannot go straight to %dest, so the value is narrowed into an 8-byte stack
; slot (vstrh.32) and then copied out with scalar ldrd/str pairs.
440 define void @foo_int16_int32_align1(<4 x i16>* %dest, <4 x i32>* readonly %src, i32 %n) {
441 ; CHECK-LABEL: foo_int16_int32_align1:
442 ; CHECK: @ %bb.0: @ %entry
443 ; CHECK-NEXT: .pad #8
444 ; CHECK-NEXT: sub sp, #8
445 ; CHECK-NEXT: vldrw.u32 q0, [r1]
446 ; CHECK-NEXT: mov r1, sp
447 ; CHECK-NEXT: vstrh.32 q0, [r1]
448 ; CHECK-NEXT: ldrd r1, r2, [sp]
449 ; CHECK-NEXT: str r1, [r0]
450 ; CHECK-NEXT: str r2, [r0, #4]
451 ; CHECK-NEXT: add sp, #8
454 %wide.load = load <4 x i32>, <4 x i32>* %src, align 4
455 %0 = trunc <4 x i32> %wide.load to <4 x i16>
456 store <4 x i16> %0, <4 x i16>* %dest, align 1
; Under-aligned source (align 1 load of <4 x i16>): the input is first copied
; with scalar ldr/strd to an 8-byte stack slot, then sign-extend-loaded from
; there with vldrh.s32 before the vstrw.32 store.
460 define void @foo_int32_int16_align1(<4 x i32>* %dest, <4 x i16>* readonly %src, i32 %n) {
461 ; CHECK-LABEL: foo_int32_int16_align1:
462 ; CHECK: @ %bb.0: @ %entry
463 ; CHECK-NEXT: .pad #8
464 ; CHECK-NEXT: sub sp, #8
465 ; CHECK-NEXT: ldr r2, [r1]
466 ; CHECK-NEXT: ldr r1, [r1, #4]
467 ; CHECK-NEXT: strd r2, r1, [sp]
468 ; CHECK-NEXT: mov r1, sp
469 ; CHECK-NEXT: vldrh.s32 q0, [r1]
470 ; CHECK-NEXT: vstrw.32 q0, [r0]
471 ; CHECK-NEXT: add sp, #8
474 %wide.load = load <4 x i16>, <4 x i16>* %src, align 1
475 %0 = sext <4 x i16> %wide.load to <4 x i32>
476 store <4 x i32> %0, <4 x i32>* %dest, align 4
; Unsigned counterpart of foo_int32_int16_align1: same scalar stack bounce
; for the under-aligned source, but reloaded with vldrh.u32 (zero-extend)
; instead of vldrh.s32.
480 define void @foo_uint32_uint16_align1(<4 x i32>* %dest, <4 x i16>* readonly %src, i32 %n) {
481 ; CHECK-LABEL: foo_uint32_uint16_align1:
482 ; CHECK: @ %bb.0: @ %entry
483 ; CHECK-NEXT: .pad #8
484 ; CHECK-NEXT: sub sp, #8
485 ; CHECK-NEXT: ldr r2, [r1]
486 ; CHECK-NEXT: ldr r1, [r1, #4]
487 ; CHECK-NEXT: strd r2, r1, [sp]
488 ; CHECK-NEXT: mov r1, sp
489 ; CHECK-NEXT: vldrh.u32 q0, [r1]
490 ; CHECK-NEXT: vstrw.32 q0, [r0]
491 ; CHECK-NEXT: add sp, #8
494 %wide.load = load <4 x i16>, <4 x i16>* %src, align 1
495 %0 = zext <4 x i16> %wide.load to <4 x i32>
496 store <4 x i32> %0, <4 x i32>* %dest, align 4