1 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2 ; RUN: llc -mtriple=thumbv8.1m.main-arm-none-eabi -mattr=+mve -verify-machineinstrs %s -o - | FileCheck %s
; Truncating store, i32 lanes -> i8 lanes.
; Loads <4 x i32> from %src (align 4), truncates each lane to i8 and stores the
; <4 x i8> result to %dest (align 1). The expected output shown is a vldrw.u32
; load followed immediately by a vstrb.32 store, with no separate narrowing
; instruction between them. %n is not referenced by the instructions visible here.
4 define void @foo_int8_int32(<4 x i8>* %dest, <4 x i32>* readonly %src, i32 %n) {
5 ; CHECK-LABEL: foo_int8_int32:
6 ; CHECK: @ %bb.0: @ %entry
7 ; CHECK-NEXT: vldrw.u32 q0, [r1]
8 ; CHECK-NEXT: vstrb.32 q0, [r0]
11 %wide.load = load <4 x i32>, <4 x i32>* %src, align 4
12 %0 = trunc <4 x i32> %wide.load to <4 x i8>
13 store <4 x i8> %0, <4 x i8>* %dest, align 1
; Truncating store, i32 lanes -> i16 lanes.
; Loads <4 x i32> from %src (align 4), truncates each lane to i16 and stores the
; <4 x i16> result to %dest with align 2. The expected output shown is a
; vldrw.u32 load followed immediately by a vstrh.32 store. %n is not referenced
; by the instructions visible here.
17 define void @foo_int16_int32(<4 x i16>* %dest, <4 x i32>* readonly %src, i32 %n) {
18 ; CHECK-LABEL: foo_int16_int32:
19 ; CHECK: @ %bb.0: @ %entry
20 ; CHECK-NEXT: vldrw.u32 q0, [r1]
21 ; CHECK-NEXT: vstrh.32 q0, [r0]
24 %wide.load = load <4 x i32>, <4 x i32>* %src, align 4
25 %0 = trunc <4 x i32> %wide.load to <4 x i16>
26 store <4 x i16> %0, <4 x i16>* %dest, align 2
; Truncating store, i16 lanes -> i8 lanes.
; Loads <8 x i16> from %src (align 2), truncates each lane to i8 and stores the
; <8 x i8> result to %dest (align 1). The expected output shown is a vldrh.u16
; load followed immediately by a vstrb.16 store. %n is not referenced by the
; instructions visible here.
30 define void @foo_int8_int16(<8 x i8>* %dest, <8 x i16>* readonly %src, i32 %n) {
31 ; CHECK-LABEL: foo_int8_int16:
32 ; CHECK: @ %bb.0: @ %entry
33 ; CHECK-NEXT: vldrh.u16 q0, [r1]
34 ; CHECK-NEXT: vstrb.16 q0, [r0]
37 %wide.load = load <8 x i16>, <8 x i16>* %src, align 2
38 %0 = trunc <8 x i16> %wide.load to <8 x i8>
39 store <8 x i8> %0, <8 x i8>* %dest, align 1
; Sign-extending load, i8 lanes -> i32 lanes.
; Loads <4 x i8> from %src (align 1), sign-extends each lane to i32 and stores
; the <4 x i32> result to %dest (align 4). The expected output shown is a
; vldrb.s32 load followed immediately by a vstrw.32 store, with no separate
; extend instruction between them. %n is not referenced by the instructions
; visible here.
43 define void @foo_int32_int8(<4 x i32>* %dest, <4 x i8>* readonly %src, i32 %n) {
44 ; CHECK-LABEL: foo_int32_int8:
45 ; CHECK: @ %bb.0: @ %entry
46 ; CHECK-NEXT: vldrb.s32 q0, [r1]
47 ; CHECK-NEXT: vstrw.32 q0, [r0]
50 %wide.load = load <4 x i8>, <4 x i8>* %src, align 1
51 %0 = sext <4 x i8> %wide.load to <4 x i32>
52 store <4 x i32> %0, <4 x i32>* %dest, align 4
; Sign-extending load, i8 lanes -> i16 lanes.
; Loads <8 x i8> from %src (align 1), sign-extends each lane to i16 and stores
; the <8 x i16> result to %dest (align 2). The expected output shown is a
; vldrb.s16 load followed immediately by a vstrh.16 store. %n is not referenced
; by the instructions visible here.
56 define void @foo_int16_int8(<8 x i16>* %dest, <8 x i8>* readonly %src, i32 %n) {
57 ; CHECK-LABEL: foo_int16_int8:
58 ; CHECK: @ %bb.0: @ %entry
59 ; CHECK-NEXT: vldrb.s16 q0, [r1]
60 ; CHECK-NEXT: vstrh.16 q0, [r0]
63 %wide.load = load <8 x i8>, <8 x i8>* %src, align 1
64 %0 = sext <8 x i8> %wide.load to <8 x i16>
65 store <8 x i16> %0, <8 x i16>* %dest, align 2
; Sign-extending load, i16 lanes -> i32 lanes.
; Loads <4 x i16> from %src with align 2, sign-extends each lane to i32 and
; stores the <4 x i32> result to %dest (align 4). The expected output shown is a
; vldrh.s32 load followed immediately by a vstrw.32 store. %n is not referenced
; by the instructions visible here.
69 define void @foo_int32_int16(<4 x i32>* %dest, <4 x i16>* readonly %src, i32 %n) {
70 ; CHECK-LABEL: foo_int32_int16:
71 ; CHECK: @ %bb.0: @ %entry
72 ; CHECK-NEXT: vldrh.s32 q0, [r1]
73 ; CHECK-NEXT: vstrw.32 q0, [r0]
76 %wide.load = load <4 x i16>, <4 x i16>* %src, align 2
77 %0 = sext <4 x i16> %wide.load to <4 x i32>
78 store <4 x i32> %0, <4 x i32>* %dest, align 4
; Zero-extending load, i8 lanes -> i32 lanes.
; Same shape as the sext variant, but uses zext: loads <4 x i8> from %src
; (align 1), zero-extends each lane to i32 and stores <4 x i32> to %dest
; (align 4). The expected output shown is a vldrb.u32 load followed immediately
; by a vstrw.32 store. %n is not referenced by the instructions visible here.
82 define void @foo_uint32_uint8(<4 x i32>* %dest, <4 x i8>* readonly %src, i32 %n) {
83 ; CHECK-LABEL: foo_uint32_uint8:
84 ; CHECK: @ %bb.0: @ %entry
85 ; CHECK-NEXT: vldrb.u32 q0, [r1]
86 ; CHECK-NEXT: vstrw.32 q0, [r0]
89 %wide.load = load <4 x i8>, <4 x i8>* %src, align 1
90 %0 = zext <4 x i8> %wide.load to <4 x i32>
91 store <4 x i32> %0, <4 x i32>* %dest, align 4
; Zero-extending load, i8 lanes -> i16 lanes.
; Loads <8 x i8> from %src (align 1), zero-extends each lane to i16 and stores
; the <8 x i16> result to %dest (align 2). The expected output shown is a
; vldrb.u16 load followed immediately by a vstrh.16 store. %n is not referenced
; by the instructions visible here.
95 define void @foo_uint16_uint8(<8 x i16>* %dest, <8 x i8>* readonly %src, i32 %n) {
96 ; CHECK-LABEL: foo_uint16_uint8:
97 ; CHECK: @ %bb.0: @ %entry
98 ; CHECK-NEXT: vldrb.u16 q0, [r1]
99 ; CHECK-NEXT: vstrh.16 q0, [r0]
102 %wide.load = load <8 x i8>, <8 x i8>* %src, align 1
103 %0 = zext <8 x i8> %wide.load to <8 x i16>
104 store <8 x i16> %0, <8 x i16>* %dest, align 2
; Zero-extending load, i16 lanes -> i32 lanes.
; Loads <4 x i16> from %src with align 2, zero-extends each lane to i32 and
; stores the <4 x i32> result to %dest (align 4). The expected output shown is a
; vldrh.u32 load followed immediately by a vstrw.32 store. %n is not referenced
; by the instructions visible here.
108 define void @foo_uint32_uint16(<4 x i32>* %dest, <4 x i16>* readonly %src, i32 %n) {
109 ; CHECK-LABEL: foo_uint32_uint16:
110 ; CHECK: @ %bb.0: @ %entry
111 ; CHECK-NEXT: vldrh.u32 q0, [r1]
112 ; CHECK-NEXT: vstrw.32 q0, [r0]
115 %wide.load = load <4 x i16>, <4 x i16>* %src, align 2
116 %0 = zext <4 x i16> %wide.load to <4 x i32>
117 store <4 x i32> %0, <4 x i32>* %dest, align 4
; Truncating store, i32 lanes -> i16 lanes, with an under-aligned destination.
; Identical IR to foo_int16_int32 except that the <4 x i16> store to %dest uses
; align 1 instead of align 2. The expected output no longer stores the vector
; straight to r0: it reserves 8 bytes of stack, performs the vstrh.32 into that
; stack slot, reloads the 8 bytes as two words with ldrd, and writes them to
; [r0] and [r0, #4] with scalar str instructions before releasing the stack.
; NOTE(review): presumably vstrh.32 is not usable on a byte-aligned address,
; hence the bounce through the stack - confirm against the MVE documentation.
; %n is not referenced by the instructions visible here.
124 define void @foo_int16_int32_align1(<4 x i16>* %dest, <4 x i32>* readonly %src, i32 %n) {
125 ; CHECK-LABEL: foo_int16_int32_align1:
126 ; CHECK: @ %bb.0: @ %entry
127 ; CHECK-NEXT: .pad #8
128 ; CHECK-NEXT: sub sp, #8
129 ; CHECK-NEXT: vldrw.u32 q0, [r1]
130 ; CHECK-NEXT: mov r1, sp
131 ; CHECK-NEXT: vstrh.32 q0, [r1]
132 ; CHECK-NEXT: ldrd r1, r2, [sp]
133 ; CHECK-NEXT: str r1, [r0]
134 ; CHECK-NEXT: str r2, [r0, #4]
135 ; CHECK-NEXT: add sp, #8
138 %wide.load = load <4 x i32>, <4 x i32>* %src, align 4
139 %0 = trunc <4 x i32> %wide.load to <4 x i16>
140 store <4 x i16> %0, <4 x i16>* %dest, align 1
; Sign-extending load, i16 lanes -> i32 lanes, with an under-aligned source.
; Identical IR to foo_int32_int16 except that the <4 x i16> load from %src uses
; align 1 instead of align 2. The expected output does not load the vector
; straight from r1: it reserves 8 bytes of stack, copies the 8 source bytes with
; two scalar ldr instructions plus one strd into that stack slot, and only then
; performs the vldrh.s32 from the stack copy, followed by the vstrw.32 to r0.
; NOTE(review): presumably vldrh.s32 is not usable on a byte-aligned address,
; hence the bounce through the stack - confirm against the MVE documentation.
; %n is not referenced by the instructions visible here.
144 define void @foo_int32_int16_align1(<4 x i32>* %dest, <4 x i16>* readonly %src, i32 %n) {
145 ; CHECK-LABEL: foo_int32_int16_align1:
146 ; CHECK: @ %bb.0: @ %entry
147 ; CHECK-NEXT: .pad #8
148 ; CHECK-NEXT: sub sp, #8
149 ; CHECK-NEXT: ldr r2, [r1]
150 ; CHECK-NEXT: ldr r1, [r1, #4]
151 ; CHECK-NEXT: strd r2, r1, [sp]
152 ; CHECK-NEXT: mov r1, sp
153 ; CHECK-NEXT: vldrh.s32 q0, [r1]
154 ; CHECK-NEXT: vstrw.32 q0, [r0]
155 ; CHECK-NEXT: add sp, #8
158 %wide.load = load <4 x i16>, <4 x i16>* %src, align 1
159 %0 = sext <4 x i16> %wide.load to <4 x i32>
160 store <4 x i32> %0, <4 x i32>* %dest, align 4
; Zero-extending load, i16 lanes -> i32 lanes, with an under-aligned source.
; Identical IR to foo_uint32_uint16 except that the <4 x i16> load from %src
; uses align 1 instead of align 2. The expected output reserves 8 bytes of
; stack, copies the 8 source bytes with two scalar ldr instructions plus one
; strd into that stack slot, and then performs the vldrh.u32 from the stack
; copy, followed by the vstrw.32 to r0. The only difference from the sext
; variant's expected output is vldrh.u32 in place of vldrh.s32.
; NOTE(review): presumably vldrh.u32 is not usable on a byte-aligned address,
; hence the bounce through the stack - confirm against the MVE documentation.
; %n is not referenced by the instructions visible here.
164 define void @foo_uint32_uint16_align1(<4 x i32>* %dest, <4 x i16>* readonly %src, i32 %n) {
165 ; CHECK-LABEL: foo_uint32_uint16_align1:
166 ; CHECK: @ %bb.0: @ %entry
167 ; CHECK-NEXT: .pad #8
168 ; CHECK-NEXT: sub sp, #8
169 ; CHECK-NEXT: ldr r2, [r1]
170 ; CHECK-NEXT: ldr r1, [r1, #4]
171 ; CHECK-NEXT: strd r2, r1, [sp]
172 ; CHECK-NEXT: mov r1, sp
173 ; CHECK-NEXT: vldrh.u32 q0, [r1]
174 ; CHECK-NEXT: vstrw.32 q0, [r0]
175 ; CHECK-NEXT: add sp, #8
178 %wide.load = load <4 x i16>, <4 x i16>* %src, align 1
179 %0 = zext <4 x i16> %wide.load to <4 x i32>
180 store <4 x i32> %0, <4 x i32>* %dest, align 4