1 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2 ; RUN: llc -mtriple=thumbv8.1m.main-none-none-eabi -mattr=+mve.fp %s -o - | FileCheck %s
; Gather of 4 x i8 at byte offsets, zero-extended to i32: vldrb.u32 with vector offsets.
define arm_aapcs_vfpcc <4 x i32> @zext_unscaled_i8_i32(i8* %base, <4 x i32>* %offptr) {
; CHECK-LABEL: zext_unscaled_i8_i32:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vldrw.u32 q1, [r1]
; CHECK-NEXT:    vldrb.u32 q0, [r0, q1]
; CHECK-NEXT:    bx lr
entry:
  %offs = load <4 x i32>, <4 x i32>* %offptr, align 4
  %ptrs = getelementptr inbounds i8, i8* %base, <4 x i32> %offs
  %gather = call <4 x i8> @llvm.masked.gather.v4i8.v4p0i8(<4 x i8*> %ptrs, i32 1, <4 x i1> <i1 true, i1 true, i1 true, i1 true>, <4 x i8> undef)
  %gather.zext = zext <4 x i8> %gather to <4 x i32>
  ret <4 x i32> %gather.zext
}
; Gather of 4 x i8 at byte offsets, sign-extended to i32: vldrb.s32 with vector offsets.
define arm_aapcs_vfpcc <4 x i32> @sext_unscaled_i8_i32(i8* %base, <4 x i32>* %offptr) {
; CHECK-LABEL: sext_unscaled_i8_i32:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vldrw.u32 q1, [r1]
; CHECK-NEXT:    vldrb.s32 q0, [r0, q1]
; CHECK-NEXT:    bx lr
entry:
  %offs = load <4 x i32>, <4 x i32>* %offptr, align 4
  %ptrs = getelementptr inbounds i8, i8* %base, <4 x i32> %offs
  %gather = call <4 x i8> @llvm.masked.gather.v4i8.v4p0i8(<4 x i8*> %ptrs, i32 1, <4 x i1> <i1 true, i1 true, i1 true, i1 true>, <4 x i8> undef)
  %gather.sext = sext <4 x i8> %gather to <4 x i32>
  ret <4 x i32> %gather.sext
}
; Same as sext_unscaled_i8_i32 but using opaque pointers; codegen must be identical.
define arm_aapcs_vfpcc <4 x i32> @sext_unscaled_i8_i32_opaque(ptr %base, ptr %offptr) {
; CHECK-LABEL: sext_unscaled_i8_i32_opaque:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vldrw.u32 q1, [r1]
; CHECK-NEXT:    vldrb.s32 q0, [r0, q1]
; CHECK-NEXT:    bx lr
entry:
  %offs = load <4 x i32>, ptr %offptr, align 4
  %ptrs = getelementptr inbounds i8, ptr %base, <4 x i32> %offs
  %gather = call <4 x i8> @llvm.masked.gather.v4i8.v4p0(<4 x ptr> %ptrs, i32 1, <4 x i1> <i1 true, i1 true, i1 true, i1 true>, <4 x i8> undef)
  %gather.sext = sext <4 x i8> %gather to <4 x i32>
  ret <4 x i32> %gather.sext
}
; Gather of 4 x i16 at byte (unscaled) offsets, zero-extended to i32: vldrh.u32.
define arm_aapcs_vfpcc <4 x i32> @zext_unscaled_i16_i32(i8* %base, <4 x i32>* %offptr) {
; CHECK-LABEL: zext_unscaled_i16_i32:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vldrw.u32 q1, [r1]
; CHECK-NEXT:    vldrh.u32 q0, [r0, q1]
; CHECK-NEXT:    bx lr
entry:
  %offs = load <4 x i32>, <4 x i32>* %offptr, align 4
  %byte_ptrs = getelementptr inbounds i8, i8* %base, <4 x i32> %offs
  %ptrs = bitcast <4 x i8*> %byte_ptrs to <4 x i16*>
  %gather = call <4 x i16> @llvm.masked.gather.v4i16.v4p0i16(<4 x i16*> %ptrs, i32 2, <4 x i1> <i1 true, i1 true, i1 true, i1 true>, <4 x i16> undef)
  %gather.zext = zext <4 x i16> %gather to <4 x i32>
  ret <4 x i32> %gather.zext
}
; Gather of 4 x i16 at byte (unscaled) offsets, sign-extended to i32: vldrh.s32.
define arm_aapcs_vfpcc <4 x i32> @sext_unscaled_i16_i32(i8* %base, <4 x i32>* %offptr) {
; CHECK-LABEL: sext_unscaled_i16_i32:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vldrw.u32 q1, [r1]
; CHECK-NEXT:    vldrh.s32 q0, [r0, q1]
; CHECK-NEXT:    bx lr
entry:
  %offs = load <4 x i32>, <4 x i32>* %offptr, align 4
  %byte_ptrs = getelementptr inbounds i8, i8* %base, <4 x i32> %offs
  %ptrs = bitcast <4 x i8*> %byte_ptrs to <4 x i16*>
  %gather = call <4 x i16> @llvm.masked.gather.v4i16.v4p0i16(<4 x i16*> %ptrs, i32 2, <4 x i1> <i1 true, i1 true, i1 true, i1 true>, <4 x i16> undef)
  %gather.sext = sext <4 x i16> %gather to <4 x i32>
  ret <4 x i32> %gather.sext
}
; Gather of 4 x i32 at byte (unscaled) offsets: vldrw.u32 with vector offsets.
define arm_aapcs_vfpcc <4 x i32> @unscaled_i32_i32(i8* %base, <4 x i32>* %offptr) {
; CHECK-LABEL: unscaled_i32_i32:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vldrw.u32 q1, [r1]
; CHECK-NEXT:    vldrw.u32 q0, [r0, q1]
; CHECK-NEXT:    bx lr
entry:
  %offs = load <4 x i32>, <4 x i32>* %offptr, align 4
  %byte_ptrs = getelementptr inbounds i8, i8* %base, <4 x i32> %offs
  %ptrs = bitcast <4 x i8*> %byte_ptrs to <4 x i32*>
  %gather = call <4 x i32> @llvm.masked.gather.v4i32.v4p0i32(<4 x i32*> %ptrs, i32 4, <4 x i1> <i1 true, i1 true, i1 true, i1 true>, <4 x i32> undef)
  ret <4 x i32> %gather
}
; Gather of 4 x float at byte (unscaled) offsets: same vldrw.u32 as the i32 case.
define arm_aapcs_vfpcc <4 x float> @unscaled_f32_i32(i8* %base, <4 x i32>* %offptr) {
; CHECK-LABEL: unscaled_f32_i32:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vldrw.u32 q1, [r1]
; CHECK-NEXT:    vldrw.u32 q0, [r0, q1]
; CHECK-NEXT:    bx lr
entry:
  %offs = load <4 x i32>, <4 x i32>* %offptr, align 4
  %byte_ptrs = getelementptr inbounds i8, i8* %base, <4 x i32> %offs
  %ptrs = bitcast <4 x i8*> %byte_ptrs to <4 x float*>
  %gather = call <4 x float> @llvm.masked.gather.v4f32.v4p0f32(<4 x float*> %ptrs, i32 4, <4 x i1> <i1 true, i1 true, i1 true, i1 true>, <4 x float> undef)
  ret <4 x float> %gather
}
; i16 offsets zero-extended to i32: the offset load itself becomes vldrh.u32.
define arm_aapcs_vfpcc <4 x i32> @unsigned_unscaled_b_i32_i16(i8* %base, <4 x i16>* %offptr) {
; CHECK-LABEL: unsigned_unscaled_b_i32_i16:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vldrh.u32 q1, [r1]
; CHECK-NEXT:    vldrw.u32 q0, [r0, q1]
; CHECK-NEXT:    bx lr
entry:
  %offs = load <4 x i16>, <4 x i16>* %offptr, align 2
  %offs.zext = zext <4 x i16> %offs to <4 x i32>
  %byte_ptrs = getelementptr inbounds i8, i8* %base, <4 x i32> %offs.zext
  %ptrs = bitcast <4 x i8*> %byte_ptrs to <4 x i32*>
  %gather = call <4 x i32> @llvm.masked.gather.v4i32.v4p0i32(<4 x i32*> %ptrs, i32 4, <4 x i1> <i1 true, i1 true, i1 true, i1 true>, <4 x i32> undef)
  ret <4 x i32> %gather
}
; i16 offsets sign-extended to i32: the offset load becomes vldrh.s32.
define arm_aapcs_vfpcc <4 x i32> @signed_unscaled_i32_i16(i8* %base, <4 x i16>* %offptr) {
; CHECK-LABEL: signed_unscaled_i32_i16:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vldrh.s32 q1, [r1]
; CHECK-NEXT:    vldrw.u32 q0, [r0, q1]
; CHECK-NEXT:    bx lr
entry:
  %offs = load <4 x i16>, <4 x i16>* %offptr, align 2
  %offs.sext = sext <4 x i16> %offs to <4 x i32>
  %byte_ptrs = getelementptr inbounds i8, i8* %base, <4 x i32> %offs.sext
  %ptrs = bitcast <4 x i8*> %byte_ptrs to <4 x i32*>
  %gather = call <4 x i32> @llvm.masked.gather.v4i32.v4p0i32(<4 x i32*> %ptrs, i32 4, <4 x i1> <i1 true, i1 true, i1 true, i1 true>, <4 x i32> undef)
  ret <4 x i32> %gather
}
; float gather with zero-extended i16 offsets.
define arm_aapcs_vfpcc <4 x float> @a_unsigned_unscaled_f32_i16(i8* %base, <4 x i16>* %offptr) {
; CHECK-LABEL: a_unsigned_unscaled_f32_i16:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vldrh.u32 q1, [r1]
; CHECK-NEXT:    vldrw.u32 q0, [r0, q1]
; CHECK-NEXT:    bx lr
entry:
  %offs = load <4 x i16>, <4 x i16>* %offptr, align 2
  %offs.zext = zext <4 x i16> %offs to <4 x i32>
  %byte_ptrs = getelementptr inbounds i8, i8* %base, <4 x i32> %offs.zext
  %ptrs = bitcast <4 x i8*> %byte_ptrs to <4 x float*>
  %gather = call <4 x float> @llvm.masked.gather.v4f32.v4p0f32(<4 x float*> %ptrs, i32 4, <4 x i1> <i1 true, i1 true, i1 true, i1 true>, <4 x float> undef)
  ret <4 x float> %gather
}
; float gather with sign-extended i16 offsets.
define arm_aapcs_vfpcc <4 x float> @b_signed_unscaled_f32_i16(i8* %base, <4 x i16>* %offptr) {
; CHECK-LABEL: b_signed_unscaled_f32_i16:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vldrh.s32 q1, [r1]
; CHECK-NEXT:    vldrw.u32 q0, [r0, q1]
; CHECK-NEXT:    bx lr
entry:
  %offs = load <4 x i16>, <4 x i16>* %offptr, align 2
  %offs.sext = sext <4 x i16> %offs to <4 x i32>
  %byte_ptrs = getelementptr inbounds i8, i8* %base, <4 x i32> %offs.sext
  %ptrs = bitcast <4 x i8*> %byte_ptrs to <4 x float*>
  %gather = call <4 x float> @llvm.masked.gather.v4f32.v4p0f32(<4 x float*> %ptrs, i32 4, <4 x i1> <i1 true, i1 true, i1 true, i1 true>, <4 x float> undef)
  ret <4 x float> %gather
}
; i16 gather zero-extended, with sign-extended i16 offsets.
define arm_aapcs_vfpcc <4 x i32> @zext_signed_unscaled_i16_i16(i8* %base, <4 x i16>* %offptr) {
; CHECK-LABEL: zext_signed_unscaled_i16_i16:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vldrh.s32 q1, [r1]
; CHECK-NEXT:    vldrh.u32 q0, [r0, q1]
; CHECK-NEXT:    bx lr
entry:
  %offs = load <4 x i16>, <4 x i16>* %offptr, align 2
  %offs.sext = sext <4 x i16> %offs to <4 x i32>
  %byte_ptrs = getelementptr inbounds i8, i8* %base, <4 x i32> %offs.sext
  %ptrs = bitcast <4 x i8*> %byte_ptrs to <4 x i16*>
  %gather = call <4 x i16> @llvm.masked.gather.v4i16.v4p0i16(<4 x i16*> %ptrs, i32 2, <4 x i1> <i1 true, i1 true, i1 true, i1 true>, <4 x i16> undef)
  %gather.zext = zext <4 x i16> %gather to <4 x i32>
  ret <4 x i32> %gather.zext
}
; i16 gather sign-extended, with sign-extended i16 offsets.
define arm_aapcs_vfpcc <4 x i32> @sext_signed_unscaled_i16_i16(i8* %base, <4 x i16>* %offptr) {
; CHECK-LABEL: sext_signed_unscaled_i16_i16:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vldrh.s32 q1, [r1]
; CHECK-NEXT:    vldrh.s32 q0, [r0, q1]
; CHECK-NEXT:    bx lr
entry:
  %offs = load <4 x i16>, <4 x i16>* %offptr, align 2
  %offs.sext = sext <4 x i16> %offs to <4 x i32>
  %byte_ptrs = getelementptr inbounds i8, i8* %base, <4 x i32> %offs.sext
  %ptrs = bitcast <4 x i8*> %byte_ptrs to <4 x i16*>
  %gather = call <4 x i16> @llvm.masked.gather.v4i16.v4p0i16(<4 x i16*> %ptrs, i32 2, <4 x i1> <i1 true, i1 true, i1 true, i1 true>, <4 x i16> undef)
  %gather.sext = sext <4 x i16> %gather to <4 x i32>
  ret <4 x i32> %gather.sext
}
; i16 gather zero-extended, with zero-extended i16 offsets.
define arm_aapcs_vfpcc <4 x i32> @zext_unsigned_unscaled_i16_i16(i8* %base, <4 x i16>* %offptr) {
; CHECK-LABEL: zext_unsigned_unscaled_i16_i16:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vldrh.u32 q1, [r1]
; CHECK-NEXT:    vldrh.u32 q0, [r0, q1]
; CHECK-NEXT:    bx lr
entry:
  %offs = load <4 x i16>, <4 x i16>* %offptr, align 2
  %offs.zext = zext <4 x i16> %offs to <4 x i32>
  %byte_ptrs = getelementptr inbounds i8, i8* %base, <4 x i32> %offs.zext
  %ptrs = bitcast <4 x i8*> %byte_ptrs to <4 x i16*>
  %gather = call <4 x i16> @llvm.masked.gather.v4i16.v4p0i16(<4 x i16*> %ptrs, i32 2, <4 x i1> <i1 true, i1 true, i1 true, i1 true>, <4 x i16> undef)
  %gather.zext = zext <4 x i16> %gather to <4 x i32>
  ret <4 x i32> %gather.zext
}
; i16 gather sign-extended, with zero-extended i16 offsets.
define arm_aapcs_vfpcc <4 x i32> @sext_unsigned_unscaled_i16_i16(i8* %base, <4 x i16>* %offptr) {
; CHECK-LABEL: sext_unsigned_unscaled_i16_i16:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vldrh.u32 q1, [r1]
; CHECK-NEXT:    vldrh.s32 q0, [r0, q1]
; CHECK-NEXT:    bx lr
entry:
  %offs = load <4 x i16>, <4 x i16>* %offptr, align 2
  %offs.zext = zext <4 x i16> %offs to <4 x i32>
  %byte_ptrs = getelementptr inbounds i8, i8* %base, <4 x i32> %offs.zext
  %ptrs = bitcast <4 x i8*> %byte_ptrs to <4 x i16*>
  %gather = call <4 x i16> @llvm.masked.gather.v4i16.v4p0i16(<4 x i16*> %ptrs, i32 2, <4 x i1> <i1 true, i1 true, i1 true, i1 true>, <4 x i16> undef)
  %gather.sext = sext <4 x i16> %gather to <4 x i32>
  ret <4 x i32> %gather.sext
}
; i8 gather zero-extended, with sign-extended i16 offsets.
define arm_aapcs_vfpcc <4 x i32> @zext_signed_unscaled_i8_i16(i8* %base, <4 x i16>* %offptr) {
; CHECK-LABEL: zext_signed_unscaled_i8_i16:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vldrh.s32 q1, [r1]
; CHECK-NEXT:    vldrb.u32 q0, [r0, q1]
; CHECK-NEXT:    bx lr
entry:
  %offs = load <4 x i16>, <4 x i16>* %offptr, align 2
  %offs.sext = sext <4 x i16> %offs to <4 x i32>
  %ptrs = getelementptr inbounds i8, i8* %base, <4 x i32> %offs.sext
  %gather = call <4 x i8> @llvm.masked.gather.v4i8.v4p0i8(<4 x i8*> %ptrs, i32 1, <4 x i1> <i1 true, i1 true, i1 true, i1 true>, <4 x i8> undef)
  %gather.zext = zext <4 x i8> %gather to <4 x i32>
  ret <4 x i32> %gather.zext
}
; i8 gather sign-extended, with sign-extended i16 offsets.
define arm_aapcs_vfpcc <4 x i32> @sext_signed_unscaled_i8_i16(i8* %base, <4 x i16>* %offptr) {
; CHECK-LABEL: sext_signed_unscaled_i8_i16:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vldrh.s32 q1, [r1]
; CHECK-NEXT:    vldrb.s32 q0, [r0, q1]
; CHECK-NEXT:    bx lr
entry:
  %offs = load <4 x i16>, <4 x i16>* %offptr, align 2
  %offs.sext = sext <4 x i16> %offs to <4 x i32>
  %ptrs = getelementptr inbounds i8, i8* %base, <4 x i32> %offs.sext
  %gather = call <4 x i8> @llvm.masked.gather.v4i8.v4p0i8(<4 x i8*> %ptrs, i32 1, <4 x i1> <i1 true, i1 true, i1 true, i1 true>, <4 x i8> undef)
  %gather.sext = sext <4 x i8> %gather to <4 x i32>
  ret <4 x i32> %gather.sext
}
; i8 gather zero-extended, with zero-extended i16 offsets.
define arm_aapcs_vfpcc <4 x i32> @zext_unsigned_unscaled_i8_i16(i8* %base, <4 x i16>* %offptr) {
; CHECK-LABEL: zext_unsigned_unscaled_i8_i16:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vldrh.u32 q1, [r1]
; CHECK-NEXT:    vldrb.u32 q0, [r0, q1]
; CHECK-NEXT:    bx lr
entry:
  %offs = load <4 x i16>, <4 x i16>* %offptr, align 2
  %offs.zext = zext <4 x i16> %offs to <4 x i32>
  %ptrs = getelementptr inbounds i8, i8* %base, <4 x i32> %offs.zext
  %gather = call <4 x i8> @llvm.masked.gather.v4i8.v4p0i8(<4 x i8*> %ptrs, i32 1, <4 x i1> <i1 true, i1 true, i1 true, i1 true>, <4 x i8> undef)
  %gather.zext = zext <4 x i8> %gather to <4 x i32>
  ret <4 x i32> %gather.zext
}
; i8 gather sign-extended, with zero-extended i16 offsets.
define arm_aapcs_vfpcc <4 x i32> @sext_unsigned_unscaled_i8_i16(i8* %base, <4 x i16>* %offptr) {
; CHECK-LABEL: sext_unsigned_unscaled_i8_i16:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vldrh.u32 q1, [r1]
; CHECK-NEXT:    vldrb.s32 q0, [r0, q1]
; CHECK-NEXT:    bx lr
entry:
  %offs = load <4 x i16>, <4 x i16>* %offptr, align 2
  %offs.zext = zext <4 x i16> %offs to <4 x i32>
  %ptrs = getelementptr inbounds i8, i8* %base, <4 x i32> %offs.zext
  %gather = call <4 x i8> @llvm.masked.gather.v4i8.v4p0i8(<4 x i8*> %ptrs, i32 1, <4 x i1> <i1 true, i1 true, i1 true, i1 true>, <4 x i8> undef)
  %gather.sext = sext <4 x i8> %gather to <4 x i32>
  ret <4 x i32> %gather.sext
}
; i32 gather with zero-extended i8 offsets: offset load becomes vldrb.u32.
define arm_aapcs_vfpcc <4 x i32> @unsigned_unscaled_b_i32_i8(i8* %base, <4 x i8>* %offptr) {
; CHECK-LABEL: unsigned_unscaled_b_i32_i8:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vldrb.u32 q1, [r1]
; CHECK-NEXT:    vldrw.u32 q0, [r0, q1]
; CHECK-NEXT:    bx lr
entry:
  %offs = load <4 x i8>, <4 x i8>* %offptr, align 1
  %offs.zext = zext <4 x i8> %offs to <4 x i32>
  %byte_ptrs = getelementptr inbounds i8, i8* %base, <4 x i32> %offs.zext
  %ptrs = bitcast <4 x i8*> %byte_ptrs to <4 x i32*>
  %gather = call <4 x i32> @llvm.masked.gather.v4i32.v4p0i32(<4 x i32*> %ptrs, i32 4, <4 x i1> <i1 true, i1 true, i1 true, i1 true>, <4 x i32> undef)
  ret <4 x i32> %gather
}
; i32 gather with sign-extended i8 offsets: offset load becomes vldrb.s32.
define arm_aapcs_vfpcc <4 x i32> @signed_unscaled_i32_i8(i8* %base, <4 x i8>* %offptr) {
; CHECK-LABEL: signed_unscaled_i32_i8:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vldrb.s32 q1, [r1]
; CHECK-NEXT:    vldrw.u32 q0, [r0, q1]
; CHECK-NEXT:    bx lr
entry:
  %offs = load <4 x i8>, <4 x i8>* %offptr, align 1
  %offs.sext = sext <4 x i8> %offs to <4 x i32>
  %byte_ptrs = getelementptr inbounds i8, i8* %base, <4 x i32> %offs.sext
  %ptrs = bitcast <4 x i8*> %byte_ptrs to <4 x i32*>
  %gather = call <4 x i32> @llvm.masked.gather.v4i32.v4p0i32(<4 x i32*> %ptrs, i32 4, <4 x i1> <i1 true, i1 true, i1 true, i1 true>, <4 x i32> undef)
  ret <4 x i32> %gather
}
; float gather with zero-extended i8 offsets.
define arm_aapcs_vfpcc <4 x float> @a_unsigned_unscaled_f32_i8(i8* %base, <4 x i8>* %offptr) {
; CHECK-LABEL: a_unsigned_unscaled_f32_i8:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vldrb.u32 q1, [r1]
; CHECK-NEXT:    vldrw.u32 q0, [r0, q1]
; CHECK-NEXT:    bx lr
entry:
  %offs = load <4 x i8>, <4 x i8>* %offptr, align 1
  %offs.zext = zext <4 x i8> %offs to <4 x i32>
  %byte_ptrs = getelementptr inbounds i8, i8* %base, <4 x i32> %offs.zext
  %ptrs = bitcast <4 x i8*> %byte_ptrs to <4 x float*>
  %gather = call <4 x float> @llvm.masked.gather.v4f32.v4p0f32(<4 x float*> %ptrs, i32 4, <4 x i1> <i1 true, i1 true, i1 true, i1 true>, <4 x float> undef)
  ret <4 x float> %gather
}
; float gather with sign-extended i8 offsets.
define arm_aapcs_vfpcc <4 x float> @b_signed_unscaled_f32_i8(i8* %base, <4 x i8>* %offptr) {
; CHECK-LABEL: b_signed_unscaled_f32_i8:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vldrb.s32 q1, [r1]
; CHECK-NEXT:    vldrw.u32 q0, [r0, q1]
; CHECK-NEXT:    bx lr
entry:
  %offs = load <4 x i8>, <4 x i8>* %offptr, align 1
  %offs.sext = sext <4 x i8> %offs to <4 x i32>
  %byte_ptrs = getelementptr inbounds i8, i8* %base, <4 x i32> %offs.sext
  %ptrs = bitcast <4 x i8*> %byte_ptrs to <4 x float*>
  %gather = call <4 x float> @llvm.masked.gather.v4f32.v4p0f32(<4 x float*> %ptrs, i32 4, <4 x i1> <i1 true, i1 true, i1 true, i1 true>, <4 x float> undef)
  ret <4 x float> %gather
}
; i16 gather zero-extended, with sign-extended i8 offsets.
define arm_aapcs_vfpcc <4 x i32> @zext_signed_unscaled_i16_i8(i8* %base, <4 x i8>* %offptr) {
; CHECK-LABEL: zext_signed_unscaled_i16_i8:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vldrb.s32 q1, [r1]
; CHECK-NEXT:    vldrh.u32 q0, [r0, q1]
; CHECK-NEXT:    bx lr
entry:
  %offs = load <4 x i8>, <4 x i8>* %offptr, align 1
  %offs.sext = sext <4 x i8> %offs to <4 x i32>
  %byte_ptrs = getelementptr inbounds i8, i8* %base, <4 x i32> %offs.sext
  %ptrs = bitcast <4 x i8*> %byte_ptrs to <4 x i16*>
  %gather = call <4 x i16> @llvm.masked.gather.v4i16.v4p0i16(<4 x i16*> %ptrs, i32 2, <4 x i1> <i1 true, i1 true, i1 true, i1 true>, <4 x i16> undef)
  %gather.zext = zext <4 x i16> %gather to <4 x i32>
  ret <4 x i32> %gather.zext
}
; i16 gather sign-extended, with sign-extended i8 offsets.
define arm_aapcs_vfpcc <4 x i32> @sext_signed_unscaled_i16_i8(i8* %base, <4 x i8>* %offptr) {
; CHECK-LABEL: sext_signed_unscaled_i16_i8:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vldrb.s32 q1, [r1]
; CHECK-NEXT:    vldrh.s32 q0, [r0, q1]
; CHECK-NEXT:    bx lr
entry:
  %offs = load <4 x i8>, <4 x i8>* %offptr, align 1
  %offs.sext = sext <4 x i8> %offs to <4 x i32>
  %byte_ptrs = getelementptr inbounds i8, i8* %base, <4 x i32> %offs.sext
  %ptrs = bitcast <4 x i8*> %byte_ptrs to <4 x i16*>
  %gather = call <4 x i16> @llvm.masked.gather.v4i16.v4p0i16(<4 x i16*> %ptrs, i32 2, <4 x i1> <i1 true, i1 true, i1 true, i1 true>, <4 x i16> undef)
  %gather.sext = sext <4 x i16> %gather to <4 x i32>
  ret <4 x i32> %gather.sext
}
; i16 gather zero-extended, with zero-extended i8 offsets.
define arm_aapcs_vfpcc <4 x i32> @zext_unsigned_unscaled_i16_i8(i8* %base, <4 x i8>* %offptr) {
; CHECK-LABEL: zext_unsigned_unscaled_i16_i8:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vldrb.u32 q1, [r1]
; CHECK-NEXT:    vldrh.u32 q0, [r0, q1]
; CHECK-NEXT:    bx lr
entry:
  %offs = load <4 x i8>, <4 x i8>* %offptr, align 1
  %offs.zext = zext <4 x i8> %offs to <4 x i32>
  %byte_ptrs = getelementptr inbounds i8, i8* %base, <4 x i32> %offs.zext
  %ptrs = bitcast <4 x i8*> %byte_ptrs to <4 x i16*>
  %gather = call <4 x i16> @llvm.masked.gather.v4i16.v4p0i16(<4 x i16*> %ptrs, i32 2, <4 x i1> <i1 true, i1 true, i1 true, i1 true>, <4 x i16> undef)
  %gather.zext = zext <4 x i16> %gather to <4 x i32>
  ret <4 x i32> %gather.zext
}
; i16 gather sign-extended, with zero-extended i8 offsets.
define arm_aapcs_vfpcc <4 x i32> @sext_unsigned_unscaled_i16_i8(i8* %base, <4 x i8>* %offptr) {
; CHECK-LABEL: sext_unsigned_unscaled_i16_i8:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vldrb.u32 q1, [r1]
; CHECK-NEXT:    vldrh.s32 q0, [r0, q1]
; CHECK-NEXT:    bx lr
entry:
  %offs = load <4 x i8>, <4 x i8>* %offptr, align 1
  %offs.zext = zext <4 x i8> %offs to <4 x i32>
  %byte_ptrs = getelementptr inbounds i8, i8* %base, <4 x i32> %offs.zext
  %ptrs = bitcast <4 x i8*> %byte_ptrs to <4 x i16*>
  %gather = call <4 x i16> @llvm.masked.gather.v4i16.v4p0i16(<4 x i16*> %ptrs, i32 2, <4 x i1> <i1 true, i1 true, i1 true, i1 true>, <4 x i16> undef)
  %gather.sext = sext <4 x i16> %gather to <4 x i32>
  ret <4 x i32> %gather.sext
}
; i8 gather zero-extended, with sign-extended i8 offsets.
define arm_aapcs_vfpcc <4 x i32> @zext_signed_unscaled_i8_i8(i8* %base, <4 x i8>* %offptr) {
; CHECK-LABEL: zext_signed_unscaled_i8_i8:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vldrb.s32 q1, [r1]
; CHECK-NEXT:    vldrb.u32 q0, [r0, q1]
; CHECK-NEXT:    bx lr
entry:
  %offs = load <4 x i8>, <4 x i8>* %offptr, align 1
  %offs.sext = sext <4 x i8> %offs to <4 x i32>
  %ptrs = getelementptr inbounds i8, i8* %base, <4 x i32> %offs.sext
  %gather = call <4 x i8> @llvm.masked.gather.v4i8.v4p0i8(<4 x i8*> %ptrs, i32 1, <4 x i1> <i1 true, i1 true, i1 true, i1 true>, <4 x i8> undef)
  %gather.zext = zext <4 x i8> %gather to <4 x i32>
  ret <4 x i32> %gather.zext
}
; i8 gather sign-extended, with sign-extended i8 offsets.
define arm_aapcs_vfpcc <4 x i32> @sext_signed_unscaled_i8_i8(i8* %base, <4 x i8>* %offptr) {
; CHECK-LABEL: sext_signed_unscaled_i8_i8:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vldrb.s32 q1, [r1]
; CHECK-NEXT:    vldrb.s32 q0, [r0, q1]
; CHECK-NEXT:    bx lr
entry:
  %offs = load <4 x i8>, <4 x i8>* %offptr, align 1
  %offs.sext = sext <4 x i8> %offs to <4 x i32>
  %ptrs = getelementptr inbounds i8, i8* %base, <4 x i32> %offs.sext
  %gather = call <4 x i8> @llvm.masked.gather.v4i8.v4p0i8(<4 x i8*> %ptrs, i32 1, <4 x i1> <i1 true, i1 true, i1 true, i1 true>, <4 x i8> undef)
  %gather.sext = sext <4 x i8> %gather to <4 x i32>
  ret <4 x i32> %gather.sext
}
; i8 gather zero-extended, with zero-extended i8 offsets.
define arm_aapcs_vfpcc <4 x i32> @zext_unsigned_unscaled_i8_i8(i8* %base, <4 x i8>* %offptr) {
; CHECK-LABEL: zext_unsigned_unscaled_i8_i8:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vldrb.u32 q1, [r1]
; CHECK-NEXT:    vldrb.u32 q0, [r0, q1]
; CHECK-NEXT:    bx lr
entry:
  %offs = load <4 x i8>, <4 x i8>* %offptr, align 1
  %offs.zext = zext <4 x i8> %offs to <4 x i32>
  %ptrs = getelementptr inbounds i8, i8* %base, <4 x i32> %offs.zext
  %gather = call <4 x i8> @llvm.masked.gather.v4i8.v4p0i8(<4 x i8*> %ptrs, i32 1, <4 x i1> <i1 true, i1 true, i1 true, i1 true>, <4 x i8> undef)
  %gather.zext = zext <4 x i8> %gather to <4 x i32>
  ret <4 x i32> %gather.zext
}
; i8 gather sign-extended, with zero-extended i8 offsets.
define arm_aapcs_vfpcc <4 x i32> @sext_unsigned_unscaled_i8_i8(i8* %base, <4 x i8>* %offptr) {
; CHECK-LABEL: sext_unsigned_unscaled_i8_i8:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vldrb.u32 q1, [r1]
; CHECK-NEXT:    vldrb.s32 q0, [r0, q1]
; CHECK-NEXT:    bx lr
entry:
  %offs = load <4 x i8>, <4 x i8>* %offptr, align 1
  %offs.zext = zext <4 x i8> %offs to <4 x i32>
  %ptrs = getelementptr inbounds i8, i8* %base, <4 x i32> %offs.zext
  %gather = call <4 x i8> @llvm.masked.gather.v4i8.v4p0i8(<4 x i8*> %ptrs, i32 1, <4 x i1> <i1 true, i1 true, i1 true, i1 true>, <4 x i8> undef)
  %gather.sext = sext <4 x i8> %gather to <4 x i32>
  ret <4 x i32> %gather.sext
}
; Same as sext_unsigned_unscaled_i8_i8 but using opaque pointers; codegen must be identical.
define arm_aapcs_vfpcc <4 x i32> @sext_unsigned_unscaled_i8_i8_opaque(ptr %base, ptr %offptr) {
; CHECK-LABEL: sext_unsigned_unscaled_i8_i8_opaque:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vldrb.u32 q1, [r1]
; CHECK-NEXT:    vldrb.s32 q0, [r0, q1]
; CHECK-NEXT:    bx lr
entry:
  %offs = load <4 x i8>, ptr %offptr, align 1
  %offs.zext = zext <4 x i8> %offs to <4 x i32>
  %ptrs = getelementptr inbounds i8, ptr %base, <4 x i32> %offs.zext
  %gather = call <4 x i8> @llvm.masked.gather.v4i8.v4p0(<4 x ptr> %ptrs, i32 1, <4 x i1> <i1 true, i1 true, i1 true, i1 true>, <4 x i8> undef)
  %gather.sext = sext <4 x i8> %gather to <4 x i32>
  ret <4 x i32> %gather.sext
}
; VLDRW.u32 Qd, [P, 4]
; Gather from a vector of pointers with a constant element offset (i32 x 4 => 16 bytes);
; the offset is materialized as a vector add, then a pointer-vector vldrw.
define arm_aapcs_vfpcc <4 x i32> @qi4(<4 x i32*> %p) {
; CHECK-LABEL: qi4:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmov.i32 q1, #0x10
; CHECK-NEXT:    vadd.i32 q1, q0, q1
; CHECK-NEXT:    vldrw.u32 q0, [q1]
; CHECK-NEXT:    bx lr
entry:
  %g = getelementptr inbounds i32, <4 x i32*> %p, i32 4
  %gather = call <4 x i32> @llvm.masked.gather.v4i32.v4p0i32(<4 x i32*> %g, i32 4, <4 x i1> <i1 true, i1 true, i1 true, i1 true>, <4 x i32> undef)
  ret <4 x i32> %gather
}
; Alignment 1 is below the element size, so the gather cannot use vldrw and is
; scalarized into four ldr instructions.
define arm_aapcs_vfpcc <4 x i32> @qi4_unaligned(<4 x i32*> %p) {
; CHECK-LABEL: qi4_unaligned:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmov.i32 q1, #0x10
; CHECK-NEXT:    vadd.i32 q0, q0, q1
; CHECK-NEXT:    vmov r0, r1, d1
; CHECK-NEXT:    vmov r2, r3, d0
; CHECK-NEXT:    ldr r0, [r0]
; CHECK-NEXT:    ldr r2, [r2]
; CHECK-NEXT:    ldr r1, [r1]
; CHECK-NEXT:    ldr r3, [r3]
; CHECK-NEXT:    vmov q0[2], q0[0], r2, r0
; CHECK-NEXT:    vmov q0[3], q0[1], r3, r1
; CHECK-NEXT:    bx lr
entry:
  %g = getelementptr inbounds i32, <4 x i32*> %p, i32 4
  %gather = call <4 x i32> @llvm.masked.gather.v4i32.v4p0i32(<4 x i32*> %g, i32 1, <4 x i1> <i1 true, i1 true, i1 true, i1 true>, <4 x i32> undef)
  ret <4 x i32> %gather
}
; Declarations of the masked gather intrinsics exercised by the tests above
; (typed-pointer variants plus the opaque-pointer v4p0 variant).
declare <4 x i8> @llvm.masked.gather.v4i8.v4p0i8(<4 x i8*>, i32, <4 x i1>, <4 x i8>)
declare <4 x i16> @llvm.masked.gather.v4i16.v4p0i16(<4 x i16*>, i32, <4 x i1>, <4 x i16>)
declare <4 x i32> @llvm.masked.gather.v4i32.v4p0i32(<4 x i32*>, i32, <4 x i1>, <4 x i32>)
declare <4 x half> @llvm.masked.gather.v4f16.v4p0f16(<4 x half*>, i32, <4 x i1>, <4 x half>)
declare <4 x float> @llvm.masked.gather.v4f32.v4p0f32(<4 x float*>, i32, <4 x i1>, <4 x float>)

declare <4 x i8> @llvm.masked.gather.v4i8.v4p0(<4 x ptr>, i32, <4 x i1>, <4 x i8>)