1 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2 ; RUN: llc -mtriple=thumbv8.1m.main-none-none-eabi -mattr=+mve.fp %s -o - | FileCheck %s
; Gathers four i8 values from %base plus <4 x i32> byte offsets loaded from
; %offptr and zero-extends them to i32. The checks expect the zext to be
; folded into a single vldrb.u32 with a vector (q-register) offset.
4 define arm_aapcs_vfpcc <4 x i32> @zext_unscaled_i8_i32(ptr %base, ptr %offptr) {
5 ; CHECK-LABEL: zext_unscaled_i8_i32:
6 ; CHECK: @ %bb.0: @ %entry
7 ; CHECK-NEXT: vldrw.u32 q1, [r1]
8 ; CHECK-NEXT: vldrb.u32 q0, [r0, q1]
11 %offs = load <4 x i32>, ptr %offptr, align 4
12 %ptrs = getelementptr inbounds i8, ptr %base, <4 x i32> %offs
13 %gather = call <4 x i8> @llvm.masked.gather.v4i8.v4p0(<4 x ptr> %ptrs, i32 1, <4 x i1> <i1 true, i1 true, i1 true, i1 true>, <4 x i8> undef)
14 %gather.zext = zext <4 x i8> %gather to <4 x i32>
15 ret <4 x i32> %gather.zext
; Gathers four i8 values from %base plus <4 x i32> byte offsets and
; sign-extends them to i32. The checks expect the sext to be folded into
; vldrb.s32 with a vector offset.
18 define arm_aapcs_vfpcc <4 x i32> @sext_unscaled_i8_i32(ptr %base, ptr %offptr) {
19 ; CHECK-LABEL: sext_unscaled_i8_i32:
20 ; CHECK: @ %bb.0: @ %entry
21 ; CHECK-NEXT: vldrw.u32 q1, [r1]
22 ; CHECK-NEXT: vldrb.s32 q0, [r0, q1]
25 %offs = load <4 x i32>, ptr %offptr, align 4
26 %ptrs = getelementptr inbounds i8, ptr %base, <4 x i32> %offs
27 %gather = call <4 x i8> @llvm.masked.gather.v4i8.v4p0(<4 x ptr> %ptrs, i32 1, <4 x i1> <i1 true, i1 true, i1 true, i1 true>, <4 x i8> undef)
28 %gather.sext = sext <4 x i8> %gather to <4 x i32>
29 ret <4 x i32> %gather.sext
; Gathers four i16 values addressed by unscaled (byte) <4 x i32> offsets from
; %base and zero-extends them to i32. The checks expect vldrh.u32 with an
; unshifted vector offset. The ptr-to-ptr bitcast below is a no-op.
32 define arm_aapcs_vfpcc <4 x i32> @zext_unscaled_i16_i32(ptr %base, ptr %offptr) {
33 ; CHECK-LABEL: zext_unscaled_i16_i32:
34 ; CHECK: @ %bb.0: @ %entry
35 ; CHECK-NEXT: vldrw.u32 q1, [r1]
36 ; CHECK-NEXT: vldrh.u32 q0, [r0, q1]
39 %offs = load <4 x i32>, ptr %offptr, align 4
40 %byte_ptrs = getelementptr inbounds i8, ptr %base, <4 x i32> %offs
41 %ptrs = bitcast <4 x ptr> %byte_ptrs to <4 x ptr>
42 %gather = call <4 x i16> @llvm.masked.gather.v4i16.v4p0(<4 x ptr> %ptrs, i32 2, <4 x i1> <i1 true, i1 true, i1 true, i1 true>, <4 x i16> undef)
43 %gather.zext = zext <4 x i16> %gather to <4 x i32>
44 ret <4 x i32> %gather.zext
; Gathers four i16 values addressed by unscaled (byte) <4 x i32> offsets from
; %base and sign-extends them to i32. The checks expect vldrh.s32 with an
; unshifted vector offset.
47 define arm_aapcs_vfpcc <4 x i32> @sext_unscaled_i16_i32(ptr %base, ptr %offptr) {
48 ; CHECK-LABEL: sext_unscaled_i16_i32:
49 ; CHECK: @ %bb.0: @ %entry
50 ; CHECK-NEXT: vldrw.u32 q1, [r1]
51 ; CHECK-NEXT: vldrh.s32 q0, [r0, q1]
54 %offs = load <4 x i32>, ptr %offptr, align 4
55 %byte_ptrs = getelementptr inbounds i8, ptr %base, <4 x i32> %offs
56 %ptrs = bitcast <4 x ptr> %byte_ptrs to <4 x ptr>
57 %gather = call <4 x i16> @llvm.masked.gather.v4i16.v4p0(<4 x ptr> %ptrs, i32 2, <4 x i1> <i1 true, i1 true, i1 true, i1 true>, <4 x i16> undef)
58 %gather.sext = sext <4 x i16> %gather to <4 x i32>
59 ret <4 x i32> %gather.sext
; Gathers four i32 values addressed by unscaled (byte) <4 x i32> offsets from
; %base. The checks expect a single vldrw.u32 with an unshifted vector offset.
; The ptr-to-ptr bitcast below is a no-op.
62 define arm_aapcs_vfpcc <4 x i32> @unscaled_i32_i32(ptr %base, ptr %offptr) {
63 ; CHECK-LABEL: unscaled_i32_i32:
64 ; CHECK: @ %bb.0: @ %entry
65 ; CHECK-NEXT: vldrw.u32 q1, [r1]
66 ; CHECK-NEXT: vldrw.u32 q0, [r0, q1]
69 %offs = load <4 x i32>, ptr %offptr, align 4
70 %byte_ptrs = getelementptr inbounds i8, ptr %base, <4 x i32> %offs
71 %ptrs = bitcast <4 x ptr> %byte_ptrs to <4 x ptr>
72 %gather = call <4 x i32> @llvm.masked.gather.v4i32.v4p0(<4 x ptr> %ptrs, i32 4, <4 x i1> <i1 true, i1 true, i1 true, i1 true>, <4 x i32> undef)
73 ret <4 x i32> %gather
; Gathers four float values addressed by unscaled (byte) <4 x i32> offsets
; from %base. The checks expect the same vldrw.u32 vector-offset load that
; the i32 variant uses.
76 define arm_aapcs_vfpcc <4 x float> @unscaled_f32_i32(ptr %base, ptr %offptr) {
77 ; CHECK-LABEL: unscaled_f32_i32:
78 ; CHECK: @ %bb.0: @ %entry
79 ; CHECK-NEXT: vldrw.u32 q1, [r1]
80 ; CHECK-NEXT: vldrw.u32 q0, [r0, q1]
83 %offs = load <4 x i32>, ptr %offptr, align 4
84 %byte_ptrs = getelementptr inbounds i8, ptr %base, <4 x i32> %offs
85 %ptrs = bitcast <4 x ptr> %byte_ptrs to <4 x ptr>
86 %gather = call <4 x float> @llvm.masked.gather.v4f32.v4p0(<4 x ptr> %ptrs, i32 4, <4 x i1> <i1 true, i1 true, i1 true, i1 true>, <4 x float> undef)
87 ret <4 x float> %gather
; i32 gather whose byte offsets are i16 values zero-extended to i32. The
; checks expect the zext to be folded into the offset load (vldrh.u32),
; followed by a vldrw.u32 vector-offset gather.
90 define arm_aapcs_vfpcc <4 x i32> @unsigned_unscaled_b_i32_i16(ptr %base, ptr %offptr) {
91 ; CHECK-LABEL: unsigned_unscaled_b_i32_i16:
92 ; CHECK: @ %bb.0: @ %entry
93 ; CHECK-NEXT: vldrh.u32 q1, [r1]
94 ; CHECK-NEXT: vldrw.u32 q0, [r0, q1]
97 %offs = load <4 x i16>, ptr %offptr, align 2
98 %offs.zext = zext <4 x i16> %offs to <4 x i32>
99 %byte_ptrs = getelementptr inbounds i8, ptr %base, <4 x i32> %offs.zext
100 %ptrs = bitcast <4 x ptr> %byte_ptrs to <4 x ptr>
101 %gather = call <4 x i32> @llvm.masked.gather.v4i32.v4p0(<4 x ptr> %ptrs, i32 4, <4 x i1> <i1 true, i1 true, i1 true, i1 true>, <4 x i32> undef)
102 ret <4 x i32> %gather
; i32 gather whose byte offsets are i16 values sign-extended to i32. The
; checks expect the sext to be folded into the offset load (vldrh.s32),
; followed by a vldrw.u32 vector-offset gather.
105 define arm_aapcs_vfpcc <4 x i32> @signed_unscaled_i32_i16(ptr %base, ptr %offptr) {
106 ; CHECK-LABEL: signed_unscaled_i32_i16:
107 ; CHECK: @ %bb.0: @ %entry
108 ; CHECK-NEXT: vldrh.s32 q1, [r1]
109 ; CHECK-NEXT: vldrw.u32 q0, [r0, q1]
112 %offs = load <4 x i16>, ptr %offptr, align 2
113 %offs.sext = sext <4 x i16> %offs to <4 x i32>
114 %byte_ptrs = getelementptr inbounds i8, ptr %base, <4 x i32> %offs.sext
115 %ptrs = bitcast <4 x ptr> %byte_ptrs to <4 x ptr>
116 %gather = call <4 x i32> @llvm.masked.gather.v4i32.v4p0(<4 x ptr> %ptrs, i32 4, <4 x i1> <i1 true, i1 true, i1 true, i1 true>, <4 x i32> undef)
117 ret <4 x i32> %gather
; float gather whose byte offsets are i16 values zero-extended to i32. The
; checks expect a vldrh.u32 offset load followed by a vldrw.u32 vector-offset
; gather.
120 define arm_aapcs_vfpcc <4 x float> @a_unsigned_unscaled_f32_i16(ptr %base, ptr %offptr) {
121 ; CHECK-LABEL: a_unsigned_unscaled_f32_i16:
122 ; CHECK: @ %bb.0: @ %entry
123 ; CHECK-NEXT: vldrh.u32 q1, [r1]
124 ; CHECK-NEXT: vldrw.u32 q0, [r0, q1]
127 %offs = load <4 x i16>, ptr %offptr, align 2
128 %offs.zext = zext <4 x i16> %offs to <4 x i32>
129 %byte_ptrs = getelementptr inbounds i8, ptr %base, <4 x i32> %offs.zext
130 %ptrs = bitcast <4 x ptr> %byte_ptrs to <4 x ptr>
131 %gather = call <4 x float> @llvm.masked.gather.v4f32.v4p0(<4 x ptr> %ptrs, i32 4, <4 x i1> <i1 true, i1 true, i1 true, i1 true>, <4 x float> undef)
132 ret <4 x float> %gather
; float gather whose byte offsets are i16 values sign-extended to i32. The
; checks expect a vldrh.s32 offset load followed by a vldrw.u32 vector-offset
; gather.
135 define arm_aapcs_vfpcc <4 x float> @b_signed_unscaled_f32_i16(ptr %base, ptr %offptr) {
136 ; CHECK-LABEL: b_signed_unscaled_f32_i16:
137 ; CHECK: @ %bb.0: @ %entry
138 ; CHECK-NEXT: vldrh.s32 q1, [r1]
139 ; CHECK-NEXT: vldrw.u32 q0, [r0, q1]
142 %offs = load <4 x i16>, ptr %offptr, align 2
143 %offs.sext = sext <4 x i16> %offs to <4 x i32>
144 %byte_ptrs = getelementptr inbounds i8, ptr %base, <4 x i32> %offs.sext
145 %ptrs = bitcast <4 x ptr> %byte_ptrs to <4 x ptr>
146 %gather = call <4 x float> @llvm.masked.gather.v4f32.v4p0(<4 x ptr> %ptrs, i32 4, <4 x i1> <i1 true, i1 true, i1 true, i1 true>, <4 x float> undef)
147 ret <4 x float> %gather
; i16 gather, zero-extended to i32, with sign-extended i16 byte offsets.
; The checks expect vldrh.s32 for the offsets and vldrh.u32 for the gather.
150 define arm_aapcs_vfpcc <4 x i32> @zext_signed_unscaled_i16_i16(ptr %base, ptr %offptr) {
151 ; CHECK-LABEL: zext_signed_unscaled_i16_i16:
152 ; CHECK: @ %bb.0: @ %entry
153 ; CHECK-NEXT: vldrh.s32 q1, [r1]
154 ; CHECK-NEXT: vldrh.u32 q0, [r0, q1]
157 %offs = load <4 x i16>, ptr %offptr, align 2
158 %offs.sext = sext <4 x i16> %offs to <4 x i32>
159 %byte_ptrs = getelementptr inbounds i8, ptr %base, <4 x i32> %offs.sext
160 %ptrs = bitcast <4 x ptr> %byte_ptrs to <4 x ptr>
161 %gather = call <4 x i16> @llvm.masked.gather.v4i16.v4p0(<4 x ptr> %ptrs, i32 2, <4 x i1> <i1 true, i1 true, i1 true, i1 true>, <4 x i16> undef)
162 %gather.zext = zext <4 x i16> %gather to <4 x i32>
163 ret <4 x i32> %gather.zext
; i16 gather, sign-extended to i32, with sign-extended i16 byte offsets.
; The checks expect vldrh.s32 for both the offset load and the gather.
166 define arm_aapcs_vfpcc <4 x i32> @sext_signed_unscaled_i16_i16(ptr %base, ptr %offptr) {
167 ; CHECK-LABEL: sext_signed_unscaled_i16_i16:
168 ; CHECK: @ %bb.0: @ %entry
169 ; CHECK-NEXT: vldrh.s32 q1, [r1]
170 ; CHECK-NEXT: vldrh.s32 q0, [r0, q1]
173 %offs = load <4 x i16>, ptr %offptr, align 2
174 %offs.sext = sext <4 x i16> %offs to <4 x i32>
175 %byte_ptrs = getelementptr inbounds i8, ptr %base, <4 x i32> %offs.sext
176 %ptrs = bitcast <4 x ptr> %byte_ptrs to <4 x ptr>
177 %gather = call <4 x i16> @llvm.masked.gather.v4i16.v4p0(<4 x ptr> %ptrs, i32 2, <4 x i1> <i1 true, i1 true, i1 true, i1 true>, <4 x i16> undef)
178 %gather.sext = sext <4 x i16> %gather to <4 x i32>
179 ret <4 x i32> %gather.sext
; i16 gather, zero-extended to i32, with zero-extended i16 byte offsets.
; The checks expect vldrh.u32 for both the offset load and the gather.
182 define arm_aapcs_vfpcc <4 x i32> @zext_unsigned_unscaled_i16_i16(ptr %base, ptr %offptr) {
183 ; CHECK-LABEL: zext_unsigned_unscaled_i16_i16:
184 ; CHECK: @ %bb.0: @ %entry
185 ; CHECK-NEXT: vldrh.u32 q1, [r1]
186 ; CHECK-NEXT: vldrh.u32 q0, [r0, q1]
189 %offs = load <4 x i16>, ptr %offptr, align 2
190 %offs.zext = zext <4 x i16> %offs to <4 x i32>
191 %byte_ptrs = getelementptr inbounds i8, ptr %base, <4 x i32> %offs.zext
192 %ptrs = bitcast <4 x ptr> %byte_ptrs to <4 x ptr>
193 %gather = call <4 x i16> @llvm.masked.gather.v4i16.v4p0(<4 x ptr> %ptrs, i32 2, <4 x i1> <i1 true, i1 true, i1 true, i1 true>, <4 x i16> undef)
194 %gather.zext = zext <4 x i16> %gather to <4 x i32>
195 ret <4 x i32> %gather.zext
; i16 gather, sign-extended to i32, with zero-extended i16 byte offsets.
; The checks expect vldrh.u32 for the offsets and vldrh.s32 for the gather.
198 define arm_aapcs_vfpcc <4 x i32> @sext_unsigned_unscaled_i16_i16(ptr %base, ptr %offptr) {
199 ; CHECK-LABEL: sext_unsigned_unscaled_i16_i16:
200 ; CHECK: @ %bb.0: @ %entry
201 ; CHECK-NEXT: vldrh.u32 q1, [r1]
202 ; CHECK-NEXT: vldrh.s32 q0, [r0, q1]
205 %offs = load <4 x i16>, ptr %offptr, align 2
206 %offs.zext = zext <4 x i16> %offs to <4 x i32>
207 %byte_ptrs = getelementptr inbounds i8, ptr %base, <4 x i32> %offs.zext
208 %ptrs = bitcast <4 x ptr> %byte_ptrs to <4 x ptr>
209 %gather = call <4 x i16> @llvm.masked.gather.v4i16.v4p0(<4 x ptr> %ptrs, i32 2, <4 x i1> <i1 true, i1 true, i1 true, i1 true>, <4 x i16> undef)
210 %gather.sext = sext <4 x i16> %gather to <4 x i32>
211 ret <4 x i32> %gather.sext
; i8 gather, zero-extended to i32, with sign-extended i16 byte offsets.
; The checks expect vldrh.s32 for the offsets and vldrb.u32 for the gather.
214 define arm_aapcs_vfpcc <4 x i32> @zext_signed_unscaled_i8_i16(ptr %base, ptr %offptr) {
215 ; CHECK-LABEL: zext_signed_unscaled_i8_i16:
216 ; CHECK: @ %bb.0: @ %entry
217 ; CHECK-NEXT: vldrh.s32 q1, [r1]
218 ; CHECK-NEXT: vldrb.u32 q0, [r0, q1]
221 %offs = load <4 x i16>, ptr %offptr, align 2
222 %offs.sext = sext <4 x i16> %offs to <4 x i32>
223 %ptrs = getelementptr inbounds i8, ptr %base, <4 x i32> %offs.sext
224 %gather = call <4 x i8> @llvm.masked.gather.v4i8.v4p0(<4 x ptr> %ptrs, i32 1, <4 x i1> <i1 true, i1 true, i1 true, i1 true>, <4 x i8> undef)
225 %gather.zext = zext <4 x i8> %gather to <4 x i32>
226 ret <4 x i32> %gather.zext
; i8 gather, sign-extended to i32, with sign-extended i16 byte offsets.
; The checks expect vldrh.s32 for the offsets and vldrb.s32 for the gather.
229 define arm_aapcs_vfpcc <4 x i32> @sext_signed_unscaled_i8_i16(ptr %base, ptr %offptr) {
230 ; CHECK-LABEL: sext_signed_unscaled_i8_i16:
231 ; CHECK: @ %bb.0: @ %entry
232 ; CHECK-NEXT: vldrh.s32 q1, [r1]
233 ; CHECK-NEXT: vldrb.s32 q0, [r0, q1]
236 %offs = load <4 x i16>, ptr %offptr, align 2
237 %offs.sext = sext <4 x i16> %offs to <4 x i32>
238 %ptrs = getelementptr inbounds i8, ptr %base, <4 x i32> %offs.sext
239 %gather = call <4 x i8> @llvm.masked.gather.v4i8.v4p0(<4 x ptr> %ptrs, i32 1, <4 x i1> <i1 true, i1 true, i1 true, i1 true>, <4 x i8> undef)
240 %gather.sext = sext <4 x i8> %gather to <4 x i32>
241 ret <4 x i32> %gather.sext
; i8 gather, zero-extended to i32, with zero-extended i16 byte offsets.
; The checks expect vldrh.u32 for the offsets and vldrb.u32 for the gather.
244 define arm_aapcs_vfpcc <4 x i32> @zext_unsigned_unscaled_i8_i16(ptr %base, ptr %offptr) {
245 ; CHECK-LABEL: zext_unsigned_unscaled_i8_i16:
246 ; CHECK: @ %bb.0: @ %entry
247 ; CHECK-NEXT: vldrh.u32 q1, [r1]
248 ; CHECK-NEXT: vldrb.u32 q0, [r0, q1]
251 %offs = load <4 x i16>, ptr %offptr, align 2
252 %offs.zext = zext <4 x i16> %offs to <4 x i32>
253 %ptrs = getelementptr inbounds i8, ptr %base, <4 x i32> %offs.zext
254 %gather = call <4 x i8> @llvm.masked.gather.v4i8.v4p0(<4 x ptr> %ptrs, i32 1, <4 x i1> <i1 true, i1 true, i1 true, i1 true>, <4 x i8> undef)
255 %gather.zext = zext <4 x i8> %gather to <4 x i32>
256 ret <4 x i32> %gather.zext
; i8 gather, sign-extended to i32, with zero-extended i16 byte offsets.
; The checks expect vldrh.u32 for the offsets and vldrb.s32 for the gather.
259 define arm_aapcs_vfpcc <4 x i32> @sext_unsigned_unscaled_i8_i16(ptr %base, ptr %offptr) {
260 ; CHECK-LABEL: sext_unsigned_unscaled_i8_i16:
261 ; CHECK: @ %bb.0: @ %entry
262 ; CHECK-NEXT: vldrh.u32 q1, [r1]
263 ; CHECK-NEXT: vldrb.s32 q0, [r0, q1]
266 %offs = load <4 x i16>, ptr %offptr, align 2
267 %offs.zext = zext <4 x i16> %offs to <4 x i32>
268 %ptrs = getelementptr inbounds i8, ptr %base, <4 x i32> %offs.zext
269 %gather = call <4 x i8> @llvm.masked.gather.v4i8.v4p0(<4 x ptr> %ptrs, i32 1, <4 x i1> <i1 true, i1 true, i1 true, i1 true>, <4 x i8> undef)
270 %gather.sext = sext <4 x i8> %gather to <4 x i32>
271 ret <4 x i32> %gather.sext
; i32 gather whose byte offsets are i8 values zero-extended to i32. The
; checks expect a vldrb.u32 offset load followed by a vldrw.u32 vector-offset
; gather.
274 define arm_aapcs_vfpcc <4 x i32> @unsigned_unscaled_b_i32_i8(ptr %base, ptr %offptr) {
275 ; CHECK-LABEL: unsigned_unscaled_b_i32_i8:
276 ; CHECK: @ %bb.0: @ %entry
277 ; CHECK-NEXT: vldrb.u32 q1, [r1]
278 ; CHECK-NEXT: vldrw.u32 q0, [r0, q1]
281 %offs = load <4 x i8>, ptr %offptr, align 1
282 %offs.zext = zext <4 x i8> %offs to <4 x i32>
283 %byte_ptrs = getelementptr inbounds i8, ptr %base, <4 x i32> %offs.zext
284 %ptrs = bitcast <4 x ptr> %byte_ptrs to <4 x ptr>
285 %gather = call <4 x i32> @llvm.masked.gather.v4i32.v4p0(<4 x ptr> %ptrs, i32 4, <4 x i1> <i1 true, i1 true, i1 true, i1 true>, <4 x i32> undef)
286 ret <4 x i32> %gather
; i32 gather whose byte offsets are i8 values sign-extended to i32. The
; checks expect a vldrb.s32 offset load followed by a vldrw.u32 vector-offset
; gather.
289 define arm_aapcs_vfpcc <4 x i32> @signed_unscaled_i32_i8(ptr %base, ptr %offptr) {
290 ; CHECK-LABEL: signed_unscaled_i32_i8:
291 ; CHECK: @ %bb.0: @ %entry
292 ; CHECK-NEXT: vldrb.s32 q1, [r1]
293 ; CHECK-NEXT: vldrw.u32 q0, [r0, q1]
296 %offs = load <4 x i8>, ptr %offptr, align 1
297 %offs.sext = sext <4 x i8> %offs to <4 x i32>
298 %byte_ptrs = getelementptr inbounds i8, ptr %base, <4 x i32> %offs.sext
299 %ptrs = bitcast <4 x ptr> %byte_ptrs to <4 x ptr>
300 %gather = call <4 x i32> @llvm.masked.gather.v4i32.v4p0(<4 x ptr> %ptrs, i32 4, <4 x i1> <i1 true, i1 true, i1 true, i1 true>, <4 x i32> undef)
301 ret <4 x i32> %gather
; float gather whose byte offsets are i8 values zero-extended to i32. The
; checks expect a vldrb.u32 offset load followed by a vldrw.u32 vector-offset
; gather.
304 define arm_aapcs_vfpcc <4 x float> @a_unsigned_unscaled_f32_i8(ptr %base, ptr %offptr) {
305 ; CHECK-LABEL: a_unsigned_unscaled_f32_i8:
306 ; CHECK: @ %bb.0: @ %entry
307 ; CHECK-NEXT: vldrb.u32 q1, [r1]
308 ; CHECK-NEXT: vldrw.u32 q0, [r0, q1]
311 %offs = load <4 x i8>, ptr %offptr, align 1
312 %offs.zext = zext <4 x i8> %offs to <4 x i32>
313 %byte_ptrs = getelementptr inbounds i8, ptr %base, <4 x i32> %offs.zext
314 %ptrs = bitcast <4 x ptr> %byte_ptrs to <4 x ptr>
315 %gather = call <4 x float> @llvm.masked.gather.v4f32.v4p0(<4 x ptr> %ptrs, i32 4, <4 x i1> <i1 true, i1 true, i1 true, i1 true>, <4 x float> undef)
316 ret <4 x float> %gather
; float gather whose byte offsets are i8 values sign-extended to i32. The
; checks expect a vldrb.s32 offset load followed by a vldrw.u32 vector-offset
; gather.
319 define arm_aapcs_vfpcc <4 x float> @b_signed_unscaled_f32_i8(ptr %base, ptr %offptr) {
320 ; CHECK-LABEL: b_signed_unscaled_f32_i8:
321 ; CHECK: @ %bb.0: @ %entry
322 ; CHECK-NEXT: vldrb.s32 q1, [r1]
323 ; CHECK-NEXT: vldrw.u32 q0, [r0, q1]
326 %offs = load <4 x i8>, ptr %offptr, align 1
327 %offs.sext = sext <4 x i8> %offs to <4 x i32>
328 %byte_ptrs = getelementptr inbounds i8, ptr %base, <4 x i32> %offs.sext
329 %ptrs = bitcast <4 x ptr> %byte_ptrs to <4 x ptr>
330 %gather = call <4 x float> @llvm.masked.gather.v4f32.v4p0(<4 x ptr> %ptrs, i32 4, <4 x i1> <i1 true, i1 true, i1 true, i1 true>, <4 x float> undef)
331 ret <4 x float> %gather
; i16 gather, zero-extended to i32, with sign-extended i8 byte offsets.
; The checks expect vldrb.s32 for the offsets and vldrh.u32 for the gather.
334 define arm_aapcs_vfpcc <4 x i32> @zext_signed_unscaled_i16_i8(ptr %base, ptr %offptr) {
335 ; CHECK-LABEL: zext_signed_unscaled_i16_i8:
336 ; CHECK: @ %bb.0: @ %entry
337 ; CHECK-NEXT: vldrb.s32 q1, [r1]
338 ; CHECK-NEXT: vldrh.u32 q0, [r0, q1]
341 %offs = load <4 x i8>, ptr %offptr, align 1
342 %offs.sext = sext <4 x i8> %offs to <4 x i32>
343 %byte_ptrs = getelementptr inbounds i8, ptr %base, <4 x i32> %offs.sext
344 %ptrs = bitcast <4 x ptr> %byte_ptrs to <4 x ptr>
345 %gather = call <4 x i16> @llvm.masked.gather.v4i16.v4p0(<4 x ptr> %ptrs, i32 2, <4 x i1> <i1 true, i1 true, i1 true, i1 true>, <4 x i16> undef)
346 %gather.zext = zext <4 x i16> %gather to <4 x i32>
347 ret <4 x i32> %gather.zext
; i16 gather, sign-extended to i32, with sign-extended i8 byte offsets.
; The checks expect vldrb.s32 for the offsets and vldrh.s32 for the gather.
350 define arm_aapcs_vfpcc <4 x i32> @sext_signed_unscaled_i16_i8(ptr %base, ptr %offptr) {
351 ; CHECK-LABEL: sext_signed_unscaled_i16_i8:
352 ; CHECK: @ %bb.0: @ %entry
353 ; CHECK-NEXT: vldrb.s32 q1, [r1]
354 ; CHECK-NEXT: vldrh.s32 q0, [r0, q1]
357 %offs = load <4 x i8>, ptr %offptr, align 1
358 %offs.sext = sext <4 x i8> %offs to <4 x i32>
359 %byte_ptrs = getelementptr inbounds i8, ptr %base, <4 x i32> %offs.sext
360 %ptrs = bitcast <4 x ptr> %byte_ptrs to <4 x ptr>
361 %gather = call <4 x i16> @llvm.masked.gather.v4i16.v4p0(<4 x ptr> %ptrs, i32 2, <4 x i1> <i1 true, i1 true, i1 true, i1 true>, <4 x i16> undef)
362 %gather.sext = sext <4 x i16> %gather to <4 x i32>
363 ret <4 x i32> %gather.sext
; i16 gather, zero-extended to i32, with zero-extended i8 byte offsets.
; The checks expect vldrb.u32 for the offsets and vldrh.u32 for the gather.
366 define arm_aapcs_vfpcc <4 x i32> @zext_unsigned_unscaled_i16_i8(ptr %base, ptr %offptr) {
367 ; CHECK-LABEL: zext_unsigned_unscaled_i16_i8:
368 ; CHECK: @ %bb.0: @ %entry
369 ; CHECK-NEXT: vldrb.u32 q1, [r1]
370 ; CHECK-NEXT: vldrh.u32 q0, [r0, q1]
373 %offs = load <4 x i8>, ptr %offptr, align 1
374 %offs.zext = zext <4 x i8> %offs to <4 x i32>
375 %byte_ptrs = getelementptr inbounds i8, ptr %base, <4 x i32> %offs.zext
376 %ptrs = bitcast <4 x ptr> %byte_ptrs to <4 x ptr>
377 %gather = call <4 x i16> @llvm.masked.gather.v4i16.v4p0(<4 x ptr> %ptrs, i32 2, <4 x i1> <i1 true, i1 true, i1 true, i1 true>, <4 x i16> undef)
378 %gather.zext = zext <4 x i16> %gather to <4 x i32>
379 ret <4 x i32> %gather.zext
; i16 gather, sign-extended to i32, with zero-extended i8 byte offsets.
; The checks expect vldrb.u32 for the offsets and vldrh.s32 for the gather.
382 define arm_aapcs_vfpcc <4 x i32> @sext_unsigned_unscaled_i16_i8(ptr %base, ptr %offptr) {
383 ; CHECK-LABEL: sext_unsigned_unscaled_i16_i8:
384 ; CHECK: @ %bb.0: @ %entry
385 ; CHECK-NEXT: vldrb.u32 q1, [r1]
386 ; CHECK-NEXT: vldrh.s32 q0, [r0, q1]
389 %offs = load <4 x i8>, ptr %offptr, align 1
390 %offs.zext = zext <4 x i8> %offs to <4 x i32>
391 %byte_ptrs = getelementptr inbounds i8, ptr %base, <4 x i32> %offs.zext
392 %ptrs = bitcast <4 x ptr> %byte_ptrs to <4 x ptr>
393 %gather = call <4 x i16> @llvm.masked.gather.v4i16.v4p0(<4 x ptr> %ptrs, i32 2, <4 x i1> <i1 true, i1 true, i1 true, i1 true>, <4 x i16> undef)
394 %gather.sext = sext <4 x i16> %gather to <4 x i32>
395 ret <4 x i32> %gather.sext
; i8 gather, zero-extended to i32, with sign-extended i8 byte offsets.
; The checks expect vldrb.s32 for the offsets and vldrb.u32 for the gather.
398 define arm_aapcs_vfpcc <4 x i32> @zext_signed_unscaled_i8_i8(ptr %base, ptr %offptr) {
399 ; CHECK-LABEL: zext_signed_unscaled_i8_i8:
400 ; CHECK: @ %bb.0: @ %entry
401 ; CHECK-NEXT: vldrb.s32 q1, [r1]
402 ; CHECK-NEXT: vldrb.u32 q0, [r0, q1]
405 %offs = load <4 x i8>, ptr %offptr, align 1
406 %offs.sext = sext <4 x i8> %offs to <4 x i32>
407 %ptrs = getelementptr inbounds i8, ptr %base, <4 x i32> %offs.sext
408 %gather = call <4 x i8> @llvm.masked.gather.v4i8.v4p0(<4 x ptr> %ptrs, i32 1, <4 x i1> <i1 true, i1 true, i1 true, i1 true>, <4 x i8> undef)
409 %gather.zext = zext <4 x i8> %gather to <4 x i32>
410 ret <4 x i32> %gather.zext
; i8 gather, sign-extended to i32, with sign-extended i8 byte offsets.
; The checks expect vldrb.s32 for both the offset load and the gather.
413 define arm_aapcs_vfpcc <4 x i32> @sext_signed_unscaled_i8_i8(ptr %base, ptr %offptr) {
414 ; CHECK-LABEL: sext_signed_unscaled_i8_i8:
415 ; CHECK: @ %bb.0: @ %entry
416 ; CHECK-NEXT: vldrb.s32 q1, [r1]
417 ; CHECK-NEXT: vldrb.s32 q0, [r0, q1]
420 %offs = load <4 x i8>, ptr %offptr, align 1
421 %offs.sext = sext <4 x i8> %offs to <4 x i32>
422 %ptrs = getelementptr inbounds i8, ptr %base, <4 x i32> %offs.sext
423 %gather = call <4 x i8> @llvm.masked.gather.v4i8.v4p0(<4 x ptr> %ptrs, i32 1, <4 x i1> <i1 true, i1 true, i1 true, i1 true>, <4 x i8> undef)
424 %gather.sext = sext <4 x i8> %gather to <4 x i32>
425 ret <4 x i32> %gather.sext
; i8 gather, zero-extended to i32, with zero-extended i8 byte offsets.
; The checks expect vldrb.u32 for both the offset load and the gather.
428 define arm_aapcs_vfpcc <4 x i32> @zext_unsigned_unscaled_i8_i8(ptr %base, ptr %offptr) {
429 ; CHECK-LABEL: zext_unsigned_unscaled_i8_i8:
430 ; CHECK: @ %bb.0: @ %entry
431 ; CHECK-NEXT: vldrb.u32 q1, [r1]
432 ; CHECK-NEXT: vldrb.u32 q0, [r0, q1]
435 %offs = load <4 x i8>, ptr %offptr, align 1
436 %offs.zext = zext <4 x i8> %offs to <4 x i32>
437 %ptrs = getelementptr inbounds i8, ptr %base, <4 x i32> %offs.zext
438 %gather = call <4 x i8> @llvm.masked.gather.v4i8.v4p0(<4 x ptr> %ptrs, i32 1, <4 x i1> <i1 true, i1 true, i1 true, i1 true>, <4 x i8> undef)
439 %gather.zext = zext <4 x i8> %gather to <4 x i32>
440 ret <4 x i32> %gather.zext
; i8 gather, sign-extended to i32, with zero-extended i8 byte offsets.
; The checks expect vldrb.u32 for the offsets and vldrb.s32 for the gather.
443 define arm_aapcs_vfpcc <4 x i32> @sext_unsigned_unscaled_i8_i8(ptr %base, ptr %offptr) {
444 ; CHECK-LABEL: sext_unsigned_unscaled_i8_i8:
445 ; CHECK: @ %bb.0: @ %entry
446 ; CHECK-NEXT: vldrb.u32 q1, [r1]
447 ; CHECK-NEXT: vldrb.s32 q0, [r0, q1]
450 %offs = load <4 x i8>, ptr %offptr, align 1
451 %offs.zext = zext <4 x i8> %offs to <4 x i32>
452 %ptrs = getelementptr inbounds i8, ptr %base, <4 x i32> %offs.zext
453 %gather = call <4 x i8> @llvm.masked.gather.v4i8.v4p0(<4 x ptr> %ptrs, i32 1, <4 x i1> <i1 true, i1 true, i1 true, i1 true>, <4 x i8> undef)
454 %gather.sext = sext <4 x i8> %gather to <4 x i32>
455 ret <4 x i32> %gather.sext
458 ; VLDRW.u32 Qd, [P, 4]: gather through a vector of pointers P advanced by 4 i32 elements (16 bytes).
; Gathers four i32 values through a vector of pointers, each advanced by
; 4 i32 elements (16 bytes), with 4-byte alignment. The checks record the
; offset being added with vadd.i32 before a vector-base vldrw.u32.
459 define arm_aapcs_vfpcc <4 x i32> @qi4(<4 x ptr> %p) {
460 ; CHECK-LABEL: qi4:
461 ; CHECK: @ %bb.0: @ %entry
462 ; CHECK-NEXT: movs r0, #16
463 ; CHECK-NEXT: vadd.i32 q1, q0, r0
464 ; CHECK-NEXT: vldrw.u32 q0, [q1]
467 %g = getelementptr inbounds i32, <4 x ptr> %p, i32 4
468 %gather = call <4 x i32> @llvm.masked.gather.v4i32.v4p0(<4 x ptr> %g, i32 4, <4 x i1> <i1 true, i1 true, i1 true, i1 true>, <4 x i32> undef)
469 ret <4 x i32> %gather
; Same address computation as @qi4, but the gather is called with alignment 1.
; The checks expect no vector gather here: the four pointers are moved to
; core registers, each element is loaded with a scalar ldr, and the result
; vector is rebuilt with vmov.
472 define arm_aapcs_vfpcc <4 x i32> @qi4_unaligned(<4 x ptr> %p) {
473 ; CHECK-LABEL: qi4_unaligned:
474 ; CHECK: @ %bb.0: @ %entry
475 ; CHECK-NEXT: movs r0, #16
476 ; CHECK-NEXT: vadd.i32 q0, q0, r0
477 ; CHECK-NEXT: vmov r0, r1, d1
478 ; CHECK-NEXT: vmov r2, r3, d0
479 ; CHECK-NEXT: ldr r0, [r0]
480 ; CHECK-NEXT: ldr r2, [r2]
481 ; CHECK-NEXT: ldr r1, [r1]
482 ; CHECK-NEXT: ldr r3, [r3]
483 ; CHECK-NEXT: vmov q0[2], q0[0], r2, r0
484 ; CHECK-NEXT: vmov q0[3], q0[1], r3, r1
487 %g = getelementptr inbounds i32, <4 x ptr> %p, i32 4
488 %gather = call <4 x i32> @llvm.masked.gather.v4i32.v4p0(<4 x ptr> %g, i32 1, <4 x i1> <i1 true, i1 true, i1 true, i1 true>, <4 x i32> undef)
489 ret <4 x i32> %gather
; Declarations of the llvm.masked.gather intrinsics used by this test.
; Operands: vector of pointers, alignment, lane mask, pass-through value.
; NOTE(review): the v4f16 variant is not called by any function visible in
; this chunk -- confirm whether it is still needed.
492 declare <4 x i8> @llvm.masked.gather.v4i8.v4p0(<4 x ptr>, i32, <4 x i1>, <4 x i8>)
493 declare <4 x i16> @llvm.masked.gather.v4i16.v4p0(<4 x ptr>, i32, <4 x i1>, <4 x i16>)
494 declare <4 x i32> @llvm.masked.gather.v4i32.v4p0(<4 x ptr>, i32, <4 x i1>, <4 x i32>)
495 declare <4 x half> @llvm.masked.gather.v4f16.v4p0(<4 x ptr>, i32, <4 x i1>, <4 x half>)
496 declare <4 x float> @llvm.masked.gather.v4f32.v4p0(<4 x ptr>, i32, <4 x i1>, <4 x float>)