1 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2 ; RUN: llc -mtriple=thumbv8.1m.main-none-none-eabi -mattr=+mve.fp -enable-arm-maskedldst %s -o - | FileCheck %s
6 define arm_aapcs_vfpcc <2 x i32> @ptr_v2i32(ptr %offptr) {
7 ; CHECK-LABEL: ptr_v2i32:
8 ; CHECK: @ %bb.0: @ %entry
9 ; CHECK-NEXT: ldrd r0, r1, [r0]
10 ; CHECK-NEXT: ldr r1, [r1]
11 ; CHECK-NEXT: ldr r0, [r0]
12 ; CHECK-NEXT: vmov q0[2], q0[0], r0, r1
15 %offs = load <2 x ptr>, ptr %offptr, align 4
16 %gather = call <2 x i32> @llvm.masked.gather.v2i32.v2p0(<2 x ptr> %offs, i32 4, <2 x i1> <i1 true, i1 true>, <2 x i32> undef)
20 define arm_aapcs_vfpcc <4 x i32> @ptr_v4i32(ptr %offptr) {
21 ; CHECK-LABEL: ptr_v4i32:
22 ; CHECK: @ %bb.0: @ %entry
23 ; CHECK-NEXT: vldrw.u32 q1, [r0]
24 ; CHECK-NEXT: vldrw.u32 q0, [q1]
27 %offs = load <4 x ptr>, ptr %offptr, align 4
28 %gather = call <4 x i32> @llvm.masked.gather.v4i32.v4p0(<4 x ptr> %offs, i32 4, <4 x i1> <i1 true, i1 true, i1 true, i1 true>, <4 x i32> undef)
32 define arm_aapcs_vfpcc <8 x i32> @ptr_v8i32(ptr %offptr) {
33 ; CHECK-LABEL: ptr_v8i32:
34 ; CHECK: @ %bb.0: @ %entry
35 ; CHECK-NEXT: .save {r4, r5, r6, r7, lr}
36 ; CHECK-NEXT: push {r4, r5, r6, r7, lr}
37 ; CHECK-NEXT: vldrw.u32 q0, [r0, #16]
38 ; CHECK-NEXT: vmov r1, r2, d1
39 ; CHECK-NEXT: vmov r3, r12, d0
40 ; CHECK-NEXT: vldrw.u32 q0, [r0]
41 ; CHECK-NEXT: vmov r0, lr, d1
42 ; CHECK-NEXT: ldr r7, [r2]
43 ; CHECK-NEXT: vmov r2, r4, d0
44 ; CHECK-NEXT: ldr r6, [r1]
45 ; CHECK-NEXT: ldr r3, [r3]
46 ; CHECK-NEXT: ldr r0, [r0]
47 ; CHECK-NEXT: ldr.w r1, [r12]
48 ; CHECK-NEXT: vmov q1[2], q1[0], r3, r6
49 ; CHECK-NEXT: ldr.w r5, [lr]
50 ; CHECK-NEXT: vmov q1[3], q1[1], r1, r7
51 ; CHECK-NEXT: ldr r2, [r2]
52 ; CHECK-NEXT: ldr r4, [r4]
53 ; CHECK-NEXT: vmov q0[2], q0[0], r2, r0
54 ; CHECK-NEXT: vmov q0[3], q0[1], r4, r5
55 ; CHECK-NEXT: pop {r4, r5, r6, r7, pc}
57 %offs = load <8 x ptr>, ptr %offptr, align 4
58 %gather = call <8 x i32> @llvm.masked.gather.v8i32.v8p0(<8 x ptr> %offs, i32 4, <8 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>, <8 x i32> undef)
62 define arm_aapcs_vfpcc <16 x i32> @ptr_v16i32(ptr %offptr) {
63 ; CHECK-LABEL: ptr_v16i32:
64 ; CHECK: @ %bb.0: @ %entry
65 ; CHECK-NEXT: .save {r4, r5, r6, r7, lr}
66 ; CHECK-NEXT: push {r4, r5, r6, r7, lr}
67 ; CHECK-NEXT: vldrw.u32 q0, [r0, #48]
68 ; CHECK-NEXT: vldrw.u32 q1, [r0, #16]
69 ; CHECK-NEXT: vldrw.u32 q2, [r0, #32]
70 ; CHECK-NEXT: vmov r1, r2, d1
71 ; CHECK-NEXT: vmov r3, lr, d0
72 ; CHECK-NEXT: vldrw.u32 q0, [r0]
73 ; CHECK-NEXT: vmov r4, r5, d1
74 ; CHECK-NEXT: ldr r7, [r2]
75 ; CHECK-NEXT: vmov r2, r6, d0
76 ; CHECK-NEXT: ldr.w r12, [r1]
77 ; CHECK-NEXT: ldr r3, [r3]
78 ; CHECK-NEXT: ldr r4, [r4]
79 ; CHECK-NEXT: ldr r5, [r5]
80 ; CHECK-NEXT: vmov q3[2], q3[0], r3, r12
81 ; CHECK-NEXT: ldr.w r1, [lr]
82 ; CHECK-NEXT: vmov q3[3], q3[1], r1, r7
83 ; CHECK-NEXT: ldr r2, [r2]
84 ; CHECK-NEXT: ldr r6, [r6]
85 ; CHECK-NEXT: vmov q0[2], q0[0], r2, r4
86 ; CHECK-NEXT: vmov r2, r4, d3
87 ; CHECK-NEXT: vmov q0[3], q0[1], r6, r5
88 ; CHECK-NEXT: vmov r6, r5, d2
89 ; CHECK-NEXT: ldr r2, [r2]
90 ; CHECK-NEXT: ldr r6, [r6]
91 ; CHECK-NEXT: ldr r5, [r5]
92 ; CHECK-NEXT: vmov q1[2], q1[0], r6, r2
93 ; CHECK-NEXT: ldr r6, [r4]
94 ; CHECK-NEXT: vmov r0, r2, d5
95 ; CHECK-NEXT: vmov q1[3], q1[1], r5, r6
96 ; CHECK-NEXT: vmov r6, r5, d4
97 ; CHECK-NEXT: ldr r0, [r0]
98 ; CHECK-NEXT: ldr r6, [r6]
99 ; CHECK-NEXT: ldr r2, [r2]
100 ; CHECK-NEXT: ldr r5, [r5]
101 ; CHECK-NEXT: vmov q2[2], q2[0], r6, r0
102 ; CHECK-NEXT: vmov q2[3], q2[1], r5, r2
103 ; CHECK-NEXT: pop {r4, r5, r6, r7, pc}
105 %offs = load <16 x ptr>, ptr %offptr, align 4
106 %gather = call <16 x i32> @llvm.masked.gather.v16i32.v16p0(<16 x ptr> %offs, i32 4, <16 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>, <16 x i32> undef)
107 ret <16 x i32> %gather
112 define arm_aapcs_vfpcc <2 x float> @ptr_v2f32(ptr %offptr) {
113 ; CHECK-LABEL: ptr_v2f32:
114 ; CHECK: @ %bb.0: @ %entry
115 ; CHECK-NEXT: ldrd r0, r1, [r0]
116 ; CHECK-NEXT: vldr s1, [r1]
117 ; CHECK-NEXT: vldr s0, [r0]
120 %offs = load <2 x ptr>, ptr %offptr, align 4
121 %gather = call <2 x float> @llvm.masked.gather.v2f32.v2p0(<2 x ptr> %offs, i32 4, <2 x i1> <i1 true, i1 true>, <2 x float> undef)
122 ret <2 x float> %gather
125 define arm_aapcs_vfpcc <4 x float> @ptr_v4f32(ptr %offptr) {
126 ; CHECK-LABEL: ptr_v4f32:
127 ; CHECK: @ %bb.0: @ %entry
128 ; CHECK-NEXT: vldrw.u32 q1, [r0]
129 ; CHECK-NEXT: vldrw.u32 q0, [q1]
132 %offs = load <4 x ptr>, ptr %offptr, align 4
133 %gather = call <4 x float> @llvm.masked.gather.v4f32.v4p0(<4 x ptr> %offs, i32 4, <4 x i1> <i1 true, i1 true, i1 true, i1 true>, <4 x float> undef)
134 ret <4 x float> %gather
137 define arm_aapcs_vfpcc <8 x float> @ptr_v8f32(ptr %offptr) {
138 ; CHECK-LABEL: ptr_v8f32:
139 ; CHECK: @ %bb.0: @ %entry
140 ; CHECK-NEXT: .save {r4, r5, r7, lr}
141 ; CHECK-NEXT: push {r4, r5, r7, lr}
142 ; CHECK-NEXT: vldrw.u32 q0, [r0]
143 ; CHECK-NEXT: vmov r12, r2, d1
144 ; CHECK-NEXT: vmov lr, r1, d0
145 ; CHECK-NEXT: vldrw.u32 q0, [r0, #16]
146 ; CHECK-NEXT: vmov r0, r3, d1
147 ; CHECK-NEXT: vmov r4, r5, d0
148 ; CHECK-NEXT: vldr s3, [r2]
149 ; CHECK-NEXT: vldr s2, [r12]
150 ; CHECK-NEXT: vldr s1, [r1]
151 ; CHECK-NEXT: vldr s0, [lr]
152 ; CHECK-NEXT: vldr s7, [r3]
153 ; CHECK-NEXT: vldr s6, [r0]
154 ; CHECK-NEXT: vldr s5, [r5]
155 ; CHECK-NEXT: vldr s4, [r4]
156 ; CHECK-NEXT: pop {r4, r5, r7, pc}
158 %offs = load <8 x ptr>, ptr %offptr, align 4
159 %gather = call <8 x float> @llvm.masked.gather.v8f32.v8p0(<8 x ptr> %offs, i32 4, <8 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>, <8 x float> undef)
160 ret <8 x float> %gather
165 define arm_aapcs_vfpcc <8 x i16> @ptr_i16(ptr %offptr) {
166 ; CHECK-LABEL: ptr_i16:
167 ; CHECK: @ %bb.0: @ %entry
168 ; CHECK-NEXT: .save {r4, r5, r6, lr}
169 ; CHECK-NEXT: push {r4, r5, r6, lr}
170 ; CHECK-NEXT: vldrw.u32 q0, [r0, #16]
171 ; CHECK-NEXT: vmov r1, r2, d0
172 ; CHECK-NEXT: vmov r3, r12, d1
173 ; CHECK-NEXT: vldrw.u32 q0, [r0]
174 ; CHECK-NEXT: vmov r4, r5, d0
175 ; CHECK-NEXT: vmov r0, lr, d1
176 ; CHECK-NEXT: ldrh r1, [r1]
177 ; CHECK-NEXT: ldrh r6, [r3]
178 ; CHECK-NEXT: ldrh r2, [r2]
179 ; CHECK-NEXT: ldrh r4, [r4]
180 ; CHECK-NEXT: ldrh r5, [r5]
181 ; CHECK-NEXT: vmov.16 q0[0], r4
182 ; CHECK-NEXT: ldrh r0, [r0]
183 ; CHECK-NEXT: vmov.16 q0[1], r5
184 ; CHECK-NEXT: ldrh.w r3, [lr]
185 ; CHECK-NEXT: vmov.16 q0[2], r0
186 ; CHECK-NEXT: ldrh.w r12, [r12]
187 ; CHECK-NEXT: vmov.16 q0[3], r3
188 ; CHECK-NEXT: vmov.16 q0[4], r1
189 ; CHECK-NEXT: vmov.16 q0[5], r2
190 ; CHECK-NEXT: vmov.16 q0[6], r6
191 ; CHECK-NEXT: vmov.16 q0[7], r12
192 ; CHECK-NEXT: pop {r4, r5, r6, pc}
194 %offs = load <8 x ptr>, ptr %offptr, align 4
195 %gather = call <8 x i16> @llvm.masked.gather.v8i16.v8p0(<8 x ptr> %offs, i32 2, <8 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>, <8 x i16> undef)
196 ret <8 x i16> %gather
199 define arm_aapcs_vfpcc <2 x i32> @ptr_v2i16_sext(ptr %offptr) {
200 ; CHECK-LABEL: ptr_v2i16_sext:
201 ; CHECK: @ %bb.0: @ %entry
202 ; CHECK-NEXT: ldrd r0, r1, [r0]
203 ; CHECK-NEXT: ldrsh.w r1, [r1]
204 ; CHECK-NEXT: ldrsh.w r0, [r0]
205 ; CHECK-NEXT: vmov q0[2], q0[0], r0, r1
206 ; CHECK-NEXT: asrs r1, r1, #31
207 ; CHECK-NEXT: asrs r0, r0, #31
208 ; CHECK-NEXT: vmov q0[3], q0[1], r0, r1
211 %offs = load <2 x ptr>, ptr %offptr, align 4
212 %gather = call <2 x i16> @llvm.masked.gather.v2i16.v2p0(<2 x ptr> %offs, i32 2, <2 x i1> <i1 true, i1 true>, <2 x i16> undef)
213 %ext = sext <2 x i16> %gather to <2 x i32>
217 define arm_aapcs_vfpcc <2 x i32> @ptr_v2i16_zext(ptr %offptr) {
218 ; CHECK-LABEL: ptr_v2i16_zext:
219 ; CHECK: @ %bb.0: @ %entry
220 ; CHECK-NEXT: ldrd r0, r1, [r0]
221 ; CHECK-NEXT: vmov.i64 q0, #0xffff
222 ; CHECK-NEXT: ldrh r1, [r1]
223 ; CHECK-NEXT: ldrh r0, [r0]
224 ; CHECK-NEXT: vmov q1[2], q1[0], r0, r1
225 ; CHECK-NEXT: vand q0, q1, q0
228 %offs = load <2 x ptr>, ptr %offptr, align 4
229 %gather = call <2 x i16> @llvm.masked.gather.v2i16.v2p0(<2 x ptr> %offs, i32 2, <2 x i1> <i1 true, i1 true>, <2 x i16> undef)
230 %ext = zext <2 x i16> %gather to <2 x i32>
234 define arm_aapcs_vfpcc <4 x i32> @ptr_v4i16_sext(ptr %offptr) {
235 ; CHECK-LABEL: ptr_v4i16_sext:
236 ; CHECK: @ %bb.0: @ %entry
237 ; CHECK-NEXT: vldrw.u32 q1, [r0]
238 ; CHECK-NEXT: movs r1, #0
239 ; CHECK-NEXT: vldrh.s32 q0, [r1, q1]
242 %offs = load <4 x ptr>, ptr %offptr, align 4
243 %gather = call <4 x i16> @llvm.masked.gather.v4i16.v4p0(<4 x ptr> %offs, i32 2, <4 x i1> <i1 true, i1 true, i1 true, i1 true>, <4 x i16> undef)
244 %ext = sext <4 x i16> %gather to <4 x i32>
248 define arm_aapcs_vfpcc <4 x i32> @ptr_v4i16_zext(ptr %offptr) {
249 ; CHECK-LABEL: ptr_v4i16_zext:
250 ; CHECK: @ %bb.0: @ %entry
251 ; CHECK-NEXT: vldrw.u32 q1, [r0]
252 ; CHECK-NEXT: movs r1, #0
253 ; CHECK-NEXT: vldrh.u32 q0, [r1, q1]
256 %offs = load <4 x ptr>, ptr %offptr, align 4
257 %gather = call <4 x i16> @llvm.masked.gather.v4i16.v4p0(<4 x ptr> %offs, i32 2, <4 x i1> <i1 true, i1 true, i1 true, i1 true>, <4 x i16> undef)
258 %ext = zext <4 x i16> %gather to <4 x i32>
262 define arm_aapcs_vfpcc <4 x i16> @ptr_v4i16(ptr %offptr) {
263 ; CHECK-LABEL: ptr_v4i16:
264 ; CHECK: @ %bb.0: @ %entry
265 ; CHECK-NEXT: vldrw.u32 q1, [r0]
266 ; CHECK-NEXT: movs r1, #0
267 ; CHECK-NEXT: vldrh.u32 q0, [r1, q1]
270 %offs = load <4 x ptr>, ptr %offptr, align 4
271 %gather = call <4 x i16> @llvm.masked.gather.v4i16.v4p0(<4 x ptr> %offs, i32 2, <4 x i1> <i1 true, i1 true, i1 true, i1 true>, <4 x i16> undef)
272 ret <4 x i16> %gather
275 define arm_aapcs_vfpcc <8 x i32> @ptr_v8i16_sext(ptr %offptr) {
276 ; CHECK-LABEL: ptr_v8i16_sext:
277 ; CHECK: @ %bb.0: @ %entry
278 ; CHECK-NEXT: .save {r4, r5, r6, r7, lr}
279 ; CHECK-NEXT: push {r4, r5, r6, r7, lr}
280 ; CHECK-NEXT: .pad #16
281 ; CHECK-NEXT: sub sp, #16
282 ; CHECK-NEXT: vldrw.u32 q0, [r0, #16]
283 ; CHECK-NEXT: vmov r3, r1, d1
284 ; CHECK-NEXT: vmov r12, r2, d0
285 ; CHECK-NEXT: vldrw.u32 q0, [r0]
286 ; CHECK-NEXT: vmov lr, r0, d1
287 ; CHECK-NEXT: ldrh r7, [r1]
288 ; CHECK-NEXT: ldrh.w r1, [r12]
289 ; CHECK-NEXT: ldrh r2, [r2]
290 ; CHECK-NEXT: ldrh r4, [r0]
291 ; CHECK-NEXT: vmov r0, r5, d0
292 ; CHECK-NEXT: ldrh.w r6, [lr]
293 ; CHECK-NEXT: ldrh r3, [r3]
294 ; CHECK-NEXT: ldrh r0, [r0]
295 ; CHECK-NEXT: ldrh r5, [r5]
296 ; CHECK-NEXT: vmov.16 q0[0], r0
297 ; CHECK-NEXT: mov r0, sp
298 ; CHECK-NEXT: vmov.16 q0[1], r5
299 ; CHECK-NEXT: vmov.16 q0[2], r6
300 ; CHECK-NEXT: vmov.16 q0[3], r4
301 ; CHECK-NEXT: vmov.16 q0[4], r1
302 ; CHECK-NEXT: vmov.16 q0[5], r2
303 ; CHECK-NEXT: vmov.16 q0[6], r3
304 ; CHECK-NEXT: vmov.16 q0[7], r7
305 ; CHECK-NEXT: vstrw.32 q0, [r0]
306 ; CHECK-NEXT: vldrh.s32 q0, [r0]
307 ; CHECK-NEXT: vldrh.s32 q1, [r0, #8]
308 ; CHECK-NEXT: add sp, #16
309 ; CHECK-NEXT: pop {r4, r5, r6, r7, pc}
311 %offs = load <8 x ptr>, ptr %offptr, align 4
312 %gather = call <8 x i16> @llvm.masked.gather.v8i16.v8p0(<8 x ptr> %offs, i32 2, <8 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>, <8 x i16> undef)
313 %ext = sext <8 x i16> %gather to <8 x i32>
317 define arm_aapcs_vfpcc <8 x i32> @ptr_v8i16_zext(ptr %offptr) {
318 ; CHECK-LABEL: ptr_v8i16_zext:
319 ; CHECK: @ %bb.0: @ %entry
320 ; CHECK-NEXT: .save {r4, r5, r6, r7, lr}
321 ; CHECK-NEXT: push {r4, r5, r6, r7, lr}
322 ; CHECK-NEXT: .pad #16
323 ; CHECK-NEXT: sub sp, #16
324 ; CHECK-NEXT: vldrw.u32 q0, [r0, #16]
325 ; CHECK-NEXT: vmov r3, r1, d1
326 ; CHECK-NEXT: vmov r12, r2, d0
327 ; CHECK-NEXT: vldrw.u32 q0, [r0]
328 ; CHECK-NEXT: vmov lr, r0, d1
329 ; CHECK-NEXT: ldrh r7, [r1]
330 ; CHECK-NEXT: ldrh.w r1, [r12]
331 ; CHECK-NEXT: ldrh r2, [r2]
332 ; CHECK-NEXT: ldrh r4, [r0]
333 ; CHECK-NEXT: vmov r0, r5, d0
334 ; CHECK-NEXT: ldrh.w r6, [lr]
335 ; CHECK-NEXT: ldrh r3, [r3]
336 ; CHECK-NEXT: ldrh r0, [r0]
337 ; CHECK-NEXT: ldrh r5, [r5]
338 ; CHECK-NEXT: vmov.16 q0[0], r0
339 ; CHECK-NEXT: mov r0, sp
340 ; CHECK-NEXT: vmov.16 q0[1], r5
341 ; CHECK-NEXT: vmov.16 q0[2], r6
342 ; CHECK-NEXT: vmov.16 q0[3], r4
343 ; CHECK-NEXT: vmov.16 q0[4], r1
344 ; CHECK-NEXT: vmov.16 q0[5], r2
345 ; CHECK-NEXT: vmov.16 q0[6], r3
346 ; CHECK-NEXT: vmov.16 q0[7], r7
347 ; CHECK-NEXT: vstrw.32 q0, [r0]
348 ; CHECK-NEXT: vldrh.u32 q0, [r0]
349 ; CHECK-NEXT: vldrh.u32 q1, [r0, #8]
350 ; CHECK-NEXT: add sp, #16
351 ; CHECK-NEXT: pop {r4, r5, r6, r7, pc}
353 %offs = load <8 x ptr>, ptr %offptr, align 4
354 %gather = call <8 x i16> @llvm.masked.gather.v8i16.v8p0(<8 x ptr> %offs, i32 2, <8 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>, <8 x i16> undef)
355 %ext = zext <8 x i16> %gather to <8 x i32>
361 define arm_aapcs_vfpcc <8 x half> @ptr_f16(ptr %offptr) {
362 ; CHECK-LABEL: ptr_f16:
363 ; CHECK: @ %bb.0: @ %entry
364 ; CHECK-NEXT: vldrw.u32 q0, [r0]
365 ; CHECK-NEXT: vmov r1, r2, d0
366 ; CHECK-NEXT: vldr.16 s4, [r2]
367 ; CHECK-NEXT: vldr.16 s0, [r1]
368 ; CHECK-NEXT: vmov r1, r2, d1
369 ; CHECK-NEXT: vins.f16 s0, s4
370 ; CHECK-NEXT: vldrw.u32 q1, [r0, #16]
371 ; CHECK-NEXT: vldr.16 s1, [r1]
372 ; CHECK-NEXT: vldr.16 s2, [r2]
373 ; CHECK-NEXT: vmov r0, r1, d2
374 ; CHECK-NEXT: vins.f16 s1, s2
375 ; CHECK-NEXT: vldr.16 s4, [r1]
376 ; CHECK-NEXT: vldr.16 s2, [r0]
377 ; CHECK-NEXT: vmov r0, r1, d3
378 ; CHECK-NEXT: vldr.16 s3, [r0]
379 ; CHECK-NEXT: vins.f16 s2, s4
380 ; CHECK-NEXT: vldr.16 s4, [r1]
381 ; CHECK-NEXT: vins.f16 s3, s4
384 %offs = load <8 x ptr>, ptr %offptr, align 4
385 %gather = call <8 x half> @llvm.masked.gather.v8f16.v8p0(<8 x ptr> %offs, i32 2, <8 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>, <8 x half> undef)
386 ret <8 x half> %gather
389 define arm_aapcs_vfpcc <4 x half> @ptr_v4f16(ptr %offptr) {
390 ; CHECK-LABEL: ptr_v4f16:
391 ; CHECK: @ %bb.0: @ %entry
392 ; CHECK-NEXT: vldrw.u32 q0, [r0]
393 ; CHECK-NEXT: vmov r0, r1, d0
394 ; CHECK-NEXT: vldr.16 s4, [r1]
395 ; CHECK-NEXT: vldr.16 s0, [r0]
396 ; CHECK-NEXT: vmov r0, r1, d1
397 ; CHECK-NEXT: vldr.16 s2, [r1]
398 ; CHECK-NEXT: vldr.16 s1, [r0]
399 ; CHECK-NEXT: vins.f16 s0, s4
400 ; CHECK-NEXT: vins.f16 s1, s2
403 %offs = load <4 x ptr>, ptr %offptr, align 4
404 %gather = call <4 x half> @llvm.masked.gather.v4f16.v4p0(<4 x ptr> %offs, i32 2, <4 x i1> <i1 true, i1 true, i1 true, i1 true>, <4 x half> undef)
405 ret <4 x half> %gather
410 define arm_aapcs_vfpcc <16 x i8> @ptr_i8(ptr %offptr) {
411 ; CHECK-LABEL: ptr_i8:
412 ; CHECK: @ %bb.0: @ %entry
413 ; CHECK-NEXT: .save {r4, r5, r6, r7, lr}
414 ; CHECK-NEXT: push {r4, r5, r6, r7, lr}
415 ; CHECK-NEXT: vldrw.u32 q0, [r0, #48]
416 ; CHECK-NEXT: vldrw.u32 q2, [r0]
417 ; CHECK-NEXT: vldrw.u32 q1, [r0, #32]
418 ; CHECK-NEXT: vmov r1, r2, d0
419 ; CHECK-NEXT: vmov r6, r7, d4
420 ; CHECK-NEXT: vmov r4, r3, d1
421 ; CHECK-NEXT: ldrb r5, [r1]
422 ; CHECK-NEXT: ldrb r1, [r2]
423 ; CHECK-NEXT: ldrb r2, [r6]
424 ; CHECK-NEXT: ldrb.w r12, [r3]
425 ; CHECK-NEXT: vmov.8 q0[0], r2
426 ; CHECK-NEXT: vmov r2, r3, d3
427 ; CHECK-NEXT: ldrb.w lr, [r4]
428 ; CHECK-NEXT: ldrb r4, [r2]
429 ; CHECK-NEXT: ldrb r2, [r3]
430 ; CHECK-NEXT: ldrb r3, [r7]
431 ; CHECK-NEXT: vmov.8 q0[1], r3
432 ; CHECK-NEXT: vmov r3, r6, d5
433 ; CHECK-NEXT: vldrw.u32 q2, [r0, #16]
434 ; CHECK-NEXT: ldrb r3, [r3]
435 ; CHECK-NEXT: ldrb r6, [r6]
436 ; CHECK-NEXT: vmov.8 q0[2], r3
437 ; CHECK-NEXT: vmov r0, r3, d4
438 ; CHECK-NEXT: vmov.8 q0[3], r6
439 ; CHECK-NEXT: ldrb r0, [r0]
440 ; CHECK-NEXT: ldrb r3, [r3]
441 ; CHECK-NEXT: vmov.8 q0[4], r0
442 ; CHECK-NEXT: vmov.8 q0[5], r3
443 ; CHECK-NEXT: vmov r0, r3, d5
444 ; CHECK-NEXT: ldrb r0, [r0]
445 ; CHECK-NEXT: ldrb r3, [r3]
446 ; CHECK-NEXT: vmov.8 q0[6], r0
447 ; CHECK-NEXT: vmov.8 q0[7], r3
448 ; CHECK-NEXT: vmov r0, r3, d2
449 ; CHECK-NEXT: ldrb r0, [r0]
450 ; CHECK-NEXT: ldrb r3, [r3]
451 ; CHECK-NEXT: vmov.8 q0[8], r0
452 ; CHECK-NEXT: vmov.8 q0[9], r3
453 ; CHECK-NEXT: vmov.8 q0[10], r4
454 ; CHECK-NEXT: vmov.8 q0[11], r2
455 ; CHECK-NEXT: vmov.8 q0[12], r5
456 ; CHECK-NEXT: vmov.8 q0[13], r1
457 ; CHECK-NEXT: vmov.8 q0[14], lr
458 ; CHECK-NEXT: vmov.8 q0[15], r12
459 ; CHECK-NEXT: pop {r4, r5, r6, r7, pc}
461 %offs = load <16 x ptr>, ptr %offptr, align 4
462 %gather = call <16 x i8> @llvm.masked.gather.v16i8.v16p0(<16 x ptr> %offs, i32 2, <16 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>, <16 x i8> undef)
463 ret <16 x i8> %gather
466 define arm_aapcs_vfpcc <8 x i16> @ptr_v8i8_sext16(ptr %offptr) {
467 ; CHECK-LABEL: ptr_v8i8_sext16:
468 ; CHECK: @ %bb.0: @ %entry
469 ; CHECK-NEXT: .save {r4, r5, r6, r7, lr}
470 ; CHECK-NEXT: push {r4, r5, r6, r7, lr}
471 ; CHECK-NEXT: vldrw.u32 q0, [r0, #16]
472 ; CHECK-NEXT: vmov r3, r1, d1
473 ; CHECK-NEXT: vmov r12, r2, d0
474 ; CHECK-NEXT: vldrw.u32 q0, [r0]
475 ; CHECK-NEXT: vmov r4, r5, d0
476 ; CHECK-NEXT: vmov lr, r0, d1
477 ; CHECK-NEXT: ldrb r7, [r1]
478 ; CHECK-NEXT: ldrb.w r1, [r12]
479 ; CHECK-NEXT: ldrb r2, [r2]
480 ; CHECK-NEXT: ldrb r4, [r4]
481 ; CHECK-NEXT: ldrb r5, [r5]
482 ; CHECK-NEXT: vmov.16 q0[0], r4
483 ; CHECK-NEXT: ldrb.w r6, [lr]
484 ; CHECK-NEXT: vmov.16 q0[1], r5
485 ; CHECK-NEXT: ldrb r0, [r0]
486 ; CHECK-NEXT: vmov.16 q0[2], r6
487 ; CHECK-NEXT: ldrb r3, [r3]
488 ; CHECK-NEXT: vmov.16 q0[3], r0
489 ; CHECK-NEXT: vmov.16 q0[4], r1
490 ; CHECK-NEXT: vmov.16 q0[5], r2
491 ; CHECK-NEXT: vmov.16 q0[6], r3
492 ; CHECK-NEXT: vmov.16 q0[7], r7
493 ; CHECK-NEXT: vmovlb.s8 q0, q0
494 ; CHECK-NEXT: pop {r4, r5, r6, r7, pc}
496 %offs = load <8 x ptr>, ptr %offptr, align 4
497 %gather = call <8 x i8> @llvm.masked.gather.v8i8.v8p0(<8 x ptr> %offs, i32 1, <8 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>, <8 x i8> undef)
498 %ext = sext <8 x i8> %gather to <8 x i16>
502 define arm_aapcs_vfpcc <8 x i16> @ptr_v8i8_zext16(ptr %offptr) {
503 ; CHECK-LABEL: ptr_v8i8_zext16:
504 ; CHECK: @ %bb.0: @ %entry
505 ; CHECK-NEXT: .save {r4, r5, r6, r7, lr}
506 ; CHECK-NEXT: push {r4, r5, r6, r7, lr}
507 ; CHECK-NEXT: vldrw.u32 q0, [r0, #16]
508 ; CHECK-NEXT: vmov r3, r1, d1
509 ; CHECK-NEXT: vmov r12, r2, d0
510 ; CHECK-NEXT: vldrw.u32 q0, [r0]
511 ; CHECK-NEXT: vmov r4, r5, d0
512 ; CHECK-NEXT: vmov lr, r0, d1
513 ; CHECK-NEXT: ldrb r7, [r1]
514 ; CHECK-NEXT: ldrb.w r1, [r12]
515 ; CHECK-NEXT: ldrb r2, [r2]
516 ; CHECK-NEXT: ldrb r4, [r4]
517 ; CHECK-NEXT: ldrb r5, [r5]
518 ; CHECK-NEXT: vmov.16 q0[0], r4
519 ; CHECK-NEXT: ldrb.w r6, [lr]
520 ; CHECK-NEXT: vmov.16 q0[1], r5
521 ; CHECK-NEXT: ldrb r0, [r0]
522 ; CHECK-NEXT: vmov.16 q0[2], r6
523 ; CHECK-NEXT: ldrb r3, [r3]
524 ; CHECK-NEXT: vmov.16 q0[3], r0
525 ; CHECK-NEXT: vmov.16 q0[4], r1
526 ; CHECK-NEXT: vmov.16 q0[5], r2
527 ; CHECK-NEXT: vmov.16 q0[6], r3
528 ; CHECK-NEXT: vmov.16 q0[7], r7
529 ; CHECK-NEXT: vmovlb.u8 q0, q0
530 ; CHECK-NEXT: pop {r4, r5, r6, r7, pc}
532 %offs = load <8 x ptr>, ptr %offptr, align 4
533 %gather = call <8 x i8> @llvm.masked.gather.v8i8.v8p0(<8 x ptr> %offs, i32 1, <8 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>, <8 x i8> undef)
534 %ext = zext <8 x i8> %gather to <8 x i16>
538 define arm_aapcs_vfpcc <8 x i8> @ptr_v8i8(ptr %offptr) {
539 ; CHECK-LABEL: ptr_v8i8:
540 ; CHECK: @ %bb.0: @ %entry
541 ; CHECK-NEXT: .save {r4, r5, r6, lr}
542 ; CHECK-NEXT: push {r4, r5, r6, lr}
543 ; CHECK-NEXT: vldrw.u32 q0, [r0, #16]
544 ; CHECK-NEXT: vmov r1, r2, d0
545 ; CHECK-NEXT: vmov r3, r12, d1
546 ; CHECK-NEXT: vldrw.u32 q0, [r0]
547 ; CHECK-NEXT: vmov r4, r5, d0
548 ; CHECK-NEXT: vmov r0, lr, d1
549 ; CHECK-NEXT: ldrb r1, [r1]
550 ; CHECK-NEXT: ldrb r6, [r3]
551 ; CHECK-NEXT: ldrb r2, [r2]
552 ; CHECK-NEXT: ldrb r4, [r4]
553 ; CHECK-NEXT: ldrb r5, [r5]
554 ; CHECK-NEXT: vmov.16 q0[0], r4
555 ; CHECK-NEXT: ldrb r0, [r0]
556 ; CHECK-NEXT: vmov.16 q0[1], r5
557 ; CHECK-NEXT: ldrb.w r3, [lr]
558 ; CHECK-NEXT: vmov.16 q0[2], r0
559 ; CHECK-NEXT: ldrb.w r12, [r12]
560 ; CHECK-NEXT: vmov.16 q0[3], r3
561 ; CHECK-NEXT: vmov.16 q0[4], r1
562 ; CHECK-NEXT: vmov.16 q0[5], r2
563 ; CHECK-NEXT: vmov.16 q0[6], r6
564 ; CHECK-NEXT: vmov.16 q0[7], r12
565 ; CHECK-NEXT: pop {r4, r5, r6, pc}
567 %offs = load <8 x ptr>, ptr %offptr, align 4
568 %gather = call <8 x i8> @llvm.masked.gather.v8i8.v8p0(<8 x ptr> %offs, i32 1, <8 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>, <8 x i8> undef)
572 define arm_aapcs_vfpcc <4 x i32> @ptr_v4i8_sext32(ptr %offptr) {
573 ; CHECK-LABEL: ptr_v4i8_sext32:
574 ; CHECK: @ %bb.0: @ %entry
575 ; CHECK-NEXT: vldrw.u32 q1, [r0]
576 ; CHECK-NEXT: movs r1, #0
577 ; CHECK-NEXT: vldrb.s32 q0, [r1, q1]
580 %offs = load <4 x ptr>, ptr %offptr, align 4
581 %gather = call <4 x i8> @llvm.masked.gather.v4i8.v4p0(<4 x ptr> %offs, i32 1, <4 x i1> <i1 true, i1 true, i1 true, i1 true>, <4 x i8> undef)
582 %ext = sext <4 x i8> %gather to <4 x i32>
586 define arm_aapcs_vfpcc <4 x i32> @ptr_v4i8_zext32(ptr %offptr) {
587 ; CHECK-LABEL: ptr_v4i8_zext32:
588 ; CHECK: @ %bb.0: @ %entry
589 ; CHECK-NEXT: vldrw.u32 q1, [r0]
590 ; CHECK-NEXT: movs r1, #0
591 ; CHECK-NEXT: vldrb.u32 q0, [r1, q1]
594 %offs = load <4 x ptr>, ptr %offptr, align 4
595 %gather = call <4 x i8> @llvm.masked.gather.v4i8.v4p0(<4 x ptr> %offs, i32 1, <4 x i1> <i1 true, i1 true, i1 true, i1 true>, <4 x i8> undef)
596 %ext = zext <4 x i8> %gather to <4 x i32>
600 define arm_aapcs_vfpcc <4 x i8> @ptr_v4i8(ptr %offptr) {
601 ; CHECK-LABEL: ptr_v4i8:
602 ; CHECK: @ %bb.0: @ %entry
603 ; CHECK-NEXT: vldrw.u32 q1, [r0]
604 ; CHECK-NEXT: movs r1, #0
605 ; CHECK-NEXT: vldrb.u32 q0, [r1, q1]
608 %offs = load <4 x ptr>, ptr %offptr, align 4
609 %gather = call <4 x i8> @llvm.masked.gather.v4i8.v4p0(<4 x ptr> %offs, i32 1, <4 x i1> <i1 true, i1 true, i1 true, i1 true>, <4 x i8> undef)
613 define arm_aapcs_vfpcc <8 x i32> @ptr_v8i8_sext32(ptr %offptr) {
614 ; CHECK-LABEL: ptr_v8i8_sext32:
615 ; CHECK: @ %bb.0: @ %entry
616 ; CHECK-NEXT: .save {r4, r5, r6, r7, lr}
617 ; CHECK-NEXT: push {r4, r5, r6, r7, lr}
618 ; CHECK-NEXT: vldrw.u32 q0, [r0, #16]
619 ; CHECK-NEXT: vmov r1, r2, d1
620 ; CHECK-NEXT: vmov r3, r12, d0
621 ; CHECK-NEXT: vldrw.u32 q0, [r0]
622 ; CHECK-NEXT: vmov r0, lr, d1
623 ; CHECK-NEXT: ldrb r7, [r2]
624 ; CHECK-NEXT: vmov r2, r4, d0
625 ; CHECK-NEXT: ldrb r6, [r1]
626 ; CHECK-NEXT: ldrb r3, [r3]
627 ; CHECK-NEXT: ldrb r0, [r0]
628 ; CHECK-NEXT: ldrb.w r1, [r12]
629 ; CHECK-NEXT: vmov q1[2], q1[0], r3, r6
630 ; CHECK-NEXT: ldrb.w r5, [lr]
631 ; CHECK-NEXT: vmov q1[3], q1[1], r1, r7
632 ; CHECK-NEXT: vmovlb.s8 q1, q1
633 ; CHECK-NEXT: vmovlb.s16 q1, q1
634 ; CHECK-NEXT: ldrb r2, [r2]
635 ; CHECK-NEXT: ldrb r4, [r4]
636 ; CHECK-NEXT: vmov q0[2], q0[0], r2, r0
637 ; CHECK-NEXT: vmov q0[3], q0[1], r4, r5
638 ; CHECK-NEXT: vmovlb.s8 q0, q0
639 ; CHECK-NEXT: vmovlb.s16 q0, q0
640 ; CHECK-NEXT: pop {r4, r5, r6, r7, pc}
642 %offs = load <8 x ptr>, ptr %offptr, align 4
643 %gather = call <8 x i8> @llvm.masked.gather.v8i8.v8p0(<8 x ptr> %offs, i32 1, <8 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>, <8 x i8> undef)
644 %ext = sext <8 x i8> %gather to <8 x i32>
648 define arm_aapcs_vfpcc <8 x i32> @ptr_v8i8_zext32(ptr %offptr) {
649 ; CHECK-LABEL: ptr_v8i8_zext32:
650 ; CHECK: @ %bb.0: @ %entry
651 ; CHECK-NEXT: .save {r4, r5, r6, r7, lr}
652 ; CHECK-NEXT: push {r4, r5, r6, r7, lr}
653 ; CHECK-NEXT: vldrw.u32 q0, [r0, #16]
654 ; CHECK-NEXT: vmov.i32 q1, #0xff
655 ; CHECK-NEXT: vmov r1, r2, d1
656 ; CHECK-NEXT: vmov r12, r3, d0
657 ; CHECK-NEXT: vldrw.u32 q0, [r0]
658 ; CHECK-NEXT: vmov r4, r5, d0
659 ; CHECK-NEXT: vmov r0, lr, d1
660 ; CHECK-NEXT: ldrb r7, [r2]
661 ; CHECK-NEXT: ldrb r1, [r1]
662 ; CHECK-NEXT: ldrb.w r2, [r12]
663 ; CHECK-NEXT: ldrb r4, [r4]
664 ; CHECK-NEXT: ldrb r0, [r0]
665 ; CHECK-NEXT: vmov q2[2], q2[0], r2, r1
666 ; CHECK-NEXT: ldrb r3, [r3]
667 ; CHECK-NEXT: ldrb.w r6, [lr]
668 ; CHECK-NEXT: vmov q0[2], q0[0], r4, r0
669 ; CHECK-NEXT: ldrb r5, [r5]
670 ; CHECK-NEXT: vmov q2[3], q2[1], r3, r7
671 ; CHECK-NEXT: vmov q0[3], q0[1], r5, r6
672 ; CHECK-NEXT: vand q0, q0, q1
673 ; CHECK-NEXT: vand q1, q2, q1
674 ; CHECK-NEXT: pop {r4, r5, r6, r7, pc}
676 %offs = load <8 x ptr>, ptr %offptr, align 4
677 %gather = call <8 x i8> @llvm.masked.gather.v8i8.v8p0(<8 x ptr> %offs, i32 1, <8 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>, <8 x i8> undef)
678 %ext = zext <8 x i8> %gather to <8 x i32>
684 define void @foo_ptr_p_int32_t(ptr %dest, ptr %src, i32 %n) {
685 ; CHECK-LABEL: foo_ptr_p_int32_t:
686 ; CHECK: @ %bb.0: @ %entry
687 ; CHECK-NEXT: .save {r7, lr}
688 ; CHECK-NEXT: push {r7, lr}
689 ; CHECK-NEXT: bic r2, r2, #15
690 ; CHECK-NEXT: cmp r2, #1
692 ; CHECK-NEXT: poplt {r7, pc}
693 ; CHECK-NEXT: .LBB26_1: @ %vector.body.preheader
694 ; CHECK-NEXT: subs r2, #4
695 ; CHECK-NEXT: movs r3, #1
696 ; CHECK-NEXT: add.w lr, r3, r2, lsr #2
697 ; CHECK-NEXT: .LBB26_2: @ %vector.body
698 ; CHECK-NEXT: @ =>This Inner Loop Header: Depth=1
699 ; CHECK-NEXT: vldrw.u32 q0, [r1], #16
700 ; CHECK-NEXT: vptt.i32 ne, q0, zr
701 ; CHECK-NEXT: vldrwt.u32 q1, [q0]
702 ; CHECK-NEXT: vstrwt.32 q1, [r0], #16
703 ; CHECK-NEXT: le lr, .LBB26_2
704 ; CHECK-NEXT: @ %bb.3: @ %for.end
705 ; CHECK-NEXT: pop {r7, pc}
707 %and = and i32 %n, -16
708 %cmp11 = icmp sgt i32 %and, 0
709 br i1 %cmp11, label %vector.body, label %for.end
711 vector.body: ; preds = %vector.body, %entry
712 %index = phi i32 [ %index.next, %vector.body ], [ 0, %entry ]
713 %i = getelementptr inbounds ptr, ptr %src, i32 %index
714 %wide.load = load <4 x ptr>, ptr %i, align 4
715 %i2 = icmp ne <4 x ptr> %wide.load, zeroinitializer
716 %wide.masked.gather = call <4 x i32> @llvm.masked.gather.v4i32.v4p0(<4 x ptr> %wide.load, i32 4, <4 x i1> %i2, <4 x i32> undef)
717 %i3 = getelementptr inbounds i32, ptr %dest, i32 %index
718 call void @llvm.masked.store.v4i32.p0(<4 x i32> %wide.masked.gather, ptr %i3, i32 4, <4 x i1> %i2)
719 %index.next = add i32 %index, 4
720 %i5 = icmp eq i32 %index.next, %and
721 br i1 %i5, label %for.end, label %vector.body
723 for.end: ; preds = %vector.body, %entry
727 define void @foo_ptr_p_float(ptr %dest, ptr %src, i32 %n) {
728 ; CHECK-LABEL: foo_ptr_p_float:
729 ; CHECK: @ %bb.0: @ %entry
730 ; CHECK-NEXT: .save {r7, lr}
731 ; CHECK-NEXT: push {r7, lr}
732 ; CHECK-NEXT: bic r2, r2, #15
733 ; CHECK-NEXT: cmp r2, #1
735 ; CHECK-NEXT: poplt {r7, pc}
736 ; CHECK-NEXT: .LBB27_1: @ %vector.body.preheader
737 ; CHECK-NEXT: subs r2, #4
738 ; CHECK-NEXT: movs r3, #1
739 ; CHECK-NEXT: add.w lr, r3, r2, lsr #2
740 ; CHECK-NEXT: .LBB27_2: @ %vector.body
741 ; CHECK-NEXT: @ =>This Inner Loop Header: Depth=1
742 ; CHECK-NEXT: vldrw.u32 q0, [r1], #16
743 ; CHECK-NEXT: vptt.i32 ne, q0, zr
744 ; CHECK-NEXT: vldrwt.u32 q1, [q0]
745 ; CHECK-NEXT: vstrwt.32 q1, [r0], #16
746 ; CHECK-NEXT: le lr, .LBB27_2
747 ; CHECK-NEXT: @ %bb.3: @ %for.end
748 ; CHECK-NEXT: pop {r7, pc}
750 %and = and i32 %n, -16
751 %cmp11 = icmp sgt i32 %and, 0
752 br i1 %cmp11, label %vector.body, label %for.end
754 vector.body: ; preds = %vector.body, %entry
755 %index = phi i32 [ %index.next, %vector.body ], [ 0, %entry ]
756 %i = getelementptr inbounds ptr, ptr %src, i32 %index
757 %wide.load = load <4 x ptr>, ptr %i, align 4
758 %i2 = icmp ne <4 x ptr> %wide.load, zeroinitializer
759 %i3 = bitcast <4 x ptr> %wide.load to <4 x ptr>
760 %wide.masked.gather = call <4 x i32> @llvm.masked.gather.v4i32.v4p0(<4 x ptr> %i3, i32 4, <4 x i1> %i2, <4 x i32> undef)
761 %i4 = getelementptr inbounds float, ptr %dest, i32 %index
762 call void @llvm.masked.store.v4i32.p0(<4 x i32> %wide.masked.gather, ptr %i4, i32 4, <4 x i1> %i2)
763 %index.next = add i32 %index, 4
764 %i6 = icmp eq i32 %index.next, %and
765 br i1 %i6, label %for.end, label %vector.body
767 for.end: ; preds = %vector.body, %entry
771 define arm_aapcs_vfpcc <4 x i32> @qi4(<4 x ptr> %p) {
773 ; CHECK: @ %bb.0: @ %entry
774 ; CHECK-NEXT: movs r0, #16
775 ; CHECK-NEXT: vadd.i32 q1, q0, r0
776 ; CHECK-NEXT: vldrw.u32 q0, [q1]
779 %g = getelementptr inbounds i32, <4 x ptr> %p, i32 4
780 %gather = call <4 x i32> @llvm.masked.gather.v4i32.v4p0(<4 x ptr> %g, i32 4, <4 x i1> <i1 true, i1 true, i1 true, i1 true>, <4 x i32> undef)
781 ret <4 x i32> %gather
784 define arm_aapcs_vfpcc <8 x i32> @sext_unsigned_unscaled_i8_i8_toi64(ptr %base, ptr %offptr) {
785 ; CHECK-LABEL: sext_unsigned_unscaled_i8_i8_toi64:
786 ; CHECK: @ %bb.0: @ %entry
787 ; CHECK-NEXT: vldrb.u16 q0, [r1]
788 ; CHECK-NEXT: vldrb.u16 q1, [r0, q0]
789 ; CHECK-NEXT: vmov.u16 r0, q1[2]
790 ; CHECK-NEXT: vmov.u16 r1, q1[0]
791 ; CHECK-NEXT: vmov q0[2], q0[0], r1, r0
792 ; CHECK-NEXT: vmov.u16 r0, q1[3]
793 ; CHECK-NEXT: vmov.u16 r1, q1[1]
794 ; CHECK-NEXT: vmov q0[3], q0[1], r1, r0
795 ; CHECK-NEXT: vmov.u16 r0, q1[6]
796 ; CHECK-NEXT: vmov.u16 r1, q1[4]
797 ; CHECK-NEXT: vmovlb.s8 q0, q0
798 ; CHECK-NEXT: vmov q2[2], q2[0], r1, r0
799 ; CHECK-NEXT: vmov.u16 r0, q1[7]
800 ; CHECK-NEXT: vmov.u16 r1, q1[5]
801 ; CHECK-NEXT: vmovlb.s16 q0, q0
802 ; CHECK-NEXT: vmov q2[3], q2[1], r1, r0
803 ; CHECK-NEXT: vmovlb.s8 q1, q2
804 ; CHECK-NEXT: vmovlb.s16 q1, q1
807 %offs = load <8 x i8>, ptr %offptr, align 1
808 %offs.zext = zext <8 x i8> %offs to <8 x i32>
809 %ptrs = getelementptr inbounds i8, ptr %base, <8 x i32> %offs.zext
810 %gather = call <8 x i8> @llvm.masked.gather.v8i8.v8p0(<8 x ptr> %ptrs, i32 1, <8 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>, <8 x i8> undef)
811 %gather.sext = sext <8 x i8> %gather to <8 x i32>
812 ret <8 x i32> %gather.sext
815 define arm_aapcs_vfpcc <4 x i32> @gepconstoff_i32(ptr %base) {
816 ; CHECK-LABEL: gepconstoff_i32:
817 ; CHECK: @ %bb.0: @ %bb
818 ; CHECK-NEXT: adr r1, .LCPI30_0
819 ; CHECK-NEXT: vldrw.u32 q1, [r1]
820 ; CHECK-NEXT: vldrw.u32 q0, [r0, q1, uxtw #2]
822 ; CHECK-NEXT: .p2align 4
823 ; CHECK-NEXT: @ %bb.1:
824 ; CHECK-NEXT: .LCPI30_0:
825 ; CHECK-NEXT: .long 0 @ 0x0
826 ; CHECK-NEXT: .long 4 @ 0x4
827 ; CHECK-NEXT: .long 8 @ 0x8
828 ; CHECK-NEXT: .long 12 @ 0xc
830 %a = getelementptr i32, ptr %base, <4 x i32> <i32 0, i32 4, i32 8, i32 12>
831 %g = call <4 x i32> @llvm.masked.gather.v4i32.v4p0(<4 x ptr> %a, i32 4, <4 x i1> <i1 true, i1 true, i1 true, i1 true>, <4 x i32> poison)
835 define arm_aapcs_vfpcc <4 x i32> @gepconstoff_i8(ptr %base) {
836 ; CHECK-LABEL: gepconstoff_i8:
837 ; CHECK: @ %bb.0: @ %bb
838 ; CHECK-NEXT: adr r1, .LCPI31_0
839 ; CHECK-NEXT: vldrw.u32 q1, [r1]
840 ; CHECK-NEXT: vldrw.u32 q0, [r0, q1]
842 ; CHECK-NEXT: .p2align 4
843 ; CHECK-NEXT: @ %bb.1:
844 ; CHECK-NEXT: .LCPI31_0:
845 ; CHECK-NEXT: .long 4294967292 @ 0xfffffffc
846 ; CHECK-NEXT: .long 12 @ 0xc
847 ; CHECK-NEXT: .long 28 @ 0x1c
848 ; CHECK-NEXT: .long 44 @ 0x2c
850 %a = getelementptr i8, ptr %base, <4 x i32> <i32 0, i32 16, i32 32, i32 48>
851 %b = bitcast <4 x ptr> %a to <4 x ptr>
852 %c = getelementptr inbounds i32, <4 x ptr> %b, i32 -1
853 %g = call <4 x i32> @llvm.masked.gather.v4i32.v4p0(<4 x ptr> %c, i32 4, <4 x i1> <i1 true, i1 true, i1 true, i1 true>, <4 x i32> poison)
857 define arm_aapcs_vfpcc <4 x i32> @gepconstoff3_i16(ptr %base) {
858 ; CHECK-LABEL: gepconstoff3_i16:
859 ; CHECK: @ %bb.0: @ %bb
860 ; CHECK-NEXT: adr r1, .LCPI32_0
861 ; CHECK-NEXT: vldrw.u32 q1, [r1]
862 ; CHECK-NEXT: vldrw.u32 q0, [r0, q1]
864 ; CHECK-NEXT: .p2align 4
865 ; CHECK-NEXT: @ %bb.1:
866 ; CHECK-NEXT: .LCPI32_0:
867 ; CHECK-NEXT: .long 12 @ 0xc
868 ; CHECK-NEXT: .long 18 @ 0x12
869 ; CHECK-NEXT: .long 58 @ 0x3a
870 ; CHECK-NEXT: .long 280 @ 0x118
872 %a = getelementptr i16, ptr %base, <4 x i32> <i32 0, i32 16, i32 32, i32 48>
873 %b = bitcast <4 x ptr> %a to <4 x ptr>
874 %c = getelementptr i8, <4 x ptr> %b, <4 x i32> <i32 16, i32 -10, i32 -2, i32 188>
875 %d = bitcast <4 x ptr> %c to <4 x ptr>
876 %e = getelementptr inbounds i32, <4 x ptr> %d, i32 -1
877 %g = call <4 x i32> @llvm.masked.gather.v4i32.v4p0(<4 x ptr> %e, i32 4, <4 x i1> <i1 true, i1 true, i1 true, i1 true>, <4 x i32> poison)
881 ; Declarations of the masked gather/scatter-store intrinsics exercised by the
881 ; tests above. Each @llvm.masked.gather.* takes (pointers, alignment, mask,
881 ; passthru) and returns the gathered vector; @llvm.masked.store.* takes
881 ; (value, pointer, alignment, mask). Some widths declared here (v16i16,
881 ; v16f16, v32i8) have no visible user in this chunk — presumably used by
881 ; tests outside the visible region; confirm before removing.
881 declare <2 x i32> @llvm.masked.gather.v2i32.v2p0(<2 x ptr>, i32, <2 x i1>, <2 x i32>)
882 declare <4 x i32> @llvm.masked.gather.v4i32.v4p0(<4 x ptr>, i32, <4 x i1>, <4 x i32>)
883 declare <8 x i32> @llvm.masked.gather.v8i32.v8p0(<8 x ptr>, i32, <8 x i1>, <8 x i32>)
884 declare <16 x i32> @llvm.masked.gather.v16i32.v16p0(<16 x ptr>, i32, <16 x i1>, <16 x i32>)
885 declare <2 x float> @llvm.masked.gather.v2f32.v2p0(<2 x ptr>, i32, <2 x i1>, <2 x float>)
886 declare <4 x float> @llvm.masked.gather.v4f32.v4p0(<4 x ptr>, i32, <4 x i1>, <4 x float>)
887 declare <8 x float> @llvm.masked.gather.v8f32.v8p0(<8 x ptr>, i32, <8 x i1>, <8 x float>)
888 declare <2 x i16> @llvm.masked.gather.v2i16.v2p0(<2 x ptr>, i32, <2 x i1>, <2 x i16>)
889 declare <4 x i16> @llvm.masked.gather.v4i16.v4p0(<4 x ptr>, i32, <4 x i1>, <4 x i16>)
890 declare <8 x i16> @llvm.masked.gather.v8i16.v8p0(<8 x ptr>, i32, <8 x i1>, <8 x i16>)
891 declare <16 x i16> @llvm.masked.gather.v16i16.v16p0(<16 x ptr>, i32, <16 x i1>, <16 x i16>)
892 declare <4 x half> @llvm.masked.gather.v4f16.v4p0(<4 x ptr>, i32, <4 x i1>, <4 x half>)
893 declare <8 x half> @llvm.masked.gather.v8f16.v8p0(<8 x ptr>, i32, <8 x i1>, <8 x half>)
894 declare <16 x half> @llvm.masked.gather.v16f16.v16p0(<16 x ptr>, i32, <16 x i1>, <16 x half>)
895 declare <4 x i8> @llvm.masked.gather.v4i8.v4p0(<4 x ptr>, i32, <4 x i1>, <4 x i8>)
896 declare <8 x i8> @llvm.masked.gather.v8i8.v8p0(<8 x ptr>, i32, <8 x i1>, <8 x i8>)
897 declare <16 x i8> @llvm.masked.gather.v16i8.v16p0(<16 x ptr>, i32, <16 x i1>, <16 x i8>)
898 declare <32 x i8> @llvm.masked.gather.v32i8.v32p0(<32 x ptr>, i32, <32 x i1>, <32 x i8>)
899 declare void @llvm.masked.store.v4i32.p0(<4 x i32>, ptr, i32, <4 x i1>)