1 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2 ; RUN: llc -aarch64-sve-vector-bits-min=256 < %s | FileCheck %s -check-prefixes=CHECK,VBITS_GE_256
3 ; RUN: llc -aarch64-sve-vector-bits-min=512 < %s | FileCheck %s -check-prefixes=CHECK,VBITS_GE_512
4 ; RUN: llc -aarch64-sve-vector-bits-min=2048 < %s | FileCheck %s -check-prefixes=CHECK,VBITS_GE_512
; Every function in this file is compiled for the AArch64 Linux GNU target.
6 target triple = "aarch64-unknown-linux-gnu"
; Gather of two i8 elements with an all-true mask. The <2 x ptr> address vector
; is loaded from %b and the <2 x i8> result is stored to %a. The function carries
; vscale_range(2,0), i.e. a minimum vscale of 2 and no upper bound, and is
; checked with the prefix common to all run lines. Expected code gathers bytes
; into 64-bit lanes (ld1b on z0.d), narrows with xtn, then stores with st1b from
; 32-bit lanes under a fresh vl2 predicate.
12 define void @masked_gather_v2i8(ptr %a, ptr %b) vscale_range(2,0) #0 {
13 ; CHECK-LABEL: masked_gather_v2i8:
15 ; CHECK-NEXT: ptrue p0.d, vl2
16 ; CHECK-NEXT: ldr q0, [x1]
17 ; CHECK-NEXT: ld1b { z0.d }, p0/z, [z0.d]
18 ; CHECK-NEXT: ptrue p0.s, vl2
19 ; CHECK-NEXT: xtn v0.2s, v0.2d
20 ; CHECK-NEXT: st1b { z0.s }, p0, [x0]
22 %ptrs = load <2 x ptr>, ptr %b
23 %vals = call <2 x i8> @llvm.masked.gather.v2i8(<2 x ptr> %ptrs, i32 8, <2 x i1> <i1 true, i1 true>, <2 x i8> undef)
24 store <2 x i8> %vals, ptr %a
; Gather of four i8 elements with an all-true mask. The <4 x ptr> address vector
; is loaded from %b and the <4 x i8> result is stored to %a. The function carries
; vscale_range(2,0) and is checked with the prefix common to all run lines.
; Expected code loads the pointers with ld1d under a vl4 predicate, gathers bytes
; into 64-bit lanes and stores them with st1b from those same lanes.
28 define void @masked_gather_v4i8(ptr %a, ptr %b) vscale_range(2,0) #0 {
29 ; CHECK-LABEL: masked_gather_v4i8:
31 ; CHECK-NEXT: ptrue p0.d, vl4
32 ; CHECK-NEXT: ld1d { z0.d }, p0/z, [x1]
33 ; CHECK-NEXT: ld1b { z0.d }, p0/z, [z0.d]
34 ; CHECK-NEXT: st1b { z0.d }, p0, [x0]
36 %ptrs = load <4 x ptr>, ptr %b
37 %vals = call <4 x i8> @llvm.masked.gather.v4i8(<4 x ptr> %ptrs, i32 8, <4 x i1> <i1 true, i1 true, i1 true, i1 true>, <4 x i8> undef)
38 store <4 x i8> %vals, ptr %a
; Gather of eight i8 elements with an all-true mask. The <8 x ptr> address vector
; is loaded from %b and the <8 x i8> result is stored to %a. There is no
; vscale_range attribute here, and the expected code differs per run
; configuration.
;  - VBITS_GE_256 prefix - the pointers are loaded as two vl4 halves (the upper
;    half at element offset x8 = 4), each half is gathered separately, narrowed
;    with uzp1, and the halves are joined by a NEON uzp1 before a str of d0.
;  - VBITS_GE_512 prefix - a single vl8 pointer load and gather, narrowed by
;    three uzp1 steps (s, h, b) before a str of d0.
42 define void @masked_gather_v8i8(ptr %a, ptr %b) #0 {
43 ; VBITS_GE_256-LABEL: masked_gather_v8i8:
44 ; VBITS_GE_256: // %bb.0:
45 ; VBITS_GE_256-NEXT: ptrue p0.d, vl4
46 ; VBITS_GE_256-NEXT: mov x8, #4 // =0x4
47 ; VBITS_GE_256-NEXT: ld1d { z0.d }, p0/z, [x1, x8, lsl #3]
48 ; VBITS_GE_256-NEXT: ld1d { z1.d }, p0/z, [x1]
49 ; VBITS_GE_256-NEXT: ld1b { z0.d }, p0/z, [z0.d]
50 ; VBITS_GE_256-NEXT: ld1b { z1.d }, p0/z, [z1.d]
51 ; VBITS_GE_256-NEXT: uzp1 z0.s, z0.s, z0.s
52 ; VBITS_GE_256-NEXT: uzp1 z1.s, z1.s, z1.s
53 ; VBITS_GE_256-NEXT: uzp1 z0.h, z0.h, z0.h
54 ; VBITS_GE_256-NEXT: uzp1 z1.h, z1.h, z1.h
55 ; VBITS_GE_256-NEXT: uzp1 v0.8b, v1.8b, v0.8b
56 ; VBITS_GE_256-NEXT: str d0, [x0]
57 ; VBITS_GE_256-NEXT: ret
59 ; VBITS_GE_512-LABEL: masked_gather_v8i8:
60 ; VBITS_GE_512: // %bb.0:
61 ; VBITS_GE_512-NEXT: ptrue p0.d, vl8
62 ; VBITS_GE_512-NEXT: ld1d { z0.d }, p0/z, [x1]
63 ; VBITS_GE_512-NEXT: ld1b { z0.d }, p0/z, [z0.d]
64 ; VBITS_GE_512-NEXT: uzp1 z0.s, z0.s, z0.s
65 ; VBITS_GE_512-NEXT: uzp1 z0.h, z0.h, z0.h
66 ; VBITS_GE_512-NEXT: uzp1 z0.b, z0.b, z0.b
67 ; VBITS_GE_512-NEXT: str d0, [x0]
68 ; VBITS_GE_512-NEXT: ret
69 %ptrs = load <8 x ptr>, ptr %b
70 %vals = call <8 x i8> @llvm.masked.gather.v8i8(<8 x ptr> %ptrs, i32 8, <8 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>, <8 x i8> undef)
71 store <8 x i8> %vals, ptr %a
; Gather of sixteen i8 elements with an all-true mask. The <16 x ptr> address
; vector is loaded from %b and the <16 x i8> result is stored to %a. The function
; carries vscale_range(8,0), i.e. a minimum vscale of 8 and no upper bound, and
; is checked with the prefix common to all run lines. Expected code uses a vl16
; predicate for the pointer load and gather, narrows with three uzp1 steps
; (s, h, b) and stores the result with a str of q0.
75 define void @masked_gather_v16i8(ptr %a, ptr %b) vscale_range(8,0) #0 {
76 ; CHECK-LABEL: masked_gather_v16i8:
78 ; CHECK-NEXT: ptrue p0.d, vl16
79 ; CHECK-NEXT: ld1d { z0.d }, p0/z, [x1]
80 ; CHECK-NEXT: ld1b { z0.d }, p0/z, [z0.d]
81 ; CHECK-NEXT: uzp1 z0.s, z0.s, z0.s
82 ; CHECK-NEXT: uzp1 z0.h, z0.h, z0.h
83 ; CHECK-NEXT: uzp1 z0.b, z0.b, z0.b
84 ; CHECK-NEXT: str q0, [x0]
86 %ptrs = load <16 x ptr>, ptr %b
87 %vals = call <16 x i8> @llvm.masked.gather.v16i8(<16 x ptr> %ptrs, i32 8, <16 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true,
88 i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>, <16 x i8> undef)
89 store <16 x i8> %vals, ptr %a
; Gather of thirty-two i8 elements with an all-true mask. The <32 x ptr> address
; vector is loaded from %b and the <32 x i8> result is stored to %a. The function
; carries vscale_range(16,0), i.e. a minimum vscale of 16 and no upper bound, and
; is checked with the prefix common to all run lines. Expected code uses one vl32
; predicate for the pointer load, the byte gather into 64-bit lanes and the st1b
; store from those lanes.
93 define void @masked_gather_v32i8(ptr %a, ptr %b) vscale_range(16,0) #0 {
94 ; CHECK-LABEL: masked_gather_v32i8:
96 ; CHECK-NEXT: ptrue p0.d, vl32
97 ; CHECK-NEXT: ld1d { z0.d }, p0/z, [x1]
98 ; CHECK-NEXT: ld1b { z0.d }, p0/z, [z0.d]
99 ; CHECK-NEXT: st1b { z0.d }, p0, [x0]
101 %ptrs = load <32 x ptr>, ptr %b
102 %vals = call <32 x i8> @llvm.masked.gather.v32i8(<32 x ptr> %ptrs, i32 8, <32 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true,
103 i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true,
104 i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true,
105 i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>, <32 x i8> undef)
106 store <32 x i8> %vals, ptr %a
; Gather of two i16 elements with an all-true mask. The <2 x ptr> address vector
; is loaded from %b and the <2 x i16> result is stored to %a. The function
; carries vscale_range(2,0) and is checked with the prefix common to all run
; lines. Expected code gathers halfwords into 64-bit lanes (ld1h on z0.d),
; narrows with xtn, then stores with st1h from 32-bit lanes under a fresh vl2
; predicate.
114 define void @masked_gather_v2i16(ptr %a, ptr %b) vscale_range(2,0) #0 {
115 ; CHECK-LABEL: masked_gather_v2i16:
117 ; CHECK-NEXT: ptrue p0.d, vl2
118 ; CHECK-NEXT: ldr q0, [x1]
119 ; CHECK-NEXT: ld1h { z0.d }, p0/z, [z0.d]
120 ; CHECK-NEXT: ptrue p0.s, vl2
121 ; CHECK-NEXT: xtn v0.2s, v0.2d
122 ; CHECK-NEXT: st1h { z0.s }, p0, [x0]
124 %ptrs = load <2 x ptr>, ptr %b
125 %vals = call <2 x i16> @llvm.masked.gather.v2i16(<2 x ptr> %ptrs, i32 8, <2 x i1> <i1 true, i1 true>, <2 x i16> undef)
126 store <2 x i16> %vals, ptr %a
; Gather of four i16 elements with an all-true mask. The <4 x ptr> address vector
; is loaded from %b and the <4 x i16> result is stored to %a. The function
; carries vscale_range(2,0) and is checked with the prefix common to all run
; lines. Expected code loads the pointers with ld1d under a vl4 predicate,
; gathers halfwords into 64-bit lanes, narrows with two uzp1 steps (s, h) and
; stores the result with a str of d0.
130 define void @masked_gather_v4i16(ptr %a, ptr %b) vscale_range(2,0) #0 {
131 ; CHECK-LABEL: masked_gather_v4i16:
133 ; CHECK-NEXT: ptrue p0.d, vl4
134 ; CHECK-NEXT: ld1d { z0.d }, p0/z, [x1]
135 ; CHECK-NEXT: ld1h { z0.d }, p0/z, [z0.d]
136 ; CHECK-NEXT: uzp1 z0.s, z0.s, z0.s
137 ; CHECK-NEXT: uzp1 z0.h, z0.h, z0.h
138 ; CHECK-NEXT: str d0, [x0]
140 %ptrs = load <4 x ptr>, ptr %b
141 %vals = call <4 x i16> @llvm.masked.gather.v4i16(<4 x ptr> %ptrs, i32 8, <4 x i1> <i1 true, i1 true, i1 true, i1 true>, <4 x i16> undef)
142 store <4 x i16> %vals, ptr %a
; Gather of eight i16 elements with an all-true mask. The <8 x ptr> address
; vector is loaded from %b and the <8 x i16> result is stored to %a. There is no
; vscale_range attribute here, and the expected code differs per run
; configuration.
;  - VBITS_GE_256 prefix - the pointers are loaded as two vl4 halves (the upper
;    half at element offset x8 = 4), each half is gathered and narrowed with
;    uzp1 separately, then the upper result is inserted into the high 64 bits of
;    v1 before a str of q1.
;  - VBITS_GE_512 prefix - a single vl8 pointer load and gather, narrowed by two
;    uzp1 steps (s, h) before a str of q0.
146 define void @masked_gather_v8i16(ptr %a, ptr %b) #0 {
147 ; VBITS_GE_256-LABEL: masked_gather_v8i16:
148 ; VBITS_GE_256: // %bb.0:
149 ; VBITS_GE_256-NEXT: ptrue p0.d, vl4
150 ; VBITS_GE_256-NEXT: mov x8, #4 // =0x4
151 ; VBITS_GE_256-NEXT: ld1d { z0.d }, p0/z, [x1, x8, lsl #3]
152 ; VBITS_GE_256-NEXT: ld1d { z1.d }, p0/z, [x1]
153 ; VBITS_GE_256-NEXT: ld1h { z0.d }, p0/z, [z0.d]
154 ; VBITS_GE_256-NEXT: ld1h { z1.d }, p0/z, [z1.d]
155 ; VBITS_GE_256-NEXT: uzp1 z0.s, z0.s, z0.s
156 ; VBITS_GE_256-NEXT: uzp1 z1.s, z1.s, z1.s
157 ; VBITS_GE_256-NEXT: uzp1 z0.h, z0.h, z0.h
158 ; VBITS_GE_256-NEXT: uzp1 z1.h, z1.h, z1.h
159 ; VBITS_GE_256-NEXT: mov v1.d[1], v0.d[0]
160 ; VBITS_GE_256-NEXT: str q1, [x0]
161 ; VBITS_GE_256-NEXT: ret
163 ; VBITS_GE_512-LABEL: masked_gather_v8i16:
164 ; VBITS_GE_512: // %bb.0:
165 ; VBITS_GE_512-NEXT: ptrue p0.d, vl8
166 ; VBITS_GE_512-NEXT: ld1d { z0.d }, p0/z, [x1]
167 ; VBITS_GE_512-NEXT: ld1h { z0.d }, p0/z, [z0.d]
168 ; VBITS_GE_512-NEXT: uzp1 z0.s, z0.s, z0.s
169 ; VBITS_GE_512-NEXT: uzp1 z0.h, z0.h, z0.h
170 ; VBITS_GE_512-NEXT: str q0, [x0]
171 ; VBITS_GE_512-NEXT: ret
172 %ptrs = load <8 x ptr>, ptr %b
173 %vals = call <8 x i16> @llvm.masked.gather.v8i16(<8 x ptr> %ptrs, i32 8, <8 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>, <8 x i16> undef)
174 store <8 x i16> %vals, ptr %a
; Gather of sixteen i16 elements with an all-true mask. The <16 x ptr> address
; vector is loaded from %b and the <16 x i16> result is stored to %a. The
; function carries vscale_range(8,0) and is checked with the prefix common to all
; run lines. Expected code uses one vl16 predicate for the pointer load, the
; halfword gather into 64-bit lanes and the st1h store from those lanes.
178 define void @masked_gather_v16i16(ptr %a, ptr %b) vscale_range(8,0) #0 {
179 ; CHECK-LABEL: masked_gather_v16i16:
181 ; CHECK-NEXT: ptrue p0.d, vl16
182 ; CHECK-NEXT: ld1d { z0.d }, p0/z, [x1]
183 ; CHECK-NEXT: ld1h { z0.d }, p0/z, [z0.d]
184 ; CHECK-NEXT: st1h { z0.d }, p0, [x0]
186 %ptrs = load <16 x ptr>, ptr %b
187 %vals = call <16 x i16> @llvm.masked.gather.v16i16(<16 x ptr> %ptrs, i32 8, <16 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true,
188 i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>, <16 x i16> undef)
189 store <16 x i16> %vals, ptr %a
; Gather of thirty-two i16 elements with an all-true mask. The <32 x ptr> address
; vector is loaded from %b and the <32 x i16> result is stored to %a. The
; function carries vscale_range(16,0) and is checked with the prefix common to
; all run lines. Expected code uses one vl32 predicate for the pointer load, the
; halfword gather into 64-bit lanes and the st1h store from those lanes.
193 define void @masked_gather_v32i16(ptr %a, ptr %b) vscale_range(16,0) #0 {
194 ; CHECK-LABEL: masked_gather_v32i16:
196 ; CHECK-NEXT: ptrue p0.d, vl32
197 ; CHECK-NEXT: ld1d { z0.d }, p0/z, [x1]
198 ; CHECK-NEXT: ld1h { z0.d }, p0/z, [z0.d]
199 ; CHECK-NEXT: st1h { z0.d }, p0, [x0]
201 %ptrs = load <32 x ptr>, ptr %b
202 %vals = call <32 x i16> @llvm.masked.gather.v32i16(<32 x ptr> %ptrs, i32 8, <32 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true,
203 i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true,
204 i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true,
205 i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>, <32 x i16> undef)
206 store <32 x i16> %vals, ptr %a
; Gather of two i32 elements with an all-true mask. The <2 x ptr> address vector
; is loaded from %b and the <2 x i32> result is stored to %a. The function
; carries vscale_range(2,0) and is checked with the prefix common to all run
; lines. Expected code gathers words into 64-bit lanes (ld1w on z0.d), narrows
; with xtn and stores the result with a str of d0.
214 define void @masked_gather_v2i32(ptr %a, ptr %b) vscale_range(2,0) #0 {
215 ; CHECK-LABEL: masked_gather_v2i32:
217 ; CHECK-NEXT: ptrue p0.d, vl2
218 ; CHECK-NEXT: ldr q0, [x1]
219 ; CHECK-NEXT: ld1w { z0.d }, p0/z, [z0.d]
220 ; CHECK-NEXT: xtn v0.2s, v0.2d
221 ; CHECK-NEXT: str d0, [x0]
223 %ptrs = load <2 x ptr>, ptr %b
224 %vals = call <2 x i32> @llvm.masked.gather.v2i32(<2 x ptr> %ptrs, i32 8, <2 x i1> <i1 true, i1 true>, <2 x i32> undef)
225 store <2 x i32> %vals, ptr %a
; Gather of four i32 elements with an all-true mask. The <4 x ptr> address vector
; is loaded from %b and the <4 x i32> result is stored to %a. The function
; carries vscale_range(2,0) and is checked with the prefix common to all run
; lines. Expected code loads the pointers with ld1d under a vl4 predicate,
; gathers words into 64-bit lanes, narrows with one uzp1 step (s) and stores the
; result with a str of q0.
229 define void @masked_gather_v4i32(ptr %a, ptr %b) vscale_range(2,0) #0 {
230 ; CHECK-LABEL: masked_gather_v4i32:
232 ; CHECK-NEXT: ptrue p0.d, vl4
233 ; CHECK-NEXT: ld1d { z0.d }, p0/z, [x1]
234 ; CHECK-NEXT: ld1w { z0.d }, p0/z, [z0.d]
235 ; CHECK-NEXT: uzp1 z0.s, z0.s, z0.s
236 ; CHECK-NEXT: str q0, [x0]
238 %ptrs = load <4 x ptr>, ptr %b
239 %vals = call <4 x i32> @llvm.masked.gather.v4i32(<4 x ptr> %ptrs, i32 8, <4 x i1> <i1 true, i1 true, i1 true, i1 true>, <4 x i32> undef)
240 store <4 x i32> %vals, ptr %a
; Gather of eight i32 elements with an all-true mask. The <8 x ptr> address
; vector is loaded from %b and the <8 x i32> result is stored to %a. There is no
; vscale_range attribute here, and the expected code differs per run
; configuration.
;  - VBITS_GE_256 prefix - the pointers are loaded as two vl4 halves (the upper
;    half at element offset x8 = 4), each half is gathered and narrowed with
;    uzp1 separately, the halves are joined by splice under a vl4 word
;    predicate, and the result is stored with st1w under a vl8 word predicate.
;  - VBITS_GE_512 prefix - a single vl8 pointer load and gather, stored with
;    st1w directly from the 64-bit lanes.
244 define void @masked_gather_v8i32(ptr %a, ptr %b) #0 {
245 ; VBITS_GE_256-LABEL: masked_gather_v8i32:
246 ; VBITS_GE_256: // %bb.0:
247 ; VBITS_GE_256-NEXT: ptrue p0.d, vl4
248 ; VBITS_GE_256-NEXT: mov x8, #4 // =0x4
249 ; VBITS_GE_256-NEXT: ptrue p1.s, vl8
250 ; VBITS_GE_256-NEXT: ld1d { z0.d }, p0/z, [x1, x8, lsl #3]
251 ; VBITS_GE_256-NEXT: ld1d { z1.d }, p0/z, [x1]
252 ; VBITS_GE_256-NEXT: ld1w { z0.d }, p0/z, [z0.d]
253 ; VBITS_GE_256-NEXT: ld1w { z1.d }, p0/z, [z1.d]
254 ; VBITS_GE_256-NEXT: ptrue p0.s, vl4
255 ; VBITS_GE_256-NEXT: uzp1 z0.s, z0.s, z0.s
256 ; VBITS_GE_256-NEXT: uzp1 z1.s, z1.s, z1.s
257 ; VBITS_GE_256-NEXT: splice z1.s, p0, z1.s, z0.s
258 ; VBITS_GE_256-NEXT: st1w { z1.s }, p1, [x0]
259 ; VBITS_GE_256-NEXT: ret
261 ; VBITS_GE_512-LABEL: masked_gather_v8i32:
262 ; VBITS_GE_512: // %bb.0:
263 ; VBITS_GE_512-NEXT: ptrue p0.d, vl8
264 ; VBITS_GE_512-NEXT: ld1d { z0.d }, p0/z, [x1]
265 ; VBITS_GE_512-NEXT: ld1w { z0.d }, p0/z, [z0.d]
266 ; VBITS_GE_512-NEXT: st1w { z0.d }, p0, [x0]
267 ; VBITS_GE_512-NEXT: ret
268 %ptrs = load <8 x ptr>, ptr %b
269 %vals = call <8 x i32> @llvm.masked.gather.v8i32(<8 x ptr> %ptrs, i32 8, <8 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>, <8 x i32> undef)
270 store <8 x i32> %vals, ptr %a
; Gather of sixteen i32 elements with an all-true mask. The <16 x ptr> address
; vector is loaded from %b and the <16 x i32> result is stored to %a. The
; function carries vscale_range(8,0) and is checked with the prefix common to all
; run lines. Expected code uses one vl16 predicate for the pointer load, the word
; gather into 64-bit lanes and the st1w store from those lanes.
274 define void @masked_gather_v16i32(ptr %a, ptr %b) vscale_range(8,0) #0 {
275 ; CHECK-LABEL: masked_gather_v16i32:
277 ; CHECK-NEXT: ptrue p0.d, vl16
278 ; CHECK-NEXT: ld1d { z0.d }, p0/z, [x1]
279 ; CHECK-NEXT: ld1w { z0.d }, p0/z, [z0.d]
280 ; CHECK-NEXT: st1w { z0.d }, p0, [x0]
282 %ptrs = load <16 x ptr>, ptr %b
283 %vals = call <16 x i32> @llvm.masked.gather.v16i32(<16 x ptr> %ptrs, i32 8, <16 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true,
284 i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>, <16 x i32> undef)
285 store <16 x i32> %vals, ptr %a
; Gather of thirty-two i32 elements with an all-true mask. The <32 x ptr> address
; vector is loaded from %b and the <32 x i32> result is stored to %a. The
; function carries vscale_range(16,0) and is checked with the prefix common to
; all run lines. Expected code uses one vl32 predicate for the pointer load, the
; word gather into 64-bit lanes and the st1w store from those lanes.
289 define void @masked_gather_v32i32(ptr %a, ptr %b) vscale_range(16,0) #0 {
290 ; CHECK-LABEL: masked_gather_v32i32:
292 ; CHECK-NEXT: ptrue p0.d, vl32
293 ; CHECK-NEXT: ld1d { z0.d }, p0/z, [x1]
294 ; CHECK-NEXT: ld1w { z0.d }, p0/z, [z0.d]
295 ; CHECK-NEXT: st1w { z0.d }, p0, [x0]
297 %ptrs = load <32 x ptr>, ptr %b
298 %vals = call <32 x i32> @llvm.masked.gather.v32i32(<32 x ptr> %ptrs, i32 8, <32 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true,
299 i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true,
300 i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true,
301 i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>, <32 x i32> undef)
302 store <32 x i32> %vals, ptr %a
; Gather of two i64 elements with an all-true mask. The <2 x ptr> address vector
; is loaded from %b and the <2 x i64> result is stored to %a. The function
; carries vscale_range(2,0) and is checked with the prefix common to all run
; lines. Expected code loads the pointers with a ldr of q0, gathers doublewords
; under a vl2 predicate and stores the result with a str of q0, with no
; narrowing step.
310 define void @masked_gather_v2i64(ptr %a, ptr %b) vscale_range(2,0) #0 {
311 ; CHECK-LABEL: masked_gather_v2i64:
313 ; CHECK-NEXT: ptrue p0.d, vl2
314 ; CHECK-NEXT: ldr q0, [x1]
315 ; CHECK-NEXT: ld1d { z0.d }, p0/z, [z0.d]
316 ; CHECK-NEXT: str q0, [x0]
318 %ptrs = load <2 x ptr>, ptr %b
319 %vals = call <2 x i64> @llvm.masked.gather.v2i64(<2 x ptr> %ptrs, i32 8, <2 x i1> <i1 true, i1 true>, <2 x i64> undef)
320 store <2 x i64> %vals, ptr %a
; Gather of four i64 elements with an all-true mask. The <4 x ptr> address vector
; is loaded from %b and the <4 x i64> result is stored to %a. The function
; carries vscale_range(2,0) and is checked with the prefix common to all run
; lines. Expected code uses one vl4 predicate for the pointer load, the
; doubleword gather and the st1d store.
324 define void @masked_gather_v4i64(ptr %a, ptr %b) vscale_range(2,0) #0 {
325 ; CHECK-LABEL: masked_gather_v4i64:
327 ; CHECK-NEXT: ptrue p0.d, vl4
328 ; CHECK-NEXT: ld1d { z0.d }, p0/z, [x1]
329 ; CHECK-NEXT: ld1d { z0.d }, p0/z, [z0.d]
330 ; CHECK-NEXT: st1d { z0.d }, p0, [x0]
332 %ptrs = load <4 x ptr>, ptr %b
333 %vals = call <4 x i64> @llvm.masked.gather.v4i64(<4 x ptr> %ptrs, i32 8, <4 x i1> <i1 true, i1 true, i1 true, i1 true>, <4 x i64> undef)
334 store <4 x i64> %vals, ptr %a
; Gather of eight i64 elements with an all-true mask. The <8 x ptr> address
; vector is loaded from %b and the <8 x i64> result is stored to %a. There is no
; vscale_range attribute here, and the expected code differs per run
; configuration.
;  - VBITS_GE_256 prefix - the pointers are loaded as two vl4 halves (the upper
;    half at element offset x8 = 4), each half is gathered separately, and each
;    result is stored with its own st1d at the matching offset.
;  - VBITS_GE_512 prefix - a single vl8 pointer load, gather and st1d store.
338 define void @masked_gather_v8i64(ptr %a, ptr %b) #0 {
339 ; VBITS_GE_256-LABEL: masked_gather_v8i64:
340 ; VBITS_GE_256: // %bb.0:
341 ; VBITS_GE_256-NEXT: ptrue p0.d, vl4
342 ; VBITS_GE_256-NEXT: mov x8, #4 // =0x4
343 ; VBITS_GE_256-NEXT: ld1d { z0.d }, p0/z, [x1, x8, lsl #3]
344 ; VBITS_GE_256-NEXT: ld1d { z1.d }, p0/z, [x1]
345 ; VBITS_GE_256-NEXT: ld1d { z0.d }, p0/z, [z0.d]
346 ; VBITS_GE_256-NEXT: ld1d { z1.d }, p0/z, [z1.d]
347 ; VBITS_GE_256-NEXT: st1d { z0.d }, p0, [x0, x8, lsl #3]
348 ; VBITS_GE_256-NEXT: st1d { z1.d }, p0, [x0]
349 ; VBITS_GE_256-NEXT: ret
351 ; VBITS_GE_512-LABEL: masked_gather_v8i64:
352 ; VBITS_GE_512: // %bb.0:
353 ; VBITS_GE_512-NEXT: ptrue p0.d, vl8
354 ; VBITS_GE_512-NEXT: ld1d { z0.d }, p0/z, [x1]
355 ; VBITS_GE_512-NEXT: ld1d { z0.d }, p0/z, [z0.d]
356 ; VBITS_GE_512-NEXT: st1d { z0.d }, p0, [x0]
357 ; VBITS_GE_512-NEXT: ret
358 %ptrs = load <8 x ptr>, ptr %b
359 %vals = call <8 x i64> @llvm.masked.gather.v8i64(<8 x ptr> %ptrs, i32 8, <8 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>, <8 x i64> undef)
360 store <8 x i64> %vals, ptr %a
; Gather of sixteen i64 elements with an all-true mask. The <16 x ptr> address
; vector is loaded from %b and the <16 x i64> result is stored to %a. The
; function carries vscale_range(8,0) and is checked with the prefix common to all
; run lines. Expected code uses one vl16 predicate for the pointer load, the
; doubleword gather and the st1d store.
364 define void @masked_gather_v16i64(ptr %a, ptr %b) vscale_range(8,0) #0 {
365 ; CHECK-LABEL: masked_gather_v16i64:
367 ; CHECK-NEXT: ptrue p0.d, vl16
368 ; CHECK-NEXT: ld1d { z0.d }, p0/z, [x1]
369 ; CHECK-NEXT: ld1d { z0.d }, p0/z, [z0.d]
370 ; CHECK-NEXT: st1d { z0.d }, p0, [x0]
372 %ptrs = load <16 x ptr>, ptr %b
373 %vals = call <16 x i64> @llvm.masked.gather.v16i64(<16 x ptr> %ptrs, i32 8, <16 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true,
374 i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>, <16 x i64> undef)
375 store <16 x i64> %vals, ptr %a
; Gather of thirty-two i64 elements with an all-true mask. The <32 x ptr> address
; vector is loaded from %b and the <32 x i64> result is stored to %a. The
; function carries vscale_range(16,0) and is checked with the prefix common to
; all run lines. Expected code uses one vl32 predicate for the pointer load, the
; doubleword gather and the st1d store.
379 define void @masked_gather_v32i64(ptr %a, ptr %b) vscale_range(16,0) #0 {
380 ; CHECK-LABEL: masked_gather_v32i64:
382 ; CHECK-NEXT: ptrue p0.d, vl32
383 ; CHECK-NEXT: ld1d { z0.d }, p0/z, [x1]
384 ; CHECK-NEXT: ld1d { z0.d }, p0/z, [z0.d]
385 ; CHECK-NEXT: st1d { z0.d }, p0, [x0]
387 %ptrs = load <32 x ptr>, ptr %b
388 %vals = call <32 x i64> @llvm.masked.gather.v32i64(<32 x ptr> %ptrs, i32 8, <32 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true,
389 i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true,
390 i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true,
391 i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>, <32 x i64> undef)
392 store <32 x i64> %vals, ptr %a
; Declarations of the llvm.masked.gather overloads called by the test functions,
; one per fixed-length result type (2, 4, 8, 16 and 32 elements of i8, i16, i32
; and i64). Each takes a vector of pointers, an i32 alignment, an i1 mask vector
; and a passthru vector of the result type.
396 declare <2 x i8> @llvm.masked.gather.v2i8(<2 x ptr>, i32, <2 x i1>, <2 x i8>)
397 declare <4 x i8> @llvm.masked.gather.v4i8(<4 x ptr>, i32, <4 x i1>, <4 x i8>)
398 declare <8 x i8> @llvm.masked.gather.v8i8(<8 x ptr>, i32, <8 x i1>, <8 x i8>)
399 declare <16 x i8> @llvm.masked.gather.v16i8(<16 x ptr>, i32, <16 x i1>, <16 x i8>)
400 declare <32 x i8> @llvm.masked.gather.v32i8(<32 x ptr>, i32, <32 x i1>, <32 x i8>)
402 declare <2 x i16> @llvm.masked.gather.v2i16(<2 x ptr>, i32, <2 x i1>, <2 x i16>)
403 declare <4 x i16> @llvm.masked.gather.v4i16(<4 x ptr>, i32, <4 x i1>, <4 x i16>)
404 declare <8 x i16> @llvm.masked.gather.v8i16(<8 x ptr>, i32, <8 x i1>, <8 x i16>)
405 declare <16 x i16> @llvm.masked.gather.v16i16(<16 x ptr>, i32, <16 x i1>, <16 x i16>)
406 declare <32 x i16> @llvm.masked.gather.v32i16(<32 x ptr>, i32, <32 x i1>, <32 x i16>)
408 declare <2 x i32> @llvm.masked.gather.v2i32(<2 x ptr>, i32, <2 x i1>, <2 x i32>)
409 declare <4 x i32> @llvm.masked.gather.v4i32(<4 x ptr>, i32, <4 x i1>, <4 x i32>)
410 declare <8 x i32> @llvm.masked.gather.v8i32(<8 x ptr>, i32, <8 x i1>, <8 x i32>)
411 declare <16 x i32> @llvm.masked.gather.v16i32(<16 x ptr>, i32, <16 x i1>, <16 x i32>)
412 declare <32 x i32> @llvm.masked.gather.v32i32(<32 x ptr>, i32, <32 x i1>, <32 x i32>)
414 declare <2 x i64> @llvm.masked.gather.v2i64(<2 x ptr>, i32, <2 x i1>, <2 x i64>)
415 declare <4 x i64> @llvm.masked.gather.v4i64(<4 x ptr>, i32, <4 x i1>, <4 x i64>)
416 declare <8 x i64> @llvm.masked.gather.v8i64(<8 x ptr>, i32, <8 x i1>, <8 x i64>)
417 declare <16 x i64> @llvm.masked.gather.v16i64(<16 x ptr>, i32, <16 x i1>, <16 x i64>)
418 declare <32 x i64> @llvm.masked.gather.v32i64(<32 x ptr>, i32, <32 x i1>, <32 x i64>)
; Attribute group #0, referenced by every test function, enables the SVE target
; feature.
420 attributes #0 = { "target-features"="+sve" }