1 ; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sve,+bf16 < %s | FileCheck %s
7 define void @st2b_i8(<vscale x 16 x i8> %v0, <vscale x 16 x i8> %v1, <vscale x 16 x i1> %pred, i8* %addr) {
8 ; CHECK-LABEL: st2b_i8:
9 ; CHECK: st2b { z0.b, z1.b }, p0, [x0]
11 call void @llvm.aarch64.sve.st2.nxv16i8(<vscale x 16 x i8> %v0,
12 <vscale x 16 x i8> %v1,
13 <vscale x 16 x i1> %pred,
22 define void @st2h_i16(<vscale x 8 x i16> %v0, <vscale x 8 x i16> %v1, <vscale x 8 x i1> %pred, i16* %addr) {
23 ; CHECK-LABEL: st2h_i16:
24 ; CHECK: st2h { z0.h, z1.h }, p0, [x0]
26 call void @llvm.aarch64.sve.st2.nxv8i16(<vscale x 8 x i16> %v0,
27 <vscale x 8 x i16> %v1,
28 <vscale x 8 x i1> %pred,
33 define void @st2h_f16(<vscale x 8 x half> %v0, <vscale x 8 x half> %v1, <vscale x 8 x i1> %pred, half* %addr) {
34 ; CHECK-LABEL: st2h_f16:
35 ; CHECK: st2h { z0.h, z1.h }, p0, [x0]
37 call void @llvm.aarch64.sve.st2.nxv8f16(<vscale x 8 x half> %v0,
38 <vscale x 8 x half> %v1,
39 <vscale x 8 x i1> %pred,
44 define void @st2h_bf16(<vscale x 8 x bfloat> %v0, <vscale x 8 x bfloat> %v1, <vscale x 8 x i1> %pred, bfloat* %addr) #0 {
45 ; CHECK-LABEL: st2h_bf16:
46 ; CHECK: st2h { z0.h, z1.h }, p0, [x0]
48 call void @llvm.aarch64.sve.st2.nxv8bf16(<vscale x 8 x bfloat> %v0,
49 <vscale x 8 x bfloat> %v1,
50 <vscale x 8 x i1> %pred,
59 define void @st2w_i32(<vscale x 4 x i32> %v0, <vscale x 4 x i32> %v1, <vscale x 4 x i1> %pred, i32* %addr) {
60 ; CHECK-LABEL: st2w_i32:
61 ; CHECK: st2w { z0.s, z1.s }, p0, [x0]
63 call void @llvm.aarch64.sve.st2.nxv4i32(<vscale x 4 x i32> %v0,
64 <vscale x 4 x i32> %v1,
65 <vscale x 4 x i1> %pred,
70 define void @st2w_f32(<vscale x 4 x float> %v0, <vscale x 4 x float> %v1, <vscale x 4 x i1> %pred, float* %addr) {
71 ; CHECK-LABEL: st2w_f32:
72 ; CHECK: st2w { z0.s, z1.s }, p0, [x0]
74 call void @llvm.aarch64.sve.st2.nxv4f32(<vscale x 4 x float> %v0,
75 <vscale x 4 x float> %v1,
76 <vscale x 4 x i1> %pred,
85 define void @st2d_i64(<vscale x 2 x i64> %v0, <vscale x 2 x i64> %v1, <vscale x 2 x i1> %pred, i64* %addr) {
86 ; CHECK-LABEL: st2d_i64:
87 ; CHECK: st2d { z0.d, z1.d }, p0, [x0]
89 call void @llvm.aarch64.sve.st2.nxv2i64(<vscale x 2 x i64> %v0,
90 <vscale x 2 x i64> %v1,
91 <vscale x 2 x i1> %pred,
96 define void @st2d_f64(<vscale x 2 x double> %v0, <vscale x 2 x double> %v1, <vscale x 2 x i1> %pred, double* %addr) {
97 ; CHECK-LABEL: st2d_f64:
98 ; CHECK: st2d { z0.d, z1.d }, p0, [x0]
100 call void @llvm.aarch64.sve.st2.nxv2f64(<vscale x 2 x double> %v0,
101 <vscale x 2 x double> %v1,
102 <vscale x 2 x i1> %pred,
107 define void @st2d_ptr(<vscale x 2 x i8*> %v0, <vscale x 2 x i8*> %v1, <vscale x 2 x i1> %pred, i8** %addr) {
108 ; CHECK-LABEL: st2d_ptr:
109 ; CHECK: st2d { z0.d, z1.d }, p0, [x0]
111 call void @llvm.aarch64.sve.st2.nxv2p0i8(<vscale x 2 x i8*> %v0,
112 <vscale x 2 x i8*> %v1,
113 <vscale x 2 x i1> %pred,
;
; ST3B - interleaved three-vector stores, byte elements
;
122 define void @st3b_i8(<vscale x 16 x i8> %v0, <vscale x 16 x i8> %v1, <vscale x 16 x i8> %v2, <vscale x 16 x i1> %pred, i8* %addr) {
123 ; CHECK-LABEL: st3b_i8:
124 ; CHECK: st3b { z0.b, z1.b, z2.b }, p0, [x0]
126 call void @llvm.aarch64.sve.st3.nxv16i8(<vscale x 16 x i8> %v0,
127 <vscale x 16 x i8> %v1,
128 <vscale x 16 x i8> %v2,
129 <vscale x 16 x i1> %pred,
;
; ST3H - halfword elements (i16, f16, bf16 variants; bf16 needs attr #0 = +bf16)
;
138 define void @st3h_i16(<vscale x 8 x i16> %v0, <vscale x 8 x i16> %v1, <vscale x 8 x i16> %v2, <vscale x 8 x i1> %pred, i16* %addr) {
139 ; CHECK-LABEL: st3h_i16:
140 ; CHECK: st3h { z0.h, z1.h, z2.h }, p0, [x0]
142 call void @llvm.aarch64.sve.st3.nxv8i16(<vscale x 8 x i16> %v0,
143 <vscale x 8 x i16> %v1,
144 <vscale x 8 x i16> %v2,
145 <vscale x 8 x i1> %pred,
150 define void @st3h_f16(<vscale x 8 x half> %v0, <vscale x 8 x half> %v1, <vscale x 8 x half> %v2, <vscale x 8 x i1> %pred, half* %addr) {
151 ; CHECK-LABEL: st3h_f16:
152 ; CHECK: st3h { z0.h, z1.h, z2.h }, p0, [x0]
154 call void @llvm.aarch64.sve.st3.nxv8f16(<vscale x 8 x half> %v0,
155 <vscale x 8 x half> %v1,
156 <vscale x 8 x half> %v2,
157 <vscale x 8 x i1> %pred,
162 define void @st3h_bf16(<vscale x 8 x bfloat> %v0, <vscale x 8 x bfloat> %v1, <vscale x 8 x bfloat> %v2, <vscale x 8 x i1> %pred, bfloat* %addr) #0 {
163 ; CHECK-LABEL: st3h_bf16:
164 ; CHECK: st3h { z0.h, z1.h, z2.h }, p0, [x0]
166 call void @llvm.aarch64.sve.st3.nxv8bf16(<vscale x 8 x bfloat> %v0,
167 <vscale x 8 x bfloat> %v1,
168 <vscale x 8 x bfloat> %v2,
169 <vscale x 8 x i1> %pred,
;
; ST3W - word elements (i32 and f32 variants)
;
178 define void @st3w_i32(<vscale x 4 x i32> %v0, <vscale x 4 x i32> %v1, <vscale x 4 x i32> %v2, <vscale x 4 x i1> %pred, i32* %addr) {
179 ; CHECK-LABEL: st3w_i32:
180 ; CHECK: st3w { z0.s, z1.s, z2.s }, p0, [x0]
182 call void @llvm.aarch64.sve.st3.nxv4i32(<vscale x 4 x i32> %v0,
183 <vscale x 4 x i32> %v1,
184 <vscale x 4 x i32> %v2,
185 <vscale x 4 x i1> %pred,
190 define void @st3w_f32(<vscale x 4 x float> %v0, <vscale x 4 x float> %v1, <vscale x 4 x float> %v2, <vscale x 4 x i1> %pred, float* %addr) {
191 ; CHECK-LABEL: st3w_f32:
192 ; CHECK: st3w { z0.s, z1.s, z2.s }, p0, [x0]
194 call void @llvm.aarch64.sve.st3.nxv4f32(<vscale x 4 x float> %v0,
195 <vscale x 4 x float> %v1,
196 <vscale x 4 x float> %v2,
197 <vscale x 4 x i1> %pred,
;
; ST3D - doubleword elements (i64, f64, and pointer variants)
;
206 define void @st3d_i64(<vscale x 2 x i64> %v0, <vscale x 2 x i64> %v1, <vscale x 2 x i64> %v2, <vscale x 2 x i1> %pred, i64* %addr) {
207 ; CHECK-LABEL: st3d_i64:
208 ; CHECK: st3d { z0.d, z1.d, z2.d }, p0, [x0]
210 call void @llvm.aarch64.sve.st3.nxv2i64(<vscale x 2 x i64> %v0,
211 <vscale x 2 x i64> %v1,
212 <vscale x 2 x i64> %v2,
213 <vscale x 2 x i1> %pred,
218 define void @st3d_f64(<vscale x 2 x double> %v0, <vscale x 2 x double> %v1, <vscale x 2 x double> %v2, <vscale x 2 x i1> %pred, double* %addr) {
219 ; CHECK-LABEL: st3d_f64:
220 ; CHECK: st3d { z0.d, z1.d, z2.d }, p0, [x0]
222 call void @llvm.aarch64.sve.st3.nxv2f64(<vscale x 2 x double> %v0,
223 <vscale x 2 x double> %v1,
224 <vscale x 2 x double> %v2,
225 <vscale x 2 x i1> %pred,
230 define void @st3d_ptr(<vscale x 2 x i8*> %v0, <vscale x 2 x i8*> %v1, <vscale x 2 x i8*> %v2, <vscale x 2 x i1> %pred, i8** %addr) {
231 ; CHECK-LABEL: st3d_ptr:
232 ; CHECK: st3d { z0.d, z1.d, z2.d }, p0, [x0]
234 call void @llvm.aarch64.sve.st3.nxv2p0i8(<vscale x 2 x i8*> %v0,
235 <vscale x 2 x i8*> %v1,
236 <vscale x 2 x i8*> %v2,
237 <vscale x 2 x i1> %pred,
;
; ST4B - interleaved four-vector stores, byte elements
;
246 define void @st4b_i8(<vscale x 16 x i8> %v0, <vscale x 16 x i8> %v1, <vscale x 16 x i8> %v2, <vscale x 16 x i8> %v3, <vscale x 16 x i1> %pred, i8* %addr) {
247 ; CHECK-LABEL: st4b_i8:
248 ; CHECK: st4b { z0.b, z1.b, z2.b, z3.b }, p0, [x0]
250 call void @llvm.aarch64.sve.st4.nxv16i8(<vscale x 16 x i8> %v0,
251 <vscale x 16 x i8> %v1,
252 <vscale x 16 x i8> %v2,
253 <vscale x 16 x i8> %v3,
254 <vscale x 16 x i1> %pred,
;
; ST4H - halfword elements (i16, f16, bf16 variants; bf16 needs attr #0 = +bf16)
;
263 define void @st4h_i16(<vscale x 8 x i16> %v0, <vscale x 8 x i16> %v1, <vscale x 8 x i16> %v2, <vscale x 8 x i16> %v3, <vscale x 8 x i1> %pred, i16* %addr) {
264 ; CHECK-LABEL: st4h_i16:
265 ; CHECK: st4h { z0.h, z1.h, z2.h, z3.h }, p0, [x0]
267 call void @llvm.aarch64.sve.st4.nxv8i16(<vscale x 8 x i16> %v0,
268 <vscale x 8 x i16> %v1,
269 <vscale x 8 x i16> %v2,
270 <vscale x 8 x i16> %v3,
271 <vscale x 8 x i1> %pred,
276 define void @st4h_f16(<vscale x 8 x half> %v0, <vscale x 8 x half> %v1, <vscale x 8 x half> %v2, <vscale x 8 x half> %v3, <vscale x 8 x i1> %pred, half* %addr) {
277 ; CHECK-LABEL: st4h_f16:
278 ; CHECK: st4h { z0.h, z1.h, z2.h, z3.h }, p0, [x0]
280 call void @llvm.aarch64.sve.st4.nxv8f16(<vscale x 8 x half> %v0,
281 <vscale x 8 x half> %v1,
282 <vscale x 8 x half> %v2,
283 <vscale x 8 x half> %v3,
284 <vscale x 8 x i1> %pred,
289 define void @st4h_bf16(<vscale x 8 x bfloat> %v0, <vscale x 8 x bfloat> %v1, <vscale x 8 x bfloat> %v2, <vscale x 8 x bfloat> %v3, <vscale x 8 x i1> %pred, bfloat* %addr) #0 {
290 ; CHECK-LABEL: st4h_bf16:
291 ; CHECK: st4h { z0.h, z1.h, z2.h, z3.h }, p0, [x0]
293 call void @llvm.aarch64.sve.st4.nxv8bf16(<vscale x 8 x bfloat> %v0,
294 <vscale x 8 x bfloat> %v1,
295 <vscale x 8 x bfloat> %v2,
296 <vscale x 8 x bfloat> %v3,
297 <vscale x 8 x i1> %pred,
;
; ST4W - word elements (i32 and f32 variants)
;
306 define void @st4w_i32(<vscale x 4 x i32> %v0, <vscale x 4 x i32> %v1, <vscale x 4 x i32> %v2, <vscale x 4 x i32> %v3, <vscale x 4 x i1> %pred, i32* %addr) {
307 ; CHECK-LABEL: st4w_i32:
308 ; CHECK: st4w { z0.s, z1.s, z2.s, z3.s }, p0, [x0]
310 call void @llvm.aarch64.sve.st4.nxv4i32(<vscale x 4 x i32> %v0,
311 <vscale x 4 x i32> %v1,
312 <vscale x 4 x i32> %v2,
313 <vscale x 4 x i32> %v3,
314 <vscale x 4 x i1> %pred,
319 define void @st4w_f32(<vscale x 4 x float> %v0, <vscale x 4 x float> %v1, <vscale x 4 x float> %v2, <vscale x 4 x float> %v3, <vscale x 4 x i1> %pred, float* %addr) {
320 ; CHECK-LABEL: st4w_f32:
321 ; CHECK: st4w { z0.s, z1.s, z2.s, z3.s }, p0, [x0]
323 call void @llvm.aarch64.sve.st4.nxv4f32(<vscale x 4 x float> %v0,
324 <vscale x 4 x float> %v1,
325 <vscale x 4 x float> %v2,
326 <vscale x 4 x float> %v3,
327 <vscale x 4 x i1> %pred,
;
; ST4D - doubleword elements (i64, f64, and pointer variants)
;
336 define void @st4d_i64(<vscale x 2 x i64> %v0, <vscale x 2 x i64> %v1, <vscale x 2 x i64> %v2, <vscale x 2 x i64> %v3, <vscale x 2 x i1> %pred, i64* %addr) {
337 ; CHECK-LABEL: st4d_i64:
338 ; CHECK: st4d { z0.d, z1.d, z2.d, z3.d }, p0, [x0]
340 call void @llvm.aarch64.sve.st4.nxv2i64(<vscale x 2 x i64> %v0,
341 <vscale x 2 x i64> %v1,
342 <vscale x 2 x i64> %v2,
343 <vscale x 2 x i64> %v3,
344 <vscale x 2 x i1> %pred,
349 define void @st4d_f64(<vscale x 2 x double> %v0, <vscale x 2 x double> %v1, <vscale x 2 x double> %v2, <vscale x 2 x double> %v3, <vscale x 2 x i1> %pred, double* %addr) {
350 ; CHECK-LABEL: st4d_f64:
351 ; CHECK: st4d { z0.d, z1.d, z2.d, z3.d }, p0, [x0]
353 call void @llvm.aarch64.sve.st4.nxv2f64(<vscale x 2 x double> %v0,
354 <vscale x 2 x double> %v1,
355 <vscale x 2 x double> %v2,
356 <vscale x 2 x double> %v3,
357 <vscale x 2 x i1> %pred,
362 define void @st4d_ptr(<vscale x 2 x i8*> %v0, <vscale x 2 x i8*> %v1, <vscale x 2 x i8*> %v2, <vscale x 2 x i8*> %v3, <vscale x 2 x i1> %pred, i8** %addr) {
363 ; CHECK-LABEL: st4d_ptr:
364 ; CHECK: st4d { z0.d, z1.d, z2.d, z3.d }, p0, [x0]
366 call void @llvm.aarch64.sve.st4.nxv2p0i8(<vscale x 2 x i8*> %v0,
367 <vscale x 2 x i8*> %v1,
368 <vscale x 2 x i8*> %v2,
369 <vscale x 2 x i8*> %v3,
370 <vscale x 2 x i1> %pred,
;
; STNT1B - non-temporal single-vector stores, byte elements
;
378 define void @stnt1b_i8(<vscale x 16 x i8> %data, <vscale x 16 x i1> %pred, i8* %addr) {
379 ; CHECK-LABEL: stnt1b_i8:
380 ; CHECK: stnt1b { z0.b }, p0, [x0]
382 call void @llvm.aarch64.sve.stnt1.nxv16i8(<vscale x 16 x i8> %data,
383 <vscale x 16 x i1> %pred,
;
; STNT1H - halfword elements (i16, f16, bf16 variants; bf16 needs attr #0 = +bf16)
;
392 define void @stnt1h_i16(<vscale x 8 x i16> %data, <vscale x 8 x i1> %pred, i16* %addr) {
393 ; CHECK-LABEL: stnt1h_i16:
394 ; CHECK: stnt1h { z0.h }, p0, [x0]
396 call void @llvm.aarch64.sve.stnt1.nxv8i16(<vscale x 8 x i16> %data,
397 <vscale x 8 x i1> %pred,
402 define void @stnt1h_f16(<vscale x 8 x half> %data, <vscale x 8 x i1> %pred, half* %addr) {
403 ; CHECK-LABEL: stnt1h_f16:
404 ; CHECK: stnt1h { z0.h }, p0, [x0]
406 call void @llvm.aarch64.sve.stnt1.nxv8f16(<vscale x 8 x half> %data,
407 <vscale x 8 x i1> %pred,
412 define void @stnt1h_bf16(<vscale x 8 x bfloat> %data, <vscale x 8 x i1> %pred, bfloat* %addr) #0 {
413 ; CHECK-LABEL: stnt1h_bf16:
414 ; CHECK: stnt1h { z0.h }, p0, [x0]
416 call void @llvm.aarch64.sve.stnt1.nxv8bf16(<vscale x 8 x bfloat> %data,
417 <vscale x 8 x i1> %pred,
;
; STNT1W - word elements (i32 and f32 variants)
;
426 define void @stnt1w_i32(<vscale x 4 x i32> %data, <vscale x 4 x i1> %pred, i32* %addr) {
427 ; CHECK-LABEL: stnt1w_i32:
428 ; CHECK: stnt1w { z0.s }, p0, [x0]
430 call void @llvm.aarch64.sve.stnt1.nxv4i32(<vscale x 4 x i32> %data,
431 <vscale x 4 x i1> %pred,
436 define void @stnt1w_f32(<vscale x 4 x float> %data, <vscale x 4 x i1> %pred, float* %addr) {
437 ; CHECK-LABEL: stnt1w_f32:
438 ; CHECK: stnt1w { z0.s }, p0, [x0]
440 call void @llvm.aarch64.sve.stnt1.nxv4f32(<vscale x 4 x float> %data,
441 <vscale x 4 x i1> %pred,
;
; STNT1D - doubleword elements (i64 and f64 variants)
;
450 define void @stnt1d_i64(<vscale x 2 x i64> %data, <vscale x 2 x i1> %pred, i64* %addr) {
451 ; CHECK-LABEL: stnt1d_i64:
452 ; CHECK: stnt1d { z0.d }, p0, [x0]
454 call void @llvm.aarch64.sve.stnt1.nxv2i64(<vscale x 2 x i64> %data,
455 <vscale x 2 x i1> %pred,
460 define void @stnt1d_f64(<vscale x 2 x double> %data, <vscale x 2 x i1> %pred, double* %addr) {
461 ; CHECK-LABEL: stnt1d_f64:
462 ; CHECK: stnt1d { z0.d }, p0, [x0]
464 call void @llvm.aarch64.sve.stnt1.nxv2f64(<vscale x 2 x double> %data,
465 <vscale x 2 x i1> %pred,
;
; Stores of sve.tuple.createN results: each tuple member should lower to a
; separate st1 at the matching "mul vl" offset (member N at [x0, #N, mul vl]).
;
473 define void @store_i64_tuple3(<vscale x 6 x i64>* %out, <vscale x 2 x i64> %in1, <vscale x 2 x i64> %in2, <vscale x 2 x i64> %in3) {
474 ; CHECK-LABEL: store_i64_tuple3
475 ; CHECK: st1d { z2.d }, p0, [x0, #2, mul vl]
476 ; CHECK-NEXT: st1d { z1.d }, p0, [x0, #1, mul vl]
477 ; CHECK-NEXT: st1d { z0.d }, p0, [x0]
478 %tuple = tail call <vscale x 6 x i64> @llvm.aarch64.sve.tuple.create3.nxv6i64.nxv2i64(<vscale x 2 x i64> %in1, <vscale x 2 x i64> %in2, <vscale x 2 x i64> %in3)
479 store <vscale x 6 x i64> %tuple, <vscale x 6 x i64>* %out
483 define void @store_i64_tuple4(<vscale x 8 x i64>* %out, <vscale x 2 x i64> %in1, <vscale x 2 x i64> %in2, <vscale x 2 x i64> %in3, <vscale x 2 x i64> %in4) {
484 ; CHECK-LABEL: store_i64_tuple4
485 ; CHECK: st1d { z3.d }, p0, [x0, #3, mul vl]
486 ; CHECK-NEXT: st1d { z2.d }, p0, [x0, #2, mul vl]
487 ; CHECK-NEXT: st1d { z1.d }, p0, [x0, #1, mul vl]
488 ; CHECK-NEXT: st1d { z0.d }, p0, [x0]
489 %tuple = tail call <vscale x 8 x i64> @llvm.aarch64.sve.tuple.create4.nxv8i64.nxv2i64(<vscale x 2 x i64> %in1, <vscale x 2 x i64> %in2, <vscale x 2 x i64> %in3, <vscale x 2 x i64> %in4)
490 store <vscale x 8 x i64> %tuple, <vscale x 8 x i64>* %out
494 define void @store_i16_tuple2(<vscale x 16 x i16>* %out, <vscale x 8 x i16> %in1, <vscale x 8 x i16> %in2) {
495 ; CHECK-LABEL: store_i16_tuple2
496 ; CHECK: st1h { z1.h }, p0, [x0, #1, mul vl]
497 ; CHECK-NEXT: st1h { z0.h }, p0, [x0]
498 %tuple = tail call <vscale x 16 x i16> @llvm.aarch64.sve.tuple.create2.nxv16i16.nxv8i16(<vscale x 8 x i16> %in1, <vscale x 8 x i16> %in2)
499 store <vscale x 16 x i16> %tuple, <vscale x 16 x i16>* %out
503 define void @store_i16_tuple3(<vscale x 24 x i16>* %out, <vscale x 8 x i16> %in1, <vscale x 8 x i16> %in2, <vscale x 8 x i16> %in3) {
504 ; CHECK-LABEL: store_i16_tuple3
505 ; CHECK: st1h { z2.h }, p0, [x0, #2, mul vl]
506 ; CHECK-NEXT: st1h { z1.h }, p0, [x0, #1, mul vl]
507 ; CHECK-NEXT: st1h { z0.h }, p0, [x0]
508 %tuple = tail call <vscale x 24 x i16> @llvm.aarch64.sve.tuple.create3.nxv24i16.nxv8i16(<vscale x 8 x i16> %in1, <vscale x 8 x i16> %in2, <vscale x 8 x i16> %in3)
509 store <vscale x 24 x i16> %tuple, <vscale x 24 x i16>* %out
513 define void @store_f32_tuple3(<vscale x 12 x float>* %out, <vscale x 4 x float> %in1, <vscale x 4 x float> %in2, <vscale x 4 x float> %in3) {
514 ; CHECK-LABEL: store_f32_tuple3
515 ; CHECK: st1w { z2.s }, p0, [x0, #2, mul vl]
516 ; CHECK-NEXT: st1w { z1.s }, p0, [x0, #1, mul vl]
517 ; CHECK-NEXT: st1w { z0.s }, p0, [x0]
518 %tuple = tail call <vscale x 12 x float> @llvm.aarch64.sve.tuple.create3.nxv12f32.nxv4f32(<vscale x 4 x float> %in1, <vscale x 4 x float> %in2, <vscale x 4 x float> %in3)
519 store <vscale x 12 x float> %tuple, <vscale x 12 x float>* %out
523 define void @store_f32_tuple4(<vscale x 16 x float>* %out, <vscale x 4 x float> %in1, <vscale x 4 x float> %in2, <vscale x 4 x float> %in3, <vscale x 4 x float> %in4) {
524 ; CHECK-LABEL: store_f32_tuple4
525 ; CHECK: st1w { z3.s }, p0, [x0, #3, mul vl]
526 ; CHECK-NEXT: st1w { z2.s }, p0, [x0, #2, mul vl]
527 ; CHECK-NEXT: st1w { z1.s }, p0, [x0, #1, mul vl]
528 ; CHECK-NEXT: st1w { z0.s }, p0, [x0]
529 %tuple = tail call <vscale x 16 x float> @llvm.aarch64.sve.tuple.create4.nxv16f32.nxv4f32(<vscale x 4 x float> %in1, <vscale x 4 x float> %in2, <vscale x 4 x float> %in3, <vscale x 4 x float> %in4)
530 store <vscale x 16 x float> %tuple, <vscale x 16 x float>* %out
; Intrinsic prototypes. Each stN variant takes N data vectors, then the
; governing predicate, then the base address.
;
; st2 declarations
534 declare void @llvm.aarch64.sve.st2.nxv16i8(<vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i1>, i8*)
535 declare void @llvm.aarch64.sve.st2.nxv8i16(<vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i1>, i16*)
536 declare void @llvm.aarch64.sve.st2.nxv4i32(<vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i1>, i32*)
537 declare void @llvm.aarch64.sve.st2.nxv2i64(<vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i1>, i64*)
538 declare void @llvm.aarch64.sve.st2.nxv8f16(<vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x i1>, half*)
539 declare void @llvm.aarch64.sve.st2.nxv8bf16(<vscale x 8 x bfloat>, <vscale x 8 x bfloat>, <vscale x 8 x i1>, bfloat*)
540 declare void @llvm.aarch64.sve.st2.nxv4f32(<vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x i1>, float*)
541 declare void @llvm.aarch64.sve.st2.nxv2f64(<vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x i1>, double*)
542 declare void @llvm.aarch64.sve.st2.nxv2p0i8(<vscale x 2 x i8*>, <vscale x 2 x i8*>, <vscale x 2 x i1>, i8** nocapture)
; st3 declarations
544 declare void @llvm.aarch64.sve.st3.nxv16i8(<vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i1>, i8*)
545 declare void @llvm.aarch64.sve.st3.nxv8i16(<vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i1>, i16*)
546 declare void @llvm.aarch64.sve.st3.nxv4i32(<vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i1>, i32*)
547 declare void @llvm.aarch64.sve.st3.nxv2i64(<vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i1>, i64*)
548 declare void @llvm.aarch64.sve.st3.nxv8f16(<vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x i1>, half*)
549 declare void @llvm.aarch64.sve.st3.nxv8bf16(<vscale x 8 x bfloat>, <vscale x 8 x bfloat>, <vscale x 8 x bfloat>, <vscale x 8 x i1>, bfloat*)
550 declare void @llvm.aarch64.sve.st3.nxv4f32(<vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x i1>, float*)
551 declare void @llvm.aarch64.sve.st3.nxv2f64(<vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x i1>, double*)
552 declare void @llvm.aarch64.sve.st3.nxv2p0i8(<vscale x 2 x i8*>, <vscale x 2 x i8*>, <vscale x 2 x i8*>, <vscale x 2 x i1>, i8** nocapture)
; st4 declarations
554 declare void @llvm.aarch64.sve.st4.nxv16i8(<vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i1>, i8*)
555 declare void @llvm.aarch64.sve.st4.nxv8i16(<vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i1>, i16*)
556 declare void @llvm.aarch64.sve.st4.nxv4i32(<vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i1>, i32*)
557 declare void @llvm.aarch64.sve.st4.nxv2i64(<vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i1>, i64*)
558 declare void @llvm.aarch64.sve.st4.nxv8f16(<vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x i1>, half*)
559 declare void @llvm.aarch64.sve.st4.nxv8bf16(<vscale x 8 x bfloat>, <vscale x 8 x bfloat>, <vscale x 8 x bfloat>, <vscale x 8 x bfloat>, <vscale x 8 x i1>, bfloat*)
560 declare void @llvm.aarch64.sve.st4.nxv4f32(<vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x i1>, float*)
561 declare void @llvm.aarch64.sve.st4.nxv2f64(<vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x i1>, double*)
562 declare void @llvm.aarch64.sve.st4.nxv2p0i8(<vscale x 2 x i8*>, <vscale x 2 x i8*>, <vscale x 2 x i8*>, <vscale x 2 x i8*>, <vscale x 2 x i1>, i8** nocapture)
; stnt1 (non-temporal store) declarations
564 declare void @llvm.aarch64.sve.stnt1.nxv16i8(<vscale x 16 x i8>, <vscale x 16 x i1>, i8*)
565 declare void @llvm.aarch64.sve.stnt1.nxv8i16(<vscale x 8 x i16>, <vscale x 8 x i1>, i16*)
566 declare void @llvm.aarch64.sve.stnt1.nxv4i32(<vscale x 4 x i32>, <vscale x 4 x i1>, i32*)
567 declare void @llvm.aarch64.sve.stnt1.nxv2i64(<vscale x 2 x i64>, <vscale x 2 x i1>, i64*)
568 declare void @llvm.aarch64.sve.stnt1.nxv8f16(<vscale x 8 x half>, <vscale x 8 x i1>, half*)
569 declare void @llvm.aarch64.sve.stnt1.nxv8bf16(<vscale x 8 x bfloat>, <vscale x 8 x i1>, bfloat*)
570 declare void @llvm.aarch64.sve.stnt1.nxv4f32(<vscale x 4 x float>, <vscale x 4 x i1>, float*)
571 declare void @llvm.aarch64.sve.stnt1.nxv2f64(<vscale x 2 x double>, <vscale x 2 x i1>, double*)
; tuple.create declarations (pack 2-4 vectors into one wide tuple value)
573 declare <vscale x 6 x i64> @llvm.aarch64.sve.tuple.create3.nxv6i64.nxv2i64(<vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>)
574 declare <vscale x 8 x i64> @llvm.aarch64.sve.tuple.create4.nxv8i64.nxv2i64(<vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>)
576 declare <vscale x 16 x i16> @llvm.aarch64.sve.tuple.create2.nxv16i16.nxv8i16(<vscale x 8 x i16>, <vscale x 8 x i16>)
577 declare <vscale x 24 x i16> @llvm.aarch64.sve.tuple.create3.nxv24i16.nxv8i16(<vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16>)
579 declare <vscale x 12 x float> @llvm.aarch64.sve.tuple.create3.nxv12f32.nxv4f32(<vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float>)
580 declare <vscale x 16 x float> @llvm.aarch64.sve.tuple.create4.nxv16f32.nxv4f32(<vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float>)
582 ; +bf16 is required for the bfloat version.
583 attributes #0 = { "target-features"="+sve,+bf16" }