; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sve2p1,+bf16 < %s | FileCheck %s
; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sme2,+bf16 -force-streaming < %s | FileCheck %s
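
; These tests cover the llvm.aarch64.sve.st1.pn.* and llvm.aarch64.sve.stnt1.pn.*
; intrinsics, which lower to the SVE2p1/SME2 consecutive multi-vector ST1/STNT1
; store instructions that take a predicate-as-counter operand (pn8 below). The
; leading <vscale x 16 x i8> %unused argument occupies z0, so the data operands
; arrive in z1 and upwards and have to be copied into a tuple starting at a
; suitably aligned register first (the mov instructions in the expected output).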

; == Normal Multi-Vector Consecutive Stores ==

define void @st1_x2_i8(<vscale x 16 x i8> %unused, <vscale x 16 x i8> %zn0, <vscale x 16 x i8> %zn1, target("aarch64.svcount") %pn, ptr %ptr) nounwind {
; CHECK-LABEL: st1_x2_i8:
; CHECK:       // %bb.0:
; CHECK-NEXT:    str x29, [sp, #-16]! // 8-byte Folded Spill
; CHECK-NEXT:    addvl sp, sp, #-1
; CHECK-NEXT:    mov z3.d, z2.d
; CHECK-NEXT:    str p8, [sp, #7, mul vl] // 2-byte Folded Spill
; CHECK-NEXT:    mov p8.b, p0.b
; CHECK-NEXT:    mov z2.d, z1.d
; CHECK-NEXT:    st1b { z2.b, z3.b }, pn8, [x0]
; CHECK-NEXT:    ldr p8, [sp, #7, mul vl] // 2-byte Folded Reload
; CHECK-NEXT:    addvl sp, sp, #1
; CHECK-NEXT:    ldr x29, [sp], #16 // 8-byte Folded Reload
; CHECK-NEXT:    ret
  call void @llvm.aarch64.sve.st1.pn.x2.nxv16i8(<vscale x 16 x i8> %zn0, <vscale x 16 x i8> %zn1, target("aarch64.svcount") %pn, ptr %ptr);
  ret void
}

define void @st1_x2_i16(<vscale x 16 x i8> %unused, <vscale x 8 x i16> %zn0, <vscale x 8 x i16> %zn1, target("aarch64.svcount") %pn, ptr %ptr) nounwind {
; CHECK-LABEL: st1_x2_i16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    str x29, [sp, #-16]! // 8-byte Folded Spill
; CHECK-NEXT:    addvl sp, sp, #-1
; CHECK-NEXT:    mov z3.d, z2.d
; CHECK-NEXT:    str p8, [sp, #7, mul vl] // 2-byte Folded Spill
; CHECK-NEXT:    mov p8.b, p0.b
; CHECK-NEXT:    mov z2.d, z1.d
; CHECK-NEXT:    st1h { z2.h, z3.h }, pn8, [x0]
; CHECK-NEXT:    ldr p8, [sp, #7, mul vl] // 2-byte Folded Reload
; CHECK-NEXT:    addvl sp, sp, #1
; CHECK-NEXT:    ldr x29, [sp], #16 // 8-byte Folded Reload
; CHECK-NEXT:    ret
  call void @llvm.aarch64.sve.st1.pn.x2.nxv8i16(<vscale x 8 x i16> %zn0, <vscale x 8 x i16> %zn1, target("aarch64.svcount") %pn, ptr %ptr);
  ret void
}

define void @st1_x2_i32(<vscale x 16 x i8> %unused, <vscale x 4 x i32> %zn0, <vscale x 4 x i32> %zn1, target("aarch64.svcount") %pn, ptr %ptr) nounwind {
; CHECK-LABEL: st1_x2_i32:
; CHECK:       // %bb.0:
; CHECK-NEXT:    str x29, [sp, #-16]! // 8-byte Folded Spill
; CHECK-NEXT:    addvl sp, sp, #-1
; CHECK-NEXT:    mov z3.d, z2.d
; CHECK-NEXT:    str p8, [sp, #7, mul vl] // 2-byte Folded Spill
; CHECK-NEXT:    mov p8.b, p0.b
; CHECK-NEXT:    mov z2.d, z1.d
; CHECK-NEXT:    st1w { z2.s, z3.s }, pn8, [x0]
; CHECK-NEXT:    ldr p8, [sp, #7, mul vl] // 2-byte Folded Reload
; CHECK-NEXT:    addvl sp, sp, #1
; CHECK-NEXT:    ldr x29, [sp], #16 // 8-byte Folded Reload
; CHECK-NEXT:    ret
  call void @llvm.aarch64.sve.st1.pn.x2.nxv4i32(<vscale x 4 x i32> %zn0, <vscale x 4 x i32> %zn1, target("aarch64.svcount") %pn, ptr %ptr);
  ret void
}

define void @st1_x2_i64(<vscale x 16 x i8> %unused, <vscale x 2 x i64> %zn0, <vscale x 2 x i64> %zn1, target("aarch64.svcount") %pn, ptr %ptr) nounwind {
; CHECK-LABEL: st1_x2_i64:
; CHECK:       // %bb.0:
; CHECK-NEXT:    str x29, [sp, #-16]! // 8-byte Folded Spill
; CHECK-NEXT:    addvl sp, sp, #-1
; CHECK-NEXT:    mov z3.d, z2.d
; CHECK-NEXT:    str p8, [sp, #7, mul vl] // 2-byte Folded Spill
; CHECK-NEXT:    mov p8.b, p0.b
; CHECK-NEXT:    mov z2.d, z1.d
; CHECK-NEXT:    st1d { z2.d, z3.d }, pn8, [x0]
; CHECK-NEXT:    ldr p8, [sp, #7, mul vl] // 2-byte Folded Reload
; CHECK-NEXT:    addvl sp, sp, #1
; CHECK-NEXT:    ldr x29, [sp], #16 // 8-byte Folded Reload
; CHECK-NEXT:    ret
  call void @llvm.aarch64.sve.st1.pn.x2.nxv2i64(<vscale x 2 x i64> %zn0, <vscale x 2 x i64> %zn1, target("aarch64.svcount") %pn, ptr %ptr);
  ret void
}

define void @st1_x2_f16(<vscale x 16 x i8> %unused, <vscale x 8 x half> %zn0, <vscale x 8 x half> %zn1, target("aarch64.svcount") %pn, ptr %ptr) nounwind {
; CHECK-LABEL: st1_x2_f16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    str x29, [sp, #-16]! // 8-byte Folded Spill
; CHECK-NEXT:    addvl sp, sp, #-1
; CHECK-NEXT:    mov z3.d, z2.d
; CHECK-NEXT:    str p8, [sp, #7, mul vl] // 2-byte Folded Spill
; CHECK-NEXT:    mov p8.b, p0.b
; CHECK-NEXT:    mov z2.d, z1.d
; CHECK-NEXT:    st1h { z2.h, z3.h }, pn8, [x0]
; CHECK-NEXT:    ldr p8, [sp, #7, mul vl] // 2-byte Folded Reload
; CHECK-NEXT:    addvl sp, sp, #1
; CHECK-NEXT:    ldr x29, [sp], #16 // 8-byte Folded Reload
; CHECK-NEXT:    ret
  call void @llvm.aarch64.sve.st1.pn.x2.nxv8f16(<vscale x 8 x half> %zn0, <vscale x 8 x half> %zn1, target("aarch64.svcount") %pn, ptr %ptr);
  ret void
}

define void @st1_x2_bf16(<vscale x 16 x i8> %unused, <vscale x 8 x bfloat> %zn0, <vscale x 8 x bfloat> %zn1, target("aarch64.svcount") %pn, ptr %ptr) nounwind {
; CHECK-LABEL: st1_x2_bf16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    str x29, [sp, #-16]! // 8-byte Folded Spill
; CHECK-NEXT:    addvl sp, sp, #-1
; CHECK-NEXT:    mov z3.d, z2.d
; CHECK-NEXT:    str p8, [sp, #7, mul vl] // 2-byte Folded Spill
; CHECK-NEXT:    mov p8.b, p0.b
; CHECK-NEXT:    mov z2.d, z1.d
; CHECK-NEXT:    st1h { z2.h, z3.h }, pn8, [x0]
; CHECK-NEXT:    ldr p8, [sp, #7, mul vl] // 2-byte Folded Reload
; CHECK-NEXT:    addvl sp, sp, #1
; CHECK-NEXT:    ldr x29, [sp], #16 // 8-byte Folded Reload
; CHECK-NEXT:    ret
  call void @llvm.aarch64.sve.st1.pn.x2.nxv8bf16(<vscale x 8 x bfloat> %zn0, <vscale x 8 x bfloat> %zn1, target("aarch64.svcount") %pn, ptr %ptr);
  ret void
}

define void @st1_x2_f32(<vscale x 16 x i8> %unused, <vscale x 4 x float> %zn0, <vscale x 4 x float> %zn1, target("aarch64.svcount") %pn, ptr %ptr) nounwind {
; CHECK-LABEL: st1_x2_f32:
; CHECK:       // %bb.0:
; CHECK-NEXT:    str x29, [sp, #-16]! // 8-byte Folded Spill
; CHECK-NEXT:    addvl sp, sp, #-1
; CHECK-NEXT:    mov z3.d, z2.d
; CHECK-NEXT:    str p8, [sp, #7, mul vl] // 2-byte Folded Spill
; CHECK-NEXT:    mov p8.b, p0.b
; CHECK-NEXT:    mov z2.d, z1.d
; CHECK-NEXT:    st1w { z2.s, z3.s }, pn8, [x0]
; CHECK-NEXT:    ldr p8, [sp, #7, mul vl] // 2-byte Folded Reload
; CHECK-NEXT:    addvl sp, sp, #1
; CHECK-NEXT:    ldr x29, [sp], #16 // 8-byte Folded Reload
; CHECK-NEXT:    ret
  call void @llvm.aarch64.sve.st1.pn.x2.nxv4f32(<vscale x 4 x float> %zn0, <vscale x 4 x float> %zn1, target("aarch64.svcount") %pn, ptr %ptr);
  ret void
}

define void @st1_x2_f64(<vscale x 16 x i8> %unused, <vscale x 2 x double> %zn0, <vscale x 2 x double> %zn1, target("aarch64.svcount") %pn, ptr %ptr) nounwind {
; CHECK-LABEL: st1_x2_f64:
; CHECK:       // %bb.0:
; CHECK-NEXT:    str x29, [sp, #-16]! // 8-byte Folded Spill
; CHECK-NEXT:    addvl sp, sp, #-1
; CHECK-NEXT:    mov z3.d, z2.d
; CHECK-NEXT:    str p8, [sp, #7, mul vl] // 2-byte Folded Spill
; CHECK-NEXT:    mov p8.b, p0.b
; CHECK-NEXT:    mov z2.d, z1.d
; CHECK-NEXT:    st1d { z2.d, z3.d }, pn8, [x0]
; CHECK-NEXT:    ldr p8, [sp, #7, mul vl] // 2-byte Folded Reload
; CHECK-NEXT:    addvl sp, sp, #1
; CHECK-NEXT:    ldr x29, [sp], #16 // 8-byte Folded Reload
; CHECK-NEXT:    ret
  call void @llvm.aarch64.sve.st1.pn.x2.nxv2f64(<vscale x 2 x double> %zn0, <vscale x 2 x double> %zn1, target("aarch64.svcount") %pn, ptr %ptr);
  ret void
}

define void @st1_x4_i8(<vscale x 16 x i8> %unused, <vscale x 16 x i8> %zn0, <vscale x 16 x i8> %zn1, <vscale x 16 x i8> %zn2, <vscale x 16 x i8> %zn3, target("aarch64.svcount") %pn, ptr %ptr) nounwind {
; CHECK-LABEL: st1_x4_i8:
; CHECK:       // %bb.0:
; CHECK-NEXT:    str x29, [sp, #-16]! // 8-byte Folded Spill
; CHECK-NEXT:    addvl sp, sp, #-1
; CHECK-NEXT:    mov z7.d, z4.d
; CHECK-NEXT:    str p8, [sp, #7, mul vl] // 2-byte Folded Spill
; CHECK-NEXT:    mov p8.b, p0.b
; CHECK-NEXT:    mov z6.d, z3.d
; CHECK-NEXT:    mov z5.d, z2.d
; CHECK-NEXT:    mov z4.d, z1.d
; CHECK-NEXT:    st1b { z4.b - z7.b }, pn8, [x0]
; CHECK-NEXT:    ldr p8, [sp, #7, mul vl] // 2-byte Folded Reload
; CHECK-NEXT:    addvl sp, sp, #1
; CHECK-NEXT:    ldr x29, [sp], #16 // 8-byte Folded Reload
; CHECK-NEXT:    ret
  call void @llvm.aarch64.sve.st1.pn.x4.nxv16i8(<vscale x 16 x i8> %zn0, <vscale x 16 x i8> %zn1, <vscale x 16 x i8> %zn2, <vscale x 16 x i8> %zn3, target("aarch64.svcount") %pn, ptr %ptr);
  ret void
}

define void @st1_x4_i16(<vscale x 16 x i8> %unused, <vscale x 8 x i16> %zn0, <vscale x 8 x i16> %zn1, <vscale x 8 x i16> %zn2, <vscale x 8 x i16> %zn3, target("aarch64.svcount") %pn, ptr %ptr) nounwind {
; CHECK-LABEL: st1_x4_i16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    str x29, [sp, #-16]! // 8-byte Folded Spill
; CHECK-NEXT:    addvl sp, sp, #-1
; CHECK-NEXT:    mov z7.d, z4.d
; CHECK-NEXT:    str p8, [sp, #7, mul vl] // 2-byte Folded Spill
; CHECK-NEXT:    mov p8.b, p0.b
; CHECK-NEXT:    mov z6.d, z3.d
; CHECK-NEXT:    mov z5.d, z2.d
; CHECK-NEXT:    mov z4.d, z1.d
; CHECK-NEXT:    st1h { z4.h - z7.h }, pn8, [x0]
; CHECK-NEXT:    ldr p8, [sp, #7, mul vl] // 2-byte Folded Reload
; CHECK-NEXT:    addvl sp, sp, #1
; CHECK-NEXT:    ldr x29, [sp], #16 // 8-byte Folded Reload
; CHECK-NEXT:    ret
  call void @llvm.aarch64.sve.st1.pn.x4.nxv8i16(<vscale x 8 x i16> %zn0, <vscale x 8 x i16> %zn1, <vscale x 8 x i16> %zn2, <vscale x 8 x i16> %zn3, target("aarch64.svcount") %pn, ptr %ptr);
  ret void
}

define void @st1_x4_i32(<vscale x 16 x i8> %unused, <vscale x 4 x i32> %zn0, <vscale x 4 x i32> %zn1, <vscale x 4 x i32> %zn2, <vscale x 4 x i32> %zn3, target("aarch64.svcount") %pn, ptr %ptr) nounwind {
; CHECK-LABEL: st1_x4_i32:
; CHECK:       // %bb.0:
; CHECK-NEXT:    str x29, [sp, #-16]! // 8-byte Folded Spill
; CHECK-NEXT:    addvl sp, sp, #-1
; CHECK-NEXT:    mov z7.d, z4.d
; CHECK-NEXT:    str p8, [sp, #7, mul vl] // 2-byte Folded Spill
; CHECK-NEXT:    mov p8.b, p0.b
; CHECK-NEXT:    mov z6.d, z3.d
; CHECK-NEXT:    mov z5.d, z2.d
; CHECK-NEXT:    mov z4.d, z1.d
; CHECK-NEXT:    st1w { z4.s - z7.s }, pn8, [x0]
; CHECK-NEXT:    ldr p8, [sp, #7, mul vl] // 2-byte Folded Reload
; CHECK-NEXT:    addvl sp, sp, #1
; CHECK-NEXT:    ldr x29, [sp], #16 // 8-byte Folded Reload
; CHECK-NEXT:    ret
  call void @llvm.aarch64.sve.st1.pn.x4.nxv4i32(<vscale x 4 x i32> %zn0, <vscale x 4 x i32> %zn1, <vscale x 4 x i32> %zn2, <vscale x 4 x i32> %zn3, target("aarch64.svcount") %pn, ptr %ptr);
  ret void
}

define void @st1_x4_i64(<vscale x 16 x i8> %unused, <vscale x 2 x i64> %zn0, <vscale x 2 x i64> %zn1, <vscale x 2 x i64> %zn2, <vscale x 2 x i64> %zn3, target("aarch64.svcount") %pn, ptr %ptr) nounwind {
; CHECK-LABEL: st1_x4_i64:
; CHECK:       // %bb.0:
; CHECK-NEXT:    str x29, [sp, #-16]! // 8-byte Folded Spill
; CHECK-NEXT:    addvl sp, sp, #-1
; CHECK-NEXT:    mov z7.d, z4.d
; CHECK-NEXT:    str p8, [sp, #7, mul vl] // 2-byte Folded Spill
; CHECK-NEXT:    mov p8.b, p0.b
; CHECK-NEXT:    mov z6.d, z3.d
; CHECK-NEXT:    mov z5.d, z2.d
; CHECK-NEXT:    mov z4.d, z1.d
; CHECK-NEXT:    st1d { z4.d - z7.d }, pn8, [x0]
; CHECK-NEXT:    ldr p8, [sp, #7, mul vl] // 2-byte Folded Reload
; CHECK-NEXT:    addvl sp, sp, #1
; CHECK-NEXT:    ldr x29, [sp], #16 // 8-byte Folded Reload
; CHECK-NEXT:    ret
  call void @llvm.aarch64.sve.st1.pn.x4.nxv2i64(<vscale x 2 x i64> %zn0, <vscale x 2 x i64> %zn1, <vscale x 2 x i64> %zn2, <vscale x 2 x i64> %zn3, target("aarch64.svcount") %pn, ptr %ptr);
  ret void
}

define void @st1_x4_f16(<vscale x 16 x i8> %unused, <vscale x 8 x half> %zn0, <vscale x 8 x half> %zn1, <vscale x 8 x half> %zn2, <vscale x 8 x half> %zn3, target("aarch64.svcount") %pn, ptr %ptr) nounwind {
; CHECK-LABEL: st1_x4_f16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    str x29, [sp, #-16]! // 8-byte Folded Spill
; CHECK-NEXT:    addvl sp, sp, #-1
; CHECK-NEXT:    mov z7.d, z4.d
; CHECK-NEXT:    str p8, [sp, #7, mul vl] // 2-byte Folded Spill
; CHECK-NEXT:    mov p8.b, p0.b
; CHECK-NEXT:    mov z6.d, z3.d
; CHECK-NEXT:    mov z5.d, z2.d
; CHECK-NEXT:    mov z4.d, z1.d
; CHECK-NEXT:    st1h { z4.h - z7.h }, pn8, [x0]
; CHECK-NEXT:    ldr p8, [sp, #7, mul vl] // 2-byte Folded Reload
; CHECK-NEXT:    addvl sp, sp, #1
; CHECK-NEXT:    ldr x29, [sp], #16 // 8-byte Folded Reload
; CHECK-NEXT:    ret
  call void @llvm.aarch64.sve.st1.pn.x4.nxv8f16(<vscale x 8 x half> %zn0, <vscale x 8 x half> %zn1, <vscale x 8 x half> %zn2, <vscale x 8 x half> %zn3, target("aarch64.svcount") %pn, ptr %ptr);
  ret void
}

define void @st1_x4_bf16(<vscale x 16 x i8> %unused, <vscale x 8 x bfloat> %zn0, <vscale x 8 x bfloat> %zn1, <vscale x 8 x bfloat> %zn2, <vscale x 8 x bfloat> %zn3, target("aarch64.svcount") %pn, ptr %ptr) nounwind {
; CHECK-LABEL: st1_x4_bf16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    str x29, [sp, #-16]! // 8-byte Folded Spill
; CHECK-NEXT:    addvl sp, sp, #-1
; CHECK-NEXT:    mov z7.d, z4.d
; CHECK-NEXT:    str p8, [sp, #7, mul vl] // 2-byte Folded Spill
; CHECK-NEXT:    mov p8.b, p0.b
; CHECK-NEXT:    mov z6.d, z3.d
; CHECK-NEXT:    mov z5.d, z2.d
; CHECK-NEXT:    mov z4.d, z1.d
; CHECK-NEXT:    st1h { z4.h - z7.h }, pn8, [x0]
; CHECK-NEXT:    ldr p8, [sp, #7, mul vl] // 2-byte Folded Reload
; CHECK-NEXT:    addvl sp, sp, #1
; CHECK-NEXT:    ldr x29, [sp], #16 // 8-byte Folded Reload
; CHECK-NEXT:    ret
  call void @llvm.aarch64.sve.st1.pn.x4.nxv8bf16(<vscale x 8 x bfloat> %zn0, <vscale x 8 x bfloat> %zn1, <vscale x 8 x bfloat> %zn2, <vscale x 8 x bfloat> %zn3, target("aarch64.svcount") %pn, ptr %ptr);
  ret void
}

define void @st1_x4_f32(<vscale x 16 x i8> %unused, <vscale x 4 x float> %zn0, <vscale x 4 x float> %zn1, <vscale x 4 x float> %zn2, <vscale x 4 x float> %zn3, target("aarch64.svcount") %pn, ptr %ptr) nounwind {
; CHECK-LABEL: st1_x4_f32:
; CHECK:       // %bb.0:
; CHECK-NEXT:    str x29, [sp, #-16]! // 8-byte Folded Spill
; CHECK-NEXT:    addvl sp, sp, #-1
; CHECK-NEXT:    mov z7.d, z4.d
; CHECK-NEXT:    str p8, [sp, #7, mul vl] // 2-byte Folded Spill
; CHECK-NEXT:    mov p8.b, p0.b
; CHECK-NEXT:    mov z6.d, z3.d
; CHECK-NEXT:    mov z5.d, z2.d
; CHECK-NEXT:    mov z4.d, z1.d
; CHECK-NEXT:    st1w { z4.s - z7.s }, pn8, [x0]
; CHECK-NEXT:    ldr p8, [sp, #7, mul vl] // 2-byte Folded Reload
; CHECK-NEXT:    addvl sp, sp, #1
; CHECK-NEXT:    ldr x29, [sp], #16 // 8-byte Folded Reload
; CHECK-NEXT:    ret
  call void @llvm.aarch64.sve.st1.pn.x4.nxv4f32(<vscale x 4 x float> %zn0, <vscale x 4 x float> %zn1, <vscale x 4 x float> %zn2, <vscale x 4 x float> %zn3, target("aarch64.svcount") %pn, ptr %ptr);
  ret void
}

define void @st1_x4_f64(<vscale x 16 x i8> %unused, <vscale x 2 x double> %zn0, <vscale x 2 x double> %zn1, <vscale x 2 x double> %zn2, <vscale x 2 x double> %zn3, target("aarch64.svcount") %pn, ptr %ptr) nounwind {
; CHECK-LABEL: st1_x4_f64:
; CHECK:       // %bb.0:
; CHECK-NEXT:    str x29, [sp, #-16]! // 8-byte Folded Spill
; CHECK-NEXT:    addvl sp, sp, #-1
; CHECK-NEXT:    mov z7.d, z4.d
; CHECK-NEXT:    str p8, [sp, #7, mul vl] // 2-byte Folded Spill
; CHECK-NEXT:    mov p8.b, p0.b
; CHECK-NEXT:    mov z6.d, z3.d
; CHECK-NEXT:    mov z5.d, z2.d
; CHECK-NEXT:    mov z4.d, z1.d
; CHECK-NEXT:    st1d { z4.d - z7.d }, pn8, [x0]
; CHECK-NEXT:    ldr p8, [sp, #7, mul vl] // 2-byte Folded Reload
; CHECK-NEXT:    addvl sp, sp, #1
; CHECK-NEXT:    ldr x29, [sp], #16 // 8-byte Folded Reload
; CHECK-NEXT:    ret
  call void @llvm.aarch64.sve.st1.pn.x4.nxv2f64(<vscale x 2 x double> %zn0, <vscale x 2 x double> %zn1, <vscale x 2 x double> %zn2, <vscale x 2 x double> %zn3, target("aarch64.svcount") %pn, ptr %ptr);
  ret void
}

; == Non-temporal Multi-Vector Consecutive Stores ==

define void @stnt1_x2_i8(<vscale x 16 x i8> %unused, <vscale x 16 x i8> %zn0, <vscale x 16 x i8> %zn1, target("aarch64.svcount") %pn, ptr %ptr) nounwind {
; CHECK-LABEL: stnt1_x2_i8:
; CHECK:       // %bb.0:
; CHECK-NEXT:    str x29, [sp, #-16]! // 8-byte Folded Spill
; CHECK-NEXT:    addvl sp, sp, #-1
; CHECK-NEXT:    mov z3.d, z2.d
; CHECK-NEXT:    str p8, [sp, #7, mul vl] // 2-byte Folded Spill
; CHECK-NEXT:    mov p8.b, p0.b
; CHECK-NEXT:    mov z2.d, z1.d
; CHECK-NEXT:    stnt1b { z2.b, z3.b }, pn8, [x0]
; CHECK-NEXT:    ldr p8, [sp, #7, mul vl] // 2-byte Folded Reload
; CHECK-NEXT:    addvl sp, sp, #1
; CHECK-NEXT:    ldr x29, [sp], #16 // 8-byte Folded Reload
; CHECK-NEXT:    ret
  call void @llvm.aarch64.sve.stnt1.pn.x2.nxv16i8(<vscale x 16 x i8> %zn0, <vscale x 16 x i8> %zn1, target("aarch64.svcount") %pn, ptr %ptr);
  ret void
}

define void @stnt1_x2_i16(<vscale x 16 x i8> %unused, <vscale x 8 x i16> %zn0, <vscale x 8 x i16> %zn1, target("aarch64.svcount") %pn, ptr %ptr) nounwind {
; CHECK-LABEL: stnt1_x2_i16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    str x29, [sp, #-16]! // 8-byte Folded Spill
; CHECK-NEXT:    addvl sp, sp, #-1
; CHECK-NEXT:    mov z3.d, z2.d
; CHECK-NEXT:    str p8, [sp, #7, mul vl] // 2-byte Folded Spill
; CHECK-NEXT:    mov p8.b, p0.b
; CHECK-NEXT:    mov z2.d, z1.d
; CHECK-NEXT:    stnt1h { z2.h, z3.h }, pn8, [x0]
; CHECK-NEXT:    ldr p8, [sp, #7, mul vl] // 2-byte Folded Reload
; CHECK-NEXT:    addvl sp, sp, #1
; CHECK-NEXT:    ldr x29, [sp], #16 // 8-byte Folded Reload
; CHECK-NEXT:    ret
  call void @llvm.aarch64.sve.stnt1.pn.x2.nxv8i16(<vscale x 8 x i16> %zn0, <vscale x 8 x i16> %zn1, target("aarch64.svcount") %pn, ptr %ptr);
  ret void
}

define void @stnt1_x2_i32(<vscale x 16 x i8> %unused, <vscale x 4 x i32> %zn0, <vscale x 4 x i32> %zn1, target("aarch64.svcount") %pn, ptr %ptr) nounwind {
; CHECK-LABEL: stnt1_x2_i32:
; CHECK:       // %bb.0:
; CHECK-NEXT:    str x29, [sp, #-16]! // 8-byte Folded Spill
; CHECK-NEXT:    addvl sp, sp, #-1
; CHECK-NEXT:    mov z3.d, z2.d
; CHECK-NEXT:    str p8, [sp, #7, mul vl] // 2-byte Folded Spill
; CHECK-NEXT:    mov p8.b, p0.b
; CHECK-NEXT:    mov z2.d, z1.d
; CHECK-NEXT:    stnt1w { z2.s, z3.s }, pn8, [x0]
; CHECK-NEXT:    ldr p8, [sp, #7, mul vl] // 2-byte Folded Reload
; CHECK-NEXT:    addvl sp, sp, #1
; CHECK-NEXT:    ldr x29, [sp], #16 // 8-byte Folded Reload
; CHECK-NEXT:    ret
  call void @llvm.aarch64.sve.stnt1.pn.x2.nxv4i32(<vscale x 4 x i32> %zn0, <vscale x 4 x i32> %zn1, target("aarch64.svcount") %pn, ptr %ptr);
  ret void
}

define void @stnt1_x2_i64(<vscale x 16 x i8> %unused, <vscale x 2 x i64> %zn0, <vscale x 2 x i64> %zn1, target("aarch64.svcount") %pn, ptr %ptr) nounwind {
; CHECK-LABEL: stnt1_x2_i64:
; CHECK:       // %bb.0:
; CHECK-NEXT:    str x29, [sp, #-16]! // 8-byte Folded Spill
; CHECK-NEXT:    addvl sp, sp, #-1
; CHECK-NEXT:    mov z3.d, z2.d
; CHECK-NEXT:    str p8, [sp, #7, mul vl] // 2-byte Folded Spill
; CHECK-NEXT:    mov p8.b, p0.b
; CHECK-NEXT:    mov z2.d, z1.d
; CHECK-NEXT:    stnt1d { z2.d, z3.d }, pn8, [x0]
; CHECK-NEXT:    ldr p8, [sp, #7, mul vl] // 2-byte Folded Reload
; CHECK-NEXT:    addvl sp, sp, #1
; CHECK-NEXT:    ldr x29, [sp], #16 // 8-byte Folded Reload
; CHECK-NEXT:    ret
  call void @llvm.aarch64.sve.stnt1.pn.x2.nxv2i64(<vscale x 2 x i64> %zn0, <vscale x 2 x i64> %zn1, target("aarch64.svcount") %pn, ptr %ptr);
  ret void
}

define void @stnt1_x2_f16(<vscale x 16 x i8> %unused, <vscale x 8 x half> %zn0, <vscale x 8 x half> %zn1, target("aarch64.svcount") %pn, ptr %ptr) nounwind {
; CHECK-LABEL: stnt1_x2_f16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    str x29, [sp, #-16]! // 8-byte Folded Spill
; CHECK-NEXT:    addvl sp, sp, #-1
; CHECK-NEXT:    mov z3.d, z2.d
; CHECK-NEXT:    str p8, [sp, #7, mul vl] // 2-byte Folded Spill
; CHECK-NEXT:    mov p8.b, p0.b
; CHECK-NEXT:    mov z2.d, z1.d
; CHECK-NEXT:    stnt1h { z2.h, z3.h }, pn8, [x0]
; CHECK-NEXT:    ldr p8, [sp, #7, mul vl] // 2-byte Folded Reload
; CHECK-NEXT:    addvl sp, sp, #1
; CHECK-NEXT:    ldr x29, [sp], #16 // 8-byte Folded Reload
; CHECK-NEXT:    ret
  call void @llvm.aarch64.sve.stnt1.pn.x2.nxv8f16(<vscale x 8 x half> %zn0, <vscale x 8 x half> %zn1, target("aarch64.svcount") %pn, ptr %ptr);
  ret void
}

define void @stnt1_x2_bf16(<vscale x 16 x i8> %unused, <vscale x 8 x bfloat> %zn0, <vscale x 8 x bfloat> %zn1, target("aarch64.svcount") %pn, ptr %ptr) nounwind {
; CHECK-LABEL: stnt1_x2_bf16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    str x29, [sp, #-16]! // 8-byte Folded Spill
; CHECK-NEXT:    addvl sp, sp, #-1
; CHECK-NEXT:    mov z3.d, z2.d
; CHECK-NEXT:    str p8, [sp, #7, mul vl] // 2-byte Folded Spill
; CHECK-NEXT:    mov p8.b, p0.b
; CHECK-NEXT:    mov z2.d, z1.d
; CHECK-NEXT:    stnt1h { z2.h, z3.h }, pn8, [x0]
; CHECK-NEXT:    ldr p8, [sp, #7, mul vl] // 2-byte Folded Reload
; CHECK-NEXT:    addvl sp, sp, #1
; CHECK-NEXT:    ldr x29, [sp], #16 // 8-byte Folded Reload
; CHECK-NEXT:    ret
  call void @llvm.aarch64.sve.stnt1.pn.x2.nxv8bf16(<vscale x 8 x bfloat> %zn0, <vscale x 8 x bfloat> %zn1, target("aarch64.svcount") %pn, ptr %ptr);
  ret void
}

define void @stnt1_x2_f32(<vscale x 16 x i8> %unused, <vscale x 4 x float> %zn0, <vscale x 4 x float> %zn1, target("aarch64.svcount") %pn, ptr %ptr) nounwind {
; CHECK-LABEL: stnt1_x2_f32:
; CHECK:       // %bb.0:
; CHECK-NEXT:    str x29, [sp, #-16]! // 8-byte Folded Spill
; CHECK-NEXT:    addvl sp, sp, #-1
; CHECK-NEXT:    mov z3.d, z2.d
; CHECK-NEXT:    str p8, [sp, #7, mul vl] // 2-byte Folded Spill
; CHECK-NEXT:    mov p8.b, p0.b
; CHECK-NEXT:    mov z2.d, z1.d
; CHECK-NEXT:    stnt1w { z2.s, z3.s }, pn8, [x0]
; CHECK-NEXT:    ldr p8, [sp, #7, mul vl] // 2-byte Folded Reload
; CHECK-NEXT:    addvl sp, sp, #1
; CHECK-NEXT:    ldr x29, [sp], #16 // 8-byte Folded Reload
; CHECK-NEXT:    ret
  call void @llvm.aarch64.sve.stnt1.pn.x2.nxv4f32(<vscale x 4 x float> %zn0, <vscale x 4 x float> %zn1, target("aarch64.svcount") %pn, ptr %ptr);
  ret void
}

define void @stnt1_x2_f64(<vscale x 16 x i8> %unused, <vscale x 2 x double> %zn0, <vscale x 2 x double> %zn1, target("aarch64.svcount") %pn, ptr %ptr) nounwind {
; CHECK-LABEL: stnt1_x2_f64:
; CHECK:       // %bb.0:
; CHECK-NEXT:    str x29, [sp, #-16]! // 8-byte Folded Spill
; CHECK-NEXT:    addvl sp, sp, #-1
; CHECK-NEXT:    mov z3.d, z2.d
; CHECK-NEXT:    str p8, [sp, #7, mul vl] // 2-byte Folded Spill
; CHECK-NEXT:    mov p8.b, p0.b
; CHECK-NEXT:    mov z2.d, z1.d
; CHECK-NEXT:    stnt1d { z2.d, z3.d }, pn8, [x0]
; CHECK-NEXT:    ldr p8, [sp, #7, mul vl] // 2-byte Folded Reload
; CHECK-NEXT:    addvl sp, sp, #1
; CHECK-NEXT:    ldr x29, [sp], #16 // 8-byte Folded Reload
; CHECK-NEXT:    ret
  call void @llvm.aarch64.sve.stnt1.pn.x2.nxv2f64(<vscale x 2 x double> %zn0, <vscale x 2 x double> %zn1, target("aarch64.svcount") %pn, ptr %ptr);
  ret void
}

define void @stnt1_x4_i8(<vscale x 16 x i8> %unused, <vscale x 16 x i8> %zn0, <vscale x 16 x i8> %zn1, <vscale x 16 x i8> %zn2, <vscale x 16 x i8> %zn3, target("aarch64.svcount") %pn, ptr %ptr) nounwind {
; CHECK-LABEL: stnt1_x4_i8:
; CHECK:       // %bb.0:
; CHECK-NEXT:    str x29, [sp, #-16]! // 8-byte Folded Spill
; CHECK-NEXT:    addvl sp, sp, #-1
; CHECK-NEXT:    mov z7.d, z4.d
; CHECK-NEXT:    str p8, [sp, #7, mul vl] // 2-byte Folded Spill
; CHECK-NEXT:    mov p8.b, p0.b
; CHECK-NEXT:    mov z6.d, z3.d
; CHECK-NEXT:    mov z5.d, z2.d
; CHECK-NEXT:    mov z4.d, z1.d
; CHECK-NEXT:    stnt1b { z4.b - z7.b }, pn8, [x0]
; CHECK-NEXT:    ldr p8, [sp, #7, mul vl] // 2-byte Folded Reload
; CHECK-NEXT:    addvl sp, sp, #1
; CHECK-NEXT:    ldr x29, [sp], #16 // 8-byte Folded Reload
; CHECK-NEXT:    ret
  call void @llvm.aarch64.sve.stnt1.pn.x4.nxv16i8(<vscale x 16 x i8> %zn0, <vscale x 16 x i8> %zn1, <vscale x 16 x i8> %zn2, <vscale x 16 x i8> %zn3, target("aarch64.svcount") %pn, ptr %ptr);
  ret void
}

define void @stnt1_x4_i16(<vscale x 16 x i8> %unused, <vscale x 8 x i16> %zn0, <vscale x 8 x i16> %zn1, <vscale x 8 x i16> %zn2, <vscale x 8 x i16> %zn3, target("aarch64.svcount") %pn, ptr %ptr) nounwind {
; CHECK-LABEL: stnt1_x4_i16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    str x29, [sp, #-16]! // 8-byte Folded Spill
; CHECK-NEXT:    addvl sp, sp, #-1
; CHECK-NEXT:    mov z7.d, z4.d
; CHECK-NEXT:    str p8, [sp, #7, mul vl] // 2-byte Folded Spill
; CHECK-NEXT:    mov p8.b, p0.b
; CHECK-NEXT:    mov z6.d, z3.d
; CHECK-NEXT:    mov z5.d, z2.d
; CHECK-NEXT:    mov z4.d, z1.d
; CHECK-NEXT:    stnt1h { z4.h - z7.h }, pn8, [x0]
; CHECK-NEXT:    ldr p8, [sp, #7, mul vl] // 2-byte Folded Reload
; CHECK-NEXT:    addvl sp, sp, #1
; CHECK-NEXT:    ldr x29, [sp], #16 // 8-byte Folded Reload
; CHECK-NEXT:    ret
  call void @llvm.aarch64.sve.stnt1.pn.x4.nxv8i16(<vscale x 8 x i16> %zn0, <vscale x 8 x i16> %zn1, <vscale x 8 x i16> %zn2, <vscale x 8 x i16> %zn3, target("aarch64.svcount") %pn, ptr %ptr);
  ret void
}

define void @stnt1_x4_i32(<vscale x 16 x i8> %unused, <vscale x 4 x i32> %zn0, <vscale x 4 x i32> %zn1, <vscale x 4 x i32> %zn2, <vscale x 4 x i32> %zn3, target("aarch64.svcount") %pn, ptr %ptr) nounwind {
; CHECK-LABEL: stnt1_x4_i32:
; CHECK:       // %bb.0:
; CHECK-NEXT:    str x29, [sp, #-16]! // 8-byte Folded Spill
; CHECK-NEXT:    addvl sp, sp, #-1
; CHECK-NEXT:    mov z7.d, z4.d
; CHECK-NEXT:    str p8, [sp, #7, mul vl] // 2-byte Folded Spill
; CHECK-NEXT:    mov p8.b, p0.b
; CHECK-NEXT:    mov z6.d, z3.d
; CHECK-NEXT:    mov z5.d, z2.d
; CHECK-NEXT:    mov z4.d, z1.d
; CHECK-NEXT:    stnt1w { z4.s - z7.s }, pn8, [x0]
; CHECK-NEXT:    ldr p8, [sp, #7, mul vl] // 2-byte Folded Reload
; CHECK-NEXT:    addvl sp, sp, #1
; CHECK-NEXT:    ldr x29, [sp], #16 // 8-byte Folded Reload
; CHECK-NEXT:    ret
  call void @llvm.aarch64.sve.stnt1.pn.x4.nxv4i32(<vscale x 4 x i32> %zn0, <vscale x 4 x i32> %zn1, <vscale x 4 x i32> %zn2, <vscale x 4 x i32> %zn3, target("aarch64.svcount") %pn, ptr %ptr);
  ret void
}

define void @stnt1_x4_i64(<vscale x 16 x i8> %unused, <vscale x 2 x i64> %zn0, <vscale x 2 x i64> %zn1, <vscale x 2 x i64> %zn2, <vscale x 2 x i64> %zn3, target("aarch64.svcount") %pn, ptr %ptr) nounwind {
; CHECK-LABEL: stnt1_x4_i64:
; CHECK:       // %bb.0:
; CHECK-NEXT:    str x29, [sp, #-16]! // 8-byte Folded Spill
; CHECK-NEXT:    addvl sp, sp, #-1
; CHECK-NEXT:    mov z7.d, z4.d
; CHECK-NEXT:    str p8, [sp, #7, mul vl] // 2-byte Folded Spill
; CHECK-NEXT:    mov p8.b, p0.b
; CHECK-NEXT:    mov z6.d, z3.d
; CHECK-NEXT:    mov z5.d, z2.d
; CHECK-NEXT:    mov z4.d, z1.d
; CHECK-NEXT:    stnt1d { z4.d - z7.d }, pn8, [x0]
; CHECK-NEXT:    ldr p8, [sp, #7, mul vl] // 2-byte Folded Reload
; CHECK-NEXT:    addvl sp, sp, #1
; CHECK-NEXT:    ldr x29, [sp], #16 // 8-byte Folded Reload
; CHECK-NEXT:    ret
  call void @llvm.aarch64.sve.stnt1.pn.x4.nxv2i64(<vscale x 2 x i64> %zn0, <vscale x 2 x i64> %zn1, <vscale x 2 x i64> %zn2, <vscale x 2 x i64> %zn3, target("aarch64.svcount") %pn, ptr %ptr);
  ret void
}

define void @stnt1_x4_f16(<vscale x 16 x i8> %unused, <vscale x 8 x half> %zn0, <vscale x 8 x half> %zn1, <vscale x 8 x half> %zn2, <vscale x 8 x half> %zn3, target("aarch64.svcount") %pn, ptr %ptr) nounwind {
; CHECK-LABEL: stnt1_x4_f16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    str x29, [sp, #-16]! // 8-byte Folded Spill
; CHECK-NEXT:    addvl sp, sp, #-1
; CHECK-NEXT:    mov z7.d, z4.d
; CHECK-NEXT:    str p8, [sp, #7, mul vl] // 2-byte Folded Spill
; CHECK-NEXT:    mov p8.b, p0.b
; CHECK-NEXT:    mov z6.d, z3.d
; CHECK-NEXT:    mov z5.d, z2.d
; CHECK-NEXT:    mov z4.d, z1.d
; CHECK-NEXT:    stnt1h { z4.h - z7.h }, pn8, [x0]
; CHECK-NEXT:    ldr p8, [sp, #7, mul vl] // 2-byte Folded Reload
; CHECK-NEXT:    addvl sp, sp, #1
; CHECK-NEXT:    ldr x29, [sp], #16 // 8-byte Folded Reload
; CHECK-NEXT:    ret
  call void @llvm.aarch64.sve.stnt1.pn.x4.nxv8f16(<vscale x 8 x half> %zn0, <vscale x 8 x half> %zn1, <vscale x 8 x half> %zn2, <vscale x 8 x half> %zn3, target("aarch64.svcount") %pn, ptr %ptr);
  ret void
}

define void @stnt1_x4_bf16(<vscale x 16 x i8> %unused, <vscale x 8 x bfloat> %zn0, <vscale x 8 x bfloat> %zn1, <vscale x 8 x bfloat> %zn2, <vscale x 8 x bfloat> %zn3, target("aarch64.svcount") %pn, ptr %ptr) nounwind {
; CHECK-LABEL: stnt1_x4_bf16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    str x29, [sp, #-16]! // 8-byte Folded Spill
; CHECK-NEXT:    addvl sp, sp, #-1
; CHECK-NEXT:    mov z7.d, z4.d
; CHECK-NEXT:    str p8, [sp, #7, mul vl] // 2-byte Folded Spill
; CHECK-NEXT:    mov p8.b, p0.b
; CHECK-NEXT:    mov z6.d, z3.d
; CHECK-NEXT:    mov z5.d, z2.d
; CHECK-NEXT:    mov z4.d, z1.d
; CHECK-NEXT:    stnt1h { z4.h - z7.h }, pn8, [x0]
; CHECK-NEXT:    ldr p8, [sp, #7, mul vl] // 2-byte Folded Reload
; CHECK-NEXT:    addvl sp, sp, #1
; CHECK-NEXT:    ldr x29, [sp], #16 // 8-byte Folded Reload
; CHECK-NEXT:    ret
  call void @llvm.aarch64.sve.stnt1.pn.x4.nxv8bf16(<vscale x 8 x bfloat> %zn0, <vscale x 8 x bfloat> %zn1, <vscale x 8 x bfloat> %zn2, <vscale x 8 x bfloat> %zn3, target("aarch64.svcount") %pn, ptr %ptr);
  ret void
}

define void @stnt1_x4_f32(<vscale x 16 x i8> %unused, <vscale x 4 x float> %zn0, <vscale x 4 x float> %zn1, <vscale x 4 x float> %zn2, <vscale x 4 x float> %zn3, target("aarch64.svcount") %pn, ptr %ptr) nounwind {
; CHECK-LABEL: stnt1_x4_f32:
; CHECK:       // %bb.0:
; CHECK-NEXT:    str x29, [sp, #-16]! // 8-byte Folded Spill
; CHECK-NEXT:    addvl sp, sp, #-1
; CHECK-NEXT:    mov z7.d, z4.d
; CHECK-NEXT:    str p8, [sp, #7, mul vl] // 2-byte Folded Spill
; CHECK-NEXT:    mov p8.b, p0.b
; CHECK-NEXT:    mov z6.d, z3.d
; CHECK-NEXT:    mov z5.d, z2.d
; CHECK-NEXT:    mov z4.d, z1.d
; CHECK-NEXT:    stnt1w { z4.s - z7.s }, pn8, [x0]
; CHECK-NEXT:    ldr p8, [sp, #7, mul vl] // 2-byte Folded Reload
; CHECK-NEXT:    addvl sp, sp, #1
; CHECK-NEXT:    ldr x29, [sp], #16 // 8-byte Folded Reload
; CHECK-NEXT:    ret
  call void @llvm.aarch64.sve.stnt1.pn.x4.nxv4f32(<vscale x 4 x float> %zn0, <vscale x 4 x float> %zn1, <vscale x 4 x float> %zn2, <vscale x 4 x float> %zn3, target("aarch64.svcount") %pn, ptr %ptr);
  ret void
}

define void @stnt1_x4_f64(<vscale x 16 x i8> %unused, <vscale x 2 x double> %zn0, <vscale x 2 x double> %zn1, <vscale x 2 x double> %zn2, <vscale x 2 x double> %zn3, target("aarch64.svcount") %pn, ptr %ptr) nounwind {
; CHECK-LABEL: stnt1_x4_f64:
; CHECK:       // %bb.0:
; CHECK-NEXT:    str x29, [sp, #-16]! // 8-byte Folded Spill
; CHECK-NEXT:    addvl sp, sp, #-1
; CHECK-NEXT:    mov z7.d, z4.d
; CHECK-NEXT:    str p8, [sp, #7, mul vl] // 2-byte Folded Spill
; CHECK-NEXT:    mov p8.b, p0.b
; CHECK-NEXT:    mov z6.d, z3.d
; CHECK-NEXT:    mov z5.d, z2.d
; CHECK-NEXT:    mov z4.d, z1.d
; CHECK-NEXT:    stnt1d { z4.d - z7.d }, pn8, [x0]
; CHECK-NEXT:    ldr p8, [sp, #7, mul vl] // 2-byte Folded Reload
; CHECK-NEXT:    addvl sp, sp, #1
; CHECK-NEXT:    ldr x29, [sp], #16 // 8-byte Folded Reload
; CHECK-NEXT:    ret
  call void @llvm.aarch64.sve.stnt1.pn.x4.nxv2f64(<vscale x 2 x double> %zn0, <vscale x 2 x double> %zn1, <vscale x 2 x double> %zn2, <vscale x 2 x double> %zn3, target("aarch64.svcount") %pn, ptr %ptr);
  ret void
}

declare void @llvm.aarch64.sve.st1.pn.x2.nxv16i8(<vscale x 16 x i8>, <vscale x 16 x i8>, target("aarch64.svcount"), ptr)
declare void @llvm.aarch64.sve.st1.pn.x2.nxv8i16(<vscale x 8 x i16>, <vscale x 8 x i16>, target("aarch64.svcount"), ptr)
declare void @llvm.aarch64.sve.st1.pn.x2.nxv4i32(<vscale x 4 x i32>, <vscale x 4 x i32>, target("aarch64.svcount"), ptr)
declare void @llvm.aarch64.sve.st1.pn.x2.nxv2i64(<vscale x 2 x i64>, <vscale x 2 x i64>, target("aarch64.svcount"), ptr)
declare void @llvm.aarch64.sve.st1.pn.x2.nxv8f16(<vscale x 8 x half>, <vscale x 8 x half>, target("aarch64.svcount"), ptr)
declare void @llvm.aarch64.sve.st1.pn.x2.nxv8bf16(<vscale x 8 x bfloat>, <vscale x 8 x bfloat>, target("aarch64.svcount"), ptr)
declare void @llvm.aarch64.sve.st1.pn.x2.nxv4f32(<vscale x 4 x float>, <vscale x 4 x float>, target("aarch64.svcount"), ptr)
declare void @llvm.aarch64.sve.st1.pn.x2.nxv2f64(<vscale x 2 x double>, <vscale x 2 x double>, target("aarch64.svcount"), ptr)
declare void @llvm.aarch64.sve.stnt1.pn.x2.nxv16i8(<vscale x 16 x i8>, <vscale x 16 x i8>, target("aarch64.svcount"), ptr)
declare void @llvm.aarch64.sve.stnt1.pn.x2.nxv8i16(<vscale x 8 x i16>, <vscale x 8 x i16>, target("aarch64.svcount"), ptr)
declare void @llvm.aarch64.sve.stnt1.pn.x2.nxv4i32(<vscale x 4 x i32>, <vscale x 4 x i32>, target("aarch64.svcount"), ptr)
declare void @llvm.aarch64.sve.stnt1.pn.x2.nxv2i64(<vscale x 2 x i64>, <vscale x 2 x i64>, target("aarch64.svcount"), ptr)
declare void @llvm.aarch64.sve.stnt1.pn.x2.nxv8f16(<vscale x 8 x half>, <vscale x 8 x half>, target("aarch64.svcount"), ptr)
declare void @llvm.aarch64.sve.stnt1.pn.x2.nxv8bf16(<vscale x 8 x bfloat>, <vscale x 8 x bfloat>, target("aarch64.svcount"), ptr)
declare void @llvm.aarch64.sve.stnt1.pn.x2.nxv4f32(<vscale x 4 x float>, <vscale x 4 x float>, target("aarch64.svcount"), ptr)
declare void @llvm.aarch64.sve.stnt1.pn.x2.nxv2f64(<vscale x 2 x double>, <vscale x 2 x double>, target("aarch64.svcount"), ptr)

declare void @llvm.aarch64.sve.st1.pn.x4.nxv16i8(<vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, target("aarch64.svcount"), ptr)
declare void @llvm.aarch64.sve.st1.pn.x4.nxv8i16(<vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16>, target("aarch64.svcount"), ptr)
declare void @llvm.aarch64.sve.st1.pn.x4.nxv4i32(<vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, target("aarch64.svcount"), ptr)
declare void @llvm.aarch64.sve.st1.pn.x4.nxv2i64(<vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>, target("aarch64.svcount"), ptr)
declare void @llvm.aarch64.sve.st1.pn.x4.nxv8f16(<vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x half>, target("aarch64.svcount"), ptr)
declare void @llvm.aarch64.sve.st1.pn.x4.nxv8bf16(<vscale x 8 x bfloat>, <vscale x 8 x bfloat>, <vscale x 8 x bfloat>, <vscale x 8 x bfloat>, target("aarch64.svcount"), ptr)
declare void @llvm.aarch64.sve.st1.pn.x4.nxv4f32(<vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float>, target("aarch64.svcount"), ptr)
declare void @llvm.aarch64.sve.st1.pn.x4.nxv2f64(<vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double>, target("aarch64.svcount"), ptr)
declare void @llvm.aarch64.sve.stnt1.pn.x4.nxv16i8(<vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, target("aarch64.svcount"), ptr)
declare void @llvm.aarch64.sve.stnt1.pn.x4.nxv8i16(<vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16>, target("aarch64.svcount"), ptr)
declare void @llvm.aarch64.sve.stnt1.pn.x4.nxv4i32(<vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, target("aarch64.svcount"), ptr)
declare void @llvm.aarch64.sve.stnt1.pn.x4.nxv2i64(<vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>, target("aarch64.svcount"), ptr)
declare void @llvm.aarch64.sve.stnt1.pn.x4.nxv8f16(<vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x half>, target("aarch64.svcount"), ptr)
declare void @llvm.aarch64.sve.stnt1.pn.x4.nxv8bf16(<vscale x 8 x bfloat>, <vscale x 8 x bfloat>, <vscale x 8 x bfloat>, <vscale x 8 x bfloat>, target("aarch64.svcount"), ptr)
declare void @llvm.aarch64.sve.stnt1.pn.x4.nxv4f32(<vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float>, target("aarch64.svcount"), ptr)
declare void @llvm.aarch64.sve.stnt1.pn.x4.nxv2f64(<vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double>, target("aarch64.svcount"), ptr)