; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sve < %s | FileCheck %s
; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sme < %s | FileCheck %s

; NOTE: invalid, upper-bound, and lower-bound immediate values of the reg+imm
; addressing mode are checked only for the byte version of each
; instruction (`st<N>b`), as the code for detecting the immediate is
; common to all instructions and varies only in the number of
; elements of the structured store, which is <N> = 2, 3, 4.
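;
; For reference (an encoding summary added here, not something this file
; checks directly): the reg+imm form of st<N> takes a signed 4-bit immediate
; scaled by <N>, so the valid offsets are the multiples of <N> in
; [-8*<N>, 7*<N>], i.e. st2b: -16..14, st3b: -24..21, st4b: -32..28.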

;
; ST2B
;

define void @st2b_i8_valid_imm(<vscale x 16 x i8> %v0, <vscale x 16 x i8> %v1, <vscale x 16 x i1> %pred, <vscale x 16 x i8>* %addr) {
; CHECK-LABEL: st2b_i8_valid_imm:
; CHECK:       // %bb.0:
; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1
; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1
; CHECK-NEXT:    st2b { z0.b, z1.b }, p0, [x0, #2, mul vl]
; CHECK-NEXT:    ret
  %base = getelementptr <vscale x 16 x i8>, <vscale x 16 x i8>* %addr, i64 2, i64 0
  call void @llvm.aarch64.sve.st2.nxv16i8(<vscale x 16 x i8> %v0,
                                          <vscale x 16 x i8> %v1,
                                          <vscale x 16 x i1> %pred,
                                          i8* %base)
  ret void
}

define void @st2b_i8_invalid_imm_not_multiple_of_2(<vscale x 16 x i8> %v0, <vscale x 16 x i8> %v1, <vscale x 16 x i1> %pred, <vscale x 16 x i8>* %addr) {
; CHECK-LABEL: st2b_i8_invalid_imm_not_multiple_of_2:
; CHECK:       // %bb.0:
; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1
; CHECK-NEXT:    rdvl x8, #3
; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1
; CHECK-NEXT:    st2b { z0.b, z1.b }, p0, [x0, x8]
; CHECK-NEXT:    ret
  %base = getelementptr <vscale x 16 x i8>, <vscale x 16 x i8>* %addr, i64 3, i64 0
  call void @llvm.aarch64.sve.st2.nxv16i8(<vscale x 16 x i8> %v0,
                                          <vscale x 16 x i8> %v1,
                                          <vscale x 16 x i1> %pred,
                                          i8* %base)
  ret void
}

define void @st2b_i8_invalid_imm_out_of_lower_bound(<vscale x 16 x i8> %v0, <vscale x 16 x i8> %v1, <vscale x 16 x i1> %pred, <vscale x 16 x i8>* %addr) {
; CHECK-LABEL: st2b_i8_invalid_imm_out_of_lower_bound:
; CHECK:       // %bb.0:
; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1
; CHECK-NEXT:    rdvl x8, #-18
; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1
; CHECK-NEXT:    st2b { z0.b, z1.b }, p0, [x0, x8]
; CHECK-NEXT:    ret
  %base = getelementptr <vscale x 16 x i8>, <vscale x 16 x i8>* %addr, i64 -18, i64 0
  call void @llvm.aarch64.sve.st2.nxv16i8(<vscale x 16 x i8> %v0,
                                          <vscale x 16 x i8> %v1,
                                          <vscale x 16 x i1> %pred,
                                          i8* %base)
  ret void
}

define void @st2b_i8_invalid_imm_out_of_upper_bound(<vscale x 16 x i8> %v0, <vscale x 16 x i8> %v1, <vscale x 16 x i1> %pred, <vscale x 16 x i8>* %addr) {
; CHECK-LABEL: st2b_i8_invalid_imm_out_of_upper_bound:
; CHECK:       // %bb.0:
; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1
; CHECK-NEXT:    rdvl x8, #16
; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1
; CHECK-NEXT:    st2b { z0.b, z1.b }, p0, [x0, x8]
; CHECK-NEXT:    ret
  %base = getelementptr <vscale x 16 x i8>, <vscale x 16 x i8>* %addr, i64 16, i64 0
  call void @llvm.aarch64.sve.st2.nxv16i8(<vscale x 16 x i8> %v0,
                                          <vscale x 16 x i8> %v1,
                                          <vscale x 16 x i1> %pred,
                                          i8* %base)
  ret void
}

define void @st2b_i8_valid_imm_lower_bound(<vscale x 16 x i8> %v0, <vscale x 16 x i8> %v1, <vscale x 16 x i1> %pred, <vscale x 16 x i8>* %addr) {
; CHECK-LABEL: st2b_i8_valid_imm_lower_bound:
; CHECK:       // %bb.0:
; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1
; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1
; CHECK-NEXT:    st2b { z0.b, z1.b }, p0, [x0, #-16, mul vl]
; CHECK-NEXT:    ret
  %base = getelementptr <vscale x 16 x i8>, <vscale x 16 x i8>* %addr, i64 -16, i64 0
  call void @llvm.aarch64.sve.st2.nxv16i8(<vscale x 16 x i8> %v0,
                                          <vscale x 16 x i8> %v1,
                                          <vscale x 16 x i1> %pred,
                                          i8* %base)
  ret void
}

define void @st2b_i8_valid_imm_upper_bound(<vscale x 16 x i8> %v0, <vscale x 16 x i8> %v1, <vscale x 16 x i1> %pred, <vscale x 16 x i8>* %addr) {
; CHECK-LABEL: st2b_i8_valid_imm_upper_bound:
; CHECK:       // %bb.0:
; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1
; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1
; CHECK-NEXT:    st2b { z0.b, z1.b }, p0, [x0, #14, mul vl]
; CHECK-NEXT:    ret
  %base = getelementptr <vscale x 16 x i8>, <vscale x 16 x i8>* %addr, i64 14, i64 0
  call void @llvm.aarch64.sve.st2.nxv16i8(<vscale x 16 x i8> %v0,
                                          <vscale x 16 x i8> %v1,
                                          <vscale x 16 x i1> %pred,
                                          i8* %base)
  ret void
}

;
; ST2H
;

define void @st2h_i16(<vscale x 8 x i16> %v0, <vscale x 8 x i16> %v1, <vscale x 8 x i1> %pred, <vscale x 8 x i16>* %addr) {
; CHECK-LABEL: st2h_i16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1
; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1
; CHECK-NEXT:    st2h { z0.h, z1.h }, p0, [x0, #2, mul vl]
; CHECK-NEXT:    ret
  %base = getelementptr <vscale x 8 x i16>, <vscale x 8 x i16>* %addr, i64 2, i64 0
  call void @llvm.aarch64.sve.st2.nxv8i16(<vscale x 8 x i16> %v0,
                                          <vscale x 8 x i16> %v1,
                                          <vscale x 8 x i1> %pred,
                                          i16* %base)
  ret void
}

define void @st2h_f16(<vscale x 8 x half> %v0, <vscale x 8 x half> %v1, <vscale x 8 x i1> %pred, <vscale x 8 x half>* %addr) {
; CHECK-LABEL: st2h_f16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1
; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1
; CHECK-NEXT:    st2h { z0.h, z1.h }, p0, [x0, #2, mul vl]
; CHECK-NEXT:    ret
  %base = getelementptr <vscale x 8 x half>, <vscale x 8 x half>* %addr, i64 2, i64 0
  call void @llvm.aarch64.sve.st2.nxv8f16(<vscale x 8 x half> %v0,
                                          <vscale x 8 x half> %v1,
                                          <vscale x 8 x i1> %pred,
                                          half* %base)
  ret void
}

;
; ST2W
;

define void @st2w_i32(<vscale x 4 x i32> %v0, <vscale x 4 x i32> %v1, <vscale x 4 x i1> %pred, <vscale x 4 x i32>* %addr) {
; CHECK-LABEL: st2w_i32:
; CHECK:       // %bb.0:
; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1
; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1
; CHECK-NEXT:    st2w { z0.s, z1.s }, p0, [x0, #4, mul vl]
; CHECK-NEXT:    ret
  %base = getelementptr <vscale x 4 x i32>, <vscale x 4 x i32>* %addr, i64 4, i64 0
  call void @llvm.aarch64.sve.st2.nxv4i32(<vscale x 4 x i32> %v0,
                                          <vscale x 4 x i32> %v1,
                                          <vscale x 4 x i1> %pred,
                                          i32* %base)
  ret void
}

define void @st2w_f32(<vscale x 4 x float> %v0, <vscale x 4 x float> %v1, <vscale x 4 x i1> %pred, <vscale x 4 x float>* %addr) {
; CHECK-LABEL: st2w_f32:
; CHECK:       // %bb.0:
; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1
; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1
; CHECK-NEXT:    st2w { z0.s, z1.s }, p0, [x0, #6, mul vl]
; CHECK-NEXT:    ret
  %base = getelementptr <vscale x 4 x float>, <vscale x 4 x float>* %addr, i64 6, i64 0
  call void @llvm.aarch64.sve.st2.nxv4f32(<vscale x 4 x float> %v0,
                                          <vscale x 4 x float> %v1,
                                          <vscale x 4 x i1> %pred,
                                          float* %base)
  ret void
}

;
; ST2D
;

define void @st2d_i64(<vscale x 2 x i64> %v0, <vscale x 2 x i64> %v1, <vscale x 2 x i1> %pred, <vscale x 2 x i64>* %addr) {
; CHECK-LABEL: st2d_i64:
; CHECK:       // %bb.0:
; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1
; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1
; CHECK-NEXT:    st2d { z0.d, z1.d }, p0, [x0, #8, mul vl]
; CHECK-NEXT:    ret
  %base = getelementptr <vscale x 2 x i64>, <vscale x 2 x i64>* %addr, i64 8, i64 0
  call void @llvm.aarch64.sve.st2.nxv2i64(<vscale x 2 x i64> %v0,
                                          <vscale x 2 x i64> %v1,
                                          <vscale x 2 x i1> %pred,
                                          i64* %base)
  ret void
}

define void @st2d_f64(<vscale x 2 x double> %v0, <vscale x 2 x double> %v1, <vscale x 2 x i1> %pred, <vscale x 2 x double>* %addr) {
; CHECK-LABEL: st2d_f64:
; CHECK:       // %bb.0:
; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1
; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1
; CHECK-NEXT:    st2d { z0.d, z1.d }, p0, [x0, #10, mul vl]
; CHECK-NEXT:    ret
  %base = getelementptr <vscale x 2 x double>, <vscale x 2 x double>* %addr, i64 10, i64 0
  call void @llvm.aarch64.sve.st2.nxv2f64(<vscale x 2 x double> %v0,
                                          <vscale x 2 x double> %v1,
                                          <vscale x 2 x i1> %pred,
                                          double* %base)
  ret void
}

;
; ST3B
;

define void @st3b_i8_valid_imm(<vscale x 16 x i8> %v0, <vscale x 16 x i8> %v1, <vscale x 16 x i8> %v2, <vscale x 16 x i1> %pred, <vscale x 16 x i8>* %addr) {
; CHECK-LABEL: st3b_i8_valid_imm:
; CHECK:       // %bb.0:
; CHECK-NEXT:    // kill: def $z2 killed $z2 killed $z0_z1_z2 def $z0_z1_z2
; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1_z2 def $z0_z1_z2
; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1_z2 def $z0_z1_z2
; CHECK-NEXT:    st3b { z0.b - z2.b }, p0, [x0, #3, mul vl]
; CHECK-NEXT:    ret
  %base = getelementptr <vscale x 16 x i8>, <vscale x 16 x i8>* %addr, i64 3, i64 0
  call void @llvm.aarch64.sve.st3.nxv16i8(<vscale x 16 x i8> %v0,
                                          <vscale x 16 x i8> %v1,
                                          <vscale x 16 x i8> %v2,
                                          <vscale x 16 x i1> %pred,
                                          i8* %base)
  ret void
}

define void @st3b_i8_invalid_imm_not_multiple_of_3_01(<vscale x 16 x i8> %v0, <vscale x 16 x i8> %v1, <vscale x 16 x i8> %v2, <vscale x 16 x i1> %pred, <vscale x 16 x i8>* %addr) {
; CHECK-LABEL: st3b_i8_invalid_imm_not_multiple_of_3_01:
; CHECK:       // %bb.0:
; CHECK-NEXT:    // kill: def $z2 killed $z2 killed $z0_z1_z2 def $z0_z1_z2
; CHECK-NEXT:    rdvl x8, #4
; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1_z2 def $z0_z1_z2
; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1_z2 def $z0_z1_z2
; CHECK-NEXT:    st3b { z0.b - z2.b }, p0, [x0, x8]
; CHECK-NEXT:    ret
  %base = getelementptr <vscale x 16 x i8>, <vscale x 16 x i8>* %addr, i64 4, i64 0
  call void @llvm.aarch64.sve.st3.nxv16i8(<vscale x 16 x i8> %v0,
                                          <vscale x 16 x i8> %v1,
                                          <vscale x 16 x i8> %v2,
                                          <vscale x 16 x i1> %pred,
                                          i8* %base)
  ret void
}

define void @st3b_i8_invalid_imm_not_multiple_of_3_02(<vscale x 16 x i8> %v0, <vscale x 16 x i8> %v1, <vscale x 16 x i8> %v2, <vscale x 16 x i1> %pred, <vscale x 16 x i8>* %addr) {
; CHECK-LABEL: st3b_i8_invalid_imm_not_multiple_of_3_02:
; CHECK:       // %bb.0:
; CHECK-NEXT:    // kill: def $z2 killed $z2 killed $z0_z1_z2 def $z0_z1_z2
; CHECK-NEXT:    rdvl x8, #5
; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1_z2 def $z0_z1_z2
; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1_z2 def $z0_z1_z2
; CHECK-NEXT:    st3b { z0.b - z2.b }, p0, [x0, x8]
; CHECK-NEXT:    ret
  %base = getelementptr <vscale x 16 x i8>, <vscale x 16 x i8>* %addr, i64 5, i64 0
  call void @llvm.aarch64.sve.st3.nxv16i8(<vscale x 16 x i8> %v0,
                                          <vscale x 16 x i8> %v1,
                                          <vscale x 16 x i8> %v2,
                                          <vscale x 16 x i1> %pred,
                                          i8* %base)
  ret void
}

define void @st3b_i8_invalid_imm_out_of_lower_bound(<vscale x 16 x i8> %v0, <vscale x 16 x i8> %v1, <vscale x 16 x i8> %v2, <vscale x 16 x i1> %pred, <vscale x 16 x i8>* %addr) {
; CHECK-LABEL: st3b_i8_invalid_imm_out_of_lower_bound:
; CHECK:       // %bb.0:
; CHECK-NEXT:    // kill: def $z2 killed $z2 killed $z0_z1_z2 def $z0_z1_z2
; CHECK-NEXT:    rdvl x8, #-27
; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1_z2 def $z0_z1_z2
; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1_z2 def $z0_z1_z2
; CHECK-NEXT:    st3b { z0.b - z2.b }, p0, [x0, x8]
; CHECK-NEXT:    ret
  %base = getelementptr <vscale x 16 x i8>, <vscale x 16 x i8>* %addr, i64 -27, i64 0
  call void @llvm.aarch64.sve.st3.nxv16i8(<vscale x 16 x i8> %v0,
                                          <vscale x 16 x i8> %v1,
                                          <vscale x 16 x i8> %v2,
                                          <vscale x 16 x i1> %pred,
                                          i8* %base)
  ret void
}

define void @st3b_i8_invalid_imm_out_of_upper_bound(<vscale x 16 x i8> %v0, <vscale x 16 x i8> %v1, <vscale x 16 x i8> %v2, <vscale x 16 x i1> %pred, <vscale x 16 x i8>* %addr) {
; CHECK-LABEL: st3b_i8_invalid_imm_out_of_upper_bound:
; CHECK:       // %bb.0:
; CHECK-NEXT:    // kill: def $z2 killed $z2 killed $z0_z1_z2 def $z0_z1_z2
; CHECK-NEXT:    rdvl x8, #24
; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1_z2 def $z0_z1_z2
; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1_z2 def $z0_z1_z2
; CHECK-NEXT:    st3b { z0.b - z2.b }, p0, [x0, x8]
; CHECK-NEXT:    ret
  %base = getelementptr <vscale x 16 x i8>, <vscale x 16 x i8>* %addr, i64 24, i64 0
  call void @llvm.aarch64.sve.st3.nxv16i8(<vscale x 16 x i8> %v0,
                                          <vscale x 16 x i8> %v1,
                                          <vscale x 16 x i8> %v2,
                                          <vscale x 16 x i1> %pred,
                                          i8* %base)
  ret void
}

define void @st3b_i8_valid_imm_lower_bound(<vscale x 16 x i8> %v0, <vscale x 16 x i8> %v1, <vscale x 16 x i8> %v2, <vscale x 16 x i1> %pred, <vscale x 16 x i8>* %addr) {
; CHECK-LABEL: st3b_i8_valid_imm_lower_bound:
; CHECK:       // %bb.0:
; CHECK-NEXT:    // kill: def $z2 killed $z2 killed $z0_z1_z2 def $z0_z1_z2
; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1_z2 def $z0_z1_z2
; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1_z2 def $z0_z1_z2
; CHECK-NEXT:    st3b { z0.b - z2.b }, p0, [x0, #-24, mul vl]
; CHECK-NEXT:    ret
  %base = getelementptr <vscale x 16 x i8>, <vscale x 16 x i8>* %addr, i64 -24, i64 0
  call void @llvm.aarch64.sve.st3.nxv16i8(<vscale x 16 x i8> %v0,
                                          <vscale x 16 x i8> %v1,
                                          <vscale x 16 x i8> %v2,
                                          <vscale x 16 x i1> %pred,
                                          i8* %base)
  ret void
}

define void @st3b_i8_valid_imm_upper_bound(<vscale x 16 x i8> %v0, <vscale x 16 x i8> %v1, <vscale x 16 x i8> %v2, <vscale x 16 x i1> %pred, <vscale x 16 x i8>* %addr) {
; CHECK-LABEL: st3b_i8_valid_imm_upper_bound:
; CHECK:       // %bb.0:
; CHECK-NEXT:    // kill: def $z2 killed $z2 killed $z0_z1_z2 def $z0_z1_z2
; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1_z2 def $z0_z1_z2
; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1_z2 def $z0_z1_z2
; CHECK-NEXT:    st3b { z0.b - z2.b }, p0, [x0, #21, mul vl]
; CHECK-NEXT:    ret
  %base = getelementptr <vscale x 16 x i8>, <vscale x 16 x i8>* %addr, i64 21, i64 0
  call void @llvm.aarch64.sve.st3.nxv16i8(<vscale x 16 x i8> %v0,
                                          <vscale x 16 x i8> %v1,
                                          <vscale x 16 x i8> %v2,
                                          <vscale x 16 x i1> %pred,
                                          i8* %base)
  ret void
}

;
; ST3H
;

define void @st3h_i16(<vscale x 8 x i16> %v0, <vscale x 8 x i16> %v1, <vscale x 8 x i16> %v2, <vscale x 8 x i1> %pred, <vscale x 8 x i16>* %addr) {
; CHECK-LABEL: st3h_i16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    // kill: def $z2 killed $z2 killed $z0_z1_z2 def $z0_z1_z2
; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1_z2 def $z0_z1_z2
; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1_z2 def $z0_z1_z2
; CHECK-NEXT:    st3h { z0.h - z2.h }, p0, [x0, #6, mul vl]
; CHECK-NEXT:    ret
  %base = getelementptr <vscale x 8 x i16>, <vscale x 8 x i16>* %addr, i64 6, i64 0
  call void @llvm.aarch64.sve.st3.nxv8i16(<vscale x 8 x i16> %v0,
                                          <vscale x 8 x i16> %v1,
                                          <vscale x 8 x i16> %v2,
                                          <vscale x 8 x i1> %pred,
                                          i16* %base)
  ret void
}

define void @st3h_f16(<vscale x 8 x half> %v0, <vscale x 8 x half> %v1, <vscale x 8 x half> %v2, <vscale x 8 x i1> %pred, <vscale x 8 x half>* %addr) {
; CHECK-LABEL: st3h_f16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    // kill: def $z2 killed $z2 killed $z0_z1_z2 def $z0_z1_z2
; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1_z2 def $z0_z1_z2
; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1_z2 def $z0_z1_z2
; CHECK-NEXT:    st3h { z0.h - z2.h }, p0, [x0, #9, mul vl]
; CHECK-NEXT:    ret
  %base = getelementptr <vscale x 8 x half>, <vscale x 8 x half>* %addr, i64 9, i64 0
  call void @llvm.aarch64.sve.st3.nxv8f16(<vscale x 8 x half> %v0,
                                          <vscale x 8 x half> %v1,
                                          <vscale x 8 x half> %v2,
                                          <vscale x 8 x i1> %pred,
                                          half* %base)
  ret void
}

;
; ST3W
;

define void @st3w_i32(<vscale x 4 x i32> %v0, <vscale x 4 x i32> %v1, <vscale x 4 x i32> %v2, <vscale x 4 x i1> %pred, <vscale x 4 x i32>* %addr) {
; CHECK-LABEL: st3w_i32:
; CHECK:       // %bb.0:
; CHECK-NEXT:    // kill: def $z2 killed $z2 killed $z0_z1_z2 def $z0_z1_z2
; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1_z2 def $z0_z1_z2
; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1_z2 def $z0_z1_z2
; CHECK-NEXT:    st3w { z0.s - z2.s }, p0, [x0, #12, mul vl]
; CHECK-NEXT:    ret
  %base = getelementptr <vscale x 4 x i32>, <vscale x 4 x i32>* %addr, i64 12, i64 0
  call void @llvm.aarch64.sve.st3.nxv4i32(<vscale x 4 x i32> %v0,
                                          <vscale x 4 x i32> %v1,
                                          <vscale x 4 x i32> %v2,
                                          <vscale x 4 x i1> %pred,
                                          i32* %base)
  ret void
}

define void @st3w_f32(<vscale x 4 x float> %v0, <vscale x 4 x float> %v1, <vscale x 4 x float> %v2, <vscale x 4 x i1> %pred, <vscale x 4 x float>* %addr) {
; CHECK-LABEL: st3w_f32:
; CHECK:       // %bb.0:
; CHECK-NEXT:    // kill: def $z2 killed $z2 killed $z0_z1_z2 def $z0_z1_z2
; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1_z2 def $z0_z1_z2
; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1_z2 def $z0_z1_z2
; CHECK-NEXT:    st3w { z0.s - z2.s }, p0, [x0, #15, mul vl]
; CHECK-NEXT:    ret
  %base = getelementptr <vscale x 4 x float>, <vscale x 4 x float>* %addr, i64 15, i64 0
  call void @llvm.aarch64.sve.st3.nxv4f32(<vscale x 4 x float> %v0,
                                          <vscale x 4 x float> %v1,
                                          <vscale x 4 x float> %v2,
                                          <vscale x 4 x i1> %pred,
                                          float* %base)
  ret void
}

;
; ST3D
;

define void @st3d_i64(<vscale x 2 x i64> %v0, <vscale x 2 x i64> %v1, <vscale x 2 x i64> %v2, <vscale x 2 x i1> %pred, <vscale x 2 x i64>* %addr) {
; CHECK-LABEL: st3d_i64:
; CHECK:       // %bb.0:
; CHECK-NEXT:    // kill: def $z2 killed $z2 killed $z0_z1_z2 def $z0_z1_z2
; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1_z2 def $z0_z1_z2
; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1_z2 def $z0_z1_z2
; CHECK-NEXT:    st3d { z0.d - z2.d }, p0, [x0, #18, mul vl]
; CHECK-NEXT:    ret
  %base = getelementptr <vscale x 2 x i64>, <vscale x 2 x i64>* %addr, i64 18, i64 0
  call void @llvm.aarch64.sve.st3.nxv2i64(<vscale x 2 x i64> %v0,
                                          <vscale x 2 x i64> %v1,
                                          <vscale x 2 x i64> %v2,
                                          <vscale x 2 x i1> %pred,
                                          i64* %base)
  ret void
}

define void @st3d_f64(<vscale x 2 x double> %v0, <vscale x 2 x double> %v1, <vscale x 2 x double> %v2, <vscale x 2 x i1> %pred, <vscale x 2 x double>* %addr) {
; CHECK-LABEL: st3d_f64:
; CHECK:       // %bb.0:
; CHECK-NEXT:    // kill: def $z2 killed $z2 killed $z0_z1_z2 def $z0_z1_z2
; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1_z2 def $z0_z1_z2
; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1_z2 def $z0_z1_z2
; CHECK-NEXT:    st3d { z0.d - z2.d }, p0, [x0, #-3, mul vl]
; CHECK-NEXT:    ret
  %base = getelementptr <vscale x 2 x double>, <vscale x 2 x double>* %addr, i64 -3, i64 0
  call void @llvm.aarch64.sve.st3.nxv2f64(<vscale x 2 x double> %v0,
                                          <vscale x 2 x double> %v1,
                                          <vscale x 2 x double> %v2,
                                          <vscale x 2 x i1> %pred,
                                          double* %base)
  ret void
}

;
; ST4B
;

define void @st4b_i8_valid_imm(<vscale x 16 x i8> %v0, <vscale x 16 x i8> %v1, <vscale x 16 x i8> %v2, <vscale x 16 x i8> %v3, <vscale x 16 x i1> %pred, <vscale x 16 x i8>* %addr) {
; CHECK-LABEL: st4b_i8_valid_imm:
; CHECK:       // %bb.0:
; CHECK-NEXT:    // kill: def $z3 killed $z3 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
; CHECK-NEXT:    // kill: def $z2 killed $z2 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
; CHECK-NEXT:    st4b { z0.b - z3.b }, p0, [x0, #4, mul vl]
; CHECK-NEXT:    ret
  %base = getelementptr <vscale x 16 x i8>, <vscale x 16 x i8>* %addr, i64 4, i64 0
  call void @llvm.aarch64.sve.st4.nxv16i8(<vscale x 16 x i8> %v0,
                                          <vscale x 16 x i8> %v1,
                                          <vscale x 16 x i8> %v2,
                                          <vscale x 16 x i8> %v3,
                                          <vscale x 16 x i1> %pred,
                                          i8* %base)
  ret void
}

define void @st4b_i8_invalid_imm_not_multiple_of_4_01(<vscale x 16 x i8> %v0, <vscale x 16 x i8> %v1, <vscale x 16 x i8> %v2, <vscale x 16 x i8> %v3, <vscale x 16 x i1> %pred, <vscale x 16 x i8>* %addr) {
; CHECK-LABEL: st4b_i8_invalid_imm_not_multiple_of_4_01:
; CHECK:       // %bb.0:
; CHECK-NEXT:    // kill: def $z3 killed $z3 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
; CHECK-NEXT:    rdvl x8, #5
; CHECK-NEXT:    // kill: def $z2 killed $z2 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
; CHECK-NEXT:    st4b { z0.b - z3.b }, p0, [x0, x8]
; CHECK-NEXT:    ret
  %base = getelementptr <vscale x 16 x i8>, <vscale x 16 x i8>* %addr, i64 5, i64 0
  call void @llvm.aarch64.sve.st4.nxv16i8(<vscale x 16 x i8> %v0,
                                          <vscale x 16 x i8> %v1,
                                          <vscale x 16 x i8> %v2,
                                          <vscale x 16 x i8> %v3,
                                          <vscale x 16 x i1> %pred,
                                          i8* %base)
  ret void
}

define void @st4b_i8_invalid_imm_not_multiple_of_4_02(<vscale x 16 x i8> %v0, <vscale x 16 x i8> %v1, <vscale x 16 x i8> %v2, <vscale x 16 x i8> %v3, <vscale x 16 x i1> %pred, <vscale x 16 x i8>* %addr) {
; CHECK-LABEL: st4b_i8_invalid_imm_not_multiple_of_4_02:
; CHECK:       // %bb.0:
; CHECK-NEXT:    // kill: def $z3 killed $z3 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
; CHECK-NEXT:    rdvl x8, #6
; CHECK-NEXT:    // kill: def $z2 killed $z2 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
; CHECK-NEXT:    st4b { z0.b - z3.b }, p0, [x0, x8]
; CHECK-NEXT:    ret
  %base = getelementptr <vscale x 16 x i8>, <vscale x 16 x i8>* %addr, i64 6, i64 0
  call void @llvm.aarch64.sve.st4.nxv16i8(<vscale x 16 x i8> %v0,
                                          <vscale x 16 x i8> %v1,
                                          <vscale x 16 x i8> %v2,
                                          <vscale x 16 x i8> %v3,
                                          <vscale x 16 x i1> %pred,
                                          i8* %base)
  ret void
}

define void @st4b_i8_invalid_imm_not_multiple_of_4_03(<vscale x 16 x i8> %v0, <vscale x 16 x i8> %v1, <vscale x 16 x i8> %v2, <vscale x 16 x i8> %v3, <vscale x 16 x i1> %pred, <vscale x 16 x i8>* %addr) {
; CHECK-LABEL: st4b_i8_invalid_imm_not_multiple_of_4_03:
; CHECK:       // %bb.0:
; CHECK-NEXT:    // kill: def $z3 killed $z3 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
; CHECK-NEXT:    rdvl x8, #7
; CHECK-NEXT:    // kill: def $z2 killed $z2 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
; CHECK-NEXT:    st4b { z0.b - z3.b }, p0, [x0, x8]
; CHECK-NEXT:    ret
  %base = getelementptr <vscale x 16 x i8>, <vscale x 16 x i8>* %addr, i64 7, i64 0
  call void @llvm.aarch64.sve.st4.nxv16i8(<vscale x 16 x i8> %v0,
                                          <vscale x 16 x i8> %v1,
                                          <vscale x 16 x i8> %v2,
                                          <vscale x 16 x i8> %v3,
                                          <vscale x 16 x i1> %pred,
                                          i8* %base)
  ret void
}

define void @st4b_i8_invalid_imm_out_of_lower_bound(<vscale x 16 x i8> %v0, <vscale x 16 x i8> %v1, <vscale x 16 x i8> %v2, <vscale x 16 x i8> %v3, <vscale x 16 x i1> %pred, <vscale x 16 x i8>* %addr) {
; CHECK-LABEL: st4b_i8_invalid_imm_out_of_lower_bound:
; CHECK:       // %bb.0:
; CHECK-NEXT:    rdvl x8, #1
; CHECK-NEXT:    mov x9, #-576 // =0xfffffffffffffdc0
; CHECK-NEXT:    // kill: def $z3 killed $z3 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
; CHECK-NEXT:    lsr x8, x8, #4
; CHECK-NEXT:    // kill: def $z2 killed $z2 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
; CHECK-NEXT:    mul x8, x8, x9
; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
; CHECK-NEXT:    st4b { z0.b - z3.b }, p0, [x0, x8]
; CHECK-NEXT:    ret
; FIXME: optimize OFFSET computation so that xOFFSET = (mul (RDVL #4) #9)
; xM = -9 * 2^6
; xP = RDVL * 2^-4
; xOFFSET = RDVL * 2^-4 * -9 * 2^6 = RDVL * -36
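; (Note, added for context: "rdvl x8, #-36" is not encodable, since RDVL's
; immediate is a signed 6-bit multiplier limited to [-32, 31], so at least
; two instructions are needed to materialize this offset.)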
  %base = getelementptr <vscale x 16 x i8>, <vscale x 16 x i8>* %addr, i64 -36, i64 0
  call void @llvm.aarch64.sve.st4.nxv16i8(<vscale x 16 x i8> %v0,
                                          <vscale x 16 x i8> %v1,
                                          <vscale x 16 x i8> %v2,
                                          <vscale x 16 x i8> %v3,
                                          <vscale x 16 x i1> %pred,
                                          i8* %base)
  ret void
}

define void @st4b_i8_invalid_imm_out_of_upper_bound(<vscale x 16 x i8> %v0, <vscale x 16 x i8> %v1, <vscale x 16 x i8> %v2, <vscale x 16 x i8> %v3, <vscale x 16 x i1> %pred, <vscale x 16 x i8>* %addr) {
; CHECK-LABEL: st4b_i8_invalid_imm_out_of_upper_bound:
; CHECK:       // %bb.0:
; CHECK-NEXT:    rdvl x8, #1
; CHECK-NEXT:    mov w9, #512 // =0x200
; CHECK-NEXT:    // kill: def $z3 killed $z3 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
; CHECK-NEXT:    lsr x8, x8, #4
; CHECK-NEXT:    // kill: def $z2 killed $z2 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
; CHECK-NEXT:    mul x8, x8, x9
; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
; CHECK-NEXT:    st4b { z0.b - z3.b }, p0, [x0, x8]
; CHECK-NEXT:    ret
; FIXME: optimize OFFSET computation so that xOFFSET = (shl (RDVL #16) #1)
; xM = 2^9
; xP = RDVL * 2^-4
; xOFFSET = RDVL * 2^-4 * 2^9 = RDVL * 32
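; (Note, added for context: 32 is just above RDVL's maximum immediate
; multiplier of 31, which is why the offset cannot come from a single
; RDVL here either.)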
  %base = getelementptr <vscale x 16 x i8>, <vscale x 16 x i8>* %addr, i64 32, i64 0
  call void @llvm.aarch64.sve.st4.nxv16i8(<vscale x 16 x i8> %v0,
                                          <vscale x 16 x i8> %v1,
                                          <vscale x 16 x i8> %v2,
                                          <vscale x 16 x i8> %v3,
                                          <vscale x 16 x i1> %pred,
                                          i8* %base)
  ret void
}

define void @st4b_i8_valid_imm_lower_bound(<vscale x 16 x i8> %v0, <vscale x 16 x i8> %v1, <vscale x 16 x i8> %v2, <vscale x 16 x i8> %v3, <vscale x 16 x i1> %pred, <vscale x 16 x i8>* %addr) {
; CHECK-LABEL: st4b_i8_valid_imm_lower_bound:
; CHECK:       // %bb.0:
; CHECK-NEXT:    // kill: def $z3 killed $z3 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
; CHECK-NEXT:    // kill: def $z2 killed $z2 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
; CHECK-NEXT:    st4b { z0.b - z3.b }, p0, [x0, #-32, mul vl]
; CHECK-NEXT:    ret
  %base = getelementptr <vscale x 16 x i8>, <vscale x 16 x i8>* %addr, i64 -32, i64 0
  call void @llvm.aarch64.sve.st4.nxv16i8(<vscale x 16 x i8> %v0,
                                          <vscale x 16 x i8> %v1,
                                          <vscale x 16 x i8> %v2,
                                          <vscale x 16 x i8> %v3,
                                          <vscale x 16 x i1> %pred,
                                          i8* %base)
  ret void
}

define void @st4b_i8_valid_imm_upper_bound(<vscale x 16 x i8> %v0, <vscale x 16 x i8> %v1, <vscale x 16 x i8> %v2, <vscale x 16 x i8> %v3, <vscale x 16 x i1> %pred, <vscale x 16 x i8>* %addr) {
; CHECK-LABEL: st4b_i8_valid_imm_upper_bound:
; CHECK:       // %bb.0:
; CHECK-NEXT:    // kill: def $z3 killed $z3 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
; CHECK-NEXT:    // kill: def $z2 killed $z2 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
; CHECK-NEXT:    st4b { z0.b - z3.b }, p0, [x0, #28, mul vl]
; CHECK-NEXT:    ret
  %base = getelementptr <vscale x 16 x i8>, <vscale x 16 x i8>* %addr, i64 28, i64 0
  call void @llvm.aarch64.sve.st4.nxv16i8(<vscale x 16 x i8> %v0,
                                          <vscale x 16 x i8> %v1,
                                          <vscale x 16 x i8> %v2,
                                          <vscale x 16 x i8> %v3,
                                          <vscale x 16 x i1> %pred,
                                          i8* %base)
  ret void
}

;
; ST4H
;

define void @st4h_i16(<vscale x 8 x i16> %v0, <vscale x 8 x i16> %v1, <vscale x 8 x i16> %v2, <vscale x 8 x i16> %v3, <vscale x 8 x i1> %pred, <vscale x 8 x i16>* %addr) {
; CHECK-LABEL: st4h_i16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    // kill: def $z3 killed $z3 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
; CHECK-NEXT:    // kill: def $z2 killed $z2 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
; CHECK-NEXT:    st4h { z0.h - z3.h }, p0, [x0, #8, mul vl]
; CHECK-NEXT:    ret
  %base = getelementptr <vscale x 8 x i16>, <vscale x 8 x i16>* %addr, i64 8, i64 0
  call void @llvm.aarch64.sve.st4.nxv8i16(<vscale x 8 x i16> %v0,
                                          <vscale x 8 x i16> %v1,
                                          <vscale x 8 x i16> %v2,
                                          <vscale x 8 x i16> %v3,
                                          <vscale x 8 x i1> %pred,
                                          i16* %base)
  ret void
}

define void @st4h_f16(<vscale x 8 x half> %v0, <vscale x 8 x half> %v1, <vscale x 8 x half> %v2, <vscale x 8 x half> %v3, <vscale x 8 x i1> %pred, <vscale x 8 x half>* %addr) {
; CHECK-LABEL: st4h_f16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    // kill: def $z3 killed $z3 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
; CHECK-NEXT:    // kill: def $z2 killed $z2 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
; CHECK-NEXT:    st4h { z0.h - z3.h }, p0, [x0, #12, mul vl]
; CHECK-NEXT:    ret
  %base = getelementptr <vscale x 8 x half>, <vscale x 8 x half>* %addr, i64 12, i64 0
  call void @llvm.aarch64.sve.st4.nxv8f16(<vscale x 8 x half> %v0,
                                          <vscale x 8 x half> %v1,
                                          <vscale x 8 x half> %v2,
                                          <vscale x 8 x half> %v3,
                                          <vscale x 8 x i1> %pred,
                                          half* %base)
  ret void
}

;
; ST4W
;

define void @st4w_i32(<vscale x 4 x i32> %v0, <vscale x 4 x i32> %v1, <vscale x 4 x i32> %v2, <vscale x 4 x i32> %v3, <vscale x 4 x i1> %pred, <vscale x 4 x i32>* %addr) {
; CHECK-LABEL: st4w_i32:
; CHECK:       // %bb.0:
; CHECK-NEXT:    // kill: def $z3 killed $z3 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
; CHECK-NEXT:    // kill: def $z2 killed $z2 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
; CHECK-NEXT:    st4w { z0.s - z3.s }, p0, [x0, #16, mul vl]
; CHECK-NEXT:    ret
  %base = getelementptr <vscale x 4 x i32>, <vscale x 4 x i32>* %addr, i64 16, i64 0
  call void @llvm.aarch64.sve.st4.nxv4i32(<vscale x 4 x i32> %v0,
                                          <vscale x 4 x i32> %v1,
                                          <vscale x 4 x i32> %v2,
                                          <vscale x 4 x i32> %v3,
                                          <vscale x 4 x i1> %pred,
                                          i32* %base)
  ret void
}

define void @st4w_f32(<vscale x 4 x float> %v0, <vscale x 4 x float> %v1, <vscale x 4 x float> %v2, <vscale x 4 x float> %v3, <vscale x 4 x i1> %pred, <vscale x 4 x float>* %addr) {
; CHECK-LABEL: st4w_f32:
; CHECK:       // %bb.0:
; CHECK-NEXT:    // kill: def $z3 killed $z3 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
; CHECK-NEXT:    // kill: def $z2 killed $z2 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
; CHECK-NEXT:    st4w { z0.s - z3.s }, p0, [x0, #20, mul vl]
; CHECK-NEXT:    ret
  %base = getelementptr <vscale x 4 x float>, <vscale x 4 x float>* %addr, i64 20, i64 0
  call void @llvm.aarch64.sve.st4.nxv4f32(<vscale x 4 x float> %v0,
                                          <vscale x 4 x float> %v1,
                                          <vscale x 4 x float> %v2,
                                          <vscale x 4 x float> %v3,
                                          <vscale x 4 x i1> %pred,
                                          float* %base)
  ret void
}

;
; ST4D
;

define void @st4d_i64(<vscale x 2 x i64> %v0, <vscale x 2 x i64> %v1, <vscale x 2 x i64> %v2, <vscale x 2 x i64> %v3, <vscale x 2 x i1> %pred, <vscale x 2 x i64>* %addr) {
; CHECK-LABEL: st4d_i64:
; CHECK:       // %bb.0:
; CHECK-NEXT:    // kill: def $z3 killed $z3 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
; CHECK-NEXT:    // kill: def $z2 killed $z2 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
; CHECK-NEXT:    st4d { z0.d - z3.d }, p0, [x0, #24, mul vl]
; CHECK-NEXT:    ret
  %base = getelementptr <vscale x 2 x i64>, <vscale x 2 x i64>* %addr, i64 24, i64 0
  call void @llvm.aarch64.sve.st4.nxv2i64(<vscale x 2 x i64> %v0,
                                          <vscale x 2 x i64> %v1,
                                          <vscale x 2 x i64> %v2,
                                          <vscale x 2 x i64> %v3,
                                          <vscale x 2 x i1> %pred,
                                          i64* %base)
  ret void
}

define void @st4d_f64(<vscale x 2 x double> %v0, <vscale x 2 x double> %v1, <vscale x 2 x double> %v2, <vscale x 2 x double> %v3, <vscale x 2 x i1> %pred, <vscale x 2 x double>* %addr) {
; CHECK-LABEL: st4d_f64:
; CHECK:       // %bb.0:
; CHECK-NEXT:    // kill: def $z3 killed $z3 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
; CHECK-NEXT:    // kill: def $z2 killed $z2 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
; CHECK-NEXT:    st4d { z0.d - z3.d }, p0, [x0, #28, mul vl]
; CHECK-NEXT:    ret
  %base = getelementptr <vscale x 2 x double>, <vscale x 2 x double>* %addr, i64 28, i64 0
  call void @llvm.aarch64.sve.st4.nxv2f64(<vscale x 2 x double> %v0,
                                          <vscale x 2 x double> %v1,
                                          <vscale x 2 x double> %v2,
                                          <vscale x 2 x double> %v3,
                                          <vscale x 2 x i1> %pred,
                                          double* %base)
  ret void
}

declare void @llvm.aarch64.sve.st2.nxv16i8(<vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i1>, i8*)
declare void @llvm.aarch64.sve.st2.nxv8i16(<vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i1>, i16*)
declare void @llvm.aarch64.sve.st2.nxv4i32(<vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i1>, i32*)
declare void @llvm.aarch64.sve.st2.nxv2i64(<vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i1>, i64*)
declare void @llvm.aarch64.sve.st2.nxv8f16(<vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x i1>, half*)
declare void @llvm.aarch64.sve.st2.nxv4f32(<vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x i1>, float*)
declare void @llvm.aarch64.sve.st2.nxv2f64(<vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x i1>, double*)

declare void @llvm.aarch64.sve.st3.nxv16i8(<vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i1>, i8*)
declare void @llvm.aarch64.sve.st3.nxv8i16(<vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i1>, i16*)
declare void @llvm.aarch64.sve.st3.nxv4i32(<vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i1>, i32*)
declare void @llvm.aarch64.sve.st3.nxv2i64(<vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i1>, i64*)
declare void @llvm.aarch64.sve.st3.nxv8f16(<vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x i1>, half*)
declare void @llvm.aarch64.sve.st3.nxv4f32(<vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x i1>, float*)
declare void @llvm.aarch64.sve.st3.nxv2f64(<vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x i1>, double*)

declare void @llvm.aarch64.sve.st4.nxv16i8(<vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i1>, i8*)
declare void @llvm.aarch64.sve.st4.nxv8i16(<vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i1>, i16*)
declare void @llvm.aarch64.sve.st4.nxv4i32(<vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i1>, i32*)
declare void @llvm.aarch64.sve.st4.nxv2i64(<vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i1>, i64*)
declare void @llvm.aarch64.sve.st4.nxv8f16(<vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x i1>, half*)
declare void @llvm.aarch64.sve.st4.nxv4f32(<vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x i1>, float*)
declare void @llvm.aarch64.sve.st4.nxv2f64(<vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x i1>, double*)