1 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2 ; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sve2p1,+bf16 < %s | FileCheck %s
; Predicated st2q of two <vscale x 16 x i8> vectors. The address is an i128
; GEP on %addr indexed by %offset, which the expected output folds into the
; register-offset form [x0, x1, lsl #4].
7 define void @st2q_ss_i8(<vscale x 16 x i8> %v0, <vscale x 16 x i8> %v1, <vscale x 16 x i1> %pred, ptr %addr, i64 %offset) {
8 ; CHECK-LABEL: st2q_ss_i8:
10 ; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1
11 ; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1
12 ; CHECK-NEXT: st2q { z0.q, z1.q }, p0, [x0, x1, lsl #4]
14 %1 = getelementptr i128, ptr %addr, i64 %offset
15 call void @llvm.aarch64.sve.st2q.nxv16i8(<vscale x 16 x i8>%v0, <vscale x 16 x i8> %v1 ,
16 <vscale x 16 x i1> %pred,
; Predicated st2q of two <vscale x 8 x i16> vectors. The address is an i128
; GEP on %addr indexed by %offset; the expected output uses the
; register-offset form [x0, x1, lsl #4].
21 define void @st2q_ss_i16(<vscale x 8 x i16> %v0, <vscale x 8 x i16> %v1, <vscale x 8 x i1> %pred, ptr %addr, i64 %offset) {
22 ; CHECK-LABEL: st2q_ss_i16:
24 ; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1
25 ; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1
26 ; CHECK-NEXT: st2q { z0.q, z1.q }, p0, [x0, x1, lsl #4]
28 %1 = getelementptr i128, ptr %addr, i64 %offset
29 call void @llvm.aarch64.sve.st2q.nxv8i16(<vscale x 8 x i16> %v0,
30 <vscale x 8 x i16> %v1,
31 <vscale x 8 x i1> %pred,
; Predicated st2q of two <vscale x 4 x i32> vectors. The address is an i128
; GEP on %addr indexed by %offset; the expected output uses the
; register-offset form [x0, x1, lsl #4].
36 define void @st2q_ss_i32(<vscale x 4 x i32> %v0, <vscale x 4 x i32> %v1, <vscale x 4 x i1> %pred, ptr %addr, i64 %offset) {
37 ; CHECK-LABEL: st2q_ss_i32:
39 ; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1
40 ; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1
41 ; CHECK-NEXT: st2q { z0.q, z1.q }, p0, [x0, x1, lsl #4]
43 %1 = getelementptr i128, ptr %addr, i64 %offset
44 call void @llvm.aarch64.sve.st2q.nxv4i32(<vscale x 4 x i32> %v0,
45 <vscale x 4 x i32> %v1,
46 <vscale x 4 x i1> %pred,
; Predicated st2q of two <vscale x 2 x i64> vectors. The address is an i128
; GEP on %addr indexed by %offset; the expected output uses the
; register-offset form [x0, x1, lsl #4].
51 define void @st2q_ss_i64(<vscale x 2 x i64> %v0, <vscale x 2 x i64> %v1, <vscale x 2 x i1> %pred, ptr %addr, i64 %offset) {
52 ; CHECK-LABEL: st2q_ss_i64:
54 ; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1
55 ; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1
56 ; CHECK-NEXT: st2q { z0.q, z1.q }, p0, [x0, x1, lsl #4]
58 %1 = getelementptr i128, ptr %addr, i64 %offset
59 call void @llvm.aarch64.sve.st2q.nxv2i64(<vscale x 2 x i64> %v0,
60 <vscale x 2 x i64> %v1,
61 <vscale x 2 x i1> %pred,
; Predicated st2q of two <vscale x 8 x half> vectors. The address is an i128
; GEP on %addr indexed by %offset; the expected output uses the
; register-offset form [x0, x1, lsl #4].
66 define void @st2q_ss_f16(<vscale x 8 x half> %v0, <vscale x 8 x half> %v1, <vscale x 8 x i1> %pred, ptr %addr, i64 %offset) {
67 ; CHECK-LABEL: st2q_ss_f16:
69 ; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1
70 ; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1
71 ; CHECK-NEXT: st2q { z0.q, z1.q }, p0, [x0, x1, lsl #4]
73 %1 = getelementptr i128, ptr %addr, i64 %offset
74 call void @llvm.aarch64.sve.st2q.nxv8f16(<vscale x 8 x half> %v0,
75 <vscale x 8 x half> %v1,
76 <vscale x 8 x i1> %pred,
; Predicated st2q of two <vscale x 4 x float> vectors. The address is an i128
; GEP on %addr indexed by %offset; the expected output uses the
; register-offset form [x0, x1, lsl #4].
81 define void @st2q_ss_f32(<vscale x 4 x float> %v0, <vscale x 4 x float> %v1, <vscale x 4 x i1> %pred, ptr %addr, i64 %offset) {
82 ; CHECK-LABEL: st2q_ss_f32:
84 ; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1
85 ; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1
86 ; CHECK-NEXT: st2q { z0.q, z1.q }, p0, [x0, x1, lsl #4]
88 %1 = getelementptr i128, ptr %addr, i64 %offset
89 call void @llvm.aarch64.sve.st2q.nxv4f32(<vscale x 4 x float> %v0,
90 <vscale x 4 x float> %v1,
91 <vscale x 4 x i1> %pred,
; Predicated st2q of two <vscale x 2 x double> vectors. The address is an i128
; GEP on %addr indexed by %offset; the expected output uses the
; register-offset form [x0, x1, lsl #4].
96 define void @st2q_ss_f64(<vscale x 2 x double> %v0, <vscale x 2 x double> %v1, <vscale x 2 x i1> %pred, ptr %addr, i64 %offset) {
97 ; CHECK-LABEL: st2q_ss_f64:
99 ; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1
100 ; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1
101 ; CHECK-NEXT: st2q { z0.q, z1.q }, p0, [x0, x1, lsl #4]
103 %1 = getelementptr i128, ptr %addr, i64 %offset
104 call void @llvm.aarch64.sve.st2q.nxv2f64(<vscale x 2 x double> %v0,
105 <vscale x 2 x double> %v1,
106 <vscale x 2 x i1> %pred,
; Predicated st2q of two <vscale x 8 x bfloat> vectors (the RUN line enables
; +bf16). The address is an i128 GEP on %addr indexed by %offset; the expected
; output uses the register-offset form [x0, x1, lsl #4].
111 define void @st2q_ss_bf16(<vscale x 8 x bfloat> %v0, <vscale x 8 x bfloat> %v1, <vscale x 8 x i1> %pred, ptr %addr, i64 %offset) {
112 ; CHECK-LABEL: st2q_ss_bf16:
114 ; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1
115 ; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1
116 ; CHECK-NEXT: st2q { z0.q, z1.q }, p0, [x0, x1, lsl #4]
118 %1 = getelementptr i128, ptr %addr, i64 %offset
119 call void @llvm.aarch64.sve.st2q.nxv8bf16(<vscale x 8 x bfloat> %v0,
120 <vscale x 8 x bfloat> %v1,
121 <vscale x 8 x i1> %pred,
; Predicated st2q of two <vscale x 16 x i8> vectors. The address is %addr
; moved by -16 <vscale x 16 x i8> elements; the expected output uses the
; immediate form [x0, #-16, mul vl].
127 define void @st2q_si_i8_off16(<vscale x 16 x i8> %v0, <vscale x 16 x i8> %v1, <vscale x 16 x i1> %pred, ptr %addr) {
128 ; CHECK-LABEL: st2q_si_i8_off16:
130 ; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1
131 ; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1
132 ; CHECK-NEXT: st2q { z0.q, z1.q }, p0, [x0, #-16, mul vl]
134 %base = getelementptr <vscale x 16 x i8>, ptr %addr, i64 -16
135 call void @llvm.aarch64.sve.st2q.nxv16i8(<vscale x 16 x i8> %v0,
136 <vscale x 16 x i8> %v1,
137 <vscale x 16 x i1> %pred,
; Predicated st2q of two <vscale x 16 x i8> vectors. The address is %addr
; advanced by 14 <vscale x 16 x i8> elements; the expected output uses the
; immediate form [x0, #14, mul vl].
142 define void @st2q_si_i8_off14(<vscale x 16 x i8> %v0, <vscale x 16 x i8> %v1, <vscale x 16 x i1> %pred, ptr %addr) {
143 ; CHECK-LABEL: st2q_si_i8_off14:
145 ; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1
146 ; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1
147 ; CHECK-NEXT: st2q { z0.q, z1.q }, p0, [x0, #14, mul vl]
149 %base = getelementptr <vscale x 16 x i8>, ptr %addr, i64 14
150 call void @llvm.aarch64.sve.st2q.nxv16i8(<vscale x 16 x i8> %v0,
151 <vscale x 16 x i8> %v1,
152 <vscale x 16 x i1> %pred,
; Predicated st2q of two <vscale x 8 x i16> vectors. The address is %base
; advanced by 14 <vscale x 8 x i16> elements; the expected output uses the
; immediate form [x0, #14, mul vl].
157 define void @st2q_si_i16(<vscale x 8 x i16> %v0, <vscale x 8 x i16> %v1, <vscale x 8 x i1> %pred, ptr %base) {
158 ; CHECK-LABEL: st2q_si_i16:
160 ; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1
161 ; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1
162 ; CHECK-NEXT: st2q { z0.q, z1.q }, p0, [x0, #14, mul vl]
164 %gep = getelementptr <vscale x 8 x i16>, ptr %base, i64 14
165 call void @llvm.aarch64.sve.st2q.nxv8i16(<vscale x 8 x i16> %v0,
166 <vscale x 8 x i16> %v1,
167 <vscale x 8 x i1> %pred,
; Predicated st2q of two <vscale x 4 x i32> vectors. The address is %base
; advanced by 14 <vscale x 4 x i32> elements; the expected output uses the
; immediate form [x0, #14, mul vl].
172 define void @st2q_si_i32(<vscale x 4 x i32> %v0, <vscale x 4 x i32> %v1, <vscale x 4 x i1> %pred, ptr %base) {
173 ; CHECK-LABEL: st2q_si_i32:
175 ; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1
176 ; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1
177 ; CHECK-NEXT: st2q { z0.q, z1.q }, p0, [x0, #14, mul vl]
179 %gep = getelementptr <vscale x 4 x i32>, ptr %base, i64 14
180 call void @llvm.aarch64.sve.st2q.nxv4i32(<vscale x 4 x i32> %v0,
181 <vscale x 4 x i32> %v1,
182 <vscale x 4 x i1> %pred,
; Predicated st2q of two <vscale x 2 x i64> vectors. The address is %base
; advanced by 14 <vscale x 2 x i64> elements; the expected output uses the
; immediate form [x0, #14, mul vl].
187 define void @st2q_si_i64(<vscale x 2 x i64> %v0, <vscale x 2 x i64> %v1, <vscale x 2 x i1> %pred, ptr %base) {
188 ; CHECK-LABEL: st2q_si_i64:
190 ; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1
191 ; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1
192 ; CHECK-NEXT: st2q { z0.q, z1.q }, p0, [x0, #14, mul vl]
194 %gep = getelementptr <vscale x 2 x i64>, ptr %base, i64 14
195 call void @llvm.aarch64.sve.st2q.nxv2i64(<vscale x 2 x i64> %v0,
196 <vscale x 2 x i64> %v1,
197 <vscale x 2 x i1> %pred,
; Predicated st2q of two <vscale x 8 x half> vectors. The address is %base
; advanced by 14 <vscale x 8 x half> elements; the expected output uses the
; immediate form [x0, #14, mul vl].
202 define void @st2q_si_f16(<vscale x 8 x half> %v0, <vscale x 8 x half> %v1, <vscale x 8 x i1> %pred, ptr %base) {
203 ; CHECK-LABEL: st2q_si_f16:
205 ; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1
206 ; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1
207 ; CHECK-NEXT: st2q { z0.q, z1.q }, p0, [x0, #14, mul vl]
209 %gep = getelementptr <vscale x 8 x half>, ptr %base, i64 14
210 call void @llvm.aarch64.sve.st2q.nxv8f16(<vscale x 8 x half> %v0,
211 <vscale x 8 x half> %v1,
212 <vscale x 8 x i1> %pred,
; Predicated st2q of two <vscale x 4 x float> vectors. The address is %base
; advanced by 14 <vscale x 4 x float> elements; the expected output uses the
; immediate form [x0, #14, mul vl].
217 define void @st2q_si_f32(<vscale x 4 x float> %v0, <vscale x 4 x float> %v1, <vscale x 4 x i1> %pred, ptr %base) {
218 ; CHECK-LABEL: st2q_si_f32:
220 ; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1
221 ; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1
222 ; CHECK-NEXT: st2q { z0.q, z1.q }, p0, [x0, #14, mul vl]
224 %gep = getelementptr <vscale x 4 x float>, ptr %base, i64 14
225 call void @llvm.aarch64.sve.st2q.nxv4f32(<vscale x 4 x float> %v0,
226 <vscale x 4 x float> %v1,
227 <vscale x 4 x i1> %pred,
; Predicated st2q of two <vscale x 2 x double> vectors. The address is %base
; advanced by 14 <vscale x 2 x double> elements; the expected output uses the
; immediate form [x0, #14, mul vl].
232 define void @st2q_si_f64(<vscale x 2 x double> %v0, <vscale x 2 x double> %v1, <vscale x 2 x i1> %pred, ptr %base) {
233 ; CHECK-LABEL: st2q_si_f64:
235 ; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1
236 ; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1
237 ; CHECK-NEXT: st2q { z0.q, z1.q }, p0, [x0, #14, mul vl]
239 %gep= getelementptr <vscale x 2 x double>, ptr %base, i64 14
240 call void @llvm.aarch64.sve.st2q.nxv2f64(<vscale x 2 x double> %v0,
241 <vscale x 2 x double> %v1,
242 <vscale x 2 x i1> %pred,
; Predicated st2q of two <vscale x 8 x bfloat> vectors (the RUN line enables
; +bf16). The address is %base advanced by 14 <vscale x 8 x bfloat> elements;
; the expected output uses the immediate form [x0, #14, mul vl].
247 define void @st2q_si_bf16(<vscale x 8 x bfloat> %v0, <vscale x 8 x bfloat> %v1, <vscale x 8 x i1> %pred, ptr %base) {
248 ; CHECK-LABEL: st2q_si_bf16:
250 ; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1
251 ; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1
252 ; CHECK-NEXT: st2q { z0.q, z1.q }, p0, [x0, #14, mul vl]
254 %gep = getelementptr <vscale x 8 x bfloat>, ptr %base, i64 14
255 call void @llvm.aarch64.sve.st2q.nxv8bf16(<vscale x 8 x bfloat> %v0,
256 <vscale x 8 x bfloat> %v1,
257 <vscale x 8 x i1> %pred,
; Predicated st3q of three <vscale x 16 x i8> vectors. The address is an i128
; GEP on %addr indexed by %offset; the expected output uses the
; register-offset form [x0, x1, lsl #4].
266 define void @st3q_ss_i8(<vscale x 16 x i8> %v0, <vscale x 16 x i8> %v1, <vscale x 16 x i8> %v2, <vscale x 16 x i1> %pred, ptr %addr, i64 %offset) {
267 ; CHECK-LABEL: st3q_ss_i8:
269 ; CHECK-NEXT: // kill: def $z2 killed $z2 killed $z0_z1_z2 def $z0_z1_z2
270 ; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1_z2 def $z0_z1_z2
271 ; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1_z2 def $z0_z1_z2
272 ; CHECK-NEXT: st3q { z0.q - z2.q }, p0, [x0, x1, lsl #4]
274 %1 = getelementptr i128, ptr %addr, i64 %offset
275 call void @llvm.aarch64.sve.st3q.nxv16i8(<vscale x 16 x i8>%v0,
276 <vscale x 16 x i8> %v1,
277 <vscale x 16 x i8> %v2,
278 <vscale x 16 x i1> %pred,
; Predicated st3q of three <vscale x 8 x i16> vectors. The address is an i128
; GEP on %addr indexed by %offset; the expected output uses the
; register-offset form [x0, x1, lsl #4].
283 define void @st3q_ss_i16(<vscale x 8 x i16> %v0, <vscale x 8 x i16> %v1, <vscale x 8 x i16> %v2, <vscale x 8 x i1> %pred, ptr %addr, i64 %offset) {
284 ; CHECK-LABEL: st3q_ss_i16:
286 ; CHECK-NEXT: // kill: def $z2 killed $z2 killed $z0_z1_z2 def $z0_z1_z2
287 ; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1_z2 def $z0_z1_z2
288 ; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1_z2 def $z0_z1_z2
289 ; CHECK-NEXT: st3q { z0.q - z2.q }, p0, [x0, x1, lsl #4]
291 %1 = getelementptr i128, ptr %addr, i64 %offset
292 call void @llvm.aarch64.sve.st3q.nxv8i16(<vscale x 8 x i16> %v0,
293 <vscale x 8 x i16> %v1,
294 <vscale x 8 x i16> %v2,
295 <vscale x 8 x i1> %pred,
; Predicated st3q of three <vscale x 4 x i32> vectors. The address is an i128
; GEP on %addr indexed by %offset; the expected output uses the
; register-offset form [x0, x1, lsl #4].
300 define void @st3q_ss_i32(<vscale x 4 x i32> %v0, <vscale x 4 x i32> %v1, <vscale x 4 x i32> %v2, <vscale x 4 x i1> %pred, ptr %addr, i64 %offset) {
301 ; CHECK-LABEL: st3q_ss_i32:
303 ; CHECK-NEXT: // kill: def $z2 killed $z2 killed $z0_z1_z2 def $z0_z1_z2
304 ; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1_z2 def $z0_z1_z2
305 ; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1_z2 def $z0_z1_z2
306 ; CHECK-NEXT: st3q { z0.q - z2.q }, p0, [x0, x1, lsl #4]
308 %1 = getelementptr i128, ptr %addr, i64 %offset
309 call void @llvm.aarch64.sve.st3q.nxv4i32(<vscale x 4 x i32> %v0,
310 <vscale x 4 x i32> %v1,
311 <vscale x 4 x i32> %v2,
312 <vscale x 4 x i1> %pred,
; Predicated st3q of three <vscale x 2 x i64> vectors. The address is an i128
; GEP on %addr indexed by %offset; the expected output uses the
; register-offset form [x0, x1, lsl #4].
317 define void @st3q_ss_i64(<vscale x 2 x i64> %v0, <vscale x 2 x i64> %v1, <vscale x 2 x i64> %v2, <vscale x 2 x i1> %pred, ptr %addr, i64 %offset) {
318 ; CHECK-LABEL: st3q_ss_i64:
320 ; CHECK-NEXT: // kill: def $z2 killed $z2 killed $z0_z1_z2 def $z0_z1_z2
321 ; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1_z2 def $z0_z1_z2
322 ; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1_z2 def $z0_z1_z2
323 ; CHECK-NEXT: st3q { z0.q - z2.q }, p0, [x0, x1, lsl #4]
325 %1 = getelementptr i128, ptr %addr, i64 %offset
326 call void @llvm.aarch64.sve.st3q.nxv2i64(<vscale x 2 x i64> %v0,
327 <vscale x 2 x i64> %v1,
328 <vscale x 2 x i64> %v2,
329 <vscale x 2 x i1> %pred,
; Predicated st3q of three <vscale x 8 x half> vectors. The address is an i128
; GEP on %addr indexed by %offset; the expected output uses the
; register-offset form [x0, x1, lsl #4].
334 define void @st3q_ss_f16(<vscale x 8 x half> %v0, <vscale x 8 x half> %v1, <vscale x 8 x half> %v2, <vscale x 8 x i1> %pred, ptr %addr, i64 %offset) {
335 ; CHECK-LABEL: st3q_ss_f16:
337 ; CHECK-NEXT: // kill: def $z2 killed $z2 killed $z0_z1_z2 def $z0_z1_z2
338 ; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1_z2 def $z0_z1_z2
339 ; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1_z2 def $z0_z1_z2
340 ; CHECK-NEXT: st3q { z0.q - z2.q }, p0, [x0, x1, lsl #4]
342 %1 = getelementptr i128, ptr %addr, i64 %offset
343 call void @llvm.aarch64.sve.st3q.nxv8f16(<vscale x 8 x half> %v0,
344 <vscale x 8 x half> %v1,
345 <vscale x 8 x half> %v2,
346 <vscale x 8 x i1> %pred,
; Predicated st3q of three <vscale x 4 x float> vectors. The address is an
; i128 GEP on %addr indexed by %offset; the expected output uses the
; register-offset form [x0, x1, lsl #4].
351 define void @st3q_ss_f32(<vscale x 4 x float> %v0, <vscale x 4 x float> %v1, <vscale x 4 x float> %v2, <vscale x 4 x i1> %pred, ptr %addr, i64 %offset) {
352 ; CHECK-LABEL: st3q_ss_f32:
354 ; CHECK-NEXT: // kill: def $z2 killed $z2 killed $z0_z1_z2 def $z0_z1_z2
355 ; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1_z2 def $z0_z1_z2
356 ; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1_z2 def $z0_z1_z2
357 ; CHECK-NEXT: st3q { z0.q - z2.q }, p0, [x0, x1, lsl #4]
359 %1 = getelementptr i128, ptr %addr, i64 %offset
360 call void @llvm.aarch64.sve.st3q.nxv4f32(<vscale x 4 x float> %v0,
361 <vscale x 4 x float> %v1,
362 <vscale x 4 x float> %v2,
363 <vscale x 4 x i1> %pred,
; Predicated st3q of three <vscale x 2 x double> vectors. The address is an
; i128 GEP on %addr indexed by %offset; the expected output uses the
; register-offset form [x0, x1, lsl #4].
368 define void @st3q_ss_f64(<vscale x 2 x double> %v0, <vscale x 2 x double> %v1, <vscale x 2 x double> %v2, <vscale x 2 x i1> %pred, ptr %addr, i64 %offset) {
369 ; CHECK-LABEL: st3q_ss_f64:
371 ; CHECK-NEXT: // kill: def $z2 killed $z2 killed $z0_z1_z2 def $z0_z1_z2
372 ; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1_z2 def $z0_z1_z2
373 ; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1_z2 def $z0_z1_z2
374 ; CHECK-NEXT: st3q { z0.q - z2.q }, p0, [x0, x1, lsl #4]
376 %1 = getelementptr i128, ptr %addr, i64 %offset
377 call void @llvm.aarch64.sve.st3q.nxv2f64(<vscale x 2 x double> %v0,
378 <vscale x 2 x double> %v1,
379 <vscale x 2 x double> %v2,
380 <vscale x 2 x i1> %pred,
; Predicated st3q of three <vscale x 8 x bfloat> vectors (the RUN line enables
; +bf16). The address is an i128 GEP on %addr indexed by %offset; the expected
; output uses the register-offset form [x0, x1, lsl #4].
385 define void @st3q_ss_bf16(<vscale x 8 x bfloat> %v0, <vscale x 8 x bfloat> %v1, <vscale x 8 x bfloat> %v2, <vscale x 8 x i1> %pred, ptr %addr, i64 %offset) {
386 ; CHECK-LABEL: st3q_ss_bf16:
388 ; CHECK-NEXT: // kill: def $z2 killed $z2 killed $z0_z1_z2 def $z0_z1_z2
389 ; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1_z2 def $z0_z1_z2
390 ; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1_z2 def $z0_z1_z2
391 ; CHECK-NEXT: st3q { z0.q - z2.q }, p0, [x0, x1, lsl #4]
393 %1 = getelementptr i128, ptr %addr, i64 %offset
394 call void @llvm.aarch64.sve.st3q.nxv8bf16(<vscale x 8 x bfloat> %v0,
395 <vscale x 8 x bfloat> %v1,
396 <vscale x 8 x bfloat> %v2,
397 <vscale x 8 x i1> %pred,
; Predicated st3q of three <vscale x 16 x i8> vectors. The address is %addr
; moved by -24 <vscale x 16 x i8> elements; the expected output uses the
; immediate form [x0, #-24, mul vl].
402 define void @st3q_si_i8_off24(<vscale x 16 x i8> %v0, <vscale x 16 x i8> %v1, <vscale x 16 x i8> %v2, <vscale x 16 x i1> %pred, ptr %addr) {
403 ; CHECK-LABEL: st3q_si_i8_off24:
405 ; CHECK-NEXT: // kill: def $z2 killed $z2 killed $z0_z1_z2 def $z0_z1_z2
406 ; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1_z2 def $z0_z1_z2
407 ; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1_z2 def $z0_z1_z2
408 ; CHECK-NEXT: st3q { z0.q - z2.q }, p0, [x0, #-24, mul vl]
410 %base = getelementptr <vscale x 16 x i8>, ptr %addr, i64 -24
411 call void @llvm.aarch64.sve.st3q.nxv16i8(<vscale x 16 x i8> %v0,
412 <vscale x 16 x i8> %v1,
413 <vscale x 16 x i8> %v2,
414 <vscale x 16 x i1> %pred,
; Predicated st3q of three <vscale x 16 x i8> vectors. The address is %addr
; advanced by 21 <vscale x 16 x i8> elements; the expected output uses the
; immediate form [x0, #21, mul vl].
419 define void @st3q_si_i8_off21(<vscale x 16 x i8> %v0, <vscale x 16 x i8> %v1, <vscale x 16 x i8> %v2, <vscale x 16 x i1> %pred, ptr %addr) {
420 ; CHECK-LABEL: st3q_si_i8_off21:
422 ; CHECK-NEXT: // kill: def $z2 killed $z2 killed $z0_z1_z2 def $z0_z1_z2
423 ; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1_z2 def $z0_z1_z2
424 ; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1_z2 def $z0_z1_z2
425 ; CHECK-NEXT: st3q { z0.q - z2.q }, p0, [x0, #21, mul vl]
427 %base = getelementptr <vscale x 16 x i8>, ptr %addr, i64 21
428 call void @llvm.aarch64.sve.st3q.nxv16i8(<vscale x 16 x i8> %v0,
429 <vscale x 16 x i8> %v1,
430 <vscale x 16 x i8> %v2,
431 <vscale x 16 x i1> %pred,
; Predicated st3q of three <vscale x 8 x i16> vectors. The address is %addr
; advanced by 21 <vscale x 8 x i16> elements; the expected output uses the
; immediate form [x0, #21, mul vl].
436 define void @st3q_si_i16(<vscale x 8 x i16> %v0, <vscale x 8 x i16> %v1, <vscale x 8 x i16> %v2, <vscale x 8 x i1> %pred, ptr %addr) {
437 ; CHECK-LABEL: st3q_si_i16:
439 ; CHECK-NEXT: // kill: def $z2 killed $z2 killed $z0_z1_z2 def $z0_z1_z2
440 ; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1_z2 def $z0_z1_z2
441 ; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1_z2 def $z0_z1_z2
442 ; CHECK-NEXT: st3q { z0.q - z2.q }, p0, [x0, #21, mul vl]
444 %base = getelementptr <vscale x 8 x i16>, ptr %addr, i64 21
445 call void @llvm.aarch64.sve.st3q.nxv8i16(<vscale x 8 x i16> %v0,
446 <vscale x 8 x i16> %v1,
447 <vscale x 8 x i16> %v2,
448 <vscale x 8 x i1> %pred,
; Predicated st3q of three <vscale x 4 x i32> vectors. The address is %addr
; advanced by 21 <vscale x 4 x i32> elements; the expected output uses the
; immediate form [x0, #21, mul vl].
453 define void @st3q_si_i32(<vscale x 4 x i32> %v0, <vscale x 4 x i32> %v1, <vscale x 4 x i32> %v2, <vscale x 4 x i1> %pred, ptr %addr) {
454 ; CHECK-LABEL: st3q_si_i32:
456 ; CHECK-NEXT: // kill: def $z2 killed $z2 killed $z0_z1_z2 def $z0_z1_z2
457 ; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1_z2 def $z0_z1_z2
458 ; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1_z2 def $z0_z1_z2
459 ; CHECK-NEXT: st3q { z0.q - z2.q }, p0, [x0, #21, mul vl]
461 %base = getelementptr <vscale x 4 x i32>, ptr %addr, i64 21
462 call void @llvm.aarch64.sve.st3q.nxv4i32(<vscale x 4 x i32> %v0,
463 <vscale x 4 x i32> %v1,
464 <vscale x 4 x i32> %v2,
465 <vscale x 4 x i1> %pred,
; Predicated st3q of three <vscale x 2 x i64> vectors. The address is %addr
; advanced by 21 <vscale x 2 x i64> elements; the expected output uses the
; immediate form [x0, #21, mul vl].
470 define void @st3q_si_i64(<vscale x 2 x i64> %v0, <vscale x 2 x i64> %v1,<vscale x 2 x i64> %v2, <vscale x 2 x i1> %pred, ptr %addr) {
471 ; CHECK-LABEL: st3q_si_i64:
473 ; CHECK-NEXT: // kill: def $z2 killed $z2 killed $z0_z1_z2 def $z0_z1_z2
474 ; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1_z2 def $z0_z1_z2
475 ; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1_z2 def $z0_z1_z2
476 ; CHECK-NEXT: st3q { z0.q - z2.q }, p0, [x0, #21, mul vl]
478 %base = getelementptr <vscale x 2 x i64>, ptr %addr, i64 21
479 call void @llvm.aarch64.sve.st3q.nxv2i64(<vscale x 2 x i64> %v0,
480 <vscale x 2 x i64> %v1,
481 <vscale x 2 x i64> %v2,
482 <vscale x 2 x i1> %pred,
; Predicated st3q of three <vscale x 8 x half> vectors. The address is %addr
; advanced by 21 <vscale x 8 x half> elements; the expected output uses the
; immediate form [x0, #21, mul vl].
487 define void @st3q_si_f16(<vscale x 8 x half> %v0, <vscale x 8 x half> %v1, <vscale x 8 x half> %v2, <vscale x 8 x i1> %pred, ptr %addr) {
488 ; CHECK-LABEL: st3q_si_f16:
490 ; CHECK-NEXT: // kill: def $z2 killed $z2 killed $z0_z1_z2 def $z0_z1_z2
491 ; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1_z2 def $z0_z1_z2
492 ; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1_z2 def $z0_z1_z2
493 ; CHECK-NEXT: st3q { z0.q - z2.q }, p0, [x0, #21, mul vl]
495 %base = getelementptr <vscale x 8 x half>, ptr %addr, i64 21
496 call void @llvm.aarch64.sve.st3q.nxv8f16(<vscale x 8 x half> %v0,
497 <vscale x 8 x half> %v1,
498 <vscale x 8 x half> %v2,
499 <vscale x 8 x i1> %pred,
; Predicated st3q of three <vscale x 4 x float> vectors. The address is %addr
; advanced by 21 <vscale x 4 x float> elements; the expected output uses the
; immediate form [x0, #21, mul vl].
504 define void @st3q_si_f32(<vscale x 4 x float> %v0, <vscale x 4 x float> %v1, <vscale x 4 x float> %v2, <vscale x 4 x i1> %pred, ptr %addr) {
505 ; CHECK-LABEL: st3q_si_f32:
507 ; CHECK-NEXT: // kill: def $z2 killed $z2 killed $z0_z1_z2 def $z0_z1_z2
508 ; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1_z2 def $z0_z1_z2
509 ; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1_z2 def $z0_z1_z2
510 ; CHECK-NEXT: st3q { z0.q - z2.q }, p0, [x0, #21, mul vl]
512 %base = getelementptr <vscale x 4 x float>, ptr %addr, i64 21
513 call void @llvm.aarch64.sve.st3q.nxv4f32(<vscale x 4 x float> %v0,
514 <vscale x 4 x float> %v1,
515 <vscale x 4 x float> %v2,
516 <vscale x 4 x i1> %pred,
; Predicated st3q of three <vscale x 2 x double> vectors. The address is %addr
; advanced by 21 <vscale x 2 x double> elements; the expected output uses the
; immediate form [x0, #21, mul vl].
521 define void @st3q_si_f64(<vscale x 2 x double> %v0, <vscale x 2 x double> %v1, <vscale x 2 x double> %v2, <vscale x 2 x i1> %pred, ptr %addr) {
522 ; CHECK-LABEL: st3q_si_f64:
524 ; CHECK-NEXT: // kill: def $z2 killed $z2 killed $z0_z1_z2 def $z0_z1_z2
525 ; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1_z2 def $z0_z1_z2
526 ; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1_z2 def $z0_z1_z2
527 ; CHECK-NEXT: st3q { z0.q - z2.q }, p0, [x0, #21, mul vl]
529 %base = getelementptr <vscale x 2 x double>, ptr %addr, i64 21
530 call void @llvm.aarch64.sve.st3q.nxv2f64(<vscale x 2 x double> %v0,
531 <vscale x 2 x double> %v1,
532 <vscale x 2 x double> %v2,
533 <vscale x 2 x i1> %pred,
; Predicated st3q of three <vscale x 8 x bfloat> vectors (the RUN line enables
; +bf16). The address is %addr advanced by 21 <vscale x 8 x bfloat> elements;
; the expected output uses the immediate form [x0, #21, mul vl].
538 define void @st3q_si_bf16(<vscale x 8 x bfloat> %v0, <vscale x 8 x bfloat> %v1, <vscale x 8 x bfloat> %v2, <vscale x 8 x i1> %pred, ptr %addr) {
539 ; CHECK-LABEL: st3q_si_bf16:
541 ; CHECK-NEXT: // kill: def $z2 killed $z2 killed $z0_z1_z2 def $z0_z1_z2
542 ; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1_z2 def $z0_z1_z2
543 ; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1_z2 def $z0_z1_z2
544 ; CHECK-NEXT: st3q { z0.q - z2.q }, p0, [x0, #21, mul vl]
546 %base = getelementptr <vscale x 8 x bfloat>, ptr %addr, i64 21
547 call void @llvm.aarch64.sve.st3q.nxv8bf16(<vscale x 8 x bfloat> %v0,
548 <vscale x 8 x bfloat> %v1,
549 <vscale x 8 x bfloat> %v2,
550 <vscale x 8 x i1> %pred,
; Predicated st4q of four <vscale x 16 x i8> vectors. The address is an i128
; GEP on %addr indexed by %offset; the expected output uses the
; register-offset form [x0, x1, lsl #4].
558 define void @st4q_ss_i8(<vscale x 16 x i8> %v0, <vscale x 16 x i8> %v1, <vscale x 16 x i8> %v2,<vscale x 16 x i8> %v3, <vscale x 16 x i1> %pred, ptr %addr, i64 %offset) {
559 ; CHECK-LABEL: st4q_ss_i8:
561 ; CHECK-NEXT: // kill: def $z3 killed $z3 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
562 ; CHECK-NEXT: // kill: def $z2 killed $z2 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
563 ; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
564 ; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
565 ; CHECK-NEXT: st4q { z0.q - z3.q }, p0, [x0, x1, lsl #4]
567 %1 = getelementptr i128, ptr %addr, i64 %offset
568 call void @llvm.aarch64.sve.st4q.nxv16i8(<vscale x 16 x i8>%v0,
569 <vscale x 16 x i8> %v1,
570 <vscale x 16 x i8> %v2,
571 <vscale x 16 x i8> %v3,
572 <vscale x 16 x i1> %pred,
; Predicated st4q of four <vscale x 8 x i16> vectors. The address is an i128
; GEP on %addr indexed by %offset; the expected output uses the
; register-offset form [x0, x1, lsl #4].
577 define void @st4q_ss_i16(<vscale x 8 x i16> %v0, <vscale x 8 x i16> %v1, <vscale x 8 x i16> %v2, <vscale x 8 x i16> %v3, <vscale x 8 x i1> %pred, ptr %addr, i64 %offset) {
578 ; CHECK-LABEL: st4q_ss_i16:
580 ; CHECK-NEXT: // kill: def $z3 killed $z3 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
581 ; CHECK-NEXT: // kill: def $z2 killed $z2 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
582 ; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
583 ; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
584 ; CHECK-NEXT: st4q { z0.q - z3.q }, p0, [x0, x1, lsl #4]
586 %1 = getelementptr i128, ptr %addr, i64 %offset
587 call void @llvm.aarch64.sve.st4q.nxv8i16(<vscale x 8 x i16> %v0,
588 <vscale x 8 x i16> %v1,
589 <vscale x 8 x i16> %v2,
590 <vscale x 8 x i16> %v3,
591 <vscale x 8 x i1> %pred,
; Predicated st4q of four <vscale x 4 x i32> vectors. The address is an i128
; GEP on %addr indexed by %offset; the expected output uses the
; register-offset form [x0, x1, lsl #4].
596 define void @st4q_ss_i32(<vscale x 4 x i32> %v0, <vscale x 4 x i32> %v1, <vscale x 4 x i32> %v2, <vscale x 4 x i32> %v3, <vscale x 4 x i1> %pred, ptr %addr, i64 %offset) {
597 ; CHECK-LABEL: st4q_ss_i32:
599 ; CHECK-NEXT: // kill: def $z3 killed $z3 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
600 ; CHECK-NEXT: // kill: def $z2 killed $z2 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
601 ; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
602 ; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
603 ; CHECK-NEXT: st4q { z0.q - z3.q }, p0, [x0, x1, lsl #4]
605 %1 = getelementptr i128, ptr %addr, i64 %offset
606 call void @llvm.aarch64.sve.st4q.nxv4i32(<vscale x 4 x i32> %v0,
607 <vscale x 4 x i32> %v1,
608 <vscale x 4 x i32> %v2,
609 <vscale x 4 x i32> %v3,
610 <vscale x 4 x i1> %pred,
; Predicated st4q of four <vscale x 2 x i64> vectors. The address is an i128
; GEP on %addr indexed by %offset; the expected output uses the
; register-offset form [x0, x1, lsl #4].
615 define void @st4q_ss_i64(<vscale x 2 x i64> %v0, <vscale x 2 x i64> %v1, <vscale x 2 x i64> %v2, <vscale x 2 x i64> %v3, <vscale x 2 x i1> %pred, ptr %addr, i64 %offset) {
616 ; CHECK-LABEL: st4q_ss_i64:
618 ; CHECK-NEXT: // kill: def $z3 killed $z3 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
619 ; CHECK-NEXT: // kill: def $z2 killed $z2 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
620 ; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
621 ; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
622 ; CHECK-NEXT: st4q { z0.q - z3.q }, p0, [x0, x1, lsl #4]
624 %1 = getelementptr i128, ptr %addr, i64 %offset
625 call void @llvm.aarch64.sve.st4q.nxv2i64(<vscale x 2 x i64> %v0,
626 <vscale x 2 x i64> %v1,
627 <vscale x 2 x i64> %v2,
628 <vscale x 2 x i64> %v3,
629 <vscale x 2 x i1> %pred,
; Predicated st4q of four <vscale x 8 x half> vectors. The address is an i128
; GEP on %addr indexed by %offset; the expected output uses the
; register-offset form [x0, x1, lsl #4].
634 define void @st4q_ss_f16(<vscale x 8 x half> %v0, <vscale x 8 x half> %v1, <vscale x 8 x half> %v2, <vscale x 8 x half> %v3, <vscale x 8 x i1> %pred, ptr %addr, i64 %offset) {
635 ; CHECK-LABEL: st4q_ss_f16:
637 ; CHECK-NEXT: // kill: def $z3 killed $z3 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
638 ; CHECK-NEXT: // kill: def $z2 killed $z2 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
639 ; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
640 ; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
641 ; CHECK-NEXT: st4q { z0.q - z3.q }, p0, [x0, x1, lsl #4]
643 %1 = getelementptr i128, ptr %addr, i64 %offset
644 call void @llvm.aarch64.sve.st4q.nxv8f16(<vscale x 8 x half> %v0,
645 <vscale x 8 x half> %v1,
646 <vscale x 8 x half> %v2,
647 <vscale x 8 x half> %v3,
648 <vscale x 8 x i1> %pred,
; Predicated st4q of four <vscale x 4 x float> vectors. The address is an i128
; GEP on %addr indexed by %offset; the expected output uses the
; register-offset form [x0, x1, lsl #4].
653 define void @st4q_ss_f32(<vscale x 4 x float> %v0, <vscale x 4 x float> %v1, <vscale x 4 x float> %v2, <vscale x 4 x float> %v3, <vscale x 4 x i1> %pred, ptr %addr, i64 %offset) {
654 ; CHECK-LABEL: st4q_ss_f32:
656 ; CHECK-NEXT: // kill: def $z3 killed $z3 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
657 ; CHECK-NEXT: // kill: def $z2 killed $z2 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
658 ; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
659 ; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
660 ; CHECK-NEXT: st4q { z0.q - z3.q }, p0, [x0, x1, lsl #4]
662 %1 = getelementptr i128, ptr %addr, i64 %offset
663 call void @llvm.aarch64.sve.st4q.nxv4f32(<vscale x 4 x float> %v0,
664 <vscale x 4 x float> %v1,
665 <vscale x 4 x float> %v2,
666 <vscale x 4 x float> %v3,
667 <vscale x 4 x i1> %pred,
; Predicated st4q of four <vscale x 2 x double> vectors. The address is an
; i128 GEP on %addr indexed by %offset; the expected output uses the
; register-offset form [x0, x1, lsl #4].
672 define void @st4q_ss_f64(<vscale x 2 x double> %v0, <vscale x 2 x double> %v1, <vscale x 2 x double> %v2, <vscale x 2 x double> %v3, <vscale x 2 x i1> %pred, ptr %addr, i64 %offset) {
673 ; CHECK-LABEL: st4q_ss_f64:
675 ; CHECK-NEXT: // kill: def $z3 killed $z3 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
676 ; CHECK-NEXT: // kill: def $z2 killed $z2 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
677 ; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
678 ; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
679 ; CHECK-NEXT: st4q { z0.q - z3.q }, p0, [x0, x1, lsl #4]
681 %1 = getelementptr i128, ptr %addr, i64 %offset
682 call void @llvm.aarch64.sve.st4q.nxv2f64(<vscale x 2 x double> %v0,
683 <vscale x 2 x double> %v1,
684 <vscale x 2 x double> %v2,
685 <vscale x 2 x double> %v3,
686 <vscale x 2 x i1> %pred,
; Predicated st4q of four <vscale x 8 x bfloat> vectors (the RUN line enables
; +bf16). The address is an i128 GEP on %addr indexed by %offset; the expected
; output uses the register-offset form [x0, x1, lsl #4].
691 define void @st4q_ss_bf16(<vscale x 8 x bfloat> %v0, <vscale x 8 x bfloat> %v1, <vscale x 8 x bfloat> %v2, <vscale x 8 x bfloat> %v3, <vscale x 8 x i1> %pred, ptr %addr, i64 %offset) {
692 ; CHECK-LABEL: st4q_ss_bf16:
694 ; CHECK-NEXT: // kill: def $z3 killed $z3 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
695 ; CHECK-NEXT: // kill: def $z2 killed $z2 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
696 ; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
697 ; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
698 ; CHECK-NEXT: st4q { z0.q - z3.q }, p0, [x0, x1, lsl #4]
700 %1 = getelementptr i128, ptr %addr, i64 %offset
701 call void @llvm.aarch64.sve.st4q.nxv8bf16(<vscale x 8 x bfloat> %v0,
702 <vscale x 8 x bfloat> %v1,
703 <vscale x 8 x bfloat> %v2,
704 <vscale x 8 x bfloat> %v3,
705 <vscale x 8 x i1> %pred,
; Predicated st4q of four <vscale x 16 x i8> vectors. The address is %addr
; moved by -32 <vscale x 16 x i8> elements; the expected output uses the
; immediate form [x0, #-32, mul vl].
710 define void @st4q_si_i8_off32(<vscale x 16 x i8> %v0, <vscale x 16 x i8> %v1, <vscale x 16 x i8> %v2,<vscale x 16 x i8> %v3, <vscale x 16 x i1> %pred, ptr %addr) {
711 ; CHECK-LABEL: st4q_si_i8_off32:
713 ; CHECK-NEXT: // kill: def $z3 killed $z3 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
714 ; CHECK-NEXT: // kill: def $z2 killed $z2 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
715 ; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
716 ; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
717 ; CHECK-NEXT: st4q { z0.q - z3.q }, p0, [x0, #-32, mul vl]
719 %base = getelementptr <vscale x 16 x i8>, ptr %addr, i64 -32
720 call void @llvm.aarch64.sve.st4q.nxv16i8(<vscale x 16 x i8> %v0,
721 <vscale x 16 x i8> %v1,
722 <vscale x 16 x i8> %v2,
723 <vscale x 16 x i8> %v3,
724 <vscale x 16 x i1> %pred,
; Predicated st4q of four <vscale x 16 x i8> vectors. The address is %addr
; advanced by 28 <vscale x 16 x i8> elements; the expected output uses the
; immediate form [x0, #28, mul vl].
729 define void @st4q_si_i8_off28(<vscale x 16 x i8> %v0, <vscale x 16 x i8> %v1, <vscale x 16 x i8> %v2,<vscale x 16 x i8> %v3, <vscale x 16 x i1> %pred, ptr %addr) {
730 ; CHECK-LABEL: st4q_si_i8_off28:
732 ; CHECK-NEXT: // kill: def $z3 killed $z3 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
733 ; CHECK-NEXT: // kill: def $z2 killed $z2 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
734 ; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
735 ; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
736 ; CHECK-NEXT: st4q { z0.q - z3.q }, p0, [x0, #28, mul vl]
738 %base = getelementptr <vscale x 16 x i8>, ptr %addr, i64 28
739 call void @llvm.aarch64.sve.st4q.nxv16i8(<vscale x 16 x i8> %v0,
740 <vscale x 16 x i8> %v1,
741 <vscale x 16 x i8> %v2,
742 <vscale x 16 x i8> %v3,
743 <vscale x 16 x i1> %pred,
; Predicated st4q of four <vscale x 8 x i16> vectors. The address is %addr
; advanced by 28 <vscale x 8 x i16> elements; the expected output uses the
; immediate form [x0, #28, mul vl].
748 define void @st4q_si_i16(<vscale x 8 x i16> %v0, <vscale x 8 x i16> %v1, <vscale x 8 x i16> %v2, <vscale x 8 x i16> %v3, <vscale x 8 x i1> %pred, ptr %addr) {
749 ; CHECK-LABEL: st4q_si_i16:
751 ; CHECK-NEXT: // kill: def $z3 killed $z3 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
752 ; CHECK-NEXT: // kill: def $z2 killed $z2 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
753 ; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
754 ; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
755 ; CHECK-NEXT: st4q { z0.q - z3.q }, p0, [x0, #28, mul vl]
757 %base = getelementptr <vscale x 8 x i16>, ptr %addr, i64 28
758 call void @llvm.aarch64.sve.st4q.nxv8i16(<vscale x 8 x i16> %v0,
759 <vscale x 8 x i16> %v1,
760 <vscale x 8 x i16> %v2,
761 <vscale x 8 x i16> %v3,
762 <vscale x 8 x i1> %pred,
; Predicated st4q of four <vscale x 4 x i32> vectors. The address is %addr
; advanced by 28 <vscale x 4 x i32> elements; the expected output uses the
; immediate form [x0, #28, mul vl].
; NOTE(review): the GEP result here is named %base1, whereas the sibling
; st4q_si tests name it %base.
767 define void @st4q_si_i32(<vscale x 4 x i32> %v0, <vscale x 4 x i32> %v1, <vscale x 4 x i32> %v2, <vscale x 4 x i32> %v3, <vscale x 4 x i1> %pred, ptr %addr) {
768 ; CHECK-LABEL: st4q_si_i32:
770 ; CHECK-NEXT: // kill: def $z3 killed $z3 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
771 ; CHECK-NEXT: // kill: def $z2 killed $z2 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
772 ; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
773 ; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
774 ; CHECK-NEXT: st4q { z0.q - z3.q }, p0, [x0, #28, mul vl]
776 %base1 = getelementptr <vscale x 4 x i32>, ptr %addr, i64 28
777 call void @llvm.aarch64.sve.st4q.nxv4i32(<vscale x 4 x i32> %v0,
778 <vscale x 4 x i32> %v1,
779 <vscale x 4 x i32> %v2,
780 <vscale x 4 x i32> %v3,
781 <vscale x 4 x i1> %pred,
; Predicated st4q of four <vscale x 2 x i64> vectors. The address is %addr
; advanced by 28 <vscale x 2 x i64> elements; the expected output uses the
; immediate form [x0, #28, mul vl].
786 define void @st4q_si_i64(<vscale x 2 x i64> %v0, <vscale x 2 x i64> %v1, <vscale x 2 x i64> %v2, <vscale x 2 x i64> %v3, <vscale x 2 x i1> %pred, ptr %addr) {
787 ; CHECK-LABEL: st4q_si_i64:
789 ; CHECK-NEXT: // kill: def $z3 killed $z3 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
790 ; CHECK-NEXT: // kill: def $z2 killed $z2 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
791 ; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
792 ; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
793 ; CHECK-NEXT: st4q { z0.q - z3.q }, p0, [x0, #28, mul vl]
795 %base = getelementptr <vscale x 2 x i64>, ptr %addr, i64 28
796 call void @llvm.aarch64.sve.st4q.nxv2i64(<vscale x 2 x i64> %v0,
797 <vscale x 2 x i64> %v1,
798 <vscale x 2 x i64> %v2,
799 <vscale x 2 x i64> %v3,
800 <vscale x 2 x i1> %pred,
; st4q, scalar+immediate addressing, <vscale x 8 x half> data.
; The GEP below advances %addr by 28 scalable vectors; the expected assembly is
; a single st4q of z0-z3 under p0 using the immediate form [x0, #28, mul vl].
805 define void @st4q_si_f16(<vscale x 8 x half> %v0, <vscale x 8 x half> %v1, <vscale x 8 x half> %v2, <vscale x 8 x half> %v3, <vscale x 8 x i1> %pred, ptr %addr) {
806 ; CHECK-LABEL: st4q_si_f16:
808 ; CHECK-NEXT: // kill: def $z3 killed $z3 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
809 ; CHECK-NEXT: // kill: def $z2 killed $z2 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
810 ; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
811 ; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
812 ; CHECK-NEXT: st4q { z0.q - z3.q }, p0, [x0, #28, mul vl]
814 %base = getelementptr <vscale x 8 x half>, ptr %addr, i64 28
815 call void @llvm.aarch64.sve.st4q.nxv8f16(<vscale x 8 x half> %v0,
816 <vscale x 8 x half> %v1,
817 <vscale x 8 x half> %v2,
818 <vscale x 8 x half> %v3,
819 <vscale x 8 x i1> %pred,
; st4q, scalar+immediate addressing, <vscale x 4 x float> data.
; The GEP below advances %addr by 28 scalable vectors; the expected assembly is
; a single st4q of z0-z3 under p0 using the immediate form [x0, #28, mul vl].
824 define void @st4q_si_f32(<vscale x 4 x float> %v0, <vscale x 4 x float> %v1, <vscale x 4 x float> %v2,<vscale x 4 x float> %v3, <vscale x 4 x i1> %pred, ptr %addr) {
825 ; CHECK-LABEL: st4q_si_f32:
827 ; CHECK-NEXT: // kill: def $z3 killed $z3 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
828 ; CHECK-NEXT: // kill: def $z2 killed $z2 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
829 ; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
830 ; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
831 ; CHECK-NEXT: st4q { z0.q - z3.q }, p0, [x0, #28, mul vl]
833 %base = getelementptr <vscale x 4 x float>, ptr %addr, i64 28
834 call void @llvm.aarch64.sve.st4q.nxv4f32(<vscale x 4 x float> %v0,
835 <vscale x 4 x float> %v1,
836 <vscale x 4 x float> %v2,
837 <vscale x 4 x float> %v3,
838 <vscale x 4 x i1> %pred,
; st4q, scalar+immediate addressing, <vscale x 2 x double> data.
; The GEP below advances %addr by 28 scalable vectors; the expected assembly is
; a single st4q of z0-z3 under p0 using the immediate form [x0, #28, mul vl].
843 define void @st4q_si_f64(<vscale x 2 x double> %v0, <vscale x 2 x double> %v1, <vscale x 2 x double> %v2, <vscale x 2 x double> %v3, <vscale x 2 x i1> %pred, ptr %addr) {
844 ; CHECK-LABEL: st4q_si_f64:
846 ; CHECK-NEXT: // kill: def $z3 killed $z3 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
847 ; CHECK-NEXT: // kill: def $z2 killed $z2 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
848 ; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
849 ; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
850 ; CHECK-NEXT: st4q { z0.q - z3.q }, p0, [x0, #28, mul vl]
852 %base = getelementptr <vscale x 2 x double>, ptr %addr, i64 28
853 call void @llvm.aarch64.sve.st4q.nxv2f64(<vscale x 2 x double> %v0,
854 <vscale x 2 x double> %v1,
855 <vscale x 2 x double> %v2,
856 <vscale x 2 x double> %v3,
857 <vscale x 2 x i1> %pred,
; st4q, scalar+immediate addressing, <vscale x 8 x bfloat> data.
; The GEP below advances %addr by 28 scalable vectors; the expected assembly is
; a single st4q of z0-z3 under p0 using the immediate form [x0, #28, mul vl].
862 define void @st4q_si_bf16(<vscale x 8 x bfloat> %v0, <vscale x 8 x bfloat> %v1, <vscale x 8 x bfloat> %v2, <vscale x 8 x bfloat> %v3, <vscale x 8 x i1> %pred, ptr %addr) {
863 ; CHECK-LABEL: st4q_si_bf16:
865 ; CHECK-NEXT: // kill: def $z3 killed $z3 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
866 ; CHECK-NEXT: // kill: def $z2 killed $z2 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
867 ; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
868 ; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
869 ; CHECK-NEXT: st4q { z0.q - z3.q }, p0, [x0, #28, mul vl]
871 %base = getelementptr <vscale x 8 x bfloat>, ptr %addr, i64 28
872 call void @llvm.aarch64.sve.st4q.nxv8bf16(<vscale x 8 x bfloat> %v0,
873 <vscale x 8 x bfloat> %v1,
874 <vscale x 8 x bfloat> %v2,
875 <vscale x 8 x bfloat> %v3,
876 <vscale x 8 x i1> %pred,
; Intrinsic declarations. Each store intrinsic takes N data vectors of the same
; type, a predicate with a matching element count, and the destination pointer.
; One overload is declared per element type: i8, i16, i32, i64, f16, f32, f64, bf16.

; st2q: two data vectors.
882 declare void @llvm.aarch64.sve.st2q.nxv16i8(<vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i1>, ptr)
883 declare void @llvm.aarch64.sve.st2q.nxv8i16(<vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i1>, ptr)
884 declare void @llvm.aarch64.sve.st2q.nxv4i32(<vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i1>, ptr)
885 declare void @llvm.aarch64.sve.st2q.nxv2i64(<vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i1>, ptr)
887 declare void @llvm.aarch64.sve.st2q.nxv8f16(<vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x i1>, ptr)
888 declare void @llvm.aarch64.sve.st2q.nxv4f32(<vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x i1>, ptr)
889 declare void @llvm.aarch64.sve.st2q.nxv2f64(<vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x i1>, ptr)
890 declare void @llvm.aarch64.sve.st2q.nxv8bf16(<vscale x 8 x bfloat>, <vscale x 8 x bfloat>, <vscale x 8 x i1>, ptr)
; st3q: three data vectors.
892 declare void @llvm.aarch64.sve.st3q.nxv16i8(<vscale x 16 x i8>, <vscale x 16 x i8>,<vscale x 16 x i8>, <vscale x 16 x i1>, ptr)
893 declare void @llvm.aarch64.sve.st3q.nxv8i16(<vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i1>, ptr)
894 declare void @llvm.aarch64.sve.st3q.nxv4i32(<vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i1>, ptr)
895 declare void @llvm.aarch64.sve.st3q.nxv2i64(<vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i1>, ptr)
897 declare void @llvm.aarch64.sve.st3q.nxv8f16(<vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x i1>, ptr)
898 declare void @llvm.aarch64.sve.st3q.nxv4f32(<vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x i1>, ptr)
899 declare void @llvm.aarch64.sve.st3q.nxv2f64(<vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x i1>, ptr)
900 declare void @llvm.aarch64.sve.st3q.nxv8bf16(<vscale x 8 x bfloat>, <vscale x 8 x bfloat>, <vscale x 8 x bfloat>, <vscale x 8 x i1>, ptr)
; st4q: four data vectors.
902 declare void @llvm.aarch64.sve.st4q.nxv16i8(<vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>,<vscale x 16 x i1>, ptr)
903 declare void @llvm.aarch64.sve.st4q.nxv8i16(<vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i1>, ptr)
904 declare void @llvm.aarch64.sve.st4q.nxv4i32(<vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>,<vscale x 4 x i1>, ptr)
905 declare void @llvm.aarch64.sve.st4q.nxv2i64(<vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i1>, ptr)
907 declare void @llvm.aarch64.sve.st4q.nxv8f16(<vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x i1>, ptr)
908 declare void @llvm.aarch64.sve.st4q.nxv4f32(<vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x i1>, ptr)
909 declare void @llvm.aarch64.sve.st4q.nxv2f64(<vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x i1>, ptr)
910 declare void @llvm.aarch64.sve.st4q.nxv8bf16(<vscale x 8 x bfloat>, <vscale x 8 x bfloat>, <vscale x 8 x bfloat>, <vscale x 8 x bfloat>, <vscale x 8 x i1>, ptr)