; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sve < %s | FileCheck %s
; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sme < %s | FileCheck %s

define void @st2b_i8(<vscale x 16 x i8> %v0, <vscale x 16 x i8> %v1, <vscale x 16 x i1> %pred, ptr %addr) {
; CHECK-LABEL: st2b_i8:
; CHECK:       // %bb.0:
; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1
; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1
; CHECK-NEXT:    st2b { z0.b, z1.b }, p0, [x0]
; CHECK-NEXT:    ret
  call void @llvm.aarch64.sve.st2.nxv16i8(<vscale x 16 x i8> %v0,
                                          <vscale x 16 x i8> %v1,
                                          <vscale x 16 x i1> %pred,
                                          ptr %addr)
  ret void
}

define void @st2h_i16(<vscale x 8 x i16> %v0, <vscale x 8 x i16> %v1, <vscale x 8 x i1> %pred, ptr %addr) {
; CHECK-LABEL: st2h_i16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1
; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1
; CHECK-NEXT:    st2h { z0.h, z1.h }, p0, [x0]
; CHECK-NEXT:    ret
  call void @llvm.aarch64.sve.st2.nxv8i16(<vscale x 8 x i16> %v0,
                                          <vscale x 8 x i16> %v1,
                                          <vscale x 8 x i1> %pred,
                                          ptr %addr)
  ret void
}

define void @st2h_f16(<vscale x 8 x half> %v0, <vscale x 8 x half> %v1, <vscale x 8 x i1> %pred, ptr %addr) {
; CHECK-LABEL: st2h_f16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1
; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1
; CHECK-NEXT:    st2h { z0.h, z1.h }, p0, [x0]
; CHECK-NEXT:    ret
  call void @llvm.aarch64.sve.st2.nxv8f16(<vscale x 8 x half> %v0,
                                          <vscale x 8 x half> %v1,
                                          <vscale x 8 x i1> %pred,
                                          ptr %addr)
  ret void
}

define void @st2h_bf16(<vscale x 8 x bfloat> %v0, <vscale x 8 x bfloat> %v1, <vscale x 8 x i1> %pred, ptr %addr) #0 {
; CHECK-LABEL: st2h_bf16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1
; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1
; CHECK-NEXT:    st2h { z0.h, z1.h }, p0, [x0]
; CHECK-NEXT:    ret
  call void @llvm.aarch64.sve.st2.nxv8bf16(<vscale x 8 x bfloat> %v0,
                                           <vscale x 8 x bfloat> %v1,
                                           <vscale x 8 x i1> %pred,
                                           ptr %addr)
  ret void
}

define void @st2w_i32(<vscale x 4 x i32> %v0, <vscale x 4 x i32> %v1, <vscale x 4 x i1> %pred, ptr %addr) {
; CHECK-LABEL: st2w_i32:
; CHECK:       // %bb.0:
; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1
; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1
; CHECK-NEXT:    st2w { z0.s, z1.s }, p0, [x0]
; CHECK-NEXT:    ret
  call void @llvm.aarch64.sve.st2.nxv4i32(<vscale x 4 x i32> %v0,
                                          <vscale x 4 x i32> %v1,
                                          <vscale x 4 x i1> %pred,
                                          ptr %addr)
  ret void
}

define void @st2w_f32(<vscale x 4 x float> %v0, <vscale x 4 x float> %v1, <vscale x 4 x i1> %pred, ptr %addr) {
; CHECK-LABEL: st2w_f32:
; CHECK:       // %bb.0:
; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1
; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1
; CHECK-NEXT:    st2w { z0.s, z1.s }, p0, [x0]
; CHECK-NEXT:    ret
  call void @llvm.aarch64.sve.st2.nxv4f32(<vscale x 4 x float> %v0,
                                          <vscale x 4 x float> %v1,
                                          <vscale x 4 x i1> %pred,
                                          ptr %addr)
  ret void
}

define void @st2d_i64(<vscale x 2 x i64> %v0, <vscale x 2 x i64> %v1, <vscale x 2 x i1> %pred, ptr %addr) {
; CHECK-LABEL: st2d_i64:
; CHECK:       // %bb.0:
; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1
; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1
; CHECK-NEXT:    st2d { z0.d, z1.d }, p0, [x0]
; CHECK-NEXT:    ret
  call void @llvm.aarch64.sve.st2.nxv2i64(<vscale x 2 x i64> %v0,
                                          <vscale x 2 x i64> %v1,
                                          <vscale x 2 x i1> %pred,
                                          ptr %addr)
  ret void
}

define void @st2d_f64(<vscale x 2 x double> %v0, <vscale x 2 x double> %v1, <vscale x 2 x i1> %pred, ptr %addr) {
; CHECK-LABEL: st2d_f64:
; CHECK:       // %bb.0:
; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1
; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1
; CHECK-NEXT:    st2d { z0.d, z1.d }, p0, [x0]
; CHECK-NEXT:    ret
  call void @llvm.aarch64.sve.st2.nxv2f64(<vscale x 2 x double> %v0,
                                          <vscale x 2 x double> %v1,
                                          <vscale x 2 x i1> %pred,
                                          ptr %addr)
  ret void
}

define void @st2d_ptr(<vscale x 2 x ptr> %v0, <vscale x 2 x ptr> %v1, <vscale x 2 x i1> %pred, ptr %addr) {
; CHECK-LABEL: st2d_ptr:
; CHECK:       // %bb.0:
; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1
; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1
; CHECK-NEXT:    st2d { z0.d, z1.d }, p0, [x0]
; CHECK-NEXT:    ret
  call void @llvm.aarch64.sve.st2.nxv2p0(<vscale x 2 x ptr> %v0,
                                         <vscale x 2 x ptr> %v1,
                                         <vscale x 2 x i1> %pred,
                                         ptr %addr)
  ret void
}

define void @st3b_i8(<vscale x 16 x i8> %v0, <vscale x 16 x i8> %v1, <vscale x 16 x i8> %v2, <vscale x 16 x i1> %pred, ptr %addr) {
; CHECK-LABEL: st3b_i8:
; CHECK:       // %bb.0:
; CHECK-NEXT:    // kill: def $z2 killed $z2 killed $z0_z1_z2 def $z0_z1_z2
; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1_z2 def $z0_z1_z2
; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1_z2 def $z0_z1_z2
; CHECK-NEXT:    st3b { z0.b - z2.b }, p0, [x0]
; CHECK-NEXT:    ret
  call void @llvm.aarch64.sve.st3.nxv16i8(<vscale x 16 x i8> %v0,
                                          <vscale x 16 x i8> %v1,
                                          <vscale x 16 x i8> %v2,
                                          <vscale x 16 x i1> %pred,
                                          ptr %addr)
  ret void
}

define void @st3h_i16(<vscale x 8 x i16> %v0, <vscale x 8 x i16> %v1, <vscale x 8 x i16> %v2, <vscale x 8 x i1> %pred, ptr %addr) {
; CHECK-LABEL: st3h_i16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    // kill: def $z2 killed $z2 killed $z0_z1_z2 def $z0_z1_z2
; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1_z2 def $z0_z1_z2
; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1_z2 def $z0_z1_z2
; CHECK-NEXT:    st3h { z0.h - z2.h }, p0, [x0]
; CHECK-NEXT:    ret
  call void @llvm.aarch64.sve.st3.nxv8i16(<vscale x 8 x i16> %v0,
                                          <vscale x 8 x i16> %v1,
                                          <vscale x 8 x i16> %v2,
                                          <vscale x 8 x i1> %pred,
                                          ptr %addr)
  ret void
}

define void @st3h_f16(<vscale x 8 x half> %v0, <vscale x 8 x half> %v1, <vscale x 8 x half> %v2, <vscale x 8 x i1> %pred, ptr %addr) {
; CHECK-LABEL: st3h_f16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    // kill: def $z2 killed $z2 killed $z0_z1_z2 def $z0_z1_z2
; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1_z2 def $z0_z1_z2
; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1_z2 def $z0_z1_z2
; CHECK-NEXT:    st3h { z0.h - z2.h }, p0, [x0]
; CHECK-NEXT:    ret
  call void @llvm.aarch64.sve.st3.nxv8f16(<vscale x 8 x half> %v0,
                                          <vscale x 8 x half> %v1,
                                          <vscale x 8 x half> %v2,
                                          <vscale x 8 x i1> %pred,
                                          ptr %addr)
  ret void
}

define void @st3h_bf16(<vscale x 8 x bfloat> %v0, <vscale x 8 x bfloat> %v1, <vscale x 8 x bfloat> %v2, <vscale x 8 x i1> %pred, ptr %addr) #0 {
; CHECK-LABEL: st3h_bf16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    // kill: def $z2 killed $z2 killed $z0_z1_z2 def $z0_z1_z2
; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1_z2 def $z0_z1_z2
; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1_z2 def $z0_z1_z2
; CHECK-NEXT:    st3h { z0.h - z2.h }, p0, [x0]
; CHECK-NEXT:    ret
  call void @llvm.aarch64.sve.st3.nxv8bf16(<vscale x 8 x bfloat> %v0,
                                           <vscale x 8 x bfloat> %v1,
                                           <vscale x 8 x bfloat> %v2,
                                           <vscale x 8 x i1> %pred,
                                           ptr %addr)
  ret void
}

define void @st3w_i32(<vscale x 4 x i32> %v0, <vscale x 4 x i32> %v1, <vscale x 4 x i32> %v2, <vscale x 4 x i1> %pred, ptr %addr) {
; CHECK-LABEL: st3w_i32:
; CHECK:       // %bb.0:
; CHECK-NEXT:    // kill: def $z2 killed $z2 killed $z0_z1_z2 def $z0_z1_z2
; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1_z2 def $z0_z1_z2
; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1_z2 def $z0_z1_z2
; CHECK-NEXT:    st3w { z0.s - z2.s }, p0, [x0]
; CHECK-NEXT:    ret
  call void @llvm.aarch64.sve.st3.nxv4i32(<vscale x 4 x i32> %v0,
                                          <vscale x 4 x i32> %v1,
                                          <vscale x 4 x i32> %v2,
                                          <vscale x 4 x i1> %pred,
                                          ptr %addr)
  ret void
}

define void @st3w_f32(<vscale x 4 x float> %v0, <vscale x 4 x float> %v1, <vscale x 4 x float> %v2, <vscale x 4 x i1> %pred, ptr %addr) {
; CHECK-LABEL: st3w_f32:
; CHECK:       // %bb.0:
; CHECK-NEXT:    // kill: def $z2 killed $z2 killed $z0_z1_z2 def $z0_z1_z2
; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1_z2 def $z0_z1_z2
; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1_z2 def $z0_z1_z2
; CHECK-NEXT:    st3w { z0.s - z2.s }, p0, [x0]
; CHECK-NEXT:    ret
  call void @llvm.aarch64.sve.st3.nxv4f32(<vscale x 4 x float> %v0,
                                          <vscale x 4 x float> %v1,
                                          <vscale x 4 x float> %v2,
                                          <vscale x 4 x i1> %pred,
                                          ptr %addr)
  ret void
}

define void @st3d_i64(<vscale x 2 x i64> %v0, <vscale x 2 x i64> %v1, <vscale x 2 x i64> %v2, <vscale x 2 x i1> %pred, ptr %addr) {
; CHECK-LABEL: st3d_i64:
; CHECK:       // %bb.0:
; CHECK-NEXT:    // kill: def $z2 killed $z2 killed $z0_z1_z2 def $z0_z1_z2
; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1_z2 def $z0_z1_z2
; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1_z2 def $z0_z1_z2
; CHECK-NEXT:    st3d { z0.d - z2.d }, p0, [x0]
; CHECK-NEXT:    ret
  call void @llvm.aarch64.sve.st3.nxv2i64(<vscale x 2 x i64> %v0,
                                          <vscale x 2 x i64> %v1,
                                          <vscale x 2 x i64> %v2,
                                          <vscale x 2 x i1> %pred,
                                          ptr %addr)
  ret void
}

define void @st3d_f64(<vscale x 2 x double> %v0, <vscale x 2 x double> %v1, <vscale x 2 x double> %v2, <vscale x 2 x i1> %pred, ptr %addr) {
; CHECK-LABEL: st3d_f64:
; CHECK:       // %bb.0:
; CHECK-NEXT:    // kill: def $z2 killed $z2 killed $z0_z1_z2 def $z0_z1_z2
; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1_z2 def $z0_z1_z2
; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1_z2 def $z0_z1_z2
; CHECK-NEXT:    st3d { z0.d - z2.d }, p0, [x0]
; CHECK-NEXT:    ret
  call void @llvm.aarch64.sve.st3.nxv2f64(<vscale x 2 x double> %v0,
                                          <vscale x 2 x double> %v1,
                                          <vscale x 2 x double> %v2,
                                          <vscale x 2 x i1> %pred,
                                          ptr %addr)
  ret void
}

define void @st3d_ptr(<vscale x 2 x ptr> %v0, <vscale x 2 x ptr> %v1, <vscale x 2 x ptr> %v2, <vscale x 2 x i1> %pred, ptr %addr) {
; CHECK-LABEL: st3d_ptr:
; CHECK:       // %bb.0:
; CHECK-NEXT:    // kill: def $z2 killed $z2 killed $z0_z1_z2 def $z0_z1_z2
; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1_z2 def $z0_z1_z2
; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1_z2 def $z0_z1_z2
; CHECK-NEXT:    st3d { z0.d - z2.d }, p0, [x0]
; CHECK-NEXT:    ret
  call void @llvm.aarch64.sve.st3.nxv2p0(<vscale x 2 x ptr> %v0,
                                         <vscale x 2 x ptr> %v1,
                                         <vscale x 2 x ptr> %v2,
                                         <vscale x 2 x i1> %pred,
                                         ptr %addr)
  ret void
}

define void @st4b_i8(<vscale x 16 x i8> %v0, <vscale x 16 x i8> %v1, <vscale x 16 x i8> %v2, <vscale x 16 x i8> %v3, <vscale x 16 x i1> %pred, ptr %addr) {
; CHECK-LABEL: st4b_i8:
; CHECK:       // %bb.0:
; CHECK-NEXT:    // kill: def $z3 killed $z3 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
; CHECK-NEXT:    // kill: def $z2 killed $z2 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
; CHECK-NEXT:    st4b { z0.b - z3.b }, p0, [x0]
; CHECK-NEXT:    ret
  call void @llvm.aarch64.sve.st4.nxv16i8(<vscale x 16 x i8> %v0,
                                          <vscale x 16 x i8> %v1,
                                          <vscale x 16 x i8> %v2,
                                          <vscale x 16 x i8> %v3,
                                          <vscale x 16 x i1> %pred,
                                          ptr %addr)
  ret void
}

define void @st4h_i16(<vscale x 8 x i16> %v0, <vscale x 8 x i16> %v1, <vscale x 8 x i16> %v2, <vscale x 8 x i16> %v3, <vscale x 8 x i1> %pred, ptr %addr) {
; CHECK-LABEL: st4h_i16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    // kill: def $z3 killed $z3 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
; CHECK-NEXT:    // kill: def $z2 killed $z2 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
; CHECK-NEXT:    st4h { z0.h - z3.h }, p0, [x0]
; CHECK-NEXT:    ret
  call void @llvm.aarch64.sve.st4.nxv8i16(<vscale x 8 x i16> %v0,
                                          <vscale x 8 x i16> %v1,
                                          <vscale x 8 x i16> %v2,
                                          <vscale x 8 x i16> %v3,
                                          <vscale x 8 x i1> %pred,
                                          ptr %addr)
  ret void
}

define void @st4h_f16(<vscale x 8 x half> %v0, <vscale x 8 x half> %v1, <vscale x 8 x half> %v2, <vscale x 8 x half> %v3, <vscale x 8 x i1> %pred, ptr %addr) {
; CHECK-LABEL: st4h_f16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    // kill: def $z3 killed $z3 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
; CHECK-NEXT:    // kill: def $z2 killed $z2 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
; CHECK-NEXT:    st4h { z0.h - z3.h }, p0, [x0]
; CHECK-NEXT:    ret
  call void @llvm.aarch64.sve.st4.nxv8f16(<vscale x 8 x half> %v0,
                                          <vscale x 8 x half> %v1,
                                          <vscale x 8 x half> %v2,
                                          <vscale x 8 x half> %v3,
                                          <vscale x 8 x i1> %pred,
                                          ptr %addr)
  ret void
}

define void @st4h_bf16(<vscale x 8 x bfloat> %v0, <vscale x 8 x bfloat> %v1, <vscale x 8 x bfloat> %v2, <vscale x 8 x bfloat> %v3, <vscale x 8 x i1> %pred, ptr %addr) #0 {
; CHECK-LABEL: st4h_bf16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    // kill: def $z3 killed $z3 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
; CHECK-NEXT:    // kill: def $z2 killed $z2 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
; CHECK-NEXT:    st4h { z0.h - z3.h }, p0, [x0]
; CHECK-NEXT:    ret
  call void @llvm.aarch64.sve.st4.nxv8bf16(<vscale x 8 x bfloat> %v0,
                                           <vscale x 8 x bfloat> %v1,
                                           <vscale x 8 x bfloat> %v2,
                                           <vscale x 8 x bfloat> %v3,
                                           <vscale x 8 x i1> %pred,
                                           ptr %addr)
  ret void
}

define void @st4w_i32(<vscale x 4 x i32> %v0, <vscale x 4 x i32> %v1, <vscale x 4 x i32> %v2, <vscale x 4 x i32> %v3, <vscale x 4 x i1> %pred, ptr %addr) {
; CHECK-LABEL: st4w_i32:
; CHECK:       // %bb.0:
; CHECK-NEXT:    // kill: def $z3 killed $z3 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
; CHECK-NEXT:    // kill: def $z2 killed $z2 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
; CHECK-NEXT:    st4w { z0.s - z3.s }, p0, [x0]
; CHECK-NEXT:    ret
  call void @llvm.aarch64.sve.st4.nxv4i32(<vscale x 4 x i32> %v0,
                                          <vscale x 4 x i32> %v1,
                                          <vscale x 4 x i32> %v2,
                                          <vscale x 4 x i32> %v3,
                                          <vscale x 4 x i1> %pred,
                                          ptr %addr)
  ret void
}

define void @st4w_f32(<vscale x 4 x float> %v0, <vscale x 4 x float> %v1, <vscale x 4 x float> %v2, <vscale x 4 x float> %v3, <vscale x 4 x i1> %pred, ptr %addr) {
; CHECK-LABEL: st4w_f32:
; CHECK:       // %bb.0:
; CHECK-NEXT:    // kill: def $z3 killed $z3 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
; CHECK-NEXT:    // kill: def $z2 killed $z2 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
; CHECK-NEXT:    st4w { z0.s - z3.s }, p0, [x0]
; CHECK-NEXT:    ret
  call void @llvm.aarch64.sve.st4.nxv4f32(<vscale x 4 x float> %v0,
                                          <vscale x 4 x float> %v1,
                                          <vscale x 4 x float> %v2,
                                          <vscale x 4 x float> %v3,
                                          <vscale x 4 x i1> %pred,
                                          ptr %addr)
  ret void
}

define void @st4d_i64(<vscale x 2 x i64> %v0, <vscale x 2 x i64> %v1, <vscale x 2 x i64> %v2, <vscale x 2 x i64> %v3, <vscale x 2 x i1> %pred, ptr %addr) {
; CHECK-LABEL: st4d_i64:
; CHECK:       // %bb.0:
; CHECK-NEXT:    // kill: def $z3 killed $z3 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
; CHECK-NEXT:    // kill: def $z2 killed $z2 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
; CHECK-NEXT:    st4d { z0.d - z3.d }, p0, [x0]
; CHECK-NEXT:    ret
  call void @llvm.aarch64.sve.st4.nxv2i64(<vscale x 2 x i64> %v0,
                                          <vscale x 2 x i64> %v1,
                                          <vscale x 2 x i64> %v2,
                                          <vscale x 2 x i64> %v3,
                                          <vscale x 2 x i1> %pred,
                                          ptr %addr)
  ret void
}

define void @st4d_f64(<vscale x 2 x double> %v0, <vscale x 2 x double> %v1, <vscale x 2 x double> %v2, <vscale x 2 x double> %v3, <vscale x 2 x i1> %pred, ptr %addr) {
; CHECK-LABEL: st4d_f64:
; CHECK:       // %bb.0:
; CHECK-NEXT:    // kill: def $z3 killed $z3 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
; CHECK-NEXT:    // kill: def $z2 killed $z2 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
; CHECK-NEXT:    st4d { z0.d - z3.d }, p0, [x0]
; CHECK-NEXT:    ret
  call void @llvm.aarch64.sve.st4.nxv2f64(<vscale x 2 x double> %v0,
                                          <vscale x 2 x double> %v1,
                                          <vscale x 2 x double> %v2,
                                          <vscale x 2 x double> %v3,
                                          <vscale x 2 x i1> %pred,
                                          ptr %addr)
  ret void
}

define void @st4d_ptr(<vscale x 2 x ptr> %v0, <vscale x 2 x ptr> %v1, <vscale x 2 x ptr> %v2, <vscale x 2 x ptr> %v3, <vscale x 2 x i1> %pred, ptr %addr) {
; CHECK-LABEL: st4d_ptr:
; CHECK:       // %bb.0:
; CHECK-NEXT:    // kill: def $z3 killed $z3 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
; CHECK-NEXT:    // kill: def $z2 killed $z2 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
; CHECK-NEXT:    st4d { z0.d - z3.d }, p0, [x0]
; CHECK-NEXT:    ret
  call void @llvm.aarch64.sve.st4.nxv2p0(<vscale x 2 x ptr> %v0,
                                         <vscale x 2 x ptr> %v1,
                                         <vscale x 2 x ptr> %v2,
                                         <vscale x 2 x ptr> %v3,
                                         <vscale x 2 x i1> %pred,
                                         ptr %addr)
  ret void
}

define void @stnt1b_i8(<vscale x 16 x i8> %data, <vscale x 16 x i1> %pred, ptr %addr) {
; CHECK-LABEL: stnt1b_i8:
; CHECK:       // %bb.0:
; CHECK-NEXT:    stnt1b { z0.b }, p0, [x0]
; CHECK-NEXT:    ret
  call void @llvm.aarch64.sve.stnt1.nxv16i8(<vscale x 16 x i8> %data,
                                            <vscale x 16 x i1> %pred,
                                            ptr %addr)
  ret void
}

define void @stnt1h_i16(<vscale x 8 x i16> %data, <vscale x 8 x i1> %pred, ptr %addr) {
; CHECK-LABEL: stnt1h_i16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    stnt1h { z0.h }, p0, [x0]
; CHECK-NEXT:    ret
  call void @llvm.aarch64.sve.stnt1.nxv8i16(<vscale x 8 x i16> %data,
                                            <vscale x 8 x i1> %pred,
                                            ptr %addr)
  ret void
}

define void @stnt1h_f16(<vscale x 8 x half> %data, <vscale x 8 x i1> %pred, ptr %addr) {
; CHECK-LABEL: stnt1h_f16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    stnt1h { z0.h }, p0, [x0]
; CHECK-NEXT:    ret
  call void @llvm.aarch64.sve.stnt1.nxv8f16(<vscale x 8 x half> %data,
                                            <vscale x 8 x i1> %pred,
                                            ptr %addr)
  ret void
}

define void @stnt1h_bf16(<vscale x 8 x bfloat> %data, <vscale x 8 x i1> %pred, ptr %addr) #0 {
; CHECK-LABEL: stnt1h_bf16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    stnt1h { z0.h }, p0, [x0]
; CHECK-NEXT:    ret
  call void @llvm.aarch64.sve.stnt1.nxv8bf16(<vscale x 8 x bfloat> %data,
                                             <vscale x 8 x i1> %pred,
                                             ptr %addr)
  ret void
}

define void @stnt1w_i32(<vscale x 4 x i32> %data, <vscale x 4 x i1> %pred, ptr %addr) {
; CHECK-LABEL: stnt1w_i32:
; CHECK:       // %bb.0:
; CHECK-NEXT:    stnt1w { z0.s }, p0, [x0]
; CHECK-NEXT:    ret
  call void @llvm.aarch64.sve.stnt1.nxv4i32(<vscale x 4 x i32> %data,
                                            <vscale x 4 x i1> %pred,
                                            ptr %addr)
  ret void
}

define void @stnt1w_f32(<vscale x 4 x float> %data, <vscale x 4 x i1> %pred, ptr %addr) {
; CHECK-LABEL: stnt1w_f32:
; CHECK:       // %bb.0:
; CHECK-NEXT:    stnt1w { z0.s }, p0, [x0]
; CHECK-NEXT:    ret
  call void @llvm.aarch64.sve.stnt1.nxv4f32(<vscale x 4 x float> %data,
                                            <vscale x 4 x i1> %pred,
                                            ptr %addr)
  ret void
}

define void @stnt1d_i64(<vscale x 2 x i64> %data, <vscale x 2 x i1> %pred, ptr %addr) {
; CHECK-LABEL: stnt1d_i64:
; CHECK:       // %bb.0:
; CHECK-NEXT:    stnt1d { z0.d }, p0, [x0]
; CHECK-NEXT:    ret
  call void @llvm.aarch64.sve.stnt1.nxv2i64(<vscale x 2 x i64> %data,
                                            <vscale x 2 x i1> %pred,
                                            ptr %addr)
  ret void
}

define void @stnt1d_f64(<vscale x 2 x double> %data, <vscale x 2 x i1> %pred, ptr %addr) {
; CHECK-LABEL: stnt1d_f64:
; CHECK:       // %bb.0:
; CHECK-NEXT:    stnt1d { z0.d }, p0, [x0]
; CHECK-NEXT:    ret
  call void @llvm.aarch64.sve.stnt1.nxv2f64(<vscale x 2 x double> %data,
                                            <vscale x 2 x i1> %pred,
                                            ptr %addr)
  ret void
}

declare void @llvm.aarch64.sve.st2.nxv16i8(<vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i1>, ptr)
declare void @llvm.aarch64.sve.st2.nxv8i16(<vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i1>, ptr)
declare void @llvm.aarch64.sve.st2.nxv4i32(<vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i1>, ptr)
declare void @llvm.aarch64.sve.st2.nxv2i64(<vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i1>, ptr)
declare void @llvm.aarch64.sve.st2.nxv8f16(<vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x i1>, ptr)
declare void @llvm.aarch64.sve.st2.nxv8bf16(<vscale x 8 x bfloat>, <vscale x 8 x bfloat>, <vscale x 8 x i1>, ptr)
declare void @llvm.aarch64.sve.st2.nxv4f32(<vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x i1>, ptr)
declare void @llvm.aarch64.sve.st2.nxv2f64(<vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x i1>, ptr)
declare void @llvm.aarch64.sve.st2.nxv2p0(<vscale x 2 x ptr>, <vscale x 2 x ptr>, <vscale x 2 x i1>, ptr nocapture)

declare void @llvm.aarch64.sve.st3.nxv16i8(<vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i1>, ptr)
declare void @llvm.aarch64.sve.st3.nxv8i16(<vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i1>, ptr)
declare void @llvm.aarch64.sve.st3.nxv4i32(<vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i1>, ptr)
declare void @llvm.aarch64.sve.st3.nxv2i64(<vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i1>, ptr)
declare void @llvm.aarch64.sve.st3.nxv8f16(<vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x i1>, ptr)
declare void @llvm.aarch64.sve.st3.nxv8bf16(<vscale x 8 x bfloat>, <vscale x 8 x bfloat>, <vscale x 8 x bfloat>, <vscale x 8 x i1>, ptr)
declare void @llvm.aarch64.sve.st3.nxv4f32(<vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x i1>, ptr)
declare void @llvm.aarch64.sve.st3.nxv2f64(<vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x i1>, ptr)
declare void @llvm.aarch64.sve.st3.nxv2p0(<vscale x 2 x ptr>, <vscale x 2 x ptr>, <vscale x 2 x ptr>, <vscale x 2 x i1>, ptr nocapture)

declare void @llvm.aarch64.sve.st4.nxv16i8(<vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i1>, ptr)
declare void @llvm.aarch64.sve.st4.nxv8i16(<vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i1>, ptr)
declare void @llvm.aarch64.sve.st4.nxv4i32(<vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i1>, ptr)
declare void @llvm.aarch64.sve.st4.nxv2i64(<vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i1>, ptr)
declare void @llvm.aarch64.sve.st4.nxv8f16(<vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x i1>, ptr)
declare void @llvm.aarch64.sve.st4.nxv8bf16(<vscale x 8 x bfloat>, <vscale x 8 x bfloat>, <vscale x 8 x bfloat>, <vscale x 8 x bfloat>, <vscale x 8 x i1>, ptr)
declare void @llvm.aarch64.sve.st4.nxv4f32(<vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x i1>, ptr)
declare void @llvm.aarch64.sve.st4.nxv2f64(<vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x i1>, ptr)
declare void @llvm.aarch64.sve.st4.nxv2p0(<vscale x 2 x ptr>, <vscale x 2 x ptr>, <vscale x 2 x ptr>, <vscale x 2 x ptr>, <vscale x 2 x i1>, ptr nocapture)

declare void @llvm.aarch64.sve.stnt1.nxv16i8(<vscale x 16 x i8>, <vscale x 16 x i1>, ptr)
declare void @llvm.aarch64.sve.stnt1.nxv8i16(<vscale x 8 x i16>, <vscale x 8 x i1>, ptr)
declare void @llvm.aarch64.sve.stnt1.nxv4i32(<vscale x 4 x i32>, <vscale x 4 x i1>, ptr)
declare void @llvm.aarch64.sve.stnt1.nxv2i64(<vscale x 2 x i64>, <vscale x 2 x i1>, ptr)
declare void @llvm.aarch64.sve.stnt1.nxv8f16(<vscale x 8 x half>, <vscale x 8 x i1>, ptr)
declare void @llvm.aarch64.sve.stnt1.nxv8bf16(<vscale x 8 x bfloat>, <vscale x 8 x i1>, ptr)
declare void @llvm.aarch64.sve.stnt1.nxv4f32(<vscale x 4 x float>, <vscale x 4 x i1>, ptr)
declare void @llvm.aarch64.sve.stnt1.nxv2f64(<vscale x 2 x double>, <vscale x 2 x i1>, ptr)

; +bf16 is required for the bfloat versions.
attributes #0 = { "target-features"="+bf16" }