; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sme2 -verify-machineinstrs < %s | FileCheck %s

;
; Move Multi-Vector To Tile (Write) x2
;
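
; The tests below cover the two-vector form of the MOVA (write) alias: each
; intrinsic takes a tile index, a 32-bit slice index, and two SVE data
; vectors, and should lower to a single instruction such as
;   mov za0h.b[w12, 0:1], { z0.b, z1.b }
; The "// kill:" lines in the checks are register-allocator annotations (the
; two inputs must form the consecutive pair z0_z1) and emit no code. These
; roughly correspond to the ACLE svwrite_hor/ver_za*_vg2 intrinsics.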

; Horizontal

define void @za_write_vg2_horiz_b(i32 %slice, <vscale x 16 x i8> %zn1, <vscale x 16 x i8> %zn2) {
; CHECK-LABEL: za_write_vg2_horiz_b:
; CHECK:       // %bb.0:
; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1
; CHECK-NEXT:    mov w12, w0
; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1
; CHECK-NEXT:    mov za0h.b[w12, 0:1], { z0.b, z1.b }
; CHECK-NEXT:    mov za0h.b[w12, 14:15], { z0.b, z1.b }
; CHECK-NEXT:    ret
  call void @llvm.aarch64.sme.write.hor.vg2.nxv16i8(i32 0, i32 %slice, <vscale x 16 x i8> %zn1, <vscale x 16 x i8> %zn2)
  %slice.14 = add i32 %slice, 14
  call void @llvm.aarch64.sme.write.hor.vg2.nxv16i8(i32 0, i32 %slice.14, <vscale x 16 x i8> %zn1, <vscale x 16 x i8> %zn2)
  ret void
}
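
; Note: the "add i32 %slice, 14" above is folded into the instruction as the
; slice offset (0:1 vs. 14:15). The offsets exercised shrink with the element
; size (6:7 for .h and 2:3 for .s below), matching the smaller slice count of
; wider-element tiles, and the .d tests use a zero offset only.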

define void @za_write_vg2_horiz_h(i32 %slice, <vscale x 8 x i16> %zn1, <vscale x 8 x i16> %zn2) {
; CHECK-LABEL: za_write_vg2_horiz_h:
; CHECK:       // %bb.0:
; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1
; CHECK-NEXT:    mov w12, w0
; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1
; CHECK-NEXT:    mov za0h.h[w12, 0:1], { z0.h, z1.h }
; CHECK-NEXT:    mov za1h.h[w12, 6:7], { z0.h, z1.h }
; CHECK-NEXT:    ret
  call void @llvm.aarch64.sme.write.hor.vg2.nxv8i16(i32 0, i32 %slice, <vscale x 8 x i16> %zn1, <vscale x 8 x i16> %zn2)
  %slice.6 = add i32 %slice, 6
  call void @llvm.aarch64.sme.write.hor.vg2.nxv8i16(i32 1, i32 %slice.6, <vscale x 8 x i16> %zn1, <vscale x 8 x i16> %zn2)
  ret void
}

define void @za_write_vg2_horiz_f16(i32 %slice, <vscale x 8 x half> %zn1, <vscale x 8 x half> %zn2) {
; CHECK-LABEL: za_write_vg2_horiz_f16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1
; CHECK-NEXT:    mov w12, w0
; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1
; CHECK-NEXT:    mov za0h.h[w12, 0:1], { z0.h, z1.h }
; CHECK-NEXT:    mov za1h.h[w12, 6:7], { z0.h, z1.h }
; CHECK-NEXT:    ret
  call void @llvm.aarch64.sme.write.hor.vg2.nxv8f16(i32 0, i32 %slice, <vscale x 8 x half> %zn1, <vscale x 8 x half> %zn2)
  %slice.6 = add i32 %slice, 6
  call void @llvm.aarch64.sme.write.hor.vg2.nxv8f16(i32 1, i32 %slice.6, <vscale x 8 x half> %zn1, <vscale x 8 x half> %zn2)
  ret void
}

define void @za_write_vg2_horiz_bf16(i32 %slice, <vscale x 8 x bfloat> %zn1, <vscale x 8 x bfloat> %zn2) {
; CHECK-LABEL: za_write_vg2_horiz_bf16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1
; CHECK-NEXT:    mov w12, w0
; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1
; CHECK-NEXT:    mov za0h.h[w12, 0:1], { z0.h, z1.h }
; CHECK-NEXT:    mov za1h.h[w12, 6:7], { z0.h, z1.h }
; CHECK-NEXT:    ret
  call void @llvm.aarch64.sme.write.hor.vg2.nxv8bf16(i32 0, i32 %slice, <vscale x 8 x bfloat> %zn1, <vscale x 8 x bfloat> %zn2)
  %slice.6 = add i32 %slice, 6
  call void @llvm.aarch64.sme.write.hor.vg2.nxv8bf16(i32 1, i32 %slice.6, <vscale x 8 x bfloat> %zn1, <vscale x 8 x bfloat> %zn2)
  ret void
}

define void @za_write_vg2_horiz_s(i32 %slice, <vscale x 4 x i32> %zn1, <vscale x 4 x i32> %zn2) {
; CHECK-LABEL: za_write_vg2_horiz_s:
; CHECK:       // %bb.0:
; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1
; CHECK-NEXT:    mov w12, w0
; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1
; CHECK-NEXT:    mov za0h.s[w12, 0:1], { z0.s, z1.s }
; CHECK-NEXT:    mov za3h.s[w12, 2:3], { z0.s, z1.s }
; CHECK-NEXT:    ret
  call void @llvm.aarch64.sme.write.hor.vg2.nxv4i32(i32 0, i32 %slice, <vscale x 4 x i32> %zn1, <vscale x 4 x i32> %zn2)
  %slice.2 = add i32 %slice, 2
  call void @llvm.aarch64.sme.write.hor.vg2.nxv4i32(i32 3, i32 %slice.2, <vscale x 4 x i32> %zn1, <vscale x 4 x i32> %zn2)
  ret void
}

define void @za_write_vg2_horiz_f32(i32 %slice, <vscale x 4 x float> %zn1, <vscale x 4 x float> %zn2) {
; CHECK-LABEL: za_write_vg2_horiz_f32:
; CHECK:       // %bb.0:
; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1
; CHECK-NEXT:    mov w12, w0
; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1
; CHECK-NEXT:    mov za0h.s[w12, 0:1], { z0.s, z1.s }
; CHECK-NEXT:    mov za3h.s[w12, 2:3], { z0.s, z1.s }
; CHECK-NEXT:    ret
  call void @llvm.aarch64.sme.write.hor.vg2.nxv4f32(i32 0, i32 %slice, <vscale x 4 x float> %zn1, <vscale x 4 x float> %zn2)
  %slice.2 = add i32 %slice, 2
  call void @llvm.aarch64.sme.write.hor.vg2.nxv4f32(i32 3, i32 %slice.2, <vscale x 4 x float> %zn1, <vscale x 4 x float> %zn2)
  ret void
}

define void @za_write_vg2_horiz_d(i32 %slice, <vscale x 2 x i64> %zn1, <vscale x 2 x i64> %zn2) {
; CHECK-LABEL: za_write_vg2_horiz_d:
; CHECK:       // %bb.0:
; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1
; CHECK-NEXT:    mov w12, w0
; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1
; CHECK-NEXT:    mov za0h.d[w12, 0:1], { z0.d, z1.d }
; CHECK-NEXT:    ret
  call void @llvm.aarch64.sme.write.hor.vg2.nxv2i64(i32 0, i32 %slice, <vscale x 2 x i64> %zn1, <vscale x 2 x i64> %zn2)
  ret void
}

define void @za_write_vg2_horiz_f64(i32 %slice, <vscale x 2 x double> %zn1, <vscale x 2 x double> %zn2) {
; CHECK-LABEL: za_write_vg2_horiz_f64:
; CHECK:       // %bb.0:
; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1
; CHECK-NEXT:    mov w12, w0
; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1
; CHECK-NEXT:    mov za0h.d[w12, 0:1], { z0.d, z1.d }
; CHECK-NEXT:    ret
  call void @llvm.aarch64.sme.write.hor.vg2.nxv2f64(i32 0, i32 %slice, <vscale x 2 x double> %zn1, <vscale x 2 x double> %zn2)
  ret void
}

; Vertical

define void @za_write_vg2_vert_b(i32 %slice, <vscale x 16 x i8> %zn1, <vscale x 16 x i8> %zn2) {
; CHECK-LABEL: za_write_vg2_vert_b:
; CHECK:       // %bb.0:
; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1
; CHECK-NEXT:    mov w12, w0
; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1
; CHECK-NEXT:    mov za0v.b[w12, 0:1], { z0.b, z1.b }
; CHECK-NEXT:    mov za0v.b[w12, 14:15], { z0.b, z1.b }
; CHECK-NEXT:    ret
  call void @llvm.aarch64.sme.write.ver.vg2.nxv16i8(i32 0, i32 %slice, <vscale x 16 x i8> %zn1, <vscale x 16 x i8> %zn2)
  %slice.14 = add i32 %slice, 14
  call void @llvm.aarch64.sme.write.ver.vg2.nxv16i8(i32 0, i32 %slice.14, <vscale x 16 x i8> %zn1, <vscale x 16 x i8> %zn2)
  ret void
}

define void @za_write_vg2_vert_h(i32 %slice, <vscale x 8 x i16> %zn1, <vscale x 8 x i16> %zn2) {
; CHECK-LABEL: za_write_vg2_vert_h:
; CHECK:       // %bb.0:
; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1
; CHECK-NEXT:    mov w12, w0
; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1
; CHECK-NEXT:    mov za0v.h[w12, 0:1], { z0.h, z1.h }
; CHECK-NEXT:    mov za1v.h[w12, 6:7], { z0.h, z1.h }
; CHECK-NEXT:    ret
  call void @llvm.aarch64.sme.write.ver.vg2.nxv8i16(i32 0, i32 %slice, <vscale x 8 x i16> %zn1, <vscale x 8 x i16> %zn2)
  %slice.6 = add i32 %slice, 6
  call void @llvm.aarch64.sme.write.ver.vg2.nxv8i16(i32 1, i32 %slice.6, <vscale x 8 x i16> %zn1, <vscale x 8 x i16> %zn2)
  ret void
}

define void @za_write_vg2_vert_f16(i32 %slice, <vscale x 8 x half> %zn1, <vscale x 8 x half> %zn2) {
; CHECK-LABEL: za_write_vg2_vert_f16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1
; CHECK-NEXT:    mov w12, w0
; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1
; CHECK-NEXT:    mov za0v.h[w12, 0:1], { z0.h, z1.h }
; CHECK-NEXT:    mov za1v.h[w12, 6:7], { z0.h, z1.h }
; CHECK-NEXT:    ret
  call void @llvm.aarch64.sme.write.ver.vg2.nxv8f16(i32 0, i32 %slice, <vscale x 8 x half> %zn1, <vscale x 8 x half> %zn2)
  %slice.6 = add i32 %slice, 6
  call void @llvm.aarch64.sme.write.ver.vg2.nxv8f16(i32 1, i32 %slice.6, <vscale x 8 x half> %zn1, <vscale x 8 x half> %zn2)
  ret void
}

define void @za_write_vg2_vert_bf16(i32 %slice, <vscale x 8 x bfloat> %zn1, <vscale x 8 x bfloat> %zn2) {
; CHECK-LABEL: za_write_vg2_vert_bf16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1
; CHECK-NEXT:    mov w12, w0
; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1
; CHECK-NEXT:    mov za0v.h[w12, 0:1], { z0.h, z1.h }
; CHECK-NEXT:    mov za1v.h[w12, 6:7], { z0.h, z1.h }
; CHECK-NEXT:    ret
  call void @llvm.aarch64.sme.write.ver.vg2.nxv8bf16(i32 0, i32 %slice, <vscale x 8 x bfloat> %zn1, <vscale x 8 x bfloat> %zn2)
  %slice.6 = add i32 %slice, 6
  call void @llvm.aarch64.sme.write.ver.vg2.nxv8bf16(i32 1, i32 %slice.6, <vscale x 8 x bfloat> %zn1, <vscale x 8 x bfloat> %zn2)
  ret void
}

define void @za_write_vg2_vert_s(i32 %slice, <vscale x 4 x i32> %zn1, <vscale x 4 x i32> %zn2) {
; CHECK-LABEL: za_write_vg2_vert_s:
; CHECK:       // %bb.0:
; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1
; CHECK-NEXT:    mov w12, w0
; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1
; CHECK-NEXT:    mov za0v.s[w12, 0:1], { z0.s, z1.s }
; CHECK-NEXT:    mov za3v.s[w12, 2:3], { z0.s, z1.s }
; CHECK-NEXT:    ret
  call void @llvm.aarch64.sme.write.ver.vg2.nxv4i32(i32 0, i32 %slice, <vscale x 4 x i32> %zn1, <vscale x 4 x i32> %zn2)
  %slice.2 = add i32 %slice, 2
  call void @llvm.aarch64.sme.write.ver.vg2.nxv4i32(i32 3, i32 %slice.2, <vscale x 4 x i32> %zn1, <vscale x 4 x i32> %zn2)
  ret void
}

define void @za_write_vg2_vert_f32(i32 %slice, <vscale x 4 x float> %zn1, <vscale x 4 x float> %zn2) {
; CHECK-LABEL: za_write_vg2_vert_f32:
; CHECK:       // %bb.0:
; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1
; CHECK-NEXT:    mov w12, w0
; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1
; CHECK-NEXT:    mov za0v.s[w12, 0:1], { z0.s, z1.s }
; CHECK-NEXT:    mov za3v.s[w12, 2:3], { z0.s, z1.s }
; CHECK-NEXT:    ret
  call void @llvm.aarch64.sme.write.ver.vg2.nxv4f32(i32 0, i32 %slice, <vscale x 4 x float> %zn1, <vscale x 4 x float> %zn2)
  %slice.2 = add i32 %slice, 2
  call void @llvm.aarch64.sme.write.ver.vg2.nxv4f32(i32 3, i32 %slice.2, <vscale x 4 x float> %zn1, <vscale x 4 x float> %zn2)
  ret void
}

define void @za_write_vg2_vert_d(i32 %slice, <vscale x 2 x i64> %zn1, <vscale x 2 x i64> %zn2) {
; CHECK-LABEL: za_write_vg2_vert_d:
; CHECK:       // %bb.0:
; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1
; CHECK-NEXT:    mov w12, w0
; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1
; CHECK-NEXT:    mov za0v.d[w12, 0:1], { z0.d, z1.d }
; CHECK-NEXT:    ret
  call void @llvm.aarch64.sme.write.ver.vg2.nxv2i64(i32 0, i32 %slice, <vscale x 2 x i64> %zn1, <vscale x 2 x i64> %zn2)
  ret void
}

define void @za_write_vg2_vert_f64(i32 %slice, <vscale x 2 x double> %zn1, <vscale x 2 x double> %zn2) {
; CHECK-LABEL: za_write_vg2_vert_f64:
; CHECK:       // %bb.0:
; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1
; CHECK-NEXT:    mov w12, w0
; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1
; CHECK-NEXT:    mov za0v.d[w12, 0:1], { z0.d, z1.d }
; CHECK-NEXT:    ret
  call void @llvm.aarch64.sme.write.ver.vg2.nxv2f64(i32 0, i32 %slice, <vscale x 2 x double> %zn1, <vscale x 2 x double> %zn2)
  ret void
}

;
; Move Multi-Vector To Tile (Write) x4
;
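
; Same pattern as the x2 tests above, but writing four consecutive vectors
; (z0 - z3) per instruction, e.g.
;   mov za0h.b[w12, 0:3], { z0.b - z3.b }
; For .s and .d elements a single x4 write already spans the available
; slice offsets, so only the zero-offset form is tested for those types.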

; Horizontal

define void @za_write_vg4_horiz_b(i32 %slice, <vscale x 16 x i8> %zn1, <vscale x 16 x i8> %zn2, <vscale x 16 x i8> %zn3, <vscale x 16 x i8> %zn4) {
; CHECK-LABEL: za_write_vg4_horiz_b:
; CHECK:       // %bb.0:
; CHECK-NEXT:    // kill: def $z3 killed $z3 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
; CHECK-NEXT:    mov w12, w0
; CHECK-NEXT:    // kill: def $z2 killed $z2 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
; CHECK-NEXT:    mov za0h.b[w12, 0:3], { z0.b - z3.b }
; CHECK-NEXT:    mov za0h.b[w12, 12:15], { z0.b - z3.b }
; CHECK-NEXT:    ret
  call void @llvm.aarch64.sme.write.hor.vg4.nxv16i8(i32 0, i32 %slice, <vscale x 16 x i8> %zn1, <vscale x 16 x i8> %zn2, <vscale x 16 x i8> %zn3, <vscale x 16 x i8> %zn4)
  %slice.12 = add i32 %slice, 12
  call void @llvm.aarch64.sme.write.hor.vg4.nxv16i8(i32 0, i32 %slice.12, <vscale x 16 x i8> %zn1, <vscale x 16 x i8> %zn2, <vscale x 16 x i8> %zn3, <vscale x 16 x i8> %zn4)
  ret void
}

define void @za_write_vg4_horiz_h(i32 %slice, <vscale x 8 x i16> %zn1, <vscale x 8 x i16> %zn2, <vscale x 8 x i16> %zn3, <vscale x 8 x i16> %zn4) {
; CHECK-LABEL: za_write_vg4_horiz_h:
; CHECK:       // %bb.0:
; CHECK-NEXT:    // kill: def $z3 killed $z3 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
; CHECK-NEXT:    mov w12, w0
; CHECK-NEXT:    // kill: def $z2 killed $z2 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
; CHECK-NEXT:    mov za0h.h[w12, 0:3], { z0.h - z3.h }
; CHECK-NEXT:    mov za1h.h[w12, 4:7], { z0.h - z3.h }
; CHECK-NEXT:    ret
  call void @llvm.aarch64.sme.write.hor.vg4.nxv8i16(i32 0, i32 %slice, <vscale x 8 x i16> %zn1, <vscale x 8 x i16> %zn2, <vscale x 8 x i16> %zn3, <vscale x 8 x i16> %zn4)
  %slice.4 = add i32 %slice, 4
  call void @llvm.aarch64.sme.write.hor.vg4.nxv8i16(i32 1, i32 %slice.4, <vscale x 8 x i16> %zn1, <vscale x 8 x i16> %zn2, <vscale x 8 x i16> %zn3, <vscale x 8 x i16> %zn4)
  ret void
}

define void @za_write_vg4_horiz_f16(i32 %slice, <vscale x 8 x half> %zn1, <vscale x 8 x half> %zn2, <vscale x 8 x half> %zn3, <vscale x 8 x half> %zn4) {
; CHECK-LABEL: za_write_vg4_horiz_f16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    // kill: def $z3 killed $z3 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
; CHECK-NEXT:    mov w12, w0
; CHECK-NEXT:    // kill: def $z2 killed $z2 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
; CHECK-NEXT:    mov za0h.h[w12, 0:3], { z0.h - z3.h }
; CHECK-NEXT:    mov za1h.h[w12, 4:7], { z0.h - z3.h }
; CHECK-NEXT:    ret
  call void @llvm.aarch64.sme.write.hor.vg4.nxv8f16(i32 0, i32 %slice, <vscale x 8 x half> %zn1, <vscale x 8 x half> %zn2, <vscale x 8 x half> %zn3, <vscale x 8 x half> %zn4)
  %slice.4 = add i32 %slice, 4
  call void @llvm.aarch64.sme.write.hor.vg4.nxv8f16(i32 1, i32 %slice.4, <vscale x 8 x half> %zn1, <vscale x 8 x half> %zn2, <vscale x 8 x half> %zn3, <vscale x 8 x half> %zn4)
  ret void
}

define void @za_write_vg4_horiz_bf16(i32 %slice, <vscale x 8 x bfloat> %zn1, <vscale x 8 x bfloat> %zn2, <vscale x 8 x bfloat> %zn3, <vscale x 8 x bfloat> %zn4) {
; CHECK-LABEL: za_write_vg4_horiz_bf16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    // kill: def $z3 killed $z3 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
; CHECK-NEXT:    mov w12, w0
; CHECK-NEXT:    // kill: def $z2 killed $z2 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
; CHECK-NEXT:    mov za0h.h[w12, 0:3], { z0.h - z3.h }
; CHECK-NEXT:    mov za1h.h[w12, 4:7], { z0.h - z3.h }
; CHECK-NEXT:    ret
  call void @llvm.aarch64.sme.write.hor.vg4.nxv8bf16(i32 0, i32 %slice, <vscale x 8 x bfloat> %zn1, <vscale x 8 x bfloat> %zn2, <vscale x 8 x bfloat> %zn3, <vscale x 8 x bfloat> %zn4)
  %slice.4 = add i32 %slice, 4
  call void @llvm.aarch64.sme.write.hor.vg4.nxv8bf16(i32 1, i32 %slice.4, <vscale x 8 x bfloat> %zn1, <vscale x 8 x bfloat> %zn2, <vscale x 8 x bfloat> %zn3, <vscale x 8 x bfloat> %zn4)
  ret void
}

define void @za_write_vg4_horiz_s(i32 %slice, <vscale x 4 x i32> %zn1, <vscale x 4 x i32> %zn2, <vscale x 4 x i32> %zn3, <vscale x 4 x i32> %zn4) {
; CHECK-LABEL: za_write_vg4_horiz_s:
; CHECK:       // %bb.0:
; CHECK-NEXT:    // kill: def $z3 killed $z3 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
; CHECK-NEXT:    mov w12, w0
; CHECK-NEXT:    // kill: def $z2 killed $z2 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
; CHECK-NEXT:    mov za0h.s[w12, 0:3], { z0.s - z3.s }
; CHECK-NEXT:    ret
  call void @llvm.aarch64.sme.write.hor.vg4.nxv4i32(i32 0, i32 %slice, <vscale x 4 x i32> %zn1, <vscale x 4 x i32> %zn2, <vscale x 4 x i32> %zn3, <vscale x 4 x i32> %zn4)
  ret void
}

define void @za_write_vg4_horiz_f32(i32 %slice, <vscale x 4 x float> %zn1, <vscale x 4 x float> %zn2, <vscale x 4 x float> %zn3, <vscale x 4 x float> %zn4) {
; CHECK-LABEL: za_write_vg4_horiz_f32:
; CHECK:       // %bb.0:
; CHECK-NEXT:    // kill: def $z3 killed $z3 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
; CHECK-NEXT:    mov w12, w0
; CHECK-NEXT:    // kill: def $z2 killed $z2 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
; CHECK-NEXT:    mov za0h.s[w12, 0:3], { z0.s - z3.s }
; CHECK-NEXT:    ret
  call void @llvm.aarch64.sme.write.hor.vg4.nxv4f32(i32 0, i32 %slice, <vscale x 4 x float> %zn1, <vscale x 4 x float> %zn2, <vscale x 4 x float> %zn3, <vscale x 4 x float> %zn4)
  ret void
}

define void @za_write_vg4_horiz_d(i32 %slice, <vscale x 2 x i64> %zn1, <vscale x 2 x i64> %zn2, <vscale x 2 x i64> %zn3, <vscale x 2 x i64> %zn4) {
; CHECK-LABEL: za_write_vg4_horiz_d:
; CHECK:       // %bb.0:
; CHECK-NEXT:    // kill: def $z3 killed $z3 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
; CHECK-NEXT:    mov w12, w0
; CHECK-NEXT:    // kill: def $z2 killed $z2 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
; CHECK-NEXT:    mov za0h.d[w12, 0:3], { z0.d - z3.d }
; CHECK-NEXT:    ret
  call void @llvm.aarch64.sme.write.hor.vg4.nxv2i64(i32 0, i32 %slice, <vscale x 2 x i64> %zn1, <vscale x 2 x i64> %zn2, <vscale x 2 x i64> %zn3, <vscale x 2 x i64> %zn4)
  ret void
}

define void @za_write_vg4_horiz_f64(i32 %slice, <vscale x 2 x double> %zn1, <vscale x 2 x double> %zn2, <vscale x 2 x double> %zn3, <vscale x 2 x double> %zn4) {
; CHECK-LABEL: za_write_vg4_horiz_f64:
; CHECK:       // %bb.0:
; CHECK-NEXT:    // kill: def $z3 killed $z3 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
; CHECK-NEXT:    mov w12, w0
; CHECK-NEXT:    // kill: def $z2 killed $z2 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
; CHECK-NEXT:    mov za0h.d[w12, 0:3], { z0.d - z3.d }
; CHECK-NEXT:    ret
  call void @llvm.aarch64.sme.write.hor.vg4.nxv2f64(i32 0, i32 %slice, <vscale x 2 x double> %zn1, <vscale x 2 x double> %zn2, <vscale x 2 x double> %zn3, <vscale x 2 x double> %zn4)
  ret void
}

; Vertical

define void @za_write_vg4_vert_b(i32 %slice, <vscale x 16 x i8> %zn1, <vscale x 16 x i8> %zn2, <vscale x 16 x i8> %zn3, <vscale x 16 x i8> %zn4) {
; CHECK-LABEL: za_write_vg4_vert_b:
; CHECK:       // %bb.0:
; CHECK-NEXT:    // kill: def $z3 killed $z3 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
; CHECK-NEXT:    mov w12, w0
; CHECK-NEXT:    // kill: def $z2 killed $z2 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
; CHECK-NEXT:    mov za0v.b[w12, 0:3], { z0.b - z3.b }
; CHECK-NEXT:    mov za0v.b[w12, 12:15], { z0.b - z3.b }
; CHECK-NEXT:    ret
  call void @llvm.aarch64.sme.write.ver.vg4.nxv16i8(i32 0, i32 %slice, <vscale x 16 x i8> %zn1, <vscale x 16 x i8> %zn2, <vscale x 16 x i8> %zn3, <vscale x 16 x i8> %zn4)
  %slice.12 = add i32 %slice, 12
  call void @llvm.aarch64.sme.write.ver.vg4.nxv16i8(i32 0, i32 %slice.12, <vscale x 16 x i8> %zn1, <vscale x 16 x i8> %zn2, <vscale x 16 x i8> %zn3, <vscale x 16 x i8> %zn4)
  ret void
}

define void @za_write_vg4_vert_h(i32 %slice, <vscale x 8 x i16> %zn1, <vscale x 8 x i16> %zn2, <vscale x 8 x i16> %zn3, <vscale x 8 x i16> %zn4) {
; CHECK-LABEL: za_write_vg4_vert_h:
; CHECK:       // %bb.0:
; CHECK-NEXT:    // kill: def $z3 killed $z3 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
; CHECK-NEXT:    mov w12, w0
; CHECK-NEXT:    // kill: def $z2 killed $z2 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
; CHECK-NEXT:    mov za0v.h[w12, 0:3], { z0.h - z3.h }
; CHECK-NEXT:    mov za1v.h[w12, 4:7], { z0.h - z3.h }
; CHECK-NEXT:    ret
  call void @llvm.aarch64.sme.write.ver.vg4.nxv8i16(i32 0, i32 %slice, <vscale x 8 x i16> %zn1, <vscale x 8 x i16> %zn2, <vscale x 8 x i16> %zn3, <vscale x 8 x i16> %zn4)
  %slice.4 = add i32 %slice, 4
  call void @llvm.aarch64.sme.write.ver.vg4.nxv8i16(i32 1, i32 %slice.4, <vscale x 8 x i16> %zn1, <vscale x 8 x i16> %zn2, <vscale x 8 x i16> %zn3, <vscale x 8 x i16> %zn4)
  ret void
}

define void @za_write_vg4_vert_f16(i32 %slice, <vscale x 8 x half> %zn1, <vscale x 8 x half> %zn2, <vscale x 8 x half> %zn3, <vscale x 8 x half> %zn4) {
; CHECK-LABEL: za_write_vg4_vert_f16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    // kill: def $z3 killed $z3 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
; CHECK-NEXT:    mov w12, w0
; CHECK-NEXT:    // kill: def $z2 killed $z2 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
; CHECK-NEXT:    mov za0v.h[w12, 0:3], { z0.h - z3.h }
; CHECK-NEXT:    mov za1v.h[w12, 4:7], { z0.h - z3.h }
; CHECK-NEXT:    ret
  call void @llvm.aarch64.sme.write.ver.vg4.nxv8f16(i32 0, i32 %slice, <vscale x 8 x half> %zn1, <vscale x 8 x half> %zn2, <vscale x 8 x half> %zn3, <vscale x 8 x half> %zn4)
  %slice.4 = add i32 %slice, 4
  call void @llvm.aarch64.sme.write.ver.vg4.nxv8f16(i32 1, i32 %slice.4, <vscale x 8 x half> %zn1, <vscale x 8 x half> %zn2, <vscale x 8 x half> %zn3, <vscale x 8 x half> %zn4)
  ret void
}

define void @za_write_vg4_vert_bf16(i32 %slice, <vscale x 8 x bfloat> %zn1, <vscale x 8 x bfloat> %zn2, <vscale x 8 x bfloat> %zn3, <vscale x 8 x bfloat> %zn4) {
; CHECK-LABEL: za_write_vg4_vert_bf16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    // kill: def $z3 killed $z3 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
; CHECK-NEXT:    mov w12, w0
; CHECK-NEXT:    // kill: def $z2 killed $z2 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
; CHECK-NEXT:    mov za0v.h[w12, 0:3], { z0.h - z3.h }
; CHECK-NEXT:    mov za1v.h[w12, 4:7], { z0.h - z3.h }
; CHECK-NEXT:    ret
  call void @llvm.aarch64.sme.write.ver.vg4.nxv8bf16(i32 0, i32 %slice, <vscale x 8 x bfloat> %zn1, <vscale x 8 x bfloat> %zn2, <vscale x 8 x bfloat> %zn3, <vscale x 8 x bfloat> %zn4)
  %slice.4 = add i32 %slice, 4
  call void @llvm.aarch64.sme.write.ver.vg4.nxv8bf16(i32 1, i32 %slice.4, <vscale x 8 x bfloat> %zn1, <vscale x 8 x bfloat> %zn2, <vscale x 8 x bfloat> %zn3, <vscale x 8 x bfloat> %zn4)
  ret void
}

define void @za_write_vg4_vert_s(i32 %slice, <vscale x 4 x i32> %zn1, <vscale x 4 x i32> %zn2, <vscale x 4 x i32> %zn3, <vscale x 4 x i32> %zn4) {
; CHECK-LABEL: za_write_vg4_vert_s:
; CHECK:       // %bb.0:
; CHECK-NEXT:    // kill: def $z3 killed $z3 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
; CHECK-NEXT:    mov w12, w0
; CHECK-NEXT:    // kill: def $z2 killed $z2 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
; CHECK-NEXT:    mov za0v.s[w12, 0:3], { z0.s - z3.s }
; CHECK-NEXT:    ret
  call void @llvm.aarch64.sme.write.ver.vg4.nxv4i32(i32 0, i32 %slice, <vscale x 4 x i32> %zn1, <vscale x 4 x i32> %zn2, <vscale x 4 x i32> %zn3, <vscale x 4 x i32> %zn4)
  ret void
}

define void @za_write_vg4_vert_f32(i32 %slice, <vscale x 4 x float> %zn1, <vscale x 4 x float> %zn2, <vscale x 4 x float> %zn3, <vscale x 4 x float> %zn4) {
; CHECK-LABEL: za_write_vg4_vert_f32:
; CHECK:       // %bb.0:
; CHECK-NEXT:    // kill: def $z3 killed $z3 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
; CHECK-NEXT:    mov w12, w0
; CHECK-NEXT:    // kill: def $z2 killed $z2 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
; CHECK-NEXT:    mov za0v.s[w12, 0:3], { z0.s - z3.s }
; CHECK-NEXT:    ret
  call void @llvm.aarch64.sme.write.ver.vg4.nxv4f32(i32 0, i32 %slice, <vscale x 4 x float> %zn1, <vscale x 4 x float> %zn2, <vscale x 4 x float> %zn3, <vscale x 4 x float> %zn4)
  ret void
}

define void @za_write_vg4_vert_d(i32 %slice, <vscale x 2 x i64> %zn1, <vscale x 2 x i64> %zn2, <vscale x 2 x i64> %zn3, <vscale x 2 x i64> %zn4) {
; CHECK-LABEL: za_write_vg4_vert_d:
; CHECK:       // %bb.0:
; CHECK-NEXT:    // kill: def $z3 killed $z3 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
; CHECK-NEXT:    mov w12, w0
; CHECK-NEXT:    // kill: def $z2 killed $z2 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
; CHECK-NEXT:    mov za0v.d[w12, 0:3], { z0.d - z3.d }
; CHECK-NEXT:    ret
  call void @llvm.aarch64.sme.write.ver.vg4.nxv2i64(i32 0, i32 %slice, <vscale x 2 x i64> %zn1, <vscale x 2 x i64> %zn2, <vscale x 2 x i64> %zn3, <vscale x 2 x i64> %zn4)
  ret void
}

define void @za_write_vg4_vert_f64(i32 %slice, <vscale x 2 x double> %zn1, <vscale x 2 x double> %zn2, <vscale x 2 x double> %zn3, <vscale x 2 x double> %zn4) {
; CHECK-LABEL: za_write_vg4_vert_f64:
; CHECK:       // %bb.0:
; CHECK-NEXT:    // kill: def $z3 killed $z3 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
; CHECK-NEXT:    mov w12, w0
; CHECK-NEXT:    // kill: def $z2 killed $z2 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
; CHECK-NEXT:    mov za0v.d[w12, 0:3], { z0.d - z3.d }
; CHECK-NEXT:    ret
  call void @llvm.aarch64.sme.write.ver.vg4.nxv2f64(i32 0, i32 %slice, <vscale x 2 x double> %zn1, <vscale x 2 x double> %zn2, <vscale x 2 x double> %zn3, <vscale x 2 x double> %zn4)
  ret void
}

;
; Move Multi-Vector To ZA (Write) x2
;
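
; These intrinsics write to ZA as an array of vector groups (vgx2) rather
; than to a named tile, so there is no tile operand; the checks show the
; slice in w8 with an immediate offset of 0-7 (the add of 7 is folded).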

define void @za_write_vg1x2_d(i32 %slice, <vscale x 2 x i64> %za1, <vscale x 2 x i64> %za2) {
; CHECK-LABEL: za_write_vg1x2_d:
; CHECK:       // %bb.0:
; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1
; CHECK-NEXT:    mov w8, w0
; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1
; CHECK-NEXT:    mov za.d[w8, 0, vgx2], { z0.d, z1.d }
; CHECK-NEXT:    mov za.d[w8, 7, vgx2], { z0.d, z1.d }
; CHECK-NEXT:    ret
  call void @llvm.aarch64.sme.write.vg1x2.nxv2i64(i32 %slice, <vscale x 2 x i64> %za1, <vscale x 2 x i64> %za2)
  %slice.7 = add i32 %slice, 7
  call void @llvm.aarch64.sme.write.vg1x2.nxv2i64(i32 %slice.7, <vscale x 2 x i64> %za1, <vscale x 2 x i64> %za2)
  ret void
}

define void @za_write_vg1x2_f64(i32 %slice, <vscale x 2 x double> %za1, <vscale x 2 x double> %za2) {
; CHECK-LABEL: za_write_vg1x2_f64:
; CHECK:       // %bb.0:
; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1
; CHECK-NEXT:    mov w8, w0
; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1
; CHECK-NEXT:    mov za.d[w8, 0, vgx2], { z0.d, z1.d }
; CHECK-NEXT:    mov za.d[w8, 7, vgx2], { z0.d, z1.d }
; CHECK-NEXT:    ret
  call void @llvm.aarch64.sme.write.vg1x2.nxv2f64(i32 %slice, <vscale x 2 x double> %za1, <vscale x 2 x double> %za2)
  %slice.7 = add i32 %slice, 7
  call void @llvm.aarch64.sme.write.vg1x2.nxv2f64(i32 %slice.7, <vscale x 2 x double> %za1, <vscale x 2 x double> %za2)
  ret void
}

;
; Move Multi-Vector To ZA (Write) x4
;
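
; As above, but writing four vector groups per instruction (vgx4); the
; folded slice offset range is the same, as the add of 7 below shows.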

define void @za_write_vg1x4_d(i32 %slice, <vscale x 2 x i64> %za1, <vscale x 2 x i64> %za2, <vscale x 2 x i64> %za3, <vscale x 2 x i64> %za4) {
; CHECK-LABEL: za_write_vg1x4_d:
; CHECK:       // %bb.0:
; CHECK-NEXT:    // kill: def $z3 killed $z3 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
; CHECK-NEXT:    mov w8, w0
; CHECK-NEXT:    // kill: def $z2 killed $z2 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
; CHECK-NEXT:    mov za.d[w8, 0, vgx4], { z0.d - z3.d }
; CHECK-NEXT:    mov za.d[w8, 7, vgx4], { z0.d - z3.d }
; CHECK-NEXT:    ret
  call void @llvm.aarch64.sme.write.vg1x4.nxv2i64(i32 %slice, <vscale x 2 x i64> %za1, <vscale x 2 x i64> %za2, <vscale x 2 x i64> %za3, <vscale x 2 x i64> %za4)
  %slice.7 = add i32 %slice, 7
  call void @llvm.aarch64.sme.write.vg1x4.nxv2i64(i32 %slice.7, <vscale x 2 x i64> %za1, <vscale x 2 x i64> %za2, <vscale x 2 x i64> %za3, <vscale x 2 x i64> %za4)
  ret void
}

define void @za_write_vg1x4_f64(i32 %slice, <vscale x 2 x double> %za1, <vscale x 2 x double> %za2, <vscale x 2 x double> %za3, <vscale x 2 x double> %za4) {
; CHECK-LABEL: za_write_vg1x4_f64:
; CHECK:       // %bb.0:
; CHECK-NEXT:    // kill: def $z3 killed $z3 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
; CHECK-NEXT:    mov w8, w0
; CHECK-NEXT:    // kill: def $z2 killed $z2 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
; CHECK-NEXT:    mov za.d[w8, 0, vgx4], { z0.d - z3.d }
; CHECK-NEXT:    mov za.d[w8, 7, vgx4], { z0.d - z3.d }
; CHECK-NEXT:    ret
  call void @llvm.aarch64.sme.write.vg1x4.nxv2f64(i32 %slice, <vscale x 2 x double> %za1, <vscale x 2 x double> %za2, <vscale x 2 x double> %za3, <vscale x 2 x double> %za4)
  %slice.7 = add i32 %slice, 7
  call void @llvm.aarch64.sme.write.vg1x4.nxv2f64(i32 %slice.7, <vscale x 2 x double> %za1, <vscale x 2 x double> %za2, <vscale x 2 x double> %za3, <vscale x 2 x double> %za4)
  ret void
}

declare void @llvm.aarch64.sme.write.hor.vg2.nxv16i8(i32, i32, <vscale x 16 x i8>, <vscale x 16 x i8>)
declare void @llvm.aarch64.sme.write.hor.vg2.nxv8i16(i32, i32, <vscale x 8 x i16>, <vscale x 8 x i16>)
declare void @llvm.aarch64.sme.write.hor.vg2.nxv8f16(i32, i32, <vscale x 8 x half>, <vscale x 8 x half>)
declare void @llvm.aarch64.sme.write.hor.vg2.nxv8bf16(i32, i32, <vscale x 8 x bfloat>, <vscale x 8 x bfloat>)
declare void @llvm.aarch64.sme.write.hor.vg2.nxv4i32(i32, i32, <vscale x 4 x i32>, <vscale x 4 x i32>)
declare void @llvm.aarch64.sme.write.hor.vg2.nxv4f32(i32, i32, <vscale x 4 x float>, <vscale x 4 x float>)
declare void @llvm.aarch64.sme.write.hor.vg2.nxv2i64(i32, i32, <vscale x 2 x i64>, <vscale x 2 x i64>)
declare void @llvm.aarch64.sme.write.hor.vg2.nxv2f64(i32, i32, <vscale x 2 x double>, <vscale x 2 x double>)

declare void @llvm.aarch64.sme.write.ver.vg2.nxv16i8(i32, i32, <vscale x 16 x i8>, <vscale x 16 x i8>)
declare void @llvm.aarch64.sme.write.ver.vg2.nxv8i16(i32, i32, <vscale x 8 x i16>, <vscale x 8 x i16>)
declare void @llvm.aarch64.sme.write.ver.vg2.nxv8f16(i32, i32, <vscale x 8 x half>, <vscale x 8 x half>)
declare void @llvm.aarch64.sme.write.ver.vg2.nxv8bf16(i32, i32, <vscale x 8 x bfloat>, <vscale x 8 x bfloat>)
declare void @llvm.aarch64.sme.write.ver.vg2.nxv4i32(i32, i32, <vscale x 4 x i32>, <vscale x 4 x i32>)
declare void @llvm.aarch64.sme.write.ver.vg2.nxv4f32(i32, i32, <vscale x 4 x float>, <vscale x 4 x float>)
declare void @llvm.aarch64.sme.write.ver.vg2.nxv2i64(i32, i32, <vscale x 2 x i64>, <vscale x 2 x i64>)
declare void @llvm.aarch64.sme.write.ver.vg2.nxv2f64(i32, i32, <vscale x 2 x double>, <vscale x 2 x double>)

declare void @llvm.aarch64.sme.write.hor.vg4.nxv16i8(i32, i32, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>)
declare void @llvm.aarch64.sme.write.hor.vg4.nxv8i16(i32, i32, <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16>)
declare void @llvm.aarch64.sme.write.hor.vg4.nxv8f16(i32, i32, <vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x half>)
declare void @llvm.aarch64.sme.write.hor.vg4.nxv8bf16(i32, i32, <vscale x 8 x bfloat>, <vscale x 8 x bfloat>, <vscale x 8 x bfloat>, <vscale x 8 x bfloat>)
declare void @llvm.aarch64.sme.write.hor.vg4.nxv4i32(i32, i32, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>)
declare void @llvm.aarch64.sme.write.hor.vg4.nxv4f32(i32, i32, <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float>)
declare void @llvm.aarch64.sme.write.hor.vg4.nxv2i64(i32, i32, <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>)
declare void @llvm.aarch64.sme.write.hor.vg4.nxv2f64(i32, i32, <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double>)

declare void @llvm.aarch64.sme.write.ver.vg4.nxv16i8(i32, i32, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>)
declare void @llvm.aarch64.sme.write.ver.vg4.nxv8i16(i32, i32, <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16>)
declare void @llvm.aarch64.sme.write.ver.vg4.nxv8f16(i32, i32, <vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x half>)
declare void @llvm.aarch64.sme.write.ver.vg4.nxv8bf16(i32, i32, <vscale x 8 x bfloat>, <vscale x 8 x bfloat>, <vscale x 8 x bfloat>, <vscale x 8 x bfloat>)
declare void @llvm.aarch64.sme.write.ver.vg4.nxv4i32(i32, i32, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>)
declare void @llvm.aarch64.sme.write.ver.vg4.nxv4f32(i32, i32, <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float>)
declare void @llvm.aarch64.sme.write.ver.vg4.nxv2i64(i32, i32, <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>)
declare void @llvm.aarch64.sme.write.ver.vg4.nxv2f64(i32, i32, <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double>)

declare void @llvm.aarch64.sme.write.vg1x2.nxv2i64(i32, <vscale x 2 x i64>, <vscale x 2 x i64>)
declare void @llvm.aarch64.sme.write.vg1x2.nxv2f64(i32, <vscale x 2 x double>, <vscale x 2 x double>)

declare void @llvm.aarch64.sme.write.vg1x4.nxv2i64(i32, <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>)
declare void @llvm.aarch64.sme.write.vg1x4.nxv2f64(i32, <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double>)