1 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2 ; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sme2 -verify-machineinstrs -force-streaming < %s | FileCheck %s
5 ; Move Multi-Vector From Tile (Read) x2
define { <vscale x 16 x i8>, <vscale x 16 x i8> } @za_read_horiz_vg2_b(i32 %slice) {
; CHECK-LABEL: za_read_horiz_vg2_b:
; CHECK:       // %bb.0:
; CHECK-NEXT:    mov w12, w0
; CHECK-NEXT:    mov { z0.b, z1.b }, za0h.b[w12, 0:1]
; CHECK-NEXT:    mov { z0.b, z1.b }, za0h.b[w12, 14:15]
; CHECK-NEXT:    ret
  %res = call { <vscale x 16 x i8>, <vscale x 16 x i8> } @llvm.aarch64.sme.read.hor.vg2.nxv16i8(i32 0, i32 %slice)
  %slice.14 = add i32 %slice, 14
  %res2 = call { <vscale x 16 x i8>, <vscale x 16 x i8> } @llvm.aarch64.sme.read.hor.vg2.nxv16i8(i32 0, i32 %slice.14)
  ret { <vscale x 16 x i8>, <vscale x 16 x i8> } %res2
}

define { <vscale x 8 x i16>, <vscale x 8 x i16> } @za_read_horiz_vg2_h(i32 %slice) {
; CHECK-LABEL: za_read_horiz_vg2_h:
; CHECK:       // %bb.0:
; CHECK-NEXT:    mov w12, w0
; CHECK-NEXT:    mov { z0.h, z1.h }, za0h.h[w12, 0:1]
; CHECK-NEXT:    mov { z0.h, z1.h }, za1h.h[w12, 6:7]
; CHECK-NEXT:    ret
  %res = call { <vscale x 8 x i16>, <vscale x 8 x i16> } @llvm.aarch64.sme.read.hor.vg2.nxv8i16(i32 0, i32 %slice)
  %slice.6 = add i32 %slice, 6
  %res2 = call { <vscale x 8 x i16>, <vscale x 8 x i16> } @llvm.aarch64.sme.read.hor.vg2.nxv8i16(i32 1, i32 %slice.6)
  ret { <vscale x 8 x i16>, <vscale x 8 x i16> } %res2
}

define { <vscale x 8 x half>, <vscale x 8 x half> } @za_read_horiz_vg2_f16(i32 %slice) {
; CHECK-LABEL: za_read_horiz_vg2_f16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    mov w12, w0
; CHECK-NEXT:    mov { z0.h, z1.h }, za0h.h[w12, 0:1]
; CHECK-NEXT:    mov { z0.h, z1.h }, za1h.h[w12, 6:7]
; CHECK-NEXT:    ret
  %res = call { <vscale x 8 x half>, <vscale x 8 x half> } @llvm.aarch64.sme.read.hor.vg2.nxv8f16(i32 0, i32 %slice)
  %slice.6 = add i32 %slice, 6
  %res2 = call { <vscale x 8 x half>, <vscale x 8 x half> } @llvm.aarch64.sme.read.hor.vg2.nxv8f16(i32 1, i32 %slice.6)
  ret { <vscale x 8 x half>, <vscale x 8 x half> } %res2
}

define { <vscale x 8 x bfloat>, <vscale x 8 x bfloat> } @za_read_horiz_vg2_bf16(i32 %slice) {
; CHECK-LABEL: za_read_horiz_vg2_bf16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    mov w12, w0
; CHECK-NEXT:    mov { z0.h, z1.h }, za0h.h[w12, 0:1]
; CHECK-NEXT:    mov { z0.h, z1.h }, za1h.h[w12, 6:7]
; CHECK-NEXT:    ret
  %res = call { <vscale x 8 x bfloat>, <vscale x 8 x bfloat> } @llvm.aarch64.sme.read.hor.vg2.nxv8bf16(i32 0, i32 %slice)
  %slice.6 = add i32 %slice, 6
  %res2 = call { <vscale x 8 x bfloat>, <vscale x 8 x bfloat> } @llvm.aarch64.sme.read.hor.vg2.nxv8bf16(i32 1, i32 %slice.6)
  ret { <vscale x 8 x bfloat>, <vscale x 8 x bfloat> } %res2
}

define { <vscale x 4 x i32>, <vscale x 4 x i32> } @za_read_horiz_vg2_s(i32 %slice) {
; CHECK-LABEL: za_read_horiz_vg2_s:
; CHECK:       // %bb.0:
; CHECK-NEXT:    mov w12, w0
; CHECK-NEXT:    mov { z0.s, z1.s }, za0h.s[w12, 0:1]
; CHECK-NEXT:    mov { z0.s, z1.s }, za3h.s[w12, 2:3]
; CHECK-NEXT:    ret
  %res = call { <vscale x 4 x i32>, <vscale x 4 x i32> } @llvm.aarch64.sme.read.hor.vg2.nxv4i32(i32 0, i32 %slice)
  %slice.2 = add i32 %slice, 2
  %res2 = call { <vscale x 4 x i32>, <vscale x 4 x i32> } @llvm.aarch64.sme.read.hor.vg2.nxv4i32(i32 3, i32 %slice.2)
  ret { <vscale x 4 x i32>, <vscale x 4 x i32> } %res2
}

define { <vscale x 4 x float>, <vscale x 4 x float> } @za_read_horiz_vg2_f32(i32 %slice) {
; CHECK-LABEL: za_read_horiz_vg2_f32:
; CHECK:       // %bb.0:
; CHECK-NEXT:    mov w12, w0
; CHECK-NEXT:    mov { z0.s, z1.s }, za0h.s[w12, 0:1]
; CHECK-NEXT:    mov { z0.s, z1.s }, za3h.s[w12, 2:3]
; CHECK-NEXT:    ret
  %res = call { <vscale x 4 x float>, <vscale x 4 x float> } @llvm.aarch64.sme.read.hor.vg2.nxv4f32(i32 0, i32 %slice)
  %slice.2 = add i32 %slice, 2
  %res2 = call { <vscale x 4 x float>, <vscale x 4 x float> } @llvm.aarch64.sme.read.hor.vg2.nxv4f32(i32 3, i32 %slice.2)
  ret { <vscale x 4 x float>, <vscale x 4 x float> } %res2
}

define { <vscale x 2 x i64>, <vscale x 2 x i64> } @za_read_horiz_vg2_d(i32 %slice) {
; CHECK-LABEL: za_read_horiz_vg2_d:
; CHECK:       // %bb.0:
; CHECK-NEXT:    mov w12, w0
; CHECK-NEXT:    mov { z0.d, z1.d }, za0h.d[w12, 0:1]
; CHECK-NEXT:    ret
  %res = call { <vscale x 2 x i64>, <vscale x 2 x i64> } @llvm.aarch64.sme.read.hor.vg2.nxv2i64(i32 0, i32 %slice)
  ret { <vscale x 2 x i64>, <vscale x 2 x i64> } %res
}

define { <vscale x 2 x double>, <vscale x 2 x double> } @za_read_horiz_vg2_f64(i32 %slice) {
; CHECK-LABEL: za_read_horiz_vg2_f64:
; CHECK:       // %bb.0:
; CHECK-NEXT:    mov w12, w0
; CHECK-NEXT:    mov { z0.d, z1.d }, za0h.d[w12, 0:1]
; CHECK-NEXT:    ret
  %res = call { <vscale x 2 x double>, <vscale x 2 x double> } @llvm.aarch64.sme.read.hor.vg2.nxv2f64(i32 0, i32 %slice)
  ret { <vscale x 2 x double>, <vscale x 2 x double> } %res
}
define { <vscale x 16 x i8>, <vscale x 16 x i8> } @za_read_vert_vg2_b(i32 %slice) {
; CHECK-LABEL: za_read_vert_vg2_b:
; CHECK:       // %bb.0:
; CHECK-NEXT:    mov w12, w0
; CHECK-NEXT:    mov { z0.b, z1.b }, za0v.b[w12, 0:1]
; CHECK-NEXT:    mov { z0.b, z1.b }, za0v.b[w12, 14:15]
; CHECK-NEXT:    ret
  %res = call { <vscale x 16 x i8>, <vscale x 16 x i8> } @llvm.aarch64.sme.read.ver.vg2.nxv16i8(i32 0, i32 %slice)
  %slice.14 = add i32 %slice, 14
  %res2 = call { <vscale x 16 x i8>, <vscale x 16 x i8> } @llvm.aarch64.sme.read.ver.vg2.nxv16i8(i32 0, i32 %slice.14)
  ret { <vscale x 16 x i8>, <vscale x 16 x i8> } %res2
}

define { <vscale x 8 x i16>, <vscale x 8 x i16> } @za_read_vert_vg2_h(i32 %slice) {
; CHECK-LABEL: za_read_vert_vg2_h:
; CHECK:       // %bb.0:
; CHECK-NEXT:    mov w12, w0
; CHECK-NEXT:    mov { z0.h, z1.h }, za0v.h[w12, 0:1]
; CHECK-NEXT:    mov { z0.h, z1.h }, za1v.h[w12, 6:7]
; CHECK-NEXT:    ret
  %res = call { <vscale x 8 x i16>, <vscale x 8 x i16> } @llvm.aarch64.sme.read.ver.vg2.nxv8i16(i32 0, i32 %slice)
  %slice.6 = add i32 %slice, 6
  %res2 = call { <vscale x 8 x i16>, <vscale x 8 x i16> } @llvm.aarch64.sme.read.ver.vg2.nxv8i16(i32 1, i32 %slice.6)
  ret { <vscale x 8 x i16>, <vscale x 8 x i16> } %res2
}

define { <vscale x 8 x half>, <vscale x 8 x half> } @za_read_vert_vg2_f16(i32 %slice) {
; CHECK-LABEL: za_read_vert_vg2_f16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    mov w12, w0
; CHECK-NEXT:    mov { z0.h, z1.h }, za0v.h[w12, 0:1]
; CHECK-NEXT:    mov { z0.h, z1.h }, za1v.h[w12, 6:7]
; CHECK-NEXT:    ret
  %res = call { <vscale x 8 x half>, <vscale x 8 x half> } @llvm.aarch64.sme.read.ver.vg2.nxv8f16(i32 0, i32 %slice)
  %slice.6 = add i32 %slice, 6
  %res2 = call { <vscale x 8 x half>, <vscale x 8 x half> } @llvm.aarch64.sme.read.ver.vg2.nxv8f16(i32 1, i32 %slice.6)
  ret { <vscale x 8 x half>, <vscale x 8 x half> } %res2
}

define { <vscale x 8 x bfloat>, <vscale x 8 x bfloat> } @za_read_vert_vg2_bf16(i32 %slice) {
; CHECK-LABEL: za_read_vert_vg2_bf16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    mov w12, w0
; CHECK-NEXT:    mov { z0.h, z1.h }, za0v.h[w12, 0:1]
; CHECK-NEXT:    mov { z0.h, z1.h }, za1v.h[w12, 6:7]
; CHECK-NEXT:    ret
  %res = call { <vscale x 8 x bfloat>, <vscale x 8 x bfloat> } @llvm.aarch64.sme.read.ver.vg2.nxv8bf16(i32 0, i32 %slice)
  %slice.6 = add i32 %slice, 6
  %res2 = call { <vscale x 8 x bfloat>, <vscale x 8 x bfloat> } @llvm.aarch64.sme.read.ver.vg2.nxv8bf16(i32 1, i32 %slice.6)
  ret { <vscale x 8 x bfloat>, <vscale x 8 x bfloat> } %res2
}

define { <vscale x 4 x i32>, <vscale x 4 x i32> } @za_read_vert_vg2_s(i32 %slice) {
; CHECK-LABEL: za_read_vert_vg2_s:
; CHECK:       // %bb.0:
; CHECK-NEXT:    mov w12, w0
; CHECK-NEXT:    mov { z0.s, z1.s }, za0v.s[w12, 0:1]
; CHECK-NEXT:    mov { z0.s, z1.s }, za3v.s[w12, 2:3]
; CHECK-NEXT:    ret
  %res = call { <vscale x 4 x i32>, <vscale x 4 x i32> } @llvm.aarch64.sme.read.ver.vg2.nxv4i32(i32 0, i32 %slice)
  %slice.2 = add i32 %slice, 2
  %res2 = call { <vscale x 4 x i32>, <vscale x 4 x i32> } @llvm.aarch64.sme.read.ver.vg2.nxv4i32(i32 3, i32 %slice.2)
  ret { <vscale x 4 x i32>, <vscale x 4 x i32> } %res2
}

define { <vscale x 4 x float>, <vscale x 4 x float> } @za_read_vert_vg2_f32(i32 %slice) {
; CHECK-LABEL: za_read_vert_vg2_f32:
; CHECK:       // %bb.0:
; CHECK-NEXT:    mov w12, w0
; CHECK-NEXT:    mov { z0.s, z1.s }, za0v.s[w12, 0:1]
; CHECK-NEXT:    mov { z0.s, z1.s }, za3v.s[w12, 2:3]
; CHECK-NEXT:    ret
  %res = call { <vscale x 4 x float>, <vscale x 4 x float> } @llvm.aarch64.sme.read.ver.vg2.nxv4f32(i32 0, i32 %slice)
  %slice.2 = add i32 %slice, 2
  %res2 = call { <vscale x 4 x float>, <vscale x 4 x float> } @llvm.aarch64.sme.read.ver.vg2.nxv4f32(i32 3, i32 %slice.2)
  ret { <vscale x 4 x float>, <vscale x 4 x float> } %res2
}

define { <vscale x 2 x i64>, <vscale x 2 x i64> } @za_read_vert_vg2_d(i32 %slice) {
; CHECK-LABEL: za_read_vert_vg2_d:
; CHECK:       // %bb.0:
; CHECK-NEXT:    mov w12, w0
; CHECK-NEXT:    mov { z0.d, z1.d }, za0v.d[w12, 0:1]
; CHECK-NEXT:    ret
  %res = call { <vscale x 2 x i64>, <vscale x 2 x i64> } @llvm.aarch64.sme.read.ver.vg2.nxv2i64(i32 0, i32 %slice)
  ret { <vscale x 2 x i64>, <vscale x 2 x i64> } %res
}

define { <vscale x 2 x double>, <vscale x 2 x double> } @za_read_vert_vg2_f64(i32 %slice) {
; CHECK-LABEL: za_read_vert_vg2_f64:
; CHECK:       // %bb.0:
; CHECK-NEXT:    mov w12, w0
; CHECK-NEXT:    mov { z0.d, z1.d }, za0v.d[w12, 0:1]
; CHECK-NEXT:    ret
  %res = call { <vscale x 2 x double>, <vscale x 2 x double> } @llvm.aarch64.sme.read.ver.vg2.nxv2f64(i32 0, i32 %slice)
  ret { <vscale x 2 x double>, <vscale x 2 x double> } %res
}
209 ; Move Multi-Vector From Tile (Read) x4
define { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> } @za_read_horiz_vg4_b(i32 %slice) {
; CHECK-LABEL: za_read_horiz_vg4_b:
; CHECK:       // %bb.0:
; CHECK-NEXT:    mov w12, w0
; CHECK-NEXT:    mov { z0.b - z3.b }, za0h.b[w12, 0:3]
; CHECK-NEXT:    mov { z0.b - z3.b }, za0h.b[w12, 12:15]
; CHECK-NEXT:    ret
  %res = call { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> } @llvm.aarch64.sme.read.hor.vg4.nxv16i8(i32 0, i32 %slice)
  %slice.12 = add i32 %slice, 12
  %res2 = call { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> } @llvm.aarch64.sme.read.hor.vg4.nxv16i8(i32 0, i32 %slice.12)
  ret { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> } %res2
}

define { <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16> } @za_read_horiz_vg4_h(i32 %slice) {
; CHECK-LABEL: za_read_horiz_vg4_h:
; CHECK:       // %bb.0:
; CHECK-NEXT:    mov w12, w0
; CHECK-NEXT:    mov { z0.h - z3.h }, za0h.h[w12, 0:3]
; CHECK-NEXT:    mov { z0.h - z3.h }, za1h.h[w12, 4:7]
; CHECK-NEXT:    ret
  %res = call { <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16> } @llvm.aarch64.sme.read.hor.vg4.nxv8i16(i32 0, i32 %slice)
  %slice.4 = add i32 %slice, 4
  %res2 = call { <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16> } @llvm.aarch64.sme.read.hor.vg4.nxv8i16(i32 1, i32 %slice.4)
  ret { <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16> } %res2
}

define { <vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x half> } @za_read_horiz_vg4_f16(i32 %slice) {
; CHECK-LABEL: za_read_horiz_vg4_f16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    mov w12, w0
; CHECK-NEXT:    mov { z0.h - z3.h }, za0h.h[w12, 0:3]
; CHECK-NEXT:    mov { z0.h - z3.h }, za1h.h[w12, 4:7]
; CHECK-NEXT:    ret
  %res = call { <vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x half> } @llvm.aarch64.sme.read.hor.vg4.nxv8f16(i32 0, i32 %slice)
  %slice.4 = add i32 %slice, 4
  %res2 = call { <vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x half> } @llvm.aarch64.sme.read.hor.vg4.nxv8f16(i32 1, i32 %slice.4)
  ret { <vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x half> } %res2
}

define { <vscale x 8 x bfloat>, <vscale x 8 x bfloat>, <vscale x 8 x bfloat>, <vscale x 8 x bfloat> } @za_read_horiz_vg4_bf16(i32 %slice) {
; CHECK-LABEL: za_read_horiz_vg4_bf16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    mov w12, w0
; CHECK-NEXT:    mov { z0.h - z3.h }, za0h.h[w12, 0:3]
; CHECK-NEXT:    mov { z0.h - z3.h }, za1h.h[w12, 4:7]
; CHECK-NEXT:    ret
  %res = call { <vscale x 8 x bfloat>, <vscale x 8 x bfloat>, <vscale x 8 x bfloat>, <vscale x 8 x bfloat> } @llvm.aarch64.sme.read.hor.vg4.nxv8bf16(i32 0, i32 %slice)
  %slice.4 = add i32 %slice, 4
  %res2 = call { <vscale x 8 x bfloat>, <vscale x 8 x bfloat>, <vscale x 8 x bfloat>, <vscale x 8 x bfloat> } @llvm.aarch64.sme.read.hor.vg4.nxv8bf16(i32 1, i32 %slice.4)
  ret { <vscale x 8 x bfloat>, <vscale x 8 x bfloat>, <vscale x 8 x bfloat>, <vscale x 8 x bfloat> } %res2
}

define { <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32> } @za_read_horiz_vg4_s(i32 %slice) {
; CHECK-LABEL: za_read_horiz_vg4_s:
; CHECK:       // %bb.0:
; CHECK-NEXT:    mov w12, w0
; CHECK-NEXT:    mov { z0.s - z3.s }, za0h.s[w12, 0:3]
; CHECK-NEXT:    ret
  %res = call { <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32> } @llvm.aarch64.sme.read.hor.vg4.nxv4i32(i32 0, i32 %slice)
  ret { <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32> } %res
}

define { <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float> } @za_read_horiz_vg4_f32(i32 %slice) {
; CHECK-LABEL: za_read_horiz_vg4_f32:
; CHECK:       // %bb.0:
; CHECK-NEXT:    mov w12, w0
; CHECK-NEXT:    mov { z0.s - z3.s }, za0h.s[w12, 0:3]
; CHECK-NEXT:    ret
  %res = call { <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float> } @llvm.aarch64.sme.read.hor.vg4.nxv4f32(i32 0, i32 %slice)
  ret { <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float> } %res
}

define { <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64> } @za_read_horiz_vg4_d(i32 %slice) {
; CHECK-LABEL: za_read_horiz_vg4_d:
; CHECK:       // %bb.0:
; CHECK-NEXT:    mov w12, w0
; CHECK-NEXT:    mov { z0.d - z3.d }, za0h.d[w12, 0:3]
; CHECK-NEXT:    ret
  %res = call { <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64> } @llvm.aarch64.sme.read.hor.vg4.nxv2i64(i32 0, i32 %slice)
  ret { <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64> } %res
}

define { <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double> } @za_read_horiz_vg4_f64(i32 %slice) {
; CHECK-LABEL: za_read_horiz_vg4_f64:
; CHECK:       // %bb.0:
; CHECK-NEXT:    mov w12, w0
; CHECK-NEXT:    mov { z0.d - z3.d }, za0h.d[w12, 0:3]
; CHECK-NEXT:    ret
  %res = call { <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double> } @llvm.aarch64.sme.read.hor.vg4.nxv2f64(i32 0, i32 %slice)
  ret { <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double> } %res
}
define { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> } @za_read_vert_vg4_b(i32 %slice) {
; CHECK-LABEL: za_read_vert_vg4_b:
; CHECK:       // %bb.0:
; CHECK-NEXT:    mov w12, w0
; CHECK-NEXT:    mov { z0.b - z3.b }, za0v.b[w12, 0:3]
; CHECK-NEXT:    mov { z0.b - z3.b }, za0v.b[w12, 12:15]
; CHECK-NEXT:    ret
  %res = call { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> } @llvm.aarch64.sme.read.ver.vg4.nxv16i8(i32 0, i32 %slice)
  %slice.12 = add i32 %slice, 12
  %res2 = call { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> } @llvm.aarch64.sme.read.ver.vg4.nxv16i8(i32 0, i32 %slice.12)
  ret { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> } %res2
}

define { <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16> } @za_read_vert_vg4_h(i32 %slice) {
; CHECK-LABEL: za_read_vert_vg4_h:
; CHECK:       // %bb.0:
; CHECK-NEXT:    mov w12, w0
; CHECK-NEXT:    mov { z0.h - z3.h }, za0v.h[w12, 0:3]
; CHECK-NEXT:    mov { z0.h - z3.h }, za1v.h[w12, 4:7]
; CHECK-NEXT:    ret
  %res = call { <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16> } @llvm.aarch64.sme.read.ver.vg4.nxv8i16(i32 0, i32 %slice)
  %slice.4 = add i32 %slice, 4
  %res2 = call { <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16> } @llvm.aarch64.sme.read.ver.vg4.nxv8i16(i32 1, i32 %slice.4)
  ret { <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16> } %res2
}

define { <vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x half> } @za_read_vert_vg4_f16(i32 %slice) {
; CHECK-LABEL: za_read_vert_vg4_f16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    mov w12, w0
; CHECK-NEXT:    mov { z0.h - z3.h }, za0v.h[w12, 0:3]
; CHECK-NEXT:    mov { z0.h - z3.h }, za1v.h[w12, 4:7]
; CHECK-NEXT:    ret
  %res = call { <vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x half> } @llvm.aarch64.sme.read.ver.vg4.nxv8f16(i32 0, i32 %slice)
  %slice.4 = add i32 %slice, 4
  %res2 = call { <vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x half> } @llvm.aarch64.sme.read.ver.vg4.nxv8f16(i32 1, i32 %slice.4)
  ret { <vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x half> } %res2
}

define { <vscale x 8 x bfloat>, <vscale x 8 x bfloat>, <vscale x 8 x bfloat>, <vscale x 8 x bfloat> } @za_read_vert_vg4_bf16(i32 %slice) {
; CHECK-LABEL: za_read_vert_vg4_bf16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    mov w12, w0
; CHECK-NEXT:    mov { z0.h - z3.h }, za0v.h[w12, 0:3]
; CHECK-NEXT:    mov { z0.h - z3.h }, za1v.h[w12, 4:7]
; CHECK-NEXT:    ret
  %res = call { <vscale x 8 x bfloat>, <vscale x 8 x bfloat>, <vscale x 8 x bfloat>, <vscale x 8 x bfloat> } @llvm.aarch64.sme.read.ver.vg4.nxv8bf16(i32 0, i32 %slice)
  %slice.4 = add i32 %slice, 4
  %res2 = call { <vscale x 8 x bfloat>, <vscale x 8 x bfloat>, <vscale x 8 x bfloat>, <vscale x 8 x bfloat> } @llvm.aarch64.sme.read.ver.vg4.nxv8bf16(i32 1, i32 %slice.4)
  ret { <vscale x 8 x bfloat>, <vscale x 8 x bfloat>, <vscale x 8 x bfloat>, <vscale x 8 x bfloat> } %res2
}

define { <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32> } @za_read_vert_vg4_s(i32 %slice) {
; CHECK-LABEL: za_read_vert_vg4_s:
; CHECK:       // %bb.0:
; CHECK-NEXT:    mov w12, w0
; CHECK-NEXT:    mov { z0.s - z3.s }, za0v.s[w12, 0:3]
; CHECK-NEXT:    ret
  %res = call { <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32> } @llvm.aarch64.sme.read.ver.vg4.nxv4i32(i32 0, i32 %slice)
  ret { <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32> } %res
}

define { <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float> } @za_read_vert_vg4_f32(i32 %slice) {
; CHECK-LABEL: za_read_vert_vg4_f32:
; CHECK:       // %bb.0:
; CHECK-NEXT:    mov w12, w0
; CHECK-NEXT:    mov { z0.s - z3.s }, za0v.s[w12, 0:3]
; CHECK-NEXT:    ret
  %res = call { <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float> } @llvm.aarch64.sme.read.ver.vg4.nxv4f32(i32 0, i32 %slice)
  ret { <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float> } %res
}

define { <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64> } @za_read_vert_vg4_d(i32 %slice) {
; CHECK-LABEL: za_read_vert_vg4_d:
; CHECK:       // %bb.0:
; CHECK-NEXT:    mov w12, w0
; CHECK-NEXT:    mov { z0.d - z3.d }, za0v.d[w12, 0:3]
; CHECK-NEXT:    ret
  %res = call { <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64> } @llvm.aarch64.sme.read.ver.vg4.nxv2i64(i32 0, i32 %slice)
  ret { <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64> } %res
}

define { <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double> } @za_read_vert_vg4_f64(i32 %slice) {
; CHECK-LABEL: za_read_vert_vg4_f64:
; CHECK:       // %bb.0:
; CHECK-NEXT:    mov w12, w0
; CHECK-NEXT:    mov { z0.d - z3.d }, za0v.d[w12, 0:3]
; CHECK-NEXT:    ret
  %res = call { <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double> } @llvm.aarch64.sme.read.ver.vg4.nxv2f64(i32 0, i32 %slice)
  ret { <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double> } %res
}
400 ; Move Multi-Vector From ZA (Read) x2
define { <vscale x 16 x i8>, <vscale x 16 x i8> } @za_read_vg1x2_b(i32 %slice) {
; CHECK-LABEL: za_read_vg1x2_b:
; CHECK:       // %bb.0:
; CHECK-NEXT:    mov w8, w0
; CHECK-NEXT:    mov { z0.d, z1.d }, za.d[w8, 0, vgx2]
; CHECK-NEXT:    mov { z0.d, z1.d }, za.d[w8, 7, vgx2]
; CHECK-NEXT:    ret
  %res = call { <vscale x 16 x i8>, <vscale x 16 x i8> } @llvm.aarch64.sme.read.vg1x2.nxv16i8(i32 %slice)
  %slice.7 = add i32 %slice, 7
  %res2 = call { <vscale x 16 x i8>, <vscale x 16 x i8> } @llvm.aarch64.sme.read.vg1x2.nxv16i8(i32 %slice.7)
  ret { <vscale x 16 x i8>, <vscale x 16 x i8> } %res2
}

define { <vscale x 8 x i16>, <vscale x 8 x i16> } @za_read_vg1x2_h(i32 %slice) {
; CHECK-LABEL: za_read_vg1x2_h:
; CHECK:       // %bb.0:
; CHECK-NEXT:    mov w8, w0
; CHECK-NEXT:    mov { z0.d, z1.d }, za.d[w8, 0, vgx2]
; CHECK-NEXT:    mov { z0.d, z1.d }, za.d[w8, 7, vgx2]
; CHECK-NEXT:    ret
  %res = call { <vscale x 8 x i16>, <vscale x 8 x i16> } @llvm.aarch64.sme.read.vg1x2.nxv8i16(i32 %slice)
  %slice.7 = add i32 %slice, 7
  %res2 = call { <vscale x 8 x i16>, <vscale x 8 x i16> } @llvm.aarch64.sme.read.vg1x2.nxv8i16(i32 %slice.7)
  ret { <vscale x 8 x i16>, <vscale x 8 x i16> } %res2
}

define { <vscale x 8 x half>, <vscale x 8 x half> } @za_read_vg1x2_f16(i32 %slice) {
; CHECK-LABEL: za_read_vg1x2_f16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    mov w8, w0
; CHECK-NEXT:    mov { z0.d, z1.d }, za.d[w8, 0, vgx2]
; CHECK-NEXT:    mov { z0.d, z1.d }, za.d[w8, 7, vgx2]
; CHECK-NEXT:    ret
  %res = call { <vscale x 8 x half>, <vscale x 8 x half> } @llvm.aarch64.sme.read.vg1x2.nxv8f16(i32 %slice)
  %slice.7 = add i32 %slice, 7
  %res2 = call { <vscale x 8 x half>, <vscale x 8 x half> } @llvm.aarch64.sme.read.vg1x2.nxv8f16(i32 %slice.7)
  ret { <vscale x 8 x half>, <vscale x 8 x half> } %res2
}

define { <vscale x 8 x bfloat>, <vscale x 8 x bfloat> } @za_read_vg1x2_bf16(i32 %slice) {
; CHECK-LABEL: za_read_vg1x2_bf16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    mov w8, w0
; CHECK-NEXT:    mov { z0.d, z1.d }, za.d[w8, 0, vgx2]
; CHECK-NEXT:    mov { z0.d, z1.d }, za.d[w8, 7, vgx2]
; CHECK-NEXT:    ret
  %res = call { <vscale x 8 x bfloat>, <vscale x 8 x bfloat> } @llvm.aarch64.sme.read.vg1x2.nxv8bf16(i32 %slice)
  %slice.7 = add i32 %slice, 7
  %res2 = call { <vscale x 8 x bfloat>, <vscale x 8 x bfloat> } @llvm.aarch64.sme.read.vg1x2.nxv8bf16(i32 %slice.7)
  ret { <vscale x 8 x bfloat>, <vscale x 8 x bfloat> } %res2
}

define { <vscale x 4 x i32>, <vscale x 4 x i32> } @za_read_vg1x2_s(i32 %slice) {
; CHECK-LABEL: za_read_vg1x2_s:
; CHECK:       // %bb.0:
; CHECK-NEXT:    mov w8, w0
; CHECK-NEXT:    mov { z0.d, z1.d }, za.d[w8, 0, vgx2]
; CHECK-NEXT:    mov { z0.d, z1.d }, za.d[w8, 7, vgx2]
; CHECK-NEXT:    ret
  %res = call { <vscale x 4 x i32>, <vscale x 4 x i32> } @llvm.aarch64.sme.read.vg1x2.nxv4i32(i32 %slice)
  %slice.7 = add i32 %slice, 7
  %res2 = call { <vscale x 4 x i32>, <vscale x 4 x i32> } @llvm.aarch64.sme.read.vg1x2.nxv4i32(i32 %slice.7)
  ret { <vscale x 4 x i32>, <vscale x 4 x i32> } %res2
}

define { <vscale x 4 x float>, <vscale x 4 x float> } @za_read_vg1x2_f32(i32 %slice) {
; CHECK-LABEL: za_read_vg1x2_f32:
; CHECK:       // %bb.0:
; CHECK-NEXT:    mov w8, w0
; CHECK-NEXT:    mov { z0.d, z1.d }, za.d[w8, 0, vgx2]
; CHECK-NEXT:    mov { z0.d, z1.d }, za.d[w8, 7, vgx2]
; CHECK-NEXT:    ret
  %res = call { <vscale x 4 x float>, <vscale x 4 x float> } @llvm.aarch64.sme.read.vg1x2.nxv4f32(i32 %slice)
  %slice.7 = add i32 %slice, 7
  %res2 = call { <vscale x 4 x float>, <vscale x 4 x float> } @llvm.aarch64.sme.read.vg1x2.nxv4f32(i32 %slice.7)
  ret { <vscale x 4 x float>, <vscale x 4 x float> } %res2
}

define { <vscale x 2 x i64>, <vscale x 2 x i64> } @za_read_vg1x2_d(i32 %slice) {
; CHECK-LABEL: za_read_vg1x2_d:
; CHECK:       // %bb.0:
; CHECK-NEXT:    mov w8, w0
; CHECK-NEXT:    mov { z0.d, z1.d }, za.d[w8, 0, vgx2]
; CHECK-NEXT:    mov { z0.d, z1.d }, za.d[w8, 7, vgx2]
; CHECK-NEXT:    ret
  %res = call { <vscale x 2 x i64>, <vscale x 2 x i64> } @llvm.aarch64.sme.read.vg1x2.nxv2i64(i32 %slice)
  %slice.7 = add i32 %slice, 7
  %res2 = call { <vscale x 2 x i64>, <vscale x 2 x i64> } @llvm.aarch64.sme.read.vg1x2.nxv2i64(i32 %slice.7)
  ret { <vscale x 2 x i64>, <vscale x 2 x i64> } %res2
}

define { <vscale x 2 x double>, <vscale x 2 x double> } @za_read_vg1x2_f64(i32 %slice) {
; CHECK-LABEL: za_read_vg1x2_f64:
; CHECK:       // %bb.0:
; CHECK-NEXT:    mov w8, w0
; CHECK-NEXT:    mov { z0.d, z1.d }, za.d[w8, 0, vgx2]
; CHECK-NEXT:    mov { z0.d, z1.d }, za.d[w8, 7, vgx2]
; CHECK-NEXT:    ret
  %res = call { <vscale x 2 x double>, <vscale x 2 x double> } @llvm.aarch64.sme.read.vg1x2.nxv2f64(i32 %slice)
  %slice.7 = add i32 %slice, 7
  %res2 = call { <vscale x 2 x double>, <vscale x 2 x double> } @llvm.aarch64.sme.read.vg1x2.nxv2f64(i32 %slice.7)
  ret { <vscale x 2 x double>, <vscale x 2 x double> } %res2
}
506 ; Move Multi-Vector From ZA (Read) x4
define { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> } @za_read_vg1x4_b(i32 %slice) {
; CHECK-LABEL: za_read_vg1x4_b:
; CHECK:       // %bb.0:
; CHECK-NEXT:    mov w8, w0
; CHECK-NEXT:    mov { z0.d - z3.d }, za.d[w8, 0, vgx4]
; CHECK-NEXT:    mov { z0.d - z3.d }, za.d[w8, 7, vgx4]
; CHECK-NEXT:    ret
  %res = call { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> } @llvm.aarch64.sme.read.vg1x4.nxv16i8(i32 %slice)
  %slice.7 = add i32 %slice, 7
  %res2 = call { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> } @llvm.aarch64.sme.read.vg1x4.nxv16i8(i32 %slice.7)
  ret { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> } %res2
}

define { <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16> } @za_read_vg1x4_h(i32 %slice) {
; CHECK-LABEL: za_read_vg1x4_h:
; CHECK:       // %bb.0:
; CHECK-NEXT:    mov w8, w0
; CHECK-NEXT:    mov { z0.d - z3.d }, za.d[w8, 0, vgx4]
; CHECK-NEXT:    mov { z0.d - z3.d }, za.d[w8, 7, vgx4]
; CHECK-NEXT:    ret
  %res = call { <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16> } @llvm.aarch64.sme.read.vg1x4.nxv8i16(i32 %slice)
  %slice.7 = add i32 %slice, 7
  %res2 = call { <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16> } @llvm.aarch64.sme.read.vg1x4.nxv8i16(i32 %slice.7)
  ret { <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16> } %res2
}

define { <vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x half> } @za_read_vg1x4_f16(i32 %slice) {
; CHECK-LABEL: za_read_vg1x4_f16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    mov w8, w0
; CHECK-NEXT:    mov { z0.d - z3.d }, za.d[w8, 0, vgx4]
; CHECK-NEXT:    mov { z0.d - z3.d }, za.d[w8, 7, vgx4]
; CHECK-NEXT:    ret
  %res = call { <vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x half> } @llvm.aarch64.sme.read.vg1x4.nxv8f16(i32 %slice)
  %slice.7 = add i32 %slice, 7
  %res2 = call { <vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x half> } @llvm.aarch64.sme.read.vg1x4.nxv8f16(i32 %slice.7)
  ret { <vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x half> } %res2
}

define { <vscale x 8 x bfloat>, <vscale x 8 x bfloat>, <vscale x 8 x bfloat>, <vscale x 8 x bfloat> } @za_read_vg1x4_bf16(i32 %slice) {
; CHECK-LABEL: za_read_vg1x4_bf16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    mov w8, w0
; CHECK-NEXT:    mov { z0.d - z3.d }, za.d[w8, 0, vgx4]
; CHECK-NEXT:    mov { z0.d - z3.d }, za.d[w8, 7, vgx4]
; CHECK-NEXT:    ret
  %res = call { <vscale x 8 x bfloat>, <vscale x 8 x bfloat>, <vscale x 8 x bfloat>, <vscale x 8 x bfloat> } @llvm.aarch64.sme.read.vg1x4.nxv8bf16(i32 %slice)
  %slice.7 = add i32 %slice, 7
  %res2 = call { <vscale x 8 x bfloat>, <vscale x 8 x bfloat>, <vscale x 8 x bfloat>, <vscale x 8 x bfloat> } @llvm.aarch64.sme.read.vg1x4.nxv8bf16(i32 %slice.7)
  ret { <vscale x 8 x bfloat>, <vscale x 8 x bfloat>, <vscale x 8 x bfloat>, <vscale x 8 x bfloat> } %res2
}

define { <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32> } @za_read_vg1x4_s(i32 %slice) {
; CHECK-LABEL: za_read_vg1x4_s:
; CHECK:       // %bb.0:
; CHECK-NEXT:    mov w8, w0
; CHECK-NEXT:    mov { z0.d - z3.d }, za.d[w8, 0, vgx4]
; CHECK-NEXT:    mov { z0.d - z3.d }, za.d[w8, 7, vgx4]
; CHECK-NEXT:    ret
  %res = call { <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32> } @llvm.aarch64.sme.read.vg1x4.nxv4i32(i32 %slice)
  %slice.7 = add i32 %slice, 7
  %res2 = call { <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32> } @llvm.aarch64.sme.read.vg1x4.nxv4i32(i32 %slice.7)
  ret { <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32> } %res2
}

define { <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float> } @za_read_vg1x4_f32(i32 %slice) {
; CHECK-LABEL: za_read_vg1x4_f32:
; CHECK:       // %bb.0:
; CHECK-NEXT:    mov w8, w0
; CHECK-NEXT:    mov { z0.d - z3.d }, za.d[w8, 0, vgx4]
; CHECK-NEXT:    mov { z0.d - z3.d }, za.d[w8, 7, vgx4]
; CHECK-NEXT:    ret
  %res = call { <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float> } @llvm.aarch64.sme.read.vg1x4.nxv4f32(i32 %slice)
  %slice.7 = add i32 %slice, 7
  %res2 = call { <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float> } @llvm.aarch64.sme.read.vg1x4.nxv4f32(i32 %slice.7)
  ret { <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float> } %res2
}

define { <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64> } @za_read_vg1x4_d(i32 %slice) {
; CHECK-LABEL: za_read_vg1x4_d:
; CHECK:       // %bb.0:
; CHECK-NEXT:    mov w8, w0
; CHECK-NEXT:    mov { z0.d - z3.d }, za.d[w8, 0, vgx4]
; CHECK-NEXT:    mov { z0.d - z3.d }, za.d[w8, 7, vgx4]
; CHECK-NEXT:    ret
  %res = call { <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64> } @llvm.aarch64.sme.read.vg1x4.nxv2i64(i32 %slice)
  %slice.7 = add i32 %slice, 7
  %res2 = call { <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64> } @llvm.aarch64.sme.read.vg1x4.nxv2i64(i32 %slice.7)
  ret { <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64> } %res2
}

define { <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double> } @za_read_vg1x4_f64(i32 %slice) {
; CHECK-LABEL: za_read_vg1x4_f64:
; CHECK:       // %bb.0:
; CHECK-NEXT:    mov w8, w0
; CHECK-NEXT:    mov { z0.d - z3.d }, za.d[w8, 0, vgx4]
; CHECK-NEXT:    mov { z0.d - z3.d }, za.d[w8, 7, vgx4]
; CHECK-NEXT:    ret
  %res = call { <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double> } @llvm.aarch64.sme.read.vg1x4.nxv2f64(i32 %slice)
  %slice.7 = add i32 %slice, 7
  %res2 = call { <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double> } @llvm.aarch64.sme.read.vg1x4.nxv2f64(i32 %slice.7)
  ret { <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double> } %res2
}
612 declare { <vscale x 16 x i8>, <vscale x 16 x i8> } @llvm.aarch64.sme.read.hor.vg2.nxv16i8(i32, i32)
613 declare { <vscale x 8 x i16>, <vscale x 8 x i16> } @llvm.aarch64.sme.read.hor.vg2.nxv8i16(i32, i32)
614 declare { <vscale x 8 x half>, <vscale x 8 x half> } @llvm.aarch64.sme.read.hor.vg2.nxv8f16(i32, i32)
615 declare { <vscale x 8 x bfloat>, <vscale x 8 x bfloat> } @llvm.aarch64.sme.read.hor.vg2.nxv8bf16(i32, i32)
616 declare { <vscale x 4 x i32>, <vscale x 4 x i32> } @llvm.aarch64.sme.read.hor.vg2.nxv4i32(i32, i32)
617 declare { <vscale x 4 x float>, <vscale x 4 x float> } @llvm.aarch64.sme.read.hor.vg2.nxv4f32(i32, i32)
618 declare { <vscale x 2 x i64>, <vscale x 2 x i64> } @llvm.aarch64.sme.read.hor.vg2.nxv2i64(i32, i32)
619 declare { <vscale x 2 x double>, <vscale x 2 x double> } @llvm.aarch64.sme.read.hor.vg2.nxv2f64(i32, i32)
621 declare { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> } @llvm.aarch64.sme.read.hor.vg4.nxv16i8(i32, i32)
622 declare { <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16> } @llvm.aarch64.sme.read.hor.vg4.nxv8i16(i32, i32)
623 declare { <vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x half> } @llvm.aarch64.sme.read.hor.vg4.nxv8f16(i32, i32)
624 declare { <vscale x 8 x bfloat>, <vscale x 8 x bfloat>, <vscale x 8 x bfloat>, <vscale x 8 x bfloat> } @llvm.aarch64.sme.read.hor.vg4.nxv8bf16(i32, i32)
625 declare { <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32> } @llvm.aarch64.sme.read.hor.vg4.nxv4i32(i32, i32)
626 declare { <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float> } @llvm.aarch64.sme.read.hor.vg4.nxv4f32(i32, i32)
627 declare { <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64> } @llvm.aarch64.sme.read.hor.vg4.nxv2i64(i32, i32)
628 declare { <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double> } @llvm.aarch64.sme.read.hor.vg4.nxv2f64(i32, i32)
630 declare { <vscale x 16 x i8>, <vscale x 16 x i8> } @llvm.aarch64.sme.read.ver.vg2.nxv16i8(i32, i32)
631 declare { <vscale x 8 x i16>, <vscale x 8 x i16> } @llvm.aarch64.sme.read.ver.vg2.nxv8i16(i32, i32)
632 declare { <vscale x 8 x half>, <vscale x 8 x half> } @llvm.aarch64.sme.read.ver.vg2.nxv8f16(i32, i32)
633 declare { <vscale x 8 x bfloat>, <vscale x 8 x bfloat> } @llvm.aarch64.sme.read.ver.vg2.nxv8bf16(i32, i32)
634 declare { <vscale x 4 x i32>, <vscale x 4 x i32> } @llvm.aarch64.sme.read.ver.vg2.nxv4i32(i32, i32)
635 declare { <vscale x 4 x float>, <vscale x 4 x float> } @llvm.aarch64.sme.read.ver.vg2.nxv4f32(i32, i32)
636 declare { <vscale x 2 x i64>, <vscale x 2 x i64> } @llvm.aarch64.sme.read.ver.vg2.nxv2i64(i32, i32)
637 declare { <vscale x 2 x double>, <vscale x 2 x double> } @llvm.aarch64.sme.read.ver.vg2.nxv2f64(i32, i32)
639 declare { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> } @llvm.aarch64.sme.read.ver.vg4.nxv16i8(i32, i32)
640 declare { <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16> } @llvm.aarch64.sme.read.ver.vg4.nxv8i16(i32, i32)
641 declare { <vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x half> } @llvm.aarch64.sme.read.ver.vg4.nxv8f16(i32, i32)
642 declare { <vscale x 8 x bfloat>, <vscale x 8 x bfloat>, <vscale x 8 x bfloat>, <vscale x 8 x bfloat> } @llvm.aarch64.sme.read.ver.vg4.nxv8bf16(i32, i32)
643 declare { <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32> } @llvm.aarch64.sme.read.ver.vg4.nxv4i32(i32, i32)
644 declare { <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float> } @llvm.aarch64.sme.read.ver.vg4.nxv4f32(i32, i32)
645 declare { <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64> } @llvm.aarch64.sme.read.ver.vg4.nxv2i64(i32, i32)
646 declare { <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double> } @llvm.aarch64.sme.read.ver.vg4.nxv2f64(i32, i32)
648 declare { <vscale x 16 x i8>, <vscale x 16 x i8> } @llvm.aarch64.sme.read.vg1x2.nxv16i8(i32)
649 declare { <vscale x 8 x i16>, <vscale x 8 x i16> } @llvm.aarch64.sme.read.vg1x2.nxv8i16(i32)
650 declare { <vscale x 4 x i32>, <vscale x 4 x i32> } @llvm.aarch64.sme.read.vg1x2.nxv4i32(i32)
651 declare { <vscale x 2 x i64>, <vscale x 2 x i64> } @llvm.aarch64.sme.read.vg1x2.nxv2i64(i32)
652 declare { <vscale x 8 x half>, <vscale x 8 x half> } @llvm.aarch64.sme.read.vg1x2.nxv8f16(i32)
653 declare { <vscale x 8 x bfloat>, <vscale x 8 x bfloat> } @llvm.aarch64.sme.read.vg1x2.nxv8bf16(i32)
654 declare { <vscale x 4 x float>, <vscale x 4 x float> } @llvm.aarch64.sme.read.vg1x2.nxv4f32(i32)
655 declare { <vscale x 2 x double>, <vscale x 2 x double> } @llvm.aarch64.sme.read.vg1x2.nxv2f64(i32)
657 declare { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> } @llvm.aarch64.sme.read.vg1x4.nxv16i8(i32)
658 declare { <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16> } @llvm.aarch64.sme.read.vg1x4.nxv8i16(i32)
659 declare { <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32> } @llvm.aarch64.sme.read.vg1x4.nxv4i32(i32)
660 declare { <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64> } @llvm.aarch64.sme.read.vg1x4.nxv2i64(i32)
661 declare { <vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x half> } @llvm.aarch64.sme.read.vg1x4.nxv8f16(i32)
662 declare { <vscale x 8 x bfloat>, <vscale x 8 x bfloat>, <vscale x 8 x bfloat>, <vscale x 8 x bfloat> } @llvm.aarch64.sme.read.vg1x4.nxv8bf16(i32)
663 declare { <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float> } @llvm.aarch64.sme.read.vg1x4.nxv4f32(i32)
664 declare { <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double> } @llvm.aarch64.sme.read.vg1x4.nxv2f64(i32)