; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sve < %s | FileCheck %s

;
; CLASTA (Vectors)
;

define <vscale x 16 x i8> @clasta_i8(<vscale x 16 x i1> %pg, <vscale x 16 x i8> %a, <vscale x 16 x i8> %b) {
; CHECK-LABEL: clasta_i8:
; CHECK:       // %bb.0:
; CHECK-NEXT:    clasta z0.b, p0, z0.b, z1.b
; CHECK-NEXT:    ret
  %out = call <vscale x 16 x i8> @llvm.aarch64.sve.clasta.nxv16i8(<vscale x 16 x i1> %pg,
                                                                  <vscale x 16 x i8> %a,
                                                                  <vscale x 16 x i8> %b)
  ret <vscale x 16 x i8> %out
}

define <vscale x 8 x i16> @clasta_i16(<vscale x 8 x i1> %pg, <vscale x 8 x i16> %a, <vscale x 8 x i16> %b) {
; CHECK-LABEL: clasta_i16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    clasta z0.h, p0, z0.h, z1.h
; CHECK-NEXT:    ret
  %out = call <vscale x 8 x i16> @llvm.aarch64.sve.clasta.nxv8i16(<vscale x 8 x i1> %pg,
                                                                  <vscale x 8 x i16> %a,
                                                                  <vscale x 8 x i16> %b)
  ret <vscale x 8 x i16> %out
}

define <vscale x 4 x i32> @clasta_i32(<vscale x 4 x i1> %pg, <vscale x 4 x i32> %a, <vscale x 4 x i32> %b) {
; CHECK-LABEL: clasta_i32:
; CHECK:       // %bb.0:
; CHECK-NEXT:    clasta z0.s, p0, z0.s, z1.s
; CHECK-NEXT:    ret
  %out = call <vscale x 4 x i32> @llvm.aarch64.sve.clasta.nxv4i32(<vscale x 4 x i1> %pg,
                                                                  <vscale x 4 x i32> %a,
                                                                  <vscale x 4 x i32> %b)
  ret <vscale x 4 x i32> %out
}

define <vscale x 2 x i64> @clasta_i64(<vscale x 2 x i1> %pg, <vscale x 2 x i64> %a, <vscale x 2 x i64> %b) {
; CHECK-LABEL: clasta_i64:
; CHECK:       // %bb.0:
; CHECK-NEXT:    clasta z0.d, p0, z0.d, z1.d
; CHECK-NEXT:    ret
  %out = call <vscale x 2 x i64> @llvm.aarch64.sve.clasta.nxv2i64(<vscale x 2 x i1> %pg,
                                                                  <vscale x 2 x i64> %a,
                                                                  <vscale x 2 x i64> %b)
  ret <vscale x 2 x i64> %out
}

define <vscale x 8 x half> @clasta_f16(<vscale x 8 x i1> %pg, <vscale x 8 x half> %a, <vscale x 8 x half> %b) {
; CHECK-LABEL: clasta_f16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    clasta z0.h, p0, z0.h, z1.h
; CHECK-NEXT:    ret
  %out = call <vscale x 8 x half> @llvm.aarch64.sve.clasta.nxv8f16(<vscale x 8 x i1> %pg,
                                                                   <vscale x 8 x half> %a,
                                                                   <vscale x 8 x half> %b)
  ret <vscale x 8 x half> %out
}

define <vscale x 8 x bfloat> @clasta_bf16(<vscale x 8 x i1> %pg, <vscale x 8 x bfloat> %a, <vscale x 8 x bfloat> %b) #0 {
; CHECK-LABEL: clasta_bf16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    clasta z0.h, p0, z0.h, z1.h
; CHECK-NEXT:    ret
  %out = call <vscale x 8 x bfloat> @llvm.aarch64.sve.clasta.nxv8bf16(<vscale x 8 x i1> %pg,
                                                                      <vscale x 8 x bfloat> %a,
                                                                      <vscale x 8 x bfloat> %b)
  ret <vscale x 8 x bfloat> %out
}

define <vscale x 4 x float> @clasta_f32(<vscale x 4 x i1> %pg, <vscale x 4 x float> %a, <vscale x 4 x float> %b) {
; CHECK-LABEL: clasta_f32:
; CHECK:       // %bb.0:
; CHECK-NEXT:    clasta z0.s, p0, z0.s, z1.s
; CHECK-NEXT:    ret
  %out = call <vscale x 4 x float> @llvm.aarch64.sve.clasta.nxv4f32(<vscale x 4 x i1> %pg,
                                                                    <vscale x 4 x float> %a,
                                                                    <vscale x 4 x float> %b)
  ret <vscale x 4 x float> %out
}

define <vscale x 2 x double> @clasta_f64(<vscale x 2 x i1> %pg, <vscale x 2 x double> %a, <vscale x 2 x double> %b) {
; CHECK-LABEL: clasta_f64:
; CHECK:       // %bb.0:
; CHECK-NEXT:    clasta z0.d, p0, z0.d, z1.d
; CHECK-NEXT:    ret
  %out = call <vscale x 2 x double> @llvm.aarch64.sve.clasta.nxv2f64(<vscale x 2 x i1> %pg,
                                                                     <vscale x 2 x double> %a,
                                                                     <vscale x 2 x double> %b)
  ret <vscale x 2 x double> %out
}

;
; CLASTA (Scalar)
;

define i8 @clasta_n_i8(<vscale x 16 x i1> %pg, i8 %a, <vscale x 16 x i8> %b) {
; CHECK-LABEL: clasta_n_i8:
; CHECK:       // %bb.0:
; CHECK-NEXT:    clasta w0, p0, w0, z0.b
; CHECK-NEXT:    ret
  %out = call i8 @llvm.aarch64.sve.clasta.n.nxv16i8(<vscale x 16 x i1> %pg,
                                                    i8 %a,
                                                    <vscale x 16 x i8> %b)
  ret i8 %out
}

define i16 @clasta_n_i16(<vscale x 8 x i1> %pg, i16 %a, <vscale x 8 x i16> %b) {
; CHECK-LABEL: clasta_n_i16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    clasta w0, p0, w0, z0.h
; CHECK-NEXT:    ret
  %out = call i16 @llvm.aarch64.sve.clasta.n.nxv8i16(<vscale x 8 x i1> %pg,
                                                     i16 %a,
                                                     <vscale x 8 x i16> %b)
  ret i16 %out
}

define i32 @clasta_n_i32(<vscale x 4 x i1> %pg, i32 %a, <vscale x 4 x i32> %b) {
; CHECK-LABEL: clasta_n_i32:
; CHECK:       // %bb.0:
; CHECK-NEXT:    clasta w0, p0, w0, z0.s
; CHECK-NEXT:    ret
  %out = call i32 @llvm.aarch64.sve.clasta.n.nxv4i32(<vscale x 4 x i1> %pg,
                                                     i32 %a,
                                                     <vscale x 4 x i32> %b)
  ret i32 %out
}

define i64 @clasta_n_i64(<vscale x 2 x i1> %pg, i64 %a, <vscale x 2 x i64> %b) {
; CHECK-LABEL: clasta_n_i64:
; CHECK:       // %bb.0:
; CHECK-NEXT:    clasta x0, p0, x0, z0.d
; CHECK-NEXT:    ret
  %out = call i64 @llvm.aarch64.sve.clasta.n.nxv2i64(<vscale x 2 x i1> %pg,
                                                     i64 %a,
                                                     <vscale x 2 x i64> %b)
  ret i64 %out
}

define half @clasta_n_f16(<vscale x 8 x i1> %pg, half %a, <vscale x 8 x half> %b) {
; CHECK-LABEL: clasta_n_f16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    clasta h0, p0, h0, z1.h
; CHECK-NEXT:    ret
  %out = call half @llvm.aarch64.sve.clasta.n.nxv8f16(<vscale x 8 x i1> %pg,
                                                      half %a,
                                                      <vscale x 8 x half> %b)
  ret half %out
}

define bfloat @clasta_n_bf16(<vscale x 8 x i1> %pg, bfloat %a, <vscale x 8 x bfloat> %b) #0 {
; CHECK-LABEL: clasta_n_bf16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    clasta h0, p0, h0, z1.h
; CHECK-NEXT:    ret
  %out = call bfloat @llvm.aarch64.sve.clasta.n.nxv8bf16(<vscale x 8 x i1> %pg,
                                                         bfloat %a,
                                                         <vscale x 8 x bfloat> %b)
  ret bfloat %out
}

define float @clasta_n_f32(<vscale x 4 x i1> %pg, float %a, <vscale x 4 x float> %b) {
; CHECK-LABEL: clasta_n_f32:
; CHECK:       // %bb.0:
; CHECK-NEXT:    clasta s0, p0, s0, z1.s
; CHECK-NEXT:    ret
  %out = call float @llvm.aarch64.sve.clasta.n.nxv4f32(<vscale x 4 x i1> %pg,
                                                       float %a,
                                                       <vscale x 4 x float> %b)
  ret float %out
}

define double @clasta_n_f64(<vscale x 2 x i1> %pg, double %a, <vscale x 2 x double> %b) {
; CHECK-LABEL: clasta_n_f64:
; CHECK:       // %bb.0:
; CHECK-NEXT:    clasta d0, p0, d0, z1.d
; CHECK-NEXT:    ret
  %out = call double @llvm.aarch64.sve.clasta.n.nxv2f64(<vscale x 2 x i1> %pg,
                                                        double %a,
                                                        <vscale x 2 x double> %b)
  ret double %out
}
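
; NOTE: In the scalar clasta_n forms above, the integer variants pass %a in a
; general-purpose register (w0/x0), leaving z0 free for %b, whereas the
; floating-point variants keep %a in v0/z0 and so take %b in z1.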

;
; CLASTB (Vectors)
;

define <vscale x 16 x i8> @clastb_i8(<vscale x 16 x i1> %pg, <vscale x 16 x i8> %a, <vscale x 16 x i8> %b) {
; CHECK-LABEL: clastb_i8:
; CHECK:       // %bb.0:
; CHECK-NEXT:    clastb z0.b, p0, z0.b, z1.b
; CHECK-NEXT:    ret
  %out = call <vscale x 16 x i8> @llvm.aarch64.sve.clastb.nxv16i8(<vscale x 16 x i1> %pg,
                                                                  <vscale x 16 x i8> %a,
                                                                  <vscale x 16 x i8> %b)
  ret <vscale x 16 x i8> %out
}

define <vscale x 8 x i16> @clastb_i16(<vscale x 8 x i1> %pg, <vscale x 8 x i16> %a, <vscale x 8 x i16> %b) {
; CHECK-LABEL: clastb_i16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    clastb z0.h, p0, z0.h, z1.h
; CHECK-NEXT:    ret
  %out = call <vscale x 8 x i16> @llvm.aarch64.sve.clastb.nxv8i16(<vscale x 8 x i1> %pg,
                                                                  <vscale x 8 x i16> %a,
                                                                  <vscale x 8 x i16> %b)
  ret <vscale x 8 x i16> %out
}

define <vscale x 4 x i32> @clastb_i32(<vscale x 4 x i1> %pg, <vscale x 4 x i32> %a, <vscale x 4 x i32> %b) {
; CHECK-LABEL: clastb_i32:
; CHECK:       // %bb.0:
; CHECK-NEXT:    clastb z0.s, p0, z0.s, z1.s
; CHECK-NEXT:    ret
  %out = call <vscale x 4 x i32> @llvm.aarch64.sve.clastb.nxv4i32(<vscale x 4 x i1> %pg,
                                                                  <vscale x 4 x i32> %a,
                                                                  <vscale x 4 x i32> %b)
  ret <vscale x 4 x i32> %out
}

define <vscale x 2 x i64> @clastb_i64(<vscale x 2 x i1> %pg, <vscale x 2 x i64> %a, <vscale x 2 x i64> %b) {
; CHECK-LABEL: clastb_i64:
; CHECK:       // %bb.0:
; CHECK-NEXT:    clastb z0.d, p0, z0.d, z1.d
; CHECK-NEXT:    ret
  %out = call <vscale x 2 x i64> @llvm.aarch64.sve.clastb.nxv2i64(<vscale x 2 x i1> %pg,
                                                                  <vscale x 2 x i64> %a,
                                                                  <vscale x 2 x i64> %b)
  ret <vscale x 2 x i64> %out
}

define <vscale x 8 x half> @clastb_f16(<vscale x 8 x i1> %pg, <vscale x 8 x half> %a, <vscale x 8 x half> %b) {
; CHECK-LABEL: clastb_f16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    clastb z0.h, p0, z0.h, z1.h
; CHECK-NEXT:    ret
  %out = call <vscale x 8 x half> @llvm.aarch64.sve.clastb.nxv8f16(<vscale x 8 x i1> %pg,
                                                                   <vscale x 8 x half> %a,
                                                                   <vscale x 8 x half> %b)
  ret <vscale x 8 x half> %out
}

define <vscale x 8 x bfloat> @clastb_bf16(<vscale x 8 x i1> %pg, <vscale x 8 x bfloat> %a, <vscale x 8 x bfloat> %b) #0 {
; CHECK-LABEL: clastb_bf16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    clastb z0.h, p0, z0.h, z1.h
; CHECK-NEXT:    ret
  %out = call <vscale x 8 x bfloat> @llvm.aarch64.sve.clastb.nxv8bf16(<vscale x 8 x i1> %pg,
                                                                      <vscale x 8 x bfloat> %a,
                                                                      <vscale x 8 x bfloat> %b)
  ret <vscale x 8 x bfloat> %out
}

define <vscale x 4 x float> @clastb_f32(<vscale x 4 x i1> %pg, <vscale x 4 x float> %a, <vscale x 4 x float> %b) {
; CHECK-LABEL: clastb_f32:
; CHECK:       // %bb.0:
; CHECK-NEXT:    clastb z0.s, p0, z0.s, z1.s
; CHECK-NEXT:    ret
  %out = call <vscale x 4 x float> @llvm.aarch64.sve.clastb.nxv4f32(<vscale x 4 x i1> %pg,
                                                                    <vscale x 4 x float> %a,
                                                                    <vscale x 4 x float> %b)
  ret <vscale x 4 x float> %out
}

define <vscale x 2 x double> @clastb_f64(<vscale x 2 x i1> %pg, <vscale x 2 x double> %a, <vscale x 2 x double> %b) {
; CHECK-LABEL: clastb_f64:
; CHECK:       // %bb.0:
; CHECK-NEXT:    clastb z0.d, p0, z0.d, z1.d
; CHECK-NEXT:    ret
  %out = call <vscale x 2 x double> @llvm.aarch64.sve.clastb.nxv2f64(<vscale x 2 x i1> %pg,
                                                                     <vscale x 2 x double> %a,
                                                                     <vscale x 2 x double> %b)
  ret <vscale x 2 x double> %out
}

;
; CLASTB (Scalar)
;

define i8 @clastb_n_i8(<vscale x 16 x i1> %pg, i8 %a, <vscale x 16 x i8> %b) {
; CHECK-LABEL: clastb_n_i8:
; CHECK:       // %bb.0:
; CHECK-NEXT:    clastb w0, p0, w0, z0.b
; CHECK-NEXT:    ret
  %out = call i8 @llvm.aarch64.sve.clastb.n.nxv16i8(<vscale x 16 x i1> %pg,
                                                    i8 %a,
                                                    <vscale x 16 x i8> %b)
  ret i8 %out
}

define i16 @clastb_n_i16(<vscale x 8 x i1> %pg, i16 %a, <vscale x 8 x i16> %b) {
; CHECK-LABEL: clastb_n_i16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    clastb w0, p0, w0, z0.h
; CHECK-NEXT:    ret
  %out = call i16 @llvm.aarch64.sve.clastb.n.nxv8i16(<vscale x 8 x i1> %pg,
                                                     i16 %a,
                                                     <vscale x 8 x i16> %b)
  ret i16 %out
}

define i32 @clastb_n_i32(<vscale x 4 x i1> %pg, i32 %a, <vscale x 4 x i32> %b) {
; CHECK-LABEL: clastb_n_i32:
; CHECK:       // %bb.0:
; CHECK-NEXT:    clastb w0, p0, w0, z0.s
; CHECK-NEXT:    ret
  %out = call i32 @llvm.aarch64.sve.clastb.n.nxv4i32(<vscale x 4 x i1> %pg,
                                                     i32 %a,
                                                     <vscale x 4 x i32> %b)
  ret i32 %out
}

define i64 @clastb_n_i64(<vscale x 2 x i1> %pg, i64 %a, <vscale x 2 x i64> %b) {
; CHECK-LABEL: clastb_n_i64:
; CHECK:       // %bb.0:
; CHECK-NEXT:    clastb x0, p0, x0, z0.d
; CHECK-NEXT:    ret
  %out = call i64 @llvm.aarch64.sve.clastb.n.nxv2i64(<vscale x 2 x i1> %pg,
                                                     i64 %a,
                                                     <vscale x 2 x i64> %b)
  ret i64 %out
}

define half @clastb_n_f16(<vscale x 8 x i1> %pg, half %a, <vscale x 8 x half> %b) {
; CHECK-LABEL: clastb_n_f16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    clastb h0, p0, h0, z1.h
; CHECK-NEXT:    ret
  %out = call half @llvm.aarch64.sve.clastb.n.nxv8f16(<vscale x 8 x i1> %pg,
                                                      half %a,
                                                      <vscale x 8 x half> %b)
  ret half %out
}

define bfloat @clastb_n_bf16(<vscale x 8 x i1> %pg, bfloat %a, <vscale x 8 x bfloat> %b) #0 {
; CHECK-LABEL: clastb_n_bf16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    clastb h0, p0, h0, z1.h
; CHECK-NEXT:    ret
  %out = call bfloat @llvm.aarch64.sve.clastb.n.nxv8bf16(<vscale x 8 x i1> %pg,
                                                         bfloat %a,
                                                         <vscale x 8 x bfloat> %b)
  ret bfloat %out
}

define float @clastb_n_f32(<vscale x 4 x i1> %pg, float %a, <vscale x 4 x float> %b) {
; CHECK-LABEL: clastb_n_f32:
; CHECK:       // %bb.0:
; CHECK-NEXT:    clastb s0, p0, s0, z1.s
; CHECK-NEXT:    ret
  %out = call float @llvm.aarch64.sve.clastb.n.nxv4f32(<vscale x 4 x i1> %pg,
                                                       float %a,
                                                       <vscale x 4 x float> %b)
  ret float %out
}

define double @clastb_n_f64(<vscale x 2 x i1> %pg, double %a, <vscale x 2 x double> %b) {
; CHECK-LABEL: clastb_n_f64:
; CHECK:       // %bb.0:
; CHECK-NEXT:    clastb d0, p0, d0, z1.d
; CHECK-NEXT:    ret
  %out = call double @llvm.aarch64.sve.clastb.n.nxv2f64(<vscale x 2 x i1> %pg,
                                                        double %a,
                                                        <vscale x 2 x double> %b)
  ret double %out
}

;
; DUPQ
;

define <vscale x 16 x i8> @dupq_i8(<vscale x 16 x i8> %a) {
; CHECK-LABEL: dupq_i8:
; CHECK:       // %bb.0:
; CHECK-NEXT:    mov z0.q, q0
; CHECK-NEXT:    ret
  %out = call <vscale x 16 x i8> @llvm.aarch64.sve.dupq.lane.nxv16i8(<vscale x 16 x i8> %a, i64 0)
  ret <vscale x 16 x i8> %out
}

define <vscale x 8 x i16> @dupq_i16(<vscale x 8 x i16> %a) {
; CHECK-LABEL: dupq_i16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    mov z0.q, z0.q[1]
; CHECK-NEXT:    ret
  %out = call <vscale x 8 x i16> @llvm.aarch64.sve.dupq.lane.nxv8i16(<vscale x 8 x i16> %a, i64 1)
  ret <vscale x 8 x i16> %out
}

define <vscale x 4 x i32> @dupq_i32(<vscale x 4 x i32> %a) {
; CHECK-LABEL: dupq_i32:
; CHECK:       // %bb.0:
; CHECK-NEXT:    mov z0.q, z0.q[2]
; CHECK-NEXT:    ret
  %out = call <vscale x 4 x i32> @llvm.aarch64.sve.dupq.lane.nxv4i32(<vscale x 4 x i32> %a, i64 2)
  ret <vscale x 4 x i32> %out
}

define <vscale x 2 x i64> @dupq_i64(<vscale x 2 x i64> %a) {
; CHECK-LABEL: dupq_i64:
; CHECK:       // %bb.0:
; CHECK-NEXT:    mov z0.q, z0.q[3]
; CHECK-NEXT:    ret
  %out = call <vscale x 2 x i64> @llvm.aarch64.sve.dupq.lane.nxv2i64(<vscale x 2 x i64> %a, i64 3)
  ret <vscale x 2 x i64> %out
}

define <vscale x 8 x half> @dupq_f16(<vscale x 8 x half> %a) {
; CHECK-LABEL: dupq_f16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    mov z0.q, q0
; CHECK-NEXT:    ret
  %out = call <vscale x 8 x half> @llvm.aarch64.sve.dupq.lane.nxv8f16(<vscale x 8 x half> %a, i64 0)
  ret <vscale x 8 x half> %out
}

define <vscale x 8 x bfloat> @dupq_bf16(<vscale x 8 x bfloat> %a) #0 {
; CHECK-LABEL: dupq_bf16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    mov z0.q, q0
; CHECK-NEXT:    ret
  %out = call <vscale x 8 x bfloat> @llvm.aarch64.sve.dupq.lane.nxv8bf16(<vscale x 8 x bfloat> %a, i64 0)
  ret <vscale x 8 x bfloat> %out
}

define <vscale x 4 x float> @dupq_f32(<vscale x 4 x float> %a) {
; CHECK-LABEL: dupq_f32:
; CHECK:       // %bb.0:
; CHECK-NEXT:    mov z0.q, z0.q[1]
; CHECK-NEXT:    ret
  %out = call <vscale x 4 x float> @llvm.aarch64.sve.dupq.lane.nxv4f32(<vscale x 4 x float> %a, i64 1)
  ret <vscale x 4 x float> %out
}

define <vscale x 2 x double> @dupq_f64(<vscale x 2 x double> %a) {
; CHECK-LABEL: dupq_f64:
; CHECK:       // %bb.0:
; CHECK-NEXT:    mov z0.q, z0.q[2]
; CHECK-NEXT:    ret
  %out = call <vscale x 2 x double> @llvm.aarch64.sve.dupq.lane.nxv2f64(<vscale x 2 x double> %a, i64 2)
  ret <vscale x 2 x double> %out
}
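
; NOTE: As the checks above show, a constant lane index of zero lowers to an
; unpredicated broadcast of the low quadword (mov z0.q, q0), while non-zero
; constant indices use the indexed form (mov z0.q, z0.q[i]).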

;
; DUPQ_LANE
;

define <vscale x 16 x i8> @dupq_lane_i8(<vscale x 16 x i8> %a, i64 %idx) {
; CHECK-LABEL: dupq_lane_i8:
; CHECK:       // %bb.0:
; CHECK-NEXT:    index z1.d, #0, #1
; CHECK-NEXT:    add x8, x0, x0
; CHECK-NEXT:    mov z2.d, x8
; CHECK-NEXT:    and z1.d, z1.d, #0x1
; CHECK-NEXT:    add z1.d, z1.d, z2.d
; CHECK-NEXT:    tbl z0.d, { z0.d }, z1.d
; CHECK-NEXT:    ret
  %out = call <vscale x 16 x i8> @llvm.aarch64.sve.dupq.lane.nxv16i8(<vscale x 16 x i8> %a, i64 %idx)
  ret <vscale x 16 x i8> %out
}
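
; NOTE: With a runtime lane index there is no single broadcast instruction, so
; the quadword select is expanded: each 128-bit quadword holds two d-elements,
; so the base is idx*2 (add x8, x0, x0); index/and builds the repeating {0, 1}
; pattern, the vector add offsets it to {2*idx, 2*idx+1, ...}, and tbl performs
; the permute.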

; NOTE: Identical operation to dupq_lane_i8 (i.e. element type is irrelevant).
define <vscale x 8 x i16> @dupq_lane_i16(<vscale x 8 x i16> %a, i64 %idx) {
; CHECK-LABEL: dupq_lane_i16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    index z1.d, #0, #1
; CHECK-NEXT:    add x8, x0, x0
; CHECK-NEXT:    mov z2.d, x8
; CHECK-NEXT:    and z1.d, z1.d, #0x1
; CHECK-NEXT:    add z1.d, z1.d, z2.d
; CHECK-NEXT:    tbl z0.d, { z0.d }, z1.d
; CHECK-NEXT:    ret
  %out = call <vscale x 8 x i16> @llvm.aarch64.sve.dupq.lane.nxv8i16(<vscale x 8 x i16> %a, i64 %idx)
  ret <vscale x 8 x i16> %out
}

; NOTE: Identical operation to dupq_lane_i8 (i.e. element type is irrelevant).
define <vscale x 4 x i32> @dupq_lane_i32(<vscale x 4 x i32> %a, i64 %idx) {
; CHECK-LABEL: dupq_lane_i32:
; CHECK:       // %bb.0:
; CHECK-NEXT:    index z1.d, #0, #1
; CHECK-NEXT:    add x8, x0, x0
; CHECK-NEXT:    mov z2.d, x8
; CHECK-NEXT:    and z1.d, z1.d, #0x1
; CHECK-NEXT:    add z1.d, z1.d, z2.d
; CHECK-NEXT:    tbl z0.d, { z0.d }, z1.d
; CHECK-NEXT:    ret
  %out = call <vscale x 4 x i32> @llvm.aarch64.sve.dupq.lane.nxv4i32(<vscale x 4 x i32> %a, i64 %idx)
  ret <vscale x 4 x i32> %out
}

; NOTE: Identical operation to dupq_lane_i8 (i.e. element type is irrelevant).
define <vscale x 2 x i64> @dupq_lane_i64(<vscale x 2 x i64> %a, i64 %idx) {
; CHECK-LABEL: dupq_lane_i64:
; CHECK:       // %bb.0:
; CHECK-NEXT:    index z1.d, #0, #1
; CHECK-NEXT:    add x8, x0, x0
; CHECK-NEXT:    mov z2.d, x8
; CHECK-NEXT:    and z1.d, z1.d, #0x1
; CHECK-NEXT:    add z1.d, z1.d, z2.d
; CHECK-NEXT:    tbl z0.d, { z0.d }, z1.d
; CHECK-NEXT:    ret
  %out = call <vscale x 2 x i64> @llvm.aarch64.sve.dupq.lane.nxv2i64(<vscale x 2 x i64> %a, i64 %idx)
  ret <vscale x 2 x i64> %out
}

; NOTE: Identical operation to dupq_lane_i8 (i.e. element type is irrelevant).
define <vscale x 8 x half> @dupq_lane_f16(<vscale x 8 x half> %a, i64 %idx) {
; CHECK-LABEL: dupq_lane_f16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    index z1.d, #0, #1
; CHECK-NEXT:    add x8, x0, x0
; CHECK-NEXT:    mov z2.d, x8
; CHECK-NEXT:    and z1.d, z1.d, #0x1
; CHECK-NEXT:    add z1.d, z1.d, z2.d
; CHECK-NEXT:    tbl z0.d, { z0.d }, z1.d
; CHECK-NEXT:    ret
  %out = call <vscale x 8 x half> @llvm.aarch64.sve.dupq.lane.nxv8f16(<vscale x 8 x half> %a, i64 %idx)
  ret <vscale x 8 x half> %out
}

; NOTE: Identical operation to dupq_lane_i8 (i.e. element type is irrelevant).
define <vscale x 8 x bfloat> @dupq_lane_bf16(<vscale x 8 x bfloat> %a, i64 %idx) #0 {
; CHECK-LABEL: dupq_lane_bf16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    index z1.d, #0, #1
; CHECK-NEXT:    add x8, x0, x0
; CHECK-NEXT:    mov z2.d, x8
; CHECK-NEXT:    and z1.d, z1.d, #0x1
; CHECK-NEXT:    add z1.d, z1.d, z2.d
; CHECK-NEXT:    tbl z0.d, { z0.d }, z1.d
; CHECK-NEXT:    ret
  %out = call <vscale x 8 x bfloat> @llvm.aarch64.sve.dupq.lane.nxv8bf16(<vscale x 8 x bfloat> %a, i64 %idx)
  ret <vscale x 8 x bfloat> %out
}

; NOTE: Identical operation to dupq_lane_i8 (i.e. element type is irrelevant).
define <vscale x 4 x float> @dupq_lane_f32(<vscale x 4 x float> %a, i64 %idx) {
; CHECK-LABEL: dupq_lane_f32:
; CHECK:       // %bb.0:
; CHECK-NEXT:    index z1.d, #0, #1
; CHECK-NEXT:    add x8, x0, x0
; CHECK-NEXT:    mov z2.d, x8
; CHECK-NEXT:    and z1.d, z1.d, #0x1
; CHECK-NEXT:    add z1.d, z1.d, z2.d
; CHECK-NEXT:    tbl z0.d, { z0.d }, z1.d
; CHECK-NEXT:    ret
  %out = call <vscale x 4 x float> @llvm.aarch64.sve.dupq.lane.nxv4f32(<vscale x 4 x float> %a, i64 %idx)
  ret <vscale x 4 x float> %out
}

; NOTE: Identical operation to dupq_lane_i8 (i.e. element type is irrelevant).
define <vscale x 2 x double> @dupq_lane_f64(<vscale x 2 x double> %a, i64 %idx) {
; CHECK-LABEL: dupq_lane_f64:
; CHECK:       // %bb.0:
; CHECK-NEXT:    index z1.d, #0, #1
; CHECK-NEXT:    add x8, x0, x0
; CHECK-NEXT:    mov z2.d, x8
; CHECK-NEXT:    and z1.d, z1.d, #0x1
; CHECK-NEXT:    add z1.d, z1.d, z2.d
; CHECK-NEXT:    tbl z0.d, { z0.d }, z1.d
; CHECK-NEXT:    ret
  %out = call <vscale x 2 x double> @llvm.aarch64.sve.dupq.lane.nxv2f64(<vscale x 2 x double> %a, i64 %idx)
  ret <vscale x 2 x double> %out
}

; NOTE: Index out of range (0-3)
define <vscale x 2 x i64> @dupq_i64_range(<vscale x 2 x i64> %a) {
; CHECK-LABEL: dupq_i64_range:
; CHECK:       // %bb.0:
; CHECK-NEXT:    index z1.d, #0, #1
; CHECK-NEXT:    and z1.d, z1.d, #0x1
; CHECK-NEXT:    orr z1.d, z1.d, #0x8
; CHECK-NEXT:    tbl z0.d, { z0.d }, z1.d
; CHECK-NEXT:    ret
  %out = call <vscale x 2 x i64> @llvm.aarch64.sve.dupq.lane.nxv2i64(<vscale x 2 x i64> %a, i64 4)
  ret <vscale x 2 x i64> %out
}
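
; NOTE: For the constant index 4 the base offset 2*4 = 8 folds directly into
; the index vector via orr (the repeating {0, 1} pattern OR 8 gives {8, 9}),
; avoiding the scalar add/mov sequence needed for runtime indices.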

define dso_local <vscale x 4 x float> @dupq_f32_repeat_complex(float %x, float %y) {
; CHECK-LABEL: dupq_f32_repeat_complex:
; CHECK:       // %bb.0:
; CHECK-NEXT:    // kill: def $s0 killed $s0 def $z0
; CHECK-NEXT:    // kill: def $s1 killed $s1 def $q1
; CHECK-NEXT:    mov v0.s[1], v1.s[0]
; CHECK-NEXT:    mov z0.d, d0
; CHECK-NEXT:    ret
  %1 = insertelement <4 x float> undef, float %x, i64 0
  %2 = insertelement <4 x float> %1, float %y, i64 1
  %3 = call <vscale x 4 x float> @llvm.vector.insert.nxv4f32.v4f32(<vscale x 4 x float> undef, <4 x float> %2, i64 0)
  %4 = bitcast <vscale x 4 x float> %3 to <vscale x 2 x double>
  %5 = shufflevector <vscale x 2 x double> %4, <vscale x 2 x double> poison, <vscale x 2 x i32> zeroinitializer
  %6 = bitcast <vscale x 2 x double> %5 to <vscale x 4 x float>
  ret <vscale x 4 x float> %6
}

define dso_local <vscale x 8 x half> @dupq_f16_repeat_complex(half %x, half %y) {
; CHECK-LABEL: dupq_f16_repeat_complex:
; CHECK:       // %bb.0:
; CHECK-NEXT:    // kill: def $h0 killed $h0 def $z0
; CHECK-NEXT:    // kill: def $h1 killed $h1 def $q1
; CHECK-NEXT:    mov v0.h[1], v1.h[0]
; CHECK-NEXT:    mov z0.s, s0
; CHECK-NEXT:    ret
  %1 = insertelement <8 x half> undef, half %x, i64 0
  %2 = insertelement <8 x half> %1, half %y, i64 1
  %3 = call <vscale x 8 x half> @llvm.vector.insert.nxv8f16.v8f16(<vscale x 8 x half> undef, <8 x half> %2, i64 0)
  %4 = bitcast <vscale x 8 x half> %3 to <vscale x 4 x float>
  %5 = shufflevector <vscale x 4 x float> %4, <vscale x 4 x float> poison, <vscale x 4 x i32> zeroinitializer
  %6 = bitcast <vscale x 4 x float> %5 to <vscale x 8 x half>
  ret <vscale x 8 x half> %6
}
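
; NOTE: In the two repeat_complex tests above, the insertelement/vector.insert/
; bitcast/shufflevector idiom is recognised as a splat of the {x, y} pair, so it
; lowers to building the pair in a NEON register followed by a single dup-style
; mov from the scalar register.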

define <vscale x 16 x i8> @ext_i8(<vscale x 16 x i8> %a, <vscale x 16 x i8> %b) {
; CHECK-LABEL: ext_i8:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ext z0.b, z0.b, z1.b, #255
; CHECK-NEXT:    ret
  %out = call <vscale x 16 x i8> @llvm.aarch64.sve.ext.nxv16i8(<vscale x 16 x i8> %a,
                                                               <vscale x 16 x i8> %b,
                                                               i32 255)
  ret <vscale x 16 x i8> %out
}

define <vscale x 8 x i16> @ext_i16(<vscale x 8 x i16> %a, <vscale x 8 x i16> %b) {
; CHECK-LABEL: ext_i16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ext z0.b, z0.b, z1.b, #0
; CHECK-NEXT:    ret
  %out = call <vscale x 8 x i16> @llvm.aarch64.sve.ext.nxv8i16(<vscale x 8 x i16> %a,
                                                               <vscale x 8 x i16> %b,
                                                               i32 0)
  ret <vscale x 8 x i16> %out
}

define <vscale x 4 x i32> @ext_i32(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b) {
; CHECK-LABEL: ext_i32:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ext z0.b, z0.b, z1.b, #4
; CHECK-NEXT:    ret
  %out = call <vscale x 4 x i32> @llvm.aarch64.sve.ext.nxv4i32(<vscale x 4 x i32> %a,
                                                               <vscale x 4 x i32> %b,
                                                               i32 1)
  ret <vscale x 4 x i32> %out
}

define <vscale x 2 x i64> @ext_i64(<vscale x 2 x i64> %a, <vscale x 2 x i64> %b) {
; CHECK-LABEL: ext_i64:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ext z0.b, z0.b, z1.b, #16
; CHECK-NEXT:    ret
  %out = call <vscale x 2 x i64> @llvm.aarch64.sve.ext.nxv2i64(<vscale x 2 x i64> %a,
                                                               <vscale x 2 x i64> %b,
                                                               i32 2)
  ret <vscale x 2 x i64> %out
}

define <vscale x 8 x bfloat> @ext_bf16(<vscale x 8 x bfloat> %a, <vscale x 8 x bfloat> %b) #0 {
; CHECK-LABEL: ext_bf16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ext z0.b, z0.b, z1.b, #6
; CHECK-NEXT:    ret
  %out = call <vscale x 8 x bfloat> @llvm.aarch64.sve.ext.nxv8bf16(<vscale x 8 x bfloat> %a,
                                                                   <vscale x 8 x bfloat> %b,
                                                                   i32 3)
  ret <vscale x 8 x bfloat> %out
}

define <vscale x 8 x half> @ext_f16(<vscale x 8 x half> %a, <vscale x 8 x half> %b) {
; CHECK-LABEL: ext_f16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ext z0.b, z0.b, z1.b, #6
; CHECK-NEXT:    ret
  %out = call <vscale x 8 x half> @llvm.aarch64.sve.ext.nxv8f16(<vscale x 8 x half> %a,
                                                                <vscale x 8 x half> %b,
                                                                i32 3)
  ret <vscale x 8 x half> %out
}

define <vscale x 4 x float> @ext_f32(<vscale x 4 x float> %a, <vscale x 4 x float> %b) {
; CHECK-LABEL: ext_f32:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ext z0.b, z0.b, z1.b, #16
; CHECK-NEXT:    ret
  %out = call <vscale x 4 x float> @llvm.aarch64.sve.ext.nxv4f32(<vscale x 4 x float> %a,
                                                                 <vscale x 4 x float> %b,
                                                                 i32 4)
  ret <vscale x 4 x float> %out
}

define <vscale x 2 x double> @ext_f64(<vscale x 2 x double> %a, <vscale x 2 x double> %b) {
; CHECK-LABEL: ext_f64:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ext z0.b, z0.b, z1.b, #40
; CHECK-NEXT:    ret
  %out = call <vscale x 2 x double> @llvm.aarch64.sve.ext.nxv2f64(<vscale x 2 x double> %a,
                                                                  <vscale x 2 x double> %b,
                                                                  i32 5)
  ret <vscale x 2 x double> %out
}
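
; NOTE: The ext intrinsic immediate counts elements, while the emitted ext
; instruction always operates on bytes, so the immediate is scaled by the
; element size (e.g. i32 5 on nxv2f64 becomes #40 above).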

;
; LASTA
;

define i8 @lasta_i8(<vscale x 16 x i1> %pg, <vscale x 16 x i8> %a) {
; CHECK-LABEL: lasta_i8:
; CHECK:       // %bb.0:
; CHECK-NEXT:    lasta w0, p0, z0.b
; CHECK-NEXT:    ret
  %res = call i8 @llvm.aarch64.sve.lasta.nxv16i8(<vscale x 16 x i1> %pg,
                                                 <vscale x 16 x i8> %a)
  ret i8 %res
}

define i16 @lasta_i16(<vscale x 8 x i1> %pg, <vscale x 8 x i16> %a) {
; CHECK-LABEL: lasta_i16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    lasta w0, p0, z0.h
; CHECK-NEXT:    ret
  %res = call i16 @llvm.aarch64.sve.lasta.nxv8i16(<vscale x 8 x i1> %pg,
                                                  <vscale x 8 x i16> %a)
  ret i16 %res
}

define i32 @lasta_i32(<vscale x 4 x i1> %pg, <vscale x 4 x i32> %a) {
; CHECK-LABEL: lasta_i32:
; CHECK:       // %bb.0:
; CHECK-NEXT:    lasta w0, p0, z0.s
; CHECK-NEXT:    ret
  %res = call i32 @llvm.aarch64.sve.lasta.nxv4i32(<vscale x 4 x i1> %pg,
                                                  <vscale x 4 x i32> %a)
  ret i32 %res
}

define i64 @lasta_i64(<vscale x 2 x i1> %pg, <vscale x 2 x i64> %a) {
; CHECK-LABEL: lasta_i64:
; CHECK:       // %bb.0:
; CHECK-NEXT:    lasta x0, p0, z0.d
; CHECK-NEXT:    ret
  %res = call i64 @llvm.aarch64.sve.lasta.nxv2i64(<vscale x 2 x i1> %pg,
                                                  <vscale x 2 x i64> %a)
  ret i64 %res
}

define half @lasta_f16(<vscale x 8 x i1> %pg, <vscale x 8 x half> %a) {
; CHECK-LABEL: lasta_f16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    lasta h0, p0, z0.h
; CHECK-NEXT:    ret
  %res = call half @llvm.aarch64.sve.lasta.nxv8f16(<vscale x 8 x i1> %pg,
                                                   <vscale x 8 x half> %a)
  ret half %res
}

define bfloat @lasta_bf16(<vscale x 8 x i1> %pg, <vscale x 8 x bfloat> %a) #0 {
; CHECK-LABEL: lasta_bf16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    lasta h0, p0, z0.h
; CHECK-NEXT:    ret
  %res = call bfloat @llvm.aarch64.sve.lasta.nxv8bf16(<vscale x 8 x i1> %pg,
                                                      <vscale x 8 x bfloat> %a)
  ret bfloat %res
}

define float @lasta_f32(<vscale x 4 x i1> %pg, <vscale x 4 x float> %a) {
; CHECK-LABEL: lasta_f32:
; CHECK:       // %bb.0:
; CHECK-NEXT:    lasta s0, p0, z0.s
; CHECK-NEXT:    ret
  %res = call float @llvm.aarch64.sve.lasta.nxv4f32(<vscale x 4 x i1> %pg,
                                                    <vscale x 4 x float> %a)
  ret float %res
}

define float @lasta_f32_v2(<vscale x 2 x i1> %pg, <vscale x 2 x float> %a) {
; CHECK-LABEL: lasta_f32_v2:
; CHECK:       // %bb.0:
; CHECK-NEXT:    lasta s0, p0, z0.s
; CHECK-NEXT:    ret
  %res = call float @llvm.aarch64.sve.lasta.nxv2f32(<vscale x 2 x i1> %pg,
                                                    <vscale x 2 x float> %a)
  ret float %res
}

define double @lasta_f64(<vscale x 2 x i1> %pg, <vscale x 2 x double> %a) {
; CHECK-LABEL: lasta_f64:
; CHECK:       // %bb.0:
; CHECK-NEXT:    lasta d0, p0, z0.d
; CHECK-NEXT:    ret
  %res = call double @llvm.aarch64.sve.lasta.nxv2f64(<vscale x 2 x i1> %pg,
                                                     <vscale x 2 x double> %a)
  ret double %res
}

;
; LASTB
;

define i8 @lastb_i8(<vscale x 16 x i1> %pg, <vscale x 16 x i8> %a) {
; CHECK-LABEL: lastb_i8:
; CHECK:       // %bb.0:
; CHECK-NEXT:    lastb w0, p0, z0.b
; CHECK-NEXT:    ret
  %res = call i8 @llvm.aarch64.sve.lastb.nxv16i8(<vscale x 16 x i1> %pg,
                                                 <vscale x 16 x i8> %a)
  ret i8 %res
}

define i16 @lastb_i16(<vscale x 8 x i1> %pg, <vscale x 8 x i16> %a) {
; CHECK-LABEL: lastb_i16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    lastb w0, p0, z0.h
; CHECK-NEXT:    ret
  %res = call i16 @llvm.aarch64.sve.lastb.nxv8i16(<vscale x 8 x i1> %pg,
                                                  <vscale x 8 x i16> %a)
  ret i16 %res
}

define i32 @lastb_i32(<vscale x 4 x i1> %pg, <vscale x 4 x i32> %a) {
; CHECK-LABEL: lastb_i32:
; CHECK:       // %bb.0:
; CHECK-NEXT:    lastb w0, p0, z0.s
; CHECK-NEXT:    ret
  %res = call i32 @llvm.aarch64.sve.lastb.nxv4i32(<vscale x 4 x i1> %pg,
                                                  <vscale x 4 x i32> %a)
  ret i32 %res
}

define i64 @lastb_i64(<vscale x 2 x i1> %pg, <vscale x 2 x i64> %a) {
; CHECK-LABEL: lastb_i64:
; CHECK:       // %bb.0:
; CHECK-NEXT:    lastb x0, p0, z0.d
; CHECK-NEXT:    ret
  %res = call i64 @llvm.aarch64.sve.lastb.nxv2i64(<vscale x 2 x i1> %pg,
                                                  <vscale x 2 x i64> %a)
  ret i64 %res
}

define half @lastb_f16(<vscale x 8 x i1> %pg, <vscale x 8 x half> %a) {
; CHECK-LABEL: lastb_f16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    lastb h0, p0, z0.h
; CHECK-NEXT:    ret
  %res = call half @llvm.aarch64.sve.lastb.nxv8f16(<vscale x 8 x i1> %pg,
                                                   <vscale x 8 x half> %a)
  ret half %res
}

define bfloat @lastb_bf16(<vscale x 8 x i1> %pg, <vscale x 8 x bfloat> %a) #0 {
; CHECK-LABEL: lastb_bf16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    lastb h0, p0, z0.h
; CHECK-NEXT:    ret
  %res = call bfloat @llvm.aarch64.sve.lastb.nxv8bf16(<vscale x 8 x i1> %pg,
                                                      <vscale x 8 x bfloat> %a)
  ret bfloat %res
}

define float @lastb_f32(<vscale x 4 x i1> %pg, <vscale x 4 x float> %a) {
; CHECK-LABEL: lastb_f32:
; CHECK:       // %bb.0:
; CHECK-NEXT:    lastb s0, p0, z0.s
; CHECK-NEXT:    ret
  %res = call float @llvm.aarch64.sve.lastb.nxv4f32(<vscale x 4 x i1> %pg,
                                                    <vscale x 4 x float> %a)
  ret float %res
}

define float @lastb_f32_v2(<vscale x 2 x i1> %pg, <vscale x 2 x float> %a) {
; CHECK-LABEL: lastb_f32_v2:
; CHECK:       // %bb.0:
; CHECK-NEXT:    lastb s0, p0, z0.s
; CHECK-NEXT:    ret
  %res = call float @llvm.aarch64.sve.lastb.nxv2f32(<vscale x 2 x i1> %pg,
                                                    <vscale x 2 x float> %a)
  ret float %res
}

define double @lastb_f64(<vscale x 2 x i1> %pg, <vscale x 2 x double> %a) {
; CHECK-LABEL: lastb_f64:
; CHECK:       // %bb.0:
; CHECK-NEXT:    lastb d0, p0, z0.d
; CHECK-NEXT:    ret
  %res = call double @llvm.aarch64.sve.lastb.nxv2f64(<vscale x 2 x i1> %pg,
                                                     <vscale x 2 x double> %a)
  ret double %res
}

;
; COMPACT
;

define <vscale x 4 x i32> @compact_i32(<vscale x 4 x i1> %pg, <vscale x 4 x i32> %a) {
; CHECK-LABEL: compact_i32:
; CHECK:       // %bb.0:
; CHECK-NEXT:    compact z0.s, p0, z0.s
; CHECK-NEXT:    ret
  %out = call <vscale x 4 x i32> @llvm.aarch64.sve.compact.nxv4i32(<vscale x 4 x i1> %pg,
                                                                   <vscale x 4 x i32> %a)
  ret <vscale x 4 x i32> %out
}

define <vscale x 2 x i64> @compact_i64(<vscale x 2 x i1> %pg, <vscale x 2 x i64> %a) {
; CHECK-LABEL: compact_i64:
; CHECK:       // %bb.0:
; CHECK-NEXT:    compact z0.d, p0, z0.d
; CHECK-NEXT:    ret
  %out = call <vscale x 2 x i64> @llvm.aarch64.sve.compact.nxv2i64(<vscale x 2 x i1> %pg,
                                                                   <vscale x 2 x i64> %a)
  ret <vscale x 2 x i64> %out
}

define <vscale x 4 x float> @compact_f32(<vscale x 4 x i1> %pg, <vscale x 4 x float> %a) {
; CHECK-LABEL: compact_f32:
; CHECK:       // %bb.0:
; CHECK-NEXT:    compact z0.s, p0, z0.s
; CHECK-NEXT:    ret
  %out = call <vscale x 4 x float> @llvm.aarch64.sve.compact.nxv4f32(<vscale x 4 x i1> %pg,
                                                                     <vscale x 4 x float> %a)
  ret <vscale x 4 x float> %out
}

define <vscale x 2 x double> @compact_f64(<vscale x 2 x i1> %pg, <vscale x 2 x double> %a) {
; CHECK-LABEL: compact_f64:
; CHECK:       // %bb.0:
; CHECK-NEXT:    compact z0.d, p0, z0.d
; CHECK-NEXT:    ret
  %out = call <vscale x 2 x double> @llvm.aarch64.sve.compact.nxv2f64(<vscale x 2 x i1> %pg,
                                                                      <vscale x 2 x double> %a)
  ret <vscale x 2 x double> %out
}

;
; REV
;

define <vscale x 16 x i1> @rev_nxv16i1(<vscale x 16 x i1> %a) {
; CHECK-LABEL: rev_nxv16i1:
; CHECK:       // %bb.0:
; CHECK-NEXT:    rev p0.b, p0.b
; CHECK-NEXT:    ret
  %res = call <vscale x 16 x i1> @llvm.aarch64.sve.rev.nxv16i1(<vscale x 16 x i1> %a)
  ret <vscale x 16 x i1> %res
}

define <vscale x 8 x i1> @rev_nxv8i1(<vscale x 8 x i1> %a) {
; CHECK-LABEL: rev_nxv8i1:
; CHECK:       // %bb.0:
; CHECK-NEXT:    rev p0.h, p0.h
; CHECK-NEXT:    ret
  %res = call <vscale x 8 x i1> @llvm.aarch64.sve.rev.nxv8i1(<vscale x 8 x i1> %a)
  ret <vscale x 8 x i1> %res
}

define <vscale x 4 x i1> @rev_nxv4i1(<vscale x 4 x i1> %a) {
; CHECK-LABEL: rev_nxv4i1:
; CHECK:       // %bb.0:
; CHECK-NEXT:    rev p0.s, p0.s
; CHECK-NEXT:    ret
  %res = call <vscale x 4 x i1> @llvm.aarch64.sve.rev.nxv4i1(<vscale x 4 x i1> %a)
  ret <vscale x 4 x i1> %res
}

define <vscale x 2 x i1> @rev_nxv2i1(<vscale x 2 x i1> %a) {
; CHECK-LABEL: rev_nxv2i1:
; CHECK:       // %bb.0:
; CHECK-NEXT:    rev p0.d, p0.d
; CHECK-NEXT:    ret
  %res = call <vscale x 2 x i1> @llvm.aarch64.sve.rev.nxv2i1(<vscale x 2 x i1> %a)
  ret <vscale x 2 x i1> %res
}

define <vscale x 16 x i1> @rev_b16(<vscale x 16 x i1> %a) {
; CHECK-LABEL: rev_b16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    rev p0.h, p0.h
; CHECK-NEXT:    ret
  %res = call <vscale x 16 x i1> @llvm.aarch64.sve.rev.b16(<vscale x 16 x i1> %a)
  ret <vscale x 16 x i1> %res
}

define <vscale x 16 x i1> @rev_b32(<vscale x 16 x i1> %a) {
; CHECK-LABEL: rev_b32:
; CHECK:       // %bb.0:
; CHECK-NEXT:    rev p0.s, p0.s
; CHECK-NEXT:    ret
  %res = call <vscale x 16 x i1> @llvm.aarch64.sve.rev.b32(<vscale x 16 x i1> %a)
  ret <vscale x 16 x i1> %res
}

define <vscale x 16 x i1> @rev_b64(<vscale x 16 x i1> %a) {
; CHECK-LABEL: rev_b64:
; CHECK:       // %bb.0:
; CHECK-NEXT:    rev p0.d, p0.d
; CHECK-NEXT:    ret
  %res = call <vscale x 16 x i1> @llvm.aarch64.sve.rev.b64(<vscale x 16 x i1> %a)
  ret <vscale x 16 x i1> %res
}

define <vscale x 16 x i8> @rev_i8(<vscale x 16 x i8> %a) {
; CHECK-LABEL: rev_i8:
; CHECK:       // %bb.0:
; CHECK-NEXT:    rev z0.b, z0.b
; CHECK-NEXT:    ret
  %res = call <vscale x 16 x i8> @llvm.aarch64.sve.rev.nxv16i8(<vscale x 16 x i8> %a)
  ret <vscale x 16 x i8> %res
}

define <vscale x 8 x i16> @rev_i16(<vscale x 8 x i16> %a) {
; CHECK-LABEL: rev_i16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    rev z0.h, z0.h
; CHECK-NEXT:    ret
  %res = call <vscale x 8 x i16> @llvm.aarch64.sve.rev.nxv8i16(<vscale x 8 x i16> %a)
  ret <vscale x 8 x i16> %res
}

define <vscale x 4 x i32> @rev_i32(<vscale x 4 x i32> %a) {
; CHECK-LABEL: rev_i32:
; CHECK:       // %bb.0:
; CHECK-NEXT:    rev z0.s, z0.s
; CHECK-NEXT:    ret
  %res = call <vscale x 4 x i32> @llvm.aarch64.sve.rev.nxv4i32(<vscale x 4 x i32> %a)
  ret <vscale x 4 x i32> %res
}

define <vscale x 2 x i64> @rev_i64(<vscale x 2 x i64> %a) {
; CHECK-LABEL: rev_i64:
; CHECK:       // %bb.0:
; CHECK-NEXT:    rev z0.d, z0.d
; CHECK-NEXT:    ret
  %res = call <vscale x 2 x i64> @llvm.aarch64.sve.rev.nxv2i64(<vscale x 2 x i64> %a)
  ret <vscale x 2 x i64> %res
}

define <vscale x 8 x bfloat> @rev_bf16(<vscale x 8 x bfloat> %a) #0 {
; CHECK-LABEL: rev_bf16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    rev z0.h, z0.h
; CHECK-NEXT:    ret
  %res = call <vscale x 8 x bfloat> @llvm.aarch64.sve.rev.nxv8bf16(<vscale x 8 x bfloat> %a)
  ret <vscale x 8 x bfloat> %res
}

define <vscale x 8 x half> @rev_f16(<vscale x 8 x half> %a) {
; CHECK-LABEL: rev_f16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    rev z0.h, z0.h
; CHECK-NEXT:    ret
  %res = call <vscale x 8 x half> @llvm.aarch64.sve.rev.nxv8f16(<vscale x 8 x half> %a)
  ret <vscale x 8 x half> %res
}

define <vscale x 4 x float> @rev_f32(<vscale x 4 x float> %a) {
; CHECK-LABEL: rev_f32:
; CHECK:       // %bb.0:
; CHECK-NEXT:    rev z0.s, z0.s
; CHECK-NEXT:    ret
  %res = call <vscale x 4 x float> @llvm.aarch64.sve.rev.nxv4f32(<vscale x 4 x float> %a)
  ret <vscale x 4 x float> %res
}

define <vscale x 2 x double> @rev_f64(<vscale x 2 x double> %a) {
; CHECK-LABEL: rev_f64:
; CHECK:       // %bb.0:
; CHECK-NEXT:    rev z0.d, z0.d
; CHECK-NEXT:    ret
  %res = call <vscale x 2 x double> @llvm.aarch64.sve.rev.nxv2f64(<vscale x 2 x double> %a)
  ret <vscale x 2 x double> %res
}

;
; SPLICE
;

define <vscale x 16 x i8> @splice_i8(<vscale x 16 x i1> %pg, <vscale x 16 x i8> %a, <vscale x 16 x i8> %b) {
; CHECK-LABEL: splice_i8:
; CHECK:       // %bb.0:
; CHECK-NEXT:    splice z0.b, p0, z0.b, z1.b
; CHECK-NEXT:    ret
  %out = call <vscale x 16 x i8> @llvm.aarch64.sve.splice.nxv16i8(<vscale x 16 x i1> %pg,
                                                                  <vscale x 16 x i8> %a,
                                                                  <vscale x 16 x i8> %b)
  ret <vscale x 16 x i8> %out
}

define <vscale x 8 x i16> @splice_i16(<vscale x 8 x i1> %pg, <vscale x 8 x i16> %a, <vscale x 8 x i16> %b) {
; CHECK-LABEL: splice_i16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    splice z0.h, p0, z0.h, z1.h
; CHECK-NEXT:    ret
  %out = call <vscale x 8 x i16> @llvm.aarch64.sve.splice.nxv8i16(<vscale x 8 x i1> %pg,
                                                                  <vscale x 8 x i16> %a,
                                                                  <vscale x 8 x i16> %b)
  ret <vscale x 8 x i16> %out
}

define <vscale x 4 x i32> @splice_i32(<vscale x 4 x i1> %pg, <vscale x 4 x i32> %a, <vscale x 4 x i32> %b) {
; CHECK-LABEL: splice_i32:
; CHECK:       // %bb.0:
; CHECK-NEXT:    splice z0.s, p0, z0.s, z1.s
; CHECK-NEXT:    ret
  %out = call <vscale x 4 x i32> @llvm.aarch64.sve.splice.nxv4i32(<vscale x 4 x i1> %pg,
                                                                  <vscale x 4 x i32> %a,
                                                                  <vscale x 4 x i32> %b)
  ret <vscale x 4 x i32> %out
}

define <vscale x 2 x i64> @splice_i64(<vscale x 2 x i1> %pg, <vscale x 2 x i64> %a, <vscale x 2 x i64> %b) {
; CHECK-LABEL: splice_i64:
; CHECK:       // %bb.0:
; CHECK-NEXT:    splice z0.d, p0, z0.d, z1.d
; CHECK-NEXT:    ret
  %out = call <vscale x 2 x i64> @llvm.aarch64.sve.splice.nxv2i64(<vscale x 2 x i1> %pg,
                                                                  <vscale x 2 x i64> %a,
                                                                  <vscale x 2 x i64> %b)
  ret <vscale x 2 x i64> %out
}

define <vscale x 8 x bfloat> @splice_bf16(<vscale x 8 x i1> %pg, <vscale x 8 x bfloat> %a, <vscale x 8 x bfloat> %b) #0 {
; CHECK-LABEL: splice_bf16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    splice z0.h, p0, z0.h, z1.h
; CHECK-NEXT:    ret
  %out = call <vscale x 8 x bfloat> @llvm.aarch64.sve.splice.nxv8bf16(<vscale x 8 x i1> %pg,
                                                                      <vscale x 8 x bfloat> %a,
                                                                      <vscale x 8 x bfloat> %b)
  ret <vscale x 8 x bfloat> %out
}

define <vscale x 8 x half> @splice_f16(<vscale x 8 x i1> %pg, <vscale x 8 x half> %a, <vscale x 8 x half> %b) {
; CHECK-LABEL: splice_f16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    splice z0.h, p0, z0.h, z1.h
; CHECK-NEXT:    ret
  %out = call <vscale x 8 x half> @llvm.aarch64.sve.splice.nxv8f16(<vscale x 8 x i1> %pg,
                                                                   <vscale x 8 x half> %a,
                                                                   <vscale x 8 x half> %b)
  ret <vscale x 8 x half> %out
}

define <vscale x 4 x float> @splice_f32(<vscale x 4 x i1> %pg, <vscale x 4 x float> %a, <vscale x 4 x float> %b) {
; CHECK-LABEL: splice_f32:
; CHECK:       // %bb.0:
; CHECK-NEXT:    splice z0.s, p0, z0.s, z1.s
; CHECK-NEXT:    ret
  %out = call <vscale x 4 x float> @llvm.aarch64.sve.splice.nxv4f32(<vscale x 4 x i1> %pg,
                                                                    <vscale x 4 x float> %a,
                                                                    <vscale x 4 x float> %b)
  ret <vscale x 4 x float> %out
}

define <vscale x 2 x double> @splice_f64(<vscale x 2 x i1> %pg, <vscale x 2 x double> %a, <vscale x 2 x double> %b) {
; CHECK-LABEL: splice_f64:
; CHECK:       // %bb.0:
; CHECK-NEXT:    splice z0.d, p0, z0.d, z1.d
; CHECK-NEXT:    ret
  %out = call <vscale x 2 x double> @llvm.aarch64.sve.splice.nxv2f64(<vscale x 2 x i1> %pg,
                                                                     <vscale x 2 x double> %a,
                                                                     <vscale x 2 x double> %b)
  ret <vscale x 2 x double> %out
}

;
; SUNPKHI
;

define <vscale x 8 x i16> @sunpkhi_i16(<vscale x 16 x i8> %a) {
; CHECK-LABEL: sunpkhi_i16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    sunpkhi z0.h, z0.b
; CHECK-NEXT:    ret
  %res = call <vscale x 8 x i16> @llvm.aarch64.sve.sunpkhi.nxv8i16(<vscale x 16 x i8> %a)
  ret <vscale x 8 x i16> %res
}

define <vscale x 4 x i32> @sunpkhi_i32(<vscale x 8 x i16> %a) {
; CHECK-LABEL: sunpkhi_i32:
; CHECK:       // %bb.0:
; CHECK-NEXT:    sunpkhi z0.s, z0.h
; CHECK-NEXT:    ret
  %res = call <vscale x 4 x i32> @llvm.aarch64.sve.sunpkhi.nxv4i32(<vscale x 8 x i16> %a)
  ret <vscale x 4 x i32> %res
}

define <vscale x 2 x i64> @sunpkhi_i64(<vscale x 4 x i32> %a) {
; CHECK-LABEL: sunpkhi_i64:
; CHECK:       // %bb.0:
; CHECK-NEXT:    sunpkhi z0.d, z0.s
; CHECK-NEXT:    ret
  %res = call <vscale x 2 x i64> @llvm.aarch64.sve.sunpkhi.nxv2i64(<vscale x 4 x i32> %a)
  ret <vscale x 2 x i64> %res
}

;
; SUNPKLO
;

define <vscale x 8 x i16> @sunpklo_i16(<vscale x 16 x i8> %a) {
; CHECK-LABEL: sunpklo_i16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    sunpklo z0.h, z0.b
; CHECK-NEXT:    ret
  %res = call <vscale x 8 x i16> @llvm.aarch64.sve.sunpklo.nxv8i16(<vscale x 16 x i8> %a)
  ret <vscale x 8 x i16> %res
}

define <vscale x 4 x i32> @sunpklo_i32(<vscale x 8 x i16> %a) {
; CHECK-LABEL: sunpklo_i32:
; CHECK:       // %bb.0:
; CHECK-NEXT:    sunpklo z0.s, z0.h
; CHECK-NEXT:    ret
  %res = call <vscale x 4 x i32> @llvm.aarch64.sve.sunpklo.nxv4i32(<vscale x 8 x i16> %a)
  ret <vscale x 4 x i32> %res
}

define <vscale x 2 x i64> @sunpklo_i64(<vscale x 4 x i32> %a) {
; CHECK-LABEL: sunpklo_i64:
; CHECK:       // %bb.0:
; CHECK-NEXT:    sunpklo z0.d, z0.s
; CHECK-NEXT:    ret
  %res = call <vscale x 2 x i64> @llvm.aarch64.sve.sunpklo.nxv2i64(<vscale x 4 x i32> %a)
  ret <vscale x 2 x i64> %res
}

;
; TBL
;

define <vscale x 16 x i8> @tbl_i8(<vscale x 16 x i8> %a, <vscale x 16 x i8> %b) {
; CHECK-LABEL: tbl_i8:
; CHECK:       // %bb.0:
; CHECK-NEXT:    tbl z0.b, { z0.b }, z1.b
; CHECK-NEXT:    ret
  %out = call <vscale x 16 x i8> @llvm.aarch64.sve.tbl.nxv16i8(<vscale x 16 x i8> %a,
                                                               <vscale x 16 x i8> %b)
  ret <vscale x 16 x i8> %out
}

define <vscale x 8 x i16> @tbl_i16(<vscale x 8 x i16> %a, <vscale x 8 x i16> %b) {
; CHECK-LABEL: tbl_i16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    tbl z0.h, { z0.h }, z1.h
; CHECK-NEXT:    ret
  %out = call <vscale x 8 x i16> @llvm.aarch64.sve.tbl.nxv8i16(<vscale x 8 x i16> %a,
                                                               <vscale x 8 x i16> %b)
  ret <vscale x 8 x i16> %out
}

define <vscale x 4 x i32> @tbl_i32(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b) {
; CHECK-LABEL: tbl_i32:
; CHECK:       // %bb.0:
; CHECK-NEXT:    tbl z0.s, { z0.s }, z1.s
; CHECK-NEXT:    ret
  %out = call <vscale x 4 x i32> @llvm.aarch64.sve.tbl.nxv4i32(<vscale x 4 x i32> %a,
                                                               <vscale x 4 x i32> %b)
  ret <vscale x 4 x i32> %out
}

define <vscale x 2 x i64> @tbl_i64(<vscale x 2 x i64> %a, <vscale x 2 x i64> %b) {
; CHECK-LABEL: tbl_i64:
; CHECK:       // %bb.0:
; CHECK-NEXT:    tbl z0.d, { z0.d }, z1.d
; CHECK-NEXT:    ret
  %out = call <vscale x 2 x i64> @llvm.aarch64.sve.tbl.nxv2i64(<vscale x 2 x i64> %a,
                                                               <vscale x 2 x i64> %b)
  ret <vscale x 2 x i64> %out
}

define <vscale x 8 x half> @tbl_f16(<vscale x 8 x half> %a, <vscale x 8 x i16> %b) {
; CHECK-LABEL: tbl_f16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    tbl z0.h, { z0.h }, z1.h
; CHECK-NEXT:    ret
  %out = call <vscale x 8 x half> @llvm.aarch64.sve.tbl.nxv8f16(<vscale x 8 x half> %a,
                                                                <vscale x 8 x i16> %b)
  ret <vscale x 8 x half> %out
}

define <vscale x 8 x bfloat> @tbl_bf16(<vscale x 8 x bfloat> %a, <vscale x 8 x i16> %b) #0 {
; CHECK-LABEL: tbl_bf16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    tbl z0.h, { z0.h }, z1.h
; CHECK-NEXT:    ret
  %out = call <vscale x 8 x bfloat> @llvm.aarch64.sve.tbl.nxv8bf16(<vscale x 8 x bfloat> %a,
                                                                   <vscale x 8 x i16> %b)
  ret <vscale x 8 x bfloat> %out
}

define <vscale x 4 x float> @tbl_f32(<vscale x 4 x float> %a, <vscale x 4 x i32> %b) {
; CHECK-LABEL: tbl_f32:
; CHECK:       // %bb.0:
; CHECK-NEXT:    tbl z0.s, { z0.s }, z1.s
; CHECK-NEXT:    ret
  %out = call <vscale x 4 x float> @llvm.aarch64.sve.tbl.nxv4f32(<vscale x 4 x float> %a,
                                                                 <vscale x 4 x i32> %b)
  ret <vscale x 4 x float> %out
}

define <vscale x 2 x double> @tbl_f64(<vscale x 2 x double> %a, <vscale x 2 x i64> %b) {
; CHECK-LABEL: tbl_f64:
; CHECK:       // %bb.0:
; CHECK-NEXT:    tbl z0.d, { z0.d }, z1.d
; CHECK-NEXT:    ret
  %out = call <vscale x 2 x double> @llvm.aarch64.sve.tbl.nxv2f64(<vscale x 2 x double> %a,
                                                                  <vscale x 2 x i64> %b)
  ret <vscale x 2 x double> %out
}
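
; NOTE: The floating-point tbl variants above take an index vector of integer
; elements of the same width as the data (e.g. nxv8f16 data with nxv8i16
; indices); only the data type changes, the emitted instruction is the same.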

;
; UUNPKHI
;

define <vscale x 8 x i16> @uunpkhi_i16(<vscale x 16 x i8> %a) {
; CHECK-LABEL: uunpkhi_i16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    uunpkhi z0.h, z0.b
; CHECK-NEXT:    ret
  %res = call <vscale x 8 x i16> @llvm.aarch64.sve.uunpkhi.nxv8i16(<vscale x 16 x i8> %a)
  ret <vscale x 8 x i16> %res
}

define <vscale x 4 x i32> @uunpkhi_i32(<vscale x 8 x i16> %a) {
; CHECK-LABEL: uunpkhi_i32:
; CHECK:       // %bb.0:
; CHECK-NEXT:    uunpkhi z0.s, z0.h
; CHECK-NEXT:    ret
  %res = call <vscale x 4 x i32> @llvm.aarch64.sve.uunpkhi.nxv4i32(<vscale x 8 x i16> %a)
  ret <vscale x 4 x i32> %res
}

define <vscale x 2 x i64> @uunpkhi_i64(<vscale x 4 x i32> %a) {
; CHECK-LABEL: uunpkhi_i64:
; CHECK:       // %bb.0:
; CHECK-NEXT:    uunpkhi z0.d, z0.s
; CHECK-NEXT:    ret
  %res = call <vscale x 2 x i64> @llvm.aarch64.sve.uunpkhi.nxv2i64(<vscale x 4 x i32> %a)
  ret <vscale x 2 x i64> %res
}

;
; UUNPKLO
;

define <vscale x 8 x i16> @uunpklo_i16(<vscale x 16 x i8> %a) {
; CHECK-LABEL: uunpklo_i16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    uunpklo z0.h, z0.b
; CHECK-NEXT:    ret
  %res = call <vscale x 8 x i16> @llvm.aarch64.sve.uunpklo.nxv8i16(<vscale x 16 x i8> %a)
  ret <vscale x 8 x i16> %res
}

define <vscale x 4 x i32> @uunpklo_i32(<vscale x 8 x i16> %a) {
; CHECK-LABEL: uunpklo_i32:
; CHECK:       // %bb.0:
; CHECK-NEXT:    uunpklo z0.s, z0.h
; CHECK-NEXT:    ret
  %res = call <vscale x 4 x i32> @llvm.aarch64.sve.uunpklo.nxv4i32(<vscale x 8 x i16> %a)
  ret <vscale x 4 x i32> %res
}

define <vscale x 2 x i64> @uunpklo_i64(<vscale x 4 x i32> %a) {
; CHECK-LABEL: uunpklo_i64:
; CHECK:       // %bb.0:
; CHECK-NEXT:    uunpklo z0.d, z0.s
; CHECK-NEXT:    ret
  %res = call <vscale x 2 x i64> @llvm.aarch64.sve.uunpklo.nxv2i64(<vscale x 4 x i32> %a)
  ret <vscale x 2 x i64> %res
}

;
; TRN1
;

define <vscale x 16 x i1> @trn1_nxv16i1(<vscale x 16 x i1> %a, <vscale x 16 x i1> %b) {
; CHECK-LABEL: trn1_nxv16i1:
; CHECK:       // %bb.0:
; CHECK-NEXT:    trn1 p0.b, p0.b, p1.b
; CHECK-NEXT:    ret
  %out = call <vscale x 16 x i1> @llvm.aarch64.sve.trn1.nxv16i1(<vscale x 16 x i1> %a,
                                                                <vscale x 16 x i1> %b)
  ret <vscale x 16 x i1> %out
}

define <vscale x 8 x i1> @trn1_nxv8i1(<vscale x 8 x i1> %a, <vscale x 8 x i1> %b) {
; CHECK-LABEL: trn1_nxv8i1:
; CHECK:       // %bb.0:
; CHECK-NEXT:    trn1 p0.h, p0.h, p1.h
; CHECK-NEXT:    ret
  %out = call <vscale x 8 x i1> @llvm.aarch64.sve.trn1.nxv8i1(<vscale x 8 x i1> %a,
                                                              <vscale x 8 x i1> %b)
  ret <vscale x 8 x i1> %out
}

define <vscale x 4 x i1> @trn1_nxv4i1(<vscale x 4 x i1> %a, <vscale x 4 x i1> %b) {
; CHECK-LABEL: trn1_nxv4i1:
; CHECK:       // %bb.0:
; CHECK-NEXT:    trn1 p0.s, p0.s, p1.s
; CHECK-NEXT:    ret
  %out = call <vscale x 4 x i1> @llvm.aarch64.sve.trn1.nxv4i1(<vscale x 4 x i1> %a,
                                                              <vscale x 4 x i1> %b)
  ret <vscale x 4 x i1> %out
}

define <vscale x 2 x i1> @trn1_nxv2i1(<vscale x 2 x i1> %a, <vscale x 2 x i1> %b) {
; CHECK-LABEL: trn1_nxv2i1:
; CHECK:       // %bb.0:
; CHECK-NEXT:    trn1 p0.d, p0.d, p1.d
; CHECK-NEXT:    ret
  %out = call <vscale x 2 x i1> @llvm.aarch64.sve.trn1.nxv2i1(<vscale x 2 x i1> %a,
                                                              <vscale x 2 x i1> %b)
  ret <vscale x 2 x i1> %out
}

define <vscale x 16 x i1> @trn1_b16(<vscale x 16 x i1> %a, <vscale x 16 x i1> %b) {
; CHECK-LABEL: trn1_b16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    trn1 p0.h, p0.h, p1.h
; CHECK-NEXT:    ret
  %out = call <vscale x 16 x i1> @llvm.aarch64.sve.trn1.b16(<vscale x 16 x i1> %a,
                                                            <vscale x 16 x i1> %b)
  ret <vscale x 16 x i1> %out
}

define <vscale x 16 x i1> @trn1_b32(<vscale x 16 x i1> %a, <vscale x 16 x i1> %b) {
; CHECK-LABEL: trn1_b32:
; CHECK:       // %bb.0:
; CHECK-NEXT:    trn1 p0.s, p0.s, p1.s
; CHECK-NEXT:    ret
  %out = call <vscale x 16 x i1> @llvm.aarch64.sve.trn1.b32(<vscale x 16 x i1> %a,
                                                            <vscale x 16 x i1> %b)
  ret <vscale x 16 x i1> %out
}

define <vscale x 16 x i1> @trn1_b64(<vscale x 16 x i1> %a, <vscale x 16 x i1> %b) {
; CHECK-LABEL: trn1_b64:
; CHECK:       // %bb.0:
; CHECK-NEXT:    trn1 p0.d, p0.d, p1.d
; CHECK-NEXT:    ret
  %out = call <vscale x 16 x i1> @llvm.aarch64.sve.trn1.b64(<vscale x 16 x i1> %a,
                                                            <vscale x 16 x i1> %b)
  ret <vscale x 16 x i1> %out
}

define <vscale x 16 x i8> @trn1_i8(<vscale x 16 x i8> %a, <vscale x 16 x i8> %b) {
; CHECK-LABEL: trn1_i8:
; CHECK:       // %bb.0:
; CHECK-NEXT:    trn1 z0.b, z0.b, z1.b
; CHECK-NEXT:    ret
  %out = call <vscale x 16 x i8> @llvm.aarch64.sve.trn1.nxv16i8(<vscale x 16 x i8> %a,
                                                                <vscale x 16 x i8> %b)
  ret <vscale x 16 x i8> %out
}

define <vscale x 8 x i16> @trn1_i16(<vscale x 8 x i16> %a, <vscale x 8 x i16> %b) {
; CHECK-LABEL: trn1_i16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    trn1 z0.h, z0.h, z1.h
; CHECK-NEXT:    ret
  %out = call <vscale x 8 x i16> @llvm.aarch64.sve.trn1.nxv8i16(<vscale x 8 x i16> %a,
                                                                <vscale x 8 x i16> %b)
  ret <vscale x 8 x i16> %out
}

define <vscale x 4 x i32> @trn1_i32(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b) {
; CHECK-LABEL: trn1_i32:
; CHECK:       // %bb.0:
; CHECK-NEXT:    trn1 z0.s, z0.s, z1.s
; CHECK-NEXT:    ret
  %out = call <vscale x 4 x i32> @llvm.aarch64.sve.trn1.nxv4i32(<vscale x 4 x i32> %a,
                                                                <vscale x 4 x i32> %b)
  ret <vscale x 4 x i32> %out
}

define <vscale x 2 x i64> @trn1_i64(<vscale x 2 x i64> %a, <vscale x 2 x i64> %b) {
; CHECK-LABEL: trn1_i64:
; CHECK:       // %bb.0:
; CHECK-NEXT:    trn1 z0.d, z0.d, z1.d
; CHECK-NEXT:    ret
  %out = call <vscale x 2 x i64> @llvm.aarch64.sve.trn1.nxv2i64(<vscale x 2 x i64> %a,
                                                                <vscale x 2 x i64> %b)
  ret <vscale x 2 x i64> %out
}

define <vscale x 2 x half> @trn1_f16_v2(<vscale x 2 x half> %a, <vscale x 2 x half> %b) {
; CHECK-LABEL: trn1_f16_v2:
; CHECK:       // %bb.0:
; CHECK-NEXT:    trn1 z0.d, z0.d, z1.d
; CHECK-NEXT:    ret
  %out = call <vscale x 2 x half> @llvm.aarch64.sve.trn1.nxv2f16(<vscale x 2 x half> %a,
                                                                 <vscale x 2 x half> %b)
  ret <vscale x 2 x half> %out
}

define <vscale x 4 x half> @trn1_f16_v4(<vscale x 4 x half> %a, <vscale x 4 x half> %b) {
; CHECK-LABEL: trn1_f16_v4:
; CHECK:       // %bb.0:
; CHECK-NEXT:    trn1 z0.s, z0.s, z1.s
; CHECK-NEXT:    ret
  %out = call <vscale x 4 x half> @llvm.aarch64.sve.trn1.nxv4f16(<vscale x 4 x half> %a,
                                                                 <vscale x 4 x half> %b)
  ret <vscale x 4 x half> %out
}

define <vscale x 8 x bfloat> @trn1_bf16(<vscale x 8 x bfloat> %a, <vscale x 8 x bfloat> %b) #0 {
; CHECK-LABEL: trn1_bf16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    trn1 z0.h, z0.h, z1.h
; CHECK-NEXT:    ret
  %out = call <vscale x 8 x bfloat> @llvm.aarch64.sve.trn1.nxv8bf16(<vscale x 8 x bfloat> %a,
                                                                    <vscale x 8 x bfloat> %b)
  ret <vscale x 8 x bfloat> %out
}

define <vscale x 8 x half> @trn1_f16(<vscale x 8 x half> %a, <vscale x 8 x half> %b) {
; CHECK-LABEL: trn1_f16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    trn1 z0.h, z0.h, z1.h
; CHECK-NEXT:    ret
  %out = call <vscale x 8 x half> @llvm.aarch64.sve.trn1.nxv8f16(<vscale x 8 x half> %a,
                                                                 <vscale x 8 x half> %b)
  ret <vscale x 8 x half> %out
}

define <vscale x 2 x float> @trn1_f32_v2(<vscale x 2 x float> %a, <vscale x 2 x float> %b) {
; CHECK-LABEL: trn1_f32_v2:
; CHECK:       // %bb.0:
; CHECK-NEXT:    trn1 z0.d, z0.d, z1.d
; CHECK-NEXT:    ret
  %out = call <vscale x 2 x float> @llvm.aarch64.sve.trn1.nxv2f32(<vscale x 2 x float> %a,
                                                                  <vscale x 2 x float> %b)
  ret <vscale x 2 x float> %out
}

define <vscale x 4 x float> @trn1_f32(<vscale x 4 x float> %a, <vscale x 4 x float> %b) {
; CHECK-LABEL: trn1_f32:
; CHECK:       // %bb.0:
; CHECK-NEXT:    trn1 z0.s, z0.s, z1.s
; CHECK-NEXT:    ret
  %out = call <vscale x 4 x float> @llvm.aarch64.sve.trn1.nxv4f32(<vscale x 4 x float> %a,
                                                                  <vscale x 4 x float> %b)
  ret <vscale x 4 x float> %out
}

define <vscale x 2 x double> @trn1_f64(<vscale x 2 x double> %a, <vscale x 2 x double> %b) {
; CHECK-LABEL: trn1_f64:
; CHECK:       // %bb.0:
; CHECK-NEXT:    trn1 z0.d, z0.d, z1.d
; CHECK-NEXT:    ret
  %out = call <vscale x 2 x double> @llvm.aarch64.sve.trn1.nxv2f64(<vscale x 2 x double> %a,
                                                                   <vscale x 2 x double> %b)
  ret <vscale x 2 x double> %out
}

;
; TRN2
;

define <vscale x 16 x i1> @trn2_nxv16i1(<vscale x 16 x i1> %a, <vscale x 16 x i1> %b) {
; CHECK-LABEL: trn2_nxv16i1:
; CHECK:       // %bb.0:
; CHECK-NEXT:    trn2 p0.b, p0.b, p1.b
; CHECK-NEXT:    ret
  %out = call <vscale x 16 x i1> @llvm.aarch64.sve.trn2.nxv16i1(<vscale x 16 x i1> %a,
                                                                <vscale x 16 x i1> %b)
  ret <vscale x 16 x i1> %out
}

define <vscale x 8 x i1> @trn2_nxv8i1(<vscale x 8 x i1> %a, <vscale x 8 x i1> %b) {
; CHECK-LABEL: trn2_nxv8i1:
; CHECK:       // %bb.0:
; CHECK-NEXT:    trn2 p0.h, p0.h, p1.h
; CHECK-NEXT:    ret
  %out = call <vscale x 8 x i1> @llvm.aarch64.sve.trn2.nxv8i1(<vscale x 8 x i1> %a,
                                                              <vscale x 8 x i1> %b)
  ret <vscale x 8 x i1> %out
}

define <vscale x 4 x i1> @trn2_nxv4i1(<vscale x 4 x i1> %a, <vscale x 4 x i1> %b) {
; CHECK-LABEL: trn2_nxv4i1:
; CHECK:       // %bb.0:
; CHECK-NEXT:    trn2 p0.s, p0.s, p1.s
; CHECK-NEXT:    ret
  %out = call <vscale x 4 x i1> @llvm.aarch64.sve.trn2.nxv4i1(<vscale x 4 x i1> %a,
                                                              <vscale x 4 x i1> %b)
  ret <vscale x 4 x i1> %out
}

define <vscale x 2 x i1> @trn2_nxv2i1(<vscale x 2 x i1> %a, <vscale x 2 x i1> %b) {
; CHECK-LABEL: trn2_nxv2i1:
; CHECK:       // %bb.0:
; CHECK-NEXT:    trn2 p0.d, p0.d, p1.d
; CHECK-NEXT:    ret
  %out = call <vscale x 2 x i1> @llvm.aarch64.sve.trn2.nxv2i1(<vscale x 2 x i1> %a,
                                                              <vscale x 2 x i1> %b)
  ret <vscale x 2 x i1> %out
}

define <vscale x 16 x i1> @trn2_b16(<vscale x 16 x i1> %a, <vscale x 16 x i1> %b) {
; CHECK-LABEL: trn2_b16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    trn2 p0.h, p0.h, p1.h
; CHECK-NEXT:    ret
  %out = call <vscale x 16 x i1> @llvm.aarch64.sve.trn2.b16(<vscale x 16 x i1> %a,
                                                            <vscale x 16 x i1> %b)
  ret <vscale x 16 x i1> %out
}

define <vscale x 16 x i1> @trn2_b32(<vscale x 16 x i1> %a, <vscale x 16 x i1> %b) {
; CHECK-LABEL: trn2_b32:
; CHECK:       // %bb.0:
; CHECK-NEXT:    trn2 p0.s, p0.s, p1.s
; CHECK-NEXT:    ret
  %out = call <vscale x 16 x i1> @llvm.aarch64.sve.trn2.b32(<vscale x 16 x i1> %a,
                                                            <vscale x 16 x i1> %b)
  ret <vscale x 16 x i1> %out
}

define <vscale x 16 x i1> @trn2_b64(<vscale x 16 x i1> %a, <vscale x 16 x i1> %b) {
; CHECK-LABEL: trn2_b64:
; CHECK:       // %bb.0:
; CHECK-NEXT:    trn2 p0.d, p0.d, p1.d
; CHECK-NEXT:    ret
  %out = call <vscale x 16 x i1> @llvm.aarch64.sve.trn2.b64(<vscale x 16 x i1> %a,
                                                            <vscale x 16 x i1> %b)
  ret <vscale x 16 x i1> %out
}

define <vscale x 16 x i8> @trn2_i8(<vscale x 16 x i8> %a, <vscale x 16 x i8> %b) {
; CHECK-LABEL: trn2_i8:
; CHECK:       // %bb.0:
; CHECK-NEXT:    trn2 z0.b, z0.b, z1.b
; CHECK-NEXT:    ret
  %out = call <vscale x 16 x i8> @llvm.aarch64.sve.trn2.nxv16i8(<vscale x 16 x i8> %a,
                                                                <vscale x 16 x i8> %b)
  ret <vscale x 16 x i8> %out
}

define <vscale x 8 x i16> @trn2_i16(<vscale x 8 x i16> %a, <vscale x 8 x i16> %b) {
; CHECK-LABEL: trn2_i16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    trn2 z0.h, z0.h, z1.h
; CHECK-NEXT:    ret
  %out = call <vscale x 8 x i16> @llvm.aarch64.sve.trn2.nxv8i16(<vscale x 8 x i16> %a,
                                                                <vscale x 8 x i16> %b)
  ret <vscale x 8 x i16> %out
}

define <vscale x 4 x i32> @trn2_i32(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b) {
; CHECK-LABEL: trn2_i32:
; CHECK:       // %bb.0:
; CHECK-NEXT:    trn2 z0.s, z0.s, z1.s
; CHECK-NEXT:    ret
  %out = call <vscale x 4 x i32> @llvm.aarch64.sve.trn2.nxv4i32(<vscale x 4 x i32> %a,
                                                                <vscale x 4 x i32> %b)
  ret <vscale x 4 x i32> %out
}

define <vscale x 2 x i64> @trn2_i64(<vscale x 2 x i64> %a, <vscale x 2 x i64> %b) {
; CHECK-LABEL: trn2_i64:
; CHECK:       // %bb.0:
; CHECK-NEXT:    trn2 z0.d, z0.d, z1.d
; CHECK-NEXT:    ret
  %out = call <vscale x 2 x i64> @llvm.aarch64.sve.trn2.nxv2i64(<vscale x 2 x i64> %a,
                                                                <vscale x 2 x i64> %b)
  ret <vscale x 2 x i64> %out
}

define <vscale x 2 x half> @trn2_f16_v2(<vscale x 2 x half> %a, <vscale x 2 x half> %b) {
; CHECK-LABEL: trn2_f16_v2:
; CHECK:       // %bb.0:
; CHECK-NEXT:    trn2 z0.d, z0.d, z1.d
; CHECK-NEXT:    ret
  %out = call <vscale x 2 x half> @llvm.aarch64.sve.trn2.nxv2f16(<vscale x 2 x half> %a,
                                                                 <vscale x 2 x half> %b)
  ret <vscale x 2 x half> %out
}

define <vscale x 4 x half> @trn2_f16_v4(<vscale x 4 x half> %a, <vscale x 4 x half> %b) {
; CHECK-LABEL: trn2_f16_v4:
; CHECK:       // %bb.0:
; CHECK-NEXT:    trn2 z0.s, z0.s, z1.s
; CHECK-NEXT:    ret
  %out = call <vscale x 4 x half> @llvm.aarch64.sve.trn2.nxv4f16(<vscale x 4 x half> %a,
                                                                 <vscale x 4 x half> %b)
  ret <vscale x 4 x half> %out
}

define <vscale x 8 x bfloat> @trn2_bf16(<vscale x 8 x bfloat> %a, <vscale x 8 x bfloat> %b) #0 {
; CHECK-LABEL: trn2_bf16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    trn2 z0.h, z0.h, z1.h
; CHECK-NEXT:    ret
  %out = call <vscale x 8 x bfloat> @llvm.aarch64.sve.trn2.nxv8bf16(<vscale x 8 x bfloat> %a,
                                                                    <vscale x 8 x bfloat> %b)
  ret <vscale x 8 x bfloat> %out
}

define <vscale x 8 x half> @trn2_f16(<vscale x 8 x half> %a, <vscale x 8 x half> %b) {
; CHECK-LABEL: trn2_f16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    trn2 z0.h, z0.h, z1.h
; CHECK-NEXT:    ret
  %out = call <vscale x 8 x half> @llvm.aarch64.sve.trn2.nxv8f16(<vscale x 8 x half> %a,
                                                                 <vscale x 8 x half> %b)
  ret <vscale x 8 x half> %out
}

define <vscale x 2 x float> @trn2_f32_v2(<vscale x 2 x float> %a, <vscale x 2 x float> %b) {
; CHECK-LABEL: trn2_f32_v2:
; CHECK:       // %bb.0:
; CHECK-NEXT:    trn2 z0.d, z0.d, z1.d
; CHECK-NEXT:    ret
  %out = call <vscale x 2 x float> @llvm.aarch64.sve.trn2.nxv2f32(<vscale x 2 x float> %a,
                                                                  <vscale x 2 x float> %b)
  ret <vscale x 2 x float> %out
}

define <vscale x 4 x float> @trn2_f32(<vscale x 4 x float> %a, <vscale x 4 x float> %b) {
; CHECK-LABEL: trn2_f32:
; CHECK:       // %bb.0:
; CHECK-NEXT:    trn2 z0.s, z0.s, z1.s
; CHECK-NEXT:    ret
  %out = call <vscale x 4 x float> @llvm.aarch64.sve.trn2.nxv4f32(<vscale x 4 x float> %a,
                                                                  <vscale x 4 x float> %b)
  ret <vscale x 4 x float> %out
}

define <vscale x 2 x double> @trn2_f64(<vscale x 2 x double> %a, <vscale x 2 x double> %b) {
; CHECK-LABEL: trn2_f64:
; CHECK:       // %bb.0:
; CHECK-NEXT:    trn2 z0.d, z0.d, z1.d
; CHECK-NEXT:    ret
  %out = call <vscale x 2 x double> @llvm.aarch64.sve.trn2.nxv2f64(<vscale x 2 x double> %a,
                                                                   <vscale x 2 x double> %b)
  ret <vscale x 2 x double> %out
}
1752 define <vscale x 16 x i1> @uzp1_nxv16i1(<vscale x 16 x i1> %a, <vscale x 16 x i1> %b) {
1753 ; CHECK-LABEL: uzp1_nxv16i1:
1755 ; CHECK-NEXT: uzp1 p0.b, p0.b, p1.b
1757 %out = call <vscale x 16 x i1> @llvm.aarch64.sve.uzp1.nxv16i1(<vscale x 16 x i1> %a,
1758 <vscale x 16 x i1> %b)
1759 ret <vscale x 16 x i1> %out
1762 define <vscale x 8 x i1> @uzp1_nxv8i1(<vscale x 8 x i1> %a, <vscale x 8 x i1> %b) {
1763 ; CHECK-LABEL: uzp1_nxv8i1:
1765 ; CHECK-NEXT: uzp1 p0.h, p0.h, p1.h
1767 %out = call <vscale x 8 x i1> @llvm.aarch64.sve.uzp1.nxv8i1(<vscale x 8 x i1> %a,
1768 <vscale x 8 x i1> %b)
1769 ret <vscale x 8 x i1> %out
1772 define <vscale x 4 x i1> @uzp1_nxv4i1(<vscale x 4 x i1> %a, <vscale x 4 x i1> %b) {
1773 ; CHECK-LABEL: uzp1_nxv4i1:
1775 ; CHECK-NEXT: uzp1 p0.s, p0.s, p1.s
1777 %out = call <vscale x 4 x i1> @llvm.aarch64.sve.uzp1.nxv4i1(<vscale x 4 x i1> %a,
1778 <vscale x 4 x i1> %b)
1779 ret <vscale x 4 x i1> %out
1782 define <vscale x 2 x i1> @uzp1_nxv2i1(<vscale x 2 x i1> %a, <vscale x 2 x i1> %b) {
1783 ; CHECK-LABEL: uzp1_nxv2i1:
1785 ; CHECK-NEXT: uzp1 p0.d, p0.d, p1.d
1787 %out = call <vscale x 2 x i1> @llvm.aarch64.sve.uzp1.nxv2i1(<vscale x 2 x i1> %a,
1788 <vscale x 2 x i1> %b)
1789 ret <vscale x 2 x i1> %out
1792 define <vscale x 16 x i1> @uzp1_b16(<vscale x 16 x i1> %a, <vscale x 16 x i1> %b) {
1793 ; CHECK-LABEL: uzp1_b16:
1795 ; CHECK-NEXT: uzp1 p0.h, p0.h, p1.h
1797 %out = call <vscale x 16 x i1> @llvm.aarch64.sve.uzp1.b16(<vscale x 16 x i1> %a,
1798 <vscale x 16 x i1> %b)
1799 ret <vscale x 16 x i1> %out
1802 define <vscale x 16 x i1> @uzp1_b32(<vscale x 16 x i1> %a, <vscale x 16 x i1> %b) {
1803 ; CHECK-LABEL: uzp1_b32:
1805 ; CHECK-NEXT: uzp1 p0.s, p0.s, p1.s
1807 %out = call <vscale x 16 x i1> @llvm.aarch64.sve.uzp1.b32(<vscale x 16 x i1> %a,
1808 <vscale x 16 x i1> %b)
1809 ret <vscale x 16 x i1> %out
1812 define <vscale x 16 x i1> @uzp1_b64(<vscale x 16 x i1> %a, <vscale x 16 x i1> %b) {
1813 ; CHECK-LABEL: uzp1_b64:
1815 ; CHECK-NEXT: uzp1 p0.d, p0.d, p1.d
1817 %out = call <vscale x 16 x i1> @llvm.aarch64.sve.uzp1.b64(<vscale x 16 x i1> %a,
1818 <vscale x 16 x i1> %b)
1819 ret <vscale x 16 x i1> %out
define <vscale x 16 x i8> @uzp1_i8(<vscale x 16 x i8> %a, <vscale x 16 x i8> %b) {
; CHECK-LABEL: uzp1_i8:
; CHECK:       // %bb.0:
; CHECK-NEXT:    uzp1 z0.b, z0.b, z1.b
; CHECK-NEXT:    ret
  %out = call <vscale x 16 x i8> @llvm.aarch64.sve.uzp1.nxv16i8(<vscale x 16 x i8> %a,
                                                                <vscale x 16 x i8> %b)
  ret <vscale x 16 x i8> %out
}

define <vscale x 8 x i16> @uzp1_i16(<vscale x 8 x i16> %a, <vscale x 8 x i16> %b) {
; CHECK-LABEL: uzp1_i16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    uzp1 z0.h, z0.h, z1.h
; CHECK-NEXT:    ret
  %out = call <vscale x 8 x i16> @llvm.aarch64.sve.uzp1.nxv8i16(<vscale x 8 x i16> %a,
                                                                <vscale x 8 x i16> %b)
  ret <vscale x 8 x i16> %out
}

define <vscale x 4 x i32> @uzp1_i32(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b) {
; CHECK-LABEL: uzp1_i32:
; CHECK:       // %bb.0:
; CHECK-NEXT:    uzp1 z0.s, z0.s, z1.s
; CHECK-NEXT:    ret
  %out = call <vscale x 4 x i32> @llvm.aarch64.sve.uzp1.nxv4i32(<vscale x 4 x i32> %a,
                                                                <vscale x 4 x i32> %b)
  ret <vscale x 4 x i32> %out
}

define <vscale x 2 x i64> @uzp1_i64(<vscale x 2 x i64> %a, <vscale x 2 x i64> %b) {
; CHECK-LABEL: uzp1_i64:
; CHECK:       // %bb.0:
; CHECK-NEXT:    uzp1 z0.d, z0.d, z1.d
; CHECK-NEXT:    ret
  %out = call <vscale x 2 x i64> @llvm.aarch64.sve.uzp1.nxv2i64(<vscale x 2 x i64> %a,
                                                                <vscale x 2 x i64> %b)
  ret <vscale x 2 x i64> %out
}

define <vscale x 2 x half> @uzp1_f16_v2(<vscale x 2 x half> %a, <vscale x 2 x half> %b) {
; CHECK-LABEL: uzp1_f16_v2:
; CHECK:       // %bb.0:
; CHECK-NEXT:    uzp1 z0.d, z0.d, z1.d
; CHECK-NEXT:    ret
  %out = call <vscale x 2 x half> @llvm.aarch64.sve.uzp1.nxv2f16(<vscale x 2 x half> %a,
                                                                 <vscale x 2 x half> %b)
  ret <vscale x 2 x half> %out
}

define <vscale x 4 x half> @uzp1_f16_v4(<vscale x 4 x half> %a, <vscale x 4 x half> %b) {
; CHECK-LABEL: uzp1_f16_v4:
; CHECK:       // %bb.0:
; CHECK-NEXT:    uzp1 z0.s, z0.s, z1.s
; CHECK-NEXT:    ret
  %out = call <vscale x 4 x half> @llvm.aarch64.sve.uzp1.nxv4f16(<vscale x 4 x half> %a,
                                                                 <vscale x 4 x half> %b)
  ret <vscale x 4 x half> %out
}

define <vscale x 8 x bfloat> @uzp1_bf16(<vscale x 8 x bfloat> %a, <vscale x 8 x bfloat> %b) #0 {
; CHECK-LABEL: uzp1_bf16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    uzp1 z0.h, z0.h, z1.h
; CHECK-NEXT:    ret
  %out = call <vscale x 8 x bfloat> @llvm.aarch64.sve.uzp1.nxv8bf16(<vscale x 8 x bfloat> %a,
                                                                    <vscale x 8 x bfloat> %b)
  ret <vscale x 8 x bfloat> %out
}

define <vscale x 8 x half> @uzp1_f16(<vscale x 8 x half> %a, <vscale x 8 x half> %b) {
; CHECK-LABEL: uzp1_f16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    uzp1 z0.h, z0.h, z1.h
; CHECK-NEXT:    ret
  %out = call <vscale x 8 x half> @llvm.aarch64.sve.uzp1.nxv8f16(<vscale x 8 x half> %a,
                                                                 <vscale x 8 x half> %b)
  ret <vscale x 8 x half> %out
}

define <vscale x 2 x float> @uzp1_f32_v2(<vscale x 2 x float> %a, <vscale x 2 x float> %b) {
; CHECK-LABEL: uzp1_f32_v2:
; CHECK:       // %bb.0:
; CHECK-NEXT:    uzp1 z0.d, z0.d, z1.d
; CHECK-NEXT:    ret
  %out = call <vscale x 2 x float> @llvm.aarch64.sve.uzp1.nxv2f32(<vscale x 2 x float> %a,
                                                                  <vscale x 2 x float> %b)
  ret <vscale x 2 x float> %out
}

define <vscale x 4 x float> @uzp1_f32(<vscale x 4 x float> %a, <vscale x 4 x float> %b) {
; CHECK-LABEL: uzp1_f32:
; CHECK:       // %bb.0:
; CHECK-NEXT:    uzp1 z0.s, z0.s, z1.s
; CHECK-NEXT:    ret
  %out = call <vscale x 4 x float> @llvm.aarch64.sve.uzp1.nxv4f32(<vscale x 4 x float> %a,
                                                                  <vscale x 4 x float> %b)
  ret <vscale x 4 x float> %out
}

define <vscale x 2 x double> @uzp1_f64(<vscale x 2 x double> %a, <vscale x 2 x double> %b) {
; CHECK-LABEL: uzp1_f64:
; CHECK:       // %bb.0:
; CHECK-NEXT:    uzp1 z0.d, z0.d, z1.d
; CHECK-NEXT:    ret
  %out = call <vscale x 2 x double> @llvm.aarch64.sve.uzp1.nxv2f64(<vscale x 2 x double> %a,
                                                                   <vscale x 2 x double> %b)
  ret <vscale x 2 x double> %out
}
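
;
; UZP2
;

; Informally, uzp2 concatenates the odd-numbered elements of its two inputs,
; e.g. uzp2([a0,a1,a2,a3], [b0,b1,b2,b3]) == [a1,a3,b1,b3]. The b16/b32/b64
; tests reuse the full <vscale x 16 x i1> predicate type but exercise the
; halfword, word and doubleword forms of the instruction.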
define <vscale x 16 x i1> @uzp2_nxv16i1(<vscale x 16 x i1> %a, <vscale x 16 x i1> %b) {
; CHECK-LABEL: uzp2_nxv16i1:
; CHECK:       // %bb.0:
; CHECK-NEXT:    uzp2 p0.b, p0.b, p1.b
; CHECK-NEXT:    ret
  %out = call <vscale x 16 x i1> @llvm.aarch64.sve.uzp2.nxv16i1(<vscale x 16 x i1> %a,
                                                                <vscale x 16 x i1> %b)
  ret <vscale x 16 x i1> %out
}

define <vscale x 8 x i1> @uzp2_nxv8i1(<vscale x 8 x i1> %a, <vscale x 8 x i1> %b) {
; CHECK-LABEL: uzp2_nxv8i1:
; CHECK:       // %bb.0:
; CHECK-NEXT:    uzp2 p0.h, p0.h, p1.h
; CHECK-NEXT:    ret
  %out = call <vscale x 8 x i1> @llvm.aarch64.sve.uzp2.nxv8i1(<vscale x 8 x i1> %a,
                                                              <vscale x 8 x i1> %b)
  ret <vscale x 8 x i1> %out
}

define <vscale x 4 x i1> @uzp2_nxv4i1(<vscale x 4 x i1> %a, <vscale x 4 x i1> %b) {
; CHECK-LABEL: uzp2_nxv4i1:
; CHECK:       // %bb.0:
; CHECK-NEXT:    uzp2 p0.s, p0.s, p1.s
; CHECK-NEXT:    ret
  %out = call <vscale x 4 x i1> @llvm.aarch64.sve.uzp2.nxv4i1(<vscale x 4 x i1> %a,
                                                              <vscale x 4 x i1> %b)
  ret <vscale x 4 x i1> %out
}

define <vscale x 2 x i1> @uzp2_nxv2i1(<vscale x 2 x i1> %a, <vscale x 2 x i1> %b) {
; CHECK-LABEL: uzp2_nxv2i1:
; CHECK:       // %bb.0:
; CHECK-NEXT:    uzp2 p0.d, p0.d, p1.d
; CHECK-NEXT:    ret
  %out = call <vscale x 2 x i1> @llvm.aarch64.sve.uzp2.nxv2i1(<vscale x 2 x i1> %a,
                                                              <vscale x 2 x i1> %b)
  ret <vscale x 2 x i1> %out
}

define <vscale x 16 x i1> @uzp2_b16(<vscale x 16 x i1> %a, <vscale x 16 x i1> %b) {
; CHECK-LABEL: uzp2_b16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    uzp2 p0.h, p0.h, p1.h
; CHECK-NEXT:    ret
  %out = call <vscale x 16 x i1> @llvm.aarch64.sve.uzp2.b16(<vscale x 16 x i1> %a,
                                                            <vscale x 16 x i1> %b)
  ret <vscale x 16 x i1> %out
}

define <vscale x 16 x i1> @uzp2_b32(<vscale x 16 x i1> %a, <vscale x 16 x i1> %b) {
; CHECK-LABEL: uzp2_b32:
; CHECK:       // %bb.0:
; CHECK-NEXT:    uzp2 p0.s, p0.s, p1.s
; CHECK-NEXT:    ret
  %out = call <vscale x 16 x i1> @llvm.aarch64.sve.uzp2.b32(<vscale x 16 x i1> %a,
                                                            <vscale x 16 x i1> %b)
  ret <vscale x 16 x i1> %out
}

define <vscale x 16 x i1> @uzp2_b64(<vscale x 16 x i1> %a, <vscale x 16 x i1> %b) {
; CHECK-LABEL: uzp2_b64:
; CHECK:       // %bb.0:
; CHECK-NEXT:    uzp2 p0.d, p0.d, p1.d
; CHECK-NEXT:    ret
  %out = call <vscale x 16 x i1> @llvm.aarch64.sve.uzp2.b64(<vscale x 16 x i1> %a,
                                                            <vscale x 16 x i1> %b)
  ret <vscale x 16 x i1> %out
}

define <vscale x 16 x i8> @uzp2_i8(<vscale x 16 x i8> %a, <vscale x 16 x i8> %b) {
; CHECK-LABEL: uzp2_i8:
; CHECK:       // %bb.0:
; CHECK-NEXT:    uzp2 z0.b, z0.b, z1.b
; CHECK-NEXT:    ret
  %out = call <vscale x 16 x i8> @llvm.aarch64.sve.uzp2.nxv16i8(<vscale x 16 x i8> %a,
                                                                <vscale x 16 x i8> %b)
  ret <vscale x 16 x i8> %out
}

define <vscale x 8 x i16> @uzp2_i16(<vscale x 8 x i16> %a, <vscale x 8 x i16> %b) {
; CHECK-LABEL: uzp2_i16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    uzp2 z0.h, z0.h, z1.h
; CHECK-NEXT:    ret
  %out = call <vscale x 8 x i16> @llvm.aarch64.sve.uzp2.nxv8i16(<vscale x 8 x i16> %a,
                                                                <vscale x 8 x i16> %b)
  ret <vscale x 8 x i16> %out
}

define <vscale x 4 x i32> @uzp2_i32(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b) {
; CHECK-LABEL: uzp2_i32:
; CHECK:       // %bb.0:
; CHECK-NEXT:    uzp2 z0.s, z0.s, z1.s
; CHECK-NEXT:    ret
  %out = call <vscale x 4 x i32> @llvm.aarch64.sve.uzp2.nxv4i32(<vscale x 4 x i32> %a,
                                                                <vscale x 4 x i32> %b)
  ret <vscale x 4 x i32> %out
}

define <vscale x 2 x i64> @uzp2_i64(<vscale x 2 x i64> %a, <vscale x 2 x i64> %b) {
; CHECK-LABEL: uzp2_i64:
; CHECK:       // %bb.0:
; CHECK-NEXT:    uzp2 z0.d, z0.d, z1.d
; CHECK-NEXT:    ret
  %out = call <vscale x 2 x i64> @llvm.aarch64.sve.uzp2.nxv2i64(<vscale x 2 x i64> %a,
                                                                <vscale x 2 x i64> %b)
  ret <vscale x 2 x i64> %out
}

define <vscale x 2 x half> @uzp2_f16_v2(<vscale x 2 x half> %a, <vscale x 2 x half> %b) {
; CHECK-LABEL: uzp2_f16_v2:
; CHECK:       // %bb.0:
; CHECK-NEXT:    uzp2 z0.d, z0.d, z1.d
; CHECK-NEXT:    ret
  %out = call <vscale x 2 x half> @llvm.aarch64.sve.uzp2.nxv2f16(<vscale x 2 x half> %a,
                                                                 <vscale x 2 x half> %b)
  ret <vscale x 2 x half> %out
}

define <vscale x 4 x half> @uzp2_f16_v4(<vscale x 4 x half> %a, <vscale x 4 x half> %b) {
; CHECK-LABEL: uzp2_f16_v4:
; CHECK:       // %bb.0:
; CHECK-NEXT:    uzp2 z0.s, z0.s, z1.s
; CHECK-NEXT:    ret
  %out = call <vscale x 4 x half> @llvm.aarch64.sve.uzp2.nxv4f16(<vscale x 4 x half> %a,
                                                                 <vscale x 4 x half> %b)
  ret <vscale x 4 x half> %out
}

define <vscale x 8 x bfloat> @uzp2_bf16(<vscale x 8 x bfloat> %a, <vscale x 8 x bfloat> %b) #0 {
; CHECK-LABEL: uzp2_bf16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    uzp2 z0.h, z0.h, z1.h
; CHECK-NEXT:    ret
  %out = call <vscale x 8 x bfloat> @llvm.aarch64.sve.uzp2.nxv8bf16(<vscale x 8 x bfloat> %a,
                                                                    <vscale x 8 x bfloat> %b)
  ret <vscale x 8 x bfloat> %out
}

define <vscale x 8 x half> @uzp2_f16(<vscale x 8 x half> %a, <vscale x 8 x half> %b) {
; CHECK-LABEL: uzp2_f16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    uzp2 z0.h, z0.h, z1.h
; CHECK-NEXT:    ret
  %out = call <vscale x 8 x half> @llvm.aarch64.sve.uzp2.nxv8f16(<vscale x 8 x half> %a,
                                                                 <vscale x 8 x half> %b)
  ret <vscale x 8 x half> %out
}

define <vscale x 2 x float> @uzp2_f32_v2(<vscale x 2 x float> %a, <vscale x 2 x float> %b) {
; CHECK-LABEL: uzp2_f32_v2:
; CHECK:       // %bb.0:
; CHECK-NEXT:    uzp2 z0.d, z0.d, z1.d
; CHECK-NEXT:    ret
  %out = call <vscale x 2 x float> @llvm.aarch64.sve.uzp2.nxv2f32(<vscale x 2 x float> %a,
                                                                  <vscale x 2 x float> %b)
  ret <vscale x 2 x float> %out
}

define <vscale x 4 x float> @uzp2_f32(<vscale x 4 x float> %a, <vscale x 4 x float> %b) {
; CHECK-LABEL: uzp2_f32:
; CHECK:       // %bb.0:
; CHECK-NEXT:    uzp2 z0.s, z0.s, z1.s
; CHECK-NEXT:    ret
  %out = call <vscale x 4 x float> @llvm.aarch64.sve.uzp2.nxv4f32(<vscale x 4 x float> %a,
                                                                  <vscale x 4 x float> %b)
  ret <vscale x 4 x float> %out
}

define <vscale x 2 x double> @uzp2_f64(<vscale x 2 x double> %a, <vscale x 2 x double> %b) {
; CHECK-LABEL: uzp2_f64:
; CHECK:       // %bb.0:
; CHECK-NEXT:    uzp2 z0.d, z0.d, z1.d
; CHECK-NEXT:    ret
  %out = call <vscale x 2 x double> @llvm.aarch64.sve.uzp2.nxv2f64(<vscale x 2 x double> %a,
                                                                   <vscale x 2 x double> %b)
  ret <vscale x 2 x double> %out
}
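
;
; ZIP1
;

; Informally, zip1 interleaves the low halves of its two inputs,
; e.g. zip1([a0,a1,a2,a3], [b0,b1,b2,b3]) == [a0,b0,a1,b1].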
define <vscale x 16 x i1> @zip1_nxv16i1(<vscale x 16 x i1> %a, <vscale x 16 x i1> %b) {
; CHECK-LABEL: zip1_nxv16i1:
; CHECK:       // %bb.0:
; CHECK-NEXT:    zip1 p0.b, p0.b, p1.b
; CHECK-NEXT:    ret
  %out = call <vscale x 16 x i1> @llvm.aarch64.sve.zip1.nxv16i1(<vscale x 16 x i1> %a,
                                                                <vscale x 16 x i1> %b)
  ret <vscale x 16 x i1> %out
}

define <vscale x 8 x i1> @zip1_nxv8i1(<vscale x 8 x i1> %a, <vscale x 8 x i1> %b) {
; CHECK-LABEL: zip1_nxv8i1:
; CHECK:       // %bb.0:
; CHECK-NEXT:    zip1 p0.h, p0.h, p1.h
; CHECK-NEXT:    ret
  %out = call <vscale x 8 x i1> @llvm.aarch64.sve.zip1.nxv8i1(<vscale x 8 x i1> %a,
                                                              <vscale x 8 x i1> %b)
  ret <vscale x 8 x i1> %out
}

define <vscale x 4 x i1> @zip1_nxv4i1(<vscale x 4 x i1> %a, <vscale x 4 x i1> %b) {
; CHECK-LABEL: zip1_nxv4i1:
; CHECK:       // %bb.0:
; CHECK-NEXT:    zip1 p0.s, p0.s, p1.s
; CHECK-NEXT:    ret
  %out = call <vscale x 4 x i1> @llvm.aarch64.sve.zip1.nxv4i1(<vscale x 4 x i1> %a,
                                                              <vscale x 4 x i1> %b)
  ret <vscale x 4 x i1> %out
}

define <vscale x 2 x i1> @zip1_nxv2i1(<vscale x 2 x i1> %a, <vscale x 2 x i1> %b) {
; CHECK-LABEL: zip1_nxv2i1:
; CHECK:       // %bb.0:
; CHECK-NEXT:    zip1 p0.d, p0.d, p1.d
; CHECK-NEXT:    ret
  %out = call <vscale x 2 x i1> @llvm.aarch64.sve.zip1.nxv2i1(<vscale x 2 x i1> %a,
                                                              <vscale x 2 x i1> %b)
  ret <vscale x 2 x i1> %out
}

define <vscale x 16 x i1> @zip1_b16(<vscale x 16 x i1> %a, <vscale x 16 x i1> %b) {
; CHECK-LABEL: zip1_b16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    zip1 p0.h, p0.h, p1.h
; CHECK-NEXT:    ret
  %out = call <vscale x 16 x i1> @llvm.aarch64.sve.zip1.b16(<vscale x 16 x i1> %a,
                                                            <vscale x 16 x i1> %b)
  ret <vscale x 16 x i1> %out
}

define <vscale x 16 x i1> @zip1_b32(<vscale x 16 x i1> %a, <vscale x 16 x i1> %b) {
; CHECK-LABEL: zip1_b32:
; CHECK:       // %bb.0:
; CHECK-NEXT:    zip1 p0.s, p0.s, p1.s
; CHECK-NEXT:    ret
  %out = call <vscale x 16 x i1> @llvm.aarch64.sve.zip1.b32(<vscale x 16 x i1> %a,
                                                            <vscale x 16 x i1> %b)
  ret <vscale x 16 x i1> %out
}

define <vscale x 16 x i1> @zip1_b64(<vscale x 16 x i1> %a, <vscale x 16 x i1> %b) {
; CHECK-LABEL: zip1_b64:
; CHECK:       // %bb.0:
; CHECK-NEXT:    zip1 p0.d, p0.d, p1.d
; CHECK-NEXT:    ret
  %out = call <vscale x 16 x i1> @llvm.aarch64.sve.zip1.b64(<vscale x 16 x i1> %a,
                                                            <vscale x 16 x i1> %b)
  ret <vscale x 16 x i1> %out
}

define <vscale x 16 x i8> @zip1_i8(<vscale x 16 x i8> %a, <vscale x 16 x i8> %b) {
; CHECK-LABEL: zip1_i8:
; CHECK:       // %bb.0:
; CHECK-NEXT:    zip1 z0.b, z0.b, z1.b
; CHECK-NEXT:    ret
  %out = call <vscale x 16 x i8> @llvm.aarch64.sve.zip1.nxv16i8(<vscale x 16 x i8> %a,
                                                                <vscale x 16 x i8> %b)
  ret <vscale x 16 x i8> %out
}

define <vscale x 8 x i16> @zip1_i16(<vscale x 8 x i16> %a, <vscale x 8 x i16> %b) {
; CHECK-LABEL: zip1_i16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    zip1 z0.h, z0.h, z1.h
; CHECK-NEXT:    ret
  %out = call <vscale x 8 x i16> @llvm.aarch64.sve.zip1.nxv8i16(<vscale x 8 x i16> %a,
                                                                <vscale x 8 x i16> %b)
  ret <vscale x 8 x i16> %out
}

define <vscale x 4 x i32> @zip1_i32(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b) {
; CHECK-LABEL: zip1_i32:
; CHECK:       // %bb.0:
; CHECK-NEXT:    zip1 z0.s, z0.s, z1.s
; CHECK-NEXT:    ret
  %out = call <vscale x 4 x i32> @llvm.aarch64.sve.zip1.nxv4i32(<vscale x 4 x i32> %a,
                                                                <vscale x 4 x i32> %b)
  ret <vscale x 4 x i32> %out
}

define <vscale x 2 x i64> @zip1_i64(<vscale x 2 x i64> %a, <vscale x 2 x i64> %b) {
; CHECK-LABEL: zip1_i64:
; CHECK:       // %bb.0:
; CHECK-NEXT:    zip1 z0.d, z0.d, z1.d
; CHECK-NEXT:    ret
  %out = call <vscale x 2 x i64> @llvm.aarch64.sve.zip1.nxv2i64(<vscale x 2 x i64> %a,
                                                                <vscale x 2 x i64> %b)
  ret <vscale x 2 x i64> %out
}

define <vscale x 2 x half> @zip1_f16_v2(<vscale x 2 x half> %a, <vscale x 2 x half> %b) {
; CHECK-LABEL: zip1_f16_v2:
; CHECK:       // %bb.0:
; CHECK-NEXT:    zip1 z0.d, z0.d, z1.d
; CHECK-NEXT:    ret
  %out = call <vscale x 2 x half> @llvm.aarch64.sve.zip1.nxv2f16(<vscale x 2 x half> %a,
                                                                 <vscale x 2 x half> %b)
  ret <vscale x 2 x half> %out
}

define <vscale x 4 x half> @zip1_f16_v4(<vscale x 4 x half> %a, <vscale x 4 x half> %b) {
; CHECK-LABEL: zip1_f16_v4:
; CHECK:       // %bb.0:
; CHECK-NEXT:    zip1 z0.s, z0.s, z1.s
; CHECK-NEXT:    ret
  %out = call <vscale x 4 x half> @llvm.aarch64.sve.zip1.nxv4f16(<vscale x 4 x half> %a,
                                                                 <vscale x 4 x half> %b)
  ret <vscale x 4 x half> %out
}

define <vscale x 8 x bfloat> @zip1_bf16(<vscale x 8 x bfloat> %a, <vscale x 8 x bfloat> %b) #0 {
; CHECK-LABEL: zip1_bf16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    zip1 z0.h, z0.h, z1.h
; CHECK-NEXT:    ret
  %out = call <vscale x 8 x bfloat> @llvm.aarch64.sve.zip1.nxv8bf16(<vscale x 8 x bfloat> %a,
                                                                    <vscale x 8 x bfloat> %b)
  ret <vscale x 8 x bfloat> %out
}

define <vscale x 8 x half> @zip1_f16(<vscale x 8 x half> %a, <vscale x 8 x half> %b) {
; CHECK-LABEL: zip1_f16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    zip1 z0.h, z0.h, z1.h
; CHECK-NEXT:    ret
  %out = call <vscale x 8 x half> @llvm.aarch64.sve.zip1.nxv8f16(<vscale x 8 x half> %a,
                                                                 <vscale x 8 x half> %b)
  ret <vscale x 8 x half> %out
}

define <vscale x 2 x float> @zip1_f32_v2(<vscale x 2 x float> %a, <vscale x 2 x float> %b) {
; CHECK-LABEL: zip1_f32_v2:
; CHECK:       // %bb.0:
; CHECK-NEXT:    zip1 z0.d, z0.d, z1.d
; CHECK-NEXT:    ret
  %out = call <vscale x 2 x float> @llvm.aarch64.sve.zip1.nxv2f32(<vscale x 2 x float> %a,
                                                                  <vscale x 2 x float> %b)
  ret <vscale x 2 x float> %out
}

define <vscale x 4 x float> @zip1_f32(<vscale x 4 x float> %a, <vscale x 4 x float> %b) {
; CHECK-LABEL: zip1_f32:
; CHECK:       // %bb.0:
; CHECK-NEXT:    zip1 z0.s, z0.s, z1.s
; CHECK-NEXT:    ret
  %out = call <vscale x 4 x float> @llvm.aarch64.sve.zip1.nxv4f32(<vscale x 4 x float> %a,
                                                                  <vscale x 4 x float> %b)
  ret <vscale x 4 x float> %out
}

define <vscale x 2 x double> @zip1_f64(<vscale x 2 x double> %a, <vscale x 2 x double> %b) {
; CHECK-LABEL: zip1_f64:
; CHECK:       // %bb.0:
; CHECK-NEXT:    zip1 z0.d, z0.d, z1.d
; CHECK-NEXT:    ret
  %out = call <vscale x 2 x double> @llvm.aarch64.sve.zip1.nxv2f64(<vscale x 2 x double> %a,
                                                                   <vscale x 2 x double> %b)
  ret <vscale x 2 x double> %out
}
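
;
; ZIP2
;

; Informally, zip2 interleaves the high halves of its two inputs,
; e.g. zip2([a0,a1,a2,a3], [b0,b1,b2,b3]) == [a2,b2,a3,b3].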
define <vscale x 16 x i1> @zip2_nxv16i1(<vscale x 16 x i1> %a, <vscale x 16 x i1> %b) {
; CHECK-LABEL: zip2_nxv16i1:
; CHECK:       // %bb.0:
; CHECK-NEXT:    zip2 p0.b, p0.b, p1.b
; CHECK-NEXT:    ret
  %out = call <vscale x 16 x i1> @llvm.aarch64.sve.zip2.nxv16i1(<vscale x 16 x i1> %a,
                                                                <vscale x 16 x i1> %b)
  ret <vscale x 16 x i1> %out
}

define <vscale x 8 x i1> @zip2_nxv8i1(<vscale x 8 x i1> %a, <vscale x 8 x i1> %b) {
; CHECK-LABEL: zip2_nxv8i1:
; CHECK:       // %bb.0:
; CHECK-NEXT:    zip2 p0.h, p0.h, p1.h
; CHECK-NEXT:    ret
  %out = call <vscale x 8 x i1> @llvm.aarch64.sve.zip2.nxv8i1(<vscale x 8 x i1> %a,
                                                              <vscale x 8 x i1> %b)
  ret <vscale x 8 x i1> %out
}

define <vscale x 4 x i1> @zip2_nxv4i1(<vscale x 4 x i1> %a, <vscale x 4 x i1> %b) {
; CHECK-LABEL: zip2_nxv4i1:
; CHECK:       // %bb.0:
; CHECK-NEXT:    zip2 p0.s, p0.s, p1.s
; CHECK-NEXT:    ret
  %out = call <vscale x 4 x i1> @llvm.aarch64.sve.zip2.nxv4i1(<vscale x 4 x i1> %a,
                                                              <vscale x 4 x i1> %b)
  ret <vscale x 4 x i1> %out
}

define <vscale x 2 x i1> @zip2_nxv2i1(<vscale x 2 x i1> %a, <vscale x 2 x i1> %b) {
; CHECK-LABEL: zip2_nxv2i1:
; CHECK:       // %bb.0:
; CHECK-NEXT:    zip2 p0.d, p0.d, p1.d
; CHECK-NEXT:    ret
  %out = call <vscale x 2 x i1> @llvm.aarch64.sve.zip2.nxv2i1(<vscale x 2 x i1> %a,
                                                              <vscale x 2 x i1> %b)
  ret <vscale x 2 x i1> %out
}

define <vscale x 16 x i1> @zip2_b16(<vscale x 16 x i1> %a, <vscale x 16 x i1> %b) {
; CHECK-LABEL: zip2_b16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    zip2 p0.h, p0.h, p1.h
; CHECK-NEXT:    ret
  %out = call <vscale x 16 x i1> @llvm.aarch64.sve.zip2.b16(<vscale x 16 x i1> %a,
                                                            <vscale x 16 x i1> %b)
  ret <vscale x 16 x i1> %out
}

define <vscale x 16 x i1> @zip2_b32(<vscale x 16 x i1> %a, <vscale x 16 x i1> %b) {
; CHECK-LABEL: zip2_b32:
; CHECK:       // %bb.0:
; CHECK-NEXT:    zip2 p0.s, p0.s, p1.s
; CHECK-NEXT:    ret
  %out = call <vscale x 16 x i1> @llvm.aarch64.sve.zip2.b32(<vscale x 16 x i1> %a,
                                                            <vscale x 16 x i1> %b)
  ret <vscale x 16 x i1> %out
}

define <vscale x 16 x i1> @zip2_b64(<vscale x 16 x i1> %a, <vscale x 16 x i1> %b) {
; CHECK-LABEL: zip2_b64:
; CHECK:       // %bb.0:
; CHECK-NEXT:    zip2 p0.d, p0.d, p1.d
; CHECK-NEXT:    ret
  %out = call <vscale x 16 x i1> @llvm.aarch64.sve.zip2.b64(<vscale x 16 x i1> %a,
                                                            <vscale x 16 x i1> %b)
  ret <vscale x 16 x i1> %out
}

define <vscale x 16 x i8> @zip2_i8(<vscale x 16 x i8> %a, <vscale x 16 x i8> %b) {
; CHECK-LABEL: zip2_i8:
; CHECK:       // %bb.0:
; CHECK-NEXT:    zip2 z0.b, z0.b, z1.b
; CHECK-NEXT:    ret
  %out = call <vscale x 16 x i8> @llvm.aarch64.sve.zip2.nxv16i8(<vscale x 16 x i8> %a,
                                                                <vscale x 16 x i8> %b)
  ret <vscale x 16 x i8> %out
}

define <vscale x 8 x i16> @zip2_i16(<vscale x 8 x i16> %a, <vscale x 8 x i16> %b) {
; CHECK-LABEL: zip2_i16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    zip2 z0.h, z0.h, z1.h
; CHECK-NEXT:    ret
  %out = call <vscale x 8 x i16> @llvm.aarch64.sve.zip2.nxv8i16(<vscale x 8 x i16> %a,
                                                                <vscale x 8 x i16> %b)
  ret <vscale x 8 x i16> %out
}

define <vscale x 4 x i32> @zip2_i32(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b) {
; CHECK-LABEL: zip2_i32:
; CHECK:       // %bb.0:
; CHECK-NEXT:    zip2 z0.s, z0.s, z1.s
; CHECK-NEXT:    ret
  %out = call <vscale x 4 x i32> @llvm.aarch64.sve.zip2.nxv4i32(<vscale x 4 x i32> %a,
                                                                <vscale x 4 x i32> %b)
  ret <vscale x 4 x i32> %out
}

define <vscale x 2 x i64> @zip2_i64(<vscale x 2 x i64> %a, <vscale x 2 x i64> %b) {
; CHECK-LABEL: zip2_i64:
; CHECK:       // %bb.0:
; CHECK-NEXT:    zip2 z0.d, z0.d, z1.d
; CHECK-NEXT:    ret
  %out = call <vscale x 2 x i64> @llvm.aarch64.sve.zip2.nxv2i64(<vscale x 2 x i64> %a,
                                                                <vscale x 2 x i64> %b)
  ret <vscale x 2 x i64> %out
}

define <vscale x 2 x half> @zip2_f16_v2(<vscale x 2 x half> %a, <vscale x 2 x half> %b) {
; CHECK-LABEL: zip2_f16_v2:
; CHECK:       // %bb.0:
; CHECK-NEXT:    zip2 z0.d, z0.d, z1.d
; CHECK-NEXT:    ret
  %out = call <vscale x 2 x half> @llvm.aarch64.sve.zip2.nxv2f16(<vscale x 2 x half> %a,
                                                                 <vscale x 2 x half> %b)
  ret <vscale x 2 x half> %out
}

define <vscale x 4 x half> @zip2_f16_v4(<vscale x 4 x half> %a, <vscale x 4 x half> %b) {
; CHECK-LABEL: zip2_f16_v4:
; CHECK:       // %bb.0:
; CHECK-NEXT:    zip2 z0.s, z0.s, z1.s
; CHECK-NEXT:    ret
  %out = call <vscale x 4 x half> @llvm.aarch64.sve.zip2.nxv4f16(<vscale x 4 x half> %a,
                                                                 <vscale x 4 x half> %b)
  ret <vscale x 4 x half> %out
}

define <vscale x 8 x bfloat> @zip2_bf16(<vscale x 8 x bfloat> %a, <vscale x 8 x bfloat> %b) #0 {
; CHECK-LABEL: zip2_bf16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    zip2 z0.h, z0.h, z1.h
; CHECK-NEXT:    ret
  %out = call <vscale x 8 x bfloat> @llvm.aarch64.sve.zip2.nxv8bf16(<vscale x 8 x bfloat> %a,
                                                                    <vscale x 8 x bfloat> %b)
  ret <vscale x 8 x bfloat> %out
}

define <vscale x 8 x half> @zip2_f16(<vscale x 8 x half> %a, <vscale x 8 x half> %b) {
; CHECK-LABEL: zip2_f16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    zip2 z0.h, z0.h, z1.h
; CHECK-NEXT:    ret
  %out = call <vscale x 8 x half> @llvm.aarch64.sve.zip2.nxv8f16(<vscale x 8 x half> %a,
                                                                 <vscale x 8 x half> %b)
  ret <vscale x 8 x half> %out
}

define <vscale x 2 x float> @zip2_f32_v2(<vscale x 2 x float> %a, <vscale x 2 x float> %b) {
; CHECK-LABEL: zip2_f32_v2:
; CHECK:       // %bb.0:
; CHECK-NEXT:    zip2 z0.d, z0.d, z1.d
; CHECK-NEXT:    ret
  %out = call <vscale x 2 x float> @llvm.aarch64.sve.zip2.nxv2f32(<vscale x 2 x float> %a,
                                                                  <vscale x 2 x float> %b)
  ret <vscale x 2 x float> %out
}

define <vscale x 4 x float> @zip2_f32(<vscale x 4 x float> %a, <vscale x 4 x float> %b) {
; CHECK-LABEL: zip2_f32:
; CHECK:       // %bb.0:
; CHECK-NEXT:    zip2 z0.s, z0.s, z1.s
; CHECK-NEXT:    ret
  %out = call <vscale x 4 x float> @llvm.aarch64.sve.zip2.nxv4f32(<vscale x 4 x float> %a,
                                                                  <vscale x 4 x float> %b)
  ret <vscale x 4 x float> %out
}

define <vscale x 2 x double> @zip2_f64(<vscale x 2 x double> %a, <vscale x 2 x double> %b) {
; CHECK-LABEL: zip2_f64:
; CHECK:       // %bb.0:
; CHECK-NEXT:    zip2 z0.d, z0.d, z1.d
; CHECK-NEXT:    ret
  %out = call <vscale x 2 x double> @llvm.aarch64.sve.zip2.nxv2f64(<vscale x 2 x double> %a,
                                                                   <vscale x 2 x double> %b)
  ret <vscale x 2 x double> %out
}
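
; Declarations for the intrinsics exercised by the tests above.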
declare <vscale x 16 x i8> @llvm.aarch64.sve.clasta.nxv16i8(<vscale x 16 x i1>, <vscale x 16 x i8>, <vscale x 16 x i8>)
declare <vscale x 8 x i16> @llvm.aarch64.sve.clasta.nxv8i16(<vscale x 8 x i1>, <vscale x 8 x i16>, <vscale x 8 x i16>)
declare <vscale x 4 x i32> @llvm.aarch64.sve.clasta.nxv4i32(<vscale x 4 x i1>, <vscale x 4 x i32>, <vscale x 4 x i32>)
declare <vscale x 2 x i64> @llvm.aarch64.sve.clasta.nxv2i64(<vscale x 2 x i1>, <vscale x 2 x i64>, <vscale x 2 x i64>)
declare <vscale x 8 x half> @llvm.aarch64.sve.clasta.nxv8f16(<vscale x 8 x i1>, <vscale x 8 x half>, <vscale x 8 x half>)
declare <vscale x 8 x bfloat> @llvm.aarch64.sve.clasta.nxv8bf16(<vscale x 8 x i1>, <vscale x 8 x bfloat>, <vscale x 8 x bfloat>)
declare <vscale x 4 x float> @llvm.aarch64.sve.clasta.nxv4f32(<vscale x 4 x i1>, <vscale x 4 x float>, <vscale x 4 x float>)
declare <vscale x 2 x double> @llvm.aarch64.sve.clasta.nxv2f64(<vscale x 2 x i1>, <vscale x 2 x double>, <vscale x 2 x double>)

declare i8 @llvm.aarch64.sve.clasta.n.nxv16i8(<vscale x 16 x i1>, i8, <vscale x 16 x i8>)
declare i16 @llvm.aarch64.sve.clasta.n.nxv8i16(<vscale x 8 x i1>, i16, <vscale x 8 x i16>)
declare i32 @llvm.aarch64.sve.clasta.n.nxv4i32(<vscale x 4 x i1>, i32, <vscale x 4 x i32>)
declare i64 @llvm.aarch64.sve.clasta.n.nxv2i64(<vscale x 2 x i1>, i64, <vscale x 2 x i64>)
declare half @llvm.aarch64.sve.clasta.n.nxv8f16(<vscale x 8 x i1>, half, <vscale x 8 x half>)
declare bfloat @llvm.aarch64.sve.clasta.n.nxv8bf16(<vscale x 8 x i1>, bfloat, <vscale x 8 x bfloat>)
declare float @llvm.aarch64.sve.clasta.n.nxv4f32(<vscale x 4 x i1>, float, <vscale x 4 x float>)
declare double @llvm.aarch64.sve.clasta.n.nxv2f64(<vscale x 2 x i1>, double, <vscale x 2 x double>)

declare <vscale x 16 x i8> @llvm.aarch64.sve.clastb.nxv16i8(<vscale x 16 x i1>, <vscale x 16 x i8>, <vscale x 16 x i8>)
declare <vscale x 8 x i16> @llvm.aarch64.sve.clastb.nxv8i16(<vscale x 8 x i1>, <vscale x 8 x i16>, <vscale x 8 x i16>)
declare <vscale x 4 x i32> @llvm.aarch64.sve.clastb.nxv4i32(<vscale x 4 x i1>, <vscale x 4 x i32>, <vscale x 4 x i32>)
declare <vscale x 2 x i64> @llvm.aarch64.sve.clastb.nxv2i64(<vscale x 2 x i1>, <vscale x 2 x i64>, <vscale x 2 x i64>)
declare <vscale x 8 x half> @llvm.aarch64.sve.clastb.nxv8f16(<vscale x 8 x i1>, <vscale x 8 x half>, <vscale x 8 x half>)
declare <vscale x 8 x bfloat> @llvm.aarch64.sve.clastb.nxv8bf16(<vscale x 8 x i1>, <vscale x 8 x bfloat>, <vscale x 8 x bfloat>)
declare <vscale x 4 x float> @llvm.aarch64.sve.clastb.nxv4f32(<vscale x 4 x i1>, <vscale x 4 x float>, <vscale x 4 x float>)
declare <vscale x 2 x double> @llvm.aarch64.sve.clastb.nxv2f64(<vscale x 2 x i1>, <vscale x 2 x double>, <vscale x 2 x double>)

declare i8 @llvm.aarch64.sve.clastb.n.nxv16i8(<vscale x 16 x i1>, i8, <vscale x 16 x i8>)
declare i16 @llvm.aarch64.sve.clastb.n.nxv8i16(<vscale x 8 x i1>, i16, <vscale x 8 x i16>)
declare i32 @llvm.aarch64.sve.clastb.n.nxv4i32(<vscale x 4 x i1>, i32, <vscale x 4 x i32>)
declare i64 @llvm.aarch64.sve.clastb.n.nxv2i64(<vscale x 2 x i1>, i64, <vscale x 2 x i64>)
declare half @llvm.aarch64.sve.clastb.n.nxv8f16(<vscale x 8 x i1>, half, <vscale x 8 x half>)
declare bfloat @llvm.aarch64.sve.clastb.n.nxv8bf16(<vscale x 8 x i1>, bfloat, <vscale x 8 x bfloat>)
declare float @llvm.aarch64.sve.clastb.n.nxv4f32(<vscale x 4 x i1>, float, <vscale x 4 x float>)
declare double @llvm.aarch64.sve.clastb.n.nxv2f64(<vscale x 2 x i1>, double, <vscale x 2 x double>)

declare <vscale x 4 x i32> @llvm.aarch64.sve.compact.nxv4i32(<vscale x 4 x i1>, <vscale x 4 x i32>)
declare <vscale x 2 x i64> @llvm.aarch64.sve.compact.nxv2i64(<vscale x 2 x i1>, <vscale x 2 x i64>)
declare <vscale x 4 x float> @llvm.aarch64.sve.compact.nxv4f32(<vscale x 4 x i1>, <vscale x 4 x float>)
declare <vscale x 2 x double> @llvm.aarch64.sve.compact.nxv2f64(<vscale x 2 x i1>, <vscale x 2 x double>)

declare <vscale x 16 x i8> @llvm.aarch64.sve.dupq.lane.nxv16i8(<vscale x 16 x i8>, i64)
declare <vscale x 8 x i16> @llvm.aarch64.sve.dupq.lane.nxv8i16(<vscale x 8 x i16>, i64)
declare <vscale x 4 x i32> @llvm.aarch64.sve.dupq.lane.nxv4i32(<vscale x 4 x i32>, i64)
declare <vscale x 2 x i64> @llvm.aarch64.sve.dupq.lane.nxv2i64(<vscale x 2 x i64>, i64)
declare <vscale x 8 x half> @llvm.aarch64.sve.dupq.lane.nxv8f16(<vscale x 8 x half>, i64)
declare <vscale x 8 x bfloat> @llvm.aarch64.sve.dupq.lane.nxv8bf16(<vscale x 8 x bfloat>, i64)
declare <vscale x 4 x float> @llvm.aarch64.sve.dupq.lane.nxv4f32(<vscale x 4 x float>, i64)
declare <vscale x 2 x double> @llvm.aarch64.sve.dupq.lane.nxv2f64(<vscale x 2 x double>, i64)

declare <vscale x 16 x i8> @llvm.aarch64.sve.ext.nxv16i8(<vscale x 16 x i8>, <vscale x 16 x i8>, i32)
declare <vscale x 8 x i16> @llvm.aarch64.sve.ext.nxv8i16(<vscale x 8 x i16>, <vscale x 8 x i16>, i32)
declare <vscale x 4 x i32> @llvm.aarch64.sve.ext.nxv4i32(<vscale x 4 x i32>, <vscale x 4 x i32>, i32)
declare <vscale x 2 x i64> @llvm.aarch64.sve.ext.nxv2i64(<vscale x 2 x i64>, <vscale x 2 x i64>, i32)
declare <vscale x 8 x bfloat> @llvm.aarch64.sve.ext.nxv8bf16(<vscale x 8 x bfloat>, <vscale x 8 x bfloat>, i32)
declare <vscale x 8 x half> @llvm.aarch64.sve.ext.nxv8f16(<vscale x 8 x half>, <vscale x 8 x half>, i32)
declare <vscale x 4 x float> @llvm.aarch64.sve.ext.nxv4f32(<vscale x 4 x float>, <vscale x 4 x float>, i32)
declare <vscale x 2 x double> @llvm.aarch64.sve.ext.nxv2f64(<vscale x 2 x double>, <vscale x 2 x double>, i32)

declare i8 @llvm.aarch64.sve.lasta.nxv16i8(<vscale x 16 x i1>, <vscale x 16 x i8>)
declare i16 @llvm.aarch64.sve.lasta.nxv8i16(<vscale x 8 x i1>, <vscale x 8 x i16>)
declare i32 @llvm.aarch64.sve.lasta.nxv4i32(<vscale x 4 x i1>, <vscale x 4 x i32>)
declare i64 @llvm.aarch64.sve.lasta.nxv2i64(<vscale x 2 x i1>, <vscale x 2 x i64>)
declare half @llvm.aarch64.sve.lasta.nxv8f16(<vscale x 8 x i1>, <vscale x 8 x half>)
declare bfloat @llvm.aarch64.sve.lasta.nxv8bf16(<vscale x 8 x i1>, <vscale x 8 x bfloat>)
declare float @llvm.aarch64.sve.lasta.nxv2f32(<vscale x 2 x i1>, <vscale x 2 x float>)
declare float @llvm.aarch64.sve.lasta.nxv4f32(<vscale x 4 x i1>, <vscale x 4 x float>)
declare double @llvm.aarch64.sve.lasta.nxv2f64(<vscale x 2 x i1>, <vscale x 2 x double>)

declare i8 @llvm.aarch64.sve.lastb.nxv16i8(<vscale x 16 x i1>, <vscale x 16 x i8>)
declare i16 @llvm.aarch64.sve.lastb.nxv8i16(<vscale x 8 x i1>, <vscale x 8 x i16>)
declare i32 @llvm.aarch64.sve.lastb.nxv4i32(<vscale x 4 x i1>, <vscale x 4 x i32>)
declare i64 @llvm.aarch64.sve.lastb.nxv2i64(<vscale x 2 x i1>, <vscale x 2 x i64>)
declare half @llvm.aarch64.sve.lastb.nxv8f16(<vscale x 8 x i1>, <vscale x 8 x half>)
declare bfloat @llvm.aarch64.sve.lastb.nxv8bf16(<vscale x 8 x i1>, <vscale x 8 x bfloat>)
declare float @llvm.aarch64.sve.lastb.nxv2f32(<vscale x 2 x i1>, <vscale x 2 x float>)
declare float @llvm.aarch64.sve.lastb.nxv4f32(<vscale x 4 x i1>, <vscale x 4 x float>)
declare double @llvm.aarch64.sve.lastb.nxv2f64(<vscale x 2 x i1>, <vscale x 2 x double>)

declare <vscale x 16 x i1> @llvm.aarch64.sve.rev.nxv16i1(<vscale x 16 x i1>)
declare <vscale x 8 x i1> @llvm.aarch64.sve.rev.nxv8i1(<vscale x 8 x i1>)
declare <vscale x 4 x i1> @llvm.aarch64.sve.rev.nxv4i1(<vscale x 4 x i1>)
declare <vscale x 2 x i1> @llvm.aarch64.sve.rev.nxv2i1(<vscale x 2 x i1>)
declare <vscale x 16 x i8> @llvm.aarch64.sve.rev.nxv16i8(<vscale x 16 x i8>)
declare <vscale x 8 x i16> @llvm.aarch64.sve.rev.nxv8i16(<vscale x 8 x i16>)
declare <vscale x 4 x i32> @llvm.aarch64.sve.rev.nxv4i32(<vscale x 4 x i32>)
declare <vscale x 2 x i64> @llvm.aarch64.sve.rev.nxv2i64(<vscale x 2 x i64>)
declare <vscale x 8 x bfloat> @llvm.aarch64.sve.rev.nxv8bf16(<vscale x 8 x bfloat>)
declare <vscale x 8 x half> @llvm.aarch64.sve.rev.nxv8f16(<vscale x 8 x half>)
declare <vscale x 4 x float> @llvm.aarch64.sve.rev.nxv4f32(<vscale x 4 x float>)
declare <vscale x 2 x double> @llvm.aarch64.sve.rev.nxv2f64(<vscale x 2 x double>)

declare <vscale x 16 x i1> @llvm.aarch64.sve.rev.b16(<vscale x 16 x i1>)
declare <vscale x 16 x i1> @llvm.aarch64.sve.rev.b32(<vscale x 16 x i1>)
declare <vscale x 16 x i1> @llvm.aarch64.sve.rev.b64(<vscale x 16 x i1>)

declare <vscale x 16 x i8> @llvm.aarch64.sve.splice.nxv16i8(<vscale x 16 x i1>, <vscale x 16 x i8>, <vscale x 16 x i8>)
declare <vscale x 8 x i16> @llvm.aarch64.sve.splice.nxv8i16(<vscale x 8 x i1>, <vscale x 8 x i16>, <vscale x 8 x i16>)
declare <vscale x 4 x i32> @llvm.aarch64.sve.splice.nxv4i32(<vscale x 4 x i1>, <vscale x 4 x i32>, <vscale x 4 x i32>)
declare <vscale x 2 x i64> @llvm.aarch64.sve.splice.nxv2i64(<vscale x 2 x i1>, <vscale x 2 x i64>, <vscale x 2 x i64>)
declare <vscale x 8 x bfloat> @llvm.aarch64.sve.splice.nxv8bf16(<vscale x 8 x i1>, <vscale x 8 x bfloat>, <vscale x 8 x bfloat>)
declare <vscale x 8 x half> @llvm.aarch64.sve.splice.nxv8f16(<vscale x 8 x i1>, <vscale x 8 x half>, <vscale x 8 x half>)
declare <vscale x 4 x float> @llvm.aarch64.sve.splice.nxv4f32(<vscale x 4 x i1>, <vscale x 4 x float>, <vscale x 4 x float>)
declare <vscale x 2 x double> @llvm.aarch64.sve.splice.nxv2f64(<vscale x 2 x i1>, <vscale x 2 x double>, <vscale x 2 x double>)

declare <vscale x 8 x i16> @llvm.aarch64.sve.sunpkhi.nxv8i16(<vscale x 16 x i8>)
declare <vscale x 4 x i32> @llvm.aarch64.sve.sunpkhi.nxv4i32(<vscale x 8 x i16>)
declare <vscale x 2 x i64> @llvm.aarch64.sve.sunpkhi.nxv2i64(<vscale x 4 x i32>)

declare <vscale x 8 x i16> @llvm.aarch64.sve.sunpklo.nxv8i16(<vscale x 16 x i8>)
declare <vscale x 4 x i32> @llvm.aarch64.sve.sunpklo.nxv4i32(<vscale x 8 x i16>)
declare <vscale x 2 x i64> @llvm.aarch64.sve.sunpklo.nxv2i64(<vscale x 4 x i32>)

declare <vscale x 16 x i8> @llvm.aarch64.sve.tbl.nxv16i8(<vscale x 16 x i8>, <vscale x 16 x i8>)
declare <vscale x 8 x i16> @llvm.aarch64.sve.tbl.nxv8i16(<vscale x 8 x i16>, <vscale x 8 x i16>)
declare <vscale x 4 x i32> @llvm.aarch64.sve.tbl.nxv4i32(<vscale x 4 x i32>, <vscale x 4 x i32>)
declare <vscale x 2 x i64> @llvm.aarch64.sve.tbl.nxv2i64(<vscale x 2 x i64>, <vscale x 2 x i64>)
declare <vscale x 8 x half> @llvm.aarch64.sve.tbl.nxv8f16(<vscale x 8 x half>, <vscale x 8 x i16>)
declare <vscale x 8 x bfloat> @llvm.aarch64.sve.tbl.nxv8bf16(<vscale x 8 x bfloat>, <vscale x 8 x i16>)
declare <vscale x 4 x float> @llvm.aarch64.sve.tbl.nxv4f32(<vscale x 4 x float>, <vscale x 4 x i32>)
declare <vscale x 2 x double> @llvm.aarch64.sve.tbl.nxv2f64(<vscale x 2 x double>, <vscale x 2 x i64>)

declare <vscale x 8 x i16> @llvm.aarch64.sve.uunpkhi.nxv8i16(<vscale x 16 x i8>)
declare <vscale x 4 x i32> @llvm.aarch64.sve.uunpkhi.nxv4i32(<vscale x 8 x i16>)
declare <vscale x 2 x i64> @llvm.aarch64.sve.uunpkhi.nxv2i64(<vscale x 4 x i32>)

declare <vscale x 8 x i16> @llvm.aarch64.sve.uunpklo.nxv8i16(<vscale x 16 x i8>)
declare <vscale x 4 x i32> @llvm.aarch64.sve.uunpklo.nxv4i32(<vscale x 8 x i16>)
declare <vscale x 2 x i64> @llvm.aarch64.sve.uunpklo.nxv2i64(<vscale x 4 x i32>)

declare <vscale x 16 x i1> @llvm.aarch64.sve.trn1.nxv16i1(<vscale x 16 x i1>, <vscale x 16 x i1>)
declare <vscale x 8 x i1> @llvm.aarch64.sve.trn1.nxv8i1(<vscale x 8 x i1>, <vscale x 8 x i1>)
declare <vscale x 4 x i1> @llvm.aarch64.sve.trn1.nxv4i1(<vscale x 4 x i1>, <vscale x 4 x i1>)
declare <vscale x 2 x i1> @llvm.aarch64.sve.trn1.nxv2i1(<vscale x 2 x i1>, <vscale x 2 x i1>)
declare <vscale x 16 x i8> @llvm.aarch64.sve.trn1.nxv16i8(<vscale x 16 x i8>, <vscale x 16 x i8>)
declare <vscale x 8 x i16> @llvm.aarch64.sve.trn1.nxv8i16(<vscale x 8 x i16>, <vscale x 8 x i16>)
declare <vscale x 4 x i32> @llvm.aarch64.sve.trn1.nxv4i32(<vscale x 4 x i32>, <vscale x 4 x i32>)
declare <vscale x 2 x i64> @llvm.aarch64.sve.trn1.nxv2i64(<vscale x 2 x i64>, <vscale x 2 x i64>)
declare <vscale x 2 x half> @llvm.aarch64.sve.trn1.nxv2f16(<vscale x 2 x half>, <vscale x 2 x half>)
declare <vscale x 4 x half> @llvm.aarch64.sve.trn1.nxv4f16(<vscale x 4 x half>, <vscale x 4 x half>)
declare <vscale x 8 x bfloat> @llvm.aarch64.sve.trn1.nxv8bf16(<vscale x 8 x bfloat>, <vscale x 8 x bfloat>)
declare <vscale x 8 x half> @llvm.aarch64.sve.trn1.nxv8f16(<vscale x 8 x half>, <vscale x 8 x half>)
declare <vscale x 2 x float> @llvm.aarch64.sve.trn1.nxv2f32(<vscale x 2 x float>, <vscale x 2 x float>)
declare <vscale x 4 x float> @llvm.aarch64.sve.trn1.nxv4f32(<vscale x 4 x float>, <vscale x 4 x float>)
declare <vscale x 2 x double> @llvm.aarch64.sve.trn1.nxv2f64(<vscale x 2 x double>, <vscale x 2 x double>)

declare <vscale x 16 x i1> @llvm.aarch64.sve.trn1.b16(<vscale x 16 x i1>, <vscale x 16 x i1>)
declare <vscale x 16 x i1> @llvm.aarch64.sve.trn1.b32(<vscale x 16 x i1>, <vscale x 16 x i1>)
declare <vscale x 16 x i1> @llvm.aarch64.sve.trn1.b64(<vscale x 16 x i1>, <vscale x 16 x i1>)

declare <vscale x 16 x i1> @llvm.aarch64.sve.trn2.nxv16i1(<vscale x 16 x i1>, <vscale x 16 x i1>)
declare <vscale x 8 x i1> @llvm.aarch64.sve.trn2.nxv8i1(<vscale x 8 x i1>, <vscale x 8 x i1>)
declare <vscale x 4 x i1> @llvm.aarch64.sve.trn2.nxv4i1(<vscale x 4 x i1>, <vscale x 4 x i1>)
declare <vscale x 2 x i1> @llvm.aarch64.sve.trn2.nxv2i1(<vscale x 2 x i1>, <vscale x 2 x i1>)
declare <vscale x 16 x i8> @llvm.aarch64.sve.trn2.nxv16i8(<vscale x 16 x i8>, <vscale x 16 x i8>)
declare <vscale x 8 x i16> @llvm.aarch64.sve.trn2.nxv8i16(<vscale x 8 x i16>, <vscale x 8 x i16>)
declare <vscale x 4 x i32> @llvm.aarch64.sve.trn2.nxv4i32(<vscale x 4 x i32>, <vscale x 4 x i32>)
declare <vscale x 2 x i64> @llvm.aarch64.sve.trn2.nxv2i64(<vscale x 2 x i64>, <vscale x 2 x i64>)
declare <vscale x 2 x half> @llvm.aarch64.sve.trn2.nxv2f16(<vscale x 2 x half>, <vscale x 2 x half>)
declare <vscale x 4 x half> @llvm.aarch64.sve.trn2.nxv4f16(<vscale x 4 x half>, <vscale x 4 x half>)
declare <vscale x 8 x bfloat> @llvm.aarch64.sve.trn2.nxv8bf16(<vscale x 8 x bfloat>, <vscale x 8 x bfloat>)
declare <vscale x 8 x half> @llvm.aarch64.sve.trn2.nxv8f16(<vscale x 8 x half>, <vscale x 8 x half>)
declare <vscale x 2 x float> @llvm.aarch64.sve.trn2.nxv2f32(<vscale x 2 x float>, <vscale x 2 x float>)
declare <vscale x 4 x float> @llvm.aarch64.sve.trn2.nxv4f32(<vscale x 4 x float>, <vscale x 4 x float>)
declare <vscale x 2 x double> @llvm.aarch64.sve.trn2.nxv2f64(<vscale x 2 x double>, <vscale x 2 x double>)

declare <vscale x 16 x i1> @llvm.aarch64.sve.trn2.b16(<vscale x 16 x i1>, <vscale x 16 x i1>)
declare <vscale x 16 x i1> @llvm.aarch64.sve.trn2.b32(<vscale x 16 x i1>, <vscale x 16 x i1>)
declare <vscale x 16 x i1> @llvm.aarch64.sve.trn2.b64(<vscale x 16 x i1>, <vscale x 16 x i1>)

declare <vscale x 16 x i1> @llvm.aarch64.sve.uzp1.nxv16i1(<vscale x 16 x i1>, <vscale x 16 x i1>)
declare <vscale x 8 x i1> @llvm.aarch64.sve.uzp1.nxv8i1(<vscale x 8 x i1>, <vscale x 8 x i1>)
declare <vscale x 4 x i1> @llvm.aarch64.sve.uzp1.nxv4i1(<vscale x 4 x i1>, <vscale x 4 x i1>)
declare <vscale x 2 x i1> @llvm.aarch64.sve.uzp1.nxv2i1(<vscale x 2 x i1>, <vscale x 2 x i1>)
declare <vscale x 16 x i8> @llvm.aarch64.sve.uzp1.nxv16i8(<vscale x 16 x i8>, <vscale x 16 x i8>)
declare <vscale x 8 x i16> @llvm.aarch64.sve.uzp1.nxv8i16(<vscale x 8 x i16>, <vscale x 8 x i16>)
declare <vscale x 4 x i32> @llvm.aarch64.sve.uzp1.nxv4i32(<vscale x 4 x i32>, <vscale x 4 x i32>)
declare <vscale x 2 x i64> @llvm.aarch64.sve.uzp1.nxv2i64(<vscale x 2 x i64>, <vscale x 2 x i64>)
declare <vscale x 2 x half> @llvm.aarch64.sve.uzp1.nxv2f16(<vscale x 2 x half>, <vscale x 2 x half>)
declare <vscale x 4 x half> @llvm.aarch64.sve.uzp1.nxv4f16(<vscale x 4 x half>, <vscale x 4 x half>)
declare <vscale x 8 x bfloat> @llvm.aarch64.sve.uzp1.nxv8bf16(<vscale x 8 x bfloat>, <vscale x 8 x bfloat>)
declare <vscale x 8 x half> @llvm.aarch64.sve.uzp1.nxv8f16(<vscale x 8 x half>, <vscale x 8 x half>)
declare <vscale x 2 x float> @llvm.aarch64.sve.uzp1.nxv2f32(<vscale x 2 x float>, <vscale x 2 x float>)
declare <vscale x 4 x float> @llvm.aarch64.sve.uzp1.nxv4f32(<vscale x 4 x float>, <vscale x 4 x float>)
declare <vscale x 2 x double> @llvm.aarch64.sve.uzp1.nxv2f64(<vscale x 2 x double>, <vscale x 2 x double>)

declare <vscale x 16 x i1> @llvm.aarch64.sve.uzp1.b16(<vscale x 16 x i1>, <vscale x 16 x i1>)
declare <vscale x 16 x i1> @llvm.aarch64.sve.uzp1.b32(<vscale x 16 x i1>, <vscale x 16 x i1>)
declare <vscale x 16 x i1> @llvm.aarch64.sve.uzp1.b64(<vscale x 16 x i1>, <vscale x 16 x i1>)

declare <vscale x 16 x i1> @llvm.aarch64.sve.uzp2.nxv16i1(<vscale x 16 x i1>, <vscale x 16 x i1>)
declare <vscale x 8 x i1> @llvm.aarch64.sve.uzp2.nxv8i1(<vscale x 8 x i1>, <vscale x 8 x i1>)
declare <vscale x 4 x i1> @llvm.aarch64.sve.uzp2.nxv4i1(<vscale x 4 x i1>, <vscale x 4 x i1>)
declare <vscale x 2 x i1> @llvm.aarch64.sve.uzp2.nxv2i1(<vscale x 2 x i1>, <vscale x 2 x i1>)
declare <vscale x 16 x i8> @llvm.aarch64.sve.uzp2.nxv16i8(<vscale x 16 x i8>, <vscale x 16 x i8>)
declare <vscale x 8 x i16> @llvm.aarch64.sve.uzp2.nxv8i16(<vscale x 8 x i16>, <vscale x 8 x i16>)
declare <vscale x 4 x i32> @llvm.aarch64.sve.uzp2.nxv4i32(<vscale x 4 x i32>, <vscale x 4 x i32>)
declare <vscale x 2 x i64> @llvm.aarch64.sve.uzp2.nxv2i64(<vscale x 2 x i64>, <vscale x 2 x i64>)
declare <vscale x 2 x half> @llvm.aarch64.sve.uzp2.nxv2f16(<vscale x 2 x half>, <vscale x 2 x half>)
declare <vscale x 4 x half> @llvm.aarch64.sve.uzp2.nxv4f16(<vscale x 4 x half>, <vscale x 4 x half>)
declare <vscale x 8 x bfloat> @llvm.aarch64.sve.uzp2.nxv8bf16(<vscale x 8 x bfloat>, <vscale x 8 x bfloat>)
declare <vscale x 8 x half> @llvm.aarch64.sve.uzp2.nxv8f16(<vscale x 8 x half>, <vscale x 8 x half>)
declare <vscale x 2 x float> @llvm.aarch64.sve.uzp2.nxv2f32(<vscale x 2 x float>, <vscale x 2 x float>)
declare <vscale x 4 x float> @llvm.aarch64.sve.uzp2.nxv4f32(<vscale x 4 x float>, <vscale x 4 x float>)
declare <vscale x 2 x double> @llvm.aarch64.sve.uzp2.nxv2f64(<vscale x 2 x double>, <vscale x 2 x double>)

declare <vscale x 16 x i1> @llvm.aarch64.sve.uzp2.b16(<vscale x 16 x i1>, <vscale x 16 x i1>)
declare <vscale x 16 x i1> @llvm.aarch64.sve.uzp2.b32(<vscale x 16 x i1>, <vscale x 16 x i1>)
declare <vscale x 16 x i1> @llvm.aarch64.sve.uzp2.b64(<vscale x 16 x i1>, <vscale x 16 x i1>)

declare <vscale x 16 x i1> @llvm.aarch64.sve.zip1.nxv16i1(<vscale x 16 x i1>, <vscale x 16 x i1>)
declare <vscale x 8 x i1> @llvm.aarch64.sve.zip1.nxv8i1(<vscale x 8 x i1>, <vscale x 8 x i1>)
declare <vscale x 4 x i1> @llvm.aarch64.sve.zip1.nxv4i1(<vscale x 4 x i1>, <vscale x 4 x i1>)
declare <vscale x 2 x i1> @llvm.aarch64.sve.zip1.nxv2i1(<vscale x 2 x i1>, <vscale x 2 x i1>)
declare <vscale x 16 x i8> @llvm.aarch64.sve.zip1.nxv16i8(<vscale x 16 x i8>, <vscale x 16 x i8>)
declare <vscale x 8 x i16> @llvm.aarch64.sve.zip1.nxv8i16(<vscale x 8 x i16>, <vscale x 8 x i16>)
declare <vscale x 4 x i32> @llvm.aarch64.sve.zip1.nxv4i32(<vscale x 4 x i32>, <vscale x 4 x i32>)
declare <vscale x 2 x i64> @llvm.aarch64.sve.zip1.nxv2i64(<vscale x 2 x i64>, <vscale x 2 x i64>)
declare <vscale x 2 x half> @llvm.aarch64.sve.zip1.nxv2f16(<vscale x 2 x half>, <vscale x 2 x half>)
declare <vscale x 4 x half> @llvm.aarch64.sve.zip1.nxv4f16(<vscale x 4 x half>, <vscale x 4 x half>)
declare <vscale x 8 x bfloat> @llvm.aarch64.sve.zip1.nxv8bf16(<vscale x 8 x bfloat>, <vscale x 8 x bfloat>)
declare <vscale x 8 x half> @llvm.aarch64.sve.zip1.nxv8f16(<vscale x 8 x half>, <vscale x 8 x half>)
declare <vscale x 2 x float> @llvm.aarch64.sve.zip1.nxv2f32(<vscale x 2 x float>, <vscale x 2 x float>)
declare <vscale x 4 x float> @llvm.aarch64.sve.zip1.nxv4f32(<vscale x 4 x float>, <vscale x 4 x float>)
declare <vscale x 2 x double> @llvm.aarch64.sve.zip1.nxv2f64(<vscale x 2 x double>, <vscale x 2 x double>)

declare <vscale x 16 x i1> @llvm.aarch64.sve.zip1.b16(<vscale x 16 x i1>, <vscale x 16 x i1>)
declare <vscale x 16 x i1> @llvm.aarch64.sve.zip1.b32(<vscale x 16 x i1>, <vscale x 16 x i1>)
declare <vscale x 16 x i1> @llvm.aarch64.sve.zip1.b64(<vscale x 16 x i1>, <vscale x 16 x i1>)

declare <vscale x 16 x i1> @llvm.aarch64.sve.zip2.nxv16i1(<vscale x 16 x i1>, <vscale x 16 x i1>)
declare <vscale x 8 x i1> @llvm.aarch64.sve.zip2.nxv8i1(<vscale x 8 x i1>, <vscale x 8 x i1>)
declare <vscale x 4 x i1> @llvm.aarch64.sve.zip2.nxv4i1(<vscale x 4 x i1>, <vscale x 4 x i1>)
declare <vscale x 2 x i1> @llvm.aarch64.sve.zip2.nxv2i1(<vscale x 2 x i1>, <vscale x 2 x i1>)
declare <vscale x 16 x i8> @llvm.aarch64.sve.zip2.nxv16i8(<vscale x 16 x i8>, <vscale x 16 x i8>)
declare <vscale x 8 x i16> @llvm.aarch64.sve.zip2.nxv8i16(<vscale x 8 x i16>, <vscale x 8 x i16>)
declare <vscale x 4 x i32> @llvm.aarch64.sve.zip2.nxv4i32(<vscale x 4 x i32>, <vscale x 4 x i32>)
declare <vscale x 2 x i64> @llvm.aarch64.sve.zip2.nxv2i64(<vscale x 2 x i64>, <vscale x 2 x i64>)
declare <vscale x 2 x half> @llvm.aarch64.sve.zip2.nxv2f16(<vscale x 2 x half>, <vscale x 2 x half>)
declare <vscale x 4 x half> @llvm.aarch64.sve.zip2.nxv4f16(<vscale x 4 x half>, <vscale x 4 x half>)
declare <vscale x 8 x bfloat> @llvm.aarch64.sve.zip2.nxv8bf16(<vscale x 8 x bfloat>, <vscale x 8 x bfloat>)
declare <vscale x 8 x half> @llvm.aarch64.sve.zip2.nxv8f16(<vscale x 8 x half>, <vscale x 8 x half>)
declare <vscale x 2 x float> @llvm.aarch64.sve.zip2.nxv2f32(<vscale x 2 x float>, <vscale x 2 x float>)
declare <vscale x 4 x float> @llvm.aarch64.sve.zip2.nxv4f32(<vscale x 4 x float>, <vscale x 4 x float>)
declare <vscale x 2 x double> @llvm.aarch64.sve.zip2.nxv2f64(<vscale x 2 x double>, <vscale x 2 x double>)

declare <vscale x 16 x i1> @llvm.aarch64.sve.zip2.b16(<vscale x 16 x i1>, <vscale x 16 x i1>)
declare <vscale x 16 x i1> @llvm.aarch64.sve.zip2.b32(<vscale x 16 x i1>, <vscale x 16 x i1>)
declare <vscale x 16 x i1> @llvm.aarch64.sve.zip2.b64(<vscale x 16 x i1>, <vscale x 16 x i1>)

declare <vscale x 2 x double> @llvm.vector.insert.nxv2f64.v2f64(<vscale x 2 x double>, <2 x double>, i64)
declare <vscale x 4 x float> @llvm.vector.insert.nxv4f32.v4f32(<vscale x 4 x float>, <4 x float>, i64)
declare <vscale x 8 x half> @llvm.vector.insert.nxv8f16.v8f16(<vscale x 8 x half>, <8 x half>, i64)
declare <vscale x 2 x i64> @llvm.vector.insert.nxv2i64.v2i64(<vscale x 2 x i64>, <2 x i64>, i64)
declare <vscale x 4 x i32> @llvm.vector.insert.nxv4i32.v4i32(<vscale x 4 x i32>, <4 x i32>, i64)
declare <vscale x 8 x i16> @llvm.vector.insert.nxv8i16.v8i16(<vscale x 8 x i16>, <8 x i16>, i64)
declare <vscale x 16 x i8> @llvm.vector.insert.nxv16i8.v16i8(<vscale x 16 x i8>, <16 x i8>, i64)
declare <vscale x 8 x bfloat> @llvm.vector.insert.nxv8bf16.v8bf16(<vscale x 8 x bfloat>, <8 x bfloat>, i64)

; +bf16 is required for the bfloat versions.
attributes #0 = { "target-features"="+sve,+bf16" }