; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sve < %s | FileCheck %s --check-prefixes=CHECK,SVE
; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sve2 < %s | FileCheck %s --check-prefixes=CHECK,SVE2
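; NOTE: Per the autogeneration note above, the CHECK/SVE/SVE2 lines below are
; expected to be regenerated by running utils/update_llc_test_checks.py on
; this file rather than edited by hand.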

;
; CLASTA (Vectors)
;

define <vscale x 16 x i8> @clasta_i8(<vscale x 16 x i1> %pg, <vscale x 16 x i8> %a, <vscale x 16 x i8> %b) {
; CHECK-LABEL: clasta_i8:
; CHECK:       // %bb.0:
; CHECK-NEXT:    clasta z0.b, p0, z0.b, z1.b
; CHECK-NEXT:    ret
  %out = call <vscale x 16 x i8> @llvm.aarch64.sve.clasta.nxv16i8(<vscale x 16 x i1> %pg,
                                                                  <vscale x 16 x i8> %a,
                                                                  <vscale x 16 x i8> %b)
  ret <vscale x 16 x i8> %out
}

define <vscale x 8 x i16> @clasta_i16(<vscale x 8 x i1> %pg, <vscale x 8 x i16> %a, <vscale x 8 x i16> %b) {
; CHECK-LABEL: clasta_i16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    clasta z0.h, p0, z0.h, z1.h
; CHECK-NEXT:    ret
  %out = call <vscale x 8 x i16> @llvm.aarch64.sve.clasta.nxv8i16(<vscale x 8 x i1> %pg,
                                                                  <vscale x 8 x i16> %a,
                                                                  <vscale x 8 x i16> %b)
  ret <vscale x 8 x i16> %out
}

define <vscale x 4 x i32> @clasta_i32(<vscale x 4 x i1> %pg, <vscale x 4 x i32> %a, <vscale x 4 x i32> %b) {
; CHECK-LABEL: clasta_i32:
; CHECK:       // %bb.0:
; CHECK-NEXT:    clasta z0.s, p0, z0.s, z1.s
; CHECK-NEXT:    ret
  %out = call <vscale x 4 x i32> @llvm.aarch64.sve.clasta.nxv4i32(<vscale x 4 x i1> %pg,
                                                                  <vscale x 4 x i32> %a,
                                                                  <vscale x 4 x i32> %b)
  ret <vscale x 4 x i32> %out
}

define <vscale x 2 x i64> @clasta_i64(<vscale x 2 x i1> %pg, <vscale x 2 x i64> %a, <vscale x 2 x i64> %b) {
; CHECK-LABEL: clasta_i64:
; CHECK:       // %bb.0:
; CHECK-NEXT:    clasta z0.d, p0, z0.d, z1.d
; CHECK-NEXT:    ret
  %out = call <vscale x 2 x i64> @llvm.aarch64.sve.clasta.nxv2i64(<vscale x 2 x i1> %pg,
                                                                  <vscale x 2 x i64> %a,
                                                                  <vscale x 2 x i64> %b)
  ret <vscale x 2 x i64> %out
}

define <vscale x 8 x half> @clasta_f16(<vscale x 8 x i1> %pg, <vscale x 8 x half> %a, <vscale x 8 x half> %b) {
; CHECK-LABEL: clasta_f16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    clasta z0.h, p0, z0.h, z1.h
; CHECK-NEXT:    ret
  %out = call <vscale x 8 x half> @llvm.aarch64.sve.clasta.nxv8f16(<vscale x 8 x i1> %pg,
                                                                   <vscale x 8 x half> %a,
                                                                   <vscale x 8 x half> %b)
  ret <vscale x 8 x half> %out
}

define <vscale x 8 x bfloat> @clasta_bf16(<vscale x 8 x i1> %pg, <vscale x 8 x bfloat> %a, <vscale x 8 x bfloat> %b) #0 {
; CHECK-LABEL: clasta_bf16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    clasta z0.h, p0, z0.h, z1.h
; CHECK-NEXT:    ret
  %out = call <vscale x 8 x bfloat> @llvm.aarch64.sve.clasta.nxv8bf16(<vscale x 8 x i1> %pg,
                                                                      <vscale x 8 x bfloat> %a,
                                                                      <vscale x 8 x bfloat> %b)
  ret <vscale x 8 x bfloat> %out
}

define <vscale x 4 x float> @clasta_f32(<vscale x 4 x i1> %pg, <vscale x 4 x float> %a, <vscale x 4 x float> %b) {
; CHECK-LABEL: clasta_f32:
; CHECK:       // %bb.0:
; CHECK-NEXT:    clasta z0.s, p0, z0.s, z1.s
; CHECK-NEXT:    ret
  %out = call <vscale x 4 x float> @llvm.aarch64.sve.clasta.nxv4f32(<vscale x 4 x i1> %pg,
                                                                    <vscale x 4 x float> %a,
                                                                    <vscale x 4 x float> %b)
  ret <vscale x 4 x float> %out
}

define <vscale x 2 x double> @clasta_f64(<vscale x 2 x i1> %pg, <vscale x 2 x double> %a, <vscale x 2 x double> %b) {
; CHECK-LABEL: clasta_f64:
; CHECK:       // %bb.0:
; CHECK-NEXT:    clasta z0.d, p0, z0.d, z1.d
; CHECK-NEXT:    ret
  %out = call <vscale x 2 x double> @llvm.aarch64.sve.clasta.nxv2f64(<vscale x 2 x i1> %pg,
                                                                     <vscale x 2 x double> %a,
                                                                     <vscale x 2 x double> %b)
  ret <vscale x 2 x double> %out
}

;
; CLASTA (Scalar)
;

define i8 @clasta_n_i8(<vscale x 16 x i1> %pg, i8 %a, <vscale x 16 x i8> %b) {
; CHECK-LABEL: clasta_n_i8:
; CHECK:       // %bb.0:
; CHECK-NEXT:    clasta w0, p0, w0, z0.b
; CHECK-NEXT:    ret
  %out = call i8 @llvm.aarch64.sve.clasta.n.nxv16i8(<vscale x 16 x i1> %pg,
                                                    i8 %a,
                                                    <vscale x 16 x i8> %b)
  ret i8 %out
}

define i16 @clasta_n_i16(<vscale x 8 x i1> %pg, i16 %a, <vscale x 8 x i16> %b) {
; CHECK-LABEL: clasta_n_i16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    clasta w0, p0, w0, z0.h
; CHECK-NEXT:    ret
  %out = call i16 @llvm.aarch64.sve.clasta.n.nxv8i16(<vscale x 8 x i1> %pg,
                                                     i16 %a,
                                                     <vscale x 8 x i16> %b)
  ret i16 %out
}

define i32 @clasta_n_i32(<vscale x 4 x i1> %pg, i32 %a, <vscale x 4 x i32> %b) {
; CHECK-LABEL: clasta_n_i32:
; CHECK:       // %bb.0:
; CHECK-NEXT:    clasta w0, p0, w0, z0.s
; CHECK-NEXT:    ret
  %out = call i32 @llvm.aarch64.sve.clasta.n.nxv4i32(<vscale x 4 x i1> %pg,
                                                     i32 %a,
                                                     <vscale x 4 x i32> %b)
  ret i32 %out
}

define i64 @clasta_n_i64(<vscale x 2 x i1> %pg, i64 %a, <vscale x 2 x i64> %b) {
; CHECK-LABEL: clasta_n_i64:
; CHECK:       // %bb.0:
; CHECK-NEXT:    clasta x0, p0, x0, z0.d
; CHECK-NEXT:    ret
  %out = call i64 @llvm.aarch64.sve.clasta.n.nxv2i64(<vscale x 2 x i1> %pg,
                                                     i64 %a,
                                                     <vscale x 2 x i64> %b)
  ret i64 %out
}

define half @clasta_n_f16(<vscale x 8 x i1> %pg, half %a, <vscale x 8 x half> %b) {
; CHECK-LABEL: clasta_n_f16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    clasta h0, p0, h0, z1.h
; CHECK-NEXT:    ret
  %out = call half @llvm.aarch64.sve.clasta.n.nxv8f16(<vscale x 8 x i1> %pg,
                                                      half %a,
                                                      <vscale x 8 x half> %b)
  ret half %out
}

define bfloat @clasta_n_bf16(<vscale x 8 x i1> %pg, bfloat %a, <vscale x 8 x bfloat> %b) #0 {
; CHECK-LABEL: clasta_n_bf16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    clasta h0, p0, h0, z1.h
; CHECK-NEXT:    ret
  %out = call bfloat @llvm.aarch64.sve.clasta.n.nxv8bf16(<vscale x 8 x i1> %pg,
                                                         bfloat %a,
                                                         <vscale x 8 x bfloat> %b)
  ret bfloat %out
}

define float @clasta_n_f32(<vscale x 4 x i1> %pg, float %a, <vscale x 4 x float> %b) {
; CHECK-LABEL: clasta_n_f32:
; CHECK:       // %bb.0:
; CHECK-NEXT:    clasta s0, p0, s0, z1.s
; CHECK-NEXT:    ret
  %out = call float @llvm.aarch64.sve.clasta.n.nxv4f32(<vscale x 4 x i1> %pg,
                                                       float %a,
                                                       <vscale x 4 x float> %b)
  ret float %out
}

define double @clasta_n_f64(<vscale x 2 x i1> %pg, double %a, <vscale x 2 x double> %b) {
; CHECK-LABEL: clasta_n_f64:
; CHECK:       // %bb.0:
; CHECK-NEXT:    clasta d0, p0, d0, z1.d
; CHECK-NEXT:    ret
  %out = call double @llvm.aarch64.sve.clasta.n.nxv2f64(<vscale x 2 x i1> %pg,
                                                        double %a,
                                                        <vscale x 2 x double> %b)
  ret double %out
}

;
; CLASTB (Vectors)
;

define <vscale x 16 x i8> @clastb_i8(<vscale x 16 x i1> %pg, <vscale x 16 x i8> %a, <vscale x 16 x i8> %b) {
; CHECK-LABEL: clastb_i8:
; CHECK:       // %bb.0:
; CHECK-NEXT:    clastb z0.b, p0, z0.b, z1.b
; CHECK-NEXT:    ret
  %out = call <vscale x 16 x i8> @llvm.aarch64.sve.clastb.nxv16i8(<vscale x 16 x i1> %pg,
                                                                  <vscale x 16 x i8> %a,
                                                                  <vscale x 16 x i8> %b)
  ret <vscale x 16 x i8> %out
}

define <vscale x 8 x i16> @clastb_i16(<vscale x 8 x i1> %pg, <vscale x 8 x i16> %a, <vscale x 8 x i16> %b) {
; CHECK-LABEL: clastb_i16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    clastb z0.h, p0, z0.h, z1.h
; CHECK-NEXT:    ret
  %out = call <vscale x 8 x i16> @llvm.aarch64.sve.clastb.nxv8i16(<vscale x 8 x i1> %pg,
                                                                  <vscale x 8 x i16> %a,
                                                                  <vscale x 8 x i16> %b)
  ret <vscale x 8 x i16> %out
}

define <vscale x 4 x i32> @clastb_i32(<vscale x 4 x i1> %pg, <vscale x 4 x i32> %a, <vscale x 4 x i32> %b) {
; CHECK-LABEL: clastb_i32:
; CHECK:       // %bb.0:
; CHECK-NEXT:    clastb z0.s, p0, z0.s, z1.s
; CHECK-NEXT:    ret
  %out = call <vscale x 4 x i32> @llvm.aarch64.sve.clastb.nxv4i32(<vscale x 4 x i1> %pg,
                                                                  <vscale x 4 x i32> %a,
                                                                  <vscale x 4 x i32> %b)
  ret <vscale x 4 x i32> %out
}

define <vscale x 2 x i64> @clastb_i64(<vscale x 2 x i1> %pg, <vscale x 2 x i64> %a, <vscale x 2 x i64> %b) {
; CHECK-LABEL: clastb_i64:
; CHECK:       // %bb.0:
; CHECK-NEXT:    clastb z0.d, p0, z0.d, z1.d
; CHECK-NEXT:    ret
  %out = call <vscale x 2 x i64> @llvm.aarch64.sve.clastb.nxv2i64(<vscale x 2 x i1> %pg,
                                                                  <vscale x 2 x i64> %a,
                                                                  <vscale x 2 x i64> %b)
  ret <vscale x 2 x i64> %out
}

define <vscale x 8 x half> @clastb_f16(<vscale x 8 x i1> %pg, <vscale x 8 x half> %a, <vscale x 8 x half> %b) {
; CHECK-LABEL: clastb_f16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    clastb z0.h, p0, z0.h, z1.h
; CHECK-NEXT:    ret
  %out = call <vscale x 8 x half> @llvm.aarch64.sve.clastb.nxv8f16(<vscale x 8 x i1> %pg,
                                                                   <vscale x 8 x half> %a,
                                                                   <vscale x 8 x half> %b)
  ret <vscale x 8 x half> %out
}

define <vscale x 8 x bfloat> @clastb_bf16(<vscale x 8 x i1> %pg, <vscale x 8 x bfloat> %a, <vscale x 8 x bfloat> %b) #0 {
; CHECK-LABEL: clastb_bf16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    clastb z0.h, p0, z0.h, z1.h
; CHECK-NEXT:    ret
  %out = call <vscale x 8 x bfloat> @llvm.aarch64.sve.clastb.nxv8bf16(<vscale x 8 x i1> %pg,
                                                                      <vscale x 8 x bfloat> %a,
                                                                      <vscale x 8 x bfloat> %b)
  ret <vscale x 8 x bfloat> %out
}

define <vscale x 4 x float> @clastb_f32(<vscale x 4 x i1> %pg, <vscale x 4 x float> %a, <vscale x 4 x float> %b) {
; CHECK-LABEL: clastb_f32:
; CHECK:       // %bb.0:
; CHECK-NEXT:    clastb z0.s, p0, z0.s, z1.s
; CHECK-NEXT:    ret
  %out = call <vscale x 4 x float> @llvm.aarch64.sve.clastb.nxv4f32(<vscale x 4 x i1> %pg,
                                                                    <vscale x 4 x float> %a,
                                                                    <vscale x 4 x float> %b)
  ret <vscale x 4 x float> %out
}

define <vscale x 2 x double> @clastb_f64(<vscale x 2 x i1> %pg, <vscale x 2 x double> %a, <vscale x 2 x double> %b) {
; CHECK-LABEL: clastb_f64:
; CHECK:       // %bb.0:
; CHECK-NEXT:    clastb z0.d, p0, z0.d, z1.d
; CHECK-NEXT:    ret
  %out = call <vscale x 2 x double> @llvm.aarch64.sve.clastb.nxv2f64(<vscale x 2 x i1> %pg,
                                                                     <vscale x 2 x double> %a,
                                                                     <vscale x 2 x double> %b)
  ret <vscale x 2 x double> %out
}

;
; CLASTB (Scalar)
;

define i8 @clastb_n_i8(<vscale x 16 x i1> %pg, i8 %a, <vscale x 16 x i8> %b) {
; CHECK-LABEL: clastb_n_i8:
; CHECK:       // %bb.0:
; CHECK-NEXT:    clastb w0, p0, w0, z0.b
; CHECK-NEXT:    ret
  %out = call i8 @llvm.aarch64.sve.clastb.n.nxv16i8(<vscale x 16 x i1> %pg,
                                                    i8 %a,
                                                    <vscale x 16 x i8> %b)
  ret i8 %out
}

define i16 @clastb_n_i16(<vscale x 8 x i1> %pg, i16 %a, <vscale x 8 x i16> %b) {
; CHECK-LABEL: clastb_n_i16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    clastb w0, p0, w0, z0.h
; CHECK-NEXT:    ret
  %out = call i16 @llvm.aarch64.sve.clastb.n.nxv8i16(<vscale x 8 x i1> %pg,
                                                     i16 %a,
                                                     <vscale x 8 x i16> %b)
  ret i16 %out
}

define i32 @clastb_n_i32(<vscale x 4 x i1> %pg, i32 %a, <vscale x 4 x i32> %b) {
; CHECK-LABEL: clastb_n_i32:
; CHECK:       // %bb.0:
; CHECK-NEXT:    clastb w0, p0, w0, z0.s
; CHECK-NEXT:    ret
  %out = call i32 @llvm.aarch64.sve.clastb.n.nxv4i32(<vscale x 4 x i1> %pg,
                                                     i32 %a,
                                                     <vscale x 4 x i32> %b)
  ret i32 %out
}

define i64 @clastb_n_i64(<vscale x 2 x i1> %pg, i64 %a, <vscale x 2 x i64> %b) {
; CHECK-LABEL: clastb_n_i64:
; CHECK:       // %bb.0:
; CHECK-NEXT:    clastb x0, p0, x0, z0.d
; CHECK-NEXT:    ret
  %out = call i64 @llvm.aarch64.sve.clastb.n.nxv2i64(<vscale x 2 x i1> %pg,
                                                     i64 %a,
                                                     <vscale x 2 x i64> %b)
  ret i64 %out
}

define half @clastb_n_f16(<vscale x 8 x i1> %pg, half %a, <vscale x 8 x half> %b) {
; CHECK-LABEL: clastb_n_f16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    clastb h0, p0, h0, z1.h
; CHECK-NEXT:    ret
  %out = call half @llvm.aarch64.sve.clastb.n.nxv8f16(<vscale x 8 x i1> %pg,
                                                      half %a,
                                                      <vscale x 8 x half> %b)
  ret half %out
}

define bfloat @clastb_n_bf16(<vscale x 8 x i1> %pg, bfloat %a, <vscale x 8 x bfloat> %b) #0 {
; CHECK-LABEL: clastb_n_bf16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    clastb h0, p0, h0, z1.h
; CHECK-NEXT:    ret
  %out = call bfloat @llvm.aarch64.sve.clastb.n.nxv8bf16(<vscale x 8 x i1> %pg,
                                                         bfloat %a,
                                                         <vscale x 8 x bfloat> %b)
  ret bfloat %out
}

define float @clastb_n_f32(<vscale x 4 x i1> %pg, float %a, <vscale x 4 x float> %b) {
; CHECK-LABEL: clastb_n_f32:
; CHECK:       // %bb.0:
; CHECK-NEXT:    clastb s0, p0, s0, z1.s
; CHECK-NEXT:    ret
  %out = call float @llvm.aarch64.sve.clastb.n.nxv4f32(<vscale x 4 x i1> %pg,
                                                       float %a,
                                                       <vscale x 4 x float> %b)
  ret float %out
}

define double @clastb_n_f64(<vscale x 2 x i1> %pg, double %a, <vscale x 2 x double> %b) {
; CHECK-LABEL: clastb_n_f64:
; CHECK:       // %bb.0:
; CHECK-NEXT:    clastb d0, p0, d0, z1.d
; CHECK-NEXT:    ret
  %out = call double @llvm.aarch64.sve.clastb.n.nxv2f64(<vscale x 2 x i1> %pg,
                                                        double %a,
                                                        <vscale x 2 x double> %b)
  ret double %out
}

;
; DUPQ
;

define <vscale x 16 x i8> @dupq_i8(<vscale x 16 x i8> %a) {
; CHECK-LABEL: dupq_i8:
; CHECK:       // %bb.0:
; CHECK-NEXT:    mov z0.q, q0
; CHECK-NEXT:    ret
  %out = call <vscale x 16 x i8> @llvm.aarch64.sve.dupq.lane.nxv16i8(<vscale x 16 x i8> %a, i64 0)
  ret <vscale x 16 x i8> %out
}

define <vscale x 8 x i16> @dupq_i16(<vscale x 8 x i16> %a) {
; CHECK-LABEL: dupq_i16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    mov z0.q, z0.q[1]
; CHECK-NEXT:    ret
  %out = call <vscale x 8 x i16> @llvm.aarch64.sve.dupq.lane.nxv8i16(<vscale x 8 x i16> %a, i64 1)
  ret <vscale x 8 x i16> %out
}

define <vscale x 4 x i32> @dupq_i32(<vscale x 4 x i32> %a) {
; CHECK-LABEL: dupq_i32:
; CHECK:       // %bb.0:
; CHECK-NEXT:    mov z0.q, z0.q[2]
; CHECK-NEXT:    ret
  %out = call <vscale x 4 x i32> @llvm.aarch64.sve.dupq.lane.nxv4i32(<vscale x 4 x i32> %a, i64 2)
  ret <vscale x 4 x i32> %out
}

define <vscale x 2 x i64> @dupq_i64(<vscale x 2 x i64> %a) {
; CHECK-LABEL: dupq_i64:
; CHECK:       // %bb.0:
; CHECK-NEXT:    mov z0.q, z0.q[3]
; CHECK-NEXT:    ret
  %out = call <vscale x 2 x i64> @llvm.aarch64.sve.dupq.lane.nxv2i64(<vscale x 2 x i64> %a, i64 3)
  ret <vscale x 2 x i64> %out
}

define <vscale x 8 x half> @dupq_f16(<vscale x 8 x half> %a) {
; CHECK-LABEL: dupq_f16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    mov z0.q, q0
; CHECK-NEXT:    ret
  %out = call <vscale x 8 x half> @llvm.aarch64.sve.dupq.lane.nxv8f16(<vscale x 8 x half> %a, i64 0)
  ret <vscale x 8 x half> %out
}

define <vscale x 8 x bfloat> @dupq_bf16(<vscale x 8 x bfloat> %a) #0 {
; CHECK-LABEL: dupq_bf16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    mov z0.q, q0
; CHECK-NEXT:    ret
  %out = call <vscale x 8 x bfloat> @llvm.aarch64.sve.dupq.lane.nxv8bf16(<vscale x 8 x bfloat> %a, i64 0)
  ret <vscale x 8 x bfloat> %out
}

define <vscale x 4 x float> @dupq_f32(<vscale x 4 x float> %a) {
; CHECK-LABEL: dupq_f32:
; CHECK:       // %bb.0:
; CHECK-NEXT:    mov z0.q, z0.q[1]
; CHECK-NEXT:    ret
  %out = call <vscale x 4 x float> @llvm.aarch64.sve.dupq.lane.nxv4f32(<vscale x 4 x float> %a, i64 1)
  ret <vscale x 4 x float> %out
}

define <vscale x 2 x double> @dupq_f64(<vscale x 2 x double> %a) {
; CHECK-LABEL: dupq_f64:
; CHECK:       // %bb.0:
; CHECK-NEXT:    mov z0.q, z0.q[2]
; CHECK-NEXT:    ret
  %out = call <vscale x 2 x double> @llvm.aarch64.sve.dupq.lane.nxv2f64(<vscale x 2 x double> %a, i64 2)
  ret <vscale x 2 x double> %out
}

define <vscale x 16 x i8> @dupq_lane_i8(<vscale x 16 x i8> %a, i64 %idx) {
; CHECK-LABEL: dupq_lane_i8:
; CHECK:       // %bb.0:
; CHECK-NEXT:    index z1.d, #0, #1
; CHECK-NEXT:    add x8, x0, x0
; CHECK-NEXT:    mov z2.d, x8
; CHECK-NEXT:    and z1.d, z1.d, #0x1
; CHECK-NEXT:    add z1.d, z1.d, z2.d
; CHECK-NEXT:    tbl z0.d, { z0.d }, z1.d
; CHECK-NEXT:    ret
  %out = call <vscale x 16 x i8> @llvm.aarch64.sve.dupq.lane.nxv16i8(<vscale x 16 x i8> %a, i64 %idx)
  ret <vscale x 16 x i8> %out
}
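
; NOTE: A reading of the variable-index lowering above (derived from the
; CHECK lines, for reference only): the quadword index is expanded into a
; TBL mask over 64-bit elements, mask[i] = (i & 1) + 2*idx, so e.g. idx=1
; selects 64-bit elements {2,3}, i.e. quadword 1.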

; NOTE: Identical operation to dupq_lane_i8 (i.e. element type is irrelevant).
define <vscale x 8 x i16> @dupq_lane_i16(<vscale x 8 x i16> %a, i64 %idx) {
; CHECK-LABEL: dupq_lane_i16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    index z1.d, #0, #1
; CHECK-NEXT:    add x8, x0, x0
; CHECK-NEXT:    mov z2.d, x8
; CHECK-NEXT:    and z1.d, z1.d, #0x1
; CHECK-NEXT:    add z1.d, z1.d, z2.d
; CHECK-NEXT:    tbl z0.d, { z0.d }, z1.d
; CHECK-NEXT:    ret
  %out = call <vscale x 8 x i16> @llvm.aarch64.sve.dupq.lane.nxv8i16(<vscale x 8 x i16> %a, i64 %idx)
  ret <vscale x 8 x i16> %out
}

; NOTE: Identical operation to dupq_lane_i8 (i.e. element type is irrelevant).
define <vscale x 4 x i32> @dupq_lane_i32(<vscale x 4 x i32> %a, i64 %idx) {
; CHECK-LABEL: dupq_lane_i32:
; CHECK:       // %bb.0:
; CHECK-NEXT:    index z1.d, #0, #1
; CHECK-NEXT:    add x8, x0, x0
; CHECK-NEXT:    mov z2.d, x8
; CHECK-NEXT:    and z1.d, z1.d, #0x1
; CHECK-NEXT:    add z1.d, z1.d, z2.d
; CHECK-NEXT:    tbl z0.d, { z0.d }, z1.d
; CHECK-NEXT:    ret
  %out = call <vscale x 4 x i32> @llvm.aarch64.sve.dupq.lane.nxv4i32(<vscale x 4 x i32> %a, i64 %idx)
  ret <vscale x 4 x i32> %out
}

; NOTE: Identical operation to dupq_lane_i8 (i.e. element type is irrelevant).
define <vscale x 2 x i64> @dupq_lane_i64(<vscale x 2 x i64> %a, i64 %idx) {
; CHECK-LABEL: dupq_lane_i64:
; CHECK:       // %bb.0:
; CHECK-NEXT:    index z1.d, #0, #1
; CHECK-NEXT:    add x8, x0, x0
; CHECK-NEXT:    mov z2.d, x8
; CHECK-NEXT:    and z1.d, z1.d, #0x1
; CHECK-NEXT:    add z1.d, z1.d, z2.d
; CHECK-NEXT:    tbl z0.d, { z0.d }, z1.d
; CHECK-NEXT:    ret
  %out = call <vscale x 2 x i64> @llvm.aarch64.sve.dupq.lane.nxv2i64(<vscale x 2 x i64> %a, i64 %idx)
  ret <vscale x 2 x i64> %out
}

; NOTE: Identical operation to dupq_lane_i8 (i.e. element type is irrelevant).
define <vscale x 8 x half> @dupq_lane_f16(<vscale x 8 x half> %a, i64 %idx) {
; CHECK-LABEL: dupq_lane_f16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    index z1.d, #0, #1
; CHECK-NEXT:    add x8, x0, x0
; CHECK-NEXT:    mov z2.d, x8
; CHECK-NEXT:    and z1.d, z1.d, #0x1
; CHECK-NEXT:    add z1.d, z1.d, z2.d
; CHECK-NEXT:    tbl z0.d, { z0.d }, z1.d
; CHECK-NEXT:    ret
  %out = call <vscale x 8 x half> @llvm.aarch64.sve.dupq.lane.nxv8f16(<vscale x 8 x half> %a, i64 %idx)
  ret <vscale x 8 x half> %out
}

; NOTE: Identical operation to dupq_lane_i8 (i.e. element type is irrelevant).
define <vscale x 8 x bfloat> @dupq_lane_bf16(<vscale x 8 x bfloat> %a, i64 %idx) #0 {
; CHECK-LABEL: dupq_lane_bf16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    index z1.d, #0, #1
; CHECK-NEXT:    add x8, x0, x0
; CHECK-NEXT:    mov z2.d, x8
; CHECK-NEXT:    and z1.d, z1.d, #0x1
; CHECK-NEXT:    add z1.d, z1.d, z2.d
; CHECK-NEXT:    tbl z0.d, { z0.d }, z1.d
; CHECK-NEXT:    ret
  %out = call <vscale x 8 x bfloat> @llvm.aarch64.sve.dupq.lane.nxv8bf16(<vscale x 8 x bfloat> %a, i64 %idx)
  ret <vscale x 8 x bfloat> %out
}

; NOTE: Identical operation to dupq_lane_i8 (i.e. element type is irrelevant).
define <vscale x 4 x float> @dupq_lane_f32(<vscale x 4 x float> %a, i64 %idx) {
; CHECK-LABEL: dupq_lane_f32:
; CHECK:       // %bb.0:
; CHECK-NEXT:    index z1.d, #0, #1
; CHECK-NEXT:    add x8, x0, x0
; CHECK-NEXT:    mov z2.d, x8
; CHECK-NEXT:    and z1.d, z1.d, #0x1
; CHECK-NEXT:    add z1.d, z1.d, z2.d
; CHECK-NEXT:    tbl z0.d, { z0.d }, z1.d
; CHECK-NEXT:    ret
  %out = call <vscale x 4 x float> @llvm.aarch64.sve.dupq.lane.nxv4f32(<vscale x 4 x float> %a, i64 %idx)
  ret <vscale x 4 x float> %out
}

; NOTE: Identical operation to dupq_lane_i8 (i.e. element type is irrelevant).
define <vscale x 2 x double> @dupq_lane_f64(<vscale x 2 x double> %a, i64 %idx) {
; CHECK-LABEL: dupq_lane_f64:
; CHECK:       // %bb.0:
; CHECK-NEXT:    index z1.d, #0, #1
; CHECK-NEXT:    add x8, x0, x0
; CHECK-NEXT:    mov z2.d, x8
; CHECK-NEXT:    and z1.d, z1.d, #0x1
; CHECK-NEXT:    add z1.d, z1.d, z2.d
; CHECK-NEXT:    tbl z0.d, { z0.d }, z1.d
; CHECK-NEXT:    ret
  %out = call <vscale x 2 x double> @llvm.aarch64.sve.dupq.lane.nxv2f64(<vscale x 2 x double> %a, i64 %idx)
  ret <vscale x 2 x double> %out
}

; NOTE: Index out of range (0-3)
define <vscale x 2 x i64> @dupq_i64_range(<vscale x 2 x i64> %a) {
; SVE-LABEL: dupq_i64_range:
; SVE:       // %bb.0:
; SVE-NEXT:    index z1.d, #0, #1
; SVE-NEXT:    and z1.d, z1.d, #0x1
; SVE-NEXT:    orr z1.d, z1.d, #0x8
; SVE-NEXT:    tbl z0.d, { z0.d }, z1.d
; SVE-NEXT:    ret
;
; SVE2-LABEL: dupq_i64_range:
; SVE2:       // %bb.0:
; SVE2-NEXT:    index z1.d, #0, #1
; SVE2-NEXT:    and z1.d, z1.d, #0x1
; SVE2-NEXT:    add z1.d, z1.d, #8 // =0x8
; SVE2-NEXT:    tbl z0.d, { z0.d }, z1.d
; SVE2-NEXT:    ret
  %out = call <vscale x 2 x i64> @llvm.aarch64.sve.dupq.lane.nxv2i64(<vscale x 2 x i64> %a, i64 4)
  ret <vscale x 2 x i64> %out
}
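
; NOTE: Worked arithmetic for the out-of-range case above: quadword index 4
; starts at 64-bit element 2*4 = 8, so the TBL mask is {8, 9}, built with
; orr (SVE) or add (SVE2) of #8 onto the {0, 1} base mask.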

define dso_local <vscale x 4 x float> @dupq_f32_repeat_complex(float %x, float %y) {
; CHECK-LABEL: dupq_f32_repeat_complex:
; CHECK:       // %bb.0:
; CHECK-NEXT:    // kill: def $s0 killed $s0 def $z0
; CHECK-NEXT:    // kill: def $s1 killed $s1 def $q1
; CHECK-NEXT:    mov v0.s[1], v1.s[0]
; CHECK-NEXT:    mov z0.d, d0
; CHECK-NEXT:    ret
  %1 = insertelement <4 x float> undef, float %x, i64 0
  %2 = insertelement <4 x float> %1, float %y, i64 1
  %3 = call <vscale x 4 x float> @llvm.vector.insert.nxv4f32.v4f32(<vscale x 4 x float> undef, <4 x float> %2, i64 0)
  %4 = bitcast <vscale x 4 x float> %3 to <vscale x 2 x double>
  %5 = shufflevector <vscale x 2 x double> %4, <vscale x 2 x double> poison, <vscale x 2 x i32> zeroinitializer
  %6 = bitcast <vscale x 2 x double> %5 to <vscale x 4 x float>
  ret <vscale x 4 x float> %6
}
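
; NOTE: The repeat_complex tests splat a two-element pair: the scalars are
; packed into lanes 0-1, reinterpreted as one element of twice the width,
; and that element is splatted (mov z0.d, d0 above). The f16 variant below
; follows the same pattern at half the element size.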

define dso_local <vscale x 8 x half> @dupq_f16_repeat_complex(half %x, half %y) {
; CHECK-LABEL: dupq_f16_repeat_complex:
; CHECK:       // %bb.0:
; CHECK-NEXT:    // kill: def $h0 killed $h0 def $z0
; CHECK-NEXT:    // kill: def $h1 killed $h1 def $q1
; CHECK-NEXT:    mov v0.h[1], v1.h[0]
; CHECK-NEXT:    mov z0.s, s0
; CHECK-NEXT:    ret
  %1 = insertelement <8 x half> undef, half %x, i64 0
  %2 = insertelement <8 x half> %1, half %y, i64 1
  %3 = call <vscale x 8 x half> @llvm.vector.insert.nxv8f16.v8f16(<vscale x 8 x half> undef, <8 x half> %2, i64 0)
  %4 = bitcast <vscale x 8 x half> %3 to <vscale x 4 x float>
  %5 = shufflevector <vscale x 4 x float> %4, <vscale x 4 x float> poison, <vscale x 4 x i32> zeroinitializer
  %6 = bitcast <vscale x 4 x float> %5 to <vscale x 8 x half>
  ret <vscale x 8 x half> %6
}

define <vscale x 16 x i8> @ext_i8(<vscale x 16 x i8> %a, <vscale x 16 x i8> %b) {
; SVE-LABEL: ext_i8:
; SVE:       // %bb.0:
; SVE-NEXT:    ext z0.b, z0.b, z1.b, #255
; SVE-NEXT:    ret
;
; SVE2-LABEL: ext_i8:
; SVE2:       // %bb.0:
; SVE2-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1
; SVE2-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1
; SVE2-NEXT:    ext z0.b, { z0.b, z1.b }, #255
; SVE2-NEXT:    ret
  %out = call <vscale x 16 x i8> @llvm.aarch64.sve.ext.nxv16i8(<vscale x 16 x i8> %a,
                                                               <vscale x 16 x i8> %b,
                                                               i32 255)
  ret <vscale x 16 x i8> %out
}

define <vscale x 8 x i16> @ext_i16(<vscale x 8 x i16> %a, <vscale x 8 x i16> %b) {
; SVE-LABEL: ext_i16:
; SVE:       // %bb.0:
; SVE-NEXT:    ext z0.b, z0.b, z1.b, #0
; SVE-NEXT:    ret
;
; SVE2-LABEL: ext_i16:
; SVE2:       // %bb.0:
; SVE2-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1
; SVE2-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1
; SVE2-NEXT:    ext z0.b, { z0.b, z1.b }, #0
; SVE2-NEXT:    ret
  %out = call <vscale x 8 x i16> @llvm.aarch64.sve.ext.nxv8i16(<vscale x 8 x i16> %a,
                                                               <vscale x 8 x i16> %b,
                                                               i32 0)
  ret <vscale x 8 x i16> %out
}

define <vscale x 4 x i32> @ext_i32(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b) {
; SVE-LABEL: ext_i32:
; SVE:       // %bb.0:
; SVE-NEXT:    ext z0.b, z0.b, z1.b, #4
; SVE-NEXT:    ret
;
; SVE2-LABEL: ext_i32:
; SVE2:       // %bb.0:
; SVE2-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1
; SVE2-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1
; SVE2-NEXT:    ext z0.b, { z0.b, z1.b }, #4
; SVE2-NEXT:    ret
  %out = call <vscale x 4 x i32> @llvm.aarch64.sve.ext.nxv4i32(<vscale x 4 x i32> %a,
                                                               <vscale x 4 x i32> %b,
                                                               i32 1)
  ret <vscale x 4 x i32> %out
}

define <vscale x 2 x i64> @ext_i64(<vscale x 2 x i64> %a, <vscale x 2 x i64> %b) {
; SVE-LABEL: ext_i64:
; SVE:       // %bb.0:
; SVE-NEXT:    ext z0.b, z0.b, z1.b, #16
; SVE-NEXT:    ret
;
; SVE2-LABEL: ext_i64:
; SVE2:       // %bb.0:
; SVE2-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1
; SVE2-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1
; SVE2-NEXT:    ext z0.b, { z0.b, z1.b }, #16
; SVE2-NEXT:    ret
  %out = call <vscale x 2 x i64> @llvm.aarch64.sve.ext.nxv2i64(<vscale x 2 x i64> %a,
                                                               <vscale x 2 x i64> %b,
                                                               i32 2)
  ret <vscale x 2 x i64> %out
}

define <vscale x 8 x bfloat> @ext_bf16(<vscale x 8 x bfloat> %a, <vscale x 8 x bfloat> %b) #0 {
; SVE-LABEL: ext_bf16:
; SVE:       // %bb.0:
; SVE-NEXT:    ext z0.b, z0.b, z1.b, #6
; SVE-NEXT:    ret
;
; SVE2-LABEL: ext_bf16:
; SVE2:       // %bb.0:
; SVE2-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1
; SVE2-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1
; SVE2-NEXT:    ext z0.b, { z0.b, z1.b }, #6
; SVE2-NEXT:    ret
  %out = call <vscale x 8 x bfloat> @llvm.aarch64.sve.ext.nxv8bf16(<vscale x 8 x bfloat> %a,
                                                                   <vscale x 8 x bfloat> %b,
                                                                   i32 3)
  ret <vscale x 8 x bfloat> %out
}

define <vscale x 8 x half> @ext_f16(<vscale x 8 x half> %a, <vscale x 8 x half> %b) {
; SVE-LABEL: ext_f16:
; SVE:       // %bb.0:
; SVE-NEXT:    ext z0.b, z0.b, z1.b, #6
; SVE-NEXT:    ret
;
; SVE2-LABEL: ext_f16:
; SVE2:       // %bb.0:
; SVE2-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1
; SVE2-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1
; SVE2-NEXT:    ext z0.b, { z0.b, z1.b }, #6
; SVE2-NEXT:    ret
  %out = call <vscale x 8 x half> @llvm.aarch64.sve.ext.nxv8f16(<vscale x 8 x half> %a,
                                                                <vscale x 8 x half> %b,
                                                                i32 3)
  ret <vscale x 8 x half> %out
}

define <vscale x 4 x float> @ext_f32(<vscale x 4 x float> %a, <vscale x 4 x float> %b) {
; SVE-LABEL: ext_f32:
; SVE:       // %bb.0:
; SVE-NEXT:    ext z0.b, z0.b, z1.b, #16
; SVE-NEXT:    ret
;
; SVE2-LABEL: ext_f32:
; SVE2:       // %bb.0:
; SVE2-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1
; SVE2-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1
; SVE2-NEXT:    ext z0.b, { z0.b, z1.b }, #16
; SVE2-NEXT:    ret
  %out = call <vscale x 4 x float> @llvm.aarch64.sve.ext.nxv4f32(<vscale x 4 x float> %a,
                                                                 <vscale x 4 x float> %b,
                                                                 i32 4)
  ret <vscale x 4 x float> %out
}

define <vscale x 2 x double> @ext_f64(<vscale x 2 x double> %a, <vscale x 2 x double> %b) {
; SVE-LABEL: ext_f64:
; SVE:       // %bb.0:
; SVE-NEXT:    ext z0.b, z0.b, z1.b, #40
; SVE-NEXT:    ret
;
; SVE2-LABEL: ext_f64:
; SVE2:       // %bb.0:
; SVE2-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1
; SVE2-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1
; SVE2-NEXT:    ext z0.b, { z0.b, z1.b }, #40
; SVE2-NEXT:    ret
  %out = call <vscale x 2 x double> @llvm.aarch64.sve.ext.nxv2f64(<vscale x 2 x double> %a,
                                                                  <vscale x 2 x double> %b,
                                                                  i32 5)
  ret <vscale x 2 x double> %out
}

;
; LASTA
;

define i8 @lasta_i8(<vscale x 16 x i1> %pg, <vscale x 16 x i8> %a) {
; CHECK-LABEL: lasta_i8:
; CHECK:       // %bb.0:
; CHECK-NEXT:    lasta w0, p0, z0.b
; CHECK-NEXT:    ret
  %res = call i8 @llvm.aarch64.sve.lasta.nxv16i8(<vscale x 16 x i1> %pg,
                                                 <vscale x 16 x i8> %a)
  ret i8 %res
}

define i16 @lasta_i16(<vscale x 8 x i1> %pg, <vscale x 8 x i16> %a) {
; CHECK-LABEL: lasta_i16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    lasta w0, p0, z0.h
; CHECK-NEXT:    ret
  %res = call i16 @llvm.aarch64.sve.lasta.nxv8i16(<vscale x 8 x i1> %pg,
                                                  <vscale x 8 x i16> %a)
  ret i16 %res
}

define i32 @lasta_i32(<vscale x 4 x i1> %pg, <vscale x 4 x i32> %a) {
; CHECK-LABEL: lasta_i32:
; CHECK:       // %bb.0:
; CHECK-NEXT:    lasta w0, p0, z0.s
; CHECK-NEXT:    ret
  %res = call i32 @llvm.aarch64.sve.lasta.nxv4i32(<vscale x 4 x i1> %pg,
                                                  <vscale x 4 x i32> %a)
  ret i32 %res
}

define i64 @lasta_i64(<vscale x 2 x i1> %pg, <vscale x 2 x i64> %a) {
; CHECK-LABEL: lasta_i64:
; CHECK:       // %bb.0:
; CHECK-NEXT:    lasta x0, p0, z0.d
; CHECK-NEXT:    ret
  %res = call i64 @llvm.aarch64.sve.lasta.nxv2i64(<vscale x 2 x i1> %pg,
                                                  <vscale x 2 x i64> %a)
  ret i64 %res
}

define half @lasta_f16(<vscale x 8 x i1> %pg, <vscale x 8 x half> %a) {
; CHECK-LABEL: lasta_f16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    lasta h0, p0, z0.h
; CHECK-NEXT:    ret
  %res = call half @llvm.aarch64.sve.lasta.nxv8f16(<vscale x 8 x i1> %pg,
                                                   <vscale x 8 x half> %a)
  ret half %res
}

define bfloat @lasta_bf16(<vscale x 8 x i1> %pg, <vscale x 8 x bfloat> %a) #0 {
; CHECK-LABEL: lasta_bf16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    lasta h0, p0, z0.h
; CHECK-NEXT:    ret
  %res = call bfloat @llvm.aarch64.sve.lasta.nxv8bf16(<vscale x 8 x i1> %pg,
                                                      <vscale x 8 x bfloat> %a)
  ret bfloat %res
}

define float @lasta_f32(<vscale x 4 x i1> %pg, <vscale x 4 x float> %a) {
; CHECK-LABEL: lasta_f32:
; CHECK:       // %bb.0:
; CHECK-NEXT:    lasta s0, p0, z0.s
; CHECK-NEXT:    ret
  %res = call float @llvm.aarch64.sve.lasta.nxv4f32(<vscale x 4 x i1> %pg,
                                                    <vscale x 4 x float> %a)
  ret float %res
}

define float @lasta_f32_v2(<vscale x 2 x i1> %pg, <vscale x 2 x float> %a) {
; CHECK-LABEL: lasta_f32_v2:
; CHECK:       // %bb.0:
; CHECK-NEXT:    lasta s0, p0, z0.s
; CHECK-NEXT:    ret
  %res = call float @llvm.aarch64.sve.lasta.nxv2f32(<vscale x 2 x i1> %pg,
                                                    <vscale x 2 x float> %a)
  ret float %res
}

define double @lasta_f64(<vscale x 2 x i1> %pg, <vscale x 2 x double> %a) {
; CHECK-LABEL: lasta_f64:
; CHECK:       // %bb.0:
; CHECK-NEXT:    lasta d0, p0, z0.d
; CHECK-NEXT:    ret
  %res = call double @llvm.aarch64.sve.lasta.nxv2f64(<vscale x 2 x i1> %pg,
                                                     <vscale x 2 x double> %a)
  ret double %res
}

;
; LASTB
;

define i8 @lastb_i8(<vscale x 16 x i1> %pg, <vscale x 16 x i8> %a) {
; CHECK-LABEL: lastb_i8:
; CHECK:       // %bb.0:
; CHECK-NEXT:    lastb w0, p0, z0.b
; CHECK-NEXT:    ret
  %res = call i8 @llvm.aarch64.sve.lastb.nxv16i8(<vscale x 16 x i1> %pg,
                                                 <vscale x 16 x i8> %a)
  ret i8 %res
}

define i16 @lastb_i16(<vscale x 8 x i1> %pg, <vscale x 8 x i16> %a) {
; CHECK-LABEL: lastb_i16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    lastb w0, p0, z0.h
; CHECK-NEXT:    ret
  %res = call i16 @llvm.aarch64.sve.lastb.nxv8i16(<vscale x 8 x i1> %pg,
                                                  <vscale x 8 x i16> %a)
  ret i16 %res
}

define i32 @lastb_i32(<vscale x 4 x i1> %pg, <vscale x 4 x i32> %a) {
; CHECK-LABEL: lastb_i32:
; CHECK:       // %bb.0:
; CHECK-NEXT:    lastb w0, p0, z0.s
; CHECK-NEXT:    ret
  %res = call i32 @llvm.aarch64.sve.lastb.nxv4i32(<vscale x 4 x i1> %pg,
                                                  <vscale x 4 x i32> %a)
  ret i32 %res
}

define i64 @lastb_i64(<vscale x 2 x i1> %pg, <vscale x 2 x i64> %a) {
; CHECK-LABEL: lastb_i64:
; CHECK:       // %bb.0:
; CHECK-NEXT:    lastb x0, p0, z0.d
; CHECK-NEXT:    ret
  %res = call i64 @llvm.aarch64.sve.lastb.nxv2i64(<vscale x 2 x i1> %pg,
                                                  <vscale x 2 x i64> %a)
  ret i64 %res
}

define half @lastb_f16(<vscale x 8 x i1> %pg, <vscale x 8 x half> %a) {
; CHECK-LABEL: lastb_f16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    lastb h0, p0, z0.h
; CHECK-NEXT:    ret
  %res = call half @llvm.aarch64.sve.lastb.nxv8f16(<vscale x 8 x i1> %pg,
                                                   <vscale x 8 x half> %a)
  ret half %res
}

define bfloat @lastb_bf16(<vscale x 8 x i1> %pg, <vscale x 8 x bfloat> %a) #0 {
; CHECK-LABEL: lastb_bf16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    lastb h0, p0, z0.h
; CHECK-NEXT:    ret
  %res = call bfloat @llvm.aarch64.sve.lastb.nxv8bf16(<vscale x 8 x i1> %pg,
                                                      <vscale x 8 x bfloat> %a)
  ret bfloat %res
}

define float @lastb_f32(<vscale x 4 x i1> %pg, <vscale x 4 x float> %a) {
; CHECK-LABEL: lastb_f32:
; CHECK:       // %bb.0:
; CHECK-NEXT:    lastb s0, p0, z0.s
; CHECK-NEXT:    ret
  %res = call float @llvm.aarch64.sve.lastb.nxv4f32(<vscale x 4 x i1> %pg,
                                                    <vscale x 4 x float> %a)
  ret float %res
}

define float @lastb_f32_v2(<vscale x 2 x i1> %pg, <vscale x 2 x float> %a) {
; CHECK-LABEL: lastb_f32_v2:
; CHECK:       // %bb.0:
; CHECK-NEXT:    lastb s0, p0, z0.s
; CHECK-NEXT:    ret
  %res = call float @llvm.aarch64.sve.lastb.nxv2f32(<vscale x 2 x i1> %pg,
                                                    <vscale x 2 x float> %a)
  ret float %res
}

define double @lastb_f64(<vscale x 2 x i1> %pg, <vscale x 2 x double> %a) {
; CHECK-LABEL: lastb_f64:
; CHECK:       // %bb.0:
; CHECK-NEXT:    lastb d0, p0, z0.d
; CHECK-NEXT:    ret
  %res = call double @llvm.aarch64.sve.lastb.nxv2f64(<vscale x 2 x i1> %pg,
                                                     <vscale x 2 x double> %a)
  ret double %res
}

;
; COMPACT
;

define <vscale x 4 x i32> @compact_i32(<vscale x 4 x i1> %pg, <vscale x 4 x i32> %a) {
; CHECK-LABEL: compact_i32:
; CHECK:       // %bb.0:
; CHECK-NEXT:    compact z0.s, p0, z0.s
; CHECK-NEXT:    ret
  %out = call <vscale x 4 x i32> @llvm.aarch64.sve.compact.nxv4i32(<vscale x 4 x i1> %pg,
                                                                   <vscale x 4 x i32> %a)
  ret <vscale x 4 x i32> %out
}

define <vscale x 2 x i64> @compact_i64(<vscale x 2 x i1> %pg, <vscale x 2 x i64> %a) {
; CHECK-LABEL: compact_i64:
; CHECK:       // %bb.0:
; CHECK-NEXT:    compact z0.d, p0, z0.d
; CHECK-NEXT:    ret
  %out = call <vscale x 2 x i64> @llvm.aarch64.sve.compact.nxv2i64(<vscale x 2 x i1> %pg,
                                                                   <vscale x 2 x i64> %a)
  ret <vscale x 2 x i64> %out
}

define <vscale x 4 x float> @compact_f32(<vscale x 4 x i1> %pg, <vscale x 4 x float> %a) {
; CHECK-LABEL: compact_f32:
; CHECK:       // %bb.0:
; CHECK-NEXT:    compact z0.s, p0, z0.s
; CHECK-NEXT:    ret
  %out = call <vscale x 4 x float> @llvm.aarch64.sve.compact.nxv4f32(<vscale x 4 x i1> %pg,
                                                                     <vscale x 4 x float> %a)
  ret <vscale x 4 x float> %out
}

define <vscale x 2 x double> @compact_f64(<vscale x 2 x i1> %pg, <vscale x 2 x double> %a) {
; CHECK-LABEL: compact_f64:
; CHECK:       // %bb.0:
; CHECK-NEXT:    compact z0.d, p0, z0.d
; CHECK-NEXT:    ret
  %out = call <vscale x 2 x double> @llvm.aarch64.sve.compact.nxv2f64(<vscale x 2 x i1> %pg,
                                                                      <vscale x 2 x double> %a)
  ret <vscale x 2 x double> %out
}

;
; REV
;

define <vscale x 16 x i1> @rev_nxv16i1(<vscale x 16 x i1> %a) {
; CHECK-LABEL: rev_nxv16i1:
; CHECK:       // %bb.0:
; CHECK-NEXT:    rev p0.b, p0.b
; CHECK-NEXT:    ret
  %res = call <vscale x 16 x i1> @llvm.aarch64.sve.rev.nxv16i1(<vscale x 16 x i1> %a)
  ret <vscale x 16 x i1> %res
}

define <vscale x 8 x i1> @rev_nxv8i1(<vscale x 8 x i1> %a) {
; CHECK-LABEL: rev_nxv8i1:
; CHECK:       // %bb.0:
; CHECK-NEXT:    rev p0.h, p0.h
; CHECK-NEXT:    ret
  %res = call <vscale x 8 x i1> @llvm.aarch64.sve.rev.nxv8i1(<vscale x 8 x i1> %a)
  ret <vscale x 8 x i1> %res
}

define <vscale x 4 x i1> @rev_nxv4i1(<vscale x 4 x i1> %a) {
; CHECK-LABEL: rev_nxv4i1:
; CHECK:       // %bb.0:
; CHECK-NEXT:    rev p0.s, p0.s
; CHECK-NEXT:    ret
  %res = call <vscale x 4 x i1> @llvm.aarch64.sve.rev.nxv4i1(<vscale x 4 x i1> %a)
  ret <vscale x 4 x i1> %res
}

define <vscale x 2 x i1> @rev_nxv2i1(<vscale x 2 x i1> %a) {
; CHECK-LABEL: rev_nxv2i1:
; CHECK:       // %bb.0:
; CHECK-NEXT:    rev p0.d, p0.d
; CHECK-NEXT:    ret
  %res = call <vscale x 2 x i1> @llvm.aarch64.sve.rev.nxv2i1(<vscale x 2 x i1> %a)
  ret <vscale x 2 x i1> %res
}

define <vscale x 16 x i1> @rev_b16(<vscale x 16 x i1> %a) {
; CHECK-LABEL: rev_b16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    rev p0.h, p0.h
; CHECK-NEXT:    ret
  %res = call <vscale x 16 x i1> @llvm.aarch64.sve.rev.b16(<vscale x 16 x i1> %a)
  ret <vscale x 16 x i1> %res
}

define <vscale x 16 x i1> @rev_b32(<vscale x 16 x i1> %a) {
; CHECK-LABEL: rev_b32:
; CHECK:       // %bb.0:
; CHECK-NEXT:    rev p0.s, p0.s
; CHECK-NEXT:    ret
  %res = call <vscale x 16 x i1> @llvm.aarch64.sve.rev.b32(<vscale x 16 x i1> %a)
  ret <vscale x 16 x i1> %res
}

define <vscale x 16 x i1> @rev_b64(<vscale x 16 x i1> %a) {
; CHECK-LABEL: rev_b64:
; CHECK:       // %bb.0:
; CHECK-NEXT:    rev p0.d, p0.d
; CHECK-NEXT:    ret
  %res = call <vscale x 16 x i1> @llvm.aarch64.sve.rev.b64(<vscale x 16 x i1> %a)
  ret <vscale x 16 x i1> %res
}

define <vscale x 16 x i8> @rev_i8(<vscale x 16 x i8> %a) {
; CHECK-LABEL: rev_i8:
; CHECK:       // %bb.0:
; CHECK-NEXT:    rev z0.b, z0.b
; CHECK-NEXT:    ret
  %res = call <vscale x 16 x i8> @llvm.aarch64.sve.rev.nxv16i8(<vscale x 16 x i8> %a)
  ret <vscale x 16 x i8> %res
}

define <vscale x 8 x i16> @rev_i16(<vscale x 8 x i16> %a) {
; CHECK-LABEL: rev_i16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    rev z0.h, z0.h
; CHECK-NEXT:    ret
  %res = call <vscale x 8 x i16> @llvm.aarch64.sve.rev.nxv8i16(<vscale x 8 x i16> %a)
  ret <vscale x 8 x i16> %res
}

define <vscale x 4 x i32> @rev_i32(<vscale x 4 x i32> %a) {
; CHECK-LABEL: rev_i32:
; CHECK:       // %bb.0:
; CHECK-NEXT:    rev z0.s, z0.s
; CHECK-NEXT:    ret
  %res = call <vscale x 4 x i32> @llvm.aarch64.sve.rev.nxv4i32(<vscale x 4 x i32> %a)
  ret <vscale x 4 x i32> %res
}

define <vscale x 2 x i64> @rev_i64(<vscale x 2 x i64> %a) {
; CHECK-LABEL: rev_i64:
; CHECK:       // %bb.0:
; CHECK-NEXT:    rev z0.d, z0.d
; CHECK-NEXT:    ret
  %res = call <vscale x 2 x i64> @llvm.aarch64.sve.rev.nxv2i64(<vscale x 2 x i64> %a)
  ret <vscale x 2 x i64> %res
}

define <vscale x 8 x bfloat> @rev_bf16(<vscale x 8 x bfloat> %a) #0 {
; CHECK-LABEL: rev_bf16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    rev z0.h, z0.h
; CHECK-NEXT:    ret
  %res = call <vscale x 8 x bfloat> @llvm.aarch64.sve.rev.nxv8bf16(<vscale x 8 x bfloat> %a)
  ret <vscale x 8 x bfloat> %res
}

define <vscale x 8 x half> @rev_f16(<vscale x 8 x half> %a) {
; CHECK-LABEL: rev_f16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    rev z0.h, z0.h
; CHECK-NEXT:    ret
  %res = call <vscale x 8 x half> @llvm.aarch64.sve.rev.nxv8f16(<vscale x 8 x half> %a)
  ret <vscale x 8 x half> %res
}

define <vscale x 4 x float> @rev_f32(<vscale x 4 x float> %a) {
; CHECK-LABEL: rev_f32:
; CHECK:       // %bb.0:
; CHECK-NEXT:    rev z0.s, z0.s
; CHECK-NEXT:    ret
  %res = call <vscale x 4 x float> @llvm.aarch64.sve.rev.nxv4f32(<vscale x 4 x float> %a)
  ret <vscale x 4 x float> %res
}

define <vscale x 2 x double> @rev_f64(<vscale x 2 x double> %a) {
; CHECK-LABEL: rev_f64:
; CHECK:       // %bb.0:
; CHECK-NEXT:    rev z0.d, z0.d
; CHECK-NEXT:    ret
  %res = call <vscale x 2 x double> @llvm.aarch64.sve.rev.nxv2f64(<vscale x 2 x double> %a)
  ret <vscale x 2 x double> %res
}

;
; SPLICE
;

define <vscale x 16 x i8> @splice_i8(<vscale x 16 x i1> %pg, <vscale x 16 x i8> %a, <vscale x 16 x i8> %b) {
; SVE-LABEL: splice_i8:
; SVE:       // %bb.0:
; SVE-NEXT:    splice z0.b, p0, z0.b, z1.b
; SVE-NEXT:    ret
;
; SVE2-LABEL: splice_i8:
; SVE2:       // %bb.0:
; SVE2-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1
; SVE2-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1
; SVE2-NEXT:    splice z0.b, p0, { z0.b, z1.b }
; SVE2-NEXT:    ret
  %out = call <vscale x 16 x i8> @llvm.aarch64.sve.splice.nxv16i8(<vscale x 16 x i1> %pg,
                                                                  <vscale x 16 x i8> %a,
                                                                  <vscale x 16 x i8> %b)
  ret <vscale x 16 x i8> %out
}

define <vscale x 8 x i16> @splice_i16(<vscale x 8 x i1> %pg, <vscale x 8 x i16> %a, <vscale x 8 x i16> %b) {
; SVE-LABEL: splice_i16:
; SVE:       // %bb.0:
; SVE-NEXT:    splice z0.h, p0, z0.h, z1.h
; SVE-NEXT:    ret
;
; SVE2-LABEL: splice_i16:
; SVE2:       // %bb.0:
; SVE2-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1
; SVE2-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1
; SVE2-NEXT:    splice z0.h, p0, { z0.h, z1.h }
; SVE2-NEXT:    ret
  %out = call <vscale x 8 x i16> @llvm.aarch64.sve.splice.nxv8i16(<vscale x 8 x i1> %pg,
                                                                  <vscale x 8 x i16> %a,
                                                                  <vscale x 8 x i16> %b)
  ret <vscale x 8 x i16> %out
}

define <vscale x 4 x i32> @splice_i32(<vscale x 4 x i1> %pg, <vscale x 4 x i32> %a, <vscale x 4 x i32> %b) {
; SVE-LABEL: splice_i32:
; SVE:       // %bb.0:
; SVE-NEXT:    splice z0.s, p0, z0.s, z1.s
; SVE-NEXT:    ret
;
; SVE2-LABEL: splice_i32:
; SVE2:       // %bb.0:
; SVE2-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1
; SVE2-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1
; SVE2-NEXT:    splice z0.s, p0, { z0.s, z1.s }
; SVE2-NEXT:    ret
  %out = call <vscale x 4 x i32> @llvm.aarch64.sve.splice.nxv4i32(<vscale x 4 x i1> %pg,
                                                                  <vscale x 4 x i32> %a,
                                                                  <vscale x 4 x i32> %b)
  ret <vscale x 4 x i32> %out
}

define <vscale x 2 x i64> @splice_i64(<vscale x 2 x i1> %pg, <vscale x 2 x i64> %a, <vscale x 2 x i64> %b) {
; SVE-LABEL: splice_i64:
; SVE:       // %bb.0:
; SVE-NEXT:    splice z0.d, p0, z0.d, z1.d
; SVE-NEXT:    ret
;
; SVE2-LABEL: splice_i64:
; SVE2:       // %bb.0:
; SVE2-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1
; SVE2-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1
; SVE2-NEXT:    splice z0.d, p0, { z0.d, z1.d }
; SVE2-NEXT:    ret
  %out = call <vscale x 2 x i64> @llvm.aarch64.sve.splice.nxv2i64(<vscale x 2 x i1> %pg,
                                                                  <vscale x 2 x i64> %a,
                                                                  <vscale x 2 x i64> %b)
  ret <vscale x 2 x i64> %out
}

define <vscale x 8 x bfloat> @splice_bf16(<vscale x 8 x i1> %pg, <vscale x 8 x bfloat> %a, <vscale x 8 x bfloat> %b) #0 {
; SVE-LABEL: splice_bf16:
; SVE:       // %bb.0:
; SVE-NEXT:    splice z0.h, p0, z0.h, z1.h
; SVE-NEXT:    ret
;
; SVE2-LABEL: splice_bf16:
; SVE2:       // %bb.0:
; SVE2-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1
; SVE2-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1
; SVE2-NEXT:    splice z0.h, p0, { z0.h, z1.h }
; SVE2-NEXT:    ret
  %out = call <vscale x 8 x bfloat> @llvm.aarch64.sve.splice.nxv8bf16(<vscale x 8 x i1> %pg,
                                                                      <vscale x 8 x bfloat> %a,
                                                                      <vscale x 8 x bfloat> %b)
  ret <vscale x 8 x bfloat> %out
}

define <vscale x 8 x half> @splice_f16(<vscale x 8 x i1> %pg, <vscale x 8 x half> %a, <vscale x 8 x half> %b) {
; SVE-LABEL: splice_f16:
; SVE:       // %bb.0:
; SVE-NEXT:    splice z0.h, p0, z0.h, z1.h
; SVE-NEXT:    ret
;
; SVE2-LABEL: splice_f16:
; SVE2:       // %bb.0:
; SVE2-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1
; SVE2-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1
; SVE2-NEXT:    splice z0.h, p0, { z0.h, z1.h }
; SVE2-NEXT:    ret
  %out = call <vscale x 8 x half> @llvm.aarch64.sve.splice.nxv8f16(<vscale x 8 x i1> %pg,
                                                                   <vscale x 8 x half> %a,
                                                                   <vscale x 8 x half> %b)
  ret <vscale x 8 x half> %out
}

define <vscale x 4 x float> @splice_f32(<vscale x 4 x i1> %pg, <vscale x 4 x float> %a, <vscale x 4 x float> %b) {
; SVE-LABEL: splice_f32:
; SVE:       // %bb.0:
; SVE-NEXT:    splice z0.s, p0, z0.s, z1.s
; SVE-NEXT:    ret
;
; SVE2-LABEL: splice_f32:
; SVE2:       // %bb.0:
; SVE2-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1
; SVE2-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1
; SVE2-NEXT:    splice z0.s, p0, { z0.s, z1.s }
; SVE2-NEXT:    ret
  %out = call <vscale x 4 x float> @llvm.aarch64.sve.splice.nxv4f32(<vscale x 4 x i1> %pg,
                                                                    <vscale x 4 x float> %a,
                                                                    <vscale x 4 x float> %b)
  ret <vscale x 4 x float> %out
}

define <vscale x 2 x double> @splice_f64(<vscale x 2 x i1> %pg, <vscale x 2 x double> %a, <vscale x 2 x double> %b) {
; SVE-LABEL: splice_f64:
; SVE:       // %bb.0:
; SVE-NEXT:    splice z0.d, p0, z0.d, z1.d
; SVE-NEXT:    ret
;
; SVE2-LABEL: splice_f64:
; SVE2:       // %bb.0:
; SVE2-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1
; SVE2-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1
; SVE2-NEXT:    splice z0.d, p0, { z0.d, z1.d }
; SVE2-NEXT:    ret
  %out = call <vscale x 2 x double> @llvm.aarch64.sve.splice.nxv2f64(<vscale x 2 x i1> %pg,
                                                                     <vscale x 2 x double> %a,
                                                                     <vscale x 2 x double> %b)
  ret <vscale x 2 x double> %out
}

;
; SUNPKHI
;

define <vscale x 8 x i16> @sunpkhi_i16(<vscale x 16 x i8> %a) {
; CHECK-LABEL: sunpkhi_i16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    sunpkhi z0.h, z0.b
; CHECK-NEXT:    ret
  %res = call <vscale x 8 x i16> @llvm.aarch64.sve.sunpkhi.nxv8i16(<vscale x 16 x i8> %a)
  ret <vscale x 8 x i16> %res
}

define <vscale x 4 x i32> @sunpkhi_i32(<vscale x 8 x i16> %a) {
; CHECK-LABEL: sunpkhi_i32:
; CHECK:       // %bb.0:
; CHECK-NEXT:    sunpkhi z0.s, z0.h
; CHECK-NEXT:    ret
  %res = call <vscale x 4 x i32> @llvm.aarch64.sve.sunpkhi.nxv4i32(<vscale x 8 x i16> %a)
  ret <vscale x 4 x i32> %res
}

define <vscale x 2 x i64> @sunpkhi_i64(<vscale x 4 x i32> %a) {
; CHECK-LABEL: sunpkhi_i64:
; CHECK:       // %bb.0:
; CHECK-NEXT:    sunpkhi z0.d, z0.s
; CHECK-NEXT:    ret
  %res = call <vscale x 2 x i64> @llvm.aarch64.sve.sunpkhi.nxv2i64(<vscale x 4 x i32> %a)
  ret <vscale x 2 x i64> %res
}

;
; SUNPKLO
;

define <vscale x 8 x i16> @sunpklo_i16(<vscale x 16 x i8> %a) {
; CHECK-LABEL: sunpklo_i16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    sunpklo z0.h, z0.b
; CHECK-NEXT:    ret
  %res = call <vscale x 8 x i16> @llvm.aarch64.sve.sunpklo.nxv8i16(<vscale x 16 x i8> %a)
  ret <vscale x 8 x i16> %res
}

define <vscale x 4 x i32> @sunpklo_i32(<vscale x 8 x i16> %a) {
; CHECK-LABEL: sunpklo_i32:
; CHECK:       // %bb.0:
; CHECK-NEXT:    sunpklo z0.s, z0.h
; CHECK-NEXT:    ret
  %res = call <vscale x 4 x i32> @llvm.aarch64.sve.sunpklo.nxv4i32(<vscale x 8 x i16> %a)
  ret <vscale x 4 x i32> %res
}

define <vscale x 2 x i64> @sunpklo_i64(<vscale x 4 x i32> %a) {
; CHECK-LABEL: sunpklo_i64:
; CHECK:       // %bb.0:
; CHECK-NEXT:    sunpklo z0.d, z0.s
; CHECK-NEXT:    ret
  %res = call <vscale x 2 x i64> @llvm.aarch64.sve.sunpklo.nxv2i64(<vscale x 4 x i32> %a)
  ret <vscale x 2 x i64> %res
}

;
; TBL
;

define <vscale x 16 x i8> @tbl_i8(<vscale x 16 x i8> %a, <vscale x 16 x i8> %b) {
; CHECK-LABEL: tbl_i8:
; CHECK:       // %bb.0:
; CHECK-NEXT:    tbl z0.b, { z0.b }, z1.b
; CHECK-NEXT:    ret
  %out = call <vscale x 16 x i8> @llvm.aarch64.sve.tbl.nxv16i8(<vscale x 16 x i8> %a,
                                                               <vscale x 16 x i8> %b)
  ret <vscale x 16 x i8> %out
}

define <vscale x 8 x i16> @tbl_i16(<vscale x 8 x i16> %a, <vscale x 8 x i16> %b) {
; CHECK-LABEL: tbl_i16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    tbl z0.h, { z0.h }, z1.h
; CHECK-NEXT:    ret
  %out = call <vscale x 8 x i16> @llvm.aarch64.sve.tbl.nxv8i16(<vscale x 8 x i16> %a,
                                                               <vscale x 8 x i16> %b)
  ret <vscale x 8 x i16> %out
}

define <vscale x 4 x i32> @tbl_i32(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b) {
; CHECK-LABEL: tbl_i32:
; CHECK:       // %bb.0:
; CHECK-NEXT:    tbl z0.s, { z0.s }, z1.s
; CHECK-NEXT:    ret
  %out = call <vscale x 4 x i32> @llvm.aarch64.sve.tbl.nxv4i32(<vscale x 4 x i32> %a,
                                                               <vscale x 4 x i32> %b)
  ret <vscale x 4 x i32> %out
}

define <vscale x 2 x i64> @tbl_i64(<vscale x 2 x i64> %a, <vscale x 2 x i64> %b) {
; CHECK-LABEL: tbl_i64:
; CHECK:       // %bb.0:
; CHECK-NEXT:    tbl z0.d, { z0.d }, z1.d
; CHECK-NEXT:    ret
  %out = call <vscale x 2 x i64> @llvm.aarch64.sve.tbl.nxv2i64(<vscale x 2 x i64> %a,
                                                               <vscale x 2 x i64> %b)
  ret <vscale x 2 x i64> %out
}

define <vscale x 8 x half> @tbl_f16(<vscale x 8 x half> %a, <vscale x 8 x i16> %b) {
; CHECK-LABEL: tbl_f16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    tbl z0.h, { z0.h }, z1.h
; CHECK-NEXT:    ret
  %out = call <vscale x 8 x half> @llvm.aarch64.sve.tbl.nxv8f16(<vscale x 8 x half> %a,
                                                                <vscale x 8 x i16> %b)
  ret <vscale x 8 x half> %out
}

define <vscale x 8 x bfloat> @tbl_bf16(<vscale x 8 x bfloat> %a, <vscale x 8 x i16> %b) #0 {
; CHECK-LABEL: tbl_bf16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    tbl z0.h, { z0.h }, z1.h
; CHECK-NEXT:    ret
  %out = call <vscale x 8 x bfloat> @llvm.aarch64.sve.tbl.nxv8bf16(<vscale x 8 x bfloat> %a,
                                                                   <vscale x 8 x i16> %b)
  ret <vscale x 8 x bfloat> %out
}

define <vscale x 4 x float> @tbl_f32(<vscale x 4 x float> %a, <vscale x 4 x i32> %b) {
; CHECK-LABEL: tbl_f32:
; CHECK:       // %bb.0:
; CHECK-NEXT:    tbl z0.s, { z0.s }, z1.s
; CHECK-NEXT:    ret
  %out = call <vscale x 4 x float> @llvm.aarch64.sve.tbl.nxv4f32(<vscale x 4 x float> %a,
                                                                 <vscale x 4 x i32> %b)
  ret <vscale x 4 x float> %out
}

define <vscale x 2 x double> @tbl_f64(<vscale x 2 x double> %a, <vscale x 2 x i64> %b) {
; CHECK-LABEL: tbl_f64:
; CHECK:       // %bb.0:
; CHECK-NEXT:    tbl z0.d, { z0.d }, z1.d
; CHECK-NEXT:    ret
  %out = call <vscale x 2 x double> @llvm.aarch64.sve.tbl.nxv2f64(<vscale x 2 x double> %a,
                                                                  <vscale x 2 x i64> %b)
  ret <vscale x 2 x double> %out
}

;
; UUNPKHI
;

define <vscale x 8 x i16> @uunpkhi_i16(<vscale x 16 x i8> %a) {
; CHECK-LABEL: uunpkhi_i16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    uunpkhi z0.h, z0.b
; CHECK-NEXT:    ret
  %res = call <vscale x 8 x i16> @llvm.aarch64.sve.uunpkhi.nxv8i16(<vscale x 16 x i8> %a)
  ret <vscale x 8 x i16> %res
}

define <vscale x 4 x i32> @uunpkhi_i32(<vscale x 8 x i16> %a) {
; CHECK-LABEL: uunpkhi_i32:
; CHECK:       // %bb.0:
; CHECK-NEXT:    uunpkhi z0.s, z0.h
; CHECK-NEXT:    ret
  %res = call <vscale x 4 x i32> @llvm.aarch64.sve.uunpkhi.nxv4i32(<vscale x 8 x i16> %a)
  ret <vscale x 4 x i32> %res
}

define <vscale x 2 x i64> @uunpkhi_i64(<vscale x 4 x i32> %a) {
; CHECK-LABEL: uunpkhi_i64:
; CHECK:       // %bb.0:
; CHECK-NEXT:    uunpkhi z0.d, z0.s
; CHECK-NEXT:    ret
  %res = call <vscale x 2 x i64> @llvm.aarch64.sve.uunpkhi.nxv2i64(<vscale x 4 x i32> %a)
  ret <vscale x 2 x i64> %res
}

;
; UUNPKLO
;

define <vscale x 8 x i16> @uunpklo_i16(<vscale x 16 x i8> %a) {
; CHECK-LABEL: uunpklo_i16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    uunpklo z0.h, z0.b
; CHECK-NEXT:    ret
  %res = call <vscale x 8 x i16> @llvm.aarch64.sve.uunpklo.nxv8i16(<vscale x 16 x i8> %a)
  ret <vscale x 8 x i16> %res
}

define <vscale x 4 x i32> @uunpklo_i32(<vscale x 8 x i16> %a) {
; CHECK-LABEL: uunpklo_i32:
; CHECK:       // %bb.0:
; CHECK-NEXT:    uunpklo z0.s, z0.h
; CHECK-NEXT:    ret
  %res = call <vscale x 4 x i32> @llvm.aarch64.sve.uunpklo.nxv4i32(<vscale x 8 x i16> %a)
  ret <vscale x 4 x i32> %res
}

define <vscale x 2 x i64> @uunpklo_i64(<vscale x 4 x i32> %a) {
; CHECK-LABEL: uunpklo_i64:
; CHECK:       // %bb.0:
; CHECK-NEXT:    uunpklo z0.d, z0.s
; CHECK-NEXT:    ret
  %res = call <vscale x 2 x i64> @llvm.aarch64.sve.uunpklo.nxv2i64(<vscale x 4 x i32> %a)
  ret <vscale x 2 x i64> %res
}

;
; TRN1
;

define <vscale x 16 x i1> @trn1_nxv16i1(<vscale x 16 x i1> %a, <vscale x 16 x i1> %b) {
; CHECK-LABEL: trn1_nxv16i1:
; CHECK:       // %bb.0:
; CHECK-NEXT:    trn1 p0.b, p0.b, p1.b
; CHECK-NEXT:    ret
  %out = call <vscale x 16 x i1> @llvm.aarch64.sve.trn1.nxv16i1(<vscale x 16 x i1> %a,
                                                                <vscale x 16 x i1> %b)
  ret <vscale x 16 x i1> %out
}

define <vscale x 8 x i1> @trn1_nxv8i1(<vscale x 8 x i1> %a, <vscale x 8 x i1> %b) {
; CHECK-LABEL: trn1_nxv8i1:
; CHECK:       // %bb.0:
; CHECK-NEXT:    trn1 p0.h, p0.h, p1.h
; CHECK-NEXT:    ret
  %out = call <vscale x 8 x i1> @llvm.aarch64.sve.trn1.nxv8i1(<vscale x 8 x i1> %a,
                                                              <vscale x 8 x i1> %b)
  ret <vscale x 8 x i1> %out
}

define <vscale x 4 x i1> @trn1_nxv4i1(<vscale x 4 x i1> %a, <vscale x 4 x i1> %b) {
; CHECK-LABEL: trn1_nxv4i1:
; CHECK:       // %bb.0:
; CHECK-NEXT:    trn1 p0.s, p0.s, p1.s
; CHECK-NEXT:    ret
  %out = call <vscale x 4 x i1> @llvm.aarch64.sve.trn1.nxv4i1(<vscale x 4 x i1> %a,
                                                              <vscale x 4 x i1> %b)
  ret <vscale x 4 x i1> %out
}

define <vscale x 2 x i1> @trn1_nxv2i1(<vscale x 2 x i1> %a, <vscale x 2 x i1> %b) {
; CHECK-LABEL: trn1_nxv2i1:
; CHECK:       // %bb.0:
; CHECK-NEXT:    trn1 p0.d, p0.d, p1.d
; CHECK-NEXT:    ret
  %out = call <vscale x 2 x i1> @llvm.aarch64.sve.trn1.nxv2i1(<vscale x 2 x i1> %a,
                                                              <vscale x 2 x i1> %b)
  ret <vscale x 2 x i1> %out
}

define <vscale x 16 x i1> @trn1_b16(<vscale x 16 x i1> %a, <vscale x 16 x i1> %b) {
; CHECK-LABEL: trn1_b16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    trn1 p0.h, p0.h, p1.h
; CHECK-NEXT:    ret
  %out = call <vscale x 16 x i1> @llvm.aarch64.sve.trn1.b16(<vscale x 16 x i1> %a,
                                                            <vscale x 16 x i1> %b)
  ret <vscale x 16 x i1> %out
}

define <vscale x 16 x i1> @trn1_b32(<vscale x 16 x i1> %a, <vscale x 16 x i1> %b) {
; CHECK-LABEL: trn1_b32:
; CHECK:       // %bb.0:
; CHECK-NEXT:    trn1 p0.s, p0.s, p1.s
; CHECK-NEXT:    ret
  %out = call <vscale x 16 x i1> @llvm.aarch64.sve.trn1.b32(<vscale x 16 x i1> %a,
                                                            <vscale x 16 x i1> %b)
  ret <vscale x 16 x i1> %out
}

define <vscale x 16 x i1> @trn1_b64(<vscale x 16 x i1> %a, <vscale x 16 x i1> %b) {
; CHECK-LABEL: trn1_b64:
; CHECK:       // %bb.0:
; CHECK-NEXT:    trn1 p0.d, p0.d, p1.d
; CHECK-NEXT:    ret
  %out = call <vscale x 16 x i1> @llvm.aarch64.sve.trn1.b64(<vscale x 16 x i1> %a,
                                                            <vscale x 16 x i1> %b)
  ret <vscale x 16 x i1> %out
}

define <vscale x 16 x i8> @trn1_i8(<vscale x 16 x i8> %a, <vscale x 16 x i8> %b) {
; CHECK-LABEL: trn1_i8:
; CHECK:       // %bb.0:
; CHECK-NEXT:    trn1 z0.b, z0.b, z1.b
; CHECK-NEXT:    ret
  %out = call <vscale x 16 x i8> @llvm.aarch64.sve.trn1.nxv16i8(<vscale x 16 x i8> %a,
                                                                <vscale x 16 x i8> %b)
  ret <vscale x 16 x i8> %out
}

define <vscale x 8 x i16> @trn1_i16(<vscale x 8 x i16> %a, <vscale x 8 x i16> %b) {
; CHECK-LABEL: trn1_i16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    trn1 z0.h, z0.h, z1.h
; CHECK-NEXT:    ret
  %out = call <vscale x 8 x i16> @llvm.aarch64.sve.trn1.nxv8i16(<vscale x 8 x i16> %a,
                                                                <vscale x 8 x i16> %b)
  ret <vscale x 8 x i16> %out
}

define <vscale x 4 x i32> @trn1_i32(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b) {
; CHECK-LABEL: trn1_i32:
; CHECK:       // %bb.0:
; CHECK-NEXT:    trn1 z0.s, z0.s, z1.s
; CHECK-NEXT:    ret
  %out = call <vscale x 4 x i32> @llvm.aarch64.sve.trn1.nxv4i32(<vscale x 4 x i32> %a,
                                                                <vscale x 4 x i32> %b)
  ret <vscale x 4 x i32> %out
}

define <vscale x 2 x i64> @trn1_i64(<vscale x 2 x i64> %a, <vscale x 2 x i64> %b) {
; CHECK-LABEL: trn1_i64:
; CHECK:       // %bb.0:
; CHECK-NEXT:    trn1 z0.d, z0.d, z1.d
; CHECK-NEXT:    ret
  %out = call <vscale x 2 x i64> @llvm.aarch64.sve.trn1.nxv2i64(<vscale x 2 x i64> %a,
                                                                <vscale x 2 x i64> %b)
  ret <vscale x 2 x i64> %out
}

define <vscale x 2 x half> @trn1_f16_v2(<vscale x 2 x half> %a, <vscale x 2 x half> %b) {
; CHECK-LABEL: trn1_f16_v2:
; CHECK:       // %bb.0:
; CHECK-NEXT:    trn1 z0.d, z0.d, z1.d
; CHECK-NEXT:    ret
  %out = call <vscale x 2 x half> @llvm.aarch64.sve.trn1.nxv2f16(<vscale x 2 x half> %a,
                                                                 <vscale x 2 x half> %b)
  ret <vscale x 2 x half> %out
}

define <vscale x 4 x half> @trn1_f16_v4(<vscale x 4 x half> %a, <vscale x 4 x half> %b) {
; CHECK-LABEL: trn1_f16_v4:
; CHECK:       // %bb.0:
; CHECK-NEXT:    trn1 z0.s, z0.s, z1.s
; CHECK-NEXT:    ret
  %out = call <vscale x 4 x half> @llvm.aarch64.sve.trn1.nxv4f16(<vscale x 4 x half> %a,
                                                                 <vscale x 4 x half> %b)
  ret <vscale x 4 x half> %out
}

define <vscale x 8 x bfloat> @trn1_bf16(<vscale x 8 x bfloat> %a, <vscale x 8 x bfloat> %b) #0 {
; CHECK-LABEL: trn1_bf16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    trn1 z0.h, z0.h, z1.h
; CHECK-NEXT:    ret
  %out = call <vscale x 8 x bfloat> @llvm.aarch64.sve.trn1.nxv8bf16(<vscale x 8 x bfloat> %a,
                                                                    <vscale x 8 x bfloat> %b)
  ret <vscale x 8 x bfloat> %out
}

define <vscale x 8 x half> @trn1_f16(<vscale x 8 x half> %a, <vscale x 8 x half> %b) {
; CHECK-LABEL: trn1_f16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    trn1 z0.h, z0.h, z1.h
; CHECK-NEXT:    ret
  %out = call <vscale x 8 x half> @llvm.aarch64.sve.trn1.nxv8f16(<vscale x 8 x half> %a,
                                                                 <vscale x 8 x half> %b)
  ret <vscale x 8 x half> %out
}

define <vscale x 2 x float> @trn1_f32_v2(<vscale x 2 x float> %a, <vscale x 2 x float> %b) {
; CHECK-LABEL: trn1_f32_v2:
; CHECK:       // %bb.0:
; CHECK-NEXT:    trn1 z0.d, z0.d, z1.d
; CHECK-NEXT:    ret
  %out = call <vscale x 2 x float> @llvm.aarch64.sve.trn1.nxv2f32(<vscale x 2 x float> %a,
                                                                  <vscale x 2 x float> %b)
  ret <vscale x 2 x float> %out
}

define <vscale x 4 x float> @trn1_f32(<vscale x 4 x float> %a, <vscale x 4 x float> %b) {
; CHECK-LABEL: trn1_f32:
; CHECK:       // %bb.0:
; CHECK-NEXT:    trn1 z0.s, z0.s, z1.s
; CHECK-NEXT:    ret
  %out = call <vscale x 4 x float> @llvm.aarch64.sve.trn1.nxv4f32(<vscale x 4 x float> %a,
                                                                  <vscale x 4 x float> %b)
  ret <vscale x 4 x float> %out
}

define <vscale x 2 x double> @trn1_f64(<vscale x 2 x double> %a, <vscale x 2 x double> %b) {
; CHECK-LABEL: trn1_f64:
; CHECK:       // %bb.0:
; CHECK-NEXT:    trn1 z0.d, z0.d, z1.d
; CHECK-NEXT:    ret
  %out = call <vscale x 2 x double> @llvm.aarch64.sve.trn1.nxv2f64(<vscale x 2 x double> %a,
                                                                   <vscale x 2 x double> %b)
  ret <vscale x 2 x double> %out
}

;
; TRN2
;

define <vscale x 16 x i1> @trn2_nxv16i1(<vscale x 16 x i1> %a, <vscale x 16 x i1> %b) {
; CHECK-LABEL: trn2_nxv16i1:
; CHECK:       // %bb.0:
; CHECK-NEXT:    trn2 p0.b, p0.b, p1.b
; CHECK-NEXT:    ret
  %out = call <vscale x 16 x i1> @llvm.aarch64.sve.trn2.nxv16i1(<vscale x 16 x i1> %a,
                                                                <vscale x 16 x i1> %b)
  ret <vscale x 16 x i1> %out
}

define <vscale x 8 x i1> @trn2_nxv8i1(<vscale x 8 x i1> %a, <vscale x 8 x i1> %b) {
; CHECK-LABEL: trn2_nxv8i1:
; CHECK:       // %bb.0:
; CHECK-NEXT:    trn2 p0.h, p0.h, p1.h
; CHECK-NEXT:    ret
  %out = call <vscale x 8 x i1> @llvm.aarch64.sve.trn2.nxv8i1(<vscale x 8 x i1> %a,
                                                              <vscale x 8 x i1> %b)
  ret <vscale x 8 x i1> %out
}

define <vscale x 4 x i1> @trn2_nxv4i1(<vscale x 4 x i1> %a, <vscale x 4 x i1> %b) {
; CHECK-LABEL: trn2_nxv4i1:
; CHECK:       // %bb.0:
; CHECK-NEXT:    trn2 p0.s, p0.s, p1.s
; CHECK-NEXT:    ret
  %out = call <vscale x 4 x i1> @llvm.aarch64.sve.trn2.nxv4i1(<vscale x 4 x i1> %a,
                                                              <vscale x 4 x i1> %b)
  ret <vscale x 4 x i1> %out
}

define <vscale x 2 x i1> @trn2_nxv2i1(<vscale x 2 x i1> %a, <vscale x 2 x i1> %b) {
; CHECK-LABEL: trn2_nxv2i1:
; CHECK:       // %bb.0:
; CHECK-NEXT:    trn2 p0.d, p0.d, p1.d
; CHECK-NEXT:    ret
  %out = call <vscale x 2 x i1> @llvm.aarch64.sve.trn2.nxv2i1(<vscale x 2 x i1> %a,
                                                              <vscale x 2 x i1> %b)
  ret <vscale x 2 x i1> %out
}

define <vscale x 16 x i1> @trn2_b16(<vscale x 16 x i1> %a, <vscale x 16 x i1> %b) {
; CHECK-LABEL: trn2_b16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    trn2 p0.h, p0.h, p1.h
; CHECK-NEXT:    ret
  %out = call <vscale x 16 x i1> @llvm.aarch64.sve.trn2.b16(<vscale x 16 x i1> %a,
                                                            <vscale x 16 x i1> %b)
  ret <vscale x 16 x i1> %out
}

define <vscale x 16 x i1> @trn2_b32(<vscale x 16 x i1> %a, <vscale x 16 x i1> %b) {
; CHECK-LABEL: trn2_b32:
; CHECK:       // %bb.0:
; CHECK-NEXT:    trn2 p0.s, p0.s, p1.s
; CHECK-NEXT:    ret
  %out = call <vscale x 16 x i1> @llvm.aarch64.sve.trn2.b32(<vscale x 16 x i1> %a,
                                                            <vscale x 16 x i1> %b)
  ret <vscale x 16 x i1> %out
}

define <vscale x 16 x i1> @trn2_b64(<vscale x 16 x i1> %a, <vscale x 16 x i1> %b) {
; CHECK-LABEL: trn2_b64:
; CHECK:       // %bb.0:
; CHECK-NEXT:    trn2 p0.d, p0.d, p1.d
; CHECK-NEXT:    ret
  %out = call <vscale x 16 x i1> @llvm.aarch64.sve.trn2.b64(<vscale x 16 x i1> %a,
                                                            <vscale x 16 x i1> %b)
  ret <vscale x 16 x i1> %out
}

define <vscale x 16 x i8> @trn2_i8(<vscale x 16 x i8> %a, <vscale x 16 x i8> %b) {
; CHECK-LABEL: trn2_i8:
; CHECK:       // %bb.0:
; CHECK-NEXT:    trn2 z0.b, z0.b, z1.b
; CHECK-NEXT:    ret
  %out = call <vscale x 16 x i8> @llvm.aarch64.sve.trn2.nxv16i8(<vscale x 16 x i8> %a,
                                                                <vscale x 16 x i8> %b)
  ret <vscale x 16 x i8> %out
}

define <vscale x 8 x i16> @trn2_i16(<vscale x 8 x i16> %a, <vscale x 8 x i16> %b) {
; CHECK-LABEL: trn2_i16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    trn2 z0.h, z0.h, z1.h
; CHECK-NEXT:    ret
  %out = call <vscale x 8 x i16> @llvm.aarch64.sve.trn2.nxv8i16(<vscale x 8 x i16> %a,
                                                                <vscale x 8 x i16> %b)
  ret <vscale x 8 x i16> %out
}

define <vscale x 4 x i32> @trn2_i32(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b) {
; CHECK-LABEL: trn2_i32:
; CHECK:       // %bb.0:
; CHECK-NEXT:    trn2 z0.s, z0.s, z1.s
; CHECK-NEXT:    ret
  %out = call <vscale x 4 x i32> @llvm.aarch64.sve.trn2.nxv4i32(<vscale x 4 x i32> %a,
                                                                <vscale x 4 x i32> %b)
  ret <vscale x 4 x i32> %out
}

define <vscale x 2 x i64> @trn2_i64(<vscale x 2 x i64> %a, <vscale x 2 x i64> %b) {
; CHECK-LABEL: trn2_i64:
; CHECK:       // %bb.0:
; CHECK-NEXT:    trn2 z0.d, z0.d, z1.d
; CHECK-NEXT:    ret
  %out = call <vscale x 2 x i64> @llvm.aarch64.sve.trn2.nxv2i64(<vscale x 2 x i64> %a,
                                                                <vscale x 2 x i64> %b)
  ret <vscale x 2 x i64> %out
}

define <vscale x 2 x half> @trn2_f16_v2(<vscale x 2 x half> %a, <vscale x 2 x half> %b) {
; CHECK-LABEL: trn2_f16_v2:
; CHECK:       // %bb.0:
; CHECK-NEXT:    trn2 z0.d, z0.d, z1.d
; CHECK-NEXT:    ret
  %out = call <vscale x 2 x half> @llvm.aarch64.sve.trn2.nxv2f16(<vscale x 2 x half> %a,
                                                                 <vscale x 2 x half> %b)
  ret <vscale x 2 x half> %out
}

define <vscale x 4 x half> @trn2_f16_v4(<vscale x 4 x half> %a, <vscale x 4 x half> %b) {
; CHECK-LABEL: trn2_f16_v4:
; CHECK:       // %bb.0:
; CHECK-NEXT:    trn2 z0.s, z0.s, z1.s
; CHECK-NEXT:    ret
  %out = call <vscale x 4 x half> @llvm.aarch64.sve.trn2.nxv4f16(<vscale x 4 x half> %a,
                                                                 <vscale x 4 x half> %b)
  ret <vscale x 4 x half> %out
}

define <vscale x 8 x bfloat> @trn2_bf16(<vscale x 8 x bfloat> %a, <vscale x 8 x bfloat> %b) #0 {
; CHECK-LABEL: trn2_bf16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    trn2 z0.h, z0.h, z1.h
; CHECK-NEXT:    ret
  %out = call <vscale x 8 x bfloat> @llvm.aarch64.sve.trn2.nxv8bf16(<vscale x 8 x bfloat> %a,
                                                                    <vscale x 8 x bfloat> %b)
  ret <vscale x 8 x bfloat> %out
}

define <vscale x 8 x half> @trn2_f16(<vscale x 8 x half> %a, <vscale x 8 x half> %b) {
; CHECK-LABEL: trn2_f16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    trn2 z0.h, z0.h, z1.h
; CHECK-NEXT:    ret
  %out = call <vscale x 8 x half> @llvm.aarch64.sve.trn2.nxv8f16(<vscale x 8 x half> %a,
                                                                 <vscale x 8 x half> %b)
  ret <vscale x 8 x half> %out
}

define <vscale x 2 x float> @trn2_f32_v2(<vscale x 2 x float> %a, <vscale x 2 x float> %b) {
; CHECK-LABEL: trn2_f32_v2:
; CHECK:       // %bb.0:
; CHECK-NEXT:    trn2 z0.d, z0.d, z1.d
; CHECK-NEXT:    ret
  %out = call <vscale x 2 x float> @llvm.aarch64.sve.trn2.nxv2f32(<vscale x 2 x float> %a,
                                                                  <vscale x 2 x float> %b)
  ret <vscale x 2 x float> %out
}

define <vscale x 4 x float> @trn2_f32(<vscale x 4 x float> %a, <vscale x 4 x float> %b) {
; CHECK-LABEL: trn2_f32:
; CHECK:       // %bb.0:
; CHECK-NEXT:    trn2 z0.s, z0.s, z1.s
; CHECK-NEXT:    ret
  %out = call <vscale x 4 x float> @llvm.aarch64.sve.trn2.nxv4f32(<vscale x 4 x float> %a,
                                                                  <vscale x 4 x float> %b)
  ret <vscale x 4 x float> %out
}

define <vscale x 2 x double> @trn2_f64(<vscale x 2 x double> %a, <vscale x 2 x double> %b) {
; CHECK-LABEL: trn2_f64:
; CHECK:       // %bb.0:
; CHECK-NEXT:    trn2 z0.d, z0.d, z1.d
; CHECK-NEXT:    ret
  %out = call <vscale x 2 x double> @llvm.aarch64.sve.trn2.nxv2f64(<vscale x 2 x double> %a,
                                                                   <vscale x 2 x double> %b)
  ret <vscale x 2 x double> %out
}

;
; UZP1
;

define <vscale x 16 x i1> @uzp1_nxv16i1(<vscale x 16 x i1> %a, <vscale x 16 x i1> %b) {
; CHECK-LABEL: uzp1_nxv16i1:
; CHECK:       // %bb.0:
; CHECK-NEXT:    uzp1 p0.b, p0.b, p1.b
; CHECK-NEXT:    ret
  %out = call <vscale x 16 x i1> @llvm.aarch64.sve.uzp1.nxv16i1(<vscale x 16 x i1> %a,
                                                                <vscale x 16 x i1> %b)
  ret <vscale x 16 x i1> %out
}

define <vscale x 8 x i1> @uzp1_nxv8i1(<vscale x 8 x i1> %a, <vscale x 8 x i1> %b) {
; CHECK-LABEL: uzp1_nxv8i1:
; CHECK:       // %bb.0:
; CHECK-NEXT:    uzp1 p0.h, p0.h, p1.h
; CHECK-NEXT:    ret
  %out = call <vscale x 8 x i1> @llvm.aarch64.sve.uzp1.nxv8i1(<vscale x 8 x i1> %a,
                                                              <vscale x 8 x i1> %b)
  ret <vscale x 8 x i1> %out
}

define <vscale x 4 x i1> @uzp1_nxv4i1(<vscale x 4 x i1> %a, <vscale x 4 x i1> %b) {
; CHECK-LABEL: uzp1_nxv4i1:
; CHECK:       // %bb.0:
; CHECK-NEXT:    uzp1 p0.s, p0.s, p1.s
; CHECK-NEXT:    ret
  %out = call <vscale x 4 x i1> @llvm.aarch64.sve.uzp1.nxv4i1(<vscale x 4 x i1> %a,
                                                              <vscale x 4 x i1> %b)
  ret <vscale x 4 x i1> %out
}

define <vscale x 2 x i1> @uzp1_nxv2i1(<vscale x 2 x i1> %a, <vscale x 2 x i1> %b) {
; CHECK-LABEL: uzp1_nxv2i1:
; CHECK:       // %bb.0:
; CHECK-NEXT:    uzp1 p0.d, p0.d, p1.d
; CHECK-NEXT:    ret
  %out = call <vscale x 2 x i1> @llvm.aarch64.sve.uzp1.nxv2i1(<vscale x 2 x i1> %a,
                                                              <vscale x 2 x i1> %b)
  ret <vscale x 2 x i1> %out
}

define <vscale x 16 x i1> @uzp1_b16(<vscale x 16 x i1> %a, <vscale x 16 x i1> %b) {
; CHECK-LABEL: uzp1_b16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    uzp1 p0.h, p0.h, p1.h
; CHECK-NEXT:    ret
  %out = call <vscale x 16 x i1> @llvm.aarch64.sve.uzp1.b16(<vscale x 16 x i1> %a,
                                                            <vscale x 16 x i1> %b)
  ret <vscale x 16 x i1> %out
}

define <vscale x 16 x i1> @uzp1_b32(<vscale x 16 x i1> %a, <vscale x 16 x i1> %b) {
; CHECK-LABEL: uzp1_b32:
; CHECK:       // %bb.0:
; CHECK-NEXT:    uzp1 p0.s, p0.s, p1.s
; CHECK-NEXT:    ret
  %out = call <vscale x 16 x i1> @llvm.aarch64.sve.uzp1.b32(<vscale x 16 x i1> %a,
                                                            <vscale x 16 x i1> %b)
  ret <vscale x 16 x i1> %out
}

define <vscale x 16 x i1> @uzp1_b64(<vscale x 16 x i1> %a, <vscale x 16 x i1> %b) {
; CHECK-LABEL: uzp1_b64:
; CHECK:       // %bb.0:
; CHECK-NEXT:    uzp1 p0.d, p0.d, p1.d
; CHECK-NEXT:    ret
  %out = call <vscale x 16 x i1> @llvm.aarch64.sve.uzp1.b64(<vscale x 16 x i1> %a,
                                                            <vscale x 16 x i1> %b)
  ret <vscale x 16 x i1> %out
}

define <vscale x 16 x i8> @uzp1_i8(<vscale x 16 x i8> %a, <vscale x 16 x i8> %b) {
; CHECK-LABEL: uzp1_i8:
; CHECK:       // %bb.0:
; CHECK-NEXT:    uzp1 z0.b, z0.b, z1.b
; CHECK-NEXT:    ret
  %out = call <vscale x 16 x i8> @llvm.aarch64.sve.uzp1.nxv16i8(<vscale x 16 x i8> %a,
                                                                <vscale x 16 x i8> %b)
  ret <vscale x 16 x i8> %out
}

define <vscale x 8 x i16> @uzp1_i16(<vscale x 8 x i16> %a, <vscale x 8 x i16> %b) {
; CHECK-LABEL: uzp1_i16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    uzp1 z0.h, z0.h, z1.h
; CHECK-NEXT:    ret
  %out = call <vscale x 8 x i16> @llvm.aarch64.sve.uzp1.nxv8i16(<vscale x 8 x i16> %a,
                                                                <vscale x 8 x i16> %b)
  ret <vscale x 8 x i16> %out
}

define <vscale x 4 x i32> @uzp1_i32(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b) {
; CHECK-LABEL: uzp1_i32:
; CHECK:       // %bb.0:
; CHECK-NEXT:    uzp1 z0.s, z0.s, z1.s
; CHECK-NEXT:    ret
  %out = call <vscale x 4 x i32> @llvm.aarch64.sve.uzp1.nxv4i32(<vscale x 4 x i32> %a,
                                                                <vscale x 4 x i32> %b)
  ret <vscale x 4 x i32> %out
}

define <vscale x 2 x i64> @uzp1_i64(<vscale x 2 x i64> %a, <vscale x 2 x i64> %b) {
; CHECK-LABEL: uzp1_i64:
; CHECK:       // %bb.0:
; CHECK-NEXT:    uzp1 z0.d, z0.d, z1.d
; CHECK-NEXT:    ret
  %out = call <vscale x 2 x i64> @llvm.aarch64.sve.uzp1.nxv2i64(<vscale x 2 x i64> %a,
                                                                <vscale x 2 x i64> %b)
  ret <vscale x 2 x i64> %out
}

define <vscale x 2 x half> @uzp1_f16_v2(<vscale x 2 x half> %a, <vscale x 2 x half> %b) {
; CHECK-LABEL: uzp1_f16_v2:
; CHECK:       // %bb.0:
; CHECK-NEXT:    uzp1 z0.d, z0.d, z1.d
; CHECK-NEXT:    ret
  %out = call <vscale x 2 x half> @llvm.aarch64.sve.uzp1.nxv2f16(<vscale x 2 x half> %a,
                                                                 <vscale x 2 x half> %b)
  ret <vscale x 2 x half> %out
}

define <vscale x 4 x half> @uzp1_f16_v4(<vscale x 4 x half> %a, <vscale x 4 x half> %b) {
; CHECK-LABEL: uzp1_f16_v4:
; CHECK:       // %bb.0:
; CHECK-NEXT:    uzp1 z0.s, z0.s, z1.s
; CHECK-NEXT:    ret
  %out = call <vscale x 4 x half> @llvm.aarch64.sve.uzp1.nxv4f16(<vscale x 4 x half> %a,
                                                                 <vscale x 4 x half> %b)
  ret <vscale x 4 x half> %out
}

define <vscale x 8 x bfloat> @uzp1_bf16(<vscale x 8 x bfloat> %a, <vscale x 8 x bfloat> %b) #0 {
; CHECK-LABEL: uzp1_bf16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    uzp1 z0.h, z0.h, z1.h
; CHECK-NEXT:    ret
  %out = call <vscale x 8 x bfloat> @llvm.aarch64.sve.uzp1.nxv8bf16(<vscale x 8 x bfloat> %a,
                                                                    <vscale x 8 x bfloat> %b)
  ret <vscale x 8 x bfloat> %out
}

define <vscale x 8 x half> @uzp1_f16(<vscale x 8 x half> %a, <vscale x 8 x half> %b) {
; CHECK-LABEL: uzp1_f16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    uzp1 z0.h, z0.h, z1.h
; CHECK-NEXT:    ret
  %out = call <vscale x 8 x half> @llvm.aarch64.sve.uzp1.nxv8f16(<vscale x 8 x half> %a,
                                                                 <vscale x 8 x half> %b)
  ret <vscale x 8 x half> %out
}

define <vscale x 2 x float> @uzp1_f32_v2(<vscale x 2 x float> %a, <vscale x 2 x float> %b) {
; CHECK-LABEL: uzp1_f32_v2:
; CHECK:       // %bb.0:
; CHECK-NEXT:    uzp1 z0.d, z0.d, z1.d
; CHECK-NEXT:    ret
  %out = call <vscale x 2 x float> @llvm.aarch64.sve.uzp1.nxv2f32(<vscale x 2 x float> %a,
                                                                  <vscale x 2 x float> %b)
  ret <vscale x 2 x float> %out
}

define <vscale x 4 x float> @uzp1_f32(<vscale x 4 x float> %a, <vscale x 4 x float> %b) {
; CHECK-LABEL: uzp1_f32:
; CHECK:       // %bb.0:
; CHECK-NEXT:    uzp1 z0.s, z0.s, z1.s
; CHECK-NEXT:    ret
  %out = call <vscale x 4 x float> @llvm.aarch64.sve.uzp1.nxv4f32(<vscale x 4 x float> %a,
                                                                  <vscale x 4 x float> %b)
  ret <vscale x 4 x float> %out
}
2043 define <vscale x 2 x double> @uzp1_f64(<vscale x 2 x double> %a, <vscale x 2 x double> %b) {
2044 ; CHECK-LABEL: uzp1_f64:
2046 ; CHECK-NEXT: uzp1 z0.d, z0.d, z1.d
2048 %out = call <vscale x 2 x double> @llvm.aarch64.sve.uzp1.nxv2f64(<vscale x 2 x double> %a,
2049 <vscale x 2 x double> %b)
2050 ret <vscale x 2 x double> %out
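
; For reference (comment only, not checked by FileCheck): uzp2 is the odd
; counterpart of uzp1: { a0, a1, a2, a3 } and { b0, b1, b2, b3 } give
; { a1, a3, b1, b3 }.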
define <vscale x 16 x i1> @uzp2_nxv16i1(<vscale x 16 x i1> %a, <vscale x 16 x i1> %b) {
; CHECK-LABEL: uzp2_nxv16i1:
; CHECK: // %bb.0:
; CHECK-NEXT: uzp2 p0.b, p0.b, p1.b
; CHECK-NEXT: ret
  %out = call <vscale x 16 x i1> @llvm.aarch64.sve.uzp2.nxv16i1(<vscale x 16 x i1> %a,
                                                                <vscale x 16 x i1> %b)
  ret <vscale x 16 x i1> %out
}

define <vscale x 8 x i1> @uzp2_nxv8i1(<vscale x 8 x i1> %a, <vscale x 8 x i1> %b) {
; CHECK-LABEL: uzp2_nxv8i1:
; CHECK: // %bb.0:
; CHECK-NEXT: uzp2 p0.h, p0.h, p1.h
; CHECK-NEXT: ret
  %out = call <vscale x 8 x i1> @llvm.aarch64.sve.uzp2.nxv8i1(<vscale x 8 x i1> %a,
                                                              <vscale x 8 x i1> %b)
  ret <vscale x 8 x i1> %out
}

define <vscale x 4 x i1> @uzp2_nxv4i1(<vscale x 4 x i1> %a, <vscale x 4 x i1> %b) {
; CHECK-LABEL: uzp2_nxv4i1:
; CHECK: // %bb.0:
; CHECK-NEXT: uzp2 p0.s, p0.s, p1.s
; CHECK-NEXT: ret
  %out = call <vscale x 4 x i1> @llvm.aarch64.sve.uzp2.nxv4i1(<vscale x 4 x i1> %a,
                                                              <vscale x 4 x i1> %b)
  ret <vscale x 4 x i1> %out
}

define <vscale x 2 x i1> @uzp2_nxv2i1(<vscale x 2 x i1> %a, <vscale x 2 x i1> %b) {
; CHECK-LABEL: uzp2_nxv2i1:
; CHECK: // %bb.0:
; CHECK-NEXT: uzp2 p0.d, p0.d, p1.d
; CHECK-NEXT: ret
  %out = call <vscale x 2 x i1> @llvm.aarch64.sve.uzp2.nxv2i1(<vscale x 2 x i1> %a,
                                                              <vscale x 2 x i1> %b)
  ret <vscale x 2 x i1> %out
}

define <vscale x 16 x i1> @uzp2_b16(<vscale x 16 x i1> %a, <vscale x 16 x i1> %b) {
; CHECK-LABEL: uzp2_b16:
; CHECK: // %bb.0:
; CHECK-NEXT: uzp2 p0.h, p0.h, p1.h
; CHECK-NEXT: ret
  %out = call <vscale x 16 x i1> @llvm.aarch64.sve.uzp2.b16(<vscale x 16 x i1> %a,
                                                            <vscale x 16 x i1> %b)
  ret <vscale x 16 x i1> %out
}

define <vscale x 16 x i1> @uzp2_b32(<vscale x 16 x i1> %a, <vscale x 16 x i1> %b) {
; CHECK-LABEL: uzp2_b32:
; CHECK: // %bb.0:
; CHECK-NEXT: uzp2 p0.s, p0.s, p1.s
; CHECK-NEXT: ret
  %out = call <vscale x 16 x i1> @llvm.aarch64.sve.uzp2.b32(<vscale x 16 x i1> %a,
                                                            <vscale x 16 x i1> %b)
  ret <vscale x 16 x i1> %out
}

define <vscale x 16 x i1> @uzp2_b64(<vscale x 16 x i1> %a, <vscale x 16 x i1> %b) {
; CHECK-LABEL: uzp2_b64:
; CHECK: // %bb.0:
; CHECK-NEXT: uzp2 p0.d, p0.d, p1.d
; CHECK-NEXT: ret
  %out = call <vscale x 16 x i1> @llvm.aarch64.sve.uzp2.b64(<vscale x 16 x i1> %a,
                                                            <vscale x 16 x i1> %b)
  ret <vscale x 16 x i1> %out
}

define <vscale x 16 x i8> @uzp2_i8(<vscale x 16 x i8> %a, <vscale x 16 x i8> %b) {
; CHECK-LABEL: uzp2_i8:
; CHECK: // %bb.0:
; CHECK-NEXT: uzp2 z0.b, z0.b, z1.b
; CHECK-NEXT: ret
  %out = call <vscale x 16 x i8> @llvm.aarch64.sve.uzp2.nxv16i8(<vscale x 16 x i8> %a,
                                                                <vscale x 16 x i8> %b)
  ret <vscale x 16 x i8> %out
}

define <vscale x 8 x i16> @uzp2_i16(<vscale x 8 x i16> %a, <vscale x 8 x i16> %b) {
; CHECK-LABEL: uzp2_i16:
; CHECK: // %bb.0:
; CHECK-NEXT: uzp2 z0.h, z0.h, z1.h
; CHECK-NEXT: ret
  %out = call <vscale x 8 x i16> @llvm.aarch64.sve.uzp2.nxv8i16(<vscale x 8 x i16> %a,
                                                                <vscale x 8 x i16> %b)
  ret <vscale x 8 x i16> %out
}

define <vscale x 4 x i32> @uzp2_i32(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b) {
; CHECK-LABEL: uzp2_i32:
; CHECK: // %bb.0:
; CHECK-NEXT: uzp2 z0.s, z0.s, z1.s
; CHECK-NEXT: ret
  %out = call <vscale x 4 x i32> @llvm.aarch64.sve.uzp2.nxv4i32(<vscale x 4 x i32> %a,
                                                                <vscale x 4 x i32> %b)
  ret <vscale x 4 x i32> %out
}

define <vscale x 2 x i64> @uzp2_i64(<vscale x 2 x i64> %a, <vscale x 2 x i64> %b) {
; CHECK-LABEL: uzp2_i64:
; CHECK: // %bb.0:
; CHECK-NEXT: uzp2 z0.d, z0.d, z1.d
; CHECK-NEXT: ret
  %out = call <vscale x 2 x i64> @llvm.aarch64.sve.uzp2.nxv2i64(<vscale x 2 x i64> %a,
                                                                <vscale x 2 x i64> %b)
  ret <vscale x 2 x i64> %out
}

define <vscale x 2 x half> @uzp2_f16_v2(<vscale x 2 x half> %a, <vscale x 2 x half> %b) {
; CHECK-LABEL: uzp2_f16_v2:
; CHECK: // %bb.0:
; CHECK-NEXT: uzp2 z0.d, z0.d, z1.d
; CHECK-NEXT: ret
  %out = call <vscale x 2 x half> @llvm.aarch64.sve.uzp2.nxv2f16(<vscale x 2 x half> %a,
                                                                 <vscale x 2 x half> %b)
  ret <vscale x 2 x half> %out
}

define <vscale x 4 x half> @uzp2_f16_v4(<vscale x 4 x half> %a, <vscale x 4 x half> %b) {
; CHECK-LABEL: uzp2_f16_v4:
; CHECK: // %bb.0:
; CHECK-NEXT: uzp2 z0.s, z0.s, z1.s
; CHECK-NEXT: ret
  %out = call <vscale x 4 x half> @llvm.aarch64.sve.uzp2.nxv4f16(<vscale x 4 x half> %a,
                                                                 <vscale x 4 x half> %b)
  ret <vscale x 4 x half> %out
}

define <vscale x 8 x bfloat> @uzp2_bf16(<vscale x 8 x bfloat> %a, <vscale x 8 x bfloat> %b) #0 {
; CHECK-LABEL: uzp2_bf16:
; CHECK: // %bb.0:
; CHECK-NEXT: uzp2 z0.h, z0.h, z1.h
; CHECK-NEXT: ret
  %out = call <vscale x 8 x bfloat> @llvm.aarch64.sve.uzp2.nxv8bf16(<vscale x 8 x bfloat> %a,
                                                                    <vscale x 8 x bfloat> %b)
  ret <vscale x 8 x bfloat> %out
}

define <vscale x 8 x half> @uzp2_f16(<vscale x 8 x half> %a, <vscale x 8 x half> %b) {
; CHECK-LABEL: uzp2_f16:
; CHECK: // %bb.0:
; CHECK-NEXT: uzp2 z0.h, z0.h, z1.h
; CHECK-NEXT: ret
  %out = call <vscale x 8 x half> @llvm.aarch64.sve.uzp2.nxv8f16(<vscale x 8 x half> %a,
                                                                 <vscale x 8 x half> %b)
  ret <vscale x 8 x half> %out
}

define <vscale x 2 x float> @uzp2_f32_v2(<vscale x 2 x float> %a, <vscale x 2 x float> %b) {
; CHECK-LABEL: uzp2_f32_v2:
; CHECK: // %bb.0:
; CHECK-NEXT: uzp2 z0.d, z0.d, z1.d
; CHECK-NEXT: ret
  %out = call <vscale x 2 x float> @llvm.aarch64.sve.uzp2.nxv2f32(<vscale x 2 x float> %a,
                                                                  <vscale x 2 x float> %b)
  ret <vscale x 2 x float> %out
}

define <vscale x 4 x float> @uzp2_f32(<vscale x 4 x float> %a, <vscale x 4 x float> %b) {
; CHECK-LABEL: uzp2_f32:
; CHECK: // %bb.0:
; CHECK-NEXT: uzp2 z0.s, z0.s, z1.s
; CHECK-NEXT: ret
  %out = call <vscale x 4 x float> @llvm.aarch64.sve.uzp2.nxv4f32(<vscale x 4 x float> %a,
                                                                  <vscale x 4 x float> %b)
  ret <vscale x 4 x float> %out
}

define <vscale x 2 x double> @uzp2_f64(<vscale x 2 x double> %a, <vscale x 2 x double> %b) {
; CHECK-LABEL: uzp2_f64:
; CHECK: // %bb.0:
; CHECK-NEXT: uzp2 z0.d, z0.d, z1.d
; CHECK-NEXT: ret
  %out = call <vscale x 2 x double> @llvm.aarch64.sve.uzp2.nxv2f64(<vscale x 2 x double> %a,
                                                                   <vscale x 2 x double> %b)
  ret <vscale x 2 x double> %out
}

;
; ZIP1
;
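
; For reference (comment only, not checked by FileCheck): zip1 interleaves
; the elements of the low halves of the two sources: { a0, a1, a2, a3 } and
; { b0, b1, b2, b3 } give { a0, b0, a1, b1 }.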
define <vscale x 16 x i1> @zip1_nxv16i1(<vscale x 16 x i1> %a, <vscale x 16 x i1> %b) {
; CHECK-LABEL: zip1_nxv16i1:
; CHECK: // %bb.0:
; CHECK-NEXT: zip1 p0.b, p0.b, p1.b
; CHECK-NEXT: ret
  %out = call <vscale x 16 x i1> @llvm.aarch64.sve.zip1.nxv16i1(<vscale x 16 x i1> %a,
                                                                <vscale x 16 x i1> %b)
  ret <vscale x 16 x i1> %out
}

define <vscale x 8 x i1> @zip1_nxv8i1(<vscale x 8 x i1> %a, <vscale x 8 x i1> %b) {
; CHECK-LABEL: zip1_nxv8i1:
; CHECK: // %bb.0:
; CHECK-NEXT: zip1 p0.h, p0.h, p1.h
; CHECK-NEXT: ret
  %out = call <vscale x 8 x i1> @llvm.aarch64.sve.zip1.nxv8i1(<vscale x 8 x i1> %a,
                                                              <vscale x 8 x i1> %b)
  ret <vscale x 8 x i1> %out
}

define <vscale x 4 x i1> @zip1_nxv4i1(<vscale x 4 x i1> %a, <vscale x 4 x i1> %b) {
; CHECK-LABEL: zip1_nxv4i1:
; CHECK: // %bb.0:
; CHECK-NEXT: zip1 p0.s, p0.s, p1.s
; CHECK-NEXT: ret
  %out = call <vscale x 4 x i1> @llvm.aarch64.sve.zip1.nxv4i1(<vscale x 4 x i1> %a,
                                                              <vscale x 4 x i1> %b)
  ret <vscale x 4 x i1> %out
}

define <vscale x 2 x i1> @zip1_nxv2i1(<vscale x 2 x i1> %a, <vscale x 2 x i1> %b) {
; CHECK-LABEL: zip1_nxv2i1:
; CHECK: // %bb.0:
; CHECK-NEXT: zip1 p0.d, p0.d, p1.d
; CHECK-NEXT: ret
  %out = call <vscale x 2 x i1> @llvm.aarch64.sve.zip1.nxv2i1(<vscale x 2 x i1> %a,
                                                              <vscale x 2 x i1> %b)
  ret <vscale x 2 x i1> %out
}

define <vscale x 16 x i1> @zip1_b16(<vscale x 16 x i1> %a, <vscale x 16 x i1> %b) {
; CHECK-LABEL: zip1_b16:
; CHECK: // %bb.0:
; CHECK-NEXT: zip1 p0.h, p0.h, p1.h
; CHECK-NEXT: ret
  %out = call <vscale x 16 x i1> @llvm.aarch64.sve.zip1.b16(<vscale x 16 x i1> %a,
                                                            <vscale x 16 x i1> %b)
  ret <vscale x 16 x i1> %out
}

define <vscale x 16 x i1> @zip1_b32(<vscale x 16 x i1> %a, <vscale x 16 x i1> %b) {
; CHECK-LABEL: zip1_b32:
; CHECK: // %bb.0:
; CHECK-NEXT: zip1 p0.s, p0.s, p1.s
; CHECK-NEXT: ret
  %out = call <vscale x 16 x i1> @llvm.aarch64.sve.zip1.b32(<vscale x 16 x i1> %a,
                                                            <vscale x 16 x i1> %b)
  ret <vscale x 16 x i1> %out
}

define <vscale x 16 x i1> @zip1_b64(<vscale x 16 x i1> %a, <vscale x 16 x i1> %b) {
; CHECK-LABEL: zip1_b64:
; CHECK: // %bb.0:
; CHECK-NEXT: zip1 p0.d, p0.d, p1.d
; CHECK-NEXT: ret
  %out = call <vscale x 16 x i1> @llvm.aarch64.sve.zip1.b64(<vscale x 16 x i1> %a,
                                                            <vscale x 16 x i1> %b)
  ret <vscale x 16 x i1> %out
}

define <vscale x 16 x i8> @zip1_i8(<vscale x 16 x i8> %a, <vscale x 16 x i8> %b) {
; CHECK-LABEL: zip1_i8:
; CHECK: // %bb.0:
; CHECK-NEXT: zip1 z0.b, z0.b, z1.b
; CHECK-NEXT: ret
  %out = call <vscale x 16 x i8> @llvm.aarch64.sve.zip1.nxv16i8(<vscale x 16 x i8> %a,
                                                                <vscale x 16 x i8> %b)
  ret <vscale x 16 x i8> %out
}

define <vscale x 8 x i16> @zip1_i16(<vscale x 8 x i16> %a, <vscale x 8 x i16> %b) {
; CHECK-LABEL: zip1_i16:
; CHECK: // %bb.0:
; CHECK-NEXT: zip1 z0.h, z0.h, z1.h
; CHECK-NEXT: ret
  %out = call <vscale x 8 x i16> @llvm.aarch64.sve.zip1.nxv8i16(<vscale x 8 x i16> %a,
                                                                <vscale x 8 x i16> %b)
  ret <vscale x 8 x i16> %out
}

define <vscale x 4 x i32> @zip1_i32(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b) {
; CHECK-LABEL: zip1_i32:
; CHECK: // %bb.0:
; CHECK-NEXT: zip1 z0.s, z0.s, z1.s
; CHECK-NEXT: ret
  %out = call <vscale x 4 x i32> @llvm.aarch64.sve.zip1.nxv4i32(<vscale x 4 x i32> %a,
                                                                <vscale x 4 x i32> %b)
  ret <vscale x 4 x i32> %out
}

define <vscale x 2 x i64> @zip1_i64(<vscale x 2 x i64> %a, <vscale x 2 x i64> %b) {
; CHECK-LABEL: zip1_i64:
; CHECK: // %bb.0:
; CHECK-NEXT: zip1 z0.d, z0.d, z1.d
; CHECK-NEXT: ret
  %out = call <vscale x 2 x i64> @llvm.aarch64.sve.zip1.nxv2i64(<vscale x 2 x i64> %a,
                                                                <vscale x 2 x i64> %b)
  ret <vscale x 2 x i64> %out
}

define <vscale x 2 x half> @zip1_f16_v2(<vscale x 2 x half> %a, <vscale x 2 x half> %b) {
; CHECK-LABEL: zip1_f16_v2:
; CHECK: // %bb.0:
; CHECK-NEXT: zip1 z0.d, z0.d, z1.d
; CHECK-NEXT: ret
  %out = call <vscale x 2 x half> @llvm.aarch64.sve.zip1.nxv2f16(<vscale x 2 x half> %a,
                                                                 <vscale x 2 x half> %b)
  ret <vscale x 2 x half> %out
}

define <vscale x 4 x half> @zip1_f16_v4(<vscale x 4 x half> %a, <vscale x 4 x half> %b) {
; CHECK-LABEL: zip1_f16_v4:
; CHECK: // %bb.0:
; CHECK-NEXT: zip1 z0.s, z0.s, z1.s
; CHECK-NEXT: ret
  %out = call <vscale x 4 x half> @llvm.aarch64.sve.zip1.nxv4f16(<vscale x 4 x half> %a,
                                                                 <vscale x 4 x half> %b)
  ret <vscale x 4 x half> %out
}

define <vscale x 8 x bfloat> @zip1_bf16(<vscale x 8 x bfloat> %a, <vscale x 8 x bfloat> %b) #0 {
; CHECK-LABEL: zip1_bf16:
; CHECK: // %bb.0:
; CHECK-NEXT: zip1 z0.h, z0.h, z1.h
; CHECK-NEXT: ret
  %out = call <vscale x 8 x bfloat> @llvm.aarch64.sve.zip1.nxv8bf16(<vscale x 8 x bfloat> %a,
                                                                    <vscale x 8 x bfloat> %b)
  ret <vscale x 8 x bfloat> %out
}

define <vscale x 8 x half> @zip1_f16(<vscale x 8 x half> %a, <vscale x 8 x half> %b) {
; CHECK-LABEL: zip1_f16:
; CHECK: // %bb.0:
; CHECK-NEXT: zip1 z0.h, z0.h, z1.h
; CHECK-NEXT: ret
  %out = call <vscale x 8 x half> @llvm.aarch64.sve.zip1.nxv8f16(<vscale x 8 x half> %a,
                                                                 <vscale x 8 x half> %b)
  ret <vscale x 8 x half> %out
}

define <vscale x 2 x float> @zip1_f32_v2(<vscale x 2 x float> %a, <vscale x 2 x float> %b) {
; CHECK-LABEL: zip1_f32_v2:
; CHECK: // %bb.0:
; CHECK-NEXT: zip1 z0.d, z0.d, z1.d
; CHECK-NEXT: ret
  %out = call <vscale x 2 x float> @llvm.aarch64.sve.zip1.nxv2f32(<vscale x 2 x float> %a,
                                                                  <vscale x 2 x float> %b)
  ret <vscale x 2 x float> %out
}

define <vscale x 4 x float> @zip1_f32(<vscale x 4 x float> %a, <vscale x 4 x float> %b) {
; CHECK-LABEL: zip1_f32:
; CHECK: // %bb.0:
; CHECK-NEXT: zip1 z0.s, z0.s, z1.s
; CHECK-NEXT: ret
  %out = call <vscale x 4 x float> @llvm.aarch64.sve.zip1.nxv4f32(<vscale x 4 x float> %a,
                                                                  <vscale x 4 x float> %b)
  ret <vscale x 4 x float> %out
}

define <vscale x 2 x double> @zip1_f64(<vscale x 2 x double> %a, <vscale x 2 x double> %b) {
; CHECK-LABEL: zip1_f64:
; CHECK: // %bb.0:
; CHECK-NEXT: zip1 z0.d, z0.d, z1.d
; CHECK-NEXT: ret
  %out = call <vscale x 2 x double> @llvm.aarch64.sve.zip1.nxv2f64(<vscale x 2 x double> %a,
                                                                   <vscale x 2 x double> %b)
  ret <vscale x 2 x double> %out
}

;
; ZIP2
;
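
; For reference (comment only, not checked by FileCheck): zip2 interleaves
; the high halves: { a0, a1, a2, a3 } and { b0, b1, b2, b3 } give
; { a2, b2, a3, b3 }.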
define <vscale x 16 x i1> @zip2_nxv16i1(<vscale x 16 x i1> %a, <vscale x 16 x i1> %b) {
; CHECK-LABEL: zip2_nxv16i1:
; CHECK: // %bb.0:
; CHECK-NEXT: zip2 p0.b, p0.b, p1.b
; CHECK-NEXT: ret
  %out = call <vscale x 16 x i1> @llvm.aarch64.sve.zip2.nxv16i1(<vscale x 16 x i1> %a,
                                                                <vscale x 16 x i1> %b)
  ret <vscale x 16 x i1> %out
}

define <vscale x 8 x i1> @zip2_nxv8i1(<vscale x 8 x i1> %a, <vscale x 8 x i1> %b) {
; CHECK-LABEL: zip2_nxv8i1:
; CHECK: // %bb.0:
; CHECK-NEXT: zip2 p0.h, p0.h, p1.h
; CHECK-NEXT: ret
  %out = call <vscale x 8 x i1> @llvm.aarch64.sve.zip2.nxv8i1(<vscale x 8 x i1> %a,
                                                              <vscale x 8 x i1> %b)
  ret <vscale x 8 x i1> %out
}

define <vscale x 4 x i1> @zip2_nxv4i1(<vscale x 4 x i1> %a, <vscale x 4 x i1> %b) {
; CHECK-LABEL: zip2_nxv4i1:
; CHECK: // %bb.0:
; CHECK-NEXT: zip2 p0.s, p0.s, p1.s
; CHECK-NEXT: ret
  %out = call <vscale x 4 x i1> @llvm.aarch64.sve.zip2.nxv4i1(<vscale x 4 x i1> %a,
                                                              <vscale x 4 x i1> %b)
  ret <vscale x 4 x i1> %out
}

define <vscale x 2 x i1> @zip2_nxv2i1(<vscale x 2 x i1> %a, <vscale x 2 x i1> %b) {
; CHECK-LABEL: zip2_nxv2i1:
; CHECK: // %bb.0:
; CHECK-NEXT: zip2 p0.d, p0.d, p1.d
; CHECK-NEXT: ret
  %out = call <vscale x 2 x i1> @llvm.aarch64.sve.zip2.nxv2i1(<vscale x 2 x i1> %a,
                                                              <vscale x 2 x i1> %b)
  ret <vscale x 2 x i1> %out
}

define <vscale x 16 x i1> @zip2_b16(<vscale x 16 x i1> %a, <vscale x 16 x i1> %b) {
; CHECK-LABEL: zip2_b16:
; CHECK: // %bb.0:
; CHECK-NEXT: zip2 p0.h, p0.h, p1.h
; CHECK-NEXT: ret
  %out = call <vscale x 16 x i1> @llvm.aarch64.sve.zip2.b16(<vscale x 16 x i1> %a,
                                                            <vscale x 16 x i1> %b)
  ret <vscale x 16 x i1> %out
}

define <vscale x 16 x i1> @zip2_b32(<vscale x 16 x i1> %a, <vscale x 16 x i1> %b) {
; CHECK-LABEL: zip2_b32:
; CHECK: // %bb.0:
; CHECK-NEXT: zip2 p0.s, p0.s, p1.s
; CHECK-NEXT: ret
  %out = call <vscale x 16 x i1> @llvm.aarch64.sve.zip2.b32(<vscale x 16 x i1> %a,
                                                            <vscale x 16 x i1> %b)
  ret <vscale x 16 x i1> %out
}

define <vscale x 16 x i1> @zip2_b64(<vscale x 16 x i1> %a, <vscale x 16 x i1> %b) {
; CHECK-LABEL: zip2_b64:
; CHECK: // %bb.0:
; CHECK-NEXT: zip2 p0.d, p0.d, p1.d
; CHECK-NEXT: ret
  %out = call <vscale x 16 x i1> @llvm.aarch64.sve.zip2.b64(<vscale x 16 x i1> %a,
                                                            <vscale x 16 x i1> %b)
  ret <vscale x 16 x i1> %out
}

define <vscale x 16 x i8> @zip2_i8(<vscale x 16 x i8> %a, <vscale x 16 x i8> %b) {
; CHECK-LABEL: zip2_i8:
; CHECK: // %bb.0:
; CHECK-NEXT: zip2 z0.b, z0.b, z1.b
; CHECK-NEXT: ret
  %out = call <vscale x 16 x i8> @llvm.aarch64.sve.zip2.nxv16i8(<vscale x 16 x i8> %a,
                                                                <vscale x 16 x i8> %b)
  ret <vscale x 16 x i8> %out
}

define <vscale x 8 x i16> @zip2_i16(<vscale x 8 x i16> %a, <vscale x 8 x i16> %b) {
; CHECK-LABEL: zip2_i16:
; CHECK: // %bb.0:
; CHECK-NEXT: zip2 z0.h, z0.h, z1.h
; CHECK-NEXT: ret
  %out = call <vscale x 8 x i16> @llvm.aarch64.sve.zip2.nxv8i16(<vscale x 8 x i16> %a,
                                                                <vscale x 8 x i16> %b)
  ret <vscale x 8 x i16> %out
}

define <vscale x 4 x i32> @zip2_i32(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b) {
; CHECK-LABEL: zip2_i32:
; CHECK: // %bb.0:
; CHECK-NEXT: zip2 z0.s, z0.s, z1.s
; CHECK-NEXT: ret
  %out = call <vscale x 4 x i32> @llvm.aarch64.sve.zip2.nxv4i32(<vscale x 4 x i32> %a,
                                                                <vscale x 4 x i32> %b)
  ret <vscale x 4 x i32> %out
}

define <vscale x 2 x i64> @zip2_i64(<vscale x 2 x i64> %a, <vscale x 2 x i64> %b) {
; CHECK-LABEL: zip2_i64:
; CHECK: // %bb.0:
; CHECK-NEXT: zip2 z0.d, z0.d, z1.d
; CHECK-NEXT: ret
  %out = call <vscale x 2 x i64> @llvm.aarch64.sve.zip2.nxv2i64(<vscale x 2 x i64> %a,
                                                                <vscale x 2 x i64> %b)
  ret <vscale x 2 x i64> %out
}

define <vscale x 2 x half> @zip2_f16_v2(<vscale x 2 x half> %a, <vscale x 2 x half> %b) {
; CHECK-LABEL: zip2_f16_v2:
; CHECK: // %bb.0:
; CHECK-NEXT: zip2 z0.d, z0.d, z1.d
; CHECK-NEXT: ret
  %out = call <vscale x 2 x half> @llvm.aarch64.sve.zip2.nxv2f16(<vscale x 2 x half> %a,
                                                                 <vscale x 2 x half> %b)
  ret <vscale x 2 x half> %out
}

define <vscale x 4 x half> @zip2_f16_v4(<vscale x 4 x half> %a, <vscale x 4 x half> %b) {
; CHECK-LABEL: zip2_f16_v4:
; CHECK: // %bb.0:
; CHECK-NEXT: zip2 z0.s, z0.s, z1.s
; CHECK-NEXT: ret
  %out = call <vscale x 4 x half> @llvm.aarch64.sve.zip2.nxv4f16(<vscale x 4 x half> %a,
                                                                 <vscale x 4 x half> %b)
  ret <vscale x 4 x half> %out
}

define <vscale x 8 x bfloat> @zip2_bf16(<vscale x 8 x bfloat> %a, <vscale x 8 x bfloat> %b) #0 {
; CHECK-LABEL: zip2_bf16:
; CHECK: // %bb.0:
; CHECK-NEXT: zip2 z0.h, z0.h, z1.h
; CHECK-NEXT: ret
  %out = call <vscale x 8 x bfloat> @llvm.aarch64.sve.zip2.nxv8bf16(<vscale x 8 x bfloat> %a,
                                                                    <vscale x 8 x bfloat> %b)
  ret <vscale x 8 x bfloat> %out
}

define <vscale x 8 x half> @zip2_f16(<vscale x 8 x half> %a, <vscale x 8 x half> %b) {
; CHECK-LABEL: zip2_f16:
; CHECK: // %bb.0:
; CHECK-NEXT: zip2 z0.h, z0.h, z1.h
; CHECK-NEXT: ret
  %out = call <vscale x 8 x half> @llvm.aarch64.sve.zip2.nxv8f16(<vscale x 8 x half> %a,
                                                                 <vscale x 8 x half> %b)
  ret <vscale x 8 x half> %out
}

define <vscale x 2 x float> @zip2_f32_v2(<vscale x 2 x float> %a, <vscale x 2 x float> %b) {
; CHECK-LABEL: zip2_f32_v2:
; CHECK: // %bb.0:
; CHECK-NEXT: zip2 z0.d, z0.d, z1.d
; CHECK-NEXT: ret
  %out = call <vscale x 2 x float> @llvm.aarch64.sve.zip2.nxv2f32(<vscale x 2 x float> %a,
                                                                  <vscale x 2 x float> %b)
  ret <vscale x 2 x float> %out
}

define <vscale x 4 x float> @zip2_f32(<vscale x 4 x float> %a, <vscale x 4 x float> %b) {
; CHECK-LABEL: zip2_f32:
; CHECK: // %bb.0:
; CHECK-NEXT: zip2 z0.s, z0.s, z1.s
; CHECK-NEXT: ret
  %out = call <vscale x 4 x float> @llvm.aarch64.sve.zip2.nxv4f32(<vscale x 4 x float> %a,
                                                                  <vscale x 4 x float> %b)
  ret <vscale x 4 x float> %out
}

define <vscale x 2 x double> @zip2_f64(<vscale x 2 x double> %a, <vscale x 2 x double> %b) {
; CHECK-LABEL: zip2_f64:
; CHECK: // %bb.0:
; CHECK-NEXT: zip2 z0.d, z0.d, z1.d
; CHECK-NEXT: ret
  %out = call <vscale x 2 x double> @llvm.aarch64.sve.zip2.nxv2f64(<vscale x 2 x double> %a,
                                                                   <vscale x 2 x double> %b)
  ret <vscale x 2 x double> %out
}

declare <vscale x 16 x i8> @llvm.aarch64.sve.clasta.nxv16i8(<vscale x 16 x i1>, <vscale x 16 x i8>, <vscale x 16 x i8>)
declare <vscale x 8 x i16> @llvm.aarch64.sve.clasta.nxv8i16(<vscale x 8 x i1>, <vscale x 8 x i16>, <vscale x 8 x i16>)
declare <vscale x 4 x i32> @llvm.aarch64.sve.clasta.nxv4i32(<vscale x 4 x i1>, <vscale x 4 x i32>, <vscale x 4 x i32>)
declare <vscale x 2 x i64> @llvm.aarch64.sve.clasta.nxv2i64(<vscale x 2 x i1>, <vscale x 2 x i64>, <vscale x 2 x i64>)
declare <vscale x 8 x half> @llvm.aarch64.sve.clasta.nxv8f16(<vscale x 8 x i1>, <vscale x 8 x half>, <vscale x 8 x half>)
declare <vscale x 8 x bfloat> @llvm.aarch64.sve.clasta.nxv8bf16(<vscale x 8 x i1>, <vscale x 8 x bfloat>, <vscale x 8 x bfloat>)
declare <vscale x 4 x float> @llvm.aarch64.sve.clasta.nxv4f32(<vscale x 4 x i1>, <vscale x 4 x float>, <vscale x 4 x float>)
declare <vscale x 2 x double> @llvm.aarch64.sve.clasta.nxv2f64(<vscale x 2 x i1>, <vscale x 2 x double>, <vscale x 2 x double>)

declare i8 @llvm.aarch64.sve.clasta.n.nxv16i8(<vscale x 16 x i1>, i8, <vscale x 16 x i8>)
declare i16 @llvm.aarch64.sve.clasta.n.nxv8i16(<vscale x 8 x i1>, i16, <vscale x 8 x i16>)
declare i32 @llvm.aarch64.sve.clasta.n.nxv4i32(<vscale x 4 x i1>, i32, <vscale x 4 x i32>)
declare i64 @llvm.aarch64.sve.clasta.n.nxv2i64(<vscale x 2 x i1>, i64, <vscale x 2 x i64>)
declare half @llvm.aarch64.sve.clasta.n.nxv8f16(<vscale x 8 x i1>, half, <vscale x 8 x half>)
declare bfloat @llvm.aarch64.sve.clasta.n.nxv8bf16(<vscale x 8 x i1>, bfloat, <vscale x 8 x bfloat>)
declare float @llvm.aarch64.sve.clasta.n.nxv4f32(<vscale x 4 x i1>, float, <vscale x 4 x float>)
declare double @llvm.aarch64.sve.clasta.n.nxv2f64(<vscale x 2 x i1>, double, <vscale x 2 x double>)

declare <vscale x 16 x i8> @llvm.aarch64.sve.clastb.nxv16i8(<vscale x 16 x i1>, <vscale x 16 x i8>, <vscale x 16 x i8>)
declare <vscale x 8 x i16> @llvm.aarch64.sve.clastb.nxv8i16(<vscale x 8 x i1>, <vscale x 8 x i16>, <vscale x 8 x i16>)
declare <vscale x 4 x i32> @llvm.aarch64.sve.clastb.nxv4i32(<vscale x 4 x i1>, <vscale x 4 x i32>, <vscale x 4 x i32>)
declare <vscale x 2 x i64> @llvm.aarch64.sve.clastb.nxv2i64(<vscale x 2 x i1>, <vscale x 2 x i64>, <vscale x 2 x i64>)
declare <vscale x 8 x half> @llvm.aarch64.sve.clastb.nxv8f16(<vscale x 8 x i1>, <vscale x 8 x half>, <vscale x 8 x half>)
declare <vscale x 8 x bfloat> @llvm.aarch64.sve.clastb.nxv8bf16(<vscale x 8 x i1>, <vscale x 8 x bfloat>, <vscale x 8 x bfloat>)
declare <vscale x 4 x float> @llvm.aarch64.sve.clastb.nxv4f32(<vscale x 4 x i1>, <vscale x 4 x float>, <vscale x 4 x float>)
declare <vscale x 2 x double> @llvm.aarch64.sve.clastb.nxv2f64(<vscale x 2 x i1>, <vscale x 2 x double>, <vscale x 2 x double>)

declare i8 @llvm.aarch64.sve.clastb.n.nxv16i8(<vscale x 16 x i1>, i8, <vscale x 16 x i8>)
declare i16 @llvm.aarch64.sve.clastb.n.nxv8i16(<vscale x 8 x i1>, i16, <vscale x 8 x i16>)
declare i32 @llvm.aarch64.sve.clastb.n.nxv4i32(<vscale x 4 x i1>, i32, <vscale x 4 x i32>)
declare i64 @llvm.aarch64.sve.clastb.n.nxv2i64(<vscale x 2 x i1>, i64, <vscale x 2 x i64>)
declare half @llvm.aarch64.sve.clastb.n.nxv8f16(<vscale x 8 x i1>, half, <vscale x 8 x half>)
declare bfloat @llvm.aarch64.sve.clastb.n.nxv8bf16(<vscale x 8 x i1>, bfloat, <vscale x 8 x bfloat>)
declare float @llvm.aarch64.sve.clastb.n.nxv4f32(<vscale x 4 x i1>, float, <vscale x 4 x float>)
declare double @llvm.aarch64.sve.clastb.n.nxv2f64(<vscale x 2 x i1>, double, <vscale x 2 x double>)

declare <vscale x 4 x i32> @llvm.aarch64.sve.compact.nxv4i32(<vscale x 4 x i1>, <vscale x 4 x i32>)
declare <vscale x 2 x i64> @llvm.aarch64.sve.compact.nxv2i64(<vscale x 2 x i1>, <vscale x 2 x i64>)
declare <vscale x 4 x float> @llvm.aarch64.sve.compact.nxv4f32(<vscale x 4 x i1>, <vscale x 4 x float>)
declare <vscale x 2 x double> @llvm.aarch64.sve.compact.nxv2f64(<vscale x 2 x i1>, <vscale x 2 x double>)

declare <vscale x 16 x i8> @llvm.aarch64.sve.dupq.lane.nxv16i8(<vscale x 16 x i8>, i64)
declare <vscale x 8 x i16> @llvm.aarch64.sve.dupq.lane.nxv8i16(<vscale x 8 x i16>, i64)
declare <vscale x 4 x i32> @llvm.aarch64.sve.dupq.lane.nxv4i32(<vscale x 4 x i32>, i64)
declare <vscale x 2 x i64> @llvm.aarch64.sve.dupq.lane.nxv2i64(<vscale x 2 x i64>, i64)
declare <vscale x 8 x half> @llvm.aarch64.sve.dupq.lane.nxv8f16(<vscale x 8 x half>, i64)
declare <vscale x 8 x bfloat> @llvm.aarch64.sve.dupq.lane.nxv8bf16(<vscale x 8 x bfloat>, i64)
declare <vscale x 4 x float> @llvm.aarch64.sve.dupq.lane.nxv4f32(<vscale x 4 x float>, i64)
declare <vscale x 2 x double> @llvm.aarch64.sve.dupq.lane.nxv2f64(<vscale x 2 x double>, i64)

declare <vscale x 16 x i8> @llvm.aarch64.sve.ext.nxv16i8(<vscale x 16 x i8>, <vscale x 16 x i8>, i32)
declare <vscale x 8 x i16> @llvm.aarch64.sve.ext.nxv8i16(<vscale x 8 x i16>, <vscale x 8 x i16>, i32)
declare <vscale x 4 x i32> @llvm.aarch64.sve.ext.nxv4i32(<vscale x 4 x i32>, <vscale x 4 x i32>, i32)
declare <vscale x 2 x i64> @llvm.aarch64.sve.ext.nxv2i64(<vscale x 2 x i64>, <vscale x 2 x i64>, i32)
declare <vscale x 8 x bfloat> @llvm.aarch64.sve.ext.nxv8bf16(<vscale x 8 x bfloat>, <vscale x 8 x bfloat>, i32)
declare <vscale x 8 x half> @llvm.aarch64.sve.ext.nxv8f16(<vscale x 8 x half>, <vscale x 8 x half>, i32)
declare <vscale x 4 x float> @llvm.aarch64.sve.ext.nxv4f32(<vscale x 4 x float>, <vscale x 4 x float>, i32)
declare <vscale x 2 x double> @llvm.aarch64.sve.ext.nxv2f64(<vscale x 2 x double>, <vscale x 2 x double>, i32)

declare i8 @llvm.aarch64.sve.lasta.nxv16i8(<vscale x 16 x i1>, <vscale x 16 x i8>)
declare i16 @llvm.aarch64.sve.lasta.nxv8i16(<vscale x 8 x i1>, <vscale x 8 x i16>)
declare i32 @llvm.aarch64.sve.lasta.nxv4i32(<vscale x 4 x i1>, <vscale x 4 x i32>)
declare i64 @llvm.aarch64.sve.lasta.nxv2i64(<vscale x 2 x i1>, <vscale x 2 x i64>)
declare half @llvm.aarch64.sve.lasta.nxv8f16(<vscale x 8 x i1>, <vscale x 8 x half>)
declare bfloat @llvm.aarch64.sve.lasta.nxv8bf16(<vscale x 8 x i1>, <vscale x 8 x bfloat>)
declare float @llvm.aarch64.sve.lasta.nxv2f32(<vscale x 2 x i1>, <vscale x 2 x float>)
declare float @llvm.aarch64.sve.lasta.nxv4f32(<vscale x 4 x i1>, <vscale x 4 x float>)
declare double @llvm.aarch64.sve.lasta.nxv2f64(<vscale x 2 x i1>, <vscale x 2 x double>)

declare i8 @llvm.aarch64.sve.lastb.nxv16i8(<vscale x 16 x i1>, <vscale x 16 x i8>)
declare i16 @llvm.aarch64.sve.lastb.nxv8i16(<vscale x 8 x i1>, <vscale x 8 x i16>)
declare i32 @llvm.aarch64.sve.lastb.nxv4i32(<vscale x 4 x i1>, <vscale x 4 x i32>)
declare i64 @llvm.aarch64.sve.lastb.nxv2i64(<vscale x 2 x i1>, <vscale x 2 x i64>)
declare half @llvm.aarch64.sve.lastb.nxv8f16(<vscale x 8 x i1>, <vscale x 8 x half>)
declare bfloat @llvm.aarch64.sve.lastb.nxv8bf16(<vscale x 8 x i1>, <vscale x 8 x bfloat>)
declare float @llvm.aarch64.sve.lastb.nxv2f32(<vscale x 2 x i1>, <vscale x 2 x float>)
declare float @llvm.aarch64.sve.lastb.nxv4f32(<vscale x 4 x i1>, <vscale x 4 x float>)
declare double @llvm.aarch64.sve.lastb.nxv2f64(<vscale x 2 x i1>, <vscale x 2 x double>)

declare <vscale x 16 x i1> @llvm.aarch64.sve.rev.nxv16i1(<vscale x 16 x i1>)
declare <vscale x 8 x i1> @llvm.aarch64.sve.rev.nxv8i1(<vscale x 8 x i1>)
declare <vscale x 4 x i1> @llvm.aarch64.sve.rev.nxv4i1(<vscale x 4 x i1>)
declare <vscale x 2 x i1> @llvm.aarch64.sve.rev.nxv2i1(<vscale x 2 x i1>)
declare <vscale x 16 x i8> @llvm.aarch64.sve.rev.nxv16i8(<vscale x 16 x i8>)
declare <vscale x 8 x i16> @llvm.aarch64.sve.rev.nxv8i16(<vscale x 8 x i16>)
declare <vscale x 4 x i32> @llvm.aarch64.sve.rev.nxv4i32(<vscale x 4 x i32>)
declare <vscale x 2 x i64> @llvm.aarch64.sve.rev.nxv2i64(<vscale x 2 x i64>)
declare <vscale x 8 x bfloat> @llvm.aarch64.sve.rev.nxv8bf16(<vscale x 8 x bfloat>)
declare <vscale x 8 x half> @llvm.aarch64.sve.rev.nxv8f16(<vscale x 8 x half>)
declare <vscale x 4 x float> @llvm.aarch64.sve.rev.nxv4f32(<vscale x 4 x float>)
declare <vscale x 2 x double> @llvm.aarch64.sve.rev.nxv2f64(<vscale x 2 x double>)

declare <vscale x 16 x i1> @llvm.aarch64.sve.rev.b16(<vscale x 16 x i1>)
declare <vscale x 16 x i1> @llvm.aarch64.sve.rev.b32(<vscale x 16 x i1>)
declare <vscale x 16 x i1> @llvm.aarch64.sve.rev.b64(<vscale x 16 x i1>)

declare <vscale x 16 x i8> @llvm.aarch64.sve.splice.nxv16i8(<vscale x 16 x i1>, <vscale x 16 x i8>, <vscale x 16 x i8>)
declare <vscale x 8 x i16> @llvm.aarch64.sve.splice.nxv8i16(<vscale x 8 x i1>, <vscale x 8 x i16>, <vscale x 8 x i16>)
declare <vscale x 4 x i32> @llvm.aarch64.sve.splice.nxv4i32(<vscale x 4 x i1>, <vscale x 4 x i32>, <vscale x 4 x i32>)
declare <vscale x 2 x i64> @llvm.aarch64.sve.splice.nxv2i64(<vscale x 2 x i1>, <vscale x 2 x i64>, <vscale x 2 x i64>)
declare <vscale x 8 x bfloat> @llvm.aarch64.sve.splice.nxv8bf16(<vscale x 8 x i1>, <vscale x 8 x bfloat>, <vscale x 8 x bfloat>)
declare <vscale x 8 x half> @llvm.aarch64.sve.splice.nxv8f16(<vscale x 8 x i1>, <vscale x 8 x half>, <vscale x 8 x half>)
declare <vscale x 4 x float> @llvm.aarch64.sve.splice.nxv4f32(<vscale x 4 x i1>, <vscale x 4 x float>, <vscale x 4 x float>)
declare <vscale x 2 x double> @llvm.aarch64.sve.splice.nxv2f64(<vscale x 2 x i1>, <vscale x 2 x double>, <vscale x 2 x double>)

declare <vscale x 8 x i16> @llvm.aarch64.sve.sunpkhi.nxv8i16(<vscale x 16 x i8>)
declare <vscale x 4 x i32> @llvm.aarch64.sve.sunpkhi.nxv4i32(<vscale x 8 x i16>)
declare <vscale x 2 x i64> @llvm.aarch64.sve.sunpkhi.nxv2i64(<vscale x 4 x i32>)

declare <vscale x 8 x i16> @llvm.aarch64.sve.sunpklo.nxv8i16(<vscale x 16 x i8>)
declare <vscale x 4 x i32> @llvm.aarch64.sve.sunpklo.nxv4i32(<vscale x 8 x i16>)
declare <vscale x 2 x i64> @llvm.aarch64.sve.sunpklo.nxv2i64(<vscale x 4 x i32>)

declare <vscale x 16 x i8> @llvm.aarch64.sve.tbl.nxv16i8(<vscale x 16 x i8>, <vscale x 16 x i8>)
declare <vscale x 8 x i16> @llvm.aarch64.sve.tbl.nxv8i16(<vscale x 8 x i16>, <vscale x 8 x i16>)
declare <vscale x 4 x i32> @llvm.aarch64.sve.tbl.nxv4i32(<vscale x 4 x i32>, <vscale x 4 x i32>)
declare <vscale x 2 x i64> @llvm.aarch64.sve.tbl.nxv2i64(<vscale x 2 x i64>, <vscale x 2 x i64>)
declare <vscale x 8 x half> @llvm.aarch64.sve.tbl.nxv8f16(<vscale x 8 x half>, <vscale x 8 x i16>)
declare <vscale x 8 x bfloat> @llvm.aarch64.sve.tbl.nxv8bf16(<vscale x 8 x bfloat>, <vscale x 8 x i16>)
declare <vscale x 4 x float> @llvm.aarch64.sve.tbl.nxv4f32(<vscale x 4 x float>, <vscale x 4 x i32>)
declare <vscale x 2 x double> @llvm.aarch64.sve.tbl.nxv2f64(<vscale x 2 x double>, <vscale x 2 x i64>)

declare <vscale x 8 x i16> @llvm.aarch64.sve.uunpkhi.nxv8i16(<vscale x 16 x i8>)
declare <vscale x 4 x i32> @llvm.aarch64.sve.uunpkhi.nxv4i32(<vscale x 8 x i16>)
declare <vscale x 2 x i64> @llvm.aarch64.sve.uunpkhi.nxv2i64(<vscale x 4 x i32>)

declare <vscale x 8 x i16> @llvm.aarch64.sve.uunpklo.nxv8i16(<vscale x 16 x i8>)
declare <vscale x 4 x i32> @llvm.aarch64.sve.uunpklo.nxv4i32(<vscale x 8 x i16>)
declare <vscale x 2 x i64> @llvm.aarch64.sve.uunpklo.nxv2i64(<vscale x 4 x i32>)

declare <vscale x 16 x i1> @llvm.aarch64.sve.trn1.nxv16i1(<vscale x 16 x i1>, <vscale x 16 x i1>)
declare <vscale x 8 x i1> @llvm.aarch64.sve.trn1.nxv8i1(<vscale x 8 x i1>, <vscale x 8 x i1>)
declare <vscale x 4 x i1> @llvm.aarch64.sve.trn1.nxv4i1(<vscale x 4 x i1>, <vscale x 4 x i1>)
declare <vscale x 2 x i1> @llvm.aarch64.sve.trn1.nxv2i1(<vscale x 2 x i1>, <vscale x 2 x i1>)
declare <vscale x 16 x i8> @llvm.aarch64.sve.trn1.nxv16i8(<vscale x 16 x i8>, <vscale x 16 x i8>)
declare <vscale x 8 x i16> @llvm.aarch64.sve.trn1.nxv8i16(<vscale x 8 x i16>, <vscale x 8 x i16>)
declare <vscale x 4 x i32> @llvm.aarch64.sve.trn1.nxv4i32(<vscale x 4 x i32>, <vscale x 4 x i32>)
declare <vscale x 2 x i64> @llvm.aarch64.sve.trn1.nxv2i64(<vscale x 2 x i64>, <vscale x 2 x i64>)
declare <vscale x 2 x half> @llvm.aarch64.sve.trn1.nxv2f16(<vscale x 2 x half>, <vscale x 2 x half>)
declare <vscale x 4 x half> @llvm.aarch64.sve.trn1.nxv4f16(<vscale x 4 x half>, <vscale x 4 x half>)
declare <vscale x 8 x bfloat> @llvm.aarch64.sve.trn1.nxv8bf16(<vscale x 8 x bfloat>, <vscale x 8 x bfloat>)
declare <vscale x 8 x half> @llvm.aarch64.sve.trn1.nxv8f16(<vscale x 8 x half>, <vscale x 8 x half>)
declare <vscale x 2 x float> @llvm.aarch64.sve.trn1.nxv2f32(<vscale x 2 x float>, <vscale x 2 x float>)
declare <vscale x 4 x float> @llvm.aarch64.sve.trn1.nxv4f32(<vscale x 4 x float>, <vscale x 4 x float>)
declare <vscale x 2 x double> @llvm.aarch64.sve.trn1.nxv2f64(<vscale x 2 x double>, <vscale x 2 x double>)

declare <vscale x 16 x i1> @llvm.aarch64.sve.trn1.b16(<vscale x 16 x i1>, <vscale x 16 x i1>)
declare <vscale x 16 x i1> @llvm.aarch64.sve.trn1.b32(<vscale x 16 x i1>, <vscale x 16 x i1>)
declare <vscale x 16 x i1> @llvm.aarch64.sve.trn1.b64(<vscale x 16 x i1>, <vscale x 16 x i1>)

declare <vscale x 16 x i1> @llvm.aarch64.sve.trn2.nxv16i1(<vscale x 16 x i1>, <vscale x 16 x i1>)
declare <vscale x 8 x i1> @llvm.aarch64.sve.trn2.nxv8i1(<vscale x 8 x i1>, <vscale x 8 x i1>)
declare <vscale x 4 x i1> @llvm.aarch64.sve.trn2.nxv4i1(<vscale x 4 x i1>, <vscale x 4 x i1>)
declare <vscale x 2 x i1> @llvm.aarch64.sve.trn2.nxv2i1(<vscale x 2 x i1>, <vscale x 2 x i1>)
declare <vscale x 16 x i8> @llvm.aarch64.sve.trn2.nxv16i8(<vscale x 16 x i8>, <vscale x 16 x i8>)
declare <vscale x 8 x i16> @llvm.aarch64.sve.trn2.nxv8i16(<vscale x 8 x i16>, <vscale x 8 x i16>)
declare <vscale x 4 x i32> @llvm.aarch64.sve.trn2.nxv4i32(<vscale x 4 x i32>, <vscale x 4 x i32>)
declare <vscale x 2 x i64> @llvm.aarch64.sve.trn2.nxv2i64(<vscale x 2 x i64>, <vscale x 2 x i64>)
declare <vscale x 2 x half> @llvm.aarch64.sve.trn2.nxv2f16(<vscale x 2 x half>, <vscale x 2 x half>)
declare <vscale x 4 x half> @llvm.aarch64.sve.trn2.nxv4f16(<vscale x 4 x half>, <vscale x 4 x half>)
declare <vscale x 8 x bfloat> @llvm.aarch64.sve.trn2.nxv8bf16(<vscale x 8 x bfloat>, <vscale x 8 x bfloat>)
declare <vscale x 8 x half> @llvm.aarch64.sve.trn2.nxv8f16(<vscale x 8 x half>, <vscale x 8 x half>)
declare <vscale x 2 x float> @llvm.aarch64.sve.trn2.nxv2f32(<vscale x 2 x float>, <vscale x 2 x float>)
declare <vscale x 4 x float> @llvm.aarch64.sve.trn2.nxv4f32(<vscale x 4 x float>, <vscale x 4 x float>)
declare <vscale x 2 x double> @llvm.aarch64.sve.trn2.nxv2f64(<vscale x 2 x double>, <vscale x 2 x double>)

declare <vscale x 16 x i1> @llvm.aarch64.sve.trn2.b16(<vscale x 16 x i1>, <vscale x 16 x i1>)
declare <vscale x 16 x i1> @llvm.aarch64.sve.trn2.b32(<vscale x 16 x i1>, <vscale x 16 x i1>)
declare <vscale x 16 x i1> @llvm.aarch64.sve.trn2.b64(<vscale x 16 x i1>, <vscale x 16 x i1>)

declare <vscale x 16 x i1> @llvm.aarch64.sve.uzp1.nxv16i1(<vscale x 16 x i1>, <vscale x 16 x i1>)
declare <vscale x 8 x i1> @llvm.aarch64.sve.uzp1.nxv8i1(<vscale x 8 x i1>, <vscale x 8 x i1>)
declare <vscale x 4 x i1> @llvm.aarch64.sve.uzp1.nxv4i1(<vscale x 4 x i1>, <vscale x 4 x i1>)
declare <vscale x 2 x i1> @llvm.aarch64.sve.uzp1.nxv2i1(<vscale x 2 x i1>, <vscale x 2 x i1>)
declare <vscale x 16 x i8> @llvm.aarch64.sve.uzp1.nxv16i8(<vscale x 16 x i8>, <vscale x 16 x i8>)
declare <vscale x 8 x i16> @llvm.aarch64.sve.uzp1.nxv8i16(<vscale x 8 x i16>, <vscale x 8 x i16>)
declare <vscale x 4 x i32> @llvm.aarch64.sve.uzp1.nxv4i32(<vscale x 4 x i32>, <vscale x 4 x i32>)
declare <vscale x 2 x i64> @llvm.aarch64.sve.uzp1.nxv2i64(<vscale x 2 x i64>, <vscale x 2 x i64>)
declare <vscale x 2 x half> @llvm.aarch64.sve.uzp1.nxv2f16(<vscale x 2 x half>, <vscale x 2 x half>)
declare <vscale x 4 x half> @llvm.aarch64.sve.uzp1.nxv4f16(<vscale x 4 x half>, <vscale x 4 x half>)
declare <vscale x 8 x bfloat> @llvm.aarch64.sve.uzp1.nxv8bf16(<vscale x 8 x bfloat>, <vscale x 8 x bfloat>)
declare <vscale x 8 x half> @llvm.aarch64.sve.uzp1.nxv8f16(<vscale x 8 x half>, <vscale x 8 x half>)
declare <vscale x 2 x float> @llvm.aarch64.sve.uzp1.nxv2f32(<vscale x 2 x float>, <vscale x 2 x float>)
declare <vscale x 4 x float> @llvm.aarch64.sve.uzp1.nxv4f32(<vscale x 4 x float>, <vscale x 4 x float>)
declare <vscale x 2 x double> @llvm.aarch64.sve.uzp1.nxv2f64(<vscale x 2 x double>, <vscale x 2 x double>)

declare <vscale x 16 x i1> @llvm.aarch64.sve.uzp1.b16(<vscale x 16 x i1>, <vscale x 16 x i1>)
declare <vscale x 16 x i1> @llvm.aarch64.sve.uzp1.b32(<vscale x 16 x i1>, <vscale x 16 x i1>)
declare <vscale x 16 x i1> @llvm.aarch64.sve.uzp1.b64(<vscale x 16 x i1>, <vscale x 16 x i1>)

declare <vscale x 16 x i1> @llvm.aarch64.sve.uzp2.nxv16i1(<vscale x 16 x i1>, <vscale x 16 x i1>)
declare <vscale x 8 x i1> @llvm.aarch64.sve.uzp2.nxv8i1(<vscale x 8 x i1>, <vscale x 8 x i1>)
declare <vscale x 4 x i1> @llvm.aarch64.sve.uzp2.nxv4i1(<vscale x 4 x i1>, <vscale x 4 x i1>)
declare <vscale x 2 x i1> @llvm.aarch64.sve.uzp2.nxv2i1(<vscale x 2 x i1>, <vscale x 2 x i1>)
declare <vscale x 16 x i8> @llvm.aarch64.sve.uzp2.nxv16i8(<vscale x 16 x i8>, <vscale x 16 x i8>)
declare <vscale x 8 x i16> @llvm.aarch64.sve.uzp2.nxv8i16(<vscale x 8 x i16>, <vscale x 8 x i16>)
declare <vscale x 4 x i32> @llvm.aarch64.sve.uzp2.nxv4i32(<vscale x 4 x i32>, <vscale x 4 x i32>)
declare <vscale x 2 x i64> @llvm.aarch64.sve.uzp2.nxv2i64(<vscale x 2 x i64>, <vscale x 2 x i64>)
declare <vscale x 2 x half> @llvm.aarch64.sve.uzp2.nxv2f16(<vscale x 2 x half>, <vscale x 2 x half>)
declare <vscale x 4 x half> @llvm.aarch64.sve.uzp2.nxv4f16(<vscale x 4 x half>, <vscale x 4 x half>)
declare <vscale x 8 x bfloat> @llvm.aarch64.sve.uzp2.nxv8bf16(<vscale x 8 x bfloat>, <vscale x 8 x bfloat>)
declare <vscale x 8 x half> @llvm.aarch64.sve.uzp2.nxv8f16(<vscale x 8 x half>, <vscale x 8 x half>)
declare <vscale x 2 x float> @llvm.aarch64.sve.uzp2.nxv2f32(<vscale x 2 x float>, <vscale x 2 x float>)
declare <vscale x 4 x float> @llvm.aarch64.sve.uzp2.nxv4f32(<vscale x 4 x float>, <vscale x 4 x float>)
declare <vscale x 2 x double> @llvm.aarch64.sve.uzp2.nxv2f64(<vscale x 2 x double>, <vscale x 2 x double>)

declare <vscale x 16 x i1> @llvm.aarch64.sve.uzp2.b16(<vscale x 16 x i1>, <vscale x 16 x i1>)
declare <vscale x 16 x i1> @llvm.aarch64.sve.uzp2.b32(<vscale x 16 x i1>, <vscale x 16 x i1>)
declare <vscale x 16 x i1> @llvm.aarch64.sve.uzp2.b64(<vscale x 16 x i1>, <vscale x 16 x i1>)

declare <vscale x 16 x i1> @llvm.aarch64.sve.zip1.nxv16i1(<vscale x 16 x i1>, <vscale x 16 x i1>)
declare <vscale x 8 x i1> @llvm.aarch64.sve.zip1.nxv8i1(<vscale x 8 x i1>, <vscale x 8 x i1>)
declare <vscale x 4 x i1> @llvm.aarch64.sve.zip1.nxv4i1(<vscale x 4 x i1>, <vscale x 4 x i1>)
declare <vscale x 2 x i1> @llvm.aarch64.sve.zip1.nxv2i1(<vscale x 2 x i1>, <vscale x 2 x i1>)
declare <vscale x 16 x i8> @llvm.aarch64.sve.zip1.nxv16i8(<vscale x 16 x i8>, <vscale x 16 x i8>)
declare <vscale x 8 x i16> @llvm.aarch64.sve.zip1.nxv8i16(<vscale x 8 x i16>, <vscale x 8 x i16>)
declare <vscale x 4 x i32> @llvm.aarch64.sve.zip1.nxv4i32(<vscale x 4 x i32>, <vscale x 4 x i32>)
declare <vscale x 2 x i64> @llvm.aarch64.sve.zip1.nxv2i64(<vscale x 2 x i64>, <vscale x 2 x i64>)
declare <vscale x 2 x half> @llvm.aarch64.sve.zip1.nxv2f16(<vscale x 2 x half>, <vscale x 2 x half>)
declare <vscale x 4 x half> @llvm.aarch64.sve.zip1.nxv4f16(<vscale x 4 x half>, <vscale x 4 x half>)
declare <vscale x 8 x bfloat> @llvm.aarch64.sve.zip1.nxv8bf16(<vscale x 8 x bfloat>, <vscale x 8 x bfloat>)
declare <vscale x 8 x half> @llvm.aarch64.sve.zip1.nxv8f16(<vscale x 8 x half>, <vscale x 8 x half>)
declare <vscale x 2 x float> @llvm.aarch64.sve.zip1.nxv2f32(<vscale x 2 x float>, <vscale x 2 x float>)
declare <vscale x 4 x float> @llvm.aarch64.sve.zip1.nxv4f32(<vscale x 4 x float>, <vscale x 4 x float>)
declare <vscale x 2 x double> @llvm.aarch64.sve.zip1.nxv2f64(<vscale x 2 x double>, <vscale x 2 x double>)

declare <vscale x 16 x i1> @llvm.aarch64.sve.zip1.b16(<vscale x 16 x i1>, <vscale x 16 x i1>)
declare <vscale x 16 x i1> @llvm.aarch64.sve.zip1.b32(<vscale x 16 x i1>, <vscale x 16 x i1>)
declare <vscale x 16 x i1> @llvm.aarch64.sve.zip1.b64(<vscale x 16 x i1>, <vscale x 16 x i1>)

declare <vscale x 16 x i1> @llvm.aarch64.sve.zip2.nxv16i1(<vscale x 16 x i1>, <vscale x 16 x i1>)
declare <vscale x 8 x i1> @llvm.aarch64.sve.zip2.nxv8i1(<vscale x 8 x i1>, <vscale x 8 x i1>)
declare <vscale x 4 x i1> @llvm.aarch64.sve.zip2.nxv4i1(<vscale x 4 x i1>, <vscale x 4 x i1>)
declare <vscale x 2 x i1> @llvm.aarch64.sve.zip2.nxv2i1(<vscale x 2 x i1>, <vscale x 2 x i1>)
declare <vscale x 16 x i8> @llvm.aarch64.sve.zip2.nxv16i8(<vscale x 16 x i8>, <vscale x 16 x i8>)
declare <vscale x 8 x i16> @llvm.aarch64.sve.zip2.nxv8i16(<vscale x 8 x i16>, <vscale x 8 x i16>)
declare <vscale x 4 x i32> @llvm.aarch64.sve.zip2.nxv4i32(<vscale x 4 x i32>, <vscale x 4 x i32>)
declare <vscale x 2 x i64> @llvm.aarch64.sve.zip2.nxv2i64(<vscale x 2 x i64>, <vscale x 2 x i64>)
declare <vscale x 2 x half> @llvm.aarch64.sve.zip2.nxv2f16(<vscale x 2 x half>, <vscale x 2 x half>)
declare <vscale x 4 x half> @llvm.aarch64.sve.zip2.nxv4f16(<vscale x 4 x half>, <vscale x 4 x half>)
declare <vscale x 8 x bfloat> @llvm.aarch64.sve.zip2.nxv8bf16(<vscale x 8 x bfloat>, <vscale x 8 x bfloat>)
declare <vscale x 8 x half> @llvm.aarch64.sve.zip2.nxv8f16(<vscale x 8 x half>, <vscale x 8 x half>)
declare <vscale x 2 x float> @llvm.aarch64.sve.zip2.nxv2f32(<vscale x 2 x float>, <vscale x 2 x float>)
declare <vscale x 4 x float> @llvm.aarch64.sve.zip2.nxv4f32(<vscale x 4 x float>, <vscale x 4 x float>)
declare <vscale x 2 x double> @llvm.aarch64.sve.zip2.nxv2f64(<vscale x 2 x double>, <vscale x 2 x double>)

declare <vscale x 16 x i1> @llvm.aarch64.sve.zip2.b16(<vscale x 16 x i1>, <vscale x 16 x i1>)
declare <vscale x 16 x i1> @llvm.aarch64.sve.zip2.b32(<vscale x 16 x i1>, <vscale x 16 x i1>)
declare <vscale x 16 x i1> @llvm.aarch64.sve.zip2.b64(<vscale x 16 x i1>, <vscale x 16 x i1>)

declare <vscale x 2 x double> @llvm.vector.insert.nxv2f64.v2f64(<vscale x 2 x double>, <2 x double>, i64)
declare <vscale x 4 x float> @llvm.vector.insert.nxv4f32.v4f32(<vscale x 4 x float>, <4 x float>, i64)
declare <vscale x 8 x half> @llvm.vector.insert.nxv8f16.v8f16(<vscale x 8 x half>, <8 x half>, i64)
declare <vscale x 2 x i64> @llvm.vector.insert.nxv2i64.v2i64(<vscale x 2 x i64>, <2 x i64>, i64)
declare <vscale x 4 x i32> @llvm.vector.insert.nxv4i32.v4i32(<vscale x 4 x i32>, <4 x i32>, i64)
declare <vscale x 8 x i16> @llvm.vector.insert.nxv8i16.v8i16(<vscale x 8 x i16>, <8 x i16>, i64)
declare <vscale x 16 x i8> @llvm.vector.insert.nxv16i8.v16i8(<vscale x 16 x i8>, <16 x i8>, i64)
declare <vscale x 8 x bfloat> @llvm.vector.insert.nxv8bf16.v8bf16(<vscale x 8 x bfloat>, <8 x bfloat>, i64)

; +bf16 is required for the bfloat version.
attributes #0 = { "target-features"="+sve,+bf16" }