; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sve < %s | FileCheck %s
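
; NOTE: For reference (summary; see the Arm ARM for full semantics):
; CLASTA/CLASTB conditionally extract an element of the second data operand,
; CLASTB taking the last active element and CLASTA the element after it
; (wrapping to element 0). When no predicate lane is active the result falls
; back to the first data operand, which is why every variant below carries
; the extra %a operand.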

;
; CLASTA (Vectors)
;

define <vscale x 16 x i8> @clasta_i8(<vscale x 16 x i1> %pg, <vscale x 16 x i8> %a, <vscale x 16 x i8> %b) {
; CHECK-LABEL: clasta_i8:
; CHECK: clasta z0.b, p0, z0.b, z1.b
; CHECK-NEXT: ret
  %out = call <vscale x 16 x i8> @llvm.aarch64.sve.clasta.nxv16i8(<vscale x 16 x i1> %pg, <vscale x 16 x i8> %a, <vscale x 16 x i8> %b)
  ret <vscale x 16 x i8> %out
}

define <vscale x 8 x i16> @clasta_i16(<vscale x 8 x i1> %pg, <vscale x 8 x i16> %a, <vscale x 8 x i16> %b) {
; CHECK-LABEL: clasta_i16:
; CHECK: clasta z0.h, p0, z0.h, z1.h
; CHECK-NEXT: ret
  %out = call <vscale x 8 x i16> @llvm.aarch64.sve.clasta.nxv8i16(<vscale x 8 x i1> %pg, <vscale x 8 x i16> %a, <vscale x 8 x i16> %b)
  ret <vscale x 8 x i16> %out
}

define <vscale x 4 x i32> @clasta_i32(<vscale x 4 x i1> %pg, <vscale x 4 x i32> %a, <vscale x 4 x i32> %b) {
; CHECK-LABEL: clasta_i32:
; CHECK: clasta z0.s, p0, z0.s, z1.s
; CHECK-NEXT: ret
  %out = call <vscale x 4 x i32> @llvm.aarch64.sve.clasta.nxv4i32(<vscale x 4 x i1> %pg, <vscale x 4 x i32> %a, <vscale x 4 x i32> %b)
  ret <vscale x 4 x i32> %out
}

define <vscale x 2 x i64> @clasta_i64(<vscale x 2 x i1> %pg, <vscale x 2 x i64> %a, <vscale x 2 x i64> %b) {
; CHECK-LABEL: clasta_i64:
; CHECK: clasta z0.d, p0, z0.d, z1.d
; CHECK-NEXT: ret
  %out = call <vscale x 2 x i64> @llvm.aarch64.sve.clasta.nxv2i64(<vscale x 2 x i1> %pg, <vscale x 2 x i64> %a, <vscale x 2 x i64> %b)
  ret <vscale x 2 x i64> %out
}

define <vscale x 8 x half> @clasta_f16(<vscale x 8 x i1> %pg, <vscale x 8 x half> %a, <vscale x 8 x half> %b) {
; CHECK-LABEL: clasta_f16:
; CHECK: clasta z0.h, p0, z0.h, z1.h
; CHECK-NEXT: ret
  %out = call <vscale x 8 x half> @llvm.aarch64.sve.clasta.nxv8f16(<vscale x 8 x i1> %pg, <vscale x 8 x half> %a, <vscale x 8 x half> %b)
  ret <vscale x 8 x half> %out
}

define <vscale x 8 x bfloat> @clasta_bf16(<vscale x 8 x i1> %pg, <vscale x 8 x bfloat> %a, <vscale x 8 x bfloat> %b) #0 {
; CHECK-LABEL: clasta_bf16:
; CHECK: clasta z0.h, p0, z0.h, z1.h
; CHECK-NEXT: ret
  %out = call <vscale x 8 x bfloat> @llvm.aarch64.sve.clasta.nxv8bf16(<vscale x 8 x i1> %pg, <vscale x 8 x bfloat> %a, <vscale x 8 x bfloat> %b)
  ret <vscale x 8 x bfloat> %out
}

define <vscale x 4 x float> @clasta_f32(<vscale x 4 x i1> %pg, <vscale x 4 x float> %a, <vscale x 4 x float> %b) {
; CHECK-LABEL: clasta_f32:
; CHECK: clasta z0.s, p0, z0.s, z1.s
; CHECK-NEXT: ret
  %out = call <vscale x 4 x float> @llvm.aarch64.sve.clasta.nxv4f32(<vscale x 4 x i1> %pg, <vscale x 4 x float> %a, <vscale x 4 x float> %b)
  ret <vscale x 4 x float> %out
}

define <vscale x 2 x double> @clasta_f64(<vscale x 2 x i1> %pg, <vscale x 2 x double> %a, <vscale x 2 x double> %b) {
; CHECK-LABEL: clasta_f64:
; CHECK: clasta z0.d, p0, z0.d, z1.d
; CHECK-NEXT: ret
  %out = call <vscale x 2 x double> @llvm.aarch64.sve.clasta.nxv2f64(<vscale x 2 x i1> %pg, <vscale x 2 x double> %a, <vscale x 2 x double> %b)
  ret <vscale x 2 x double> %out
}
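
; NOTE: The clasta_n/clastb_n tests below exercise the scalar forms of the
; same instructions: as the CHECK lines show, the result is produced in a
; general-purpose register (w0/x0) for integer element types and in a SIMD&FP
; scalar register (h0/s0/d0) for floating-point element types.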

;
; CLASTA (Scalar)
;

define i8 @clasta_n_i8(<vscale x 16 x i1> %pg, i8 %a, <vscale x 16 x i8> %b) {
; CHECK-LABEL: clasta_n_i8:
; CHECK: clasta w0, p0, w0, z0.b
; CHECK-NEXT: ret
  %out = call i8 @llvm.aarch64.sve.clasta.n.nxv16i8(<vscale x 16 x i1> %pg, i8 %a, <vscale x 16 x i8> %b)
  ret i8 %out
}

define i16 @clasta_n_i16(<vscale x 8 x i1> %pg, i16 %a, <vscale x 8 x i16> %b) {
; CHECK-LABEL: clasta_n_i16:
; CHECK: clasta w0, p0, w0, z0.h
; CHECK-NEXT: ret
  %out = call i16 @llvm.aarch64.sve.clasta.n.nxv8i16(<vscale x 8 x i1> %pg, i16 %a, <vscale x 8 x i16> %b)
  ret i16 %out
}

define i32 @clasta_n_i32(<vscale x 4 x i1> %pg, i32 %a, <vscale x 4 x i32> %b) {
; CHECK-LABEL: clasta_n_i32:
; CHECK: clasta w0, p0, w0, z0.s
; CHECK-NEXT: ret
  %out = call i32 @llvm.aarch64.sve.clasta.n.nxv4i32(<vscale x 4 x i1> %pg, i32 %a, <vscale x 4 x i32> %b)
  ret i32 %out
}

define i64 @clasta_n_i64(<vscale x 2 x i1> %pg, i64 %a, <vscale x 2 x i64> %b) {
; CHECK-LABEL: clasta_n_i64:
; CHECK: clasta x0, p0, x0, z0.d
; CHECK-NEXT: ret
  %out = call i64 @llvm.aarch64.sve.clasta.n.nxv2i64(<vscale x 2 x i1> %pg, i64 %a, <vscale x 2 x i64> %b)
  ret i64 %out
}

define half @clasta_n_f16(<vscale x 8 x i1> %pg, half %a, <vscale x 8 x half> %b) {
; CHECK-LABEL: clasta_n_f16:
; CHECK: clasta h0, p0, h0, z1.h
; CHECK-NEXT: ret
  %out = call half @llvm.aarch64.sve.clasta.n.nxv8f16(<vscale x 8 x i1> %pg, half %a, <vscale x 8 x half> %b)
  ret half %out
}

define bfloat @clasta_n_bf16(<vscale x 8 x i1> %pg, bfloat %a, <vscale x 8 x bfloat> %b) #0 {
; CHECK-LABEL: clasta_n_bf16:
; CHECK: clasta h0, p0, h0, z1.h
; CHECK-NEXT: ret
  %out = call bfloat @llvm.aarch64.sve.clasta.n.nxv8bf16(<vscale x 8 x i1> %pg, bfloat %a, <vscale x 8 x bfloat> %b)
  ret bfloat %out
}

define float @clasta_n_f32(<vscale x 4 x i1> %pg, float %a, <vscale x 4 x float> %b) {
; CHECK-LABEL: clasta_n_f32:
; CHECK: clasta s0, p0, s0, z1.s
; CHECK-NEXT: ret
  %out = call float @llvm.aarch64.sve.clasta.n.nxv4f32(<vscale x 4 x i1> %pg, float %a, <vscale x 4 x float> %b)
  ret float %out
}

define double @clasta_n_f64(<vscale x 2 x i1> %pg, double %a, <vscale x 2 x double> %b) {
; CHECK-LABEL: clasta_n_f64:
; CHECK: clasta d0, p0, d0, z1.d
; CHECK-NEXT: ret
  %out = call double @llvm.aarch64.sve.clasta.n.nxv2f64(<vscale x 2 x i1> %pg, double %a, <vscale x 2 x double> %b)
  ret double %out
}

;
; CLASTB (Vectors)
;

define <vscale x 16 x i8> @clastb_i8(<vscale x 16 x i1> %pg, <vscale x 16 x i8> %a, <vscale x 16 x i8> %b) {
; CHECK-LABEL: clastb_i8:
; CHECK: clastb z0.b, p0, z0.b, z1.b
; CHECK-NEXT: ret
  %out = call <vscale x 16 x i8> @llvm.aarch64.sve.clastb.nxv16i8(<vscale x 16 x i1> %pg, <vscale x 16 x i8> %a, <vscale x 16 x i8> %b)
  ret <vscale x 16 x i8> %out
}

define <vscale x 8 x i16> @clastb_i16(<vscale x 8 x i1> %pg, <vscale x 8 x i16> %a, <vscale x 8 x i16> %b) {
; CHECK-LABEL: clastb_i16:
; CHECK: clastb z0.h, p0, z0.h, z1.h
; CHECK-NEXT: ret
  %out = call <vscale x 8 x i16> @llvm.aarch64.sve.clastb.nxv8i16(<vscale x 8 x i1> %pg, <vscale x 8 x i16> %a, <vscale x 8 x i16> %b)
  ret <vscale x 8 x i16> %out
}

define <vscale x 4 x i32> @clastb_i32(<vscale x 4 x i1> %pg, <vscale x 4 x i32> %a, <vscale x 4 x i32> %b) {
; CHECK-LABEL: clastb_i32:
; CHECK: clastb z0.s, p0, z0.s, z1.s
; CHECK-NEXT: ret
  %out = call <vscale x 4 x i32> @llvm.aarch64.sve.clastb.nxv4i32(<vscale x 4 x i1> %pg, <vscale x 4 x i32> %a, <vscale x 4 x i32> %b)
  ret <vscale x 4 x i32> %out
}

define <vscale x 2 x i64> @clastb_i64(<vscale x 2 x i1> %pg, <vscale x 2 x i64> %a, <vscale x 2 x i64> %b) {
; CHECK-LABEL: clastb_i64:
; CHECK: clastb z0.d, p0, z0.d, z1.d
; CHECK-NEXT: ret
  %out = call <vscale x 2 x i64> @llvm.aarch64.sve.clastb.nxv2i64(<vscale x 2 x i1> %pg, <vscale x 2 x i64> %a, <vscale x 2 x i64> %b)
  ret <vscale x 2 x i64> %out
}

define <vscale x 8 x half> @clastb_f16(<vscale x 8 x i1> %pg, <vscale x 8 x half> %a, <vscale x 8 x half> %b) {
; CHECK-LABEL: clastb_f16:
; CHECK: clastb z0.h, p0, z0.h, z1.h
; CHECK-NEXT: ret
  %out = call <vscale x 8 x half> @llvm.aarch64.sve.clastb.nxv8f16(<vscale x 8 x i1> %pg, <vscale x 8 x half> %a, <vscale x 8 x half> %b)
  ret <vscale x 8 x half> %out
}

define <vscale x 8 x bfloat> @clastb_bf16(<vscale x 8 x i1> %pg, <vscale x 8 x bfloat> %a, <vscale x 8 x bfloat> %b) #0 {
; CHECK-LABEL: clastb_bf16:
; CHECK: clastb z0.h, p0, z0.h, z1.h
; CHECK-NEXT: ret
  %out = call <vscale x 8 x bfloat> @llvm.aarch64.sve.clastb.nxv8bf16(<vscale x 8 x i1> %pg, <vscale x 8 x bfloat> %a, <vscale x 8 x bfloat> %b)
  ret <vscale x 8 x bfloat> %out
}

define <vscale x 4 x float> @clastb_f32(<vscale x 4 x i1> %pg, <vscale x 4 x float> %a, <vscale x 4 x float> %b) {
; CHECK-LABEL: clastb_f32:
; CHECK: clastb z0.s, p0, z0.s, z1.s
; CHECK-NEXT: ret
  %out = call <vscale x 4 x float> @llvm.aarch64.sve.clastb.nxv4f32(<vscale x 4 x i1> %pg, <vscale x 4 x float> %a, <vscale x 4 x float> %b)
  ret <vscale x 4 x float> %out
}

define <vscale x 2 x double> @clastb_f64(<vscale x 2 x i1> %pg, <vscale x 2 x double> %a, <vscale x 2 x double> %b) {
; CHECK-LABEL: clastb_f64:
; CHECK: clastb z0.d, p0, z0.d, z1.d
; CHECK-NEXT: ret
  %out = call <vscale x 2 x double> @llvm.aarch64.sve.clastb.nxv2f64(<vscale x 2 x i1> %pg, <vscale x 2 x double> %a, <vscale x 2 x double> %b)
  ret <vscale x 2 x double> %out
}

;
; CLASTB (Scalar)
;

define i8 @clastb_n_i8(<vscale x 16 x i1> %pg, i8 %a, <vscale x 16 x i8> %b) {
; CHECK-LABEL: clastb_n_i8:
; CHECK: clastb w0, p0, w0, z0.b
; CHECK-NEXT: ret
  %out = call i8 @llvm.aarch64.sve.clastb.n.nxv16i8(<vscale x 16 x i1> %pg, i8 %a, <vscale x 16 x i8> %b)
  ret i8 %out
}

define i16 @clastb_n_i16(<vscale x 8 x i1> %pg, i16 %a, <vscale x 8 x i16> %b) {
; CHECK-LABEL: clastb_n_i16:
; CHECK: clastb w0, p0, w0, z0.h
; CHECK-NEXT: ret
  %out = call i16 @llvm.aarch64.sve.clastb.n.nxv8i16(<vscale x 8 x i1> %pg, i16 %a, <vscale x 8 x i16> %b)
  ret i16 %out
}

define i32 @clastb_n_i32(<vscale x 4 x i1> %pg, i32 %a, <vscale x 4 x i32> %b) {
; CHECK-LABEL: clastb_n_i32:
; CHECK: clastb w0, p0, w0, z0.s
; CHECK-NEXT: ret
  %out = call i32 @llvm.aarch64.sve.clastb.n.nxv4i32(<vscale x 4 x i1> %pg, i32 %a, <vscale x 4 x i32> %b)
  ret i32 %out
}

define i64 @clastb_n_i64(<vscale x 2 x i1> %pg, i64 %a, <vscale x 2 x i64> %b) {
; CHECK-LABEL: clastb_n_i64:
; CHECK: clastb x0, p0, x0, z0.d
; CHECK-NEXT: ret
  %out = call i64 @llvm.aarch64.sve.clastb.n.nxv2i64(<vscale x 2 x i1> %pg, i64 %a, <vscale x 2 x i64> %b)
  ret i64 %out
}

define half @clastb_n_f16(<vscale x 8 x i1> %pg, half %a, <vscale x 8 x half> %b) {
; CHECK-LABEL: clastb_n_f16:
; CHECK: clastb h0, p0, h0, z1.h
; CHECK-NEXT: ret
  %out = call half @llvm.aarch64.sve.clastb.n.nxv8f16(<vscale x 8 x i1> %pg, half %a, <vscale x 8 x half> %b)
  ret half %out
}

define bfloat @clastb_n_bf16(<vscale x 8 x i1> %pg, bfloat %a, <vscale x 8 x bfloat> %b) #0 {
; CHECK-LABEL: clastb_n_bf16:
; CHECK: clastb h0, p0, h0, z1.h
; CHECK-NEXT: ret
  %out = call bfloat @llvm.aarch64.sve.clastb.n.nxv8bf16(<vscale x 8 x i1> %pg, bfloat %a, <vscale x 8 x bfloat> %b)
  ret bfloat %out
}

define float @clastb_n_f32(<vscale x 4 x i1> %pg, float %a, <vscale x 4 x float> %b) {
; CHECK-LABEL: clastb_n_f32:
; CHECK: clastb s0, p0, s0, z1.s
; CHECK-NEXT: ret
  %out = call float @llvm.aarch64.sve.clastb.n.nxv4f32(<vscale x 4 x i1> %pg, float %a, <vscale x 4 x float> %b)
  ret float %out
}

define double @clastb_n_f64(<vscale x 2 x i1> %pg, double %a, <vscale x 2 x double> %b) {
; CHECK-LABEL: clastb_n_f64:
; CHECK: clastb d0, p0, d0, z1.d
; CHECK-NEXT: ret
  %out = call double @llvm.aarch64.sve.clastb.n.nxv2f64(<vscale x 2 x i1> %pg, double %a, <vscale x 2 x double> %b)
  ret double %out
}

;
; DUPQ
;

define <vscale x 16 x i8> @dupq_i8(<vscale x 16 x i8> %a) {
; CHECK-LABEL: dupq_i8:
; CHECK: mov z0.q, q0
; CHECK-NEXT: ret
  %out = call <vscale x 16 x i8> @llvm.aarch64.sve.dupq.lane.nxv16i8(<vscale x 16 x i8> %a, i64 0)
  ret <vscale x 16 x i8> %out
}

define <vscale x 8 x i16> @dupq_i16(<vscale x 8 x i16> %a) {
; CHECK-LABEL: dupq_i16:
; CHECK: mov z0.q, z0.q[1]
; CHECK-NEXT: ret
  %out = call <vscale x 8 x i16> @llvm.aarch64.sve.dupq.lane.nxv8i16(<vscale x 8 x i16> %a, i64 1)
  ret <vscale x 8 x i16> %out
}

define <vscale x 4 x i32> @dupq_i32(<vscale x 4 x i32> %a) {
; CHECK-LABEL: dupq_i32:
; CHECK: mov z0.q, z0.q[2]
; CHECK-NEXT: ret
  %out = call <vscale x 4 x i32> @llvm.aarch64.sve.dupq.lane.nxv4i32(<vscale x 4 x i32> %a, i64 2)
  ret <vscale x 4 x i32> %out
}

define <vscale x 2 x i64> @dupq_i64(<vscale x 2 x i64> %a) {
; CHECK-LABEL: dupq_i64:
; CHECK: mov z0.q, z0.q[3]
; CHECK-NEXT: ret
  %out = call <vscale x 2 x i64> @llvm.aarch64.sve.dupq.lane.nxv2i64(<vscale x 2 x i64> %a, i64 3)
  ret <vscale x 2 x i64> %out
}

define <vscale x 8 x half> @dupq_f16(<vscale x 8 x half> %a) {
; CHECK-LABEL: dupq_f16:
; CHECK: mov z0.q, q0
; CHECK-NEXT: ret
  %out = call <vscale x 8 x half> @llvm.aarch64.sve.dupq.lane.nxv8f16(<vscale x 8 x half> %a, i64 0)
  ret <vscale x 8 x half> %out
}

define <vscale x 8 x bfloat> @dupq_bf16(<vscale x 8 x bfloat> %a) #0 {
; CHECK-LABEL: dupq_bf16:
; CHECK: mov z0.q, q0
; CHECK-NEXT: ret
  %out = call <vscale x 8 x bfloat> @llvm.aarch64.sve.dupq.lane.nxv8bf16(<vscale x 8 x bfloat> %a, i64 0)
  ret <vscale x 8 x bfloat> %out
}

define <vscale x 4 x float> @dupq_f32(<vscale x 4 x float> %a) {
; CHECK-LABEL: dupq_f32:
; CHECK: mov z0.q, z0.q[1]
; CHECK-NEXT: ret
  %out = call <vscale x 4 x float> @llvm.aarch64.sve.dupq.lane.nxv4f32(<vscale x 4 x float> %a, i64 1)
  ret <vscale x 4 x float> %out
}

define <vscale x 2 x double> @dupq_f64(<vscale x 2 x double> %a) {
; CHECK-LABEL: dupq_f64:
; CHECK: mov z0.q, z0.q[2]
; CHECK-NEXT: ret
  %out = call <vscale x 2 x double> @llvm.aarch64.sve.dupq.lane.nxv2f64(<vscale x 2 x double> %a, i64 2)
  ret <vscale x 2 x double> %out
}
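
; NOTE: dupq.lane broadcasts one 128-bit quadword of the source across the
; whole vector. As the tests above show, a constant index in the range 0-3
; lowers to the DUP (indexed) alias 'mov z0.q, z0.q[imm]' (printed as
; 'mov z0.q, q0' for index 0); other indices take the TBL expansion tested
; below.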

;
; DUPQ_LANE
;

define <vscale x 16 x i8> @dupq_lane_i8(<vscale x 16 x i8> %a, i64 %idx) {
; CHECK-LABEL: dupq_lane_i8:
; CHECK-DAG: index [[Z1:z[0-9]+]].d, #0, #1
; CHECK-DAG: and [[Z2:z[0-9]+]].d, [[Z1]].d, #0x1
; CHECK-DAG: add [[X1:x[0-9]+]], x0, x0
; CHECK-DAG: mov [[Z3:z[0-9]+]].d, [[X1]]
; CHECK: add [[Z4:z[0-9]+]].d, [[Z2]].d, [[Z3]].d
; CHECK-NEXT: tbl z0.d, { z0.d }, [[Z4]].d
; CHECK-NEXT: ret
  %out = call <vscale x 16 x i8> @llvm.aarch64.sve.dupq.lane.nxv16i8(<vscale x 16 x i8> %a, i64 %idx)
  ret <vscale x 16 x i8> %out
}
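
; NOTE: Reading the CHECK lines above, the variable-index expansion computes
; per-lane TBL indices of ((0,1,2,...) & 1) + %idx * 2: the 'index'/'and'
; pair builds the (0,1,0,1,...) pattern selecting the low or high doubleword
; within a quadword, the scalar 'add [[X1]], x0, x0' rescales the quadword
; index to doubleword units, and the final vector 'add' plus 'tbl' replicate
; the two doublewords of quadword %idx across the destination.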

; NOTE: Identical operation to dupq_lane_i8 (i.e. element type is irrelevant).
define <vscale x 8 x i16> @dupq_lane_i16(<vscale x 8 x i16> %a, i64 %idx) {
; CHECK-LABEL: dupq_lane_i16:
; CHECK-DAG: index [[Z1:z[0-9]+]].d, #0, #1
; CHECK-DAG: and [[Z2:z[0-9]+]].d, [[Z1]].d, #0x1
; CHECK-DAG: add [[X1:x[0-9]+]], x0, x0
; CHECK-DAG: mov [[Z3:z[0-9]+]].d, [[X1]]
; CHECK: add [[Z4:z[0-9]+]].d, [[Z2]].d, [[Z3]].d
; CHECK: tbl z0.d, { z0.d }, [[Z4]].d
; CHECK-NEXT: ret
  %out = call <vscale x 8 x i16> @llvm.aarch64.sve.dupq.lane.nxv8i16(<vscale x 8 x i16> %a, i64 %idx)
  ret <vscale x 8 x i16> %out
}

; NOTE: Identical operation to dupq_lane_i8 (i.e. element type is irrelevant).
define <vscale x 4 x i32> @dupq_lane_i32(<vscale x 4 x i32> %a, i64 %idx) {
; CHECK-LABEL: dupq_lane_i32:
; CHECK-DAG: index [[Z1:z[0-9]+]].d, #0, #1
; CHECK-DAG: and [[Z2:z[0-9]+]].d, [[Z1]].d, #0x1
; CHECK-DAG: add [[X1:x[0-9]+]], x0, x0
; CHECK-DAG: mov [[Z3:z[0-9]+]].d, [[X1]]
; CHECK: add [[Z4:z[0-9]+]].d, [[Z2]].d, [[Z3]].d
; CHECK: tbl z0.d, { z0.d }, [[Z4]].d
; CHECK-NEXT: ret
  %out = call <vscale x 4 x i32> @llvm.aarch64.sve.dupq.lane.nxv4i32(<vscale x 4 x i32> %a, i64 %idx)
  ret <vscale x 4 x i32> %out
}

; NOTE: Identical operation to dupq_lane_i8 (i.e. element type is irrelevant).
define <vscale x 2 x i64> @dupq_lane_i64(<vscale x 2 x i64> %a, i64 %idx) {
; CHECK-LABEL: dupq_lane_i64:
; CHECK-DAG: index [[Z1:z[0-9]+]].d, #0, #1
; CHECK-DAG: and [[Z2:z[0-9]+]].d, [[Z1]].d, #0x1
; CHECK-DAG: add [[X1:x[0-9]+]], x0, x0
; CHECK-DAG: mov [[Z3:z[0-9]+]].d, [[X1]]
; CHECK: add [[Z4:z[0-9]+]].d, [[Z2]].d, [[Z3]].d
; CHECK: tbl z0.d, { z0.d }, [[Z4]].d
; CHECK-NEXT: ret
  %out = call <vscale x 2 x i64> @llvm.aarch64.sve.dupq.lane.nxv2i64(<vscale x 2 x i64> %a, i64 %idx)
  ret <vscale x 2 x i64> %out
}

; NOTE: Identical operation to dupq_lane_i8 (i.e. element type is irrelevant).
define <vscale x 8 x half> @dupq_lane_f16(<vscale x 8 x half> %a, i64 %idx) {
; CHECK-LABEL: dupq_lane_f16:
; CHECK-DAG: index [[Z1:z[0-9]+]].d, #0, #1
; CHECK-DAG: and [[Z2:z[0-9]+]].d, [[Z1]].d, #0x1
; CHECK-DAG: add [[X1:x[0-9]+]], x0, x0
; CHECK-DAG: mov [[Z3:z[0-9]+]].d, [[X1]]
; CHECK: add [[Z4:z[0-9]+]].d, [[Z2]].d, [[Z3]].d
; CHECK: tbl z0.d, { z0.d }, [[Z4]].d
; CHECK-NEXT: ret
  %out = call <vscale x 8 x half> @llvm.aarch64.sve.dupq.lane.nxv8f16(<vscale x 8 x half> %a, i64 %idx)
  ret <vscale x 8 x half> %out
}

; NOTE: Identical operation to dupq_lane_i8 (i.e. element type is irrelevant).
define <vscale x 8 x bfloat> @dupq_lane_bf16(<vscale x 8 x bfloat> %a, i64 %idx) #0 {
; CHECK-LABEL: dupq_lane_bf16:
; CHECK-DAG: index [[Z1:z[0-9]+]].d, #0, #1
; CHECK-DAG: and [[Z2:z[0-9]+]].d, [[Z1]].d, #0x1
; CHECK-DAG: add [[X1:x[0-9]+]], x0, x0
; CHECK-DAG: mov [[Z3:z[0-9]+]].d, [[X1]]
; CHECK: add [[Z4:z[0-9]+]].d, [[Z2]].d, [[Z3]].d
; CHECK: tbl z0.d, { z0.d }, [[Z4]].d
; CHECK-NEXT: ret
  %out = call <vscale x 8 x bfloat> @llvm.aarch64.sve.dupq.lane.nxv8bf16(<vscale x 8 x bfloat> %a, i64 %idx)
  ret <vscale x 8 x bfloat> %out
}

; NOTE: Identical operation to dupq_lane_i8 (i.e. element type is irrelevant).
define <vscale x 4 x float> @dupq_lane_f32(<vscale x 4 x float> %a, i64 %idx) {
; CHECK-LABEL: dupq_lane_f32:
; CHECK-DAG: index [[Z1:z[0-9]+]].d, #0, #1
; CHECK-DAG: and [[Z2:z[0-9]+]].d, [[Z1]].d, #0x1
; CHECK-DAG: add [[X1:x[0-9]+]], x0, x0
; CHECK-DAG: mov [[Z3:z[0-9]+]].d, [[X1]]
; CHECK: add [[Z4:z[0-9]+]].d, [[Z2]].d, [[Z3]].d
; CHECK: tbl z0.d, { z0.d }, [[Z4]].d
; CHECK-NEXT: ret
  %out = call <vscale x 4 x float> @llvm.aarch64.sve.dupq.lane.nxv4f32(<vscale x 4 x float> %a, i64 %idx)
  ret <vscale x 4 x float> %out
}

; NOTE: Identical operation to dupq_lane_i8 (i.e. element type is irrelevant).
define <vscale x 2 x double> @dupq_lane_f64(<vscale x 2 x double> %a, i64 %idx) {
; CHECK-LABEL: dupq_lane_f64:
; CHECK-DAG: index [[Z1:z[0-9]+]].d, #0, #1
; CHECK-DAG: and [[Z2:z[0-9]+]].d, [[Z1]].d, #0x1
; CHECK-DAG: add [[X1:x[0-9]+]], x0, x0
; CHECK-DAG: mov [[Z3:z[0-9]+]].d, [[X1]]
; CHECK: add [[Z4:z[0-9]+]].d, [[Z2]].d, [[Z3]].d
; CHECK: tbl z0.d, { z0.d }, [[Z4]].d
; CHECK-NEXT: ret
  %out = call <vscale x 2 x double> @llvm.aarch64.sve.dupq.lane.nxv2f64(<vscale x 2 x double> %a, i64 %idx)
  ret <vscale x 2 x double> %out
}

; NOTE: Index out of range (0-3)
define <vscale x 2 x i64> @dupq_i64_range(<vscale x 2 x i64> %a) {
; CHECK-LABEL: dupq_i64_range:
; CHECK-DAG: index [[Z1:z[0-9]+]].d, #0, #1
; CHECK-DAG: and [[Z2:z[0-9]+]].d, [[Z1]].d, #0x1
; CHECK-DAG: add [[Z3:z[0-9]+]].d, [[Z2]].d, #8
; CHECK: tbl z0.d, { z0.d }, [[Z3]].d
; CHECK-NEXT: ret
  %out = call <vscale x 2 x i64> @llvm.aarch64.sve.dupq.lane.nxv2i64(<vscale x 2 x i64> %a, i64 4)
  ret <vscale x 2 x i64> %out
}
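
; NOTE: Index 4 is outside the 0-3 immediate range of the 'mov z0.q, z0.q[i]'
; alias, so codegen falls back to the TBL expansion above with the doubleword
; offset pre-folded into the constant #8 (= 4 quadwords * 2 doublewords).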

;
; EXT
;

define <vscale x 16 x i8> @ext_i8(<vscale x 16 x i8> %a, <vscale x 16 x i8> %b) {
; CHECK-LABEL: ext_i8:
; CHECK: ext z0.b, z0.b, z1.b, #255
; CHECK-NEXT: ret
  %out = call <vscale x 16 x i8> @llvm.aarch64.sve.ext.nxv16i8(<vscale x 16 x i8> %a, <vscale x 16 x i8> %b, i32 255)
  ret <vscale x 16 x i8> %out
}
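
; NOTE: The immediate operand of llvm.aarch64.sve.ext.* counts elements,
; whereas the EXT instruction immediate counts bytes; the tests that follow
; therefore expect the element index scaled by the element size (for example,
; i32 1 on nxv4i32 becomes #4).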

define <vscale x 8 x i16> @ext_i16(<vscale x 8 x i16> %a, <vscale x 8 x i16> %b) {
; CHECK-LABEL: ext_i16:
; CHECK: ext z0.b, z0.b, z1.b, #0
; CHECK-NEXT: ret
  %out = call <vscale x 8 x i16> @llvm.aarch64.sve.ext.nxv8i16(<vscale x 8 x i16> %a, <vscale x 8 x i16> %b, i32 0)
  ret <vscale x 8 x i16> %out
}

define <vscale x 4 x i32> @ext_i32(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b) {
; CHECK-LABEL: ext_i32:
; CHECK: ext z0.b, z0.b, z1.b, #4
; CHECK-NEXT: ret
  %out = call <vscale x 4 x i32> @llvm.aarch64.sve.ext.nxv4i32(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b, i32 1)
  ret <vscale x 4 x i32> %out
}

define <vscale x 2 x i64> @ext_i64(<vscale x 2 x i64> %a, <vscale x 2 x i64> %b) {
; CHECK-LABEL: ext_i64:
; CHECK: ext z0.b, z0.b, z1.b, #16
; CHECK-NEXT: ret
  %out = call <vscale x 2 x i64> @llvm.aarch64.sve.ext.nxv2i64(<vscale x 2 x i64> %a, <vscale x 2 x i64> %b, i32 2)
  ret <vscale x 2 x i64> %out
}

define <vscale x 8 x bfloat> @ext_bf16(<vscale x 8 x bfloat> %a, <vscale x 8 x bfloat> %b) #0 {
; CHECK-LABEL: ext_bf16:
; CHECK: ext z0.b, z0.b, z1.b, #6
; CHECK-NEXT: ret
  %out = call <vscale x 8 x bfloat> @llvm.aarch64.sve.ext.nxv8bf16(<vscale x 8 x bfloat> %a, <vscale x 8 x bfloat> %b, i32 3)
  ret <vscale x 8 x bfloat> %out
}

define <vscale x 8 x half> @ext_f16(<vscale x 8 x half> %a, <vscale x 8 x half> %b) {
; CHECK-LABEL: ext_f16:
; CHECK: ext z0.b, z0.b, z1.b, #6
; CHECK-NEXT: ret
  %out = call <vscale x 8 x half> @llvm.aarch64.sve.ext.nxv8f16(<vscale x 8 x half> %a, <vscale x 8 x half> %b, i32 3)
  ret <vscale x 8 x half> %out
}

define <vscale x 4 x float> @ext_f32(<vscale x 4 x float> %a, <vscale x 4 x float> %b) {
; CHECK-LABEL: ext_f32:
; CHECK: ext z0.b, z0.b, z1.b, #16
; CHECK-NEXT: ret
  %out = call <vscale x 4 x float> @llvm.aarch64.sve.ext.nxv4f32(<vscale x 4 x float> %a, <vscale x 4 x float> %b, i32 4)
  ret <vscale x 4 x float> %out
}

define <vscale x 2 x double> @ext_f64(<vscale x 2 x double> %a, <vscale x 2 x double> %b) {
; CHECK-LABEL: ext_f64:
; CHECK: ext z0.b, z0.b, z1.b, #40
; CHECK-NEXT: ret
  %out = call <vscale x 2 x double> @llvm.aarch64.sve.ext.nxv2f64(<vscale x 2 x double> %a, <vscale x 2 x double> %b, i32 5)
  ret <vscale x 2 x double> %out
}
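
; NOTE: LASTA/LASTB extract one element into a scalar register, LASTB the
; last active element and LASTA the element following it. The f32 '_v2'
; variants check that unpacked nxv2f32 vectors still select with the .s
; element size.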

;
; LASTA
;

define i8 @lasta_i8(<vscale x 16 x i1> %pg, <vscale x 16 x i8> %a) {
; CHECK-LABEL: lasta_i8
; CHECK: lasta w0, p0, z0.b
; CHECK-NEXT: ret
  %res = call i8 @llvm.aarch64.sve.lasta.nxv16i8(<vscale x 16 x i1> %pg, <vscale x 16 x i8> %a)
  ret i8 %res
}

define i16 @lasta_i16(<vscale x 8 x i1> %pg, <vscale x 8 x i16> %a) {
; CHECK-LABEL: lasta_i16
; CHECK: lasta w0, p0, z0.h
; CHECK-NEXT: ret
  %res = call i16 @llvm.aarch64.sve.lasta.nxv8i16(<vscale x 8 x i1> %pg, <vscale x 8 x i16> %a)
  ret i16 %res
}

define i32 @lasta_i32(<vscale x 4 x i1> %pg, <vscale x 4 x i32> %a) {
; CHECK-LABEL: lasta_i32
; CHECK: lasta w0, p0, z0.s
; CHECK-NEXT: ret
  %res = call i32 @llvm.aarch64.sve.lasta.nxv4i32(<vscale x 4 x i1> %pg, <vscale x 4 x i32> %a)
  ret i32 %res
}

define i64 @lasta_i64(<vscale x 2 x i1> %pg, <vscale x 2 x i64> %a) {
; CHECK-LABEL: lasta_i64
; CHECK: lasta x0, p0, z0.d
; CHECK-NEXT: ret
  %res = call i64 @llvm.aarch64.sve.lasta.nxv2i64(<vscale x 2 x i1> %pg, <vscale x 2 x i64> %a)
  ret i64 %res
}

define half @lasta_f16(<vscale x 8 x i1> %pg, <vscale x 8 x half> %a) {
; CHECK-LABEL: lasta_f16
; CHECK: lasta h0, p0, z0.h
; CHECK-NEXT: ret
  %res = call half @llvm.aarch64.sve.lasta.nxv8f16(<vscale x 8 x i1> %pg, <vscale x 8 x half> %a)
  ret half %res
}

define bfloat @lasta_bf16(<vscale x 8 x i1> %pg, <vscale x 8 x bfloat> %a) #0 {
; CHECK-LABEL: lasta_bf16
; CHECK: lasta h0, p0, z0.h
; CHECK-NEXT: ret
  %res = call bfloat @llvm.aarch64.sve.lasta.nxv8bf16(<vscale x 8 x i1> %pg, <vscale x 8 x bfloat> %a)
  ret bfloat %res
}

define float @lasta_f32(<vscale x 4 x i1> %pg, <vscale x 4 x float> %a) {
; CHECK-LABEL: lasta_f32
; CHECK: lasta s0, p0, z0.s
; CHECK-NEXT: ret
  %res = call float @llvm.aarch64.sve.lasta.nxv4f32(<vscale x 4 x i1> %pg, <vscale x 4 x float> %a)
  ret float %res
}

define float @lasta_f32_v2(<vscale x 2 x i1> %pg, <vscale x 2 x float> %a) {
; CHECK-LABEL: lasta_f32_v2
; CHECK: lasta s0, p0, z0.s
; CHECK-NEXT: ret
  %res = call float @llvm.aarch64.sve.lasta.nxv2f32(<vscale x 2 x i1> %pg, <vscale x 2 x float> %a)
  ret float %res
}

define double @lasta_f64(<vscale x 2 x i1> %pg, <vscale x 2 x double> %a) {
; CHECK-LABEL: lasta_f64
; CHECK: lasta d0, p0, z0.d
; CHECK-NEXT: ret
  %res = call double @llvm.aarch64.sve.lasta.nxv2f64(<vscale x 2 x i1> %pg, <vscale x 2 x double> %a)
  ret double %res
}

;
; LASTB
;

define i8 @lastb_i8(<vscale x 16 x i1> %pg, <vscale x 16 x i8> %a) {
; CHECK-LABEL: lastb_i8
; CHECK: lastb w0, p0, z0.b
; CHECK-NEXT: ret
  %res = call i8 @llvm.aarch64.sve.lastb.nxv16i8(<vscale x 16 x i1> %pg, <vscale x 16 x i8> %a)
  ret i8 %res
}

define i16 @lastb_i16(<vscale x 8 x i1> %pg, <vscale x 8 x i16> %a) {
; CHECK-LABEL: lastb_i16
; CHECK: lastb w0, p0, z0.h
; CHECK-NEXT: ret
  %res = call i16 @llvm.aarch64.sve.lastb.nxv8i16(<vscale x 8 x i1> %pg, <vscale x 8 x i16> %a)
  ret i16 %res
}

define i32 @lastb_i32(<vscale x 4 x i1> %pg, <vscale x 4 x i32> %a) {
; CHECK-LABEL: lastb_i32
; CHECK: lastb w0, p0, z0.s
; CHECK-NEXT: ret
  %res = call i32 @llvm.aarch64.sve.lastb.nxv4i32(<vscale x 4 x i1> %pg, <vscale x 4 x i32> %a)
  ret i32 %res
}

define i64 @lastb_i64(<vscale x 2 x i1> %pg, <vscale x 2 x i64> %a) {
; CHECK-LABEL: lastb_i64
; CHECK: lastb x0, p0, z0.d
; CHECK-NEXT: ret
  %res = call i64 @llvm.aarch64.sve.lastb.nxv2i64(<vscale x 2 x i1> %pg, <vscale x 2 x i64> %a)
  ret i64 %res
}

define half @lastb_f16(<vscale x 8 x i1> %pg, <vscale x 8 x half> %a) {
; CHECK-LABEL: lastb_f16
; CHECK: lastb h0, p0, z0.h
; CHECK-NEXT: ret
  %res = call half @llvm.aarch64.sve.lastb.nxv8f16(<vscale x 8 x i1> %pg, <vscale x 8 x half> %a)
  ret half %res
}

define bfloat @lastb_bf16(<vscale x 8 x i1> %pg, <vscale x 8 x bfloat> %a) #0 {
; CHECK-LABEL: lastb_bf16
; CHECK: lastb h0, p0, z0.h
; CHECK-NEXT: ret
  %res = call bfloat @llvm.aarch64.sve.lastb.nxv8bf16(<vscale x 8 x i1> %pg, <vscale x 8 x bfloat> %a)
  ret bfloat %res
}

define float @lastb_f32(<vscale x 4 x i1> %pg, <vscale x 4 x float> %a) {
; CHECK-LABEL: lastb_f32
; CHECK: lastb s0, p0, z0.s
; CHECK-NEXT: ret
  %res = call float @llvm.aarch64.sve.lastb.nxv4f32(<vscale x 4 x i1> %pg, <vscale x 4 x float> %a)
  ret float %res
}

define float @lastb_f32_v2(<vscale x 2 x i1> %pg, <vscale x 2 x float> %a) {
; CHECK-LABEL: lastb_f32_v2
; CHECK: lastb s0, p0, z0.s
; CHECK-NEXT: ret
  %res = call float @llvm.aarch64.sve.lastb.nxv2f32(<vscale x 2 x i1> %pg, <vscale x 2 x float> %a)
  ret float %res
}

define double @lastb_f64(<vscale x 2 x i1> %pg, <vscale x 2 x double> %a) {
; CHECK-LABEL: lastb_f64
; CHECK: lastb d0, p0, z0.d
; CHECK-NEXT: ret
  %res = call double @llvm.aarch64.sve.lastb.nxv2f64(<vscale x 2 x i1> %pg, <vscale x 2 x double> %a)
  ret double %res
}
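
; NOTE: COMPACT packs the active elements of the source into the
; lowest-numbered lanes and zeroes the remainder. The instruction only exists
; for 32- and 64-bit elements, hence only .s and .d tests below.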

;
; COMPACT
;

define <vscale x 4 x i32> @compact_i32(<vscale x 4 x i1> %pg, <vscale x 4 x i32> %a) {
; CHECK-LABEL: compact_i32:
; CHECK: compact z0.s, p0, z0.s
; CHECK-NEXT: ret
  %out = call <vscale x 4 x i32> @llvm.aarch64.sve.compact.nxv4i32(<vscale x 4 x i1> %pg, <vscale x 4 x i32> %a)
  ret <vscale x 4 x i32> %out
}

define <vscale x 2 x i64> @compact_i64(<vscale x 2 x i1> %pg, <vscale x 2 x i64> %a) {
; CHECK-LABEL: compact_i64:
; CHECK: compact z0.d, p0, z0.d
; CHECK-NEXT: ret
  %out = call <vscale x 2 x i64> @llvm.aarch64.sve.compact.nxv2i64(<vscale x 2 x i1> %pg, <vscale x 2 x i64> %a)
  ret <vscale x 2 x i64> %out
}

define <vscale x 4 x float> @compact_f32(<vscale x 4 x i1> %pg, <vscale x 4 x float> %a) {
; CHECK-LABEL: compact_f32:
; CHECK: compact z0.s, p0, z0.s
; CHECK-NEXT: ret
  %out = call <vscale x 4 x float> @llvm.aarch64.sve.compact.nxv4f32(<vscale x 4 x i1> %pg, <vscale x 4 x float> %a)
  ret <vscale x 4 x float> %out
}

define <vscale x 2 x double> @compact_f64(<vscale x 2 x i1> %pg, <vscale x 2 x double> %a) {
; CHECK-LABEL: compact_f64:
; CHECK: compact z0.d, p0, z0.d
; CHECK-NEXT: ret
  %out = call <vscale x 2 x double> @llvm.aarch64.sve.compact.nxv2f64(<vscale x 2 x i1> %pg, <vscale x 2 x double> %a)
  ret <vscale x 2 x double> %out
}
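
; NOTE: REV reverses the order of all elements of a predicate or vector; the
; element size only affects how lanes are grouped, as the .b/.h/.s/.d variants
; below show.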

;
; REV
;

define <vscale x 16 x i1> @rev_b8( <vscale x 16 x i1> %a) {
; CHECK-LABEL: rev_b8
; CHECK: rev p0.b, p0.b
; CHECK-NEXT: ret
  %res = call <vscale x 16 x i1> @llvm.aarch64.sve.rev.nxv16i1(<vscale x 16 x i1> %a)
  ret <vscale x 16 x i1> %res
}

define <vscale x 8 x i1> @rev_b16(<vscale x 8 x i1> %a) {
; CHECK-LABEL: rev_b16
; CHECK: rev p0.h, p0.h
; CHECK-NEXT: ret
  %res = call <vscale x 8 x i1> @llvm.aarch64.sve.rev.nxv8i1(<vscale x 8 x i1> %a)
  ret <vscale x 8 x i1> %res
}

define <vscale x 4 x i1> @rev_b32(<vscale x 4 x i1> %a) {
; CHECK-LABEL: rev_b32
; CHECK: rev p0.s, p0.s
; CHECK-NEXT: ret
  %res = call <vscale x 4 x i1> @llvm.aarch64.sve.rev.nxv4i1(<vscale x 4 x i1> %a)
  ret <vscale x 4 x i1> %res
}

define <vscale x 2 x i1> @rev_b64(<vscale x 2 x i1> %a) {
; CHECK-LABEL: rev_b64
; CHECK: rev p0.d, p0.d
; CHECK-NEXT: ret
  %res = call <vscale x 2 x i1> @llvm.aarch64.sve.rev.nxv2i1(<vscale x 2 x i1> %a)
  ret <vscale x 2 x i1> %res
}

define <vscale x 16 x i8> @rev_i8( <vscale x 16 x i8> %a) {
; CHECK-LABEL: rev_i8
; CHECK: rev z0.b, z0.b
; CHECK-NEXT: ret
  %res = call <vscale x 16 x i8> @llvm.aarch64.sve.rev.nxv16i8(<vscale x 16 x i8> %a)
  ret <vscale x 16 x i8> %res
}

define <vscale x 8 x i16> @rev_i16(<vscale x 8 x i16> %a) {
; CHECK-LABEL: rev_i16
; CHECK: rev z0.h, z0.h
; CHECK-NEXT: ret
  %res = call <vscale x 8 x i16> @llvm.aarch64.sve.rev.nxv8i16(<vscale x 8 x i16> %a)
  ret <vscale x 8 x i16> %res
}

define <vscale x 4 x i32> @rev_i32(<vscale x 4 x i32> %a) {
; CHECK-LABEL: rev_i32
; CHECK: rev z0.s, z0.s
; CHECK-NEXT: ret
  %res = call <vscale x 4 x i32> @llvm.aarch64.sve.rev.nxv4i32(<vscale x 4 x i32> %a)
  ret <vscale x 4 x i32> %res
}

define <vscale x 2 x i64> @rev_i64(<vscale x 2 x i64> %a) {
; CHECK-LABEL: rev_i64
; CHECK: rev z0.d, z0.d
; CHECK-NEXT: ret
  %res = call <vscale x 2 x i64> @llvm.aarch64.sve.rev.nxv2i64(<vscale x 2 x i64> %a)
  ret <vscale x 2 x i64> %res
}

define <vscale x 8 x bfloat> @rev_bf16(<vscale x 8 x bfloat> %a) #0 {
; CHECK-LABEL: rev_bf16
; CHECK: rev z0.h, z0.h
; CHECK-NEXT: ret
  %res = call <vscale x 8 x bfloat> @llvm.aarch64.sve.rev.nxv8bf16(<vscale x 8 x bfloat> %a)
  ret <vscale x 8 x bfloat> %res
}

define <vscale x 8 x half> @rev_f16(<vscale x 8 x half> %a) {
; CHECK-LABEL: rev_f16
; CHECK: rev z0.h, z0.h
; CHECK-NEXT: ret
  %res = call <vscale x 8 x half> @llvm.aarch64.sve.rev.nxv8f16(<vscale x 8 x half> %a)
  ret <vscale x 8 x half> %res
}

define <vscale x 4 x float> @rev_f32(<vscale x 4 x float> %a) {
; CHECK-LABEL: rev_f32
; CHECK: rev z0.s, z0.s
; CHECK-NEXT: ret
  %res = call <vscale x 4 x float> @llvm.aarch64.sve.rev.nxv4f32(<vscale x 4 x float> %a)
  ret <vscale x 4 x float> %res
}

define <vscale x 2 x double> @rev_f64(<vscale x 2 x double> %a) {
; CHECK-LABEL: rev_f64
; CHECK: rev z0.d, z0.d
; CHECK-NEXT: ret
  %res = call <vscale x 2 x double> @llvm.aarch64.sve.rev.nxv2f64(<vscale x 2 x double> %a)
  ret <vscale x 2 x double> %res
}
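
; NOTE: SPLICE extracts the segment of the first vector delimited by the
; first and last active predicate elements and appends elements from the
; start of the second vector to fill the result.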

;
; SPLICE
;

define <vscale x 16 x i8> @splice_i8(<vscale x 16 x i1> %pg, <vscale x 16 x i8> %a, <vscale x 16 x i8> %b) {
; CHECK-LABEL: splice_i8:
; CHECK: splice z0.b, p0, z0.b, z1.b
; CHECK-NEXT: ret
  %out = call <vscale x 16 x i8> @llvm.aarch64.sve.splice.nxv16i8(<vscale x 16 x i1> %pg, <vscale x 16 x i8> %a, <vscale x 16 x i8> %b)
  ret <vscale x 16 x i8> %out
}

define <vscale x 8 x i16> @splice_i16(<vscale x 8 x i1> %pg, <vscale x 8 x i16> %a, <vscale x 8 x i16> %b) {
; CHECK-LABEL: splice_i16:
; CHECK: splice z0.h, p0, z0.h, z1.h
; CHECK-NEXT: ret
  %out = call <vscale x 8 x i16> @llvm.aarch64.sve.splice.nxv8i16(<vscale x 8 x i1> %pg, <vscale x 8 x i16> %a, <vscale x 8 x i16> %b)
  ret <vscale x 8 x i16> %out
}

define <vscale x 4 x i32> @splice_i32(<vscale x 4 x i1> %pg, <vscale x 4 x i32> %a, <vscale x 4 x i32> %b) {
; CHECK-LABEL: splice_i32:
; CHECK: splice z0.s, p0, z0.s, z1.s
; CHECK-NEXT: ret
  %out = call <vscale x 4 x i32> @llvm.aarch64.sve.splice.nxv4i32(<vscale x 4 x i1> %pg, <vscale x 4 x i32> %a, <vscale x 4 x i32> %b)
  ret <vscale x 4 x i32> %out
}

define <vscale x 2 x i64> @splice_i64(<vscale x 2 x i1> %pg, <vscale x 2 x i64> %a, <vscale x 2 x i64> %b) {
; CHECK-LABEL: splice_i64:
; CHECK: splice z0.d, p0, z0.d, z1.d
; CHECK-NEXT: ret
  %out = call <vscale x 2 x i64> @llvm.aarch64.sve.splice.nxv2i64(<vscale x 2 x i1> %pg, <vscale x 2 x i64> %a, <vscale x 2 x i64> %b)
  ret <vscale x 2 x i64> %out
}

define <vscale x 8 x bfloat> @splice_bf16(<vscale x 8 x i1> %pg, <vscale x 8 x bfloat> %a, <vscale x 8 x bfloat> %b) #0 {
; CHECK-LABEL: splice_bf16:
; CHECK: splice z0.h, p0, z0.h, z1.h
; CHECK-NEXT: ret
  %out = call <vscale x 8 x bfloat> @llvm.aarch64.sve.splice.nxv8bf16(<vscale x 8 x i1> %pg, <vscale x 8 x bfloat> %a, <vscale x 8 x bfloat> %b)
  ret <vscale x 8 x bfloat> %out
}

define <vscale x 8 x half> @splice_f16(<vscale x 8 x i1> %pg, <vscale x 8 x half> %a, <vscale x 8 x half> %b) {
; CHECK-LABEL: splice_f16:
; CHECK: splice z0.h, p0, z0.h, z1.h
; CHECK-NEXT: ret
  %out = call <vscale x 8 x half> @llvm.aarch64.sve.splice.nxv8f16(<vscale x 8 x i1> %pg, <vscale x 8 x half> %a, <vscale x 8 x half> %b)
  ret <vscale x 8 x half> %out
}

define <vscale x 4 x float> @splice_f32(<vscale x 4 x i1> %pg, <vscale x 4 x float> %a, <vscale x 4 x float> %b) {
; CHECK-LABEL: splice_f32:
; CHECK: splice z0.s, p0, z0.s, z1.s
; CHECK-NEXT: ret
  %out = call <vscale x 4 x float> @llvm.aarch64.sve.splice.nxv4f32(<vscale x 4 x i1> %pg, <vscale x 4 x float> %a, <vscale x 4 x float> %b)
  ret <vscale x 4 x float> %out
}

define <vscale x 2 x double> @splice_f64(<vscale x 2 x i1> %pg, <vscale x 2 x double> %a, <vscale x 2 x double> %b) {
; CHECK-LABEL: splice_f64:
; CHECK: splice z0.d, p0, z0.d, z1.d
; CHECK-NEXT: ret
  %out = call <vscale x 2 x double> @llvm.aarch64.sve.splice.nxv2f64(<vscale x 2 x i1> %pg, <vscale x 2 x double> %a, <vscale x 2 x double> %b)
  ret <vscale x 2 x double> %out
}
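
; NOTE: The unpack tests promote the low (...LO) or high (...HI) half of a
; vector to elements of twice the width; SUNPK* sign-extends and UUNPK*
; (further below) zero-extends.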

;
; SUNPKHI
;

define <vscale x 8 x i16> @sunpkhi_i16(<vscale x 16 x i8> %a) {
; CHECK-LABEL: sunpkhi_i16
; CHECK: sunpkhi z0.h, z0.b
; CHECK-NEXT: ret
  %res = call <vscale x 8 x i16> @llvm.aarch64.sve.sunpkhi.nxv8i16(<vscale x 16 x i8> %a)
  ret <vscale x 8 x i16> %res
}

define <vscale x 4 x i32> @sunpkhi_i32(<vscale x 8 x i16> %a) {
; CHECK-LABEL: sunpkhi_i32
; CHECK: sunpkhi z0.s, z0.h
; CHECK-NEXT: ret
  %res = call <vscale x 4 x i32> @llvm.aarch64.sve.sunpkhi.nxv4i32(<vscale x 8 x i16> %a)
  ret <vscale x 4 x i32> %res
}

define <vscale x 2 x i64> @sunpkhi_i64(<vscale x 4 x i32> %a) {
; CHECK-LABEL: sunpkhi_i64
; CHECK: sunpkhi z0.d, z0.s
; CHECK-NEXT: ret
  %res = call <vscale x 2 x i64> @llvm.aarch64.sve.sunpkhi.nxv2i64(<vscale x 4 x i32> %a)
  ret <vscale x 2 x i64> %res
}

;
; SUNPKLO
;

define <vscale x 8 x i16> @sunpklo_i16(<vscale x 16 x i8> %a) {
; CHECK-LABEL: sunpklo_i16
; CHECK: sunpklo z0.h, z0.b
; CHECK-NEXT: ret
  %res = call <vscale x 8 x i16> @llvm.aarch64.sve.sunpklo.nxv8i16(<vscale x 16 x i8> %a)
  ret <vscale x 8 x i16> %res
}

define <vscale x 4 x i32> @sunpklo_i32(<vscale x 8 x i16> %a) {
; CHECK-LABEL: sunpklo_i32
; CHECK: sunpklo z0.s, z0.h
; CHECK-NEXT: ret
  %res = call <vscale x 4 x i32> @llvm.aarch64.sve.sunpklo.nxv4i32(<vscale x 8 x i16> %a)
  ret <vscale x 4 x i32> %res
}

define <vscale x 2 x i64> @sunpklo_i64(<vscale x 4 x i32> %a) {
; CHECK-LABEL: sunpklo_i64
; CHECK: sunpklo z0.d, z0.s
; CHECK-NEXT: ret
  %res = call <vscale x 2 x i64> @llvm.aarch64.sve.sunpklo.nxv2i64(<vscale x 4 x i32> %a)
  ret <vscale x 2 x i64> %res
}
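
; NOTE: TBL selects each result element from the first operand by index, with
; out-of-range indices producing zero. The floating-point variants take an
; integer index vector with the same element count, as the nxv8i16 index
; operands of tbl_f16/tbl_bf16 show.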

;
; TBL
;

define <vscale x 16 x i8> @tbl_i8(<vscale x 16 x i8> %a, <vscale x 16 x i8> %b) {
; CHECK-LABEL: tbl_i8:
; CHECK: tbl z0.b, { z0.b }, z1.b
; CHECK-NEXT: ret
  %out = call <vscale x 16 x i8> @llvm.aarch64.sve.tbl.nxv16i8(<vscale x 16 x i8> %a, <vscale x 16 x i8> %b)
  ret <vscale x 16 x i8> %out
}

define <vscale x 8 x i16> @tbl_i16(<vscale x 8 x i16> %a, <vscale x 8 x i16> %b) {
; CHECK-LABEL: tbl_i16:
; CHECK: tbl z0.h, { z0.h }, z1.h
; CHECK-NEXT: ret
  %out = call <vscale x 8 x i16> @llvm.aarch64.sve.tbl.nxv8i16(<vscale x 8 x i16> %a, <vscale x 8 x i16> %b)
  ret <vscale x 8 x i16> %out
}

define <vscale x 4 x i32> @tbl_i32(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b) {
; CHECK-LABEL: tbl_i32:
; CHECK: tbl z0.s, { z0.s }, z1.s
; CHECK-NEXT: ret
  %out = call <vscale x 4 x i32> @llvm.aarch64.sve.tbl.nxv4i32(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b)
  ret <vscale x 4 x i32> %out
}

define <vscale x 2 x i64> @tbl_i64(<vscale x 2 x i64> %a, <vscale x 2 x i64> %b) {
; CHECK-LABEL: tbl_i64:
; CHECK: tbl z0.d, { z0.d }, z1.d
; CHECK-NEXT: ret
  %out = call <vscale x 2 x i64> @llvm.aarch64.sve.tbl.nxv2i64(<vscale x 2 x i64> %a, <vscale x 2 x i64> %b)
  ret <vscale x 2 x i64> %out
}

define <vscale x 8 x half> @tbl_f16(<vscale x 8 x half> %a, <vscale x 8 x i16> %b) {
; CHECK-LABEL: tbl_f16:
; CHECK: tbl z0.h, { z0.h }, z1.h
; CHECK-NEXT: ret
  %out = call <vscale x 8 x half> @llvm.aarch64.sve.tbl.nxv8f16(<vscale x 8 x half> %a, <vscale x 8 x i16> %b)
  ret <vscale x 8 x half> %out
}

define <vscale x 8 x bfloat> @tbl_bf16(<vscale x 8 x bfloat> %a, <vscale x 8 x i16> %b) #0 {
; CHECK-LABEL: tbl_bf16:
; CHECK: tbl z0.h, { z0.h }, z1.h
; CHECK-NEXT: ret
  %out = call <vscale x 8 x bfloat> @llvm.aarch64.sve.tbl.nxv8bf16(<vscale x 8 x bfloat> %a, <vscale x 8 x i16> %b)
  ret <vscale x 8 x bfloat> %out
}

define <vscale x 4 x float> @tbl_f32(<vscale x 4 x float> %a, <vscale x 4 x i32> %b) {
; CHECK-LABEL: tbl_f32:
; CHECK: tbl z0.s, { z0.s }, z1.s
; CHECK-NEXT: ret
  %out = call <vscale x 4 x float> @llvm.aarch64.sve.tbl.nxv4f32(<vscale x 4 x float> %a, <vscale x 4 x i32> %b)
  ret <vscale x 4 x float> %out
}

define <vscale x 2 x double> @tbl_f64(<vscale x 2 x double> %a, <vscale x 2 x i64> %b) {
; CHECK-LABEL: tbl_f64:
; CHECK: tbl z0.d, { z0.d }, z1.d
; CHECK-NEXT: ret
  %out = call <vscale x 2 x double> @llvm.aarch64.sve.tbl.nxv2f64(<vscale x 2 x double> %a, <vscale x 2 x i64> %b)
  ret <vscale x 2 x double> %out
}

;
; UUNPKHI
;

define <vscale x 8 x i16> @uunpkhi_i16(<vscale x 16 x i8> %a) {
; CHECK-LABEL: uunpkhi_i16
; CHECK: uunpkhi z0.h, z0.b
; CHECK-NEXT: ret
  %res = call <vscale x 8 x i16> @llvm.aarch64.sve.uunpkhi.nxv8i16(<vscale x 16 x i8> %a)
  ret <vscale x 8 x i16> %res
}

define <vscale x 4 x i32> @uunpkhi_i32(<vscale x 8 x i16> %a) {
; CHECK-LABEL: uunpkhi_i32
; CHECK: uunpkhi z0.s, z0.h
; CHECK-NEXT: ret
  %res = call <vscale x 4 x i32> @llvm.aarch64.sve.uunpkhi.nxv4i32(<vscale x 8 x i16> %a)
  ret <vscale x 4 x i32> %res
}

define <vscale x 2 x i64> @uunpkhi_i64(<vscale x 4 x i32> %a) {
; CHECK-LABEL: uunpkhi_i64
; CHECK: uunpkhi z0.d, z0.s
; CHECK-NEXT: ret
  %res = call <vscale x 2 x i64> @llvm.aarch64.sve.uunpkhi.nxv2i64(<vscale x 4 x i32> %a)
  ret <vscale x 2 x i64> %res
}

;
; UUNPKLO
;

define <vscale x 8 x i16> @uunpklo_i16(<vscale x 16 x i8> %a) {
; CHECK-LABEL: uunpklo_i16
; CHECK: uunpklo z0.h, z0.b
; CHECK-NEXT: ret
  %res = call <vscale x 8 x i16> @llvm.aarch64.sve.uunpklo.nxv8i16(<vscale x 16 x i8> %a)
  ret <vscale x 8 x i16> %res
}

define <vscale x 4 x i32> @uunpklo_i32(<vscale x 8 x i16> %a) {
; CHECK-LABEL: uunpklo_i32
; CHECK: uunpklo z0.s, z0.h
; CHECK-NEXT: ret
  %res = call <vscale x 4 x i32> @llvm.aarch64.sve.uunpklo.nxv4i32(<vscale x 8 x i16> %a)
  ret <vscale x 4 x i32> %res
}

define <vscale x 2 x i64> @uunpklo_i64(<vscale x 4 x i32> %a) {
; CHECK-LABEL: uunpklo_i64
; CHECK: uunpklo z0.d, z0.s
; CHECK-NEXT: ret
  %res = call <vscale x 2 x i64> @llvm.aarch64.sve.uunpklo.nxv2i64(<vscale x 4 x i32> %a)
  ret <vscale x 2 x i64> %res
}
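
; NOTE: The remaining tests cover the TRN1/TRN2, UZP1/UZP2 and ZIP1 permutes
; for predicates and data vectors. The unpacked '_v2'/'_v4' variants legalize
; to their container element size, which is why nxv2f16 expects a .d permute
; and nxv4f16 a .s permute.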

;
; TRN1
;

define <vscale x 16 x i1> @trn1_b8(<vscale x 16 x i1> %a, <vscale x 16 x i1> %b) {
; CHECK-LABEL: trn1_b8:
; CHECK: trn1 p0.b, p0.b, p1.b
; CHECK-NEXT: ret
  %out = call <vscale x 16 x i1> @llvm.aarch64.sve.trn1.nxv16i1(<vscale x 16 x i1> %a, <vscale x 16 x i1> %b)
  ret <vscale x 16 x i1> %out
}

define <vscale x 8 x i1> @trn1_b16(<vscale x 8 x i1> %a, <vscale x 8 x i1> %b) {
; CHECK-LABEL: trn1_b16:
; CHECK: trn1 p0.h, p0.h, p1.h
; CHECK-NEXT: ret
  %out = call <vscale x 8 x i1> @llvm.aarch64.sve.trn1.nxv8i1(<vscale x 8 x i1> %a, <vscale x 8 x i1> %b)
  ret <vscale x 8 x i1> %out
}

define <vscale x 4 x i1> @trn1_b32(<vscale x 4 x i1> %a, <vscale x 4 x i1> %b) {
; CHECK-LABEL: trn1_b32:
; CHECK: trn1 p0.s, p0.s, p1.s
; CHECK-NEXT: ret
  %out = call <vscale x 4 x i1> @llvm.aarch64.sve.trn1.nxv4i1(<vscale x 4 x i1> %a, <vscale x 4 x i1> %b)
  ret <vscale x 4 x i1> %out
}

define <vscale x 2 x i1> @trn1_b64(<vscale x 2 x i1> %a, <vscale x 2 x i1> %b) {
; CHECK-LABEL: trn1_b64:
; CHECK: trn1 p0.d, p0.d, p1.d
; CHECK-NEXT: ret
  %out = call <vscale x 2 x i1> @llvm.aarch64.sve.trn1.nxv2i1(<vscale x 2 x i1> %a, <vscale x 2 x i1> %b)
  ret <vscale x 2 x i1> %out
}

define <vscale x 16 x i8> @trn1_i8(<vscale x 16 x i8> %a, <vscale x 16 x i8> %b) {
; CHECK-LABEL: trn1_i8:
; CHECK: trn1 z0.b, z0.b, z1.b
; CHECK-NEXT: ret
  %out = call <vscale x 16 x i8> @llvm.aarch64.sve.trn1.nxv16i8(<vscale x 16 x i8> %a, <vscale x 16 x i8> %b)
  ret <vscale x 16 x i8> %out
}

define <vscale x 8 x i16> @trn1_i16(<vscale x 8 x i16> %a, <vscale x 8 x i16> %b) {
; CHECK-LABEL: trn1_i16:
; CHECK: trn1 z0.h, z0.h, z1.h
; CHECK-NEXT: ret
  %out = call <vscale x 8 x i16> @llvm.aarch64.sve.trn1.nxv8i16(<vscale x 8 x i16> %a, <vscale x 8 x i16> %b)
  ret <vscale x 8 x i16> %out
}

define <vscale x 4 x i32> @trn1_i32(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b) {
; CHECK-LABEL: trn1_i32:
; CHECK: trn1 z0.s, z0.s, z1.s
; CHECK-NEXT: ret
  %out = call <vscale x 4 x i32> @llvm.aarch64.sve.trn1.nxv4i32(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b)
  ret <vscale x 4 x i32> %out
}

define <vscale x 2 x i64> @trn1_i64(<vscale x 2 x i64> %a, <vscale x 2 x i64> %b) {
; CHECK-LABEL: trn1_i64:
; CHECK: trn1 z0.d, z0.d, z1.d
; CHECK-NEXT: ret
  %out = call <vscale x 2 x i64> @llvm.aarch64.sve.trn1.nxv2i64(<vscale x 2 x i64> %a, <vscale x 2 x i64> %b)
  ret <vscale x 2 x i64> %out
}

define <vscale x 2 x half> @trn1_f16_v2(<vscale x 2 x half> %a, <vscale x 2 x half> %b) {
; CHECK-LABEL: trn1_f16_v2:
; CHECK: trn1 z0.d, z0.d, z1.d
; CHECK-NEXT: ret
  %out = call <vscale x 2 x half> @llvm.aarch64.sve.trn1.nxv2f16(<vscale x 2 x half> %a, <vscale x 2 x half> %b)
  ret <vscale x 2 x half> %out
}

define <vscale x 4 x half> @trn1_f16_v4(<vscale x 4 x half> %a, <vscale x 4 x half> %b) {
; CHECK-LABEL: trn1_f16_v4:
; CHECK: trn1 z0.s, z0.s, z1.s
; CHECK-NEXT: ret
  %out = call <vscale x 4 x half> @llvm.aarch64.sve.trn1.nxv4f16(<vscale x 4 x half> %a, <vscale x 4 x half> %b)
  ret <vscale x 4 x half> %out
}

define <vscale x 8 x bfloat> @trn1_bf16(<vscale x 8 x bfloat> %a, <vscale x 8 x bfloat> %b) #0 {
; CHECK-LABEL: trn1_bf16:
; CHECK: trn1 z0.h, z0.h, z1.h
; CHECK-NEXT: ret
  %out = call <vscale x 8 x bfloat> @llvm.aarch64.sve.trn1.nxv8bf16(<vscale x 8 x bfloat> %a, <vscale x 8 x bfloat> %b)
  ret <vscale x 8 x bfloat> %out
}

define <vscale x 8 x half> @trn1_f16(<vscale x 8 x half> %a, <vscale x 8 x half> %b) {
; CHECK-LABEL: trn1_f16:
; CHECK: trn1 z0.h, z0.h, z1.h
; CHECK-NEXT: ret
  %out = call <vscale x 8 x half> @llvm.aarch64.sve.trn1.nxv8f16(<vscale x 8 x half> %a, <vscale x 8 x half> %b)
  ret <vscale x 8 x half> %out
}

define <vscale x 2 x float> @trn1_f32_v2(<vscale x 2 x float> %a, <vscale x 2 x float> %b) {
; CHECK-LABEL: trn1_f32_v2:
; CHECK: trn1 z0.d, z0.d, z1.d
; CHECK-NEXT: ret
  %out = call <vscale x 2 x float> @llvm.aarch64.sve.trn1.nxv2f32(<vscale x 2 x float> %a, <vscale x 2 x float> %b)
  ret <vscale x 2 x float> %out
}

define <vscale x 4 x float> @trn1_f32(<vscale x 4 x float> %a, <vscale x 4 x float> %b) {
; CHECK-LABEL: trn1_f32:
; CHECK: trn1 z0.s, z0.s, z1.s
; CHECK-NEXT: ret
  %out = call <vscale x 4 x float> @llvm.aarch64.sve.trn1.nxv4f32(<vscale x 4 x float> %a, <vscale x 4 x float> %b)
  ret <vscale x 4 x float> %out
}

define <vscale x 2 x double> @trn1_f64(<vscale x 2 x double> %a, <vscale x 2 x double> %b) {
; CHECK-LABEL: trn1_f64:
; CHECK: trn1 z0.d, z0.d, z1.d
; CHECK-NEXT: ret
  %out = call <vscale x 2 x double> @llvm.aarch64.sve.trn1.nxv2f64(<vscale x 2 x double> %a, <vscale x 2 x double> %b)
  ret <vscale x 2 x double> %out
}

;
; TRN2
;

define <vscale x 16 x i1> @trn2_b8(<vscale x 16 x i1> %a, <vscale x 16 x i1> %b) {
; CHECK-LABEL: trn2_b8:
; CHECK: trn2 p0.b, p0.b, p1.b
; CHECK-NEXT: ret
  %out = call <vscale x 16 x i1> @llvm.aarch64.sve.trn2.nxv16i1(<vscale x 16 x i1> %a, <vscale x 16 x i1> %b)
  ret <vscale x 16 x i1> %out
}

define <vscale x 8 x i1> @trn2_b16(<vscale x 8 x i1> %a, <vscale x 8 x i1> %b) {
; CHECK-LABEL: trn2_b16:
; CHECK: trn2 p0.h, p0.h, p1.h
; CHECK-NEXT: ret
  %out = call <vscale x 8 x i1> @llvm.aarch64.sve.trn2.nxv8i1(<vscale x 8 x i1> %a, <vscale x 8 x i1> %b)
  ret <vscale x 8 x i1> %out
}

define <vscale x 4 x i1> @trn2_b32(<vscale x 4 x i1> %a, <vscale x 4 x i1> %b) {
; CHECK-LABEL: trn2_b32:
; CHECK: trn2 p0.s, p0.s, p1.s
; CHECK-NEXT: ret
  %out = call <vscale x 4 x i1> @llvm.aarch64.sve.trn2.nxv4i1(<vscale x 4 x i1> %a, <vscale x 4 x i1> %b)
  ret <vscale x 4 x i1> %out
}

define <vscale x 2 x i1> @trn2_b64(<vscale x 2 x i1> %a, <vscale x 2 x i1> %b) {
; CHECK-LABEL: trn2_b64:
; CHECK: trn2 p0.d, p0.d, p1.d
; CHECK-NEXT: ret
  %out = call <vscale x 2 x i1> @llvm.aarch64.sve.trn2.nxv2i1(<vscale x 2 x i1> %a, <vscale x 2 x i1> %b)
  ret <vscale x 2 x i1> %out
}

define <vscale x 16 x i8> @trn2_i8(<vscale x 16 x i8> %a, <vscale x 16 x i8> %b) {
; CHECK-LABEL: trn2_i8:
; CHECK: trn2 z0.b, z0.b, z1.b
; CHECK-NEXT: ret
  %out = call <vscale x 16 x i8> @llvm.aarch64.sve.trn2.nxv16i8(<vscale x 16 x i8> %a, <vscale x 16 x i8> %b)
  ret <vscale x 16 x i8> %out
}

define <vscale x 8 x i16> @trn2_i16(<vscale x 8 x i16> %a, <vscale x 8 x i16> %b) {
; CHECK-LABEL: trn2_i16:
; CHECK: trn2 z0.h, z0.h, z1.h
; CHECK-NEXT: ret
  %out = call <vscale x 8 x i16> @llvm.aarch64.sve.trn2.nxv8i16(<vscale x 8 x i16> %a, <vscale x 8 x i16> %b)
  ret <vscale x 8 x i16> %out
}

define <vscale x 4 x i32> @trn2_i32(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b) {
; CHECK-LABEL: trn2_i32:
; CHECK: trn2 z0.s, z0.s, z1.s
; CHECK-NEXT: ret
  %out = call <vscale x 4 x i32> @llvm.aarch64.sve.trn2.nxv4i32(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b)
  ret <vscale x 4 x i32> %out
}

define <vscale x 2 x i64> @trn2_i64(<vscale x 2 x i64> %a, <vscale x 2 x i64> %b) {
; CHECK-LABEL: trn2_i64:
; CHECK: trn2 z0.d, z0.d, z1.d
; CHECK-NEXT: ret
  %out = call <vscale x 2 x i64> @llvm.aarch64.sve.trn2.nxv2i64(<vscale x 2 x i64> %a, <vscale x 2 x i64> %b)
  ret <vscale x 2 x i64> %out
}

define <vscale x 2 x half> @trn2_f16_v2(<vscale x 2 x half> %a, <vscale x 2 x half> %b) {
; CHECK-LABEL: trn2_f16_v2:
; CHECK: trn2 z0.d, z0.d, z1.d
; CHECK-NEXT: ret
  %out = call <vscale x 2 x half> @llvm.aarch64.sve.trn2.nxv2f16(<vscale x 2 x half> %a, <vscale x 2 x half> %b)
  ret <vscale x 2 x half> %out
}

define <vscale x 4 x half> @trn2_f16_v4(<vscale x 4 x half> %a, <vscale x 4 x half> %b) {
; CHECK-LABEL: trn2_f16_v4:
; CHECK: trn2 z0.s, z0.s, z1.s
; CHECK-NEXT: ret
  %out = call <vscale x 4 x half> @llvm.aarch64.sve.trn2.nxv4f16(<vscale x 4 x half> %a, <vscale x 4 x half> %b)
  ret <vscale x 4 x half> %out
}

define <vscale x 8 x bfloat> @trn2_bf16(<vscale x 8 x bfloat> %a, <vscale x 8 x bfloat> %b) #0 {
; CHECK-LABEL: trn2_bf16:
; CHECK: trn2 z0.h, z0.h, z1.h
; CHECK-NEXT: ret
  %out = call <vscale x 8 x bfloat> @llvm.aarch64.sve.trn2.nxv8bf16(<vscale x 8 x bfloat> %a, <vscale x 8 x bfloat> %b)
  ret <vscale x 8 x bfloat> %out
}

define <vscale x 8 x half> @trn2_f16(<vscale x 8 x half> %a, <vscale x 8 x half> %b) {
; CHECK-LABEL: trn2_f16:
; CHECK: trn2 z0.h, z0.h, z1.h
; CHECK-NEXT: ret
  %out = call <vscale x 8 x half> @llvm.aarch64.sve.trn2.nxv8f16(<vscale x 8 x half> %a, <vscale x 8 x half> %b)
  ret <vscale x 8 x half> %out
}

define <vscale x 2 x float> @trn2_f32_v2(<vscale x 2 x float> %a, <vscale x 2 x float> %b) {
; CHECK-LABEL: trn2_f32_v2:
; CHECK: trn2 z0.d, z0.d, z1.d
; CHECK-NEXT: ret
  %out = call <vscale x 2 x float> @llvm.aarch64.sve.trn2.nxv2f32(<vscale x 2 x float> %a, <vscale x 2 x float> %b)
  ret <vscale x 2 x float> %out
}

define <vscale x 4 x float> @trn2_f32(<vscale x 4 x float> %a, <vscale x 4 x float> %b) {
; CHECK-LABEL: trn2_f32:
; CHECK: trn2 z0.s, z0.s, z1.s
; CHECK-NEXT: ret
  %out = call <vscale x 4 x float> @llvm.aarch64.sve.trn2.nxv4f32(<vscale x 4 x float> %a, <vscale x 4 x float> %b)
  ret <vscale x 4 x float> %out
}

define <vscale x 2 x double> @trn2_f64(<vscale x 2 x double> %a, <vscale x 2 x double> %b) {
; CHECK-LABEL: trn2_f64:
; CHECK: trn2 z0.d, z0.d, z1.d
; CHECK-NEXT: ret
  %out = call <vscale x 2 x double> @llvm.aarch64.sve.trn2.nxv2f64(<vscale x 2 x double> %a, <vscale x 2 x double> %b)
  ret <vscale x 2 x double> %out
}

;
; UZP1
;

define <vscale x 16 x i1> @uzp1_b8(<vscale x 16 x i1> %a, <vscale x 16 x i1> %b) {
; CHECK-LABEL: uzp1_b8:
; CHECK: uzp1 p0.b, p0.b, p1.b
; CHECK-NEXT: ret
  %out = call <vscale x 16 x i1> @llvm.aarch64.sve.uzp1.nxv16i1(<vscale x 16 x i1> %a, <vscale x 16 x i1> %b)
  ret <vscale x 16 x i1> %out
}

define <vscale x 8 x i1> @uzp1_b16(<vscale x 8 x i1> %a, <vscale x 8 x i1> %b) {
; CHECK-LABEL: uzp1_b16:
; CHECK: uzp1 p0.h, p0.h, p1.h
; CHECK-NEXT: ret
  %out = call <vscale x 8 x i1> @llvm.aarch64.sve.uzp1.nxv8i1(<vscale x 8 x i1> %a, <vscale x 8 x i1> %b)
  ret <vscale x 8 x i1> %out
}

define <vscale x 4 x i1> @uzp1_b32(<vscale x 4 x i1> %a, <vscale x 4 x i1> %b) {
; CHECK-LABEL: uzp1_b32:
; CHECK: uzp1 p0.s, p0.s, p1.s
; CHECK-NEXT: ret
  %out = call <vscale x 4 x i1> @llvm.aarch64.sve.uzp1.nxv4i1(<vscale x 4 x i1> %a, <vscale x 4 x i1> %b)
  ret <vscale x 4 x i1> %out
}

define <vscale x 2 x i1> @uzp1_b64(<vscale x 2 x i1> %a, <vscale x 2 x i1> %b) {
; CHECK-LABEL: uzp1_b64:
; CHECK: uzp1 p0.d, p0.d, p1.d
; CHECK-NEXT: ret
  %out = call <vscale x 2 x i1> @llvm.aarch64.sve.uzp1.nxv2i1(<vscale x 2 x i1> %a, <vscale x 2 x i1> %b)
  ret <vscale x 2 x i1> %out
}

define <vscale x 16 x i8> @uzp1_i8(<vscale x 16 x i8> %a, <vscale x 16 x i8> %b) {
; CHECK-LABEL: uzp1_i8:
; CHECK: uzp1 z0.b, z0.b, z1.b
; CHECK-NEXT: ret
  %out = call <vscale x 16 x i8> @llvm.aarch64.sve.uzp1.nxv16i8(<vscale x 16 x i8> %a, <vscale x 16 x i8> %b)
  ret <vscale x 16 x i8> %out
}

define <vscale x 8 x i16> @uzp1_i16(<vscale x 8 x i16> %a, <vscale x 8 x i16> %b) {
; CHECK-LABEL: uzp1_i16:
; CHECK: uzp1 z0.h, z0.h, z1.h
; CHECK-NEXT: ret
  %out = call <vscale x 8 x i16> @llvm.aarch64.sve.uzp1.nxv8i16(<vscale x 8 x i16> %a, <vscale x 8 x i16> %b)
  ret <vscale x 8 x i16> %out
}

define <vscale x 4 x i32> @uzp1_i32(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b) {
; CHECK-LABEL: uzp1_i32:
; CHECK: uzp1 z0.s, z0.s, z1.s
; CHECK-NEXT: ret
  %out = call <vscale x 4 x i32> @llvm.aarch64.sve.uzp1.nxv4i32(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b)
  ret <vscale x 4 x i32> %out
}

define <vscale x 2 x i64> @uzp1_i64(<vscale x 2 x i64> %a, <vscale x 2 x i64> %b) {
; CHECK-LABEL: uzp1_i64:
; CHECK: uzp1 z0.d, z0.d, z1.d
; CHECK-NEXT: ret
  %out = call <vscale x 2 x i64> @llvm.aarch64.sve.uzp1.nxv2i64(<vscale x 2 x i64> %a, <vscale x 2 x i64> %b)
  ret <vscale x 2 x i64> %out
}

define <vscale x 2 x half> @uzp1_f16_v2(<vscale x 2 x half> %a, <vscale x 2 x half> %b) {
; CHECK-LABEL: uzp1_f16_v2:
; CHECK: uzp1 z0.d, z0.d, z1.d
; CHECK-NEXT: ret
  %out = call <vscale x 2 x half> @llvm.aarch64.sve.uzp1.nxv2f16(<vscale x 2 x half> %a, <vscale x 2 x half> %b)
  ret <vscale x 2 x half> %out
}

define <vscale x 4 x half> @uzp1_f16_v4(<vscale x 4 x half> %a, <vscale x 4 x half> %b) {
; CHECK-LABEL: uzp1_f16_v4:
; CHECK: uzp1 z0.s, z0.s, z1.s
; CHECK-NEXT: ret
  %out = call <vscale x 4 x half> @llvm.aarch64.sve.uzp1.nxv4f16(<vscale x 4 x half> %a, <vscale x 4 x half> %b)
  ret <vscale x 4 x half> %out
}

define <vscale x 8 x bfloat> @uzp1_bf16(<vscale x 8 x bfloat> %a, <vscale x 8 x bfloat> %b) #0 {
; CHECK-LABEL: uzp1_bf16:
; CHECK: uzp1 z0.h, z0.h, z1.h
; CHECK-NEXT: ret
  %out = call <vscale x 8 x bfloat> @llvm.aarch64.sve.uzp1.nxv8bf16(<vscale x 8 x bfloat> %a, <vscale x 8 x bfloat> %b)
  ret <vscale x 8 x bfloat> %out
}

define <vscale x 8 x half> @uzp1_f16(<vscale x 8 x half> %a, <vscale x 8 x half> %b) {
; CHECK-LABEL: uzp1_f16:
; CHECK: uzp1 z0.h, z0.h, z1.h
; CHECK-NEXT: ret
  %out = call <vscale x 8 x half> @llvm.aarch64.sve.uzp1.nxv8f16(<vscale x 8 x half> %a, <vscale x 8 x half> %b)
  ret <vscale x 8 x half> %out
}

define <vscale x 2 x float> @uzp1_f32_v2(<vscale x 2 x float> %a, <vscale x 2 x float> %b) {
; CHECK-LABEL: uzp1_f32_v2:
; CHECK: uzp1 z0.d, z0.d, z1.d
; CHECK-NEXT: ret
  %out = call <vscale x 2 x float> @llvm.aarch64.sve.uzp1.nxv2f32(<vscale x 2 x float> %a, <vscale x 2 x float> %b)
  ret <vscale x 2 x float> %out
}

define <vscale x 4 x float> @uzp1_f32(<vscale x 4 x float> %a, <vscale x 4 x float> %b) {
; CHECK-LABEL: uzp1_f32:
; CHECK: uzp1 z0.s, z0.s, z1.s
; CHECK-NEXT: ret
  %out = call <vscale x 4 x float> @llvm.aarch64.sve.uzp1.nxv4f32(<vscale x 4 x float> %a, <vscale x 4 x float> %b)
  ret <vscale x 4 x float> %out
}

define <vscale x 2 x double> @uzp1_f64(<vscale x 2 x double> %a, <vscale x 2 x double> %b) {
; CHECK-LABEL: uzp1_f64:
; CHECK: uzp1 z0.d, z0.d, z1.d
; CHECK-NEXT: ret
  %out = call <vscale x 2 x double> @llvm.aarch64.sve.uzp1.nxv2f64(<vscale x 2 x double> %a, <vscale x 2 x double> %b)
  ret <vscale x 2 x double> %out
}

;
; UZP2
;

define <vscale x 16 x i1> @uzp2_b8(<vscale x 16 x i1> %a, <vscale x 16 x i1> %b) {
; CHECK-LABEL: uzp2_b8:
; CHECK: uzp2 p0.b, p0.b, p1.b
; CHECK-NEXT: ret
  %out = call <vscale x 16 x i1> @llvm.aarch64.sve.uzp2.nxv16i1(<vscale x 16 x i1> %a, <vscale x 16 x i1> %b)
  ret <vscale x 16 x i1> %out
}

define <vscale x 8 x i1> @uzp2_b16(<vscale x 8 x i1> %a, <vscale x 8 x i1> %b) {
; CHECK-LABEL: uzp2_b16:
; CHECK: uzp2 p0.h, p0.h, p1.h
; CHECK-NEXT: ret
  %out = call <vscale x 8 x i1> @llvm.aarch64.sve.uzp2.nxv8i1(<vscale x 8 x i1> %a, <vscale x 8 x i1> %b)
  ret <vscale x 8 x i1> %out
}

define <vscale x 4 x i1> @uzp2_b32(<vscale x 4 x i1> %a, <vscale x 4 x i1> %b) {
; CHECK-LABEL: uzp2_b32:
; CHECK: uzp2 p0.s, p0.s, p1.s
; CHECK-NEXT: ret
  %out = call <vscale x 4 x i1> @llvm.aarch64.sve.uzp2.nxv4i1(<vscale x 4 x i1> %a, <vscale x 4 x i1> %b)
  ret <vscale x 4 x i1> %out
}

define <vscale x 2 x i1> @uzp2_b64(<vscale x 2 x i1> %a, <vscale x 2 x i1> %b) {
; CHECK-LABEL: uzp2_b64:
; CHECK: uzp2 p0.d, p0.d, p1.d
; CHECK-NEXT: ret
  %out = call <vscale x 2 x i1> @llvm.aarch64.sve.uzp2.nxv2i1(<vscale x 2 x i1> %a, <vscale x 2 x i1> %b)
  ret <vscale x 2 x i1> %out
}

define <vscale x 16 x i8> @uzp2_i8(<vscale x 16 x i8> %a, <vscale x 16 x i8> %b) {
; CHECK-LABEL: uzp2_i8:
; CHECK: uzp2 z0.b, z0.b, z1.b
; CHECK-NEXT: ret
  %out = call <vscale x 16 x i8> @llvm.aarch64.sve.uzp2.nxv16i8(<vscale x 16 x i8> %a, <vscale x 16 x i8> %b)
  ret <vscale x 16 x i8> %out
}

define <vscale x 8 x i16> @uzp2_i16(<vscale x 8 x i16> %a, <vscale x 8 x i16> %b) {
; CHECK-LABEL: uzp2_i16:
; CHECK: uzp2 z0.h, z0.h, z1.h
; CHECK-NEXT: ret
  %out = call <vscale x 8 x i16> @llvm.aarch64.sve.uzp2.nxv8i16(<vscale x 8 x i16> %a, <vscale x 8 x i16> %b)
  ret <vscale x 8 x i16> %out
}

define <vscale x 4 x i32> @uzp2_i32(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b) {
; CHECK-LABEL: uzp2_i32:
; CHECK: uzp2 z0.s, z0.s, z1.s
; CHECK-NEXT: ret
  %out = call <vscale x 4 x i32> @llvm.aarch64.sve.uzp2.nxv4i32(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b)
  ret <vscale x 4 x i32> %out
}

define <vscale x 2 x i64> @uzp2_i64(<vscale x 2 x i64> %a, <vscale x 2 x i64> %b) {
; CHECK-LABEL: uzp2_i64:
; CHECK: uzp2 z0.d, z0.d, z1.d
; CHECK-NEXT: ret
  %out = call <vscale x 2 x i64> @llvm.aarch64.sve.uzp2.nxv2i64(<vscale x 2 x i64> %a, <vscale x 2 x i64> %b)
  ret <vscale x 2 x i64> %out
}

define <vscale x 2 x half> @uzp2_f16_v2(<vscale x 2 x half> %a, <vscale x 2 x half> %b) {
; CHECK-LABEL: uzp2_f16_v2:
; CHECK: uzp2 z0.d, z0.d, z1.d
; CHECK-NEXT: ret
  %out = call <vscale x 2 x half> @llvm.aarch64.sve.uzp2.nxv2f16(<vscale x 2 x half> %a, <vscale x 2 x half> %b)
  ret <vscale x 2 x half> %out
}

define <vscale x 4 x half> @uzp2_f16_v4(<vscale x 4 x half> %a, <vscale x 4 x half> %b) {
; CHECK-LABEL: uzp2_f16_v4:
; CHECK: uzp2 z0.s, z0.s, z1.s
; CHECK-NEXT: ret
  %out = call <vscale x 4 x half> @llvm.aarch64.sve.uzp2.nxv4f16(<vscale x 4 x half> %a, <vscale x 4 x half> %b)
  ret <vscale x 4 x half> %out
}

define <vscale x 8 x bfloat> @uzp2_bf16(<vscale x 8 x bfloat> %a, <vscale x 8 x bfloat> %b) #0 {
; CHECK-LABEL: uzp2_bf16:
; CHECK: uzp2 z0.h, z0.h, z1.h
; CHECK-NEXT: ret
  %out = call <vscale x 8 x bfloat> @llvm.aarch64.sve.uzp2.nxv8bf16(<vscale x 8 x bfloat> %a, <vscale x 8 x bfloat> %b)
  ret <vscale x 8 x bfloat> %out
}

define <vscale x 8 x half> @uzp2_f16(<vscale x 8 x half> %a, <vscale x 8 x half> %b) {
; CHECK-LABEL: uzp2_f16:
; CHECK: uzp2 z0.h, z0.h, z1.h
; CHECK-NEXT: ret
  %out = call <vscale x 8 x half> @llvm.aarch64.sve.uzp2.nxv8f16(<vscale x 8 x half> %a, <vscale x 8 x half> %b)
  ret <vscale x 8 x half> %out
}

define <vscale x 2 x float> @uzp2_f32_v2(<vscale x 2 x float> %a, <vscale x 2 x float> %b) {
; CHECK-LABEL: uzp2_f32_v2:
; CHECK: uzp2 z0.d, z0.d, z1.d
; CHECK-NEXT: ret
  %out = call <vscale x 2 x float> @llvm.aarch64.sve.uzp2.nxv2f32(<vscale x 2 x float> %a, <vscale x 2 x float> %b)
  ret <vscale x 2 x float> %out
}

define <vscale x 4 x float> @uzp2_f32(<vscale x 4 x float> %a, <vscale x 4 x float> %b) {
; CHECK-LABEL: uzp2_f32:
; CHECK: uzp2 z0.s, z0.s, z1.s
; CHECK-NEXT: ret
  %out = call <vscale x 4 x float> @llvm.aarch64.sve.uzp2.nxv4f32(<vscale x 4 x float> %a, <vscale x 4 x float> %b)
  ret <vscale x 4 x float> %out
}

define <vscale x 2 x double> @uzp2_f64(<vscale x 2 x double> %a, <vscale x 2 x double> %b) {
; CHECK-LABEL: uzp2_f64:
; CHECK: uzp2 z0.d, z0.d, z1.d
; CHECK-NEXT: ret
  %out = call <vscale x 2 x double> @llvm.aarch64.sve.uzp2.nxv2f64(<vscale x 2 x double> %a, <vscale x 2 x double> %b)
  ret <vscale x 2 x double> %out
}

define <vscale x 16 x i1> @zip1_b8(<vscale x 16 x i1> %a, <vscale x 16 x i1> %b) {
; CHECK-LABEL: zip1_b8:
; CHECK: zip1 p0.b, p0.b, p1.b
  %out = call <vscale x 16 x i1> @llvm.aarch64.sve.zip1.nxv16i1(<vscale x 16 x i1> %a, <vscale x 16 x i1> %b)
  ret <vscale x 16 x i1> %out
}

define <vscale x 8 x i1> @zip1_b16(<vscale x 8 x i1> %a, <vscale x 8 x i1> %b) {
; CHECK-LABEL: zip1_b16:
; CHECK: zip1 p0.h, p0.h, p1.h
  %out = call <vscale x 8 x i1> @llvm.aarch64.sve.zip1.nxv8i1(<vscale x 8 x i1> %a, <vscale x 8 x i1> %b)
  ret <vscale x 8 x i1> %out
}

define <vscale x 4 x i1> @zip1_b32(<vscale x 4 x i1> %a, <vscale x 4 x i1> %b) {
; CHECK-LABEL: zip1_b32:
; CHECK: zip1 p0.s, p0.s, p1.s
  %out = call <vscale x 4 x i1> @llvm.aarch64.sve.zip1.nxv4i1(<vscale x 4 x i1> %a, <vscale x 4 x i1> %b)
  ret <vscale x 4 x i1> %out
}

define <vscale x 2 x i1> @zip1_b64(<vscale x 2 x i1> %a, <vscale x 2 x i1> %b) {
; CHECK-LABEL: zip1_b64:
; CHECK: zip1 p0.d, p0.d, p1.d
  %out = call <vscale x 2 x i1> @llvm.aarch64.sve.zip1.nxv2i1(<vscale x 2 x i1> %a, <vscale x 2 x i1> %b)
  ret <vscale x 2 x i1> %out
}

define <vscale x 16 x i8> @zip1_i8(<vscale x 16 x i8> %a, <vscale x 16 x i8> %b) {
; CHECK-LABEL: zip1_i8:
; CHECK: zip1 z0.b, z0.b, z1.b
  %out = call <vscale x 16 x i8> @llvm.aarch64.sve.zip1.nxv16i8(<vscale x 16 x i8> %a, <vscale x 16 x i8> %b)
  ret <vscale x 16 x i8> %out
}

define <vscale x 8 x i16> @zip1_i16(<vscale x 8 x i16> %a, <vscale x 8 x i16> %b) {
; CHECK-LABEL: zip1_i16:
; CHECK: zip1 z0.h, z0.h, z1.h
  %out = call <vscale x 8 x i16> @llvm.aarch64.sve.zip1.nxv8i16(<vscale x 8 x i16> %a, <vscale x 8 x i16> %b)
  ret <vscale x 8 x i16> %out
}

define <vscale x 4 x i32> @zip1_i32(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b) {
; CHECK-LABEL: zip1_i32:
; CHECK: zip1 z0.s, z0.s, z1.s
  %out = call <vscale x 4 x i32> @llvm.aarch64.sve.zip1.nxv4i32(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b)
  ret <vscale x 4 x i32> %out
}

define <vscale x 2 x i64> @zip1_i64(<vscale x 2 x i64> %a, <vscale x 2 x i64> %b) {
; CHECK-LABEL: zip1_i64:
; CHECK: zip1 z0.d, z0.d, z1.d
  %out = call <vscale x 2 x i64> @llvm.aarch64.sve.zip1.nxv2i64(<vscale x 2 x i64> %a, <vscale x 2 x i64> %b)
  ret <vscale x 2 x i64> %out
}

define <vscale x 2 x half> @zip1_f16_v2(<vscale x 2 x half> %a, <vscale x 2 x half> %b) {
; CHECK-LABEL: zip1_f16_v2:
; CHECK: zip1 z0.d, z0.d, z1.d
  %out = call <vscale x 2 x half> @llvm.aarch64.sve.zip1.nxv2f16(<vscale x 2 x half> %a, <vscale x 2 x half> %b)
  ret <vscale x 2 x half> %out
}

define <vscale x 4 x half> @zip1_f16_v4(<vscale x 4 x half> %a, <vscale x 4 x half> %b) {
; CHECK-LABEL: zip1_f16_v4:
; CHECK: zip1 z0.s, z0.s, z1.s
  %out = call <vscale x 4 x half> @llvm.aarch64.sve.zip1.nxv4f16(<vscale x 4 x half> %a, <vscale x 4 x half> %b)
  ret <vscale x 4 x half> %out
}

define <vscale x 8 x bfloat> @zip1_bf16(<vscale x 8 x bfloat> %a, <vscale x 8 x bfloat> %b) #0 {
; CHECK-LABEL: zip1_bf16:
; CHECK: zip1 z0.h, z0.h, z1.h
  %out = call <vscale x 8 x bfloat> @llvm.aarch64.sve.zip1.nxv8bf16(<vscale x 8 x bfloat> %a, <vscale x 8 x bfloat> %b)
  ret <vscale x 8 x bfloat> %out
}

define <vscale x 8 x half> @zip1_f16(<vscale x 8 x half> %a, <vscale x 8 x half> %b) {
; CHECK-LABEL: zip1_f16:
; CHECK: zip1 z0.h, z0.h, z1.h
  %out = call <vscale x 8 x half> @llvm.aarch64.sve.zip1.nxv8f16(<vscale x 8 x half> %a, <vscale x 8 x half> %b)
  ret <vscale x 8 x half> %out
}

define <vscale x 2 x float> @zip1_f32_v2(<vscale x 2 x float> %a, <vscale x 2 x float> %b) {
; CHECK-LABEL: zip1_f32_v2:
; CHECK: zip1 z0.d, z0.d, z1.d
  %out = call <vscale x 2 x float> @llvm.aarch64.sve.zip1.nxv2f32(<vscale x 2 x float> %a, <vscale x 2 x float> %b)
  ret <vscale x 2 x float> %out
}

define <vscale x 4 x float> @zip1_f32(<vscale x 4 x float> %a, <vscale x 4 x float> %b) {
; CHECK-LABEL: zip1_f32:
; CHECK: zip1 z0.s, z0.s, z1.s
  %out = call <vscale x 4 x float> @llvm.aarch64.sve.zip1.nxv4f32(<vscale x 4 x float> %a, <vscale x 4 x float> %b)
  ret <vscale x 4 x float> %out
}

define <vscale x 2 x double> @zip1_f64(<vscale x 2 x double> %a, <vscale x 2 x double> %b) {
; CHECK-LABEL: zip1_f64:
; CHECK: zip1 z0.d, z0.d, z1.d
  %out = call <vscale x 2 x double> @llvm.aarch64.sve.zip1.nxv2f64(<vscale x 2 x double> %a, <vscale x 2 x double> %b)
  ret <vscale x 2 x double> %out
}
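
;
; ZIP2 (interleave elements from the high halves of the two inputs)
;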

define <vscale x 16 x i1> @zip2_b8(<vscale x 16 x i1> %a, <vscale x 16 x i1> %b) {
; CHECK-LABEL: zip2_b8:
; CHECK: zip2 p0.b, p0.b, p1.b
  %out = call <vscale x 16 x i1> @llvm.aarch64.sve.zip2.nxv16i1(<vscale x 16 x i1> %a, <vscale x 16 x i1> %b)
  ret <vscale x 16 x i1> %out
}

define <vscale x 8 x i1> @zip2_b16(<vscale x 8 x i1> %a, <vscale x 8 x i1> %b) {
; CHECK-LABEL: zip2_b16:
; CHECK: zip2 p0.h, p0.h, p1.h
  %out = call <vscale x 8 x i1> @llvm.aarch64.sve.zip2.nxv8i1(<vscale x 8 x i1> %a, <vscale x 8 x i1> %b)
  ret <vscale x 8 x i1> %out
}

define <vscale x 4 x i1> @zip2_b32(<vscale x 4 x i1> %a, <vscale x 4 x i1> %b) {
; CHECK-LABEL: zip2_b32:
; CHECK: zip2 p0.s, p0.s, p1.s
  %out = call <vscale x 4 x i1> @llvm.aarch64.sve.zip2.nxv4i1(<vscale x 4 x i1> %a, <vscale x 4 x i1> %b)
  ret <vscale x 4 x i1> %out
}

define <vscale x 2 x i1> @zip2_b64(<vscale x 2 x i1> %a, <vscale x 2 x i1> %b) {
; CHECK-LABEL: zip2_b64:
; CHECK: zip2 p0.d, p0.d, p1.d
  %out = call <vscale x 2 x i1> @llvm.aarch64.sve.zip2.nxv2i1(<vscale x 2 x i1> %a, <vscale x 2 x i1> %b)
  ret <vscale x 2 x i1> %out
}

define <vscale x 16 x i8> @zip2_i8(<vscale x 16 x i8> %a, <vscale x 16 x i8> %b) {
; CHECK-LABEL: zip2_i8:
; CHECK: zip2 z0.b, z0.b, z1.b
  %out = call <vscale x 16 x i8> @llvm.aarch64.sve.zip2.nxv16i8(<vscale x 16 x i8> %a, <vscale x 16 x i8> %b)
  ret <vscale x 16 x i8> %out
}

define <vscale x 8 x i16> @zip2_i16(<vscale x 8 x i16> %a, <vscale x 8 x i16> %b) {
; CHECK-LABEL: zip2_i16:
; CHECK: zip2 z0.h, z0.h, z1.h
  %out = call <vscale x 8 x i16> @llvm.aarch64.sve.zip2.nxv8i16(<vscale x 8 x i16> %a, <vscale x 8 x i16> %b)
  ret <vscale x 8 x i16> %out
}

define <vscale x 4 x i32> @zip2_i32(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b) {
; CHECK-LABEL: zip2_i32:
; CHECK: zip2 z0.s, z0.s, z1.s
  %out = call <vscale x 4 x i32> @llvm.aarch64.sve.zip2.nxv4i32(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b)
  ret <vscale x 4 x i32> %out
}

define <vscale x 2 x i64> @zip2_i64(<vscale x 2 x i64> %a, <vscale x 2 x i64> %b) {
; CHECK-LABEL: zip2_i64:
; CHECK: zip2 z0.d, z0.d, z1.d
  %out = call <vscale x 2 x i64> @llvm.aarch64.sve.zip2.nxv2i64(<vscale x 2 x i64> %a, <vscale x 2 x i64> %b)
  ret <vscale x 2 x i64> %out
}

define <vscale x 2 x half> @zip2_f16_v2(<vscale x 2 x half> %a, <vscale x 2 x half> %b) {
; CHECK-LABEL: zip2_f16_v2:
; CHECK: zip2 z0.d, z0.d, z1.d
  %out = call <vscale x 2 x half> @llvm.aarch64.sve.zip2.nxv2f16(<vscale x 2 x half> %a, <vscale x 2 x half> %b)
  ret <vscale x 2 x half> %out
}

define <vscale x 4 x half> @zip2_f16_v4(<vscale x 4 x half> %a, <vscale x 4 x half> %b) {
; CHECK-LABEL: zip2_f16_v4:
; CHECK: zip2 z0.s, z0.s, z1.s
  %out = call <vscale x 4 x half> @llvm.aarch64.sve.zip2.nxv4f16(<vscale x 4 x half> %a, <vscale x 4 x half> %b)
  ret <vscale x 4 x half> %out
}

define <vscale x 8 x bfloat> @zip2_bf16(<vscale x 8 x bfloat> %a, <vscale x 8 x bfloat> %b) #0 {
; CHECK-LABEL: zip2_bf16:
; CHECK: zip2 z0.h, z0.h, z1.h
  %out = call <vscale x 8 x bfloat> @llvm.aarch64.sve.zip2.nxv8bf16(<vscale x 8 x bfloat> %a, <vscale x 8 x bfloat> %b)
  ret <vscale x 8 x bfloat> %out
}

define <vscale x 8 x half> @zip2_f16(<vscale x 8 x half> %a, <vscale x 8 x half> %b) {
; CHECK-LABEL: zip2_f16:
; CHECK: zip2 z0.h, z0.h, z1.h
  %out = call <vscale x 8 x half> @llvm.aarch64.sve.zip2.nxv8f16(<vscale x 8 x half> %a, <vscale x 8 x half> %b)
  ret <vscale x 8 x half> %out
}

define <vscale x 2 x float> @zip2_f32_v2(<vscale x 2 x float> %a, <vscale x 2 x float> %b) {
; CHECK-LABEL: zip2_f32_v2:
; CHECK: zip2 z0.d, z0.d, z1.d
  %out = call <vscale x 2 x float> @llvm.aarch64.sve.zip2.nxv2f32(<vscale x 2 x float> %a, <vscale x 2 x float> %b)
  ret <vscale x 2 x float> %out
}

define <vscale x 4 x float> @zip2_f32(<vscale x 4 x float> %a, <vscale x 4 x float> %b) {
; CHECK-LABEL: zip2_f32:
; CHECK: zip2 z0.s, z0.s, z1.s
  %out = call <vscale x 4 x float> @llvm.aarch64.sve.zip2.nxv4f32(<vscale x 4 x float> %a, <vscale x 4 x float> %b)
  ret <vscale x 4 x float> %out
}

define <vscale x 2 x double> @zip2_f64(<vscale x 2 x double> %a, <vscale x 2 x double> %b) {
; CHECK-LABEL: zip2_f64:
; CHECK: zip2 z0.d, z0.d, z1.d
  %out = call <vscale x 2 x double> @llvm.aarch64.sve.zip2.nxv2f64(<vscale x 2 x double> %a, <vscale x 2 x double> %b)
  ret <vscale x 2 x double> %out
}
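
; Declarations for every intrinsic exercised above.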
declare <vscale x 16 x i8> @llvm.aarch64.sve.clasta.nxv16i8(<vscale x 16 x i1>, <vscale x 16 x i8>, <vscale x 16 x i8>)
declare <vscale x 8 x i16> @llvm.aarch64.sve.clasta.nxv8i16(<vscale x 8 x i1>, <vscale x 8 x i16>, <vscale x 8 x i16>)
declare <vscale x 4 x i32> @llvm.aarch64.sve.clasta.nxv4i32(<vscale x 4 x i1>, <vscale x 4 x i32>, <vscale x 4 x i32>)
declare <vscale x 2 x i64> @llvm.aarch64.sve.clasta.nxv2i64(<vscale x 2 x i1>, <vscale x 2 x i64>, <vscale x 2 x i64>)
declare <vscale x 8 x half> @llvm.aarch64.sve.clasta.nxv8f16(<vscale x 8 x i1>, <vscale x 8 x half>, <vscale x 8 x half>)
declare <vscale x 8 x bfloat> @llvm.aarch64.sve.clasta.nxv8bf16(<vscale x 8 x i1>, <vscale x 8 x bfloat>, <vscale x 8 x bfloat>)
declare <vscale x 4 x float> @llvm.aarch64.sve.clasta.nxv4f32(<vscale x 4 x i1>, <vscale x 4 x float>, <vscale x 4 x float>)
declare <vscale x 2 x double> @llvm.aarch64.sve.clasta.nxv2f64(<vscale x 2 x i1>, <vscale x 2 x double>, <vscale x 2 x double>)

declare i8 @llvm.aarch64.sve.clasta.n.nxv16i8(<vscale x 16 x i1>, i8, <vscale x 16 x i8>)
declare i16 @llvm.aarch64.sve.clasta.n.nxv8i16(<vscale x 8 x i1>, i16, <vscale x 8 x i16>)
declare i32 @llvm.aarch64.sve.clasta.n.nxv4i32(<vscale x 4 x i1>, i32, <vscale x 4 x i32>)
declare i64 @llvm.aarch64.sve.clasta.n.nxv2i64(<vscale x 2 x i1>, i64, <vscale x 2 x i64>)
declare half @llvm.aarch64.sve.clasta.n.nxv8f16(<vscale x 8 x i1>, half, <vscale x 8 x half>)
declare bfloat @llvm.aarch64.sve.clasta.n.nxv8bf16(<vscale x 8 x i1>, bfloat, <vscale x 8 x bfloat>)
declare float @llvm.aarch64.sve.clasta.n.nxv4f32(<vscale x 4 x i1>, float, <vscale x 4 x float>)
declare double @llvm.aarch64.sve.clasta.n.nxv2f64(<vscale x 2 x i1>, double, <vscale x 2 x double>)

declare <vscale x 16 x i8> @llvm.aarch64.sve.clastb.nxv16i8(<vscale x 16 x i1>, <vscale x 16 x i8>, <vscale x 16 x i8>)
declare <vscale x 8 x i16> @llvm.aarch64.sve.clastb.nxv8i16(<vscale x 8 x i1>, <vscale x 8 x i16>, <vscale x 8 x i16>)
declare <vscale x 4 x i32> @llvm.aarch64.sve.clastb.nxv4i32(<vscale x 4 x i1>, <vscale x 4 x i32>, <vscale x 4 x i32>)
declare <vscale x 2 x i64> @llvm.aarch64.sve.clastb.nxv2i64(<vscale x 2 x i1>, <vscale x 2 x i64>, <vscale x 2 x i64>)
declare <vscale x 8 x half> @llvm.aarch64.sve.clastb.nxv8f16(<vscale x 8 x i1>, <vscale x 8 x half>, <vscale x 8 x half>)
declare <vscale x 8 x bfloat> @llvm.aarch64.sve.clastb.nxv8bf16(<vscale x 8 x i1>, <vscale x 8 x bfloat>, <vscale x 8 x bfloat>)
declare <vscale x 4 x float> @llvm.aarch64.sve.clastb.nxv4f32(<vscale x 4 x i1>, <vscale x 4 x float>, <vscale x 4 x float>)
declare <vscale x 2 x double> @llvm.aarch64.sve.clastb.nxv2f64(<vscale x 2 x i1>, <vscale x 2 x double>, <vscale x 2 x double>)

declare i8 @llvm.aarch64.sve.clastb.n.nxv16i8(<vscale x 16 x i1>, i8, <vscale x 16 x i8>)
declare i16 @llvm.aarch64.sve.clastb.n.nxv8i16(<vscale x 8 x i1>, i16, <vscale x 8 x i16>)
declare i32 @llvm.aarch64.sve.clastb.n.nxv4i32(<vscale x 4 x i1>, i32, <vscale x 4 x i32>)
declare i64 @llvm.aarch64.sve.clastb.n.nxv2i64(<vscale x 2 x i1>, i64, <vscale x 2 x i64>)
declare half @llvm.aarch64.sve.clastb.n.nxv8f16(<vscale x 8 x i1>, half, <vscale x 8 x half>)
declare bfloat @llvm.aarch64.sve.clastb.n.nxv8bf16(<vscale x 8 x i1>, bfloat, <vscale x 8 x bfloat>)
declare float @llvm.aarch64.sve.clastb.n.nxv4f32(<vscale x 4 x i1>, float, <vscale x 4 x float>)
declare double @llvm.aarch64.sve.clastb.n.nxv2f64(<vscale x 2 x i1>, double, <vscale x 2 x double>)

declare <vscale x 4 x i32> @llvm.aarch64.sve.compact.nxv4i32(<vscale x 4 x i1>, <vscale x 4 x i32>)
declare <vscale x 2 x i64> @llvm.aarch64.sve.compact.nxv2i64(<vscale x 2 x i1>, <vscale x 2 x i64>)
declare <vscale x 4 x float> @llvm.aarch64.sve.compact.nxv4f32(<vscale x 4 x i1>, <vscale x 4 x float>)
declare <vscale x 2 x double> @llvm.aarch64.sve.compact.nxv2f64(<vscale x 2 x i1>, <vscale x 2 x double>)

declare <vscale x 16 x i8> @llvm.aarch64.sve.dupq.lane.nxv16i8(<vscale x 16 x i8>, i64)
declare <vscale x 8 x i16> @llvm.aarch64.sve.dupq.lane.nxv8i16(<vscale x 8 x i16>, i64)
declare <vscale x 4 x i32> @llvm.aarch64.sve.dupq.lane.nxv4i32(<vscale x 4 x i32>, i64)
declare <vscale x 2 x i64> @llvm.aarch64.sve.dupq.lane.nxv2i64(<vscale x 2 x i64>, i64)
declare <vscale x 8 x half> @llvm.aarch64.sve.dupq.lane.nxv8f16(<vscale x 8 x half>, i64)
declare <vscale x 8 x bfloat> @llvm.aarch64.sve.dupq.lane.nxv8bf16(<vscale x 8 x bfloat>, i64)
declare <vscale x 4 x float> @llvm.aarch64.sve.dupq.lane.nxv4f32(<vscale x 4 x float>, i64)
declare <vscale x 2 x double> @llvm.aarch64.sve.dupq.lane.nxv2f64(<vscale x 2 x double>, i64)

declare <vscale x 16 x i8> @llvm.aarch64.sve.ext.nxv16i8(<vscale x 16 x i8>, <vscale x 16 x i8>, i32)
declare <vscale x 8 x i16> @llvm.aarch64.sve.ext.nxv8i16(<vscale x 8 x i16>, <vscale x 8 x i16>, i32)
declare <vscale x 4 x i32> @llvm.aarch64.sve.ext.nxv4i32(<vscale x 4 x i32>, <vscale x 4 x i32>, i32)
declare <vscale x 2 x i64> @llvm.aarch64.sve.ext.nxv2i64(<vscale x 2 x i64>, <vscale x 2 x i64>, i32)
declare <vscale x 8 x bfloat> @llvm.aarch64.sve.ext.nxv8bf16(<vscale x 8 x bfloat>, <vscale x 8 x bfloat>, i32)
declare <vscale x 8 x half> @llvm.aarch64.sve.ext.nxv8f16(<vscale x 8 x half>, <vscale x 8 x half>, i32)
declare <vscale x 4 x float> @llvm.aarch64.sve.ext.nxv4f32(<vscale x 4 x float>, <vscale x 4 x float>, i32)
declare <vscale x 2 x double> @llvm.aarch64.sve.ext.nxv2f64(<vscale x 2 x double>, <vscale x 2 x double>, i32)

declare i8 @llvm.aarch64.sve.lasta.nxv16i8(<vscale x 16 x i1>, <vscale x 16 x i8>)
declare i16 @llvm.aarch64.sve.lasta.nxv8i16(<vscale x 8 x i1>, <vscale x 8 x i16>)
declare i32 @llvm.aarch64.sve.lasta.nxv4i32(<vscale x 4 x i1>, <vscale x 4 x i32>)
declare i64 @llvm.aarch64.sve.lasta.nxv2i64(<vscale x 2 x i1>, <vscale x 2 x i64>)
declare half @llvm.aarch64.sve.lasta.nxv8f16(<vscale x 8 x i1>, <vscale x 8 x half>)
declare bfloat @llvm.aarch64.sve.lasta.nxv8bf16(<vscale x 8 x i1>, <vscale x 8 x bfloat>)
declare float @llvm.aarch64.sve.lasta.nxv2f32(<vscale x 2 x i1>, <vscale x 2 x float>)
declare float @llvm.aarch64.sve.lasta.nxv4f32(<vscale x 4 x i1>, <vscale x 4 x float>)
declare double @llvm.aarch64.sve.lasta.nxv2f64(<vscale x 2 x i1>, <vscale x 2 x double>)

declare i8 @llvm.aarch64.sve.lastb.nxv16i8(<vscale x 16 x i1>, <vscale x 16 x i8>)
declare i16 @llvm.aarch64.sve.lastb.nxv8i16(<vscale x 8 x i1>, <vscale x 8 x i16>)
declare i32 @llvm.aarch64.sve.lastb.nxv4i32(<vscale x 4 x i1>, <vscale x 4 x i32>)
declare i64 @llvm.aarch64.sve.lastb.nxv2i64(<vscale x 2 x i1>, <vscale x 2 x i64>)
declare half @llvm.aarch64.sve.lastb.nxv8f16(<vscale x 8 x i1>, <vscale x 8 x half>)
declare bfloat @llvm.aarch64.sve.lastb.nxv8bf16(<vscale x 8 x i1>, <vscale x 8 x bfloat>)
declare float @llvm.aarch64.sve.lastb.nxv2f32(<vscale x 2 x i1>, <vscale x 2 x float>)
declare float @llvm.aarch64.sve.lastb.nxv4f32(<vscale x 4 x i1>, <vscale x 4 x float>)
declare double @llvm.aarch64.sve.lastb.nxv2f64(<vscale x 2 x i1>, <vscale x 2 x double>)

declare <vscale x 16 x i1> @llvm.aarch64.sve.rev.nxv16i1(<vscale x 16 x i1>)
declare <vscale x 8 x i1> @llvm.aarch64.sve.rev.nxv8i1(<vscale x 8 x i1>)
declare <vscale x 4 x i1> @llvm.aarch64.sve.rev.nxv4i1(<vscale x 4 x i1>)
declare <vscale x 2 x i1> @llvm.aarch64.sve.rev.nxv2i1(<vscale x 2 x i1>)
declare <vscale x 16 x i8> @llvm.aarch64.sve.rev.nxv16i8(<vscale x 16 x i8>)
declare <vscale x 8 x i16> @llvm.aarch64.sve.rev.nxv8i16(<vscale x 8 x i16>)
declare <vscale x 4 x i32> @llvm.aarch64.sve.rev.nxv4i32(<vscale x 4 x i32>)
declare <vscale x 2 x i64> @llvm.aarch64.sve.rev.nxv2i64(<vscale x 2 x i64>)
declare <vscale x 8 x bfloat> @llvm.aarch64.sve.rev.nxv8bf16(<vscale x 8 x bfloat>)
declare <vscale x 8 x half> @llvm.aarch64.sve.rev.nxv8f16(<vscale x 8 x half>)
declare <vscale x 4 x float> @llvm.aarch64.sve.rev.nxv4f32(<vscale x 4 x float>)
declare <vscale x 2 x double> @llvm.aarch64.sve.rev.nxv2f64(<vscale x 2 x double>)

declare <vscale x 16 x i8> @llvm.aarch64.sve.splice.nxv16i8(<vscale x 16 x i1>, <vscale x 16 x i8>, <vscale x 16 x i8>)
declare <vscale x 8 x i16> @llvm.aarch64.sve.splice.nxv8i16(<vscale x 8 x i1>, <vscale x 8 x i16>, <vscale x 8 x i16>)
declare <vscale x 4 x i32> @llvm.aarch64.sve.splice.nxv4i32(<vscale x 4 x i1>, <vscale x 4 x i32>, <vscale x 4 x i32>)
declare <vscale x 2 x i64> @llvm.aarch64.sve.splice.nxv2i64(<vscale x 2 x i1>, <vscale x 2 x i64>, <vscale x 2 x i64>)
declare <vscale x 8 x bfloat> @llvm.aarch64.sve.splice.nxv8bf16(<vscale x 8 x i1>, <vscale x 8 x bfloat>, <vscale x 8 x bfloat>)
declare <vscale x 8 x half> @llvm.aarch64.sve.splice.nxv8f16(<vscale x 8 x i1>, <vscale x 8 x half>, <vscale x 8 x half>)
declare <vscale x 4 x float> @llvm.aarch64.sve.splice.nxv4f32(<vscale x 4 x i1>, <vscale x 4 x float>, <vscale x 4 x float>)
declare <vscale x 2 x double> @llvm.aarch64.sve.splice.nxv2f64(<vscale x 2 x i1>, <vscale x 2 x double>, <vscale x 2 x double>)

declare <vscale x 8 x i16> @llvm.aarch64.sve.sunpkhi.nxv8i16(<vscale x 16 x i8>)
declare <vscale x 4 x i32> @llvm.aarch64.sve.sunpkhi.nxv4i32(<vscale x 8 x i16>)
declare <vscale x 2 x i64> @llvm.aarch64.sve.sunpkhi.nxv2i64(<vscale x 4 x i32>)

declare <vscale x 8 x i16> @llvm.aarch64.sve.sunpklo.nxv8i16(<vscale x 16 x i8>)
declare <vscale x 4 x i32> @llvm.aarch64.sve.sunpklo.nxv4i32(<vscale x 8 x i16>)
declare <vscale x 2 x i64> @llvm.aarch64.sve.sunpklo.nxv2i64(<vscale x 4 x i32>)

declare <vscale x 16 x i8> @llvm.aarch64.sve.tbl.nxv16i8(<vscale x 16 x i8>, <vscale x 16 x i8>)
declare <vscale x 8 x i16> @llvm.aarch64.sve.tbl.nxv8i16(<vscale x 8 x i16>, <vscale x 8 x i16>)
declare <vscale x 4 x i32> @llvm.aarch64.sve.tbl.nxv4i32(<vscale x 4 x i32>, <vscale x 4 x i32>)
declare <vscale x 2 x i64> @llvm.aarch64.sve.tbl.nxv2i64(<vscale x 2 x i64>, <vscale x 2 x i64>)
declare <vscale x 8 x half> @llvm.aarch64.sve.tbl.nxv8f16(<vscale x 8 x half>, <vscale x 8 x i16>)
declare <vscale x 8 x bfloat> @llvm.aarch64.sve.tbl.nxv8bf16(<vscale x 8 x bfloat>, <vscale x 8 x i16>)
declare <vscale x 4 x float> @llvm.aarch64.sve.tbl.nxv4f32(<vscale x 4 x float>, <vscale x 4 x i32>)
declare <vscale x 2 x double> @llvm.aarch64.sve.tbl.nxv2f64(<vscale x 2 x double>, <vscale x 2 x i64>)

declare <vscale x 8 x i16> @llvm.aarch64.sve.uunpkhi.nxv8i16(<vscale x 16 x i8>)
declare <vscale x 4 x i32> @llvm.aarch64.sve.uunpkhi.nxv4i32(<vscale x 8 x i16>)
declare <vscale x 2 x i64> @llvm.aarch64.sve.uunpkhi.nxv2i64(<vscale x 4 x i32>)

declare <vscale x 8 x i16> @llvm.aarch64.sve.uunpklo.nxv8i16(<vscale x 16 x i8>)
declare <vscale x 4 x i32> @llvm.aarch64.sve.uunpklo.nxv4i32(<vscale x 8 x i16>)
declare <vscale x 2 x i64> @llvm.aarch64.sve.uunpklo.nxv2i64(<vscale x 4 x i32>)

declare <vscale x 16 x i1> @llvm.aarch64.sve.trn1.nxv16i1(<vscale x 16 x i1>, <vscale x 16 x i1>)
declare <vscale x 8 x i1> @llvm.aarch64.sve.trn1.nxv8i1(<vscale x 8 x i1>, <vscale x 8 x i1>)
declare <vscale x 4 x i1> @llvm.aarch64.sve.trn1.nxv4i1(<vscale x 4 x i1>, <vscale x 4 x i1>)
declare <vscale x 2 x i1> @llvm.aarch64.sve.trn1.nxv2i1(<vscale x 2 x i1>, <vscale x 2 x i1>)
declare <vscale x 16 x i8> @llvm.aarch64.sve.trn1.nxv16i8(<vscale x 16 x i8>, <vscale x 16 x i8>)
declare <vscale x 8 x i16> @llvm.aarch64.sve.trn1.nxv8i16(<vscale x 8 x i16>, <vscale x 8 x i16>)
declare <vscale x 4 x i32> @llvm.aarch64.sve.trn1.nxv4i32(<vscale x 4 x i32>, <vscale x 4 x i32>)
declare <vscale x 2 x i64> @llvm.aarch64.sve.trn1.nxv2i64(<vscale x 2 x i64>, <vscale x 2 x i64>)
declare <vscale x 2 x half> @llvm.aarch64.sve.trn1.nxv2f16(<vscale x 2 x half>, <vscale x 2 x half>)
declare <vscale x 4 x half> @llvm.aarch64.sve.trn1.nxv4f16(<vscale x 4 x half>, <vscale x 4 x half>)
declare <vscale x 8 x bfloat> @llvm.aarch64.sve.trn1.nxv8bf16(<vscale x 8 x bfloat>, <vscale x 8 x bfloat>)
declare <vscale x 8 x half> @llvm.aarch64.sve.trn1.nxv8f16(<vscale x 8 x half>, <vscale x 8 x half>)
declare <vscale x 2 x float> @llvm.aarch64.sve.trn1.nxv2f32(<vscale x 2 x float>, <vscale x 2 x float>)
declare <vscale x 4 x float> @llvm.aarch64.sve.trn1.nxv4f32(<vscale x 4 x float>, <vscale x 4 x float>)
declare <vscale x 2 x double> @llvm.aarch64.sve.trn1.nxv2f64(<vscale x 2 x double>, <vscale x 2 x double>)

declare <vscale x 16 x i1> @llvm.aarch64.sve.trn2.nxv16i1(<vscale x 16 x i1>, <vscale x 16 x i1>)
declare <vscale x 8 x i1> @llvm.aarch64.sve.trn2.nxv8i1(<vscale x 8 x i1>, <vscale x 8 x i1>)
declare <vscale x 4 x i1> @llvm.aarch64.sve.trn2.nxv4i1(<vscale x 4 x i1>, <vscale x 4 x i1>)
declare <vscale x 2 x i1> @llvm.aarch64.sve.trn2.nxv2i1(<vscale x 2 x i1>, <vscale x 2 x i1>)
declare <vscale x 16 x i8> @llvm.aarch64.sve.trn2.nxv16i8(<vscale x 16 x i8>, <vscale x 16 x i8>)
declare <vscale x 8 x i16> @llvm.aarch64.sve.trn2.nxv8i16(<vscale x 8 x i16>, <vscale x 8 x i16>)
declare <vscale x 4 x i32> @llvm.aarch64.sve.trn2.nxv4i32(<vscale x 4 x i32>, <vscale x 4 x i32>)
declare <vscale x 2 x i64> @llvm.aarch64.sve.trn2.nxv2i64(<vscale x 2 x i64>, <vscale x 2 x i64>)
declare <vscale x 2 x half> @llvm.aarch64.sve.trn2.nxv2f16(<vscale x 2 x half>, <vscale x 2 x half>)
declare <vscale x 4 x half> @llvm.aarch64.sve.trn2.nxv4f16(<vscale x 4 x half>, <vscale x 4 x half>)
declare <vscale x 8 x bfloat> @llvm.aarch64.sve.trn2.nxv8bf16(<vscale x 8 x bfloat>, <vscale x 8 x bfloat>)
declare <vscale x 8 x half> @llvm.aarch64.sve.trn2.nxv8f16(<vscale x 8 x half>, <vscale x 8 x half>)
declare <vscale x 2 x float> @llvm.aarch64.sve.trn2.nxv2f32(<vscale x 2 x float>, <vscale x 2 x float>)
declare <vscale x 4 x float> @llvm.aarch64.sve.trn2.nxv4f32(<vscale x 4 x float>, <vscale x 4 x float>)
declare <vscale x 2 x double> @llvm.aarch64.sve.trn2.nxv2f64(<vscale x 2 x double>, <vscale x 2 x double>)

declare <vscale x 16 x i1> @llvm.aarch64.sve.uzp1.nxv16i1(<vscale x 16 x i1>, <vscale x 16 x i1>)
declare <vscale x 8 x i1> @llvm.aarch64.sve.uzp1.nxv8i1(<vscale x 8 x i1>, <vscale x 8 x i1>)
declare <vscale x 4 x i1> @llvm.aarch64.sve.uzp1.nxv4i1(<vscale x 4 x i1>, <vscale x 4 x i1>)
declare <vscale x 2 x i1> @llvm.aarch64.sve.uzp1.nxv2i1(<vscale x 2 x i1>, <vscale x 2 x i1>)
declare <vscale x 16 x i8> @llvm.aarch64.sve.uzp1.nxv16i8(<vscale x 16 x i8>, <vscale x 16 x i8>)
declare <vscale x 8 x i16> @llvm.aarch64.sve.uzp1.nxv8i16(<vscale x 8 x i16>, <vscale x 8 x i16>)
declare <vscale x 4 x i32> @llvm.aarch64.sve.uzp1.nxv4i32(<vscale x 4 x i32>, <vscale x 4 x i32>)
declare <vscale x 2 x i64> @llvm.aarch64.sve.uzp1.nxv2i64(<vscale x 2 x i64>, <vscale x 2 x i64>)
declare <vscale x 2 x half> @llvm.aarch64.sve.uzp1.nxv2f16(<vscale x 2 x half>, <vscale x 2 x half>)
declare <vscale x 4 x half> @llvm.aarch64.sve.uzp1.nxv4f16(<vscale x 4 x half>, <vscale x 4 x half>)
declare <vscale x 8 x bfloat> @llvm.aarch64.sve.uzp1.nxv8bf16(<vscale x 8 x bfloat>, <vscale x 8 x bfloat>)
declare <vscale x 8 x half> @llvm.aarch64.sve.uzp1.nxv8f16(<vscale x 8 x half>, <vscale x 8 x half>)
declare <vscale x 2 x float> @llvm.aarch64.sve.uzp1.nxv2f32(<vscale x 2 x float>, <vscale x 2 x float>)
declare <vscale x 4 x float> @llvm.aarch64.sve.uzp1.nxv4f32(<vscale x 4 x float>, <vscale x 4 x float>)
declare <vscale x 2 x double> @llvm.aarch64.sve.uzp1.nxv2f64(<vscale x 2 x double>, <vscale x 2 x double>)

declare <vscale x 16 x i1> @llvm.aarch64.sve.uzp2.nxv16i1(<vscale x 16 x i1>, <vscale x 16 x i1>)
declare <vscale x 8 x i1> @llvm.aarch64.sve.uzp2.nxv8i1(<vscale x 8 x i1>, <vscale x 8 x i1>)
declare <vscale x 4 x i1> @llvm.aarch64.sve.uzp2.nxv4i1(<vscale x 4 x i1>, <vscale x 4 x i1>)
declare <vscale x 2 x i1> @llvm.aarch64.sve.uzp2.nxv2i1(<vscale x 2 x i1>, <vscale x 2 x i1>)
declare <vscale x 16 x i8> @llvm.aarch64.sve.uzp2.nxv16i8(<vscale x 16 x i8>, <vscale x 16 x i8>)
declare <vscale x 8 x i16> @llvm.aarch64.sve.uzp2.nxv8i16(<vscale x 8 x i16>, <vscale x 8 x i16>)
declare <vscale x 4 x i32> @llvm.aarch64.sve.uzp2.nxv4i32(<vscale x 4 x i32>, <vscale x 4 x i32>)
declare <vscale x 2 x i64> @llvm.aarch64.sve.uzp2.nxv2i64(<vscale x 2 x i64>, <vscale x 2 x i64>)
declare <vscale x 2 x half> @llvm.aarch64.sve.uzp2.nxv2f16(<vscale x 2 x half>, <vscale x 2 x half>)
declare <vscale x 4 x half> @llvm.aarch64.sve.uzp2.nxv4f16(<vscale x 4 x half>, <vscale x 4 x half>)
declare <vscale x 8 x bfloat> @llvm.aarch64.sve.uzp2.nxv8bf16(<vscale x 8 x bfloat>, <vscale x 8 x bfloat>)
declare <vscale x 8 x half> @llvm.aarch64.sve.uzp2.nxv8f16(<vscale x 8 x half>, <vscale x 8 x half>)
declare <vscale x 2 x float> @llvm.aarch64.sve.uzp2.nxv2f32(<vscale x 2 x float>, <vscale x 2 x float>)
declare <vscale x 4 x float> @llvm.aarch64.sve.uzp2.nxv4f32(<vscale x 4 x float>, <vscale x 4 x float>)
declare <vscale x 2 x double> @llvm.aarch64.sve.uzp2.nxv2f64(<vscale x 2 x double>, <vscale x 2 x double>)

declare <vscale x 16 x i1> @llvm.aarch64.sve.zip1.nxv16i1(<vscale x 16 x i1>, <vscale x 16 x i1>)
declare <vscale x 8 x i1> @llvm.aarch64.sve.zip1.nxv8i1(<vscale x 8 x i1>, <vscale x 8 x i1>)
declare <vscale x 4 x i1> @llvm.aarch64.sve.zip1.nxv4i1(<vscale x 4 x i1>, <vscale x 4 x i1>)
declare <vscale x 2 x i1> @llvm.aarch64.sve.zip1.nxv2i1(<vscale x 2 x i1>, <vscale x 2 x i1>)
declare <vscale x 16 x i8> @llvm.aarch64.sve.zip1.nxv16i8(<vscale x 16 x i8>, <vscale x 16 x i8>)
declare <vscale x 8 x i16> @llvm.aarch64.sve.zip1.nxv8i16(<vscale x 8 x i16>, <vscale x 8 x i16>)
declare <vscale x 4 x i32> @llvm.aarch64.sve.zip1.nxv4i32(<vscale x 4 x i32>, <vscale x 4 x i32>)
declare <vscale x 2 x i64> @llvm.aarch64.sve.zip1.nxv2i64(<vscale x 2 x i64>, <vscale x 2 x i64>)
declare <vscale x 2 x half> @llvm.aarch64.sve.zip1.nxv2f16(<vscale x 2 x half>, <vscale x 2 x half>)
declare <vscale x 4 x half> @llvm.aarch64.sve.zip1.nxv4f16(<vscale x 4 x half>, <vscale x 4 x half>)
declare <vscale x 8 x bfloat> @llvm.aarch64.sve.zip1.nxv8bf16(<vscale x 8 x bfloat>, <vscale x 8 x bfloat>)
declare <vscale x 8 x half> @llvm.aarch64.sve.zip1.nxv8f16(<vscale x 8 x half>, <vscale x 8 x half>)
declare <vscale x 2 x float> @llvm.aarch64.sve.zip1.nxv2f32(<vscale x 2 x float>, <vscale x 2 x float>)
declare <vscale x 4 x float> @llvm.aarch64.sve.zip1.nxv4f32(<vscale x 4 x float>, <vscale x 4 x float>)
declare <vscale x 2 x double> @llvm.aarch64.sve.zip1.nxv2f64(<vscale x 2 x double>, <vscale x 2 x double>)

declare <vscale x 16 x i1> @llvm.aarch64.sve.zip2.nxv16i1(<vscale x 16 x i1>, <vscale x 16 x i1>)
declare <vscale x 8 x i1> @llvm.aarch64.sve.zip2.nxv8i1(<vscale x 8 x i1>, <vscale x 8 x i1>)
declare <vscale x 4 x i1> @llvm.aarch64.sve.zip2.nxv4i1(<vscale x 4 x i1>, <vscale x 4 x i1>)
declare <vscale x 2 x i1> @llvm.aarch64.sve.zip2.nxv2i1(<vscale x 2 x i1>, <vscale x 2 x i1>)
declare <vscale x 16 x i8> @llvm.aarch64.sve.zip2.nxv16i8(<vscale x 16 x i8>, <vscale x 16 x i8>)
declare <vscale x 8 x i16> @llvm.aarch64.sve.zip2.nxv8i16(<vscale x 8 x i16>, <vscale x 8 x i16>)
declare <vscale x 4 x i32> @llvm.aarch64.sve.zip2.nxv4i32(<vscale x 4 x i32>, <vscale x 4 x i32>)
declare <vscale x 2 x i64> @llvm.aarch64.sve.zip2.nxv2i64(<vscale x 2 x i64>, <vscale x 2 x i64>)
declare <vscale x 2 x half> @llvm.aarch64.sve.zip2.nxv2f16(<vscale x 2 x half>, <vscale x 2 x half>)
declare <vscale x 4 x half> @llvm.aarch64.sve.zip2.nxv4f16(<vscale x 4 x half>, <vscale x 4 x half>)
declare <vscale x 8 x bfloat> @llvm.aarch64.sve.zip2.nxv8bf16(<vscale x 8 x bfloat>, <vscale x 8 x bfloat>)
declare <vscale x 8 x half> @llvm.aarch64.sve.zip2.nxv8f16(<vscale x 8 x half>, <vscale x 8 x half>)
declare <vscale x 2 x float> @llvm.aarch64.sve.zip2.nxv2f32(<vscale x 2 x float>, <vscale x 2 x float>)
declare <vscale x 4 x float> @llvm.aarch64.sve.zip2.nxv4f32(<vscale x 4 x float>, <vscale x 4 x float>)
declare <vscale x 2 x double> @llvm.aarch64.sve.zip2.nxv2f64(<vscale x 2 x double>, <vscale x 2 x double>)

; +bf16 is required for the bfloat version.
attributes #0 = { "target-features"="+sve,+bf16" }