; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sve2 < %s | FileCheck %s
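; Verify that predicated SVE intrinsics (primarily the ".u" forms, whose
; inactive lanes are undefined) are lowered to unpredicated instructions when
; the governing predicate is an all-active ptrue, and that the predicated
; form is kept when the predicate is not known to cover all lanes (see the
; *_ptrue_all_d tests below).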
define <vscale x 16 x i8> @add_i8(<vscale x 16 x i8> %a, <vscale x 16 x i8> %b) {
; CHECK-LABEL: add_i8:
; CHECK-NEXT: add z0.b, z0.b, z1.b
  %pg = call <vscale x 16 x i1> @llvm.aarch64.sve.ptrue.nxv16i1(i32 31)
  %out = call <vscale x 16 x i8> @llvm.aarch64.sve.add.u.nxv16i8(<vscale x 16 x i1> %pg, <vscale x 16 x i8> %a, <vscale x 16 x i8> %b)
  ret <vscale x 16 x i8> %out
}

define <vscale x 8 x i16> @add_i16(<vscale x 8 x i16> %a, <vscale x 8 x i16> %b) {
; CHECK-LABEL: add_i16:
; CHECK-NEXT: add z0.h, z0.h, z1.h
  %pg = call <vscale x 8 x i1> @llvm.aarch64.sve.ptrue.nxv8i1(i32 31)
  %out = call <vscale x 8 x i16> @llvm.aarch64.sve.add.u.nxv8i16(<vscale x 8 x i1> %pg, <vscale x 8 x i16> %a, <vscale x 8 x i16> %b)
  ret <vscale x 8 x i16> %out
}

define <vscale x 4 x i32> @add_i32(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b) {
; CHECK-LABEL: add_i32:
; CHECK-NEXT: add z0.s, z0.s, z1.s
  %pg = call <vscale x 4 x i1> @llvm.aarch64.sve.ptrue.nxv4i1(i32 31)
  %out = call <vscale x 4 x i32> @llvm.aarch64.sve.add.u.nxv4i32(<vscale x 4 x i1> %pg, <vscale x 4 x i32> %a, <vscale x 4 x i32> %b)
  ret <vscale x 4 x i32> %out
}

define <vscale x 2 x i64> @add_i64(<vscale x 2 x i64> %a, <vscale x 2 x i64> %b) {
; CHECK-LABEL: add_i64:
; CHECK-NEXT: add z0.d, z0.d, z1.d
  %pg = call <vscale x 2 x i1> @llvm.aarch64.sve.ptrue.nxv2i1(i32 31)
  %out = call <vscale x 2 x i64> @llvm.aarch64.sve.add.u.nxv2i64(<vscale x 2 x i1> %pg, <vscale x 2 x i64> %a, <vscale x 2 x i64> %b)
  ret <vscale x 2 x i64> %out
}

define <vscale x 16 x i8> @sub_i8(<vscale x 16 x i8> %a, <vscale x 16 x i8> %b) {
; CHECK-LABEL: sub_i8:
; CHECK-NEXT: sub z0.b, z0.b, z1.b
  %pg = call <vscale x 16 x i1> @llvm.aarch64.sve.ptrue.nxv16i1(i32 31)
  %out = call <vscale x 16 x i8> @llvm.aarch64.sve.sub.u.nxv16i8(<vscale x 16 x i1> %pg, <vscale x 16 x i8> %a, <vscale x 16 x i8> %b)
  ret <vscale x 16 x i8> %out
}

define <vscale x 8 x i16> @sub_i16(<vscale x 8 x i16> %a, <vscale x 8 x i16> %b) {
; CHECK-LABEL: sub_i16:
; CHECK-NEXT: sub z0.h, z0.h, z1.h
  %pg = call <vscale x 8 x i1> @llvm.aarch64.sve.ptrue.nxv8i1(i32 31)
  %out = call <vscale x 8 x i16> @llvm.aarch64.sve.sub.u.nxv8i16(<vscale x 8 x i1> %pg, <vscale x 8 x i16> %a, <vscale x 8 x i16> %b)
  ret <vscale x 8 x i16> %out
}

define <vscale x 4 x i32> @sub_i32(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b) {
; CHECK-LABEL: sub_i32:
; CHECK-NEXT: sub z0.s, z0.s, z1.s
  %pg = call <vscale x 4 x i1> @llvm.aarch64.sve.ptrue.nxv4i1(i32 31)
  %out = call <vscale x 4 x i32> @llvm.aarch64.sve.sub.u.nxv4i32(<vscale x 4 x i1> %pg, <vscale x 4 x i32> %a, <vscale x 4 x i32> %b)
  ret <vscale x 4 x i32> %out
}

define <vscale x 2 x i64> @sub_i64(<vscale x 2 x i64> %a, <vscale x 2 x i64> %b) {
; CHECK-LABEL: sub_i64:
; CHECK-NEXT: sub z0.d, z0.d, z1.d
  %pg = call <vscale x 2 x i1> @llvm.aarch64.sve.ptrue.nxv2i1(i32 31)
  %out = call <vscale x 2 x i64> @llvm.aarch64.sve.sub.u.nxv2i64(<vscale x 2 x i1> %pg, <vscale x 2 x i64> %a, <vscale x 2 x i64> %b)
  ret <vscale x 2 x i64> %out
}

; As sub_i32 but where pg is i8 based and thus compatible for i32.
define <vscale x 4 x i32> @sub_i32_ptrue_all_b(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b) {
; CHECK-LABEL: sub_i32_ptrue_all_b:
; CHECK-NEXT: sub z0.s, z0.s, z1.s
  %pg.b = tail call <vscale x 16 x i1> @llvm.aarch64.sve.ptrue.nxv16i1(i32 31)
  %pg.s = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> %pg.b)
  %out = tail call <vscale x 4 x i32> @llvm.aarch64.sve.sub.u.nxv4i32(<vscale x 4 x i1> %pg.s, <vscale x 4 x i32> %a, <vscale x 4 x i32> %b)
  ret <vscale x 4 x i32> %out
}

; As sub_i32 but where pg is i16 based and thus compatible for i32.
define <vscale x 4 x i32> @sub_i32_ptrue_all_h(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b) {
; CHECK-LABEL: sub_i32_ptrue_all_h:
; CHECK-NEXT: sub z0.s, z0.s, z1.s
  %pg.h = tail call <vscale x 8 x i1> @llvm.aarch64.sve.ptrue.nxv8i1(i32 31)
  %pg.b = tail call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv8i1(<vscale x 8 x i1> %pg.h)
  %pg.s = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> %pg.b)
  %out = tail call <vscale x 4 x i32> @llvm.aarch64.sve.sub.u.nxv4i32(<vscale x 4 x i1> %pg.s, <vscale x 4 x i32> %a, <vscale x 4 x i32> %b)
  ret <vscale x 4 x i32> %out
}

; As sub_i32 but where pg is i64 based, which is not compatible for i32 (only
; every other i32 lane is active), so the inactive lanes matter and the
; unpredicated form cannot be used.
define <vscale x 4 x i32> @sub_i32_ptrue_all_d(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b) {
; CHECK-LABEL: sub_i32_ptrue_all_d:
; CHECK-NEXT: ptrue p0.d
; CHECK-NEXT: sub z0.s, p0/m, z0.s, z1.s
  %pg.d = tail call <vscale x 2 x i1> @llvm.aarch64.sve.ptrue.nxv2i1(i32 31)
  %pg.b = tail call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv2i1(<vscale x 2 x i1> %pg.d)
  %pg.s = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> %pg.b)
  %out = tail call <vscale x 4 x i32> @llvm.aarch64.sve.sub.nxv4i32(<vscale x 4 x i1> %pg.s, <vscale x 4 x i32> %a, <vscale x 4 x i32> %b)
  ret <vscale x 4 x i32> %out
}
define <vscale x 16 x i8> @mul_i8(<vscale x 16 x i8> %a, <vscale x 16 x i8> %b) {
; CHECK-LABEL: mul_i8:
; CHECK-NEXT: mul z0.b, z0.b, z1.b
  %pg = call <vscale x 16 x i1> @llvm.aarch64.sve.ptrue.nxv16i1(i32 31)
  %out = call <vscale x 16 x i8> @llvm.aarch64.sve.mul.u.nxv16i8(<vscale x 16 x i1> %pg, <vscale x 16 x i8> %a, <vscale x 16 x i8> %b)
  ret <vscale x 16 x i8> %out
}

define <vscale x 8 x i16> @mul_i16(<vscale x 8 x i16> %a, <vscale x 8 x i16> %b) {
; CHECK-LABEL: mul_i16:
; CHECK-NEXT: mul z0.h, z0.h, z1.h
  %pg = call <vscale x 8 x i1> @llvm.aarch64.sve.ptrue.nxv8i1(i32 31)
  %out = call <vscale x 8 x i16> @llvm.aarch64.sve.mul.u.nxv8i16(<vscale x 8 x i1> %pg, <vscale x 8 x i16> %a, <vscale x 8 x i16> %b)
  ret <vscale x 8 x i16> %out
}

define <vscale x 4 x i32> @mul_i32(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b) {
; CHECK-LABEL: mul_i32:
; CHECK-NEXT: mul z0.s, z0.s, z1.s
  %pg = call <vscale x 4 x i1> @llvm.aarch64.sve.ptrue.nxv4i1(i32 31)
  %out = call <vscale x 4 x i32> @llvm.aarch64.sve.mul.u.nxv4i32(<vscale x 4 x i1> %pg, <vscale x 4 x i32> %a, <vscale x 4 x i32> %b)
  ret <vscale x 4 x i32> %out
}

define <vscale x 2 x i64> @mul_i64(<vscale x 2 x i64> %a, <vscale x 2 x i64> %b) {
; CHECK-LABEL: mul_i64:
; CHECK-NEXT: mul z0.d, z0.d, z1.d
  %pg = call <vscale x 2 x i1> @llvm.aarch64.sve.ptrue.nxv2i1(i32 31)
  %out = call <vscale x 2 x i64> @llvm.aarch64.sve.mul.u.nxv2i64(<vscale x 2 x i1> %pg, <vscale x 2 x i64> %a, <vscale x 2 x i64> %b)
  ret <vscale x 2 x i64> %out
}

define <vscale x 16 x i8> @smulh_i8(<vscale x 16 x i8> %a, <vscale x 16 x i8> %b) {
; CHECK-LABEL: smulh_i8:
; CHECK-NEXT: smulh z0.b, z0.b, z1.b
  %pg = call <vscale x 16 x i1> @llvm.aarch64.sve.ptrue.nxv16i1(i32 31)
  %out = call <vscale x 16 x i8> @llvm.aarch64.sve.smulh.u.nxv16i8(<vscale x 16 x i1> %pg, <vscale x 16 x i8> %a, <vscale x 16 x i8> %b)
  ret <vscale x 16 x i8> %out
}

define <vscale x 8 x i16> @smulh_i16(<vscale x 8 x i16> %a, <vscale x 8 x i16> %b) {
; CHECK-LABEL: smulh_i16:
; CHECK-NEXT: smulh z0.h, z0.h, z1.h
  %pg = call <vscale x 8 x i1> @llvm.aarch64.sve.ptrue.nxv8i1(i32 31)
  %out = call <vscale x 8 x i16> @llvm.aarch64.sve.smulh.u.nxv8i16(<vscale x 8 x i1> %pg, <vscale x 8 x i16> %a, <vscale x 8 x i16> %b)
  ret <vscale x 8 x i16> %out
}

define <vscale x 4 x i32> @smulh_i32(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b) {
; CHECK-LABEL: smulh_i32:
; CHECK-NEXT: smulh z0.s, z0.s, z1.s
  %pg = call <vscale x 4 x i1> @llvm.aarch64.sve.ptrue.nxv4i1(i32 31)
  %out = call <vscale x 4 x i32> @llvm.aarch64.sve.smulh.u.nxv4i32(<vscale x 4 x i1> %pg, <vscale x 4 x i32> %a, <vscale x 4 x i32> %b)
  ret <vscale x 4 x i32> %out
}

define <vscale x 2 x i64> @smulh_i64(<vscale x 2 x i64> %a, <vscale x 2 x i64> %b) {
; CHECK-LABEL: smulh_i64:
; CHECK-NEXT: smulh z0.d, z0.d, z1.d
  %pg = call <vscale x 2 x i1> @llvm.aarch64.sve.ptrue.nxv2i1(i32 31)
  %out = call <vscale x 2 x i64> @llvm.aarch64.sve.smulh.u.nxv2i64(<vscale x 2 x i1> %pg, <vscale x 2 x i64> %a, <vscale x 2 x i64> %b)
  ret <vscale x 2 x i64> %out
}

define <vscale x 16 x i8> @umulh_i8(<vscale x 16 x i8> %a, <vscale x 16 x i8> %b) {
; CHECK-LABEL: umulh_i8:
; CHECK-NEXT: umulh z0.b, z0.b, z1.b
  %pg = call <vscale x 16 x i1> @llvm.aarch64.sve.ptrue.nxv16i1(i32 31)
  %out = call <vscale x 16 x i8> @llvm.aarch64.sve.umulh.u.nxv16i8(<vscale x 16 x i1> %pg, <vscale x 16 x i8> %a, <vscale x 16 x i8> %b)
  ret <vscale x 16 x i8> %out
}

define <vscale x 8 x i16> @umulh_i16(<vscale x 8 x i16> %a, <vscale x 8 x i16> %b) {
; CHECK-LABEL: umulh_i16:
; CHECK-NEXT: umulh z0.h, z0.h, z1.h
  %pg = call <vscale x 8 x i1> @llvm.aarch64.sve.ptrue.nxv8i1(i32 31)
  %out = call <vscale x 8 x i16> @llvm.aarch64.sve.umulh.u.nxv8i16(<vscale x 8 x i1> %pg, <vscale x 8 x i16> %a, <vscale x 8 x i16> %b)
  ret <vscale x 8 x i16> %out
}

define <vscale x 4 x i32> @umulh_i32(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b) {
; CHECK-LABEL: umulh_i32:
; CHECK-NEXT: umulh z0.s, z0.s, z1.s
  %pg = call <vscale x 4 x i1> @llvm.aarch64.sve.ptrue.nxv4i1(i32 31)
  %out = call <vscale x 4 x i32> @llvm.aarch64.sve.umulh.u.nxv4i32(<vscale x 4 x i1> %pg, <vscale x 4 x i32> %a, <vscale x 4 x i32> %b)
  ret <vscale x 4 x i32> %out
}

define <vscale x 2 x i64> @umulh_i64(<vscale x 2 x i64> %a, <vscale x 2 x i64> %b) {
; CHECK-LABEL: umulh_i64:
; CHECK-NEXT: umulh z0.d, z0.d, z1.d
  %pg = call <vscale x 2 x i1> @llvm.aarch64.sve.ptrue.nxv2i1(i32 31)
  %out = call <vscale x 2 x i64> @llvm.aarch64.sve.umulh.u.nxv2i64(<vscale x 2 x i1> %pg, <vscale x 2 x i64> %a, <vscale x 2 x i64> %b)
  ret <vscale x 2 x i64> %out
}

; As umulh_i32 but where pg is i8 based and thus compatible for i32.
define <vscale x 4 x i32> @umulh_i32_ptrue_all_b(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b) {
; CHECK-LABEL: umulh_i32_ptrue_all_b:
; CHECK-NEXT: umulh z0.s, z0.s, z1.s
  %pg.b = tail call <vscale x 16 x i1> @llvm.aarch64.sve.ptrue.nxv16i1(i32 31)
  %pg.s = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> %pg.b)
  %out = tail call <vscale x 4 x i32> @llvm.aarch64.sve.umulh.u.nxv4i32(<vscale x 4 x i1> %pg.s, <vscale x 4 x i32> %a, <vscale x 4 x i32> %b)
  ret <vscale x 4 x i32> %out
}

; As umulh_i32 but where pg is i16 based and thus compatible for i32.
define <vscale x 4 x i32> @umulh_i32_ptrue_all_h(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b) {
; CHECK-LABEL: umulh_i32_ptrue_all_h:
; CHECK-NEXT: umulh z0.s, z0.s, z1.s
  %pg.h = tail call <vscale x 8 x i1> @llvm.aarch64.sve.ptrue.nxv8i1(i32 31)
  %pg.b = tail call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv8i1(<vscale x 8 x i1> %pg.h)
  %pg.s = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> %pg.b)
  %out = tail call <vscale x 4 x i32> @llvm.aarch64.sve.umulh.u.nxv4i32(<vscale x 4 x i1> %pg.s, <vscale x 4 x i32> %a, <vscale x 4 x i32> %b)
  ret <vscale x 4 x i32> %out
}

; As umulh_i32 but where pg is i64 based, which is not compatible for i32 and
; thus inactive lanes are important and the unpredicated form cannot be used.
define <vscale x 4 x i32> @umulh_i32_ptrue_all_d(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b) {
; CHECK-LABEL: umulh_i32_ptrue_all_d:
; CHECK-NEXT: ptrue p0.d
; CHECK-NEXT: umulh z0.s, p0/m, z0.s, z1.s
  %pg.d = tail call <vscale x 2 x i1> @llvm.aarch64.sve.ptrue.nxv2i1(i32 31)
  %pg.b = tail call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv2i1(<vscale x 2 x i1> %pg.d)
  %pg.s = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> %pg.b)
  %out = tail call <vscale x 4 x i32> @llvm.aarch64.sve.umulh.nxv4i32(<vscale x 4 x i1> %pg.s, <vscale x 4 x i32> %a, <vscale x 4 x i32> %b)
  ret <vscale x 4 x i32> %out
}
define <vscale x 16 x i8> @and_i8(<vscale x 16 x i8> %a, <vscale x 16 x i8> %b) {
; CHECK-LABEL: and_i8:
; CHECK-NEXT: and z0.d, z0.d, z1.d
  %pg = call <vscale x 16 x i1> @llvm.aarch64.sve.ptrue.nxv16i1(i32 31)
  %out = call <vscale x 16 x i8> @llvm.aarch64.sve.and.u.nxv16i8(<vscale x 16 x i1> %pg, <vscale x 16 x i8> %a, <vscale x 16 x i8> %b)
  ret <vscale x 16 x i8> %out
}

define <vscale x 8 x i16> @and_i16(<vscale x 8 x i16> %a, <vscale x 8 x i16> %b) {
; CHECK-LABEL: and_i16:
; CHECK-NEXT: and z0.d, z0.d, z1.d
  %pg = call <vscale x 8 x i1> @llvm.aarch64.sve.ptrue.nxv8i1(i32 31)
  %out = call <vscale x 8 x i16> @llvm.aarch64.sve.and.u.nxv8i16(<vscale x 8 x i1> %pg, <vscale x 8 x i16> %a, <vscale x 8 x i16> %b)
  ret <vscale x 8 x i16> %out
}

define <vscale x 4 x i32> @and_i32(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b) {
; CHECK-LABEL: and_i32:
; CHECK-NEXT: and z0.d, z0.d, z1.d
  %pg = call <vscale x 4 x i1> @llvm.aarch64.sve.ptrue.nxv4i1(i32 31)
  %out = call <vscale x 4 x i32> @llvm.aarch64.sve.and.u.nxv4i32(<vscale x 4 x i1> %pg, <vscale x 4 x i32> %a, <vscale x 4 x i32> %b)
  ret <vscale x 4 x i32> %out
}

define <vscale x 2 x i64> @and_i64(<vscale x 2 x i64> %a, <vscale x 2 x i64> %b) {
; CHECK-LABEL: and_i64:
; CHECK-NEXT: and z0.d, z0.d, z1.d
  %pg = call <vscale x 2 x i1> @llvm.aarch64.sve.ptrue.nxv2i1(i32 31)
  %out = call <vscale x 2 x i64> @llvm.aarch64.sve.and.u.nxv2i64(<vscale x 2 x i1> %pg, <vscale x 2 x i64> %a, <vscale x 2 x i64> %b)
  ret <vscale x 2 x i64> %out
}

define <vscale x 16 x i8> @bic_i8(<vscale x 16 x i8> %a, <vscale x 16 x i8> %b) {
; CHECK-LABEL: bic_i8:
; CHECK-NEXT: bic z0.d, z0.d, z1.d
  %pg = call <vscale x 16 x i1> @llvm.aarch64.sve.ptrue.nxv16i1(i32 31)
  %out = call <vscale x 16 x i8> @llvm.aarch64.sve.bic.u.nxv16i8(<vscale x 16 x i1> %pg, <vscale x 16 x i8> %a, <vscale x 16 x i8> %b)
  ret <vscale x 16 x i8> %out
}

define <vscale x 8 x i16> @bic_i16(<vscale x 8 x i16> %a, <vscale x 8 x i16> %b) {
; CHECK-LABEL: bic_i16:
; CHECK-NEXT: bic z0.d, z0.d, z1.d
  %pg = call <vscale x 8 x i1> @llvm.aarch64.sve.ptrue.nxv8i1(i32 31)
  %out = call <vscale x 8 x i16> @llvm.aarch64.sve.bic.u.nxv8i16(<vscale x 8 x i1> %pg, <vscale x 8 x i16> %a, <vscale x 8 x i16> %b)
  ret <vscale x 8 x i16> %out
}

define <vscale x 4 x i32> @bic_i32(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b) {
; CHECK-LABEL: bic_i32:
; CHECK-NEXT: bic z0.d, z0.d, z1.d
  %pg = call <vscale x 4 x i1> @llvm.aarch64.sve.ptrue.nxv4i1(i32 31)
  %out = call <vscale x 4 x i32> @llvm.aarch64.sve.bic.u.nxv4i32(<vscale x 4 x i1> %pg, <vscale x 4 x i32> %a, <vscale x 4 x i32> %b)
  ret <vscale x 4 x i32> %out
}

define <vscale x 2 x i64> @bic_i64(<vscale x 2 x i64> %a, <vscale x 2 x i64> %b) {
; CHECK-LABEL: bic_i64:
; CHECK-NEXT: bic z0.d, z0.d, z1.d
  %pg = call <vscale x 2 x i1> @llvm.aarch64.sve.ptrue.nxv2i1(i32 31)
  %out = call <vscale x 2 x i64> @llvm.aarch64.sve.bic.u.nxv2i64(<vscale x 2 x i1> %pg, <vscale x 2 x i64> %a, <vscale x 2 x i64> %b)
  ret <vscale x 2 x i64> %out
}

define <vscale x 16 x i8> @eor_i8(<vscale x 16 x i8> %a, <vscale x 16 x i8> %b) {
; CHECK-LABEL: eor_i8:
; CHECK-NEXT: eor z0.d, z0.d, z1.d
  %pg = call <vscale x 16 x i1> @llvm.aarch64.sve.ptrue.nxv16i1(i32 31)
  %out = call <vscale x 16 x i8> @llvm.aarch64.sve.eor.u.nxv16i8(<vscale x 16 x i1> %pg, <vscale x 16 x i8> %a, <vscale x 16 x i8> %b)
  ret <vscale x 16 x i8> %out
}

define <vscale x 8 x i16> @eor_i16(<vscale x 8 x i16> %a, <vscale x 8 x i16> %b) {
; CHECK-LABEL: eor_i16:
; CHECK-NEXT: eor z0.d, z0.d, z1.d
  %pg = call <vscale x 8 x i1> @llvm.aarch64.sve.ptrue.nxv8i1(i32 31)
  %out = call <vscale x 8 x i16> @llvm.aarch64.sve.eor.u.nxv8i16(<vscale x 8 x i1> %pg, <vscale x 8 x i16> %a, <vscale x 8 x i16> %b)
  ret <vscale x 8 x i16> %out
}

define <vscale x 4 x i32> @eor_i32(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b) {
; CHECK-LABEL: eor_i32:
; CHECK-NEXT: eor z0.d, z0.d, z1.d
  %pg = call <vscale x 4 x i1> @llvm.aarch64.sve.ptrue.nxv4i1(i32 31)
  %out = call <vscale x 4 x i32> @llvm.aarch64.sve.eor.u.nxv4i32(<vscale x 4 x i1> %pg, <vscale x 4 x i32> %a, <vscale x 4 x i32> %b)
  ret <vscale x 4 x i32> %out
}

define <vscale x 2 x i64> @eor_i64(<vscale x 2 x i64> %a, <vscale x 2 x i64> %b) {
; CHECK-LABEL: eor_i64:
; CHECK-NEXT: eor z0.d, z0.d, z1.d
  %pg = call <vscale x 2 x i1> @llvm.aarch64.sve.ptrue.nxv2i1(i32 31)
  %out = call <vscale x 2 x i64> @llvm.aarch64.sve.eor.u.nxv2i64(<vscale x 2 x i1> %pg, <vscale x 2 x i64> %a, <vscale x 2 x i64> %b)
  ret <vscale x 2 x i64> %out
}

define <vscale x 16 x i8> @orr_i8(<vscale x 16 x i8> %a, <vscale x 16 x i8> %b) {
; CHECK-LABEL: orr_i8:
; CHECK-NEXT: orr z0.d, z0.d, z1.d
  %pg = call <vscale x 16 x i1> @llvm.aarch64.sve.ptrue.nxv16i1(i32 31)
  %out = call <vscale x 16 x i8> @llvm.aarch64.sve.orr.u.nxv16i8(<vscale x 16 x i1> %pg, <vscale x 16 x i8> %a, <vscale x 16 x i8> %b)
  ret <vscale x 16 x i8> %out
}

define <vscale x 8 x i16> @orr_i16(<vscale x 8 x i16> %a, <vscale x 8 x i16> %b) {
; CHECK-LABEL: orr_i16:
; CHECK-NEXT: orr z0.d, z0.d, z1.d
  %pg = call <vscale x 8 x i1> @llvm.aarch64.sve.ptrue.nxv8i1(i32 31)
  %out = call <vscale x 8 x i16> @llvm.aarch64.sve.orr.u.nxv8i16(<vscale x 8 x i1> %pg, <vscale x 8 x i16> %a, <vscale x 8 x i16> %b)
  ret <vscale x 8 x i16> %out
}

define <vscale x 4 x i32> @orr_i32(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b) {
; CHECK-LABEL: orr_i32:
; CHECK-NEXT: orr z0.d, z0.d, z1.d
  %pg = call <vscale x 4 x i1> @llvm.aarch64.sve.ptrue.nxv4i1(i32 31)
  %out = call <vscale x 4 x i32> @llvm.aarch64.sve.orr.u.nxv4i32(<vscale x 4 x i1> %pg, <vscale x 4 x i32> %a, <vscale x 4 x i32> %b)
  ret <vscale x 4 x i32> %out
}

define <vscale x 2 x i64> @orr_i64(<vscale x 2 x i64> %a, <vscale x 2 x i64> %b) {
; CHECK-LABEL: orr_i64:
; CHECK-NEXT: orr z0.d, z0.d, z1.d
  %pg = call <vscale x 2 x i1> @llvm.aarch64.sve.ptrue.nxv2i1(i32 31)
  %out = call <vscale x 2 x i64> @llvm.aarch64.sve.orr.u.nxv2i64(<vscale x 2 x i1> %pg, <vscale x 2 x i64> %a, <vscale x 2 x i64> %b)
  ret <vscale x 2 x i64> %out
}

; As orr_i32 but where pg is i8 based and thus compatible for i32.
define <vscale x 4 x i32> @orr_i32_ptrue_all_b(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b) {
; CHECK-LABEL: orr_i32_ptrue_all_b:
; CHECK-NEXT: orr z0.d, z0.d, z1.d
  %pg.b = tail call <vscale x 16 x i1> @llvm.aarch64.sve.ptrue.nxv16i1(i32 31)
  %pg.s = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> %pg.b)
  %out = tail call <vscale x 4 x i32> @llvm.aarch64.sve.orr.u.nxv4i32(<vscale x 4 x i1> %pg.s, <vscale x 4 x i32> %a, <vscale x 4 x i32> %b)
  ret <vscale x 4 x i32> %out
}

; As orr_i32 but where pg is i16 based and thus compatible for i32.
define <vscale x 4 x i32> @orr_i32_ptrue_all_h(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b) {
; CHECK-LABEL: orr_i32_ptrue_all_h:
; CHECK-NEXT: orr z0.d, z0.d, z1.d
  %pg.h = tail call <vscale x 8 x i1> @llvm.aarch64.sve.ptrue.nxv8i1(i32 31)
  %pg.b = tail call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv8i1(<vscale x 8 x i1> %pg.h)
  %pg.s = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> %pg.b)
  %out = tail call <vscale x 4 x i32> @llvm.aarch64.sve.orr.u.nxv4i32(<vscale x 4 x i1> %pg.s, <vscale x 4 x i32> %a, <vscale x 4 x i32> %b)
  ret <vscale x 4 x i32> %out
}

; As orr_i32 but where pg is i64 based, which is not compatible for i32 and
; thus inactive lanes are important and the unpredicated form cannot be used.
define <vscale x 4 x i32> @orr_i32_ptrue_all_d(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b) {
; CHECK-LABEL: orr_i32_ptrue_all_d:
; CHECK-NEXT: ptrue p0.d
; CHECK-NEXT: orr z0.s, p0/m, z0.s, z1.s
  %pg.d = tail call <vscale x 2 x i1> @llvm.aarch64.sve.ptrue.nxv2i1(i32 31)
  %pg.b = tail call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv2i1(<vscale x 2 x i1> %pg.d)
  %pg.s = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> %pg.b)
  %out = tail call <vscale x 4 x i32> @llvm.aarch64.sve.orr.nxv4i32(<vscale x 4 x i1> %pg.s, <vscale x 4 x i32> %a, <vscale x 4 x i32> %b)
  ret <vscale x 4 x i32> %out
}
define <vscale x 16 x i8> @sqadd_i8(<vscale x 16 x i8> %a, <vscale x 16 x i8> %b) {
; CHECK-LABEL: sqadd_i8:
; CHECK-NEXT: sqadd z0.b, z0.b, z1.b
  %pg = call <vscale x 16 x i1> @llvm.aarch64.sve.ptrue.nxv16i1(i32 31)
  %out = call <vscale x 16 x i8> @llvm.aarch64.sve.sqadd.nxv16i8(<vscale x 16 x i1> %pg, <vscale x 16 x i8> %a, <vscale x 16 x i8> %b)
  ret <vscale x 16 x i8> %out
}

define <vscale x 8 x i16> @sqadd_i16(<vscale x 8 x i16> %a, <vscale x 8 x i16> %b) {
; CHECK-LABEL: sqadd_i16:
; CHECK-NEXT: sqadd z0.h, z0.h, z1.h
  %pg = call <vscale x 8 x i1> @llvm.aarch64.sve.ptrue.nxv8i1(i32 31)
  %out = call <vscale x 8 x i16> @llvm.aarch64.sve.sqadd.nxv8i16(<vscale x 8 x i1> %pg, <vscale x 8 x i16> %a, <vscale x 8 x i16> %b)
  ret <vscale x 8 x i16> %out
}

define <vscale x 4 x i32> @sqadd_i32(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b) {
; CHECK-LABEL: sqadd_i32:
; CHECK-NEXT: sqadd z0.s, z0.s, z1.s
  %pg = call <vscale x 4 x i1> @llvm.aarch64.sve.ptrue.nxv4i1(i32 31)
  %out = call <vscale x 4 x i32> @llvm.aarch64.sve.sqadd.nxv4i32(<vscale x 4 x i1> %pg, <vscale x 4 x i32> %a, <vscale x 4 x i32> %b)
  ret <vscale x 4 x i32> %out
}

define <vscale x 2 x i64> @sqadd_i64(<vscale x 2 x i64> %a, <vscale x 2 x i64> %b) {
; CHECK-LABEL: sqadd_i64:
; CHECK-NEXT: sqadd z0.d, z0.d, z1.d
  %pg = call <vscale x 2 x i1> @llvm.aarch64.sve.ptrue.nxv2i1(i32 31)
  %out = call <vscale x 2 x i64> @llvm.aarch64.sve.sqadd.nxv2i64(<vscale x 2 x i1> %pg, <vscale x 2 x i64> %a, <vscale x 2 x i64> %b)
  ret <vscale x 2 x i64> %out
}

define <vscale x 16 x i8> @sqsub_i8(<vscale x 16 x i8> %a, <vscale x 16 x i8> %b) {
; CHECK-LABEL: sqsub_i8:
; CHECK-NEXT: sqsub z0.b, z0.b, z1.b
  %pg = call <vscale x 16 x i1> @llvm.aarch64.sve.ptrue.nxv16i1(i32 31)
  %out = call <vscale x 16 x i8> @llvm.aarch64.sve.sqsub.u.nxv16i8(<vscale x 16 x i1> %pg, <vscale x 16 x i8> %a, <vscale x 16 x i8> %b)
  ret <vscale x 16 x i8> %out
}

define <vscale x 8 x i16> @sqsub_i16(<vscale x 8 x i16> %a, <vscale x 8 x i16> %b) {
; CHECK-LABEL: sqsub_i16:
; CHECK-NEXT: sqsub z0.h, z0.h, z1.h
  %pg = call <vscale x 8 x i1> @llvm.aarch64.sve.ptrue.nxv8i1(i32 31)
  %out = call <vscale x 8 x i16> @llvm.aarch64.sve.sqsub.u.nxv8i16(<vscale x 8 x i1> %pg, <vscale x 8 x i16> %a, <vscale x 8 x i16> %b)
  ret <vscale x 8 x i16> %out
}

define <vscale x 4 x i32> @sqsub_i32(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b) {
; CHECK-LABEL: sqsub_i32:
; CHECK-NEXT: sqsub z0.s, z0.s, z1.s
  %pg = call <vscale x 4 x i1> @llvm.aarch64.sve.ptrue.nxv4i1(i32 31)
  %out = call <vscale x 4 x i32> @llvm.aarch64.sve.sqsub.u.nxv4i32(<vscale x 4 x i1> %pg, <vscale x 4 x i32> %a, <vscale x 4 x i32> %b)
  ret <vscale x 4 x i32> %out
}

define <vscale x 2 x i64> @sqsub_i64(<vscale x 2 x i64> %a, <vscale x 2 x i64> %b) {
; CHECK-LABEL: sqsub_i64:
; CHECK-NEXT: sqsub z0.d, z0.d, z1.d
  %pg = call <vscale x 2 x i1> @llvm.aarch64.sve.ptrue.nxv2i1(i32 31)
  %out = call <vscale x 2 x i64> @llvm.aarch64.sve.sqsub.u.nxv2i64(<vscale x 2 x i1> %pg, <vscale x 2 x i64> %a, <vscale x 2 x i64> %b)
  ret <vscale x 2 x i64> %out
}

define <vscale x 16 x i8> @uqadd_i8(<vscale x 16 x i8> %a, <vscale x 16 x i8> %b) {
; CHECK-LABEL: uqadd_i8:
; CHECK-NEXT: uqadd z0.b, z0.b, z1.b
  %pg = call <vscale x 16 x i1> @llvm.aarch64.sve.ptrue.nxv16i1(i32 31)
  %out = call <vscale x 16 x i8> @llvm.aarch64.sve.uqadd.nxv16i8(<vscale x 16 x i1> %pg, <vscale x 16 x i8> %a, <vscale x 16 x i8> %b)
  ret <vscale x 16 x i8> %out
}

define <vscale x 8 x i16> @uqadd_i16(<vscale x 8 x i16> %a, <vscale x 8 x i16> %b) {
; CHECK-LABEL: uqadd_i16:
; CHECK-NEXT: uqadd z0.h, z0.h, z1.h
  %pg = call <vscale x 8 x i1> @llvm.aarch64.sve.ptrue.nxv8i1(i32 31)
  %out = call <vscale x 8 x i16> @llvm.aarch64.sve.uqadd.nxv8i16(<vscale x 8 x i1> %pg, <vscale x 8 x i16> %a, <vscale x 8 x i16> %b)
  ret <vscale x 8 x i16> %out
}

define <vscale x 4 x i32> @uqadd_i32(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b) {
; CHECK-LABEL: uqadd_i32:
; CHECK-NEXT: uqadd z0.s, z0.s, z1.s
  %pg = call <vscale x 4 x i1> @llvm.aarch64.sve.ptrue.nxv4i1(i32 31)
  %out = call <vscale x 4 x i32> @llvm.aarch64.sve.uqadd.nxv4i32(<vscale x 4 x i1> %pg, <vscale x 4 x i32> %a, <vscale x 4 x i32> %b)
  ret <vscale x 4 x i32> %out
}

define <vscale x 2 x i64> @uqadd_i64(<vscale x 2 x i64> %a, <vscale x 2 x i64> %b) {
; CHECK-LABEL: uqadd_i64:
; CHECK-NEXT: uqadd z0.d, z0.d, z1.d
  %pg = call <vscale x 2 x i1> @llvm.aarch64.sve.ptrue.nxv2i1(i32 31)
  %out = call <vscale x 2 x i64> @llvm.aarch64.sve.uqadd.nxv2i64(<vscale x 2 x i1> %pg, <vscale x 2 x i64> %a, <vscale x 2 x i64> %b)
  ret <vscale x 2 x i64> %out
}

define <vscale x 16 x i8> @uqsub_i8(<vscale x 16 x i8> %a, <vscale x 16 x i8> %b) {
; CHECK-LABEL: uqsub_i8:
; CHECK-NEXT: uqsub z0.b, z0.b, z1.b
  %pg = call <vscale x 16 x i1> @llvm.aarch64.sve.ptrue.nxv16i1(i32 31)
  %out = call <vscale x 16 x i8> @llvm.aarch64.sve.uqsub.u.nxv16i8(<vscale x 16 x i1> %pg, <vscale x 16 x i8> %a, <vscale x 16 x i8> %b)
  ret <vscale x 16 x i8> %out
}

define <vscale x 8 x i16> @uqsub_i16(<vscale x 8 x i16> %a, <vscale x 8 x i16> %b) {
; CHECK-LABEL: uqsub_i16:
; CHECK-NEXT: uqsub z0.h, z0.h, z1.h
  %pg = call <vscale x 8 x i1> @llvm.aarch64.sve.ptrue.nxv8i1(i32 31)
  %out = call <vscale x 8 x i16> @llvm.aarch64.sve.uqsub.u.nxv8i16(<vscale x 8 x i1> %pg, <vscale x 8 x i16> %a, <vscale x 8 x i16> %b)
  ret <vscale x 8 x i16> %out
}

define <vscale x 4 x i32> @uqsub_i32(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b) {
; CHECK-LABEL: uqsub_i32:
; CHECK-NEXT: uqsub z0.s, z0.s, z1.s
  %pg = call <vscale x 4 x i1> @llvm.aarch64.sve.ptrue.nxv4i1(i32 31)
  %out = call <vscale x 4 x i32> @llvm.aarch64.sve.uqsub.u.nxv4i32(<vscale x 4 x i1> %pg, <vscale x 4 x i32> %a, <vscale x 4 x i32> %b)
  ret <vscale x 4 x i32> %out
}

define <vscale x 2 x i64> @uqsub_i64(<vscale x 2 x i64> %a, <vscale x 2 x i64> %b) {
; CHECK-LABEL: uqsub_i64:
; CHECK-NEXT: uqsub z0.d, z0.d, z1.d
  %pg = call <vscale x 2 x i1> @llvm.aarch64.sve.ptrue.nxv2i1(i32 31)
  %out = call <vscale x 2 x i64> @llvm.aarch64.sve.uqsub.u.nxv2i64(<vscale x 2 x i1> %pg, <vscale x 2 x i64> %a, <vscale x 2 x i64> %b)
  ret <vscale x 2 x i64> %out
}

; As uqsub_i32 but where pg is i8 based and thus compatible for i32.
define <vscale x 4 x i32> @uqsub_i32_ptrue_all_b(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b) {
; CHECK-LABEL: uqsub_i32_ptrue_all_b:
; CHECK-NEXT: uqsub z0.s, z0.s, z1.s
  %pg.b = tail call <vscale x 16 x i1> @llvm.aarch64.sve.ptrue.nxv16i1(i32 31)
  %pg.s = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> %pg.b)
  %out = tail call <vscale x 4 x i32> @llvm.aarch64.sve.uqsub.u.nxv4i32(<vscale x 4 x i1> %pg.s, <vscale x 4 x i32> %a, <vscale x 4 x i32> %b)
  ret <vscale x 4 x i32> %out
}

; As uqsub_i32 but where pg is i16 based and thus compatible for i32.
define <vscale x 4 x i32> @uqsub_i32_ptrue_all_h(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b) {
; CHECK-LABEL: uqsub_i32_ptrue_all_h:
; CHECK-NEXT: uqsub z0.s, z0.s, z1.s
  %pg.h = tail call <vscale x 8 x i1> @llvm.aarch64.sve.ptrue.nxv8i1(i32 31)
  %pg.b = tail call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv8i1(<vscale x 8 x i1> %pg.h)
  %pg.s = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> %pg.b)
  %out = tail call <vscale x 4 x i32> @llvm.aarch64.sve.uqsub.u.nxv4i32(<vscale x 4 x i1> %pg.s, <vscale x 4 x i32> %a, <vscale x 4 x i32> %b)
  ret <vscale x 4 x i32> %out
}

; As uqsub_i32 but where pg is i64 based, which is not compatible for i32 and
; thus inactive lanes are important and the unpredicated form cannot be used.
define <vscale x 4 x i32> @uqsub_i32_ptrue_all_d(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b) {
; CHECK-LABEL: uqsub_i32_ptrue_all_d:
; CHECK-NEXT: ptrue p0.d
; CHECK-NEXT: uqsub z0.s, p0/m, z0.s, z1.s
  %pg.d = tail call <vscale x 2 x i1> @llvm.aarch64.sve.ptrue.nxv2i1(i32 31)
  %pg.b = tail call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv2i1(<vscale x 2 x i1> %pg.d)
  %pg.s = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> %pg.b)
  %out = tail call <vscale x 4 x i32> @llvm.aarch64.sve.uqsub.nxv4i32(<vscale x 4 x i1> %pg.s, <vscale x 4 x i32> %a, <vscale x 4 x i32> %b)
  ret <vscale x 4 x i32> %out
}
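; For the wide shift intrinsics below the shift amount is always an i64
; vector, independent of the data element type, so the expected instructions
; take a z1.d operand.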
define <vscale x 16 x i8> @asr_i8(<vscale x 16 x i8> %a, <vscale x 2 x i64> %b) {
; CHECK-LABEL: asr_i8:
; CHECK-NEXT: asr z0.b, z0.b, z1.d
  %pg = call <vscale x 16 x i1> @llvm.aarch64.sve.ptrue.nxv16i1(i32 31)
  %out = call <vscale x 16 x i8> @llvm.aarch64.sve.asr.wide.nxv16i8(<vscale x 16 x i1> %pg, <vscale x 16 x i8> %a, <vscale x 2 x i64> %b)
  ret <vscale x 16 x i8> %out
}

define <vscale x 8 x i16> @asr_i16(<vscale x 8 x i16> %a, <vscale x 2 x i64> %b) {
; CHECK-LABEL: asr_i16:
; CHECK-NEXT: asr z0.h, z0.h, z1.d
  %pg = call <vscale x 8 x i1> @llvm.aarch64.sve.ptrue.nxv8i1(i32 31)
  %out = call <vscale x 8 x i16> @llvm.aarch64.sve.asr.wide.nxv8i16(<vscale x 8 x i1> %pg, <vscale x 8 x i16> %a, <vscale x 2 x i64> %b)
  ret <vscale x 8 x i16> %out
}

define <vscale x 4 x i32> @asr_i32(<vscale x 4 x i32> %a, <vscale x 2 x i64> %b) {
; CHECK-LABEL: asr_i32:
; CHECK-NEXT: asr z0.s, z0.s, z1.d
  %pg = call <vscale x 4 x i1> @llvm.aarch64.sve.ptrue.nxv4i1(i32 31)
  %out = call <vscale x 4 x i32> @llvm.aarch64.sve.asr.wide.nxv4i32(<vscale x 4 x i1> %pg, <vscale x 4 x i32> %a, <vscale x 2 x i64> %b)
  ret <vscale x 4 x i32> %out
}

define <vscale x 16 x i8> @lsl_i8(<vscale x 16 x i8> %a, <vscale x 2 x i64> %b) {
; CHECK-LABEL: lsl_i8:
; CHECK-NEXT: lsl z0.b, z0.b, z1.d
  %pg = call <vscale x 16 x i1> @llvm.aarch64.sve.ptrue.nxv16i1(i32 31)
  %out = call <vscale x 16 x i8> @llvm.aarch64.sve.lsl.wide.nxv16i8(<vscale x 16 x i1> %pg, <vscale x 16 x i8> %a, <vscale x 2 x i64> %b)
  ret <vscale x 16 x i8> %out
}

define <vscale x 8 x i16> @lsl_i16(<vscale x 8 x i16> %a, <vscale x 2 x i64> %b) {
; CHECK-LABEL: lsl_i16:
; CHECK-NEXT: lsl z0.h, z0.h, z1.d
  %pg = call <vscale x 8 x i1> @llvm.aarch64.sve.ptrue.nxv8i1(i32 31)
  %out = call <vscale x 8 x i16> @llvm.aarch64.sve.lsl.wide.nxv8i16(<vscale x 8 x i1> %pg, <vscale x 8 x i16> %a, <vscale x 2 x i64> %b)
  ret <vscale x 8 x i16> %out
}

define <vscale x 4 x i32> @lsl_i32(<vscale x 4 x i32> %a, <vscale x 2 x i64> %b) {
; CHECK-LABEL: lsl_i32:
; CHECK-NEXT: lsl z0.s, z0.s, z1.d
  %pg = call <vscale x 4 x i1> @llvm.aarch64.sve.ptrue.nxv4i1(i32 31)
  %out = call <vscale x 4 x i32> @llvm.aarch64.sve.lsl.wide.nxv4i32(<vscale x 4 x i1> %pg, <vscale x 4 x i32> %a, <vscale x 2 x i64> %b)
  ret <vscale x 4 x i32> %out
}

define <vscale x 16 x i8> @lsr_i8(<vscale x 16 x i8> %a, <vscale x 2 x i64> %b) {
; CHECK-LABEL: lsr_i8:
; CHECK-NEXT: lsr z0.b, z0.b, z1.d
  %pg = call <vscale x 16 x i1> @llvm.aarch64.sve.ptrue.nxv16i1(i32 31)
  %out = call <vscale x 16 x i8> @llvm.aarch64.sve.lsr.wide.nxv16i8(<vscale x 16 x i1> %pg, <vscale x 16 x i8> %a, <vscale x 2 x i64> %b)
  ret <vscale x 16 x i8> %out
}

define <vscale x 8 x i16> @lsr_i16(<vscale x 8 x i16> %a, <vscale x 2 x i64> %b) {
; CHECK-LABEL: lsr_i16:
; CHECK-NEXT: lsr z0.h, z0.h, z1.d
  %pg = call <vscale x 8 x i1> @llvm.aarch64.sve.ptrue.nxv8i1(i32 31)
  %out = call <vscale x 8 x i16> @llvm.aarch64.sve.lsr.wide.nxv8i16(<vscale x 8 x i1> %pg, <vscale x 8 x i16> %a, <vscale x 2 x i64> %b)
  ret <vscale x 8 x i16> %out
}

define <vscale x 4 x i32> @lsr_i32(<vscale x 4 x i32> %a, <vscale x 2 x i64> %b) {
; CHECK-LABEL: lsr_i32:
; CHECK-NEXT: lsr z0.s, z0.s, z1.d
  %pg = call <vscale x 4 x i1> @llvm.aarch64.sve.ptrue.nxv4i1(i32 31)
  %out = call <vscale x 4 x i32> @llvm.aarch64.sve.lsr.wide.nxv4i32(<vscale x 4 x i1> %pg, <vscale x 4 x i32> %a, <vscale x 2 x i64> %b)
  ret <vscale x 4 x i32> %out
}

; As lsr_i32 but where pg is i8 based and thus compatible for i32.
define <vscale x 4 x i32> @lsr_i32_ptrue_all_b(<vscale x 4 x i32> %a, <vscale x 2 x i64> %b) {
; CHECK-LABEL: lsr_i32_ptrue_all_b:
; CHECK-NEXT: lsr z0.s, z0.s, z1.d
  %pg.b = tail call <vscale x 16 x i1> @llvm.aarch64.sve.ptrue.nxv16i1(i32 31)
  %pg.s = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> %pg.b)
  %out = tail call <vscale x 4 x i32> @llvm.aarch64.sve.lsr.wide.nxv4i32(<vscale x 4 x i1> %pg.s, <vscale x 4 x i32> %a, <vscale x 2 x i64> %b)
  ret <vscale x 4 x i32> %out
}

; As lsr_i32 but where pg is i16 based and thus compatible for i32.
define <vscale x 4 x i32> @lsr_i32_ptrue_all_h(<vscale x 4 x i32> %a, <vscale x 2 x i64> %b) {
; CHECK-LABEL: lsr_i32_ptrue_all_h:
; CHECK-NEXT: lsr z0.s, z0.s, z1.d
  %pg.h = tail call <vscale x 8 x i1> @llvm.aarch64.sve.ptrue.nxv8i1(i32 31)
  %pg.b = tail call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv8i1(<vscale x 8 x i1> %pg.h)
  %pg.s = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> %pg.b)
  %out = tail call <vscale x 4 x i32> @llvm.aarch64.sve.lsr.wide.nxv4i32(<vscale x 4 x i1> %pg.s, <vscale x 4 x i32> %a, <vscale x 2 x i64> %b)
  ret <vscale x 4 x i32> %out
}

; As lsr_i32 but where pg is i64 based, which is not compatible for i32 and
; thus inactive lanes are important and the unpredicated form cannot be used.
define <vscale x 4 x i32> @lsr_i32_ptrue_all_d(<vscale x 4 x i32> %a, <vscale x 2 x i64> %b) {
; CHECK-LABEL: lsr_i32_ptrue_all_d:
; CHECK-NEXT: ptrue p0.d
; CHECK-NEXT: lsr z0.s, p0/m, z0.s, z1.d
  %pg.d = tail call <vscale x 2 x i1> @llvm.aarch64.sve.ptrue.nxv2i1(i32 31)
  %pg.b = tail call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv2i1(<vscale x 2 x i1> %pg.d)
  %pg.s = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> %pg.b)
  %out = tail call <vscale x 4 x i32> @llvm.aarch64.sve.lsr.wide.nxv4i32(<vscale x 4 x i1> %pg.s, <vscale x 4 x i32> %a, <vscale x 2 x i64> %b)
  ret <vscale x 4 x i32> %out
}
define <vscale x 8 x half> @fadd_half(<vscale x 8 x half> %a, <vscale x 8 x half> %b) {
; CHECK-LABEL: fadd_half:
; CHECK-NEXT: fadd z0.h, z0.h, z1.h
  %pg = call <vscale x 8 x i1> @llvm.aarch64.sve.ptrue.nxv8i1(i32 31)
  %out = call <vscale x 8 x half> @llvm.aarch64.sve.fadd.u.nxv8f16(<vscale x 8 x i1> %pg, <vscale x 8 x half> %a, <vscale x 8 x half> %b)
  ret <vscale x 8 x half> %out
}

define <vscale x 4 x float> @fadd_float(<vscale x 4 x float> %a, <vscale x 4 x float> %b) {
; CHECK-LABEL: fadd_float:
; CHECK-NEXT: fadd z0.s, z0.s, z1.s
  %pg = call <vscale x 4 x i1> @llvm.aarch64.sve.ptrue.nxv4i1(i32 31)
  %out = call <vscale x 4 x float> @llvm.aarch64.sve.fadd.u.nxv4f32(<vscale x 4 x i1> %pg, <vscale x 4 x float> %a, <vscale x 4 x float> %b)
  ret <vscale x 4 x float> %out
}

define <vscale x 2 x double> @fadd_double(<vscale x 2 x double> %a, <vscale x 2 x double> %b) {
; CHECK-LABEL: fadd_double:
; CHECK-NEXT: fadd z0.d, z0.d, z1.d
  %pg = call <vscale x 2 x i1> @llvm.aarch64.sve.ptrue.nxv2i1(i32 31)
  %out = call <vscale x 2 x double> @llvm.aarch64.sve.fadd.u.nxv2f64(<vscale x 2 x i1> %pg, <vscale x 2 x double> %a, <vscale x 2 x double> %b)
  ret <vscale x 2 x double> %out
}

define <vscale x 8 x half> @fsub_half(<vscale x 8 x half> %a, <vscale x 8 x half> %b) {
; CHECK-LABEL: fsub_half:
; CHECK-NEXT: fsub z0.h, z0.h, z1.h
  %pg = call <vscale x 8 x i1> @llvm.aarch64.sve.ptrue.nxv8i1(i32 31)
  %out = call <vscale x 8 x half> @llvm.aarch64.sve.fsub.u.nxv8f16(<vscale x 8 x i1> %pg, <vscale x 8 x half> %a, <vscale x 8 x half> %b)
  ret <vscale x 8 x half> %out
}

define <vscale x 4 x float> @fsub_float(<vscale x 4 x float> %a, <vscale x 4 x float> %b) {
; CHECK-LABEL: fsub_float:
; CHECK-NEXT: fsub z0.s, z0.s, z1.s
  %pg = call <vscale x 4 x i1> @llvm.aarch64.sve.ptrue.nxv4i1(i32 31)
  %out = call <vscale x 4 x float> @llvm.aarch64.sve.fsub.u.nxv4f32(<vscale x 4 x i1> %pg, <vscale x 4 x float> %a, <vscale x 4 x float> %b)
  ret <vscale x 4 x float> %out
}

define <vscale x 2 x double> @fsub_double(<vscale x 2 x double> %a, <vscale x 2 x double> %b) {
; CHECK-LABEL: fsub_double:
; CHECK-NEXT: fsub z0.d, z0.d, z1.d
  %pg = call <vscale x 2 x i1> @llvm.aarch64.sve.ptrue.nxv2i1(i32 31)
  %out = call <vscale x 2 x double> @llvm.aarch64.sve.fsub.u.nxv2f64(<vscale x 2 x i1> %pg, <vscale x 2 x double> %a, <vscale x 2 x double> %b)
  ret <vscale x 2 x double> %out
}

define <vscale x 8 x half> @fmul_half(<vscale x 8 x half> %a, <vscale x 8 x half> %b) {
; CHECK-LABEL: fmul_half:
; CHECK-NEXT: fmul z0.h, z0.h, z1.h
  %pg = call <vscale x 8 x i1> @llvm.aarch64.sve.ptrue.nxv8i1(i32 31)
  %out = call <vscale x 8 x half> @llvm.aarch64.sve.fmul.u.nxv8f16(<vscale x 8 x i1> %pg, <vscale x 8 x half> %a, <vscale x 8 x half> %b)
  ret <vscale x 8 x half> %out
}

define <vscale x 4 x float> @fmul_float(<vscale x 4 x float> %a, <vscale x 4 x float> %b) {
; CHECK-LABEL: fmul_float:
; CHECK-NEXT: fmul z0.s, z0.s, z1.s
  %pg = call <vscale x 4 x i1> @llvm.aarch64.sve.ptrue.nxv4i1(i32 31)
  %out = call <vscale x 4 x float> @llvm.aarch64.sve.fmul.u.nxv4f32(<vscale x 4 x i1> %pg, <vscale x 4 x float> %a, <vscale x 4 x float> %b)
  ret <vscale x 4 x float> %out
}

define <vscale x 2 x double> @fmul_double(<vscale x 2 x double> %a, <vscale x 2 x double> %b) {
; CHECK-LABEL: fmul_double:
; CHECK-NEXT: fmul z0.d, z0.d, z1.d
  %pg = call <vscale x 2 x i1> @llvm.aarch64.sve.ptrue.nxv2i1(i32 31)
  %out = call <vscale x 2 x double> @llvm.aarch64.sve.fmul.u.nxv2f64(<vscale x 2 x i1> %pg, <vscale x 2 x double> %a, <vscale x 2 x double> %b)
  ret <vscale x 2 x double> %out
}
declare <vscale x 16 x i8> @llvm.aarch64.sve.add.u.nxv16i8(<vscale x 16 x i1>, <vscale x 16 x i8>, <vscale x 16 x i8>)
declare <vscale x 8 x i16> @llvm.aarch64.sve.add.u.nxv8i16(<vscale x 8 x i1>, <vscale x 8 x i16>, <vscale x 8 x i16>)
declare <vscale x 4 x i32> @llvm.aarch64.sve.add.u.nxv4i32(<vscale x 4 x i1>, <vscale x 4 x i32>, <vscale x 4 x i32>)
declare <vscale x 2 x i64> @llvm.aarch64.sve.add.u.nxv2i64(<vscale x 2 x i1>, <vscale x 2 x i64>, <vscale x 2 x i64>)

declare <vscale x 4 x i32> @llvm.aarch64.sve.sub.nxv4i32(<vscale x 4 x i1>, <vscale x 4 x i32>, <vscale x 4 x i32>)

declare <vscale x 16 x i8> @llvm.aarch64.sve.sub.u.nxv16i8(<vscale x 16 x i1>, <vscale x 16 x i8>, <vscale x 16 x i8>)
declare <vscale x 8 x i16> @llvm.aarch64.sve.sub.u.nxv8i16(<vscale x 8 x i1>, <vscale x 8 x i16>, <vscale x 8 x i16>)
declare <vscale x 4 x i32> @llvm.aarch64.sve.sub.u.nxv4i32(<vscale x 4 x i1>, <vscale x 4 x i32>, <vscale x 4 x i32>)
declare <vscale x 2 x i64> @llvm.aarch64.sve.sub.u.nxv2i64(<vscale x 2 x i1>, <vscale x 2 x i64>, <vscale x 2 x i64>)

declare <vscale x 16 x i8> @llvm.aarch64.sve.mul.u.nxv16i8(<vscale x 16 x i1>, <vscale x 16 x i8>, <vscale x 16 x i8>)
declare <vscale x 8 x i16> @llvm.aarch64.sve.mul.u.nxv8i16(<vscale x 8 x i1>, <vscale x 8 x i16>, <vscale x 8 x i16>)
declare <vscale x 4 x i32> @llvm.aarch64.sve.mul.u.nxv4i32(<vscale x 4 x i1>, <vscale x 4 x i32>, <vscale x 4 x i32>)
declare <vscale x 2 x i64> @llvm.aarch64.sve.mul.u.nxv2i64(<vscale x 2 x i1>, <vscale x 2 x i64>, <vscale x 2 x i64>)

declare <vscale x 16 x i8> @llvm.aarch64.sve.smulh.u.nxv16i8(<vscale x 16 x i1>, <vscale x 16 x i8>, <vscale x 16 x i8>)
declare <vscale x 8 x i16> @llvm.aarch64.sve.smulh.u.nxv8i16(<vscale x 8 x i1>, <vscale x 8 x i16>, <vscale x 8 x i16>)
declare <vscale x 4 x i32> @llvm.aarch64.sve.smulh.u.nxv4i32(<vscale x 4 x i1>, <vscale x 4 x i32>, <vscale x 4 x i32>)
declare <vscale x 2 x i64> @llvm.aarch64.sve.smulh.u.nxv2i64(<vscale x 2 x i1>, <vscale x 2 x i64>, <vscale x 2 x i64>)

declare <vscale x 4 x i32> @llvm.aarch64.sve.umulh.nxv4i32(<vscale x 4 x i1>, <vscale x 4 x i32>, <vscale x 4 x i32>)

declare <vscale x 16 x i8> @llvm.aarch64.sve.umulh.u.nxv16i8(<vscale x 16 x i1>, <vscale x 16 x i8>, <vscale x 16 x i8>)
declare <vscale x 8 x i16> @llvm.aarch64.sve.umulh.u.nxv8i16(<vscale x 8 x i1>, <vscale x 8 x i16>, <vscale x 8 x i16>)
declare <vscale x 4 x i32> @llvm.aarch64.sve.umulh.u.nxv4i32(<vscale x 4 x i1>, <vscale x 4 x i32>, <vscale x 4 x i32>)
declare <vscale x 2 x i64> @llvm.aarch64.sve.umulh.u.nxv2i64(<vscale x 2 x i1>, <vscale x 2 x i64>, <vscale x 2 x i64>)

declare <vscale x 16 x i8> @llvm.aarch64.sve.and.u.nxv16i8(<vscale x 16 x i1>, <vscale x 16 x i8>, <vscale x 16 x i8>)
declare <vscale x 8 x i16> @llvm.aarch64.sve.and.u.nxv8i16(<vscale x 8 x i1>, <vscale x 8 x i16>, <vscale x 8 x i16>)
declare <vscale x 4 x i32> @llvm.aarch64.sve.and.u.nxv4i32(<vscale x 4 x i1>, <vscale x 4 x i32>, <vscale x 4 x i32>)
declare <vscale x 2 x i64> @llvm.aarch64.sve.and.u.nxv2i64(<vscale x 2 x i1>, <vscale x 2 x i64>, <vscale x 2 x i64>)

declare <vscale x 16 x i8> @llvm.aarch64.sve.bic.u.nxv16i8(<vscale x 16 x i1>, <vscale x 16 x i8>, <vscale x 16 x i8>)
declare <vscale x 8 x i16> @llvm.aarch64.sve.bic.u.nxv8i16(<vscale x 8 x i1>, <vscale x 8 x i16>, <vscale x 8 x i16>)
declare <vscale x 4 x i32> @llvm.aarch64.sve.bic.u.nxv4i32(<vscale x 4 x i1>, <vscale x 4 x i32>, <vscale x 4 x i32>)
declare <vscale x 2 x i64> @llvm.aarch64.sve.bic.u.nxv2i64(<vscale x 2 x i1>, <vscale x 2 x i64>, <vscale x 2 x i64>)

declare <vscale x 16 x i8> @llvm.aarch64.sve.eor.u.nxv16i8(<vscale x 16 x i1>, <vscale x 16 x i8>, <vscale x 16 x i8>)
declare <vscale x 8 x i16> @llvm.aarch64.sve.eor.u.nxv8i16(<vscale x 8 x i1>, <vscale x 8 x i16>, <vscale x 8 x i16>)
declare <vscale x 4 x i32> @llvm.aarch64.sve.eor.u.nxv4i32(<vscale x 4 x i1>, <vscale x 4 x i32>, <vscale x 4 x i32>)
declare <vscale x 2 x i64> @llvm.aarch64.sve.eor.u.nxv2i64(<vscale x 2 x i1>, <vscale x 2 x i64>, <vscale x 2 x i64>)

declare <vscale x 4 x i32> @llvm.aarch64.sve.orr.nxv4i32(<vscale x 4 x i1>, <vscale x 4 x i32>, <vscale x 4 x i32>)

declare <vscale x 16 x i8> @llvm.aarch64.sve.orr.u.nxv16i8(<vscale x 16 x i1>, <vscale x 16 x i8>, <vscale x 16 x i8>)
declare <vscale x 8 x i16> @llvm.aarch64.sve.orr.u.nxv8i16(<vscale x 8 x i1>, <vscale x 8 x i16>, <vscale x 8 x i16>)
declare <vscale x 4 x i32> @llvm.aarch64.sve.orr.u.nxv4i32(<vscale x 4 x i1>, <vscale x 4 x i32>, <vscale x 4 x i32>)
declare <vscale x 2 x i64> @llvm.aarch64.sve.orr.u.nxv2i64(<vscale x 2 x i1>, <vscale x 2 x i64>, <vscale x 2 x i64>)

declare <vscale x 16 x i8> @llvm.aarch64.sve.sqadd.nxv16i8(<vscale x 16 x i1>, <vscale x 16 x i8>, <vscale x 16 x i8>)
declare <vscale x 8 x i16> @llvm.aarch64.sve.sqadd.nxv8i16(<vscale x 8 x i1>, <vscale x 8 x i16>, <vscale x 8 x i16>)
declare <vscale x 4 x i32> @llvm.aarch64.sve.sqadd.nxv4i32(<vscale x 4 x i1>, <vscale x 4 x i32>, <vscale x 4 x i32>)
declare <vscale x 2 x i64> @llvm.aarch64.sve.sqadd.nxv2i64(<vscale x 2 x i1>, <vscale x 2 x i64>, <vscale x 2 x i64>)

declare <vscale x 16 x i8> @llvm.aarch64.sve.sqsub.u.nxv16i8(<vscale x 16 x i1>, <vscale x 16 x i8>, <vscale x 16 x i8>)
declare <vscale x 8 x i16> @llvm.aarch64.sve.sqsub.u.nxv8i16(<vscale x 8 x i1>, <vscale x 8 x i16>, <vscale x 8 x i16>)
declare <vscale x 4 x i32> @llvm.aarch64.sve.sqsub.u.nxv4i32(<vscale x 4 x i1>, <vscale x 4 x i32>, <vscale x 4 x i32>)
declare <vscale x 2 x i64> @llvm.aarch64.sve.sqsub.u.nxv2i64(<vscale x 2 x i1>, <vscale x 2 x i64>, <vscale x 2 x i64>)

declare <vscale x 16 x i8> @llvm.aarch64.sve.uqadd.nxv16i8(<vscale x 16 x i1>, <vscale x 16 x i8>, <vscale x 16 x i8>)
declare <vscale x 8 x i16> @llvm.aarch64.sve.uqadd.nxv8i16(<vscale x 8 x i1>, <vscale x 8 x i16>, <vscale x 8 x i16>)
declare <vscale x 4 x i32> @llvm.aarch64.sve.uqadd.nxv4i32(<vscale x 4 x i1>, <vscale x 4 x i32>, <vscale x 4 x i32>)
declare <vscale x 2 x i64> @llvm.aarch64.sve.uqadd.nxv2i64(<vscale x 2 x i1>, <vscale x 2 x i64>, <vscale x 2 x i64>)

declare <vscale x 4 x i32> @llvm.aarch64.sve.uqsub.nxv4i32(<vscale x 4 x i1>, <vscale x 4 x i32>, <vscale x 4 x i32>)

declare <vscale x 16 x i8> @llvm.aarch64.sve.uqsub.u.nxv16i8(<vscale x 16 x i1>, <vscale x 16 x i8>, <vscale x 16 x i8>)
declare <vscale x 8 x i16> @llvm.aarch64.sve.uqsub.u.nxv8i16(<vscale x 8 x i1>, <vscale x 8 x i16>, <vscale x 8 x i16>)
declare <vscale x 4 x i32> @llvm.aarch64.sve.uqsub.u.nxv4i32(<vscale x 4 x i1>, <vscale x 4 x i32>, <vscale x 4 x i32>)
declare <vscale x 2 x i64> @llvm.aarch64.sve.uqsub.u.nxv2i64(<vscale x 2 x i1>, <vscale x 2 x i64>, <vscale x 2 x i64>)

declare <vscale x 16 x i8> @llvm.aarch64.sve.asr.wide.nxv16i8(<vscale x 16 x i1>, <vscale x 16 x i8>, <vscale x 2 x i64>)
declare <vscale x 8 x i16> @llvm.aarch64.sve.asr.wide.nxv8i16(<vscale x 8 x i1>, <vscale x 8 x i16>, <vscale x 2 x i64>)
declare <vscale x 4 x i32> @llvm.aarch64.sve.asr.wide.nxv4i32(<vscale x 4 x i1>, <vscale x 4 x i32>, <vscale x 2 x i64>)

declare <vscale x 16 x i8> @llvm.aarch64.sve.lsl.wide.nxv16i8(<vscale x 16 x i1>, <vscale x 16 x i8>, <vscale x 2 x i64>)
declare <vscale x 8 x i16> @llvm.aarch64.sve.lsl.wide.nxv8i16(<vscale x 8 x i1>, <vscale x 8 x i16>, <vscale x 2 x i64>)
declare <vscale x 4 x i32> @llvm.aarch64.sve.lsl.wide.nxv4i32(<vscale x 4 x i1>, <vscale x 4 x i32>, <vscale x 2 x i64>)

declare <vscale x 16 x i8> @llvm.aarch64.sve.lsr.wide.nxv16i8(<vscale x 16 x i1>, <vscale x 16 x i8>, <vscale x 2 x i64>)
declare <vscale x 8 x i16> @llvm.aarch64.sve.lsr.wide.nxv8i16(<vscale x 8 x i1>, <vscale x 8 x i16>, <vscale x 2 x i64>)
declare <vscale x 4 x i32> @llvm.aarch64.sve.lsr.wide.nxv4i32(<vscale x 4 x i1>, <vscale x 4 x i32>, <vscale x 2 x i64>)

declare <vscale x 8 x half> @llvm.aarch64.sve.fadd.u.nxv8f16(<vscale x 8 x i1>, <vscale x 8 x half>, <vscale x 8 x half>)
declare <vscale x 4 x float> @llvm.aarch64.sve.fadd.u.nxv4f32(<vscale x 4 x i1>, <vscale x 4 x float>, <vscale x 4 x float>)
declare <vscale x 2 x double> @llvm.aarch64.sve.fadd.u.nxv2f64(<vscale x 2 x i1>, <vscale x 2 x double>, <vscale x 2 x double>)

declare <vscale x 8 x half> @llvm.aarch64.sve.fsub.u.nxv8f16(<vscale x 8 x i1>, <vscale x 8 x half>, <vscale x 8 x half>)
declare <vscale x 4 x float> @llvm.aarch64.sve.fsub.u.nxv4f32(<vscale x 4 x i1>, <vscale x 4 x float>, <vscale x 4 x float>)
declare <vscale x 2 x double> @llvm.aarch64.sve.fsub.u.nxv2f64(<vscale x 2 x i1>, <vscale x 2 x double>, <vscale x 2 x double>)

declare <vscale x 8 x half> @llvm.aarch64.sve.fmul.u.nxv8f16(<vscale x 8 x i1>, <vscale x 8 x half>, <vscale x 8 x half>)
declare <vscale x 4 x float> @llvm.aarch64.sve.fmul.u.nxv4f32(<vscale x 4 x i1>, <vscale x 4 x float>, <vscale x 4 x float>)
declare <vscale x 2 x double> @llvm.aarch64.sve.fmul.u.nxv2f64(<vscale x 2 x i1>, <vscale x 2 x double>, <vscale x 2 x double>)

declare <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1>)
declare <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1>)
declare <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1>)

declare <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv4i1(<vscale x 4 x i1>)
declare <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv8i1(<vscale x 8 x i1>)
declare <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv2i1(<vscale x 2 x i1>)

declare <vscale x 16 x i1> @llvm.aarch64.sve.ptrue.nxv16i1(i32)
declare <vscale x 8 x i1> @llvm.aarch64.sve.ptrue.nxv8i1(i32)
declare <vscale x 4 x i1> @llvm.aarch64.sve.ptrue.nxv4i1(i32)
declare <vscale x 2 x i1> @llvm.aarch64.sve.ptrue.nxv2i1(i32)