; RUN: llc < %s | FileCheck %s

target triple = "aarch64-unknown-linux-gnu"
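
; The tests below check that predicated SVE intrinsics are lowered to the
; unpredicated form of the instruction when their governing predicate is
; known to be all active at the operation's element width (a ptrue with
; pattern 31, i.e. all elements).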

define <vscale x 16 x i8> @add_i8(<vscale x 16 x i8> %a, <vscale x 16 x i8> %b) #0 {
; CHECK-LABEL: add_i8:
; CHECK: add z0.b, z0.b, z1.b
  %pg = call <vscale x 16 x i1> @llvm.aarch64.sve.ptrue.nxv16i1(i32 31)
  %out = call <vscale x 16 x i8> @llvm.aarch64.sve.add.nxv16i8(<vscale x 16 x i1> %pg, <vscale x 16 x i8> %a, <vscale x 16 x i8> %b)
  ret <vscale x 16 x i8> %out
}

define <vscale x 8 x i16> @add_i16(<vscale x 8 x i16> %a, <vscale x 8 x i16> %b) #0 {
; CHECK-LABEL: add_i16:
; CHECK: add z0.h, z0.h, z1.h
  %pg = call <vscale x 8 x i1> @llvm.aarch64.sve.ptrue.nxv8i1(i32 31)
  %out = call <vscale x 8 x i16> @llvm.aarch64.sve.add.nxv8i16(<vscale x 8 x i1> %pg, <vscale x 8 x i16> %a, <vscale x 8 x i16> %b)
  ret <vscale x 8 x i16> %out
}

define <vscale x 4 x i32> @add_i32(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b) #0 {
; CHECK-LABEL: add_i32:
; CHECK: add z0.s, z0.s, z1.s
  %pg = call <vscale x 4 x i1> @llvm.aarch64.sve.ptrue.nxv4i1(i32 31)
  %out = call <vscale x 4 x i32> @llvm.aarch64.sve.add.nxv4i32(<vscale x 4 x i1> %pg, <vscale x 4 x i32> %a, <vscale x 4 x i32> %b)
  ret <vscale x 4 x i32> %out
}

define <vscale x 2 x i64> @add_i64(<vscale x 2 x i64> %a, <vscale x 2 x i64> %b) #0 {
; CHECK-LABEL: add_i64:
; CHECK: add z0.d, z0.d, z1.d
  %pg = call <vscale x 2 x i1> @llvm.aarch64.sve.ptrue.nxv2i1(i32 31)
  %out = call <vscale x 2 x i64> @llvm.aarch64.sve.add.nxv2i64(<vscale x 2 x i1> %pg, <vscale x 2 x i64> %a, <vscale x 2 x i64> %b)
  ret <vscale x 2 x i64> %out
}

define <vscale x 16 x i8> @sub_i8(<vscale x 16 x i8> %a, <vscale x 16 x i8> %b) #0 {
; CHECK-LABEL: sub_i8:
; CHECK: sub z0.b, z0.b, z1.b
  %pg = call <vscale x 16 x i1> @llvm.aarch64.sve.ptrue.nxv16i1(i32 31)
  %out = call <vscale x 16 x i8> @llvm.aarch64.sve.sub.nxv16i8(<vscale x 16 x i1> %pg, <vscale x 16 x i8> %a, <vscale x 16 x i8> %b)
  ret <vscale x 16 x i8> %out
}

define <vscale x 8 x i16> @sub_i16(<vscale x 8 x i16> %a, <vscale x 8 x i16> %b) #0 {
; CHECK-LABEL: sub_i16:
; CHECK: sub z0.h, z0.h, z1.h
  %pg = call <vscale x 8 x i1> @llvm.aarch64.sve.ptrue.nxv8i1(i32 31)
  %out = call <vscale x 8 x i16> @llvm.aarch64.sve.sub.nxv8i16(<vscale x 8 x i1> %pg, <vscale x 8 x i16> %a, <vscale x 8 x i16> %b)
  ret <vscale x 8 x i16> %out
}

define <vscale x 4 x i32> @sub_i32(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b) #0 {
; CHECK-LABEL: sub_i32:
; CHECK: sub z0.s, z0.s, z1.s
  %pg = call <vscale x 4 x i1> @llvm.aarch64.sve.ptrue.nxv4i1(i32 31)
  %out = call <vscale x 4 x i32> @llvm.aarch64.sve.sub.nxv4i32(<vscale x 4 x i1> %pg, <vscale x 4 x i32> %a, <vscale x 4 x i32> %b)
  ret <vscale x 4 x i32> %out
}

define <vscale x 2 x i64> @sub_i64(<vscale x 2 x i64> %a, <vscale x 2 x i64> %b) #0 {
; CHECK-LABEL: sub_i64:
; CHECK: sub z0.d, z0.d, z1.d
  %pg = call <vscale x 2 x i1> @llvm.aarch64.sve.ptrue.nxv2i1(i32 31)
  %out = call <vscale x 2 x i64> @llvm.aarch64.sve.sub.nxv2i64(<vscale x 2 x i1> %pg, <vscale x 2 x i64> %a, <vscale x 2 x i64> %b)
  ret <vscale x 2 x i64> %out
}

; As sub_i32 but where pg is i8 based and thus compatible for i32.
define <vscale x 4 x i32> @sub_i32_ptrue_all_b(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b) #0 {
; CHECK-LABEL: sub_i32_ptrue_all_b:
; CHECK: sub z0.s, z0.s, z1.s
  %pg.b = tail call <vscale x 16 x i1> @llvm.aarch64.sve.ptrue.nxv16i1(i32 31)
  %pg.s = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> %pg.b)
  %out = tail call <vscale x 4 x i32> @llvm.aarch64.sve.sub.nxv4i32(<vscale x 4 x i1> %pg.s, <vscale x 4 x i32> %a, <vscale x 4 x i32> %b)
  ret <vscale x 4 x i32> %out
}

; As sub_i32 but where pg is i16 based and thus compatible for i32.
define <vscale x 4 x i32> @sub_i32_ptrue_all_h(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b) #0 {
; CHECK-LABEL: sub_i32_ptrue_all_h:
; CHECK: sub z0.s, z0.s, z1.s
  %pg.h = tail call <vscale x 8 x i1> @llvm.aarch64.sve.ptrue.nxv8i1(i32 31)
  %pg.b = tail call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv8i1(<vscale x 8 x i1> %pg.h)
  %pg.s = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> %pg.b)
  %out = tail call <vscale x 4 x i32> @llvm.aarch64.sve.sub.nxv4i32(<vscale x 4 x i1> %pg.s, <vscale x 4 x i32> %a, <vscale x 4 x i32> %b)
  ret <vscale x 4 x i32> %out
}

; As sub_i32 but where pg is i64 based, which is not compatible for i32 and
; thus inactive lanes are important and the unpredicated form cannot be used.
define <vscale x 4 x i32> @sub_i32_ptrue_all_d(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b) #0 {
; CHECK-LABEL: sub_i32_ptrue_all_d:
; CHECK-DAG: ptrue [[PG:p[0-9]+]].d
; CHECK-DAG: sub z0.s, [[PG]]/m, z0.s, z1.s
  %pg.d = tail call <vscale x 2 x i1> @llvm.aarch64.sve.ptrue.nxv2i1(i32 31)
  %pg.b = tail call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv2i1(<vscale x 2 x i1> %pg.d)
  %pg.s = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> %pg.b)
  %out = tail call <vscale x 4 x i32> @llvm.aarch64.sve.sub.nxv4i32(<vscale x 4 x i1> %pg.s, <vscale x 4 x i32> %a, <vscale x 4 x i32> %b)
  ret <vscale x 4 x i32> %out
}

define <vscale x 16 x i8> @mul_i8(<vscale x 16 x i8> %a, <vscale x 16 x i8> %b) #0 {
; CHECK-LABEL: mul_i8:
; CHECK: mul z0.b, z0.b, z1.b
  %pg = call <vscale x 16 x i1> @llvm.aarch64.sve.ptrue.nxv16i1(i32 31)
  %out = call <vscale x 16 x i8> @llvm.aarch64.sve.mul.nxv16i8(<vscale x 16 x i1> %pg, <vscale x 16 x i8> %a, <vscale x 16 x i8> %b)
  ret <vscale x 16 x i8> %out
}

define <vscale x 8 x i16> @mul_i16(<vscale x 8 x i16> %a, <vscale x 8 x i16> %b) #0 {
; CHECK-LABEL: mul_i16:
; CHECK: mul z0.h, z0.h, z1.h
  %pg = call <vscale x 8 x i1> @llvm.aarch64.sve.ptrue.nxv8i1(i32 31)
  %out = call <vscale x 8 x i16> @llvm.aarch64.sve.mul.nxv8i16(<vscale x 8 x i1> %pg, <vscale x 8 x i16> %a, <vscale x 8 x i16> %b)
  ret <vscale x 8 x i16> %out
}

define <vscale x 4 x i32> @mul_i32(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b) #0 {
; CHECK-LABEL: mul_i32:
; CHECK: mul z0.s, z0.s, z1.s
  %pg = call <vscale x 4 x i1> @llvm.aarch64.sve.ptrue.nxv4i1(i32 31)
  %out = call <vscale x 4 x i32> @llvm.aarch64.sve.mul.nxv4i32(<vscale x 4 x i1> %pg, <vscale x 4 x i32> %a, <vscale x 4 x i32> %b)
  ret <vscale x 4 x i32> %out
}

define <vscale x 2 x i64> @mul_i64(<vscale x 2 x i64> %a, <vscale x 2 x i64> %b) #0 {
; CHECK-LABEL: mul_i64:
; CHECK: mul z0.d, z0.d, z1.d
  %pg = call <vscale x 2 x i1> @llvm.aarch64.sve.ptrue.nxv2i1(i32 31)
  %out = call <vscale x 2 x i64> @llvm.aarch64.sve.mul.nxv2i64(<vscale x 2 x i1> %pg, <vscale x 2 x i64> %a, <vscale x 2 x i64> %b)
  ret <vscale x 2 x i64> %out
}

define <vscale x 16 x i8> @smulh_i8(<vscale x 16 x i8> %a, <vscale x 16 x i8> %b) #0 {
; CHECK-LABEL: smulh_i8:
; CHECK: smulh z0.b, z0.b, z1.b
  %pg = call <vscale x 16 x i1> @llvm.aarch64.sve.ptrue.nxv16i1(i32 31)
  %out = call <vscale x 16 x i8> @llvm.aarch64.sve.smulh.nxv16i8(<vscale x 16 x i1> %pg, <vscale x 16 x i8> %a, <vscale x 16 x i8> %b)
  ret <vscale x 16 x i8> %out
}

define <vscale x 8 x i16> @smulh_i16(<vscale x 8 x i16> %a, <vscale x 8 x i16> %b) #0 {
; CHECK-LABEL: smulh_i16:
; CHECK: smulh z0.h, z0.h, z1.h
  %pg = call <vscale x 8 x i1> @llvm.aarch64.sve.ptrue.nxv8i1(i32 31)
  %out = call <vscale x 8 x i16> @llvm.aarch64.sve.smulh.nxv8i16(<vscale x 8 x i1> %pg, <vscale x 8 x i16> %a, <vscale x 8 x i16> %b)
  ret <vscale x 8 x i16> %out
}

define <vscale x 4 x i32> @smulh_i32(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b) #0 {
; CHECK-LABEL: smulh_i32:
; CHECK: smulh z0.s, z0.s, z1.s
  %pg = call <vscale x 4 x i1> @llvm.aarch64.sve.ptrue.nxv4i1(i32 31)
  %out = call <vscale x 4 x i32> @llvm.aarch64.sve.smulh.nxv4i32(<vscale x 4 x i1> %pg, <vscale x 4 x i32> %a, <vscale x 4 x i32> %b)
  ret <vscale x 4 x i32> %out
}

define <vscale x 2 x i64> @smulh_i64(<vscale x 2 x i64> %a, <vscale x 2 x i64> %b) #0 {
; CHECK-LABEL: smulh_i64:
; CHECK: smulh z0.d, z0.d, z1.d
  %pg = call <vscale x 2 x i1> @llvm.aarch64.sve.ptrue.nxv2i1(i32 31)
  %out = call <vscale x 2 x i64> @llvm.aarch64.sve.smulh.nxv2i64(<vscale x 2 x i1> %pg, <vscale x 2 x i64> %a, <vscale x 2 x i64> %b)
  ret <vscale x 2 x i64> %out
}

define <vscale x 16 x i8> @umulh_i8(<vscale x 16 x i8> %a, <vscale x 16 x i8> %b) #0 {
; CHECK-LABEL: umulh_i8:
; CHECK: umulh z0.b, z0.b, z1.b
  %pg = call <vscale x 16 x i1> @llvm.aarch64.sve.ptrue.nxv16i1(i32 31)
  %out = call <vscale x 16 x i8> @llvm.aarch64.sve.umulh.nxv16i8(<vscale x 16 x i1> %pg, <vscale x 16 x i8> %a, <vscale x 16 x i8> %b)
  ret <vscale x 16 x i8> %out
}

define <vscale x 8 x i16> @umulh_i16(<vscale x 8 x i16> %a, <vscale x 8 x i16> %b) #0 {
; CHECK-LABEL: umulh_i16:
; CHECK: umulh z0.h, z0.h, z1.h
  %pg = call <vscale x 8 x i1> @llvm.aarch64.sve.ptrue.nxv8i1(i32 31)
  %out = call <vscale x 8 x i16> @llvm.aarch64.sve.umulh.nxv8i16(<vscale x 8 x i1> %pg, <vscale x 8 x i16> %a, <vscale x 8 x i16> %b)
  ret <vscale x 8 x i16> %out
}

define <vscale x 4 x i32> @umulh_i32(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b) #0 {
; CHECK-LABEL: umulh_i32:
; CHECK: umulh z0.s, z0.s, z1.s
  %pg = call <vscale x 4 x i1> @llvm.aarch64.sve.ptrue.nxv4i1(i32 31)
  %out = call <vscale x 4 x i32> @llvm.aarch64.sve.umulh.nxv4i32(<vscale x 4 x i1> %pg, <vscale x 4 x i32> %a, <vscale x 4 x i32> %b)
  ret <vscale x 4 x i32> %out
}

define <vscale x 2 x i64> @umulh_i64(<vscale x 2 x i64> %a, <vscale x 2 x i64> %b) #0 {
; CHECK-LABEL: umulh_i64:
; CHECK: umulh z0.d, z0.d, z1.d
  %pg = call <vscale x 2 x i1> @llvm.aarch64.sve.ptrue.nxv2i1(i32 31)
  %out = call <vscale x 2 x i64> @llvm.aarch64.sve.umulh.nxv2i64(<vscale x 2 x i1> %pg, <vscale x 2 x i64> %a, <vscale x 2 x i64> %b)
  ret <vscale x 2 x i64> %out
}

; As umulh_i32 but where pg is i8 based and thus compatible for i32.
define <vscale x 4 x i32> @umulh_i32_ptrue_all_b(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b) #0 {
; CHECK-LABEL: umulh_i32_ptrue_all_b:
; CHECK: umulh z0.s, z0.s, z1.s
  %pg.b = tail call <vscale x 16 x i1> @llvm.aarch64.sve.ptrue.nxv16i1(i32 31)
  %pg.s = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> %pg.b)
  %out = tail call <vscale x 4 x i32> @llvm.aarch64.sve.umulh.nxv4i32(<vscale x 4 x i1> %pg.s, <vscale x 4 x i32> %a, <vscale x 4 x i32> %b)
  ret <vscale x 4 x i32> %out
}

; As umulh_i32 but where pg is i16 based and thus compatible for i32.
define <vscale x 4 x i32> @umulh_i32_ptrue_all_h(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b) #0 {
; CHECK-LABEL: umulh_i32_ptrue_all_h:
; CHECK: umulh z0.s, z0.s, z1.s
  %pg.h = tail call <vscale x 8 x i1> @llvm.aarch64.sve.ptrue.nxv8i1(i32 31)
  %pg.b = tail call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv8i1(<vscale x 8 x i1> %pg.h)
  %pg.s = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> %pg.b)
  %out = tail call <vscale x 4 x i32> @llvm.aarch64.sve.umulh.nxv4i32(<vscale x 4 x i1> %pg.s, <vscale x 4 x i32> %a, <vscale x 4 x i32> %b)
  ret <vscale x 4 x i32> %out
}

; As umulh_i32 but where pg is i64 based, which is not compatible for i32 and
; thus inactive lanes are important and the unpredicated form cannot be used.
define <vscale x 4 x i32> @umulh_i32_ptrue_all_d(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b) #0 {
; CHECK-LABEL: umulh_i32_ptrue_all_d:
; CHECK-DAG: ptrue [[PG:p[0-9]+]].d
; CHECK-DAG: umulh z0.s, [[PG]]/m, z0.s, z1.s
  %pg.d = tail call <vscale x 2 x i1> @llvm.aarch64.sve.ptrue.nxv2i1(i32 31)
  %pg.b = tail call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv2i1(<vscale x 2 x i1> %pg.d)
  %pg.s = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> %pg.b)
  %out = tail call <vscale x 4 x i32> @llvm.aarch64.sve.umulh.nxv4i32(<vscale x 4 x i1> %pg.s, <vscale x 4 x i32> %a, <vscale x 4 x i32> %b)
  ret <vscale x 4 x i32> %out
}
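
; Note: for the bitwise operations below, the expected unpredicated forms are
; the vector bitwise instructions, which operate on the whole register and are
; therefore printed with the .d qualifier regardless of the element type being
; tested.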

define <vscale x 16 x i8> @and_i8(<vscale x 16 x i8> %a, <vscale x 16 x i8> %b) #0 {
; CHECK-LABEL: and_i8:
; CHECK: and z0.d, z0.d, z1.d
  %pg = call <vscale x 16 x i1> @llvm.aarch64.sve.ptrue.nxv16i1(i32 31)
  %out = call <vscale x 16 x i8> @llvm.aarch64.sve.and.nxv16i8(<vscale x 16 x i1> %pg, <vscale x 16 x i8> %a, <vscale x 16 x i8> %b)
  ret <vscale x 16 x i8> %out
}

define <vscale x 8 x i16> @and_i16(<vscale x 8 x i16> %a, <vscale x 8 x i16> %b) #0 {
; CHECK-LABEL: and_i16:
; CHECK: and z0.d, z0.d, z1.d
  %pg = call <vscale x 8 x i1> @llvm.aarch64.sve.ptrue.nxv8i1(i32 31)
  %out = call <vscale x 8 x i16> @llvm.aarch64.sve.and.nxv8i16(<vscale x 8 x i1> %pg, <vscale x 8 x i16> %a, <vscale x 8 x i16> %b)
  ret <vscale x 8 x i16> %out
}

define <vscale x 4 x i32> @and_i32(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b) #0 {
; CHECK-LABEL: and_i32:
; CHECK: and z0.d, z0.d, z1.d
  %pg = call <vscale x 4 x i1> @llvm.aarch64.sve.ptrue.nxv4i1(i32 31)
  %out = call <vscale x 4 x i32> @llvm.aarch64.sve.and.nxv4i32(<vscale x 4 x i1> %pg, <vscale x 4 x i32> %a, <vscale x 4 x i32> %b)
  ret <vscale x 4 x i32> %out
}

define <vscale x 2 x i64> @and_i64(<vscale x 2 x i64> %a, <vscale x 2 x i64> %b) #0 {
; CHECK-LABEL: and_i64:
; CHECK: and z0.d, z0.d, z1.d
  %pg = call <vscale x 2 x i1> @llvm.aarch64.sve.ptrue.nxv2i1(i32 31)
  %out = call <vscale x 2 x i64> @llvm.aarch64.sve.and.nxv2i64(<vscale x 2 x i1> %pg, <vscale x 2 x i64> %a, <vscale x 2 x i64> %b)
  ret <vscale x 2 x i64> %out
}

define <vscale x 16 x i8> @bic_i8(<vscale x 16 x i8> %a, <vscale x 16 x i8> %b) #0 {
; CHECK-LABEL: bic_i8:
; CHECK: bic z0.d, z0.d, z1.d
  %pg = call <vscale x 16 x i1> @llvm.aarch64.sve.ptrue.nxv16i1(i32 31)
  %out = call <vscale x 16 x i8> @llvm.aarch64.sve.bic.nxv16i8(<vscale x 16 x i1> %pg, <vscale x 16 x i8> %a, <vscale x 16 x i8> %b)
  ret <vscale x 16 x i8> %out
}

define <vscale x 8 x i16> @bic_i16(<vscale x 8 x i16> %a, <vscale x 8 x i16> %b) #0 {
; CHECK-LABEL: bic_i16:
; CHECK: bic z0.d, z0.d, z1.d
  %pg = call <vscale x 8 x i1> @llvm.aarch64.sve.ptrue.nxv8i1(i32 31)
  %out = call <vscale x 8 x i16> @llvm.aarch64.sve.bic.nxv8i16(<vscale x 8 x i1> %pg, <vscale x 8 x i16> %a, <vscale x 8 x i16> %b)
  ret <vscale x 8 x i16> %out
}

define <vscale x 4 x i32> @bic_i32(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b) #0 {
; CHECK-LABEL: bic_i32:
; CHECK: bic z0.d, z0.d, z1.d
  %pg = call <vscale x 4 x i1> @llvm.aarch64.sve.ptrue.nxv4i1(i32 31)
  %out = call <vscale x 4 x i32> @llvm.aarch64.sve.bic.nxv4i32(<vscale x 4 x i1> %pg, <vscale x 4 x i32> %a, <vscale x 4 x i32> %b)
  ret <vscale x 4 x i32> %out
}

define <vscale x 2 x i64> @bic_i64(<vscale x 2 x i64> %a, <vscale x 2 x i64> %b) #0 {
; CHECK-LABEL: bic_i64:
; CHECK: bic z0.d, z0.d, z1.d
  %pg = call <vscale x 2 x i1> @llvm.aarch64.sve.ptrue.nxv2i1(i32 31)
  %out = call <vscale x 2 x i64> @llvm.aarch64.sve.bic.nxv2i64(<vscale x 2 x i1> %pg, <vscale x 2 x i64> %a, <vscale x 2 x i64> %b)
  ret <vscale x 2 x i64> %out
}

define <vscale x 16 x i8> @eor_i8(<vscale x 16 x i8> %a, <vscale x 16 x i8> %b) #0 {
; CHECK-LABEL: eor_i8:
; CHECK: eor z0.d, z0.d, z1.d
  %pg = call <vscale x 16 x i1> @llvm.aarch64.sve.ptrue.nxv16i1(i32 31)
  %out = call <vscale x 16 x i8> @llvm.aarch64.sve.eor.nxv16i8(<vscale x 16 x i1> %pg, <vscale x 16 x i8> %a, <vscale x 16 x i8> %b)
  ret <vscale x 16 x i8> %out
}

define <vscale x 8 x i16> @eor_i16(<vscale x 8 x i16> %a, <vscale x 8 x i16> %b) #0 {
; CHECK-LABEL: eor_i16:
; CHECK: eor z0.d, z0.d, z1.d
  %pg = call <vscale x 8 x i1> @llvm.aarch64.sve.ptrue.nxv8i1(i32 31)
  %out = call <vscale x 8 x i16> @llvm.aarch64.sve.eor.nxv8i16(<vscale x 8 x i1> %pg, <vscale x 8 x i16> %a, <vscale x 8 x i16> %b)
  ret <vscale x 8 x i16> %out
}

define <vscale x 4 x i32> @eor_i32(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b) #0 {
; CHECK-LABEL: eor_i32:
; CHECK: eor z0.d, z0.d, z1.d
  %pg = call <vscale x 4 x i1> @llvm.aarch64.sve.ptrue.nxv4i1(i32 31)
  %out = call <vscale x 4 x i32> @llvm.aarch64.sve.eor.nxv4i32(<vscale x 4 x i1> %pg, <vscale x 4 x i32> %a, <vscale x 4 x i32> %b)
  ret <vscale x 4 x i32> %out
}

define <vscale x 2 x i64> @eor_i64(<vscale x 2 x i64> %a, <vscale x 2 x i64> %b) #0 {
; CHECK-LABEL: eor_i64:
; CHECK: eor z0.d, z0.d, z1.d
  %pg = call <vscale x 2 x i1> @llvm.aarch64.sve.ptrue.nxv2i1(i32 31)
  %out = call <vscale x 2 x i64> @llvm.aarch64.sve.eor.nxv2i64(<vscale x 2 x i1> %pg, <vscale x 2 x i64> %a, <vscale x 2 x i64> %b)
  ret <vscale x 2 x i64> %out
}

define <vscale x 16 x i8> @orr_i8(<vscale x 16 x i8> %a, <vscale x 16 x i8> %b) #0 {
; CHECK-LABEL: orr_i8:
; CHECK: orr z0.d, z0.d, z1.d
  %pg = call <vscale x 16 x i1> @llvm.aarch64.sve.ptrue.nxv16i1(i32 31)
  %out = call <vscale x 16 x i8> @llvm.aarch64.sve.orr.nxv16i8(<vscale x 16 x i1> %pg, <vscale x 16 x i8> %a, <vscale x 16 x i8> %b)
  ret <vscale x 16 x i8> %out
}

define <vscale x 8 x i16> @orr_i16(<vscale x 8 x i16> %a, <vscale x 8 x i16> %b) #0 {
; CHECK-LABEL: orr_i16:
; CHECK: orr z0.d, z0.d, z1.d
  %pg = call <vscale x 8 x i1> @llvm.aarch64.sve.ptrue.nxv8i1(i32 31)
  %out = call <vscale x 8 x i16> @llvm.aarch64.sve.orr.nxv8i16(<vscale x 8 x i1> %pg, <vscale x 8 x i16> %a, <vscale x 8 x i16> %b)
  ret <vscale x 8 x i16> %out
}

define <vscale x 4 x i32> @orr_i32(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b) #0 {
; CHECK-LABEL: orr_i32:
; CHECK: orr z0.d, z0.d, z1.d
  %pg = call <vscale x 4 x i1> @llvm.aarch64.sve.ptrue.nxv4i1(i32 31)
  %out = call <vscale x 4 x i32> @llvm.aarch64.sve.orr.nxv4i32(<vscale x 4 x i1> %pg, <vscale x 4 x i32> %a, <vscale x 4 x i32> %b)
  ret <vscale x 4 x i32> %out
}

define <vscale x 2 x i64> @orr_i64(<vscale x 2 x i64> %a, <vscale x 2 x i64> %b) #0 {
; CHECK-LABEL: orr_i64:
; CHECK: orr z0.d, z0.d, z1.d
  %pg = call <vscale x 2 x i1> @llvm.aarch64.sve.ptrue.nxv2i1(i32 31)
  %out = call <vscale x 2 x i64> @llvm.aarch64.sve.orr.nxv2i64(<vscale x 2 x i1> %pg, <vscale x 2 x i64> %a, <vscale x 2 x i64> %b)
  ret <vscale x 2 x i64> %out
}

; As orr_i32 but where pg is i8 based and thus compatible for i32.
define <vscale x 4 x i32> @orr_i32_ptrue_all_b(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b) #0 {
; CHECK-LABEL: orr_i32_ptrue_all_b:
; CHECK: orr z0.d, z0.d, z1.d
  %pg.b = tail call <vscale x 16 x i1> @llvm.aarch64.sve.ptrue.nxv16i1(i32 31)
  %pg.s = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> %pg.b)
  %out = tail call <vscale x 4 x i32> @llvm.aarch64.sve.orr.nxv4i32(<vscale x 4 x i1> %pg.s, <vscale x 4 x i32> %a, <vscale x 4 x i32> %b)
  ret <vscale x 4 x i32> %out
}

; As orr_i32 but where pg is i16 based and thus compatible for i32.
define <vscale x 4 x i32> @orr_i32_ptrue_all_h(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b) #0 {
; CHECK-LABEL: orr_i32_ptrue_all_h:
; CHECK: orr z0.d, z0.d, z1.d
  %pg.h = tail call <vscale x 8 x i1> @llvm.aarch64.sve.ptrue.nxv8i1(i32 31)
  %pg.b = tail call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv8i1(<vscale x 8 x i1> %pg.h)
  %pg.s = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> %pg.b)
  %out = tail call <vscale x 4 x i32> @llvm.aarch64.sve.orr.nxv4i32(<vscale x 4 x i1> %pg.s, <vscale x 4 x i32> %a, <vscale x 4 x i32> %b)
  ret <vscale x 4 x i32> %out
}

; As orr_i32 but where pg is i64 based, which is not compatible for i32 and
; thus inactive lanes are important and the unpredicated form cannot be used.
define <vscale x 4 x i32> @orr_i32_ptrue_all_d(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b) #0 {
; CHECK-LABEL: orr_i32_ptrue_all_d:
; CHECK-DAG: ptrue [[PG:p[0-9]+]].d
; CHECK-DAG: orr z0.s, [[PG]]/m, z0.s, z1.s
  %pg.d = tail call <vscale x 2 x i1> @llvm.aarch64.sve.ptrue.nxv2i1(i32 31)
  %pg.b = tail call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv2i1(<vscale x 2 x i1> %pg.d)
  %pg.s = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> %pg.b)
  %out = tail call <vscale x 4 x i32> @llvm.aarch64.sve.orr.nxv4i32(<vscale x 4 x i1> %pg.s, <vscale x 4 x i32> %a, <vscale x 4 x i32> %b)
  ret <vscale x 4 x i32> %out
}

define <vscale x 16 x i8> @sqadd_i8(<vscale x 16 x i8> %a, <vscale x 16 x i8> %b) #0 {
; CHECK-LABEL: sqadd_i8:
; CHECK: sqadd z0.b, z0.b, z1.b
  %pg = call <vscale x 16 x i1> @llvm.aarch64.sve.ptrue.nxv16i1(i32 31)
  %out = call <vscale x 16 x i8> @llvm.aarch64.sve.sqadd.nxv16i8(<vscale x 16 x i1> %pg, <vscale x 16 x i8> %a, <vscale x 16 x i8> %b)
  ret <vscale x 16 x i8> %out
}

define <vscale x 8 x i16> @sqadd_i16(<vscale x 8 x i16> %a, <vscale x 8 x i16> %b) #0 {
; CHECK-LABEL: sqadd_i16:
; CHECK: sqadd z0.h, z0.h, z1.h
  %pg = call <vscale x 8 x i1> @llvm.aarch64.sve.ptrue.nxv8i1(i32 31)
  %out = call <vscale x 8 x i16> @llvm.aarch64.sve.sqadd.nxv8i16(<vscale x 8 x i1> %pg, <vscale x 8 x i16> %a, <vscale x 8 x i16> %b)
  ret <vscale x 8 x i16> %out
}

define <vscale x 4 x i32> @sqadd_i32(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b) #0 {
; CHECK-LABEL: sqadd_i32:
; CHECK: sqadd z0.s, z0.s, z1.s
  %pg = call <vscale x 4 x i1> @llvm.aarch64.sve.ptrue.nxv4i1(i32 31)
  %out = call <vscale x 4 x i32> @llvm.aarch64.sve.sqadd.nxv4i32(<vscale x 4 x i1> %pg, <vscale x 4 x i32> %a, <vscale x 4 x i32> %b)
  ret <vscale x 4 x i32> %out
}

define <vscale x 2 x i64> @sqadd_i64(<vscale x 2 x i64> %a, <vscale x 2 x i64> %b) #0 {
; CHECK-LABEL: sqadd_i64:
; CHECK: sqadd z0.d, z0.d, z1.d
  %pg = call <vscale x 2 x i1> @llvm.aarch64.sve.ptrue.nxv2i1(i32 31)
  %out = call <vscale x 2 x i64> @llvm.aarch64.sve.sqadd.nxv2i64(<vscale x 2 x i1> %pg, <vscale x 2 x i64> %a, <vscale x 2 x i64> %b)
  ret <vscale x 2 x i64> %out
}

define <vscale x 16 x i8> @sqsub_i8(<vscale x 16 x i8> %a, <vscale x 16 x i8> %b) #0 {
; CHECK-LABEL: sqsub_i8:
; CHECK: sqsub z0.b, z0.b, z1.b
  %pg = call <vscale x 16 x i1> @llvm.aarch64.sve.ptrue.nxv16i1(i32 31)
  %out = call <vscale x 16 x i8> @llvm.aarch64.sve.sqsub.nxv16i8(<vscale x 16 x i1> %pg, <vscale x 16 x i8> %a, <vscale x 16 x i8> %b)
  ret <vscale x 16 x i8> %out
}

define <vscale x 8 x i16> @sqsub_i16(<vscale x 8 x i16> %a, <vscale x 8 x i16> %b) #0 {
; CHECK-LABEL: sqsub_i16:
; CHECK: sqsub z0.h, z0.h, z1.h
  %pg = call <vscale x 8 x i1> @llvm.aarch64.sve.ptrue.nxv8i1(i32 31)
  %out = call <vscale x 8 x i16> @llvm.aarch64.sve.sqsub.nxv8i16(<vscale x 8 x i1> %pg, <vscale x 8 x i16> %a, <vscale x 8 x i16> %b)
  ret <vscale x 8 x i16> %out
}

define <vscale x 4 x i32> @sqsub_i32(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b) #0 {
; CHECK-LABEL: sqsub_i32:
; CHECK: sqsub z0.s, z0.s, z1.s
  %pg = call <vscale x 4 x i1> @llvm.aarch64.sve.ptrue.nxv4i1(i32 31)
  %out = call <vscale x 4 x i32> @llvm.aarch64.sve.sqsub.nxv4i32(<vscale x 4 x i1> %pg, <vscale x 4 x i32> %a, <vscale x 4 x i32> %b)
  ret <vscale x 4 x i32> %out
}

define <vscale x 2 x i64> @sqsub_i64(<vscale x 2 x i64> %a, <vscale x 2 x i64> %b) #0 {
; CHECK-LABEL: sqsub_i64:
; CHECK: sqsub z0.d, z0.d, z1.d
  %pg = call <vscale x 2 x i1> @llvm.aarch64.sve.ptrue.nxv2i1(i32 31)
  %out = call <vscale x 2 x i64> @llvm.aarch64.sve.sqsub.nxv2i64(<vscale x 2 x i1> %pg, <vscale x 2 x i64> %a, <vscale x 2 x i64> %b)
  ret <vscale x 2 x i64> %out
}

define <vscale x 16 x i8> @uqadd_i8(<vscale x 16 x i8> %a, <vscale x 16 x i8> %b) #0 {
; CHECK-LABEL: uqadd_i8:
; CHECK: uqadd z0.b, z0.b, z1.b
  %pg = call <vscale x 16 x i1> @llvm.aarch64.sve.ptrue.nxv16i1(i32 31)
  %out = call <vscale x 16 x i8> @llvm.aarch64.sve.uqadd.nxv16i8(<vscale x 16 x i1> %pg, <vscale x 16 x i8> %a, <vscale x 16 x i8> %b)
  ret <vscale x 16 x i8> %out
}

define <vscale x 8 x i16> @uqadd_i16(<vscale x 8 x i16> %a, <vscale x 8 x i16> %b) #0 {
; CHECK-LABEL: uqadd_i16:
; CHECK: uqadd z0.h, z0.h, z1.h
  %pg = call <vscale x 8 x i1> @llvm.aarch64.sve.ptrue.nxv8i1(i32 31)
  %out = call <vscale x 8 x i16> @llvm.aarch64.sve.uqadd.nxv8i16(<vscale x 8 x i1> %pg, <vscale x 8 x i16> %a, <vscale x 8 x i16> %b)
  ret <vscale x 8 x i16> %out
}

define <vscale x 4 x i32> @uqadd_i32(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b) #0 {
; CHECK-LABEL: uqadd_i32:
; CHECK: uqadd z0.s, z0.s, z1.s
  %pg = call <vscale x 4 x i1> @llvm.aarch64.sve.ptrue.nxv4i1(i32 31)
  %out = call <vscale x 4 x i32> @llvm.aarch64.sve.uqadd.nxv4i32(<vscale x 4 x i1> %pg, <vscale x 4 x i32> %a, <vscale x 4 x i32> %b)
  ret <vscale x 4 x i32> %out
}

define <vscale x 2 x i64> @uqadd_i64(<vscale x 2 x i64> %a, <vscale x 2 x i64> %b) #0 {
; CHECK-LABEL: uqadd_i64:
; CHECK: uqadd z0.d, z0.d, z1.d
  %pg = call <vscale x 2 x i1> @llvm.aarch64.sve.ptrue.nxv2i1(i32 31)
  %out = call <vscale x 2 x i64> @llvm.aarch64.sve.uqadd.nxv2i64(<vscale x 2 x i1> %pg, <vscale x 2 x i64> %a, <vscale x 2 x i64> %b)
  ret <vscale x 2 x i64> %out
}

define <vscale x 16 x i8> @uqsub_i8(<vscale x 16 x i8> %a, <vscale x 16 x i8> %b) #0 {
; CHECK-LABEL: uqsub_i8:
; CHECK: uqsub z0.b, z0.b, z1.b
  %pg = call <vscale x 16 x i1> @llvm.aarch64.sve.ptrue.nxv16i1(i32 31)
  %out = call <vscale x 16 x i8> @llvm.aarch64.sve.uqsub.nxv16i8(<vscale x 16 x i1> %pg, <vscale x 16 x i8> %a, <vscale x 16 x i8> %b)
  ret <vscale x 16 x i8> %out
}

define <vscale x 8 x i16> @uqsub_i16(<vscale x 8 x i16> %a, <vscale x 8 x i16> %b) #0 {
; CHECK-LABEL: uqsub_i16:
; CHECK: uqsub z0.h, z0.h, z1.h
  %pg = call <vscale x 8 x i1> @llvm.aarch64.sve.ptrue.nxv8i1(i32 31)
  %out = call <vscale x 8 x i16> @llvm.aarch64.sve.uqsub.nxv8i16(<vscale x 8 x i1> %pg, <vscale x 8 x i16> %a, <vscale x 8 x i16> %b)
  ret <vscale x 8 x i16> %out
}

define <vscale x 4 x i32> @uqsub_i32(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b) #0 {
; CHECK-LABEL: uqsub_i32:
; CHECK: uqsub z0.s, z0.s, z1.s
  %pg = call <vscale x 4 x i1> @llvm.aarch64.sve.ptrue.nxv4i1(i32 31)
  %out = call <vscale x 4 x i32> @llvm.aarch64.sve.uqsub.nxv4i32(<vscale x 4 x i1> %pg, <vscale x 4 x i32> %a, <vscale x 4 x i32> %b)
  ret <vscale x 4 x i32> %out
}

define <vscale x 2 x i64> @uqsub_i64(<vscale x 2 x i64> %a, <vscale x 2 x i64> %b) #0 {
; CHECK-LABEL: uqsub_i64:
; CHECK: uqsub z0.d, z0.d, z1.d
  %pg = call <vscale x 2 x i1> @llvm.aarch64.sve.ptrue.nxv2i1(i32 31)
  %out = call <vscale x 2 x i64> @llvm.aarch64.sve.uqsub.nxv2i64(<vscale x 2 x i1> %pg, <vscale x 2 x i64> %a, <vscale x 2 x i64> %b)
  ret <vscale x 2 x i64> %out
}

; As uqsub_i32 but where pg is i8 based and thus compatible for i32.
define <vscale x 4 x i32> @uqsub_i32_ptrue_all_b(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b) #0 {
; CHECK-LABEL: uqsub_i32_ptrue_all_b:
; CHECK: uqsub z0.s, z0.s, z1.s
  %pg.b = tail call <vscale x 16 x i1> @llvm.aarch64.sve.ptrue.nxv16i1(i32 31)
  %pg.s = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> %pg.b)
  %out = tail call <vscale x 4 x i32> @llvm.aarch64.sve.uqsub.nxv4i32(<vscale x 4 x i1> %pg.s, <vscale x 4 x i32> %a, <vscale x 4 x i32> %b)
  ret <vscale x 4 x i32> %out
}

; As uqsub_i32 but where pg is i16 based and thus compatible for i32.
define <vscale x 4 x i32> @uqsub_i32_ptrue_all_h(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b) #0 {
; CHECK-LABEL: uqsub_i32_ptrue_all_h:
; CHECK: uqsub z0.s, z0.s, z1.s
  %pg.h = tail call <vscale x 8 x i1> @llvm.aarch64.sve.ptrue.nxv8i1(i32 31)
  %pg.b = tail call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv8i1(<vscale x 8 x i1> %pg.h)
  %pg.s = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> %pg.b)
  %out = tail call <vscale x 4 x i32> @llvm.aarch64.sve.uqsub.nxv4i32(<vscale x 4 x i1> %pg.s, <vscale x 4 x i32> %a, <vscale x 4 x i32> %b)
  ret <vscale x 4 x i32> %out
}

; As uqsub_i32 but where pg is i64 based, which is not compatible for i32 and
; thus inactive lanes are important and the unpredicated form cannot be used.
define <vscale x 4 x i32> @uqsub_i32_ptrue_all_d(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b) #0 {
; CHECK-LABEL: uqsub_i32_ptrue_all_d:
; CHECK-DAG: ptrue [[PG:p[0-9]+]].d
; CHECK-DAG: uqsub z0.s, [[PG]]/m, z0.s, z1.s
  %pg.d = tail call <vscale x 2 x i1> @llvm.aarch64.sve.ptrue.nxv2i1(i32 31)
  %pg.b = tail call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv2i1(<vscale x 2 x i1> %pg.d)
  %pg.s = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> %pg.b)
  %out = tail call <vscale x 4 x i32> @llvm.aarch64.sve.uqsub.nxv4i32(<vscale x 4 x i1> %pg.s, <vscale x 4 x i32> %a, <vscale x 4 x i32> %b)
  ret <vscale x 4 x i32> %out
}
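
; Note: the shift tests below use the "wide" intrinsic variants, where the
; shift amount is supplied as a vector of 64-bit elements; hence the z1.d
; operand in the expected asr/lsl/lsr instructions.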

define <vscale x 16 x i8> @asr_i8(<vscale x 16 x i8> %a, <vscale x 2 x i64> %b) #0 {
; CHECK-LABEL: asr_i8:
; CHECK: asr z0.b, z0.b, z1.d
  %pg = call <vscale x 16 x i1> @llvm.aarch64.sve.ptrue.nxv16i1(i32 31)
  %out = call <vscale x 16 x i8> @llvm.aarch64.sve.asr.wide.nxv16i8(<vscale x 16 x i1> %pg, <vscale x 16 x i8> %a, <vscale x 2 x i64> %b)
  ret <vscale x 16 x i8> %out
}

define <vscale x 8 x i16> @asr_i16(<vscale x 8 x i16> %a, <vscale x 2 x i64> %b) #0 {
; CHECK-LABEL: asr_i16:
; CHECK: asr z0.h, z0.h, z1.d
  %pg = call <vscale x 8 x i1> @llvm.aarch64.sve.ptrue.nxv8i1(i32 31)
  %out = call <vscale x 8 x i16> @llvm.aarch64.sve.asr.wide.nxv8i16(<vscale x 8 x i1> %pg, <vscale x 8 x i16> %a, <vscale x 2 x i64> %b)
  ret <vscale x 8 x i16> %out
}

define <vscale x 4 x i32> @asr_i32(<vscale x 4 x i32> %a, <vscale x 2 x i64> %b) #0 {
; CHECK-LABEL: asr_i32:
; CHECK: asr z0.s, z0.s, z1.d
  %pg = call <vscale x 4 x i1> @llvm.aarch64.sve.ptrue.nxv4i1(i32 31)
  %out = call <vscale x 4 x i32> @llvm.aarch64.sve.asr.wide.nxv4i32(<vscale x 4 x i1> %pg, <vscale x 4 x i32> %a, <vscale x 2 x i64> %b)
  ret <vscale x 4 x i32> %out
}

define <vscale x 16 x i8> @lsl_i8(<vscale x 16 x i8> %a, <vscale x 2 x i64> %b) #0 {
; CHECK-LABEL: lsl_i8:
; CHECK: lsl z0.b, z0.b, z1.d
  %pg = call <vscale x 16 x i1> @llvm.aarch64.sve.ptrue.nxv16i1(i32 31)
  %out = call <vscale x 16 x i8> @llvm.aarch64.sve.lsl.wide.nxv16i8(<vscale x 16 x i1> %pg, <vscale x 16 x i8> %a, <vscale x 2 x i64> %b)
  ret <vscale x 16 x i8> %out
}

define <vscale x 8 x i16> @lsl_i16(<vscale x 8 x i16> %a, <vscale x 2 x i64> %b) #0 {
; CHECK-LABEL: lsl_i16:
; CHECK: lsl z0.h, z0.h, z1.d
  %pg = call <vscale x 8 x i1> @llvm.aarch64.sve.ptrue.nxv8i1(i32 31)
  %out = call <vscale x 8 x i16> @llvm.aarch64.sve.lsl.wide.nxv8i16(<vscale x 8 x i1> %pg, <vscale x 8 x i16> %a, <vscale x 2 x i64> %b)
  ret <vscale x 8 x i16> %out
}

define <vscale x 4 x i32> @lsl_i32(<vscale x 4 x i32> %a, <vscale x 2 x i64> %b) #0 {
; CHECK-LABEL: lsl_i32:
; CHECK: lsl z0.s, z0.s, z1.d
  %pg = call <vscale x 4 x i1> @llvm.aarch64.sve.ptrue.nxv4i1(i32 31)
  %out = call <vscale x 4 x i32> @llvm.aarch64.sve.lsl.wide.nxv4i32(<vscale x 4 x i1> %pg, <vscale x 4 x i32> %a, <vscale x 2 x i64> %b)
  ret <vscale x 4 x i32> %out
}

define <vscale x 16 x i8> @lsr_i8(<vscale x 16 x i8> %a, <vscale x 2 x i64> %b) #0 {
; CHECK-LABEL: lsr_i8:
; CHECK: lsr z0.b, z0.b, z1.d
  %pg = call <vscale x 16 x i1> @llvm.aarch64.sve.ptrue.nxv16i1(i32 31)
  %out = call <vscale x 16 x i8> @llvm.aarch64.sve.lsr.wide.nxv16i8(<vscale x 16 x i1> %pg, <vscale x 16 x i8> %a, <vscale x 2 x i64> %b)
  ret <vscale x 16 x i8> %out
}

define <vscale x 8 x i16> @lsr_i16(<vscale x 8 x i16> %a, <vscale x 2 x i64> %b) #0 {
; CHECK-LABEL: lsr_i16:
; CHECK: lsr z0.h, z0.h, z1.d
  %pg = call <vscale x 8 x i1> @llvm.aarch64.sve.ptrue.nxv8i1(i32 31)
  %out = call <vscale x 8 x i16> @llvm.aarch64.sve.lsr.wide.nxv8i16(<vscale x 8 x i1> %pg, <vscale x 8 x i16> %a, <vscale x 2 x i64> %b)
  ret <vscale x 8 x i16> %out
}

define <vscale x 4 x i32> @lsr_i32(<vscale x 4 x i32> %a, <vscale x 2 x i64> %b) #0 {
; CHECK-LABEL: lsr_i32:
; CHECK: lsr z0.s, z0.s, z1.d
  %pg = call <vscale x 4 x i1> @llvm.aarch64.sve.ptrue.nxv4i1(i32 31)
  %out = call <vscale x 4 x i32> @llvm.aarch64.sve.lsr.wide.nxv4i32(<vscale x 4 x i1> %pg, <vscale x 4 x i32> %a, <vscale x 2 x i64> %b)
  ret <vscale x 4 x i32> %out
}

; As lsr_i32 but where pg is i8 based and thus compatible for i32.
define <vscale x 4 x i32> @lsr_i32_ptrue_all_b(<vscale x 4 x i32> %a, <vscale x 2 x i64> %b) #0 {
; CHECK-LABEL: lsr_i32_ptrue_all_b:
; CHECK: lsr z0.s, z0.s, z1.d
  %pg.b = tail call <vscale x 16 x i1> @llvm.aarch64.sve.ptrue.nxv16i1(i32 31)
  %pg.s = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> %pg.b)
  %out = tail call <vscale x 4 x i32> @llvm.aarch64.sve.lsr.wide.nxv4i32(<vscale x 4 x i1> %pg.s, <vscale x 4 x i32> %a, <vscale x 2 x i64> %b)
  ret <vscale x 4 x i32> %out
}

; As lsr_i32 but where pg is i16 based and thus compatible for i32.
define <vscale x 4 x i32> @lsr_i32_ptrue_all_h(<vscale x 4 x i32> %a, <vscale x 2 x i64> %b) #0 {
; CHECK-LABEL: lsr_i32_ptrue_all_h:
; CHECK: lsr z0.s, z0.s, z1.d
  %pg.h = tail call <vscale x 8 x i1> @llvm.aarch64.sve.ptrue.nxv8i1(i32 31)
  %pg.b = tail call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv8i1(<vscale x 8 x i1> %pg.h)
  %pg.s = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> %pg.b)
  %out = tail call <vscale x 4 x i32> @llvm.aarch64.sve.lsr.wide.nxv4i32(<vscale x 4 x i1> %pg.s, <vscale x 4 x i32> %a, <vscale x 2 x i64> %b)
  ret <vscale x 4 x i32> %out
}

; As lsr_i32 but where pg is i64 based, which is not compatible for i32 and
; thus inactive lanes are important and the unpredicated form cannot be used.
define <vscale x 4 x i32> @lsr_i32_ptrue_all_d(<vscale x 4 x i32> %a, <vscale x 2 x i64> %b) #0 {
; CHECK-LABEL: lsr_i32_ptrue_all_d:
; CHECK-DAG: ptrue [[PG:p[0-9]+]].d
; CHECK-DAG: lsr z0.s, [[PG]]/m, z0.s, z1.d
  %pg.d = tail call <vscale x 2 x i1> @llvm.aarch64.sve.ptrue.nxv2i1(i32 31)
  %pg.b = tail call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv2i1(<vscale x 2 x i1> %pg.d)
  %pg.s = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> %pg.b)
  %out = tail call <vscale x 4 x i32> @llvm.aarch64.sve.lsr.wide.nxv4i32(<vscale x 4 x i1> %pg.s, <vscale x 4 x i32> %a, <vscale x 2 x i64> %b)
  ret <vscale x 4 x i32> %out
}

define <vscale x 8 x half> @fadd_half(<vscale x 8 x half> %a, <vscale x 8 x half> %b) #0 {
; CHECK-LABEL: fadd_half:
; CHECK: fadd z0.h, z0.h, z1.h
  %pg = call <vscale x 8 x i1> @llvm.aarch64.sve.ptrue.nxv8i1(i32 31)
  %out = call <vscale x 8 x half> @llvm.aarch64.sve.fadd.nxv8f16(<vscale x 8 x i1> %pg, <vscale x 8 x half> %a, <vscale x 8 x half> %b)
  ret <vscale x 8 x half> %out
}

define <vscale x 4 x float> @fadd_float(<vscale x 4 x float> %a, <vscale x 4 x float> %b) #0 {
; CHECK-LABEL: fadd_float:
; CHECK: fadd z0.s, z0.s, z1.s
  %pg = call <vscale x 4 x i1> @llvm.aarch64.sve.ptrue.nxv4i1(i32 31)
  %out = call <vscale x 4 x float> @llvm.aarch64.sve.fadd.nxv4f32(<vscale x 4 x i1> %pg, <vscale x 4 x float> %a, <vscale x 4 x float> %b)
  ret <vscale x 4 x float> %out
}

define <vscale x 2 x double> @fadd_double(<vscale x 2 x double> %a, <vscale x 2 x double> %b) #0 {
; CHECK-LABEL: fadd_double:
; CHECK: fadd z0.d, z0.d, z1.d
  %pg = call <vscale x 2 x i1> @llvm.aarch64.sve.ptrue.nxv2i1(i32 31)
  %out = call <vscale x 2 x double> @llvm.aarch64.sve.fadd.nxv2f64(<vscale x 2 x i1> %pg, <vscale x 2 x double> %a, <vscale x 2 x double> %b)
  ret <vscale x 2 x double> %out
}

define <vscale x 8 x half> @fsub_half(<vscale x 8 x half> %a, <vscale x 8 x half> %b) #0 {
; CHECK-LABEL: fsub_half:
; CHECK: fsub z0.h, z0.h, z1.h
  %pg = call <vscale x 8 x i1> @llvm.aarch64.sve.ptrue.nxv8i1(i32 31)
  %out = call <vscale x 8 x half> @llvm.aarch64.sve.fsub.nxv8f16(<vscale x 8 x i1> %pg, <vscale x 8 x half> %a, <vscale x 8 x half> %b)
  ret <vscale x 8 x half> %out
}

define <vscale x 4 x float> @fsub_float(<vscale x 4 x float> %a, <vscale x 4 x float> %b) #0 {
; CHECK-LABEL: fsub_float:
; CHECK: fsub z0.s, z0.s, z1.s
  %pg = call <vscale x 4 x i1> @llvm.aarch64.sve.ptrue.nxv4i1(i32 31)
  %out = call <vscale x 4 x float> @llvm.aarch64.sve.fsub.nxv4f32(<vscale x 4 x i1> %pg, <vscale x 4 x float> %a, <vscale x 4 x float> %b)
  ret <vscale x 4 x float> %out
}

define <vscale x 2 x double> @fsub_double(<vscale x 2 x double> %a, <vscale x 2 x double> %b) #0 {
; CHECK-LABEL: fsub_double:
; CHECK: fsub z0.d, z0.d, z1.d
  %pg = call <vscale x 2 x i1> @llvm.aarch64.sve.ptrue.nxv2i1(i32 31)
  %out = call <vscale x 2 x double> @llvm.aarch64.sve.fsub.nxv2f64(<vscale x 2 x i1> %pg, <vscale x 2 x double> %a, <vscale x 2 x double> %b)
  ret <vscale x 2 x double> %out
}

define <vscale x 8 x half> @fmul_half(<vscale x 8 x half> %a, <vscale x 8 x half> %b) #0 {
; CHECK-LABEL: fmul_half:
; CHECK: fmul z0.h, z0.h, z1.h
  %pg = call <vscale x 8 x i1> @llvm.aarch64.sve.ptrue.nxv8i1(i32 31)
  %out = call <vscale x 8 x half> @llvm.aarch64.sve.fmul.nxv8f16(<vscale x 8 x i1> %pg, <vscale x 8 x half> %a, <vscale x 8 x half> %b)
  ret <vscale x 8 x half> %out
}

define <vscale x 4 x float> @fmul_float(<vscale x 4 x float> %a, <vscale x 4 x float> %b) #0 {
; CHECK-LABEL: fmul_float:
; CHECK: fmul z0.s, z0.s, z1.s
  %pg = call <vscale x 4 x i1> @llvm.aarch64.sve.ptrue.nxv4i1(i32 31)
  %out = call <vscale x 4 x float> @llvm.aarch64.sve.fmul.nxv4f32(<vscale x 4 x i1> %pg, <vscale x 4 x float> %a, <vscale x 4 x float> %b)
  ret <vscale x 4 x float> %out
}

define <vscale x 2 x double> @fmul_double(<vscale x 2 x double> %a, <vscale x 2 x double> %b) #0 {
; CHECK-LABEL: fmul_double:
; CHECK: fmul z0.d, z0.d, z1.d
  %pg = call <vscale x 2 x i1> @llvm.aarch64.sve.ptrue.nxv2i1(i32 31)
  %out = call <vscale x 2 x double> @llvm.aarch64.sve.fmul.nxv2f64(<vscale x 2 x i1> %pg, <vscale x 2 x double> %a, <vscale x 2 x double> %b)
  ret <vscale x 2 x double> %out
}

declare <vscale x 16 x i8> @llvm.aarch64.sve.add.nxv16i8(<vscale x 16 x i1>, <vscale x 16 x i8>, <vscale x 16 x i8>)
declare <vscale x 8 x i16> @llvm.aarch64.sve.add.nxv8i16(<vscale x 8 x i1>, <vscale x 8 x i16>, <vscale x 8 x i16>)
declare <vscale x 4 x i32> @llvm.aarch64.sve.add.nxv4i32(<vscale x 4 x i1>, <vscale x 4 x i32>, <vscale x 4 x i32>)
declare <vscale x 2 x i64> @llvm.aarch64.sve.add.nxv2i64(<vscale x 2 x i1>, <vscale x 2 x i64>, <vscale x 2 x i64>)

declare <vscale x 16 x i8> @llvm.aarch64.sve.sub.nxv16i8(<vscale x 16 x i1>, <vscale x 16 x i8>, <vscale x 16 x i8>)
declare <vscale x 8 x i16> @llvm.aarch64.sve.sub.nxv8i16(<vscale x 8 x i1>, <vscale x 8 x i16>, <vscale x 8 x i16>)
declare <vscale x 4 x i32> @llvm.aarch64.sve.sub.nxv4i32(<vscale x 4 x i1>, <vscale x 4 x i32>, <vscale x 4 x i32>)
declare <vscale x 2 x i64> @llvm.aarch64.sve.sub.nxv2i64(<vscale x 2 x i1>, <vscale x 2 x i64>, <vscale x 2 x i64>)

declare <vscale x 16 x i8> @llvm.aarch64.sve.mul.nxv16i8(<vscale x 16 x i1>, <vscale x 16 x i8>, <vscale x 16 x i8>)
declare <vscale x 8 x i16> @llvm.aarch64.sve.mul.nxv8i16(<vscale x 8 x i1>, <vscale x 8 x i16>, <vscale x 8 x i16>)
declare <vscale x 4 x i32> @llvm.aarch64.sve.mul.nxv4i32(<vscale x 4 x i1>, <vscale x 4 x i32>, <vscale x 4 x i32>)
declare <vscale x 2 x i64> @llvm.aarch64.sve.mul.nxv2i64(<vscale x 2 x i1>, <vscale x 2 x i64>, <vscale x 2 x i64>)

declare <vscale x 16 x i8> @llvm.aarch64.sve.smulh.nxv16i8(<vscale x 16 x i1>, <vscale x 16 x i8>, <vscale x 16 x i8>)
declare <vscale x 8 x i16> @llvm.aarch64.sve.smulh.nxv8i16(<vscale x 8 x i1>, <vscale x 8 x i16>, <vscale x 8 x i16>)
declare <vscale x 4 x i32> @llvm.aarch64.sve.smulh.nxv4i32(<vscale x 4 x i1>, <vscale x 4 x i32>, <vscale x 4 x i32>)
declare <vscale x 2 x i64> @llvm.aarch64.sve.smulh.nxv2i64(<vscale x 2 x i1>, <vscale x 2 x i64>, <vscale x 2 x i64>)

declare <vscale x 16 x i8> @llvm.aarch64.sve.umulh.nxv16i8(<vscale x 16 x i1>, <vscale x 16 x i8>, <vscale x 16 x i8>)
declare <vscale x 8 x i16> @llvm.aarch64.sve.umulh.nxv8i16(<vscale x 8 x i1>, <vscale x 8 x i16>, <vscale x 8 x i16>)
declare <vscale x 4 x i32> @llvm.aarch64.sve.umulh.nxv4i32(<vscale x 4 x i1>, <vscale x 4 x i32>, <vscale x 4 x i32>)
declare <vscale x 2 x i64> @llvm.aarch64.sve.umulh.nxv2i64(<vscale x 2 x i1>, <vscale x 2 x i64>, <vscale x 2 x i64>)

declare <vscale x 16 x i8> @llvm.aarch64.sve.and.nxv16i8(<vscale x 16 x i1>, <vscale x 16 x i8>, <vscale x 16 x i8>)
declare <vscale x 8 x i16> @llvm.aarch64.sve.and.nxv8i16(<vscale x 8 x i1>, <vscale x 8 x i16>, <vscale x 8 x i16>)
declare <vscale x 4 x i32> @llvm.aarch64.sve.and.nxv4i32(<vscale x 4 x i1>, <vscale x 4 x i32>, <vscale x 4 x i32>)
declare <vscale x 2 x i64> @llvm.aarch64.sve.and.nxv2i64(<vscale x 2 x i1>, <vscale x 2 x i64>, <vscale x 2 x i64>)

declare <vscale x 16 x i8> @llvm.aarch64.sve.bic.nxv16i8(<vscale x 16 x i1>, <vscale x 16 x i8>, <vscale x 16 x i8>)
declare <vscale x 8 x i16> @llvm.aarch64.sve.bic.nxv8i16(<vscale x 8 x i1>, <vscale x 8 x i16>, <vscale x 8 x i16>)
declare <vscale x 4 x i32> @llvm.aarch64.sve.bic.nxv4i32(<vscale x 4 x i1>, <vscale x 4 x i32>, <vscale x 4 x i32>)
declare <vscale x 2 x i64> @llvm.aarch64.sve.bic.nxv2i64(<vscale x 2 x i1>, <vscale x 2 x i64>, <vscale x 2 x i64>)

declare <vscale x 16 x i8> @llvm.aarch64.sve.eor.nxv16i8(<vscale x 16 x i1>, <vscale x 16 x i8>, <vscale x 16 x i8>)
declare <vscale x 8 x i16> @llvm.aarch64.sve.eor.nxv8i16(<vscale x 8 x i1>, <vscale x 8 x i16>, <vscale x 8 x i16>)
declare <vscale x 4 x i32> @llvm.aarch64.sve.eor.nxv4i32(<vscale x 4 x i1>, <vscale x 4 x i32>, <vscale x 4 x i32>)
declare <vscale x 2 x i64> @llvm.aarch64.sve.eor.nxv2i64(<vscale x 2 x i1>, <vscale x 2 x i64>, <vscale x 2 x i64>)

declare <vscale x 16 x i8> @llvm.aarch64.sve.orr.nxv16i8(<vscale x 16 x i1>, <vscale x 16 x i8>, <vscale x 16 x i8>)
declare <vscale x 8 x i16> @llvm.aarch64.sve.orr.nxv8i16(<vscale x 8 x i1>, <vscale x 8 x i16>, <vscale x 8 x i16>)
declare <vscale x 4 x i32> @llvm.aarch64.sve.orr.nxv4i32(<vscale x 4 x i1>, <vscale x 4 x i32>, <vscale x 4 x i32>)
declare <vscale x 2 x i64> @llvm.aarch64.sve.orr.nxv2i64(<vscale x 2 x i1>, <vscale x 2 x i64>, <vscale x 2 x i64>)

declare <vscale x 16 x i8> @llvm.aarch64.sve.sqadd.nxv16i8(<vscale x 16 x i1>, <vscale x 16 x i8>, <vscale x 16 x i8>)
declare <vscale x 8 x i16> @llvm.aarch64.sve.sqadd.nxv8i16(<vscale x 8 x i1>, <vscale x 8 x i16>, <vscale x 8 x i16>)
declare <vscale x 4 x i32> @llvm.aarch64.sve.sqadd.nxv4i32(<vscale x 4 x i1>, <vscale x 4 x i32>, <vscale x 4 x i32>)
declare <vscale x 2 x i64> @llvm.aarch64.sve.sqadd.nxv2i64(<vscale x 2 x i1>, <vscale x 2 x i64>, <vscale x 2 x i64>)

declare <vscale x 16 x i8> @llvm.aarch64.sve.sqsub.nxv16i8(<vscale x 16 x i1>, <vscale x 16 x i8>, <vscale x 16 x i8>)
declare <vscale x 8 x i16> @llvm.aarch64.sve.sqsub.nxv8i16(<vscale x 8 x i1>, <vscale x 8 x i16>, <vscale x 8 x i16>)
declare <vscale x 4 x i32> @llvm.aarch64.sve.sqsub.nxv4i32(<vscale x 4 x i1>, <vscale x 4 x i32>, <vscale x 4 x i32>)
declare <vscale x 2 x i64> @llvm.aarch64.sve.sqsub.nxv2i64(<vscale x 2 x i1>, <vscale x 2 x i64>, <vscale x 2 x i64>)

declare <vscale x 16 x i8> @llvm.aarch64.sve.uqadd.nxv16i8(<vscale x 16 x i1>, <vscale x 16 x i8>, <vscale x 16 x i8>)
declare <vscale x 8 x i16> @llvm.aarch64.sve.uqadd.nxv8i16(<vscale x 8 x i1>, <vscale x 8 x i16>, <vscale x 8 x i16>)
declare <vscale x 4 x i32> @llvm.aarch64.sve.uqadd.nxv4i32(<vscale x 4 x i1>, <vscale x 4 x i32>, <vscale x 4 x i32>)
declare <vscale x 2 x i64> @llvm.aarch64.sve.uqadd.nxv2i64(<vscale x 2 x i1>, <vscale x 2 x i64>, <vscale x 2 x i64>)

declare <vscale x 16 x i8> @llvm.aarch64.sve.uqsub.nxv16i8(<vscale x 16 x i1>, <vscale x 16 x i8>, <vscale x 16 x i8>)
declare <vscale x 8 x i16> @llvm.aarch64.sve.uqsub.nxv8i16(<vscale x 8 x i1>, <vscale x 8 x i16>, <vscale x 8 x i16>)
declare <vscale x 4 x i32> @llvm.aarch64.sve.uqsub.nxv4i32(<vscale x 4 x i1>, <vscale x 4 x i32>, <vscale x 4 x i32>)
declare <vscale x 2 x i64> @llvm.aarch64.sve.uqsub.nxv2i64(<vscale x 2 x i1>, <vscale x 2 x i64>, <vscale x 2 x i64>)

declare <vscale x 16 x i8> @llvm.aarch64.sve.asr.wide.nxv16i8(<vscale x 16 x i1>, <vscale x 16 x i8>, <vscale x 2 x i64>)
declare <vscale x 8 x i16> @llvm.aarch64.sve.asr.wide.nxv8i16(<vscale x 8 x i1>, <vscale x 8 x i16>, <vscale x 2 x i64>)
declare <vscale x 4 x i32> @llvm.aarch64.sve.asr.wide.nxv4i32(<vscale x 4 x i1>, <vscale x 4 x i32>, <vscale x 2 x i64>)

declare <vscale x 16 x i8> @llvm.aarch64.sve.lsl.wide.nxv16i8(<vscale x 16 x i1>, <vscale x 16 x i8>, <vscale x 2 x i64>)
declare <vscale x 8 x i16> @llvm.aarch64.sve.lsl.wide.nxv8i16(<vscale x 8 x i1>, <vscale x 8 x i16>, <vscale x 2 x i64>)
declare <vscale x 4 x i32> @llvm.aarch64.sve.lsl.wide.nxv4i32(<vscale x 4 x i1>, <vscale x 4 x i32>, <vscale x 2 x i64>)

declare <vscale x 16 x i8> @llvm.aarch64.sve.lsr.wide.nxv16i8(<vscale x 16 x i1>, <vscale x 16 x i8>, <vscale x 2 x i64>)
declare <vscale x 8 x i16> @llvm.aarch64.sve.lsr.wide.nxv8i16(<vscale x 8 x i1>, <vscale x 8 x i16>, <vscale x 2 x i64>)
declare <vscale x 4 x i32> @llvm.aarch64.sve.lsr.wide.nxv4i32(<vscale x 4 x i1>, <vscale x 4 x i32>, <vscale x 2 x i64>)

declare <vscale x 8 x half> @llvm.aarch64.sve.fadd.nxv8f16(<vscale x 8 x i1>, <vscale x 8 x half>, <vscale x 8 x half>)
declare <vscale x 4 x float> @llvm.aarch64.sve.fadd.nxv4f32(<vscale x 4 x i1>, <vscale x 4 x float>, <vscale x 4 x float>)
declare <vscale x 2 x double> @llvm.aarch64.sve.fadd.nxv2f64(<vscale x 2 x i1>, <vscale x 2 x double>, <vscale x 2 x double>)

declare <vscale x 8 x half> @llvm.aarch64.sve.fsub.nxv8f16(<vscale x 8 x i1>, <vscale x 8 x half>, <vscale x 8 x half>)
declare <vscale x 4 x float> @llvm.aarch64.sve.fsub.nxv4f32(<vscale x 4 x i1>, <vscale x 4 x float>, <vscale x 4 x float>)
declare <vscale x 2 x double> @llvm.aarch64.sve.fsub.nxv2f64(<vscale x 2 x i1>, <vscale x 2 x double>, <vscale x 2 x double>)

declare <vscale x 8 x half> @llvm.aarch64.sve.fmul.nxv8f16(<vscale x 8 x i1>, <vscale x 8 x half>, <vscale x 8 x half>)
declare <vscale x 4 x float> @llvm.aarch64.sve.fmul.nxv4f32(<vscale x 4 x i1>, <vscale x 4 x float>, <vscale x 4 x float>)
declare <vscale x 2 x double> @llvm.aarch64.sve.fmul.nxv2f64(<vscale x 2 x i1>, <vscale x 2 x double>, <vscale x 2 x double>)

declare <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1>)
declare <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1>)
declare <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1>)

declare <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv4i1(<vscale x 4 x i1>)
declare <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv8i1(<vscale x 8 x i1>)
declare <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv2i1(<vscale x 2 x i1>)

declare <vscale x 16 x i1> @llvm.aarch64.sve.ptrue.nxv16i1(i32)
declare <vscale x 8 x i1> @llvm.aarch64.sve.ptrue.nxv8i1(i32)
declare <vscale x 4 x i1> @llvm.aarch64.sve.ptrue.nxv4i1(i32)
declare <vscale x 2 x i1> @llvm.aarch64.sve.ptrue.nxv2i1(i32)

attributes #0 = { "target-features"="+sve2" }