1 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2 ; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sve < %s | FileCheck %s
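; This file checks that calls to the SVE arithmetic, min/max, saturating and
; shift intrinsics whose second operand is a splatted constant are matched to
; the immediate instruction forms when the constant fits the encoding, and
; fall back to a vector operand otherwise.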
6 define <vscale x 16 x i8> @add_i8(<vscale x 16 x i8> %a) {
7 ; CHECK-LABEL: add_i8:
9 ; CHECK-NEXT: add z0.b, z0.b, #127 // =0x7f
11 %pg = call <vscale x 16 x i1> @llvm.aarch64.sve.ptrue.nxv16i1(i32 31)
12 %out = call <vscale x 16 x i8> @llvm.aarch64.sve.add.u.nxv16i8(<vscale x 16 x i1> %pg,
13 <vscale x 16 x i8> %a,
14 <vscale x 16 x i8> splat(i8 127))
15 ret <vscale x 16 x i8> %out
18 define <vscale x 8 x i16> @add_i16(<vscale x 8 x i16> %a) {
19 ; CHECK-LABEL: add_i16:
21 ; CHECK-NEXT: add z0.h, z0.h, #127 // =0x7f
23 %pg = call <vscale x 8 x i1> @llvm.aarch64.sve.ptrue.nxv8i1(i32 31)
24 %out = call <vscale x 8 x i16> @llvm.aarch64.sve.add.u.nxv8i16(<vscale x 8 x i1> %pg,
25 <vscale x 8 x i16> %a,
26 <vscale x 8 x i16> splat(i16 127))
27 ret <vscale x 8 x i16> %out
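; ADD/SUB (immediate) encode an unsigned 8-bit value, optionally shifted left
; by 8, so 257 (0x101) cannot use the immediate form. For .h lanes the
; constant is still cheap to materialise: DUPM with the repeating byte pattern
; 0x01 gives 0x0101 per halfword.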
30 define <vscale x 8 x i16> @add_i16_out_of_range(<vscale x 8 x i16> %a) {
31 ; CHECK-LABEL: add_i16_out_of_range:
33 ; CHECK-NEXT: dupm z1.b, #0x1
34 ; CHECK-NEXT: add z0.h, z0.h, z1.h
36 %pg = call <vscale x 8 x i1> @llvm.aarch64.sve.ptrue.nxv8i1(i32 31)
37 %out = call <vscale x 8 x i16> @llvm.aarch64.sve.add.u.nxv8i16(<vscale x 8 x i1> %pg,
38 <vscale x 8 x i16> %a,
39 <vscale x 8 x i16> splat(i16 257))
40 ret <vscale x 8 x i16> %out
43 define <vscale x 4 x i32> @add_i32(<vscale x 4 x i32> %a) {
44 ; CHECK-LABEL: add_i32:
46 ; CHECK-NEXT: add z0.s, z0.s, #127 // =0x7f
48 %pg = call <vscale x 4 x i1> @llvm.aarch64.sve.ptrue.nxv4i1(i32 31)
49 %out = call <vscale x 4 x i32> @llvm.aarch64.sve.add.u.nxv4i32(<vscale x 4 x i1> %pg,
50 <vscale x 4 x i32> %a,
51 <vscale x 4 x i32> splat(i32 127))
52 ret <vscale x 4 x i32> %out
55 define <vscale x 4 x i32> @add_i32_out_of_range(<vscale x 4 x i32> %a) {
56 ; CHECK-LABEL: add_i32_out_of_range:
58 ; CHECK-NEXT: mov w8, #257 // =0x101
59 ; CHECK-NEXT: mov z1.s, w8
60 ; CHECK-NEXT: add z0.s, z0.s, z1.s
62 %pg = call <vscale x 4 x i1> @llvm.aarch64.sve.ptrue.nxv4i1(i32 31)
63 %out = call <vscale x 4 x i32> @llvm.aarch64.sve.add.u.nxv4i32(<vscale x 4 x i1> %pg,
64 <vscale x 4 x i32> %a,
65 <vscale x 4 x i32> splat(i32 257))
66 ret <vscale x 4 x i32> %out
69 define <vscale x 2 x i64> @add_i64(<vscale x 2 x i64> %a) {
70 ; CHECK-LABEL: add_i64:
72 ; CHECK-NEXT: add z0.d, z0.d, #127 // =0x7f
74 %pg = call <vscale x 2 x i1> @llvm.aarch64.sve.ptrue.nxv2i1(i32 31)
75 %out = call <vscale x 2 x i64> @llvm.aarch64.sve.add.u.nxv2i64(<vscale x 2 x i1> %pg,
76 <vscale x 2 x i64> %a,
77 <vscale x 2 x i64> splat(i64 127))
78 ret <vscale x 2 x i64> %out
81 define <vscale x 2 x i64> @add_i64_out_of_range(<vscale x 2 x i64> %a) {
82 ; CHECK-LABEL: add_i64_out_of_range:
84 ; CHECK-NEXT: mov w8, #257 // =0x101
85 ; CHECK-NEXT: mov z1.d, x8
86 ; CHECK-NEXT: add z0.d, z0.d, z1.d
88 %pg = call <vscale x 2 x i1> @llvm.aarch64.sve.ptrue.nxv2i1(i32 31)
89 %out = call <vscale x 2 x i64> @llvm.aarch64.sve.add.u.nxv2i64(<vscale x 2 x i1> %pg,
90 <vscale x 2 x i64> %a,
91 <vscale x 2 x i64> splat(i64 257))
92 ret <vscale x 2 x i64> %out
97 define <vscale x 16 x i8> @sub_i8(<vscale x 16 x i8> %a) {
98 ; CHECK-LABEL: sub_i8:
100 ; CHECK-NEXT: sub z0.b, z0.b, #127 // =0x7f
102 %pg = call <vscale x 16 x i1> @llvm.aarch64.sve.ptrue.nxv16i1(i32 31)
103 %out = call <vscale x 16 x i8> @llvm.aarch64.sve.sub.u.nxv16i8(<vscale x 16 x i1> %pg,
104 <vscale x 16 x i8> %a,
105 <vscale x 16 x i8> splat(i8 127))
106 ret <vscale x 16 x i8> %out
109 define <vscale x 8 x i16> @sub_i16(<vscale x 8 x i16> %a) {
110 ; CHECK-LABEL: sub_i16:
112 ; CHECK-NEXT: sub z0.h, z0.h, #127 // =0x7f
114 %pg = call <vscale x 8 x i1> @llvm.aarch64.sve.ptrue.nxv8i1(i32 31)
115 %out = call <vscale x 8 x i16> @llvm.aarch64.sve.sub.u.nxv8i16(<vscale x 8 x i1> %pg,
116 <vscale x 8 x i16> %a,
117 <vscale x 8 x i16> splat(i16 127))
118 ret <vscale x 8 x i16> %out
121 define <vscale x 8 x i16> @sub_i16_out_of_range(<vscale x 8 x i16> %a) {
122 ; CHECK-LABEL: sub_i16_out_of_range:
124 ; CHECK-NEXT: dupm z1.b, #0x1
125 ; CHECK-NEXT: sub z0.h, z0.h, z1.h
127 %pg = call <vscale x 8 x i1> @llvm.aarch64.sve.ptrue.nxv8i1(i32 31)
128 %out = call <vscale x 8 x i16> @llvm.aarch64.sve.sub.u.nxv8i16(<vscale x 8 x i1> %pg,
129 <vscale x 8 x i16> %a,
130 <vscale x 8 x i16> splat(i16 257))
131 ret <vscale x 8 x i16> %out
134 define <vscale x 4 x i32> @sub_i32(<vscale x 4 x i32> %a) {
135 ; CHECK-LABEL: sub_i32:
137 ; CHECK-NEXT: sub z0.s, z0.s, #127 // =0x7f
139 %pg = call <vscale x 4 x i1> @llvm.aarch64.sve.ptrue.nxv4i1(i32 31)
140 %out = call <vscale x 4 x i32> @llvm.aarch64.sve.sub.u.nxv4i32(<vscale x 4 x i1> %pg,
141 <vscale x 4 x i32> %a,
142 <vscale x 4 x i32> splat(i32 127))
143 ret <vscale x 4 x i32> %out
146 define <vscale x 4 x i32> @sub_i32_out_of_range(<vscale x 4 x i32> %a) {
147 ; CHECK-LABEL: sub_i32_out_of_range:
149 ; CHECK-NEXT: mov w8, #257 // =0x101
150 ; CHECK-NEXT: mov z1.s, w8
151 ; CHECK-NEXT: sub z0.s, z0.s, z1.s
153 %pg = call <vscale x 4 x i1> @llvm.aarch64.sve.ptrue.nxv4i1(i32 31)
154 %out = call <vscale x 4 x i32> @llvm.aarch64.sve.sub.u.nxv4i32(<vscale x 4 x i1> %pg,
155 <vscale x 4 x i32> %a,
156 <vscale x 4 x i32> splat(i32 257))
157 ret <vscale x 4 x i32> %out
160 define <vscale x 2 x i64> @sub_i64(<vscale x 2 x i64> %a) {
161 ; CHECK-LABEL: sub_i64:
163 ; CHECK-NEXT: sub z0.d, z0.d, #127 // =0x7f
165 %pg = call <vscale x 2 x i1> @llvm.aarch64.sve.ptrue.nxv2i1(i32 31)
166 %out = call <vscale x 2 x i64> @llvm.aarch64.sve.sub.u.nxv2i64(<vscale x 2 x i1> %pg,
167 <vscale x 2 x i64> %a,
168 <vscale x 2 x i64> splat(i64 127))
169 ret <vscale x 2 x i64> %out
172 define <vscale x 2 x i64> @sub_i64_out_of_range(<vscale x 2 x i64> %a) {
173 ; CHECK-LABEL: sub_i64_out_of_range:
175 ; CHECK-NEXT: mov w8, #257 // =0x101
176 ; CHECK-NEXT: mov z1.d, x8
177 ; CHECK-NEXT: sub z0.d, z0.d, z1.d
179 %pg = call <vscale x 2 x i1> @llvm.aarch64.sve.ptrue.nxv2i1(i32 31)
180 %out = call <vscale x 2 x i64> @llvm.aarch64.sve.sub.u.nxv2i64(<vscale x 2 x i1> %pg,
181 <vscale x 2 x i64> %a,
182 <vscale x 2 x i64> splat(i64 257))
183 ret <vscale x 2 x i64> %out
186 ; As sub_i32 but where pg is i8 based and thus compatible for i32.
187 define <vscale x 4 x i32> @sub_i32_ptrue_all_b(<vscale x 4 x i32> %a) #0 {
188 ; CHECK-LABEL: sub_i32_ptrue_all_b:
190 ; CHECK-NEXT: sub z0.s, z0.s, #1 // =0x1
192 %pg.b = tail call <vscale x 16 x i1> @llvm.aarch64.sve.ptrue.nxv16i1(i32 31)
193 %pg.s = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> %pg.b)
194 %b = tail call <vscale x 4 x i32> @llvm.aarch64.sve.dup.x.nxv4i32(i32 1)
195 %out = tail call <vscale x 4 x i32> @llvm.aarch64.sve.sub.u.nxv4i32(<vscale x 4 x i1> %pg.s,
196 <vscale x 4 x i32> %a,
197 <vscale x 4 x i32> %b)
198 ret <vscale x 4 x i32> %out
201 ; As sub_i32 but where pg is i16 based and thus compatible for i32.
202 define <vscale x 4 x i32> @sub_i32_ptrue_all_h(<vscale x 4 x i32> %a) #0 {
203 ; CHECK-LABEL: sub_i32_ptrue_all_h:
205 ; CHECK-NEXT: sub z0.s, z0.s, #1 // =0x1
207 %pg.h = tail call <vscale x 8 x i1> @llvm.aarch64.sve.ptrue.nxv8i1(i32 31)
208 %pg.b = tail call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv8i1(<vscale x 8 x i1> %pg.h)
209 %pg.s = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> %pg.b)
210 %b = tail call <vscale x 4 x i32> @llvm.aarch64.sve.dup.x.nxv4i32(i32 1)
211 %out = tail call <vscale x 4 x i32> @llvm.aarch64.sve.sub.u.nxv4i32(<vscale x 4 x i1> %pg.s,
212 <vscale x 4 x i32> %a,
213 <vscale x 4 x i32> %b)
214 ret <vscale x 4 x i32> %out
217 ; As sub_i32 but where pg is i64 based, which is not compatible for i32 and
218 ; thus inactive lanes are important and the immediate form cannot be used.
219 define <vscale x 4 x i32> @sub_i32_ptrue_all_d(<vscale x 4 x i32> %a) #0 {
220 ; CHECK-LABEL: sub_i32_ptrue_all_d:
222 ; CHECK-NEXT: mov z1.s, #1 // =0x1
223 ; CHECK-NEXT: ptrue p0.d
224 ; CHECK-NEXT: sub z0.s, p0/m, z0.s, z1.s
226 %pg.d = tail call <vscale x 2 x i1> @llvm.aarch64.sve.ptrue.nxv2i1(i32 31)
227 %pg.b = tail call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv2i1(<vscale x 2 x i1> %pg.d)
228 %pg.s = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> %pg.b)
229 %b = tail call <vscale x 4 x i32> @llvm.aarch64.sve.dup.x.nxv4i32(i32 1)
230 %out = tail call <vscale x 4 x i32> @llvm.aarch64.sve.sub.nxv4i32(<vscale x 4 x i1> %pg.s,
231 <vscale x 4 x i32> %a,
232 <vscale x 4 x i32> %b)
233 ret <vscale x 4 x i32> %out
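; SUBR is the reversed subtract (immediate minus vector). The same immediate
; encoding as SUB applies; out-of-range constants fall back to a plain SUB
; with the operands swapped.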
238 define <vscale x 16 x i8> @subr_i8(<vscale x 16 x i8> %a) {
239 ; CHECK-LABEL: subr_i8:
241 ; CHECK-NEXT: subr z0.b, z0.b, #127 // =0x7f
243 %pg = call <vscale x 16 x i1> @llvm.aarch64.sve.ptrue.nxv16i1(i32 31)
244 %out = call <vscale x 16 x i8> @llvm.aarch64.sve.subr.nxv16i8(<vscale x 16 x i1> %pg,
245 <vscale x 16 x i8> %a,
246 <vscale x 16 x i8> splat(i8 127))
247 ret <vscale x 16 x i8> %out
250 define <vscale x 8 x i16> @subr_i16(<vscale x 8 x i16> %a) {
251 ; CHECK-LABEL: subr_i16:
253 ; CHECK-NEXT: subr z0.h, z0.h, #127 // =0x7f
255 %pg = call <vscale x 8 x i1> @llvm.aarch64.sve.ptrue.nxv8i1(i32 31)
256 %out = call <vscale x 8 x i16> @llvm.aarch64.sve.subr.nxv8i16(<vscale x 8 x i1> %pg,
257 <vscale x 8 x i16> %a,
258 <vscale x 8 x i16> splat(i16 127))
259 ret <vscale x 8 x i16> %out
262 define <vscale x 8 x i16> @subr_i16_out_of_range(<vscale x 8 x i16> %a) {
263 ; CHECK-LABEL: subr_i16_out_of_range:
265 ; CHECK-NEXT: dupm z1.b, #0x1
266 ; CHECK-NEXT: sub z0.h, z1.h, z0.h
268 %pg = call <vscale x 8 x i1> @llvm.aarch64.sve.ptrue.nxv8i1(i32 31)
269 %out = call <vscale x 8 x i16> @llvm.aarch64.sve.subr.nxv8i16(<vscale x 8 x i1> %pg,
270 <vscale x 8 x i16> %a,
271 <vscale x 8 x i16> splat(i16 257))
272 ret <vscale x 8 x i16> %out
275 define <vscale x 4 x i32> @subr_i32(<vscale x 4 x i32> %a) {
276 ; CHECK-LABEL: subr_i32:
278 ; CHECK-NEXT: subr z0.s, z0.s, #127 // =0x7f
280 %pg = call <vscale x 4 x i1> @llvm.aarch64.sve.ptrue.nxv4i1(i32 31)
281 %out = call <vscale x 4 x i32> @llvm.aarch64.sve.subr.nxv4i32(<vscale x 4 x i1> %pg,
282 <vscale x 4 x i32> %a,
283 <vscale x 4 x i32> splat(i32 127))
284 ret <vscale x 4 x i32> %out
287 define <vscale x 4 x i32> @subr_i32_out_of_range(<vscale x 4 x i32> %a) {
288 ; CHECK-LABEL: subr_i32_out_of_range:
290 ; CHECK-NEXT: mov w8, #257 // =0x101
291 ; CHECK-NEXT: mov z1.s, w8
292 ; CHECK-NEXT: sub z0.s, z1.s, z0.s
294 %pg = call <vscale x 4 x i1> @llvm.aarch64.sve.ptrue.nxv4i1(i32 31)
295 %out = call <vscale x 4 x i32> @llvm.aarch64.sve.subr.nxv4i32(<vscale x 4 x i1> %pg,
296 <vscale x 4 x i32> %a,
297 <vscale x 4 x i32> splat(i32 257))
298 ret <vscale x 4 x i32> %out
301 define <vscale x 2 x i64> @subr_i64(<vscale x 2 x i64> %a) {
302 ; CHECK-LABEL: subr_i64:
304 ; CHECK-NEXT: subr z0.d, z0.d, #127 // =0x7f
306 %pg = call <vscale x 2 x i1> @llvm.aarch64.sve.ptrue.nxv2i1(i32 31)
307 %out = call <vscale x 2 x i64> @llvm.aarch64.sve.subr.nxv2i64(<vscale x 2 x i1> %pg,
308 <vscale x 2 x i64> %a,
309 <vscale x 2 x i64> splat(i64 127))
310 ret <vscale x 2 x i64> %out
313 define <vscale x 2 x i64> @subr_i64_out_of_range(<vscale x 2 x i64> %a) {
314 ; CHECK-LABEL: subr_i64_out_of_range:
316 ; CHECK-NEXT: mov w8, #257 // =0x101
317 ; CHECK-NEXT: mov z1.d, x8
318 ; CHECK-NEXT: sub z0.d, z1.d, z0.d
320 %pg = call <vscale x 2 x i1> @llvm.aarch64.sve.ptrue.nxv2i1(i32 31)
321 %out = call <vscale x 2 x i64> @llvm.aarch64.sve.subr.nxv2i64(<vscale x 2 x i1> %pg,
322 <vscale x 2 x i64> %a,
323 <vscale x 2 x i64> splat(i64 257))
324 ret <vscale x 2 x i64> %out
327 ; As subr_i32 but where pg is i8 based and thus compatible for i32.
328 define <vscale x 4 x i32> @subr_i32_ptrue_all_b(<vscale x 4 x i32> %a) #0 {
329 ; CHECK-LABEL: subr_i32_ptrue_all_b:
331 ; CHECK-NEXT: subr z0.s, z0.s, #1 // =0x1
333 %pg.b = tail call <vscale x 16 x i1> @llvm.aarch64.sve.ptrue.nxv16i1(i32 31)
334 %pg.s = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> %pg.b)
335 %b = tail call <vscale x 4 x i32> @llvm.aarch64.sve.dup.x.nxv4i32(i32 1)
336 %out = tail call <vscale x 4 x i32> @llvm.aarch64.sve.subr.nxv4i32(<vscale x 4 x i1> %pg.s,
337 <vscale x 4 x i32> %a,
338 <vscale x 4 x i32> %b)
339 ret <vscale x 4 x i32> %out
342 ; As subr_i32 but where pg is i16 based and thus compatible for i32.
343 define <vscale x 4 x i32> @subr_i32_ptrue_all_h(<vscale x 4 x i32> %a) #0 {
344 ; CHECK-LABEL: subr_i32_ptrue_all_h:
346 ; CHECK-NEXT: subr z0.s, z0.s, #1 // =0x1
348 %pg.h = tail call <vscale x 8 x i1> @llvm.aarch64.sve.ptrue.nxv8i1(i32 31)
349 %pg.b = tail call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv8i1(<vscale x 8 x i1> %pg.h)
350 %pg.s = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> %pg.b)
351 %b = tail call <vscale x 4 x i32> @llvm.aarch64.sve.dup.x.nxv4i32(i32 1)
352 %out = tail call <vscale x 4 x i32> @llvm.aarch64.sve.subr.nxv4i32(<vscale x 4 x i1> %pg.s,
353 <vscale x 4 x i32> %a,
354 <vscale x 4 x i32> %b)
355 ret <vscale x 4 x i32> %out
358 ; As subr_i32 but where pg is i64 based, which is not compatible for i32 and
359 ; thus inactive lanes are important and the immediate form cannot be used.
360 define <vscale x 4 x i32> @subr_i32_ptrue_all_d(<vscale x 4 x i32> %a) #0 {
361 ; CHECK-LABEL: subr_i32_ptrue_all_d:
363 ; CHECK-NEXT: mov z1.s, #1 // =0x1
364 ; CHECK-NEXT: ptrue p0.d
365 ; CHECK-NEXT: subr z0.s, p0/m, z0.s, z1.s
367 %pg.d = tail call <vscale x 2 x i1> @llvm.aarch64.sve.ptrue.nxv2i1(i32 31)
368 %pg.b = tail call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv2i1(<vscale x 2 x i1> %pg.d)
369 %pg.s = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> %pg.b)
370 %b = tail call <vscale x 4 x i32> @llvm.aarch64.sve.dup.x.nxv4i32(i32 1)
371 %out = tail call <vscale x 4 x i32> @llvm.aarch64.sve.subr.nxv4i32(<vscale x 4 x i1> %pg.s,
372 <vscale x 4 x i32> %a,
373 <vscale x 4 x i32> %b)
374 ret <vscale x 4 x i32> %out
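; SMAX/SMIN (immediate) encode a signed 8-bit value (-128..127), so constants
; outside that range must be materialised in a register.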
379 define <vscale x 16 x i8> @smax_i8(<vscale x 16 x i8> %a) {
380 ; CHECK-LABEL: smax_i8:
382 ; CHECK-NEXT: smax z0.b, z0.b, #-128
384 %pg = call <vscale x 16 x i1> @llvm.aarch64.sve.ptrue.nxv16i1(i32 31)
385 %out = call <vscale x 16 x i8> @llvm.aarch64.sve.smax.u.nxv16i8(<vscale x 16 x i1> %pg,
386 <vscale x 16 x i8> %a,
387 <vscale x 16 x i8> splat(i8 -128))
388 ret <vscale x 16 x i8> %out
391 define <vscale x 8 x i16> @smax_i16(<vscale x 8 x i16> %a) {
392 ; CHECK-LABEL: smax_i16:
394 ; CHECK-NEXT: smax z0.h, z0.h, #127
396 %pg = call <vscale x 8 x i1> @llvm.aarch64.sve.ptrue.nxv8i1(i32 31)
397 %out = call <vscale x 8 x i16> @llvm.aarch64.sve.smax.u.nxv8i16(<vscale x 8 x i1> %pg,
398 <vscale x 8 x i16> %a,
399 <vscale x 8 x i16> splat(i16 127))
400 ret <vscale x 8 x i16> %out
403 define <vscale x 8 x i16> @smax_i16_out_of_range(<vscale x 8 x i16> %a) {
404 ; CHECK-LABEL: smax_i16_out_of_range:
406 ; CHECK-NEXT: mov w8, #129 // =0x81
407 ; CHECK-NEXT: ptrue p0.h
408 ; CHECK-NEXT: mov z1.h, w8
409 ; CHECK-NEXT: smax z0.h, p0/m, z0.h, z1.h
411 %pg = call <vscale x 8 x i1> @llvm.aarch64.sve.ptrue.nxv8i1(i32 31)
412 %out = call <vscale x 8 x i16> @llvm.aarch64.sve.smax.u.nxv8i16(<vscale x 8 x i1> %pg,
413 <vscale x 8 x i16> %a,
414 <vscale x 8 x i16> splat(i16 129))
415 ret <vscale x 8 x i16> %out
418 define <vscale x 4 x i32> @smax_i32(<vscale x 4 x i32> %a) {
419 ; CHECK-LABEL: smax_i32:
421 ; CHECK-NEXT: smax z0.s, z0.s, #-128
423 %pg = call <vscale x 4 x i1> @llvm.aarch64.sve.ptrue.nxv4i1(i32 31)
424 %out = call <vscale x 4 x i32> @llvm.aarch64.sve.smax.u.nxv4i32(<vscale x 4 x i1> %pg,
425 <vscale x 4 x i32> %a,
426 <vscale x 4 x i32> splat(i32 -128))
427 ret <vscale x 4 x i32> %out
430 define <vscale x 4 x i32> @smax_i32_out_of_range(<vscale x 4 x i32> %a) {
431 ; CHECK-LABEL: smax_i32_out_of_range:
433 ; CHECK-NEXT: mov z1.s, #-129 // =0xffffffffffffff7f
434 ; CHECK-NEXT: ptrue p0.s
435 ; CHECK-NEXT: smax z0.s, p0/m, z0.s, z1.s
437 %pg = call <vscale x 4 x i1> @llvm.aarch64.sve.ptrue.nxv4i1(i32 31)
438 %out = call <vscale x 4 x i32> @llvm.aarch64.sve.smax.u.nxv4i32(<vscale x 4 x i1> %pg,
439 <vscale x 4 x i32> %a,
440 <vscale x 4 x i32> splat(i32 -129))
441 ret <vscale x 4 x i32> %out
444 define <vscale x 2 x i64> @smax_i64(<vscale x 2 x i64> %a) {
445 ; CHECK-LABEL: smax_i64:
447 ; CHECK-NEXT: smax z0.d, z0.d, #127
449 %pg = call <vscale x 2 x i1> @llvm.aarch64.sve.ptrue.nxv2i1(i32 31)
450 %out = call <vscale x 2 x i64> @llvm.aarch64.sve.smax.u.nxv2i64(<vscale x 2 x i1> %pg,
451 <vscale x 2 x i64> %a,
452 <vscale x 2 x i64> splat(i64 127))
453 ret <vscale x 2 x i64> %out
456 define <vscale x 2 x i64> @smax_i64_out_of_range(<vscale x 2 x i64> %a) {
457 ; CHECK-LABEL: smax_i64_out_of_range:
459 ; CHECK-NEXT: mov z1.d, #65535 // =0xffff
460 ; CHECK-NEXT: ptrue p0.d
461 ; CHECK-NEXT: smax z0.d, p0/m, z0.d, z1.d
463 %pg = call <vscale x 2 x i1> @llvm.aarch64.sve.ptrue.nxv2i1(i32 31)
464 %out = call <vscale x 2 x i64> @llvm.aarch64.sve.smax.u.nxv2i64(<vscale x 2 x i1> %pg,
465 <vscale x 2 x i64> %a,
466 <vscale x 2 x i64> splat(i64 65535))
467 ret <vscale x 2 x i64> %out
470 ; As smax_i32 but where pg is i8 based and thus compatible for i32.
471 define <vscale x 4 x i32> @smax_i32_ptrue_all_b(<vscale x 4 x i32> %a) #0 {
472 ; CHECK-LABEL: smax_i32_ptrue_all_b:
474 ; CHECK-NEXT: smax z0.s, z0.s, #1
476 %pg.b = tail call <vscale x 16 x i1> @llvm.aarch64.sve.ptrue.nxv16i1(i32 31)
477 %pg.s = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> %pg.b)
478 %b = tail call <vscale x 4 x i32> @llvm.aarch64.sve.dup.x.nxv4i32(i32 1)
479 %out = tail call <vscale x 4 x i32> @llvm.aarch64.sve.smax.u.nxv4i32(<vscale x 4 x i1> %pg.s,
480 <vscale x 4 x i32> %a,
481 <vscale x 4 x i32> %b)
482 ret <vscale x 4 x i32> %out
485 ; As smax_i32 but where pg is i16 based and thus compatible for i32.
486 define <vscale x 4 x i32> @smax_i32_ptrue_all_h(<vscale x 4 x i32> %a) #0 {
487 ; CHECK-LABEL: smax_i32_ptrue_all_h:
489 ; CHECK-NEXT: smax z0.s, z0.s, #1
491 %pg.h = tail call <vscale x 8 x i1> @llvm.aarch64.sve.ptrue.nxv8i1(i32 31)
492 %pg.b = tail call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv8i1(<vscale x 8 x i1> %pg.h)
493 %pg.s = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> %pg.b)
494 %b = tail call <vscale x 4 x i32> @llvm.aarch64.sve.dup.x.nxv4i32(i32 1)
495 %out = tail call <vscale x 4 x i32> @llvm.aarch64.sve.smax.u.nxv4i32(<vscale x 4 x i1> %pg.s,
496 <vscale x 4 x i32> %a,
497 <vscale x 4 x i32> %b)
498 ret <vscale x 4 x i32> %out
501 ; As smax_i32 but where pg is i64 based, which is not compatible for i32 and
502 ; thus inactive lanes are important and the immediate form cannot be used.
503 define <vscale x 4 x i32> @smax_i32_ptrue_all_d(<vscale x 4 x i32> %a) #0 {
504 ; CHECK-LABEL: smax_i32_ptrue_all_d:
506 ; CHECK-NEXT: mov z1.s, #1 // =0x1
507 ; CHECK-NEXT: ptrue p0.d
508 ; CHECK-NEXT: smax z0.s, p0/m, z0.s, z1.s
510 %pg.d = tail call <vscale x 2 x i1> @llvm.aarch64.sve.ptrue.nxv2i1(i32 31)
511 %pg.b = tail call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv2i1(<vscale x 2 x i1> %pg.d)
512 %pg.s = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> %pg.b)
513 %b = tail call <vscale x 4 x i32> @llvm.aarch64.sve.dup.x.nxv4i32(i32 1)
514 %out = tail call <vscale x 4 x i32> @llvm.aarch64.sve.smax.nxv4i32(<vscale x 4 x i1> %pg.s,
515 <vscale x 4 x i32> %a,
516 <vscale x 4 x i32> %b)
517 ret <vscale x 4 x i32> %out
522 define <vscale x 16 x i8> @smin_i8(<vscale x 16 x i8> %a) {
523 ; CHECK-LABEL: smin_i8:
525 ; CHECK-NEXT: smin z0.b, z0.b, #127
527 %pg = call <vscale x 16 x i1> @llvm.aarch64.sve.ptrue.nxv16i1(i32 31)
528 %out = call <vscale x 16 x i8> @llvm.aarch64.sve.smin.u.nxv16i8(<vscale x 16 x i1> %pg,
529 <vscale x 16 x i8> %a,
530 <vscale x 16 x i8> splat(i8 127))
531 ret <vscale x 16 x i8> %out
534 define <vscale x 8 x i16> @smin_i16(<vscale x 8 x i16> %a) {
535 ; CHECK-LABEL: smin_i16:
537 ; CHECK-NEXT: smin z0.h, z0.h, #-128
539 %pg = call <vscale x 8 x i1> @llvm.aarch64.sve.ptrue.nxv8i1(i32 31)
540 %out = call <vscale x 8 x i16> @llvm.aarch64.sve.smin.u.nxv8i16(<vscale x 8 x i1> %pg,
541 <vscale x 8 x i16> %a,
542 <vscale x 8 x i16> splat(i16 -128))
543 ret <vscale x 8 x i16> %out
546 define <vscale x 8 x i16> @smin_i16_out_of_range(<vscale x 8 x i16> %a) {
547 ; CHECK-LABEL: smin_i16_out_of_range:
549 ; CHECK-NEXT: mov z1.h, #-129 // =0xffffffffffffff7f
550 ; CHECK-NEXT: ptrue p0.h
551 ; CHECK-NEXT: smin z0.h, p0/m, z0.h, z1.h
553 %pg = call <vscale x 8 x i1> @llvm.aarch64.sve.ptrue.nxv8i1(i32 31)
554 %out = call <vscale x 8 x i16> @llvm.aarch64.sve.smin.u.nxv8i16(<vscale x 8 x i1> %pg,
555 <vscale x 8 x i16> %a,
556 <vscale x 8 x i16> splat(i16 -129))
557 ret <vscale x 8 x i16> %out
560 define <vscale x 4 x i32> @smin_i32(<vscale x 4 x i32> %a) {
561 ; CHECK-LABEL: smin_i32:
563 ; CHECK-NEXT: smin z0.s, z0.s, #127
565 %pg = call <vscale x 4 x i1> @llvm.aarch64.sve.ptrue.nxv4i1(i32 31)
566 %out = call <vscale x 4 x i32> @llvm.aarch64.sve.smin.u.nxv4i32(<vscale x 4 x i1> %pg,
567 <vscale x 4 x i32> %a,
568 <vscale x 4 x i32> splat(i32 127))
569 ret <vscale x 4 x i32> %out
572 define <vscale x 4 x i32> @smin_i32_out_of_range(<vscale x 4 x i32> %a) {
573 ; CHECK-LABEL: smin_i32_out_of_range:
575 ; CHECK-NEXT: mov w8, #257 // =0x101
576 ; CHECK-NEXT: ptrue p0.s
577 ; CHECK-NEXT: mov z1.s, w8
578 ; CHECK-NEXT: smin z0.s, p0/m, z0.s, z1.s
580 %pg = call <vscale x 4 x i1> @llvm.aarch64.sve.ptrue.nxv4i1(i32 31)
581 %out = call <vscale x 4 x i32> @llvm.aarch64.sve.smin.u.nxv4i32(<vscale x 4 x i1> %pg,
582 <vscale x 4 x i32> %a,
583 <vscale x 4 x i32> splat(i32 257))
584 ret <vscale x 4 x i32> %out
588 define <vscale x 2 x i64> @smin_i64(<vscale x 2 x i64> %a) {
589 ; CHECK-LABEL: smin_i64:
591 ; CHECK-NEXT: smin z0.d, z0.d, #-128
593 %pg = call <vscale x 2 x i1> @llvm.aarch64.sve.ptrue.nxv2i1(i32 31)
594 %out = call <vscale x 2 x i64> @llvm.aarch64.sve.smin.u.nxv2i64(<vscale x 2 x i1> %pg,
595 <vscale x 2 x i64> %a,
596 <vscale x 2 x i64> splat(i64 -128))
597 ret <vscale x 2 x i64> %out
600 define <vscale x 2 x i64> @smin_i64_out_of_range(<vscale x 2 x i64> %a) {
601 ; CHECK-LABEL: smin_i64_out_of_range:
603 ; CHECK-NEXT: mov z1.d, #-256 // =0xffffffffffffff00
604 ; CHECK-NEXT: ptrue p0.d
605 ; CHECK-NEXT: smin z0.d, p0/m, z0.d, z1.d
607 %pg = call <vscale x 2 x i1> @llvm.aarch64.sve.ptrue.nxv2i1(i32 31)
608 %out = call <vscale x 2 x i64> @llvm.aarch64.sve.smin.u.nxv2i64(<vscale x 2 x i1> %pg,
609 <vscale x 2 x i64> %a,
610 <vscale x 2 x i64> splat(i64 -256))
611 ret <vscale x 2 x i64> %out
614 ; As smin_i32 but where pg is i8 based and thus compatible for i32.
615 define <vscale x 4 x i32> @smin_i32_ptrue_all_b(<vscale x 4 x i32> %a) #0 {
616 ; CHECK-LABEL: smin_i32_ptrue_all_b:
618 ; CHECK-NEXT: smin z0.s, z0.s, #1
620 %pg.b = tail call <vscale x 16 x i1> @llvm.aarch64.sve.ptrue.nxv16i1(i32 31)
621 %pg.s = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> %pg.b)
622 %b = tail call <vscale x 4 x i32> @llvm.aarch64.sve.dup.x.nxv4i32(i32 1)
623 %out = tail call <vscale x 4 x i32> @llvm.aarch64.sve.smin.u.nxv4i32(<vscale x 4 x i1> %pg.s,
624 <vscale x 4 x i32> %a,
625 <vscale x 4 x i32> %b)
626 ret <vscale x 4 x i32> %out
629 ; As smin_i32 but where pg is i16 based and thus compatible for i32.
630 define <vscale x 4 x i32> @smin_i32_ptrue_all_h(<vscale x 4 x i32> %a) #0 {
631 ; CHECK-LABEL: smin_i32_ptrue_all_h:
633 ; CHECK-NEXT: smin z0.s, z0.s, #1
635 %pg.h = tail call <vscale x 8 x i1> @llvm.aarch64.sve.ptrue.nxv8i1(i32 31)
636 %pg.b = tail call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv8i1(<vscale x 8 x i1> %pg.h)
637 %pg.s = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> %pg.b)
638 %b = tail call <vscale x 4 x i32> @llvm.aarch64.sve.dup.x.nxv4i32(i32 1)
639 %out = tail call <vscale x 4 x i32> @llvm.aarch64.sve.smin.u.nxv4i32(<vscale x 4 x i1> %pg.s,
640 <vscale x 4 x i32> %a,
641 <vscale x 4 x i32> %b)
642 ret <vscale x 4 x i32> %out
645 ; As smin_i32 but where pg is i64 based, which is not compatible for i32 and
646 ; thus inactive lanes are important and the immediate form cannot be used.
647 define <vscale x 4 x i32> @smin_i32_ptrue_all_d(<vscale x 4 x i32> %a) #0 {
648 ; CHECK-LABEL: smin_i32_ptrue_all_d:
650 ; CHECK-NEXT: mov z1.s, #1 // =0x1
651 ; CHECK-NEXT: ptrue p0.d
652 ; CHECK-NEXT: smin z0.s, p0/m, z0.s, z1.s
654 %pg.d = tail call <vscale x 2 x i1> @llvm.aarch64.sve.ptrue.nxv2i1(i32 31)
655 %pg.b = tail call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv2i1(<vscale x 2 x i1> %pg.d)
656 %pg.s = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> %pg.b)
657 %b = tail call <vscale x 4 x i32> @llvm.aarch64.sve.dup.x.nxv4i32(i32 1)
658 %out = tail call <vscale x 4 x i32> @llvm.aarch64.sve.smin.nxv4i32(<vscale x 4 x i1> %pg.s,
659 <vscale x 4 x i32> %a,
660 <vscale x 4 x i32> %b)
661 ret <vscale x 4 x i32> %out
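; UMAX/UMIN (immediate) encode an unsigned 8-bit value (0..255).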
666 define <vscale x 16 x i8> @umax_i8(<vscale x 16 x i8> %a) {
667 ; CHECK-LABEL: umax_i8:
669 ; CHECK-NEXT: umax z0.b, z0.b, #0
671 %pg = call <vscale x 16 x i1> @llvm.aarch64.sve.ptrue.nxv16i1(i32 31)
672 %out = call <vscale x 16 x i8> @llvm.aarch64.sve.umax.u.nxv16i8(<vscale x 16 x i1> %pg,
673 <vscale x 16 x i8> %a,
674 <vscale x 16 x i8> zeroinitializer)
675 ret <vscale x 16 x i8> %out
678 define <vscale x 8 x i16> @umax_i16(<vscale x 8 x i16> %a) {
679 ; CHECK-LABEL: umax_i16:
681 ; CHECK-NEXT: umax z0.h, z0.h, #255
683 %pg = call <vscale x 8 x i1> @llvm.aarch64.sve.ptrue.nxv8i1(i32 31)
684 %out = call <vscale x 8 x i16> @llvm.aarch64.sve.umax.u.nxv8i16(<vscale x 8 x i1> %pg,
685 <vscale x 8 x i16> %a,
686 <vscale x 8 x i16> splat(i16 255))
687 ret <vscale x 8 x i16> %out
690 define <vscale x 8 x i16> @umax_i16_out_of_range(<vscale x 8 x i16> %a) {
691 ; CHECK-LABEL: umax_i16_out_of_range:
693 ; CHECK-NEXT: dupm z1.b, #0x1
694 ; CHECK-NEXT: ptrue p0.h
695 ; CHECK-NEXT: umax z0.h, p0/m, z0.h, z1.h
697 %pg = call <vscale x 8 x i1> @llvm.aarch64.sve.ptrue.nxv8i1(i32 31)
698 %out = call <vscale x 8 x i16> @llvm.aarch64.sve.umax.u.nxv8i16(<vscale x 8 x i1> %pg,
699 <vscale x 8 x i16> %a,
700 <vscale x 8 x i16> splat(i16 257))
701 ret <vscale x 8 x i16> %out
704 define <vscale x 4 x i32> @umax_i32(<vscale x 4 x i32> %a) {
705 ; CHECK-LABEL: umax_i32:
707 ; CHECK-NEXT: umax z0.s, z0.s, #0
709 %pg = call <vscale x 4 x i1> @llvm.aarch64.sve.ptrue.nxv4i1(i32 31)
710 %out = call <vscale x 4 x i32> @llvm.aarch64.sve.umax.u.nxv4i32(<vscale x 4 x i1> %pg,
711 <vscale x 4 x i32> %a,
712 <vscale x 4 x i32> zeroinitializer)
713 ret <vscale x 4 x i32> %out
716 define <vscale x 4 x i32> @umax_i32_out_of_range(<vscale x 4 x i32> %a) {
717 ; CHECK-LABEL: umax_i32_out_of_range:
719 ; CHECK-NEXT: mov w8, #257 // =0x101
720 ; CHECK-NEXT: ptrue p0.s
721 ; CHECK-NEXT: mov z1.s, w8
722 ; CHECK-NEXT: umax z0.s, p0/m, z0.s, z1.s
724 %pg = call <vscale x 4 x i1> @llvm.aarch64.sve.ptrue.nxv4i1(i32 31)
725 %out = call <vscale x 4 x i32> @llvm.aarch64.sve.umax.u.nxv4i32(<vscale x 4 x i1> %pg,
726 <vscale x 4 x i32> %a,
727 <vscale x 4 x i32> splat(i32 257))
728 ret <vscale x 4 x i32> %out
731 define <vscale x 2 x i64> @umax_i64(<vscale x 2 x i64> %a) {
732 ; CHECK-LABEL: umax_i64:
734 ; CHECK-NEXT: umax z0.d, z0.d, #255
736 %pg = call <vscale x 2 x i1> @llvm.aarch64.sve.ptrue.nxv2i1(i32 31)
737 %out = call <vscale x 2 x i64> @llvm.aarch64.sve.umax.u.nxv2i64(<vscale x 2 x i1> %pg,
738 <vscale x 2 x i64> %a,
739 <vscale x 2 x i64> splat(i64 255))
740 ret <vscale x 2 x i64> %out
743 define <vscale x 2 x i64> @umax_i64_out_of_range(<vscale x 2 x i64> %a) {
744 ; CHECK-LABEL: umax_i64_out_of_range:
746 ; CHECK-NEXT: mov z1.d, #65535 // =0xffff
747 ; CHECK-NEXT: ptrue p0.d
748 ; CHECK-NEXT: umax z0.d, p0/m, z0.d, z1.d
750 %pg = call <vscale x 2 x i1> @llvm.aarch64.sve.ptrue.nxv2i1(i32 31)
751 %out = call <vscale x 2 x i64> @llvm.aarch64.sve.umax.u.nxv2i64(<vscale x 2 x i1> %pg,
752 <vscale x 2 x i64> %a,
753 <vscale x 2 x i64> splat(i64 65535))
754 ret <vscale x 2 x i64> %out
757 ; As umax_i32 but where pg is i8 based and thus compatible for i32.
758 define <vscale x 4 x i32> @umax_i32_ptrue_all_b(<vscale x 4 x i32> %a) #0 {
759 ; CHECK-LABEL: umax_i32_ptrue_all_b:
761 ; CHECK-NEXT: umax z0.s, z0.s, #1
763 %pg.b = tail call <vscale x 16 x i1> @llvm.aarch64.sve.ptrue.nxv16i1(i32 31)
764 %pg.s = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> %pg.b)
765 %b = tail call <vscale x 4 x i32> @llvm.aarch64.sve.dup.x.nxv4i32(i32 1)
766 %out = tail call <vscale x 4 x i32> @llvm.aarch64.sve.umax.u.nxv4i32(<vscale x 4 x i1> %pg.s,
767 <vscale x 4 x i32> %a,
768 <vscale x 4 x i32> %b)
769 ret <vscale x 4 x i32> %out
772 ; As umax_i32 but where pg is i16 based and thus compatible for i32.
773 define <vscale x 4 x i32> @umax_i32_ptrue_all_h(<vscale x 4 x i32> %a) #0 {
774 ; CHECK-LABEL: umax_i32_ptrue_all_h:
776 ; CHECK-NEXT: umax z0.s, z0.s, #1
778 %pg.h = tail call <vscale x 8 x i1> @llvm.aarch64.sve.ptrue.nxv8i1(i32 31)
779 %pg.b = tail call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv8i1(<vscale x 8 x i1> %pg.h)
780 %pg.s = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> %pg.b)
781 %b = tail call <vscale x 4 x i32> @llvm.aarch64.sve.dup.x.nxv4i32(i32 1)
782 %out = tail call <vscale x 4 x i32> @llvm.aarch64.sve.umax.u.nxv4i32(<vscale x 4 x i1> %pg.s,
783 <vscale x 4 x i32> %a,
784 <vscale x 4 x i32> %b)
785 ret <vscale x 4 x i32> %out
788 ; As umax_i32 but where pg is i64 based, which is not compatible for i32 and
789 ; thus inactive lanes are important and the immediate form cannot be used.
790 define <vscale x 4 x i32> @umax_i32_ptrue_all_d(<vscale x 4 x i32> %a) #0 {
791 ; CHECK-LABEL: umax_i32_ptrue_all_d:
793 ; CHECK-NEXT: mov z1.s, #1 // =0x1
794 ; CHECK-NEXT: ptrue p0.d
795 ; CHECK-NEXT: umax z0.s, p0/m, z0.s, z1.s
797 %pg.d = tail call <vscale x 2 x i1> @llvm.aarch64.sve.ptrue.nxv2i1(i32 31)
798 %pg.b = tail call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv2i1(<vscale x 2 x i1> %pg.d)
799 %pg.s = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> %pg.b)
800 %b = tail call <vscale x 4 x i32> @llvm.aarch64.sve.dup.x.nxv4i32(i32 1)
801 %out = tail call <vscale x 4 x i32> @llvm.aarch64.sve.umax.nxv4i32(<vscale x 4 x i1> %pg.s,
802 <vscale x 4 x i32> %a,
803 <vscale x 4 x i32> %b)
804 ret <vscale x 4 x i32> %out
809 define <vscale x 16 x i8> @umin_i8(<vscale x 16 x i8> %a) {
810 ; CHECK-LABEL: umin_i8:
812 ; CHECK-NEXT: umin z0.b, z0.b, #255
814 %pg = call <vscale x 16 x i1> @llvm.aarch64.sve.ptrue.nxv16i1(i32 31)
815 %out = call <vscale x 16 x i8> @llvm.aarch64.sve.umin.u.nxv16i8(<vscale x 16 x i1> %pg,
816 <vscale x 16 x i8> %a,
817 <vscale x 16 x i8> splat(i8 255))
818 ret <vscale x 16 x i8> %out
821 define <vscale x 8 x i16> @umin_i16(<vscale x 8 x i16> %a) {
822 ; CHECK-LABEL: umin_i16:
824 ; CHECK-NEXT: umin z0.h, z0.h, #0
826 %pg = call <vscale x 8 x i1> @llvm.aarch64.sve.ptrue.nxv8i1(i32 31)
827 %out = call <vscale x 8 x i16> @llvm.aarch64.sve.umin.u.nxv8i16(<vscale x 8 x i1> %pg,
828 <vscale x 8 x i16> %a,
829 <vscale x 8 x i16> zeroinitializer)
830 ret <vscale x 8 x i16> %out
833 define <vscale x 8 x i16> @umin_i16_out_of_range(<vscale x 8 x i16> %a) {
834 ; CHECK-LABEL: umin_i16_out_of_range:
836 ; CHECK-NEXT: dupm z1.b, #0x1
837 ; CHECK-NEXT: ptrue p0.h
838 ; CHECK-NEXT: umin z0.h, p0/m, z0.h, z1.h
840 %pg = call <vscale x 8 x i1> @llvm.aarch64.sve.ptrue.nxv8i1(i32 31)
841 %out = call <vscale x 8 x i16> @llvm.aarch64.sve.umin.u.nxv8i16(<vscale x 8 x i1> %pg,
842 <vscale x 8 x i16> %a,
843 <vscale x 8 x i16> splat(i16 257))
844 ret <vscale x 8 x i16> %out
847 define <vscale x 4 x i32> @umin_i32(<vscale x 4 x i32> %a) {
848 ; CHECK-LABEL: umin_i32:
850 ; CHECK-NEXT: umin z0.s, z0.s, #255
852 %pg = call <vscale x 4 x i1> @llvm.aarch64.sve.ptrue.nxv4i1(i32 31)
853 %out = call <vscale x 4 x i32> @llvm.aarch64.sve.umin.u.nxv4i32(<vscale x 4 x i1> %pg,
854 <vscale x 4 x i32> %a,
855 <vscale x 4 x i32> splat(i32 255))
856 ret <vscale x 4 x i32> %out
859 define <vscale x 4 x i32> @umin_i32_out_of_range(<vscale x 4 x i32> %a) {
860 ; CHECK-LABEL: umin_i32_out_of_range:
862 ; CHECK-NEXT: mov w8, #257 // =0x101
863 ; CHECK-NEXT: ptrue p0.s
864 ; CHECK-NEXT: mov z1.s, w8
865 ; CHECK-NEXT: umin z0.s, p0/m, z0.s, z1.s
867 %pg = call <vscale x 4 x i1> @llvm.aarch64.sve.ptrue.nxv4i1(i32 31)
868 %out = call <vscale x 4 x i32> @llvm.aarch64.sve.umin.u.nxv4i32(<vscale x 4 x i1> %pg,
869 <vscale x 4 x i32> %a,
870 <vscale x 4 x i32> splat(i32 257))
871 ret <vscale x 4 x i32> %out
874 define <vscale x 2 x i64> @umin_i64(<vscale x 2 x i64> %a) {
875 ; CHECK-LABEL: umin_i64:
877 ; CHECK-NEXT: umin z0.d, z0.d, #0
879 %pg = call <vscale x 2 x i1> @llvm.aarch64.sve.ptrue.nxv2i1(i32 31)
880 %out = call <vscale x 2 x i64> @llvm.aarch64.sve.umin.u.nxv2i64(<vscale x 2 x i1> %pg,
881 <vscale x 2 x i64> %a,
882 <vscale x 2 x i64> zeroinitializer)
883 ret <vscale x 2 x i64> %out
886 define <vscale x 2 x i64> @umin_i64_out_of_range(<vscale x 2 x i64> %a) {
887 ; CHECK-LABEL: umin_i64_out_of_range:
889 ; CHECK-NEXT: mov z1.d, #65535 // =0xffff
890 ; CHECK-NEXT: ptrue p0.d
891 ; CHECK-NEXT: umin z0.d, p0/m, z0.d, z1.d
893 %pg = call <vscale x 2 x i1> @llvm.aarch64.sve.ptrue.nxv2i1(i32 31)
894 %out = call <vscale x 2 x i64> @llvm.aarch64.sve.umin.u.nxv2i64(<vscale x 2 x i1> %pg,
895 <vscale x 2 x i64> %a,
896 <vscale x 2 x i64> splat(i64 65535))
897 ret <vscale x 2 x i64> %out
900 ; As umin_i32 but where pg is i8 based and thus compatible for i32.
901 define <vscale x 4 x i32> @umin_i32_ptrue_all_b(<vscale x 4 x i32> %a) #0 {
902 ; CHECK-LABEL: umin_i32_ptrue_all_b:
904 ; CHECK-NEXT: umin z0.s, z0.s, #1
906 %pg.b = tail call <vscale x 16 x i1> @llvm.aarch64.sve.ptrue.nxv16i1(i32 31)
907 %pg.s = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> %pg.b)
908 %b = tail call <vscale x 4 x i32> @llvm.aarch64.sve.dup.x.nxv4i32(i32 1)
909 %out = tail call <vscale x 4 x i32> @llvm.aarch64.sve.umin.u.nxv4i32(<vscale x 4 x i1> %pg.s,
910 <vscale x 4 x i32> %a,
911 <vscale x 4 x i32> %b)
912 ret <vscale x 4 x i32> %out
915 ; As umin_i32 but where pg is i16 based and thus compatible for i32.
916 define <vscale x 4 x i32> @umin_i32_ptrue_all_h(<vscale x 4 x i32> %a) #0 {
917 ; CHECK-LABEL: umin_i32_ptrue_all_h:
919 ; CHECK-NEXT: umin z0.s, z0.s, #1
921 %pg.h = tail call <vscale x 8 x i1> @llvm.aarch64.sve.ptrue.nxv8i1(i32 31)
922 %pg.b = tail call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv8i1(<vscale x 8 x i1> %pg.h)
923 %pg.s = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> %pg.b)
924 %b = tail call <vscale x 4 x i32> @llvm.aarch64.sve.dup.x.nxv4i32(i32 1)
925 %out = tail call <vscale x 4 x i32> @llvm.aarch64.sve.umin.u.nxv4i32(<vscale x 4 x i1> %pg.s,
926 <vscale x 4 x i32> %a,
927 <vscale x 4 x i32> %b)
928 ret <vscale x 4 x i32> %out
931 ; As umin_i32 but where pg is i64 based, which is not compatible for i32 and
932 ; thus inactive lanes are important and the immediate form cannot be used.
933 define <vscale x 4 x i32> @umin_i32_ptrue_all_d(<vscale x 4 x i32> %a) #0 {
934 ; CHECK-LABEL: umin_i32_ptrue_all_d:
936 ; CHECK-NEXT: mov z1.s, #1 // =0x1
937 ; CHECK-NEXT: ptrue p0.d
938 ; CHECK-NEXT: umin z0.s, p0/m, z0.s, z1.s
940 %pg.d = tail call <vscale x 2 x i1> @llvm.aarch64.sve.ptrue.nxv2i1(i32 31)
941 %pg.b = tail call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv2i1(<vscale x 2 x i1> %pg.d)
942 %pg.s = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> %pg.b)
943 %b = tail call <vscale x 4 x i32> @llvm.aarch64.sve.dup.x.nxv4i32(i32 1)
944 %out = tail call <vscale x 4 x i32> @llvm.aarch64.sve.umin.nxv4i32(<vscale x 4 x i1> %pg.s,
945 <vscale x 4 x i32> %a,
946 <vscale x 4 x i32> %b)
947 ret <vscale x 4 x i32> %out
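; SQADD/UQADD and SQSUB/UQSUB (immediate) take an unsigned 8-bit value,
; optionally shifted left by 8. Negative signed-saturating immediates are
; matched to the opposite operation instead, e.g. sqadd with -1 becomes
; sqsub with 1.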
952 define <vscale x 16 x i8> @sqadd_b_lowimm(<vscale x 16 x i8> %a) {
953 ; CHECK-LABEL: sqadd_b_lowimm:
955 ; CHECK-NEXT: sqadd z0.b, z0.b, #27 // =0x1b
957 %out = call <vscale x 16 x i8> @llvm.aarch64.sve.sqadd.x.nxv16i8(<vscale x 16 x i8> %a,
958 <vscale x 16 x i8> splat(i8 27))
959 ret <vscale x 16 x i8> %out
962 ; Immediate instruction form only supports positive values.
963 define <vscale x 16 x i8> @sqadd_b_negimm(<vscale x 16 x i8> %a) {
964 ; CHECK-LABEL: sqadd_b_negimm:
966 ; CHECK-NEXT: sqsub z0.b, z0.b, #128 // =0x80
968 %out = call <vscale x 16 x i8> @llvm.aarch64.sve.sqadd.x.nxv16i8(<vscale x 16 x i8> %a,
969 <vscale x 16 x i8> splat(i8 -128))
970 ret <vscale x 16 x i8> %out
973 define <vscale x 8 x i16> @sqadd_h_lowimm(<vscale x 8 x i16> %a) {
974 ; CHECK-LABEL: sqadd_h_lowimm:
976 ; CHECK-NEXT: sqadd z0.h, z0.h, #43 // =0x2b
978 %out = call <vscale x 8 x i16> @llvm.aarch64.sve.sqadd.x.nxv8i16(<vscale x 8 x i16> %a,
979 <vscale x 8 x i16> splat(i16 43))
980 ret <vscale x 8 x i16> %out
983 define <vscale x 8 x i16> @sqadd_h_highimm(<vscale x 8 x i16> %a) {
984 ; CHECK-LABEL: sqadd_h_highimm:
986 ; CHECK-NEXT: sqadd z0.h, z0.h, #2048 // =0x800
988 %out = call <vscale x 8 x i16> @llvm.aarch64.sve.sqadd.x.nxv8i16(<vscale x 8 x i16> %a,
989 <vscale x 8 x i16> splat(i16 2048))
990 ret <vscale x 8 x i16> %out
993 ; Immediate instruction form only supports positive values.
994 define <vscale x 8 x i16> @sqadd_h_negimm(<vscale x 8 x i16> %a) {
995 ; CHECK-LABEL: sqadd_h_negimm:
997 ; CHECK-NEXT: sqsub z0.h, z0.h, #1 // =0x1
999 %out = call <vscale x 8 x i16> @llvm.aarch64.sve.sqadd.x.nxv8i16(<vscale x 8 x i16> %a,
1000 <vscale x 8 x i16> splat(i16 -1))
1001 ret <vscale x 8 x i16> %out
1004 define <vscale x 4 x i32> @sqadd_s_lowimm(<vscale x 4 x i32> %a) {
1005 ; CHECK-LABEL: sqadd_s_lowimm:
1007 ; CHECK-NEXT: sqadd z0.s, z0.s, #1 // =0x1
1009 %out = call <vscale x 4 x i32> @llvm.aarch64.sve.sqadd.x.nxv4i32(<vscale x 4 x i32> %a,
1010 <vscale x 4 x i32> splat(i32 1))
1011 ret <vscale x 4 x i32> %out
1014 define <vscale x 4 x i32> @sqadd_s_highimm(<vscale x 4 x i32> %a) {
1015 ; CHECK-LABEL: sqadd_s_highimm:
1017 ; CHECK-NEXT: sqadd z0.s, z0.s, #8192 // =0x2000
1019 %out = call <vscale x 4 x i32> @llvm.aarch64.sve.sqadd.x.nxv4i32(<vscale x 4 x i32> %a,
1020 <vscale x 4 x i32> splat(i32 8192))
1021 ret <vscale x 4 x i32> %out
1024 ; Immediate instruction form only supports positive values.
1025 define <vscale x 4 x i32> @sqadd_s_negimm(<vscale x 4 x i32> %a) {
1026 ; CHECK-LABEL: sqadd_s_negimm:
1028 ; CHECK-NEXT: sqsub z0.s, z0.s, #65280 // =0xff00
1030 %out = call <vscale x 4 x i32> @llvm.aarch64.sve.sqadd.x.nxv4i32(<vscale x 4 x i32> %a,
1031 <vscale x 4 x i32> splat(i32 -65280))
1032 ret <vscale x 4 x i32> %out
1035 define <vscale x 2 x i64> @sqadd_d_lowimm(<vscale x 2 x i64> %a) {
1036 ; CHECK-LABEL: sqadd_d_lowimm:
1038 ; CHECK-NEXT: sqadd z0.d, z0.d, #255 // =0xff
1040 %out = call <vscale x 2 x i64> @llvm.aarch64.sve.sqadd.x.nxv2i64(<vscale x 2 x i64> %a,
1041 <vscale x 2 x i64> splat(i64 255))
1042 ret <vscale x 2 x i64> %out
1045 define <vscale x 2 x i64> @sqadd_d_highimm(<vscale x 2 x i64> %a) {
1046 ; CHECK-LABEL: sqadd_d_highimm:
1048 ; CHECK-NEXT: sqadd z0.d, z0.d, #65280 // =0xff00
1050 %out = call <vscale x 2 x i64> @llvm.aarch64.sve.sqadd.x.nxv2i64(<vscale x 2 x i64> %a,
1051 <vscale x 2 x i64> splat(i64 65280))
1052 ret <vscale x 2 x i64> %out
1055 ; Immediate instruction form only supports positive values.
1056 define <vscale x 2 x i64> @sqadd_d_negimm(<vscale x 2 x i64> %a) {
1057 ; CHECK-LABEL: sqadd_d_negimm:
1059 ; CHECK-NEXT: sqsub z0.d, z0.d, #3840 // =0xf00
1061 %out = call <vscale x 2 x i64> @llvm.aarch64.sve.sqadd.x.nxv2i64(<vscale x 2 x i64> %a,
1062 <vscale x 2 x i64> splat(i64 -3840))
1063 ret <vscale x 2 x i64> %out
1068 define <vscale x 16 x i8> @sqsub_b_lowimm(<vscale x 16 x i8> %a) {
1069 ; CHECK-LABEL: sqsub_b_lowimm:
1071 ; CHECK-NEXT: sqsub z0.b, z0.b, #27 // =0x1b
1073 %out = call <vscale x 16 x i8> @llvm.aarch64.sve.sqsub.x.nxv16i8(<vscale x 16 x i8> %a,
1074 <vscale x 16 x i8> splat(i8 27))
1075 ret <vscale x 16 x i8> %out
1078 ; Immediate instruction form only supports positive values.
1079 define <vscale x 16 x i8> @sqsub_b_negimm(<vscale x 16 x i8> %a) {
1080 ; CHECK-LABEL: sqsub_b_negimm:
1082 ; CHECK-NEXT: sqadd z0.b, z0.b, #1 // =0x1
1084 %out = call <vscale x 16 x i8> @llvm.aarch64.sve.sqsub.x.nxv16i8(<vscale x 16 x i8> %a,
1085 <vscale x 16 x i8> splat(i8 -1))
1086 ret <vscale x 16 x i8> %out
1089 define <vscale x 8 x i16> @sqsub_h_lowimm(<vscale x 8 x i16> %a) {
1090 ; CHECK-LABEL: sqsub_h_lowimm:
1092 ; CHECK-NEXT: sqsub z0.h, z0.h, #43 // =0x2b
1094 %out = call <vscale x 8 x i16> @llvm.aarch64.sve.sqsub.x.nxv8i16(<vscale x 8 x i16> %a,
1095 <vscale x 8 x i16> splat(i16 43))
1096 ret <vscale x 8 x i16> %out
1099 define <vscale x 8 x i16> @sqsub_h_highimm(<vscale x 8 x i16> %a) {
1100 ; CHECK-LABEL: sqsub_h_highimm:
1102 ; CHECK-NEXT: sqsub z0.h, z0.h, #2048 // =0x800
1104 %out = call <vscale x 8 x i16> @llvm.aarch64.sve.sqsub.x.nxv8i16(<vscale x 8 x i16> %a,
1105 <vscale x 8 x i16> splat(i16 2048))
1106 ret <vscale x 8 x i16> %out
1109 ; Immediate instruction form only supports positive values.
1110 define <vscale x 8 x i16> @sqsub_h_negimm(<vscale x 8 x i16> %a) {
1111 ; CHECK-LABEL: sqsub_h_negimm:
1113 ; CHECK-NEXT: sqadd z0.h, z0.h, #128 // =0x80
1115 %out = call <vscale x 8 x i16> @llvm.aarch64.sve.sqsub.x.nxv8i16(<vscale x 8 x i16> %a,
1116 <vscale x 8 x i16> splat(i16 -128))
1117 ret <vscale x 8 x i16> %out
1120 define <vscale x 4 x i32> @sqsub_s_lowimm(<vscale x 4 x i32> %a) {
1121 ; CHECK-LABEL: sqsub_s_lowimm:
1123 ; CHECK-NEXT: sqsub z0.s, z0.s, #1 // =0x1
1125 %out = call <vscale x 4 x i32> @llvm.aarch64.sve.sqsub.x.nxv4i32(<vscale x 4 x i32> %a,
1126 <vscale x 4 x i32> splat(i32 1))
1127 ret <vscale x 4 x i32> %out
1130 define <vscale x 4 x i32> @sqsub_s_highimm(<vscale x 4 x i32> %a) {
1131 ; CHECK-LABEL: sqsub_s_highimm:
1133 ; CHECK-NEXT: sqsub z0.s, z0.s, #8192 // =0x2000
1135 %out = call <vscale x 4 x i32> @llvm.aarch64.sve.sqsub.x.nxv4i32(<vscale x 4 x i32> %a,
1136 <vscale x 4 x i32> splat(i32 8192))
1137 ret <vscale x 4 x i32> %out
1140 ; Immediate instruction form only supports positive values.
1141 define <vscale x 4 x i32> @sqsub_s_negimm(<vscale x 4 x i32> %a) {
1142 ; CHECK-LABEL: sqsub_s_negimm:
1144 ; CHECK-NEXT: sqadd z0.s, z0.s, #32768 // =0x8000
1146 %out = call <vscale x 4 x i32> @llvm.aarch64.sve.sqsub.x.nxv4i32(<vscale x 4 x i32> %a,
1147 <vscale x 4 x i32> splat(i32 -32768))
1148 ret <vscale x 4 x i32> %out
1151 define <vscale x 2 x i64> @sqsub_d_lowimm(<vscale x 2 x i64> %a) {
1152 ; CHECK-LABEL: sqsub_d_lowimm:
1154 ; CHECK-NEXT: sqsub z0.d, z0.d, #255 // =0xff
1156 %out = call <vscale x 2 x i64> @llvm.aarch64.sve.sqsub.x.nxv2i64(<vscale x 2 x i64> %a,
1157 <vscale x 2 x i64> splat(i64 255))
1158 ret <vscale x 2 x i64> %out
1161 define <vscale x 2 x i64> @sqsub_d_highimm(<vscale x 2 x i64> %a) {
1162 ; CHECK-LABEL: sqsub_d_highimm:
1164 ; CHECK-NEXT: sqsub z0.d, z0.d, #65280 // =0xff00
1166 %out = call <vscale x 2 x i64> @llvm.aarch64.sve.sqsub.x.nxv2i64(<vscale x 2 x i64> %a,
1167 <vscale x 2 x i64> splat(i64 65280))
1168 ret <vscale x 2 x i64> %out
1171 ; Immediate instruction form only supports positive values.
1172 define <vscale x 2 x i64> @sqsub_d_negimm(<vscale x 2 x i64> %a) {
1173 ; CHECK-LABEL: sqsub_d_negimm:
1175 ; CHECK-NEXT: sqadd z0.d, z0.d, #57344 // =0xe000
1177 %out = call <vscale x 2 x i64> @llvm.aarch64.sve.sqsub.x.nxv2i64(<vscale x 2 x i64> %a,
1178 <vscale x 2 x i64> splat(i64 -57344))
1179 ret <vscale x 2 x i64> %out
1184 define <vscale x 16 x i8> @uqadd_b_lowimm(<vscale x 16 x i8> %a) {
1185 ; CHECK-LABEL: uqadd_b_lowimm:
1187 ; CHECK-NEXT: uqadd z0.b, z0.b, #27 // =0x1b
1189 %out = call <vscale x 16 x i8> @llvm.aarch64.sve.uqadd.x.nxv16i8(<vscale x 16 x i8> %a,
1190 <vscale x 16 x i8> splat(i8 27))
1191 ret <vscale x 16 x i8> %out
1194 define <vscale x 8 x i16> @uqadd_h_lowimm(<vscale x 8 x i16> %a) {
1195 ; CHECK-LABEL: uqadd_h_lowimm:
1197 ; CHECK-NEXT: uqadd z0.h, z0.h, #43 // =0x2b
1199 %out = call <vscale x 8 x i16> @llvm.aarch64.sve.uqadd.x.nxv8i16(<vscale x 8 x i16> %a,
1200 <vscale x 8 x i16> splat(i16 43))
1201 ret <vscale x 8 x i16> %out
1204 define <vscale x 8 x i16> @uqadd_h_highimm(<vscale x 8 x i16> %a) {
1205 ; CHECK-LABEL: uqadd_h_highimm:
1207 ; CHECK-NEXT: uqadd z0.h, z0.h, #2048 // =0x800
1209 %out = call <vscale x 8 x i16> @llvm.aarch64.sve.uqadd.x.nxv8i16(<vscale x 8 x i16> %a,
1210 <vscale x 8 x i16> splat(i16 2048))
1211 ret <vscale x 8 x i16> %out
1214 define <vscale x 4 x i32> @uqadd_s_lowimm(<vscale x 4 x i32> %a) {
1215 ; CHECK-LABEL: uqadd_s_lowimm:
1217 ; CHECK-NEXT: uqadd z0.s, z0.s, #1 // =0x1
1219 %out = call <vscale x 4 x i32> @llvm.aarch64.sve.uqadd.x.nxv4i32(<vscale x 4 x i32> %a,
1220 <vscale x 4 x i32> splat(i32 1))
1221 ret <vscale x 4 x i32> %out
1224 define <vscale x 4 x i32> @uqadd_s_highimm(<vscale x 4 x i32> %a) {
1225 ; CHECK-LABEL: uqadd_s_highimm:
1227 ; CHECK-NEXT: uqadd z0.s, z0.s, #8192 // =0x2000
1229 %out = call <vscale x 4 x i32> @llvm.aarch64.sve.uqadd.x.nxv4i32(<vscale x 4 x i32> %a,
1230 <vscale x 4 x i32> splat(i32 8192))
1231 ret <vscale x 4 x i32> %out
1234 define <vscale x 2 x i64> @uqadd_d_lowimm(<vscale x 2 x i64> %a) {
1235 ; CHECK-LABEL: uqadd_d_lowimm:
1237 ; CHECK-NEXT: uqadd z0.d, z0.d, #255 // =0xff
1239 %out = call <vscale x 2 x i64> @llvm.aarch64.sve.uqadd.x.nxv2i64(<vscale x 2 x i64> %a,
1240 <vscale x 2 x i64> splat(i64 255))
1241 ret <vscale x 2 x i64> %out
1244 define <vscale x 2 x i64> @uqadd_d_highimm(<vscale x 2 x i64> %a) {
1245 ; CHECK-LABEL: uqadd_d_highimm:
1247 ; CHECK-NEXT: uqadd z0.d, z0.d, #65280 // =0xff00
1249 %out = call <vscale x 2 x i64> @llvm.aarch64.sve.uqadd.x.nxv2i64(<vscale x 2 x i64> %a,
1250 <vscale x 2 x i64> splat(i64 65280))
1251 ret <vscale x 2 x i64> %out
1256 define <vscale x 16 x i8> @uqsub_b_lowimm(<vscale x 16 x i8> %a) {
1257 ; CHECK-LABEL: uqsub_b_lowimm:
1259 ; CHECK-NEXT: uqsub z0.b, z0.b, #27 // =0x1b
1261 %out = call <vscale x 16 x i8> @llvm.aarch64.sve.uqsub.x.nxv16i8(<vscale x 16 x i8> %a,
1262 <vscale x 16 x i8> splat(i8 27))
1263 ret <vscale x 16 x i8> %out
1266 define <vscale x 8 x i16> @uqsub_h_lowimm(<vscale x 8 x i16> %a) {
1267 ; CHECK-LABEL: uqsub_h_lowimm:
1269 ; CHECK-NEXT: uqsub z0.h, z0.h, #43 // =0x2b
1271 %out = call <vscale x 8 x i16> @llvm.aarch64.sve.uqsub.x.nxv8i16(<vscale x 8 x i16> %a,
1272 <vscale x 8 x i16> splat(i16 43))
1273 ret <vscale x 8 x i16> %out
1276 define <vscale x 8 x i16> @uqsub_h_highimm(<vscale x 8 x i16> %a) {
1277 ; CHECK-LABEL: uqsub_h_highimm:
1279 ; CHECK-NEXT: uqsub z0.h, z0.h, #2048 // =0x800
1281 %out = call <vscale x 8 x i16> @llvm.aarch64.sve.uqsub.x.nxv8i16(<vscale x 8 x i16> %a,
1282 <vscale x 8 x i16> splat(i16 2048))
1283 ret <vscale x 8 x i16> %out
1286 define <vscale x 4 x i32> @uqsub_s_lowimm(<vscale x 4 x i32> %a) {
1287 ; CHECK-LABEL: uqsub_s_lowimm:
1289 ; CHECK-NEXT: uqsub z0.s, z0.s, #1 // =0x1
1291 %out = call <vscale x 4 x i32> @llvm.aarch64.sve.uqsub.x.nxv4i32(<vscale x 4 x i32> %a,
1292 <vscale x 4 x i32> splat(i32 1))
1293 ret <vscale x 4 x i32> %out
1296 define <vscale x 4 x i32> @uqsub_s_highimm(<vscale x 4 x i32> %a) {
1297 ; CHECK-LABEL: uqsub_s_highimm:
1299 ; CHECK-NEXT: uqsub z0.s, z0.s, #8192 // =0x2000
1301 %out = call <vscale x 4 x i32> @llvm.aarch64.sve.uqsub.x.nxv4i32(<vscale x 4 x i32> %a,
1302 <vscale x 4 x i32> splat(i32 8192))
1303 ret <vscale x 4 x i32> %out
1306 define <vscale x 2 x i64> @uqsub_d_lowimm(<vscale x 2 x i64> %a) {
1307 ; CHECK-LABEL: uqsub_d_lowimm:
1309 ; CHECK-NEXT: uqsub z0.d, z0.d, #255 // =0xff
1311 %out = call <vscale x 2 x i64> @llvm.aarch64.sve.uqsub.x.nxv2i64(<vscale x 2 x i64> %a,
1312 <vscale x 2 x i64> splat(i64 255))
1313 ret <vscale x 2 x i64> %out
1316 define <vscale x 2 x i64> @uqsub_d_highimm(<vscale x 2 x i64> %a) {
1317 ; CHECK-LABEL: uqsub_d_highimm:
1319 ; CHECK-NEXT: uqsub z0.d, z0.d, #65280 // =0xff00
1321 %out = call <vscale x 2 x i64> @llvm.aarch64.sve.uqsub.x.nxv2i64(<vscale x 2 x i64> %a,
1322 <vscale x 2 x i64> splat(i64 65280))
1323 ret <vscale x 2 x i64> %out
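; ASR/LSR (immediate) encode shift amounts of 1 to the element width; a shift
; of zero keeps the vector form, and a requested shift wider than the element
; (e.g. 9 for .b lanes) is clamped to the element width. LSL (immediate)
; encodes 0 to element width minus 1, so a shift equal to the element width
; keeps the vector form.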
1328 define <vscale x 16 x i8> @asr_i8(<vscale x 16 x i1> %pg, <vscale x 16 x i8> %a) {
1329 ; CHECK-LABEL: asr_i8:
1331 ; CHECK-NEXT: asr z0.b, p0/m, z0.b, #8
1333 %out = call <vscale x 16 x i8> @llvm.aarch64.sve.asr.nxv16i8(<vscale x 16 x i1> %pg,
1334 <vscale x 16 x i8> %a,
1335 <vscale x 16 x i8> splat(i8 9))
1336 ret <vscale x 16 x i8> %out
1339 define <vscale x 16 x i8> @asr_i8_all_active(<vscale x 16 x i8> %a) {
1340 ; CHECK-LABEL: asr_i8_all_active:
1342 ; CHECK-NEXT: asr z0.b, z0.b, #8
1344 %pg = call <vscale x 16 x i1> @llvm.aarch64.sve.ptrue.nxv16i1(i32 31)
1345 %out = call <vscale x 16 x i8> @llvm.aarch64.sve.asr.u.nxv16i8(<vscale x 16 x i1> %pg,
1346 <vscale x 16 x i8> %a,
1347 <vscale x 16 x i8> splat(i8 8))
1348 ret <vscale x 16 x i8> %out
1351 ; Ensure we don't match a right shift by zero to the immediate form.
1352 define <vscale x 16 x i8> @asr_i8_too_small(<vscale x 16 x i1> %pg, <vscale x 16 x i8> %a) {
1353 ; CHECK-LABEL: asr_i8_too_small:
1355 ; CHECK-NEXT: mov z1.b, #0 // =0x0
1356 ; CHECK-NEXT: asr z0.b, p0/m, z0.b, z1.b
1358 %out = call <vscale x 16 x i8> @llvm.aarch64.sve.asr.nxv16i8(<vscale x 16 x i1> %pg,
1359 <vscale x 16 x i8> %a,
1360 <vscale x 16 x i8> zeroinitializer)
1361 ret <vscale x 16 x i8> %out
1364 define <vscale x 8 x i16> @asr_i16(<vscale x 8 x i1> %pg, <vscale x 8 x i16> %a) {
1365 ; CHECK-LABEL: asr_i16:
1367 ; CHECK-NEXT: asr z0.h, p0/m, z0.h, #16
1369 %out = call <vscale x 8 x i16> @llvm.aarch64.sve.asr.nxv8i16(<vscale x 8 x i1> %pg,
1370 <vscale x 8 x i16> %a,
1371 <vscale x 8 x i16> splat(i16 17))
1372 ret <vscale x 8 x i16> %out
1375 define <vscale x 8 x i16> @asr_i16_all_active(<vscale x 8 x i16> %a) {
1376 ; CHECK-LABEL: asr_i16_all_active:
1378 ; CHECK-NEXT: asr z0.h, z0.h, #16
1380 %pg = call <vscale x 8 x i1> @llvm.aarch64.sve.ptrue.nxv8i1(i32 31)
1381 %out = call <vscale x 8 x i16> @llvm.aarch64.sve.asr.u.nxv8i16(<vscale x 8 x i1> %pg,
1382 <vscale x 8 x i16> %a,
1383 <vscale x 8 x i16> splat(i16 16))
1384 ret <vscale x 8 x i16> %out
1387 ; Ensure we don't match a right shift by zero to the immediate form.
1388 define <vscale x 8 x i16> @asr_i16_too_small(<vscale x 8 x i1> %pg, <vscale x 8 x i16> %a) {
1389 ; CHECK-LABEL: asr_i16_too_small:
1391 ; CHECK-NEXT: mov z1.h, #0 // =0x0
1392 ; CHECK-NEXT: asr z0.h, p0/m, z0.h, z1.h
1394 %out = call <vscale x 8 x i16> @llvm.aarch64.sve.asr.nxv8i16(<vscale x 8 x i1> %pg,
1395 <vscale x 8 x i16> %a,
1396 <vscale x 8 x i16> zeroinitializer)
1397 ret <vscale x 8 x i16> %out
1400 define <vscale x 4 x i32> @asr_i32(<vscale x 4 x i1> %pg, <vscale x 4 x i32> %a) {
1401 ; CHECK-LABEL: asr_i32:
1403 ; CHECK-NEXT: asr z0.s, p0/m, z0.s, #32
1405 %out = call <vscale x 4 x i32> @llvm.aarch64.sve.asr.nxv4i32(<vscale x 4 x i1> %pg,
1406 <vscale x 4 x i32> %a,
1407 <vscale x 4 x i32> splat(i32 33))
1408 ret <vscale x 4 x i32> %out
1411 define <vscale x 4 x i32> @asr_i32_all_active(<vscale x 4 x i32> %a) {
1412 ; CHECK-LABEL: asr_i32_all_active:
1414 ; CHECK-NEXT: asr z0.s, z0.s, #32
1416 %pg = call <vscale x 4 x i1> @llvm.aarch64.sve.ptrue.nxv4i1(i32 31)
1417 %out = call <vscale x 4 x i32> @llvm.aarch64.sve.asr.u.nxv4i32(<vscale x 4 x i1> %pg,
1418 <vscale x 4 x i32> %a,
1419 <vscale x 4 x i32> splat(i32 32))
1420 ret <vscale x 4 x i32> %out
1423 ; Ensure we don't match a right shift by zero to the immediate form.
1424 define <vscale x 4 x i32> @asr_i32_too_small(<vscale x 4 x i1> %pg, <vscale x 4 x i32> %a) {
1425 ; CHECK-LABEL: asr_i32_too_small:
1427 ; CHECK-NEXT: mov z1.s, #0 // =0x0
1428 ; CHECK-NEXT: asr z0.s, p0/m, z0.s, z1.s
1430 %out = call <vscale x 4 x i32> @llvm.aarch64.sve.asr.nxv4i32(<vscale x 4 x i1> %pg,
1431 <vscale x 4 x i32> %a,
1432 <vscale x 4 x i32> zeroinitializer)
1433 ret <vscale x 4 x i32> %out
1436 define <vscale x 2 x i64> @asr_i64(<vscale x 2 x i1> %pg, <vscale x 2 x i64> %a) {
1437 ; CHECK-LABEL: asr_i64:
1439 ; CHECK-NEXT: asr z0.d, p0/m, z0.d, #64
1441 %out = call <vscale x 2 x i64> @llvm.aarch64.sve.asr.nxv2i64(<vscale x 2 x i1> %pg,
1442 <vscale x 2 x i64> %a,
1443 <vscale x 2 x i64> splat(i64 65))
1444 ret <vscale x 2 x i64> %out
1447 define <vscale x 2 x i64> @asr_i64_all_active(<vscale x 2 x i64> %a) {
1448 ; CHECK-LABEL: asr_i64_all_active:
1450 ; CHECK-NEXT: asr z0.d, z0.d, #64
1452 %pg = call <vscale x 2 x i1> @llvm.aarch64.sve.ptrue.nxv2i1(i32 31)
1453 %out = call <vscale x 2 x i64> @llvm.aarch64.sve.asr.u.nxv2i64(<vscale x 2 x i1> %pg,
1454 <vscale x 2 x i64> %a,
1455 <vscale x 2 x i64> splat(i64 64))
1456 ret <vscale x 2 x i64> %out
1459 ; Ensure we don't match a right shift by zero to the immediate form.
1460 define <vscale x 2 x i64> @asr_i64_too_small(<vscale x 2 x i1> %pg, <vscale x 2 x i64> %a) {
1461 ; CHECK-LABEL: asr_i64_too_small:
1463 ; CHECK-NEXT: mov z1.d, #0 // =0x0
1464 ; CHECK-NEXT: asr z0.d, p0/m, z0.d, z1.d
1466 %out = call <vscale x 2 x i64> @llvm.aarch64.sve.asr.nxv2i64(<vscale x 2 x i1> %pg,
1467 <vscale x 2 x i64> %a,
1468 <vscale x 2 x i64> zeroinitializer)
1469 ret <vscale x 2 x i64> %out
1474 define <vscale x 16 x i8> @lsl_i8(<vscale x 16 x i1> %pg, <vscale x 16 x i8> %a) {
1475 ; CHECK-LABEL: lsl_i8:
1477 ; CHECK-NEXT: lsl z0.b, p0/m, z0.b, #7
1479 %out = call <vscale x 16 x i8> @llvm.aarch64.sve.lsl.nxv16i8(<vscale x 16 x i1> %pg,
1480 <vscale x 16 x i8> %a,
1481 <vscale x 16 x i8> splat(i8 7))
1482 ret <vscale x 16 x i8> %out
1485 define <vscale x 16 x i8> @lsl_i8_all_active(<vscale x 16 x i8> %a) {
1486 ; CHECK-LABEL: lsl_i8_all_active:
1488 ; CHECK-NEXT: lsl z0.b, z0.b, #7
1490 %pg = call <vscale x 16 x i1> @llvm.aarch64.sve.ptrue.nxv16i1(i32 31)
1491 %out = call <vscale x 16 x i8> @llvm.aarch64.sve.lsl.u.nxv16i8(<vscale x 16 x i1> %pg,
1492 <vscale x 16 x i8> %a,
1493 <vscale x 16 x i8> splat(i8 7))
1494 ret <vscale x 16 x i8> %out
1497 ; Ensure we don't match a left shift of the element bitwidth or more to the immediate form.
1498 define <vscale x 16 x i8> @lsl_i8_too_big(<vscale x 16 x i1> %pg, <vscale x 16 x i8> %a) {
1499 ; CHECK-LABEL: lsl_i8_too_big:
1501 ; CHECK-NEXT: mov z1.b, #8 // =0x8
1502 ; CHECK-NEXT: lsl z0.b, p0/m, z0.b, z1.b
1504 %out = call <vscale x 16 x i8> @llvm.aarch64.sve.lsl.nxv16i8(<vscale x 16 x i1> %pg,
1505 <vscale x 16 x i8> %a,
1506 <vscale x 16 x i8> splat(i8 8))
1507 ret <vscale x 16 x i8> %out
1510 define <vscale x 16 x i8> @lsl_i8_zero(<vscale x 16 x i1> %pg, <vscale x 16 x i8> %a) {
1511 ; CHECK-LABEL: lsl_i8_zero:
1513 ; CHECK-NEXT: lsl z0.b, p0/m, z0.b, #0
1515 %out = call <vscale x 16 x i8> @llvm.aarch64.sve.lsl.nxv16i8(<vscale x 16 x i1> %pg,
1516 <vscale x 16 x i8> %a,
1517 <vscale x 16 x i8> zeroinitializer)
1518 ret <vscale x 16 x i8> %out
1521 define <vscale x 8 x i16> @lsl_i16(<vscale x 8 x i1> %pg, <vscale x 8 x i16> %a) {
1522 ; CHECK-LABEL: lsl_i16:
1524 ; CHECK-NEXT: lsl z0.h, p0/m, z0.h, #15
1526 %out = call <vscale x 8 x i16> @llvm.aarch64.sve.lsl.nxv8i16(<vscale x 8 x i1> %pg,
1527 <vscale x 8 x i16> %a,
1528 <vscale x 8 x i16> splat(i16 15))
1529 ret <vscale x 8 x i16> %out
1532 define <vscale x 8 x i16> @lsl_i16_all_active(<vscale x 8 x i16> %a) {
1533 ; CHECK-LABEL: lsl_i16_all_active:
1535 ; CHECK-NEXT: lsl z0.h, z0.h, #15
1537 %pg = call <vscale x 8 x i1> @llvm.aarch64.sve.ptrue.nxv8i1(i32 31)
1538 %out = call <vscale x 8 x i16> @llvm.aarch64.sve.lsl.u.nxv8i16(<vscale x 8 x i1> %pg,
1539 <vscale x 8 x i16> %a,
1540 <vscale x 8 x i16> splat(i16 15))
1541 ret <vscale x 8 x i16> %out
1544 ; Ensure we don't match a left shift of the element bitwidth or more to the immediate form.
1545 define <vscale x 8 x i16> @lsl_i16_too_big(<vscale x 8 x i1> %pg, <vscale x 8 x i16> %a) {
1546 ; CHECK-LABEL: lsl_i16_too_big:
1548 ; CHECK-NEXT: mov z1.h, #16 // =0x10
1549 ; CHECK-NEXT: lsl z0.h, p0/m, z0.h, z1.h
1551 %out = call <vscale x 8 x i16> @llvm.aarch64.sve.lsl.nxv8i16(<vscale x 8 x i1> %pg,
1552 <vscale x 8 x i16> %a,
1553 <vscale x 8 x i16> splat(i16 16))
1554 ret <vscale x 8 x i16> %out
1557 define <vscale x 8 x i16> @lsl_i16_zero(<vscale x 8 x i1> %pg, <vscale x 8 x i16> %a) {
1558 ; CHECK-LABEL: lsl_i16_zero:
1560 ; CHECK-NEXT: lsl z0.h, p0/m, z0.h, #0
1562 %out = call <vscale x 8 x i16> @llvm.aarch64.sve.lsl.nxv8i16(<vscale x 8 x i1> %pg,
1563 <vscale x 8 x i16> %a,
1564 <vscale x 8 x i16> zeroinitializer)
1565 ret <vscale x 8 x i16> %out
1568 define <vscale x 4 x i32> @lsl_i32(<vscale x 4 x i1> %pg, <vscale x 4 x i32> %a) {
1569 ; CHECK-LABEL: lsl_i32:
1571 ; CHECK-NEXT: lsl z0.s, p0/m, z0.s, #31
1573 %out = call <vscale x 4 x i32> @llvm.aarch64.sve.lsl.nxv4i32(<vscale x 4 x i1> %pg,
1574 <vscale x 4 x i32> %a,
1575 <vscale x 4 x i32> splat(i32 31))
1576 ret <vscale x 4 x i32> %out
1579 define <vscale x 4 x i32> @lsl_i32_all_active(<vscale x 4 x i32> %a) {
1580 ; CHECK-LABEL: lsl_i32_all_active:
1582 ; CHECK-NEXT: lsl z0.s, z0.s, #31
1584 %pg = call <vscale x 4 x i1> @llvm.aarch64.sve.ptrue.nxv4i1(i32 31)
1585 %out = call <vscale x 4 x i32> @llvm.aarch64.sve.lsl.u.nxv4i32(<vscale x 4 x i1> %pg,
1586 <vscale x 4 x i32> %a,
1587 <vscale x 4 x i32> splat(i32 31))
1588 ret <vscale x 4 x i32> %out
1591 ; Ensure we don't match a left shift by an amount equal to or greater than the element bitwidth to the immediate form.
1592 define <vscale x 4 x i32> @lsl_i32_too_big(<vscale x 4 x i1> %pg, <vscale x 4 x i32> %a) {
1593 ; CHECK-LABEL: lsl_i32_too_big:
1595 ; CHECK-NEXT: mov z1.s, #32 // =0x20
1596 ; CHECK-NEXT: lsl z0.s, p0/m, z0.s, z1.s
1598 %out = call <vscale x 4 x i32> @llvm.aarch64.sve.lsl.nxv4i32(<vscale x 4 x i1> %pg,
1599 <vscale x 4 x i32> %a,
1600 <vscale x 4 x i32> splat(i32 32))
1601 ret <vscale x 4 x i32> %out
1604 define <vscale x 4 x i32> @lsl_i32_zero(<vscale x 4 x i1> %pg, <vscale x 4 x i32> %a) {
1605 ; CHECK-LABEL: lsl_i32_zero:
1607 ; CHECK-NEXT: lsl z0.s, p0/m, z0.s, #0
1609 %out = call <vscale x 4 x i32> @llvm.aarch64.sve.lsl.nxv4i32(<vscale x 4 x i1> %pg,
1610 <vscale x 4 x i32> %a,
1611 <vscale x 4 x i32> zeroinitializer)
1612 ret <vscale x 4 x i32> %out
1615 define <vscale x 2 x i64> @lsl_i64(<vscale x 2 x i1> %pg, <vscale x 2 x i64> %a) {
1616 ; CHECK-LABEL: lsl_i64:
1618 ; CHECK-NEXT: lsl z0.d, p0/m, z0.d, #63
1620 %out = call <vscale x 2 x i64> @llvm.aarch64.sve.lsl.nxv2i64(<vscale x 2 x i1> %pg,
1621 <vscale x 2 x i64> %a,
1622 <vscale x 2 x i64> splat(i64 63))
1623 ret <vscale x 2 x i64> %out
1626 define <vscale x 2 x i64> @lsl_i64_all_active(<vscale x 2 x i64> %a) {
1627 ; CHECK-LABEL: lsl_i64_all_active:
1629 ; CHECK-NEXT: lsl z0.d, z0.d, #63
1631 %pg = call <vscale x 2 x i1> @llvm.aarch64.sve.ptrue.nxv2i1(i32 31)
1632 %out = call <vscale x 2 x i64> @llvm.aarch64.sve.lsl.u.nxv2i64(<vscale x 2 x i1> %pg,
1633 <vscale x 2 x i64> %a,
1634 <vscale x 2 x i64> splat(i64 63))
1635 ret <vscale x 2 x i64> %out
1638 ; Ensure we don't match a left shift by an amount equal to or greater than the element bitwidth to the immediate form.
1639 define <vscale x 2 x i64> @lsl_i64_too_big(<vscale x 2 x i1> %pg, <vscale x 2 x i64> %a) {
1640 ; CHECK-LABEL: lsl_i64_too_big:
1642 ; CHECK-NEXT: mov z1.d, #64 // =0x40
1643 ; CHECK-NEXT: lsl z0.d, p0/m, z0.d, z1.d
1645 %out = call <vscale x 2 x i64> @llvm.aarch64.sve.lsl.nxv2i64(<vscale x 2 x i1> %pg,
1646 <vscale x 2 x i64> %a,
1647 <vscale x 2 x i64> splat(i64 64))
1648 ret <vscale x 2 x i64> %out
1651 define <vscale x 2 x i64> @lsl_i64_zero(<vscale x 2 x i1> %pg, <vscale x 2 x i64> %a) {
1652 ; CHECK-LABEL: lsl_i64_zero:
1654 ; CHECK-NEXT: lsl z0.d, p0/m, z0.d, #0
1656 %out = call <vscale x 2 x i64> @llvm.aarch64.sve.lsl.nxv2i64(<vscale x 2 x i1> %pg,
1657 <vscale x 2 x i64> %a,
1658 <vscale x 2 x i64> zeroinitializer)
1659 ret <vscale x 2 x i64> %out
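; LSR
; The right-shift-by-immediate encodings accept amounts in the range 1..element
; bits. A logical right shift by more than the element size produces the same
; all-zero result as a shift by exactly the element size, so the over-wide
; splats below can be matched to the maximal immediate, whereas a zero shift
; amount has no immediate encoding and must keep the register form.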
1664 define <vscale x 16 x i8> @lsr_i8(<vscale x 16 x i1> %pg, <vscale x 16 x i8> %a) {
1665 ; CHECK-LABEL: lsr_i8:
1667 ; CHECK-NEXT: lsr z0.b, p0/m, z0.b, #8
1669 %out = call <vscale x 16 x i8> @llvm.aarch64.sve.lsr.nxv16i8(<vscale x 16 x i1> %pg,
1670 <vscale x 16 x i8> %a,
1671 <vscale x 16 x i8> splat(i8 9))
1672 ret <vscale x 16 x i8> %out
1675 define <vscale x 16 x i8> @lsr_i8_all_active(<vscale x 16 x i8> %a) {
1676 ; CHECK-LABEL: lsr_i8_all_active:
1678 ; CHECK-NEXT: lsr z0.b, z0.b, #8
1680 %pg = call <vscale x 16 x i1> @llvm.aarch64.sve.ptrue.nxv16i1(i32 31)
1681 %out = call <vscale x 16 x i8> @llvm.aarch64.sve.lsr.u.nxv16i8(<vscale x 16 x i1> %pg,
1682 <vscale x 16 x i8> %a,
1683 <vscale x 16 x i8> splat(i8 8))
1684 ret <vscale x 16 x i8> %out
1687 ; Ensure we don't match a right shift by zero to the immediate form.
1688 define <vscale x 16 x i8> @lsr_i8_too_small(<vscale x 16 x i1> %pg, <vscale x 16 x i8> %a) {
1689 ; CHECK-LABEL: lsr_i8_too_small:
1691 ; CHECK-NEXT: mov z1.b, #0 // =0x0
1692 ; CHECK-NEXT: lsr z0.b, p0/m, z0.b, z1.b
1694 %out = call <vscale x 16 x i8> @llvm.aarch64.sve.lsr.nxv16i8(<vscale x 16 x i1> %pg,
1695 <vscale x 16 x i8> %a,
1696 <vscale x 16 x i8> zeroinitializer)
1697 ret <vscale x 16 x i8> %out
1700 define <vscale x 8 x i16> @lsr_i16(<vscale x 8 x i1> %pg, <vscale x 8 x i16> %a) {
1701 ; CHECK-LABEL: lsr_i16:
1703 ; CHECK-NEXT: lsr z0.h, p0/m, z0.h, #16
1705 %out = call <vscale x 8 x i16> @llvm.aarch64.sve.lsr.nxv8i16(<vscale x 8 x i1> %pg,
1706 <vscale x 8 x i16> %a,
1707 <vscale x 8 x i16> splat(i16 17))
1708 ret <vscale x 8 x i16> %out
1711 define <vscale x 8 x i16> @lsr_i16_all_active(<vscale x 8 x i16> %a) {
1712 ; CHECK-LABEL: lsr_i16_all_active:
1714 ; CHECK-NEXT: lsr z0.h, z0.h, #16
1716 %pg = call <vscale x 8 x i1> @llvm.aarch64.sve.ptrue.nxv8i1(i32 31)
1717 %out = call <vscale x 8 x i16> @llvm.aarch64.sve.lsr.u.nxv8i16(<vscale x 8 x i1> %pg,
1718 <vscale x 8 x i16> %a,
1719 <vscale x 8 x i16> splat(i16 16))
1720 ret <vscale x 8 x i16> %out
1723 ; Ensure we don't match a right shift by zero to the immediate form.
1724 define <vscale x 8 x i16> @lsr_i16_too_small(<vscale x 8 x i1> %pg, <vscale x 8 x i16> %a) {
1725 ; CHECK-LABEL: lsr_i16_too_small:
1727 ; CHECK-NEXT: mov z1.h, #0 // =0x0
1728 ; CHECK-NEXT: lsr z0.h, p0/m, z0.h, z1.h
1730 %out = call <vscale x 8 x i16> @llvm.aarch64.sve.lsr.nxv8i16(<vscale x 8 x i1> %pg,
1731 <vscale x 8 x i16> %a,
1732 <vscale x 8 x i16> zeroinitializer)
1733 ret <vscale x 8 x i16> %out
1736 define <vscale x 4 x i32> @lsr_i32(<vscale x 4 x i1> %pg, <vscale x 4 x i32> %a) {
1737 ; CHECK-LABEL: lsr_i32:
1739 ; CHECK-NEXT: lsr z0.s, p0/m, z0.s, #32
1741 %out = call <vscale x 4 x i32> @llvm.aarch64.sve.lsr.nxv4i32(<vscale x 4 x i1> %pg,
1742 <vscale x 4 x i32> %a,
1743 <vscale x 4 x i32> splat(i32 33))
1744 ret <vscale x 4 x i32> %out
1747 define <vscale x 4 x i32> @lsr_i32_all_active(<vscale x 4 x i32> %a) {
1748 ; CHECK-LABEL: lsr_i32_all_active:
1750 ; CHECK-NEXT: lsr z0.s, z0.s, #32
1752 %pg = call <vscale x 4 x i1> @llvm.aarch64.sve.ptrue.nxv4i1(i32 31)
1753 %out = call <vscale x 4 x i32> @llvm.aarch64.sve.lsr.u.nxv4i32(<vscale x 4 x i1> %pg,
1754 <vscale x 4 x i32> %a,
1755 <vscale x 4 x i32> splat(i32 32))
1756 ret <vscale x 4 x i32> %out
1759 ; Ensure we don't match a right shift by zero to the immediate form.
1760 define <vscale x 4 x i32> @lsr_i32_too_small(<vscale x 4 x i1> %pg, <vscale x 4 x i32> %a) {
1761 ; CHECK-LABEL: lsr_i32_too_small:
1763 ; CHECK-NEXT: mov z1.s, #0 // =0x0
1764 ; CHECK-NEXT: lsr z0.s, p0/m, z0.s, z1.s
1766 %out = call <vscale x 4 x i32> @llvm.aarch64.sve.lsr.nxv4i32(<vscale x 4 x i1> %pg,
1767 <vscale x 4 x i32> %a,
1768 <vscale x 4 x i32> zeroinitializer)
1769 ret <vscale x 4 x i32> %out
1772 define <vscale x 2 x i64> @lsr_i64(<vscale x 2 x i1> %pg, <vscale x 2 x i64> %a) {
1773 ; CHECK-LABEL: lsr_i64:
1775 ; CHECK-NEXT: lsr z0.d, p0/m, z0.d, #64
1777 %out = call <vscale x 2 x i64> @llvm.aarch64.sve.lsr.nxv2i64(<vscale x 2 x i1> %pg,
1778 <vscale x 2 x i64> %a,
1779 <vscale x 2 x i64> splat(i64 65))
1780 ret <vscale x 2 x i64> %out
1783 define <vscale x 2 x i64> @lsr_i64_all_active(<vscale x 2 x i64> %a) {
1784 ; CHECK-LABEL: lsr_i64_all_active:
1786 ; CHECK-NEXT: lsr z0.d, z0.d, #64
1788 %pg = call <vscale x 2 x i1> @llvm.aarch64.sve.ptrue.nxv2i1(i32 31)
1789 %out = call <vscale x 2 x i64> @llvm.aarch64.sve.lsr.u.nxv2i64(<vscale x 2 x i1> %pg,
1790 <vscale x 2 x i64> %a,
1791 <vscale x 2 x i64> splat(i64 64))
1792 ret <vscale x 2 x i64> %out
1795 ; Ensure we don't match a right shift by zero to the immediate form.
1796 define <vscale x 2 x i64> @lsr_i64_too_small(<vscale x 2 x i1> %pg, <vscale x 2 x i64> %a) {
1797 ; CHECK-LABEL: lsr_i64_too_small:
1799 ; CHECK-NEXT: mov z1.d, #0 // =0x0
1800 ; CHECK-NEXT: lsr z0.d, p0/m, z0.d, z1.d
1802 %out = call <vscale x 2 x i64> @llvm.aarch64.sve.lsr.nxv2i64(<vscale x 2 x i1> %pg,
1803 <vscale x 2 x i64> %a,
1804 <vscale x 2 x i64> zeroinitializer)
1805 ret <vscale x 2 x i64> %out
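; When the all-active predicate is created at the same or a narrower element
; width (.b or .h), every bit that governs a .s lane is still set after the
; predicate is reinterpreted at .s, so the tests below expect the predicate to
; be dropped in favour of the unpredicated immediate form. A .d-based ptrue
; only activates every other .s lane, so the predicated form must be kept.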
1808 ; As lsr_i32 but where pg is i8-based and thus compatible with i32.
1809 define <vscale x 4 x i32> @lsr_i32_ptrue_all_b(<vscale x 4 x i32> %a) #0 {
1810 ; CHECK-LABEL: lsr_i32_ptrue_all_b:
1812 ; CHECK-NEXT: lsr z0.s, z0.s, #1
1814 %pg.b = tail call <vscale x 16 x i1> @llvm.aarch64.sve.ptrue.nxv16i1(i32 31)
1815 %pg.s = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> %pg.b)
1816 %b = tail call <vscale x 4 x i32> @llvm.aarch64.sve.dup.x.nxv4i32(i32 1)
1817 %out = tail call <vscale x 4 x i32> @llvm.aarch64.sve.lsr.u.nxv4i32(<vscale x 4 x i1> %pg.s,
1818 <vscale x 4 x i32> %a,
1819 <vscale x 4 x i32> %b)
1820 ret <vscale x 4 x i32> %out
1823 ; As lsr_i32 but where pg is i16-based and thus compatible with i32.
1824 define <vscale x 4 x i32> @lsr_i32_ptrue_all_h(<vscale x 4 x i32> %a) #0 {
1825 ; CHECK-LABEL: lsr_i32_ptrue_all_h:
1827 ; CHECK-NEXT: lsr z0.s, z0.s, #1
1829 %pg.h = tail call <vscale x 8 x i1> @llvm.aarch64.sve.ptrue.nxv8i1(i32 31)
1830 %pg.b = tail call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv8i1(<vscale x 8 x i1> %pg.h)
1831 %pg.s = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> %pg.b)
1832 %b = tail call <vscale x 4 x i32> @llvm.aarch64.sve.dup.x.nxv4i32(i32 1)
1833 %out = tail call <vscale x 4 x i32> @llvm.aarch64.sve.lsr.u.nxv4i32(<vscale x 4 x i1> %pg.s,
1834 <vscale x 4 x i32> %a,
1835 <vscale x 4 x i32> %b)
1836 ret <vscale x 4 x i32> %out
1839 ; As lsr_i32 but where pg is i64-based, which is not compatible with i32, so
1840 ; the inactive lanes matter and the immediate form cannot be used.
1841 define <vscale x 4 x i32> @lsr_i32_ptrue_all_d(<vscale x 4 x i32> %a) #0 {
1842 ; CHECK-LABEL: lsr_i32_ptrue_all_d:
1844 ; CHECK-NEXT: ptrue p0.d
1845 ; CHECK-NEXT: lsr z0.s, p0/m, z0.s, #1
1847 %pg.d = tail call <vscale x 2 x i1> @llvm.aarch64.sve.ptrue.nxv2i1(i32 31)
1848 %pg.b = tail call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv2i1(<vscale x 2 x i1> %pg.d)
1849 %pg.s = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> %pg.b)
1850 %b = tail call <vscale x 4 x i32> @llvm.aarch64.sve.dup.x.nxv4i32(i32 1)
1851 %out = tail call <vscale x 4 x i32> @llvm.aarch64.sve.lsr.nxv4i32(<vscale x 4 x i1> %pg.s,
1852 <vscale x 4 x i32> %a,
1853 <vscale x 4 x i32> %b)
1854 ret <vscale x 4 x i32> %out
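; MUL
; The same predicate-compatibility checks, repeated for the unpredicated
; multiply-by-immediate form.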
1861 ; As mul_i32 but where pg is i8-based and thus compatible with i32.
1862 define <vscale x 4 x i32> @mul_i32_ptrue_all_b(<vscale x 4 x i32> %a) #0 {
1863 ; CHECK-LABEL: mul_i32_ptrue_all_b:
1865 ; CHECK-NEXT: mul z0.s, z0.s, #1
1867 %pg.b = tail call <vscale x 16 x i1> @llvm.aarch64.sve.ptrue.nxv16i1(i32 31)
1868 %pg.s = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> %pg.b)
1869 %b = tail call <vscale x 4 x i32> @llvm.aarch64.sve.dup.x.nxv4i32(i32 1)
1870 %out = tail call <vscale x 4 x i32> @llvm.aarch64.sve.mul.u.nxv4i32(<vscale x 4 x i1> %pg.s,
1871 <vscale x 4 x i32> %a,
1872 <vscale x 4 x i32> %b)
1873 ret <vscale x 4 x i32> %out
1876 ; As mul_i32 but where pg is i16-based and thus compatible with i32.
1877 define <vscale x 4 x i32> @mul_i32_ptrue_all_h(<vscale x 4 x i32> %a) #0 {
1878 ; CHECK-LABEL: mul_i32_ptrue_all_h:
1880 ; CHECK-NEXT: mul z0.s, z0.s, #1
1882 %pg.h = tail call <vscale x 8 x i1> @llvm.aarch64.sve.ptrue.nxv8i1(i32 31)
1883 %pg.b = tail call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv8i1(<vscale x 8 x i1> %pg.h)
1884 %pg.s = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> %pg.b)
1885 %b = tail call <vscale x 4 x i32> @llvm.aarch64.sve.dup.x.nxv4i32(i32 1)
1886 %out = tail call <vscale x 4 x i32> @llvm.aarch64.sve.mul.u.nxv4i32(<vscale x 4 x i1> %pg.s,
1887 <vscale x 4 x i32> %a,
1888 <vscale x 4 x i32> %b)
1889 ret <vscale x 4 x i32> %out
1892 ; As mul_i32 but where pg is i64-based, which is not compatible with i32, so
1893 ; the inactive lanes matter and the immediate form cannot be used.
1894 define <vscale x 4 x i32> @mul_i32_ptrue_all_d(<vscale x 4 x i32> %a) #0 {
1895 ; CHECK-LABEL: mul_i32_ptrue_all_d:
1897 ; CHECK-NEXT: mov z1.s, #1 // =0x1
1898 ; CHECK-NEXT: ptrue p0.d
1899 ; CHECK-NEXT: mul z0.s, p0/m, z0.s, z1.s
1901 %pg.d = tail call <vscale x 2 x i1> @llvm.aarch64.sve.ptrue.nxv2i1(i32 31)
1902 %pg.b = tail call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv2i1(<vscale x 2 x i1> %pg.d)
1903 %pg.s = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> %pg.b)
1904 %b = tail call <vscale x 4 x i32> @llvm.aarch64.sve.dup.x.nxv4i32(i32 1)
1905 %out = tail call <vscale x 4 x i32> @llvm.aarch64.sve.mul.nxv4i32(<vscale x 4 x i1> %pg.s,
1906 <vscale x 4 x i32> %a,
1907 <vscale x 4 x i32> %b)
1908 ret <vscale x 4 x i32> %out
1911 declare <vscale x 16 x i8> @llvm.aarch64.sve.add.u.nxv16i8(<vscale x 16 x i1>, <vscale x 16 x i8>, <vscale x 16 x i8>)
1912 declare <vscale x 8 x i16> @llvm.aarch64.sve.add.u.nxv8i16(<vscale x 8 x i1>, <vscale x 8 x i16>, <vscale x 8 x i16>)
1913 declare <vscale x 4 x i32> @llvm.aarch64.sve.add.u.nxv4i32(<vscale x 4 x i1>, <vscale x 4 x i32>, <vscale x 4 x i32>)
1914 declare <vscale x 2 x i64> @llvm.aarch64.sve.add.u.nxv2i64(<vscale x 2 x i1>, <vscale x 2 x i64>, <vscale x 2 x i64>)
1916 declare <vscale x 4 x i32> @llvm.aarch64.sve.sub.nxv4i32(<vscale x 4 x i1>, <vscale x 4 x i32>, <vscale x 4 x i32>)
1918 declare <vscale x 16 x i8> @llvm.aarch64.sve.sub.u.nxv16i8(<vscale x 16 x i1>, <vscale x 16 x i8>, <vscale x 16 x i8>)
1919 declare <vscale x 8 x i16> @llvm.aarch64.sve.sub.u.nxv8i16(<vscale x 8 x i1>, <vscale x 8 x i16>, <vscale x 8 x i16>)
1920 declare <vscale x 4 x i32> @llvm.aarch64.sve.sub.u.nxv4i32(<vscale x 4 x i1>, <vscale x 4 x i32>, <vscale x 4 x i32>)
1921 declare <vscale x 2 x i64> @llvm.aarch64.sve.sub.u.nxv2i64(<vscale x 2 x i1>, <vscale x 2 x i64>, <vscale x 2 x i64>)
1923 declare <vscale x 16 x i8> @llvm.aarch64.sve.subr.nxv16i8(<vscale x 16 x i1>, <vscale x 16 x i8>, <vscale x 16 x i8>)
1924 declare <vscale x 8 x i16> @llvm.aarch64.sve.subr.nxv8i16(<vscale x 8 x i1>, <vscale x 8 x i16>, <vscale x 8 x i16>)
1925 declare <vscale x 4 x i32> @llvm.aarch64.sve.subr.nxv4i32(<vscale x 4 x i1>, <vscale x 4 x i32>, <vscale x 4 x i32>)
1926 declare <vscale x 2 x i64> @llvm.aarch64.sve.subr.nxv2i64(<vscale x 2 x i1>, <vscale x 2 x i64>, <vscale x 2 x i64>)
1928 declare <vscale x 16 x i8> @llvm.aarch64.sve.sqadd.x.nxv16i8(<vscale x 16 x i8>, <vscale x 16 x i8>)
1929 declare <vscale x 8 x i16> @llvm.aarch64.sve.sqadd.x.nxv8i16(<vscale x 8 x i16>, <vscale x 8 x i16>)
1930 declare <vscale x 4 x i32> @llvm.aarch64.sve.sqadd.x.nxv4i32(<vscale x 4 x i32>, <vscale x 4 x i32>)
1931 declare <vscale x 2 x i64> @llvm.aarch64.sve.sqadd.x.nxv2i64(<vscale x 2 x i64>, <vscale x 2 x i64>)
1933 declare <vscale x 16 x i8> @llvm.aarch64.sve.sqsub.x.nxv16i8(<vscale x 16 x i8>, <vscale x 16 x i8>)
1934 declare <vscale x 8 x i16> @llvm.aarch64.sve.sqsub.x.nxv8i16(<vscale x 8 x i16>, <vscale x 8 x i16>)
1935 declare <vscale x 4 x i32> @llvm.aarch64.sve.sqsub.x.nxv4i32(<vscale x 4 x i32>, <vscale x 4 x i32>)
1936 declare <vscale x 2 x i64> @llvm.aarch64.sve.sqsub.x.nxv2i64(<vscale x 2 x i64>, <vscale x 2 x i64>)
1938 declare <vscale x 16 x i8> @llvm.aarch64.sve.uqadd.x.nxv16i8(<vscale x 16 x i8>, <vscale x 16 x i8>)
1939 declare <vscale x 8 x i16> @llvm.aarch64.sve.uqadd.x.nxv8i16(<vscale x 8 x i16>, <vscale x 8 x i16>)
1940 declare <vscale x 4 x i32> @llvm.aarch64.sve.uqadd.x.nxv4i32(<vscale x 4 x i32>, <vscale x 4 x i32>)
1941 declare <vscale x 2 x i64> @llvm.aarch64.sve.uqadd.x.nxv2i64(<vscale x 2 x i64>, <vscale x 2 x i64>)
1943 declare <vscale x 16 x i8> @llvm.aarch64.sve.uqsub.x.nxv16i8(<vscale x 16 x i8>, <vscale x 16 x i8>)
1944 declare <vscale x 8 x i16> @llvm.aarch64.sve.uqsub.x.nxv8i16(<vscale x 8 x i16>, <vscale x 8 x i16>)
1945 declare <vscale x 4 x i32> @llvm.aarch64.sve.uqsub.x.nxv4i32(<vscale x 4 x i32>, <vscale x 4 x i32>)
1946 declare <vscale x 2 x i64> @llvm.aarch64.sve.uqsub.x.nxv2i64(<vscale x 2 x i64>, <vscale x 2 x i64>)
1948 declare <vscale x 4 x i32> @llvm.aarch64.sve.smax.nxv4i32(<vscale x 4 x i1>, <vscale x 4 x i32>, <vscale x 4 x i32>)
1950 declare <vscale x 16 x i8> @llvm.aarch64.sve.smax.u.nxv16i8(<vscale x 16 x i1>, <vscale x 16 x i8>, <vscale x 16 x i8>)
1951 declare <vscale x 8 x i16> @llvm.aarch64.sve.smax.u.nxv8i16(<vscale x 8 x i1>, <vscale x 8 x i16>, <vscale x 8 x i16>)
1952 declare <vscale x 4 x i32> @llvm.aarch64.sve.smax.u.nxv4i32(<vscale x 4 x i1>, <vscale x 4 x i32>, <vscale x 4 x i32>)
1953 declare <vscale x 2 x i64> @llvm.aarch64.sve.smax.u.nxv2i64(<vscale x 2 x i1>, <vscale x 2 x i64>, <vscale x 2 x i64>)
1955 declare <vscale x 4 x i32> @llvm.aarch64.sve.smin.nxv4i32(<vscale x 4 x i1>, <vscale x 4 x i32>, <vscale x 4 x i32>)
1957 declare <vscale x 16 x i8> @llvm.aarch64.sve.smin.u.nxv16i8(<vscale x 16 x i1>, <vscale x 16 x i8>, <vscale x 16 x i8>)
1958 declare <vscale x 8 x i16> @llvm.aarch64.sve.smin.u.nxv8i16(<vscale x 8 x i1>, <vscale x 8 x i16>, <vscale x 8 x i16>)
1959 declare <vscale x 4 x i32> @llvm.aarch64.sve.smin.u.nxv4i32(<vscale x 4 x i1>, <vscale x 4 x i32>, <vscale x 4 x i32>)
1960 declare <vscale x 2 x i64> @llvm.aarch64.sve.smin.u.nxv2i64(<vscale x 2 x i1>, <vscale x 2 x i64>, <vscale x 2 x i64>)
1962 declare <vscale x 4 x i32> @llvm.aarch64.sve.umax.nxv4i32(<vscale x 4 x i1>, <vscale x 4 x i32>, <vscale x 4 x i32>)
1964 declare <vscale x 16 x i8> @llvm.aarch64.sve.umax.u.nxv16i8(<vscale x 16 x i1>, <vscale x 16 x i8>, <vscale x 16 x i8>)
1965 declare <vscale x 8 x i16> @llvm.aarch64.sve.umax.u.nxv8i16(<vscale x 8 x i1>, <vscale x 8 x i16>, <vscale x 8 x i16>)
1966 declare <vscale x 4 x i32> @llvm.aarch64.sve.umax.u.nxv4i32(<vscale x 4 x i1>, <vscale x 4 x i32>, <vscale x 4 x i32>)
1967 declare <vscale x 2 x i64> @llvm.aarch64.sve.umax.u.nxv2i64(<vscale x 2 x i1>, <vscale x 2 x i64>, <vscale x 2 x i64>)
1969 declare <vscale x 4 x i32> @llvm.aarch64.sve.umin.nxv4i32(<vscale x 4 x i1>, <vscale x 4 x i32>, <vscale x 4 x i32>)
1971 declare <vscale x 16 x i8> @llvm.aarch64.sve.umin.u.nxv16i8(<vscale x 16 x i1>, <vscale x 16 x i8>, <vscale x 16 x i8>)
1972 declare <vscale x 8 x i16> @llvm.aarch64.sve.umin.u.nxv8i16(<vscale x 8 x i1>, <vscale x 8 x i16>, <vscale x 8 x i16>)
1973 declare <vscale x 4 x i32> @llvm.aarch64.sve.umin.u.nxv4i32(<vscale x 4 x i1>, <vscale x 4 x i32>, <vscale x 4 x i32>)
1974 declare <vscale x 2 x i64> @llvm.aarch64.sve.umin.u.nxv2i64(<vscale x 2 x i1>, <vscale x 2 x i64>, <vscale x 2 x i64>)
1976 declare <vscale x 16 x i8> @llvm.aarch64.sve.asr.nxv16i8(<vscale x 16 x i1>, <vscale x 16 x i8>, <vscale x 16 x i8>)
1977 declare <vscale x 8 x i16> @llvm.aarch64.sve.asr.nxv8i16(<vscale x 8 x i1>, <vscale x 8 x i16>, <vscale x 8 x i16>)
1978 declare <vscale x 4 x i32> @llvm.aarch64.sve.asr.nxv4i32(<vscale x 4 x i1>, <vscale x 4 x i32>, <vscale x 4 x i32>)
1979 declare <vscale x 2 x i64> @llvm.aarch64.sve.asr.nxv2i64(<vscale x 2 x i1>, <vscale x 2 x i64>, <vscale x 2 x i64>)
1981 declare <vscale x 16 x i8> @llvm.aarch64.sve.asr.u.nxv16i8(<vscale x 16 x i1>, <vscale x 16 x i8>, <vscale x 16 x i8>)
1982 declare <vscale x 8 x i16> @llvm.aarch64.sve.asr.u.nxv8i16(<vscale x 8 x i1>, <vscale x 8 x i16>, <vscale x 8 x i16>)
1983 declare <vscale x 4 x i32> @llvm.aarch64.sve.asr.u.nxv4i32(<vscale x 4 x i1>, <vscale x 4 x i32>, <vscale x 4 x i32>)
1984 declare <vscale x 2 x i64> @llvm.aarch64.sve.asr.u.nxv2i64(<vscale x 2 x i1>, <vscale x 2 x i64>, <vscale x 2 x i64>)
1986 declare <vscale x 16 x i8> @llvm.aarch64.sve.lsl.nxv16i8(<vscale x 16 x i1>, <vscale x 16 x i8>, <vscale x 16 x i8>)
1987 declare <vscale x 8 x i16> @llvm.aarch64.sve.lsl.nxv8i16(<vscale x 8 x i1>, <vscale x 8 x i16>, <vscale x 8 x i16>)
1988 declare <vscale x 4 x i32> @llvm.aarch64.sve.lsl.nxv4i32(<vscale x 4 x i1>, <vscale x 4 x i32>, <vscale x 4 x i32>)
1989 declare <vscale x 2 x i64> @llvm.aarch64.sve.lsl.nxv2i64(<vscale x 2 x i1>, <vscale x 2 x i64>, <vscale x 2 x i64>)
1991 declare <vscale x 16 x i8> @llvm.aarch64.sve.lsl.u.nxv16i8(<vscale x 16 x i1>, <vscale x 16 x i8>, <vscale x 16 x i8>)
1992 declare <vscale x 8 x i16> @llvm.aarch64.sve.lsl.u.nxv8i16(<vscale x 8 x i1>, <vscale x 8 x i16>, <vscale x 8 x i16>)
1993 declare <vscale x 4 x i32> @llvm.aarch64.sve.lsl.u.nxv4i32(<vscale x 4 x i1>, <vscale x 4 x i32>, <vscale x 4 x i32>)
1994 declare <vscale x 2 x i64> @llvm.aarch64.sve.lsl.u.nxv2i64(<vscale x 2 x i1>, <vscale x 2 x i64>, <vscale x 2 x i64>)
1996 declare <vscale x 16 x i8> @llvm.aarch64.sve.lsr.nxv16i8(<vscale x 16 x i1>, <vscale x 16 x i8>, <vscale x 16 x i8>)
1997 declare <vscale x 8 x i16> @llvm.aarch64.sve.lsr.nxv8i16(<vscale x 8 x i1>, <vscale x 8 x i16>, <vscale x 8 x i16>)
1998 declare <vscale x 4 x i32> @llvm.aarch64.sve.lsr.nxv4i32(<vscale x 4 x i1>, <vscale x 4 x i32>, <vscale x 4 x i32>)
1999 declare <vscale x 2 x i64> @llvm.aarch64.sve.lsr.nxv2i64(<vscale x 2 x i1>, <vscale x 2 x i64>, <vscale x 2 x i64>)
2001 declare <vscale x 16 x i8> @llvm.aarch64.sve.lsr.u.nxv16i8(<vscale x 16 x i1>, <vscale x 16 x i8>, <vscale x 16 x i8>)
2002 declare <vscale x 8 x i16> @llvm.aarch64.sve.lsr.u.nxv8i16(<vscale x 8 x i1>, <vscale x 8 x i16>, <vscale x 8 x i16>)
2003 declare <vscale x 4 x i32> @llvm.aarch64.sve.lsr.u.nxv4i32(<vscale x 4 x i1>, <vscale x 4 x i32>, <vscale x 4 x i32>)
2004 declare <vscale x 2 x i64> @llvm.aarch64.sve.lsr.u.nxv2i64(<vscale x 2 x i1>, <vscale x 2 x i64>, <vscale x 2 x i64>)
2006 declare <vscale x 4 x i32> @llvm.aarch64.sve.mul.nxv4i32(<vscale x 4 x i1>, <vscale x 4 x i32>, <vscale x 4 x i32>)
2008 declare <vscale x 16 x i8> @llvm.aarch64.sve.mul.u.nxv16i8(<vscale x 16 x i1>, <vscale x 16 x i8>, <vscale x 16 x i8>)
2009 declare <vscale x 8 x i16> @llvm.aarch64.sve.mul.u.nxv8i16(<vscale x 8 x i1>, <vscale x 8 x i16>, <vscale x 8 x i16>)
2010 declare <vscale x 4 x i32> @llvm.aarch64.sve.mul.u.nxv4i32(<vscale x 4 x i1>, <vscale x 4 x i32>, <vscale x 4 x i32>)
2011 declare <vscale x 2 x i64> @llvm.aarch64.sve.mul.u.nxv2i64(<vscale x 2 x i1>, <vscale x 2 x i64>, <vscale x 2 x i64>)
2013 declare <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1>)
2014 declare <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1>)
2015 declare <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1>)
2017 declare <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv4i1(<vscale x 4 x i1>)
2018 declare <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv8i1(<vscale x 8 x i1>)
2019 declare <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv2i1(<vscale x 2 x i1>)
2021 declare <vscale x 4 x i32> @llvm.aarch64.sve.dup.x.nxv4i32(i32)
2023 declare <vscale x 16 x i1> @llvm.aarch64.sve.ptrue.nxv16i1(i32 %pattern)
2024 declare <vscale x 8 x i1> @llvm.aarch64.sve.ptrue.nxv8i1(i32 %pattern)
2025 declare <vscale x 4 x i1> @llvm.aarch64.sve.ptrue.nxv4i1(i32 %pattern)
2026 declare <vscale x 2 x i1> @llvm.aarch64.sve.ptrue.nxv2i1(i32 %pattern)