; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sve < %s | FileCheck %s
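
; This file checks that SVE intrinsic arithmetic whose second operand is a
; splat of a constant selects the immediate form of the instruction, and that
; constants outside the encodable range fall back to a register operand.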

; ADD

define <vscale x 16 x i8> @add_i8(<vscale x 16 x i8> %a) {
; CHECK-LABEL: add_i8:
; CHECK: // %bb.0:
; CHECK-NEXT: add z0.b, z0.b, #127
; CHECK-NEXT: ret
  %pg = call <vscale x 16 x i1> @llvm.aarch64.sve.ptrue.nxv16i1(i32 31)
  %elt = insertelement <vscale x 16 x i8> undef, i8 127, i32 0
  %splat = shufflevector <vscale x 16 x i8> %elt, <vscale x 16 x i8> undef, <vscale x 16 x i32> zeroinitializer
  %out = call <vscale x 16 x i8> @llvm.aarch64.sve.add.nxv16i8(<vscale x 16 x i1> %pg, <vscale x 16 x i8> %a, <vscale x 16 x i8> %splat)
  ret <vscale x 16 x i8> %out
}
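
; ADD (immediate) encodes an unsigned 8-bit value, optionally shifted left by
; 8 for the 16-, 32- and 64-bit element types, so #127 is always encodable.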

define <vscale x 8 x i16> @add_i16(<vscale x 8 x i16> %a) {
; CHECK-LABEL: add_i16:
; CHECK: // %bb.0:
; CHECK-NEXT: add z0.h, z0.h, #127
; CHECK-NEXT: ret
  %pg = call <vscale x 8 x i1> @llvm.aarch64.sve.ptrue.nxv8i1(i32 31)
  %elt = insertelement <vscale x 8 x i16> undef, i16 127, i32 0
  %splat = shufflevector <vscale x 8 x i16> %elt, <vscale x 8 x i16> undef, <vscale x 8 x i32> zeroinitializer
  %out = call <vscale x 8 x i16> @llvm.aarch64.sve.add.nxv8i16(<vscale x 8 x i1> %pg, <vscale x 8 x i16> %a, <vscale x 8 x i16> %splat)
  ret <vscale x 8 x i16> %out
}
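
; 257 (0x101) is neither an 8-bit immediate nor an 8-bit immediate shifted
; left by 8, so the splat must be materialised in a register. Because the
; predicate is all active, the unpredicated vector form of the add is used.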

define <vscale x 8 x i16> @add_i16_out_of_range(<vscale x 8 x i16> %a) {
; CHECK-LABEL: add_i16_out_of_range:
; CHECK: // %bb.0:
; CHECK-NEXT: mov w8, #257
; CHECK-NEXT: mov z1.h, w8
; CHECK-NEXT: add z0.h, z0.h, z1.h
; CHECK-NEXT: ret
  %pg = call <vscale x 8 x i1> @llvm.aarch64.sve.ptrue.nxv8i1(i32 31)
  %elt = insertelement <vscale x 8 x i16> undef, i16 257, i32 0
  %splat = shufflevector <vscale x 8 x i16> %elt, <vscale x 8 x i16> undef, <vscale x 8 x i32> zeroinitializer
  %out = call <vscale x 8 x i16> @llvm.aarch64.sve.add.nxv8i16(<vscale x 8 x i1> %pg, <vscale x 8 x i16> %a, <vscale x 8 x i16> %splat)
  ret <vscale x 8 x i16> %out
}

define <vscale x 4 x i32> @add_i32(<vscale x 4 x i32> %a) {
; CHECK-LABEL: add_i32:
; CHECK: // %bb.0:
; CHECK-NEXT: add z0.s, z0.s, #127
; CHECK-NEXT: ret
  %pg = call <vscale x 4 x i1> @llvm.aarch64.sve.ptrue.nxv4i1(i32 31)
  %elt = insertelement <vscale x 4 x i32> undef, i32 127, i32 0
  %splat = shufflevector <vscale x 4 x i32> %elt, <vscale x 4 x i32> undef, <vscale x 4 x i32> zeroinitializer
  %out = call <vscale x 4 x i32> @llvm.aarch64.sve.add.nxv4i32(<vscale x 4 x i1> %pg, <vscale x 4 x i32> %a, <vscale x 4 x i32> %splat)
  ret <vscale x 4 x i32> %out
}

define <vscale x 4 x i32> @add_i32_out_of_range(<vscale x 4 x i32> %a) {
; CHECK-LABEL: add_i32_out_of_range:
; CHECK: // %bb.0:
; CHECK-NEXT: mov w8, #257
; CHECK-NEXT: mov z1.s, w8
; CHECK-NEXT: add z0.s, z0.s, z1.s
; CHECK-NEXT: ret
  %pg = call <vscale x 4 x i1> @llvm.aarch64.sve.ptrue.nxv4i1(i32 31)
  %elt = insertelement <vscale x 4 x i32> undef, i32 257, i32 0
  %splat = shufflevector <vscale x 4 x i32> %elt, <vscale x 4 x i32> undef, <vscale x 4 x i32> zeroinitializer
  %out = call <vscale x 4 x i32> @llvm.aarch64.sve.add.nxv4i32(<vscale x 4 x i1> %pg, <vscale x 4 x i32> %a, <vscale x 4 x i32> %splat)
  ret <vscale x 4 x i32> %out
}

define <vscale x 2 x i64> @add_i64(<vscale x 2 x i64> %a) {
; CHECK-LABEL: add_i64:
; CHECK: // %bb.0:
; CHECK-NEXT: add z0.d, z0.d, #127
; CHECK-NEXT: ret
  %pg = call <vscale x 2 x i1> @llvm.aarch64.sve.ptrue.nxv2i1(i32 31)
  %elt = insertelement <vscale x 2 x i64> undef, i64 127, i64 0
  %splat = shufflevector <vscale x 2 x i64> %elt, <vscale x 2 x i64> undef, <vscale x 2 x i32> zeroinitializer
  %out = call <vscale x 2 x i64> @llvm.aarch64.sve.add.nxv2i64(<vscale x 2 x i1> %pg, <vscale x 2 x i64> %a, <vscale x 2 x i64> %splat)
  ret <vscale x 2 x i64> %out
}

define <vscale x 2 x i64> @add_i64_out_of_range(<vscale x 2 x i64> %a) {
; CHECK-LABEL: add_i64_out_of_range:
; CHECK: // %bb.0:
; CHECK-NEXT: mov w8, #257
; CHECK-NEXT: mov z1.d, x8
; CHECK-NEXT: add z0.d, z0.d, z1.d
; CHECK-NEXT: ret
  %pg = call <vscale x 2 x i1> @llvm.aarch64.sve.ptrue.nxv2i1(i32 31)
  %elt = insertelement <vscale x 2 x i64> undef, i64 257, i64 0
  %splat = shufflevector <vscale x 2 x i64> %elt, <vscale x 2 x i64> undef, <vscale x 2 x i32> zeroinitializer
  %out = call <vscale x 2 x i64> @llvm.aarch64.sve.add.nxv2i64(<vscale x 2 x i1> %pg, <vscale x 2 x i64> %a, <vscale x 2 x i64> %splat)
  ret <vscale x 2 x i64> %out
}

; SUB

define <vscale x 16 x i8> @sub_i8(<vscale x 16 x i8> %a) {
; CHECK-LABEL: sub_i8:
; CHECK: // %bb.0:
; CHECK-NEXT: sub z0.b, z0.b, #127
; CHECK-NEXT: ret
  %pg = call <vscale x 16 x i1> @llvm.aarch64.sve.ptrue.nxv16i1(i32 31)
  %elt = insertelement <vscale x 16 x i8> undef, i8 127, i32 0
  %splat = shufflevector <vscale x 16 x i8> %elt, <vscale x 16 x i8> undef, <vscale x 16 x i32> zeroinitializer
  %out = call <vscale x 16 x i8> @llvm.aarch64.sve.sub.nxv16i8(<vscale x 16 x i1> %pg, <vscale x 16 x i8> %a, <vscale x 16 x i8> %splat)
  ret <vscale x 16 x i8> %out
}

define <vscale x 8 x i16> @sub_i16(<vscale x 8 x i16> %a) {
; CHECK-LABEL: sub_i16:
; CHECK: // %bb.0:
; CHECK-NEXT: sub z0.h, z0.h, #127
; CHECK-NEXT: ret
  %pg = call <vscale x 8 x i1> @llvm.aarch64.sve.ptrue.nxv8i1(i32 31)
  %elt = insertelement <vscale x 8 x i16> undef, i16 127, i32 0
  %splat = shufflevector <vscale x 8 x i16> %elt, <vscale x 8 x i16> undef, <vscale x 8 x i32> zeroinitializer
  %out = call <vscale x 8 x i16> @llvm.aarch64.sve.sub.nxv8i16(<vscale x 8 x i1> %pg, <vscale x 8 x i16> %a, <vscale x 8 x i16> %splat)
  ret <vscale x 8 x i16> %out
}

define <vscale x 8 x i16> @sub_i16_out_of_range(<vscale x 8 x i16> %a) {
; CHECK-LABEL: sub_i16_out_of_range:
; CHECK: // %bb.0:
; CHECK-NEXT: mov w8, #257
; CHECK-NEXT: mov z1.h, w8
; CHECK-NEXT: sub z0.h, z0.h, z1.h
; CHECK-NEXT: ret
  %pg = call <vscale x 8 x i1> @llvm.aarch64.sve.ptrue.nxv8i1(i32 31)
  %elt = insertelement <vscale x 8 x i16> undef, i16 257, i32 0
  %splat = shufflevector <vscale x 8 x i16> %elt, <vscale x 8 x i16> undef, <vscale x 8 x i32> zeroinitializer
  %out = call <vscale x 8 x i16> @llvm.aarch64.sve.sub.nxv8i16(<vscale x 8 x i1> %pg, <vscale x 8 x i16> %a, <vscale x 8 x i16> %splat)
  ret <vscale x 8 x i16> %out
}

define <vscale x 4 x i32> @sub_i32(<vscale x 4 x i32> %a) {
; CHECK-LABEL: sub_i32:
; CHECK: // %bb.0:
; CHECK-NEXT: sub z0.s, z0.s, #127
; CHECK-NEXT: ret
  %pg = call <vscale x 4 x i1> @llvm.aarch64.sve.ptrue.nxv4i1(i32 31)
  %elt = insertelement <vscale x 4 x i32> undef, i32 127, i32 0
  %splat = shufflevector <vscale x 4 x i32> %elt, <vscale x 4 x i32> undef, <vscale x 4 x i32> zeroinitializer
  %out = call <vscale x 4 x i32> @llvm.aarch64.sve.sub.nxv4i32(<vscale x 4 x i1> %pg, <vscale x 4 x i32> %a, <vscale x 4 x i32> %splat)
  ret <vscale x 4 x i32> %out
}

define <vscale x 4 x i32> @sub_i32_out_of_range(<vscale x 4 x i32> %a) {
; CHECK-LABEL: sub_i32_out_of_range:
; CHECK: // %bb.0:
; CHECK-NEXT: mov w8, #257
; CHECK-NEXT: mov z1.s, w8
; CHECK-NEXT: sub z0.s, z0.s, z1.s
; CHECK-NEXT: ret
  %pg = call <vscale x 4 x i1> @llvm.aarch64.sve.ptrue.nxv4i1(i32 31)
  %elt = insertelement <vscale x 4 x i32> undef, i32 257, i32 0
  %splat = shufflevector <vscale x 4 x i32> %elt, <vscale x 4 x i32> undef, <vscale x 4 x i32> zeroinitializer
  %out = call <vscale x 4 x i32> @llvm.aarch64.sve.sub.nxv4i32(<vscale x 4 x i1> %pg, <vscale x 4 x i32> %a, <vscale x 4 x i32> %splat)
  ret <vscale x 4 x i32> %out
}

define <vscale x 2 x i64> @sub_i64(<vscale x 2 x i64> %a) {
; CHECK-LABEL: sub_i64:
; CHECK: // %bb.0:
; CHECK-NEXT: sub z0.d, z0.d, #127
; CHECK-NEXT: ret
  %pg = call <vscale x 2 x i1> @llvm.aarch64.sve.ptrue.nxv2i1(i32 31)
  %elt = insertelement <vscale x 2 x i64> undef, i64 127, i64 0
  %splat = shufflevector <vscale x 2 x i64> %elt, <vscale x 2 x i64> undef, <vscale x 2 x i32> zeroinitializer
  %out = call <vscale x 2 x i64> @llvm.aarch64.sve.sub.nxv2i64(<vscale x 2 x i1> %pg, <vscale x 2 x i64> %a, <vscale x 2 x i64> %splat)
  ret <vscale x 2 x i64> %out
}

define <vscale x 2 x i64> @sub_i64_out_of_range(<vscale x 2 x i64> %a) {
; CHECK-LABEL: sub_i64_out_of_range:
; CHECK: // %bb.0:
; CHECK-NEXT: mov w8, #257
; CHECK-NEXT: mov z1.d, x8
; CHECK-NEXT: sub z0.d, z0.d, z1.d
; CHECK-NEXT: ret
  %pg = call <vscale x 2 x i1> @llvm.aarch64.sve.ptrue.nxv2i1(i32 31)
  %elt = insertelement <vscale x 2 x i64> undef, i64 257, i64 0
  %splat = shufflevector <vscale x 2 x i64> %elt, <vscale x 2 x i64> undef, <vscale x 2 x i32> zeroinitializer
  %out = call <vscale x 2 x i64> @llvm.aarch64.sve.sub.nxv2i64(<vscale x 2 x i1> %pg, <vscale x 2 x i64> %a, <vscale x 2 x i64> %splat)
  ret <vscale x 2 x i64> %out
}
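
; The tests below build their all-active predicate through explicit
; convert.to/from.svbool calls rather than a directly typed ptrue, to show
; that a compatible predicate still allows the immediate form. Attribute
; group #0 is defined later in the file.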

; As sub_i32 but where pg is i8 based and thus compatible for i32.
define <vscale x 4 x i32> @sub_i32_ptrue_all_b(<vscale x 4 x i32> %a) #0 {
; CHECK-LABEL: sub_i32_ptrue_all_b:
; CHECK: sub z0.s, z0.s, #1
; CHECK-NEXT: ret
  %pg.b = tail call <vscale x 16 x i1> @llvm.aarch64.sve.ptrue.nxv16i1(i32 31)
  %pg.s = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> %pg.b)
  %b = tail call <vscale x 4 x i32> @llvm.aarch64.sve.dup.x.nxv4i32(i32 1)
  %out = tail call <vscale x 4 x i32> @llvm.aarch64.sve.sub.nxv4i32(<vscale x 4 x i1> %pg.s, <vscale x 4 x i32> %a, <vscale x 4 x i32> %b)
  ret <vscale x 4 x i32> %out
}

; As sub_i32 but where pg is i16 based and thus compatible for i32.
define <vscale x 4 x i32> @sub_i32_ptrue_all_h(<vscale x 4 x i32> %a) #0 {
; CHECK-LABEL: sub_i32_ptrue_all_h:
; CHECK: sub z0.s, z0.s, #1
; CHECK-NEXT: ret
  %pg.h = tail call <vscale x 8 x i1> @llvm.aarch64.sve.ptrue.nxv8i1(i32 31)
  %pg.b = tail call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv8i1(<vscale x 8 x i1> %pg.h)
  %pg.s = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> %pg.b)
  %b = tail call <vscale x 4 x i32> @llvm.aarch64.sve.dup.x.nxv4i32(i32 1)
  %out = tail call <vscale x 4 x i32> @llvm.aarch64.sve.sub.nxv4i32(<vscale x 4 x i1> %pg.s, <vscale x 4 x i32> %a, <vscale x 4 x i32> %b)
  ret <vscale x 4 x i32> %out
}

; As sub_i32 but where pg is i64 based, which is not compatible for i32 and
; thus inactive lanes are important and the immediate form cannot be used.
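; (An all-active nxv2i1 predicate widened through svbool to nxv4i1 leaves
; every odd 32-bit lane inactive, so the operation stays genuinely predicated.)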
define <vscale x 4 x i32> @sub_i32_ptrue_all_d(<vscale x 4 x i32> %a) #0 {
; CHECK-LABEL: sub_i32_ptrue_all_d:
; CHECK-DAG: ptrue [[PG:p[0-9]+]].d
; CHECK-DAG: mov [[DUP:z[0-9]+]].s, #1
; CHECK-DAG: sub z0.s, [[PG]]/m, z0.s, [[DUP]].s
; CHECK-NEXT: ret
  %pg.d = tail call <vscale x 2 x i1> @llvm.aarch64.sve.ptrue.nxv2i1(i32 31)
  %pg.b = tail call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv2i1(<vscale x 2 x i1> %pg.d)
  %pg.s = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> %pg.b)
  %b = tail call <vscale x 4 x i32> @llvm.aarch64.sve.dup.x.nxv4i32(i32 1)
  %out = tail call <vscale x 4 x i32> @llvm.aarch64.sve.sub.nxv4i32(<vscale x 4 x i1> %pg.s, <vscale x 4 x i32> %a, <vscale x 4 x i32> %b)
  ret <vscale x 4 x i32> %out
}

; SMAX

define <vscale x 16 x i8> @smax_i8(<vscale x 16 x i8> %a) {
; CHECK-LABEL: smax_i8:
; CHECK: // %bb.0:
; CHECK-NEXT: smax z0.b, z0.b, #-128
; CHECK-NEXT: ret
  %pg = call <vscale x 16 x i1> @llvm.aarch64.sve.ptrue.nxv16i1(i32 31)
  %elt = insertelement <vscale x 16 x i8> undef, i8 -128, i32 0
  %splat = shufflevector <vscale x 16 x i8> %elt, <vscale x 16 x i8> undef, <vscale x 16 x i32> zeroinitializer
  %out = call <vscale x 16 x i8> @llvm.aarch64.sve.smax.nxv16i8(<vscale x 16 x i1> %pg, <vscale x 16 x i8> %a, <vscale x 16 x i8> %splat)
  ret <vscale x 16 x i8> %out
}

define <vscale x 8 x i16> @smax_i16(<vscale x 8 x i16> %a) {
; CHECK-LABEL: smax_i16:
; CHECK: // %bb.0:
; CHECK-NEXT: smax z0.h, z0.h, #127
; CHECK-NEXT: ret
  %pg = call <vscale x 8 x i1> @llvm.aarch64.sve.ptrue.nxv8i1(i32 31)
  %elt = insertelement <vscale x 8 x i16> undef, i16 127, i32 0
  %splat = shufflevector <vscale x 8 x i16> %elt, <vscale x 8 x i16> undef, <vscale x 8 x i32> zeroinitializer
  %out = call <vscale x 8 x i16> @llvm.aarch64.sve.smax.nxv8i16(<vscale x 8 x i1> %pg, <vscale x 8 x i16> %a, <vscale x 8 x i16> %splat)
  ret <vscale x 8 x i16> %out
}
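
; SMAX (immediate) takes a signed 8-bit immediate (-128 to 127), so 129 is
; not encodable. There is no unpredicated SMAX (vectors) form either, so the
; fallback materialises an all-active predicate as well as the splat.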

define <vscale x 8 x i16> @smax_i16_out_of_range(<vscale x 8 x i16> %a) {
; CHECK-LABEL: smax_i16_out_of_range:
; CHECK: // %bb.0:
; CHECK-NEXT: mov w8, #129
; CHECK-NEXT: ptrue p0.h
; CHECK-NEXT: mov z1.h, w8
; CHECK-NEXT: smax z0.h, p0/m, z0.h, z1.h
; CHECK-NEXT: ret
  %pg = call <vscale x 8 x i1> @llvm.aarch64.sve.ptrue.nxv8i1(i32 31)
  %elt = insertelement <vscale x 8 x i16> undef, i16 129, i32 0
  %splat = shufflevector <vscale x 8 x i16> %elt, <vscale x 8 x i16> undef, <vscale x 8 x i32> zeroinitializer
  %out = call <vscale x 8 x i16> @llvm.aarch64.sve.smax.nxv8i16(<vscale x 8 x i1> %pg, <vscale x 8 x i16> %a, <vscale x 8 x i16> %splat)
  ret <vscale x 8 x i16> %out
}

define <vscale x 4 x i32> @smax_i32(<vscale x 4 x i32> %a) {
; CHECK-LABEL: smax_i32:
; CHECK: // %bb.0:
; CHECK-NEXT: smax z0.s, z0.s, #-128
; CHECK-NEXT: ret
  %pg = call <vscale x 4 x i1> @llvm.aarch64.sve.ptrue.nxv4i1(i32 31)
  %elt = insertelement <vscale x 4 x i32> undef, i32 -128, i32 0
  %splat = shufflevector <vscale x 4 x i32> %elt, <vscale x 4 x i32> undef, <vscale x 4 x i32> zeroinitializer
  %out = call <vscale x 4 x i32> @llvm.aarch64.sve.smax.nxv4i32(<vscale x 4 x i1> %pg, <vscale x 4 x i32> %a, <vscale x 4 x i32> %splat)
  ret <vscale x 4 x i32> %out
}

define <vscale x 4 x i32> @smax_i32_out_of_range(<vscale x 4 x i32> %a) {
; CHECK-LABEL: smax_i32_out_of_range:
; CHECK: // %bb.0:
; CHECK-NEXT: mov w8, #-129
; CHECK-NEXT: ptrue p0.s
; CHECK-NEXT: mov z1.s, w8
; CHECK-NEXT: smax z0.s, p0/m, z0.s, z1.s
; CHECK-NEXT: ret
  %pg = call <vscale x 4 x i1> @llvm.aarch64.sve.ptrue.nxv4i1(i32 31)
  %elt = insertelement <vscale x 4 x i32> undef, i32 -129, i32 0
  %splat = shufflevector <vscale x 4 x i32> %elt, <vscale x 4 x i32> undef, <vscale x 4 x i32> zeroinitializer
  %out = call <vscale x 4 x i32> @llvm.aarch64.sve.smax.nxv4i32(<vscale x 4 x i1> %pg, <vscale x 4 x i32> %a, <vscale x 4 x i32> %splat)
  ret <vscale x 4 x i32> %out
}

define <vscale x 2 x i64> @smax_i64(<vscale x 2 x i64> %a) {
; CHECK-LABEL: smax_i64:
; CHECK: // %bb.0:
; CHECK-NEXT: smax z0.d, z0.d, #127
; CHECK-NEXT: ret
  %pg = call <vscale x 2 x i1> @llvm.aarch64.sve.ptrue.nxv2i1(i32 31)
  %elt = insertelement <vscale x 2 x i64> undef, i64 127, i64 0
  %splat = shufflevector <vscale x 2 x i64> %elt, <vscale x 2 x i64> undef, <vscale x 2 x i32> zeroinitializer
  %out = call <vscale x 2 x i64> @llvm.aarch64.sve.smax.nxv2i64(<vscale x 2 x i1> %pg, <vscale x 2 x i64> %a, <vscale x 2 x i64> %splat)
  ret <vscale x 2 x i64> %out
}

define <vscale x 2 x i64> @smax_i64_out_of_range(<vscale x 2 x i64> %a) {
; CHECK-LABEL: smax_i64_out_of_range:
; CHECK: // %bb.0:
; CHECK-NEXT: mov w8, #65535
; CHECK-NEXT: ptrue p0.d
; CHECK-NEXT: mov z1.d, x8
; CHECK-NEXT: smax z0.d, p0/m, z0.d, z1.d
; CHECK-NEXT: ret
  %pg = call <vscale x 2 x i1> @llvm.aarch64.sve.ptrue.nxv2i1(i32 31)
  %elt = insertelement <vscale x 2 x i64> undef, i64 65535, i64 0
  %splat = shufflevector <vscale x 2 x i64> %elt, <vscale x 2 x i64> undef, <vscale x 2 x i32> zeroinitializer
  %out = call <vscale x 2 x i64> @llvm.aarch64.sve.smax.nxv2i64(<vscale x 2 x i1> %pg, <vscale x 2 x i64> %a, <vscale x 2 x i64> %splat)
  ret <vscale x 2 x i64> %out
}

; As smax_i32 but where pg is i8 based and thus compatible for i32.
define <vscale x 4 x i32> @smax_i32_ptrue_all_b(<vscale x 4 x i32> %a) #0 {
; CHECK-LABEL: smax_i32_ptrue_all_b:
; CHECK: smax z0.s, z0.s, #1
; CHECK-NEXT: ret
  %pg.b = tail call <vscale x 16 x i1> @llvm.aarch64.sve.ptrue.nxv16i1(i32 31)
  %pg.s = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> %pg.b)
  %b = tail call <vscale x 4 x i32> @llvm.aarch64.sve.dup.x.nxv4i32(i32 1)
  %out = tail call <vscale x 4 x i32> @llvm.aarch64.sve.smax.nxv4i32(<vscale x 4 x i1> %pg.s, <vscale x 4 x i32> %a, <vscale x 4 x i32> %b)
  ret <vscale x 4 x i32> %out
}

; As smax_i32 but where pg is i16 based and thus compatible for i32.
define <vscale x 4 x i32> @smax_i32_ptrue_all_h(<vscale x 4 x i32> %a) #0 {
; CHECK-LABEL: smax_i32_ptrue_all_h:
; CHECK: smax z0.s, z0.s, #1
; CHECK-NEXT: ret
  %pg.h = tail call <vscale x 8 x i1> @llvm.aarch64.sve.ptrue.nxv8i1(i32 31)
  %pg.b = tail call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv8i1(<vscale x 8 x i1> %pg.h)
  %pg.s = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> %pg.b)
  %b = tail call <vscale x 4 x i32> @llvm.aarch64.sve.dup.x.nxv4i32(i32 1)
  %out = tail call <vscale x 4 x i32> @llvm.aarch64.sve.smax.nxv4i32(<vscale x 4 x i1> %pg.s, <vscale x 4 x i32> %a, <vscale x 4 x i32> %b)
  ret <vscale x 4 x i32> %out
}

; As smax_i32 but where pg is i64 based, which is not compatible for i32 and
; thus inactive lanes are important and the immediate form cannot be used.
define <vscale x 4 x i32> @smax_i32_ptrue_all_d(<vscale x 4 x i32> %a) #0 {
; CHECK-LABEL: smax_i32_ptrue_all_d:
; CHECK-DAG: ptrue [[PG:p[0-9]+]].d
; CHECK-DAG: mov [[DUP:z[0-9]+]].s, #1
; CHECK-DAG: smax z0.s, [[PG]]/m, z0.s, [[DUP]].s
; CHECK-NEXT: ret
  %pg.d = tail call <vscale x 2 x i1> @llvm.aarch64.sve.ptrue.nxv2i1(i32 31)
  %pg.b = tail call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv2i1(<vscale x 2 x i1> %pg.d)
  %pg.s = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> %pg.b)
  %b = tail call <vscale x 4 x i32> @llvm.aarch64.sve.dup.x.nxv4i32(i32 1)
  %out = tail call <vscale x 4 x i32> @llvm.aarch64.sve.smax.nxv4i32(<vscale x 4 x i1> %pg.s, <vscale x 4 x i32> %a, <vscale x 4 x i32> %b)
  ret <vscale x 4 x i32> %out
}

; SMIN

define <vscale x 16 x i8> @smin_i8(<vscale x 16 x i8> %a) {
; CHECK-LABEL: smin_i8:
; CHECK: // %bb.0:
; CHECK-NEXT: smin z0.b, z0.b, #127
; CHECK-NEXT: ret
  %pg = call <vscale x 16 x i1> @llvm.aarch64.sve.ptrue.nxv16i1(i32 31)
  %elt = insertelement <vscale x 16 x i8> undef, i8 127, i32 0
  %splat = shufflevector <vscale x 16 x i8> %elt, <vscale x 16 x i8> undef, <vscale x 16 x i32> zeroinitializer
  %out = call <vscale x 16 x i8> @llvm.aarch64.sve.smin.nxv16i8(<vscale x 16 x i1> %pg, <vscale x 16 x i8> %a, <vscale x 16 x i8> %splat)
  ret <vscale x 16 x i8> %out
}

define <vscale x 8 x i16> @smin_i16(<vscale x 8 x i16> %a) {
; CHECK-LABEL: smin_i16:
; CHECK: // %bb.0:
; CHECK-NEXT: smin z0.h, z0.h, #-128
; CHECK-NEXT: ret
  %pg = call <vscale x 8 x i1> @llvm.aarch64.sve.ptrue.nxv8i1(i32 31)
  %elt = insertelement <vscale x 8 x i16> undef, i16 -128, i32 0
  %splat = shufflevector <vscale x 8 x i16> %elt, <vscale x 8 x i16> undef, <vscale x 8 x i32> zeroinitializer
  %out = call <vscale x 8 x i16> @llvm.aarch64.sve.smin.nxv8i16(<vscale x 8 x i1> %pg, <vscale x 8 x i16> %a, <vscale x 8 x i16> %splat)
  ret <vscale x 8 x i16> %out
}

define <vscale x 8 x i16> @smin_i16_out_of_range(<vscale x 8 x i16> %a) {
; CHECK-LABEL: smin_i16_out_of_range:
; CHECK: // %bb.0:
; CHECK-NEXT: mov w8, #-129
; CHECK-NEXT: ptrue p0.h
; CHECK-NEXT: mov z1.h, w8
; CHECK-NEXT: smin z0.h, p0/m, z0.h, z1.h
; CHECK-NEXT: ret
  %pg = call <vscale x 8 x i1> @llvm.aarch64.sve.ptrue.nxv8i1(i32 31)
  %elt = insertelement <vscale x 8 x i16> undef, i16 -129, i32 0
  %splat = shufflevector <vscale x 8 x i16> %elt, <vscale x 8 x i16> undef, <vscale x 8 x i32> zeroinitializer
  %out = call <vscale x 8 x i16> @llvm.aarch64.sve.smin.nxv8i16(<vscale x 8 x i1> %pg, <vscale x 8 x i16> %a, <vscale x 8 x i16> %splat)
  ret <vscale x 8 x i16> %out
}

define <vscale x 4 x i32> @smin_i32(<vscale x 4 x i32> %a) {
; CHECK-LABEL: smin_i32:
; CHECK: // %bb.0:
; CHECK-NEXT: smin z0.s, z0.s, #127
; CHECK-NEXT: ret
  %pg = call <vscale x 4 x i1> @llvm.aarch64.sve.ptrue.nxv4i1(i32 31)
  %elt = insertelement <vscale x 4 x i32> undef, i32 127, i32 0
  %splat = shufflevector <vscale x 4 x i32> %elt, <vscale x 4 x i32> undef, <vscale x 4 x i32> zeroinitializer
  %out = call <vscale x 4 x i32> @llvm.aarch64.sve.smin.nxv4i32(<vscale x 4 x i1> %pg, <vscale x 4 x i32> %a, <vscale x 4 x i32> %splat)
  ret <vscale x 4 x i32> %out
}

define <vscale x 4 x i32> @smin_i32_out_of_range(<vscale x 4 x i32> %a) {
; CHECK-LABEL: smin_i32_out_of_range:
; CHECK: // %bb.0:
; CHECK-NEXT: mov w8, #257
; CHECK-NEXT: ptrue p0.s
; CHECK-NEXT: mov z1.s, w8
; CHECK-NEXT: smin z0.s, p0/m, z0.s, z1.s
; CHECK-NEXT: ret
  %pg = call <vscale x 4 x i1> @llvm.aarch64.sve.ptrue.nxv4i1(i32 31)
  %elt = insertelement <vscale x 4 x i32> undef, i32 257, i32 0
  %splat = shufflevector <vscale x 4 x i32> %elt, <vscale x 4 x i32> undef, <vscale x 4 x i32> zeroinitializer
  %out = call <vscale x 4 x i32> @llvm.aarch64.sve.smin.nxv4i32(<vscale x 4 x i1> %pg, <vscale x 4 x i32> %a, <vscale x 4 x i32> %splat)
  ret <vscale x 4 x i32> %out
}

define <vscale x 2 x i64> @smin_i64(<vscale x 2 x i64> %a) {
; CHECK-LABEL: smin_i64:
; CHECK: // %bb.0:
; CHECK-NEXT: smin z0.d, z0.d, #-128
; CHECK-NEXT: ret
  %pg = call <vscale x 2 x i1> @llvm.aarch64.sve.ptrue.nxv2i1(i32 31)
  %elt = insertelement <vscale x 2 x i64> undef, i64 -128, i64 0
  %splat = shufflevector <vscale x 2 x i64> %elt, <vscale x 2 x i64> undef, <vscale x 2 x i32> zeroinitializer
  %out = call <vscale x 2 x i64> @llvm.aarch64.sve.smin.nxv2i64(<vscale x 2 x i1> %pg, <vscale x 2 x i64> %a, <vscale x 2 x i64> %splat)
  ret <vscale x 2 x i64> %out
}
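
; -256 is out of range for SMIN (immediate) but fits the signed DUP/MOV
; (immediate) encoding (an 8-bit value shifted left by 8), so the splat is
; built without a scalar move.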

define <vscale x 2 x i64> @smin_i64_out_of_range(<vscale x 2 x i64> %a) {
; CHECK-LABEL: smin_i64_out_of_range:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.d
; CHECK-NEXT: mov z1.d, #-256 // =0xffffffffffffff00
; CHECK-NEXT: smin z0.d, p0/m, z0.d, z1.d
; CHECK-NEXT: ret
  %pg = call <vscale x 2 x i1> @llvm.aarch64.sve.ptrue.nxv2i1(i32 31)
  %elt = insertelement <vscale x 2 x i64> undef, i64 -256, i64 0
  %splat = shufflevector <vscale x 2 x i64> %elt, <vscale x 2 x i64> undef, <vscale x 2 x i32> zeroinitializer
  %out = call <vscale x 2 x i64> @llvm.aarch64.sve.smin.nxv2i64(<vscale x 2 x i1> %pg, <vscale x 2 x i64> %a, <vscale x 2 x i64> %splat)
  ret <vscale x 2 x i64> %out
}

; As smin_i32 but where pg is i8 based and thus compatible for i32.
define <vscale x 4 x i32> @smin_i32_ptrue_all_b(<vscale x 4 x i32> %a) #0 {
; CHECK-LABEL: smin_i32_ptrue_all_b:
; CHECK: smin z0.s, z0.s, #1
; CHECK-NEXT: ret
  %pg.b = tail call <vscale x 16 x i1> @llvm.aarch64.sve.ptrue.nxv16i1(i32 31)
  %pg.s = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> %pg.b)
  %b = tail call <vscale x 4 x i32> @llvm.aarch64.sve.dup.x.nxv4i32(i32 1)
  %out = tail call <vscale x 4 x i32> @llvm.aarch64.sve.smin.nxv4i32(<vscale x 4 x i1> %pg.s, <vscale x 4 x i32> %a, <vscale x 4 x i32> %b)
  ret <vscale x 4 x i32> %out
}

; As smin_i32 but where pg is i16 based and thus compatible for i32.
define <vscale x 4 x i32> @smin_i32_ptrue_all_h(<vscale x 4 x i32> %a) #0 {
; CHECK-LABEL: smin_i32_ptrue_all_h:
; CHECK: smin z0.s, z0.s, #1
; CHECK-NEXT: ret
  %pg.h = tail call <vscale x 8 x i1> @llvm.aarch64.sve.ptrue.nxv8i1(i32 31)
  %pg.b = tail call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv8i1(<vscale x 8 x i1> %pg.h)
  %pg.s = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> %pg.b)
  %b = tail call <vscale x 4 x i32> @llvm.aarch64.sve.dup.x.nxv4i32(i32 1)
  %out = tail call <vscale x 4 x i32> @llvm.aarch64.sve.smin.nxv4i32(<vscale x 4 x i1> %pg.s, <vscale x 4 x i32> %a, <vscale x 4 x i32> %b)
  ret <vscale x 4 x i32> %out
}

; As smin_i32 but where pg is i64 based, which is not compatible for i32 and
; thus inactive lanes are important and the immediate form cannot be used.
define <vscale x 4 x i32> @smin_i32_ptrue_all_d(<vscale x 4 x i32> %a) #0 {
; CHECK-LABEL: smin_i32_ptrue_all_d:
; CHECK-DAG: ptrue [[PG:p[0-9]+]].d
; CHECK-DAG: mov [[DUP:z[0-9]+]].s, #1
; CHECK-DAG: smin z0.s, [[PG]]/m, z0.s, [[DUP]].s
; CHECK-NEXT: ret
  %pg.d = tail call <vscale x 2 x i1> @llvm.aarch64.sve.ptrue.nxv2i1(i32 31)
  %pg.b = tail call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv2i1(<vscale x 2 x i1> %pg.d)
  %pg.s = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> %pg.b)
  %b = tail call <vscale x 4 x i32> @llvm.aarch64.sve.dup.x.nxv4i32(i32 1)
  %out = tail call <vscale x 4 x i32> @llvm.aarch64.sve.smin.nxv4i32(<vscale x 4 x i1> %pg.s, <vscale x 4 x i32> %a, <vscale x 4 x i32> %b)
  ret <vscale x 4 x i32> %out
}

; UMAX
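
; UMAX/UMIN (immediate) take an unsigned 8-bit immediate, so #0 and #255 are
; the encodable extremes and anything larger requires a register splat.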

define <vscale x 16 x i8> @umax_i8(<vscale x 16 x i8> %a) {
; CHECK-LABEL: umax_i8:
; CHECK: // %bb.0:
; CHECK-NEXT: umax z0.b, z0.b, #0
; CHECK-NEXT: ret
  %pg = call <vscale x 16 x i1> @llvm.aarch64.sve.ptrue.nxv16i1(i32 31)
  %elt = insertelement <vscale x 16 x i8> undef, i8 0, i32 0
  %splat = shufflevector <vscale x 16 x i8> %elt, <vscale x 16 x i8> undef, <vscale x 16 x i32> zeroinitializer
  %out = call <vscale x 16 x i8> @llvm.aarch64.sve.umax.nxv16i8(<vscale x 16 x i1> %pg, <vscale x 16 x i8> %a, <vscale x 16 x i8> %splat)
  ret <vscale x 16 x i8> %out
}

define <vscale x 8 x i16> @umax_i16(<vscale x 8 x i16> %a) {
; CHECK-LABEL: umax_i16:
; CHECK: // %bb.0:
; CHECK-NEXT: umax z0.h, z0.h, #255
; CHECK-NEXT: ret
  %pg = call <vscale x 8 x i1> @llvm.aarch64.sve.ptrue.nxv8i1(i32 31)
  %elt = insertelement <vscale x 8 x i16> undef, i16 255, i32 0
  %splat = shufflevector <vscale x 8 x i16> %elt, <vscale x 8 x i16> undef, <vscale x 8 x i32> zeroinitializer
  %out = call <vscale x 8 x i16> @llvm.aarch64.sve.umax.nxv8i16(<vscale x 8 x i1> %pg, <vscale x 8 x i16> %a, <vscale x 8 x i16> %splat)
  ret <vscale x 8 x i16> %out
}

define <vscale x 8 x i16> @umax_i16_out_of_range(<vscale x 8 x i16> %a) {
; CHECK-LABEL: umax_i16_out_of_range:
; CHECK: // %bb.0:
; CHECK-NEXT: mov w8, #257
; CHECK-NEXT: ptrue p0.h
; CHECK-NEXT: mov z1.h, w8
; CHECK-NEXT: umax z0.h, p0/m, z0.h, z1.h
; CHECK-NEXT: ret
  %pg = call <vscale x 8 x i1> @llvm.aarch64.sve.ptrue.nxv8i1(i32 31)
  %elt = insertelement <vscale x 8 x i16> undef, i16 257, i32 0
  %splat = shufflevector <vscale x 8 x i16> %elt, <vscale x 8 x i16> undef, <vscale x 8 x i32> zeroinitializer
  %out = call <vscale x 8 x i16> @llvm.aarch64.sve.umax.nxv8i16(<vscale x 8 x i1> %pg, <vscale x 8 x i16> %a, <vscale x 8 x i16> %splat)
  ret <vscale x 8 x i16> %out
}

define <vscale x 4 x i32> @umax_i32(<vscale x 4 x i32> %a) {
; CHECK-LABEL: umax_i32:
; CHECK: // %bb.0:
; CHECK-NEXT: umax z0.s, z0.s, #0
; CHECK-NEXT: ret
  %pg = call <vscale x 4 x i1> @llvm.aarch64.sve.ptrue.nxv4i1(i32 31)
  %elt = insertelement <vscale x 4 x i32> undef, i32 0, i32 0
  %splat = shufflevector <vscale x 4 x i32> %elt, <vscale x 4 x i32> undef, <vscale x 4 x i32> zeroinitializer
  %out = call <vscale x 4 x i32> @llvm.aarch64.sve.umax.nxv4i32(<vscale x 4 x i1> %pg, <vscale x 4 x i32> %a, <vscale x 4 x i32> %splat)
  ret <vscale x 4 x i32> %out
}

define <vscale x 4 x i32> @umax_i32_out_of_range(<vscale x 4 x i32> %a) {
; CHECK-LABEL: umax_i32_out_of_range:
; CHECK: // %bb.0:
; CHECK-NEXT: mov w8, #257
; CHECK-NEXT: ptrue p0.s
; CHECK-NEXT: mov z1.s, w8
; CHECK-NEXT: umax z0.s, p0/m, z0.s, z1.s
; CHECK-NEXT: ret
  %pg = call <vscale x 4 x i1> @llvm.aarch64.sve.ptrue.nxv4i1(i32 31)
  %elt = insertelement <vscale x 4 x i32> undef, i32 257, i32 0
  %splat = shufflevector <vscale x 4 x i32> %elt, <vscale x 4 x i32> undef, <vscale x 4 x i32> zeroinitializer
  %out = call <vscale x 4 x i32> @llvm.aarch64.sve.umax.nxv4i32(<vscale x 4 x i1> %pg, <vscale x 4 x i32> %a, <vscale x 4 x i32> %splat)
  ret <vscale x 4 x i32> %out
}

define <vscale x 2 x i64> @umax_i64(<vscale x 2 x i64> %a) {
; CHECK-LABEL: umax_i64:
; CHECK: // %bb.0:
; CHECK-NEXT: umax z0.d, z0.d, #255
; CHECK-NEXT: ret
  %pg = call <vscale x 2 x i1> @llvm.aarch64.sve.ptrue.nxv2i1(i32 31)
  %elt = insertelement <vscale x 2 x i64> undef, i64 255, i64 0
  %splat = shufflevector <vscale x 2 x i64> %elt, <vscale x 2 x i64> undef, <vscale x 2 x i32> zeroinitializer
  %out = call <vscale x 2 x i64> @llvm.aarch64.sve.umax.nxv2i64(<vscale x 2 x i1> %pg, <vscale x 2 x i64> %a, <vscale x 2 x i64> %splat)
  ret <vscale x 2 x i64> %out
}

define <vscale x 2 x i64> @umax_i64_out_of_range(<vscale x 2 x i64> %a) {
; CHECK-LABEL: umax_i64_out_of_range:
; CHECK: // %bb.0:
; CHECK-NEXT: mov w8, #65535
; CHECK-NEXT: ptrue p0.d
; CHECK-NEXT: mov z1.d, x8
; CHECK-NEXT: umax z0.d, p0/m, z0.d, z1.d
; CHECK-NEXT: ret
  %pg = call <vscale x 2 x i1> @llvm.aarch64.sve.ptrue.nxv2i1(i32 31)
  %elt = insertelement <vscale x 2 x i64> undef, i64 65535, i64 0
  %splat = shufflevector <vscale x 2 x i64> %elt, <vscale x 2 x i64> undef, <vscale x 2 x i32> zeroinitializer
  %out = call <vscale x 2 x i64> @llvm.aarch64.sve.umax.nxv2i64(<vscale x 2 x i1> %pg, <vscale x 2 x i64> %a, <vscale x 2 x i64> %splat)
  ret <vscale x 2 x i64> %out
}

; As umax_i32 but where pg is i8 based and thus compatible for i32.
define <vscale x 4 x i32> @umax_i32_ptrue_all_b(<vscale x 4 x i32> %a) #0 {
; CHECK-LABEL: umax_i32_ptrue_all_b:
; CHECK: umax z0.s, z0.s, #1
; CHECK-NEXT: ret
  %pg.b = tail call <vscale x 16 x i1> @llvm.aarch64.sve.ptrue.nxv16i1(i32 31)
  %pg.s = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> %pg.b)
  %b = tail call <vscale x 4 x i32> @llvm.aarch64.sve.dup.x.nxv4i32(i32 1)
  %out = tail call <vscale x 4 x i32> @llvm.aarch64.sve.umax.nxv4i32(<vscale x 4 x i1> %pg.s, <vscale x 4 x i32> %a, <vscale x 4 x i32> %b)
  ret <vscale x 4 x i32> %out
}

; As umax_i32 but where pg is i16 based and thus compatible for i32.
define <vscale x 4 x i32> @umax_i32_ptrue_all_h(<vscale x 4 x i32> %a) #0 {
; CHECK-LABEL: umax_i32_ptrue_all_h:
; CHECK: umax z0.s, z0.s, #1
; CHECK-NEXT: ret
  %pg.h = tail call <vscale x 8 x i1> @llvm.aarch64.sve.ptrue.nxv8i1(i32 31)
  %pg.b = tail call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv8i1(<vscale x 8 x i1> %pg.h)
  %pg.s = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> %pg.b)
  %b = tail call <vscale x 4 x i32> @llvm.aarch64.sve.dup.x.nxv4i32(i32 1)
  %out = tail call <vscale x 4 x i32> @llvm.aarch64.sve.umax.nxv4i32(<vscale x 4 x i1> %pg.s, <vscale x 4 x i32> %a, <vscale x 4 x i32> %b)
  ret <vscale x 4 x i32> %out
}

; As umax_i32 but where pg is i64 based, which is not compatible for i32 and
; thus inactive lanes are important and the immediate form cannot be used.
define <vscale x 4 x i32> @umax_i32_ptrue_all_d(<vscale x 4 x i32> %a) #0 {
; CHECK-LABEL: umax_i32_ptrue_all_d:
; CHECK-DAG: ptrue [[PG:p[0-9]+]].d
; CHECK-DAG: mov [[DUP:z[0-9]+]].s, #1
; CHECK-DAG: umax z0.s, [[PG]]/m, z0.s, [[DUP]].s
; CHECK-NEXT: ret
  %pg.d = tail call <vscale x 2 x i1> @llvm.aarch64.sve.ptrue.nxv2i1(i32 31)
  %pg.b = tail call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv2i1(<vscale x 2 x i1> %pg.d)
  %pg.s = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> %pg.b)
  %b = tail call <vscale x 4 x i32> @llvm.aarch64.sve.dup.x.nxv4i32(i32 1)
  %out = tail call <vscale x 4 x i32> @llvm.aarch64.sve.umax.nxv4i32(<vscale x 4 x i1> %pg.s, <vscale x 4 x i32> %a, <vscale x 4 x i32> %b)
  ret <vscale x 4 x i32> %out
}

; UMIN

define <vscale x 16 x i8> @umin_i8(<vscale x 16 x i8> %a) {
; CHECK-LABEL: umin_i8:
; CHECK: // %bb.0:
; CHECK-NEXT: umin z0.b, z0.b, #255
; CHECK-NEXT: ret
  %pg = call <vscale x 16 x i1> @llvm.aarch64.sve.ptrue.nxv16i1(i32 31)
  %elt = insertelement <vscale x 16 x i8> undef, i8 255, i32 0
  %splat = shufflevector <vscale x 16 x i8> %elt, <vscale x 16 x i8> undef, <vscale x 16 x i32> zeroinitializer
  %out = call <vscale x 16 x i8> @llvm.aarch64.sve.umin.nxv16i8(<vscale x 16 x i1> %pg, <vscale x 16 x i8> %a, <vscale x 16 x i8> %splat)
  ret <vscale x 16 x i8> %out
}

define <vscale x 8 x i16> @umin_i16(<vscale x 8 x i16> %a) {
; CHECK-LABEL: umin_i16:
; CHECK: // %bb.0:
; CHECK-NEXT: umin z0.h, z0.h, #0
; CHECK-NEXT: ret
  %pg = call <vscale x 8 x i1> @llvm.aarch64.sve.ptrue.nxv8i1(i32 31)
  %elt = insertelement <vscale x 8 x i16> undef, i16 0, i32 0
  %splat = shufflevector <vscale x 8 x i16> %elt, <vscale x 8 x i16> undef, <vscale x 8 x i32> zeroinitializer
  %out = call <vscale x 8 x i16> @llvm.aarch64.sve.umin.nxv8i16(<vscale x 8 x i1> %pg, <vscale x 8 x i16> %a, <vscale x 8 x i16> %splat)
  ret <vscale x 8 x i16> %out
}

define <vscale x 8 x i16> @umin_i16_out_of_range(<vscale x 8 x i16> %a) {
; CHECK-LABEL: umin_i16_out_of_range:
; CHECK: // %bb.0:
; CHECK-NEXT: mov w8, #257
; CHECK-NEXT: ptrue p0.h
; CHECK-NEXT: mov z1.h, w8
; CHECK-NEXT: umin z0.h, p0/m, z0.h, z1.h
; CHECK-NEXT: ret
  %pg = call <vscale x 8 x i1> @llvm.aarch64.sve.ptrue.nxv8i1(i32 31)
  %elt = insertelement <vscale x 8 x i16> undef, i16 257, i32 0
  %splat = shufflevector <vscale x 8 x i16> %elt, <vscale x 8 x i16> undef, <vscale x 8 x i32> zeroinitializer
  %out = call <vscale x 8 x i16> @llvm.aarch64.sve.umin.nxv8i16(<vscale x 8 x i1> %pg, <vscale x 8 x i16> %a, <vscale x 8 x i16> %splat)
  ret <vscale x 8 x i16> %out
}

define <vscale x 4 x i32> @umin_i32(<vscale x 4 x i32> %a) {
; CHECK-LABEL: umin_i32:
; CHECK: // %bb.0:
; CHECK-NEXT: umin z0.s, z0.s, #255
; CHECK-NEXT: ret
  %pg = call <vscale x 4 x i1> @llvm.aarch64.sve.ptrue.nxv4i1(i32 31)
  %elt = insertelement <vscale x 4 x i32> undef, i32 255, i32 0
  %splat = shufflevector <vscale x 4 x i32> %elt, <vscale x 4 x i32> undef, <vscale x 4 x i32> zeroinitializer
  %out = call <vscale x 4 x i32> @llvm.aarch64.sve.umin.nxv4i32(<vscale x 4 x i1> %pg, <vscale x 4 x i32> %a, <vscale x 4 x i32> %splat)
  ret <vscale x 4 x i32> %out
}

define <vscale x 4 x i32> @umin_i32_out_of_range(<vscale x 4 x i32> %a) {
; CHECK-LABEL: umin_i32_out_of_range:
; CHECK: // %bb.0:
; CHECK-NEXT: mov w8, #257
; CHECK-NEXT: ptrue p0.s
; CHECK-NEXT: mov z1.s, w8
; CHECK-NEXT: umin z0.s, p0/m, z0.s, z1.s
; CHECK-NEXT: ret
  %pg = call <vscale x 4 x i1> @llvm.aarch64.sve.ptrue.nxv4i1(i32 31)
  %elt = insertelement <vscale x 4 x i32> undef, i32 257, i32 0
  %splat = shufflevector <vscale x 4 x i32> %elt, <vscale x 4 x i32> undef, <vscale x 4 x i32> zeroinitializer
  %out = call <vscale x 4 x i32> @llvm.aarch64.sve.umin.nxv4i32(<vscale x 4 x i1> %pg, <vscale x 4 x i32> %a, <vscale x 4 x i32> %splat)
  ret <vscale x 4 x i32> %out
}

define <vscale x 2 x i64> @umin_i64(<vscale x 2 x i64> %a) {
; CHECK-LABEL: umin_i64:
; CHECK: // %bb.0:
; CHECK-NEXT: umin z0.d, z0.d, #0
; CHECK-NEXT: ret
  %pg = call <vscale x 2 x i1> @llvm.aarch64.sve.ptrue.nxv2i1(i32 31)
  %elt = insertelement <vscale x 2 x i64> undef, i64 0, i64 0
  %splat = shufflevector <vscale x 2 x i64> %elt, <vscale x 2 x i64> undef, <vscale x 2 x i32> zeroinitializer
  %out = call <vscale x 2 x i64> @llvm.aarch64.sve.umin.nxv2i64(<vscale x 2 x i1> %pg, <vscale x 2 x i64> %a, <vscale x 2 x i64> %splat)
  ret <vscale x 2 x i64> %out
}

define <vscale x 2 x i64> @umin_i64_out_of_range(<vscale x 2 x i64> %a) {
; CHECK-LABEL: umin_i64_out_of_range:
; CHECK: // %bb.0:
; CHECK-NEXT: mov w8, #65535
; CHECK-NEXT: ptrue p0.d
; CHECK-NEXT: mov z1.d, x8
; CHECK-NEXT: umin z0.d, p0/m, z0.d, z1.d
; CHECK-NEXT: ret
  %pg = call <vscale x 2 x i1> @llvm.aarch64.sve.ptrue.nxv2i1(i32 31)
  %elt = insertelement <vscale x 2 x i64> undef, i64 65535, i64 0
  %splat = shufflevector <vscale x 2 x i64> %elt, <vscale x 2 x i64> undef, <vscale x 2 x i32> zeroinitializer
  %out = call <vscale x 2 x i64> @llvm.aarch64.sve.umin.nxv2i64(<vscale x 2 x i1> %pg, <vscale x 2 x i64> %a, <vscale x 2 x i64> %splat)
  ret <vscale x 2 x i64> %out
}

; As umin_i32 but where pg is i8 based and thus compatible for i32.
define <vscale x 4 x i32> @umin_i32_ptrue_all_b(<vscale x 4 x i32> %a) #0 {
; CHECK-LABEL: umin_i32_ptrue_all_b:
; CHECK: umin z0.s, z0.s, #1
; CHECK-NEXT: ret
  %pg.b = tail call <vscale x 16 x i1> @llvm.aarch64.sve.ptrue.nxv16i1(i32 31)
  %pg.s = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> %pg.b)
  %b = tail call <vscale x 4 x i32> @llvm.aarch64.sve.dup.x.nxv4i32(i32 1)
  %out = tail call <vscale x 4 x i32> @llvm.aarch64.sve.umin.nxv4i32(<vscale x 4 x i1> %pg.s, <vscale x 4 x i32> %a, <vscale x 4 x i32> %b)
  ret <vscale x 4 x i32> %out
}

; As umin_i32 but where pg is i16 based and thus compatible for i32.
define <vscale x 4 x i32> @umin_i32_ptrue_all_h(<vscale x 4 x i32> %a) #0 {
; CHECK-LABEL: umin_i32_ptrue_all_h:
; CHECK: umin z0.s, z0.s, #1
; CHECK-NEXT: ret
  %pg.h = tail call <vscale x 8 x i1> @llvm.aarch64.sve.ptrue.nxv8i1(i32 31)
  %pg.b = tail call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv8i1(<vscale x 8 x i1> %pg.h)
  %pg.s = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> %pg.b)
  %b = tail call <vscale x 4 x i32> @llvm.aarch64.sve.dup.x.nxv4i32(i32 1)
  %out = tail call <vscale x 4 x i32> @llvm.aarch64.sve.umin.nxv4i32(<vscale x 4 x i1> %pg.s, <vscale x 4 x i32> %a, <vscale x 4 x i32> %b)
  ret <vscale x 4 x i32> %out
}

; As umin_i32 but where pg is i64 based, which is not compatible for i32 and
; thus inactive lanes are important and the immediate form cannot be used.
define <vscale x 4 x i32> @umin_i32_ptrue_all_d(<vscale x 4 x i32> %a) #0 {
; CHECK-LABEL: umin_i32_ptrue_all_d:
; CHECK-DAG: ptrue [[PG:p[0-9]+]].d
; CHECK-DAG: mov [[DUP:z[0-9]+]].s, #1
; CHECK-DAG: umin z0.s, [[PG]]/m, z0.s, [[DUP]].s
; CHECK-NEXT: ret
  %pg.d = tail call <vscale x 2 x i1> @llvm.aarch64.sve.ptrue.nxv2i1(i32 31)
  %pg.b = tail call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv2i1(<vscale x 2 x i1> %pg.d)
  %pg.s = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> %pg.b)
  %b = tail call <vscale x 4 x i32> @llvm.aarch64.sve.dup.x.nxv4i32(i32 1)
  %out = tail call <vscale x 4 x i32> @llvm.aarch64.sve.umin.nxv4i32(<vscale x 4 x i1> %pg.s, <vscale x 4 x i32> %a, <vscale x 4 x i32> %b)
  ret <vscale x 4 x i32> %out
}

; SQADD
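
; The saturating arithmetic intrinsics are unpredicated, so no ptrue is
; involved. Their immediate forms accept an unsigned 8-bit value optionally
; shifted left by 8, which is why #2048 and #65280 still match below.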

define <vscale x 16 x i8> @sqadd_b_lowimm(<vscale x 16 x i8> %a) {
; CHECK-LABEL: sqadd_b_lowimm:
; CHECK: // %bb.0:
; CHECK-NEXT: sqadd z0.b, z0.b, #27 // =0x1b
; CHECK-NEXT: ret
  %elt = insertelement <vscale x 16 x i8> undef, i8 27, i32 0
  %splat = shufflevector <vscale x 16 x i8> %elt, <vscale x 16 x i8> undef, <vscale x 16 x i32> zeroinitializer
  %out = call <vscale x 16 x i8> @llvm.aarch64.sve.sqadd.x.nxv16i8(<vscale x 16 x i8> %a, <vscale x 16 x i8> %splat)
  ret <vscale x 16 x i8> %out
}

define <vscale x 8 x i16> @sqadd_h_lowimm(<vscale x 8 x i16> %a) {
; CHECK-LABEL: sqadd_h_lowimm:
; CHECK: // %bb.0:
; CHECK-NEXT: sqadd z0.h, z0.h, #43 // =0x2b
; CHECK-NEXT: ret
  %elt = insertelement <vscale x 8 x i16> undef, i16 43, i32 0
  %splat = shufflevector <vscale x 8 x i16> %elt, <vscale x 8 x i16> undef, <vscale x 8 x i32> zeroinitializer
  %out = call <vscale x 8 x i16> @llvm.aarch64.sve.sqadd.x.nxv8i16(<vscale x 8 x i16> %a, <vscale x 8 x i16> %splat)
  ret <vscale x 8 x i16> %out
}

define <vscale x 8 x i16> @sqadd_h_highimm(<vscale x 8 x i16> %a) {
; CHECK-LABEL: sqadd_h_highimm:
; CHECK: // %bb.0:
; CHECK-NEXT: sqadd z0.h, z0.h, #2048 // =0x800
; CHECK-NEXT: ret
  %elt = insertelement <vscale x 8 x i16> undef, i16 2048, i32 0
  %splat = shufflevector <vscale x 8 x i16> %elt, <vscale x 8 x i16> undef, <vscale x 8 x i32> zeroinitializer
  %out = call <vscale x 8 x i16> @llvm.aarch64.sve.sqadd.x.nxv8i16(<vscale x 8 x i16> %a, <vscale x 8 x i16> %splat)
  ret <vscale x 8 x i16> %out
}

define <vscale x 4 x i32> @sqadd_s_lowimm(<vscale x 4 x i32> %a) {
; CHECK-LABEL: sqadd_s_lowimm:
; CHECK: // %bb.0:
; CHECK-NEXT: sqadd z0.s, z0.s, #1 // =0x1
; CHECK-NEXT: ret
  %elt = insertelement <vscale x 4 x i32> undef, i32 1, i32 0
  %splat = shufflevector <vscale x 4 x i32> %elt, <vscale x 4 x i32> undef, <vscale x 4 x i32> zeroinitializer
  %out = call <vscale x 4 x i32> @llvm.aarch64.sve.sqadd.x.nxv4i32(<vscale x 4 x i32> %a, <vscale x 4 x i32> %splat)
  ret <vscale x 4 x i32> %out
}

define <vscale x 4 x i32> @sqadd_s_highimm(<vscale x 4 x i32> %a) {
; CHECK-LABEL: sqadd_s_highimm:
; CHECK: // %bb.0:
; CHECK-NEXT: sqadd z0.s, z0.s, #8192 // =0x2000
; CHECK-NEXT: ret
  %elt = insertelement <vscale x 4 x i32> undef, i32 8192, i32 0
  %splat = shufflevector <vscale x 4 x i32> %elt, <vscale x 4 x i32> undef, <vscale x 4 x i32> zeroinitializer
  %out = call <vscale x 4 x i32> @llvm.aarch64.sve.sqadd.x.nxv4i32(<vscale x 4 x i32> %a, <vscale x 4 x i32> %splat)
  ret <vscale x 4 x i32> %out
}

define <vscale x 2 x i64> @sqadd_d_lowimm(<vscale x 2 x i64> %a) {
; CHECK-LABEL: sqadd_d_lowimm:
; CHECK: // %bb.0:
; CHECK-NEXT: sqadd z0.d, z0.d, #255 // =0xff
; CHECK-NEXT: ret
  %elt = insertelement <vscale x 2 x i64> undef, i64 255, i32 0
  %splat = shufflevector <vscale x 2 x i64> %elt, <vscale x 2 x i64> undef, <vscale x 2 x i32> zeroinitializer
  %out = call <vscale x 2 x i64> @llvm.aarch64.sve.sqadd.x.nxv2i64(<vscale x 2 x i64> %a, <vscale x 2 x i64> %splat)
  ret <vscale x 2 x i64> %out
}

define <vscale x 2 x i64> @sqadd_d_highimm(<vscale x 2 x i64> %a) {
; CHECK-LABEL: sqadd_d_highimm:
; CHECK: // %bb.0:
; CHECK-NEXT: sqadd z0.d, z0.d, #65280 // =0xff00
; CHECK-NEXT: ret
  %elt = insertelement <vscale x 2 x i64> undef, i64 65280, i32 0
  %splat = shufflevector <vscale x 2 x i64> %elt, <vscale x 2 x i64> undef, <vscale x 2 x i32> zeroinitializer
  %out = call <vscale x 2 x i64> @llvm.aarch64.sve.sqadd.x.nxv2i64(<vscale x 2 x i64> %a, <vscale x 2 x i64> %splat)
  ret <vscale x 2 x i64> %out
}

; SQSUB

define <vscale x 16 x i8> @sqsub_b_lowimm(<vscale x 16 x i8> %a) {
; CHECK-LABEL: sqsub_b_lowimm:
; CHECK: // %bb.0:
; CHECK-NEXT: sqsub z0.b, z0.b, #27 // =0x1b
; CHECK-NEXT: ret
  %elt = insertelement <vscale x 16 x i8> undef, i8 27, i32 0
  %splat = shufflevector <vscale x 16 x i8> %elt, <vscale x 16 x i8> undef, <vscale x 16 x i32> zeroinitializer
  %out = call <vscale x 16 x i8> @llvm.aarch64.sve.sqsub.x.nxv16i8(<vscale x 16 x i8> %a, <vscale x 16 x i8> %splat)
  ret <vscale x 16 x i8> %out
}

define <vscale x 8 x i16> @sqsub_h_lowimm(<vscale x 8 x i16> %a) {
; CHECK-LABEL: sqsub_h_lowimm:
; CHECK: // %bb.0:
; CHECK-NEXT: sqsub z0.h, z0.h, #43 // =0x2b
; CHECK-NEXT: ret
  %elt = insertelement <vscale x 8 x i16> undef, i16 43, i32 0
  %splat = shufflevector <vscale x 8 x i16> %elt, <vscale x 8 x i16> undef, <vscale x 8 x i32> zeroinitializer
  %out = call <vscale x 8 x i16> @llvm.aarch64.sve.sqsub.x.nxv8i16(<vscale x 8 x i16> %a, <vscale x 8 x i16> %splat)
  ret <vscale x 8 x i16> %out
}

define <vscale x 8 x i16> @sqsub_h_highimm(<vscale x 8 x i16> %a) {
; CHECK-LABEL: sqsub_h_highimm:
; CHECK: // %bb.0:
; CHECK-NEXT: sqsub z0.h, z0.h, #2048 // =0x800
; CHECK-NEXT: ret
  %elt = insertelement <vscale x 8 x i16> undef, i16 2048, i32 0
  %splat = shufflevector <vscale x 8 x i16> %elt, <vscale x 8 x i16> undef, <vscale x 8 x i32> zeroinitializer
  %out = call <vscale x 8 x i16> @llvm.aarch64.sve.sqsub.x.nxv8i16(<vscale x 8 x i16> %a, <vscale x 8 x i16> %splat)
  ret <vscale x 8 x i16> %out
}

define <vscale x 4 x i32> @sqsub_s_lowimm(<vscale x 4 x i32> %a) {
; CHECK-LABEL: sqsub_s_lowimm:
; CHECK: // %bb.0:
; CHECK-NEXT: sqsub z0.s, z0.s, #1 // =0x1
; CHECK-NEXT: ret
  %elt = insertelement <vscale x 4 x i32> undef, i32 1, i32 0
  %splat = shufflevector <vscale x 4 x i32> %elt, <vscale x 4 x i32> undef, <vscale x 4 x i32> zeroinitializer
  %out = call <vscale x 4 x i32> @llvm.aarch64.sve.sqsub.x.nxv4i32(<vscale x 4 x i32> %a, <vscale x 4 x i32> %splat)
  ret <vscale x 4 x i32> %out
}

define <vscale x 4 x i32> @sqsub_s_highimm(<vscale x 4 x i32> %a) {
; CHECK-LABEL: sqsub_s_highimm:
; CHECK: // %bb.0:
; CHECK-NEXT: sqsub z0.s, z0.s, #8192 // =0x2000
; CHECK-NEXT: ret
  %elt = insertelement <vscale x 4 x i32> undef, i32 8192, i32 0
  %splat = shufflevector <vscale x 4 x i32> %elt, <vscale x 4 x i32> undef, <vscale x 4 x i32> zeroinitializer
  %out = call <vscale x 4 x i32> @llvm.aarch64.sve.sqsub.x.nxv4i32(<vscale x 4 x i32> %a, <vscale x 4 x i32> %splat)
  ret <vscale x 4 x i32> %out
}

define <vscale x 2 x i64> @sqsub_d_lowimm(<vscale x 2 x i64> %a) {
; CHECK-LABEL: sqsub_d_lowimm:
; CHECK: // %bb.0:
; CHECK-NEXT: sqsub z0.d, z0.d, #255 // =0xff
; CHECK-NEXT: ret
  %elt = insertelement <vscale x 2 x i64> undef, i64 255, i32 0
  %splat = shufflevector <vscale x 2 x i64> %elt, <vscale x 2 x i64> undef, <vscale x 2 x i32> zeroinitializer
  %out = call <vscale x 2 x i64> @llvm.aarch64.sve.sqsub.x.nxv2i64(<vscale x 2 x i64> %a, <vscale x 2 x i64> %splat)
  ret <vscale x 2 x i64> %out
}

define <vscale x 2 x i64> @sqsub_d_highimm(<vscale x 2 x i64> %a) {
; CHECK-LABEL: sqsub_d_highimm:
; CHECK: // %bb.0:
; CHECK-NEXT: sqsub z0.d, z0.d, #65280 // =0xff00
; CHECK-NEXT: ret
  %elt = insertelement <vscale x 2 x i64> undef, i64 65280, i32 0
  %splat = shufflevector <vscale x 2 x i64> %elt, <vscale x 2 x i64> undef, <vscale x 2 x i32> zeroinitializer
  %out = call <vscale x 2 x i64> @llvm.aarch64.sve.sqsub.x.nxv2i64(<vscale x 2 x i64> %a, <vscale x 2 x i64> %splat)
  ret <vscale x 2 x i64> %out
}

; UQADD

define <vscale x 16 x i8> @uqadd_b_lowimm(<vscale x 16 x i8> %a) {
; CHECK-LABEL: uqadd_b_lowimm:
; CHECK: // %bb.0:
; CHECK-NEXT: uqadd z0.b, z0.b, #27 // =0x1b
; CHECK-NEXT: ret
  %elt = insertelement <vscale x 16 x i8> undef, i8 27, i32 0
  %splat = shufflevector <vscale x 16 x i8> %elt, <vscale x 16 x i8> undef, <vscale x 16 x i32> zeroinitializer
  %out = call <vscale x 16 x i8> @llvm.aarch64.sve.uqadd.x.nxv16i8(<vscale x 16 x i8> %a, <vscale x 16 x i8> %splat)
  ret <vscale x 16 x i8> %out
}

define <vscale x 8 x i16> @uqadd_h_lowimm(<vscale x 8 x i16> %a) {
; CHECK-LABEL: uqadd_h_lowimm:
; CHECK: // %bb.0:
; CHECK-NEXT: uqadd z0.h, z0.h, #43 // =0x2b
; CHECK-NEXT: ret
  %elt = insertelement <vscale x 8 x i16> undef, i16 43, i32 0
  %splat = shufflevector <vscale x 8 x i16> %elt, <vscale x 8 x i16> undef, <vscale x 8 x i32> zeroinitializer
  %out = call <vscale x 8 x i16> @llvm.aarch64.sve.uqadd.x.nxv8i16(<vscale x 8 x i16> %a, <vscale x 8 x i16> %splat)
  ret <vscale x 8 x i16> %out
}

define <vscale x 8 x i16> @uqadd_h_highimm(<vscale x 8 x i16> %a) {
; CHECK-LABEL: uqadd_h_highimm:
; CHECK: // %bb.0:
; CHECK-NEXT: uqadd z0.h, z0.h, #2048 // =0x800
; CHECK-NEXT: ret
  %elt = insertelement <vscale x 8 x i16> undef, i16 2048, i32 0
  %splat = shufflevector <vscale x 8 x i16> %elt, <vscale x 8 x i16> undef, <vscale x 8 x i32> zeroinitializer
  %out = call <vscale x 8 x i16> @llvm.aarch64.sve.uqadd.x.nxv8i16(<vscale x 8 x i16> %a, <vscale x 8 x i16> %splat)
  ret <vscale x 8 x i16> %out
}

define <vscale x 4 x i32> @uqadd_s_lowimm(<vscale x 4 x i32> %a) {
; CHECK-LABEL: uqadd_s_lowimm:
; CHECK: // %bb.0:
; CHECK-NEXT: uqadd z0.s, z0.s, #1 // =0x1
; CHECK-NEXT: ret
  %elt = insertelement <vscale x 4 x i32> undef, i32 1, i32 0
  %splat = shufflevector <vscale x 4 x i32> %elt, <vscale x 4 x i32> undef, <vscale x 4 x i32> zeroinitializer
  %out = call <vscale x 4 x i32> @llvm.aarch64.sve.uqadd.x.nxv4i32(<vscale x 4 x i32> %a, <vscale x 4 x i32> %splat)
  ret <vscale x 4 x i32> %out
}

define <vscale x 4 x i32> @uqadd_s_highimm(<vscale x 4 x i32> %a) {
; CHECK-LABEL: uqadd_s_highimm:
; CHECK: // %bb.0:
; CHECK-NEXT: uqadd z0.s, z0.s, #8192 // =0x2000
; CHECK-NEXT: ret
  %elt = insertelement <vscale x 4 x i32> undef, i32 8192, i32 0
  %splat = shufflevector <vscale x 4 x i32> %elt, <vscale x 4 x i32> undef, <vscale x 4 x i32> zeroinitializer
  %out = call <vscale x 4 x i32> @llvm.aarch64.sve.uqadd.x.nxv4i32(<vscale x 4 x i32> %a, <vscale x 4 x i32> %splat)
  ret <vscale x 4 x i32> %out
}

define <vscale x 2 x i64> @uqadd_d_lowimm(<vscale x 2 x i64> %a) {
; CHECK-LABEL: uqadd_d_lowimm:
; CHECK: // %bb.0:
; CHECK-NEXT: uqadd z0.d, z0.d, #255 // =0xff
; CHECK-NEXT: ret
  %elt = insertelement <vscale x 2 x i64> undef, i64 255, i32 0
  %splat = shufflevector <vscale x 2 x i64> %elt, <vscale x 2 x i64> undef, <vscale x 2 x i32> zeroinitializer
  %out = call <vscale x 2 x i64> @llvm.aarch64.sve.uqadd.x.nxv2i64(<vscale x 2 x i64> %a, <vscale x 2 x i64> %splat)
  ret <vscale x 2 x i64> %out
}

define <vscale x 2 x i64> @uqadd_d_highimm(<vscale x 2 x i64> %a) {
; CHECK-LABEL: uqadd_d_highimm:
; CHECK: // %bb.0:
; CHECK-NEXT: uqadd z0.d, z0.d, #65280 // =0xff00
; CHECK-NEXT: ret
  %elt = insertelement <vscale x 2 x i64> undef, i64 65280, i32 0
  %splat = shufflevector <vscale x 2 x i64> %elt, <vscale x 2 x i64> undef, <vscale x 2 x i32> zeroinitializer
  %out = call <vscale x 2 x i64> @llvm.aarch64.sve.uqadd.x.nxv2i64(<vscale x 2 x i64> %a, <vscale x 2 x i64> %splat)
  ret <vscale x 2 x i64> %out
}

; UQSUB

define <vscale x 16 x i8> @uqsub_b_lowimm(<vscale x 16 x i8> %a) {
; CHECK-LABEL: uqsub_b_lowimm:
; CHECK: // %bb.0:
; CHECK-NEXT: uqsub z0.b, z0.b, #27 // =0x1b
; CHECK-NEXT: ret
  %elt = insertelement <vscale x 16 x i8> undef, i8 27, i32 0
  %splat = shufflevector <vscale x 16 x i8> %elt, <vscale x 16 x i8> undef, <vscale x 16 x i32> zeroinitializer
  %out = call <vscale x 16 x i8> @llvm.aarch64.sve.uqsub.x.nxv16i8(<vscale x 16 x i8> %a, <vscale x 16 x i8> %splat)
  ret <vscale x 16 x i8> %out
}

define <vscale x 8 x i16> @uqsub_h_lowimm(<vscale x 8 x i16> %a) {
; CHECK-LABEL: uqsub_h_lowimm:
; CHECK: // %bb.0:
; CHECK-NEXT: uqsub z0.h, z0.h, #43 // =0x2b
; CHECK-NEXT: ret
  %elt = insertelement <vscale x 8 x i16> undef, i16 43, i32 0
  %splat = shufflevector <vscale x 8 x i16> %elt, <vscale x 8 x i16> undef, <vscale x 8 x i32> zeroinitializer
  %out = call <vscale x 8 x i16> @llvm.aarch64.sve.uqsub.x.nxv8i16(<vscale x 8 x i16> %a, <vscale x 8 x i16> %splat)
  ret <vscale x 8 x i16> %out
}

define <vscale x 8 x i16> @uqsub_h_highimm(<vscale x 8 x i16> %a) {
; CHECK-LABEL: uqsub_h_highimm:
; CHECK: // %bb.0:
; CHECK-NEXT: uqsub z0.h, z0.h, #2048 // =0x800
; CHECK-NEXT: ret
  %elt = insertelement <vscale x 8 x i16> undef, i16 2048, i32 0
  %splat = shufflevector <vscale x 8 x i16> %elt, <vscale x 8 x i16> undef, <vscale x 8 x i32> zeroinitializer
  %out = call <vscale x 8 x i16> @llvm.aarch64.sve.uqsub.x.nxv8i16(<vscale x 8 x i16> %a, <vscale x 8 x i16> %splat)
  ret <vscale x 8 x i16> %out
}

define <vscale x 4 x i32> @uqsub_s_lowimm(<vscale x 4 x i32> %a) {
; CHECK-LABEL: uqsub_s_lowimm:
; CHECK: // %bb.0:
; CHECK-NEXT: uqsub z0.s, z0.s, #1 // =0x1
; CHECK-NEXT: ret
  %elt = insertelement <vscale x 4 x i32> undef, i32 1, i32 0
  %splat = shufflevector <vscale x 4 x i32> %elt, <vscale x 4 x i32> undef, <vscale x 4 x i32> zeroinitializer
  %out = call <vscale x 4 x i32> @llvm.aarch64.sve.uqsub.x.nxv4i32(<vscale x 4 x i32> %a, <vscale x 4 x i32> %splat)
  ret <vscale x 4 x i32> %out
}

define <vscale x 4 x i32> @uqsub_s_highimm(<vscale x 4 x i32> %a) {
; CHECK-LABEL: uqsub_s_highimm:
; CHECK: // %bb.0:
; CHECK-NEXT: uqsub z0.s, z0.s, #8192 // =0x2000
; CHECK-NEXT: ret
  %elt = insertelement <vscale x 4 x i32> undef, i32 8192, i32 0
  %splat = shufflevector <vscale x 4 x i32> %elt, <vscale x 4 x i32> undef, <vscale x 4 x i32> zeroinitializer
  %out = call <vscale x 4 x i32> @llvm.aarch64.sve.uqsub.x.nxv4i32(<vscale x 4 x i32> %a, <vscale x 4 x i32> %splat)
  ret <vscale x 4 x i32> %out
}

define <vscale x 2 x i64> @uqsub_d_lowimm(<vscale x 2 x i64> %a) {
; CHECK-LABEL: uqsub_d_lowimm:
; CHECK: // %bb.0:
; CHECK-NEXT: uqsub z0.d, z0.d, #255 // =0xff
; CHECK-NEXT: ret
  %elt = insertelement <vscale x 2 x i64> undef, i64 255, i32 0
  %splat = shufflevector <vscale x 2 x i64> %elt, <vscale x 2 x i64> undef, <vscale x 2 x i32> zeroinitializer
  %out = call <vscale x 2 x i64> @llvm.aarch64.sve.uqsub.x.nxv2i64(<vscale x 2 x i64> %a, <vscale x 2 x i64> %splat)
  ret <vscale x 2 x i64> %out
}

define <vscale x 2 x i64> @uqsub_d_highimm(<vscale x 2 x i64> %a) {
; CHECK-LABEL: uqsub_d_highimm:
; CHECK: // %bb.0:
; CHECK-NEXT: uqsub z0.d, z0.d, #65280 // =0xff00
; CHECK-NEXT: ret
  %elt = insertelement <vscale x 2 x i64> undef, i64 65280, i32 0
  %splat = shufflevector <vscale x 2 x i64> %elt, <vscale x 2 x i64> undef, <vscale x 2 x i32> zeroinitializer
  %out = call <vscale x 2 x i64> @llvm.aarch64.sve.uqsub.x.nxv2i64(<vscale x 2 x i64> %a, <vscale x 2 x i64> %splat)
  ret <vscale x 2 x i64> %out
}

; ASR
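
; Right-shift immediates encode shift amounts from 1 to the element size.
; A larger splatted shift amount can be clamped to the element size (an
; arithmetic shift right by more than the element width gives the same
; result), which is why the splat of 9 below selects an immediate of #8.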
1233 define <vscale x 16 x i8> @asr_i8(<vscale x 16 x i1> %pg, <vscale x 16 x i8> %a) {
1234 ; CHECK-LABEL: asr_i8:
1236 ; CHECK-NEXT: asr z0.b, p0/m, z0.b, #8
1238 %elt = insertelement <vscale x 16 x i8> undef, i8 9, i32 0
1239 %splat = shufflevector <vscale x 16 x i8> %elt, <vscale x 16 x i8> undef, <vscale x 16 x i32> zeroinitializer
1240 %out = call <vscale x 16 x i8> @llvm.aarch64.sve.asr.nxv16i8(<vscale x 16 x i1> %pg,
1241 <vscale x 16 x i8> %a,
1242 <vscale x 16 x i8> %splat)
1243 ret <vscale x 16 x i8> %out
1246 define <vscale x 16 x i8> @asr_i8_all_active(<vscale x 16 x i8> %a) {
1247 ; CHECK-LABEL: asr_i8_all_active:
1249 ; CHECK-NEXT: asr z0.b, z0.b, #8
1251 %pg = call <vscale x 16 x i1> @llvm.aarch64.sve.ptrue.nxv16i1(i32 31)
1252 %elt = insertelement <vscale x 16 x i8> undef, i8 8, i32 0
1253 %splat = shufflevector <vscale x 16 x i8> %elt, <vscale x 16 x i8> undef, <vscale x 16 x i32> zeroinitializer
1254 %out = call <vscale x 16 x i8> @llvm.aarch64.sve.asr.nxv16i8(<vscale x 16 x i1> %pg,
1255 <vscale x 16 x i8> %a,
1256 <vscale x 16 x i8> %splat)
1257 ret <vscale x 16 x i8> %out
1260 ; Ensure we don't match a right shift by zero to the immediate form.
1261 define <vscale x 16 x i8> @asr_i8_too_small(<vscale x 16 x i1> %pg, <vscale x 16 x i8> %a) {
1262 ; CHECK-LABEL: asr_i8_too_small:
1264 ; CHECK-NEXT: mov z1.b, #0 // =0x0
1265 ; CHECK-NEXT: asr z0.b, p0/m, z0.b, z1.b
1267 %out = call <vscale x 16 x i8> @llvm.aarch64.sve.asr.nxv16i8(<vscale x 16 x i1> %pg,
1268 <vscale x 16 x i8> %a,
1269 <vscale x 16 x i8> zeroinitializer)
1270 ret <vscale x 16 x i8> %out
1273 define <vscale x 8 x i16> @asr_i16(<vscale x 8 x i1> %pg, <vscale x 8 x i16> %a) {
1274 ; CHECK-LABEL: asr_i16:
1276 ; CHECK-NEXT: asr z0.h, p0/m, z0.h, #16
1278 %elt = insertelement <vscale x 8 x i16> undef, i16 17, i32 0
1279 %splat = shufflevector <vscale x 8 x i16> %elt, <vscale x 8 x i16> undef, <vscale x 8 x i32> zeroinitializer
1280 %out = call <vscale x 8 x i16> @llvm.aarch64.sve.asr.nxv8i16(<vscale x 8 x i1> %pg,
1281 <vscale x 8 x i16> %a,
1282 <vscale x 8 x i16> %splat)
1283 ret <vscale x 8 x i16> %out
1286 define <vscale x 8 x i16> @asr_i16_all_active(<vscale x 8 x i16> %a) {
1287 ; CHECK-LABEL: asr_i16_all_active:
1289 ; CHECK-NEXT: asr z0.h, z0.h, #16
1291 %pg = call <vscale x 8 x i1> @llvm.aarch64.sve.ptrue.nxv8i1(i32 31)
1292 %elt = insertelement <vscale x 8 x i16> undef, i16 16, i32 0
1293 %splat = shufflevector <vscale x 8 x i16> %elt, <vscale x 8 x i16> undef, <vscale x 8 x i32> zeroinitializer
1294 %out = call <vscale x 8 x i16> @llvm.aarch64.sve.asr.nxv8i16(<vscale x 8 x i1> %pg,
1295 <vscale x 8 x i16> %a,
1296 <vscale x 8 x i16> %splat)
1297 ret <vscale x 8 x i16> %out
1300 ; Ensure we don't match a right shift by zero to the immediate form.
1301 define <vscale x 8 x i16> @asr_i16_too_small(<vscale x 8 x i1> %pg, <vscale x 8 x i16> %a) {
1302 ; CHECK-LABEL: asr_i16_too_small:
1304 ; CHECK-NEXT: mov z1.h, #0 // =0x0
1305 ; CHECK-NEXT: asr z0.h, p0/m, z0.h, z1.h
1307 %out = call <vscale x 8 x i16> @llvm.aarch64.sve.asr.nxv8i16(<vscale x 8 x i1> %pg,
1308 <vscale x 8 x i16> %a,
1309 <vscale x 8 x i16> zeroinitializer)
1310 ret <vscale x 8 x i16> %out
1313 define <vscale x 4 x i32> @asr_i32(<vscale x 4 x i1> %pg, <vscale x 4 x i32> %a) {
1314 ; CHECK-LABEL: asr_i32:
1316 ; CHECK-NEXT: asr z0.s, p0/m, z0.s, #32
1318 %elt = insertelement <vscale x 4 x i32> undef, i32 33, i32 0
1319 %splat = shufflevector <vscale x 4 x i32> %elt, <vscale x 4 x i32> undef, <vscale x 4 x i32> zeroinitializer
1320 %out = call <vscale x 4 x i32> @llvm.aarch64.sve.asr.nxv4i32(<vscale x 4 x i1> %pg,
1321 <vscale x 4 x i32> %a,
1322 <vscale x 4 x i32> %splat)
1323 ret <vscale x 4 x i32> %out
1326 define <vscale x 4 x i32> @asr_i32_all_active(<vscale x 4 x i32> %a) {
1327 ; CHECK-LABEL: asr_i32_all_active:
1329 ; CHECK-NEXT: asr z0.s, z0.s, #32
1331 %pg = call <vscale x 4 x i1> @llvm.aarch64.sve.ptrue.nxv4i1(i32 31)
1332 %elt = insertelement <vscale x 4 x i32> undef, i32 32, i32 0
1333 %splat = shufflevector <vscale x 4 x i32> %elt, <vscale x 4 x i32> undef, <vscale x 4 x i32> zeroinitializer
1334 %out = call <vscale x 4 x i32> @llvm.aarch64.sve.asr.nxv4i32(<vscale x 4 x i1> %pg,
1335 <vscale x 4 x i32> %a,
1336 <vscale x 4 x i32> %splat)
1337 ret <vscale x 4 x i32> %out
1340 ; Ensure we don't match a right shift by zero to the immediate form.
1341 define <vscale x 4 x i32> @asr_i32_too_small(<vscale x 4 x i1> %pg, <vscale x 4 x i32> %a) {
1342 ; CHECK-LABEL: asr_i32_too_small:
1344 ; CHECK-NEXT: mov z1.s, #0 // =0x0
1345 ; CHECK-NEXT: asr z0.s, p0/m, z0.s, z1.s
1347 %out = call <vscale x 4 x i32> @llvm.aarch64.sve.asr.nxv4i32(<vscale x 4 x i1> %pg,
1348 <vscale x 4 x i32> %a,
1349 <vscale x 4 x i32> zeroinitializer)
1350 ret <vscale x 4 x i32> %out
define <vscale x 2 x i64> @asr_i64(<vscale x 2 x i1> %pg, <vscale x 2 x i64> %a) {
; CHECK-LABEL: asr_i64:
; CHECK:       // %bb.0:
; CHECK-NEXT:    asr z0.d, p0/m, z0.d, #64
; CHECK-NEXT:    ret
  %elt = insertelement <vscale x 2 x i64> undef, i64 65, i64 0
  %splat = shufflevector <vscale x 2 x i64> %elt, <vscale x 2 x i64> undef, <vscale x 2 x i32> zeroinitializer
  %out = call <vscale x 2 x i64> @llvm.aarch64.sve.asr.nxv2i64(<vscale x 2 x i1> %pg,
                                                               <vscale x 2 x i64> %a,
                                                               <vscale x 2 x i64> %splat)
  ret <vscale x 2 x i64> %out
}

define <vscale x 2 x i64> @asr_i64_all_active(<vscale x 2 x i64> %a) {
; CHECK-LABEL: asr_i64_all_active:
; CHECK:       // %bb.0:
; CHECK-NEXT:    asr z0.d, z0.d, #64
; CHECK-NEXT:    ret
  %pg = call <vscale x 2 x i1> @llvm.aarch64.sve.ptrue.nxv2i1(i32 31)
  %elt = insertelement <vscale x 2 x i64> undef, i64 64, i64 0
  %splat = shufflevector <vscale x 2 x i64> %elt, <vscale x 2 x i64> undef, <vscale x 2 x i32> zeroinitializer
  %out = call <vscale x 2 x i64> @llvm.aarch64.sve.asr.nxv2i64(<vscale x 2 x i1> %pg,
                                                               <vscale x 2 x i64> %a,
                                                               <vscale x 2 x i64> %splat)
  ret <vscale x 2 x i64> %out
}

; Ensure we don't match a right shift by zero to the immediate form.
define <vscale x 2 x i64> @asr_i64_too_small(<vscale x 2 x i1> %pg, <vscale x 2 x i64> %a) {
; CHECK-LABEL: asr_i64_too_small:
; CHECK:       // %bb.0:
; CHECK-NEXT:    mov z1.d, #0 // =0x0
; CHECK-NEXT:    asr z0.d, p0/m, z0.d, z1.d
; CHECK-NEXT:    ret
  %out = call <vscale x 2 x i64> @llvm.aarch64.sve.asr.nxv2i64(<vscale x 2 x i1> %pg,
                                                               <vscale x 2 x i64> %a,
                                                               <vscale x 2 x i64> zeroinitializer)
  ret <vscale x 2 x i64> %out
}

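; For right shifts the immediate form encodes amounts 1 through the element
; size, so a splat of 1 is the smallest shift that can fold. The function
; below is an illustrative sketch rather than an autogenerated test: the
; function name and the expected "asr z0.h, p0/m, z0.h, #1" selection are our
; assumption about the lower bound of the range.
define <vscale x 8 x i16> @asr_i16_smallest_imm_sketch(<vscale x 8 x i1> %pg, <vscale x 8 x i16> %a) {
  ; Splat the minimum in-range shift amount.
  %elt = insertelement <vscale x 8 x i16> undef, i16 1, i32 0
  %splat = shufflevector <vscale x 8 x i16> %elt, <vscale x 8 x i16> undef, <vscale x 8 x i32> zeroinitializer
  %out = call <vscale x 8 x i16> @llvm.aarch64.sve.asr.nxv8i16(<vscale x 8 x i1> %pg,
                                                               <vscale x 8 x i16> %a,
                                                               <vscale x 8 x i16> %splat)
  ret <vscale x 8 x i16> %out
}
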
define <vscale x 16 x i8> @lsl_i8(<vscale x 16 x i1> %pg, <vscale x 16 x i8> %a) {
; CHECK-LABEL: lsl_i8:
; CHECK:       // %bb.0:
; CHECK-NEXT:    lsl z0.b, p0/m, z0.b, #7
; CHECK-NEXT:    ret
  %elt = insertelement <vscale x 16 x i8> undef, i8 7, i32 0
  %splat = shufflevector <vscale x 16 x i8> %elt, <vscale x 16 x i8> undef, <vscale x 16 x i32> zeroinitializer
  %out = call <vscale x 16 x i8> @llvm.aarch64.sve.lsl.nxv16i8(<vscale x 16 x i1> %pg,
                                                               <vscale x 16 x i8> %a,
                                                               <vscale x 16 x i8> %splat)
  ret <vscale x 16 x i8> %out
}

define <vscale x 16 x i8> @lsl_i8_all_active(<vscale x 16 x i8> %a) {
; CHECK-LABEL: lsl_i8_all_active:
; CHECK:       // %bb.0:
; CHECK-NEXT:    lsl z0.b, z0.b, #7
; CHECK-NEXT:    ret
  %pg = call <vscale x 16 x i1> @llvm.aarch64.sve.ptrue.nxv16i1(i32 31)
  %elt = insertelement <vscale x 16 x i8> undef, i8 7, i32 0
  %splat = shufflevector <vscale x 16 x i8> %elt, <vscale x 16 x i8> undef, <vscale x 16 x i32> zeroinitializer
  %out = call <vscale x 16 x i8> @llvm.aarch64.sve.lsl.nxv16i8(<vscale x 16 x i1> %pg,
                                                               <vscale x 16 x i8> %a,
                                                               <vscale x 16 x i8> %splat)
  ret <vscale x 16 x i8> %out
}

; Ensure we don't match a left shift of the element size or more to the
; immediate form.
define <vscale x 16 x i8> @lsl_i8_too_big(<vscale x 16 x i1> %pg, <vscale x 16 x i8> %a) {
; CHECK-LABEL: lsl_i8_too_big:
; CHECK:       // %bb.0:
; CHECK-NEXT:    mov z1.b, #8 // =0x8
; CHECK-NEXT:    lsl z0.b, p0/m, z0.b, z1.b
; CHECK-NEXT:    ret
  %elt = insertelement <vscale x 16 x i8> undef, i8 8, i32 0
  %splat = shufflevector <vscale x 16 x i8> %elt, <vscale x 16 x i8> undef, <vscale x 16 x i32> zeroinitializer
  %out = call <vscale x 16 x i8> @llvm.aarch64.sve.lsl.nxv16i8(<vscale x 16 x i1> %pg,
                                                               <vscale x 16 x i8> %a,
                                                               <vscale x 16 x i8> %splat)
  ret <vscale x 16 x i8> %out
}

define <vscale x 16 x i8> @lsl_i8_zero(<vscale x 16 x i1> %pg, <vscale x 16 x i8> %a) {
; CHECK-LABEL: lsl_i8_zero:
; CHECK:       // %bb.0:
; CHECK-NEXT:    lsl z0.b, p0/m, z0.b, #0
; CHECK-NEXT:    ret
  %out = call <vscale x 16 x i8> @llvm.aarch64.sve.lsl.nxv16i8(<vscale x 16 x i1> %pg,
                                                               <vscale x 16 x i8> %a,
                                                               <vscale x 16 x i8> zeroinitializer)
  ret <vscale x 16 x i8> %out
}

define <vscale x 8 x i16> @lsl_i16(<vscale x 8 x i1> %pg, <vscale x 8 x i16> %a) {
; CHECK-LABEL: lsl_i16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    lsl z0.h, p0/m, z0.h, #15
; CHECK-NEXT:    ret
  %elt = insertelement <vscale x 8 x i16> undef, i16 15, i32 0
  %splat = shufflevector <vscale x 8 x i16> %elt, <vscale x 8 x i16> undef, <vscale x 8 x i32> zeroinitializer
  %out = call <vscale x 8 x i16> @llvm.aarch64.sve.lsl.nxv8i16(<vscale x 8 x i1> %pg,
                                                               <vscale x 8 x i16> %a,
                                                               <vscale x 8 x i16> %splat)
  ret <vscale x 8 x i16> %out
}

define <vscale x 8 x i16> @lsl_i16_all_active(<vscale x 8 x i16> %a) {
; CHECK-LABEL: lsl_i16_all_active:
; CHECK:       // %bb.0:
; CHECK-NEXT:    lsl z0.h, z0.h, #15
; CHECK-NEXT:    ret
  %pg = call <vscale x 8 x i1> @llvm.aarch64.sve.ptrue.nxv8i1(i32 31)
  %elt = insertelement <vscale x 8 x i16> undef, i16 15, i32 0
  %splat = shufflevector <vscale x 8 x i16> %elt, <vscale x 8 x i16> undef, <vscale x 8 x i32> zeroinitializer
  %out = call <vscale x 8 x i16> @llvm.aarch64.sve.lsl.nxv8i16(<vscale x 8 x i1> %pg,
                                                               <vscale x 8 x i16> %a,
                                                               <vscale x 8 x i16> %splat)
  ret <vscale x 8 x i16> %out
}

; Ensure we don't match a left shift of the element size or more to the
; immediate form.
define <vscale x 8 x i16> @lsl_i16_too_big(<vscale x 8 x i1> %pg, <vscale x 8 x i16> %a) {
; CHECK-LABEL: lsl_i16_too_big:
; CHECK:       // %bb.0:
; CHECK-NEXT:    mov z1.h, #16 // =0x10
; CHECK-NEXT:    lsl z0.h, p0/m, z0.h, z1.h
; CHECK-NEXT:    ret
  %elt = insertelement <vscale x 8 x i16> undef, i16 16, i32 0
  %splat = shufflevector <vscale x 8 x i16> %elt, <vscale x 8 x i16> undef, <vscale x 8 x i32> zeroinitializer
  %out = call <vscale x 8 x i16> @llvm.aarch64.sve.lsl.nxv8i16(<vscale x 8 x i1> %pg,
                                                               <vscale x 8 x i16> %a,
                                                               <vscale x 8 x i16> %splat)
  ret <vscale x 8 x i16> %out
}

define <vscale x 8 x i16> @lsl_i16_zero(<vscale x 8 x i1> %pg, <vscale x 8 x i16> %a) {
; CHECK-LABEL: lsl_i16_zero:
; CHECK:       // %bb.0:
; CHECK-NEXT:    lsl z0.h, p0/m, z0.h, #0
; CHECK-NEXT:    ret
  %out = call <vscale x 8 x i16> @llvm.aarch64.sve.lsl.nxv8i16(<vscale x 8 x i1> %pg,
                                                               <vscale x 8 x i16> %a,
                                                               <vscale x 8 x i16> zeroinitializer)
  ret <vscale x 8 x i16> %out
}

define <vscale x 4 x i32> @lsl_i32(<vscale x 4 x i1> %pg, <vscale x 4 x i32> %a) {
; CHECK-LABEL: lsl_i32:
; CHECK:       // %bb.0:
; CHECK-NEXT:    lsl z0.s, p0/m, z0.s, #31
; CHECK-NEXT:    ret
  %elt = insertelement <vscale x 4 x i32> undef, i32 31, i32 0
  %splat = shufflevector <vscale x 4 x i32> %elt, <vscale x 4 x i32> undef, <vscale x 4 x i32> zeroinitializer
  %out = call <vscale x 4 x i32> @llvm.aarch64.sve.lsl.nxv4i32(<vscale x 4 x i1> %pg,
                                                               <vscale x 4 x i32> %a,
                                                               <vscale x 4 x i32> %splat)
  ret <vscale x 4 x i32> %out
}

define <vscale x 4 x i32> @lsl_i32_all_active(<vscale x 4 x i32> %a) {
; CHECK-LABEL: lsl_i32_all_active:
; CHECK:       // %bb.0:
; CHECK-NEXT:    lsl z0.s, z0.s, #31
; CHECK-NEXT:    ret
  %pg = call <vscale x 4 x i1> @llvm.aarch64.sve.ptrue.nxv4i1(i32 31)
  %elt = insertelement <vscale x 4 x i32> undef, i32 31, i32 0
  %splat = shufflevector <vscale x 4 x i32> %elt, <vscale x 4 x i32> undef, <vscale x 4 x i32> zeroinitializer
  %out = call <vscale x 4 x i32> @llvm.aarch64.sve.lsl.nxv4i32(<vscale x 4 x i1> %pg,
                                                               <vscale x 4 x i32> %a,
                                                               <vscale x 4 x i32> %splat)
  ret <vscale x 4 x i32> %out
}

; Ensure we don't match a left shift of the element size or more to the
; immediate form.
define <vscale x 4 x i32> @lsl_i32_too_big(<vscale x 4 x i1> %pg, <vscale x 4 x i32> %a) {
; CHECK-LABEL: lsl_i32_too_big:
; CHECK:       // %bb.0:
; CHECK-NEXT:    mov z1.s, #32 // =0x20
; CHECK-NEXT:    lsl z0.s, p0/m, z0.s, z1.s
; CHECK-NEXT:    ret
  %elt = insertelement <vscale x 4 x i32> undef, i32 32, i32 0
  %splat = shufflevector <vscale x 4 x i32> %elt, <vscale x 4 x i32> undef, <vscale x 4 x i32> zeroinitializer
  %out = call <vscale x 4 x i32> @llvm.aarch64.sve.lsl.nxv4i32(<vscale x 4 x i1> %pg,
                                                               <vscale x 4 x i32> %a,
                                                               <vscale x 4 x i32> %splat)
  ret <vscale x 4 x i32> %out
}

define <vscale x 4 x i32> @lsl_i32_zero(<vscale x 4 x i1> %pg, <vscale x 4 x i32> %a) {
; CHECK-LABEL: lsl_i32_zero:
; CHECK:       // %bb.0:
; CHECK-NEXT:    lsl z0.s, p0/m, z0.s, #0
; CHECK-NEXT:    ret
  %out = call <vscale x 4 x i32> @llvm.aarch64.sve.lsl.nxv4i32(<vscale x 4 x i1> %pg,
                                                               <vscale x 4 x i32> %a,
                                                               <vscale x 4 x i32> zeroinitializer)
  ret <vscale x 4 x i32> %out
}

define <vscale x 2 x i64> @lsl_i64(<vscale x 2 x i1> %pg, <vscale x 2 x i64> %a) {
; CHECK-LABEL: lsl_i64:
; CHECK:       // %bb.0:
; CHECK-NEXT:    lsl z0.d, p0/m, z0.d, #63
; CHECK-NEXT:    ret
  %elt = insertelement <vscale x 2 x i64> undef, i64 63, i64 0
  %splat = shufflevector <vscale x 2 x i64> %elt, <vscale x 2 x i64> undef, <vscale x 2 x i32> zeroinitializer
  %out = call <vscale x 2 x i64> @llvm.aarch64.sve.lsl.nxv2i64(<vscale x 2 x i1> %pg,
                                                               <vscale x 2 x i64> %a,
                                                               <vscale x 2 x i64> %splat)
  ret <vscale x 2 x i64> %out
}

define <vscale x 2 x i64> @lsl_i64_all_active(<vscale x 2 x i64> %a) {
; CHECK-LABEL: lsl_i64_all_active:
; CHECK:       // %bb.0:
; CHECK-NEXT:    lsl z0.d, z0.d, #63
; CHECK-NEXT:    ret
  %pg = call <vscale x 2 x i1> @llvm.aarch64.sve.ptrue.nxv2i1(i32 31)
  %elt = insertelement <vscale x 2 x i64> undef, i64 63, i64 0
  %splat = shufflevector <vscale x 2 x i64> %elt, <vscale x 2 x i64> undef, <vscale x 2 x i32> zeroinitializer
  %out = call <vscale x 2 x i64> @llvm.aarch64.sve.lsl.nxv2i64(<vscale x 2 x i1> %pg,
                                                               <vscale x 2 x i64> %a,
                                                               <vscale x 2 x i64> %splat)
  ret <vscale x 2 x i64> %out
}

; Ensure we don't match a left shift of the element size or more to the
; immediate form.
define <vscale x 2 x i64> @lsl_i64_too_big(<vscale x 2 x i1> %pg, <vscale x 2 x i64> %a) {
; CHECK-LABEL: lsl_i64_too_big:
; CHECK:       // %bb.0:
; CHECK-NEXT:    mov z1.d, #64 // =0x40
; CHECK-NEXT:    lsl z0.d, p0/m, z0.d, z1.d
; CHECK-NEXT:    ret
  %elt = insertelement <vscale x 2 x i64> undef, i64 64, i64 0
  %splat = shufflevector <vscale x 2 x i64> %elt, <vscale x 2 x i64> undef, <vscale x 2 x i32> zeroinitializer
  %out = call <vscale x 2 x i64> @llvm.aarch64.sve.lsl.nxv2i64(<vscale x 2 x i1> %pg,
                                                               <vscale x 2 x i64> %a,
                                                               <vscale x 2 x i64> %splat)
  ret <vscale x 2 x i64> %out
}

define <vscale x 2 x i64> @lsl_i64_zero(<vscale x 2 x i1> %pg, <vscale x 2 x i64> %a) {
; CHECK-LABEL: lsl_i64_zero:
; CHECK:       // %bb.0:
; CHECK-NEXT:    lsl z0.d, p0/m, z0.d, #0
; CHECK-NEXT:    ret
  %out = call <vscale x 2 x i64> @llvm.aarch64.sve.lsl.nxv2i64(<vscale x 2 x i1> %pg,
                                                               <vscale x 2 x i64> %a,
                                                               <vscale x 2 x i64> zeroinitializer)
  ret <vscale x 2 x i64> %out
}

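; Unlike right shifts, the left-shift immediate encodes amounts 0 through
; element size - 1, which is why #0 folds above while the element size itself
; does not. The function below is an illustrative sketch rather than an
; autogenerated test: the function name and the expected
; "lsl z0.s, p0/m, z0.s, #16" selection are our assumption for a mid-range
; amount.
define <vscale x 4 x i32> @lsl_i32_mid_imm_sketch(<vscale x 4 x i1> %pg, <vscale x 4 x i32> %a) {
  ; Splat a shift amount strictly inside the 0..31 immediate range.
  %elt = insertelement <vscale x 4 x i32> undef, i32 16, i32 0
  %splat = shufflevector <vscale x 4 x i32> %elt, <vscale x 4 x i32> undef, <vscale x 4 x i32> zeroinitializer
  %out = call <vscale x 4 x i32> @llvm.aarch64.sve.lsl.nxv4i32(<vscale x 4 x i1> %pg,
                                                               <vscale x 4 x i32> %a,
                                                               <vscale x 4 x i32> %splat)
  ret <vscale x 4 x i32> %out
}
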
define <vscale x 16 x i8> @lsr_i8(<vscale x 16 x i1> %pg, <vscale x 16 x i8> %a) {
; CHECK-LABEL: lsr_i8:
; CHECK:       // %bb.0:
; CHECK-NEXT:    lsr z0.b, p0/m, z0.b, #8
; CHECK-NEXT:    ret
  %elt = insertelement <vscale x 16 x i8> undef, i8 9, i32 0
  %splat = shufflevector <vscale x 16 x i8> %elt, <vscale x 16 x i8> undef, <vscale x 16 x i32> zeroinitializer
  %out = call <vscale x 16 x i8> @llvm.aarch64.sve.lsr.nxv16i8(<vscale x 16 x i1> %pg,
                                                               <vscale x 16 x i8> %a,
                                                               <vscale x 16 x i8> %splat)
  ret <vscale x 16 x i8> %out
}

define <vscale x 16 x i8> @lsr_i8_all_active(<vscale x 16 x i8> %a) {
; CHECK-LABEL: lsr_i8_all_active:
; CHECK:       // %bb.0:
; CHECK-NEXT:    lsr z0.b, z0.b, #8
; CHECK-NEXT:    ret
  %pg = call <vscale x 16 x i1> @llvm.aarch64.sve.ptrue.nxv16i1(i32 31)
  %elt = insertelement <vscale x 16 x i8> undef, i8 8, i32 0
  %splat = shufflevector <vscale x 16 x i8> %elt, <vscale x 16 x i8> undef, <vscale x 16 x i32> zeroinitializer
  %out = call <vscale x 16 x i8> @llvm.aarch64.sve.lsr.nxv16i8(<vscale x 16 x i1> %pg,
                                                               <vscale x 16 x i8> %a,
                                                               <vscale x 16 x i8> %splat)
  ret <vscale x 16 x i8> %out
}

; Ensure we don't match a right shift by zero to the immediate form.
define <vscale x 16 x i8> @lsr_i8_too_small(<vscale x 16 x i1> %pg, <vscale x 16 x i8> %a) {
; CHECK-LABEL: lsr_i8_too_small:
; CHECK:       // %bb.0:
; CHECK-NEXT:    mov z1.b, #0 // =0x0
; CHECK-NEXT:    lsr z0.b, p0/m, z0.b, z1.b
; CHECK-NEXT:    ret
  %out = call <vscale x 16 x i8> @llvm.aarch64.sve.lsr.nxv16i8(<vscale x 16 x i1> %pg,
                                                               <vscale x 16 x i8> %a,
                                                               <vscale x 16 x i8> zeroinitializer)
  ret <vscale x 16 x i8> %out
}

define <vscale x 8 x i16> @lsr_i16(<vscale x 8 x i1> %pg, <vscale x 8 x i16> %a) {
; CHECK-LABEL: lsr_i16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    lsr z0.h, p0/m, z0.h, #16
; CHECK-NEXT:    ret
  %elt = insertelement <vscale x 8 x i16> undef, i16 17, i32 0
  %splat = shufflevector <vscale x 8 x i16> %elt, <vscale x 8 x i16> undef, <vscale x 8 x i32> zeroinitializer
  %out = call <vscale x 8 x i16> @llvm.aarch64.sve.lsr.nxv8i16(<vscale x 8 x i1> %pg,
                                                               <vscale x 8 x i16> %a,
                                                               <vscale x 8 x i16> %splat)
  ret <vscale x 8 x i16> %out
}

define <vscale x 8 x i16> @lsr_i16_all_active(<vscale x 8 x i16> %a) {
; CHECK-LABEL: lsr_i16_all_active:
; CHECK:       // %bb.0:
; CHECK-NEXT:    lsr z0.h, z0.h, #16
; CHECK-NEXT:    ret
  %pg = call <vscale x 8 x i1> @llvm.aarch64.sve.ptrue.nxv8i1(i32 31)
  %elt = insertelement <vscale x 8 x i16> undef, i16 16, i32 0
  %splat = shufflevector <vscale x 8 x i16> %elt, <vscale x 8 x i16> undef, <vscale x 8 x i32> zeroinitializer
  %out = call <vscale x 8 x i16> @llvm.aarch64.sve.lsr.nxv8i16(<vscale x 8 x i1> %pg,
                                                               <vscale x 8 x i16> %a,
                                                               <vscale x 8 x i16> %splat)
  ret <vscale x 8 x i16> %out
}

; Ensure we don't match a right shift by zero to the immediate form.
define <vscale x 8 x i16> @lsr_i16_too_small(<vscale x 8 x i1> %pg, <vscale x 8 x i16> %a) {
; CHECK-LABEL: lsr_i16_too_small:
; CHECK:       // %bb.0:
; CHECK-NEXT:    mov z1.h, #0 // =0x0
; CHECK-NEXT:    lsr z0.h, p0/m, z0.h, z1.h
; CHECK-NEXT:    ret
  %out = call <vscale x 8 x i16> @llvm.aarch64.sve.lsr.nxv8i16(<vscale x 8 x i1> %pg,
                                                               <vscale x 8 x i16> %a,
                                                               <vscale x 8 x i16> zeroinitializer)
  ret <vscale x 8 x i16> %out
}

define <vscale x 4 x i32> @lsr_i32(<vscale x 4 x i1> %pg, <vscale x 4 x i32> %a) {
; CHECK-LABEL: lsr_i32:
; CHECK:       // %bb.0:
; CHECK-NEXT:    lsr z0.s, p0/m, z0.s, #32
; CHECK-NEXT:    ret
  %elt = insertelement <vscale x 4 x i32> undef, i32 33, i32 0
  %splat = shufflevector <vscale x 4 x i32> %elt, <vscale x 4 x i32> undef, <vscale x 4 x i32> zeroinitializer
  %out = call <vscale x 4 x i32> @llvm.aarch64.sve.lsr.nxv4i32(<vscale x 4 x i1> %pg,
                                                               <vscale x 4 x i32> %a,
                                                               <vscale x 4 x i32> %splat)
  ret <vscale x 4 x i32> %out
}

define <vscale x 4 x i32> @lsr_i32_all_active(<vscale x 4 x i32> %a) {
; CHECK-LABEL: lsr_i32_all_active:
; CHECK:       // %bb.0:
; CHECK-NEXT:    lsr z0.s, z0.s, #32
; CHECK-NEXT:    ret
  %pg = call <vscale x 4 x i1> @llvm.aarch64.sve.ptrue.nxv4i1(i32 31)
  %elt = insertelement <vscale x 4 x i32> undef, i32 32, i32 0
  %splat = shufflevector <vscale x 4 x i32> %elt, <vscale x 4 x i32> undef, <vscale x 4 x i32> zeroinitializer
  %out = call <vscale x 4 x i32> @llvm.aarch64.sve.lsr.nxv4i32(<vscale x 4 x i1> %pg,
                                                               <vscale x 4 x i32> %a,
                                                               <vscale x 4 x i32> %splat)
  ret <vscale x 4 x i32> %out
}

; Ensure we don't match a right shift by zero to the immediate form.
define <vscale x 4 x i32> @lsr_i32_too_small(<vscale x 4 x i1> %pg, <vscale x 4 x i32> %a) {
; CHECK-LABEL: lsr_i32_too_small:
; CHECK:       // %bb.0:
; CHECK-NEXT:    mov z1.s, #0 // =0x0
; CHECK-NEXT:    lsr z0.s, p0/m, z0.s, z1.s
; CHECK-NEXT:    ret
  %out = call <vscale x 4 x i32> @llvm.aarch64.sve.lsr.nxv4i32(<vscale x 4 x i1> %pg,
                                                               <vscale x 4 x i32> %a,
                                                               <vscale x 4 x i32> zeroinitializer)
  ret <vscale x 4 x i32> %out
}

define <vscale x 2 x i64> @lsr_i64(<vscale x 2 x i1> %pg, <vscale x 2 x i64> %a) {
; CHECK-LABEL: lsr_i64:
; CHECK:       // %bb.0:
; CHECK-NEXT:    lsr z0.d, p0/m, z0.d, #64
; CHECK-NEXT:    ret
  %elt = insertelement <vscale x 2 x i64> undef, i64 65, i64 0
  %splat = shufflevector <vscale x 2 x i64> %elt, <vscale x 2 x i64> undef, <vscale x 2 x i32> zeroinitializer
  %out = call <vscale x 2 x i64> @llvm.aarch64.sve.lsr.nxv2i64(<vscale x 2 x i1> %pg,
                                                               <vscale x 2 x i64> %a,
                                                               <vscale x 2 x i64> %splat)
  ret <vscale x 2 x i64> %out
}

define <vscale x 2 x i64> @lsr_i64_all_active(<vscale x 2 x i64> %a) {
; CHECK-LABEL: lsr_i64_all_active:
; CHECK:       // %bb.0:
; CHECK-NEXT:    lsr z0.d, z0.d, #64
; CHECK-NEXT:    ret
  %pg = call <vscale x 2 x i1> @llvm.aarch64.sve.ptrue.nxv2i1(i32 31)
  %elt = insertelement <vscale x 2 x i64> undef, i64 64, i64 0
  %splat = shufflevector <vscale x 2 x i64> %elt, <vscale x 2 x i64> undef, <vscale x 2 x i32> zeroinitializer
  %out = call <vscale x 2 x i64> @llvm.aarch64.sve.lsr.nxv2i64(<vscale x 2 x i1> %pg,
                                                               <vscale x 2 x i64> %a,
                                                               <vscale x 2 x i64> %splat)
  ret <vscale x 2 x i64> %out
}

; Ensure we don't match a right shift by zero to the immediate form.
define <vscale x 2 x i64> @lsr_i64_too_small(<vscale x 2 x i1> %pg, <vscale x 2 x i64> %a) {
; CHECK-LABEL: lsr_i64_too_small:
; CHECK:       // %bb.0:
; CHECK-NEXT:    mov z1.d, #0 // =0x0
; CHECK-NEXT:    lsr z0.d, p0/m, z0.d, z1.d
; CHECK-NEXT:    ret
  %out = call <vscale x 2 x i64> @llvm.aarch64.sve.lsr.nxv2i64(<vscale x 2 x i1> %pg,
                                                               <vscale x 2 x i64> %a,
                                                               <vscale x 2 x i64> zeroinitializer)
  ret <vscale x 2 x i64> %out
}

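; As with asr, the unsigned right-shift immediate encodes 1 through the
; element size. The function below is an illustrative sketch rather than an
; autogenerated test: the function name and the expected
; "lsr z0.b, p0/m, z0.b, #1" selection are our assumption about the lower
; bound of the range.
define <vscale x 16 x i8> @lsr_i8_smallest_imm_sketch(<vscale x 16 x i1> %pg, <vscale x 16 x i8> %a) {
  ; Splat the minimum in-range shift amount.
  %elt = insertelement <vscale x 16 x i8> undef, i8 1, i32 0
  %splat = shufflevector <vscale x 16 x i8> %elt, <vscale x 16 x i8> undef, <vscale x 16 x i32> zeroinitializer
  %out = call <vscale x 16 x i8> @llvm.aarch64.sve.lsr.nxv16i8(<vscale x 16 x i1> %pg,
                                                               <vscale x 16 x i8> %a,
                                                               <vscale x 16 x i8> %splat)
  ret <vscale x 16 x i8> %out
}
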
; As lsr_i32 but where pg is i8-based and thus compatible with i32.
define <vscale x 4 x i32> @lsr_i32_ptrue_all_b(<vscale x 4 x i32> %a) #0 {
; CHECK-LABEL: lsr_i32_ptrue_all_b:
; CHECK: lsr z0.s, z0.s, #1
; CHECK-NEXT: ret
  %pg.b = tail call <vscale x 16 x i1> @llvm.aarch64.sve.ptrue.nxv16i1(i32 31)
  %pg.s = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> %pg.b)
  %b = tail call <vscale x 4 x i32> @llvm.aarch64.sve.dup.x.nxv4i32(i32 1)
  %out = tail call <vscale x 4 x i32> @llvm.aarch64.sve.lsr.nxv4i32(<vscale x 4 x i1> %pg.s,
                                                                    <vscale x 4 x i32> %a,
                                                                    <vscale x 4 x i32> %b)
  ret <vscale x 4 x i32> %out
}

; As lsr_i32 but where pg is i16-based and thus compatible with i32.
define <vscale x 4 x i32> @lsr_i32_ptrue_all_h(<vscale x 4 x i32> %a) #0 {
; CHECK-LABEL: lsr_i32_ptrue_all_h:
; CHECK: lsr z0.s, z0.s, #1
; CHECK-NEXT: ret
  %pg.h = tail call <vscale x 8 x i1> @llvm.aarch64.sve.ptrue.nxv8i1(i32 31)
  %pg.b = tail call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv8i1(<vscale x 8 x i1> %pg.h)
  %pg.s = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> %pg.b)
  %b = tail call <vscale x 4 x i32> @llvm.aarch64.sve.dup.x.nxv4i32(i32 1)
  %out = tail call <vscale x 4 x i32> @llvm.aarch64.sve.lsr.nxv4i32(<vscale x 4 x i1> %pg.s,
                                                                    <vscale x 4 x i32> %a,
                                                                    <vscale x 4 x i32> %b)
  ret <vscale x 4 x i32> %out
}

; As lsr_i32 but where pg is i64-based, which is not compatible with i32, so
; the inactive lanes matter and the immediate form cannot be used.
define <vscale x 4 x i32> @lsr_i32_ptrue_all_d(<vscale x 4 x i32> %a) #0 {
; CHECK-LABEL: lsr_i32_ptrue_all_d:
; CHECK-DAG: ptrue [[PG:p[0-9]+]].d
; CHECK-DAG: lsr z0.s, [[PG]]/m, z0.s, #1
; CHECK-NEXT: ret
  %pg.d = tail call <vscale x 2 x i1> @llvm.aarch64.sve.ptrue.nxv2i1(i32 31)
  %pg.b = tail call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv2i1(<vscale x 2 x i1> %pg.d)
  %pg.s = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> %pg.b)
  %b = tail call <vscale x 4 x i32> @llvm.aarch64.sve.dup.x.nxv4i32(i32 1)
  %out = tail call <vscale x 4 x i32> @llvm.aarch64.sve.lsr.nxv4i32(<vscale x 4 x i1> %pg.s,
                                                                    <vscale x 4 x i32> %a,
                                                                    <vscale x 4 x i32> %b)
  ret <vscale x 4 x i32> %out
}

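; A predicate that is i32-based to begin with is trivially compatible after a
; round trip through svbool, so the immediate form should be usable as well.
; The function below is an illustrative sketch rather than an autogenerated
; test: the function name and the expected "lsr z0.s, z0.s, #1" selection are
; our assumption.
define <vscale x 4 x i32> @lsr_i32_ptrue_all_s_sketch(<vscale x 4 x i32> %a) #0 {
  %pg.s0 = tail call <vscale x 4 x i1> @llvm.aarch64.sve.ptrue.nxv4i1(i32 31)
  %pg.b = tail call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv4i1(<vscale x 4 x i1> %pg.s0)
  %pg.s = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> %pg.b)
  %b = tail call <vscale x 4 x i32> @llvm.aarch64.sve.dup.x.nxv4i32(i32 1)
  %out = tail call <vscale x 4 x i32> @llvm.aarch64.sve.lsr.nxv4i32(<vscale x 4 x i1> %pg.s,
                                                                    <vscale x 4 x i32> %a,
                                                                    <vscale x 4 x i32> %b)
  ret <vscale x 4 x i32> %out
}
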
; As mul_i32 but where pg is i8-based and thus compatible with i32.
define <vscale x 4 x i32> @mul_i32_ptrue_all_b(<vscale x 4 x i32> %a) #0 {
; CHECK-LABEL: mul_i32_ptrue_all_b:
; CHECK: mul z0.s, z0.s, #1
; CHECK-NEXT: ret
  %pg.b = tail call <vscale x 16 x i1> @llvm.aarch64.sve.ptrue.nxv16i1(i32 31)
  %pg.s = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> %pg.b)
  %b = tail call <vscale x 4 x i32> @llvm.aarch64.sve.dup.x.nxv4i32(i32 1)
  %out = tail call <vscale x 4 x i32> @llvm.aarch64.sve.mul.nxv4i32(<vscale x 4 x i1> %pg.s,
                                                                    <vscale x 4 x i32> %a,
                                                                    <vscale x 4 x i32> %b)
  ret <vscale x 4 x i32> %out
}

; As mul_i32 but where pg is i16-based and thus compatible with i32.
define <vscale x 4 x i32> @mul_i32_ptrue_all_h(<vscale x 4 x i32> %a) #0 {
; CHECK-LABEL: mul_i32_ptrue_all_h:
; CHECK: mul z0.s, z0.s, #1
; CHECK-NEXT: ret
  %pg.h = tail call <vscale x 8 x i1> @llvm.aarch64.sve.ptrue.nxv8i1(i32 31)
  %pg.b = tail call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv8i1(<vscale x 8 x i1> %pg.h)
  %pg.s = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> %pg.b)
  %b = tail call <vscale x 4 x i32> @llvm.aarch64.sve.dup.x.nxv4i32(i32 1)
  %out = tail call <vscale x 4 x i32> @llvm.aarch64.sve.mul.nxv4i32(<vscale x 4 x i1> %pg.s,
                                                                    <vscale x 4 x i32> %a,
                                                                    <vscale x 4 x i32> %b)
  ret <vscale x 4 x i32> %out
}

; As mul_i32 but where pg is i64-based, which is not compatible with i32, so
; the inactive lanes matter and the immediate form cannot be used.
define <vscale x 4 x i32> @mul_i32_ptrue_all_d(<vscale x 4 x i32> %a) #0 {
; CHECK-LABEL: mul_i32_ptrue_all_d:
; CHECK-DAG: ptrue [[PG:p[0-9]+]].d
; CHECK-DAG: mov [[DUP:z[0-9]+]].s, #1
; CHECK-DAG: mul z0.s, [[PG]]/m, z0.s, [[DUP]].s
; CHECK-NEXT: ret
  %pg.d = tail call <vscale x 2 x i1> @llvm.aarch64.sve.ptrue.nxv2i1(i32 31)
  %pg.b = tail call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv2i1(<vscale x 2 x i1> %pg.d)
  %pg.s = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> %pg.b)
  %b = tail call <vscale x 4 x i32> @llvm.aarch64.sve.dup.x.nxv4i32(i32 1)
  %out = tail call <vscale x 4 x i32> @llvm.aarch64.sve.mul.nxv4i32(<vscale x 4 x i1> %pg.s,
                                                                    <vscale x 4 x i32> %a,
                                                                    <vscale x 4 x i32> %b)
  ret <vscale x 4 x i32> %out
}

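; The MUL immediate form accepts signed 8-bit multiplicands, so an all-active
; multiply by 127 should fold just as the multiply by 1 above does. The
; function below is an illustrative sketch rather than an autogenerated test:
; the function name and the expected "mul z0.s, z0.s, #127" selection are our
; assumption about the upper bound of the range.
define <vscale x 4 x i32> @mul_i32_max_imm_sketch(<vscale x 4 x i32> %a) #0 {
  %pg.b = tail call <vscale x 16 x i1> @llvm.aarch64.sve.ptrue.nxv16i1(i32 31)
  %pg.s = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> %pg.b)
  %b = tail call <vscale x 4 x i32> @llvm.aarch64.sve.dup.x.nxv4i32(i32 127)
  %out = tail call <vscale x 4 x i32> @llvm.aarch64.sve.mul.nxv4i32(<vscale x 4 x i1> %pg.s,
                                                                    <vscale x 4 x i32> %a,
                                                                    <vscale x 4 x i32> %b)
  ret <vscale x 4 x i32> %out
}
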
declare <vscale x 16 x i8> @llvm.aarch64.sve.add.nxv16i8(<vscale x 16 x i1>, <vscale x 16 x i8>, <vscale x 16 x i8>)
declare <vscale x 8 x i16> @llvm.aarch64.sve.add.nxv8i16(<vscale x 8 x i1>, <vscale x 8 x i16>, <vscale x 8 x i16>)
declare <vscale x 4 x i32> @llvm.aarch64.sve.add.nxv4i32(<vscale x 4 x i1>, <vscale x 4 x i32>, <vscale x 4 x i32>)
declare <vscale x 2 x i64> @llvm.aarch64.sve.add.nxv2i64(<vscale x 2 x i1>, <vscale x 2 x i64>, <vscale x 2 x i64>)

declare <vscale x 16 x i8> @llvm.aarch64.sve.sub.nxv16i8(<vscale x 16 x i1>, <vscale x 16 x i8>, <vscale x 16 x i8>)
declare <vscale x 8 x i16> @llvm.aarch64.sve.sub.nxv8i16(<vscale x 8 x i1>, <vscale x 8 x i16>, <vscale x 8 x i16>)
declare <vscale x 4 x i32> @llvm.aarch64.sve.sub.nxv4i32(<vscale x 4 x i1>, <vscale x 4 x i32>, <vscale x 4 x i32>)
declare <vscale x 2 x i64> @llvm.aarch64.sve.sub.nxv2i64(<vscale x 2 x i1>, <vscale x 2 x i64>, <vscale x 2 x i64>)

declare <vscale x 16 x i8> @llvm.aarch64.sve.sqadd.x.nxv16i8(<vscale x 16 x i8>, <vscale x 16 x i8>)
declare <vscale x 8 x i16> @llvm.aarch64.sve.sqadd.x.nxv8i16(<vscale x 8 x i16>, <vscale x 8 x i16>)
declare <vscale x 4 x i32> @llvm.aarch64.sve.sqadd.x.nxv4i32(<vscale x 4 x i32>, <vscale x 4 x i32>)
declare <vscale x 2 x i64> @llvm.aarch64.sve.sqadd.x.nxv2i64(<vscale x 2 x i64>, <vscale x 2 x i64>)

declare <vscale x 16 x i8> @llvm.aarch64.sve.sqsub.x.nxv16i8(<vscale x 16 x i8>, <vscale x 16 x i8>)
declare <vscale x 8 x i16> @llvm.aarch64.sve.sqsub.x.nxv8i16(<vscale x 8 x i16>, <vscale x 8 x i16>)
declare <vscale x 4 x i32> @llvm.aarch64.sve.sqsub.x.nxv4i32(<vscale x 4 x i32>, <vscale x 4 x i32>)
declare <vscale x 2 x i64> @llvm.aarch64.sve.sqsub.x.nxv2i64(<vscale x 2 x i64>, <vscale x 2 x i64>)

declare <vscale x 16 x i8> @llvm.aarch64.sve.uqadd.x.nxv16i8(<vscale x 16 x i8>, <vscale x 16 x i8>)
declare <vscale x 8 x i16> @llvm.aarch64.sve.uqadd.x.nxv8i16(<vscale x 8 x i16>, <vscale x 8 x i16>)
declare <vscale x 4 x i32> @llvm.aarch64.sve.uqadd.x.nxv4i32(<vscale x 4 x i32>, <vscale x 4 x i32>)
declare <vscale x 2 x i64> @llvm.aarch64.sve.uqadd.x.nxv2i64(<vscale x 2 x i64>, <vscale x 2 x i64>)

declare <vscale x 16 x i8> @llvm.aarch64.sve.uqsub.x.nxv16i8(<vscale x 16 x i8>, <vscale x 16 x i8>)
declare <vscale x 8 x i16> @llvm.aarch64.sve.uqsub.x.nxv8i16(<vscale x 8 x i16>, <vscale x 8 x i16>)
declare <vscale x 4 x i32> @llvm.aarch64.sve.uqsub.x.nxv4i32(<vscale x 4 x i32>, <vscale x 4 x i32>)
declare <vscale x 2 x i64> @llvm.aarch64.sve.uqsub.x.nxv2i64(<vscale x 2 x i64>, <vscale x 2 x i64>)

declare <vscale x 16 x i8> @llvm.aarch64.sve.smax.nxv16i8(<vscale x 16 x i1>, <vscale x 16 x i8>, <vscale x 16 x i8>)
declare <vscale x 8 x i16> @llvm.aarch64.sve.smax.nxv8i16(<vscale x 8 x i1>, <vscale x 8 x i16>, <vscale x 8 x i16>)
declare <vscale x 4 x i32> @llvm.aarch64.sve.smax.nxv4i32(<vscale x 4 x i1>, <vscale x 4 x i32>, <vscale x 4 x i32>)
declare <vscale x 2 x i64> @llvm.aarch64.sve.smax.nxv2i64(<vscale x 2 x i1>, <vscale x 2 x i64>, <vscale x 2 x i64>)

declare <vscale x 16 x i8> @llvm.aarch64.sve.smin.nxv16i8(<vscale x 16 x i1>, <vscale x 16 x i8>, <vscale x 16 x i8>)
declare <vscale x 8 x i16> @llvm.aarch64.sve.smin.nxv8i16(<vscale x 8 x i1>, <vscale x 8 x i16>, <vscale x 8 x i16>)
declare <vscale x 4 x i32> @llvm.aarch64.sve.smin.nxv4i32(<vscale x 4 x i1>, <vscale x 4 x i32>, <vscale x 4 x i32>)
declare <vscale x 2 x i64> @llvm.aarch64.sve.smin.nxv2i64(<vscale x 2 x i1>, <vscale x 2 x i64>, <vscale x 2 x i64>)

declare <vscale x 16 x i8> @llvm.aarch64.sve.umax.nxv16i8(<vscale x 16 x i1>, <vscale x 16 x i8>, <vscale x 16 x i8>)
declare <vscale x 8 x i16> @llvm.aarch64.sve.umax.nxv8i16(<vscale x 8 x i1>, <vscale x 8 x i16>, <vscale x 8 x i16>)
declare <vscale x 4 x i32> @llvm.aarch64.sve.umax.nxv4i32(<vscale x 4 x i1>, <vscale x 4 x i32>, <vscale x 4 x i32>)
declare <vscale x 2 x i64> @llvm.aarch64.sve.umax.nxv2i64(<vscale x 2 x i1>, <vscale x 2 x i64>, <vscale x 2 x i64>)

declare <vscale x 16 x i8> @llvm.aarch64.sve.umin.nxv16i8(<vscale x 16 x i1>, <vscale x 16 x i8>, <vscale x 16 x i8>)
declare <vscale x 8 x i16> @llvm.aarch64.sve.umin.nxv8i16(<vscale x 8 x i1>, <vscale x 8 x i16>, <vscale x 8 x i16>)
declare <vscale x 4 x i32> @llvm.aarch64.sve.umin.nxv4i32(<vscale x 4 x i1>, <vscale x 4 x i32>, <vscale x 4 x i32>)
declare <vscale x 2 x i64> @llvm.aarch64.sve.umin.nxv2i64(<vscale x 2 x i1>, <vscale x 2 x i64>, <vscale x 2 x i64>)

declare <vscale x 16 x i8> @llvm.aarch64.sve.asr.nxv16i8(<vscale x 16 x i1>, <vscale x 16 x i8>, <vscale x 16 x i8>)
declare <vscale x 8 x i16> @llvm.aarch64.sve.asr.nxv8i16(<vscale x 8 x i1>, <vscale x 8 x i16>, <vscale x 8 x i16>)
declare <vscale x 4 x i32> @llvm.aarch64.sve.asr.nxv4i32(<vscale x 4 x i1>, <vscale x 4 x i32>, <vscale x 4 x i32>)
declare <vscale x 2 x i64> @llvm.aarch64.sve.asr.nxv2i64(<vscale x 2 x i1>, <vscale x 2 x i64>, <vscale x 2 x i64>)

declare <vscale x 16 x i8> @llvm.aarch64.sve.lsl.nxv16i8(<vscale x 16 x i1>, <vscale x 16 x i8>, <vscale x 16 x i8>)
declare <vscale x 8 x i16> @llvm.aarch64.sve.lsl.nxv8i16(<vscale x 8 x i1>, <vscale x 8 x i16>, <vscale x 8 x i16>)
declare <vscale x 4 x i32> @llvm.aarch64.sve.lsl.nxv4i32(<vscale x 4 x i1>, <vscale x 4 x i32>, <vscale x 4 x i32>)
declare <vscale x 2 x i64> @llvm.aarch64.sve.lsl.nxv2i64(<vscale x 2 x i1>, <vscale x 2 x i64>, <vscale x 2 x i64>)

declare <vscale x 16 x i8> @llvm.aarch64.sve.lsr.nxv16i8(<vscale x 16 x i1>, <vscale x 16 x i8>, <vscale x 16 x i8>)
declare <vscale x 8 x i16> @llvm.aarch64.sve.lsr.nxv8i16(<vscale x 8 x i1>, <vscale x 8 x i16>, <vscale x 8 x i16>)
declare <vscale x 4 x i32> @llvm.aarch64.sve.lsr.nxv4i32(<vscale x 4 x i1>, <vscale x 4 x i32>, <vscale x 4 x i32>)
declare <vscale x 2 x i64> @llvm.aarch64.sve.lsr.nxv2i64(<vscale x 2 x i1>, <vscale x 2 x i64>, <vscale x 2 x i64>)

declare <vscale x 16 x i8> @llvm.aarch64.sve.mul.nxv16i8(<vscale x 16 x i1>, <vscale x 16 x i8>, <vscale x 16 x i8>)
declare <vscale x 8 x i16> @llvm.aarch64.sve.mul.nxv8i16(<vscale x 8 x i1>, <vscale x 8 x i16>, <vscale x 8 x i16>)
declare <vscale x 4 x i32> @llvm.aarch64.sve.mul.nxv4i32(<vscale x 4 x i1>, <vscale x 4 x i32>, <vscale x 4 x i32>)
declare <vscale x 2 x i64> @llvm.aarch64.sve.mul.nxv2i64(<vscale x 2 x i1>, <vscale x 2 x i64>, <vscale x 2 x i64>)

declare <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1>)
declare <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1>)
declare <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1>)

declare <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv4i1(<vscale x 4 x i1>)
declare <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv8i1(<vscale x 8 x i1>)
declare <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv2i1(<vscale x 2 x i1>)

declare <vscale x 4 x i32> @llvm.aarch64.sve.dup.x.nxv4i32(i32)

declare <vscale x 16 x i1> @llvm.aarch64.sve.ptrue.nxv16i1(i32 %pattern)
declare <vscale x 8 x i1> @llvm.aarch64.sve.ptrue.nxv8i1(i32 %pattern)
declare <vscale x 4 x i1> @llvm.aarch64.sve.ptrue.nxv4i1(i32 %pattern)
declare <vscale x 2 x i1> @llvm.aarch64.sve.ptrue.nxv2i1(i32 %pattern)

attributes #0 = { "target-features"="+sve" }