; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sve < %s | FileCheck %s
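
; All tests below use a ptrue predicate with pattern 31 (SV_ALL, all lanes
; active), so the predicated intrinsics may be lowered to the unpredicated
; immediate instruction forms whenever the splatted constant fits the
; encoding (for the add/sub family: an unsigned 8-bit value, optionally
; shifted left by eight).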

; ADD

define <vscale x 16 x i8> @add_i8(<vscale x 16 x i8> %a) {
; CHECK-LABEL: add_i8:
; CHECK:       // %bb.0:
; CHECK-NEXT:    add z0.b, z0.b, #127 // =0x7f
; CHECK-NEXT:    ret
  %pg = call <vscale x 16 x i1> @llvm.aarch64.sve.ptrue.nxv16i1(i32 31)
  %elt = insertelement <vscale x 16 x i8> undef, i8 127, i32 0
  %splat = shufflevector <vscale x 16 x i8> %elt, <vscale x 16 x i8> undef, <vscale x 16 x i32> zeroinitializer
  %out = call <vscale x 16 x i8> @llvm.aarch64.sve.add.u.nxv16i8(<vscale x 16 x i1> %pg, <vscale x 16 x i8> %a, <vscale x 16 x i8> %splat)
  ret <vscale x 16 x i8> %out
}

define <vscale x 8 x i16> @add_i16(<vscale x 8 x i16> %a) {
; CHECK-LABEL: add_i16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    add z0.h, z0.h, #127 // =0x7f
; CHECK-NEXT:    ret
  %pg = call <vscale x 8 x i1> @llvm.aarch64.sve.ptrue.nxv8i1(i32 31)
  %elt = insertelement <vscale x 8 x i16> undef, i16 127, i32 0
  %splat = shufflevector <vscale x 8 x i16> %elt, <vscale x 8 x i16> undef, <vscale x 8 x i32> zeroinitializer
  %out = call <vscale x 8 x i16> @llvm.aarch64.sve.add.u.nxv8i16(<vscale x 8 x i1> %pg, <vscale x 8 x i16> %a, <vscale x 8 x i16> %splat)
  ret <vscale x 8 x i16> %out
}

define <vscale x 8 x i16> @add_i16_out_of_range(<vscale x 8 x i16> %a) {
; CHECK-LABEL: add_i16_out_of_range:
; CHECK:       // %bb.0:
; CHECK-NEXT:    dupm z1.b, #0x1
; CHECK-NEXT:    add z0.h, z0.h, z1.h
; CHECK-NEXT:    ret
  %pg = call <vscale x 8 x i1> @llvm.aarch64.sve.ptrue.nxv8i1(i32 31)
  %elt = insertelement <vscale x 8 x i16> undef, i16 257, i32 0
  %splat = shufflevector <vscale x 8 x i16> %elt, <vscale x 8 x i16> undef, <vscale x 8 x i32> zeroinitializer
  %out = call <vscale x 8 x i16> @llvm.aarch64.sve.add.u.nxv8i16(<vscale x 8 x i1> %pg, <vscale x 8 x i16> %a, <vscale x 8 x i16> %splat)
  ret <vscale x 8 x i16> %out
}
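
; 257 (0x101) does not fit the 8-bit (optionally LSL #8) immediate encoding,
; but an nxv8i16 splat of 0x0101 is a valid logical bitmask immediate, so it
; can be materialised with a single DUPM (every byte set to 0x01).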

define <vscale x 4 x i32> @add_i32(<vscale x 4 x i32> %a) {
; CHECK-LABEL: add_i32:
; CHECK:       // %bb.0:
; CHECK-NEXT:    add z0.s, z0.s, #127 // =0x7f
; CHECK-NEXT:    ret
  %pg = call <vscale x 4 x i1> @llvm.aarch64.sve.ptrue.nxv4i1(i32 31)
  %elt = insertelement <vscale x 4 x i32> undef, i32 127, i32 0
  %splat = shufflevector <vscale x 4 x i32> %elt, <vscale x 4 x i32> undef, <vscale x 4 x i32> zeroinitializer
  %out = call <vscale x 4 x i32> @llvm.aarch64.sve.add.u.nxv4i32(<vscale x 4 x i1> %pg, <vscale x 4 x i32> %a, <vscale x 4 x i32> %splat)
  ret <vscale x 4 x i32> %out
}

define <vscale x 4 x i32> @add_i32_out_of_range(<vscale x 4 x i32> %a) {
; CHECK-LABEL: add_i32_out_of_range:
; CHECK:       // %bb.0:
; CHECK-NEXT:    mov w8, #257 // =0x101
; CHECK-NEXT:    mov z1.s, w8
; CHECK-NEXT:    add z0.s, z0.s, z1.s
; CHECK-NEXT:    ret
  %pg = call <vscale x 4 x i1> @llvm.aarch64.sve.ptrue.nxv4i1(i32 31)
  %elt = insertelement <vscale x 4 x i32> undef, i32 257, i32 0
  %splat = shufflevector <vscale x 4 x i32> %elt, <vscale x 4 x i32> undef, <vscale x 4 x i32> zeroinitializer
  %out = call <vscale x 4 x i32> @llvm.aarch64.sve.add.u.nxv4i32(<vscale x 4 x i1> %pg, <vscale x 4 x i32> %a, <vscale x 4 x i32> %splat)
  ret <vscale x 4 x i32> %out
}
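
; For 32- and 64-bit elements a splat of 257 is not a valid bitmask immediate
; either, so the constant is built in a general-purpose register and
; broadcast with a DUP before the vector add.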

define <vscale x 2 x i64> @add_i64(<vscale x 2 x i64> %a) {
; CHECK-LABEL: add_i64:
; CHECK:       // %bb.0:
; CHECK-NEXT:    add z0.d, z0.d, #127 // =0x7f
; CHECK-NEXT:    ret
  %pg = call <vscale x 2 x i1> @llvm.aarch64.sve.ptrue.nxv2i1(i32 31)
  %elt = insertelement <vscale x 2 x i64> undef, i64 127, i64 0
  %splat = shufflevector <vscale x 2 x i64> %elt, <vscale x 2 x i64> undef, <vscale x 2 x i32> zeroinitializer
  %out = call <vscale x 2 x i64> @llvm.aarch64.sve.add.u.nxv2i64(<vscale x 2 x i1> %pg, <vscale x 2 x i64> %a, <vscale x 2 x i64> %splat)
  ret <vscale x 2 x i64> %out
}

define <vscale x 2 x i64> @add_i64_out_of_range(<vscale x 2 x i64> %a) {
; CHECK-LABEL: add_i64_out_of_range:
; CHECK:       // %bb.0:
; CHECK-NEXT:    mov w8, #257 // =0x101
; CHECK-NEXT:    mov z1.d, x8
; CHECK-NEXT:    add z0.d, z0.d, z1.d
; CHECK-NEXT:    ret
  %pg = call <vscale x 2 x i1> @llvm.aarch64.sve.ptrue.nxv2i1(i32 31)
  %elt = insertelement <vscale x 2 x i64> undef, i64 257, i64 0
  %splat = shufflevector <vscale x 2 x i64> %elt, <vscale x 2 x i64> undef, <vscale x 2 x i32> zeroinitializer
  %out = call <vscale x 2 x i64> @llvm.aarch64.sve.add.u.nxv2i64(<vscale x 2 x i1> %pg, <vscale x 2 x i64> %a, <vscale x 2 x i64> %splat)
  ret <vscale x 2 x i64> %out
}

; SUB

define <vscale x 16 x i8> @sub_i8(<vscale x 16 x i8> %a) {
; CHECK-LABEL: sub_i8:
; CHECK:       // %bb.0:
; CHECK-NEXT:    sub z0.b, z0.b, #127 // =0x7f
; CHECK-NEXT:    ret
  %pg = call <vscale x 16 x i1> @llvm.aarch64.sve.ptrue.nxv16i1(i32 31)
  %elt = insertelement <vscale x 16 x i8> undef, i8 127, i32 0
  %splat = shufflevector <vscale x 16 x i8> %elt, <vscale x 16 x i8> undef, <vscale x 16 x i32> zeroinitializer
  %out = call <vscale x 16 x i8> @llvm.aarch64.sve.sub.u.nxv16i8(<vscale x 16 x i1> %pg, <vscale x 16 x i8> %a, <vscale x 16 x i8> %splat)
  ret <vscale x 16 x i8> %out
}

define <vscale x 8 x i16> @sub_i16(<vscale x 8 x i16> %a) {
; CHECK-LABEL: sub_i16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    sub z0.h, z0.h, #127 // =0x7f
; CHECK-NEXT:    ret
  %pg = call <vscale x 8 x i1> @llvm.aarch64.sve.ptrue.nxv8i1(i32 31)
  %elt = insertelement <vscale x 8 x i16> undef, i16 127, i32 0
  %splat = shufflevector <vscale x 8 x i16> %elt, <vscale x 8 x i16> undef, <vscale x 8 x i32> zeroinitializer
  %out = call <vscale x 8 x i16> @llvm.aarch64.sve.sub.u.nxv8i16(<vscale x 8 x i1> %pg, <vscale x 8 x i16> %a, <vscale x 8 x i16> %splat)
  ret <vscale x 8 x i16> %out
}

define <vscale x 8 x i16> @sub_i16_out_of_range(<vscale x 8 x i16> %a) {
; CHECK-LABEL: sub_i16_out_of_range:
; CHECK:       // %bb.0:
; CHECK-NEXT:    dupm z1.b, #0x1
; CHECK-NEXT:    sub z0.h, z0.h, z1.h
; CHECK-NEXT:    ret
  %pg = call <vscale x 8 x i1> @llvm.aarch64.sve.ptrue.nxv8i1(i32 31)
  %elt = insertelement <vscale x 8 x i16> undef, i16 257, i32 0
  %splat = shufflevector <vscale x 8 x i16> %elt, <vscale x 8 x i16> undef, <vscale x 8 x i32> zeroinitializer
  %out = call <vscale x 8 x i16> @llvm.aarch64.sve.sub.u.nxv8i16(<vscale x 8 x i1> %pg, <vscale x 8 x i16> %a, <vscale x 8 x i16> %splat)
  ret <vscale x 8 x i16> %out
}

define <vscale x 4 x i32> @sub_i32(<vscale x 4 x i32> %a) {
; CHECK-LABEL: sub_i32:
; CHECK:       // %bb.0:
; CHECK-NEXT:    sub z0.s, z0.s, #127 // =0x7f
; CHECK-NEXT:    ret
  %pg = call <vscale x 4 x i1> @llvm.aarch64.sve.ptrue.nxv4i1(i32 31)
  %elt = insertelement <vscale x 4 x i32> undef, i32 127, i32 0
  %splat = shufflevector <vscale x 4 x i32> %elt, <vscale x 4 x i32> undef, <vscale x 4 x i32> zeroinitializer
  %out = call <vscale x 4 x i32> @llvm.aarch64.sve.sub.u.nxv4i32(<vscale x 4 x i1> %pg, <vscale x 4 x i32> %a, <vscale x 4 x i32> %splat)
  ret <vscale x 4 x i32> %out
}

define <vscale x 4 x i32> @sub_i32_out_of_range(<vscale x 4 x i32> %a) {
; CHECK-LABEL: sub_i32_out_of_range:
; CHECK:       // %bb.0:
; CHECK-NEXT:    mov w8, #257 // =0x101
; CHECK-NEXT:    mov z1.s, w8
; CHECK-NEXT:    sub z0.s, z0.s, z1.s
; CHECK-NEXT:    ret
  %pg = call <vscale x 4 x i1> @llvm.aarch64.sve.ptrue.nxv4i1(i32 31)
  %elt = insertelement <vscale x 4 x i32> undef, i32 257, i32 0
  %splat = shufflevector <vscale x 4 x i32> %elt, <vscale x 4 x i32> undef, <vscale x 4 x i32> zeroinitializer
  %out = call <vscale x 4 x i32> @llvm.aarch64.sve.sub.u.nxv4i32(<vscale x 4 x i1> %pg, <vscale x 4 x i32> %a, <vscale x 4 x i32> %splat)
  ret <vscale x 4 x i32> %out
}

define <vscale x 2 x i64> @sub_i64(<vscale x 2 x i64> %a) {
; CHECK-LABEL: sub_i64:
; CHECK:       // %bb.0:
; CHECK-NEXT:    sub z0.d, z0.d, #127 // =0x7f
; CHECK-NEXT:    ret
  %pg = call <vscale x 2 x i1> @llvm.aarch64.sve.ptrue.nxv2i1(i32 31)
  %elt = insertelement <vscale x 2 x i64> undef, i64 127, i64 0
  %splat = shufflevector <vscale x 2 x i64> %elt, <vscale x 2 x i64> undef, <vscale x 2 x i32> zeroinitializer
  %out = call <vscale x 2 x i64> @llvm.aarch64.sve.sub.u.nxv2i64(<vscale x 2 x i1> %pg, <vscale x 2 x i64> %a, <vscale x 2 x i64> %splat)
  ret <vscale x 2 x i64> %out
}

define <vscale x 2 x i64> @sub_i64_out_of_range(<vscale x 2 x i64> %a) {
; CHECK-LABEL: sub_i64_out_of_range:
; CHECK:       // %bb.0:
; CHECK-NEXT:    mov w8, #257 // =0x101
; CHECK-NEXT:    mov z1.d, x8
; CHECK-NEXT:    sub z0.d, z0.d, z1.d
; CHECK-NEXT:    ret
  %pg = call <vscale x 2 x i1> @llvm.aarch64.sve.ptrue.nxv2i1(i32 31)
  %elt = insertelement <vscale x 2 x i64> undef, i64 257, i64 0
  %splat = shufflevector <vscale x 2 x i64> %elt, <vscale x 2 x i64> undef, <vscale x 2 x i32> zeroinitializer
  %out = call <vscale x 2 x i64> @llvm.aarch64.sve.sub.u.nxv2i64(<vscale x 2 x i1> %pg, <vscale x 2 x i64> %a, <vscale x 2 x i64> %splat)
  ret <vscale x 2 x i64> %out
}

; As sub_i32 but where pg is i8 based and thus compatible for i32.
define <vscale x 4 x i32> @sub_i32_ptrue_all_b(<vscale x 4 x i32> %a) #0 {
; CHECK-LABEL: sub_i32_ptrue_all_b:
; CHECK:       // %bb.0:
; CHECK-NEXT:    sub z0.s, z0.s, #1 // =0x1
; CHECK-NEXT:    ret
  %pg.b = tail call <vscale x 16 x i1> @llvm.aarch64.sve.ptrue.nxv16i1(i32 31)
  %pg.s = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> %pg.b)
  %b = tail call <vscale x 4 x i32> @llvm.aarch64.sve.dup.x.nxv4i32(i32 1)
  %out = tail call <vscale x 4 x i32> @llvm.aarch64.sve.sub.u.nxv4i32(<vscale x 4 x i1> %pg.s, <vscale x 4 x i32> %a, <vscale x 4 x i32> %b)
  ret <vscale x 4 x i32> %out
}

; As sub_i32 but where pg is i16 based and thus compatible for i32.
define <vscale x 4 x i32> @sub_i32_ptrue_all_h(<vscale x 4 x i32> %a) #0 {
; CHECK-LABEL: sub_i32_ptrue_all_h:
; CHECK:       // %bb.0:
; CHECK-NEXT:    sub z0.s, z0.s, #1 // =0x1
; CHECK-NEXT:    ret
  %pg.h = tail call <vscale x 8 x i1> @llvm.aarch64.sve.ptrue.nxv8i1(i32 31)
  %pg.b = tail call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv8i1(<vscale x 8 x i1> %pg.h)
  %pg.s = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> %pg.b)
  %b = tail call <vscale x 4 x i32> @llvm.aarch64.sve.dup.x.nxv4i32(i32 1)
  %out = tail call <vscale x 4 x i32> @llvm.aarch64.sve.sub.u.nxv4i32(<vscale x 4 x i1> %pg.s, <vscale x 4 x i32> %a, <vscale x 4 x i32> %b)
  ret <vscale x 4 x i32> %out
}

; As sub_i32 but where pg is i64 based, which is not compatible for i32 and
; thus inactive lanes are important and the immediate form cannot be used.
define <vscale x 4 x i32> @sub_i32_ptrue_all_d(<vscale x 4 x i32> %a) #0 {
; CHECK-LABEL: sub_i32_ptrue_all_d:
; CHECK:       // %bb.0:
; CHECK-NEXT:    mov z1.s, #1 // =0x1
; CHECK-NEXT:    ptrue p0.d
; CHECK-NEXT:    sub z0.s, p0/m, z0.s, z1.s
; CHECK-NEXT:    ret
  %pg.d = tail call <vscale x 2 x i1> @llvm.aarch64.sve.ptrue.nxv2i1(i32 31)
  %pg.b = tail call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv2i1(<vscale x 2 x i1> %pg.d)
  %pg.s = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> %pg.b)
  %b = tail call <vscale x 4 x i32> @llvm.aarch64.sve.dup.x.nxv4i32(i32 1)
  %out = tail call <vscale x 4 x i32> @llvm.aarch64.sve.sub.nxv4i32(<vscale x 4 x i1> %pg.s, <vscale x 4 x i32> %a, <vscale x 4 x i32> %b)
  ret <vscale x 4 x i32> %out
}
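
; Note the test above deliberately uses the merging @llvm.aarch64.sve.sub
; intrinsic rather than the undef-variant ".u" form: the i64-based predicate
; leaves some i32 lanes inactive, so their original values must be preserved,
; which forces the predicated (merging) SUB behind a ptrue p0.d.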

; SUBR

define <vscale x 16 x i8> @subr_i8(<vscale x 16 x i8> %a) {
; CHECK-LABEL: subr_i8:
; CHECK:       // %bb.0:
; CHECK-NEXT:    subr z0.b, z0.b, #127 // =0x7f
; CHECK-NEXT:    ret
  %pg = call <vscale x 16 x i1> @llvm.aarch64.sve.ptrue.nxv16i1(i32 31)
  %elt = insertelement <vscale x 16 x i8> undef, i8 127, i32 0
  %splat = shufflevector <vscale x 16 x i8> %elt, <vscale x 16 x i8> undef, <vscale x 16 x i32> zeroinitializer
  %out = call <vscale x 16 x i8> @llvm.aarch64.sve.subr.nxv16i8(<vscale x 16 x i1> %pg, <vscale x 16 x i8> %a, <vscale x 16 x i8> %splat)
  ret <vscale x 16 x i8> %out
}

define <vscale x 8 x i16> @subr_i16(<vscale x 8 x i16> %a) {
; CHECK-LABEL: subr_i16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    subr z0.h, z0.h, #127 // =0x7f
; CHECK-NEXT:    ret
  %pg = call <vscale x 8 x i1> @llvm.aarch64.sve.ptrue.nxv8i1(i32 31)
  %elt = insertelement <vscale x 8 x i16> undef, i16 127, i32 0
  %splat = shufflevector <vscale x 8 x i16> %elt, <vscale x 8 x i16> undef, <vscale x 8 x i32> zeroinitializer
  %out = call <vscale x 8 x i16> @llvm.aarch64.sve.subr.nxv8i16(<vscale x 8 x i1> %pg, <vscale x 8 x i16> %a, <vscale x 8 x i16> %splat)
  ret <vscale x 8 x i16> %out
}

define <vscale x 8 x i16> @subr_i16_out_of_range(<vscale x 8 x i16> %a) {
; CHECK-LABEL: subr_i16_out_of_range:
; CHECK:       // %bb.0:
; CHECK-NEXT:    dupm z1.b, #0x1
; CHECK-NEXT:    sub z0.h, z1.h, z0.h
; CHECK-NEXT:    ret
  %pg = call <vscale x 8 x i1> @llvm.aarch64.sve.ptrue.nxv8i1(i32 31)
  %elt = insertelement <vscale x 8 x i16> undef, i16 257, i32 0
  %splat = shufflevector <vscale x 8 x i16> %elt, <vscale x 8 x i16> undef, <vscale x 8 x i32> zeroinitializer
  %out = call <vscale x 8 x i16> @llvm.aarch64.sve.subr.nxv8i16(<vscale x 8 x i1> %pg, <vscale x 8 x i16> %a, <vscale x 8 x i16> %splat)
  ret <vscale x 8 x i16> %out
}

define <vscale x 4 x i32> @subr_i32(<vscale x 4 x i32> %a) {
; CHECK-LABEL: subr_i32:
; CHECK:       // %bb.0:
; CHECK-NEXT:    subr z0.s, z0.s, #127 // =0x7f
; CHECK-NEXT:    ret
  %pg = call <vscale x 4 x i1> @llvm.aarch64.sve.ptrue.nxv4i1(i32 31)
  %elt = insertelement <vscale x 4 x i32> undef, i32 127, i32 0
  %splat = shufflevector <vscale x 4 x i32> %elt, <vscale x 4 x i32> undef, <vscale x 4 x i32> zeroinitializer
  %out = call <vscale x 4 x i32> @llvm.aarch64.sve.subr.nxv4i32(<vscale x 4 x i1> %pg, <vscale x 4 x i32> %a, <vscale x 4 x i32> %splat)
  ret <vscale x 4 x i32> %out
}

define <vscale x 4 x i32> @subr_i32_out_of_range(<vscale x 4 x i32> %a) {
; CHECK-LABEL: subr_i32_out_of_range:
; CHECK:       // %bb.0:
; CHECK-NEXT:    mov w8, #257 // =0x101
; CHECK-NEXT:    mov z1.s, w8
; CHECK-NEXT:    sub z0.s, z1.s, z0.s
; CHECK-NEXT:    ret
  %pg = call <vscale x 4 x i1> @llvm.aarch64.sve.ptrue.nxv4i1(i32 31)
  %elt = insertelement <vscale x 4 x i32> undef, i32 257, i32 0
  %splat = shufflevector <vscale x 4 x i32> %elt, <vscale x 4 x i32> undef, <vscale x 4 x i32> zeroinitializer
  %out = call <vscale x 4 x i32> @llvm.aarch64.sve.subr.nxv4i32(<vscale x 4 x i1> %pg, <vscale x 4 x i32> %a, <vscale x 4 x i32> %splat)
  ret <vscale x 4 x i32> %out
}

define <vscale x 2 x i64> @subr_i64(<vscale x 2 x i64> %a) {
; CHECK-LABEL: subr_i64:
; CHECK:       // %bb.0:
; CHECK-NEXT:    subr z0.d, z0.d, #127 // =0x7f
; CHECK-NEXT:    ret
  %pg = call <vscale x 2 x i1> @llvm.aarch64.sve.ptrue.nxv2i1(i32 31)
  %elt = insertelement <vscale x 2 x i64> undef, i64 127, i64 0
  %splat = shufflevector <vscale x 2 x i64> %elt, <vscale x 2 x i64> undef, <vscale x 2 x i32> zeroinitializer
  %out = call <vscale x 2 x i64> @llvm.aarch64.sve.subr.nxv2i64(<vscale x 2 x i1> %pg, <vscale x 2 x i64> %a, <vscale x 2 x i64> %splat)
  ret <vscale x 2 x i64> %out
}

define <vscale x 2 x i64> @subr_i64_out_of_range(<vscale x 2 x i64> %a) {
; CHECK-LABEL: subr_i64_out_of_range:
; CHECK:       // %bb.0:
; CHECK-NEXT:    mov w8, #257 // =0x101
; CHECK-NEXT:    mov z1.d, x8
; CHECK-NEXT:    sub z0.d, z1.d, z0.d
; CHECK-NEXT:    ret
  %pg = call <vscale x 2 x i1> @llvm.aarch64.sve.ptrue.nxv2i1(i32 31)
  %elt = insertelement <vscale x 2 x i64> undef, i64 257, i64 0
  %splat = shufflevector <vscale x 2 x i64> %elt, <vscale x 2 x i64> undef, <vscale x 2 x i32> zeroinitializer
  %out = call <vscale x 2 x i64> @llvm.aarch64.sve.subr.nxv2i64(<vscale x 2 x i1> %pg, <vscale x 2 x i64> %a, <vscale x 2 x i64> %splat)
  ret <vscale x 2 x i64> %out
}

; As subr_i32 but where pg is i8 based and thus compatible for i32.
define <vscale x 4 x i32> @subr_i32_ptrue_all_b(<vscale x 4 x i32> %a) #0 {
; CHECK-LABEL: subr_i32_ptrue_all_b:
; CHECK:       // %bb.0:
; CHECK-NEXT:    subr z0.s, z0.s, #1 // =0x1
; CHECK-NEXT:    ret
  %pg.b = tail call <vscale x 16 x i1> @llvm.aarch64.sve.ptrue.nxv16i1(i32 31)
  %pg.s = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> %pg.b)
  %b = tail call <vscale x 4 x i32> @llvm.aarch64.sve.dup.x.nxv4i32(i32 1)
  %out = tail call <vscale x 4 x i32> @llvm.aarch64.sve.subr.nxv4i32(<vscale x 4 x i1> %pg.s, <vscale x 4 x i32> %a, <vscale x 4 x i32> %b)
  ret <vscale x 4 x i32> %out
}

; As subr_i32 but where pg is i16 based and thus compatible for i32.
define <vscale x 4 x i32> @subr_i32_ptrue_all_h(<vscale x 4 x i32> %a) #0 {
; CHECK-LABEL: subr_i32_ptrue_all_h:
; CHECK:       // %bb.0:
; CHECK-NEXT:    subr z0.s, z0.s, #1 // =0x1
; CHECK-NEXT:    ret
  %pg.h = tail call <vscale x 8 x i1> @llvm.aarch64.sve.ptrue.nxv8i1(i32 31)
  %pg.b = tail call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv8i1(<vscale x 8 x i1> %pg.h)
  %pg.s = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> %pg.b)
  %b = tail call <vscale x 4 x i32> @llvm.aarch64.sve.dup.x.nxv4i32(i32 1)
  %out = tail call <vscale x 4 x i32> @llvm.aarch64.sve.subr.nxv4i32(<vscale x 4 x i1> %pg.s, <vscale x 4 x i32> %a, <vscale x 4 x i32> %b)
  ret <vscale x 4 x i32> %out
}

; As subr_i32 but where pg is i64 based, which is not compatible for i32 and
; thus inactive lanes are important and the immediate form cannot be used.
define <vscale x 4 x i32> @subr_i32_ptrue_all_d(<vscale x 4 x i32> %a) #0 {
; CHECK-LABEL: subr_i32_ptrue_all_d:
; CHECK:       // %bb.0:
; CHECK-NEXT:    mov z1.s, #1 // =0x1
; CHECK-NEXT:    ptrue p0.d
; CHECK-NEXT:    subr z0.s, p0/m, z0.s, z1.s
; CHECK-NEXT:    ret
  %pg.d = tail call <vscale x 2 x i1> @llvm.aarch64.sve.ptrue.nxv2i1(i32 31)
  %pg.b = tail call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv2i1(<vscale x 2 x i1> %pg.d)
  %pg.s = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> %pg.b)
  %b = tail call <vscale x 4 x i32> @llvm.aarch64.sve.dup.x.nxv4i32(i32 1)
  %out = tail call <vscale x 4 x i32> @llvm.aarch64.sve.subr.nxv4i32(<vscale x 4 x i1> %pg.s, <vscale x 4 x i32> %a, <vscale x 4 x i32> %b)
  ret <vscale x 4 x i32> %out
}

; SMAX

define <vscale x 16 x i8> @smax_i8(<vscale x 16 x i8> %a) {
; CHECK-LABEL: smax_i8:
; CHECK:       // %bb.0:
; CHECK-NEXT:    smax z0.b, z0.b, #-128
; CHECK-NEXT:    ret
  %pg = call <vscale x 16 x i1> @llvm.aarch64.sve.ptrue.nxv16i1(i32 31)
  %elt = insertelement <vscale x 16 x i8> undef, i8 -128, i32 0
  %splat = shufflevector <vscale x 16 x i8> %elt, <vscale x 16 x i8> undef, <vscale x 16 x i32> zeroinitializer
  %out = call <vscale x 16 x i8> @llvm.aarch64.sve.smax.u.nxv16i8(<vscale x 16 x i1> %pg, <vscale x 16 x i8> %a, <vscale x 16 x i8> %splat)
  ret <vscale x 16 x i8> %out
}

define <vscale x 8 x i16> @smax_i16(<vscale x 8 x i16> %a) {
; CHECK-LABEL: smax_i16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    smax z0.h, z0.h, #127
; CHECK-NEXT:    ret
  %pg = call <vscale x 8 x i1> @llvm.aarch64.sve.ptrue.nxv8i1(i32 31)
  %elt = insertelement <vscale x 8 x i16> undef, i16 127, i32 0
  %splat = shufflevector <vscale x 8 x i16> %elt, <vscale x 8 x i16> undef, <vscale x 8 x i32> zeroinitializer
  %out = call <vscale x 8 x i16> @llvm.aarch64.sve.smax.u.nxv8i16(<vscale x 8 x i1> %pg, <vscale x 8 x i16> %a, <vscale x 8 x i16> %splat)
  ret <vscale x 8 x i16> %out
}

define <vscale x 8 x i16> @smax_i16_out_of_range(<vscale x 8 x i16> %a) {
; CHECK-LABEL: smax_i16_out_of_range:
; CHECK:       // %bb.0:
; CHECK-NEXT:    mov w8, #129 // =0x81
; CHECK-NEXT:    ptrue p0.h
; CHECK-NEXT:    mov z1.h, w8
; CHECK-NEXT:    smax z0.h, p0/m, z0.h, z1.h
; CHECK-NEXT:    ret
  %pg = call <vscale x 8 x i1> @llvm.aarch64.sve.ptrue.nxv8i1(i32 31)
  %elt = insertelement <vscale x 8 x i16> undef, i16 129, i32 0
  %splat = shufflevector <vscale x 8 x i16> %elt, <vscale x 8 x i16> undef, <vscale x 8 x i32> zeroinitializer
  %out = call <vscale x 8 x i16> @llvm.aarch64.sve.smax.u.nxv8i16(<vscale x 8 x i1> %pg, <vscale x 8 x i16> %a, <vscale x 8 x i16> %splat)
  ret <vscale x 8 x i16> %out
}
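
; SMAX/SMIN (immediate) encode a signed 8-bit value, so only splats in the
; range [-128, 127] can use the immediate form; 129 above (and -129/65535
; below) must be materialised into a vector register instead.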

define <vscale x 4 x i32> @smax_i32(<vscale x 4 x i32> %a) {
; CHECK-LABEL: smax_i32:
; CHECK:       // %bb.0:
; CHECK-NEXT:    smax z0.s, z0.s, #-128
; CHECK-NEXT:    ret
  %pg = call <vscale x 4 x i1> @llvm.aarch64.sve.ptrue.nxv4i1(i32 31)
  %elt = insertelement <vscale x 4 x i32> undef, i32 -128, i32 0
  %splat = shufflevector <vscale x 4 x i32> %elt, <vscale x 4 x i32> undef, <vscale x 4 x i32> zeroinitializer
  %out = call <vscale x 4 x i32> @llvm.aarch64.sve.smax.u.nxv4i32(<vscale x 4 x i1> %pg, <vscale x 4 x i32> %a, <vscale x 4 x i32> %splat)
  ret <vscale x 4 x i32> %out
}

define <vscale x 4 x i32> @smax_i32_out_of_range(<vscale x 4 x i32> %a) {
; CHECK-LABEL: smax_i32_out_of_range:
; CHECK:       // %bb.0:
; CHECK-NEXT:    mov z1.s, #-129 // =0xffffffffffffff7f
; CHECK-NEXT:    ptrue p0.s
; CHECK-NEXT:    smax z0.s, p0/m, z0.s, z1.s
; CHECK-NEXT:    ret
  %pg = call <vscale x 4 x i1> @llvm.aarch64.sve.ptrue.nxv4i1(i32 31)
  %elt = insertelement <vscale x 4 x i32> undef, i32 -129, i32 0
  %splat = shufflevector <vscale x 4 x i32> %elt, <vscale x 4 x i32> undef, <vscale x 4 x i32> zeroinitializer
  %out = call <vscale x 4 x i32> @llvm.aarch64.sve.smax.u.nxv4i32(<vscale x 4 x i1> %pg, <vscale x 4 x i32> %a, <vscale x 4 x i32> %splat)
  ret <vscale x 4 x i32> %out
}

define <vscale x 2 x i64> @smax_i64(<vscale x 2 x i64> %a) {
; CHECK-LABEL: smax_i64:
; CHECK:       // %bb.0:
; CHECK-NEXT:    smax z0.d, z0.d, #127
; CHECK-NEXT:    ret
  %pg = call <vscale x 2 x i1> @llvm.aarch64.sve.ptrue.nxv2i1(i32 31)
  %elt = insertelement <vscale x 2 x i64> undef, i64 127, i64 0
  %splat = shufflevector <vscale x 2 x i64> %elt, <vscale x 2 x i64> undef, <vscale x 2 x i32> zeroinitializer
  %out = call <vscale x 2 x i64> @llvm.aarch64.sve.smax.u.nxv2i64(<vscale x 2 x i1> %pg, <vscale x 2 x i64> %a, <vscale x 2 x i64> %splat)
  ret <vscale x 2 x i64> %out
}

define <vscale x 2 x i64> @smax_i64_out_of_range(<vscale x 2 x i64> %a) {
; CHECK-LABEL: smax_i64_out_of_range:
; CHECK:       // %bb.0:
; CHECK-NEXT:    mov z1.d, #65535 // =0xffff
; CHECK-NEXT:    ptrue p0.d
; CHECK-NEXT:    smax z0.d, p0/m, z0.d, z1.d
; CHECK-NEXT:    ret
  %pg = call <vscale x 2 x i1> @llvm.aarch64.sve.ptrue.nxv2i1(i32 31)
  %elt = insertelement <vscale x 2 x i64> undef, i64 65535, i64 0
  %splat = shufflevector <vscale x 2 x i64> %elt, <vscale x 2 x i64> undef, <vscale x 2 x i32> zeroinitializer
  %out = call <vscale x 2 x i64> @llvm.aarch64.sve.smax.u.nxv2i64(<vscale x 2 x i1> %pg, <vscale x 2 x i64> %a, <vscale x 2 x i64> %splat)
  ret <vscale x 2 x i64> %out
}

; As smax_i32 but where pg is i8 based and thus compatible for i32.
define <vscale x 4 x i32> @smax_i32_ptrue_all_b(<vscale x 4 x i32> %a) #0 {
; CHECK-LABEL: smax_i32_ptrue_all_b:
; CHECK:       // %bb.0:
; CHECK-NEXT:    smax z0.s, z0.s, #1
; CHECK-NEXT:    ret
  %pg.b = tail call <vscale x 16 x i1> @llvm.aarch64.sve.ptrue.nxv16i1(i32 31)
  %pg.s = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> %pg.b)
  %b = tail call <vscale x 4 x i32> @llvm.aarch64.sve.dup.x.nxv4i32(i32 1)
  %out = tail call <vscale x 4 x i32> @llvm.aarch64.sve.smax.u.nxv4i32(<vscale x 4 x i1> %pg.s, <vscale x 4 x i32> %a, <vscale x 4 x i32> %b)
  ret <vscale x 4 x i32> %out
}

; As smax_i32 but where pg is i16 based and thus compatible for i32.
define <vscale x 4 x i32> @smax_i32_ptrue_all_h(<vscale x 4 x i32> %a) #0 {
; CHECK-LABEL: smax_i32_ptrue_all_h:
; CHECK:       // %bb.0:
; CHECK-NEXT:    smax z0.s, z0.s, #1
; CHECK-NEXT:    ret
  %pg.h = tail call <vscale x 8 x i1> @llvm.aarch64.sve.ptrue.nxv8i1(i32 31)
  %pg.b = tail call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv8i1(<vscale x 8 x i1> %pg.h)
  %pg.s = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> %pg.b)
  %b = tail call <vscale x 4 x i32> @llvm.aarch64.sve.dup.x.nxv4i32(i32 1)
  %out = tail call <vscale x 4 x i32> @llvm.aarch64.sve.smax.u.nxv4i32(<vscale x 4 x i1> %pg.s, <vscale x 4 x i32> %a, <vscale x 4 x i32> %b)
  ret <vscale x 4 x i32> %out
}

; As smax_i32 but where pg is i64 based, which is not compatible for i32 and
; thus inactive lanes are important and the immediate form cannot be used.
define <vscale x 4 x i32> @smax_i32_ptrue_all_d(<vscale x 4 x i32> %a) #0 {
; CHECK-LABEL: smax_i32_ptrue_all_d:
; CHECK:       // %bb.0:
; CHECK-NEXT:    mov z1.s, #1 // =0x1
; CHECK-NEXT:    ptrue p0.d
; CHECK-NEXT:    smax z0.s, p0/m, z0.s, z1.s
; CHECK-NEXT:    ret
  %pg.d = tail call <vscale x 2 x i1> @llvm.aarch64.sve.ptrue.nxv2i1(i32 31)
  %pg.b = tail call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv2i1(<vscale x 2 x i1> %pg.d)
  %pg.s = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> %pg.b)
  %b = tail call <vscale x 4 x i32> @llvm.aarch64.sve.dup.x.nxv4i32(i32 1)
  %out = tail call <vscale x 4 x i32> @llvm.aarch64.sve.smax.nxv4i32(<vscale x 4 x i1> %pg.s, <vscale x 4 x i32> %a, <vscale x 4 x i32> %b)
  ret <vscale x 4 x i32> %out
}

; SMIN

define <vscale x 16 x i8> @smin_i8(<vscale x 16 x i8> %a) {
; CHECK-LABEL: smin_i8:
; CHECK:       // %bb.0:
; CHECK-NEXT:    smin z0.b, z0.b, #127
; CHECK-NEXT:    ret
  %pg = call <vscale x 16 x i1> @llvm.aarch64.sve.ptrue.nxv16i1(i32 31)
  %elt = insertelement <vscale x 16 x i8> undef, i8 127, i32 0
  %splat = shufflevector <vscale x 16 x i8> %elt, <vscale x 16 x i8> undef, <vscale x 16 x i32> zeroinitializer
  %out = call <vscale x 16 x i8> @llvm.aarch64.sve.smin.u.nxv16i8(<vscale x 16 x i1> %pg, <vscale x 16 x i8> %a, <vscale x 16 x i8> %splat)
  ret <vscale x 16 x i8> %out
}

define <vscale x 8 x i16> @smin_i16(<vscale x 8 x i16> %a) {
; CHECK-LABEL: smin_i16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    smin z0.h, z0.h, #-128
; CHECK-NEXT:    ret
  %pg = call <vscale x 8 x i1> @llvm.aarch64.sve.ptrue.nxv8i1(i32 31)
  %elt = insertelement <vscale x 8 x i16> undef, i16 -128, i32 0
  %splat = shufflevector <vscale x 8 x i16> %elt, <vscale x 8 x i16> undef, <vscale x 8 x i32> zeroinitializer
  %out = call <vscale x 8 x i16> @llvm.aarch64.sve.smin.u.nxv8i16(<vscale x 8 x i1> %pg, <vscale x 8 x i16> %a, <vscale x 8 x i16> %splat)
  ret <vscale x 8 x i16> %out
}

define <vscale x 8 x i16> @smin_i16_out_of_range(<vscale x 8 x i16> %a) {
; CHECK-LABEL: smin_i16_out_of_range:
; CHECK:       // %bb.0:
; CHECK-NEXT:    mov z1.h, #-129 // =0xffffffffffffff7f
; CHECK-NEXT:    ptrue p0.h
; CHECK-NEXT:    smin z0.h, p0/m, z0.h, z1.h
; CHECK-NEXT:    ret
  %pg = call <vscale x 8 x i1> @llvm.aarch64.sve.ptrue.nxv8i1(i32 31)
  %elt = insertelement <vscale x 8 x i16> undef, i16 -129, i32 0
  %splat = shufflevector <vscale x 8 x i16> %elt, <vscale x 8 x i16> undef, <vscale x 8 x i32> zeroinitializer
  %out = call <vscale x 8 x i16> @llvm.aarch64.sve.smin.u.nxv8i16(<vscale x 8 x i1> %pg, <vscale x 8 x i16> %a, <vscale x 8 x i16> %splat)
  ret <vscale x 8 x i16> %out
}

define <vscale x 4 x i32> @smin_i32(<vscale x 4 x i32> %a) {
; CHECK-LABEL: smin_i32:
; CHECK:       // %bb.0:
; CHECK-NEXT:    smin z0.s, z0.s, #127
; CHECK-NEXT:    ret
  %pg = call <vscale x 4 x i1> @llvm.aarch64.sve.ptrue.nxv4i1(i32 31)
  %elt = insertelement <vscale x 4 x i32> undef, i32 127, i32 0
  %splat = shufflevector <vscale x 4 x i32> %elt, <vscale x 4 x i32> undef, <vscale x 4 x i32> zeroinitializer
  %out = call <vscale x 4 x i32> @llvm.aarch64.sve.smin.u.nxv4i32(<vscale x 4 x i1> %pg, <vscale x 4 x i32> %a, <vscale x 4 x i32> %splat)
  ret <vscale x 4 x i32> %out
}

define <vscale x 4 x i32> @smin_i32_out_of_range(<vscale x 4 x i32> %a) {
; CHECK-LABEL: smin_i32_out_of_range:
; CHECK:       // %bb.0:
; CHECK-NEXT:    mov w8, #257 // =0x101
; CHECK-NEXT:    ptrue p0.s
; CHECK-NEXT:    mov z1.s, w8
; CHECK-NEXT:    smin z0.s, p0/m, z0.s, z1.s
; CHECK-NEXT:    ret
  %pg = call <vscale x 4 x i1> @llvm.aarch64.sve.ptrue.nxv4i1(i32 31)
  %elt = insertelement <vscale x 4 x i32> undef, i32 257, i32 0
  %splat = shufflevector <vscale x 4 x i32> %elt, <vscale x 4 x i32> undef, <vscale x 4 x i32> zeroinitializer
  %out = call <vscale x 4 x i32> @llvm.aarch64.sve.smin.u.nxv4i32(<vscale x 4 x i1> %pg, <vscale x 4 x i32> %a, <vscale x 4 x i32> %splat)
  ret <vscale x 4 x i32> %out
}

define <vscale x 2 x i64> @smin_i64(<vscale x 2 x i64> %a) {
; CHECK-LABEL: smin_i64:
; CHECK:       // %bb.0:
; CHECK-NEXT:    smin z0.d, z0.d, #-128
; CHECK-NEXT:    ret
  %pg = call <vscale x 2 x i1> @llvm.aarch64.sve.ptrue.nxv2i1(i32 31)
  %elt = insertelement <vscale x 2 x i64> undef, i64 -128, i64 0
  %splat = shufflevector <vscale x 2 x i64> %elt, <vscale x 2 x i64> undef, <vscale x 2 x i32> zeroinitializer
  %out = call <vscale x 2 x i64> @llvm.aarch64.sve.smin.u.nxv2i64(<vscale x 2 x i1> %pg, <vscale x 2 x i64> %a, <vscale x 2 x i64> %splat)
  ret <vscale x 2 x i64> %out
}

define <vscale x 2 x i64> @smin_i64_out_of_range(<vscale x 2 x i64> %a) {
; CHECK-LABEL: smin_i64_out_of_range:
; CHECK:       // %bb.0:
; CHECK-NEXT:    mov z1.d, #-256 // =0xffffffffffffff00
; CHECK-NEXT:    ptrue p0.d
; CHECK-NEXT:    smin z0.d, p0/m, z0.d, z1.d
; CHECK-NEXT:    ret
  %pg = call <vscale x 2 x i1> @llvm.aarch64.sve.ptrue.nxv2i1(i32 31)
  %elt = insertelement <vscale x 2 x i64> undef, i64 -256, i64 0
  %splat = shufflevector <vscale x 2 x i64> %elt, <vscale x 2 x i64> undef, <vscale x 2 x i32> zeroinitializer
  %out = call <vscale x 2 x i64> @llvm.aarch64.sve.smin.u.nxv2i64(<vscale x 2 x i1> %pg, <vscale x 2 x i64> %a, <vscale x 2 x i64> %splat)
  ret <vscale x 2 x i64> %out
}

; As smin_i32 but where pg is i8 based and thus compatible for i32.
define <vscale x 4 x i32> @smin_i32_ptrue_all_b(<vscale x 4 x i32> %a) #0 {
; CHECK-LABEL: smin_i32_ptrue_all_b:
; CHECK:       // %bb.0:
; CHECK-NEXT:    smin z0.s, z0.s, #1
; CHECK-NEXT:    ret
  %pg.b = tail call <vscale x 16 x i1> @llvm.aarch64.sve.ptrue.nxv16i1(i32 31)
  %pg.s = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> %pg.b)
  %b = tail call <vscale x 4 x i32> @llvm.aarch64.sve.dup.x.nxv4i32(i32 1)
  %out = tail call <vscale x 4 x i32> @llvm.aarch64.sve.smin.u.nxv4i32(<vscale x 4 x i1> %pg.s, <vscale x 4 x i32> %a, <vscale x 4 x i32> %b)
  ret <vscale x 4 x i32> %out
}

; As smin_i32 but where pg is i16 based and thus compatible for i32.
define <vscale x 4 x i32> @smin_i32_ptrue_all_h(<vscale x 4 x i32> %a) #0 {
; CHECK-LABEL: smin_i32_ptrue_all_h:
; CHECK:       // %bb.0:
; CHECK-NEXT:    smin z0.s, z0.s, #1
; CHECK-NEXT:    ret
  %pg.h = tail call <vscale x 8 x i1> @llvm.aarch64.sve.ptrue.nxv8i1(i32 31)
  %pg.b = tail call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv8i1(<vscale x 8 x i1> %pg.h)
  %pg.s = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> %pg.b)
  %b = tail call <vscale x 4 x i32> @llvm.aarch64.sve.dup.x.nxv4i32(i32 1)
  %out = tail call <vscale x 4 x i32> @llvm.aarch64.sve.smin.u.nxv4i32(<vscale x 4 x i1> %pg.s, <vscale x 4 x i32> %a, <vscale x 4 x i32> %b)
  ret <vscale x 4 x i32> %out
}

; As smin_i32 but where pg is i64 based, which is not compatible for i32 and
; thus inactive lanes are important and the immediate form cannot be used.
define <vscale x 4 x i32> @smin_i32_ptrue_all_d(<vscale x 4 x i32> %a) #0 {
; CHECK-LABEL: smin_i32_ptrue_all_d:
; CHECK:       // %bb.0:
; CHECK-NEXT:    mov z1.s, #1 // =0x1
; CHECK-NEXT:    ptrue p0.d
; CHECK-NEXT:    smin z0.s, p0/m, z0.s, z1.s
; CHECK-NEXT:    ret
  %pg.d = tail call <vscale x 2 x i1> @llvm.aarch64.sve.ptrue.nxv2i1(i32 31)
  %pg.b = tail call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv2i1(<vscale x 2 x i1> %pg.d)
  %pg.s = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> %pg.b)
  %b = tail call <vscale x 4 x i32> @llvm.aarch64.sve.dup.x.nxv4i32(i32 1)
  %out = tail call <vscale x 4 x i32> @llvm.aarch64.sve.smin.nxv4i32(<vscale x 4 x i1> %pg.s, <vscale x 4 x i32> %a, <vscale x 4 x i32> %b)
  ret <vscale x 4 x i32> %out
}

; UMAX

define <vscale x 16 x i8> @umax_i8(<vscale x 16 x i8> %a) {
; CHECK-LABEL: umax_i8:
; CHECK:       // %bb.0:
; CHECK-NEXT:    umax z0.b, z0.b, #0
; CHECK-NEXT:    ret
  %pg = call <vscale x 16 x i1> @llvm.aarch64.sve.ptrue.nxv16i1(i32 31)
  %elt = insertelement <vscale x 16 x i8> undef, i8 0, i32 0
  %splat = shufflevector <vscale x 16 x i8> %elt, <vscale x 16 x i8> undef, <vscale x 16 x i32> zeroinitializer
  %out = call <vscale x 16 x i8> @llvm.aarch64.sve.umax.u.nxv16i8(<vscale x 16 x i1> %pg, <vscale x 16 x i8> %a, <vscale x 16 x i8> %splat)
  ret <vscale x 16 x i8> %out
}

define <vscale x 8 x i16> @umax_i16(<vscale x 8 x i16> %a) {
; CHECK-LABEL: umax_i16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    umax z0.h, z0.h, #255
; CHECK-NEXT:    ret
  %pg = call <vscale x 8 x i1> @llvm.aarch64.sve.ptrue.nxv8i1(i32 31)
  %elt = insertelement <vscale x 8 x i16> undef, i16 255, i32 0
  %splat = shufflevector <vscale x 8 x i16> %elt, <vscale x 8 x i16> undef, <vscale x 8 x i32> zeroinitializer
  %out = call <vscale x 8 x i16> @llvm.aarch64.sve.umax.u.nxv8i16(<vscale x 8 x i1> %pg, <vscale x 8 x i16> %a, <vscale x 8 x i16> %splat)
  ret <vscale x 8 x i16> %out
}

define <vscale x 8 x i16> @umax_i16_out_of_range(<vscale x 8 x i16> %a) {
; CHECK-LABEL: umax_i16_out_of_range:
; CHECK:       // %bb.0:
; CHECK-NEXT:    dupm z1.b, #0x1
; CHECK-NEXT:    ptrue p0.h
; CHECK-NEXT:    umax z0.h, p0/m, z0.h, z1.h
; CHECK-NEXT:    ret
  %pg = call <vscale x 8 x i1> @llvm.aarch64.sve.ptrue.nxv8i1(i32 31)
  %elt = insertelement <vscale x 8 x i16> undef, i16 257, i32 0
  %splat = shufflevector <vscale x 8 x i16> %elt, <vscale x 8 x i16> undef, <vscale x 8 x i32> zeroinitializer
  %out = call <vscale x 8 x i16> @llvm.aarch64.sve.umax.u.nxv8i16(<vscale x 8 x i1> %pg, <vscale x 8 x i16> %a, <vscale x 8 x i16> %splat)
  ret <vscale x 8 x i16> %out
}
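
; UMAX/UMIN (immediate) encode an unsigned 8-bit value, so only splats in the
; range [0, 255] can use the immediate form; 257 and 65535 fall back to a
; vector register operand.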

define <vscale x 4 x i32> @umax_i32(<vscale x 4 x i32> %a) {
; CHECK-LABEL: umax_i32:
; CHECK:       // %bb.0:
; CHECK-NEXT:    umax z0.s, z0.s, #0
; CHECK-NEXT:    ret
  %pg = call <vscale x 4 x i1> @llvm.aarch64.sve.ptrue.nxv4i1(i32 31)
  %elt = insertelement <vscale x 4 x i32> undef, i32 0, i32 0
  %splat = shufflevector <vscale x 4 x i32> %elt, <vscale x 4 x i32> undef, <vscale x 4 x i32> zeroinitializer
  %out = call <vscale x 4 x i32> @llvm.aarch64.sve.umax.u.nxv4i32(<vscale x 4 x i1> %pg, <vscale x 4 x i32> %a, <vscale x 4 x i32> %splat)
  ret <vscale x 4 x i32> %out
}

define <vscale x 4 x i32> @umax_i32_out_of_range(<vscale x 4 x i32> %a) {
; CHECK-LABEL: umax_i32_out_of_range:
; CHECK:       // %bb.0:
; CHECK-NEXT:    mov w8, #257 // =0x101
; CHECK-NEXT:    ptrue p0.s
; CHECK-NEXT:    mov z1.s, w8
; CHECK-NEXT:    umax z0.s, p0/m, z0.s, z1.s
; CHECK-NEXT:    ret
  %pg = call <vscale x 4 x i1> @llvm.aarch64.sve.ptrue.nxv4i1(i32 31)
  %elt = insertelement <vscale x 4 x i32> undef, i32 257, i32 0
  %splat = shufflevector <vscale x 4 x i32> %elt, <vscale x 4 x i32> undef, <vscale x 4 x i32> zeroinitializer
  %out = call <vscale x 4 x i32> @llvm.aarch64.sve.umax.u.nxv4i32(<vscale x 4 x i1> %pg, <vscale x 4 x i32> %a, <vscale x 4 x i32> %splat)
  ret <vscale x 4 x i32> %out
}

define <vscale x 2 x i64> @umax_i64(<vscale x 2 x i64> %a) {
; CHECK-LABEL: umax_i64:
; CHECK:       // %bb.0:
; CHECK-NEXT:    umax z0.d, z0.d, #255
; CHECK-NEXT:    ret
  %pg = call <vscale x 2 x i1> @llvm.aarch64.sve.ptrue.nxv2i1(i32 31)
  %elt = insertelement <vscale x 2 x i64> undef, i64 255, i64 0
  %splat = shufflevector <vscale x 2 x i64> %elt, <vscale x 2 x i64> undef, <vscale x 2 x i32> zeroinitializer
  %out = call <vscale x 2 x i64> @llvm.aarch64.sve.umax.u.nxv2i64(<vscale x 2 x i1> %pg, <vscale x 2 x i64> %a, <vscale x 2 x i64> %splat)
  ret <vscale x 2 x i64> %out
}

define <vscale x 2 x i64> @umax_i64_out_of_range(<vscale x 2 x i64> %a) {
; CHECK-LABEL: umax_i64_out_of_range:
; CHECK:       // %bb.0:
; CHECK-NEXT:    mov z1.d, #65535 // =0xffff
; CHECK-NEXT:    ptrue p0.d
; CHECK-NEXT:    umax z0.d, p0/m, z0.d, z1.d
; CHECK-NEXT:    ret
  %pg = call <vscale x 2 x i1> @llvm.aarch64.sve.ptrue.nxv2i1(i32 31)
  %elt = insertelement <vscale x 2 x i64> undef, i64 65535, i64 0
  %splat = shufflevector <vscale x 2 x i64> %elt, <vscale x 2 x i64> undef, <vscale x 2 x i32> zeroinitializer
  %out = call <vscale x 2 x i64> @llvm.aarch64.sve.umax.u.nxv2i64(<vscale x 2 x i1> %pg, <vscale x 2 x i64> %a, <vscale x 2 x i64> %splat)
  ret <vscale x 2 x i64> %out
}

; As umax_i32 but where pg is i8 based and thus compatible for i32.
define <vscale x 4 x i32> @umax_i32_ptrue_all_b(<vscale x 4 x i32> %a) #0 {
; CHECK-LABEL: umax_i32_ptrue_all_b:
; CHECK:       // %bb.0:
; CHECK-NEXT:    umax z0.s, z0.s, #1
; CHECK-NEXT:    ret
  %pg.b = tail call <vscale x 16 x i1> @llvm.aarch64.sve.ptrue.nxv16i1(i32 31)
  %pg.s = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> %pg.b)
  %b = tail call <vscale x 4 x i32> @llvm.aarch64.sve.dup.x.nxv4i32(i32 1)
  %out = tail call <vscale x 4 x i32> @llvm.aarch64.sve.umax.u.nxv4i32(<vscale x 4 x i1> %pg.s, <vscale x 4 x i32> %a, <vscale x 4 x i32> %b)
  ret <vscale x 4 x i32> %out
}

; As umax_i32 but where pg is i16 based and thus compatible for i32.
define <vscale x 4 x i32> @umax_i32_ptrue_all_h(<vscale x 4 x i32> %a) #0 {
; CHECK-LABEL: umax_i32_ptrue_all_h:
; CHECK:       // %bb.0:
; CHECK-NEXT:    umax z0.s, z0.s, #1
; CHECK-NEXT:    ret
  %pg.h = tail call <vscale x 8 x i1> @llvm.aarch64.sve.ptrue.nxv8i1(i32 31)
  %pg.b = tail call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv8i1(<vscale x 8 x i1> %pg.h)
  %pg.s = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> %pg.b)
  %b = tail call <vscale x 4 x i32> @llvm.aarch64.sve.dup.x.nxv4i32(i32 1)
  %out = tail call <vscale x 4 x i32> @llvm.aarch64.sve.umax.u.nxv4i32(<vscale x 4 x i1> %pg.s, <vscale x 4 x i32> %a, <vscale x 4 x i32> %b)
  ret <vscale x 4 x i32> %out
}

; As umax_i32 but where pg is i64 based, which is not compatible for i32 and
; thus inactive lanes are important and the immediate form cannot be used.
define <vscale x 4 x i32> @umax_i32_ptrue_all_d(<vscale x 4 x i32> %a) #0 {
; CHECK-LABEL: umax_i32_ptrue_all_d:
; CHECK:       // %bb.0:
; CHECK-NEXT:    mov z1.s, #1 // =0x1
; CHECK-NEXT:    ptrue p0.d
; CHECK-NEXT:    umax z0.s, p0/m, z0.s, z1.s
; CHECK-NEXT:    ret
  %pg.d = tail call <vscale x 2 x i1> @llvm.aarch64.sve.ptrue.nxv2i1(i32 31)
  %pg.b = tail call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv2i1(<vscale x 2 x i1> %pg.d)
  %pg.s = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> %pg.b)
  %b = tail call <vscale x 4 x i32> @llvm.aarch64.sve.dup.x.nxv4i32(i32 1)
  %out = tail call <vscale x 4 x i32> @llvm.aarch64.sve.umax.nxv4i32(<vscale x 4 x i1> %pg.s, <vscale x 4 x i32> %a, <vscale x 4 x i32> %b)
  ret <vscale x 4 x i32> %out
}

; UMIN

define <vscale x 16 x i8> @umin_i8(<vscale x 16 x i8> %a) {
; CHECK-LABEL: umin_i8:
; CHECK:       // %bb.0:
; CHECK-NEXT:    umin z0.b, z0.b, #255
; CHECK-NEXT:    ret
  %pg = call <vscale x 16 x i1> @llvm.aarch64.sve.ptrue.nxv16i1(i32 31)
  %elt = insertelement <vscale x 16 x i8> undef, i8 255, i32 0
  %splat = shufflevector <vscale x 16 x i8> %elt, <vscale x 16 x i8> undef, <vscale x 16 x i32> zeroinitializer
  %out = call <vscale x 16 x i8> @llvm.aarch64.sve.umin.u.nxv16i8(<vscale x 16 x i1> %pg, <vscale x 16 x i8> %a, <vscale x 16 x i8> %splat)
  ret <vscale x 16 x i8> %out
}

define <vscale x 8 x i16> @umin_i16(<vscale x 8 x i16> %a) {
; CHECK-LABEL: umin_i16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    umin z0.h, z0.h, #0
; CHECK-NEXT:    ret
  %pg = call <vscale x 8 x i1> @llvm.aarch64.sve.ptrue.nxv8i1(i32 31)
  %elt = insertelement <vscale x 8 x i16> undef, i16 0, i32 0
  %splat = shufflevector <vscale x 8 x i16> %elt, <vscale x 8 x i16> undef, <vscale x 8 x i32> zeroinitializer
  %out = call <vscale x 8 x i16> @llvm.aarch64.sve.umin.u.nxv8i16(<vscale x 8 x i1> %pg, <vscale x 8 x i16> %a, <vscale x 8 x i16> %splat)
  ret <vscale x 8 x i16> %out
}

define <vscale x 8 x i16> @umin_i16_out_of_range(<vscale x 8 x i16> %a) {
; CHECK-LABEL: umin_i16_out_of_range:
; CHECK:       // %bb.0:
; CHECK-NEXT:    dupm z1.b, #0x1
; CHECK-NEXT:    ptrue p0.h
; CHECK-NEXT:    umin z0.h, p0/m, z0.h, z1.h
; CHECK-NEXT:    ret
  %pg = call <vscale x 8 x i1> @llvm.aarch64.sve.ptrue.nxv8i1(i32 31)
  %elt = insertelement <vscale x 8 x i16> undef, i16 257, i32 0
  %splat = shufflevector <vscale x 8 x i16> %elt, <vscale x 8 x i16> undef, <vscale x 8 x i32> zeroinitializer
  %out = call <vscale x 8 x i16> @llvm.aarch64.sve.umin.u.nxv8i16(<vscale x 8 x i1> %pg, <vscale x 8 x i16> %a, <vscale x 8 x i16> %splat)
  ret <vscale x 8 x i16> %out
}

define <vscale x 4 x i32> @umin_i32(<vscale x 4 x i32> %a) {
; CHECK-LABEL: umin_i32:
; CHECK:       // %bb.0:
; CHECK-NEXT:    umin z0.s, z0.s, #255
; CHECK-NEXT:    ret
  %pg = call <vscale x 4 x i1> @llvm.aarch64.sve.ptrue.nxv4i1(i32 31)
  %elt = insertelement <vscale x 4 x i32> undef, i32 255, i32 0
  %splat = shufflevector <vscale x 4 x i32> %elt, <vscale x 4 x i32> undef, <vscale x 4 x i32> zeroinitializer
  %out = call <vscale x 4 x i32> @llvm.aarch64.sve.umin.u.nxv4i32(<vscale x 4 x i1> %pg, <vscale x 4 x i32> %a, <vscale x 4 x i32> %splat)
  ret <vscale x 4 x i32> %out
}

define <vscale x 4 x i32> @umin_i32_out_of_range(<vscale x 4 x i32> %a) {
; CHECK-LABEL: umin_i32_out_of_range:
; CHECK:       // %bb.0:
; CHECK-NEXT:    mov w8, #257 // =0x101
; CHECK-NEXT:    ptrue p0.s
; CHECK-NEXT:    mov z1.s, w8
; CHECK-NEXT:    umin z0.s, p0/m, z0.s, z1.s
; CHECK-NEXT:    ret
  %pg = call <vscale x 4 x i1> @llvm.aarch64.sve.ptrue.nxv4i1(i32 31)
  %elt = insertelement <vscale x 4 x i32> undef, i32 257, i32 0
  %splat = shufflevector <vscale x 4 x i32> %elt, <vscale x 4 x i32> undef, <vscale x 4 x i32> zeroinitializer
  %out = call <vscale x 4 x i32> @llvm.aarch64.sve.umin.u.nxv4i32(<vscale x 4 x i1> %pg, <vscale x 4 x i32> %a, <vscale x 4 x i32> %splat)
  ret <vscale x 4 x i32> %out
}

define <vscale x 2 x i64> @umin_i64(<vscale x 2 x i64> %a) {
; CHECK-LABEL: umin_i64:
; CHECK:       // %bb.0:
; CHECK-NEXT:    umin z0.d, z0.d, #0
; CHECK-NEXT:    ret
  %pg = call <vscale x 2 x i1> @llvm.aarch64.sve.ptrue.nxv2i1(i32 31)
  %elt = insertelement <vscale x 2 x i64> undef, i64 0, i64 0
  %splat = shufflevector <vscale x 2 x i64> %elt, <vscale x 2 x i64> undef, <vscale x 2 x i32> zeroinitializer
  %out = call <vscale x 2 x i64> @llvm.aarch64.sve.umin.u.nxv2i64(<vscale x 2 x i1> %pg, <vscale x 2 x i64> %a, <vscale x 2 x i64> %splat)
  ret <vscale x 2 x i64> %out
}

define <vscale x 2 x i64> @umin_i64_out_of_range(<vscale x 2 x i64> %a) {
; CHECK-LABEL: umin_i64_out_of_range:
; CHECK:       // %bb.0:
; CHECK-NEXT:    mov z1.d, #65535 // =0xffff
; CHECK-NEXT:    ptrue p0.d
; CHECK-NEXT:    umin z0.d, p0/m, z0.d, z1.d
; CHECK-NEXT:    ret
  %pg = call <vscale x 2 x i1> @llvm.aarch64.sve.ptrue.nxv2i1(i32 31)
  %elt = insertelement <vscale x 2 x i64> undef, i64 65535, i64 0
  %splat = shufflevector <vscale x 2 x i64> %elt, <vscale x 2 x i64> undef, <vscale x 2 x i32> zeroinitializer
  %out = call <vscale x 2 x i64> @llvm.aarch64.sve.umin.u.nxv2i64(<vscale x 2 x i1> %pg, <vscale x 2 x i64> %a, <vscale x 2 x i64> %splat)
  ret <vscale x 2 x i64> %out
}

; As umin_i32 but where pg is i8 based and thus compatible for i32.
define <vscale x 4 x i32> @umin_i32_ptrue_all_b(<vscale x 4 x i32> %a) #0 {
; CHECK-LABEL: umin_i32_ptrue_all_b:
; CHECK:       // %bb.0:
; CHECK-NEXT:    umin z0.s, z0.s, #1
; CHECK-NEXT:    ret
  %pg.b = tail call <vscale x 16 x i1> @llvm.aarch64.sve.ptrue.nxv16i1(i32 31)
  %pg.s = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> %pg.b)
  %b = tail call <vscale x 4 x i32> @llvm.aarch64.sve.dup.x.nxv4i32(i32 1)
  %out = tail call <vscale x 4 x i32> @llvm.aarch64.sve.umin.u.nxv4i32(<vscale x 4 x i1> %pg.s, <vscale x 4 x i32> %a, <vscale x 4 x i32> %b)
  ret <vscale x 4 x i32> %out
}

; As umin_i32 but where pg is i16 based and thus compatible for i32.
define <vscale x 4 x i32> @umin_i32_ptrue_all_h(<vscale x 4 x i32> %a) #0 {
; CHECK-LABEL: umin_i32_ptrue_all_h:
; CHECK:       // %bb.0:
; CHECK-NEXT:    umin z0.s, z0.s, #1
; CHECK-NEXT:    ret
  %pg.h = tail call <vscale x 8 x i1> @llvm.aarch64.sve.ptrue.nxv8i1(i32 31)
  %pg.b = tail call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv8i1(<vscale x 8 x i1> %pg.h)
  %pg.s = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> %pg.b)
  %b = tail call <vscale x 4 x i32> @llvm.aarch64.sve.dup.x.nxv4i32(i32 1)
  %out = tail call <vscale x 4 x i32> @llvm.aarch64.sve.umin.u.nxv4i32(<vscale x 4 x i1> %pg.s, <vscale x 4 x i32> %a, <vscale x 4 x i32> %b)
  ret <vscale x 4 x i32> %out
}

; As umin_i32 but where pg is i64 based, which is not compatible for i32 and
; thus inactive lanes are important and the immediate form cannot be used.
define <vscale x 4 x i32> @umin_i32_ptrue_all_d(<vscale x 4 x i32> %a) #0 {
; CHECK-LABEL: umin_i32_ptrue_all_d:
; CHECK:       // %bb.0:
; CHECK-NEXT:    mov z1.s, #1 // =0x1
; CHECK-NEXT:    ptrue p0.d
; CHECK-NEXT:    umin z0.s, p0/m, z0.s, z1.s
; CHECK-NEXT:    ret
  %pg.d = tail call <vscale x 2 x i1> @llvm.aarch64.sve.ptrue.nxv2i1(i32 31)
  %pg.b = tail call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv2i1(<vscale x 2 x i1> %pg.d)
  %pg.s = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> %pg.b)
  %b = tail call <vscale x 4 x i32> @llvm.aarch64.sve.dup.x.nxv4i32(i32 1)
  %out = tail call <vscale x 4 x i32> @llvm.aarch64.sve.umin.nxv4i32(<vscale x 4 x i1> %pg.s, <vscale x 4 x i32> %a, <vscale x 4 x i32> %b)
  ret <vscale x 4 x i32> %out
}

; SQADD

define <vscale x 16 x i8> @sqadd_b_lowimm(<vscale x 16 x i8> %a) {
; CHECK-LABEL: sqadd_b_lowimm:
; CHECK:       // %bb.0:
; CHECK-NEXT:    sqadd z0.b, z0.b, #27 // =0x1b
; CHECK-NEXT:    ret
  %elt = insertelement <vscale x 16 x i8> undef, i8 27, i32 0
  %splat = shufflevector <vscale x 16 x i8> %elt, <vscale x 16 x i8> undef, <vscale x 16 x i32> zeroinitializer
  %out = call <vscale x 16 x i8> @llvm.aarch64.sve.sqadd.x.nxv16i8(<vscale x 16 x i8> %a, <vscale x 16 x i8> %splat)
  ret <vscale x 16 x i8> %out
}

; Immediate instruction form only supports positive values.
define <vscale x 16 x i8> @sqadd_b_negimm(<vscale x 16 x i8> %a) {
; CHECK-LABEL: sqadd_b_negimm:
; CHECK:       // %bb.0:
; CHECK-NEXT:    sqsub z0.b, z0.b, #128 // =0x80
; CHECK-NEXT:    ret
  %elt = insertelement <vscale x 16 x i8> undef, i8 -128, i32 0
  %splat = shufflevector <vscale x 16 x i8> %elt, <vscale x 16 x i8> undef, <vscale x 16 x i32> zeroinitializer
  %out = call <vscale x 16 x i8> @llvm.aarch64.sve.sqadd.x.nxv16i8(<vscale x 16 x i8> %a, <vscale x 16 x i8> %splat)
  ret <vscale x 16 x i8> %out
}
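
; A saturating add of a negative splat is instead lowered as a saturating
; subtract of the corresponding positive magnitude, which the immediate form
; can encode (here sqadd #-128 becomes sqsub #128).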

define <vscale x 8 x i16> @sqadd_h_lowimm(<vscale x 8 x i16> %a) {
; CHECK-LABEL: sqadd_h_lowimm:
; CHECK:       // %bb.0:
; CHECK-NEXT:    sqadd z0.h, z0.h, #43 // =0x2b
; CHECK-NEXT:    ret
  %elt = insertelement <vscale x 8 x i16> undef, i16 43, i32 0
  %splat = shufflevector <vscale x 8 x i16> %elt, <vscale x 8 x i16> undef, <vscale x 8 x i32> zeroinitializer
  %out = call <vscale x 8 x i16> @llvm.aarch64.sve.sqadd.x.nxv8i16(<vscale x 8 x i16> %a, <vscale x 8 x i16> %splat)
  ret <vscale x 8 x i16> %out
}

define <vscale x 8 x i16> @sqadd_h_highimm(<vscale x 8 x i16> %a) {
; CHECK-LABEL: sqadd_h_highimm:
; CHECK:       // %bb.0:
; CHECK-NEXT:    sqadd z0.h, z0.h, #2048 // =0x800
; CHECK-NEXT:    ret
  %elt = insertelement <vscale x 8 x i16> undef, i16 2048, i32 0
  %splat = shufflevector <vscale x 8 x i16> %elt, <vscale x 8 x i16> undef, <vscale x 8 x i32> zeroinitializer
  %out = call <vscale x 8 x i16> @llvm.aarch64.sve.sqadd.x.nxv8i16(<vscale x 8 x i16> %a, <vscale x 8 x i16> %splat)
  ret <vscale x 8 x i16> %out
}
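
; "High" immediates such as 2048 (0x800) and the 8192/65280 cases below are
; encodable as an unsigned 8-bit value shifted left by eight (2048 = 8 << 8),
; so they still use the immediate instruction form.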

; Immediate instruction form only supports positive values.
define <vscale x 8 x i16> @sqadd_h_negimm(<vscale x 8 x i16> %a) {
; CHECK-LABEL: sqadd_h_negimm:
; CHECK:       // %bb.0:
; CHECK-NEXT:    sqsub z0.h, z0.h, #1 // =0x1
; CHECK-NEXT:    ret
  %elt = insertelement <vscale x 8 x i16> undef, i16 -1, i32 0
  %splat = shufflevector <vscale x 8 x i16> %elt, <vscale x 8 x i16> undef, <vscale x 8 x i32> zeroinitializer
  %out = call <vscale x 8 x i16> @llvm.aarch64.sve.sqadd.x.nxv8i16(<vscale x 8 x i16> %a, <vscale x 8 x i16> %splat)
  ret <vscale x 8 x i16> %out
}

define <vscale x 4 x i32> @sqadd_s_lowimm(<vscale x 4 x i32> %a) {
; CHECK-LABEL: sqadd_s_lowimm:
; CHECK:       // %bb.0:
; CHECK-NEXT:    sqadd z0.s, z0.s, #1 // =0x1
; CHECK-NEXT:    ret
  %elt = insertelement <vscale x 4 x i32> undef, i32 1, i32 0
  %splat = shufflevector <vscale x 4 x i32> %elt, <vscale x 4 x i32> undef, <vscale x 4 x i32> zeroinitializer
  %out = call <vscale x 4 x i32> @llvm.aarch64.sve.sqadd.x.nxv4i32(<vscale x 4 x i32> %a, <vscale x 4 x i32> %splat)
  ret <vscale x 4 x i32> %out
}

define <vscale x 4 x i32> @sqadd_s_highimm(<vscale x 4 x i32> %a) {
; CHECK-LABEL: sqadd_s_highimm:
; CHECK:       // %bb.0:
; CHECK-NEXT:    sqadd z0.s, z0.s, #8192 // =0x2000
; CHECK-NEXT:    ret
  %elt = insertelement <vscale x 4 x i32> undef, i32 8192, i32 0
  %splat = shufflevector <vscale x 4 x i32> %elt, <vscale x 4 x i32> undef, <vscale x 4 x i32> zeroinitializer
  %out = call <vscale x 4 x i32> @llvm.aarch64.sve.sqadd.x.nxv4i32(<vscale x 4 x i32> %a, <vscale x 4 x i32> %splat)
  ret <vscale x 4 x i32> %out
}

; Immediate instruction form only supports positive values.
define <vscale x 4 x i32> @sqadd_s_negimm(<vscale x 4 x i32> %a) {
; CHECK-LABEL: sqadd_s_negimm:
; CHECK:       // %bb.0:
; CHECK-NEXT:    sqsub z0.s, z0.s, #65280 // =0xff00
; CHECK-NEXT:    ret
  %elt = insertelement <vscale x 4 x i32> undef, i32 -65280, i32 0
  %splat = shufflevector <vscale x 4 x i32> %elt, <vscale x 4 x i32> undef, <vscale x 4 x i32> zeroinitializer
  %out = call <vscale x 4 x i32> @llvm.aarch64.sve.sqadd.x.nxv4i32(<vscale x 4 x i32> %a, <vscale x 4 x i32> %splat)
  ret <vscale x 4 x i32> %out
}

define <vscale x 2 x i64> @sqadd_d_lowimm(<vscale x 2 x i64> %a) {
; CHECK-LABEL: sqadd_d_lowimm:
; CHECK:       // %bb.0:
; CHECK-NEXT:    sqadd z0.d, z0.d, #255 // =0xff
; CHECK-NEXT:    ret
  %elt = insertelement <vscale x 2 x i64> undef, i64 255, i32 0
  %splat = shufflevector <vscale x 2 x i64> %elt, <vscale x 2 x i64> undef, <vscale x 2 x i32> zeroinitializer
  %out = call <vscale x 2 x i64> @llvm.aarch64.sve.sqadd.x.nxv2i64(<vscale x 2 x i64> %a, <vscale x 2 x i64> %splat)
  ret <vscale x 2 x i64> %out
}

define <vscale x 2 x i64> @sqadd_d_highimm(<vscale x 2 x i64> %a) {
; CHECK-LABEL: sqadd_d_highimm:
; CHECK:       // %bb.0:
; CHECK-NEXT:    sqadd z0.d, z0.d, #65280 // =0xff00
; CHECK-NEXT:    ret
  %elt = insertelement <vscale x 2 x i64> undef, i64 65280, i32 0
  %splat = shufflevector <vscale x 2 x i64> %elt, <vscale x 2 x i64> undef, <vscale x 2 x i32> zeroinitializer
  %out = call <vscale x 2 x i64> @llvm.aarch64.sve.sqadd.x.nxv2i64(<vscale x 2 x i64> %a, <vscale x 2 x i64> %splat)
  ret <vscale x 2 x i64> %out
}

; Immediate instruction form only supports positive values.
define <vscale x 2 x i64> @sqadd_d_negimm(<vscale x 2 x i64> %a) {
; CHECK-LABEL: sqadd_d_negimm:
; CHECK:       // %bb.0:
; CHECK-NEXT:    sqsub z0.d, z0.d, #3840 // =0xf00
; CHECK-NEXT:    ret
  %elt = insertelement <vscale x 2 x i64> undef, i64 -3840, i32 0
  %splat = shufflevector <vscale x 2 x i64> %elt, <vscale x 2 x i64> undef, <vscale x 2 x i32> zeroinitializer
  %out = call <vscale x 2 x i64> @llvm.aarch64.sve.sqadd.x.nxv2i64(<vscale x 2 x i64> %a, <vscale x 2 x i64> %splat)
  ret <vscale x 2 x i64> %out
}

; SQSUB

define <vscale x 16 x i8> @sqsub_b_lowimm(<vscale x 16 x i8> %a) {
; CHECK-LABEL: sqsub_b_lowimm:
; CHECK:       // %bb.0:
; CHECK-NEXT:    sqsub z0.b, z0.b, #27 // =0x1b
; CHECK-NEXT:    ret
  %elt = insertelement <vscale x 16 x i8> undef, i8 27, i32 0
  %splat = shufflevector <vscale x 16 x i8> %elt, <vscale x 16 x i8> undef, <vscale x 16 x i32> zeroinitializer
  %out = call <vscale x 16 x i8> @llvm.aarch64.sve.sqsub.x.nxv16i8(<vscale x 16 x i8> %a, <vscale x 16 x i8> %splat)
  ret <vscale x 16 x i8> %out
}

; Immediate instruction form only supports positive values.
define <vscale x 16 x i8> @sqsub_b_negimm(<vscale x 16 x i8> %a) {
; CHECK-LABEL: sqsub_b_negimm:
; CHECK:       // %bb.0:
; CHECK-NEXT:    sqadd z0.b, z0.b, #1 // =0x1
; CHECK-NEXT:    ret
  %elt = insertelement <vscale x 16 x i8> undef, i8 -1, i32 0
  %splat = shufflevector <vscale x 16 x i8> %elt, <vscale x 16 x i8> undef, <vscale x 16 x i32> zeroinitializer
  %out = call <vscale x 16 x i8> @llvm.aarch64.sve.sqsub.x.nxv16i8(<vscale x 16 x i8> %a, <vscale x 16 x i8> %splat)
  ret <vscale x 16 x i8> %out
}

define <vscale x 8 x i16> @sqsub_h_lowimm(<vscale x 8 x i16> %a) {
; CHECK-LABEL: sqsub_h_lowimm:
; CHECK:       // %bb.0:
; CHECK-NEXT:    sqsub z0.h, z0.h, #43 // =0x2b
; CHECK-NEXT:    ret
  %elt = insertelement <vscale x 8 x i16> undef, i16 43, i32 0
  %splat = shufflevector <vscale x 8 x i16> %elt, <vscale x 8 x i16> undef, <vscale x 8 x i32> zeroinitializer
  %out = call <vscale x 8 x i16> @llvm.aarch64.sve.sqsub.x.nxv8i16(<vscale x 8 x i16> %a, <vscale x 8 x i16> %splat)
  ret <vscale x 8 x i16> %out
}

define <vscale x 8 x i16> @sqsub_h_highimm(<vscale x 8 x i16> %a) {
; CHECK-LABEL: sqsub_h_highimm:
; CHECK:       // %bb.0:
; CHECK-NEXT:    sqsub z0.h, z0.h, #2048 // =0x800
; CHECK-NEXT:    ret
  %elt = insertelement <vscale x 8 x i16> undef, i16 2048, i32 0
  %splat = shufflevector <vscale x 8 x i16> %elt, <vscale x 8 x i16> undef, <vscale x 8 x i32> zeroinitializer
  %out = call <vscale x 8 x i16> @llvm.aarch64.sve.sqsub.x.nxv8i16(<vscale x 8 x i16> %a, <vscale x 8 x i16> %splat)
  ret <vscale x 8 x i16> %out
}

; Immediate instruction form only supports positive values.
define <vscale x 8 x i16> @sqsub_h_negimm(<vscale x 8 x i16> %a) {
; CHECK-LABEL: sqsub_h_negimm:
; CHECK:       // %bb.0:
; CHECK-NEXT:    sqadd z0.h, z0.h, #128 // =0x80
; CHECK-NEXT:    ret
  %elt = insertelement <vscale x 8 x i16> undef, i16 -128, i32 0
  %splat = shufflevector <vscale x 8 x i16> %elt, <vscale x 8 x i16> undef, <vscale x 8 x i32> zeroinitializer
  %out = call <vscale x 8 x i16> @llvm.aarch64.sve.sqsub.x.nxv8i16(<vscale x 8 x i16> %a, <vscale x 8 x i16> %splat)
  ret <vscale x 8 x i16> %out
}

define <vscale x 4 x i32> @sqsub_s_lowimm(<vscale x 4 x i32> %a) {
; CHECK-LABEL: sqsub_s_lowimm:
; CHECK:       // %bb.0:
; CHECK-NEXT:    sqsub z0.s, z0.s, #1 // =0x1
; CHECK-NEXT:    ret
  %elt = insertelement <vscale x 4 x i32> undef, i32 1, i32 0
  %splat = shufflevector <vscale x 4 x i32> %elt, <vscale x 4 x i32> undef, <vscale x 4 x i32> zeroinitializer
  %out = call <vscale x 4 x i32> @llvm.aarch64.sve.sqsub.x.nxv4i32(<vscale x 4 x i32> %a, <vscale x 4 x i32> %splat)
  ret <vscale x 4 x i32> %out
}

define <vscale x 4 x i32> @sqsub_s_highimm(<vscale x 4 x i32> %a) {
; CHECK-LABEL: sqsub_s_highimm:
; CHECK:       // %bb.0:
; CHECK-NEXT:    sqsub z0.s, z0.s, #8192 // =0x2000
; CHECK-NEXT:    ret
  %elt = insertelement <vscale x 4 x i32> undef, i32 8192, i32 0
  %splat = shufflevector <vscale x 4 x i32> %elt, <vscale x 4 x i32> undef, <vscale x 4 x i32> zeroinitializer
  %out = call <vscale x 4 x i32> @llvm.aarch64.sve.sqsub.x.nxv4i32(<vscale x 4 x i32> %a, <vscale x 4 x i32> %splat)
  ret <vscale x 4 x i32> %out
}

; Immediate instruction form only supports positive values.
define <vscale x 4 x i32> @sqsub_s_negimm(<vscale x 4 x i32> %a) {
; CHECK-LABEL: sqsub_s_negimm:
; CHECK:       // %bb.0:
; CHECK-NEXT:    sqadd z0.s, z0.s, #32768 // =0x8000
; CHECK-NEXT:    ret
  %elt = insertelement <vscale x 4 x i32> undef, i32 -32768, i32 0
  %splat = shufflevector <vscale x 4 x i32> %elt, <vscale x 4 x i32> undef, <vscale x 4 x i32> zeroinitializer
  %out = call <vscale x 4 x i32> @llvm.aarch64.sve.sqsub.x.nxv4i32(<vscale x 4 x i32> %a, <vscale x 4 x i32> %splat)
  ret <vscale x 4 x i32> %out
}

define <vscale x 2 x i64> @sqsub_d_lowimm(<vscale x 2 x i64> %a) {
; CHECK-LABEL: sqsub_d_lowimm:
; CHECK:       // %bb.0:
; CHECK-NEXT:    sqsub z0.d, z0.d, #255 // =0xff
; CHECK-NEXT:    ret
  %elt = insertelement <vscale x 2 x i64> undef, i64 255, i32 0
  %splat = shufflevector <vscale x 2 x i64> %elt, <vscale x 2 x i64> undef, <vscale x 2 x i32> zeroinitializer
  %out = call <vscale x 2 x i64> @llvm.aarch64.sve.sqsub.x.nxv2i64(<vscale x 2 x i64> %a, <vscale x 2 x i64> %splat)
  ret <vscale x 2 x i64> %out
}

define <vscale x 2 x i64> @sqsub_d_highimm(<vscale x 2 x i64> %a) {
; CHECK-LABEL: sqsub_d_highimm:
; CHECK:       // %bb.0:
; CHECK-NEXT:    sqsub z0.d, z0.d, #65280 // =0xff00
; CHECK-NEXT:    ret
  %elt = insertelement <vscale x 2 x i64> undef, i64 65280, i32 0
  %splat = shufflevector <vscale x 2 x i64> %elt, <vscale x 2 x i64> undef, <vscale x 2 x i32> zeroinitializer
  %out = call <vscale x 2 x i64> @llvm.aarch64.sve.sqsub.x.nxv2i64(<vscale x 2 x i64> %a, <vscale x 2 x i64> %splat)
  ret <vscale x 2 x i64> %out
}

; Immediate instruction form only supports positive values.
define <vscale x 2 x i64> @sqsub_d_negimm(<vscale x 2 x i64> %a) {
; CHECK-LABEL: sqsub_d_negimm:
; CHECK:       // %bb.0:
; CHECK-NEXT:    sqadd z0.d, z0.d, #57344 // =0xe000
; CHECK-NEXT:    ret
  %elt = insertelement <vscale x 2 x i64> undef, i64 -57344, i32 0
  %splat = shufflevector <vscale x 2 x i64> %elt, <vscale x 2 x i64> undef, <vscale x 2 x i32> zeroinitializer
  %out = call <vscale x 2 x i64> @llvm.aarch64.sve.sqsub.x.nxv2i64(<vscale x 2 x i64> %a, <vscale x 2 x i64> %splat)
  ret <vscale x 2 x i64> %out
}

; UQADD

define <vscale x 16 x i8> @uqadd_b_lowimm(<vscale x 16 x i8> %a) {
; CHECK-LABEL: uqadd_b_lowimm:
; CHECK:       // %bb.0:
; CHECK-NEXT:    uqadd z0.b, z0.b, #27 // =0x1b
; CHECK-NEXT:    ret
  %elt = insertelement <vscale x 16 x i8> undef, i8 27, i32 0
  %splat = shufflevector <vscale x 16 x i8> %elt, <vscale x 16 x i8> undef, <vscale x 16 x i32> zeroinitializer
  %out = call <vscale x 16 x i8> @llvm.aarch64.sve.uqadd.x.nxv16i8(<vscale x 16 x i8> %a, <vscale x 16 x i8> %splat)
  ret <vscale x 16 x i8> %out
}

define <vscale x 8 x i16> @uqadd_h_lowimm(<vscale x 8 x i16> %a) {
; CHECK-LABEL: uqadd_h_lowimm:
; CHECK:       // %bb.0:
; CHECK-NEXT:    uqadd z0.h, z0.h, #43 // =0x2b
; CHECK-NEXT:    ret
  %elt = insertelement <vscale x 8 x i16> undef, i16 43, i32 0
  %splat = shufflevector <vscale x 8 x i16> %elt, <vscale x 8 x i16> undef, <vscale x 8 x i32> zeroinitializer
  %out = call <vscale x 8 x i16> @llvm.aarch64.sve.uqadd.x.nxv8i16(<vscale x 8 x i16> %a, <vscale x 8 x i16> %splat)
  ret <vscale x 8 x i16> %out
}

define <vscale x 8 x i16> @uqadd_h_highimm(<vscale x 8 x i16> %a) {
; CHECK-LABEL: uqadd_h_highimm:
; CHECK:       // %bb.0:
; CHECK-NEXT:    uqadd z0.h, z0.h, #2048 // =0x800
; CHECK-NEXT:    ret
  %elt = insertelement <vscale x 8 x i16> undef, i16 2048, i32 0
  %splat = shufflevector <vscale x 8 x i16> %elt, <vscale x 8 x i16> undef, <vscale x 8 x i32> zeroinitializer
  %out = call <vscale x 8 x i16> @llvm.aarch64.sve.uqadd.x.nxv8i16(<vscale x 8 x i16> %a, <vscale x 8 x i16> %splat)
  ret <vscale x 8 x i16> %out
}

define <vscale x 4 x i32> @uqadd_s_lowimm(<vscale x 4 x i32> %a) {
; CHECK-LABEL: uqadd_s_lowimm:
; CHECK:       // %bb.0:
; CHECK-NEXT:    uqadd z0.s, z0.s, #1 // =0x1
; CHECK-NEXT:    ret
  %elt = insertelement <vscale x 4 x i32> undef, i32 1, i32 0
  %splat = shufflevector <vscale x 4 x i32> %elt, <vscale x 4 x i32> undef, <vscale x 4 x i32> zeroinitializer
  %out = call <vscale x 4 x i32> @llvm.aarch64.sve.uqadd.x.nxv4i32(<vscale x 4 x i32> %a, <vscale x 4 x i32> %splat)
  ret <vscale x 4 x i32> %out
}

define <vscale x 4 x i32> @uqadd_s_highimm(<vscale x 4 x i32> %a) {
; CHECK-LABEL: uqadd_s_highimm:
; CHECK:       // %bb.0:
; CHECK-NEXT:    uqadd z0.s, z0.s, #8192 // =0x2000
; CHECK-NEXT:    ret
  %elt = insertelement <vscale x 4 x i32> undef, i32 8192, i32 0
  %splat = shufflevector <vscale x 4 x i32> %elt, <vscale x 4 x i32> undef, <vscale x 4 x i32> zeroinitializer
  %out = call <vscale x 4 x i32> @llvm.aarch64.sve.uqadd.x.nxv4i32(<vscale x 4 x i32> %a, <vscale x 4 x i32> %splat)
  ret <vscale x 4 x i32> %out
}

define <vscale x 2 x i64> @uqadd_d_lowimm(<vscale x 2 x i64> %a) {
; CHECK-LABEL: uqadd_d_lowimm:
; CHECK:       // %bb.0:
; CHECK-NEXT:    uqadd z0.d, z0.d, #255 // =0xff
; CHECK-NEXT:    ret
  %elt = insertelement <vscale x 2 x i64> undef, i64 255, i32 0
  %splat = shufflevector <vscale x 2 x i64> %elt, <vscale x 2 x i64> undef, <vscale x 2 x i32> zeroinitializer
  %out = call <vscale x 2 x i64> @llvm.aarch64.sve.uqadd.x.nxv2i64(<vscale x 2 x i64> %a, <vscale x 2 x i64> %splat)
  ret <vscale x 2 x i64> %out
}

define <vscale x 2 x i64> @uqadd_d_highimm(<vscale x 2 x i64> %a) {
; CHECK-LABEL: uqadd_d_highimm:
; CHECK:       // %bb.0:
; CHECK-NEXT:    uqadd z0.d, z0.d, #65280 // =0xff00
; CHECK-NEXT:    ret
  %elt = insertelement <vscale x 2 x i64> undef, i64 65280, i32 0
  %splat = shufflevector <vscale x 2 x i64> %elt, <vscale x 2 x i64> undef, <vscale x 2 x i32> zeroinitializer
  %out = call <vscale x 2 x i64> @llvm.aarch64.sve.uqadd.x.nxv2i64(<vscale x 2 x i64> %a, <vscale x 2 x i64> %splat)
  ret <vscale x 2 x i64> %out
}
define <vscale x 16 x i8> @uqsub_b_lowimm(<vscale x 16 x i8> %a) {
; CHECK-LABEL: uqsub_b_lowimm:
; CHECK:       // %bb.0:
; CHECK-NEXT:    uqsub z0.b, z0.b, #27 // =0x1b
; CHECK-NEXT:    ret
  %elt = insertelement <vscale x 16 x i8> undef, i8 27, i32 0
  %splat = shufflevector <vscale x 16 x i8> %elt, <vscale x 16 x i8> undef, <vscale x 16 x i32> zeroinitializer
  %out = call <vscale x 16 x i8> @llvm.aarch64.sve.uqsub.x.nxv16i8(<vscale x 16 x i8> %a, <vscale x 16 x i8> %splat)
  ret <vscale x 16 x i8> %out
}

define <vscale x 8 x i16> @uqsub_h_lowimm(<vscale x 8 x i16> %a) {
; CHECK-LABEL: uqsub_h_lowimm:
; CHECK:       // %bb.0:
; CHECK-NEXT:    uqsub z0.h, z0.h, #43 // =0x2b
; CHECK-NEXT:    ret
  %elt = insertelement <vscale x 8 x i16> undef, i16 43, i32 0
  %splat = shufflevector <vscale x 8 x i16> %elt, <vscale x 8 x i16> undef, <vscale x 8 x i32> zeroinitializer
  %out = call <vscale x 8 x i16> @llvm.aarch64.sve.uqsub.x.nxv8i16(<vscale x 8 x i16> %a, <vscale x 8 x i16> %splat)
  ret <vscale x 8 x i16> %out
}

define <vscale x 8 x i16> @uqsub_h_highimm(<vscale x 8 x i16> %a) {
; CHECK-LABEL: uqsub_h_highimm:
; CHECK:       // %bb.0:
; CHECK-NEXT:    uqsub z0.h, z0.h, #2048 // =0x800
; CHECK-NEXT:    ret
  %elt = insertelement <vscale x 8 x i16> undef, i16 2048, i32 0
  %splat = shufflevector <vscale x 8 x i16> %elt, <vscale x 8 x i16> undef, <vscale x 8 x i32> zeroinitializer
  %out = call <vscale x 8 x i16> @llvm.aarch64.sve.uqsub.x.nxv8i16(<vscale x 8 x i16> %a, <vscale x 8 x i16> %splat)
  ret <vscale x 8 x i16> %out
}

define <vscale x 4 x i32> @uqsub_s_lowimm(<vscale x 4 x i32> %a) {
; CHECK-LABEL: uqsub_s_lowimm:
; CHECK:       // %bb.0:
; CHECK-NEXT:    uqsub z0.s, z0.s, #1 // =0x1
; CHECK-NEXT:    ret
  %elt = insertelement <vscale x 4 x i32> undef, i32 1, i32 0
  %splat = shufflevector <vscale x 4 x i32> %elt, <vscale x 4 x i32> undef, <vscale x 4 x i32> zeroinitializer
  %out = call <vscale x 4 x i32> @llvm.aarch64.sve.uqsub.x.nxv4i32(<vscale x 4 x i32> %a, <vscale x 4 x i32> %splat)
  ret <vscale x 4 x i32> %out
}

define <vscale x 4 x i32> @uqsub_s_highimm(<vscale x 4 x i32> %a) {
; CHECK-LABEL: uqsub_s_highimm:
; CHECK:       // %bb.0:
; CHECK-NEXT:    uqsub z0.s, z0.s, #8192 // =0x2000
; CHECK-NEXT:    ret
  %elt = insertelement <vscale x 4 x i32> undef, i32 8192, i32 0
  %splat = shufflevector <vscale x 4 x i32> %elt, <vscale x 4 x i32> undef, <vscale x 4 x i32> zeroinitializer
  %out = call <vscale x 4 x i32> @llvm.aarch64.sve.uqsub.x.nxv4i32(<vscale x 4 x i32> %a, <vscale x 4 x i32> %splat)
  ret <vscale x 4 x i32> %out
}

define <vscale x 2 x i64> @uqsub_d_lowimm(<vscale x 2 x i64> %a) {
; CHECK-LABEL: uqsub_d_lowimm:
; CHECK:       // %bb.0:
; CHECK-NEXT:    uqsub z0.d, z0.d, #255 // =0xff
; CHECK-NEXT:    ret
  %elt = insertelement <vscale x 2 x i64> undef, i64 255, i32 0
  %splat = shufflevector <vscale x 2 x i64> %elt, <vscale x 2 x i64> undef, <vscale x 2 x i32> zeroinitializer
  %out = call <vscale x 2 x i64> @llvm.aarch64.sve.uqsub.x.nxv2i64(<vscale x 2 x i64> %a, <vscale x 2 x i64> %splat)
  ret <vscale x 2 x i64> %out
}

define <vscale x 2 x i64> @uqsub_d_highimm(<vscale x 2 x i64> %a) {
; CHECK-LABEL: uqsub_d_highimm:
; CHECK:       // %bb.0:
; CHECK-NEXT:    uqsub z0.d, z0.d, #65280 // =0xff00
; CHECK-NEXT:    ret
  %elt = insertelement <vscale x 2 x i64> undef, i64 65280, i32 0
  %splat = shufflevector <vscale x 2 x i64> %elt, <vscale x 2 x i64> undef, <vscale x 2 x i32> zeroinitializer
  %out = call <vscale x 2 x i64> @llvm.aarch64.sve.uqsub.x.nxv2i64(<vscale x 2 x i64> %a, <vscale x 2 x i64> %splat)
  ret <vscale x 2 x i64> %out
}

define <vscale x 16 x i8> @asr_i8(<vscale x 16 x i1> %pg, <vscale x 16 x i8> %a) {
; CHECK-LABEL: asr_i8:
; CHECK:       // %bb.0:
; CHECK-NEXT:    asr z0.b, p0/m, z0.b, #8
; CHECK-NEXT:    ret
  %elt = insertelement <vscale x 16 x i8> undef, i8 9, i32 0
  %splat = shufflevector <vscale x 16 x i8> %elt, <vscale x 16 x i8> undef, <vscale x 16 x i32> zeroinitializer
  %out = call <vscale x 16 x i8> @llvm.aarch64.sve.asr.nxv16i8(<vscale x 16 x i1> %pg, <vscale x 16 x i8> %a, <vscale x 16 x i8> %splat)
  ret <vscale x 16 x i8> %out
}

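; Note: the splatted shift of 9 exceeds the i8 element width, but an
; arithmetic shift right by the element width or more produces the same
; all-sign-bits result, so it can be clamped to the legal immediate #8.
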
define <vscale x 16 x i8> @asr_i8_all_active(<vscale x 16 x i8> %a) {
; CHECK-LABEL: asr_i8_all_active:
; CHECK:       // %bb.0:
; CHECK-NEXT:    asr z0.b, z0.b, #8
; CHECK-NEXT:    ret
  %pg = call <vscale x 16 x i1> @llvm.aarch64.sve.ptrue.nxv16i1(i32 31)
  %elt = insertelement <vscale x 16 x i8> undef, i8 8, i32 0
  %splat = shufflevector <vscale x 16 x i8> %elt, <vscale x 16 x i8> undef, <vscale x 16 x i32> zeroinitializer
  %out = call <vscale x 16 x i8> @llvm.aarch64.sve.asr.u.nxv16i8(<vscale x 16 x i1> %pg, <vscale x 16 x i8> %a, <vscale x 16 x i8> %splat)
  ret <vscale x 16 x i8> %out
}

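; Note: the ".u" intrinsics leave lanes that are inactive in %pg undefined, so
; with an all-true ptrue the unpredicated immediate form above is a valid match.
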
; Ensure we don't match a right shift by zero to the immediate form.
define <vscale x 16 x i8> @asr_i8_too_small(<vscale x 16 x i1> %pg, <vscale x 16 x i8> %a) {
; CHECK-LABEL: asr_i8_too_small:
; CHECK:       // %bb.0:
; CHECK-NEXT:    mov z1.b, #0 // =0x0
; CHECK-NEXT:    asr z0.b, p0/m, z0.b, z1.b
; CHECK-NEXT:    ret
  %out = call <vscale x 16 x i8> @llvm.aarch64.sve.asr.nxv16i8(<vscale x 16 x i1> %pg, <vscale x 16 x i8> %a, <vscale x 16 x i8> zeroinitializer)
  ret <vscale x 16 x i8> %out
}

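; Note: the right-shift immediate forms encode shift amounts from 1 up to the
; element width, so a shift of zero only has the register form used above.
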
define <vscale x 8 x i16> @asr_i16(<vscale x 8 x i1> %pg, <vscale x 8 x i16> %a) {
; CHECK-LABEL: asr_i16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    asr z0.h, p0/m, z0.h, #16
; CHECK-NEXT:    ret
  %elt = insertelement <vscale x 8 x i16> undef, i16 17, i32 0
  %splat = shufflevector <vscale x 8 x i16> %elt, <vscale x 8 x i16> undef, <vscale x 8 x i32> zeroinitializer
  %out = call <vscale x 8 x i16> @llvm.aarch64.sve.asr.nxv8i16(<vscale x 8 x i1> %pg, <vscale x 8 x i16> %a, <vscale x 8 x i16> %splat)
  ret <vscale x 8 x i16> %out
}

define <vscale x 8 x i16> @asr_i16_all_active(<vscale x 8 x i16> %a) {
; CHECK-LABEL: asr_i16_all_active:
; CHECK:       // %bb.0:
; CHECK-NEXT:    asr z0.h, z0.h, #16
; CHECK-NEXT:    ret
  %pg = call <vscale x 8 x i1> @llvm.aarch64.sve.ptrue.nxv8i1(i32 31)
  %elt = insertelement <vscale x 8 x i16> undef, i16 16, i32 0
  %splat = shufflevector <vscale x 8 x i16> %elt, <vscale x 8 x i16> undef, <vscale x 8 x i32> zeroinitializer
  %out = call <vscale x 8 x i16> @llvm.aarch64.sve.asr.u.nxv8i16(<vscale x 8 x i1> %pg, <vscale x 8 x i16> %a, <vscale x 8 x i16> %splat)
  ret <vscale x 8 x i16> %out
}

; Ensure we don't match a right shift by zero to the immediate form.
define <vscale x 8 x i16> @asr_i16_too_small(<vscale x 8 x i1> %pg, <vscale x 8 x i16> %a) {
; CHECK-LABEL: asr_i16_too_small:
; CHECK:       // %bb.0:
; CHECK-NEXT:    mov z1.h, #0 // =0x0
; CHECK-NEXT:    asr z0.h, p0/m, z0.h, z1.h
; CHECK-NEXT:    ret
  %out = call <vscale x 8 x i16> @llvm.aarch64.sve.asr.nxv8i16(<vscale x 8 x i1> %pg, <vscale x 8 x i16> %a, <vscale x 8 x i16> zeroinitializer)
  ret <vscale x 8 x i16> %out
}

define <vscale x 4 x i32> @asr_i32(<vscale x 4 x i1> %pg, <vscale x 4 x i32> %a) {
; CHECK-LABEL: asr_i32:
; CHECK:       // %bb.0:
; CHECK-NEXT:    asr z0.s, p0/m, z0.s, #32
; CHECK-NEXT:    ret
  %elt = insertelement <vscale x 4 x i32> undef, i32 33, i32 0
  %splat = shufflevector <vscale x 4 x i32> %elt, <vscale x 4 x i32> undef, <vscale x 4 x i32> zeroinitializer
  %out = call <vscale x 4 x i32> @llvm.aarch64.sve.asr.nxv4i32(<vscale x 4 x i1> %pg, <vscale x 4 x i32> %a, <vscale x 4 x i32> %splat)
  ret <vscale x 4 x i32> %out
}

define <vscale x 4 x i32> @asr_i32_all_active(<vscale x 4 x i32> %a) {
; CHECK-LABEL: asr_i32_all_active:
; CHECK:       // %bb.0:
; CHECK-NEXT:    asr z0.s, z0.s, #32
; CHECK-NEXT:    ret
  %pg = call <vscale x 4 x i1> @llvm.aarch64.sve.ptrue.nxv4i1(i32 31)
  %elt = insertelement <vscale x 4 x i32> undef, i32 32, i32 0
  %splat = shufflevector <vscale x 4 x i32> %elt, <vscale x 4 x i32> undef, <vscale x 4 x i32> zeroinitializer
  %out = call <vscale x 4 x i32> @llvm.aarch64.sve.asr.u.nxv4i32(<vscale x 4 x i1> %pg, <vscale x 4 x i32> %a, <vscale x 4 x i32> %splat)
  ret <vscale x 4 x i32> %out
}

; Ensure we don't match a right shift by zero to the immediate form.
define <vscale x 4 x i32> @asr_i32_too_small(<vscale x 4 x i1> %pg, <vscale x 4 x i32> %a) {
; CHECK-LABEL: asr_i32_too_small:
; CHECK:       // %bb.0:
; CHECK-NEXT:    mov z1.s, #0 // =0x0
; CHECK-NEXT:    asr z0.s, p0/m, z0.s, z1.s
; CHECK-NEXT:    ret
  %out = call <vscale x 4 x i32> @llvm.aarch64.sve.asr.nxv4i32(<vscale x 4 x i1> %pg, <vscale x 4 x i32> %a, <vscale x 4 x i32> zeroinitializer)
  ret <vscale x 4 x i32> %out
}

define <vscale x 2 x i64> @asr_i64(<vscale x 2 x i1> %pg, <vscale x 2 x i64> %a) {
; CHECK-LABEL: asr_i64:
; CHECK:       // %bb.0:
; CHECK-NEXT:    asr z0.d, p0/m, z0.d, #64
; CHECK-NEXT:    ret
  %elt = insertelement <vscale x 2 x i64> undef, i64 65, i64 0
  %splat = shufflevector <vscale x 2 x i64> %elt, <vscale x 2 x i64> undef, <vscale x 2 x i32> zeroinitializer
  %out = call <vscale x 2 x i64> @llvm.aarch64.sve.asr.nxv2i64(<vscale x 2 x i1> %pg, <vscale x 2 x i64> %a, <vscale x 2 x i64> %splat)
  ret <vscale x 2 x i64> %out
}

define <vscale x 2 x i64> @asr_i64_all_active(<vscale x 2 x i64> %a) {
; CHECK-LABEL: asr_i64_all_active:
; CHECK:       // %bb.0:
; CHECK-NEXT:    asr z0.d, z0.d, #64
; CHECK-NEXT:    ret
  %pg = call <vscale x 2 x i1> @llvm.aarch64.sve.ptrue.nxv2i1(i32 31)
  %elt = insertelement <vscale x 2 x i64> undef, i64 64, i64 0
  %splat = shufflevector <vscale x 2 x i64> %elt, <vscale x 2 x i64> undef, <vscale x 2 x i32> zeroinitializer
  %out = call <vscale x 2 x i64> @llvm.aarch64.sve.asr.u.nxv2i64(<vscale x 2 x i1> %pg, <vscale x 2 x i64> %a, <vscale x 2 x i64> %splat)
  ret <vscale x 2 x i64> %out
}

; Ensure we don't match a right shift by zero to the immediate form.
define <vscale x 2 x i64> @asr_i64_too_small(<vscale x 2 x i1> %pg, <vscale x 2 x i64> %a) {
; CHECK-LABEL: asr_i64_too_small:
; CHECK:       // %bb.0:
; CHECK-NEXT:    mov z1.d, #0 // =0x0
; CHECK-NEXT:    asr z0.d, p0/m, z0.d, z1.d
; CHECK-NEXT:    ret
  %out = call <vscale x 2 x i64> @llvm.aarch64.sve.asr.nxv2i64(<vscale x 2 x i1> %pg, <vscale x 2 x i64> %a, <vscale x 2 x i64> zeroinitializer)
  ret <vscale x 2 x i64> %out
}

define <vscale x 16 x i8> @lsl_i8(<vscale x 16 x i1> %pg, <vscale x 16 x i8> %a) {
; CHECK-LABEL: lsl_i8:
; CHECK:       // %bb.0:
; CHECK-NEXT:    lsl z0.b, p0/m, z0.b, #7
; CHECK-NEXT:    ret
  %elt = insertelement <vscale x 16 x i8> undef, i8 7, i32 0
  %splat = shufflevector <vscale x 16 x i8> %elt, <vscale x 16 x i8> undef, <vscale x 16 x i32> zeroinitializer
  %out = call <vscale x 16 x i8> @llvm.aarch64.sve.lsl.nxv16i8(<vscale x 16 x i1> %pg, <vscale x 16 x i8> %a, <vscale x 16 x i8> %splat)
  ret <vscale x 16 x i8> %out
}

define <vscale x 16 x i8> @lsl_i8_all_active(<vscale x 16 x i8> %a) {
; CHECK-LABEL: lsl_i8_all_active:
; CHECK:       // %bb.0:
; CHECK-NEXT:    lsl z0.b, z0.b, #7
; CHECK-NEXT:    ret
  %pg = call <vscale x 16 x i1> @llvm.aarch64.sve.ptrue.nxv16i1(i32 31)
  %elt = insertelement <vscale x 16 x i8> undef, i8 7, i32 0
  %splat = shufflevector <vscale x 16 x i8> %elt, <vscale x 16 x i8> undef, <vscale x 16 x i32> zeroinitializer
  %out = call <vscale x 16 x i8> @llvm.aarch64.sve.lsl.u.nxv16i8(<vscale x 16 x i1> %pg, <vscale x 16 x i8> %a, <vscale x 16 x i8> %splat)
  ret <vscale x 16 x i8> %out
}

; Ensure we don't match a left shift by the element bitwidth (or more) to the
; immediate form.
define <vscale x 16 x i8> @lsl_i8_too_big(<vscale x 16 x i1> %pg, <vscale x 16 x i8> %a) {
; CHECK-LABEL: lsl_i8_too_big:
; CHECK:       // %bb.0:
; CHECK-NEXT:    mov z1.b, #8 // =0x8
; CHECK-NEXT:    lsl z0.b, p0/m, z0.b, z1.b
; CHECK-NEXT:    ret
  %elt = insertelement <vscale x 16 x i8> undef, i8 8, i32 0
  %splat = shufflevector <vscale x 16 x i8> %elt, <vscale x 16 x i8> undef, <vscale x 16 x i32> zeroinitializer
  %out = call <vscale x 16 x i8> @llvm.aarch64.sve.lsl.nxv16i8(<vscale x 16 x i1> %pg, <vscale x 16 x i8> %a, <vscale x 16 x i8> %splat)
  ret <vscale x 16 x i8> %out
}

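; Note: unlike the right shifts, the left-shift immediate encodes amounts from
; 0 to the element width minus one, so the shift by 8 above needs the register
; form while the shift by zero in lsl_i8_zero below still matches #0.
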
define <vscale x 16 x i8> @lsl_i8_zero(<vscale x 16 x i1> %pg, <vscale x 16 x i8> %a) {
; CHECK-LABEL: lsl_i8_zero:
; CHECK:       // %bb.0:
; CHECK-NEXT:    lsl z0.b, p0/m, z0.b, #0
; CHECK-NEXT:    ret
  %out = call <vscale x 16 x i8> @llvm.aarch64.sve.lsl.nxv16i8(<vscale x 16 x i1> %pg, <vscale x 16 x i8> %a, <vscale x 16 x i8> zeroinitializer)
  ret <vscale x 16 x i8> %out
}

define <vscale x 8 x i16> @lsl_i16(<vscale x 8 x i1> %pg, <vscale x 8 x i16> %a) {
; CHECK-LABEL: lsl_i16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    lsl z0.h, p0/m, z0.h, #15
; CHECK-NEXT:    ret
  %elt = insertelement <vscale x 8 x i16> undef, i16 15, i32 0
  %splat = shufflevector <vscale x 8 x i16> %elt, <vscale x 8 x i16> undef, <vscale x 8 x i32> zeroinitializer
  %out = call <vscale x 8 x i16> @llvm.aarch64.sve.lsl.nxv8i16(<vscale x 8 x i1> %pg, <vscale x 8 x i16> %a, <vscale x 8 x i16> %splat)
  ret <vscale x 8 x i16> %out
}

define <vscale x 8 x i16> @lsl_i16_all_active(<vscale x 8 x i16> %a) {
; CHECK-LABEL: lsl_i16_all_active:
; CHECK:       // %bb.0:
; CHECK-NEXT:    lsl z0.h, z0.h, #15
; CHECK-NEXT:    ret
  %pg = call <vscale x 8 x i1> @llvm.aarch64.sve.ptrue.nxv8i1(i32 31)
  %elt = insertelement <vscale x 8 x i16> undef, i16 15, i32 0
  %splat = shufflevector <vscale x 8 x i16> %elt, <vscale x 8 x i16> undef, <vscale x 8 x i32> zeroinitializer
  %out = call <vscale x 8 x i16> @llvm.aarch64.sve.lsl.u.nxv8i16(<vscale x 8 x i1> %pg, <vscale x 8 x i16> %a, <vscale x 8 x i16> %splat)
  ret <vscale x 8 x i16> %out
}

; Ensure we don't match a left shift by the element bitwidth (or more) to the
; immediate form.
define <vscale x 8 x i16> @lsl_i16_too_big(<vscale x 8 x i1> %pg, <vscale x 8 x i16> %a) {
; CHECK-LABEL: lsl_i16_too_big:
; CHECK:       // %bb.0:
; CHECK-NEXT:    mov z1.h, #16 // =0x10
; CHECK-NEXT:    lsl z0.h, p0/m, z0.h, z1.h
; CHECK-NEXT:    ret
  %elt = insertelement <vscale x 8 x i16> undef, i16 16, i32 0
  %splat = shufflevector <vscale x 8 x i16> %elt, <vscale x 8 x i16> undef, <vscale x 8 x i32> zeroinitializer
  %out = call <vscale x 8 x i16> @llvm.aarch64.sve.lsl.nxv8i16(<vscale x 8 x i1> %pg, <vscale x 8 x i16> %a, <vscale x 8 x i16> %splat)
  ret <vscale x 8 x i16> %out
}

define <vscale x 8 x i16> @lsl_i16_zero(<vscale x 8 x i1> %pg, <vscale x 8 x i16> %a) {
; CHECK-LABEL: lsl_i16_zero:
; CHECK:       // %bb.0:
; CHECK-NEXT:    lsl z0.h, p0/m, z0.h, #0
; CHECK-NEXT:    ret
  %out = call <vscale x 8 x i16> @llvm.aarch64.sve.lsl.nxv8i16(<vscale x 8 x i1> %pg, <vscale x 8 x i16> %a, <vscale x 8 x i16> zeroinitializer)
  ret <vscale x 8 x i16> %out
}

define <vscale x 4 x i32> @lsl_i32(<vscale x 4 x i1> %pg, <vscale x 4 x i32> %a) {
; CHECK-LABEL: lsl_i32:
; CHECK:       // %bb.0:
; CHECK-NEXT:    lsl z0.s, p0/m, z0.s, #31
; CHECK-NEXT:    ret
  %elt = insertelement <vscale x 4 x i32> undef, i32 31, i32 0
  %splat = shufflevector <vscale x 4 x i32> %elt, <vscale x 4 x i32> undef, <vscale x 4 x i32> zeroinitializer
  %out = call <vscale x 4 x i32> @llvm.aarch64.sve.lsl.nxv4i32(<vscale x 4 x i1> %pg, <vscale x 4 x i32> %a, <vscale x 4 x i32> %splat)
  ret <vscale x 4 x i32> %out
}

define <vscale x 4 x i32> @lsl_i32_all_active(<vscale x 4 x i32> %a) {
; CHECK-LABEL: lsl_i32_all_active:
; CHECK:       // %bb.0:
; CHECK-NEXT:    lsl z0.s, z0.s, #31
; CHECK-NEXT:    ret
  %pg = call <vscale x 4 x i1> @llvm.aarch64.sve.ptrue.nxv4i1(i32 31)
  %elt = insertelement <vscale x 4 x i32> undef, i32 31, i32 0
  %splat = shufflevector <vscale x 4 x i32> %elt, <vscale x 4 x i32> undef, <vscale x 4 x i32> zeroinitializer
  %out = call <vscale x 4 x i32> @llvm.aarch64.sve.lsl.u.nxv4i32(<vscale x 4 x i1> %pg, <vscale x 4 x i32> %a, <vscale x 4 x i32> %splat)
  ret <vscale x 4 x i32> %out
}

; Ensure we don't match a left shift by the element bitwidth (or more) to the
; immediate form.
define <vscale x 4 x i32> @lsl_i32_too_big(<vscale x 4 x i1> %pg, <vscale x 4 x i32> %a) {
; CHECK-LABEL: lsl_i32_too_big:
; CHECK:       // %bb.0:
; CHECK-NEXT:    mov z1.s, #32 // =0x20
; CHECK-NEXT:    lsl z0.s, p0/m, z0.s, z1.s
; CHECK-NEXT:    ret
  %elt = insertelement <vscale x 4 x i32> undef, i32 32, i32 0
  %splat = shufflevector <vscale x 4 x i32> %elt, <vscale x 4 x i32> undef, <vscale x 4 x i32> zeroinitializer
  %out = call <vscale x 4 x i32> @llvm.aarch64.sve.lsl.nxv4i32(<vscale x 4 x i1> %pg, <vscale x 4 x i32> %a, <vscale x 4 x i32> %splat)
  ret <vscale x 4 x i32> %out
}

define <vscale x 4 x i32> @lsl_i32_zero(<vscale x 4 x i1> %pg, <vscale x 4 x i32> %a) {
; CHECK-LABEL: lsl_i32_zero:
; CHECK:       // %bb.0:
; CHECK-NEXT:    lsl z0.s, p0/m, z0.s, #0
; CHECK-NEXT:    ret
  %out = call <vscale x 4 x i32> @llvm.aarch64.sve.lsl.nxv4i32(<vscale x 4 x i1> %pg, <vscale x 4 x i32> %a, <vscale x 4 x i32> zeroinitializer)
  ret <vscale x 4 x i32> %out
}

define <vscale x 2 x i64> @lsl_i64(<vscale x 2 x i1> %pg, <vscale x 2 x i64> %a) {
; CHECK-LABEL: lsl_i64:
; CHECK:       // %bb.0:
; CHECK-NEXT:    lsl z0.d, p0/m, z0.d, #63
; CHECK-NEXT:    ret
  %elt = insertelement <vscale x 2 x i64> undef, i64 63, i64 0
  %splat = shufflevector <vscale x 2 x i64> %elt, <vscale x 2 x i64> undef, <vscale x 2 x i32> zeroinitializer
  %out = call <vscale x 2 x i64> @llvm.aarch64.sve.lsl.nxv2i64(<vscale x 2 x i1> %pg, <vscale x 2 x i64> %a, <vscale x 2 x i64> %splat)
  ret <vscale x 2 x i64> %out
}

define <vscale x 2 x i64> @lsl_i64_all_active(<vscale x 2 x i64> %a) {
; CHECK-LABEL: lsl_i64_all_active:
; CHECK:       // %bb.0:
; CHECK-NEXT:    lsl z0.d, z0.d, #63
; CHECK-NEXT:    ret
  %pg = call <vscale x 2 x i1> @llvm.aarch64.sve.ptrue.nxv2i1(i32 31)
  %elt = insertelement <vscale x 2 x i64> undef, i64 63, i64 0
  %splat = shufflevector <vscale x 2 x i64> %elt, <vscale x 2 x i64> undef, <vscale x 2 x i32> zeroinitializer
  %out = call <vscale x 2 x i64> @llvm.aarch64.sve.lsl.u.nxv2i64(<vscale x 2 x i1> %pg, <vscale x 2 x i64> %a, <vscale x 2 x i64> %splat)
  ret <vscale x 2 x i64> %out
}

; Ensure we don't match a left shift by the element bitwidth (or more) to the
; immediate form.
define <vscale x 2 x i64> @lsl_i64_too_big(<vscale x 2 x i1> %pg, <vscale x 2 x i64> %a) {
; CHECK-LABEL: lsl_i64_too_big:
; CHECK:       // %bb.0:
; CHECK-NEXT:    mov z1.d, #64 // =0x40
; CHECK-NEXT:    lsl z0.d, p0/m, z0.d, z1.d
; CHECK-NEXT:    ret
  %elt = insertelement <vscale x 2 x i64> undef, i64 64, i64 0
  %splat = shufflevector <vscale x 2 x i64> %elt, <vscale x 2 x i64> undef, <vscale x 2 x i32> zeroinitializer
  %out = call <vscale x 2 x i64> @llvm.aarch64.sve.lsl.nxv2i64(<vscale x 2 x i1> %pg, <vscale x 2 x i64> %a, <vscale x 2 x i64> %splat)
  ret <vscale x 2 x i64> %out
}

define <vscale x 2 x i64> @lsl_i64_zero(<vscale x 2 x i1> %pg, <vscale x 2 x i64> %a) {
; CHECK-LABEL: lsl_i64_zero:
; CHECK:       // %bb.0:
; CHECK-NEXT:    lsl z0.d, p0/m, z0.d, #0
; CHECK-NEXT:    ret
  %out = call <vscale x 2 x i64> @llvm.aarch64.sve.lsl.nxv2i64(<vscale x 2 x i1> %pg, <vscale x 2 x i64> %a, <vscale x 2 x i64> zeroinitializer)
  ret <vscale x 2 x i64> %out
}

define <vscale x 16 x i8> @lsr_i8(<vscale x 16 x i1> %pg, <vscale x 16 x i8> %a) {
; CHECK-LABEL: lsr_i8:
; CHECK:       // %bb.0:
; CHECK-NEXT:    lsr z0.b, p0/m, z0.b, #8
; CHECK-NEXT:    ret
  %elt = insertelement <vscale x 16 x i8> undef, i8 9, i32 0
  %splat = shufflevector <vscale x 16 x i8> %elt, <vscale x 16 x i8> undef, <vscale x 16 x i32> zeroinitializer
  %out = call <vscale x 16 x i8> @llvm.aarch64.sve.lsr.nxv16i8(<vscale x 16 x i1> %pg, <vscale x 16 x i8> %a, <vscale x 16 x i8> %splat)
  ret <vscale x 16 x i8> %out
}

define <vscale x 16 x i8> @lsr_i8_all_active(<vscale x 16 x i8> %a) {
; CHECK-LABEL: lsr_i8_all_active:
; CHECK:       // %bb.0:
; CHECK-NEXT:    lsr z0.b, z0.b, #8
; CHECK-NEXT:    ret
  %pg = call <vscale x 16 x i1> @llvm.aarch64.sve.ptrue.nxv16i1(i32 31)
  %elt = insertelement <vscale x 16 x i8> undef, i8 8, i32 0
  %splat = shufflevector <vscale x 16 x i8> %elt, <vscale x 16 x i8> undef, <vscale x 16 x i32> zeroinitializer
  %out = call <vscale x 16 x i8> @llvm.aarch64.sve.lsr.u.nxv16i8(<vscale x 16 x i1> %pg, <vscale x 16 x i8> %a, <vscale x 16 x i8> %splat)
  ret <vscale x 16 x i8> %out
}

; Ensure we don't match a right shift by zero to the immediate form.
define <vscale x 16 x i8> @lsr_i8_too_small(<vscale x 16 x i1> %pg, <vscale x 16 x i8> %a) {
; CHECK-LABEL: lsr_i8_too_small:
; CHECK:       // %bb.0:
; CHECK-NEXT:    mov z1.b, #0 // =0x0
; CHECK-NEXT:    lsr z0.b, p0/m, z0.b, z1.b
; CHECK-NEXT:    ret
  %out = call <vscale x 16 x i8> @llvm.aarch64.sve.lsr.nxv16i8(<vscale x 16 x i1> %pg, <vscale x 16 x i8> %a, <vscale x 16 x i8> zeroinitializer)
  ret <vscale x 16 x i8> %out
}

define <vscale x 8 x i16> @lsr_i16(<vscale x 8 x i1> %pg, <vscale x 8 x i16> %a) {
; CHECK-LABEL: lsr_i16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    lsr z0.h, p0/m, z0.h, #16
; CHECK-NEXT:    ret
  %elt = insertelement <vscale x 8 x i16> undef, i16 17, i32 0
  %splat = shufflevector <vscale x 8 x i16> %elt, <vscale x 8 x i16> undef, <vscale x 8 x i32> zeroinitializer
  %out = call <vscale x 8 x i16> @llvm.aarch64.sve.lsr.nxv8i16(<vscale x 8 x i1> %pg, <vscale x 8 x i16> %a, <vscale x 8 x i16> %splat)
  ret <vscale x 8 x i16> %out
}

define <vscale x 8 x i16> @lsr_i16_all_active(<vscale x 8 x i16> %a) {
; CHECK-LABEL: lsr_i16_all_active:
; CHECK:       // %bb.0:
; CHECK-NEXT:    lsr z0.h, z0.h, #16
; CHECK-NEXT:    ret
  %pg = call <vscale x 8 x i1> @llvm.aarch64.sve.ptrue.nxv8i1(i32 31)
  %elt = insertelement <vscale x 8 x i16> undef, i16 16, i32 0
  %splat = shufflevector <vscale x 8 x i16> %elt, <vscale x 8 x i16> undef, <vscale x 8 x i32> zeroinitializer
  %out = call <vscale x 8 x i16> @llvm.aarch64.sve.lsr.u.nxv8i16(<vscale x 8 x i1> %pg, <vscale x 8 x i16> %a, <vscale x 8 x i16> %splat)
  ret <vscale x 8 x i16> %out
}

; Ensure we don't match a right shift by zero to the immediate form.
define <vscale x 8 x i16> @lsr_i16_too_small(<vscale x 8 x i1> %pg, <vscale x 8 x i16> %a) {
; CHECK-LABEL: lsr_i16_too_small:
; CHECK:       // %bb.0:
; CHECK-NEXT:    mov z1.h, #0 // =0x0
; CHECK-NEXT:    lsr z0.h, p0/m, z0.h, z1.h
; CHECK-NEXT:    ret
  %out = call <vscale x 8 x i16> @llvm.aarch64.sve.lsr.nxv8i16(<vscale x 8 x i1> %pg, <vscale x 8 x i16> %a, <vscale x 8 x i16> zeroinitializer)
  ret <vscale x 8 x i16> %out
}

define <vscale x 4 x i32> @lsr_i32(<vscale x 4 x i1> %pg, <vscale x 4 x i32> %a) {
; CHECK-LABEL: lsr_i32:
; CHECK:       // %bb.0:
; CHECK-NEXT:    lsr z0.s, p0/m, z0.s, #32
; CHECK-NEXT:    ret
  %elt = insertelement <vscale x 4 x i32> undef, i32 33, i32 0
  %splat = shufflevector <vscale x 4 x i32> %elt, <vscale x 4 x i32> undef, <vscale x 4 x i32> zeroinitializer
  %out = call <vscale x 4 x i32> @llvm.aarch64.sve.lsr.nxv4i32(<vscale x 4 x i1> %pg, <vscale x 4 x i32> %a, <vscale x 4 x i32> %splat)
  ret <vscale x 4 x i32> %out
}

define <vscale x 4 x i32> @lsr_i32_all_active(<vscale x 4 x i32> %a) {
; CHECK-LABEL: lsr_i32_all_active:
; CHECK:       // %bb.0:
; CHECK-NEXT:    lsr z0.s, z0.s, #32
; CHECK-NEXT:    ret
  %pg = call <vscale x 4 x i1> @llvm.aarch64.sve.ptrue.nxv4i1(i32 31)
  %elt = insertelement <vscale x 4 x i32> undef, i32 32, i32 0
  %splat = shufflevector <vscale x 4 x i32> %elt, <vscale x 4 x i32> undef, <vscale x 4 x i32> zeroinitializer
  %out = call <vscale x 4 x i32> @llvm.aarch64.sve.lsr.u.nxv4i32(<vscale x 4 x i1> %pg, <vscale x 4 x i32> %a, <vscale x 4 x i32> %splat)
  ret <vscale x 4 x i32> %out
}

; Ensure we don't match a right shift by zero to the immediate form.
define <vscale x 4 x i32> @lsr_i32_too_small(<vscale x 4 x i1> %pg, <vscale x 4 x i32> %a) {
; CHECK-LABEL: lsr_i32_too_small:
; CHECK:       // %bb.0:
; CHECK-NEXT:    mov z1.s, #0 // =0x0
; CHECK-NEXT:    lsr z0.s, p0/m, z0.s, z1.s
; CHECK-NEXT:    ret
  %out = call <vscale x 4 x i32> @llvm.aarch64.sve.lsr.nxv4i32(<vscale x 4 x i1> %pg, <vscale x 4 x i32> %a, <vscale x 4 x i32> zeroinitializer)
  ret <vscale x 4 x i32> %out
}

define <vscale x 2 x i64> @lsr_i64(<vscale x 2 x i1> %pg, <vscale x 2 x i64> %a) {
; CHECK-LABEL: lsr_i64:
; CHECK:       // %bb.0:
; CHECK-NEXT:    lsr z0.d, p0/m, z0.d, #64
; CHECK-NEXT:    ret
  %elt = insertelement <vscale x 2 x i64> undef, i64 65, i64 0
  %splat = shufflevector <vscale x 2 x i64> %elt, <vscale x 2 x i64> undef, <vscale x 2 x i32> zeroinitializer
  %out = call <vscale x 2 x i64> @llvm.aarch64.sve.lsr.nxv2i64(<vscale x 2 x i1> %pg, <vscale x 2 x i64> %a, <vscale x 2 x i64> %splat)
  ret <vscale x 2 x i64> %out
}

define <vscale x 2 x i64> @lsr_i64_all_active(<vscale x 2 x i64> %a) {
; CHECK-LABEL: lsr_i64_all_active:
; CHECK:       // %bb.0:
; CHECK-NEXT:    lsr z0.d, z0.d, #64
; CHECK-NEXT:    ret
  %pg = call <vscale x 2 x i1> @llvm.aarch64.sve.ptrue.nxv2i1(i32 31)
  %elt = insertelement <vscale x 2 x i64> undef, i64 64, i64 0
  %splat = shufflevector <vscale x 2 x i64> %elt, <vscale x 2 x i64> undef, <vscale x 2 x i32> zeroinitializer
  %out = call <vscale x 2 x i64> @llvm.aarch64.sve.lsr.u.nxv2i64(<vscale x 2 x i1> %pg, <vscale x 2 x i64> %a, <vscale x 2 x i64> %splat)
  ret <vscale x 2 x i64> %out
}

; Ensure we don't match a right shift by zero to the immediate form.
define <vscale x 2 x i64> @lsr_i64_too_small(<vscale x 2 x i1> %pg, <vscale x 2 x i64> %a) {
; CHECK-LABEL: lsr_i64_too_small:
; CHECK:       // %bb.0:
; CHECK-NEXT:    mov z1.d, #0 // =0x0
; CHECK-NEXT:    lsr z0.d, p0/m, z0.d, z1.d
; CHECK-NEXT:    ret
  %out = call <vscale x 2 x i64> @llvm.aarch64.sve.lsr.nxv2i64(<vscale x 2 x i1> %pg, <vscale x 2 x i64> %a, <vscale x 2 x i64> zeroinitializer)
  ret <vscale x 2 x i64> %out
}

; As lsr_i32 but where pg is i8 based and thus compatible with i32.
define <vscale x 4 x i32> @lsr_i32_ptrue_all_b(<vscale x 4 x i32> %a) #0 {
; CHECK-LABEL: lsr_i32_ptrue_all_b:
; CHECK:       // %bb.0:
; CHECK-NEXT:    lsr z0.s, z0.s, #1
; CHECK-NEXT:    ret
  %pg.b = tail call <vscale x 16 x i1> @llvm.aarch64.sve.ptrue.nxv16i1(i32 31)
  %pg.s = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> %pg.b)
  %b = tail call <vscale x 4 x i32> @llvm.aarch64.sve.dup.x.nxv4i32(i32 1)
  %out = tail call <vscale x 4 x i32> @llvm.aarch64.sve.lsr.u.nxv4i32(<vscale x 4 x i1> %pg.s, <vscale x 4 x i32> %a, <vscale x 4 x i32> %b)
  ret <vscale x 4 x i32> %out
}

; As lsr_i32 but where pg is i16 based and thus compatible with i32.
define <vscale x 4 x i32> @lsr_i32_ptrue_all_h(<vscale x 4 x i32> %a) #0 {
; CHECK-LABEL: lsr_i32_ptrue_all_h:
; CHECK:       // %bb.0:
; CHECK-NEXT:    lsr z0.s, z0.s, #1
; CHECK-NEXT:    ret
  %pg.h = tail call <vscale x 8 x i1> @llvm.aarch64.sve.ptrue.nxv8i1(i32 31)
  %pg.b = tail call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv8i1(<vscale x 8 x i1> %pg.h)
  %pg.s = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> %pg.b)
  %b = tail call <vscale x 4 x i32> @llvm.aarch64.sve.dup.x.nxv4i32(i32 1)
  %out = tail call <vscale x 4 x i32> @llvm.aarch64.sve.lsr.u.nxv4i32(<vscale x 4 x i1> %pg.s, <vscale x 4 x i32> %a, <vscale x 4 x i32> %b)
  ret <vscale x 4 x i32> %out
}

; As lsr_i32 but where pg is i64 based, which is not compatible with i32, so
; the inactive lanes matter and the unpredicated immediate form cannot be used.
define <vscale x 4 x i32> @lsr_i32_ptrue_all_d(<vscale x 4 x i32> %a) #0 {
; CHECK-LABEL: lsr_i32_ptrue_all_d:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.d
; CHECK-NEXT:    lsr z0.s, p0/m, z0.s, #1
; CHECK-NEXT:    ret
  %pg.d = tail call <vscale x 2 x i1> @llvm.aarch64.sve.ptrue.nxv2i1(i32 31)
  %pg.b = tail call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv2i1(<vscale x 2 x i1> %pg.d)
  %pg.s = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> %pg.b)
  %b = tail call <vscale x 4 x i32> @llvm.aarch64.sve.dup.x.nxv4i32(i32 1)
  %out = tail call <vscale x 4 x i32> @llvm.aarch64.sve.lsr.nxv4i32(<vscale x 4 x i1> %pg.s, <vscale x 4 x i32> %a, <vscale x 4 x i32> %b)
  ret <vscale x 4 x i32> %out
}

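; Note: a ptrue of .b or .h elements sets every predicate bit that a .s
; operation reads, but a .d based ptrue marks only every other .s lane active,
; so it cannot be treated as an all-true predicate for 32-bit elements.
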
; As mul_i32 but where pg is i8 based and thus compatible with i32.
define <vscale x 4 x i32> @mul_i32_ptrue_all_b(<vscale x 4 x i32> %a) #0 {
; CHECK-LABEL: mul_i32_ptrue_all_b:
; CHECK:       // %bb.0:
; CHECK-NEXT:    mul z0.s, z0.s, #1
; CHECK-NEXT:    ret
  %pg.b = tail call <vscale x 16 x i1> @llvm.aarch64.sve.ptrue.nxv16i1(i32 31)
  %pg.s = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> %pg.b)
  %b = tail call <vscale x 4 x i32> @llvm.aarch64.sve.dup.x.nxv4i32(i32 1)
  %out = tail call <vscale x 4 x i32> @llvm.aarch64.sve.mul.u.nxv4i32(<vscale x 4 x i1> %pg.s, <vscale x 4 x i32> %a, <vscale x 4 x i32> %b)
  ret <vscale x 4 x i32> %out
}

; As mul_i32 but where pg is i16 based and thus compatible with i32.
define <vscale x 4 x i32> @mul_i32_ptrue_all_h(<vscale x 4 x i32> %a) #0 {
; CHECK-LABEL: mul_i32_ptrue_all_h:
; CHECK:       // %bb.0:
; CHECK-NEXT:    mul z0.s, z0.s, #1
; CHECK-NEXT:    ret
  %pg.h = tail call <vscale x 8 x i1> @llvm.aarch64.sve.ptrue.nxv8i1(i32 31)
  %pg.b = tail call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv8i1(<vscale x 8 x i1> %pg.h)
  %pg.s = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> %pg.b)
  %b = tail call <vscale x 4 x i32> @llvm.aarch64.sve.dup.x.nxv4i32(i32 1)
  %out = tail call <vscale x 4 x i32> @llvm.aarch64.sve.mul.u.nxv4i32(<vscale x 4 x i1> %pg.s, <vscale x 4 x i32> %a, <vscale x 4 x i32> %b)
  ret <vscale x 4 x i32> %out
}

; As mul_i32 but where pg is i64 based, which is not compatible with i32, so
; the inactive lanes matter and the immediate form cannot be used.
define <vscale x 4 x i32> @mul_i32_ptrue_all_d(<vscale x 4 x i32> %a) #0 {
; CHECK-LABEL: mul_i32_ptrue_all_d:
; CHECK:       // %bb.0:
; CHECK-NEXT:    mov z1.s, #1 // =0x1
; CHECK-NEXT:    ptrue p0.d
; CHECK-NEXT:    mul z0.s, p0/m, z0.s, z1.s
; CHECK-NEXT:    ret
  %pg.d = tail call <vscale x 2 x i1> @llvm.aarch64.sve.ptrue.nxv2i1(i32 31)
  %pg.b = tail call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv2i1(<vscale x 2 x i1> %pg.d)
  %pg.s = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> %pg.b)
  %b = tail call <vscale x 4 x i32> @llvm.aarch64.sve.dup.x.nxv4i32(i32 1)
  %out = tail call <vscale x 4 x i32> @llvm.aarch64.sve.mul.nxv4i32(<vscale x 4 x i1> %pg.s, <vscale x 4 x i32> %a, <vscale x 4 x i32> %b)
  ret <vscale x 4 x i32> %out
}

declare <vscale x 16 x i8> @llvm.aarch64.sve.add.u.nxv16i8(<vscale x 16 x i1>, <vscale x 16 x i8>, <vscale x 16 x i8>)
declare <vscale x 8 x i16> @llvm.aarch64.sve.add.u.nxv8i16(<vscale x 8 x i1>, <vscale x 8 x i16>, <vscale x 8 x i16>)
declare <vscale x 4 x i32> @llvm.aarch64.sve.add.u.nxv4i32(<vscale x 4 x i1>, <vscale x 4 x i32>, <vscale x 4 x i32>)
declare <vscale x 2 x i64> @llvm.aarch64.sve.add.u.nxv2i64(<vscale x 2 x i1>, <vscale x 2 x i64>, <vscale x 2 x i64>)

declare <vscale x 4 x i32> @llvm.aarch64.sve.sub.nxv4i32(<vscale x 4 x i1>, <vscale x 4 x i32>, <vscale x 4 x i32>)

declare <vscale x 16 x i8> @llvm.aarch64.sve.sub.u.nxv16i8(<vscale x 16 x i1>, <vscale x 16 x i8>, <vscale x 16 x i8>)
declare <vscale x 8 x i16> @llvm.aarch64.sve.sub.u.nxv8i16(<vscale x 8 x i1>, <vscale x 8 x i16>, <vscale x 8 x i16>)
declare <vscale x 4 x i32> @llvm.aarch64.sve.sub.u.nxv4i32(<vscale x 4 x i1>, <vscale x 4 x i32>, <vscale x 4 x i32>)
declare <vscale x 2 x i64> @llvm.aarch64.sve.sub.u.nxv2i64(<vscale x 2 x i1>, <vscale x 2 x i64>, <vscale x 2 x i64>)

declare <vscale x 16 x i8> @llvm.aarch64.sve.subr.nxv16i8(<vscale x 16 x i1>, <vscale x 16 x i8>, <vscale x 16 x i8>)
declare <vscale x 8 x i16> @llvm.aarch64.sve.subr.nxv8i16(<vscale x 8 x i1>, <vscale x 8 x i16>, <vscale x 8 x i16>)
declare <vscale x 4 x i32> @llvm.aarch64.sve.subr.nxv4i32(<vscale x 4 x i1>, <vscale x 4 x i32>, <vscale x 4 x i32>)
declare <vscale x 2 x i64> @llvm.aarch64.sve.subr.nxv2i64(<vscale x 2 x i1>, <vscale x 2 x i64>, <vscale x 2 x i64>)

declare <vscale x 16 x i8> @llvm.aarch64.sve.sqadd.x.nxv16i8(<vscale x 16 x i8>, <vscale x 16 x i8>)
declare <vscale x 8 x i16> @llvm.aarch64.sve.sqadd.x.nxv8i16(<vscale x 8 x i16>, <vscale x 8 x i16>)
declare <vscale x 4 x i32> @llvm.aarch64.sve.sqadd.x.nxv4i32(<vscale x 4 x i32>, <vscale x 4 x i32>)
declare <vscale x 2 x i64> @llvm.aarch64.sve.sqadd.x.nxv2i64(<vscale x 2 x i64>, <vscale x 2 x i64>)

declare <vscale x 16 x i8> @llvm.aarch64.sve.sqsub.x.nxv16i8(<vscale x 16 x i8>, <vscale x 16 x i8>)
declare <vscale x 8 x i16> @llvm.aarch64.sve.sqsub.x.nxv8i16(<vscale x 8 x i16>, <vscale x 8 x i16>)
declare <vscale x 4 x i32> @llvm.aarch64.sve.sqsub.x.nxv4i32(<vscale x 4 x i32>, <vscale x 4 x i32>)
declare <vscale x 2 x i64> @llvm.aarch64.sve.sqsub.x.nxv2i64(<vscale x 2 x i64>, <vscale x 2 x i64>)

declare <vscale x 16 x i8> @llvm.aarch64.sve.uqadd.x.nxv16i8(<vscale x 16 x i8>, <vscale x 16 x i8>)
declare <vscale x 8 x i16> @llvm.aarch64.sve.uqadd.x.nxv8i16(<vscale x 8 x i16>, <vscale x 8 x i16>)
declare <vscale x 4 x i32> @llvm.aarch64.sve.uqadd.x.nxv4i32(<vscale x 4 x i32>, <vscale x 4 x i32>)
declare <vscale x 2 x i64> @llvm.aarch64.sve.uqadd.x.nxv2i64(<vscale x 2 x i64>, <vscale x 2 x i64>)

declare <vscale x 16 x i8> @llvm.aarch64.sve.uqsub.x.nxv16i8(<vscale x 16 x i8>, <vscale x 16 x i8>)
declare <vscale x 8 x i16> @llvm.aarch64.sve.uqsub.x.nxv8i16(<vscale x 8 x i16>, <vscale x 8 x i16>)
declare <vscale x 4 x i32> @llvm.aarch64.sve.uqsub.x.nxv4i32(<vscale x 4 x i32>, <vscale x 4 x i32>)
declare <vscale x 2 x i64> @llvm.aarch64.sve.uqsub.x.nxv2i64(<vscale x 2 x i64>, <vscale x 2 x i64>)

declare <vscale x 4 x i32> @llvm.aarch64.sve.smax.nxv4i32(<vscale x 4 x i1>, <vscale x 4 x i32>, <vscale x 4 x i32>)

declare <vscale x 16 x i8> @llvm.aarch64.sve.smax.u.nxv16i8(<vscale x 16 x i1>, <vscale x 16 x i8>, <vscale x 16 x i8>)
declare <vscale x 8 x i16> @llvm.aarch64.sve.smax.u.nxv8i16(<vscale x 8 x i1>, <vscale x 8 x i16>, <vscale x 8 x i16>)
declare <vscale x 4 x i32> @llvm.aarch64.sve.smax.u.nxv4i32(<vscale x 4 x i1>, <vscale x 4 x i32>, <vscale x 4 x i32>)
declare <vscale x 2 x i64> @llvm.aarch64.sve.smax.u.nxv2i64(<vscale x 2 x i1>, <vscale x 2 x i64>, <vscale x 2 x i64>)

declare <vscale x 4 x i32> @llvm.aarch64.sve.smin.nxv4i32(<vscale x 4 x i1>, <vscale x 4 x i32>, <vscale x 4 x i32>)

declare <vscale x 16 x i8> @llvm.aarch64.sve.smin.u.nxv16i8(<vscale x 16 x i1>, <vscale x 16 x i8>, <vscale x 16 x i8>)
declare <vscale x 8 x i16> @llvm.aarch64.sve.smin.u.nxv8i16(<vscale x 8 x i1>, <vscale x 8 x i16>, <vscale x 8 x i16>)
declare <vscale x 4 x i32> @llvm.aarch64.sve.smin.u.nxv4i32(<vscale x 4 x i1>, <vscale x 4 x i32>, <vscale x 4 x i32>)
declare <vscale x 2 x i64> @llvm.aarch64.sve.smin.u.nxv2i64(<vscale x 2 x i1>, <vscale x 2 x i64>, <vscale x 2 x i64>)

declare <vscale x 4 x i32> @llvm.aarch64.sve.umax.nxv4i32(<vscale x 4 x i1>, <vscale x 4 x i32>, <vscale x 4 x i32>)

declare <vscale x 16 x i8> @llvm.aarch64.sve.umax.u.nxv16i8(<vscale x 16 x i1>, <vscale x 16 x i8>, <vscale x 16 x i8>)
declare <vscale x 8 x i16> @llvm.aarch64.sve.umax.u.nxv8i16(<vscale x 8 x i1>, <vscale x 8 x i16>, <vscale x 8 x i16>)
declare <vscale x 4 x i32> @llvm.aarch64.sve.umax.u.nxv4i32(<vscale x 4 x i1>, <vscale x 4 x i32>, <vscale x 4 x i32>)
declare <vscale x 2 x i64> @llvm.aarch64.sve.umax.u.nxv2i64(<vscale x 2 x i1>, <vscale x 2 x i64>, <vscale x 2 x i64>)

declare <vscale x 4 x i32> @llvm.aarch64.sve.umin.nxv4i32(<vscale x 4 x i1>, <vscale x 4 x i32>, <vscale x 4 x i32>)

declare <vscale x 16 x i8> @llvm.aarch64.sve.umin.u.nxv16i8(<vscale x 16 x i1>, <vscale x 16 x i8>, <vscale x 16 x i8>)
declare <vscale x 8 x i16> @llvm.aarch64.sve.umin.u.nxv8i16(<vscale x 8 x i1>, <vscale x 8 x i16>, <vscale x 8 x i16>)
declare <vscale x 4 x i32> @llvm.aarch64.sve.umin.u.nxv4i32(<vscale x 4 x i1>, <vscale x 4 x i32>, <vscale x 4 x i32>)
declare <vscale x 2 x i64> @llvm.aarch64.sve.umin.u.nxv2i64(<vscale x 2 x i1>, <vscale x 2 x i64>, <vscale x 2 x i64>)

declare <vscale x 16 x i8> @llvm.aarch64.sve.asr.nxv16i8(<vscale x 16 x i1>, <vscale x 16 x i8>, <vscale x 16 x i8>)
declare <vscale x 8 x i16> @llvm.aarch64.sve.asr.nxv8i16(<vscale x 8 x i1>, <vscale x 8 x i16>, <vscale x 8 x i16>)
declare <vscale x 4 x i32> @llvm.aarch64.sve.asr.nxv4i32(<vscale x 4 x i1>, <vscale x 4 x i32>, <vscale x 4 x i32>)
declare <vscale x 2 x i64> @llvm.aarch64.sve.asr.nxv2i64(<vscale x 2 x i1>, <vscale x 2 x i64>, <vscale x 2 x i64>)

declare <vscale x 16 x i8> @llvm.aarch64.sve.asr.u.nxv16i8(<vscale x 16 x i1>, <vscale x 16 x i8>, <vscale x 16 x i8>)
declare <vscale x 8 x i16> @llvm.aarch64.sve.asr.u.nxv8i16(<vscale x 8 x i1>, <vscale x 8 x i16>, <vscale x 8 x i16>)
declare <vscale x 4 x i32> @llvm.aarch64.sve.asr.u.nxv4i32(<vscale x 4 x i1>, <vscale x 4 x i32>, <vscale x 4 x i32>)
declare <vscale x 2 x i64> @llvm.aarch64.sve.asr.u.nxv2i64(<vscale x 2 x i1>, <vscale x 2 x i64>, <vscale x 2 x i64>)

declare <vscale x 16 x i8> @llvm.aarch64.sve.lsl.nxv16i8(<vscale x 16 x i1>, <vscale x 16 x i8>, <vscale x 16 x i8>)
declare <vscale x 8 x i16> @llvm.aarch64.sve.lsl.nxv8i16(<vscale x 8 x i1>, <vscale x 8 x i16>, <vscale x 8 x i16>)
declare <vscale x 4 x i32> @llvm.aarch64.sve.lsl.nxv4i32(<vscale x 4 x i1>, <vscale x 4 x i32>, <vscale x 4 x i32>)
declare <vscale x 2 x i64> @llvm.aarch64.sve.lsl.nxv2i64(<vscale x 2 x i1>, <vscale x 2 x i64>, <vscale x 2 x i64>)

declare <vscale x 16 x i8> @llvm.aarch64.sve.lsl.u.nxv16i8(<vscale x 16 x i1>, <vscale x 16 x i8>, <vscale x 16 x i8>)
declare <vscale x 8 x i16> @llvm.aarch64.sve.lsl.u.nxv8i16(<vscale x 8 x i1>, <vscale x 8 x i16>, <vscale x 8 x i16>)
declare <vscale x 4 x i32> @llvm.aarch64.sve.lsl.u.nxv4i32(<vscale x 4 x i1>, <vscale x 4 x i32>, <vscale x 4 x i32>)
declare <vscale x 2 x i64> @llvm.aarch64.sve.lsl.u.nxv2i64(<vscale x 2 x i1>, <vscale x 2 x i64>, <vscale x 2 x i64>)

declare <vscale x 16 x i8> @llvm.aarch64.sve.lsr.nxv16i8(<vscale x 16 x i1>, <vscale x 16 x i8>, <vscale x 16 x i8>)
declare <vscale x 8 x i16> @llvm.aarch64.sve.lsr.nxv8i16(<vscale x 8 x i1>, <vscale x 8 x i16>, <vscale x 8 x i16>)
declare <vscale x 4 x i32> @llvm.aarch64.sve.lsr.nxv4i32(<vscale x 4 x i1>, <vscale x 4 x i32>, <vscale x 4 x i32>)
declare <vscale x 2 x i64> @llvm.aarch64.sve.lsr.nxv2i64(<vscale x 2 x i1>, <vscale x 2 x i64>, <vscale x 2 x i64>)

declare <vscale x 16 x i8> @llvm.aarch64.sve.lsr.u.nxv16i8(<vscale x 16 x i1>, <vscale x 16 x i8>, <vscale x 16 x i8>)
declare <vscale x 8 x i16> @llvm.aarch64.sve.lsr.u.nxv8i16(<vscale x 8 x i1>, <vscale x 8 x i16>, <vscale x 8 x i16>)
declare <vscale x 4 x i32> @llvm.aarch64.sve.lsr.u.nxv4i32(<vscale x 4 x i1>, <vscale x 4 x i32>, <vscale x 4 x i32>)
declare <vscale x 2 x i64> @llvm.aarch64.sve.lsr.u.nxv2i64(<vscale x 2 x i1>, <vscale x 2 x i64>, <vscale x 2 x i64>)

declare <vscale x 4 x i32> @llvm.aarch64.sve.mul.nxv4i32(<vscale x 4 x i1>, <vscale x 4 x i32>, <vscale x 4 x i32>)

declare <vscale x 16 x i8> @llvm.aarch64.sve.mul.u.nxv16i8(<vscale x 16 x i1>, <vscale x 16 x i8>, <vscale x 16 x i8>)
declare <vscale x 8 x i16> @llvm.aarch64.sve.mul.u.nxv8i16(<vscale x 8 x i1>, <vscale x 8 x i16>, <vscale x 8 x i16>)
declare <vscale x 4 x i32> @llvm.aarch64.sve.mul.u.nxv4i32(<vscale x 4 x i1>, <vscale x 4 x i32>, <vscale x 4 x i32>)
declare <vscale x 2 x i64> @llvm.aarch64.sve.mul.u.nxv2i64(<vscale x 2 x i1>, <vscale x 2 x i64>, <vscale x 2 x i64>)

declare <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1>)
declare <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1>)
declare <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1>)

declare <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv4i1(<vscale x 4 x i1>)
declare <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv8i1(<vscale x 8 x i1>)
declare <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv2i1(<vscale x 2 x i1>)

declare <vscale x 4 x i32> @llvm.aarch64.sve.dup.x.nxv4i32(i32)

declare <vscale x 16 x i1> @llvm.aarch64.sve.ptrue.nxv16i1(i32 %pattern)
declare <vscale x 8 x i1> @llvm.aarch64.sve.ptrue.nxv8i1(i32 %pattern)
declare <vscale x 4 x i1> @llvm.aarch64.sve.ptrue.nxv4i1(i32 %pattern)
declare <vscale x 2 x i1> @llvm.aarch64.sve.ptrue.nxv2i1(i32 %pattern)

attributes #0 = { "target-features"="+sve" }