; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sve < %s | FileCheck %s
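
; This file checks that integer arithmetic intrinsics whose second operand is
; a splatted constant select the SVE immediate instruction forms when the
; constant is encodable, and fall back to materialising it in a register
; otherwise.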

; ADD

define <vscale x 16 x i8> @add_i8(<vscale x 16 x i8> %a) {
; CHECK-LABEL: add_i8:
; CHECK:       // %bb.0:
; CHECK-NEXT:    add z0.b, z0.b, #127 // =0x7f
; CHECK-NEXT:    ret
  %pg = call <vscale x 16 x i1> @llvm.aarch64.sve.ptrue.nxv16i1(i32 31)
  %elt = insertelement <vscale x 16 x i8> undef, i8 127, i32 0
  %splat = shufflevector <vscale x 16 x i8> %elt, <vscale x 16 x i8> undef, <vscale x 16 x i32> zeroinitializer
  %out = call <vscale x 16 x i8> @llvm.aarch64.sve.add.u.nxv16i8(<vscale x 16 x i1> %pg,
                                                                 <vscale x 16 x i8> %a,
                                                                 <vscale x 16 x i8> %splat)
  ret <vscale x 16 x i8> %out
}

define <vscale x 8 x i16> @add_i16(<vscale x 8 x i16> %a) {
; CHECK-LABEL: add_i16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    add z0.h, z0.h, #127 // =0x7f
; CHECK-NEXT:    ret
  %pg = call <vscale x 8 x i1> @llvm.aarch64.sve.ptrue.nxv8i1(i32 31)
  %elt = insertelement <vscale x 8 x i16> undef, i16 127, i32 0
  %splat = shufflevector <vscale x 8 x i16> %elt, <vscale x 8 x i16> undef, <vscale x 8 x i32> zeroinitializer
  %out = call <vscale x 8 x i16> @llvm.aarch64.sve.add.u.nxv8i16(<vscale x 8 x i1> %pg,
                                                                 <vscale x 8 x i16> %a,
                                                                 <vscale x 8 x i16> %splat)
  ret <vscale x 8 x i16> %out
}
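
; 257 (0x0101) is outside the unsigned 8-bit range of the ADD immediate form,
; but as an i16 element it is a repeating byte pattern, so the splat is
; expected to be materialised with DUPM followed by a plain vector add.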

define <vscale x 8 x i16> @add_i16_out_of_range(<vscale x 8 x i16> %a) {
; CHECK-LABEL: add_i16_out_of_range:
; CHECK:       // %bb.0:
; CHECK-NEXT:    dupm z1.b, #0x1
; CHECK-NEXT:    add z0.h, z0.h, z1.h
; CHECK-NEXT:    ret
  %pg = call <vscale x 8 x i1> @llvm.aarch64.sve.ptrue.nxv8i1(i32 31)
  %elt = insertelement <vscale x 8 x i16> undef, i16 257, i32 0
  %splat = shufflevector <vscale x 8 x i16> %elt, <vscale x 8 x i16> undef, <vscale x 8 x i32> zeroinitializer
  %out = call <vscale x 8 x i16> @llvm.aarch64.sve.add.u.nxv8i16(<vscale x 8 x i1> %pg,
                                                                 <vscale x 8 x i16> %a,
                                                                 <vscale x 8 x i16> %splat)
  ret <vscale x 8 x i16> %out
}

define <vscale x 4 x i32> @add_i32(<vscale x 4 x i32> %a) {
; CHECK-LABEL: add_i32:
; CHECK:       // %bb.0:
; CHECK-NEXT:    add z0.s, z0.s, #127 // =0x7f
; CHECK-NEXT:    ret
  %pg = call <vscale x 4 x i1> @llvm.aarch64.sve.ptrue.nxv4i1(i32 31)
  %elt = insertelement <vscale x 4 x i32> undef, i32 127, i32 0
  %splat = shufflevector <vscale x 4 x i32> %elt, <vscale x 4 x i32> undef, <vscale x 4 x i32> zeroinitializer
  %out = call <vscale x 4 x i32> @llvm.aarch64.sve.add.u.nxv4i32(<vscale x 4 x i1> %pg,
                                                                 <vscale x 4 x i32> %a,
                                                                 <vscale x 4 x i32> %splat)
  ret <vscale x 4 x i32> %out
}
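
; As an i32 element, 257 (0x00000101) is neither an ADD immediate nor a
; repeating bitmask pattern, so the constant goes through a scalar register
; and a broadcast instead.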

define <vscale x 4 x i32> @add_i32_out_of_range(<vscale x 4 x i32> %a) {
; CHECK-LABEL: add_i32_out_of_range:
; CHECK:       // %bb.0:
; CHECK-NEXT:    mov w8, #257 // =0x101
; CHECK-NEXT:    mov z1.s, w8
; CHECK-NEXT:    add z0.s, z0.s, z1.s
; CHECK-NEXT:    ret
  %pg = call <vscale x 4 x i1> @llvm.aarch64.sve.ptrue.nxv4i1(i32 31)
  %elt = insertelement <vscale x 4 x i32> undef, i32 257, i32 0
  %splat = shufflevector <vscale x 4 x i32> %elt, <vscale x 4 x i32> undef, <vscale x 4 x i32> zeroinitializer
  %out = call <vscale x 4 x i32> @llvm.aarch64.sve.add.u.nxv4i32(<vscale x 4 x i1> %pg,
                                                                 <vscale x 4 x i32> %a,
                                                                 <vscale x 4 x i32> %splat)
  ret <vscale x 4 x i32> %out
}

define <vscale x 2 x i64> @add_i64(<vscale x 2 x i64> %a) {
; CHECK-LABEL: add_i64:
; CHECK:       // %bb.0:
; CHECK-NEXT:    add z0.d, z0.d, #127 // =0x7f
; CHECK-NEXT:    ret
  %pg = call <vscale x 2 x i1> @llvm.aarch64.sve.ptrue.nxv2i1(i32 31)
  %elt = insertelement <vscale x 2 x i64> undef, i64 127, i64 0
  %splat = shufflevector <vscale x 2 x i64> %elt, <vscale x 2 x i64> undef, <vscale x 2 x i32> zeroinitializer
  %out = call <vscale x 2 x i64> @llvm.aarch64.sve.add.u.nxv2i64(<vscale x 2 x i1> %pg,
                                                                 <vscale x 2 x i64> %a,
                                                                 <vscale x 2 x i64> %splat)
  ret <vscale x 2 x i64> %out
}

define <vscale x 2 x i64> @add_i64_out_of_range(<vscale x 2 x i64> %a) {
; CHECK-LABEL: add_i64_out_of_range:
; CHECK:       // %bb.0:
; CHECK-NEXT:    mov w8, #257 // =0x101
; CHECK-NEXT:    mov z1.d, x8
; CHECK-NEXT:    add z0.d, z0.d, z1.d
; CHECK-NEXT:    ret
  %pg = call <vscale x 2 x i1> @llvm.aarch64.sve.ptrue.nxv2i1(i32 31)
  %elt = insertelement <vscale x 2 x i64> undef, i64 257, i64 0
  %splat = shufflevector <vscale x 2 x i64> %elt, <vscale x 2 x i64> undef, <vscale x 2 x i32> zeroinitializer
  %out = call <vscale x 2 x i64> @llvm.aarch64.sve.add.u.nxv2i64(<vscale x 2 x i1> %pg,
                                                                 <vscale x 2 x i64> %a,
                                                                 <vscale x 2 x i64> %splat)
  ret <vscale x 2 x i64> %out
}

; SUB

define <vscale x 16 x i8> @sub_i8(<vscale x 16 x i8> %a) {
; CHECK-LABEL: sub_i8:
; CHECK:       // %bb.0:
; CHECK-NEXT:    sub z0.b, z0.b, #127 // =0x7f
; CHECK-NEXT:    ret
  %pg = call <vscale x 16 x i1> @llvm.aarch64.sve.ptrue.nxv16i1(i32 31)
  %elt = insertelement <vscale x 16 x i8> undef, i8 127, i32 0
  %splat = shufflevector <vscale x 16 x i8> %elt, <vscale x 16 x i8> undef, <vscale x 16 x i32> zeroinitializer
  %out = call <vscale x 16 x i8> @llvm.aarch64.sve.sub.u.nxv16i8(<vscale x 16 x i1> %pg,
                                                                 <vscale x 16 x i8> %a,
                                                                 <vscale x 16 x i8> %splat)
  ret <vscale x 16 x i8> %out
}

define <vscale x 8 x i16> @sub_i16(<vscale x 8 x i16> %a) {
; CHECK-LABEL: sub_i16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    sub z0.h, z0.h, #127 // =0x7f
; CHECK-NEXT:    ret
  %pg = call <vscale x 8 x i1> @llvm.aarch64.sve.ptrue.nxv8i1(i32 31)
  %elt = insertelement <vscale x 8 x i16> undef, i16 127, i32 0
  %splat = shufflevector <vscale x 8 x i16> %elt, <vscale x 8 x i16> undef, <vscale x 8 x i32> zeroinitializer
  %out = call <vscale x 8 x i16> @llvm.aarch64.sve.sub.u.nxv8i16(<vscale x 8 x i1> %pg,
                                                                 <vscale x 8 x i16> %a,
                                                                 <vscale x 8 x i16> %splat)
  ret <vscale x 8 x i16> %out
}

define <vscale x 8 x i16> @sub_i16_out_of_range(<vscale x 8 x i16> %a) {
; CHECK-LABEL: sub_i16_out_of_range:
; CHECK:       // %bb.0:
; CHECK-NEXT:    dupm z1.b, #0x1
; CHECK-NEXT:    sub z0.h, z0.h, z1.h
; CHECK-NEXT:    ret
  %pg = call <vscale x 8 x i1> @llvm.aarch64.sve.ptrue.nxv8i1(i32 31)
  %elt = insertelement <vscale x 8 x i16> undef, i16 257, i32 0
  %splat = shufflevector <vscale x 8 x i16> %elt, <vscale x 8 x i16> undef, <vscale x 8 x i32> zeroinitializer
  %out = call <vscale x 8 x i16> @llvm.aarch64.sve.sub.u.nxv8i16(<vscale x 8 x i1> %pg,
                                                                 <vscale x 8 x i16> %a,
                                                                 <vscale x 8 x i16> %splat)
  ret <vscale x 8 x i16> %out
}

define <vscale x 4 x i32> @sub_i32(<vscale x 4 x i32> %a) {
; CHECK-LABEL: sub_i32:
; CHECK:       // %bb.0:
; CHECK-NEXT:    sub z0.s, z0.s, #127 // =0x7f
; CHECK-NEXT:    ret
  %pg = call <vscale x 4 x i1> @llvm.aarch64.sve.ptrue.nxv4i1(i32 31)
  %elt = insertelement <vscale x 4 x i32> undef, i32 127, i32 0
  %splat = shufflevector <vscale x 4 x i32> %elt, <vscale x 4 x i32> undef, <vscale x 4 x i32> zeroinitializer
  %out = call <vscale x 4 x i32> @llvm.aarch64.sve.sub.u.nxv4i32(<vscale x 4 x i1> %pg,
                                                                 <vscale x 4 x i32> %a,
                                                                 <vscale x 4 x i32> %splat)
  ret <vscale x 4 x i32> %out
}

define <vscale x 4 x i32> @sub_i32_out_of_range(<vscale x 4 x i32> %a) {
; CHECK-LABEL: sub_i32_out_of_range:
; CHECK:       // %bb.0:
; CHECK-NEXT:    mov w8, #257 // =0x101
; CHECK-NEXT:    mov z1.s, w8
; CHECK-NEXT:    sub z0.s, z0.s, z1.s
; CHECK-NEXT:    ret
  %pg = call <vscale x 4 x i1> @llvm.aarch64.sve.ptrue.nxv4i1(i32 31)
  %elt = insertelement <vscale x 4 x i32> undef, i32 257, i32 0
  %splat = shufflevector <vscale x 4 x i32> %elt, <vscale x 4 x i32> undef, <vscale x 4 x i32> zeroinitializer
  %out = call <vscale x 4 x i32> @llvm.aarch64.sve.sub.u.nxv4i32(<vscale x 4 x i1> %pg,
                                                                 <vscale x 4 x i32> %a,
                                                                 <vscale x 4 x i32> %splat)
  ret <vscale x 4 x i32> %out
}

define <vscale x 2 x i64> @sub_i64(<vscale x 2 x i64> %a) {
; CHECK-LABEL: sub_i64:
; CHECK:       // %bb.0:
; CHECK-NEXT:    sub z0.d, z0.d, #127 // =0x7f
; CHECK-NEXT:    ret
  %pg = call <vscale x 2 x i1> @llvm.aarch64.sve.ptrue.nxv2i1(i32 31)
  %elt = insertelement <vscale x 2 x i64> undef, i64 127, i64 0
  %splat = shufflevector <vscale x 2 x i64> %elt, <vscale x 2 x i64> undef, <vscale x 2 x i32> zeroinitializer
  %out = call <vscale x 2 x i64> @llvm.aarch64.sve.sub.u.nxv2i64(<vscale x 2 x i1> %pg,
                                                                 <vscale x 2 x i64> %a,
                                                                 <vscale x 2 x i64> %splat)
  ret <vscale x 2 x i64> %out
}

define <vscale x 2 x i64> @sub_i64_out_of_range(<vscale x 2 x i64> %a) {
; CHECK-LABEL: sub_i64_out_of_range:
; CHECK:       // %bb.0:
; CHECK-NEXT:    mov w8, #257 // =0x101
; CHECK-NEXT:    mov z1.d, x8
; CHECK-NEXT:    sub z0.d, z0.d, z1.d
; CHECK-NEXT:    ret
  %pg = call <vscale x 2 x i1> @llvm.aarch64.sve.ptrue.nxv2i1(i32 31)
  %elt = insertelement <vscale x 2 x i64> undef, i64 257, i64 0
  %splat = shufflevector <vscale x 2 x i64> %elt, <vscale x 2 x i64> undef, <vscale x 2 x i32> zeroinitializer
  %out = call <vscale x 2 x i64> @llvm.aarch64.sve.sub.u.nxv2i64(<vscale x 2 x i1> %pg,
                                                                 <vscale x 2 x i64> %a,
                                                                 <vscale x 2 x i64> %splat)
  ret <vscale x 2 x i64> %out
}
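
; The tests below recreate an all-active predicate at a different element
; width and reinterpret it via the svbool conversion intrinsics. An all-true
; ptrue.b or ptrue.h predicate still governs every i32 lane after
; reinterpretation, so the unpredicated immediate form remains valid; an
; all-true ptrue.d predicate leaves half of the i32 lanes inactive, so the
; predicated form must be kept. The same pattern repeats for subr, smax,
; smin, umax and umin below.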

; As sub_i32 but where pg is i8 based and thus compatible for i32.
define <vscale x 4 x i32> @sub_i32_ptrue_all_b(<vscale x 4 x i32> %a) #0 {
; CHECK-LABEL: sub_i32_ptrue_all_b:
; CHECK:       // %bb.0:
; CHECK-NEXT:    sub z0.s, z0.s, #1 // =0x1
; CHECK-NEXT:    ret
  %pg.b = tail call <vscale x 16 x i1> @llvm.aarch64.sve.ptrue.nxv16i1(i32 31)
  %pg.s = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> %pg.b)
  %b = tail call <vscale x 4 x i32> @llvm.aarch64.sve.dup.x.nxv4i32(i32 1)
  %out = tail call <vscale x 4 x i32> @llvm.aarch64.sve.sub.u.nxv4i32(<vscale x 4 x i1> %pg.s,
                                                                      <vscale x 4 x i32> %a,
                                                                      <vscale x 4 x i32> %b)
  ret <vscale x 4 x i32> %out
}

; As sub_i32 but where pg is i16 based and thus compatible for i32.
define <vscale x 4 x i32> @sub_i32_ptrue_all_h(<vscale x 4 x i32> %a) #0 {
; CHECK-LABEL: sub_i32_ptrue_all_h:
; CHECK:       // %bb.0:
; CHECK-NEXT:    sub z0.s, z0.s, #1 // =0x1
; CHECK-NEXT:    ret
  %pg.h = tail call <vscale x 8 x i1> @llvm.aarch64.sve.ptrue.nxv8i1(i32 31)
  %pg.b = tail call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv8i1(<vscale x 8 x i1> %pg.h)
  %pg.s = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> %pg.b)
  %b = tail call <vscale x 4 x i32> @llvm.aarch64.sve.dup.x.nxv4i32(i32 1)
  %out = tail call <vscale x 4 x i32> @llvm.aarch64.sve.sub.u.nxv4i32(<vscale x 4 x i1> %pg.s,
                                                                      <vscale x 4 x i32> %a,
                                                                      <vscale x 4 x i32> %b)
  ret <vscale x 4 x i32> %out
}

; As sub_i32 but where pg is i64 based, which is not compatible for i32 and
; thus inactive lanes are important and the immediate form cannot be used.
define <vscale x 4 x i32> @sub_i32_ptrue_all_d(<vscale x 4 x i32> %a) #0 {
; CHECK-LABEL: sub_i32_ptrue_all_d:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.d
; CHECK-NEXT:    mov z1.s, #1 // =0x1
; CHECK-NEXT:    sub z0.s, p0/m, z0.s, z1.s
; CHECK-NEXT:    ret
  %pg.d = tail call <vscale x 2 x i1> @llvm.aarch64.sve.ptrue.nxv2i1(i32 31)
  %pg.b = tail call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv2i1(<vscale x 2 x i1> %pg.d)
  %pg.s = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> %pg.b)
  %b = tail call <vscale x 4 x i32> @llvm.aarch64.sve.dup.x.nxv4i32(i32 1)
  %out = tail call <vscale x 4 x i32> @llvm.aarch64.sve.sub.nxv4i32(<vscale x 4 x i1> %pg.s,
                                                                    <vscale x 4 x i32> %a,
                                                                    <vscale x 4 x i32> %b)
  ret <vscale x 4 x i32> %out
}

; SUBR

define <vscale x 16 x i8> @subr_i8(<vscale x 16 x i8> %a) {
; CHECK-LABEL: subr_i8:
; CHECK:       // %bb.0:
; CHECK-NEXT:    subr z0.b, z0.b, #127 // =0x7f
; CHECK-NEXT:    ret
  %pg = call <vscale x 16 x i1> @llvm.aarch64.sve.ptrue.nxv16i1(i32 31)
  %elt = insertelement <vscale x 16 x i8> undef, i8 127, i32 0
  %splat = shufflevector <vscale x 16 x i8> %elt, <vscale x 16 x i8> undef, <vscale x 16 x i32> zeroinitializer
  %out = call <vscale x 16 x i8> @llvm.aarch64.sve.subr.nxv16i8(<vscale x 16 x i1> %pg,
                                                                <vscale x 16 x i8> %a,
                                                                <vscale x 16 x i8> %splat)
  ret <vscale x 16 x i8> %out
}

define <vscale x 8 x i16> @subr_i16(<vscale x 8 x i16> %a) {
; CHECK-LABEL: subr_i16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    subr z0.h, z0.h, #127 // =0x7f
; CHECK-NEXT:    ret
  %pg = call <vscale x 8 x i1> @llvm.aarch64.sve.ptrue.nxv8i1(i32 31)
  %elt = insertelement <vscale x 8 x i16> undef, i16 127, i32 0
  %splat = shufflevector <vscale x 8 x i16> %elt, <vscale x 8 x i16> undef, <vscale x 8 x i32> zeroinitializer
  %out = call <vscale x 8 x i16> @llvm.aarch64.sve.subr.nxv8i16(<vscale x 8 x i1> %pg,
                                                                <vscale x 8 x i16> %a,
                                                                <vscale x 8 x i16> %splat)
  ret <vscale x 8 x i16> %out
}

define <vscale x 8 x i16> @subr_i16_out_of_range(<vscale x 8 x i16> %a) {
; CHECK-LABEL: subr_i16_out_of_range:
; CHECK:       // %bb.0:
; CHECK-NEXT:    dupm z1.b, #0x1
; CHECK-NEXT:    sub z0.h, z1.h, z0.h
; CHECK-NEXT:    ret
  %pg = call <vscale x 8 x i1> @llvm.aarch64.sve.ptrue.nxv8i1(i32 31)
  %elt = insertelement <vscale x 8 x i16> undef, i16 257, i32 0
  %splat = shufflevector <vscale x 8 x i16> %elt, <vscale x 8 x i16> undef, <vscale x 8 x i32> zeroinitializer
  %out = call <vscale x 8 x i16> @llvm.aarch64.sve.subr.nxv8i16(<vscale x 8 x i1> %pg,
                                                                <vscale x 8 x i16> %a,
                                                                <vscale x 8 x i16> %splat)
  ret <vscale x 8 x i16> %out
}

define <vscale x 4 x i32> @subr_i32(<vscale x 4 x i32> %a) {
; CHECK-LABEL: subr_i32:
; CHECK:       // %bb.0:
; CHECK-NEXT:    subr z0.s, z0.s, #127 // =0x7f
; CHECK-NEXT:    ret
  %pg = call <vscale x 4 x i1> @llvm.aarch64.sve.ptrue.nxv4i1(i32 31)
  %elt = insertelement <vscale x 4 x i32> undef, i32 127, i32 0
  %splat = shufflevector <vscale x 4 x i32> %elt, <vscale x 4 x i32> undef, <vscale x 4 x i32> zeroinitializer
  %out = call <vscale x 4 x i32> @llvm.aarch64.sve.subr.nxv4i32(<vscale x 4 x i1> %pg,
                                                                <vscale x 4 x i32> %a,
                                                                <vscale x 4 x i32> %splat)
  ret <vscale x 4 x i32> %out
}

define <vscale x 4 x i32> @subr_i32_out_of_range(<vscale x 4 x i32> %a) {
; CHECK-LABEL: subr_i32_out_of_range:
; CHECK:       // %bb.0:
; CHECK-NEXT:    mov w8, #257 // =0x101
; CHECK-NEXT:    mov z1.s, w8
; CHECK-NEXT:    sub z0.s, z1.s, z0.s
; CHECK-NEXT:    ret
  %pg = call <vscale x 4 x i1> @llvm.aarch64.sve.ptrue.nxv4i1(i32 31)
  %elt = insertelement <vscale x 4 x i32> undef, i32 257, i32 0
  %splat = shufflevector <vscale x 4 x i32> %elt, <vscale x 4 x i32> undef, <vscale x 4 x i32> zeroinitializer
  %out = call <vscale x 4 x i32> @llvm.aarch64.sve.subr.nxv4i32(<vscale x 4 x i1> %pg,
                                                                <vscale x 4 x i32> %a,
                                                                <vscale x 4 x i32> %splat)
  ret <vscale x 4 x i32> %out
}

define <vscale x 2 x i64> @subr_i64(<vscale x 2 x i64> %a) {
; CHECK-LABEL: subr_i64:
; CHECK:       // %bb.0:
; CHECK-NEXT:    subr z0.d, z0.d, #127 // =0x7f
; CHECK-NEXT:    ret
  %pg = call <vscale x 2 x i1> @llvm.aarch64.sve.ptrue.nxv2i1(i32 31)
  %elt = insertelement <vscale x 2 x i64> undef, i64 127, i64 0
  %splat = shufflevector <vscale x 2 x i64> %elt, <vscale x 2 x i64> undef, <vscale x 2 x i32> zeroinitializer
  %out = call <vscale x 2 x i64> @llvm.aarch64.sve.subr.nxv2i64(<vscale x 2 x i1> %pg,
                                                                <vscale x 2 x i64> %a,
                                                                <vscale x 2 x i64> %splat)
  ret <vscale x 2 x i64> %out
}

define <vscale x 2 x i64> @subr_i64_out_of_range(<vscale x 2 x i64> %a) {
; CHECK-LABEL: subr_i64_out_of_range:
; CHECK:       // %bb.0:
; CHECK-NEXT:    mov w8, #257 // =0x101
; CHECK-NEXT:    mov z1.d, x8
; CHECK-NEXT:    sub z0.d, z1.d, z0.d
; CHECK-NEXT:    ret
  %pg = call <vscale x 2 x i1> @llvm.aarch64.sve.ptrue.nxv2i1(i32 31)
  %elt = insertelement <vscale x 2 x i64> undef, i64 257, i64 0
  %splat = shufflevector <vscale x 2 x i64> %elt, <vscale x 2 x i64> undef, <vscale x 2 x i32> zeroinitializer
  %out = call <vscale x 2 x i64> @llvm.aarch64.sve.subr.nxv2i64(<vscale x 2 x i1> %pg,
                                                                <vscale x 2 x i64> %a,
                                                                <vscale x 2 x i64> %splat)
  ret <vscale x 2 x i64> %out
}

; As subr_i32 but where pg is i8 based and thus compatible for i32.
define <vscale x 4 x i32> @subr_i32_ptrue_all_b(<vscale x 4 x i32> %a) #0 {
; CHECK-LABEL: subr_i32_ptrue_all_b:
; CHECK:       // %bb.0:
; CHECK-NEXT:    subr z0.s, z0.s, #1 // =0x1
; CHECK-NEXT:    ret
  %pg.b = tail call <vscale x 16 x i1> @llvm.aarch64.sve.ptrue.nxv16i1(i32 31)
  %pg.s = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> %pg.b)
  %b = tail call <vscale x 4 x i32> @llvm.aarch64.sve.dup.x.nxv4i32(i32 1)
  %out = tail call <vscale x 4 x i32> @llvm.aarch64.sve.subr.nxv4i32(<vscale x 4 x i1> %pg.s,
                                                                     <vscale x 4 x i32> %a,
                                                                     <vscale x 4 x i32> %b)
  ret <vscale x 4 x i32> %out
}

; As subr_i32 but where pg is i16 based and thus compatible for i32.
define <vscale x 4 x i32> @subr_i32_ptrue_all_h(<vscale x 4 x i32> %a) #0 {
; CHECK-LABEL: subr_i32_ptrue_all_h:
; CHECK:       // %bb.0:
; CHECK-NEXT:    subr z0.s, z0.s, #1 // =0x1
; CHECK-NEXT:    ret
  %pg.h = tail call <vscale x 8 x i1> @llvm.aarch64.sve.ptrue.nxv8i1(i32 31)
  %pg.b = tail call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv8i1(<vscale x 8 x i1> %pg.h)
  %pg.s = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> %pg.b)
  %b = tail call <vscale x 4 x i32> @llvm.aarch64.sve.dup.x.nxv4i32(i32 1)
  %out = tail call <vscale x 4 x i32> @llvm.aarch64.sve.subr.nxv4i32(<vscale x 4 x i1> %pg.s,
                                                                     <vscale x 4 x i32> %a,
                                                                     <vscale x 4 x i32> %b)
  ret <vscale x 4 x i32> %out
}

; As subr_i32 but where pg is i64 based, which is not compatible for i32 and
; thus inactive lanes are important and the immediate form cannot be used.
define <vscale x 4 x i32> @subr_i32_ptrue_all_d(<vscale x 4 x i32> %a) #0 {
; CHECK-LABEL: subr_i32_ptrue_all_d:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.d
; CHECK-NEXT:    mov z1.s, #1 // =0x1
; CHECK-NEXT:    subr z0.s, p0/m, z0.s, z1.s
; CHECK-NEXT:    ret
  %pg.d = tail call <vscale x 2 x i1> @llvm.aarch64.sve.ptrue.nxv2i1(i32 31)
  %pg.b = tail call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv2i1(<vscale x 2 x i1> %pg.d)
  %pg.s = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> %pg.b)
  %b = tail call <vscale x 4 x i32> @llvm.aarch64.sve.dup.x.nxv4i32(i32 1)
  %out = tail call <vscale x 4 x i32> @llvm.aarch64.sve.subr.nxv4i32(<vscale x 4 x i1> %pg.s,
                                                                     <vscale x 4 x i32> %a,
                                                                     <vscale x 4 x i32> %b)
  ret <vscale x 4 x i32> %out
}

; SMAX

define <vscale x 16 x i8> @smax_i8(<vscale x 16 x i8> %a) {
; CHECK-LABEL: smax_i8:
; CHECK:       // %bb.0:
; CHECK-NEXT:    smax z0.b, z0.b, #-128
; CHECK-NEXT:    ret
  %pg = call <vscale x 16 x i1> @llvm.aarch64.sve.ptrue.nxv16i1(i32 31)
  %elt = insertelement <vscale x 16 x i8> undef, i8 -128, i32 0
  %splat = shufflevector <vscale x 16 x i8> %elt, <vscale x 16 x i8> undef, <vscale x 16 x i32> zeroinitializer
  %out = call <vscale x 16 x i8> @llvm.aarch64.sve.smax.u.nxv16i8(<vscale x 16 x i1> %pg,
                                                                  <vscale x 16 x i8> %a,
                                                                  <vscale x 16 x i8> %splat)
  ret <vscale x 16 x i8> %out
}

define <vscale x 8 x i16> @smax_i16(<vscale x 8 x i16> %a) {
; CHECK-LABEL: smax_i16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    smax z0.h, z0.h, #127
; CHECK-NEXT:    ret
  %pg = call <vscale x 8 x i1> @llvm.aarch64.sve.ptrue.nxv8i1(i32 31)
  %elt = insertelement <vscale x 8 x i16> undef, i16 127, i32 0
  %splat = shufflevector <vscale x 8 x i16> %elt, <vscale x 8 x i16> undef, <vscale x 8 x i32> zeroinitializer
  %out = call <vscale x 8 x i16> @llvm.aarch64.sve.smax.u.nxv8i16(<vscale x 8 x i1> %pg,
                                                                  <vscale x 8 x i16> %a,
                                                                  <vscale x 8 x i16> %splat)
  ret <vscale x 8 x i16> %out
}
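
; 129 does not fit the signed 8-bit immediate range (-128..127) of SMAX.
; There is no unpredicated register form of smax/smin/umax/umin in SVE, so
; the fallback keeps the predicated instruction under an all-true predicate.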

define <vscale x 8 x i16> @smax_i16_out_of_range(<vscale x 8 x i16> %a) {
; CHECK-LABEL: smax_i16_out_of_range:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.h
; CHECK-NEXT:    mov w8, #129 // =0x81
; CHECK-NEXT:    mov z1.h, w8
; CHECK-NEXT:    smax z0.h, p0/m, z0.h, z1.h
; CHECK-NEXT:    ret
  %pg = call <vscale x 8 x i1> @llvm.aarch64.sve.ptrue.nxv8i1(i32 31)
  %elt = insertelement <vscale x 8 x i16> undef, i16 129, i32 0
  %splat = shufflevector <vscale x 8 x i16> %elt, <vscale x 8 x i16> undef, <vscale x 8 x i32> zeroinitializer
  %out = call <vscale x 8 x i16> @llvm.aarch64.sve.smax.u.nxv8i16(<vscale x 8 x i1> %pg,
                                                                  <vscale x 8 x i16> %a,
                                                                  <vscale x 8 x i16> %splat)
  ret <vscale x 8 x i16> %out
}

define <vscale x 4 x i32> @smax_i32(<vscale x 4 x i32> %a) {
; CHECK-LABEL: smax_i32:
; CHECK:       // %bb.0:
; CHECK-NEXT:    smax z0.s, z0.s, #-128
; CHECK-NEXT:    ret
  %pg = call <vscale x 4 x i1> @llvm.aarch64.sve.ptrue.nxv4i1(i32 31)
  %elt = insertelement <vscale x 4 x i32> undef, i32 -128, i32 0
  %splat = shufflevector <vscale x 4 x i32> %elt, <vscale x 4 x i32> undef, <vscale x 4 x i32> zeroinitializer
  %out = call <vscale x 4 x i32> @llvm.aarch64.sve.smax.u.nxv4i32(<vscale x 4 x i1> %pg,
                                                                  <vscale x 4 x i32> %a,
                                                                  <vscale x 4 x i32> %splat)
  ret <vscale x 4 x i32> %out
}

define <vscale x 4 x i32> @smax_i32_out_of_range(<vscale x 4 x i32> %a) {
; CHECK-LABEL: smax_i32_out_of_range:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.s
; CHECK-NEXT:    mov z1.s, #-129 // =0xffffffffffffff7f
; CHECK-NEXT:    smax z0.s, p0/m, z0.s, z1.s
; CHECK-NEXT:    ret
  %pg = call <vscale x 4 x i1> @llvm.aarch64.sve.ptrue.nxv4i1(i32 31)
  %elt = insertelement <vscale x 4 x i32> undef, i32 -129, i32 0
  %splat = shufflevector <vscale x 4 x i32> %elt, <vscale x 4 x i32> undef, <vscale x 4 x i32> zeroinitializer
  %out = call <vscale x 4 x i32> @llvm.aarch64.sve.smax.u.nxv4i32(<vscale x 4 x i1> %pg,
                                                                  <vscale x 4 x i32> %a,
                                                                  <vscale x 4 x i32> %splat)
  ret <vscale x 4 x i32> %out
}

define <vscale x 2 x i64> @smax_i64(<vscale x 2 x i64> %a) {
; CHECK-LABEL: smax_i64:
; CHECK:       // %bb.0:
; CHECK-NEXT:    smax z0.d, z0.d, #127
; CHECK-NEXT:    ret
  %pg = call <vscale x 2 x i1> @llvm.aarch64.sve.ptrue.nxv2i1(i32 31)
  %elt = insertelement <vscale x 2 x i64> undef, i64 127, i64 0
  %splat = shufflevector <vscale x 2 x i64> %elt, <vscale x 2 x i64> undef, <vscale x 2 x i32> zeroinitializer
  %out = call <vscale x 2 x i64> @llvm.aarch64.sve.smax.u.nxv2i64(<vscale x 2 x i1> %pg,
                                                                  <vscale x 2 x i64> %a,
                                                                  <vscale x 2 x i64> %splat)
  ret <vscale x 2 x i64> %out
}

define <vscale x 2 x i64> @smax_i64_out_of_range(<vscale x 2 x i64> %a) {
; CHECK-LABEL: smax_i64_out_of_range:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.d
; CHECK-NEXT:    mov z1.d, #65535 // =0xffff
; CHECK-NEXT:    smax z0.d, p0/m, z0.d, z1.d
; CHECK-NEXT:    ret
  %pg = call <vscale x 2 x i1> @llvm.aarch64.sve.ptrue.nxv2i1(i32 31)
  %elt = insertelement <vscale x 2 x i64> undef, i64 65535, i64 0
  %splat = shufflevector <vscale x 2 x i64> %elt, <vscale x 2 x i64> undef, <vscale x 2 x i32> zeroinitializer
  %out = call <vscale x 2 x i64> @llvm.aarch64.sve.smax.u.nxv2i64(<vscale x 2 x i1> %pg,
                                                                  <vscale x 2 x i64> %a,
                                                                  <vscale x 2 x i64> %splat)
  ret <vscale x 2 x i64> %out
}

; As smax_i32 but where pg is i8 based and thus compatible for i32.
define <vscale x 4 x i32> @smax_i32_ptrue_all_b(<vscale x 4 x i32> %a) #0 {
; CHECK-LABEL: smax_i32_ptrue_all_b:
; CHECK:       // %bb.0:
; CHECK-NEXT:    smax z0.s, z0.s, #1
; CHECK-NEXT:    ret
  %pg.b = tail call <vscale x 16 x i1> @llvm.aarch64.sve.ptrue.nxv16i1(i32 31)
  %pg.s = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> %pg.b)
  %b = tail call <vscale x 4 x i32> @llvm.aarch64.sve.dup.x.nxv4i32(i32 1)
  %out = tail call <vscale x 4 x i32> @llvm.aarch64.sve.smax.u.nxv4i32(<vscale x 4 x i1> %pg.s,
                                                                       <vscale x 4 x i32> %a,
                                                                       <vscale x 4 x i32> %b)
  ret <vscale x 4 x i32> %out
}

; As smax_i32 but where pg is i16 based and thus compatible for i32.
define <vscale x 4 x i32> @smax_i32_ptrue_all_h(<vscale x 4 x i32> %a) #0 {
; CHECK-LABEL: smax_i32_ptrue_all_h:
; CHECK:       // %bb.0:
; CHECK-NEXT:    smax z0.s, z0.s, #1
; CHECK-NEXT:    ret
  %pg.h = tail call <vscale x 8 x i1> @llvm.aarch64.sve.ptrue.nxv8i1(i32 31)
  %pg.b = tail call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv8i1(<vscale x 8 x i1> %pg.h)
  %pg.s = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> %pg.b)
  %b = tail call <vscale x 4 x i32> @llvm.aarch64.sve.dup.x.nxv4i32(i32 1)
  %out = tail call <vscale x 4 x i32> @llvm.aarch64.sve.smax.u.nxv4i32(<vscale x 4 x i1> %pg.s,
                                                                       <vscale x 4 x i32> %a,
                                                                       <vscale x 4 x i32> %b)
  ret <vscale x 4 x i32> %out
}

; As smax_i32 but where pg is i64 based, which is not compatible for i32 and
; thus inactive lanes are important and the immediate form cannot be used.
define <vscale x 4 x i32> @smax_i32_ptrue_all_d(<vscale x 4 x i32> %a) #0 {
; CHECK-LABEL: smax_i32_ptrue_all_d:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.d
; CHECK-NEXT:    mov z1.s, #1 // =0x1
; CHECK-NEXT:    smax z0.s, p0/m, z0.s, z1.s
; CHECK-NEXT:    ret
  %pg.d = tail call <vscale x 2 x i1> @llvm.aarch64.sve.ptrue.nxv2i1(i32 31)
  %pg.b = tail call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv2i1(<vscale x 2 x i1> %pg.d)
  %pg.s = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> %pg.b)
  %b = tail call <vscale x 4 x i32> @llvm.aarch64.sve.dup.x.nxv4i32(i32 1)
  %out = tail call <vscale x 4 x i32> @llvm.aarch64.sve.smax.nxv4i32(<vscale x 4 x i1> %pg.s,
                                                                     <vscale x 4 x i32> %a,
                                                                     <vscale x 4 x i32> %b)
  ret <vscale x 4 x i32> %out
}

; SMIN

define <vscale x 16 x i8> @smin_i8(<vscale x 16 x i8> %a) {
; CHECK-LABEL: smin_i8:
; CHECK:       // %bb.0:
; CHECK-NEXT:    smin z0.b, z0.b, #127
; CHECK-NEXT:    ret
  %pg = call <vscale x 16 x i1> @llvm.aarch64.sve.ptrue.nxv16i1(i32 31)
  %elt = insertelement <vscale x 16 x i8> undef, i8 127, i32 0
  %splat = shufflevector <vscale x 16 x i8> %elt, <vscale x 16 x i8> undef, <vscale x 16 x i32> zeroinitializer
  %out = call <vscale x 16 x i8> @llvm.aarch64.sve.smin.u.nxv16i8(<vscale x 16 x i1> %pg,
                                                                  <vscale x 16 x i8> %a,
                                                                  <vscale x 16 x i8> %splat)
  ret <vscale x 16 x i8> %out
}

define <vscale x 8 x i16> @smin_i16(<vscale x 8 x i16> %a) {
; CHECK-LABEL: smin_i16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    smin z0.h, z0.h, #-128
; CHECK-NEXT:    ret
  %pg = call <vscale x 8 x i1> @llvm.aarch64.sve.ptrue.nxv8i1(i32 31)
  %elt = insertelement <vscale x 8 x i16> undef, i16 -128, i32 0
  %splat = shufflevector <vscale x 8 x i16> %elt, <vscale x 8 x i16> undef, <vscale x 8 x i32> zeroinitializer
  %out = call <vscale x 8 x i16> @llvm.aarch64.sve.smin.u.nxv8i16(<vscale x 8 x i1> %pg,
                                                                  <vscale x 8 x i16> %a,
                                                                  <vscale x 8 x i16> %splat)
  ret <vscale x 8 x i16> %out
}

define <vscale x 8 x i16> @smin_i16_out_of_range(<vscale x 8 x i16> %a) {
; CHECK-LABEL: smin_i16_out_of_range:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.h
; CHECK-NEXT:    mov z1.h, #-129 // =0xffffffffffffff7f
; CHECK-NEXT:    smin z0.h, p0/m, z0.h, z1.h
; CHECK-NEXT:    ret
  %pg = call <vscale x 8 x i1> @llvm.aarch64.sve.ptrue.nxv8i1(i32 31)
  %elt = insertelement <vscale x 8 x i16> undef, i16 -129, i32 0
  %splat = shufflevector <vscale x 8 x i16> %elt, <vscale x 8 x i16> undef, <vscale x 8 x i32> zeroinitializer
  %out = call <vscale x 8 x i16> @llvm.aarch64.sve.smin.u.nxv8i16(<vscale x 8 x i1> %pg,
                                                                  <vscale x 8 x i16> %a,
                                                                  <vscale x 8 x i16> %splat)
  ret <vscale x 8 x i16> %out
}

define <vscale x 4 x i32> @smin_i32(<vscale x 4 x i32> %a) {
; CHECK-LABEL: smin_i32:
; CHECK:       // %bb.0:
; CHECK-NEXT:    smin z0.s, z0.s, #127
; CHECK-NEXT:    ret
  %pg = call <vscale x 4 x i1> @llvm.aarch64.sve.ptrue.nxv4i1(i32 31)
  %elt = insertelement <vscale x 4 x i32> undef, i32 127, i32 0
  %splat = shufflevector <vscale x 4 x i32> %elt, <vscale x 4 x i32> undef, <vscale x 4 x i32> zeroinitializer
  %out = call <vscale x 4 x i32> @llvm.aarch64.sve.smin.u.nxv4i32(<vscale x 4 x i1> %pg,
                                                                  <vscale x 4 x i32> %a,
                                                                  <vscale x 4 x i32> %splat)
  ret <vscale x 4 x i32> %out
}

define <vscale x 4 x i32> @smin_i32_out_of_range(<vscale x 4 x i32> %a) {
; CHECK-LABEL: smin_i32_out_of_range:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.s
; CHECK-NEXT:    mov w8, #257 // =0x101
; CHECK-NEXT:    mov z1.s, w8
; CHECK-NEXT:    smin z0.s, p0/m, z0.s, z1.s
; CHECK-NEXT:    ret
  %pg = call <vscale x 4 x i1> @llvm.aarch64.sve.ptrue.nxv4i1(i32 31)
  %elt = insertelement <vscale x 4 x i32> undef, i32 257, i32 0
  %splat = shufflevector <vscale x 4 x i32> %elt, <vscale x 4 x i32> undef, <vscale x 4 x i32> zeroinitializer
  %out = call <vscale x 4 x i32> @llvm.aarch64.sve.smin.u.nxv4i32(<vscale x 4 x i1> %pg,
                                                                  <vscale x 4 x i32> %a,
                                                                  <vscale x 4 x i32> %splat)
  ret <vscale x 4 x i32> %out
}

define <vscale x 2 x i64> @smin_i64(<vscale x 2 x i64> %a) {
; CHECK-LABEL: smin_i64:
; CHECK:       // %bb.0:
; CHECK-NEXT:    smin z0.d, z0.d, #-128
; CHECK-NEXT:    ret
  %pg = call <vscale x 2 x i1> @llvm.aarch64.sve.ptrue.nxv2i1(i32 31)
  %elt = insertelement <vscale x 2 x i64> undef, i64 -128, i64 0
  %splat = shufflevector <vscale x 2 x i64> %elt, <vscale x 2 x i64> undef, <vscale x 2 x i32> zeroinitializer
  %out = call <vscale x 2 x i64> @llvm.aarch64.sve.smin.u.nxv2i64(<vscale x 2 x i1> %pg,
                                                                  <vscale x 2 x i64> %a,
                                                                  <vscale x 2 x i64> %splat)
  ret <vscale x 2 x i64> %out
}

define <vscale x 2 x i64> @smin_i64_out_of_range(<vscale x 2 x i64> %a) {
; CHECK-LABEL: smin_i64_out_of_range:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.d
; CHECK-NEXT:    mov z1.d, #-256 // =0xffffffffffffff00
; CHECK-NEXT:    smin z0.d, p0/m, z0.d, z1.d
; CHECK-NEXT:    ret
  %pg = call <vscale x 2 x i1> @llvm.aarch64.sve.ptrue.nxv2i1(i32 31)
  %elt = insertelement <vscale x 2 x i64> undef, i64 -256, i64 0
  %splat = shufflevector <vscale x 2 x i64> %elt, <vscale x 2 x i64> undef, <vscale x 2 x i32> zeroinitializer
  %out = call <vscale x 2 x i64> @llvm.aarch64.sve.smin.u.nxv2i64(<vscale x 2 x i1> %pg,
                                                                  <vscale x 2 x i64> %a,
                                                                  <vscale x 2 x i64> %splat)
  ret <vscale x 2 x i64> %out
}

; As smin_i32 but where pg is i8 based and thus compatible for i32.
define <vscale x 4 x i32> @smin_i32_ptrue_all_b(<vscale x 4 x i32> %a) #0 {
; CHECK-LABEL: smin_i32_ptrue_all_b:
; CHECK:       // %bb.0:
; CHECK-NEXT:    smin z0.s, z0.s, #1
; CHECK-NEXT:    ret
  %pg.b = tail call <vscale x 16 x i1> @llvm.aarch64.sve.ptrue.nxv16i1(i32 31)
  %pg.s = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> %pg.b)
  %b = tail call <vscale x 4 x i32> @llvm.aarch64.sve.dup.x.nxv4i32(i32 1)
  %out = tail call <vscale x 4 x i32> @llvm.aarch64.sve.smin.u.nxv4i32(<vscale x 4 x i1> %pg.s,
                                                                       <vscale x 4 x i32> %a,
                                                                       <vscale x 4 x i32> %b)
  ret <vscale x 4 x i32> %out
}

; As smin_i32 but where pg is i16 based and thus compatible for i32.
define <vscale x 4 x i32> @smin_i32_ptrue_all_h(<vscale x 4 x i32> %a) #0 {
; CHECK-LABEL: smin_i32_ptrue_all_h:
; CHECK:       // %bb.0:
; CHECK-NEXT:    smin z0.s, z0.s, #1
; CHECK-NEXT:    ret
  %pg.h = tail call <vscale x 8 x i1> @llvm.aarch64.sve.ptrue.nxv8i1(i32 31)
  %pg.b = tail call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv8i1(<vscale x 8 x i1> %pg.h)
  %pg.s = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> %pg.b)
  %b = tail call <vscale x 4 x i32> @llvm.aarch64.sve.dup.x.nxv4i32(i32 1)
  %out = tail call <vscale x 4 x i32> @llvm.aarch64.sve.smin.u.nxv4i32(<vscale x 4 x i1> %pg.s,
                                                                       <vscale x 4 x i32> %a,
                                                                       <vscale x 4 x i32> %b)
  ret <vscale x 4 x i32> %out
}

; As smin_i32 but where pg is i64 based, which is not compatible for i32 and
; thus inactive lanes are important and the immediate form cannot be used.
define <vscale x 4 x i32> @smin_i32_ptrue_all_d(<vscale x 4 x i32> %a) #0 {
; CHECK-LABEL: smin_i32_ptrue_all_d:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.d
; CHECK-NEXT:    mov z1.s, #1 // =0x1
; CHECK-NEXT:    smin z0.s, p0/m, z0.s, z1.s
; CHECK-NEXT:    ret
  %pg.d = tail call <vscale x 2 x i1> @llvm.aarch64.sve.ptrue.nxv2i1(i32 31)
  %pg.b = tail call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv2i1(<vscale x 2 x i1> %pg.d)
  %pg.s = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> %pg.b)
  %b = tail call <vscale x 4 x i32> @llvm.aarch64.sve.dup.x.nxv4i32(i32 1)
  %out = tail call <vscale x 4 x i32> @llvm.aarch64.sve.smin.nxv4i32(<vscale x 4 x i1> %pg.s,
                                                                     <vscale x 4 x i32> %a,
                                                                     <vscale x 4 x i32> %b)
  ret <vscale x 4 x i32> %out
}

; UMAX

define <vscale x 16 x i8> @umax_i8(<vscale x 16 x i8> %a) {
; CHECK-LABEL: umax_i8:
; CHECK:       // %bb.0:
; CHECK-NEXT:    umax z0.b, z0.b, #0
; CHECK-NEXT:    ret
  %pg = call <vscale x 16 x i1> @llvm.aarch64.sve.ptrue.nxv16i1(i32 31)
  %elt = insertelement <vscale x 16 x i8> undef, i8 0, i32 0
  %splat = shufflevector <vscale x 16 x i8> %elt, <vscale x 16 x i8> undef, <vscale x 16 x i32> zeroinitializer
  %out = call <vscale x 16 x i8> @llvm.aarch64.sve.umax.u.nxv16i8(<vscale x 16 x i1> %pg,
                                                                  <vscale x 16 x i8> %a,
                                                                  <vscale x 16 x i8> %splat)
  ret <vscale x 16 x i8> %out
}

define <vscale x 8 x i16> @umax_i16(<vscale x 8 x i16> %a) {
; CHECK-LABEL: umax_i16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    umax z0.h, z0.h, #255
; CHECK-NEXT:    ret
  %pg = call <vscale x 8 x i1> @llvm.aarch64.sve.ptrue.nxv8i1(i32 31)
  %elt = insertelement <vscale x 8 x i16> undef, i16 255, i32 0
  %splat = shufflevector <vscale x 8 x i16> %elt, <vscale x 8 x i16> undef, <vscale x 8 x i32> zeroinitializer
  %out = call <vscale x 8 x i16> @llvm.aarch64.sve.umax.u.nxv8i16(<vscale x 8 x i1> %pg,
                                                                  <vscale x 8 x i16> %a,
                                                                  <vscale x 8 x i16> %splat)
  ret <vscale x 8 x i16> %out
}

define <vscale x 8 x i16> @umax_i16_out_of_range(<vscale x 8 x i16> %a) {
; CHECK-LABEL: umax_i16_out_of_range:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.h
; CHECK-NEXT:    dupm z1.b, #0x1
; CHECK-NEXT:    umax z0.h, p0/m, z0.h, z1.h
; CHECK-NEXT:    ret
  %pg = call <vscale x 8 x i1> @llvm.aarch64.sve.ptrue.nxv8i1(i32 31)
  %elt = insertelement <vscale x 8 x i16> undef, i16 257, i32 0
  %splat = shufflevector <vscale x 8 x i16> %elt, <vscale x 8 x i16> undef, <vscale x 8 x i32> zeroinitializer
  %out = call <vscale x 8 x i16> @llvm.aarch64.sve.umax.u.nxv8i16(<vscale x 8 x i1> %pg,
                                                                  <vscale x 8 x i16> %a,
                                                                  <vscale x 8 x i16> %splat)
  ret <vscale x 8 x i16> %out
}

define <vscale x 4 x i32> @umax_i32(<vscale x 4 x i32> %a) {
; CHECK-LABEL: umax_i32:
; CHECK:       // %bb.0:
; CHECK-NEXT:    umax z0.s, z0.s, #0
; CHECK-NEXT:    ret
  %pg = call <vscale x 4 x i1> @llvm.aarch64.sve.ptrue.nxv4i1(i32 31)
  %elt = insertelement <vscale x 4 x i32> undef, i32 0, i32 0
  %splat = shufflevector <vscale x 4 x i32> %elt, <vscale x 4 x i32> undef, <vscale x 4 x i32> zeroinitializer
  %out = call <vscale x 4 x i32> @llvm.aarch64.sve.umax.u.nxv4i32(<vscale x 4 x i1> %pg,
                                                                  <vscale x 4 x i32> %a,
                                                                  <vscale x 4 x i32> %splat)
  ret <vscale x 4 x i32> %out
}

define <vscale x 4 x i32> @umax_i32_out_of_range(<vscale x 4 x i32> %a) {
; CHECK-LABEL: umax_i32_out_of_range:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.s
; CHECK-NEXT:    mov w8, #257 // =0x101
; CHECK-NEXT:    mov z1.s, w8
; CHECK-NEXT:    umax z0.s, p0/m, z0.s, z1.s
; CHECK-NEXT:    ret
  %pg = call <vscale x 4 x i1> @llvm.aarch64.sve.ptrue.nxv4i1(i32 31)
  %elt = insertelement <vscale x 4 x i32> undef, i32 257, i32 0
  %splat = shufflevector <vscale x 4 x i32> %elt, <vscale x 4 x i32> undef, <vscale x 4 x i32> zeroinitializer
  %out = call <vscale x 4 x i32> @llvm.aarch64.sve.umax.u.nxv4i32(<vscale x 4 x i1> %pg,
                                                                  <vscale x 4 x i32> %a,
                                                                  <vscale x 4 x i32> %splat)
  ret <vscale x 4 x i32> %out
}

define <vscale x 2 x i64> @umax_i64(<vscale x 2 x i64> %a) {
; CHECK-LABEL: umax_i64:
; CHECK:       // %bb.0:
; CHECK-NEXT:    umax z0.d, z0.d, #255
; CHECK-NEXT:    ret
  %pg = call <vscale x 2 x i1> @llvm.aarch64.sve.ptrue.nxv2i1(i32 31)
  %elt = insertelement <vscale x 2 x i64> undef, i64 255, i64 0
  %splat = shufflevector <vscale x 2 x i64> %elt, <vscale x 2 x i64> undef, <vscale x 2 x i32> zeroinitializer
  %out = call <vscale x 2 x i64> @llvm.aarch64.sve.umax.u.nxv2i64(<vscale x 2 x i1> %pg,
                                                                  <vscale x 2 x i64> %a,
                                                                  <vscale x 2 x i64> %splat)
  ret <vscale x 2 x i64> %out
}

define <vscale x 2 x i64> @umax_i64_out_of_range(<vscale x 2 x i64> %a) {
; CHECK-LABEL: umax_i64_out_of_range:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.d
; CHECK-NEXT:    mov z1.d, #65535 // =0xffff
; CHECK-NEXT:    umax z0.d, p0/m, z0.d, z1.d
; CHECK-NEXT:    ret
  %pg = call <vscale x 2 x i1> @llvm.aarch64.sve.ptrue.nxv2i1(i32 31)
  %elt = insertelement <vscale x 2 x i64> undef, i64 65535, i64 0
  %splat = shufflevector <vscale x 2 x i64> %elt, <vscale x 2 x i64> undef, <vscale x 2 x i32> zeroinitializer
  %out = call <vscale x 2 x i64> @llvm.aarch64.sve.umax.u.nxv2i64(<vscale x 2 x i1> %pg,
                                                                  <vscale x 2 x i64> %a,
                                                                  <vscale x 2 x i64> %splat)
  ret <vscale x 2 x i64> %out
}

; As umax_i32 but where pg is i8 based and thus compatible for i32.
define <vscale x 4 x i32> @umax_i32_ptrue_all_b(<vscale x 4 x i32> %a) #0 {
; CHECK-LABEL: umax_i32_ptrue_all_b:
; CHECK:       // %bb.0:
; CHECK-NEXT:    umax z0.s, z0.s, #1
; CHECK-NEXT:    ret
  %pg.b = tail call <vscale x 16 x i1> @llvm.aarch64.sve.ptrue.nxv16i1(i32 31)
  %pg.s = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> %pg.b)
  %b = tail call <vscale x 4 x i32> @llvm.aarch64.sve.dup.x.nxv4i32(i32 1)
  %out = tail call <vscale x 4 x i32> @llvm.aarch64.sve.umax.u.nxv4i32(<vscale x 4 x i1> %pg.s,
                                                                       <vscale x 4 x i32> %a,
                                                                       <vscale x 4 x i32> %b)
  ret <vscale x 4 x i32> %out
}

; As umax_i32 but where pg is i16 based and thus compatible for i32.
define <vscale x 4 x i32> @umax_i32_ptrue_all_h(<vscale x 4 x i32> %a) #0 {
; CHECK-LABEL: umax_i32_ptrue_all_h:
; CHECK:       // %bb.0:
; CHECK-NEXT:    umax z0.s, z0.s, #1
; CHECK-NEXT:    ret
  %pg.h = tail call <vscale x 8 x i1> @llvm.aarch64.sve.ptrue.nxv8i1(i32 31)
  %pg.b = tail call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv8i1(<vscale x 8 x i1> %pg.h)
  %pg.s = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> %pg.b)
  %b = tail call <vscale x 4 x i32> @llvm.aarch64.sve.dup.x.nxv4i32(i32 1)
  %out = tail call <vscale x 4 x i32> @llvm.aarch64.sve.umax.u.nxv4i32(<vscale x 4 x i1> %pg.s,
                                                                       <vscale x 4 x i32> %a,
                                                                       <vscale x 4 x i32> %b)
  ret <vscale x 4 x i32> %out
}

; As umax_i32 but where pg is i64 based, which is not compatible for i32 and
; thus inactive lanes are important and the immediate form cannot be used.
define <vscale x 4 x i32> @umax_i32_ptrue_all_d(<vscale x 4 x i32> %a) #0 {
; CHECK-LABEL: umax_i32_ptrue_all_d:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.d
; CHECK-NEXT:    mov z1.s, #1 // =0x1
; CHECK-NEXT:    umax z0.s, p0/m, z0.s, z1.s
; CHECK-NEXT:    ret
  %pg.d = tail call <vscale x 2 x i1> @llvm.aarch64.sve.ptrue.nxv2i1(i32 31)
  %pg.b = tail call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv2i1(<vscale x 2 x i1> %pg.d)
  %pg.s = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> %pg.b)
  %b = tail call <vscale x 4 x i32> @llvm.aarch64.sve.dup.x.nxv4i32(i32 1)
  %out = tail call <vscale x 4 x i32> @llvm.aarch64.sve.umax.nxv4i32(<vscale x 4 x i1> %pg.s,
                                                                     <vscale x 4 x i32> %a,
                                                                     <vscale x 4 x i32> %b)
  ret <vscale x 4 x i32> %out
}

; UMIN

define <vscale x 16 x i8> @umin_i8(<vscale x 16 x i8> %a) {
; CHECK-LABEL: umin_i8:
; CHECK:       // %bb.0:
; CHECK-NEXT:    umin z0.b, z0.b, #255
; CHECK-NEXT:    ret
  %pg = call <vscale x 16 x i1> @llvm.aarch64.sve.ptrue.nxv16i1(i32 31)
  %elt = insertelement <vscale x 16 x i8> undef, i8 255, i32 0
  %splat = shufflevector <vscale x 16 x i8> %elt, <vscale x 16 x i8> undef, <vscale x 16 x i32> zeroinitializer
  %out = call <vscale x 16 x i8> @llvm.aarch64.sve.umin.u.nxv16i8(<vscale x 16 x i1> %pg,
                                                                  <vscale x 16 x i8> %a,
                                                                  <vscale x 16 x i8> %splat)
  ret <vscale x 16 x i8> %out
}

define <vscale x 8 x i16> @umin_i16(<vscale x 8 x i16> %a) {
; CHECK-LABEL: umin_i16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    umin z0.h, z0.h, #0
; CHECK-NEXT:    ret
  %pg = call <vscale x 8 x i1> @llvm.aarch64.sve.ptrue.nxv8i1(i32 31)
  %elt = insertelement <vscale x 8 x i16> undef, i16 0, i32 0
  %splat = shufflevector <vscale x 8 x i16> %elt, <vscale x 8 x i16> undef, <vscale x 8 x i32> zeroinitializer
  %out = call <vscale x 8 x i16> @llvm.aarch64.sve.umin.u.nxv8i16(<vscale x 8 x i1> %pg,
                                                                  <vscale x 8 x i16> %a,
                                                                  <vscale x 8 x i16> %splat)
  ret <vscale x 8 x i16> %out
}

define <vscale x 8 x i16> @umin_i16_out_of_range(<vscale x 8 x i16> %a) {
; CHECK-LABEL: umin_i16_out_of_range:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.h
; CHECK-NEXT:    dupm z1.b, #0x1
; CHECK-NEXT:    umin z0.h, p0/m, z0.h, z1.h
; CHECK-NEXT:    ret
  %pg = call <vscale x 8 x i1> @llvm.aarch64.sve.ptrue.nxv8i1(i32 31)
  %elt = insertelement <vscale x 8 x i16> undef, i16 257, i32 0
  %splat = shufflevector <vscale x 8 x i16> %elt, <vscale x 8 x i16> undef, <vscale x 8 x i32> zeroinitializer
  %out = call <vscale x 8 x i16> @llvm.aarch64.sve.umin.u.nxv8i16(<vscale x 8 x i1> %pg,
                                                                  <vscale x 8 x i16> %a,
                                                                  <vscale x 8 x i16> %splat)
  ret <vscale x 8 x i16> %out
}

define <vscale x 4 x i32> @umin_i32(<vscale x 4 x i32> %a) {
; CHECK-LABEL: umin_i32:
; CHECK:       // %bb.0:
; CHECK-NEXT:    umin z0.s, z0.s, #255
; CHECK-NEXT:    ret
  %pg = call <vscale x 4 x i1> @llvm.aarch64.sve.ptrue.nxv4i1(i32 31)
  %elt = insertelement <vscale x 4 x i32> undef, i32 255, i32 0
  %splat = shufflevector <vscale x 4 x i32> %elt, <vscale x 4 x i32> undef, <vscale x 4 x i32> zeroinitializer
  %out = call <vscale x 4 x i32> @llvm.aarch64.sve.umin.u.nxv4i32(<vscale x 4 x i1> %pg,
                                                                  <vscale x 4 x i32> %a,
                                                                  <vscale x 4 x i32> %splat)
  ret <vscale x 4 x i32> %out
}

define <vscale x 4 x i32> @umin_i32_out_of_range(<vscale x 4 x i32> %a) {
; CHECK-LABEL: umin_i32_out_of_range:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.s
; CHECK-NEXT:    mov w8, #257 // =0x101
; CHECK-NEXT:    mov z1.s, w8
; CHECK-NEXT:    umin z0.s, p0/m, z0.s, z1.s
; CHECK-NEXT:    ret
  %pg = call <vscale x 4 x i1> @llvm.aarch64.sve.ptrue.nxv4i1(i32 31)
  %elt = insertelement <vscale x 4 x i32> undef, i32 257, i32 0
  %splat = shufflevector <vscale x 4 x i32> %elt, <vscale x 4 x i32> undef, <vscale x 4 x i32> zeroinitializer
  %out = call <vscale x 4 x i32> @llvm.aarch64.sve.umin.u.nxv4i32(<vscale x 4 x i1> %pg,
                                                                  <vscale x 4 x i32> %a,
                                                                  <vscale x 4 x i32> %splat)
  ret <vscale x 4 x i32> %out
}

define <vscale x 2 x i64> @umin_i64(<vscale x 2 x i64> %a) {
; CHECK-LABEL: umin_i64:
; CHECK:       // %bb.0:
; CHECK-NEXT:    umin z0.d, z0.d, #0
; CHECK-NEXT:    ret
  %pg = call <vscale x 2 x i1> @llvm.aarch64.sve.ptrue.nxv2i1(i32 31)
  %elt = insertelement <vscale x 2 x i64> undef, i64 0, i64 0
  %splat = shufflevector <vscale x 2 x i64> %elt, <vscale x 2 x i64> undef, <vscale x 2 x i32> zeroinitializer
  %out = call <vscale x 2 x i64> @llvm.aarch64.sve.umin.u.nxv2i64(<vscale x 2 x i1> %pg,
                                                                  <vscale x 2 x i64> %a,
                                                                  <vscale x 2 x i64> %splat)
  ret <vscale x 2 x i64> %out
}

define <vscale x 2 x i64> @umin_i64_out_of_range(<vscale x 2 x i64> %a) {
; CHECK-LABEL: umin_i64_out_of_range:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.d
; CHECK-NEXT:    mov z1.d, #65535 // =0xffff
; CHECK-NEXT:    umin z0.d, p0/m, z0.d, z1.d
; CHECK-NEXT:    ret
  %pg = call <vscale x 2 x i1> @llvm.aarch64.sve.ptrue.nxv2i1(i32 31)
  %elt = insertelement <vscale x 2 x i64> undef, i64 65535, i64 0
  %splat = shufflevector <vscale x 2 x i64> %elt, <vscale x 2 x i64> undef, <vscale x 2 x i32> zeroinitializer
  %out = call <vscale x 2 x i64> @llvm.aarch64.sve.umin.u.nxv2i64(<vscale x 2 x i1> %pg,
                                                                  <vscale x 2 x i64> %a,
                                                                  <vscale x 2 x i64> %splat)
  ret <vscale x 2 x i64> %out
}

; As umin_i32 but where pg is i8 based and thus compatible for i32.
define <vscale x 4 x i32> @umin_i32_ptrue_all_b(<vscale x 4 x i32> %a) #0 {
; CHECK-LABEL: umin_i32_ptrue_all_b:
; CHECK:       // %bb.0:
; CHECK-NEXT:    umin z0.s, z0.s, #1
; CHECK-NEXT:    ret
  %pg.b = tail call <vscale x 16 x i1> @llvm.aarch64.sve.ptrue.nxv16i1(i32 31)
  %pg.s = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> %pg.b)
  %b = tail call <vscale x 4 x i32> @llvm.aarch64.sve.dup.x.nxv4i32(i32 1)
  %out = tail call <vscale x 4 x i32> @llvm.aarch64.sve.umin.u.nxv4i32(<vscale x 4 x i1> %pg.s,
                                                                       <vscale x 4 x i32> %a,
                                                                       <vscale x 4 x i32> %b)
  ret <vscale x 4 x i32> %out
}

; As umin_i32 but where pg is i16 based and thus compatible for i32.
define <vscale x 4 x i32> @umin_i32_ptrue_all_h(<vscale x 4 x i32> %a) #0 {
; CHECK-LABEL: umin_i32_ptrue_all_h:
; CHECK:       // %bb.0:
; CHECK-NEXT:    umin z0.s, z0.s, #1
; CHECK-NEXT:    ret
  %pg.h = tail call <vscale x 8 x i1> @llvm.aarch64.sve.ptrue.nxv8i1(i32 31)
  %pg.b = tail call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv8i1(<vscale x 8 x i1> %pg.h)
  %pg.s = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> %pg.b)
  %b = tail call <vscale x 4 x i32> @llvm.aarch64.sve.dup.x.nxv4i32(i32 1)
  %out = tail call <vscale x 4 x i32> @llvm.aarch64.sve.umin.u.nxv4i32(<vscale x 4 x i1> %pg.s,
                                                                       <vscale x 4 x i32> %a,
                                                                       <vscale x 4 x i32> %b)
  ret <vscale x 4 x i32> %out
}

; As umin_i32 but where pg is i64 based, which is not compatible for i32 and
; thus inactive lanes are important and the immediate form cannot be used.
define <vscale x 4 x i32> @umin_i32_ptrue_all_d(<vscale x 4 x i32> %a) #0 {
; CHECK-LABEL: umin_i32_ptrue_all_d:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.d
; CHECK-NEXT:    mov z1.s, #1 // =0x1
; CHECK-NEXT:    umin z0.s, p0/m, z0.s, z1.s
; CHECK-NEXT:    ret
  %pg.d = tail call <vscale x 2 x i1> @llvm.aarch64.sve.ptrue.nxv2i1(i32 31)
  %pg.b = tail call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv2i1(<vscale x 2 x i1> %pg.d)
  %pg.s = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> %pg.b)
  %b = tail call <vscale x 4 x i32> @llvm.aarch64.sve.dup.x.nxv4i32(i32 1)
  %out = tail call <vscale x 4 x i32> @llvm.aarch64.sve.umin.nxv4i32(<vscale x 4 x i1> %pg.s,
                                                                     <vscale x 4 x i32> %a,
                                                                     <vscale x 4 x i32> %b)
  ret <vscale x 4 x i32> %out
}
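
; The saturating arithmetic intrinsics below are unpredicated (the .x
; variants), so only the immediate range decides which instruction form is
; selected.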

; SQADD

define <vscale x 16 x i8> @sqadd_b_lowimm(<vscale x 16 x i8> %a) {
; CHECK-LABEL: sqadd_b_lowimm:
; CHECK:       // %bb.0:
; CHECK-NEXT:    sqadd z0.b, z0.b, #27 // =0x1b
; CHECK-NEXT:    ret
  %elt = insertelement <vscale x 16 x i8> undef, i8 27, i32 0
  %splat = shufflevector <vscale x 16 x i8> %elt, <vscale x 16 x i8> undef, <vscale x 16 x i32> zeroinitializer
  %out = call <vscale x 16 x i8> @llvm.aarch64.sve.sqadd.x.nxv16i8(<vscale x 16 x i8> %a,
                                                                   <vscale x 16 x i8> %splat)
  ret <vscale x 16 x i8> %out
}

define <vscale x 8 x i16> @sqadd_h_lowimm(<vscale x 8 x i16> %a) {
; CHECK-LABEL: sqadd_h_lowimm:
; CHECK:       // %bb.0:
; CHECK-NEXT:    sqadd z0.h, z0.h, #43 // =0x2b
; CHECK-NEXT:    ret
  %elt = insertelement <vscale x 8 x i16> undef, i16 43, i32 0
  %splat = shufflevector <vscale x 8 x i16> %elt, <vscale x 8 x i16> undef, <vscale x 8 x i32> zeroinitializer
  %out = call <vscale x 8 x i16> @llvm.aarch64.sve.sqadd.x.nxv8i16(<vscale x 8 x i16> %a,
                                                                   <vscale x 8 x i16> %splat)
  ret <vscale x 8 x i16> %out
}
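
; The saturating add/sub immediate encodes an unsigned 8-bit value optionally
; shifted left by eight, so multiples of 256 up to 65280 remain encodable, as
; in the "highimm" tests below.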

define <vscale x 8 x i16> @sqadd_h_highimm(<vscale x 8 x i16> %a) {
; CHECK-LABEL: sqadd_h_highimm:
; CHECK:       // %bb.0:
; CHECK-NEXT:    sqadd z0.h, z0.h, #2048 // =0x800
; CHECK-NEXT:    ret
  %elt = insertelement <vscale x 8 x i16> undef, i16 2048, i32 0
  %splat = shufflevector <vscale x 8 x i16> %elt, <vscale x 8 x i16> undef, <vscale x 8 x i32> zeroinitializer
  %out = call <vscale x 8 x i16> @llvm.aarch64.sve.sqadd.x.nxv8i16(<vscale x 8 x i16> %a,
                                                                   <vscale x 8 x i16> %splat)
  ret <vscale x 8 x i16> %out
}

define <vscale x 4 x i32> @sqadd_s_lowimm(<vscale x 4 x i32> %a) {
; CHECK-LABEL: sqadd_s_lowimm:
; CHECK:       // %bb.0:
; CHECK-NEXT:    sqadd z0.s, z0.s, #1 // =0x1
; CHECK-NEXT:    ret
  %elt = insertelement <vscale x 4 x i32> undef, i32 1, i32 0
  %splat = shufflevector <vscale x 4 x i32> %elt, <vscale x 4 x i32> undef, <vscale x 4 x i32> zeroinitializer
  %out = call <vscale x 4 x i32> @llvm.aarch64.sve.sqadd.x.nxv4i32(<vscale x 4 x i32> %a,
                                                                   <vscale x 4 x i32> %splat)
  ret <vscale x 4 x i32> %out
}

define <vscale x 4 x i32> @sqadd_s_highimm(<vscale x 4 x i32> %a) {
; CHECK-LABEL: sqadd_s_highimm:
; CHECK:       // %bb.0:
; CHECK-NEXT:    sqadd z0.s, z0.s, #8192 // =0x2000
; CHECK-NEXT:    ret
  %elt = insertelement <vscale x 4 x i32> undef, i32 8192, i32 0
  %splat = shufflevector <vscale x 4 x i32> %elt, <vscale x 4 x i32> undef, <vscale x 4 x i32> zeroinitializer
  %out = call <vscale x 4 x i32> @llvm.aarch64.sve.sqadd.x.nxv4i32(<vscale x 4 x i32> %a,
                                                                   <vscale x 4 x i32> %splat)
  ret <vscale x 4 x i32> %out
}

define <vscale x 2 x i64> @sqadd_d_lowimm(<vscale x 2 x i64> %a) {
; CHECK-LABEL: sqadd_d_lowimm:
; CHECK:       // %bb.0:
; CHECK-NEXT:    sqadd z0.d, z0.d, #255 // =0xff
; CHECK-NEXT:    ret
  %elt = insertelement <vscale x 2 x i64> undef, i64 255, i32 0
  %splat = shufflevector <vscale x 2 x i64> %elt, <vscale x 2 x i64> undef, <vscale x 2 x i32> zeroinitializer
  %out = call <vscale x 2 x i64> @llvm.aarch64.sve.sqadd.x.nxv2i64(<vscale x 2 x i64> %a,
                                                                   <vscale x 2 x i64> %splat)
  ret <vscale x 2 x i64> %out
}

define <vscale x 2 x i64> @sqadd_d_highimm(<vscale x 2 x i64> %a) {
; CHECK-LABEL: sqadd_d_highimm:
; CHECK:       // %bb.0:
; CHECK-NEXT:    sqadd z0.d, z0.d, #65280 // =0xff00
; CHECK-NEXT:    ret
  %elt = insertelement <vscale x 2 x i64> undef, i64 65280, i32 0
  %splat = shufflevector <vscale x 2 x i64> %elt, <vscale x 2 x i64> undef, <vscale x 2 x i32> zeroinitializer
  %out = call <vscale x 2 x i64> @llvm.aarch64.sve.sqadd.x.nxv2i64(<vscale x 2 x i64> %a,
                                                                   <vscale x 2 x i64> %splat)
  ret <vscale x 2 x i64> %out
}

; SQSUB

define <vscale x 16 x i8> @sqsub_b_lowimm(<vscale x 16 x i8> %a) {
; CHECK-LABEL: sqsub_b_lowimm:
; CHECK:       // %bb.0:
; CHECK-NEXT:    sqsub z0.b, z0.b, #27 // =0x1b
; CHECK-NEXT:    ret
  %elt = insertelement <vscale x 16 x i8> undef, i8 27, i32 0
  %splat = shufflevector <vscale x 16 x i8> %elt, <vscale x 16 x i8> undef, <vscale x 16 x i32> zeroinitializer
  %out = call <vscale x 16 x i8> @llvm.aarch64.sve.sqsub.x.nxv16i8(<vscale x 16 x i8> %a,
                                                                   <vscale x 16 x i8> %splat)
  ret <vscale x 16 x i8> %out
}

define <vscale x 8 x i16> @sqsub_h_lowimm(<vscale x 8 x i16> %a) {
; CHECK-LABEL: sqsub_h_lowimm:
; CHECK:       // %bb.0:
; CHECK-NEXT:    sqsub z0.h, z0.h, #43 // =0x2b
; CHECK-NEXT:    ret
  %elt = insertelement <vscale x 8 x i16> undef, i16 43, i32 0
  %splat = shufflevector <vscale x 8 x i16> %elt, <vscale x 8 x i16> undef, <vscale x 8 x i32> zeroinitializer
  %out = call <vscale x 8 x i16> @llvm.aarch64.sve.sqsub.x.nxv8i16(<vscale x 8 x i16> %a,
                                                                   <vscale x 8 x i16> %splat)
  ret <vscale x 8 x i16> %out
}

define <vscale x 8 x i16> @sqsub_h_highimm(<vscale x 8 x i16> %a) {
; CHECK-LABEL: sqsub_h_highimm:
; CHECK:       // %bb.0:
; CHECK-NEXT:    sqsub z0.h, z0.h, #2048 // =0x800
; CHECK-NEXT:    ret
  %elt = insertelement <vscale x 8 x i16> undef, i16 2048, i32 0
  %splat = shufflevector <vscale x 8 x i16> %elt, <vscale x 8 x i16> undef, <vscale x 8 x i32> zeroinitializer
  %out = call <vscale x 8 x i16> @llvm.aarch64.sve.sqsub.x.nxv8i16(<vscale x 8 x i16> %a,
                                                                   <vscale x 8 x i16> %splat)
  ret <vscale x 8 x i16> %out
}

define <vscale x 4 x i32> @sqsub_s_lowimm(<vscale x 4 x i32> %a) {
; CHECK-LABEL: sqsub_s_lowimm:
; CHECK:       // %bb.0:
; CHECK-NEXT:    sqsub z0.s, z0.s, #1 // =0x1
; CHECK-NEXT:    ret
  %elt = insertelement <vscale x 4 x i32> undef, i32 1, i32 0
  %splat = shufflevector <vscale x 4 x i32> %elt, <vscale x 4 x i32> undef, <vscale x 4 x i32> zeroinitializer
  %out = call <vscale x 4 x i32> @llvm.aarch64.sve.sqsub.x.nxv4i32(<vscale x 4 x i32> %a,
                                                                   <vscale x 4 x i32> %splat)
  ret <vscale x 4 x i32> %out
}

define <vscale x 4 x i32> @sqsub_s_highimm(<vscale x 4 x i32> %a) {
; CHECK-LABEL: sqsub_s_highimm:
; CHECK:       // %bb.0:
; CHECK-NEXT:    sqsub z0.s, z0.s, #8192 // =0x2000
; CHECK-NEXT:    ret
  %elt = insertelement <vscale x 4 x i32> undef, i32 8192, i32 0
  %splat = shufflevector <vscale x 4 x i32> %elt, <vscale x 4 x i32> undef, <vscale x 4 x i32> zeroinitializer
  %out = call <vscale x 4 x i32> @llvm.aarch64.sve.sqsub.x.nxv4i32(<vscale x 4 x i32> %a,
                                                                   <vscale x 4 x i32> %splat)
  ret <vscale x 4 x i32> %out
}

define <vscale x 2 x i64> @sqsub_d_lowimm(<vscale x 2 x i64> %a) {
; CHECK-LABEL: sqsub_d_lowimm:
; CHECK:       // %bb.0:
; CHECK-NEXT:    sqsub z0.d, z0.d, #255 // =0xff
; CHECK-NEXT:    ret
  %elt = insertelement <vscale x 2 x i64> undef, i64 255, i32 0
  %splat = shufflevector <vscale x 2 x i64> %elt, <vscale x 2 x i64> undef, <vscale x 2 x i32> zeroinitializer
  %out = call <vscale x 2 x i64> @llvm.aarch64.sve.sqsub.x.nxv2i64(<vscale x 2 x i64> %a,
                                                                   <vscale x 2 x i64> %splat)
  ret <vscale x 2 x i64> %out
}

define <vscale x 2 x i64> @sqsub_d_highimm(<vscale x 2 x i64> %a) {
; CHECK-LABEL: sqsub_d_highimm:
; CHECK:       // %bb.0:
; CHECK-NEXT:    sqsub z0.d, z0.d, #65280 // =0xff00
; CHECK-NEXT:    ret
  %elt = insertelement <vscale x 2 x i64> undef, i64 65280, i32 0
  %splat = shufflevector <vscale x 2 x i64> %elt, <vscale x 2 x i64> undef, <vscale x 2 x i32> zeroinitializer
  %out = call <vscale x 2 x i64> @llvm.aarch64.sve.sqsub.x.nxv2i64(<vscale x 2 x i64> %a,
                                                                   <vscale x 2 x i64> %splat)
  ret <vscale x 2 x i64> %out
}
1222 define <vscale x 16 x i8> @uqadd_b_lowimm(<vscale x 16 x i8> %a) {
1223 ; CHECK-LABEL: uqadd_b_lowimm:
1225 ; CHECK-NEXT: uqadd z0.b, z0.b, #27 // =0x1b
1227 %elt = insertelement <vscale x 16 x i8> undef, i8 27, i32 0
1228 %splat = shufflevector <vscale x 16 x i8> %elt, <vscale x 16 x i8> undef, <vscale x 16 x i32> zeroinitializer
1229 %out = call <vscale x 16 x i8> @llvm.aarch64.sve.uqadd.x.nxv16i8(<vscale x 16 x i8> %a,
1230 <vscale x 16 x i8> %splat)
1231 ret <vscale x 16 x i8> %out
1234 define <vscale x 8 x i16> @uqadd_h_lowimm(<vscale x 8 x i16> %a) {
1235 ; CHECK-LABEL: uqadd_h_lowimm:
1237 ; CHECK-NEXT: uqadd z0.h, z0.h, #43 // =0x2b
1239 %elt = insertelement <vscale x 8 x i16> undef, i16 43, i32 0
1240 %splat = shufflevector <vscale x 8 x i16> %elt, <vscale x 8 x i16> undef, <vscale x 8 x i32> zeroinitializer
1241 %out = call <vscale x 8 x i16> @llvm.aarch64.sve.uqadd.x.nxv8i16(<vscale x 8 x i16> %a,
1242 <vscale x 8 x i16> %splat)
1243 ret <vscale x 8 x i16> %out
1246 define <vscale x 8 x i16> @uqadd_h_highimm(<vscale x 8 x i16> %a) {
1247 ; CHECK-LABEL: uqadd_h_highimm:
1249 ; CHECK-NEXT: uqadd z0.h, z0.h, #2048 // =0x800
1251 %elt = insertelement <vscale x 8 x i16> undef, i16 2048, i32 0
1252 %splat = shufflevector <vscale x 8 x i16> %elt, <vscale x 8 x i16> undef, <vscale x 8 x i32> zeroinitializer
1253 %out = call <vscale x 8 x i16> @llvm.aarch64.sve.uqadd.x.nxv8i16(<vscale x 8 x i16> %a,
1254 <vscale x 8 x i16> %splat)
1255 ret <vscale x 8 x i16> %out
1258 define <vscale x 4 x i32> @uqadd_s_lowimm(<vscale x 4 x i32> %a) {
1259 ; CHECK-LABEL: uqadd_s_lowimm:
1261 ; CHECK-NEXT: uqadd z0.s, z0.s, #1 // =0x1
1263 %elt = insertelement <vscale x 4 x i32> undef, i32 1, i32 0
1264 %splat = shufflevector <vscale x 4 x i32> %elt, <vscale x 4 x i32> undef, <vscale x 4 x i32> zeroinitializer
1265 %out = call <vscale x 4 x i32> @llvm.aarch64.sve.uqadd.x.nxv4i32(<vscale x 4 x i32> %a,
1266 <vscale x 4 x i32> %splat)
1267 ret <vscale x 4 x i32> %out
1270 define <vscale x 4 x i32> @uqadd_s_highimm(<vscale x 4 x i32> %a) {
1271 ; CHECK-LABEL: uqadd_s_highimm:
1273 ; CHECK-NEXT: uqadd z0.s, z0.s, #8192 // =0x2000
1275 %elt = insertelement <vscale x 4 x i32> undef, i32 8192, i32 0
1276 %splat = shufflevector <vscale x 4 x i32> %elt, <vscale x 4 x i32> undef, <vscale x 4 x i32> zeroinitializer
1277 %out = call <vscale x 4 x i32> @llvm.aarch64.sve.uqadd.x.nxv4i32(<vscale x 4 x i32> %a,
1278 <vscale x 4 x i32> %splat)
1279 ret <vscale x 4 x i32> %out
1282 define <vscale x 2 x i64> @uqadd_d_lowimm(<vscale x 2 x i64> %a) {
1283 ; CHECK-LABEL: uqadd_d_lowimm:
1285 ; CHECK-NEXT: uqadd z0.d, z0.d, #255 // =0xff
1287 %elt = insertelement <vscale x 2 x i64> undef, i64 255, i32 0
1288 %splat = shufflevector <vscale x 2 x i64> %elt, <vscale x 2 x i64> undef, <vscale x 2 x i32> zeroinitializer
1289 %out = call <vscale x 2 x i64> @llvm.aarch64.sve.uqadd.x.nxv2i64(<vscale x 2 x i64> %a,
1290 <vscale x 2 x i64> %splat)
1291 ret <vscale x 2 x i64> %out
1294 define <vscale x 2 x i64> @uqadd_d_highimm(<vscale x 2 x i64> %a) {
1295 ; CHECK-LABEL: uqadd_d_highimm:
1297 ; CHECK-NEXT: uqadd z0.d, z0.d, #65280 // =0xff00
1299 %elt = insertelement <vscale x 2 x i64> undef, i64 65280, i32 0
1300 %splat = shufflevector <vscale x 2 x i64> %elt, <vscale x 2 x i64> undef, <vscale x 2 x i32> zeroinitializer
1301 %out = call <vscale x 2 x i64> @llvm.aarch64.sve.uqadd.x.nxv2i64(<vscale x 2 x i64> %a,
1302 <vscale x 2 x i64> %splat)
1303 ret <vscale x 2 x i64> %out
define <vscale x 16 x i8> @uqsub_b_lowimm(<vscale x 16 x i8> %a) {
; CHECK-LABEL: uqsub_b_lowimm:
; CHECK:       // %bb.0:
; CHECK-NEXT:    uqsub z0.b, z0.b, #27 // =0x1b
; CHECK-NEXT:    ret
  %elt = insertelement <vscale x 16 x i8> undef, i8 27, i32 0
  %splat = shufflevector <vscale x 16 x i8> %elt, <vscale x 16 x i8> undef, <vscale x 16 x i32> zeroinitializer
  %out = call <vscale x 16 x i8> @llvm.aarch64.sve.uqsub.x.nxv16i8(<vscale x 16 x i8> %a,
                                                                   <vscale x 16 x i8> %splat)
  ret <vscale x 16 x i8> %out
}

define <vscale x 8 x i16> @uqsub_h_lowimm(<vscale x 8 x i16> %a) {
; CHECK-LABEL: uqsub_h_lowimm:
; CHECK:       // %bb.0:
; CHECK-NEXT:    uqsub z0.h, z0.h, #43 // =0x2b
; CHECK-NEXT:    ret
  %elt = insertelement <vscale x 8 x i16> undef, i16 43, i32 0
  %splat = shufflevector <vscale x 8 x i16> %elt, <vscale x 8 x i16> undef, <vscale x 8 x i32> zeroinitializer
  %out = call <vscale x 8 x i16> @llvm.aarch64.sve.uqsub.x.nxv8i16(<vscale x 8 x i16> %a,
                                                                   <vscale x 8 x i16> %splat)
  ret <vscale x 8 x i16> %out
}

define <vscale x 8 x i16> @uqsub_h_highimm(<vscale x 8 x i16> %a) {
; CHECK-LABEL: uqsub_h_highimm:
; CHECK:       // %bb.0:
; CHECK-NEXT:    uqsub z0.h, z0.h, #2048 // =0x800
; CHECK-NEXT:    ret
  %elt = insertelement <vscale x 8 x i16> undef, i16 2048, i32 0
  %splat = shufflevector <vscale x 8 x i16> %elt, <vscale x 8 x i16> undef, <vscale x 8 x i32> zeroinitializer
  %out = call <vscale x 8 x i16> @llvm.aarch64.sve.uqsub.x.nxv8i16(<vscale x 8 x i16> %a,
                                                                   <vscale x 8 x i16> %splat)
  ret <vscale x 8 x i16> %out
}

define <vscale x 4 x i32> @uqsub_s_lowimm(<vscale x 4 x i32> %a) {
; CHECK-LABEL: uqsub_s_lowimm:
; CHECK:       // %bb.0:
; CHECK-NEXT:    uqsub z0.s, z0.s, #1 // =0x1
; CHECK-NEXT:    ret
  %elt = insertelement <vscale x 4 x i32> undef, i32 1, i32 0
  %splat = shufflevector <vscale x 4 x i32> %elt, <vscale x 4 x i32> undef, <vscale x 4 x i32> zeroinitializer
  %out = call <vscale x 4 x i32> @llvm.aarch64.sve.uqsub.x.nxv4i32(<vscale x 4 x i32> %a,
                                                                   <vscale x 4 x i32> %splat)
  ret <vscale x 4 x i32> %out
}

define <vscale x 4 x i32> @uqsub_s_highimm(<vscale x 4 x i32> %a) {
; CHECK-LABEL: uqsub_s_highimm:
; CHECK:       // %bb.0:
; CHECK-NEXT:    uqsub z0.s, z0.s, #8192 // =0x2000
; CHECK-NEXT:    ret
  %elt = insertelement <vscale x 4 x i32> undef, i32 8192, i32 0
  %splat = shufflevector <vscale x 4 x i32> %elt, <vscale x 4 x i32> undef, <vscale x 4 x i32> zeroinitializer
  %out = call <vscale x 4 x i32> @llvm.aarch64.sve.uqsub.x.nxv4i32(<vscale x 4 x i32> %a,
                                                                   <vscale x 4 x i32> %splat)
  ret <vscale x 4 x i32> %out
}

define <vscale x 2 x i64> @uqsub_d_lowimm(<vscale x 2 x i64> %a) {
; CHECK-LABEL: uqsub_d_lowimm:
; CHECK:       // %bb.0:
; CHECK-NEXT:    uqsub z0.d, z0.d, #255 // =0xff
; CHECK-NEXT:    ret
  %elt = insertelement <vscale x 2 x i64> undef, i64 255, i32 0
  %splat = shufflevector <vscale x 2 x i64> %elt, <vscale x 2 x i64> undef, <vscale x 2 x i32> zeroinitializer
  %out = call <vscale x 2 x i64> @llvm.aarch64.sve.uqsub.x.nxv2i64(<vscale x 2 x i64> %a,
                                                                   <vscale x 2 x i64> %splat)
  ret <vscale x 2 x i64> %out
}

define <vscale x 2 x i64> @uqsub_d_highimm(<vscale x 2 x i64> %a) {
; CHECK-LABEL: uqsub_d_highimm:
; CHECK:       // %bb.0:
; CHECK-NEXT:    uqsub z0.d, z0.d, #65280 // =0xff00
; CHECK-NEXT:    ret
  %elt = insertelement <vscale x 2 x i64> undef, i64 65280, i32 0
  %splat = shufflevector <vscale x 2 x i64> %elt, <vscale x 2 x i64> undef, <vscale x 2 x i32> zeroinitializer
  %out = call <vscale x 2 x i64> @llvm.aarch64.sve.uqsub.x.nxv2i64(<vscale x 2 x i64> %a,
                                                                   <vscale x 2 x i64> %splat)
  ret <vscale x 2 x i64> %out
}

;
; ASR
;

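; ASR immediates encode shift amounts from 1 up to the element size. A shift
; by more than the element size produces the same result as a shift by the
; element size, which is why the splat of 9 in asr_i8 is matched to #8.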
define <vscale x 16 x i8> @asr_i8(<vscale x 16 x i1> %pg, <vscale x 16 x i8> %a) {
; CHECK-LABEL: asr_i8:
; CHECK:       // %bb.0:
; CHECK-NEXT:    asr z0.b, p0/m, z0.b, #8
; CHECK-NEXT:    ret
  %elt = insertelement <vscale x 16 x i8> undef, i8 9, i32 0
  %splat = shufflevector <vscale x 16 x i8> %elt, <vscale x 16 x i8> undef, <vscale x 16 x i32> zeroinitializer
  %out = call <vscale x 16 x i8> @llvm.aarch64.sve.asr.nxv16i8(<vscale x 16 x i1> %pg,
                                                               <vscale x 16 x i8> %a,
                                                               <vscale x 16 x i8> %splat)
  ret <vscale x 16 x i8> %out
}

define <vscale x 16 x i8> @asr_i8_all_active(<vscale x 16 x i8> %a) {
; CHECK-LABEL: asr_i8_all_active:
; CHECK:       // %bb.0:
; CHECK-NEXT:    asr z0.b, z0.b, #8
; CHECK-NEXT:    ret
  %pg = call <vscale x 16 x i1> @llvm.aarch64.sve.ptrue.nxv16i1(i32 31)
  %elt = insertelement <vscale x 16 x i8> undef, i8 8, i32 0
  %splat = shufflevector <vscale x 16 x i8> %elt, <vscale x 16 x i8> undef, <vscale x 16 x i32> zeroinitializer
  %out = call <vscale x 16 x i8> @llvm.aarch64.sve.asr.u.nxv16i8(<vscale x 16 x i1> %pg,
                                                                 <vscale x 16 x i8> %a,
                                                                 <vscale x 16 x i8> %splat)
  ret <vscale x 16 x i8> %out
}

; Ensure we don't match a right shift by zero to the immediate form.
define <vscale x 16 x i8> @asr_i8_too_small(<vscale x 16 x i1> %pg, <vscale x 16 x i8> %a) {
; CHECK-LABEL: asr_i8_too_small:
; CHECK:       // %bb.0:
; CHECK-NEXT:    mov z1.b, #0 // =0x0
; CHECK-NEXT:    asr z0.b, p0/m, z0.b, z1.b
; CHECK-NEXT:    ret
  %out = call <vscale x 16 x i8> @llvm.aarch64.sve.asr.nxv16i8(<vscale x 16 x i1> %pg,
                                                               <vscale x 16 x i8> %a,
                                                               <vscale x 16 x i8> zeroinitializer)
  ret <vscale x 16 x i8> %out
}

define <vscale x 8 x i16> @asr_i16(<vscale x 8 x i1> %pg, <vscale x 8 x i16> %a) {
; CHECK-LABEL: asr_i16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    asr z0.h, p0/m, z0.h, #16
; CHECK-NEXT:    ret
  %elt = insertelement <vscale x 8 x i16> undef, i16 17, i32 0
  %splat = shufflevector <vscale x 8 x i16> %elt, <vscale x 8 x i16> undef, <vscale x 8 x i32> zeroinitializer
  %out = call <vscale x 8 x i16> @llvm.aarch64.sve.asr.nxv8i16(<vscale x 8 x i1> %pg,
                                                               <vscale x 8 x i16> %a,
                                                               <vscale x 8 x i16> %splat)
  ret <vscale x 8 x i16> %out
}

define <vscale x 8 x i16> @asr_i16_all_active(<vscale x 8 x i16> %a) {
; CHECK-LABEL: asr_i16_all_active:
; CHECK:       // %bb.0:
; CHECK-NEXT:    asr z0.h, z0.h, #16
; CHECK-NEXT:    ret
  %pg = call <vscale x 8 x i1> @llvm.aarch64.sve.ptrue.nxv8i1(i32 31)
  %elt = insertelement <vscale x 8 x i16> undef, i16 16, i32 0
  %splat = shufflevector <vscale x 8 x i16> %elt, <vscale x 8 x i16> undef, <vscale x 8 x i32> zeroinitializer
  %out = call <vscale x 8 x i16> @llvm.aarch64.sve.asr.u.nxv8i16(<vscale x 8 x i1> %pg,
                                                                 <vscale x 8 x i16> %a,
                                                                 <vscale x 8 x i16> %splat)
  ret <vscale x 8 x i16> %out
}

; Ensure we don't match a right shift by zero to the immediate form.
define <vscale x 8 x i16> @asr_i16_too_small(<vscale x 8 x i1> %pg, <vscale x 8 x i16> %a) {
; CHECK-LABEL: asr_i16_too_small:
; CHECK:       // %bb.0:
; CHECK-NEXT:    mov z1.h, #0 // =0x0
; CHECK-NEXT:    asr z0.h, p0/m, z0.h, z1.h
; CHECK-NEXT:    ret
  %out = call <vscale x 8 x i16> @llvm.aarch64.sve.asr.nxv8i16(<vscale x 8 x i1> %pg,
                                                               <vscale x 8 x i16> %a,
                                                               <vscale x 8 x i16> zeroinitializer)
  ret <vscale x 8 x i16> %out
}

define <vscale x 4 x i32> @asr_i32(<vscale x 4 x i1> %pg, <vscale x 4 x i32> %a) {
; CHECK-LABEL: asr_i32:
; CHECK:       // %bb.0:
; CHECK-NEXT:    asr z0.s, p0/m, z0.s, #32
; CHECK-NEXT:    ret
  %elt = insertelement <vscale x 4 x i32> undef, i32 33, i32 0
  %splat = shufflevector <vscale x 4 x i32> %elt, <vscale x 4 x i32> undef, <vscale x 4 x i32> zeroinitializer
  %out = call <vscale x 4 x i32> @llvm.aarch64.sve.asr.nxv4i32(<vscale x 4 x i1> %pg,
                                                               <vscale x 4 x i32> %a,
                                                               <vscale x 4 x i32> %splat)
  ret <vscale x 4 x i32> %out
}

define <vscale x 4 x i32> @asr_i32_all_active(<vscale x 4 x i32> %a) {
; CHECK-LABEL: asr_i32_all_active:
; CHECK:       // %bb.0:
; CHECK-NEXT:    asr z0.s, z0.s, #32
; CHECK-NEXT:    ret
  %pg = call <vscale x 4 x i1> @llvm.aarch64.sve.ptrue.nxv4i1(i32 31)
  %elt = insertelement <vscale x 4 x i32> undef, i32 32, i32 0
  %splat = shufflevector <vscale x 4 x i32> %elt, <vscale x 4 x i32> undef, <vscale x 4 x i32> zeroinitializer
  %out = call <vscale x 4 x i32> @llvm.aarch64.sve.asr.u.nxv4i32(<vscale x 4 x i1> %pg,
                                                                 <vscale x 4 x i32> %a,
                                                                 <vscale x 4 x i32> %splat)
  ret <vscale x 4 x i32> %out
}

; Ensure we don't match a right shift by zero to the immediate form.
define <vscale x 4 x i32> @asr_i32_too_small(<vscale x 4 x i1> %pg, <vscale x 4 x i32> %a) {
; CHECK-LABEL: asr_i32_too_small:
; CHECK:       // %bb.0:
; CHECK-NEXT:    mov z1.s, #0 // =0x0
; CHECK-NEXT:    asr z0.s, p0/m, z0.s, z1.s
; CHECK-NEXT:    ret
  %out = call <vscale x 4 x i32> @llvm.aarch64.sve.asr.nxv4i32(<vscale x 4 x i1> %pg,
                                                               <vscale x 4 x i32> %a,
                                                               <vscale x 4 x i32> zeroinitializer)
  ret <vscale x 4 x i32> %out
}

define <vscale x 2 x i64> @asr_i64(<vscale x 2 x i1> %pg, <vscale x 2 x i64> %a) {
; CHECK-LABEL: asr_i64:
; CHECK:       // %bb.0:
; CHECK-NEXT:    asr z0.d, p0/m, z0.d, #64
; CHECK-NEXT:    ret
  %elt = insertelement <vscale x 2 x i64> undef, i64 65, i64 0
  %splat = shufflevector <vscale x 2 x i64> %elt, <vscale x 2 x i64> undef, <vscale x 2 x i32> zeroinitializer
  %out = call <vscale x 2 x i64> @llvm.aarch64.sve.asr.nxv2i64(<vscale x 2 x i1> %pg,
                                                               <vscale x 2 x i64> %a,
                                                               <vscale x 2 x i64> %splat)
  ret <vscale x 2 x i64> %out
}

define <vscale x 2 x i64> @asr_i64_all_active(<vscale x 2 x i64> %a) {
; CHECK-LABEL: asr_i64_all_active:
; CHECK:       // %bb.0:
; CHECK-NEXT:    asr z0.d, z0.d, #64
; CHECK-NEXT:    ret
  %pg = call <vscale x 2 x i1> @llvm.aarch64.sve.ptrue.nxv2i1(i32 31)
  %elt = insertelement <vscale x 2 x i64> undef, i64 64, i64 0
  %splat = shufflevector <vscale x 2 x i64> %elt, <vscale x 2 x i64> undef, <vscale x 2 x i32> zeroinitializer
  %out = call <vscale x 2 x i64> @llvm.aarch64.sve.asr.u.nxv2i64(<vscale x 2 x i1> %pg,
                                                                 <vscale x 2 x i64> %a,
                                                                 <vscale x 2 x i64> %splat)
  ret <vscale x 2 x i64> %out
}

; Ensure we don't match a right shift by zero to the immediate form.
define <vscale x 2 x i64> @asr_i64_too_small(<vscale x 2 x i1> %pg, <vscale x 2 x i64> %a) {
; CHECK-LABEL: asr_i64_too_small:
; CHECK:       // %bb.0:
; CHECK-NEXT:    mov z1.d, #0 // =0x0
; CHECK-NEXT:    asr z0.d, p0/m, z0.d, z1.d
; CHECK-NEXT:    ret
  %out = call <vscale x 2 x i64> @llvm.aarch64.sve.asr.nxv2i64(<vscale x 2 x i1> %pg,
                                                               <vscale x 2 x i64> %a,
                                                               <vscale x 2 x i64> zeroinitializer)
  ret <vscale x 2 x i64> %out
}

;
; LSL
;

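; LSL immediates encode shift amounts from 0 to the element size minus one, so
; a shift equal to the element size must stay in register form (the _too_big
; tests) while a shift of zero is directly encodable (the _zero tests).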
define <vscale x 16 x i8> @lsl_i8(<vscale x 16 x i1> %pg, <vscale x 16 x i8> %a) {
; CHECK-LABEL: lsl_i8:
; CHECK:       // %bb.0:
; CHECK-NEXT:    lsl z0.b, p0/m, z0.b, #7
; CHECK-NEXT:    ret
  %elt = insertelement <vscale x 16 x i8> undef, i8 7, i32 0
  %splat = shufflevector <vscale x 16 x i8> %elt, <vscale x 16 x i8> undef, <vscale x 16 x i32> zeroinitializer
  %out = call <vscale x 16 x i8> @llvm.aarch64.sve.lsl.nxv16i8(<vscale x 16 x i1> %pg,
                                                               <vscale x 16 x i8> %a,
                                                               <vscale x 16 x i8> %splat)
  ret <vscale x 16 x i8> %out
}

define <vscale x 16 x i8> @lsl_i8_all_active(<vscale x 16 x i8> %a) {
; CHECK-LABEL: lsl_i8_all_active:
; CHECK:       // %bb.0:
; CHECK-NEXT:    lsl z0.b, z0.b, #7
; CHECK-NEXT:    ret
  %pg = call <vscale x 16 x i1> @llvm.aarch64.sve.ptrue.nxv16i1(i32 31)
  %elt = insertelement <vscale x 16 x i8> undef, i8 7, i32 0
  %splat = shufflevector <vscale x 16 x i8> %elt, <vscale x 16 x i8> undef, <vscale x 16 x i32> zeroinitializer
  %out = call <vscale x 16 x i8> @llvm.aarch64.sve.lsl.u.nxv16i8(<vscale x 16 x i1> %pg,
                                                                 <vscale x 16 x i8> %a,
                                                                 <vscale x 16 x i8> %splat)
  ret <vscale x 16 x i8> %out
}

; Ensure we don't match a left shift by the element width or more to the immediate form.
define <vscale x 16 x i8> @lsl_i8_too_big(<vscale x 16 x i1> %pg, <vscale x 16 x i8> %a) {
; CHECK-LABEL: lsl_i8_too_big:
; CHECK:       // %bb.0:
; CHECK-NEXT:    mov z1.b, #8 // =0x8
; CHECK-NEXT:    lsl z0.b, p0/m, z0.b, z1.b
; CHECK-NEXT:    ret
  %elt = insertelement <vscale x 16 x i8> undef, i8 8, i32 0
  %splat = shufflevector <vscale x 16 x i8> %elt, <vscale x 16 x i8> undef, <vscale x 16 x i32> zeroinitializer
  %out = call <vscale x 16 x i8> @llvm.aarch64.sve.lsl.nxv16i8(<vscale x 16 x i1> %pg,
                                                               <vscale x 16 x i8> %a,
                                                               <vscale x 16 x i8> %splat)
  ret <vscale x 16 x i8> %out
}

define <vscale x 16 x i8> @lsl_i8_zero(<vscale x 16 x i1> %pg, <vscale x 16 x i8> %a) {
; CHECK-LABEL: lsl_i8_zero:
; CHECK:       // %bb.0:
; CHECK-NEXT:    lsl z0.b, p0/m, z0.b, #0
; CHECK-NEXT:    ret
  %out = call <vscale x 16 x i8> @llvm.aarch64.sve.lsl.nxv16i8(<vscale x 16 x i1> %pg,
                                                               <vscale x 16 x i8> %a,
                                                               <vscale x 16 x i8> zeroinitializer)
  ret <vscale x 16 x i8> %out
}

define <vscale x 8 x i16> @lsl_i16(<vscale x 8 x i1> %pg, <vscale x 8 x i16> %a) {
; CHECK-LABEL: lsl_i16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    lsl z0.h, p0/m, z0.h, #15
; CHECK-NEXT:    ret
  %elt = insertelement <vscale x 8 x i16> undef, i16 15, i32 0
  %splat = shufflevector <vscale x 8 x i16> %elt, <vscale x 8 x i16> undef, <vscale x 8 x i32> zeroinitializer
  %out = call <vscale x 8 x i16> @llvm.aarch64.sve.lsl.nxv8i16(<vscale x 8 x i1> %pg,
                                                               <vscale x 8 x i16> %a,
                                                               <vscale x 8 x i16> %splat)
  ret <vscale x 8 x i16> %out
}

define <vscale x 8 x i16> @lsl_i16_all_active(<vscale x 8 x i16> %a) {
; CHECK-LABEL: lsl_i16_all_active:
; CHECK:       // %bb.0:
; CHECK-NEXT:    lsl z0.h, z0.h, #15
; CHECK-NEXT:    ret
  %pg = call <vscale x 8 x i1> @llvm.aarch64.sve.ptrue.nxv8i1(i32 31)
  %elt = insertelement <vscale x 8 x i16> undef, i16 15, i32 0
  %splat = shufflevector <vscale x 8 x i16> %elt, <vscale x 8 x i16> undef, <vscale x 8 x i32> zeroinitializer
  %out = call <vscale x 8 x i16> @llvm.aarch64.sve.lsl.u.nxv8i16(<vscale x 8 x i1> %pg,
                                                                 <vscale x 8 x i16> %a,
                                                                 <vscale x 8 x i16> %splat)
  ret <vscale x 8 x i16> %out
}

; Ensure we don't match a left shift by the element width or more to the immediate form.
define <vscale x 8 x i16> @lsl_i16_too_big(<vscale x 8 x i1> %pg, <vscale x 8 x i16> %a) {
; CHECK-LABEL: lsl_i16_too_big:
; CHECK:       // %bb.0:
; CHECK-NEXT:    mov z1.h, #16 // =0x10
; CHECK-NEXT:    lsl z0.h, p0/m, z0.h, z1.h
; CHECK-NEXT:    ret
  %elt = insertelement <vscale x 8 x i16> undef, i16 16, i32 0
  %splat = shufflevector <vscale x 8 x i16> %elt, <vscale x 8 x i16> undef, <vscale x 8 x i32> zeroinitializer
  %out = call <vscale x 8 x i16> @llvm.aarch64.sve.lsl.nxv8i16(<vscale x 8 x i1> %pg,
                                                               <vscale x 8 x i16> %a,
                                                               <vscale x 8 x i16> %splat)
  ret <vscale x 8 x i16> %out
}

define <vscale x 8 x i16> @lsl_i16_zero(<vscale x 8 x i1> %pg, <vscale x 8 x i16> %a) {
; CHECK-LABEL: lsl_i16_zero:
; CHECK:       // %bb.0:
; CHECK-NEXT:    lsl z0.h, p0/m, z0.h, #0
; CHECK-NEXT:    ret
  %out = call <vscale x 8 x i16> @llvm.aarch64.sve.lsl.nxv8i16(<vscale x 8 x i1> %pg,
                                                               <vscale x 8 x i16> %a,
                                                               <vscale x 8 x i16> zeroinitializer)
  ret <vscale x 8 x i16> %out
}

define <vscale x 4 x i32> @lsl_i32(<vscale x 4 x i1> %pg, <vscale x 4 x i32> %a) {
; CHECK-LABEL: lsl_i32:
; CHECK:       // %bb.0:
; CHECK-NEXT:    lsl z0.s, p0/m, z0.s, #31
; CHECK-NEXT:    ret
  %elt = insertelement <vscale x 4 x i32> undef, i32 31, i32 0
  %splat = shufflevector <vscale x 4 x i32> %elt, <vscale x 4 x i32> undef, <vscale x 4 x i32> zeroinitializer
  %out = call <vscale x 4 x i32> @llvm.aarch64.sve.lsl.nxv4i32(<vscale x 4 x i1> %pg,
                                                               <vscale x 4 x i32> %a,
                                                               <vscale x 4 x i32> %splat)
  ret <vscale x 4 x i32> %out
}

define <vscale x 4 x i32> @lsl_i32_all_active(<vscale x 4 x i32> %a) {
; CHECK-LABEL: lsl_i32_all_active:
; CHECK:       // %bb.0:
; CHECK-NEXT:    lsl z0.s, z0.s, #31
; CHECK-NEXT:    ret
  %pg = call <vscale x 4 x i1> @llvm.aarch64.sve.ptrue.nxv4i1(i32 31)
  %elt = insertelement <vscale x 4 x i32> undef, i32 31, i32 0
  %splat = shufflevector <vscale x 4 x i32> %elt, <vscale x 4 x i32> undef, <vscale x 4 x i32> zeroinitializer
  %out = call <vscale x 4 x i32> @llvm.aarch64.sve.lsl.u.nxv4i32(<vscale x 4 x i1> %pg,
                                                                 <vscale x 4 x i32> %a,
                                                                 <vscale x 4 x i32> %splat)
  ret <vscale x 4 x i32> %out
}

; Ensure we don't match a left shift by the element width or more to the immediate form.
define <vscale x 4 x i32> @lsl_i32_too_big(<vscale x 4 x i1> %pg, <vscale x 4 x i32> %a) {
; CHECK-LABEL: lsl_i32_too_big:
; CHECK:       // %bb.0:
; CHECK-NEXT:    mov z1.s, #32 // =0x20
; CHECK-NEXT:    lsl z0.s, p0/m, z0.s, z1.s
; CHECK-NEXT:    ret
  %elt = insertelement <vscale x 4 x i32> undef, i32 32, i32 0
  %splat = shufflevector <vscale x 4 x i32> %elt, <vscale x 4 x i32> undef, <vscale x 4 x i32> zeroinitializer
  %out = call <vscale x 4 x i32> @llvm.aarch64.sve.lsl.nxv4i32(<vscale x 4 x i1> %pg,
                                                               <vscale x 4 x i32> %a,
                                                               <vscale x 4 x i32> %splat)
  ret <vscale x 4 x i32> %out
}

define <vscale x 4 x i32> @lsl_i32_zero(<vscale x 4 x i1> %pg, <vscale x 4 x i32> %a) {
; CHECK-LABEL: lsl_i32_zero:
; CHECK:       // %bb.0:
; CHECK-NEXT:    lsl z0.s, p0/m, z0.s, #0
; CHECK-NEXT:    ret
  %out = call <vscale x 4 x i32> @llvm.aarch64.sve.lsl.nxv4i32(<vscale x 4 x i1> %pg,
                                                               <vscale x 4 x i32> %a,
                                                               <vscale x 4 x i32> zeroinitializer)
  ret <vscale x 4 x i32> %out
}

define <vscale x 2 x i64> @lsl_i64(<vscale x 2 x i1> %pg, <vscale x 2 x i64> %a) {
; CHECK-LABEL: lsl_i64:
; CHECK:       // %bb.0:
; CHECK-NEXT:    lsl z0.d, p0/m, z0.d, #63
; CHECK-NEXT:    ret
  %elt = insertelement <vscale x 2 x i64> undef, i64 63, i64 0
  %splat = shufflevector <vscale x 2 x i64> %elt, <vscale x 2 x i64> undef, <vscale x 2 x i32> zeroinitializer
  %out = call <vscale x 2 x i64> @llvm.aarch64.sve.lsl.nxv2i64(<vscale x 2 x i1> %pg,
                                                               <vscale x 2 x i64> %a,
                                                               <vscale x 2 x i64> %splat)
  ret <vscale x 2 x i64> %out
}

define <vscale x 2 x i64> @lsl_i64_all_active(<vscale x 2 x i64> %a) {
; CHECK-LABEL: lsl_i64_all_active:
; CHECK:       // %bb.0:
; CHECK-NEXT:    lsl z0.d, z0.d, #63
; CHECK-NEXT:    ret
  %pg = call <vscale x 2 x i1> @llvm.aarch64.sve.ptrue.nxv2i1(i32 31)
  %elt = insertelement <vscale x 2 x i64> undef, i64 63, i64 0
  %splat = shufflevector <vscale x 2 x i64> %elt, <vscale x 2 x i64> undef, <vscale x 2 x i32> zeroinitializer
  %out = call <vscale x 2 x i64> @llvm.aarch64.sve.lsl.u.nxv2i64(<vscale x 2 x i1> %pg,
                                                                 <vscale x 2 x i64> %a,
                                                                 <vscale x 2 x i64> %splat)
  ret <vscale x 2 x i64> %out
}

; Ensure we don't match a left shift by the element width or more to the immediate form.
define <vscale x 2 x i64> @lsl_i64_too_big(<vscale x 2 x i1> %pg, <vscale x 2 x i64> %a) {
; CHECK-LABEL: lsl_i64_too_big:
; CHECK:       // %bb.0:
; CHECK-NEXT:    mov z1.d, #64 // =0x40
; CHECK-NEXT:    lsl z0.d, p0/m, z0.d, z1.d
; CHECK-NEXT:    ret
  %elt = insertelement <vscale x 2 x i64> undef, i64 64, i64 0
  %splat = shufflevector <vscale x 2 x i64> %elt, <vscale x 2 x i64> undef, <vscale x 2 x i32> zeroinitializer
  %out = call <vscale x 2 x i64> @llvm.aarch64.sve.lsl.nxv2i64(<vscale x 2 x i1> %pg,
                                                               <vscale x 2 x i64> %a,
                                                               <vscale x 2 x i64> %splat)
  ret <vscale x 2 x i64> %out
}

define <vscale x 2 x i64> @lsl_i64_zero(<vscale x 2 x i1> %pg, <vscale x 2 x i64> %a) {
; CHECK-LABEL: lsl_i64_zero:
; CHECK:       // %bb.0:
; CHECK-NEXT:    lsl z0.d, p0/m, z0.d, #0
; CHECK-NEXT:    ret
  %out = call <vscale x 2 x i64> @llvm.aarch64.sve.lsl.nxv2i64(<vscale x 2 x i1> %pg,
                                                               <vscale x 2 x i64> %a,
                                                               <vscale x 2 x i64> zeroinitializer)
  ret <vscale x 2 x i64> %out
}

;
; LSR
;

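; LSR immediates mirror ASR: the encodable range is 1 to the element size,
; over-wide shifts give the same (zero) result as a shift by the element size,
; and a shift of zero must stay in register form.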
define <vscale x 16 x i8> @lsr_i8(<vscale x 16 x i1> %pg, <vscale x 16 x i8> %a) {
; CHECK-LABEL: lsr_i8:
; CHECK:       // %bb.0:
; CHECK-NEXT:    lsr z0.b, p0/m, z0.b, #8
; CHECK-NEXT:    ret
  %elt = insertelement <vscale x 16 x i8> undef, i8 9, i32 0
  %splat = shufflevector <vscale x 16 x i8> %elt, <vscale x 16 x i8> undef, <vscale x 16 x i32> zeroinitializer
  %out = call <vscale x 16 x i8> @llvm.aarch64.sve.lsr.nxv16i8(<vscale x 16 x i1> %pg,
                                                               <vscale x 16 x i8> %a,
                                                               <vscale x 16 x i8> %splat)
  ret <vscale x 16 x i8> %out
}

define <vscale x 16 x i8> @lsr_i8_all_active(<vscale x 16 x i8> %a) {
; CHECK-LABEL: lsr_i8_all_active:
; CHECK:       // %bb.0:
; CHECK-NEXT:    lsr z0.b, z0.b, #8
; CHECK-NEXT:    ret
  %pg = call <vscale x 16 x i1> @llvm.aarch64.sve.ptrue.nxv16i1(i32 31)
  %elt = insertelement <vscale x 16 x i8> undef, i8 8, i32 0
  %splat = shufflevector <vscale x 16 x i8> %elt, <vscale x 16 x i8> undef, <vscale x 16 x i32> zeroinitializer
  %out = call <vscale x 16 x i8> @llvm.aarch64.sve.lsr.u.nxv16i8(<vscale x 16 x i1> %pg,
                                                                 <vscale x 16 x i8> %a,
                                                                 <vscale x 16 x i8> %splat)
  ret <vscale x 16 x i8> %out
}

; Ensure we don't match a right shift by zero to the immediate form.
define <vscale x 16 x i8> @lsr_i8_too_small(<vscale x 16 x i1> %pg, <vscale x 16 x i8> %a) {
; CHECK-LABEL: lsr_i8_too_small:
; CHECK:       // %bb.0:
; CHECK-NEXT:    mov z1.b, #0 // =0x0
; CHECK-NEXT:    lsr z0.b, p0/m, z0.b, z1.b
; CHECK-NEXT:    ret
  %out = call <vscale x 16 x i8> @llvm.aarch64.sve.lsr.nxv16i8(<vscale x 16 x i1> %pg,
                                                               <vscale x 16 x i8> %a,
                                                               <vscale x 16 x i8> zeroinitializer)
  ret <vscale x 16 x i8> %out
}

define <vscale x 8 x i16> @lsr_i16(<vscale x 8 x i1> %pg, <vscale x 8 x i16> %a) {
; CHECK-LABEL: lsr_i16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    lsr z0.h, p0/m, z0.h, #16
; CHECK-NEXT:    ret
  %elt = insertelement <vscale x 8 x i16> undef, i16 17, i32 0
  %splat = shufflevector <vscale x 8 x i16> %elt, <vscale x 8 x i16> undef, <vscale x 8 x i32> zeroinitializer
  %out = call <vscale x 8 x i16> @llvm.aarch64.sve.lsr.nxv8i16(<vscale x 8 x i1> %pg,
                                                               <vscale x 8 x i16> %a,
                                                               <vscale x 8 x i16> %splat)
  ret <vscale x 8 x i16> %out
}

define <vscale x 8 x i16> @lsr_i16_all_active(<vscale x 8 x i16> %a) {
; CHECK-LABEL: lsr_i16_all_active:
; CHECK:       // %bb.0:
; CHECK-NEXT:    lsr z0.h, z0.h, #16
; CHECK-NEXT:    ret
  %pg = call <vscale x 8 x i1> @llvm.aarch64.sve.ptrue.nxv8i1(i32 31)
  %elt = insertelement <vscale x 8 x i16> undef, i16 16, i32 0
  %splat = shufflevector <vscale x 8 x i16> %elt, <vscale x 8 x i16> undef, <vscale x 8 x i32> zeroinitializer
  %out = call <vscale x 8 x i16> @llvm.aarch64.sve.lsr.u.nxv8i16(<vscale x 8 x i1> %pg,
                                                                 <vscale x 8 x i16> %a,
                                                                 <vscale x 8 x i16> %splat)
  ret <vscale x 8 x i16> %out
}

; Ensure we don't match a right shift by zero to the immediate form.
define <vscale x 8 x i16> @lsr_i16_too_small(<vscale x 8 x i1> %pg, <vscale x 8 x i16> %a) {
; CHECK-LABEL: lsr_i16_too_small:
; CHECK:       // %bb.0:
; CHECK-NEXT:    mov z1.h, #0 // =0x0
; CHECK-NEXT:    lsr z0.h, p0/m, z0.h, z1.h
; CHECK-NEXT:    ret
  %out = call <vscale x 8 x i16> @llvm.aarch64.sve.lsr.nxv8i16(<vscale x 8 x i1> %pg,
                                                               <vscale x 8 x i16> %a,
                                                               <vscale x 8 x i16> zeroinitializer)
  ret <vscale x 8 x i16> %out
}

define <vscale x 4 x i32> @lsr_i32(<vscale x 4 x i1> %pg, <vscale x 4 x i32> %a) {
; CHECK-LABEL: lsr_i32:
; CHECK:       // %bb.0:
; CHECK-NEXT:    lsr z0.s, p0/m, z0.s, #32
; CHECK-NEXT:    ret
  %elt = insertelement <vscale x 4 x i32> undef, i32 33, i32 0
  %splat = shufflevector <vscale x 4 x i32> %elt, <vscale x 4 x i32> undef, <vscale x 4 x i32> zeroinitializer
  %out = call <vscale x 4 x i32> @llvm.aarch64.sve.lsr.nxv4i32(<vscale x 4 x i1> %pg,
                                                               <vscale x 4 x i32> %a,
                                                               <vscale x 4 x i32> %splat)
  ret <vscale x 4 x i32> %out
}

define <vscale x 4 x i32> @lsr_i32_all_active(<vscale x 4 x i32> %a) {
; CHECK-LABEL: lsr_i32_all_active:
; CHECK:       // %bb.0:
; CHECK-NEXT:    lsr z0.s, z0.s, #32
; CHECK-NEXT:    ret
  %pg = call <vscale x 4 x i1> @llvm.aarch64.sve.ptrue.nxv4i1(i32 31)
  %elt = insertelement <vscale x 4 x i32> undef, i32 32, i32 0
  %splat = shufflevector <vscale x 4 x i32> %elt, <vscale x 4 x i32> undef, <vscale x 4 x i32> zeroinitializer
  %out = call <vscale x 4 x i32> @llvm.aarch64.sve.lsr.u.nxv4i32(<vscale x 4 x i1> %pg,
                                                                 <vscale x 4 x i32> %a,
                                                                 <vscale x 4 x i32> %splat)
  ret <vscale x 4 x i32> %out
}

; Ensure we don't match a right shift by zero to the immediate form.
define <vscale x 4 x i32> @lsr_i32_too_small(<vscale x 4 x i1> %pg, <vscale x 4 x i32> %a) {
; CHECK-LABEL: lsr_i32_too_small:
; CHECK:       // %bb.0:
; CHECK-NEXT:    mov z1.s, #0 // =0x0
; CHECK-NEXT:    lsr z0.s, p0/m, z0.s, z1.s
; CHECK-NEXT:    ret
  %out = call <vscale x 4 x i32> @llvm.aarch64.sve.lsr.nxv4i32(<vscale x 4 x i1> %pg,
                                                               <vscale x 4 x i32> %a,
                                                               <vscale x 4 x i32> zeroinitializer)
  ret <vscale x 4 x i32> %out
}

define <vscale x 2 x i64> @lsr_i64(<vscale x 2 x i1> %pg, <vscale x 2 x i64> %a) {
; CHECK-LABEL: lsr_i64:
; CHECK:       // %bb.0:
; CHECK-NEXT:    lsr z0.d, p0/m, z0.d, #64
; CHECK-NEXT:    ret
  %elt = insertelement <vscale x 2 x i64> undef, i64 65, i64 0
  %splat = shufflevector <vscale x 2 x i64> %elt, <vscale x 2 x i64> undef, <vscale x 2 x i32> zeroinitializer
  %out = call <vscale x 2 x i64> @llvm.aarch64.sve.lsr.nxv2i64(<vscale x 2 x i1> %pg,
                                                               <vscale x 2 x i64> %a,
                                                               <vscale x 2 x i64> %splat)
  ret <vscale x 2 x i64> %out
}

define <vscale x 2 x i64> @lsr_i64_all_active(<vscale x 2 x i64> %a) {
; CHECK-LABEL: lsr_i64_all_active:
; CHECK:       // %bb.0:
; CHECK-NEXT:    lsr z0.d, z0.d, #64
; CHECK-NEXT:    ret
  %pg = call <vscale x 2 x i1> @llvm.aarch64.sve.ptrue.nxv2i1(i32 31)
  %elt = insertelement <vscale x 2 x i64> undef, i64 64, i64 0
  %splat = shufflevector <vscale x 2 x i64> %elt, <vscale x 2 x i64> undef, <vscale x 2 x i32> zeroinitializer
  %out = call <vscale x 2 x i64> @llvm.aarch64.sve.lsr.u.nxv2i64(<vscale x 2 x i1> %pg,
                                                                 <vscale x 2 x i64> %a,
                                                                 <vscale x 2 x i64> %splat)
  ret <vscale x 2 x i64> %out
}

; Ensure we don't match a right shift by zero to the immediate form.
define <vscale x 2 x i64> @lsr_i64_too_small(<vscale x 2 x i1> %pg, <vscale x 2 x i64> %a) {
; CHECK-LABEL: lsr_i64_too_small:
; CHECK:       // %bb.0:
; CHECK-NEXT:    mov z1.d, #0 // =0x0
; CHECK-NEXT:    lsr z0.d, p0/m, z0.d, z1.d
; CHECK-NEXT:    ret
  %out = call <vscale x 2 x i64> @llvm.aarch64.sve.lsr.nxv2i64(<vscale x 2 x i1> %pg,
                                                               <vscale x 2 x i64> %a,
                                                               <vscale x 2 x i64> zeroinitializer)
  ret <vscale x 2 x i64> %out
}

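; The tests below reinterpret an all-true predicate of another element size as
; an i32 predicate via svbool. A ptrue of i8 or i16 elements still covers
; every i32 lane, so the unpredicated immediate form is safe; a ptrue of i64
; elements leaves every other i32 lane inactive, so it is not.
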
; As lsr_i32 but where pg is i8 based and thus compatible for i32.
define <vscale x 4 x i32> @lsr_i32_ptrue_all_b(<vscale x 4 x i32> %a) #0 {
; CHECK-LABEL: lsr_i32_ptrue_all_b:
; CHECK:       // %bb.0:
; CHECK-NEXT:    lsr z0.s, z0.s, #1
; CHECK-NEXT:    ret
  %pg.b = tail call <vscale x 16 x i1> @llvm.aarch64.sve.ptrue.nxv16i1(i32 31)
  %pg.s = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> %pg.b)
  %b = tail call <vscale x 4 x i32> @llvm.aarch64.sve.dup.x.nxv4i32(i32 1)
  %out = tail call <vscale x 4 x i32> @llvm.aarch64.sve.lsr.u.nxv4i32(<vscale x 4 x i1> %pg.s,
                                                                      <vscale x 4 x i32> %a,
                                                                      <vscale x 4 x i32> %b)
  ret <vscale x 4 x i32> %out
}

; As lsr_i32 but where pg is i16 based and thus compatible for i32.
define <vscale x 4 x i32> @lsr_i32_ptrue_all_h(<vscale x 4 x i32> %a) #0 {
; CHECK-LABEL: lsr_i32_ptrue_all_h:
; CHECK:       // %bb.0:
; CHECK-NEXT:    lsr z0.s, z0.s, #1
; CHECK-NEXT:    ret
  %pg.h = tail call <vscale x 8 x i1> @llvm.aarch64.sve.ptrue.nxv8i1(i32 31)
  %pg.b = tail call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv8i1(<vscale x 8 x i1> %pg.h)
  %pg.s = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> %pg.b)
  %b = tail call <vscale x 4 x i32> @llvm.aarch64.sve.dup.x.nxv4i32(i32 1)
  %out = tail call <vscale x 4 x i32> @llvm.aarch64.sve.lsr.u.nxv4i32(<vscale x 4 x i1> %pg.s,
                                                                      <vscale x 4 x i32> %a,
                                                                      <vscale x 4 x i32> %b)
  ret <vscale x 4 x i32> %out
}

; As lsr_i32 but where pg is i64 based, which is not compatible for i32 and
; thus inactive lanes are important and the immediate form cannot be used.
define <vscale x 4 x i32> @lsr_i32_ptrue_all_d(<vscale x 4 x i32> %a) #0 {
; CHECK-LABEL: lsr_i32_ptrue_all_d:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.d
; CHECK-NEXT:    lsr z0.s, p0/m, z0.s, #1
; CHECK-NEXT:    ret
  %pg.d = tail call <vscale x 2 x i1> @llvm.aarch64.sve.ptrue.nxv2i1(i32 31)
  %pg.b = tail call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv2i1(<vscale x 2 x i1> %pg.d)
  %pg.s = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> %pg.b)
  %b = tail call <vscale x 4 x i32> @llvm.aarch64.sve.dup.x.nxv4i32(i32 1)
  %out = tail call <vscale x 4 x i32> @llvm.aarch64.sve.lsr.nxv4i32(<vscale x 4 x i1> %pg.s,
                                                                    <vscale x 4 x i32> %a,
                                                                    <vscale x 4 x i32> %b)
  ret <vscale x 4 x i32> %out
}

;
; MUL
;

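; MUL (immediate) takes a signed 8-bit multiplier, so the splat of 1 below
; folds whenever the predicate is known to cover all i32 lanes.
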
; As mul_i32 but where pg is i8 based and thus compatible for i32.
define <vscale x 4 x i32> @mul_i32_ptrue_all_b(<vscale x 4 x i32> %a) #0 {
; CHECK-LABEL: mul_i32_ptrue_all_b:
; CHECK:       // %bb.0:
; CHECK-NEXT:    mul z0.s, z0.s, #1
; CHECK-NEXT:    ret
  %pg.b = tail call <vscale x 16 x i1> @llvm.aarch64.sve.ptrue.nxv16i1(i32 31)
  %pg.s = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> %pg.b)
  %b = tail call <vscale x 4 x i32> @llvm.aarch64.sve.dup.x.nxv4i32(i32 1)
  %out = tail call <vscale x 4 x i32> @llvm.aarch64.sve.mul.u.nxv4i32(<vscale x 4 x i1> %pg.s,
                                                                      <vscale x 4 x i32> %a,
                                                                      <vscale x 4 x i32> %b)
  ret <vscale x 4 x i32> %out
}

; As mul_i32 but where pg is i16 based and thus compatible for i32.
define <vscale x 4 x i32> @mul_i32_ptrue_all_h(<vscale x 4 x i32> %a) #0 {
; CHECK-LABEL: mul_i32_ptrue_all_h:
; CHECK:       // %bb.0:
; CHECK-NEXT:    mul z0.s, z0.s, #1
; CHECK-NEXT:    ret
  %pg.h = tail call <vscale x 8 x i1> @llvm.aarch64.sve.ptrue.nxv8i1(i32 31)
  %pg.b = tail call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv8i1(<vscale x 8 x i1> %pg.h)
  %pg.s = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> %pg.b)
  %b = tail call <vscale x 4 x i32> @llvm.aarch64.sve.dup.x.nxv4i32(i32 1)
  %out = tail call <vscale x 4 x i32> @llvm.aarch64.sve.mul.u.nxv4i32(<vscale x 4 x i1> %pg.s,
                                                                      <vscale x 4 x i32> %a,
                                                                      <vscale x 4 x i32> %b)
  ret <vscale x 4 x i32> %out
}

; As mul_i32 but where pg is i64 based, which is not compatible for i32 and
; thus inactive lanes are important and the immediate form cannot be used.
define <vscale x 4 x i32> @mul_i32_ptrue_all_d(<vscale x 4 x i32> %a) #0 {
; CHECK-LABEL: mul_i32_ptrue_all_d:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.d
; CHECK-NEXT:    mov z1.s, #1 // =0x1
; CHECK-NEXT:    mul z0.s, p0/m, z0.s, z1.s
; CHECK-NEXT:    ret
  %pg.d = tail call <vscale x 2 x i1> @llvm.aarch64.sve.ptrue.nxv2i1(i32 31)
  %pg.b = tail call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv2i1(<vscale x 2 x i1> %pg.d)
  %pg.s = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> %pg.b)
  %b = tail call <vscale x 4 x i32> @llvm.aarch64.sve.dup.x.nxv4i32(i32 1)
  %out = tail call <vscale x 4 x i32> @llvm.aarch64.sve.mul.nxv4i32(<vscale x 4 x i1> %pg.s,
                                                                    <vscale x 4 x i32> %a,
                                                                    <vscale x 4 x i32> %b)
  ret <vscale x 4 x i32> %out
}

declare <vscale x 16 x i8> @llvm.aarch64.sve.add.u.nxv16i8(<vscale x 16 x i1>, <vscale x 16 x i8>, <vscale x 16 x i8>)
declare <vscale x 8 x i16> @llvm.aarch64.sve.add.u.nxv8i16(<vscale x 8 x i1>, <vscale x 8 x i16>, <vscale x 8 x i16>)
declare <vscale x 4 x i32> @llvm.aarch64.sve.add.u.nxv4i32(<vscale x 4 x i1>, <vscale x 4 x i32>, <vscale x 4 x i32>)
declare <vscale x 2 x i64> @llvm.aarch64.sve.add.u.nxv2i64(<vscale x 2 x i1>, <vscale x 2 x i64>, <vscale x 2 x i64>)

declare <vscale x 4 x i32> @llvm.aarch64.sve.sub.nxv4i32(<vscale x 4 x i1>, <vscale x 4 x i32>, <vscale x 4 x i32>)

declare <vscale x 16 x i8> @llvm.aarch64.sve.sub.u.nxv16i8(<vscale x 16 x i1>, <vscale x 16 x i8>, <vscale x 16 x i8>)
declare <vscale x 8 x i16> @llvm.aarch64.sve.sub.u.nxv8i16(<vscale x 8 x i1>, <vscale x 8 x i16>, <vscale x 8 x i16>)
declare <vscale x 4 x i32> @llvm.aarch64.sve.sub.u.nxv4i32(<vscale x 4 x i1>, <vscale x 4 x i32>, <vscale x 4 x i32>)
declare <vscale x 2 x i64> @llvm.aarch64.sve.sub.u.nxv2i64(<vscale x 2 x i1>, <vscale x 2 x i64>, <vscale x 2 x i64>)

declare <vscale x 16 x i8> @llvm.aarch64.sve.subr.nxv16i8(<vscale x 16 x i1>, <vscale x 16 x i8>, <vscale x 16 x i8>)
declare <vscale x 8 x i16> @llvm.aarch64.sve.subr.nxv8i16(<vscale x 8 x i1>, <vscale x 8 x i16>, <vscale x 8 x i16>)
declare <vscale x 4 x i32> @llvm.aarch64.sve.subr.nxv4i32(<vscale x 4 x i1>, <vscale x 4 x i32>, <vscale x 4 x i32>)
declare <vscale x 2 x i64> @llvm.aarch64.sve.subr.nxv2i64(<vscale x 2 x i1>, <vscale x 2 x i64>, <vscale x 2 x i64>)

declare <vscale x 16 x i8> @llvm.aarch64.sve.sqadd.x.nxv16i8(<vscale x 16 x i8>, <vscale x 16 x i8>)
declare <vscale x 8 x i16> @llvm.aarch64.sve.sqadd.x.nxv8i16(<vscale x 8 x i16>, <vscale x 8 x i16>)
declare <vscale x 4 x i32> @llvm.aarch64.sve.sqadd.x.nxv4i32(<vscale x 4 x i32>, <vscale x 4 x i32>)
declare <vscale x 2 x i64> @llvm.aarch64.sve.sqadd.x.nxv2i64(<vscale x 2 x i64>, <vscale x 2 x i64>)

declare <vscale x 16 x i8> @llvm.aarch64.sve.sqsub.x.nxv16i8(<vscale x 16 x i8>, <vscale x 16 x i8>)
declare <vscale x 8 x i16> @llvm.aarch64.sve.sqsub.x.nxv8i16(<vscale x 8 x i16>, <vscale x 8 x i16>)
declare <vscale x 4 x i32> @llvm.aarch64.sve.sqsub.x.nxv4i32(<vscale x 4 x i32>, <vscale x 4 x i32>)
declare <vscale x 2 x i64> @llvm.aarch64.sve.sqsub.x.nxv2i64(<vscale x 2 x i64>, <vscale x 2 x i64>)

declare <vscale x 16 x i8> @llvm.aarch64.sve.uqadd.x.nxv16i8(<vscale x 16 x i8>, <vscale x 16 x i8>)
declare <vscale x 8 x i16> @llvm.aarch64.sve.uqadd.x.nxv8i16(<vscale x 8 x i16>, <vscale x 8 x i16>)
declare <vscale x 4 x i32> @llvm.aarch64.sve.uqadd.x.nxv4i32(<vscale x 4 x i32>, <vscale x 4 x i32>)
declare <vscale x 2 x i64> @llvm.aarch64.sve.uqadd.x.nxv2i64(<vscale x 2 x i64>, <vscale x 2 x i64>)

declare <vscale x 16 x i8> @llvm.aarch64.sve.uqsub.x.nxv16i8(<vscale x 16 x i8>, <vscale x 16 x i8>)
declare <vscale x 8 x i16> @llvm.aarch64.sve.uqsub.x.nxv8i16(<vscale x 8 x i16>, <vscale x 8 x i16>)
declare <vscale x 4 x i32> @llvm.aarch64.sve.uqsub.x.nxv4i32(<vscale x 4 x i32>, <vscale x 4 x i32>)
declare <vscale x 2 x i64> @llvm.aarch64.sve.uqsub.x.nxv2i64(<vscale x 2 x i64>, <vscale x 2 x i64>)

declare <vscale x 4 x i32> @llvm.aarch64.sve.smax.nxv4i32(<vscale x 4 x i1>, <vscale x 4 x i32>, <vscale x 4 x i32>)

declare <vscale x 16 x i8> @llvm.aarch64.sve.smax.u.nxv16i8(<vscale x 16 x i1>, <vscale x 16 x i8>, <vscale x 16 x i8>)
declare <vscale x 8 x i16> @llvm.aarch64.sve.smax.u.nxv8i16(<vscale x 8 x i1>, <vscale x 8 x i16>, <vscale x 8 x i16>)
declare <vscale x 4 x i32> @llvm.aarch64.sve.smax.u.nxv4i32(<vscale x 4 x i1>, <vscale x 4 x i32>, <vscale x 4 x i32>)
declare <vscale x 2 x i64> @llvm.aarch64.sve.smax.u.nxv2i64(<vscale x 2 x i1>, <vscale x 2 x i64>, <vscale x 2 x i64>)

declare <vscale x 4 x i32> @llvm.aarch64.sve.smin.nxv4i32(<vscale x 4 x i1>, <vscale x 4 x i32>, <vscale x 4 x i32>)

declare <vscale x 16 x i8> @llvm.aarch64.sve.smin.u.nxv16i8(<vscale x 16 x i1>, <vscale x 16 x i8>, <vscale x 16 x i8>)
declare <vscale x 8 x i16> @llvm.aarch64.sve.smin.u.nxv8i16(<vscale x 8 x i1>, <vscale x 8 x i16>, <vscale x 8 x i16>)
declare <vscale x 4 x i32> @llvm.aarch64.sve.smin.u.nxv4i32(<vscale x 4 x i1>, <vscale x 4 x i32>, <vscale x 4 x i32>)
declare <vscale x 2 x i64> @llvm.aarch64.sve.smin.u.nxv2i64(<vscale x 2 x i1>, <vscale x 2 x i64>, <vscale x 2 x i64>)

declare <vscale x 4 x i32> @llvm.aarch64.sve.umax.nxv4i32(<vscale x 4 x i1>, <vscale x 4 x i32>, <vscale x 4 x i32>)

declare <vscale x 16 x i8> @llvm.aarch64.sve.umax.u.nxv16i8(<vscale x 16 x i1>, <vscale x 16 x i8>, <vscale x 16 x i8>)
declare <vscale x 8 x i16> @llvm.aarch64.sve.umax.u.nxv8i16(<vscale x 8 x i1>, <vscale x 8 x i16>, <vscale x 8 x i16>)
declare <vscale x 4 x i32> @llvm.aarch64.sve.umax.u.nxv4i32(<vscale x 4 x i1>, <vscale x 4 x i32>, <vscale x 4 x i32>)
declare <vscale x 2 x i64> @llvm.aarch64.sve.umax.u.nxv2i64(<vscale x 2 x i1>, <vscale x 2 x i64>, <vscale x 2 x i64>)

declare <vscale x 4 x i32> @llvm.aarch64.sve.umin.nxv4i32(<vscale x 4 x i1>, <vscale x 4 x i32>, <vscale x 4 x i32>)

declare <vscale x 16 x i8> @llvm.aarch64.sve.umin.u.nxv16i8(<vscale x 16 x i1>, <vscale x 16 x i8>, <vscale x 16 x i8>)
declare <vscale x 8 x i16> @llvm.aarch64.sve.umin.u.nxv8i16(<vscale x 8 x i1>, <vscale x 8 x i16>, <vscale x 8 x i16>)
declare <vscale x 4 x i32> @llvm.aarch64.sve.umin.u.nxv4i32(<vscale x 4 x i1>, <vscale x 4 x i32>, <vscale x 4 x i32>)
declare <vscale x 2 x i64> @llvm.aarch64.sve.umin.u.nxv2i64(<vscale x 2 x i1>, <vscale x 2 x i64>, <vscale x 2 x i64>)

declare <vscale x 16 x i8> @llvm.aarch64.sve.asr.nxv16i8(<vscale x 16 x i1>, <vscale x 16 x i8>, <vscale x 16 x i8>)
declare <vscale x 8 x i16> @llvm.aarch64.sve.asr.nxv8i16(<vscale x 8 x i1>, <vscale x 8 x i16>, <vscale x 8 x i16>)
declare <vscale x 4 x i32> @llvm.aarch64.sve.asr.nxv4i32(<vscale x 4 x i1>, <vscale x 4 x i32>, <vscale x 4 x i32>)
declare <vscale x 2 x i64> @llvm.aarch64.sve.asr.nxv2i64(<vscale x 2 x i1>, <vscale x 2 x i64>, <vscale x 2 x i64>)

declare <vscale x 16 x i8> @llvm.aarch64.sve.asr.u.nxv16i8(<vscale x 16 x i1>, <vscale x 16 x i8>, <vscale x 16 x i8>)
declare <vscale x 8 x i16> @llvm.aarch64.sve.asr.u.nxv8i16(<vscale x 8 x i1>, <vscale x 8 x i16>, <vscale x 8 x i16>)
declare <vscale x 4 x i32> @llvm.aarch64.sve.asr.u.nxv4i32(<vscale x 4 x i1>, <vscale x 4 x i32>, <vscale x 4 x i32>)
declare <vscale x 2 x i64> @llvm.aarch64.sve.asr.u.nxv2i64(<vscale x 2 x i1>, <vscale x 2 x i64>, <vscale x 2 x i64>)

declare <vscale x 16 x i8> @llvm.aarch64.sve.lsl.nxv16i8(<vscale x 16 x i1>, <vscale x 16 x i8>, <vscale x 16 x i8>)
declare <vscale x 8 x i16> @llvm.aarch64.sve.lsl.nxv8i16(<vscale x 8 x i1>, <vscale x 8 x i16>, <vscale x 8 x i16>)
declare <vscale x 4 x i32> @llvm.aarch64.sve.lsl.nxv4i32(<vscale x 4 x i1>, <vscale x 4 x i32>, <vscale x 4 x i32>)
declare <vscale x 2 x i64> @llvm.aarch64.sve.lsl.nxv2i64(<vscale x 2 x i1>, <vscale x 2 x i64>, <vscale x 2 x i64>)

declare <vscale x 16 x i8> @llvm.aarch64.sve.lsl.u.nxv16i8(<vscale x 16 x i1>, <vscale x 16 x i8>, <vscale x 16 x i8>)
declare <vscale x 8 x i16> @llvm.aarch64.sve.lsl.u.nxv8i16(<vscale x 8 x i1>, <vscale x 8 x i16>, <vscale x 8 x i16>)
declare <vscale x 4 x i32> @llvm.aarch64.sve.lsl.u.nxv4i32(<vscale x 4 x i1>, <vscale x 4 x i32>, <vscale x 4 x i32>)
declare <vscale x 2 x i64> @llvm.aarch64.sve.lsl.u.nxv2i64(<vscale x 2 x i1>, <vscale x 2 x i64>, <vscale x 2 x i64>)

declare <vscale x 16 x i8> @llvm.aarch64.sve.lsr.nxv16i8(<vscale x 16 x i1>, <vscale x 16 x i8>, <vscale x 16 x i8>)
declare <vscale x 8 x i16> @llvm.aarch64.sve.lsr.nxv8i16(<vscale x 8 x i1>, <vscale x 8 x i16>, <vscale x 8 x i16>)
declare <vscale x 4 x i32> @llvm.aarch64.sve.lsr.nxv4i32(<vscale x 4 x i1>, <vscale x 4 x i32>, <vscale x 4 x i32>)
declare <vscale x 2 x i64> @llvm.aarch64.sve.lsr.nxv2i64(<vscale x 2 x i1>, <vscale x 2 x i64>, <vscale x 2 x i64>)

declare <vscale x 16 x i8> @llvm.aarch64.sve.lsr.u.nxv16i8(<vscale x 16 x i1>, <vscale x 16 x i8>, <vscale x 16 x i8>)
declare <vscale x 8 x i16> @llvm.aarch64.sve.lsr.u.nxv8i16(<vscale x 8 x i1>, <vscale x 8 x i16>, <vscale x 8 x i16>)
declare <vscale x 4 x i32> @llvm.aarch64.sve.lsr.u.nxv4i32(<vscale x 4 x i1>, <vscale x 4 x i32>, <vscale x 4 x i32>)
declare <vscale x 2 x i64> @llvm.aarch64.sve.lsr.u.nxv2i64(<vscale x 2 x i1>, <vscale x 2 x i64>, <vscale x 2 x i64>)

declare <vscale x 4 x i32> @llvm.aarch64.sve.mul.nxv4i32(<vscale x 4 x i1>, <vscale x 4 x i32>, <vscale x 4 x i32>)

declare <vscale x 16 x i8> @llvm.aarch64.sve.mul.u.nxv16i8(<vscale x 16 x i1>, <vscale x 16 x i8>, <vscale x 16 x i8>)
declare <vscale x 8 x i16> @llvm.aarch64.sve.mul.u.nxv8i16(<vscale x 8 x i1>, <vscale x 8 x i16>, <vscale x 8 x i16>)
declare <vscale x 4 x i32> @llvm.aarch64.sve.mul.u.nxv4i32(<vscale x 4 x i1>, <vscale x 4 x i32>, <vscale x 4 x i32>)
declare <vscale x 2 x i64> @llvm.aarch64.sve.mul.u.nxv2i64(<vscale x 2 x i1>, <vscale x 2 x i64>, <vscale x 2 x i64>)

declare <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1>)
declare <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1>)
declare <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1>)

declare <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv4i1(<vscale x 4 x i1>)
declare <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv8i1(<vscale x 8 x i1>)
declare <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv2i1(<vscale x 2 x i1>)

declare <vscale x 4 x i32> @llvm.aarch64.sve.dup.x.nxv4i32(i32)

declare <vscale x 16 x i1> @llvm.aarch64.sve.ptrue.nxv16i1(i32 %pattern)
declare <vscale x 8 x i1> @llvm.aarch64.sve.ptrue.nxv8i1(i32 %pattern)
declare <vscale x 4 x i1> @llvm.aarch64.sve.ptrue.nxv4i1(i32 %pattern)
declare <vscale x 2 x i1> @llvm.aarch64.sve.ptrue.nxv2i1(i32 %pattern)

attributes #0 = { "target-features"="+sve" }