1 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2 ; RUN: llc -mattr=+sve -force-streaming-compatible-sve < %s | FileCheck %s
3 ; RUN: llc -mattr=+sme -force-streaming-compatible-sve < %s | FileCheck %s
5 target triple = "aarch64-unknown-linux-gnu"
; Signed maximum of the two <8 x i8> arguments, computed with @llvm.smax.v8i8.
; The checks expect a single predicated smax on .b elements, governed by a vl8 predicate.
11 define <8 x i8> @smax_v8i8(<8 x i8> %op1, <8 x i8> %op2) {
12 ; CHECK-LABEL: smax_v8i8:
14 ; CHECK-NEXT: ptrue p0.b, vl8
15 ; CHECK-NEXT: // kill: def $d0 killed $d0 def $z0
16 ; CHECK-NEXT: // kill: def $d1 killed $d1 def $z1
17 ; CHECK-NEXT: smax z0.b, p0/m, z0.b, z1.b
18 ; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0
20 %res = call <8 x i8> @llvm.smax.v8i8(<8 x i8> %op1, <8 x i8> %op2)
; Signed maximum of the two <16 x i8> arguments, computed with @llvm.smax.v16i8.
; The checks expect a single predicated smax on .b elements, governed by a vl16 predicate.
24 define <16 x i8> @smax_v16i8(<16 x i8> %op1, <16 x i8> %op2) {
25 ; CHECK-LABEL: smax_v16i8:
27 ; CHECK-NEXT: ptrue p0.b, vl16
28 ; CHECK-NEXT: // kill: def $q0 killed $q0 def $z0
29 ; CHECK-NEXT: // kill: def $q1 killed $q1 def $z1
30 ; CHECK-NEXT: smax z0.b, p0/m, z0.b, z1.b
31 ; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0
33 %res = call <16 x i8> @llvm.smax.v16i8(<16 x i8> %op1, <16 x i8> %op2)
; Signed maximum of the <32 x i8> vectors loaded from %a and %b; the result is stored back to %a.
; The checks expect two predicated smax instructions on .b elements that share one vl16
; predicate, with the operands loaded by ldp and the results stored by stp.
37 define void @smax_v32i8(ptr %a, ptr %b) {
38 ; CHECK-LABEL: smax_v32i8:
40 ; CHECK-NEXT: ptrue p0.b, vl16
41 ; CHECK-NEXT: ldp q0, q3, [x1]
42 ; CHECK-NEXT: ldp q1, q2, [x0]
43 ; CHECK-NEXT: smax z0.b, p0/m, z0.b, z1.b
44 ; CHECK-NEXT: movprfx z1, z2
45 ; CHECK-NEXT: smax z1.b, p0/m, z1.b, z3.b
46 ; CHECK-NEXT: stp q0, q1, [x0]
48 %op1 = load <32 x i8>, ptr %a
49 %op2 = load <32 x i8>, ptr %b
50 %res = call <32 x i8> @llvm.smax.v32i8(<32 x i8> %op1, <32 x i8> %op2)
51 store <32 x i8> %res, ptr %a
; Signed maximum of the two <4 x i16> arguments, computed with @llvm.smax.v4i16.
; The checks expect a single predicated smax on .h elements, governed by a vl4 predicate.
55 define <4 x i16> @smax_v4i16(<4 x i16> %op1, <4 x i16> %op2) {
56 ; CHECK-LABEL: smax_v4i16:
58 ; CHECK-NEXT: ptrue p0.h, vl4
59 ; CHECK-NEXT: // kill: def $d0 killed $d0 def $z0
60 ; CHECK-NEXT: // kill: def $d1 killed $d1 def $z1
61 ; CHECK-NEXT: smax z0.h, p0/m, z0.h, z1.h
62 ; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0
64 %res = call <4 x i16> @llvm.smax.v4i16(<4 x i16> %op1, <4 x i16> %op2)
; Signed maximum of the two <8 x i16> arguments, computed with @llvm.smax.v8i16.
; The checks expect a single predicated smax on .h elements, governed by a vl8 predicate.
68 define <8 x i16> @smax_v8i16(<8 x i16> %op1, <8 x i16> %op2) {
69 ; CHECK-LABEL: smax_v8i16:
71 ; CHECK-NEXT: ptrue p0.h, vl8
72 ; CHECK-NEXT: // kill: def $q0 killed $q0 def $z0
73 ; CHECK-NEXT: // kill: def $q1 killed $q1 def $z1
74 ; CHECK-NEXT: smax z0.h, p0/m, z0.h, z1.h
75 ; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0
77 %res = call <8 x i16> @llvm.smax.v8i16(<8 x i16> %op1, <8 x i16> %op2)
; Signed maximum of the <16 x i16> vectors loaded from %a and %b; the result is stored back to %a.
; The checks expect two predicated smax instructions on .h elements that share one vl8
; predicate, with the operands loaded by ldp and the results stored by stp.
81 define void @smax_v16i16(ptr %a, ptr %b) {
82 ; CHECK-LABEL: smax_v16i16:
84 ; CHECK-NEXT: ptrue p0.h, vl8
85 ; CHECK-NEXT: ldp q0, q3, [x1]
86 ; CHECK-NEXT: ldp q1, q2, [x0]
87 ; CHECK-NEXT: smax z0.h, p0/m, z0.h, z1.h
88 ; CHECK-NEXT: movprfx z1, z2
89 ; CHECK-NEXT: smax z1.h, p0/m, z1.h, z3.h
90 ; CHECK-NEXT: stp q0, q1, [x0]
92 %op1 = load <16 x i16>, ptr %a
93 %op2 = load <16 x i16>, ptr %b
94 %res = call <16 x i16> @llvm.smax.v16i16(<16 x i16> %op1, <16 x i16> %op2)
95 store <16 x i16> %res, ptr %a
; Signed maximum of the two <2 x i32> arguments, computed with @llvm.smax.v2i32.
; The checks expect a single predicated smax on .s elements, governed by a vl2 predicate.
99 define <2 x i32> @smax_v2i32(<2 x i32> %op1, <2 x i32> %op2) {
100 ; CHECK-LABEL: smax_v2i32:
102 ; CHECK-NEXT: ptrue p0.s, vl2
103 ; CHECK-NEXT: // kill: def $d0 killed $d0 def $z0
104 ; CHECK-NEXT: // kill: def $d1 killed $d1 def $z1
105 ; CHECK-NEXT: smax z0.s, p0/m, z0.s, z1.s
106 ; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0
108 %res = call <2 x i32> @llvm.smax.v2i32(<2 x i32> %op1, <2 x i32> %op2)
; Signed maximum of the two <4 x i32> arguments, computed with @llvm.smax.v4i32.
; The checks expect a single predicated smax on .s elements, governed by a vl4 predicate.
112 define <4 x i32> @smax_v4i32(<4 x i32> %op1, <4 x i32> %op2) {
113 ; CHECK-LABEL: smax_v4i32:
115 ; CHECK-NEXT: ptrue p0.s, vl4
116 ; CHECK-NEXT: // kill: def $q0 killed $q0 def $z0
117 ; CHECK-NEXT: // kill: def $q1 killed $q1 def $z1
118 ; CHECK-NEXT: smax z0.s, p0/m, z0.s, z1.s
119 ; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0
121 %res = call <4 x i32> @llvm.smax.v4i32(<4 x i32> %op1, <4 x i32> %op2)
; Signed maximum of the <8 x i32> vectors loaded from %a and %b; the result is stored back to %a.
; The checks expect two predicated smax instructions on .s elements that share one vl4
; predicate, with the operands loaded by ldp and the results stored by stp.
125 define void @smax_v8i32(ptr %a, ptr %b) {
126 ; CHECK-LABEL: smax_v8i32:
128 ; CHECK-NEXT: ptrue p0.s, vl4
129 ; CHECK-NEXT: ldp q0, q3, [x1]
130 ; CHECK-NEXT: ldp q1, q2, [x0]
131 ; CHECK-NEXT: smax z0.s, p0/m, z0.s, z1.s
132 ; CHECK-NEXT: movprfx z1, z2
133 ; CHECK-NEXT: smax z1.s, p0/m, z1.s, z3.s
134 ; CHECK-NEXT: stp q0, q1, [x0]
136 %op1 = load <8 x i32>, ptr %a
137 %op2 = load <8 x i32>, ptr %b
138 %res = call <8 x i32> @llvm.smax.v8i32(<8 x i32> %op1, <8 x i32> %op2)
139 store <8 x i32> %res, ptr %a
143 ; Vector i64 max is not legal for NEON, so SVE is used when available.
; Signed maximum of the two <1 x i64> arguments, computed with @llvm.smax.v1i64.
; The checks expect a single predicated smax on .d elements, governed by a vl1 predicate.
144 define <1 x i64> @smax_v1i64(<1 x i64> %op1, <1 x i64> %op2) {
145 ; CHECK-LABEL: smax_v1i64:
147 ; CHECK-NEXT: ptrue p0.d, vl1
148 ; CHECK-NEXT: // kill: def $d0 killed $d0 def $z0
149 ; CHECK-NEXT: // kill: def $d1 killed $d1 def $z1
150 ; CHECK-NEXT: smax z0.d, p0/m, z0.d, z1.d
151 ; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0
153 %res = call <1 x i64> @llvm.smax.v1i64(<1 x i64> %op1, <1 x i64> %op2)
157 ; Vector i64 max is not legal for NEON, so SVE is used when available.
; Signed maximum of the two <2 x i64> arguments, computed with @llvm.smax.v2i64.
; The checks expect a single predicated smax on .d elements, governed by a vl2 predicate.
158 define <2 x i64> @smax_v2i64(<2 x i64> %op1, <2 x i64> %op2) {
159 ; CHECK-LABEL: smax_v2i64:
161 ; CHECK-NEXT: ptrue p0.d, vl2
162 ; CHECK-NEXT: // kill: def $q0 killed $q0 def $z0
163 ; CHECK-NEXT: // kill: def $q1 killed $q1 def $z1
164 ; CHECK-NEXT: smax z0.d, p0/m, z0.d, z1.d
165 ; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0
167 %res = call <2 x i64> @llvm.smax.v2i64(<2 x i64> %op1, <2 x i64> %op2)
; Signed maximum of the <4 x i64> vectors loaded from %a and %b; the result is stored back to %a.
; The checks expect two predicated smax instructions on .d elements that share one vl2
; predicate, with the operands loaded by ldp and the results stored by stp.
171 define void @smax_v4i64(ptr %a, ptr %b) {
172 ; CHECK-LABEL: smax_v4i64:
174 ; CHECK-NEXT: ptrue p0.d, vl2
175 ; CHECK-NEXT: ldp q0, q3, [x1]
176 ; CHECK-NEXT: ldp q1, q2, [x0]
177 ; CHECK-NEXT: smax z0.d, p0/m, z0.d, z1.d
178 ; CHECK-NEXT: movprfx z1, z2
179 ; CHECK-NEXT: smax z1.d, p0/m, z1.d, z3.d
180 ; CHECK-NEXT: stp q0, q1, [x0]
182 %op1 = load <4 x i64>, ptr %a
183 %op2 = load <4 x i64>, ptr %b
184 %res = call <4 x i64> @llvm.smax.v4i64(<4 x i64> %op1, <4 x i64> %op2)
185 store <4 x i64> %res, ptr %a
; Signed minimum of the two <8 x i8> arguments, computed with @llvm.smin.v8i8.
; The checks expect a single predicated smin on .b elements, governed by a vl8 predicate.
193 define <8 x i8> @smin_v8i8(<8 x i8> %op1, <8 x i8> %op2) {
194 ; CHECK-LABEL: smin_v8i8:
196 ; CHECK-NEXT: ptrue p0.b, vl8
197 ; CHECK-NEXT: // kill: def $d0 killed $d0 def $z0
198 ; CHECK-NEXT: // kill: def $d1 killed $d1 def $z1
199 ; CHECK-NEXT: smin z0.b, p0/m, z0.b, z1.b
200 ; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0
202 %res = call <8 x i8> @llvm.smin.v8i8(<8 x i8> %op1, <8 x i8> %op2)
; Signed minimum of the two <16 x i8> arguments, computed with @llvm.smin.v16i8.
; The checks expect a single predicated smin on .b elements, governed by a vl16 predicate.
206 define <16 x i8> @smin_v16i8(<16 x i8> %op1, <16 x i8> %op2) {
207 ; CHECK-LABEL: smin_v16i8:
209 ; CHECK-NEXT: ptrue p0.b, vl16
210 ; CHECK-NEXT: // kill: def $q0 killed $q0 def $z0
211 ; CHECK-NEXT: // kill: def $q1 killed $q1 def $z1
212 ; CHECK-NEXT: smin z0.b, p0/m, z0.b, z1.b
213 ; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0
215 %res = call <16 x i8> @llvm.smin.v16i8(<16 x i8> %op1, <16 x i8> %op2)
; Signed minimum of the <32 x i8> vectors loaded from %a and %b; the result is stored back to %a.
; The checks expect two predicated smin instructions on .b elements that share one vl16
; predicate, with the operands loaded by ldp and the results stored by stp.
219 define void @smin_v32i8(ptr %a, ptr %b) {
220 ; CHECK-LABEL: smin_v32i8:
222 ; CHECK-NEXT: ptrue p0.b, vl16
223 ; CHECK-NEXT: ldp q0, q3, [x1]
224 ; CHECK-NEXT: ldp q1, q2, [x0]
225 ; CHECK-NEXT: smin z0.b, p0/m, z0.b, z1.b
226 ; CHECK-NEXT: movprfx z1, z2
227 ; CHECK-NEXT: smin z1.b, p0/m, z1.b, z3.b
228 ; CHECK-NEXT: stp q0, q1, [x0]
230 %op1 = load <32 x i8>, ptr %a
231 %op2 = load <32 x i8>, ptr %b
232 %res = call <32 x i8> @llvm.smin.v32i8(<32 x i8> %op1, <32 x i8> %op2)
233 store <32 x i8> %res, ptr %a
; Signed minimum of the two <4 x i16> arguments, computed with @llvm.smin.v4i16.
; The checks expect a single predicated smin on .h elements, governed by a vl4 predicate.
237 define <4 x i16> @smin_v4i16(<4 x i16> %op1, <4 x i16> %op2) {
238 ; CHECK-LABEL: smin_v4i16:
240 ; CHECK-NEXT: ptrue p0.h, vl4
241 ; CHECK-NEXT: // kill: def $d0 killed $d0 def $z0
242 ; CHECK-NEXT: // kill: def $d1 killed $d1 def $z1
243 ; CHECK-NEXT: smin z0.h, p0/m, z0.h, z1.h
244 ; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0
246 %res = call <4 x i16> @llvm.smin.v4i16(<4 x i16> %op1, <4 x i16> %op2)
; Signed minimum of the two <8 x i16> arguments, computed with @llvm.smin.v8i16.
; The checks expect a single predicated smin on .h elements, governed by a vl8 predicate.
250 define <8 x i16> @smin_v8i16(<8 x i16> %op1, <8 x i16> %op2) {
251 ; CHECK-LABEL: smin_v8i16:
253 ; CHECK-NEXT: ptrue p0.h, vl8
254 ; CHECK-NEXT: // kill: def $q0 killed $q0 def $z0
255 ; CHECK-NEXT: // kill: def $q1 killed $q1 def $z1
256 ; CHECK-NEXT: smin z0.h, p0/m, z0.h, z1.h
257 ; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0
259 %res = call <8 x i16> @llvm.smin.v8i16(<8 x i16> %op1, <8 x i16> %op2)
; Signed minimum of the <16 x i16> vectors loaded from %a and %b; the result is stored back to %a.
; The checks expect two predicated smin instructions on .h elements that share one vl8
; predicate, with the operands loaded by ldp and the results stored by stp.
263 define void @smin_v16i16(ptr %a, ptr %b) {
264 ; CHECK-LABEL: smin_v16i16:
266 ; CHECK-NEXT: ptrue p0.h, vl8
267 ; CHECK-NEXT: ldp q0, q3, [x1]
268 ; CHECK-NEXT: ldp q1, q2, [x0]
269 ; CHECK-NEXT: smin z0.h, p0/m, z0.h, z1.h
270 ; CHECK-NEXT: movprfx z1, z2
271 ; CHECK-NEXT: smin z1.h, p0/m, z1.h, z3.h
272 ; CHECK-NEXT: stp q0, q1, [x0]
274 %op1 = load <16 x i16>, ptr %a
275 %op2 = load <16 x i16>, ptr %b
276 %res = call <16 x i16> @llvm.smin.v16i16(<16 x i16> %op1, <16 x i16> %op2)
277 store <16 x i16> %res, ptr %a
; Signed minimum of the two <2 x i32> arguments, computed with @llvm.smin.v2i32.
; The checks expect a single predicated smin on .s elements, governed by a vl2 predicate.
281 define <2 x i32> @smin_v2i32(<2 x i32> %op1, <2 x i32> %op2) {
282 ; CHECK-LABEL: smin_v2i32:
284 ; CHECK-NEXT: ptrue p0.s, vl2
285 ; CHECK-NEXT: // kill: def $d0 killed $d0 def $z0
286 ; CHECK-NEXT: // kill: def $d1 killed $d1 def $z1
287 ; CHECK-NEXT: smin z0.s, p0/m, z0.s, z1.s
288 ; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0
290 %res = call <2 x i32> @llvm.smin.v2i32(<2 x i32> %op1, <2 x i32> %op2)
; Signed minimum of the two <4 x i32> arguments, computed with @llvm.smin.v4i32.
; The checks expect a single predicated smin on .s elements, governed by a vl4 predicate.
294 define <4 x i32> @smin_v4i32(<4 x i32> %op1, <4 x i32> %op2) {
295 ; CHECK-LABEL: smin_v4i32:
297 ; CHECK-NEXT: ptrue p0.s, vl4
298 ; CHECK-NEXT: // kill: def $q0 killed $q0 def $z0
299 ; CHECK-NEXT: // kill: def $q1 killed $q1 def $z1
300 ; CHECK-NEXT: smin z0.s, p0/m, z0.s, z1.s
301 ; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0
303 %res = call <4 x i32> @llvm.smin.v4i32(<4 x i32> %op1, <4 x i32> %op2)
; Signed minimum of the <8 x i32> vectors loaded from %a and %b; the result is stored back to %a.
; The checks expect two predicated smin instructions on .s elements that share one vl4
; predicate, with the operands loaded by ldp and the results stored by stp.
307 define void @smin_v8i32(ptr %a, ptr %b) {
308 ; CHECK-LABEL: smin_v8i32:
310 ; CHECK-NEXT: ptrue p0.s, vl4
311 ; CHECK-NEXT: ldp q0, q3, [x1]
312 ; CHECK-NEXT: ldp q1, q2, [x0]
313 ; CHECK-NEXT: smin z0.s, p0/m, z0.s, z1.s
314 ; CHECK-NEXT: movprfx z1, z2
315 ; CHECK-NEXT: smin z1.s, p0/m, z1.s, z3.s
316 ; CHECK-NEXT: stp q0, q1, [x0]
318 %op1 = load <8 x i32>, ptr %a
319 %op2 = load <8 x i32>, ptr %b
320 %res = call <8 x i32> @llvm.smin.v8i32(<8 x i32> %op1, <8 x i32> %op2)
321 store <8 x i32> %res, ptr %a
325 ; Vector i64 min is not legal for NEON, so SVE is used when available.
; Signed minimum of the two <1 x i64> arguments, computed with @llvm.smin.v1i64.
; The checks expect a single predicated smin on .d elements, governed by a vl1 predicate.
326 define <1 x i64> @smin_v1i64(<1 x i64> %op1, <1 x i64> %op2) {
327 ; CHECK-LABEL: smin_v1i64:
329 ; CHECK-NEXT: ptrue p0.d, vl1
330 ; CHECK-NEXT: // kill: def $d0 killed $d0 def $z0
331 ; CHECK-NEXT: // kill: def $d1 killed $d1 def $z1
332 ; CHECK-NEXT: smin z0.d, p0/m, z0.d, z1.d
333 ; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0
335 %res = call <1 x i64> @llvm.smin.v1i64(<1 x i64> %op1, <1 x i64> %op2)
339 ; Vector i64 min is not legal for NEON, so SVE is used when available.
; Signed minimum of the two <2 x i64> arguments, computed with @llvm.smin.v2i64.
; The checks expect a single predicated smin on .d elements, governed by a vl2 predicate.
340 define <2 x i64> @smin_v2i64(<2 x i64> %op1, <2 x i64> %op2) {
341 ; CHECK-LABEL: smin_v2i64:
343 ; CHECK-NEXT: ptrue p0.d, vl2
344 ; CHECK-NEXT: // kill: def $q0 killed $q0 def $z0
345 ; CHECK-NEXT: // kill: def $q1 killed $q1 def $z1
346 ; CHECK-NEXT: smin z0.d, p0/m, z0.d, z1.d
347 ; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0
349 %res = call <2 x i64> @llvm.smin.v2i64(<2 x i64> %op1, <2 x i64> %op2)
; Signed minimum of the <4 x i64> vectors loaded from %a and %b; the result is stored back to %a.
; The checks expect two predicated smin instructions on .d elements that share one vl2
; predicate, with the operands loaded by ldp and the results stored by stp.
353 define void @smin_v4i64(ptr %a, ptr %b) {
354 ; CHECK-LABEL: smin_v4i64:
356 ; CHECK-NEXT: ptrue p0.d, vl2
357 ; CHECK-NEXT: ldp q0, q3, [x1]
358 ; CHECK-NEXT: ldp q1, q2, [x0]
359 ; CHECK-NEXT: smin z0.d, p0/m, z0.d, z1.d
360 ; CHECK-NEXT: movprfx z1, z2
361 ; CHECK-NEXT: smin z1.d, p0/m, z1.d, z3.d
362 ; CHECK-NEXT: stp q0, q1, [x0]
364 %op1 = load <4 x i64>, ptr %a
365 %op2 = load <4 x i64>, ptr %b
366 %res = call <4 x i64> @llvm.smin.v4i64(<4 x i64> %op1, <4 x i64> %op2)
367 store <4 x i64> %res, ptr %a
; Unsigned maximum of the two <8 x i8> arguments, computed with @llvm.umax.v8i8.
; The checks expect a single predicated umax on .b elements, governed by a vl8 predicate.
375 define <8 x i8> @umax_v8i8(<8 x i8> %op1, <8 x i8> %op2) {
376 ; CHECK-LABEL: umax_v8i8:
378 ; CHECK-NEXT: ptrue p0.b, vl8
379 ; CHECK-NEXT: // kill: def $d0 killed $d0 def $z0
380 ; CHECK-NEXT: // kill: def $d1 killed $d1 def $z1
381 ; CHECK-NEXT: umax z0.b, p0/m, z0.b, z1.b
382 ; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0
384 %res = call <8 x i8> @llvm.umax.v8i8(<8 x i8> %op1, <8 x i8> %op2)
; Unsigned maximum of the two <16 x i8> arguments, computed with @llvm.umax.v16i8.
; The checks expect a single predicated umax on .b elements, governed by a vl16 predicate.
388 define <16 x i8> @umax_v16i8(<16 x i8> %op1, <16 x i8> %op2) {
389 ; CHECK-LABEL: umax_v16i8:
391 ; CHECK-NEXT: ptrue p0.b, vl16
392 ; CHECK-NEXT: // kill: def $q0 killed $q0 def $z0
393 ; CHECK-NEXT: // kill: def $q1 killed $q1 def $z1
394 ; CHECK-NEXT: umax z0.b, p0/m, z0.b, z1.b
395 ; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0
397 %res = call <16 x i8> @llvm.umax.v16i8(<16 x i8> %op1, <16 x i8> %op2)
; Unsigned maximum of the <32 x i8> vectors loaded from %a and %b; the result is stored back to %a.
; The checks expect two predicated umax instructions on .b elements that share one vl16
; predicate, with the operands loaded by ldp and the results stored by stp.
401 define void @umax_v32i8(ptr %a, ptr %b) {
402 ; CHECK-LABEL: umax_v32i8:
404 ; CHECK-NEXT: ptrue p0.b, vl16
405 ; CHECK-NEXT: ldp q0, q3, [x1]
406 ; CHECK-NEXT: ldp q1, q2, [x0]
407 ; CHECK-NEXT: umax z0.b, p0/m, z0.b, z1.b
408 ; CHECK-NEXT: movprfx z1, z2
409 ; CHECK-NEXT: umax z1.b, p0/m, z1.b, z3.b
410 ; CHECK-NEXT: stp q0, q1, [x0]
412 %op1 = load <32 x i8>, ptr %a
413 %op2 = load <32 x i8>, ptr %b
414 %res = call <32 x i8> @llvm.umax.v32i8(<32 x i8> %op1, <32 x i8> %op2)
415 store <32 x i8> %res, ptr %a
; Unsigned maximum of the two <4 x i16> arguments, computed with @llvm.umax.v4i16.
; The checks expect a single predicated umax on .h elements, governed by a vl4 predicate.
419 define <4 x i16> @umax_v4i16(<4 x i16> %op1, <4 x i16> %op2) {
420 ; CHECK-LABEL: umax_v4i16:
422 ; CHECK-NEXT: ptrue p0.h, vl4
423 ; CHECK-NEXT: // kill: def $d0 killed $d0 def $z0
424 ; CHECK-NEXT: // kill: def $d1 killed $d1 def $z1
425 ; CHECK-NEXT: umax z0.h, p0/m, z0.h, z1.h
426 ; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0
428 %res = call <4 x i16> @llvm.umax.v4i16(<4 x i16> %op1, <4 x i16> %op2)
; Unsigned maximum of the two <8 x i16> arguments, computed with @llvm.umax.v8i16.
; The checks expect a single predicated umax on .h elements, governed by a vl8 predicate.
432 define <8 x i16> @umax_v8i16(<8 x i16> %op1, <8 x i16> %op2) {
433 ; CHECK-LABEL: umax_v8i16:
435 ; CHECK-NEXT: ptrue p0.h, vl8
436 ; CHECK-NEXT: // kill: def $q0 killed $q0 def $z0
437 ; CHECK-NEXT: // kill: def $q1 killed $q1 def $z1
438 ; CHECK-NEXT: umax z0.h, p0/m, z0.h, z1.h
439 ; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0
441 %res = call <8 x i16> @llvm.umax.v8i16(<8 x i16> %op1, <8 x i16> %op2)
; Unsigned maximum of the <16 x i16> vectors loaded from %a and %b; the result is stored back to %a.
; The checks expect two predicated umax instructions on .h elements that share one vl8
; predicate, with the operands loaded by ldp and the results stored by stp.
445 define void @umax_v16i16(ptr %a, ptr %b) {
446 ; CHECK-LABEL: umax_v16i16:
448 ; CHECK-NEXT: ptrue p0.h, vl8
449 ; CHECK-NEXT: ldp q0, q3, [x1]
450 ; CHECK-NEXT: ldp q1, q2, [x0]
451 ; CHECK-NEXT: umax z0.h, p0/m, z0.h, z1.h
452 ; CHECK-NEXT: movprfx z1, z2
453 ; CHECK-NEXT: umax z1.h, p0/m, z1.h, z3.h
454 ; CHECK-NEXT: stp q0, q1, [x0]
456 %op1 = load <16 x i16>, ptr %a
457 %op2 = load <16 x i16>, ptr %b
458 %res = call <16 x i16> @llvm.umax.v16i16(<16 x i16> %op1, <16 x i16> %op2)
459 store <16 x i16> %res, ptr %a
; Unsigned maximum of the two <2 x i32> arguments, computed with @llvm.umax.v2i32.
; The checks expect a single predicated umax on .s elements, governed by a vl2 predicate.
463 define <2 x i32> @umax_v2i32(<2 x i32> %op1, <2 x i32> %op2) {
464 ; CHECK-LABEL: umax_v2i32:
466 ; CHECK-NEXT: ptrue p0.s, vl2
467 ; CHECK-NEXT: // kill: def $d0 killed $d0 def $z0
468 ; CHECK-NEXT: // kill: def $d1 killed $d1 def $z1
469 ; CHECK-NEXT: umax z0.s, p0/m, z0.s, z1.s
470 ; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0
472 %res = call <2 x i32> @llvm.umax.v2i32(<2 x i32> %op1, <2 x i32> %op2)
; Unsigned maximum of the two <4 x i32> arguments, computed with @llvm.umax.v4i32.
; The checks expect a single predicated umax on .s elements, governed by a vl4 predicate.
476 define <4 x i32> @umax_v4i32(<4 x i32> %op1, <4 x i32> %op2) {
477 ; CHECK-LABEL: umax_v4i32:
479 ; CHECK-NEXT: ptrue p0.s, vl4
480 ; CHECK-NEXT: // kill: def $q0 killed $q0 def $z0
481 ; CHECK-NEXT: // kill: def $q1 killed $q1 def $z1
482 ; CHECK-NEXT: umax z0.s, p0/m, z0.s, z1.s
483 ; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0
485 %res = call <4 x i32> @llvm.umax.v4i32(<4 x i32> %op1, <4 x i32> %op2)
; Unsigned maximum of the <8 x i32> vectors loaded from %a and %b; the result is stored back to %a.
; The checks expect two predicated umax instructions on .s elements that share one vl4
; predicate, with the operands loaded by ldp and the results stored by stp.
489 define void @umax_v8i32(ptr %a, ptr %b) {
490 ; CHECK-LABEL: umax_v8i32:
492 ; CHECK-NEXT: ptrue p0.s, vl4
493 ; CHECK-NEXT: ldp q0, q3, [x1]
494 ; CHECK-NEXT: ldp q1, q2, [x0]
495 ; CHECK-NEXT: umax z0.s, p0/m, z0.s, z1.s
496 ; CHECK-NEXT: movprfx z1, z2
497 ; CHECK-NEXT: umax z1.s, p0/m, z1.s, z3.s
498 ; CHECK-NEXT: stp q0, q1, [x0]
500 %op1 = load <8 x i32>, ptr %a
501 %op2 = load <8 x i32>, ptr %b
502 %res = call <8 x i32> @llvm.umax.v8i32(<8 x i32> %op1, <8 x i32> %op2)
503 store <8 x i32> %res, ptr %a
507 ; Vector i64 max is not legal for NEON, so SVE is used when available.
; Unsigned maximum of the two <1 x i64> arguments, computed with @llvm.umax.v1i64.
; The checks expect a single predicated umax on .d elements, governed by a vl1 predicate.
508 define <1 x i64> @umax_v1i64(<1 x i64> %op1, <1 x i64> %op2) {
509 ; CHECK-LABEL: umax_v1i64:
511 ; CHECK-NEXT: ptrue p0.d, vl1
512 ; CHECK-NEXT: // kill: def $d0 killed $d0 def $z0
513 ; CHECK-NEXT: // kill: def $d1 killed $d1 def $z1
514 ; CHECK-NEXT: umax z0.d, p0/m, z0.d, z1.d
515 ; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0
517 %res = call <1 x i64> @llvm.umax.v1i64(<1 x i64> %op1, <1 x i64> %op2)
521 ; Vector i64 max is not legal for NEON, so SVE is used when available.
; Unsigned maximum of the two <2 x i64> arguments, computed with @llvm.umax.v2i64.
; The checks expect a single predicated umax on .d elements, governed by a vl2 predicate.
522 define <2 x i64> @umax_v2i64(<2 x i64> %op1, <2 x i64> %op2) {
523 ; CHECK-LABEL: umax_v2i64:
525 ; CHECK-NEXT: ptrue p0.d, vl2
526 ; CHECK-NEXT: // kill: def $q0 killed $q0 def $z0
527 ; CHECK-NEXT: // kill: def $q1 killed $q1 def $z1
528 ; CHECK-NEXT: umax z0.d, p0/m, z0.d, z1.d
529 ; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0
531 %res = call <2 x i64> @llvm.umax.v2i64(<2 x i64> %op1, <2 x i64> %op2)
; Unsigned maximum of the <4 x i64> vectors loaded from %a and %b; the result is stored back to %a.
; The checks expect two predicated umax instructions on .d elements that share one vl2
; predicate, with the operands loaded by ldp and the results stored by stp.
535 define void @umax_v4i64(ptr %a, ptr %b) {
536 ; CHECK-LABEL: umax_v4i64:
538 ; CHECK-NEXT: ptrue p0.d, vl2
539 ; CHECK-NEXT: ldp q0, q3, [x1]
540 ; CHECK-NEXT: ldp q1, q2, [x0]
541 ; CHECK-NEXT: umax z0.d, p0/m, z0.d, z1.d
542 ; CHECK-NEXT: movprfx z1, z2
543 ; CHECK-NEXT: umax z1.d, p0/m, z1.d, z3.d
544 ; CHECK-NEXT: stp q0, q1, [x0]
546 %op1 = load <4 x i64>, ptr %a
547 %op2 = load <4 x i64>, ptr %b
548 %res = call <4 x i64> @llvm.umax.v4i64(<4 x i64> %op1, <4 x i64> %op2)
549 store <4 x i64> %res, ptr %a
; Unsigned minimum of the two <8 x i8> arguments, computed with @llvm.umin.v8i8.
; The checks expect a single predicated umin on .b elements, governed by a vl8 predicate.
557 define <8 x i8> @umin_v8i8(<8 x i8> %op1, <8 x i8> %op2) {
558 ; CHECK-LABEL: umin_v8i8:
560 ; CHECK-NEXT: ptrue p0.b, vl8
561 ; CHECK-NEXT: // kill: def $d0 killed $d0 def $z0
562 ; CHECK-NEXT: // kill: def $d1 killed $d1 def $z1
563 ; CHECK-NEXT: umin z0.b, p0/m, z0.b, z1.b
564 ; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0
566 %res = call <8 x i8> @llvm.umin.v8i8(<8 x i8> %op1, <8 x i8> %op2)
; Unsigned minimum of the two <16 x i8> arguments, computed with @llvm.umin.v16i8.
; The checks expect a single predicated umin on .b elements, governed by a vl16 predicate.
570 define <16 x i8> @umin_v16i8(<16 x i8> %op1, <16 x i8> %op2) {
571 ; CHECK-LABEL: umin_v16i8:
573 ; CHECK-NEXT: ptrue p0.b, vl16
574 ; CHECK-NEXT: // kill: def $q0 killed $q0 def $z0
575 ; CHECK-NEXT: // kill: def $q1 killed $q1 def $z1
576 ; CHECK-NEXT: umin z0.b, p0/m, z0.b, z1.b
577 ; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0
579 %res = call <16 x i8> @llvm.umin.v16i8(<16 x i8> %op1, <16 x i8> %op2)
; Unsigned minimum of the <32 x i8> vectors loaded from %a and %b; the result is stored back to %a.
; The checks expect two predicated umin instructions on .b elements that share one vl16
; predicate, with the operands loaded by ldp and the results stored by stp.
583 define void @umin_v32i8(ptr %a, ptr %b) {
584 ; CHECK-LABEL: umin_v32i8:
586 ; CHECK-NEXT: ptrue p0.b, vl16
587 ; CHECK-NEXT: ldp q0, q3, [x1]
588 ; CHECK-NEXT: ldp q1, q2, [x0]
589 ; CHECK-NEXT: umin z0.b, p0/m, z0.b, z1.b
590 ; CHECK-NEXT: movprfx z1, z2
591 ; CHECK-NEXT: umin z1.b, p0/m, z1.b, z3.b
592 ; CHECK-NEXT: stp q0, q1, [x0]
594 %op1 = load <32 x i8>, ptr %a
595 %op2 = load <32 x i8>, ptr %b
596 %res = call <32 x i8> @llvm.umin.v32i8(<32 x i8> %op1, <32 x i8> %op2)
597 store <32 x i8> %res, ptr %a
; Unsigned minimum of the two <4 x i16> arguments, computed with @llvm.umin.v4i16.
; The checks expect a single predicated umin on .h elements, governed by a vl4 predicate.
601 define <4 x i16> @umin_v4i16(<4 x i16> %op1, <4 x i16> %op2) {
602 ; CHECK-LABEL: umin_v4i16:
604 ; CHECK-NEXT: ptrue p0.h, vl4
605 ; CHECK-NEXT: // kill: def $d0 killed $d0 def $z0
606 ; CHECK-NEXT: // kill: def $d1 killed $d1 def $z1
607 ; CHECK-NEXT: umin z0.h, p0/m, z0.h, z1.h
608 ; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0
610 %res = call <4 x i16> @llvm.umin.v4i16(<4 x i16> %op1, <4 x i16> %op2)
; Unsigned minimum of the two <8 x i16> arguments, computed with @llvm.umin.v8i16.
; The checks expect a single predicated umin on .h elements, governed by a vl8 predicate.
614 define <8 x i16> @umin_v8i16(<8 x i16> %op1, <8 x i16> %op2) {
615 ; CHECK-LABEL: umin_v8i16:
617 ; CHECK-NEXT: ptrue p0.h, vl8
618 ; CHECK-NEXT: // kill: def $q0 killed $q0 def $z0
619 ; CHECK-NEXT: // kill: def $q1 killed $q1 def $z1
620 ; CHECK-NEXT: umin z0.h, p0/m, z0.h, z1.h
621 ; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0
623 %res = call <8 x i16> @llvm.umin.v8i16(<8 x i16> %op1, <8 x i16> %op2)
; Unsigned minimum of the <16 x i16> vectors loaded from %a and %b; the result is stored back to %a.
; The checks expect two predicated umin instructions on .h elements that share one vl8
; predicate, with the operands loaded by ldp and the results stored by stp.
627 define void @umin_v16i16(ptr %a, ptr %b) {
628 ; CHECK-LABEL: umin_v16i16:
630 ; CHECK-NEXT: ptrue p0.h, vl8
631 ; CHECK-NEXT: ldp q0, q3, [x1]
632 ; CHECK-NEXT: ldp q1, q2, [x0]
633 ; CHECK-NEXT: umin z0.h, p0/m, z0.h, z1.h
634 ; CHECK-NEXT: movprfx z1, z2
635 ; CHECK-NEXT: umin z1.h, p0/m, z1.h, z3.h
636 ; CHECK-NEXT: stp q0, q1, [x0]
638 %op1 = load <16 x i16>, ptr %a
639 %op2 = load <16 x i16>, ptr %b
640 %res = call <16 x i16> @llvm.umin.v16i16(<16 x i16> %op1, <16 x i16> %op2)
641 store <16 x i16> %res, ptr %a
; Unsigned minimum of the two <2 x i32> arguments, computed with @llvm.umin.v2i32.
; The checks expect a single predicated umin on .s elements, governed by a vl2 predicate.
645 define <2 x i32> @umin_v2i32(<2 x i32> %op1, <2 x i32> %op2) {
646 ; CHECK-LABEL: umin_v2i32:
648 ; CHECK-NEXT: ptrue p0.s, vl2
649 ; CHECK-NEXT: // kill: def $d0 killed $d0 def $z0
650 ; CHECK-NEXT: // kill: def $d1 killed $d1 def $z1
651 ; CHECK-NEXT: umin z0.s, p0/m, z0.s, z1.s
652 ; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0
654 %res = call <2 x i32> @llvm.umin.v2i32(<2 x i32> %op1, <2 x i32> %op2)
; Unsigned minimum of the two <4 x i32> arguments, computed with @llvm.umin.v4i32.
; The checks expect a single predicated umin on .s elements, governed by a vl4 predicate.
658 define <4 x i32> @umin_v4i32(<4 x i32> %op1, <4 x i32> %op2) {
659 ; CHECK-LABEL: umin_v4i32:
661 ; CHECK-NEXT: ptrue p0.s, vl4
662 ; CHECK-NEXT: // kill: def $q0 killed $q0 def $z0
663 ; CHECK-NEXT: // kill: def $q1 killed $q1 def $z1
664 ; CHECK-NEXT: umin z0.s, p0/m, z0.s, z1.s
665 ; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0
667 %res = call <4 x i32> @llvm.umin.v4i32(<4 x i32> %op1, <4 x i32> %op2)
; Unsigned minimum of the <8 x i32> vectors loaded from %a and %b; the result is stored back to %a.
; The checks expect two predicated umin instructions on .s elements that share one vl4
; predicate, with the operands loaded by ldp and the results stored by stp.
671 define void @umin_v8i32(ptr %a, ptr %b) {
672 ; CHECK-LABEL: umin_v8i32:
674 ; CHECK-NEXT: ptrue p0.s, vl4
675 ; CHECK-NEXT: ldp q0, q3, [x1]
676 ; CHECK-NEXT: ldp q1, q2, [x0]
677 ; CHECK-NEXT: umin z0.s, p0/m, z0.s, z1.s
678 ; CHECK-NEXT: movprfx z1, z2
679 ; CHECK-NEXT: umin z1.s, p0/m, z1.s, z3.s
680 ; CHECK-NEXT: stp q0, q1, [x0]
682 %op1 = load <8 x i32>, ptr %a
683 %op2 = load <8 x i32>, ptr %b
684 %res = call <8 x i32> @llvm.umin.v8i32(<8 x i32> %op1, <8 x i32> %op2)
685 store <8 x i32> %res, ptr %a
689 ; Vector i64 min is not legal for NEON, so SVE is used when available.
; Unsigned minimum of the two <1 x i64> arguments, computed with @llvm.umin.v1i64.
; The checks expect a single predicated umin on .d elements, governed by a vl1 predicate.
690 define <1 x i64> @umin_v1i64(<1 x i64> %op1, <1 x i64> %op2) {
691 ; CHECK-LABEL: umin_v1i64:
693 ; CHECK-NEXT: ptrue p0.d, vl1
694 ; CHECK-NEXT: // kill: def $d0 killed $d0 def $z0
695 ; CHECK-NEXT: // kill: def $d1 killed $d1 def $z1
696 ; CHECK-NEXT: umin z0.d, p0/m, z0.d, z1.d
697 ; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0
699 %res = call <1 x i64> @llvm.umin.v1i64(<1 x i64> %op1, <1 x i64> %op2)
703 ; Vector i64 min is not legal for NEON, so SVE is used when available.
; Unsigned minimum of the two <2 x i64> arguments, computed with @llvm.umin.v2i64.
; The checks expect a single predicated umin on .d elements, governed by a vl2 predicate.
704 define <2 x i64> @umin_v2i64(<2 x i64> %op1, <2 x i64> %op2) {
705 ; CHECK-LABEL: umin_v2i64:
707 ; CHECK-NEXT: ptrue p0.d, vl2
708 ; CHECK-NEXT: // kill: def $q0 killed $q0 def $z0
709 ; CHECK-NEXT: // kill: def $q1 killed $q1 def $z1
710 ; CHECK-NEXT: umin z0.d, p0/m, z0.d, z1.d
711 ; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0
713 %res = call <2 x i64> @llvm.umin.v2i64(<2 x i64> %op1, <2 x i64> %op2)
; Unsigned minimum of the <4 x i64> vectors loaded from %a and %b; the result is stored back to %a.
; The checks expect two predicated umin instructions on .d elements that share one vl2
; predicate, with the operands loaded by ldp and the results stored by stp.
717 define void @umin_v4i64(ptr %a, ptr %b) {
718 ; CHECK-LABEL: umin_v4i64:
720 ; CHECK-NEXT: ptrue p0.d, vl2
721 ; CHECK-NEXT: ldp q0, q3, [x1]
722 ; CHECK-NEXT: ldp q1, q2, [x0]
723 ; CHECK-NEXT: umin z0.d, p0/m, z0.d, z1.d
724 ; CHECK-NEXT: movprfx z1, z2
725 ; CHECK-NEXT: umin z1.d, p0/m, z1.d, z3.d
726 ; CHECK-NEXT: stp q0, q1, [x0]
728 %op1 = load <4 x i64>, ptr %a
729 %op2 = load <4 x i64>, ptr %b
730 %res = call <4 x i64> @llvm.umin.v4i64(<4 x i64> %op1, <4 x i64> %op2)
731 store <4 x i64> %res, ptr %a
; Declarations of the llvm.smin (signed minimum) intrinsic overloads called by the smin_* tests:
; i8, i16, i32 and i64 element types, three vector lengths each.
735 declare <8 x i8> @llvm.smin.v8i8(<8 x i8>, <8 x i8>)
736 declare <16 x i8> @llvm.smin.v16i8(<16 x i8>, <16 x i8>)
737 declare <32 x i8> @llvm.smin.v32i8(<32 x i8>, <32 x i8>)
738 declare <4 x i16> @llvm.smin.v4i16(<4 x i16>, <4 x i16>)
739 declare <8 x i16> @llvm.smin.v8i16(<8 x i16>, <8 x i16>)
740 declare <16 x i16> @llvm.smin.v16i16(<16 x i16>, <16 x i16>)
741 declare <2 x i32> @llvm.smin.v2i32(<2 x i32>, <2 x i32>)
742 declare <4 x i32> @llvm.smin.v4i32(<4 x i32>, <4 x i32>)
743 declare <8 x i32> @llvm.smin.v8i32(<8 x i32>, <8 x i32>)
744 declare <1 x i64> @llvm.smin.v1i64(<1 x i64>, <1 x i64>)
745 declare <2 x i64> @llvm.smin.v2i64(<2 x i64>, <2 x i64>)
746 declare <4 x i64> @llvm.smin.v4i64(<4 x i64>, <4 x i64>)
; Declarations of the llvm.smax (signed maximum) intrinsic overloads called by the smax_* tests:
; i8, i16, i32 and i64 element types, three vector lengths each.
748 declare <8 x i8> @llvm.smax.v8i8(<8 x i8>, <8 x i8>)
749 declare <16 x i8> @llvm.smax.v16i8(<16 x i8>, <16 x i8>)
750 declare <32 x i8> @llvm.smax.v32i8(<32 x i8>, <32 x i8>)
751 declare <4 x i16> @llvm.smax.v4i16(<4 x i16>, <4 x i16>)
752 declare <8 x i16> @llvm.smax.v8i16(<8 x i16>, <8 x i16>)
753 declare <16 x i16> @llvm.smax.v16i16(<16 x i16>, <16 x i16>)
754 declare <2 x i32> @llvm.smax.v2i32(<2 x i32>, <2 x i32>)
755 declare <4 x i32> @llvm.smax.v4i32(<4 x i32>, <4 x i32>)
756 declare <8 x i32> @llvm.smax.v8i32(<8 x i32>, <8 x i32>)
757 declare <1 x i64> @llvm.smax.v1i64(<1 x i64>, <1 x i64>)
758 declare <2 x i64> @llvm.smax.v2i64(<2 x i64>, <2 x i64>)
759 declare <4 x i64> @llvm.smax.v4i64(<4 x i64>, <4 x i64>)
; Declarations of the llvm.umin (unsigned minimum) intrinsic overloads called by the umin_* tests:
; i8, i16, i32 and i64 element types, three vector lengths each.
761 declare <8 x i8> @llvm.umin.v8i8(<8 x i8>, <8 x i8>)
762 declare <16 x i8> @llvm.umin.v16i8(<16 x i8>, <16 x i8>)
763 declare <32 x i8> @llvm.umin.v32i8(<32 x i8>, <32 x i8>)
764 declare <4 x i16> @llvm.umin.v4i16(<4 x i16>, <4 x i16>)
765 declare <8 x i16> @llvm.umin.v8i16(<8 x i16>, <8 x i16>)
766 declare <16 x i16> @llvm.umin.v16i16(<16 x i16>, <16 x i16>)
767 declare <2 x i32> @llvm.umin.v2i32(<2 x i32>, <2 x i32>)
768 declare <4 x i32> @llvm.umin.v4i32(<4 x i32>, <4 x i32>)
769 declare <8 x i32> @llvm.umin.v8i32(<8 x i32>, <8 x i32>)
770 declare <1 x i64> @llvm.umin.v1i64(<1 x i64>, <1 x i64>)
771 declare <2 x i64> @llvm.umin.v2i64(<2 x i64>, <2 x i64>)
772 declare <4 x i64> @llvm.umin.v4i64(<4 x i64>, <4 x i64>)
; Declarations of the llvm.umax (unsigned maximum) intrinsic overloads called by the umax_* tests:
; i8, i16, i32 and i64 element types, three vector lengths each.
774 declare <8 x i8> @llvm.umax.v8i8(<8 x i8>, <8 x i8>)
775 declare <16 x i8> @llvm.umax.v16i8(<16 x i8>, <16 x i8>)
776 declare <32 x i8> @llvm.umax.v32i8(<32 x i8>, <32 x i8>)
777 declare <4 x i16> @llvm.umax.v4i16(<4 x i16>, <4 x i16>)
778 declare <8 x i16> @llvm.umax.v8i16(<8 x i16>, <8 x i16>)
779 declare <16 x i16> @llvm.umax.v16i16(<16 x i16>, <16 x i16>)
780 declare <2 x i32> @llvm.umax.v2i32(<2 x i32>, <2 x i32>)
781 declare <4 x i32> @llvm.umax.v4i32(<4 x i32>, <4 x i32>)
782 declare <8 x i32> @llvm.umax.v8i32(<8 x i32>, <8 x i32>)
783 declare <1 x i64> @llvm.umax.v1i64(<1 x i64>, <1 x i64>)
784 declare <2 x i64> @llvm.umax.v2i64(<2 x i64>, <2 x i64>)
785 declare <4 x i64> @llvm.umax.v4i64(<4 x i64>, <4 x i64>)