; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mattr=+sve -force-streaming-compatible-sve < %s | FileCheck %s

target triple = "aarch64-unknown-linux-gnu"

;
; SMAX
;

define <8 x i8> @smax_v8i8(<8 x i8> %op1, <8 x i8> %op2) {
; CHECK-LABEL: smax_v8i8:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.b, vl8
; CHECK-NEXT:    // kill: def $d0 killed $d0 def $z0
; CHECK-NEXT:    // kill: def $d1 killed $d1 def $z1
; CHECK-NEXT:    smax z0.b, p0/m, z0.b, z1.b
; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $z0
; CHECK-NEXT:    ret
  %res = call <8 x i8> @llvm.smax.v8i8(<8 x i8> %op1, <8 x i8> %op2)
  ret <8 x i8> %res
}

define <16 x i8> @smax_v16i8(<16 x i8> %op1, <16 x i8> %op2) {
; CHECK-LABEL: smax_v16i8:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.b, vl16
; CHECK-NEXT:    // kill: def $q0 killed $q0 def $z0
; CHECK-NEXT:    // kill: def $q1 killed $q1 def $z1
; CHECK-NEXT:    smax z0.b, p0/m, z0.b, z1.b
; CHECK-NEXT:    // kill: def $q0 killed $q0 killed $z0
; CHECK-NEXT:    ret
  %res = call <16 x i8> @llvm.smax.v16i8(<16 x i8> %op1, <16 x i8> %op2)
  ret <16 x i8> %res
}

define void @smax_v32i8(ptr %a, ptr %b) {
; CHECK-LABEL: smax_v32i8:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.b, vl16
; CHECK-NEXT:    ldp q0, q3, [x1]
; CHECK-NEXT:    ldp q1, q2, [x0]
; CHECK-NEXT:    smax z0.b, p0/m, z0.b, z1.b
; CHECK-NEXT:    movprfx z1, z2
; CHECK-NEXT:    smax z1.b, p0/m, z1.b, z3.b
; CHECK-NEXT:    stp q0, q1, [x0]
; CHECK-NEXT:    ret
  %op1 = load <32 x i8>, ptr %a
  %op2 = load <32 x i8>, ptr %b
  %res = call <32 x i8> @llvm.smax.v32i8(<32 x i8> %op1, <32 x i8> %op2)
  store <32 x i8> %res, ptr %a
  ret void
}

define <4 x i16> @smax_v4i16(<4 x i16> %op1, <4 x i16> %op2) {
; CHECK-LABEL: smax_v4i16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.h, vl4
; CHECK-NEXT:    // kill: def $d0 killed $d0 def $z0
; CHECK-NEXT:    // kill: def $d1 killed $d1 def $z1
; CHECK-NEXT:    smax z0.h, p0/m, z0.h, z1.h
; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $z0
; CHECK-NEXT:    ret
  %res = call <4 x i16> @llvm.smax.v4i16(<4 x i16> %op1, <4 x i16> %op2)
  ret <4 x i16> %res
}

define <8 x i16> @smax_v8i16(<8 x i16> %op1, <8 x i16> %op2) {
; CHECK-LABEL: smax_v8i16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.h, vl8
; CHECK-NEXT:    // kill: def $q0 killed $q0 def $z0
; CHECK-NEXT:    // kill: def $q1 killed $q1 def $z1
; CHECK-NEXT:    smax z0.h, p0/m, z0.h, z1.h
; CHECK-NEXT:    // kill: def $q0 killed $q0 killed $z0
; CHECK-NEXT:    ret
  %res = call <8 x i16> @llvm.smax.v8i16(<8 x i16> %op1, <8 x i16> %op2)
  ret <8 x i16> %res
}

define void @smax_v16i16(ptr %a, ptr %b) {
; CHECK-LABEL: smax_v16i16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.h, vl8
; CHECK-NEXT:    ldp q0, q3, [x1]
; CHECK-NEXT:    ldp q1, q2, [x0]
; CHECK-NEXT:    smax z0.h, p0/m, z0.h, z1.h
; CHECK-NEXT:    movprfx z1, z2
; CHECK-NEXT:    smax z1.h, p0/m, z1.h, z3.h
; CHECK-NEXT:    stp q0, q1, [x0]
; CHECK-NEXT:    ret
  %op1 = load <16 x i16>, ptr %a
  %op2 = load <16 x i16>, ptr %b
  %res = call <16 x i16> @llvm.smax.v16i16(<16 x i16> %op1, <16 x i16> %op2)
  store <16 x i16> %res, ptr %a
  ret void
}

define <2 x i32> @smax_v2i32(<2 x i32> %op1, <2 x i32> %op2) {
; CHECK-LABEL: smax_v2i32:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.s, vl2
; CHECK-NEXT:    // kill: def $d0 killed $d0 def $z0
; CHECK-NEXT:    // kill: def $d1 killed $d1 def $z1
; CHECK-NEXT:    smax z0.s, p0/m, z0.s, z1.s
; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $z0
; CHECK-NEXT:    ret
  %res = call <2 x i32> @llvm.smax.v2i32(<2 x i32> %op1, <2 x i32> %op2)
  ret <2 x i32> %res
}

define <4 x i32> @smax_v4i32(<4 x i32> %op1, <4 x i32> %op2) {
; CHECK-LABEL: smax_v4i32:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.s, vl4
; CHECK-NEXT:    // kill: def $q0 killed $q0 def $z0
; CHECK-NEXT:    // kill: def $q1 killed $q1 def $z1
; CHECK-NEXT:    smax z0.s, p0/m, z0.s, z1.s
; CHECK-NEXT:    // kill: def $q0 killed $q0 killed $z0
; CHECK-NEXT:    ret
  %res = call <4 x i32> @llvm.smax.v4i32(<4 x i32> %op1, <4 x i32> %op2)
  ret <4 x i32> %res
}

define void @smax_v8i32(ptr %a, ptr %b) {
; CHECK-LABEL: smax_v8i32:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.s, vl4
; CHECK-NEXT:    ldp q0, q3, [x1]
; CHECK-NEXT:    ldp q1, q2, [x0]
; CHECK-NEXT:    smax z0.s, p0/m, z0.s, z1.s
; CHECK-NEXT:    movprfx z1, z2
; CHECK-NEXT:    smax z1.s, p0/m, z1.s, z3.s
; CHECK-NEXT:    stp q0, q1, [x0]
; CHECK-NEXT:    ret
  %op1 = load <8 x i32>, ptr %a
  %op2 = load <8 x i32>, ptr %b
  %res = call <8 x i32> @llvm.smax.v8i32(<8 x i32> %op1, <8 x i32> %op2)
  store <8 x i32> %res, ptr %a
  ret void
}

; Vector i64 max is not legal for NEON so use SVE when available.
define <1 x i64> @smax_v1i64(<1 x i64> %op1, <1 x i64> %op2) {
; CHECK-LABEL: smax_v1i64:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.d, vl1
; CHECK-NEXT:    // kill: def $d0 killed $d0 def $z0
; CHECK-NEXT:    // kill: def $d1 killed $d1 def $z1
; CHECK-NEXT:    smax z0.d, p0/m, z0.d, z1.d
; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $z0
; CHECK-NEXT:    ret
  %res = call <1 x i64> @llvm.smax.v1i64(<1 x i64> %op1, <1 x i64> %op2)
  ret <1 x i64> %res
}

; Vector i64 max is not legal for NEON so use SVE when available.
define <2 x i64> @smax_v2i64(<2 x i64> %op1, <2 x i64> %op2) {
; CHECK-LABEL: smax_v2i64:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.d, vl2
; CHECK-NEXT:    // kill: def $q0 killed $q0 def $z0
; CHECK-NEXT:    // kill: def $q1 killed $q1 def $z1
; CHECK-NEXT:    smax z0.d, p0/m, z0.d, z1.d
; CHECK-NEXT:    // kill: def $q0 killed $q0 killed $z0
; CHECK-NEXT:    ret
  %res = call <2 x i64> @llvm.smax.v2i64(<2 x i64> %op1, <2 x i64> %op2)
  ret <2 x i64> %res
}

define void @smax_v4i64(ptr %a, ptr %b) {
; CHECK-LABEL: smax_v4i64:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.d, vl2
; CHECK-NEXT:    ldp q0, q3, [x1]
; CHECK-NEXT:    ldp q1, q2, [x0]
; CHECK-NEXT:    smax z0.d, p0/m, z0.d, z1.d
; CHECK-NEXT:    movprfx z1, z2
; CHECK-NEXT:    smax z1.d, p0/m, z1.d, z3.d
; CHECK-NEXT:    stp q0, q1, [x0]
; CHECK-NEXT:    ret
  %op1 = load <4 x i64>, ptr %a
  %op2 = load <4 x i64>, ptr %b
  %res = call <4 x i64> @llvm.smax.v4i64(<4 x i64> %op1, <4 x i64> %op2)
  store <4 x i64> %res, ptr %a
  ret void
}

;
; SMIN
;

define <8 x i8> @smin_v8i8(<8 x i8> %op1, <8 x i8> %op2) {
; CHECK-LABEL: smin_v8i8:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.b, vl8
; CHECK-NEXT:    // kill: def $d0 killed $d0 def $z0
; CHECK-NEXT:    // kill: def $d1 killed $d1 def $z1
; CHECK-NEXT:    smin z0.b, p0/m, z0.b, z1.b
; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $z0
; CHECK-NEXT:    ret
  %res = call <8 x i8> @llvm.smin.v8i8(<8 x i8> %op1, <8 x i8> %op2)
  ret <8 x i8> %res
}

define <16 x i8> @smin_v16i8(<16 x i8> %op1, <16 x i8> %op2) {
; CHECK-LABEL: smin_v16i8:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.b, vl16
; CHECK-NEXT:    // kill: def $q0 killed $q0 def $z0
; CHECK-NEXT:    // kill: def $q1 killed $q1 def $z1
; CHECK-NEXT:    smin z0.b, p0/m, z0.b, z1.b
; CHECK-NEXT:    // kill: def $q0 killed $q0 killed $z0
; CHECK-NEXT:    ret
  %res = call <16 x i8> @llvm.smin.v16i8(<16 x i8> %op1, <16 x i8> %op2)
  ret <16 x i8> %res
}

define void @smin_v32i8(ptr %a, ptr %b) {
; CHECK-LABEL: smin_v32i8:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.b, vl16
; CHECK-NEXT:    ldp q0, q3, [x1]
; CHECK-NEXT:    ldp q1, q2, [x0]
; CHECK-NEXT:    smin z0.b, p0/m, z0.b, z1.b
; CHECK-NEXT:    movprfx z1, z2
; CHECK-NEXT:    smin z1.b, p0/m, z1.b, z3.b
; CHECK-NEXT:    stp q0, q1, [x0]
; CHECK-NEXT:    ret
  %op1 = load <32 x i8>, ptr %a
  %op2 = load <32 x i8>, ptr %b
  %res = call <32 x i8> @llvm.smin.v32i8(<32 x i8> %op1, <32 x i8> %op2)
  store <32 x i8> %res, ptr %a
  ret void
}

define <4 x i16> @smin_v4i16(<4 x i16> %op1, <4 x i16> %op2) {
; CHECK-LABEL: smin_v4i16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.h, vl4
; CHECK-NEXT:    // kill: def $d0 killed $d0 def $z0
; CHECK-NEXT:    // kill: def $d1 killed $d1 def $z1
; CHECK-NEXT:    smin z0.h, p0/m, z0.h, z1.h
; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $z0
; CHECK-NEXT:    ret
  %res = call <4 x i16> @llvm.smin.v4i16(<4 x i16> %op1, <4 x i16> %op2)
  ret <4 x i16> %res
}

define <8 x i16> @smin_v8i16(<8 x i16> %op1, <8 x i16> %op2) {
; CHECK-LABEL: smin_v8i16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.h, vl8
; CHECK-NEXT:    // kill: def $q0 killed $q0 def $z0
; CHECK-NEXT:    // kill: def $q1 killed $q1 def $z1
; CHECK-NEXT:    smin z0.h, p0/m, z0.h, z1.h
; CHECK-NEXT:    // kill: def $q0 killed $q0 killed $z0
; CHECK-NEXT:    ret
  %res = call <8 x i16> @llvm.smin.v8i16(<8 x i16> %op1, <8 x i16> %op2)
  ret <8 x i16> %res
}

define void @smin_v16i16(ptr %a, ptr %b) {
; CHECK-LABEL: smin_v16i16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.h, vl8
; CHECK-NEXT:    ldp q0, q3, [x1]
; CHECK-NEXT:    ldp q1, q2, [x0]
; CHECK-NEXT:    smin z0.h, p0/m, z0.h, z1.h
; CHECK-NEXT:    movprfx z1, z2
; CHECK-NEXT:    smin z1.h, p0/m, z1.h, z3.h
; CHECK-NEXT:    stp q0, q1, [x0]
; CHECK-NEXT:    ret
  %op1 = load <16 x i16>, ptr %a
  %op2 = load <16 x i16>, ptr %b
  %res = call <16 x i16> @llvm.smin.v16i16(<16 x i16> %op1, <16 x i16> %op2)
  store <16 x i16> %res, ptr %a
  ret void
}

define <2 x i32> @smin_v2i32(<2 x i32> %op1, <2 x i32> %op2) {
; CHECK-LABEL: smin_v2i32:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.s, vl2
; CHECK-NEXT:    // kill: def $d0 killed $d0 def $z0
; CHECK-NEXT:    // kill: def $d1 killed $d1 def $z1
; CHECK-NEXT:    smin z0.s, p0/m, z0.s, z1.s
; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $z0
; CHECK-NEXT:    ret
  %res = call <2 x i32> @llvm.smin.v2i32(<2 x i32> %op1, <2 x i32> %op2)
  ret <2 x i32> %res
}

define <4 x i32> @smin_v4i32(<4 x i32> %op1, <4 x i32> %op2) {
; CHECK-LABEL: smin_v4i32:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.s, vl4
; CHECK-NEXT:    // kill: def $q0 killed $q0 def $z0
; CHECK-NEXT:    // kill: def $q1 killed $q1 def $z1
; CHECK-NEXT:    smin z0.s, p0/m, z0.s, z1.s
; CHECK-NEXT:    // kill: def $q0 killed $q0 killed $z0
; CHECK-NEXT:    ret
  %res = call <4 x i32> @llvm.smin.v4i32(<4 x i32> %op1, <4 x i32> %op2)
  ret <4 x i32> %res
}

define void @smin_v8i32(ptr %a, ptr %b) {
; CHECK-LABEL: smin_v8i32:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.s, vl4
; CHECK-NEXT:    ldp q0, q3, [x1]
; CHECK-NEXT:    ldp q1, q2, [x0]
; CHECK-NEXT:    smin z0.s, p0/m, z0.s, z1.s
; CHECK-NEXT:    movprfx z1, z2
; CHECK-NEXT:    smin z1.s, p0/m, z1.s, z3.s
; CHECK-NEXT:    stp q0, q1, [x0]
; CHECK-NEXT:    ret
  %op1 = load <8 x i32>, ptr %a
  %op2 = load <8 x i32>, ptr %b
  %res = call <8 x i32> @llvm.smin.v8i32(<8 x i32> %op1, <8 x i32> %op2)
  store <8 x i32> %res, ptr %a
  ret void
}

; Vector i64 min is not legal for NEON so use SVE when available.
define <1 x i64> @smin_v1i64(<1 x i64> %op1, <1 x i64> %op2) {
; CHECK-LABEL: smin_v1i64:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.d, vl1
; CHECK-NEXT:    // kill: def $d0 killed $d0 def $z0
; CHECK-NEXT:    // kill: def $d1 killed $d1 def $z1
; CHECK-NEXT:    smin z0.d, p0/m, z0.d, z1.d
; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $z0
; CHECK-NEXT:    ret
  %res = call <1 x i64> @llvm.smin.v1i64(<1 x i64> %op1, <1 x i64> %op2)
  ret <1 x i64> %res
}

; Vector i64 min is not legal for NEON so use SVE when available.
define <2 x i64> @smin_v2i64(<2 x i64> %op1, <2 x i64> %op2) {
; CHECK-LABEL: smin_v2i64:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.d, vl2
; CHECK-NEXT:    // kill: def $q0 killed $q0 def $z0
; CHECK-NEXT:    // kill: def $q1 killed $q1 def $z1
; CHECK-NEXT:    smin z0.d, p0/m, z0.d, z1.d
; CHECK-NEXT:    // kill: def $q0 killed $q0 killed $z0
; CHECK-NEXT:    ret
  %res = call <2 x i64> @llvm.smin.v2i64(<2 x i64> %op1, <2 x i64> %op2)
  ret <2 x i64> %res
}

define void @smin_v4i64(ptr %a, ptr %b) {
; CHECK-LABEL: smin_v4i64:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.d, vl2
; CHECK-NEXT:    ldp q0, q3, [x1]
; CHECK-NEXT:    ldp q1, q2, [x0]
; CHECK-NEXT:    smin z0.d, p0/m, z0.d, z1.d
; CHECK-NEXT:    movprfx z1, z2
; CHECK-NEXT:    smin z1.d, p0/m, z1.d, z3.d
; CHECK-NEXT:    stp q0, q1, [x0]
; CHECK-NEXT:    ret
  %op1 = load <4 x i64>, ptr %a
  %op2 = load <4 x i64>, ptr %b
  %res = call <4 x i64> @llvm.smin.v4i64(<4 x i64> %op1, <4 x i64> %op2)
  store <4 x i64> %res, ptr %a
  ret void
}

;
; UMAX
;

define <8 x i8> @umax_v8i8(<8 x i8> %op1, <8 x i8> %op2) {
; CHECK-LABEL: umax_v8i8:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.b, vl8
; CHECK-NEXT:    // kill: def $d0 killed $d0 def $z0
; CHECK-NEXT:    // kill: def $d1 killed $d1 def $z1
; CHECK-NEXT:    umax z0.b, p0/m, z0.b, z1.b
; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $z0
; CHECK-NEXT:    ret
  %res = call <8 x i8> @llvm.umax.v8i8(<8 x i8> %op1, <8 x i8> %op2)
  ret <8 x i8> %res
}

define <16 x i8> @umax_v16i8(<16 x i8> %op1, <16 x i8> %op2) {
; CHECK-LABEL: umax_v16i8:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.b, vl16
; CHECK-NEXT:    // kill: def $q0 killed $q0 def $z0
; CHECK-NEXT:    // kill: def $q1 killed $q1 def $z1
; CHECK-NEXT:    umax z0.b, p0/m, z0.b, z1.b
; CHECK-NEXT:    // kill: def $q0 killed $q0 killed $z0
; CHECK-NEXT:    ret
  %res = call <16 x i8> @llvm.umax.v16i8(<16 x i8> %op1, <16 x i8> %op2)
  ret <16 x i8> %res
}

define void @umax_v32i8(ptr %a, ptr %b) {
; CHECK-LABEL: umax_v32i8:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.b, vl16
; CHECK-NEXT:    ldp q0, q3, [x1]
; CHECK-NEXT:    ldp q1, q2, [x0]
; CHECK-NEXT:    umax z0.b, p0/m, z0.b, z1.b
; CHECK-NEXT:    movprfx z1, z2
; CHECK-NEXT:    umax z1.b, p0/m, z1.b, z3.b
; CHECK-NEXT:    stp q0, q1, [x0]
; CHECK-NEXT:    ret
  %op1 = load <32 x i8>, ptr %a
  %op2 = load <32 x i8>, ptr %b
  %res = call <32 x i8> @llvm.umax.v32i8(<32 x i8> %op1, <32 x i8> %op2)
  store <32 x i8> %res, ptr %a
  ret void
}

define <4 x i16> @umax_v4i16(<4 x i16> %op1, <4 x i16> %op2) {
; CHECK-LABEL: umax_v4i16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.h, vl4
; CHECK-NEXT:    // kill: def $d0 killed $d0 def $z0
; CHECK-NEXT:    // kill: def $d1 killed $d1 def $z1
; CHECK-NEXT:    umax z0.h, p0/m, z0.h, z1.h
; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $z0
; CHECK-NEXT:    ret
  %res = call <4 x i16> @llvm.umax.v4i16(<4 x i16> %op1, <4 x i16> %op2)
  ret <4 x i16> %res
}

define <8 x i16> @umax_v8i16(<8 x i16> %op1, <8 x i16> %op2) {
; CHECK-LABEL: umax_v8i16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.h, vl8
; CHECK-NEXT:    // kill: def $q0 killed $q0 def $z0
; CHECK-NEXT:    // kill: def $q1 killed $q1 def $z1
; CHECK-NEXT:    umax z0.h, p0/m, z0.h, z1.h
; CHECK-NEXT:    // kill: def $q0 killed $q0 killed $z0
; CHECK-NEXT:    ret
  %res = call <8 x i16> @llvm.umax.v8i16(<8 x i16> %op1, <8 x i16> %op2)
  ret <8 x i16> %res
}

define void @umax_v16i16(ptr %a, ptr %b) {
; CHECK-LABEL: umax_v16i16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.h, vl8
; CHECK-NEXT:    ldp q0, q3, [x1]
; CHECK-NEXT:    ldp q1, q2, [x0]
; CHECK-NEXT:    umax z0.h, p0/m, z0.h, z1.h
; CHECK-NEXT:    movprfx z1, z2
; CHECK-NEXT:    umax z1.h, p0/m, z1.h, z3.h
; CHECK-NEXT:    stp q0, q1, [x0]
; CHECK-NEXT:    ret
  %op1 = load <16 x i16>, ptr %a
  %op2 = load <16 x i16>, ptr %b
  %res = call <16 x i16> @llvm.umax.v16i16(<16 x i16> %op1, <16 x i16> %op2)
  store <16 x i16> %res, ptr %a
  ret void
}

define <2 x i32> @umax_v2i32(<2 x i32> %op1, <2 x i32> %op2) {
; CHECK-LABEL: umax_v2i32:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.s, vl2
; CHECK-NEXT:    // kill: def $d0 killed $d0 def $z0
; CHECK-NEXT:    // kill: def $d1 killed $d1 def $z1
; CHECK-NEXT:    umax z0.s, p0/m, z0.s, z1.s
; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $z0
; CHECK-NEXT:    ret
  %res = call <2 x i32> @llvm.umax.v2i32(<2 x i32> %op1, <2 x i32> %op2)
  ret <2 x i32> %res
}

define <4 x i32> @umax_v4i32(<4 x i32> %op1, <4 x i32> %op2) {
; CHECK-LABEL: umax_v4i32:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.s, vl4
; CHECK-NEXT:    // kill: def $q0 killed $q0 def $z0
; CHECK-NEXT:    // kill: def $q1 killed $q1 def $z1
; CHECK-NEXT:    umax z0.s, p0/m, z0.s, z1.s
; CHECK-NEXT:    // kill: def $q0 killed $q0 killed $z0
; CHECK-NEXT:    ret
  %res = call <4 x i32> @llvm.umax.v4i32(<4 x i32> %op1, <4 x i32> %op2)
  ret <4 x i32> %res
}

define void @umax_v8i32(ptr %a, ptr %b) {
; CHECK-LABEL: umax_v8i32:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.s, vl4
; CHECK-NEXT:    ldp q0, q3, [x1]
; CHECK-NEXT:    ldp q1, q2, [x0]
; CHECK-NEXT:    umax z0.s, p0/m, z0.s, z1.s
; CHECK-NEXT:    movprfx z1, z2
; CHECK-NEXT:    umax z1.s, p0/m, z1.s, z3.s
; CHECK-NEXT:    stp q0, q1, [x0]
; CHECK-NEXT:    ret
  %op1 = load <8 x i32>, ptr %a
  %op2 = load <8 x i32>, ptr %b
  %res = call <8 x i32> @llvm.umax.v8i32(<8 x i32> %op1, <8 x i32> %op2)
  store <8 x i32> %res, ptr %a
  ret void
}

; Vector i64 max is not legal for NEON so use SVE when available.
define <1 x i64> @umax_v1i64(<1 x i64> %op1, <1 x i64> %op2) {
; CHECK-LABEL: umax_v1i64:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.d, vl1
; CHECK-NEXT:    // kill: def $d0 killed $d0 def $z0
; CHECK-NEXT:    // kill: def $d1 killed $d1 def $z1
; CHECK-NEXT:    umax z0.d, p0/m, z0.d, z1.d
; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $z0
; CHECK-NEXT:    ret
  %res = call <1 x i64> @llvm.umax.v1i64(<1 x i64> %op1, <1 x i64> %op2)
  ret <1 x i64> %res
}

; Vector i64 max is not legal for NEON so use SVE when available.
define <2 x i64> @umax_v2i64(<2 x i64> %op1, <2 x i64> %op2) {
; CHECK-LABEL: umax_v2i64:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.d, vl2
; CHECK-NEXT:    // kill: def $q0 killed $q0 def $z0
; CHECK-NEXT:    // kill: def $q1 killed $q1 def $z1
; CHECK-NEXT:    umax z0.d, p0/m, z0.d, z1.d
; CHECK-NEXT:    // kill: def $q0 killed $q0 killed $z0
; CHECK-NEXT:    ret
  %res = call <2 x i64> @llvm.umax.v2i64(<2 x i64> %op1, <2 x i64> %op2)
  ret <2 x i64> %res
}

define void @umax_v4i64(ptr %a, ptr %b) {
; CHECK-LABEL: umax_v4i64:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.d, vl2
; CHECK-NEXT:    ldp q0, q3, [x1]
; CHECK-NEXT:    ldp q1, q2, [x0]
; CHECK-NEXT:    umax z0.d, p0/m, z0.d, z1.d
; CHECK-NEXT:    movprfx z1, z2
; CHECK-NEXT:    umax z1.d, p0/m, z1.d, z3.d
; CHECK-NEXT:    stp q0, q1, [x0]
; CHECK-NEXT:    ret
  %op1 = load <4 x i64>, ptr %a
  %op2 = load <4 x i64>, ptr %b
  %res = call <4 x i64> @llvm.umax.v4i64(<4 x i64> %op1, <4 x i64> %op2)
  store <4 x i64> %res, ptr %a
  ret void
}

;
; UMIN
;

define <8 x i8> @umin_v8i8(<8 x i8> %op1, <8 x i8> %op2) {
; CHECK-LABEL: umin_v8i8:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.b, vl8
; CHECK-NEXT:    // kill: def $d0 killed $d0 def $z0
; CHECK-NEXT:    // kill: def $d1 killed $d1 def $z1
; CHECK-NEXT:    umin z0.b, p0/m, z0.b, z1.b
; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $z0
; CHECK-NEXT:    ret
  %res = call <8 x i8> @llvm.umin.v8i8(<8 x i8> %op1, <8 x i8> %op2)
  ret <8 x i8> %res
}

define <16 x i8> @umin_v16i8(<16 x i8> %op1, <16 x i8> %op2) {
; CHECK-LABEL: umin_v16i8:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.b, vl16
; CHECK-NEXT:    // kill: def $q0 killed $q0 def $z0
; CHECK-NEXT:    // kill: def $q1 killed $q1 def $z1
; CHECK-NEXT:    umin z0.b, p0/m, z0.b, z1.b
; CHECK-NEXT:    // kill: def $q0 killed $q0 killed $z0
; CHECK-NEXT:    ret
  %res = call <16 x i8> @llvm.umin.v16i8(<16 x i8> %op1, <16 x i8> %op2)
  ret <16 x i8> %res
}

define void @umin_v32i8(ptr %a, ptr %b) {
; CHECK-LABEL: umin_v32i8:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.b, vl16
; CHECK-NEXT:    ldp q0, q3, [x1]
; CHECK-NEXT:    ldp q1, q2, [x0]
; CHECK-NEXT:    umin z0.b, p0/m, z0.b, z1.b
; CHECK-NEXT:    movprfx z1, z2
; CHECK-NEXT:    umin z1.b, p0/m, z1.b, z3.b
; CHECK-NEXT:    stp q0, q1, [x0]
; CHECK-NEXT:    ret
  %op1 = load <32 x i8>, ptr %a
  %op2 = load <32 x i8>, ptr %b
  %res = call <32 x i8> @llvm.umin.v32i8(<32 x i8> %op1, <32 x i8> %op2)
  store <32 x i8> %res, ptr %a
  ret void
}

define <4 x i16> @umin_v4i16(<4 x i16> %op1, <4 x i16> %op2) {
; CHECK-LABEL: umin_v4i16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.h, vl4
; CHECK-NEXT:    // kill: def $d0 killed $d0 def $z0
; CHECK-NEXT:    // kill: def $d1 killed $d1 def $z1
; CHECK-NEXT:    umin z0.h, p0/m, z0.h, z1.h
; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $z0
; CHECK-NEXT:    ret
  %res = call <4 x i16> @llvm.umin.v4i16(<4 x i16> %op1, <4 x i16> %op2)
  ret <4 x i16> %res
}

define <8 x i16> @umin_v8i16(<8 x i16> %op1, <8 x i16> %op2) {
; CHECK-LABEL: umin_v8i16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.h, vl8
; CHECK-NEXT:    // kill: def $q0 killed $q0 def $z0
; CHECK-NEXT:    // kill: def $q1 killed $q1 def $z1
; CHECK-NEXT:    umin z0.h, p0/m, z0.h, z1.h
; CHECK-NEXT:    // kill: def $q0 killed $q0 killed $z0
; CHECK-NEXT:    ret
  %res = call <8 x i16> @llvm.umin.v8i16(<8 x i16> %op1, <8 x i16> %op2)
  ret <8 x i16> %res
}

define void @umin_v16i16(ptr %a, ptr %b) {
; CHECK-LABEL: umin_v16i16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.h, vl8
; CHECK-NEXT:    ldp q0, q3, [x1]
; CHECK-NEXT:    ldp q1, q2, [x0]
; CHECK-NEXT:    umin z0.h, p0/m, z0.h, z1.h
; CHECK-NEXT:    movprfx z1, z2
; CHECK-NEXT:    umin z1.h, p0/m, z1.h, z3.h
; CHECK-NEXT:    stp q0, q1, [x0]
; CHECK-NEXT:    ret
  %op1 = load <16 x i16>, ptr %a
  %op2 = load <16 x i16>, ptr %b
  %res = call <16 x i16> @llvm.umin.v16i16(<16 x i16> %op1, <16 x i16> %op2)
  store <16 x i16> %res, ptr %a
  ret void
}

define <2 x i32> @umin_v2i32(<2 x i32> %op1, <2 x i32> %op2) {
; CHECK-LABEL: umin_v2i32:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.s, vl2
; CHECK-NEXT:    // kill: def $d0 killed $d0 def $z0
; CHECK-NEXT:    // kill: def $d1 killed $d1 def $z1
; CHECK-NEXT:    umin z0.s, p0/m, z0.s, z1.s
; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $z0
; CHECK-NEXT:    ret
  %res = call <2 x i32> @llvm.umin.v2i32(<2 x i32> %op1, <2 x i32> %op2)
  ret <2 x i32> %res
}

define <4 x i32> @umin_v4i32(<4 x i32> %op1, <4 x i32> %op2) {
; CHECK-LABEL: umin_v4i32:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.s, vl4
; CHECK-NEXT:    // kill: def $q0 killed $q0 def $z0
; CHECK-NEXT:    // kill: def $q1 killed $q1 def $z1
; CHECK-NEXT:    umin z0.s, p0/m, z0.s, z1.s
; CHECK-NEXT:    // kill: def $q0 killed $q0 killed $z0
; CHECK-NEXT:    ret
  %res = call <4 x i32> @llvm.umin.v4i32(<4 x i32> %op1, <4 x i32> %op2)
  ret <4 x i32> %res
}

define void @umin_v8i32(ptr %a, ptr %b) {
; CHECK-LABEL: umin_v8i32:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.s, vl4
; CHECK-NEXT:    ldp q0, q3, [x1]
; CHECK-NEXT:    ldp q1, q2, [x0]
; CHECK-NEXT:    umin z0.s, p0/m, z0.s, z1.s
; CHECK-NEXT:    movprfx z1, z2
; CHECK-NEXT:    umin z1.s, p0/m, z1.s, z3.s
; CHECK-NEXT:    stp q0, q1, [x0]
; CHECK-NEXT:    ret
  %op1 = load <8 x i32>, ptr %a
  %op2 = load <8 x i32>, ptr %b
  %res = call <8 x i32> @llvm.umin.v8i32(<8 x i32> %op1, <8 x i32> %op2)
  store <8 x i32> %res, ptr %a
  ret void
}

; Vector i64 min is not legal for NEON so use SVE when available.
define <1 x i64> @umin_v1i64(<1 x i64> %op1, <1 x i64> %op2) {
; CHECK-LABEL: umin_v1i64:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.d, vl1
; CHECK-NEXT:    // kill: def $d0 killed $d0 def $z0
; CHECK-NEXT:    // kill: def $d1 killed $d1 def $z1
; CHECK-NEXT:    umin z0.d, p0/m, z0.d, z1.d
; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $z0
; CHECK-NEXT:    ret
  %res = call <1 x i64> @llvm.umin.v1i64(<1 x i64> %op1, <1 x i64> %op2)
  ret <1 x i64> %res
}

; Vector i64 min is not legal for NEON so use SVE when available.
define <2 x i64> @umin_v2i64(<2 x i64> %op1, <2 x i64> %op2) {
; CHECK-LABEL: umin_v2i64:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.d, vl2
; CHECK-NEXT:    // kill: def $q0 killed $q0 def $z0
; CHECK-NEXT:    // kill: def $q1 killed $q1 def $z1
; CHECK-NEXT:    umin z0.d, p0/m, z0.d, z1.d
; CHECK-NEXT:    // kill: def $q0 killed $q0 killed $z0
; CHECK-NEXT:    ret
  %res = call <2 x i64> @llvm.umin.v2i64(<2 x i64> %op1, <2 x i64> %op2)
  ret <2 x i64> %res
}

define void @umin_v4i64(ptr %a, ptr %b) {
; CHECK-LABEL: umin_v4i64:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.d, vl2
; CHECK-NEXT:    ldp q0, q3, [x1]
; CHECK-NEXT:    ldp q1, q2, [x0]
; CHECK-NEXT:    umin z0.d, p0/m, z0.d, z1.d
; CHECK-NEXT:    movprfx z1, z2
; CHECK-NEXT:    umin z1.d, p0/m, z1.d, z3.d
; CHECK-NEXT:    stp q0, q1, [x0]
; CHECK-NEXT:    ret
  %op1 = load <4 x i64>, ptr %a
  %op2 = load <4 x i64>, ptr %b
  %res = call <4 x i64> @llvm.umin.v4i64(<4 x i64> %op1, <4 x i64> %op2)
  store <4 x i64> %res, ptr %a
  ret void
}

declare <8 x i8> @llvm.smin.v8i8(<8 x i8>, <8 x i8>)
declare <16 x i8> @llvm.smin.v16i8(<16 x i8>, <16 x i8>)
declare <32 x i8> @llvm.smin.v32i8(<32 x i8>, <32 x i8>)
declare <4 x i16> @llvm.smin.v4i16(<4 x i16>, <4 x i16>)
declare <8 x i16> @llvm.smin.v8i16(<8 x i16>, <8 x i16>)
declare <16 x i16> @llvm.smin.v16i16(<16 x i16>, <16 x i16>)
declare <2 x i32> @llvm.smin.v2i32(<2 x i32>, <2 x i32>)
declare <4 x i32> @llvm.smin.v4i32(<4 x i32>, <4 x i32>)
declare <8 x i32> @llvm.smin.v8i32(<8 x i32>, <8 x i32>)
declare <1 x i64> @llvm.smin.v1i64(<1 x i64>, <1 x i64>)
declare <2 x i64> @llvm.smin.v2i64(<2 x i64>, <2 x i64>)
declare <4 x i64> @llvm.smin.v4i64(<4 x i64>, <4 x i64>)

declare <8 x i8> @llvm.smax.v8i8(<8 x i8>, <8 x i8>)
declare <16 x i8> @llvm.smax.v16i8(<16 x i8>, <16 x i8>)
declare <32 x i8> @llvm.smax.v32i8(<32 x i8>, <32 x i8>)
declare <4 x i16> @llvm.smax.v4i16(<4 x i16>, <4 x i16>)
declare <8 x i16> @llvm.smax.v8i16(<8 x i16>, <8 x i16>)
declare <16 x i16> @llvm.smax.v16i16(<16 x i16>, <16 x i16>)
declare <2 x i32> @llvm.smax.v2i32(<2 x i32>, <2 x i32>)
declare <4 x i32> @llvm.smax.v4i32(<4 x i32>, <4 x i32>)
declare <8 x i32> @llvm.smax.v8i32(<8 x i32>, <8 x i32>)
declare <1 x i64> @llvm.smax.v1i64(<1 x i64>, <1 x i64>)
declare <2 x i64> @llvm.smax.v2i64(<2 x i64>, <2 x i64>)
declare <4 x i64> @llvm.smax.v4i64(<4 x i64>, <4 x i64>)

declare <8 x i8> @llvm.umin.v8i8(<8 x i8>, <8 x i8>)
declare <16 x i8> @llvm.umin.v16i8(<16 x i8>, <16 x i8>)
declare <32 x i8> @llvm.umin.v32i8(<32 x i8>, <32 x i8>)
declare <4 x i16> @llvm.umin.v4i16(<4 x i16>, <4 x i16>)
declare <8 x i16> @llvm.umin.v8i16(<8 x i16>, <8 x i16>)
declare <16 x i16> @llvm.umin.v16i16(<16 x i16>, <16 x i16>)
declare <2 x i32> @llvm.umin.v2i32(<2 x i32>, <2 x i32>)
declare <4 x i32> @llvm.umin.v4i32(<4 x i32>, <4 x i32>)
declare <8 x i32> @llvm.umin.v8i32(<8 x i32>, <8 x i32>)
declare <1 x i64> @llvm.umin.v1i64(<1 x i64>, <1 x i64>)
declare <2 x i64> @llvm.umin.v2i64(<2 x i64>, <2 x i64>)
declare <4 x i64> @llvm.umin.v4i64(<4 x i64>, <4 x i64>)

declare <8 x i8> @llvm.umax.v8i8(<8 x i8>, <8 x i8>)
declare <16 x i8> @llvm.umax.v16i8(<16 x i8>, <16 x i8>)
declare <32 x i8> @llvm.umax.v32i8(<32 x i8>, <32 x i8>)
declare <4 x i16> @llvm.umax.v4i16(<4 x i16>, <4 x i16>)
declare <8 x i16> @llvm.umax.v8i16(<8 x i16>, <8 x i16>)
declare <16 x i16> @llvm.umax.v16i16(<16 x i16>, <16 x i16>)
declare <2 x i32> @llvm.umax.v2i32(<2 x i32>, <2 x i32>)
declare <4 x i32> @llvm.umax.v4i32(<4 x i32>, <4 x i32>)
declare <8 x i32> @llvm.umax.v8i32(<8 x i32>, <8 x i32>)
declare <1 x i64> @llvm.umax.v1i64(<1 x i64>, <1 x i64>)
declare <2 x i64> @llvm.umax.v2i64(<2 x i64>, <2 x i64>)
declare <4 x i64> @llvm.umax.v4i64(<4 x i64>, <4 x i64>)