; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mattr=+sve -force-streaming-compatible-sve < %s | FileCheck %s
; RUN: llc -mattr=+sme -force-streaming-compatible-sve < %s | FileCheck %s

target triple = "aarch64-unknown-linux-gnu"
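
; These tests verify that fixed-length vector integer reductions
; (llvm.vector.reduce.add/smax/smin/umax/umin) are lowered to SVE
; reduction instructions when streaming-compatible code generation is
; forced, since NEON must not be used in streaming mode.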
;
; UADDV
;

define i8 @uaddv_v8i8(<8 x i8> %a) {
; CHECK-LABEL: uaddv_v8i8:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.b, vl8
; CHECK-NEXT:    // kill: def $d0 killed $d0 def $z0
; CHECK-NEXT:    uaddv d0, p0, z0.b
; CHECK-NEXT:    fmov x0, d0
; CHECK-NEXT:    // kill: def $w0 killed $w0 killed $x0
; CHECK-NEXT:    ret
  %res = call i8 @llvm.vector.reduce.add.v8i8(<8 x i8> %a)
  ret i8 %res
}
define i8 @uaddv_v16i8(<16 x i8> %a) {
; CHECK-LABEL: uaddv_v16i8:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.b, vl16
; CHECK-NEXT:    // kill: def $q0 killed $q0 def $z0
; CHECK-NEXT:    uaddv d0, p0, z0.b
; CHECK-NEXT:    fmov x0, d0
; CHECK-NEXT:    // kill: def $w0 killed $w0 killed $x0
; CHECK-NEXT:    ret
  %res = call i8 @llvm.vector.reduce.add.v16i8(<16 x i8> %a)
  ret i8 %res
}
define i8 @uaddv_v32i8(ptr %a) {
; CHECK-LABEL: uaddv_v32i8:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.b, vl16
; CHECK-NEXT:    ldp q1, q0, [x0]
; CHECK-NEXT:    add z0.b, z1.b, z0.b
; CHECK-NEXT:    uaddv d0, p0, z0.b
; CHECK-NEXT:    fmov x0, d0
; CHECK-NEXT:    // kill: def $w0 killed $w0 killed $x0
; CHECK-NEXT:    ret
  %op = load <32 x i8>, ptr %a
  %res = call i8 @llvm.vector.reduce.add.v32i8(<32 x i8> %op)
  ret i8 %res
}
define i16 @uaddv_v4i16(<4 x i16> %a) {
; CHECK-LABEL: uaddv_v4i16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.h, vl4
; CHECK-NEXT:    // kill: def $d0 killed $d0 def $z0
; CHECK-NEXT:    uaddv d0, p0, z0.h
; CHECK-NEXT:    fmov x0, d0
; CHECK-NEXT:    // kill: def $w0 killed $w0 killed $x0
; CHECK-NEXT:    ret
  %res = call i16 @llvm.vector.reduce.add.v4i16(<4 x i16> %a)
  ret i16 %res
}
define i16 @uaddv_v8i16(<8 x i16> %a) {
; CHECK-LABEL: uaddv_v8i16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.h, vl8
; CHECK-NEXT:    // kill: def $q0 killed $q0 def $z0
; CHECK-NEXT:    uaddv d0, p0, z0.h
; CHECK-NEXT:    fmov x0, d0
; CHECK-NEXT:    // kill: def $w0 killed $w0 killed $x0
; CHECK-NEXT:    ret
  %res = call i16 @llvm.vector.reduce.add.v8i16(<8 x i16> %a)
  ret i16 %res
}
define i16 @uaddv_v16i16(ptr %a) {
; CHECK-LABEL: uaddv_v16i16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.h, vl8
; CHECK-NEXT:    ldp q1, q0, [x0]
; CHECK-NEXT:    add z0.h, z1.h, z0.h
; CHECK-NEXT:    uaddv d0, p0, z0.h
; CHECK-NEXT:    fmov x0, d0
; CHECK-NEXT:    // kill: def $w0 killed $w0 killed $x0
; CHECK-NEXT:    ret
  %op = load <16 x i16>, ptr %a
  %res = call i16 @llvm.vector.reduce.add.v16i16(<16 x i16> %op)
  ret i16 %res
}
define i32 @uaddv_v2i32(<2 x i32> %a) {
; CHECK-LABEL: uaddv_v2i32:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.s, vl2
; CHECK-NEXT:    // kill: def $d0 killed $d0 def $z0
; CHECK-NEXT:    uaddv d0, p0, z0.s
; CHECK-NEXT:    fmov x0, d0
; CHECK-NEXT:    // kill: def $w0 killed $w0 killed $x0
; CHECK-NEXT:    ret
  %res = call i32 @llvm.vector.reduce.add.v2i32(<2 x i32> %a)
  ret i32 %res
}
define i32 @uaddv_v4i32(<4 x i32> %a) {
; CHECK-LABEL: uaddv_v4i32:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.s, vl4
; CHECK-NEXT:    // kill: def $q0 killed $q0 def $z0
; CHECK-NEXT:    uaddv d0, p0, z0.s
; CHECK-NEXT:    fmov x0, d0
; CHECK-NEXT:    // kill: def $w0 killed $w0 killed $x0
; CHECK-NEXT:    ret
  %res = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> %a)
  ret i32 %res
}
define i32 @uaddv_v8i32(ptr %a) {
; CHECK-LABEL: uaddv_v8i32:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.s, vl4
; CHECK-NEXT:    ldp q1, q0, [x0]
; CHECK-NEXT:    add z0.s, z1.s, z0.s
; CHECK-NEXT:    uaddv d0, p0, z0.s
; CHECK-NEXT:    fmov x0, d0
; CHECK-NEXT:    // kill: def $w0 killed $w0 killed $x0
; CHECK-NEXT:    ret
  %op = load <8 x i32>, ptr %a
  %res = call i32 @llvm.vector.reduce.add.v8i32(<8 x i32> %op)
  ret i32 %res
}
define i64 @uaddv_v2i64(<2 x i64> %a) {
; CHECK-LABEL: uaddv_v2i64:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.d, vl2
; CHECK-NEXT:    // kill: def $q0 killed $q0 def $z0
; CHECK-NEXT:    uaddv d0, p0, z0.d
; CHECK-NEXT:    fmov x0, d0
; CHECK-NEXT:    ret
  %res = call i64 @llvm.vector.reduce.add.v2i64(<2 x i64> %a)
  ret i64 %res
}
define i64 @uaddv_v4i64(ptr %a) {
; CHECK-LABEL: uaddv_v4i64:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.d, vl2
; CHECK-NEXT:    ldp q1, q0, [x0]
; CHECK-NEXT:    add z0.d, z1.d, z0.d
; CHECK-NEXT:    uaddv d0, p0, z0.d
; CHECK-NEXT:    fmov x0, d0
; CHECK-NEXT:    ret
  %op = load <4 x i64>, ptr %a
  %res = call i64 @llvm.vector.reduce.add.v4i64(<4 x i64> %op)
  ret i64 %res
}
;
; SMAXV
;

define i8 @smaxv_v8i8(<8 x i8> %a) {
; CHECK-LABEL: smaxv_v8i8:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.b, vl8
; CHECK-NEXT:    // kill: def $d0 killed $d0 def $z0
; CHECK-NEXT:    smaxv b0, p0, z0.b
; CHECK-NEXT:    fmov w0, s0
; CHECK-NEXT:    ret
  %res = call i8 @llvm.vector.reduce.smax.v8i8(<8 x i8> %a)
  ret i8 %res
}
define i8 @smaxv_v16i8(<16 x i8> %a) {
; CHECK-LABEL: smaxv_v16i8:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.b, vl16
; CHECK-NEXT:    // kill: def $q0 killed $q0 def $z0
; CHECK-NEXT:    smaxv b0, p0, z0.b
; CHECK-NEXT:    fmov w0, s0
; CHECK-NEXT:    ret
  %res = call i8 @llvm.vector.reduce.smax.v16i8(<16 x i8> %a)
  ret i8 %res
}
define i8 @smaxv_v32i8(ptr %a) {
; CHECK-LABEL: smaxv_v32i8:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.b, vl16
; CHECK-NEXT:    ldp q1, q0, [x0]
; CHECK-NEXT:    smax z0.b, p0/m, z0.b, z1.b
; CHECK-NEXT:    smaxv b0, p0, z0.b
; CHECK-NEXT:    fmov w0, s0
; CHECK-NEXT:    ret
  %op = load <32 x i8>, ptr %a
  %res = call i8 @llvm.vector.reduce.smax.v32i8(<32 x i8> %op)
  ret i8 %res
}
define i16 @smaxv_v4i16(<4 x i16> %a) {
; CHECK-LABEL: smaxv_v4i16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.h, vl4
; CHECK-NEXT:    // kill: def $d0 killed $d0 def $z0
; CHECK-NEXT:    smaxv h0, p0, z0.h
; CHECK-NEXT:    fmov w0, s0
; CHECK-NEXT:    ret
  %res = call i16 @llvm.vector.reduce.smax.v4i16(<4 x i16> %a)
  ret i16 %res
}
define i16 @smaxv_v8i16(<8 x i16> %a) {
; CHECK-LABEL: smaxv_v8i16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.h, vl8
; CHECK-NEXT:    // kill: def $q0 killed $q0 def $z0
; CHECK-NEXT:    smaxv h0, p0, z0.h
; CHECK-NEXT:    fmov w0, s0
; CHECK-NEXT:    ret
  %res = call i16 @llvm.vector.reduce.smax.v8i16(<8 x i16> %a)
  ret i16 %res
}
define i16 @smaxv_v16i16(ptr %a) {
; CHECK-LABEL: smaxv_v16i16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.h, vl8
; CHECK-NEXT:    ldp q1, q0, [x0]
; CHECK-NEXT:    smax z0.h, p0/m, z0.h, z1.h
; CHECK-NEXT:    smaxv h0, p0, z0.h
; CHECK-NEXT:    fmov w0, s0
; CHECK-NEXT:    ret
  %op = load <16 x i16>, ptr %a
  %res = call i16 @llvm.vector.reduce.smax.v16i16(<16 x i16> %op)
  ret i16 %res
}
define i32 @smaxv_v2i32(<2 x i32> %a) {
; CHECK-LABEL: smaxv_v2i32:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.s, vl2
; CHECK-NEXT:    // kill: def $d0 killed $d0 def $z0
; CHECK-NEXT:    smaxv s0, p0, z0.s
; CHECK-NEXT:    fmov w0, s0
; CHECK-NEXT:    ret
  %res = call i32 @llvm.vector.reduce.smax.v2i32(<2 x i32> %a)
  ret i32 %res
}
define i32 @smaxv_v4i32(<4 x i32> %a) {
; CHECK-LABEL: smaxv_v4i32:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.s, vl4
; CHECK-NEXT:    // kill: def $q0 killed $q0 def $z0
; CHECK-NEXT:    smaxv s0, p0, z0.s
; CHECK-NEXT:    fmov w0, s0
; CHECK-NEXT:    ret
  %res = call i32 @llvm.vector.reduce.smax.v4i32(<4 x i32> %a)
  ret i32 %res
}
define i32 @smaxv_v8i32(ptr %a) {
; CHECK-LABEL: smaxv_v8i32:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.s, vl4
; CHECK-NEXT:    ldp q1, q0, [x0]
; CHECK-NEXT:    smax z0.s, p0/m, z0.s, z1.s
; CHECK-NEXT:    smaxv s0, p0, z0.s
; CHECK-NEXT:    fmov w0, s0
; CHECK-NEXT:    ret
  %op = load <8 x i32>, ptr %a
  %res = call i32 @llvm.vector.reduce.smax.v8i32(<8 x i32> %op)
  ret i32 %res
}
; No NEON 64-bit vector SMAXV support. Use SVE.
define i64 @smaxv_v2i64(<2 x i64> %a) {
; CHECK-LABEL: smaxv_v2i64:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.d, vl2
; CHECK-NEXT:    // kill: def $q0 killed $q0 def $z0
; CHECK-NEXT:    smaxv d0, p0, z0.d
; CHECK-NEXT:    fmov x0, d0
; CHECK-NEXT:    ret
  %res = call i64 @llvm.vector.reduce.smax.v2i64(<2 x i64> %a)
  ret i64 %res
}
define i64 @smaxv_v4i64(ptr %a) {
; CHECK-LABEL: smaxv_v4i64:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.d, vl2
; CHECK-NEXT:    ldp q1, q0, [x0]
; CHECK-NEXT:    smax z0.d, p0/m, z0.d, z1.d
; CHECK-NEXT:    smaxv d0, p0, z0.d
; CHECK-NEXT:    fmov x0, d0
; CHECK-NEXT:    ret
  %op = load <4 x i64>, ptr %a
  %res = call i64 @llvm.vector.reduce.smax.v4i64(<4 x i64> %op)
  ret i64 %res
}
;
; SMINV
;

define i8 @sminv_v8i8(<8 x i8> %a) {
; CHECK-LABEL: sminv_v8i8:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.b, vl8
; CHECK-NEXT:    // kill: def $d0 killed $d0 def $z0
; CHECK-NEXT:    sminv b0, p0, z0.b
; CHECK-NEXT:    fmov w0, s0
; CHECK-NEXT:    ret
  %res = call i8 @llvm.vector.reduce.smin.v8i8(<8 x i8> %a)
  ret i8 %res
}
define i8 @sminv_v16i8(<16 x i8> %a) {
; CHECK-LABEL: sminv_v16i8:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.b, vl16
; CHECK-NEXT:    // kill: def $q0 killed $q0 def $z0
; CHECK-NEXT:    sminv b0, p0, z0.b
; CHECK-NEXT:    fmov w0, s0
; CHECK-NEXT:    ret
  %res = call i8 @llvm.vector.reduce.smin.v16i8(<16 x i8> %a)
  ret i8 %res
}
define i8 @sminv_v32i8(ptr %a) {
; CHECK-LABEL: sminv_v32i8:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.b, vl16
; CHECK-NEXT:    ldp q1, q0, [x0]
; CHECK-NEXT:    smin z0.b, p0/m, z0.b, z1.b
; CHECK-NEXT:    sminv b0, p0, z0.b
; CHECK-NEXT:    fmov w0, s0
; CHECK-NEXT:    ret
  %op = load <32 x i8>, ptr %a
  %res = call i8 @llvm.vector.reduce.smin.v32i8(<32 x i8> %op)
  ret i8 %res
}
define i16 @sminv_v4i16(<4 x i16> %a) {
; CHECK-LABEL: sminv_v4i16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.h, vl4
; CHECK-NEXT:    // kill: def $d0 killed $d0 def $z0
; CHECK-NEXT:    sminv h0, p0, z0.h
; CHECK-NEXT:    fmov w0, s0
; CHECK-NEXT:    ret
  %res = call i16 @llvm.vector.reduce.smin.v4i16(<4 x i16> %a)
  ret i16 %res
}
define i16 @sminv_v8i16(<8 x i16> %a) {
; CHECK-LABEL: sminv_v8i16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.h, vl8
; CHECK-NEXT:    // kill: def $q0 killed $q0 def $z0
; CHECK-NEXT:    sminv h0, p0, z0.h
; CHECK-NEXT:    fmov w0, s0
; CHECK-NEXT:    ret
  %res = call i16 @llvm.vector.reduce.smin.v8i16(<8 x i16> %a)
  ret i16 %res
}
define i16 @sminv_v16i16(ptr %a) {
; CHECK-LABEL: sminv_v16i16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.h, vl8
; CHECK-NEXT:    ldp q1, q0, [x0]
; CHECK-NEXT:    smin z0.h, p0/m, z0.h, z1.h
; CHECK-NEXT:    sminv h0, p0, z0.h
; CHECK-NEXT:    fmov w0, s0
; CHECK-NEXT:    ret
  %op = load <16 x i16>, ptr %a
  %res = call i16 @llvm.vector.reduce.smin.v16i16(<16 x i16> %op)
  ret i16 %res
}
define i32 @sminv_v2i32(<2 x i32> %a) {
; CHECK-LABEL: sminv_v2i32:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.s, vl2
; CHECK-NEXT:    // kill: def $d0 killed $d0 def $z0
; CHECK-NEXT:    sminv s0, p0, z0.s
; CHECK-NEXT:    fmov w0, s0
; CHECK-NEXT:    ret
  %res = call i32 @llvm.vector.reduce.smin.v2i32(<2 x i32> %a)
  ret i32 %res
}
define i32 @sminv_v4i32(<4 x i32> %a) {
; CHECK-LABEL: sminv_v4i32:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.s, vl4
; CHECK-NEXT:    // kill: def $q0 killed $q0 def $z0
; CHECK-NEXT:    sminv s0, p0, z0.s
; CHECK-NEXT:    fmov w0, s0
; CHECK-NEXT:    ret
  %res = call i32 @llvm.vector.reduce.smin.v4i32(<4 x i32> %a)
  ret i32 %res
}
define i32 @sminv_v8i32(ptr %a) {
; CHECK-LABEL: sminv_v8i32:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.s, vl4
; CHECK-NEXT:    ldp q1, q0, [x0]
; CHECK-NEXT:    smin z0.s, p0/m, z0.s, z1.s
; CHECK-NEXT:    sminv s0, p0, z0.s
; CHECK-NEXT:    fmov w0, s0
; CHECK-NEXT:    ret
  %op = load <8 x i32>, ptr %a
  %res = call i32 @llvm.vector.reduce.smin.v8i32(<8 x i32> %op)
  ret i32 %res
}
; No NEON 64-bit vector SMINV support. Use SVE.
define i64 @sminv_v2i64(<2 x i64> %a) {
; CHECK-LABEL: sminv_v2i64:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.d, vl2
; CHECK-NEXT:    // kill: def $q0 killed $q0 def $z0
; CHECK-NEXT:    sminv d0, p0, z0.d
; CHECK-NEXT:    fmov x0, d0
; CHECK-NEXT:    ret
  %res = call i64 @llvm.vector.reduce.smin.v2i64(<2 x i64> %a)
  ret i64 %res
}
define i64 @sminv_v4i64(ptr %a) {
; CHECK-LABEL: sminv_v4i64:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.d, vl2
; CHECK-NEXT:    ldp q1, q0, [x0]
; CHECK-NEXT:    smin z0.d, p0/m, z0.d, z1.d
; CHECK-NEXT:    sminv d0, p0, z0.d
; CHECK-NEXT:    fmov x0, d0
; CHECK-NEXT:    ret
  %op = load <4 x i64>, ptr %a
  %res = call i64 @llvm.vector.reduce.smin.v4i64(<4 x i64> %op)
  ret i64 %res
}
;
; UMAXV
;

define i8 @umaxv_v8i8(<8 x i8> %a) {
; CHECK-LABEL: umaxv_v8i8:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.b, vl8
; CHECK-NEXT:    // kill: def $d0 killed $d0 def $z0
; CHECK-NEXT:    umaxv b0, p0, z0.b
; CHECK-NEXT:    fmov w0, s0
; CHECK-NEXT:    ret
  %res = call i8 @llvm.vector.reduce.umax.v8i8(<8 x i8> %a)
  ret i8 %res
}
define i8 @umaxv_v16i8(<16 x i8> %a) {
; CHECK-LABEL: umaxv_v16i8:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.b, vl16
; CHECK-NEXT:    // kill: def $q0 killed $q0 def $z0
; CHECK-NEXT:    umaxv b0, p0, z0.b
; CHECK-NEXT:    fmov w0, s0
; CHECK-NEXT:    ret
  %res = call i8 @llvm.vector.reduce.umax.v16i8(<16 x i8> %a)
  ret i8 %res
}
define i8 @umaxv_v32i8(ptr %a) {
; CHECK-LABEL: umaxv_v32i8:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.b, vl16
; CHECK-NEXT:    ldp q1, q0, [x0]
; CHECK-NEXT:    umax z0.b, p0/m, z0.b, z1.b
; CHECK-NEXT:    umaxv b0, p0, z0.b
; CHECK-NEXT:    fmov w0, s0
; CHECK-NEXT:    ret
  %op = load <32 x i8>, ptr %a
  %res = call i8 @llvm.vector.reduce.umax.v32i8(<32 x i8> %op)
  ret i8 %res
}
define i16 @umaxv_v4i16(<4 x i16> %a) {
; CHECK-LABEL: umaxv_v4i16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.h, vl4
; CHECK-NEXT:    // kill: def $d0 killed $d0 def $z0
; CHECK-NEXT:    umaxv h0, p0, z0.h
; CHECK-NEXT:    fmov w0, s0
; CHECK-NEXT:    ret
  %res = call i16 @llvm.vector.reduce.umax.v4i16(<4 x i16> %a)
  ret i16 %res
}
define i16 @umaxv_v8i16(<8 x i16> %a) {
; CHECK-LABEL: umaxv_v8i16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.h, vl8
; CHECK-NEXT:    // kill: def $q0 killed $q0 def $z0
; CHECK-NEXT:    umaxv h0, p0, z0.h
; CHECK-NEXT:    fmov w0, s0
; CHECK-NEXT:    ret
  %res = call i16 @llvm.vector.reduce.umax.v8i16(<8 x i16> %a)
  ret i16 %res
}
define i16 @umaxv_v16i16(ptr %a) {
; CHECK-LABEL: umaxv_v16i16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.h, vl8
; CHECK-NEXT:    ldp q1, q0, [x0]
; CHECK-NEXT:    umax z0.h, p0/m, z0.h, z1.h
; CHECK-NEXT:    umaxv h0, p0, z0.h
; CHECK-NEXT:    fmov w0, s0
; CHECK-NEXT:    ret
  %op = load <16 x i16>, ptr %a
  %res = call i16 @llvm.vector.reduce.umax.v16i16(<16 x i16> %op)
  ret i16 %res
}
define i32 @umaxv_v2i32(<2 x i32> %a) {
; CHECK-LABEL: umaxv_v2i32:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.s, vl2
; CHECK-NEXT:    // kill: def $d0 killed $d0 def $z0
; CHECK-NEXT:    umaxv s0, p0, z0.s
; CHECK-NEXT:    fmov w0, s0
; CHECK-NEXT:    ret
  %res = call i32 @llvm.vector.reduce.umax.v2i32(<2 x i32> %a)
  ret i32 %res
}
define i32 @umaxv_v4i32(<4 x i32> %a) {
; CHECK-LABEL: umaxv_v4i32:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.s, vl4
; CHECK-NEXT:    // kill: def $q0 killed $q0 def $z0
; CHECK-NEXT:    umaxv s0, p0, z0.s
; CHECK-NEXT:    fmov w0, s0
; CHECK-NEXT:    ret
  %res = call i32 @llvm.vector.reduce.umax.v4i32(<4 x i32> %a)
  ret i32 %res
}
define i32 @umaxv_v8i32(ptr %a) {
; CHECK-LABEL: umaxv_v8i32:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.s, vl4
; CHECK-NEXT:    ldp q1, q0, [x0]
; CHECK-NEXT:    umax z0.s, p0/m, z0.s, z1.s
; CHECK-NEXT:    umaxv s0, p0, z0.s
; CHECK-NEXT:    fmov w0, s0
; CHECK-NEXT:    ret
  %op = load <8 x i32>, ptr %a
  %res = call i32 @llvm.vector.reduce.umax.v8i32(<8 x i32> %op)
  ret i32 %res
}
; No NEON 64-bit vector UMAXV support. Use SVE.
define i64 @umaxv_v2i64(<2 x i64> %a) {
; CHECK-LABEL: umaxv_v2i64:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.d, vl2
; CHECK-NEXT:    // kill: def $q0 killed $q0 def $z0
; CHECK-NEXT:    umaxv d0, p0, z0.d
; CHECK-NEXT:    fmov x0, d0
; CHECK-NEXT:    ret
  %res = call i64 @llvm.vector.reduce.umax.v2i64(<2 x i64> %a)
  ret i64 %res
}
define i64 @umaxv_v4i64(ptr %a) {
; CHECK-LABEL: umaxv_v4i64:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.d, vl2
; CHECK-NEXT:    ldp q1, q0, [x0]
; CHECK-NEXT:    umax z0.d, p0/m, z0.d, z1.d
; CHECK-NEXT:    umaxv d0, p0, z0.d
; CHECK-NEXT:    fmov x0, d0
; CHECK-NEXT:    ret
  %op = load <4 x i64>, ptr %a
  %res = call i64 @llvm.vector.reduce.umax.v4i64(<4 x i64> %op)
  ret i64 %res
}
;
; UMINV
;

define i8 @uminv_v8i8(<8 x i8> %a) {
; CHECK-LABEL: uminv_v8i8:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.b, vl8
; CHECK-NEXT:    // kill: def $d0 killed $d0 def $z0
; CHECK-NEXT:    uminv b0, p0, z0.b
; CHECK-NEXT:    fmov w0, s0
; CHECK-NEXT:    ret
  %res = call i8 @llvm.vector.reduce.umin.v8i8(<8 x i8> %a)
  ret i8 %res
}
define i8 @uminv_v16i8(<16 x i8> %a) {
; CHECK-LABEL: uminv_v16i8:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.b, vl16
; CHECK-NEXT:    // kill: def $q0 killed $q0 def $z0
; CHECK-NEXT:    uminv b0, p0, z0.b
; CHECK-NEXT:    fmov w0, s0
; CHECK-NEXT:    ret
  %res = call i8 @llvm.vector.reduce.umin.v16i8(<16 x i8> %a)
  ret i8 %res
}
define i8 @uminv_v32i8(ptr %a) {
; CHECK-LABEL: uminv_v32i8:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.b, vl16
; CHECK-NEXT:    ldp q1, q0, [x0]
; CHECK-NEXT:    umin z0.b, p0/m, z0.b, z1.b
; CHECK-NEXT:    uminv b0, p0, z0.b
; CHECK-NEXT:    fmov w0, s0
; CHECK-NEXT:    ret
  %op = load <32 x i8>, ptr %a
  %res = call i8 @llvm.vector.reduce.umin.v32i8(<32 x i8> %op)
  ret i8 %res
}
define i16 @uminv_v4i16(<4 x i16> %a) {
; CHECK-LABEL: uminv_v4i16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.h, vl4
; CHECK-NEXT:    // kill: def $d0 killed $d0 def $z0
; CHECK-NEXT:    uminv h0, p0, z0.h
; CHECK-NEXT:    fmov w0, s0
; CHECK-NEXT:    ret
  %res = call i16 @llvm.vector.reduce.umin.v4i16(<4 x i16> %a)
  ret i16 %res
}
define i16 @uminv_v8i16(<8 x i16> %a) {
; CHECK-LABEL: uminv_v8i16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.h, vl8
; CHECK-NEXT:    // kill: def $q0 killed $q0 def $z0
; CHECK-NEXT:    uminv h0, p0, z0.h
; CHECK-NEXT:    fmov w0, s0
; CHECK-NEXT:    ret
  %res = call i16 @llvm.vector.reduce.umin.v8i16(<8 x i16> %a)
  ret i16 %res
}
define i16 @uminv_v16i16(ptr %a) {
; CHECK-LABEL: uminv_v16i16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.h, vl8
; CHECK-NEXT:    ldp q1, q0, [x0]
; CHECK-NEXT:    umin z0.h, p0/m, z0.h, z1.h
; CHECK-NEXT:    uminv h0, p0, z0.h
; CHECK-NEXT:    fmov w0, s0
; CHECK-NEXT:    ret
  %op = load <16 x i16>, ptr %a
  %res = call i16 @llvm.vector.reduce.umin.v16i16(<16 x i16> %op)
  ret i16 %res
}
define i32 @uminv_v2i32(<2 x i32> %a) {
; CHECK-LABEL: uminv_v2i32:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.s, vl2
; CHECK-NEXT:    // kill: def $d0 killed $d0 def $z0
; CHECK-NEXT:    uminv s0, p0, z0.s
; CHECK-NEXT:    fmov w0, s0
; CHECK-NEXT:    ret
  %res = call i32 @llvm.vector.reduce.umin.v2i32(<2 x i32> %a)
  ret i32 %res
}
define i32 @uminv_v4i32(<4 x i32> %a) {
; CHECK-LABEL: uminv_v4i32:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.s, vl4
; CHECK-NEXT:    // kill: def $q0 killed $q0 def $z0
; CHECK-NEXT:    uminv s0, p0, z0.s
; CHECK-NEXT:    fmov w0, s0
; CHECK-NEXT:    ret
  %res = call i32 @llvm.vector.reduce.umin.v4i32(<4 x i32> %a)
  ret i32 %res
}
define i32 @uminv_v8i32(ptr %a) {
; CHECK-LABEL: uminv_v8i32:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.s, vl4
; CHECK-NEXT:    ldp q1, q0, [x0]
; CHECK-NEXT:    umin z0.s, p0/m, z0.s, z1.s
; CHECK-NEXT:    uminv s0, p0, z0.s
; CHECK-NEXT:    fmov w0, s0
; CHECK-NEXT:    ret
  %op = load <8 x i32>, ptr %a
  %res = call i32 @llvm.vector.reduce.umin.v8i32(<8 x i32> %op)
  ret i32 %res
}
; No NEON 64-bit vector UMINV support. Use SVE.
define i64 @uminv_v2i64(<2 x i64> %a) {
; CHECK-LABEL: uminv_v2i64:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.d, vl2
; CHECK-NEXT:    // kill: def $q0 killed $q0 def $z0
; CHECK-NEXT:    uminv d0, p0, z0.d
; CHECK-NEXT:    fmov x0, d0
; CHECK-NEXT:    ret
  %res = call i64 @llvm.vector.reduce.umin.v2i64(<2 x i64> %a)
  ret i64 %res
}
define i64 @uminv_v4i64(ptr %a) {
; CHECK-LABEL: uminv_v4i64:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.d, vl2
; CHECK-NEXT:    ldp q1, q0, [x0]
; CHECK-NEXT:    umin z0.d, p0/m, z0.d, z1.d
; CHECK-NEXT:    uminv d0, p0, z0.d
; CHECK-NEXT:    fmov x0, d0
; CHECK-NEXT:    ret
  %op = load <4 x i64>, ptr %a
  %res = call i64 @llvm.vector.reduce.umin.v4i64(<4 x i64> %op)
  ret i64 %res
}
declare i8 @llvm.vector.reduce.add.v8i8(<8 x i8>)
declare i8 @llvm.vector.reduce.add.v16i8(<16 x i8>)
declare i8 @llvm.vector.reduce.add.v32i8(<32 x i8>)

declare i16 @llvm.vector.reduce.add.v4i16(<4 x i16>)
declare i16 @llvm.vector.reduce.add.v8i16(<8 x i16>)
declare i16 @llvm.vector.reduce.add.v16i16(<16 x i16>)

declare i32 @llvm.vector.reduce.add.v2i32(<2 x i32>)
declare i32 @llvm.vector.reduce.add.v4i32(<4 x i32>)
declare i32 @llvm.vector.reduce.add.v8i32(<8 x i32>)

declare i64 @llvm.vector.reduce.add.v2i64(<2 x i64>)
declare i64 @llvm.vector.reduce.add.v4i64(<4 x i64>)

declare i8 @llvm.vector.reduce.smax.v8i8(<8 x i8>)
declare i8 @llvm.vector.reduce.smax.v16i8(<16 x i8>)
declare i8 @llvm.vector.reduce.smax.v32i8(<32 x i8>)

declare i16 @llvm.vector.reduce.smax.v4i16(<4 x i16>)
declare i16 @llvm.vector.reduce.smax.v8i16(<8 x i16>)
declare i16 @llvm.vector.reduce.smax.v16i16(<16 x i16>)

declare i32 @llvm.vector.reduce.smax.v2i32(<2 x i32>)
declare i32 @llvm.vector.reduce.smax.v4i32(<4 x i32>)
declare i32 @llvm.vector.reduce.smax.v8i32(<8 x i32>)

declare i64 @llvm.vector.reduce.smax.v2i64(<2 x i64>)
declare i64 @llvm.vector.reduce.smax.v4i64(<4 x i64>)

declare i8 @llvm.vector.reduce.smin.v8i8(<8 x i8>)
declare i8 @llvm.vector.reduce.smin.v16i8(<16 x i8>)
declare i8 @llvm.vector.reduce.smin.v32i8(<32 x i8>)

declare i16 @llvm.vector.reduce.smin.v4i16(<4 x i16>)
declare i16 @llvm.vector.reduce.smin.v8i16(<8 x i16>)
declare i16 @llvm.vector.reduce.smin.v16i16(<16 x i16>)

declare i32 @llvm.vector.reduce.smin.v2i32(<2 x i32>)
declare i32 @llvm.vector.reduce.smin.v4i32(<4 x i32>)
declare i32 @llvm.vector.reduce.smin.v8i32(<8 x i32>)

declare i64 @llvm.vector.reduce.smin.v2i64(<2 x i64>)
declare i64 @llvm.vector.reduce.smin.v4i64(<4 x i64>)

declare i8 @llvm.vector.reduce.umax.v8i8(<8 x i8>)
declare i8 @llvm.vector.reduce.umax.v16i8(<16 x i8>)
declare i8 @llvm.vector.reduce.umax.v32i8(<32 x i8>)

declare i16 @llvm.vector.reduce.umax.v4i16(<4 x i16>)
declare i16 @llvm.vector.reduce.umax.v8i16(<8 x i16>)
declare i16 @llvm.vector.reduce.umax.v16i16(<16 x i16>)

declare i32 @llvm.vector.reduce.umax.v2i32(<2 x i32>)
declare i32 @llvm.vector.reduce.umax.v4i32(<4 x i32>)
declare i32 @llvm.vector.reduce.umax.v8i32(<8 x i32>)

declare i64 @llvm.vector.reduce.umax.v2i64(<2 x i64>)
declare i64 @llvm.vector.reduce.umax.v4i64(<4 x i64>)

declare i8 @llvm.vector.reduce.umin.v8i8(<8 x i8>)
declare i8 @llvm.vector.reduce.umin.v16i8(<16 x i8>)
declare i8 @llvm.vector.reduce.umin.v32i8(<32 x i8>)

declare i16 @llvm.vector.reduce.umin.v4i16(<4 x i16>)
declare i16 @llvm.vector.reduce.umin.v8i16(<8 x i16>)
declare i16 @llvm.vector.reduce.umin.v16i16(<16 x i16>)

declare i32 @llvm.vector.reduce.umin.v2i32(<2 x i32>)
declare i32 @llvm.vector.reduce.umin.v4i32(<4 x i32>)
declare i32 @llvm.vector.reduce.umin.v8i32(<8 x i32>)

declare i64 @llvm.vector.reduce.umin.v2i64(<2 x i64>)
declare i64 @llvm.vector.reduce.umin.v4i64(<4 x i64>)