1 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2 ; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sve < %s | FileCheck %s
; AND-reduction of a <vscale x 16 x i8> argument via llvm.vector.reduce.and.nxv16i8.
; Expected code: ptrue p0.b, ANDV into b0, then fmov of s0 into w0.
6 define i8 @andv_nxv16i8(<vscale x 16 x i8> %a) {
7 ; CHECK-LABEL: andv_nxv16i8:
9 ; CHECK-NEXT: ptrue p0.b
10 ; CHECK-NEXT: andv b0, p0, z0.b
11 ; CHECK-NEXT: fmov w0, s0
13 %res = call i8 @llvm.vector.reduce.and.nxv16i8(<vscale x 16 x i8> %a)
; AND-reduction of a <vscale x 8 x i16> argument via llvm.vector.reduce.and.nxv8i16.
; Expected code: ptrue p0.h, ANDV into h0, then fmov of s0 into w0.
17 define i16 @andv_nxv8i16(<vscale x 8 x i16> %a) {
18 ; CHECK-LABEL: andv_nxv8i16:
20 ; CHECK-NEXT: ptrue p0.h
21 ; CHECK-NEXT: andv h0, p0, z0.h
22 ; CHECK-NEXT: fmov w0, s0
24 %res = call i16 @llvm.vector.reduce.and.nxv8i16(<vscale x 8 x i16> %a)
; AND-reduction of a <vscale x 4 x i32> argument via llvm.vector.reduce.and.nxv4i32.
; Expected code: ptrue p0.s, ANDV into s0, then fmov of s0 into w0.
28 define i32 @andv_nxv4i32(<vscale x 4 x i32> %a) {
29 ; CHECK-LABEL: andv_nxv4i32:
31 ; CHECK-NEXT: ptrue p0.s
32 ; CHECK-NEXT: andv s0, p0, z0.s
33 ; CHECK-NEXT: fmov w0, s0
35 %res = call i32 @llvm.vector.reduce.and.nxv4i32(<vscale x 4 x i32> %a)
; AND-reduction of a <vscale x 2 x i64> argument via llvm.vector.reduce.and.nxv2i64.
; Expected code: ptrue p0.d, ANDV into d0, then a 64-bit fmov of d0 into x0.
39 define i64 @andv_nxv2i64(<vscale x 2 x i64> %a) {
40 ; CHECK-LABEL: andv_nxv2i64:
42 ; CHECK-NEXT: ptrue p0.d
43 ; CHECK-NEXT: andv d0, p0, z0.d
44 ; CHECK-NEXT: fmov x0, d0
46 %res = call i64 @llvm.vector.reduce.and.nxv2i64(<vscale x 2 x i64> %a)
; OR-reduction of a <vscale x 16 x i8> argument via llvm.vector.reduce.or.nxv16i8.
; Expected code: ptrue p0.b, ORV into b0, then fmov of s0 into w0.
52 define i8 @orv_nxv16i8(<vscale x 16 x i8> %a) {
53 ; CHECK-LABEL: orv_nxv16i8:
55 ; CHECK-NEXT: ptrue p0.b
56 ; CHECK-NEXT: orv b0, p0, z0.b
57 ; CHECK-NEXT: fmov w0, s0
59 %res = call i8 @llvm.vector.reduce.or.nxv16i8(<vscale x 16 x i8> %a)
; OR-reduction of a <vscale x 8 x i16> argument via llvm.vector.reduce.or.nxv8i16.
; Expected code: ptrue p0.h, ORV into h0, then fmov of s0 into w0.
63 define i16 @orv_nxv8i16(<vscale x 8 x i16> %a) {
64 ; CHECK-LABEL: orv_nxv8i16:
66 ; CHECK-NEXT: ptrue p0.h
67 ; CHECK-NEXT: orv h0, p0, z0.h
68 ; CHECK-NEXT: fmov w0, s0
70 %res = call i16 @llvm.vector.reduce.or.nxv8i16(<vscale x 8 x i16> %a)
; OR-reduction of a <vscale x 4 x i32> argument via llvm.vector.reduce.or.nxv4i32.
; Expected code: ptrue p0.s, ORV into s0, then fmov of s0 into w0.
74 define i32 @orv_nxv4i32(<vscale x 4 x i32> %a) {
75 ; CHECK-LABEL: orv_nxv4i32:
77 ; CHECK-NEXT: ptrue p0.s
78 ; CHECK-NEXT: orv s0, p0, z0.s
79 ; CHECK-NEXT: fmov w0, s0
81 %res = call i32 @llvm.vector.reduce.or.nxv4i32(<vscale x 4 x i32> %a)
; OR-reduction of a <vscale x 2 x i64> argument via llvm.vector.reduce.or.nxv2i64.
; Expected code: ptrue p0.d, ORV into d0, then a 64-bit fmov of d0 into x0.
85 define i64 @orv_nxv2i64(<vscale x 2 x i64> %a) {
86 ; CHECK-LABEL: orv_nxv2i64:
88 ; CHECK-NEXT: ptrue p0.d
89 ; CHECK-NEXT: orv d0, p0, z0.d
90 ; CHECK-NEXT: fmov x0, d0
92 %res = call i64 @llvm.vector.reduce.or.nxv2i64(<vscale x 2 x i64> %a)
; XOR-reduction of a <vscale x 16 x i8> argument via llvm.vector.reduce.xor.nxv16i8.
; Note the test is named "xorv" but the expected mnemonic is EORV.
; Expected code: ptrue p0.b, EORV into b0, then fmov of s0 into w0.
98 define i8 @xorv_nxv16i8(<vscale x 16 x i8> %a) {
99 ; CHECK-LABEL: xorv_nxv16i8:
101 ; CHECK-NEXT: ptrue p0.b
102 ; CHECK-NEXT: eorv b0, p0, z0.b
103 ; CHECK-NEXT: fmov w0, s0
105 %res = call i8 @llvm.vector.reduce.xor.nxv16i8(<vscale x 16 x i8> %a)
; XOR-reduction of a <vscale x 8 x i16> argument via llvm.vector.reduce.xor.nxv8i16.
; Note the test is named "xorv" but the expected mnemonic is EORV.
; Expected code: ptrue p0.h, EORV into h0, then fmov of s0 into w0.
109 define i16 @xorv_nxv8i16(<vscale x 8 x i16> %a) {
110 ; CHECK-LABEL: xorv_nxv8i16:
112 ; CHECK-NEXT: ptrue p0.h
113 ; CHECK-NEXT: eorv h0, p0, z0.h
114 ; CHECK-NEXT: fmov w0, s0
116 %res = call i16 @llvm.vector.reduce.xor.nxv8i16(<vscale x 8 x i16> %a)
; XOR-reduction of a <vscale x 4 x i32> argument via llvm.vector.reduce.xor.nxv4i32.
; Note the test is named "xorv" but the expected mnemonic is EORV.
; Expected code: ptrue p0.s, EORV into s0, then fmov of s0 into w0.
120 define i32 @xorv_nxv4i32(<vscale x 4 x i32> %a) {
121 ; CHECK-LABEL: xorv_nxv4i32:
123 ; CHECK-NEXT: ptrue p0.s
124 ; CHECK-NEXT: eorv s0, p0, z0.s
125 ; CHECK-NEXT: fmov w0, s0
127 %res = call i32 @llvm.vector.reduce.xor.nxv4i32(<vscale x 4 x i32> %a)
; XOR-reduction of a <vscale x 2 x i64> argument via llvm.vector.reduce.xor.nxv2i64.
; Note the test is named "xorv" but the expected mnemonic is EORV.
; Expected code: ptrue p0.d, EORV into d0, then a 64-bit fmov of d0 into x0.
131 define i64 @xorv_nxv2i64(<vscale x 2 x i64> %a) {
132 ; CHECK-LABEL: xorv_nxv2i64:
134 ; CHECK-NEXT: ptrue p0.d
135 ; CHECK-NEXT: eorv d0, p0, z0.d
136 ; CHECK-NEXT: fmov x0, d0
138 %res = call i64 @llvm.vector.reduce.xor.nxv2i64(<vscale x 2 x i64> %a)
; ADD-reduction of a <vscale x 16 x i8> argument via llvm.vector.reduce.add.nxv16i8.
; Unlike the logical reductions, the expected UADDV writes d0 even for .b elements,
; so the result is moved through x0 and a kill annotation marks the w0 sub-register use.
144 define i8 @uaddv_nxv16i8(<vscale x 16 x i8> %a) {
145 ; CHECK-LABEL: uaddv_nxv16i8:
147 ; CHECK-NEXT: ptrue p0.b
148 ; CHECK-NEXT: uaddv d0, p0, z0.b
149 ; CHECK-NEXT: fmov x0, d0
150 ; CHECK-NEXT: // kill: def $w0 killed $w0 killed $x0
152 %res = call i8 @llvm.vector.reduce.add.nxv16i8(<vscale x 16 x i8> %a)
; ADD-reduction of a <vscale x 8 x i16> argument via llvm.vector.reduce.add.nxv8i16.
; The expected UADDV writes d0 for .h elements, so the result is moved through x0
; and a kill annotation marks the w0 sub-register use.
156 define i16 @uaddv_nxv8i16(<vscale x 8 x i16> %a) {
157 ; CHECK-LABEL: uaddv_nxv8i16:
159 ; CHECK-NEXT: ptrue p0.h
160 ; CHECK-NEXT: uaddv d0, p0, z0.h
161 ; CHECK-NEXT: fmov x0, d0
162 ; CHECK-NEXT: // kill: def $w0 killed $w0 killed $x0
164 %res = call i16 @llvm.vector.reduce.add.nxv8i16(<vscale x 8 x i16> %a)
; ADD-reduction of a <vscale x 4 x i32> argument via llvm.vector.reduce.add.nxv4i32.
; The expected UADDV writes d0 for .s elements, so the result is moved through x0
; and a kill annotation marks the w0 sub-register use.
168 define i32 @uaddv_nxv4i32(<vscale x 4 x i32> %a) {
169 ; CHECK-LABEL: uaddv_nxv4i32:
171 ; CHECK-NEXT: ptrue p0.s
172 ; CHECK-NEXT: uaddv d0, p0, z0.s
173 ; CHECK-NEXT: fmov x0, d0
174 ; CHECK-NEXT: // kill: def $w0 killed $w0 killed $x0
176 %res = call i32 @llvm.vector.reduce.add.nxv4i32(<vscale x 4 x i32> %a)
; ADD-reduction of a <vscale x 2 x i64> argument via llvm.vector.reduce.add.nxv2i64.
; Expected code: ptrue p0.d, UADDV into d0, then fmov of d0 into x0 (no kill annotation,
; since the full x0 is the result here).
180 define i64 @uaddv_nxv2i64(<vscale x 2 x i64> %a) {
181 ; CHECK-LABEL: uaddv_nxv2i64:
183 ; CHECK-NEXT: ptrue p0.d
184 ; CHECK-NEXT: uaddv d0, p0, z0.d
185 ; CHECK-NEXT: fmov x0, d0
187 %res = call i64 @llvm.vector.reduce.add.nxv2i64(<vscale x 2 x i64> %a)
; Unsigned-min reduction of a <vscale x 16 x i8> argument via llvm.vector.reduce.umin.nxv16i8.
; Expected code: ptrue p0.b, UMINV into b0, then fmov of s0 into w0.
193 define i8 @umin_nxv16i8(<vscale x 16 x i8> %a) {
194 ; CHECK-LABEL: umin_nxv16i8:
196 ; CHECK-NEXT: ptrue p0.b
197 ; CHECK-NEXT: uminv b0, p0, z0.b
198 ; CHECK-NEXT: fmov w0, s0
200 %res = call i8 @llvm.vector.reduce.umin.nxv16i8(<vscale x 16 x i8> %a)
; Unsigned-min reduction of a <vscale x 8 x i16> argument via llvm.vector.reduce.umin.nxv8i16.
; Expected code: ptrue p0.h, UMINV into h0, then fmov of s0 into w0.
204 define i16 @umin_nxv8i16(<vscale x 8 x i16> %a) {
205 ; CHECK-LABEL: umin_nxv8i16:
207 ; CHECK-NEXT: ptrue p0.h
208 ; CHECK-NEXT: uminv h0, p0, z0.h
209 ; CHECK-NEXT: fmov w0, s0
211 %res = call i16 @llvm.vector.reduce.umin.nxv8i16(<vscale x 8 x i16> %a)
; Unsigned-min reduction of a <vscale x 4 x i32> argument via llvm.vector.reduce.umin.nxv4i32.
; Expected code: ptrue p0.s, UMINV into s0, then fmov of s0 into w0.
215 define i32 @umin_nxv4i32(<vscale x 4 x i32> %a) {
216 ; CHECK-LABEL: umin_nxv4i32:
218 ; CHECK-NEXT: ptrue p0.s
219 ; CHECK-NEXT: uminv s0, p0, z0.s
220 ; CHECK-NEXT: fmov w0, s0
222 %res = call i32 @llvm.vector.reduce.umin.nxv4i32(<vscale x 4 x i32> %a)
; Unsigned-min reduction of a <vscale x 2 x i64> argument via llvm.vector.reduce.umin.nxv2i64.
; Expected code: ptrue p0.d, UMINV into d0, then a 64-bit fmov of d0 into x0.
226 define i64 @umin_nxv2i64(<vscale x 2 x i64> %a) {
227 ; CHECK-LABEL: umin_nxv2i64:
229 ; CHECK-NEXT: ptrue p0.d
230 ; CHECK-NEXT: uminv d0, p0, z0.d
231 ; CHECK-NEXT: fmov x0, d0
233 %res = call i64 @llvm.vector.reduce.umin.nxv2i64(<vscale x 2 x i64> %a)
; Signed-min reduction of a <vscale x 16 x i8> argument via llvm.vector.reduce.smin.nxv16i8.
; Expected code: ptrue p0.b, SMINV into b0, then fmov of s0 into w0.
239 define i8 @smin_nxv16i8(<vscale x 16 x i8> %a) {
240 ; CHECK-LABEL: smin_nxv16i8:
242 ; CHECK-NEXT: ptrue p0.b
243 ; CHECK-NEXT: sminv b0, p0, z0.b
244 ; CHECK-NEXT: fmov w0, s0
246 %res = call i8 @llvm.vector.reduce.smin.nxv16i8(<vscale x 16 x i8> %a)
; Signed-min reduction of a <vscale x 8 x i16> argument via llvm.vector.reduce.smin.nxv8i16.
; Expected code: ptrue p0.h, SMINV into h0, then fmov of s0 into w0.
250 define i16 @smin_nxv8i16(<vscale x 8 x i16> %a) {
251 ; CHECK-LABEL: smin_nxv8i16:
253 ; CHECK-NEXT: ptrue p0.h
254 ; CHECK-NEXT: sminv h0, p0, z0.h
255 ; CHECK-NEXT: fmov w0, s0
257 %res = call i16 @llvm.vector.reduce.smin.nxv8i16(<vscale x 8 x i16> %a)
; Signed-min reduction of a <vscale x 4 x i32> argument via llvm.vector.reduce.smin.nxv4i32.
; Expected code: ptrue p0.s, SMINV into s0, then fmov of s0 into w0.
261 define i32 @smin_nxv4i32(<vscale x 4 x i32> %a) {
262 ; CHECK-LABEL: smin_nxv4i32:
264 ; CHECK-NEXT: ptrue p0.s
265 ; CHECK-NEXT: sminv s0, p0, z0.s
266 ; CHECK-NEXT: fmov w0, s0
268 %res = call i32 @llvm.vector.reduce.smin.nxv4i32(<vscale x 4 x i32> %a)
; Signed-min reduction of a <vscale x 2 x i64> argument via llvm.vector.reduce.smin.nxv2i64.
; Expected code: ptrue p0.d, SMINV into d0, then a 64-bit fmov of d0 into x0.
272 define i64 @smin_nxv2i64(<vscale x 2 x i64> %a) {
273 ; CHECK-LABEL: smin_nxv2i64:
275 ; CHECK-NEXT: ptrue p0.d
276 ; CHECK-NEXT: sminv d0, p0, z0.d
277 ; CHECK-NEXT: fmov x0, d0
279 %res = call i64 @llvm.vector.reduce.smin.nxv2i64(<vscale x 2 x i64> %a)
; Unsigned-max reduction of a <vscale x 16 x i8> argument via llvm.vector.reduce.umax.nxv16i8.
; Expected code: ptrue p0.b, UMAXV into b0, then fmov of s0 into w0.
285 define i8 @umax_nxv16i8(<vscale x 16 x i8> %a) {
286 ; CHECK-LABEL: umax_nxv16i8:
288 ; CHECK-NEXT: ptrue p0.b
289 ; CHECK-NEXT: umaxv b0, p0, z0.b
290 ; CHECK-NEXT: fmov w0, s0
292 %res = call i8 @llvm.vector.reduce.umax.nxv16i8(<vscale x 16 x i8> %a)
; Unsigned-max reduction of a <vscale x 8 x i16> argument via llvm.vector.reduce.umax.nxv8i16.
; Expected code: ptrue p0.h, UMAXV into h0, then fmov of s0 into w0.
296 define i16 @umax_nxv8i16(<vscale x 8 x i16> %a) {
297 ; CHECK-LABEL: umax_nxv8i16:
299 ; CHECK-NEXT: ptrue p0.h
300 ; CHECK-NEXT: umaxv h0, p0, z0.h
301 ; CHECK-NEXT: fmov w0, s0
303 %res = call i16 @llvm.vector.reduce.umax.nxv8i16(<vscale x 8 x i16> %a)
; Unsigned-max reduction of a <vscale x 4 x i32> argument via llvm.vector.reduce.umax.nxv4i32.
; Expected code: ptrue p0.s, UMAXV into s0, then fmov of s0 into w0.
307 define i32 @umax_nxv4i32(<vscale x 4 x i32> %a) {
308 ; CHECK-LABEL: umax_nxv4i32:
310 ; CHECK-NEXT: ptrue p0.s
311 ; CHECK-NEXT: umaxv s0, p0, z0.s
312 ; CHECK-NEXT: fmov w0, s0
314 %res = call i32 @llvm.vector.reduce.umax.nxv4i32(<vscale x 4 x i32> %a)
; Unsigned-max reduction of a <vscale x 2 x i64> argument via llvm.vector.reduce.umax.nxv2i64.
; Expected code: ptrue p0.d, UMAXV into d0, then a 64-bit fmov of d0 into x0.
318 define i64 @umax_nxv2i64(<vscale x 2 x i64> %a) {
319 ; CHECK-LABEL: umax_nxv2i64:
321 ; CHECK-NEXT: ptrue p0.d
322 ; CHECK-NEXT: umaxv d0, p0, z0.d
323 ; CHECK-NEXT: fmov x0, d0
325 %res = call i64 @llvm.vector.reduce.umax.nxv2i64(<vscale x 2 x i64> %a)
; Signed-max reduction of a <vscale x 16 x i8> argument via llvm.vector.reduce.smax.nxv16i8.
; Expected code: ptrue p0.b, SMAXV into b0, then fmov of s0 into w0.
331 define i8 @smax_nxv16i8(<vscale x 16 x i8> %a) {
332 ; CHECK-LABEL: smax_nxv16i8:
334 ; CHECK-NEXT: ptrue p0.b
335 ; CHECK-NEXT: smaxv b0, p0, z0.b
336 ; CHECK-NEXT: fmov w0, s0
338 %res = call i8 @llvm.vector.reduce.smax.nxv16i8(<vscale x 16 x i8> %a)
; Signed-max reduction of a <vscale x 8 x i16> argument via llvm.vector.reduce.smax.nxv8i16.
; Expected code: ptrue p0.h, SMAXV into h0, then fmov of s0 into w0.
342 define i16 @smax_nxv8i16(<vscale x 8 x i16> %a) {
343 ; CHECK-LABEL: smax_nxv8i16:
345 ; CHECK-NEXT: ptrue p0.h
346 ; CHECK-NEXT: smaxv h0, p0, z0.h
347 ; CHECK-NEXT: fmov w0, s0
349 %res = call i16 @llvm.vector.reduce.smax.nxv8i16(<vscale x 8 x i16> %a)
; Signed-max reduction of a <vscale x 4 x i32> argument via llvm.vector.reduce.smax.nxv4i32.
; Expected code: ptrue p0.s, SMAXV into s0, then fmov of s0 into w0.
353 define i32 @smax_nxv4i32(<vscale x 4 x i32> %a) {
354 ; CHECK-LABEL: smax_nxv4i32:
356 ; CHECK-NEXT: ptrue p0.s
357 ; CHECK-NEXT: smaxv s0, p0, z0.s
358 ; CHECK-NEXT: fmov w0, s0
360 %res = call i32 @llvm.vector.reduce.smax.nxv4i32(<vscale x 4 x i32> %a)
; Signed-max reduction of a <vscale x 2 x i64> argument via llvm.vector.reduce.smax.nxv2i64.
; Expected code: ptrue p0.d, SMAXV into d0, then a 64-bit fmov of d0 into x0.
364 define i64 @smax_nxv2i64(<vscale x 2 x i64> %a) {
365 ; CHECK-LABEL: smax_nxv2i64:
367 ; CHECK-NEXT: ptrue p0.d
368 ; CHECK-NEXT: smaxv d0, p0, z0.d
369 ; CHECK-NEXT: fmov x0, d0
371 %res = call i64 @llvm.vector.reduce.smax.nxv2i64(<vscale x 2 x i64> %a)
375 ; Test reductions whose vector operand type must be widened (nxv10i8, nxv12i8, nxv14i8).
376 declare i8 @llvm.vector.reduce.smin.nxv10i8(<vscale x 10 x i8>)
; Signed-min reduction of a <vscale x 10 x i8> argument via llvm.vector.reduce.smin.nxv10i8.
; The expected code rebuilds a full .b vector with unpack/uzp1 sequences before a single
; SMINV over z0.b under ptrue p0.b.
; NOTE(review): z1 is splatted with #127 (0x7f), presumably the i8 smin identity used to
; fill the extra lanes -- confirm against the widening lowering.
; NOTE(review): the unpack/uzp1 pattern repeats three times; the assertions are
; autogenerated, so this mirrors llc output rather than a hand-written expectation.
378 define i8 @smin_nxv10i8(<vscale x 10 x i8> %a) {
379 ; CHECK-LABEL: smin_nxv10i8:
381 ; CHECK-NEXT: uunpkhi z2.h, z0.b
382 ; CHECK-NEXT: mov z1.d, #127 // =0x7f
383 ; CHECK-NEXT: uunpklo z0.h, z0.b
384 ; CHECK-NEXT: ptrue p0.b
385 ; CHECK-NEXT: uunpklo z3.s, z2.h
386 ; CHECK-NEXT: uunpkhi z2.s, z2.h
387 ; CHECK-NEXT: uunpklo z3.d, z3.s
388 ; CHECK-NEXT: uzp1 z3.s, z3.s, z1.s
389 ; CHECK-NEXT: uzp1 z2.h, z3.h, z2.h
390 ; CHECK-NEXT: uzp1 z2.b, z0.b, z2.b
391 ; CHECK-NEXT: uunpkhi z2.h, z2.b
392 ; CHECK-NEXT: uunpkhi z3.s, z2.h
393 ; CHECK-NEXT: uunpklo z2.s, z2.h
394 ; CHECK-NEXT: uunpkhi z3.d, z3.s
395 ; CHECK-NEXT: uzp1 z3.s, z1.s, z3.s
396 ; CHECK-NEXT: uzp1 z2.h, z2.h, z3.h
397 ; CHECK-NEXT: uzp1 z2.b, z0.b, z2.b
398 ; CHECK-NEXT: uunpkhi z2.h, z2.b
399 ; CHECK-NEXT: uunpkhi z3.s, z2.h
400 ; CHECK-NEXT: uunpklo z2.s, z2.h
401 ; CHECK-NEXT: uunpklo z3.d, z3.s
402 ; CHECK-NEXT: uzp1 z1.s, z3.s, z1.s
403 ; CHECK-NEXT: uzp1 z1.h, z2.h, z1.h
404 ; CHECK-NEXT: uzp1 z0.b, z0.b, z1.b
405 ; CHECK-NEXT: sminv b0, p0, z0.b
406 ; CHECK-NEXT: fmov w0, s0
408 %res = call i8 @llvm.vector.reduce.smin.nxv10i8(<vscale x 10 x i8> %a)
412 declare i8 @llvm.vector.reduce.add.nxv12i8(<vscale x 12 x i8>)
; ADD-reduction of a <vscale x 12 x i8> argument via llvm.vector.reduce.add.nxv12i8.
; The expected code rebuilds a full .b vector with unpack/uzp1 before a single UADDV over
; z0.b under ptrue p0.b; the result goes through d0/x0 with a kill annotation for w0.
; NOTE(review): z1 is splatted with #0, presumably the add identity used to fill the
; extra lanes -- confirm against the widening lowering.
414 define i8 @uaddv_nxv12i8(<vscale x 12 x i8> %a) {
415 ; CHECK-LABEL: uaddv_nxv12i8:
417 ; CHECK-NEXT: uunpkhi z2.h, z0.b
418 ; CHECK-NEXT: mov z1.s, #0 // =0x0
419 ; CHECK-NEXT: uunpklo z0.h, z0.b
420 ; CHECK-NEXT: ptrue p0.b
421 ; CHECK-NEXT: uunpklo z2.s, z2.h
422 ; CHECK-NEXT: uzp1 z1.h, z2.h, z1.h
423 ; CHECK-NEXT: uzp1 z0.b, z0.b, z1.b
424 ; CHECK-NEXT: uaddv d0, p0, z0.b
425 ; CHECK-NEXT: fmov x0, d0
426 ; CHECK-NEXT: // kill: def $w0 killed $w0 killed $x0
428 %res = call i8 @llvm.vector.reduce.add.nxv12i8(<vscale x 12 x i8> %a)
432 declare i8 @llvm.vector.reduce.umax.nxv14i8(<vscale x 14 x i8>)
; Unsigned-max reduction of a <vscale x 14 x i8> argument via llvm.vector.reduce.umax.nxv14i8.
; The expected code rebuilds a full .b vector with unpack/uzp1 before a single UMAXV over
; z0.b under ptrue p0.b, then moves s0 into w0.
; NOTE(review): z1 is splatted with #0, presumably the umax identity used to fill the
; extra lanes -- confirm against the widening lowering.
434 define i8 @umax_nxv14i8(<vscale x 14 x i8> %a) {
435 ; CHECK-LABEL: umax_nxv14i8:
437 ; CHECK-NEXT: uunpkhi z2.h, z0.b
438 ; CHECK-NEXT: mov z1.d, #0 // =0x0
439 ; CHECK-NEXT: uunpklo z0.h, z0.b
440 ; CHECK-NEXT: ptrue p0.b
441 ; CHECK-NEXT: uunpkhi z3.s, z2.h
442 ; CHECK-NEXT: uunpklo z2.s, z2.h
443 ; CHECK-NEXT: uunpklo z3.d, z3.s
444 ; CHECK-NEXT: uzp1 z1.s, z3.s, z1.s
445 ; CHECK-NEXT: uzp1 z1.h, z2.h, z1.h
446 ; CHECK-NEXT: uzp1 z0.b, z0.b, z1.b
447 ; CHECK-NEXT: umaxv b0, p0, z0.b
448 ; CHECK-NEXT: fmov w0, s0
450 %res = call i8 @llvm.vector.reduce.umax.nxv14i8(<vscale x 14 x i8> %a)
454 declare i8 @llvm.vector.reduce.and.nxv16i8(<vscale x 16 x i8>)
455 declare i16 @llvm.vector.reduce.and.nxv8i16(<vscale x 8 x i16>)
456 declare i32 @llvm.vector.reduce.and.nxv4i32(<vscale x 4 x i32>)
457 declare i64 @llvm.vector.reduce.and.nxv2i64(<vscale x 2 x i64>)
459 declare i8 @llvm.vector.reduce.or.nxv16i8(<vscale x 16 x i8>)
460 declare i16 @llvm.vector.reduce.or.nxv8i16(<vscale x 8 x i16>)
461 declare i32 @llvm.vector.reduce.or.nxv4i32(<vscale x 4 x i32>)
462 declare i64 @llvm.vector.reduce.or.nxv2i64(<vscale x 2 x i64>)
464 declare i8 @llvm.vector.reduce.xor.nxv16i8(<vscale x 16 x i8>)
465 declare i16 @llvm.vector.reduce.xor.nxv8i16(<vscale x 8 x i16>)
466 declare i32 @llvm.vector.reduce.xor.nxv4i32(<vscale x 4 x i32>)
467 declare i64 @llvm.vector.reduce.xor.nxv2i64(<vscale x 2 x i64>)
469 declare i8 @llvm.vector.reduce.add.nxv16i8(<vscale x 16 x i8>)
470 declare i16 @llvm.vector.reduce.add.nxv8i16(<vscale x 8 x i16>)
471 declare i32 @llvm.vector.reduce.add.nxv4i32(<vscale x 4 x i32>)
472 declare i64 @llvm.vector.reduce.add.nxv2i64(<vscale x 2 x i64>)
474 declare i8 @llvm.vector.reduce.umin.nxv16i8(<vscale x 16 x i8>)
475 declare i16 @llvm.vector.reduce.umin.nxv8i16(<vscale x 8 x i16>)
476 declare i32 @llvm.vector.reduce.umin.nxv4i32(<vscale x 4 x i32>)
477 declare i64 @llvm.vector.reduce.umin.nxv2i64(<vscale x 2 x i64>)
479 declare i8 @llvm.vector.reduce.smin.nxv16i8(<vscale x 16 x i8>)
480 declare i16 @llvm.vector.reduce.smin.nxv8i16(<vscale x 8 x i16>)
481 declare i32 @llvm.vector.reduce.smin.nxv4i32(<vscale x 4 x i32>)
482 declare i64 @llvm.vector.reduce.smin.nxv2i64(<vscale x 2 x i64>)
484 declare i8 @llvm.vector.reduce.umax.nxv16i8(<vscale x 16 x i8>)
485 declare i16 @llvm.vector.reduce.umax.nxv8i16(<vscale x 8 x i16>)
486 declare i32 @llvm.vector.reduce.umax.nxv4i32(<vscale x 4 x i32>)
487 declare i64 @llvm.vector.reduce.umax.nxv2i64(<vscale x 2 x i64>)
489 declare i8 @llvm.vector.reduce.smax.nxv16i8(<vscale x 16 x i8>)
490 declare i16 @llvm.vector.reduce.smax.nxv8i16(<vscale x 8 x i16>)
491 declare i32 @llvm.vector.reduce.smax.nxv4i32(<vscale x 4 x i32>)
492 declare i64 @llvm.vector.reduce.smax.nxv2i64(<vscale x 2 x i64>)