1 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2 ; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sve < %s | FileCheck %s
; andv_nxv16i8: bitwise-AND reduction over every lane of a <vscale x 16 x i8>
; vector through llvm.vector.reduce.and. Expected SVE output: ptrue p0.b
; supplies the predicate, ANDV on .b elements leaves the result in b0, and
; fmov copies it out to w0.
6 define i8 @andv_nxv16i8(<vscale x 16 x i8> %a) {
7 ; CHECK-LABEL: andv_nxv16i8:
9 ; CHECK-NEXT: ptrue p0.b
10 ; CHECK-NEXT: andv b0, p0, z0.b
11 ; CHECK-NEXT: fmov w0, s0
13 %res = call i8 @llvm.vector.reduce.and.nxv16i8(<vscale x 16 x i8> %a)
; andv_nxv8i16: bitwise-AND reduction over every lane of a <vscale x 8 x i16>
; vector through llvm.vector.reduce.and. Expected SVE output: ptrue p0.h
; supplies the predicate, ANDV on .h elements leaves the result in h0, and
; fmov copies it out to w0.
17 define i16 @andv_nxv8i16(<vscale x 8 x i16> %a) {
18 ; CHECK-LABEL: andv_nxv8i16:
20 ; CHECK-NEXT: ptrue p0.h
21 ; CHECK-NEXT: andv h0, p0, z0.h
22 ; CHECK-NEXT: fmov w0, s0
24 %res = call i16 @llvm.vector.reduce.and.nxv8i16(<vscale x 8 x i16> %a)
; andv_nxv4i32: bitwise-AND reduction over every lane of a <vscale x 4 x i32>
; vector through llvm.vector.reduce.and. Expected SVE output: ptrue p0.s
; supplies the predicate, ANDV on .s elements leaves the result in s0, and
; fmov copies it out to w0.
28 define i32 @andv_nxv4i32(<vscale x 4 x i32> %a) {
29 ; CHECK-LABEL: andv_nxv4i32:
31 ; CHECK-NEXT: ptrue p0.s
32 ; CHECK-NEXT: andv s0, p0, z0.s
33 ; CHECK-NEXT: fmov w0, s0
35 %res = call i32 @llvm.vector.reduce.and.nxv4i32(<vscale x 4 x i32> %a)
; andv_nxv2i64: bitwise-AND reduction over every lane of a <vscale x 2 x i64>
; vector through llvm.vector.reduce.and. Expected SVE output: ptrue p0.d
; supplies the predicate, ANDV on .d elements leaves the result in d0, and a
; 64-bit fmov copies it out to x0.
39 define i64 @andv_nxv2i64(<vscale x 2 x i64> %a) {
40 ; CHECK-LABEL: andv_nxv2i64:
42 ; CHECK-NEXT: ptrue p0.d
43 ; CHECK-NEXT: andv d0, p0, z0.d
44 ; CHECK-NEXT: fmov x0, d0
46 %res = call i64 @llvm.vector.reduce.and.nxv2i64(<vscale x 2 x i64> %a)
; orv_nxv16i8: bitwise-OR reduction over every lane of a <vscale x 16 x i8>
; vector through llvm.vector.reduce.or. Expected SVE output: ptrue p0.b
; supplies the predicate, ORV on .b elements leaves the result in b0, and
; fmov copies it out to w0.
52 define i8 @orv_nxv16i8(<vscale x 16 x i8> %a) {
53 ; CHECK-LABEL: orv_nxv16i8:
55 ; CHECK-NEXT: ptrue p0.b
56 ; CHECK-NEXT: orv b0, p0, z0.b
57 ; CHECK-NEXT: fmov w0, s0
59 %res = call i8 @llvm.vector.reduce.or.nxv16i8(<vscale x 16 x i8> %a)
; orv_nxv8i16: bitwise-OR reduction over every lane of a <vscale x 8 x i16>
; vector through llvm.vector.reduce.or. Expected SVE output: ptrue p0.h
; supplies the predicate, ORV on .h elements leaves the result in h0, and
; fmov copies it out to w0.
63 define i16 @orv_nxv8i16(<vscale x 8 x i16> %a) {
64 ; CHECK-LABEL: orv_nxv8i16:
66 ; CHECK-NEXT: ptrue p0.h
67 ; CHECK-NEXT: orv h0, p0, z0.h
68 ; CHECK-NEXT: fmov w0, s0
70 %res = call i16 @llvm.vector.reduce.or.nxv8i16(<vscale x 8 x i16> %a)
; orv_nxv4i32: bitwise-OR reduction over every lane of a <vscale x 4 x i32>
; vector through llvm.vector.reduce.or. Expected SVE output: ptrue p0.s
; supplies the predicate, ORV on .s elements leaves the result in s0, and
; fmov copies it out to w0.
74 define i32 @orv_nxv4i32(<vscale x 4 x i32> %a) {
75 ; CHECK-LABEL: orv_nxv4i32:
77 ; CHECK-NEXT: ptrue p0.s
78 ; CHECK-NEXT: orv s0, p0, z0.s
79 ; CHECK-NEXT: fmov w0, s0
81 %res = call i32 @llvm.vector.reduce.or.nxv4i32(<vscale x 4 x i32> %a)
; orv_nxv2i64: bitwise-OR reduction over every lane of a <vscale x 2 x i64>
; vector through llvm.vector.reduce.or. Expected SVE output: ptrue p0.d
; supplies the predicate, ORV on .d elements leaves the result in d0, and a
; 64-bit fmov copies it out to x0.
85 define i64 @orv_nxv2i64(<vscale x 2 x i64> %a) {
86 ; CHECK-LABEL: orv_nxv2i64:
88 ; CHECK-NEXT: ptrue p0.d
89 ; CHECK-NEXT: orv d0, p0, z0.d
90 ; CHECK-NEXT: fmov x0, d0
92 %res = call i64 @llvm.vector.reduce.or.nxv2i64(<vscale x 2 x i64> %a)
; xorv_nxv16i8: bitwise-XOR reduction over every lane of a <vscale x 16 x i8>
; vector through llvm.vector.reduce.xor. Expected SVE output: ptrue p0.b
; supplies the predicate, EORV (the SVE mnemonic the checks expect for XOR)
; on .b elements leaves the result in b0, and fmov copies it out to w0.
98 define i8 @xorv_nxv16i8(<vscale x 16 x i8> %a) {
99 ; CHECK-LABEL: xorv_nxv16i8:
101 ; CHECK-NEXT: ptrue p0.b
102 ; CHECK-NEXT: eorv b0, p0, z0.b
103 ; CHECK-NEXT: fmov w0, s0
105 %res = call i8 @llvm.vector.reduce.xor.nxv16i8(<vscale x 16 x i8> %a)
; xorv_nxv8i16: bitwise-XOR reduction over every lane of a <vscale x 8 x i16>
; vector through llvm.vector.reduce.xor. Expected SVE output: ptrue p0.h
; supplies the predicate, EORV (the SVE mnemonic the checks expect for XOR)
; on .h elements leaves the result in h0, and fmov copies it out to w0.
109 define i16 @xorv_nxv8i16(<vscale x 8 x i16> %a) {
110 ; CHECK-LABEL: xorv_nxv8i16:
112 ; CHECK-NEXT: ptrue p0.h
113 ; CHECK-NEXT: eorv h0, p0, z0.h
114 ; CHECK-NEXT: fmov w0, s0
116 %res = call i16 @llvm.vector.reduce.xor.nxv8i16(<vscale x 8 x i16> %a)
; xorv_nxv4i32: bitwise-XOR reduction over every lane of a <vscale x 4 x i32>
; vector through llvm.vector.reduce.xor. Expected SVE output: ptrue p0.s
; supplies the predicate, EORV (the SVE mnemonic the checks expect for XOR)
; on .s elements leaves the result in s0, and fmov copies it out to w0.
120 define i32 @xorv_nxv4i32(<vscale x 4 x i32> %a) {
121 ; CHECK-LABEL: xorv_nxv4i32:
123 ; CHECK-NEXT: ptrue p0.s
124 ; CHECK-NEXT: eorv s0, p0, z0.s
125 ; CHECK-NEXT: fmov w0, s0
127 %res = call i32 @llvm.vector.reduce.xor.nxv4i32(<vscale x 4 x i32> %a)
; xorv_nxv2i64: bitwise-XOR reduction over every lane of a <vscale x 2 x i64>
; vector through llvm.vector.reduce.xor. Expected SVE output: ptrue p0.d
; supplies the predicate, EORV (the SVE mnemonic the checks expect for XOR)
; on .d elements leaves the result in d0, and a 64-bit fmov copies it to x0.
131 define i64 @xorv_nxv2i64(<vscale x 2 x i64> %a) {
132 ; CHECK-LABEL: xorv_nxv2i64:
134 ; CHECK-NEXT: ptrue p0.d
135 ; CHECK-NEXT: eorv d0, p0, z0.d
136 ; CHECK-NEXT: fmov x0, d0
138 %res = call i64 @llvm.vector.reduce.xor.nxv2i64(<vscale x 2 x i64> %a)
; uaddv_nxv16i8: add reduction over every lane of a <vscale x 16 x i8> vector
; through llvm.vector.reduce.add. Unlike the bitwise reductions in this file,
; the checks expect UADDV to write the 64-bit d0 even though the elements are
; bytes, so the value is moved out with a 64-bit fmov to x0. The trailing kill
; annotation is printed by llc and shows w0 being taken from x0.
144 define i8 @uaddv_nxv16i8(<vscale x 16 x i8> %a) {
145 ; CHECK-LABEL: uaddv_nxv16i8:
147 ; CHECK-NEXT: ptrue p0.b
148 ; CHECK-NEXT: uaddv d0, p0, z0.b
149 ; CHECK-NEXT: fmov x0, d0
150 ; CHECK-NEXT: // kill: def $w0 killed $w0 killed $x0
152 %res = call i8 @llvm.vector.reduce.add.nxv16i8(<vscale x 16 x i8> %a)
; uaddv_nxv8i16: add reduction over every lane of a <vscale x 8 x i16> vector
; through llvm.vector.reduce.add. The checks expect UADDV to write the 64-bit
; d0 even though the elements are halfwords, so the value is moved out with a
; 64-bit fmov to x0. The trailing kill annotation is printed by llc and shows
; w0 being taken from x0.
156 define i16 @uaddv_nxv8i16(<vscale x 8 x i16> %a) {
157 ; CHECK-LABEL: uaddv_nxv8i16:
159 ; CHECK-NEXT: ptrue p0.h
160 ; CHECK-NEXT: uaddv d0, p0, z0.h
161 ; CHECK-NEXT: fmov x0, d0
162 ; CHECK-NEXT: // kill: def $w0 killed $w0 killed $x0
164 %res = call i16 @llvm.vector.reduce.add.nxv8i16(<vscale x 8 x i16> %a)
; uaddv_nxv4i32: add reduction over every lane of a <vscale x 4 x i32> vector
; through llvm.vector.reduce.add. The checks expect UADDV to write the 64-bit
; d0 even though the elements are words, so the value is moved out with a
; 64-bit fmov to x0. The trailing kill annotation is printed by llc and shows
; w0 being taken from x0.
168 define i32 @uaddv_nxv4i32(<vscale x 4 x i32> %a) {
169 ; CHECK-LABEL: uaddv_nxv4i32:
171 ; CHECK-NEXT: ptrue p0.s
172 ; CHECK-NEXT: uaddv d0, p0, z0.s
173 ; CHECK-NEXT: fmov x0, d0
174 ; CHECK-NEXT: // kill: def $w0 killed $w0 killed $x0
176 %res = call i32 @llvm.vector.reduce.add.nxv4i32(<vscale x 4 x i32> %a)
; uaddv_nxv2i64: add reduction over every lane of a <vscale x 2 x i64> vector
; through llvm.vector.reduce.add. Expected SVE output: ptrue p0.d supplies the
; predicate, UADDV on .d elements leaves the sum in d0, and a 64-bit fmov
; copies it to x0. No kill annotation is expected here because the full x0 is
; the return value.
180 define i64 @uaddv_nxv2i64(<vscale x 2 x i64> %a) {
181 ; CHECK-LABEL: uaddv_nxv2i64:
183 ; CHECK-NEXT: ptrue p0.d
184 ; CHECK-NEXT: uaddv d0, p0, z0.d
185 ; CHECK-NEXT: fmov x0, d0
187 %res = call i64 @llvm.vector.reduce.add.nxv2i64(<vscale x 2 x i64> %a)
; umin_nxv16i8: unsigned-minimum reduction over every lane of a
; <vscale x 16 x i8> vector through llvm.vector.reduce.umin. Expected SVE
; output: ptrue p0.b supplies the predicate, UMINV on .b elements leaves the
; result in b0, and fmov copies it out to w0.
193 define i8 @umin_nxv16i8(<vscale x 16 x i8> %a) {
194 ; CHECK-LABEL: umin_nxv16i8:
196 ; CHECK-NEXT: ptrue p0.b
197 ; CHECK-NEXT: uminv b0, p0, z0.b
198 ; CHECK-NEXT: fmov w0, s0
200 %res = call i8 @llvm.vector.reduce.umin.nxv16i8(<vscale x 16 x i8> %a)
; umin_nxv8i16: unsigned-minimum reduction over every lane of a
; <vscale x 8 x i16> vector through llvm.vector.reduce.umin. Expected SVE
; output: ptrue p0.h supplies the predicate, UMINV on .h elements leaves the
; result in h0, and fmov copies it out to w0.
204 define i16 @umin_nxv8i16(<vscale x 8 x i16> %a) {
205 ; CHECK-LABEL: umin_nxv8i16:
207 ; CHECK-NEXT: ptrue p0.h
208 ; CHECK-NEXT: uminv h0, p0, z0.h
209 ; CHECK-NEXT: fmov w0, s0
211 %res = call i16 @llvm.vector.reduce.umin.nxv8i16(<vscale x 8 x i16> %a)
; umin_nxv4i32: unsigned-minimum reduction over every lane of a
; <vscale x 4 x i32> vector through llvm.vector.reduce.umin. Expected SVE
; output: ptrue p0.s supplies the predicate, UMINV on .s elements leaves the
; result in s0, and fmov copies it out to w0.
215 define i32 @umin_nxv4i32(<vscale x 4 x i32> %a) {
216 ; CHECK-LABEL: umin_nxv4i32:
218 ; CHECK-NEXT: ptrue p0.s
219 ; CHECK-NEXT: uminv s0, p0, z0.s
220 ; CHECK-NEXT: fmov w0, s0
222 %res = call i32 @llvm.vector.reduce.umin.nxv4i32(<vscale x 4 x i32> %a)
; umin_nxv2i64: unsigned-minimum reduction over every lane of a
; <vscale x 2 x i64> vector through llvm.vector.reduce.umin. Expected SVE
; output: ptrue p0.d supplies the predicate, UMINV on .d elements leaves the
; result in d0, and a 64-bit fmov copies it out to x0.
226 define i64 @umin_nxv2i64(<vscale x 2 x i64> %a) {
227 ; CHECK-LABEL: umin_nxv2i64:
229 ; CHECK-NEXT: ptrue p0.d
230 ; CHECK-NEXT: uminv d0, p0, z0.d
231 ; CHECK-NEXT: fmov x0, d0
233 %res = call i64 @llvm.vector.reduce.umin.nxv2i64(<vscale x 2 x i64> %a)
; smin_nxv16i8: signed-minimum reduction over every lane of a
; <vscale x 16 x i8> vector through llvm.vector.reduce.smin. Expected SVE
; output: ptrue p0.b supplies the predicate, SMINV on .b elements leaves the
; result in b0, and fmov copies it out to w0.
239 define i8 @smin_nxv16i8(<vscale x 16 x i8> %a) {
240 ; CHECK-LABEL: smin_nxv16i8:
242 ; CHECK-NEXT: ptrue p0.b
243 ; CHECK-NEXT: sminv b0, p0, z0.b
244 ; CHECK-NEXT: fmov w0, s0
246 %res = call i8 @llvm.vector.reduce.smin.nxv16i8(<vscale x 16 x i8> %a)
; smin_nxv8i16: signed-minimum reduction over every lane of a
; <vscale x 8 x i16> vector through llvm.vector.reduce.smin. Expected SVE
; output: ptrue p0.h supplies the predicate, SMINV on .h elements leaves the
; result in h0, and fmov copies it out to w0.
250 define i16 @smin_nxv8i16(<vscale x 8 x i16> %a) {
251 ; CHECK-LABEL: smin_nxv8i16:
253 ; CHECK-NEXT: ptrue p0.h
254 ; CHECK-NEXT: sminv h0, p0, z0.h
255 ; CHECK-NEXT: fmov w0, s0
257 %res = call i16 @llvm.vector.reduce.smin.nxv8i16(<vscale x 8 x i16> %a)
; smin_nxv4i32: signed-minimum reduction over every lane of a
; <vscale x 4 x i32> vector through llvm.vector.reduce.smin. Expected SVE
; output: ptrue p0.s supplies the predicate, SMINV on .s elements leaves the
; result in s0, and fmov copies it out to w0.
261 define i32 @smin_nxv4i32(<vscale x 4 x i32> %a) {
262 ; CHECK-LABEL: smin_nxv4i32:
264 ; CHECK-NEXT: ptrue p0.s
265 ; CHECK-NEXT: sminv s0, p0, z0.s
266 ; CHECK-NEXT: fmov w0, s0
268 %res = call i32 @llvm.vector.reduce.smin.nxv4i32(<vscale x 4 x i32> %a)
; smin_nxv2i64: signed-minimum reduction over every lane of a
; <vscale x 2 x i64> vector through llvm.vector.reduce.smin. Expected SVE
; output: ptrue p0.d supplies the predicate, SMINV on .d elements leaves the
; result in d0, and a 64-bit fmov copies it out to x0.
272 define i64 @smin_nxv2i64(<vscale x 2 x i64> %a) {
273 ; CHECK-LABEL: smin_nxv2i64:
275 ; CHECK-NEXT: ptrue p0.d
276 ; CHECK-NEXT: sminv d0, p0, z0.d
277 ; CHECK-NEXT: fmov x0, d0
279 %res = call i64 @llvm.vector.reduce.smin.nxv2i64(<vscale x 2 x i64> %a)
; umax_nxv16i8: unsigned-maximum reduction over every lane of a
; <vscale x 16 x i8> vector through llvm.vector.reduce.umax. Expected SVE
; output: ptrue p0.b supplies the predicate, UMAXV on .b elements leaves the
; result in b0, and fmov copies it out to w0.
285 define i8 @umax_nxv16i8(<vscale x 16 x i8> %a) {
286 ; CHECK-LABEL: umax_nxv16i8:
288 ; CHECK-NEXT: ptrue p0.b
289 ; CHECK-NEXT: umaxv b0, p0, z0.b
290 ; CHECK-NEXT: fmov w0, s0
292 %res = call i8 @llvm.vector.reduce.umax.nxv16i8(<vscale x 16 x i8> %a)
; umax_nxv8i16: unsigned-maximum reduction over every lane of a
; <vscale x 8 x i16> vector through llvm.vector.reduce.umax. Expected SVE
; output: ptrue p0.h supplies the predicate, UMAXV on .h elements leaves the
; result in h0, and fmov copies it out to w0.
296 define i16 @umax_nxv8i16(<vscale x 8 x i16> %a) {
297 ; CHECK-LABEL: umax_nxv8i16:
299 ; CHECK-NEXT: ptrue p0.h
300 ; CHECK-NEXT: umaxv h0, p0, z0.h
301 ; CHECK-NEXT: fmov w0, s0
303 %res = call i16 @llvm.vector.reduce.umax.nxv8i16(<vscale x 8 x i16> %a)
; umax_nxv4i32: unsigned-maximum reduction over every lane of a
; <vscale x 4 x i32> vector through llvm.vector.reduce.umax. Expected SVE
; output: ptrue p0.s supplies the predicate, UMAXV on .s elements leaves the
; result in s0, and fmov copies it out to w0.
307 define i32 @umax_nxv4i32(<vscale x 4 x i32> %a) {
308 ; CHECK-LABEL: umax_nxv4i32:
310 ; CHECK-NEXT: ptrue p0.s
311 ; CHECK-NEXT: umaxv s0, p0, z0.s
312 ; CHECK-NEXT: fmov w0, s0
314 %res = call i32 @llvm.vector.reduce.umax.nxv4i32(<vscale x 4 x i32> %a)
; umax_nxv2i64: unsigned-maximum reduction over every lane of a
; <vscale x 2 x i64> vector through llvm.vector.reduce.umax. Expected SVE
; output: ptrue p0.d supplies the predicate, UMAXV on .d elements leaves the
; result in d0, and a 64-bit fmov copies it out to x0.
318 define i64 @umax_nxv2i64(<vscale x 2 x i64> %a) {
319 ; CHECK-LABEL: umax_nxv2i64:
321 ; CHECK-NEXT: ptrue p0.d
322 ; CHECK-NEXT: umaxv d0, p0, z0.d
323 ; CHECK-NEXT: fmov x0, d0
325 %res = call i64 @llvm.vector.reduce.umax.nxv2i64(<vscale x 2 x i64> %a)
; smax_nxv16i8: signed-maximum reduction over every lane of a
; <vscale x 16 x i8> vector through llvm.vector.reduce.smax. Expected SVE
; output: ptrue p0.b supplies the predicate, SMAXV on .b elements leaves the
; result in b0, and fmov copies it out to w0.
331 define i8 @smax_nxv16i8(<vscale x 16 x i8> %a) {
332 ; CHECK-LABEL: smax_nxv16i8:
334 ; CHECK-NEXT: ptrue p0.b
335 ; CHECK-NEXT: smaxv b0, p0, z0.b
336 ; CHECK-NEXT: fmov w0, s0
338 %res = call i8 @llvm.vector.reduce.smax.nxv16i8(<vscale x 16 x i8> %a)
; smax_nxv8i16: signed-maximum reduction over every lane of a
; <vscale x 8 x i16> vector through llvm.vector.reduce.smax. Expected SVE
; output: ptrue p0.h supplies the predicate, SMAXV on .h elements leaves the
; result in h0, and fmov copies it out to w0.
342 define i16 @smax_nxv8i16(<vscale x 8 x i16> %a) {
343 ; CHECK-LABEL: smax_nxv8i16:
345 ; CHECK-NEXT: ptrue p0.h
346 ; CHECK-NEXT: smaxv h0, p0, z0.h
347 ; CHECK-NEXT: fmov w0, s0
349 %res = call i16 @llvm.vector.reduce.smax.nxv8i16(<vscale x 8 x i16> %a)
; smax_nxv4i32: signed-maximum reduction over every lane of a
; <vscale x 4 x i32> vector through llvm.vector.reduce.smax. Expected SVE
; output: ptrue p0.s supplies the predicate, SMAXV on .s elements leaves the
; result in s0, and fmov copies it out to w0.
353 define i32 @smax_nxv4i32(<vscale x 4 x i32> %a) {
354 ; CHECK-LABEL: smax_nxv4i32:
356 ; CHECK-NEXT: ptrue p0.s
357 ; CHECK-NEXT: smaxv s0, p0, z0.s
358 ; CHECK-NEXT: fmov w0, s0
360 %res = call i32 @llvm.vector.reduce.smax.nxv4i32(<vscale x 4 x i32> %a)
; smax_nxv2i64: signed-maximum reduction over every lane of a
; <vscale x 2 x i64> vector through llvm.vector.reduce.smax. Expected SVE
; output: ptrue p0.d supplies the predicate, SMAXV on .d elements leaves the
; result in d0, and a 64-bit fmov copies it out to x0.
364 define i64 @smax_nxv2i64(<vscale x 2 x i64> %a) {
365 ; CHECK-LABEL: smax_nxv2i64:
367 ; CHECK-NEXT: ptrue p0.d
368 ; CHECK-NEXT: smaxv d0, p0, z0.d
369 ; CHECK-NEXT: fmov x0, d0
371 %res = call i64 @llvm.vector.reduce.smax.nxv2i64(<vscale x 2 x i64> %a)
; Declarations of the llvm.vector.reduce.* intrinsics called by the test
; functions in this file. Each operation is declared for the four scalable
; vector types used here (nxv16i8, nxv8i16, nxv4i32, nxv2i64) and returns a
; scalar of the vector's element type.

; Bitwise AND reductions.
375 declare i8 @llvm.vector.reduce.and.nxv16i8(<vscale x 16 x i8>)
376 declare i16 @llvm.vector.reduce.and.nxv8i16(<vscale x 8 x i16>)
377 declare i32 @llvm.vector.reduce.and.nxv4i32(<vscale x 4 x i32>)
378 declare i64 @llvm.vector.reduce.and.nxv2i64(<vscale x 2 x i64>)
; Bitwise OR reductions.
380 declare i8 @llvm.vector.reduce.or.nxv16i8(<vscale x 16 x i8>)
381 declare i16 @llvm.vector.reduce.or.nxv8i16(<vscale x 8 x i16>)
382 declare i32 @llvm.vector.reduce.or.nxv4i32(<vscale x 4 x i32>)
383 declare i64 @llvm.vector.reduce.or.nxv2i64(<vscale x 2 x i64>)
; Bitwise XOR reductions.
385 declare i8 @llvm.vector.reduce.xor.nxv16i8(<vscale x 16 x i8>)
386 declare i16 @llvm.vector.reduce.xor.nxv8i16(<vscale x 8 x i16>)
387 declare i32 @llvm.vector.reduce.xor.nxv4i32(<vscale x 4 x i32>)
388 declare i64 @llvm.vector.reduce.xor.nxv2i64(<vscale x 2 x i64>)
; Integer add reductions.
390 declare i8 @llvm.vector.reduce.add.nxv16i8(<vscale x 16 x i8>)
391 declare i16 @llvm.vector.reduce.add.nxv8i16(<vscale x 8 x i16>)
392 declare i32 @llvm.vector.reduce.add.nxv4i32(<vscale x 4 x i32>)
393 declare i64 @llvm.vector.reduce.add.nxv2i64(<vscale x 2 x i64>)
; Unsigned minimum reductions.
395 declare i8 @llvm.vector.reduce.umin.nxv16i8(<vscale x 16 x i8>)
396 declare i16 @llvm.vector.reduce.umin.nxv8i16(<vscale x 8 x i16>)
397 declare i32 @llvm.vector.reduce.umin.nxv4i32(<vscale x 4 x i32>)
398 declare i64 @llvm.vector.reduce.umin.nxv2i64(<vscale x 2 x i64>)
; Signed minimum reductions.
400 declare i8 @llvm.vector.reduce.smin.nxv16i8(<vscale x 16 x i8>)
401 declare i16 @llvm.vector.reduce.smin.nxv8i16(<vscale x 8 x i16>)
402 declare i32 @llvm.vector.reduce.smin.nxv4i32(<vscale x 4 x i32>)
403 declare i64 @llvm.vector.reduce.smin.nxv2i64(<vscale x 2 x i64>)
; Unsigned maximum reductions.
405 declare i8 @llvm.vector.reduce.umax.nxv16i8(<vscale x 16 x i8>)
406 declare i16 @llvm.vector.reduce.umax.nxv8i16(<vscale x 8 x i16>)
407 declare i32 @llvm.vector.reduce.umax.nxv4i32(<vscale x 4 x i32>)
408 declare i64 @llvm.vector.reduce.umax.nxv2i64(<vscale x 2 x i64>)
; Signed maximum reductions.
410 declare i8 @llvm.vector.reduce.smax.nxv16i8(<vscale x 16 x i8>)
411 declare i16 @llvm.vector.reduce.smax.nxv8i16(<vscale x 8 x i16>)
412 declare i32 @llvm.vector.reduce.smax.nxv4i32(<vscale x 4 x i32>)
413 declare i64 @llvm.vector.reduce.smax.nxv2i64(<vscale x 2 x i64>)