1 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2 ; RUN: sed 's/iXLen/i32/g' %s | llc -mtriple=riscv32 -mattr=+v \
3 ; RUN: -verify-machineinstrs | FileCheck %s --check-prefixes=CHECK,RV32
4 ; RUN: sed 's/iXLen/i64/g' %s | llc -mtriple=riscv64 -mattr=+v \
5 ; RUN: -verify-machineinstrs | FileCheck %s --check-prefixes=CHECK,RV64
7 declare i8 @llvm.vector.reduce.add.nxv1i8(<vscale x 1 x i8>)
9 define signext i8 @vreduce_add_nxv1i8(<vscale x 1 x i8> %v) {
10 ; CHECK-LABEL: vreduce_add_nxv1i8:
12 ; CHECK-NEXT: vsetvli a0, zero, e8, mf8, ta, ma
13 ; CHECK-NEXT: vmv.s.x v9, zero
14 ; CHECK-NEXT: vredsum.vs v8, v8, v9
15 ; CHECK-NEXT: vmv.x.s a0, v8
17 %red = call i8 @llvm.vector.reduce.add.nxv1i8(<vscale x 1 x i8> %v)
21 declare i8 @llvm.vector.reduce.umax.nxv1i8(<vscale x 1 x i8>)
23 define signext i8 @vreduce_umax_nxv1i8(<vscale x 1 x i8> %v) {
24 ; CHECK-LABEL: vreduce_umax_nxv1i8:
26 ; CHECK-NEXT: vsetvli a0, zero, e8, mf8, ta, ma
27 ; CHECK-NEXT: vredmaxu.vs v8, v8, v8
28 ; CHECK-NEXT: vmv.x.s a0, v8
30 %red = call i8 @llvm.vector.reduce.umax.nxv1i8(<vscale x 1 x i8> %v)
34 declare i8 @llvm.vector.reduce.smax.nxv1i8(<vscale x 1 x i8>)
36 define signext i8 @vreduce_smax_nxv1i8(<vscale x 1 x i8> %v) {
37 ; CHECK-LABEL: vreduce_smax_nxv1i8:
39 ; CHECK-NEXT: vsetvli a0, zero, e8, mf8, ta, ma
40 ; CHECK-NEXT: vredmax.vs v8, v8, v8
41 ; CHECK-NEXT: vmv.x.s a0, v8
43 %red = call i8 @llvm.vector.reduce.smax.nxv1i8(<vscale x 1 x i8> %v)
47 declare i8 @llvm.vector.reduce.umin.nxv1i8(<vscale x 1 x i8>)
49 define signext i8 @vreduce_umin_nxv1i8(<vscale x 1 x i8> %v) {
50 ; CHECK-LABEL: vreduce_umin_nxv1i8:
52 ; CHECK-NEXT: vsetvli a0, zero, e8, mf8, ta, ma
53 ; CHECK-NEXT: vredminu.vs v8, v8, v8
54 ; CHECK-NEXT: vmv.x.s a0, v8
56 %red = call i8 @llvm.vector.reduce.umin.nxv1i8(<vscale x 1 x i8> %v)
60 declare i8 @llvm.vector.reduce.smin.nxv1i8(<vscale x 1 x i8>)
62 define signext i8 @vreduce_smin_nxv1i8(<vscale x 1 x i8> %v) {
63 ; CHECK-LABEL: vreduce_smin_nxv1i8:
65 ; CHECK-NEXT: vsetvli a0, zero, e8, mf8, ta, ma
66 ; CHECK-NEXT: vredmin.vs v8, v8, v8
67 ; CHECK-NEXT: vmv.x.s a0, v8
69 %red = call i8 @llvm.vector.reduce.smin.nxv1i8(<vscale x 1 x i8> %v)
73 declare i8 @llvm.vector.reduce.and.nxv1i8(<vscale x 1 x i8>)
75 define signext i8 @vreduce_and_nxv1i8(<vscale x 1 x i8> %v) {
76 ; CHECK-LABEL: vreduce_and_nxv1i8:
78 ; CHECK-NEXT: vsetvli a0, zero, e8, mf8, ta, ma
79 ; CHECK-NEXT: vredand.vs v8, v8, v8
80 ; CHECK-NEXT: vmv.x.s a0, v8
82 %red = call i8 @llvm.vector.reduce.and.nxv1i8(<vscale x 1 x i8> %v)
86 declare i8 @llvm.vector.reduce.or.nxv1i8(<vscale x 1 x i8>)
88 define signext i8 @vreduce_or_nxv1i8(<vscale x 1 x i8> %v) {
89 ; CHECK-LABEL: vreduce_or_nxv1i8:
91 ; CHECK-NEXT: vsetvli a0, zero, e8, mf8, ta, ma
92 ; CHECK-NEXT: vredor.vs v8, v8, v8
93 ; CHECK-NEXT: vmv.x.s a0, v8
95 %red = call i8 @llvm.vector.reduce.or.nxv1i8(<vscale x 1 x i8> %v)
99 declare i8 @llvm.vector.reduce.xor.nxv1i8(<vscale x 1 x i8>)
101 define signext i8 @vreduce_xor_nxv1i8(<vscale x 1 x i8> %v) {
102 ; CHECK-LABEL: vreduce_xor_nxv1i8:
104 ; CHECK-NEXT: vsetvli a0, zero, e8, mf8, ta, ma
105 ; CHECK-NEXT: vmv.s.x v9, zero
106 ; CHECK-NEXT: vredxor.vs v8, v8, v9
107 ; CHECK-NEXT: vmv.x.s a0, v8
109 %red = call i8 @llvm.vector.reduce.xor.nxv1i8(<vscale x 1 x i8> %v)
113 declare i8 @llvm.vector.reduce.add.nxv2i8(<vscale x 2 x i8>)
115 define signext i8 @vreduce_add_nxv2i8(<vscale x 2 x i8> %v) {
116 ; CHECK-LABEL: vreduce_add_nxv2i8:
118 ; CHECK-NEXT: vsetvli a0, zero, e8, mf4, ta, ma
119 ; CHECK-NEXT: vmv.s.x v9, zero
120 ; CHECK-NEXT: vredsum.vs v8, v8, v9
121 ; CHECK-NEXT: vmv.x.s a0, v8
123 %red = call i8 @llvm.vector.reduce.add.nxv2i8(<vscale x 2 x i8> %v)
127 declare i8 @llvm.vector.reduce.umax.nxv2i8(<vscale x 2 x i8>)
129 define signext i8 @vreduce_umax_nxv2i8(<vscale x 2 x i8> %v) {
130 ; CHECK-LABEL: vreduce_umax_nxv2i8:
132 ; CHECK-NEXT: vsetvli a0, zero, e8, mf4, ta, ma
133 ; CHECK-NEXT: vredmaxu.vs v8, v8, v8
134 ; CHECK-NEXT: vmv.x.s a0, v8
136 %red = call i8 @llvm.vector.reduce.umax.nxv2i8(<vscale x 2 x i8> %v)
140 declare i8 @llvm.vector.reduce.smax.nxv2i8(<vscale x 2 x i8>)
142 define signext i8 @vreduce_smax_nxv2i8(<vscale x 2 x i8> %v) {
143 ; CHECK-LABEL: vreduce_smax_nxv2i8:
145 ; CHECK-NEXT: vsetvli a0, zero, e8, mf4, ta, ma
146 ; CHECK-NEXT: vredmax.vs v8, v8, v8
147 ; CHECK-NEXT: vmv.x.s a0, v8
149 %red = call i8 @llvm.vector.reduce.smax.nxv2i8(<vscale x 2 x i8> %v)
153 declare i8 @llvm.vector.reduce.umin.nxv2i8(<vscale x 2 x i8>)
155 define signext i8 @vreduce_umin_nxv2i8(<vscale x 2 x i8> %v) {
156 ; CHECK-LABEL: vreduce_umin_nxv2i8:
158 ; CHECK-NEXT: vsetvli a0, zero, e8, mf4, ta, ma
159 ; CHECK-NEXT: vredminu.vs v8, v8, v8
160 ; CHECK-NEXT: vmv.x.s a0, v8
162 %red = call i8 @llvm.vector.reduce.umin.nxv2i8(<vscale x 2 x i8> %v)
166 declare i8 @llvm.vector.reduce.smin.nxv2i8(<vscale x 2 x i8>)
168 define signext i8 @vreduce_smin_nxv2i8(<vscale x 2 x i8> %v) {
169 ; CHECK-LABEL: vreduce_smin_nxv2i8:
171 ; CHECK-NEXT: vsetvli a0, zero, e8, mf4, ta, ma
172 ; CHECK-NEXT: vredmin.vs v8, v8, v8
173 ; CHECK-NEXT: vmv.x.s a0, v8
175 %red = call i8 @llvm.vector.reduce.smin.nxv2i8(<vscale x 2 x i8> %v)
179 declare i8 @llvm.vector.reduce.and.nxv2i8(<vscale x 2 x i8>)
181 define signext i8 @vreduce_and_nxv2i8(<vscale x 2 x i8> %v) {
182 ; CHECK-LABEL: vreduce_and_nxv2i8:
184 ; CHECK-NEXT: vsetvli a0, zero, e8, mf4, ta, ma
185 ; CHECK-NEXT: vredand.vs v8, v8, v8
186 ; CHECK-NEXT: vmv.x.s a0, v8
188 %red = call i8 @llvm.vector.reduce.and.nxv2i8(<vscale x 2 x i8> %v)
192 declare i8 @llvm.vector.reduce.or.nxv2i8(<vscale x 2 x i8>)
194 define signext i8 @vreduce_or_nxv2i8(<vscale x 2 x i8> %v) {
195 ; CHECK-LABEL: vreduce_or_nxv2i8:
197 ; CHECK-NEXT: vsetvli a0, zero, e8, mf4, ta, ma
198 ; CHECK-NEXT: vredor.vs v8, v8, v8
199 ; CHECK-NEXT: vmv.x.s a0, v8
201 %red = call i8 @llvm.vector.reduce.or.nxv2i8(<vscale x 2 x i8> %v)
205 declare i8 @llvm.vector.reduce.xor.nxv2i8(<vscale x 2 x i8>)
207 define signext i8 @vreduce_xor_nxv2i8(<vscale x 2 x i8> %v) {
208 ; CHECK-LABEL: vreduce_xor_nxv2i8:
210 ; CHECK-NEXT: vsetvli a0, zero, e8, mf4, ta, ma
211 ; CHECK-NEXT: vmv.s.x v9, zero
212 ; CHECK-NEXT: vredxor.vs v8, v8, v9
213 ; CHECK-NEXT: vmv.x.s a0, v8
215 %red = call i8 @llvm.vector.reduce.xor.nxv2i8(<vscale x 2 x i8> %v)
219 declare i8 @llvm.vector.reduce.add.nxv4i8(<vscale x 4 x i8>)
221 define signext i8 @vreduce_add_nxv4i8(<vscale x 4 x i8> %v) {
222 ; CHECK-LABEL: vreduce_add_nxv4i8:
224 ; CHECK-NEXT: vsetvli a0, zero, e8, mf2, ta, ma
225 ; CHECK-NEXT: vmv.s.x v9, zero
226 ; CHECK-NEXT: vredsum.vs v8, v8, v9
227 ; CHECK-NEXT: vmv.x.s a0, v8
229 %red = call i8 @llvm.vector.reduce.add.nxv4i8(<vscale x 4 x i8> %v)
233 declare i8 @llvm.vector.reduce.umax.nxv4i8(<vscale x 4 x i8>)
235 define signext i8 @vreduce_umax_nxv4i8(<vscale x 4 x i8> %v) {
236 ; CHECK-LABEL: vreduce_umax_nxv4i8:
238 ; CHECK-NEXT: vsetvli a0, zero, e8, mf2, ta, ma
239 ; CHECK-NEXT: vredmaxu.vs v8, v8, v8
240 ; CHECK-NEXT: vmv.x.s a0, v8
242 %red = call i8 @llvm.vector.reduce.umax.nxv4i8(<vscale x 4 x i8> %v)
246 declare i8 @llvm.vector.reduce.smax.nxv4i8(<vscale x 4 x i8>)
248 define signext i8 @vreduce_smax_nxv4i8(<vscale x 4 x i8> %v) {
249 ; CHECK-LABEL: vreduce_smax_nxv4i8:
251 ; CHECK-NEXT: vsetvli a0, zero, e8, mf2, ta, ma
252 ; CHECK-NEXT: vredmax.vs v8, v8, v8
253 ; CHECK-NEXT: vmv.x.s a0, v8
255 %red = call i8 @llvm.vector.reduce.smax.nxv4i8(<vscale x 4 x i8> %v)
259 declare i8 @llvm.vector.reduce.umin.nxv4i8(<vscale x 4 x i8>)
261 define signext i8 @vreduce_umin_nxv4i8(<vscale x 4 x i8> %v) {
262 ; CHECK-LABEL: vreduce_umin_nxv4i8:
264 ; CHECK-NEXT: vsetvli a0, zero, e8, mf2, ta, ma
265 ; CHECK-NEXT: vredminu.vs v8, v8, v8
266 ; CHECK-NEXT: vmv.x.s a0, v8
268 %red = call i8 @llvm.vector.reduce.umin.nxv4i8(<vscale x 4 x i8> %v)
272 declare i8 @llvm.vector.reduce.smin.nxv4i8(<vscale x 4 x i8>)
274 define signext i8 @vreduce_smin_nxv4i8(<vscale x 4 x i8> %v) {
275 ; CHECK-LABEL: vreduce_smin_nxv4i8:
277 ; CHECK-NEXT: vsetvli a0, zero, e8, mf2, ta, ma
278 ; CHECK-NEXT: vredmin.vs v8, v8, v8
279 ; CHECK-NEXT: vmv.x.s a0, v8
281 %red = call i8 @llvm.vector.reduce.smin.nxv4i8(<vscale x 4 x i8> %v)
285 declare i8 @llvm.vector.reduce.and.nxv4i8(<vscale x 4 x i8>)
287 define signext i8 @vreduce_and_nxv4i8(<vscale x 4 x i8> %v) {
288 ; CHECK-LABEL: vreduce_and_nxv4i8:
290 ; CHECK-NEXT: vsetvli a0, zero, e8, mf2, ta, ma
291 ; CHECK-NEXT: vredand.vs v8, v8, v8
292 ; CHECK-NEXT: vmv.x.s a0, v8
294 %red = call i8 @llvm.vector.reduce.and.nxv4i8(<vscale x 4 x i8> %v)
298 declare i8 @llvm.vector.reduce.or.nxv4i8(<vscale x 4 x i8>)
300 define signext i8 @vreduce_or_nxv4i8(<vscale x 4 x i8> %v) {
301 ; CHECK-LABEL: vreduce_or_nxv4i8:
303 ; CHECK-NEXT: vsetvli a0, zero, e8, mf2, ta, ma
304 ; CHECK-NEXT: vredor.vs v8, v8, v8
305 ; CHECK-NEXT: vmv.x.s a0, v8
307 %red = call i8 @llvm.vector.reduce.or.nxv4i8(<vscale x 4 x i8> %v)
311 declare i8 @llvm.vector.reduce.xor.nxv4i8(<vscale x 4 x i8>)
313 define signext i8 @vreduce_xor_nxv4i8(<vscale x 4 x i8> %v) {
314 ; CHECK-LABEL: vreduce_xor_nxv4i8:
316 ; CHECK-NEXT: vsetvli a0, zero, e8, mf2, ta, ma
317 ; CHECK-NEXT: vmv.s.x v9, zero
318 ; CHECK-NEXT: vredxor.vs v8, v8, v9
319 ; CHECK-NEXT: vmv.x.s a0, v8
321 %red = call i8 @llvm.vector.reduce.xor.nxv4i8(<vscale x 4 x i8> %v)
325 declare i16 @llvm.vector.reduce.add.nxv1i16(<vscale x 1 x i16>)
327 define signext i16 @vreduce_add_nxv1i16(<vscale x 1 x i16> %v) {
328 ; CHECK-LABEL: vreduce_add_nxv1i16:
330 ; CHECK-NEXT: vsetvli a0, zero, e16, mf4, ta, ma
331 ; CHECK-NEXT: vmv.s.x v9, zero
332 ; CHECK-NEXT: vredsum.vs v8, v8, v9
333 ; CHECK-NEXT: vmv.x.s a0, v8
335 %red = call i16 @llvm.vector.reduce.add.nxv1i16(<vscale x 1 x i16> %v)
339 define signext i16 @vwreduce_add_nxv1i8(<vscale x 1 x i8> %v) {
340 ; CHECK-LABEL: vwreduce_add_nxv1i8:
342 ; CHECK-NEXT: vsetvli a0, zero, e16, mf4, ta, ma
343 ; CHECK-NEXT: vmv.s.x v9, zero
344 ; CHECK-NEXT: vsetvli zero, zero, e8, mf8, ta, ma
345 ; CHECK-NEXT: vwredsum.vs v8, v8, v9
346 ; CHECK-NEXT: vsetivli zero, 1, e16, m1, ta, ma
347 ; CHECK-NEXT: vmv.x.s a0, v8
349 %e = sext <vscale x 1 x i8> %v to <vscale x 1 x i16>
350 %red = call i16 @llvm.vector.reduce.add.nxv1i16(<vscale x 1 x i16> %e)
; Unsigned widening add reduction, nxv1i8 -> i16: the input is zero-extended so
; the unsigned vwredsumu.vs form is exercised. (The signed sext/vwredsum.vs
; form is already covered by vwreduce_add_nxv1i8.)
; NOTE(review): CHECK lines below were adjusted by hand to mirror the
; i16 -> i32 uadd tests in this file; re-run utils/update_llc_test_checks.py
; to confirm the exact vsetvli sequence.
define signext i16 @vwreduce_uadd_nxv1i8(<vscale x 1 x i8> %v) {
; CHECK-LABEL: vwreduce_uadd_nxv1i8:
; CHECK-NEXT: vsetvli a0, zero, e16, mf4, ta, ma
; CHECK-NEXT: vmv.s.x v9, zero
; CHECK-NEXT: vsetvli zero, zero, e8, mf8, ta, ma
; CHECK-NEXT: vwredsumu.vs v8, v8, v9
; CHECK-NEXT: vsetivli zero, 1, e16, m1, ta, ma
; CHECK-NEXT: vmv.x.s a0, v8
  %e = zext <vscale x 1 x i8> %v to <vscale x 1 x i16>
  %red = call i16 @llvm.vector.reduce.add.nxv1i16(<vscale x 1 x i16> %e)
369 declare i16 @llvm.vector.reduce.umax.nxv1i16(<vscale x 1 x i16>)
371 define signext i16 @vreduce_umax_nxv1i16(<vscale x 1 x i16> %v) {
372 ; CHECK-LABEL: vreduce_umax_nxv1i16:
374 ; CHECK-NEXT: vsetvli a0, zero, e16, mf4, ta, ma
375 ; CHECK-NEXT: vredmaxu.vs v8, v8, v8
376 ; CHECK-NEXT: vmv.x.s a0, v8
378 %red = call i16 @llvm.vector.reduce.umax.nxv1i16(<vscale x 1 x i16> %v)
382 declare i16 @llvm.vector.reduce.smax.nxv1i16(<vscale x 1 x i16>)
384 define signext i16 @vreduce_smax_nxv1i16(<vscale x 1 x i16> %v) {
385 ; CHECK-LABEL: vreduce_smax_nxv1i16:
387 ; CHECK-NEXT: vsetvli a0, zero, e16, mf4, ta, ma
388 ; CHECK-NEXT: vredmax.vs v8, v8, v8
389 ; CHECK-NEXT: vmv.x.s a0, v8
391 %red = call i16 @llvm.vector.reduce.smax.nxv1i16(<vscale x 1 x i16> %v)
395 declare i16 @llvm.vector.reduce.umin.nxv1i16(<vscale x 1 x i16>)
397 define signext i16 @vreduce_umin_nxv1i16(<vscale x 1 x i16> %v) {
398 ; CHECK-LABEL: vreduce_umin_nxv1i16:
400 ; CHECK-NEXT: vsetvli a0, zero, e16, mf4, ta, ma
401 ; CHECK-NEXT: vredminu.vs v8, v8, v8
402 ; CHECK-NEXT: vmv.x.s a0, v8
404 %red = call i16 @llvm.vector.reduce.umin.nxv1i16(<vscale x 1 x i16> %v)
408 declare i16 @llvm.vector.reduce.smin.nxv1i16(<vscale x 1 x i16>)
410 define signext i16 @vreduce_smin_nxv1i16(<vscale x 1 x i16> %v) {
411 ; CHECK-LABEL: vreduce_smin_nxv1i16:
413 ; CHECK-NEXT: vsetvli a0, zero, e16, mf4, ta, ma
414 ; CHECK-NEXT: vredmin.vs v8, v8, v8
415 ; CHECK-NEXT: vmv.x.s a0, v8
417 %red = call i16 @llvm.vector.reduce.smin.nxv1i16(<vscale x 1 x i16> %v)
421 declare i16 @llvm.vector.reduce.and.nxv1i16(<vscale x 1 x i16>)
423 define signext i16 @vreduce_and_nxv1i16(<vscale x 1 x i16> %v) {
424 ; CHECK-LABEL: vreduce_and_nxv1i16:
426 ; CHECK-NEXT: vsetvli a0, zero, e16, mf4, ta, ma
427 ; CHECK-NEXT: vredand.vs v8, v8, v8
428 ; CHECK-NEXT: vmv.x.s a0, v8
430 %red = call i16 @llvm.vector.reduce.and.nxv1i16(<vscale x 1 x i16> %v)
434 declare i16 @llvm.vector.reduce.or.nxv1i16(<vscale x 1 x i16>)
436 define signext i16 @vreduce_or_nxv1i16(<vscale x 1 x i16> %v) {
437 ; CHECK-LABEL: vreduce_or_nxv1i16:
439 ; CHECK-NEXT: vsetvli a0, zero, e16, mf4, ta, ma
440 ; CHECK-NEXT: vredor.vs v8, v8, v8
441 ; CHECK-NEXT: vmv.x.s a0, v8
443 %red = call i16 @llvm.vector.reduce.or.nxv1i16(<vscale x 1 x i16> %v)
447 declare i16 @llvm.vector.reduce.xor.nxv1i16(<vscale x 1 x i16>)
449 define signext i16 @vreduce_xor_nxv1i16(<vscale x 1 x i16> %v) {
450 ; CHECK-LABEL: vreduce_xor_nxv1i16:
452 ; CHECK-NEXT: vsetvli a0, zero, e16, mf4, ta, ma
453 ; CHECK-NEXT: vmv.s.x v9, zero
454 ; CHECK-NEXT: vredxor.vs v8, v8, v9
455 ; CHECK-NEXT: vmv.x.s a0, v8
457 %red = call i16 @llvm.vector.reduce.xor.nxv1i16(<vscale x 1 x i16> %v)
461 declare i16 @llvm.vector.reduce.add.nxv2i16(<vscale x 2 x i16>)
463 define signext i16 @vreduce_add_nxv2i16(<vscale x 2 x i16> %v) {
464 ; CHECK-LABEL: vreduce_add_nxv2i16:
466 ; CHECK-NEXT: vsetvli a0, zero, e16, mf2, ta, ma
467 ; CHECK-NEXT: vmv.s.x v9, zero
468 ; CHECK-NEXT: vredsum.vs v8, v8, v9
469 ; CHECK-NEXT: vmv.x.s a0, v8
471 %red = call i16 @llvm.vector.reduce.add.nxv2i16(<vscale x 2 x i16> %v)
475 define signext i16 @vwreduce_add_nxv2i8(<vscale x 2 x i8> %v) {
476 ; CHECK-LABEL: vwreduce_add_nxv2i8:
478 ; CHECK-NEXT: vsetvli a0, zero, e16, mf2, ta, ma
479 ; CHECK-NEXT: vmv.s.x v9, zero
480 ; CHECK-NEXT: vsetvli zero, zero, e8, mf4, ta, ma
481 ; CHECK-NEXT: vwredsum.vs v8, v8, v9
482 ; CHECK-NEXT: vsetivli zero, 1, e16, m1, ta, ma
483 ; CHECK-NEXT: vmv.x.s a0, v8
485 %e = sext <vscale x 2 x i8> %v to <vscale x 2 x i16>
486 %red = call i16 @llvm.vector.reduce.add.nxv2i16(<vscale x 2 x i16> %e)
; Unsigned widening add reduction, nxv2i8 -> i16: the input is zero-extended so
; the unsigned vwredsumu.vs form is exercised. (The signed sext/vwredsum.vs
; form is already covered by vwreduce_add_nxv2i8.)
; NOTE(review): CHECK lines below were adjusted by hand to mirror the
; i16 -> i32 uadd tests in this file; re-run utils/update_llc_test_checks.py
; to confirm the exact vsetvli sequence.
define signext i16 @vwreduce_uadd_nxv2i8(<vscale x 2 x i8> %v) {
; CHECK-LABEL: vwreduce_uadd_nxv2i8:
; CHECK-NEXT: vsetvli a0, zero, e16, mf2, ta, ma
; CHECK-NEXT: vmv.s.x v9, zero
; CHECK-NEXT: vsetvli zero, zero, e8, mf4, ta, ma
; CHECK-NEXT: vwredsumu.vs v8, v8, v9
; CHECK-NEXT: vsetivli zero, 1, e16, m1, ta, ma
; CHECK-NEXT: vmv.x.s a0, v8
  %e = zext <vscale x 2 x i8> %v to <vscale x 2 x i16>
  %red = call i16 @llvm.vector.reduce.add.nxv2i16(<vscale x 2 x i16> %e)
505 declare i16 @llvm.vector.reduce.umax.nxv2i16(<vscale x 2 x i16>)
507 define signext i16 @vreduce_umax_nxv2i16(<vscale x 2 x i16> %v) {
508 ; CHECK-LABEL: vreduce_umax_nxv2i16:
510 ; CHECK-NEXT: vsetvli a0, zero, e16, mf2, ta, ma
511 ; CHECK-NEXT: vredmaxu.vs v8, v8, v8
512 ; CHECK-NEXT: vmv.x.s a0, v8
514 %red = call i16 @llvm.vector.reduce.umax.nxv2i16(<vscale x 2 x i16> %v)
518 declare i16 @llvm.vector.reduce.smax.nxv2i16(<vscale x 2 x i16>)
520 define signext i16 @vreduce_smax_nxv2i16(<vscale x 2 x i16> %v) {
521 ; CHECK-LABEL: vreduce_smax_nxv2i16:
523 ; CHECK-NEXT: vsetvli a0, zero, e16, mf2, ta, ma
524 ; CHECK-NEXT: vredmax.vs v8, v8, v8
525 ; CHECK-NEXT: vmv.x.s a0, v8
527 %red = call i16 @llvm.vector.reduce.smax.nxv2i16(<vscale x 2 x i16> %v)
531 declare i16 @llvm.vector.reduce.umin.nxv2i16(<vscale x 2 x i16>)
533 define signext i16 @vreduce_umin_nxv2i16(<vscale x 2 x i16> %v) {
534 ; CHECK-LABEL: vreduce_umin_nxv2i16:
536 ; CHECK-NEXT: vsetvli a0, zero, e16, mf2, ta, ma
537 ; CHECK-NEXT: vredminu.vs v8, v8, v8
538 ; CHECK-NEXT: vmv.x.s a0, v8
540 %red = call i16 @llvm.vector.reduce.umin.nxv2i16(<vscale x 2 x i16> %v)
544 declare i16 @llvm.vector.reduce.smin.nxv2i16(<vscale x 2 x i16>)
546 define signext i16 @vreduce_smin_nxv2i16(<vscale x 2 x i16> %v) {
547 ; CHECK-LABEL: vreduce_smin_nxv2i16:
549 ; CHECK-NEXT: vsetvli a0, zero, e16, mf2, ta, ma
550 ; CHECK-NEXT: vredmin.vs v8, v8, v8
551 ; CHECK-NEXT: vmv.x.s a0, v8
553 %red = call i16 @llvm.vector.reduce.smin.nxv2i16(<vscale x 2 x i16> %v)
557 declare i16 @llvm.vector.reduce.and.nxv2i16(<vscale x 2 x i16>)
559 define signext i16 @vreduce_and_nxv2i16(<vscale x 2 x i16> %v) {
560 ; CHECK-LABEL: vreduce_and_nxv2i16:
562 ; CHECK-NEXT: vsetvli a0, zero, e16, mf2, ta, ma
563 ; CHECK-NEXT: vredand.vs v8, v8, v8
564 ; CHECK-NEXT: vmv.x.s a0, v8
566 %red = call i16 @llvm.vector.reduce.and.nxv2i16(<vscale x 2 x i16> %v)
570 declare i16 @llvm.vector.reduce.or.nxv2i16(<vscale x 2 x i16>)
572 define signext i16 @vreduce_or_nxv2i16(<vscale x 2 x i16> %v) {
573 ; CHECK-LABEL: vreduce_or_nxv2i16:
575 ; CHECK-NEXT: vsetvli a0, zero, e16, mf2, ta, ma
576 ; CHECK-NEXT: vredor.vs v8, v8, v8
577 ; CHECK-NEXT: vmv.x.s a0, v8
579 %red = call i16 @llvm.vector.reduce.or.nxv2i16(<vscale x 2 x i16> %v)
583 declare i16 @llvm.vector.reduce.xor.nxv2i16(<vscale x 2 x i16>)
585 define signext i16 @vreduce_xor_nxv2i16(<vscale x 2 x i16> %v) {
586 ; CHECK-LABEL: vreduce_xor_nxv2i16:
588 ; CHECK-NEXT: vsetvli a0, zero, e16, mf2, ta, ma
589 ; CHECK-NEXT: vmv.s.x v9, zero
590 ; CHECK-NEXT: vredxor.vs v8, v8, v9
591 ; CHECK-NEXT: vmv.x.s a0, v8
593 %red = call i16 @llvm.vector.reduce.xor.nxv2i16(<vscale x 2 x i16> %v)
597 declare i16 @llvm.vector.reduce.add.nxv4i16(<vscale x 4 x i16>)
599 define signext i16 @vreduce_add_nxv4i16(<vscale x 4 x i16> %v) {
600 ; CHECK-LABEL: vreduce_add_nxv4i16:
602 ; CHECK-NEXT: vsetvli a0, zero, e16, m1, ta, ma
603 ; CHECK-NEXT: vmv.s.x v9, zero
604 ; CHECK-NEXT: vredsum.vs v8, v8, v9
605 ; CHECK-NEXT: vmv.x.s a0, v8
607 %red = call i16 @llvm.vector.reduce.add.nxv4i16(<vscale x 4 x i16> %v)
611 define signext i16 @vwreduce_add_nxv4i8(<vscale x 4 x i8> %v) {
612 ; CHECK-LABEL: vwreduce_add_nxv4i8:
614 ; CHECK-NEXT: vsetvli a0, zero, e16, m1, ta, ma
615 ; CHECK-NEXT: vmv.s.x v9, zero
616 ; CHECK-NEXT: vsetvli zero, zero, e8, mf2, ta, ma
617 ; CHECK-NEXT: vwredsum.vs v8, v8, v9
618 ; CHECK-NEXT: vsetvli zero, zero, e16, m1, ta, ma
619 ; CHECK-NEXT: vmv.x.s a0, v8
621 %e = sext <vscale x 4 x i8> %v to <vscale x 4 x i16>
622 %red = call i16 @llvm.vector.reduce.add.nxv4i16(<vscale x 4 x i16> %e)
; Unsigned widening add reduction, nxv4i8 -> i16: the input is zero-extended so
; the unsigned vwredsumu.vs form is exercised. (The signed sext/vwredsum.vs
; form is already covered by vwreduce_add_nxv4i8.)
; NOTE(review): CHECK lines below were adjusted by hand to mirror the
; i16 -> i32 uadd tests in this file; re-run utils/update_llc_test_checks.py
; to confirm the exact vsetvli sequence.
define signext i16 @vwreduce_uadd_nxv4i8(<vscale x 4 x i8> %v) {
; CHECK-LABEL: vwreduce_uadd_nxv4i8:
; CHECK-NEXT: vsetvli a0, zero, e16, m1, ta, ma
; CHECK-NEXT: vmv.s.x v9, zero
; CHECK-NEXT: vsetvli zero, zero, e8, mf2, ta, ma
; CHECK-NEXT: vwredsumu.vs v8, v8, v9
; CHECK-NEXT: vsetvli zero, zero, e16, m1, ta, ma
; CHECK-NEXT: vmv.x.s a0, v8
  %e = zext <vscale x 4 x i8> %v to <vscale x 4 x i16>
  %red = call i16 @llvm.vector.reduce.add.nxv4i16(<vscale x 4 x i16> %e)
641 declare i16 @llvm.vector.reduce.umax.nxv4i16(<vscale x 4 x i16>)
643 define signext i16 @vreduce_umax_nxv4i16(<vscale x 4 x i16> %v) {
644 ; CHECK-LABEL: vreduce_umax_nxv4i16:
646 ; CHECK-NEXT: vsetvli a0, zero, e16, m1, ta, ma
647 ; CHECK-NEXT: vredmaxu.vs v8, v8, v8
648 ; CHECK-NEXT: vmv.x.s a0, v8
650 %red = call i16 @llvm.vector.reduce.umax.nxv4i16(<vscale x 4 x i16> %v)
654 declare i16 @llvm.vector.reduce.smax.nxv4i16(<vscale x 4 x i16>)
656 define signext i16 @vreduce_smax_nxv4i16(<vscale x 4 x i16> %v) {
657 ; CHECK-LABEL: vreduce_smax_nxv4i16:
659 ; CHECK-NEXT: vsetvli a0, zero, e16, m1, ta, ma
660 ; CHECK-NEXT: vredmax.vs v8, v8, v8
661 ; CHECK-NEXT: vmv.x.s a0, v8
663 %red = call i16 @llvm.vector.reduce.smax.nxv4i16(<vscale x 4 x i16> %v)
667 declare i16 @llvm.vector.reduce.umin.nxv4i16(<vscale x 4 x i16>)
669 define signext i16 @vreduce_umin_nxv4i16(<vscale x 4 x i16> %v) {
670 ; CHECK-LABEL: vreduce_umin_nxv4i16:
672 ; CHECK-NEXT: vsetvli a0, zero, e16, m1, ta, ma
673 ; CHECK-NEXT: vredminu.vs v8, v8, v8
674 ; CHECK-NEXT: vmv.x.s a0, v8
676 %red = call i16 @llvm.vector.reduce.umin.nxv4i16(<vscale x 4 x i16> %v)
680 declare i16 @llvm.vector.reduce.smin.nxv4i16(<vscale x 4 x i16>)
682 define signext i16 @vreduce_smin_nxv4i16(<vscale x 4 x i16> %v) {
683 ; CHECK-LABEL: vreduce_smin_nxv4i16:
685 ; CHECK-NEXT: vsetvli a0, zero, e16, m1, ta, ma
686 ; CHECK-NEXT: vredmin.vs v8, v8, v8
687 ; CHECK-NEXT: vmv.x.s a0, v8
689 %red = call i16 @llvm.vector.reduce.smin.nxv4i16(<vscale x 4 x i16> %v)
693 declare i16 @llvm.vector.reduce.and.nxv4i16(<vscale x 4 x i16>)
695 define signext i16 @vreduce_and_nxv4i16(<vscale x 4 x i16> %v) {
696 ; CHECK-LABEL: vreduce_and_nxv4i16:
698 ; CHECK-NEXT: vsetvli a0, zero, e16, m1, ta, ma
699 ; CHECK-NEXT: vredand.vs v8, v8, v8
700 ; CHECK-NEXT: vmv.x.s a0, v8
702 %red = call i16 @llvm.vector.reduce.and.nxv4i16(<vscale x 4 x i16> %v)
706 declare i16 @llvm.vector.reduce.or.nxv4i16(<vscale x 4 x i16>)
708 define signext i16 @vreduce_or_nxv4i16(<vscale x 4 x i16> %v) {
709 ; CHECK-LABEL: vreduce_or_nxv4i16:
711 ; CHECK-NEXT: vsetvli a0, zero, e16, m1, ta, ma
712 ; CHECK-NEXT: vredor.vs v8, v8, v8
713 ; CHECK-NEXT: vmv.x.s a0, v8
715 %red = call i16 @llvm.vector.reduce.or.nxv4i16(<vscale x 4 x i16> %v)
719 declare i16 @llvm.vector.reduce.xor.nxv4i16(<vscale x 4 x i16>)
721 define signext i16 @vreduce_xor_nxv4i16(<vscale x 4 x i16> %v) {
722 ; CHECK-LABEL: vreduce_xor_nxv4i16:
724 ; CHECK-NEXT: vsetvli a0, zero, e16, m1, ta, ma
725 ; CHECK-NEXT: vmv.s.x v9, zero
726 ; CHECK-NEXT: vredxor.vs v8, v8, v9
727 ; CHECK-NEXT: vmv.x.s a0, v8
729 %red = call i16 @llvm.vector.reduce.xor.nxv4i16(<vscale x 4 x i16> %v)
733 declare i32 @llvm.vector.reduce.add.nxv1i32(<vscale x 1 x i32>)
735 define signext i32 @vreduce_add_nxv1i32(<vscale x 1 x i32> %v) {
736 ; CHECK-LABEL: vreduce_add_nxv1i32:
738 ; CHECK-NEXT: vsetvli a0, zero, e32, mf2, ta, ma
739 ; CHECK-NEXT: vmv.s.x v9, zero
740 ; CHECK-NEXT: vredsum.vs v8, v8, v9
741 ; CHECK-NEXT: vmv.x.s a0, v8
743 %red = call i32 @llvm.vector.reduce.add.nxv1i32(<vscale x 1 x i32> %v)
747 define signext i32 @vwreduce_add_nxv1i16(<vscale x 1 x i16> %v) {
748 ; CHECK-LABEL: vwreduce_add_nxv1i16:
750 ; CHECK-NEXT: vsetvli a0, zero, e32, mf2, ta, ma
751 ; CHECK-NEXT: vmv.s.x v9, zero
752 ; CHECK-NEXT: vsetvli zero, zero, e16, mf4, ta, ma
753 ; CHECK-NEXT: vwredsum.vs v8, v8, v9
754 ; CHECK-NEXT: vsetivli zero, 1, e32, m1, ta, ma
755 ; CHECK-NEXT: vmv.x.s a0, v8
757 %e = sext <vscale x 1 x i16> %v to <vscale x 1 x i32>
758 %red = call i32 @llvm.vector.reduce.add.nxv1i32(<vscale x 1 x i32> %e)
762 define signext i32 @vwreduce_uadd_nxv1i16(<vscale x 1 x i16> %v) {
763 ; CHECK-LABEL: vwreduce_uadd_nxv1i16:
765 ; CHECK-NEXT: vsetvli a0, zero, e32, mf2, ta, ma
766 ; CHECK-NEXT: vmv.s.x v9, zero
767 ; CHECK-NEXT: vsetvli zero, zero, e16, mf4, ta, ma
768 ; CHECK-NEXT: vwredsumu.vs v8, v8, v9
769 ; CHECK-NEXT: vsetivli zero, 1, e32, m1, ta, ma
770 ; CHECK-NEXT: vmv.x.s a0, v8
772 %e = zext <vscale x 1 x i16> %v to <vscale x 1 x i32>
773 %red = call i32 @llvm.vector.reduce.add.nxv1i32(<vscale x 1 x i32> %e)
777 declare i32 @llvm.vector.reduce.umax.nxv1i32(<vscale x 1 x i32>)
779 define signext i32 @vreduce_umax_nxv1i32(<vscale x 1 x i32> %v) {
780 ; CHECK-LABEL: vreduce_umax_nxv1i32:
782 ; CHECK-NEXT: vsetvli a0, zero, e32, mf2, ta, ma
783 ; CHECK-NEXT: vredmaxu.vs v8, v8, v8
784 ; CHECK-NEXT: vmv.x.s a0, v8
786 %red = call i32 @llvm.vector.reduce.umax.nxv1i32(<vscale x 1 x i32> %v)
790 declare i32 @llvm.vector.reduce.smax.nxv1i32(<vscale x 1 x i32>)
792 define signext i32 @vreduce_smax_nxv1i32(<vscale x 1 x i32> %v) {
793 ; CHECK-LABEL: vreduce_smax_nxv1i32:
795 ; CHECK-NEXT: vsetvli a0, zero, e32, mf2, ta, ma
796 ; CHECK-NEXT: vredmax.vs v8, v8, v8
797 ; CHECK-NEXT: vmv.x.s a0, v8
799 %red = call i32 @llvm.vector.reduce.smax.nxv1i32(<vscale x 1 x i32> %v)
803 declare i32 @llvm.vector.reduce.umin.nxv1i32(<vscale x 1 x i32>)
805 define signext i32 @vreduce_umin_nxv1i32(<vscale x 1 x i32> %v) {
806 ; CHECK-LABEL: vreduce_umin_nxv1i32:
808 ; CHECK-NEXT: vsetvli a0, zero, e32, mf2, ta, ma
809 ; CHECK-NEXT: vredminu.vs v8, v8, v8
810 ; CHECK-NEXT: vmv.x.s a0, v8
812 %red = call i32 @llvm.vector.reduce.umin.nxv1i32(<vscale x 1 x i32> %v)
816 declare i32 @llvm.vector.reduce.smin.nxv1i32(<vscale x 1 x i32>)
818 define signext i32 @vreduce_smin_nxv1i32(<vscale x 1 x i32> %v) {
819 ; CHECK-LABEL: vreduce_smin_nxv1i32:
821 ; CHECK-NEXT: vsetvli a0, zero, e32, mf2, ta, ma
822 ; CHECK-NEXT: vredmin.vs v8, v8, v8
823 ; CHECK-NEXT: vmv.x.s a0, v8
825 %red = call i32 @llvm.vector.reduce.smin.nxv1i32(<vscale x 1 x i32> %v)
829 declare i32 @llvm.vector.reduce.and.nxv1i32(<vscale x 1 x i32>)
831 define signext i32 @vreduce_and_nxv1i32(<vscale x 1 x i32> %v) {
832 ; CHECK-LABEL: vreduce_and_nxv1i32:
834 ; CHECK-NEXT: vsetvli a0, zero, e32, mf2, ta, ma
835 ; CHECK-NEXT: vredand.vs v8, v8, v8
836 ; CHECK-NEXT: vmv.x.s a0, v8
838 %red = call i32 @llvm.vector.reduce.and.nxv1i32(<vscale x 1 x i32> %v)
842 declare i32 @llvm.vector.reduce.or.nxv1i32(<vscale x 1 x i32>)
844 define signext i32 @vreduce_or_nxv1i32(<vscale x 1 x i32> %v) {
845 ; CHECK-LABEL: vreduce_or_nxv1i32:
847 ; CHECK-NEXT: vsetvli a0, zero, e32, mf2, ta, ma
848 ; CHECK-NEXT: vredor.vs v8, v8, v8
849 ; CHECK-NEXT: vmv.x.s a0, v8
851 %red = call i32 @llvm.vector.reduce.or.nxv1i32(<vscale x 1 x i32> %v)
855 declare i32 @llvm.vector.reduce.xor.nxv1i32(<vscale x 1 x i32>)
857 define signext i32 @vreduce_xor_nxv1i32(<vscale x 1 x i32> %v) {
858 ; CHECK-LABEL: vreduce_xor_nxv1i32:
860 ; CHECK-NEXT: vsetvli a0, zero, e32, mf2, ta, ma
861 ; CHECK-NEXT: vmv.s.x v9, zero
862 ; CHECK-NEXT: vredxor.vs v8, v8, v9
863 ; CHECK-NEXT: vmv.x.s a0, v8
865 %red = call i32 @llvm.vector.reduce.xor.nxv1i32(<vscale x 1 x i32> %v)
869 declare i32 @llvm.vector.reduce.add.nxv2i32(<vscale x 2 x i32>)
871 define signext i32 @vreduce_add_nxv2i32(<vscale x 2 x i32> %v) {
872 ; CHECK-LABEL: vreduce_add_nxv2i32:
874 ; CHECK-NEXT: vsetvli a0, zero, e32, m1, ta, ma
875 ; CHECK-NEXT: vmv.s.x v9, zero
876 ; CHECK-NEXT: vredsum.vs v8, v8, v9
877 ; CHECK-NEXT: vmv.x.s a0, v8
879 %red = call i32 @llvm.vector.reduce.add.nxv2i32(<vscale x 2 x i32> %v)
883 define signext i32 @vwreduce_add_nxv2i16(<vscale x 2 x i16> %v) {
884 ; CHECK-LABEL: vwreduce_add_nxv2i16:
886 ; CHECK-NEXT: vsetvli a0, zero, e32, m1, ta, ma
887 ; CHECK-NEXT: vmv.s.x v9, zero
888 ; CHECK-NEXT: vsetvli zero, zero, e16, mf2, ta, ma
889 ; CHECK-NEXT: vwredsum.vs v8, v8, v9
890 ; CHECK-NEXT: vsetvli zero, zero, e32, m1, ta, ma
891 ; CHECK-NEXT: vmv.x.s a0, v8
893 %e = sext <vscale x 2 x i16> %v to <vscale x 2 x i32>
894 %red = call i32 @llvm.vector.reduce.add.nxv2i32(<vscale x 2 x i32> %e)
898 define signext i32 @vwreduce_uadd_nxv2i16(<vscale x 2 x i16> %v) {
899 ; CHECK-LABEL: vwreduce_uadd_nxv2i16:
901 ; CHECK-NEXT: vsetvli a0, zero, e32, m1, ta, ma
902 ; CHECK-NEXT: vmv.s.x v9, zero
903 ; CHECK-NEXT: vsetvli zero, zero, e16, mf2, ta, ma
904 ; CHECK-NEXT: vwredsumu.vs v8, v8, v9
905 ; CHECK-NEXT: vsetvli zero, zero, e32, m1, ta, ma
906 ; CHECK-NEXT: vmv.x.s a0, v8
908 %e = zext <vscale x 2 x i16> %v to <vscale x 2 x i32>
909 %red = call i32 @llvm.vector.reduce.add.nxv2i32(<vscale x 2 x i32> %e)
913 declare i32 @llvm.vector.reduce.umax.nxv2i32(<vscale x 2 x i32>)
915 define signext i32 @vreduce_umax_nxv2i32(<vscale x 2 x i32> %v) {
916 ; CHECK-LABEL: vreduce_umax_nxv2i32:
918 ; CHECK-NEXT: vsetvli a0, zero, e32, m1, ta, ma
919 ; CHECK-NEXT: vredmaxu.vs v8, v8, v8
920 ; CHECK-NEXT: vmv.x.s a0, v8
922 %red = call i32 @llvm.vector.reduce.umax.nxv2i32(<vscale x 2 x i32> %v)
926 declare i32 @llvm.vector.reduce.smax.nxv2i32(<vscale x 2 x i32>)
928 define signext i32 @vreduce_smax_nxv2i32(<vscale x 2 x i32> %v) {
929 ; CHECK-LABEL: vreduce_smax_nxv2i32:
931 ; CHECK-NEXT: vsetvli a0, zero, e32, m1, ta, ma
932 ; CHECK-NEXT: vredmax.vs v8, v8, v8
933 ; CHECK-NEXT: vmv.x.s a0, v8
935 %red = call i32 @llvm.vector.reduce.smax.nxv2i32(<vscale x 2 x i32> %v)
939 declare i32 @llvm.vector.reduce.umin.nxv2i32(<vscale x 2 x i32>)
941 define signext i32 @vreduce_umin_nxv2i32(<vscale x 2 x i32> %v) {
942 ; CHECK-LABEL: vreduce_umin_nxv2i32:
944 ; CHECK-NEXT: vsetvli a0, zero, e32, m1, ta, ma
945 ; CHECK-NEXT: vredminu.vs v8, v8, v8
946 ; CHECK-NEXT: vmv.x.s a0, v8
948 %red = call i32 @llvm.vector.reduce.umin.nxv2i32(<vscale x 2 x i32> %v)
952 declare i32 @llvm.vector.reduce.smin.nxv2i32(<vscale x 2 x i32>)
954 define signext i32 @vreduce_smin_nxv2i32(<vscale x 2 x i32> %v) {
955 ; CHECK-LABEL: vreduce_smin_nxv2i32:
957 ; CHECK-NEXT: vsetvli a0, zero, e32, m1, ta, ma
958 ; CHECK-NEXT: vredmin.vs v8, v8, v8
959 ; CHECK-NEXT: vmv.x.s a0, v8
961 %red = call i32 @llvm.vector.reduce.smin.nxv2i32(<vscale x 2 x i32> %v)
965 declare i32 @llvm.vector.reduce.and.nxv2i32(<vscale x 2 x i32>)
967 define signext i32 @vreduce_and_nxv2i32(<vscale x 2 x i32> %v) {
968 ; CHECK-LABEL: vreduce_and_nxv2i32:
970 ; CHECK-NEXT: vsetvli a0, zero, e32, m1, ta, ma
971 ; CHECK-NEXT: vredand.vs v8, v8, v8
972 ; CHECK-NEXT: vmv.x.s a0, v8
974 %red = call i32 @llvm.vector.reduce.and.nxv2i32(<vscale x 2 x i32> %v)
978 declare i32 @llvm.vector.reduce.or.nxv2i32(<vscale x 2 x i32>)
980 define signext i32 @vreduce_or_nxv2i32(<vscale x 2 x i32> %v) {
981 ; CHECK-LABEL: vreduce_or_nxv2i32:
983 ; CHECK-NEXT: vsetvli a0, zero, e32, m1, ta, ma
984 ; CHECK-NEXT: vredor.vs v8, v8, v8
985 ; CHECK-NEXT: vmv.x.s a0, v8
987 %red = call i32 @llvm.vector.reduce.or.nxv2i32(<vscale x 2 x i32> %v)
991 declare i32 @llvm.vector.reduce.xor.nxv2i32(<vscale x 2 x i32>)
993 define signext i32 @vreduce_xor_nxv2i32(<vscale x 2 x i32> %v) {
994 ; CHECK-LABEL: vreduce_xor_nxv2i32:
996 ; CHECK-NEXT: vsetvli a0, zero, e32, m1, ta, ma
997 ; CHECK-NEXT: vmv.s.x v9, zero
998 ; CHECK-NEXT: vredxor.vs v8, v8, v9
999 ; CHECK-NEXT: vmv.x.s a0, v8
1001 %red = call i32 @llvm.vector.reduce.xor.nxv2i32(<vscale x 2 x i32> %v)
1005 declare i32 @llvm.vector.reduce.add.nxv4i32(<vscale x 4 x i32>)
1007 define signext i32 @vreduce_add_nxv4i32(<vscale x 4 x i32> %v) {
1008 ; CHECK-LABEL: vreduce_add_nxv4i32:
1010 ; CHECK-NEXT: vsetvli a0, zero, e32, m2, ta, ma
1011 ; CHECK-NEXT: vmv.s.x v10, zero
1012 ; CHECK-NEXT: vredsum.vs v8, v8, v10
1013 ; CHECK-NEXT: vmv.x.s a0, v8
1015 %red = call i32 @llvm.vector.reduce.add.nxv4i32(<vscale x 4 x i32> %v)
1019 define signext i32 @vwreduce_add_nxv4i16(<vscale x 4 x i16> %v) {
1020 ; CHECK-LABEL: vwreduce_add_nxv4i16:
1022 ; CHECK-NEXT: vsetvli a0, zero, e32, m1, ta, ma
1023 ; CHECK-NEXT: vmv.s.x v9, zero
1024 ; CHECK-NEXT: vsetvli a0, zero, e16, m1, ta, ma
1025 ; CHECK-NEXT: vwredsum.vs v8, v8, v9
1026 ; CHECK-NEXT: vsetivli zero, 1, e32, m1, ta, ma
1027 ; CHECK-NEXT: vmv.x.s a0, v8
1029 %e = sext <vscale x 4 x i16> %v to <vscale x 4 x i32>
1030 %red = call i32 @llvm.vector.reduce.add.nxv4i32(<vscale x 4 x i32> %e)
1034 define signext i32 @vwreduce_uadd_nxv4i16(<vscale x 4 x i16> %v) {
1035 ; CHECK-LABEL: vwreduce_uadd_nxv4i16:
1037 ; CHECK-NEXT: vsetvli a0, zero, e32, m1, ta, ma
1038 ; CHECK-NEXT: vmv.s.x v9, zero
1039 ; CHECK-NEXT: vsetvli a0, zero, e16, m1, ta, ma
1040 ; CHECK-NEXT: vwredsumu.vs v8, v8, v9
1041 ; CHECK-NEXT: vsetivli zero, 1, e32, m1, ta, ma
1042 ; CHECK-NEXT: vmv.x.s a0, v8
1044 %e = zext <vscale x 4 x i16> %v to <vscale x 4 x i32>
1045 %red = call i32 @llvm.vector.reduce.add.nxv4i32(<vscale x 4 x i32> %e)
1049 declare i32 @llvm.vector.reduce.umax.nxv4i32(<vscale x 4 x i32>)
1051 define signext i32 @vreduce_umax_nxv4i32(<vscale x 4 x i32> %v) {
1052 ; CHECK-LABEL: vreduce_umax_nxv4i32:
1054 ; CHECK-NEXT: vsetvli a0, zero, e32, m2, ta, ma
1055 ; CHECK-NEXT: vredmaxu.vs v8, v8, v8
1056 ; CHECK-NEXT: vmv.x.s a0, v8
1058 %red = call i32 @llvm.vector.reduce.umax.nxv4i32(<vscale x 4 x i32> %v)
; Declares the signed-max reduction intrinsic for <vscale x 4 x i32>.
1062 declare i32 @llvm.vector.reduce.smax.nxv4i32(<vscale x 4 x i32>)
; Signed-max reduction of %v. The expected vredmax.vs uses v8 for both source
; operands, followed by vmv.x.s into a0.
1064 define signext i32 @vreduce_smax_nxv4i32(<vscale x 4 x i32> %v) {
1065 ; CHECK-LABEL: vreduce_smax_nxv4i32:
1067 ; CHECK-NEXT: vsetvli a0, zero, e32, m2, ta, ma
1068 ; CHECK-NEXT: vredmax.vs v8, v8, v8
1069 ; CHECK-NEXT: vmv.x.s a0, v8
1071 %red = call i32 @llvm.vector.reduce.smax.nxv4i32(<vscale x 4 x i32> %v)
; Declares the unsigned-min reduction intrinsic for <vscale x 4 x i32>.
1075 declare i32 @llvm.vector.reduce.umin.nxv4i32(<vscale x 4 x i32>)
; Unsigned-min reduction of %v. The expected vredminu.vs uses v8 for both
; source operands, followed by vmv.x.s into a0.
1077 define signext i32 @vreduce_umin_nxv4i32(<vscale x 4 x i32> %v) {
1078 ; CHECK-LABEL: vreduce_umin_nxv4i32:
1080 ; CHECK-NEXT: vsetvli a0, zero, e32, m2, ta, ma
1081 ; CHECK-NEXT: vredminu.vs v8, v8, v8
1082 ; CHECK-NEXT: vmv.x.s a0, v8
1084 %red = call i32 @llvm.vector.reduce.umin.nxv4i32(<vscale x 4 x i32> %v)
; Declares the signed-min reduction intrinsic for <vscale x 4 x i32>.
1088 declare i32 @llvm.vector.reduce.smin.nxv4i32(<vscale x 4 x i32>)
; Signed-min reduction of %v. The expected vredmin.vs uses v8 for both source
; operands, followed by vmv.x.s into a0.
1090 define signext i32 @vreduce_smin_nxv4i32(<vscale x 4 x i32> %v) {
1091 ; CHECK-LABEL: vreduce_smin_nxv4i32:
1093 ; CHECK-NEXT: vsetvli a0, zero, e32, m2, ta, ma
1094 ; CHECK-NEXT: vredmin.vs v8, v8, v8
1095 ; CHECK-NEXT: vmv.x.s a0, v8
1097 %red = call i32 @llvm.vector.reduce.smin.nxv4i32(<vscale x 4 x i32> %v)
; Declares the bitwise-AND reduction intrinsic for <vscale x 4 x i32>.
1101 declare i32 @llvm.vector.reduce.and.nxv4i32(<vscale x 4 x i32>)
; Bitwise-AND reduction of %v. The expected vredand.vs uses v8 for both source
; operands, followed by vmv.x.s into a0.
1103 define signext i32 @vreduce_and_nxv4i32(<vscale x 4 x i32> %v) {
1104 ; CHECK-LABEL: vreduce_and_nxv4i32:
1106 ; CHECK-NEXT: vsetvli a0, zero, e32, m2, ta, ma
1107 ; CHECK-NEXT: vredand.vs v8, v8, v8
1108 ; CHECK-NEXT: vmv.x.s a0, v8
1110 %red = call i32 @llvm.vector.reduce.and.nxv4i32(<vscale x 4 x i32> %v)
; Declares the bitwise-OR reduction intrinsic for <vscale x 4 x i32>.
1114 declare i32 @llvm.vector.reduce.or.nxv4i32(<vscale x 4 x i32>)
; Bitwise-OR reduction of %v. The expected vredor.vs uses v8 for both source
; operands, followed by vmv.x.s into a0.
1116 define signext i32 @vreduce_or_nxv4i32(<vscale x 4 x i32> %v) {
1117 ; CHECK-LABEL: vreduce_or_nxv4i32:
1119 ; CHECK-NEXT: vsetvli a0, zero, e32, m2, ta, ma
1120 ; CHECK-NEXT: vredor.vs v8, v8, v8
1121 ; CHECK-NEXT: vmv.x.s a0, v8
1123 %red = call i32 @llvm.vector.reduce.or.nxv4i32(<vscale x 4 x i32> %v)
; Declares the bitwise-XOR reduction intrinsic for <vscale x 4 x i32>.
1127 declare i32 @llvm.vector.reduce.xor.nxv4i32(<vscale x 4 x i32>)
; Bitwise-XOR reduction of %v. As with the add reduction, a zero seed is
; expected in v10 (vmv.s.x) before the single vredxor.vs at e32/m2.
1129 define signext i32 @vreduce_xor_nxv4i32(<vscale x 4 x i32> %v) {
1130 ; CHECK-LABEL: vreduce_xor_nxv4i32:
1132 ; CHECK-NEXT: vsetvli a0, zero, e32, m2, ta, ma
1133 ; CHECK-NEXT: vmv.s.x v10, zero
1134 ; CHECK-NEXT: vredxor.vs v8, v8, v10
1135 ; CHECK-NEXT: vmv.x.s a0, v8
1137 %red = call i32 @llvm.vector.reduce.xor.nxv4i32(<vscale x 4 x i32> %v)
; Declares the add-reduction intrinsic for <vscale x 1 x i64> (scalar i64 result).
1141 declare i64 @llvm.vector.reduce.add.nxv1i64(<vscale x 1 x i64>)
; Add-reduces %v to an i64. Both runs expect a zero seed in v9 and one
; vredsum.vs at e64/m1. The riscv32 run additionally expects the result to be
; split across a0/a1: a0 is read directly, then element 0 is shifted right by
; 32 (vsrl.vx with AVL 1) and read into a1.
1143 define i64 @vreduce_add_nxv1i64(<vscale x 1 x i64> %v) {
1144 ; RV32-LABEL: vreduce_add_nxv1i64:
1146 ; RV32-NEXT: vsetvli a0, zero, e64, m1, ta, ma
1147 ; RV32-NEXT: vmv.s.x v9, zero
1148 ; RV32-NEXT: vredsum.vs v8, v8, v9
1149 ; RV32-NEXT: vmv.x.s a0, v8
1150 ; RV32-NEXT: li a1, 32
1151 ; RV32-NEXT: vsetivli zero, 1, e64, m1, ta, ma
1152 ; RV32-NEXT: vsrl.vx v8, v8, a1
1153 ; RV32-NEXT: vmv.x.s a1, v8
1156 ; RV64-LABEL: vreduce_add_nxv1i64:
1158 ; RV64-NEXT: vsetvli a0, zero, e64, m1, ta, ma
1159 ; RV64-NEXT: vmv.s.x v9, zero
1160 ; RV64-NEXT: vredsum.vs v8, v8, v9
1161 ; RV64-NEXT: vmv.x.s a0, v8
1163 %red = call i64 @llvm.vector.reduce.add.nxv1i64(<vscale x 1 x i64> %v)
; Sign-extends %v from i32 to i64 elements, then add-reduces the widened vector.
; Both runs expect a single vwredsum.vs at e32/mf2 with the zero seed written
; at e64. The expected vsetvli/vsetivli forms around it differ between the two
; runs, and the riscv32 run also extracts the high half into a1 via a shift by 32.
1167 define i64 @vwreduce_add_nxv1i32(<vscale x 1 x i32> %v) {
1168 ; RV32-LABEL: vwreduce_add_nxv1i32:
1170 ; RV32-NEXT: vsetivli zero, 1, e64, m1, ta, ma
1171 ; RV32-NEXT: vmv.s.x v9, zero
1172 ; RV32-NEXT: vsetvli a0, zero, e32, mf2, ta, ma
1173 ; RV32-NEXT: vwredsum.vs v8, v8, v9
1174 ; RV32-NEXT: vsetivli zero, 1, e64, m1, ta, ma
1175 ; RV32-NEXT: vmv.x.s a0, v8
1176 ; RV32-NEXT: li a1, 32
1177 ; RV32-NEXT: vsrl.vx v8, v8, a1
1178 ; RV32-NEXT: vmv.x.s a1, v8
1181 ; RV64-LABEL: vwreduce_add_nxv1i32:
1183 ; RV64-NEXT: vsetvli a0, zero, e64, m1, ta, ma
1184 ; RV64-NEXT: vmv.s.x v9, zero
1185 ; RV64-NEXT: vsetvli zero, zero, e32, mf2, ta, ma
1186 ; RV64-NEXT: vwredsum.vs v8, v8, v9
1187 ; RV64-NEXT: vsetvli zero, zero, e64, m1, ta, ma
1188 ; RV64-NEXT: vmv.x.s a0, v8
1190 %e = sext <vscale x 1 x i32> %v to <vscale x 1 x i64>
1191 %red = call i64 @llvm.vector.reduce.add.nxv1i64(<vscale x 1 x i64> %e)
; Zero-extends %v from i32 to i64 elements, then add-reduces the widened vector.
; Mirrors the sign-extending variant, with the unsigned vwredsumu.vs expected
; at e32/mf2; the riscv32 run extracts the high half into a1 via a shift by 32.
1195 define i64 @vwreduce_uadd_nxv1i32(<vscale x 1 x i32> %v) {
1196 ; RV32-LABEL: vwreduce_uadd_nxv1i32:
1198 ; RV32-NEXT: vsetivli zero, 1, e64, m1, ta, ma
1199 ; RV32-NEXT: vmv.s.x v9, zero
1200 ; RV32-NEXT: vsetvli a0, zero, e32, mf2, ta, ma
1201 ; RV32-NEXT: vwredsumu.vs v8, v8, v9
1202 ; RV32-NEXT: vsetivli zero, 1, e64, m1, ta, ma
1203 ; RV32-NEXT: vmv.x.s a0, v8
1204 ; RV32-NEXT: li a1, 32
1205 ; RV32-NEXT: vsrl.vx v8, v8, a1
1206 ; RV32-NEXT: vmv.x.s a1, v8
1209 ; RV64-LABEL: vwreduce_uadd_nxv1i32:
1211 ; RV64-NEXT: vsetvli a0, zero, e64, m1, ta, ma
1212 ; RV64-NEXT: vmv.s.x v9, zero
1213 ; RV64-NEXT: vsetvli zero, zero, e32, mf2, ta, ma
1214 ; RV64-NEXT: vwredsumu.vs v8, v8, v9
1215 ; RV64-NEXT: vsetvli zero, zero, e64, m1, ta, ma
1216 ; RV64-NEXT: vmv.x.s a0, v8
1218 %e = zext <vscale x 1 x i32> %v to <vscale x 1 x i64>
1219 %red = call i64 @llvm.vector.reduce.add.nxv1i64(<vscale x 1 x i64> %e)
; Declares the unsigned-max reduction intrinsic for <vscale x 1 x i64>.
1223 declare i64 @llvm.vector.reduce.umax.nxv1i64(<vscale x 1 x i64>)
; Unsigned-max reduction of %v; vredmaxu.vs takes v8 for both source operands.
; In the riscv32 run the high half comes from a shifted copy in v9 (read into
; a1) and the low half is read from the unshifted v8 into a0 afterwards.
1225 define i64 @vreduce_umax_nxv1i64(<vscale x 1 x i64> %v) {
1226 ; RV32-LABEL: vreduce_umax_nxv1i64:
1228 ; RV32-NEXT: vsetvli a0, zero, e64, m1, ta, ma
1229 ; RV32-NEXT: vredmaxu.vs v8, v8, v8
1230 ; RV32-NEXT: li a0, 32
1231 ; RV32-NEXT: vsetivli zero, 1, e64, m1, ta, ma
1232 ; RV32-NEXT: vsrl.vx v9, v8, a0
1233 ; RV32-NEXT: vmv.x.s a1, v9
1234 ; RV32-NEXT: vmv.x.s a0, v8
1237 ; RV64-LABEL: vreduce_umax_nxv1i64:
1239 ; RV64-NEXT: vsetvli a0, zero, e64, m1, ta, ma
1240 ; RV64-NEXT: vredmaxu.vs v8, v8, v8
1241 ; RV64-NEXT: vmv.x.s a0, v8
1243 %red = call i64 @llvm.vector.reduce.umax.nxv1i64(<vscale x 1 x i64> %v)
; Declares the signed-max reduction intrinsic for <vscale x 1 x i64>.
1247 declare i64 @llvm.vector.reduce.smax.nxv1i64(<vscale x 1 x i64>)
; Signed-max reduction of %v; vredmax.vs takes v8 for both source operands.
; In the riscv32 run a1 is read from a copy shifted right by 32 (v9) and a0
; from the unshifted v8.
1249 define i64 @vreduce_smax_nxv1i64(<vscale x 1 x i64> %v) {
1250 ; RV32-LABEL: vreduce_smax_nxv1i64:
1252 ; RV32-NEXT: vsetvli a0, zero, e64, m1, ta, ma
1253 ; RV32-NEXT: vredmax.vs v8, v8, v8
1254 ; RV32-NEXT: li a0, 32
1255 ; RV32-NEXT: vsetivli zero, 1, e64, m1, ta, ma
1256 ; RV32-NEXT: vsrl.vx v9, v8, a0
1257 ; RV32-NEXT: vmv.x.s a1, v9
1258 ; RV32-NEXT: vmv.x.s a0, v8
1261 ; RV64-LABEL: vreduce_smax_nxv1i64:
1263 ; RV64-NEXT: vsetvli a0, zero, e64, m1, ta, ma
1264 ; RV64-NEXT: vredmax.vs v8, v8, v8
1265 ; RV64-NEXT: vmv.x.s a0, v8
1267 %red = call i64 @llvm.vector.reduce.smax.nxv1i64(<vscale x 1 x i64> %v)
; Declares the unsigned-min reduction intrinsic for <vscale x 1 x i64>.
1271 declare i64 @llvm.vector.reduce.umin.nxv1i64(<vscale x 1 x i64>)
; Unsigned-min reduction of %v; vredminu.vs takes v8 for both source operands.
; In the riscv32 run a1 is read from a copy shifted right by 32 (v9) and a0
; from the unshifted v8.
1273 define i64 @vreduce_umin_nxv1i64(<vscale x 1 x i64> %v) {
1274 ; RV32-LABEL: vreduce_umin_nxv1i64:
1276 ; RV32-NEXT: vsetvli a0, zero, e64, m1, ta, ma
1277 ; RV32-NEXT: vredminu.vs v8, v8, v8
1278 ; RV32-NEXT: li a0, 32
1279 ; RV32-NEXT: vsetivli zero, 1, e64, m1, ta, ma
1280 ; RV32-NEXT: vsrl.vx v9, v8, a0
1281 ; RV32-NEXT: vmv.x.s a1, v9
1282 ; RV32-NEXT: vmv.x.s a0, v8
1285 ; RV64-LABEL: vreduce_umin_nxv1i64:
1287 ; RV64-NEXT: vsetvli a0, zero, e64, m1, ta, ma
1288 ; RV64-NEXT: vredminu.vs v8, v8, v8
1289 ; RV64-NEXT: vmv.x.s a0, v8
1291 %red = call i64 @llvm.vector.reduce.umin.nxv1i64(<vscale x 1 x i64> %v)
; Declares the signed-min reduction intrinsic for <vscale x 1 x i64>.
1295 declare i64 @llvm.vector.reduce.smin.nxv1i64(<vscale x 1 x i64>)
; Signed-min reduction of %v; vredmin.vs takes v8 for both source operands.
; In the riscv32 run a1 is read from a copy shifted right by 32 (v9) and a0
; from the unshifted v8.
1297 define i64 @vreduce_smin_nxv1i64(<vscale x 1 x i64> %v) {
1298 ; RV32-LABEL: vreduce_smin_nxv1i64:
1300 ; RV32-NEXT: vsetvli a0, zero, e64, m1, ta, ma
1301 ; RV32-NEXT: vredmin.vs v8, v8, v8
1302 ; RV32-NEXT: li a0, 32
1303 ; RV32-NEXT: vsetivli zero, 1, e64, m1, ta, ma
1304 ; RV32-NEXT: vsrl.vx v9, v8, a0
1305 ; RV32-NEXT: vmv.x.s a1, v9
1306 ; RV32-NEXT: vmv.x.s a0, v8
1309 ; RV64-LABEL: vreduce_smin_nxv1i64:
1311 ; RV64-NEXT: vsetvli a0, zero, e64, m1, ta, ma
1312 ; RV64-NEXT: vredmin.vs v8, v8, v8
1313 ; RV64-NEXT: vmv.x.s a0, v8
1315 %red = call i64 @llvm.vector.reduce.smin.nxv1i64(<vscale x 1 x i64> %v)
; Declares the bitwise-AND reduction intrinsic for <vscale x 1 x i64>.
1319 declare i64 @llvm.vector.reduce.and.nxv1i64(<vscale x 1 x i64>)
; Bitwise-AND reduction of %v; vredand.vs takes v8 for both source operands.
; In the riscv32 run a1 is read from a copy shifted right by 32 (v9) and a0
; from the unshifted v8.
1321 define i64 @vreduce_and_nxv1i64(<vscale x 1 x i64> %v) {
1322 ; RV32-LABEL: vreduce_and_nxv1i64:
1324 ; RV32-NEXT: vsetvli a0, zero, e64, m1, ta, ma
1325 ; RV32-NEXT: vredand.vs v8, v8, v8
1326 ; RV32-NEXT: li a0, 32
1327 ; RV32-NEXT: vsetivli zero, 1, e64, m1, ta, ma
1328 ; RV32-NEXT: vsrl.vx v9, v8, a0
1329 ; RV32-NEXT: vmv.x.s a1, v9
1330 ; RV32-NEXT: vmv.x.s a0, v8
1333 ; RV64-LABEL: vreduce_and_nxv1i64:
1335 ; RV64-NEXT: vsetvli a0, zero, e64, m1, ta, ma
1336 ; RV64-NEXT: vredand.vs v8, v8, v8
1337 ; RV64-NEXT: vmv.x.s a0, v8
1339 %red = call i64 @llvm.vector.reduce.and.nxv1i64(<vscale x 1 x i64> %v)
; Declares the bitwise-OR reduction intrinsic for <vscale x 1 x i64>.
1343 declare i64 @llvm.vector.reduce.or.nxv1i64(<vscale x 1 x i64>)
; Bitwise-OR reduction of %v; vredor.vs takes v8 for both source operands.
; In the riscv32 run a1 is read from a copy shifted right by 32 (v9) and a0
; from the unshifted v8.
1345 define i64 @vreduce_or_nxv1i64(<vscale x 1 x i64> %v) {
1346 ; RV32-LABEL: vreduce_or_nxv1i64:
1348 ; RV32-NEXT: vsetvli a0, zero, e64, m1, ta, ma
1349 ; RV32-NEXT: vredor.vs v8, v8, v8
1350 ; RV32-NEXT: li a0, 32
1351 ; RV32-NEXT: vsetivli zero, 1, e64, m1, ta, ma
1352 ; RV32-NEXT: vsrl.vx v9, v8, a0
1353 ; RV32-NEXT: vmv.x.s a1, v9
1354 ; RV32-NEXT: vmv.x.s a0, v8
1357 ; RV64-LABEL: vreduce_or_nxv1i64:
1359 ; RV64-NEXT: vsetvli a0, zero, e64, m1, ta, ma
1360 ; RV64-NEXT: vredor.vs v8, v8, v8
1361 ; RV64-NEXT: vmv.x.s a0, v8
1363 %red = call i64 @llvm.vector.reduce.or.nxv1i64(<vscale x 1 x i64> %v)
; Declares the bitwise-XOR reduction intrinsic for <vscale x 1 x i64>.
1367 declare i64 @llvm.vector.reduce.xor.nxv1i64(<vscale x 1 x i64>)
; Bitwise-XOR reduction of %v with a zero seed expected in v9 before the
; vredxor.vs. The riscv32 run reads a0 first, then shifts v8 right by 32 in
; place and reads the high half into a1.
1369 define i64 @vreduce_xor_nxv1i64(<vscale x 1 x i64> %v) {
1370 ; RV32-LABEL: vreduce_xor_nxv1i64:
1372 ; RV32-NEXT: vsetvli a0, zero, e64, m1, ta, ma
1373 ; RV32-NEXT: vmv.s.x v9, zero
1374 ; RV32-NEXT: vredxor.vs v8, v8, v9
1375 ; RV32-NEXT: vmv.x.s a0, v8
1376 ; RV32-NEXT: li a1, 32
1377 ; RV32-NEXT: vsetivli zero, 1, e64, m1, ta, ma
1378 ; RV32-NEXT: vsrl.vx v8, v8, a1
1379 ; RV32-NEXT: vmv.x.s a1, v8
1382 ; RV64-LABEL: vreduce_xor_nxv1i64:
1384 ; RV64-NEXT: vsetvli a0, zero, e64, m1, ta, ma
1385 ; RV64-NEXT: vmv.s.x v9, zero
1386 ; RV64-NEXT: vredxor.vs v8, v8, v9
1387 ; RV64-NEXT: vmv.x.s a0, v8
1389 %red = call i64 @llvm.vector.reduce.xor.nxv1i64(<vscale x 1 x i64> %v)
; Declares the add-reduction intrinsic for <vscale x 2 x i64> (scalar i64 result).
1393 declare i64 @llvm.vector.reduce.add.nxv2i64(<vscale x 2 x i64>)
; Add-reduces %v at e64/m2 with a zero seed expected in v10. The riscv32 run
; reads a0 first, then shifts element 0 right by 32 (AVL 1, m1) to fill a1.
1395 define i64 @vreduce_add_nxv2i64(<vscale x 2 x i64> %v) {
1396 ; RV32-LABEL: vreduce_add_nxv2i64:
1398 ; RV32-NEXT: vsetvli a0, zero, e64, m2, ta, ma
1399 ; RV32-NEXT: vmv.s.x v10, zero
1400 ; RV32-NEXT: vredsum.vs v8, v8, v10
1401 ; RV32-NEXT: vmv.x.s a0, v8
1402 ; RV32-NEXT: li a1, 32
1403 ; RV32-NEXT: vsetivli zero, 1, e64, m1, ta, ma
1404 ; RV32-NEXT: vsrl.vx v8, v8, a1
1405 ; RV32-NEXT: vmv.x.s a1, v8
1408 ; RV64-LABEL: vreduce_add_nxv2i64:
1410 ; RV64-NEXT: vsetvli a0, zero, e64, m2, ta, ma
1411 ; RV64-NEXT: vmv.s.x v10, zero
1412 ; RV64-NEXT: vredsum.vs v8, v8, v10
1413 ; RV64-NEXT: vmv.x.s a0, v8
1415 %red = call i64 @llvm.vector.reduce.add.nxv2i64(<vscale x 2 x i64> %v)
; Sign-extends %v from i32 to i64 elements, then add-reduces the widened vector.
; Both runs expect one vwredsum.vs at e32/m1 with the zero seed written at e64
; and the result read back at e64 with AVL 1; the riscv32 run also fills a1
; with the high half via a shift by 32.
1419 define i64 @vwreduce_add_nxv2i32(<vscale x 2 x i32> %v) {
1420 ; RV32-LABEL: vwreduce_add_nxv2i32:
1422 ; RV32-NEXT: vsetivli zero, 1, e64, m1, ta, ma
1423 ; RV32-NEXT: vmv.s.x v9, zero
1424 ; RV32-NEXT: vsetvli a0, zero, e32, m1, ta, ma
1425 ; RV32-NEXT: vwredsum.vs v8, v8, v9
1426 ; RV32-NEXT: vsetivli zero, 1, e64, m1, ta, ma
1427 ; RV32-NEXT: vmv.x.s a0, v8
1428 ; RV32-NEXT: li a1, 32
1429 ; RV32-NEXT: vsrl.vx v8, v8, a1
1430 ; RV32-NEXT: vmv.x.s a1, v8
1433 ; RV64-LABEL: vwreduce_add_nxv2i32:
1435 ; RV64-NEXT: vsetvli a0, zero, e64, m1, ta, ma
1436 ; RV64-NEXT: vmv.s.x v9, zero
1437 ; RV64-NEXT: vsetvli a0, zero, e32, m1, ta, ma
1438 ; RV64-NEXT: vwredsum.vs v8, v8, v9
1439 ; RV64-NEXT: vsetivli zero, 1, e64, m1, ta, ma
1440 ; RV64-NEXT: vmv.x.s a0, v8
1442 %e = sext <vscale x 2 x i32> %v to <vscale x 2 x i64>
1443 %red = call i64 @llvm.vector.reduce.add.nxv2i64(<vscale x 2 x i64> %e)
; Zero-extends %v from i32 to i64 elements, then add-reduces the widened vector.
; Mirrors the sign-extending variant, with the unsigned vwredsumu.vs expected
; at e32/m1; the riscv32 run also fills a1 with the high half via a shift by 32.
1447 define i64 @vwreduce_uadd_nxv2i32(<vscale x 2 x i32> %v) {
1448 ; RV32-LABEL: vwreduce_uadd_nxv2i32:
1450 ; RV32-NEXT: vsetivli zero, 1, e64, m1, ta, ma
1451 ; RV32-NEXT: vmv.s.x v9, zero
1452 ; RV32-NEXT: vsetvli a0, zero, e32, m1, ta, ma
1453 ; RV32-NEXT: vwredsumu.vs v8, v8, v9
1454 ; RV32-NEXT: vsetivli zero, 1, e64, m1, ta, ma
1455 ; RV32-NEXT: vmv.x.s a0, v8
1456 ; RV32-NEXT: li a1, 32
1457 ; RV32-NEXT: vsrl.vx v8, v8, a1
1458 ; RV32-NEXT: vmv.x.s a1, v8
1461 ; RV64-LABEL: vwreduce_uadd_nxv2i32:
1463 ; RV64-NEXT: vsetvli a0, zero, e64, m1, ta, ma
1464 ; RV64-NEXT: vmv.s.x v9, zero
1465 ; RV64-NEXT: vsetvli a0, zero, e32, m1, ta, ma
1466 ; RV64-NEXT: vwredsumu.vs v8, v8, v9
1467 ; RV64-NEXT: vsetivli zero, 1, e64, m1, ta, ma
1468 ; RV64-NEXT: vmv.x.s a0, v8
1470 %e = zext <vscale x 2 x i32> %v to <vscale x 2 x i64>
1471 %red = call i64 @llvm.vector.reduce.add.nxv2i64(<vscale x 2 x i64> %e)
; Declares the unsigned-max reduction intrinsic for <vscale x 2 x i64>.
1475 declare i64 @llvm.vector.reduce.umax.nxv2i64(<vscale x 2 x i64>)
; Unsigned-max reduction of %v at e64/m2; vredmaxu.vs takes v8 for both source
; operands. The riscv32 run reads a0 first, then shifts v8 right by 32 in
; place (AVL 1, m1) and reads the high half into a1.
1477 define i64 @vreduce_umax_nxv2i64(<vscale x 2 x i64> %v) {
1478 ; RV32-LABEL: vreduce_umax_nxv2i64:
1480 ; RV32-NEXT: vsetvli a0, zero, e64, m2, ta, ma
1481 ; RV32-NEXT: vredmaxu.vs v8, v8, v8
1482 ; RV32-NEXT: vmv.x.s a0, v8
1483 ; RV32-NEXT: li a1, 32
1484 ; RV32-NEXT: vsetivli zero, 1, e64, m1, ta, ma
1485 ; RV32-NEXT: vsrl.vx v8, v8, a1
1486 ; RV32-NEXT: vmv.x.s a1, v8
1489 ; RV64-LABEL: vreduce_umax_nxv2i64:
1491 ; RV64-NEXT: vsetvli a0, zero, e64, m2, ta, ma
1492 ; RV64-NEXT: vredmaxu.vs v8, v8, v8
1493 ; RV64-NEXT: vmv.x.s a0, v8
1495 %red = call i64 @llvm.vector.reduce.umax.nxv2i64(<vscale x 2 x i64> %v)
; Declares the signed-max reduction intrinsic for <vscale x 2 x i64>.
1499 declare i64 @llvm.vector.reduce.smax.nxv2i64(<vscale x 2 x i64>)
; Signed-max reduction of %v at e64/m2; vredmax.vs takes v8 for both source
; operands. The riscv32 run reads a0, then shifts v8 right by 32 to fill a1.
1501 define i64 @vreduce_smax_nxv2i64(<vscale x 2 x i64> %v) {
1502 ; RV32-LABEL: vreduce_smax_nxv2i64:
1504 ; RV32-NEXT: vsetvli a0, zero, e64, m2, ta, ma
1505 ; RV32-NEXT: vredmax.vs v8, v8, v8
1506 ; RV32-NEXT: vmv.x.s a0, v8
1507 ; RV32-NEXT: li a1, 32
1508 ; RV32-NEXT: vsetivli zero, 1, e64, m1, ta, ma
1509 ; RV32-NEXT: vsrl.vx v8, v8, a1
1510 ; RV32-NEXT: vmv.x.s a1, v8
1513 ; RV64-LABEL: vreduce_smax_nxv2i64:
1515 ; RV64-NEXT: vsetvli a0, zero, e64, m2, ta, ma
1516 ; RV64-NEXT: vredmax.vs v8, v8, v8
1517 ; RV64-NEXT: vmv.x.s a0, v8
1519 %red = call i64 @llvm.vector.reduce.smax.nxv2i64(<vscale x 2 x i64> %v)
; Declares the unsigned-min reduction intrinsic for <vscale x 2 x i64>.
1523 declare i64 @llvm.vector.reduce.umin.nxv2i64(<vscale x 2 x i64>)
; Unsigned-min reduction of %v at e64/m2; vredminu.vs takes v8 for both source
; operands. The riscv32 run reads a0, then shifts v8 right by 32 to fill a1.
1525 define i64 @vreduce_umin_nxv2i64(<vscale x 2 x i64> %v) {
1526 ; RV32-LABEL: vreduce_umin_nxv2i64:
1528 ; RV32-NEXT: vsetvli a0, zero, e64, m2, ta, ma
1529 ; RV32-NEXT: vredminu.vs v8, v8, v8
1530 ; RV32-NEXT: vmv.x.s a0, v8
1531 ; RV32-NEXT: li a1, 32
1532 ; RV32-NEXT: vsetivli zero, 1, e64, m1, ta, ma
1533 ; RV32-NEXT: vsrl.vx v8, v8, a1
1534 ; RV32-NEXT: vmv.x.s a1, v8
1537 ; RV64-LABEL: vreduce_umin_nxv2i64:
1539 ; RV64-NEXT: vsetvli a0, zero, e64, m2, ta, ma
1540 ; RV64-NEXT: vredminu.vs v8, v8, v8
1541 ; RV64-NEXT: vmv.x.s a0, v8
1543 %red = call i64 @llvm.vector.reduce.umin.nxv2i64(<vscale x 2 x i64> %v)
; Declares the signed-min reduction intrinsic for <vscale x 2 x i64>.
1547 declare i64 @llvm.vector.reduce.smin.nxv2i64(<vscale x 2 x i64>)
; Signed-min reduction of %v at e64/m2; vredmin.vs takes v8 for both source
; operands. The riscv32 run reads a0, then shifts v8 right by 32 to fill a1.
1549 define i64 @vreduce_smin_nxv2i64(<vscale x 2 x i64> %v) {
1550 ; RV32-LABEL: vreduce_smin_nxv2i64:
1552 ; RV32-NEXT: vsetvli a0, zero, e64, m2, ta, ma
1553 ; RV32-NEXT: vredmin.vs v8, v8, v8
1554 ; RV32-NEXT: vmv.x.s a0, v8
1555 ; RV32-NEXT: li a1, 32
1556 ; RV32-NEXT: vsetivli zero, 1, e64, m1, ta, ma
1557 ; RV32-NEXT: vsrl.vx v8, v8, a1
1558 ; RV32-NEXT: vmv.x.s a1, v8
1561 ; RV64-LABEL: vreduce_smin_nxv2i64:
1563 ; RV64-NEXT: vsetvli a0, zero, e64, m2, ta, ma
1564 ; RV64-NEXT: vredmin.vs v8, v8, v8
1565 ; RV64-NEXT: vmv.x.s a0, v8
1567 %red = call i64 @llvm.vector.reduce.smin.nxv2i64(<vscale x 2 x i64> %v)
; Declares the bitwise-AND reduction intrinsic for <vscale x 2 x i64>.
1571 declare i64 @llvm.vector.reduce.and.nxv2i64(<vscale x 2 x i64>)
; Bitwise-AND reduction of %v at e64/m2; vredand.vs takes v8 for both source
; operands. The riscv32 run reads a0, then shifts v8 right by 32 to fill a1.
1573 define i64 @vreduce_and_nxv2i64(<vscale x 2 x i64> %v) {
1574 ; RV32-LABEL: vreduce_and_nxv2i64:
1576 ; RV32-NEXT: vsetvli a0, zero, e64, m2, ta, ma
1577 ; RV32-NEXT: vredand.vs v8, v8, v8
1578 ; RV32-NEXT: vmv.x.s a0, v8
1579 ; RV32-NEXT: li a1, 32
1580 ; RV32-NEXT: vsetivli zero, 1, e64, m1, ta, ma
1581 ; RV32-NEXT: vsrl.vx v8, v8, a1
1582 ; RV32-NEXT: vmv.x.s a1, v8
1585 ; RV64-LABEL: vreduce_and_nxv2i64:
1587 ; RV64-NEXT: vsetvli a0, zero, e64, m2, ta, ma
1588 ; RV64-NEXT: vredand.vs v8, v8, v8
1589 ; RV64-NEXT: vmv.x.s a0, v8
1591 %red = call i64 @llvm.vector.reduce.and.nxv2i64(<vscale x 2 x i64> %v)
; Declares the bitwise-OR reduction intrinsic for <vscale x 2 x i64>.
1595 declare i64 @llvm.vector.reduce.or.nxv2i64(<vscale x 2 x i64>)
; Bitwise-OR reduction of %v at e64/m2; vredor.vs takes v8 for both source
; operands. The riscv32 run reads a0, then shifts v8 right by 32 to fill a1.
1597 define i64 @vreduce_or_nxv2i64(<vscale x 2 x i64> %v) {
1598 ; RV32-LABEL: vreduce_or_nxv2i64:
1600 ; RV32-NEXT: vsetvli a0, zero, e64, m2, ta, ma
1601 ; RV32-NEXT: vredor.vs v8, v8, v8
1602 ; RV32-NEXT: vmv.x.s a0, v8
1603 ; RV32-NEXT: li a1, 32
1604 ; RV32-NEXT: vsetivli zero, 1, e64, m1, ta, ma
1605 ; RV32-NEXT: vsrl.vx v8, v8, a1
1606 ; RV32-NEXT: vmv.x.s a1, v8
1609 ; RV64-LABEL: vreduce_or_nxv2i64:
1611 ; RV64-NEXT: vsetvli a0, zero, e64, m2, ta, ma
1612 ; RV64-NEXT: vredor.vs v8, v8, v8
1613 ; RV64-NEXT: vmv.x.s a0, v8
1615 %red = call i64 @llvm.vector.reduce.or.nxv2i64(<vscale x 2 x i64> %v)
; Declares the bitwise-XOR reduction intrinsic for <vscale x 2 x i64>.
1619 declare i64 @llvm.vector.reduce.xor.nxv2i64(<vscale x 2 x i64>)
; Bitwise-XOR reduction of %v at e64/m2 with a zero seed expected in v10.
; The riscv32 run reads a0, then shifts v8 right by 32 to fill a1.
1621 define i64 @vreduce_xor_nxv2i64(<vscale x 2 x i64> %v) {
1622 ; RV32-LABEL: vreduce_xor_nxv2i64:
1624 ; RV32-NEXT: vsetvli a0, zero, e64, m2, ta, ma
1625 ; RV32-NEXT: vmv.s.x v10, zero
1626 ; RV32-NEXT: vredxor.vs v8, v8, v10
1627 ; RV32-NEXT: vmv.x.s a0, v8
1628 ; RV32-NEXT: li a1, 32
1629 ; RV32-NEXT: vsetivli zero, 1, e64, m1, ta, ma
1630 ; RV32-NEXT: vsrl.vx v8, v8, a1
1631 ; RV32-NEXT: vmv.x.s a1, v8
1634 ; RV64-LABEL: vreduce_xor_nxv2i64:
1636 ; RV64-NEXT: vsetvli a0, zero, e64, m2, ta, ma
1637 ; RV64-NEXT: vmv.s.x v10, zero
1638 ; RV64-NEXT: vredxor.vs v8, v8, v10
1639 ; RV64-NEXT: vmv.x.s a0, v8
1641 %red = call i64 @llvm.vector.reduce.xor.nxv2i64(<vscale x 2 x i64> %v)
; Declares the add-reduction intrinsic for <vscale x 4 x i64> (scalar i64 result).
1645 declare i64 @llvm.vector.reduce.add.nxv4i64(<vscale x 4 x i64>)
; Add-reduces %v at e64/m4 with a zero seed expected in v12. The riscv32 run
; reads a0 first, then shifts element 0 right by 32 (AVL 1, m1) to fill a1.
1647 define i64 @vreduce_add_nxv4i64(<vscale x 4 x i64> %v) {
1648 ; RV32-LABEL: vreduce_add_nxv4i64:
1650 ; RV32-NEXT: vsetvli a0, zero, e64, m4, ta, ma
1651 ; RV32-NEXT: vmv.s.x v12, zero
1652 ; RV32-NEXT: vredsum.vs v8, v8, v12
1653 ; RV32-NEXT: vmv.x.s a0, v8
1654 ; RV32-NEXT: li a1, 32
1655 ; RV32-NEXT: vsetivli zero, 1, e64, m1, ta, ma
1656 ; RV32-NEXT: vsrl.vx v8, v8, a1
1657 ; RV32-NEXT: vmv.x.s a1, v8
1660 ; RV64-LABEL: vreduce_add_nxv4i64:
1662 ; RV64-NEXT: vsetvli a0, zero, e64, m4, ta, ma
1663 ; RV64-NEXT: vmv.s.x v12, zero
1664 ; RV64-NEXT: vredsum.vs v8, v8, v12
1665 ; RV64-NEXT: vmv.x.s a0, v8
1667 %red = call i64 @llvm.vector.reduce.add.nxv4i64(<vscale x 4 x i64> %v)
; Sign-extends %v from i32 to i64 elements, then add-reduces the widened vector.
; Both runs expect one vwredsum.vs at e32/m2 with the zero seed (v10) written
; at e64/m1 and the result read back at e64 with AVL 1; the riscv32 run also
; fills a1 with the high half via a shift by 32.
1671 define i64 @vwreduce_add_nxv4i32(<vscale x 4 x i32> %v) {
1672 ; RV32-LABEL: vwreduce_add_nxv4i32:
1674 ; RV32-NEXT: vsetivli zero, 1, e64, m1, ta, ma
1675 ; RV32-NEXT: vmv.s.x v10, zero
1676 ; RV32-NEXT: vsetvli a0, zero, e32, m2, ta, ma
1677 ; RV32-NEXT: vwredsum.vs v8, v8, v10
1678 ; RV32-NEXT: vsetivli zero, 1, e64, m1, ta, ma
1679 ; RV32-NEXT: vmv.x.s a0, v8
1680 ; RV32-NEXT: li a1, 32
1681 ; RV32-NEXT: vsrl.vx v8, v8, a1
1682 ; RV32-NEXT: vmv.x.s a1, v8
1685 ; RV64-LABEL: vwreduce_add_nxv4i32:
1687 ; RV64-NEXT: vsetvli a0, zero, e64, m1, ta, ma
1688 ; RV64-NEXT: vmv.s.x v10, zero
1689 ; RV64-NEXT: vsetvli a0, zero, e32, m2, ta, ma
1690 ; RV64-NEXT: vwredsum.vs v8, v8, v10
1691 ; RV64-NEXT: vsetivli zero, 1, e64, m1, ta, ma
1692 ; RV64-NEXT: vmv.x.s a0, v8
1694 %e = sext <vscale x 4 x i32> %v to <vscale x 4 x i64>
1695 %red = call i64 @llvm.vector.reduce.add.nxv4i64(<vscale x 4 x i64> %e)
; Zero-extends %v from i32 to i64 elements, then add-reduces the widened vector.
; Mirrors the sign-extending variant, with the unsigned vwredsumu.vs expected
; at e32/m2; the riscv32 run also fills a1 with the high half via a shift by 32.
1699 define i64 @vwreduce_uadd_nxv4i32(<vscale x 4 x i32> %v) {
1700 ; RV32-LABEL: vwreduce_uadd_nxv4i32:
1702 ; RV32-NEXT: vsetivli zero, 1, e64, m1, ta, ma
1703 ; RV32-NEXT: vmv.s.x v10, zero
1704 ; RV32-NEXT: vsetvli a0, zero, e32, m2, ta, ma
1705 ; RV32-NEXT: vwredsumu.vs v8, v8, v10
1706 ; RV32-NEXT: vsetivli zero, 1, e64, m1, ta, ma
1707 ; RV32-NEXT: vmv.x.s a0, v8
1708 ; RV32-NEXT: li a1, 32
1709 ; RV32-NEXT: vsrl.vx v8, v8, a1
1710 ; RV32-NEXT: vmv.x.s a1, v8
1713 ; RV64-LABEL: vwreduce_uadd_nxv4i32:
1715 ; RV64-NEXT: vsetvli a0, zero, e64, m1, ta, ma
1716 ; RV64-NEXT: vmv.s.x v10, zero
1717 ; RV64-NEXT: vsetvli a0, zero, e32, m2, ta, ma
1718 ; RV64-NEXT: vwredsumu.vs v8, v8, v10
1719 ; RV64-NEXT: vsetivli zero, 1, e64, m1, ta, ma
1720 ; RV64-NEXT: vmv.x.s a0, v8
1722 %e = zext <vscale x 4 x i32> %v to <vscale x 4 x i64>
1723 %red = call i64 @llvm.vector.reduce.add.nxv4i64(<vscale x 4 x i64> %e)
; Declares the unsigned-max reduction intrinsic for <vscale x 4 x i64>.
1727 declare i64 @llvm.vector.reduce.umax.nxv4i64(<vscale x 4 x i64>)
; Unsigned-max reduction of %v at e64/m4; vredmaxu.vs takes v8 for both source
; operands. The riscv32 run reads a0, then shifts v8 right by 32 to fill a1.
1729 define i64 @vreduce_umax_nxv4i64(<vscale x 4 x i64> %v) {
1730 ; RV32-LABEL: vreduce_umax_nxv4i64:
1732 ; RV32-NEXT: vsetvli a0, zero, e64, m4, ta, ma
1733 ; RV32-NEXT: vredmaxu.vs v8, v8, v8
1734 ; RV32-NEXT: vmv.x.s a0, v8
1735 ; RV32-NEXT: li a1, 32
1736 ; RV32-NEXT: vsetivli zero, 1, e64, m1, ta, ma
1737 ; RV32-NEXT: vsrl.vx v8, v8, a1
1738 ; RV32-NEXT: vmv.x.s a1, v8
1741 ; RV64-LABEL: vreduce_umax_nxv4i64:
1743 ; RV64-NEXT: vsetvli a0, zero, e64, m4, ta, ma
1744 ; RV64-NEXT: vredmaxu.vs v8, v8, v8
1745 ; RV64-NEXT: vmv.x.s a0, v8
1747 %red = call i64 @llvm.vector.reduce.umax.nxv4i64(<vscale x 4 x i64> %v)
; Declares the signed-max reduction intrinsic for <vscale x 4 x i64>.
1751 declare i64 @llvm.vector.reduce.smax.nxv4i64(<vscale x 4 x i64>)
; Signed-max reduction of %v at e64/m4; vredmax.vs takes v8 for both source
; operands. The riscv32 run reads a0, then shifts v8 right by 32 to fill a1.
1753 define i64 @vreduce_smax_nxv4i64(<vscale x 4 x i64> %v) {
1754 ; RV32-LABEL: vreduce_smax_nxv4i64:
1756 ; RV32-NEXT: vsetvli a0, zero, e64, m4, ta, ma
1757 ; RV32-NEXT: vredmax.vs v8, v8, v8
1758 ; RV32-NEXT: vmv.x.s a0, v8
1759 ; RV32-NEXT: li a1, 32
1760 ; RV32-NEXT: vsetivli zero, 1, e64, m1, ta, ma
1761 ; RV32-NEXT: vsrl.vx v8, v8, a1
1762 ; RV32-NEXT: vmv.x.s a1, v8
1765 ; RV64-LABEL: vreduce_smax_nxv4i64:
1767 ; RV64-NEXT: vsetvli a0, zero, e64, m4, ta, ma
1768 ; RV64-NEXT: vredmax.vs v8, v8, v8
1769 ; RV64-NEXT: vmv.x.s a0, v8
1771 %red = call i64 @llvm.vector.reduce.smax.nxv4i64(<vscale x 4 x i64> %v)
; Declares the unsigned-min reduction intrinsic for <vscale x 4 x i64>.
1775 declare i64 @llvm.vector.reduce.umin.nxv4i64(<vscale x 4 x i64>)
; Unsigned-min reduction of %v at e64/m4; vredminu.vs takes v8 for both source
; operands. The riscv32 run reads a0, then shifts v8 right by 32 to fill a1.
1777 define i64 @vreduce_umin_nxv4i64(<vscale x 4 x i64> %v) {
1778 ; RV32-LABEL: vreduce_umin_nxv4i64:
1780 ; RV32-NEXT: vsetvli a0, zero, e64, m4, ta, ma
1781 ; RV32-NEXT: vredminu.vs v8, v8, v8
1782 ; RV32-NEXT: vmv.x.s a0, v8
1783 ; RV32-NEXT: li a1, 32
1784 ; RV32-NEXT: vsetivli zero, 1, e64, m1, ta, ma
1785 ; RV32-NEXT: vsrl.vx v8, v8, a1
1786 ; RV32-NEXT: vmv.x.s a1, v8
1789 ; RV64-LABEL: vreduce_umin_nxv4i64:
1791 ; RV64-NEXT: vsetvli a0, zero, e64, m4, ta, ma
1792 ; RV64-NEXT: vredminu.vs v8, v8, v8
1793 ; RV64-NEXT: vmv.x.s a0, v8
1795 %red = call i64 @llvm.vector.reduce.umin.nxv4i64(<vscale x 4 x i64> %v)
; Declares the signed-min reduction intrinsic for <vscale x 4 x i64>.
1799 declare i64 @llvm.vector.reduce.smin.nxv4i64(<vscale x 4 x i64>)
; Signed-min reduction of %v at e64/m4; vredmin.vs takes v8 for both source
; operands. The riscv32 run reads a0, then shifts v8 right by 32 to fill a1.
1801 define i64 @vreduce_smin_nxv4i64(<vscale x 4 x i64> %v) {
1802 ; RV32-LABEL: vreduce_smin_nxv4i64:
1804 ; RV32-NEXT: vsetvli a0, zero, e64, m4, ta, ma
1805 ; RV32-NEXT: vredmin.vs v8, v8, v8
1806 ; RV32-NEXT: vmv.x.s a0, v8
1807 ; RV32-NEXT: li a1, 32
1808 ; RV32-NEXT: vsetivli zero, 1, e64, m1, ta, ma
1809 ; RV32-NEXT: vsrl.vx v8, v8, a1
1810 ; RV32-NEXT: vmv.x.s a1, v8
1813 ; RV64-LABEL: vreduce_smin_nxv4i64:
1815 ; RV64-NEXT: vsetvli a0, zero, e64, m4, ta, ma
1816 ; RV64-NEXT: vredmin.vs v8, v8, v8
1817 ; RV64-NEXT: vmv.x.s a0, v8
1819 %red = call i64 @llvm.vector.reduce.smin.nxv4i64(<vscale x 4 x i64> %v)
; Declares the bitwise-AND reduction intrinsic for <vscale x 4 x i64>.
1823 declare i64 @llvm.vector.reduce.and.nxv4i64(<vscale x 4 x i64>)
; Bitwise-AND reduction of %v at e64/m4; vredand.vs takes v8 for both source
; operands. The riscv32 run reads a0, then shifts v8 right by 32 to fill a1.
1825 define i64 @vreduce_and_nxv4i64(<vscale x 4 x i64> %v) {
1826 ; RV32-LABEL: vreduce_and_nxv4i64:
1828 ; RV32-NEXT: vsetvli a0, zero, e64, m4, ta, ma
1829 ; RV32-NEXT: vredand.vs v8, v8, v8
1830 ; RV32-NEXT: vmv.x.s a0, v8
1831 ; RV32-NEXT: li a1, 32
1832 ; RV32-NEXT: vsetivli zero, 1, e64, m1, ta, ma
1833 ; RV32-NEXT: vsrl.vx v8, v8, a1
1834 ; RV32-NEXT: vmv.x.s a1, v8
1837 ; RV64-LABEL: vreduce_and_nxv4i64:
1839 ; RV64-NEXT: vsetvli a0, zero, e64, m4, ta, ma
1840 ; RV64-NEXT: vredand.vs v8, v8, v8
1841 ; RV64-NEXT: vmv.x.s a0, v8
1843 %red = call i64 @llvm.vector.reduce.and.nxv4i64(<vscale x 4 x i64> %v)
; Declares the bitwise-OR reduction intrinsic for <vscale x 4 x i64>.
1847 declare i64 @llvm.vector.reduce.or.nxv4i64(<vscale x 4 x i64>)
; Bitwise-OR reduction of %v at e64/m4; vredor.vs takes v8 for both source
; operands. The riscv32 run reads a0, then shifts v8 right by 32 to fill a1.
1849 define i64 @vreduce_or_nxv4i64(<vscale x 4 x i64> %v) {
1850 ; RV32-LABEL: vreduce_or_nxv4i64:
1852 ; RV32-NEXT: vsetvli a0, zero, e64, m4, ta, ma
1853 ; RV32-NEXT: vredor.vs v8, v8, v8
1854 ; RV32-NEXT: vmv.x.s a0, v8
1855 ; RV32-NEXT: li a1, 32
1856 ; RV32-NEXT: vsetivli zero, 1, e64, m1, ta, ma
1857 ; RV32-NEXT: vsrl.vx v8, v8, a1
1858 ; RV32-NEXT: vmv.x.s a1, v8
1861 ; RV64-LABEL: vreduce_or_nxv4i64:
1863 ; RV64-NEXT: vsetvli a0, zero, e64, m4, ta, ma
1864 ; RV64-NEXT: vredor.vs v8, v8, v8
1865 ; RV64-NEXT: vmv.x.s a0, v8
1867 %red = call i64 @llvm.vector.reduce.or.nxv4i64(<vscale x 4 x i64> %v)
; Declares the bitwise-XOR reduction intrinsic for <vscale x 4 x i64>.
1871 declare i64 @llvm.vector.reduce.xor.nxv4i64(<vscale x 4 x i64>)
; Bitwise-XOR reduction of %v at e64/m4 with a zero seed expected in v12.
; The riscv32 run reads a0, then shifts v8 right by 32 to fill a1.
1873 define i64 @vreduce_xor_nxv4i64(<vscale x 4 x i64> %v) {
1874 ; RV32-LABEL: vreduce_xor_nxv4i64:
1876 ; RV32-NEXT: vsetvli a0, zero, e64, m4, ta, ma
1877 ; RV32-NEXT: vmv.s.x v12, zero
1878 ; RV32-NEXT: vredxor.vs v8, v8, v12
1879 ; RV32-NEXT: vmv.x.s a0, v8
1880 ; RV32-NEXT: li a1, 32
1881 ; RV32-NEXT: vsetivli zero, 1, e64, m1, ta, ma
1882 ; RV32-NEXT: vsrl.vx v8, v8, a1
1883 ; RV32-NEXT: vmv.x.s a1, v8
1886 ; RV64-LABEL: vreduce_xor_nxv4i64:
1888 ; RV64-NEXT: vsetvli a0, zero, e64, m4, ta, ma
1889 ; RV64-NEXT: vmv.s.x v12, zero
1890 ; RV64-NEXT: vredxor.vs v8, v8, v12
1891 ; RV64-NEXT: vmv.x.s a0, v8
1893 %red = call i64 @llvm.vector.reduce.xor.nxv4i64(<vscale x 4 x i64> %v)