1 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2 ; RUN: llc -mtriple=riscv32 -mattr=+v -verify-machineinstrs < %s \
3 ; RUN: | FileCheck %s --check-prefixes=CHECK,RV32
4 ; RUN: llc -mtriple=riscv64 -mattr=+v -verify-machineinstrs < %s \
5 ; RUN: | FileCheck %s --check-prefixes=CHECK,RV64
7 declare i8 @llvm.vp.reduce.add.v2i8(i8, <2 x i8>, <2 x i1>, i32)
9 define signext i8 @vpreduce_add_v2i8(i8 signext %s, <2 x i8> %v, <2 x i1> %m, i32 zeroext %evl) {
10 ; CHECK-LABEL: vpreduce_add_v2i8:
12 ; CHECK-NEXT: vsetivli zero, 1, e8, m1, ta, ma
13 ; CHECK-NEXT: vmv.s.x v9, a0
14 ; CHECK-NEXT: vsetvli zero, a1, e8, mf8, ta, ma
15 ; CHECK-NEXT: vredsum.vs v9, v8, v9, v0.t
16 ; CHECK-NEXT: vmv.x.s a0, v9
18 %r = call i8 @llvm.vp.reduce.add.v2i8(i8 %s, <2 x i8> %v, <2 x i1> %m, i32 %evl)
22 declare i8 @llvm.vp.reduce.umax.v2i8(i8, <2 x i8>, <2 x i1>, i32)
24 define signext i8 @vpreduce_umax_v2i8(i8 signext %s, <2 x i8> %v, <2 x i1> %m, i32 zeroext %evl) {
25 ; CHECK-LABEL: vpreduce_umax_v2i8:
27 ; CHECK-NEXT: andi a0, a0, 255
28 ; CHECK-NEXT: vsetivli zero, 1, e8, m1, ta, ma
29 ; CHECK-NEXT: vmv.s.x v9, a0
30 ; CHECK-NEXT: vsetvli zero, a1, e8, mf8, ta, ma
31 ; CHECK-NEXT: vredmaxu.vs v9, v8, v9, v0.t
32 ; CHECK-NEXT: vmv.x.s a0, v9
34 %r = call i8 @llvm.vp.reduce.umax.v2i8(i8 %s, <2 x i8> %v, <2 x i1> %m, i32 %evl)
38 declare i8 @llvm.vp.reduce.smax.v2i8(i8, <2 x i8>, <2 x i1>, i32)
40 define signext i8 @vpreduce_smax_v2i8(i8 signext %s, <2 x i8> %v, <2 x i1> %m, i32 zeroext %evl) {
41 ; CHECK-LABEL: vpreduce_smax_v2i8:
43 ; CHECK-NEXT: vsetivli zero, 1, e8, m1, ta, ma
44 ; CHECK-NEXT: vmv.s.x v9, a0
45 ; CHECK-NEXT: vsetvli zero, a1, e8, mf8, ta, ma
46 ; CHECK-NEXT: vredmax.vs v9, v8, v9, v0.t
47 ; CHECK-NEXT: vmv.x.s a0, v9
49 %r = call i8 @llvm.vp.reduce.smax.v2i8(i8 %s, <2 x i8> %v, <2 x i1> %m, i32 %evl)
53 declare i8 @llvm.vp.reduce.umin.v2i8(i8, <2 x i8>, <2 x i1>, i32)
55 define signext i8 @vpreduce_umin_v2i8(i8 signext %s, <2 x i8> %v, <2 x i1> %m, i32 zeroext %evl) {
56 ; CHECK-LABEL: vpreduce_umin_v2i8:
58 ; CHECK-NEXT: andi a0, a0, 255
59 ; CHECK-NEXT: vsetivli zero, 1, e8, m1, ta, ma
60 ; CHECK-NEXT: vmv.s.x v9, a0
61 ; CHECK-NEXT: vsetvli zero, a1, e8, mf8, ta, ma
62 ; CHECK-NEXT: vredminu.vs v9, v8, v9, v0.t
63 ; CHECK-NEXT: vmv.x.s a0, v9
65 %r = call i8 @llvm.vp.reduce.umin.v2i8(i8 %s, <2 x i8> %v, <2 x i1> %m, i32 %evl)
69 declare i8 @llvm.vp.reduce.smin.v2i8(i8, <2 x i8>, <2 x i1>, i32)
71 define signext i8 @vpreduce_smin_v2i8(i8 signext %s, <2 x i8> %v, <2 x i1> %m, i32 zeroext %evl) {
72 ; CHECK-LABEL: vpreduce_smin_v2i8:
74 ; CHECK-NEXT: vsetivli zero, 1, e8, m1, ta, ma
75 ; CHECK-NEXT: vmv.s.x v9, a0
76 ; CHECK-NEXT: vsetvli zero, a1, e8, mf8, ta, ma
77 ; CHECK-NEXT: vredmin.vs v9, v8, v9, v0.t
78 ; CHECK-NEXT: vmv.x.s a0, v9
80 %r = call i8 @llvm.vp.reduce.smin.v2i8(i8 %s, <2 x i8> %v, <2 x i1> %m, i32 %evl)
84 declare i8 @llvm.vp.reduce.and.v2i8(i8, <2 x i8>, <2 x i1>, i32)
86 define signext i8 @vpreduce_and_v2i8(i8 signext %s, <2 x i8> %v, <2 x i1> %m, i32 zeroext %evl) {
87 ; CHECK-LABEL: vpreduce_and_v2i8:
89 ; CHECK-NEXT: vsetivli zero, 1, e8, m1, ta, ma
90 ; CHECK-NEXT: vmv.s.x v9, a0
91 ; CHECK-NEXT: vsetvli zero, a1, e8, mf8, ta, ma
92 ; CHECK-NEXT: vredand.vs v9, v8, v9, v0.t
93 ; CHECK-NEXT: vmv.x.s a0, v9
95 %r = call i8 @llvm.vp.reduce.and.v2i8(i8 %s, <2 x i8> %v, <2 x i1> %m, i32 %evl)
99 declare i8 @llvm.vp.reduce.or.v2i8(i8, <2 x i8>, <2 x i1>, i32)
101 define signext i8 @vpreduce_or_v2i8(i8 signext %s, <2 x i8> %v, <2 x i1> %m, i32 zeroext %evl) {
102 ; CHECK-LABEL: vpreduce_or_v2i8:
104 ; CHECK-NEXT: vsetivli zero, 1, e8, m1, ta, ma
105 ; CHECK-NEXT: vmv.s.x v9, a0
106 ; CHECK-NEXT: vsetvli zero, a1, e8, mf8, ta, ma
107 ; CHECK-NEXT: vredor.vs v9, v8, v9, v0.t
108 ; CHECK-NEXT: vmv.x.s a0, v9
110 %r = call i8 @llvm.vp.reduce.or.v2i8(i8 %s, <2 x i8> %v, <2 x i1> %m, i32 %evl)
114 declare i8 @llvm.vp.reduce.xor.v2i8(i8, <2 x i8>, <2 x i1>, i32)
116 define signext i8 @vpreduce_xor_v2i8(i8 signext %s, <2 x i8> %v, <2 x i1> %m, i32 zeroext %evl) {
117 ; CHECK-LABEL: vpreduce_xor_v2i8:
119 ; CHECK-NEXT: vsetivli zero, 1, e8, m1, ta, ma
120 ; CHECK-NEXT: vmv.s.x v9, a0
121 ; CHECK-NEXT: vsetvli zero, a1, e8, mf8, ta, ma
122 ; CHECK-NEXT: vredxor.vs v9, v8, v9, v0.t
123 ; CHECK-NEXT: vmv.x.s a0, v9
125 %r = call i8 @llvm.vp.reduce.xor.v2i8(i8 %s, <2 x i8> %v, <2 x i1> %m, i32 %evl)
129 declare i8 @llvm.vp.reduce.umin.v3i8(i8, <3 x i8>, <3 x i1>, i32)
131 define signext i8 @vpreduce_umin_v3i8(i8 signext %s, <3 x i8> %v, <3 x i1> %m, i32 zeroext %evl) {
132 ; CHECK-LABEL: vpreduce_umin_v3i8:
134 ; CHECK-NEXT: andi a0, a0, 255
135 ; CHECK-NEXT: vsetivli zero, 1, e8, m1, ta, ma
136 ; CHECK-NEXT: vmv.s.x v9, a0
137 ; CHECK-NEXT: vsetvli zero, a1, e8, mf4, ta, ma
138 ; CHECK-NEXT: vredminu.vs v9, v8, v9, v0.t
139 ; CHECK-NEXT: vmv.x.s a0, v9
141 %r = call i8 @llvm.vp.reduce.umin.v3i8(i8 %s, <3 x i8> %v, <3 x i1> %m, i32 %evl)
145 declare i8 @llvm.vp.reduce.add.v4i8(i8, <4 x i8>, <4 x i1>, i32)
147 define signext i8 @vpreduce_add_v4i8(i8 signext %s, <4 x i8> %v, <4 x i1> %m, i32 zeroext %evl) {
148 ; CHECK-LABEL: vpreduce_add_v4i8:
150 ; CHECK-NEXT: vsetivli zero, 1, e8, m1, ta, ma
151 ; CHECK-NEXT: vmv.s.x v9, a0
152 ; CHECK-NEXT: vsetvli zero, a1, e8, mf4, ta, ma
153 ; CHECK-NEXT: vredsum.vs v9, v8, v9, v0.t
154 ; CHECK-NEXT: vmv.x.s a0, v9
156 %r = call i8 @llvm.vp.reduce.add.v4i8(i8 %s, <4 x i8> %v, <4 x i1> %m, i32 %evl)
160 declare i8 @llvm.vp.reduce.umax.v4i8(i8, <4 x i8>, <4 x i1>, i32)
162 define signext i8 @vpreduce_umax_v4i8(i8 signext %s, <4 x i8> %v, <4 x i1> %m, i32 zeroext %evl) {
163 ; CHECK-LABEL: vpreduce_umax_v4i8:
165 ; CHECK-NEXT: andi a0, a0, 255
166 ; CHECK-NEXT: vsetivli zero, 1, e8, m1, ta, ma
167 ; CHECK-NEXT: vmv.s.x v9, a0
168 ; CHECK-NEXT: vsetvli zero, a1, e8, mf4, ta, ma
169 ; CHECK-NEXT: vredmaxu.vs v9, v8, v9, v0.t
170 ; CHECK-NEXT: vmv.x.s a0, v9
172 %r = call i8 @llvm.vp.reduce.umax.v4i8(i8 %s, <4 x i8> %v, <4 x i1> %m, i32 %evl)
176 declare i8 @llvm.vp.reduce.smax.v4i8(i8, <4 x i8>, <4 x i1>, i32)
178 define signext i8 @vpreduce_smax_v4i8(i8 signext %s, <4 x i8> %v, <4 x i1> %m, i32 zeroext %evl) {
179 ; CHECK-LABEL: vpreduce_smax_v4i8:
181 ; CHECK-NEXT: vsetivli zero, 1, e8, m1, ta, ma
182 ; CHECK-NEXT: vmv.s.x v9, a0
183 ; CHECK-NEXT: vsetvli zero, a1, e8, mf4, ta, ma
184 ; CHECK-NEXT: vredmax.vs v9, v8, v9, v0.t
185 ; CHECK-NEXT: vmv.x.s a0, v9
187 %r = call i8 @llvm.vp.reduce.smax.v4i8(i8 %s, <4 x i8> %v, <4 x i1> %m, i32 %evl)
191 declare i8 @llvm.vp.reduce.umin.v4i8(i8, <4 x i8>, <4 x i1>, i32)
193 define signext i8 @vpreduce_umin_v4i8(i8 signext %s, <4 x i8> %v, <4 x i1> %m, i32 zeroext %evl) {
194 ; CHECK-LABEL: vpreduce_umin_v4i8:
196 ; CHECK-NEXT: andi a0, a0, 255
197 ; CHECK-NEXT: vsetivli zero, 1, e8, m1, ta, ma
198 ; CHECK-NEXT: vmv.s.x v9, a0
199 ; CHECK-NEXT: vsetvli zero, a1, e8, mf4, ta, ma
200 ; CHECK-NEXT: vredminu.vs v9, v8, v9, v0.t
201 ; CHECK-NEXT: vmv.x.s a0, v9
203 %r = call i8 @llvm.vp.reduce.umin.v4i8(i8 %s, <4 x i8> %v, <4 x i1> %m, i32 %evl)
207 declare i8 @llvm.vp.reduce.smin.v4i8(i8, <4 x i8>, <4 x i1>, i32)
209 define signext i8 @vpreduce_smin_v4i8(i8 signext %s, <4 x i8> %v, <4 x i1> %m, i32 zeroext %evl) {
210 ; CHECK-LABEL: vpreduce_smin_v4i8:
212 ; CHECK-NEXT: vsetivli zero, 1, e8, m1, ta, ma
213 ; CHECK-NEXT: vmv.s.x v9, a0
214 ; CHECK-NEXT: vsetvli zero, a1, e8, mf4, ta, ma
215 ; CHECK-NEXT: vredmin.vs v9, v8, v9, v0.t
216 ; CHECK-NEXT: vmv.x.s a0, v9
218 %r = call i8 @llvm.vp.reduce.smin.v4i8(i8 %s, <4 x i8> %v, <4 x i1> %m, i32 %evl)
222 declare i8 @llvm.vp.reduce.and.v4i8(i8, <4 x i8>, <4 x i1>, i32)
224 define signext i8 @vpreduce_and_v4i8(i8 signext %s, <4 x i8> %v, <4 x i1> %m, i32 zeroext %evl) {
225 ; CHECK-LABEL: vpreduce_and_v4i8:
227 ; CHECK-NEXT: vsetivli zero, 1, e8, m1, ta, ma
228 ; CHECK-NEXT: vmv.s.x v9, a0
229 ; CHECK-NEXT: vsetvli zero, a1, e8, mf4, ta, ma
230 ; CHECK-NEXT: vredand.vs v9, v8, v9, v0.t
231 ; CHECK-NEXT: vmv.x.s a0, v9
233 %r = call i8 @llvm.vp.reduce.and.v4i8(i8 %s, <4 x i8> %v, <4 x i1> %m, i32 %evl)
237 declare i8 @llvm.vp.reduce.or.v4i8(i8, <4 x i8>, <4 x i1>, i32)
239 define signext i8 @vpreduce_or_v4i8(i8 signext %s, <4 x i8> %v, <4 x i1> %m, i32 zeroext %evl) {
240 ; CHECK-LABEL: vpreduce_or_v4i8:
242 ; CHECK-NEXT: vsetivli zero, 1, e8, m1, ta, ma
243 ; CHECK-NEXT: vmv.s.x v9, a0
244 ; CHECK-NEXT: vsetvli zero, a1, e8, mf4, ta, ma
245 ; CHECK-NEXT: vredor.vs v9, v8, v9, v0.t
246 ; CHECK-NEXT: vmv.x.s a0, v9
248 %r = call i8 @llvm.vp.reduce.or.v4i8(i8 %s, <4 x i8> %v, <4 x i1> %m, i32 %evl)
252 declare i8 @llvm.vp.reduce.xor.v4i8(i8, <4 x i8>, <4 x i1>, i32)
254 define signext i8 @vpreduce_xor_v4i8(i8 signext %s, <4 x i8> %v, <4 x i1> %m, i32 zeroext %evl) {
255 ; CHECK-LABEL: vpreduce_xor_v4i8:
257 ; CHECK-NEXT: vsetivli zero, 1, e8, m1, ta, ma
258 ; CHECK-NEXT: vmv.s.x v9, a0
259 ; CHECK-NEXT: vsetvli zero, a1, e8, mf4, ta, ma
260 ; CHECK-NEXT: vredxor.vs v9, v8, v9, v0.t
261 ; CHECK-NEXT: vmv.x.s a0, v9
263 %r = call i8 @llvm.vp.reduce.xor.v4i8(i8 %s, <4 x i8> %v, <4 x i1> %m, i32 %evl)
267 declare i16 @llvm.vp.reduce.add.v2i16(i16, <2 x i16>, <2 x i1>, i32)
269 define signext i16 @vpreduce_add_v2i16(i16 signext %s, <2 x i16> %v, <2 x i1> %m, i32 zeroext %evl) {
270 ; CHECK-LABEL: vpreduce_add_v2i16:
272 ; CHECK-NEXT: vsetivli zero, 1, e16, m1, ta, ma
273 ; CHECK-NEXT: vmv.s.x v9, a0
274 ; CHECK-NEXT: vsetvli zero, a1, e16, mf4, ta, ma
275 ; CHECK-NEXT: vredsum.vs v9, v8, v9, v0.t
276 ; CHECK-NEXT: vmv.x.s a0, v9
278 %r = call i16 @llvm.vp.reduce.add.v2i16(i16 %s, <2 x i16> %v, <2 x i1> %m, i32 %evl)
282 declare i16 @llvm.vp.reduce.umax.v2i16(i16, <2 x i16>, <2 x i1>, i32)
284 define signext i16 @vpreduce_umax_v2i16(i16 signext %s, <2 x i16> %v, <2 x i1> %m, i32 zeroext %evl) {
285 ; RV32-LABEL: vpreduce_umax_v2i16:
287 ; RV32-NEXT: slli a0, a0, 16
288 ; RV32-NEXT: srli a0, a0, 16
289 ; RV32-NEXT: vsetivli zero, 1, e16, m1, ta, ma
290 ; RV32-NEXT: vmv.s.x v9, a0
291 ; RV32-NEXT: vsetvli zero, a1, e16, mf4, ta, ma
292 ; RV32-NEXT: vredmaxu.vs v9, v8, v9, v0.t
293 ; RV32-NEXT: vmv.x.s a0, v9
296 ; RV64-LABEL: vpreduce_umax_v2i16:
298 ; RV64-NEXT: slli a0, a0, 48
299 ; RV64-NEXT: srli a0, a0, 48
300 ; RV64-NEXT: vsetivli zero, 1, e16, m1, ta, ma
301 ; RV64-NEXT: vmv.s.x v9, a0
302 ; RV64-NEXT: vsetvli zero, a1, e16, mf4, ta, ma
303 ; RV64-NEXT: vredmaxu.vs v9, v8, v9, v0.t
304 ; RV64-NEXT: vmv.x.s a0, v9
306 %r = call i16 @llvm.vp.reduce.umax.v2i16(i16 %s, <2 x i16> %v, <2 x i1> %m, i32 %evl)
310 declare i16 @llvm.vp.reduce.smax.v2i16(i16, <2 x i16>, <2 x i1>, i32)
312 define signext i16 @vpreduce_smax_v2i16(i16 signext %s, <2 x i16> %v, <2 x i1> %m, i32 zeroext %evl) {
313 ; CHECK-LABEL: vpreduce_smax_v2i16:
315 ; CHECK-NEXT: vsetivli zero, 1, e16, m1, ta, ma
316 ; CHECK-NEXT: vmv.s.x v9, a0
317 ; CHECK-NEXT: vsetvli zero, a1, e16, mf4, ta, ma
318 ; CHECK-NEXT: vredmax.vs v9, v8, v9, v0.t
319 ; CHECK-NEXT: vmv.x.s a0, v9
321 %r = call i16 @llvm.vp.reduce.smax.v2i16(i16 %s, <2 x i16> %v, <2 x i1> %m, i32 %evl)
325 declare i16 @llvm.vp.reduce.umin.v2i16(i16, <2 x i16>, <2 x i1>, i32)
327 define signext i16 @vpreduce_umin_v2i16(i16 signext %s, <2 x i16> %v, <2 x i1> %m, i32 zeroext %evl) {
328 ; RV32-LABEL: vpreduce_umin_v2i16:
330 ; RV32-NEXT: slli a0, a0, 16
331 ; RV32-NEXT: srli a0, a0, 16
332 ; RV32-NEXT: vsetivli zero, 1, e16, m1, ta, ma
333 ; RV32-NEXT: vmv.s.x v9, a0
334 ; RV32-NEXT: vsetvli zero, a1, e16, mf4, ta, ma
335 ; RV32-NEXT: vredminu.vs v9, v8, v9, v0.t
336 ; RV32-NEXT: vmv.x.s a0, v9
339 ; RV64-LABEL: vpreduce_umin_v2i16:
341 ; RV64-NEXT: slli a0, a0, 48
342 ; RV64-NEXT: srli a0, a0, 48
343 ; RV64-NEXT: vsetivli zero, 1, e16, m1, ta, ma
344 ; RV64-NEXT: vmv.s.x v9, a0
345 ; RV64-NEXT: vsetvli zero, a1, e16, mf4, ta, ma
346 ; RV64-NEXT: vredminu.vs v9, v8, v9, v0.t
347 ; RV64-NEXT: vmv.x.s a0, v9
349 %r = call i16 @llvm.vp.reduce.umin.v2i16(i16 %s, <2 x i16> %v, <2 x i1> %m, i32 %evl)
353 declare i16 @llvm.vp.reduce.smin.v2i16(i16, <2 x i16>, <2 x i1>, i32)
355 define signext i16 @vpreduce_smin_v2i16(i16 signext %s, <2 x i16> %v, <2 x i1> %m, i32 zeroext %evl) {
356 ; CHECK-LABEL: vpreduce_smin_v2i16:
358 ; CHECK-NEXT: vsetivli zero, 1, e16, m1, ta, ma
359 ; CHECK-NEXT: vmv.s.x v9, a0
360 ; CHECK-NEXT: vsetvli zero, a1, e16, mf4, ta, ma
361 ; CHECK-NEXT: vredmin.vs v9, v8, v9, v0.t
362 ; CHECK-NEXT: vmv.x.s a0, v9
364 %r = call i16 @llvm.vp.reduce.smin.v2i16(i16 %s, <2 x i16> %v, <2 x i1> %m, i32 %evl)
368 declare i16 @llvm.vp.reduce.and.v2i16(i16, <2 x i16>, <2 x i1>, i32)
370 define signext i16 @vpreduce_and_v2i16(i16 signext %s, <2 x i16> %v, <2 x i1> %m, i32 zeroext %evl) {
371 ; CHECK-LABEL: vpreduce_and_v2i16:
373 ; CHECK-NEXT: vsetivli zero, 1, e16, m1, ta, ma
374 ; CHECK-NEXT: vmv.s.x v9, a0
375 ; CHECK-NEXT: vsetvli zero, a1, e16, mf4, ta, ma
376 ; CHECK-NEXT: vredand.vs v9, v8, v9, v0.t
377 ; CHECK-NEXT: vmv.x.s a0, v9
379 %r = call i16 @llvm.vp.reduce.and.v2i16(i16 %s, <2 x i16> %v, <2 x i1> %m, i32 %evl)
383 declare i16 @llvm.vp.reduce.or.v2i16(i16, <2 x i16>, <2 x i1>, i32)
385 define signext i16 @vpreduce_or_v2i16(i16 signext %s, <2 x i16> %v, <2 x i1> %m, i32 zeroext %evl) {
386 ; CHECK-LABEL: vpreduce_or_v2i16:
388 ; CHECK-NEXT: vsetivli zero, 1, e16, m1, ta, ma
389 ; CHECK-NEXT: vmv.s.x v9, a0
390 ; CHECK-NEXT: vsetvli zero, a1, e16, mf4, ta, ma
391 ; CHECK-NEXT: vredor.vs v9, v8, v9, v0.t
392 ; CHECK-NEXT: vmv.x.s a0, v9
394 %r = call i16 @llvm.vp.reduce.or.v2i16(i16 %s, <2 x i16> %v, <2 x i1> %m, i32 %evl)
398 declare i16 @llvm.vp.reduce.xor.v2i16(i16, <2 x i16>, <2 x i1>, i32)
400 define signext i16 @vpreduce_xor_v2i16(i16 signext %s, <2 x i16> %v, <2 x i1> %m, i32 zeroext %evl) {
401 ; CHECK-LABEL: vpreduce_xor_v2i16:
403 ; CHECK-NEXT: vsetivli zero, 1, e16, m1, ta, ma
404 ; CHECK-NEXT: vmv.s.x v9, a0
405 ; CHECK-NEXT: vsetvli zero, a1, e16, mf4, ta, ma
406 ; CHECK-NEXT: vredxor.vs v9, v8, v9, v0.t
407 ; CHECK-NEXT: vmv.x.s a0, v9
409 %r = call i16 @llvm.vp.reduce.xor.v2i16(i16 %s, <2 x i16> %v, <2 x i1> %m, i32 %evl)
413 declare i16 @llvm.vp.reduce.add.v4i16(i16, <4 x i16>, <4 x i1>, i32)
415 define signext i16 @vpreduce_add_v4i16(i16 signext %s, <4 x i16> %v, <4 x i1> %m, i32 zeroext %evl) {
416 ; CHECK-LABEL: vpreduce_add_v4i16:
418 ; CHECK-NEXT: vsetivli zero, 1, e16, m1, ta, ma
419 ; CHECK-NEXT: vmv.s.x v9, a0
420 ; CHECK-NEXT: vsetvli zero, a1, e16, mf2, ta, ma
421 ; CHECK-NEXT: vredsum.vs v9, v8, v9, v0.t
422 ; CHECK-NEXT: vmv.x.s a0, v9
424 %r = call i16 @llvm.vp.reduce.add.v4i16(i16 %s, <4 x i16> %v, <4 x i1> %m, i32 %evl)
428 declare i16 @llvm.vp.reduce.umax.v4i16(i16, <4 x i16>, <4 x i1>, i32)
430 define signext i16 @vpreduce_umax_v4i16(i16 signext %s, <4 x i16> %v, <4 x i1> %m, i32 zeroext %evl) {
431 ; RV32-LABEL: vpreduce_umax_v4i16:
433 ; RV32-NEXT: slli a0, a0, 16
434 ; RV32-NEXT: srli a0, a0, 16
435 ; RV32-NEXT: vsetivli zero, 1, e16, m1, ta, ma
436 ; RV32-NEXT: vmv.s.x v9, a0
437 ; RV32-NEXT: vsetvli zero, a1, e16, mf2, ta, ma
438 ; RV32-NEXT: vredmaxu.vs v9, v8, v9, v0.t
439 ; RV32-NEXT: vmv.x.s a0, v9
442 ; RV64-LABEL: vpreduce_umax_v4i16:
444 ; RV64-NEXT: slli a0, a0, 48
445 ; RV64-NEXT: srli a0, a0, 48
446 ; RV64-NEXT: vsetivli zero, 1, e16, m1, ta, ma
447 ; RV64-NEXT: vmv.s.x v9, a0
448 ; RV64-NEXT: vsetvli zero, a1, e16, mf2, ta, ma
449 ; RV64-NEXT: vredmaxu.vs v9, v8, v9, v0.t
450 ; RV64-NEXT: vmv.x.s a0, v9
452 %r = call i16 @llvm.vp.reduce.umax.v4i16(i16 %s, <4 x i16> %v, <4 x i1> %m, i32 %evl)
456 declare i16 @llvm.vp.reduce.smax.v4i16(i16, <4 x i16>, <4 x i1>, i32)
458 define signext i16 @vpreduce_smax_v4i16(i16 signext %s, <4 x i16> %v, <4 x i1> %m, i32 zeroext %evl) {
459 ; CHECK-LABEL: vpreduce_smax_v4i16:
461 ; CHECK-NEXT: vsetivli zero, 1, e16, m1, ta, ma
462 ; CHECK-NEXT: vmv.s.x v9, a0
463 ; CHECK-NEXT: vsetvli zero, a1, e16, mf2, ta, ma
464 ; CHECK-NEXT: vredmax.vs v9, v8, v9, v0.t
465 ; CHECK-NEXT: vmv.x.s a0, v9
467 %r = call i16 @llvm.vp.reduce.smax.v4i16(i16 %s, <4 x i16> %v, <4 x i1> %m, i32 %evl)
471 declare i16 @llvm.vp.reduce.umin.v4i16(i16, <4 x i16>, <4 x i1>, i32)
473 define signext i16 @vpreduce_umin_v4i16(i16 signext %s, <4 x i16> %v, <4 x i1> %m, i32 zeroext %evl) {
474 ; RV32-LABEL: vpreduce_umin_v4i16:
476 ; RV32-NEXT: slli a0, a0, 16
477 ; RV32-NEXT: srli a0, a0, 16
478 ; RV32-NEXT: vsetivli zero, 1, e16, m1, ta, ma
479 ; RV32-NEXT: vmv.s.x v9, a0
480 ; RV32-NEXT: vsetvli zero, a1, e16, mf2, ta, ma
481 ; RV32-NEXT: vredminu.vs v9, v8, v9, v0.t
482 ; RV32-NEXT: vmv.x.s a0, v9
485 ; RV64-LABEL: vpreduce_umin_v4i16:
487 ; RV64-NEXT: slli a0, a0, 48
488 ; RV64-NEXT: srli a0, a0, 48
489 ; RV64-NEXT: vsetivli zero, 1, e16, m1, ta, ma
490 ; RV64-NEXT: vmv.s.x v9, a0
491 ; RV64-NEXT: vsetvli zero, a1, e16, mf2, ta, ma
492 ; RV64-NEXT: vredminu.vs v9, v8, v9, v0.t
493 ; RV64-NEXT: vmv.x.s a0, v9
495 %r = call i16 @llvm.vp.reduce.umin.v4i16(i16 %s, <4 x i16> %v, <4 x i1> %m, i32 %evl)
499 declare i16 @llvm.vp.reduce.smin.v4i16(i16, <4 x i16>, <4 x i1>, i32)
501 define signext i16 @vpreduce_smin_v4i16(i16 signext %s, <4 x i16> %v, <4 x i1> %m, i32 zeroext %evl) {
502 ; CHECK-LABEL: vpreduce_smin_v4i16:
504 ; CHECK-NEXT: vsetivli zero, 1, e16, m1, ta, ma
505 ; CHECK-NEXT: vmv.s.x v9, a0
506 ; CHECK-NEXT: vsetvli zero, a1, e16, mf2, ta, ma
507 ; CHECK-NEXT: vredmin.vs v9, v8, v9, v0.t
508 ; CHECK-NEXT: vmv.x.s a0, v9
510 %r = call i16 @llvm.vp.reduce.smin.v4i16(i16 %s, <4 x i16> %v, <4 x i1> %m, i32 %evl)
514 declare i16 @llvm.vp.reduce.and.v4i16(i16, <4 x i16>, <4 x i1>, i32)
516 define signext i16 @vpreduce_and_v4i16(i16 signext %s, <4 x i16> %v, <4 x i1> %m, i32 zeroext %evl) {
517 ; CHECK-LABEL: vpreduce_and_v4i16:
519 ; CHECK-NEXT: vsetivli zero, 1, e16, m1, ta, ma
520 ; CHECK-NEXT: vmv.s.x v9, a0
521 ; CHECK-NEXT: vsetvli zero, a1, e16, mf2, ta, ma
522 ; CHECK-NEXT: vredand.vs v9, v8, v9, v0.t
523 ; CHECK-NEXT: vmv.x.s a0, v9
525 %r = call i16 @llvm.vp.reduce.and.v4i16(i16 %s, <4 x i16> %v, <4 x i1> %m, i32 %evl)
529 declare i16 @llvm.vp.reduce.or.v4i16(i16, <4 x i16>, <4 x i1>, i32)
531 define signext i16 @vpreduce_or_v4i16(i16 signext %s, <4 x i16> %v, <4 x i1> %m, i32 zeroext %evl) {
532 ; CHECK-LABEL: vpreduce_or_v4i16:
534 ; CHECK-NEXT: vsetivli zero, 1, e16, m1, ta, ma
535 ; CHECK-NEXT: vmv.s.x v9, a0
536 ; CHECK-NEXT: vsetvli zero, a1, e16, mf2, ta, ma
537 ; CHECK-NEXT: vredor.vs v9, v8, v9, v0.t
538 ; CHECK-NEXT: vmv.x.s a0, v9
540 %r = call i16 @llvm.vp.reduce.or.v4i16(i16 %s, <4 x i16> %v, <4 x i1> %m, i32 %evl)
544 declare i16 @llvm.vp.reduce.xor.v4i16(i16, <4 x i16>, <4 x i1>, i32)
546 define signext i16 @vpreduce_xor_v4i16(i16 signext %s, <4 x i16> %v, <4 x i1> %m, i32 zeroext %evl) {
547 ; CHECK-LABEL: vpreduce_xor_v4i16:
549 ; CHECK-NEXT: vsetivli zero, 1, e16, m1, ta, ma
550 ; CHECK-NEXT: vmv.s.x v9, a0
551 ; CHECK-NEXT: vsetvli zero, a1, e16, mf2, ta, ma
552 ; CHECK-NEXT: vredxor.vs v9, v8, v9, v0.t
553 ; CHECK-NEXT: vmv.x.s a0, v9
555 %r = call i16 @llvm.vp.reduce.xor.v4i16(i16 %s, <4 x i16> %v, <4 x i1> %m, i32 %evl)
559 declare i32 @llvm.vp.reduce.add.v2i32(i32, <2 x i32>, <2 x i1>, i32)
561 define signext i32 @vpreduce_add_v2i32(i32 signext %s, <2 x i32> %v, <2 x i1> %m, i32 zeroext %evl) {
562 ; CHECK-LABEL: vpreduce_add_v2i32:
564 ; CHECK-NEXT: vsetivli zero, 1, e32, m1, ta, ma
565 ; CHECK-NEXT: vmv.s.x v9, a0
566 ; CHECK-NEXT: vsetvli zero, a1, e32, mf2, ta, ma
567 ; CHECK-NEXT: vredsum.vs v9, v8, v9, v0.t
568 ; CHECK-NEXT: vmv.x.s a0, v9
570 %r = call i32 @llvm.vp.reduce.add.v2i32(i32 %s, <2 x i32> %v, <2 x i1> %m, i32 %evl)
574 declare i32 @llvm.vp.reduce.umax.v2i32(i32, <2 x i32>, <2 x i1>, i32)
576 define signext i32 @vpreduce_umax_v2i32(i32 signext %s, <2 x i32> %v, <2 x i1> %m, i32 zeroext %evl) {
577 ; CHECK-LABEL: vpreduce_umax_v2i32:
579 ; CHECK-NEXT: vsetivli zero, 1, e32, m1, ta, ma
580 ; CHECK-NEXT: vmv.s.x v9, a0
581 ; CHECK-NEXT: vsetvli zero, a1, e32, mf2, ta, ma
582 ; CHECK-NEXT: vredmaxu.vs v9, v8, v9, v0.t
583 ; CHECK-NEXT: vmv.x.s a0, v9
585 %r = call i32 @llvm.vp.reduce.umax.v2i32(i32 %s, <2 x i32> %v, <2 x i1> %m, i32 %evl)
589 declare i32 @llvm.vp.reduce.smax.v2i32(i32, <2 x i32>, <2 x i1>, i32)
591 define signext i32 @vpreduce_smax_v2i32(i32 signext %s, <2 x i32> %v, <2 x i1> %m, i32 zeroext %evl) {
592 ; CHECK-LABEL: vpreduce_smax_v2i32:
594 ; CHECK-NEXT: vsetivli zero, 1, e32, m1, ta, ma
595 ; CHECK-NEXT: vmv.s.x v9, a0
596 ; CHECK-NEXT: vsetvli zero, a1, e32, mf2, ta, ma
597 ; CHECK-NEXT: vredmax.vs v9, v8, v9, v0.t
598 ; CHECK-NEXT: vmv.x.s a0, v9
600 %r = call i32 @llvm.vp.reduce.smax.v2i32(i32 %s, <2 x i32> %v, <2 x i1> %m, i32 %evl)
604 declare i32 @llvm.vp.reduce.umin.v2i32(i32, <2 x i32>, <2 x i1>, i32)
606 define signext i32 @vpreduce_umin_v2i32(i32 signext %s, <2 x i32> %v, <2 x i1> %m, i32 zeroext %evl) {
607 ; CHECK-LABEL: vpreduce_umin_v2i32:
609 ; CHECK-NEXT: vsetivli zero, 1, e32, m1, ta, ma
610 ; CHECK-NEXT: vmv.s.x v9, a0
611 ; CHECK-NEXT: vsetvli zero, a1, e32, mf2, ta, ma
612 ; CHECK-NEXT: vredminu.vs v9, v8, v9, v0.t
613 ; CHECK-NEXT: vmv.x.s a0, v9
615 %r = call i32 @llvm.vp.reduce.umin.v2i32(i32 %s, <2 x i32> %v, <2 x i1> %m, i32 %evl)
619 declare i32 @llvm.vp.reduce.smin.v2i32(i32, <2 x i32>, <2 x i1>, i32)
621 define signext i32 @vpreduce_smin_v2i32(i32 signext %s, <2 x i32> %v, <2 x i1> %m, i32 zeroext %evl) {
622 ; CHECK-LABEL: vpreduce_smin_v2i32:
624 ; CHECK-NEXT: vsetivli zero, 1, e32, m1, ta, ma
625 ; CHECK-NEXT: vmv.s.x v9, a0
626 ; CHECK-NEXT: vsetvli zero, a1, e32, mf2, ta, ma
627 ; CHECK-NEXT: vredmin.vs v9, v8, v9, v0.t
628 ; CHECK-NEXT: vmv.x.s a0, v9
630 %r = call i32 @llvm.vp.reduce.smin.v2i32(i32 %s, <2 x i32> %v, <2 x i1> %m, i32 %evl)
634 declare i32 @llvm.vp.reduce.and.v2i32(i32, <2 x i32>, <2 x i1>, i32)
636 define signext i32 @vpreduce_and_v2i32(i32 signext %s, <2 x i32> %v, <2 x i1> %m, i32 zeroext %evl) {
637 ; CHECK-LABEL: vpreduce_and_v2i32:
639 ; CHECK-NEXT: vsetivli zero, 1, e32, m1, ta, ma
640 ; CHECK-NEXT: vmv.s.x v9, a0
641 ; CHECK-NEXT: vsetvli zero, a1, e32, mf2, ta, ma
642 ; CHECK-NEXT: vredand.vs v9, v8, v9, v0.t
643 ; CHECK-NEXT: vmv.x.s a0, v9
645 %r = call i32 @llvm.vp.reduce.and.v2i32(i32 %s, <2 x i32> %v, <2 x i1> %m, i32 %evl)
649 declare i32 @llvm.vp.reduce.or.v2i32(i32, <2 x i32>, <2 x i1>, i32)
651 define signext i32 @vpreduce_or_v2i32(i32 signext %s, <2 x i32> %v, <2 x i1> %m, i32 zeroext %evl) {
652 ; CHECK-LABEL: vpreduce_or_v2i32:
654 ; CHECK-NEXT: vsetivli zero, 1, e32, m1, ta, ma
655 ; CHECK-NEXT: vmv.s.x v9, a0
656 ; CHECK-NEXT: vsetvli zero, a1, e32, mf2, ta, ma
657 ; CHECK-NEXT: vredor.vs v9, v8, v9, v0.t
658 ; CHECK-NEXT: vmv.x.s a0, v9
660 %r = call i32 @llvm.vp.reduce.or.v2i32(i32 %s, <2 x i32> %v, <2 x i1> %m, i32 %evl)
664 declare i32 @llvm.vp.reduce.xor.v2i32(i32, <2 x i32>, <2 x i1>, i32)
666 define signext i32 @vpreduce_xor_v2i32(i32 signext %s, <2 x i32> %v, <2 x i1> %m, i32 zeroext %evl) {
667 ; CHECK-LABEL: vpreduce_xor_v2i32:
669 ; CHECK-NEXT: vsetivli zero, 1, e32, m1, ta, ma
670 ; CHECK-NEXT: vmv.s.x v9, a0
671 ; CHECK-NEXT: vsetvli zero, a1, e32, mf2, ta, ma
672 ; CHECK-NEXT: vredxor.vs v9, v8, v9, v0.t
673 ; CHECK-NEXT: vmv.x.s a0, v9
675 %r = call i32 @llvm.vp.reduce.xor.v2i32(i32 %s, <2 x i32> %v, <2 x i1> %m, i32 %evl)
679 declare i32 @llvm.vp.reduce.add.v4i32(i32, <4 x i32>, <4 x i1>, i32)
681 define signext i32 @vpreduce_add_v4i32(i32 signext %s, <4 x i32> %v, <4 x i1> %m, i32 zeroext %evl) {
682 ; CHECK-LABEL: vpreduce_add_v4i32:
684 ; CHECK-NEXT: vsetivli zero, 1, e32, m1, ta, ma
685 ; CHECK-NEXT: vmv.s.x v9, a0
686 ; CHECK-NEXT: vsetvli zero, a1, e32, m1, ta, ma
687 ; CHECK-NEXT: vredsum.vs v9, v8, v9, v0.t
688 ; CHECK-NEXT: vmv.x.s a0, v9
690 %r = call i32 @llvm.vp.reduce.add.v4i32(i32 %s, <4 x i32> %v, <4 x i1> %m, i32 %evl)
694 declare i32 @llvm.vp.reduce.umax.v4i32(i32, <4 x i32>, <4 x i1>, i32)
696 define signext i32 @vpreduce_umax_v4i32(i32 signext %s, <4 x i32> %v, <4 x i1> %m, i32 zeroext %evl) {
697 ; CHECK-LABEL: vpreduce_umax_v4i32:
699 ; CHECK-NEXT: vsetivli zero, 1, e32, m1, ta, ma
700 ; CHECK-NEXT: vmv.s.x v9, a0
701 ; CHECK-NEXT: vsetvli zero, a1, e32, m1, ta, ma
702 ; CHECK-NEXT: vredmaxu.vs v9, v8, v9, v0.t
703 ; CHECK-NEXT: vmv.x.s a0, v9
705 %r = call i32 @llvm.vp.reduce.umax.v4i32(i32 %s, <4 x i32> %v, <4 x i1> %m, i32 %evl)
709 declare i32 @llvm.vp.reduce.smax.v4i32(i32, <4 x i32>, <4 x i1>, i32)
711 define signext i32 @vpreduce_smax_v4i32(i32 signext %s, <4 x i32> %v, <4 x i1> %m, i32 zeroext %evl) {
712 ; CHECK-LABEL: vpreduce_smax_v4i32:
714 ; CHECK-NEXT: vsetivli zero, 1, e32, m1, ta, ma
715 ; CHECK-NEXT: vmv.s.x v9, a0
716 ; CHECK-NEXT: vsetvli zero, a1, e32, m1, ta, ma
717 ; CHECK-NEXT: vredmax.vs v9, v8, v9, v0.t
718 ; CHECK-NEXT: vmv.x.s a0, v9
720 %r = call i32 @llvm.vp.reduce.smax.v4i32(i32 %s, <4 x i32> %v, <4 x i1> %m, i32 %evl)
724 declare i32 @llvm.vp.reduce.umin.v4i32(i32, <4 x i32>, <4 x i1>, i32)
726 define signext i32 @vpreduce_umin_v4i32(i32 signext %s, <4 x i32> %v, <4 x i1> %m, i32 zeroext %evl) {
727 ; CHECK-LABEL: vpreduce_umin_v4i32:
729 ; CHECK-NEXT: vsetivli zero, 1, e32, m1, ta, ma
730 ; CHECK-NEXT: vmv.s.x v9, a0
731 ; CHECK-NEXT: vsetvli zero, a1, e32, m1, ta, ma
732 ; CHECK-NEXT: vredminu.vs v9, v8, v9, v0.t
733 ; CHECK-NEXT: vmv.x.s a0, v9
735 %r = call i32 @llvm.vp.reduce.umin.v4i32(i32 %s, <4 x i32> %v, <4 x i1> %m, i32 %evl)
739 declare i32 @llvm.vp.reduce.smin.v4i32(i32, <4 x i32>, <4 x i1>, i32)
741 define signext i32 @vpreduce_smin_v4i32(i32 signext %s, <4 x i32> %v, <4 x i1> %m, i32 zeroext %evl) {
742 ; CHECK-LABEL: vpreduce_smin_v4i32:
744 ; CHECK-NEXT: vsetivli zero, 1, e32, m1, ta, ma
745 ; CHECK-NEXT: vmv.s.x v9, a0
746 ; CHECK-NEXT: vsetvli zero, a1, e32, m1, ta, ma
747 ; CHECK-NEXT: vredmin.vs v9, v8, v9, v0.t
748 ; CHECK-NEXT: vmv.x.s a0, v9
750 %r = call i32 @llvm.vp.reduce.smin.v4i32(i32 %s, <4 x i32> %v, <4 x i1> %m, i32 %evl)
754 declare i32 @llvm.vp.reduce.and.v4i32(i32, <4 x i32>, <4 x i1>, i32)
756 define signext i32 @vpreduce_and_v4i32(i32 signext %s, <4 x i32> %v, <4 x i1> %m, i32 zeroext %evl) {
757 ; CHECK-LABEL: vpreduce_and_v4i32:
759 ; CHECK-NEXT: vsetivli zero, 1, e32, m1, ta, ma
760 ; CHECK-NEXT: vmv.s.x v9, a0
761 ; CHECK-NEXT: vsetvli zero, a1, e32, m1, ta, ma
762 ; CHECK-NEXT: vredand.vs v9, v8, v9, v0.t
763 ; CHECK-NEXT: vmv.x.s a0, v9
765 %r = call i32 @llvm.vp.reduce.and.v4i32(i32 %s, <4 x i32> %v, <4 x i1> %m, i32 %evl)
769 declare i32 @llvm.vp.reduce.or.v4i32(i32, <4 x i32>, <4 x i1>, i32)
771 define signext i32 @vpreduce_or_v4i32(i32 signext %s, <4 x i32> %v, <4 x i1> %m, i32 zeroext %evl) {
772 ; CHECK-LABEL: vpreduce_or_v4i32:
774 ; CHECK-NEXT: vsetivli zero, 1, e32, m1, ta, ma
775 ; CHECK-NEXT: vmv.s.x v9, a0
776 ; CHECK-NEXT: vsetvli zero, a1, e32, m1, ta, ma
777 ; CHECK-NEXT: vredor.vs v9, v8, v9, v0.t
778 ; CHECK-NEXT: vmv.x.s a0, v9
780 %r = call i32 @llvm.vp.reduce.or.v4i32(i32 %s, <4 x i32> %v, <4 x i1> %m, i32 %evl)
784 declare i32 @llvm.vp.reduce.xor.v4i32(i32, <4 x i32>, <4 x i1>, i32)
786 define signext i32 @vpreduce_xor_v4i32(i32 signext %s, <4 x i32> %v, <4 x i1> %m, i32 zeroext %evl) {
787 ; CHECK-LABEL: vpreduce_xor_v4i32:
789 ; CHECK-NEXT: vsetivli zero, 1, e32, m1, ta, ma
790 ; CHECK-NEXT: vmv.s.x v9, a0
791 ; CHECK-NEXT: vsetvli zero, a1, e32, m1, ta, ma
792 ; CHECK-NEXT: vredxor.vs v9, v8, v9, v0.t
793 ; CHECK-NEXT: vmv.x.s a0, v9
795 %r = call i32 @llvm.vp.reduce.xor.v4i32(i32 %s, <4 x i32> %v, <4 x i1> %m, i32 %evl)
799 declare i32 @llvm.vp.reduce.xor.v64i32(i32, <64 x i32>, <64 x i1>, i32)
801 define signext i32 @vpreduce_xor_v64i32(i32 signext %s, <64 x i32> %v, <64 x i1> %m, i32 zeroext %evl) {
802 ; CHECK-LABEL: vpreduce_xor_v64i32:
804 ; CHECK-NEXT: li a3, 32
805 ; CHECK-NEXT: vsetivli zero, 4, e8, mf2, ta, ma
806 ; CHECK-NEXT: vslidedown.vi v24, v0, 4
807 ; CHECK-NEXT: mv a2, a1
808 ; CHECK-NEXT: bltu a1, a3, .LBB49_2
809 ; CHECK-NEXT: # %bb.1:
810 ; CHECK-NEXT: li a2, 32
811 ; CHECK-NEXT: .LBB49_2:
812 ; CHECK-NEXT: vsetvli zero, zero, e32, m2, ta, ma
813 ; CHECK-NEXT: vmv.s.x v25, a0
814 ; CHECK-NEXT: vsetvli zero, a2, e32, m8, ta, ma
815 ; CHECK-NEXT: vredxor.vs v25, v8, v25, v0.t
816 ; CHECK-NEXT: addi a0, a1, -32
817 ; CHECK-NEXT: sltu a1, a1, a0
818 ; CHECK-NEXT: addi a1, a1, -1
819 ; CHECK-NEXT: and a0, a1, a0
820 ; CHECK-NEXT: vmv1r.v v0, v24
821 ; CHECK-NEXT: vsetvli zero, a0, e32, m8, ta, ma
822 ; CHECK-NEXT: vredxor.vs v25, v16, v25, v0.t
823 ; CHECK-NEXT: vmv.x.s a0, v25
825 %r = call i32 @llvm.vp.reduce.xor.v64i32(i32 %s, <64 x i32> %v, <64 x i1> %m, i32 %evl)
829 declare i64 @llvm.vp.reduce.add.v2i64(i64, <2 x i64>, <2 x i1>, i32)
831 define signext i64 @vpreduce_add_v2i64(i64 signext %s, <2 x i64> %v, <2 x i1> %m, i32 zeroext %evl) {
832 ; RV32-LABEL: vpreduce_add_v2i64:
834 ; RV32-NEXT: addi sp, sp, -16
835 ; RV32-NEXT: .cfi_def_cfa_offset 16
836 ; RV32-NEXT: sw a1, 12(sp)
837 ; RV32-NEXT: sw a0, 8(sp)
838 ; RV32-NEXT: addi a0, sp, 8
839 ; RV32-NEXT: vsetivli zero, 1, e64, m1, ta, ma
840 ; RV32-NEXT: vlse64.v v9, (a0), zero
841 ; RV32-NEXT: vsetvli zero, a2, e64, m1, ta, ma
842 ; RV32-NEXT: vredsum.vs v9, v8, v9, v0.t
843 ; RV32-NEXT: vmv.x.s a0, v9
844 ; RV32-NEXT: li a1, 32
845 ; RV32-NEXT: vsetivli zero, 1, e64, m1, ta, ma
846 ; RV32-NEXT: vsrl.vx v8, v9, a1
847 ; RV32-NEXT: vmv.x.s a1, v8
848 ; RV32-NEXT: addi sp, sp, 16
851 ; RV64-LABEL: vpreduce_add_v2i64:
853 ; RV64-NEXT: vsetivli zero, 1, e64, m1, ta, ma
854 ; RV64-NEXT: vmv.s.x v9, a0
855 ; RV64-NEXT: vsetvli zero, a1, e64, m1, ta, ma
856 ; RV64-NEXT: vredsum.vs v9, v8, v9, v0.t
857 ; RV64-NEXT: vmv.x.s a0, v9
859 %r = call i64 @llvm.vp.reduce.add.v2i64(i64 %s, <2 x i64> %v, <2 x i1> %m, i32 %evl)
863 declare i64 @llvm.vp.reduce.umax.v2i64(i64, <2 x i64>, <2 x i1>, i32)
865 define signext i64 @vpreduce_umax_v2i64(i64 signext %s, <2 x i64> %v, <2 x i1> %m, i32 zeroext %evl) {
866 ; RV32-LABEL: vpreduce_umax_v2i64:
868 ; RV32-NEXT: addi sp, sp, -16
869 ; RV32-NEXT: .cfi_def_cfa_offset 16
870 ; RV32-NEXT: sw a1, 12(sp)
871 ; RV32-NEXT: sw a0, 8(sp)
872 ; RV32-NEXT: addi a0, sp, 8
873 ; RV32-NEXT: vsetivli zero, 1, e64, m1, ta, ma
874 ; RV32-NEXT: vlse64.v v9, (a0), zero
875 ; RV32-NEXT: vsetvli zero, a2, e64, m1, ta, ma
876 ; RV32-NEXT: vredmaxu.vs v9, v8, v9, v0.t
877 ; RV32-NEXT: vmv.x.s a0, v9
878 ; RV32-NEXT: li a1, 32
879 ; RV32-NEXT: vsetivli zero, 1, e64, m1, ta, ma
880 ; RV32-NEXT: vsrl.vx v8, v9, a1
881 ; RV32-NEXT: vmv.x.s a1, v8
882 ; RV32-NEXT: addi sp, sp, 16
885 ; RV64-LABEL: vpreduce_umax_v2i64:
887 ; RV64-NEXT: vsetivli zero, 1, e64, m1, ta, ma
888 ; RV64-NEXT: vmv.s.x v9, a0
889 ; RV64-NEXT: vsetvli zero, a1, e64, m1, ta, ma
890 ; RV64-NEXT: vredmaxu.vs v9, v8, v9, v0.t
891 ; RV64-NEXT: vmv.x.s a0, v9
893 %r = call i64 @llvm.vp.reduce.umax.v2i64(i64 %s, <2 x i64> %v, <2 x i1> %m, i32 %evl)
897 declare i64 @llvm.vp.reduce.smax.v2i64(i64, <2 x i64>, <2 x i1>, i32)
899 define signext i64 @vpreduce_smax_v2i64(i64 signext %s, <2 x i64> %v, <2 x i1> %m, i32 zeroext %evl) {
900 ; RV32-LABEL: vpreduce_smax_v2i64:
902 ; RV32-NEXT: addi sp, sp, -16
903 ; RV32-NEXT: .cfi_def_cfa_offset 16
904 ; RV32-NEXT: sw a1, 12(sp)
905 ; RV32-NEXT: sw a0, 8(sp)
906 ; RV32-NEXT: addi a0, sp, 8
907 ; RV32-NEXT: vsetivli zero, 1, e64, m1, ta, ma
908 ; RV32-NEXT: vlse64.v v9, (a0), zero
909 ; RV32-NEXT: vsetvli zero, a2, e64, m1, ta, ma
910 ; RV32-NEXT: vredmax.vs v9, v8, v9, v0.t
911 ; RV32-NEXT: vmv.x.s a0, v9
912 ; RV32-NEXT: li a1, 32
913 ; RV32-NEXT: vsetivli zero, 1, e64, m1, ta, ma
914 ; RV32-NEXT: vsrl.vx v8, v9, a1
915 ; RV32-NEXT: vmv.x.s a1, v8
916 ; RV32-NEXT: addi sp, sp, 16
919 ; RV64-LABEL: vpreduce_smax_v2i64:
921 ; RV64-NEXT: vsetivli zero, 1, e64, m1, ta, ma
922 ; RV64-NEXT: vmv.s.x v9, a0
923 ; RV64-NEXT: vsetvli zero, a1, e64, m1, ta, ma
924 ; RV64-NEXT: vredmax.vs v9, v8, v9, v0.t
925 ; RV64-NEXT: vmv.x.s a0, v9
927 %r = call i64 @llvm.vp.reduce.smax.v2i64(i64 %s, <2 x i64> %v, <2 x i1> %m, i32 %evl)
931 declare i64 @llvm.vp.reduce.umin.v2i64(i64, <2 x i64>, <2 x i1>, i32)
933 define signext i64 @vpreduce_umin_v2i64(i64 signext %s, <2 x i64> %v, <2 x i1> %m, i32 zeroext %evl) {
934 ; RV32-LABEL: vpreduce_umin_v2i64:
936 ; RV32-NEXT: addi sp, sp, -16
937 ; RV32-NEXT: .cfi_def_cfa_offset 16
938 ; RV32-NEXT: sw a1, 12(sp)
939 ; RV32-NEXT: sw a0, 8(sp)
940 ; RV32-NEXT: addi a0, sp, 8
941 ; RV32-NEXT: vsetivli zero, 1, e64, m1, ta, ma
942 ; RV32-NEXT: vlse64.v v9, (a0), zero
943 ; RV32-NEXT: vsetvli zero, a2, e64, m1, ta, ma
944 ; RV32-NEXT: vredminu.vs v9, v8, v9, v0.t
945 ; RV32-NEXT: vmv.x.s a0, v9
946 ; RV32-NEXT: li a1, 32
947 ; RV32-NEXT: vsetivli zero, 1, e64, m1, ta, ma
948 ; RV32-NEXT: vsrl.vx v8, v9, a1
949 ; RV32-NEXT: vmv.x.s a1, v8
950 ; RV32-NEXT: addi sp, sp, 16
953 ; RV64-LABEL: vpreduce_umin_v2i64:
955 ; RV64-NEXT: vsetivli zero, 1, e64, m1, ta, ma
956 ; RV64-NEXT: vmv.s.x v9, a0
957 ; RV64-NEXT: vsetvli zero, a1, e64, m1, ta, ma
958 ; RV64-NEXT: vredminu.vs v9, v8, v9, v0.t
959 ; RV64-NEXT: vmv.x.s a0, v9
961 %r = call i64 @llvm.vp.reduce.umin.v2i64(i64 %s, <2 x i64> %v, <2 x i1> %m, i32 %evl)
965 declare i64 @llvm.vp.reduce.smin.v2i64(i64, <2 x i64>, <2 x i1>, i32)
967 define signext i64 @vpreduce_smin_v2i64(i64 signext %s, <2 x i64> %v, <2 x i1> %m, i32 zeroext %evl) {
968 ; RV32-LABEL: vpreduce_smin_v2i64:
970 ; RV32-NEXT: addi sp, sp, -16
971 ; RV32-NEXT: .cfi_def_cfa_offset 16
972 ; RV32-NEXT: sw a1, 12(sp)
973 ; RV32-NEXT: sw a0, 8(sp)
974 ; RV32-NEXT: addi a0, sp, 8
975 ; RV32-NEXT: vsetivli zero, 1, e64, m1, ta, ma
976 ; RV32-NEXT: vlse64.v v9, (a0), zero
977 ; RV32-NEXT: vsetvli zero, a2, e64, m1, ta, ma
978 ; RV32-NEXT: vredmin.vs v9, v8, v9, v0.t
979 ; RV32-NEXT: vmv.x.s a0, v9
980 ; RV32-NEXT: li a1, 32
981 ; RV32-NEXT: vsetivli zero, 1, e64, m1, ta, ma
982 ; RV32-NEXT: vsrl.vx v8, v9, a1
983 ; RV32-NEXT: vmv.x.s a1, v8
984 ; RV32-NEXT: addi sp, sp, 16
987 ; RV64-LABEL: vpreduce_smin_v2i64:
989 ; RV64-NEXT: vsetivli zero, 1, e64, m1, ta, ma
990 ; RV64-NEXT: vmv.s.x v9, a0
991 ; RV64-NEXT: vsetvli zero, a1, e64, m1, ta, ma
992 ; RV64-NEXT: vredmin.vs v9, v8, v9, v0.t
993 ; RV64-NEXT: vmv.x.s a0, v9
995 %r = call i64 @llvm.vp.reduce.smin.v2i64(i64 %s, <2 x i64> %v, <2 x i1> %m, i32 %evl)
999 declare i64 @llvm.vp.reduce.and.v2i64(i64, <2 x i64>, <2 x i1>, i32)
1001 define signext i64 @vpreduce_and_v2i64(i64 signext %s, <2 x i64> %v, <2 x i1> %m, i32 zeroext %evl) {
1002 ; RV32-LABEL: vpreduce_and_v2i64:
1004 ; RV32-NEXT: addi sp, sp, -16
1005 ; RV32-NEXT: .cfi_def_cfa_offset 16
1006 ; RV32-NEXT: sw a1, 12(sp)
1007 ; RV32-NEXT: sw a0, 8(sp)
1008 ; RV32-NEXT: addi a0, sp, 8
1009 ; RV32-NEXT: vsetivli zero, 1, e64, m1, ta, ma
1010 ; RV32-NEXT: vlse64.v v9, (a0), zero
1011 ; RV32-NEXT: vsetvli zero, a2, e64, m1, ta, ma
1012 ; RV32-NEXT: vredand.vs v9, v8, v9, v0.t
1013 ; RV32-NEXT: vmv.x.s a0, v9
1014 ; RV32-NEXT: li a1, 32
1015 ; RV32-NEXT: vsetivli zero, 1, e64, m1, ta, ma
1016 ; RV32-NEXT: vsrl.vx v8, v9, a1
1017 ; RV32-NEXT: vmv.x.s a1, v8
1018 ; RV32-NEXT: addi sp, sp, 16
1021 ; RV64-LABEL: vpreduce_and_v2i64:
1023 ; RV64-NEXT: vsetivli zero, 1, e64, m1, ta, ma
1024 ; RV64-NEXT: vmv.s.x v9, a0
1025 ; RV64-NEXT: vsetvli zero, a1, e64, m1, ta, ma
1026 ; RV64-NEXT: vredand.vs v9, v8, v9, v0.t
1027 ; RV64-NEXT: vmv.x.s a0, v9
1029 %r = call i64 @llvm.vp.reduce.and.v2i64(i64 %s, <2 x i64> %v, <2 x i1> %m, i32 %evl)
1033 declare i64 @llvm.vp.reduce.or.v2i64(i64, <2 x i64>, <2 x i1>, i32)
1035 define signext i64 @vpreduce_or_v2i64(i64 signext %s, <2 x i64> %v, <2 x i1> %m, i32 zeroext %evl) {
1036 ; RV32-LABEL: vpreduce_or_v2i64:
1038 ; RV32-NEXT: addi sp, sp, -16
1039 ; RV32-NEXT: .cfi_def_cfa_offset 16
1040 ; RV32-NEXT: sw a1, 12(sp)
1041 ; RV32-NEXT: sw a0, 8(sp)
1042 ; RV32-NEXT: addi a0, sp, 8
1043 ; RV32-NEXT: vsetivli zero, 1, e64, m1, ta, ma
1044 ; RV32-NEXT: vlse64.v v9, (a0), zero
1045 ; RV32-NEXT: vsetvli zero, a2, e64, m1, ta, ma
1046 ; RV32-NEXT: vredor.vs v9, v8, v9, v0.t
1047 ; RV32-NEXT: vmv.x.s a0, v9
1048 ; RV32-NEXT: li a1, 32
1049 ; RV32-NEXT: vsetivli zero, 1, e64, m1, ta, ma
1050 ; RV32-NEXT: vsrl.vx v8, v9, a1
1051 ; RV32-NEXT: vmv.x.s a1, v8
1052 ; RV32-NEXT: addi sp, sp, 16
1055 ; RV64-LABEL: vpreduce_or_v2i64:
1057 ; RV64-NEXT: vsetivli zero, 1, e64, m1, ta, ma
1058 ; RV64-NEXT: vmv.s.x v9, a0
1059 ; RV64-NEXT: vsetvli zero, a1, e64, m1, ta, ma
1060 ; RV64-NEXT: vredor.vs v9, v8, v9, v0.t
1061 ; RV64-NEXT: vmv.x.s a0, v9
1063 %r = call i64 @llvm.vp.reduce.or.v2i64(i64 %s, <2 x i64> %v, <2 x i1> %m, i32 %evl)
1067 declare i64 @llvm.vp.reduce.xor.v2i64(i64, <2 x i64>, <2 x i1>, i32)
1069 define signext i64 @vpreduce_xor_v2i64(i64 signext %s, <2 x i64> %v, <2 x i1> %m, i32 zeroext %evl) {
1070 ; RV32-LABEL: vpreduce_xor_v2i64:
1072 ; RV32-NEXT: addi sp, sp, -16
1073 ; RV32-NEXT: .cfi_def_cfa_offset 16
1074 ; RV32-NEXT: sw a1, 12(sp)
1075 ; RV32-NEXT: sw a0, 8(sp)
1076 ; RV32-NEXT: addi a0, sp, 8
1077 ; RV32-NEXT: vsetivli zero, 1, e64, m1, ta, ma
1078 ; RV32-NEXT: vlse64.v v9, (a0), zero
1079 ; RV32-NEXT: vsetvli zero, a2, e64, m1, ta, ma
1080 ; RV32-NEXT: vredxor.vs v9, v8, v9, v0.t
1081 ; RV32-NEXT: vmv.x.s a0, v9
1082 ; RV32-NEXT: li a1, 32
1083 ; RV32-NEXT: vsetivli zero, 1, e64, m1, ta, ma
1084 ; RV32-NEXT: vsrl.vx v8, v9, a1
1085 ; RV32-NEXT: vmv.x.s a1, v8
1086 ; RV32-NEXT: addi sp, sp, 16
1089 ; RV64-LABEL: vpreduce_xor_v2i64:
1091 ; RV64-NEXT: vsetivli zero, 1, e64, m1, ta, ma
1092 ; RV64-NEXT: vmv.s.x v9, a0
1093 ; RV64-NEXT: vsetvli zero, a1, e64, m1, ta, ma
1094 ; RV64-NEXT: vredxor.vs v9, v8, v9, v0.t
1095 ; RV64-NEXT: vmv.x.s a0, v9
1097 %r = call i64 @llvm.vp.reduce.xor.v2i64(i64 %s, <2 x i64> %v, <2 x i1> %m, i32 %evl)
1101 declare i64 @llvm.vp.reduce.add.v4i64(i64, <4 x i64>, <4 x i1>, i32)
1103 define signext i64 @vpreduce_add_v4i64(i64 signext %s, <4 x i64> %v, <4 x i1> %m, i32 zeroext %evl) {
1104 ; RV32-LABEL: vpreduce_add_v4i64:
1106 ; RV32-NEXT: addi sp, sp, -16
1107 ; RV32-NEXT: .cfi_def_cfa_offset 16
1108 ; RV32-NEXT: sw a1, 12(sp)
1109 ; RV32-NEXT: sw a0, 8(sp)
1110 ; RV32-NEXT: addi a0, sp, 8
1111 ; RV32-NEXT: vsetivli zero, 1, e64, m1, ta, ma
1112 ; RV32-NEXT: vlse64.v v10, (a0), zero
1113 ; RV32-NEXT: vsetvli zero, a2, e64, m2, ta, ma
1114 ; RV32-NEXT: vredsum.vs v10, v8, v10, v0.t
1115 ; RV32-NEXT: vmv.x.s a0, v10
1116 ; RV32-NEXT: li a1, 32
1117 ; RV32-NEXT: vsetivli zero, 1, e64, m1, ta, ma
1118 ; RV32-NEXT: vsrl.vx v8, v10, a1
1119 ; RV32-NEXT: vmv.x.s a1, v8
1120 ; RV32-NEXT: addi sp, sp, 16
1123 ; RV64-LABEL: vpreduce_add_v4i64:
1125 ; RV64-NEXT: vsetivli zero, 1, e64, m1, ta, ma
1126 ; RV64-NEXT: vmv.s.x v10, a0
1127 ; RV64-NEXT: vsetvli zero, a1, e64, m2, ta, ma
1128 ; RV64-NEXT: vredsum.vs v10, v8, v10, v0.t
1129 ; RV64-NEXT: vmv.x.s a0, v10
1131 %r = call i64 @llvm.vp.reduce.add.v4i64(i64 %s, <4 x i64> %v, <4 x i1> %m, i32 %evl)
1135 declare i64 @llvm.vp.reduce.umax.v4i64(i64, <4 x i64>, <4 x i1>, i32)
1137 define signext i64 @vpreduce_umax_v4i64(i64 signext %s, <4 x i64> %v, <4 x i1> %m, i32 zeroext %evl) {
1138 ; RV32-LABEL: vpreduce_umax_v4i64:
1140 ; RV32-NEXT: addi sp, sp, -16
1141 ; RV32-NEXT: .cfi_def_cfa_offset 16
1142 ; RV32-NEXT: sw a1, 12(sp)
1143 ; RV32-NEXT: sw a0, 8(sp)
1144 ; RV32-NEXT: addi a0, sp, 8
1145 ; RV32-NEXT: vsetivli zero, 1, e64, m1, ta, ma
1146 ; RV32-NEXT: vlse64.v v10, (a0), zero
1147 ; RV32-NEXT: vsetvli zero, a2, e64, m2, ta, ma
1148 ; RV32-NEXT: vredmaxu.vs v10, v8, v10, v0.t
1149 ; RV32-NEXT: vmv.x.s a0, v10
1150 ; RV32-NEXT: li a1, 32
1151 ; RV32-NEXT: vsetivli zero, 1, e64, m1, ta, ma
1152 ; RV32-NEXT: vsrl.vx v8, v10, a1
1153 ; RV32-NEXT: vmv.x.s a1, v8
1154 ; RV32-NEXT: addi sp, sp, 16
1157 ; RV64-LABEL: vpreduce_umax_v4i64:
1159 ; RV64-NEXT: vsetivli zero, 1, e64, m1, ta, ma
1160 ; RV64-NEXT: vmv.s.x v10, a0
1161 ; RV64-NEXT: vsetvli zero, a1, e64, m2, ta, ma
1162 ; RV64-NEXT: vredmaxu.vs v10, v8, v10, v0.t
1163 ; RV64-NEXT: vmv.x.s a0, v10
1165 %r = call i64 @llvm.vp.reduce.umax.v4i64(i64 %s, <4 x i64> %v, <4 x i1> %m, i32 %evl)
1169 declare i64 @llvm.vp.reduce.smax.v4i64(i64, <4 x i64>, <4 x i1>, i32)
1171 define signext i64 @vpreduce_smax_v4i64(i64 signext %s, <4 x i64> %v, <4 x i1> %m, i32 zeroext %evl) {
1172 ; RV32-LABEL: vpreduce_smax_v4i64:
1174 ; RV32-NEXT: addi sp, sp, -16
1175 ; RV32-NEXT: .cfi_def_cfa_offset 16
1176 ; RV32-NEXT: sw a1, 12(sp)
1177 ; RV32-NEXT: sw a0, 8(sp)
1178 ; RV32-NEXT: addi a0, sp, 8
1179 ; RV32-NEXT: vsetivli zero, 1, e64, m1, ta, ma
1180 ; RV32-NEXT: vlse64.v v10, (a0), zero
1181 ; RV32-NEXT: vsetvli zero, a2, e64, m2, ta, ma
1182 ; RV32-NEXT: vredmax.vs v10, v8, v10, v0.t
1183 ; RV32-NEXT: vmv.x.s a0, v10
1184 ; RV32-NEXT: li a1, 32
1185 ; RV32-NEXT: vsetivli zero, 1, e64, m1, ta, ma
1186 ; RV32-NEXT: vsrl.vx v8, v10, a1
1187 ; RV32-NEXT: vmv.x.s a1, v8
1188 ; RV32-NEXT: addi sp, sp, 16
1191 ; RV64-LABEL: vpreduce_smax_v4i64:
1193 ; RV64-NEXT: vsetivli zero, 1, e64, m1, ta, ma
1194 ; RV64-NEXT: vmv.s.x v10, a0
1195 ; RV64-NEXT: vsetvli zero, a1, e64, m2, ta, ma
1196 ; RV64-NEXT: vredmax.vs v10, v8, v10, v0.t
1197 ; RV64-NEXT: vmv.x.s a0, v10
1199 %r = call i64 @llvm.vp.reduce.smax.v4i64(i64 %s, <4 x i64> %v, <4 x i1> %m, i32 %evl)
1203 declare i64 @llvm.vp.reduce.umin.v4i64(i64, <4 x i64>, <4 x i1>, i32)
1205 define signext i64 @vpreduce_umin_v4i64(i64 signext %s, <4 x i64> %v, <4 x i1> %m, i32 zeroext %evl) {
1206 ; RV32-LABEL: vpreduce_umin_v4i64:
1208 ; RV32-NEXT: addi sp, sp, -16
1209 ; RV32-NEXT: .cfi_def_cfa_offset 16
1210 ; RV32-NEXT: sw a1, 12(sp)
1211 ; RV32-NEXT: sw a0, 8(sp)
1212 ; RV32-NEXT: addi a0, sp, 8
1213 ; RV32-NEXT: vsetivli zero, 1, e64, m1, ta, ma
1214 ; RV32-NEXT: vlse64.v v10, (a0), zero
1215 ; RV32-NEXT: vsetvli zero, a2, e64, m2, ta, ma
1216 ; RV32-NEXT: vredminu.vs v10, v8, v10, v0.t
1217 ; RV32-NEXT: vmv.x.s a0, v10
1218 ; RV32-NEXT: li a1, 32
1219 ; RV32-NEXT: vsetivli zero, 1, e64, m1, ta, ma
1220 ; RV32-NEXT: vsrl.vx v8, v10, a1
1221 ; RV32-NEXT: vmv.x.s a1, v8
1222 ; RV32-NEXT: addi sp, sp, 16
1225 ; RV64-LABEL: vpreduce_umin_v4i64:
1227 ; RV64-NEXT: vsetivli zero, 1, e64, m1, ta, ma
1228 ; RV64-NEXT: vmv.s.x v10, a0
1229 ; RV64-NEXT: vsetvli zero, a1, e64, m2, ta, ma
1230 ; RV64-NEXT: vredminu.vs v10, v8, v10, v0.t
1231 ; RV64-NEXT: vmv.x.s a0, v10
1233 %r = call i64 @llvm.vp.reduce.umin.v4i64(i64 %s, <4 x i64> %v, <4 x i1> %m, i32 %evl)
1237 declare i64 @llvm.vp.reduce.smin.v4i64(i64, <4 x i64>, <4 x i1>, i32)
1239 define signext i64 @vpreduce_smin_v4i64(i64 signext %s, <4 x i64> %v, <4 x i1> %m, i32 zeroext %evl) {
1240 ; RV32-LABEL: vpreduce_smin_v4i64:
1242 ; RV32-NEXT: addi sp, sp, -16
1243 ; RV32-NEXT: .cfi_def_cfa_offset 16
1244 ; RV32-NEXT: sw a1, 12(sp)
1245 ; RV32-NEXT: sw a0, 8(sp)
1246 ; RV32-NEXT: addi a0, sp, 8
1247 ; RV32-NEXT: vsetivli zero, 1, e64, m1, ta, ma
1248 ; RV32-NEXT: vlse64.v v10, (a0), zero
1249 ; RV32-NEXT: vsetvli zero, a2, e64, m2, ta, ma
1250 ; RV32-NEXT: vredmin.vs v10, v8, v10, v0.t
1251 ; RV32-NEXT: vmv.x.s a0, v10
1252 ; RV32-NEXT: li a1, 32
1253 ; RV32-NEXT: vsetivli zero, 1, e64, m1, ta, ma
1254 ; RV32-NEXT: vsrl.vx v8, v10, a1
1255 ; RV32-NEXT: vmv.x.s a1, v8
1256 ; RV32-NEXT: addi sp, sp, 16
1259 ; RV64-LABEL: vpreduce_smin_v4i64:
1261 ; RV64-NEXT: vsetivli zero, 1, e64, m1, ta, ma
1262 ; RV64-NEXT: vmv.s.x v10, a0
1263 ; RV64-NEXT: vsetvli zero, a1, e64, m2, ta, ma
1264 ; RV64-NEXT: vredmin.vs v10, v8, v10, v0.t
1265 ; RV64-NEXT: vmv.x.s a0, v10
1267 %r = call i64 @llvm.vp.reduce.smin.v4i64(i64 %s, <4 x i64> %v, <4 x i1> %m, i32 %evl)
1271 declare i64 @llvm.vp.reduce.and.v4i64(i64, <4 x i64>, <4 x i1>, i32)
1273 define signext i64 @vpreduce_and_v4i64(i64 signext %s, <4 x i64> %v, <4 x i1> %m, i32 zeroext %evl) {
1274 ; RV32-LABEL: vpreduce_and_v4i64:
1276 ; RV32-NEXT: addi sp, sp, -16
1277 ; RV32-NEXT: .cfi_def_cfa_offset 16
1278 ; RV32-NEXT: sw a1, 12(sp)
1279 ; RV32-NEXT: sw a0, 8(sp)
1280 ; RV32-NEXT: addi a0, sp, 8
1281 ; RV32-NEXT: vsetivli zero, 1, e64, m1, ta, ma
1282 ; RV32-NEXT: vlse64.v v10, (a0), zero
1283 ; RV32-NEXT: vsetvli zero, a2, e64, m2, ta, ma
1284 ; RV32-NEXT: vredand.vs v10, v8, v10, v0.t
1285 ; RV32-NEXT: vmv.x.s a0, v10
1286 ; RV32-NEXT: li a1, 32
1287 ; RV32-NEXT: vsetivli zero, 1, e64, m1, ta, ma
1288 ; RV32-NEXT: vsrl.vx v8, v10, a1
1289 ; RV32-NEXT: vmv.x.s a1, v8
1290 ; RV32-NEXT: addi sp, sp, 16
1293 ; RV64-LABEL: vpreduce_and_v4i64:
1295 ; RV64-NEXT: vsetivli zero, 1, e64, m1, ta, ma
1296 ; RV64-NEXT: vmv.s.x v10, a0
1297 ; RV64-NEXT: vsetvli zero, a1, e64, m2, ta, ma
1298 ; RV64-NEXT: vredand.vs v10, v8, v10, v0.t
1299 ; RV64-NEXT: vmv.x.s a0, v10
1301 %r = call i64 @llvm.vp.reduce.and.v4i64(i64 %s, <4 x i64> %v, <4 x i1> %m, i32 %evl)
1305 declare i64 @llvm.vp.reduce.or.v4i64(i64, <4 x i64>, <4 x i1>, i32)
1307 define signext i64 @vpreduce_or_v4i64(i64 signext %s, <4 x i64> %v, <4 x i1> %m, i32 zeroext %evl) {
1308 ; RV32-LABEL: vpreduce_or_v4i64:
1310 ; RV32-NEXT: addi sp, sp, -16
1311 ; RV32-NEXT: .cfi_def_cfa_offset 16
1312 ; RV32-NEXT: sw a1, 12(sp)
1313 ; RV32-NEXT: sw a0, 8(sp)
1314 ; RV32-NEXT: addi a0, sp, 8
1315 ; RV32-NEXT: vsetivli zero, 1, e64, m1, ta, ma
1316 ; RV32-NEXT: vlse64.v v10, (a0), zero
1317 ; RV32-NEXT: vsetvli zero, a2, e64, m2, ta, ma
1318 ; RV32-NEXT: vredor.vs v10, v8, v10, v0.t
1319 ; RV32-NEXT: vmv.x.s a0, v10
1320 ; RV32-NEXT: li a1, 32
1321 ; RV32-NEXT: vsetivli zero, 1, e64, m1, ta, ma
1322 ; RV32-NEXT: vsrl.vx v8, v10, a1
1323 ; RV32-NEXT: vmv.x.s a1, v8
1324 ; RV32-NEXT: addi sp, sp, 16
1327 ; RV64-LABEL: vpreduce_or_v4i64:
1329 ; RV64-NEXT: vsetivli zero, 1, e64, m1, ta, ma
1330 ; RV64-NEXT: vmv.s.x v10, a0
1331 ; RV64-NEXT: vsetvli zero, a1, e64, m2, ta, ma
1332 ; RV64-NEXT: vredor.vs v10, v8, v10, v0.t
1333 ; RV64-NEXT: vmv.x.s a0, v10
1335 %r = call i64 @llvm.vp.reduce.or.v4i64(i64 %s, <4 x i64> %v, <4 x i1> %m, i32 %evl)
1339 declare i64 @llvm.vp.reduce.xor.v4i64(i64, <4 x i64>, <4 x i1>, i32)
1341 define signext i64 @vpreduce_xor_v4i64(i64 signext %s, <4 x i64> %v, <4 x i1> %m, i32 zeroext %evl) {
1342 ; RV32-LABEL: vpreduce_xor_v4i64:
1344 ; RV32-NEXT: addi sp, sp, -16
1345 ; RV32-NEXT: .cfi_def_cfa_offset 16
1346 ; RV32-NEXT: sw a1, 12(sp)
1347 ; RV32-NEXT: sw a0, 8(sp)
1348 ; RV32-NEXT: addi a0, sp, 8
1349 ; RV32-NEXT: vsetivli zero, 1, e64, m1, ta, ma
1350 ; RV32-NEXT: vlse64.v v10, (a0), zero
1351 ; RV32-NEXT: vsetvli zero, a2, e64, m2, ta, ma
1352 ; RV32-NEXT: vredxor.vs v10, v8, v10, v0.t
1353 ; RV32-NEXT: vmv.x.s a0, v10
1354 ; RV32-NEXT: li a1, 32
1355 ; RV32-NEXT: vsetivli zero, 1, e64, m1, ta, ma
1356 ; RV32-NEXT: vsrl.vx v8, v10, a1
1357 ; RV32-NEXT: vmv.x.s a1, v8
1358 ; RV32-NEXT: addi sp, sp, 16
1361 ; RV64-LABEL: vpreduce_xor_v4i64:
1363 ; RV64-NEXT: vsetivli zero, 1, e64, m1, ta, ma
1364 ; RV64-NEXT: vmv.s.x v10, a0
1365 ; RV64-NEXT: vsetvli zero, a1, e64, m2, ta, ma
1366 ; RV64-NEXT: vredxor.vs v10, v8, v10, v0.t
1367 ; RV64-NEXT: vmv.x.s a0, v10
1369 %r = call i64 @llvm.vp.reduce.xor.v4i64(i64 %s, <4 x i64> %v, <4 x i1> %m, i32 %evl)
1373 declare i8 @llvm.vp.reduce.mul.v1i8(i8, <1 x i8>, <1 x i1>, i32)
1375 define i8 @vpreduce_mul_v1i8(i8 %s, <1 x i8> %v, <1 x i1> %m, i32 zeroext %evl) {
1376 ; RV32-LABEL: vpreduce_mul_v1i8:
1378 ; RV32-NEXT: addi sp, sp, -16
1379 ; RV32-NEXT: .cfi_def_cfa_offset 16
1380 ; RV32-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
1381 ; RV32-NEXT: .cfi_offset ra, -4
1382 ; RV32-NEXT: mv a2, a0
1383 ; RV32-NEXT: vsetivli zero, 1, e32, mf2, ta, ma
1384 ; RV32-NEXT: vmv.s.x v9, a1
1385 ; RV32-NEXT: vmsne.vi v9, v9, 0
1386 ; RV32-NEXT: vmand.mm v0, v9, v0
1387 ; RV32-NEXT: vmv.v.i v9, 1
1388 ; RV32-NEXT: vsetvli zero, zero, e8, mf8, ta, ma
1389 ; RV32-NEXT: vmerge.vvm v8, v9, v8, v0
1390 ; RV32-NEXT: vmv.x.s a0, v8
1391 ; RV32-NEXT: mv a1, a2
1392 ; RV32-NEXT: call __mulsi3
1393 ; RV32-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
1394 ; RV32-NEXT: addi sp, sp, 16
1397 ; RV64-LABEL: vpreduce_mul_v1i8:
1399 ; RV64-NEXT: addi sp, sp, -16
1400 ; RV64-NEXT: .cfi_def_cfa_offset 16
1401 ; RV64-NEXT: sd ra, 8(sp) # 8-byte Folded Spill
1402 ; RV64-NEXT: .cfi_offset ra, -8
1403 ; RV64-NEXT: mv a2, a0
1404 ; RV64-NEXT: vsetivli zero, 1, e32, mf2, ta, ma
1405 ; RV64-NEXT: vmv.s.x v9, a1
1406 ; RV64-NEXT: vmsne.vi v9, v9, 0
1407 ; RV64-NEXT: vmand.mm v0, v9, v0
1408 ; RV64-NEXT: vmv.v.i v9, 1
1409 ; RV64-NEXT: vsetvli zero, zero, e8, mf8, ta, ma
1410 ; RV64-NEXT: vmerge.vvm v8, v9, v8, v0
1411 ; RV64-NEXT: vmv.x.s a0, v8
1412 ; RV64-NEXT: mv a1, a2
1413 ; RV64-NEXT: call __muldi3
1414 ; RV64-NEXT: ld ra, 8(sp) # 8-byte Folded Reload
1415 ; RV64-NEXT: addi sp, sp, 16
1417 %r = call i8 @llvm.vp.reduce.mul.v1i8(i8 %s, <1 x i8> %v, <1 x i1> %m, i32 %evl)
1421 declare i8 @llvm.vp.reduce.mul.v2i8(i8, <2 x i8>, <2 x i1>, i32)
1423 define signext i8 @vpreduce_mul_v2i8(i8 signext %s, <2 x i8> %v, <2 x i1> %m, i32 zeroext %evl) {
1424 ; RV32-LABEL: vpreduce_mul_v2i8:
1426 ; RV32-NEXT: addi sp, sp, -16
1427 ; RV32-NEXT: .cfi_def_cfa_offset 16
1428 ; RV32-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
1429 ; RV32-NEXT: .cfi_offset ra, -4
1430 ; RV32-NEXT: mv a2, a0
1431 ; RV32-NEXT: vsetivli zero, 2, e32, mf2, ta, ma
1432 ; RV32-NEXT: vid.v v9
1433 ; RV32-NEXT: vmsltu.vx v9, v9, a1
1434 ; RV32-NEXT: vmand.mm v0, v9, v0
1435 ; RV32-NEXT: vsetvli zero, zero, e8, mf8, ta, ma
1436 ; RV32-NEXT: vmv.v.i v9, 1
1437 ; RV32-NEXT: vmerge.vvm v8, v9, v8, v0
1438 ; RV32-NEXT: vrgather.vi v9, v8, 1
1439 ; RV32-NEXT: vmul.vv v8, v8, v9
1440 ; RV32-NEXT: vmv.x.s a0, v8
1441 ; RV32-NEXT: mv a1, a2
1442 ; RV32-NEXT: call __mulsi3
1443 ; RV32-NEXT: slli a0, a0, 24
1444 ; RV32-NEXT: srai a0, a0, 24
1445 ; RV32-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
1446 ; RV32-NEXT: addi sp, sp, 16
1449 ; RV64-LABEL: vpreduce_mul_v2i8:
1451 ; RV64-NEXT: addi sp, sp, -16
1452 ; RV64-NEXT: .cfi_def_cfa_offset 16
1453 ; RV64-NEXT: sd ra, 8(sp) # 8-byte Folded Spill
1454 ; RV64-NEXT: .cfi_offset ra, -8
1455 ; RV64-NEXT: mv a2, a0
1456 ; RV64-NEXT: vsetivli zero, 2, e32, mf2, ta, ma
1457 ; RV64-NEXT: vid.v v9
1458 ; RV64-NEXT: vmsltu.vx v9, v9, a1
1459 ; RV64-NEXT: vmand.mm v0, v9, v0
1460 ; RV64-NEXT: vsetvli zero, zero, e8, mf8, ta, ma
1461 ; RV64-NEXT: vmv.v.i v9, 1
1462 ; RV64-NEXT: vmerge.vvm v8, v9, v8, v0
1463 ; RV64-NEXT: vrgather.vi v9, v8, 1
1464 ; RV64-NEXT: vmul.vv v8, v8, v9
1465 ; RV64-NEXT: vmv.x.s a0, v8
1466 ; RV64-NEXT: mv a1, a2
1467 ; RV64-NEXT: call __muldi3
1468 ; RV64-NEXT: slli a0, a0, 56
1469 ; RV64-NEXT: srai a0, a0, 56
1470 ; RV64-NEXT: ld ra, 8(sp) # 8-byte Folded Reload
1471 ; RV64-NEXT: addi sp, sp, 16
1473 %r = call i8 @llvm.vp.reduce.mul.v2i8(i8 %s, <2 x i8> %v, <2 x i1> %m, i32 %evl)
1477 declare i8 @llvm.vp.reduce.mul.v4i8(i8, <4 x i8>, <4 x i1>, i32)
1479 define signext i8 @vpreduce_mul_v4i8(i8 signext %s, <4 x i8> %v, <4 x i1> %m, i32 zeroext %evl) {
1480 ; RV32-LABEL: vpreduce_mul_v4i8:
1482 ; RV32-NEXT: addi sp, sp, -16
1483 ; RV32-NEXT: .cfi_def_cfa_offset 16
1484 ; RV32-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
1485 ; RV32-NEXT: .cfi_offset ra, -4
1486 ; RV32-NEXT: mv a2, a0
1487 ; RV32-NEXT: vsetivli zero, 4, e32, m1, ta, ma
1488 ; RV32-NEXT: vid.v v9
1489 ; RV32-NEXT: vmsltu.vx v9, v9, a1
1490 ; RV32-NEXT: vmand.mm v0, v9, v0
1491 ; RV32-NEXT: vsetvli zero, zero, e8, mf4, ta, ma
1492 ; RV32-NEXT: vmv.v.i v9, 1
1493 ; RV32-NEXT: vmerge.vvm v8, v9, v8, v0
1494 ; RV32-NEXT: vslidedown.vi v9, v8, 2
1495 ; RV32-NEXT: vmul.vv v8, v8, v9
1496 ; RV32-NEXT: vrgather.vi v9, v8, 1
1497 ; RV32-NEXT: vmul.vv v8, v8, v9
1498 ; RV32-NEXT: vmv.x.s a0, v8
1499 ; RV32-NEXT: mv a1, a2
1500 ; RV32-NEXT: call __mulsi3
1501 ; RV32-NEXT: slli a0, a0, 24
1502 ; RV32-NEXT: srai a0, a0, 24
1503 ; RV32-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
1504 ; RV32-NEXT: addi sp, sp, 16
1507 ; RV64-LABEL: vpreduce_mul_v4i8:
1509 ; RV64-NEXT: addi sp, sp, -16
1510 ; RV64-NEXT: .cfi_def_cfa_offset 16
1511 ; RV64-NEXT: sd ra, 8(sp) # 8-byte Folded Spill
1512 ; RV64-NEXT: .cfi_offset ra, -8
1513 ; RV64-NEXT: mv a2, a0
1514 ; RV64-NEXT: vsetivli zero, 4, e32, m1, ta, ma
1515 ; RV64-NEXT: vid.v v9
1516 ; RV64-NEXT: vmsltu.vx v9, v9, a1
1517 ; RV64-NEXT: vmand.mm v0, v9, v0
1518 ; RV64-NEXT: vsetvli zero, zero, e8, mf4, ta, ma
1519 ; RV64-NEXT: vmv.v.i v9, 1
1520 ; RV64-NEXT: vmerge.vvm v8, v9, v8, v0
1521 ; RV64-NEXT: vslidedown.vi v9, v8, 2
1522 ; RV64-NEXT: vmul.vv v8, v8, v9
1523 ; RV64-NEXT: vrgather.vi v9, v8, 1
1524 ; RV64-NEXT: vmul.vv v8, v8, v9
1525 ; RV64-NEXT: vmv.x.s a0, v8
1526 ; RV64-NEXT: mv a1, a2
1527 ; RV64-NEXT: call __muldi3
1528 ; RV64-NEXT: slli a0, a0, 56
1529 ; RV64-NEXT: srai a0, a0, 56
1530 ; RV64-NEXT: ld ra, 8(sp) # 8-byte Folded Reload
1531 ; RV64-NEXT: addi sp, sp, 16
1533 %r = call i8 @llvm.vp.reduce.mul.v4i8(i8 %s, <4 x i8> %v, <4 x i1> %m, i32 %evl)
1537 declare i8 @llvm.vp.reduce.mul.v8i8(i8, <8 x i8>, <8 x i1>, i32)
1539 define signext i8 @vpreduce_mul_v8i8(i8 signext %s, <8 x i8> %v, <8 x i1> %m, i32 zeroext %evl) {
1540 ; RV32-LABEL: vpreduce_mul_v8i8:
1542 ; RV32-NEXT: addi sp, sp, -16
1543 ; RV32-NEXT: .cfi_def_cfa_offset 16
1544 ; RV32-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
1545 ; RV32-NEXT: .cfi_offset ra, -4
1546 ; RV32-NEXT: mv a2, a0
1547 ; RV32-NEXT: vsetivli zero, 8, e32, m2, ta, ma
1548 ; RV32-NEXT: vid.v v10
1549 ; RV32-NEXT: vmsltu.vx v9, v10, a1
1550 ; RV32-NEXT: vmand.mm v0, v9, v0
1551 ; RV32-NEXT: vsetvli zero, zero, e8, mf2, ta, ma
1552 ; RV32-NEXT: vmv.v.i v9, 1
1553 ; RV32-NEXT: vmerge.vvm v8, v9, v8, v0
1554 ; RV32-NEXT: vslidedown.vi v9, v8, 4
1555 ; RV32-NEXT: vmul.vv v8, v8, v9
1556 ; RV32-NEXT: vslidedown.vi v9, v8, 2
1557 ; RV32-NEXT: vmul.vv v8, v8, v9
1558 ; RV32-NEXT: vrgather.vi v9, v8, 1
1559 ; RV32-NEXT: vmul.vv v8, v8, v9
1560 ; RV32-NEXT: vmv.x.s a0, v8
1561 ; RV32-NEXT: mv a1, a2
1562 ; RV32-NEXT: call __mulsi3
1563 ; RV32-NEXT: slli a0, a0, 24
1564 ; RV32-NEXT: srai a0, a0, 24
1565 ; RV32-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
1566 ; RV32-NEXT: addi sp, sp, 16
1569 ; RV64-LABEL: vpreduce_mul_v8i8:
1571 ; RV64-NEXT: addi sp, sp, -16
1572 ; RV64-NEXT: .cfi_def_cfa_offset 16
1573 ; RV64-NEXT: sd ra, 8(sp) # 8-byte Folded Spill
1574 ; RV64-NEXT: .cfi_offset ra, -8
1575 ; RV64-NEXT: mv a2, a0
1576 ; RV64-NEXT: vsetivli zero, 8, e32, m2, ta, ma
1577 ; RV64-NEXT: vid.v v10
1578 ; RV64-NEXT: vmsltu.vx v9, v10, a1
1579 ; RV64-NEXT: vmand.mm v0, v9, v0
1580 ; RV64-NEXT: vsetvli zero, zero, e8, mf2, ta, ma
1581 ; RV64-NEXT: vmv.v.i v9, 1
1582 ; RV64-NEXT: vmerge.vvm v8, v9, v8, v0
1583 ; RV64-NEXT: vslidedown.vi v9, v8, 4
1584 ; RV64-NEXT: vmul.vv v8, v8, v9
1585 ; RV64-NEXT: vslidedown.vi v9, v8, 2
1586 ; RV64-NEXT: vmul.vv v8, v8, v9
1587 ; RV64-NEXT: vrgather.vi v9, v8, 1
1588 ; RV64-NEXT: vmul.vv v8, v8, v9
1589 ; RV64-NEXT: vmv.x.s a0, v8
1590 ; RV64-NEXT: mv a1, a2
1591 ; RV64-NEXT: call __muldi3
1592 ; RV64-NEXT: slli a0, a0, 56
1593 ; RV64-NEXT: srai a0, a0, 56
1594 ; RV64-NEXT: ld ra, 8(sp) # 8-byte Folded Reload
1595 ; RV64-NEXT: addi sp, sp, 16
1597 %r = call i8 @llvm.vp.reduce.mul.v8i8(i8 %s, <8 x i8> %v, <8 x i1> %m, i32 %evl)
declare i8 @llvm.vp.reduce.mul.v16i8(i8, <16 x i8>, <16 x i1>, i32)

define signext i8 @vpreduce_mul_v16i8(i8 signext %s, <16 x i8> %v, <16 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vpreduce_mul_v16i8:
; RV32-NEXT:    addi sp, sp, -16
; RV32-NEXT:    .cfi_def_cfa_offset 16
; RV32-NEXT:    sw ra, 12(sp) # 4-byte Folded Spill
; RV32-NEXT:    .cfi_offset ra, -4
; RV32-NEXT:    mv a2, a0
; RV32-NEXT:    vsetivli zero, 16, e32, m4, ta, ma
; RV32-NEXT:    vid.v v12
; RV32-NEXT:    vmsltu.vx v9, v12, a1
; RV32-NEXT:    vmand.mm v0, v9, v0
; RV32-NEXT:    vsetvli zero, zero, e8, m1, ta, ma
; RV32-NEXT:    vmv.v.i v9, 1
; RV32-NEXT:    vmerge.vvm v8, v9, v8, v0
; RV32-NEXT:    vslidedown.vi v9, v8, 8
; RV32-NEXT:    vmul.vv v8, v8, v9
; RV32-NEXT:    vslidedown.vi v9, v8, 4
; RV32-NEXT:    vmul.vv v8, v8, v9
; RV32-NEXT:    vslidedown.vi v9, v8, 2
; RV32-NEXT:    vmul.vv v8, v8, v9
; RV32-NEXT:    vrgather.vi v9, v8, 1
; RV32-NEXT:    vmul.vv v8, v8, v9
; RV32-NEXT:    vmv.x.s a0, v8
; RV32-NEXT:    mv a1, a2
; RV32-NEXT:    call __mulsi3
; RV32-NEXT:    slli a0, a0, 24
; RV32-NEXT:    srai a0, a0, 24
; RV32-NEXT:    lw ra, 12(sp) # 4-byte Folded Reload
; RV32-NEXT:    addi sp, sp, 16
;
; RV64-LABEL: vpreduce_mul_v16i8:
; RV64-NEXT:    addi sp, sp, -16
; RV64-NEXT:    .cfi_def_cfa_offset 16
; RV64-NEXT:    sd ra, 8(sp) # 8-byte Folded Spill
; RV64-NEXT:    .cfi_offset ra, -8
; RV64-NEXT:    mv a2, a0
; RV64-NEXT:    vsetivli zero, 16, e32, m4, ta, ma
; RV64-NEXT:    vid.v v12
; RV64-NEXT:    vmsltu.vx v9, v12, a1
; RV64-NEXT:    vmand.mm v0, v9, v0
; RV64-NEXT:    vsetvli zero, zero, e8, m1, ta, ma
; RV64-NEXT:    vmv.v.i v9, 1
; RV64-NEXT:    vmerge.vvm v8, v9, v8, v0
; RV64-NEXT:    vslidedown.vi v9, v8, 8
; RV64-NEXT:    vmul.vv v8, v8, v9
; RV64-NEXT:    vslidedown.vi v9, v8, 4
; RV64-NEXT:    vmul.vv v8, v8, v9
; RV64-NEXT:    vslidedown.vi v9, v8, 2
; RV64-NEXT:    vmul.vv v8, v8, v9
; RV64-NEXT:    vrgather.vi v9, v8, 1
; RV64-NEXT:    vmul.vv v8, v8, v9
; RV64-NEXT:    vmv.x.s a0, v8
; RV64-NEXT:    mv a1, a2
; RV64-NEXT:    call __muldi3
; RV64-NEXT:    slli a0, a0, 56
; RV64-NEXT:    srai a0, a0, 56
; RV64-NEXT:    ld ra, 8(sp) # 8-byte Folded Reload
; RV64-NEXT:    addi sp, sp, 16
  %r = call i8 @llvm.vp.reduce.mul.v16i8(i8 %s, <16 x i8> %v, <16 x i1> %m, i32 %evl)
  ret i8 %r
}

declare i8 @llvm.vp.reduce.mul.v32i8(i8, <32 x i8>, <32 x i1>, i32)

define signext i8 @vpreduce_mul_v32i8(i8 signext %s, <32 x i8> %v, <32 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vpreduce_mul_v32i8:
; RV32-NEXT:    addi sp, sp, -16
; RV32-NEXT:    .cfi_def_cfa_offset 16
; RV32-NEXT:    sw ra, 12(sp) # 4-byte Folded Spill
; RV32-NEXT:    .cfi_offset ra, -4
; RV32-NEXT:    mv a2, a0
; RV32-NEXT:    li a0, 32
; RV32-NEXT:    vsetvli zero, a0, e32, m8, ta, ma
; RV32-NEXT:    vid.v v16
; RV32-NEXT:    vmsltu.vx v10, v16, a1
; RV32-NEXT:    vmand.mm v0, v10, v0
; RV32-NEXT:    vsetvli zero, zero, e8, m2, ta, ma
; RV32-NEXT:    vmv.v.i v10, 1
; RV32-NEXT:    vmerge.vvm v8, v10, v8, v0
; RV32-NEXT:    vslidedown.vi v10, v8, 16
; RV32-NEXT:    vmul.vv v8, v8, v10
; RV32-NEXT:    vslidedown.vi v10, v8, 8
; RV32-NEXT:    vmul.vv v8, v8, v10
; RV32-NEXT:    vslidedown.vi v10, v8, 4
; RV32-NEXT:    vmul.vv v8, v8, v10
; RV32-NEXT:    vslidedown.vi v10, v8, 2
; RV32-NEXT:    vmul.vv v8, v8, v10
; RV32-NEXT:    vrgather.vi v10, v8, 1
; RV32-NEXT:    vmul.vv v8, v8, v10
; RV32-NEXT:    vmv.x.s a0, v8
; RV32-NEXT:    mv a1, a2
; RV32-NEXT:    call __mulsi3
; RV32-NEXT:    slli a0, a0, 24
; RV32-NEXT:    srai a0, a0, 24
; RV32-NEXT:    lw ra, 12(sp) # 4-byte Folded Reload
; RV32-NEXT:    addi sp, sp, 16
;
; RV64-LABEL: vpreduce_mul_v32i8:
; RV64-NEXT:    addi sp, sp, -16
; RV64-NEXT:    .cfi_def_cfa_offset 16
; RV64-NEXT:    sd ra, 8(sp) # 8-byte Folded Spill
; RV64-NEXT:    .cfi_offset ra, -8
; RV64-NEXT:    mv a2, a0
; RV64-NEXT:    li a0, 32
; RV64-NEXT:    vsetvli zero, a0, e32, m8, ta, ma
; RV64-NEXT:    vid.v v16
; RV64-NEXT:    vmsltu.vx v10, v16, a1
; RV64-NEXT:    vmand.mm v0, v10, v0
; RV64-NEXT:    vsetvli zero, zero, e8, m2, ta, ma
; RV64-NEXT:    vmv.v.i v10, 1
; RV64-NEXT:    vmerge.vvm v8, v10, v8, v0
; RV64-NEXT:    vslidedown.vi v10, v8, 16
; RV64-NEXT:    vmul.vv v8, v8, v10
; RV64-NEXT:    vslidedown.vi v10, v8, 8
; RV64-NEXT:    vmul.vv v8, v8, v10
; RV64-NEXT:    vslidedown.vi v10, v8, 4
; RV64-NEXT:    vmul.vv v8, v8, v10
; RV64-NEXT:    vslidedown.vi v10, v8, 2
; RV64-NEXT:    vmul.vv v8, v8, v10
; RV64-NEXT:    vrgather.vi v10, v8, 1
; RV64-NEXT:    vmul.vv v8, v8, v10
; RV64-NEXT:    vmv.x.s a0, v8
; RV64-NEXT:    mv a1, a2
; RV64-NEXT:    call __muldi3
; RV64-NEXT:    slli a0, a0, 56
; RV64-NEXT:    srai a0, a0, 56
; RV64-NEXT:    ld ra, 8(sp) # 8-byte Folded Reload
; RV64-NEXT:    addi sp, sp, 16
  %r = call i8 @llvm.vp.reduce.mul.v32i8(i8 %s, <32 x i8> %v, <32 x i1> %m, i32 %evl)
  ret i8 %r
}

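; For v64i8 the EVL comparison needs 64 mask bits, but a single e32 vid.v at
; LMUL=8 only produces 32 indices. The checks below therefore build the mask in
; two halves: the low half from vid.v, and the high half from a constant-pool
; vector (.LCPI72_0, presumably the indices 32..63) that is sign-extended with
; vsext.vf4 and compared against EVL, then concatenated with vslideup.vi before
; being ANDed with the incoming mask.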
declare i8 @llvm.vp.reduce.mul.v64i8(i8, <64 x i8>, <64 x i1>, i32)

define signext i8 @vpreduce_mul_v64i8(i8 signext %s, <64 x i8> %v, <64 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vpreduce_mul_v64i8:
; RV32-NEXT:    addi sp, sp, -16
; RV32-NEXT:    .cfi_def_cfa_offset 16
; RV32-NEXT:    sw ra, 12(sp) # 4-byte Folded Spill
; RV32-NEXT:    .cfi_offset ra, -4
; RV32-NEXT:    li a3, 32
; RV32-NEXT:    lui a2, %hi(.LCPI72_0)
; RV32-NEXT:    addi a2, a2, %lo(.LCPI72_0)
; RV32-NEXT:    vsetvli zero, a3, e32, m8, ta, ma
; RV32-NEXT:    vle8.v v12, (a2)
; RV32-NEXT:    mv a2, a0
; RV32-NEXT:    vid.v v16
; RV32-NEXT:    vmsltu.vx v14, v16, a1
; RV32-NEXT:    vsext.vf4 v16, v12
; RV32-NEXT:    vmsltu.vx v12, v16, a1
; RV32-NEXT:    vsetivli zero, 8, e8, mf2, ta, ma
; RV32-NEXT:    vslideup.vi v14, v12, 4
; RV32-NEXT:    li a0, 64
; RV32-NEXT:    vsetvli zero, a0, e8, m4, ta, ma
; RV32-NEXT:    vmand.mm v0, v14, v0
; RV32-NEXT:    vmv.v.i v12, 1
; RV32-NEXT:    vmerge.vvm v8, v12, v8, v0
; RV32-NEXT:    vslidedown.vx v12, v8, a3
; RV32-NEXT:    vmul.vv v8, v8, v12
; RV32-NEXT:    vslidedown.vi v12, v8, 16
; RV32-NEXT:    vmul.vv v8, v8, v12
; RV32-NEXT:    vslidedown.vi v12, v8, 8
; RV32-NEXT:    vmul.vv v8, v8, v12
; RV32-NEXT:    vslidedown.vi v12, v8, 4
; RV32-NEXT:    vmul.vv v8, v8, v12
; RV32-NEXT:    vslidedown.vi v12, v8, 2
; RV32-NEXT:    vmul.vv v8, v8, v12
; RV32-NEXT:    vrgather.vi v12, v8, 1
; RV32-NEXT:    vmul.vv v8, v8, v12
; RV32-NEXT:    vmv.x.s a0, v8
; RV32-NEXT:    mv a1, a2
; RV32-NEXT:    call __mulsi3
; RV32-NEXT:    slli a0, a0, 24
; RV32-NEXT:    srai a0, a0, 24
; RV32-NEXT:    lw ra, 12(sp) # 4-byte Folded Reload
; RV32-NEXT:    addi sp, sp, 16
;
; RV64-LABEL: vpreduce_mul_v64i8:
; RV64-NEXT:    addi sp, sp, -16
; RV64-NEXT:    .cfi_def_cfa_offset 16
; RV64-NEXT:    sd ra, 8(sp) # 8-byte Folded Spill
; RV64-NEXT:    .cfi_offset ra, -8
; RV64-NEXT:    li a3, 32
; RV64-NEXT:    lui a2, %hi(.LCPI72_0)
; RV64-NEXT:    addi a2, a2, %lo(.LCPI72_0)
; RV64-NEXT:    vsetvli zero, a3, e32, m8, ta, ma
; RV64-NEXT:    vle8.v v12, (a2)
; RV64-NEXT:    mv a2, a0
; RV64-NEXT:    vid.v v16
; RV64-NEXT:    vmsltu.vx v14, v16, a1
; RV64-NEXT:    vsext.vf4 v16, v12
; RV64-NEXT:    vmsltu.vx v12, v16, a1
; RV64-NEXT:    vsetivli zero, 8, e8, mf2, ta, ma
; RV64-NEXT:    vslideup.vi v14, v12, 4
; RV64-NEXT:    li a0, 64
; RV64-NEXT:    vsetvli zero, a0, e8, m4, ta, ma
; RV64-NEXT:    vmand.mm v0, v14, v0
; RV64-NEXT:    vmv.v.i v12, 1
; RV64-NEXT:    vmerge.vvm v8, v12, v8, v0
; RV64-NEXT:    vslidedown.vx v12, v8, a3
; RV64-NEXT:    vmul.vv v8, v8, v12
; RV64-NEXT:    vslidedown.vi v12, v8, 16
; RV64-NEXT:    vmul.vv v8, v8, v12
; RV64-NEXT:    vslidedown.vi v12, v8, 8
; RV64-NEXT:    vmul.vv v8, v8, v12
; RV64-NEXT:    vslidedown.vi v12, v8, 4
; RV64-NEXT:    vmul.vv v8, v8, v12
; RV64-NEXT:    vslidedown.vi v12, v8, 2
; RV64-NEXT:    vmul.vv v8, v8, v12
; RV64-NEXT:    vrgather.vi v12, v8, 1
; RV64-NEXT:    vmul.vv v8, v8, v12
; RV64-NEXT:    vmv.x.s a0, v8
; RV64-NEXT:    mv a1, a2
; RV64-NEXT:    call __muldi3
; RV64-NEXT:    slli a0, a0, 56
; RV64-NEXT:    srai a0, a0, 56
; RV64-NEXT:    ld ra, 8(sp) # 8-byte Folded Reload
; RV64-NEXT:    addi sp, sp, 16
  %r = call i8 @llvm.vp.reduce.mul.v64i8(i8 %s, <64 x i8> %v, <64 x i1> %m, i32 %evl)
  ret i8 %r
}

; Test when the start value is the first element of the vector.
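; When the start value is element 0 of the reduction source, no separate
; vmv.s.x is needed: the source register can be reused directly as the scalar
; operand of the reduction, as the vredand.vs below does, and the zeroext
; result is produced with a single andi.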
define zeroext i8 @front_ele_v4i8(<4 x i8> %v, <4 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: front_ele_v4i8:
; CHECK-NEXT:    vsetvli zero, a0, e8, mf4, ta, ma
; CHECK-NEXT:    vredand.vs v8, v8, v8, v0.t
; CHECK-NEXT:    vmv.x.s a0, v8
; CHECK-NEXT:    andi a0, a0, 255
  %s = extractelement <4 x i8> %v, i64 0
  %r = call i8 @llvm.vp.reduce.and.v4i8(i8 %s, <4 x i8> %v, <4 x i1> %m, i32 %evl)
  ret i8 %r
}

; Test when the start value is the first element of a vector that is longer than M1.
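; Even though the source occupies an LMUL=2 register group here, the scalar
; operand of vredand.vs only reads element 0, so v8 (the base of the v8m2
; group) can be passed directly and no element extract is required.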
declare i8 @llvm.vp.reduce.and.v32i8(i8, <32 x i8>, <32 x i1>, i32)
define zeroext i8 @front_ele_v32i8(<32 x i8> %v, <32 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: front_ele_v32i8:
; CHECK-NEXT:    vsetvli zero, a0, e8, m2, ta, ma
; CHECK-NEXT:    vredand.vs v8, v8, v8, v0.t
; CHECK-NEXT:    vmv.x.s a0, v8
; CHECK-NEXT:    andi a0, a0, 255
  %s = extractelement <32 x i8> %v, i64 0
  %r = call i8 @llvm.vp.reduce.and.v32i8(i8 %s, <32 x i8> %v, <32 x i1> %m, i32 %evl)