1 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2 ; RUN: llc -mtriple=riscv32 -mattr=+v -verify-machineinstrs < %s \
3 ; RUN: | FileCheck %s --check-prefixes=CHECK,RV32
4 ; RUN: llc -mtriple=riscv64 -mattr=+v -verify-machineinstrs < %s \
5 ; RUN: | FileCheck %s --check-prefixes=CHECK,RV64
7 declare i8 @llvm.vp.reduce.add.v2i8(i8, <2 x i8>, <2 x i1>, i32)
9 define signext i8 @vpreduce_add_v2i8(i8 signext %s, <2 x i8> %v, <2 x i1> %m, i32 zeroext %evl) {
10 ; CHECK-LABEL: vpreduce_add_v2i8:
12 ; CHECK-NEXT: vsetivli zero, 1, e8, m1, ta, ma
13 ; CHECK-NEXT: vmv.s.x v9, a0
14 ; CHECK-NEXT: vsetvli zero, a1, e8, mf8, ta, ma
15 ; CHECK-NEXT: vredsum.vs v9, v8, v9, v0.t
16 ; CHECK-NEXT: vmv.x.s a0, v9
18 %r = call i8 @llvm.vp.reduce.add.v2i8(i8 %s, <2 x i8> %v, <2 x i1> %m, i32 %evl)
22 declare i8 @llvm.vp.reduce.umax.v2i8(i8, <2 x i8>, <2 x i1>, i32)
24 define signext i8 @vpreduce_umax_v2i8(i8 signext %s, <2 x i8> %v, <2 x i1> %m, i32 zeroext %evl) {
25 ; CHECK-LABEL: vpreduce_umax_v2i8:
27 ; CHECK-NEXT: andi a0, a0, 255
28 ; CHECK-NEXT: vsetivli zero, 1, e8, m1, ta, ma
29 ; CHECK-NEXT: vmv.s.x v9, a0
30 ; CHECK-NEXT: vsetvli zero, a1, e8, mf8, ta, ma
31 ; CHECK-NEXT: vredmaxu.vs v9, v8, v9, v0.t
32 ; CHECK-NEXT: vmv.x.s a0, v9
34 %r = call i8 @llvm.vp.reduce.umax.v2i8(i8 %s, <2 x i8> %v, <2 x i1> %m, i32 %evl)
38 declare i8 @llvm.vp.reduce.smax.v2i8(i8, <2 x i8>, <2 x i1>, i32)
40 define signext i8 @vpreduce_smax_v2i8(i8 signext %s, <2 x i8> %v, <2 x i1> %m, i32 zeroext %evl) {
41 ; CHECK-LABEL: vpreduce_smax_v2i8:
43 ; CHECK-NEXT: vsetivli zero, 1, e8, m1, ta, ma
44 ; CHECK-NEXT: vmv.s.x v9, a0
45 ; CHECK-NEXT: vsetvli zero, a1, e8, mf8, ta, ma
46 ; CHECK-NEXT: vredmax.vs v9, v8, v9, v0.t
47 ; CHECK-NEXT: vmv.x.s a0, v9
49 %r = call i8 @llvm.vp.reduce.smax.v2i8(i8 %s, <2 x i8> %v, <2 x i1> %m, i32 %evl)
53 declare i8 @llvm.vp.reduce.umin.v2i8(i8, <2 x i8>, <2 x i1>, i32)
55 define signext i8 @vpreduce_umin_v2i8(i8 signext %s, <2 x i8> %v, <2 x i1> %m, i32 zeroext %evl) {
56 ; CHECK-LABEL: vpreduce_umin_v2i8:
58 ; CHECK-NEXT: andi a0, a0, 255
59 ; CHECK-NEXT: vsetivli zero, 1, e8, m1, ta, ma
60 ; CHECK-NEXT: vmv.s.x v9, a0
61 ; CHECK-NEXT: vsetvli zero, a1, e8, mf8, ta, ma
62 ; CHECK-NEXT: vredminu.vs v9, v8, v9, v0.t
63 ; CHECK-NEXT: vmv.x.s a0, v9
65 %r = call i8 @llvm.vp.reduce.umin.v2i8(i8 %s, <2 x i8> %v, <2 x i1> %m, i32 %evl)
69 declare i8 @llvm.vp.reduce.smin.v2i8(i8, <2 x i8>, <2 x i1>, i32)
71 define signext i8 @vpreduce_smin_v2i8(i8 signext %s, <2 x i8> %v, <2 x i1> %m, i32 zeroext %evl) {
72 ; CHECK-LABEL: vpreduce_smin_v2i8:
74 ; CHECK-NEXT: vsetivli zero, 1, e8, m1, ta, ma
75 ; CHECK-NEXT: vmv.s.x v9, a0
76 ; CHECK-NEXT: vsetvli zero, a1, e8, mf8, ta, ma
77 ; CHECK-NEXT: vredmin.vs v9, v8, v9, v0.t
78 ; CHECK-NEXT: vmv.x.s a0, v9
80 %r = call i8 @llvm.vp.reduce.smin.v2i8(i8 %s, <2 x i8> %v, <2 x i1> %m, i32 %evl)
84 declare i8 @llvm.vp.reduce.and.v2i8(i8, <2 x i8>, <2 x i1>, i32)
86 define signext i8 @vpreduce_and_v2i8(i8 signext %s, <2 x i8> %v, <2 x i1> %m, i32 zeroext %evl) {
87 ; CHECK-LABEL: vpreduce_and_v2i8:
89 ; CHECK-NEXT: vsetivli zero, 1, e8, m1, ta, ma
90 ; CHECK-NEXT: vmv.s.x v9, a0
91 ; CHECK-NEXT: vsetvli zero, a1, e8, mf8, ta, ma
92 ; CHECK-NEXT: vredand.vs v9, v8, v9, v0.t
93 ; CHECK-NEXT: vmv.x.s a0, v9
95 %r = call i8 @llvm.vp.reduce.and.v2i8(i8 %s, <2 x i8> %v, <2 x i1> %m, i32 %evl)
99 declare i8 @llvm.vp.reduce.or.v2i8(i8, <2 x i8>, <2 x i1>, i32)
101 define signext i8 @vpreduce_or_v2i8(i8 signext %s, <2 x i8> %v, <2 x i1> %m, i32 zeroext %evl) {
102 ; CHECK-LABEL: vpreduce_or_v2i8:
104 ; CHECK-NEXT: vsetivli zero, 1, e8, m1, ta, ma
105 ; CHECK-NEXT: vmv.s.x v9, a0
106 ; CHECK-NEXT: vsetvli zero, a1, e8, mf8, ta, ma
107 ; CHECK-NEXT: vredor.vs v9, v8, v9, v0.t
108 ; CHECK-NEXT: vmv.x.s a0, v9
110 %r = call i8 @llvm.vp.reduce.or.v2i8(i8 %s, <2 x i8> %v, <2 x i1> %m, i32 %evl)
114 declare i8 @llvm.vp.reduce.xor.v2i8(i8, <2 x i8>, <2 x i1>, i32)
116 define signext i8 @vpreduce_xor_v2i8(i8 signext %s, <2 x i8> %v, <2 x i1> %m, i32 zeroext %evl) {
117 ; CHECK-LABEL: vpreduce_xor_v2i8:
119 ; CHECK-NEXT: vsetivli zero, 1, e8, m1, ta, ma
120 ; CHECK-NEXT: vmv.s.x v9, a0
121 ; CHECK-NEXT: vsetvli zero, a1, e8, mf8, ta, ma
122 ; CHECK-NEXT: vredxor.vs v9, v8, v9, v0.t
123 ; CHECK-NEXT: vmv.x.s a0, v9
125 %r = call i8 @llvm.vp.reduce.xor.v2i8(i8 %s, <2 x i8> %v, <2 x i1> %m, i32 %evl)
129 declare i8 @llvm.vp.reduce.umin.v3i8(i8, <3 x i8>, <3 x i1>, i32)
131 define signext i8 @vpreduce_umin_v3i8(i8 signext %s, <3 x i8> %v, <3 x i1> %m, i32 zeroext %evl) {
132 ; CHECK-LABEL: vpreduce_umin_v3i8:
134 ; CHECK-NEXT: andi a0, a0, 255
135 ; CHECK-NEXT: vsetivli zero, 1, e8, m1, ta, ma
136 ; CHECK-NEXT: vmv.s.x v9, a0
137 ; CHECK-NEXT: vsetvli zero, a1, e8, mf4, ta, ma
138 ; CHECK-NEXT: vredminu.vs v9, v8, v9, v0.t
139 ; CHECK-NEXT: vmv.x.s a0, v9
141 %r = call i8 @llvm.vp.reduce.umin.v3i8(i8 %s, <3 x i8> %v, <3 x i1> %m, i32 %evl)
145 declare i8 @llvm.vp.reduce.add.v4i8(i8, <4 x i8>, <4 x i1>, i32)
147 define signext i8 @vpreduce_add_v4i8(i8 signext %s, <4 x i8> %v, <4 x i1> %m, i32 zeroext %evl) {
148 ; CHECK-LABEL: vpreduce_add_v4i8:
150 ; CHECK-NEXT: vsetivli zero, 1, e8, m1, ta, ma
151 ; CHECK-NEXT: vmv.s.x v9, a0
152 ; CHECK-NEXT: vsetvli zero, a1, e8, mf4, ta, ma
153 ; CHECK-NEXT: vredsum.vs v9, v8, v9, v0.t
154 ; CHECK-NEXT: vmv.x.s a0, v9
156 %r = call i8 @llvm.vp.reduce.add.v4i8(i8 %s, <4 x i8> %v, <4 x i1> %m, i32 %evl)
160 declare i8 @llvm.vp.reduce.umax.v4i8(i8, <4 x i8>, <4 x i1>, i32)
162 define signext i8 @vpreduce_umax_v4i8(i8 signext %s, <4 x i8> %v, <4 x i1> %m, i32 zeroext %evl) {
163 ; CHECK-LABEL: vpreduce_umax_v4i8:
165 ; CHECK-NEXT: andi a0, a0, 255
166 ; CHECK-NEXT: vsetivli zero, 1, e8, m1, ta, ma
167 ; CHECK-NEXT: vmv.s.x v9, a0
168 ; CHECK-NEXT: vsetvli zero, a1, e8, mf4, ta, ma
169 ; CHECK-NEXT: vredmaxu.vs v9, v8, v9, v0.t
170 ; CHECK-NEXT: vmv.x.s a0, v9
172 %r = call i8 @llvm.vp.reduce.umax.v4i8(i8 %s, <4 x i8> %v, <4 x i1> %m, i32 %evl)
176 declare i8 @llvm.vp.reduce.smax.v4i8(i8, <4 x i8>, <4 x i1>, i32)
178 define signext i8 @vpreduce_smax_v4i8(i8 signext %s, <4 x i8> %v, <4 x i1> %m, i32 zeroext %evl) {
179 ; CHECK-LABEL: vpreduce_smax_v4i8:
181 ; CHECK-NEXT: vsetivli zero, 1, e8, m1, ta, ma
182 ; CHECK-NEXT: vmv.s.x v9, a0
183 ; CHECK-NEXT: vsetvli zero, a1, e8, mf4, ta, ma
184 ; CHECK-NEXT: vredmax.vs v9, v8, v9, v0.t
185 ; CHECK-NEXT: vmv.x.s a0, v9
187 %r = call i8 @llvm.vp.reduce.smax.v4i8(i8 %s, <4 x i8> %v, <4 x i1> %m, i32 %evl)
191 declare i8 @llvm.vp.reduce.umin.v4i8(i8, <4 x i8>, <4 x i1>, i32)
193 define signext i8 @vpreduce_umin_v4i8(i8 signext %s, <4 x i8> %v, <4 x i1> %m, i32 zeroext %evl) {
194 ; CHECK-LABEL: vpreduce_umin_v4i8:
196 ; CHECK-NEXT: andi a0, a0, 255
197 ; CHECK-NEXT: vsetivli zero, 1, e8, m1, ta, ma
198 ; CHECK-NEXT: vmv.s.x v9, a0
199 ; CHECK-NEXT: vsetvli zero, a1, e8, mf4, ta, ma
200 ; CHECK-NEXT: vredminu.vs v9, v8, v9, v0.t
201 ; CHECK-NEXT: vmv.x.s a0, v9
203 %r = call i8 @llvm.vp.reduce.umin.v4i8(i8 %s, <4 x i8> %v, <4 x i1> %m, i32 %evl)
207 declare i8 @llvm.vp.reduce.smin.v4i8(i8, <4 x i8>, <4 x i1>, i32)
209 define signext i8 @vpreduce_smin_v4i8(i8 signext %s, <4 x i8> %v, <4 x i1> %m, i32 zeroext %evl) {
210 ; CHECK-LABEL: vpreduce_smin_v4i8:
212 ; CHECK-NEXT: vsetivli zero, 1, e8, m1, ta, ma
213 ; CHECK-NEXT: vmv.s.x v9, a0
214 ; CHECK-NEXT: vsetvli zero, a1, e8, mf4, ta, ma
215 ; CHECK-NEXT: vredmin.vs v9, v8, v9, v0.t
216 ; CHECK-NEXT: vmv.x.s a0, v9
218 %r = call i8 @llvm.vp.reduce.smin.v4i8(i8 %s, <4 x i8> %v, <4 x i1> %m, i32 %evl)
222 declare i8 @llvm.vp.reduce.and.v4i8(i8, <4 x i8>, <4 x i1>, i32)
224 define signext i8 @vpreduce_and_v4i8(i8 signext %s, <4 x i8> %v, <4 x i1> %m, i32 zeroext %evl) {
225 ; CHECK-LABEL: vpreduce_and_v4i8:
227 ; CHECK-NEXT: vsetivli zero, 1, e8, m1, ta, ma
228 ; CHECK-NEXT: vmv.s.x v9, a0
229 ; CHECK-NEXT: vsetvli zero, a1, e8, mf4, ta, ma
230 ; CHECK-NEXT: vredand.vs v9, v8, v9, v0.t
231 ; CHECK-NEXT: vmv.x.s a0, v9
233 %r = call i8 @llvm.vp.reduce.and.v4i8(i8 %s, <4 x i8> %v, <4 x i1> %m, i32 %evl)
237 declare i8 @llvm.vp.reduce.or.v4i8(i8, <4 x i8>, <4 x i1>, i32)
239 define signext i8 @vpreduce_or_v4i8(i8 signext %s, <4 x i8> %v, <4 x i1> %m, i32 zeroext %evl) {
240 ; CHECK-LABEL: vpreduce_or_v4i8:
242 ; CHECK-NEXT: vsetivli zero, 1, e8, m1, ta, ma
243 ; CHECK-NEXT: vmv.s.x v9, a0
244 ; CHECK-NEXT: vsetvli zero, a1, e8, mf4, ta, ma
245 ; CHECK-NEXT: vredor.vs v9, v8, v9, v0.t
246 ; CHECK-NEXT: vmv.x.s a0, v9
248 %r = call i8 @llvm.vp.reduce.or.v4i8(i8 %s, <4 x i8> %v, <4 x i1> %m, i32 %evl)
252 declare i8 @llvm.vp.reduce.xor.v4i8(i8, <4 x i8>, <4 x i1>, i32)
254 define signext i8 @vpreduce_xor_v4i8(i8 signext %s, <4 x i8> %v, <4 x i1> %m, i32 zeroext %evl) {
255 ; CHECK-LABEL: vpreduce_xor_v4i8:
257 ; CHECK-NEXT: vsetivli zero, 1, e8, m1, ta, ma
258 ; CHECK-NEXT: vmv.s.x v9, a0
259 ; CHECK-NEXT: vsetvli zero, a1, e8, mf4, ta, ma
260 ; CHECK-NEXT: vredxor.vs v9, v8, v9, v0.t
261 ; CHECK-NEXT: vmv.x.s a0, v9
263 %r = call i8 @llvm.vp.reduce.xor.v4i8(i8 %s, <4 x i8> %v, <4 x i1> %m, i32 %evl)
267 declare i16 @llvm.vp.reduce.add.v2i16(i16, <2 x i16>, <2 x i1>, i32)
269 define signext i16 @vpreduce_add_v2i16(i16 signext %s, <2 x i16> %v, <2 x i1> %m, i32 zeroext %evl) {
270 ; CHECK-LABEL: vpreduce_add_v2i16:
272 ; CHECK-NEXT: vsetivli zero, 1, e16, m1, ta, ma
273 ; CHECK-NEXT: vmv.s.x v9, a0
274 ; CHECK-NEXT: vsetvli zero, a1, e16, mf4, ta, ma
275 ; CHECK-NEXT: vredsum.vs v9, v8, v9, v0.t
276 ; CHECK-NEXT: vmv.x.s a0, v9
278 %r = call i16 @llvm.vp.reduce.add.v2i16(i16 %s, <2 x i16> %v, <2 x i1> %m, i32 %evl)
282 declare i16 @llvm.vp.reduce.umax.v2i16(i16, <2 x i16>, <2 x i1>, i32)
284 define signext i16 @vpreduce_umax_v2i16(i16 signext %s, <2 x i16> %v, <2 x i1> %m, i32 zeroext %evl) {
285 ; RV32-LABEL: vpreduce_umax_v2i16:
287 ; RV32-NEXT: slli a0, a0, 16
288 ; RV32-NEXT: srli a0, a0, 16
289 ; RV32-NEXT: vsetivli zero, 1, e16, m1, ta, ma
290 ; RV32-NEXT: vmv.s.x v9, a0
291 ; RV32-NEXT: vsetvli zero, a1, e16, mf4, ta, ma
292 ; RV32-NEXT: vredmaxu.vs v9, v8, v9, v0.t
293 ; RV32-NEXT: vmv.x.s a0, v9
296 ; RV64-LABEL: vpreduce_umax_v2i16:
298 ; RV64-NEXT: slli a0, a0, 48
299 ; RV64-NEXT: srli a0, a0, 48
300 ; RV64-NEXT: vsetivli zero, 1, e16, m1, ta, ma
301 ; RV64-NEXT: vmv.s.x v9, a0
302 ; RV64-NEXT: vsetvli zero, a1, e16, mf4, ta, ma
303 ; RV64-NEXT: vredmaxu.vs v9, v8, v9, v0.t
304 ; RV64-NEXT: vmv.x.s a0, v9
306 %r = call i16 @llvm.vp.reduce.umax.v2i16(i16 %s, <2 x i16> %v, <2 x i1> %m, i32 %evl)
310 declare i16 @llvm.vp.reduce.smax.v2i16(i16, <2 x i16>, <2 x i1>, i32)
312 define signext i16 @vpreduce_smax_v2i16(i16 signext %s, <2 x i16> %v, <2 x i1> %m, i32 zeroext %evl) {
313 ; CHECK-LABEL: vpreduce_smax_v2i16:
315 ; CHECK-NEXT: vsetivli zero, 1, e16, m1, ta, ma
316 ; CHECK-NEXT: vmv.s.x v9, a0
317 ; CHECK-NEXT: vsetvli zero, a1, e16, mf4, ta, ma
318 ; CHECK-NEXT: vredmax.vs v9, v8, v9, v0.t
319 ; CHECK-NEXT: vmv.x.s a0, v9
321 %r = call i16 @llvm.vp.reduce.smax.v2i16(i16 %s, <2 x i16> %v, <2 x i1> %m, i32 %evl)
325 declare i16 @llvm.vp.reduce.umin.v2i16(i16, <2 x i16>, <2 x i1>, i32)
327 define signext i16 @vpreduce_umin_v2i16(i16 signext %s, <2 x i16> %v, <2 x i1> %m, i32 zeroext %evl) {
328 ; RV32-LABEL: vpreduce_umin_v2i16:
330 ; RV32-NEXT: slli a0, a0, 16
331 ; RV32-NEXT: srli a0, a0, 16
332 ; RV32-NEXT: vsetivli zero, 1, e16, m1, ta, ma
333 ; RV32-NEXT: vmv.s.x v9, a0
334 ; RV32-NEXT: vsetvli zero, a1, e16, mf4, ta, ma
335 ; RV32-NEXT: vredminu.vs v9, v8, v9, v0.t
336 ; RV32-NEXT: vmv.x.s a0, v9
339 ; RV64-LABEL: vpreduce_umin_v2i16:
341 ; RV64-NEXT: slli a0, a0, 48
342 ; RV64-NEXT: srli a0, a0, 48
343 ; RV64-NEXT: vsetivli zero, 1, e16, m1, ta, ma
344 ; RV64-NEXT: vmv.s.x v9, a0
345 ; RV64-NEXT: vsetvli zero, a1, e16, mf4, ta, ma
346 ; RV64-NEXT: vredminu.vs v9, v8, v9, v0.t
347 ; RV64-NEXT: vmv.x.s a0, v9
349 %r = call i16 @llvm.vp.reduce.umin.v2i16(i16 %s, <2 x i16> %v, <2 x i1> %m, i32 %evl)
353 declare i16 @llvm.vp.reduce.smin.v2i16(i16, <2 x i16>, <2 x i1>, i32)
355 define signext i16 @vpreduce_smin_v2i16(i16 signext %s, <2 x i16> %v, <2 x i1> %m, i32 zeroext %evl) {
356 ; CHECK-LABEL: vpreduce_smin_v2i16:
358 ; CHECK-NEXT: vsetivli zero, 1, e16, m1, ta, ma
359 ; CHECK-NEXT: vmv.s.x v9, a0
360 ; CHECK-NEXT: vsetvli zero, a1, e16, mf4, ta, ma
361 ; CHECK-NEXT: vredmin.vs v9, v8, v9, v0.t
362 ; CHECK-NEXT: vmv.x.s a0, v9
364 %r = call i16 @llvm.vp.reduce.smin.v2i16(i16 %s, <2 x i16> %v, <2 x i1> %m, i32 %evl)
368 declare i16 @llvm.vp.reduce.and.v2i16(i16, <2 x i16>, <2 x i1>, i32)
370 define signext i16 @vpreduce_and_v2i16(i16 signext %s, <2 x i16> %v, <2 x i1> %m, i32 zeroext %evl) {
371 ; CHECK-LABEL: vpreduce_and_v2i16:
373 ; CHECK-NEXT: vsetivli zero, 1, e16, m1, ta, ma
374 ; CHECK-NEXT: vmv.s.x v9, a0
375 ; CHECK-NEXT: vsetvli zero, a1, e16, mf4, ta, ma
376 ; CHECK-NEXT: vredand.vs v9, v8, v9, v0.t
377 ; CHECK-NEXT: vmv.x.s a0, v9
379 %r = call i16 @llvm.vp.reduce.and.v2i16(i16 %s, <2 x i16> %v, <2 x i1> %m, i32 %evl)
383 declare i16 @llvm.vp.reduce.or.v2i16(i16, <2 x i16>, <2 x i1>, i32)
385 define signext i16 @vpreduce_or_v2i16(i16 signext %s, <2 x i16> %v, <2 x i1> %m, i32 zeroext %evl) {
386 ; CHECK-LABEL: vpreduce_or_v2i16:
388 ; CHECK-NEXT: vsetivli zero, 1, e16, m1, ta, ma
389 ; CHECK-NEXT: vmv.s.x v9, a0
390 ; CHECK-NEXT: vsetvli zero, a1, e16, mf4, ta, ma
391 ; CHECK-NEXT: vredor.vs v9, v8, v9, v0.t
392 ; CHECK-NEXT: vmv.x.s a0, v9
394 %r = call i16 @llvm.vp.reduce.or.v2i16(i16 %s, <2 x i16> %v, <2 x i1> %m, i32 %evl)
398 declare i16 @llvm.vp.reduce.xor.v2i16(i16, <2 x i16>, <2 x i1>, i32)
400 define signext i16 @vpreduce_xor_v2i16(i16 signext %s, <2 x i16> %v, <2 x i1> %m, i32 zeroext %evl) {
401 ; CHECK-LABEL: vpreduce_xor_v2i16:
403 ; CHECK-NEXT: vsetivli zero, 1, e16, m1, ta, ma
404 ; CHECK-NEXT: vmv.s.x v9, a0
405 ; CHECK-NEXT: vsetvli zero, a1, e16, mf4, ta, ma
406 ; CHECK-NEXT: vredxor.vs v9, v8, v9, v0.t
407 ; CHECK-NEXT: vmv.x.s a0, v9
409 %r = call i16 @llvm.vp.reduce.xor.v2i16(i16 %s, <2 x i16> %v, <2 x i1> %m, i32 %evl)
413 declare i16 @llvm.vp.reduce.add.v4i16(i16, <4 x i16>, <4 x i1>, i32)
415 define signext i16 @vpreduce_add_v4i16(i16 signext %s, <4 x i16> %v, <4 x i1> %m, i32 zeroext %evl) {
416 ; CHECK-LABEL: vpreduce_add_v4i16:
418 ; CHECK-NEXT: vsetivli zero, 1, e16, m1, ta, ma
419 ; CHECK-NEXT: vmv.s.x v9, a0
420 ; CHECK-NEXT: vsetvli zero, a1, e16, mf2, ta, ma
421 ; CHECK-NEXT: vredsum.vs v9, v8, v9, v0.t
422 ; CHECK-NEXT: vmv.x.s a0, v9
424 %r = call i16 @llvm.vp.reduce.add.v4i16(i16 %s, <4 x i16> %v, <4 x i1> %m, i32 %evl)
428 declare i16 @llvm.vp.reduce.umax.v4i16(i16, <4 x i16>, <4 x i1>, i32)
430 define signext i16 @vpreduce_umax_v4i16(i16 signext %s, <4 x i16> %v, <4 x i1> %m, i32 zeroext %evl) {
431 ; RV32-LABEL: vpreduce_umax_v4i16:
433 ; RV32-NEXT: slli a0, a0, 16
434 ; RV32-NEXT: srli a0, a0, 16
435 ; RV32-NEXT: vsetivli zero, 1, e16, m1, ta, ma
436 ; RV32-NEXT: vmv.s.x v9, a0
437 ; RV32-NEXT: vsetvli zero, a1, e16, mf2, ta, ma
438 ; RV32-NEXT: vredmaxu.vs v9, v8, v9, v0.t
439 ; RV32-NEXT: vmv.x.s a0, v9
442 ; RV64-LABEL: vpreduce_umax_v4i16:
444 ; RV64-NEXT: slli a0, a0, 48
445 ; RV64-NEXT: srli a0, a0, 48
446 ; RV64-NEXT: vsetivli zero, 1, e16, m1, ta, ma
447 ; RV64-NEXT: vmv.s.x v9, a0
448 ; RV64-NEXT: vsetvli zero, a1, e16, mf2, ta, ma
449 ; RV64-NEXT: vredmaxu.vs v9, v8, v9, v0.t
450 ; RV64-NEXT: vmv.x.s a0, v9
452 %r = call i16 @llvm.vp.reduce.umax.v4i16(i16 %s, <4 x i16> %v, <4 x i1> %m, i32 %evl)
456 declare i16 @llvm.vp.reduce.smax.v4i16(i16, <4 x i16>, <4 x i1>, i32)
458 define signext i16 @vpreduce_smax_v4i16(i16 signext %s, <4 x i16> %v, <4 x i1> %m, i32 zeroext %evl) {
459 ; CHECK-LABEL: vpreduce_smax_v4i16:
461 ; CHECK-NEXT: vsetivli zero, 1, e16, m1, ta, ma
462 ; CHECK-NEXT: vmv.s.x v9, a0
463 ; CHECK-NEXT: vsetvli zero, a1, e16, mf2, ta, ma
464 ; CHECK-NEXT: vredmax.vs v9, v8, v9, v0.t
465 ; CHECK-NEXT: vmv.x.s a0, v9
467 %r = call i16 @llvm.vp.reduce.smax.v4i16(i16 %s, <4 x i16> %v, <4 x i1> %m, i32 %evl)
471 declare i16 @llvm.vp.reduce.umin.v4i16(i16, <4 x i16>, <4 x i1>, i32)
473 define signext i16 @vpreduce_umin_v4i16(i16 signext %s, <4 x i16> %v, <4 x i1> %m, i32 zeroext %evl) {
474 ; RV32-LABEL: vpreduce_umin_v4i16:
476 ; RV32-NEXT: slli a0, a0, 16
477 ; RV32-NEXT: srli a0, a0, 16
478 ; RV32-NEXT: vsetivli zero, 1, e16, m1, ta, ma
479 ; RV32-NEXT: vmv.s.x v9, a0
480 ; RV32-NEXT: vsetvli zero, a1, e16, mf2, ta, ma
481 ; RV32-NEXT: vredminu.vs v9, v8, v9, v0.t
482 ; RV32-NEXT: vmv.x.s a0, v9
485 ; RV64-LABEL: vpreduce_umin_v4i16:
487 ; RV64-NEXT: slli a0, a0, 48
488 ; RV64-NEXT: srli a0, a0, 48
489 ; RV64-NEXT: vsetivli zero, 1, e16, m1, ta, ma
490 ; RV64-NEXT: vmv.s.x v9, a0
491 ; RV64-NEXT: vsetvli zero, a1, e16, mf2, ta, ma
492 ; RV64-NEXT: vredminu.vs v9, v8, v9, v0.t
493 ; RV64-NEXT: vmv.x.s a0, v9
495 %r = call i16 @llvm.vp.reduce.umin.v4i16(i16 %s, <4 x i16> %v, <4 x i1> %m, i32 %evl)
499 declare i16 @llvm.vp.reduce.smin.v4i16(i16, <4 x i16>, <4 x i1>, i32)
501 define signext i16 @vpreduce_smin_v4i16(i16 signext %s, <4 x i16> %v, <4 x i1> %m, i32 zeroext %evl) {
502 ; CHECK-LABEL: vpreduce_smin_v4i16:
504 ; CHECK-NEXT: vsetivli zero, 1, e16, m1, ta, ma
505 ; CHECK-NEXT: vmv.s.x v9, a0
506 ; CHECK-NEXT: vsetvli zero, a1, e16, mf2, ta, ma
507 ; CHECK-NEXT: vredmin.vs v9, v8, v9, v0.t
508 ; CHECK-NEXT: vmv.x.s a0, v9
510 %r = call i16 @llvm.vp.reduce.smin.v4i16(i16 %s, <4 x i16> %v, <4 x i1> %m, i32 %evl)
514 declare i16 @llvm.vp.reduce.and.v4i16(i16, <4 x i16>, <4 x i1>, i32)
516 define signext i16 @vpreduce_and_v4i16(i16 signext %s, <4 x i16> %v, <4 x i1> %m, i32 zeroext %evl) {
517 ; CHECK-LABEL: vpreduce_and_v4i16:
519 ; CHECK-NEXT: vsetivli zero, 1, e16, m1, ta, ma
520 ; CHECK-NEXT: vmv.s.x v9, a0
521 ; CHECK-NEXT: vsetvli zero, a1, e16, mf2, ta, ma
522 ; CHECK-NEXT: vredand.vs v9, v8, v9, v0.t
523 ; CHECK-NEXT: vmv.x.s a0, v9
525 %r = call i16 @llvm.vp.reduce.and.v4i16(i16 %s, <4 x i16> %v, <4 x i1> %m, i32 %evl)
529 declare i16 @llvm.vp.reduce.or.v4i16(i16, <4 x i16>, <4 x i1>, i32)
531 define signext i16 @vpreduce_or_v4i16(i16 signext %s, <4 x i16> %v, <4 x i1> %m, i32 zeroext %evl) {
532 ; CHECK-LABEL: vpreduce_or_v4i16:
534 ; CHECK-NEXT: vsetivli zero, 1, e16, m1, ta, ma
535 ; CHECK-NEXT: vmv.s.x v9, a0
536 ; CHECK-NEXT: vsetvli zero, a1, e16, mf2, ta, ma
537 ; CHECK-NEXT: vredor.vs v9, v8, v9, v0.t
538 ; CHECK-NEXT: vmv.x.s a0, v9
540 %r = call i16 @llvm.vp.reduce.or.v4i16(i16 %s, <4 x i16> %v, <4 x i1> %m, i32 %evl)
544 declare i16 @llvm.vp.reduce.xor.v4i16(i16, <4 x i16>, <4 x i1>, i32)
546 define signext i16 @vpreduce_xor_v4i16(i16 signext %s, <4 x i16> %v, <4 x i1> %m, i32 zeroext %evl) {
547 ; CHECK-LABEL: vpreduce_xor_v4i16:
549 ; CHECK-NEXT: vsetivli zero, 1, e16, m1, ta, ma
550 ; CHECK-NEXT: vmv.s.x v9, a0
551 ; CHECK-NEXT: vsetvli zero, a1, e16, mf2, ta, ma
552 ; CHECK-NEXT: vredxor.vs v9, v8, v9, v0.t
553 ; CHECK-NEXT: vmv.x.s a0, v9
555 %r = call i16 @llvm.vp.reduce.xor.v4i16(i16 %s, <4 x i16> %v, <4 x i1> %m, i32 %evl)
559 declare i32 @llvm.vp.reduce.add.v2i32(i32, <2 x i32>, <2 x i1>, i32)
561 define signext i32 @vpreduce_add_v2i32(i32 signext %s, <2 x i32> %v, <2 x i1> %m, i32 zeroext %evl) {
562 ; CHECK-LABEL: vpreduce_add_v2i32:
564 ; CHECK-NEXT: vsetivli zero, 1, e32, m1, ta, ma
565 ; CHECK-NEXT: vmv.s.x v9, a0
566 ; CHECK-NEXT: vsetvli zero, a1, e32, mf2, ta, ma
567 ; CHECK-NEXT: vredsum.vs v9, v8, v9, v0.t
568 ; CHECK-NEXT: vmv.x.s a0, v9
570 %r = call i32 @llvm.vp.reduce.add.v2i32(i32 %s, <2 x i32> %v, <2 x i1> %m, i32 %evl)
574 declare i32 @llvm.vp.reduce.umax.v2i32(i32, <2 x i32>, <2 x i1>, i32)
576 define signext i32 @vpreduce_umax_v2i32(i32 signext %s, <2 x i32> %v, <2 x i1> %m, i32 zeroext %evl) {
577 ; CHECK-LABEL: vpreduce_umax_v2i32:
579 ; CHECK-NEXT: vsetivli zero, 1, e32, m1, ta, ma
580 ; CHECK-NEXT: vmv.s.x v9, a0
581 ; CHECK-NEXT: vsetvli zero, a1, e32, mf2, ta, ma
582 ; CHECK-NEXT: vredmaxu.vs v9, v8, v9, v0.t
583 ; CHECK-NEXT: vmv.x.s a0, v9
585 %r = call i32 @llvm.vp.reduce.umax.v2i32(i32 %s, <2 x i32> %v, <2 x i1> %m, i32 %evl)
589 declare i32 @llvm.vp.reduce.smax.v2i32(i32, <2 x i32>, <2 x i1>, i32)
591 define signext i32 @vpreduce_smax_v2i32(i32 signext %s, <2 x i32> %v, <2 x i1> %m, i32 zeroext %evl) {
592 ; CHECK-LABEL: vpreduce_smax_v2i32:
594 ; CHECK-NEXT: vsetivli zero, 1, e32, m1, ta, ma
595 ; CHECK-NEXT: vmv.s.x v9, a0
596 ; CHECK-NEXT: vsetvli zero, a1, e32, mf2, ta, ma
597 ; CHECK-NEXT: vredmax.vs v9, v8, v9, v0.t
598 ; CHECK-NEXT: vmv.x.s a0, v9
600 %r = call i32 @llvm.vp.reduce.smax.v2i32(i32 %s, <2 x i32> %v, <2 x i1> %m, i32 %evl)
604 declare i32 @llvm.vp.reduce.umin.v2i32(i32, <2 x i32>, <2 x i1>, i32)
606 define signext i32 @vpreduce_umin_v2i32(i32 signext %s, <2 x i32> %v, <2 x i1> %m, i32 zeroext %evl) {
607 ; CHECK-LABEL: vpreduce_umin_v2i32:
609 ; CHECK-NEXT: vsetivli zero, 1, e32, m1, ta, ma
610 ; CHECK-NEXT: vmv.s.x v9, a0
611 ; CHECK-NEXT: vsetvli zero, a1, e32, mf2, ta, ma
612 ; CHECK-NEXT: vredminu.vs v9, v8, v9, v0.t
613 ; CHECK-NEXT: vmv.x.s a0, v9
615 %r = call i32 @llvm.vp.reduce.umin.v2i32(i32 %s, <2 x i32> %v, <2 x i1> %m, i32 %evl)
619 declare i32 @llvm.vp.reduce.smin.v2i32(i32, <2 x i32>, <2 x i1>, i32)
621 define signext i32 @vpreduce_smin_v2i32(i32 signext %s, <2 x i32> %v, <2 x i1> %m, i32 zeroext %evl) {
622 ; CHECK-LABEL: vpreduce_smin_v2i32:
624 ; CHECK-NEXT: vsetivli zero, 1, e32, m1, ta, ma
625 ; CHECK-NEXT: vmv.s.x v9, a0
626 ; CHECK-NEXT: vsetvli zero, a1, e32, mf2, ta, ma
627 ; CHECK-NEXT: vredmin.vs v9, v8, v9, v0.t
628 ; CHECK-NEXT: vmv.x.s a0, v9
630 %r = call i32 @llvm.vp.reduce.smin.v2i32(i32 %s, <2 x i32> %v, <2 x i1> %m, i32 %evl)
634 declare i32 @llvm.vp.reduce.and.v2i32(i32, <2 x i32>, <2 x i1>, i32)
636 define signext i32 @vpreduce_and_v2i32(i32 signext %s, <2 x i32> %v, <2 x i1> %m, i32 zeroext %evl) {
637 ; CHECK-LABEL: vpreduce_and_v2i32:
639 ; CHECK-NEXT: vsetivli zero, 1, e32, m1, ta, ma
640 ; CHECK-NEXT: vmv.s.x v9, a0
641 ; CHECK-NEXT: vsetvli zero, a1, e32, mf2, ta, ma
642 ; CHECK-NEXT: vredand.vs v9, v8, v9, v0.t
643 ; CHECK-NEXT: vmv.x.s a0, v9
645 %r = call i32 @llvm.vp.reduce.and.v2i32(i32 %s, <2 x i32> %v, <2 x i1> %m, i32 %evl)
649 declare i32 @llvm.vp.reduce.or.v2i32(i32, <2 x i32>, <2 x i1>, i32)
651 define signext i32 @vpreduce_or_v2i32(i32 signext %s, <2 x i32> %v, <2 x i1> %m, i32 zeroext %evl) {
652 ; CHECK-LABEL: vpreduce_or_v2i32:
654 ; CHECK-NEXT: vsetivli zero, 1, e32, m1, ta, ma
655 ; CHECK-NEXT: vmv.s.x v9, a0
656 ; CHECK-NEXT: vsetvli zero, a1, e32, mf2, ta, ma
657 ; CHECK-NEXT: vredor.vs v9, v8, v9, v0.t
658 ; CHECK-NEXT: vmv.x.s a0, v9
660 %r = call i32 @llvm.vp.reduce.or.v2i32(i32 %s, <2 x i32> %v, <2 x i1> %m, i32 %evl)
664 declare i32 @llvm.vp.reduce.xor.v2i32(i32, <2 x i32>, <2 x i1>, i32)
666 define signext i32 @vpreduce_xor_v2i32(i32 signext %s, <2 x i32> %v, <2 x i1> %m, i32 zeroext %evl) {
667 ; CHECK-LABEL: vpreduce_xor_v2i32:
669 ; CHECK-NEXT: vsetivli zero, 1, e32, m1, ta, ma
670 ; CHECK-NEXT: vmv.s.x v9, a0
671 ; CHECK-NEXT: vsetvli zero, a1, e32, mf2, ta, ma
672 ; CHECK-NEXT: vredxor.vs v9, v8, v9, v0.t
673 ; CHECK-NEXT: vmv.x.s a0, v9
675 %r = call i32 @llvm.vp.reduce.xor.v2i32(i32 %s, <2 x i32> %v, <2 x i1> %m, i32 %evl)
679 declare i32 @llvm.vp.reduce.add.v4i32(i32, <4 x i32>, <4 x i1>, i32)
681 define signext i32 @vpreduce_add_v4i32(i32 signext %s, <4 x i32> %v, <4 x i1> %m, i32 zeroext %evl) {
682 ; CHECK-LABEL: vpreduce_add_v4i32:
684 ; CHECK-NEXT: vsetivli zero, 1, e32, m1, ta, ma
685 ; CHECK-NEXT: vmv.s.x v9, a0
686 ; CHECK-NEXT: vsetvli zero, a1, e32, m1, ta, ma
687 ; CHECK-NEXT: vredsum.vs v9, v8, v9, v0.t
688 ; CHECK-NEXT: vmv.x.s a0, v9
690 %r = call i32 @llvm.vp.reduce.add.v4i32(i32 %s, <4 x i32> %v, <4 x i1> %m, i32 %evl)
694 declare i32 @llvm.vp.reduce.umax.v4i32(i32, <4 x i32>, <4 x i1>, i32)
696 define signext i32 @vpreduce_umax_v4i32(i32 signext %s, <4 x i32> %v, <4 x i1> %m, i32 zeroext %evl) {
697 ; CHECK-LABEL: vpreduce_umax_v4i32:
699 ; CHECK-NEXT: vsetivli zero, 1, e32, m1, ta, ma
700 ; CHECK-NEXT: vmv.s.x v9, a0
701 ; CHECK-NEXT: vsetvli zero, a1, e32, m1, ta, ma
702 ; CHECK-NEXT: vredmaxu.vs v9, v8, v9, v0.t
703 ; CHECK-NEXT: vmv.x.s a0, v9
705 %r = call i32 @llvm.vp.reduce.umax.v4i32(i32 %s, <4 x i32> %v, <4 x i1> %m, i32 %evl)
709 declare i32 @llvm.vp.reduce.smax.v4i32(i32, <4 x i32>, <4 x i1>, i32)
711 define signext i32 @vpreduce_smax_v4i32(i32 signext %s, <4 x i32> %v, <4 x i1> %m, i32 zeroext %evl) {
712 ; CHECK-LABEL: vpreduce_smax_v4i32:
714 ; CHECK-NEXT: vsetivli zero, 1, e32, m1, ta, ma
715 ; CHECK-NEXT: vmv.s.x v9, a0
716 ; CHECK-NEXT: vsetvli zero, a1, e32, m1, ta, ma
717 ; CHECK-NEXT: vredmax.vs v9, v8, v9, v0.t
718 ; CHECK-NEXT: vmv.x.s a0, v9
720 %r = call i32 @llvm.vp.reduce.smax.v4i32(i32 %s, <4 x i32> %v, <4 x i1> %m, i32 %evl)
724 declare i32 @llvm.vp.reduce.umin.v4i32(i32, <4 x i32>, <4 x i1>, i32)
726 define signext i32 @vpreduce_umin_v4i32(i32 signext %s, <4 x i32> %v, <4 x i1> %m, i32 zeroext %evl) {
727 ; CHECK-LABEL: vpreduce_umin_v4i32:
729 ; CHECK-NEXT: vsetivli zero, 1, e32, m1, ta, ma
730 ; CHECK-NEXT: vmv.s.x v9, a0
731 ; CHECK-NEXT: vsetvli zero, a1, e32, m1, ta, ma
732 ; CHECK-NEXT: vredminu.vs v9, v8, v9, v0.t
733 ; CHECK-NEXT: vmv.x.s a0, v9
735 %r = call i32 @llvm.vp.reduce.umin.v4i32(i32 %s, <4 x i32> %v, <4 x i1> %m, i32 %evl)
739 declare i32 @llvm.vp.reduce.smin.v4i32(i32, <4 x i32>, <4 x i1>, i32)
741 define signext i32 @vpreduce_smin_v4i32(i32 signext %s, <4 x i32> %v, <4 x i1> %m, i32 zeroext %evl) {
742 ; CHECK-LABEL: vpreduce_smin_v4i32:
744 ; CHECK-NEXT: vsetivli zero, 1, e32, m1, ta, ma
745 ; CHECK-NEXT: vmv.s.x v9, a0
746 ; CHECK-NEXT: vsetvli zero, a1, e32, m1, ta, ma
747 ; CHECK-NEXT: vredmin.vs v9, v8, v9, v0.t
748 ; CHECK-NEXT: vmv.x.s a0, v9
750 %r = call i32 @llvm.vp.reduce.smin.v4i32(i32 %s, <4 x i32> %v, <4 x i1> %m, i32 %evl)
754 declare i32 @llvm.vp.reduce.and.v4i32(i32, <4 x i32>, <4 x i1>, i32)
756 define signext i32 @vpreduce_and_v4i32(i32 signext %s, <4 x i32> %v, <4 x i1> %m, i32 zeroext %evl) {
757 ; CHECK-LABEL: vpreduce_and_v4i32:
759 ; CHECK-NEXT: vsetivli zero, 1, e32, m1, ta, ma
760 ; CHECK-NEXT: vmv.s.x v9, a0
761 ; CHECK-NEXT: vsetvli zero, a1, e32, m1, ta, ma
762 ; CHECK-NEXT: vredand.vs v9, v8, v9, v0.t
763 ; CHECK-NEXT: vmv.x.s a0, v9
765 %r = call i32 @llvm.vp.reduce.and.v4i32(i32 %s, <4 x i32> %v, <4 x i1> %m, i32 %evl)
769 declare i32 @llvm.vp.reduce.or.v4i32(i32, <4 x i32>, <4 x i1>, i32)
771 define signext i32 @vpreduce_or_v4i32(i32 signext %s, <4 x i32> %v, <4 x i1> %m, i32 zeroext %evl) {
772 ; CHECK-LABEL: vpreduce_or_v4i32:
774 ; CHECK-NEXT: vsetivli zero, 1, e32, m1, ta, ma
775 ; CHECK-NEXT: vmv.s.x v9, a0
776 ; CHECK-NEXT: vsetvli zero, a1, e32, m1, ta, ma
777 ; CHECK-NEXT: vredor.vs v9, v8, v9, v0.t
778 ; CHECK-NEXT: vmv.x.s a0, v9
780 %r = call i32 @llvm.vp.reduce.or.v4i32(i32 %s, <4 x i32> %v, <4 x i1> %m, i32 %evl)
784 declare i32 @llvm.vp.reduce.xor.v4i32(i32, <4 x i32>, <4 x i1>, i32)
786 define signext i32 @vpreduce_xor_v4i32(i32 signext %s, <4 x i32> %v, <4 x i1> %m, i32 zeroext %evl) {
787 ; CHECK-LABEL: vpreduce_xor_v4i32:
789 ; CHECK-NEXT: vsetivli zero, 1, e32, m1, ta, ma
790 ; CHECK-NEXT: vmv.s.x v9, a0
791 ; CHECK-NEXT: vsetvli zero, a1, e32, m1, ta, ma
792 ; CHECK-NEXT: vredxor.vs v9, v8, v9, v0.t
793 ; CHECK-NEXT: vmv.x.s a0, v9
795 %r = call i32 @llvm.vp.reduce.xor.v4i32(i32 %s, <4 x i32> %v, <4 x i1> %m, i32 %evl)
799 declare i32 @llvm.vp.reduce.xor.v64i32(i32, <64 x i32>, <64 x i1>, i32)
801 define signext i32 @vpreduce_xor_v64i32(i32 signext %s, <64 x i32> %v, <64 x i1> %m, i32 zeroext %evl) {
802 ; CHECK-LABEL: vpreduce_xor_v64i32:
804 ; CHECK-NEXT: li a3, 32
805 ; CHECK-NEXT: vsetivli zero, 4, e8, mf2, ta, ma
806 ; CHECK-NEXT: vslidedown.vi v24, v0, 4
807 ; CHECK-NEXT: mv a2, a1
808 ; CHECK-NEXT: bltu a1, a3, .LBB49_2
809 ; CHECK-NEXT: # %bb.1:
810 ; CHECK-NEXT: li a2, 32
811 ; CHECK-NEXT: .LBB49_2:
812 ; CHECK-NEXT: vsetvli zero, zero, e32, m2, ta, ma
813 ; CHECK-NEXT: vmv.s.x v25, a0
814 ; CHECK-NEXT: addi a0, a1, -32
815 ; CHECK-NEXT: vsetvli zero, a2, e32, m8, ta, ma
816 ; CHECK-NEXT: vredxor.vs v25, v8, v25, v0.t
817 ; CHECK-NEXT: sltu a1, a1, a0
818 ; CHECK-NEXT: addi a1, a1, -1
819 ; CHECK-NEXT: and a0, a1, a0
820 ; CHECK-NEXT: vmv1r.v v0, v24
821 ; CHECK-NEXT: vsetvli zero, a0, e32, m8, ta, ma
822 ; CHECK-NEXT: vredxor.vs v25, v16, v25, v0.t
823 ; CHECK-NEXT: vmv.x.s a0, v25
825 %r = call i32 @llvm.vp.reduce.xor.v64i32(i32 %s, <64 x i32> %v, <64 x i1> %m, i32 %evl)
829 declare i64 @llvm.vp.reduce.add.v2i64(i64, <2 x i64>, <2 x i1>, i32)
831 define signext i64 @vpreduce_add_v2i64(i64 signext %s, <2 x i64> %v, <2 x i1> %m, i32 zeroext %evl) {
832 ; RV32-LABEL: vpreduce_add_v2i64:
834 ; RV32-NEXT: addi sp, sp, -16
835 ; RV32-NEXT: .cfi_def_cfa_offset 16
836 ; RV32-NEXT: sw a0, 8(sp)
837 ; RV32-NEXT: sw a1, 12(sp)
838 ; RV32-NEXT: addi a0, sp, 8
839 ; RV32-NEXT: vsetivli zero, 1, e64, m1, ta, ma
840 ; RV32-NEXT: vlse64.v v9, (a0), zero
841 ; RV32-NEXT: li a1, 32
842 ; RV32-NEXT: vsetvli zero, a2, e64, m1, ta, ma
843 ; RV32-NEXT: vredsum.vs v9, v8, v9, v0.t
844 ; RV32-NEXT: vmv.x.s a0, v9
845 ; RV32-NEXT: vsetivli zero, 1, e64, m1, ta, ma
846 ; RV32-NEXT: vsrl.vx v8, v9, a1
847 ; RV32-NEXT: vmv.x.s a1, v8
848 ; RV32-NEXT: addi sp, sp, 16
849 ; RV32-NEXT: .cfi_def_cfa_offset 0
852 ; RV64-LABEL: vpreduce_add_v2i64:
854 ; RV64-NEXT: vsetivli zero, 1, e64, m1, ta, ma
855 ; RV64-NEXT: vmv.s.x v9, a0
856 ; RV64-NEXT: vsetvli zero, a1, e64, m1, ta, ma
857 ; RV64-NEXT: vredsum.vs v9, v8, v9, v0.t
858 ; RV64-NEXT: vmv.x.s a0, v9
860 %r = call i64 @llvm.vp.reduce.add.v2i64(i64 %s, <2 x i64> %v, <2 x i1> %m, i32 %evl)
864 declare i64 @llvm.vp.reduce.umax.v2i64(i64, <2 x i64>, <2 x i1>, i32)
866 define signext i64 @vpreduce_umax_v2i64(i64 signext %s, <2 x i64> %v, <2 x i1> %m, i32 zeroext %evl) {
867 ; RV32-LABEL: vpreduce_umax_v2i64:
869 ; RV32-NEXT: addi sp, sp, -16
870 ; RV32-NEXT: .cfi_def_cfa_offset 16
871 ; RV32-NEXT: sw a0, 8(sp)
872 ; RV32-NEXT: sw a1, 12(sp)
873 ; RV32-NEXT: addi a0, sp, 8
874 ; RV32-NEXT: vsetivli zero, 1, e64, m1, ta, ma
875 ; RV32-NEXT: vlse64.v v9, (a0), zero
876 ; RV32-NEXT: li a1, 32
877 ; RV32-NEXT: vsetvli zero, a2, e64, m1, ta, ma
878 ; RV32-NEXT: vredmaxu.vs v9, v8, v9, v0.t
879 ; RV32-NEXT: vmv.x.s a0, v9
880 ; RV32-NEXT: vsetivli zero, 1, e64, m1, ta, ma
881 ; RV32-NEXT: vsrl.vx v8, v9, a1
882 ; RV32-NEXT: vmv.x.s a1, v8
883 ; RV32-NEXT: addi sp, sp, 16
884 ; RV32-NEXT: .cfi_def_cfa_offset 0
887 ; RV64-LABEL: vpreduce_umax_v2i64:
889 ; RV64-NEXT: vsetivli zero, 1, e64, m1, ta, ma
890 ; RV64-NEXT: vmv.s.x v9, a0
891 ; RV64-NEXT: vsetvli zero, a1, e64, m1, ta, ma
892 ; RV64-NEXT: vredmaxu.vs v9, v8, v9, v0.t
893 ; RV64-NEXT: vmv.x.s a0, v9
895 %r = call i64 @llvm.vp.reduce.umax.v2i64(i64 %s, <2 x i64> %v, <2 x i1> %m, i32 %evl)
899 declare i64 @llvm.vp.reduce.smax.v2i64(i64, <2 x i64>, <2 x i1>, i32)
901 define signext i64 @vpreduce_smax_v2i64(i64 signext %s, <2 x i64> %v, <2 x i1> %m, i32 zeroext %evl) {
902 ; RV32-LABEL: vpreduce_smax_v2i64:
904 ; RV32-NEXT: addi sp, sp, -16
905 ; RV32-NEXT: .cfi_def_cfa_offset 16
906 ; RV32-NEXT: sw a0, 8(sp)
907 ; RV32-NEXT: sw a1, 12(sp)
908 ; RV32-NEXT: addi a0, sp, 8
909 ; RV32-NEXT: vsetivli zero, 1, e64, m1, ta, ma
910 ; RV32-NEXT: vlse64.v v9, (a0), zero
911 ; RV32-NEXT: li a1, 32
912 ; RV32-NEXT: vsetvli zero, a2, e64, m1, ta, ma
913 ; RV32-NEXT: vredmax.vs v9, v8, v9, v0.t
914 ; RV32-NEXT: vmv.x.s a0, v9
915 ; RV32-NEXT: vsetivli zero, 1, e64, m1, ta, ma
916 ; RV32-NEXT: vsrl.vx v8, v9, a1
917 ; RV32-NEXT: vmv.x.s a1, v8
918 ; RV32-NEXT: addi sp, sp, 16
919 ; RV32-NEXT: .cfi_def_cfa_offset 0
922 ; RV64-LABEL: vpreduce_smax_v2i64:
924 ; RV64-NEXT: vsetivli zero, 1, e64, m1, ta, ma
925 ; RV64-NEXT: vmv.s.x v9, a0
926 ; RV64-NEXT: vsetvli zero, a1, e64, m1, ta, ma
927 ; RV64-NEXT: vredmax.vs v9, v8, v9, v0.t
928 ; RV64-NEXT: vmv.x.s a0, v9
930 %r = call i64 @llvm.vp.reduce.smax.v2i64(i64 %s, <2 x i64> %v, <2 x i1> %m, i32 %evl)
934 declare i64 @llvm.vp.reduce.umin.v2i64(i64, <2 x i64>, <2 x i1>, i32)
936 define signext i64 @vpreduce_umin_v2i64(i64 signext %s, <2 x i64> %v, <2 x i1> %m, i32 zeroext %evl) {
937 ; RV32-LABEL: vpreduce_umin_v2i64:
939 ; RV32-NEXT: addi sp, sp, -16
940 ; RV32-NEXT: .cfi_def_cfa_offset 16
941 ; RV32-NEXT: sw a0, 8(sp)
942 ; RV32-NEXT: sw a1, 12(sp)
943 ; RV32-NEXT: addi a0, sp, 8
944 ; RV32-NEXT: vsetivli zero, 1, e64, m1, ta, ma
945 ; RV32-NEXT: vlse64.v v9, (a0), zero
946 ; RV32-NEXT: li a1, 32
947 ; RV32-NEXT: vsetvli zero, a2, e64, m1, ta, ma
948 ; RV32-NEXT: vredminu.vs v9, v8, v9, v0.t
949 ; RV32-NEXT: vmv.x.s a0, v9
950 ; RV32-NEXT: vsetivli zero, 1, e64, m1, ta, ma
951 ; RV32-NEXT: vsrl.vx v8, v9, a1
952 ; RV32-NEXT: vmv.x.s a1, v8
953 ; RV32-NEXT: addi sp, sp, 16
954 ; RV32-NEXT: .cfi_def_cfa_offset 0
957 ; RV64-LABEL: vpreduce_umin_v2i64:
959 ; RV64-NEXT: vsetivli zero, 1, e64, m1, ta, ma
960 ; RV64-NEXT: vmv.s.x v9, a0
961 ; RV64-NEXT: vsetvli zero, a1, e64, m1, ta, ma
962 ; RV64-NEXT: vredminu.vs v9, v8, v9, v0.t
963 ; RV64-NEXT: vmv.x.s a0, v9
965 %r = call i64 @llvm.vp.reduce.umin.v2i64(i64 %s, <2 x i64> %v, <2 x i1> %m, i32 %evl)
969 declare i64 @llvm.vp.reduce.smin.v2i64(i64, <2 x i64>, <2 x i1>, i32)
971 define signext i64 @vpreduce_smin_v2i64(i64 signext %s, <2 x i64> %v, <2 x i1> %m, i32 zeroext %evl) {
972 ; RV32-LABEL: vpreduce_smin_v2i64:
974 ; RV32-NEXT: addi sp, sp, -16
975 ; RV32-NEXT: .cfi_def_cfa_offset 16
976 ; RV32-NEXT: sw a0, 8(sp)
977 ; RV32-NEXT: sw a1, 12(sp)
978 ; RV32-NEXT: addi a0, sp, 8
979 ; RV32-NEXT: vsetivli zero, 1, e64, m1, ta, ma
980 ; RV32-NEXT: vlse64.v v9, (a0), zero
981 ; RV32-NEXT: li a1, 32
982 ; RV32-NEXT: vsetvli zero, a2, e64, m1, ta, ma
983 ; RV32-NEXT: vredmin.vs v9, v8, v9, v0.t
984 ; RV32-NEXT: vmv.x.s a0, v9
985 ; RV32-NEXT: vsetivli zero, 1, e64, m1, ta, ma
986 ; RV32-NEXT: vsrl.vx v8, v9, a1
987 ; RV32-NEXT: vmv.x.s a1, v8
988 ; RV32-NEXT: addi sp, sp, 16
989 ; RV32-NEXT: .cfi_def_cfa_offset 0
992 ; RV64-LABEL: vpreduce_smin_v2i64:
994 ; RV64-NEXT: vsetivli zero, 1, e64, m1, ta, ma
995 ; RV64-NEXT: vmv.s.x v9, a0
996 ; RV64-NEXT: vsetvli zero, a1, e64, m1, ta, ma
997 ; RV64-NEXT: vredmin.vs v9, v8, v9, v0.t
998 ; RV64-NEXT: vmv.x.s a0, v9
1000 %r = call i64 @llvm.vp.reduce.smin.v2i64(i64 %s, <2 x i64> %v, <2 x i1> %m, i32 %evl)
1004 declare i64 @llvm.vp.reduce.and.v2i64(i64, <2 x i64>, <2 x i1>, i32)
1006 define signext i64 @vpreduce_and_v2i64(i64 signext %s, <2 x i64> %v, <2 x i1> %m, i32 zeroext %evl) {
1007 ; RV32-LABEL: vpreduce_and_v2i64:
1009 ; RV32-NEXT: addi sp, sp, -16
1010 ; RV32-NEXT: .cfi_def_cfa_offset 16
1011 ; RV32-NEXT: sw a0, 8(sp)
1012 ; RV32-NEXT: sw a1, 12(sp)
1013 ; RV32-NEXT: addi a0, sp, 8
1014 ; RV32-NEXT: vsetivli zero, 1, e64, m1, ta, ma
1015 ; RV32-NEXT: vlse64.v v9, (a0), zero
1016 ; RV32-NEXT: li a1, 32
1017 ; RV32-NEXT: vsetvli zero, a2, e64, m1, ta, ma
1018 ; RV32-NEXT: vredand.vs v9, v8, v9, v0.t
1019 ; RV32-NEXT: vmv.x.s a0, v9
1020 ; RV32-NEXT: vsetivli zero, 1, e64, m1, ta, ma
1021 ; RV32-NEXT: vsrl.vx v8, v9, a1
1022 ; RV32-NEXT: vmv.x.s a1, v8
1023 ; RV32-NEXT: addi sp, sp, 16
1024 ; RV32-NEXT: .cfi_def_cfa_offset 0
1027 ; RV64-LABEL: vpreduce_and_v2i64:
1029 ; RV64-NEXT: vsetivli zero, 1, e64, m1, ta, ma
1030 ; RV64-NEXT: vmv.s.x v9, a0
1031 ; RV64-NEXT: vsetvli zero, a1, e64, m1, ta, ma
1032 ; RV64-NEXT: vredand.vs v9, v8, v9, v0.t
1033 ; RV64-NEXT: vmv.x.s a0, v9
1035 %r = call i64 @llvm.vp.reduce.and.v2i64(i64 %s, <2 x i64> %v, <2 x i1> %m, i32 %evl)
1039 declare i64 @llvm.vp.reduce.or.v2i64(i64, <2 x i64>, <2 x i1>, i32)
1041 define signext i64 @vpreduce_or_v2i64(i64 signext %s, <2 x i64> %v, <2 x i1> %m, i32 zeroext %evl) {
1042 ; RV32-LABEL: vpreduce_or_v2i64:
1044 ; RV32-NEXT: addi sp, sp, -16
1045 ; RV32-NEXT: .cfi_def_cfa_offset 16
1046 ; RV32-NEXT: sw a0, 8(sp)
1047 ; RV32-NEXT: sw a1, 12(sp)
1048 ; RV32-NEXT: addi a0, sp, 8
1049 ; RV32-NEXT: vsetivli zero, 1, e64, m1, ta, ma
1050 ; RV32-NEXT: vlse64.v v9, (a0), zero
1051 ; RV32-NEXT: li a1, 32
1052 ; RV32-NEXT: vsetvli zero, a2, e64, m1, ta, ma
1053 ; RV32-NEXT: vredor.vs v9, v8, v9, v0.t
1054 ; RV32-NEXT: vmv.x.s a0, v9
1055 ; RV32-NEXT: vsetivli zero, 1, e64, m1, ta, ma
1056 ; RV32-NEXT: vsrl.vx v8, v9, a1
1057 ; RV32-NEXT: vmv.x.s a1, v8
1058 ; RV32-NEXT: addi sp, sp, 16
1059 ; RV32-NEXT: .cfi_def_cfa_offset 0
1062 ; RV64-LABEL: vpreduce_or_v2i64:
1064 ; RV64-NEXT: vsetivli zero, 1, e64, m1, ta, ma
1065 ; RV64-NEXT: vmv.s.x v9, a0
1066 ; RV64-NEXT: vsetvli zero, a1, e64, m1, ta, ma
1067 ; RV64-NEXT: vredor.vs v9, v8, v9, v0.t
1068 ; RV64-NEXT: vmv.x.s a0, v9
1070 %r = call i64 @llvm.vp.reduce.or.v2i64(i64 %s, <2 x i64> %v, <2 x i1> %m, i32 %evl)
1074 declare i64 @llvm.vp.reduce.xor.v2i64(i64, <2 x i64>, <2 x i1>, i32)
1076 define signext i64 @vpreduce_xor_v2i64(i64 signext %s, <2 x i64> %v, <2 x i1> %m, i32 zeroext %evl) {
1077 ; RV32-LABEL: vpreduce_xor_v2i64:
1079 ; RV32-NEXT: addi sp, sp, -16
1080 ; RV32-NEXT: .cfi_def_cfa_offset 16
1081 ; RV32-NEXT: sw a0, 8(sp)
1082 ; RV32-NEXT: sw a1, 12(sp)
1083 ; RV32-NEXT: addi a0, sp, 8
1084 ; RV32-NEXT: vsetivli zero, 1, e64, m1, ta, ma
1085 ; RV32-NEXT: vlse64.v v9, (a0), zero
1086 ; RV32-NEXT: li a1, 32
1087 ; RV32-NEXT: vsetvli zero, a2, e64, m1, ta, ma
1088 ; RV32-NEXT: vredxor.vs v9, v8, v9, v0.t
1089 ; RV32-NEXT: vmv.x.s a0, v9
1090 ; RV32-NEXT: vsetivli zero, 1, e64, m1, ta, ma
1091 ; RV32-NEXT: vsrl.vx v8, v9, a1
1092 ; RV32-NEXT: vmv.x.s a1, v8
1093 ; RV32-NEXT: addi sp, sp, 16
1094 ; RV32-NEXT: .cfi_def_cfa_offset 0
1097 ; RV64-LABEL: vpreduce_xor_v2i64:
1099 ; RV64-NEXT: vsetivli zero, 1, e64, m1, ta, ma
1100 ; RV64-NEXT: vmv.s.x v9, a0
1101 ; RV64-NEXT: vsetvli zero, a1, e64, m1, ta, ma
1102 ; RV64-NEXT: vredxor.vs v9, v8, v9, v0.t
1103 ; RV64-NEXT: vmv.x.s a0, v9
1105 %r = call i64 @llvm.vp.reduce.xor.v2i64(i64 %s, <2 x i64> %v, <2 x i1> %m, i32 %evl)
1109 declare i64 @llvm.vp.reduce.add.v4i64(i64, <4 x i64>, <4 x i1>, i32)
1111 define signext i64 @vpreduce_add_v4i64(i64 signext %s, <4 x i64> %v, <4 x i1> %m, i32 zeroext %evl) {
1112 ; RV32-LABEL: vpreduce_add_v4i64:
1114 ; RV32-NEXT: addi sp, sp, -16
1115 ; RV32-NEXT: .cfi_def_cfa_offset 16
1116 ; RV32-NEXT: sw a0, 8(sp)
1117 ; RV32-NEXT: sw a1, 12(sp)
1118 ; RV32-NEXT: addi a0, sp, 8
1119 ; RV32-NEXT: vsetivli zero, 1, e64, m1, ta, ma
1120 ; RV32-NEXT: vlse64.v v10, (a0), zero
1121 ; RV32-NEXT: li a1, 32
1122 ; RV32-NEXT: vsetvli zero, a2, e64, m2, ta, ma
1123 ; RV32-NEXT: vredsum.vs v10, v8, v10, v0.t
1124 ; RV32-NEXT: vmv.x.s a0, v10
1125 ; RV32-NEXT: vsetivli zero, 1, e64, m1, ta, ma
1126 ; RV32-NEXT: vsrl.vx v8, v10, a1
1127 ; RV32-NEXT: vmv.x.s a1, v8
1128 ; RV32-NEXT: addi sp, sp, 16
1129 ; RV32-NEXT: .cfi_def_cfa_offset 0
1132 ; RV64-LABEL: vpreduce_add_v4i64:
1134 ; RV64-NEXT: vsetivli zero, 1, e64, m1, ta, ma
1135 ; RV64-NEXT: vmv.s.x v10, a0
1136 ; RV64-NEXT: vsetvli zero, a1, e64, m2, ta, ma
1137 ; RV64-NEXT: vredsum.vs v10, v8, v10, v0.t
1138 ; RV64-NEXT: vmv.x.s a0, v10
1140 %r = call i64 @llvm.vp.reduce.add.v4i64(i64 %s, <4 x i64> %v, <4 x i1> %m, i32 %evl)
1144 declare i64 @llvm.vp.reduce.umax.v4i64(i64, <4 x i64>, <4 x i1>, i32)
1146 define signext i64 @vpreduce_umax_v4i64(i64 signext %s, <4 x i64> %v, <4 x i1> %m, i32 zeroext %evl) {
1147 ; RV32-LABEL: vpreduce_umax_v4i64:
1149 ; RV32-NEXT: addi sp, sp, -16
1150 ; RV32-NEXT: .cfi_def_cfa_offset 16
1151 ; RV32-NEXT: sw a0, 8(sp)
1152 ; RV32-NEXT: sw a1, 12(sp)
1153 ; RV32-NEXT: addi a0, sp, 8
1154 ; RV32-NEXT: vsetivli zero, 1, e64, m1, ta, ma
1155 ; RV32-NEXT: vlse64.v v10, (a0), zero
1156 ; RV32-NEXT: li a1, 32
1157 ; RV32-NEXT: vsetvli zero, a2, e64, m2, ta, ma
1158 ; RV32-NEXT: vredmaxu.vs v10, v8, v10, v0.t
1159 ; RV32-NEXT: vmv.x.s a0, v10
1160 ; RV32-NEXT: vsetivli zero, 1, e64, m1, ta, ma
1161 ; RV32-NEXT: vsrl.vx v8, v10, a1
1162 ; RV32-NEXT: vmv.x.s a1, v8
1163 ; RV32-NEXT: addi sp, sp, 16
1164 ; RV32-NEXT: .cfi_def_cfa_offset 0
1167 ; RV64-LABEL: vpreduce_umax_v4i64:
1169 ; RV64-NEXT: vsetivli zero, 1, e64, m1, ta, ma
1170 ; RV64-NEXT: vmv.s.x v10, a0
1171 ; RV64-NEXT: vsetvli zero, a1, e64, m2, ta, ma
1172 ; RV64-NEXT: vredmaxu.vs v10, v8, v10, v0.t
1173 ; RV64-NEXT: vmv.x.s a0, v10
1175 %r = call i64 @llvm.vp.reduce.umax.v4i64(i64 %s, <4 x i64> %v, <4 x i1> %m, i32 %evl)
1179 declare i64 @llvm.vp.reduce.smax.v4i64(i64, <4 x i64>, <4 x i1>, i32)
1181 define signext i64 @vpreduce_smax_v4i64(i64 signext %s, <4 x i64> %v, <4 x i1> %m, i32 zeroext %evl) {
1182 ; RV32-LABEL: vpreduce_smax_v4i64:
1184 ; RV32-NEXT: addi sp, sp, -16
1185 ; RV32-NEXT: .cfi_def_cfa_offset 16
1186 ; RV32-NEXT: sw a0, 8(sp)
1187 ; RV32-NEXT: sw a1, 12(sp)
1188 ; RV32-NEXT: addi a0, sp, 8
1189 ; RV32-NEXT: vsetivli zero, 1, e64, m1, ta, ma
1190 ; RV32-NEXT: vlse64.v v10, (a0), zero
1191 ; RV32-NEXT: li a1, 32
1192 ; RV32-NEXT: vsetvli zero, a2, e64, m2, ta, ma
1193 ; RV32-NEXT: vredmax.vs v10, v8, v10, v0.t
1194 ; RV32-NEXT: vmv.x.s a0, v10
1195 ; RV32-NEXT: vsetivli zero, 1, e64, m1, ta, ma
1196 ; RV32-NEXT: vsrl.vx v8, v10, a1
1197 ; RV32-NEXT: vmv.x.s a1, v8
1198 ; RV32-NEXT: addi sp, sp, 16
1199 ; RV32-NEXT: .cfi_def_cfa_offset 0
1202 ; RV64-LABEL: vpreduce_smax_v4i64:
1204 ; RV64-NEXT: vsetivli zero, 1, e64, m1, ta, ma
1205 ; RV64-NEXT: vmv.s.x v10, a0
1206 ; RV64-NEXT: vsetvli zero, a1, e64, m2, ta, ma
1207 ; RV64-NEXT: vredmax.vs v10, v8, v10, v0.t
1208 ; RV64-NEXT: vmv.x.s a0, v10
1210 %r = call i64 @llvm.vp.reduce.smax.v4i64(i64 %s, <4 x i64> %v, <4 x i1> %m, i32 %evl)
1214 declare i64 @llvm.vp.reduce.umin.v4i64(i64, <4 x i64>, <4 x i1>, i32)
1216 define signext i64 @vpreduce_umin_v4i64(i64 signext %s, <4 x i64> %v, <4 x i1> %m, i32 zeroext %evl) {
1217 ; RV32-LABEL: vpreduce_umin_v4i64:
1219 ; RV32-NEXT: addi sp, sp, -16
1220 ; RV32-NEXT: .cfi_def_cfa_offset 16
1221 ; RV32-NEXT: sw a0, 8(sp)
1222 ; RV32-NEXT: sw a1, 12(sp)
1223 ; RV32-NEXT: addi a0, sp, 8
1224 ; RV32-NEXT: vsetivli zero, 1, e64, m1, ta, ma
1225 ; RV32-NEXT: vlse64.v v10, (a0), zero
1226 ; RV32-NEXT: li a1, 32
1227 ; RV32-NEXT: vsetvli zero, a2, e64, m2, ta, ma
1228 ; RV32-NEXT: vredminu.vs v10, v8, v10, v0.t
1229 ; RV32-NEXT: vmv.x.s a0, v10
1230 ; RV32-NEXT: vsetivli zero, 1, e64, m1, ta, ma
1231 ; RV32-NEXT: vsrl.vx v8, v10, a1
1232 ; RV32-NEXT: vmv.x.s a1, v8
1233 ; RV32-NEXT: addi sp, sp, 16
1234 ; RV32-NEXT: .cfi_def_cfa_offset 0
1237 ; RV64-LABEL: vpreduce_umin_v4i64:
1239 ; RV64-NEXT: vsetivli zero, 1, e64, m1, ta, ma
1240 ; RV64-NEXT: vmv.s.x v10, a0
1241 ; RV64-NEXT: vsetvli zero, a1, e64, m2, ta, ma
1242 ; RV64-NEXT: vredminu.vs v10, v8, v10, v0.t
1243 ; RV64-NEXT: vmv.x.s a0, v10
1245 %r = call i64 @llvm.vp.reduce.umin.v4i64(i64 %s, <4 x i64> %v, <4 x i1> %m, i32 %evl)
1249 declare i64 @llvm.vp.reduce.smin.v4i64(i64, <4 x i64>, <4 x i1>, i32)
1251 define signext i64 @vpreduce_smin_v4i64(i64 signext %s, <4 x i64> %v, <4 x i1> %m, i32 zeroext %evl) {
1252 ; RV32-LABEL: vpreduce_smin_v4i64:
1254 ; RV32-NEXT: addi sp, sp, -16
1255 ; RV32-NEXT: .cfi_def_cfa_offset 16
1256 ; RV32-NEXT: sw a0, 8(sp)
1257 ; RV32-NEXT: sw a1, 12(sp)
1258 ; RV32-NEXT: addi a0, sp, 8
1259 ; RV32-NEXT: vsetivli zero, 1, e64, m1, ta, ma
1260 ; RV32-NEXT: vlse64.v v10, (a0), zero
1261 ; RV32-NEXT: li a1, 32
1262 ; RV32-NEXT: vsetvli zero, a2, e64, m2, ta, ma
1263 ; RV32-NEXT: vredmin.vs v10, v8, v10, v0.t
1264 ; RV32-NEXT: vmv.x.s a0, v10
1265 ; RV32-NEXT: vsetivli zero, 1, e64, m1, ta, ma
1266 ; RV32-NEXT: vsrl.vx v8, v10, a1
1267 ; RV32-NEXT: vmv.x.s a1, v8
1268 ; RV32-NEXT: addi sp, sp, 16
1269 ; RV32-NEXT: .cfi_def_cfa_offset 0
1272 ; RV64-LABEL: vpreduce_smin_v4i64:
1274 ; RV64-NEXT: vsetivli zero, 1, e64, m1, ta, ma
1275 ; RV64-NEXT: vmv.s.x v10, a0
1276 ; RV64-NEXT: vsetvli zero, a1, e64, m2, ta, ma
1277 ; RV64-NEXT: vredmin.vs v10, v8, v10, v0.t
1278 ; RV64-NEXT: vmv.x.s a0, v10
1280 %r = call i64 @llvm.vp.reduce.smin.v4i64(i64 %s, <4 x i64> %v, <4 x i1> %m, i32 %evl)
1284 declare i64 @llvm.vp.reduce.and.v4i64(i64, <4 x i64>, <4 x i1>, i32)
1286 define signext i64 @vpreduce_and_v4i64(i64 signext %s, <4 x i64> %v, <4 x i1> %m, i32 zeroext %evl) {
1287 ; RV32-LABEL: vpreduce_and_v4i64:
1289 ; RV32-NEXT: addi sp, sp, -16
1290 ; RV32-NEXT: .cfi_def_cfa_offset 16
1291 ; RV32-NEXT: sw a0, 8(sp)
1292 ; RV32-NEXT: sw a1, 12(sp)
1293 ; RV32-NEXT: addi a0, sp, 8
1294 ; RV32-NEXT: vsetivli zero, 1, e64, m1, ta, ma
1295 ; RV32-NEXT: vlse64.v v10, (a0), zero
1296 ; RV32-NEXT: li a1, 32
1297 ; RV32-NEXT: vsetvli zero, a2, e64, m2, ta, ma
1298 ; RV32-NEXT: vredand.vs v10, v8, v10, v0.t
1299 ; RV32-NEXT: vmv.x.s a0, v10
1300 ; RV32-NEXT: vsetivli zero, 1, e64, m1, ta, ma
1301 ; RV32-NEXT: vsrl.vx v8, v10, a1
1302 ; RV32-NEXT: vmv.x.s a1, v8
1303 ; RV32-NEXT: addi sp, sp, 16
1304 ; RV32-NEXT: .cfi_def_cfa_offset 0
1307 ; RV64-LABEL: vpreduce_and_v4i64:
1309 ; RV64-NEXT: vsetivli zero, 1, e64, m1, ta, ma
1310 ; RV64-NEXT: vmv.s.x v10, a0
1311 ; RV64-NEXT: vsetvli zero, a1, e64, m2, ta, ma
1312 ; RV64-NEXT: vredand.vs v10, v8, v10, v0.t
1313 ; RV64-NEXT: vmv.x.s a0, v10
1315 %r = call i64 @llvm.vp.reduce.and.v4i64(i64 %s, <4 x i64> %v, <4 x i1> %m, i32 %evl)
1319 declare i64 @llvm.vp.reduce.or.v4i64(i64, <4 x i64>, <4 x i1>, i32)
1321 define signext i64 @vpreduce_or_v4i64(i64 signext %s, <4 x i64> %v, <4 x i1> %m, i32 zeroext %evl) {
1322 ; RV32-LABEL: vpreduce_or_v4i64:
1324 ; RV32-NEXT: addi sp, sp, -16
1325 ; RV32-NEXT: .cfi_def_cfa_offset 16
1326 ; RV32-NEXT: sw a0, 8(sp)
1327 ; RV32-NEXT: sw a1, 12(sp)
1328 ; RV32-NEXT: addi a0, sp, 8
1329 ; RV32-NEXT: vsetivli zero, 1, e64, m1, ta, ma
1330 ; RV32-NEXT: vlse64.v v10, (a0), zero
1331 ; RV32-NEXT: li a1, 32
1332 ; RV32-NEXT: vsetvli zero, a2, e64, m2, ta, ma
1333 ; RV32-NEXT: vredor.vs v10, v8, v10, v0.t
1334 ; RV32-NEXT: vmv.x.s a0, v10
1335 ; RV32-NEXT: vsetivli zero, 1, e64, m1, ta, ma
1336 ; RV32-NEXT: vsrl.vx v8, v10, a1
1337 ; RV32-NEXT: vmv.x.s a1, v8
1338 ; RV32-NEXT: addi sp, sp, 16
1339 ; RV32-NEXT: .cfi_def_cfa_offset 0
1342 ; RV64-LABEL: vpreduce_or_v4i64:
1344 ; RV64-NEXT: vsetivli zero, 1, e64, m1, ta, ma
1345 ; RV64-NEXT: vmv.s.x v10, a0
1346 ; RV64-NEXT: vsetvli zero, a1, e64, m2, ta, ma
1347 ; RV64-NEXT: vredor.vs v10, v8, v10, v0.t
1348 ; RV64-NEXT: vmv.x.s a0, v10
1350 %r = call i64 @llvm.vp.reduce.or.v4i64(i64 %s, <4 x i64> %v, <4 x i1> %m, i32 %evl)
1354 declare i64 @llvm.vp.reduce.xor.v4i64(i64, <4 x i64>, <4 x i1>, i32)
1356 define signext i64 @vpreduce_xor_v4i64(i64 signext %s, <4 x i64> %v, <4 x i1> %m, i32 zeroext %evl) {
1357 ; RV32-LABEL: vpreduce_xor_v4i64:
1359 ; RV32-NEXT: addi sp, sp, -16
1360 ; RV32-NEXT: .cfi_def_cfa_offset 16
1361 ; RV32-NEXT: sw a0, 8(sp)
1362 ; RV32-NEXT: sw a1, 12(sp)
1363 ; RV32-NEXT: addi a0, sp, 8
1364 ; RV32-NEXT: vsetivli zero, 1, e64, m1, ta, ma
1365 ; RV32-NEXT: vlse64.v v10, (a0), zero
1366 ; RV32-NEXT: li a1, 32
1367 ; RV32-NEXT: vsetvli zero, a2, e64, m2, ta, ma
1368 ; RV32-NEXT: vredxor.vs v10, v8, v10, v0.t
1369 ; RV32-NEXT: vmv.x.s a0, v10
1370 ; RV32-NEXT: vsetivli zero, 1, e64, m1, ta, ma
1371 ; RV32-NEXT: vsrl.vx v8, v10, a1
1372 ; RV32-NEXT: vmv.x.s a1, v8
1373 ; RV32-NEXT: addi sp, sp, 16
1374 ; RV32-NEXT: .cfi_def_cfa_offset 0
1377 ; RV64-LABEL: vpreduce_xor_v4i64:
1379 ; RV64-NEXT: vsetivli zero, 1, e64, m1, ta, ma
1380 ; RV64-NEXT: vmv.s.x v10, a0
1381 ; RV64-NEXT: vsetvli zero, a1, e64, m2, ta, ma
1382 ; RV64-NEXT: vredxor.vs v10, v8, v10, v0.t
1383 ; RV64-NEXT: vmv.x.s a0, v10
1385 %r = call i64 @llvm.vp.reduce.xor.v4i64(i64 %s, <4 x i64> %v, <4 x i1> %m, i32 %evl)
1389 declare i8 @llvm.vp.reduce.mul.v1i8(i8, <1 x i8>, <1 x i1>, i32)
1391 define i8 @vpreduce_mul_v1i8(i8 %s, <1 x i8> %v, <1 x i1> %m, i32 zeroext %evl) {
1392 ; RV32-LABEL: vpreduce_mul_v1i8:
1394 ; RV32-NEXT: addi sp, sp, -16
1395 ; RV32-NEXT: .cfi_def_cfa_offset 16
1396 ; RV32-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
1397 ; RV32-NEXT: .cfi_offset ra, -4
1398 ; RV32-NEXT: mv a2, a0
1399 ; RV32-NEXT: vsetivli zero, 1, e32, mf2, ta, ma
1400 ; RV32-NEXT: vmv.s.x v9, a1
1401 ; RV32-NEXT: vmsne.vi v9, v9, 0
1402 ; RV32-NEXT: vmand.mm v0, v9, v0
1403 ; RV32-NEXT: vmv.v.i v9, 1
1404 ; RV32-NEXT: vsetvli zero, zero, e8, mf8, ta, ma
1405 ; RV32-NEXT: vmerge.vvm v8, v9, v8, v0
1406 ; RV32-NEXT: vmv.x.s a0, v8
1407 ; RV32-NEXT: mv a1, a2
1408 ; RV32-NEXT: call __mulsi3
1409 ; RV32-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
1410 ; RV32-NEXT: .cfi_restore ra
1411 ; RV32-NEXT: addi sp, sp, 16
1412 ; RV32-NEXT: .cfi_def_cfa_offset 0
1415 ; RV64-LABEL: vpreduce_mul_v1i8:
1417 ; RV64-NEXT: addi sp, sp, -16
1418 ; RV64-NEXT: .cfi_def_cfa_offset 16
1419 ; RV64-NEXT: sd ra, 8(sp) # 8-byte Folded Spill
1420 ; RV64-NEXT: .cfi_offset ra, -8
1421 ; RV64-NEXT: mv a2, a0
1422 ; RV64-NEXT: vsetivli zero, 1, e32, mf2, ta, ma
1423 ; RV64-NEXT: vmv.s.x v9, a1
1424 ; RV64-NEXT: vmsne.vi v9, v9, 0
1425 ; RV64-NEXT: vmand.mm v0, v9, v0
1426 ; RV64-NEXT: vmv.v.i v9, 1
1427 ; RV64-NEXT: vsetvli zero, zero, e8, mf8, ta, ma
1428 ; RV64-NEXT: vmerge.vvm v8, v9, v8, v0
1429 ; RV64-NEXT: vmv.x.s a0, v8
1430 ; RV64-NEXT: mv a1, a2
1431 ; RV64-NEXT: call __muldi3
1432 ; RV64-NEXT: ld ra, 8(sp) # 8-byte Folded Reload
1433 ; RV64-NEXT: .cfi_restore ra
1434 ; RV64-NEXT: addi sp, sp, 16
1435 ; RV64-NEXT: .cfi_def_cfa_offset 0
1437 %r = call i8 @llvm.vp.reduce.mul.v1i8(i8 %s, <1 x i8> %v, <1 x i1> %m, i32 %evl)
1441 declare i8 @llvm.vp.reduce.mul.v2i8(i8, <2 x i8>, <2 x i1>, i32)
1443 define signext i8 @vpreduce_mul_v2i8(i8 signext %s, <2 x i8> %v, <2 x i1> %m, i32 zeroext %evl) {
1444 ; RV32-LABEL: vpreduce_mul_v2i8:
1446 ; RV32-NEXT: addi sp, sp, -16
1447 ; RV32-NEXT: .cfi_def_cfa_offset 16
1448 ; RV32-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
1449 ; RV32-NEXT: .cfi_offset ra, -4
1450 ; RV32-NEXT: mv a2, a0
1451 ; RV32-NEXT: vsetivli zero, 2, e32, mf2, ta, ma
1452 ; RV32-NEXT: vid.v v9
1453 ; RV32-NEXT: vmsltu.vx v9, v9, a1
1454 ; RV32-NEXT: vmand.mm v0, v9, v0
1455 ; RV32-NEXT: vsetvli zero, zero, e8, mf8, ta, ma
1456 ; RV32-NEXT: vmv.v.i v9, 1
1457 ; RV32-NEXT: vmerge.vvm v8, v9, v8, v0
1458 ; RV32-NEXT: vrgather.vi v9, v8, 1
1459 ; RV32-NEXT: vmul.vv v8, v8, v9
1460 ; RV32-NEXT: vmv.x.s a0, v8
1461 ; RV32-NEXT: mv a1, a2
1462 ; RV32-NEXT: call __mulsi3
1463 ; RV32-NEXT: slli a0, a0, 24
1464 ; RV32-NEXT: srai a0, a0, 24
1465 ; RV32-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
1466 ; RV32-NEXT: .cfi_restore ra
1467 ; RV32-NEXT: addi sp, sp, 16
1468 ; RV32-NEXT: .cfi_def_cfa_offset 0
1471 ; RV64-LABEL: vpreduce_mul_v2i8:
1473 ; RV64-NEXT: addi sp, sp, -16
1474 ; RV64-NEXT: .cfi_def_cfa_offset 16
1475 ; RV64-NEXT: sd ra, 8(sp) # 8-byte Folded Spill
1476 ; RV64-NEXT: .cfi_offset ra, -8
1477 ; RV64-NEXT: mv a2, a0
1478 ; RV64-NEXT: vsetivli zero, 2, e32, mf2, ta, ma
1479 ; RV64-NEXT: vid.v v9
1480 ; RV64-NEXT: vmsltu.vx v9, v9, a1
1481 ; RV64-NEXT: vmand.mm v0, v9, v0
1482 ; RV64-NEXT: vsetvli zero, zero, e8, mf8, ta, ma
1483 ; RV64-NEXT: vmv.v.i v9, 1
1484 ; RV64-NEXT: vmerge.vvm v8, v9, v8, v0
1485 ; RV64-NEXT: vrgather.vi v9, v8, 1
1486 ; RV64-NEXT: vmul.vv v8, v8, v9
1487 ; RV64-NEXT: vmv.x.s a0, v8
1488 ; RV64-NEXT: mv a1, a2
1489 ; RV64-NEXT: call __muldi3
1490 ; RV64-NEXT: slli a0, a0, 56
1491 ; RV64-NEXT: srai a0, a0, 56
1492 ; RV64-NEXT: ld ra, 8(sp) # 8-byte Folded Reload
1493 ; RV64-NEXT: .cfi_restore ra
1494 ; RV64-NEXT: addi sp, sp, 16
1495 ; RV64-NEXT: .cfi_def_cfa_offset 0
1497 %r = call i8 @llvm.vp.reduce.mul.v2i8(i8 %s, <2 x i8> %v, <2 x i1> %m, i32 %evl)
1501 declare i8 @llvm.vp.reduce.mul.v4i8(i8, <4 x i8>, <4 x i1>, i32)
1503 define signext i8 @vpreduce_mul_v4i8(i8 signext %s, <4 x i8> %v, <4 x i1> %m, i32 zeroext %evl) {
1504 ; RV32-LABEL: vpreduce_mul_v4i8:
1506 ; RV32-NEXT: addi sp, sp, -16
1507 ; RV32-NEXT: .cfi_def_cfa_offset 16
1508 ; RV32-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
1509 ; RV32-NEXT: .cfi_offset ra, -4
1510 ; RV32-NEXT: mv a2, a0
1511 ; RV32-NEXT: vsetivli zero, 4, e32, m1, ta, ma
1512 ; RV32-NEXT: vid.v v9
1513 ; RV32-NEXT: vmsltu.vx v9, v9, a1
1514 ; RV32-NEXT: vmand.mm v0, v9, v0
1515 ; RV32-NEXT: vsetvli zero, zero, e8, mf4, ta, ma
1516 ; RV32-NEXT: vmv.v.i v9, 1
1517 ; RV32-NEXT: vmerge.vvm v8, v9, v8, v0
1518 ; RV32-NEXT: vslidedown.vi v9, v8, 2
1519 ; RV32-NEXT: vmul.vv v8, v8, v9
1520 ; RV32-NEXT: vrgather.vi v9, v8, 1
1521 ; RV32-NEXT: vmul.vv v8, v8, v9
1522 ; RV32-NEXT: vmv.x.s a0, v8
1523 ; RV32-NEXT: mv a1, a2
1524 ; RV32-NEXT: call __mulsi3
1525 ; RV32-NEXT: slli a0, a0, 24
1526 ; RV32-NEXT: srai a0, a0, 24
1527 ; RV32-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
1528 ; RV32-NEXT: .cfi_restore ra
1529 ; RV32-NEXT: addi sp, sp, 16
1530 ; RV32-NEXT: .cfi_def_cfa_offset 0
1533 ; RV64-LABEL: vpreduce_mul_v4i8:
1535 ; RV64-NEXT: addi sp, sp, -16
1536 ; RV64-NEXT: .cfi_def_cfa_offset 16
1537 ; RV64-NEXT: sd ra, 8(sp) # 8-byte Folded Spill
1538 ; RV64-NEXT: .cfi_offset ra, -8
1539 ; RV64-NEXT: mv a2, a0
1540 ; RV64-NEXT: vsetivli zero, 4, e32, m1, ta, ma
1541 ; RV64-NEXT: vid.v v9
1542 ; RV64-NEXT: vmsltu.vx v9, v9, a1
1543 ; RV64-NEXT: vmand.mm v0, v9, v0
1544 ; RV64-NEXT: vsetvli zero, zero, e8, mf4, ta, ma
1545 ; RV64-NEXT: vmv.v.i v9, 1
1546 ; RV64-NEXT: vmerge.vvm v8, v9, v8, v0
1547 ; RV64-NEXT: vslidedown.vi v9, v8, 2
1548 ; RV64-NEXT: vmul.vv v8, v8, v9
1549 ; RV64-NEXT: vrgather.vi v9, v8, 1
1550 ; RV64-NEXT: vmul.vv v8, v8, v9
1551 ; RV64-NEXT: vmv.x.s a0, v8
1552 ; RV64-NEXT: mv a1, a2
1553 ; RV64-NEXT: call __muldi3
1554 ; RV64-NEXT: slli a0, a0, 56
1555 ; RV64-NEXT: srai a0, a0, 56
1556 ; RV64-NEXT: ld ra, 8(sp) # 8-byte Folded Reload
1557 ; RV64-NEXT: .cfi_restore ra
1558 ; RV64-NEXT: addi sp, sp, 16
1559 ; RV64-NEXT: .cfi_def_cfa_offset 0
1561 %r = call i8 @llvm.vp.reduce.mul.v4i8(i8 %s, <4 x i8> %v, <4 x i1> %m, i32 %evl)
1565 declare i8 @llvm.vp.reduce.mul.v8i8(i8, <8 x i8>, <8 x i1>, i32)
1567 define signext i8 @vpreduce_mul_v8i8(i8 signext %s, <8 x i8> %v, <8 x i1> %m, i32 zeroext %evl) {
1568 ; RV32-LABEL: vpreduce_mul_v8i8:
1570 ; RV32-NEXT: addi sp, sp, -16
1571 ; RV32-NEXT: .cfi_def_cfa_offset 16
1572 ; RV32-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
1573 ; RV32-NEXT: .cfi_offset ra, -4
1574 ; RV32-NEXT: mv a2, a0
1575 ; RV32-NEXT: vsetivli zero, 8, e32, m2, ta, ma
1576 ; RV32-NEXT: vid.v v10
1577 ; RV32-NEXT: vmsltu.vx v9, v10, a1
1578 ; RV32-NEXT: vmand.mm v0, v9, v0
1579 ; RV32-NEXT: vsetvli zero, zero, e8, mf2, ta, ma
1580 ; RV32-NEXT: vmv.v.i v9, 1
1581 ; RV32-NEXT: vmerge.vvm v8, v9, v8, v0
1582 ; RV32-NEXT: vslidedown.vi v9, v8, 4
1583 ; RV32-NEXT: vmul.vv v8, v8, v9
1584 ; RV32-NEXT: vslidedown.vi v9, v8, 2
1585 ; RV32-NEXT: vmul.vv v8, v8, v9
1586 ; RV32-NEXT: vrgather.vi v9, v8, 1
1587 ; RV32-NEXT: vmul.vv v8, v8, v9
1588 ; RV32-NEXT: vmv.x.s a0, v8
1589 ; RV32-NEXT: mv a1, a2
1590 ; RV32-NEXT: call __mulsi3
1591 ; RV32-NEXT: slli a0, a0, 24
1592 ; RV32-NEXT: srai a0, a0, 24
1593 ; RV32-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
1594 ; RV32-NEXT: .cfi_restore ra
1595 ; RV32-NEXT: addi sp, sp, 16
1596 ; RV32-NEXT: .cfi_def_cfa_offset 0
1599 ; RV64-LABEL: vpreduce_mul_v8i8:
1601 ; RV64-NEXT: addi sp, sp, -16
1602 ; RV64-NEXT: .cfi_def_cfa_offset 16
1603 ; RV64-NEXT: sd ra, 8(sp) # 8-byte Folded Spill
1604 ; RV64-NEXT: .cfi_offset ra, -8
1605 ; RV64-NEXT: mv a2, a0
1606 ; RV64-NEXT: vsetivli zero, 8, e32, m2, ta, ma
1607 ; RV64-NEXT: vid.v v10
1608 ; RV64-NEXT: vmsltu.vx v9, v10, a1
1609 ; RV64-NEXT: vmand.mm v0, v9, v0
1610 ; RV64-NEXT: vsetvli zero, zero, e8, mf2, ta, ma
1611 ; RV64-NEXT: vmv.v.i v9, 1
1612 ; RV64-NEXT: vmerge.vvm v8, v9, v8, v0
1613 ; RV64-NEXT: vslidedown.vi v9, v8, 4
1614 ; RV64-NEXT: vmul.vv v8, v8, v9
1615 ; RV64-NEXT: vslidedown.vi v9, v8, 2
1616 ; RV64-NEXT: vmul.vv v8, v8, v9
1617 ; RV64-NEXT: vrgather.vi v9, v8, 1
1618 ; RV64-NEXT: vmul.vv v8, v8, v9
1619 ; RV64-NEXT: vmv.x.s a0, v8
1620 ; RV64-NEXT: mv a1, a2
1621 ; RV64-NEXT: call __muldi3
1622 ; RV64-NEXT: slli a0, a0, 56
1623 ; RV64-NEXT: srai a0, a0, 56
1624 ; RV64-NEXT: ld ra, 8(sp) # 8-byte Folded Reload
1625 ; RV64-NEXT: .cfi_restore ra
1626 ; RV64-NEXT: addi sp, sp, 16
1627 ; RV64-NEXT: .cfi_def_cfa_offset 0
1629 %r = call i8 @llvm.vp.reduce.mul.v8i8(i8 %s, <8 x i8> %v, <8 x i1> %m, i32 %evl)
declare i8 @llvm.vp.reduce.mul.v16i8(i8, <16 x i8>, <16 x i1>, i32)
define signext i8 @vpreduce_mul_v16i8(i8 signext %s, <16 x i8> %v, <16 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vpreduce_mul_v16i8:
; RV32-NEXT: addi sp, sp, -16
; RV32-NEXT: .cfi_def_cfa_offset 16
; RV32-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
; RV32-NEXT: .cfi_offset ra, -4
; RV32-NEXT: mv a2, a0
; RV32-NEXT: vsetivli zero, 16, e32, m4, ta, ma
; RV32-NEXT: vid.v v12
; RV32-NEXT: vmsltu.vx v9, v12, a1
; RV32-NEXT: vmand.mm v0, v9, v0
; RV32-NEXT: vsetvli zero, zero, e8, m1, ta, ma
; RV32-NEXT: vmv.v.i v9, 1
; RV32-NEXT: vmerge.vvm v8, v9, v8, v0
; RV32-NEXT: vslidedown.vi v9, v8, 8
; RV32-NEXT: vmul.vv v8, v8, v9
; RV32-NEXT: vslidedown.vi v9, v8, 4
; RV32-NEXT: vmul.vv v8, v8, v9
; RV32-NEXT: vslidedown.vi v9, v8, 2
; RV32-NEXT: vmul.vv v8, v8, v9
; RV32-NEXT: vrgather.vi v9, v8, 1
; RV32-NEXT: vmul.vv v8, v8, v9
; RV32-NEXT: vmv.x.s a0, v8
; RV32-NEXT: mv a1, a2
; RV32-NEXT: call __mulsi3
; RV32-NEXT: slli a0, a0, 24
; RV32-NEXT: srai a0, a0, 24
; RV32-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
; RV32-NEXT: .cfi_restore ra
; RV32-NEXT: addi sp, sp, 16
; RV32-NEXT: .cfi_def_cfa_offset 0
; RV64-LABEL: vpreduce_mul_v16i8:
; RV64-NEXT: addi sp, sp, -16
; RV64-NEXT: .cfi_def_cfa_offset 16
; RV64-NEXT: sd ra, 8(sp) # 8-byte Folded Spill
; RV64-NEXT: .cfi_offset ra, -8
; RV64-NEXT: mv a2, a0
; RV64-NEXT: vsetivli zero, 16, e32, m4, ta, ma
; RV64-NEXT: vid.v v12
; RV64-NEXT: vmsltu.vx v9, v12, a1
; RV64-NEXT: vmand.mm v0, v9, v0
; RV64-NEXT: vsetvli zero, zero, e8, m1, ta, ma
; RV64-NEXT: vmv.v.i v9, 1
; RV64-NEXT: vmerge.vvm v8, v9, v8, v0
; RV64-NEXT: vslidedown.vi v9, v8, 8
; RV64-NEXT: vmul.vv v8, v8, v9
; RV64-NEXT: vslidedown.vi v9, v8, 4
; RV64-NEXT: vmul.vv v8, v8, v9
; RV64-NEXT: vslidedown.vi v9, v8, 2
; RV64-NEXT: vmul.vv v8, v8, v9
; RV64-NEXT: vrgather.vi v9, v8, 1
; RV64-NEXT: vmul.vv v8, v8, v9
; RV64-NEXT: vmv.x.s a0, v8
; RV64-NEXT: mv a1, a2
; RV64-NEXT: call __muldi3
; RV64-NEXT: slli a0, a0, 56
; RV64-NEXT: srai a0, a0, 56
; RV64-NEXT: ld ra, 8(sp) # 8-byte Folded Reload
; RV64-NEXT: .cfi_restore ra
; RV64-NEXT: addi sp, sp, 16
; RV64-NEXT: .cfi_def_cfa_offset 0
  %r = call i8 @llvm.vp.reduce.mul.v16i8(i8 %s, <16 x i8> %v, <16 x i1> %m, i32 %evl)
  ret i8 %r
}
declare i8 @llvm.vp.reduce.mul.v32i8(i8, <32 x i8>, <32 x i1>, i32)
define signext i8 @vpreduce_mul_v32i8(i8 signext %s, <32 x i8> %v, <32 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vpreduce_mul_v32i8:
; RV32-NEXT: addi sp, sp, -16
; RV32-NEXT: .cfi_def_cfa_offset 16
; RV32-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
; RV32-NEXT: .cfi_offset ra, -4
; RV32-NEXT: mv a2, a0
; RV32-NEXT: li a0, 32
; RV32-NEXT: vsetvli zero, a0, e32, m8, ta, ma
; RV32-NEXT: vid.v v16
; RV32-NEXT: vmsltu.vx v10, v16, a1
; RV32-NEXT: vmand.mm v0, v10, v0
; RV32-NEXT: vsetvli zero, zero, e8, m2, ta, ma
; RV32-NEXT: vmv.v.i v10, 1
; RV32-NEXT: vmerge.vvm v8, v10, v8, v0
; RV32-NEXT: vslidedown.vi v10, v8, 16
; RV32-NEXT: vmul.vv v8, v8, v10
; RV32-NEXT: vslidedown.vi v10, v8, 8
; RV32-NEXT: vmul.vv v8, v8, v10
; RV32-NEXT: vslidedown.vi v10, v8, 4
; RV32-NEXT: vmul.vv v8, v8, v10
; RV32-NEXT: vslidedown.vi v10, v8, 2
; RV32-NEXT: vmul.vv v8, v8, v10
; RV32-NEXT: vrgather.vi v10, v8, 1
; RV32-NEXT: vmul.vv v8, v8, v10
; RV32-NEXT: vmv.x.s a0, v8
; RV32-NEXT: mv a1, a2
; RV32-NEXT: call __mulsi3
; RV32-NEXT: slli a0, a0, 24
; RV32-NEXT: srai a0, a0, 24
; RV32-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
; RV32-NEXT: .cfi_restore ra
; RV32-NEXT: addi sp, sp, 16
; RV32-NEXT: .cfi_def_cfa_offset 0
; RV64-LABEL: vpreduce_mul_v32i8:
; RV64-NEXT: addi sp, sp, -16
; RV64-NEXT: .cfi_def_cfa_offset 16
; RV64-NEXT: sd ra, 8(sp) # 8-byte Folded Spill
; RV64-NEXT: .cfi_offset ra, -8
; RV64-NEXT: mv a2, a0
; RV64-NEXT: li a0, 32
; RV64-NEXT: vsetvli zero, a0, e32, m8, ta, ma
; RV64-NEXT: vid.v v16
; RV64-NEXT: vmsltu.vx v10, v16, a1
; RV64-NEXT: vmand.mm v0, v10, v0
; RV64-NEXT: vsetvli zero, zero, e8, m2, ta, ma
; RV64-NEXT: vmv.v.i v10, 1
; RV64-NEXT: vmerge.vvm v8, v10, v8, v0
; RV64-NEXT: vslidedown.vi v10, v8, 16
; RV64-NEXT: vmul.vv v8, v8, v10
; RV64-NEXT: vslidedown.vi v10, v8, 8
; RV64-NEXT: vmul.vv v8, v8, v10
; RV64-NEXT: vslidedown.vi v10, v8, 4
; RV64-NEXT: vmul.vv v8, v8, v10
; RV64-NEXT: vslidedown.vi v10, v8, 2
; RV64-NEXT: vmul.vv v8, v8, v10
; RV64-NEXT: vrgather.vi v10, v8, 1
; RV64-NEXT: vmul.vv v8, v8, v10
; RV64-NEXT: vmv.x.s a0, v8
; RV64-NEXT: mv a1, a2
; RV64-NEXT: call __muldi3
; RV64-NEXT: slli a0, a0, 56
; RV64-NEXT: srai a0, a0, 56
; RV64-NEXT: ld ra, 8(sp) # 8-byte Folded Reload
; RV64-NEXT: .cfi_restore ra
; RV64-NEXT: addi sp, sp, 16
; RV64-NEXT: .cfi_def_cfa_offset 0
  %r = call i8 @llvm.vp.reduce.mul.v32i8(i8 %s, <32 x i8> %v, <32 x i1> %m, i32 %evl)
  ret i8 %r
}
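; For v64i8 below, the vid-vs-EVL portion of the mask cannot be produced by a
; single e32 vid.v (only 32 elements fit at m8 under the minimum-VLEN
; assumption), so the checks expect it to be built in two 32-element halves:
; vid.v for the low half and a constant-pool vector (.LCPI72_0, presumably the
; indices 32..63) sign-extended with vsext.vf4 for the high half, then joined
; with vslideup.vi before the vmand.mm with the vp mask.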
declare i8 @llvm.vp.reduce.mul.v64i8(i8, <64 x i8>, <64 x i1>, i32)
define signext i8 @vpreduce_mul_v64i8(i8 signext %s, <64 x i8> %v, <64 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vpreduce_mul_v64i8:
; RV32-NEXT: addi sp, sp, -16
; RV32-NEXT: .cfi_def_cfa_offset 16
; RV32-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
; RV32-NEXT: .cfi_offset ra, -4
; RV32-NEXT: mv a2, a0
; RV32-NEXT: li a0, 32
; RV32-NEXT: lui a3, %hi(.LCPI72_0)
; RV32-NEXT: addi a3, a3, %lo(.LCPI72_0)
; RV32-NEXT: vsetvli zero, a0, e32, m8, ta, ma
; RV32-NEXT: vle8.v v12, (a3)
; RV32-NEXT: vid.v v16
; RV32-NEXT: vmsltu.vx v14, v16, a1
; RV32-NEXT: li a3, 64
; RV32-NEXT: vsext.vf4 v16, v12
; RV32-NEXT: vmsltu.vx v12, v16, a1
; RV32-NEXT: vsetivli zero, 8, e8, mf2, ta, ma
; RV32-NEXT: vslideup.vi v14, v12, 4
; RV32-NEXT: vsetvli zero, a3, e8, m4, ta, ma
; RV32-NEXT: vmand.mm v0, v14, v0
; RV32-NEXT: vmv.v.i v12, 1
; RV32-NEXT: vmerge.vvm v8, v12, v8, v0
; RV32-NEXT: vslidedown.vx v12, v8, a0
; RV32-NEXT: vmul.vv v8, v8, v12
; RV32-NEXT: vslidedown.vi v12, v8, 16
; RV32-NEXT: vmul.vv v8, v8, v12
; RV32-NEXT: vslidedown.vi v12, v8, 8
; RV32-NEXT: vmul.vv v8, v8, v12
; RV32-NEXT: vslidedown.vi v12, v8, 4
; RV32-NEXT: vmul.vv v8, v8, v12
; RV32-NEXT: vslidedown.vi v12, v8, 2
; RV32-NEXT: vmul.vv v8, v8, v12
; RV32-NEXT: vrgather.vi v12, v8, 1
; RV32-NEXT: vmul.vv v8, v8, v12
; RV32-NEXT: vmv.x.s a0, v8
; RV32-NEXT: mv a1, a2
; RV32-NEXT: call __mulsi3
; RV32-NEXT: slli a0, a0, 24
; RV32-NEXT: srai a0, a0, 24
; RV32-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
; RV32-NEXT: .cfi_restore ra
; RV32-NEXT: addi sp, sp, 16
; RV32-NEXT: .cfi_def_cfa_offset 0
; RV64-LABEL: vpreduce_mul_v64i8:
; RV64-NEXT: addi sp, sp, -16
; RV64-NEXT: .cfi_def_cfa_offset 16
; RV64-NEXT: sd ra, 8(sp) # 8-byte Folded Spill
; RV64-NEXT: .cfi_offset ra, -8
; RV64-NEXT: mv a2, a0
; RV64-NEXT: li a0, 32
; RV64-NEXT: lui a3, %hi(.LCPI72_0)
; RV64-NEXT: addi a3, a3, %lo(.LCPI72_0)
; RV64-NEXT: vsetvli zero, a0, e32, m8, ta, ma
; RV64-NEXT: vle8.v v12, (a3)
; RV64-NEXT: vid.v v16
; RV64-NEXT: vmsltu.vx v14, v16, a1
; RV64-NEXT: li a3, 64
; RV64-NEXT: vsext.vf4 v16, v12
; RV64-NEXT: vmsltu.vx v12, v16, a1
; RV64-NEXT: vsetivli zero, 8, e8, mf2, ta, ma
; RV64-NEXT: vslideup.vi v14, v12, 4
; RV64-NEXT: vsetvli zero, a3, e8, m4, ta, ma
; RV64-NEXT: vmand.mm v0, v14, v0
; RV64-NEXT: vmv.v.i v12, 1
; RV64-NEXT: vmerge.vvm v8, v12, v8, v0
; RV64-NEXT: vslidedown.vx v12, v8, a0
; RV64-NEXT: vmul.vv v8, v8, v12
; RV64-NEXT: vslidedown.vi v12, v8, 16
; RV64-NEXT: vmul.vv v8, v8, v12
; RV64-NEXT: vslidedown.vi v12, v8, 8
; RV64-NEXT: vmul.vv v8, v8, v12
; RV64-NEXT: vslidedown.vi v12, v8, 4
; RV64-NEXT: vmul.vv v8, v8, v12
; RV64-NEXT: vslidedown.vi v12, v8, 2
; RV64-NEXT: vmul.vv v8, v8, v12
; RV64-NEXT: vrgather.vi v12, v8, 1
; RV64-NEXT: vmul.vv v8, v8, v12
; RV64-NEXT: vmv.x.s a0, v8
; RV64-NEXT: mv a1, a2
; RV64-NEXT: call __muldi3
; RV64-NEXT: slli a0, a0, 56
; RV64-NEXT: srai a0, a0, 56
; RV64-NEXT: ld ra, 8(sp) # 8-byte Folded Reload
; RV64-NEXT: .cfi_restore ra
; RV64-NEXT: addi sp, sp, 16
; RV64-NEXT: .cfi_def_cfa_offset 0
  %r = call i8 @llvm.vp.reduce.mul.v64i8(i8 %s, <64 x i8> %v, <64 x i1> %m, i32 %evl)
  ret i8 %r
}
; Test the case where the start value is the first element of the vector.
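; Here no separate vmv.s.x of the start value should be needed: because %s is
; element 0 of %v, the checks expect the source register itself to be reused
; as the scalar operand of vredand.vs, which only reads element 0 of vs1.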
define zeroext i8 @front_ele_v4i8(<4 x i8> %v, <4 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: front_ele_v4i8:
; CHECK-NEXT: vsetvli zero, a0, e8, mf4, ta, ma
; CHECK-NEXT: vredand.vs v8, v8, v8, v0.t
; CHECK-NEXT: vmv.x.s a0, v8
; CHECK-NEXT: andi a0, a0, 255
  %s = extractelement <4 x i8> %v, i64 0
  %r = call i8 @llvm.vp.reduce.and.v4i8(i8 %s, <4 x i8> %v, <4 x i1> %m, i32 %evl)
  ret i8 %r
}
; Test the case where the start value is the first element of a vector longer than M1.
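; Same expectation as above, but %v occupies an LMUL=2 register group; reusing
; v8 directly as the vs1 operand is still fine since only its first element is
; read by the reduction.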
declare i8 @llvm.vp.reduce.and.v32i8(i8, <32 x i8>, <32 x i1>, i32)
define zeroext i8 @front_ele_v32i8(<32 x i8> %v, <32 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: front_ele_v32i8:
; CHECK-NEXT: vsetvli zero, a0, e8, m2, ta, ma
; CHECK-NEXT: vredand.vs v8, v8, v8, v0.t
; CHECK-NEXT: vmv.x.s a0, v8
; CHECK-NEXT: andi a0, a0, 255
  %s = extractelement <32 x i8> %v, i64 0
  %r = call i8 @llvm.vp.reduce.and.v32i8(i8 %s, <32 x i8> %v, <32 x i1> %m, i32 %evl)
  ret i8 %r
}