1 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2 ; RUN: llc -mtriple=riscv32 -mattr=+v -verify-machineinstrs < %s \
3 ; RUN: | FileCheck %s --check-prefixes=CHECK,RV32
4 ; RUN: llc -mtriple=riscv64 -mattr=+v -verify-machineinstrs < %s \
5 ; RUN: | FileCheck %s --check-prefixes=CHECK,RV64
7 declare i8 @llvm.vp.reduce.add.v2i8(i8, <2 x i8>, <2 x i1>, i32)
9 define signext i8 @vpreduce_add_v2i8(i8 signext %s, <2 x i8> %v, <2 x i1> %m, i32 zeroext %evl) {
10 ; CHECK-LABEL: vpreduce_add_v2i8:
12 ; CHECK-NEXT: vsetivli zero, 1, e8, mf8, ta, ma
13 ; CHECK-NEXT: vmv.s.x v9, a0
14 ; CHECK-NEXT: vsetvli zero, a1, e8, mf8, ta, ma
15 ; CHECK-NEXT: vredsum.vs v9, v8, v9, v0.t
16 ; CHECK-NEXT: vmv.x.s a0, v9
18 %r = call i8 @llvm.vp.reduce.add.v2i8(i8 %s, <2 x i8> %v, <2 x i1> %m, i32 %evl)
22 declare i8 @llvm.vp.reduce.umax.v2i8(i8, <2 x i8>, <2 x i1>, i32)
24 define signext i8 @vpreduce_umax_v2i8(i8 signext %s, <2 x i8> %v, <2 x i1> %m, i32 zeroext %evl) {
25 ; CHECK-LABEL: vpreduce_umax_v2i8:
27 ; CHECK-NEXT: andi a0, a0, 255
28 ; CHECK-NEXT: vsetivli zero, 1, e8, mf8, ta, ma
29 ; CHECK-NEXT: vmv.s.x v9, a0
30 ; CHECK-NEXT: vsetvli zero, a1, e8, mf8, ta, ma
31 ; CHECK-NEXT: vredmaxu.vs v9, v8, v9, v0.t
32 ; CHECK-NEXT: vmv.x.s a0, v9
34 %r = call i8 @llvm.vp.reduce.umax.v2i8(i8 %s, <2 x i8> %v, <2 x i1> %m, i32 %evl)
38 declare i8 @llvm.vp.reduce.smax.v2i8(i8, <2 x i8>, <2 x i1>, i32)
40 define signext i8 @vpreduce_smax_v2i8(i8 signext %s, <2 x i8> %v, <2 x i1> %m, i32 zeroext %evl) {
41 ; CHECK-LABEL: vpreduce_smax_v2i8:
43 ; CHECK-NEXT: vsetivli zero, 1, e8, mf8, ta, ma
44 ; CHECK-NEXT: vmv.s.x v9, a0
45 ; CHECK-NEXT: vsetvli zero, a1, e8, mf8, ta, ma
46 ; CHECK-NEXT: vredmax.vs v9, v8, v9, v0.t
47 ; CHECK-NEXT: vmv.x.s a0, v9
49 %r = call i8 @llvm.vp.reduce.smax.v2i8(i8 %s, <2 x i8> %v, <2 x i1> %m, i32 %evl)
53 declare i8 @llvm.vp.reduce.umin.v2i8(i8, <2 x i8>, <2 x i1>, i32)
55 define signext i8 @vpreduce_umin_v2i8(i8 signext %s, <2 x i8> %v, <2 x i1> %m, i32 zeroext %evl) {
56 ; CHECK-LABEL: vpreduce_umin_v2i8:
58 ; CHECK-NEXT: andi a0, a0, 255
59 ; CHECK-NEXT: vsetivli zero, 1, e8, mf8, ta, ma
60 ; CHECK-NEXT: vmv.s.x v9, a0
61 ; CHECK-NEXT: vsetvli zero, a1, e8, mf8, ta, ma
62 ; CHECK-NEXT: vredminu.vs v9, v8, v9, v0.t
63 ; CHECK-NEXT: vmv.x.s a0, v9
65 %r = call i8 @llvm.vp.reduce.umin.v2i8(i8 %s, <2 x i8> %v, <2 x i1> %m, i32 %evl)
69 declare i8 @llvm.vp.reduce.smin.v2i8(i8, <2 x i8>, <2 x i1>, i32)
71 define signext i8 @vpreduce_smin_v2i8(i8 signext %s, <2 x i8> %v, <2 x i1> %m, i32 zeroext %evl) {
72 ; CHECK-LABEL: vpreduce_smin_v2i8:
74 ; CHECK-NEXT: vsetivli zero, 1, e8, mf8, ta, ma
75 ; CHECK-NEXT: vmv.s.x v9, a0
76 ; CHECK-NEXT: vsetvli zero, a1, e8, mf8, ta, ma
77 ; CHECK-NEXT: vredmin.vs v9, v8, v9, v0.t
78 ; CHECK-NEXT: vmv.x.s a0, v9
80 %r = call i8 @llvm.vp.reduce.smin.v2i8(i8 %s, <2 x i8> %v, <2 x i1> %m, i32 %evl)
84 declare i8 @llvm.vp.reduce.and.v2i8(i8, <2 x i8>, <2 x i1>, i32)
86 define signext i8 @vpreduce_and_v2i8(i8 signext %s, <2 x i8> %v, <2 x i1> %m, i32 zeroext %evl) {
87 ; CHECK-LABEL: vpreduce_and_v2i8:
89 ; CHECK-NEXT: vsetivli zero, 1, e8, mf8, ta, ma
90 ; CHECK-NEXT: vmv.s.x v9, a0
91 ; CHECK-NEXT: vsetvli zero, a1, e8, mf8, ta, ma
92 ; CHECK-NEXT: vredand.vs v9, v8, v9, v0.t
93 ; CHECK-NEXT: vmv.x.s a0, v9
95 %r = call i8 @llvm.vp.reduce.and.v2i8(i8 %s, <2 x i8> %v, <2 x i1> %m, i32 %evl)
99 declare i8 @llvm.vp.reduce.or.v2i8(i8, <2 x i8>, <2 x i1>, i32)
101 define signext i8 @vpreduce_or_v2i8(i8 signext %s, <2 x i8> %v, <2 x i1> %m, i32 zeroext %evl) {
102 ; CHECK-LABEL: vpreduce_or_v2i8:
104 ; CHECK-NEXT: vsetivli zero, 1, e8, mf8, ta, ma
105 ; CHECK-NEXT: vmv.s.x v9, a0
106 ; CHECK-NEXT: vsetvli zero, a1, e8, mf8, ta, ma
107 ; CHECK-NEXT: vredor.vs v9, v8, v9, v0.t
108 ; CHECK-NEXT: vmv.x.s a0, v9
110 %r = call i8 @llvm.vp.reduce.or.v2i8(i8 %s, <2 x i8> %v, <2 x i1> %m, i32 %evl)
114 declare i8 @llvm.vp.reduce.xor.v2i8(i8, <2 x i8>, <2 x i1>, i32)
116 define signext i8 @vpreduce_xor_v2i8(i8 signext %s, <2 x i8> %v, <2 x i1> %m, i32 zeroext %evl) {
117 ; CHECK-LABEL: vpreduce_xor_v2i8:
119 ; CHECK-NEXT: vsetivli zero, 1, e8, mf8, ta, ma
120 ; CHECK-NEXT: vmv.s.x v9, a0
121 ; CHECK-NEXT: vsetvli zero, a1, e8, mf8, ta, ma
122 ; CHECK-NEXT: vredxor.vs v9, v8, v9, v0.t
123 ; CHECK-NEXT: vmv.x.s a0, v9
125 %r = call i8 @llvm.vp.reduce.xor.v2i8(i8 %s, <2 x i8> %v, <2 x i1> %m, i32 %evl)
129 declare i8 @llvm.vp.reduce.umin.v3i8(i8, <3 x i8>, <3 x i1>, i32)
131 define signext i8 @vpreduce_umin_v3i8(i8 signext %s, <3 x i8> %v, <3 x i1> %m, i32 zeroext %evl) {
132 ; CHECK-LABEL: vpreduce_umin_v3i8:
134 ; CHECK-NEXT: andi a0, a0, 255
135 ; CHECK-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
136 ; CHECK-NEXT: vmv.s.x v9, a0
137 ; CHECK-NEXT: vsetvli zero, a1, e8, mf4, ta, ma
138 ; CHECK-NEXT: vredminu.vs v9, v8, v9, v0.t
139 ; CHECK-NEXT: vmv.x.s a0, v9
141 %r = call i8 @llvm.vp.reduce.umin.v3i8(i8 %s, <3 x i8> %v, <3 x i1> %m, i32 %evl)
145 declare i8 @llvm.vp.reduce.add.v4i8(i8, <4 x i8>, <4 x i1>, i32)
147 define signext i8 @vpreduce_add_v4i8(i8 signext %s, <4 x i8> %v, <4 x i1> %m, i32 zeroext %evl) {
148 ; CHECK-LABEL: vpreduce_add_v4i8:
150 ; CHECK-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
151 ; CHECK-NEXT: vmv.s.x v9, a0
152 ; CHECK-NEXT: vsetvli zero, a1, e8, mf4, ta, ma
153 ; CHECK-NEXT: vredsum.vs v9, v8, v9, v0.t
154 ; CHECK-NEXT: vmv.x.s a0, v9
156 %r = call i8 @llvm.vp.reduce.add.v4i8(i8 %s, <4 x i8> %v, <4 x i1> %m, i32 %evl)
160 declare i8 @llvm.vp.reduce.umax.v4i8(i8, <4 x i8>, <4 x i1>, i32)
162 define signext i8 @vpreduce_umax_v4i8(i8 signext %s, <4 x i8> %v, <4 x i1> %m, i32 zeroext %evl) {
163 ; CHECK-LABEL: vpreduce_umax_v4i8:
165 ; CHECK-NEXT: andi a0, a0, 255
166 ; CHECK-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
167 ; CHECK-NEXT: vmv.s.x v9, a0
168 ; CHECK-NEXT: vsetvli zero, a1, e8, mf4, ta, ma
169 ; CHECK-NEXT: vredmaxu.vs v9, v8, v9, v0.t
170 ; CHECK-NEXT: vmv.x.s a0, v9
172 %r = call i8 @llvm.vp.reduce.umax.v4i8(i8 %s, <4 x i8> %v, <4 x i1> %m, i32 %evl)
176 declare i8 @llvm.vp.reduce.smax.v4i8(i8, <4 x i8>, <4 x i1>, i32)
178 define signext i8 @vpreduce_smax_v4i8(i8 signext %s, <4 x i8> %v, <4 x i1> %m, i32 zeroext %evl) {
179 ; CHECK-LABEL: vpreduce_smax_v4i8:
181 ; CHECK-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
182 ; CHECK-NEXT: vmv.s.x v9, a0
183 ; CHECK-NEXT: vsetvli zero, a1, e8, mf4, ta, ma
184 ; CHECK-NEXT: vredmax.vs v9, v8, v9, v0.t
185 ; CHECK-NEXT: vmv.x.s a0, v9
187 %r = call i8 @llvm.vp.reduce.smax.v4i8(i8 %s, <4 x i8> %v, <4 x i1> %m, i32 %evl)
191 declare i8 @llvm.vp.reduce.umin.v4i8(i8, <4 x i8>, <4 x i1>, i32)
193 define signext i8 @vpreduce_umin_v4i8(i8 signext %s, <4 x i8> %v, <4 x i1> %m, i32 zeroext %evl) {
194 ; CHECK-LABEL: vpreduce_umin_v4i8:
196 ; CHECK-NEXT: andi a0, a0, 255
197 ; CHECK-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
198 ; CHECK-NEXT: vmv.s.x v9, a0
199 ; CHECK-NEXT: vsetvli zero, a1, e8, mf4, ta, ma
200 ; CHECK-NEXT: vredminu.vs v9, v8, v9, v0.t
201 ; CHECK-NEXT: vmv.x.s a0, v9
203 %r = call i8 @llvm.vp.reduce.umin.v4i8(i8 %s, <4 x i8> %v, <4 x i1> %m, i32 %evl)
207 declare i8 @llvm.vp.reduce.smin.v4i8(i8, <4 x i8>, <4 x i1>, i32)
209 define signext i8 @vpreduce_smin_v4i8(i8 signext %s, <4 x i8> %v, <4 x i1> %m, i32 zeroext %evl) {
210 ; CHECK-LABEL: vpreduce_smin_v4i8:
212 ; CHECK-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
213 ; CHECK-NEXT: vmv.s.x v9, a0
214 ; CHECK-NEXT: vsetvli zero, a1, e8, mf4, ta, ma
215 ; CHECK-NEXT: vredmin.vs v9, v8, v9, v0.t
216 ; CHECK-NEXT: vmv.x.s a0, v9
218 %r = call i8 @llvm.vp.reduce.smin.v4i8(i8 %s, <4 x i8> %v, <4 x i1> %m, i32 %evl)
222 declare i8 @llvm.vp.reduce.and.v4i8(i8, <4 x i8>, <4 x i1>, i32)
224 define signext i8 @vpreduce_and_v4i8(i8 signext %s, <4 x i8> %v, <4 x i1> %m, i32 zeroext %evl) {
225 ; CHECK-LABEL: vpreduce_and_v4i8:
227 ; CHECK-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
228 ; CHECK-NEXT: vmv.s.x v9, a0
229 ; CHECK-NEXT: vsetvli zero, a1, e8, mf4, ta, ma
230 ; CHECK-NEXT: vredand.vs v9, v8, v9, v0.t
231 ; CHECK-NEXT: vmv.x.s a0, v9
233 %r = call i8 @llvm.vp.reduce.and.v4i8(i8 %s, <4 x i8> %v, <4 x i1> %m, i32 %evl)
237 declare i8 @llvm.vp.reduce.or.v4i8(i8, <4 x i8>, <4 x i1>, i32)
239 define signext i8 @vpreduce_or_v4i8(i8 signext %s, <4 x i8> %v, <4 x i1> %m, i32 zeroext %evl) {
240 ; CHECK-LABEL: vpreduce_or_v4i8:
242 ; CHECK-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
243 ; CHECK-NEXT: vmv.s.x v9, a0
244 ; CHECK-NEXT: vsetvli zero, a1, e8, mf4, ta, ma
245 ; CHECK-NEXT: vredor.vs v9, v8, v9, v0.t
246 ; CHECK-NEXT: vmv.x.s a0, v9
248 %r = call i8 @llvm.vp.reduce.or.v4i8(i8 %s, <4 x i8> %v, <4 x i1> %m, i32 %evl)
252 declare i8 @llvm.vp.reduce.xor.v4i8(i8, <4 x i8>, <4 x i1>, i32)
254 define signext i8 @vpreduce_xor_v4i8(i8 signext %s, <4 x i8> %v, <4 x i1> %m, i32 zeroext %evl) {
255 ; CHECK-LABEL: vpreduce_xor_v4i8:
257 ; CHECK-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
258 ; CHECK-NEXT: vmv.s.x v9, a0
259 ; CHECK-NEXT: vsetvli zero, a1, e8, mf4, ta, ma
260 ; CHECK-NEXT: vredxor.vs v9, v8, v9, v0.t
261 ; CHECK-NEXT: vmv.x.s a0, v9
263 %r = call i8 @llvm.vp.reduce.xor.v4i8(i8 %s, <4 x i8> %v, <4 x i1> %m, i32 %evl)
267 declare i16 @llvm.vp.reduce.add.v2i16(i16, <2 x i16>, <2 x i1>, i32)
269 define signext i16 @vpreduce_add_v2i16(i16 signext %s, <2 x i16> %v, <2 x i1> %m, i32 zeroext %evl) {
270 ; CHECK-LABEL: vpreduce_add_v2i16:
272 ; CHECK-NEXT: vsetivli zero, 1, e16, mf4, ta, ma
273 ; CHECK-NEXT: vmv.s.x v9, a0
274 ; CHECK-NEXT: vsetvli zero, a1, e16, mf4, ta, ma
275 ; CHECK-NEXT: vredsum.vs v9, v8, v9, v0.t
276 ; CHECK-NEXT: vmv.x.s a0, v9
278 %r = call i16 @llvm.vp.reduce.add.v2i16(i16 %s, <2 x i16> %v, <2 x i1> %m, i32 %evl)
282 declare i16 @llvm.vp.reduce.umax.v2i16(i16, <2 x i16>, <2 x i1>, i32)
284 define signext i16 @vpreduce_umax_v2i16(i16 signext %s, <2 x i16> %v, <2 x i1> %m, i32 zeroext %evl) {
285 ; RV32-LABEL: vpreduce_umax_v2i16:
287 ; RV32-NEXT: slli a0, a0, 16
288 ; RV32-NEXT: srli a0, a0, 16
289 ; RV32-NEXT: vsetivli zero, 1, e16, mf4, ta, ma
290 ; RV32-NEXT: vmv.s.x v9, a0
291 ; RV32-NEXT: vsetvli zero, a1, e16, mf4, ta, ma
292 ; RV32-NEXT: vredmaxu.vs v9, v8, v9, v0.t
293 ; RV32-NEXT: vmv.x.s a0, v9
296 ; RV64-LABEL: vpreduce_umax_v2i16:
298 ; RV64-NEXT: slli a0, a0, 48
299 ; RV64-NEXT: srli a0, a0, 48
300 ; RV64-NEXT: vsetivli zero, 1, e16, mf4, ta, ma
301 ; RV64-NEXT: vmv.s.x v9, a0
302 ; RV64-NEXT: vsetvli zero, a1, e16, mf4, ta, ma
303 ; RV64-NEXT: vredmaxu.vs v9, v8, v9, v0.t
304 ; RV64-NEXT: vmv.x.s a0, v9
306 %r = call i16 @llvm.vp.reduce.umax.v2i16(i16 %s, <2 x i16> %v, <2 x i1> %m, i32 %evl)
310 declare i16 @llvm.vp.reduce.smax.v2i16(i16, <2 x i16>, <2 x i1>, i32)
312 define signext i16 @vpreduce_smax_v2i16(i16 signext %s, <2 x i16> %v, <2 x i1> %m, i32 zeroext %evl) {
313 ; CHECK-LABEL: vpreduce_smax_v2i16:
315 ; CHECK-NEXT: vsetivli zero, 1, e16, mf4, ta, ma
316 ; CHECK-NEXT: vmv.s.x v9, a0
317 ; CHECK-NEXT: vsetvli zero, a1, e16, mf4, ta, ma
318 ; CHECK-NEXT: vredmax.vs v9, v8, v9, v0.t
319 ; CHECK-NEXT: vmv.x.s a0, v9
321 %r = call i16 @llvm.vp.reduce.smax.v2i16(i16 %s, <2 x i16> %v, <2 x i1> %m, i32 %evl)
325 declare i16 @llvm.vp.reduce.umin.v2i16(i16, <2 x i16>, <2 x i1>, i32)
327 define signext i16 @vpreduce_umin_v2i16(i16 signext %s, <2 x i16> %v, <2 x i1> %m, i32 zeroext %evl) {
328 ; RV32-LABEL: vpreduce_umin_v2i16:
330 ; RV32-NEXT: slli a0, a0, 16
331 ; RV32-NEXT: srli a0, a0, 16
332 ; RV32-NEXT: vsetivli zero, 1, e16, mf4, ta, ma
333 ; RV32-NEXT: vmv.s.x v9, a0
334 ; RV32-NEXT: vsetvli zero, a1, e16, mf4, ta, ma
335 ; RV32-NEXT: vredminu.vs v9, v8, v9, v0.t
336 ; RV32-NEXT: vmv.x.s a0, v9
339 ; RV64-LABEL: vpreduce_umin_v2i16:
341 ; RV64-NEXT: slli a0, a0, 48
342 ; RV64-NEXT: srli a0, a0, 48
343 ; RV64-NEXT: vsetivli zero, 1, e16, mf4, ta, ma
344 ; RV64-NEXT: vmv.s.x v9, a0
345 ; RV64-NEXT: vsetvli zero, a1, e16, mf4, ta, ma
346 ; RV64-NEXT: vredminu.vs v9, v8, v9, v0.t
347 ; RV64-NEXT: vmv.x.s a0, v9
349 %r = call i16 @llvm.vp.reduce.umin.v2i16(i16 %s, <2 x i16> %v, <2 x i1> %m, i32 %evl)
353 declare i16 @llvm.vp.reduce.smin.v2i16(i16, <2 x i16>, <2 x i1>, i32)
355 define signext i16 @vpreduce_smin_v2i16(i16 signext %s, <2 x i16> %v, <2 x i1> %m, i32 zeroext %evl) {
356 ; CHECK-LABEL: vpreduce_smin_v2i16:
358 ; CHECK-NEXT: vsetivli zero, 1, e16, mf4, ta, ma
359 ; CHECK-NEXT: vmv.s.x v9, a0
360 ; CHECK-NEXT: vsetvli zero, a1, e16, mf4, ta, ma
361 ; CHECK-NEXT: vredmin.vs v9, v8, v9, v0.t
362 ; CHECK-NEXT: vmv.x.s a0, v9
364 %r = call i16 @llvm.vp.reduce.smin.v2i16(i16 %s, <2 x i16> %v, <2 x i1> %m, i32 %evl)
368 declare i16 @llvm.vp.reduce.and.v2i16(i16, <2 x i16>, <2 x i1>, i32)
370 define signext i16 @vpreduce_and_v2i16(i16 signext %s, <2 x i16> %v, <2 x i1> %m, i32 zeroext %evl) {
371 ; CHECK-LABEL: vpreduce_and_v2i16:
373 ; CHECK-NEXT: vsetivli zero, 1, e16, mf4, ta, ma
374 ; CHECK-NEXT: vmv.s.x v9, a0
375 ; CHECK-NEXT: vsetvli zero, a1, e16, mf4, ta, ma
376 ; CHECK-NEXT: vredand.vs v9, v8, v9, v0.t
377 ; CHECK-NEXT: vmv.x.s a0, v9
379 %r = call i16 @llvm.vp.reduce.and.v2i16(i16 %s, <2 x i16> %v, <2 x i1> %m, i32 %evl)
383 declare i16 @llvm.vp.reduce.or.v2i16(i16, <2 x i16>, <2 x i1>, i32)
385 define signext i16 @vpreduce_or_v2i16(i16 signext %s, <2 x i16> %v, <2 x i1> %m, i32 zeroext %evl) {
386 ; CHECK-LABEL: vpreduce_or_v2i16:
388 ; CHECK-NEXT: vsetivli zero, 1, e16, mf4, ta, ma
389 ; CHECK-NEXT: vmv.s.x v9, a0
390 ; CHECK-NEXT: vsetvli zero, a1, e16, mf4, ta, ma
391 ; CHECK-NEXT: vredor.vs v9, v8, v9, v0.t
392 ; CHECK-NEXT: vmv.x.s a0, v9
394 %r = call i16 @llvm.vp.reduce.or.v2i16(i16 %s, <2 x i16> %v, <2 x i1> %m, i32 %evl)
398 declare i16 @llvm.vp.reduce.xor.v2i16(i16, <2 x i16>, <2 x i1>, i32)
400 define signext i16 @vpreduce_xor_v2i16(i16 signext %s, <2 x i16> %v, <2 x i1> %m, i32 zeroext %evl) {
401 ; CHECK-LABEL: vpreduce_xor_v2i16:
403 ; CHECK-NEXT: vsetivli zero, 1, e16, mf4, ta, ma
404 ; CHECK-NEXT: vmv.s.x v9, a0
405 ; CHECK-NEXT: vsetvli zero, a1, e16, mf4, ta, ma
406 ; CHECK-NEXT: vredxor.vs v9, v8, v9, v0.t
407 ; CHECK-NEXT: vmv.x.s a0, v9
409 %r = call i16 @llvm.vp.reduce.xor.v2i16(i16 %s, <2 x i16> %v, <2 x i1> %m, i32 %evl)
413 declare i16 @llvm.vp.reduce.add.v4i16(i16, <4 x i16>, <4 x i1>, i32)
415 define signext i16 @vpreduce_add_v4i16(i16 signext %s, <4 x i16> %v, <4 x i1> %m, i32 zeroext %evl) {
416 ; CHECK-LABEL: vpreduce_add_v4i16:
418 ; CHECK-NEXT: vsetivli zero, 1, e16, mf2, ta, ma
419 ; CHECK-NEXT: vmv.s.x v9, a0
420 ; CHECK-NEXT: vsetvli zero, a1, e16, mf2, ta, ma
421 ; CHECK-NEXT: vredsum.vs v9, v8, v9, v0.t
422 ; CHECK-NEXT: vmv.x.s a0, v9
424 %r = call i16 @llvm.vp.reduce.add.v4i16(i16 %s, <4 x i16> %v, <4 x i1> %m, i32 %evl)
428 declare i16 @llvm.vp.reduce.umax.v4i16(i16, <4 x i16>, <4 x i1>, i32)
430 define signext i16 @vpreduce_umax_v4i16(i16 signext %s, <4 x i16> %v, <4 x i1> %m, i32 zeroext %evl) {
431 ; RV32-LABEL: vpreduce_umax_v4i16:
433 ; RV32-NEXT: slli a0, a0, 16
434 ; RV32-NEXT: srli a0, a0, 16
435 ; RV32-NEXT: vsetivli zero, 1, e16, mf2, ta, ma
436 ; RV32-NEXT: vmv.s.x v9, a0
437 ; RV32-NEXT: vsetvli zero, a1, e16, mf2, ta, ma
438 ; RV32-NEXT: vredmaxu.vs v9, v8, v9, v0.t
439 ; RV32-NEXT: vmv.x.s a0, v9
442 ; RV64-LABEL: vpreduce_umax_v4i16:
444 ; RV64-NEXT: slli a0, a0, 48
445 ; RV64-NEXT: srli a0, a0, 48
446 ; RV64-NEXT: vsetivli zero, 1, e16, mf2, ta, ma
447 ; RV64-NEXT: vmv.s.x v9, a0
448 ; RV64-NEXT: vsetvli zero, a1, e16, mf2, ta, ma
449 ; RV64-NEXT: vredmaxu.vs v9, v8, v9, v0.t
450 ; RV64-NEXT: vmv.x.s a0, v9
452 %r = call i16 @llvm.vp.reduce.umax.v4i16(i16 %s, <4 x i16> %v, <4 x i1> %m, i32 %evl)
456 declare i16 @llvm.vp.reduce.smax.v4i16(i16, <4 x i16>, <4 x i1>, i32)
458 define signext i16 @vpreduce_smax_v4i16(i16 signext %s, <4 x i16> %v, <4 x i1> %m, i32 zeroext %evl) {
459 ; CHECK-LABEL: vpreduce_smax_v4i16:
461 ; CHECK-NEXT: vsetivli zero, 1, e16, mf2, ta, ma
462 ; CHECK-NEXT: vmv.s.x v9, a0
463 ; CHECK-NEXT: vsetvli zero, a1, e16, mf2, ta, ma
464 ; CHECK-NEXT: vredmax.vs v9, v8, v9, v0.t
465 ; CHECK-NEXT: vmv.x.s a0, v9
467 %r = call i16 @llvm.vp.reduce.smax.v4i16(i16 %s, <4 x i16> %v, <4 x i1> %m, i32 %evl)
471 declare i16 @llvm.vp.reduce.umin.v4i16(i16, <4 x i16>, <4 x i1>, i32)
473 define signext i16 @vpreduce_umin_v4i16(i16 signext %s, <4 x i16> %v, <4 x i1> %m, i32 zeroext %evl) {
474 ; RV32-LABEL: vpreduce_umin_v4i16:
476 ; RV32-NEXT: slli a0, a0, 16
477 ; RV32-NEXT: srli a0, a0, 16
478 ; RV32-NEXT: vsetivli zero, 1, e16, mf2, ta, ma
479 ; RV32-NEXT: vmv.s.x v9, a0
480 ; RV32-NEXT: vsetvli zero, a1, e16, mf2, ta, ma
481 ; RV32-NEXT: vredminu.vs v9, v8, v9, v0.t
482 ; RV32-NEXT: vmv.x.s a0, v9
485 ; RV64-LABEL: vpreduce_umin_v4i16:
487 ; RV64-NEXT: slli a0, a0, 48
488 ; RV64-NEXT: srli a0, a0, 48
489 ; RV64-NEXT: vsetivli zero, 1, e16, mf2, ta, ma
490 ; RV64-NEXT: vmv.s.x v9, a0
491 ; RV64-NEXT: vsetvli zero, a1, e16, mf2, ta, ma
492 ; RV64-NEXT: vredminu.vs v9, v8, v9, v0.t
493 ; RV64-NEXT: vmv.x.s a0, v9
495 %r = call i16 @llvm.vp.reduce.umin.v4i16(i16 %s, <4 x i16> %v, <4 x i1> %m, i32 %evl)
499 declare i16 @llvm.vp.reduce.smin.v4i16(i16, <4 x i16>, <4 x i1>, i32)
501 define signext i16 @vpreduce_smin_v4i16(i16 signext %s, <4 x i16> %v, <4 x i1> %m, i32 zeroext %evl) {
502 ; CHECK-LABEL: vpreduce_smin_v4i16:
504 ; CHECK-NEXT: vsetivli zero, 1, e16, mf2, ta, ma
505 ; CHECK-NEXT: vmv.s.x v9, a0
506 ; CHECK-NEXT: vsetvli zero, a1, e16, mf2, ta, ma
507 ; CHECK-NEXT: vredmin.vs v9, v8, v9, v0.t
508 ; CHECK-NEXT: vmv.x.s a0, v9
510 %r = call i16 @llvm.vp.reduce.smin.v4i16(i16 %s, <4 x i16> %v, <4 x i1> %m, i32 %evl)
514 declare i16 @llvm.vp.reduce.and.v4i16(i16, <4 x i16>, <4 x i1>, i32)
516 define signext i16 @vpreduce_and_v4i16(i16 signext %s, <4 x i16> %v, <4 x i1> %m, i32 zeroext %evl) {
517 ; CHECK-LABEL: vpreduce_and_v4i16:
519 ; CHECK-NEXT: vsetivli zero, 1, e16, mf2, ta, ma
520 ; CHECK-NEXT: vmv.s.x v9, a0
521 ; CHECK-NEXT: vsetvli zero, a1, e16, mf2, ta, ma
522 ; CHECK-NEXT: vredand.vs v9, v8, v9, v0.t
523 ; CHECK-NEXT: vmv.x.s a0, v9
525 %r = call i16 @llvm.vp.reduce.and.v4i16(i16 %s, <4 x i16> %v, <4 x i1> %m, i32 %evl)
529 declare i16 @llvm.vp.reduce.or.v4i16(i16, <4 x i16>, <4 x i1>, i32)
531 define signext i16 @vpreduce_or_v4i16(i16 signext %s, <4 x i16> %v, <4 x i1> %m, i32 zeroext %evl) {
532 ; CHECK-LABEL: vpreduce_or_v4i16:
534 ; CHECK-NEXT: vsetivli zero, 1, e16, mf2, ta, ma
535 ; CHECK-NEXT: vmv.s.x v9, a0
536 ; CHECK-NEXT: vsetvli zero, a1, e16, mf2, ta, ma
537 ; CHECK-NEXT: vredor.vs v9, v8, v9, v0.t
538 ; CHECK-NEXT: vmv.x.s a0, v9
540 %r = call i16 @llvm.vp.reduce.or.v4i16(i16 %s, <4 x i16> %v, <4 x i1> %m, i32 %evl)
544 declare i16 @llvm.vp.reduce.xor.v4i16(i16, <4 x i16>, <4 x i1>, i32)
546 define signext i16 @vpreduce_xor_v4i16(i16 signext %s, <4 x i16> %v, <4 x i1> %m, i32 zeroext %evl) {
547 ; CHECK-LABEL: vpreduce_xor_v4i16:
549 ; CHECK-NEXT: vsetivli zero, 1, e16, mf2, ta, ma
550 ; CHECK-NEXT: vmv.s.x v9, a0
551 ; CHECK-NEXT: vsetvli zero, a1, e16, mf2, ta, ma
552 ; CHECK-NEXT: vredxor.vs v9, v8, v9, v0.t
553 ; CHECK-NEXT: vmv.x.s a0, v9
555 %r = call i16 @llvm.vp.reduce.xor.v4i16(i16 %s, <4 x i16> %v, <4 x i1> %m, i32 %evl)
559 declare i32 @llvm.vp.reduce.add.v2i32(i32, <2 x i32>, <2 x i1>, i32)
561 define signext i32 @vpreduce_add_v2i32(i32 signext %s, <2 x i32> %v, <2 x i1> %m, i32 zeroext %evl) {
562 ; CHECK-LABEL: vpreduce_add_v2i32:
564 ; CHECK-NEXT: vsetivli zero, 1, e32, mf2, ta, ma
565 ; CHECK-NEXT: vmv.s.x v9, a0
566 ; CHECK-NEXT: vsetvli zero, a1, e32, mf2, ta, ma
567 ; CHECK-NEXT: vredsum.vs v9, v8, v9, v0.t
568 ; CHECK-NEXT: vmv.x.s a0, v9
570 %r = call i32 @llvm.vp.reduce.add.v2i32(i32 %s, <2 x i32> %v, <2 x i1> %m, i32 %evl)
574 declare i32 @llvm.vp.reduce.umax.v2i32(i32, <2 x i32>, <2 x i1>, i32)
576 define signext i32 @vpreduce_umax_v2i32(i32 signext %s, <2 x i32> %v, <2 x i1> %m, i32 zeroext %evl) {
577 ; RV32-LABEL: vpreduce_umax_v2i32:
579 ; RV32-NEXT: vsetivli zero, 1, e32, mf2, ta, ma
580 ; RV32-NEXT: vmv.s.x v9, a0
581 ; RV32-NEXT: vsetvli zero, a1, e32, mf2, ta, ma
582 ; RV32-NEXT: vredmaxu.vs v9, v8, v9, v0.t
583 ; RV32-NEXT: vmv.x.s a0, v9
586 ; RV64-LABEL: vpreduce_umax_v2i32:
588 ; RV64-NEXT: andi a0, a0, -1
589 ; RV64-NEXT: vsetivli zero, 1, e32, mf2, ta, ma
590 ; RV64-NEXT: vmv.s.x v9, a0
591 ; RV64-NEXT: vsetvli zero, a1, e32, mf2, ta, ma
592 ; RV64-NEXT: vredmaxu.vs v9, v8, v9, v0.t
593 ; RV64-NEXT: vmv.x.s a0, v9
595 %r = call i32 @llvm.vp.reduce.umax.v2i32(i32 %s, <2 x i32> %v, <2 x i1> %m, i32 %evl)
599 declare i32 @llvm.vp.reduce.smax.v2i32(i32, <2 x i32>, <2 x i1>, i32)
601 define signext i32 @vpreduce_smax_v2i32(i32 signext %s, <2 x i32> %v, <2 x i1> %m, i32 zeroext %evl) {
602 ; CHECK-LABEL: vpreduce_smax_v2i32:
604 ; CHECK-NEXT: vsetivli zero, 1, e32, mf2, ta, ma
605 ; CHECK-NEXT: vmv.s.x v9, a0
606 ; CHECK-NEXT: vsetvli zero, a1, e32, mf2, ta, ma
607 ; CHECK-NEXT: vredmax.vs v9, v8, v9, v0.t
608 ; CHECK-NEXT: vmv.x.s a0, v9
610 %r = call i32 @llvm.vp.reduce.smax.v2i32(i32 %s, <2 x i32> %v, <2 x i1> %m, i32 %evl)
614 declare i32 @llvm.vp.reduce.umin.v2i32(i32, <2 x i32>, <2 x i1>, i32)
616 define signext i32 @vpreduce_umin_v2i32(i32 signext %s, <2 x i32> %v, <2 x i1> %m, i32 zeroext %evl) {
617 ; RV32-LABEL: vpreduce_umin_v2i32:
619 ; RV32-NEXT: vsetivli zero, 1, e32, mf2, ta, ma
620 ; RV32-NEXT: vmv.s.x v9, a0
621 ; RV32-NEXT: vsetvli zero, a1, e32, mf2, ta, ma
622 ; RV32-NEXT: vredminu.vs v9, v8, v9, v0.t
623 ; RV32-NEXT: vmv.x.s a0, v9
626 ; RV64-LABEL: vpreduce_umin_v2i32:
628 ; RV64-NEXT: andi a0, a0, -1
629 ; RV64-NEXT: vsetivli zero, 1, e32, mf2, ta, ma
630 ; RV64-NEXT: vmv.s.x v9, a0
631 ; RV64-NEXT: vsetvli zero, a1, e32, mf2, ta, ma
632 ; RV64-NEXT: vredminu.vs v9, v8, v9, v0.t
633 ; RV64-NEXT: vmv.x.s a0, v9
635 %r = call i32 @llvm.vp.reduce.umin.v2i32(i32 %s, <2 x i32> %v, <2 x i1> %m, i32 %evl)
639 declare i32 @llvm.vp.reduce.smin.v2i32(i32, <2 x i32>, <2 x i1>, i32)
641 define signext i32 @vpreduce_smin_v2i32(i32 signext %s, <2 x i32> %v, <2 x i1> %m, i32 zeroext %evl) {
642 ; CHECK-LABEL: vpreduce_smin_v2i32:
644 ; CHECK-NEXT: vsetivli zero, 1, e32, mf2, ta, ma
645 ; CHECK-NEXT: vmv.s.x v9, a0
646 ; CHECK-NEXT: vsetvli zero, a1, e32, mf2, ta, ma
647 ; CHECK-NEXT: vredmin.vs v9, v8, v9, v0.t
648 ; CHECK-NEXT: vmv.x.s a0, v9
650 %r = call i32 @llvm.vp.reduce.smin.v2i32(i32 %s, <2 x i32> %v, <2 x i1> %m, i32 %evl)
654 declare i32 @llvm.vp.reduce.and.v2i32(i32, <2 x i32>, <2 x i1>, i32)
656 define signext i32 @vpreduce_and_v2i32(i32 signext %s, <2 x i32> %v, <2 x i1> %m, i32 zeroext %evl) {
657 ; CHECK-LABEL: vpreduce_and_v2i32:
659 ; CHECK-NEXT: vsetivli zero, 1, e32, mf2, ta, ma
660 ; CHECK-NEXT: vmv.s.x v9, a0
661 ; CHECK-NEXT: vsetvli zero, a1, e32, mf2, ta, ma
662 ; CHECK-NEXT: vredand.vs v9, v8, v9, v0.t
663 ; CHECK-NEXT: vmv.x.s a0, v9
665 %r = call i32 @llvm.vp.reduce.and.v2i32(i32 %s, <2 x i32> %v, <2 x i1> %m, i32 %evl)
669 declare i32 @llvm.vp.reduce.or.v2i32(i32, <2 x i32>, <2 x i1>, i32)
671 define signext i32 @vpreduce_or_v2i32(i32 signext %s, <2 x i32> %v, <2 x i1> %m, i32 zeroext %evl) {
672 ; CHECK-LABEL: vpreduce_or_v2i32:
674 ; CHECK-NEXT: vsetivli zero, 1, e32, mf2, ta, ma
675 ; CHECK-NEXT: vmv.s.x v9, a0
676 ; CHECK-NEXT: vsetvli zero, a1, e32, mf2, ta, ma
677 ; CHECK-NEXT: vredor.vs v9, v8, v9, v0.t
678 ; CHECK-NEXT: vmv.x.s a0, v9
680 %r = call i32 @llvm.vp.reduce.or.v2i32(i32 %s, <2 x i32> %v, <2 x i1> %m, i32 %evl)
684 declare i32 @llvm.vp.reduce.xor.v2i32(i32, <2 x i32>, <2 x i1>, i32)
686 define signext i32 @vpreduce_xor_v2i32(i32 signext %s, <2 x i32> %v, <2 x i1> %m, i32 zeroext %evl) {
687 ; CHECK-LABEL: vpreduce_xor_v2i32:
689 ; CHECK-NEXT: vsetivli zero, 1, e32, mf2, ta, ma
690 ; CHECK-NEXT: vmv.s.x v9, a0
691 ; CHECK-NEXT: vsetvli zero, a1, e32, mf2, ta, ma
692 ; CHECK-NEXT: vredxor.vs v9, v8, v9, v0.t
693 ; CHECK-NEXT: vmv.x.s a0, v9
695 %r = call i32 @llvm.vp.reduce.xor.v2i32(i32 %s, <2 x i32> %v, <2 x i1> %m, i32 %evl)
699 declare i32 @llvm.vp.reduce.add.v4i32(i32, <4 x i32>, <4 x i1>, i32)
701 define signext i32 @vpreduce_add_v4i32(i32 signext %s, <4 x i32> %v, <4 x i1> %m, i32 zeroext %evl) {
702 ; CHECK-LABEL: vpreduce_add_v4i32:
704 ; CHECK-NEXT: vsetivli zero, 1, e32, m1, ta, ma
705 ; CHECK-NEXT: vmv.s.x v9, a0
706 ; CHECK-NEXT: vsetvli zero, a1, e32, m1, ta, ma
707 ; CHECK-NEXT: vredsum.vs v9, v8, v9, v0.t
708 ; CHECK-NEXT: vmv.x.s a0, v9
710 %r = call i32 @llvm.vp.reduce.add.v4i32(i32 %s, <4 x i32> %v, <4 x i1> %m, i32 %evl)
714 declare i32 @llvm.vp.reduce.umax.v4i32(i32, <4 x i32>, <4 x i1>, i32)
716 define signext i32 @vpreduce_umax_v4i32(i32 signext %s, <4 x i32> %v, <4 x i1> %m, i32 zeroext %evl) {
717 ; RV32-LABEL: vpreduce_umax_v4i32:
719 ; RV32-NEXT: vsetivli zero, 1, e32, m1, ta, ma
720 ; RV32-NEXT: vmv.s.x v9, a0
721 ; RV32-NEXT: vsetvli zero, a1, e32, m1, ta, ma
722 ; RV32-NEXT: vredmaxu.vs v9, v8, v9, v0.t
723 ; RV32-NEXT: vmv.x.s a0, v9
726 ; RV64-LABEL: vpreduce_umax_v4i32:
728 ; RV64-NEXT: andi a0, a0, -1
729 ; RV64-NEXT: vsetivli zero, 1, e32, m1, ta, ma
730 ; RV64-NEXT: vmv.s.x v9, a0
731 ; RV64-NEXT: vsetvli zero, a1, e32, m1, ta, ma
732 ; RV64-NEXT: vredmaxu.vs v9, v8, v9, v0.t
733 ; RV64-NEXT: vmv.x.s a0, v9
735 %r = call i32 @llvm.vp.reduce.umax.v4i32(i32 %s, <4 x i32> %v, <4 x i1> %m, i32 %evl)
739 declare i32 @llvm.vp.reduce.smax.v4i32(i32, <4 x i32>, <4 x i1>, i32)
741 define signext i32 @vpreduce_smax_v4i32(i32 signext %s, <4 x i32> %v, <4 x i1> %m, i32 zeroext %evl) {
742 ; CHECK-LABEL: vpreduce_smax_v4i32:
744 ; CHECK-NEXT: vsetivli zero, 1, e32, m1, ta, ma
745 ; CHECK-NEXT: vmv.s.x v9, a0
746 ; CHECK-NEXT: vsetvli zero, a1, e32, m1, ta, ma
747 ; CHECK-NEXT: vredmax.vs v9, v8, v9, v0.t
748 ; CHECK-NEXT: vmv.x.s a0, v9
750 %r = call i32 @llvm.vp.reduce.smax.v4i32(i32 %s, <4 x i32> %v, <4 x i1> %m, i32 %evl)
754 declare i32 @llvm.vp.reduce.umin.v4i32(i32, <4 x i32>, <4 x i1>, i32)
756 define signext i32 @vpreduce_umin_v4i32(i32 signext %s, <4 x i32> %v, <4 x i1> %m, i32 zeroext %evl) {
757 ; RV32-LABEL: vpreduce_umin_v4i32:
759 ; RV32-NEXT: vsetivli zero, 1, e32, m1, ta, ma
760 ; RV32-NEXT: vmv.s.x v9, a0
761 ; RV32-NEXT: vsetvli zero, a1, e32, m1, ta, ma
762 ; RV32-NEXT: vredminu.vs v9, v8, v9, v0.t
763 ; RV32-NEXT: vmv.x.s a0, v9
766 ; RV64-LABEL: vpreduce_umin_v4i32:
768 ; RV64-NEXT: andi a0, a0, -1
769 ; RV64-NEXT: vsetivli zero, 1, e32, m1, ta, ma
770 ; RV64-NEXT: vmv.s.x v9, a0
771 ; RV64-NEXT: vsetvli zero, a1, e32, m1, ta, ma
772 ; RV64-NEXT: vredminu.vs v9, v8, v9, v0.t
773 ; RV64-NEXT: vmv.x.s a0, v9
775 %r = call i32 @llvm.vp.reduce.umin.v4i32(i32 %s, <4 x i32> %v, <4 x i1> %m, i32 %evl)
779 declare i32 @llvm.vp.reduce.smin.v4i32(i32, <4 x i32>, <4 x i1>, i32)
781 define signext i32 @vpreduce_smin_v4i32(i32 signext %s, <4 x i32> %v, <4 x i1> %m, i32 zeroext %evl) {
782 ; CHECK-LABEL: vpreduce_smin_v4i32:
784 ; CHECK-NEXT: vsetivli zero, 1, e32, m1, ta, ma
785 ; CHECK-NEXT: vmv.s.x v9, a0
786 ; CHECK-NEXT: vsetvli zero, a1, e32, m1, ta, ma
787 ; CHECK-NEXT: vredmin.vs v9, v8, v9, v0.t
788 ; CHECK-NEXT: vmv.x.s a0, v9
790 %r = call i32 @llvm.vp.reduce.smin.v4i32(i32 %s, <4 x i32> %v, <4 x i1> %m, i32 %evl)
794 declare i32 @llvm.vp.reduce.and.v4i32(i32, <4 x i32>, <4 x i1>, i32)
796 define signext i32 @vpreduce_and_v4i32(i32 signext %s, <4 x i32> %v, <4 x i1> %m, i32 zeroext %evl) {
797 ; CHECK-LABEL: vpreduce_and_v4i32:
799 ; CHECK-NEXT: vsetivli zero, 1, e32, m1, ta, ma
800 ; CHECK-NEXT: vmv.s.x v9, a0
801 ; CHECK-NEXT: vsetvli zero, a1, e32, m1, ta, ma
802 ; CHECK-NEXT: vredand.vs v9, v8, v9, v0.t
803 ; CHECK-NEXT: vmv.x.s a0, v9
805 %r = call i32 @llvm.vp.reduce.and.v4i32(i32 %s, <4 x i32> %v, <4 x i1> %m, i32 %evl)
809 declare i32 @llvm.vp.reduce.or.v4i32(i32, <4 x i32>, <4 x i1>, i32)
811 define signext i32 @vpreduce_or_v4i32(i32 signext %s, <4 x i32> %v, <4 x i1> %m, i32 zeroext %evl) {
812 ; CHECK-LABEL: vpreduce_or_v4i32:
814 ; CHECK-NEXT: vsetivli zero, 1, e32, m1, ta, ma
815 ; CHECK-NEXT: vmv.s.x v9, a0
816 ; CHECK-NEXT: vsetvli zero, a1, e32, m1, ta, ma
817 ; CHECK-NEXT: vredor.vs v9, v8, v9, v0.t
818 ; CHECK-NEXT: vmv.x.s a0, v9
820 %r = call i32 @llvm.vp.reduce.or.v4i32(i32 %s, <4 x i32> %v, <4 x i1> %m, i32 %evl)
824 declare i32 @llvm.vp.reduce.xor.v4i32(i32, <4 x i32>, <4 x i1>, i32)
826 define signext i32 @vpreduce_xor_v4i32(i32 signext %s, <4 x i32> %v, <4 x i1> %m, i32 zeroext %evl) {
827 ; CHECK-LABEL: vpreduce_xor_v4i32:
829 ; CHECK-NEXT: vsetivli zero, 1, e32, m1, ta, ma
830 ; CHECK-NEXT: vmv.s.x v9, a0
831 ; CHECK-NEXT: vsetvli zero, a1, e32, m1, ta, ma
832 ; CHECK-NEXT: vredxor.vs v9, v8, v9, v0.t
833 ; CHECK-NEXT: vmv.x.s a0, v9
835 %r = call i32 @llvm.vp.reduce.xor.v4i32(i32 %s, <4 x i32> %v, <4 x i1> %m, i32 %evl)
839 declare i32 @llvm.vp.reduce.xor.v64i32(i32, <64 x i32>, <64 x i1>, i32)
841 define signext i32 @vpreduce_xor_v64i32(i32 signext %s, <64 x i32> %v, <64 x i1> %m, i32 zeroext %evl) {
842 ; CHECK-LABEL: vpreduce_xor_v64i32:
844 ; CHECK-NEXT: vsetivli zero, 4, e8, mf2, ta, ma
845 ; CHECK-NEXT: li a3, 32
846 ; CHECK-NEXT: vslidedown.vi v24, v0, 4
847 ; CHECK-NEXT: mv a2, a1
848 ; CHECK-NEXT: bltu a1, a3, .LBB49_2
849 ; CHECK-NEXT: # %bb.1:
850 ; CHECK-NEXT: li a2, 32
851 ; CHECK-NEXT: .LBB49_2:
852 ; CHECK-NEXT: vsetvli zero, zero, e32, m2, ta, ma
853 ; CHECK-NEXT: vmv.s.x v25, a0
854 ; CHECK-NEXT: vsetvli zero, a2, e32, m8, ta, ma
855 ; CHECK-NEXT: vredxor.vs v25, v8, v25, v0.t
856 ; CHECK-NEXT: addi a0, a1, -32
857 ; CHECK-NEXT: sltu a1, a1, a0
858 ; CHECK-NEXT: addi a1, a1, -1
859 ; CHECK-NEXT: and a0, a1, a0
860 ; CHECK-NEXT: vsetvli zero, a0, e32, m8, ta, ma
861 ; CHECK-NEXT: vmv1r.v v0, v24
862 ; CHECK-NEXT: vredxor.vs v25, v16, v25, v0.t
863 ; CHECK-NEXT: vmv.x.s a0, v25
865 %r = call i32 @llvm.vp.reduce.xor.v64i32(i32 %s, <64 x i32> %v, <64 x i1> %m, i32 %evl)
869 declare i64 @llvm.vp.reduce.add.v2i64(i64, <2 x i64>, <2 x i1>, i32)
871 define signext i64 @vpreduce_add_v2i64(i64 signext %s, <2 x i64> %v, <2 x i1> %m, i32 zeroext %evl) {
872 ; RV32-LABEL: vpreduce_add_v2i64:
874 ; RV32-NEXT: addi sp, sp, -16
875 ; RV32-NEXT: .cfi_def_cfa_offset 16
876 ; RV32-NEXT: sw a1, 12(sp)
877 ; RV32-NEXT: sw a0, 8(sp)
878 ; RV32-NEXT: addi a0, sp, 8
879 ; RV32-NEXT: vsetivli zero, 1, e64, m1, ta, ma
880 ; RV32-NEXT: vlse64.v v9, (a0), zero
881 ; RV32-NEXT: vsetvli zero, a2, e64, m1, ta, ma
882 ; RV32-NEXT: vredsum.vs v9, v8, v9, v0.t
883 ; RV32-NEXT: vmv.x.s a0, v9
884 ; RV32-NEXT: li a1, 32
885 ; RV32-NEXT: vsetivli zero, 1, e64, m1, ta, ma
886 ; RV32-NEXT: vsrl.vx v8, v9, a1
887 ; RV32-NEXT: vmv.x.s a1, v8
888 ; RV32-NEXT: addi sp, sp, 16
891 ; RV64-LABEL: vpreduce_add_v2i64:
893 ; RV64-NEXT: vsetivli zero, 1, e64, m1, ta, ma
894 ; RV64-NEXT: vmv.s.x v9, a0
895 ; RV64-NEXT: vsetvli zero, a1, e64, m1, ta, ma
896 ; RV64-NEXT: vredsum.vs v9, v8, v9, v0.t
897 ; RV64-NEXT: vmv.x.s a0, v9
899 %r = call i64 @llvm.vp.reduce.add.v2i64(i64 %s, <2 x i64> %v, <2 x i1> %m, i32 %evl)
903 declare i64 @llvm.vp.reduce.umax.v2i64(i64, <2 x i64>, <2 x i1>, i32)
905 define signext i64 @vpreduce_umax_v2i64(i64 signext %s, <2 x i64> %v, <2 x i1> %m, i32 zeroext %evl) {
906 ; RV32-LABEL: vpreduce_umax_v2i64:
908 ; RV32-NEXT: addi sp, sp, -16
909 ; RV32-NEXT: .cfi_def_cfa_offset 16
910 ; RV32-NEXT: sw a1, 12(sp)
911 ; RV32-NEXT: sw a0, 8(sp)
912 ; RV32-NEXT: addi a0, sp, 8
913 ; RV32-NEXT: vsetivli zero, 1, e64, m1, ta, ma
914 ; RV32-NEXT: vlse64.v v9, (a0), zero
915 ; RV32-NEXT: vsetvli zero, a2, e64, m1, ta, ma
916 ; RV32-NEXT: vredmaxu.vs v9, v8, v9, v0.t
917 ; RV32-NEXT: vmv.x.s a0, v9
918 ; RV32-NEXT: li a1, 32
919 ; RV32-NEXT: vsetivli zero, 1, e64, m1, ta, ma
920 ; RV32-NEXT: vsrl.vx v8, v9, a1
921 ; RV32-NEXT: vmv.x.s a1, v8
922 ; RV32-NEXT: addi sp, sp, 16
925 ; RV64-LABEL: vpreduce_umax_v2i64:
927 ; RV64-NEXT: vsetivli zero, 1, e64, m1, ta, ma
928 ; RV64-NEXT: vmv.s.x v9, a0
929 ; RV64-NEXT: vsetvli zero, a1, e64, m1, ta, ma
930 ; RV64-NEXT: vredmaxu.vs v9, v8, v9, v0.t
931 ; RV64-NEXT: vmv.x.s a0, v9
933 %r = call i64 @llvm.vp.reduce.umax.v2i64(i64 %s, <2 x i64> %v, <2 x i1> %m, i32 %evl)
937 declare i64 @llvm.vp.reduce.smax.v2i64(i64, <2 x i64>, <2 x i1>, i32)
939 define signext i64 @vpreduce_smax_v2i64(i64 signext %s, <2 x i64> %v, <2 x i1> %m, i32 zeroext %evl) {
940 ; RV32-LABEL: vpreduce_smax_v2i64:
942 ; RV32-NEXT: addi sp, sp, -16
943 ; RV32-NEXT: .cfi_def_cfa_offset 16
944 ; RV32-NEXT: sw a1, 12(sp)
945 ; RV32-NEXT: sw a0, 8(sp)
946 ; RV32-NEXT: addi a0, sp, 8
947 ; RV32-NEXT: vsetivli zero, 1, e64, m1, ta, ma
948 ; RV32-NEXT: vlse64.v v9, (a0), zero
949 ; RV32-NEXT: vsetvli zero, a2, e64, m1, ta, ma
950 ; RV32-NEXT: vredmax.vs v9, v8, v9, v0.t
951 ; RV32-NEXT: vmv.x.s a0, v9
952 ; RV32-NEXT: li a1, 32
953 ; RV32-NEXT: vsetivli zero, 1, e64, m1, ta, ma
954 ; RV32-NEXT: vsrl.vx v8, v9, a1
955 ; RV32-NEXT: vmv.x.s a1, v8
956 ; RV32-NEXT: addi sp, sp, 16
959 ; RV64-LABEL: vpreduce_smax_v2i64:
961 ; RV64-NEXT: vsetivli zero, 1, e64, m1, ta, ma
962 ; RV64-NEXT: vmv.s.x v9, a0
963 ; RV64-NEXT: vsetvli zero, a1, e64, m1, ta, ma
964 ; RV64-NEXT: vredmax.vs v9, v8, v9, v0.t
965 ; RV64-NEXT: vmv.x.s a0, v9
967 %r = call i64 @llvm.vp.reduce.smax.v2i64(i64 %s, <2 x i64> %v, <2 x i1> %m, i32 %evl)
971 declare i64 @llvm.vp.reduce.umin.v2i64(i64, <2 x i64>, <2 x i1>, i32)
973 define signext i64 @vpreduce_umin_v2i64(i64 signext %s, <2 x i64> %v, <2 x i1> %m, i32 zeroext %evl) {
974 ; RV32-LABEL: vpreduce_umin_v2i64:
976 ; RV32-NEXT: addi sp, sp, -16
977 ; RV32-NEXT: .cfi_def_cfa_offset 16
978 ; RV32-NEXT: sw a1, 12(sp)
979 ; RV32-NEXT: sw a0, 8(sp)
980 ; RV32-NEXT: addi a0, sp, 8
981 ; RV32-NEXT: vsetivli zero, 1, e64, m1, ta, ma
982 ; RV32-NEXT: vlse64.v v9, (a0), zero
983 ; RV32-NEXT: vsetvli zero, a2, e64, m1, ta, ma
984 ; RV32-NEXT: vredminu.vs v9, v8, v9, v0.t
985 ; RV32-NEXT: vmv.x.s a0, v9
986 ; RV32-NEXT: li a1, 32
987 ; RV32-NEXT: vsetivli zero, 1, e64, m1, ta, ma
988 ; RV32-NEXT: vsrl.vx v8, v9, a1
989 ; RV32-NEXT: vmv.x.s a1, v8
990 ; RV32-NEXT: addi sp, sp, 16
993 ; RV64-LABEL: vpreduce_umin_v2i64:
995 ; RV64-NEXT: vsetivli zero, 1, e64, m1, ta, ma
996 ; RV64-NEXT: vmv.s.x v9, a0
997 ; RV64-NEXT: vsetvli zero, a1, e64, m1, ta, ma
998 ; RV64-NEXT: vredminu.vs v9, v8, v9, v0.t
999 ; RV64-NEXT: vmv.x.s a0, v9
1001 %r = call i64 @llvm.vp.reduce.umin.v2i64(i64 %s, <2 x i64> %v, <2 x i1> %m, i32 %evl)
1005 declare i64 @llvm.vp.reduce.smin.v2i64(i64, <2 x i64>, <2 x i1>, i32)
1007 define signext i64 @vpreduce_smin_v2i64(i64 signext %s, <2 x i64> %v, <2 x i1> %m, i32 zeroext %evl) {
1008 ; RV32-LABEL: vpreduce_smin_v2i64:
1010 ; RV32-NEXT: addi sp, sp, -16
1011 ; RV32-NEXT: .cfi_def_cfa_offset 16
1012 ; RV32-NEXT: sw a1, 12(sp)
1013 ; RV32-NEXT: sw a0, 8(sp)
1014 ; RV32-NEXT: addi a0, sp, 8
1015 ; RV32-NEXT: vsetivli zero, 1, e64, m1, ta, ma
1016 ; RV32-NEXT: vlse64.v v9, (a0), zero
1017 ; RV32-NEXT: vsetvli zero, a2, e64, m1, ta, ma
1018 ; RV32-NEXT: vredmin.vs v9, v8, v9, v0.t
1019 ; RV32-NEXT: vmv.x.s a0, v9
1020 ; RV32-NEXT: li a1, 32
1021 ; RV32-NEXT: vsetivli zero, 1, e64, m1, ta, ma
1022 ; RV32-NEXT: vsrl.vx v8, v9, a1
1023 ; RV32-NEXT: vmv.x.s a1, v8
1024 ; RV32-NEXT: addi sp, sp, 16
1027 ; RV64-LABEL: vpreduce_smin_v2i64:
1029 ; RV64-NEXT: vsetivli zero, 1, e64, m1, ta, ma
1030 ; RV64-NEXT: vmv.s.x v9, a0
1031 ; RV64-NEXT: vsetvli zero, a1, e64, m1, ta, ma
1032 ; RV64-NEXT: vredmin.vs v9, v8, v9, v0.t
1033 ; RV64-NEXT: vmv.x.s a0, v9
1035 %r = call i64 @llvm.vp.reduce.smin.v2i64(i64 %s, <2 x i64> %v, <2 x i1> %m, i32 %evl)
1039 declare i64 @llvm.vp.reduce.and.v2i64(i64, <2 x i64>, <2 x i1>, i32)
1041 define signext i64 @vpreduce_and_v2i64(i64 signext %s, <2 x i64> %v, <2 x i1> %m, i32 zeroext %evl) {
1042 ; RV32-LABEL: vpreduce_and_v2i64:
1044 ; RV32-NEXT: addi sp, sp, -16
1045 ; RV32-NEXT: .cfi_def_cfa_offset 16
1046 ; RV32-NEXT: sw a1, 12(sp)
1047 ; RV32-NEXT: sw a0, 8(sp)
1048 ; RV32-NEXT: addi a0, sp, 8
1049 ; RV32-NEXT: vsetivli zero, 1, e64, m1, ta, ma
1050 ; RV32-NEXT: vlse64.v v9, (a0), zero
1051 ; RV32-NEXT: vsetvli zero, a2, e64, m1, ta, ma
1052 ; RV32-NEXT: vredand.vs v9, v8, v9, v0.t
1053 ; RV32-NEXT: vmv.x.s a0, v9
1054 ; RV32-NEXT: li a1, 32
1055 ; RV32-NEXT: vsetivli zero, 1, e64, m1, ta, ma
1056 ; RV32-NEXT: vsrl.vx v8, v9, a1
1057 ; RV32-NEXT: vmv.x.s a1, v8
1058 ; RV32-NEXT: addi sp, sp, 16
1061 ; RV64-LABEL: vpreduce_and_v2i64:
1063 ; RV64-NEXT: vsetivli zero, 1, e64, m1, ta, ma
1064 ; RV64-NEXT: vmv.s.x v9, a0
1065 ; RV64-NEXT: vsetvli zero, a1, e64, m1, ta, ma
1066 ; RV64-NEXT: vredand.vs v9, v8, v9, v0.t
1067 ; RV64-NEXT: vmv.x.s a0, v9
1069 %r = call i64 @llvm.vp.reduce.and.v2i64(i64 %s, <2 x i64> %v, <2 x i1> %m, i32 %evl)
1073 declare i64 @llvm.vp.reduce.or.v2i64(i64, <2 x i64>, <2 x i1>, i32)
1075 define signext i64 @vpreduce_or_v2i64(i64 signext %s, <2 x i64> %v, <2 x i1> %m, i32 zeroext %evl) {
1076 ; RV32-LABEL: vpreduce_or_v2i64:
1078 ; RV32-NEXT: addi sp, sp, -16
1079 ; RV32-NEXT: .cfi_def_cfa_offset 16
1080 ; RV32-NEXT: sw a1, 12(sp)
1081 ; RV32-NEXT: sw a0, 8(sp)
1082 ; RV32-NEXT: addi a0, sp, 8
1083 ; RV32-NEXT: vsetivli zero, 1, e64, m1, ta, ma
1084 ; RV32-NEXT: vlse64.v v9, (a0), zero
1085 ; RV32-NEXT: vsetvli zero, a2, e64, m1, ta, ma
1086 ; RV32-NEXT: vredor.vs v9, v8, v9, v0.t
1087 ; RV32-NEXT: vmv.x.s a0, v9
1088 ; RV32-NEXT: li a1, 32
1089 ; RV32-NEXT: vsetivli zero, 1, e64, m1, ta, ma
1090 ; RV32-NEXT: vsrl.vx v8, v9, a1
1091 ; RV32-NEXT: vmv.x.s a1, v8
1092 ; RV32-NEXT: addi sp, sp, 16
1095 ; RV64-LABEL: vpreduce_or_v2i64:
1097 ; RV64-NEXT: vsetivli zero, 1, e64, m1, ta, ma
1098 ; RV64-NEXT: vmv.s.x v9, a0
1099 ; RV64-NEXT: vsetvli zero, a1, e64, m1, ta, ma
1100 ; RV64-NEXT: vredor.vs v9, v8, v9, v0.t
1101 ; RV64-NEXT: vmv.x.s a0, v9
1103 %r = call i64 @llvm.vp.reduce.or.v2i64(i64 %s, <2 x i64> %v, <2 x i1> %m, i32 %evl)
1107 declare i64 @llvm.vp.reduce.xor.v2i64(i64, <2 x i64>, <2 x i1>, i32)
1109 define signext i64 @vpreduce_xor_v2i64(i64 signext %s, <2 x i64> %v, <2 x i1> %m, i32 zeroext %evl) {
1110 ; RV32-LABEL: vpreduce_xor_v2i64:
1112 ; RV32-NEXT: addi sp, sp, -16
1113 ; RV32-NEXT: .cfi_def_cfa_offset 16
1114 ; RV32-NEXT: sw a1, 12(sp)
1115 ; RV32-NEXT: sw a0, 8(sp)
1116 ; RV32-NEXT: addi a0, sp, 8
1117 ; RV32-NEXT: vsetivli zero, 1, e64, m1, ta, ma
1118 ; RV32-NEXT: vlse64.v v9, (a0), zero
1119 ; RV32-NEXT: vsetvli zero, a2, e64, m1, ta, ma
1120 ; RV32-NEXT: vredxor.vs v9, v8, v9, v0.t
1121 ; RV32-NEXT: vmv.x.s a0, v9
1122 ; RV32-NEXT: li a1, 32
1123 ; RV32-NEXT: vsetivli zero, 1, e64, m1, ta, ma
1124 ; RV32-NEXT: vsrl.vx v8, v9, a1
1125 ; RV32-NEXT: vmv.x.s a1, v8
1126 ; RV32-NEXT: addi sp, sp, 16
1129 ; RV64-LABEL: vpreduce_xor_v2i64:
1131 ; RV64-NEXT: vsetivli zero, 1, e64, m1, ta, ma
1132 ; RV64-NEXT: vmv.s.x v9, a0
1133 ; RV64-NEXT: vsetvli zero, a1, e64, m1, ta, ma
1134 ; RV64-NEXT: vredxor.vs v9, v8, v9, v0.t
1135 ; RV64-NEXT: vmv.x.s a0, v9
1137 %r = call i64 @llvm.vp.reduce.xor.v2i64(i64 %s, <2 x i64> %v, <2 x i1> %m, i32 %evl)
1141 declare i64 @llvm.vp.reduce.add.v4i64(i64, <4 x i64>, <4 x i1>, i32)
1143 define signext i64 @vpreduce_add_v4i64(i64 signext %s, <4 x i64> %v, <4 x i1> %m, i32 zeroext %evl) {
1144 ; RV32-LABEL: vpreduce_add_v4i64:
1146 ; RV32-NEXT: addi sp, sp, -16
1147 ; RV32-NEXT: .cfi_def_cfa_offset 16
1148 ; RV32-NEXT: sw a1, 12(sp)
1149 ; RV32-NEXT: sw a0, 8(sp)
1150 ; RV32-NEXT: addi a0, sp, 8
1151 ; RV32-NEXT: vsetivli zero, 1, e64, m1, ta, ma
1152 ; RV32-NEXT: vlse64.v v10, (a0), zero
1153 ; RV32-NEXT: vsetvli zero, a2, e64, m2, ta, ma
1154 ; RV32-NEXT: vredsum.vs v10, v8, v10, v0.t
1155 ; RV32-NEXT: vmv.x.s a0, v10
1156 ; RV32-NEXT: li a1, 32
1157 ; RV32-NEXT: vsetivli zero, 1, e64, m1, ta, ma
1158 ; RV32-NEXT: vsrl.vx v8, v10, a1
1159 ; RV32-NEXT: vmv.x.s a1, v8
1160 ; RV32-NEXT: addi sp, sp, 16
1163 ; RV64-LABEL: vpreduce_add_v4i64:
1165 ; RV64-NEXT: vsetivli zero, 1, e64, m1, ta, ma
1166 ; RV64-NEXT: vmv.s.x v10, a0
1167 ; RV64-NEXT: vsetvli zero, a1, e64, m2, ta, ma
1168 ; RV64-NEXT: vredsum.vs v10, v8, v10, v0.t
1169 ; RV64-NEXT: vmv.x.s a0, v10
1171 %r = call i64 @llvm.vp.reduce.add.v4i64(i64 %s, <4 x i64> %v, <4 x i1> %m, i32 %evl)
1175 declare i64 @llvm.vp.reduce.umax.v4i64(i64, <4 x i64>, <4 x i1>, i32)
1177 define signext i64 @vpreduce_umax_v4i64(i64 signext %s, <4 x i64> %v, <4 x i1> %m, i32 zeroext %evl) {
1178 ; RV32-LABEL: vpreduce_umax_v4i64:
1180 ; RV32-NEXT: addi sp, sp, -16
1181 ; RV32-NEXT: .cfi_def_cfa_offset 16
1182 ; RV32-NEXT: sw a1, 12(sp)
1183 ; RV32-NEXT: sw a0, 8(sp)
1184 ; RV32-NEXT: addi a0, sp, 8
1185 ; RV32-NEXT: vsetivli zero, 1, e64, m1, ta, ma
1186 ; RV32-NEXT: vlse64.v v10, (a0), zero
1187 ; RV32-NEXT: vsetvli zero, a2, e64, m2, ta, ma
1188 ; RV32-NEXT: vredmaxu.vs v10, v8, v10, v0.t
1189 ; RV32-NEXT: vmv.x.s a0, v10
1190 ; RV32-NEXT: li a1, 32
1191 ; RV32-NEXT: vsetivli zero, 1, e64, m1, ta, ma
1192 ; RV32-NEXT: vsrl.vx v8, v10, a1
1193 ; RV32-NEXT: vmv.x.s a1, v8
1194 ; RV32-NEXT: addi sp, sp, 16
1197 ; RV64-LABEL: vpreduce_umax_v4i64:
1199 ; RV64-NEXT: vsetivli zero, 1, e64, m1, ta, ma
1200 ; RV64-NEXT: vmv.s.x v10, a0
1201 ; RV64-NEXT: vsetvli zero, a1, e64, m2, ta, ma
1202 ; RV64-NEXT: vredmaxu.vs v10, v8, v10, v0.t
1203 ; RV64-NEXT: vmv.x.s a0, v10
1205 %r = call i64 @llvm.vp.reduce.umax.v4i64(i64 %s, <4 x i64> %v, <4 x i1> %m, i32 %evl)
1209 declare i64 @llvm.vp.reduce.smax.v4i64(i64, <4 x i64>, <4 x i1>, i32)
1211 define signext i64 @vpreduce_smax_v4i64(i64 signext %s, <4 x i64> %v, <4 x i1> %m, i32 zeroext %evl) {
1212 ; RV32-LABEL: vpreduce_smax_v4i64:
1214 ; RV32-NEXT: addi sp, sp, -16
1215 ; RV32-NEXT: .cfi_def_cfa_offset 16
1216 ; RV32-NEXT: sw a1, 12(sp)
1217 ; RV32-NEXT: sw a0, 8(sp)
1218 ; RV32-NEXT: addi a0, sp, 8
1219 ; RV32-NEXT: vsetivli zero, 1, e64, m1, ta, ma
1220 ; RV32-NEXT: vlse64.v v10, (a0), zero
1221 ; RV32-NEXT: vsetvli zero, a2, e64, m2, ta, ma
1222 ; RV32-NEXT: vredmax.vs v10, v8, v10, v0.t
1223 ; RV32-NEXT: vmv.x.s a0, v10
1224 ; RV32-NEXT: li a1, 32
1225 ; RV32-NEXT: vsetivli zero, 1, e64, m1, ta, ma
1226 ; RV32-NEXT: vsrl.vx v8, v10, a1
1227 ; RV32-NEXT: vmv.x.s a1, v8
1228 ; RV32-NEXT: addi sp, sp, 16
1231 ; RV64-LABEL: vpreduce_smax_v4i64:
1233 ; RV64-NEXT: vsetivli zero, 1, e64, m1, ta, ma
1234 ; RV64-NEXT: vmv.s.x v10, a0
1235 ; RV64-NEXT: vsetvli zero, a1, e64, m2, ta, ma
1236 ; RV64-NEXT: vredmax.vs v10, v8, v10, v0.t
1237 ; RV64-NEXT: vmv.x.s a0, v10
1239 %r = call i64 @llvm.vp.reduce.smax.v4i64(i64 %s, <4 x i64> %v, <4 x i1> %m, i32 %evl)
1243 declare i64 @llvm.vp.reduce.umin.v4i64(i64, <4 x i64>, <4 x i1>, i32)
1245 define signext i64 @vpreduce_umin_v4i64(i64 signext %s, <4 x i64> %v, <4 x i1> %m, i32 zeroext %evl) {
1246 ; RV32-LABEL: vpreduce_umin_v4i64:
1248 ; RV32-NEXT: addi sp, sp, -16
1249 ; RV32-NEXT: .cfi_def_cfa_offset 16
1250 ; RV32-NEXT: sw a1, 12(sp)
1251 ; RV32-NEXT: sw a0, 8(sp)
1252 ; RV32-NEXT: addi a0, sp, 8
1253 ; RV32-NEXT: vsetivli zero, 1, e64, m1, ta, ma
1254 ; RV32-NEXT: vlse64.v v10, (a0), zero
1255 ; RV32-NEXT: vsetvli zero, a2, e64, m2, ta, ma
1256 ; RV32-NEXT: vredminu.vs v10, v8, v10, v0.t
1257 ; RV32-NEXT: vmv.x.s a0, v10
1258 ; RV32-NEXT: li a1, 32
1259 ; RV32-NEXT: vsetivli zero, 1, e64, m1, ta, ma
1260 ; RV32-NEXT: vsrl.vx v8, v10, a1
1261 ; RV32-NEXT: vmv.x.s a1, v8
1262 ; RV32-NEXT: addi sp, sp, 16
1265 ; RV64-LABEL: vpreduce_umin_v4i64:
1267 ; RV64-NEXT: vsetivli zero, 1, e64, m1, ta, ma
1268 ; RV64-NEXT: vmv.s.x v10, a0
1269 ; RV64-NEXT: vsetvli zero, a1, e64, m2, ta, ma
1270 ; RV64-NEXT: vredminu.vs v10, v8, v10, v0.t
1271 ; RV64-NEXT: vmv.x.s a0, v10
1273 %r = call i64 @llvm.vp.reduce.umin.v4i64(i64 %s, <4 x i64> %v, <4 x i1> %m, i32 %evl)
1277 declare i64 @llvm.vp.reduce.smin.v4i64(i64, <4 x i64>, <4 x i1>, i32)
1279 define signext i64 @vpreduce_smin_v4i64(i64 signext %s, <4 x i64> %v, <4 x i1> %m, i32 zeroext %evl) {
1280 ; RV32-LABEL: vpreduce_smin_v4i64:
1282 ; RV32-NEXT: addi sp, sp, -16
1283 ; RV32-NEXT: .cfi_def_cfa_offset 16
1284 ; RV32-NEXT: sw a1, 12(sp)
1285 ; RV32-NEXT: sw a0, 8(sp)
1286 ; RV32-NEXT: addi a0, sp, 8
1287 ; RV32-NEXT: vsetivli zero, 1, e64, m1, ta, ma
1288 ; RV32-NEXT: vlse64.v v10, (a0), zero
1289 ; RV32-NEXT: vsetvli zero, a2, e64, m2, ta, ma
1290 ; RV32-NEXT: vredmin.vs v10, v8, v10, v0.t
1291 ; RV32-NEXT: vmv.x.s a0, v10
1292 ; RV32-NEXT: li a1, 32
1293 ; RV32-NEXT: vsetivli zero, 1, e64, m1, ta, ma
1294 ; RV32-NEXT: vsrl.vx v8, v10, a1
1295 ; RV32-NEXT: vmv.x.s a1, v8
1296 ; RV32-NEXT: addi sp, sp, 16
1299 ; RV64-LABEL: vpreduce_smin_v4i64:
1301 ; RV64-NEXT: vsetivli zero, 1, e64, m1, ta, ma
1302 ; RV64-NEXT: vmv.s.x v10, a0
1303 ; RV64-NEXT: vsetvli zero, a1, e64, m2, ta, ma
1304 ; RV64-NEXT: vredmin.vs v10, v8, v10, v0.t
1305 ; RV64-NEXT: vmv.x.s a0, v10
1307 %r = call i64 @llvm.vp.reduce.smin.v4i64(i64 %s, <4 x i64> %v, <4 x i1> %m, i32 %evl)
1311 declare i64 @llvm.vp.reduce.and.v4i64(i64, <4 x i64>, <4 x i1>, i32)
1313 define signext i64 @vpreduce_and_v4i64(i64 signext %s, <4 x i64> %v, <4 x i1> %m, i32 zeroext %evl) {
1314 ; RV32-LABEL: vpreduce_and_v4i64:
1316 ; RV32-NEXT: addi sp, sp, -16
1317 ; RV32-NEXT: .cfi_def_cfa_offset 16
1318 ; RV32-NEXT: sw a1, 12(sp)
1319 ; RV32-NEXT: sw a0, 8(sp)
1320 ; RV32-NEXT: addi a0, sp, 8
1321 ; RV32-NEXT: vsetivli zero, 1, e64, m1, ta, ma
1322 ; RV32-NEXT: vlse64.v v10, (a0), zero
1323 ; RV32-NEXT: vsetvli zero, a2, e64, m2, ta, ma
1324 ; RV32-NEXT: vredand.vs v10, v8, v10, v0.t
1325 ; RV32-NEXT: vmv.x.s a0, v10
1326 ; RV32-NEXT: li a1, 32
1327 ; RV32-NEXT: vsetivli zero, 1, e64, m1, ta, ma
1328 ; RV32-NEXT: vsrl.vx v8, v10, a1
1329 ; RV32-NEXT: vmv.x.s a1, v8
1330 ; RV32-NEXT: addi sp, sp, 16
1333 ; RV64-LABEL: vpreduce_and_v4i64:
1335 ; RV64-NEXT: vsetivli zero, 1, e64, m1, ta, ma
1336 ; RV64-NEXT: vmv.s.x v10, a0
1337 ; RV64-NEXT: vsetvli zero, a1, e64, m2, ta, ma
1338 ; RV64-NEXT: vredand.vs v10, v8, v10, v0.t
1339 ; RV64-NEXT: vmv.x.s a0, v10
1341 %r = call i64 @llvm.vp.reduce.and.v4i64(i64 %s, <4 x i64> %v, <4 x i1> %m, i32 %evl)
1345 declare i64 @llvm.vp.reduce.or.v4i64(i64, <4 x i64>, <4 x i1>, i32)
1347 define signext i64 @vpreduce_or_v4i64(i64 signext %s, <4 x i64> %v, <4 x i1> %m, i32 zeroext %evl) {
1348 ; RV32-LABEL: vpreduce_or_v4i64:
1350 ; RV32-NEXT: addi sp, sp, -16
1351 ; RV32-NEXT: .cfi_def_cfa_offset 16
1352 ; RV32-NEXT: sw a1, 12(sp)
1353 ; RV32-NEXT: sw a0, 8(sp)
1354 ; RV32-NEXT: addi a0, sp, 8
1355 ; RV32-NEXT: vsetivli zero, 1, e64, m1, ta, ma
1356 ; RV32-NEXT: vlse64.v v10, (a0), zero
1357 ; RV32-NEXT: vsetvli zero, a2, e64, m2, ta, ma
1358 ; RV32-NEXT: vredor.vs v10, v8, v10, v0.t
1359 ; RV32-NEXT: vmv.x.s a0, v10
1360 ; RV32-NEXT: li a1, 32
1361 ; RV32-NEXT: vsetivli zero, 1, e64, m1, ta, ma
1362 ; RV32-NEXT: vsrl.vx v8, v10, a1
1363 ; RV32-NEXT: vmv.x.s a1, v8
1364 ; RV32-NEXT: addi sp, sp, 16
1367 ; RV64-LABEL: vpreduce_or_v4i64:
1369 ; RV64-NEXT: vsetivli zero, 1, e64, m1, ta, ma
1370 ; RV64-NEXT: vmv.s.x v10, a0
1371 ; RV64-NEXT: vsetvli zero, a1, e64, m2, ta, ma
1372 ; RV64-NEXT: vredor.vs v10, v8, v10, v0.t
1373 ; RV64-NEXT: vmv.x.s a0, v10
1375 %r = call i64 @llvm.vp.reduce.or.v4i64(i64 %s, <4 x i64> %v, <4 x i1> %m, i32 %evl)
1379 declare i64 @llvm.vp.reduce.xor.v4i64(i64, <4 x i64>, <4 x i1>, i32)
1381 define signext i64 @vpreduce_xor_v4i64(i64 signext %s, <4 x i64> %v, <4 x i1> %m, i32 zeroext %evl) {
1382 ; RV32-LABEL: vpreduce_xor_v4i64:
1384 ; RV32-NEXT: addi sp, sp, -16
1385 ; RV32-NEXT: .cfi_def_cfa_offset 16
1386 ; RV32-NEXT: sw a1, 12(sp)
1387 ; RV32-NEXT: sw a0, 8(sp)
1388 ; RV32-NEXT: addi a0, sp, 8
1389 ; RV32-NEXT: vsetivli zero, 1, e64, m1, ta, ma
1390 ; RV32-NEXT: vlse64.v v10, (a0), zero
1391 ; RV32-NEXT: vsetvli zero, a2, e64, m2, ta, ma
1392 ; RV32-NEXT: vredxor.vs v10, v8, v10, v0.t
1393 ; RV32-NEXT: vmv.x.s a0, v10
1394 ; RV32-NEXT: li a1, 32
1395 ; RV32-NEXT: vsetivli zero, 1, e64, m1, ta, ma
1396 ; RV32-NEXT: vsrl.vx v8, v10, a1
1397 ; RV32-NEXT: vmv.x.s a1, v8
1398 ; RV32-NEXT: addi sp, sp, 16
1401 ; RV64-LABEL: vpreduce_xor_v4i64:
1403 ; RV64-NEXT: vsetivli zero, 1, e64, m1, ta, ma
1404 ; RV64-NEXT: vmv.s.x v10, a0
1405 ; RV64-NEXT: vsetvli zero, a1, e64, m2, ta, ma
1406 ; RV64-NEXT: vredxor.vs v10, v8, v10, v0.t
1407 ; RV64-NEXT: vmv.x.s a0, v10
1409 %r = call i64 @llvm.vp.reduce.xor.v4i64(i64 %s, <4 x i64> %v, <4 x i1> %m, i32 %evl)
1413 declare i8 @llvm.vp.reduce.mul.v1i8(i8, <1 x i8>, <1 x i1>, i32)
1415 define i8 @vpreduce_mul_v1i8(i8 %s, <1 x i8> %v, <1 x i1> %m, i32 zeroext %evl) {
1416 ; RV32-LABEL: vpreduce_mul_v1i8:
1418 ; RV32-NEXT: addi sp, sp, -16
1419 ; RV32-NEXT: .cfi_def_cfa_offset 16
1420 ; RV32-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
1421 ; RV32-NEXT: .cfi_offset ra, -4
1422 ; RV32-NEXT: mv a2, a0
1423 ; RV32-NEXT: vsetivli zero, 1, e32, mf2, ta, ma
1424 ; RV32-NEXT: vmv.s.x v9, a1
1425 ; RV32-NEXT: vmsne.vi v9, v9, 0
1426 ; RV32-NEXT: vmand.mm v0, v9, v0
1427 ; RV32-NEXT: vmv.v.i v9, 1
1428 ; RV32-NEXT: vsetvli zero, zero, e8, mf8, ta, ma
1429 ; RV32-NEXT: vmerge.vvm v8, v9, v8, v0
1430 ; RV32-NEXT: vmv.x.s a0, v8
1431 ; RV32-NEXT: mv a1, a2
1432 ; RV32-NEXT: call __mulsi3@plt
1433 ; RV32-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
1434 ; RV32-NEXT: addi sp, sp, 16
1437 ; RV64-LABEL: vpreduce_mul_v1i8:
1439 ; RV64-NEXT: addi sp, sp, -16
1440 ; RV64-NEXT: .cfi_def_cfa_offset 16
1441 ; RV64-NEXT: sd ra, 8(sp) # 8-byte Folded Spill
1442 ; RV64-NEXT: .cfi_offset ra, -8
1443 ; RV64-NEXT: mv a2, a0
1444 ; RV64-NEXT: vsetivli zero, 1, e32, mf2, ta, ma
1445 ; RV64-NEXT: vmv.s.x v9, a1
1446 ; RV64-NEXT: vmsne.vi v9, v9, 0
1447 ; RV64-NEXT: vmand.mm v0, v9, v0
1448 ; RV64-NEXT: vmv.v.i v9, 1
1449 ; RV64-NEXT: vsetvli zero, zero, e8, mf8, ta, ma
1450 ; RV64-NEXT: vmerge.vvm v8, v9, v8, v0
1451 ; RV64-NEXT: vmv.x.s a0, v8
1452 ; RV64-NEXT: mv a1, a2
1453 ; RV64-NEXT: call __muldi3@plt
1454 ; RV64-NEXT: ld ra, 8(sp) # 8-byte Folded Reload
1455 ; RV64-NEXT: addi sp, sp, 16
1457 %r = call i8 @llvm.vp.reduce.mul.v1i8(i8 %s, <1 x i8> %v, <1 x i1> %m, i32 %evl)
1461 declare i8 @llvm.vp.reduce.mul.v2i8(i8, <2 x i8>, <2 x i1>, i32)
1463 define signext i8 @vpreduce_mul_v2i8(i8 signext %s, <2 x i8> %v, <2 x i1> %m, i32 zeroext %evl) {
1464 ; RV32-LABEL: vpreduce_mul_v2i8:
1466 ; RV32-NEXT: addi sp, sp, -16
1467 ; RV32-NEXT: .cfi_def_cfa_offset 16
1468 ; RV32-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
1469 ; RV32-NEXT: .cfi_offset ra, -4
1470 ; RV32-NEXT: mv a2, a0
1471 ; RV32-NEXT: vsetivli zero, 2, e32, mf2, ta, ma
1472 ; RV32-NEXT: vid.v v9
1473 ; RV32-NEXT: vmsltu.vx v9, v9, a1
1474 ; RV32-NEXT: vmand.mm v0, v9, v0
1475 ; RV32-NEXT: vsetvli zero, zero, e8, mf8, ta, ma
1476 ; RV32-NEXT: vmv.v.i v9, 1
1477 ; RV32-NEXT: vmerge.vvm v8, v9, v8, v0
1478 ; RV32-NEXT: vrgather.vi v9, v8, 1
1479 ; RV32-NEXT: vmul.vv v8, v8, v9
1480 ; RV32-NEXT: vmv.x.s a0, v8
1481 ; RV32-NEXT: mv a1, a2
1482 ; RV32-NEXT: call __mulsi3@plt
1483 ; RV32-NEXT: slli a0, a0, 24
1484 ; RV32-NEXT: srai a0, a0, 24
1485 ; RV32-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
1486 ; RV32-NEXT: addi sp, sp, 16
1489 ; RV64-LABEL: vpreduce_mul_v2i8:
1491 ; RV64-NEXT: addi sp, sp, -16
1492 ; RV64-NEXT: .cfi_def_cfa_offset 16
1493 ; RV64-NEXT: sd ra, 8(sp) # 8-byte Folded Spill
1494 ; RV64-NEXT: .cfi_offset ra, -8
1495 ; RV64-NEXT: mv a2, a0
1496 ; RV64-NEXT: vsetivli zero, 2, e32, mf2, ta, ma
1497 ; RV64-NEXT: vid.v v9
1498 ; RV64-NEXT: vmsltu.vx v9, v9, a1
1499 ; RV64-NEXT: vmand.mm v0, v9, v0
1500 ; RV64-NEXT: vsetvli zero, zero, e8, mf8, ta, ma
1501 ; RV64-NEXT: vmv.v.i v9, 1
1502 ; RV64-NEXT: vmerge.vvm v8, v9, v8, v0
1503 ; RV64-NEXT: vrgather.vi v9, v8, 1
1504 ; RV64-NEXT: vmul.vv v8, v8, v9
1505 ; RV64-NEXT: vmv.x.s a0, v8
1506 ; RV64-NEXT: mv a1, a2
1507 ; RV64-NEXT: call __muldi3@plt
1508 ; RV64-NEXT: slli a0, a0, 56
1509 ; RV64-NEXT: srai a0, a0, 56
1510 ; RV64-NEXT: ld ra, 8(sp) # 8-byte Folded Reload
1511 ; RV64-NEXT: addi sp, sp, 16
1513 %r = call i8 @llvm.vp.reduce.mul.v2i8(i8 %s, <2 x i8> %v, <2 x i1> %m, i32 %evl)
1517 declare i8 @llvm.vp.reduce.mul.v4i8(i8, <4 x i8>, <4 x i1>, i32)
1519 define signext i8 @vpreduce_mul_v4i8(i8 signext %s, <4 x i8> %v, <4 x i1> %m, i32 zeroext %evl) {
1520 ; RV32-LABEL: vpreduce_mul_v4i8:
1522 ; RV32-NEXT: addi sp, sp, -16
1523 ; RV32-NEXT: .cfi_def_cfa_offset 16
1524 ; RV32-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
1525 ; RV32-NEXT: .cfi_offset ra, -4
1526 ; RV32-NEXT: mv a2, a0
1527 ; RV32-NEXT: vsetivli zero, 4, e32, m1, ta, ma
1528 ; RV32-NEXT: vid.v v9
1529 ; RV32-NEXT: vmsltu.vx v9, v9, a1
1530 ; RV32-NEXT: vmand.mm v0, v9, v0
1531 ; RV32-NEXT: vsetvli zero, zero, e8, mf4, ta, ma
1532 ; RV32-NEXT: vmv.v.i v9, 1
1533 ; RV32-NEXT: vmerge.vvm v8, v9, v8, v0
1534 ; RV32-NEXT: vslidedown.vi v9, v8, 2
1535 ; RV32-NEXT: vmul.vv v8, v8, v9
1536 ; RV32-NEXT: vrgather.vi v9, v8, 1
1537 ; RV32-NEXT: vmul.vv v8, v8, v9
1538 ; RV32-NEXT: vmv.x.s a0, v8
1539 ; RV32-NEXT: mv a1, a2
1540 ; RV32-NEXT: call __mulsi3@plt
1541 ; RV32-NEXT: slli a0, a0, 24
1542 ; RV32-NEXT: srai a0, a0, 24
1543 ; RV32-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
1544 ; RV32-NEXT: addi sp, sp, 16
1547 ; RV64-LABEL: vpreduce_mul_v4i8:
1549 ; RV64-NEXT: addi sp, sp, -16
1550 ; RV64-NEXT: .cfi_def_cfa_offset 16
1551 ; RV64-NEXT: sd ra, 8(sp) # 8-byte Folded Spill
1552 ; RV64-NEXT: .cfi_offset ra, -8
1553 ; RV64-NEXT: mv a2, a0
1554 ; RV64-NEXT: vsetivli zero, 4, e32, m1, ta, ma
1555 ; RV64-NEXT: vid.v v9
1556 ; RV64-NEXT: vmsltu.vx v9, v9, a1
1557 ; RV64-NEXT: vmand.mm v0, v9, v0
1558 ; RV64-NEXT: vsetvli zero, zero, e8, mf4, ta, ma
1559 ; RV64-NEXT: vmv.v.i v9, 1
1560 ; RV64-NEXT: vmerge.vvm v8, v9, v8, v0
1561 ; RV64-NEXT: vslidedown.vi v9, v8, 2
1562 ; RV64-NEXT: vmul.vv v8, v8, v9
1563 ; RV64-NEXT: vrgather.vi v9, v8, 1
1564 ; RV64-NEXT: vmul.vv v8, v8, v9
1565 ; RV64-NEXT: vmv.x.s a0, v8
1566 ; RV64-NEXT: mv a1, a2
1567 ; RV64-NEXT: call __muldi3@plt
1568 ; RV64-NEXT: slli a0, a0, 56
1569 ; RV64-NEXT: srai a0, a0, 56
1570 ; RV64-NEXT: ld ra, 8(sp) # 8-byte Folded Reload
1571 ; RV64-NEXT: addi sp, sp, 16
1573 %r = call i8 @llvm.vp.reduce.mul.v4i8(i8 %s, <4 x i8> %v, <4 x i1> %m, i32 %evl)
1577 declare i8 @llvm.vp.reduce.mul.v8i8(i8, <8 x i8>, <8 x i1>, i32)
1579 define signext i8 @vpreduce_mul_v8i8(i8 signext %s, <8 x i8> %v, <8 x i1> %m, i32 zeroext %evl) {
1580 ; RV32-LABEL: vpreduce_mul_v8i8:
1582 ; RV32-NEXT: addi sp, sp, -16
1583 ; RV32-NEXT: .cfi_def_cfa_offset 16
1584 ; RV32-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
1585 ; RV32-NEXT: .cfi_offset ra, -4
1586 ; RV32-NEXT: mv a2, a0
1587 ; RV32-NEXT: vsetivli zero, 8, e32, m2, ta, ma
1588 ; RV32-NEXT: vid.v v10
1589 ; RV32-NEXT: vmsltu.vx v9, v10, a1
1590 ; RV32-NEXT: vmand.mm v0, v9, v0
1591 ; RV32-NEXT: vsetvli zero, zero, e8, mf2, ta, ma
1592 ; RV32-NEXT: vmv.v.i v9, 1
1593 ; RV32-NEXT: vmerge.vvm v8, v9, v8, v0
1594 ; RV32-NEXT: vslidedown.vi v9, v8, 4
1595 ; RV32-NEXT: vmul.vv v8, v8, v9
1596 ; RV32-NEXT: vslidedown.vi v9, v8, 2
1597 ; RV32-NEXT: vmul.vv v8, v8, v9
1598 ; RV32-NEXT: vrgather.vi v9, v8, 1
1599 ; RV32-NEXT: vmul.vv v8, v8, v9
1600 ; RV32-NEXT: vmv.x.s a0, v8
1601 ; RV32-NEXT: mv a1, a2
1602 ; RV32-NEXT: call __mulsi3@plt
1603 ; RV32-NEXT: slli a0, a0, 24
1604 ; RV32-NEXT: srai a0, a0, 24
1605 ; RV32-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
1606 ; RV32-NEXT: addi sp, sp, 16
1609 ; RV64-LABEL: vpreduce_mul_v8i8:
1611 ; RV64-NEXT: addi sp, sp, -16
1612 ; RV64-NEXT: .cfi_def_cfa_offset 16
1613 ; RV64-NEXT: sd ra, 8(sp) # 8-byte Folded Spill
1614 ; RV64-NEXT: .cfi_offset ra, -8
1615 ; RV64-NEXT: mv a2, a0
1616 ; RV64-NEXT: vsetivli zero, 8, e32, m2, ta, ma
1617 ; RV64-NEXT: vid.v v10
1618 ; RV64-NEXT: vmsltu.vx v9, v10, a1
1619 ; RV64-NEXT: vmand.mm v0, v9, v0
1620 ; RV64-NEXT: vsetvli zero, zero, e8, mf2, ta, ma
1621 ; RV64-NEXT: vmv.v.i v9, 1
1622 ; RV64-NEXT: vmerge.vvm v8, v9, v8, v0
1623 ; RV64-NEXT: vslidedown.vi v9, v8, 4
1624 ; RV64-NEXT: vmul.vv v8, v8, v9
1625 ; RV64-NEXT: vslidedown.vi v9, v8, 2
1626 ; RV64-NEXT: vmul.vv v8, v8, v9
1627 ; RV64-NEXT: vrgather.vi v9, v8, 1
1628 ; RV64-NEXT: vmul.vv v8, v8, v9
1629 ; RV64-NEXT: vmv.x.s a0, v8
1630 ; RV64-NEXT: mv a1, a2
1631 ; RV64-NEXT: call __muldi3@plt
1632 ; RV64-NEXT: slli a0, a0, 56
1633 ; RV64-NEXT: srai a0, a0, 56
1634 ; RV64-NEXT: ld ra, 8(sp) # 8-byte Folded Reload
1635 ; RV64-NEXT: addi sp, sp, 16
1637 %r = call i8 @llvm.vp.reduce.mul.v8i8(i8 %s, <8 x i8> %v, <8 x i1> %m, i32 %evl)
declare i8 @llvm.vp.reduce.mul.v16i8(i8, <16 x i8>, <16 x i1>, i32)

define signext i8 @vpreduce_mul_v16i8(i8 signext %s, <16 x i8> %v, <16 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vpreduce_mul_v16i8:
; RV32: # %bb.0:
; RV32-NEXT: addi sp, sp, -16
; RV32-NEXT: .cfi_def_cfa_offset 16
; RV32-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
; RV32-NEXT: .cfi_offset ra, -4
; RV32-NEXT: mv a2, a0
; RV32-NEXT: vsetivli zero, 16, e32, m4, ta, ma
; RV32-NEXT: vid.v v12
; RV32-NEXT: vmsltu.vx v9, v12, a1
; RV32-NEXT: vmand.mm v0, v9, v0
; RV32-NEXT: vsetvli zero, zero, e8, m1, ta, ma
; RV32-NEXT: vmv.v.i v9, 1
; RV32-NEXT: vmerge.vvm v8, v9, v8, v0
; RV32-NEXT: vslidedown.vi v9, v8, 8
; RV32-NEXT: vmul.vv v8, v8, v9
; RV32-NEXT: vslidedown.vi v9, v8, 4
; RV32-NEXT: vmul.vv v8, v8, v9
; RV32-NEXT: vslidedown.vi v9, v8, 2
; RV32-NEXT: vmul.vv v8, v8, v9
; RV32-NEXT: vrgather.vi v9, v8, 1
; RV32-NEXT: vmul.vv v8, v8, v9
; RV32-NEXT: vmv.x.s a0, v8
; RV32-NEXT: mv a1, a2
; RV32-NEXT: call __mulsi3@plt
; RV32-NEXT: slli a0, a0, 24
; RV32-NEXT: srai a0, a0, 24
; RV32-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
; RV32-NEXT: addi sp, sp, 16
; RV32-NEXT: ret
;
; RV64-LABEL: vpreduce_mul_v16i8:
; RV64: # %bb.0:
; RV64-NEXT: addi sp, sp, -16
; RV64-NEXT: .cfi_def_cfa_offset 16
; RV64-NEXT: sd ra, 8(sp) # 8-byte Folded Spill
; RV64-NEXT: .cfi_offset ra, -8
; RV64-NEXT: mv a2, a0
; RV64-NEXT: vsetivli zero, 16, e32, m4, ta, ma
; RV64-NEXT: vid.v v12
; RV64-NEXT: vmsltu.vx v9, v12, a1
; RV64-NEXT: vmand.mm v0, v9, v0
; RV64-NEXT: vsetvli zero, zero, e8, m1, ta, ma
; RV64-NEXT: vmv.v.i v9, 1
; RV64-NEXT: vmerge.vvm v8, v9, v8, v0
; RV64-NEXT: vslidedown.vi v9, v8, 8
; RV64-NEXT: vmul.vv v8, v8, v9
; RV64-NEXT: vslidedown.vi v9, v8, 4
; RV64-NEXT: vmul.vv v8, v8, v9
; RV64-NEXT: vslidedown.vi v9, v8, 2
; RV64-NEXT: vmul.vv v8, v8, v9
; RV64-NEXT: vrgather.vi v9, v8, 1
; RV64-NEXT: vmul.vv v8, v8, v9
; RV64-NEXT: vmv.x.s a0, v8
; RV64-NEXT: mv a1, a2
; RV64-NEXT: call __muldi3@plt
; RV64-NEXT: slli a0, a0, 56
; RV64-NEXT: srai a0, a0, 56
; RV64-NEXT: ld ra, 8(sp) # 8-byte Folded Reload
; RV64-NEXT: addi sp, sp, 16
; RV64-NEXT: ret
  %r = call i8 @llvm.vp.reduce.mul.v16i8(i8 %s, <16 x i8> %v, <16 x i1> %m, i32 %evl)
  ret i8 %r
}

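; At 32 elements the VL no longer fits vsetivli's 5-bit immediate (0-31), so the
; checks below expect it to be materialized with li/vsetvli, and the e32 index
; vector used for the EVL mask grows to LMUL=8.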
declare i8 @llvm.vp.reduce.mul.v32i8(i8, <32 x i8>, <32 x i1>, i32)

define signext i8 @vpreduce_mul_v32i8(i8 signext %s, <32 x i8> %v, <32 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vpreduce_mul_v32i8:
; RV32: # %bb.0:
; RV32-NEXT: addi sp, sp, -16
; RV32-NEXT: .cfi_def_cfa_offset 16
; RV32-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
; RV32-NEXT: .cfi_offset ra, -4
; RV32-NEXT: mv a2, a0
; RV32-NEXT: li a0, 32
; RV32-NEXT: vsetvli zero, a0, e32, m8, ta, ma
; RV32-NEXT: vid.v v16
; RV32-NEXT: vmsltu.vx v10, v16, a1
; RV32-NEXT: vmand.mm v0, v10, v0
; RV32-NEXT: vsetvli zero, zero, e8, m2, ta, ma
; RV32-NEXT: vmv.v.i v10, 1
; RV32-NEXT: vmerge.vvm v8, v10, v8, v0
; RV32-NEXT: vslidedown.vi v10, v8, 16
; RV32-NEXT: vmul.vv v8, v8, v10
; RV32-NEXT: vslidedown.vi v10, v8, 8
; RV32-NEXT: vmul.vv v8, v8, v10
; RV32-NEXT: vslidedown.vi v10, v8, 4
; RV32-NEXT: vmul.vv v8, v8, v10
; RV32-NEXT: vslidedown.vi v10, v8, 2
; RV32-NEXT: vmul.vv v8, v8, v10
; RV32-NEXT: vrgather.vi v10, v8, 1
; RV32-NEXT: vmul.vv v8, v8, v10
; RV32-NEXT: vmv.x.s a0, v8
; RV32-NEXT: mv a1, a2
; RV32-NEXT: call __mulsi3@plt
; RV32-NEXT: slli a0, a0, 24
; RV32-NEXT: srai a0, a0, 24
; RV32-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
; RV32-NEXT: addi sp, sp, 16
; RV32-NEXT: ret
;
; RV64-LABEL: vpreduce_mul_v32i8:
; RV64: # %bb.0:
; RV64-NEXT: addi sp, sp, -16
; RV64-NEXT: .cfi_def_cfa_offset 16
; RV64-NEXT: sd ra, 8(sp) # 8-byte Folded Spill
; RV64-NEXT: .cfi_offset ra, -8
; RV64-NEXT: mv a2, a0
; RV64-NEXT: li a0, 32
; RV64-NEXT: vsetvli zero, a0, e32, m8, ta, ma
; RV64-NEXT: vid.v v16
; RV64-NEXT: vmsltu.vx v10, v16, a1
; RV64-NEXT: vmand.mm v0, v10, v0
; RV64-NEXT: vsetvli zero, zero, e8, m2, ta, ma
; RV64-NEXT: vmv.v.i v10, 1
; RV64-NEXT: vmerge.vvm v8, v10, v8, v0
; RV64-NEXT: vslidedown.vi v10, v8, 16
; RV64-NEXT: vmul.vv v8, v8, v10
; RV64-NEXT: vslidedown.vi v10, v8, 8
; RV64-NEXT: vmul.vv v8, v8, v10
; RV64-NEXT: vslidedown.vi v10, v8, 4
; RV64-NEXT: vmul.vv v8, v8, v10
; RV64-NEXT: vslidedown.vi v10, v8, 2
; RV64-NEXT: vmul.vv v8, v8, v10
; RV64-NEXT: vrgather.vi v10, v8, 1
; RV64-NEXT: vmul.vv v8, v8, v10
; RV64-NEXT: vmv.x.s a0, v8
; RV64-NEXT: mv a1, a2
; RV64-NEXT: call __muldi3@plt
; RV64-NEXT: slli a0, a0, 56
; RV64-NEXT: srai a0, a0, 56
; RV64-NEXT: ld ra, 8(sp) # 8-byte Folded Reload
; RV64-NEXT: addi sp, sp, 16
; RV64-NEXT: ret
  %r = call i8 @llvm.vp.reduce.mul.v32i8(i8 %s, <32 x i8> %v, <32 x i1> %m, i32 %evl)
  ret i8 %r
}

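; With 64 elements the mask of active lanes cannot come from a single vid.v at
; SEW=32 (an e32 LMUL=8 vector is only guaranteed to hold 32 elements), so the
; checks below expect lane indices 0-31 from vid.v, the upper indices loaded from
; the constant pool (.LCPI72_0) and widened with vsext.vf4, both halves compared
; against the EVL, and the two mask halves concatenated with vslideup.vi before
; the vmand.mm with %m.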
declare i8 @llvm.vp.reduce.mul.v64i8(i8, <64 x i8>, <64 x i1>, i32)

define signext i8 @vpreduce_mul_v64i8(i8 signext %s, <64 x i8> %v, <64 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vpreduce_mul_v64i8:
; RV32: # %bb.0:
; RV32-NEXT: addi sp, sp, -16
; RV32-NEXT: .cfi_def_cfa_offset 16
; RV32-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
; RV32-NEXT: .cfi_offset ra, -4
; RV32-NEXT: li a3, 32
; RV32-NEXT: vsetvli zero, a3, e32, m8, ta, ma
; RV32-NEXT: lui a2, %hi(.LCPI72_0)
; RV32-NEXT: addi a2, a2, %lo(.LCPI72_0)
; RV32-NEXT: vle8.v v12, (a2)
; RV32-NEXT: mv a2, a0
; RV32-NEXT: vid.v v16
; RV32-NEXT: vmsltu.vx v14, v16, a1
; RV32-NEXT: vsext.vf4 v16, v12
; RV32-NEXT: vmsltu.vx v12, v16, a1
; RV32-NEXT: vsetivli zero, 8, e8, mf2, ta, ma
; RV32-NEXT: vslideup.vi v14, v12, 4
; RV32-NEXT: li a0, 64
; RV32-NEXT: vsetvli zero, a0, e8, m4, ta, ma
; RV32-NEXT: vmand.mm v0, v14, v0
; RV32-NEXT: vmv.v.i v12, 1
; RV32-NEXT: vmerge.vvm v8, v12, v8, v0
; RV32-NEXT: vslidedown.vx v12, v8, a3
; RV32-NEXT: vmul.vv v8, v8, v12
; RV32-NEXT: vslidedown.vi v12, v8, 16
; RV32-NEXT: vmul.vv v8, v8, v12
; RV32-NEXT: vslidedown.vi v12, v8, 8
; RV32-NEXT: vmul.vv v8, v8, v12
; RV32-NEXT: vslidedown.vi v12, v8, 4
; RV32-NEXT: vmul.vv v8, v8, v12
; RV32-NEXT: vslidedown.vi v12, v8, 2
; RV32-NEXT: vmul.vv v8, v8, v12
; RV32-NEXT: vrgather.vi v12, v8, 1
; RV32-NEXT: vmul.vv v8, v8, v12
; RV32-NEXT: vmv.x.s a0, v8
; RV32-NEXT: mv a1, a2
; RV32-NEXT: call __mulsi3@plt
; RV32-NEXT: slli a0, a0, 24
; RV32-NEXT: srai a0, a0, 24
; RV32-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
; RV32-NEXT: addi sp, sp, 16
; RV32-NEXT: ret
;
; RV64-LABEL: vpreduce_mul_v64i8:
; RV64: # %bb.0:
; RV64-NEXT: addi sp, sp, -16
; RV64-NEXT: .cfi_def_cfa_offset 16
; RV64-NEXT: sd ra, 8(sp) # 8-byte Folded Spill
; RV64-NEXT: .cfi_offset ra, -8
; RV64-NEXT: li a3, 32
; RV64-NEXT: vsetvli zero, a3, e32, m8, ta, ma
; RV64-NEXT: lui a2, %hi(.LCPI72_0)
; RV64-NEXT: addi a2, a2, %lo(.LCPI72_0)
; RV64-NEXT: vle8.v v12, (a2)
; RV64-NEXT: mv a2, a0
; RV64-NEXT: vid.v v16
; RV64-NEXT: vmsltu.vx v14, v16, a1
; RV64-NEXT: vsext.vf4 v16, v12
; RV64-NEXT: vmsltu.vx v12, v16, a1
; RV64-NEXT: vsetivli zero, 8, e8, mf2, ta, ma
; RV64-NEXT: vslideup.vi v14, v12, 4
; RV64-NEXT: li a0, 64
; RV64-NEXT: vsetvli zero, a0, e8, m4, ta, ma
; RV64-NEXT: vmand.mm v0, v14, v0
; RV64-NEXT: vmv.v.i v12, 1
; RV64-NEXT: vmerge.vvm v8, v12, v8, v0
; RV64-NEXT: vslidedown.vx v12, v8, a3
; RV64-NEXT: vmul.vv v8, v8, v12
; RV64-NEXT: vslidedown.vi v12, v8, 16
; RV64-NEXT: vmul.vv v8, v8, v12
; RV64-NEXT: vslidedown.vi v12, v8, 8
; RV64-NEXT: vmul.vv v8, v8, v12
; RV64-NEXT: vslidedown.vi v12, v8, 4
; RV64-NEXT: vmul.vv v8, v8, v12
; RV64-NEXT: vslidedown.vi v12, v8, 2
; RV64-NEXT: vmul.vv v8, v8, v12
; RV64-NEXT: vrgather.vi v12, v8, 1
; RV64-NEXT: vmul.vv v8, v8, v12
; RV64-NEXT: vmv.x.s a0, v8
; RV64-NEXT: mv a1, a2
; RV64-NEXT: call __muldi3@plt
; RV64-NEXT: slli a0, a0, 56
; RV64-NEXT: srai a0, a0, 56
; RV64-NEXT: ld ra, 8(sp) # 8-byte Folded Reload
; RV64-NEXT: addi sp, sp, 16
; RV64-NEXT: ret
  %r = call i8 @llvm.vp.reduce.mul.v64i8(i8 %s, <64 x i8> %v, <64 x i1> %m, i32 %evl)
  ret i8 %r
}

; Test when the start value is the first element of a vector.
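; The start value already lives in element 0 of v8, so the expected lowering
; feeds v8 to vredand.vs as both the vector source and the scalar operand rather
; than materializing the start value with a separate vmv.s.x.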
define zeroext i8 @front_ele_v4i8(<4 x i8> %v, <4 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: front_ele_v4i8:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetvli zero, a0, e8, mf4, ta, ma
; CHECK-NEXT: vredand.vs v8, v8, v8, v0.t
; CHECK-NEXT: vmv.x.s a0, v8
; CHECK-NEXT: andi a0, a0, 255
; CHECK-NEXT: ret
  %s = extractelement <4 x i8> %v, i64 0
  %r = call i8 @llvm.vp.reduce.and.v4i8(i8 %s, <4 x i8> %v, <4 x i1> %m, i32 %evl)
  ret i8 %r
}

; Test when the start value is the first element of a vector that is longer than M1.
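; The same folding applies to an LMUL=2 register group: element 0 of v8 is used
; directly as the scalar operand of vredand.vs.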
declare i8 @llvm.vp.reduce.and.v32i8(i8, <32 x i8>, <32 x i1>, i32)
define zeroext i8 @front_ele_v32i8(<32 x i8> %v, <32 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: front_ele_v32i8:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetvli zero, a0, e8, m2, ta, ma
; CHECK-NEXT: vredand.vs v8, v8, v8, v0.t
; CHECK-NEXT: vmv.x.s a0, v8
; CHECK-NEXT: andi a0, a0, 255
; CHECK-NEXT: ret
  %s = extractelement <32 x i8> %v, i64 0
  %r = call i8 @llvm.vp.reduce.and.v32i8(i8 %s, <32 x i8> %v, <32 x i1> %m, i32 %evl)
  ret i8 %r
}