1 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2 ; RUN: llc -mtriple=riscv32 -mattr=+v -verify-machineinstrs < %s \
3 ; RUN: | FileCheck %s --check-prefixes=CHECK,RV32
4 ; RUN: llc -mtriple=riscv64 -mattr=+v -verify-machineinstrs < %s \
5 ; RUN: | FileCheck %s --check-prefixes=CHECK,RV64
; Masked vp.reduce.add on <vscale x 1 x i8>; RV32 and RV64 share the CHECK lines.
; Expected: seed v9 from a0 (vmv.s.x, AVL 1), vsetvli from a1, vredsum.vs under v0.t.
7 declare i8 @llvm.vp.reduce.add.nxv1i8(i8, <vscale x 1 x i8>, <vscale x 1 x i1>, i32)
9 define signext i8 @vpreduce_add_nxv1i8(i8 signext %s, <vscale x 1 x i8> %v, <vscale x 1 x i1> %m, i32 zeroext %evl) {
10 ; CHECK-LABEL: vpreduce_add_nxv1i8:
12 ; CHECK-NEXT: vsetivli zero, 1, e8, mf8, ta, ma
13 ; CHECK-NEXT: vmv.s.x v9, a0
14 ; CHECK-NEXT: vsetvli zero, a1, e8, mf8, ta, ma
15 ; CHECK-NEXT: vredsum.vs v9, v8, v9, v0.t
16 ; CHECK-NEXT: vmv.x.s a0, v9
18 %r = call i8 @llvm.vp.reduce.add.nxv1i8(i8 %s, <vscale x 1 x i8> %v, <vscale x 1 x i1> %m, i32 %evl)
; Masked vp.reduce.umax on <vscale x 1 x i8>; RV32 and RV64 share the CHECK lines.
; Unlike the signed variant, `andi a0, a0, 255` is expected before a0 seeds v9;
; then vsetvli from a1 and vredmaxu.vs under v0.t.
; NOTE(review): the mask looks like a zero-extension of the signext start value - confirm.
22 declare i8 @llvm.vp.reduce.umax.nxv1i8(i8, <vscale x 1 x i8>, <vscale x 1 x i1>, i32)
24 define signext i8 @vpreduce_umax_nxv1i8(i8 signext %s, <vscale x 1 x i8> %v, <vscale x 1 x i1> %m, i32 zeroext %evl) {
25 ; CHECK-LABEL: vpreduce_umax_nxv1i8:
27 ; CHECK-NEXT: andi a0, a0, 255
28 ; CHECK-NEXT: vsetivli zero, 1, e8, mf8, ta, ma
29 ; CHECK-NEXT: vmv.s.x v9, a0
30 ; CHECK-NEXT: vsetvli zero, a1, e8, mf8, ta, ma
31 ; CHECK-NEXT: vredmaxu.vs v9, v8, v9, v0.t
32 ; CHECK-NEXT: vmv.x.s a0, v9
34 %r = call i8 @llvm.vp.reduce.umax.nxv1i8(i8 %s, <vscale x 1 x i8> %v, <vscale x 1 x i1> %m, i32 %evl)
; Masked vp.reduce.smax on <vscale x 1 x i8>; RV32 and RV64 share the CHECK lines.
; Expected: seed v9 from a0 (vmv.s.x, AVL 1), vsetvli from a1, vredmax.vs under v0.t.
38 declare i8 @llvm.vp.reduce.smax.nxv1i8(i8, <vscale x 1 x i8>, <vscale x 1 x i1>, i32)
40 define signext i8 @vpreduce_smax_nxv1i8(i8 signext %s, <vscale x 1 x i8> %v, <vscale x 1 x i1> %m, i32 zeroext %evl) {
41 ; CHECK-LABEL: vpreduce_smax_nxv1i8:
43 ; CHECK-NEXT: vsetivli zero, 1, e8, mf8, ta, ma
44 ; CHECK-NEXT: vmv.s.x v9, a0
45 ; CHECK-NEXT: vsetvli zero, a1, e8, mf8, ta, ma
46 ; CHECK-NEXT: vredmax.vs v9, v8, v9, v0.t
47 ; CHECK-NEXT: vmv.x.s a0, v9
49 %r = call i8 @llvm.vp.reduce.smax.nxv1i8(i8 %s, <vscale x 1 x i8> %v, <vscale x 1 x i1> %m, i32 %evl)
; Masked vp.reduce.umin on <vscale x 1 x i8>; RV32 and RV64 share the CHECK lines.
; Unlike the signed variant, `andi a0, a0, 255` is expected before a0 seeds v9;
; then vsetvli from a1 and vredminu.vs under v0.t.
; NOTE(review): the mask looks like a zero-extension of the signext start value - confirm.
53 declare i8 @llvm.vp.reduce.umin.nxv1i8(i8, <vscale x 1 x i8>, <vscale x 1 x i1>, i32)
55 define signext i8 @vpreduce_umin_nxv1i8(i8 signext %s, <vscale x 1 x i8> %v, <vscale x 1 x i1> %m, i32 zeroext %evl) {
56 ; CHECK-LABEL: vpreduce_umin_nxv1i8:
58 ; CHECK-NEXT: andi a0, a0, 255
59 ; CHECK-NEXT: vsetivli zero, 1, e8, mf8, ta, ma
60 ; CHECK-NEXT: vmv.s.x v9, a0
61 ; CHECK-NEXT: vsetvli zero, a1, e8, mf8, ta, ma
62 ; CHECK-NEXT: vredminu.vs v9, v8, v9, v0.t
63 ; CHECK-NEXT: vmv.x.s a0, v9
65 %r = call i8 @llvm.vp.reduce.umin.nxv1i8(i8 %s, <vscale x 1 x i8> %v, <vscale x 1 x i1> %m, i32 %evl)
; Masked vp.reduce.smin on <vscale x 1 x i8>; RV32 and RV64 share the CHECK lines.
; Expected: seed v9 from a0 (vmv.s.x, AVL 1), vsetvli from a1, vredmin.vs under v0.t.
69 declare i8 @llvm.vp.reduce.smin.nxv1i8(i8, <vscale x 1 x i8>, <vscale x 1 x i1>, i32)
71 define signext i8 @vpreduce_smin_nxv1i8(i8 signext %s, <vscale x 1 x i8> %v, <vscale x 1 x i1> %m, i32 zeroext %evl) {
72 ; CHECK-LABEL: vpreduce_smin_nxv1i8:
74 ; CHECK-NEXT: vsetivli zero, 1, e8, mf8, ta, ma
75 ; CHECK-NEXT: vmv.s.x v9, a0
76 ; CHECK-NEXT: vsetvli zero, a1, e8, mf8, ta, ma
77 ; CHECK-NEXT: vredmin.vs v9, v8, v9, v0.t
78 ; CHECK-NEXT: vmv.x.s a0, v9
80 %r = call i8 @llvm.vp.reduce.smin.nxv1i8(i8 %s, <vscale x 1 x i8> %v, <vscale x 1 x i1> %m, i32 %evl)
; Masked vp.reduce.and on <vscale x 1 x i8>; RV32 and RV64 share the CHECK lines.
; Expected: seed v9 from a0 (vmv.s.x, AVL 1), vsetvli from a1, vredand.vs under v0.t.
84 declare i8 @llvm.vp.reduce.and.nxv1i8(i8, <vscale x 1 x i8>, <vscale x 1 x i1>, i32)
86 define signext i8 @vpreduce_and_nxv1i8(i8 signext %s, <vscale x 1 x i8> %v, <vscale x 1 x i1> %m, i32 zeroext %evl) {
87 ; CHECK-LABEL: vpreduce_and_nxv1i8:
89 ; CHECK-NEXT: vsetivli zero, 1, e8, mf8, ta, ma
90 ; CHECK-NEXT: vmv.s.x v9, a0
91 ; CHECK-NEXT: vsetvli zero, a1, e8, mf8, ta, ma
92 ; CHECK-NEXT: vredand.vs v9, v8, v9, v0.t
93 ; CHECK-NEXT: vmv.x.s a0, v9
95 %r = call i8 @llvm.vp.reduce.and.nxv1i8(i8 %s, <vscale x 1 x i8> %v, <vscale x 1 x i1> %m, i32 %evl)
; Masked vp.reduce.or on <vscale x 1 x i8>; RV32 and RV64 share the CHECK lines.
; Expected: seed v9 from a0 (vmv.s.x, AVL 1), vsetvli from a1, vredor.vs under v0.t.
99 declare i8 @llvm.vp.reduce.or.nxv1i8(i8, <vscale x 1 x i8>, <vscale x 1 x i1>, i32)
101 define signext i8 @vpreduce_or_nxv1i8(i8 signext %s, <vscale x 1 x i8> %v, <vscale x 1 x i1> %m, i32 zeroext %evl) {
102 ; CHECK-LABEL: vpreduce_or_nxv1i8:
104 ; CHECK-NEXT: vsetivli zero, 1, e8, mf8, ta, ma
105 ; CHECK-NEXT: vmv.s.x v9, a0
106 ; CHECK-NEXT: vsetvli zero, a1, e8, mf8, ta, ma
107 ; CHECK-NEXT: vredor.vs v9, v8, v9, v0.t
108 ; CHECK-NEXT: vmv.x.s a0, v9
110 %r = call i8 @llvm.vp.reduce.or.nxv1i8(i8 %s, <vscale x 1 x i8> %v, <vscale x 1 x i1> %m, i32 %evl)
; Masked vp.reduce.xor on <vscale x 1 x i8>; RV32 and RV64 share the CHECK lines.
; Expected: seed v9 from a0 (vmv.s.x, AVL 1), vsetvli from a1, vredxor.vs under v0.t.
114 declare i8 @llvm.vp.reduce.xor.nxv1i8(i8, <vscale x 1 x i8>, <vscale x 1 x i1>, i32)
116 define signext i8 @vpreduce_xor_nxv1i8(i8 signext %s, <vscale x 1 x i8> %v, <vscale x 1 x i1> %m, i32 zeroext %evl) {
117 ; CHECK-LABEL: vpreduce_xor_nxv1i8:
119 ; CHECK-NEXT: vsetivli zero, 1, e8, mf8, ta, ma
120 ; CHECK-NEXT: vmv.s.x v9, a0
121 ; CHECK-NEXT: vsetvli zero, a1, e8, mf8, ta, ma
122 ; CHECK-NEXT: vredxor.vs v9, v8, v9, v0.t
123 ; CHECK-NEXT: vmv.x.s a0, v9
125 %r = call i8 @llvm.vp.reduce.xor.nxv1i8(i8 %s, <vscale x 1 x i8> %v, <vscale x 1 x i1> %m, i32 %evl)
; Masked vp.reduce.add on <vscale x 2 x i8>; RV32 and RV64 share the CHECK lines.
; Expected (e8, mf4): seed v9 from a0 (vmv.s.x, AVL 1), vsetvli from a1, vredsum.vs under v0.t.
129 declare i8 @llvm.vp.reduce.add.nxv2i8(i8, <vscale x 2 x i8>, <vscale x 2 x i1>, i32)
131 define signext i8 @vpreduce_add_nxv2i8(i8 signext %s, <vscale x 2 x i8> %v, <vscale x 2 x i1> %m, i32 zeroext %evl) {
132 ; CHECK-LABEL: vpreduce_add_nxv2i8:
134 ; CHECK-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
135 ; CHECK-NEXT: vmv.s.x v9, a0
136 ; CHECK-NEXT: vsetvli zero, a1, e8, mf4, ta, ma
137 ; CHECK-NEXT: vredsum.vs v9, v8, v9, v0.t
138 ; CHECK-NEXT: vmv.x.s a0, v9
140 %r = call i8 @llvm.vp.reduce.add.nxv2i8(i8 %s, <vscale x 2 x i8> %v, <vscale x 2 x i1> %m, i32 %evl)
; Masked vp.reduce.umax on <vscale x 2 x i8>; RV32 and RV64 share the CHECK lines.
; `andi a0, a0, 255` is expected before a0 seeds v9 (e8, mf4); then vsetvli
; from a1 and vredmaxu.vs under v0.t.
; NOTE(review): the mask looks like a zero-extension of the signext start value - confirm.
144 declare i8 @llvm.vp.reduce.umax.nxv2i8(i8, <vscale x 2 x i8>, <vscale x 2 x i1>, i32)
146 define signext i8 @vpreduce_umax_nxv2i8(i8 signext %s, <vscale x 2 x i8> %v, <vscale x 2 x i1> %m, i32 zeroext %evl) {
147 ; CHECK-LABEL: vpreduce_umax_nxv2i8:
149 ; CHECK-NEXT: andi a0, a0, 255
150 ; CHECK-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
151 ; CHECK-NEXT: vmv.s.x v9, a0
152 ; CHECK-NEXT: vsetvli zero, a1, e8, mf4, ta, ma
153 ; CHECK-NEXT: vredmaxu.vs v9, v8, v9, v0.t
154 ; CHECK-NEXT: vmv.x.s a0, v9
156 %r = call i8 @llvm.vp.reduce.umax.nxv2i8(i8 %s, <vscale x 2 x i8> %v, <vscale x 2 x i1> %m, i32 %evl)
; Masked vp.reduce.smax on <vscale x 2 x i8>; RV32 and RV64 share the CHECK lines.
; Expected (e8, mf4): seed v9 from a0 (vmv.s.x, AVL 1), vsetvli from a1, vredmax.vs under v0.t.
160 declare i8 @llvm.vp.reduce.smax.nxv2i8(i8, <vscale x 2 x i8>, <vscale x 2 x i1>, i32)
162 define signext i8 @vpreduce_smax_nxv2i8(i8 signext %s, <vscale x 2 x i8> %v, <vscale x 2 x i1> %m, i32 zeroext %evl) {
163 ; CHECK-LABEL: vpreduce_smax_nxv2i8:
165 ; CHECK-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
166 ; CHECK-NEXT: vmv.s.x v9, a0
167 ; CHECK-NEXT: vsetvli zero, a1, e8, mf4, ta, ma
168 ; CHECK-NEXT: vredmax.vs v9, v8, v9, v0.t
169 ; CHECK-NEXT: vmv.x.s a0, v9
171 %r = call i8 @llvm.vp.reduce.smax.nxv2i8(i8 %s, <vscale x 2 x i8> %v, <vscale x 2 x i1> %m, i32 %evl)
; Masked vp.reduce.umin on <vscale x 2 x i8>; RV32 and RV64 share the CHECK lines.
; `andi a0, a0, 255` is expected before a0 seeds v9 (e8, mf4); then vsetvli
; from a1 and vredminu.vs under v0.t.
; NOTE(review): the mask looks like a zero-extension of the signext start value - confirm.
175 declare i8 @llvm.vp.reduce.umin.nxv2i8(i8, <vscale x 2 x i8>, <vscale x 2 x i1>, i32)
177 define signext i8 @vpreduce_umin_nxv2i8(i8 signext %s, <vscale x 2 x i8> %v, <vscale x 2 x i1> %m, i32 zeroext %evl) {
178 ; CHECK-LABEL: vpreduce_umin_nxv2i8:
180 ; CHECK-NEXT: andi a0, a0, 255
181 ; CHECK-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
182 ; CHECK-NEXT: vmv.s.x v9, a0
183 ; CHECK-NEXT: vsetvli zero, a1, e8, mf4, ta, ma
184 ; CHECK-NEXT: vredminu.vs v9, v8, v9, v0.t
185 ; CHECK-NEXT: vmv.x.s a0, v9
187 %r = call i8 @llvm.vp.reduce.umin.nxv2i8(i8 %s, <vscale x 2 x i8> %v, <vscale x 2 x i1> %m, i32 %evl)
; Masked vp.reduce.smin on <vscale x 2 x i8>; RV32 and RV64 share the CHECK lines.
; Expected (e8, mf4): seed v9 from a0 (vmv.s.x, AVL 1), vsetvli from a1, vredmin.vs under v0.t.
191 declare i8 @llvm.vp.reduce.smin.nxv2i8(i8, <vscale x 2 x i8>, <vscale x 2 x i1>, i32)
193 define signext i8 @vpreduce_smin_nxv2i8(i8 signext %s, <vscale x 2 x i8> %v, <vscale x 2 x i1> %m, i32 zeroext %evl) {
194 ; CHECK-LABEL: vpreduce_smin_nxv2i8:
196 ; CHECK-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
197 ; CHECK-NEXT: vmv.s.x v9, a0
198 ; CHECK-NEXT: vsetvli zero, a1, e8, mf4, ta, ma
199 ; CHECK-NEXT: vredmin.vs v9, v8, v9, v0.t
200 ; CHECK-NEXT: vmv.x.s a0, v9
202 %r = call i8 @llvm.vp.reduce.smin.nxv2i8(i8 %s, <vscale x 2 x i8> %v, <vscale x 2 x i1> %m, i32 %evl)
; Masked vp.reduce.and on <vscale x 2 x i8>; RV32 and RV64 share the CHECK lines.
; Expected (e8, mf4): seed v9 from a0 (vmv.s.x, AVL 1), vsetvli from a1, vredand.vs under v0.t.
206 declare i8 @llvm.vp.reduce.and.nxv2i8(i8, <vscale x 2 x i8>, <vscale x 2 x i1>, i32)
208 define signext i8 @vpreduce_and_nxv2i8(i8 signext %s, <vscale x 2 x i8> %v, <vscale x 2 x i1> %m, i32 zeroext %evl) {
209 ; CHECK-LABEL: vpreduce_and_nxv2i8:
211 ; CHECK-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
212 ; CHECK-NEXT: vmv.s.x v9, a0
213 ; CHECK-NEXT: vsetvli zero, a1, e8, mf4, ta, ma
214 ; CHECK-NEXT: vredand.vs v9, v8, v9, v0.t
215 ; CHECK-NEXT: vmv.x.s a0, v9
217 %r = call i8 @llvm.vp.reduce.and.nxv2i8(i8 %s, <vscale x 2 x i8> %v, <vscale x 2 x i1> %m, i32 %evl)
; Masked vp.reduce.or on <vscale x 2 x i8>; RV32 and RV64 share the CHECK lines.
; Expected (e8, mf4): seed v9 from a0 (vmv.s.x, AVL 1), vsetvli from a1, vredor.vs under v0.t.
221 declare i8 @llvm.vp.reduce.or.nxv2i8(i8, <vscale x 2 x i8>, <vscale x 2 x i1>, i32)
223 define signext i8 @vpreduce_or_nxv2i8(i8 signext %s, <vscale x 2 x i8> %v, <vscale x 2 x i1> %m, i32 zeroext %evl) {
224 ; CHECK-LABEL: vpreduce_or_nxv2i8:
226 ; CHECK-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
227 ; CHECK-NEXT: vmv.s.x v9, a0
228 ; CHECK-NEXT: vsetvli zero, a1, e8, mf4, ta, ma
229 ; CHECK-NEXT: vredor.vs v9, v8, v9, v0.t
230 ; CHECK-NEXT: vmv.x.s a0, v9
232 %r = call i8 @llvm.vp.reduce.or.nxv2i8(i8 %s, <vscale x 2 x i8> %v, <vscale x 2 x i1> %m, i32 %evl)
; Masked vp.reduce.xor on <vscale x 2 x i8>; RV32 and RV64 share the CHECK lines.
; Expected (e8, mf4): seed v9 from a0 (vmv.s.x, AVL 1), vsetvli from a1, vredxor.vs under v0.t.
236 declare i8 @llvm.vp.reduce.xor.nxv2i8(i8, <vscale x 2 x i8>, <vscale x 2 x i1>, i32)
238 define signext i8 @vpreduce_xor_nxv2i8(i8 signext %s, <vscale x 2 x i8> %v, <vscale x 2 x i1> %m, i32 zeroext %evl) {
239 ; CHECK-LABEL: vpreduce_xor_nxv2i8:
241 ; CHECK-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
242 ; CHECK-NEXT: vmv.s.x v9, a0
243 ; CHECK-NEXT: vsetvli zero, a1, e8, mf4, ta, ma
244 ; CHECK-NEXT: vredxor.vs v9, v8, v9, v0.t
245 ; CHECK-NEXT: vmv.x.s a0, v9
247 %r = call i8 @llvm.vp.reduce.xor.nxv2i8(i8 %s, <vscale x 2 x i8> %v, <vscale x 2 x i1> %m, i32 %evl)
; Masked vp.reduce.smax on <vscale x 3 x i8>, the only non-power-of-two element
; count among the i8 tests here. The checks expect the e8/mf2 configuration,
; i.e. the same vsetivli/vsetvli settings the <vscale x 4 x i8> smax test expects.
; NOTE(review): presumably the vector type is widened to nxv4i8 - confirm.
251 declare i8 @llvm.vp.reduce.smax.nxv3i8(i8, <vscale x 3 x i8>, <vscale x 3 x i1>, i32)
253 define signext i8 @vpreduce_smax_nxv3i8(i8 signext %s, <vscale x 3 x i8> %v, <vscale x 3 x i1> %m, i32 zeroext %evl) {
254 ; CHECK-LABEL: vpreduce_smax_nxv3i8:
256 ; CHECK-NEXT: vsetivli zero, 1, e8, mf2, ta, ma
257 ; CHECK-NEXT: vmv.s.x v9, a0
258 ; CHECK-NEXT: vsetvli zero, a1, e8, mf2, ta, ma
259 ; CHECK-NEXT: vredmax.vs v9, v8, v9, v0.t
260 ; CHECK-NEXT: vmv.x.s a0, v9
262 %r = call i8 @llvm.vp.reduce.smax.nxv3i8(i8 %s, <vscale x 3 x i8> %v, <vscale x 3 x i1> %m, i32 %evl)
; Masked vp.reduce.add on <vscale x 4 x i8>; RV32 and RV64 share the CHECK lines.
; Expected (e8, mf2): seed v9 from a0 (vmv.s.x, AVL 1), vsetvli from a1, vredsum.vs under v0.t.
266 declare i8 @llvm.vp.reduce.add.nxv4i8(i8, <vscale x 4 x i8>, <vscale x 4 x i1>, i32)
268 define signext i8 @vpreduce_add_nxv4i8(i8 signext %s, <vscale x 4 x i8> %v, <vscale x 4 x i1> %m, i32 zeroext %evl) {
269 ; CHECK-LABEL: vpreduce_add_nxv4i8:
271 ; CHECK-NEXT: vsetivli zero, 1, e8, mf2, ta, ma
272 ; CHECK-NEXT: vmv.s.x v9, a0
273 ; CHECK-NEXT: vsetvli zero, a1, e8, mf2, ta, ma
274 ; CHECK-NEXT: vredsum.vs v9, v8, v9, v0.t
275 ; CHECK-NEXT: vmv.x.s a0, v9
277 %r = call i8 @llvm.vp.reduce.add.nxv4i8(i8 %s, <vscale x 4 x i8> %v, <vscale x 4 x i1> %m, i32 %evl)
; Masked vp.reduce.umax on <vscale x 4 x i8>; RV32 and RV64 share the CHECK lines.
; `andi a0, a0, 255` is expected before a0 seeds v9 (e8, mf2); then vsetvli
; from a1 and vredmaxu.vs under v0.t.
; NOTE(review): the mask looks like a zero-extension of the signext start value - confirm.
281 declare i8 @llvm.vp.reduce.umax.nxv4i8(i8, <vscale x 4 x i8>, <vscale x 4 x i1>, i32)
283 define signext i8 @vpreduce_umax_nxv4i8(i8 signext %s, <vscale x 4 x i8> %v, <vscale x 4 x i1> %m, i32 zeroext %evl) {
284 ; CHECK-LABEL: vpreduce_umax_nxv4i8:
286 ; CHECK-NEXT: andi a0, a0, 255
287 ; CHECK-NEXT: vsetivli zero, 1, e8, mf2, ta, ma
288 ; CHECK-NEXT: vmv.s.x v9, a0
289 ; CHECK-NEXT: vsetvli zero, a1, e8, mf2, ta, ma
290 ; CHECK-NEXT: vredmaxu.vs v9, v8, v9, v0.t
291 ; CHECK-NEXT: vmv.x.s a0, v9
293 %r = call i8 @llvm.vp.reduce.umax.nxv4i8(i8 %s, <vscale x 4 x i8> %v, <vscale x 4 x i1> %m, i32 %evl)
; Masked vp.reduce.smax on <vscale x 4 x i8>; RV32 and RV64 share the CHECK lines.
; Expected (e8, mf2): seed v9 from a0 (vmv.s.x, AVL 1), vsetvli from a1, vredmax.vs under v0.t.
297 declare i8 @llvm.vp.reduce.smax.nxv4i8(i8, <vscale x 4 x i8>, <vscale x 4 x i1>, i32)
299 define signext i8 @vpreduce_smax_nxv4i8(i8 signext %s, <vscale x 4 x i8> %v, <vscale x 4 x i1> %m, i32 zeroext %evl) {
300 ; CHECK-LABEL: vpreduce_smax_nxv4i8:
302 ; CHECK-NEXT: vsetivli zero, 1, e8, mf2, ta, ma
303 ; CHECK-NEXT: vmv.s.x v9, a0
304 ; CHECK-NEXT: vsetvli zero, a1, e8, mf2, ta, ma
305 ; CHECK-NEXT: vredmax.vs v9, v8, v9, v0.t
306 ; CHECK-NEXT: vmv.x.s a0, v9
308 %r = call i8 @llvm.vp.reduce.smax.nxv4i8(i8 %s, <vscale x 4 x i8> %v, <vscale x 4 x i1> %m, i32 %evl)
; Masked vp.reduce.umin on <vscale x 4 x i8>; RV32 and RV64 share the CHECK lines.
; `andi a0, a0, 255` is expected before a0 seeds v9 (e8, mf2); then vsetvli
; from a1 and vredminu.vs under v0.t.
; NOTE(review): the mask looks like a zero-extension of the signext start value - confirm.
312 declare i8 @llvm.vp.reduce.umin.nxv4i8(i8, <vscale x 4 x i8>, <vscale x 4 x i1>, i32)
314 define signext i8 @vpreduce_umin_nxv4i8(i8 signext %s, <vscale x 4 x i8> %v, <vscale x 4 x i1> %m, i32 zeroext %evl) {
315 ; CHECK-LABEL: vpreduce_umin_nxv4i8:
317 ; CHECK-NEXT: andi a0, a0, 255
318 ; CHECK-NEXT: vsetivli zero, 1, e8, mf2, ta, ma
319 ; CHECK-NEXT: vmv.s.x v9, a0
320 ; CHECK-NEXT: vsetvli zero, a1, e8, mf2, ta, ma
321 ; CHECK-NEXT: vredminu.vs v9, v8, v9, v0.t
322 ; CHECK-NEXT: vmv.x.s a0, v9
324 %r = call i8 @llvm.vp.reduce.umin.nxv4i8(i8 %s, <vscale x 4 x i8> %v, <vscale x 4 x i1> %m, i32 %evl)
; Masked vp.reduce.smin on <vscale x 4 x i8>; RV32 and RV64 share the CHECK lines.
; Expected (e8, mf2): seed v9 from a0 (vmv.s.x, AVL 1), vsetvli from a1, vredmin.vs under v0.t.
328 declare i8 @llvm.vp.reduce.smin.nxv4i8(i8, <vscale x 4 x i8>, <vscale x 4 x i1>, i32)
330 define signext i8 @vpreduce_smin_nxv4i8(i8 signext %s, <vscale x 4 x i8> %v, <vscale x 4 x i1> %m, i32 zeroext %evl) {
331 ; CHECK-LABEL: vpreduce_smin_nxv4i8:
333 ; CHECK-NEXT: vsetivli zero, 1, e8, mf2, ta, ma
334 ; CHECK-NEXT: vmv.s.x v9, a0
335 ; CHECK-NEXT: vsetvli zero, a1, e8, mf2, ta, ma
336 ; CHECK-NEXT: vredmin.vs v9, v8, v9, v0.t
337 ; CHECK-NEXT: vmv.x.s a0, v9
339 %r = call i8 @llvm.vp.reduce.smin.nxv4i8(i8 %s, <vscale x 4 x i8> %v, <vscale x 4 x i1> %m, i32 %evl)
; Masked vp.reduce.and on <vscale x 4 x i8>; RV32 and RV64 share the CHECK lines.
; Expected (e8, mf2): seed v9 from a0 (vmv.s.x, AVL 1), vsetvli from a1, vredand.vs under v0.t.
343 declare i8 @llvm.vp.reduce.and.nxv4i8(i8, <vscale x 4 x i8>, <vscale x 4 x i1>, i32)
345 define signext i8 @vpreduce_and_nxv4i8(i8 signext %s, <vscale x 4 x i8> %v, <vscale x 4 x i1> %m, i32 zeroext %evl) {
346 ; CHECK-LABEL: vpreduce_and_nxv4i8:
348 ; CHECK-NEXT: vsetivli zero, 1, e8, mf2, ta, ma
349 ; CHECK-NEXT: vmv.s.x v9, a0
350 ; CHECK-NEXT: vsetvli zero, a1, e8, mf2, ta, ma
351 ; CHECK-NEXT: vredand.vs v9, v8, v9, v0.t
352 ; CHECK-NEXT: vmv.x.s a0, v9
354 %r = call i8 @llvm.vp.reduce.and.nxv4i8(i8 %s, <vscale x 4 x i8> %v, <vscale x 4 x i1> %m, i32 %evl)
; Masked vp.reduce.or on <vscale x 4 x i8>; RV32 and RV64 share the CHECK lines.
; Expected (e8, mf2): seed v9 from a0 (vmv.s.x, AVL 1), vsetvli from a1, vredor.vs under v0.t.
358 declare i8 @llvm.vp.reduce.or.nxv4i8(i8, <vscale x 4 x i8>, <vscale x 4 x i1>, i32)
360 define signext i8 @vpreduce_or_nxv4i8(i8 signext %s, <vscale x 4 x i8> %v, <vscale x 4 x i1> %m, i32 zeroext %evl) {
361 ; CHECK-LABEL: vpreduce_or_nxv4i8:
363 ; CHECK-NEXT: vsetivli zero, 1, e8, mf2, ta, ma
364 ; CHECK-NEXT: vmv.s.x v9, a0
365 ; CHECK-NEXT: vsetvli zero, a1, e8, mf2, ta, ma
366 ; CHECK-NEXT: vredor.vs v9, v8, v9, v0.t
367 ; CHECK-NEXT: vmv.x.s a0, v9
369 %r = call i8 @llvm.vp.reduce.or.nxv4i8(i8 %s, <vscale x 4 x i8> %v, <vscale x 4 x i1> %m, i32 %evl)
; Masked vp.reduce.xor on <vscale x 4 x i8>; RV32 and RV64 share the CHECK lines.
; Expected (e8, mf2): seed v9 from a0 (vmv.s.x, AVL 1), vsetvli from a1, vredxor.vs under v0.t.
373 declare i8 @llvm.vp.reduce.xor.nxv4i8(i8, <vscale x 4 x i8>, <vscale x 4 x i1>, i32)
375 define signext i8 @vpreduce_xor_nxv4i8(i8 signext %s, <vscale x 4 x i8> %v, <vscale x 4 x i1> %m, i32 zeroext %evl) {
376 ; CHECK-LABEL: vpreduce_xor_nxv4i8:
378 ; CHECK-NEXT: vsetivli zero, 1, e8, mf2, ta, ma
379 ; CHECK-NEXT: vmv.s.x v9, a0
380 ; CHECK-NEXT: vsetvli zero, a1, e8, mf2, ta, ma
381 ; CHECK-NEXT: vredxor.vs v9, v8, v9, v0.t
382 ; CHECK-NEXT: vmv.x.s a0, v9
384 %r = call i8 @llvm.vp.reduce.xor.nxv4i8(i8 %s, <vscale x 4 x i8> %v, <vscale x 4 x i1> %m, i32 %evl)
; Masked vp.reduce.add on <vscale x 1 x i16>; RV32 and RV64 share the CHECK lines.
; Expected (e16, mf4): seed v9 from a0 (vmv.s.x, AVL 1), vsetvli from a1, vredsum.vs under v0.t.
388 declare i16 @llvm.vp.reduce.add.nxv1i16(i16, <vscale x 1 x i16>, <vscale x 1 x i1>, i32)
390 define signext i16 @vpreduce_add_nxv1i16(i16 signext %s, <vscale x 1 x i16> %v, <vscale x 1 x i1> %m, i32 zeroext %evl) {
391 ; CHECK-LABEL: vpreduce_add_nxv1i16:
393 ; CHECK-NEXT: vsetivli zero, 1, e16, mf4, ta, ma
394 ; CHECK-NEXT: vmv.s.x v9, a0
395 ; CHECK-NEXT: vsetvli zero, a1, e16, mf4, ta, ma
396 ; CHECK-NEXT: vredsum.vs v9, v8, v9, v0.t
397 ; CHECK-NEXT: vmv.x.s a0, v9
399 %r = call i16 @llvm.vp.reduce.add.nxv1i16(i16 %s, <vscale x 1 x i16> %v, <vscale x 1 x i1> %m, i32 %evl)
; Masked vp.reduce.umax on <vscale x 1 x i16>. The targets use separate prefixes
; because a0 is first cleared above bit 15 with a shift pair whose amount
; differs: slli/srli by 16 on RV32, by 48 on RV64. The vector sequence that
; follows (e16, mf4, vredmaxu.vs under v0.t) is the same for both.
403 declare i16 @llvm.vp.reduce.umax.nxv1i16(i16, <vscale x 1 x i16>, <vscale x 1 x i1>, i32)
405 define signext i16 @vpreduce_umax_nxv1i16(i16 signext %s, <vscale x 1 x i16> %v, <vscale x 1 x i1> %m, i32 zeroext %evl) {
406 ; RV32-LABEL: vpreduce_umax_nxv1i16:
408 ; RV32-NEXT: slli a0, a0, 16
409 ; RV32-NEXT: srli a0, a0, 16
410 ; RV32-NEXT: vsetivli zero, 1, e16, mf4, ta, ma
411 ; RV32-NEXT: vmv.s.x v9, a0
412 ; RV32-NEXT: vsetvli zero, a1, e16, mf4, ta, ma
413 ; RV32-NEXT: vredmaxu.vs v9, v8, v9, v0.t
414 ; RV32-NEXT: vmv.x.s a0, v9
417 ; RV64-LABEL: vpreduce_umax_nxv1i16:
419 ; RV64-NEXT: slli a0, a0, 48
420 ; RV64-NEXT: srli a0, a0, 48
421 ; RV64-NEXT: vsetivli zero, 1, e16, mf4, ta, ma
422 ; RV64-NEXT: vmv.s.x v9, a0
423 ; RV64-NEXT: vsetvli zero, a1, e16, mf4, ta, ma
424 ; RV64-NEXT: vredmaxu.vs v9, v8, v9, v0.t
425 ; RV64-NEXT: vmv.x.s a0, v9
427 %r = call i16 @llvm.vp.reduce.umax.nxv1i16(i16 %s, <vscale x 1 x i16> %v, <vscale x 1 x i1> %m, i32 %evl)
; Masked vp.reduce.smax on <vscale x 1 x i16>; RV32 and RV64 share the CHECK lines.
; Expected (e16, mf4): seed v9 from a0 (vmv.s.x, AVL 1), vsetvli from a1, vredmax.vs under v0.t.
431 declare i16 @llvm.vp.reduce.smax.nxv1i16(i16, <vscale x 1 x i16>, <vscale x 1 x i1>, i32)
433 define signext i16 @vpreduce_smax_nxv1i16(i16 signext %s, <vscale x 1 x i16> %v, <vscale x 1 x i1> %m, i32 zeroext %evl) {
434 ; CHECK-LABEL: vpreduce_smax_nxv1i16:
436 ; CHECK-NEXT: vsetivli zero, 1, e16, mf4, ta, ma
437 ; CHECK-NEXT: vmv.s.x v9, a0
438 ; CHECK-NEXT: vsetvli zero, a1, e16, mf4, ta, ma
439 ; CHECK-NEXT: vredmax.vs v9, v8, v9, v0.t
440 ; CHECK-NEXT: vmv.x.s a0, v9
442 %r = call i16 @llvm.vp.reduce.smax.nxv1i16(i16 %s, <vscale x 1 x i16> %v, <vscale x 1 x i1> %m, i32 %evl)
; Masked vp.reduce.umin on <vscale x 1 x i16>. The targets use separate prefixes
; because a0 is first cleared above bit 15 with a shift pair whose amount
; differs: slli/srli by 16 on RV32, by 48 on RV64. The vector sequence that
; follows (e16, mf4, vredminu.vs under v0.t) is the same for both.
446 declare i16 @llvm.vp.reduce.umin.nxv1i16(i16, <vscale x 1 x i16>, <vscale x 1 x i1>, i32)
448 define signext i16 @vpreduce_umin_nxv1i16(i16 signext %s, <vscale x 1 x i16> %v, <vscale x 1 x i1> %m, i32 zeroext %evl) {
449 ; RV32-LABEL: vpreduce_umin_nxv1i16:
451 ; RV32-NEXT: slli a0, a0, 16
452 ; RV32-NEXT: srli a0, a0, 16
453 ; RV32-NEXT: vsetivli zero, 1, e16, mf4, ta, ma
454 ; RV32-NEXT: vmv.s.x v9, a0
455 ; RV32-NEXT: vsetvli zero, a1, e16, mf4, ta, ma
456 ; RV32-NEXT: vredminu.vs v9, v8, v9, v0.t
457 ; RV32-NEXT: vmv.x.s a0, v9
460 ; RV64-LABEL: vpreduce_umin_nxv1i16:
462 ; RV64-NEXT: slli a0, a0, 48
463 ; RV64-NEXT: srli a0, a0, 48
464 ; RV64-NEXT: vsetivli zero, 1, e16, mf4, ta, ma
465 ; RV64-NEXT: vmv.s.x v9, a0
466 ; RV64-NEXT: vsetvli zero, a1, e16, mf4, ta, ma
467 ; RV64-NEXT: vredminu.vs v9, v8, v9, v0.t
468 ; RV64-NEXT: vmv.x.s a0, v9
470 %r = call i16 @llvm.vp.reduce.umin.nxv1i16(i16 %s, <vscale x 1 x i16> %v, <vscale x 1 x i1> %m, i32 %evl)
; Masked vp.reduce.smin on <vscale x 1 x i16>; RV32 and RV64 share the CHECK lines.
; Expected (e16, mf4): seed v9 from a0 (vmv.s.x, AVL 1), vsetvli from a1, vredmin.vs under v0.t.
474 declare i16 @llvm.vp.reduce.smin.nxv1i16(i16, <vscale x 1 x i16>, <vscale x 1 x i1>, i32)
476 define signext i16 @vpreduce_smin_nxv1i16(i16 signext %s, <vscale x 1 x i16> %v, <vscale x 1 x i1> %m, i32 zeroext %evl) {
477 ; CHECK-LABEL: vpreduce_smin_nxv1i16:
479 ; CHECK-NEXT: vsetivli zero, 1, e16, mf4, ta, ma
480 ; CHECK-NEXT: vmv.s.x v9, a0
481 ; CHECK-NEXT: vsetvli zero, a1, e16, mf4, ta, ma
482 ; CHECK-NEXT: vredmin.vs v9, v8, v9, v0.t
483 ; CHECK-NEXT: vmv.x.s a0, v9
485 %r = call i16 @llvm.vp.reduce.smin.nxv1i16(i16 %s, <vscale x 1 x i16> %v, <vscale x 1 x i1> %m, i32 %evl)
; Masked vp.reduce.and on <vscale x 1 x i16>; RV32 and RV64 share the CHECK lines.
; Expected (e16, mf4): seed v9 from a0 (vmv.s.x, AVL 1), vsetvli from a1, vredand.vs under v0.t.
489 declare i16 @llvm.vp.reduce.and.nxv1i16(i16, <vscale x 1 x i16>, <vscale x 1 x i1>, i32)
491 define signext i16 @vpreduce_and_nxv1i16(i16 signext %s, <vscale x 1 x i16> %v, <vscale x 1 x i1> %m, i32 zeroext %evl) {
492 ; CHECK-LABEL: vpreduce_and_nxv1i16:
494 ; CHECK-NEXT: vsetivli zero, 1, e16, mf4, ta, ma
495 ; CHECK-NEXT: vmv.s.x v9, a0
496 ; CHECK-NEXT: vsetvli zero, a1, e16, mf4, ta, ma
497 ; CHECK-NEXT: vredand.vs v9, v8, v9, v0.t
498 ; CHECK-NEXT: vmv.x.s a0, v9
500 %r = call i16 @llvm.vp.reduce.and.nxv1i16(i16 %s, <vscale x 1 x i16> %v, <vscale x 1 x i1> %m, i32 %evl)
; Masked vp.reduce.or on <vscale x 1 x i16>; RV32 and RV64 share the CHECK lines.
; Expected (e16, mf4): seed v9 from a0 (vmv.s.x, AVL 1), vsetvli from a1, vredor.vs under v0.t.
504 declare i16 @llvm.vp.reduce.or.nxv1i16(i16, <vscale x 1 x i16>, <vscale x 1 x i1>, i32)
506 define signext i16 @vpreduce_or_nxv1i16(i16 signext %s, <vscale x 1 x i16> %v, <vscale x 1 x i1> %m, i32 zeroext %evl) {
507 ; CHECK-LABEL: vpreduce_or_nxv1i16:
509 ; CHECK-NEXT: vsetivli zero, 1, e16, mf4, ta, ma
510 ; CHECK-NEXT: vmv.s.x v9, a0
511 ; CHECK-NEXT: vsetvli zero, a1, e16, mf4, ta, ma
512 ; CHECK-NEXT: vredor.vs v9, v8, v9, v0.t
513 ; CHECK-NEXT: vmv.x.s a0, v9
515 %r = call i16 @llvm.vp.reduce.or.nxv1i16(i16 %s, <vscale x 1 x i16> %v, <vscale x 1 x i1> %m, i32 %evl)
; Masked vp.reduce.xor on <vscale x 1 x i16>; RV32 and RV64 share the CHECK lines.
; Expected (e16, mf4): seed v9 from a0 (vmv.s.x, AVL 1), vsetvli from a1, vredxor.vs under v0.t.
519 declare i16 @llvm.vp.reduce.xor.nxv1i16(i16, <vscale x 1 x i16>, <vscale x 1 x i1>, i32)
521 define signext i16 @vpreduce_xor_nxv1i16(i16 signext %s, <vscale x 1 x i16> %v, <vscale x 1 x i1> %m, i32 zeroext %evl) {
522 ; CHECK-LABEL: vpreduce_xor_nxv1i16:
524 ; CHECK-NEXT: vsetivli zero, 1, e16, mf4, ta, ma
525 ; CHECK-NEXT: vmv.s.x v9, a0
526 ; CHECK-NEXT: vsetvli zero, a1, e16, mf4, ta, ma
527 ; CHECK-NEXT: vredxor.vs v9, v8, v9, v0.t
528 ; CHECK-NEXT: vmv.x.s a0, v9
530 %r = call i16 @llvm.vp.reduce.xor.nxv1i16(i16 %s, <vscale x 1 x i16> %v, <vscale x 1 x i1> %m, i32 %evl)
; Masked vp.reduce.add on <vscale x 2 x i16>; RV32 and RV64 share the CHECK lines.
; Expected (e16, mf2): seed v9 from a0 (vmv.s.x, AVL 1), vsetvli from a1, vredsum.vs under v0.t.
534 declare i16 @llvm.vp.reduce.add.nxv2i16(i16, <vscale x 2 x i16>, <vscale x 2 x i1>, i32)
536 define signext i16 @vpreduce_add_nxv2i16(i16 signext %s, <vscale x 2 x i16> %v, <vscale x 2 x i1> %m, i32 zeroext %evl) {
537 ; CHECK-LABEL: vpreduce_add_nxv2i16:
539 ; CHECK-NEXT: vsetivli zero, 1, e16, mf2, ta, ma
540 ; CHECK-NEXT: vmv.s.x v9, a0
541 ; CHECK-NEXT: vsetvli zero, a1, e16, mf2, ta, ma
542 ; CHECK-NEXT: vredsum.vs v9, v8, v9, v0.t
543 ; CHECK-NEXT: vmv.x.s a0, v9
545 %r = call i16 @llvm.vp.reduce.add.nxv2i16(i16 %s, <vscale x 2 x i16> %v, <vscale x 2 x i1> %m, i32 %evl)
; Masked vp.reduce.umax on <vscale x 2 x i16>. The targets use separate prefixes
; because a0 is first cleared above bit 15 with a shift pair whose amount
; differs: slli/srli by 16 on RV32, by 48 on RV64. The vector sequence that
; follows (e16, mf2, vredmaxu.vs under v0.t) is the same for both.
549 declare i16 @llvm.vp.reduce.umax.nxv2i16(i16, <vscale x 2 x i16>, <vscale x 2 x i1>, i32)
551 define signext i16 @vpreduce_umax_nxv2i16(i16 signext %s, <vscale x 2 x i16> %v, <vscale x 2 x i1> %m, i32 zeroext %evl) {
552 ; RV32-LABEL: vpreduce_umax_nxv2i16:
554 ; RV32-NEXT: slli a0, a0, 16
555 ; RV32-NEXT: srli a0, a0, 16
556 ; RV32-NEXT: vsetivli zero, 1, e16, mf2, ta, ma
557 ; RV32-NEXT: vmv.s.x v9, a0
558 ; RV32-NEXT: vsetvli zero, a1, e16, mf2, ta, ma
559 ; RV32-NEXT: vredmaxu.vs v9, v8, v9, v0.t
560 ; RV32-NEXT: vmv.x.s a0, v9
563 ; RV64-LABEL: vpreduce_umax_nxv2i16:
565 ; RV64-NEXT: slli a0, a0, 48
566 ; RV64-NEXT: srli a0, a0, 48
567 ; RV64-NEXT: vsetivli zero, 1, e16, mf2, ta, ma
568 ; RV64-NEXT: vmv.s.x v9, a0
569 ; RV64-NEXT: vsetvli zero, a1, e16, mf2, ta, ma
570 ; RV64-NEXT: vredmaxu.vs v9, v8, v9, v0.t
571 ; RV64-NEXT: vmv.x.s a0, v9
573 %r = call i16 @llvm.vp.reduce.umax.nxv2i16(i16 %s, <vscale x 2 x i16> %v, <vscale x 2 x i1> %m, i32 %evl)
; Masked vp.reduce.smax on <vscale x 2 x i16>; RV32 and RV64 share the CHECK lines.
; Expected (e16, mf2): seed v9 from a0 (vmv.s.x, AVL 1), vsetvli from a1, vredmax.vs under v0.t.
577 declare i16 @llvm.vp.reduce.smax.nxv2i16(i16, <vscale x 2 x i16>, <vscale x 2 x i1>, i32)
579 define signext i16 @vpreduce_smax_nxv2i16(i16 signext %s, <vscale x 2 x i16> %v, <vscale x 2 x i1> %m, i32 zeroext %evl) {
580 ; CHECK-LABEL: vpreduce_smax_nxv2i16:
582 ; CHECK-NEXT: vsetivli zero, 1, e16, mf2, ta, ma
583 ; CHECK-NEXT: vmv.s.x v9, a0
584 ; CHECK-NEXT: vsetvli zero, a1, e16, mf2, ta, ma
585 ; CHECK-NEXT: vredmax.vs v9, v8, v9, v0.t
586 ; CHECK-NEXT: vmv.x.s a0, v9
588 %r = call i16 @llvm.vp.reduce.smax.nxv2i16(i16 %s, <vscale x 2 x i16> %v, <vscale x 2 x i1> %m, i32 %evl)
; Masked vp.reduce.umin on <vscale x 2 x i16>. The targets use separate prefixes
; because a0 is first cleared above bit 15 with a shift pair whose amount
; differs: slli/srli by 16 on RV32, by 48 on RV64. The vector sequence that
; follows (e16, mf2, vredminu.vs under v0.t) is the same for both.
592 declare i16 @llvm.vp.reduce.umin.nxv2i16(i16, <vscale x 2 x i16>, <vscale x 2 x i1>, i32)
594 define signext i16 @vpreduce_umin_nxv2i16(i16 signext %s, <vscale x 2 x i16> %v, <vscale x 2 x i1> %m, i32 zeroext %evl) {
595 ; RV32-LABEL: vpreduce_umin_nxv2i16:
597 ; RV32-NEXT: slli a0, a0, 16
598 ; RV32-NEXT: srli a0, a0, 16
599 ; RV32-NEXT: vsetivli zero, 1, e16, mf2, ta, ma
600 ; RV32-NEXT: vmv.s.x v9, a0
601 ; RV32-NEXT: vsetvli zero, a1, e16, mf2, ta, ma
602 ; RV32-NEXT: vredminu.vs v9, v8, v9, v0.t
603 ; RV32-NEXT: vmv.x.s a0, v9
606 ; RV64-LABEL: vpreduce_umin_nxv2i16:
608 ; RV64-NEXT: slli a0, a0, 48
609 ; RV64-NEXT: srli a0, a0, 48
610 ; RV64-NEXT: vsetivli zero, 1, e16, mf2, ta, ma
611 ; RV64-NEXT: vmv.s.x v9, a0
612 ; RV64-NEXT: vsetvli zero, a1, e16, mf2, ta, ma
613 ; RV64-NEXT: vredminu.vs v9, v8, v9, v0.t
614 ; RV64-NEXT: vmv.x.s a0, v9
616 %r = call i16 @llvm.vp.reduce.umin.nxv2i16(i16 %s, <vscale x 2 x i16> %v, <vscale x 2 x i1> %m, i32 %evl)
; Masked vp.reduce.smin on <vscale x 2 x i16>; RV32 and RV64 share the CHECK lines.
; Expected (e16, mf2): seed v9 from a0 (vmv.s.x, AVL 1), vsetvli from a1, vredmin.vs under v0.t.
620 declare i16 @llvm.vp.reduce.smin.nxv2i16(i16, <vscale x 2 x i16>, <vscale x 2 x i1>, i32)
622 define signext i16 @vpreduce_smin_nxv2i16(i16 signext %s, <vscale x 2 x i16> %v, <vscale x 2 x i1> %m, i32 zeroext %evl) {
623 ; CHECK-LABEL: vpreduce_smin_nxv2i16:
625 ; CHECK-NEXT: vsetivli zero, 1, e16, mf2, ta, ma
626 ; CHECK-NEXT: vmv.s.x v9, a0
627 ; CHECK-NEXT: vsetvli zero, a1, e16, mf2, ta, ma
628 ; CHECK-NEXT: vredmin.vs v9, v8, v9, v0.t
629 ; CHECK-NEXT: vmv.x.s a0, v9
631 %r = call i16 @llvm.vp.reduce.smin.nxv2i16(i16 %s, <vscale x 2 x i16> %v, <vscale x 2 x i1> %m, i32 %evl)
; Masked vp.reduce.and on <vscale x 2 x i16>; RV32 and RV64 share the CHECK lines.
; Expected (e16, mf2): seed v9 from a0 (vmv.s.x, AVL 1), vsetvli from a1, vredand.vs under v0.t.
635 declare i16 @llvm.vp.reduce.and.nxv2i16(i16, <vscale x 2 x i16>, <vscale x 2 x i1>, i32)
637 define signext i16 @vpreduce_and_nxv2i16(i16 signext %s, <vscale x 2 x i16> %v, <vscale x 2 x i1> %m, i32 zeroext %evl) {
638 ; CHECK-LABEL: vpreduce_and_nxv2i16:
640 ; CHECK-NEXT: vsetivli zero, 1, e16, mf2, ta, ma
641 ; CHECK-NEXT: vmv.s.x v9, a0
642 ; CHECK-NEXT: vsetvli zero, a1, e16, mf2, ta, ma
643 ; CHECK-NEXT: vredand.vs v9, v8, v9, v0.t
644 ; CHECK-NEXT: vmv.x.s a0, v9
646 %r = call i16 @llvm.vp.reduce.and.nxv2i16(i16 %s, <vscale x 2 x i16> %v, <vscale x 2 x i1> %m, i32 %evl)
; Masked vp.reduce.or on <vscale x 2 x i16>; RV32 and RV64 share the CHECK lines.
; Expected (e16, mf2): seed v9 from a0 (vmv.s.x, AVL 1), vsetvli from a1, vredor.vs under v0.t.
650 declare i16 @llvm.vp.reduce.or.nxv2i16(i16, <vscale x 2 x i16>, <vscale x 2 x i1>, i32)
652 define signext i16 @vpreduce_or_nxv2i16(i16 signext %s, <vscale x 2 x i16> %v, <vscale x 2 x i1> %m, i32 zeroext %evl) {
653 ; CHECK-LABEL: vpreduce_or_nxv2i16:
655 ; CHECK-NEXT: vsetivli zero, 1, e16, mf2, ta, ma
656 ; CHECK-NEXT: vmv.s.x v9, a0
657 ; CHECK-NEXT: vsetvli zero, a1, e16, mf2, ta, ma
658 ; CHECK-NEXT: vredor.vs v9, v8, v9, v0.t
659 ; CHECK-NEXT: vmv.x.s a0, v9
661 %r = call i16 @llvm.vp.reduce.or.nxv2i16(i16 %s, <vscale x 2 x i16> %v, <vscale x 2 x i1> %m, i32 %evl)
; Masked vp.reduce.xor on <vscale x 2 x i16>; RV32 and RV64 share the CHECK lines.
; Expected (e16, mf2): seed v9 from a0 (vmv.s.x, AVL 1), vsetvli from a1, vredxor.vs under v0.t.
665 declare i16 @llvm.vp.reduce.xor.nxv2i16(i16, <vscale x 2 x i16>, <vscale x 2 x i1>, i32)
667 define signext i16 @vpreduce_xor_nxv2i16(i16 signext %s, <vscale x 2 x i16> %v, <vscale x 2 x i1> %m, i32 zeroext %evl) {
668 ; CHECK-LABEL: vpreduce_xor_nxv2i16:
670 ; CHECK-NEXT: vsetivli zero, 1, e16, mf2, ta, ma
671 ; CHECK-NEXT: vmv.s.x v9, a0
672 ; CHECK-NEXT: vsetvli zero, a1, e16, mf2, ta, ma
673 ; CHECK-NEXT: vredxor.vs v9, v8, v9, v0.t
674 ; CHECK-NEXT: vmv.x.s a0, v9
676 %r = call i16 @llvm.vp.reduce.xor.nxv2i16(i16 %s, <vscale x 2 x i16> %v, <vscale x 2 x i1> %m, i32 %evl)
; Masked vp.reduce.add on <vscale x 4 x i16>; RV32 and RV64 share the CHECK lines.
; Expected (e16, m1): seed v9 from a0 (vmv.s.x, AVL 1), vsetvli from a1, vredsum.vs under v0.t.
680 declare i16 @llvm.vp.reduce.add.nxv4i16(i16, <vscale x 4 x i16>, <vscale x 4 x i1>, i32)
682 define signext i16 @vpreduce_add_nxv4i16(i16 signext %s, <vscale x 4 x i16> %v, <vscale x 4 x i1> %m, i32 zeroext %evl) {
683 ; CHECK-LABEL: vpreduce_add_nxv4i16:
685 ; CHECK-NEXT: vsetivli zero, 1, e16, m1, ta, ma
686 ; CHECK-NEXT: vmv.s.x v9, a0
687 ; CHECK-NEXT: vsetvli zero, a1, e16, m1, ta, ma
688 ; CHECK-NEXT: vredsum.vs v9, v8, v9, v0.t
689 ; CHECK-NEXT: vmv.x.s a0, v9
691 %r = call i16 @llvm.vp.reduce.add.nxv4i16(i16 %s, <vscale x 4 x i16> %v, <vscale x 4 x i1> %m, i32 %evl)
; Masked vp.reduce.umax on <vscale x 4 x i16>. The targets use separate prefixes
; because a0 is first cleared above bit 15 with a shift pair whose amount
; differs: slli/srli by 16 on RV32, by 48 on RV64. The vector sequence that
; follows (e16, m1, vredmaxu.vs under v0.t) is the same for both.
695 declare i16 @llvm.vp.reduce.umax.nxv4i16(i16, <vscale x 4 x i16>, <vscale x 4 x i1>, i32)
697 define signext i16 @vpreduce_umax_nxv4i16(i16 signext %s, <vscale x 4 x i16> %v, <vscale x 4 x i1> %m, i32 zeroext %evl) {
698 ; RV32-LABEL: vpreduce_umax_nxv4i16:
700 ; RV32-NEXT: slli a0, a0, 16
701 ; RV32-NEXT: srli a0, a0, 16
702 ; RV32-NEXT: vsetivli zero, 1, e16, m1, ta, ma
703 ; RV32-NEXT: vmv.s.x v9, a0
704 ; RV32-NEXT: vsetvli zero, a1, e16, m1, ta, ma
705 ; RV32-NEXT: vredmaxu.vs v9, v8, v9, v0.t
706 ; RV32-NEXT: vmv.x.s a0, v9
709 ; RV64-LABEL: vpreduce_umax_nxv4i16:
711 ; RV64-NEXT: slli a0, a0, 48
712 ; RV64-NEXT: srli a0, a0, 48
713 ; RV64-NEXT: vsetivli zero, 1, e16, m1, ta, ma
714 ; RV64-NEXT: vmv.s.x v9, a0
715 ; RV64-NEXT: vsetvli zero, a1, e16, m1, ta, ma
716 ; RV64-NEXT: vredmaxu.vs v9, v8, v9, v0.t
717 ; RV64-NEXT: vmv.x.s a0, v9
719 %r = call i16 @llvm.vp.reduce.umax.nxv4i16(i16 %s, <vscale x 4 x i16> %v, <vscale x 4 x i1> %m, i32 %evl)
; Masked vp.reduce.smax on <vscale x 4 x i16>; RV32 and RV64 share the CHECK lines.
; Expected (e16, m1): seed v9 from a0 (vmv.s.x, AVL 1), vsetvli from a1, vredmax.vs under v0.t.
723 declare i16 @llvm.vp.reduce.smax.nxv4i16(i16, <vscale x 4 x i16>, <vscale x 4 x i1>, i32)
725 define signext i16 @vpreduce_smax_nxv4i16(i16 signext %s, <vscale x 4 x i16> %v, <vscale x 4 x i1> %m, i32 zeroext %evl) {
726 ; CHECK-LABEL: vpreduce_smax_nxv4i16:
728 ; CHECK-NEXT: vsetivli zero, 1, e16, m1, ta, ma
729 ; CHECK-NEXT: vmv.s.x v9, a0
730 ; CHECK-NEXT: vsetvli zero, a1, e16, m1, ta, ma
731 ; CHECK-NEXT: vredmax.vs v9, v8, v9, v0.t
732 ; CHECK-NEXT: vmv.x.s a0, v9
734 %r = call i16 @llvm.vp.reduce.smax.nxv4i16(i16 %s, <vscale x 4 x i16> %v, <vscale x 4 x i1> %m, i32 %evl)
; Masked vp.reduce.umin on <vscale x 4 x i16>. The targets use separate prefixes
; because a0 is first cleared above bit 15 with a shift pair whose amount
; differs: slli/srli by 16 on RV32, by 48 on RV64. The vector sequence that
; follows (e16, m1, vredminu.vs under v0.t) is the same for both.
738 declare i16 @llvm.vp.reduce.umin.nxv4i16(i16, <vscale x 4 x i16>, <vscale x 4 x i1>, i32)
740 define signext i16 @vpreduce_umin_nxv4i16(i16 signext %s, <vscale x 4 x i16> %v, <vscale x 4 x i1> %m, i32 zeroext %evl) {
741 ; RV32-LABEL: vpreduce_umin_nxv4i16:
743 ; RV32-NEXT: slli a0, a0, 16
744 ; RV32-NEXT: srli a0, a0, 16
745 ; RV32-NEXT: vsetivli zero, 1, e16, m1, ta, ma
746 ; RV32-NEXT: vmv.s.x v9, a0
747 ; RV32-NEXT: vsetvli zero, a1, e16, m1, ta, ma
748 ; RV32-NEXT: vredminu.vs v9, v8, v9, v0.t
749 ; RV32-NEXT: vmv.x.s a0, v9
752 ; RV64-LABEL: vpreduce_umin_nxv4i16:
754 ; RV64-NEXT: slli a0, a0, 48
755 ; RV64-NEXT: srli a0, a0, 48
756 ; RV64-NEXT: vsetivli zero, 1, e16, m1, ta, ma
757 ; RV64-NEXT: vmv.s.x v9, a0
758 ; RV64-NEXT: vsetvli zero, a1, e16, m1, ta, ma
759 ; RV64-NEXT: vredminu.vs v9, v8, v9, v0.t
760 ; RV64-NEXT: vmv.x.s a0, v9
762 %r = call i16 @llvm.vp.reduce.umin.nxv4i16(i16 %s, <vscale x 4 x i16> %v, <vscale x 4 x i1> %m, i32 %evl)
; Masked vp.reduce.smin on <vscale x 4 x i16>; RV32 and RV64 share the CHECK lines.
; Expected (e16, m1): seed v9 from a0 (vmv.s.x, AVL 1), vsetvli from a1, vredmin.vs under v0.t.
766 declare i16 @llvm.vp.reduce.smin.nxv4i16(i16, <vscale x 4 x i16>, <vscale x 4 x i1>, i32)
768 define signext i16 @vpreduce_smin_nxv4i16(i16 signext %s, <vscale x 4 x i16> %v, <vscale x 4 x i1> %m, i32 zeroext %evl) {
769 ; CHECK-LABEL: vpreduce_smin_nxv4i16:
771 ; CHECK-NEXT: vsetivli zero, 1, e16, m1, ta, ma
772 ; CHECK-NEXT: vmv.s.x v9, a0
773 ; CHECK-NEXT: vsetvli zero, a1, e16, m1, ta, ma
774 ; CHECK-NEXT: vredmin.vs v9, v8, v9, v0.t
775 ; CHECK-NEXT: vmv.x.s a0, v9
777 %r = call i16 @llvm.vp.reduce.smin.nxv4i16(i16 %s, <vscale x 4 x i16> %v, <vscale x 4 x i1> %m, i32 %evl)
; Masked vp.reduce.and on <vscale x 4 x i16>; RV32 and RV64 share the CHECK lines.
; Expected (e16, m1): seed v9 from a0 (vmv.s.x, AVL 1), vsetvli from a1, vredand.vs under v0.t.
781 declare i16 @llvm.vp.reduce.and.nxv4i16(i16, <vscale x 4 x i16>, <vscale x 4 x i1>, i32)
783 define signext i16 @vpreduce_and_nxv4i16(i16 signext %s, <vscale x 4 x i16> %v, <vscale x 4 x i1> %m, i32 zeroext %evl) {
784 ; CHECK-LABEL: vpreduce_and_nxv4i16:
786 ; CHECK-NEXT: vsetivli zero, 1, e16, m1, ta, ma
787 ; CHECK-NEXT: vmv.s.x v9, a0
788 ; CHECK-NEXT: vsetvli zero, a1, e16, m1, ta, ma
789 ; CHECK-NEXT: vredand.vs v9, v8, v9, v0.t
790 ; CHECK-NEXT: vmv.x.s a0, v9
792 %r = call i16 @llvm.vp.reduce.and.nxv4i16(i16 %s, <vscale x 4 x i16> %v, <vscale x 4 x i1> %m, i32 %evl)
; Masked vp.reduce.or on <vscale x 4 x i16>; RV32 and RV64 share the CHECK lines.
; Expected (e16, m1): seed v9 from a0 (vmv.s.x, AVL 1), vsetvli from a1, vredor.vs under v0.t.
796 declare i16 @llvm.vp.reduce.or.nxv4i16(i16, <vscale x 4 x i16>, <vscale x 4 x i1>, i32)
798 define signext i16 @vpreduce_or_nxv4i16(i16 signext %s, <vscale x 4 x i16> %v, <vscale x 4 x i1> %m, i32 zeroext %evl) {
799 ; CHECK-LABEL: vpreduce_or_nxv4i16:
801 ; CHECK-NEXT: vsetivli zero, 1, e16, m1, ta, ma
802 ; CHECK-NEXT: vmv.s.x v9, a0
803 ; CHECK-NEXT: vsetvli zero, a1, e16, m1, ta, ma
804 ; CHECK-NEXT: vredor.vs v9, v8, v9, v0.t
805 ; CHECK-NEXT: vmv.x.s a0, v9
807 %r = call i16 @llvm.vp.reduce.or.nxv4i16(i16 %s, <vscale x 4 x i16> %v, <vscale x 4 x i1> %m, i32 %evl)
; Masked vp.reduce.xor on <vscale x 4 x i16>; RV32 and RV64 share the CHECK lines.
; Expected (e16, m1): seed v9 from a0 (vmv.s.x, AVL 1), vsetvli from a1, vredxor.vs under v0.t.
811 declare i16 @llvm.vp.reduce.xor.nxv4i16(i16, <vscale x 4 x i16>, <vscale x 4 x i1>, i32)
813 define signext i16 @vpreduce_xor_nxv4i16(i16 signext %s, <vscale x 4 x i16> %v, <vscale x 4 x i1> %m, i32 zeroext %evl) {
814 ; CHECK-LABEL: vpreduce_xor_nxv4i16:
816 ; CHECK-NEXT: vsetivli zero, 1, e16, m1, ta, ma
817 ; CHECK-NEXT: vmv.s.x v9, a0
818 ; CHECK-NEXT: vsetvli zero, a1, e16, m1, ta, ma
819 ; CHECK-NEXT: vredxor.vs v9, v8, v9, v0.t
820 ; CHECK-NEXT: vmv.x.s a0, v9
822 %r = call i16 @llvm.vp.reduce.xor.nxv4i16(i16 %s, <vscale x 4 x i16> %v, <vscale x 4 x i1> %m, i32 %evl)
; Masked vp.reduce.add on <vscale x 1 x i32>; RV32 and RV64 share the CHECK lines.
; Expected (e32, mf2): seed v9 from a0 (vmv.s.x, AVL 1), vsetvli from a1, vredsum.vs under v0.t.
826 declare i32 @llvm.vp.reduce.add.nxv1i32(i32, <vscale x 1 x i32>, <vscale x 1 x i1>, i32)
828 define signext i32 @vpreduce_add_nxv1i32(i32 signext %s, <vscale x 1 x i32> %v, <vscale x 1 x i1> %m, i32 zeroext %evl) {
829 ; CHECK-LABEL: vpreduce_add_nxv1i32:
831 ; CHECK-NEXT: vsetivli zero, 1, e32, mf2, ta, ma
832 ; CHECK-NEXT: vmv.s.x v9, a0
833 ; CHECK-NEXT: vsetvli zero, a1, e32, mf2, ta, ma
834 ; CHECK-NEXT: vredsum.vs v9, v8, v9, v0.t
835 ; CHECK-NEXT: vmv.x.s a0, v9
837 %r = call i32 @llvm.vp.reduce.add.nxv1i32(i32 %s, <vscale x 1 x i32> %v, <vscale x 1 x i1> %m, i32 %evl)
; Masked vp.reduce.umax on <vscale x 1 x i32>. RV32 expects only the vector
; sequence (e32, mf2, vredmaxu.vs under v0.t); RV64 additionally expects
; `andi a0, a0, -1` in front of it, hence the separate prefixes.
; NOTE(review): AND with -1 leaves a0 unchanged, so that line appears to record
; a redundant instruction in the generated code rather than a required step.
; The assertions come from update_llc_test_checks.py; rerun the script instead
; of hand-editing the line if codegen changes.
841 declare i32 @llvm.vp.reduce.umax.nxv1i32(i32, <vscale x 1 x i32>, <vscale x 1 x i1>, i32)
843 define signext i32 @vpreduce_umax_nxv1i32(i32 signext %s, <vscale x 1 x i32> %v, <vscale x 1 x i1> %m, i32 zeroext %evl) {
844 ; RV32-LABEL: vpreduce_umax_nxv1i32:
846 ; RV32-NEXT: vsetivli zero, 1, e32, mf2, ta, ma
847 ; RV32-NEXT: vmv.s.x v9, a0
848 ; RV32-NEXT: vsetvli zero, a1, e32, mf2, ta, ma
849 ; RV32-NEXT: vredmaxu.vs v9, v8, v9, v0.t
850 ; RV32-NEXT: vmv.x.s a0, v9
853 ; RV64-LABEL: vpreduce_umax_nxv1i32:
855 ; RV64-NEXT: andi a0, a0, -1
856 ; RV64-NEXT: vsetivli zero, 1, e32, mf2, ta, ma
857 ; RV64-NEXT: vmv.s.x v9, a0
858 ; RV64-NEXT: vsetvli zero, a1, e32, mf2, ta, ma
859 ; RV64-NEXT: vredmaxu.vs v9, v8, v9, v0.t
860 ; RV64-NEXT: vmv.x.s a0, v9
862 %r = call i32 @llvm.vp.reduce.umax.nxv1i32(i32 %s, <vscale x 1 x i32> %v, <vscale x 1 x i1> %m, i32 %evl)
866 declare i32 @llvm.vp.reduce.smax.nxv1i32(i32, <vscale x 1 x i32>, <vscale x 1 x i1>, i32)
; Masked vp.reduce.smax on <vscale x 1 x i32>. RV32 and RV64 share the CHECK lines:
; the start value is placed in v9 with vmv.s.x, then a masked vredmax.vs runs at e32/mf2.
868 define signext i32 @vpreduce_smax_nxv1i32(i32 signext %s, <vscale x 1 x i32> %v, <vscale x 1 x i1> %m, i32 zeroext %evl) {
869 ; CHECK-LABEL: vpreduce_smax_nxv1i32:
871 ; CHECK-NEXT: vsetivli zero, 1, e32, mf2, ta, ma
872 ; CHECK-NEXT: vmv.s.x v9, a0
873 ; CHECK-NEXT: vsetvli zero, a1, e32, mf2, ta, ma
874 ; CHECK-NEXT: vredmax.vs v9, v8, v9, v0.t
875 ; CHECK-NEXT: vmv.x.s a0, v9
877 %r = call i32 @llvm.vp.reduce.smax.nxv1i32(i32 %s, <vscale x 1 x i32> %v, <vscale x 1 x i1> %m, i32 %evl)
881 declare i32 @llvm.vp.reduce.umin.nxv1i32(i32, <vscale x 1 x i32>, <vscale x 1 x i1>, i32)
; Masked vp.reduce.umin on <vscale x 1 x i32>. Checked per target: the RV64 lines expect
; an extra `andi a0, a0, -1` on the start value ahead of the vredminu.vs sequence.
883 define signext i32 @vpreduce_umin_nxv1i32(i32 signext %s, <vscale x 1 x i32> %v, <vscale x 1 x i1> %m, i32 zeroext %evl) {
884 ; RV32-LABEL: vpreduce_umin_nxv1i32:
886 ; RV32-NEXT: vsetivli zero, 1, e32, mf2, ta, ma
887 ; RV32-NEXT: vmv.s.x v9, a0
888 ; RV32-NEXT: vsetvli zero, a1, e32, mf2, ta, ma
889 ; RV32-NEXT: vredminu.vs v9, v8, v9, v0.t
890 ; RV32-NEXT: vmv.x.s a0, v9
893 ; RV64-LABEL: vpreduce_umin_nxv1i32:
895 ; RV64-NEXT: andi a0, a0, -1
896 ; RV64-NEXT: vsetivli zero, 1, e32, mf2, ta, ma
897 ; RV64-NEXT: vmv.s.x v9, a0
898 ; RV64-NEXT: vsetvli zero, a1, e32, mf2, ta, ma
899 ; RV64-NEXT: vredminu.vs v9, v8, v9, v0.t
900 ; RV64-NEXT: vmv.x.s a0, v9
902 %r = call i32 @llvm.vp.reduce.umin.nxv1i32(i32 %s, <vscale x 1 x i32> %v, <vscale x 1 x i1> %m, i32 %evl)
906 declare i32 @llvm.vp.reduce.smin.nxv1i32(i32, <vscale x 1 x i32>, <vscale x 1 x i1>, i32)
; Masked vp.reduce.smin on <vscale x 1 x i32>. RV32 and RV64 share the CHECK lines:
; the start value is placed in v9 with vmv.s.x, then a masked vredmin.vs runs at e32/mf2.
908 define signext i32 @vpreduce_smin_nxv1i32(i32 signext %s, <vscale x 1 x i32> %v, <vscale x 1 x i1> %m, i32 zeroext %evl) {
909 ; CHECK-LABEL: vpreduce_smin_nxv1i32:
911 ; CHECK-NEXT: vsetivli zero, 1, e32, mf2, ta, ma
912 ; CHECK-NEXT: vmv.s.x v9, a0
913 ; CHECK-NEXT: vsetvli zero, a1, e32, mf2, ta, ma
914 ; CHECK-NEXT: vredmin.vs v9, v8, v9, v0.t
915 ; CHECK-NEXT: vmv.x.s a0, v9
917 %r = call i32 @llvm.vp.reduce.smin.nxv1i32(i32 %s, <vscale x 1 x i32> %v, <vscale x 1 x i1> %m, i32 %evl)
921 declare i32 @llvm.vp.reduce.and.nxv1i32(i32, <vscale x 1 x i32>, <vscale x 1 x i1>, i32)
; Masked vp.reduce.and on <vscale x 1 x i32>. RV32 and RV64 share the CHECK lines:
; the start value is placed in v9 with vmv.s.x, then a masked vredand.vs runs at e32/mf2.
923 define signext i32 @vpreduce_and_nxv1i32(i32 signext %s, <vscale x 1 x i32> %v, <vscale x 1 x i1> %m, i32 zeroext %evl) {
924 ; CHECK-LABEL: vpreduce_and_nxv1i32:
926 ; CHECK-NEXT: vsetivli zero, 1, e32, mf2, ta, ma
927 ; CHECK-NEXT: vmv.s.x v9, a0
928 ; CHECK-NEXT: vsetvli zero, a1, e32, mf2, ta, ma
929 ; CHECK-NEXT: vredand.vs v9, v8, v9, v0.t
930 ; CHECK-NEXT: vmv.x.s a0, v9
932 %r = call i32 @llvm.vp.reduce.and.nxv1i32(i32 %s, <vscale x 1 x i32> %v, <vscale x 1 x i1> %m, i32 %evl)
936 declare i32 @llvm.vp.reduce.or.nxv1i32(i32, <vscale x 1 x i32>, <vscale x 1 x i1>, i32)
; Masked vp.reduce.or on <vscale x 1 x i32>. RV32 and RV64 share the CHECK lines:
; the start value is placed in v9 with vmv.s.x, then a masked vredor.vs runs at e32/mf2.
938 define signext i32 @vpreduce_or_nxv1i32(i32 signext %s, <vscale x 1 x i32> %v, <vscale x 1 x i1> %m, i32 zeroext %evl) {
939 ; CHECK-LABEL: vpreduce_or_nxv1i32:
941 ; CHECK-NEXT: vsetivli zero, 1, e32, mf2, ta, ma
942 ; CHECK-NEXT: vmv.s.x v9, a0
943 ; CHECK-NEXT: vsetvli zero, a1, e32, mf2, ta, ma
944 ; CHECK-NEXT: vredor.vs v9, v8, v9, v0.t
945 ; CHECK-NEXT: vmv.x.s a0, v9
947 %r = call i32 @llvm.vp.reduce.or.nxv1i32(i32 %s, <vscale x 1 x i32> %v, <vscale x 1 x i1> %m, i32 %evl)
951 declare i32 @llvm.vp.reduce.xor.nxv1i32(i32, <vscale x 1 x i32>, <vscale x 1 x i1>, i32)
; Masked vp.reduce.xor on <vscale x 1 x i32>. RV32 and RV64 share the CHECK lines:
; the start value is placed in v9 with vmv.s.x, then a masked vredxor.vs runs at e32/mf2.
953 define signext i32 @vpreduce_xor_nxv1i32(i32 signext %s, <vscale x 1 x i32> %v, <vscale x 1 x i1> %m, i32 zeroext %evl) {
954 ; CHECK-LABEL: vpreduce_xor_nxv1i32:
956 ; CHECK-NEXT: vsetivli zero, 1, e32, mf2, ta, ma
957 ; CHECK-NEXT: vmv.s.x v9, a0
958 ; CHECK-NEXT: vsetvli zero, a1, e32, mf2, ta, ma
959 ; CHECK-NEXT: vredxor.vs v9, v8, v9, v0.t
960 ; CHECK-NEXT: vmv.x.s a0, v9
962 %r = call i32 @llvm.vp.reduce.xor.nxv1i32(i32 %s, <vscale x 1 x i32> %v, <vscale x 1 x i1> %m, i32 %evl)
966 declare i32 @llvm.vp.reduce.add.nxv2i32(i32, <vscale x 2 x i32>, <vscale x 2 x i1>, i32)
; Masked vp.reduce.add on <vscale x 2 x i32>. RV32 and RV64 share the CHECK lines:
; the start value is placed in v9 with vmv.s.x, then a masked vredsum.vs runs at e32/m1.
968 define signext i32 @vpreduce_add_nxv2i32(i32 signext %s, <vscale x 2 x i32> %v, <vscale x 2 x i1> %m, i32 zeroext %evl) {
969 ; CHECK-LABEL: vpreduce_add_nxv2i32:
971 ; CHECK-NEXT: vsetivli zero, 1, e32, m1, ta, ma
972 ; CHECK-NEXT: vmv.s.x v9, a0
973 ; CHECK-NEXT: vsetvli zero, a1, e32, m1, ta, ma
974 ; CHECK-NEXT: vredsum.vs v9, v8, v9, v0.t
975 ; CHECK-NEXT: vmv.x.s a0, v9
977 %r = call i32 @llvm.vp.reduce.add.nxv2i32(i32 %s, <vscale x 2 x i32> %v, <vscale x 2 x i1> %m, i32 %evl)
981 declare i32 @llvm.vp.reduce.umax.nxv2i32(i32, <vscale x 2 x i32>, <vscale x 2 x i1>, i32)
; Masked vp.reduce.umax on <vscale x 2 x i32>. Checked per target: the RV64 lines expect
; an extra `andi a0, a0, -1` on the start value ahead of the vredmaxu.vs sequence.
983 define signext i32 @vpreduce_umax_nxv2i32(i32 signext %s, <vscale x 2 x i32> %v, <vscale x 2 x i1> %m, i32 zeroext %evl) {
984 ; RV32-LABEL: vpreduce_umax_nxv2i32:
986 ; RV32-NEXT: vsetivli zero, 1, e32, m1, ta, ma
987 ; RV32-NEXT: vmv.s.x v9, a0
988 ; RV32-NEXT: vsetvli zero, a1, e32, m1, ta, ma
989 ; RV32-NEXT: vredmaxu.vs v9, v8, v9, v0.t
990 ; RV32-NEXT: vmv.x.s a0, v9
993 ; RV64-LABEL: vpreduce_umax_nxv2i32:
995 ; RV64-NEXT: andi a0, a0, -1
996 ; RV64-NEXT: vsetivli zero, 1, e32, m1, ta, ma
997 ; RV64-NEXT: vmv.s.x v9, a0
998 ; RV64-NEXT: vsetvli zero, a1, e32, m1, ta, ma
999 ; RV64-NEXT: vredmaxu.vs v9, v8, v9, v0.t
1000 ; RV64-NEXT: vmv.x.s a0, v9
1002 %r = call i32 @llvm.vp.reduce.umax.nxv2i32(i32 %s, <vscale x 2 x i32> %v, <vscale x 2 x i1> %m, i32 %evl)
1006 declare i32 @llvm.vp.reduce.smax.nxv2i32(i32, <vscale x 2 x i32>, <vscale x 2 x i1>, i32)
; Masked vp.reduce.smax on <vscale x 2 x i32>. RV32 and RV64 share the CHECK lines:
; the start value is placed in v9 with vmv.s.x, then a masked vredmax.vs runs at e32/m1.
1008 define signext i32 @vpreduce_smax_nxv2i32(i32 signext %s, <vscale x 2 x i32> %v, <vscale x 2 x i1> %m, i32 zeroext %evl) {
1009 ; CHECK-LABEL: vpreduce_smax_nxv2i32:
1011 ; CHECK-NEXT: vsetivli zero, 1, e32, m1, ta, ma
1012 ; CHECK-NEXT: vmv.s.x v9, a0
1013 ; CHECK-NEXT: vsetvli zero, a1, e32, m1, ta, ma
1014 ; CHECK-NEXT: vredmax.vs v9, v8, v9, v0.t
1015 ; CHECK-NEXT: vmv.x.s a0, v9
1017 %r = call i32 @llvm.vp.reduce.smax.nxv2i32(i32 %s, <vscale x 2 x i32> %v, <vscale x 2 x i1> %m, i32 %evl)
1021 declare i32 @llvm.vp.reduce.umin.nxv2i32(i32, <vscale x 2 x i32>, <vscale x 2 x i1>, i32)
; Masked vp.reduce.umin on <vscale x 2 x i32>. Checked per target: the RV64 lines expect
; an extra `andi a0, a0, -1` on the start value ahead of the vredminu.vs sequence.
1023 define signext i32 @vpreduce_umin_nxv2i32(i32 signext %s, <vscale x 2 x i32> %v, <vscale x 2 x i1> %m, i32 zeroext %evl) {
1024 ; RV32-LABEL: vpreduce_umin_nxv2i32:
1026 ; RV32-NEXT: vsetivli zero, 1, e32, m1, ta, ma
1027 ; RV32-NEXT: vmv.s.x v9, a0
1028 ; RV32-NEXT: vsetvli zero, a1, e32, m1, ta, ma
1029 ; RV32-NEXT: vredminu.vs v9, v8, v9, v0.t
1030 ; RV32-NEXT: vmv.x.s a0, v9
1033 ; RV64-LABEL: vpreduce_umin_nxv2i32:
1035 ; RV64-NEXT: andi a0, a0, -1
1036 ; RV64-NEXT: vsetivli zero, 1, e32, m1, ta, ma
1037 ; RV64-NEXT: vmv.s.x v9, a0
1038 ; RV64-NEXT: vsetvli zero, a1, e32, m1, ta, ma
1039 ; RV64-NEXT: vredminu.vs v9, v8, v9, v0.t
1040 ; RV64-NEXT: vmv.x.s a0, v9
1042 %r = call i32 @llvm.vp.reduce.umin.nxv2i32(i32 %s, <vscale x 2 x i32> %v, <vscale x 2 x i1> %m, i32 %evl)
1046 declare i32 @llvm.vp.reduce.smin.nxv2i32(i32, <vscale x 2 x i32>, <vscale x 2 x i1>, i32)
; Masked vp.reduce.smin on <vscale x 2 x i32>. RV32 and RV64 share the CHECK lines:
; the start value is placed in v9 with vmv.s.x, then a masked vredmin.vs runs at e32/m1.
1048 define signext i32 @vpreduce_smin_nxv2i32(i32 signext %s, <vscale x 2 x i32> %v, <vscale x 2 x i1> %m, i32 zeroext %evl) {
1049 ; CHECK-LABEL: vpreduce_smin_nxv2i32:
1051 ; CHECK-NEXT: vsetivli zero, 1, e32, m1, ta, ma
1052 ; CHECK-NEXT: vmv.s.x v9, a0
1053 ; CHECK-NEXT: vsetvli zero, a1, e32, m1, ta, ma
1054 ; CHECK-NEXT: vredmin.vs v9, v8, v9, v0.t
1055 ; CHECK-NEXT: vmv.x.s a0, v9
1057 %r = call i32 @llvm.vp.reduce.smin.nxv2i32(i32 %s, <vscale x 2 x i32> %v, <vscale x 2 x i1> %m, i32 %evl)
1061 declare i32 @llvm.vp.reduce.and.nxv2i32(i32, <vscale x 2 x i32>, <vscale x 2 x i1>, i32)
; Masked vp.reduce.and on <vscale x 2 x i32>. RV32 and RV64 share the CHECK lines:
; the start value is placed in v9 with vmv.s.x, then a masked vredand.vs runs at e32/m1.
1063 define signext i32 @vpreduce_and_nxv2i32(i32 signext %s, <vscale x 2 x i32> %v, <vscale x 2 x i1> %m, i32 zeroext %evl) {
1064 ; CHECK-LABEL: vpreduce_and_nxv2i32:
1066 ; CHECK-NEXT: vsetivli zero, 1, e32, m1, ta, ma
1067 ; CHECK-NEXT: vmv.s.x v9, a0
1068 ; CHECK-NEXT: vsetvli zero, a1, e32, m1, ta, ma
1069 ; CHECK-NEXT: vredand.vs v9, v8, v9, v0.t
1070 ; CHECK-NEXT: vmv.x.s a0, v9
1072 %r = call i32 @llvm.vp.reduce.and.nxv2i32(i32 %s, <vscale x 2 x i32> %v, <vscale x 2 x i1> %m, i32 %evl)
1076 declare i32 @llvm.vp.reduce.or.nxv2i32(i32, <vscale x 2 x i32>, <vscale x 2 x i1>, i32)
; Masked vp.reduce.or on <vscale x 2 x i32>. RV32 and RV64 share the CHECK lines:
; the start value is placed in v9 with vmv.s.x, then a masked vredor.vs runs at e32/m1.
1078 define signext i32 @vpreduce_or_nxv2i32(i32 signext %s, <vscale x 2 x i32> %v, <vscale x 2 x i1> %m, i32 zeroext %evl) {
1079 ; CHECK-LABEL: vpreduce_or_nxv2i32:
1081 ; CHECK-NEXT: vsetivli zero, 1, e32, m1, ta, ma
1082 ; CHECK-NEXT: vmv.s.x v9, a0
1083 ; CHECK-NEXT: vsetvli zero, a1, e32, m1, ta, ma
1084 ; CHECK-NEXT: vredor.vs v9, v8, v9, v0.t
1085 ; CHECK-NEXT: vmv.x.s a0, v9
1087 %r = call i32 @llvm.vp.reduce.or.nxv2i32(i32 %s, <vscale x 2 x i32> %v, <vscale x 2 x i1> %m, i32 %evl)
1091 declare i32 @llvm.vp.reduce.xor.nxv2i32(i32, <vscale x 2 x i32>, <vscale x 2 x i1>, i32)
; Masked vp.reduce.xor on <vscale x 2 x i32>. RV32 and RV64 share the CHECK lines:
; the start value is placed in v9 with vmv.s.x, then a masked vredxor.vs runs at e32/m1.
1093 define signext i32 @vpreduce_xor_nxv2i32(i32 signext %s, <vscale x 2 x i32> %v, <vscale x 2 x i1> %m, i32 zeroext %evl) {
1094 ; CHECK-LABEL: vpreduce_xor_nxv2i32:
1096 ; CHECK-NEXT: vsetivli zero, 1, e32, m1, ta, ma
1097 ; CHECK-NEXT: vmv.s.x v9, a0
1098 ; CHECK-NEXT: vsetvli zero, a1, e32, m1, ta, ma
1099 ; CHECK-NEXT: vredxor.vs v9, v8, v9, v0.t
1100 ; CHECK-NEXT: vmv.x.s a0, v9
1102 %r = call i32 @llvm.vp.reduce.xor.nxv2i32(i32 %s, <vscale x 2 x i32> %v, <vscale x 2 x i1> %m, i32 %evl)
1106 declare i32 @llvm.vp.reduce.add.nxv4i32(i32, <vscale x 4 x i32>, <vscale x 4 x i1>, i32)
; Masked vp.reduce.add on <vscale x 4 x i32>. RV32 and RV64 share the CHECK lines:
; the start value is placed in v10 at e32/m1, then a masked vredsum.vs runs at e32/m2.
1108 define signext i32 @vpreduce_add_nxv4i32(i32 signext %s, <vscale x 4 x i32> %v, <vscale x 4 x i1> %m, i32 zeroext %evl) {
1109 ; CHECK-LABEL: vpreduce_add_nxv4i32:
1111 ; CHECK-NEXT: vsetivli zero, 1, e32, m1, ta, ma
1112 ; CHECK-NEXT: vmv.s.x v10, a0
1113 ; CHECK-NEXT: vsetvli zero, a1, e32, m2, ta, ma
1114 ; CHECK-NEXT: vredsum.vs v10, v8, v10, v0.t
1115 ; CHECK-NEXT: vmv.x.s a0, v10
1117 %r = call i32 @llvm.vp.reduce.add.nxv4i32(i32 %s, <vscale x 4 x i32> %v, <vscale x 4 x i1> %m, i32 %evl)
1121 declare i32 @llvm.vp.reduce.umax.nxv4i32(i32, <vscale x 4 x i32>, <vscale x 4 x i1>, i32)
; Masked vp.reduce.umax on <vscale x 4 x i32>. Checked per target: the RV64 lines expect
; an extra `andi a0, a0, -1` on the start value ahead of the e32/m2 vredmaxu.vs sequence.
1123 define signext i32 @vpreduce_umax_nxv4i32(i32 signext %s, <vscale x 4 x i32> %v, <vscale x 4 x i1> %m, i32 zeroext %evl) {
1124 ; RV32-LABEL: vpreduce_umax_nxv4i32:
1126 ; RV32-NEXT: vsetivli zero, 1, e32, m1, ta, ma
1127 ; RV32-NEXT: vmv.s.x v10, a0
1128 ; RV32-NEXT: vsetvli zero, a1, e32, m2, ta, ma
1129 ; RV32-NEXT: vredmaxu.vs v10, v8, v10, v0.t
1130 ; RV32-NEXT: vmv.x.s a0, v10
1133 ; RV64-LABEL: vpreduce_umax_nxv4i32:
1135 ; RV64-NEXT: andi a0, a0, -1
1136 ; RV64-NEXT: vsetivli zero, 1, e32, m1, ta, ma
1137 ; RV64-NEXT: vmv.s.x v10, a0
1138 ; RV64-NEXT: vsetvli zero, a1, e32, m2, ta, ma
1139 ; RV64-NEXT: vredmaxu.vs v10, v8, v10, v0.t
1140 ; RV64-NEXT: vmv.x.s a0, v10
1142 %r = call i32 @llvm.vp.reduce.umax.nxv4i32(i32 %s, <vscale x 4 x i32> %v, <vscale x 4 x i1> %m, i32 %evl)
1146 declare i32 @llvm.vp.reduce.umax.nxv32i32(i32, <vscale x 32 x i32>, <vscale x 32 x i1>, i32)
; Masked vp.reduce.umax on <vscale x 32 x i32>. The checks expect two chained e32/m8
; vredmaxu.vs ops accumulating into v25: the first on v8 with the mask in v0, the second
; on v16 after the slid-down mask held in v24 is copied into v0. The two VLs are computed
; from the incoming a1 and vlenb, with a branch clamping the first one. RV64 additionally
; expects `andi a2, a0, -1` on the start value.
1148 define signext i32 @vpreduce_umax_nxv32i32(i32 signext %s, <vscale x 32 x i32> %v, <vscale x 32 x i1> %m, i32 zeroext %evl) {
1149 ; RV32-LABEL: vpreduce_umax_nxv32i32:
1151 ; RV32-NEXT: csrr a3, vlenb
1152 ; RV32-NEXT: srli a2, a3, 2
1153 ; RV32-NEXT: vsetvli a4, zero, e8, mf2, ta, ma
1154 ; RV32-NEXT: vslidedown.vx v24, v0, a2
1155 ; RV32-NEXT: slli a3, a3, 1
1156 ; RV32-NEXT: sub a2, a1, a3
1157 ; RV32-NEXT: sltu a4, a1, a2
1158 ; RV32-NEXT: addi a4, a4, -1
1159 ; RV32-NEXT: and a2, a4, a2
1160 ; RV32-NEXT: bltu a1, a3, .LBB67_2
1161 ; RV32-NEXT: # %bb.1:
1162 ; RV32-NEXT: mv a1, a3
1163 ; RV32-NEXT: .LBB67_2:
1164 ; RV32-NEXT: vsetvli zero, zero, e32, m2, ta, ma
1165 ; RV32-NEXT: vmv.s.x v25, a0
1166 ; RV32-NEXT: vsetvli zero, a1, e32, m8, ta, ma
1167 ; RV32-NEXT: vredmaxu.vs v25, v8, v25, v0.t
1168 ; RV32-NEXT: vsetvli zero, a2, e32, m8, ta, ma
1169 ; RV32-NEXT: vmv1r.v v0, v24
1170 ; RV32-NEXT: vredmaxu.vs v25, v16, v25, v0.t
1171 ; RV32-NEXT: vmv.x.s a0, v25
1174 ; RV64-LABEL: vpreduce_umax_nxv32i32:
1176 ; RV64-NEXT: csrr a3, vlenb
1177 ; RV64-NEXT: srli a2, a3, 2
1178 ; RV64-NEXT: vsetvli a4, zero, e8, mf2, ta, ma
1179 ; RV64-NEXT: vslidedown.vx v24, v0, a2
1180 ; RV64-NEXT: andi a2, a0, -1
1181 ; RV64-NEXT: slli a3, a3, 1
1182 ; RV64-NEXT: sub a0, a1, a3
1183 ; RV64-NEXT: sltu a4, a1, a0
1184 ; RV64-NEXT: addi a4, a4, -1
1185 ; RV64-NEXT: and a0, a4, a0
1186 ; RV64-NEXT: bltu a1, a3, .LBB67_2
1187 ; RV64-NEXT: # %bb.1:
1188 ; RV64-NEXT: mv a1, a3
1189 ; RV64-NEXT: .LBB67_2:
1190 ; RV64-NEXT: vsetvli zero, zero, e32, m2, ta, ma
1191 ; RV64-NEXT: vmv.s.x v25, a2
1192 ; RV64-NEXT: vsetvli zero, a1, e32, m8, ta, ma
1193 ; RV64-NEXT: vredmaxu.vs v25, v8, v25, v0.t
1194 ; RV64-NEXT: vsetvli zero, a0, e32, m8, ta, ma
1195 ; RV64-NEXT: vmv1r.v v0, v24
1196 ; RV64-NEXT: vredmaxu.vs v25, v16, v25, v0.t
1197 ; RV64-NEXT: vmv.x.s a0, v25
1199 %r = call i32 @llvm.vp.reduce.umax.nxv32i32(i32 %s, <vscale x 32 x i32> %v, <vscale x 32 x i1> %m, i32 %evl)
1203 declare i32 @llvm.vp.reduce.smax.nxv4i32(i32, <vscale x 4 x i32>, <vscale x 4 x i1>, i32)
; Masked vp.reduce.smax on <vscale x 4 x i32>. RV32 and RV64 share the CHECK lines:
; the start value is placed in v10 at e32/m1, then a masked vredmax.vs runs at e32/m2.
1205 define signext i32 @vpreduce_smax_nxv4i32(i32 signext %s, <vscale x 4 x i32> %v, <vscale x 4 x i1> %m, i32 zeroext %evl) {
1206 ; CHECK-LABEL: vpreduce_smax_nxv4i32:
1208 ; CHECK-NEXT: vsetivli zero, 1, e32, m1, ta, ma
1209 ; CHECK-NEXT: vmv.s.x v10, a0
1210 ; CHECK-NEXT: vsetvli zero, a1, e32, m2, ta, ma
1211 ; CHECK-NEXT: vredmax.vs v10, v8, v10, v0.t
1212 ; CHECK-NEXT: vmv.x.s a0, v10
1214 %r = call i32 @llvm.vp.reduce.smax.nxv4i32(i32 %s, <vscale x 4 x i32> %v, <vscale x 4 x i1> %m, i32 %evl)
1218 declare i32 @llvm.vp.reduce.umin.nxv4i32(i32, <vscale x 4 x i32>, <vscale x 4 x i1>, i32)
; Masked vp.reduce.umin on <vscale x 4 x i32>. Checked per target: the RV64 lines expect
; an extra `andi a0, a0, -1` on the start value ahead of the e32/m2 vredminu.vs sequence.
1220 define signext i32 @vpreduce_umin_nxv4i32(i32 signext %s, <vscale x 4 x i32> %v, <vscale x 4 x i1> %m, i32 zeroext %evl) {
1221 ; RV32-LABEL: vpreduce_umin_nxv4i32:
1223 ; RV32-NEXT: vsetivli zero, 1, e32, m1, ta, ma
1224 ; RV32-NEXT: vmv.s.x v10, a0
1225 ; RV32-NEXT: vsetvli zero, a1, e32, m2, ta, ma
1226 ; RV32-NEXT: vredminu.vs v10, v8, v10, v0.t
1227 ; RV32-NEXT: vmv.x.s a0, v10
1230 ; RV64-LABEL: vpreduce_umin_nxv4i32:
1232 ; RV64-NEXT: andi a0, a0, -1
1233 ; RV64-NEXT: vsetivli zero, 1, e32, m1, ta, ma
1234 ; RV64-NEXT: vmv.s.x v10, a0
1235 ; RV64-NEXT: vsetvli zero, a1, e32, m2, ta, ma
1236 ; RV64-NEXT: vredminu.vs v10, v8, v10, v0.t
1237 ; RV64-NEXT: vmv.x.s a0, v10
1239 %r = call i32 @llvm.vp.reduce.umin.nxv4i32(i32 %s, <vscale x 4 x i32> %v, <vscale x 4 x i1> %m, i32 %evl)
1243 declare i32 @llvm.vp.reduce.smin.nxv4i32(i32, <vscale x 4 x i32>, <vscale x 4 x i1>, i32)
; Masked vp.reduce.smin on <vscale x 4 x i32>. RV32 and RV64 share the CHECK lines:
; the start value is placed in v10 at e32/m1, then a masked vredmin.vs runs at e32/m2.
1245 define signext i32 @vpreduce_smin_nxv4i32(i32 signext %s, <vscale x 4 x i32> %v, <vscale x 4 x i1> %m, i32 zeroext %evl) {
1246 ; CHECK-LABEL: vpreduce_smin_nxv4i32:
1248 ; CHECK-NEXT: vsetivli zero, 1, e32, m1, ta, ma
1249 ; CHECK-NEXT: vmv.s.x v10, a0
1250 ; CHECK-NEXT: vsetvli zero, a1, e32, m2, ta, ma
1251 ; CHECK-NEXT: vredmin.vs v10, v8, v10, v0.t
1252 ; CHECK-NEXT: vmv.x.s a0, v10
1254 %r = call i32 @llvm.vp.reduce.smin.nxv4i32(i32 %s, <vscale x 4 x i32> %v, <vscale x 4 x i1> %m, i32 %evl)
1258 declare i32 @llvm.vp.reduce.and.nxv4i32(i32, <vscale x 4 x i32>, <vscale x 4 x i1>, i32)
; Masked vp.reduce.and on <vscale x 4 x i32>. RV32 and RV64 share the CHECK lines:
; the start value is placed in v10 at e32/m1, then a masked vredand.vs runs at e32/m2.
1260 define signext i32 @vpreduce_and_nxv4i32(i32 signext %s, <vscale x 4 x i32> %v, <vscale x 4 x i1> %m, i32 zeroext %evl) {
1261 ; CHECK-LABEL: vpreduce_and_nxv4i32:
1263 ; CHECK-NEXT: vsetivli zero, 1, e32, m1, ta, ma
1264 ; CHECK-NEXT: vmv.s.x v10, a0
1265 ; CHECK-NEXT: vsetvli zero, a1, e32, m2, ta, ma
1266 ; CHECK-NEXT: vredand.vs v10, v8, v10, v0.t
1267 ; CHECK-NEXT: vmv.x.s a0, v10
1269 %r = call i32 @llvm.vp.reduce.and.nxv4i32(i32 %s, <vscale x 4 x i32> %v, <vscale x 4 x i1> %m, i32 %evl)
1273 declare i32 @llvm.vp.reduce.or.nxv4i32(i32, <vscale x 4 x i32>, <vscale x 4 x i1>, i32)
; Masked vp.reduce.or on <vscale x 4 x i32>. RV32 and RV64 share the CHECK lines:
; the start value is placed in v10 at e32/m1, then a masked vredor.vs runs at e32/m2.
1275 define signext i32 @vpreduce_or_nxv4i32(i32 signext %s, <vscale x 4 x i32> %v, <vscale x 4 x i1> %m, i32 zeroext %evl) {
1276 ; CHECK-LABEL: vpreduce_or_nxv4i32:
1278 ; CHECK-NEXT: vsetivli zero, 1, e32, m1, ta, ma
1279 ; CHECK-NEXT: vmv.s.x v10, a0
1280 ; CHECK-NEXT: vsetvli zero, a1, e32, m2, ta, ma
1281 ; CHECK-NEXT: vredor.vs v10, v8, v10, v0.t
1282 ; CHECK-NEXT: vmv.x.s a0, v10
1284 %r = call i32 @llvm.vp.reduce.or.nxv4i32(i32 %s, <vscale x 4 x i32> %v, <vscale x 4 x i1> %m, i32 %evl)
1288 declare i32 @llvm.vp.reduce.xor.nxv4i32(i32, <vscale x 4 x i32>, <vscale x 4 x i1>, i32)
; Masked vp.reduce.xor on <vscale x 4 x i32>. RV32 and RV64 share the CHECK lines:
; the start value is placed in v10 at e32/m1, then a masked vredxor.vs runs at e32/m2.
1290 define signext i32 @vpreduce_xor_nxv4i32(i32 signext %s, <vscale x 4 x i32> %v, <vscale x 4 x i1> %m, i32 zeroext %evl) {
1291 ; CHECK-LABEL: vpreduce_xor_nxv4i32:
1293 ; CHECK-NEXT: vsetivli zero, 1, e32, m1, ta, ma
1294 ; CHECK-NEXT: vmv.s.x v10, a0
1295 ; CHECK-NEXT: vsetvli zero, a1, e32, m2, ta, ma
1296 ; CHECK-NEXT: vredxor.vs v10, v8, v10, v0.t
1297 ; CHECK-NEXT: vmv.x.s a0, v10
1299 %r = call i32 @llvm.vp.reduce.xor.nxv4i32(i32 %s, <vscale x 4 x i32> %v, <vscale x 4 x i1> %m, i32 %evl)
1303 declare i64 @llvm.vp.reduce.add.nxv1i64(i64, <vscale x 1 x i64>, <vscale x 1 x i1>, i32)
; Masked vp.reduce.add on <vscale x 1 x i64>. The RV32 checks store a0/a1 to the stack,
; load the start value with vlse64.v, and split the 64-bit result into a0/a1 using
; vsrl.vx by 32. The RV64 checks use vmv.s.x / vmv.x.s directly around vredsum.vs.
1305 define signext i64 @vpreduce_add_nxv1i64(i64 signext %s, <vscale x 1 x i64> %v, <vscale x 1 x i1> %m, i32 zeroext %evl) {
1306 ; RV32-LABEL: vpreduce_add_nxv1i64:
1308 ; RV32-NEXT: addi sp, sp, -16
1309 ; RV32-NEXT: .cfi_def_cfa_offset 16
1310 ; RV32-NEXT: sw a1, 12(sp)
1311 ; RV32-NEXT: sw a0, 8(sp)
1312 ; RV32-NEXT: addi a0, sp, 8
1313 ; RV32-NEXT: vsetivli zero, 1, e64, m1, ta, ma
1314 ; RV32-NEXT: vlse64.v v9, (a0), zero
1315 ; RV32-NEXT: vsetvli zero, a2, e64, m1, ta, ma
1316 ; RV32-NEXT: vredsum.vs v9, v8, v9, v0.t
1317 ; RV32-NEXT: vmv.x.s a0, v9
1318 ; RV32-NEXT: li a1, 32
1319 ; RV32-NEXT: vsetivli zero, 1, e64, m1, ta, ma
1320 ; RV32-NEXT: vsrl.vx v8, v9, a1
1321 ; RV32-NEXT: vmv.x.s a1, v8
1322 ; RV32-NEXT: addi sp, sp, 16
1325 ; RV64-LABEL: vpreduce_add_nxv1i64:
1327 ; RV64-NEXT: vsetivli zero, 1, e64, m1, ta, ma
1328 ; RV64-NEXT: vmv.s.x v9, a0
1329 ; RV64-NEXT: vsetvli zero, a1, e64, m1, ta, ma
1330 ; RV64-NEXT: vredsum.vs v9, v8, v9, v0.t
1331 ; RV64-NEXT: vmv.x.s a0, v9
1333 %r = call i64 @llvm.vp.reduce.add.nxv1i64(i64 %s, <vscale x 1 x i64> %v, <vscale x 1 x i1> %m, i32 %evl)
; Signed widening add: %v is sign-extended from i32 to i64 elements before vp.reduce.add.
; The checks expect a single masked vwredsum.vs executed at e32/mf2, with the scalar
; start value and result handled at e64.
1337 define signext i64 @vpwreduce_add_nxv1i32(i64 signext %s, <vscale x 1 x i32> %v, <vscale x 1 x i1> %m, i32 zeroext %evl) {
1338 ; RV32-LABEL: vpwreduce_add_nxv1i32:
1340 ; RV32-NEXT: addi sp, sp, -16
1341 ; RV32-NEXT: .cfi_def_cfa_offset 16
1342 ; RV32-NEXT: sw a1, 12(sp)
1343 ; RV32-NEXT: sw a0, 8(sp)
1344 ; RV32-NEXT: addi a0, sp, 8
1345 ; RV32-NEXT: vsetivli zero, 1, e64, m1, ta, ma
1346 ; RV32-NEXT: vlse64.v v9, (a0), zero
1347 ; RV32-NEXT: vsetvli zero, a2, e32, mf2, ta, ma
1348 ; RV32-NEXT: vwredsum.vs v9, v8, v9, v0.t
1349 ; RV32-NEXT: vsetivli zero, 1, e64, m1, ta, ma
1350 ; RV32-NEXT: vmv.x.s a0, v9
1351 ; RV32-NEXT: li a1, 32
1352 ; RV32-NEXT: vsrl.vx v8, v9, a1
1353 ; RV32-NEXT: vmv.x.s a1, v8
1354 ; RV32-NEXT: addi sp, sp, 16
1357 ; RV64-LABEL: vpwreduce_add_nxv1i32:
1359 ; RV64-NEXT: vsetivli zero, 1, e64, m1, ta, ma
1360 ; RV64-NEXT: vmv.s.x v9, a0
1361 ; RV64-NEXT: vsetvli zero, a1, e32, mf2, ta, ma
1362 ; RV64-NEXT: vwredsum.vs v9, v8, v9, v0.t
1363 ; RV64-NEXT: vsetvli zero, zero, e64, m1, ta, ma
1364 ; RV64-NEXT: vmv.x.s a0, v9
1366 %e = sext <vscale x 1 x i32> %v to <vscale x 1 x i64>
1367 %r = call i64 @llvm.vp.reduce.add.nxv1i64(i64 %s, <vscale x 1 x i64> %e, <vscale x 1 x i1> %m, i32 %evl)
; Unsigned widening add: %v is zero-extended from i32 to i64 elements before
; vp.reduce.add, which should select the masked unsigned widening reduction
; vwredsumu.vs at e32/mf2. The signed (sext / vwredsum.vs) form is covered by
; vpwreduce_add_nxv1i32; this test previously duplicated it by using sext.
; NOTE(review): the vwredsumu.vs CHECK lines were updated by hand to match the zext
; input; re-run utils/update_llc_test_checks.py to confirm the exact sequence.
1371 define signext i64 @vpwreduce_uadd_nxv1i32(i64 signext %s, <vscale x 1 x i32> %v, <vscale x 1 x i1> %m, i32 zeroext %evl) {
1372 ; RV32-LABEL: vpwreduce_uadd_nxv1i32:
1374 ; RV32-NEXT: addi sp, sp, -16
1375 ; RV32-NEXT: .cfi_def_cfa_offset 16
1376 ; RV32-NEXT: sw a1, 12(sp)
1377 ; RV32-NEXT: sw a0, 8(sp)
1378 ; RV32-NEXT: addi a0, sp, 8
1379 ; RV32-NEXT: vsetivli zero, 1, e64, m1, ta, ma
1380 ; RV32-NEXT: vlse64.v v9, (a0), zero
1381 ; RV32-NEXT: vsetvli zero, a2, e32, mf2, ta, ma
1382 ; RV32-NEXT: vwredsumu.vs v9, v8, v9, v0.t
1383 ; RV32-NEXT: vsetivli zero, 1, e64, m1, ta, ma
1384 ; RV32-NEXT: vmv.x.s a0, v9
1385 ; RV32-NEXT: li a1, 32
1386 ; RV32-NEXT: vsrl.vx v8, v9, a1
1387 ; RV32-NEXT: vmv.x.s a1, v8
1388 ; RV32-NEXT: addi sp, sp, 16
1391 ; RV64-LABEL: vpwreduce_uadd_nxv1i32:
1393 ; RV64-NEXT: vsetivli zero, 1, e64, m1, ta, ma
1394 ; RV64-NEXT: vmv.s.x v9, a0
1395 ; RV64-NEXT: vsetvli zero, a1, e32, mf2, ta, ma
1396 ; RV64-NEXT: vwredsumu.vs v9, v8, v9, v0.t
1397 ; RV64-NEXT: vsetvli zero, zero, e64, m1, ta, ma
1398 ; RV64-NEXT: vmv.x.s a0, v9
1400 %e = zext <vscale x 1 x i32> %v to <vscale x 1 x i64>
1401 %r = call i64 @llvm.vp.reduce.add.nxv1i64(i64 %s, <vscale x 1 x i64> %e, <vscale x 1 x i1> %m, i32 %evl)
1405 declare i64 @llvm.vp.reduce.umax.nxv1i64(i64, <vscale x 1 x i64>, <vscale x 1 x i1>, i32)
; Masked vp.reduce.umax on <vscale x 1 x i64>. The RV32 checks store a0/a1 to the stack,
; load the start value with vlse64.v, and split the 64-bit result into a0/a1 using
; vsrl.vx by 32. The RV64 checks use vmv.s.x / vmv.x.s directly around vredmaxu.vs.
1407 define signext i64 @vpreduce_umax_nxv1i64(i64 signext %s, <vscale x 1 x i64> %v, <vscale x 1 x i1> %m, i32 zeroext %evl) {
1408 ; RV32-LABEL: vpreduce_umax_nxv1i64:
1410 ; RV32-NEXT: addi sp, sp, -16
1411 ; RV32-NEXT: .cfi_def_cfa_offset 16
1412 ; RV32-NEXT: sw a1, 12(sp)
1413 ; RV32-NEXT: sw a0, 8(sp)
1414 ; RV32-NEXT: addi a0, sp, 8
1415 ; RV32-NEXT: vsetivli zero, 1, e64, m1, ta, ma
1416 ; RV32-NEXT: vlse64.v v9, (a0), zero
1417 ; RV32-NEXT: vsetvli zero, a2, e64, m1, ta, ma
1418 ; RV32-NEXT: vredmaxu.vs v9, v8, v9, v0.t
1419 ; RV32-NEXT: vmv.x.s a0, v9
1420 ; RV32-NEXT: li a1, 32
1421 ; RV32-NEXT: vsetivli zero, 1, e64, m1, ta, ma
1422 ; RV32-NEXT: vsrl.vx v8, v9, a1
1423 ; RV32-NEXT: vmv.x.s a1, v8
1424 ; RV32-NEXT: addi sp, sp, 16
1427 ; RV64-LABEL: vpreduce_umax_nxv1i64:
1429 ; RV64-NEXT: vsetivli zero, 1, e64, m1, ta, ma
1430 ; RV64-NEXT: vmv.s.x v9, a0
1431 ; RV64-NEXT: vsetvli zero, a1, e64, m1, ta, ma
1432 ; RV64-NEXT: vredmaxu.vs v9, v8, v9, v0.t
1433 ; RV64-NEXT: vmv.x.s a0, v9
1435 %r = call i64 @llvm.vp.reduce.umax.nxv1i64(i64 %s, <vscale x 1 x i64> %v, <vscale x 1 x i1> %m, i32 %evl)
1439 declare i64 @llvm.vp.reduce.smax.nxv1i64(i64, <vscale x 1 x i64>, <vscale x 1 x i1>, i32)
; Masked vp.reduce.smax on <vscale x 1 x i64>. The RV32 checks store a0/a1 to the stack,
; load the start value with vlse64.v, and split the 64-bit result into a0/a1 using
; vsrl.vx by 32. The RV64 checks use vmv.s.x / vmv.x.s directly around vredmax.vs.
1441 define signext i64 @vpreduce_smax_nxv1i64(i64 signext %s, <vscale x 1 x i64> %v, <vscale x 1 x i1> %m, i32 zeroext %evl) {
1442 ; RV32-LABEL: vpreduce_smax_nxv1i64:
1444 ; RV32-NEXT: addi sp, sp, -16
1445 ; RV32-NEXT: .cfi_def_cfa_offset 16
1446 ; RV32-NEXT: sw a1, 12(sp)
1447 ; RV32-NEXT: sw a0, 8(sp)
1448 ; RV32-NEXT: addi a0, sp, 8
1449 ; RV32-NEXT: vsetivli zero, 1, e64, m1, ta, ma
1450 ; RV32-NEXT: vlse64.v v9, (a0), zero
1451 ; RV32-NEXT: vsetvli zero, a2, e64, m1, ta, ma
1452 ; RV32-NEXT: vredmax.vs v9, v8, v9, v0.t
1453 ; RV32-NEXT: vmv.x.s a0, v9
1454 ; RV32-NEXT: li a1, 32
1455 ; RV32-NEXT: vsetivli zero, 1, e64, m1, ta, ma
1456 ; RV32-NEXT: vsrl.vx v8, v9, a1
1457 ; RV32-NEXT: vmv.x.s a1, v8
1458 ; RV32-NEXT: addi sp, sp, 16
1461 ; RV64-LABEL: vpreduce_smax_nxv1i64:
1463 ; RV64-NEXT: vsetivli zero, 1, e64, m1, ta, ma
1464 ; RV64-NEXT: vmv.s.x v9, a0
1465 ; RV64-NEXT: vsetvli zero, a1, e64, m1, ta, ma
1466 ; RV64-NEXT: vredmax.vs v9, v8, v9, v0.t
1467 ; RV64-NEXT: vmv.x.s a0, v9
1469 %r = call i64 @llvm.vp.reduce.smax.nxv1i64(i64 %s, <vscale x 1 x i64> %v, <vscale x 1 x i1> %m, i32 %evl)
1473 declare i64 @llvm.vp.reduce.umin.nxv1i64(i64, <vscale x 1 x i64>, <vscale x 1 x i1>, i32)
; Masked vp.reduce.umin on <vscale x 1 x i64>. The RV32 checks store a0/a1 to the stack,
; load the start value with vlse64.v, and split the 64-bit result into a0/a1 using
; vsrl.vx by 32. The RV64 checks use vmv.s.x / vmv.x.s directly around vredminu.vs.
1475 define signext i64 @vpreduce_umin_nxv1i64(i64 signext %s, <vscale x 1 x i64> %v, <vscale x 1 x i1> %m, i32 zeroext %evl) {
1476 ; RV32-LABEL: vpreduce_umin_nxv1i64:
1478 ; RV32-NEXT: addi sp, sp, -16
1479 ; RV32-NEXT: .cfi_def_cfa_offset 16
1480 ; RV32-NEXT: sw a1, 12(sp)
1481 ; RV32-NEXT: sw a0, 8(sp)
1482 ; RV32-NEXT: addi a0, sp, 8
1483 ; RV32-NEXT: vsetivli zero, 1, e64, m1, ta, ma
1484 ; RV32-NEXT: vlse64.v v9, (a0), zero
1485 ; RV32-NEXT: vsetvli zero, a2, e64, m1, ta, ma
1486 ; RV32-NEXT: vredminu.vs v9, v8, v9, v0.t
1487 ; RV32-NEXT: vmv.x.s a0, v9
1488 ; RV32-NEXT: li a1, 32
1489 ; RV32-NEXT: vsetivli zero, 1, e64, m1, ta, ma
1490 ; RV32-NEXT: vsrl.vx v8, v9, a1
1491 ; RV32-NEXT: vmv.x.s a1, v8
1492 ; RV32-NEXT: addi sp, sp, 16
1495 ; RV64-LABEL: vpreduce_umin_nxv1i64:
1497 ; RV64-NEXT: vsetivli zero, 1, e64, m1, ta, ma
1498 ; RV64-NEXT: vmv.s.x v9, a0
1499 ; RV64-NEXT: vsetvli zero, a1, e64, m1, ta, ma
1500 ; RV64-NEXT: vredminu.vs v9, v8, v9, v0.t
1501 ; RV64-NEXT: vmv.x.s a0, v9
1503 %r = call i64 @llvm.vp.reduce.umin.nxv1i64(i64 %s, <vscale x 1 x i64> %v, <vscale x 1 x i1> %m, i32 %evl)
1507 declare i64 @llvm.vp.reduce.smin.nxv1i64(i64, <vscale x 1 x i64>, <vscale x 1 x i1>, i32)
; Masked vp.reduce.smin on <vscale x 1 x i64>. The RV32 checks store a0/a1 to the stack,
; load the start value with vlse64.v, and split the 64-bit result into a0/a1 using
; vsrl.vx by 32. The RV64 checks use vmv.s.x / vmv.x.s directly around vredmin.vs.
1509 define signext i64 @vpreduce_smin_nxv1i64(i64 signext %s, <vscale x 1 x i64> %v, <vscale x 1 x i1> %m, i32 zeroext %evl) {
1510 ; RV32-LABEL: vpreduce_smin_nxv1i64:
1512 ; RV32-NEXT: addi sp, sp, -16
1513 ; RV32-NEXT: .cfi_def_cfa_offset 16
1514 ; RV32-NEXT: sw a1, 12(sp)
1515 ; RV32-NEXT: sw a0, 8(sp)
1516 ; RV32-NEXT: addi a0, sp, 8
1517 ; RV32-NEXT: vsetivli zero, 1, e64, m1, ta, ma
1518 ; RV32-NEXT: vlse64.v v9, (a0), zero
1519 ; RV32-NEXT: vsetvli zero, a2, e64, m1, ta, ma
1520 ; RV32-NEXT: vredmin.vs v9, v8, v9, v0.t
1521 ; RV32-NEXT: vmv.x.s a0, v9
1522 ; RV32-NEXT: li a1, 32
1523 ; RV32-NEXT: vsetivli zero, 1, e64, m1, ta, ma
1524 ; RV32-NEXT: vsrl.vx v8, v9, a1
1525 ; RV32-NEXT: vmv.x.s a1, v8
1526 ; RV32-NEXT: addi sp, sp, 16
1529 ; RV64-LABEL: vpreduce_smin_nxv1i64:
1531 ; RV64-NEXT: vsetivli zero, 1, e64, m1, ta, ma
1532 ; RV64-NEXT: vmv.s.x v9, a0
1533 ; RV64-NEXT: vsetvli zero, a1, e64, m1, ta, ma
1534 ; RV64-NEXT: vredmin.vs v9, v8, v9, v0.t
1535 ; RV64-NEXT: vmv.x.s a0, v9
1537 %r = call i64 @llvm.vp.reduce.smin.nxv1i64(i64 %s, <vscale x 1 x i64> %v, <vscale x 1 x i1> %m, i32 %evl)
1541 declare i64 @llvm.vp.reduce.and.nxv1i64(i64, <vscale x 1 x i64>, <vscale x 1 x i1>, i32)
; Masked vp.reduce.and on <vscale x 1 x i64>. The RV32 checks store a0/a1 to the stack,
; load the start value with vlse64.v, and split the 64-bit result into a0/a1 using
; vsrl.vx by 32. The RV64 checks use vmv.s.x / vmv.x.s directly around vredand.vs.
1543 define signext i64 @vpreduce_and_nxv1i64(i64 signext %s, <vscale x 1 x i64> %v, <vscale x 1 x i1> %m, i32 zeroext %evl) {
1544 ; RV32-LABEL: vpreduce_and_nxv1i64:
1546 ; RV32-NEXT: addi sp, sp, -16
1547 ; RV32-NEXT: .cfi_def_cfa_offset 16
1548 ; RV32-NEXT: sw a1, 12(sp)
1549 ; RV32-NEXT: sw a0, 8(sp)
1550 ; RV32-NEXT: addi a0, sp, 8
1551 ; RV32-NEXT: vsetivli zero, 1, e64, m1, ta, ma
1552 ; RV32-NEXT: vlse64.v v9, (a0), zero
1553 ; RV32-NEXT: vsetvli zero, a2, e64, m1, ta, ma
1554 ; RV32-NEXT: vredand.vs v9, v8, v9, v0.t
1555 ; RV32-NEXT: vmv.x.s a0, v9
1556 ; RV32-NEXT: li a1, 32
1557 ; RV32-NEXT: vsetivli zero, 1, e64, m1, ta, ma
1558 ; RV32-NEXT: vsrl.vx v8, v9, a1
1559 ; RV32-NEXT: vmv.x.s a1, v8
1560 ; RV32-NEXT: addi sp, sp, 16
1563 ; RV64-LABEL: vpreduce_and_nxv1i64:
1565 ; RV64-NEXT: vsetivli zero, 1, e64, m1, ta, ma
1566 ; RV64-NEXT: vmv.s.x v9, a0
1567 ; RV64-NEXT: vsetvli zero, a1, e64, m1, ta, ma
1568 ; RV64-NEXT: vredand.vs v9, v8, v9, v0.t
1569 ; RV64-NEXT: vmv.x.s a0, v9
1571 %r = call i64 @llvm.vp.reduce.and.nxv1i64(i64 %s, <vscale x 1 x i64> %v, <vscale x 1 x i1> %m, i32 %evl)
1575 declare i64 @llvm.vp.reduce.or.nxv1i64(i64, <vscale x 1 x i64>, <vscale x 1 x i1>, i32)
; Masked vp.reduce.or on <vscale x 1 x i64>. The RV32 checks store a0/a1 to the stack,
; load the start value with vlse64.v, and split the 64-bit result into a0/a1 using
; vsrl.vx by 32. The RV64 checks use vmv.s.x / vmv.x.s directly around vredor.vs.
1577 define signext i64 @vpreduce_or_nxv1i64(i64 signext %s, <vscale x 1 x i64> %v, <vscale x 1 x i1> %m, i32 zeroext %evl) {
1578 ; RV32-LABEL: vpreduce_or_nxv1i64:
1580 ; RV32-NEXT: addi sp, sp, -16
1581 ; RV32-NEXT: .cfi_def_cfa_offset 16
1582 ; RV32-NEXT: sw a1, 12(sp)
1583 ; RV32-NEXT: sw a0, 8(sp)
1584 ; RV32-NEXT: addi a0, sp, 8
1585 ; RV32-NEXT: vsetivli zero, 1, e64, m1, ta, ma
1586 ; RV32-NEXT: vlse64.v v9, (a0), zero
1587 ; RV32-NEXT: vsetvli zero, a2, e64, m1, ta, ma
1588 ; RV32-NEXT: vredor.vs v9, v8, v9, v0.t
1589 ; RV32-NEXT: vmv.x.s a0, v9
1590 ; RV32-NEXT: li a1, 32
1591 ; RV32-NEXT: vsetivli zero, 1, e64, m1, ta, ma
1592 ; RV32-NEXT: vsrl.vx v8, v9, a1
1593 ; RV32-NEXT: vmv.x.s a1, v8
1594 ; RV32-NEXT: addi sp, sp, 16
1597 ; RV64-LABEL: vpreduce_or_nxv1i64:
1599 ; RV64-NEXT: vsetivli zero, 1, e64, m1, ta, ma
1600 ; RV64-NEXT: vmv.s.x v9, a0
1601 ; RV64-NEXT: vsetvli zero, a1, e64, m1, ta, ma
1602 ; RV64-NEXT: vredor.vs v9, v8, v9, v0.t
1603 ; RV64-NEXT: vmv.x.s a0, v9
1605 %r = call i64 @llvm.vp.reduce.or.nxv1i64(i64 %s, <vscale x 1 x i64> %v, <vscale x 1 x i1> %m, i32 %evl)
1609 declare i64 @llvm.vp.reduce.xor.nxv1i64(i64, <vscale x 1 x i64>, <vscale x 1 x i1>, i32)
; Masked vp.reduce.xor on <vscale x 1 x i64>. The RV32 checks store a0/a1 to the stack,
; load the start value with vlse64.v, and split the 64-bit result into a0/a1 using
; vsrl.vx by 32. The RV64 checks use vmv.s.x / vmv.x.s directly around vredxor.vs.
1611 define signext i64 @vpreduce_xor_nxv1i64(i64 signext %s, <vscale x 1 x i64> %v, <vscale x 1 x i1> %m, i32 zeroext %evl) {
1612 ; RV32-LABEL: vpreduce_xor_nxv1i64:
1614 ; RV32-NEXT: addi sp, sp, -16
1615 ; RV32-NEXT: .cfi_def_cfa_offset 16
1616 ; RV32-NEXT: sw a1, 12(sp)
1617 ; RV32-NEXT: sw a0, 8(sp)
1618 ; RV32-NEXT: addi a0, sp, 8
1619 ; RV32-NEXT: vsetivli zero, 1, e64, m1, ta, ma
1620 ; RV32-NEXT: vlse64.v v9, (a0), zero
1621 ; RV32-NEXT: vsetvli zero, a2, e64, m1, ta, ma
1622 ; RV32-NEXT: vredxor.vs v9, v8, v9, v0.t
1623 ; RV32-NEXT: vmv.x.s a0, v9
1624 ; RV32-NEXT: li a1, 32
1625 ; RV32-NEXT: vsetivli zero, 1, e64, m1, ta, ma
1626 ; RV32-NEXT: vsrl.vx v8, v9, a1
1627 ; RV32-NEXT: vmv.x.s a1, v8
1628 ; RV32-NEXT: addi sp, sp, 16
1631 ; RV64-LABEL: vpreduce_xor_nxv1i64:
1633 ; RV64-NEXT: vsetivli zero, 1, e64, m1, ta, ma
1634 ; RV64-NEXT: vmv.s.x v9, a0
1635 ; RV64-NEXT: vsetvli zero, a1, e64, m1, ta, ma
1636 ; RV64-NEXT: vredxor.vs v9, v8, v9, v0.t
1637 ; RV64-NEXT: vmv.x.s a0, v9
1639 %r = call i64 @llvm.vp.reduce.xor.nxv1i64(i64 %s, <vscale x 1 x i64> %v, <vscale x 1 x i1> %m, i32 %evl)
1643 declare i64 @llvm.vp.reduce.add.nxv2i64(i64, <vscale x 2 x i64>, <vscale x 2 x i1>, i32)
; Masked vp.reduce.add on <vscale x 2 x i64>. The RV32 checks store a0/a1 to the stack,
; load the start value into v10 with vlse64.v, and split the 64-bit result into a0/a1
; using vsrl.vx by 32. The RV64 checks use vmv.s.x / vmv.x.s directly. In both, the
; vredsum.vs itself runs at e64/m2.
1645 define signext i64 @vpreduce_add_nxv2i64(i64 signext %s, <vscale x 2 x i64> %v, <vscale x 2 x i1> %m, i32 zeroext %evl) {
1646 ; RV32-LABEL: vpreduce_add_nxv2i64:
1648 ; RV32-NEXT: addi sp, sp, -16
1649 ; RV32-NEXT: .cfi_def_cfa_offset 16
1650 ; RV32-NEXT: sw a1, 12(sp)
1651 ; RV32-NEXT: sw a0, 8(sp)
1652 ; RV32-NEXT: addi a0, sp, 8
1653 ; RV32-NEXT: vsetivli zero, 1, e64, m1, ta, ma
1654 ; RV32-NEXT: vlse64.v v10, (a0), zero
1655 ; RV32-NEXT: vsetvli zero, a2, e64, m2, ta, ma
1656 ; RV32-NEXT: vredsum.vs v10, v8, v10, v0.t
1657 ; RV32-NEXT: vmv.x.s a0, v10
1658 ; RV32-NEXT: li a1, 32
1659 ; RV32-NEXT: vsetivli zero, 1, e64, m1, ta, ma
1660 ; RV32-NEXT: vsrl.vx v8, v10, a1
1661 ; RV32-NEXT: vmv.x.s a1, v8
1662 ; RV32-NEXT: addi sp, sp, 16
1665 ; RV64-LABEL: vpreduce_add_nxv2i64:
1667 ; RV64-NEXT: vsetivli zero, 1, e64, m1, ta, ma
1668 ; RV64-NEXT: vmv.s.x v10, a0
1669 ; RV64-NEXT: vsetvli zero, a1, e64, m2, ta, ma
1670 ; RV64-NEXT: vredsum.vs v10, v8, v10, v0.t
1671 ; RV64-NEXT: vmv.x.s a0, v10
1673 %r = call i64 @llvm.vp.reduce.add.nxv2i64(i64 %s, <vscale x 2 x i64> %v, <vscale x 2 x i1> %m, i32 %evl)
; Signed widening add: %v is sign-extended from i32 to i64 elements before vp.reduce.add.
; The checks expect a single masked vwredsum.vs executed at e32/m1, with the scalar
; start value and result handled at e64/m1.
1677 define signext i64 @vwpreduce_add_nxv2i32(i64 signext %s, <vscale x 2 x i32> %v, <vscale x 2 x i1> %m, i32 zeroext %evl) {
1678 ; RV32-LABEL: vwpreduce_add_nxv2i32:
1680 ; RV32-NEXT: addi sp, sp, -16
1681 ; RV32-NEXT: .cfi_def_cfa_offset 16
1682 ; RV32-NEXT: sw a1, 12(sp)
1683 ; RV32-NEXT: sw a0, 8(sp)
1684 ; RV32-NEXT: addi a0, sp, 8
1685 ; RV32-NEXT: vsetivli zero, 1, e64, m1, ta, ma
1686 ; RV32-NEXT: vlse64.v v9, (a0), zero
1687 ; RV32-NEXT: vsetvli zero, a2, e32, m1, ta, ma
1688 ; RV32-NEXT: vwredsum.vs v9, v8, v9, v0.t
1689 ; RV32-NEXT: vsetivli zero, 1, e64, m1, ta, ma
1690 ; RV32-NEXT: vmv.x.s a0, v9
1691 ; RV32-NEXT: li a1, 32
1692 ; RV32-NEXT: vsrl.vx v8, v9, a1
1693 ; RV32-NEXT: vmv.x.s a1, v8
1694 ; RV32-NEXT: addi sp, sp, 16
1697 ; RV64-LABEL: vwpreduce_add_nxv2i32:
1699 ; RV64-NEXT: vsetivli zero, 1, e64, m1, ta, ma
1700 ; RV64-NEXT: vmv.s.x v9, a0
1701 ; RV64-NEXT: vsetvli zero, a1, e32, m1, ta, ma
1702 ; RV64-NEXT: vwredsum.vs v9, v8, v9, v0.t
1703 ; RV64-NEXT: vsetivli zero, 1, e64, m1, ta, ma
1704 ; RV64-NEXT: vmv.x.s a0, v9
1706 %e = sext <vscale x 2 x i32> %v to <vscale x 2 x i64>
1707 %r = call i64 @llvm.vp.reduce.add.nxv2i64(i64 %s, <vscale x 2 x i64> %e, <vscale x 2 x i1> %m, i32 %evl)
; Unsigned widening add: %v is zero-extended from i32 to i64 elements before
; vp.reduce.add, which should select the masked unsigned widening reduction
; vwredsumu.vs at e32/m1. The signed (sext / vwredsum.vs) form is covered by
; vwpreduce_add_nxv2i32; this test previously duplicated it by using sext.
; NOTE(review): the vwredsumu.vs CHECK lines were updated by hand to match the zext
; input; re-run utils/update_llc_test_checks.py to confirm the exact sequence.
1711 define signext i64 @vwpreduce_uadd_nxv2i32(i64 signext %s, <vscale x 2 x i32> %v, <vscale x 2 x i1> %m, i32 zeroext %evl) {
1712 ; RV32-LABEL: vwpreduce_uadd_nxv2i32:
1714 ; RV32-NEXT: addi sp, sp, -16
1715 ; RV32-NEXT: .cfi_def_cfa_offset 16
1716 ; RV32-NEXT: sw a1, 12(sp)
1717 ; RV32-NEXT: sw a0, 8(sp)
1718 ; RV32-NEXT: addi a0, sp, 8
1719 ; RV32-NEXT: vsetivli zero, 1, e64, m1, ta, ma
1720 ; RV32-NEXT: vlse64.v v9, (a0), zero
1721 ; RV32-NEXT: vsetvli zero, a2, e32, m1, ta, ma
1722 ; RV32-NEXT: vwredsumu.vs v9, v8, v9, v0.t
1723 ; RV32-NEXT: vsetivli zero, 1, e64, m1, ta, ma
1724 ; RV32-NEXT: vmv.x.s a0, v9
1725 ; RV32-NEXT: li a1, 32
1726 ; RV32-NEXT: vsrl.vx v8, v9, a1
1727 ; RV32-NEXT: vmv.x.s a1, v8
1728 ; RV32-NEXT: addi sp, sp, 16
1731 ; RV64-LABEL: vwpreduce_uadd_nxv2i32:
1733 ; RV64-NEXT: vsetivli zero, 1, e64, m1, ta, ma
1734 ; RV64-NEXT: vmv.s.x v9, a0
1735 ; RV64-NEXT: vsetvli zero, a1, e32, m1, ta, ma
1736 ; RV64-NEXT: vwredsumu.vs v9, v8, v9, v0.t
1737 ; RV64-NEXT: vsetivli zero, 1, e64, m1, ta, ma
1738 ; RV64-NEXT: vmv.x.s a0, v9
1740 %e = zext <vscale x 2 x i32> %v to <vscale x 2 x i64>
1741 %r = call i64 @llvm.vp.reduce.add.nxv2i64(i64 %s, <vscale x 2 x i64> %e, <vscale x 2 x i1> %m, i32 %evl)
; llvm.vp.reduce.umax over <vscale x 2 x i64> with start value %s, mask %m and
; length %evl; both targets are expected to select a masked vredmaxu.vs at
; e64/m2.
; riscv32 stores the a0/a1 words of %s to the stack and reloads them as one e64
; element with vlse64.v, then reads the result out as a0 plus a1 (the upper
; word is obtained with vsrl.vx by 32). riscv64 inserts %s with vmv.s.x and
; reads the result with a single vmv.x.s.
1745 declare i64 @llvm.vp.reduce.umax.nxv2i64(i64, <vscale x 2 x i64>, <vscale x 2 x i1>, i32)
1747 define signext i64 @vpreduce_umax_nxv2i64(i64 signext %s, <vscale x 2 x i64> %v, <vscale x 2 x i1> %m, i32 zeroext %evl) {
1748 ; RV32-LABEL: vpreduce_umax_nxv2i64:
1750 ; RV32-NEXT: addi sp, sp, -16
1751 ; RV32-NEXT: .cfi_def_cfa_offset 16
1752 ; RV32-NEXT: sw a1, 12(sp)
1753 ; RV32-NEXT: sw a0, 8(sp)
1754 ; RV32-NEXT: addi a0, sp, 8
1755 ; RV32-NEXT: vsetivli zero, 1, e64, m1, ta, ma
1756 ; RV32-NEXT: vlse64.v v10, (a0), zero
1757 ; RV32-NEXT: vsetvli zero, a2, e64, m2, ta, ma
1758 ; RV32-NEXT: vredmaxu.vs v10, v8, v10, v0.t
1759 ; RV32-NEXT: vmv.x.s a0, v10
1760 ; RV32-NEXT: li a1, 32
1761 ; RV32-NEXT: vsetivli zero, 1, e64, m1, ta, ma
1762 ; RV32-NEXT: vsrl.vx v8, v10, a1
1763 ; RV32-NEXT: vmv.x.s a1, v8
1764 ; RV32-NEXT: addi sp, sp, 16
1767 ; RV64-LABEL: vpreduce_umax_nxv2i64:
1769 ; RV64-NEXT: vsetivli zero, 1, e64, m1, ta, ma
1770 ; RV64-NEXT: vmv.s.x v10, a0
1771 ; RV64-NEXT: vsetvli zero, a1, e64, m2, ta, ma
1772 ; RV64-NEXT: vredmaxu.vs v10, v8, v10, v0.t
1773 ; RV64-NEXT: vmv.x.s a0, v10
1775 %r = call i64 @llvm.vp.reduce.umax.nxv2i64(i64 %s, <vscale x 2 x i64> %v, <vscale x 2 x i1> %m, i32 %evl)
; llvm.vp.reduce.smax over <vscale x 2 x i64> with start value %s, mask %m and
; length %evl; both targets are expected to select a masked vredmax.vs at
; e64/m2.
; riscv32 stores the a0/a1 words of %s to the stack and reloads them as one e64
; element with vlse64.v, then reads the result out as a0 plus a1 (the upper
; word is obtained with vsrl.vx by 32). riscv64 inserts %s with vmv.s.x and
; reads the result with a single vmv.x.s.
1779 declare i64 @llvm.vp.reduce.smax.nxv2i64(i64, <vscale x 2 x i64>, <vscale x 2 x i1>, i32)
1781 define signext i64 @vpreduce_smax_nxv2i64(i64 signext %s, <vscale x 2 x i64> %v, <vscale x 2 x i1> %m, i32 zeroext %evl) {
1782 ; RV32-LABEL: vpreduce_smax_nxv2i64:
1784 ; RV32-NEXT: addi sp, sp, -16
1785 ; RV32-NEXT: .cfi_def_cfa_offset 16
1786 ; RV32-NEXT: sw a1, 12(sp)
1787 ; RV32-NEXT: sw a0, 8(sp)
1788 ; RV32-NEXT: addi a0, sp, 8
1789 ; RV32-NEXT: vsetivli zero, 1, e64, m1, ta, ma
1790 ; RV32-NEXT: vlse64.v v10, (a0), zero
1791 ; RV32-NEXT: vsetvli zero, a2, e64, m2, ta, ma
1792 ; RV32-NEXT: vredmax.vs v10, v8, v10, v0.t
1793 ; RV32-NEXT: vmv.x.s a0, v10
1794 ; RV32-NEXT: li a1, 32
1795 ; RV32-NEXT: vsetivli zero, 1, e64, m1, ta, ma
1796 ; RV32-NEXT: vsrl.vx v8, v10, a1
1797 ; RV32-NEXT: vmv.x.s a1, v8
1798 ; RV32-NEXT: addi sp, sp, 16
1801 ; RV64-LABEL: vpreduce_smax_nxv2i64:
1803 ; RV64-NEXT: vsetivli zero, 1, e64, m1, ta, ma
1804 ; RV64-NEXT: vmv.s.x v10, a0
1805 ; RV64-NEXT: vsetvli zero, a1, e64, m2, ta, ma
1806 ; RV64-NEXT: vredmax.vs v10, v8, v10, v0.t
1807 ; RV64-NEXT: vmv.x.s a0, v10
1809 %r = call i64 @llvm.vp.reduce.smax.nxv2i64(i64 %s, <vscale x 2 x i64> %v, <vscale x 2 x i1> %m, i32 %evl)
; llvm.vp.reduce.umin over <vscale x 2 x i64> with start value %s, mask %m and
; length %evl; both targets are expected to select a masked vredminu.vs at
; e64/m2.
; riscv32 stores the a0/a1 words of %s to the stack and reloads them as one e64
; element with vlse64.v, then reads the result out as a0 plus a1 (the upper
; word is obtained with vsrl.vx by 32). riscv64 inserts %s with vmv.s.x and
; reads the result with a single vmv.x.s.
1813 declare i64 @llvm.vp.reduce.umin.nxv2i64(i64, <vscale x 2 x i64>, <vscale x 2 x i1>, i32)
1815 define signext i64 @vpreduce_umin_nxv2i64(i64 signext %s, <vscale x 2 x i64> %v, <vscale x 2 x i1> %m, i32 zeroext %evl) {
1816 ; RV32-LABEL: vpreduce_umin_nxv2i64:
1818 ; RV32-NEXT: addi sp, sp, -16
1819 ; RV32-NEXT: .cfi_def_cfa_offset 16
1820 ; RV32-NEXT: sw a1, 12(sp)
1821 ; RV32-NEXT: sw a0, 8(sp)
1822 ; RV32-NEXT: addi a0, sp, 8
1823 ; RV32-NEXT: vsetivli zero, 1, e64, m1, ta, ma
1824 ; RV32-NEXT: vlse64.v v10, (a0), zero
1825 ; RV32-NEXT: vsetvli zero, a2, e64, m2, ta, ma
1826 ; RV32-NEXT: vredminu.vs v10, v8, v10, v0.t
1827 ; RV32-NEXT: vmv.x.s a0, v10
1828 ; RV32-NEXT: li a1, 32
1829 ; RV32-NEXT: vsetivli zero, 1, e64, m1, ta, ma
1830 ; RV32-NEXT: vsrl.vx v8, v10, a1
1831 ; RV32-NEXT: vmv.x.s a1, v8
1832 ; RV32-NEXT: addi sp, sp, 16
1835 ; RV64-LABEL: vpreduce_umin_nxv2i64:
1837 ; RV64-NEXT: vsetivli zero, 1, e64, m1, ta, ma
1838 ; RV64-NEXT: vmv.s.x v10, a0
1839 ; RV64-NEXT: vsetvli zero, a1, e64, m2, ta, ma
1840 ; RV64-NEXT: vredminu.vs v10, v8, v10, v0.t
1841 ; RV64-NEXT: vmv.x.s a0, v10
1843 %r = call i64 @llvm.vp.reduce.umin.nxv2i64(i64 %s, <vscale x 2 x i64> %v, <vscale x 2 x i1> %m, i32 %evl)
; llvm.vp.reduce.smin over <vscale x 2 x i64> with start value %s, mask %m and
; length %evl; both targets are expected to select a masked vredmin.vs at
; e64/m2.
; riscv32 stores the a0/a1 words of %s to the stack and reloads them as one e64
; element with vlse64.v, then reads the result out as a0 plus a1 (the upper
; word is obtained with vsrl.vx by 32). riscv64 inserts %s with vmv.s.x and
; reads the result with a single vmv.x.s.
1847 declare i64 @llvm.vp.reduce.smin.nxv2i64(i64, <vscale x 2 x i64>, <vscale x 2 x i1>, i32)
1849 define signext i64 @vpreduce_smin_nxv2i64(i64 signext %s, <vscale x 2 x i64> %v, <vscale x 2 x i1> %m, i32 zeroext %evl) {
1850 ; RV32-LABEL: vpreduce_smin_nxv2i64:
1852 ; RV32-NEXT: addi sp, sp, -16
1853 ; RV32-NEXT: .cfi_def_cfa_offset 16
1854 ; RV32-NEXT: sw a1, 12(sp)
1855 ; RV32-NEXT: sw a0, 8(sp)
1856 ; RV32-NEXT: addi a0, sp, 8
1857 ; RV32-NEXT: vsetivli zero, 1, e64, m1, ta, ma
1858 ; RV32-NEXT: vlse64.v v10, (a0), zero
1859 ; RV32-NEXT: vsetvli zero, a2, e64, m2, ta, ma
1860 ; RV32-NEXT: vredmin.vs v10, v8, v10, v0.t
1861 ; RV32-NEXT: vmv.x.s a0, v10
1862 ; RV32-NEXT: li a1, 32
1863 ; RV32-NEXT: vsetivli zero, 1, e64, m1, ta, ma
1864 ; RV32-NEXT: vsrl.vx v8, v10, a1
1865 ; RV32-NEXT: vmv.x.s a1, v8
1866 ; RV32-NEXT: addi sp, sp, 16
1869 ; RV64-LABEL: vpreduce_smin_nxv2i64:
1871 ; RV64-NEXT: vsetivli zero, 1, e64, m1, ta, ma
1872 ; RV64-NEXT: vmv.s.x v10, a0
1873 ; RV64-NEXT: vsetvli zero, a1, e64, m2, ta, ma
1874 ; RV64-NEXT: vredmin.vs v10, v8, v10, v0.t
1875 ; RV64-NEXT: vmv.x.s a0, v10
1877 %r = call i64 @llvm.vp.reduce.smin.nxv2i64(i64 %s, <vscale x 2 x i64> %v, <vscale x 2 x i1> %m, i32 %evl)
; llvm.vp.reduce.and over <vscale x 2 x i64> with start value %s, mask %m and
; length %evl; both targets are expected to select a masked vredand.vs at
; e64/m2.
; riscv32 stores the a0/a1 words of %s to the stack and reloads them as one e64
; element with vlse64.v, then reads the result out as a0 plus a1 (the upper
; word is obtained with vsrl.vx by 32). riscv64 inserts %s with vmv.s.x and
; reads the result with a single vmv.x.s.
1881 declare i64 @llvm.vp.reduce.and.nxv2i64(i64, <vscale x 2 x i64>, <vscale x 2 x i1>, i32)
1883 define signext i64 @vpreduce_and_nxv2i64(i64 signext %s, <vscale x 2 x i64> %v, <vscale x 2 x i1> %m, i32 zeroext %evl) {
1884 ; RV32-LABEL: vpreduce_and_nxv2i64:
1886 ; RV32-NEXT: addi sp, sp, -16
1887 ; RV32-NEXT: .cfi_def_cfa_offset 16
1888 ; RV32-NEXT: sw a1, 12(sp)
1889 ; RV32-NEXT: sw a0, 8(sp)
1890 ; RV32-NEXT: addi a0, sp, 8
1891 ; RV32-NEXT: vsetivli zero, 1, e64, m1, ta, ma
1892 ; RV32-NEXT: vlse64.v v10, (a0), zero
1893 ; RV32-NEXT: vsetvli zero, a2, e64, m2, ta, ma
1894 ; RV32-NEXT: vredand.vs v10, v8, v10, v0.t
1895 ; RV32-NEXT: vmv.x.s a0, v10
1896 ; RV32-NEXT: li a1, 32
1897 ; RV32-NEXT: vsetivli zero, 1, e64, m1, ta, ma
1898 ; RV32-NEXT: vsrl.vx v8, v10, a1
1899 ; RV32-NEXT: vmv.x.s a1, v8
1900 ; RV32-NEXT: addi sp, sp, 16
1903 ; RV64-LABEL: vpreduce_and_nxv2i64:
1905 ; RV64-NEXT: vsetivli zero, 1, e64, m1, ta, ma
1906 ; RV64-NEXT: vmv.s.x v10, a0
1907 ; RV64-NEXT: vsetvli zero, a1, e64, m2, ta, ma
1908 ; RV64-NEXT: vredand.vs v10, v8, v10, v0.t
1909 ; RV64-NEXT: vmv.x.s a0, v10
1911 %r = call i64 @llvm.vp.reduce.and.nxv2i64(i64 %s, <vscale x 2 x i64> %v, <vscale x 2 x i1> %m, i32 %evl)
; llvm.vp.reduce.or over <vscale x 2 x i64> with start value %s, mask %m and
; length %evl; both targets are expected to select a masked vredor.vs at
; e64/m2.
; riscv32 stores the a0/a1 words of %s to the stack and reloads them as one e64
; element with vlse64.v, then reads the result out as a0 plus a1 (the upper
; word is obtained with vsrl.vx by 32). riscv64 inserts %s with vmv.s.x and
; reads the result with a single vmv.x.s.
1915 declare i64 @llvm.vp.reduce.or.nxv2i64(i64, <vscale x 2 x i64>, <vscale x 2 x i1>, i32)
1917 define signext i64 @vpreduce_or_nxv2i64(i64 signext %s, <vscale x 2 x i64> %v, <vscale x 2 x i1> %m, i32 zeroext %evl) {
1918 ; RV32-LABEL: vpreduce_or_nxv2i64:
1920 ; RV32-NEXT: addi sp, sp, -16
1921 ; RV32-NEXT: .cfi_def_cfa_offset 16
1922 ; RV32-NEXT: sw a1, 12(sp)
1923 ; RV32-NEXT: sw a0, 8(sp)
1924 ; RV32-NEXT: addi a0, sp, 8
1925 ; RV32-NEXT: vsetivli zero, 1, e64, m1, ta, ma
1926 ; RV32-NEXT: vlse64.v v10, (a0), zero
1927 ; RV32-NEXT: vsetvli zero, a2, e64, m2, ta, ma
1928 ; RV32-NEXT: vredor.vs v10, v8, v10, v0.t
1929 ; RV32-NEXT: vmv.x.s a0, v10
1930 ; RV32-NEXT: li a1, 32
1931 ; RV32-NEXT: vsetivli zero, 1, e64, m1, ta, ma
1932 ; RV32-NEXT: vsrl.vx v8, v10, a1
1933 ; RV32-NEXT: vmv.x.s a1, v8
1934 ; RV32-NEXT: addi sp, sp, 16
1937 ; RV64-LABEL: vpreduce_or_nxv2i64:
1939 ; RV64-NEXT: vsetivli zero, 1, e64, m1, ta, ma
1940 ; RV64-NEXT: vmv.s.x v10, a0
1941 ; RV64-NEXT: vsetvli zero, a1, e64, m2, ta, ma
1942 ; RV64-NEXT: vredor.vs v10, v8, v10, v0.t
1943 ; RV64-NEXT: vmv.x.s a0, v10
1945 %r = call i64 @llvm.vp.reduce.or.nxv2i64(i64 %s, <vscale x 2 x i64> %v, <vscale x 2 x i1> %m, i32 %evl)
; llvm.vp.reduce.xor over <vscale x 2 x i64> with start value %s, mask %m and
; length %evl; both targets are expected to select a masked vredxor.vs at
; e64/m2.
; riscv32 stores the a0/a1 words of %s to the stack and reloads them as one e64
; element with vlse64.v, then reads the result out as a0 plus a1 (the upper
; word is obtained with vsrl.vx by 32). riscv64 inserts %s with vmv.s.x and
; reads the result with a single vmv.x.s.
1949 declare i64 @llvm.vp.reduce.xor.nxv2i64(i64, <vscale x 2 x i64>, <vscale x 2 x i1>, i32)
1951 define signext i64 @vpreduce_xor_nxv2i64(i64 signext %s, <vscale x 2 x i64> %v, <vscale x 2 x i1> %m, i32 zeroext %evl) {
1952 ; RV32-LABEL: vpreduce_xor_nxv2i64:
1954 ; RV32-NEXT: addi sp, sp, -16
1955 ; RV32-NEXT: .cfi_def_cfa_offset 16
1956 ; RV32-NEXT: sw a1, 12(sp)
1957 ; RV32-NEXT: sw a0, 8(sp)
1958 ; RV32-NEXT: addi a0, sp, 8
1959 ; RV32-NEXT: vsetivli zero, 1, e64, m1, ta, ma
1960 ; RV32-NEXT: vlse64.v v10, (a0), zero
1961 ; RV32-NEXT: vsetvli zero, a2, e64, m2, ta, ma
1962 ; RV32-NEXT: vredxor.vs v10, v8, v10, v0.t
1963 ; RV32-NEXT: vmv.x.s a0, v10
1964 ; RV32-NEXT: li a1, 32
1965 ; RV32-NEXT: vsetivli zero, 1, e64, m1, ta, ma
1966 ; RV32-NEXT: vsrl.vx v8, v10, a1
1967 ; RV32-NEXT: vmv.x.s a1, v8
1968 ; RV32-NEXT: addi sp, sp, 16
1971 ; RV64-LABEL: vpreduce_xor_nxv2i64:
1973 ; RV64-NEXT: vsetivli zero, 1, e64, m1, ta, ma
1974 ; RV64-NEXT: vmv.s.x v10, a0
1975 ; RV64-NEXT: vsetvli zero, a1, e64, m2, ta, ma
1976 ; RV64-NEXT: vredxor.vs v10, v8, v10, v0.t
1977 ; RV64-NEXT: vmv.x.s a0, v10
1979 %r = call i64 @llvm.vp.reduce.xor.nxv2i64(i64 %s, <vscale x 2 x i64> %v, <vscale x 2 x i1> %m, i32 %evl)
; llvm.vp.reduce.add over <vscale x 4 x i64> with start value %s, mask %m and
; length %evl; both targets are expected to select a masked vredsum.vs at
; e64/m4.
; riscv32 stores the a0/a1 words of %s to the stack and reloads them as one e64
; element with vlse64.v, then reads the result out as a0 plus a1 (the upper
; word is obtained with vsrl.vx by 32). riscv64 inserts %s with vmv.s.x and
; reads the result with a single vmv.x.s.
1983 declare i64 @llvm.vp.reduce.add.nxv4i64(i64, <vscale x 4 x i64>, <vscale x 4 x i1>, i32)
1985 define signext i64 @vpreduce_add_nxv4i64(i64 signext %s, <vscale x 4 x i64> %v, <vscale x 4 x i1> %m, i32 zeroext %evl) {
1986 ; RV32-LABEL: vpreduce_add_nxv4i64:
1988 ; RV32-NEXT: addi sp, sp, -16
1989 ; RV32-NEXT: .cfi_def_cfa_offset 16
1990 ; RV32-NEXT: sw a1, 12(sp)
1991 ; RV32-NEXT: sw a0, 8(sp)
1992 ; RV32-NEXT: addi a0, sp, 8
1993 ; RV32-NEXT: vsetivli zero, 1, e64, m1, ta, ma
1994 ; RV32-NEXT: vlse64.v v12, (a0), zero
1995 ; RV32-NEXT: vsetvli zero, a2, e64, m4, ta, ma
1996 ; RV32-NEXT: vredsum.vs v12, v8, v12, v0.t
1997 ; RV32-NEXT: vmv.x.s a0, v12
1998 ; RV32-NEXT: li a1, 32
1999 ; RV32-NEXT: vsetivli zero, 1, e64, m1, ta, ma
2000 ; RV32-NEXT: vsrl.vx v8, v12, a1
2001 ; RV32-NEXT: vmv.x.s a1, v8
2002 ; RV32-NEXT: addi sp, sp, 16
2005 ; RV64-LABEL: vpreduce_add_nxv4i64:
2007 ; RV64-NEXT: vsetivli zero, 1, e64, m1, ta, ma
2008 ; RV64-NEXT: vmv.s.x v12, a0
2009 ; RV64-NEXT: vsetvli zero, a1, e64, m4, ta, ma
2010 ; RV64-NEXT: vredsum.vs v12, v8, v12, v0.t
2011 ; RV64-NEXT: vmv.x.s a0, v12
2013 %r = call i64 @llvm.vp.reduce.add.nxv4i64(i64 %s, <vscale x 4 x i64> %v, <vscale x 4 x i1> %m, i32 %evl)
; Signed widening add reduction: %v is sign-extended from i32 to i64 elements
; before the llvm.vp.reduce.add call, and the assertions expect that pair to be
; selected as one masked vwredsum.vs issued at e32/m2, with the i64 result read
; back under an e64 vsetivli.
; riscv32 stores the a0/a1 words of %s to the stack and reloads them as one e64
; element with vlse64.v; riscv64 inserts %s directly with vmv.s.x.
2017 define signext i64 @vpwreduce_add_nxv4i32(i64 signext %s, <vscale x 4 x i32> %v, <vscale x 4 x i1> %m, i32 zeroext %evl) {
2018 ; RV32-LABEL: vpwreduce_add_nxv4i32:
2020 ; RV32-NEXT: addi sp, sp, -16
2021 ; RV32-NEXT: .cfi_def_cfa_offset 16
2022 ; RV32-NEXT: sw a1, 12(sp)
2023 ; RV32-NEXT: sw a0, 8(sp)
2024 ; RV32-NEXT: addi a0, sp, 8
2025 ; RV32-NEXT: vsetivli zero, 1, e64, m1, ta, ma
2026 ; RV32-NEXT: vlse64.v v10, (a0), zero
2027 ; RV32-NEXT: vsetvli zero, a2, e32, m2, ta, ma
2028 ; RV32-NEXT: vwredsum.vs v10, v8, v10, v0.t
2029 ; RV32-NEXT: vsetivli zero, 1, e64, m1, ta, ma
2030 ; RV32-NEXT: vmv.x.s a0, v10
2031 ; RV32-NEXT: li a1, 32
2032 ; RV32-NEXT: vsrl.vx v8, v10, a1
2033 ; RV32-NEXT: vmv.x.s a1, v8
2034 ; RV32-NEXT: addi sp, sp, 16
2037 ; RV64-LABEL: vpwreduce_add_nxv4i32:
2039 ; RV64-NEXT: vsetivli zero, 1, e64, m1, ta, ma
2040 ; RV64-NEXT: vmv.s.x v10, a0
2041 ; RV64-NEXT: vsetvli zero, a1, e32, m2, ta, ma
2042 ; RV64-NEXT: vwredsum.vs v10, v8, v10, v0.t
2043 ; RV64-NEXT: vsetivli zero, 1, e64, m1, ta, ma
2044 ; RV64-NEXT: vmv.x.s a0, v10
2046 %e = sext <vscale x 4 x i32> %v to <vscale x 4 x i64>
2047 %r = call i64 @llvm.vp.reduce.add.nxv4i64(i64 %s, <vscale x 4 x i64> %e, <vscale x 4 x i1> %m, i32 %evl)
; Unsigned widening add reduction: %v is zero-extended from i32 to i64 elements
; before the llvm.vp.reduce.add call, and the assertions expect that pair to be
; selected as one masked vwredsumu.vs issued at e32/m2, with the i64 result
; read back under an e64 vsetivli.
; riscv32 stores the a0/a1 words of %s to the stack and reloads them as one e64
; element with vlse64.v; riscv64 inserts %s directly with vmv.s.x.
2051 define signext i64 @vpwreduce_uadd_nxv4i32(i64 signext %s, <vscale x 4 x i32> %v, <vscale x 4 x i1> %m, i32 zeroext %evl) {
2052 ; RV32-LABEL: vpwreduce_uadd_nxv4i32:
2054 ; RV32-NEXT: addi sp, sp, -16
2055 ; RV32-NEXT: .cfi_def_cfa_offset 16
2056 ; RV32-NEXT: sw a1, 12(sp)
2057 ; RV32-NEXT: sw a0, 8(sp)
2058 ; RV32-NEXT: addi a0, sp, 8
2059 ; RV32-NEXT: vsetivli zero, 1, e64, m1, ta, ma
2060 ; RV32-NEXT: vlse64.v v10, (a0), zero
2061 ; RV32-NEXT: vsetvli zero, a2, e32, m2, ta, ma
2062 ; RV32-NEXT: vwredsumu.vs v10, v8, v10, v0.t
2063 ; RV32-NEXT: vsetivli zero, 1, e64, m1, ta, ma
2064 ; RV32-NEXT: vmv.x.s a0, v10
2065 ; RV32-NEXT: li a1, 32
2066 ; RV32-NEXT: vsrl.vx v8, v10, a1
2067 ; RV32-NEXT: vmv.x.s a1, v8
2068 ; RV32-NEXT: addi sp, sp, 16
2071 ; RV64-LABEL: vpwreduce_uadd_nxv4i32:
2073 ; RV64-NEXT: vsetivli zero, 1, e64, m1, ta, ma
2074 ; RV64-NEXT: vmv.s.x v10, a0
2075 ; RV64-NEXT: vsetvli zero, a1, e32, m2, ta, ma
2076 ; RV64-NEXT: vwredsumu.vs v10, v8, v10, v0.t
2077 ; RV64-NEXT: vsetivli zero, 1, e64, m1, ta, ma
2078 ; RV64-NEXT: vmv.x.s a0, v10
2080 %e = zext <vscale x 4 x i32> %v to <vscale x 4 x i64>
2081 %r = call i64 @llvm.vp.reduce.add.nxv4i64(i64 %s, <vscale x 4 x i64> %e, <vscale x 4 x i1> %m, i32 %evl)
; llvm.vp.reduce.umax over <vscale x 4 x i64> with start value %s, mask %m and
; length %evl; both targets are expected to select a masked vredmaxu.vs at
; e64/m4.
; riscv32 stores the a0/a1 words of %s to the stack and reloads them as one e64
; element with vlse64.v, then reads the result out as a0 plus a1 (the upper
; word is obtained with vsrl.vx by 32). riscv64 inserts %s with vmv.s.x and
; reads the result with a single vmv.x.s.
2085 declare i64 @llvm.vp.reduce.umax.nxv4i64(i64, <vscale x 4 x i64>, <vscale x 4 x i1>, i32)
2087 define signext i64 @vpreduce_umax_nxv4i64(i64 signext %s, <vscale x 4 x i64> %v, <vscale x 4 x i1> %m, i32 zeroext %evl) {
2088 ; RV32-LABEL: vpreduce_umax_nxv4i64:
2090 ; RV32-NEXT: addi sp, sp, -16
2091 ; RV32-NEXT: .cfi_def_cfa_offset 16
2092 ; RV32-NEXT: sw a1, 12(sp)
2093 ; RV32-NEXT: sw a0, 8(sp)
2094 ; RV32-NEXT: addi a0, sp, 8
2095 ; RV32-NEXT: vsetivli zero, 1, e64, m1, ta, ma
2096 ; RV32-NEXT: vlse64.v v12, (a0), zero
2097 ; RV32-NEXT: vsetvli zero, a2, e64, m4, ta, ma
2098 ; RV32-NEXT: vredmaxu.vs v12, v8, v12, v0.t
2099 ; RV32-NEXT: vmv.x.s a0, v12
2100 ; RV32-NEXT: li a1, 32
2101 ; RV32-NEXT: vsetivli zero, 1, e64, m1, ta, ma
2102 ; RV32-NEXT: vsrl.vx v8, v12, a1
2103 ; RV32-NEXT: vmv.x.s a1, v8
2104 ; RV32-NEXT: addi sp, sp, 16
2107 ; RV64-LABEL: vpreduce_umax_nxv4i64:
2109 ; RV64-NEXT: vsetivli zero, 1, e64, m1, ta, ma
2110 ; RV64-NEXT: vmv.s.x v12, a0
2111 ; RV64-NEXT: vsetvli zero, a1, e64, m4, ta, ma
2112 ; RV64-NEXT: vredmaxu.vs v12, v8, v12, v0.t
2113 ; RV64-NEXT: vmv.x.s a0, v12
2115 %r = call i64 @llvm.vp.reduce.umax.nxv4i64(i64 %s, <vscale x 4 x i64> %v, <vscale x 4 x i1> %m, i32 %evl)
; llvm.vp.reduce.smax over <vscale x 4 x i64> with start value %s, mask %m and
; length %evl; both targets are expected to select a masked vredmax.vs at
; e64/m4.
; riscv32 stores the a0/a1 words of %s to the stack and reloads them as one e64
; element with vlse64.v, then reads the result out as a0 plus a1 (the upper
; word is obtained with vsrl.vx by 32). riscv64 inserts %s with vmv.s.x and
; reads the result with a single vmv.x.s.
2119 declare i64 @llvm.vp.reduce.smax.nxv4i64(i64, <vscale x 4 x i64>, <vscale x 4 x i1>, i32)
2121 define signext i64 @vpreduce_smax_nxv4i64(i64 signext %s, <vscale x 4 x i64> %v, <vscale x 4 x i1> %m, i32 zeroext %evl) {
2122 ; RV32-LABEL: vpreduce_smax_nxv4i64:
2124 ; RV32-NEXT: addi sp, sp, -16
2125 ; RV32-NEXT: .cfi_def_cfa_offset 16
2126 ; RV32-NEXT: sw a1, 12(sp)
2127 ; RV32-NEXT: sw a0, 8(sp)
2128 ; RV32-NEXT: addi a0, sp, 8
2129 ; RV32-NEXT: vsetivli zero, 1, e64, m1, ta, ma
2130 ; RV32-NEXT: vlse64.v v12, (a0), zero
2131 ; RV32-NEXT: vsetvli zero, a2, e64, m4, ta, ma
2132 ; RV32-NEXT: vredmax.vs v12, v8, v12, v0.t
2133 ; RV32-NEXT: vmv.x.s a0, v12
2134 ; RV32-NEXT: li a1, 32
2135 ; RV32-NEXT: vsetivli zero, 1, e64, m1, ta, ma
2136 ; RV32-NEXT: vsrl.vx v8, v12, a1
2137 ; RV32-NEXT: vmv.x.s a1, v8
2138 ; RV32-NEXT: addi sp, sp, 16
2141 ; RV64-LABEL: vpreduce_smax_nxv4i64:
2143 ; RV64-NEXT: vsetivli zero, 1, e64, m1, ta, ma
2144 ; RV64-NEXT: vmv.s.x v12, a0
2145 ; RV64-NEXT: vsetvli zero, a1, e64, m4, ta, ma
2146 ; RV64-NEXT: vredmax.vs v12, v8, v12, v0.t
2147 ; RV64-NEXT: vmv.x.s a0, v12
2149 %r = call i64 @llvm.vp.reduce.smax.nxv4i64(i64 %s, <vscale x 4 x i64> %v, <vscale x 4 x i1> %m, i32 %evl)
; llvm.vp.reduce.umin over <vscale x 4 x i64> with start value %s, mask %m and
; length %evl; both targets are expected to select a masked vredminu.vs at
; e64/m4.
; riscv32 stores the a0/a1 words of %s to the stack and reloads them as one e64
; element with vlse64.v, then reads the result out as a0 plus a1 (the upper
; word is obtained with vsrl.vx by 32). riscv64 inserts %s with vmv.s.x and
; reads the result with a single vmv.x.s.
2153 declare i64 @llvm.vp.reduce.umin.nxv4i64(i64, <vscale x 4 x i64>, <vscale x 4 x i1>, i32)
2155 define signext i64 @vpreduce_umin_nxv4i64(i64 signext %s, <vscale x 4 x i64> %v, <vscale x 4 x i1> %m, i32 zeroext %evl) {
2156 ; RV32-LABEL: vpreduce_umin_nxv4i64:
2158 ; RV32-NEXT: addi sp, sp, -16
2159 ; RV32-NEXT: .cfi_def_cfa_offset 16
2160 ; RV32-NEXT: sw a1, 12(sp)
2161 ; RV32-NEXT: sw a0, 8(sp)
2162 ; RV32-NEXT: addi a0, sp, 8
2163 ; RV32-NEXT: vsetivli zero, 1, e64, m1, ta, ma
2164 ; RV32-NEXT: vlse64.v v12, (a0), zero
2165 ; RV32-NEXT: vsetvli zero, a2, e64, m4, ta, ma
2166 ; RV32-NEXT: vredminu.vs v12, v8, v12, v0.t
2167 ; RV32-NEXT: vmv.x.s a0, v12
2168 ; RV32-NEXT: li a1, 32
2169 ; RV32-NEXT: vsetivli zero, 1, e64, m1, ta, ma
2170 ; RV32-NEXT: vsrl.vx v8, v12, a1
2171 ; RV32-NEXT: vmv.x.s a1, v8
2172 ; RV32-NEXT: addi sp, sp, 16
2175 ; RV64-LABEL: vpreduce_umin_nxv4i64:
2177 ; RV64-NEXT: vsetivli zero, 1, e64, m1, ta, ma
2178 ; RV64-NEXT: vmv.s.x v12, a0
2179 ; RV64-NEXT: vsetvli zero, a1, e64, m4, ta, ma
2180 ; RV64-NEXT: vredminu.vs v12, v8, v12, v0.t
2181 ; RV64-NEXT: vmv.x.s a0, v12
2183 %r = call i64 @llvm.vp.reduce.umin.nxv4i64(i64 %s, <vscale x 4 x i64> %v, <vscale x 4 x i1> %m, i32 %evl)
; llvm.vp.reduce.smin over <vscale x 4 x i64> with start value %s, mask %m and
; length %evl; both targets are expected to select a masked vredmin.vs at
; e64/m4.
; riscv32 stores the a0/a1 words of %s to the stack and reloads them as one e64
; element with vlse64.v, then reads the result out as a0 plus a1 (the upper
; word is obtained with vsrl.vx by 32). riscv64 inserts %s with vmv.s.x and
; reads the result with a single vmv.x.s.
2187 declare i64 @llvm.vp.reduce.smin.nxv4i64(i64, <vscale x 4 x i64>, <vscale x 4 x i1>, i32)
2189 define signext i64 @vpreduce_smin_nxv4i64(i64 signext %s, <vscale x 4 x i64> %v, <vscale x 4 x i1> %m, i32 zeroext %evl) {
2190 ; RV32-LABEL: vpreduce_smin_nxv4i64:
2192 ; RV32-NEXT: addi sp, sp, -16
2193 ; RV32-NEXT: .cfi_def_cfa_offset 16
2194 ; RV32-NEXT: sw a1, 12(sp)
2195 ; RV32-NEXT: sw a0, 8(sp)
2196 ; RV32-NEXT: addi a0, sp, 8
2197 ; RV32-NEXT: vsetivli zero, 1, e64, m1, ta, ma
2198 ; RV32-NEXT: vlse64.v v12, (a0), zero
2199 ; RV32-NEXT: vsetvli zero, a2, e64, m4, ta, ma
2200 ; RV32-NEXT: vredmin.vs v12, v8, v12, v0.t
2201 ; RV32-NEXT: vmv.x.s a0, v12
2202 ; RV32-NEXT: li a1, 32
2203 ; RV32-NEXT: vsetivli zero, 1, e64, m1, ta, ma
2204 ; RV32-NEXT: vsrl.vx v8, v12, a1
2205 ; RV32-NEXT: vmv.x.s a1, v8
2206 ; RV32-NEXT: addi sp, sp, 16
2209 ; RV64-LABEL: vpreduce_smin_nxv4i64:
2211 ; RV64-NEXT: vsetivli zero, 1, e64, m1, ta, ma
2212 ; RV64-NEXT: vmv.s.x v12, a0
2213 ; RV64-NEXT: vsetvli zero, a1, e64, m4, ta, ma
2214 ; RV64-NEXT: vredmin.vs v12, v8, v12, v0.t
2215 ; RV64-NEXT: vmv.x.s a0, v12
2217 %r = call i64 @llvm.vp.reduce.smin.nxv4i64(i64 %s, <vscale x 4 x i64> %v, <vscale x 4 x i1> %m, i32 %evl)
; llvm.vp.reduce.and over <vscale x 4 x i64> with start value %s, mask %m and
; length %evl; both targets are expected to select a masked vredand.vs at
; e64/m4.
; riscv32 stores the a0/a1 words of %s to the stack and reloads them as one e64
; element with vlse64.v, then reads the result out as a0 plus a1 (the upper
; word is obtained with vsrl.vx by 32). riscv64 inserts %s with vmv.s.x and
; reads the result with a single vmv.x.s.
2221 declare i64 @llvm.vp.reduce.and.nxv4i64(i64, <vscale x 4 x i64>, <vscale x 4 x i1>, i32)
2223 define signext i64 @vpreduce_and_nxv4i64(i64 signext %s, <vscale x 4 x i64> %v, <vscale x 4 x i1> %m, i32 zeroext %evl) {
2224 ; RV32-LABEL: vpreduce_and_nxv4i64:
2226 ; RV32-NEXT: addi sp, sp, -16
2227 ; RV32-NEXT: .cfi_def_cfa_offset 16
2228 ; RV32-NEXT: sw a1, 12(sp)
2229 ; RV32-NEXT: sw a0, 8(sp)
2230 ; RV32-NEXT: addi a0, sp, 8
2231 ; RV32-NEXT: vsetivli zero, 1, e64, m1, ta, ma
2232 ; RV32-NEXT: vlse64.v v12, (a0), zero
2233 ; RV32-NEXT: vsetvli zero, a2, e64, m4, ta, ma
2234 ; RV32-NEXT: vredand.vs v12, v8, v12, v0.t
2235 ; RV32-NEXT: vmv.x.s a0, v12
2236 ; RV32-NEXT: li a1, 32
2237 ; RV32-NEXT: vsetivli zero, 1, e64, m1, ta, ma
2238 ; RV32-NEXT: vsrl.vx v8, v12, a1
2239 ; RV32-NEXT: vmv.x.s a1, v8
2240 ; RV32-NEXT: addi sp, sp, 16
2243 ; RV64-LABEL: vpreduce_and_nxv4i64:
2245 ; RV64-NEXT: vsetivli zero, 1, e64, m1, ta, ma
2246 ; RV64-NEXT: vmv.s.x v12, a0
2247 ; RV64-NEXT: vsetvli zero, a1, e64, m4, ta, ma
2248 ; RV64-NEXT: vredand.vs v12, v8, v12, v0.t
2249 ; RV64-NEXT: vmv.x.s a0, v12
2251 %r = call i64 @llvm.vp.reduce.and.nxv4i64(i64 %s, <vscale x 4 x i64> %v, <vscale x 4 x i1> %m, i32 %evl)
; llvm.vp.reduce.or over <vscale x 4 x i64> with start value %s, mask %m and
; length %evl; both targets are expected to select a masked vredor.vs at
; e64/m4.
; riscv32 stores the a0/a1 words of %s to the stack and reloads them as one e64
; element with vlse64.v, then reads the result out as a0 plus a1 (the upper
; word is obtained with vsrl.vx by 32). riscv64 inserts %s with vmv.s.x and
; reads the result with a single vmv.x.s.
2255 declare i64 @llvm.vp.reduce.or.nxv4i64(i64, <vscale x 4 x i64>, <vscale x 4 x i1>, i32)
2257 define signext i64 @vpreduce_or_nxv4i64(i64 signext %s, <vscale x 4 x i64> %v, <vscale x 4 x i1> %m, i32 zeroext %evl) {
2258 ; RV32-LABEL: vpreduce_or_nxv4i64:
2260 ; RV32-NEXT: addi sp, sp, -16
2261 ; RV32-NEXT: .cfi_def_cfa_offset 16
2262 ; RV32-NEXT: sw a1, 12(sp)
2263 ; RV32-NEXT: sw a0, 8(sp)
2264 ; RV32-NEXT: addi a0, sp, 8
2265 ; RV32-NEXT: vsetivli zero, 1, e64, m1, ta, ma
2266 ; RV32-NEXT: vlse64.v v12, (a0), zero
2267 ; RV32-NEXT: vsetvli zero, a2, e64, m4, ta, ma
2268 ; RV32-NEXT: vredor.vs v12, v8, v12, v0.t
2269 ; RV32-NEXT: vmv.x.s a0, v12
2270 ; RV32-NEXT: li a1, 32
2271 ; RV32-NEXT: vsetivli zero, 1, e64, m1, ta, ma
2272 ; RV32-NEXT: vsrl.vx v8, v12, a1
2273 ; RV32-NEXT: vmv.x.s a1, v8
2274 ; RV32-NEXT: addi sp, sp, 16
2277 ; RV64-LABEL: vpreduce_or_nxv4i64:
2279 ; RV64-NEXT: vsetivli zero, 1, e64, m1, ta, ma
2280 ; RV64-NEXT: vmv.s.x v12, a0
2281 ; RV64-NEXT: vsetvli zero, a1, e64, m4, ta, ma
2282 ; RV64-NEXT: vredor.vs v12, v8, v12, v0.t
2283 ; RV64-NEXT: vmv.x.s a0, v12
2285 %r = call i64 @llvm.vp.reduce.or.nxv4i64(i64 %s, <vscale x 4 x i64> %v, <vscale x 4 x i1> %m, i32 %evl)
; llvm.vp.reduce.xor over <vscale x 4 x i64> with start value %s, mask %m and
; length %evl; both targets are expected to select a masked vredxor.vs at
; e64/m4.
; riscv32 stores the a0/a1 words of %s to the stack and reloads them as one e64
; element with vlse64.v, then reads the result out as a0 plus a1 (the upper
; word is obtained with vsrl.vx by 32). riscv64 inserts %s with vmv.s.x and
; reads the result with a single vmv.x.s.
2289 declare i64 @llvm.vp.reduce.xor.nxv4i64(i64, <vscale x 4 x i64>, <vscale x 4 x i1>, i32)
2291 define signext i64 @vpreduce_xor_nxv4i64(i64 signext %s, <vscale x 4 x i64> %v, <vscale x 4 x i1> %m, i32 zeroext %evl) {
2292 ; RV32-LABEL: vpreduce_xor_nxv4i64:
2294 ; RV32-NEXT: addi sp, sp, -16
2295 ; RV32-NEXT: .cfi_def_cfa_offset 16
2296 ; RV32-NEXT: sw a1, 12(sp)
2297 ; RV32-NEXT: sw a0, 8(sp)
2298 ; RV32-NEXT: addi a0, sp, 8
2299 ; RV32-NEXT: vsetivli zero, 1, e64, m1, ta, ma
2300 ; RV32-NEXT: vlse64.v v12, (a0), zero
2301 ; RV32-NEXT: vsetvli zero, a2, e64, m4, ta, ma
2302 ; RV32-NEXT: vredxor.vs v12, v8, v12, v0.t
2303 ; RV32-NEXT: vmv.x.s a0, v12
2304 ; RV32-NEXT: li a1, 32
2305 ; RV32-NEXT: vsetivli zero, 1, e64, m1, ta, ma
2306 ; RV32-NEXT: vsrl.vx v8, v12, a1
2307 ; RV32-NEXT: vmv.x.s a1, v8
2308 ; RV32-NEXT: addi sp, sp, 16
2311 ; RV64-LABEL: vpreduce_xor_nxv4i64:
2313 ; RV64-NEXT: vsetivli zero, 1, e64, m1, ta, ma
2314 ; RV64-NEXT: vmv.s.x v12, a0
2315 ; RV64-NEXT: vsetvli zero, a1, e64, m4, ta, ma
2316 ; RV64-NEXT: vredxor.vs v12, v8, v12, v0.t
2317 ; RV64-NEXT: vmv.x.s a0, v12
2319 %r = call i64 @llvm.vp.reduce.xor.nxv4i64(i64 %s, <vscale x 4 x i64> %v, <vscale x 4 x i1> %m, i32 %evl)