; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mtriple=riscv64 -mattr=+d,+zfh,+zvfh,+v,+zbb -target-abi=lp64d -verify-machineinstrs < %s | FileCheck %s

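; Check that a scalar op applied to the result of a vector reduction is either
; folded into the reduction's start value or emitted as a single scalar
; instruction on the extracted result.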
define i64 @reduce_add(i64 %x, <4 x i64> %v) {
; CHECK-LABEL: reduce_add:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    vsetivli zero, 4, e64, m2, ta, ma
; CHECK-NEXT:    vmv.s.x v10, a0
; CHECK-NEXT:    vredsum.vs v8, v8, v10
; CHECK-NEXT:    vmv.x.s a0, v8
; CHECK-NEXT:    ret
entry:
  %rdx = call i64 @llvm.vector.reduce.add.v4i64(<4 x i64> %v)
  %res = add i64 %rdx, %x
  ret i64 %res
}

define i64 @reduce_add2(<4 x i64> %v) {
; CHECK-LABEL: reduce_add2:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    vsetivli zero, 4, e64, m1, ta, ma
; CHECK-NEXT:    vmv.v.i v10, 8
; CHECK-NEXT:    vsetivli zero, 4, e64, m2, ta, ma
; CHECK-NEXT:    vredsum.vs v8, v8, v10
; CHECK-NEXT:    vmv.x.s a0, v8
; CHECK-NEXT:    ret
entry:
  %rdx = call i64 @llvm.vector.reduce.add.v4i64(<4 x i64> %v)
  %res = add i64 %rdx, 8
  ret i64 %res
}

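; For the bitwise and/or reductions below, the scalar operand is not folded
; into the reduction: the source vector doubles as the start value and the
; remaining and/or happens on the extracted scalar.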
define i64 @reduce_and(i64 %x, <4 x i64> %v) {
; CHECK-LABEL: reduce_and:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    vsetivli zero, 4, e64, m2, ta, ma
; CHECK-NEXT:    vredand.vs v8, v8, v8
; CHECK-NEXT:    vmv.x.s a1, v8
; CHECK-NEXT:    and a0, a1, a0
; CHECK-NEXT:    ret
entry:
  %rdx = call i64 @llvm.vector.reduce.and.v4i64(<4 x i64> %v)
  %res = and i64 %rdx, %x
  ret i64 %res
}

define i64 @reduce_and2(<4 x i64> %v) {
; CHECK-LABEL: reduce_and2:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    vsetivli zero, 4, e64, m2, ta, ma
; CHECK-NEXT:    vredand.vs v8, v8, v8
; CHECK-NEXT:    vmv.x.s a0, v8
; CHECK-NEXT:    andi a0, a0, 8
; CHECK-NEXT:    ret
entry:
  %rdx = call i64 @llvm.vector.reduce.and.v4i64(<4 x i64> %v)
  %res = and i64 %rdx, 8
  ret i64 %res
}

define i64 @reduce_or(i64 %x, <4 x i64> %v) {
; CHECK-LABEL: reduce_or:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    vsetivli zero, 4, e64, m2, ta, ma
; CHECK-NEXT:    vredor.vs v8, v8, v8
; CHECK-NEXT:    vmv.x.s a1, v8
; CHECK-NEXT:    or a0, a1, a0
; CHECK-NEXT:    ret
entry:
  %rdx = call i64 @llvm.vector.reduce.or.v4i64(<4 x i64> %v)
  %res = or i64 %rdx, %x
  ret i64 %res
}

define i64 @reduce_or2(<4 x i64> %v) {
; CHECK-LABEL: reduce_or2:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    vsetivli zero, 4, e64, m2, ta, ma
; CHECK-NEXT:    vredor.vs v8, v8, v8
; CHECK-NEXT:    vmv.x.s a0, v8
; CHECK-NEXT:    ori a0, a0, 8
; CHECK-NEXT:    ret
entry:
  %rdx = call i64 @llvm.vector.reduce.or.v4i64(<4 x i64> %v)
  %res = or i64 %rdx, 8
  ret i64 %res
}

define i64 @reduce_xor(i64 %x, <4 x i64> %v) {
; CHECK-LABEL: reduce_xor:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    vsetivli zero, 4, e64, m2, ta, ma
; CHECK-NEXT:    vmv.s.x v10, a0
; CHECK-NEXT:    vredxor.vs v8, v8, v10
; CHECK-NEXT:    vmv.x.s a0, v8
; CHECK-NEXT:    ret
entry:
  %rdx = call i64 @llvm.vector.reduce.xor.v4i64(<4 x i64> %v)
  %res = xor i64 %rdx, %x
  ret i64 %res
}

define i64 @reduce_xor2(<4 x i64> %v) {
; CHECK-LABEL: reduce_xor2:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    vsetivli zero, 4, e64, m2, ta, ma
; CHECK-NEXT:    vmv.s.x v10, zero
; CHECK-NEXT:    vredxor.vs v8, v8, v10
; CHECK-NEXT:    vmv.x.s a0, v8
; CHECK-NEXT:    andi a0, a0, 8
; CHECK-NEXT:    ret
entry:
  %rdx = call i64 @llvm.vector.reduce.xor.v4i64(<4 x i64> %v)
  %res = and i64 %rdx, 8
  ret i64 %res
}

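; For the integer min/max reductions, the leftover scalar min/max is emitted
; as a Zbb max/maxu/min/minu instruction on the extracted result instead of
; being folded into the reduction.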
define i64 @reduce_umax(i64 %x, <4 x i64> %v) {
; CHECK-LABEL: reduce_umax:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    vsetivli zero, 4, e64, m2, ta, ma
; CHECK-NEXT:    vredmaxu.vs v8, v8, v8
; CHECK-NEXT:    vmv.x.s a1, v8
; CHECK-NEXT:    maxu a0, a1, a0
; CHECK-NEXT:    ret
entry:
  %rdx = call i64 @llvm.vector.reduce.umax.v4i64(<4 x i64> %v)
  %res = call i64 @llvm.umax.i64(i64 %rdx, i64 %x)
  ret i64 %res
}

define i64 @reduce_umax2(<4 x i64> %v) {
; CHECK-LABEL: reduce_umax2:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    vsetivli zero, 4, e64, m2, ta, ma
; CHECK-NEXT:    vredmaxu.vs v8, v8, v8
; CHECK-NEXT:    vmv.x.s a0, v8
; CHECK-NEXT:    li a1, 8
; CHECK-NEXT:    maxu a0, a0, a1
; CHECK-NEXT:    ret
entry:
  %rdx = call i64 @llvm.vector.reduce.umax.v4i64(<4 x i64> %v)
  %res = call i64 @llvm.umax.i64(i64 %rdx, i64 8)
  ret i64 %res
}

define i64 @reduce_umin(i64 %x, <4 x i64> %v) {
; CHECK-LABEL: reduce_umin:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    vsetivli zero, 4, e64, m2, ta, ma
; CHECK-NEXT:    vredminu.vs v8, v8, v8
; CHECK-NEXT:    vmv.x.s a1, v8
; CHECK-NEXT:    minu a0, a1, a0
; CHECK-NEXT:    ret
entry:
  %rdx = call i64 @llvm.vector.reduce.umin.v4i64(<4 x i64> %v)
  %res = call i64 @llvm.umin.i64(i64 %rdx, i64 %x)
  ret i64 %res
}

define i64 @reduce_umin2(<4 x i64> %v) {
; CHECK-LABEL: reduce_umin2:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    vsetivli zero, 4, e64, m2, ta, ma
; CHECK-NEXT:    vredminu.vs v8, v8, v8
; CHECK-NEXT:    vmv.x.s a0, v8
; CHECK-NEXT:    li a1, 8
; CHECK-NEXT:    minu a0, a0, a1
; CHECK-NEXT:    ret
entry:
  %rdx = call i64 @llvm.vector.reduce.umin.v4i64(<4 x i64> %v)
  %res = call i64 @llvm.umin.i64(i64 %rdx, i64 8)
  ret i64 %res
}

define i64 @reduce_smax(i64 %x, <4 x i64> %v) {
; CHECK-LABEL: reduce_smax:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    vsetivli zero, 4, e64, m2, ta, ma
; CHECK-NEXT:    vredmax.vs v8, v8, v8
; CHECK-NEXT:    vmv.x.s a1, v8
; CHECK-NEXT:    max a0, a1, a0
; CHECK-NEXT:    ret
entry:
  %rdx = call i64 @llvm.vector.reduce.smax.v4i64(<4 x i64> %v)
  %res = call i64 @llvm.smax.i64(i64 %rdx, i64 %x)
  ret i64 %res
}

define i64 @reduce_smax2(<4 x i64> %v) {
; CHECK-LABEL: reduce_smax2:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    vsetivli zero, 4, e64, m2, ta, ma
; CHECK-NEXT:    vredmax.vs v8, v8, v8
; CHECK-NEXT:    vmv.x.s a0, v8
; CHECK-NEXT:    li a1, 8
; CHECK-NEXT:    max a0, a0, a1
; CHECK-NEXT:    ret
entry:
  %rdx = call i64 @llvm.vector.reduce.smax.v4i64(<4 x i64> %v)
  %res = call i64 @llvm.smax.i64(i64 %rdx, i64 8)
  ret i64 %res
}

define i64 @reduce_smin(i64 %x, <4 x i64> %v) {
; CHECK-LABEL: reduce_smin:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    vsetivli zero, 4, e64, m2, ta, ma
; CHECK-NEXT:    vredmin.vs v8, v8, v8
; CHECK-NEXT:    vmv.x.s a1, v8
; CHECK-NEXT:    min a0, a1, a0
; CHECK-NEXT:    ret
entry:
  %rdx = call i64 @llvm.vector.reduce.smin.v4i64(<4 x i64> %v)
  %res = call i64 @llvm.smin.i64(i64 %rdx, i64 %x)
  ret i64 %res
}

define i64 @reduce_smin2(<4 x i64> %v) {
; CHECK-LABEL: reduce_smin2:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    vsetivli zero, 4, e64, m2, ta, ma
; CHECK-NEXT:    vredmin.vs v8, v8, v8
; CHECK-NEXT:    vmv.x.s a0, v8
; CHECK-NEXT:    li a1, 8
; CHECK-NEXT:    min a0, a0, a1
; CHECK-NEXT:    ret
entry:
  %rdx = call i64 @llvm.vector.reduce.smin.v4i64(<4 x i64> %v)
  %res = call i64 @llvm.smin.i64(i64 %rdx, i64 8)
  ret i64 %res
}

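; For the fast-math fadd reductions, the scalar addend is folded in as the
; start value of vfredusum via vfmv.s.f.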
define float @reduce_fadd(float %x, <4 x float> %v) {
; CHECK-LABEL: reduce_fadd:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
; CHECK-NEXT:    vfmv.s.f v9, fa0
; CHECK-NEXT:    vfredusum.vs v8, v8, v9
; CHECK-NEXT:    vfmv.f.s fa0, v8
; CHECK-NEXT:    ret
entry:
  %rdx = call fast float @llvm.vector.reduce.fadd.v4f32(float %x, <4 x float> %v)
  ret float %rdx
}

define float @reduce_fadd2(float %x, <4 x float> %v) {
; CHECK-LABEL: reduce_fadd2:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
; CHECK-NEXT:    vfmv.s.f v9, fa0
; CHECK-NEXT:    vfredusum.vs v8, v8, v9
; CHECK-NEXT:    vfmv.f.s fa0, v8
; CHECK-NEXT:    ret
entry:
  %rdx = call fast float @llvm.vector.reduce.fadd.v4f32(float 0.0, <4 x float> %v)
  %res = fadd fast float %rdx, %x
  ret float %res
}

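; The reduction result has a second use here (it is stored), so the final
; fadd stays scalar.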
define float @reduce_fadd3(float %x, <4 x float> %v, ptr %rdxptr) {
; CHECK-LABEL: reduce_fadd3:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
; CHECK-NEXT:    vmv.s.x v9, zero
; CHECK-NEXT:    vfredusum.vs v8, v8, v9
; CHECK-NEXT:    vfmv.f.s fa5, v8
; CHECK-NEXT:    fadd.s fa0, fa5, fa0
; CHECK-NEXT:    vsetivli zero, 1, e32, m1, ta, ma
; CHECK-NEXT:    vse32.v v8, (a0)
; CHECK-NEXT:    ret
entry:
  %rdx = call fast float @llvm.vector.reduce.fadd.v4f32(float -0.0, <4 x float> %v)
  %res = fadd fast float %rdx, %x
  store float %rdx, ptr %rdxptr
  ret float %res
}

define float @reduce_fadd4(float %x, float %y, <4 x float> %v, <4 x float> %w) {
; CHECK-LABEL: reduce_fadd4:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
; CHECK-NEXT:    vfmv.s.f v10, fa0
; CHECK-NEXT:    vfredusum.vs v8, v8, v10
; CHECK-NEXT:    vfmv.f.s fa5, v8
; CHECK-NEXT:    vfmv.s.f v8, fa1
; CHECK-NEXT:    vfredusum.vs v8, v9, v8
; CHECK-NEXT:    vfmv.f.s fa4, v8
; CHECK-NEXT:    fdiv.s fa0, fa5, fa4
; CHECK-NEXT:    ret
entry:
  %rdx = call fast float @llvm.vector.reduce.fadd.v4f32(float -0.0, <4 x float> %v)
  %rdx2 = call fast float @llvm.vector.reduce.fadd.v4f32(float -0.0, <4 x float> %w)
  %res = fadd fast float %rdx, %x
  %res2 = fadd fast float %rdx2, %y
  %div = fdiv fast float %res, %res2
  ret float %div
}

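; maxnum/minnum with the reduction result lowers to a scalar fmax.s/fmin.s on
; the extracted value.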
define float @reduce_fmax(float %x, <4 x float> %v) {
; CHECK-LABEL: reduce_fmax:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
; CHECK-NEXT:    vfredmax.vs v8, v8, v8
; CHECK-NEXT:    vfmv.f.s fa5, v8
; CHECK-NEXT:    fmax.s fa0, fa0, fa5
; CHECK-NEXT:    ret
entry:
  %rdx = call float @llvm.vector.reduce.fmax.v4f32(<4 x float> %v)
  %res = call float @llvm.maxnum.f32(float %x, float %rdx)
  ret float %res
}

define float @reduce_fmin(float %x, <4 x float> %v) {
; CHECK-LABEL: reduce_fmin:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
; CHECK-NEXT:    vfredmin.vs v8, v8, v8
; CHECK-NEXT:    vfmv.f.s fa5, v8
; CHECK-NEXT:    fmin.s fa0, fa0, fa5
; CHECK-NEXT:    ret
entry:
  %rdx = call float @llvm.vector.reduce.fmin.v4f32(<4 x float> %v)
  %res = call float @llvm.minnum.f32(float %x, float %rdx)
  ret float %res
}

; Function Attrs: nofree nosync nounwind readnone willreturn
declare i64 @llvm.vector.reduce.add.v4i64(<4 x i64>)
declare i64 @llvm.vector.reduce.and.v4i64(<4 x i64>)
declare i64 @llvm.vector.reduce.or.v4i64(<4 x i64>)
declare i64 @llvm.vector.reduce.xor.v4i64(<4 x i64>)
declare i64 @llvm.vector.reduce.umax.v4i64(<4 x i64>)
declare i64 @llvm.vector.reduce.umin.v4i64(<4 x i64>)
declare i64 @llvm.vector.reduce.smax.v4i64(<4 x i64>)
declare i64 @llvm.vector.reduce.smin.v4i64(<4 x i64>)
declare float @llvm.vector.reduce.fadd.v4f32(float, <4 x float>)
declare float @llvm.vector.reduce.fmax.v4f32(<4 x float>)
declare float @llvm.vector.reduce.fmin.v4f32(<4 x float>)
declare i64 @llvm.umax.i64(i64, i64)
declare i64 @llvm.umin.i64(i64, i64)
declare i64 @llvm.smax.i64(i64, i64)
declare i64 @llvm.smin.i64(i64, i64)
declare float @llvm.maxnum.f32(float, float)
declare float @llvm.minnum.f32(float, float)

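; Reduced test case, per the function name, that previously crashed: a v4i16
; zero-vector reduction added to an element extracted from the argument and
; truncated to i8.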
define void @crash(<2 x i32> %0) {
; CHECK-LABEL: crash:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    vsetivli zero, 1, e32, m1, ta, ma
; CHECK-NEXT:    vmv.x.s a0, v8
; CHECK-NEXT:    vsetivli zero, 4, e16, mf2, ta, ma
; CHECK-NEXT:    vmv.v.i v8, 0
; CHECK-NEXT:    vmv.s.x v9, a0
; CHECK-NEXT:    vredsum.vs v8, v8, v9
; CHECK-NEXT:    vmv.x.s a0, v8
; CHECK-NEXT:    sb a0, 0(zero)
; CHECK-NEXT:    ret
entry:
  %1 = extractelement <2 x i32> %0, i64 0
  %2 = tail call i16 @llvm.vector.reduce.add.v4i16(<4 x i16> zeroinitializer)
  %3 = zext i16 %2 to i32
  %op.rdx = add i32 %1, %3
  %conv18.us = trunc i32 %op.rdx to i8
  store i8 %conv18.us, ptr null, align 1
  ret void
}

declare i16 @llvm.vector.reduce.add.v4i16(<4 x i16>)