; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mtriple=riscv32 -mattr=+d,+zfh,+zvfh,+v -target-abi=ilp32d \
; RUN:   -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,ZVFH
; RUN: llc -mtriple=riscv64 -mattr=+d,+zfh,+zvfh,+v -target-abi=lp64d \
; RUN:   -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,ZVFH
; RUN: llc -mtriple=riscv32 -mattr=+d,+zfhmin,+zvfhmin,+v -target-abi=ilp32d \
; RUN:   -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,ZVFHMIN
; RUN: llc -mtriple=riscv64 -mattr=+d,+zfhmin,+zvfhmin,+v -target-abi=lp64d \
; RUN:   -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,ZVFHMIN
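; This file exercises vector-predicated floating-point reductions
; (llvm.vp.reduce.fadd, llvm.vp.reduce.fminimum, llvm.vp.reduce.fmaximum) on
; RV32 and RV64. The ZVFH runs have native f16 vector support; the ZVFHMIN
; runs must widen f16 sources to f32 before reducing.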
declare half @llvm.vp.reduce.fadd.nxv1f16(half, <vscale x 1 x half>, <vscale x 1 x i1>, i32)

define half @vpreduce_fadd_nxv1f16(half %s, <vscale x 1 x half> %v, <vscale x 1 x i1> %m, i32 zeroext %evl) {
; ZVFH-LABEL: vpreduce_fadd_nxv1f16:
; ZVFH: # %bb.0:
; ZVFH-NEXT: vsetivli zero, 1, e16, mf4, ta, ma
; ZVFH-NEXT: vfmv.s.f v9, fa0
; ZVFH-NEXT: vsetvli zero, a0, e16, mf4, ta, ma
; ZVFH-NEXT: vfredusum.vs v9, v8, v9, v0.t
; ZVFH-NEXT: vfmv.f.s fa0, v9
; ZVFH-NEXT: ret
;
; ZVFHMIN-LABEL: vpreduce_fadd_nxv1f16:
; ZVFHMIN: # %bb.0:
; ZVFHMIN-NEXT: vsetvli a1, zero, e16, mf4, ta, ma
; ZVFHMIN-NEXT: vfwcvt.f.f.v v9, v8
; ZVFHMIN-NEXT: fcvt.s.h fa5, fa0
; ZVFHMIN-NEXT: vsetvli zero, zero, e32, mf2, ta, ma
; ZVFHMIN-NEXT: vfmv.s.f v8, fa5
; ZVFHMIN-NEXT: vsetvli zero, a0, e32, mf2, ta, ma
; ZVFHMIN-NEXT: vfredusum.vs v8, v9, v8, v0.t
; ZVFHMIN-NEXT: vfmv.f.s fa5, v8
; ZVFHMIN-NEXT: fcvt.h.s fa0, fa5
; ZVFHMIN-NEXT: ret
  %r = call reassoc half @llvm.vp.reduce.fadd.nxv1f16(half %s, <vscale x 1 x half> %v, <vscale x 1 x i1> %m, i32 %evl)
  ret half %r
}
define half @vpreduce_ord_fadd_nxv1f16(half %s, <vscale x 1 x half> %v, <vscale x 1 x i1> %m, i32 zeroext %evl) {
; ZVFH-LABEL: vpreduce_ord_fadd_nxv1f16:
; ZVFH: # %bb.0:
; ZVFH-NEXT: vsetivli zero, 1, e16, mf4, ta, ma
; ZVFH-NEXT: vfmv.s.f v9, fa0
; ZVFH-NEXT: vsetvli zero, a0, e16, mf4, ta, ma
; ZVFH-NEXT: vfredosum.vs v9, v8, v9, v0.t
; ZVFH-NEXT: vfmv.f.s fa0, v9
; ZVFH-NEXT: ret
;
; ZVFHMIN-LABEL: vpreduce_ord_fadd_nxv1f16:
; ZVFHMIN: # %bb.0:
; ZVFHMIN-NEXT: vsetvli a1, zero, e16, mf4, ta, ma
; ZVFHMIN-NEXT: vfwcvt.f.f.v v9, v8
; ZVFHMIN-NEXT: fcvt.s.h fa5, fa0
; ZVFHMIN-NEXT: vsetvli zero, zero, e32, mf2, ta, ma
; ZVFHMIN-NEXT: vfmv.s.f v8, fa5
; ZVFHMIN-NEXT: vsetvli zero, a0, e32, mf2, ta, ma
; ZVFHMIN-NEXT: vfredosum.vs v8, v9, v8, v0.t
; ZVFHMIN-NEXT: vfmv.f.s fa5, v8
; ZVFHMIN-NEXT: fcvt.h.s fa0, fa5
; ZVFHMIN-NEXT: ret
  %r = call half @llvm.vp.reduce.fadd.nxv1f16(half %s, <vscale x 1 x half> %v, <vscale x 1 x i1> %m, i32 %evl)
  ret half %r
}
declare half @llvm.vp.reduce.fadd.nxv2f16(half, <vscale x 2 x half>, <vscale x 2 x i1>, i32)

define half @vpreduce_fadd_nxv2f16(half %s, <vscale x 2 x half> %v, <vscale x 2 x i1> %m, i32 zeroext %evl) {
; ZVFH-LABEL: vpreduce_fadd_nxv2f16:
; ZVFH: # %bb.0:
; ZVFH-NEXT: vsetivli zero, 1, e16, mf2, ta, ma
; ZVFH-NEXT: vfmv.s.f v9, fa0
; ZVFH-NEXT: vsetvli zero, a0, e16, mf2, ta, ma
; ZVFH-NEXT: vfredusum.vs v9, v8, v9, v0.t
; ZVFH-NEXT: vfmv.f.s fa0, v9
; ZVFH-NEXT: ret
;
; ZVFHMIN-LABEL: vpreduce_fadd_nxv2f16:
; ZVFHMIN: # %bb.0:
; ZVFHMIN-NEXT: vsetvli a1, zero, e16, mf2, ta, ma
; ZVFHMIN-NEXT: vfwcvt.f.f.v v9, v8
; ZVFHMIN-NEXT: fcvt.s.h fa5, fa0
; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m1, ta, ma
; ZVFHMIN-NEXT: vfmv.s.f v8, fa5
; ZVFHMIN-NEXT: vsetvli zero, a0, e32, m1, ta, ma
; ZVFHMIN-NEXT: vfredusum.vs v8, v9, v8, v0.t
; ZVFHMIN-NEXT: vfmv.f.s fa5, v8
; ZVFHMIN-NEXT: fcvt.h.s fa0, fa5
; ZVFHMIN-NEXT: ret
  %r = call reassoc half @llvm.vp.reduce.fadd.nxv2f16(half %s, <vscale x 2 x half> %v, <vscale x 2 x i1> %m, i32 %evl)
  ret half %r
}
define half @vpreduce_ord_fadd_nxv2f16(half %s, <vscale x 2 x half> %v, <vscale x 2 x i1> %m, i32 zeroext %evl) {
; ZVFH-LABEL: vpreduce_ord_fadd_nxv2f16:
; ZVFH: # %bb.0:
; ZVFH-NEXT: vsetivli zero, 1, e16, mf2, ta, ma
; ZVFH-NEXT: vfmv.s.f v9, fa0
; ZVFH-NEXT: vsetvli zero, a0, e16, mf2, ta, ma
; ZVFH-NEXT: vfredosum.vs v9, v8, v9, v0.t
; ZVFH-NEXT: vfmv.f.s fa0, v9
; ZVFH-NEXT: ret
;
; ZVFHMIN-LABEL: vpreduce_ord_fadd_nxv2f16:
; ZVFHMIN: # %bb.0:
; ZVFHMIN-NEXT: vsetvli a1, zero, e16, mf2, ta, ma
; ZVFHMIN-NEXT: vfwcvt.f.f.v v9, v8
; ZVFHMIN-NEXT: fcvt.s.h fa5, fa0
; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m1, ta, ma
; ZVFHMIN-NEXT: vfmv.s.f v8, fa5
; ZVFHMIN-NEXT: vsetvli zero, a0, e32, m1, ta, ma
; ZVFHMIN-NEXT: vfredosum.vs v8, v9, v8, v0.t
; ZVFHMIN-NEXT: vfmv.f.s fa5, v8
; ZVFHMIN-NEXT: fcvt.h.s fa0, fa5
; ZVFHMIN-NEXT: ret
  %r = call half @llvm.vp.reduce.fadd.nxv2f16(half %s, <vscale x 2 x half> %v, <vscale x 2 x i1> %m, i32 %evl)
  ret half %r
}
declare half @llvm.vp.reduce.fadd.nxv4f16(half, <vscale x 4 x half>, <vscale x 4 x i1>, i32)

define half @vpreduce_fadd_nxv4f16(half %s, <vscale x 4 x half> %v, <vscale x 4 x i1> %m, i32 zeroext %evl) {
; ZVFH-LABEL: vpreduce_fadd_nxv4f16:
; ZVFH: # %bb.0:
; ZVFH-NEXT: vsetivli zero, 1, e16, m1, ta, ma
; ZVFH-NEXT: vfmv.s.f v9, fa0
; ZVFH-NEXT: vsetvli zero, a0, e16, m1, ta, ma
; ZVFH-NEXT: vfredusum.vs v9, v8, v9, v0.t
; ZVFH-NEXT: vfmv.f.s fa0, v9
; ZVFH-NEXT: ret
;
; ZVFHMIN-LABEL: vpreduce_fadd_nxv4f16:
; ZVFHMIN: # %bb.0:
; ZVFHMIN-NEXT: vsetvli a1, zero, e16, m1, ta, ma
; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v8
; ZVFHMIN-NEXT: fcvt.s.h fa5, fa0
; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m2, ta, ma
; ZVFHMIN-NEXT: vfmv.s.f v8, fa5
; ZVFHMIN-NEXT: vsetvli zero, a0, e32, m2, ta, ma
; ZVFHMIN-NEXT: vfredusum.vs v8, v10, v8, v0.t
; ZVFHMIN-NEXT: vfmv.f.s fa5, v8
; ZVFHMIN-NEXT: fcvt.h.s fa0, fa5
; ZVFHMIN-NEXT: ret
  %r = call reassoc half @llvm.vp.reduce.fadd.nxv4f16(half %s, <vscale x 4 x half> %v, <vscale x 4 x i1> %m, i32 %evl)
  ret half %r
}
define half @vpreduce_ord_fadd_nxv4f16(half %s, <vscale x 4 x half> %v, <vscale x 4 x i1> %m, i32 zeroext %evl) {
; ZVFH-LABEL: vpreduce_ord_fadd_nxv4f16:
; ZVFH: # %bb.0:
; ZVFH-NEXT: vsetivli zero, 1, e16, m1, ta, ma
; ZVFH-NEXT: vfmv.s.f v9, fa0
; ZVFH-NEXT: vsetvli zero, a0, e16, m1, ta, ma
; ZVFH-NEXT: vfredosum.vs v9, v8, v9, v0.t
; ZVFH-NEXT: vfmv.f.s fa0, v9
; ZVFH-NEXT: ret
;
; ZVFHMIN-LABEL: vpreduce_ord_fadd_nxv4f16:
; ZVFHMIN: # %bb.0:
; ZVFHMIN-NEXT: vsetvli a1, zero, e16, m1, ta, ma
; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v8
; ZVFHMIN-NEXT: fcvt.s.h fa5, fa0
; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m2, ta, ma
; ZVFHMIN-NEXT: vfmv.s.f v8, fa5
; ZVFHMIN-NEXT: vsetvli zero, a0, e32, m2, ta, ma
; ZVFHMIN-NEXT: vfredosum.vs v8, v10, v8, v0.t
; ZVFHMIN-NEXT: vfmv.f.s fa5, v8
; ZVFHMIN-NEXT: fcvt.h.s fa0, fa5
; ZVFHMIN-NEXT: ret
  %r = call half @llvm.vp.reduce.fadd.nxv4f16(half %s, <vscale x 4 x half> %v, <vscale x 4 x i1> %m, i32 %evl)
  ret half %r
}
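; <vscale x 64 x half> is wider than the largest register group, so the
; reductions below are split into two m8 halves under ZVFH; under ZVFHMIN each
; half is additionally widened to f32, giving four partial reductions chained
; through the scalar accumulator.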
declare half @llvm.vp.reduce.fadd.nxv64f16(half, <vscale x 64 x half>, <vscale x 64 x i1>, i32)

define half @vpreduce_fadd_nxv64f16(half %s, <vscale x 64 x half> %v, <vscale x 64 x i1> %m, i32 zeroext %evl) {
; ZVFH-LABEL: vpreduce_fadd_nxv64f16:
; ZVFH: # %bb.0:
; ZVFH-NEXT: csrr a2, vlenb
; ZVFH-NEXT: srli a1, a2, 1
; ZVFH-NEXT: vsetvli a3, zero, e8, m1, ta, ma
; ZVFH-NEXT: vslidedown.vx v24, v0, a1
; ZVFH-NEXT: slli a2, a2, 2
; ZVFH-NEXT: sub a1, a0, a2
; ZVFH-NEXT: sltu a3, a0, a1
; ZVFH-NEXT: addi a3, a3, -1
; ZVFH-NEXT: and a1, a3, a1
; ZVFH-NEXT: bltu a0, a2, .LBB6_2
; ZVFH-NEXT: # %bb.1:
; ZVFH-NEXT: mv a0, a2
; ZVFH-NEXT: .LBB6_2:
; ZVFH-NEXT: vsetvli zero, zero, e16, m2, ta, ma
; ZVFH-NEXT: vfmv.s.f v25, fa0
; ZVFH-NEXT: vsetvli zero, a0, e16, m8, ta, ma
; ZVFH-NEXT: vfredusum.vs v25, v8, v25, v0.t
; ZVFH-NEXT: vmv1r.v v0, v24
; ZVFH-NEXT: vsetvli zero, a1, e16, m8, ta, ma
; ZVFH-NEXT: vfredusum.vs v25, v16, v25, v0.t
; ZVFH-NEXT: vfmv.f.s fa0, v25
; ZVFH-NEXT: ret
;
; ZVFHMIN-LABEL: vpreduce_fadd_nxv64f16:
; ZVFHMIN: # %bb.0:
; ZVFHMIN-NEXT: csrr a3, vlenb
; ZVFHMIN-NEXT: srli a1, a3, 1
; ZVFHMIN-NEXT: vsetvli a2, zero, e8, m1, ta, ma
; ZVFHMIN-NEXT: vslidedown.vx v7, v0, a1
; ZVFHMIN-NEXT: slli a5, a3, 2
; ZVFHMIN-NEXT: sub a1, a0, a5
; ZVFHMIN-NEXT: sltu a2, a0, a1
; ZVFHMIN-NEXT: addi a2, a2, -1
; ZVFHMIN-NEXT: and a1, a2, a1
; ZVFHMIN-NEXT: slli a4, a3, 1
; ZVFHMIN-NEXT: sub a2, a1, a4
; ZVFHMIN-NEXT: sltu a6, a1, a2
; ZVFHMIN-NEXT: bltu a1, a4, .LBB6_2
; ZVFHMIN-NEXT: # %bb.1:
; ZVFHMIN-NEXT: mv a1, a4
; ZVFHMIN-NEXT: .LBB6_2:
; ZVFHMIN-NEXT: addi a6, a6, -1
; ZVFHMIN-NEXT: bltu a0, a5, .LBB6_4
; ZVFHMIN-NEXT: # %bb.3:
; ZVFHMIN-NEXT: mv a0, a5
; ZVFHMIN-NEXT: .LBB6_4:
; ZVFHMIN-NEXT: and a2, a6, a2
; ZVFHMIN-NEXT: sub a5, a0, a4
; ZVFHMIN-NEXT: sltu a6, a0, a5
; ZVFHMIN-NEXT: addi a6, a6, -1
; ZVFHMIN-NEXT: and a5, a6, a5
; ZVFHMIN-NEXT: srli a3, a3, 2
; ZVFHMIN-NEXT: vsetvli a6, zero, e8, mf2, ta, ma
; ZVFHMIN-NEXT: vslidedown.vx v6, v0, a3
; ZVFHMIN-NEXT: bltu a0, a4, .LBB6_6
; ZVFHMIN-NEXT: # %bb.5:
; ZVFHMIN-NEXT: mv a0, a4
; ZVFHMIN-NEXT: .LBB6_6:
; ZVFHMIN-NEXT: vsetvli a4, zero, e16, m4, ta, ma
; ZVFHMIN-NEXT: vfwcvt.f.f.v v24, v8
; ZVFHMIN-NEXT: fcvt.s.h fa5, fa0
; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m8, ta, ma
; ZVFHMIN-NEXT: vfmv.s.f v8, fa5
; ZVFHMIN-NEXT: vsetvli zero, a0, e32, m8, ta, ma
; ZVFHMIN-NEXT: vfredusum.vs v8, v24, v8, v0.t
; ZVFHMIN-NEXT: vfmv.f.s fa5, v8
; ZVFHMIN-NEXT: fcvt.h.s fa5, fa5
; ZVFHMIN-NEXT: fcvt.s.h fa5, fa5
; ZVFHMIN-NEXT: vsetivli zero, 1, e32, m8, ta, ma
; ZVFHMIN-NEXT: vfmv.s.f v8, fa5
; ZVFHMIN-NEXT: vsetvli a0, zero, e16, m4, ta, ma
; ZVFHMIN-NEXT: vfwcvt.f.f.v v24, v12
; ZVFHMIN-NEXT: vmv1r.v v0, v6
; ZVFHMIN-NEXT: vsetvli zero, a5, e32, m8, ta, ma
; ZVFHMIN-NEXT: vfredusum.vs v8, v24, v8, v0.t
; ZVFHMIN-NEXT: vfmv.f.s fa5, v8
; ZVFHMIN-NEXT: fcvt.h.s fa5, fa5
; ZVFHMIN-NEXT: fcvt.s.h fa5, fa5
; ZVFHMIN-NEXT: vsetivli zero, 1, e32, m8, ta, ma
; ZVFHMIN-NEXT: vfmv.s.f v8, fa5
; ZVFHMIN-NEXT: vsetvli a0, zero, e16, m4, ta, ma
; ZVFHMIN-NEXT: vfwcvt.f.f.v v24, v16
; ZVFHMIN-NEXT: vmv1r.v v0, v7
; ZVFHMIN-NEXT: vsetvli zero, a1, e32, m8, ta, ma
; ZVFHMIN-NEXT: vfredusum.vs v8, v24, v8, v0.t
; ZVFHMIN-NEXT: vfmv.f.s fa5, v8
; ZVFHMIN-NEXT: fcvt.h.s fa5, fa5
; ZVFHMIN-NEXT: fcvt.s.h fa5, fa5
; ZVFHMIN-NEXT: vsetivli zero, 1, e32, m8, ta, ma
; ZVFHMIN-NEXT: vfmv.s.f v8, fa5
; ZVFHMIN-NEXT: vsetvli a0, zero, e8, mf2, ta, ma
; ZVFHMIN-NEXT: vslidedown.vx v0, v7, a3
; ZVFHMIN-NEXT: vsetvli a0, zero, e16, m4, ta, ma
; ZVFHMIN-NEXT: vfwcvt.f.f.v v24, v20
; ZVFHMIN-NEXT: vsetvli zero, a2, e32, m8, ta, ma
; ZVFHMIN-NEXT: vfredusum.vs v8, v24, v8, v0.t
; ZVFHMIN-NEXT: vfmv.f.s fa5, v8
; ZVFHMIN-NEXT: fcvt.h.s fa0, fa5
; ZVFHMIN-NEXT: ret
  %r = call reassoc half @llvm.vp.reduce.fadd.nxv64f16(half %s, <vscale x 64 x half> %v, <vscale x 64 x i1> %m, i32 %evl)
  ret half %r
}
define half @vpreduce_ord_fadd_nxv64f16(half %s, <vscale x 64 x half> %v, <vscale x 64 x i1> %m, i32 zeroext %evl) {
; ZVFH-LABEL: vpreduce_ord_fadd_nxv64f16:
; ZVFH: # %bb.0:
; ZVFH-NEXT: csrr a2, vlenb
; ZVFH-NEXT: srli a1, a2, 1
; ZVFH-NEXT: vsetvli a3, zero, e8, m1, ta, ma
; ZVFH-NEXT: vslidedown.vx v24, v0, a1
; ZVFH-NEXT: slli a2, a2, 2
; ZVFH-NEXT: sub a1, a0, a2
; ZVFH-NEXT: sltu a3, a0, a1
; ZVFH-NEXT: addi a3, a3, -1
; ZVFH-NEXT: and a1, a3, a1
; ZVFH-NEXT: bltu a0, a2, .LBB7_2
; ZVFH-NEXT: # %bb.1:
; ZVFH-NEXT: mv a0, a2
; ZVFH-NEXT: .LBB7_2:
; ZVFH-NEXT: vsetvli zero, zero, e16, m2, ta, ma
; ZVFH-NEXT: vfmv.s.f v25, fa0
; ZVFH-NEXT: vsetvli zero, a0, e16, m8, ta, ma
; ZVFH-NEXT: vfredosum.vs v25, v8, v25, v0.t
; ZVFH-NEXT: vmv1r.v v0, v24
; ZVFH-NEXT: vsetvli zero, a1, e16, m8, ta, ma
; ZVFH-NEXT: vfredosum.vs v25, v16, v25, v0.t
; ZVFH-NEXT: vfmv.f.s fa0, v25
; ZVFH-NEXT: ret
;
; ZVFHMIN-LABEL: vpreduce_ord_fadd_nxv64f16:
; ZVFHMIN: # %bb.0:
; ZVFHMIN-NEXT: csrr a3, vlenb
; ZVFHMIN-NEXT: srli a1, a3, 1
; ZVFHMIN-NEXT: vsetvli a2, zero, e8, m1, ta, ma
; ZVFHMIN-NEXT: vslidedown.vx v7, v0, a1
; ZVFHMIN-NEXT: slli a5, a3, 2
; ZVFHMIN-NEXT: sub a1, a0, a5
; ZVFHMIN-NEXT: sltu a2, a0, a1
; ZVFHMIN-NEXT: addi a2, a2, -1
; ZVFHMIN-NEXT: and a1, a2, a1
; ZVFHMIN-NEXT: slli a4, a3, 1
; ZVFHMIN-NEXT: sub a2, a1, a4
; ZVFHMIN-NEXT: sltu a6, a1, a2
; ZVFHMIN-NEXT: bltu a1, a4, .LBB7_2
; ZVFHMIN-NEXT: # %bb.1:
; ZVFHMIN-NEXT: mv a1, a4
; ZVFHMIN-NEXT: .LBB7_2:
; ZVFHMIN-NEXT: addi a6, a6, -1
; ZVFHMIN-NEXT: bltu a0, a5, .LBB7_4
; ZVFHMIN-NEXT: # %bb.3:
; ZVFHMIN-NEXT: mv a0, a5
; ZVFHMIN-NEXT: .LBB7_4:
; ZVFHMIN-NEXT: and a2, a6, a2
; ZVFHMIN-NEXT: sub a5, a0, a4
; ZVFHMIN-NEXT: sltu a6, a0, a5
; ZVFHMIN-NEXT: addi a6, a6, -1
; ZVFHMIN-NEXT: and a5, a6, a5
; ZVFHMIN-NEXT: srli a3, a3, 2
; ZVFHMIN-NEXT: vsetvli a6, zero, e8, mf2, ta, ma
; ZVFHMIN-NEXT: vslidedown.vx v6, v0, a3
; ZVFHMIN-NEXT: bltu a0, a4, .LBB7_6
; ZVFHMIN-NEXT: # %bb.5:
; ZVFHMIN-NEXT: mv a0, a4
; ZVFHMIN-NEXT: .LBB7_6:
; ZVFHMIN-NEXT: vsetvli a4, zero, e16, m4, ta, ma
; ZVFHMIN-NEXT: vfwcvt.f.f.v v24, v8
; ZVFHMIN-NEXT: fcvt.s.h fa5, fa0
; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m8, ta, ma
; ZVFHMIN-NEXT: vfmv.s.f v8, fa5
; ZVFHMIN-NEXT: vsetvli zero, a0, e32, m8, ta, ma
; ZVFHMIN-NEXT: vfredosum.vs v8, v24, v8, v0.t
; ZVFHMIN-NEXT: vfmv.f.s fa5, v8
; ZVFHMIN-NEXT: fcvt.h.s fa5, fa5
; ZVFHMIN-NEXT: fcvt.s.h fa5, fa5
; ZVFHMIN-NEXT: vsetivli zero, 1, e32, m8, ta, ma
; ZVFHMIN-NEXT: vfmv.s.f v8, fa5
; ZVFHMIN-NEXT: vsetvli a0, zero, e16, m4, ta, ma
; ZVFHMIN-NEXT: vfwcvt.f.f.v v24, v12
; ZVFHMIN-NEXT: vmv1r.v v0, v6
; ZVFHMIN-NEXT: vsetvli zero, a5, e32, m8, ta, ma
; ZVFHMIN-NEXT: vfredosum.vs v8, v24, v8, v0.t
; ZVFHMIN-NEXT: vfmv.f.s fa5, v8
; ZVFHMIN-NEXT: fcvt.h.s fa5, fa5
; ZVFHMIN-NEXT: fcvt.s.h fa5, fa5
; ZVFHMIN-NEXT: vsetivli zero, 1, e32, m8, ta, ma
; ZVFHMIN-NEXT: vfmv.s.f v8, fa5
; ZVFHMIN-NEXT: vsetvli a0, zero, e16, m4, ta, ma
; ZVFHMIN-NEXT: vfwcvt.f.f.v v24, v16
; ZVFHMIN-NEXT: vmv1r.v v0, v7
; ZVFHMIN-NEXT: vsetvli zero, a1, e32, m8, ta, ma
; ZVFHMIN-NEXT: vfredosum.vs v8, v24, v8, v0.t
; ZVFHMIN-NEXT: vfmv.f.s fa5, v8
; ZVFHMIN-NEXT: fcvt.h.s fa5, fa5
; ZVFHMIN-NEXT: fcvt.s.h fa5, fa5
; ZVFHMIN-NEXT: vsetivli zero, 1, e32, m8, ta, ma
; ZVFHMIN-NEXT: vfmv.s.f v8, fa5
; ZVFHMIN-NEXT: vsetvli a0, zero, e8, mf2, ta, ma
; ZVFHMIN-NEXT: vslidedown.vx v0, v7, a3
; ZVFHMIN-NEXT: vsetvli a0, zero, e16, m4, ta, ma
; ZVFHMIN-NEXT: vfwcvt.f.f.v v24, v20
; ZVFHMIN-NEXT: vsetvli zero, a2, e32, m8, ta, ma
; ZVFHMIN-NEXT: vfredosum.vs v8, v24, v8, v0.t
; ZVFHMIN-NEXT: vfmv.f.s fa5, v8
; ZVFHMIN-NEXT: fcvt.h.s fa0, fa5
; ZVFHMIN-NEXT: ret
  %r = call half @llvm.vp.reduce.fadd.nxv64f16(half %s, <vscale x 64 x half> %v, <vscale x 64 x i1> %m, i32 %evl)
  ret half %r
}
declare float @llvm.vp.reduce.fadd.nxv1f32(float, <vscale x 1 x float>, <vscale x 1 x i1>, i32)

define float @vpreduce_fadd_nxv1f32(float %s, <vscale x 1 x float> %v, <vscale x 1 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vpreduce_fadd_nxv1f32:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetivli zero, 1, e32, mf2, ta, ma
; CHECK-NEXT: vfmv.s.f v9, fa0
; CHECK-NEXT: vsetvli zero, a0, e32, mf2, ta, ma
; CHECK-NEXT: vfredusum.vs v9, v8, v9, v0.t
; CHECK-NEXT: vfmv.f.s fa0, v9
; CHECK-NEXT: ret
  %r = call reassoc float @llvm.vp.reduce.fadd.nxv1f32(float %s, <vscale x 1 x float> %v, <vscale x 1 x i1> %m, i32 %evl)
  ret float %r
}
define float @vpreduce_ord_fadd_nxv1f32(float %s, <vscale x 1 x float> %v, <vscale x 1 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vpreduce_ord_fadd_nxv1f32:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetivli zero, 1, e32, mf2, ta, ma
; CHECK-NEXT: vfmv.s.f v9, fa0
; CHECK-NEXT: vsetvli zero, a0, e32, mf2, ta, ma
; CHECK-NEXT: vfredosum.vs v9, v8, v9, v0.t
; CHECK-NEXT: vfmv.f.s fa0, v9
; CHECK-NEXT: ret
  %r = call float @llvm.vp.reduce.fadd.nxv1f32(float %s, <vscale x 1 x float> %v, <vscale x 1 x i1> %m, i32 %evl)
  ret float %r
}
declare float @llvm.vp.reduce.fadd.nxv2f32(float, <vscale x 2 x float>, <vscale x 2 x i1>, i32)

define float @vpreduce_fadd_nxv2f32(float %s, <vscale x 2 x float> %v, <vscale x 2 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vpreduce_fadd_nxv2f32:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetivli zero, 1, e32, m1, ta, ma
; CHECK-NEXT: vfmv.s.f v9, fa0
; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, ma
; CHECK-NEXT: vfredusum.vs v9, v8, v9, v0.t
; CHECK-NEXT: vfmv.f.s fa0, v9
; CHECK-NEXT: ret
  %r = call reassoc float @llvm.vp.reduce.fadd.nxv2f32(float %s, <vscale x 2 x float> %v, <vscale x 2 x i1> %m, i32 %evl)
  ret float %r
}
define float @vpreduce_ord_fadd_nxv2f32(float %s, <vscale x 2 x float> %v, <vscale x 2 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vpreduce_ord_fadd_nxv2f32:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetivli zero, 1, e32, m1, ta, ma
; CHECK-NEXT: vfmv.s.f v9, fa0
; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, ma
; CHECK-NEXT: vfredosum.vs v9, v8, v9, v0.t
; CHECK-NEXT: vfmv.f.s fa0, v9
; CHECK-NEXT: ret
  %r = call float @llvm.vp.reduce.fadd.nxv2f32(float %s, <vscale x 2 x float> %v, <vscale x 2 x i1> %m, i32 %evl)
  ret float %r
}
declare float @llvm.vp.reduce.fadd.nxv4f32(float, <vscale x 4 x float>, <vscale x 4 x i1>, i32)

define float @vpreduce_fadd_nxv4f32(float %s, <vscale x 4 x float> %v, <vscale x 4 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vpreduce_fadd_nxv4f32:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetivli zero, 1, e32, m1, ta, ma
; CHECK-NEXT: vfmv.s.f v10, fa0
; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, ma
; CHECK-NEXT: vfredusum.vs v10, v8, v10, v0.t
; CHECK-NEXT: vfmv.f.s fa0, v10
; CHECK-NEXT: ret
  %r = call reassoc float @llvm.vp.reduce.fadd.nxv4f32(float %s, <vscale x 4 x float> %v, <vscale x 4 x i1> %m, i32 %evl)
  ret float %r
}
define float @vpreduce_ord_fadd_nxv4f32(float %s, <vscale x 4 x float> %v, <vscale x 4 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vpreduce_ord_fadd_nxv4f32:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetivli zero, 1, e32, m1, ta, ma
; CHECK-NEXT: vfmv.s.f v10, fa0
; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, ma
; CHECK-NEXT: vfredosum.vs v10, v8, v10, v0.t
; CHECK-NEXT: vfmv.f.s fa0, v10
; CHECK-NEXT: ret
  %r = call float @llvm.vp.reduce.fadd.nxv4f32(float %s, <vscale x 4 x float> %v, <vscale x 4 x i1> %m, i32 %evl)
  ret float %r
}
declare double @llvm.vp.reduce.fadd.nxv1f64(double, <vscale x 1 x double>, <vscale x 1 x i1>, i32)

define double @vpreduce_fadd_nxv1f64(double %s, <vscale x 1 x double> %v, <vscale x 1 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vpreduce_fadd_nxv1f64:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetivli zero, 1, e64, m1, ta, ma
; CHECK-NEXT: vfmv.s.f v9, fa0
; CHECK-NEXT: vsetvli zero, a0, e64, m1, ta, ma
; CHECK-NEXT: vfredusum.vs v9, v8, v9, v0.t
; CHECK-NEXT: vfmv.f.s fa0, v9
; CHECK-NEXT: ret
  %r = call reassoc double @llvm.vp.reduce.fadd.nxv1f64(double %s, <vscale x 1 x double> %v, <vscale x 1 x i1> %m, i32 %evl)
  ret double %r
}
define double @vpreduce_ord_fadd_nxv1f64(double %s, <vscale x 1 x double> %v, <vscale x 1 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vpreduce_ord_fadd_nxv1f64:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetivli zero, 1, e64, m1, ta, ma
; CHECK-NEXT: vfmv.s.f v9, fa0
; CHECK-NEXT: vsetvli zero, a0, e64, m1, ta, ma
; CHECK-NEXT: vfredosum.vs v9, v8, v9, v0.t
; CHECK-NEXT: vfmv.f.s fa0, v9
; CHECK-NEXT: ret
  %r = call double @llvm.vp.reduce.fadd.nxv1f64(double %s, <vscale x 1 x double> %v, <vscale x 1 x i1> %m, i32 %evl)
  ret double %r
}
declare double @llvm.vp.reduce.fadd.nxv2f64(double, <vscale x 2 x double>, <vscale x 2 x i1>, i32)

define double @vpreduce_fadd_nxv2f64(double %s, <vscale x 2 x double> %v, <vscale x 2 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vpreduce_fadd_nxv2f64:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetivli zero, 1, e64, m1, ta, ma
; CHECK-NEXT: vfmv.s.f v10, fa0
; CHECK-NEXT: vsetvli zero, a0, e64, m2, ta, ma
; CHECK-NEXT: vfredusum.vs v10, v8, v10, v0.t
; CHECK-NEXT: vfmv.f.s fa0, v10
; CHECK-NEXT: ret
  %r = call reassoc double @llvm.vp.reduce.fadd.nxv2f64(double %s, <vscale x 2 x double> %v, <vscale x 2 x i1> %m, i32 %evl)
  ret double %r
}
define double @vpreduce_ord_fadd_nxv2f64(double %s, <vscale x 2 x double> %v, <vscale x 2 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vpreduce_ord_fadd_nxv2f64:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetivli zero, 1, e64, m1, ta, ma
; CHECK-NEXT: vfmv.s.f v10, fa0
; CHECK-NEXT: vsetvli zero, a0, e64, m2, ta, ma
; CHECK-NEXT: vfredosum.vs v10, v8, v10, v0.t
; CHECK-NEXT: vfmv.f.s fa0, v10
; CHECK-NEXT: ret
  %r = call double @llvm.vp.reduce.fadd.nxv2f64(double %s, <vscale x 2 x double> %v, <vscale x 2 x i1> %m, i32 %evl)
  ret double %r
}
declare double @llvm.vp.reduce.fadd.nxv3f64(double, <vscale x 3 x double>, <vscale x 3 x i1>, i32)

define double @vpreduce_fadd_nxv3f64(double %s, <vscale x 3 x double> %v, <vscale x 3 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vpreduce_fadd_nxv3f64:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetivli zero, 1, e64, m1, ta, ma
; CHECK-NEXT: vfmv.s.f v12, fa0
; CHECK-NEXT: vsetvli zero, a0, e64, m4, ta, ma
; CHECK-NEXT: vfredusum.vs v12, v8, v12, v0.t
; CHECK-NEXT: vfmv.f.s fa0, v12
; CHECK-NEXT: ret
  %r = call reassoc double @llvm.vp.reduce.fadd.nxv3f64(double %s, <vscale x 3 x double> %v, <vscale x 3 x i1> %m, i32 %evl)
  ret double %r
}
define double @vpreduce_ord_fadd_nxv3f64(double %s, <vscale x 3 x double> %v, <vscale x 3 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vpreduce_ord_fadd_nxv3f64:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetivli zero, 1, e64, m1, ta, ma
; CHECK-NEXT: vfmv.s.f v12, fa0
; CHECK-NEXT: vsetvli zero, a0, e64, m4, ta, ma
; CHECK-NEXT: vfredosum.vs v12, v8, v12, v0.t
; CHECK-NEXT: vfmv.f.s fa0, v12
; CHECK-NEXT: ret
  %r = call double @llvm.vp.reduce.fadd.nxv3f64(double %s, <vscale x 3 x double> %v, <vscale x 3 x i1> %m, i32 %evl)
  ret double %r
}
declare double @llvm.vp.reduce.fadd.nxv4f64(double, <vscale x 4 x double>, <vscale x 4 x i1>, i32)

define double @vpreduce_fadd_nxv4f64(double %s, <vscale x 4 x double> %v, <vscale x 4 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vpreduce_fadd_nxv4f64:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetivli zero, 1, e64, m1, ta, ma
; CHECK-NEXT: vfmv.s.f v12, fa0
; CHECK-NEXT: vsetvli zero, a0, e64, m4, ta, ma
; CHECK-NEXT: vfredusum.vs v12, v8, v12, v0.t
; CHECK-NEXT: vfmv.f.s fa0, v12
; CHECK-NEXT: ret
  %r = call reassoc double @llvm.vp.reduce.fadd.nxv4f64(double %s, <vscale x 4 x double> %v, <vscale x 4 x i1> %m, i32 %evl)
  ret double %r
}
define double @vpreduce_ord_fadd_nxv4f64(double %s, <vscale x 4 x double> %v, <vscale x 4 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vpreduce_ord_fadd_nxv4f64:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetivli zero, 1, e64, m1, ta, ma
; CHECK-NEXT: vfmv.s.f v12, fa0
; CHECK-NEXT: vsetvli zero, a0, e64, m4, ta, ma
; CHECK-NEXT: vfredosum.vs v12, v8, v12, v0.t
; CHECK-NEXT: vfmv.f.s fa0, v12
; CHECK-NEXT: ret
  %r = call double @llvm.vp.reduce.fadd.nxv4f64(double %s, <vscale x 4 x double> %v, <vscale x 4 x i1> %m, i32 %evl)
  ret double %r
}
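; The fminimum/fmaximum reductions below lower to vfredmin/vfredmax plus a
; masked NaN check: if any active element (or the start value) is NaN, the
; result is replaced with the canonical quiet NaN (lui a0, 523264 gives
; 0x7fc00000). With the nnan flag the check is omitted.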
define float @vreduce_fminimum_nxv4f32(float %start, <vscale x 4 x float> %val, <vscale x 4 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vreduce_fminimum_nxv4f32:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetivli zero, 1, e32, m1, ta, ma
; CHECK-NEXT: vfmv.s.f v10, fa0
; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, ma
; CHECK-NEXT: vfredmin.vs v10, v8, v10, v0.t
; CHECK-NEXT: vmfne.vv v11, v8, v8, v0.t
; CHECK-NEXT: vcpop.m a0, v11, v0.t
; CHECK-NEXT: feq.s a1, fa0, fa0
; CHECK-NEXT: xori a1, a1, 1
; CHECK-NEXT: or a0, a0, a1
; CHECK-NEXT: beqz a0, .LBB22_2
; CHECK-NEXT: # %bb.1:
; CHECK-NEXT: lui a0, 523264
; CHECK-NEXT: fmv.w.x fa0, a0
; CHECK-NEXT: ret
; CHECK-NEXT: .LBB22_2:
; CHECK-NEXT: vfmv.f.s fa0, v10
; CHECK-NEXT: ret
  %s = call float @llvm.vp.reduce.fminimum.nxv4f32(float %start, <vscale x 4 x float> %val, <vscale x 4 x i1> %m, i32 %evl)
  ret float %s
}
define float @vreduce_fmaximum_nxv4f32(float %start, <vscale x 4 x float> %val, <vscale x 4 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vreduce_fmaximum_nxv4f32:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetivli zero, 1, e32, m1, ta, ma
; CHECK-NEXT: vfmv.s.f v10, fa0
; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, ma
; CHECK-NEXT: vfredmax.vs v10, v8, v10, v0.t
; CHECK-NEXT: vmfne.vv v11, v8, v8, v0.t
; CHECK-NEXT: vcpop.m a0, v11, v0.t
; CHECK-NEXT: feq.s a1, fa0, fa0
; CHECK-NEXT: xori a1, a1, 1
; CHECK-NEXT: or a0, a0, a1
; CHECK-NEXT: beqz a0, .LBB23_2
; CHECK-NEXT: # %bb.1:
; CHECK-NEXT: lui a0, 523264
; CHECK-NEXT: fmv.w.x fa0, a0
; CHECK-NEXT: ret
; CHECK-NEXT: .LBB23_2:
; CHECK-NEXT: vfmv.f.s fa0, v10
; CHECK-NEXT: ret
  %s = call float @llvm.vp.reduce.fmaximum.nxv4f32(float %start, <vscale x 4 x float> %val, <vscale x 4 x i1> %m, i32 %evl)
  ret float %s
}
define float @vreduce_fminimum_nnan_nxv4f32(float %start, <vscale x 4 x float> %val, <vscale x 4 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vreduce_fminimum_nnan_nxv4f32:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetivli zero, 1, e32, m1, ta, ma
; CHECK-NEXT: vfmv.s.f v10, fa0
; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, ma
; CHECK-NEXT: vfredmin.vs v10, v8, v10, v0.t
; CHECK-NEXT: vfmv.f.s fa0, v10
; CHECK-NEXT: ret
  %s = call nnan float @llvm.vp.reduce.fminimum.nxv4f32(float %start, <vscale x 4 x float> %val, <vscale x 4 x i1> %m, i32 %evl)
  ret float %s
}
define float @vreduce_fmaximum_nnan_nxv4f32(float %start, <vscale x 4 x float> %val, <vscale x 4 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vreduce_fmaximum_nnan_nxv4f32:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetivli zero, 1, e32, m1, ta, ma
; CHECK-NEXT: vfmv.s.f v10, fa0
; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, ma
; CHECK-NEXT: vfredmax.vs v10, v8, v10, v0.t
; CHECK-NEXT: vfmv.f.s fa0, v10
; CHECK-NEXT: ret
  %s = call nnan float @llvm.vp.reduce.fmaximum.nxv4f32(float %start, <vscale x 4 x float> %val, <vscale x 4 x i1> %m, i32 %evl)
  ret float %s
}
define float @vreduce_fminimum_v4f32(float %start, <4 x float> %val, <4 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vreduce_fminimum_v4f32:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetivli zero, 1, e32, m1, ta, ma
; CHECK-NEXT: vfmv.s.f v9, fa0
; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, ma
; CHECK-NEXT: vfredmin.vs v9, v8, v9, v0.t
; CHECK-NEXT: vmfne.vv v8, v8, v8, v0.t
; CHECK-NEXT: vcpop.m a0, v8, v0.t
; CHECK-NEXT: feq.s a1, fa0, fa0
; CHECK-NEXT: xori a1, a1, 1
; CHECK-NEXT: or a0, a0, a1
; CHECK-NEXT: beqz a0, .LBB26_2
; CHECK-NEXT: # %bb.1:
; CHECK-NEXT: lui a0, 523264
; CHECK-NEXT: fmv.w.x fa0, a0
; CHECK-NEXT: ret
; CHECK-NEXT: .LBB26_2:
; CHECK-NEXT: vfmv.f.s fa0, v9
; CHECK-NEXT: ret
  %s = call float @llvm.vp.reduce.fminimum.v4f32(float %start, <4 x float> %val, <4 x i1> %m, i32 %evl)
  ret float %s
}
define float @vreduce_fmaximum_v4f32(float %start, <4 x float> %val, <4 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vreduce_fmaximum_v4f32:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetivli zero, 1, e32, m1, ta, ma
; CHECK-NEXT: vfmv.s.f v9, fa0
; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, ma
; CHECK-NEXT: vfredmax.vs v9, v8, v9, v0.t
; CHECK-NEXT: vmfne.vv v8, v8, v8, v0.t
; CHECK-NEXT: vcpop.m a0, v8, v0.t
; CHECK-NEXT: feq.s a1, fa0, fa0
; CHECK-NEXT: xori a1, a1, 1
; CHECK-NEXT: or a0, a0, a1
; CHECK-NEXT: beqz a0, .LBB27_2
; CHECK-NEXT: # %bb.1:
; CHECK-NEXT: lui a0, 523264
; CHECK-NEXT: fmv.w.x fa0, a0
; CHECK-NEXT: ret
; CHECK-NEXT: .LBB27_2:
; CHECK-NEXT: vfmv.f.s fa0, v9
; CHECK-NEXT: ret
  %s = call float @llvm.vp.reduce.fmaximum.v4f32(float %start, <4 x float> %val, <4 x i1> %m, i32 %evl)
  ret float %s
}