1 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
2 ; RUN: llc -mtriple=riscv32 -mattr=+d,+zvfh,+zfbfmin,+zvfbfmin,+v \
3 ; RUN: -target-abi=ilp32d -verify-machineinstrs < %s | FileCheck %s \
4 ; RUN: --check-prefixes=CHECK,ZVFH
5 ; RUN: llc -mtriple=riscv64 -mattr=+d,+zvfh,+zfbfmin,+zvfbfmin,+v \
6 ; RUN: -target-abi=lp64d -verify-machineinstrs < %s | FileCheck %s \
7 ; RUN: --check-prefixes=CHECK,ZVFH
8 ; RUN: llc -mtriple=riscv32 -mattr=+d,+zfhmin,+zvfhmin,+zfbfmin,+zvfbfmin,+v \
9 ; RUN: -target-abi=ilp32d -verify-machineinstrs < %s | FileCheck %s \
10 ; RUN: --check-prefixes=CHECK,ZVFHMIN
11 ; RUN: llc -mtriple=riscv64 -mattr=+d,+zfhmin,+zvfhmin,+zfbfmin,+zvfbfmin,+v \
12 ; RUN: -target-abi=lp64d -verify-machineinstrs < %s | FileCheck %s \
13 ; RUN: --check-prefixes=CHECK,ZVFHMIN
15 declare <vscale x 1 x bfloat> @llvm.vp.round.nxv1bf16(<vscale x 1 x bfloat>, <vscale x 1 x i1>, i32)
16 ; vp.round on <vscale x 1 x bfloat> under mask %m: the expected code widens to e32, converts float->int between fsrmi 4 and the fsrm that restores the saved rounding mode, then narrows back under the original mask.
17 define <vscale x 1 x bfloat> @vp_round_nxv1bf16(<vscale x 1 x bfloat> %va, <vscale x 1 x i1> %m, i32 zeroext %evl) {
18 ; CHECK-LABEL: vp_round_nxv1bf16:
19 ; CHECK: # %bb.0:
20 ; CHECK-NEXT: vsetvli zero, a0, e16, mf4, ta, ma
21 ; CHECK-NEXT: vmv1r.v v9, v0
22 ; CHECK-NEXT: vfwcvtbf16.f.f.v v10, v8, v0.t
23 ; CHECK-NEXT: lui a0, 307200
24 ; CHECK-NEXT: vmv1r.v v8, v0
25 ; CHECK-NEXT: vsetvli zero, zero, e32, mf2, ta, ma
26 ; CHECK-NEXT: vfabs.v v11, v10, v0.t
27 ; CHECK-NEXT: fmv.w.x fa5, a0
28 ; CHECK-NEXT: vsetvli zero, zero, e32, mf2, ta, mu
29 ; CHECK-NEXT: vmflt.vf v8, v11, fa5, v0.t
30 ; CHECK-NEXT: fsrmi a0, 4
31 ; CHECK-NEXT: vmv1r.v v0, v8
32 ; CHECK-NEXT: vsetvli zero, zero, e32, mf2, ta, ma
33 ; CHECK-NEXT: vfcvt.x.f.v v11, v10, v0.t
34 ; CHECK-NEXT: fsrm a0
35 ; CHECK-NEXT: vfcvt.f.x.v v11, v11, v0.t
36 ; CHECK-NEXT: vsetvli zero, zero, e32, mf2, ta, mu
37 ; CHECK-NEXT: vfsgnj.vv v10, v11, v10, v0.t
38 ; CHECK-NEXT: vmv1r.v v0, v9
39 ; CHECK-NEXT: vsetvli zero, zero, e16, mf4, ta, ma
40 ; CHECK-NEXT: vfncvtbf16.f.f.w v8, v10, v0.t
41 ; CHECK-NEXT: ret
42 %v = call <vscale x 1 x bfloat> @llvm.vp.round.nxv1bf16(<vscale x 1 x bfloat> %va, <vscale x 1 x i1> %m, i32 %evl)
43 ret <vscale x 1 x bfloat> %v
44 }
45 ; vp.round on <vscale x 1 x bfloat> with an all-true mask (splat (i1 true)): same widen/round/narrow sequence, with the widening and narrowing converts unmasked.
46 define <vscale x 1 x bfloat> @vp_round_nxv1bf16_unmasked(<vscale x 1 x bfloat> %va, i32 zeroext %evl) {
47 ; CHECK-LABEL: vp_round_nxv1bf16_unmasked:
48 ; CHECK: # %bb.0:
49 ; CHECK-NEXT: vsetvli zero, a0, e16, mf4, ta, ma
50 ; CHECK-NEXT: vfwcvtbf16.f.f.v v9, v8
51 ; CHECK-NEXT: lui a0, 307200
52 ; CHECK-NEXT: vsetvli zero, zero, e32, mf2, ta, ma
53 ; CHECK-NEXT: vfabs.v v8, v9
54 ; CHECK-NEXT: fmv.w.x fa5, a0
55 ; CHECK-NEXT: vmflt.vf v0, v8, fa5
56 ; CHECK-NEXT: fsrmi a0, 4
57 ; CHECK-NEXT: vfcvt.x.f.v v8, v9, v0.t
58 ; CHECK-NEXT: fsrm a0
59 ; CHECK-NEXT: vfcvt.f.x.v v8, v8, v0.t
60 ; CHECK-NEXT: vsetvli zero, zero, e32, mf2, ta, mu
61 ; CHECK-NEXT: vfsgnj.vv v9, v8, v9, v0.t
62 ; CHECK-NEXT: vsetvli zero, zero, e16, mf4, ta, ma
63 ; CHECK-NEXT: vfncvtbf16.f.f.w v8, v9
64 ; CHECK-NEXT: ret
65 %v = call <vscale x 1 x bfloat> @llvm.vp.round.nxv1bf16(<vscale x 1 x bfloat> %va, <vscale x 1 x i1> splat (i1 true), i32 %evl)
66 ret <vscale x 1 x bfloat> %v
67 }
69 declare <vscale x 2 x bfloat> @llvm.vp.round.nxv2bf16(<vscale x 2 x bfloat>, <vscale x 2 x i1>, i32)
70 ; vp.round on <vscale x 2 x bfloat> under mask %m: the expected code widens to e32 (m1), converts float->int between fsrmi 4 and the restoring fsrm, then narrows back under the original mask.
71 define <vscale x 2 x bfloat> @vp_round_nxv2bf16(<vscale x 2 x bfloat> %va, <vscale x 2 x i1> %m, i32 zeroext %evl) {
72 ; CHECK-LABEL: vp_round_nxv2bf16:
73 ; CHECK: # %bb.0:
74 ; CHECK-NEXT: vsetvli zero, a0, e16, mf2, ta, ma
75 ; CHECK-NEXT: vmv1r.v v9, v0
76 ; CHECK-NEXT: vfwcvtbf16.f.f.v v10, v8, v0.t
77 ; CHECK-NEXT: lui a0, 307200
78 ; CHECK-NEXT: vmv1r.v v8, v0
79 ; CHECK-NEXT: vsetvli zero, zero, e32, m1, ta, ma
80 ; CHECK-NEXT: vfabs.v v11, v10, v0.t
81 ; CHECK-NEXT: fmv.w.x fa5, a0
82 ; CHECK-NEXT: vsetvli zero, zero, e32, m1, ta, mu
83 ; CHECK-NEXT: vmflt.vf v8, v11, fa5, v0.t
84 ; CHECK-NEXT: fsrmi a0, 4
85 ; CHECK-NEXT: vmv.v.v v0, v8
86 ; CHECK-NEXT: vsetvli zero, zero, e32, m1, ta, ma
87 ; CHECK-NEXT: vfcvt.x.f.v v11, v10, v0.t
88 ; CHECK-NEXT: fsrm a0
89 ; CHECK-NEXT: vfcvt.f.x.v v11, v11, v0.t
90 ; CHECK-NEXT: vsetvli zero, zero, e32, m1, ta, mu
91 ; CHECK-NEXT: vfsgnj.vv v10, v11, v10, v0.t
92 ; CHECK-NEXT: vmv1r.v v0, v9
93 ; CHECK-NEXT: vsetvli zero, zero, e16, mf2, ta, ma
94 ; CHECK-NEXT: vfncvtbf16.f.f.w v8, v10, v0.t
95 ; CHECK-NEXT: ret
96 %v = call <vscale x 2 x bfloat> @llvm.vp.round.nxv2bf16(<vscale x 2 x bfloat> %va, <vscale x 2 x i1> %m, i32 %evl)
97 ret <vscale x 2 x bfloat> %v
98 }
99 ; vp.round on <vscale x 2 x bfloat> with an all-true mask (splat (i1 true)): same widen/round/narrow sequence, with the widening and narrowing converts unmasked.
100 define <vscale x 2 x bfloat> @vp_round_nxv2bf16_unmasked(<vscale x 2 x bfloat> %va, i32 zeroext %evl) {
101 ; CHECK-LABEL: vp_round_nxv2bf16_unmasked:
102 ; CHECK: # %bb.0:
103 ; CHECK-NEXT: vsetvli zero, a0, e16, mf2, ta, ma
104 ; CHECK-NEXT: vfwcvtbf16.f.f.v v9, v8
105 ; CHECK-NEXT: lui a0, 307200
106 ; CHECK-NEXT: vsetvli zero, zero, e32, m1, ta, ma
107 ; CHECK-NEXT: vfabs.v v8, v9
108 ; CHECK-NEXT: fmv.w.x fa5, a0
109 ; CHECK-NEXT: vmflt.vf v0, v8, fa5
110 ; CHECK-NEXT: fsrmi a0, 4
111 ; CHECK-NEXT: vfcvt.x.f.v v8, v9, v0.t
112 ; CHECK-NEXT: fsrm a0
113 ; CHECK-NEXT: vfcvt.f.x.v v8, v8, v0.t
114 ; CHECK-NEXT: vsetvli zero, zero, e32, m1, ta, mu
115 ; CHECK-NEXT: vfsgnj.vv v9, v8, v9, v0.t
116 ; CHECK-NEXT: vsetvli zero, zero, e16, mf2, ta, ma
117 ; CHECK-NEXT: vfncvtbf16.f.f.w v8, v9
118 ; CHECK-NEXT: ret
119 %v = call <vscale x 2 x bfloat> @llvm.vp.round.nxv2bf16(<vscale x 2 x bfloat> %va, <vscale x 2 x i1> splat (i1 true), i32 %evl)
120 ret <vscale x 2 x bfloat> %v
121 }
123 declare <vscale x 4 x bfloat> @llvm.vp.round.nxv4bf16(<vscale x 4 x bfloat>, <vscale x 4 x i1>, i32)
124 ; vp.round on <vscale x 4 x bfloat> under mask %m: the expected code widens to e32 (m2), converts float->int between fsrmi 4 and the restoring fsrm, then narrows back under the original mask.
125 define <vscale x 4 x bfloat> @vp_round_nxv4bf16(<vscale x 4 x bfloat> %va, <vscale x 4 x i1> %m, i32 zeroext %evl) {
126 ; CHECK-LABEL: vp_round_nxv4bf16:
127 ; CHECK: # %bb.0:
128 ; CHECK-NEXT: vsetvli zero, a0, e16, m1, ta, ma
129 ; CHECK-NEXT: vmv1r.v v9, v0
130 ; CHECK-NEXT: vfwcvtbf16.f.f.v v10, v8, v0.t
131 ; CHECK-NEXT: lui a0, 307200
132 ; CHECK-NEXT: vmv1r.v v8, v0
133 ; CHECK-NEXT: vsetvli zero, zero, e32, m2, ta, ma
134 ; CHECK-NEXT: vfabs.v v12, v10, v0.t
135 ; CHECK-NEXT: fmv.w.x fa5, a0
136 ; CHECK-NEXT: vsetvli zero, zero, e32, m2, ta, mu
137 ; CHECK-NEXT: vmflt.vf v8, v12, fa5, v0.t
138 ; CHECK-NEXT: fsrmi a0, 4
139 ; CHECK-NEXT: vmv1r.v v0, v8
140 ; CHECK-NEXT: vsetvli zero, zero, e32, m2, ta, ma
141 ; CHECK-NEXT: vfcvt.x.f.v v12, v10, v0.t
142 ; CHECK-NEXT: fsrm a0
143 ; CHECK-NEXT: vfcvt.f.x.v v12, v12, v0.t
144 ; CHECK-NEXT: vsetvli zero, zero, e32, m2, ta, mu
145 ; CHECK-NEXT: vfsgnj.vv v10, v12, v10, v0.t
146 ; CHECK-NEXT: vmv1r.v v0, v9
147 ; CHECK-NEXT: vsetvli zero, zero, e16, m1, ta, ma
148 ; CHECK-NEXT: vfncvtbf16.f.f.w v8, v10, v0.t
149 ; CHECK-NEXT: ret
150 %v = call <vscale x 4 x bfloat> @llvm.vp.round.nxv4bf16(<vscale x 4 x bfloat> %va, <vscale x 4 x i1> %m, i32 %evl)
151 ret <vscale x 4 x bfloat> %v
152 }
153 ; vp.round on <vscale x 4 x bfloat> with an all-true mask (splat (i1 true)): same widen/round/narrow sequence, with the widening and narrowing converts unmasked.
154 define <vscale x 4 x bfloat> @vp_round_nxv4bf16_unmasked(<vscale x 4 x bfloat> %va, i32 zeroext %evl) {
155 ; CHECK-LABEL: vp_round_nxv4bf16_unmasked:
156 ; CHECK: # %bb.0:
157 ; CHECK-NEXT: vsetvli zero, a0, e16, m1, ta, ma
158 ; CHECK-NEXT: vfwcvtbf16.f.f.v v10, v8
159 ; CHECK-NEXT: lui a0, 307200
160 ; CHECK-NEXT: vsetvli zero, zero, e32, m2, ta, ma
161 ; CHECK-NEXT: vfabs.v v8, v10
162 ; CHECK-NEXT: fmv.w.x fa5, a0
163 ; CHECK-NEXT: vmflt.vf v0, v8, fa5
164 ; CHECK-NEXT: fsrmi a0, 4
165 ; CHECK-NEXT: vfcvt.x.f.v v8, v10, v0.t
166 ; CHECK-NEXT: fsrm a0
167 ; CHECK-NEXT: vfcvt.f.x.v v8, v8, v0.t
168 ; CHECK-NEXT: vsetvli zero, zero, e32, m2, ta, mu
169 ; CHECK-NEXT: vfsgnj.vv v10, v8, v10, v0.t
170 ; CHECK-NEXT: vsetvli zero, zero, e16, m1, ta, ma
171 ; CHECK-NEXT: vfncvtbf16.f.f.w v8, v10
172 ; CHECK-NEXT: ret
173 %v = call <vscale x 4 x bfloat> @llvm.vp.round.nxv4bf16(<vscale x 4 x bfloat> %va, <vscale x 4 x i1> splat (i1 true), i32 %evl)
174 ret <vscale x 4 x bfloat> %v
175 }
177 declare <vscale x 8 x bfloat> @llvm.vp.round.nxv8bf16(<vscale x 8 x bfloat>, <vscale x 8 x i1>, i32)
178 ; vp.round on <vscale x 8 x bfloat> under mask %m: the expected code widens to e32 (m4), converts float->int between fsrmi 4 and the restoring fsrm, then narrows back under the original mask.
179 define <vscale x 8 x bfloat> @vp_round_nxv8bf16(<vscale x 8 x bfloat> %va, <vscale x 8 x i1> %m, i32 zeroext %evl) {
180 ; CHECK-LABEL: vp_round_nxv8bf16:
181 ; CHECK: # %bb.0:
182 ; CHECK-NEXT: vsetvli zero, a0, e16, m2, ta, ma
183 ; CHECK-NEXT: vmv1r.v v10, v0
184 ; CHECK-NEXT: vfwcvtbf16.f.f.v v12, v8, v0.t
185 ; CHECK-NEXT: lui a0, 307200
186 ; CHECK-NEXT: vmv1r.v v8, v0
187 ; CHECK-NEXT: vsetvli zero, zero, e32, m4, ta, ma
188 ; CHECK-NEXT: vfabs.v v16, v12, v0.t
189 ; CHECK-NEXT: fmv.w.x fa5, a0
190 ; CHECK-NEXT: vsetvli zero, zero, e32, m4, ta, mu
191 ; CHECK-NEXT: vmflt.vf v8, v16, fa5, v0.t
192 ; CHECK-NEXT: fsrmi a0, 4
193 ; CHECK-NEXT: vmv1r.v v0, v8
194 ; CHECK-NEXT: vsetvli zero, zero, e32, m4, ta, ma
195 ; CHECK-NEXT: vfcvt.x.f.v v16, v12, v0.t
196 ; CHECK-NEXT: fsrm a0
197 ; CHECK-NEXT: vfcvt.f.x.v v16, v16, v0.t
198 ; CHECK-NEXT: vsetvli zero, zero, e32, m4, ta, mu
199 ; CHECK-NEXT: vfsgnj.vv v12, v16, v12, v0.t
200 ; CHECK-NEXT: vmv1r.v v0, v10
201 ; CHECK-NEXT: vsetvli zero, zero, e16, m2, ta, ma
202 ; CHECK-NEXT: vfncvtbf16.f.f.w v8, v12, v0.t
203 ; CHECK-NEXT: ret
204 %v = call <vscale x 8 x bfloat> @llvm.vp.round.nxv8bf16(<vscale x 8 x bfloat> %va, <vscale x 8 x i1> %m, i32 %evl)
205 ret <vscale x 8 x bfloat> %v
206 }
207 ; vp.round on <vscale x 8 x bfloat> with an all-true mask (splat (i1 true)): same widen/round/narrow sequence, with the widening and narrowing converts unmasked.
208 define <vscale x 8 x bfloat> @vp_round_nxv8bf16_unmasked(<vscale x 8 x bfloat> %va, i32 zeroext %evl) {
209 ; CHECK-LABEL: vp_round_nxv8bf16_unmasked:
210 ; CHECK: # %bb.0:
211 ; CHECK-NEXT: vsetvli zero, a0, e16, m2, ta, ma
212 ; CHECK-NEXT: vfwcvtbf16.f.f.v v12, v8
213 ; CHECK-NEXT: lui a0, 307200
214 ; CHECK-NEXT: vsetvli zero, zero, e32, m4, ta, ma
215 ; CHECK-NEXT: vfabs.v v8, v12
216 ; CHECK-NEXT: fmv.w.x fa5, a0
217 ; CHECK-NEXT: vmflt.vf v0, v8, fa5
218 ; CHECK-NEXT: fsrmi a0, 4
219 ; CHECK-NEXT: vfcvt.x.f.v v8, v12, v0.t
220 ; CHECK-NEXT: fsrm a0
221 ; CHECK-NEXT: vfcvt.f.x.v v8, v8, v0.t
222 ; CHECK-NEXT: vsetvli zero, zero, e32, m4, ta, mu
223 ; CHECK-NEXT: vfsgnj.vv v12, v8, v12, v0.t
224 ; CHECK-NEXT: vsetvli zero, zero, e16, m2, ta, ma
225 ; CHECK-NEXT: vfncvtbf16.f.f.w v8, v12
226 ; CHECK-NEXT: ret
227 %v = call <vscale x 8 x bfloat> @llvm.vp.round.nxv8bf16(<vscale x 8 x bfloat> %va, <vscale x 8 x i1> splat (i1 true), i32 %evl)
228 ret <vscale x 8 x bfloat> %v
229 }
231 declare <vscale x 16 x bfloat> @llvm.vp.round.nxv16bf16(<vscale x 16 x bfloat>, <vscale x 16 x i1>, i32)
232 ; vp.round on <vscale x 16 x bfloat> under mask %m: the expected code widens to e32 (m8), converts float->int between fsrmi 4 and the restoring fsrm, then narrows back under the original mask.
233 define <vscale x 16 x bfloat> @vp_round_nxv16bf16(<vscale x 16 x bfloat> %va, <vscale x 16 x i1> %m, i32 zeroext %evl) {
234 ; CHECK-LABEL: vp_round_nxv16bf16:
235 ; CHECK: # %bb.0:
236 ; CHECK-NEXT: vsetvli zero, a0, e16, m4, ta, ma
237 ; CHECK-NEXT: vmv1r.v v12, v0
238 ; CHECK-NEXT: vfwcvtbf16.f.f.v v16, v8, v0.t
239 ; CHECK-NEXT: lui a0, 307200
240 ; CHECK-NEXT: vmv1r.v v8, v0
241 ; CHECK-NEXT: vsetvli zero, zero, e32, m8, ta, ma
242 ; CHECK-NEXT: vfabs.v v24, v16, v0.t
243 ; CHECK-NEXT: fmv.w.x fa5, a0
244 ; CHECK-NEXT: vsetvli zero, zero, e32, m8, ta, mu
245 ; CHECK-NEXT: vmflt.vf v8, v24, fa5, v0.t
246 ; CHECK-NEXT: fsrmi a0, 4
247 ; CHECK-NEXT: vmv1r.v v0, v8
248 ; CHECK-NEXT: vsetvli zero, zero, e32, m8, ta, ma
249 ; CHECK-NEXT: vfcvt.x.f.v v24, v16, v0.t
250 ; CHECK-NEXT: fsrm a0
251 ; CHECK-NEXT: vfcvt.f.x.v v24, v24, v0.t
252 ; CHECK-NEXT: vsetvli zero, zero, e32, m8, ta, mu
253 ; CHECK-NEXT: vfsgnj.vv v16, v24, v16, v0.t
254 ; CHECK-NEXT: vmv1r.v v0, v12
255 ; CHECK-NEXT: vsetvli zero, zero, e16, m4, ta, ma
256 ; CHECK-NEXT: vfncvtbf16.f.f.w v8, v16, v0.t
257 ; CHECK-NEXT: ret
258 %v = call <vscale x 16 x bfloat> @llvm.vp.round.nxv16bf16(<vscale x 16 x bfloat> %va, <vscale x 16 x i1> %m, i32 %evl)
259 ret <vscale x 16 x bfloat> %v
260 }
261 ; vp.round on <vscale x 16 x bfloat> with an all-true mask (splat (i1 true)): same widen/round/narrow sequence, with the widening and narrowing converts unmasked.
262 define <vscale x 16 x bfloat> @vp_round_nxv16bf16_unmasked(<vscale x 16 x bfloat> %va, i32 zeroext %evl) {
263 ; CHECK-LABEL: vp_round_nxv16bf16_unmasked:
264 ; CHECK: # %bb.0:
265 ; CHECK-NEXT: vsetvli zero, a0, e16, m4, ta, ma
266 ; CHECK-NEXT: vfwcvtbf16.f.f.v v16, v8
267 ; CHECK-NEXT: lui a0, 307200
268 ; CHECK-NEXT: vsetvli zero, zero, e32, m8, ta, ma
269 ; CHECK-NEXT: vfabs.v v8, v16
270 ; CHECK-NEXT: fmv.w.x fa5, a0
271 ; CHECK-NEXT: vmflt.vf v0, v8, fa5
272 ; CHECK-NEXT: fsrmi a0, 4
273 ; CHECK-NEXT: vfcvt.x.f.v v8, v16, v0.t
274 ; CHECK-NEXT: fsrm a0
275 ; CHECK-NEXT: vfcvt.f.x.v v8, v8, v0.t
276 ; CHECK-NEXT: vsetvli zero, zero, e32, m8, ta, mu
277 ; CHECK-NEXT: vfsgnj.vv v16, v8, v16, v0.t
278 ; CHECK-NEXT: vsetvli zero, zero, e16, m4, ta, ma
279 ; CHECK-NEXT: vfncvtbf16.f.f.w v8, v16
280 ; CHECK-NEXT: ret
281 %v = call <vscale x 16 x bfloat> @llvm.vp.round.nxv16bf16(<vscale x 16 x bfloat> %va, <vscale x 16 x i1> splat (i1 true), i32 %evl)
282 ret <vscale x 16 x bfloat> %v
283 }
285 declare <vscale x 32 x bfloat> @llvm.vp.round.nxv32bf16(<vscale x 32 x bfloat>, <vscale x 32 x i1>, i32)
286 ; vp.round on <vscale x 32 x bfloat> under mask %m: the expected code spills the m8 input to the stack and rounds it in two e16/m4 passes (each widened to e32/m8), using a slid-down copy of the mask for the first pass and the saved original mask for the second.
287 define <vscale x 32 x bfloat> @vp_round_nxv32bf16(<vscale x 32 x bfloat> %va, <vscale x 32 x i1> %m, i32 zeroext %evl) {
288 ; CHECK-LABEL: vp_round_nxv32bf16:
289 ; CHECK: # %bb.0:
290 ; CHECK-NEXT: addi sp, sp, -16
291 ; CHECK-NEXT: .cfi_def_cfa_offset 16
292 ; CHECK-NEXT: csrr a1, vlenb
293 ; CHECK-NEXT: slli a1, a1, 3
294 ; CHECK-NEXT: sub sp, sp, a1
295 ; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 8 * vlenb
296 ; CHECK-NEXT: vsetvli a1, zero, e8, mf2, ta, ma
297 ; CHECK-NEXT: vmv1r.v v7, v0
298 ; CHECK-NEXT: csrr a2, vlenb
299 ; CHECK-NEXT: lui a3, 307200
300 ; CHECK-NEXT: slli a1, a2, 1
301 ; CHECK-NEXT: srli a2, a2, 2
302 ; CHECK-NEXT: fmv.w.x fa5, a3
303 ; CHECK-NEXT: sub a3, a0, a1
304 ; CHECK-NEXT: vslidedown.vx v17, v0, a2
305 ; CHECK-NEXT: sltu a2, a0, a3
306 ; CHECK-NEXT: vmv1r.v v18, v17
307 ; CHECK-NEXT: addi a2, a2, -1
308 ; CHECK-NEXT: and a2, a2, a3
309 ; CHECK-NEXT: vmv1r.v v0, v17
310 ; CHECK-NEXT: addi a3, sp, 16
311 ; CHECK-NEXT: vs8r.v v8, (a3) # Unknown-size Folded Spill
312 ; CHECK-NEXT: vsetvli zero, a2, e16, m4, ta, ma
313 ; CHECK-NEXT: vfwcvtbf16.f.f.v v24, v12, v0.t
314 ; CHECK-NEXT: vsetvli zero, zero, e32, m8, ta, ma
315 ; CHECK-NEXT: vfabs.v v8, v24, v0.t
316 ; CHECK-NEXT: vsetvli zero, zero, e32, m8, ta, mu
317 ; CHECK-NEXT: vmflt.vf v18, v8, fa5, v0.t
318 ; CHECK-NEXT: fsrmi a2, 4
319 ; CHECK-NEXT: vmv1r.v v0, v18
320 ; CHECK-NEXT: vsetvli zero, zero, e32, m8, ta, ma
321 ; CHECK-NEXT: vfcvt.x.f.v v8, v24, v0.t
322 ; CHECK-NEXT: fsrm a2
323 ; CHECK-NEXT: vfcvt.f.x.v v8, v8, v0.t
324 ; CHECK-NEXT: vsetvli zero, zero, e32, m8, ta, mu
325 ; CHECK-NEXT: vfsgnj.vv v24, v8, v24, v0.t
326 ; CHECK-NEXT: vmv1r.v v0, v17
327 ; CHECK-NEXT: vsetvli zero, zero, e16, m4, ta, ma
328 ; CHECK-NEXT: vfncvtbf16.f.f.w v12, v24, v0.t
329 ; CHECK-NEXT: bltu a0, a1, .LBB10_2
330 ; CHECK-NEXT: # %bb.1:
331 ; CHECK-NEXT: mv a0, a1
332 ; CHECK-NEXT: .LBB10_2:
333 ; CHECK-NEXT: vmv1r.v v0, v7
334 ; CHECK-NEXT: addi a1, sp, 16
335 ; CHECK-NEXT: vl8r.v v16, (a1) # Unknown-size Folded Reload
336 ; CHECK-NEXT: vsetvli zero, a0, e16, m4, ta, ma
337 ; CHECK-NEXT: vfwcvtbf16.f.f.v v24, v16, v0.t
338 ; CHECK-NEXT: vmv1r.v v8, v7
339 ; CHECK-NEXT: vsetvli zero, zero, e32, m8, ta, ma
340 ; CHECK-NEXT: vfabs.v v16, v24, v0.t
341 ; CHECK-NEXT: vsetvli zero, zero, e32, m8, ta, mu
342 ; CHECK-NEXT: vmflt.vf v8, v16, fa5, v0.t
343 ; CHECK-NEXT: fsrmi a0, 4
344 ; CHECK-NEXT: vmv1r.v v0, v8
345 ; CHECK-NEXT: vsetvli zero, zero, e32, m8, ta, ma
346 ; CHECK-NEXT: vfcvt.x.f.v v16, v24, v0.t
347 ; CHECK-NEXT: fsrm a0
348 ; CHECK-NEXT: vfcvt.f.x.v v16, v16, v0.t
349 ; CHECK-NEXT: vsetvli zero, zero, e32, m8, ta, mu
350 ; CHECK-NEXT: vfsgnj.vv v24, v16, v24, v0.t
351 ; CHECK-NEXT: vmv1r.v v0, v7
352 ; CHECK-NEXT: vsetvli zero, zero, e16, m4, ta, ma
353 ; CHECK-NEXT: vfncvtbf16.f.f.w v8, v24, v0.t
354 ; CHECK-NEXT: csrr a0, vlenb
355 ; CHECK-NEXT: slli a0, a0, 3
356 ; CHECK-NEXT: add sp, sp, a0
357 ; CHECK-NEXT: .cfi_def_cfa sp, 16
358 ; CHECK-NEXT: addi sp, sp, 16
359 ; CHECK-NEXT: .cfi_def_cfa_offset 0
360 ; CHECK-NEXT: ret
361 %v = call <vscale x 32 x bfloat> @llvm.vp.round.nxv32bf16(<vscale x 32 x bfloat> %va, <vscale x 32 x i1> %m, i32 %evl)
362 ret <vscale x 32 x bfloat> %v
363 }
364 ; vp.round on <vscale x 32 x bfloat> with an all-true mask (splat (i1 true)): the expected code spills the m8 input and rounds it in two e16/m4 passes; the first pass runs under a mask built with vmset.m and slid down, the second widens and narrows unmasked.
365 define <vscale x 32 x bfloat> @vp_round_nxv32bf16_unmasked(<vscale x 32 x bfloat> %va, i32 zeroext %evl) {
366 ; CHECK-LABEL: vp_round_nxv32bf16_unmasked:
367 ; CHECK: # %bb.0:
368 ; CHECK-NEXT: addi sp, sp, -16
369 ; CHECK-NEXT: .cfi_def_cfa_offset 16
370 ; CHECK-NEXT: csrr a1, vlenb
371 ; CHECK-NEXT: slli a1, a1, 3
372 ; CHECK-NEXT: sub sp, sp, a1
373 ; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 8 * vlenb
374 ; CHECK-NEXT: csrr a2, vlenb
375 ; CHECK-NEXT: vsetvli a1, zero, e8, m4, ta, ma
376 ; CHECK-NEXT: vmset.m v16
377 ; CHECK-NEXT: lui a3, 307200
378 ; CHECK-NEXT: slli a1, a2, 1
379 ; CHECK-NEXT: srli a2, a2, 2
380 ; CHECK-NEXT: fmv.w.x fa5, a3
381 ; CHECK-NEXT: sub a3, a0, a1
382 ; CHECK-NEXT: vsetvli a4, zero, e8, mf2, ta, ma
383 ; CHECK-NEXT: vslidedown.vx v16, v16, a2
384 ; CHECK-NEXT: sltu a2, a0, a3
385 ; CHECK-NEXT: vmv1r.v v17, v16
386 ; CHECK-NEXT: addi a2, a2, -1
387 ; CHECK-NEXT: and a2, a2, a3
388 ; CHECK-NEXT: vmv1r.v v0, v16
389 ; CHECK-NEXT: addi a3, sp, 16
390 ; CHECK-NEXT: vs8r.v v8, (a3) # Unknown-size Folded Spill
391 ; CHECK-NEXT: vsetvli zero, a2, e16, m4, ta, ma
392 ; CHECK-NEXT: vfwcvtbf16.f.f.v v24, v12, v0.t
393 ; CHECK-NEXT: vsetvli zero, zero, e32, m8, ta, ma
394 ; CHECK-NEXT: vfabs.v v8, v24, v0.t
395 ; CHECK-NEXT: vsetvli zero, zero, e32, m8, ta, mu
396 ; CHECK-NEXT: vmflt.vf v17, v8, fa5, v0.t
397 ; CHECK-NEXT: fsrmi a2, 4
398 ; CHECK-NEXT: vmv1r.v v0, v17
399 ; CHECK-NEXT: vsetvli zero, zero, e32, m8, ta, ma
400 ; CHECK-NEXT: vfcvt.x.f.v v8, v24, v0.t
401 ; CHECK-NEXT: fsrm a2
402 ; CHECK-NEXT: vfcvt.f.x.v v8, v8, v0.t
403 ; CHECK-NEXT: vsetvli zero, zero, e32, m8, ta, mu
404 ; CHECK-NEXT: vfsgnj.vv v24, v8, v24, v0.t
405 ; CHECK-NEXT: vmv1r.v v0, v16
406 ; CHECK-NEXT: vsetvli zero, zero, e16, m4, ta, ma
407 ; CHECK-NEXT: vfncvtbf16.f.f.w v12, v24, v0.t
408 ; CHECK-NEXT: bltu a0, a1, .LBB11_2
409 ; CHECK-NEXT: # %bb.1:
410 ; CHECK-NEXT: mv a0, a1
411 ; CHECK-NEXT: .LBB11_2:
412 ; CHECK-NEXT: addi a1, sp, 16
413 ; CHECK-NEXT: vl8r.v v24, (a1) # Unknown-size Folded Reload
414 ; CHECK-NEXT: vsetvli zero, a0, e16, m4, ta, ma
415 ; CHECK-NEXT: vfwcvtbf16.f.f.v v16, v24
416 ; CHECK-NEXT: vsetvli zero, zero, e32, m8, ta, ma
417 ; CHECK-NEXT: vfabs.v v24, v16
418 ; CHECK-NEXT: vmflt.vf v0, v24, fa5
419 ; CHECK-NEXT: fsrmi a0, 4
420 ; CHECK-NEXT: vfcvt.x.f.v v24, v16, v0.t
421 ; CHECK-NEXT: fsrm a0
422 ; CHECK-NEXT: vfcvt.f.x.v v24, v24, v0.t
423 ; CHECK-NEXT: vsetvli zero, zero, e32, m8, ta, mu
424 ; CHECK-NEXT: vfsgnj.vv v16, v24, v16, v0.t
425 ; CHECK-NEXT: vsetvli zero, zero, e16, m4, ta, ma
426 ; CHECK-NEXT: vfncvtbf16.f.f.w v8, v16
427 ; CHECK-NEXT: csrr a0, vlenb
428 ; CHECK-NEXT: slli a0, a0, 3
429 ; CHECK-NEXT: add sp, sp, a0
430 ; CHECK-NEXT: .cfi_def_cfa sp, 16
431 ; CHECK-NEXT: addi sp, sp, 16
432 ; CHECK-NEXT: .cfi_def_cfa_offset 0
433 ; CHECK-NEXT: ret
434 %v = call <vscale x 32 x bfloat> @llvm.vp.round.nxv32bf16(<vscale x 32 x bfloat> %va, <vscale x 32 x i1> splat (i1 true), i32 %evl)
435 ret <vscale x 32 x bfloat> %v
436 }
437 declare <vscale x 1 x half> @llvm.vp.round.nxv1f16(<vscale x 1 x half>, <vscale x 1 x i1>, i32)
438 ; vp.round on <vscale x 1 x half> under mask %m: the +zvfh runs round directly at e16, the +zvfhmin runs widen to e32 and narrow back; both bracket the float->int convert with fsrmi 4 and the restoring fsrm.
439 define <vscale x 1 x half> @vp_round_nxv1f16(<vscale x 1 x half> %va, <vscale x 1 x i1> %m, i32 zeroext %evl) {
440 ; ZVFH-LABEL: vp_round_nxv1f16:
441 ; ZVFH: # %bb.0:
442 ; ZVFH-NEXT: lui a1, %hi(.LCPI12_0)
443 ; ZVFH-NEXT: flh fa5, %lo(.LCPI12_0)(a1)
444 ; ZVFH-NEXT: vsetvli zero, a0, e16, mf4, ta, ma
445 ; ZVFH-NEXT: vfabs.v v9, v8, v0.t
446 ; ZVFH-NEXT: vsetvli zero, zero, e16, mf4, ta, mu
447 ; ZVFH-NEXT: vmflt.vf v0, v9, fa5, v0.t
448 ; ZVFH-NEXT: fsrmi a0, 4
449 ; ZVFH-NEXT: vsetvli zero, zero, e16, mf4, ta, ma
450 ; ZVFH-NEXT: vfcvt.x.f.v v9, v8, v0.t
451 ; ZVFH-NEXT: fsrm a0
452 ; ZVFH-NEXT: vfcvt.f.x.v v9, v9, v0.t
453 ; ZVFH-NEXT: vsetvli zero, zero, e16, mf4, ta, mu
454 ; ZVFH-NEXT: vfsgnj.vv v8, v9, v8, v0.t
455 ; ZVFH-NEXT: ret
456 ;
457 ; ZVFHMIN-LABEL: vp_round_nxv1f16:
458 ; ZVFHMIN: # %bb.0:
459 ; ZVFHMIN-NEXT: vsetvli zero, a0, e16, mf4, ta, ma
460 ; ZVFHMIN-NEXT: vmv1r.v v9, v0
461 ; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v8, v0.t
462 ; ZVFHMIN-NEXT: lui a0, 307200
463 ; ZVFHMIN-NEXT: vmv1r.v v8, v0
464 ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, mf2, ta, ma
465 ; ZVFHMIN-NEXT: vfabs.v v11, v10, v0.t
466 ; ZVFHMIN-NEXT: fmv.w.x fa5, a0
467 ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, mf2, ta, mu
468 ; ZVFHMIN-NEXT: vmflt.vf v8, v11, fa5, v0.t
469 ; ZVFHMIN-NEXT: fsrmi a0, 4
470 ; ZVFHMIN-NEXT: vmv1r.v v0, v8
471 ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, mf2, ta, ma
472 ; ZVFHMIN-NEXT: vfcvt.x.f.v v11, v10, v0.t
473 ; ZVFHMIN-NEXT: fsrm a0
474 ; ZVFHMIN-NEXT: vfcvt.f.x.v v11, v11, v0.t
475 ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, mf2, ta, mu
476 ; ZVFHMIN-NEXT: vfsgnj.vv v10, v11, v10, v0.t
477 ; ZVFHMIN-NEXT: vmv1r.v v0, v9
478 ; ZVFHMIN-NEXT: vsetvli zero, zero, e16, mf4, ta, ma
479 ; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v10, v0.t
480 ; ZVFHMIN-NEXT: ret
481 %v = call <vscale x 1 x half> @llvm.vp.round.nxv1f16(<vscale x 1 x half> %va, <vscale x 1 x i1> %m, i32 %evl)
482 ret <vscale x 1 x half> %v
483 }
484 ; vp.round on <vscale x 1 x half> with an all-true mask (splat (i1 true)): the +zvfh runs round directly at e16, the +zvfhmin runs widen to e32 and narrow back; both bracket the float->int convert with fsrmi 4 and the restoring fsrm.
485 define <vscale x 1 x half> @vp_round_nxv1f16_unmasked(<vscale x 1 x half> %va, i32 zeroext %evl) {
486 ; ZVFH-LABEL: vp_round_nxv1f16_unmasked:
487 ; ZVFH: # %bb.0:
488 ; ZVFH-NEXT: lui a1, %hi(.LCPI13_0)
489 ; ZVFH-NEXT: flh fa5, %lo(.LCPI13_0)(a1)
490 ; ZVFH-NEXT: vsetvli zero, a0, e16, mf4, ta, ma
491 ; ZVFH-NEXT: vfabs.v v9, v8
492 ; ZVFH-NEXT: vmflt.vf v0, v9, fa5
493 ; ZVFH-NEXT: fsrmi a0, 4
494 ; ZVFH-NEXT: vfcvt.x.f.v v9, v8, v0.t
495 ; ZVFH-NEXT: fsrm a0
496 ; ZVFH-NEXT: vfcvt.f.x.v v9, v9, v0.t
497 ; ZVFH-NEXT: vsetvli zero, zero, e16, mf4, ta, mu
498 ; ZVFH-NEXT: vfsgnj.vv v8, v9, v8, v0.t
499 ; ZVFH-NEXT: ret
500 ;
501 ; ZVFHMIN-LABEL: vp_round_nxv1f16_unmasked:
502 ; ZVFHMIN: # %bb.0:
503 ; ZVFHMIN-NEXT: vsetvli zero, a0, e16, mf4, ta, ma
504 ; ZVFHMIN-NEXT: vfwcvt.f.f.v v9, v8
505 ; ZVFHMIN-NEXT: lui a0, 307200
506 ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, mf2, ta, ma
507 ; ZVFHMIN-NEXT: vfabs.v v8, v9
508 ; ZVFHMIN-NEXT: fmv.w.x fa5, a0
509 ; ZVFHMIN-NEXT: vmflt.vf v0, v8, fa5
510 ; ZVFHMIN-NEXT: fsrmi a0, 4
511 ; ZVFHMIN-NEXT: vfcvt.x.f.v v8, v9, v0.t
512 ; ZVFHMIN-NEXT: fsrm a0
513 ; ZVFHMIN-NEXT: vfcvt.f.x.v v8, v8, v0.t
514 ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, mf2, ta, mu
515 ; ZVFHMIN-NEXT: vfsgnj.vv v9, v8, v9, v0.t
516 ; ZVFHMIN-NEXT: vsetvli zero, zero, e16, mf4, ta, ma
517 ; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v9
518 ; ZVFHMIN-NEXT: ret
519 %v = call <vscale x 1 x half> @llvm.vp.round.nxv1f16(<vscale x 1 x half> %va, <vscale x 1 x i1> splat (i1 true), i32 %evl)
520 ret <vscale x 1 x half> %v
521 }
523 declare <vscale x 2 x half> @llvm.vp.round.nxv2f16(<vscale x 2 x half>, <vscale x 2 x i1>, i32)
524 ; vp.round on <vscale x 2 x half> under mask %m: the +zvfh runs round directly at e16, the +zvfhmin runs widen to e32 and narrow back; both bracket the float->int convert with fsrmi 4 and the restoring fsrm.
525 define <vscale x 2 x half> @vp_round_nxv2f16(<vscale x 2 x half> %va, <vscale x 2 x i1> %m, i32 zeroext %evl) {
526 ; ZVFH-LABEL: vp_round_nxv2f16:
527 ; ZVFH: # %bb.0:
528 ; ZVFH-NEXT: lui a1, %hi(.LCPI14_0)
529 ; ZVFH-NEXT: flh fa5, %lo(.LCPI14_0)(a1)
530 ; ZVFH-NEXT: vsetvli zero, a0, e16, mf2, ta, ma
531 ; ZVFH-NEXT: vfabs.v v9, v8, v0.t
532 ; ZVFH-NEXT: vsetvli zero, zero, e16, mf2, ta, mu
533 ; ZVFH-NEXT: vmflt.vf v0, v9, fa5, v0.t
534 ; ZVFH-NEXT: fsrmi a0, 4
535 ; ZVFH-NEXT: vsetvli zero, zero, e16, mf2, ta, ma
536 ; ZVFH-NEXT: vfcvt.x.f.v v9, v8, v0.t
537 ; ZVFH-NEXT: fsrm a0
538 ; ZVFH-NEXT: vfcvt.f.x.v v9, v9, v0.t
539 ; ZVFH-NEXT: vsetvli zero, zero, e16, mf2, ta, mu
540 ; ZVFH-NEXT: vfsgnj.vv v8, v9, v8, v0.t
541 ; ZVFH-NEXT: ret
542 ;
543 ; ZVFHMIN-LABEL: vp_round_nxv2f16:
544 ; ZVFHMIN: # %bb.0:
545 ; ZVFHMIN-NEXT: vsetvli zero, a0, e16, mf2, ta, ma
546 ; ZVFHMIN-NEXT: vmv1r.v v9, v0
547 ; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v8, v0.t
548 ; ZVFHMIN-NEXT: lui a0, 307200
549 ; ZVFHMIN-NEXT: vmv1r.v v8, v0
550 ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m1, ta, ma
551 ; ZVFHMIN-NEXT: vfabs.v v11, v10, v0.t
552 ; ZVFHMIN-NEXT: fmv.w.x fa5, a0
553 ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m1, ta, mu
554 ; ZVFHMIN-NEXT: vmflt.vf v8, v11, fa5, v0.t
555 ; ZVFHMIN-NEXT: fsrmi a0, 4
556 ; ZVFHMIN-NEXT: vmv.v.v v0, v8
557 ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m1, ta, ma
558 ; ZVFHMIN-NEXT: vfcvt.x.f.v v11, v10, v0.t
559 ; ZVFHMIN-NEXT: fsrm a0
560 ; ZVFHMIN-NEXT: vfcvt.f.x.v v11, v11, v0.t
561 ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m1, ta, mu
562 ; ZVFHMIN-NEXT: vfsgnj.vv v10, v11, v10, v0.t
563 ; ZVFHMIN-NEXT: vmv1r.v v0, v9
564 ; ZVFHMIN-NEXT: vsetvli zero, zero, e16, mf2, ta, ma
565 ; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v10, v0.t
566 ; ZVFHMIN-NEXT: ret
567 %v = call <vscale x 2 x half> @llvm.vp.round.nxv2f16(<vscale x 2 x half> %va, <vscale x 2 x i1> %m, i32 %evl)
568 ret <vscale x 2 x half> %v
569 }
570 ; vp.round on <vscale x 2 x half> with an all-true mask (splat (i1 true)): the +zvfh runs round directly at e16, the +zvfhmin runs widen to e32 and narrow back; both bracket the float->int convert with fsrmi 4 and the restoring fsrm.
571 define <vscale x 2 x half> @vp_round_nxv2f16_unmasked(<vscale x 2 x half> %va, i32 zeroext %evl) {
572 ; ZVFH-LABEL: vp_round_nxv2f16_unmasked:
573 ; ZVFH: # %bb.0:
574 ; ZVFH-NEXT: lui a1, %hi(.LCPI15_0)
575 ; ZVFH-NEXT: flh fa5, %lo(.LCPI15_0)(a1)
576 ; ZVFH-NEXT: vsetvli zero, a0, e16, mf2, ta, ma
577 ; ZVFH-NEXT: vfabs.v v9, v8
578 ; ZVFH-NEXT: vmflt.vf v0, v9, fa5
579 ; ZVFH-NEXT: fsrmi a0, 4
580 ; ZVFH-NEXT: vfcvt.x.f.v v9, v8, v0.t
581 ; ZVFH-NEXT: fsrm a0
582 ; ZVFH-NEXT: vfcvt.f.x.v v9, v9, v0.t
583 ; ZVFH-NEXT: vsetvli zero, zero, e16, mf2, ta, mu
584 ; ZVFH-NEXT: vfsgnj.vv v8, v9, v8, v0.t
585 ; ZVFH-NEXT: ret
586 ;
587 ; ZVFHMIN-LABEL: vp_round_nxv2f16_unmasked:
588 ; ZVFHMIN: # %bb.0:
589 ; ZVFHMIN-NEXT: vsetvli zero, a0, e16, mf2, ta, ma
590 ; ZVFHMIN-NEXT: vfwcvt.f.f.v v9, v8
591 ; ZVFHMIN-NEXT: lui a0, 307200
592 ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m1, ta, ma
593 ; ZVFHMIN-NEXT: vfabs.v v8, v9
594 ; ZVFHMIN-NEXT: fmv.w.x fa5, a0
595 ; ZVFHMIN-NEXT: vmflt.vf v0, v8, fa5
596 ; ZVFHMIN-NEXT: fsrmi a0, 4
597 ; ZVFHMIN-NEXT: vfcvt.x.f.v v8, v9, v0.t
598 ; ZVFHMIN-NEXT: fsrm a0
599 ; ZVFHMIN-NEXT: vfcvt.f.x.v v8, v8, v0.t
600 ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m1, ta, mu
601 ; ZVFHMIN-NEXT: vfsgnj.vv v9, v8, v9, v0.t
602 ; ZVFHMIN-NEXT: vsetvli zero, zero, e16, mf2, ta, ma
603 ; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v9
604 ; ZVFHMIN-NEXT: ret
605 %v = call <vscale x 2 x half> @llvm.vp.round.nxv2f16(<vscale x 2 x half> %va, <vscale x 2 x i1> splat (i1 true), i32 %evl)
606 ret <vscale x 2 x half> %v
607 }
609 declare <vscale x 4 x half> @llvm.vp.round.nxv4f16(<vscale x 4 x half>, <vscale x 4 x i1>, i32)
610 ; vp.round on <vscale x 4 x half> under mask %m: the +zvfh runs round directly at e16, the +zvfhmin runs widen to e32 and narrow back; both bracket the float->int convert with fsrmi 4 and the restoring fsrm.
611 define <vscale x 4 x half> @vp_round_nxv4f16(<vscale x 4 x half> %va, <vscale x 4 x i1> %m, i32 zeroext %evl) {
612 ; ZVFH-LABEL: vp_round_nxv4f16:
613 ; ZVFH: # %bb.0:
614 ; ZVFH-NEXT: lui a1, %hi(.LCPI16_0)
615 ; ZVFH-NEXT: flh fa5, %lo(.LCPI16_0)(a1)
616 ; ZVFH-NEXT: vsetvli zero, a0, e16, m1, ta, ma
617 ; ZVFH-NEXT: vfabs.v v9, v8, v0.t
618 ; ZVFH-NEXT: vsetvli zero, zero, e16, m1, ta, mu
619 ; ZVFH-NEXT: vmflt.vf v0, v9, fa5, v0.t
620 ; ZVFH-NEXT: fsrmi a0, 4
621 ; ZVFH-NEXT: vsetvli zero, zero, e16, m1, ta, ma
622 ; ZVFH-NEXT: vfcvt.x.f.v v9, v8, v0.t
623 ; ZVFH-NEXT: fsrm a0
624 ; ZVFH-NEXT: vfcvt.f.x.v v9, v9, v0.t
625 ; ZVFH-NEXT: vsetvli zero, zero, e16, m1, ta, mu
626 ; ZVFH-NEXT: vfsgnj.vv v8, v9, v8, v0.t
627 ; ZVFH-NEXT: ret
628 ;
629 ; ZVFHMIN-LABEL: vp_round_nxv4f16:
630 ; ZVFHMIN: # %bb.0:
631 ; ZVFHMIN-NEXT: vsetvli zero, a0, e16, m1, ta, ma
632 ; ZVFHMIN-NEXT: vmv1r.v v9, v0
633 ; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v8, v0.t
634 ; ZVFHMIN-NEXT: lui a0, 307200
635 ; ZVFHMIN-NEXT: vmv1r.v v8, v0
636 ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m2, ta, ma
637 ; ZVFHMIN-NEXT: vfabs.v v12, v10, v0.t
638 ; ZVFHMIN-NEXT: fmv.w.x fa5, a0
639 ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m2, ta, mu
640 ; ZVFHMIN-NEXT: vmflt.vf v8, v12, fa5, v0.t
641 ; ZVFHMIN-NEXT: fsrmi a0, 4
642 ; ZVFHMIN-NEXT: vmv1r.v v0, v8
643 ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m2, ta, ma
644 ; ZVFHMIN-NEXT: vfcvt.x.f.v v12, v10, v0.t
645 ; ZVFHMIN-NEXT: fsrm a0
646 ; ZVFHMIN-NEXT: vfcvt.f.x.v v12, v12, v0.t
647 ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m2, ta, mu
648 ; ZVFHMIN-NEXT: vfsgnj.vv v10, v12, v10, v0.t
649 ; ZVFHMIN-NEXT: vmv1r.v v0, v9
650 ; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m1, ta, ma
651 ; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v10, v0.t
652 ; ZVFHMIN-NEXT: ret
653 %v = call <vscale x 4 x half> @llvm.vp.round.nxv4f16(<vscale x 4 x half> %va, <vscale x 4 x i1> %m, i32 %evl)
654 ret <vscale x 4 x half> %v
655 }
656 ; vp.round on <vscale x 4 x half> with an all-true mask (splat (i1 true)): the +zvfh runs round directly at e16, the +zvfhmin runs widen to e32 and narrow back; both bracket the float->int convert with fsrmi 4 and the restoring fsrm.
657 define <vscale x 4 x half> @vp_round_nxv4f16_unmasked(<vscale x 4 x half> %va, i32 zeroext %evl) {
658 ; ZVFH-LABEL: vp_round_nxv4f16_unmasked:
659 ; ZVFH: # %bb.0:
660 ; ZVFH-NEXT: lui a1, %hi(.LCPI17_0)
661 ; ZVFH-NEXT: flh fa5, %lo(.LCPI17_0)(a1)
662 ; ZVFH-NEXT: vsetvli zero, a0, e16, m1, ta, ma
663 ; ZVFH-NEXT: vfabs.v v9, v8
664 ; ZVFH-NEXT: vmflt.vf v0, v9, fa5
665 ; ZVFH-NEXT: fsrmi a0, 4
666 ; ZVFH-NEXT: vfcvt.x.f.v v9, v8, v0.t
667 ; ZVFH-NEXT: fsrm a0
668 ; ZVFH-NEXT: vfcvt.f.x.v v9, v9, v0.t
669 ; ZVFH-NEXT: vsetvli zero, zero, e16, m1, ta, mu
670 ; ZVFH-NEXT: vfsgnj.vv v8, v9, v8, v0.t
671 ; ZVFH-NEXT: ret
672 ;
673 ; ZVFHMIN-LABEL: vp_round_nxv4f16_unmasked:
674 ; ZVFHMIN: # %bb.0:
675 ; ZVFHMIN-NEXT: vsetvli zero, a0, e16, m1, ta, ma
676 ; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v8
677 ; ZVFHMIN-NEXT: lui a0, 307200
678 ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m2, ta, ma
679 ; ZVFHMIN-NEXT: vfabs.v v8, v10
680 ; ZVFHMIN-NEXT: fmv.w.x fa5, a0
681 ; ZVFHMIN-NEXT: vmflt.vf v0, v8, fa5
682 ; ZVFHMIN-NEXT: fsrmi a0, 4
683 ; ZVFHMIN-NEXT: vfcvt.x.f.v v8, v10, v0.t
684 ; ZVFHMIN-NEXT: fsrm a0
685 ; ZVFHMIN-NEXT: vfcvt.f.x.v v8, v8, v0.t
686 ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m2, ta, mu
687 ; ZVFHMIN-NEXT: vfsgnj.vv v10, v8, v10, v0.t
688 ; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m1, ta, ma
689 ; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v10
690 ; ZVFHMIN-NEXT: ret
691 %v = call <vscale x 4 x half> @llvm.vp.round.nxv4f16(<vscale x 4 x half> %va, <vscale x 4 x i1> splat (i1 true), i32 %evl)
692 ret <vscale x 4 x half> %v
693 }
695 declare <vscale x 8 x half> @llvm.vp.round.nxv8f16(<vscale x 8 x half>, <vscale x 8 x i1>, i32)
696 ; vp.round on <vscale x 8 x half> under mask %m: the +zvfh runs round directly at e16, the +zvfhmin runs widen to e32 and narrow back; both bracket the float->int convert with fsrmi 4 and the restoring fsrm.
697 define <vscale x 8 x half> @vp_round_nxv8f16(<vscale x 8 x half> %va, <vscale x 8 x i1> %m, i32 zeroext %evl) {
698 ; ZVFH-LABEL: vp_round_nxv8f16:
699 ; ZVFH: # %bb.0:
700 ; ZVFH-NEXT: vsetvli zero, a0, e16, m2, ta, ma
701 ; ZVFH-NEXT: vmv1r.v v10, v0
702 ; ZVFH-NEXT: lui a0, %hi(.LCPI18_0)
703 ; ZVFH-NEXT: flh fa5, %lo(.LCPI18_0)(a0)
704 ; ZVFH-NEXT: vfabs.v v12, v8, v0.t
705 ; ZVFH-NEXT: vsetvli zero, zero, e16, m2, ta, mu
706 ; ZVFH-NEXT: vmflt.vf v10, v12, fa5, v0.t
707 ; ZVFH-NEXT: fsrmi a0, 4
708 ; ZVFH-NEXT: vmv1r.v v0, v10
709 ; ZVFH-NEXT: vsetvli zero, zero, e16, m2, ta, ma
710 ; ZVFH-NEXT: vfcvt.x.f.v v12, v8, v0.t
711 ; ZVFH-NEXT: fsrm a0
712 ; ZVFH-NEXT: vfcvt.f.x.v v12, v12, v0.t
713 ; ZVFH-NEXT: vsetvli zero, zero, e16, m2, ta, mu
714 ; ZVFH-NEXT: vfsgnj.vv v8, v12, v8, v0.t
715 ; ZVFH-NEXT: ret
716 ;
717 ; ZVFHMIN-LABEL: vp_round_nxv8f16:
718 ; ZVFHMIN: # %bb.0:
719 ; ZVFHMIN-NEXT: vsetvli zero, a0, e16, m2, ta, ma
720 ; ZVFHMIN-NEXT: vmv1r.v v10, v0
721 ; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v8, v0.t
722 ; ZVFHMIN-NEXT: lui a0, 307200
723 ; ZVFHMIN-NEXT: vmv1r.v v8, v0
724 ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m4, ta, ma
725 ; ZVFHMIN-NEXT: vfabs.v v16, v12, v0.t
726 ; ZVFHMIN-NEXT: fmv.w.x fa5, a0
727 ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m4, ta, mu
728 ; ZVFHMIN-NEXT: vmflt.vf v8, v16, fa5, v0.t
729 ; ZVFHMIN-NEXT: fsrmi a0, 4
730 ; ZVFHMIN-NEXT: vmv1r.v v0, v8
731 ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m4, ta, ma
732 ; ZVFHMIN-NEXT: vfcvt.x.f.v v16, v12, v0.t
733 ; ZVFHMIN-NEXT: fsrm a0
734 ; ZVFHMIN-NEXT: vfcvt.f.x.v v16, v16, v0.t
735 ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m4, ta, mu
736 ; ZVFHMIN-NEXT: vfsgnj.vv v12, v16, v12, v0.t
737 ; ZVFHMIN-NEXT: vmv1r.v v0, v10
738 ; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m2, ta, ma
739 ; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v12, v0.t
740 ; ZVFHMIN-NEXT: ret
741 %v = call <vscale x 8 x half> @llvm.vp.round.nxv8f16(<vscale x 8 x half> %va, <vscale x 8 x i1> %m, i32 %evl)
742 ret <vscale x 8 x half> %v
743 }
744 ; vp.round on <vscale x 8 x half> with an all-true mask (splat (i1 true)): the +zvfh runs round directly at e16, the +zvfhmin runs widen to e32 and narrow back; both bracket the float->int convert with fsrmi 4 and the restoring fsrm.
745 define <vscale x 8 x half> @vp_round_nxv8f16_unmasked(<vscale x 8 x half> %va, i32 zeroext %evl) {
746 ; ZVFH-LABEL: vp_round_nxv8f16_unmasked:
747 ; ZVFH: # %bb.0:
748 ; ZVFH-NEXT: lui a1, %hi(.LCPI19_0)
749 ; ZVFH-NEXT: flh fa5, %lo(.LCPI19_0)(a1)
750 ; ZVFH-NEXT: vsetvli zero, a0, e16, m2, ta, ma
751 ; ZVFH-NEXT: vfabs.v v10, v8
752 ; ZVFH-NEXT: vmflt.vf v0, v10, fa5
753 ; ZVFH-NEXT: fsrmi a0, 4
754 ; ZVFH-NEXT: vfcvt.x.f.v v10, v8, v0.t
755 ; ZVFH-NEXT: fsrm a0
756 ; ZVFH-NEXT: vfcvt.f.x.v v10, v10, v0.t
757 ; ZVFH-NEXT: vsetvli zero, zero, e16, m2, ta, mu
758 ; ZVFH-NEXT: vfsgnj.vv v8, v10, v8, v0.t
759 ; ZVFH-NEXT: ret
760 ;
761 ; ZVFHMIN-LABEL: vp_round_nxv8f16_unmasked:
762 ; ZVFHMIN: # %bb.0:
763 ; ZVFHMIN-NEXT: vsetvli zero, a0, e16, m2, ta, ma
764 ; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v8
765 ; ZVFHMIN-NEXT: lui a0, 307200
766 ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m4, ta, ma
767 ; ZVFHMIN-NEXT: vfabs.v v8, v12
768 ; ZVFHMIN-NEXT: fmv.w.x fa5, a0
769 ; ZVFHMIN-NEXT: vmflt.vf v0, v8, fa5
770 ; ZVFHMIN-NEXT: fsrmi a0, 4
771 ; ZVFHMIN-NEXT: vfcvt.x.f.v v8, v12, v0.t
772 ; ZVFHMIN-NEXT: fsrm a0
773 ; ZVFHMIN-NEXT: vfcvt.f.x.v v8, v8, v0.t
774 ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m4, ta, mu
775 ; ZVFHMIN-NEXT: vfsgnj.vv v12, v8, v12, v0.t
776 ; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m2, ta, ma
777 ; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v12
778 ; ZVFHMIN-NEXT: ret
779 %v = call <vscale x 8 x half> @llvm.vp.round.nxv8f16(<vscale x 8 x half> %va, <vscale x 8 x i1> splat (i1 true), i32 %evl)
780 ret <vscale x 8 x half> %v
781 }
783 declare <vscale x 16 x half> @llvm.vp.round.nxv16f16(<vscale x 16 x half>, <vscale x 16 x i1>, i32)
784 ; vp.round on <vscale x 16 x half> under mask %m: the +zvfh runs round directly at e16, the +zvfhmin runs widen to e32 and narrow back; both bracket the float->int convert with fsrmi 4 and the restoring fsrm.
785 define <vscale x 16 x half> @vp_round_nxv16f16(<vscale x 16 x half> %va, <vscale x 16 x i1> %m, i32 zeroext %evl) {
786 ; ZVFH-LABEL: vp_round_nxv16f16:
787 ; ZVFH: # %bb.0:
788 ; ZVFH-NEXT: vsetvli zero, a0, e16, m4, ta, ma
789 ; ZVFH-NEXT: vmv1r.v v12, v0
790 ; ZVFH-NEXT: lui a0, %hi(.LCPI20_0)
791 ; ZVFH-NEXT: flh fa5, %lo(.LCPI20_0)(a0)
792 ; ZVFH-NEXT: vfabs.v v16, v8, v0.t
793 ; ZVFH-NEXT: vsetvli zero, zero, e16, m4, ta, mu
794 ; ZVFH-NEXT: vmflt.vf v12, v16, fa5, v0.t
795 ; ZVFH-NEXT: fsrmi a0, 4
796 ; ZVFH-NEXT: vmv1r.v v0, v12
797 ; ZVFH-NEXT: vsetvli zero, zero, e16, m4, ta, ma
798 ; ZVFH-NEXT: vfcvt.x.f.v v16, v8, v0.t
799 ; ZVFH-NEXT: fsrm a0
800 ; ZVFH-NEXT: vfcvt.f.x.v v16, v16, v0.t
801 ; ZVFH-NEXT: vsetvli zero, zero, e16, m4, ta, mu
802 ; ZVFH-NEXT: vfsgnj.vv v8, v16, v8, v0.t
803 ; ZVFH-NEXT: ret
804 ;
805 ; ZVFHMIN-LABEL: vp_round_nxv16f16:
806 ; ZVFHMIN: # %bb.0:
807 ; ZVFHMIN-NEXT: vsetvli zero, a0, e16, m4, ta, ma
808 ; ZVFHMIN-NEXT: vmv1r.v v12, v0
809 ; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v8, v0.t
810 ; ZVFHMIN-NEXT: lui a0, 307200
811 ; ZVFHMIN-NEXT: vmv1r.v v8, v0
812 ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m8, ta, ma
813 ; ZVFHMIN-NEXT: vfabs.v v24, v16, v0.t
814 ; ZVFHMIN-NEXT: fmv.w.x fa5, a0
815 ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m8, ta, mu
816 ; ZVFHMIN-NEXT: vmflt.vf v8, v24, fa5, v0.t
817 ; ZVFHMIN-NEXT: fsrmi a0, 4
818 ; ZVFHMIN-NEXT: vmv1r.v v0, v8
819 ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m8, ta, ma
820 ; ZVFHMIN-NEXT: vfcvt.x.f.v v24, v16, v0.t
821 ; ZVFHMIN-NEXT: fsrm a0
822 ; ZVFHMIN-NEXT: vfcvt.f.x.v v24, v24, v0.t
823 ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m8, ta, mu
824 ; ZVFHMIN-NEXT: vfsgnj.vv v16, v24, v16, v0.t
825 ; ZVFHMIN-NEXT: vmv1r.v v0, v12
826 ; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m4, ta, ma
827 ; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v16, v0.t
828 ; ZVFHMIN-NEXT: ret
829 %v = call <vscale x 16 x half> @llvm.vp.round.nxv16f16(<vscale x 16 x half> %va, <vscale x 16 x i1> %m, i32 %evl)
830 ret <vscale x 16 x half> %v
831 }
; Unmasked llvm.vp.round on <vscale x 16 x half> (mask is splat (i1 true)).
; ZVFH: rounded directly at e16/m4; threshold comes from .LCPI21_0.
; ZVFHMIN: widened to f32 at e32/m8, rounded against 0x4B000000 (lui 307200,
;   i.e. 2^23), then narrowed back with an unmasked vfncvt.f.f.w.
; With no incoming mask, vmflt.vf writes the lane mask straight into v0.
833 define <vscale x 16 x half> @vp_round_nxv16f16_unmasked(<vscale x 16 x half> %va, i32 zeroext %evl) {
834 ; ZVFH-LABEL: vp_round_nxv16f16_unmasked:
836 ; ZVFH-NEXT: lui a1, %hi(.LCPI21_0)
837 ; ZVFH-NEXT: flh fa5, %lo(.LCPI21_0)(a1)
838 ; ZVFH-NEXT: vsetvli zero, a0, e16, m4, ta, ma
839 ; ZVFH-NEXT: vfabs.v v12, v8
840 ; ZVFH-NEXT: vmflt.vf v0, v12, fa5
841 ; ZVFH-NEXT: fsrmi a0, 4
842 ; ZVFH-NEXT: vfcvt.x.f.v v12, v8, v0.t
844 ; ZVFH-NEXT: vfcvt.f.x.v v12, v12, v0.t
845 ; ZVFH-NEXT: vsetvli zero, zero, e16, m4, ta, mu
846 ; ZVFH-NEXT: vfsgnj.vv v8, v12, v8, v0.t
849 ; ZVFHMIN-LABEL: vp_round_nxv16f16_unmasked:
851 ; ZVFHMIN-NEXT: vsetvli zero, a0, e16, m4, ta, ma
852 ; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v8
853 ; ZVFHMIN-NEXT: lui a0, 307200
854 ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m8, ta, ma
855 ; ZVFHMIN-NEXT: vfabs.v v8, v16
856 ; ZVFHMIN-NEXT: fmv.w.x fa5, a0
857 ; ZVFHMIN-NEXT: vmflt.vf v0, v8, fa5
858 ; ZVFHMIN-NEXT: fsrmi a0, 4
859 ; ZVFHMIN-NEXT: vfcvt.x.f.v v8, v16, v0.t
860 ; ZVFHMIN-NEXT: fsrm a0
861 ; ZVFHMIN-NEXT: vfcvt.f.x.v v8, v8, v0.t
862 ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m8, ta, mu
863 ; ZVFHMIN-NEXT: vfsgnj.vv v16, v8, v16, v0.t
864 ; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m4, ta, ma
865 ; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v16
867 %v = call <vscale x 16 x half> @llvm.vp.round.nxv16f16(<vscale x 16 x half> %va, <vscale x 16 x i1> splat (i1 true), i32 %evl)
868 ret <vscale x 16 x half> %v
; Intrinsic under test for <vscale x 32 x half>; operands are (value, mask, evl).
871 declare <vscale x 32 x half> @llvm.vp.round.nxv32f16(<vscale x 32 x half>, <vscale x 32 x i1>, i32)
; Masked llvm.vp.round on <vscale x 32 x half> with mask %m and length %evl.
; ZVFH: rounded directly at e16/m8; threshold comes from .LCPI22_0.
; ZVFHMIN: the expected code handles the m8 input as two m4 halves, each
;   widened to f32 at e32/m8, rounded against 0x4B000000 (lui 307200, i.e.
;   2^23), and narrowed back:
;   - upper half (v12): mask is the incoming mask slid down by vlenb/4
;     (vslidedown.vx); its length is evl - 2*vlenb clamped to zero on
;     underflow (sub / sltu / addi -1 / and);
;   - lower half (after .LBB22_2): uses the original mask saved in v7 and a
;     length of min(evl, 2*vlenb) (bltu / mv).
;   The full input is spilled to an 8*vlenb stack slot (vs8r.v) and reloaded
;   (vl8r.v) for the lower half, hence the frame setup and CFI directives.
873 define <vscale x 32 x half> @vp_round_nxv32f16(<vscale x 32 x half> %va, <vscale x 32 x i1> %m, i32 zeroext %evl) {
874 ; ZVFH-LABEL: vp_round_nxv32f16:
876 ; ZVFH-NEXT: vsetvli zero, a0, e16, m8, ta, ma
877 ; ZVFH-NEXT: vmv1r.v v16, v0
878 ; ZVFH-NEXT: lui a0, %hi(.LCPI22_0)
879 ; ZVFH-NEXT: flh fa5, %lo(.LCPI22_0)(a0)
880 ; ZVFH-NEXT: vfabs.v v24, v8, v0.t
881 ; ZVFH-NEXT: vsetvli zero, zero, e16, m8, ta, mu
882 ; ZVFH-NEXT: vmflt.vf v16, v24, fa5, v0.t
883 ; ZVFH-NEXT: fsrmi a0, 4
884 ; ZVFH-NEXT: vmv1r.v v0, v16
885 ; ZVFH-NEXT: vsetvli zero, zero, e16, m8, ta, ma
886 ; ZVFH-NEXT: vfcvt.x.f.v v24, v8, v0.t
888 ; ZVFH-NEXT: vfcvt.f.x.v v24, v24, v0.t
889 ; ZVFH-NEXT: vsetvli zero, zero, e16, m8, ta, mu
890 ; ZVFH-NEXT: vfsgnj.vv v8, v24, v8, v0.t
893 ; ZVFHMIN-LABEL: vp_round_nxv32f16:
895 ; ZVFHMIN-NEXT: addi sp, sp, -16
896 ; ZVFHMIN-NEXT: .cfi_def_cfa_offset 16
897 ; ZVFHMIN-NEXT: csrr a1, vlenb
898 ; ZVFHMIN-NEXT: slli a1, a1, 3
899 ; ZVFHMIN-NEXT: sub sp, sp, a1
900 ; ZVFHMIN-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 8 * vlenb
901 ; ZVFHMIN-NEXT: vsetvli a1, zero, e8, mf2, ta, ma
902 ; ZVFHMIN-NEXT: vmv1r.v v7, v0
903 ; ZVFHMIN-NEXT: csrr a2, vlenb
904 ; ZVFHMIN-NEXT: lui a3, 307200
905 ; ZVFHMIN-NEXT: slli a1, a2, 1
906 ; ZVFHMIN-NEXT: srli a2, a2, 2
907 ; ZVFHMIN-NEXT: fmv.w.x fa5, a3
908 ; ZVFHMIN-NEXT: sub a3, a0, a1
909 ; ZVFHMIN-NEXT: vslidedown.vx v17, v0, a2
910 ; ZVFHMIN-NEXT: sltu a2, a0, a3
911 ; ZVFHMIN-NEXT: vmv1r.v v18, v17
912 ; ZVFHMIN-NEXT: addi a2, a2, -1
913 ; ZVFHMIN-NEXT: and a2, a2, a3
914 ; ZVFHMIN-NEXT: vmv1r.v v0, v17
915 ; ZVFHMIN-NEXT: addi a3, sp, 16
916 ; ZVFHMIN-NEXT: vs8r.v v8, (a3) # Unknown-size Folded Spill
917 ; ZVFHMIN-NEXT: vsetvli zero, a2, e16, m4, ta, ma
918 ; ZVFHMIN-NEXT: vfwcvt.f.f.v v24, v12, v0.t
919 ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m8, ta, ma
920 ; ZVFHMIN-NEXT: vfabs.v v8, v24, v0.t
921 ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m8, ta, mu
922 ; ZVFHMIN-NEXT: vmflt.vf v18, v8, fa5, v0.t
923 ; ZVFHMIN-NEXT: fsrmi a2, 4
924 ; ZVFHMIN-NEXT: vmv1r.v v0, v18
925 ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m8, ta, ma
926 ; ZVFHMIN-NEXT: vfcvt.x.f.v v8, v24, v0.t
927 ; ZVFHMIN-NEXT: fsrm a2
928 ; ZVFHMIN-NEXT: vfcvt.f.x.v v8, v8, v0.t
929 ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m8, ta, mu
930 ; ZVFHMIN-NEXT: vfsgnj.vv v24, v8, v24, v0.t
931 ; ZVFHMIN-NEXT: vmv1r.v v0, v17
932 ; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m4, ta, ma
933 ; ZVFHMIN-NEXT: vfncvt.f.f.w v12, v24, v0.t
934 ; ZVFHMIN-NEXT: bltu a0, a1, .LBB22_2
935 ; ZVFHMIN-NEXT: # %bb.1:
936 ; ZVFHMIN-NEXT: mv a0, a1
937 ; ZVFHMIN-NEXT: .LBB22_2:
938 ; ZVFHMIN-NEXT: vmv1r.v v0, v7
939 ; ZVFHMIN-NEXT: addi a1, sp, 16
940 ; ZVFHMIN-NEXT: vl8r.v v16, (a1) # Unknown-size Folded Reload
941 ; ZVFHMIN-NEXT: vsetvli zero, a0, e16, m4, ta, ma
942 ; ZVFHMIN-NEXT: vfwcvt.f.f.v v24, v16, v0.t
943 ; ZVFHMIN-NEXT: vmv1r.v v8, v7
944 ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m8, ta, ma
945 ; ZVFHMIN-NEXT: vfabs.v v16, v24, v0.t
946 ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m8, ta, mu
947 ; ZVFHMIN-NEXT: vmflt.vf v8, v16, fa5, v0.t
948 ; ZVFHMIN-NEXT: fsrmi a0, 4
949 ; ZVFHMIN-NEXT: vmv1r.v v0, v8
950 ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m8, ta, ma
951 ; ZVFHMIN-NEXT: vfcvt.x.f.v v16, v24, v0.t
952 ; ZVFHMIN-NEXT: fsrm a0
953 ; ZVFHMIN-NEXT: vfcvt.f.x.v v16, v16, v0.t
954 ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m8, ta, mu
955 ; ZVFHMIN-NEXT: vfsgnj.vv v24, v16, v24, v0.t
956 ; ZVFHMIN-NEXT: vmv1r.v v0, v7
957 ; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m4, ta, ma
958 ; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v24, v0.t
959 ; ZVFHMIN-NEXT: csrr a0, vlenb
960 ; ZVFHMIN-NEXT: slli a0, a0, 3
961 ; ZVFHMIN-NEXT: add sp, sp, a0
962 ; ZVFHMIN-NEXT: .cfi_def_cfa sp, 16
963 ; ZVFHMIN-NEXT: addi sp, sp, 16
964 ; ZVFHMIN-NEXT: .cfi_def_cfa_offset 0
966 %v = call <vscale x 32 x half> @llvm.vp.round.nxv32f16(<vscale x 32 x half> %va, <vscale x 32 x i1> %m, i32 %evl)
967 ret <vscale x 32 x half> %v
; Unmasked llvm.vp.round on <vscale x 32 x half> (mask is splat (i1 true)).
; ZVFH: rounded directly at e16/m8; threshold comes from .LCPI23_0.
; ZVFHMIN: the expected code handles the m8 input as two m4 halves, each
;   widened to f32 at e32/m8 and rounded against 0x4B000000 (lui 307200, i.e.
;   2^23):
;   - upper half (v12): an all-ones mask is materialized with vmset.m and slid
;     down by vlenb/4, then used as v0 for the masked sequence; its length is
;     evl - 2*vlenb clamped to zero on underflow (sub / sltu / addi -1 / and);
;   - lower half (after .LBB23_2): processed without an incoming mask, with a
;     length of min(evl, 2*vlenb) (bltu / mv).
;   The full input is spilled to an 8*vlenb stack slot (vs8r.v) and reloaded
;   (vl8r.v) for the lower half, hence the frame setup and CFI directives.
970 define <vscale x 32 x half> @vp_round_nxv32f16_unmasked(<vscale x 32 x half> %va, i32 zeroext %evl) {
971 ; ZVFH-LABEL: vp_round_nxv32f16_unmasked:
973 ; ZVFH-NEXT: lui a1, %hi(.LCPI23_0)
974 ; ZVFH-NEXT: flh fa5, %lo(.LCPI23_0)(a1)
975 ; ZVFH-NEXT: vsetvli zero, a0, e16, m8, ta, ma
976 ; ZVFH-NEXT: vfabs.v v16, v8
977 ; ZVFH-NEXT: vmflt.vf v0, v16, fa5
978 ; ZVFH-NEXT: fsrmi a0, 4
979 ; ZVFH-NEXT: vfcvt.x.f.v v16, v8, v0.t
981 ; ZVFH-NEXT: vfcvt.f.x.v v16, v16, v0.t
982 ; ZVFH-NEXT: vsetvli zero, zero, e16, m8, ta, mu
983 ; ZVFH-NEXT: vfsgnj.vv v8, v16, v8, v0.t
986 ; ZVFHMIN-LABEL: vp_round_nxv32f16_unmasked:
988 ; ZVFHMIN-NEXT: addi sp, sp, -16
989 ; ZVFHMIN-NEXT: .cfi_def_cfa_offset 16
990 ; ZVFHMIN-NEXT: csrr a1, vlenb
991 ; ZVFHMIN-NEXT: slli a1, a1, 3
992 ; ZVFHMIN-NEXT: sub sp, sp, a1
993 ; ZVFHMIN-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 8 * vlenb
994 ; ZVFHMIN-NEXT: csrr a2, vlenb
995 ; ZVFHMIN-NEXT: vsetvli a1, zero, e8, m4, ta, ma
996 ; ZVFHMIN-NEXT: vmset.m v16
997 ; ZVFHMIN-NEXT: lui a3, 307200
998 ; ZVFHMIN-NEXT: slli a1, a2, 1
999 ; ZVFHMIN-NEXT: srli a2, a2, 2
1000 ; ZVFHMIN-NEXT: fmv.w.x fa5, a3
1001 ; ZVFHMIN-NEXT: sub a3, a0, a1
1002 ; ZVFHMIN-NEXT: vsetvli a4, zero, e8, mf2, ta, ma
1003 ; ZVFHMIN-NEXT: vslidedown.vx v16, v16, a2
1004 ; ZVFHMIN-NEXT: sltu a2, a0, a3
1005 ; ZVFHMIN-NEXT: vmv1r.v v17, v16
1006 ; ZVFHMIN-NEXT: addi a2, a2, -1
1007 ; ZVFHMIN-NEXT: and a2, a2, a3
1008 ; ZVFHMIN-NEXT: vmv1r.v v0, v16
1009 ; ZVFHMIN-NEXT: addi a3, sp, 16
1010 ; ZVFHMIN-NEXT: vs8r.v v8, (a3) # Unknown-size Folded Spill
1011 ; ZVFHMIN-NEXT: vsetvli zero, a2, e16, m4, ta, ma
1012 ; ZVFHMIN-NEXT: vfwcvt.f.f.v v24, v12, v0.t
1013 ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m8, ta, ma
1014 ; ZVFHMIN-NEXT: vfabs.v v8, v24, v0.t
1015 ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m8, ta, mu
1016 ; ZVFHMIN-NEXT: vmflt.vf v17, v8, fa5, v0.t
1017 ; ZVFHMIN-NEXT: fsrmi a2, 4
1018 ; ZVFHMIN-NEXT: vmv1r.v v0, v17
1019 ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m8, ta, ma
1020 ; ZVFHMIN-NEXT: vfcvt.x.f.v v8, v24, v0.t
1021 ; ZVFHMIN-NEXT: fsrm a2
1022 ; ZVFHMIN-NEXT: vfcvt.f.x.v v8, v8, v0.t
1023 ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m8, ta, mu
1024 ; ZVFHMIN-NEXT: vfsgnj.vv v24, v8, v24, v0.t
1025 ; ZVFHMIN-NEXT: vmv1r.v v0, v16
1026 ; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m4, ta, ma
1027 ; ZVFHMIN-NEXT: vfncvt.f.f.w v12, v24, v0.t
1028 ; ZVFHMIN-NEXT: bltu a0, a1, .LBB23_2
1029 ; ZVFHMIN-NEXT: # %bb.1:
1030 ; ZVFHMIN-NEXT: mv a0, a1
1031 ; ZVFHMIN-NEXT: .LBB23_2:
1032 ; ZVFHMIN-NEXT: addi a1, sp, 16
1033 ; ZVFHMIN-NEXT: vl8r.v v24, (a1) # Unknown-size Folded Reload
1034 ; ZVFHMIN-NEXT: vsetvli zero, a0, e16, m4, ta, ma
1035 ; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v24
1036 ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m8, ta, ma
1037 ; ZVFHMIN-NEXT: vfabs.v v24, v16
1038 ; ZVFHMIN-NEXT: vmflt.vf v0, v24, fa5
1039 ; ZVFHMIN-NEXT: fsrmi a0, 4
1040 ; ZVFHMIN-NEXT: vfcvt.x.f.v v24, v16, v0.t
1041 ; ZVFHMIN-NEXT: fsrm a0
1042 ; ZVFHMIN-NEXT: vfcvt.f.x.v v24, v24, v0.t
1043 ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m8, ta, mu
1044 ; ZVFHMIN-NEXT: vfsgnj.vv v16, v24, v16, v0.t
1045 ; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m4, ta, ma
1046 ; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v16
1047 ; ZVFHMIN-NEXT: csrr a0, vlenb
1048 ; ZVFHMIN-NEXT: slli a0, a0, 3
1049 ; ZVFHMIN-NEXT: add sp, sp, a0
1050 ; ZVFHMIN-NEXT: .cfi_def_cfa sp, 16
1051 ; ZVFHMIN-NEXT: addi sp, sp, 16
1052 ; ZVFHMIN-NEXT: .cfi_def_cfa_offset 0
1054 %v = call <vscale x 32 x half> @llvm.vp.round.nxv32f16(<vscale x 32 x half> %va, <vscale x 32 x i1> splat (i1 true), i32 %evl)
1055 ret <vscale x 32 x half> %v
; Intrinsic under test for <vscale x 1 x float>; operands are (value, mask, evl).
1058 declare <vscale x 1 x float> @llvm.vp.round.nxv1f32(<vscale x 1 x float>, <vscale x 1 x i1>, i32)
; Masked llvm.vp.round on <vscale x 1 x float>. f32 lowering does not depend on
; the half-precision extensions, so all RUN lines share the CHECK prefix.
; Expected at e32/mf2: |x| is compared against 0x4B000000 (lui 307200, i.e.
; 2^23) with the result written back over the mask in v0; selected lanes are
; converted to integer and back between fsrmi 4 / fsrm, and vfsgnj.vv copies
; the sign from the original value in v8.
1060 define <vscale x 1 x float> @vp_round_nxv1f32(<vscale x 1 x float> %va, <vscale x 1 x i1> %m, i32 zeroext %evl) {
1061 ; CHECK-LABEL: vp_round_nxv1f32:
1063 ; CHECK-NEXT: vsetvli zero, a0, e32, mf2, ta, ma
1064 ; CHECK-NEXT: vfabs.v v9, v8, v0.t
1065 ; CHECK-NEXT: lui a0, 307200
1066 ; CHECK-NEXT: fmv.w.x fa5, a0
1067 ; CHECK-NEXT: vsetvli zero, zero, e32, mf2, ta, mu
1068 ; CHECK-NEXT: vmflt.vf v0, v9, fa5, v0.t
1069 ; CHECK-NEXT: fsrmi a0, 4
1070 ; CHECK-NEXT: vsetvli zero, zero, e32, mf2, ta, ma
1071 ; CHECK-NEXT: vfcvt.x.f.v v9, v8, v0.t
1072 ; CHECK-NEXT: fsrm a0
1073 ; CHECK-NEXT: vfcvt.f.x.v v9, v9, v0.t
1074 ; CHECK-NEXT: vsetvli zero, zero, e32, mf2, ta, mu
1075 ; CHECK-NEXT: vfsgnj.vv v8, v9, v8, v0.t
1077 %v = call <vscale x 1 x float> @llvm.vp.round.nxv1f32(<vscale x 1 x float> %va, <vscale x 1 x i1> %m, i32 %evl)
1078 ret <vscale x 1 x float> %v
; Unmasked llvm.vp.round on <vscale x 1 x float> (mask is splat (i1 true)).
; Same e32/mf2 sequence as the masked variant, except vfabs.v and vmflt.vf run
; unmasked and vmflt.vf creates the lane mask in v0 from scratch.
1081 define <vscale x 1 x float> @vp_round_nxv1f32_unmasked(<vscale x 1 x float> %va, i32 zeroext %evl) {
1082 ; CHECK-LABEL: vp_round_nxv1f32_unmasked:
1084 ; CHECK-NEXT: vsetvli zero, a0, e32, mf2, ta, ma
1085 ; CHECK-NEXT: vfabs.v v9, v8
1086 ; CHECK-NEXT: lui a0, 307200
1087 ; CHECK-NEXT: fmv.w.x fa5, a0
1088 ; CHECK-NEXT: vmflt.vf v0, v9, fa5
1089 ; CHECK-NEXT: fsrmi a0, 4
1090 ; CHECK-NEXT: vfcvt.x.f.v v9, v8, v0.t
1091 ; CHECK-NEXT: fsrm a0
1092 ; CHECK-NEXT: vfcvt.f.x.v v9, v9, v0.t
1093 ; CHECK-NEXT: vsetvli zero, zero, e32, mf2, ta, mu
1094 ; CHECK-NEXT: vfsgnj.vv v8, v9, v8, v0.t
1096 %v = call <vscale x 1 x float> @llvm.vp.round.nxv1f32(<vscale x 1 x float> %va, <vscale x 1 x i1> splat (i1 true), i32 %evl)
1097 ret <vscale x 1 x float> %v
; Intrinsic under test for <vscale x 2 x float>; operands are (value, mask, evl).
1100 declare <vscale x 2 x float> @llvm.vp.round.nxv2f32(<vscale x 2 x float>, <vscale x 2 x i1>, i32)
; Masked llvm.vp.round on <vscale x 2 x float>, expected at e32/m1.
; The threshold is the f32 bit pattern 0x4B000000 (lui 307200, i.e. 2^23); the
; vmflt.vf result overwrites the mask in v0, and the int round-trip runs between
; fsrmi 4 / fsrm before vfsgnj.vv copies the sign from the original value.
1102 define <vscale x 2 x float> @vp_round_nxv2f32(<vscale x 2 x float> %va, <vscale x 2 x i1> %m, i32 zeroext %evl) {
1103 ; CHECK-LABEL: vp_round_nxv2f32:
1105 ; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, ma
1106 ; CHECK-NEXT: vfabs.v v9, v8, v0.t
1107 ; CHECK-NEXT: lui a0, 307200
1108 ; CHECK-NEXT: fmv.w.x fa5, a0
1109 ; CHECK-NEXT: vsetvli zero, zero, e32, m1, ta, mu
1110 ; CHECK-NEXT: vmflt.vf v0, v9, fa5, v0.t
1111 ; CHECK-NEXT: fsrmi a0, 4
1112 ; CHECK-NEXT: vsetvli zero, zero, e32, m1, ta, ma
1113 ; CHECK-NEXT: vfcvt.x.f.v v9, v8, v0.t
1114 ; CHECK-NEXT: fsrm a0
1115 ; CHECK-NEXT: vfcvt.f.x.v v9, v9, v0.t
1116 ; CHECK-NEXT: vsetvli zero, zero, e32, m1, ta, mu
1117 ; CHECK-NEXT: vfsgnj.vv v8, v9, v8, v0.t
1119 %v = call <vscale x 2 x float> @llvm.vp.round.nxv2f32(<vscale x 2 x float> %va, <vscale x 2 x i1> %m, i32 %evl)
1120 ret <vscale x 2 x float> %v
; Unmasked llvm.vp.round on <vscale x 2 x float> (mask is splat (i1 true)),
; expected at e32/m1. vfabs.v and vmflt.vf run unmasked; vmflt.vf creates the
; lane mask in v0 that gates the conversions and the final vfsgnj.vv.
1123 define <vscale x 2 x float> @vp_round_nxv2f32_unmasked(<vscale x 2 x float> %va, i32 zeroext %evl) {
1124 ; CHECK-LABEL: vp_round_nxv2f32_unmasked:
1126 ; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, ma
1127 ; CHECK-NEXT: vfabs.v v9, v8
1128 ; CHECK-NEXT: lui a0, 307200
1129 ; CHECK-NEXT: fmv.w.x fa5, a0
1130 ; CHECK-NEXT: vmflt.vf v0, v9, fa5
1131 ; CHECK-NEXT: fsrmi a0, 4
1132 ; CHECK-NEXT: vfcvt.x.f.v v9, v8, v0.t
1133 ; CHECK-NEXT: fsrm a0
1134 ; CHECK-NEXT: vfcvt.f.x.v v9, v9, v0.t
1135 ; CHECK-NEXT: vsetvli zero, zero, e32, m1, ta, mu
1136 ; CHECK-NEXT: vfsgnj.vv v8, v9, v8, v0.t
1138 %v = call <vscale x 2 x float> @llvm.vp.round.nxv2f32(<vscale x 2 x float> %va, <vscale x 2 x i1> splat (i1 true), i32 %evl)
1139 ret <vscale x 2 x float> %v
; Intrinsic under test for <vscale x 4 x float>; operands are (value, mask, evl).
1142 declare <vscale x 4 x float> @llvm.vp.round.nxv4f32(<vscale x 4 x float>, <vscale x 4 x i1>, i32)
; Masked llvm.vp.round on <vscale x 4 x float>, expected at e32/m2.
; Unlike the mf2/m1 cases, the vmflt.vf result is built in a copy of the mask
; (v10, made with vmv1r.v) and then moved into v0 before the conversions.
; Threshold is 0x4B000000 (lui 307200, i.e. 2^23); rounding mode 4 is set by fsrmi.
1144 define <vscale x 4 x float> @vp_round_nxv4f32(<vscale x 4 x float> %va, <vscale x 4 x i1> %m, i32 zeroext %evl) {
1145 ; CHECK-LABEL: vp_round_nxv4f32:
1147 ; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, ma
1148 ; CHECK-NEXT: vmv1r.v v10, v0
1149 ; CHECK-NEXT: vfabs.v v12, v8, v0.t
1150 ; CHECK-NEXT: lui a0, 307200
1151 ; CHECK-NEXT: fmv.w.x fa5, a0
1152 ; CHECK-NEXT: vsetvli zero, zero, e32, m2, ta, mu
1153 ; CHECK-NEXT: vmflt.vf v10, v12, fa5, v0.t
1154 ; CHECK-NEXT: fsrmi a0, 4
1155 ; CHECK-NEXT: vmv1r.v v0, v10
1156 ; CHECK-NEXT: vsetvli zero, zero, e32, m2, ta, ma
1157 ; CHECK-NEXT: vfcvt.x.f.v v12, v8, v0.t
1158 ; CHECK-NEXT: fsrm a0
1159 ; CHECK-NEXT: vfcvt.f.x.v v12, v12, v0.t
1160 ; CHECK-NEXT: vsetvli zero, zero, e32, m2, ta, mu
1161 ; CHECK-NEXT: vfsgnj.vv v8, v12, v8, v0.t
1163 %v = call <vscale x 4 x float> @llvm.vp.round.nxv4f32(<vscale x 4 x float> %va, <vscale x 4 x i1> %m, i32 %evl)
1164 ret <vscale x 4 x float> %v
; Unmasked llvm.vp.round on <vscale x 4 x float> (mask is splat (i1 true)),
; expected at e32/m2. vmflt.vf writes the lane mask directly into v0, so no
; mask copy (vmv1r.v) is expected here.
1167 define <vscale x 4 x float> @vp_round_nxv4f32_unmasked(<vscale x 4 x float> %va, i32 zeroext %evl) {
1168 ; CHECK-LABEL: vp_round_nxv4f32_unmasked:
1170 ; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, ma
1171 ; CHECK-NEXT: vfabs.v v10, v8
1172 ; CHECK-NEXT: lui a0, 307200
1173 ; CHECK-NEXT: fmv.w.x fa5, a0
1174 ; CHECK-NEXT: vmflt.vf v0, v10, fa5
1175 ; CHECK-NEXT: fsrmi a0, 4
1176 ; CHECK-NEXT: vfcvt.x.f.v v10, v8, v0.t
1177 ; CHECK-NEXT: fsrm a0
1178 ; CHECK-NEXT: vfcvt.f.x.v v10, v10, v0.t
1179 ; CHECK-NEXT: vsetvli zero, zero, e32, m2, ta, mu
1180 ; CHECK-NEXT: vfsgnj.vv v8, v10, v8, v0.t
1182 %v = call <vscale x 4 x float> @llvm.vp.round.nxv4f32(<vscale x 4 x float> %va, <vscale x 4 x i1> splat (i1 true), i32 %evl)
1183 ret <vscale x 4 x float> %v
; Intrinsic under test for <vscale x 8 x float>; operands are (value, mask, evl).
1186 declare <vscale x 8 x float> @llvm.vp.round.nxv8f32(<vscale x 8 x float>, <vscale x 8 x i1>, i32)
; Masked llvm.vp.round on <vscale x 8 x float>, expected at e32/m4.
; The vmflt.vf result is built in a copy of the mask (v12) and moved into v0
; before the conversions. Threshold is 0x4B000000 (lui 307200, i.e. 2^23);
; rounding mode 4 is set by fsrmi and restored by fsrm.
1188 define <vscale x 8 x float> @vp_round_nxv8f32(<vscale x 8 x float> %va, <vscale x 8 x i1> %m, i32 zeroext %evl) {
1189 ; CHECK-LABEL: vp_round_nxv8f32:
1191 ; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, ma
1192 ; CHECK-NEXT: vmv1r.v v12, v0
1193 ; CHECK-NEXT: vfabs.v v16, v8, v0.t
1194 ; CHECK-NEXT: lui a0, 307200
1195 ; CHECK-NEXT: fmv.w.x fa5, a0
1196 ; CHECK-NEXT: vsetvli zero, zero, e32, m4, ta, mu
1197 ; CHECK-NEXT: vmflt.vf v12, v16, fa5, v0.t
1198 ; CHECK-NEXT: fsrmi a0, 4
1199 ; CHECK-NEXT: vmv1r.v v0, v12
1200 ; CHECK-NEXT: vsetvli zero, zero, e32, m4, ta, ma
1201 ; CHECK-NEXT: vfcvt.x.f.v v16, v8, v0.t
1202 ; CHECK-NEXT: fsrm a0
1203 ; CHECK-NEXT: vfcvt.f.x.v v16, v16, v0.t
1204 ; CHECK-NEXT: vsetvli zero, zero, e32, m4, ta, mu
1205 ; CHECK-NEXT: vfsgnj.vv v8, v16, v8, v0.t
1207 %v = call <vscale x 8 x float> @llvm.vp.round.nxv8f32(<vscale x 8 x float> %va, <vscale x 8 x i1> %m, i32 %evl)
1208 ret <vscale x 8 x float> %v
; Unmasked llvm.vp.round on <vscale x 8 x float> (mask is splat (i1 true)),
; expected at e32/m4 with the lane mask written directly into v0 by vmflt.vf.
1211 define <vscale x 8 x float> @vp_round_nxv8f32_unmasked(<vscale x 8 x float> %va, i32 zeroext %evl) {
1212 ; CHECK-LABEL: vp_round_nxv8f32_unmasked:
1214 ; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, ma
1215 ; CHECK-NEXT: vfabs.v v12, v8
1216 ; CHECK-NEXT: lui a0, 307200
1217 ; CHECK-NEXT: fmv.w.x fa5, a0
1218 ; CHECK-NEXT: vmflt.vf v0, v12, fa5
1219 ; CHECK-NEXT: fsrmi a0, 4
1220 ; CHECK-NEXT: vfcvt.x.f.v v12, v8, v0.t
1221 ; CHECK-NEXT: fsrm a0
1222 ; CHECK-NEXT: vfcvt.f.x.v v12, v12, v0.t
1223 ; CHECK-NEXT: vsetvli zero, zero, e32, m4, ta, mu
1224 ; CHECK-NEXT: vfsgnj.vv v8, v12, v8, v0.t
1226 %v = call <vscale x 8 x float> @llvm.vp.round.nxv8f32(<vscale x 8 x float> %va, <vscale x 8 x i1> splat (i1 true), i32 %evl)
1227 ret <vscale x 8 x float> %v
; Intrinsic under test for <vscale x 16 x float>; operands are (value, mask, evl).
1230 declare <vscale x 16 x float> @llvm.vp.round.nxv16f32(<vscale x 16 x float>, <vscale x 16 x i1>, i32)
; Masked llvm.vp.round on <vscale x 16 x float>, expected at e32/m8.
; The vmflt.vf result is built in a copy of the mask (v16) and moved into v0
; before the conversions. Threshold is 0x4B000000 (lui 307200, i.e. 2^23);
; rounding mode 4 is set by fsrmi and restored by fsrm.
1232 define <vscale x 16 x float> @vp_round_nxv16f32(<vscale x 16 x float> %va, <vscale x 16 x i1> %m, i32 zeroext %evl) {
1233 ; CHECK-LABEL: vp_round_nxv16f32:
1235 ; CHECK-NEXT: vsetvli zero, a0, e32, m8, ta, ma
1236 ; CHECK-NEXT: vmv1r.v v16, v0
1237 ; CHECK-NEXT: vfabs.v v24, v8, v0.t
1238 ; CHECK-NEXT: lui a0, 307200
1239 ; CHECK-NEXT: fmv.w.x fa5, a0
1240 ; CHECK-NEXT: vsetvli zero, zero, e32, m8, ta, mu
1241 ; CHECK-NEXT: vmflt.vf v16, v24, fa5, v0.t
1242 ; CHECK-NEXT: fsrmi a0, 4
1243 ; CHECK-NEXT: vmv1r.v v0, v16
1244 ; CHECK-NEXT: vsetvli zero, zero, e32, m8, ta, ma
1245 ; CHECK-NEXT: vfcvt.x.f.v v24, v8, v0.t
1246 ; CHECK-NEXT: fsrm a0
1247 ; CHECK-NEXT: vfcvt.f.x.v v24, v24, v0.t
1248 ; CHECK-NEXT: vsetvli zero, zero, e32, m8, ta, mu
1249 ; CHECK-NEXT: vfsgnj.vv v8, v24, v8, v0.t
1251 %v = call <vscale x 16 x float> @llvm.vp.round.nxv16f32(<vscale x 16 x float> %va, <vscale x 16 x i1> %m, i32 %evl)
1252 ret <vscale x 16 x float> %v
; Unmasked llvm.vp.round on <vscale x 16 x float> (mask is splat (i1 true)),
; expected at e32/m8 with the lane mask written directly into v0 by vmflt.vf.
1255 define <vscale x 16 x float> @vp_round_nxv16f32_unmasked(<vscale x 16 x float> %va, i32 zeroext %evl) {
1256 ; CHECK-LABEL: vp_round_nxv16f32_unmasked:
1258 ; CHECK-NEXT: vsetvli zero, a0, e32, m8, ta, ma
1259 ; CHECK-NEXT: vfabs.v v16, v8
1260 ; CHECK-NEXT: lui a0, 307200
1261 ; CHECK-NEXT: fmv.w.x fa5, a0
1262 ; CHECK-NEXT: vmflt.vf v0, v16, fa5
1263 ; CHECK-NEXT: fsrmi a0, 4
1264 ; CHECK-NEXT: vfcvt.x.f.v v16, v8, v0.t
1265 ; CHECK-NEXT: fsrm a0
1266 ; CHECK-NEXT: vfcvt.f.x.v v16, v16, v0.t
1267 ; CHECK-NEXT: vsetvli zero, zero, e32, m8, ta, mu
1268 ; CHECK-NEXT: vfsgnj.vv v8, v16, v8, v0.t
1270 %v = call <vscale x 16 x float> @llvm.vp.round.nxv16f32(<vscale x 16 x float> %va, <vscale x 16 x i1> splat (i1 true), i32 %evl)
1271 ret <vscale x 16 x float> %v
; Intrinsic under test for <vscale x 1 x double>; operands are (value, mask, evl).
1274 declare <vscale x 1 x double> @llvm.vp.round.nxv1f64(<vscale x 1 x double>, <vscale x 1 x i1>, i32)
; Masked llvm.vp.round on <vscale x 1 x double>, expected at e64/m1.
; The f64 magnitude threshold is loaded from the constant pool (.LCPI34_0, via
; lui/fld) rather than materialized with lui/fmv.w.x as in the f32 tests.
; The vmflt.vf result overwrites the mask in v0; the int round-trip runs
; between fsrmi 4 / fsrm, and vfsgnj.vv copies the sign from the original value.
1276 define <vscale x 1 x double> @vp_round_nxv1f64(<vscale x 1 x double> %va, <vscale x 1 x i1> %m, i32 zeroext %evl) {
1277 ; CHECK-LABEL: vp_round_nxv1f64:
1279 ; CHECK-NEXT: lui a1, %hi(.LCPI34_0)
1280 ; CHECK-NEXT: fld fa5, %lo(.LCPI34_0)(a1)
1281 ; CHECK-NEXT: vsetvli zero, a0, e64, m1, ta, ma
1282 ; CHECK-NEXT: vfabs.v v9, v8, v0.t
1283 ; CHECK-NEXT: vsetvli zero, zero, e64, m1, ta, mu
1284 ; CHECK-NEXT: vmflt.vf v0, v9, fa5, v0.t
1285 ; CHECK-NEXT: fsrmi a0, 4
1286 ; CHECK-NEXT: vsetvli zero, zero, e64, m1, ta, ma
1287 ; CHECK-NEXT: vfcvt.x.f.v v9, v8, v0.t
1288 ; CHECK-NEXT: fsrm a0
1289 ; CHECK-NEXT: vfcvt.f.x.v v9, v9, v0.t
1290 ; CHECK-NEXT: vsetvli zero, zero, e64, m1, ta, mu
1291 ; CHECK-NEXT: vfsgnj.vv v8, v9, v8, v0.t
1293 %v = call <vscale x 1 x double> @llvm.vp.round.nxv1f64(<vscale x 1 x double> %va, <vscale x 1 x i1> %m, i32 %evl)
1294 ret <vscale x 1 x double> %v
; Unmasked llvm.vp.round on <vscale x 1 x double> (mask is splat (i1 true)),
; expected at e64/m1. Threshold comes from the constant pool (.LCPI35_0);
; vmflt.vf creates the lane mask in v0 from scratch.
1297 define <vscale x 1 x double> @vp_round_nxv1f64_unmasked(<vscale x 1 x double> %va, i32 zeroext %evl) {
1298 ; CHECK-LABEL: vp_round_nxv1f64_unmasked:
1300 ; CHECK-NEXT: lui a1, %hi(.LCPI35_0)
1301 ; CHECK-NEXT: fld fa5, %lo(.LCPI35_0)(a1)
1302 ; CHECK-NEXT: vsetvli zero, a0, e64, m1, ta, ma
1303 ; CHECK-NEXT: vfabs.v v9, v8
1304 ; CHECK-NEXT: vmflt.vf v0, v9, fa5
1305 ; CHECK-NEXT: fsrmi a0, 4
1306 ; CHECK-NEXT: vfcvt.x.f.v v9, v8, v0.t
1307 ; CHECK-NEXT: fsrm a0
1308 ; CHECK-NEXT: vfcvt.f.x.v v9, v9, v0.t
1309 ; CHECK-NEXT: vsetvli zero, zero, e64, m1, ta, mu
1310 ; CHECK-NEXT: vfsgnj.vv v8, v9, v8, v0.t
1312 %v = call <vscale x 1 x double> @llvm.vp.round.nxv1f64(<vscale x 1 x double> %va, <vscale x 1 x i1> splat (i1 true), i32 %evl)
1313 ret <vscale x 1 x double> %v
; Intrinsic under test for <vscale x 2 x double>; operands are (value, mask, evl).
1316 declare <vscale x 2 x double> @llvm.vp.round.nxv2f64(<vscale x 2 x double>, <vscale x 2 x i1>, i32)
; Masked llvm.vp.round on <vscale x 2 x double>, expected at e64/m2.
; Threshold comes from the constant pool (.LCPI36_0). The vmflt.vf result is
; built in a copy of the mask (v10) and moved into v0 before the conversions,
; which run between fsrmi 4 / fsrm.
1318 define <vscale x 2 x double> @vp_round_nxv2f64(<vscale x 2 x double> %va, <vscale x 2 x i1> %m, i32 zeroext %evl) {
1319 ; CHECK-LABEL: vp_round_nxv2f64:
1321 ; CHECK-NEXT: vsetvli zero, a0, e64, m2, ta, ma
1322 ; CHECK-NEXT: vmv1r.v v10, v0
1323 ; CHECK-NEXT: lui a0, %hi(.LCPI36_0)
1324 ; CHECK-NEXT: fld fa5, %lo(.LCPI36_0)(a0)
1325 ; CHECK-NEXT: vfabs.v v12, v8, v0.t
1326 ; CHECK-NEXT: vsetvli zero, zero, e64, m2, ta, mu
1327 ; CHECK-NEXT: vmflt.vf v10, v12, fa5, v0.t
1328 ; CHECK-NEXT: fsrmi a0, 4
1329 ; CHECK-NEXT: vmv1r.v v0, v10
1330 ; CHECK-NEXT: vsetvli zero, zero, e64, m2, ta, ma
1331 ; CHECK-NEXT: vfcvt.x.f.v v12, v8, v0.t
1332 ; CHECK-NEXT: fsrm a0
1333 ; CHECK-NEXT: vfcvt.f.x.v v12, v12, v0.t
1334 ; CHECK-NEXT: vsetvli zero, zero, e64, m2, ta, mu
1335 ; CHECK-NEXT: vfsgnj.vv v8, v12, v8, v0.t
1337 %v = call <vscale x 2 x double> @llvm.vp.round.nxv2f64(<vscale x 2 x double> %va, <vscale x 2 x i1> %m, i32 %evl)
1338 ret <vscale x 2 x double> %v
; Unmasked llvm.vp.round on <vscale x 2 x double> (mask is splat (i1 true)),
; expected at e64/m2. Threshold comes from the constant pool (.LCPI37_0);
; vmflt.vf writes the lane mask directly into v0.
1341 define <vscale x 2 x double> @vp_round_nxv2f64_unmasked(<vscale x 2 x double> %va, i32 zeroext %evl) {
1342 ; CHECK-LABEL: vp_round_nxv2f64_unmasked:
1344 ; CHECK-NEXT: lui a1, %hi(.LCPI37_0)
1345 ; CHECK-NEXT: fld fa5, %lo(.LCPI37_0)(a1)
1346 ; CHECK-NEXT: vsetvli zero, a0, e64, m2, ta, ma
1347 ; CHECK-NEXT: vfabs.v v10, v8
1348 ; CHECK-NEXT: vmflt.vf v0, v10, fa5
1349 ; CHECK-NEXT: fsrmi a0, 4
1350 ; CHECK-NEXT: vfcvt.x.f.v v10, v8, v0.t
1351 ; CHECK-NEXT: fsrm a0
1352 ; CHECK-NEXT: vfcvt.f.x.v v10, v10, v0.t
1353 ; CHECK-NEXT: vsetvli zero, zero, e64, m2, ta, mu
1354 ; CHECK-NEXT: vfsgnj.vv v8, v10, v8, v0.t
1356 %v = call <vscale x 2 x double> @llvm.vp.round.nxv2f64(<vscale x 2 x double> %va, <vscale x 2 x i1> splat (i1 true), i32 %evl)
1357 ret <vscale x 2 x double> %v
; Intrinsic under test for <vscale x 4 x double>; operands are (value, mask, evl).
1360 declare <vscale x 4 x double> @llvm.vp.round.nxv4f64(<vscale x 4 x double>, <vscale x 4 x i1>, i32)
; Masked llvm.vp.round on <vscale x 4 x double>, expected at e64/m4.
; Threshold comes from the constant pool (.LCPI38_0). The vmflt.vf result is
; built in a copy of the mask (v12) and moved into v0 before the conversions,
; which run between fsrmi 4 / fsrm.
1362 define <vscale x 4 x double> @vp_round_nxv4f64(<vscale x 4 x double> %va, <vscale x 4 x i1> %m, i32 zeroext %evl) {
1363 ; CHECK-LABEL: vp_round_nxv4f64:
1365 ; CHECK-NEXT: vsetvli zero, a0, e64, m4, ta, ma
1366 ; CHECK-NEXT: vmv1r.v v12, v0
1367 ; CHECK-NEXT: lui a0, %hi(.LCPI38_0)
1368 ; CHECK-NEXT: fld fa5, %lo(.LCPI38_0)(a0)
1369 ; CHECK-NEXT: vfabs.v v16, v8, v0.t
1370 ; CHECK-NEXT: vsetvli zero, zero, e64, m4, ta, mu
1371 ; CHECK-NEXT: vmflt.vf v12, v16, fa5, v0.t
1372 ; CHECK-NEXT: fsrmi a0, 4
1373 ; CHECK-NEXT: vmv1r.v v0, v12
1374 ; CHECK-NEXT: vsetvli zero, zero, e64, m4, ta, ma
1375 ; CHECK-NEXT: vfcvt.x.f.v v16, v8, v0.t
1376 ; CHECK-NEXT: fsrm a0
1377 ; CHECK-NEXT: vfcvt.f.x.v v16, v16, v0.t
1378 ; CHECK-NEXT: vsetvli zero, zero, e64, m4, ta, mu
1379 ; CHECK-NEXT: vfsgnj.vv v8, v16, v8, v0.t
1381 %v = call <vscale x 4 x double> @llvm.vp.round.nxv4f64(<vscale x 4 x double> %va, <vscale x 4 x i1> %m, i32 %evl)
1382 ret <vscale x 4 x double> %v
; Unmasked llvm.vp.round on <vscale x 4 x double> (mask is splat (i1 true)),
; expected at e64/m4. Threshold comes from the constant pool (.LCPI39_0);
; vmflt.vf writes the lane mask directly into v0.
1385 define <vscale x 4 x double> @vp_round_nxv4f64_unmasked(<vscale x 4 x double> %va, i32 zeroext %evl) {
1386 ; CHECK-LABEL: vp_round_nxv4f64_unmasked:
1388 ; CHECK-NEXT: lui a1, %hi(.LCPI39_0)
1389 ; CHECK-NEXT: fld fa5, %lo(.LCPI39_0)(a1)
1390 ; CHECK-NEXT: vsetvli zero, a0, e64, m4, ta, ma
1391 ; CHECK-NEXT: vfabs.v v12, v8
1392 ; CHECK-NEXT: vmflt.vf v0, v12, fa5
1393 ; CHECK-NEXT: fsrmi a0, 4
1394 ; CHECK-NEXT: vfcvt.x.f.v v12, v8, v0.t
1395 ; CHECK-NEXT: fsrm a0
1396 ; CHECK-NEXT: vfcvt.f.x.v v12, v12, v0.t
1397 ; CHECK-NEXT: vsetvli zero, zero, e64, m4, ta, mu
1398 ; CHECK-NEXT: vfsgnj.vv v8, v12, v8, v0.t
1400 %v = call <vscale x 4 x double> @llvm.vp.round.nxv4f64(<vscale x 4 x double> %va, <vscale x 4 x i1> splat (i1 true), i32 %evl)
1401 ret <vscale x 4 x double> %v
; Intrinsic under test for <vscale x 7 x double>; operands are (value, mask, evl).
1404 declare <vscale x 7 x double> @llvm.vp.round.nxv7f64(<vscale x 7 x double>, <vscale x 7 x i1>, i32)
; Masked llvm.vp.round on the non-power-of-two type <vscale x 7 x double>.
; The expected code is the same e64/m8 sequence as the nxv8f64 test in this
; file: threshold from the constant pool (.LCPI40_0), vmflt.vf result built in
; a mask copy (v16) and moved into v0, conversions between fsrmi 4 / fsrm.
1406 define <vscale x 7 x double> @vp_round_nxv7f64(<vscale x 7 x double> %va, <vscale x 7 x i1> %m, i32 zeroext %evl) {
1407 ; CHECK-LABEL: vp_round_nxv7f64:
1409 ; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma
1410 ; CHECK-NEXT: vmv1r.v v16, v0
1411 ; CHECK-NEXT: lui a0, %hi(.LCPI40_0)
1412 ; CHECK-NEXT: fld fa5, %lo(.LCPI40_0)(a0)
1413 ; CHECK-NEXT: vfabs.v v24, v8, v0.t
1414 ; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu
1415 ; CHECK-NEXT: vmflt.vf v16, v24, fa5, v0.t
1416 ; CHECK-NEXT: fsrmi a0, 4
1417 ; CHECK-NEXT: vmv1r.v v0, v16
1418 ; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, ma
1419 ; CHECK-NEXT: vfcvt.x.f.v v24, v8, v0.t
1420 ; CHECK-NEXT: fsrm a0
1421 ; CHECK-NEXT: vfcvt.f.x.v v24, v24, v0.t
1422 ; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu
1423 ; CHECK-NEXT: vfsgnj.vv v8, v24, v8, v0.t
1425 %v = call <vscale x 7 x double> @llvm.vp.round.nxv7f64(<vscale x 7 x double> %va, <vscale x 7 x i1> %m, i32 %evl)
1426 ret <vscale x 7 x double> %v
; Unmasked llvm.vp.round on <vscale x 7 x double> (mask is splat (i1 true)).
; Expected at e64/m8, matching the nxv8f64 unmasked sequence: threshold from
; the constant pool (.LCPI41_0) and the lane mask written directly into v0.
1429 define <vscale x 7 x double> @vp_round_nxv7f64_unmasked(<vscale x 7 x double> %va, i32 zeroext %evl) {
1430 ; CHECK-LABEL: vp_round_nxv7f64_unmasked:
1432 ; CHECK-NEXT: lui a1, %hi(.LCPI41_0)
1433 ; CHECK-NEXT: fld fa5, %lo(.LCPI41_0)(a1)
1434 ; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma
1435 ; CHECK-NEXT: vfabs.v v16, v8
1436 ; CHECK-NEXT: vmflt.vf v0, v16, fa5
1437 ; CHECK-NEXT: fsrmi a0, 4
1438 ; CHECK-NEXT: vfcvt.x.f.v v16, v8, v0.t
1439 ; CHECK-NEXT: fsrm a0
1440 ; CHECK-NEXT: vfcvt.f.x.v v16, v16, v0.t
1441 ; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu
1442 ; CHECK-NEXT: vfsgnj.vv v8, v16, v8, v0.t
1444 %v = call <vscale x 7 x double> @llvm.vp.round.nxv7f64(<vscale x 7 x double> %va, <vscale x 7 x i1> splat (i1 true), i32 %evl)
1445 ret <vscale x 7 x double> %v
; Intrinsic under test for <vscale x 8 x double>; operands are (value, mask, evl).
1448 declare <vscale x 8 x double> @llvm.vp.round.nxv8f64(<vscale x 8 x double>, <vscale x 8 x i1>, i32)
; Masked llvm.vp.round on <vscale x 8 x double>, expected at e64/m8.
; Threshold comes from the constant pool (.LCPI42_0). The vmflt.vf result is
; built in a copy of the mask (v16) and moved into v0 before the conversions,
; which run between fsrmi 4 / fsrm.
1450 define <vscale x 8 x double> @vp_round_nxv8f64(<vscale x 8 x double> %va, <vscale x 8 x i1> %m, i32 zeroext %evl) {
1451 ; CHECK-LABEL: vp_round_nxv8f64:
1453 ; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma
1454 ; CHECK-NEXT: vmv1r.v v16, v0
1455 ; CHECK-NEXT: lui a0, %hi(.LCPI42_0)
1456 ; CHECK-NEXT: fld fa5, %lo(.LCPI42_0)(a0)
1457 ; CHECK-NEXT: vfabs.v v24, v8, v0.t
1458 ; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu
1459 ; CHECK-NEXT: vmflt.vf v16, v24, fa5, v0.t
1460 ; CHECK-NEXT: fsrmi a0, 4
1461 ; CHECK-NEXT: vmv1r.v v0, v16
1462 ; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, ma
1463 ; CHECK-NEXT: vfcvt.x.f.v v24, v8, v0.t
1464 ; CHECK-NEXT: fsrm a0
1465 ; CHECK-NEXT: vfcvt.f.x.v v24, v24, v0.t
1466 ; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu
1467 ; CHECK-NEXT: vfsgnj.vv v8, v24, v8, v0.t
1469 %v = call <vscale x 8 x double> @llvm.vp.round.nxv8f64(<vscale x 8 x double> %va, <vscale x 8 x i1> %m, i32 %evl)
1470 ret <vscale x 8 x double> %v
; Unmasked llvm.vp.round on <vscale x 8 x double> (mask is splat (i1 true)),
; expected at e64/m8. Threshold comes from the constant pool (.LCPI43_0);
; vmflt.vf writes the lane mask directly into v0.
1473 define <vscale x 8 x double> @vp_round_nxv8f64_unmasked(<vscale x 8 x double> %va, i32 zeroext %evl) {
1474 ; CHECK-LABEL: vp_round_nxv8f64_unmasked:
1476 ; CHECK-NEXT: lui a1, %hi(.LCPI43_0)
1477 ; CHECK-NEXT: fld fa5, %lo(.LCPI43_0)(a1)
1478 ; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma
1479 ; CHECK-NEXT: vfabs.v v16, v8
1480 ; CHECK-NEXT: vmflt.vf v0, v16, fa5
1481 ; CHECK-NEXT: fsrmi a0, 4
1482 ; CHECK-NEXT: vfcvt.x.f.v v16, v8, v0.t
1483 ; CHECK-NEXT: fsrm a0
1484 ; CHECK-NEXT: vfcvt.f.x.v v16, v16, v0.t
1485 ; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu
1486 ; CHECK-NEXT: vfsgnj.vv v8, v16, v8, v0.t
1488 %v = call <vscale x 8 x double> @llvm.vp.round.nxv8f64(<vscale x 8 x double> %va, <vscale x 8 x i1> splat (i1 true), i32 %evl)
1489 ret <vscale x 8 x double> %v
; Intrinsic under test for <vscale x 16 x double>; operands are (value, mask, evl).
1493 declare <vscale x 16 x double> @llvm.vp.round.nxv16f64(<vscale x 16 x double>, <vscale x 16 x i1>, i32)
; Masked llvm.vp.round on <vscale x 16 x double>, which occupies two m8
; register groups (v8 and v16), so the expected code rounds each group
; separately at e64/m8 with the threshold from the constant pool (.LCPI44_0):
;   - upper group (v16): mask is the incoming mask slid down by vlenb/8
;     (vslidedown.vx into v6); its length is evl - vlenb clamped to zero on
;     underflow (sub / sltu / addi -1 / and);
;   - lower group (v8, after .LBB44_2): uses the original mask saved in v7 and
;     a length of min(evl, vlenb) (bltu / mv).
; An 8*vlenb stack slot is allocated; the checks include a spill/reload of the
; intermediate conversion result and of the rounded upper group, which is
; reloaded into v16 just before the frame is torn down.
1495 define <vscale x 16 x double> @vp_round_nxv16f64(<vscale x 16 x double> %va, <vscale x 16 x i1> %m, i32 zeroext %evl) {
1496 ; CHECK-LABEL: vp_round_nxv16f64:
1498 ; CHECK-NEXT: addi sp, sp, -16
1499 ; CHECK-NEXT: .cfi_def_cfa_offset 16
1500 ; CHECK-NEXT: csrr a1, vlenb
1501 ; CHECK-NEXT: slli a1, a1, 3
1502 ; CHECK-NEXT: sub sp, sp, a1
1503 ; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 8 * vlenb
1504 ; CHECK-NEXT: vsetvli a1, zero, e8, mf4, ta, ma
1505 ; CHECK-NEXT: vmv1r.v v7, v0
1506 ; CHECK-NEXT: csrr a1, vlenb
1507 ; CHECK-NEXT: lui a2, %hi(.LCPI44_0)
1508 ; CHECK-NEXT: srli a3, a1, 3
1509 ; CHECK-NEXT: fld fa5, %lo(.LCPI44_0)(a2)
1510 ; CHECK-NEXT: sub a2, a0, a1
1511 ; CHECK-NEXT: vslidedown.vx v6, v0, a3
1512 ; CHECK-NEXT: sltu a3, a0, a2
1513 ; CHECK-NEXT: addi a3, a3, -1
1514 ; CHECK-NEXT: and a2, a3, a2
1515 ; CHECK-NEXT: vmv1r.v v0, v6
1516 ; CHECK-NEXT: vsetvli zero, a2, e64, m8, ta, ma
1517 ; CHECK-NEXT: vfabs.v v24, v16, v0.t
1518 ; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu
1519 ; CHECK-NEXT: vmflt.vf v6, v24, fa5, v0.t
1520 ; CHECK-NEXT: fsrmi a2, 4
1521 ; CHECK-NEXT: vmv1r.v v0, v6
1522 ; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, ma
1523 ; CHECK-NEXT: vfcvt.x.f.v v24, v16, v0.t
1524 ; CHECK-NEXT: addi a3, sp, 16
1525 ; CHECK-NEXT: vs8r.v v24, (a3) # Unknown-size Folded Spill
1526 ; CHECK-NEXT: fsrm a2
1527 ; CHECK-NEXT: addi a2, sp, 16
1528 ; CHECK-NEXT: vl8r.v v24, (a2) # Unknown-size Folded Reload
1529 ; CHECK-NEXT: vfcvt.f.x.v v24, v24, v0.t
1530 ; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu
1531 ; CHECK-NEXT: vfsgnj.vv v16, v24, v16, v0.t
1532 ; CHECK-NEXT: vs8r.v v16, (a2) # Unknown-size Folded Spill
1533 ; CHECK-NEXT: bltu a0, a1, .LBB44_2
1534 ; CHECK-NEXT: # %bb.1:
1535 ; CHECK-NEXT: mv a0, a1
1536 ; CHECK-NEXT: .LBB44_2:
1537 ; CHECK-NEXT: vmv1r.v v0, v7
1538 ; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma
1539 ; CHECK-NEXT: vfabs.v v16, v8, v0.t
1540 ; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu
1541 ; CHECK-NEXT: vmflt.vf v7, v16, fa5, v0.t
1542 ; CHECK-NEXT: fsrmi a0, 4
1543 ; CHECK-NEXT: vmv1r.v v0, v7
1544 ; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, ma
1545 ; CHECK-NEXT: vfcvt.x.f.v v16, v8, v0.t
1546 ; CHECK-NEXT: fsrm a0
1547 ; CHECK-NEXT: vfcvt.f.x.v v16, v16, v0.t
1548 ; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu
1549 ; CHECK-NEXT: vfsgnj.vv v8, v16, v8, v0.t
1550 ; CHECK-NEXT: addi a0, sp, 16
1551 ; CHECK-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload
1552 ; CHECK-NEXT: csrr a0, vlenb
1553 ; CHECK-NEXT: slli a0, a0, 3
1554 ; CHECK-NEXT: add sp, sp, a0
1555 ; CHECK-NEXT: .cfi_def_cfa sp, 16
1556 ; CHECK-NEXT: addi sp, sp, 16
1557 ; CHECK-NEXT: .cfi_def_cfa_offset 0
1559 %v = call <vscale x 16 x double> @llvm.vp.round.nxv16f64(<vscale x 16 x double> %va, <vscale x 16 x i1> %m, i32 %evl)
1560 ret <vscale x 16 x double> %v
; Unmasked llvm.vp.round on <vscale x 16 x double> (mask is splat (i1 true)).
; The value occupies two m8 register groups, each rounded separately at e64/m8
; with the threshold from the constant pool (.LCPI45_0):
;   - upper group (v16): length is evl - vlenb clamped to zero on underflow
;     (sub / sltu / addi -1 / and);
;   - lower group (v8, after .LBB45_2): length is min(evl, vlenb) (bltu / mv).
; No stack frame or mask slide is expected here, unlike the masked variant.
1563 define <vscale x 16 x double> @vp_round_nxv16f64_unmasked(<vscale x 16 x double> %va, i32 zeroext %evl) {
1564 ; CHECK-LABEL: vp_round_nxv16f64_unmasked:
1566 ; CHECK-NEXT: csrr a1, vlenb
1567 ; CHECK-NEXT: lui a2, %hi(.LCPI45_0)
1568 ; CHECK-NEXT: sub a3, a0, a1
1569 ; CHECK-NEXT: fld fa5, %lo(.LCPI45_0)(a2)
1570 ; CHECK-NEXT: sltu a2, a0, a3
1571 ; CHECK-NEXT: addi a2, a2, -1
1572 ; CHECK-NEXT: and a2, a2, a3
1573 ; CHECK-NEXT: vsetvli zero, a2, e64, m8, ta, ma
1574 ; CHECK-NEXT: vfabs.v v24, v16
1575 ; CHECK-NEXT: vmflt.vf v0, v24, fa5
1576 ; CHECK-NEXT: fsrmi a2, 4
1577 ; CHECK-NEXT: vfcvt.x.f.v v24, v16, v0.t
1578 ; CHECK-NEXT: fsrm a2
1579 ; CHECK-NEXT: vfcvt.f.x.v v24, v24, v0.t
1580 ; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu
1581 ; CHECK-NEXT: vfsgnj.vv v16, v24, v16, v0.t
1582 ; CHECK-NEXT: bltu a0, a1, .LBB45_2
1583 ; CHECK-NEXT: # %bb.1:
1584 ; CHECK-NEXT: mv a0, a1
1585 ; CHECK-NEXT: .LBB45_2:
1586 ; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma
1587 ; CHECK-NEXT: vfabs.v v24, v8
1588 ; CHECK-NEXT: vmflt.vf v0, v24, fa5
1589 ; CHECK-NEXT: fsrmi a0, 4
1590 ; CHECK-NEXT: vfcvt.x.f.v v24, v8, v0.t
1591 ; CHECK-NEXT: fsrm a0
1592 ; CHECK-NEXT: vfcvt.f.x.v v24, v24, v0.t
1593 ; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu
1594 ; CHECK-NEXT: vfsgnj.vv v8, v24, v8, v0.t
1596 %v = call <vscale x 16 x double> @llvm.vp.round.nxv16f64(<vscale x 16 x double> %va, <vscale x 16 x i1> splat (i1 true), i32 %evl)
1597 ret <vscale x 16 x double> %v