; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mtriple=riscv32 -target-abi=ilp32d -mattr=+v,+zfh,+zvfh,+f,+d -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV32
; RUN: llc -mtriple=riscv64 -target-abi=lp64d -mattr=+v,+zfh,+zvfh,+f,+d -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV64

define void @fp2si_v2f32_v2i32(ptr %x, ptr %y) {
; CHECK-LABEL: fp2si_v2f32_v2i32:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 2, e32, mf2, ta, ma
; CHECK-NEXT:    vle32.v v8, (a0)
; CHECK-NEXT:    vmfne.vv v0, v8, v8
; CHECK-NEXT:    vfcvt.rtz.x.f.v v8, v8
; CHECK-NEXT:    vmerge.vim v8, v8, 0, v0
; CHECK-NEXT:    vse32.v v8, (a1)
; CHECK-NEXT:    ret
  %a = load <2 x float>, ptr %x
  %d = call <2 x i32> @llvm.fptosi.sat.v2i32.v2f32(<2 x float> %a)
  store <2 x i32> %d, ptr %y
  ret void
}
declare <2 x i32> @llvm.fptosi.sat.v2i32.v2f32(<2 x float>)

define void @fp2ui_v2f32_v2i32(ptr %x, ptr %y) {
; CHECK-LABEL: fp2ui_v2f32_v2i32:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 2, e32, mf2, ta, ma
; CHECK-NEXT:    vle32.v v8, (a0)
; CHECK-NEXT:    vmfne.vv v0, v8, v8
; CHECK-NEXT:    vfcvt.rtz.xu.f.v v8, v8
; CHECK-NEXT:    vmerge.vim v8, v8, 0, v0
; CHECK-NEXT:    vse32.v v8, (a1)
; CHECK-NEXT:    ret
  %a = load <2 x float>, ptr %x
  %d = call <2 x i32> @llvm.fptoui.sat.v2i32.v2f32(<2 x float> %a)
  store <2 x i32> %d, ptr %y
  ret void
}
declare <2 x i32> @llvm.fptoui.sat.v2i32.v2f32(<2 x float>)

define void @fp2si_v8f32_v8i32(ptr %x, ptr %y) {
; CHECK-LABEL: fp2si_v8f32_v8i32:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 8, e32, m2, ta, ma
; CHECK-NEXT:    vle32.v v8, (a0)
; CHECK-NEXT:    vmfne.vv v0, v8, v8
; CHECK-NEXT:    vfcvt.rtz.x.f.v v8, v8
; CHECK-NEXT:    vmerge.vim v8, v8, 0, v0
; CHECK-NEXT:    vse32.v v8, (a1)
; CHECK-NEXT:    ret
  %a = load <8 x float>, ptr %x
  %d = call <8 x i32> @llvm.fptosi.sat.v8i32.v8f32(<8 x float> %a)
  store <8 x i32> %d, ptr %y
  ret void
}
declare <8 x i32> @llvm.fptosi.sat.v8i32.v8f32(<8 x float>)

define void @fp2ui_v8f32_v8i32(ptr %x, ptr %y) {
; CHECK-LABEL: fp2ui_v8f32_v8i32:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 8, e32, m2, ta, ma
; CHECK-NEXT:    vle32.v v8, (a0)
; CHECK-NEXT:    vmfne.vv v0, v8, v8
; CHECK-NEXT:    vfcvt.rtz.xu.f.v v8, v8
; CHECK-NEXT:    vmerge.vim v8, v8, 0, v0
; CHECK-NEXT:    vse32.v v8, (a1)
; CHECK-NEXT:    ret
  %a = load <8 x float>, ptr %x
  %d = call <8 x i32> @llvm.fptoui.sat.v8i32.v8f32(<8 x float> %a)
  store <8 x i32> %d, ptr %y
  ret void
}
declare <8 x i32> @llvm.fptoui.sat.v8i32.v8f32(<8 x float>)

define void @fp2si_v2f32_v2i64(ptr %x, ptr %y) {
; CHECK-LABEL: fp2si_v2f32_v2i64:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 2, e32, mf2, ta, ma
; CHECK-NEXT:    vle32.v v8, (a0)
; CHECK-NEXT:    vmfne.vv v0, v8, v8
; CHECK-NEXT:    vfwcvt.rtz.x.f.v v9, v8
; CHECK-NEXT:    vsetvli zero, zero, e64, m1, ta, ma
; CHECK-NEXT:    vmerge.vim v8, v9, 0, v0
; CHECK-NEXT:    vse64.v v8, (a1)
; CHECK-NEXT:    ret
  %a = load <2 x float>, ptr %x
  %d = call <2 x i64> @llvm.fptosi.sat.v2i64.v2f32(<2 x float> %a)
  store <2 x i64> %d, ptr %y
  ret void
}
declare <2 x i64> @llvm.fptosi.sat.v2i64.v2f32(<2 x float>)

define void @fp2ui_v2f32_v2i64(ptr %x, ptr %y) {
; CHECK-LABEL: fp2ui_v2f32_v2i64:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 2, e32, mf2, ta, ma
; CHECK-NEXT:    vle32.v v8, (a0)
; CHECK-NEXT:    vmfne.vv v0, v8, v8
; CHECK-NEXT:    vfwcvt.rtz.xu.f.v v9, v8
; CHECK-NEXT:    vsetvli zero, zero, e64, m1, ta, ma
; CHECK-NEXT:    vmerge.vim v8, v9, 0, v0
; CHECK-NEXT:    vse64.v v8, (a1)
; CHECK-NEXT:    ret
  %a = load <2 x float>, ptr %x
  %d = call <2 x i64> @llvm.fptoui.sat.v2i64.v2f32(<2 x float> %a)
  store <2 x i64> %d, ptr %y
  ret void
}
declare <2 x i64> @llvm.fptoui.sat.v2i64.v2f32(<2 x float>)

define void @fp2si_v8f32_v8i64(ptr %x, ptr %y) {
; CHECK-LABEL: fp2si_v8f32_v8i64:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 8, e32, m2, ta, ma
; CHECK-NEXT:    vle32.v v8, (a0)
; CHECK-NEXT:    vmfne.vv v0, v8, v8
; CHECK-NEXT:    vfwcvt.rtz.x.f.v v12, v8
; CHECK-NEXT:    vsetvli zero, zero, e64, m4, ta, ma
; CHECK-NEXT:    vmerge.vim v8, v12, 0, v0
; CHECK-NEXT:    vse64.v v8, (a1)
; CHECK-NEXT:    ret
  %a = load <8 x float>, ptr %x
  %d = call <8 x i64> @llvm.fptosi.sat.v8i64.v8f32(<8 x float> %a)
  store <8 x i64> %d, ptr %y
  ret void
}
declare <8 x i64> @llvm.fptosi.sat.v8i64.v8f32(<8 x float>)

define void @fp2ui_v8f32_v8i64(ptr %x, ptr %y) {
; CHECK-LABEL: fp2ui_v8f32_v8i64:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 8, e32, m2, ta, ma
; CHECK-NEXT:    vle32.v v8, (a0)
; CHECK-NEXT:    vmfne.vv v0, v8, v8
; CHECK-NEXT:    vfwcvt.rtz.xu.f.v v12, v8
; CHECK-NEXT:    vsetvli zero, zero, e64, m4, ta, ma
; CHECK-NEXT:    vmerge.vim v8, v12, 0, v0
; CHECK-NEXT:    vse64.v v8, (a1)
; CHECK-NEXT:    ret
  %a = load <8 x float>, ptr %x
  %d = call <8 x i64> @llvm.fptoui.sat.v8i64.v8f32(<8 x float> %a)
  store <8 x i64> %d, ptr %y
  ret void
}
declare <8 x i64> @llvm.fptoui.sat.v8i64.v8f32(<8 x float>)

define void @fp2si_v2f16_v2i64(ptr %x, ptr %y) {
; CHECK-LABEL: fp2si_v2f16_v2i64:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 2, e16, mf4, ta, ma
; CHECK-NEXT:    vle16.v v8, (a0)
; CHECK-NEXT:    vmfne.vv v0, v8, v8
; CHECK-NEXT:    vfwcvt.f.f.v v9, v8
; CHECK-NEXT:    vsetvli zero, zero, e32, mf2, ta, ma
; CHECK-NEXT:    vfwcvt.rtz.x.f.v v8, v9
; CHECK-NEXT:    vsetvli zero, zero, e64, m1, ta, ma
; CHECK-NEXT:    vmerge.vim v8, v8, 0, v0
; CHECK-NEXT:    vse64.v v8, (a1)
; CHECK-NEXT:    ret
  %a = load <2 x half>, ptr %x
  %d = call <2 x i64> @llvm.fptosi.sat.v2i64.v2f16(<2 x half> %a)
  store <2 x i64> %d, ptr %y
  ret void
}
declare <2 x i64> @llvm.fptosi.sat.v2i64.v2f16(<2 x half>)

define void @fp2ui_v2f16_v2i64(ptr %x, ptr %y) {
; CHECK-LABEL: fp2ui_v2f16_v2i64:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 2, e16, mf4, ta, ma
; CHECK-NEXT:    vle16.v v8, (a0)
; CHECK-NEXT:    vmfne.vv v0, v8, v8
; CHECK-NEXT:    vfwcvt.f.f.v v9, v8
; CHECK-NEXT:    vsetvli zero, zero, e32, mf2, ta, ma
; CHECK-NEXT:    vfwcvt.rtz.xu.f.v v8, v9
; CHECK-NEXT:    vsetvli zero, zero, e64, m1, ta, ma
; CHECK-NEXT:    vmerge.vim v8, v8, 0, v0
; CHECK-NEXT:    vse64.v v8, (a1)
; CHECK-NEXT:    ret
  %a = load <2 x half>, ptr %x
  %d = call <2 x i64> @llvm.fptoui.sat.v2i64.v2f16(<2 x half> %a)
  store <2 x i64> %d, ptr %y
  ret void
}
declare <2 x i64> @llvm.fptoui.sat.v2i64.v2f16(<2 x half>)

define void @fp2si_v2f64_v2i8(ptr %x, ptr %y) {
; RV32-LABEL: fp2si_v2f64_v2i8:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetivli zero, 2, e64, m1, ta, ma
; RV32-NEXT:    vle64.v v8, (a0)
; RV32-NEXT:    vslidedown.vi v9, v8, 1
; RV32-NEXT:    vfmv.f.s fa5, v9
; RV32-NEXT:    lui a0, %hi(.LCPI10_0)
; RV32-NEXT:    fld fa4, %lo(.LCPI10_0)(a0)
; RV32-NEXT:    lui a0, %hi(.LCPI10_1)
; RV32-NEXT:    fld fa3, %lo(.LCPI10_1)(a0)
; RV32-NEXT:    feq.d a0, fa5, fa5
; RV32-NEXT:    neg a0, a0
; RV32-NEXT:    fmax.d fa5, fa5, fa4
; RV32-NEXT:    fmin.d fa5, fa5, fa3
; RV32-NEXT:    fcvt.w.d a2, fa5, rtz
; RV32-NEXT:    and a0, a0, a2
; RV32-NEXT:    vfmv.f.s fa5, v8
; RV32-NEXT:    feq.d a2, fa5, fa5
; RV32-NEXT:    neg a2, a2
; RV32-NEXT:    fmax.d fa5, fa5, fa4
; RV32-NEXT:    fmin.d fa5, fa5, fa3
; RV32-NEXT:    fcvt.w.d a3, fa5, rtz
; RV32-NEXT:    and a2, a2, a3
; RV32-NEXT:    vsetvli zero, zero, e8, mf8, ta, ma
; RV32-NEXT:    vmv.v.x v8, a2
; RV32-NEXT:    vslide1down.vx v8, v8, a0
; RV32-NEXT:    vse8.v v8, (a1)
; RV32-NEXT:    ret
;
; RV64-LABEL: fp2si_v2f64_v2i8:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetivli zero, 2, e64, m1, ta, ma
; RV64-NEXT:    vle64.v v8, (a0)
; RV64-NEXT:    vslidedown.vi v9, v8, 1
; RV64-NEXT:    vfmv.f.s fa5, v9
; RV64-NEXT:    lui a0, %hi(.LCPI10_0)
; RV64-NEXT:    fld fa4, %lo(.LCPI10_0)(a0)
; RV64-NEXT:    lui a0, %hi(.LCPI10_1)
; RV64-NEXT:    fld fa3, %lo(.LCPI10_1)(a0)
; RV64-NEXT:    feq.d a0, fa5, fa5
; RV64-NEXT:    neg a0, a0
; RV64-NEXT:    fmax.d fa5, fa5, fa4
; RV64-NEXT:    fmin.d fa5, fa5, fa3
; RV64-NEXT:    fcvt.l.d a2, fa5, rtz
; RV64-NEXT:    and a0, a0, a2
; RV64-NEXT:    vfmv.f.s fa5, v8
; RV64-NEXT:    feq.d a2, fa5, fa5
; RV64-NEXT:    negw a2, a2
; RV64-NEXT:    fmax.d fa5, fa5, fa4
; RV64-NEXT:    fmin.d fa5, fa5, fa3
; RV64-NEXT:    fcvt.l.d a3, fa5, rtz
; RV64-NEXT:    and a2, a2, a3
; RV64-NEXT:    vsetvli zero, zero, e8, mf8, ta, ma
; RV64-NEXT:    vmv.v.x v8, a2
; RV64-NEXT:    vslide1down.vx v8, v8, a0
; RV64-NEXT:    vse8.v v8, (a1)
; RV64-NEXT:    ret
  %a = load <2 x double>, ptr %x
  %d = call <2 x i8> @llvm.fptosi.sat.v2i8.v2f64(<2 x double> %a)
  store <2 x i8> %d, ptr %y
  ret void
}
declare <2 x i8> @llvm.fptosi.sat.v2i8.v2f64(<2 x double>)

define void @fp2ui_v2f64_v2i8(ptr %x, ptr %y) {
; RV32-LABEL: fp2ui_v2f64_v2i8:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetivli zero, 2, e64, m1, ta, ma
; RV32-NEXT:    vle64.v v8, (a0)
; RV32-NEXT:    vslidedown.vi v9, v8, 1
; RV32-NEXT:    lui a0, %hi(.LCPI11_0)
; RV32-NEXT:    fld fa5, %lo(.LCPI11_0)(a0)
; RV32-NEXT:    vfmv.f.s fa4, v9
; RV32-NEXT:    fcvt.d.w fa3, zero
; RV32-NEXT:    fmax.d fa4, fa4, fa3
; RV32-NEXT:    fmin.d fa4, fa4, fa5
; RV32-NEXT:    fcvt.wu.d a0, fa4, rtz
; RV32-NEXT:    vfmv.f.s fa4, v8
; RV32-NEXT:    fmax.d fa4, fa4, fa3
; RV32-NEXT:    fmin.d fa5, fa4, fa5
; RV32-NEXT:    fcvt.wu.d a2, fa5, rtz
; RV32-NEXT:    vsetvli zero, zero, e8, mf8, ta, ma
; RV32-NEXT:    vmv.v.x v8, a2
; RV32-NEXT:    vslide1down.vx v8, v8, a0
; RV32-NEXT:    vse8.v v8, (a1)
; RV32-NEXT:    ret
;
; RV64-LABEL: fp2ui_v2f64_v2i8:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetivli zero, 2, e64, m1, ta, ma
; RV64-NEXT:    vle64.v v8, (a0)
; RV64-NEXT:    vslidedown.vi v9, v8, 1
; RV64-NEXT:    lui a0, %hi(.LCPI11_0)
; RV64-NEXT:    fld fa5, %lo(.LCPI11_0)(a0)
; RV64-NEXT:    vfmv.f.s fa4, v9
; RV64-NEXT:    fmv.d.x fa3, zero
; RV64-NEXT:    fmax.d fa4, fa4, fa3
; RV64-NEXT:    fmin.d fa4, fa4, fa5
; RV64-NEXT:    fcvt.lu.d a0, fa4, rtz
; RV64-NEXT:    vfmv.f.s fa4, v8
; RV64-NEXT:    fmax.d fa4, fa4, fa3
; RV64-NEXT:    fmin.d fa5, fa4, fa5
; RV64-NEXT:    fcvt.lu.d a2, fa5, rtz
; RV64-NEXT:    vsetvli zero, zero, e8, mf8, ta, ma
; RV64-NEXT:    vmv.v.x v8, a2
; RV64-NEXT:    vslide1down.vx v8, v8, a0
; RV64-NEXT:    vse8.v v8, (a1)
; RV64-NEXT:    ret
  %a = load <2 x double>, ptr %x
  %d = call <2 x i8> @llvm.fptoui.sat.v2i8.v2f64(<2 x double> %a)
  store <2 x i8> %d, ptr %y
  ret void
}
declare <2 x i8> @llvm.fptoui.sat.v2i8.v2f64(<2 x double>)

define void @fp2si_v8f64_v8i8(ptr %x, ptr %y) {
; RV32-LABEL: fp2si_v8f64_v8i8:
; RV32:       # %bb.0:
; RV32-NEXT:    addi sp, sp, -128
; RV32-NEXT:    .cfi_def_cfa_offset 128
; RV32-NEXT:    sw ra, 124(sp) # 4-byte Folded Spill
; RV32-NEXT:    sw s0, 120(sp) # 4-byte Folded Spill
; RV32-NEXT:    .cfi_offset ra, -4
; RV32-NEXT:    .cfi_offset s0, -8
; RV32-NEXT:    addi s0, sp, 128
; RV32-NEXT:    .cfi_def_cfa s0, 0
; RV32-NEXT:    andi sp, sp, -64
; RV32-NEXT:    vsetivli zero, 8, e64, m4, ta, ma
; RV32-NEXT:    vle64.v v8, (a0)
; RV32-NEXT:    mv a0, sp
; RV32-NEXT:    vse64.v v8, (a0)
; RV32-NEXT:    vsetivli zero, 1, e64, m1, ta, ma
; RV32-NEXT:    vslidedown.vi v10, v8, 1
; RV32-NEXT:    vfmv.f.s fa3, v10
; RV32-NEXT:    lui a0, %hi(.LCPI12_0)
; RV32-NEXT:    fld fa5, %lo(.LCPI12_0)(a0)
; RV32-NEXT:    lui a0, %hi(.LCPI12_1)
; RV32-NEXT:    fld fa4, %lo(.LCPI12_1)(a0)
; RV32-NEXT:    feq.d a0, fa3, fa3
; RV32-NEXT:    neg a0, a0
; RV32-NEXT:    fmax.d fa3, fa3, fa5
; RV32-NEXT:    fmin.d fa3, fa3, fa4
; RV32-NEXT:    fcvt.w.d a2, fa3, rtz
; RV32-NEXT:    and a0, a0, a2
; RV32-NEXT:    vfmv.f.s fa3, v8
; RV32-NEXT:    feq.d a2, fa3, fa3
; RV32-NEXT:    neg a2, a2
; RV32-NEXT:    fmax.d fa3, fa3, fa5
; RV32-NEXT:    fmin.d fa3, fa3, fa4
; RV32-NEXT:    fcvt.w.d a3, fa3, rtz
; RV32-NEXT:    and a2, a2, a3
; RV32-NEXT:    vsetivli zero, 8, e8, mf2, ta, ma
; RV32-NEXT:    vmv.v.x v10, a2
; RV32-NEXT:    vslide1down.vx v10, v10, a0
; RV32-NEXT:    vsetivli zero, 1, e64, m2, ta, ma
; RV32-NEXT:    vslidedown.vi v12, v8, 2
; RV32-NEXT:    vfmv.f.s fa3, v12
; RV32-NEXT:    feq.d a0, fa3, fa3
; RV32-NEXT:    neg a0, a0
; RV32-NEXT:    fmax.d fa3, fa3, fa5
; RV32-NEXT:    fmin.d fa3, fa3, fa4
; RV32-NEXT:    fcvt.w.d a2, fa3, rtz
; RV32-NEXT:    and a0, a0, a2
; RV32-NEXT:    vslidedown.vi v8, v8, 3
; RV32-NEXT:    vfmv.f.s fa3, v8
; RV32-NEXT:    feq.d a2, fa3, fa3
; RV32-NEXT:    fmax.d fa3, fa3, fa5
; RV32-NEXT:    fmin.d fa3, fa3, fa4
; RV32-NEXT:    fcvt.w.d a3, fa3, rtz
; RV32-NEXT:    fld fa3, 40(sp)
; RV32-NEXT:    vsetivli zero, 8, e8, mf2, ta, mu
; RV32-NEXT:    vslide1down.vx v8, v10, a0
; RV32-NEXT:    neg a0, a2
; RV32-NEXT:    and a0, a0, a3
; RV32-NEXT:    feq.d a2, fa3, fa3
; RV32-NEXT:    fmax.d fa3, fa3, fa5
; RV32-NEXT:    fmin.d fa3, fa3, fa4
; RV32-NEXT:    fcvt.w.d a3, fa3, rtz
; RV32-NEXT:    fld fa3, 32(sp)
; RV32-NEXT:    vslide1down.vx v8, v8, a0
; RV32-NEXT:    neg a0, a2
; RV32-NEXT:    and a0, a0, a3
; RV32-NEXT:    feq.d a2, fa3, fa3
; RV32-NEXT:    neg a2, a2
; RV32-NEXT:    fmax.d fa3, fa3, fa5
; RV32-NEXT:    fmin.d fa3, fa3, fa4
; RV32-NEXT:    fcvt.w.d a3, fa3, rtz
; RV32-NEXT:    fld fa3, 48(sp)
; RV32-NEXT:    and a2, a2, a3
; RV32-NEXT:    vmv.v.x v9, a2
; RV32-NEXT:    vslide1down.vx v9, v9, a0
; RV32-NEXT:    feq.d a0, fa3, fa3
; RV32-NEXT:    fmax.d fa3, fa3, fa5
; RV32-NEXT:    fmin.d fa3, fa3, fa4
; RV32-NEXT:    fcvt.w.d a2, fa3, rtz
; RV32-NEXT:    fld fa3, 56(sp)
; RV32-NEXT:    neg a0, a0
; RV32-NEXT:    and a0, a0, a2
; RV32-NEXT:    vslide1down.vx v9, v9, a0
; RV32-NEXT:    feq.d a0, fa3, fa3
; RV32-NEXT:    neg a0, a0
; RV32-NEXT:    fmax.d fa5, fa3, fa5
; RV32-NEXT:    fmin.d fa5, fa5, fa4
; RV32-NEXT:    fcvt.w.d a2, fa5, rtz
; RV32-NEXT:    and a0, a0, a2
; RV32-NEXT:    vmv.v.i v0, 15
; RV32-NEXT:    vslide1down.vx v9, v9, a0
; RV32-NEXT:    vslidedown.vi v9, v8, 4, v0.t
; RV32-NEXT:    vse8.v v9, (a1)
; RV32-NEXT:    addi sp, s0, -128
; RV32-NEXT:    lw ra, 124(sp) # 4-byte Folded Reload
; RV32-NEXT:    lw s0, 120(sp) # 4-byte Folded Reload
; RV32-NEXT:    addi sp, sp, 128
; RV32-NEXT:    ret
;
; RV64-LABEL: fp2si_v8f64_v8i8:
; RV64:       # %bb.0:
; RV64-NEXT:    addi sp, sp, -128
; RV64-NEXT:    .cfi_def_cfa_offset 128
; RV64-NEXT:    sd ra, 120(sp) # 8-byte Folded Spill
; RV64-NEXT:    sd s0, 112(sp) # 8-byte Folded Spill
; RV64-NEXT:    .cfi_offset ra, -8
; RV64-NEXT:    .cfi_offset s0, -16
; RV64-NEXT:    addi s0, sp, 128
; RV64-NEXT:    .cfi_def_cfa s0, 0
; RV64-NEXT:    andi sp, sp, -64
; RV64-NEXT:    vsetivli zero, 8, e64, m4, ta, ma
; RV64-NEXT:    vle64.v v8, (a0)
; RV64-NEXT:    mv a0, sp
; RV64-NEXT:    vse64.v v8, (a0)
; RV64-NEXT:    vsetivli zero, 1, e64, m1, ta, ma
; RV64-NEXT:    vslidedown.vi v10, v8, 1
; RV64-NEXT:    vfmv.f.s fa3, v10
; RV64-NEXT:    lui a0, %hi(.LCPI12_0)
; RV64-NEXT:    fld fa5, %lo(.LCPI12_0)(a0)
; RV64-NEXT:    lui a0, %hi(.LCPI12_1)
; RV64-NEXT:    fld fa4, %lo(.LCPI12_1)(a0)
; RV64-NEXT:    feq.d a0, fa3, fa3
; RV64-NEXT:    neg a0, a0
; RV64-NEXT:    fmax.d fa3, fa3, fa5
; RV64-NEXT:    fmin.d fa3, fa3, fa4
; RV64-NEXT:    fcvt.l.d a2, fa3, rtz
; RV64-NEXT:    and a0, a0, a2
; RV64-NEXT:    vfmv.f.s fa3, v8
; RV64-NEXT:    feq.d a2, fa3, fa3
; RV64-NEXT:    negw a2, a2
; RV64-NEXT:    fmax.d fa3, fa3, fa5
; RV64-NEXT:    fmin.d fa3, fa3, fa4
; RV64-NEXT:    fcvt.l.d a3, fa3, rtz
; RV64-NEXT:    and a2, a2, a3
; RV64-NEXT:    vsetivli zero, 8, e8, mf2, ta, ma
; RV64-NEXT:    vmv.v.x v10, a2
; RV64-NEXT:    vslide1down.vx v10, v10, a0
; RV64-NEXT:    vsetivli zero, 1, e64, m2, ta, ma
; RV64-NEXT:    vslidedown.vi v12, v8, 2
; RV64-NEXT:    vfmv.f.s fa3, v12
; RV64-NEXT:    feq.d a0, fa3, fa3
; RV64-NEXT:    neg a0, a0
; RV64-NEXT:    fmax.d fa3, fa3, fa5
; RV64-NEXT:    fmin.d fa3, fa3, fa4
; RV64-NEXT:    fcvt.l.d a2, fa3, rtz
; RV64-NEXT:    and a0, a0, a2
; RV64-NEXT:    vslidedown.vi v8, v8, 3
; RV64-NEXT:    vfmv.f.s fa3, v8
; RV64-NEXT:    feq.d a2, fa3, fa3
; RV64-NEXT:    fmax.d fa3, fa3, fa5
; RV64-NEXT:    fmin.d fa3, fa3, fa4
; RV64-NEXT:    fcvt.l.d a3, fa3, rtz
; RV64-NEXT:    fld fa3, 40(sp)
; RV64-NEXT:    vsetivli zero, 8, e8, mf2, ta, mu
; RV64-NEXT:    vslide1down.vx v8, v10, a0
; RV64-NEXT:    neg a0, a2
; RV64-NEXT:    and a0, a0, a3
; RV64-NEXT:    feq.d a2, fa3, fa3
; RV64-NEXT:    fmax.d fa3, fa3, fa5
; RV64-NEXT:    fmin.d fa3, fa3, fa4
; RV64-NEXT:    fcvt.l.d a3, fa3, rtz
; RV64-NEXT:    fld fa3, 32(sp)
; RV64-NEXT:    vslide1down.vx v8, v8, a0
; RV64-NEXT:    neg a0, a2
; RV64-NEXT:    and a0, a0, a3
; RV64-NEXT:    feq.d a2, fa3, fa3
; RV64-NEXT:    negw a2, a2
; RV64-NEXT:    fmax.d fa3, fa3, fa5
; RV64-NEXT:    fmin.d fa3, fa3, fa4
; RV64-NEXT:    fcvt.l.d a3, fa3, rtz
; RV64-NEXT:    fld fa3, 48(sp)
; RV64-NEXT:    and a2, a2, a3
; RV64-NEXT:    vmv.v.x v9, a2
; RV64-NEXT:    vslide1down.vx v9, v9, a0
; RV64-NEXT:    feq.d a0, fa3, fa3
; RV64-NEXT:    fmax.d fa3, fa3, fa5
; RV64-NEXT:    fmin.d fa3, fa3, fa4
; RV64-NEXT:    fcvt.l.d a2, fa3, rtz
; RV64-NEXT:    fld fa3, 56(sp)
; RV64-NEXT:    neg a0, a0
; RV64-NEXT:    and a0, a0, a2
; RV64-NEXT:    vslide1down.vx v9, v9, a0
; RV64-NEXT:    feq.d a0, fa3, fa3
; RV64-NEXT:    neg a0, a0
; RV64-NEXT:    fmax.d fa5, fa3, fa5
; RV64-NEXT:    fmin.d fa5, fa5, fa4
; RV64-NEXT:    fcvt.l.d a2, fa5, rtz
; RV64-NEXT:    and a0, a0, a2
; RV64-NEXT:    vmv.v.i v0, 15
; RV64-NEXT:    vslide1down.vx v9, v9, a0
; RV64-NEXT:    vslidedown.vi v9, v8, 4, v0.t
; RV64-NEXT:    vse8.v v9, (a1)
; RV64-NEXT:    addi sp, s0, -128
; RV64-NEXT:    ld ra, 120(sp) # 8-byte Folded Reload
; RV64-NEXT:    ld s0, 112(sp) # 8-byte Folded Reload
; RV64-NEXT:    addi sp, sp, 128
; RV64-NEXT:    ret
  %a = load <8 x double>, ptr %x
  %d = call <8 x i8> @llvm.fptosi.sat.v8i8.v8f64(<8 x double> %a)
  store <8 x i8> %d, ptr %y
  ret void
}
declare <8 x i8> @llvm.fptosi.sat.v8i8.v8f64(<8 x double>)

define void @fp2ui_v8f64_v8i8(ptr %x, ptr %y) {
; RV32-LABEL: fp2ui_v8f64_v8i8:
; RV32:       # %bb.0:
; RV32-NEXT:    addi sp, sp, -128
; RV32-NEXT:    .cfi_def_cfa_offset 128
; RV32-NEXT:    sw ra, 124(sp) # 4-byte Folded Spill
; RV32-NEXT:    sw s0, 120(sp) # 4-byte Folded Spill
; RV32-NEXT:    .cfi_offset ra, -4
; RV32-NEXT:    .cfi_offset s0, -8
; RV32-NEXT:    addi s0, sp, 128
; RV32-NEXT:    .cfi_def_cfa s0, 0
; RV32-NEXT:    andi sp, sp, -64
; RV32-NEXT:    vsetivli zero, 8, e64, m4, ta, ma
; RV32-NEXT:    vle64.v v8, (a0)
; RV32-NEXT:    mv a0, sp
; RV32-NEXT:    vse64.v v8, (a0)
; RV32-NEXT:    vsetivli zero, 1, e64, m1, ta, ma
; RV32-NEXT:    vslidedown.vi v10, v8, 1
; RV32-NEXT:    lui a0, %hi(.LCPI13_0)
; RV32-NEXT:    fld fa5, %lo(.LCPI13_0)(a0)
; RV32-NEXT:    vfmv.f.s fa4, v10
; RV32-NEXT:    fcvt.d.w fa3, zero
; RV32-NEXT:    fmax.d fa4, fa4, fa3
; RV32-NEXT:    fmin.d fa4, fa4, fa5
; RV32-NEXT:    fcvt.wu.d a0, fa4, rtz
; RV32-NEXT:    vfmv.f.s fa4, v8
; RV32-NEXT:    fmax.d fa4, fa4, fa3
; RV32-NEXT:    fmin.d fa4, fa4, fa5
; RV32-NEXT:    fcvt.wu.d a2, fa4, rtz
; RV32-NEXT:    vsetivli zero, 1, e64, m2, ta, ma
; RV32-NEXT:    vslidedown.vi v10, v8, 2
; RV32-NEXT:    vfmv.f.s fa4, v10
; RV32-NEXT:    fmax.d fa4, fa4, fa3
; RV32-NEXT:    fmin.d fa4, fa4, fa5
; RV32-NEXT:    fcvt.wu.d a3, fa4, rtz
; RV32-NEXT:    vslidedown.vi v8, v8, 3
; RV32-NEXT:    vfmv.f.s fa4, v8
; RV32-NEXT:    fld fa2, 40(sp)
; RV32-NEXT:    fmax.d fa4, fa4, fa3
; RV32-NEXT:    fmin.d fa4, fa4, fa5
; RV32-NEXT:    fcvt.wu.d a4, fa4, rtz
; RV32-NEXT:    fmax.d fa4, fa2, fa3
; RV32-NEXT:    fld fa2, 32(sp)
; RV32-NEXT:    fmin.d fa4, fa4, fa5
; RV32-NEXT:    fcvt.wu.d a5, fa4, rtz
; RV32-NEXT:    vsetivli zero, 8, e8, mf2, ta, mu
; RV32-NEXT:    vmv.v.x v8, a2
; RV32-NEXT:    fmax.d fa4, fa2, fa3
; RV32-NEXT:    fmin.d fa4, fa4, fa5
; RV32-NEXT:    fcvt.wu.d a2, fa4, rtz
; RV32-NEXT:    fld fa4, 48(sp)
; RV32-NEXT:    vslide1down.vx v8, v8, a0
; RV32-NEXT:    vslide1down.vx v8, v8, a3
; RV32-NEXT:    vslide1down.vx v8, v8, a4
; RV32-NEXT:    fmax.d fa4, fa4, fa3
; RV32-NEXT:    fmin.d fa4, fa4, fa5
; RV32-NEXT:    fcvt.wu.d a0, fa4, rtz
; RV32-NEXT:    fld fa4, 56(sp)
; RV32-NEXT:    vmv.v.x v9, a2
; RV32-NEXT:    vslide1down.vx v9, v9, a5
; RV32-NEXT:    vslide1down.vx v9, v9, a0
; RV32-NEXT:    fmax.d fa4, fa4, fa3
; RV32-NEXT:    fmin.d fa5, fa4, fa5
; RV32-NEXT:    fcvt.wu.d a0, fa5, rtz
; RV32-NEXT:    vmv.v.i v0, 15
; RV32-NEXT:    vslide1down.vx v9, v9, a0
; RV32-NEXT:    vslidedown.vi v9, v8, 4, v0.t
; RV32-NEXT:    vse8.v v9, (a1)
; RV32-NEXT:    addi sp, s0, -128
; RV32-NEXT:    lw ra, 124(sp) # 4-byte Folded Reload
; RV32-NEXT:    lw s0, 120(sp) # 4-byte Folded Reload
; RV32-NEXT:    addi sp, sp, 128
; RV32-NEXT:    ret
;
; RV64-LABEL: fp2ui_v8f64_v8i8:
; RV64:       # %bb.0:
; RV64-NEXT:    addi sp, sp, -128
; RV64-NEXT:    .cfi_def_cfa_offset 128
; RV64-NEXT:    sd ra, 120(sp) # 8-byte Folded Spill
; RV64-NEXT:    sd s0, 112(sp) # 8-byte Folded Spill
; RV64-NEXT:    .cfi_offset ra, -8
; RV64-NEXT:    .cfi_offset s0, -16
; RV64-NEXT:    addi s0, sp, 128
; RV64-NEXT:    .cfi_def_cfa s0, 0
; RV64-NEXT:    andi sp, sp, -64
; RV64-NEXT:    vsetivli zero, 8, e64, m4, ta, ma
; RV64-NEXT:    vle64.v v8, (a0)
; RV64-NEXT:    mv a0, sp
; RV64-NEXT:    vse64.v v8, (a0)
; RV64-NEXT:    vsetivli zero, 1, e64, m1, ta, ma
; RV64-NEXT:    vslidedown.vi v10, v8, 1
; RV64-NEXT:    lui a0, %hi(.LCPI13_0)
; RV64-NEXT:    fld fa5, %lo(.LCPI13_0)(a0)
; RV64-NEXT:    vfmv.f.s fa4, v10
; RV64-NEXT:    fmv.d.x fa3, zero
; RV64-NEXT:    fmax.d fa4, fa4, fa3
; RV64-NEXT:    fmin.d fa4, fa4, fa5
; RV64-NEXT:    fcvt.lu.d a0, fa4, rtz
; RV64-NEXT:    vfmv.f.s fa4, v8
; RV64-NEXT:    fmax.d fa4, fa4, fa3
; RV64-NEXT:    fmin.d fa4, fa4, fa5
; RV64-NEXT:    fcvt.lu.d a2, fa4, rtz
; RV64-NEXT:    vsetivli zero, 1, e64, m2, ta, ma
; RV64-NEXT:    vslidedown.vi v10, v8, 2
; RV64-NEXT:    vfmv.f.s fa4, v10
; RV64-NEXT:    fmax.d fa4, fa4, fa3
; RV64-NEXT:    fmin.d fa4, fa4, fa5
; RV64-NEXT:    fcvt.lu.d a3, fa4, rtz
; RV64-NEXT:    vslidedown.vi v8, v8, 3
; RV64-NEXT:    vfmv.f.s fa4, v8
; RV64-NEXT:    fld fa2, 40(sp)
; RV64-NEXT:    fmax.d fa4, fa4, fa3
; RV64-NEXT:    fmin.d fa4, fa4, fa5
; RV64-NEXT:    fcvt.lu.d a4, fa4, rtz
; RV64-NEXT:    fmax.d fa4, fa2, fa3
; RV64-NEXT:    fld fa2, 32(sp)
; RV64-NEXT:    fmin.d fa4, fa4, fa5
; RV64-NEXT:    fcvt.lu.d a5, fa4, rtz
; RV64-NEXT:    vsetivli zero, 8, e8, mf2, ta, mu
; RV64-NEXT:    vmv.v.x v8, a2
; RV64-NEXT:    fmax.d fa4, fa2, fa3
; RV64-NEXT:    fmin.d fa4, fa4, fa5
; RV64-NEXT:    fcvt.lu.d a2, fa4, rtz
; RV64-NEXT:    fld fa4, 48(sp)
; RV64-NEXT:    vslide1down.vx v8, v8, a0
; RV64-NEXT:    vslide1down.vx v8, v8, a3
; RV64-NEXT:    vslide1down.vx v8, v8, a4
; RV64-NEXT:    fmax.d fa4, fa4, fa3
; RV64-NEXT:    fmin.d fa4, fa4, fa5
; RV64-NEXT:    fcvt.lu.d a0, fa4, rtz
; RV64-NEXT:    fld fa4, 56(sp)
; RV64-NEXT:    vmv.v.x v9, a2
; RV64-NEXT:    vslide1down.vx v9, v9, a5
; RV64-NEXT:    vslide1down.vx v9, v9, a0
; RV64-NEXT:    fmax.d fa4, fa4, fa3
; RV64-NEXT:    fmin.d fa5, fa4, fa5
; RV64-NEXT:    fcvt.lu.d a0, fa5, rtz
; RV64-NEXT:    vmv.v.i v0, 15
; RV64-NEXT:    vslide1down.vx v9, v9, a0
; RV64-NEXT:    vslidedown.vi v9, v8, 4, v0.t
; RV64-NEXT:    vse8.v v9, (a1)
; RV64-NEXT:    addi sp, s0, -128
; RV64-NEXT:    ld ra, 120(sp) # 8-byte Folded Reload
; RV64-NEXT:    ld s0, 112(sp) # 8-byte Folded Reload
; RV64-NEXT:    addi sp, sp, 128
; RV64-NEXT:    ret
  %a = load <8 x double>, ptr %x
  %d = call <8 x i8> @llvm.fptoui.sat.v8i8.v8f64(<8 x double> %a)
  store <8 x i8> %d, ptr %y
  ret void
}
declare <8 x i8> @llvm.fptoui.sat.v8i8.v8f64(<8 x double>)

define void @fp2si_v2f64_v2i32(ptr %x, ptr %y) {
; CHECK-LABEL: fp2si_v2f64_v2i32:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 2, e64, m1, ta, ma
; CHECK-NEXT:    vle64.v v8, (a0)
; CHECK-NEXT:    vmfne.vv v0, v8, v8
; CHECK-NEXT:    vsetvli zero, zero, e32, mf2, ta, ma
; CHECK-NEXT:    vfncvt.rtz.x.f.w v9, v8
; CHECK-NEXT:    vmerge.vim v8, v9, 0, v0
; CHECK-NEXT:    vse32.v v8, (a1)
; CHECK-NEXT:    ret
  %a = load <2 x double>, ptr %x
  %d = call <2 x i32> @llvm.fptosi.sat.v2i32.v2f64(<2 x double> %a)
  store <2 x i32> %d, ptr %y
  ret void
}
declare <2 x i32> @llvm.fptosi.sat.v2i32.v2f64(<2 x double>)

define void @fp2ui_v2f64_v2i32(ptr %x, ptr %y) {
; CHECK-LABEL: fp2ui_v2f64_v2i32:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 2, e64, m1, ta, ma
; CHECK-NEXT:    vle64.v v8, (a0)
; CHECK-NEXT:    vmfne.vv v0, v8, v8
; CHECK-NEXT:    vsetvli zero, zero, e32, mf2, ta, ma
; CHECK-NEXT:    vfncvt.rtz.xu.f.w v9, v8
; CHECK-NEXT:    vmerge.vim v8, v9, 0, v0
; CHECK-NEXT:    vse32.v v8, (a1)
; CHECK-NEXT:    ret
  %a = load <2 x double>, ptr %x
  %d = call <2 x i32> @llvm.fptoui.sat.v2i32.v2f64(<2 x double> %a)
  store <2 x i32> %d, ptr %y
  ret void
}
declare <2 x i32> @llvm.fptoui.sat.v2i32.v2f64(<2 x double>)