1 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2 ; RUN: llc -mtriple=riscv32 -mattr=+zfh -verify-machineinstrs \
3 ; RUN: -target-abi ilp32f < %s | FileCheck -check-prefix=CHECKIZFH %s
4 ; RUN: llc -mtriple=riscv64 -mattr=+zfh -verify-machineinstrs \
5 ; RUN: -target-abi lp64f < %s | FileCheck -check-prefix=CHECKIZFH %s
6 ; RUN: llc -mtriple=riscv32 -mattr=+zhinx -verify-machineinstrs \
7 ; RUN: -target-abi ilp32 < %s | FileCheck -check-prefix=CHECKIZHINX %s
8 ; RUN: llc -mtriple=riscv64 -mattr=+zhinx -verify-machineinstrs \
9 ; RUN: -target-abi lp64 < %s | FileCheck -check-prefix=CHECKIZHINX %s
10 ; RUN: llc -mtriple=riscv32 -verify-machineinstrs < %s \
11 ; RUN: | FileCheck -check-prefix=RV32I %s
12 ; RUN: llc -mtriple=riscv64 -verify-machineinstrs < %s \
13 ; RUN: | FileCheck -check-prefix=RV64I %s
14 ; RUN: llc -mtriple=riscv32 -mattr=+zfhmin -verify-machineinstrs \
15 ; RUN: -target-abi ilp32f < %s | FileCheck -check-prefixes=CHECKIZFHMIN,RV32IZFHMIN %s
16 ; RUN: llc -mtriple=riscv64 -mattr=+zfhmin -verify-machineinstrs \
17 ; RUN: -target-abi lp64f < %s | FileCheck --check-prefixes=CHECKIZFHMIN,RV64IZFHMIN %s
18 ; RUN: llc -mtriple=riscv32 -mattr=+zhinxmin -verify-machineinstrs \
19 ; RUN: -target-abi ilp32 < %s | FileCheck --check-prefixes=CHECKIZHINXMIN,RV32IZHINXMIN %s
20 ; RUN: llc -mtriple=riscv64 -mattr=+zhinxmin -verify-machineinstrs \
21 ; RUN: -target-abi lp64 < %s | FileCheck --check-prefixes=CHECKIZHINXMIN,RV64IZHINXMIN %s
23 ; These tests are each targeted at a particular RISC-V FPU instruction.
24 ; Compares and conversions can be found in half-fcmp.ll and half-convert.ll
25 ; respectively. Some other half-*.ll files in this folder exercise LLVM IR
26 ; instructions that don't directly match a RISC-V instruction.
; Tests lowering of a half-precision `fadd`.
; Expected code per configuration (see the RUN lines at the top of the file):
;   +zfh / +zhinx       -> a single fadd.h (on fa0/fa1 for zfh, on a0/a1 for
;                          zhinx).
;   no FP extension     -> each operand is masked with 0xffff (lui 16 then
;                          addi/addiw -1), widened by __extendhfsf2, added by
;                          __addsf3 and narrowed again by __truncsfhf2.
;   +zfhmin / +zhinxmin -> fcvt.s.h on both operands, fadd.s, then fcvt.h.s.
28 define half @fadd_h(half %a, half %b) nounwind {
29 ; CHECKIZFH-LABEL: fadd_h:
31 ; CHECKIZFH-NEXT: fadd.h fa0, fa0, fa1
34 ; CHECKIZHINX-LABEL: fadd_h:
35 ; CHECKIZHINX: # %bb.0:
36 ; CHECKIZHINX-NEXT: fadd.h a0, a0, a1
37 ; CHECKIZHINX-NEXT: ret
39 ; RV32I-LABEL: fadd_h:
41 ; RV32I-NEXT: addi sp, sp, -16
42 ; RV32I-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
43 ; RV32I-NEXT: sw s0, 8(sp) # 4-byte Folded Spill
44 ; RV32I-NEXT: sw s1, 4(sp) # 4-byte Folded Spill
45 ; RV32I-NEXT: sw s2, 0(sp) # 4-byte Folded Spill
46 ; RV32I-NEXT: mv s0, a1
47 ; RV32I-NEXT: lui a1, 16
48 ; RV32I-NEXT: addi s2, a1, -1
49 ; RV32I-NEXT: and a0, a0, s2
50 ; RV32I-NEXT: call __extendhfsf2
51 ; RV32I-NEXT: mv s1, a0
52 ; RV32I-NEXT: and a0, s0, s2
53 ; RV32I-NEXT: call __extendhfsf2
54 ; RV32I-NEXT: mv a1, a0
55 ; RV32I-NEXT: mv a0, s1
56 ; RV32I-NEXT: call __addsf3
57 ; RV32I-NEXT: call __truncsfhf2
58 ; RV32I-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
59 ; RV32I-NEXT: lw s0, 8(sp) # 4-byte Folded Reload
60 ; RV32I-NEXT: lw s1, 4(sp) # 4-byte Folded Reload
61 ; RV32I-NEXT: lw s2, 0(sp) # 4-byte Folded Reload
62 ; RV32I-NEXT: addi sp, sp, 16
65 ; RV64I-LABEL: fadd_h:
67 ; RV64I-NEXT: addi sp, sp, -32
68 ; RV64I-NEXT: sd ra, 24(sp) # 8-byte Folded Spill
69 ; RV64I-NEXT: sd s0, 16(sp) # 8-byte Folded Spill
70 ; RV64I-NEXT: sd s1, 8(sp) # 8-byte Folded Spill
71 ; RV64I-NEXT: sd s2, 0(sp) # 8-byte Folded Spill
72 ; RV64I-NEXT: mv s0, a1
73 ; RV64I-NEXT: lui a1, 16
74 ; RV64I-NEXT: addiw s2, a1, -1
75 ; RV64I-NEXT: and a0, a0, s2
76 ; RV64I-NEXT: call __extendhfsf2
77 ; RV64I-NEXT: mv s1, a0
78 ; RV64I-NEXT: and a0, s0, s2
79 ; RV64I-NEXT: call __extendhfsf2
80 ; RV64I-NEXT: mv a1, a0
81 ; RV64I-NEXT: mv a0, s1
82 ; RV64I-NEXT: call __addsf3
83 ; RV64I-NEXT: call __truncsfhf2
84 ; RV64I-NEXT: ld ra, 24(sp) # 8-byte Folded Reload
85 ; RV64I-NEXT: ld s0, 16(sp) # 8-byte Folded Reload
86 ; RV64I-NEXT: ld s1, 8(sp) # 8-byte Folded Reload
87 ; RV64I-NEXT: ld s2, 0(sp) # 8-byte Folded Reload
88 ; RV64I-NEXT: addi sp, sp, 32
91 ; CHECKIZFHMIN-LABEL: fadd_h:
92 ; CHECKIZFHMIN: # %bb.0:
93 ; CHECKIZFHMIN-NEXT: fcvt.s.h fa5, fa1
94 ; CHECKIZFHMIN-NEXT: fcvt.s.h fa4, fa0
95 ; CHECKIZFHMIN-NEXT: fadd.s fa5, fa4, fa5
96 ; CHECKIZFHMIN-NEXT: fcvt.h.s fa0, fa5
97 ; CHECKIZFHMIN-NEXT: ret
99 ; CHECKIZHINXMIN-LABEL: fadd_h:
100 ; CHECKIZHINXMIN: # %bb.0:
101 ; CHECKIZHINXMIN-NEXT: fcvt.s.h a1, a1
102 ; CHECKIZHINXMIN-NEXT: fcvt.s.h a0, a0
103 ; CHECKIZHINXMIN-NEXT: fadd.s a0, a0, a1
104 ; CHECKIZHINXMIN-NEXT: fcvt.h.s a0, a0
105 ; CHECKIZHINXMIN-NEXT: ret
106 %1 = fadd half %a, %b
; Tests lowering of a half-precision `fsub`.
; Expected code per configuration (see the RUN lines at the top of the file):
;   +zfh / +zhinx       -> a single fsub.h (on fa0/fa1 for zfh, on a0/a1 for
;                          zhinx).
;   no FP extension     -> each operand is masked with 0xffff (lui 16 then
;                          addi/addiw -1), widened by __extendhfsf2,
;                          subtracted by __subsf3 and narrowed again by
;                          __truncsfhf2.
;   +zfhmin / +zhinxmin -> fcvt.s.h on both operands, fsub.s, then fcvt.h.s.
110 define half @fsub_h(half %a, half %b) nounwind {
111 ; CHECKIZFH-LABEL: fsub_h:
112 ; CHECKIZFH: # %bb.0:
113 ; CHECKIZFH-NEXT: fsub.h fa0, fa0, fa1
114 ; CHECKIZFH-NEXT: ret
116 ; CHECKIZHINX-LABEL: fsub_h:
117 ; CHECKIZHINX: # %bb.0:
118 ; CHECKIZHINX-NEXT: fsub.h a0, a0, a1
119 ; CHECKIZHINX-NEXT: ret
121 ; RV32I-LABEL: fsub_h:
123 ; RV32I-NEXT: addi sp, sp, -16
124 ; RV32I-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
125 ; RV32I-NEXT: sw s0, 8(sp) # 4-byte Folded Spill
126 ; RV32I-NEXT: sw s1, 4(sp) # 4-byte Folded Spill
127 ; RV32I-NEXT: sw s2, 0(sp) # 4-byte Folded Spill
128 ; RV32I-NEXT: mv s0, a1
129 ; RV32I-NEXT: lui a1, 16
130 ; RV32I-NEXT: addi s2, a1, -1
131 ; RV32I-NEXT: and a0, a0, s2
132 ; RV32I-NEXT: call __extendhfsf2
133 ; RV32I-NEXT: mv s1, a0
134 ; RV32I-NEXT: and a0, s0, s2
135 ; RV32I-NEXT: call __extendhfsf2
136 ; RV32I-NEXT: mv a1, a0
137 ; RV32I-NEXT: mv a0, s1
138 ; RV32I-NEXT: call __subsf3
139 ; RV32I-NEXT: call __truncsfhf2
140 ; RV32I-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
141 ; RV32I-NEXT: lw s0, 8(sp) # 4-byte Folded Reload
142 ; RV32I-NEXT: lw s1, 4(sp) # 4-byte Folded Reload
143 ; RV32I-NEXT: lw s2, 0(sp) # 4-byte Folded Reload
144 ; RV32I-NEXT: addi sp, sp, 16
147 ; RV64I-LABEL: fsub_h:
149 ; RV64I-NEXT: addi sp, sp, -32
150 ; RV64I-NEXT: sd ra, 24(sp) # 8-byte Folded Spill
151 ; RV64I-NEXT: sd s0, 16(sp) # 8-byte Folded Spill
152 ; RV64I-NEXT: sd s1, 8(sp) # 8-byte Folded Spill
153 ; RV64I-NEXT: sd s2, 0(sp) # 8-byte Folded Spill
154 ; RV64I-NEXT: mv s0, a1
155 ; RV64I-NEXT: lui a1, 16
156 ; RV64I-NEXT: addiw s2, a1, -1
157 ; RV64I-NEXT: and a0, a0, s2
158 ; RV64I-NEXT: call __extendhfsf2
159 ; RV64I-NEXT: mv s1, a0
160 ; RV64I-NEXT: and a0, s0, s2
161 ; RV64I-NEXT: call __extendhfsf2
162 ; RV64I-NEXT: mv a1, a0
163 ; RV64I-NEXT: mv a0, s1
164 ; RV64I-NEXT: call __subsf3
165 ; RV64I-NEXT: call __truncsfhf2
166 ; RV64I-NEXT: ld ra, 24(sp) # 8-byte Folded Reload
167 ; RV64I-NEXT: ld s0, 16(sp) # 8-byte Folded Reload
168 ; RV64I-NEXT: ld s1, 8(sp) # 8-byte Folded Reload
169 ; RV64I-NEXT: ld s2, 0(sp) # 8-byte Folded Reload
170 ; RV64I-NEXT: addi sp, sp, 32
173 ; CHECKIZFHMIN-LABEL: fsub_h:
174 ; CHECKIZFHMIN: # %bb.0:
175 ; CHECKIZFHMIN-NEXT: fcvt.s.h fa5, fa1
176 ; CHECKIZFHMIN-NEXT: fcvt.s.h fa4, fa0
177 ; CHECKIZFHMIN-NEXT: fsub.s fa5, fa4, fa5
178 ; CHECKIZFHMIN-NEXT: fcvt.h.s fa0, fa5
179 ; CHECKIZFHMIN-NEXT: ret
181 ; CHECKIZHINXMIN-LABEL: fsub_h:
182 ; CHECKIZHINXMIN: # %bb.0:
183 ; CHECKIZHINXMIN-NEXT: fcvt.s.h a1, a1
184 ; CHECKIZHINXMIN-NEXT: fcvt.s.h a0, a0
185 ; CHECKIZHINXMIN-NEXT: fsub.s a0, a0, a1
186 ; CHECKIZHINXMIN-NEXT: fcvt.h.s a0, a0
187 ; CHECKIZHINXMIN-NEXT: ret
188 %1 = fsub half %a, %b
; Tests lowering of a half-precision `fmul`.
; Expected code per configuration (see the RUN lines at the top of the file):
;   +zfh / +zhinx       -> a single fmul.h (on fa0/fa1 for zfh, on a0/a1 for
;                          zhinx).
;   no FP extension     -> each operand is masked with 0xffff (lui 16 then
;                          addi/addiw -1), widened by __extendhfsf2,
;                          multiplied by __mulsf3 and narrowed again by
;                          __truncsfhf2.
;   +zfhmin / +zhinxmin -> fcvt.s.h on both operands, fmul.s, then fcvt.h.s.
192 define half @fmul_h(half %a, half %b) nounwind {
193 ; CHECKIZFH-LABEL: fmul_h:
194 ; CHECKIZFH: # %bb.0:
195 ; CHECKIZFH-NEXT: fmul.h fa0, fa0, fa1
196 ; CHECKIZFH-NEXT: ret
198 ; CHECKIZHINX-LABEL: fmul_h:
199 ; CHECKIZHINX: # %bb.0:
200 ; CHECKIZHINX-NEXT: fmul.h a0, a0, a1
201 ; CHECKIZHINX-NEXT: ret
203 ; RV32I-LABEL: fmul_h:
205 ; RV32I-NEXT: addi sp, sp, -16
206 ; RV32I-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
207 ; RV32I-NEXT: sw s0, 8(sp) # 4-byte Folded Spill
208 ; RV32I-NEXT: sw s1, 4(sp) # 4-byte Folded Spill
209 ; RV32I-NEXT: sw s2, 0(sp) # 4-byte Folded Spill
210 ; RV32I-NEXT: mv s0, a1
211 ; RV32I-NEXT: lui a1, 16
212 ; RV32I-NEXT: addi s2, a1, -1
213 ; RV32I-NEXT: and a0, a0, s2
214 ; RV32I-NEXT: call __extendhfsf2
215 ; RV32I-NEXT: mv s1, a0
216 ; RV32I-NEXT: and a0, s0, s2
217 ; RV32I-NEXT: call __extendhfsf2
218 ; RV32I-NEXT: mv a1, a0
219 ; RV32I-NEXT: mv a0, s1
220 ; RV32I-NEXT: call __mulsf3
221 ; RV32I-NEXT: call __truncsfhf2
222 ; RV32I-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
223 ; RV32I-NEXT: lw s0, 8(sp) # 4-byte Folded Reload
224 ; RV32I-NEXT: lw s1, 4(sp) # 4-byte Folded Reload
225 ; RV32I-NEXT: lw s2, 0(sp) # 4-byte Folded Reload
226 ; RV32I-NEXT: addi sp, sp, 16
229 ; RV64I-LABEL: fmul_h:
231 ; RV64I-NEXT: addi sp, sp, -32
232 ; RV64I-NEXT: sd ra, 24(sp) # 8-byte Folded Spill
233 ; RV64I-NEXT: sd s0, 16(sp) # 8-byte Folded Spill
234 ; RV64I-NEXT: sd s1, 8(sp) # 8-byte Folded Spill
235 ; RV64I-NEXT: sd s2, 0(sp) # 8-byte Folded Spill
236 ; RV64I-NEXT: mv s0, a1
237 ; RV64I-NEXT: lui a1, 16
238 ; RV64I-NEXT: addiw s2, a1, -1
239 ; RV64I-NEXT: and a0, a0, s2
240 ; RV64I-NEXT: call __extendhfsf2
241 ; RV64I-NEXT: mv s1, a0
242 ; RV64I-NEXT: and a0, s0, s2
243 ; RV64I-NEXT: call __extendhfsf2
244 ; RV64I-NEXT: mv a1, a0
245 ; RV64I-NEXT: mv a0, s1
246 ; RV64I-NEXT: call __mulsf3
247 ; RV64I-NEXT: call __truncsfhf2
248 ; RV64I-NEXT: ld ra, 24(sp) # 8-byte Folded Reload
249 ; RV64I-NEXT: ld s0, 16(sp) # 8-byte Folded Reload
250 ; RV64I-NEXT: ld s1, 8(sp) # 8-byte Folded Reload
251 ; RV64I-NEXT: ld s2, 0(sp) # 8-byte Folded Reload
252 ; RV64I-NEXT: addi sp, sp, 32
255 ; CHECKIZFHMIN-LABEL: fmul_h:
256 ; CHECKIZFHMIN: # %bb.0:
257 ; CHECKIZFHMIN-NEXT: fcvt.s.h fa5, fa1
258 ; CHECKIZFHMIN-NEXT: fcvt.s.h fa4, fa0
259 ; CHECKIZFHMIN-NEXT: fmul.s fa5, fa4, fa5
260 ; CHECKIZFHMIN-NEXT: fcvt.h.s fa0, fa5
261 ; CHECKIZFHMIN-NEXT: ret
263 ; CHECKIZHINXMIN-LABEL: fmul_h:
264 ; CHECKIZHINXMIN: # %bb.0:
265 ; CHECKIZHINXMIN-NEXT: fcvt.s.h a1, a1
266 ; CHECKIZHINXMIN-NEXT: fcvt.s.h a0, a0
267 ; CHECKIZHINXMIN-NEXT: fmul.s a0, a0, a1
268 ; CHECKIZHINXMIN-NEXT: fcvt.h.s a0, a0
269 ; CHECKIZHINXMIN-NEXT: ret
270 %1 = fmul half %a, %b
; Tests lowering of a half-precision `fdiv`.
; Expected code per configuration (see the RUN lines at the top of the file):
;   +zfh / +zhinx       -> a single fdiv.h (on fa0/fa1 for zfh, on a0/a1 for
;                          zhinx).
;   no FP extension     -> each operand is masked with 0xffff (lui 16 then
;                          addi/addiw -1), widened by __extendhfsf2, divided
;                          by __divsf3 and narrowed again by __truncsfhf2.
;   +zfhmin / +zhinxmin -> fcvt.s.h on both operands, fdiv.s, then fcvt.h.s.
274 define half @fdiv_h(half %a, half %b) nounwind {
275 ; CHECKIZFH-LABEL: fdiv_h:
276 ; CHECKIZFH: # %bb.0:
277 ; CHECKIZFH-NEXT: fdiv.h fa0, fa0, fa1
278 ; CHECKIZFH-NEXT: ret
280 ; CHECKIZHINX-LABEL: fdiv_h:
281 ; CHECKIZHINX: # %bb.0:
282 ; CHECKIZHINX-NEXT: fdiv.h a0, a0, a1
283 ; CHECKIZHINX-NEXT: ret
285 ; RV32I-LABEL: fdiv_h:
287 ; RV32I-NEXT: addi sp, sp, -16
288 ; RV32I-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
289 ; RV32I-NEXT: sw s0, 8(sp) # 4-byte Folded Spill
290 ; RV32I-NEXT: sw s1, 4(sp) # 4-byte Folded Spill
291 ; RV32I-NEXT: sw s2, 0(sp) # 4-byte Folded Spill
292 ; RV32I-NEXT: mv s0, a1
293 ; RV32I-NEXT: lui a1, 16
294 ; RV32I-NEXT: addi s2, a1, -1
295 ; RV32I-NEXT: and a0, a0, s2
296 ; RV32I-NEXT: call __extendhfsf2
297 ; RV32I-NEXT: mv s1, a0
298 ; RV32I-NEXT: and a0, s0, s2
299 ; RV32I-NEXT: call __extendhfsf2
300 ; RV32I-NEXT: mv a1, a0
301 ; RV32I-NEXT: mv a0, s1
302 ; RV32I-NEXT: call __divsf3
303 ; RV32I-NEXT: call __truncsfhf2
304 ; RV32I-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
305 ; RV32I-NEXT: lw s0, 8(sp) # 4-byte Folded Reload
306 ; RV32I-NEXT: lw s1, 4(sp) # 4-byte Folded Reload
307 ; RV32I-NEXT: lw s2, 0(sp) # 4-byte Folded Reload
308 ; RV32I-NEXT: addi sp, sp, 16
311 ; RV64I-LABEL: fdiv_h:
313 ; RV64I-NEXT: addi sp, sp, -32
314 ; RV64I-NEXT: sd ra, 24(sp) # 8-byte Folded Spill
315 ; RV64I-NEXT: sd s0, 16(sp) # 8-byte Folded Spill
316 ; RV64I-NEXT: sd s1, 8(sp) # 8-byte Folded Spill
317 ; RV64I-NEXT: sd s2, 0(sp) # 8-byte Folded Spill
318 ; RV64I-NEXT: mv s0, a1
319 ; RV64I-NEXT: lui a1, 16
320 ; RV64I-NEXT: addiw s2, a1, -1
321 ; RV64I-NEXT: and a0, a0, s2
322 ; RV64I-NEXT: call __extendhfsf2
323 ; RV64I-NEXT: mv s1, a0
324 ; RV64I-NEXT: and a0, s0, s2
325 ; RV64I-NEXT: call __extendhfsf2
326 ; RV64I-NEXT: mv a1, a0
327 ; RV64I-NEXT: mv a0, s1
328 ; RV64I-NEXT: call __divsf3
329 ; RV64I-NEXT: call __truncsfhf2
330 ; RV64I-NEXT: ld ra, 24(sp) # 8-byte Folded Reload
331 ; RV64I-NEXT: ld s0, 16(sp) # 8-byte Folded Reload
332 ; RV64I-NEXT: ld s1, 8(sp) # 8-byte Folded Reload
333 ; RV64I-NEXT: ld s2, 0(sp) # 8-byte Folded Reload
334 ; RV64I-NEXT: addi sp, sp, 32
337 ; CHECKIZFHMIN-LABEL: fdiv_h:
338 ; CHECKIZFHMIN: # %bb.0:
339 ; CHECKIZFHMIN-NEXT: fcvt.s.h fa5, fa1
340 ; CHECKIZFHMIN-NEXT: fcvt.s.h fa4, fa0
341 ; CHECKIZFHMIN-NEXT: fdiv.s fa5, fa4, fa5
342 ; CHECKIZFHMIN-NEXT: fcvt.h.s fa0, fa5
343 ; CHECKIZFHMIN-NEXT: ret
345 ; CHECKIZHINXMIN-LABEL: fdiv_h:
346 ; CHECKIZHINXMIN: # %bb.0:
347 ; CHECKIZHINXMIN-NEXT: fcvt.s.h a1, a1
348 ; CHECKIZHINXMIN-NEXT: fcvt.s.h a0, a0
349 ; CHECKIZHINXMIN-NEXT: fdiv.s a0, a0, a1
350 ; CHECKIZHINXMIN-NEXT: fcvt.h.s a0, a0
351 ; CHECKIZHINXMIN-NEXT: ret
352 %1 = fdiv half %a, %b
356 declare half @llvm.sqrt.f16(half)
; Tests lowering of the llvm.sqrt.f16 intrinsic.
; Expected code per configuration (see the RUN lines at the top of the file):
;   +zfh / +zhinx       -> a single fsqrt.h.
;   no FP extension     -> the argument's low 16 bits are zero-extended with a
;                          slli/srli pair (by 16 on RV32, by 48 on RV64), then
;                          __extendhfsf2, sqrtf and __truncsfhf2 are called in
;                          sequence.
;   +zfhmin / +zhinxmin -> fcvt.s.h, fsqrt.s, then fcvt.h.s.
358 define half @fsqrt_h(half %a) nounwind {
359 ; CHECKIZFH-LABEL: fsqrt_h:
360 ; CHECKIZFH: # %bb.0:
361 ; CHECKIZFH-NEXT: fsqrt.h fa0, fa0
362 ; CHECKIZFH-NEXT: ret
364 ; CHECKIZHINX-LABEL: fsqrt_h:
365 ; CHECKIZHINX: # %bb.0:
366 ; CHECKIZHINX-NEXT: fsqrt.h a0, a0
367 ; CHECKIZHINX-NEXT: ret
369 ; RV32I-LABEL: fsqrt_h:
371 ; RV32I-NEXT: addi sp, sp, -16
372 ; RV32I-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
373 ; RV32I-NEXT: slli a0, a0, 16
374 ; RV32I-NEXT: srli a0, a0, 16
375 ; RV32I-NEXT: call __extendhfsf2
376 ; RV32I-NEXT: call sqrtf
377 ; RV32I-NEXT: call __truncsfhf2
378 ; RV32I-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
379 ; RV32I-NEXT: addi sp, sp, 16
382 ; RV64I-LABEL: fsqrt_h:
384 ; RV64I-NEXT: addi sp, sp, -16
385 ; RV64I-NEXT: sd ra, 8(sp) # 8-byte Folded Spill
386 ; RV64I-NEXT: slli a0, a0, 48
387 ; RV64I-NEXT: srli a0, a0, 48
388 ; RV64I-NEXT: call __extendhfsf2
389 ; RV64I-NEXT: call sqrtf
390 ; RV64I-NEXT: call __truncsfhf2
391 ; RV64I-NEXT: ld ra, 8(sp) # 8-byte Folded Reload
392 ; RV64I-NEXT: addi sp, sp, 16
395 ; CHECKIZFHMIN-LABEL: fsqrt_h:
396 ; CHECKIZFHMIN: # %bb.0:
397 ; CHECKIZFHMIN-NEXT: fcvt.s.h fa5, fa0
398 ; CHECKIZFHMIN-NEXT: fsqrt.s fa5, fa5
399 ; CHECKIZFHMIN-NEXT: fcvt.h.s fa0, fa5
400 ; CHECKIZFHMIN-NEXT: ret
402 ; CHECKIZHINXMIN-LABEL: fsqrt_h:
403 ; CHECKIZHINXMIN: # %bb.0:
404 ; CHECKIZHINXMIN-NEXT: fcvt.s.h a0, a0
405 ; CHECKIZHINXMIN-NEXT: fsqrt.s a0, a0
406 ; CHECKIZHINXMIN-NEXT: fcvt.h.s a0, a0
407 ; CHECKIZHINXMIN-NEXT: ret
408 %1 = call half @llvm.sqrt.f16(half %a)
412 declare half @llvm.copysign.f16(half, half)
; Tests lowering of llvm.copysign.f16(%a, %b).
; Expected code per configuration (see the RUN lines at the top of the file):
;   +zfh / +zhinx   -> a single fsgnj.h.
;   no FP extension -> pure integer bit manipulation: lui 1048568 builds the
;                      mask 0xffff8000 that keeps bit 15 (the half sign bit)
;                      and above of %b, a slli/srli pair (by 17 on RV32, by 49
;                      on RV64) keeps the low 15 bits of %a, and the two are
;                      or'd together.
;   +zfhmin         -> the same bit manipulation, with the operands moved to
;                      integer registers by fmv.x.h and the result moved back
;                      by fmv.h.x.
;   +zhinxmin       -> the same bit manipulation directly on a0/a1.
; The zfhmin and zhinxmin expectations use separate 32- and 64-bit prefixes
; here because the shift amounts differ between the two XLENs.
414 define half @fsgnj_h(half %a, half %b) nounwind {
415 ; CHECKIZFH-LABEL: fsgnj_h:
416 ; CHECKIZFH: # %bb.0:
417 ; CHECKIZFH-NEXT: fsgnj.h fa0, fa0, fa1
418 ; CHECKIZFH-NEXT: ret
420 ; CHECKIZHINX-LABEL: fsgnj_h:
421 ; CHECKIZHINX: # %bb.0:
422 ; CHECKIZHINX-NEXT: fsgnj.h a0, a0, a1
423 ; CHECKIZHINX-NEXT: ret
425 ; RV32I-LABEL: fsgnj_h:
427 ; RV32I-NEXT: lui a2, 1048568
428 ; RV32I-NEXT: slli a0, a0, 17
429 ; RV32I-NEXT: and a1, a1, a2
430 ; RV32I-NEXT: srli a0, a0, 17
431 ; RV32I-NEXT: or a0, a0, a1
434 ; RV64I-LABEL: fsgnj_h:
436 ; RV64I-NEXT: lui a2, 1048568
437 ; RV64I-NEXT: slli a0, a0, 49
438 ; RV64I-NEXT: and a1, a1, a2
439 ; RV64I-NEXT: srli a0, a0, 49
440 ; RV64I-NEXT: or a0, a0, a1
443 ; RV32IZFHMIN-LABEL: fsgnj_h:
444 ; RV32IZFHMIN: # %bb.0:
445 ; RV32IZFHMIN-NEXT: fmv.x.h a0, fa1
446 ; RV32IZFHMIN-NEXT: lui a1, 1048568
447 ; RV32IZFHMIN-NEXT: and a0, a0, a1
448 ; RV32IZFHMIN-NEXT: fmv.x.h a1, fa0
449 ; RV32IZFHMIN-NEXT: slli a1, a1, 17
450 ; RV32IZFHMIN-NEXT: srli a1, a1, 17
451 ; RV32IZFHMIN-NEXT: or a0, a1, a0
452 ; RV32IZFHMIN-NEXT: fmv.h.x fa0, a0
453 ; RV32IZFHMIN-NEXT: ret
455 ; RV64IZFHMIN-LABEL: fsgnj_h:
456 ; RV64IZFHMIN: # %bb.0:
457 ; RV64IZFHMIN-NEXT: fmv.x.h a0, fa1
458 ; RV64IZFHMIN-NEXT: lui a1, 1048568
459 ; RV64IZFHMIN-NEXT: and a0, a0, a1
460 ; RV64IZFHMIN-NEXT: fmv.x.h a1, fa0
461 ; RV64IZFHMIN-NEXT: slli a1, a1, 49
462 ; RV64IZFHMIN-NEXT: srli a1, a1, 49
463 ; RV64IZFHMIN-NEXT: or a0, a1, a0
464 ; RV64IZFHMIN-NEXT: fmv.h.x fa0, a0
465 ; RV64IZFHMIN-NEXT: ret
467 ; RV32IZHINXMIN-LABEL: fsgnj_h:
468 ; RV32IZHINXMIN: # %bb.0:
469 ; RV32IZHINXMIN-NEXT: # kill: def $x11_h killed $x11_h def $x11
470 ; RV32IZHINXMIN-NEXT: # kill: def $x10_h killed $x10_h def $x10
471 ; RV32IZHINXMIN-NEXT: lui a2, 1048568
472 ; RV32IZHINXMIN-NEXT: slli a0, a0, 17
473 ; RV32IZHINXMIN-NEXT: and a1, a1, a2
474 ; RV32IZHINXMIN-NEXT: srli a0, a0, 17
475 ; RV32IZHINXMIN-NEXT: or a0, a0, a1
476 ; RV32IZHINXMIN-NEXT: # kill: def $x10_h killed $x10_h killed $x10
477 ; RV32IZHINXMIN-NEXT: ret
479 ; RV64IZHINXMIN-LABEL: fsgnj_h:
480 ; RV64IZHINXMIN: # %bb.0:
481 ; RV64IZHINXMIN-NEXT: # kill: def $x11_h killed $x11_h def $x11
482 ; RV64IZHINXMIN-NEXT: # kill: def $x10_h killed $x10_h def $x10
483 ; RV64IZHINXMIN-NEXT: lui a2, 1048568
484 ; RV64IZHINXMIN-NEXT: slli a0, a0, 49
485 ; RV64IZHINXMIN-NEXT: and a1, a1, a2
486 ; RV64IZHINXMIN-NEXT: srli a0, a0, 49
487 ; RV64IZHINXMIN-NEXT: or a0, a0, a1
488 ; RV64IZHINXMIN-NEXT: # kill: def $x10_h killed $x10_h killed $x10
489 ; RV64IZHINXMIN-NEXT: ret
490 %1 = call half @llvm.copysign.f16(half %a, half %b)
494 ; This function performs extra work to ensure that
495 ; DAGCombiner::visitBITCAST doesn't replace the fneg with an xor.
; Computes %1 = %a + %a, compares %1 (ordered-equal) against %2 and
; zero-extends the i1 result to i32. %2 is expected to be the negation of %1,
; going by the comment above and the fneg.h in the zfh output.
; Expected code per configuration (see the RUN lines at the top of the file):
;   +zfh / +zhinx       -> fadd.h, fneg.h, feq.h.
;   no FP extension     -> the sum goes through __addsf3 / __truncsfhf2, the
;                          negation is an xor with 0x80000000 (lui 524288) on
;                          the widened value, and the comparison is __eqsf2
;                          followed by seqz.
;   +zfhmin / +zhinxmin -> the sum is computed in single precision and
;                          narrowed; the negation is an integer xor with the
;                          constant built by lui 1048568 (0xffff8000) on the
;                          half bits; both values are widened again for feq.s.
496 define i32 @fneg_h(half %a, half %b) nounwind {
497 ; CHECKIZFH-LABEL: fneg_h:
498 ; CHECKIZFH: # %bb.0:
499 ; CHECKIZFH-NEXT: fadd.h fa5, fa0, fa0
500 ; CHECKIZFH-NEXT: fneg.h fa4, fa5
501 ; CHECKIZFH-NEXT: feq.h a0, fa5, fa4
502 ; CHECKIZFH-NEXT: ret
504 ; CHECKIZHINX-LABEL: fneg_h:
505 ; CHECKIZHINX: # %bb.0:
506 ; CHECKIZHINX-NEXT: fadd.h a0, a0, a0
507 ; CHECKIZHINX-NEXT: fneg.h a1, a0
508 ; CHECKIZHINX-NEXT: feq.h a0, a0, a1
509 ; CHECKIZHINX-NEXT: ret
511 ; RV32I-LABEL: fneg_h:
513 ; RV32I-NEXT: addi sp, sp, -16
514 ; RV32I-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
515 ; RV32I-NEXT: sw s0, 8(sp) # 4-byte Folded Spill
516 ; RV32I-NEXT: sw s1, 4(sp) # 4-byte Folded Spill
517 ; RV32I-NEXT: lui a1, 16
518 ; RV32I-NEXT: addi s1, a1, -1
519 ; RV32I-NEXT: and a0, a0, s1
520 ; RV32I-NEXT: call __extendhfsf2
521 ; RV32I-NEXT: mv a1, a0
522 ; RV32I-NEXT: call __addsf3
523 ; RV32I-NEXT: call __truncsfhf2
524 ; RV32I-NEXT: and a0, a0, s1
525 ; RV32I-NEXT: call __extendhfsf2
526 ; RV32I-NEXT: mv s0, a0
527 ; RV32I-NEXT: lui a0, 524288
528 ; RV32I-NEXT: xor a0, s0, a0
529 ; RV32I-NEXT: call __truncsfhf2
530 ; RV32I-NEXT: and a0, a0, s1
531 ; RV32I-NEXT: call __extendhfsf2
532 ; RV32I-NEXT: mv a1, a0
533 ; RV32I-NEXT: mv a0, s0
534 ; RV32I-NEXT: call __eqsf2
535 ; RV32I-NEXT: seqz a0, a0
536 ; RV32I-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
537 ; RV32I-NEXT: lw s0, 8(sp) # 4-byte Folded Reload
538 ; RV32I-NEXT: lw s1, 4(sp) # 4-byte Folded Reload
539 ; RV32I-NEXT: addi sp, sp, 16
542 ; RV64I-LABEL: fneg_h:
544 ; RV64I-NEXT: addi sp, sp, -32
545 ; RV64I-NEXT: sd ra, 24(sp) # 8-byte Folded Spill
546 ; RV64I-NEXT: sd s0, 16(sp) # 8-byte Folded Spill
547 ; RV64I-NEXT: sd s1, 8(sp) # 8-byte Folded Spill
548 ; RV64I-NEXT: lui a1, 16
549 ; RV64I-NEXT: addiw s1, a1, -1
550 ; RV64I-NEXT: and a0, a0, s1
551 ; RV64I-NEXT: call __extendhfsf2
552 ; RV64I-NEXT: mv a1, a0
553 ; RV64I-NEXT: call __addsf3
554 ; RV64I-NEXT: call __truncsfhf2
555 ; RV64I-NEXT: and a0, a0, s1
556 ; RV64I-NEXT: call __extendhfsf2
557 ; RV64I-NEXT: mv s0, a0
558 ; RV64I-NEXT: lui a0, 524288
559 ; RV64I-NEXT: xor a0, s0, a0
560 ; RV64I-NEXT: call __truncsfhf2
561 ; RV64I-NEXT: and a0, a0, s1
562 ; RV64I-NEXT: call __extendhfsf2
563 ; RV64I-NEXT: mv a1, a0
564 ; RV64I-NEXT: mv a0, s0
565 ; RV64I-NEXT: call __eqsf2
566 ; RV64I-NEXT: seqz a0, a0
567 ; RV64I-NEXT: ld ra, 24(sp) # 8-byte Folded Reload
568 ; RV64I-NEXT: ld s0, 16(sp) # 8-byte Folded Reload
569 ; RV64I-NEXT: ld s1, 8(sp) # 8-byte Folded Reload
570 ; RV64I-NEXT: addi sp, sp, 32
573 ; CHECKIZFHMIN-LABEL: fneg_h:
574 ; CHECKIZFHMIN: # %bb.0:
575 ; CHECKIZFHMIN-NEXT: fcvt.s.h fa5, fa0
576 ; CHECKIZFHMIN-NEXT: lui a0, 1048568
577 ; CHECKIZFHMIN-NEXT: fadd.s fa5, fa5, fa5
578 ; CHECKIZFHMIN-NEXT: fcvt.h.s fa5, fa5
579 ; CHECKIZFHMIN-NEXT: fmv.x.h a1, fa5
580 ; CHECKIZFHMIN-NEXT: xor a0, a1, a0
581 ; CHECKIZFHMIN-NEXT: fmv.h.x fa4, a0
582 ; CHECKIZFHMIN-NEXT: fcvt.s.h fa4, fa4
583 ; CHECKIZFHMIN-NEXT: fcvt.s.h fa5, fa5
584 ; CHECKIZFHMIN-NEXT: feq.s a0, fa5, fa4
585 ; CHECKIZFHMIN-NEXT: ret
587 ; CHECKIZHINXMIN-LABEL: fneg_h:
588 ; CHECKIZHINXMIN: # %bb.0:
589 ; CHECKIZHINXMIN-NEXT: fcvt.s.h a0, a0
590 ; CHECKIZHINXMIN-NEXT: lui a1, 1048568
591 ; CHECKIZHINXMIN-NEXT: fadd.s a0, a0, a0
592 ; CHECKIZHINXMIN-NEXT: fcvt.h.s a0, a0
593 ; CHECKIZHINXMIN-NEXT: xor a1, a0, a1
594 ; CHECKIZHINXMIN-NEXT: fcvt.s.h a1, a1
595 ; CHECKIZHINXMIN-NEXT: fcvt.s.h a0, a0
596 ; CHECKIZHINXMIN-NEXT: feq.s a0, a0, a1
597 ; CHECKIZHINXMIN-NEXT: ret
598 %1 = fadd half %a, %a
600 %3 = fcmp oeq half %1, %2
601 %4 = zext i1 %3 to i32
605 ; This function performs extra work to ensure that
606 ; DAGCombiner::visitBITCAST doesn't replace the fneg with an xor.
; Computes %1 = %a + %b and then copysign(%a, %2). %2 is expected to be the
; negation of %1, going by the comment above and the fsgnjn.h in the zfh
; output.
; Expected code per configuration (see the RUN lines at the top of the file):
;   +zfh / +zhinx   -> fadd.h followed by a single fsgnjn.h.
;   no FP extension -> the sum goes through __extendhfsf2 / __addsf3 /
;                      __truncsfhf2, is widened again and negated with an xor
;                      against 0x80000000 (lui 524288), narrowed, and then
;                      merged with %a using the 0xffff8000 mask (lui 1048568)
;                      and a slli/srli pair (by 17 on RV32, by 49 on RV64).
;   +zfhmin         -> the sum is computed in single precision and narrowed;
;                      its bits are inverted with `not`, masked with
;                      0xffff8000 and or'd with the low 15 bits of %a, using
;                      fmv.x.h / fmv.h.x to cross register files.
;   +zhinxmin       -> the same not/and/or sequence directly on integer
;                      registers.
607 define half @fsgnjn_h(half %a, half %b) nounwind {
608 ; CHECKIZFH-LABEL: fsgnjn_h:
609 ; CHECKIZFH: # %bb.0:
610 ; CHECKIZFH-NEXT: fadd.h fa5, fa0, fa1
611 ; CHECKIZFH-NEXT: fsgnjn.h fa0, fa0, fa5
612 ; CHECKIZFH-NEXT: ret
614 ; CHECKIZHINX-LABEL: fsgnjn_h:
615 ; CHECKIZHINX: # %bb.0:
616 ; CHECKIZHINX-NEXT: fadd.h a1, a0, a1
617 ; CHECKIZHINX-NEXT: fsgnjn.h a0, a0, a1
618 ; CHECKIZHINX-NEXT: ret
620 ; RV32I-LABEL: fsgnjn_h:
622 ; RV32I-NEXT: addi sp, sp, -32
623 ; RV32I-NEXT: sw ra, 28(sp) # 4-byte Folded Spill
624 ; RV32I-NEXT: sw s0, 24(sp) # 4-byte Folded Spill
625 ; RV32I-NEXT: sw s1, 20(sp) # 4-byte Folded Spill
626 ; RV32I-NEXT: sw s2, 16(sp) # 4-byte Folded Spill
627 ; RV32I-NEXT: sw s3, 12(sp) # 4-byte Folded Spill
628 ; RV32I-NEXT: mv s0, a1
629 ; RV32I-NEXT: mv s1, a0
630 ; RV32I-NEXT: lui a0, 16
631 ; RV32I-NEXT: addi s3, a0, -1
632 ; RV32I-NEXT: and a0, s1, s3
633 ; RV32I-NEXT: call __extendhfsf2
634 ; RV32I-NEXT: mv s2, a0
635 ; RV32I-NEXT: and a0, s0, s3
636 ; RV32I-NEXT: call __extendhfsf2
637 ; RV32I-NEXT: mv a1, a0
638 ; RV32I-NEXT: mv a0, s2
639 ; RV32I-NEXT: call __addsf3
640 ; RV32I-NEXT: call __truncsfhf2
641 ; RV32I-NEXT: and a0, a0, s3
642 ; RV32I-NEXT: call __extendhfsf2
643 ; RV32I-NEXT: lui a1, 524288
644 ; RV32I-NEXT: xor a0, a0, a1
645 ; RV32I-NEXT: call __truncsfhf2
646 ; RV32I-NEXT: lui a1, 1048568
647 ; RV32I-NEXT: slli s1, s1, 17
648 ; RV32I-NEXT: and a0, a0, a1
649 ; RV32I-NEXT: srli s1, s1, 17
650 ; RV32I-NEXT: or a0, s1, a0
651 ; RV32I-NEXT: lw ra, 28(sp) # 4-byte Folded Reload
652 ; RV32I-NEXT: lw s0, 24(sp) # 4-byte Folded Reload
653 ; RV32I-NEXT: lw s1, 20(sp) # 4-byte Folded Reload
654 ; RV32I-NEXT: lw s2, 16(sp) # 4-byte Folded Reload
655 ; RV32I-NEXT: lw s3, 12(sp) # 4-byte Folded Reload
656 ; RV32I-NEXT: addi sp, sp, 32
659 ; RV64I-LABEL: fsgnjn_h:
661 ; RV64I-NEXT: addi sp, sp, -48
662 ; RV64I-NEXT: sd ra, 40(sp) # 8-byte Folded Spill
663 ; RV64I-NEXT: sd s0, 32(sp) # 8-byte Folded Spill
664 ; RV64I-NEXT: sd s1, 24(sp) # 8-byte Folded Spill
665 ; RV64I-NEXT: sd s2, 16(sp) # 8-byte Folded Spill
666 ; RV64I-NEXT: sd s3, 8(sp) # 8-byte Folded Spill
667 ; RV64I-NEXT: mv s0, a1
668 ; RV64I-NEXT: mv s1, a0
669 ; RV64I-NEXT: lui a0, 16
670 ; RV64I-NEXT: addiw s3, a0, -1
671 ; RV64I-NEXT: and a0, s1, s3
672 ; RV64I-NEXT: call __extendhfsf2
673 ; RV64I-NEXT: mv s2, a0
674 ; RV64I-NEXT: and a0, s0, s3
675 ; RV64I-NEXT: call __extendhfsf2
676 ; RV64I-NEXT: mv a1, a0
677 ; RV64I-NEXT: mv a0, s2
678 ; RV64I-NEXT: call __addsf3
679 ; RV64I-NEXT: call __truncsfhf2
680 ; RV64I-NEXT: and a0, a0, s3
681 ; RV64I-NEXT: call __extendhfsf2
682 ; RV64I-NEXT: lui a1, 524288
683 ; RV64I-NEXT: xor a0, a0, a1
684 ; RV64I-NEXT: call __truncsfhf2
685 ; RV64I-NEXT: lui a1, 1048568
686 ; RV64I-NEXT: slli s1, s1, 49
687 ; RV64I-NEXT: and a0, a0, a1
688 ; RV64I-NEXT: srli s1, s1, 49
689 ; RV64I-NEXT: or a0, s1, a0
690 ; RV64I-NEXT: ld ra, 40(sp) # 8-byte Folded Reload
691 ; RV64I-NEXT: ld s0, 32(sp) # 8-byte Folded Reload
692 ; RV64I-NEXT: ld s1, 24(sp) # 8-byte Folded Reload
693 ; RV64I-NEXT: ld s2, 16(sp) # 8-byte Folded Reload
694 ; RV64I-NEXT: ld s3, 8(sp) # 8-byte Folded Reload
695 ; RV64I-NEXT: addi sp, sp, 48
698 ; RV32IZFHMIN-LABEL: fsgnjn_h:
699 ; RV32IZFHMIN: # %bb.0:
700 ; RV32IZFHMIN-NEXT: fcvt.s.h fa5, fa1
701 ; RV32IZFHMIN-NEXT: fcvt.s.h fa4, fa0
702 ; RV32IZFHMIN-NEXT: lui a0, 1048568
703 ; RV32IZFHMIN-NEXT: fadd.s fa5, fa4, fa5
704 ; RV32IZFHMIN-NEXT: fcvt.h.s fa5, fa5
705 ; RV32IZFHMIN-NEXT: fmv.x.h a1, fa5
706 ; RV32IZFHMIN-NEXT: not a1, a1
707 ; RV32IZFHMIN-NEXT: and a0, a1, a0
708 ; RV32IZFHMIN-NEXT: fmv.x.h a1, fa0
709 ; RV32IZFHMIN-NEXT: slli a1, a1, 17
710 ; RV32IZFHMIN-NEXT: srli a1, a1, 17
711 ; RV32IZFHMIN-NEXT: or a0, a1, a0
712 ; RV32IZFHMIN-NEXT: fmv.h.x fa0, a0
713 ; RV32IZFHMIN-NEXT: ret
715 ; RV64IZFHMIN-LABEL: fsgnjn_h:
716 ; RV64IZFHMIN: # %bb.0:
717 ; RV64IZFHMIN-NEXT: fcvt.s.h fa5, fa1
718 ; RV64IZFHMIN-NEXT: fcvt.s.h fa4, fa0
719 ; RV64IZFHMIN-NEXT: lui a0, 1048568
720 ; RV64IZFHMIN-NEXT: fadd.s fa5, fa4, fa5
721 ; RV64IZFHMIN-NEXT: fcvt.h.s fa5, fa5
722 ; RV64IZFHMIN-NEXT: fmv.x.h a1, fa5
723 ; RV64IZFHMIN-NEXT: not a1, a1
724 ; RV64IZFHMIN-NEXT: and a0, a1, a0
725 ; RV64IZFHMIN-NEXT: fmv.x.h a1, fa0
726 ; RV64IZFHMIN-NEXT: slli a1, a1, 49
727 ; RV64IZFHMIN-NEXT: srli a1, a1, 49
728 ; RV64IZFHMIN-NEXT: or a0, a1, a0
729 ; RV64IZFHMIN-NEXT: fmv.h.x fa0, a0
730 ; RV64IZFHMIN-NEXT: ret
732 ; RV32IZHINXMIN-LABEL: fsgnjn_h:
733 ; RV32IZHINXMIN: # %bb.0:
734 ; RV32IZHINXMIN-NEXT: # kill: def $x10_h killed $x10_h def $x10
735 ; RV32IZHINXMIN-NEXT: fcvt.s.h a1, a1
736 ; RV32IZHINXMIN-NEXT: fcvt.s.h a2, a0
737 ; RV32IZHINXMIN-NEXT: fadd.s a1, a2, a1
738 ; RV32IZHINXMIN-NEXT: lui a2, 1048568
739 ; RV32IZHINXMIN-NEXT: slli a0, a0, 17
740 ; RV32IZHINXMIN-NEXT: fcvt.h.s a1, a1
741 ; RV32IZHINXMIN-NEXT: not a1, a1
742 ; RV32IZHINXMIN-NEXT: and a1, a1, a2
743 ; RV32IZHINXMIN-NEXT: srli a0, a0, 17
744 ; RV32IZHINXMIN-NEXT: or a0, a0, a1
745 ; RV32IZHINXMIN-NEXT: # kill: def $x10_h killed $x10_h killed $x10
746 ; RV32IZHINXMIN-NEXT: ret
748 ; RV64IZHINXMIN-LABEL: fsgnjn_h:
749 ; RV64IZHINXMIN: # %bb.0:
750 ; RV64IZHINXMIN-NEXT: # kill: def $x10_h killed $x10_h def $x10
751 ; RV64IZHINXMIN-NEXT: fcvt.s.h a1, a1
752 ; RV64IZHINXMIN-NEXT: fcvt.s.h a2, a0
753 ; RV64IZHINXMIN-NEXT: fadd.s a1, a2, a1
754 ; RV64IZHINXMIN-NEXT: lui a2, 1048568
755 ; RV64IZHINXMIN-NEXT: slli a0, a0, 49
756 ; RV64IZHINXMIN-NEXT: fcvt.h.s a1, a1
757 ; RV64IZHINXMIN-NEXT: not a1, a1
758 ; RV64IZHINXMIN-NEXT: and a1, a1, a2
759 ; RV64IZHINXMIN-NEXT: srli a0, a0, 49
760 ; RV64IZHINXMIN-NEXT: or a0, a0, a1
761 ; RV64IZHINXMIN-NEXT: # kill: def $x10_h killed $x10_h killed $x10
762 ; RV64IZHINXMIN-NEXT: ret
763 %1 = fadd half %a, %b
765 %3 = call half @llvm.copysign.f16(half %a, half %2)
769 declare half @llvm.fabs.f16(half)
771 ; This function performs extra work to ensure that
772 ; DAGCombiner::visitBITCAST doesn't replace the fabs with an and.
; Computes %1 = %a + %b, %2 = llvm.fabs.f16(%1) and %3 = %2 + %1, so the
; fabs operand is itself a computed floating-point value.
; Expected code per configuration (see the RUN lines at the top of the file):
;   +zfh / +zhinx   -> fadd.h, fabs.h, fadd.h.
;   no FP extension -> both additions go through __extendhfsf2 / __addsf3 /
;                      __truncsfhf2; the absolute value is taken on the
;                      widened value by clearing its top bits with a
;                      slli/srli pair (by 1 on RV32, by 33 on RV64).
;   +zfhmin         -> additions are done in single precision; the absolute
;                      value clears bit 15 and above of the half bits with a
;                      slli/srli pair (by 17 on RV32, by 49 on RV64) between
;                      fmv.x.h and fmv.h.x.
;   +zhinxmin       -> the same slli/srli pair directly on integer registers.
773 define half @fabs_h(half %a, half %b) nounwind {
774 ; CHECKIZFH-LABEL: fabs_h:
775 ; CHECKIZFH: # %bb.0:
776 ; CHECKIZFH-NEXT: fadd.h fa5, fa0, fa1
777 ; CHECKIZFH-NEXT: fabs.h fa4, fa5
778 ; CHECKIZFH-NEXT: fadd.h fa0, fa4, fa5
779 ; CHECKIZFH-NEXT: ret
781 ; CHECKIZHINX-LABEL: fabs_h:
782 ; CHECKIZHINX: # %bb.0:
783 ; CHECKIZHINX-NEXT: fadd.h a0, a0, a1
784 ; CHECKIZHINX-NEXT: fabs.h a1, a0
785 ; CHECKIZHINX-NEXT: fadd.h a0, a1, a0
786 ; CHECKIZHINX-NEXT: ret
788 ; RV32I-LABEL: fabs_h:
790 ; RV32I-NEXT: addi sp, sp, -16
791 ; RV32I-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
792 ; RV32I-NEXT: sw s0, 8(sp) # 4-byte Folded Spill
793 ; RV32I-NEXT: sw s1, 4(sp) # 4-byte Folded Spill
794 ; RV32I-NEXT: sw s2, 0(sp) # 4-byte Folded Spill
795 ; RV32I-NEXT: mv s0, a1
796 ; RV32I-NEXT: lui a1, 16
797 ; RV32I-NEXT: addi s2, a1, -1
798 ; RV32I-NEXT: and a0, a0, s2
799 ; RV32I-NEXT: call __extendhfsf2
800 ; RV32I-NEXT: mv s1, a0
801 ; RV32I-NEXT: and a0, s0, s2
802 ; RV32I-NEXT: call __extendhfsf2
803 ; RV32I-NEXT: mv a1, a0
804 ; RV32I-NEXT: mv a0, s1
805 ; RV32I-NEXT: call __addsf3
806 ; RV32I-NEXT: call __truncsfhf2
807 ; RV32I-NEXT: and a0, a0, s2
808 ; RV32I-NEXT: call __extendhfsf2
809 ; RV32I-NEXT: mv s0, a0
810 ; RV32I-NEXT: slli a0, a0, 1
811 ; RV32I-NEXT: srli a0, a0, 1
812 ; RV32I-NEXT: call __truncsfhf2
813 ; RV32I-NEXT: and a0, a0, s2
814 ; RV32I-NEXT: call __extendhfsf2
815 ; RV32I-NEXT: mv a1, s0
816 ; RV32I-NEXT: call __addsf3
817 ; RV32I-NEXT: call __truncsfhf2
818 ; RV32I-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
819 ; RV32I-NEXT: lw s0, 8(sp) # 4-byte Folded Reload
820 ; RV32I-NEXT: lw s1, 4(sp) # 4-byte Folded Reload
821 ; RV32I-NEXT: lw s2, 0(sp) # 4-byte Folded Reload
822 ; RV32I-NEXT: addi sp, sp, 16
825 ; RV64I-LABEL: fabs_h:
827 ; RV64I-NEXT: addi sp, sp, -32
828 ; RV64I-NEXT: sd ra, 24(sp) # 8-byte Folded Spill
829 ; RV64I-NEXT: sd s0, 16(sp) # 8-byte Folded Spill
830 ; RV64I-NEXT: sd s1, 8(sp) # 8-byte Folded Spill
831 ; RV64I-NEXT: sd s2, 0(sp) # 8-byte Folded Spill
832 ; RV64I-NEXT: mv s0, a1
833 ; RV64I-NEXT: lui a1, 16
834 ; RV64I-NEXT: addiw s2, a1, -1
835 ; RV64I-NEXT: and a0, a0, s2
836 ; RV64I-NEXT: call __extendhfsf2
837 ; RV64I-NEXT: mv s1, a0
838 ; RV64I-NEXT: and a0, s0, s2
839 ; RV64I-NEXT: call __extendhfsf2
840 ; RV64I-NEXT: mv a1, a0
841 ; RV64I-NEXT: mv a0, s1
842 ; RV64I-NEXT: call __addsf3
843 ; RV64I-NEXT: call __truncsfhf2
844 ; RV64I-NEXT: and a0, a0, s2
845 ; RV64I-NEXT: call __extendhfsf2
846 ; RV64I-NEXT: mv s0, a0
847 ; RV64I-NEXT: slli a0, a0, 33
848 ; RV64I-NEXT: srli a0, a0, 33
849 ; RV64I-NEXT: call __truncsfhf2
850 ; RV64I-NEXT: and a0, a0, s2
851 ; RV64I-NEXT: call __extendhfsf2
852 ; RV64I-NEXT: mv a1, s0
853 ; RV64I-NEXT: call __addsf3
854 ; RV64I-NEXT: call __truncsfhf2
855 ; RV64I-NEXT: ld ra, 24(sp) # 8-byte Folded Reload
856 ; RV64I-NEXT: ld s0, 16(sp) # 8-byte Folded Reload
857 ; RV64I-NEXT: ld s1, 8(sp) # 8-byte Folded Reload
858 ; RV64I-NEXT: ld s2, 0(sp) # 8-byte Folded Reload
859 ; RV64I-NEXT: addi sp, sp, 32
862 ; RV32IZFHMIN-LABEL: fabs_h:
863 ; RV32IZFHMIN: # %bb.0:
864 ; RV32IZFHMIN-NEXT: fcvt.s.h fa5, fa1
865 ; RV32IZFHMIN-NEXT: fcvt.s.h fa4, fa0
866 ; RV32IZFHMIN-NEXT: fadd.s fa5, fa4, fa5
867 ; RV32IZFHMIN-NEXT: fcvt.h.s fa5, fa5
868 ; RV32IZFHMIN-NEXT: fmv.x.h a0, fa5
869 ; RV32IZFHMIN-NEXT: slli a0, a0, 17
870 ; RV32IZFHMIN-NEXT: srli a0, a0, 17
871 ; RV32IZFHMIN-NEXT: fmv.h.x fa4, a0
872 ; RV32IZFHMIN-NEXT: fcvt.s.h fa5, fa5
873 ; RV32IZFHMIN-NEXT: fcvt.s.h fa4, fa4
874 ; RV32IZFHMIN-NEXT: fadd.s fa5, fa4, fa5
875 ; RV32IZFHMIN-NEXT: fcvt.h.s fa0, fa5
876 ; RV32IZFHMIN-NEXT: ret
878 ; RV64IZFHMIN-LABEL: fabs_h:
879 ; RV64IZFHMIN: # %bb.0:
880 ; RV64IZFHMIN-NEXT: fcvt.s.h fa5, fa1
881 ; RV64IZFHMIN-NEXT: fcvt.s.h fa4, fa0
882 ; RV64IZFHMIN-NEXT: fadd.s fa5, fa4, fa5
883 ; RV64IZFHMIN-NEXT: fcvt.h.s fa5, fa5
884 ; RV64IZFHMIN-NEXT: fmv.x.h a0, fa5
885 ; RV64IZFHMIN-NEXT: slli a0, a0, 49
886 ; RV64IZFHMIN-NEXT: srli a0, a0, 49
887 ; RV64IZFHMIN-NEXT: fmv.h.x fa4, a0
888 ; RV64IZFHMIN-NEXT: fcvt.s.h fa5, fa5
889 ; RV64IZFHMIN-NEXT: fcvt.s.h fa4, fa4
890 ; RV64IZFHMIN-NEXT: fadd.s fa5, fa4, fa5
891 ; RV64IZFHMIN-NEXT: fcvt.h.s fa0, fa5
892 ; RV64IZFHMIN-NEXT: ret
894 ; RV32IZHINXMIN-LABEL: fabs_h:
895 ; RV32IZHINXMIN: # %bb.0:
896 ; RV32IZHINXMIN-NEXT: fcvt.s.h a1, a1
897 ; RV32IZHINXMIN-NEXT: fcvt.s.h a0, a0
898 ; RV32IZHINXMIN-NEXT: fadd.s a0, a0, a1
899 ; RV32IZHINXMIN-NEXT: fcvt.h.s a0, a0
900 ; RV32IZHINXMIN-NEXT: slli a1, a0, 17
901 ; RV32IZHINXMIN-NEXT: srli a1, a1, 17
902 ; RV32IZHINXMIN-NEXT: fcvt.s.h a0, a0
903 ; RV32IZHINXMIN-NEXT: fcvt.s.h a1, a1
904 ; RV32IZHINXMIN-NEXT: fadd.s a0, a1, a0
905 ; RV32IZHINXMIN-NEXT: fcvt.h.s a0, a0
906 ; RV32IZHINXMIN-NEXT: ret
908 ; RV64IZHINXMIN-LABEL: fabs_h:
909 ; RV64IZHINXMIN: # %bb.0:
910 ; RV64IZHINXMIN-NEXT: fcvt.s.h a1, a1
911 ; RV64IZHINXMIN-NEXT: fcvt.s.h a0, a0
912 ; RV64IZHINXMIN-NEXT: fadd.s a0, a0, a1
913 ; RV64IZHINXMIN-NEXT: fcvt.h.s a0, a0
914 ; RV64IZHINXMIN-NEXT: slli a1, a0, 49
915 ; RV64IZHINXMIN-NEXT: srli a1, a1, 49
916 ; RV64IZHINXMIN-NEXT: fcvt.s.h a0, a0
917 ; RV64IZHINXMIN-NEXT: fcvt.s.h a1, a1
918 ; RV64IZHINXMIN-NEXT: fadd.s a0, a1, a0
919 ; RV64IZHINXMIN-NEXT: fcvt.h.s a0, a0
920 ; RV64IZHINXMIN-NEXT: ret
921 %1 = fadd half %a, %b
922 %2 = call half @llvm.fabs.f16(half %1)
923 %3 = fadd half %2, %1
927 declare half @llvm.minnum.f16(half, half)
; fmin_h: lowering of llvm.minnum.f16.
;  - +zfh / +zhinx: a single fmin.h (FPR or GPR operands respectively).
;  - +zfhmin / +zhinxmin: both operands are widened with fcvt.s.h, combined
;    with fmin.s, and the result is narrowed again with fcvt.h.s.
;  - no FP extension: each operand is masked to 16 bits and widened through
;    __extendhfsf2, fminf is called, and __truncsfhf2 narrows the result.
929 define half @fmin_h(half %a, half %b) nounwind {
930 ; CHECKIZFH-LABEL: fmin_h:
931 ; CHECKIZFH: # %bb.0:
932 ; CHECKIZFH-NEXT: fmin.h fa0, fa0, fa1
933 ; CHECKIZFH-NEXT: ret
935 ; CHECKIZHINX-LABEL: fmin_h:
936 ; CHECKIZHINX: # %bb.0:
937 ; CHECKIZHINX-NEXT: fmin.h a0, a0, a1
938 ; CHECKIZHINX-NEXT: ret
940 ; RV32I-LABEL: fmin_h:
942 ; RV32I-NEXT: addi sp, sp, -16
943 ; RV32I-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
944 ; RV32I-NEXT: sw s0, 8(sp) # 4-byte Folded Spill
945 ; RV32I-NEXT: sw s1, 4(sp) # 4-byte Folded Spill
946 ; RV32I-NEXT: sw s2, 0(sp) # 4-byte Folded Spill
947 ; RV32I-NEXT: mv s0, a1
948 ; RV32I-NEXT: lui a1, 16
949 ; RV32I-NEXT: addi s2, a1, -1
950 ; RV32I-NEXT: and a0, a0, s2
951 ; RV32I-NEXT: call __extendhfsf2
952 ; RV32I-NEXT: mv s1, a0
953 ; RV32I-NEXT: and a0, s0, s2
954 ; RV32I-NEXT: call __extendhfsf2
955 ; RV32I-NEXT: mv a1, a0
956 ; RV32I-NEXT: mv a0, s1
957 ; RV32I-NEXT: call fminf
958 ; RV32I-NEXT: call __truncsfhf2
959 ; RV32I-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
960 ; RV32I-NEXT: lw s0, 8(sp) # 4-byte Folded Reload
961 ; RV32I-NEXT: lw s1, 4(sp) # 4-byte Folded Reload
962 ; RV32I-NEXT: lw s2, 0(sp) # 4-byte Folded Reload
963 ; RV32I-NEXT: addi sp, sp, 16
966 ; RV64I-LABEL: fmin_h:
968 ; RV64I-NEXT: addi sp, sp, -32
969 ; RV64I-NEXT: sd ra, 24(sp) # 8-byte Folded Spill
970 ; RV64I-NEXT: sd s0, 16(sp) # 8-byte Folded Spill
971 ; RV64I-NEXT: sd s1, 8(sp) # 8-byte Folded Spill
972 ; RV64I-NEXT: sd s2, 0(sp) # 8-byte Folded Spill
973 ; RV64I-NEXT: mv s0, a1
974 ; RV64I-NEXT: lui a1, 16
975 ; RV64I-NEXT: addiw s2, a1, -1
976 ; RV64I-NEXT: and a0, a0, s2
977 ; RV64I-NEXT: call __extendhfsf2
978 ; RV64I-NEXT: mv s1, a0
979 ; RV64I-NEXT: and a0, s0, s2
980 ; RV64I-NEXT: call __extendhfsf2
981 ; RV64I-NEXT: mv a1, a0
982 ; RV64I-NEXT: mv a0, s1
983 ; RV64I-NEXT: call fminf
984 ; RV64I-NEXT: call __truncsfhf2
985 ; RV64I-NEXT: ld ra, 24(sp) # 8-byte Folded Reload
986 ; RV64I-NEXT: ld s0, 16(sp) # 8-byte Folded Reload
987 ; RV64I-NEXT: ld s1, 8(sp) # 8-byte Folded Reload
988 ; RV64I-NEXT: ld s2, 0(sp) # 8-byte Folded Reload
989 ; RV64I-NEXT: addi sp, sp, 32
992 ; CHECKIZFHMIN-LABEL: fmin_h:
993 ; CHECKIZFHMIN: # %bb.0:
994 ; CHECKIZFHMIN-NEXT: fcvt.s.h fa5, fa1
995 ; CHECKIZFHMIN-NEXT: fcvt.s.h fa4, fa0
996 ; CHECKIZFHMIN-NEXT: fmin.s fa5, fa4, fa5
997 ; CHECKIZFHMIN-NEXT: fcvt.h.s fa0, fa5
998 ; CHECKIZFHMIN-NEXT: ret
1000 ; CHECKIZHINXMIN-LABEL: fmin_h:
1001 ; CHECKIZHINXMIN: # %bb.0:
1002 ; CHECKIZHINXMIN-NEXT: fcvt.s.h a1, a1
1003 ; CHECKIZHINXMIN-NEXT: fcvt.s.h a0, a0
1004 ; CHECKIZHINXMIN-NEXT: fmin.s a0, a0, a1
1005 ; CHECKIZHINXMIN-NEXT: fcvt.h.s a0, a0
1006 ; CHECKIZHINXMIN-NEXT: ret
1007 %1 = call half @llvm.minnum.f16(half %a, half %b)
1011 declare half @llvm.maxnum.f16(half, half)
; fmax_h: lowering of llvm.maxnum.f16.
;  - +zfh / +zhinx: a single fmax.h (FPR or GPR operands respectively).
;  - +zfhmin / +zhinxmin: both operands are widened with fcvt.s.h, combined
;    with fmax.s, and the result is narrowed again with fcvt.h.s.
;  - no FP extension: each operand is masked to 16 bits and widened through
;    __extendhfsf2, fmaxf is called, and __truncsfhf2 narrows the result.
1013 define half @fmax_h(half %a, half %b) nounwind {
1014 ; CHECKIZFH-LABEL: fmax_h:
1015 ; CHECKIZFH: # %bb.0:
1016 ; CHECKIZFH-NEXT: fmax.h fa0, fa0, fa1
1017 ; CHECKIZFH-NEXT: ret
1019 ; CHECKIZHINX-LABEL: fmax_h:
1020 ; CHECKIZHINX: # %bb.0:
1021 ; CHECKIZHINX-NEXT: fmax.h a0, a0, a1
1022 ; CHECKIZHINX-NEXT: ret
1024 ; RV32I-LABEL: fmax_h:
1026 ; RV32I-NEXT: addi sp, sp, -16
1027 ; RV32I-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
1028 ; RV32I-NEXT: sw s0, 8(sp) # 4-byte Folded Spill
1029 ; RV32I-NEXT: sw s1, 4(sp) # 4-byte Folded Spill
1030 ; RV32I-NEXT: sw s2, 0(sp) # 4-byte Folded Spill
1031 ; RV32I-NEXT: mv s0, a1
1032 ; RV32I-NEXT: lui a1, 16
1033 ; RV32I-NEXT: addi s2, a1, -1
1034 ; RV32I-NEXT: and a0, a0, s2
1035 ; RV32I-NEXT: call __extendhfsf2
1036 ; RV32I-NEXT: mv s1, a0
1037 ; RV32I-NEXT: and a0, s0, s2
1038 ; RV32I-NEXT: call __extendhfsf2
1039 ; RV32I-NEXT: mv a1, a0
1040 ; RV32I-NEXT: mv a0, s1
1041 ; RV32I-NEXT: call fmaxf
1042 ; RV32I-NEXT: call __truncsfhf2
1043 ; RV32I-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
1044 ; RV32I-NEXT: lw s0, 8(sp) # 4-byte Folded Reload
1045 ; RV32I-NEXT: lw s1, 4(sp) # 4-byte Folded Reload
1046 ; RV32I-NEXT: lw s2, 0(sp) # 4-byte Folded Reload
1047 ; RV32I-NEXT: addi sp, sp, 16
1050 ; RV64I-LABEL: fmax_h:
1052 ; RV64I-NEXT: addi sp, sp, -32
1053 ; RV64I-NEXT: sd ra, 24(sp) # 8-byte Folded Spill
1054 ; RV64I-NEXT: sd s0, 16(sp) # 8-byte Folded Spill
1055 ; RV64I-NEXT: sd s1, 8(sp) # 8-byte Folded Spill
1056 ; RV64I-NEXT: sd s2, 0(sp) # 8-byte Folded Spill
1057 ; RV64I-NEXT: mv s0, a1
1058 ; RV64I-NEXT: lui a1, 16
1059 ; RV64I-NEXT: addiw s2, a1, -1
1060 ; RV64I-NEXT: and a0, a0, s2
1061 ; RV64I-NEXT: call __extendhfsf2
1062 ; RV64I-NEXT: mv s1, a0
1063 ; RV64I-NEXT: and a0, s0, s2
1064 ; RV64I-NEXT: call __extendhfsf2
1065 ; RV64I-NEXT: mv a1, a0
1066 ; RV64I-NEXT: mv a0, s1
1067 ; RV64I-NEXT: call fmaxf
1068 ; RV64I-NEXT: call __truncsfhf2
1069 ; RV64I-NEXT: ld ra, 24(sp) # 8-byte Folded Reload
1070 ; RV64I-NEXT: ld s0, 16(sp) # 8-byte Folded Reload
1071 ; RV64I-NEXT: ld s1, 8(sp) # 8-byte Folded Reload
1072 ; RV64I-NEXT: ld s2, 0(sp) # 8-byte Folded Reload
1073 ; RV64I-NEXT: addi sp, sp, 32
1076 ; CHECKIZFHMIN-LABEL: fmax_h:
1077 ; CHECKIZFHMIN: # %bb.0:
1078 ; CHECKIZFHMIN-NEXT: fcvt.s.h fa5, fa1
1079 ; CHECKIZFHMIN-NEXT: fcvt.s.h fa4, fa0
1080 ; CHECKIZFHMIN-NEXT: fmax.s fa5, fa4, fa5
1081 ; CHECKIZFHMIN-NEXT: fcvt.h.s fa0, fa5
1082 ; CHECKIZFHMIN-NEXT: ret
1084 ; CHECKIZHINXMIN-LABEL: fmax_h:
1085 ; CHECKIZHINXMIN: # %bb.0:
1086 ; CHECKIZHINXMIN-NEXT: fcvt.s.h a1, a1
1087 ; CHECKIZHINXMIN-NEXT: fcvt.s.h a0, a0
1088 ; CHECKIZHINXMIN-NEXT: fmax.s a0, a0, a1
1089 ; CHECKIZHINXMIN-NEXT: fcvt.h.s a0, a0
1090 ; CHECKIZHINXMIN-NEXT: ret
1091 %1 = call half @llvm.maxnum.f16(half %a, half %b)
1095 declare half @llvm.fma.f16(half, half, half)
; fmadd_h: lowering of llvm.fma.f16 with no negated operands.
;  - +zfh / +zhinx: a single fmadd.h.
;  - +zfhmin / +zhinxmin: all three operands are widened with fcvt.s.h, fused
;    with fmadd.s, and the result is narrowed with fcvt.h.s.
;  - no FP extension: the three operands are masked to 16 bits and widened
;    through __extendhfsf2, fmaf is called, and __truncsfhf2 narrows the
;    result.
1097 define half @fmadd_h(half %a, half %b, half %c) nounwind {
1098 ; CHECKIZFH-LABEL: fmadd_h:
1099 ; CHECKIZFH: # %bb.0:
1100 ; CHECKIZFH-NEXT: fmadd.h fa0, fa0, fa1, fa2
1101 ; CHECKIZFH-NEXT: ret
1103 ; CHECKIZHINX-LABEL: fmadd_h:
1104 ; CHECKIZHINX: # %bb.0:
1105 ; CHECKIZHINX-NEXT: fmadd.h a0, a0, a1, a2
1106 ; CHECKIZHINX-NEXT: ret
1108 ; RV32I-LABEL: fmadd_h:
1110 ; RV32I-NEXT: addi sp, sp, -32
1111 ; RV32I-NEXT: sw ra, 28(sp) # 4-byte Folded Spill
1112 ; RV32I-NEXT: sw s0, 24(sp) # 4-byte Folded Spill
1113 ; RV32I-NEXT: sw s1, 20(sp) # 4-byte Folded Spill
1114 ; RV32I-NEXT: sw s2, 16(sp) # 4-byte Folded Spill
1115 ; RV32I-NEXT: sw s3, 12(sp) # 4-byte Folded Spill
1116 ; RV32I-NEXT: mv s0, a2
1117 ; RV32I-NEXT: mv s1, a1
1118 ; RV32I-NEXT: lui a1, 16
1119 ; RV32I-NEXT: addi s3, a1, -1
1120 ; RV32I-NEXT: and a0, a0, s3
1121 ; RV32I-NEXT: call __extendhfsf2
1122 ; RV32I-NEXT: mv s2, a0
1123 ; RV32I-NEXT: and a0, s1, s3
1124 ; RV32I-NEXT: call __extendhfsf2
1125 ; RV32I-NEXT: mv s1, a0
1126 ; RV32I-NEXT: and a0, s0, s3
1127 ; RV32I-NEXT: call __extendhfsf2
1128 ; RV32I-NEXT: mv a2, a0
1129 ; RV32I-NEXT: mv a0, s2
1130 ; RV32I-NEXT: mv a1, s1
1131 ; RV32I-NEXT: call fmaf
1132 ; RV32I-NEXT: call __truncsfhf2
1133 ; RV32I-NEXT: lw ra, 28(sp) # 4-byte Folded Reload
1134 ; RV32I-NEXT: lw s0, 24(sp) # 4-byte Folded Reload
1135 ; RV32I-NEXT: lw s1, 20(sp) # 4-byte Folded Reload
1136 ; RV32I-NEXT: lw s2, 16(sp) # 4-byte Folded Reload
1137 ; RV32I-NEXT: lw s3, 12(sp) # 4-byte Folded Reload
1138 ; RV32I-NEXT: addi sp, sp, 32
1141 ; RV64I-LABEL: fmadd_h:
1143 ; RV64I-NEXT: addi sp, sp, -48
1144 ; RV64I-NEXT: sd ra, 40(sp) # 8-byte Folded Spill
1145 ; RV64I-NEXT: sd s0, 32(sp) # 8-byte Folded Spill
1146 ; RV64I-NEXT: sd s1, 24(sp) # 8-byte Folded Spill
1147 ; RV64I-NEXT: sd s2, 16(sp) # 8-byte Folded Spill
1148 ; RV64I-NEXT: sd s3, 8(sp) # 8-byte Folded Spill
1149 ; RV64I-NEXT: mv s0, a2
1150 ; RV64I-NEXT: mv s1, a1
1151 ; RV64I-NEXT: lui a1, 16
1152 ; RV64I-NEXT: addiw s3, a1, -1
1153 ; RV64I-NEXT: and a0, a0, s3
1154 ; RV64I-NEXT: call __extendhfsf2
1155 ; RV64I-NEXT: mv s2, a0
1156 ; RV64I-NEXT: and a0, s1, s3
1157 ; RV64I-NEXT: call __extendhfsf2
1158 ; RV64I-NEXT: mv s1, a0
1159 ; RV64I-NEXT: and a0, s0, s3
1160 ; RV64I-NEXT: call __extendhfsf2
1161 ; RV64I-NEXT: mv a2, a0
1162 ; RV64I-NEXT: mv a0, s2
1163 ; RV64I-NEXT: mv a1, s1
1164 ; RV64I-NEXT: call fmaf
1165 ; RV64I-NEXT: call __truncsfhf2
1166 ; RV64I-NEXT: ld ra, 40(sp) # 8-byte Folded Reload
1167 ; RV64I-NEXT: ld s0, 32(sp) # 8-byte Folded Reload
1168 ; RV64I-NEXT: ld s1, 24(sp) # 8-byte Folded Reload
1169 ; RV64I-NEXT: ld s2, 16(sp) # 8-byte Folded Reload
1170 ; RV64I-NEXT: ld s3, 8(sp) # 8-byte Folded Reload
1171 ; RV64I-NEXT: addi sp, sp, 48
1174 ; CHECKIZFHMIN-LABEL: fmadd_h:
1175 ; CHECKIZFHMIN: # %bb.0:
1176 ; CHECKIZFHMIN-NEXT: fcvt.s.h fa5, fa2
1177 ; CHECKIZFHMIN-NEXT: fcvt.s.h fa4, fa1
1178 ; CHECKIZFHMIN-NEXT: fcvt.s.h fa3, fa0
1179 ; CHECKIZFHMIN-NEXT: fmadd.s fa5, fa3, fa4, fa5
1180 ; CHECKIZFHMIN-NEXT: fcvt.h.s fa0, fa5
1181 ; CHECKIZFHMIN-NEXT: ret
1183 ; CHECKIZHINXMIN-LABEL: fmadd_h:
1184 ; CHECKIZHINXMIN: # %bb.0:
1185 ; CHECKIZHINXMIN-NEXT: fcvt.s.h a2, a2
1186 ; CHECKIZHINXMIN-NEXT: fcvt.s.h a1, a1
1187 ; CHECKIZHINXMIN-NEXT: fcvt.s.h a0, a0
1188 ; CHECKIZHINXMIN-NEXT: fmadd.s a0, a0, a1, a2
1189 ; CHECKIZHINXMIN-NEXT: fcvt.h.s a0, a0
1190 ; CHECKIZHINXMIN-NEXT: ret
1191 %1 = call half @llvm.fma.f16(half %a, half %b, half %c)
; fmsub_h: fma(a, b, -c), where c is first passed through `fadd 0.0, c` and
; then negated with `fsub -0.0, ...`.
;  - +zfh / +zhinx: the fadd.h is kept and the negation is absorbed into a
;    single fmsub.h.
;  - +zfhmin / +zhinxmin: the add is done in f32 (fadd.s) and rounded back to
;    half; the negation is an integer xor with the `lui 1048568` constant
;    (0xffff8000, which includes bit 15, the binary16 sign bit); the operands
;    are then widened again and fused with fmadd.s.
;  - no FP extension: __addsf3 performs the add, the sign is flipped by an xor
;    with `lui 524288` (0x80000000) on the widened value, and fmaf performs
;    the multiply-add, with __extendhfsf2 / __truncsfhf2 around each step.
1195 define half @fmsub_h(half %a, half %b, half %c) nounwind {
1196 ; CHECKIZFH-LABEL: fmsub_h:
1197 ; CHECKIZFH: # %bb.0:
1198 ; CHECKIZFH-NEXT: fmv.h.x fa5, zero
1199 ; CHECKIZFH-NEXT: fadd.h fa5, fa2, fa5
1200 ; CHECKIZFH-NEXT: fmsub.h fa0, fa0, fa1, fa5
1201 ; CHECKIZFH-NEXT: ret
1203 ; CHECKIZHINX-LABEL: fmsub_h:
1204 ; CHECKIZHINX: # %bb.0:
1205 ; CHECKIZHINX-NEXT: fadd.h a2, a2, zero
1206 ; CHECKIZHINX-NEXT: fmsub.h a0, a0, a1, a2
1207 ; CHECKIZHINX-NEXT: ret
1209 ; RV32I-LABEL: fmsub_h:
1211 ; RV32I-NEXT: addi sp, sp, -32
1212 ; RV32I-NEXT: sw ra, 28(sp) # 4-byte Folded Spill
1213 ; RV32I-NEXT: sw s0, 24(sp) # 4-byte Folded Spill
1214 ; RV32I-NEXT: sw s1, 20(sp) # 4-byte Folded Spill
1215 ; RV32I-NEXT: sw s2, 16(sp) # 4-byte Folded Spill
1216 ; RV32I-NEXT: sw s3, 12(sp) # 4-byte Folded Spill
1217 ; RV32I-NEXT: mv s0, a1
1218 ; RV32I-NEXT: mv s1, a0
1219 ; RV32I-NEXT: lui a0, 16
1220 ; RV32I-NEXT: addi s3, a0, -1
1221 ; RV32I-NEXT: and a0, a2, s3
1222 ; RV32I-NEXT: call __extendhfsf2
1223 ; RV32I-NEXT: li a1, 0
1224 ; RV32I-NEXT: call __addsf3
1225 ; RV32I-NEXT: call __truncsfhf2
1226 ; RV32I-NEXT: and a0, a0, s3
1227 ; RV32I-NEXT: call __extendhfsf2
1228 ; RV32I-NEXT: lui a1, 524288
1229 ; RV32I-NEXT: xor a0, a0, a1
1230 ; RV32I-NEXT: call __truncsfhf2
1231 ; RV32I-NEXT: mv s2, a0
1232 ; RV32I-NEXT: and a0, s1, s3
1233 ; RV32I-NEXT: call __extendhfsf2
1234 ; RV32I-NEXT: mv s1, a0
1235 ; RV32I-NEXT: and a0, s0, s3
1236 ; RV32I-NEXT: call __extendhfsf2
1237 ; RV32I-NEXT: mv s0, a0
1238 ; RV32I-NEXT: and a0, s2, s3
1239 ; RV32I-NEXT: call __extendhfsf2
1240 ; RV32I-NEXT: mv a2, a0
1241 ; RV32I-NEXT: mv a0, s1
1242 ; RV32I-NEXT: mv a1, s0
1243 ; RV32I-NEXT: call fmaf
1244 ; RV32I-NEXT: call __truncsfhf2
1245 ; RV32I-NEXT: lw ra, 28(sp) # 4-byte Folded Reload
1246 ; RV32I-NEXT: lw s0, 24(sp) # 4-byte Folded Reload
1247 ; RV32I-NEXT: lw s1, 20(sp) # 4-byte Folded Reload
1248 ; RV32I-NEXT: lw s2, 16(sp) # 4-byte Folded Reload
1249 ; RV32I-NEXT: lw s3, 12(sp) # 4-byte Folded Reload
1250 ; RV32I-NEXT: addi sp, sp, 32
1253 ; RV64I-LABEL: fmsub_h:
1255 ; RV64I-NEXT: addi sp, sp, -48
1256 ; RV64I-NEXT: sd ra, 40(sp) # 8-byte Folded Spill
1257 ; RV64I-NEXT: sd s0, 32(sp) # 8-byte Folded Spill
1258 ; RV64I-NEXT: sd s1, 24(sp) # 8-byte Folded Spill
1259 ; RV64I-NEXT: sd s2, 16(sp) # 8-byte Folded Spill
1260 ; RV64I-NEXT: sd s3, 8(sp) # 8-byte Folded Spill
1261 ; RV64I-NEXT: mv s0, a1
1262 ; RV64I-NEXT: mv s1, a0
1263 ; RV64I-NEXT: lui a0, 16
1264 ; RV64I-NEXT: addiw s3, a0, -1
1265 ; RV64I-NEXT: and a0, a2, s3
1266 ; RV64I-NEXT: call __extendhfsf2
1267 ; RV64I-NEXT: li a1, 0
1268 ; RV64I-NEXT: call __addsf3
1269 ; RV64I-NEXT: call __truncsfhf2
1270 ; RV64I-NEXT: and a0, a0, s3
1271 ; RV64I-NEXT: call __extendhfsf2
1272 ; RV64I-NEXT: lui a1, 524288
1273 ; RV64I-NEXT: xor a0, a0, a1
1274 ; RV64I-NEXT: call __truncsfhf2
1275 ; RV64I-NEXT: mv s2, a0
1276 ; RV64I-NEXT: and a0, s1, s3
1277 ; RV64I-NEXT: call __extendhfsf2
1278 ; RV64I-NEXT: mv s1, a0
1279 ; RV64I-NEXT: and a0, s0, s3
1280 ; RV64I-NEXT: call __extendhfsf2
1281 ; RV64I-NEXT: mv s0, a0
1282 ; RV64I-NEXT: and a0, s2, s3
1283 ; RV64I-NEXT: call __extendhfsf2
1284 ; RV64I-NEXT: mv a2, a0
1285 ; RV64I-NEXT: mv a0, s1
1286 ; RV64I-NEXT: mv a1, s0
1287 ; RV64I-NEXT: call fmaf
1288 ; RV64I-NEXT: call __truncsfhf2
1289 ; RV64I-NEXT: ld ra, 40(sp) # 8-byte Folded Reload
1290 ; RV64I-NEXT: ld s0, 32(sp) # 8-byte Folded Reload
1291 ; RV64I-NEXT: ld s1, 24(sp) # 8-byte Folded Reload
1292 ; RV64I-NEXT: ld s2, 16(sp) # 8-byte Folded Reload
1293 ; RV64I-NEXT: ld s3, 8(sp) # 8-byte Folded Reload
1294 ; RV64I-NEXT: addi sp, sp, 48
1297 ; CHECKIZFHMIN-LABEL: fmsub_h:
1298 ; CHECKIZFHMIN: # %bb.0:
1299 ; CHECKIZFHMIN-NEXT: fcvt.s.h fa5, fa2
1300 ; CHECKIZFHMIN-NEXT: fmv.w.x fa4, zero
1301 ; CHECKIZFHMIN-NEXT: lui a0, 1048568
1302 ; CHECKIZFHMIN-NEXT: fcvt.s.h fa3, fa1
1303 ; CHECKIZFHMIN-NEXT: fadd.s fa5, fa5, fa4
1304 ; CHECKIZFHMIN-NEXT: fcvt.h.s fa5, fa5
1305 ; CHECKIZFHMIN-NEXT: fmv.x.h a1, fa5
1306 ; CHECKIZFHMIN-NEXT: xor a0, a1, a0
1307 ; CHECKIZFHMIN-NEXT: fmv.h.x fa5, a0
1308 ; CHECKIZFHMIN-NEXT: fcvt.s.h fa5, fa5
1309 ; CHECKIZFHMIN-NEXT: fcvt.s.h fa4, fa0
1310 ; CHECKIZFHMIN-NEXT: fmadd.s fa5, fa4, fa3, fa5
1311 ; CHECKIZFHMIN-NEXT: fcvt.h.s fa0, fa5
1312 ; CHECKIZFHMIN-NEXT: ret
1314 ; CHECKIZHINXMIN-LABEL: fmsub_h:
1315 ; CHECKIZHINXMIN: # %bb.0:
1316 ; CHECKIZHINXMIN-NEXT: fcvt.s.h a2, a2
1317 ; CHECKIZHINXMIN-NEXT: lui a3, 1048568
1318 ; CHECKIZHINXMIN-NEXT: fcvt.s.h a1, a1
1319 ; CHECKIZHINXMIN-NEXT: fadd.s a2, a2, zero
1320 ; CHECKIZHINXMIN-NEXT: fcvt.h.s a2, a2
1321 ; CHECKIZHINXMIN-NEXT: xor a2, a2, a3
1322 ; CHECKIZHINXMIN-NEXT: fcvt.s.h a2, a2
1323 ; CHECKIZHINXMIN-NEXT: fcvt.s.h a0, a0
1324 ; CHECKIZHINXMIN-NEXT: fmadd.s a0, a0, a1, a2
1325 ; CHECKIZHINXMIN-NEXT: fcvt.h.s a0, a0
1326 ; CHECKIZHINXMIN-NEXT: ret
1327 %c_ = fadd half 0.0, %c ; avoid negation using xor
1328 %negc = fsub half -0.0, %c_
1329 %1 = call half @llvm.fma.f16(half %a, half %b, half %negc)
; fnmadd_h: fma(-a, b, -c), where a and c are each passed through
; `fadd 0.0, x` before being negated with `fsub -0.0, ...`.
;  - +zfh / +zhinx: both fadd.h instructions are kept and the two negations
;    are absorbed into a single fnmadd.h.
;  - +zfhmin / +zhinxmin: the adds are done in f32 and rounded back to half;
;    each negation is an integer xor with the `lui 1048568` constant
;    (0xffff8000); the operands are then widened again and fused with fmadd.s.
;  - no FP extension: __addsf3 performs each add, each sign flip is an xor with
;    `lui 524288` (0x80000000) on the widened value, and fmaf performs the
;    multiply-add, with __extendhfsf2 / __truncsfhf2 around each step.
1333 define half @fnmadd_h(half %a, half %b, half %c) nounwind {
1334 ; CHECKIZFH-LABEL: fnmadd_h:
1335 ; CHECKIZFH: # %bb.0:
1336 ; CHECKIZFH-NEXT: fmv.h.x fa5, zero
1337 ; CHECKIZFH-NEXT: fadd.h fa4, fa0, fa5
1338 ; CHECKIZFH-NEXT: fadd.h fa5, fa2, fa5
1339 ; CHECKIZFH-NEXT: fnmadd.h fa0, fa4, fa1, fa5
1340 ; CHECKIZFH-NEXT: ret
1342 ; CHECKIZHINX-LABEL: fnmadd_h:
1343 ; CHECKIZHINX: # %bb.0:
1344 ; CHECKIZHINX-NEXT: fadd.h a0, a0, zero
1345 ; CHECKIZHINX-NEXT: fadd.h a2, a2, zero
1346 ; CHECKIZHINX-NEXT: fnmadd.h a0, a0, a1, a2
1347 ; CHECKIZHINX-NEXT: ret
1349 ; RV32I-LABEL: fnmadd_h:
1351 ; RV32I-NEXT: addi sp, sp, -32
1352 ; RV32I-NEXT: sw ra, 28(sp) # 4-byte Folded Spill
1353 ; RV32I-NEXT: sw s0, 24(sp) # 4-byte Folded Spill
1354 ; RV32I-NEXT: sw s1, 20(sp) # 4-byte Folded Spill
1355 ; RV32I-NEXT: sw s2, 16(sp) # 4-byte Folded Spill
1356 ; RV32I-NEXT: sw s3, 12(sp) # 4-byte Folded Spill
1357 ; RV32I-NEXT: sw s4, 8(sp) # 4-byte Folded Spill
1358 ; RV32I-NEXT: mv s1, a2
1359 ; RV32I-NEXT: mv s0, a1
1360 ; RV32I-NEXT: lui s3, 16
1361 ; RV32I-NEXT: addi s3, s3, -1
1362 ; RV32I-NEXT: and a0, a0, s3
1363 ; RV32I-NEXT: call __extendhfsf2
1364 ; RV32I-NEXT: li a1, 0
1365 ; RV32I-NEXT: call __addsf3
1366 ; RV32I-NEXT: call __truncsfhf2
1367 ; RV32I-NEXT: mv s2, a0
1368 ; RV32I-NEXT: and a0, s1, s3
1369 ; RV32I-NEXT: call __extendhfsf2
1370 ; RV32I-NEXT: li a1, 0
1371 ; RV32I-NEXT: call __addsf3
1372 ; RV32I-NEXT: call __truncsfhf2
1373 ; RV32I-NEXT: mv s1, a0
1374 ; RV32I-NEXT: and a0, s2, s3
1375 ; RV32I-NEXT: call __extendhfsf2
1376 ; RV32I-NEXT: lui s4, 524288
1377 ; RV32I-NEXT: xor a0, a0, s4
1378 ; RV32I-NEXT: call __truncsfhf2
1379 ; RV32I-NEXT: mv s2, a0
1380 ; RV32I-NEXT: and a0, s1, s3
1381 ; RV32I-NEXT: call __extendhfsf2
1382 ; RV32I-NEXT: xor a0, a0, s4
1383 ; RV32I-NEXT: call __truncsfhf2
1384 ; RV32I-NEXT: mv s1, a0
1385 ; RV32I-NEXT: and a0, s0, s3
1386 ; RV32I-NEXT: call __extendhfsf2
1387 ; RV32I-NEXT: mv s0, a0
1388 ; RV32I-NEXT: and a0, s2, s3
1389 ; RV32I-NEXT: call __extendhfsf2
1390 ; RV32I-NEXT: mv s2, a0
1391 ; RV32I-NEXT: and a0, s1, s3
1392 ; RV32I-NEXT: call __extendhfsf2
1393 ; RV32I-NEXT: mv a2, a0
1394 ; RV32I-NEXT: mv a0, s2
1395 ; RV32I-NEXT: mv a1, s0
1396 ; RV32I-NEXT: call fmaf
1397 ; RV32I-NEXT: call __truncsfhf2
1398 ; RV32I-NEXT: lw ra, 28(sp) # 4-byte Folded Reload
1399 ; RV32I-NEXT: lw s0, 24(sp) # 4-byte Folded Reload
1400 ; RV32I-NEXT: lw s1, 20(sp) # 4-byte Folded Reload
1401 ; RV32I-NEXT: lw s2, 16(sp) # 4-byte Folded Reload
1402 ; RV32I-NEXT: lw s3, 12(sp) # 4-byte Folded Reload
1403 ; RV32I-NEXT: lw s4, 8(sp) # 4-byte Folded Reload
1404 ; RV32I-NEXT: addi sp, sp, 32
1407 ; RV64I-LABEL: fnmadd_h:
1409 ; RV64I-NEXT: addi sp, sp, -48
1410 ; RV64I-NEXT: sd ra, 40(sp) # 8-byte Folded Spill
1411 ; RV64I-NEXT: sd s0, 32(sp) # 8-byte Folded Spill
1412 ; RV64I-NEXT: sd s1, 24(sp) # 8-byte Folded Spill
1413 ; RV64I-NEXT: sd s2, 16(sp) # 8-byte Folded Spill
1414 ; RV64I-NEXT: sd s3, 8(sp) # 8-byte Folded Spill
1415 ; RV64I-NEXT: sd s4, 0(sp) # 8-byte Folded Spill
1416 ; RV64I-NEXT: mv s1, a2
1417 ; RV64I-NEXT: mv s0, a1
1418 ; RV64I-NEXT: lui s3, 16
1419 ; RV64I-NEXT: addiw s3, s3, -1
1420 ; RV64I-NEXT: and a0, a0, s3
1421 ; RV64I-NEXT: call __extendhfsf2
1422 ; RV64I-NEXT: li a1, 0
1423 ; RV64I-NEXT: call __addsf3
1424 ; RV64I-NEXT: call __truncsfhf2
1425 ; RV64I-NEXT: mv s2, a0
1426 ; RV64I-NEXT: and a0, s1, s3
1427 ; RV64I-NEXT: call __extendhfsf2
1428 ; RV64I-NEXT: li a1, 0
1429 ; RV64I-NEXT: call __addsf3
1430 ; RV64I-NEXT: call __truncsfhf2
1431 ; RV64I-NEXT: mv s1, a0
1432 ; RV64I-NEXT: and a0, s2, s3
1433 ; RV64I-NEXT: call __extendhfsf2
1434 ; RV64I-NEXT: lui s4, 524288
1435 ; RV64I-NEXT: xor a0, a0, s4
1436 ; RV64I-NEXT: call __truncsfhf2
1437 ; RV64I-NEXT: mv s2, a0
1438 ; RV64I-NEXT: and a0, s1, s3
1439 ; RV64I-NEXT: call __extendhfsf2
1440 ; RV64I-NEXT: xor a0, a0, s4
1441 ; RV64I-NEXT: call __truncsfhf2
1442 ; RV64I-NEXT: mv s1, a0
1443 ; RV64I-NEXT: and a0, s0, s3
1444 ; RV64I-NEXT: call __extendhfsf2
1445 ; RV64I-NEXT: mv s0, a0
1446 ; RV64I-NEXT: and a0, s2, s3
1447 ; RV64I-NEXT: call __extendhfsf2
1448 ; RV64I-NEXT: mv s2, a0
1449 ; RV64I-NEXT: and a0, s1, s3
1450 ; RV64I-NEXT: call __extendhfsf2
1451 ; RV64I-NEXT: mv a2, a0
1452 ; RV64I-NEXT: mv a0, s2
1453 ; RV64I-NEXT: mv a1, s0
1454 ; RV64I-NEXT: call fmaf
1455 ; RV64I-NEXT: call __truncsfhf2
1456 ; RV64I-NEXT: ld ra, 40(sp) # 8-byte Folded Reload
1457 ; RV64I-NEXT: ld s0, 32(sp) # 8-byte Folded Reload
1458 ; RV64I-NEXT: ld s1, 24(sp) # 8-byte Folded Reload
1459 ; RV64I-NEXT: ld s2, 16(sp) # 8-byte Folded Reload
1460 ; RV64I-NEXT: ld s3, 8(sp) # 8-byte Folded Reload
1461 ; RV64I-NEXT: ld s4, 0(sp) # 8-byte Folded Reload
1462 ; RV64I-NEXT: addi sp, sp, 48
1465 ; CHECKIZFHMIN-LABEL: fnmadd_h:
1466 ; CHECKIZFHMIN: # %bb.0:
1467 ; CHECKIZFHMIN-NEXT: fcvt.s.h fa5, fa0
1468 ; CHECKIZFHMIN-NEXT: fmv.w.x fa4, zero
1469 ; CHECKIZFHMIN-NEXT: fcvt.s.h fa3, fa2
1470 ; CHECKIZFHMIN-NEXT: lui a0, 1048568
1471 ; CHECKIZFHMIN-NEXT: fadd.s fa5, fa5, fa4
1472 ; CHECKIZFHMIN-NEXT: fadd.s fa4, fa3, fa4
1473 ; CHECKIZFHMIN-NEXT: fcvt.h.s fa5, fa5
1474 ; CHECKIZFHMIN-NEXT: fcvt.h.s fa4, fa4
1475 ; CHECKIZFHMIN-NEXT: fmv.x.h a1, fa5
1476 ; CHECKIZFHMIN-NEXT: fmv.x.h a2, fa4
1477 ; CHECKIZFHMIN-NEXT: xor a1, a1, a0
1478 ; CHECKIZFHMIN-NEXT: xor a0, a2, a0
1479 ; CHECKIZFHMIN-NEXT: fmv.h.x fa5, a1
1480 ; CHECKIZFHMIN-NEXT: fmv.h.x fa4, a0
1481 ; CHECKIZFHMIN-NEXT: fcvt.s.h fa4, fa4
1482 ; CHECKIZFHMIN-NEXT: fcvt.s.h fa5, fa5
1483 ; CHECKIZFHMIN-NEXT: fcvt.s.h fa3, fa1
1484 ; CHECKIZFHMIN-NEXT: fmadd.s fa5, fa5, fa3, fa4
1485 ; CHECKIZFHMIN-NEXT: fcvt.h.s fa0, fa5
1486 ; CHECKIZFHMIN-NEXT: ret
1488 ; CHECKIZHINXMIN-LABEL: fnmadd_h:
1489 ; CHECKIZHINXMIN: # %bb.0:
1490 ; CHECKIZHINXMIN-NEXT: fcvt.s.h a0, a0
1491 ; CHECKIZHINXMIN-NEXT: fcvt.s.h a2, a2
1492 ; CHECKIZHINXMIN-NEXT: lui a3, 1048568
1493 ; CHECKIZHINXMIN-NEXT: fadd.s a0, a0, zero
1494 ; CHECKIZHINXMIN-NEXT: fadd.s a2, a2, zero
1495 ; CHECKIZHINXMIN-NEXT: fcvt.h.s a0, a0
1496 ; CHECKIZHINXMIN-NEXT: fcvt.h.s a2, a2
1497 ; CHECKIZHINXMIN-NEXT: xor a0, a0, a3
1498 ; CHECKIZHINXMIN-NEXT: xor a2, a2, a3
1499 ; CHECKIZHINXMIN-NEXT: fcvt.s.h a2, a2
1500 ; CHECKIZHINXMIN-NEXT: fcvt.s.h a0, a0
1501 ; CHECKIZHINXMIN-NEXT: fcvt.s.h a1, a1
1502 ; CHECKIZHINXMIN-NEXT: fmadd.s a0, a0, a1, a2
1503 ; CHECKIZHINXMIN-NEXT: fcvt.h.s a0, a0
1504 ; CHECKIZHINXMIN-NEXT: ret
1505 %a_ = fadd half 0.0, %a
1506 %c_ = fadd half 0.0, %c
1507 %nega = fsub half -0.0, %a_
1508 %negc = fsub half -0.0, %c_
1509 %1 = call half @llvm.fma.f16(half %nega, half %b, half %negc)
; fnmadd_h_2: fma(a, -b, -c), i.e. the same shape as fnmadd_h but with the
; negation on the second multiplicand. b and c are each passed through
; `fadd 0.0, x` before being negated with `fsub -0.0, ...`.
;  - +zfh / +zhinx: a single fnmadd.h is still expected; the value derived
;    from b is placed in the first source operand and a in the second.
;  - +zfhmin / +zhinxmin: the adds are done in f32 and rounded back to half;
;    each negation is an integer xor with the `lui 1048568` constant
;    (0xffff8000); the operands are then widened again and fused with fmadd.s.
;  - no FP extension: __addsf3 performs each add, each sign flip is an xor with
;    `lui 524288` (0x80000000) on the widened value, and fmaf performs the
;    multiply-add, with __extendhfsf2 / __truncsfhf2 around each step.
1513 define half @fnmadd_h_2(half %a, half %b, half %c) nounwind {
1514 ; CHECKIZFH-LABEL: fnmadd_h_2:
1515 ; CHECKIZFH: # %bb.0:
1516 ; CHECKIZFH-NEXT: fmv.h.x fa5, zero
1517 ; CHECKIZFH-NEXT: fadd.h fa4, fa1, fa5
1518 ; CHECKIZFH-NEXT: fadd.h fa5, fa2, fa5
1519 ; CHECKIZFH-NEXT: fnmadd.h fa0, fa4, fa0, fa5
1520 ; CHECKIZFH-NEXT: ret
1522 ; CHECKIZHINX-LABEL: fnmadd_h_2:
1523 ; CHECKIZHINX: # %bb.0:
1524 ; CHECKIZHINX-NEXT: fadd.h a1, a1, zero
1525 ; CHECKIZHINX-NEXT: fadd.h a2, a2, zero
1526 ; CHECKIZHINX-NEXT: fnmadd.h a0, a1, a0, a2
1527 ; CHECKIZHINX-NEXT: ret
1529 ; RV32I-LABEL: fnmadd_h_2:
1531 ; RV32I-NEXT: addi sp, sp, -32
1532 ; RV32I-NEXT: sw ra, 28(sp) # 4-byte Folded Spill
1533 ; RV32I-NEXT: sw s0, 24(sp) # 4-byte Folded Spill
1534 ; RV32I-NEXT: sw s1, 20(sp) # 4-byte Folded Spill
1535 ; RV32I-NEXT: sw s2, 16(sp) # 4-byte Folded Spill
1536 ; RV32I-NEXT: sw s3, 12(sp) # 4-byte Folded Spill
1537 ; RV32I-NEXT: sw s4, 8(sp) # 4-byte Folded Spill
1538 ; RV32I-NEXT: mv s1, a2
1539 ; RV32I-NEXT: mv s0, a0
1540 ; RV32I-NEXT: lui s3, 16
1541 ; RV32I-NEXT: addi s3, s3, -1
1542 ; RV32I-NEXT: and a0, a1, s3
1543 ; RV32I-NEXT: call __extendhfsf2
1544 ; RV32I-NEXT: li a1, 0
1545 ; RV32I-NEXT: call __addsf3
1546 ; RV32I-NEXT: call __truncsfhf2
1547 ; RV32I-NEXT: mv s2, a0
1548 ; RV32I-NEXT: and a0, s1, s3
1549 ; RV32I-NEXT: call __extendhfsf2
1550 ; RV32I-NEXT: li a1, 0
1551 ; RV32I-NEXT: call __addsf3
1552 ; RV32I-NEXT: call __truncsfhf2
1553 ; RV32I-NEXT: mv s1, a0
1554 ; RV32I-NEXT: and a0, s2, s3
1555 ; RV32I-NEXT: call __extendhfsf2
1556 ; RV32I-NEXT: lui s4, 524288
1557 ; RV32I-NEXT: xor a0, a0, s4
1558 ; RV32I-NEXT: call __truncsfhf2
1559 ; RV32I-NEXT: mv s2, a0
1560 ; RV32I-NEXT: and a0, s1, s3
1561 ; RV32I-NEXT: call __extendhfsf2
1562 ; RV32I-NEXT: xor a0, a0, s4
1563 ; RV32I-NEXT: call __truncsfhf2
1564 ; RV32I-NEXT: mv s1, a0
1565 ; RV32I-NEXT: and a0, s0, s3
1566 ; RV32I-NEXT: call __extendhfsf2
1567 ; RV32I-NEXT: mv s0, a0
1568 ; RV32I-NEXT: and a0, s2, s3
1569 ; RV32I-NEXT: call __extendhfsf2
1570 ; RV32I-NEXT: mv s2, a0
1571 ; RV32I-NEXT: and a0, s1, s3
1572 ; RV32I-NEXT: call __extendhfsf2
1573 ; RV32I-NEXT: mv a2, a0
1574 ; RV32I-NEXT: mv a0, s0
1575 ; RV32I-NEXT: mv a1, s2
1576 ; RV32I-NEXT: call fmaf
1577 ; RV32I-NEXT: call __truncsfhf2
1578 ; RV32I-NEXT: lw ra, 28(sp) # 4-byte Folded Reload
1579 ; RV32I-NEXT: lw s0, 24(sp) # 4-byte Folded Reload
1580 ; RV32I-NEXT: lw s1, 20(sp) # 4-byte Folded Reload
1581 ; RV32I-NEXT: lw s2, 16(sp) # 4-byte Folded Reload
1582 ; RV32I-NEXT: lw s3, 12(sp) # 4-byte Folded Reload
1583 ; RV32I-NEXT: lw s4, 8(sp) # 4-byte Folded Reload
1584 ; RV32I-NEXT: addi sp, sp, 32
1587 ; RV64I-LABEL: fnmadd_h_2:
1589 ; RV64I-NEXT: addi sp, sp, -48
1590 ; RV64I-NEXT: sd ra, 40(sp) # 8-byte Folded Spill
1591 ; RV64I-NEXT: sd s0, 32(sp) # 8-byte Folded Spill
1592 ; RV64I-NEXT: sd s1, 24(sp) # 8-byte Folded Spill
1593 ; RV64I-NEXT: sd s2, 16(sp) # 8-byte Folded Spill
1594 ; RV64I-NEXT: sd s3, 8(sp) # 8-byte Folded Spill
1595 ; RV64I-NEXT: sd s4, 0(sp) # 8-byte Folded Spill
1596 ; RV64I-NEXT: mv s1, a2
1597 ; RV64I-NEXT: mv s0, a0
1598 ; RV64I-NEXT: lui s3, 16
1599 ; RV64I-NEXT: addiw s3, s3, -1
1600 ; RV64I-NEXT: and a0, a1, s3
1601 ; RV64I-NEXT: call __extendhfsf2
1602 ; RV64I-NEXT: li a1, 0
1603 ; RV64I-NEXT: call __addsf3
1604 ; RV64I-NEXT: call __truncsfhf2
1605 ; RV64I-NEXT: mv s2, a0
1606 ; RV64I-NEXT: and a0, s1, s3
1607 ; RV64I-NEXT: call __extendhfsf2
1608 ; RV64I-NEXT: li a1, 0
1609 ; RV64I-NEXT: call __addsf3
1610 ; RV64I-NEXT: call __truncsfhf2
1611 ; RV64I-NEXT: mv s1, a0
1612 ; RV64I-NEXT: and a0, s2, s3
1613 ; RV64I-NEXT: call __extendhfsf2
1614 ; RV64I-NEXT: lui s4, 524288
1615 ; RV64I-NEXT: xor a0, a0, s4
1616 ; RV64I-NEXT: call __truncsfhf2
1617 ; RV64I-NEXT: mv s2, a0
1618 ; RV64I-NEXT: and a0, s1, s3
1619 ; RV64I-NEXT: call __extendhfsf2
1620 ; RV64I-NEXT: xor a0, a0, s4
1621 ; RV64I-NEXT: call __truncsfhf2
1622 ; RV64I-NEXT: mv s1, a0
1623 ; RV64I-NEXT: and a0, s0, s3
1624 ; RV64I-NEXT: call __extendhfsf2
1625 ; RV64I-NEXT: mv s0, a0
1626 ; RV64I-NEXT: and a0, s2, s3
1627 ; RV64I-NEXT: call __extendhfsf2
1628 ; RV64I-NEXT: mv s2, a0
1629 ; RV64I-NEXT: and a0, s1, s3
1630 ; RV64I-NEXT: call __extendhfsf2
1631 ; RV64I-NEXT: mv a2, a0
1632 ; RV64I-NEXT: mv a0, s0
1633 ; RV64I-NEXT: mv a1, s2
1634 ; RV64I-NEXT: call fmaf
1635 ; RV64I-NEXT: call __truncsfhf2
1636 ; RV64I-NEXT: ld ra, 40(sp) # 8-byte Folded Reload
1637 ; RV64I-NEXT: ld s0, 32(sp) # 8-byte Folded Reload
1638 ; RV64I-NEXT: ld s1, 24(sp) # 8-byte Folded Reload
1639 ; RV64I-NEXT: ld s2, 16(sp) # 8-byte Folded Reload
1640 ; RV64I-NEXT: ld s3, 8(sp) # 8-byte Folded Reload
1641 ; RV64I-NEXT: ld s4, 0(sp) # 8-byte Folded Reload
1642 ; RV64I-NEXT: addi sp, sp, 48
1645 ; CHECKIZFHMIN-LABEL: fnmadd_h_2:
1646 ; CHECKIZFHMIN: # %bb.0:
1647 ; CHECKIZFHMIN-NEXT: fcvt.s.h fa5, fa1
1648 ; CHECKIZFHMIN-NEXT: fmv.w.x fa4, zero
1649 ; CHECKIZFHMIN-NEXT: fcvt.s.h fa3, fa2
1650 ; CHECKIZFHMIN-NEXT: lui a0, 1048568
1651 ; CHECKIZFHMIN-NEXT: fadd.s fa5, fa5, fa4
1652 ; CHECKIZFHMIN-NEXT: fadd.s fa4, fa3, fa4
1653 ; CHECKIZFHMIN-NEXT: fcvt.h.s fa5, fa5
1654 ; CHECKIZFHMIN-NEXT: fcvt.h.s fa4, fa4
1655 ; CHECKIZFHMIN-NEXT: fmv.x.h a1, fa5
1656 ; CHECKIZFHMIN-NEXT: fmv.x.h a2, fa4
1657 ; CHECKIZFHMIN-NEXT: xor a1, a1, a0
1658 ; CHECKIZFHMIN-NEXT: xor a0, a2, a0
1659 ; CHECKIZFHMIN-NEXT: fmv.h.x fa5, a1
1660 ; CHECKIZFHMIN-NEXT: fmv.h.x fa4, a0
1661 ; CHECKIZFHMIN-NEXT: fcvt.s.h fa4, fa4
1662 ; CHECKIZFHMIN-NEXT: fcvt.s.h fa5, fa5
1663 ; CHECKIZFHMIN-NEXT: fcvt.s.h fa3, fa0
1664 ; CHECKIZFHMIN-NEXT: fmadd.s fa5, fa3, fa5, fa4
1665 ; CHECKIZFHMIN-NEXT: fcvt.h.s fa0, fa5
1666 ; CHECKIZFHMIN-NEXT: ret
1668 ; CHECKIZHINXMIN-LABEL: fnmadd_h_2:
1669 ; CHECKIZHINXMIN: # %bb.0:
1670 ; CHECKIZHINXMIN-NEXT: fcvt.s.h a1, a1
1671 ; CHECKIZHINXMIN-NEXT: fcvt.s.h a2, a2
1672 ; CHECKIZHINXMIN-NEXT: lui a3, 1048568
1673 ; CHECKIZHINXMIN-NEXT: fadd.s a1, a1, zero
1674 ; CHECKIZHINXMIN-NEXT: fadd.s a2, a2, zero
1675 ; CHECKIZHINXMIN-NEXT: fcvt.h.s a1, a1
1676 ; CHECKIZHINXMIN-NEXT: fcvt.h.s a2, a2
1677 ; CHECKIZHINXMIN-NEXT: xor a1, a1, a3
1678 ; CHECKIZHINXMIN-NEXT: xor a2, a2, a3
1679 ; CHECKIZHINXMIN-NEXT: fcvt.s.h a2, a2
1680 ; CHECKIZHINXMIN-NEXT: fcvt.s.h a1, a1
1681 ; CHECKIZHINXMIN-NEXT: fcvt.s.h a0, a0
1682 ; CHECKIZHINXMIN-NEXT: fmadd.s a0, a0, a1, a2
1683 ; CHECKIZHINXMIN-NEXT: fcvt.h.s a0, a0
1684 ; CHECKIZHINXMIN-NEXT: ret
1685 %b_ = fadd half 0.0, %b
1686 %c_ = fadd half 0.0, %c
1687 %negb = fsub half -0.0, %b_
1688 %negc = fsub half -0.0, %c_
1689 %1 = call half @llvm.fma.f16(half %a, half %negb, half %negc)
; fnmadd_h_3: llvm.fma.f16 whose result is negated afterwards. The checks
; expect the negation to stay a separate step instead of being folded into
; fnmadd.h.
;  - +zfh / +zhinx: fmadd.h followed by fneg.h.
;  - +zfhmin / +zhinxmin: operands are widened with fcvt.s.h, fused with
;    fmadd.s, narrowed with fcvt.h.s, and the sign is then flipped by an
;    integer xor with the `lui 1048568` constant (0xffff8000).
;  - no FP extension: operands are widened through __extendhfsf2, fmaf is
;    called, __truncsfhf2 narrows the result, and the same xor flips the sign.
; Only prefixes enabled by this file's RUN lines are listed below.
1693 define half @fnmadd_h_3(half %a, half %b, half %c) nounwind {
1706 ; CHECKIZFH-LABEL: fnmadd_h_3:
1707 ; CHECKIZFH: # %bb.0:
1708 ; CHECKIZFH-NEXT: fmadd.h fa5, fa0, fa1, fa2
1709 ; CHECKIZFH-NEXT: fneg.h fa0, fa5
1710 ; CHECKIZFH-NEXT: ret
1712 ; CHECKIZHINX-LABEL: fnmadd_h_3:
1713 ; CHECKIZHINX: # %bb.0:
1714 ; CHECKIZHINX-NEXT: fmadd.h a0, a0, a1, a2
1715 ; CHECKIZHINX-NEXT: fneg.h a0, a0
1716 ; CHECKIZHINX-NEXT: ret
1718 ; RV32I-LABEL: fnmadd_h_3:
1720 ; RV32I-NEXT: addi sp, sp, -32
1721 ; RV32I-NEXT: sw ra, 28(sp) # 4-byte Folded Spill
1722 ; RV32I-NEXT: sw s0, 24(sp) # 4-byte Folded Spill
1723 ; RV32I-NEXT: sw s1, 20(sp) # 4-byte Folded Spill
1724 ; RV32I-NEXT: sw s2, 16(sp) # 4-byte Folded Spill
1725 ; RV32I-NEXT: sw s3, 12(sp) # 4-byte Folded Spill
1726 ; RV32I-NEXT: mv s0, a2
1727 ; RV32I-NEXT: mv s1, a1
1728 ; RV32I-NEXT: lui a1, 16
1729 ; RV32I-NEXT: addi s3, a1, -1
1730 ; RV32I-NEXT: and a0, a0, s3
1731 ; RV32I-NEXT: call __extendhfsf2
1732 ; RV32I-NEXT: mv s2, a0
1733 ; RV32I-NEXT: and a0, s1, s3
1734 ; RV32I-NEXT: call __extendhfsf2
1735 ; RV32I-NEXT: mv s1, a0
1736 ; RV32I-NEXT: and a0, s0, s3
1737 ; RV32I-NEXT: call __extendhfsf2
1738 ; RV32I-NEXT: mv a2, a0
1739 ; RV32I-NEXT: mv a0, s2
1740 ; RV32I-NEXT: mv a1, s1
1741 ; RV32I-NEXT: call fmaf
1742 ; RV32I-NEXT: call __truncsfhf2
1743 ; RV32I-NEXT: lui a1, 1048568
1744 ; RV32I-NEXT: xor a0, a0, a1
1745 ; RV32I-NEXT: lw ra, 28(sp) # 4-byte Folded Reload
1746 ; RV32I-NEXT: lw s0, 24(sp) # 4-byte Folded Reload
1747 ; RV32I-NEXT: lw s1, 20(sp) # 4-byte Folded Reload
1748 ; RV32I-NEXT: lw s2, 16(sp) # 4-byte Folded Reload
1749 ; RV32I-NEXT: lw s3, 12(sp) # 4-byte Folded Reload
1750 ; RV32I-NEXT: addi sp, sp, 32
1753 ; RV64I-LABEL: fnmadd_h_3:
1755 ; RV64I-NEXT: addi sp, sp, -48
1756 ; RV64I-NEXT: sd ra, 40(sp) # 8-byte Folded Spill
1757 ; RV64I-NEXT: sd s0, 32(sp) # 8-byte Folded Spill
1758 ; RV64I-NEXT: sd s1, 24(sp) # 8-byte Folded Spill
1759 ; RV64I-NEXT: sd s2, 16(sp) # 8-byte Folded Spill
1760 ; RV64I-NEXT: sd s3, 8(sp) # 8-byte Folded Spill
1761 ; RV64I-NEXT: mv s0, a2
1762 ; RV64I-NEXT: mv s1, a1
1763 ; RV64I-NEXT: lui a1, 16
1764 ; RV64I-NEXT: addiw s3, a1, -1
1765 ; RV64I-NEXT: and a0, a0, s3
1766 ; RV64I-NEXT: call __extendhfsf2
1767 ; RV64I-NEXT: mv s2, a0
1768 ; RV64I-NEXT: and a0, s1, s3
1769 ; RV64I-NEXT: call __extendhfsf2
1770 ; RV64I-NEXT: mv s1, a0
1771 ; RV64I-NEXT: and a0, s0, s3
1772 ; RV64I-NEXT: call __extendhfsf2
1773 ; RV64I-NEXT: mv a2, a0
1774 ; RV64I-NEXT: mv a0, s2
1775 ; RV64I-NEXT: mv a1, s1
1776 ; RV64I-NEXT: call fmaf
1777 ; RV64I-NEXT: call __truncsfhf2
1778 ; RV64I-NEXT: lui a1, 1048568
1779 ; RV64I-NEXT: xor a0, a0, a1
1780 ; RV64I-NEXT: ld ra, 40(sp) # 8-byte Folded Reload
1781 ; RV64I-NEXT: ld s0, 32(sp) # 8-byte Folded Reload
1782 ; RV64I-NEXT: ld s1, 24(sp) # 8-byte Folded Reload
1783 ; RV64I-NEXT: ld s2, 16(sp) # 8-byte Folded Reload
1784 ; RV64I-NEXT: ld s3, 8(sp) # 8-byte Folded Reload
1785 ; RV64I-NEXT: addi sp, sp, 48
1788 ; CHECKIZFHMIN-LABEL: fnmadd_h_3:
1789 ; CHECKIZFHMIN: # %bb.0:
1790 ; CHECKIZFHMIN-NEXT: fcvt.s.h fa5, fa2
1791 ; CHECKIZFHMIN-NEXT: fcvt.s.h fa4, fa1
1792 ; CHECKIZFHMIN-NEXT: fcvt.s.h fa3, fa0
1793 ; CHECKIZFHMIN-NEXT: fmadd.s fa5, fa3, fa4, fa5
1794 ; CHECKIZFHMIN-NEXT: fcvt.h.s fa5, fa5
1795 ; CHECKIZFHMIN-NEXT: fmv.x.h a0, fa5
1796 ; CHECKIZFHMIN-NEXT: lui a1, 1048568
1797 ; CHECKIZFHMIN-NEXT: xor a0, a0, a1
1798 ; CHECKIZFHMIN-NEXT: fmv.h.x fa0, a0
1799 ; CHECKIZFHMIN-NEXT: ret
1801 ; CHECKIZHINXMIN-LABEL: fnmadd_h_3:
1802 ; CHECKIZHINXMIN: # %bb.0:
1803 ; CHECKIZHINXMIN-NEXT: fcvt.s.h a2, a2
1804 ; CHECKIZHINXMIN-NEXT: fcvt.s.h a1, a1
1805 ; CHECKIZHINXMIN-NEXT: fcvt.s.h a0, a0
1806 ; CHECKIZHINXMIN-NEXT: fmadd.s a0, a0, a1, a2
1807 ; CHECKIZHINXMIN-NEXT: fcvt.h.s a0, a0
1808 ; CHECKIZHINXMIN-NEXT: lui a1, 1048568
1809 ; CHECKIZHINXMIN-NEXT: xor a0, a0, a1
1810 ; CHECKIZHINXMIN-NEXT: # kill: def $x10_h killed $x10_h killed $x10
1811 ; CHECKIZHINXMIN-NEXT: ret
1812 %1 = call half @llvm.fma.f16(half %a, half %b, half %c)
; fnmadd_nsz: negated fma where both the llvm.fma.f16 call and the fneg carry
; the nsz flag. With full half support (Zfh or Zhinx) the checks expect a
; single fnmadd.h. The soft-float runs extend each operand with
; __extendhfsf2, call fmaf, truncate with __truncsfhf2 and then apply an
; integer lui/xor to the result; the Zfhmin/Zhinxmin runs do the same with
; fcvt.s.h, fmadd.s and fcvt.h.s followed by the lui/xor.
1818 define half @fnmadd_nsz(half %a, half %b, half %c) nounwind {
1829 ; CHECKIZFH-LABEL: fnmadd_nsz:
1830 ; CHECKIZFH: # %bb.0:
1831 ; CHECKIZFH-NEXT: fnmadd.h fa0, fa0, fa1, fa2
1832 ; CHECKIZFH-NEXT: ret
1834 ; CHECKIZHINX-LABEL: fnmadd_nsz:
1835 ; CHECKIZHINX: # %bb.0:
1836 ; CHECKIZHINX-NEXT: fnmadd.h a0, a0, a1, a2
1837 ; CHECKIZHINX-NEXT: ret
1839 ; RV32I-LABEL: fnmadd_nsz:
1841 ; RV32I-NEXT: addi sp, sp, -32
1842 ; RV32I-NEXT: sw ra, 28(sp) # 4-byte Folded Spill
1843 ; RV32I-NEXT: sw s0, 24(sp) # 4-byte Folded Spill
1844 ; RV32I-NEXT: sw s1, 20(sp) # 4-byte Folded Spill
1845 ; RV32I-NEXT: sw s2, 16(sp) # 4-byte Folded Spill
1846 ; RV32I-NEXT: sw s3, 12(sp) # 4-byte Folded Spill
1847 ; RV32I-NEXT: mv s0, a2
1848 ; RV32I-NEXT: mv s1, a1
1849 ; RV32I-NEXT: lui a1, 16
1850 ; RV32I-NEXT: addi s3, a1, -1
1851 ; RV32I-NEXT: and a0, a0, s3
1852 ; RV32I-NEXT: call __extendhfsf2
1853 ; RV32I-NEXT: mv s2, a0
1854 ; RV32I-NEXT: and a0, s1, s3
1855 ; RV32I-NEXT: call __extendhfsf2
1856 ; RV32I-NEXT: mv s1, a0
1857 ; RV32I-NEXT: and a0, s0, s3
1858 ; RV32I-NEXT: call __extendhfsf2
1859 ; RV32I-NEXT: mv a2, a0
1860 ; RV32I-NEXT: mv a0, s2
1861 ; RV32I-NEXT: mv a1, s1
1862 ; RV32I-NEXT: call fmaf
1863 ; RV32I-NEXT: call __truncsfhf2
1864 ; RV32I-NEXT: lui a1, 1048568
1865 ; RV32I-NEXT: xor a0, a0, a1
1866 ; RV32I-NEXT: lw ra, 28(sp) # 4-byte Folded Reload
1867 ; RV32I-NEXT: lw s0, 24(sp) # 4-byte Folded Reload
1868 ; RV32I-NEXT: lw s1, 20(sp) # 4-byte Folded Reload
1869 ; RV32I-NEXT: lw s2, 16(sp) # 4-byte Folded Reload
1870 ; RV32I-NEXT: lw s3, 12(sp) # 4-byte Folded Reload
1871 ; RV32I-NEXT: addi sp, sp, 32
1874 ; RV64I-LABEL: fnmadd_nsz:
1876 ; RV64I-NEXT: addi sp, sp, -48
1877 ; RV64I-NEXT: sd ra, 40(sp) # 8-byte Folded Spill
1878 ; RV64I-NEXT: sd s0, 32(sp) # 8-byte Folded Spill
1879 ; RV64I-NEXT: sd s1, 24(sp) # 8-byte Folded Spill
1880 ; RV64I-NEXT: sd s2, 16(sp) # 8-byte Folded Spill
1881 ; RV64I-NEXT: sd s3, 8(sp) # 8-byte Folded Spill
1882 ; RV64I-NEXT: mv s0, a2
1883 ; RV64I-NEXT: mv s1, a1
1884 ; RV64I-NEXT: lui a1, 16
1885 ; RV64I-NEXT: addiw s3, a1, -1
1886 ; RV64I-NEXT: and a0, a0, s3
1887 ; RV64I-NEXT: call __extendhfsf2
1888 ; RV64I-NEXT: mv s2, a0
1889 ; RV64I-NEXT: and a0, s1, s3
1890 ; RV64I-NEXT: call __extendhfsf2
1891 ; RV64I-NEXT: mv s1, a0
1892 ; RV64I-NEXT: and a0, s0, s3
1893 ; RV64I-NEXT: call __extendhfsf2
1894 ; RV64I-NEXT: mv a2, a0
1895 ; RV64I-NEXT: mv a0, s2
1896 ; RV64I-NEXT: mv a1, s1
1897 ; RV64I-NEXT: call fmaf
1898 ; RV64I-NEXT: call __truncsfhf2
1899 ; RV64I-NEXT: lui a1, 1048568
1900 ; RV64I-NEXT: xor a0, a0, a1
1901 ; RV64I-NEXT: ld ra, 40(sp) # 8-byte Folded Reload
1902 ; RV64I-NEXT: ld s0, 32(sp) # 8-byte Folded Reload
1903 ; RV64I-NEXT: ld s1, 24(sp) # 8-byte Folded Reload
1904 ; RV64I-NEXT: ld s2, 16(sp) # 8-byte Folded Reload
1905 ; RV64I-NEXT: ld s3, 8(sp) # 8-byte Folded Reload
1906 ; RV64I-NEXT: addi sp, sp, 48
1909 ; CHECKIZFHMIN-LABEL: fnmadd_nsz:
1910 ; CHECKIZFHMIN: # %bb.0:
1911 ; CHECKIZFHMIN-NEXT: fcvt.s.h fa5, fa2
1912 ; CHECKIZFHMIN-NEXT: fcvt.s.h fa4, fa1
1913 ; CHECKIZFHMIN-NEXT: fcvt.s.h fa3, fa0
1914 ; CHECKIZFHMIN-NEXT: fmadd.s fa5, fa3, fa4, fa5
1915 ; CHECKIZFHMIN-NEXT: fcvt.h.s fa5, fa5
1916 ; CHECKIZFHMIN-NEXT: fmv.x.h a0, fa5
1917 ; CHECKIZFHMIN-NEXT: lui a1, 1048568
1918 ; CHECKIZFHMIN-NEXT: xor a0, a0, a1
1919 ; CHECKIZFHMIN-NEXT: fmv.h.x fa0, a0
1920 ; CHECKIZFHMIN-NEXT: ret
1922 ; CHECKIZHINXMIN-LABEL: fnmadd_nsz:
1923 ; CHECKIZHINXMIN: # %bb.0:
1924 ; CHECKIZHINXMIN-NEXT: fcvt.s.h a2, a2
1925 ; CHECKIZHINXMIN-NEXT: fcvt.s.h a1, a1
1926 ; CHECKIZHINXMIN-NEXT: fcvt.s.h a0, a0
1927 ; CHECKIZHINXMIN-NEXT: fmadd.s a0, a0, a1, a2
1928 ; CHECKIZHINXMIN-NEXT: fcvt.h.s a0, a0
1929 ; CHECKIZHINXMIN-NEXT: lui a1, 1048568
1930 ; CHECKIZHINXMIN-NEXT: xor a0, a0, a1
1931 ; CHECKIZHINXMIN-NEXT: # kill: def $x10_h killed $x10_h killed $x10
1932 ; CHECKIZHINXMIN-NEXT: ret
1933 %1 = call nsz half @llvm.fma.f16(half %a, half %b, half %c)
1934 %neg = fneg nsz half %1
; fnmsub_h: fma whose first multiplicand is negated, i.e. fma(-(0.0 + a), b, c).
; With full half support (Zfh or Zhinx) the checks expect the fadd.h followed
; by a single fnmsub.h. The soft-float runs perform the add with __addsf3,
; negate with an integer lui/xor on the extended value and then call fmaf;
; the Zfhmin/Zhinxmin runs use fadd.s, an integer lui/xor on the half value
; and fmadd.s.
1938 define half @fnmsub_h(half %a, half %b, half %c) nounwind {
1939 ; CHECKIZFH-LABEL: fnmsub_h:
1940 ; CHECKIZFH: # %bb.0:
1941 ; CHECKIZFH-NEXT: fmv.h.x fa5, zero
1942 ; CHECKIZFH-NEXT: fadd.h fa5, fa0, fa5
1943 ; CHECKIZFH-NEXT: fnmsub.h fa0, fa5, fa1, fa2
1944 ; CHECKIZFH-NEXT: ret
1946 ; CHECKIZHINX-LABEL: fnmsub_h:
1947 ; CHECKIZHINX: # %bb.0:
1948 ; CHECKIZHINX-NEXT: fadd.h a0, a0, zero
1949 ; CHECKIZHINX-NEXT: fnmsub.h a0, a0, a1, a2
1950 ; CHECKIZHINX-NEXT: ret
1952 ; RV32I-LABEL: fnmsub_h:
1954 ; RV32I-NEXT: addi sp, sp, -32
1955 ; RV32I-NEXT: sw ra, 28(sp) # 4-byte Folded Spill
1956 ; RV32I-NEXT: sw s0, 24(sp) # 4-byte Folded Spill
1957 ; RV32I-NEXT: sw s1, 20(sp) # 4-byte Folded Spill
1958 ; RV32I-NEXT: sw s2, 16(sp) # 4-byte Folded Spill
1959 ; RV32I-NEXT: sw s3, 12(sp) # 4-byte Folded Spill
1960 ; RV32I-NEXT: mv s0, a2
1961 ; RV32I-NEXT: mv s1, a1
1962 ; RV32I-NEXT: lui a1, 16
1963 ; RV32I-NEXT: addi s3, a1, -1
1964 ; RV32I-NEXT: and a0, a0, s3
1965 ; RV32I-NEXT: call __extendhfsf2
1966 ; RV32I-NEXT: li a1, 0
1967 ; RV32I-NEXT: call __addsf3
1968 ; RV32I-NEXT: call __truncsfhf2
1969 ; RV32I-NEXT: and a0, a0, s3
1970 ; RV32I-NEXT: call __extendhfsf2
1971 ; RV32I-NEXT: lui a1, 524288
1972 ; RV32I-NEXT: xor a0, a0, a1
1973 ; RV32I-NEXT: call __truncsfhf2
1974 ; RV32I-NEXT: mv s2, a0
1975 ; RV32I-NEXT: and a0, s1, s3
1976 ; RV32I-NEXT: call __extendhfsf2
1977 ; RV32I-NEXT: mv s1, a0
1978 ; RV32I-NEXT: and a0, s0, s3
1979 ; RV32I-NEXT: call __extendhfsf2
1980 ; RV32I-NEXT: mv s0, a0
1981 ; RV32I-NEXT: and a0, s2, s3
1982 ; RV32I-NEXT: call __extendhfsf2
1983 ; RV32I-NEXT: mv a1, s1
1984 ; RV32I-NEXT: mv a2, s0
1985 ; RV32I-NEXT: call fmaf
1986 ; RV32I-NEXT: call __truncsfhf2
1987 ; RV32I-NEXT: lw ra, 28(sp) # 4-byte Folded Reload
1988 ; RV32I-NEXT: lw s0, 24(sp) # 4-byte Folded Reload
1989 ; RV32I-NEXT: lw s1, 20(sp) # 4-byte Folded Reload
1990 ; RV32I-NEXT: lw s2, 16(sp) # 4-byte Folded Reload
1991 ; RV32I-NEXT: lw s3, 12(sp) # 4-byte Folded Reload
1992 ; RV32I-NEXT: addi sp, sp, 32
1995 ; RV64I-LABEL: fnmsub_h:
1997 ; RV64I-NEXT: addi sp, sp, -48
1998 ; RV64I-NEXT: sd ra, 40(sp) # 8-byte Folded Spill
1999 ; RV64I-NEXT: sd s0, 32(sp) # 8-byte Folded Spill
2000 ; RV64I-NEXT: sd s1, 24(sp) # 8-byte Folded Spill
2001 ; RV64I-NEXT: sd s2, 16(sp) # 8-byte Folded Spill
2002 ; RV64I-NEXT: sd s3, 8(sp) # 8-byte Folded Spill
2003 ; RV64I-NEXT: mv s0, a2
2004 ; RV64I-NEXT: mv s1, a1
2005 ; RV64I-NEXT: lui a1, 16
2006 ; RV64I-NEXT: addiw s3, a1, -1
2007 ; RV64I-NEXT: and a0, a0, s3
2008 ; RV64I-NEXT: call __extendhfsf2
2009 ; RV64I-NEXT: li a1, 0
2010 ; RV64I-NEXT: call __addsf3
2011 ; RV64I-NEXT: call __truncsfhf2
2012 ; RV64I-NEXT: and a0, a0, s3
2013 ; RV64I-NEXT: call __extendhfsf2
2014 ; RV64I-NEXT: lui a1, 524288
2015 ; RV64I-NEXT: xor a0, a0, a1
2016 ; RV64I-NEXT: call __truncsfhf2
2017 ; RV64I-NEXT: mv s2, a0
2018 ; RV64I-NEXT: and a0, s1, s3
2019 ; RV64I-NEXT: call __extendhfsf2
2020 ; RV64I-NEXT: mv s1, a0
2021 ; RV64I-NEXT: and a0, s0, s3
2022 ; RV64I-NEXT: call __extendhfsf2
2023 ; RV64I-NEXT: mv s0, a0
2024 ; RV64I-NEXT: and a0, s2, s3
2025 ; RV64I-NEXT: call __extendhfsf2
2026 ; RV64I-NEXT: mv a1, s1
2027 ; RV64I-NEXT: mv a2, s0
2028 ; RV64I-NEXT: call fmaf
2029 ; RV64I-NEXT: call __truncsfhf2
2030 ; RV64I-NEXT: ld ra, 40(sp) # 8-byte Folded Reload
2031 ; RV64I-NEXT: ld s0, 32(sp) # 8-byte Folded Reload
2032 ; RV64I-NEXT: ld s1, 24(sp) # 8-byte Folded Reload
2033 ; RV64I-NEXT: ld s2, 16(sp) # 8-byte Folded Reload
2034 ; RV64I-NEXT: ld s3, 8(sp) # 8-byte Folded Reload
2035 ; RV64I-NEXT: addi sp, sp, 48
2038 ; CHECKIZFHMIN-LABEL: fnmsub_h:
2039 ; CHECKIZFHMIN: # %bb.0:
2040 ; CHECKIZFHMIN-NEXT: fcvt.s.h fa5, fa0
2041 ; CHECKIZFHMIN-NEXT: fmv.w.x fa4, zero
2042 ; CHECKIZFHMIN-NEXT: lui a0, 1048568
2043 ; CHECKIZFHMIN-NEXT: fcvt.s.h fa3, fa2
2044 ; CHECKIZFHMIN-NEXT: fadd.s fa5, fa5, fa4
2045 ; CHECKIZFHMIN-NEXT: fcvt.h.s fa5, fa5
2046 ; CHECKIZFHMIN-NEXT: fmv.x.h a1, fa5
2047 ; CHECKIZFHMIN-NEXT: xor a0, a1, a0
2048 ; CHECKIZFHMIN-NEXT: fmv.h.x fa5, a0
2049 ; CHECKIZFHMIN-NEXT: fcvt.s.h fa5, fa5
2050 ; CHECKIZFHMIN-NEXT: fcvt.s.h fa4, fa1
2051 ; CHECKIZFHMIN-NEXT: fmadd.s fa5, fa5, fa4, fa3
2052 ; CHECKIZFHMIN-NEXT: fcvt.h.s fa0, fa5
2053 ; CHECKIZFHMIN-NEXT: ret
2055 ; CHECKIZHINXMIN-LABEL: fnmsub_h:
2056 ; CHECKIZHINXMIN: # %bb.0:
2057 ; CHECKIZHINXMIN-NEXT: fcvt.s.h a0, a0
2058 ; CHECKIZHINXMIN-NEXT: lui a3, 1048568
2059 ; CHECKIZHINXMIN-NEXT: fcvt.s.h a2, a2
2060 ; CHECKIZHINXMIN-NEXT: fadd.s a0, a0, zero
2061 ; CHECKIZHINXMIN-NEXT: fcvt.h.s a0, a0
2062 ; CHECKIZHINXMIN-NEXT: xor a0, a0, a3
2063 ; CHECKIZHINXMIN-NEXT: fcvt.s.h a0, a0
2064 ; CHECKIZHINXMIN-NEXT: fcvt.s.h a1, a1
2065 ; CHECKIZHINXMIN-NEXT: fmadd.s a0, a0, a1, a2
2066 ; CHECKIZHINXMIN-NEXT: fcvt.h.s a0, a0
2067 ; CHECKIZHINXMIN-NEXT: ret
2068 %a_ = fadd half 0.0, %a ; presumably the same "avoid negation using xor" trick as the *_contract tests - confirm
2069 %nega = fsub half -0.0, %a_ ; negation written as -0.0 - x
2070 %1 = call half @llvm.fma.f16(half %nega, half %b, half %c)
; fnmsub_h_2: like fnmsub_h but the negated operand is the second
; multiplicand, i.e. fma(a, -(0.0 + b), c). With full half support (Zfh or
; Zhinx) the checks still expect fnmsub.h, with the adjusted b as its first
; source operand. The other runs negate b with an integer lui/xor and then
; use fmaf (soft-float) or fmadd.s (Zfhmin/Zhinxmin).
2074 define half @fnmsub_h_2(half %a, half %b, half %c) nounwind {
2075 ; CHECKIZFH-LABEL: fnmsub_h_2:
2076 ; CHECKIZFH: # %bb.0:
2077 ; CHECKIZFH-NEXT: fmv.h.x fa5, zero
2078 ; CHECKIZFH-NEXT: fadd.h fa5, fa1, fa5
2079 ; CHECKIZFH-NEXT: fnmsub.h fa0, fa5, fa0, fa2
2080 ; CHECKIZFH-NEXT: ret
2082 ; CHECKIZHINX-LABEL: fnmsub_h_2:
2083 ; CHECKIZHINX: # %bb.0:
2084 ; CHECKIZHINX-NEXT: fadd.h a1, a1, zero
2085 ; CHECKIZHINX-NEXT: fnmsub.h a0, a1, a0, a2
2086 ; CHECKIZHINX-NEXT: ret
2088 ; RV32I-LABEL: fnmsub_h_2:
2090 ; RV32I-NEXT: addi sp, sp, -32
2091 ; RV32I-NEXT: sw ra, 28(sp) # 4-byte Folded Spill
2092 ; RV32I-NEXT: sw s0, 24(sp) # 4-byte Folded Spill
2093 ; RV32I-NEXT: sw s1, 20(sp) # 4-byte Folded Spill
2094 ; RV32I-NEXT: sw s2, 16(sp) # 4-byte Folded Spill
2095 ; RV32I-NEXT: sw s3, 12(sp) # 4-byte Folded Spill
2096 ; RV32I-NEXT: mv s0, a2
2097 ; RV32I-NEXT: mv s1, a0
2098 ; RV32I-NEXT: lui a0, 16
2099 ; RV32I-NEXT: addi s3, a0, -1
2100 ; RV32I-NEXT: and a0, a1, s3
2101 ; RV32I-NEXT: call __extendhfsf2
2102 ; RV32I-NEXT: li a1, 0
2103 ; RV32I-NEXT: call __addsf3
2104 ; RV32I-NEXT: call __truncsfhf2
2105 ; RV32I-NEXT: and a0, a0, s3
2106 ; RV32I-NEXT: call __extendhfsf2
2107 ; RV32I-NEXT: lui a1, 524288
2108 ; RV32I-NEXT: xor a0, a0, a1
2109 ; RV32I-NEXT: call __truncsfhf2
2110 ; RV32I-NEXT: mv s2, a0
2111 ; RV32I-NEXT: and a0, s1, s3
2112 ; RV32I-NEXT: call __extendhfsf2
2113 ; RV32I-NEXT: mv s1, a0
2114 ; RV32I-NEXT: and a0, s0, s3
2115 ; RV32I-NEXT: call __extendhfsf2
2116 ; RV32I-NEXT: mv s0, a0
2117 ; RV32I-NEXT: and a0, s2, s3
2118 ; RV32I-NEXT: call __extendhfsf2
2119 ; RV32I-NEXT: mv a1, a0
2120 ; RV32I-NEXT: mv a0, s1
2121 ; RV32I-NEXT: mv a2, s0
2122 ; RV32I-NEXT: call fmaf
2123 ; RV32I-NEXT: call __truncsfhf2
2124 ; RV32I-NEXT: lw ra, 28(sp) # 4-byte Folded Reload
2125 ; RV32I-NEXT: lw s0, 24(sp) # 4-byte Folded Reload
2126 ; RV32I-NEXT: lw s1, 20(sp) # 4-byte Folded Reload
2127 ; RV32I-NEXT: lw s2, 16(sp) # 4-byte Folded Reload
2128 ; RV32I-NEXT: lw s3, 12(sp) # 4-byte Folded Reload
2129 ; RV32I-NEXT: addi sp, sp, 32
2132 ; RV64I-LABEL: fnmsub_h_2:
2134 ; RV64I-NEXT: addi sp, sp, -48
2135 ; RV64I-NEXT: sd ra, 40(sp) # 8-byte Folded Spill
2136 ; RV64I-NEXT: sd s0, 32(sp) # 8-byte Folded Spill
2137 ; RV64I-NEXT: sd s1, 24(sp) # 8-byte Folded Spill
2138 ; RV64I-NEXT: sd s2, 16(sp) # 8-byte Folded Spill
2139 ; RV64I-NEXT: sd s3, 8(sp) # 8-byte Folded Spill
2140 ; RV64I-NEXT: mv s0, a2
2141 ; RV64I-NEXT: mv s1, a0
2142 ; RV64I-NEXT: lui a0, 16
2143 ; RV64I-NEXT: addiw s3, a0, -1
2144 ; RV64I-NEXT: and a0, a1, s3
2145 ; RV64I-NEXT: call __extendhfsf2
2146 ; RV64I-NEXT: li a1, 0
2147 ; RV64I-NEXT: call __addsf3
2148 ; RV64I-NEXT: call __truncsfhf2
2149 ; RV64I-NEXT: and a0, a0, s3
2150 ; RV64I-NEXT: call __extendhfsf2
2151 ; RV64I-NEXT: lui a1, 524288
2152 ; RV64I-NEXT: xor a0, a0, a1
2153 ; RV64I-NEXT: call __truncsfhf2
2154 ; RV64I-NEXT: mv s2, a0
2155 ; RV64I-NEXT: and a0, s1, s3
2156 ; RV64I-NEXT: call __extendhfsf2
2157 ; RV64I-NEXT: mv s1, a0
2158 ; RV64I-NEXT: and a0, s0, s3
2159 ; RV64I-NEXT: call __extendhfsf2
2160 ; RV64I-NEXT: mv s0, a0
2161 ; RV64I-NEXT: and a0, s2, s3
2162 ; RV64I-NEXT: call __extendhfsf2
2163 ; RV64I-NEXT: mv a1, a0
2164 ; RV64I-NEXT: mv a0, s1
2165 ; RV64I-NEXT: mv a2, s0
2166 ; RV64I-NEXT: call fmaf
2167 ; RV64I-NEXT: call __truncsfhf2
2168 ; RV64I-NEXT: ld ra, 40(sp) # 8-byte Folded Reload
2169 ; RV64I-NEXT: ld s0, 32(sp) # 8-byte Folded Reload
2170 ; RV64I-NEXT: ld s1, 24(sp) # 8-byte Folded Reload
2171 ; RV64I-NEXT: ld s2, 16(sp) # 8-byte Folded Reload
2172 ; RV64I-NEXT: ld s3, 8(sp) # 8-byte Folded Reload
2173 ; RV64I-NEXT: addi sp, sp, 48
2176 ; CHECKIZFHMIN-LABEL: fnmsub_h_2:
2177 ; CHECKIZFHMIN: # %bb.0:
2178 ; CHECKIZFHMIN-NEXT: fcvt.s.h fa5, fa1
2179 ; CHECKIZFHMIN-NEXT: fmv.w.x fa4, zero
2180 ; CHECKIZFHMIN-NEXT: lui a0, 1048568
2181 ; CHECKIZFHMIN-NEXT: fcvt.s.h fa3, fa2
2182 ; CHECKIZFHMIN-NEXT: fadd.s fa5, fa5, fa4
2183 ; CHECKIZFHMIN-NEXT: fcvt.h.s fa5, fa5
2184 ; CHECKIZFHMIN-NEXT: fmv.x.h a1, fa5
2185 ; CHECKIZFHMIN-NEXT: xor a0, a1, a0
2186 ; CHECKIZFHMIN-NEXT: fmv.h.x fa5, a0
2187 ; CHECKIZFHMIN-NEXT: fcvt.s.h fa5, fa5
2188 ; CHECKIZFHMIN-NEXT: fcvt.s.h fa4, fa0
2189 ; CHECKIZFHMIN-NEXT: fmadd.s fa5, fa4, fa5, fa3
2190 ; CHECKIZFHMIN-NEXT: fcvt.h.s fa0, fa5
2191 ; CHECKIZFHMIN-NEXT: ret
2193 ; CHECKIZHINXMIN-LABEL: fnmsub_h_2:
2194 ; CHECKIZHINXMIN: # %bb.0:
2195 ; CHECKIZHINXMIN-NEXT: fcvt.s.h a1, a1
2196 ; CHECKIZHINXMIN-NEXT: lui a3, 1048568
2197 ; CHECKIZHINXMIN-NEXT: fcvt.s.h a2, a2
2198 ; CHECKIZHINXMIN-NEXT: fadd.s a1, a1, zero
2199 ; CHECKIZHINXMIN-NEXT: fcvt.h.s a1, a1
2200 ; CHECKIZHINXMIN-NEXT: xor a1, a1, a3
2201 ; CHECKIZHINXMIN-NEXT: fcvt.s.h a1, a1
2202 ; CHECKIZHINXMIN-NEXT: fcvt.s.h a0, a0
2203 ; CHECKIZHINXMIN-NEXT: fmadd.s a0, a0, a1, a2
2204 ; CHECKIZHINXMIN-NEXT: fcvt.h.s a0, a0
2205 ; CHECKIZHINXMIN-NEXT: ret
2206 %b_ = fadd half 0.0, %b ; presumably the same "avoid negation using xor" trick as the *_contract tests - confirm
2207 %negb = fsub half -0.0, %b_ ; negation written as -0.0 - x
2208 %1 = call half @llvm.fma.f16(half %a, half %negb, half %c)
; fmadd_h_contract: a separate fmul and fadd, both marked contract, computing
; a * b + c. With full half support (Zfh or Zhinx) the checks expect the pair
; to be fused into one fmadd.h. In the other runs the operations stay
; separate: the product is rounded to half (__truncsfhf2 or fcvt.h.s) and
; re-extended before the add (__mulsf3 then __addsf3, or fmul.s then fadd.s).
2212 define half @fmadd_h_contract(half %a, half %b, half %c) nounwind {
2213 ; CHECKIZFH-LABEL: fmadd_h_contract:
2214 ; CHECKIZFH: # %bb.0:
2215 ; CHECKIZFH-NEXT: fmadd.h fa0, fa0, fa1, fa2
2216 ; CHECKIZFH-NEXT: ret
2218 ; CHECKIZHINX-LABEL: fmadd_h_contract:
2219 ; CHECKIZHINX: # %bb.0:
2220 ; CHECKIZHINX-NEXT: fmadd.h a0, a0, a1, a2
2221 ; CHECKIZHINX-NEXT: ret
2223 ; RV32I-LABEL: fmadd_h_contract:
2225 ; RV32I-NEXT: addi sp, sp, -32
2226 ; RV32I-NEXT: sw ra, 28(sp) # 4-byte Folded Spill
2227 ; RV32I-NEXT: sw s0, 24(sp) # 4-byte Folded Spill
2228 ; RV32I-NEXT: sw s1, 20(sp) # 4-byte Folded Spill
2229 ; RV32I-NEXT: sw s2, 16(sp) # 4-byte Folded Spill
2230 ; RV32I-NEXT: sw s3, 12(sp) # 4-byte Folded Spill
2231 ; RV32I-NEXT: mv s0, a2
2232 ; RV32I-NEXT: mv s1, a1
2233 ; RV32I-NEXT: lui a1, 16
2234 ; RV32I-NEXT: addi s3, a1, -1
2235 ; RV32I-NEXT: and a0, a0, s3
2236 ; RV32I-NEXT: call __extendhfsf2
2237 ; RV32I-NEXT: mv s2, a0
2238 ; RV32I-NEXT: and a0, s1, s3
2239 ; RV32I-NEXT: call __extendhfsf2
2240 ; RV32I-NEXT: mv a1, a0
2241 ; RV32I-NEXT: mv a0, s2
2242 ; RV32I-NEXT: call __mulsf3
2243 ; RV32I-NEXT: call __truncsfhf2
2244 ; RV32I-NEXT: mv s1, a0
2245 ; RV32I-NEXT: and a0, s0, s3
2246 ; RV32I-NEXT: call __extendhfsf2
2247 ; RV32I-NEXT: mv s0, a0
2248 ; RV32I-NEXT: and a0, s1, s3
2249 ; RV32I-NEXT: call __extendhfsf2
2250 ; RV32I-NEXT: mv a1, s0
2251 ; RV32I-NEXT: call __addsf3
2252 ; RV32I-NEXT: call __truncsfhf2
2253 ; RV32I-NEXT: lw ra, 28(sp) # 4-byte Folded Reload
2254 ; RV32I-NEXT: lw s0, 24(sp) # 4-byte Folded Reload
2255 ; RV32I-NEXT: lw s1, 20(sp) # 4-byte Folded Reload
2256 ; RV32I-NEXT: lw s2, 16(sp) # 4-byte Folded Reload
2257 ; RV32I-NEXT: lw s3, 12(sp) # 4-byte Folded Reload
2258 ; RV32I-NEXT: addi sp, sp, 32
2261 ; RV64I-LABEL: fmadd_h_contract:
2263 ; RV64I-NEXT: addi sp, sp, -48
2264 ; RV64I-NEXT: sd ra, 40(sp) # 8-byte Folded Spill
2265 ; RV64I-NEXT: sd s0, 32(sp) # 8-byte Folded Spill
2266 ; RV64I-NEXT: sd s1, 24(sp) # 8-byte Folded Spill
2267 ; RV64I-NEXT: sd s2, 16(sp) # 8-byte Folded Spill
2268 ; RV64I-NEXT: sd s3, 8(sp) # 8-byte Folded Spill
2269 ; RV64I-NEXT: mv s0, a2
2270 ; RV64I-NEXT: mv s1, a1
2271 ; RV64I-NEXT: lui a1, 16
2272 ; RV64I-NEXT: addiw s3, a1, -1
2273 ; RV64I-NEXT: and a0, a0, s3
2274 ; RV64I-NEXT: call __extendhfsf2
2275 ; RV64I-NEXT: mv s2, a0
2276 ; RV64I-NEXT: and a0, s1, s3
2277 ; RV64I-NEXT: call __extendhfsf2
2278 ; RV64I-NEXT: mv a1, a0
2279 ; RV64I-NEXT: mv a0, s2
2280 ; RV64I-NEXT: call __mulsf3
2281 ; RV64I-NEXT: call __truncsfhf2
2282 ; RV64I-NEXT: mv s1, a0
2283 ; RV64I-NEXT: and a0, s0, s3
2284 ; RV64I-NEXT: call __extendhfsf2
2285 ; RV64I-NEXT: mv s0, a0
2286 ; RV64I-NEXT: and a0, s1, s3
2287 ; RV64I-NEXT: call __extendhfsf2
2288 ; RV64I-NEXT: mv a1, s0
2289 ; RV64I-NEXT: call __addsf3
2290 ; RV64I-NEXT: call __truncsfhf2
2291 ; RV64I-NEXT: ld ra, 40(sp) # 8-byte Folded Reload
2292 ; RV64I-NEXT: ld s0, 32(sp) # 8-byte Folded Reload
2293 ; RV64I-NEXT: ld s1, 24(sp) # 8-byte Folded Reload
2294 ; RV64I-NEXT: ld s2, 16(sp) # 8-byte Folded Reload
2295 ; RV64I-NEXT: ld s3, 8(sp) # 8-byte Folded Reload
2296 ; RV64I-NEXT: addi sp, sp, 48
2299 ; CHECKIZFHMIN-LABEL: fmadd_h_contract:
2300 ; CHECKIZFHMIN: # %bb.0:
2301 ; CHECKIZFHMIN-NEXT: fcvt.s.h fa5, fa1
2302 ; CHECKIZFHMIN-NEXT: fcvt.s.h fa4, fa0
2303 ; CHECKIZFHMIN-NEXT: fmul.s fa5, fa4, fa5
2304 ; CHECKIZFHMIN-NEXT: fcvt.h.s fa5, fa5
2305 ; CHECKIZFHMIN-NEXT: fcvt.s.h fa5, fa5
2306 ; CHECKIZFHMIN-NEXT: fcvt.s.h fa4, fa2
2307 ; CHECKIZFHMIN-NEXT: fadd.s fa5, fa5, fa4
2308 ; CHECKIZFHMIN-NEXT: fcvt.h.s fa0, fa5
2309 ; CHECKIZFHMIN-NEXT: ret
2311 ; CHECKIZHINXMIN-LABEL: fmadd_h_contract:
2312 ; CHECKIZHINXMIN: # %bb.0:
2313 ; CHECKIZHINXMIN-NEXT: fcvt.s.h a1, a1
2314 ; CHECKIZHINXMIN-NEXT: fcvt.s.h a0, a0
2315 ; CHECKIZHINXMIN-NEXT: fmul.s a0, a0, a1
2316 ; CHECKIZHINXMIN-NEXT: fcvt.h.s a0, a0
2317 ; CHECKIZHINXMIN-NEXT: fcvt.s.h a0, a0
2318 ; CHECKIZHINXMIN-NEXT: fcvt.s.h a1, a2
2319 ; CHECKIZHINXMIN-NEXT: fadd.s a0, a0, a1
2320 ; CHECKIZHINXMIN-NEXT: fcvt.h.s a0, a0
2321 ; CHECKIZHINXMIN-NEXT: ret
2322 %1 = fmul contract half %a, %b
2323 %2 = fadd contract half %1, %c
; fmsub_h_contract: a separate fmul and fsub, both marked contract, computing
; a * b - (0.0 + c). With full half support (Zfh or Zhinx) the checks expect
; the fadd.h on c followed by one fused fmsub.h. In the other runs the
; multiply and subtract stay separate, with the product rounded to half in
; between (__mulsf3 then __subsf3, or fmul.s then fsub.s).
2327 define half @fmsub_h_contract(half %a, half %b, half %c) nounwind {
2328 ; CHECKIZFH-LABEL: fmsub_h_contract:
2329 ; CHECKIZFH: # %bb.0:
2330 ; CHECKIZFH-NEXT: fmv.h.x fa5, zero
2331 ; CHECKIZFH-NEXT: fadd.h fa5, fa2, fa5
2332 ; CHECKIZFH-NEXT: fmsub.h fa0, fa0, fa1, fa5
2333 ; CHECKIZFH-NEXT: ret
2335 ; CHECKIZHINX-LABEL: fmsub_h_contract:
2336 ; CHECKIZHINX: # %bb.0:
2337 ; CHECKIZHINX-NEXT: fadd.h a2, a2, zero
2338 ; CHECKIZHINX-NEXT: fmsub.h a0, a0, a1, a2
2339 ; CHECKIZHINX-NEXT: ret
2341 ; RV32I-LABEL: fmsub_h_contract:
2343 ; RV32I-NEXT: addi sp, sp, -32
2344 ; RV32I-NEXT: sw ra, 28(sp) # 4-byte Folded Spill
2345 ; RV32I-NEXT: sw s0, 24(sp) # 4-byte Folded Spill
2346 ; RV32I-NEXT: sw s1, 20(sp) # 4-byte Folded Spill
2347 ; RV32I-NEXT: sw s2, 16(sp) # 4-byte Folded Spill
2348 ; RV32I-NEXT: sw s3, 12(sp) # 4-byte Folded Spill
2349 ; RV32I-NEXT: mv s0, a1
2350 ; RV32I-NEXT: mv s1, a0
2351 ; RV32I-NEXT: lui a0, 16
2352 ; RV32I-NEXT: addi s3, a0, -1
2353 ; RV32I-NEXT: and a0, a2, s3
2354 ; RV32I-NEXT: call __extendhfsf2
2355 ; RV32I-NEXT: li a1, 0
2356 ; RV32I-NEXT: call __addsf3
2357 ; RV32I-NEXT: call __truncsfhf2
2358 ; RV32I-NEXT: mv s2, a0
2359 ; RV32I-NEXT: and a0, s1, s3
2360 ; RV32I-NEXT: call __extendhfsf2
2361 ; RV32I-NEXT: mv s1, a0
2362 ; RV32I-NEXT: and a0, s0, s3
2363 ; RV32I-NEXT: call __extendhfsf2
2364 ; RV32I-NEXT: mv a1, a0
2365 ; RV32I-NEXT: mv a0, s1
2366 ; RV32I-NEXT: call __mulsf3
2367 ; RV32I-NEXT: call __truncsfhf2
2368 ; RV32I-NEXT: and a0, a0, s3
2369 ; RV32I-NEXT: call __extendhfsf2
2370 ; RV32I-NEXT: mv s0, a0
2371 ; RV32I-NEXT: and a0, s2, s3
2372 ; RV32I-NEXT: call __extendhfsf2
2373 ; RV32I-NEXT: mv a1, a0
2374 ; RV32I-NEXT: mv a0, s0
2375 ; RV32I-NEXT: call __subsf3
2376 ; RV32I-NEXT: call __truncsfhf2
2377 ; RV32I-NEXT: lw ra, 28(sp) # 4-byte Folded Reload
2378 ; RV32I-NEXT: lw s0, 24(sp) # 4-byte Folded Reload
2379 ; RV32I-NEXT: lw s1, 20(sp) # 4-byte Folded Reload
2380 ; RV32I-NEXT: lw s2, 16(sp) # 4-byte Folded Reload
2381 ; RV32I-NEXT: lw s3, 12(sp) # 4-byte Folded Reload
2382 ; RV32I-NEXT: addi sp, sp, 32
2385 ; RV64I-LABEL: fmsub_h_contract:
2387 ; RV64I-NEXT: addi sp, sp, -48
2388 ; RV64I-NEXT: sd ra, 40(sp) # 8-byte Folded Spill
2389 ; RV64I-NEXT: sd s0, 32(sp) # 8-byte Folded Spill
2390 ; RV64I-NEXT: sd s1, 24(sp) # 8-byte Folded Spill
2391 ; RV64I-NEXT: sd s2, 16(sp) # 8-byte Folded Spill
2392 ; RV64I-NEXT: sd s3, 8(sp) # 8-byte Folded Spill
2393 ; RV64I-NEXT: mv s0, a1
2394 ; RV64I-NEXT: mv s1, a0
2395 ; RV64I-NEXT: lui a0, 16
2396 ; RV64I-NEXT: addiw s3, a0, -1
2397 ; RV64I-NEXT: and a0, a2, s3
2398 ; RV64I-NEXT: call __extendhfsf2
2399 ; RV64I-NEXT: li a1, 0
2400 ; RV64I-NEXT: call __addsf3
2401 ; RV64I-NEXT: call __truncsfhf2
2402 ; RV64I-NEXT: mv s2, a0
2403 ; RV64I-NEXT: and a0, s1, s3
2404 ; RV64I-NEXT: call __extendhfsf2
2405 ; RV64I-NEXT: mv s1, a0
2406 ; RV64I-NEXT: and a0, s0, s3
2407 ; RV64I-NEXT: call __extendhfsf2
2408 ; RV64I-NEXT: mv a1, a0
2409 ; RV64I-NEXT: mv a0, s1
2410 ; RV64I-NEXT: call __mulsf3
2411 ; RV64I-NEXT: call __truncsfhf2
2412 ; RV64I-NEXT: and a0, a0, s3
2413 ; RV64I-NEXT: call __extendhfsf2
2414 ; RV64I-NEXT: mv s0, a0
2415 ; RV64I-NEXT: and a0, s2, s3
2416 ; RV64I-NEXT: call __extendhfsf2
2417 ; RV64I-NEXT: mv a1, a0
2418 ; RV64I-NEXT: mv a0, s0
2419 ; RV64I-NEXT: call __subsf3
2420 ; RV64I-NEXT: call __truncsfhf2
2421 ; RV64I-NEXT: ld ra, 40(sp) # 8-byte Folded Reload
2422 ; RV64I-NEXT: ld s0, 32(sp) # 8-byte Folded Reload
2423 ; RV64I-NEXT: ld s1, 24(sp) # 8-byte Folded Reload
2424 ; RV64I-NEXT: ld s2, 16(sp) # 8-byte Folded Reload
2425 ; RV64I-NEXT: ld s3, 8(sp) # 8-byte Folded Reload
2426 ; RV64I-NEXT: addi sp, sp, 48
2429 ; CHECKIZFHMIN-LABEL: fmsub_h_contract:
2430 ; CHECKIZFHMIN: # %bb.0:
2431 ; CHECKIZFHMIN-NEXT: fcvt.s.h fa5, fa2
2432 ; CHECKIZFHMIN-NEXT: fmv.w.x fa4, zero
2433 ; CHECKIZFHMIN-NEXT: fcvt.s.h fa3, fa1
2434 ; CHECKIZFHMIN-NEXT: fcvt.s.h fa2, fa0
2435 ; CHECKIZFHMIN-NEXT: fadd.s fa5, fa5, fa4
2436 ; CHECKIZFHMIN-NEXT: fmul.s fa4, fa2, fa3
2437 ; CHECKIZFHMIN-NEXT: fcvt.h.s fa5, fa5
2438 ; CHECKIZFHMIN-NEXT: fcvt.h.s fa4, fa4
2439 ; CHECKIZFHMIN-NEXT: fcvt.s.h fa5, fa5
2440 ; CHECKIZFHMIN-NEXT: fcvt.s.h fa4, fa4
2441 ; CHECKIZFHMIN-NEXT: fsub.s fa5, fa4, fa5
2442 ; CHECKIZFHMIN-NEXT: fcvt.h.s fa0, fa5
2443 ; CHECKIZFHMIN-NEXT: ret
2445 ; CHECKIZHINXMIN-LABEL: fmsub_h_contract:
2446 ; CHECKIZHINXMIN: # %bb.0:
2447 ; CHECKIZHINXMIN-NEXT: fcvt.s.h a2, a2
2448 ; CHECKIZHINXMIN-NEXT: fcvt.s.h a1, a1
2449 ; CHECKIZHINXMIN-NEXT: fcvt.s.h a0, a0
2450 ; CHECKIZHINXMIN-NEXT: fadd.s a2, a2, zero
2451 ; CHECKIZHINXMIN-NEXT: fmul.s a0, a0, a1
2452 ; CHECKIZHINXMIN-NEXT: fcvt.h.s a1, a2
2453 ; CHECKIZHINXMIN-NEXT: fcvt.h.s a0, a0
2454 ; CHECKIZHINXMIN-NEXT: fcvt.s.h a1, a1
2455 ; CHECKIZHINXMIN-NEXT: fcvt.s.h a0, a0
2456 ; CHECKIZHINXMIN-NEXT: fsub.s a0, a0, a1
2457 ; CHECKIZHINXMIN-NEXT: fcvt.h.s a0, a0
2458 ; CHECKIZHINXMIN-NEXT: ret
2459 %c_ = fadd half 0.0, %c ; avoid negation using xor
2460 %1 = fmul contract half %a, %b
2461 %2 = fsub contract half %1, %c_
; fnmadd_h_contract: contract-flagged fmul and fsub on operands that each pass
; through "fadd 0.0, x" first. With full half support (Zfh or Zhinx) the
; checks expect the three fadd.h followed by one fused fnmadd.h. In the other
; runs nothing is fused: the product is computed (__mulsf3 or fmul.s), rounded
; to half, has an integer lui/xor applied to it, and c_ is then subtracted
; (__subsf3 or fsub.s).
2465 define half @fnmadd_h_contract(half %a, half %b, half %c) nounwind {
2466 ; CHECKIZFH-LABEL: fnmadd_h_contract:
2467 ; CHECKIZFH: # %bb.0:
2468 ; CHECKIZFH-NEXT: fmv.h.x fa5, zero
2469 ; CHECKIZFH-NEXT: fadd.h fa4, fa0, fa5
2470 ; CHECKIZFH-NEXT: fadd.h fa3, fa1, fa5
2471 ; CHECKIZFH-NEXT: fadd.h fa5, fa2, fa5
2472 ; CHECKIZFH-NEXT: fnmadd.h fa0, fa4, fa3, fa5
2473 ; CHECKIZFH-NEXT: ret
2475 ; CHECKIZHINX-LABEL: fnmadd_h_contract:
2476 ; CHECKIZHINX: # %bb.0:
2477 ; CHECKIZHINX-NEXT: fadd.h a0, a0, zero
2478 ; CHECKIZHINX-NEXT: fadd.h a1, a1, zero
2479 ; CHECKIZHINX-NEXT: fadd.h a2, a2, zero
2480 ; CHECKIZHINX-NEXT: fnmadd.h a0, a0, a1, a2
2481 ; CHECKIZHINX-NEXT: ret
2483 ; RV32I-LABEL: fnmadd_h_contract:
2485 ; RV32I-NEXT: addi sp, sp, -32
2486 ; RV32I-NEXT: sw ra, 28(sp) # 4-byte Folded Spill
2487 ; RV32I-NEXT: sw s0, 24(sp) # 4-byte Folded Spill
2488 ; RV32I-NEXT: sw s1, 20(sp) # 4-byte Folded Spill
2489 ; RV32I-NEXT: sw s2, 16(sp) # 4-byte Folded Spill
2490 ; RV32I-NEXT: sw s3, 12(sp) # 4-byte Folded Spill
2491 ; RV32I-NEXT: mv s0, a2
2492 ; RV32I-NEXT: mv s1, a1
2493 ; RV32I-NEXT: lui s3, 16
2494 ; RV32I-NEXT: addi s3, s3, -1
2495 ; RV32I-NEXT: and a0, a0, s3
2496 ; RV32I-NEXT: call __extendhfsf2
2497 ; RV32I-NEXT: li a1, 0
2498 ; RV32I-NEXT: call __addsf3
2499 ; RV32I-NEXT: call __truncsfhf2
2500 ; RV32I-NEXT: mv s2, a0
2501 ; RV32I-NEXT: and a0, s1, s3
2502 ; RV32I-NEXT: call __extendhfsf2
2503 ; RV32I-NEXT: li a1, 0
2504 ; RV32I-NEXT: call __addsf3
2505 ; RV32I-NEXT: call __truncsfhf2
2506 ; RV32I-NEXT: mv s1, a0
2507 ; RV32I-NEXT: and a0, s0, s3
2508 ; RV32I-NEXT: call __extendhfsf2
2509 ; RV32I-NEXT: li a1, 0
2510 ; RV32I-NEXT: call __addsf3
2511 ; RV32I-NEXT: call __truncsfhf2
2512 ; RV32I-NEXT: mv s0, a0
2513 ; RV32I-NEXT: and a0, s2, s3
2514 ; RV32I-NEXT: call __extendhfsf2
2515 ; RV32I-NEXT: mv s2, a0
2516 ; RV32I-NEXT: and a0, s1, s3
2517 ; RV32I-NEXT: call __extendhfsf2
2518 ; RV32I-NEXT: mv a1, a0
2519 ; RV32I-NEXT: mv a0, s2
2520 ; RV32I-NEXT: call __mulsf3
2521 ; RV32I-NEXT: call __truncsfhf2
2522 ; RV32I-NEXT: and a0, a0, s3
2523 ; RV32I-NEXT: call __extendhfsf2
2524 ; RV32I-NEXT: lui a1, 524288
2525 ; RV32I-NEXT: xor a0, a0, a1
2526 ; RV32I-NEXT: call __truncsfhf2
2527 ; RV32I-NEXT: mv s1, a0
2528 ; RV32I-NEXT: and a0, s0, s3
2529 ; RV32I-NEXT: call __extendhfsf2
2530 ; RV32I-NEXT: mv s0, a0
2531 ; RV32I-NEXT: and a0, s1, s3
2532 ; RV32I-NEXT: call __extendhfsf2
2533 ; RV32I-NEXT: mv a1, s0
2534 ; RV32I-NEXT: call __subsf3
2535 ; RV32I-NEXT: call __truncsfhf2
2536 ; RV32I-NEXT: lw ra, 28(sp) # 4-byte Folded Reload
2537 ; RV32I-NEXT: lw s0, 24(sp) # 4-byte Folded Reload
2538 ; RV32I-NEXT: lw s1, 20(sp) # 4-byte Folded Reload
2539 ; RV32I-NEXT: lw s2, 16(sp) # 4-byte Folded Reload
2540 ; RV32I-NEXT: lw s3, 12(sp) # 4-byte Folded Reload
2541 ; RV32I-NEXT: addi sp, sp, 32
2544 ; RV64I-LABEL: fnmadd_h_contract:
2546 ; RV64I-NEXT: addi sp, sp, -48
2547 ; RV64I-NEXT: sd ra, 40(sp) # 8-byte Folded Spill
2548 ; RV64I-NEXT: sd s0, 32(sp) # 8-byte Folded Spill
2549 ; RV64I-NEXT: sd s1, 24(sp) # 8-byte Folded Spill
2550 ; RV64I-NEXT: sd s2, 16(sp) # 8-byte Folded Spill
2551 ; RV64I-NEXT: sd s3, 8(sp) # 8-byte Folded Spill
2552 ; RV64I-NEXT: mv s0, a2
2553 ; RV64I-NEXT: mv s1, a1
2554 ; RV64I-NEXT: lui s3, 16
2555 ; RV64I-NEXT: addiw s3, s3, -1
2556 ; RV64I-NEXT: and a0, a0, s3
2557 ; RV64I-NEXT: call __extendhfsf2
2558 ; RV64I-NEXT: li a1, 0
2559 ; RV64I-NEXT: call __addsf3
2560 ; RV64I-NEXT: call __truncsfhf2
2561 ; RV64I-NEXT: mv s2, a0
2562 ; RV64I-NEXT: and a0, s1, s3
2563 ; RV64I-NEXT: call __extendhfsf2
2564 ; RV64I-NEXT: li a1, 0
2565 ; RV64I-NEXT: call __addsf3
2566 ; RV64I-NEXT: call __truncsfhf2
2567 ; RV64I-NEXT: mv s1, a0
2568 ; RV64I-NEXT: and a0, s0, s3
2569 ; RV64I-NEXT: call __extendhfsf2
2570 ; RV64I-NEXT: li a1, 0
2571 ; RV64I-NEXT: call __addsf3
2572 ; RV64I-NEXT: call __truncsfhf2
2573 ; RV64I-NEXT: mv s0, a0
2574 ; RV64I-NEXT: and a0, s2, s3
2575 ; RV64I-NEXT: call __extendhfsf2
2576 ; RV64I-NEXT: mv s2, a0
2577 ; RV64I-NEXT: and a0, s1, s3
2578 ; RV64I-NEXT: call __extendhfsf2
2579 ; RV64I-NEXT: mv a1, a0
2580 ; RV64I-NEXT: mv a0, s2
2581 ; RV64I-NEXT: call __mulsf3
2582 ; RV64I-NEXT: call __truncsfhf2
2583 ; RV64I-NEXT: and a0, a0, s3
2584 ; RV64I-NEXT: call __extendhfsf2
2585 ; RV64I-NEXT: lui a1, 524288
2586 ; RV64I-NEXT: xor a0, a0, a1
2587 ; RV64I-NEXT: call __truncsfhf2
2588 ; RV64I-NEXT: mv s1, a0
2589 ; RV64I-NEXT: and a0, s0, s3
2590 ; RV64I-NEXT: call __extendhfsf2
2591 ; RV64I-NEXT: mv s0, a0
2592 ; RV64I-NEXT: and a0, s1, s3
2593 ; RV64I-NEXT: call __extendhfsf2
2594 ; RV64I-NEXT: mv a1, s0
2595 ; RV64I-NEXT: call __subsf3
2596 ; RV64I-NEXT: call __truncsfhf2
2597 ; RV64I-NEXT: ld ra, 40(sp) # 8-byte Folded Reload
2598 ; RV64I-NEXT: ld s0, 32(sp) # 8-byte Folded Reload
2599 ; RV64I-NEXT: ld s1, 24(sp) # 8-byte Folded Reload
2600 ; RV64I-NEXT: ld s2, 16(sp) # 8-byte Folded Reload
2601 ; RV64I-NEXT: ld s3, 8(sp) # 8-byte Folded Reload
2602 ; RV64I-NEXT: addi sp, sp, 48
2605 ; CHECKIZFHMIN-LABEL: fnmadd_h_contract:
2606 ; CHECKIZFHMIN: # %bb.0:
2607 ; CHECKIZFHMIN-NEXT: fcvt.s.h fa5, fa0
2608 ; CHECKIZFHMIN-NEXT: fmv.w.x fa4, zero
2609 ; CHECKIZFHMIN-NEXT: fcvt.s.h fa3, fa1
2610 ; CHECKIZFHMIN-NEXT: fcvt.s.h fa2, fa2
2611 ; CHECKIZFHMIN-NEXT: lui a0, 1048568
2612 ; CHECKIZFHMIN-NEXT: fadd.s fa5, fa5, fa4
2613 ; CHECKIZFHMIN-NEXT: fadd.s fa3, fa3, fa4
2614 ; CHECKIZFHMIN-NEXT: fadd.s fa4, fa2, fa4
2615 ; CHECKIZFHMIN-NEXT: fcvt.h.s fa5, fa5
2616 ; CHECKIZFHMIN-NEXT: fcvt.h.s fa3, fa3
2617 ; CHECKIZFHMIN-NEXT: fcvt.h.s fa4, fa4
2618 ; CHECKIZFHMIN-NEXT: fcvt.s.h fa3, fa3
2619 ; CHECKIZFHMIN-NEXT: fcvt.s.h fa5, fa5
2620 ; CHECKIZFHMIN-NEXT: fmul.s fa5, fa5, fa3
2621 ; CHECKIZFHMIN-NEXT: fcvt.h.s fa5, fa5
2622 ; CHECKIZFHMIN-NEXT: fmv.x.h a1, fa5
2623 ; CHECKIZFHMIN-NEXT: xor a0, a1, a0
2624 ; CHECKIZFHMIN-NEXT: fmv.h.x fa5, a0
2625 ; CHECKIZFHMIN-NEXT: fcvt.s.h fa5, fa5
2626 ; CHECKIZFHMIN-NEXT: fcvt.s.h fa4, fa4
2627 ; CHECKIZFHMIN-NEXT: fsub.s fa5, fa5, fa4
2628 ; CHECKIZFHMIN-NEXT: fcvt.h.s fa0, fa5
2629 ; CHECKIZFHMIN-NEXT: ret
2631 ; CHECKIZHINXMIN-LABEL: fnmadd_h_contract:
2632 ; CHECKIZHINXMIN: # %bb.0:
2633 ; CHECKIZHINXMIN-NEXT: fcvt.s.h a0, a0
2634 ; CHECKIZHINXMIN-NEXT: fcvt.s.h a1, a1
2635 ; CHECKIZHINXMIN-NEXT: fcvt.s.h a2, a2
2636 ; CHECKIZHINXMIN-NEXT: fadd.s a0, a0, zero
2637 ; CHECKIZHINXMIN-NEXT: fadd.s a1, a1, zero
2638 ; CHECKIZHINXMIN-NEXT: fcvt.h.s a0, a0
2639 ; CHECKIZHINXMIN-NEXT: fcvt.h.s a1, a1
2640 ; CHECKIZHINXMIN-NEXT: fcvt.s.h a1, a1
2641 ; CHECKIZHINXMIN-NEXT: fcvt.s.h a0, a0
2642 ; CHECKIZHINXMIN-NEXT: fmul.s a0, a0, a1
2643 ; CHECKIZHINXMIN-NEXT: lui a1, 1048568
2644 ; CHECKIZHINXMIN-NEXT: fadd.s a2, a2, zero
2645 ; CHECKIZHINXMIN-NEXT: fcvt.h.s a2, a2
2646 ; CHECKIZHINXMIN-NEXT: fcvt.h.s a0, a0
2647 ; CHECKIZHINXMIN-NEXT: xor a0, a0, a1
2648 ; CHECKIZHINXMIN-NEXT: fcvt.s.h a0, a0
2649 ; CHECKIZHINXMIN-NEXT: fcvt.s.h a1, a2
2650 ; CHECKIZHINXMIN-NEXT: fsub.s a0, a0, a1
2651 ; CHECKIZHINXMIN-NEXT: fcvt.h.s a0, a0
2652 ; CHECKIZHINXMIN-NEXT: ret
2653 %a_ = fadd half 0.0, %a ; avoid negation using xor
2654 %b_ = fadd half 0.0, %b ; avoid negation using xor
2655 %c_ = fadd half 0.0, %c ; avoid negation using xor
2656 %1 = fmul contract half %a_, %b_
2658 %3 = fsub contract half %2, %c_
; fnmsub_h_contract: %a and %b are each passed through `fadd half 0.0, x`,
; multiplied with `fmul contract`, and the product is then subtracted from %c
; with `fsub contract` (i.e. %2 = %c - (%a_ * %b_)).
;
; What the assertions below expect for each RUN configuration:
;   * +zfh / +zhinx (CHECKIZFH, CHECKIZHINX prefixes): after the two fadd.h
;     instructions, a single fnmsub.h appears in place of a separate multiply
;     and subtract.
;   * no FP extension (RV32I, RV64I prefixes): soft-float libcalls, __mulsf3
;     followed by __subsf3, with __extendhfsf2 on the operands and
;     __truncsfhf2 on each result.
;   * +zfhmin / +zhinxmin (CHECKIZFHMIN, CHECKIZHINXMIN prefixes): separate
;     fmul.s and fsub.s, with an fcvt.h.s / fcvt.s.h round trip on the product
;     between them; no fused instruction is expected.
2663 define half @fnmsub_h_contract(half %a, half %b, half %c) nounwind {
2664 ; CHECKIZFH-LABEL: fnmsub_h_contract:
2665 ; CHECKIZFH: # %bb.0:
2666 ; CHECKIZFH-NEXT: fmv.h.x fa5, zero
2667 ; CHECKIZFH-NEXT: fadd.h fa4, fa0, fa5
2668 ; CHECKIZFH-NEXT: fadd.h fa5, fa1, fa5
2669 ; CHECKIZFH-NEXT: fnmsub.h fa0, fa4, fa5, fa2
2670 ; CHECKIZFH-NEXT: ret
2672 ; CHECKIZHINX-LABEL: fnmsub_h_contract:
2673 ; CHECKIZHINX: # %bb.0:
2674 ; CHECKIZHINX-NEXT: fadd.h a0, a0, zero
2675 ; CHECKIZHINX-NEXT: fadd.h a1, a1, zero
2676 ; CHECKIZHINX-NEXT: fnmsub.h a0, a0, a1, a2
2677 ; CHECKIZHINX-NEXT: ret
2679 ; RV32I-LABEL: fnmsub_h_contract:
2681 ; RV32I-NEXT: addi sp, sp, -32
2682 ; RV32I-NEXT: sw ra, 28(sp) # 4-byte Folded Spill
2683 ; RV32I-NEXT: sw s0, 24(sp) # 4-byte Folded Spill
2684 ; RV32I-NEXT: sw s1, 20(sp) # 4-byte Folded Spill
2685 ; RV32I-NEXT: sw s2, 16(sp) # 4-byte Folded Spill
2686 ; RV32I-NEXT: sw s3, 12(sp) # 4-byte Folded Spill
2687 ; RV32I-NEXT: mv s0, a2
2688 ; RV32I-NEXT: mv s1, a1
2689 ; RV32I-NEXT: lui s3, 16
2690 ; RV32I-NEXT: addi s3, s3, -1
2691 ; RV32I-NEXT: and a0, a0, s3
2692 ; RV32I-NEXT: call __extendhfsf2
2693 ; RV32I-NEXT: li a1, 0
2694 ; RV32I-NEXT: call __addsf3
2695 ; RV32I-NEXT: call __truncsfhf2
2696 ; RV32I-NEXT: mv s2, a0
2697 ; RV32I-NEXT: and a0, s1, s3
2698 ; RV32I-NEXT: call __extendhfsf2
2699 ; RV32I-NEXT: li a1, 0
2700 ; RV32I-NEXT: call __addsf3
2701 ; RV32I-NEXT: call __truncsfhf2
2702 ; RV32I-NEXT: mv s1, a0
2703 ; RV32I-NEXT: and a0, s2, s3
2704 ; RV32I-NEXT: call __extendhfsf2
2705 ; RV32I-NEXT: mv s2, a0
2706 ; RV32I-NEXT: and a0, s1, s3
2707 ; RV32I-NEXT: call __extendhfsf2
2708 ; RV32I-NEXT: mv a1, a0
2709 ; RV32I-NEXT: mv a0, s2
2710 ; RV32I-NEXT: call __mulsf3
2711 ; RV32I-NEXT: call __truncsfhf2
2712 ; RV32I-NEXT: mv s1, a0
2713 ; RV32I-NEXT: and a0, s0, s3
2714 ; RV32I-NEXT: call __extendhfsf2
2715 ; RV32I-NEXT: mv s0, a0
2716 ; RV32I-NEXT: and a0, s1, s3
2717 ; RV32I-NEXT: call __extendhfsf2
2718 ; RV32I-NEXT: mv a1, a0
2719 ; RV32I-NEXT: mv a0, s0
2720 ; RV32I-NEXT: call __subsf3
2721 ; RV32I-NEXT: call __truncsfhf2
2722 ; RV32I-NEXT: lw ra, 28(sp) # 4-byte Folded Reload
2723 ; RV32I-NEXT: lw s0, 24(sp) # 4-byte Folded Reload
2724 ; RV32I-NEXT: lw s1, 20(sp) # 4-byte Folded Reload
2725 ; RV32I-NEXT: lw s2, 16(sp) # 4-byte Folded Reload
2726 ; RV32I-NEXT: lw s3, 12(sp) # 4-byte Folded Reload
2727 ; RV32I-NEXT: addi sp, sp, 32
2730 ; RV64I-LABEL: fnmsub_h_contract:
2732 ; RV64I-NEXT: addi sp, sp, -48
2733 ; RV64I-NEXT: sd ra, 40(sp) # 8-byte Folded Spill
2734 ; RV64I-NEXT: sd s0, 32(sp) # 8-byte Folded Spill
2735 ; RV64I-NEXT: sd s1, 24(sp) # 8-byte Folded Spill
2736 ; RV64I-NEXT: sd s2, 16(sp) # 8-byte Folded Spill
2737 ; RV64I-NEXT: sd s3, 8(sp) # 8-byte Folded Spill
2738 ; RV64I-NEXT: mv s0, a2
2739 ; RV64I-NEXT: mv s1, a1
2740 ; RV64I-NEXT: lui s3, 16
2741 ; RV64I-NEXT: addiw s3, s3, -1
2742 ; RV64I-NEXT: and a0, a0, s3
2743 ; RV64I-NEXT: call __extendhfsf2
2744 ; RV64I-NEXT: li a1, 0
2745 ; RV64I-NEXT: call __addsf3
2746 ; RV64I-NEXT: call __truncsfhf2
2747 ; RV64I-NEXT: mv s2, a0
2748 ; RV64I-NEXT: and a0, s1, s3
2749 ; RV64I-NEXT: call __extendhfsf2
2750 ; RV64I-NEXT: li a1, 0
2751 ; RV64I-NEXT: call __addsf3
2752 ; RV64I-NEXT: call __truncsfhf2
2753 ; RV64I-NEXT: mv s1, a0
2754 ; RV64I-NEXT: and a0, s2, s3
2755 ; RV64I-NEXT: call __extendhfsf2
2756 ; RV64I-NEXT: mv s2, a0
2757 ; RV64I-NEXT: and a0, s1, s3
2758 ; RV64I-NEXT: call __extendhfsf2
2759 ; RV64I-NEXT: mv a1, a0
2760 ; RV64I-NEXT: mv a0, s2
2761 ; RV64I-NEXT: call __mulsf3
2762 ; RV64I-NEXT: call __truncsfhf2
2763 ; RV64I-NEXT: mv s1, a0
2764 ; RV64I-NEXT: and a0, s0, s3
2765 ; RV64I-NEXT: call __extendhfsf2
2766 ; RV64I-NEXT: mv s0, a0
2767 ; RV64I-NEXT: and a0, s1, s3
2768 ; RV64I-NEXT: call __extendhfsf2
2769 ; RV64I-NEXT: mv a1, a0
2770 ; RV64I-NEXT: mv a0, s0
2771 ; RV64I-NEXT: call __subsf3
2772 ; RV64I-NEXT: call __truncsfhf2
2773 ; RV64I-NEXT: ld ra, 40(sp) # 8-byte Folded Reload
2774 ; RV64I-NEXT: ld s0, 32(sp) # 8-byte Folded Reload
2775 ; RV64I-NEXT: ld s1, 24(sp) # 8-byte Folded Reload
2776 ; RV64I-NEXT: ld s2, 16(sp) # 8-byte Folded Reload
2777 ; RV64I-NEXT: ld s3, 8(sp) # 8-byte Folded Reload
2778 ; RV64I-NEXT: addi sp, sp, 48
2781 ; CHECKIZFHMIN-LABEL: fnmsub_h_contract:
2782 ; CHECKIZFHMIN: # %bb.0:
2783 ; CHECKIZFHMIN-NEXT: fcvt.s.h fa5, fa0
2784 ; CHECKIZFHMIN-NEXT: fmv.w.x fa4, zero
2785 ; CHECKIZFHMIN-NEXT: fcvt.s.h fa3, fa1
2786 ; CHECKIZFHMIN-NEXT: fadd.s fa5, fa5, fa4
2787 ; CHECKIZFHMIN-NEXT: fadd.s fa4, fa3, fa4
2788 ; CHECKIZFHMIN-NEXT: fcvt.h.s fa5, fa5
2789 ; CHECKIZFHMIN-NEXT: fcvt.h.s fa4, fa4
2790 ; CHECKIZFHMIN-NEXT: fcvt.s.h fa4, fa4
2791 ; CHECKIZFHMIN-NEXT: fcvt.s.h fa5, fa5
2792 ; CHECKIZFHMIN-NEXT: fmul.s fa5, fa5, fa4
2793 ; CHECKIZFHMIN-NEXT: fcvt.h.s fa5, fa5
2794 ; CHECKIZFHMIN-NEXT: fcvt.s.h fa5, fa5
2795 ; CHECKIZFHMIN-NEXT: fcvt.s.h fa4, fa2
2796 ; CHECKIZFHMIN-NEXT: fsub.s fa5, fa4, fa5
2797 ; CHECKIZFHMIN-NEXT: fcvt.h.s fa0, fa5
2798 ; CHECKIZFHMIN-NEXT: ret
2800 ; CHECKIZHINXMIN-LABEL: fnmsub_h_contract:
2801 ; CHECKIZHINXMIN: # %bb.0:
2802 ; CHECKIZHINXMIN-NEXT: fcvt.s.h a0, a0
2803 ; CHECKIZHINXMIN-NEXT: fcvt.s.h a1, a1
2804 ; CHECKIZHINXMIN-NEXT: fadd.s a0, a0, zero
2805 ; CHECKIZHINXMIN-NEXT: fadd.s a1, a1, zero
2806 ; CHECKIZHINXMIN-NEXT: fcvt.h.s a0, a0
2807 ; CHECKIZHINXMIN-NEXT: fcvt.h.s a1, a1
2808 ; CHECKIZHINXMIN-NEXT: fcvt.s.h a1, a1
2809 ; CHECKIZHINXMIN-NEXT: fcvt.s.h a0, a0
2810 ; CHECKIZHINXMIN-NEXT: fmul.s a0, a0, a1
2811 ; CHECKIZHINXMIN-NEXT: fcvt.h.s a0, a0
2812 ; CHECKIZHINXMIN-NEXT: fcvt.s.h a1, a2
2813 ; CHECKIZHINXMIN-NEXT: fsub.s a0, a1, a0
2814 ; CHECKIZHINXMIN-NEXT: fcvt.h.s a0, a0
2815 ; CHECKIZHINXMIN-NEXT: ret
; IR under test: only %a and %b get the `fadd 0.0` treatment; %c feeds the
; fsub directly as its first operand, and the product %1 is the subtrahend.
2816 %a_ = fadd half 0.0, %a ; avoid negation using xor
2817 %b_ = fadd half 0.0, %b ; avoid negation using xor
2818 %1 = fmul contract half %a_, %b_
2819 %2 = fsub contract half %c, %1
2823 define half @fsgnjx_f16(half %x, half %y) nounwind {
2824 ; CHECKIZFH-LABEL: fsgnjx_f16:
2825 ; CHECKIZFH: # %bb.0:
2826 ; CHECKIZFH-NEXT: fsgnjx.h fa0, fa1, fa0
2827 ; CHECKIZFH-NEXT: ret
2829 ; CHECKIZHINX-LABEL: fsgnjx_f16:
2830 ; CHECKIZHINX: # %bb.0:
2831 ; CHECKIZHINX-NEXT: fsgnjx.h a0, a1, a0
2832 ; CHECKIZHINX-NEXT: ret
2834 ; RV32I-LABEL: fsgnjx_f16:
2836 ; RV32I-NEXT: addi sp, sp, -16
2837 ; RV32I-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
2838 ; RV32I-NEXT: sw s0, 8(sp) # 4-byte Folded Spill
2839 ; RV32I-NEXT: sw s1, 4(sp) # 4-byte Folded Spill
2840 ; RV32I-NEXT: li a2, 15
2841 ; RV32I-NEXT: slli a2, a2, 10
2842 ; RV32I-NEXT: or s1, a0, a2
2843 ; RV32I-NEXT: slli a0, a1, 16
2844 ; RV32I-NEXT: srli a0, a0, 16
2845 ; RV32I-NEXT: call __extendhfsf2
2846 ; RV32I-NEXT: mv s0, a0
2847 ; RV32I-NEXT: lui a0, 12
2848 ; RV32I-NEXT: addi a0, a0, -1024
2849 ; RV32I-NEXT: and a0, s1, a0
2850 ; RV32I-NEXT: call __extendhfsf2
2851 ; RV32I-NEXT: mv a1, s0
2852 ; RV32I-NEXT: call __mulsf3
2853 ; RV32I-NEXT: call __truncsfhf2
2854 ; RV32I-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
2855 ; RV32I-NEXT: lw s0, 8(sp) # 4-byte Folded Reload
2856 ; RV32I-NEXT: lw s1, 4(sp) # 4-byte Folded Reload
2857 ; RV32I-NEXT: addi sp, sp, 16
2860 ; RV64I-LABEL: fsgnjx_f16:
2862 ; RV64I-NEXT: addi sp, sp, -32
2863 ; RV64I-NEXT: sd ra, 24(sp) # 8-byte Folded Spill
2864 ; RV64I-NEXT: sd s0, 16(sp) # 8-byte Folded Spill
2865 ; RV64I-NEXT: sd s1, 8(sp) # 8-byte Folded Spill
2866 ; RV64I-NEXT: li a2, 15
2867 ; RV64I-NEXT: slli a2, a2, 10
2868 ; RV64I-NEXT: or s1, a0, a2
2869 ; RV64I-NEXT: slli a0, a1, 48
2870 ; RV64I-NEXT: srli a0, a0, 48
2871 ; RV64I-NEXT: call __extendhfsf2
2872 ; RV64I-NEXT: mv s0, a0
2873 ; RV64I-NEXT: lui a0, 12
2874 ; RV64I-NEXT: addiw a0, a0, -1024
2875 ; RV64I-NEXT: and a0, s1, a0
2876 ; RV64I-NEXT: call __extendhfsf2
2877 ; RV64I-NEXT: mv a1, s0
2878 ; RV64I-NEXT: call __mulsf3
2879 ; RV64I-NEXT: call __truncsfhf2
2880 ; RV64I-NEXT: ld ra, 24(sp) # 8-byte Folded Reload
2881 ; RV64I-NEXT: ld s0, 16(sp) # 8-byte Folded Reload
2882 ; RV64I-NEXT: ld s1, 8(sp) # 8-byte Folded Reload
2883 ; RV64I-NEXT: addi sp, sp, 32
2886 ; RV32IZFHMIN-LABEL: fsgnjx_f16:
2887 ; RV32IZFHMIN: # %bb.0:
2888 ; RV32IZFHMIN-NEXT: lui a0, %hi(.LCPI23_0)
2889 ; RV32IZFHMIN-NEXT: lhu a0, %lo(.LCPI23_0)(a0)
2890 ; RV32IZFHMIN-NEXT: fmv.x.h a1, fa0
2891 ; RV32IZFHMIN-NEXT: lui a2, 1048568
2892 ; RV32IZFHMIN-NEXT: and a1, a1, a2
2893 ; RV32IZFHMIN-NEXT: slli a0, a0, 17
2894 ; RV32IZFHMIN-NEXT: srli a0, a0, 17
2895 ; RV32IZFHMIN-NEXT: or a0, a0, a1
2896 ; RV32IZFHMIN-NEXT: fmv.h.x fa5, a0
2897 ; RV32IZFHMIN-NEXT: fcvt.s.h fa5, fa5
2898 ; RV32IZFHMIN-NEXT: fcvt.s.h fa4, fa1
2899 ; RV32IZFHMIN-NEXT: fmul.s fa5, fa5, fa4
2900 ; RV32IZFHMIN-NEXT: fcvt.h.s fa0, fa5
2901 ; RV32IZFHMIN-NEXT: ret
2903 ; RV64IZFHMIN-LABEL: fsgnjx_f16:
2904 ; RV64IZFHMIN: # %bb.0:
2905 ; RV64IZFHMIN-NEXT: lui a0, %hi(.LCPI23_0)
2906 ; RV64IZFHMIN-NEXT: lhu a0, %lo(.LCPI23_0)(a0)
2907 ; RV64IZFHMIN-NEXT: fmv.x.h a1, fa0
2908 ; RV64IZFHMIN-NEXT: lui a2, 1048568
2909 ; RV64IZFHMIN-NEXT: and a1, a1, a2
2910 ; RV64IZFHMIN-NEXT: slli a0, a0, 49
2911 ; RV64IZFHMIN-NEXT: srli a0, a0, 49
2912 ; RV64IZFHMIN-NEXT: or a0, a0, a1
2913 ; RV64IZFHMIN-NEXT: fmv.h.x fa5, a0
2914 ; RV64IZFHMIN-NEXT: fcvt.s.h fa5, fa5
2915 ; RV64IZFHMIN-NEXT: fcvt.s.h fa4, fa1
2916 ; RV64IZFHMIN-NEXT: fmul.s fa5, fa5, fa4
2917 ; RV64IZFHMIN-NEXT: fcvt.h.s fa0, fa5
2918 ; RV64IZFHMIN-NEXT: ret
2920 ; CHECKIZHINXMIN-LABEL: fsgnjx_f16:
2921 ; CHECKIZHINXMIN: # %bb.0:
2922 ; CHECKIZHINXMIN-NEXT: # kill: def $x10_h killed $x10_h def $x10
2923 ; CHECKIZHINXMIN-NEXT: lui a2, 1048568
2924 ; CHECKIZHINXMIN-NEXT: and a0, a0, a2
2925 ; CHECKIZHINXMIN-NEXT: li a2, 15
2926 ; CHECKIZHINXMIN-NEXT: slli a2, a2, 10
2927 ; CHECKIZHINXMIN-NEXT: or a0, a0, a2
2928 ; CHECKIZHINXMIN-NEXT: fcvt.s.h a0, a0
2929 ; CHECKIZHINXMIN-NEXT: fcvt.s.h a1, a1
2930 ; CHECKIZHINXMIN-NEXT: fmul.s a0, a0, a1
2931 ; CHECKIZHINXMIN-NEXT: fcvt.h.s a0, a0
2932 ; CHECKIZHINXMIN-NEXT: ret
2933 %z = call half @llvm.copysign.f16(half 1.0, half %x)
2934 %mul = fmul half %z, %y