1 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2 ; RUN: llc -mtriple armv8a-none-none-eabihf -mattr=fullfp16 -asm-verbose=false < %s | FileCheck %s
; Half-precision addition on operands loaded from %p and %q.
; Expected lowering: two vldr.16 loads, a single vadd.f16, and a vstr.16
; of the result to [r0].
4 define void @test_fadd(ptr %p, ptr %q) {
5 ; CHECK-LABEL: test_fadd:
6 ; CHECK: vldr.16 s0, [r1]
7 ; CHECK-NEXT: vldr.16 s2, [r0]
8 ; CHECK-NEXT: vadd.f16 s0, s2, s0
9 ; CHECK-NEXT: vstr.16 s0, [r0]
11 %a = load half, ptr %p, align 2
12 %b = load half, ptr %q, align 2
; Half-precision subtraction on operands loaded from %p and %q.
; Expected lowering: two vldr.16 loads, a single vsub.f16, and a vstr.16
; of the result to [r0].
18 define void @test_fsub(ptr %p, ptr %q) {
19 ; CHECK-LABEL: test_fsub:
20 ; CHECK: vldr.16 s0, [r1]
21 ; CHECK-NEXT: vldr.16 s2, [r0]
22 ; CHECK-NEXT: vsub.f16 s0, s2, s0
23 ; CHECK-NEXT: vstr.16 s0, [r0]
25 %a = load half, ptr %p, align 2
26 %b = load half, ptr %q, align 2
; Half-precision multiplication on operands loaded from %p and %q.
; Expected lowering: two vldr.16 loads, a single vmul.f16, and a vstr.16
; of the result to [r0].
32 define void @test_fmul(ptr %p, ptr %q) {
33 ; CHECK-LABEL: test_fmul:
34 ; CHECK: vldr.16 s0, [r1]
35 ; CHECK-NEXT: vldr.16 s2, [r0]
36 ; CHECK-NEXT: vmul.f16 s0, s2, s0
37 ; CHECK-NEXT: vstr.16 s0, [r0]
39 %a = load half, ptr %p, align 2
40 %b = load half, ptr %q, align 2
; Half-precision division on operands loaded from %p and %q.
; Expected lowering: two vldr.16 loads, a single vdiv.f16, and a vstr.16
; of the result to [r0].
46 define void @test_fdiv(ptr %p, ptr %q) {
47 ; CHECK-LABEL: test_fdiv:
48 ; CHECK: vldr.16 s0, [r1]
49 ; CHECK-NEXT: vldr.16 s2, [r0]
50 ; CHECK-NEXT: vdiv.f16 s0, s2, s0
51 ; CHECK-NEXT: vstr.16 s0, [r0]
53 %a = load half, ptr %p, align 2
54 %b = load half, ptr %q, align 2
; Half-precision remainder, declared with the arm_aapcs_vfpcc convention.
; Expected lowering goes through single precision: both operands are widened
; with vcvtb.f32.f16, fmodf is called, and the result is narrowed with
; vcvtb.f16.f32. r0 is copied to r4 before the call so the result can be
; stored to [r4] afterwards; r4 and lr are saved/restored around the body.
60 define arm_aapcs_vfpcc void @test_frem(ptr %p, ptr %q) {
61 ; CHECK-LABEL: test_frem:
62 ; CHECK: .save {r4, lr}
63 ; CHECK-NEXT: push {r4, lr}
64 ; CHECK-NEXT: vldr.16 s0, [r0]
65 ; CHECK-NEXT: vldr.16 s2, [r1]
66 ; CHECK-NEXT: mov r4, r0
67 ; CHECK-NEXT: vcvtb.f32.f16 s0, s0
68 ; CHECK-NEXT: vcvtb.f32.f16 s1, s2
69 ; CHECK-NEXT: bl fmodf
70 ; CHECK-NEXT: vcvtb.f16.f32 s0, s0
71 ; CHECK-NEXT: vstr.16 s0, [r4]
72 ; CHECK-NEXT: pop {r4, pc}
73 %a = load half, ptr %p, align 2
74 %b = load half, ptr %q, align 2
; Plain half load/store round trip.
; Expected lowering: a vldr.16 from [r0] followed directly by a vstr.16 to
; [r1], with no conversion instruction in between.
80 define void @test_load_store(ptr %p, ptr %q) {
81 ; CHECK-LABEL: test_load_store:
82 ; CHECK: vldr.16 s0, [r0]
83 ; CHECK-NEXT: vstr.16 s0, [r1]
85 %a = load half, ptr %p, align 2
; fptosi half -> i32 on a value loaded from %p.
; Expected lowering converts straight from f16 with vcvt.s32.f16 and then
; moves the result from s0 into r0.
90 define i32 @test_fptosi_i32(ptr %p) {
91 ; CHECK-LABEL: test_fptosi_i32:
92 ; CHECK: vldr.16 s0, [r0]
93 ; CHECK-NEXT: vcvt.s32.f16 s0, s0
94 ; CHECK-NEXT: vmov r0, s0
96 %a = load half, ptr %p, align 2
97 %r = fptosi half %a to i32
102 ;define i64 @test_fptosi_i64(ptr %p) {
103 ; %a = load half, ptr %p, align 2
104 ; %r = fptosi half %a to i64
; fptoui half -> i32 on a value loaded from %p.
; Expected lowering converts straight from f16 with vcvt.u32.f16 and then
; moves the result from s0 into r0.
108 define i32 @test_fptoui_i32(ptr %p) {
109 ; CHECK-LABEL: test_fptoui_i32:
110 ; CHECK: vldr.16 s0, [r0]
111 ; CHECK-NEXT: vcvt.u32.f16 s0, s0
112 ; CHECK-NEXT: vmov r0, s0
114 %a = load half, ptr %p, align 2
115 %r = fptoui half %a to i32
120 ;define i64 @test_fptoui_i64(ptr %p) {
121 ; %a = load half, ptr %p, align 2
122 ; %r = fptoui half %a to i64
; sitofp i32 -> half, with the result stored through %p.
; Expected lowering converts directly to f16 with vcvt.f16.s32 and stores
; the result with vstr.16 to [r1].
126 define void @test_sitofp_i32(i32 %a, ptr %p) {
127 ; CHECK-LABEL: test_sitofp_i32:
129 ; CHECK-NEXT: vcvt.f16.s32 s0, s0
130 ; CHECK-NEXT: vstr.16 s0, [r1]
132 %r = sitofp i32 %a to half
133 store half %r, ptr %p
; uitofp i32 -> half, with the result stored through %p.
; Expected lowering converts directly to f16 with vcvt.f16.u32 and stores
; the result with vstr.16 to [r1].
137 define void @test_uitofp_i32(i32 %a, ptr %p) {
138 ; CHECK-LABEL: test_uitofp_i32:
140 ; CHECK-NEXT: vcvt.f16.u32 s0, s0
141 ; CHECK-NEXT: vstr.16 s0, [r1]
143 %r = uitofp i32 %a to half
144 store half %r, ptr %p
149 ;define void @test_sitofp_i64(i64 %a, ptr %p) {
150 ; %r = sitofp i64 %a to half
151 ; store half %r, ptr %p
156 ;define void @test_uitofp_i64(i64 %a, ptr %p) {
157 ; %r = uitofp i64 %a to half
158 ; store half %r, ptr %p
; fptrunc float -> half, with the result stored through %p.
; Expected lowering: a single vcvtb.f16.f32 on s0 followed by a vstr.16
; to [r0].
162 define void @test_fptrunc_float(float %f, ptr %p) {
163 ; CHECK-LABEL: test_fptrunc_float:
164 ; CHECK: vcvtb.f16.f32 s0, s0
165 ; CHECK-NEXT: vstr.16 s0, [r0]
167 %a = fptrunc float %f to half
168 store half %a, ptr %p
; fptrunc double -> half, with the result stored through %p.
; Expected lowering: a single vcvtb.f16.f64 from d0 into s0 (no intermediate
; float step) followed by a vstr.16 to [r0].
172 define void @test_fptrunc_double(double %d, ptr %p) {
173 ; CHECK-LABEL: test_fptrunc_double:
174 ; CHECK: vcvtb.f16.f64 s0, d0
175 ; CHECK-NEXT: vstr.16 s0, [r0]
177 %a = fptrunc double %d to half
178 store half %a, ptr %p
; fpext half -> float on a value loaded from %p.
; Expected lowering: vldr.16 from [r0] followed by a single vcvtb.f32.f16.
182 define float @test_fpextend_float(ptr %p) {
183 ; CHECK-LABEL: test_fpextend_float:
184 ; CHECK: vldr.16 s0, [r0]
185 ; CHECK-NEXT: vcvtb.f32.f16 s0, s0
187 %a = load half, ptr %p, align 2
188 %r = fpext half %a to float
; fpext half -> double on a value loaded from %p.
; Expected lowering: vldr.16 from [r0] followed by a single vcvtb.f64.f16
; from s0 into d0 (no intermediate float step).
192 define double @test_fpextend_double(ptr %p) {
193 ; CHECK-LABEL: test_fpextend_double:
194 ; CHECK: vldr.16 s0, [r0]
195 ; CHECK-NEXT: vcvtb.f64.f16 d0, s0
197 %a = load half, ptr %p, align 2
198 %r = fpext half %a to double
; Loads a half and bitcasts it to i16.
; Expected lowering is an integer halfword load (ldrh) into r0 rather than
; a vldr.16 into a floating-point register.
202 define i16 @test_bitcast_halftoi16(ptr %p) {
203 ; CHECK-LABEL: test_bitcast_halftoi16:
204 ; CHECK: ldrh r0, [r0]
206 %a = load half, ptr %p, align 2
207 %r = bitcast half %a to i16
; Bitcasts an i16 argument to half and stores it through %p.
; Expected lowering is an integer halfword store (strh) of r0 to [r1] rather
; than a vstr.16 from a floating-point register.
211 define void @test_bitcast_i16tohalf(i16 %a, ptr %p) {
212 ; CHECK-LABEL: test_bitcast_i16tohalf:
213 ; CHECK: strh r0, [r1]
215 %r = bitcast i16 %a to half
216 store half %r, ptr %p
; llvm.sqrt.f16 applied in place to the half at %p.
; Expected lowering uses the native vsqrt.f16 instruction between the
; vldr.16 and vstr.16, with no library call.
220 define void @test_sqrt(ptr %p) {
221 ; CHECK-LABEL: test_sqrt:
222 ; CHECK: vldr.16 s0, [r0]
223 ; CHECK-NEXT: vsqrt.f16 s0, s0
224 ; CHECK-NEXT: vstr.16 s0, [r0]
226 %a = load half, ptr %p, align 2
227 %r = call half @llvm.sqrt.f16(half %a)
228 store half %r, ptr %p
; llvm.powi.f16.i32 applied in place to the half at %p with exponent %b.
; Expected lowering is a call to __powisf2 on the value widened to float
; (vcvtb.f32.f16), with the result narrowed back by vcvtb.f16.f32.
; r0 is copied to r4 so the store can target [r4] after the call, and r1 is
; moved into r0 before the call (presumably the i32 exponent argument).
232 define void @test_fpowi(ptr %p, i32 %b) {
233 ; CHECK-LABEL: test_fpowi:
234 ; CHECK: .save {r4, lr}
235 ; CHECK-NEXT: push {r4, lr}
236 ; CHECK-NEXT: vldr.16 s0, [r0]
237 ; CHECK-NEXT: mov r4, r0
238 ; CHECK-NEXT: mov r0, r1
239 ; CHECK-NEXT: vcvtb.f32.f16 s0, s0
240 ; CHECK-NEXT: bl __powisf2
241 ; CHECK-NEXT: vcvtb.f16.f32 s0, s0
242 ; CHECK-NEXT: vstr.16 s0, [r4]
243 ; CHECK-NEXT: pop {r4, pc}
244 %a = load half, ptr %p, align 2
245 %r = call half @llvm.powi.f16.i32(half %a, i32 %b)
246 store half %r, ptr %p
; llvm.sin.f16 applied in place to the half at %p.
; Expected lowering is a library call: the operand is widened with
; vcvtb.f32.f16, sinf is called, and the result is narrowed with
; vcvtb.f16.f32. r0 is copied to r4 so the store can target [r4] afterwards.
250 define void @test_sin(ptr %p) {
251 ; CHECK-LABEL: test_sin:
252 ; CHECK: .save {r4, lr}
253 ; CHECK-NEXT: push {r4, lr}
254 ; CHECK-NEXT: vldr.16 s0, [r0]
255 ; CHECK-NEXT: mov r4, r0
256 ; CHECK-NEXT: vcvtb.f32.f16 s0, s0
257 ; CHECK-NEXT: bl sinf
258 ; CHECK-NEXT: vcvtb.f16.f32 s0, s0
259 ; CHECK-NEXT: vstr.16 s0, [r4]
260 ; CHECK-NEXT: pop {r4, pc}
261 %a = load half, ptr %p, align 2
262 %r = call half @llvm.sin.f16(half %a)
263 store half %r, ptr %p
; llvm.cos.f16 applied in place to the half at %p.
; Expected lowering is a library call: the operand is widened with
; vcvtb.f32.f16, cosf is called, and the result is narrowed with
; vcvtb.f16.f32. r0 is copied to r4 so the store can target [r4] afterwards.
267 define void @test_cos(ptr %p) {
268 ; CHECK-LABEL: test_cos:
269 ; CHECK: .save {r4, lr}
270 ; CHECK-NEXT: push {r4, lr}
271 ; CHECK-NEXT: vldr.16 s0, [r0]
272 ; CHECK-NEXT: mov r4, r0
273 ; CHECK-NEXT: vcvtb.f32.f16 s0, s0
274 ; CHECK-NEXT: bl cosf
275 ; CHECK-NEXT: vcvtb.f16.f32 s0, s0
276 ; CHECK-NEXT: vstr.16 s0, [r4]
277 ; CHECK-NEXT: pop {r4, pc}
278 %a = load half, ptr %p, align 2
279 %r = call half @llvm.cos.f16(half %a)
280 store half %r, ptr %p
; llvm.tan.f16 applied in place to the half at %p.
; Expected lowering is a library call: the operand is widened with
; vcvtb.f32.f16, tanf is called, and the result is narrowed with
; vcvtb.f16.f32. r0 is copied to r4 so the store can target [r4] afterwards.
284 define void @test_tan(ptr %p) {
285 ; CHECK-LABEL: test_tan:
286 ; CHECK: .save {r4, lr}
287 ; CHECK-NEXT: push {r4, lr}
288 ; CHECK-NEXT: vldr.16 s0, [r0]
289 ; CHECK-NEXT: mov r4, r0
290 ; CHECK-NEXT: vcvtb.f32.f16 s0, s0
291 ; CHECK-NEXT: bl tanf
292 ; CHECK-NEXT: vcvtb.f16.f32 s0, s0
293 ; CHECK-NEXT: vstr.16 s0, [r4]
294 ; CHECK-NEXT: pop {r4, pc}
295 %a = load half, ptr %p, align 2
296 %r = call half @llvm.tan.f16(half %a)
297 store half %r, ptr %p
; llvm.pow.f16 on halves loaded from %p and %q, result stored back to %p.
; Expected lowering is a library call: both operands are widened with
; vcvtb.f32.f16 (into s0 and s1), powf is called, and the result is narrowed
; with vcvtb.f16.f32. r0 is copied to r4 so the store can target [r4].
301 define void @test_pow(ptr %p, ptr %q) {
302 ; CHECK-LABEL: test_pow:
303 ; CHECK: .save {r4, lr}
304 ; CHECK-NEXT: push {r4, lr}
305 ; CHECK-NEXT: vldr.16 s0, [r0]
306 ; CHECK-NEXT: vldr.16 s2, [r1]
307 ; CHECK-NEXT: mov r4, r0
308 ; CHECK-NEXT: vcvtb.f32.f16 s0, s0
309 ; CHECK-NEXT: vcvtb.f32.f16 s1, s2
310 ; CHECK-NEXT: bl powf
311 ; CHECK-NEXT: vcvtb.f16.f32 s0, s0
312 ; CHECK-NEXT: vstr.16 s0, [r4]
313 ; CHECK-NEXT: pop {r4, pc}
314 %a = load half, ptr %p, align 2
315 %b = load half, ptr %q, align 2
316 %r = call half @llvm.pow.f16(half %a, half %b)
317 store half %r, ptr %p
; llvm.exp.f16 applied in place to the half at %p.
; Expected lowering is a library call: the operand is widened with
; vcvtb.f32.f16, expf is called, and the result is narrowed with
; vcvtb.f16.f32. r0 is copied to r4 so the store can target [r4] afterwards.
321 define void @test_exp(ptr %p) {
322 ; CHECK-LABEL: test_exp:
323 ; CHECK: .save {r4, lr}
324 ; CHECK-NEXT: push {r4, lr}
325 ; CHECK-NEXT: vldr.16 s0, [r0]
326 ; CHECK-NEXT: mov r4, r0
327 ; CHECK-NEXT: vcvtb.f32.f16 s0, s0
328 ; CHECK-NEXT: bl expf
329 ; CHECK-NEXT: vcvtb.f16.f32 s0, s0
330 ; CHECK-NEXT: vstr.16 s0, [r4]
331 ; CHECK-NEXT: pop {r4, pc}
332 %a = load half, ptr %p, align 2
333 %r = call half @llvm.exp.f16(half %a)
334 store half %r, ptr %p
; llvm.exp2.f16 applied in place to the half at %p.
; Expected lowering is a library call: the operand is widened with
; vcvtb.f32.f16, exp2f is called, and the result is narrowed with
; vcvtb.f16.f32. r0 is copied to r4 so the store can target [r4] afterwards.
338 define void @test_exp2(ptr %p) {
339 ; CHECK-LABEL: test_exp2:
340 ; CHECK: .save {r4, lr}
341 ; CHECK-NEXT: push {r4, lr}
342 ; CHECK-NEXT: vldr.16 s0, [r0]
343 ; CHECK-NEXT: mov r4, r0
344 ; CHECK-NEXT: vcvtb.f32.f16 s0, s0
345 ; CHECK-NEXT: bl exp2f
346 ; CHECK-NEXT: vcvtb.f16.f32 s0, s0
347 ; CHECK-NEXT: vstr.16 s0, [r4]
348 ; CHECK-NEXT: pop {r4, pc}
349 %a = load half, ptr %p, align 2
350 %r = call half @llvm.exp2.f16(half %a)
351 store half %r, ptr %p
; llvm.log.f16 applied in place to the half at %p.
; Expected lowering is a library call: the operand is widened with
; vcvtb.f32.f16, logf is called, and the result is narrowed with
; vcvtb.f16.f32. r0 is copied to r4 so the store can target [r4] afterwards.
355 define void @test_log(ptr %p) {
356 ; CHECK-LABEL: test_log:
357 ; CHECK: .save {r4, lr}
358 ; CHECK-NEXT: push {r4, lr}
359 ; CHECK-NEXT: vldr.16 s0, [r0]
360 ; CHECK-NEXT: mov r4, r0
361 ; CHECK-NEXT: vcvtb.f32.f16 s0, s0
362 ; CHECK-NEXT: bl logf
363 ; CHECK-NEXT: vcvtb.f16.f32 s0, s0
364 ; CHECK-NEXT: vstr.16 s0, [r4]
365 ; CHECK-NEXT: pop {r4, pc}
366 %a = load half, ptr %p, align 2
367 %r = call half @llvm.log.f16(half %a)
368 store half %r, ptr %p
; llvm.log10.f16 applied in place to the half at %p.
; Expected lowering is a library call: the operand is widened with
; vcvtb.f32.f16, log10f is called, and the result is narrowed with
; vcvtb.f16.f32. r0 is copied to r4 so the store can target [r4] afterwards.
372 define void @test_log10(ptr %p) {
373 ; CHECK-LABEL: test_log10:
374 ; CHECK: .save {r4, lr}
375 ; CHECK-NEXT: push {r4, lr}
376 ; CHECK-NEXT: vldr.16 s0, [r0]
377 ; CHECK-NEXT: mov r4, r0
378 ; CHECK-NEXT: vcvtb.f32.f16 s0, s0
379 ; CHECK-NEXT: bl log10f
380 ; CHECK-NEXT: vcvtb.f16.f32 s0, s0
381 ; CHECK-NEXT: vstr.16 s0, [r4]
382 ; CHECK-NEXT: pop {r4, pc}
383 %a = load half, ptr %p, align 2
384 %r = call half @llvm.log10.f16(half %a)
385 store half %r, ptr %p
; llvm.log2.f16 applied in place to the half at %p.
; Expected lowering is a library call: the operand is widened with
; vcvtb.f32.f16, log2f is called, and the result is narrowed with
; vcvtb.f16.f32. r0 is copied to r4 so the store can target [r4] afterwards.
389 define void @test_log2(ptr %p) {
390 ; CHECK-LABEL: test_log2:
391 ; CHECK: .save {r4, lr}
392 ; CHECK-NEXT: push {r4, lr}
393 ; CHECK-NEXT: vldr.16 s0, [r0]
394 ; CHECK-NEXT: mov r4, r0
395 ; CHECK-NEXT: vcvtb.f32.f16 s0, s0
396 ; CHECK-NEXT: bl log2f
397 ; CHECK-NEXT: vcvtb.f16.f32 s0, s0
398 ; CHECK-NEXT: vstr.16 s0, [r4]
399 ; CHECK-NEXT: pop {r4, pc}
400 %a = load half, ptr %p, align 2
401 %r = call half @llvm.log2.f16(half %a)
402 store half %r, ptr %p
; llvm.fma.f16 on halves loaded from %p, %q and %r, result stored to %p.
; Expected lowering: three vldr.16 loads and a single vfma.f16 that
; accumulates into s4 (the value loaded from [r2]), then a vstr.16 of s4
; to [r0].
406 define void @test_fma(ptr %p, ptr %q, ptr %r) {
407 ; CHECK-LABEL: test_fma:
408 ; CHECK: vldr.16 s0, [r1]
409 ; CHECK-NEXT: vldr.16 s2, [r0]
410 ; CHECK-NEXT: vldr.16 s4, [r2]
411 ; CHECK-NEXT: vfma.f16 s4, s2, s0
412 ; CHECK-NEXT: vstr.16 s4, [r0]
414 %a = load half, ptr %p, align 2
415 %b = load half, ptr %q, align 2
416 %c = load half, ptr %r, align 2
417 %v = call half @llvm.fma.f16(half %a, half %b, half %c)
418 store half %v, ptr %p
; llvm.fabs.f16 applied in place to the half at %p.
; Expected lowering uses the native vabs.f16 instruction between the
; vldr.16 and vstr.16.
422 define void @test_fabs(ptr %p) {
423 ; CHECK-LABEL: test_fabs:
424 ; CHECK: vldr.16 s0, [r0]
425 ; CHECK-NEXT: vabs.f16 s0, s0
426 ; CHECK-NEXT: vstr.16 s0, [r0]
428 %a = load half, ptr %p, align 2
429 %r = call half @llvm.fabs.f16(half %a)
430 store half %r, ptr %p
; llvm.minnum.f16 on halves loaded from %p and %q, result stored to %p.
; Expected lowering: two vldr.16 loads, a single vminnm.f16, and a vstr.16
; of the result to [r0].
434 define void @test_minnum(ptr %p, ptr %q) {
435 ; CHECK-LABEL: test_minnum:
436 ; CHECK: vldr.16 s0, [r1]
437 ; CHECK-NEXT: vldr.16 s2, [r0]
438 ; CHECK-NEXT: vminnm.f16 s0, s2, s0
439 ; CHECK-NEXT: vstr.16 s0, [r0]
441 %a = load half, ptr %p, align 2
442 %b = load half, ptr %q, align 2
443 %r = call half @llvm.minnum.f16(half %a, half %b)
444 store half %r, ptr %p
; llvm.maxnum.f16 on halves loaded from %p and %q, result stored to %p.
; Expected lowering: two vldr.16 loads, a single vmaxnm.f16, and a vstr.16
; of the result to [r0].
448 define void @test_maxnum(ptr %p, ptr %q) {
449 ; CHECK-LABEL: test_maxnum:
450 ; CHECK: vldr.16 s0, [r1]
451 ; CHECK-NEXT: vldr.16 s2, [r0]
452 ; CHECK-NEXT: vmaxnm.f16 s0, s2, s0
453 ; CHECK-NEXT: vstr.16 s0, [r0]
455 %a = load half, ptr %p, align 2
456 %b = load half, ptr %q, align 2
457 %r = call half @llvm.maxnum.f16(half %a, half %b)
458 store half %r, ptr %p
; Minimum of the half at %p and the constant 1.0, stored back to %p.
; Note that, despite the name, the IR does not call a minimum intrinsic: it
; is an unordered-or-less-than compare (fcmp ult) against 1.0 followed by a
; select. Expected lowering materialises 1.0 with vmov.f16, compares with
; vcmp.f16 s2, s0, transfers the flags with vmrs, and picks the result with
; vselge.f16.
462 define void @test_minimum(ptr %p) {
463 ; CHECK-LABEL: test_minimum:
464 ; CHECK: vldr.16 s2, [r0]
465 ; CHECK-NEXT: vmov.f16 s0, #1.000000e+00
466 ; CHECK-NEXT: vcmp.f16 s2, s0
467 ; CHECK-NEXT: vmrs APSR_nzcv, fpscr
468 ; CHECK-NEXT: vselge.f16 s0, s0, s2
469 ; CHECK-NEXT: vstr.16 s0, [r0]
471 %a = load half, ptr %p, align 2
472 %c = fcmp ult half %a, 1.0
473 %r = select i1 %c, half %a, half 1.0
474 store half %r, ptr %p
; Maximum of the half at %p and the constant 1.0, stored back to %p.
; Note that, despite the name, the IR does not call a maximum intrinsic: it
; is an unordered-or-greater-than compare (fcmp ugt) against 1.0 followed by
; a select. Expected lowering materialises 1.0 with vmov.f16, compares with
; vcmp.f16 s0, s2 (constant first), transfers the flags with vmrs, and picks
; the result with vselge.f16.
478 define void @test_maximum(ptr %p) {
479 ; CHECK-LABEL: test_maximum:
480 ; CHECK: vldr.16 s2, [r0]
481 ; CHECK-NEXT: vmov.f16 s0, #1.000000e+00
482 ; CHECK-NEXT: vcmp.f16 s0, s2
483 ; CHECK-NEXT: vmrs APSR_nzcv, fpscr
484 ; CHECK-NEXT: vselge.f16 s0, s0, s2
485 ; CHECK-NEXT: vstr.16 s0, [r0]
487 %a = load half, ptr %p, align 2
488 %c = fcmp ugt half %a, 1.0
489 %r = select i1 %c, half %a, half 1.0
490 store half %r, ptr %p
; llvm.copysign.f16: magnitude from the half at %p, sign from the half at %q,
; result stored back to %p.
; Expected lowering has no single copysign instruction. The sign operand is
; spilled to a 4-byte stack slot, its byte at [sp, #1] is reloaded with ldrb
; and tested against #128 (presumably the sign bit of the half on a
; little-endian layout -- confirm). The magnitude is computed with vabs.f16,
; its negation with vneg.f16, and vseleq.f16 picks between the two.
494 define void @test_copysign(ptr %p, ptr %q) {
495 ; CHECK-LABEL: test_copysign:
497 ; CHECK-NEXT: sub sp, sp, #4
498 ; CHECK-NEXT: vldr.16 s0, [r1]
499 ; CHECK-NEXT: vstr.16 s0, [sp]
500 ; CHECK-NEXT: vldr.16 s0, [r0]
501 ; CHECK-NEXT: ldrb r1, [sp, #1]
502 ; CHECK-NEXT: vabs.f16 s0, s0
503 ; CHECK-NEXT: tst r1, #128
504 ; CHECK-NEXT: vneg.f16 s2, s0
505 ; CHECK-NEXT: vseleq.f16 s0, s0, s2
506 ; CHECK-NEXT: vstr.16 s0, [r0]
507 ; CHECK-NEXT: add sp, sp, #4
509 %a = load half, ptr %p, align 2
510 %b = load half, ptr %q, align 2
511 %r = call half @llvm.copysign.f16(half %a, half %b)
512 store half %r, ptr %p
; llvm.floor.f16 applied in place to the half at %p.
; Expected lowering uses the native vrintm.f16 instruction between the
; vldr.16 and vstr.16, with no library call.
516 define void @test_floor(ptr %p) {
517 ; CHECK-LABEL: test_floor:
518 ; CHECK: vldr.16 s0, [r0]
519 ; CHECK-NEXT: vrintm.f16 s0, s0
520 ; CHECK-NEXT: vstr.16 s0, [r0]
522 %a = load half, ptr %p, align 2
523 %r = call half @llvm.floor.f16(half %a)
524 store half %r, ptr %p
; llvm.ceil.f16 applied in place to the half at %p.
; Expected lowering uses the native vrintp.f16 instruction between the
; vldr.16 and vstr.16, with no library call.
528 define void @test_ceil(ptr %p) {
529 ; CHECK-LABEL: test_ceil:
530 ; CHECK: vldr.16 s0, [r0]
531 ; CHECK-NEXT: vrintp.f16 s0, s0
532 ; CHECK-NEXT: vstr.16 s0, [r0]
534 %a = load half, ptr %p, align 2
535 %r = call half @llvm.ceil.f16(half %a)
536 store half %r, ptr %p
; llvm.trunc.f16 applied in place to the half at %p.
; Expected lowering uses the native vrintz.f16 instruction between the
; vldr.16 and vstr.16, with no library call.
540 define void @test_trunc(ptr %p) {
541 ; CHECK-LABEL: test_trunc:
542 ; CHECK: vldr.16 s0, [r0]
543 ; CHECK-NEXT: vrintz.f16 s0, s0
544 ; CHECK-NEXT: vstr.16 s0, [r0]
546 %a = load half, ptr %p, align 2
547 %r = call half @llvm.trunc.f16(half %a)
548 store half %r, ptr %p
; llvm.rint.f16 applied in place to the half at %p.
; Expected lowering uses the native vrintx.f16 instruction between the
; vldr.16 and vstr.16, with no library call.
552 define void @test_rint(ptr %p) {
553 ; CHECK-LABEL: test_rint:
554 ; CHECK: vldr.16 s0, [r0]
555 ; CHECK-NEXT: vrintx.f16 s0, s0
556 ; CHECK-NEXT: vstr.16 s0, [r0]
558 %a = load half, ptr %p, align 2
559 %r = call half @llvm.rint.f16(half %a)
560 store half %r, ptr %p
; llvm.nearbyint.f16 applied in place to the half at %p.
; Expected lowering uses the native vrintr.f16 instruction between the
; vldr.16 and vstr.16, with no library call.
564 define void @test_nearbyint(ptr %p) {
565 ; CHECK-LABEL: test_nearbyint:
566 ; CHECK: vldr.16 s0, [r0]
567 ; CHECK-NEXT: vrintr.f16 s0, s0
568 ; CHECK-NEXT: vstr.16 s0, [r0]
570 %a = load half, ptr %p, align 2
571 %r = call half @llvm.nearbyint.f16(half %a)
572 store half %r, ptr %p
; llvm.round.f16 applied in place to the half at %p.
; Expected lowering uses the native vrinta.f16 instruction between the
; vldr.16 and vstr.16, with no library call.
576 define void @test_round(ptr %p) {
577 ; CHECK-LABEL: test_round:
578 ; CHECK: vldr.16 s0, [r0]
579 ; CHECK-NEXT: vrinta.f16 s0, s0
580 ; CHECK-NEXT: vstr.16 s0, [r0]
582 %a = load half, ptr %p, align 2
583 %r = call half @llvm.round.f16(half %a)
584 store half %r, ptr %p
; llvm.fmuladd.f16 on halves loaded from %p, %q and %r, result stored to %p.
; Expected lowering fuses the operation: three vldr.16 loads and a single
; vfma.f16 that accumulates into s4 (the value loaded from [r2]), then a
; vstr.16 of s4 to [r0]. No separate multiply and add are expected.
588 define void @test_fmuladd(ptr %p, ptr %q, ptr %r) {
589 ; CHECK-LABEL: test_fmuladd:
590 ; CHECK: vldr.16 s0, [r1]
591 ; CHECK-NEXT: vldr.16 s2, [r0]
592 ; CHECK-NEXT: vldr.16 s4, [r2]
593 ; CHECK-NEXT: vfma.f16 s4, s2, s0
594 ; CHECK-NEXT: vstr.16 s4, [r0]
596 %a = load half, ptr %p, align 2
597 %b = load half, ptr %q, align 2
598 %c = load half, ptr %r, align 2
599 %v = call half @llvm.fmuladd.f16(half %a, half %b, half %c)
600 store half %v, ptr %p
604 declare half @llvm.sqrt.f16(half %a)
605 declare half @llvm.powi.f16.i32(half %a, i32 %b)
606 declare half @llvm.sin.f16(half %a)
607 declare half @llvm.cos.f16(half %a)
608 declare half @llvm.tan.f16(half %a)
609 declare half @llvm.pow.f16(half %a, half %b)
610 declare half @llvm.exp.f16(half %a)
611 declare half @llvm.exp2.f16(half %a)
612 declare half @llvm.log.f16(half %a)
613 declare half @llvm.log10.f16(half %a)
614 declare half @llvm.log2.f16(half %a)
615 declare half @llvm.fma.f16(half %a, half %b, half %c)
616 declare half @llvm.fabs.f16(half %a)
617 declare half @llvm.minnum.f16(half %a, half %b)
618 declare half @llvm.maxnum.f16(half %a, half %b)
619 declare half @llvm.copysign.f16(half %a, half %b)
620 declare half @llvm.floor.f16(half %a)
621 declare half @llvm.ceil.f16(half %a)
622 declare half @llvm.trunc.f16(half %a)
623 declare half @llvm.rint.f16(half %a)
624 declare half @llvm.nearbyint.f16(half %a)
625 declare half @llvm.round.f16(half %a)
626 declare half @llvm.fmuladd.f16(half %a, half %b, half %c)