1 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2 ; RUN: llc -mtriple armv8a-none-none-eabihf -mattr=fullfp16 -asm-verbose=false < %s | FileCheck %s
; Half-precision add test (per the function name and the vadd.f16 expectation).
; Both operands are loaded as `half` from %p and %q; the expected assembly is
; two vldr.16 loads, one vadd.f16, and a vstr.16 of the result to [r0].
; NOTE(review): the arithmetic, store and return IR lines are not visible in
; this excerpt.
4 define void @test_fadd(ptr %p, ptr %q) {
5 ; CHECK-LABEL: test_fadd:
6 ; CHECK: vldr.16 s0, [r1]
7 ; CHECK-NEXT: vldr.16 s2, [r0]
8 ; CHECK-NEXT: vadd.f16 s0, s2, s0
9 ; CHECK-NEXT: vstr.16 s0, [r0]
11 %a = load half, ptr %p, align 2
12 %b = load half, ptr %q, align 2
; Half-precision subtract test (per the function name and the vsub.f16
; expectation). Operands are loaded as `half` from %p and %q; the expected
; assembly is two vldr.16 loads, one vsub.f16, and a vstr.16 to [r0].
; NOTE(review): the arithmetic, store and return IR lines are not visible in
; this excerpt.
18 define void @test_fsub(ptr %p, ptr %q) {
19 ; CHECK-LABEL: test_fsub:
20 ; CHECK: vldr.16 s0, [r1]
21 ; CHECK-NEXT: vldr.16 s2, [r0]
22 ; CHECK-NEXT: vsub.f16 s0, s2, s0
23 ; CHECK-NEXT: vstr.16 s0, [r0]
25 %a = load half, ptr %p, align 2
26 %b = load half, ptr %q, align 2
; Half-precision multiply test (per the function name and the vmul.f16
; expectation). Operands are loaded as `half` from %p and %q; the expected
; assembly is two vldr.16 loads, one vmul.f16, and a vstr.16 to [r0].
; NOTE(review): the arithmetic, store and return IR lines are not visible in
; this excerpt.
32 define void @test_fmul(ptr %p, ptr %q) {
33 ; CHECK-LABEL: test_fmul:
34 ; CHECK: vldr.16 s0, [r1]
35 ; CHECK-NEXT: vldr.16 s2, [r0]
36 ; CHECK-NEXT: vmul.f16 s0, s2, s0
37 ; CHECK-NEXT: vstr.16 s0, [r0]
39 %a = load half, ptr %p, align 2
40 %b = load half, ptr %q, align 2
; Half-precision divide test (per the function name and the vdiv.f16
; expectation). Operands are loaded as `half` from %p and %q; the expected
; assembly is two vldr.16 loads, one vdiv.f16, and a vstr.16 to [r0].
; NOTE(review): the arithmetic, store and return IR lines are not visible in
; this excerpt.
46 define void @test_fdiv(ptr %p, ptr %q) {
47 ; CHECK-LABEL: test_fdiv:
48 ; CHECK: vldr.16 s0, [r1]
49 ; CHECK-NEXT: vldr.16 s2, [r0]
50 ; CHECK-NEXT: vdiv.f16 s0, s2, s0
51 ; CHECK-NEXT: vstr.16 s0, [r0]
53 %a = load half, ptr %p, align 2
54 %b = load half, ptr %q, align 2
; Half-precision remainder test (per the function name and the fmodf call in
; the expected output). Unlike the neighbouring tests, this one is declared
; with the arm_aapcs_vfpcc calling convention.
; Expected assembly: save r4/lr, load both halves, keep a copy of r0 in r4,
; widen both operands to f32 with vcvtb.f32.f16, call fmodf, narrow the result
; with vcvtb.f16.f32, store it through r4, and return via pop.
; NOTE(review): the remainder/store/return IR lines are not visible in this
; excerpt.
60 define arm_aapcs_vfpcc void @test_frem(ptr %p, ptr %q) {
61 ; CHECK-LABEL: test_frem:
62 ; CHECK: .save {r4, lr}
63 ; CHECK-NEXT: push {r4, lr}
64 ; CHECK-NEXT: vldr.16 s0, [r0]
65 ; CHECK-NEXT: vldr.16 s2, [r1]
66 ; CHECK-NEXT: mov r4, r0
67 ; CHECK-NEXT: vcvtb.f32.f16 s0, s0
68 ; CHECK-NEXT: vcvtb.f32.f16 s1, s2
69 ; CHECK-NEXT: bl fmodf
70 ; CHECK-NEXT: vcvtb.f16.f32 s0, s0
71 ; CHECK-NEXT: vstr.16 s0, [r4]
72 ; CHECK-NEXT: pop {r4, pc}
73 %a = load half, ptr %p, align 2
74 %b = load half, ptr %q, align 2
; Plain half load/store test: a `half` is loaded from %p, and the expected
; assembly is a vldr.16 from [r0] followed directly by a vstr.16 to [r1].
; NOTE(review): the store and return IR lines are not visible in this excerpt.
80 define void @test_load_store(ptr %p, ptr %q) {
81 ; CHECK-LABEL: test_load_store:
82 ; CHECK: vldr.16 s0, [r0]
83 ; CHECK-NEXT: vstr.16 s0, [r1]
85 %a = load half, ptr %p, align 2
; half -> signed i32 conversion (fptosi). Expected assembly: vldr.16 load,
; vcvt.s32.f16 in place on s0, then vmov of the integer result into r0.
90 define i32 @test_fptosi_i32(ptr %p) {
91 ; CHECK-LABEL: test_fptosi_i32:
92 ; CHECK: vldr.16 s0, [r0]
93 ; CHECK-NEXT: vcvt.s32.f16 s0, s0
94 ; CHECK-NEXT: vmov r0, s0
96 %a = load half, ptr %p, align 2
97 %r = fptosi half %a to i32
102 ;define i64 @test_fptosi_i64(ptr %p) {
103 ; %a = load half, ptr %p, align 2
104 ; %r = fptosi half %a to i64
; half -> unsigned i32 conversion (fptoui). Expected assembly: vldr.16 load,
; vcvt.u32.f16 in place on s0, then vmov of the integer result into r0.
108 define i32 @test_fptoui_i32(ptr %p) {
109 ; CHECK-LABEL: test_fptoui_i32:
110 ; CHECK: vldr.16 s0, [r0]
111 ; CHECK-NEXT: vcvt.u32.f16 s0, s0
112 ; CHECK-NEXT: vmov r0, s0
114 %a = load half, ptr %p, align 2
115 %r = fptoui half %a to i32
120 ;define i64 @test_fptoui_i64(ptr %p) {
121 ; %a = load half, ptr %p, align 2
122 ; %r = fptoui half %a to i64
; signed i32 -> half conversion (sitofp), result stored to %p. The visible
; expected assembly is vcvt.f16.s32 on s0 followed by a vstr.16 to [r1].
; NOTE(review): the first expectation line after the label (the one that would
; place %a into s0) is not visible in this excerpt.
126 define void @test_sitofp_i32(i32 %a, ptr %p) {
127 ; CHECK-LABEL: test_sitofp_i32:
129 ; CHECK-NEXT: vcvt.f16.s32 s0, s0
130 ; CHECK-NEXT: vstr.16 s0, [r1]
132 %r = sitofp i32 %a to half
133 store half %r, ptr %p
; unsigned i32 -> half conversion (uitofp), result stored to %p. The visible
; expected assembly is vcvt.f16.u32 on s0 followed by a vstr.16 to [r1].
; NOTE(review): the first expectation line after the label (the one that would
; place %a into s0) is not visible in this excerpt.
137 define void @test_uitofp_i32(i32 %a, ptr %p) {
138 ; CHECK-LABEL: test_uitofp_i32:
140 ; CHECK-NEXT: vcvt.f16.u32 s0, s0
141 ; CHECK-NEXT: vstr.16 s0, [r1]
143 %r = uitofp i32 %a to half
144 store half %r, ptr %p
149 ;define void @test_sitofp_i64(i64 %a, ptr %p) {
150 ; %r = sitofp i64 %a to half
151 ; store half %r, ptr %p
156 ;define void @test_uitofp_i64(i64 %a, ptr %p) {
157 ; %r = uitofp i64 %a to half
158 ; store half %r, ptr %p
; float -> half truncation (fptrunc), result stored to %p. Expected assembly:
; vcvtb.f16.f32 on s0 (the incoming float argument register) and a vstr.16 of
; the narrowed value to [r0].
162 define void @test_fptrunc_float(float %f, ptr %p) {
163 ; CHECK-LABEL: test_fptrunc_float:
164 ; CHECK: vcvtb.f16.f32 s0, s0
165 ; CHECK-NEXT: vstr.16 s0, [r0]
167 %a = fptrunc float %f to half
168 store half %a, ptr %p
; double -> half truncation (fptrunc), result stored to %p. Expected assembly:
; a single vcvtb.f16.f64 from d0 into s0, then a vstr.16 to [r0].
172 define void @test_fptrunc_double(double %d, ptr %p) {
173 ; CHECK-LABEL: test_fptrunc_double:
174 ; CHECK: vcvtb.f16.f64 s0, d0
175 ; CHECK-NEXT: vstr.16 s0, [r0]
177 %a = fptrunc double %d to half
178 store half %a, ptr %p
; half -> float extension (fpext) of a value loaded from %p. Expected
; assembly: vldr.16 load followed by vcvtb.f32.f16 in place on s0.
182 define float @test_fpextend_float(ptr %p) {
183 ; CHECK-LABEL: test_fpextend_float:
184 ; CHECK: vldr.16 s0, [r0]
185 ; CHECK-NEXT: vcvtb.f32.f16 s0, s0
187 %a = load half, ptr %p, align 2
188 %r = fpext half %a to float
; half -> double extension (fpext) of a value loaded from %p. Expected
; assembly: vldr.16 load followed by a single vcvtb.f64.f16 from s0 into d0.
192 define double @test_fpextend_double(ptr %p) {
193 ; CHECK-LABEL: test_fpextend_double:
194 ; CHECK: vldr.16 s0, [r0]
195 ; CHECK-NEXT: vcvtb.f64.f16 d0, s0
197 %a = load half, ptr %p, align 2
198 %r = fpext half %a to double
; Load a half and bitcast it to i16. The visible expected assembly is a plain
; integer halfword load (ldrh) into r0, with no FP register in between.
202 define i16 @test_bitcast_halftoi16(ptr %p) {
203 ; CHECK-LABEL: test_bitcast_halftoi16:
204 ; CHECK: ldrh r0, [r0]
206 %a = load half, ptr %p, align 2
207 %r = bitcast half %a to i16
; Bitcast an i16 argument to half and store it to %p. The visible expected
; assembly is a plain integer halfword store (strh) of r0 to [r1].
211 define void @test_bitcast_i16tohalf(i16 %a, ptr %p) {
212 ; CHECK-LABEL: test_bitcast_i16tohalf:
213 ; CHECK: strh r0, [r1]
215 %r = bitcast i16 %a to half
216 store half %r, ptr %p
; llvm.sqrt.f16 applied in place to the half at %p. Expected assembly:
; vldr.16, a native vsqrt.f16, and vstr.16 back to [r0] (no library call).
220 define void @test_sqrt(ptr %p) {
221 ; CHECK-LABEL: test_sqrt:
222 ; CHECK: vldr.16 s0, [r0]
223 ; CHECK-NEXT: vsqrt.f16 s0, s0
224 ; CHECK-NEXT: vstr.16 s0, [r0]
226 %a = load half, ptr %p, align 2
227 %r = call half @llvm.sqrt.f16(half %a)
228 store half %r, ptr %p
; llvm.powi.f16.i32 applied to the half at %p with integer exponent %b.
; Expected assembly: the pointer is kept in r4 across the call, the exponent
; is moved from r1 to r0, the half is widened with vcvtb.f32.f16, __powisf2 is
; called, and the result is narrowed with vcvtb.f16.f32 and stored via r4.
232 define void @test_fpowi(ptr %p, i32 %b) {
233 ; CHECK-LABEL: test_fpowi:
234 ; CHECK: .save {r4, lr}
235 ; CHECK-NEXT: push {r4, lr}
236 ; CHECK-NEXT: vldr.16 s0, [r0]
237 ; CHECK-NEXT: mov r4, r0
238 ; CHECK-NEXT: mov r0, r1
239 ; CHECK-NEXT: vcvtb.f32.f16 s0, s0
240 ; CHECK-NEXT: bl __powisf2
241 ; CHECK-NEXT: vcvtb.f16.f32 s0, s0
242 ; CHECK-NEXT: vstr.16 s0, [r4]
243 ; CHECK-NEXT: pop {r4, pc}
244 %a = load half, ptr %p, align 2
245 %r = call half @llvm.powi.f16.i32(half %a, i32 %b)
246 store half %r, ptr %p
; llvm.sin.f16 applied in place to the half at %p. Expected assembly: widen to
; f32 (vcvtb.f32.f16), call sinf, narrow back (vcvtb.f16.f32), and store
; through r4, which holds the pointer across the call.
250 define void @test_sin(ptr %p) {
251 ; CHECK-LABEL: test_sin:
252 ; CHECK: .save {r4, lr}
253 ; CHECK-NEXT: push {r4, lr}
254 ; CHECK-NEXT: vldr.16 s0, [r0]
255 ; CHECK-NEXT: mov r4, r0
256 ; CHECK-NEXT: vcvtb.f32.f16 s0, s0
257 ; CHECK-NEXT: bl sinf
258 ; CHECK-NEXT: vcvtb.f16.f32 s0, s0
259 ; CHECK-NEXT: vstr.16 s0, [r4]
260 ; CHECK-NEXT: pop {r4, pc}
261 %a = load half, ptr %p, align 2
262 %r = call half @llvm.sin.f16(half %a)
263 store half %r, ptr %p
; llvm.cos.f16 applied in place to the half at %p. Expected assembly: widen to
; f32 (vcvtb.f32.f16), call cosf, narrow back (vcvtb.f16.f32), and store
; through r4, which holds the pointer across the call.
267 define void @test_cos(ptr %p) {
268 ; CHECK-LABEL: test_cos:
269 ; CHECK: .save {r4, lr}
270 ; CHECK-NEXT: push {r4, lr}
271 ; CHECK-NEXT: vldr.16 s0, [r0]
272 ; CHECK-NEXT: mov r4, r0
273 ; CHECK-NEXT: vcvtb.f32.f16 s0, s0
274 ; CHECK-NEXT: bl cosf
275 ; CHECK-NEXT: vcvtb.f16.f32 s0, s0
276 ; CHECK-NEXT: vstr.16 s0, [r4]
277 ; CHECK-NEXT: pop {r4, pc}
278 %a = load half, ptr %p, align 2
279 %r = call half @llvm.cos.f16(half %a)
280 store half %r, ptr %p
; llvm.pow.f16 on halves loaded from %p (base) and %q (exponent), result
; stored to %p. Expected assembly: both operands are widened to f32 into s0
; and s1, powf is called, and the result is narrowed and stored through r4.
284 define void @test_pow(ptr %p, ptr %q) {
285 ; CHECK-LABEL: test_pow:
286 ; CHECK: .save {r4, lr}
287 ; CHECK-NEXT: push {r4, lr}
288 ; CHECK-NEXT: vldr.16 s0, [r0]
289 ; CHECK-NEXT: vldr.16 s2, [r1]
290 ; CHECK-NEXT: mov r4, r0
291 ; CHECK-NEXT: vcvtb.f32.f16 s0, s0
292 ; CHECK-NEXT: vcvtb.f32.f16 s1, s2
293 ; CHECK-NEXT: bl powf
294 ; CHECK-NEXT: vcvtb.f16.f32 s0, s0
295 ; CHECK-NEXT: vstr.16 s0, [r4]
296 ; CHECK-NEXT: pop {r4, pc}
297 %a = load half, ptr %p, align 2
298 %b = load half, ptr %q, align 2
299 %r = call half @llvm.pow.f16(half %a, half %b)
300 store half %r, ptr %p
; llvm.exp.f16 applied in place to the half at %p. Expected assembly: widen to
; f32, call expf, narrow back to f16, and store through r4.
304 define void @test_exp(ptr %p) {
305 ; CHECK-LABEL: test_exp:
306 ; CHECK: .save {r4, lr}
307 ; CHECK-NEXT: push {r4, lr}
308 ; CHECK-NEXT: vldr.16 s0, [r0]
309 ; CHECK-NEXT: mov r4, r0
310 ; CHECK-NEXT: vcvtb.f32.f16 s0, s0
311 ; CHECK-NEXT: bl expf
312 ; CHECK-NEXT: vcvtb.f16.f32 s0, s0
313 ; CHECK-NEXT: vstr.16 s0, [r4]
314 ; CHECK-NEXT: pop {r4, pc}
315 %a = load half, ptr %p, align 2
316 %r = call half @llvm.exp.f16(half %a)
317 store half %r, ptr %p
; llvm.exp2.f16 applied in place to the half at %p. Expected assembly: widen
; to f32, call exp2f, narrow back to f16, and store through r4.
321 define void @test_exp2(ptr %p) {
322 ; CHECK-LABEL: test_exp2:
323 ; CHECK: .save {r4, lr}
324 ; CHECK-NEXT: push {r4, lr}
325 ; CHECK-NEXT: vldr.16 s0, [r0]
326 ; CHECK-NEXT: mov r4, r0
327 ; CHECK-NEXT: vcvtb.f32.f16 s0, s0
328 ; CHECK-NEXT: bl exp2f
329 ; CHECK-NEXT: vcvtb.f16.f32 s0, s0
330 ; CHECK-NEXT: vstr.16 s0, [r4]
331 ; CHECK-NEXT: pop {r4, pc}
332 %a = load half, ptr %p, align 2
333 %r = call half @llvm.exp2.f16(half %a)
334 store half %r, ptr %p
; llvm.log.f16 applied in place to the half at %p. Expected assembly: widen to
; f32, call logf, narrow back to f16, and store through r4.
338 define void @test_log(ptr %p) {
339 ; CHECK-LABEL: test_log:
340 ; CHECK: .save {r4, lr}
341 ; CHECK-NEXT: push {r4, lr}
342 ; CHECK-NEXT: vldr.16 s0, [r0]
343 ; CHECK-NEXT: mov r4, r0
344 ; CHECK-NEXT: vcvtb.f32.f16 s0, s0
345 ; CHECK-NEXT: bl logf
346 ; CHECK-NEXT: vcvtb.f16.f32 s0, s0
347 ; CHECK-NEXT: vstr.16 s0, [r4]
348 ; CHECK-NEXT: pop {r4, pc}
349 %a = load half, ptr %p, align 2
350 %r = call half @llvm.log.f16(half %a)
351 store half %r, ptr %p
; llvm.log10.f16 applied in place to the half at %p. Expected assembly: widen
; to f32, call log10f, narrow back to f16, and store through r4.
355 define void @test_log10(ptr %p) {
356 ; CHECK-LABEL: test_log10:
357 ; CHECK: .save {r4, lr}
358 ; CHECK-NEXT: push {r4, lr}
359 ; CHECK-NEXT: vldr.16 s0, [r0]
360 ; CHECK-NEXT: mov r4, r0
361 ; CHECK-NEXT: vcvtb.f32.f16 s0, s0
362 ; CHECK-NEXT: bl log10f
363 ; CHECK-NEXT: vcvtb.f16.f32 s0, s0
364 ; CHECK-NEXT: vstr.16 s0, [r4]
365 ; CHECK-NEXT: pop {r4, pc}
366 %a = load half, ptr %p, align 2
367 %r = call half @llvm.log10.f16(half %a)
368 store half %r, ptr %p
; llvm.log2.f16 applied in place to the half at %p. Expected assembly: widen
; to f32, call log2f, narrow back to f16, and store through r4.
372 define void @test_log2(ptr %p) {
373 ; CHECK-LABEL: test_log2:
374 ; CHECK: .save {r4, lr}
375 ; CHECK-NEXT: push {r4, lr}
376 ; CHECK-NEXT: vldr.16 s0, [r0]
377 ; CHECK-NEXT: mov r4, r0
378 ; CHECK-NEXT: vcvtb.f32.f16 s0, s0
379 ; CHECK-NEXT: bl log2f
380 ; CHECK-NEXT: vcvtb.f16.f32 s0, s0
381 ; CHECK-NEXT: vstr.16 s0, [r4]
382 ; CHECK-NEXT: pop {r4, pc}
383 %a = load half, ptr %p, align 2
384 %r = call half @llvm.log2.f16(half %a)
385 store half %r, ptr %p
; llvm.fma.f16 on halves loaded from %p, %q and %r, result stored to %p.
; Expected assembly: three vldr.16 loads, one vfma.f16 accumulating into s4
; (the register loaded from [r2]), and a vstr.16 of s4 to [r0].
389 define void @test_fma(ptr %p, ptr %q, ptr %r) {
390 ; CHECK-LABEL: test_fma:
391 ; CHECK: vldr.16 s0, [r1]
392 ; CHECK-NEXT: vldr.16 s2, [r0]
393 ; CHECK-NEXT: vldr.16 s4, [r2]
394 ; CHECK-NEXT: vfma.f16 s4, s2, s0
395 ; CHECK-NEXT: vstr.16 s4, [r0]
397 %a = load half, ptr %p, align 2
398 %b = load half, ptr %q, align 2
399 %c = load half, ptr %r, align 2
400 %v = call half @llvm.fma.f16(half %a, half %b, half %c)
401 store half %v, ptr %p
; llvm.fabs.f16 applied in place to the half at %p. Expected assembly:
; vldr.16, vabs.f16, vstr.16 back to [r0].
405 define void @test_fabs(ptr %p) {
406 ; CHECK-LABEL: test_fabs:
407 ; CHECK: vldr.16 s0, [r0]
408 ; CHECK-NEXT: vabs.f16 s0, s0
409 ; CHECK-NEXT: vstr.16 s0, [r0]
411 %a = load half, ptr %p, align 2
412 %r = call half @llvm.fabs.f16(half %a)
413 store half %r, ptr %p
; llvm.minnum.f16 on halves loaded from %p and %q, result stored to %p.
; Expected assembly: two vldr.16 loads, one vminnm.f16, vstr.16 to [r0].
417 define void @test_minnum(ptr %p, ptr %q) {
418 ; CHECK-LABEL: test_minnum:
419 ; CHECK: vldr.16 s0, [r1]
420 ; CHECK-NEXT: vldr.16 s2, [r0]
421 ; CHECK-NEXT: vminnm.f16 s0, s2, s0
422 ; CHECK-NEXT: vstr.16 s0, [r0]
424 %a = load half, ptr %p, align 2
425 %b = load half, ptr %q, align 2
426 %r = call half @llvm.minnum.f16(half %a, half %b)
427 store half %r, ptr %p
; llvm.maxnum.f16 on halves loaded from %p and %q, result stored to %p.
; Expected assembly: two vldr.16 loads, one vmaxnm.f16, vstr.16 to [r0].
431 define void @test_maxnum(ptr %p, ptr %q) {
432 ; CHECK-LABEL: test_maxnum:
433 ; CHECK: vldr.16 s0, [r1]
434 ; CHECK-NEXT: vldr.16 s2, [r0]
435 ; CHECK-NEXT: vmaxnm.f16 s0, s2, s0
436 ; CHECK-NEXT: vstr.16 s0, [r0]
438 %a = load half, ptr %p, align 2
439 %b = load half, ptr %q, align 2
440 %r = call half @llvm.maxnum.f16(half %a, half %b)
441 store half %r, ptr %p
; Compare-and-select against the constant 1.0, stored back to %p. Despite the
; name, the IR here is `fcmp ult` + `select`, not a minimum intrinsic call.
; Expected assembly: load the value into s2, materialise 1.0 with vmov.f16,
; compare with vcmp.f16 (value vs. constant), transfer flags with vmrs, and
; pick the result with vselge.f16.
445 define void @test_minimum(ptr %p) {
446 ; CHECK-LABEL: test_minimum:
447 ; CHECK: vldr.16 s2, [r0]
448 ; CHECK-NEXT: vmov.f16 s0, #1.000000e+00
449 ; CHECK-NEXT: vcmp.f16 s2, s0
450 ; CHECK-NEXT: vmrs APSR_nzcv, fpscr
451 ; CHECK-NEXT: vselge.f16 s0, s0, s2
452 ; CHECK-NEXT: vstr.16 s0, [r0]
454 %a = load half, ptr %p, align 2
455 %c = fcmp ult half %a, 1.0
456 %r = select i1 %c, half %a, half 1.0
457 store half %r, ptr %p
; Compare-and-select against the constant 1.0, stored back to %p. Despite the
; name, the IR here is `fcmp ugt` + `select`, not a maximum intrinsic call.
; Expected assembly mirrors test_minimum except that the vcmp.f16 operands are
; swapped (constant vs. value); the vselge.f16 operands are the same.
461 define void @test_maximum(ptr %p) {
462 ; CHECK-LABEL: test_maximum:
463 ; CHECK: vldr.16 s2, [r0]
464 ; CHECK-NEXT: vmov.f16 s0, #1.000000e+00
465 ; CHECK-NEXT: vcmp.f16 s0, s2
466 ; CHECK-NEXT: vmrs APSR_nzcv, fpscr
467 ; CHECK-NEXT: vselge.f16 s0, s0, s2
468 ; CHECK-NEXT: vstr.16 s0, [r0]
470 %a = load half, ptr %p, align 2
471 %c = fcmp ugt half %a, 1.0
472 %r = select i1 %c, half %a, half 1.0
473 store half %r, ptr %p
; llvm.copysign.f16: magnitude from %p, sign from %q, result stored to %p.
; Expected assembly goes through a 4-byte stack slot: the sign operand is
; spilled to [sp], the byte at [sp, #1] is reloaded and its bit 7 tested
; (tst #128); the magnitude is made non-negative with vabs.f16, a negated copy
; is produced with vneg.f16, and vseleq.f16 picks between the two.
; NOTE(review): the first expectation line after the label is not visible in
; this excerpt.
477 define void @test_copysign(ptr %p, ptr %q) {
478 ; CHECK-LABEL: test_copysign:
480 ; CHECK-NEXT: sub sp, sp, #4
481 ; CHECK-NEXT: vldr.16 s0, [r1]
482 ; CHECK-NEXT: vstr.16 s0, [sp]
483 ; CHECK-NEXT: vldr.16 s0, [r0]
484 ; CHECK-NEXT: ldrb r1, [sp, #1]
485 ; CHECK-NEXT: vabs.f16 s0, s0
486 ; CHECK-NEXT: tst r1, #128
487 ; CHECK-NEXT: vneg.f16 s2, s0
488 ; CHECK-NEXT: vseleq.f16 s0, s0, s2
489 ; CHECK-NEXT: vstr.16 s0, [r0]
490 ; CHECK-NEXT: add sp, sp, #4
492 %a = load half, ptr %p, align 2
493 %b = load half, ptr %q, align 2
494 %r = call half @llvm.copysign.f16(half %a, half %b)
495 store half %r, ptr %p
; llvm.floor.f16 applied in place to the half at %p. Expected assembly:
; vldr.16, vrintm.f16, vstr.16 back to [r0].
499 define void @test_floor(ptr %p) {
500 ; CHECK-LABEL: test_floor:
501 ; CHECK: vldr.16 s0, [r0]
502 ; CHECK-NEXT: vrintm.f16 s0, s0
503 ; CHECK-NEXT: vstr.16 s0, [r0]
505 %a = load half, ptr %p, align 2
506 %r = call half @llvm.floor.f16(half %a)
507 store half %r, ptr %p
; llvm.ceil.f16 applied in place to the half at %p. Expected assembly:
; vldr.16, vrintp.f16, vstr.16 back to [r0].
511 define void @test_ceil(ptr %p) {
512 ; CHECK-LABEL: test_ceil:
513 ; CHECK: vldr.16 s0, [r0]
514 ; CHECK-NEXT: vrintp.f16 s0, s0
515 ; CHECK-NEXT: vstr.16 s0, [r0]
517 %a = load half, ptr %p, align 2
518 %r = call half @llvm.ceil.f16(half %a)
519 store half %r, ptr %p
; llvm.trunc.f16 applied in place to the half at %p. Expected assembly:
; vldr.16, vrintz.f16, vstr.16 back to [r0].
523 define void @test_trunc(ptr %p) {
524 ; CHECK-LABEL: test_trunc:
525 ; CHECK: vldr.16 s0, [r0]
526 ; CHECK-NEXT: vrintz.f16 s0, s0
527 ; CHECK-NEXT: vstr.16 s0, [r0]
529 %a = load half, ptr %p, align 2
530 %r = call half @llvm.trunc.f16(half %a)
531 store half %r, ptr %p
; llvm.rint.f16 applied in place to the half at %p. Expected assembly:
; vldr.16, vrintx.f16, vstr.16 back to [r0].
535 define void @test_rint(ptr %p) {
536 ; CHECK-LABEL: test_rint:
537 ; CHECK: vldr.16 s0, [r0]
538 ; CHECK-NEXT: vrintx.f16 s0, s0
539 ; CHECK-NEXT: vstr.16 s0, [r0]
541 %a = load half, ptr %p, align 2
542 %r = call half @llvm.rint.f16(half %a)
543 store half %r, ptr %p
; llvm.nearbyint.f16 applied in place to the half at %p. Expected assembly:
; vldr.16, vrintr.f16, vstr.16 back to [r0].
547 define void @test_nearbyint(ptr %p) {
548 ; CHECK-LABEL: test_nearbyint:
549 ; CHECK: vldr.16 s0, [r0]
550 ; CHECK-NEXT: vrintr.f16 s0, s0
551 ; CHECK-NEXT: vstr.16 s0, [r0]
553 %a = load half, ptr %p, align 2
554 %r = call half @llvm.nearbyint.f16(half %a)
555 store half %r, ptr %p
; llvm.round.f16 applied in place to the half at %p. Expected assembly:
; vldr.16, vrinta.f16, vstr.16 back to [r0].
559 define void @test_round(ptr %p) {
560 ; CHECK-LABEL: test_round:
561 ; CHECK: vldr.16 s0, [r0]
562 ; CHECK-NEXT: vrinta.f16 s0, s0
563 ; CHECK-NEXT: vstr.16 s0, [r0]
565 %a = load half, ptr %p, align 2
566 %r = call half @llvm.round.f16(half %a)
567 store half %r, ptr %p
; llvm.fmuladd.f16 on halves loaded from %p, %q and %r, result stored to %p.
; The expected assembly is the same sequence as in test_fma: three vldr.16
; loads, one vfma.f16 accumulating into s4, and a vstr.16 of s4 to [r0].
571 define void @test_fmuladd(ptr %p, ptr %q, ptr %r) {
572 ; CHECK-LABEL: test_fmuladd:
573 ; CHECK: vldr.16 s0, [r1]
574 ; CHECK-NEXT: vldr.16 s2, [r0]
575 ; CHECK-NEXT: vldr.16 s4, [r2]
576 ; CHECK-NEXT: vfma.f16 s4, s2, s0
577 ; CHECK-NEXT: vstr.16 s4, [r0]
579 %a = load half, ptr %p, align 2
580 %b = load half, ptr %q, align 2
581 %c = load half, ptr %r, align 2
582 %v = call half @llvm.fmuladd.f16(half %a, half %b, half %c)
583 store half %v, ptr %p
; Declarations of the half-precision LLVM intrinsics called by the test
; functions in this file. All take and return `half`; powi additionally takes
; an i32 exponent.
587 declare half @llvm.sqrt.f16(half %a)
588 declare half @llvm.powi.f16.i32(half %a, i32 %b)
589 declare half @llvm.sin.f16(half %a)
590 declare half @llvm.cos.f16(half %a)
591 declare half @llvm.pow.f16(half %a, half %b)
592 declare half @llvm.exp.f16(half %a)
593 declare half @llvm.exp2.f16(half %a)
594 declare half @llvm.log.f16(half %a)
595 declare half @llvm.log10.f16(half %a)
596 declare half @llvm.log2.f16(half %a)
597 declare half @llvm.fma.f16(half %a, half %b, half %c)
598 declare half @llvm.fabs.f16(half %a)
599 declare half @llvm.minnum.f16(half %a, half %b)
600 declare half @llvm.maxnum.f16(half %a, half %b)
601 declare half @llvm.copysign.f16(half %a, half %b)
602 declare half @llvm.floor.f16(half %a)
603 declare half @llvm.ceil.f16(half %a)
604 declare half @llvm.trunc.f16(half %a)
605 declare half @llvm.rint.f16(half %a)
606 declare half @llvm.nearbyint.f16(half %a)
607 declare half @llvm.round.f16(half %a)
608 declare half @llvm.fmuladd.f16(half %a, half %b, half %c)