1 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2 ; RUN: llc < %s -mtriple=x86_64-unknown-linux-gnu -mattr=-avx512fp16 | FileCheck %s -check-prefix=LIBCALL
3 ; RUN: llc < %s -mtriple=x86_64-unknown-linux-gnu -mattr=+avx512fp16 | FileCheck %s -check-prefix=FP16
; test1 converts the float argument to half-precision bits with
; llvm.convert.to.fp16.f32 and stores the i16 result through %dest.
; Without avx512fp16 the expected code keeps %dest in %rbx across a call to
; __gnu_f2h_ieee and then stores %ax; with avx512fp16 it expects vcvtss2sh
; followed by a vmovsh store straight to (%rdi).
5 define void @test1(float %src, i16* %dest) {
6 ; LIBCALL-LABEL: test1:
8 ; LIBCALL-NEXT: pushq %rbx
9 ; LIBCALL-NEXT: .cfi_def_cfa_offset 16
10 ; LIBCALL-NEXT: .cfi_offset %rbx, -16
11 ; LIBCALL-NEXT: movq %rdi, %rbx
12 ; LIBCALL-NEXT: callq __gnu_f2h_ieee@PLT
13 ; LIBCALL-NEXT: movw %ax, (%rbx)
14 ; LIBCALL-NEXT: popq %rbx
15 ; LIBCALL-NEXT: .cfi_def_cfa_offset 8
20 ; FP16-NEXT: vcvtss2sh %xmm0, %xmm0, %xmm0
21 ; FP16-NEXT: vmovsh %xmm0, (%rdi)
23 %1 = tail call i16 @llvm.convert.to.fp16.f32(float %src)
24 store i16 %1, i16* %dest, align 2
; test2 loads half-precision bits (i16) from %src and converts them to float
; with llvm.convert.from.fp16.f32.
; Without avx512fp16 the expected code zero-extends the loaded halfword into
; %edi and tail-jumps to __gnu_h2f_ieee; with avx512fp16 it expects a vmovsh
; load followed by vcvtsh2ss.
28 define float @test2(i16* nocapture %src) {
29 ; LIBCALL-LABEL: test2:
31 ; LIBCALL-NEXT: movzwl (%rdi), %edi
32 ; LIBCALL-NEXT: jmp __gnu_h2f_ieee@PLT # TAILCALL
36 ; FP16-NEXT: vmovsh (%rdi), %xmm0
37 ; FP16-NEXT: vcvtsh2ss %xmm0, %xmm0, %xmm0
39 %1 = load i16, i16* %src, align 2
40 %2 = tail call float @llvm.convert.from.fp16.f32(i16 %1)
; test3 round-trips a float through half precision by feeding the result of
; llvm.convert.to.fp16.f32 directly into llvm.convert.from.fp16.f32.
; Without avx512fp16 the expected code calls __gnu_f2h_ieee, zero-extends %ax
; into %edi and tail-jumps to __gnu_h2f_ieee; with avx512fp16 it expects the
; pair vcvtss2sh / vcvtsh2ss with no library call.
44 define float @test3(float %src) nounwind uwtable readnone {
45 ; LIBCALL-LABEL: test3:
47 ; LIBCALL-NEXT: pushq %rax
48 ; LIBCALL-NEXT: .cfi_def_cfa_offset 16
49 ; LIBCALL-NEXT: callq __gnu_f2h_ieee@PLT
50 ; LIBCALL-NEXT: movzwl %ax, %edi
51 ; LIBCALL-NEXT: popq %rax
52 ; LIBCALL-NEXT: .cfi_def_cfa_offset 8
53 ; LIBCALL-NEXT: jmp __gnu_h2f_ieee@PLT # TAILCALL
57 ; FP16-NEXT: vcvtss2sh %xmm0, %xmm0, %xmm0
58 ; FP16-NEXT: vcvtsh2ss %xmm0, %xmm0, %xmm0
60 %1 = tail call i16 @llvm.convert.to.fp16.f32(float %src)
61 %2 = tail call float @llvm.convert.from.fp16.f32(i16 %1)
65 ; FIXME: Should it be __extendhfdf2?
; The checks for the run without avx512fp16 currently expect a call to
; __gnu_h2f_ieee (half to float) followed by cvtss2sd, i.e. a two-step
; extension rather than a single half-to-double library call.
;
; test4 loads half-precision bits (i16) from %src and converts them to double
; with llvm.convert.from.fp16.f64. With avx512fp16 the expected code is a
; vmovsh load followed by vcvtsh2sd.
66 define double @test4(i16* nocapture %src) {
67 ; LIBCALL-LABEL: test4:
69 ; LIBCALL-NEXT: pushq %rax
70 ; LIBCALL-NEXT: .cfi_def_cfa_offset 16
71 ; LIBCALL-NEXT: movzwl (%rdi), %edi
72 ; LIBCALL-NEXT: callq __gnu_h2f_ieee@PLT
73 ; LIBCALL-NEXT: cvtss2sd %xmm0, %xmm0
74 ; LIBCALL-NEXT: popq %rax
75 ; LIBCALL-NEXT: .cfi_def_cfa_offset 8
80 ; FP16-NEXT: vmovsh (%rdi), %xmm0
81 ; FP16-NEXT: vcvtsh2sd %xmm0, %xmm0, %xmm0
83 %1 = load i16, i16* %src, align 2
84 %2 = tail call double @llvm.convert.from.fp16.f64(i16 %1)
; test5 converts the double argument to half-precision bits (i16) with
; llvm.convert.to.fp16.f64.
; Without avx512fp16 the expected code is a tail-jump to __truncdfhf2; with
; avx512fp16 it expects vcvtsd2sh followed by vmovw moving the result from
; %xmm0 into %eax.
88 define i16 @test5(double %src) {
89 ; LIBCALL-LABEL: test5:
91 ; LIBCALL-NEXT: jmp __truncdfhf2@PLT # TAILCALL
95 ; FP16-NEXT: vcvtsd2sh %xmm0, %xmm0, %xmm0
96 ; FP16-NEXT: vmovw %xmm0, %eax
97 ; FP16-NEXT: # kill: def $ax killed $ax killed $eax
99 %val = tail call i16 @llvm.convert.to.fp16.f64(double %src)
103 ; FIXME: Should it be __extendhfxf2?
; NOTE(review) - presumably this question is about the run without
; avx512fp16, whose checks expect __gnu_h2f_ieee, a movss spill to the stack
; and an flds reload; the avx512fp16 checks below already expect a call to
; __extendhfxf2. Confirm before acting on the FIXME.
;
; test6 loads half-precision bits (i16) from %src and converts them to
; x86_fp80 with llvm.convert.from.fp16.f80.
104 define x86_fp80 @test6(i16* nocapture %src) {
105 ; LIBCALL-LABEL: test6:
107 ; LIBCALL-NEXT: pushq %rax
108 ; LIBCALL-NEXT: .cfi_def_cfa_offset 16
109 ; LIBCALL-NEXT: movzwl (%rdi), %edi
110 ; LIBCALL-NEXT: callq __gnu_h2f_ieee@PLT
111 ; LIBCALL-NEXT: movss %xmm0, {{[0-9]+}}(%rsp)
112 ; LIBCALL-NEXT: flds {{[0-9]+}}(%rsp)
113 ; LIBCALL-NEXT: popq %rax
114 ; LIBCALL-NEXT: .cfi_def_cfa_offset 8
119 ; FP16-NEXT: pushq %rax
120 ; FP16-NEXT: .cfi_def_cfa_offset 16
121 ; FP16-NEXT: vmovsh (%rdi), %xmm0
122 ; FP16-NEXT: callq __extendhfxf2@PLT
123 ; FP16-NEXT: popq %rax
124 ; FP16-NEXT: .cfi_def_cfa_offset 8
126 %1 = load i16, i16* %src, align 2
127 %2 = tail call x86_fp80 @llvm.convert.from.fp16.f80(i16 %1)
; test7 converts the x86_fp80 argument to half-precision bits (i16) with
; llvm.convert.to.fp16.f80.
; Without avx512fp16 the expected code is a tail-jump to __truncxfhf2. With
; avx512fp16 the expected code reserves 24 bytes of stack, copies the fp80
; value to (%rsp) via fldt/fstpt, calls __truncxfhf2 and then moves the result
; from %xmm0 into %eax with vmovw.
131 define i16 @test7(x86_fp80 %src) {
132 ; LIBCALL-LABEL: test7:
134 ; LIBCALL-NEXT: jmp __truncxfhf2@PLT # TAILCALL
138 ; FP16-NEXT: subq $24, %rsp
139 ; FP16-NEXT: .cfi_def_cfa_offset 32
140 ; FP16-NEXT: fldt {{[0-9]+}}(%rsp)
141 ; FP16-NEXT: fstpt (%rsp)
142 ; FP16-NEXT: callq __truncxfhf2@PLT
143 ; FP16-NEXT: vmovw %xmm0, %eax
144 ; FP16-NEXT: # kill: def $ax killed $ax killed $eax
145 ; FP16-NEXT: addq $24, %rsp
146 ; FP16-NEXT: .cfi_def_cfa_offset 8
148 %val = tail call i16 @llvm.convert.to.fp16.f80(x86_fp80 %src)
; Declarations of the half-precision conversion intrinsics called by the
; tests above. Each "from" intrinsic takes the half value as i16 bits and
; yields float, double or x86_fp80; each "to" intrinsic takes one of those
; types and yields i16 bits. All are declared nounwind readnone.
152 declare float @llvm.convert.from.fp16.f32(i16) nounwind readnone
153 declare i16 @llvm.convert.to.fp16.f32(float) nounwind readnone
154 declare double @llvm.convert.from.fp16.f64(i16) nounwind readnone
155 declare i16 @llvm.convert.to.fp16.f64(double) nounwind readnone
156 declare x86_fp80 @llvm.convert.from.fp16.f80(i16) nounwind readnone
157 declare i16 @llvm.convert.to.fp16.f80(x86_fp80) nounwind readnone