From 9af8dca70fce1538675a857e3a053f6d35b18aea Mon Sep 17 00:00:00 2001 From: Tue Ly Date: Thu, 6 Apr 2023 09:26:16 -0400 Subject: [PATCH] [libc][math] Update range reduction step for log10f and reduce its latency. Simplify the range reduction steps by choosing the reduction constants carefully so that the reduced arguments v = r*m_x - 1 and v^2 are exact in double precision, even without FMA instructions, and -2^-8 <= v < 2^-7. This allows the polynomial evaluations to be parallelized more efficiently. Reviewed By: zimmermann6 Differential Revision: https://reviews.llvm.org/D147676 --- libc/docs/math/index.rst | 114 ++++++++-------- libc/src/math/generic/common_constants.cpp | 28 ++++ libc/src/math/generic/common_constants.h | 3 + libc/src/math/generic/log10f.cpp | 204 ++++++++++++++++------------- libc/test/src/math/log10f_test.cpp | 10 +- 5 files changed, 209 insertions(+), 150 deletions(-) diff --git a/libc/docs/math/index.rst b/libc/docs/math/index.rst index 3ea2500c2cb8..8a99d67562a5 100644 --- a/libc/docs/math/index.rst +++ b/libc/docs/math/index.rst @@ -210,63 +210,63 @@ Performance table below, using the system library as reference (such as the `GNU C library `_ on Linux). Fmod performance results obtained with "differential_testing". -+--------------+-------------------------------+-------------------------------+-------------------------------------+---------------------------------------------------------------------+ -| | Reciprocal throughput (ns) | Latency (ns) | Testing ranges | Testing configuration | -| +-----------+-------------------+-----------+-------------------+ +------------+-------------------------+--------------+---------------+ -| | LLVM libc | Reference (glibc) | LLVM libc | Reference (glibc) | | CPU | OS | Compiler | Special flags | -+==============+===========+===================+===========+===================+=====================================+============+=========================+==============+===============+ -| acosf | 24 | 29 | 62 | 77 | :math:`[-1, 1]` | Ryzen 1700 | Ubuntu 22.04 LTS x86_64 | Clang 14.0.0 | FMA | -+--------------+-----------+-------------------+-----------+-------------------+-------------------------------------+------------+-------------------------+--------------+---------------+ -| acoshf | 18 | 26 | 73 | 74 | :math:`[1, 21]` | Ryzen 1700 | Ubuntu 22.04 LTS x86_64 | Clang 14.0.0 | FMA | -+--------------+-----------+-------------------+-----------+-------------------+-------------------------------------+------------+-------------------------+--------------+---------------+ -| asinf | 23 | 27 | 62 | 62 | :math:`[-1, 1]` | Ryzen 1700 | Ubuntu 22.04 LTS x86_64 | Clang 14.0.0 | FMA | -+--------------+-----------+-------------------+-----------+-------------------+-------------------------------------+------------+-------------------------+--------------+---------------+ -| asinhf | 21 | 39 | 77 | 91 | :math:`[-10, 10]` | Ryzen 1700 | Ubuntu 22.04 LTS x86_64 | Clang 14.0.0 | FMA | -+--------------+-----------+-------------------+-----------+-------------------+-------------------------------------+------------+-------------------------+--------------+---------------+ -| atanf | 27 | 29 | 79 | 68 | :math:`[-10, 10]` | Ryzen 1700 | Ubuntu 22.04 LTS x86_64 | Clang 14.0.0 | FMA | -+--------------+-----------+-------------------+-----------+-------------------+-------------------------------------+------------+-------------------------+--------------+---------------+ -| atanhf | 18 | 66 | 68 | 133 | :math:`[-1, 1]` | Ryzen 1700 | Ubuntu 22.04 LTS x86_64 | Clang 14.0.0 | FMA | -+--------------+-----------+-------------------+-----------+-------------------+-------------------------------------+------------+-------------------------+--------------+---------------+ -| cosf | 13 | 32 | 53 | 59 | :math:`[0, 2\pi]` | Ryzen 1700 | Ubuntu 20.04 LTS x86_64 | Clang 12.0.0 | FMA | -+--------------+-----------+-------------------+-----------+-------------------+-------------------------------------+------------+-------------------------+--------------+---------------+ -| coshf | 14 | 20 | 50 | 48 | :math:`[-10, 10]` | Ryzen 1700 | Ubuntu 22.04 LTS x86_64 | Clang 14.0.0 | FMA | -+--------------+-----------+-------------------+-----------+-------------------+-------------------------------------+------------+-------------------------+--------------+---------------+ -| expf | 9 | 7 | 44 | 38 | :math:`[-10, 10]` | Ryzen 1700 | Ubuntu 20.04 LTS x86_64 | Clang 12.0.0 | FMA | -+--------------+-----------+-------------------+-----------+-------------------+-------------------------------------+------------+-------------------------+--------------+---------------+ -| exp10f | 10 | 8 | 40 | 38 | :math:`[-10, 10]` | Ryzen 1700 | Ubuntu 22.04 LTS x86_64 | Clang 14.0.0 | FMA | -+--------------+-----------+-------------------+-----------+-------------------+-------------------------------------+------------+-------------------------+--------------+---------------+ -| exp2f | 9 | 6 | 35 | 31 | :math:`[-10, 10]` | Ryzen 1700 | Ubuntu 22.04 LTS x86_64 | Clang 14.0.0 | FMA | -+--------------+-----------+-------------------+-----------+-------------------+-------------------------------------+------------+-------------------------+--------------+---------------+ -| expm1f | 9 | 44 | 42 | 121 | :math:`[-10, 10]` | Ryzen 1700 | Ubuntu 20.04 LTS x86_64 | Clang 12.0.0 | FMA | -+--------------+-----------+-------------------+-----------+-------------------+-------------------------------------+------------+-------------------------+--------------+---------------+ -| fmodf | 73 | 263 | - | - | [MIN_NORMAL, MAX_NORMAL] | i5 mobile | Ubuntu 20.04 LTS x86_64 | Clang 12.0.0 | | -| +-----------+-------------------+-----------+-------------------+-------------------------------------+------------+-------------------------+--------------+---------------+ -| | 9 | 11 | - | - | [0, MAX_SUBNORMAL] | i5 mobile | Ubuntu 20.04 LTS x86_64 | Clang 12.0.0 | | -+--------------+-----------+-------------------+-----------+-------------------+-------------------------------------+------------+-------------------------+--------------+---------------+ -| fmod | 595 | 3297 | - | - | [MIN_NORMAL, MAX_NORMAL] | i5 mobile | Ubuntu 20.04 LTS x86_64 | Clang 12.0.0 | | -| +-----------+-------------------+-----------+-------------------+-------------------------------------+------------+-------------------------+--------------+---------------+ -| | 14 | 13 | - | - | [0, MAX_SUBNORMAL] | i5 mobile | Ubuntu 20.04 LTS x86_64 | Clang 12.0.0 | | -+--------------+-----------+-------------------+-----------+-------------------+-------------------------------------+------------+-------------------------+--------------+---------------+ -| hypotf | 25 | 15 | 64 | 49 | :math:`[-10, 10] \times [-10, 10]` | Ryzen 1700 | Ubuntu 20.04 LTS x86_64 | Clang 12.0.0 | | -+--------------+-----------+-------------------+-----------+-------------------+-------------------------------------+------------+-------------------------+--------------+---------------+ -| logf | 12 | 10 | 56 | 46 | :math:`[e^{-1}, e]` | Ryzen 1700 | Ubuntu 20.04 LTS x86_64 | Clang 12.0.0 | FMA | -+--------------+-----------+-------------------+-----------+-------------------+-------------------------------------+------------+-------------------------+--------------+---------------+ -| log10f | 13 | 25 | 57 | 72 | :math:`[e^{-1}, e]` | Ryzen 1700 | Ubuntu 20.04 LTS x86_64 | Clang 12.0.0 | FMA | -+--------------+-----------+-------------------+-----------+-------------------+-------------------------------------+------------+-------------------------+--------------+---------------+ -| log1pf | 16 | 33 | 61 | 97 | :math:`[e^{-0.5} - 1, e^{0.5} - 1]` | Ryzen 1700 | Ubuntu 20.04 LTS x86_64 | Clang 12.0.0 | FMA | -+--------------+-----------+-------------------+-----------+-------------------+-------------------------------------+------------+-------------------------+--------------+---------------+ -| log2f | 13 | 10 | 57 | 46 | :math:`[e^{-1}, e]` | Ryzen 1700 | Ubuntu 20.04 LTS x86_64 | Clang 12.0.0 | FMA | -+--------------+-----------+-------------------+-----------+-------------------+-------------------------------------+------------+-------------------------+--------------+---------------+ -| sinf | 12 | 25 | 51 | 57 | :math:`[-\pi, \pi]` | Ryzen 1700 | Ubuntu 20.04 LTS x86_64 | Clang 12.0.0 | FMA | -+--------------+-----------+-------------------+-----------+-------------------+-------------------------------------+------------+-------------------------+--------------+---------------+ -| sincosf | 19 | 30 | 57 | 68 | :math:`[-\pi, \pi]` | Ryzen 1700 | Ubuntu 20.04 LTS x86_64 | Clang 12.0.0 | FMA | -+--------------+-----------+-------------------+-----------+-------------------+-------------------------------------+------------+-------------------------+--------------+---------------+ -| sinhf | 13 | 63 | 48 | 137 | :math:`[-10, 10]` | Ryzen 1700 | Ubuntu 22.04 LTS x86_64 | Clang 14.0.0 | FMA | -+--------------+-----------+-------------------+-----------+-------------------+-------------------------------------+------------+-------------------------+--------------+---------------+ -| tanf | 16 | 50 | 61 | 107 | :math:`[-\pi, \pi]` | Ryzen 1700 | Ubuntu 22.04 LTS x86_64 | Clang 14.0.0 | FMA | -+--------------+-----------+-------------------+-----------+-------------------+-------------------------------------+------------+-------------------------+--------------+---------------+ -| tanhf | 13 | 55 | 57 | 123 | :math:`[-10, 10]` | Ryzen 1700 | Ubuntu 22.04 LTS x86_64 | Clang 14.0.0 | FMA | -+--------------+-----------+-------------------+-----------+-------------------+-------------------------------------+------------+-------------------------+--------------+---------------+ ++--------------+-------------------------------+-------------------------------+-------------------------------------+----------------------------------------------------------------------+ +| | Reciprocal throughput (clk) | Latency (clk) | Testing ranges | Testing configuration | +| +-----------+-------------------+-----------+-------------------+ +-------------+-------------------------+--------------+---------------+ +| | LLVM libc | Reference (glibc) | LLVM libc | Reference (glibc) | | CPU | OS | Compiler | Special flags | ++==============+===========+===================+===========+===================+=====================================+=============+=========================+==============+===============+ +| acosf | 24 | 29 | 62 | 77 | :math:`[-1, 1]` | Ryzen 1700 | Ubuntu 22.04 LTS x86_64 | Clang 14.0.0 | FMA | ++--------------+-----------+-------------------+-----------+-------------------+-------------------------------------+-------------+-------------------------+--------------+---------------+ +| acoshf | 18 | 26 | 73 | 74 | :math:`[1, 21]` | Ryzen 1700 | Ubuntu 22.04 LTS x86_64 | Clang 14.0.0 | FMA | ++--------------+-----------+-------------------+-----------+-------------------+-------------------------------------+-------------+-------------------------+--------------+---------------+ +| asinf | 23 | 27 | 62 | 62 | :math:`[-1, 1]` | Ryzen 1700 | Ubuntu 22.04 LTS x86_64 | Clang 14.0.0 | FMA | ++--------------+-----------+-------------------+-----------+-------------------+-------------------------------------+-------------+-------------------------+--------------+---------------+ +| asinhf | 21 | 39 | 77 | 91 | :math:`[-10, 10]` | Ryzen 1700 | Ubuntu 22.04 LTS x86_64 | Clang 14.0.0 | FMA | ++--------------+-----------+-------------------+-----------+-------------------+-------------------------------------+-------------+-------------------------+--------------+---------------+ +| atanf | 27 | 29 | 79 | 68 | :math:`[-10, 10]` | Ryzen 1700 | Ubuntu 22.04 LTS x86_64 | Clang 14.0.0 | FMA | ++--------------+-----------+-------------------+-----------+-------------------+-------------------------------------+-------------+-------------------------+--------------+---------------+ +| atanhf | 18 | 66 | 68 | 133 | :math:`[-1, 1]` | Ryzen 1700 | Ubuntu 22.04 LTS x86_64 | Clang 14.0.0 | FMA | ++--------------+-----------+-------------------+-----------+-------------------+-------------------------------------+-------------+-------------------------+--------------+---------------+ +| cosf | 13 | 32 | 53 | 59 | :math:`[0, 2\pi]` | Ryzen 1700 | Ubuntu 20.04 LTS x86_64 | Clang 12.0.0 | FMA | ++--------------+-----------+-------------------+-----------+-------------------+-------------------------------------+-------------+-------------------------+--------------+---------------+ +| coshf | 14 | 20 | 50 | 48 | :math:`[-10, 10]` | Ryzen 1700 | Ubuntu 22.04 LTS x86_64 | Clang 14.0.0 | FMA | ++--------------+-----------+-------------------+-----------+-------------------+-------------------------------------+-------------+-------------------------+--------------+---------------+ +| expf | 9 | 7 | 44 | 38 | :math:`[-10, 10]` | Ryzen 1700 | Ubuntu 20.04 LTS x86_64 | Clang 12.0.0 | FMA | ++--------------+-----------+-------------------+-----------+-------------------+-------------------------------------+-------------+-------------------------+--------------+---------------+ +| exp10f | 10 | 8 | 40 | 38 | :math:`[-10, 10]` | Ryzen 1700 | Ubuntu 22.04 LTS x86_64 | Clang 14.0.0 | FMA | ++--------------+-----------+-------------------+-----------+-------------------+-------------------------------------+-------------+-------------------------+--------------+---------------+ +| exp2f | 9 | 6 | 35 | 31 | :math:`[-10, 10]` | Ryzen 1700 | Ubuntu 22.04 LTS x86_64 | Clang 14.0.0 | FMA | ++--------------+-----------+-------------------+-----------+-------------------+-------------------------------------+-------------+-------------------------+--------------+---------------+ +| expm1f | 9 | 44 | 42 | 121 | :math:`[-10, 10]` | Ryzen 1700 | Ubuntu 20.04 LTS x86_64 | Clang 12.0.0 | FMA | ++--------------+-----------+-------------------+-----------+-------------------+-------------------------------------+-------------+-------------------------+--------------+---------------+ +| fmodf | 73 | 263 | - | - | [MIN_NORMAL, MAX_NORMAL] | i5 mobile | Ubuntu 20.04 LTS x86_64 | Clang 12.0.0 | | +| +-----------+-------------------+-----------+-------------------+-------------------------------------+-------------+-------------------------+--------------+---------------+ +| | 9 | 11 | - | - | [0, MAX_SUBNORMAL] | i5 mobile | Ubuntu 20.04 LTS x86_64 | Clang 12.0.0 | | ++--------------+-----------+-------------------+-----------+-------------------+-------------------------------------+-------------+-------------------------+--------------+---------------+ +| fmod | 595 | 3297 | - | - | [MIN_NORMAL, MAX_NORMAL] | i5 mobile | Ubuntu 20.04 LTS x86_64 | Clang 12.0.0 | | +| +-----------+-------------------+-----------+-------------------+-------------------------------------+-------------+-------------------------+--------------+---------------+ +| | 14 | 13 | - | - | [0, MAX_SUBNORMAL] | i5 mobile | Ubuntu 20.04 LTS x86_64 | Clang 12.0.0 | | ++--------------+-----------+-------------------+-----------+-------------------+-------------------------------------+-------------+-------------------------+--------------+---------------+ +| hypotf | 25 | 15 | 64 | 49 | :math:`[-10, 10] \times [-10, 10]` | Ryzen 1700 | Ubuntu 20.04 LTS x86_64 | Clang 12.0.0 | | ++--------------+-----------+-------------------+-----------+-------------------+-------------------------------------+-------------+-------------------------+--------------+---------------+ +| logf | 12 | 10 | 56 | 46 | :math:`[e^{-1}, e]` | Ryzen 1700 | Ubuntu 20.04 LTS x86_64 | Clang 12.0.0 | FMA | ++--------------+-----------+-------------------+-----------+-------------------+-------------------------------------+-------------+-------------------------+--------------+---------------+ +| log10f | 9 | 17 | 35 | 48 | :math:`[e^{-1}, e]` | Ryzen 5900X | Ubuntu 22.04 LTS x86_64 | Clang 15.0.6 | FMA | ++--------------+-----------+-------------------+-----------+-------------------+-------------------------------------+-------------+-------------------------+--------------+---------------+ +| log1pf | 16 | 33 | 61 | 97 | :math:`[e^{-0.5} - 1, e^{0.5} - 1]` | Ryzen 1700 | Ubuntu 20.04 LTS x86_64 | Clang 12.0.0 | FMA | ++--------------+-----------+-------------------+-----------+-------------------+-------------------------------------+-------------+-------------------------+--------------+---------------+ +| log2f | 13 | 10 | 57 | 46 | :math:`[e^{-1}, e]` | Ryzen 1700 | Ubuntu 20.04 LTS x86_64 | Clang 12.0.0 | FMA | ++--------------+-----------+-------------------+-----------+-------------------+-------------------------------------+-------------+-------------------------+--------------+---------------+ +| sinf | 12 | 25 | 51 | 57 | :math:`[-\pi, \pi]` | Ryzen 1700 | Ubuntu 20.04 LTS x86_64 | Clang 12.0.0 | FMA | ++--------------+-----------+-------------------+-----------+-------------------+-------------------------------------+-------------+-------------------------+--------------+---------------+ +| sincosf | 19 | 30 | 57 | 68 | :math:`[-\pi, \pi]` | Ryzen 1700 | Ubuntu 20.04 LTS x86_64 | Clang 12.0.0 | FMA | ++--------------+-----------+-------------------+-----------+-------------------+-------------------------------------+-------------+-------------------------+--------------+---------------+ +| sinhf | 13 | 63 | 48 | 137 | :math:`[-10, 10]` | Ryzen 1700 | Ubuntu 22.04 LTS x86_64 | Clang 14.0.0 | FMA | ++--------------+-----------+-------------------+-----------+-------------------+-------------------------------------+-------------+-------------------------+--------------+---------------+ +| tanf | 16 | 50 | 61 | 107 | :math:`[-\pi, \pi]` | Ryzen 1700 | Ubuntu 22.04 LTS x86_64 | Clang 14.0.0 | FMA | ++--------------+-----------+-------------------+-----------+-------------------+-------------------------------------+-------------+-------------------------+--------------+---------------+ +| tanhf | 13 | 55 | 57 | 123 | :math:`[-10, 10]` | Ryzen 1700 | Ubuntu 22.04 LTS x86_64 | Clang 14.0.0 | FMA | ++--------------+-----------+-------------------+-----------+-------------------+-------------------------------------+-------------+-------------------------+--------------+---------------+ Algorithms + Implementation Details =================================== diff --git a/libc/src/math/generic/common_constants.cpp b/libc/src/math/generic/common_constants.cpp index d56f620ad370..a694888bdb7a 100644 --- a/libc/src/math/generic/common_constants.cpp +++ b/libc/src/math/generic/common_constants.cpp @@ -102,6 +102,34 @@ const double LOG_F[128] = { 0x1.58cadb5cd7989p-1, 0x1.5ad404c359f2cp-1, 0x1.5cdb1dc6c1764p-1, 0x1.5ee02a9241675p-1, 0x1.60e32f44788d8p-1}; +// Range reduction constants for logarithms. +// r(0) = 1, r(127) = 0.5 +// r(k) = 2^-8 * ceil(2^8 * (1 - 2^-8) / (1 + k*2^-7)) +// The constants are chosen so that v = fma(r, m_x, -1) is exact in single +// precision, and -2^-8 <= v < 2^-7. +// TODO(lntue): Add reference to how the constants are derived after the +// resulting paper is ready. +const float R[128] = { + 0x1p0, 0x1.fcp-1, 0x1.f8p-1, 0x1.f4p-1, 0x1.fp-1, 0x1.ecp-1, 0x1.e8p-1, + 0x1.e4p-1, 0x1.ep-1, 0x1.dep-1, 0x1.dap-1, 0x1.d6p-1, 0x1.d4p-1, 0x1.dp-1, + 0x1.ccp-1, 0x1.cap-1, 0x1.c6p-1, 0x1.c4p-1, 0x1.cp-1, 0x1.bep-1, 0x1.bap-1, + 0x1.b8p-1, 0x1.b4p-1, 0x1.b2p-1, 0x1.aep-1, 0x1.acp-1, 0x1.a8p-1, 0x1.a6p-1, + 0x1.a4p-1, 0x1.ap-1, 0x1.9ep-1, 0x1.9cp-1, 0x1.98p-1, 0x1.96p-1, 0x1.94p-1, + 0x1.92p-1, 0x1.9p-1, 0x1.8cp-1, 0x1.8ap-1, 0x1.88p-1, 0x1.86p-1, 0x1.84p-1, + 0x1.8p-1, 0x1.7ep-1, 0x1.7cp-1, 0x1.7ap-1, 0x1.78p-1, 0x1.76p-1, 0x1.74p-1, + 0x1.72p-1, 0x1.7p-1, 0x1.6ep-1, 0x1.6cp-1, 0x1.6ap-1, 0x1.68p-1, 0x1.66p-1, + 0x1.64p-1, 0x1.62p-1, 0x1.6p-1, 0x1.5ep-1, 0x1.5cp-1, 0x1.5ap-1, 0x1.58p-1, + 0x1.56p-1, 0x1.54p-1, 0x1.54p-1, 0x1.52p-1, 0x1.5p-1, 0x1.4ep-1, 0x1.4cp-1, + 0x1.4ap-1, 0x1.4ap-1, 0x1.48p-1, 0x1.46p-1, 0x1.44p-1, 0x1.42p-1, 0x1.4p-1, + 0x1.4p-1, 0x1.3ep-1, 0x1.3cp-1, 0x1.3ap-1, 0x1.3ap-1, 0x1.38p-1, 0x1.36p-1, + 0x1.34p-1, 0x1.34p-1, 0x1.32p-1, 0x1.3p-1, 0x1.3p-1, 0x1.2ep-1, 0x1.2cp-1, + 0x1.2cp-1, 0x1.2ap-1, 0x1.28p-1, 0x1.28p-1, 0x1.26p-1, 0x1.24p-1, 0x1.24p-1, + 0x1.22p-1, 0x1.2p-1, 0x1.2p-1, 0x1.1ep-1, 0x1.1cp-1, 0x1.1cp-1, 0x1.1ap-1, + 0x1.1ap-1, 0x1.18p-1, 0x1.16p-1, 0x1.16p-1, 0x1.14p-1, 0x1.14p-1, 0x1.12p-1, + 0x1.1p-1, 0x1.1p-1, 0x1.0ep-1, 0x1.0ep-1, 0x1.0cp-1, 0x1.0cp-1, 0x1.0ap-1, + 0x1.0ap-1, 0x1.08p-1, 0x1.08p-1, 0x1.06p-1, 0x1.06p-1, 0x1.04p-1, 0x1.04p-1, + 0x1.02p-1, 0x1.0p-1}; + // Lookup table for exp(m) with m = -104, ..., 89. // -104 = floor(log(single precision's min denormal)) // 89 = ceil(log(single precision's max normal)) diff --git a/libc/src/math/generic/common_constants.h b/libc/src/math/generic/common_constants.h index 4aecff9429df..c20eb8e61b53 100644 --- a/libc/src/math/generic/common_constants.h +++ b/libc/src/math/generic/common_constants.h @@ -17,6 +17,9 @@ extern const double ONE_OVER_F[128]; // Lookup table for log(f) = log(1 + n*2^(-7)) where n = 0..127. extern const double LOG_F[128]; +// Lookup table for range reduction constants r for logarithms. +extern const float R[128]; + // Lookup table for exp(m) with m = -104, ..., 89. // -104 = floor(log(single precision's min denormal)) // 89 = ceil(log(single precision's max normal)) diff --git a/libc/src/math/generic/log10f.cpp b/libc/src/math/generic/log10f.cpp index bcf17c672a63..812850aff123 100644 --- a/libc/src/math/generic/log10f.cpp +++ b/libc/src/math/generic/log10f.cpp @@ -56,53 +56,51 @@ namespace __llvm_libc { -// Exact power of 10 in float: - -// Lookup table for log10(f) = log10(1 + n*2^(-7)) where n = 0..127. -static constexpr double LOG10_F[128] = { - 0x0.0000000000000p+0, 0x1.bafd47221ed26p-9, 0x1.b9476a4fcd10fp-8, - 0x1.49b0851443684p-7, 0x1.b5e908eb13790p-7, 0x1.10a83a8446c78p-6, - 0x1.45f4f5acb8be0p-6, 0x1.7adc3df3b1ff8p-6, 0x1.af5f92b00e610p-6, - 0x1.e3806acbd058fp-6, 0x1.0ba01a8170000p-5, 0x1.25502c0fc314cp-5, - 0x1.3ed1199a5e425p-5, 0x1.58238eeb353dap-5, 0x1.71483427d2a99p-5, - 0x1.8a3fadeb847f4p-5, 0x1.a30a9d609efeap-5, 0x1.bba9a058dfd84p-5, - 0x1.d41d5164facb4p-5, 0x1.ec6647eb58808p-5, 0x1.02428c1f08016p-4, - 0x1.0e3d29d81165ep-4, 0x1.1a23445501816p-4, 0x1.25f5215eb594ap-4, - 0x1.31b3055c47118p-4, 0x1.3d5d335c53179p-4, 0x1.48f3ed1df48fbp-4, - 0x1.5477731973e85p-4, 0x1.5fe80488af4fdp-4, 0x1.6b45df6f3e2c9p-4, - 0x1.769140a2526fdp-4, 0x1.81ca63d05a44ap-4, 0x1.8cf183886480dp-4, - 0x1.9806d9414a209p-4, 0x1.a30a9d609efeap-4, 0x1.adfd07416be07p-4, - 0x1.b8de4d3ab3d98p-4, 0x1.c3aea4a5c6effp-4, 0x1.ce6e41e463da5p-4, - 0x1.d91d5866aa99cp-4, 0x1.e3bc1ab0e19fep-4, 0x1.ee4aba610f204p-4, - 0x1.f8c9683468191p-4, 0x1.019c2a064b486p-3, 0x1.06cbd67a6c3b6p-3, - 0x1.0bf3d0937c41cp-3, 0x1.11142f0811357p-3, 0x1.162d082ac9d10p-3, - 0x1.1b3e71ec94f7bp-3, 0x1.204881dee8777p-3, 0x1.254b4d35e7d3cp-3, - 0x1.2a46e8ca7ba2ap-3, 0x1.2f3b691c5a001p-3, 0x1.3428e2540096dp-3, - 0x1.390f6844a0b83p-3, 0x1.3def0e6dfdf85p-3, 0x1.42c7e7fe3fc02p-3, - 0x1.479a07d3b6411p-3, 0x1.4c65807e93338p-3, 0x1.512a644296c3dp-3, - 0x1.55e8c518b10f8p-3, 0x1.5aa0b4b0988fap-3, 0x1.5f52447255c92p-3, - 0x1.63fd857fc49bbp-3, 0x1.68a288b60b7fcp-3, 0x1.6d415eaf0906bp-3, - 0x1.71da17c2b7e80p-3, 0x1.766cc40889e85p-3, 0x1.7af97358b9e04p-3, - 0x1.7f80354d952a0p-3, 0x1.84011944bcb75p-3, 0x1.887c2e605e119p-3, - 0x1.8cf183886480dp-3, 0x1.9161276ba2978p-3, 0x1.95cb2880f45bap-3, - 0x1.9a2f95085a45cp-3, 0x1.9e8e7b0c0d4bep-3, 0x1.a2e7e8618c2d2p-3, - 0x1.a73beaaaa22f4p-3, 0x1.ab8a8f56677fcp-3, 0x1.afd3e3a23b680p-3, - 0x1.b417f49ab8807p-3, 0x1.b856cf1ca3105p-3, 0x1.bc907fd5d1c40p-3, - 0x1.c0c5134610e26p-3, 0x1.c4f495c0002a2p-3, 0x1.c91f1369eb7cap-3, - 0x1.cd44983e9e7bdp-3, 0x1.d165300e333f7p-3, 0x1.d580e67edc43dp-3, - 0x1.d997c70da9b47p-3, 0x1.dda9dd0f4a329p-3, 0x1.e1b733b0c7381p-3, - 0x1.e5bfd5f83d342p-3, 0x1.e9c3cec58f807p-3, 0x1.edc328d3184afp-3, - 0x1.f1bdeeb654901p-3, 0x1.f5b42ae08c407p-3, 0x1.f9a5e79f76ac5p-3, - 0x1.fd932f1ddb4d6p-3, 0x1.00be05b217844p-2, 0x1.02b0432c96ff0p-2, - 0x1.04a054e139004p-2, 0x1.068e3fa282e3dp-2, 0x1.087a0832fa7acp-2, - 0x1.0a63b3456c819p-2, 0x1.0c4b457d3193dp-2, 0x1.0e30c36e71a7fp-2, - 0x1.1014319e661bdp-2, 0x1.11f594839a5bdp-2, 0x1.13d4f0862b2e1p-2, - 0x1.15b24a0004a92p-2, 0x1.178da53d1ee01p-2, 0x1.1967067bb94b8p-2, - 0x1.1b3e71ec94f7bp-2, 0x1.1d13ebb32d7f9p-2, 0x1.1ee777e5f0dc3p-2, - 0x1.20b91a8e76105p-2, 0x1.2288d7a9b2b64p-2, 0x1.2456b3282f786p-2, - 0x1.2622b0ee3b79dp-2, 0x1.27ecd4d41eb67p-2, 0x1.29b522a64b609p-2, - 0x1.2b7b9e258e422p-2, 0x1.2d404b073e27ep-2, 0x1.2f032cf56a5bep-2, - 0x1.30c4478f0835fp-2, 0x1.32839e681fc62p-2}; +// Lookup table for -log10(r) where r is defined in common_constants.cpp. +static constexpr double LOG10_R[128] = { + 0x0.0000000000000p+0, 0x1.be76bd77b4fc3p-9, 0x1.c03a80ae5e054p-8, + 0x1.51824c7587ebp-7, 0x1.c3d0837784c41p-7, 0x1.1b85d6044e9aep-6, + 0x1.559bd2406c3bap-6, 0x1.902c31d62a843p-6, 0x1.cb38fccd8bfdbp-6, + 0x1.e8eeb09f2f6cbp-6, 0x1.125d0432ea20ep-5, 0x1.30838cdc2fbfdp-5, + 0x1.3faf7c663060ep-5, 0x1.5e3966b7e9295p-5, 0x1.7d070145f4fd7p-5, + 0x1.8c878eeb05074p-5, 0x1.abbcebd84fcap-5, 0x1.bb7209d1e24e5p-5, + 0x1.db11ed766abf4p-5, 0x1.eafd05035bd3bp-5, 0x1.0585283764178p-4, + 0x1.0d966cc6500fap-4, 0x1.1dd5460c8b16fp-4, 0x1.2603072a25f82p-4, + 0x1.367ba3aaa1883p-4, 0x1.3ec6ad5407868p-4, 0x1.4f7aad9bbcbafp-4, + 0x1.57e3d47c3af7bp-4, 0x1.605735ee985f1p-4, 0x1.715d0ce367afcp-4, + 0x1.79efb57b0f803p-4, 0x1.828cfed29a215p-4, 0x1.93e7de0fc3e8p-4, + 0x1.9ca5aa1729f45p-4, 0x1.a56e8325f5c87p-4, 0x1.ae4285509950bp-4, + 0x1.b721cd17157e3p-4, 0x1.c902a19e65111p-4, 0x1.d204698cb42bdp-4, + 0x1.db11ed766abf4p-4, 0x1.e42b4c16caaf3p-4, 0x1.ed50a4a26eafcp-4, + 0x1.ffbfc2bbc7803p-4, 0x1.0484e4942aa43p-3, 0x1.093025a19976cp-3, + 0x1.0de1b56356b04p-3, 0x1.1299a4fb3e306p-3, 0x1.175805d1587c1p-3, + 0x1.1c1ce9955c0c6p-3, 0x1.20e8624038fedp-3, 0x1.25ba8215af7fcp-3, + 0x1.2a935ba5f1479p-3, 0x1.2f7301cf4e87bp-3, 0x1.345987bfeea91p-3, + 0x1.394700f7953fdp-3, 0x1.3e3b8149739d4p-3, 0x1.43371cde076c2p-3, + 0x1.4839e83506c87p-3, 0x1.4d43f8275a483p-3, 0x1.525561e9256eep-3, + 0x1.576e3b0bde0a7p-3, 0x1.5c8e998072fe2p-3, 0x1.61b6939983048p-3, + 0x1.66e6400da3f77p-3, 0x1.6c1db5f9bb336p-3, 0x1.6c1db5f9bb336p-3, + 0x1.715d0ce367afcp-3, 0x1.76a45cbb7e6ffp-3, 0x1.7bf3bde099f3p-3, + 0x1.814b4921bd52bp-3, 0x1.86ab17c10bc7fp-3, 0x1.86ab17c10bc7fp-3, + 0x1.8c13437695532p-3, 0x1.9183e673394fap-3, 0x1.96fd1b639fc09p-3, + 0x1.9c7efd734a2f9p-3, 0x1.a209a84fbcff8p-3, 0x1.a209a84fbcff8p-3, + 0x1.a79d382bc21d9p-3, 0x1.ad39c9c2c608p-3, 0x1.b2df7a5c50299p-3, + 0x1.b2df7a5c50299p-3, 0x1.b88e67cf9798p-3, 0x1.be46b087354bcp-3, + 0x1.c4087384f4f8p-3, 0x1.c4087384f4f8p-3, 0x1.c9d3d065c5b42p-3, + 0x1.cfa8e765cbb72p-3, 0x1.cfa8e765cbb72p-3, 0x1.d587d96494759p-3, + 0x1.db70c7e96e7f3p-3, 0x1.db70c7e96e7f3p-3, 0x1.e163d527e68cfp-3, + 0x1.e76124046b3f3p-3, 0x1.e76124046b3f3p-3, 0x1.ed68d819191fcp-3, + 0x1.f37b15bab08d1p-3, 0x1.f37b15bab08d1p-3, 0x1.f99801fdb749dp-3, + 0x1.ffbfc2bbc7803p-3, 0x1.ffbfc2bbc7803p-3, 0x1.02f93f4c87101p-2, + 0x1.06182e84fd4acp-2, 0x1.06182e84fd4acp-2, 0x1.093cc32c90f84p-2, + 0x1.093cc32c90f84p-2, 0x1.0c6711d6abd7ap-2, 0x1.0f972f87ff3d6p-2, + 0x1.0f972f87ff3d6p-2, 0x1.12cd31b9c99ffp-2, 0x1.12cd31b9c99ffp-2, + 0x1.16092e5d3a9a6p-2, 0x1.194b3bdef6b9ep-2, 0x1.194b3bdef6b9ep-2, + 0x1.1c93712abc7ffp-2, 0x1.1c93712abc7ffp-2, 0x1.1fe1e5af2c141p-2, + 0x1.1fe1e5af2c141p-2, 0x1.2336b161b3337p-2, 0x1.2336b161b3337p-2, + 0x1.2691ecc29f042p-2, 0x1.2691ecc29f042p-2, 0x1.29f3b0e15584bp-2, + 0x1.29f3b0e15584bp-2, 0x1.2d5c1760b86bbp-2, 0x1.2d5c1760b86bbp-2, + 0x1.30cb3a7bb3625p-2, 0x1.34413509f79ffp-2}; LLVM_LIBC_FUNCTION(float, log10f, (float x)) { constexpr double LOG10_2 = 0x1.34413509f79ffp-2; @@ -112,39 +110,52 @@ LLVM_LIBC_FUNCTION(float, log10f, (float x)) { uint32_t x_u = xbits.uintval(); // Exact powers of 10 and other hard-to-round cases. - switch (x_u) { - case 0x4120'0000U: // x = 10 - return 1.0f; - case 0x42c8'0000U: // x = 100 - return 2.0f; - case 0x447a'0000U: // x = 1,000 - return 3.0f; - case 0x461c'4000U: // x = 10,000 - return 4.0f; - case 0x47c3'5000U: // x = 100,000 - return 5.0f; - case 0x4974'2400U: // x = 1,000,000 - return 6.0f; - case 0x4b18'9680U: // x = 10,000,000 - return 7.0f; - case 0x4cbe'bc20U: // x = 100,000,000 - return 8.0f; - case 0x4e6e'6b28U: // x = 1,000,000,000 - return 9.0f; - case 0x5015'02f9U: // x = 10,000,000,000 - return 10.0f; - case 0x4f13'4f83U: // x = 2471461632.0 - return fputil::round_result_slightly_down(0x1.2c9314p+3f); - case 0x7956'ba5eU: // x = 69683218960000541503257137270226944.0 - return fputil::round_result_slightly_up(0x1.16bebap+5f); + if (LIBC_UNLIKELY((x_u & 0x3FF) == 0)) { + switch (x_u) { + case 0x3f80'0000U: // x = 1 + return 0.0f; + case 0x4120'0000U: // x = 10 + return 1.0f; + case 0x42c8'0000U: // x = 100 + return 2.0f; + case 0x447a'0000U: // x = 1,000 + return 3.0f; + case 0x461c'4000U: // x = 10,000 + return 4.0f; + case 0x47c3'5000U: // x = 100,000 + return 5.0f; + case 0x4974'2400U: // x = 1,000,000 + return 6.0f; + } + } else { + switch (x_u) { + case 0x4b18'9680U: // x = 10,000,000 + return 7.0f; + case 0x4cbe'bc20U: // x = 100,000,000 + return 8.0f; + case 0x4e6e'6b28U: // x = 1,000,000,000 + return 9.0f; + case 0x5015'02f9U: // x = 10,000,000,000 + return 10.0f; + case 0x0efe'ee7aU: // x = 0x1.fddcf4p-98f + return fputil::round_result_slightly_up(-0x1.d33a46p+4f); + case 0x3f5f'de1bU: // x = 0x1.bfbc36p-1f + return fputil::round_result_slightly_up(-0x1.dd2c6ep-5f); + case 0x3f80'70d8U: // x = 0x1.00e1bp0f + return fputil::round_result_slightly_up(0x1.8762c4p-10f); #ifndef LIBC_TARGET_CPU_HAS_FMA - case 0x08ae'a356U: // x = 0x1.5d46acp-110f - return fputil::round_result_slightly_up(-0x1.07d3b4p+5f); - case 0x1c7d'a337U: // x = 0x1.fb466ep-71f - return fputil::round_result_slightly_up(-0x1.5137dp+4f); - case 0x69c8'c583U: // x = 0x1.918b06p+84f - return fputil::round_result_slightly_down(0x1.97b652p+4f); + case 0x08ae'a356U: // x = 0x1.5d46acp-110f + return fputil::round_result_slightly_up(-0x1.07d3b4p+5f); + case 0x120b'93dcU: // x = 0x1.1727b8p-91f + return fputil::round_result_slightly_down(-0x1.b5b2aep+4f); + case 0x13ae'78d3U: // x = 0x1.5cf1a6p-88f + return fputil::round_result_slightly_down(-0x1.a5b2aep+4f); + case 0x4f13'4f83U: // x = 2471461632.0 + return fputil::round_result_slightly_down(0x1.2c9314p+3f); + case 0x7956'ba5eU: // x = 69683218960000541503257137270226944.0 + return fputil::round_result_slightly_up(0x1.16bebap+5f); #endif // LIBC_TARGET_CPU_HAS_FMA + } } int m = -FPBits::EXPONENT_BIAS; @@ -168,25 +179,36 @@ LLVM_LIBC_FUNCTION(float, log10f, (float x)) { // Normalize denormal inputs. xbits.set_val(xbits.get_val() * 0x1.0p23f); m -= 23; + x_u = xbits.uintval(); } - m += xbits.get_unbiased_exponent(); - int f_index = xbits.get_mantissa() >> 16; + // Add unbiased exponent. + m += static_cast(x_u >> 23); + // Extract 7 leading fractional bits of the mantissa + int index = (x_u >> 16) & 0x7F; // Set bits to 1.m xbits.set_unbiased_exponent(0x7F); - FPBits f = xbits; - f.bits &= ~0x0000'FFFF; - - double d = static_cast(xbits) - static_cast(f); - d *= ONE_OVER_F[f_index]; - - double extra_factor = - fputil::multiply_add(static_cast(m), LOG10_2, LOG10_F[f_index]); + float u = static_cast(xbits); + double v; +#ifdef LIBC_TARGET_CPU_HAS_FMA + v = static_cast(fputil::multiply_add(u, R[index], -1.0f)); // Exact. +#else + v = fputil::multiply_add(static_cast(u), + static_cast(R[index]), -1.0); // Exact +#endif // LIBC_TARGET_CPU_HAS_FMA - double r = fputil::polyeval(d, extra_factor, 0x1.bcb7b1526e4c5p-2, - -0x1.bcb7b1518a5e9p-3, 0x1.287a72a6f716p-3, - -0x1.bcadb40b85565p-4, 0x1.5e0bc97f97e22p-4); + // Degree-5 polynomial approximation of log10 generated by: + // > P = fpminimax(log10(1 + x)/x, 4, [|D...|], [-2^-8, 2^-7]); + constexpr double COEFFS[5] = {0x1.bcb7b1526e2e5p-2, -0x1.bcb7b1528d43dp-3, + 0x1.287a77eb4ca0dp-3, -0x1.bcb8110a181b5p-4, + 0x1.60e7e3e747129p-4}; + double v2 = v * v; // Exact + double p2 = fputil::multiply_add(v, COEFFS[4], COEFFS[3]); + double p1 = fputil::multiply_add(v, COEFFS[2], COEFFS[1]); + double p0 = fputil::multiply_add(v, COEFFS[0], LOG10_R[index]); + double r = fputil::multiply_add(static_cast(m), LOG10_2, + fputil::polyeval(v2, p0, p1, p2)); return static_cast(r); } diff --git a/libc/test/src/math/log10f_test.cpp b/libc/test/src/math/log10f_test.cpp index e488647daed3..1ae604bd9233 100644 --- a/libc/test/src/math/log10f_test.cpp +++ b/libc/test/src/math/log10f_test.cpp @@ -28,12 +28,13 @@ TEST(LlvmLibcLog10fTest, SpecialNumbers) { EXPECT_FP_EQ_WITH_EXCEPTION(neg_inf, __llvm_libc::log10f(-0.0f), FE_DIVBYZERO); EXPECT_FP_IS_NAN_WITH_EXCEPTION(__llvm_libc::log10f(-1.0f), FE_INVALID); - EXPECT_FP_EQ(zero, __llvm_libc::log10f(1.0f)); + EXPECT_FP_EQ_ALL_ROUNDING(zero, __llvm_libc::log10f(1.0f)); } TEST(LlvmLibcLog10fTest, TrickyInputs) { - constexpr int N = 15; + constexpr int N = 21; constexpr uint32_t INPUTS[N] = { + 0x3f800000U /*1.0f*/, 0x41200000U /*10.0f*/, 0x42c80000U /*100.0f*/, 0x447a0000U /*1,000.0f*/, @@ -49,6 +50,11 @@ TEST(LlvmLibcLog10fTest, TrickyInputs) { 0x08ae'a356U /*0x1.5d46acp-110f*/, 0x1c7d'a337U /*0x1.fb466ep-71f*/, 0x69c8'c583U /*0x1.918b06p+84f*/, + 0x0efe'ee7aU /*0x1.fddcf4p-98f*/, + 0x3f5f'de1bU /*0x1.bfbc36p-1f*/, + 0x3f80'70d8U /*0x1.00e1bp0f*/, + 0x120b'93dcU /*0x1.1727b8p-91f*/, + 0x13ae'78d3U /*0x1.5cf1a6p-88f*/, }; for (int i = 0; i < N; ++i) { -- 2.11.4.GIT