//===-- Single-precision erf(x) function ----------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
9 #include "src/math/erff.h"
10 #include "src/__support/FPUtil/FPBits.h"
11 #include "src/__support/FPUtil/PolyEval.h"
12 #include "src/__support/FPUtil/except_value_utils.h"
13 #include "src/__support/FPUtil/multiply_add.h"
14 #include "src/__support/common.h"
15 #include "src/__support/macros/optimization.h" // LIBC_UNLIKELY
17 namespace __llvm_libc
{
19 // Polynomials approximating erf(x)/x on ( k/8, (k + 1)/8 ) generated by Sollya
21 // > P = fpminimax(erf(x)/x, [|0, 2, 4, 6, 8, 10, 12, 14|], [|D...|],
24 constexpr double COEFFS
[32][8] = {
25 {0x1.20dd750429b6dp0
, -0x1.812746b037753p
-2, 0x1.ce2f219e8596ap
-4,
26 -0x1.b82cdacb78fdap
-6, 0x1.56479297dfda5p
-8, -0x1.8b3ac5455ef02p
-11,
27 -0x1.126fcac367e3bp
-8, 0x1.2d0bdb3ba4984p
-4},
28 {0x1.20dd750429b6dp0
, -0x1.812746b0379a8p
-2, 0x1.ce2f21a03cf2ap
-4,
29 -0x1.b82ce30de083ep
-6, 0x1.565bcad3eb60fp
-8, -0x1.c02c66f659256p
-11,
30 0x1.f92f673385229p
-14, -0x1.def402648ae9p
-17},
31 {0x1.20dd750429b34p0
, -0x1.812746b032dcep
-2, 0x1.ce2f219d84aaep
-4,
32 -0x1.b82ce22dcf139p
-6, 0x1.565b9efcd4af1p
-8, -0x1.c021f1af414bcp
-11,
33 0x1.f7c6d177eff82p
-14, -0x1.c9e4410dcf865p
-17},
34 {0x1.20dd750426eabp0
, -0x1.812746ae592c7p
-2, 0x1.ce2f211525f14p
-4,
35 -0x1.b82ccc125e63fp
-6, 0x1.56596f261cfd3p
-8, -0x1.bfde1ff8eeecfp
-11,
36 0x1.f31a9d15dc5d8p
-14, -0x1.a5a4362844b3cp
-17},
37 {0x1.20dd75039c705p0
, -0x1.812746777e74dp
-2, 0x1.ce2f17af98a1bp
-4,
38 -0x1.b82be4b817cbep
-6, 0x1.564bec2e2962ep
-8, -0x1.bee86f9da3558p
-11,
39 0x1.e9443689dc0ccp
-14, -0x1.79c0f230805d8p
-17},
40 {0x1.20dd74f811211p0
, -0x1.81274371a3e8fp
-2, 0x1.ce2ec038262e5p
-4,
41 -0x1.b8265b82c5e1fp
-6, 0x1.5615a2e239267p
-8, -0x1.bc63ae023dcebp
-11,
42 0x1.d87c2102f7e06p
-14, -0x1.49584bea41d62p
-17},
43 {0x1.20dd746d063e3p0
, -0x1.812729a8a950fp
-2, 0x1.ce2cb0a2df232p
-4,
44 -0x1.b80eca1f51278p
-6, 0x1.5572e26c46815p
-8, -0x1.b715e5638b65ep
-11,
45 0x1.bfbb195484968p
-14, -0x1.177a565c15c52p
-17},
46 {0x1.20dd701b44486p0
, -0x1.812691145f237p
-2, 0x1.ce23a06b8cfd9p
-4,
47 -0x1.b7c1dc7245288p
-6, 0x1.53e92f7f397ddp
-8, -0x1.ad97cc4acf0b2p
-11,
48 0x1.9f028b2b09b71p
-14, -0x1.cdc4da08da8c1p
-18},
49 {0x1.20dd5715ac332p0
, -0x1.8123e680bd0ebp
-2, 0x1.ce0457aded691p
-4,
50 -0x1.b6f52d52bed4p
-6, 0x1.50c291b84414cp
-8, -0x1.9ea246b1ad4a9p
-11,
51 0x1.77654674e0cap
-14, -0x1.737c11a1bcebbp
-18},
52 {0x1.20dce6593e114p0
, -0x1.811a59c02eadcp
-2, 0x1.cdab53c7cd7d5p
-4,
53 -0x1.b526d2e321eedp
-6, 0x1.4b1d32cd8b994p
-8, -0x1.8963143ec0a1ep
-11,
54 0x1.4ad5700e4db91p
-14, -0x1.231e100e43ef2p
-18},
55 {0x1.20db48bfd5a62p0
, -0x1.80fdd84f9e308p
-2, 0x1.ccd340d462983p
-4,
56 -0x1.b196a2928768p
-6, 0x1.4210c2c13a0f7p
-8, -0x1.6dbdfb4ff71aep
-11,
57 0x1.1bca2d17fbd71p
-14, -0x1.bca36f90c7cf5p
-19},
58 {0x1.20d64b2f8f508p0
, -0x1.80b4d4f19fa8bp
-2, 0x1.cb088197262e3p
-4,
59 -0x1.ab51fd02e5b99p
-6, 0x1.34e1e5e81a632p
-8, -0x1.4c66377b502cep
-11,
60 0x1.d9ad25066213cp
-15, -0x1.4b0df7dd0cfa1p
-19},
61 {0x1.20c8fc1243576p0
, -0x1.8010cb2009e27p
-2, 0x1.c7a47e9299315p
-4,
62 -0x1.a155be5683654p
-6, 0x1.233502694997bp
-8, -0x1.26c94b7d813p
-11,
63 0x1.8094f1de25fb9p
-15, -0x1.e0e3d776c6eefp
-20},
64 {0x1.20a9bd1611bc1p0
, -0x1.7ec7fbce83f9p
-2, 0x1.c1d757d7317b7p
-4,
65 -0x1.92c160cd589fp
-6, 0x1.0d307269cc5c2p
-8, -0x1.fda5b0d2d1879p
-12,
66 0x1.2fdd7b3b14a7fp
-15, -0x1.54eed4a26af5ap
-20},
67 {0x1.20682834f943dp0
, -0x1.7c73f747bf5a9p
-2, 0x1.b8c2db4a9ffd1p
-4,
68 -0x1.7f0e4ffe989ecp
-6, 0x1.e7061eae4166ep
-9, -0x1.ad36e873fff2dp
-12,
69 0x1.d39222396128ep
-16, -0x1.d83dacec5ea6bp
-21},
70 {0x1.1feb8d12676d7p0
, -0x1.7898347284afep
-2, 0x1.aba3466b34451p
-4,
71 -0x1.663adc573e2f9p
-6, 0x1.ae99fb17c3e08p
-9, -0x1.602f950ad5535p
-12,
72 0x1.5e9717490609dp
-16, -0x1.3fca107bbc8d5p
-21},
73 {0x1.1f12fe3c536fap0
, -0x1.72b1d1f22e6d3p
-2, 0x1.99fc0eed4a896p
-4,
74 -0x1.48db0a87bd8c6p
-6, 0x1.73e368895aa61p
-9, -0x1.19b35d5301fc8p
-12,
75 0x1.007987e4bb033p
-16, -0x1.a7edcd4c2dc7p
-22},
76 {0x1.1db7b0df84d5dp0
, -0x1.6a4e4a41cde02p
-2, 0x1.83bbded16455dp
-4,
77 -0x1.2809b3b36977ep
-6, 0x1.39c08bab44679p
-9, -0x1.b7b45a70ed119p
-13,
78 0x1.6e99b36410e7bp
-17, -0x1.13619bb7ebc0cp
-22},
79 {0x1.1bb1c85c4a527p0
, -0x1.5f23b99a249a3p
-2, 0x1.694c91fa0d12cp
-4,
80 -0x1.053e1ce11c72dp
-6, 0x1.02bf72c50ea78p
-9, -0x1.4f478fb56cb02p
-13,
81 0x1.005f80ecbe213p
-17, -0x1.5f2446bde7f5bp
-23},
82 {0x1.18dec3bd51f9dp0
, -0x1.5123f58346186p
-2, 0x1.4b8a1ca536ab4p
-4,
83 -0x1.c4243015cc723p
-7, 0x1.a1a8a01d351efp
-10, -0x1.f466b34f1d86bp
-14,
84 0x1.5f835eea0bf6ap
-18, -0x1.b83165b939234p
-24},
85 {0x1.152804c3369f4p0
, -0x1.4084cd4afd4bcp
-2, 0x1.2ba2e836e47aap
-4,
86 -0x1.800f2dfc6904bp
-7, 0x1.4a6daf0669c59p
-10, -0x1.6e326ab872317p
-14,
87 0x1.d9761a6a755a5p
-19, -0x1.0fca33f9dd4b5p
-24},
88 {0x1.1087ad68356aap0
, -0x1.2dbb044707459p
-2, 0x1.0aea8ceaa0384p
-4,
89 -0x1.40b516d52b3d2p
-7, 0x1.00c9e05f01d22p
-10, -0x1.076afb0dc0ff7p
-14,
90 0x1.39fadec400657p
-19, -0x1.4b5761352e7e3p
-25},
91 {0x1.0b0a7a8ba4a22p0
, -0x1.196990d22d4a1p
-2, 0x1.d5551e6ac0c4dp
-5,
92 -0x1.07cce1770bd1ap
-7, 0x1.890347b8848bfp
-11, -0x1.757ec96750b6ap
-15,
93 0x1.9b258a1e06bcep
-20, -0x1.8fc6d22da7572p
-26},
94 {0x1.04ce2be70fb47p0
, -0x1.0449e4b0b9cacp
-2, 0x1.97f7424f4b0e7p
-5,
95 -0x1.ac825439c42f4p
-8, 0x1.28f5f65426dfbp
-11, -0x1.05b699a90f90fp
-15,
96 0x1.0a888eecf4593p
-20, -0x1.deace2b32bb31p
-27},
97 {0x1.fbf9fb0e11cc8p
-1, -0x1.de2640856545ap
-3, 0x1.5f5b1f47f851p
-5,
98 -0x1.588bc71eb41b9p
-8, 0x1.bc6a0a772f56dp
-12, -0x1.6b9fad1f1657ap
-16,
99 0x1.573204ba66504p
-21, -0x1.1d38065c94e44p
-27},
100 {0x1.ed8f18c99e031p
-1, -0x1.b4cb6acd903b4p
-3, 0x1.2c7f3dddd6fc1p
-5,
101 -0x1.13052067df4ep
-8, 0x1.4a5027444082fp
-12, -0x1.f672bab0e2554p
-17,
102 0x1.b83c756348cc9p
-22, -0x1.534f1a1079499p
-28},
103 {0x1.debd33044166dp
-1, -0x1.8d7cd9053f7d8p
-3, 0x1.ff9957fb3d6e7p
-6,
104 -0x1.b50be55de0f36p
-9, 0x1.e92c8ec53a628p
-13, -0x1.5a4b88d508007p
-17,
105 0x1.1a27737559e26p
-22, -0x1.942ae62cb2c14p
-29},
106 {0x1.cfdbf0386f3bdp
-1, -0x1.68e33d93b0dc4p
-3, 0x1.b2683d58f53dep
-6,
107 -0x1.5a9174e70d26fp
-9, 0x1.69ddd326d49cdp
-13, -0x1.dd8f397a8219cp
-18,
108 0x1.6a755016ad4ddp
-23, -0x1.e366e0139187dp
-30},
109 {0x1.c132adb8d7464p
-1, -0x1.475a899f61b46p
-3, 0x1.70a431397a77cp
-6,
110 -0x1.12e3d35beeee2p
-9, 0x1.0c16b05738333p
-13, -0x1.4a47f873e144ep
-18,
111 0x1.d3d494c698c02p
-24, -0x1.2302c59547fe5p
-30},
112 {0x1.b2f5fd05555e7p
-1, -0x1.28feefbe03ec7p
-3, 0x1.3923acbb3a676p
-6,
113 -0x1.b4ff793cd6358p
-10, 0x1.8ea0eb8c913bcp
-14, -0x1.cb31ec2baceb1p
-19,
114 0x1.30011e7e80c04p
-24, -0x1.617710635cb1dp
-31},
115 {0x1.a54853cd9593ep
-1, -0x1.0dbdbaea4dc8ep
-3, 0x1.0a93e2c20a0fdp
-6,
116 -0x1.5c969ff401ea8p
-10, 0x1.29e0cc64fe627p
-14, -0x1.4160d8e9d3c2ap
-19,
117 0x1.8e7b67594624ap
-25, -0x1.b1cf2c975b09bp
-32},
118 {0x1.983ceece09ff8p
-1, -0x1.eacc78f7a2dp
-4, 0x1.c74418410655fp
-7,
119 -0x1.1756a050e441ep
-10, 0x1.bff3650f7f548p
-15, -0x1.c56c0217d3adap
-20,
120 0x1.07b4918d0b489p
-25, -0x1.0d4be8c1c50f8p
-32},
123 LLVM_LIBC_FUNCTION(float, erff
, (float x
)) {
124 using FPBits
= typename
fputil::FPBits
<float>;
127 uint32_t x_u
= xbits
.uintval();
128 uint32_t x_abs
= x_u
& 0x7fff'ffffU
;
130 // Exceptional values
131 if (LIBC_UNLIKELY(x_abs
== 0x3f65'9229U
)) // |x| = 0x1.cb2452p-1f
132 return x
< 0.0f
? fputil::round_result_slightly_down(-0x1.972ea8p
-1f
)
133 : fputil::round_result_slightly_up(0x1.972ea8p
-1f
);
134 if (LIBC_UNLIKELY(x_abs
== 0x4004'1e6aU
)) // |x| = 0x1.083cd4p+1f
135 return x
< 0.0f
? fputil::round_result_slightly_down(-0x1.fe3462p
-1f
)
136 : fputil::round_result_slightly_up(0x1.fe3462p
-1f
);
138 // if (LIBC_UNLIKELY(x_abs > 0x407a'd444U)) {
139 if (LIBC_UNLIKELY(x_abs
>= 0x4080'0000U
)) {
140 const float ONE
[2] = {1.0f
, -1.0f
};
141 const float SMALL
[2] = {-0x1.0p
-25f
, 0x1.0p
-25f
};
143 int sign
= static_cast<int>(xbits
.get_sign());
145 if (LIBC_UNLIKELY(x_abs
>= 0x7f80'0000U
)) {
146 return (x_abs
> 0x7f80'0000) ? x
: ONE
[sign
];
149 return ONE
[sign
] + SMALL
[sign
];
152 // Polynomial approximation:
153 // erf(x) ~ x * (c0 + c1 * x^2 + c2 * x^4 + ... + c7 * x^14)
154 double xd
= static_cast<double>(x
);
155 double xsq
= xd
* xd
;
157 const uint32_t EIGHT
= 3 << FPBits::FloatProp::MANTISSA_WIDTH
;
158 int idx
= static_cast<int>(FPBits(x_abs
+ EIGHT
).get_val());
160 double x4
= xsq
* xsq
;
161 double c0
= fputil::multiply_add(xsq
, COEFFS
[idx
][1], COEFFS
[idx
][0]);
162 double c1
= fputil::multiply_add(xsq
, COEFFS
[idx
][3], COEFFS
[idx
][2]);
163 double c2
= fputil::multiply_add(xsq
, COEFFS
[idx
][5], COEFFS
[idx
][4]);
164 double c3
= fputil::multiply_add(xsq
, COEFFS
[idx
][7], COEFFS
[idx
][6]);
167 double p0
= fputil::multiply_add(x4
, c1
, c0
);
168 double p1
= fputil::multiply_add(x4
, c3
, c2
);
170 return static_cast<float>(xd
* fputil::multiply_add(x8
, p1
, p0
));
173 } // namespace __llvm_libc