1 //===-- Single-precision x^y function -------------------------------------===//
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
7 //===----------------------------------------------------------------------===//
9 #include "src/math/powf.h"
10 #include "common_constants.h" // Lookup tables EXP_M1 and EXP_M2.
11 #include "src/__support/CPP/bit.h"
12 #include "src/__support/CPP/optional.h"
13 #include "src/__support/FPUtil/FPBits.h"
14 #include "src/__support/FPUtil/PolyEval.h"
15 #include "src/__support/FPUtil/double_double.h"
16 #include "src/__support/FPUtil/except_value_utils.h"
17 #include "src/__support/FPUtil/multiply_add.h"
18 #include "src/__support/FPUtil/nearest_integer.h"
19 #include "src/__support/FPUtil/rounding_mode.h"
20 #include "src/__support/FPUtil/sqrt.h" // Speedup for powf(x, 1/2) = sqrtf(x)
21 #include "src/__support/builtin_wrappers.h"
22 #include "src/__support/common.h"
23 #include "src/__support/macros/optimization.h" // LIBC_UNLIKELY
25 #include "exp10f_impl.h" // Speedup for powf(10, y) = exp10f(y)
26 #include "exp2f_impl.h" // Speedup for powf(2, y) = exp2f(y)
30 namespace LIBC_NAMESPACE
{
32 using fputil::DoubleDouble
;
33 using fputil::TripleDouble
;
37 #ifdef LIBC_TARGET_CPU_HAS_FMA
38 constexpr uint64_t ERR
= 64;
40 constexpr uint64_t ERR
= 128;
41 #endif // LIBC_TARGET_CPU_HAS_FMA
43 // We choose the precision of the high part to be 53 - 24 - 8, so that when
44 // y * (e_x + LOG2_R_DD[i].hi) is exact.
45 // Generated by Sollya with:
46 // > for i from 0 to 127 do {
47 // r = 2^-8 * ceil(2^8 * (1 - 2^-8) / (1 + i * 2^-7) );
49 // b = round(1 + a, 53 - 24 - 8, RN) - 1;
50 // c = round(a - b, D, RN);
51 // d = round(a - b - c, D, RN);
52 // print("{", d, ",", c, ", ", b, "},");
54 static constexpr TripleDouble LOG2_R_TD
[128] = {
56 {0x1.84a2c615b70adp
-79, -0x1.177c23362928cp
-25, 0x1.72c8p
-7},
57 {-0x1.f27b820fd03eap
-76, -0x1.179e0caa9c9abp
-22, 0x1.744p
-6},
58 {-0x1.f27ef487c8f34p
-77, -0x1.c6cea541f5b7p
-23, 0x1.184cp
-5},
59 {-0x1.e3f80fbc71454p
-76, -0x1.66c4d4e554434p
-22, 0x1.773ap
-5},
60 {-0x1.9f8ef14d5f6eep
-79, -0x1.70700a00fdd55p
-24, 0x1.d6ecp
-5},
61 {0x1.452bbce7398c1p
-77, 0x1.53002a4e86631p
-23, 0x1.1bb3p
-4},
62 {-0x1.990555535afdp
-81, 0x1.fcd15f101c142p
-25, 0x1.4c56p
-4},
63 {0x1.447e30ad393eep
-78, 0x1.25b3eed319cedp
-22, 0x1.7d6p
-4},
64 {0x1.b7759da88a2dap
-76, -0x1.4195120d8486fp
-22, 0x1.960dp
-4},
65 {0x1.cee7766ece702p
-78, 0x1.45b878e27d0d9p
-23, 0x1.c7b5p
-4},
66 {-0x1.a55c745ecdc2fp
-77, 0x1.770744593a4cbp
-22, 0x1.f9c9p
-4},
67 {0x1.f7ec992caa67fp
-77, 0x1.c673032495d24p
-22, 0x1.097ep
-3},
68 {-0x1.433638c6ece3ep
-77, -0x1.1eaa65b49696ep
-22, 0x1.22dbp
-3},
69 {0x1.58f27b6518824p
-76, 0x1.b2866f2850b22p
-22, 0x1.3c6f8p
-3},
70 {-0x1.86bdcfdfd4a4cp
-79, 0x1.8ee37cd2ea9d3p
-25, 0x1.494f8p
-3},
71 {-0x1.ff7044a68a7fap
-80, 0x1.7e86f9c2154fbp
-24, 0x1.633a8p
-3},
72 {-0x1.aa21694561327p
-81, 0x1.8e3cfc25f0ce6p
-26, 0x1.7046p
-3},
73 {-0x1.d209f2d4239c6p
-87, 0x1.57f7a64ccd537p
-28, 0x1.8a898p
-3},
74 {-0x1.a55e97e60e632p
-76, -0x1.a761c09fbd2aep
-22, 0x1.97c2p
-3},
75 {0x1.261179225541ep
-76, 0x1.24bea9a2c66f3p
-22, 0x1.b26p
-3},
76 {-0x1.08fa30510fca9p
-82, -0x1.60002ccfe43f5p
-25, 0x1.bfc68p
-3},
77 {-0x1.63ec8d56242f9p
-76, 0x1.69f220e97f22cp
-22, 0x1.dac2p
-3},
78 {0x1.8bcdaf0534365p
-76, -0x1.6164f64c210ep
-22, 0x1.e858p
-3},
79 {0x1.1003282896056p
-78, -0x1.0c1678ae89767p
-24, 0x1.01d9cp
-2},
80 {0x1.01bcc7025fa92p
-78, -0x1.f26a05c813d57p
-22, 0x1.08bdp
-2},
81 {-0x1.fe8a8648e9ebcp
-80, 0x1.4d8fc561c8d44p
-24, 0x1.169cp
-2},
82 {0x1.08dfb23650c75p
-79, -0x1.362ad8f7ca2dp
-22, 0x1.1d984p
-2},
83 {-0x1.f8d5a89861a5ep
-79, 0x1.2b13cd6c4d042p
-22, 0x1.249ccp
-2},
84 {-0x1.a1c872983511ep
-76, -0x1.1c8f11979a5dbp
-22, 0x1.32cp
-2},
85 {0x1.e8e21bff3336bp
-77, 0x1.c2ab3edefe569p
-23, 0x1.39de8p
-2},
86 {0x1.fd1994fb2c4a1p
-80, 0x1.7c3eca28e69cap
-26, 0x1.4106p
-2},
87 {0x1.6b94b51cf76b1p
-80, -0x1.34c4e99e1c6c6p
-24, 0x1.4f6fcp
-2},
88 {-0x1.31d55da1d0f66p
-76, -0x1.194a871b63619p
-22, 0x1.56b24p
-2},
89 {-0x1.378b22691e28bp
-77, 0x1.e3dd5c1c885aep
-23, 0x1.5dfdcp
-2},
90 {0x1.99e302970e411p
-83, -0x1.6ccf3b1129b7cp
-23, 0x1.6552cp
-2},
91 {0x1.20164a049664dp
-82, -0x1.2f346e2bf924bp
-23, 0x1.6cb1p
-2},
92 {-0x1.d14aac4d864c3p
-77, -0x1.fa61aaa59c1d8p
-23, 0x1.7b8ap
-2},
93 {0x1.496ab4e4b293fp
-79, 0x1.90c11fd32a3abp
-22, 0x1.8304cp
-2},
94 {-0x1.d209f2d4239c6p
-86, 0x1.57f7a64ccd537p
-27, 0x1.8a898p
-2},
95 {0x1.eae3326327babp
-81, 0x1.249ba76fee235p
-27, 0x1.9218p
-2},
96 {0x1.fa05bddfded8cp
-77, -0x1.aad2729b21ae5p
-23, 0x1.99b08p
-2},
97 {-0x1.624140d175ba2p
-77, 0x1.71810a5e1818p
-22, 0x1.a8ff8p
-2},
98 {0x1.f1c5160c515c1p
-81, -0x1.6172fe015e13cp
-27, 0x1.b0b68p
-2},
99 {-0x1.86a6204eec8cp
-79, 0x1.5ec6c1bfbf89ap
-24, 0x1.b877cp
-2},
100 {0x1.718f761dd3915p
-78, 0x1.678bf6cdedf51p
-24, 0x1.c0438p
-2},
101 {-0x1.d4ee66c3700e4p
-76, 0x1.c2d45fe43895ep
-22, 0x1.c819cp
-2},
102 {-0x1.7d14533586306p
-77, -0x1.9ee52ed49d71dp
-22, 0x1.cffbp
-2},
103 {0x1.5ce9fb5a7bb5bp
-81, 0x1.5786af187a96bp
-27, 0x1.d7e6cp
-2},
104 {-0x1.ae6face57ad3bp
-77, 0x1.3ab0dc56138c9p
-23, 0x1.dfdd8p
-2},
105 {0x1.5ac93b443d55fp
-78, 0x1.fe538ab34efb5p
-22, 0x1.e7df4p
-2},
106 {0x1.f1753e0ae1e8fp
-76, -0x1.e4fee07aa4b68p
-22, 0x1.efec8p
-2},
107 {0x1.cdfd4c297069bp
-76, -0x1.172f32fe67287p
-22, 0x1.f804cp
-2},
108 {0x1.97a0e8f3ba742p
-79, -0x1.9a83ff9ab9cc8p
-22, 0x1.00144p
-1},
109 {-0x1.800450f5b2357p
-78, -0x1.68cb06cece193p
-22, 0x1.042bep
-1},
110 {-0x1.a839041241fe7p
-78, 0x1.8cd71ddf82e2p
-22, 0x1.08494p
-1},
111 {0x1.ed0b8eeccca86p
-78, 0x1.5e18ab2df3ae6p
-22, 0x1.0c6cap
-1},
112 {0x1.3dd41df9689b3p
-79, 0x1.5dee4d9d8a273p
-25, 0x1.1096p
-1},
113 {-0x1.990555535afdp
-82, 0x1.fcd15f101c142p
-26, 0x1.14c56p
-1},
114 {-0x1.1773d02c9055cp
-77, -0x1.2474b0f992ba1p
-23, 0x1.18faep
-1},
115 {-0x1.4aeef330c53c1p
-78, 0x1.4b5a92a606047p
-24, 0x1.1d368p
-1},
116 {0x1.8e6ff749ebacbp
-77, 0x1.16186fcf54bbdp
-22, 0x1.21786p
-1},
117 {0x1.c09d761c548ebp
-84, 0x1.18efabeb7d722p
-27, 0x1.25c0ap
-1},
118 {0x1.aaa73a428e1e4p
-78, -0x1.e5fc7d238691dp
-24, 0x1.2a0f4p
-1},
119 {-0x1.af2f3d8b63fbap
-79, 0x1.f5809faf6283cp
-22, 0x1.2e644p
-1},
120 {-0x1.af2f3d8b63fbap
-79, 0x1.f5809faf6283cp
-22, 0x1.2e644p
-1},
121 {0x1.78de359f2bb88p
-77, 0x1.c6e1dcd0cb449p
-22, 0x1.32bfep
-1},
122 {-0x1.415ae1a715618p
-76, 0x1.76e0e8f74b4d5p
-22, 0x1.37222p
-1},
123 {-0x1.4991b5375621fp
-79, -0x1.cb82c89692d99p
-24, 0x1.3b8b2p
-1},
124 {-0x1.827d37deb2236p
-76, -0x1.63161c5432aebp
-22, 0x1.3ffaep
-1},
125 {0x1.9576edac01c78p
-77, 0x1.458104c41b901p
-22, 0x1.44716p
-1},
126 {0x1.9576edac01c78p
-77, 0x1.458104c41b901p
-22, 0x1.44716p
-1},
127 {-0x1.05a27b81e2219p
-77, -0x1.cd9d0cde578d5p
-22, 0x1.48efp
-1},
128 {0x1.237616778b4bap
-82, 0x1.b9884591add87p
-26, 0x1.4d738p
-1},
129 {0x1.3b7d7e5d148bbp
-76, 0x1.c6042978605ffp
-22, 0x1.51ff2p
-1},
130 {-0x1.cc3f936a5977cp
-79, -0x1.fc4c96b37dcf6p
-22, 0x1.56922p
-1},
131 {0x1.20164a049664dp
-83, -0x1.2f346e2bf924bp
-24, 0x1.5b2c4p
-1},
132 {0x1.20164a049664dp
-83, -0x1.2f346e2bf924bp
-24, 0x1.5b2c4p
-1},
133 {-0x1.a212919a92f7ap
-77, 0x1.c4e4fbb68a4d1p
-22, 0x1.5fcdcp
-1},
134 {-0x1.b64b03f7230ddp
-77, -0x1.9d499bd9b3226p
-23, 0x1.6476ep
-1},
135 {-0x1.1ec6379e6e3b9p
-77, -0x1.f89b355ede26fp
-23, 0x1.69278p
-1},
136 {-0x1.1ec6379e6e3b9p
-77, -0x1.f89b355ede26fp
-23, 0x1.69278p
-1},
137 {-0x1.4ba44c03bfbbdp
-78, 0x1.53c7e319f6e92p
-24, 0x1.6ddfcp
-1},
138 {-0x1.c36fc650d030fp
-77, -0x1.b291f070528c7p
-22, 0x1.729fep
-1},
139 {-0x1.69e5693a7f067p
-80, 0x1.2967a451a7b48p
-25, 0x1.7767cp
-1},
140 {-0x1.69e5693a7f067p
-80, 0x1.2967a451a7b48p
-25, 0x1.7767cp
-1},
141 {0x1.6598aae91499ap
-76, 0x1.244fcff690fcep
-22, 0x1.7c37ap
-1},
142 {0x1.99d61ec432837p
-77, 0x1.46fd97f5dc572p
-23, 0x1.810fap
-1},
143 {0x1.99d61ec432837p
-77, 0x1.46fd97f5dc572p
-23, 0x1.810fap
-1},
144 {0x1.855c42078f81bp
-76, -0x1.f3a7352663e5p
-22, 0x1.85efep
-1},
145 {-0x1.59408e815107p
-77, 0x1.b3cda690370b5p
-23, 0x1.8ad84p
-1},
146 {-0x1.59408e815107p
-77, 0x1.b3cda690370b5p
-23, 0x1.8ad84p
-1},
147 {0x1.33b318085e50ap
-78, 0x1.3226b211bf1d9p
-23, 0x1.8fc92p
-1},
148 {0x1.343fe7c9cb4aep
-79, 0x1.d24b136c101eep
-23, 0x1.94c28p
-1},
149 {0x1.343fe7c9cb4aep
-79, 0x1.d24b136c101eep
-23, 0x1.94c28p
-1},
150 {-0x1.d19522e56fe6p
-76, 0x1.7c40c7907e82ap
-22, 0x1.99c48p
-1},
151 {-0x1.23b9d8ea55c3ep
-77, -0x1.e81781d97ee91p
-22, 0x1.9ecf6p
-1},
152 {-0x1.23b9d8ea55c3ep
-77, -0x1.e81781d97ee91p
-22, 0x1.9ecf6p
-1},
153 {0x1.829440c24aeb6p
-78, -0x1.6a77813f94e01p
-22, 0x1.a3e3p
-1},
154 {-0x1.624140d175ba2p
-76, -0x1.1cfdeb43cfdp
-22, 0x1.a8ffap
-1},
155 {-0x1.624140d175ba2p
-76, -0x1.1cfdeb43cfdp
-22, 0x1.a8ffap
-1},
156 {0x1.afa6f024fb045p
-77, -0x1.f983f74d3138fp
-23, 0x1.ae256p
-1},
157 {-0x1.603ad3a5d326dp
-78, -0x1.e278ae1a1f51fp
-23, 0x1.b3546p
-1},
158 {-0x1.603ad3a5d326dp
-78, -0x1.e278ae1a1f51fp
-23, 0x1.b3546p
-1},
159 {-0x1.0c1e0e5855d6ap
-77, -0x1.97552b7b5ea45p
-23, 0x1.b88ccp
-1},
160 {-0x1.0c1e0e5855d6ap
-77, -0x1.97552b7b5ea45p
-23, 0x1.b88ccp
-1},
161 {0x1.c817ad56baa16p
-78, -0x1.19b4f3c72c4f8p
-24, 0x1.bdceap
-1},
162 {0x1.44c47ac1bf62bp
-77, 0x1.f7402d26f1a12p
-23, 0x1.c31a2p
-1},
163 {0x1.44c47ac1bf62bp
-77, 0x1.f7402d26f1a12p
-23, 0x1.c31a2p
-1},
164 {-0x1.69b9465eae1e6p
-78, -0x1.2056d5dd31d96p
-23, 0x1.c86f8p
-1},
165 {-0x1.69b9465eae1e6p
-78, -0x1.2056d5dd31d96p
-23, 0x1.c86f8p
-1},
166 {-0x1.24a6d9d1d1904p
-79, -0x1.6e46335aae723p
-24, 0x1.cdcecp
-1},
167 {-0x1.3826144575ac4p
-76, -0x1.beb244c59f331p
-22, 0x1.d3382p
-1},
168 {-0x1.3826144575ac4p
-76, -0x1.beb244c59f331p
-22, 0x1.d3382p
-1},
169 {0x1.dbc96b3b12b25p
-81, 0x1.16c071e93fd97p
-27, 0x1.d8abap
-1},
170 {0x1.dbc96b3b12b25p
-81, 0x1.16c071e93fd97p
-27, 0x1.d8abap
-1},
171 {0x1.68a8ccdbd1f33p
-77, 0x1.d8175819530c2p
-22, 0x1.de298p
-1},
172 {0x1.68a8ccdbd1f33p
-77, 0x1.d8175819530c2p
-22, 0x1.de298p
-1},
173 {0x1.e586711df5ea1p
-79, 0x1.51bd552842c1cp
-23, 0x1.e3b2p
-1},
174 {0x1.e586711df5ea1p
-79, 0x1.51bd552842c1cp
-23, 0x1.e3b2p
-1},
175 {-0x1.bc25adf042483p
-79, 0x1.914e204f19d94p
-22, 0x1.e9452p
-1},
176 {-0x1.bc25adf042483p
-79, 0x1.914e204f19d94p
-22, 0x1.e9452p
-1},
177 {0x1.d7d82b65c5686p
-76, 0x1.c55d997da24fdp
-22, 0x1.eee32p
-1},
178 {0x1.d7d82b65c5686p
-76, 0x1.c55d997da24fdp
-22, 0x1.eee32p
-1},
179 {-0x1.3f108c0857ca3p
-77, -0x1.685c2d2298a6ep
-22, 0x1.f48c4p
-1},
180 {-0x1.3f108c0857ca3p
-77, -0x1.685c2d2298a6ep
-22, 0x1.f48c4p
-1},
181 {-0x1.bd800bca7a221p
-78, 0x1.7a4887bd74039p
-22, 0x1.fa406p
-1},
185 // Look up table for the second range reduction step:
186 // Generated by Sollya with:
187 // > for i from -64 to 128 do {
188 // r = 2^-16 * nearestint(2^16 / (1 + i * 2^-14) );
190 // b = round(a, D, RN);
191 // c = round(a - b, D, RN);
192 // print("{", c, ", ", b, "},");
194 static constexpr DoubleDouble LOG2_R2_DD
[] = {
195 {0x1.ff25180953e64p
-62, -0x1.720c2ab2312a9p
-8},
196 {-0x1.15ffd79560d8fp
-62, -0x1.6c4c92b1478ffp
-8},
197 {0x1.b8d6d6f2e3579p
-62, -0x1.668ce3c873549p
-8},
198 {-0x1.5bfc3f0d5ef71p
-62, -0x1.60cd1df6fde91p
-8},
199 {-0x1.d1f7a8777984ap
-64, -0x1.5b0d413c30b5ep
-8},
200 {0x1.8e858515b8343p
-66, -0x1.554d4d97551abp
-8},
201 {0x1.e165c4014c1f2p
-62, -0x1.4f8d4307b46ecp
-8},
202 {0x1.0f84b2cc14c7ep
-63, -0x1.49cd218c9800bp
-8},
203 {0x1.de618ed0db9a6p
-62, -0x1.440ce9254916cp
-8},
204 {-0x1.f6b8587e64f22p
-62, -0x1.3e4c99d110ee7p
-8},
205 {-0x1.7f793c84cfa63p
-64, -0x1.388c338f38bdp
-8},
206 {-0x1.7d7ecf6258c9ap
-65, -0x1.32cbb65f09aeep
-8},
207 {-0x1.810bc5ac188f5p
-62, -0x1.2d0b223fcce81p
-8},
208 {-0x1.950035fc5b67cp
-62, -0x1.274a7730cb841p
-8},
209 {0x1.4f47f3048cdadp
-62, -0x1.2189b5314e95dp
-8},
210 {0x1.269519861e298p
-68, -0x1.1bc8dc409f279p
-8},
211 {-0x1.5c2b0a46a7e2fp
-62, -0x1.1607ec5e063b3p
-8},
212 {0x1.5001ac8f0bda8p
-63, -0x1.1046e588cccap
-8},
213 {0x1.106f246af5d41p
-62, -0x1.0a85c7c03bc4ap
-8},
214 {0x1.82a00583b34bap
-66, -0x1.0354423e3c666p
-8},
215 {0x1.b6f37deb3137p
-65, -0x1.fb25e19f11aecp
-9},
216 {-0x1.44a2140444811p
-63, -0x1.efa310d6550ecp
-9},
217 {0x1.f5e68a763133fp
-63, -0x1.e4201220d4858p
-9},
218 {0x1.692083115f0b9p
-63, -0x1.d89ce57d219a6p
-9},
219 {0x1.144bb17b9ac9cp
-63, -0x1.cd198ae9cdc3dp
-9},
220 {0x1.ee7f086d32c05p
-63, -0x1.c19602656a671p
-9},
221 {-0x1.d4f1167538dbep
-63, -0x1.b6124bee88d82p
-9},
222 {0x1.7df8d226c67ep
-63, -0x1.aa8e6783ba5a2p
-9},
223 {0x1.60545d61b9512p
-63, -0x1.9f0a5523901ebp
-9},
224 {0x1.54c99c291702p
-63, -0x1.938614cc9b468p
-9},
225 {-0x1.a7e678d7280dep
-64, -0x1.8801a67d6ce1p
-9},
226 {-0x1.6d419bbeb223ap
-64, -0x1.7c7d0a3495ec9p
-9},
227 {0x1.ce2b9892e27e9p
-64, -0x1.70f83ff0a7565p
-9},
228 {-0x1.a4db4eff7bd61p
-63, -0x1.657347b031fa2p
-9},
229 {0x1.5bb04682fab82p
-63, -0x1.59ee2171c6a2fp
-9},
230 {-0x1.78b8bfe6a3adep
-64, -0x1.4e68cd33f60a3p
-9},
231 {0x1.574c3ce9b89b1p
-63, -0x1.42e34af550d87p
-9},
232 {0x1.08fb216647b7bp
-63, -0x1.375d9ab467a4dp
-9},
233 {0x1.ed5a50e7b919cp
-66, -0x1.2bd7bc6fcaf56p
-9},
234 {0x1.91ad7a23f86fep
-63, -0x1.2051b0260b3fp
-9},
235 {0x1.3ab2c932b8b0ap
-64, -0x1.14cb75d5b8e54p
-9},
236 {-0x1.c63bcdf120f7ap
-63, -0x1.09450d7d643a9p
-9},
237 {0x1.8af8c4ab4e82dp
-64, -0x1.fb7cee373b008p
-10},
238 {0x1.a52c2ca9d8b9bp
-65, -0x1.e46f655de9cc6p
-10},
239 {-0x1.460b177a58742p
-64, -0x1.cd61806bf5166p
-10},
240 {0x1.611089de8d12ap
-66, -0x1.b6533f5e7cf9bp
-10},
241 {-0x1.4209853cee70cp
-69, -0x1.9f44a232a16eep
-10},
242 {0x1.964e032541a28p
-64, -0x1.8835a8e5824c3p
-10},
243 {-0x1.fa9f94392637bp
-66, -0x1.712653743f454p
-10},
244 {-0x1.3293693721a53p
-64, -0x1.5a16a1dbf7eb6p
-10},
245 {-0x1.6e2af03c83c6ep
-68, -0x1.43069419cbad5p
-10},
246 {-0x1.b5f05b9d5bd29p
-65, -0x1.2bf62a2ad9d74p
-10},
247 {0x1.3db883c072f72p
-64, -0x1.14e5640c4193p
-10},
248 {-0x1.a675a1c045304p
-68, -0x1.fba8837643cf6p
-11},
249 {0x1.3b9c2aeb00068p
-66, -0x1.cd85866933743p
-11},
250 {-0x1.2911a381901ebp
-66, -0x1.9f61d0eb8f98bp
-11},
251 {-0x1.5ea75a74def03p
-68, -0x1.713d62f7957c3p
-11},
252 {-0x1.305b92f93ffep
-67, -0x1.43183c878218dp
-11},
253 {0x1.b7c8c8dd40d35p
-68, -0x1.14f25d959223ap
-11},
254 {0x1.dc915d58a62f6p
-66, -0x1.cd978c3804191p
-12},
255 {0x1.c7bc3fe53cd94p
-66, -0x1.7148ec2a1bfc9p
-12},
256 {-0x1.427ce595cc53cp
-67, -0x1.14f8daf5e3bcfp
-12},
257 {-0x1.d523885ac824cp
-67, -0x1.714eb11fa5363p
-13},
258 {-0x1.945957f63330ap
-69, -0x1.715193b17d35dp
-14},
260 {-0x1.88fb2ea8bf9eap
-70, 0x1.7157590356aeep
-14},
261 {-0x1.5aeaee345d04ep
-68, 0x1.715a3bc3593d5p
-13},
262 {-0x1.7fce430230132p
-66, 0x1.1505d6ee104c5p
-12},
263 {-0x1.9a480f204ff09p
-70, 0x1.716001718cb2bp
-12},
264 {-0x1.00e7233f2d8bdp
-68, 0x1.cdbb9d77ae5a8p
-12},
265 {0x1.09d379fa18c5dp
-67, 0x1.150c5586012b8p
-11},
266 {0x1.b6b9d90a104d3p
-65, 0x1.433b951d0b231p
-11},
267 {0x1.4d9a3ea651885p
-65, 0x1.716b8d86bc285p
-11},
268 {-0x1.7590b3a76f0f9p
-67, 0x1.9f9c3ec8db94fp
-11},
269 {0x1.f183ca5b21bfep
-65, 0x1.cdcda8e93107fp
-11},
270 {-0x1.a7e3465ba127p
-66, 0x1.fbffcbed8465fp
-11},
271 {-0x1.7821f738d1221p
-64, 0x1.151953edceec6p
-10},
272 {0x1.3bb4c0fb95359p
-65, 0x1.2c331e5ca2e7dp
-10},
273 {0x1.236028e962f8p
-64, 0x1.434d4546227fcp
-10},
274 {0x1.aaaa64d30f184p
-66, 0x1.5a67c8ad32315p
-10},
275 {-0x1.a821b7cc57a7ap
-64, 0x1.7182a894b69c6p
-10},
276 {-0x1.13d9d78aace21p
-64, 0x1.889de4ff94838p
-10},
277 {-0x1.2f249a6b923ap
-64, 0x1.9fb97df0b0cc2p
-10},
278 {-0x1.d47dc3664be7ap
-68, 0x1.b6d5736af07e6p
-10},
279 {0x1.bd1522c6418fbp
-64, 0x1.cdf1c57138c53p
-10},
280 {-0x1.bacdbb22d2163p
-64, 0x1.e50e74066eee6p
-10},
281 {-0x1.ca7604812d77bp
-64, 0x1.fc2b7f2d786a5p
-10},
282 {-0x1.2b6832f8830bfp
-63, 0x1.09a473749d663p
-9},
283 {0x1.4e712033d0457p
-65, 0x1.1533559e4de55p
-9},
284 {-0x1.473dd044017b5p
-66, 0x1.20c26615409f1p
-9},
285 {-0x1.e033bcac726d3p
-63, 0x1.2c51a4dae8915p
-9},
286 {-0x1.4a47a2b18a0fap
-63, 0x1.37e111f0b8cb5p
-9},
287 {0x1.6f3615771c17bp
-66, 0x1.4370ad58246ddp
-9},
288 {0x1.c0ee6c32d6236p
-65, 0x1.4f0077129eabp
-9},
289 {0x1.fa94c99761b8fp
-64, 0x1.5a906f219ac67p
-9},
290 {-0x1.979e6b473fbf8p
-64, 0x1.662095868c153p
-9},
291 {0x1.30edde8d24c7bp
-64, 0x1.71b0ea42e5fdap
-9},
292 {-0x1.d01594fe1421cp
-64, 0x1.7d416d581bf7cp
-9},
293 {0x1.50bf7b995b49ap
-63, 0x1.88d21ec7a18cdp
-9},
294 {-0x1.28ea2bcec5018p
-63, 0x1.9462fe92ea57cp
-9},
295 {0x1.ed6add489c30bp
-65, 0x1.9ff40cbb6a04bp
-9},
296 {0x1.201d5c3bbeb69p
-64, 0x1.ab85494294517p
-9},
297 {-0x1.a05d0d4461ea9p
-64, 0x1.b716b429dd0d3p
-9},
298 {-0x1.7c974c8a392fdp
-63, 0x1.c2a84d72b8189p
-9},
299 {-0x1.f068238451bdep
-64, 0x1.ce3a151e9965bp
-9},
300 {-0x1.5e4d95c6259c3p
-66, 0x1.d9cc0b2ef4f83p
-9},
301 {-0x1.1fc262efaad6cp
-63, 0x1.e55e2fa53ee53p
-9},
302 {0x1.49eee7abc7716p
-63, 0x1.f0f08282eb533p
-9},
303 {-0x1.903de284d2782p
-65, 0x1.fc8303c96e7a6p
-9},
304 {-0x1.ec564845134cbp
-63, 0x1.040ad9bd1e522p
-8},
305 {-0x1.7692b7791cf1fp
-66, 0x1.0861eadabc3dcp
-8},
306 {-0x1.37829afb11c1p
-62, 0x1.0e2b6b51e4f7ep
-8},
307 {0x1.6706b91c3b0bap
-62, 0x1.13f5030033459p
-8},
308 {-0x1.7558ccd710756p
-62, 0x1.19beb1e6616c9p
-8},
309 {0x1.79f72a5bbe9dep
-62, 0x1.1f88780529bb1p
-8},
310 {-0x1.e1297c110b25p
-62, 0x1.2552555d46886p
-8},
311 {0x1.29930d567ca26p
-62, 0x1.2b1c49ef72343p
-8},
312 {0x1.a08cbd7592a17p
-65, 0x1.30e655bc67275p
-8},
313 {0x1.e4f9d4ac5db83p
-62, 0x1.36b078c4dfd31p
-8},
314 {-0x1.ed1b0aafd30c2p
-62, 0x1.3c7ab30996b1cp
-8},
315 {0x1.e78f0aa014b32p
-62, 0x1.4245048b46462p
-8},
316 {0x1.8594548038a0fp
-69, 0x1.480f6d4aa91c2p
-8},
317 {0x1.3df498168a333p
-63, 0x1.4dd9ed4879c82p
-8},
318 {0x1.b1c502544f82ap
-62, 0x1.53a4848572e77p
-8},
319 {-0x1.dc50552fe0da9p
-63, 0x1.596f33024f203p
-8},
320 {-0x1.671d85c357d5ep
-62, 0x1.5f39f8bfc9212p
-8},
321 {0x1.1c670cabccefap
-64, 0x1.6504d5be9ba1ep
-8},
322 {-0x1.9983a9e98f318p
-62, 0x1.6acfc9ff8162fp
-8},
323 {0x1.ae1a26af3eebep
-62, 0x1.709ad583352d6p
-8},
324 {0x1.655eb510bfda3p
-62, 0x1.7665f84a71d35p
-8},
325 {-0x1.e287bc0192e15p
-64, 0x1.7c313255f22f8p
-8},
326 {0x1.cc4944139ccbfp
-63, 0x1.81fc83a671257p
-8},
327 {0x1.4e09b4cb8645bp
-62, 0x1.87c7ec3ca9a19p
-8},
328 {-0x1.5becc991e3a5fp
-64, 0x1.8d936c1956991p
-8},
329 {-0x1.ddfa3f1e15ba8p
-62, 0x1.935f033d3309ep
-8},
330 {-0x1.b7b06ea3fb362p
-62, 0x1.992ab1a8f9facp
-8},
331 {0x1.32d614904e46cp
-62, 0x1.9ef6775d667b4p
-8},
332 {-0x1.7186892b5bfaep
-64, 0x1.a4c2545b33a3ep
-8},
333 {-0x1.d4de10b28dfd8p
-62, 0x1.aa8e48a31c95cp
-8},
334 {0x1.4bb4b3bdc8175p
-62, 0x1.b05a5435dc7adp
-8},
335 {0x1.9cedbd1d7fba5p
-62, 0x1.b62677142e86p
-8},
336 {-0x1.0ed3379beaffdp
-66, 0x1.bbf2b13ecdf2fp
-8},
337 {0x1.6e86a125567a6p
-62, 0x1.c1bf02b67606p
-8},
338 {-0x1.35038e0c0a52cp
-62, 0x1.c6184f1b326d9p
-8},
339 {0x1.05ef8bf5adf5ep
-67, 0x1.cbe4c95b6c5abp
-8},
340 {-0x1.b7338b99a6b26p
-65, 0x1.d1b15aeab217cp
-8},
341 {0x1.9e901c30c427ep
-63, 0x1.d77e03c9bf0a4p
-8},
342 {-0x1.1f28a9c0b3d47p
-62, 0x1.dd4ac3f94ea0ap
-8},
343 {-0x1.140ef760d3b63p
-62, 0x1.e3179b7a1c52p
-8},
344 {-0x1.ab65b1037f517p
-63, 0x1.e8e48a4ce39e7p
-8},
345 {-0x1.76940c457ce6dp
-63, 0x1.eeb19072600edp
-8},
346 {0x1.da3ae65a605cfp
-64, 0x1.f47eadeb4d34dp
-8},
347 {0x1.b15d0bce2ede6p
-62, 0x1.fa4be2b866abp
-8},
348 {0x1.e02aa1fa9dc57p
-61, 0x1.000c976d340a6p
-7},
349 {0x1.6be971a5565b9p
-62, 0x1.02f34929068f3p
-7},
350 {-0x1.8a9319a6ed164p
-64, 0x1.05da069008be7p
-7},
351 {0x1.825079f1e0ec5p
-62, 0x1.08c0cfa298771p
-7},
352 {0x1.60d5749321466p
-63, 0x1.0ba7a461139c8p
-7},
353 {-0x1.5b8f4c479e2ep
-61, 0x1.0e8e84cbd8169p
-7},
354 {-0x1.e3e1248004e29p
-62, 0x1.117570e343d17p
-7},
355 {0x1.9ac06487c375p
-63, 0x1.145c68a7b4bddp
-7},
356 {0x1.f657ea5c03ea4p
-62, 0x1.17436c1988d0dp
-7},
357 {-0x1.5a965659a05e2p
-61, 0x1.1a2a7b391e04p
-7},
358 {-0x1.21ce9b9bfc512p
-61, 0x1.1d119606d2554p
-7},
359 {-0x1.30fda247ad0e1p
-61, 0x1.1ff8bc8303c7p
-7},
360 {-0x1.382c78a45cdeap
-62, 0x1.22dfeeae10601p
-7},
361 {0x1.46ae4a64073d4p
-61, 0x1.250d5bf952374p
-7},
362 {-0x1.dcad2cec3b84bp
-62, 0x1.27f4a29740a2fp
-7},
363 {-0x1.413fbeb0b0635p
-61, 0x1.2adbf4e50cdf9p
-7},
364 {0x1.f28e6a48bcb9p
-61, 0x1.2dc352e315049p
-7},
365 {-0x1.96f286e1eb086p
-61, 0x1.30aabc91b72ep
-7},
366 {-0x1.f88c04206dfa1p
-61, 0x1.339231f1517c1p
-7},
367 {-0x1.11ea20e195841p
-61, 0x1.3679b30242139p
-7},
368 {-0x1.d6e71452b674ap
-63, 0x1.39613fc4e71dcp
-7},
369 {-0x1.57c578233b1b3p
-61, 0x1.3c48d8399ec85p
-7},
370 {-0x1.ec430f03b76ep
-63, 0x1.3f307c60c7455p
-7},
371 {0x1.e00dd1902ffb9p
-61, 0x1.42182c3abecb5p
-7},
372 {-0x1.f22bcd96afe38p
-61, 0x1.44ffe7c7e3957p
-7},
373 {0x1.08fd90f841d3p
-61, 0x1.47e7af0893e2fp
-7},
374 {0x1.09594c5552bccp
-62, 0x1.4acf81fd2df7ep
-7},
375 {-0x1.01a8a652e5602p
-61, 0x1.4db760a6101c9p
-7},
376 {-0x1.826168febb3dp
-64, 0x1.509f4b03989dcp
-7},
377 {-0x1.7eb21a35021e3p
-62, 0x1.5387411625cccp
-7},
378 {-0x1.66cbc818e175p
-61, 0x1.566f42de15ff4p
-7},
379 {0x1.9b784dd6cebdap
-64, 0x1.5957505bc78f6p
-7},
380 {0x1.2b121ab482456p
-61, 0x1.5b8562298c65bp
-7},
381 {-0x1.5d29869dd8233p
-62, 0x1.5e6d842633702p
-7},
382 {-0x1.572a1b6cd63cfp
-61, 0x1.6155b1d99f672p
-7},
383 {-0x1.a1f355360e877p
-62, 0x1.643deb442eb59p
-7},
384 {-0x1.b6f1cd2e1c03fp
-61, 0x1.672630663fcadp
-7},
385 {-0x1.2aaa11ccddcaep
-61, 0x1.6a0e8140311aap
-7},
386 {0x1.3d979ddf4746cp
-61, 0x1.6cf6ddd2611d4p
-7},
387 {-0x1.dc930484501f8p
-63, 0x1.6fdf461d2e4f8p
-7},
390 LIBC_INLINE
bool is_odd_integer(float x
) {
391 using FloatProp
= typename
fputil::FloatProperties
<float>;
392 uint32_t x_u
= cpp::bit_cast
<uint32_t>(x
);
393 int x_e
= static_cast<int>((x_u
& FloatProp::EXPONENT_MASK
) >>
394 FloatProp::MANTISSA_WIDTH
);
395 int lsb
= unsafe_ctz(x_u
| FloatProp::EXPONENT_MASK
);
396 constexpr int UNIT_EXPONENT
=
397 static_cast<int>(FloatProp::EXPONENT_BIAS
+ FloatProp::MANTISSA_WIDTH
);
398 return (x_e
+ lsb
== UNIT_EXPONENT
);
401 LIBC_INLINE
bool is_integer(float x
) {
402 using FloatProp
= typename
fputil::FloatProperties
<float>;
403 uint32_t x_u
= cpp::bit_cast
<uint32_t>(x
);
404 int x_e
= static_cast<int>((x_u
& FloatProp::EXPONENT_MASK
) >>
405 FloatProp::MANTISSA_WIDTH
);
406 int lsb
= unsafe_ctz(x_u
| FloatProp::EXPONENT_MASK
);
407 constexpr int UNIT_EXPONENT
=
408 static_cast<int>(FloatProp::EXPONENT_BIAS
+ FloatProp::MANTISSA_WIDTH
);
409 return (x_e
+ lsb
>= UNIT_EXPONENT
);
412 LIBC_INLINE
bool larger_exponent(double a
, double b
) {
413 using DoubleBits
= typename
fputil::FPBits
<double>;
414 return DoubleBits(a
).get_unbiased_exponent() >=
415 DoubleBits(b
).get_unbiased_exponent();
418 // Calculate 2^(y * log2(x)) in double-double precision.
419 // At this point we can reuse the following values:
420 // idx_x: index for extra precision of log2 for the middle part of log2(x).
421 // dx: the reduced argument for log2(x)
423 // lo6_hi: the high part of 2^6 * (y - (hi + mid))
424 // exp2_hi_mid: high part of 2^(hi + mid)
425 double powf_double_double(int idx_x
, double dx
, double y6
, double lo6_hi
,
426 const DoubleDouble
&exp2_hi_mid
) {
427 using DoubleBits
= typename
fputil::FPBits
<double>;
428 using DoubleProp
= typename
fputil::FloatProperties
<double>;
429 // Perform a second range reduction step:
430 // idx2 = round(2^14 * (dx + 2^-8)) = round ( dx * 2^14 + 2^6)
431 // dx2 = (1 + dx) * r2 - 1
433 // -0x1.3ffcp-15 <= dx2 <= 0x1.3e3dp-15
434 int idx2
= static_cast<int>(
435 fputil::nearest_integer(fputil::multiply_add(dx
, 0x1.0p14
, 0x1.0p6
)));
436 double dx2
= fputil::multiply_add(1.0 + dx
, R2
[idx2
], -1.0); // Exact
438 // Degree-5 polynomial approximation of log2(1 + x)/x in double-double
439 // precision. Generate by Solya with:
440 // > P = fpminimax(log2(1 + x)/x, 5, [|DD...|],
441 // [-0x1.3ffcp-15, 0x1.3e3dp-15]);
442 // > dirtyinfnorm(log2(1 + x)/x - P, [-0x1.3ffcp-15, 0x1.3e3dp-15]);
444 constexpr DoubleDouble COEFFS
[] = {
445 {0x1.777d0ffda25ep
-56, 0x1.71547652b82fep0
},
446 {-0x1.777d101cf0a84p
-57, -0x1.71547652b82fep
-1},
447 {0x1.ce04b5140d867p
-56, 0x1.ec709dc3a03fdp
-2},
448 {0x1.137b47e635be5p
-56, -0x1.71547652b82fbp
-2},
449 {-0x1.b5a30b3bdb318p
-58, 0x1.2776c516a92a2p
-2},
450 {0x1.2d2fbd081e657p
-57, -0x1.ec70af1929ca6p
-3},
453 DoubleDouble
dx_dd({0.0, dx2
});
454 DoubleDouble p
= fputil::polyeval(dx_dd
, COEFFS
[0], COEFFS
[1], COEFFS
[2],
455 COEFFS
[3], COEFFS
[4], COEFFS
[5]);
456 // log2(1 + dx2) ~ dx2 * P(dx2)
457 DoubleDouble log2_x_lo
= fputil::quick_mult(dx2
, p
);
458 // Lower parts of (e_x - log2(r1)) of the first range reduction constant
459 DoubleDouble
log2_x_mid({LOG2_R_TD
[idx_x
].lo
, LOG2_R_TD
[idx_x
].mid
});
460 // -log2(r2) + lower part of (e_x - log2(r1))
461 DoubleDouble log2_x_m
= fputil::add(LOG2_R2_DD
[idx2
], log2_x_mid
);
462 // log2(1 + dx2) - log2(r2) + lower part of (e_x - log2(r1))
463 // Since we don't know which one has larger exponent to apply Fast2Sum
464 // algorithm, we need to check them before calling double-double addition.
465 DoubleDouble log2_x
= larger_exponent(log2_x_m
.hi
, log2_x_lo
.hi
)
466 ? fputil::add(log2_x_m
, log2_x_lo
)
467 : fputil::add(log2_x_lo
, log2_x_m
);
468 DoubleDouble
lo6_hi_dd({0.0, lo6_hi
});
469 // 2^6 * y * (log2(1 + dx2) - log2(r2) + lower part of (e_x - log2(r1)))
470 DoubleDouble prod
= fputil::quick_mult(y6
, log2_x
);
471 // 2^6 * (y * log2(x) - (hi + mid)) = 2^6 * lo
472 DoubleDouble lo6
= larger_exponent(prod
.hi
, lo6_hi
)
473 ? fputil::add(prod
, lo6_hi_dd
)
474 : fputil::add(lo6_hi_dd
, prod
);
476 constexpr DoubleDouble EXP2_COEFFS
[] = {
478 {0x1.abc9e3b398024p
-62, 0x1.62e42fefa39efp
-7},
479 {-0x1.5e43a5429bddbp
-69, 0x1.ebfbdff82c58fp
-15},
480 {-0x1.d33162491268fp
-77, 0x1.c6b08d704a0cp
-23},
481 {0x1.4fb32d240a14ep
-86, 0x1.3b2ab6fba4e77p
-31},
482 {0x1.e84e916be83ep
-97, 0x1.5d87fe78a6731p
-40},
483 {-0x1.9a447bfddc5e6p
-103, 0x1.430912f86bfb8p
-49},
484 {-0x1.31a55719de47fp
-113, 0x1.ffcbfc588ded9p
-59},
485 {-0x1.0ba57164eb36bp
-122, 0x1.62c034beb8339p
-68},
486 {-0x1.8483eabd9642dp
-132, 0x1.b5251ff97bee1p
-78},
489 DoubleDouble pp
= fputil::polyeval(
490 lo6
, EXP2_COEFFS
[0], EXP2_COEFFS
[1], EXP2_COEFFS
[2], EXP2_COEFFS
[3],
491 EXP2_COEFFS
[4], EXP2_COEFFS
[5], EXP2_COEFFS
[6], EXP2_COEFFS
[7],
492 EXP2_COEFFS
[8], EXP2_COEFFS
[9]);
493 DoubleDouble rr
= fputil::quick_mult(exp2_hi_mid
, pp
);
495 // Make sure the sum is normalized:
496 DoubleDouble r
= fputil::exact_add(rr
.hi
, rr
.lo
);
498 uint64_t r_bits
= cpp::bit_cast
<uint64_t>(r
.hi
);
499 if (LIBC_UNLIKELY(((r_bits
& 0xfff'ffff) == 0) && (r
.lo
!= 0.0))) {
500 bool hi_sign
= DoubleBits(r
.hi
).get_sign();
501 bool lo_sign
= DoubleBits(r
.lo
).get_sign();
502 if (hi_sign
== lo_sign
) {
504 } else if ((r_bits
& DoubleProp::MANTISSA_MASK
) > 0) {
509 return cpp::bit_cast
<double>(r_bits
);
514 LLVM_LIBC_FUNCTION(float, powf
, (float x
, float y
)) {
515 using FloatBits
= typename
fputil::FPBits
<float>;
516 using FloatProp
= typename
fputil::FloatProperties
<float>;
517 using DoubleProp
= typename
fputil::FloatProperties
<double>;
518 FloatBits
xbits(x
), ybits(y
);
520 uint32_t x_u
= xbits
.uintval();
521 uint32_t x_abs
= x_u
& FloatProp::EXP_MANT_MASK
;
522 uint32_t y_u
= ybits
.uintval();
523 uint32_t y_abs
= y_u
& FloatProp::EXP_MANT_MASK
;
525 ///////// BEGIN - Check exceptional cases ////////////////////////////////////
527 // The single precision number that is closest to 1 is (1 - 2^-24), which has
528 // log2(1 - 2^-24) ~ -1.715...p-24.
529 // So if |y| > 151 * 2^24, and x is finite:
530 // |y * log2(x)| = 0 or > 151.
531 // Hence x^y will either overflow or underflow if x is not zero.
532 if (LIBC_UNLIKELY((y_abs
& 0x007f'ffff) == 0) || (y_abs
> 0x4f170000)) {
533 // Exceptional exponents.
535 case 0x0000'0000: { // y = +-0.0f
538 case 0x7f80'0000: { // y = +-Inf
539 if (x_abs
> 0x7f80'0000) {
540 // pow(NaN, +-Inf) = NaN
543 if (x_abs
== 0x3f80'0000) {
544 // pow(+-1, +-Inf) = 1.0f
547 if (x_abs
== 0 && y_u
== 0xff80'0000) {
548 // pow(+-0, -Inf) = +inf and raise FE_DIVBYZERO
549 fputil::set_errno_if_required(EDOM
);
550 fputil::raise_except_if_required(FE_DIVBYZERO
);
551 return FloatBits::inf().get_val();
553 // pow (|x| < 1, -inf) = +inf
554 // pow (|x| < 1, +inf) = 0.0f
555 // pow (|x| > 1, -inf) = 0.0f
556 // pow (|x| > 1, +inf) = +inf
557 return ((x_abs
< 0x3f80'0000) == (y_u
== 0xff80'0000))
558 ? FloatBits::inf().get_val()
562 // Speed up for common exponents
564 case 0x3f00'0000: // y = 0.5f
565 // pow(x, 1/2) = sqrt(x)
566 return fputil::sqrt(x
);
567 case 0x3f80'0000: // y = 1.0f
569 case 0x4000'0000: // y = 2.0f
572 // TODO: Enable special case speed-up for x^(-1/2) when rsqrt is ready.
573 // case 0xbf00'0000: // pow(x, -1/2) = rsqrt(x)
576 if (y_abs
> 0x4f17'0000) {
577 if (y_abs
> 0x7f80'0000) {
579 if (x_u
== 0x3f80'0000) { // x = 1.0f
586 // x^y will be overflow / underflow in single precision. Set y to a
587 // large enough exponent but not too large, so that the computations
588 // won't be overflow in double precision.
589 y
= cpp::bit_cast
<float>((y_u
& FloatProp::SIGN_MASK
) + 0x4f800000U
);
594 int ex
= -FloatBits::EXPONENT_BIAS
;
597 // y is finite and non-zero.
598 if (LIBC_UNLIKELY(((x_u
& 0x801f'ffffU
) == 0) || x_u
>= 0x7f80'0000U
||
599 x_u
< 0x0080'0000U
)) {
601 case 0x3f80'0000: // x = 1.0f
603 // TODO: Put these 2 entrypoint dependency under control flag.
604 case 0x4000'0000: // x = 2.0f
605 // pow(2, y) = exp2(y)
606 return generic::exp2f(y
);
607 case 0x4120'0000: // x = 10.0f
608 // pow(10, y) = exp10(y)
609 return generic::exp10f(y
);
612 bool x_sign
= x_u
>= FloatProp::SIGN_MASK
;
615 case 0x0000'0000: { // x = +-0.0f
616 bool x_sign
= (x_u
>= FloatProp::SIGN_MASK
);
617 bool out_sign
= x_sign
&& is_odd_integer(FloatBits(y_u
).get_val());
618 if (y_u
> 0x8000'0000U
) {
619 // pow(0, negative number) = inf
620 fputil::set_errno_if_required(EDOM
);
621 fputil::raise_except_if_required(FE_DIVBYZERO
);
622 return FloatBits::inf(out_sign
).get_val();
624 // pow(0, positive number) = 0
625 return out_sign
? -0.0f
: 0.0f
;
627 case 0x7f80'0000: { // x = +-Inf
628 bool x_sign
= (x_u
>= FloatProp::SIGN_MASK
);
629 bool out_sign
= x_sign
&& is_odd_integer(FloatBits(y_u
).get_val());
630 if (y_u
>= FloatProp::SIGN_MASK
) {
631 return out_sign
? -0.0f
: 0.0f
;
633 return FloatBits::inf(out_sign
).get_val();
637 if (x_abs
> 0x7f80'0000) {
639 // pow (aNaN, 0) is already taken care above.
643 // Normalize denormal inputs.
644 if (x_abs
< 0x0080'0000U
) {
649 // x is finite and negative, and y is a finite integer.
653 if (is_odd_integer(y
)) {
655 sign
= 0x8000'0000'0000'0000ULL
;
658 // pow( negative, non-integer ) = NaN
659 fputil::set_errno_if_required(EDOM
);
660 fputil::raise_except_if_required(FE_INVALID
);
661 return FloatBits::build_quiet_nan(0);
666 ///////// END - Check exceptional cases //////////////////////////////////////
668 // x^y = 2^( y * log2(x) )
669 // = 2^( y * ( e_x + log2(m_x) ) )
670 // First we compute log2(x) = e_x + log2(m_x)
671 x_u
= FloatBits(x
).uintval();
673 // Extract exponent field of x.
674 ex
+= (x_u
>> FloatProp::MANTISSA_WIDTH
);
675 double e_x
= static_cast<double>(ex
);
676 // Use the highest 7 fractional bits of m_x as the index for look up tables.
677 uint32_t x_mant
= x_u
& FloatProp::MANTISSA_MASK
;
678 int idx_x
= static_cast<int>(x_mant
>> (FloatProp::MANTISSA_WIDTH
- 7));
679 // Add the hidden bit to the mantissa.
681 float m_x
= cpp::bit_cast
<float>(x_mant
| 0x3f800000);
683 // Reduced argument for log2(m_x):
685 // The computation is exact, and -2^-8 <= dx < 2^-7.
686 // Then m_x = (1 + dx) / r, and
687 // log2(m_x) = log2( (1 + dx) / r )
688 // = log2(1 + dx) - log2(r).
690 #ifdef LIBC_TARGET_CPU_HAS_FMA
691 dx
= static_cast<double>(fputil::multiply_add(m_x
, R
[idx_x
], -1.0f
)); // Exact
693 dx
= fputil::multiply_add(static_cast<double>(m_x
), RD
[idx_x
], -1.0); // Exact
694 #endif // LIBC_TARGET_CPU_HAS_FMA
696 // Degree-5 polynomial approximation:
697 // dx * P(dx) ~ log2(1 + dx)
698 // Generated by Sollya with:
699 // > P = fpminimax(log2(1 + x)/x, 5, [|D...|], [-2^-8, 2^-7]);
700 // > dirtyinfnorm(log2(1 + x)/x - P, [-2^-8, 2^-7]);
702 constexpr double COEFFS
[] = {0x1.71547652b82fep0
, -0x1.71547652b7a07p
-1,
703 0x1.ec709dc458db1p
-2, -0x1.715479c2266c9p
-2,
704 0x1.2776ae1ddf8fp
-2, -0x1.e7b2178870157p
-3};
706 double dx2
= dx
* dx
; // Exact
707 double c0
= fputil::multiply_add(dx
, COEFFS
[1], COEFFS
[0]);
708 double c1
= fputil::multiply_add(dx
, COEFFS
[3], COEFFS
[2]);
709 double c2
= fputil::multiply_add(dx
, COEFFS
[5], COEFFS
[4]);
711 double p
= fputil::polyeval(dx2
, c0
, c1
, c2
);
713 //////////////////////////////////////////////////////////////////////////////
714 // NOTE: For some reason, this is significantly less efficient than above!
716 // > P = fpminimax(log2(1 + x)/x, 4, [|D...|], [-2^-8, 2^-7]);
717 // > dirtyinfnorm(log2(1 + x)/x - P, [-2^-8, 2^-7]);
719 // constexpr double COEFFS[] = {0x1.71547652b8133p0, -0x1.71547652d1e33p-1,
720 // 0x1.ec70a098473dep-2, -0x1.7154c5ccdf121p-2,
721 // 0x1.2514fd90a130ap-2};
723 // double dx2 = dx * dx;
724 // double c0 = fputil::multiply_add(dx, COEFFS[1], COEFFS[0]);
725 // double c1 = fputil::multiply_add(dx, COEFFS[3], COEFFS[2]);
726 // double p = fputil::polyeval(dx2, c0, c1, COEFFS[4]);
727 //////////////////////////////////////////////////////////////////////////////
729 // s = e_x - log2(r) + dx * P(dx)
730 // Approximation errors:
731 // |log2(x) - s| < ulp(e_x) + (bounds on dx) * (error bounds of P(dx))
732 // = ulp(e_x) + 2^-7 * 2^-51
733 // < 2^8 * 2^-52 + 2^-7 * 2^-43
735 double s
= fputil::multiply_add(dx
, p
, LOG2_R
[idx_x
] + e_x
);
737 // To compute 2^(y * log2(x)), we break the exponent into 3 parts:
738 // y * log(2) = hi + mid + lo, where
740 // mid * 2^6 is an integer
743 // x^y = 2^(y * log2(x)) = 2^hi * 2^mid * 2^lo,
744 // In which 2^mid is obtained from a look-up table of size 2^6 = 64 elements,
745 // and 2^lo ~ 1 + lo * P(lo).
747 // hi + mid = 2^-6 * round( 2^6 * y * log2(x) )
748 // If we restrict the output such that |hi| < 150, (hi + mid) uses (8 + 6)
749 // bits, hence, if we use double precision to perform
750 // round( 2^6 * y * log2(x))
751 // the lo part is bounded by 2^-7 + 2^(-(52 - 14)) = 2^-7 + 2^-38
753 // In the following computations:
755 // hm = 2^6 * (hi + mid) = round(2^6 * y * log2(x)) ~ round(y6 * s)
756 // lo6 = 2^6 * lo = 2^6 * (y - (hi + mid)) = y6 * log2(x) - hm.
757 double y6
= static_cast<double>(y
* 0x1.0p6f
); // Exact.
758 double hm
= fputil::nearest_integer(s
* y6
);
761 fputil::multiply_add(y6
, e_x
+ LOG2_R_TD
[idx_x
].hi
, -hm
); // Exact
763 // | (y*log2(x) - hm * 2^-6 - lo) / y| < err(dx * p) + err(LOG2_R_DD.lo)
765 double lo6
= fputil::multiply_add(
766 y6
, fputil::multiply_add(dx
, p
, LOG2_R_TD
[idx_x
].mid
), lo6_hi
);
768 // |2^(hi + mid) - exp2_hi_mid| <= ulp(exp2_hi_mid) / 2
769 // Clamp the exponent part into smaller range that fits double precision.
770 // For those exponents that are out of range, the final conversion will round
771 // them correctly to inf/max float or 0/min float accordingly.
772 int64_t hm_i
= static_cast<int64_t>(hm
);
773 hm_i
= (hm_i
> (1 << 15)) ? (1 << 15)
774 : (hm_i
< (-(1 << 15)) ? -(1 << 15) : hm_i
);
776 int idx_y
= hm_i
& 0x3f;
779 int64_t exp_hi_i
= (hm_i
>> 6) << DoubleProp::MANTISSA_WIDTH
;
781 int64_t exp_mid_i
= cpp::bit_cast
<uint64_t>(EXP2_MID1
[idx_y
].hi
);
782 // (-1)^sign * 2^hi * 2^mid
783 // Error <= 2^hi * 2^-53
784 uint64_t exp2_hi_mid_i
= static_cast<uint64_t>(exp_hi_i
+ exp_mid_i
) + sign
;
785 double exp2_hi_mid
= cpp::bit_cast
<double>(exp2_hi_mid_i
);
787 // Degree-5 polynomial approximation P(lo6) ~ 2^(lo6 / 2^6) = 2^(lo).
788 // Generated by Sollya with:
789 // > P = fpminimax(2^(x/64), 5, [|1, D...|], [-2^-1, 2^-1]);
790 // > dirtyinfnorm(2^(x/64) - P, [-0.5, 0.5]);
791 // 0x1.a2b77e618f5c4c176fd11b7659016cde5de83cb72p-60
792 constexpr double EXP2_COEFFS
[] = {0x1p
0,
793 0x1.62e42fefa39efp
-7,
794 0x1.ebfbdff82a23ap
-15,
795 0x1.c6b08d7076268p
-23,
796 0x1.3b2ad33f8b48bp
-31,
797 0x1.5d870c4d84445p
-40};
799 double lo6_sqr
= lo6
* lo6
;
800 double d0
= fputil::multiply_add(lo6
, EXP2_COEFFS
[1], EXP2_COEFFS
[0]);
801 double d1
= fputil::multiply_add(lo6
, EXP2_COEFFS
[3], EXP2_COEFFS
[2]);
802 double d2
= fputil::multiply_add(lo6
, EXP2_COEFFS
[5], EXP2_COEFFS
[4]);
803 double pp
= fputil::polyeval(lo6_sqr
, d0
, d1
, d2
);
805 double r
= pp
* exp2_hi_mid
;
807 // Ziv accuracy test.
808 uint64_t r_u
= cpp::bit_cast
<uint64_t>(r
);
809 float r_upper
= static_cast<float>(cpp::bit_cast
<double>(r_u
+ ERR
));
810 float r_lower
= static_cast<float>(cpp::bit_cast
<double>(r_u
- ERR
));
812 if (LIBC_LIKELY(r_upper
== r_lower
)) {
813 // Check for overflow or underflow.
814 if (LIBC_UNLIKELY(FloatBits(r_upper
).get_mantissa() == 0)) {
815 if (FloatBits(r_upper
).is_inf()) {
816 fputil::set_errno_if_required(ERANGE
);
817 fputil::raise_except_if_required(FE_OVERFLOW
);
818 } else if (r_upper
== 0.0f
) {
819 fputil::set_errno_if_required(ERANGE
);
820 fputil::raise_except_if_required(FE_UNDERFLOW
);
826 // Scale lower part of 2^(hi + mid)
827 DoubleDouble exp2_hi_mid_dd
;
830 ? cpp::bit_cast
<double>(exp_hi_i
+
831 cpp::bit_cast
<int64_t>(EXP2_MID1
[idx_y
].mid
))
833 exp2_hi_mid_dd
.hi
= exp2_hi_mid
;
835 return static_cast<float>(
836 powf_double_double(idx_x
, dx
, y6
, lo6_hi
, exp2_hi_mid_dd
)) +
838 // return static_cast<float>(r);
841 } // namespace LIBC_NAMESPACE