Cygwin: mmap: allow remapping part of an existing anonymous mapping
[newlib-cygwin.git] / newlib / libm / common / pow.c
blobff37716c5050aaf5bba7c970a24bb89bf04ebd4d
1 /* Double-precision x^y function.
2 Copyright (c) 2018 Arm Ltd. All rights reserved.
4 SPDX-License-Identifier: BSD-3-Clause
6 Redistribution and use in source and binary forms, with or without
7 modification, are permitted provided that the following conditions
8 are met:
9 1. Redistributions of source code must retain the above copyright
10 notice, this list of conditions and the following disclaimer.
11 2. Redistributions in binary form must reproduce the above copyright
12 notice, this list of conditions and the following disclaimer in the
13 documentation and/or other materials provided with the distribution.
14 3. The name of the company may not be used to endorse or promote
15 products derived from this software without specific prior written
16 permission.
18 THIS SOFTWARE IS PROVIDED BY ARM LTD ``AS IS'' AND ANY EXPRESS OR IMPLIED
19 WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
20 MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
21 IN NO EVENT SHALL ARM LTD BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
22 SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED
23 TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
24 PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
25 LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
26 NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
27 SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */
29 #include "fdlibm.h"
30 #if !__OBSOLETE_MATH
32 #include <math.h>
33 #include <stdint.h>
34 #include "math_config.h"
37 Worst-case error: 0.54 ULP (~= ulperr_exp + 1024*Ln2*relerr_log*2^53)
38 relerr_log: 1.3 * 2^-68 (Relative error of log, 1.5 * 2^-68 without fma)
39 ulperr_exp: 0.509 ULP (ULP error of exp, 0.511 ULP without fma)
42 #define T __pow_log_data.tab
43 #define A __pow_log_data.poly
44 #define Ln2hi __pow_log_data.ln2hi
45 #define Ln2lo __pow_log_data.ln2lo
46 #define N (1 << POW_LOG_TABLE_BITS)
47 #define OFF 0x3fe6955500000000
49 /* Top 12 bits of a double (sign and exponent bits). */
50 static inline uint32_t
51 top12 (double x)
53 return asuint64 (x) >> 52;
56 /* Compute y+TAIL = log(x) where the rounded result is y and TAIL has about
57 additional 15 bits precision. IX is the bit representation of x, but
58 normalized in the subnormal range using the sign bit for the exponent. */
59 static inline double_t
60 log_inline (uint64_t ix, double_t *tail)
62 /* double_t for better performance on targets with FLT_EVAL_METHOD==2. */
63 double_t z, r, y, invc, logc, logctail, kd, hi, t1, t2, lo, lo1, lo2, p;
64 uint64_t iz, tmp;
65 int k, i;
67 /* x = 2^k z; where z is in range [OFF,2*OFF) and exact.
68 The range is split into N subintervals.
69 The ith subinterval contains z and c is near its center. */
70 tmp = ix - OFF;
71 i = (tmp >> (52 - POW_LOG_TABLE_BITS)) % N;
72 k = (int64_t) tmp >> 52; /* arithmetic shift */
73 iz = ix - (tmp & 0xfffULL << 52);
74 z = asdouble (iz);
75 kd = (double_t) k;
77 /* log(x) = k*Ln2 + log(c) + log1p(z/c-1). */
78 invc = T[i].invc;
79 logc = T[i].logc;
80 logctail = T[i].logctail;
82 /* Note: 1/c is j/N or j/N/2 where j is an integer in [N,2N) and
83 |z/c - 1| < 1/N, so r = z/c - 1 is exactly representible. */
84 #if HAVE_FAST_FMA
85 r = fma (z, invc, -1.0);
86 #else
87 /* Split z such that rhi, rlo and rhi*rhi are exact and |rlo| <= |r|. */
88 double_t zhi = asdouble ((iz + (1ULL << 31)) & (-1ULL << 32));
89 double_t zlo = z - zhi;
90 double_t rhi = zhi * invc - 1.0;
91 double_t rlo = zlo * invc;
92 r = rhi + rlo;
93 #endif
95 /* k*Ln2 + log(c) + r. */
96 t1 = kd * Ln2hi + logc;
97 t2 = t1 + r;
98 lo1 = kd * Ln2lo + logctail;
99 lo2 = t1 - t2 + r;
101 /* Evaluation is optimized assuming superscalar pipelined execution. */
102 double_t ar, ar2, ar3, lo3, lo4;
103 ar = A[0] * r; /* A[0] = -0.5. */
104 ar2 = r * ar;
105 ar3 = r * ar2;
106 /* k*Ln2 + log(c) + r + A[0]*r*r. */
107 #if HAVE_FAST_FMA
108 hi = t2 + ar2;
109 lo3 = fma (ar, r, -ar2);
110 lo4 = t2 - hi + ar2;
111 #else
112 double_t arhi = A[0] * rhi;
113 double_t arhi2 = rhi * arhi;
114 hi = t2 + arhi2;
115 lo3 = rlo * (ar + arhi);
116 lo4 = t2 - hi + arhi2;
117 #endif
118 /* p = log1p(r) - r - A[0]*r*r. */
119 #if POW_LOG_POLY_ORDER == 8
120 p = (ar3
121 * (A[1] + r * A[2] + ar2 * (A[3] + r * A[4] + ar2 * (A[5] + r * A[6]))));
122 #endif
123 lo = lo1 + lo2 + lo3 + lo4 + p;
124 y = hi + lo;
125 *tail = hi - y + lo;
126 return y;
129 #undef N
130 #undef T
131 #define N (1 << EXP_TABLE_BITS)
132 #define InvLn2N __exp_data.invln2N
133 #define NegLn2hiN __exp_data.negln2hiN
134 #define NegLn2loN __exp_data.negln2loN
135 #define Shift __exp_data.shift
136 #define T __exp_data.tab
137 #define C2 __exp_data.poly[5 - EXP_POLY_ORDER]
138 #define C3 __exp_data.poly[6 - EXP_POLY_ORDER]
139 #define C4 __exp_data.poly[7 - EXP_POLY_ORDER]
140 #define C5 __exp_data.poly[8 - EXP_POLY_ORDER]
141 #define C6 __exp_data.poly[9 - EXP_POLY_ORDER]
143 /* Handle cases that may overflow or underflow when computing the result that
144 is scale*(1+TMP) without intermediate rounding. The bit representation of
145 scale is in SBITS, however it has a computed exponent that may have
146 overflown into the sign bit so that needs to be adjusted before using it as
147 a double. (int32_t)KI is the k used in the argument reduction and exponent
148 adjustment of scale, positive k here means the result may overflow and
149 negative k means the result may underflow. */
150 static inline double
151 specialcase (double_t tmp, uint64_t sbits, uint64_t ki)
153 double_t scale, y;
155 if ((ki & 0x80000000) == 0)
157 /* k > 0, the exponent of scale might have overflowed by <= 460. */
158 sbits -= 1009ull << 52;
159 scale = asdouble (sbits);
160 y = 0x1p1009 * (scale + scale * tmp);
161 return check_oflow (y);
163 /* k < 0, need special care in the subnormal range. */
164 sbits += 1022ull << 52;
165 /* Note: sbits is signed scale. */
166 scale = asdouble (sbits);
167 y = scale + scale * tmp;
168 if (fabs (y) < 1.0)
170 /* Round y to the right precision before scaling it into the subnormal
171 range to avoid double rounding that can cause 0.5+E/2 ulp error where
172 E is the worst-case ulp error outside the subnormal range. So this
173 is only useful if the goal is better than 1 ulp worst-case error. */
174 double_t hi, lo, one = 1.0;
175 if (y < 0.0)
176 one = -1.0;
177 lo = scale - y + scale * tmp;
178 hi = one + y;
179 lo = one - hi + y + lo;
180 y = eval_as_double (hi + lo) - one;
181 /* Fix the sign of 0. */
182 if (y == 0.0)
183 y = asdouble (sbits & 0x8000000000000000);
184 /* The underflow exception needs to be signaled explicitly. */
185 force_eval_double (opt_barrier_double (0x1p-1022) * 0x1p-1022);
187 y = 0x1p-1022 * y;
188 return check_uflow (y);
191 #define SIGN_BIAS (0x800 << EXP_TABLE_BITS)
193 /* Computes sign*exp(x+xtail) where |xtail| < 2^-8/N and |xtail| <= |x|.
194 The sign_bias argument is SIGN_BIAS or 0 and sets the sign to -1 or 1. */
195 static inline double
196 exp_inline (double x, double xtail, uint32_t sign_bias)
198 uint32_t abstop;
199 uint64_t ki, idx, top, sbits;
200 /* double_t for better performance on targets with FLT_EVAL_METHOD==2. */
201 double_t kd, z, r, r2, scale, tail, tmp;
203 abstop = top12 (x) & 0x7ff;
204 if (unlikely (abstop - top12 (0x1p-54) >= top12 (512.0) - top12 (0x1p-54)))
206 if (abstop - top12 (0x1p-54) >= 0x80000000)
208 /* Avoid spurious underflow for tiny x. */
209 /* Note: 0 is common input. */
210 double_t one = WANT_ROUNDING ? 1.0 + x : 1.0;
211 return sign_bias ? -one : one;
213 if (abstop >= top12 (1024.0))
215 /* Note: inf and nan are already handled. */
216 if (asuint64 (x) >> 63)
217 return __math_uflow (sign_bias);
218 else
219 return __math_oflow (sign_bias);
221 /* Large x is special cased below. */
222 abstop = 0;
225 /* exp(x) = 2^(k/N) * exp(r), with exp(r) in [2^(-1/2N),2^(1/2N)]. */
226 /* x = ln2/N*k + r, with int k and r in [-ln2/2N, ln2/2N]. */
227 z = InvLn2N * x;
228 #if TOINT_INTRINSICS
229 kd = roundtoint (z);
230 ki = converttoint (z);
231 #elif EXP_USE_TOINT_NARROW
232 /* z - kd is in [-0.5-2^-16, 0.5] in all rounding modes. */
233 kd = eval_as_double (z + Shift);
234 ki = asuint64 (kd) >> 16;
235 kd = (double_t) (int32_t) ki;
236 #else
237 /* z - kd is in [-1, 1] in non-nearest rounding modes. */
238 kd = eval_as_double (z + Shift);
239 ki = asuint64 (kd);
240 kd -= Shift;
241 #endif
242 r = x + kd * NegLn2hiN + kd * NegLn2loN;
243 /* The code assumes 2^-200 < |xtail| < 2^-8/N. */
244 r += xtail;
245 /* 2^(k/N) ~= scale * (1 + tail). */
246 idx = 2 * (ki % N);
247 top = (ki + sign_bias) << (52 - EXP_TABLE_BITS);
248 tail = asdouble (T[idx]);
249 /* This is only a valid scale when -1023*N < k < 1024*N. */
250 sbits = T[idx + 1] + top;
251 /* exp(x) = 2^(k/N) * exp(r) ~= scale + scale * (tail + exp(r) - 1). */
252 /* Evaluation is optimized assuming superscalar pipelined execution. */
253 r2 = r * r;
254 /* Without fma the worst case error is 0.25/N ulp larger. */
255 /* Worst case error is less than 0.5+1.11/N+(abs poly error * 2^53) ulp. */
256 #if EXP_POLY_ORDER == 4
257 tmp = tail + r + r2 * C2 + r * r2 * (C3 + r * C4);
258 #elif EXP_POLY_ORDER == 5
259 tmp = tail + r + r2 * (C2 + r * C3) + r2 * r2 * (C4 + r * C5);
260 #elif EXP_POLY_ORDER == 6
261 tmp = tail + r + r2 * (0.5 + r * C3) + r2 * r2 * (C4 + r * C5 + r2 * C6);
262 #endif
263 if (unlikely (abstop == 0))
264 return specialcase (tmp, sbits, ki);
265 scale = asdouble (sbits);
266 /* Note: tmp == 0 or |tmp| > 2^-200 and scale > 2^-739, so there
267 is no spurious underflow here even without fma. */
268 return scale + scale * tmp;
271 /* Returns 0 if not int, 1 if odd int, 2 if even int. The argument is
272 the bit representation of a non-zero finite floating-point value. */
273 static inline int
274 checkint (uint64_t iy)
276 int e = iy >> 52 & 0x7ff;
277 if (e < 0x3ff)
278 return 0;
279 if (e > 0x3ff + 52)
280 return 2;
281 if (iy & ((1ULL << (0x3ff + 52 - e)) - 1))
282 return 0;
283 if (iy & (1ULL << (0x3ff + 52 - e)))
284 return 1;
285 return 2;
288 /* Returns 1 if input is the bit representation of 0, infinity or nan. */
289 static inline int
290 zeroinfnan (uint64_t i)
292 return 2 * i - 1 >= 2 * asuint64 (INFINITY) - 1;
295 double
296 pow (double x, double y)
298 uint32_t sign_bias = 0;
299 uint64_t ix, iy;
300 uint32_t topx, topy;
302 ix = asuint64 (x);
303 iy = asuint64 (y);
304 topx = top12 (x);
305 topy = top12 (y);
306 if (unlikely (topx - 0x001 >= 0x7ff - 0x001
307 || (topy & 0x7ff) - 0x3be >= 0x43e - 0x3be))
309 /* Note: if |y| > 1075 * ln2 * 2^53 ~= 0x1.749p62 then pow(x,y) = inf/0
310 and if |y| < 2^-54 / 1075 ~= 0x1.e7b6p-65 then pow(x,y) = +-1. */
311 /* Special cases: (x < 0x1p-126 or inf or nan) or
312 (|y| < 0x1p-65 or |y| >= 0x1p63 or nan). */
313 if (unlikely (zeroinfnan (iy)))
315 if (2 * iy == 0)
316 return issignaling_inline (x) ? x + y : 1.0;
317 if (ix == asuint64 (1.0))
318 return issignaling_inline (y) ? x + y : 1.0;
319 if (2 * ix > 2 * asuint64 (INFINITY)
320 || 2 * iy > 2 * asuint64 (INFINITY))
321 return x + y;
322 if (2 * ix == 2 * asuint64 (1.0))
323 return 1.0;
324 if ((2 * ix < 2 * asuint64 (1.0)) == !(iy >> 63))
325 return 0.0; /* |x|<1 && y==inf or |x|>1 && y==-inf. */
326 return y * y;
328 if (unlikely (zeroinfnan (ix)))
330 double_t x2 = x * x;
331 if (ix >> 63 && checkint (iy) == 1)
333 x2 = -x2;
334 sign_bias = 1;
336 if (WANT_ERRNO && 2 * ix == 0 && iy >> 63)
337 return __math_divzero (sign_bias);
338 /* Without the barrier some versions of clang hoist the 1/x2 and
339 thus division by zero exception can be signaled spuriously. */
340 return iy >> 63 ? opt_barrier_double (1 / x2) : x2;
342 /* Here x and y are non-zero finite. */
343 if (ix >> 63)
345 /* Finite x < 0. */
346 int yint = checkint (iy);
347 if (yint == 0)
348 return __math_invalid (x);
349 if (yint == 1)
350 sign_bias = SIGN_BIAS;
351 ix &= 0x7fffffffffffffff;
352 topx &= 0x7ff;
354 if ((topy & 0x7ff) - 0x3be >= 0x43e - 0x3be)
356 /* Note: sign_bias == 0 here because y is not odd. */
357 if (ix == asuint64 (1.0))
358 return 1.0;
359 if ((topy & 0x7ff) < 0x3be)
361 /* |y| < 2^-65, x^y ~= 1 + y*log(x). */
362 if (WANT_ROUNDING)
363 return ix > asuint64 (1.0) ? 1.0 + y : 1.0 - y;
364 else
365 return 1.0;
367 return (ix > asuint64 (1.0)) == (topy < 0x800) ? __math_oflow (0)
368 : __math_uflow (0);
370 if (topx == 0)
372 /* Normalize subnormal x so exponent becomes negative. */
373 ix = asuint64 (x * 0x1p52);
374 ix &= 0x7fffffffffffffff;
375 ix -= 52ULL << 52;
379 double_t lo;
380 double_t hi = log_inline (ix, &lo);
381 double_t ehi, elo;
382 #if HAVE_FAST_FMA
383 ehi = y * hi;
384 elo = y * lo + fma (y, hi, -ehi);
385 #else
386 double_t yhi = asdouble (iy & -1ULL << 27);
387 double_t ylo = y - yhi;
388 double_t lhi = asdouble (asuint64 (hi) & -1ULL << 27);
389 double_t llo = hi - lhi + lo;
390 ehi = yhi * lhi;
391 elo = ylo * lhi + y * llo; /* |elo| < |ehi| * 2^-25. */
392 #endif
393 return exp_inline (ehi, elo, sign_bias);
395 #endif