1 /* Double-precision x^y function.
2 Copyright (c) 2018 Arm Ltd. All rights reserved.
4 SPDX-License-Identifier: BSD-3-Clause
6 Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are met:
9 1. Redistributions of source code must retain the above copyright
10 notice, this list of conditions and the following disclaimer.
11 2. Redistributions in binary form must reproduce the above copyright
12 notice, this list of conditions and the following disclaimer in the
13 documentation and/or other materials provided with the distribution.
14 3. The name of the company may not be used to endorse or promote
products derived from this software without specific prior written permission.
18 THIS SOFTWARE IS PROVIDED BY ARM LTD ``AS IS'' AND ANY EXPRESS OR IMPLIED
19 WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
20 MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
21 IN NO EVENT SHALL ARM LTD BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
22 SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED
23 TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
24 PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
25 LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
26 NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
27 SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */
34 #include "math_config.h"
/*
Worst-case error: 0.54 ULP (~= ulperr_exp + 1024*Ln2*relerr_log*2^53)
relerr_log: 1.3 * 2^-68 (Relative error of log, 1.5 * 2^-68 without fma)
ulperr_exp: 0.509 ULP (ULP error of exp, 0.511 ULP without fma)
*/
42 #define T __pow_log_data.tab
43 #define A __pow_log_data.poly
44 #define Ln2hi __pow_log_data.ln2hi
45 #define Ln2lo __pow_log_data.ln2lo
46 #define N (1 << POW_LOG_TABLE_BITS)
47 #define OFF 0x3fe6955500000000
/* Top 12 bits of a double (sign and exponent bits).  */
static inline uint32_t
top12 (double x)
{
  return asuint64 (x) >> 52;
}
56 /* Compute y+TAIL = log(x) where the rounded result is y and TAIL has about
57 additional 15 bits precision. IX is the bit representation of x, but
58 normalized in the subnormal range using the sign bit for the exponent. */
59 static inline double_t
60 log_inline (uint64_t ix
, double_t
*tail
)
62 /* double_t for better performance on targets with FLT_EVAL_METHOD==2. */
63 double_t z
, r
, y
, invc
, logc
, logctail
, kd
, hi
, t1
, t2
, lo
, lo1
, lo2
, p
;
67 /* x = 2^k z; where z is in range [OFF,2*OFF) and exact.
68 The range is split into N subintervals.
69 The ith subinterval contains z and c is near its center. */
71 i
= (tmp
>> (52 - POW_LOG_TABLE_BITS
)) % N
;
72 k
= (int64_t) tmp
>> 52; /* arithmetic shift */
73 iz
= ix
- (tmp
& 0xfffULL
<< 52);
77 /* log(x) = k*Ln2 + log(c) + log1p(z/c-1). */
80 logctail
= T
[i
].logctail
;
82 /* Note: 1/c is j/N or j/N/2 where j is an integer in [N,2N) and
83 |z/c - 1| < 1/N, so r = z/c - 1 is exactly representible. */
85 r
= fma (z
, invc
, -1.0);
87 /* Split z such that rhi, rlo and rhi*rhi are exact and |rlo| <= |r|. */
88 double_t zhi
= asdouble ((iz
+ (1ULL << 31)) & (-1ULL << 32));
89 double_t zlo
= z
- zhi
;
90 double_t rhi
= zhi
* invc
- 1.0;
91 double_t rlo
= zlo
* invc
;
95 /* k*Ln2 + log(c) + r. */
96 t1
= kd
* Ln2hi
+ logc
;
98 lo1
= kd
* Ln2lo
+ logctail
;
101 /* Evaluation is optimized assuming superscalar pipelined execution. */
102 double_t ar
, ar2
, ar3
, lo3
, lo4
;
103 ar
= A
[0] * r
; /* A[0] = -0.5. */
106 /* k*Ln2 + log(c) + r + A[0]*r*r. */
109 lo3
= fma (ar
, r
, -ar2
);
112 double_t arhi
= A
[0] * rhi
;
113 double_t arhi2
= rhi
* arhi
;
115 lo3
= rlo
* (ar
+ arhi
);
116 lo4
= t2
- hi
+ arhi2
;
118 /* p = log1p(r) - r - A[0]*r*r. */
119 #if POW_LOG_POLY_ORDER == 8
121 * (A
[1] + r
* A
[2] + ar2
* (A
[3] + r
* A
[4] + ar2
* (A
[5] + r
* A
[6]))));
123 lo
= lo1
+ lo2
+ lo3
+ lo4
+ p
;
131 #define N (1 << EXP_TABLE_BITS)
132 #define InvLn2N __exp_data.invln2N
133 #define NegLn2hiN __exp_data.negln2hiN
134 #define NegLn2loN __exp_data.negln2loN
135 #define Shift __exp_data.shift
136 #define T __exp_data.tab
137 #define C2 __exp_data.poly[5 - EXP_POLY_ORDER]
138 #define C3 __exp_data.poly[6 - EXP_POLY_ORDER]
139 #define C4 __exp_data.poly[7 - EXP_POLY_ORDER]
140 #define C5 __exp_data.poly[8 - EXP_POLY_ORDER]
141 #define C6 __exp_data.poly[9 - EXP_POLY_ORDER]
/* Handle cases that may overflow or underflow when computing the result that
   is scale*(1+TMP) without intermediate rounding.  The bit representation of
   scale is in SBITS, however it has a computed exponent that may have
   overflown into the sign bit so that needs to be adjusted before using it as
   a double.  (int32_t)KI is the k used in the argument reduction and exponent
   adjustment of scale, positive k here means the result may overflow and
   negative k means the result may underflow.  */
static inline double
specialcase (double_t tmp, uint64_t sbits, uint64_t ki)
{
  double_t scale, y;

  if ((ki & 0x80000000) == 0)
    {
      /* k > 0, the exponent of scale might have overflowed by <= 460.  */
      sbits -= 1009ull << 52;
      scale = asdouble (sbits);
      y = 0x1p1009 * (scale + scale * tmp);
      return check_oflow (y);
    }
  /* k < 0, need special care in the subnormal range.  */
  sbits += 1022ull << 52;
  /* Note: sbits is signed scale.  */
  scale = asdouble (sbits);
  y = scale + scale * tmp;
  if (fabs (y) < 1.0)
    {
      /* Round y to the right precision before scaling it into the subnormal
	 range to avoid double rounding that can cause 0.5+E/2 ulp error where
	 E is the worst-case ulp error outside the subnormal range.  So this
	 is only useful if the goal is better than 1 ulp worst-case error.  */
      double_t hi, lo, one = 1.0;
      if (y < 0.0)
	one = -1.0;
      lo = scale - y + scale * tmp;
      hi = one + y;
      lo = one - hi + y + lo;
      y = eval_as_double (hi + lo) - one;
      /* Fix the sign of 0.  */
      if (y == 0.0)
	y = asdouble (sbits & 0x8000000000000000);
      /* The underflow exception needs to be signaled explicitly.  */
      force_eval_double (opt_barrier_double (0x1p-1022) * 0x1p-1022);
    }
  y = 0x1p-1022 * y;
  return check_uflow (y);
}
191 #define SIGN_BIAS (0x800 << EXP_TABLE_BITS)
193 /* Computes sign*exp(x+xtail) where |xtail| < 2^-8/N and |xtail| <= |x|.
194 The sign_bias argument is SIGN_BIAS or 0 and sets the sign to -1 or 1. */
196 exp_inline (double x
, double xtail
, uint32_t sign_bias
)
199 uint64_t ki
, idx
, top
, sbits
;
200 /* double_t for better performance on targets with FLT_EVAL_METHOD==2. */
201 double_t kd
, z
, r
, r2
, scale
, tail
, tmp
;
203 abstop
= top12 (x
) & 0x7ff;
204 if (unlikely (abstop
- top12 (0x1p
-54) >= top12 (512.0) - top12 (0x1p
-54)))
206 if (abstop
- top12 (0x1p
-54) >= 0x80000000)
208 /* Avoid spurious underflow for tiny x. */
209 /* Note: 0 is common input. */
210 double_t one
= WANT_ROUNDING
? 1.0 + x
: 1.0;
211 return sign_bias
? -one
: one
;
213 if (abstop
>= top12 (1024.0))
215 /* Note: inf and nan are already handled. */
216 if (asuint64 (x
) >> 63)
217 return __math_uflow (sign_bias
);
219 return __math_oflow (sign_bias
);
221 /* Large x is special cased below. */
225 /* exp(x) = 2^(k/N) * exp(r), with exp(r) in [2^(-1/2N),2^(1/2N)]. */
226 /* x = ln2/N*k + r, with int k and r in [-ln2/2N, ln2/2N]. */
230 ki
= converttoint (z
);
231 #elif EXP_USE_TOINT_NARROW
232 /* z - kd is in [-0.5-2^-16, 0.5] in all rounding modes. */
233 kd
= eval_as_double (z
+ Shift
);
234 ki
= asuint64 (kd
) >> 16;
235 kd
= (double_t
) (int32_t) ki
;
237 /* z - kd is in [-1, 1] in non-nearest rounding modes. */
238 kd
= eval_as_double (z
+ Shift
);
242 r
= x
+ kd
* NegLn2hiN
+ kd
* NegLn2loN
;
243 /* The code assumes 2^-200 < |xtail| < 2^-8/N. */
245 /* 2^(k/N) ~= scale * (1 + tail). */
247 top
= (ki
+ sign_bias
) << (52 - EXP_TABLE_BITS
);
248 tail
= asdouble (T
[idx
]);
249 /* This is only a valid scale when -1023*N < k < 1024*N. */
250 sbits
= T
[idx
+ 1] + top
;
251 /* exp(x) = 2^(k/N) * exp(r) ~= scale + scale * (tail + exp(r) - 1). */
252 /* Evaluation is optimized assuming superscalar pipelined execution. */
254 /* Without fma the worst case error is 0.25/N ulp larger. */
255 /* Worst case error is less than 0.5+1.11/N+(abs poly error * 2^53) ulp. */
256 #if EXP_POLY_ORDER == 4
257 tmp
= tail
+ r
+ r2
* C2
+ r
* r2
* (C3
+ r
* C4
);
258 #elif EXP_POLY_ORDER == 5
259 tmp
= tail
+ r
+ r2
* (C2
+ r
* C3
) + r2
* r2
* (C4
+ r
* C5
);
260 #elif EXP_POLY_ORDER == 6
261 tmp
= tail
+ r
+ r2
* (0.5 + r
* C3
) + r2
* r2
* (C4
+ r
* C5
+ r2
* C6
);
263 if (unlikely (abstop
== 0))
264 return specialcase (tmp
, sbits
, ki
);
265 scale
= asdouble (sbits
);
266 /* Note: tmp == 0 or |tmp| > 2^-200 and scale > 2^-739, so there
267 is no spurious underflow here even without fma. */
268 return scale
+ scale
* tmp
;
/* Returns 0 if not int, 1 if odd int, 2 if even int.  The argument is
   the bit representation of a non-zero finite floating-point value.  */
static inline int
checkint (uint64_t iy)
{
  int e = iy >> 52 & 0x7ff;
  if (e < 0x3ff)
    return 0; /* |y| < 1, cannot be a non-zero integer.  */
  if (e > 0x3ff + 52)
    return 2; /* |y| >= 2^53, all mantissa bits are integral: even.  */
  if (iy & ((1ULL << (0x3ff + 52 - e)) - 1))
    return 0; /* Non-zero fraction bits: not an integer.  */
  if (iy & (1ULL << (0x3ff + 52 - e)))
    return 1; /* Lowest integer-valued bit set: odd.  */
  return 2;
}
/* Returns 1 if input is the bit representation of 0, infinity or nan.
   2*i discards the sign bit (with wraparound), and the unsigned compare
   maps 0 to the largest value so it groups with inf/nan.  */
static inline int
zeroinfnan (uint64_t i)
{
  return 2 * i - 1 >= 2 * asuint64 (INFINITY) - 1;
}
296 pow (double x
, double y
)
298 uint32_t sign_bias
= 0;
306 if (unlikely (topx
- 0x001 >= 0x7ff - 0x001
307 || (topy
& 0x7ff) - 0x3be >= 0x43e - 0x3be))
309 /* Note: if |y| > 1075 * ln2 * 2^53 ~= 0x1.749p62 then pow(x,y) = inf/0
310 and if |y| < 2^-54 / 1075 ~= 0x1.e7b6p-65 then pow(x,y) = +-1. */
311 /* Special cases: (x < 0x1p-126 or inf or nan) or
312 (|y| < 0x1p-65 or |y| >= 0x1p63 or nan). */
313 if (unlikely (zeroinfnan (iy
)))
316 return issignaling_inline (x
) ? x
+ y
: 1.0;
317 if (ix
== asuint64 (1.0))
318 return issignaling_inline (y
) ? x
+ y
: 1.0;
319 if (2 * ix
> 2 * asuint64 (INFINITY
)
320 || 2 * iy
> 2 * asuint64 (INFINITY
))
322 if (2 * ix
== 2 * asuint64 (1.0))
324 if ((2 * ix
< 2 * asuint64 (1.0)) == !(iy
>> 63))
325 return 0.0; /* |x|<1 && y==inf or |x|>1 && y==-inf. */
328 if (unlikely (zeroinfnan (ix
)))
331 if (ix
>> 63 && checkint (iy
) == 1)
336 if (WANT_ERRNO
&& 2 * ix
== 0 && iy
>> 63)
337 return __math_divzero (sign_bias
);
338 /* Without the barrier some versions of clang hoist the 1/x2 and
339 thus division by zero exception can be signaled spuriously. */
340 return iy
>> 63 ? opt_barrier_double (1 / x2
) : x2
;
342 /* Here x and y are non-zero finite. */
346 int yint
= checkint (iy
);
348 return __math_invalid (x
);
350 sign_bias
= SIGN_BIAS
;
351 ix
&= 0x7fffffffffffffff;
354 if ((topy
& 0x7ff) - 0x3be >= 0x43e - 0x3be)
356 /* Note: sign_bias == 0 here because y is not odd. */
357 if (ix
== asuint64 (1.0))
359 if ((topy
& 0x7ff) < 0x3be)
361 /* |y| < 2^-65, x^y ~= 1 + y*log(x). */
363 return ix
> asuint64 (1.0) ? 1.0 + y
: 1.0 - y
;
367 return (ix
> asuint64 (1.0)) == (topy
< 0x800) ? __math_oflow (0)
372 /* Normalize subnormal x so exponent becomes negative. */
373 ix
= asuint64 (x
* 0x1p
52);
374 ix
&= 0x7fffffffffffffff;
380 double_t hi
= log_inline (ix
, &lo
);
384 elo
= y
* lo
+ fma (y
, hi
, -ehi
);
386 double_t yhi
= asdouble (iy
& -1ULL << 27);
387 double_t ylo
= y
- yhi
;
388 double_t lhi
= asdouble (asuint64 (hi
) & -1ULL << 27);
389 double_t llo
= hi
- lhi
+ lo
;
391 elo
= ylo
* lhi
+ y
* llo
; /* |elo| < |ehi| * 2^-25. */
393 return exp_inline (ehi
, elo
, sign_bias
);