2 * Copyright
(c) 2014 Advanced Micro Devices
, Inc.
4 * Permission is hereby granted
, free of charge
, to any person obtaining a copy
5 * of this software and associated documentation files
(the "Software"), to deal
6 * in the Software without restriction
, including without limitation the rights
7 * to use
, copy
, modify
, merge
, publish
, distribute
, sublicense
, and
/or sell
8 * copies of the Software
, and to permit persons to whom the Software is
9 * furnished to do so
, subject to the following conditions
:
11 * The above copyright notice and this permission notice shall be included in
12 * all copies or substantial portions of the Software.
14 * THE SOFTWARE IS PROVIDED
"AS IS", WITHOUT WARRANTY OF ANY KIND
, EXPRESS OR
15 * IMPLIED
, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY
,
16 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
17 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM
, DAMAGES OR OTHER
18 * LIABILITY
, WHETHER IN AN ACTION OF CONTRACT
, TORT OR OTHERWISE
, ARISING FROM
,
19 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
24 #include
<clc
/clcmacro.h
>
25 #include
<clc
/relational
/clc_isnan.h
>
33 // e^x
= 2^
(x/ln
(2)) = 2^
(x*(64/ln
(2))/64)
35 // x
*(64/ln
(2)) = n
+ f
, |f|
<= 0.5, n is integer
36 // n
= 64*m
+ j
, 0 <= j
< 64
38 // e^x
= 2^
((64*m
+ j
+ f
)/64)
39 // = (2^m
) * (2^
(j/64)) * 2^
(f/64)
40 // = (2^m
) * (2^
(j/64)) * e^
(f*(ln(2)/64))
42 // f
= x
*(64/ln
(2)) - n
43 // r
= f
*(ln(2)/64) = x - n
*(ln(2)/64)
45 // e^x
= (2^m
) * (2^
(j/64)) * e^r
47 // (2^
(j/64)) is precomputed
49 // e^r
= 1 + r
+ (r^
2)/2! + (r^
3)/3! + (r^
4)/4! + (r^
5)/5!
52 // q
= r
+ (r^
2)/2! + (r^
3)/3! + (r^
4)/4! + (r^
5)/5!
54 // e^x
= (2^m
) * ( (2^
(j/64)) + q
*(2^
(j/64)) )
56 _CLC_DEF _CLC_OVERLOAD float __clc_exp10
(float x
)
58 const float X_MAX
= 0x1.344134p
+5f
; // 128*log2/log10 : 38.53183944498959
59 const float X_MIN
= -
0x1.66d3e8p
+5f
; // -149*log2/log10 : -44.8534693539332
61 const float R_64_BY_LOG10_2
= 0x1.a934f0p
+7f
; // 64*log10/log2 : 212.6033980727912
62 const float R_LOG10_2_BY_64_LD
= 0x1.340000p-8f
; // log2/(64 * log10) lead : 0.004699707
63 const float R_LOG10_2_BY_64_TL
= 0x1.04d426p-18f
; // log2/(64 * log10) tail : 0.00000388665057
64 const float R_LN10
= 0x1.26bb1cp
+1f
;
66 int return_nan
= __clc_isnan
(x);
67 int return_inf
= x
> X_MAX
;
68 int return_zero
= x
< X_MIN
;
70 int n
= convert_int
(x * R_64_BY_LOG10_2
);
75 int m2
= m
<< EXPSHIFTBITS_SP32
;
78 r
= R_LN10
* mad
(fn, -R_LOG10_2_BY_64_TL
, mad
(fn, -R_LOG10_2_BY_64_LD
, x
));
80 // Truncated Taylor series for e^r
81 float z2
= mad
(mad(mad(r, 0x1.555556p-5f
, 0x1.555556p-3f
), r
, 0x1.000000p-1f
), r
*r
, r
);
83 float two_to_jby64
= USE_TABLE
(exp_tbl, j
);
84 z2
= mad
(two_to_jby64, z2
, two_to_jby64
);
86 float z2s
= z2
* as_float
(0x1 << (m + 149));
87 float z2n
= as_float
(as_int(z2) + m2
);
88 z2
= m
<= -
126 ? z2s
: z2n
;
91 z2
= return_inf ? as_float
(PINFBITPATT_SP32) : z2
;
92 z2
= return_zero ?
0.0f
: z2
;
93 z2
= return_nan ? x
: z2
;
96 _CLC_UNARY_VECTORIZE
(_CLC_DEF _CLC_OVERLOAD
, float
, __clc_exp10
, float
)
99 _CLC_DEF _CLC_OVERLOAD double __clc_exp10
(double x
)
101 const double X_MAX
= 0x1.34413509f79ffp
+8; // 1024*ln(2)/ln(10)
102 const double X_MIN
= -
0x1.434e6420f4374p
+8; // -1074*ln(2)/ln(10)
104 const double R_64_BY_LOG10_2
= 0x1.a934f0979a371p
+7; // 64*ln(10)/ln(2)
105 const double R_LOG10_2_BY_64_LD
= 0x1.3441350000000p-8
; // head ln(2)/(64*ln(10))
106 const double R_LOG10_2_BY_64_TL
= 0x1.3ef3fde623e25p-37
; // tail ln(2)/(64*ln(10))
107 const double R_LN10
= 0x1.26bb1bbb55516p
+1; // ln(10)
109 int n
= convert_int
(x * R_64_BY_LOG10_2
);
111 double dn
= (double)n
;
116 double r
= R_LN10
* fma
(-R_LOG10_2_BY_64_TL, dn
, fma
(-R_LOG10_2_BY_64_LD, dn
, x
));
118 // 6 term tail of Taylor expansion of e^r
119 double z2
= r
* fma
(r,
123 fma
(r, 0x1.6c16c16c16c17p-10
, 0x1.1111111111111p-7
),
124 0x1.5555555555555p-5
),
125 0x1.5555555555555p-3
),
126 0x1.0000000000000p-1
),
129 double2 tv
= USE_TABLE
(two_to_jby64_ep_tbl, j
);
130 z2
= fma
(tv.s0
+ tv.s1
, z2
, tv.s1
) + tv.s0
;
132 int small_value
= (m < -
1022) ||
((m == -
1022) && (z2 < 1.0));
136 double z3
= z2
* as_double
(((long)n1
+ 1023) << 52);
137 z3
*= as_double
(((long)n2
+ 1023) << 52);
140 z2
= small_value ? z3
: z2
;
142 z2
= __clc_isnan
(x) ? x
: z2
;
144 z2
= x
> X_MAX ? as_double
(PINFBITPATT_DP64) : z2
;
145 z2
= x
< X_MIN ?
0.0 : z2
;
149 _CLC_UNARY_VECTORIZE
(_CLC_DEF _CLC_OVERLOAD
, double
, __clc_exp10
, double
)