/*
 * Copyright (c) 2014,2015 Advanced Micro Devices, Inc.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a copy
 * of this software and associated documentation files (the "Software"), to deal
 * in the Software without restriction, including without limitation the rights
 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 * copies of the Software, and to permit persons to whom the Software is
 * furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
 * THE SOFTWARE.
 */
// NOTE(review): mad, as_float, as_int, isnan and MATH_DIVIDE are used below
// but are not declared by any header visible in this listing; confirm that
// the remaining project includes are present in the real file.
#include <clc/clcmacro.h>
/*
 * Single-precision exp(x).
 *
 * The argument is reduced to x = p*ln(2) + t, where p is x/ln(2) rounded to
 * an integer (add +/-0.5, then truncate). exp(t) is evaluated with a rational
 * approximation and the result is scaled by 2^p by adding p directly to the
 * exponent field of the float.
 *
 * Special cases, as implemented by the trailing selects:
 *   x <  llim  -> 0.0f      (result would be below the smallest normal)
 *   x >= ulim  -> +infinity (bit pattern 0x7f800000)
 *   x is NaN   -> x
 *
 * NOTE(review): the definitions of fp, lo, t, tt and v, and the three outer
 * polynomial coefficients, were missing from the listing under review. They
 * are restored here following the fdlibm expf scheme, whose P4/P5 constants
 * match the two coefficients that were still visible. Verify against the
 * upstream file before merging.
 */
_CLC_OVERLOAD _CLC_DEF float exp(float x) {

    // Reduce x
    const float ln2HI = 0x1.62e300p-1f;
    const float ln2LO = 0x1.2fefa2p-17f;
    const float invln2 = 0x1.715476p+0f;

    // Bias by half a unit towards the sign of x so that the float -> int
    // conversion below rounds to nearest instead of truncating.
    float fhalF = x < 0.0f ? -0.5f : 0.5f;
    int p = mad(x, invln2, fhalF);
    float fp = (float)p;
    float hi = mad(fp, -ln2HI, x); // t*ln2HI is exact here
    float lo = -fp * ln2LO;

    // Evaluate poly
    float t = hi + lo;
    float tt = t * t;
    float v = mad(tt,
                  -mad(tt,
                       mad(tt,
                           mad(tt,
                               mad(tt, 0x1.637698p-25f, -0x1.bbd41cp-20f),
                               0x1.1566aap-14f),
                           -0x1.6c16c2p-9f),
                       0x1.555556p-3f),
                  t);

    float y = 1.0f - (((-lo) - MATH_DIVIDE(t * v, 2.0f - v)) - hi);

    // Scale by 2^p: add p to the biased exponent (bits 23..30).
    float r = as_float(as_int(y) + (p << 23));

    const float ulim = 0x1.62e430p+6f;  // ln(largest_normal) = 88.72283905206835305366
    const float llim = -0x1.5d589ep+6f; // ln(smallest_normal) = -87.33654475055310898657

    // The out-of-range and NaN selects overwrite whatever the scaling above
    // produced for those inputs.
    r = x < llim ? 0.0f : r;
    r = x < ulim ? r : as_float(0x7f800000);
    return isnan(x) ? x : r;
}
// Presumably expands to the vector overloads of exp built on the scalar float
// version; the macro is defined outside this file (confirm in clc/clcmacro.h).
_CLC_UNARY_VECTORIZE(_CLC_OVERLOAD _CLC_DEF, float, exp, float)
// The double-precision overload is compiled only when the target advertises
// cl_khr_fp64; without the guard the pragma and every use of `double` below
// would break the build on devices that lack the extension.
#ifdef cl_khr_fp64

#include "exp_helper.h"

#pragma OPENCL EXTENSION cl_khr_fp64 : enable

/*
 * Double-precision exp(x).
 *
 * Computes n = convert_int(x * 64/ln(2)) and the reduced argument
 * r = x - n*ln(2)/64, using a head/tail split of ln(2)/64 and two fused
 * multiply-adds. The final evaluation, including the X_MIN/X_MAX range
 * handling, is delegated to __clc_exp_helper, which is declared in
 * exp_helper.h and not visible here.
 */
_CLC_OVERLOAD _CLC_DEF double exp(double x) {

    const double X_MIN = -0x1.74910d52d3051p+9;          // -1075*ln(2)
    const double X_MAX = 0x1.62e42fefa39efp+9;           // 1024*ln(2)
    const double R_64_BY_LOG2 = 0x1.71547652b82fep+6;    // 64/ln(2)
    const double R_LOG2_BY_64_LD = 0x1.62e42fefa0000p-7; // head ln(2)/64
    const double R_LOG2_BY_64_TL = 0x1.cf79abc9e3b39p-46; // tail ln(2)/64

    int n = convert_int(x * R_64_BY_LOG2);

    // Subtract the head product first (inner fma), then the tail product.
    double r = fma(-R_LOG2_BY_64_TL, (double)n,
                   fma(-R_LOG2_BY_64_LD, (double)n, x));

    return __clc_exp_helper(x, X_MIN, X_MAX, r, n);
}

// Presumably expands to the vector overloads of exp built on the scalar double
// version; the macro is defined outside this file (confirm in clc/clcmacro.h).
_CLC_UNARY_VECTORIZE(_CLC_OVERLOAD _CLC_DEF, double, exp, double)

#endif // cl_khr_fp64
// Presumably provides the half-precision exp overloads; the macro is defined
// outside this file. NOTE(review): no cl_khr_fp16 guard is visible in this
// listing, so confirm whether the macro guards itself.
_CLC_DEFINE_UNARY_BUILTIN_FP16(exp)