2 * Copyright 2023 Siemens
4 * The authors hereby grant permission to use, copy, modify, distribute,
5 * and license this software and its documentation for any purpose, provided
6 * that existing copyright notices are retained in all copies and that this
7 * notice is included verbatim in any distributions. No written agreement,
8 * license, or royalty fee is required for any of the authorized uses.
9 * Modifications to this software may be copyrighted by their authors
10 * and need not follow the licensing terms described here, provided that
11 * the new terms are clearly indicated on the first page of each file where
16 * ====================================================
17 * Copyright (C) 1993 by Sun Microsystems, Inc. All rights reserved.
19 * Developed at SunPro, a Sun Microsystems, Inc. business.
20 * Permission to use, copy, modify, and distribute this
21 * software is freely granted, provided that this notice
23 * ====================================================
26 /* Based on newlib/libm/math/ef_exp.c in Newlib. */
28 #include "amdgcnmach.h"
32 twom100
= 7.8886090522e-31, /* 2**-100=0x0d800000 */
33 ln2HI
[2] ={ 6.9313812256e-01, /* 0x3f317180 */
34 -6.9313812256e-01,}, /* 0xbf317180 */
35 ln2LO
[2] ={ 9.0580006145e-06, /* 0x3717f7d1 */
36 -9.0580006145e-06,}, /* 0xb717f7d1 */
37 invln2
= 1.4426950216e+00, /* 0x3fb8aa3b */
38 P1
= 1.6666667163e-01, /* 0x3e2aaaab */
39 P2
= -2.7777778450e-03, /* 0xbb360b61 */
40 P3
= 6.6137559770e-05, /* 0x388ab355 */
41 P4
= -1.6533901999e-06, /* 0xb5ddea0e */
42 P5
= 4.1381369442e-08; /* 0x3331bb4c */
44 DEF_VS_MATH_FUNC (v64sf
, expf
, v64sf x
)
46 FUNCTION_INIT (v64sf
);
48 v64si k
= VECTOR_INIT (0);
50 GET_FLOAT_WORD(sx
, x
, NO_COND
);
51 v64si xsb
= (sx
>>31)&1; /* sign bit of x */
52 v64si hx
= sx
& 0x7fffffff; /* high word of |x| */
54 /* filter out non-finite argument */
55 VECTOR_RETURN (x
+x
, FLT_UWORD_IS_NAN(hx
)); /* NaN */
56 VECTOR_RETURN (x
, FLT_UWORD_IS_INFINITE(hx
) & (xsb
== 0));
57 VECTOR_RETURN (VECTOR_INIT (0.0f
), FLT_UWORD_IS_INFINITE (hx
)); /* exp(+-inf)={inf,0} */
58 VECTOR_RETURN (v64sf_math_oflowf (VECTOR_INIT (0)), sx
> FLT_UWORD_LOG_MAX
); /* overflow */
59 VECTOR_RETURN (v64sf_math_uflowf (VECTOR_INIT (0)), (sx
< 0) & (hx
> FLT_UWORD_LOG_MIN
)); /* underflow */
61 /* argument reduction */
63 VECTOR_IF (hx
> 0x3eb17218, cond
) /* if |x| > 0.5 ln2 */
64 VECTOR_IF2 (hx
< 0x3F851592, cond2
, cond
) /* and |x| < 1.5 ln2 */
65 VECTOR_COND_MOVE (hi
, x
-ln2HI
[0], cond2
& (xsb
== 0));
66 VECTOR_COND_MOVE (hi
, x
-ln2HI
[1], cond2
& (xsb
== 1));
67 VECTOR_COND_MOVE (lo
, VECTOR_INIT (ln2LO
[0]), cond2
& (xsb
== 0));
68 VECTOR_COND_MOVE (lo
, VECTOR_INIT (ln2LO
[1]), cond2
& (xsb
== 1));
69 VECTOR_COND_MOVE (k
, 1-xsb
-xsb
, cond2
);
70 VECTOR_ELSE2 (cond2
, cond
)
71 VECTOR_COND_MOVE (k
, __builtin_convertvector (invln2
*x
+ 0.5f
, v64si
), cond2
& (xsb
== 0));
72 VECTOR_COND_MOVE (k
, __builtin_convertvector (invln2
*x
- 0.5f
, v64si
), cond2
& (xsb
== 1));
73 v64sf t
= __builtin_convertvector (k
, v64sf
);
74 VECTOR_COND_MOVE (hi
, x
- t
*ln2HI
[0], cond2
); /* t*ln2HI is exact here */
75 VECTOR_COND_MOVE (lo
, t
*ln2LO
[0], cond2
);
77 VECTOR_COND_MOVE (x
, hi
- lo
, cond
);
78 VECTOR_ELSEIF (hx
< 0x34000000, cond
) /* when |x|<2**-23 */
79 VECTOR_RETURN (1.0f
+x
, cond
& (huge
+x
> 1.0f
)); /* trigger inexact */
82 /* x is now in primary range */
84 v64sf c
= x
- t
*(P1
+t
*(P2
+t
*(P3
+t
*(P4
+t
*P5
))));
85 VECTOR_RETURN (1.0f
- ((x
*c
)/(c
-2.0f
)-x
), k
==0);
86 v64sf y
= 1.0f
- ((lo
-(x
*c
)/(2.0f
-c
))-hi
);
87 VECTOR_IF (k
>= -125, cond
)
89 GET_FLOAT_WORD(hy
, y
, cond
);
90 SET_FLOAT_WORD(y
,hy
+(k
<<23), cond
); /* add k to y's exponent */
91 VECTOR_RETURN (y
, cond
);
94 GET_FLOAT_WORD(hy
, y
, cond
);
95 SET_FLOAT_WORD(y
, hy
+((k
+100)<<23), cond
); /* add k to y's exponent */
96 VECTOR_RETURN (y
*twom100
, cond
);
102 DEF_VARIANTS (expf
, sf
, sf
)