2 * Copyright
(c) 2014,2015 Advanced Micro Devices
, Inc.
4 * Permission is hereby granted
, free of charge
, to any person obtaining a copy
5 * of this software and associated documentation files
(the "Software"), to deal
6 * in the Software without restriction
, including without limitation the rights
7 * to use
, copy
, modify
, merge
, publish
, distribute
, sublicense
, and
/or sell
8 * copies of the Software
, and to permit persons to whom the Software is
9 * furnished to do so
, subject to the following conditions
:
11 * The above copyright notice and this permission notice shall be included in
12 * all copies or substantial portions of the Software.
14 * THE SOFTWARE IS PROVIDED
"AS IS", WITHOUT WARRANTY OF ANY KIND
, EXPRESS OR
15 * IMPLIED
, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY
,
16 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
17 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM
, DAMAGES OR OTHER
18 * LIABILITY
, WHETHER IN AN ACTION OF CONTRACT
, TORT OR OTHERWISE
, ARISING FROM
,
19 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
26 #include
"../clcmacro.h"
28 _CLC_OVERLOAD _CLC_DEF float atanpi
(float x
) {
29 const float pi
= 3.1415926535897932f
;
32 uint aux
= ux
& EXSIGNBIT_SP32
;
35 float xbypi
= MATH_DIVIDE
(x, pi
);
36 float shalf
= as_float
(sx | as_uint
(0.5f
));
38 float v
= as_float
(aux);
43 // 2^
26 <= |x|
<= Inf
=> atan
(x) is close to piby2
44 ret
= aux
<= PINFBITPATT_SP32 ? shalf
: ret
;
46 // Reduce arguments
2^-
19 <= |x|
< 2^
26
50 float c
= 1.57079632679489655800f
; // atan(infinity)
53 int l
= aux
< 0x401c0000;
54 float xx
= MATH_DIVIDE
(v -
1.5f
, mad
(v, 1.5f
, 1.0f
));
56 c
= l ?
9.82793723247329054082e-01f
: c
; // atan(1.5)
59 l
= aux
< 0x3f980000U
;
60 xx
= MATH_DIVIDE
(v -
1.0f
, 1.0f
+ v
);
62 c
= l ?
7.85398163397448278999e-01f
: c
; // atan(1)
66 xx
= MATH_DIVIDE
(mad(v, 2.0f
, -
1.0f
), 2.0f
+ v
);
68 c
= l ?
4.63647609000806093515e-01f
: c
; // atan(0.5)
75 // Core approximation
: Remez
(2,2) on
[-
7/16,7/16]
79 mad
(s, 0.470677934286149214138357545549e-2f
, 0.192324546402108583211697690500f
),
80 0.296528598819239217902158651186f
);
83 mad
(s, 0.299309699959659728404442796915f
, 0.111072499995399550138837673349e1f
),
84 0.889585796862432286486651434570f
);
86 float q
= x
* s
* MATH_DIVIDE
(a, b
);
88 float z
= c -
(q - x
);
89 z
= MATH_DIVIDE
(z, pi
);
90 float zs
= as_float
(sx | as_uint
(z));
92 ret
= aux
< 0x4c800000 ? zs
: ret
;
95 ret
= aux
< 0x36000000 ? xbypi
: ret
;
99 _CLC_UNARY_VECTORIZE
(_CLC_OVERLOAD _CLC_DEF
, float
, atanpi
, float
)
102 #pragma OPENCL EXTENSION cl_khr_fp64
: enable
104 _CLC_OVERLOAD _CLC_DEF double atanpi
(double x
) {
105 const double pi
= 0x1.921fb54442d18p
+1;
112 // (chi + clo
) = arctan
(infinity)
113 double chi
= 1.57079632679489655800e+00;
114 double clo
= 6.12323399573676480327e-17;
117 double tb
= 1.0 + 1.5 * v
;
118 int l
= v
<= 0x1.38p
+1; // 39/16 > v > 19/16
121 // (chi + clo
) = arctan
(1.5
)
122 chi
= l ?
9.82793723247329054082e-01 : chi
;
123 clo
= l ?
1.39033110312309953701e-17 : clo
;
127 l
= v
<= 0x1.3p
+0; // 19/16 > v > 11/16
130 // (chi + clo
) = arctan
(1.
)
131 chi
= l ?
7.85398163397448278999e-01 : chi
;
132 clo
= l ?
3.06161699786838240164e-17 : clo
;
136 l
= v
<= 0x1.6p-1
; // 11/16 > v > 7/16
139 // (chi + clo
) = arctan
(0.5
)
140 chi
= l ?
4.63647609000806093515e-01 : chi
;
141 clo
= l ?
2.26987774529616809294e-17 : clo
;
143 l
= v
<= 0x1.cp-2
; // v < 7/16
149 // Core approximation
: Remez
(4,4) on
[-
7/16,7/16]
155 fma
(s, 0.142316903342317766e-3,
156 0.304455919504853031e-1),
157 0.220638780716667420e0
),
158 0.447677206805497472e0
),
159 0.268297920532545909e0
);
164 fma
(s, 0.389525873944742195e-1,
165 0.424602594203847109e0
),
166 0.141254259931958921e1
),
167 0.182596787737507063e1
),
168 0.804893761597637733e0
);
170 double q
= r
* s
* qn
/ qd
;
171 r
= (chi -
((q - clo
) - r
)) / pi
;
174 double z
= isnan
(x) ? x
: 0.5;
175 z
= v
<= 0x1.0p
+56 ? r
: z
;
176 z
= v
< 0x1.0p-26 ? vp
: z
;
177 return x
== v ? z
: -z
;
180 _CLC_UNARY_VECTORIZE
(_CLC_OVERLOAD _CLC_DEF
, double
, atanpi
, double
)