2 * Copyright
(c) 2014 Advanced Micro Devices
, Inc.
4 * Permission is hereby granted
, free of charge
, to any person obtaining a copy
5 * of this software and associated documentation files
(the "Software"), to deal
6 * in the Software without restriction
, including without limitation the rights
7 * to use
, copy
, modify
, merge
, publish
, distribute
, sublicense
, and
/or sell
8 * copies of the Software
, and to permit persons to whom the Software is
9 * furnished to do so
, subject to the following conditions
:
11 * The above copyright notice and this permission notice shall be included in
12 * all copies or substantial portions of the Software.
14 * THE SOFTWARE IS PROVIDED
"AS IS", WITHOUT WARRANTY OF ANY KIND
, EXPRESS OR
15 * IMPLIED
, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY
,
16 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
17 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM
, DAMAGES OR OTHER
18 * LIABILITY
, WHETHER IN AN ACTION OF CONTRACT
, TORT OR OTHERWISE
, ARISING FROM
,
19 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
24 #include
"../clcmacro.h"
28 _CLC_OVERLOAD _CLC_DEF float atan
(float x
)
30 const float piby2
= 1.5707963267948966f
; // 0x3ff921fb54442d18
33 uint aux
= ux
& EXSIGNBIT_SP32
;
36 float spiby2
= as_float
(sx | as_uint
(piby2));
38 float v
= as_float
(aux);
43 // 2^
26 <= |x|
<= Inf
=> atan
(x) is close to piby2
44 ret
= aux
<= PINFBITPATT_SP32 ? spiby2
: ret
;
46 // Reduce arguments
2^-
19 <= |x|
< 2^
26
50 float c
= 1.57079632679489655800f
; // atan(infinity)
53 int l
= aux
< 0x401c0000;
54 float xx
= MATH_DIVIDE
(v -
1.5f
, mad
(v, 1.5f
, 1.0f
));
56 c
= l ?
9.82793723247329054082e-01f
: c
; // atan(1.5)
59 l
= aux
< 0x3f980000U
;
60 xx
= MATH_DIVIDE
(v -
1.0f
, 1.0f
+ v
);
62 c
= l ?
7.85398163397448278999e-01f
: c
; // atan(1)
66 xx
= MATH_DIVIDE
(mad(v, 2.0f
, -
1.0f
), 2.0f
+ v
);
68 c
= l ?
4.63647609000806093515e-01f
: c
; // atan(0.5)
75 // Core approximation
: Remez
(2,2) on
[-
7/16,7/16]
79 mad
(s, 0.470677934286149214138357545549e-2f
, 0.192324546402108583211697690500f
),
80 0.296528598819239217902158651186f
);
83 mad
(s, 0.299309699959659728404442796915f
, 0.111072499995399550138837673349e1f
),
84 0.889585796862432286486651434570f
);
86 float q
= x
* s
* MATH_DIVIDE
(a, b
);
88 float z
= c -
(q - x
);
89 float zs
= as_float
(sx | as_uint
(z));
91 ret
= aux
< 0x4c800000 ? zs
: ret
;
94 ret
= aux
< 0x36000000 ? as_float
(ux) : ret
;
98 _CLC_UNARY_VECTORIZE
(_CLC_OVERLOAD _CLC_DEF
, float
, atan
, float
);
102 #pragma OPENCL EXTENSION cl_khr_fp64
: enable
105 _CLC_OVERLOAD _CLC_DEF double atan
(double x
)
107 const double piby2
= 1.5707963267948966e+00; // 0x3ff921fb54442d18
114 // (chi + clo
) = arctan
(infinity)
115 double chi
= 1.57079632679489655800e+00;
116 double clo
= 6.12323399573676480327e-17;
119 double tb
= 1.0 + 1.5 * v
;
120 int l
= v
<= 0x1.38p
+1; // 39/16 > v > 19/16
123 // (chi + clo
) = arctan
(1.5
)
124 chi
= l ?
9.82793723247329054082e-01 : chi
;
125 clo
= l ?
1.39033110312309953701e-17 : clo
;
129 l
= v
<= 0x1.3p
+0; // 19/16 > v > 11/16
132 // (chi + clo
) = arctan
(1.
)
133 chi
= l ?
7.85398163397448278999e-01 : chi
;
134 clo
= l ?
3.06161699786838240164e-17 : clo
;
138 l
= v
<= 0x1.6p-1
; // 11/16 > v > 7/16
141 // (chi + clo
) = arctan
(0.5
)
142 chi
= l ?
4.63647609000806093515e-01 : chi
;
143 clo
= l ?
2.26987774529616809294e-17 : clo
;
145 l
= v
<= 0x1.cp-2
; // v < 7/16
151 // Core approximation
: Remez
(4,4) on
[-
7/16,7/16]
157 fma
(s, 0.142316903342317766e-3,
158 0.304455919504853031e-1),
159 0.220638780716667420e0
),
160 0.447677206805497472e0
),
161 0.268297920532545909e0
);
166 fma
(s, 0.389525873944742195e-1,
167 0.424602594203847109e0
),
168 0.141254259931958921e1
),
169 0.182596787737507063e1
),
170 0.804893761597637733e0
);
172 double q
= r
* s
* qn
/ qd
;
173 r
= chi -
((q - clo
) - r
);
175 double z
= isnan
(x) ? x
: piby2
;
176 z
= v
<= 0x1.0p
+56 ? r
: z
;
177 z
= v
< 0x1.0p-26 ? v
: z
;
178 return x
== v ? z
: -z
;
181 _CLC_UNARY_VECTORIZE
(_CLC_OVERLOAD _CLC_DEF
, double
, atan
, double
);
183 #endif
// cl_khr_fp64