2 * Copyright
(c) 2014,2015 Advanced Micro Devices
, Inc.
4 * Permission is hereby granted
, free of charge
, to any person obtaining a copy
5 * of this software and associated documentation files
(the "Software"), to deal
6 * in the Software without restriction
, including without limitation the rights
7 * to use
, copy
, modify
, merge
, publish
, distribute
, sublicense
, and
/or sell
8 * copies of the Software
, and to permit persons to whom the Software is
9 * furnished to do so
, subject to the following conditions
:
11 * The above copyright notice and this permission notice shall be included in
12 * all copies or substantial portions of the Software.
14 * THE SOFTWARE IS PROVIDED
"AS IS", WITHOUT WARRANTY OF ANY KIND
, EXPRESS OR
15 * IMPLIED
, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY
,
16 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
17 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM
, DAMAGES OR OTHER
18 * LIABILITY
, WHETHER IN AN ACTION OF CONTRACT
, TORT OR OTHERWISE
, ARISING FROM
,
19 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
26 #include
"../clcmacro.h"
28 _CLC_OVERLOAD _CLC_DEF float asinpi
(float x
) {
29 // Computes arcsin
(x).
30 // The argument is first reduced by noting that arcsin
(x)
31 // is invalid for abs
(x) > 1 and arcsin
(-x) = -arcsin
(x).
32 // For denormal and small arguments arcsin
(x) = x to machine
33 // accuracy. Remaining argument ranges are handled as follows.
34 // For abs
(x) <= 0.5 use
35 // arcsin
(x) = x
+ x^
3*R
(x^
2)
36 // where R
(x^
2) is a rational minimax approximation to
37 // (arcsin(x) - x
)/x^
3.
38 // For abs
(x) > 0.5 exploit the identity
:
39 // arcsin
(x) = pi
/2 -
2*arcsin
(sqrt(1-x)/2)
40 // together with the above rational approximation
, and
41 // reconstruct the terms carefully.
44 const float pi
= 3.1415926535897933e+00f
;
45 const float piby2_tail
= 7.5497894159e-08F
; /* 0x33a22168 */
46 const float hpiby2_head
= 7.8539812565e-01F
; /* 0x3f490fda */
49 uint aux
= ux
& EXSIGNBIT_SP32
;
51 float shalf
= as_float
(xs | as_uint
(0.5f
));
53 int xexp
= (int)(aux >> EXPSHIFTBITS_SP32
) - EXPBIAS_SP32
;
55 float y
= as_float
(aux);
58 int transform
= xexp
>= -
1;
61 float rt
= 0.5f
* (1.0f - y
);
62 float r
= transform ? rt
: y2
;
64 // Use a rational approximation for
[0.0, 0.5]
67 mad
(r, -
0.00396137437848476485201154797087F
, -
0.0133819288943925804214011424456F
),
68 -
0.0565298683201845211985026327361F
),
69 0.184161606965100694821398249421F
);
70 float b
= mad
(r, -
0.836411276854206731913362287293F
, 1.10496961524520294485512696706F
);
71 float u
= r
* MATH_DIVIDE
(a, b
);
73 float s
= MATH_SQRT
(r);
74 float s1
= as_float
(as_uint(s) & 0xffff0000);
75 float c
= MATH_DIVIDE
(mad(-s1, s1
, r
), s
+ s1
);
76 float p
= mad
(2.0f
*s
, u
, -mad
(c, -
2.0f
, piby2_tail
));
77 float q
= mad
(s1, -
2.0f
, hpiby2_head
);
78 float vt
= hpiby2_head -
(p - q
);
79 float v
= mad
(y, u
, y
);
80 v
= transform ? vt
: v
;
81 v
= MATH_DIVIDE
(v, pi
);
82 float xbypi
= MATH_DIVIDE
(x, pi
);
84 float ret
= as_float
(xs | as_uint
(v));
85 ret
= aux
> 0x3f800000U ? as_float
(QNANBITPATT_SP32) : ret
;
86 ret
= aux
== 0x3f800000U ? shalf
: ret
;
87 ret
= xexp
< -
14 ? xbypi
: ret
;
92 _CLC_UNARY_VECTORIZE
(_CLC_OVERLOAD _CLC_DEF
, float
, asinpi
, float
)
95 #pragma OPENCL EXTENSION cl_khr_fp64
: enable
97 _CLC_OVERLOAD _CLC_DEF double asinpi
(double x
) {
98 // Computes arcsin
(x).
99 // The argument is first reduced by noting that arcsin
(x)
100 // is invalid for abs
(x) > 1 and arcsin
(-x) = -arcsin
(x).
101 // For denormal and small arguments arcsin
(x) = x to machine
102 // accuracy. Remaining argument ranges are handled as follows.
103 // For abs
(x) <= 0.5 use
104 // arcsin
(x) = x
+ x^
3*R
(x^
2)
105 // where R
(x^
2) is a rational minimax approximation to
106 // (arcsin(x) - x
)/x^
3.
107 // For abs
(x) > 0.5 exploit the identity
:
108 // arcsin
(x) = pi
/2 -
2*arcsin
(sqrt(1-x)/2)
109 // together with the above rational approximation
, and
110 // reconstruct the terms carefully.
112 const double pi
= 0x1.921fb54442d18p
+1;
113 const double piby2_tail
= 6.1232339957367660e-17; /* 0x3c91a62633145c07 */
114 const double hpiby2_head
= 7.8539816339744831e-01; /* 0x3fe921fb54442d18 */
117 int xneg
= as_int2
(x).hi
< 0;
118 int xexp
= (as_int2(y).hi
>> 20) - EXPBIAS_DP64
;
121 int transform
= xexp
>= -
1;
123 double rt
= 0.5 * (1.0 - y
);
125 double r
= transform ? rt
: y2
;
127 // Use a rational approximation for
[0.0, 0.5]
132 fma
(r, 0.0000482901920344786991880522822991,
133 0.00109242697235074662306043804220),
134 -
0.0549989809235685841612020091328),
135 0.275558175256937652532686256258),
136 -
0.445017216867635649900123110649),
137 0.227485835556935010735943483075);
142 fma
(r, 0.105869422087204370341222318533,
143 -
0.943639137032492685763471240072),
144 2.76568859157270989520376345954),
145 -
3.28431505720958658909889444194),
146 1.36491501334161032038194214209);
148 double u
= r
* MATH_DIVIDE
(un, ud
);
151 // Reconstruct asin carefully in transformed region
153 double sh
= as_double
(as_ulong(s) & 0xffffffff00000000UL
);
154 double c
= MATH_DIVIDE
(fma(-sh, sh
, r
), s
+ sh
);
155 double p
= fma
(2.0
*s
, u
, -fma
(-2.0
, c
, piby2_tail
));
156 double q
= fma
(-2.0
, sh
, hpiby2_head
);
157 double vt
= hpiby2_head -
(p - q
);
158 double v
= fma
(y, u
, y
);
159 v
= transform ? vt
: v
;
161 v
= xexp
< -
28 ? y
: v
;
162 v
= MATH_DIVIDE
(v, pi
);
163 v
= xexp
>= 0 ? as_double
(QNANBITPATT_DP64) : v
;
164 v
= y
== 1.0 ?
0.5 : v
;
165 return xneg ? -v
: v
;
168 _CLC_UNARY_VECTORIZE
(_CLC_OVERLOAD _CLC_DEF
, double
, asinpi
, double
)