2 * Copyright
(c) 2014 Advanced Micro Devices
, Inc.
4 * Permission is hereby granted
, free of charge
, to any person obtaining a copy
5 * of this software and associated documentation files
(the "Software"), to deal
6 * in the Software without restriction
, including without limitation the rights
7 * to use
, copy
, modify
, merge
, publish
, distribute
, sublicense
, and
/or sell
8 * copies of the Software
, and to permit persons to whom the Software is
9 * furnished to do so
, subject to the following conditions
:
11 * The above copyright notice and this permission notice shall be included in
12 * all copies or substantial portions of the Software.
14 * THE SOFTWARE IS PROVIDED
"AS IS", WITHOUT WARRANTY OF ANY KIND
, EXPRESS OR
15 * IMPLIED
, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY
,
16 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
17 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM
, DAMAGES OR OTHER
18 * LIABILITY
, WHETHER IN AN ACTION OF CONTRACT
, TORT OR OTHERWISE
, ARISING FROM
,
19 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
24 #include
<clc
/clcmacro.h
>
28 _CLC_OVERLOAD _CLC_DEF float asin
(float x
) {
29 // Computes arcsin
(x).
30 // The argument is first reduced by noting that arcsin
(x)
31 // is invalid for abs
(x) > 1 and arcsin
(-x) = -arcsin
(x).
32 // For denormal and small arguments arcsin
(x) = x to machine
33 // accuracy. Remaining argument ranges are handled as follows.
34 // For abs
(x) <= 0.5 use
35 // arcsin
(x) = x
+ x^
3*R
(x^
2)
36 // where R
(x^
2) is a rational minimax approximation to
37 // (arcsin(x) - x
)/x^
3.
38 // For abs
(x) > 0.5 exploit the identity
:
39 // arcsin
(x) = pi
/2 -
2*arcsin
(sqrt(1-x)/2)
40 // together with the above rational approximation
, and
41 // reconstruct the terms carefully.
43 const float piby2_tail
= 7.5497894159e-08F
; /* 0x33a22168 */
44 const float hpiby2_head
= 7.8539812565e-01F
; /* 0x3f490fda */
45 const float piby2
= 1.5707963705e+00F
; /* 0x3fc90fdb */
48 uint aux
= ux
& EXSIGNBIT_SP32
;
50 float spiby2
= as_float
(xs | as_uint
(piby2));
51 int xexp
= (int)(aux >> EXPSHIFTBITS_SP32
) - EXPBIAS_SP32
;
52 float y
= as_float
(aux);
55 int transform
= xexp
>= -
1;
58 float rt
= 0.5f
* (1.0f - y
);
59 float r
= transform ? rt
: y2
;
61 // Use a rational approximation for
[0.0, 0.5]
64 mad
(r, -
0.00396137437848476485201154797087F
, -
0.0133819288943925804214011424456F
),
65 -
0.0565298683201845211985026327361F
),
66 0.184161606965100694821398249421F
);
68 float b
= mad
(r, -
0.836411276854206731913362287293F
, 1.10496961524520294485512696706F
);
69 float u
= r
* MATH_DIVIDE
(a, b
);
71 float s
= MATH_SQRT
(r);
72 float s1
= as_float
(as_uint(s) & 0xffff0000);
73 float c
= MATH_DIVIDE
(mad(-s1, s1
, r
), s
+ s1
);
74 float p
= mad
(2.0f
*s
, u
, -mad
(c, -
2.0f
, piby2_tail
));
75 float q
= mad
(s1, -
2.0f
, hpiby2_head
);
76 float vt
= hpiby2_head -
(p - q
);
77 float v
= mad
(y, u
, y
);
78 v
= transform ? vt
: v
;
80 float ret
= as_float
(xs | as_uint
(v));
81 ret
= aux
> 0x3f800000U ? as_float
(QNANBITPATT_SP32) : ret
;
82 ret
= aux
== 0x3f800000U ? spiby2
: ret
;
83 ret
= xexp
< -
14 ? x
: ret
;
88 _CLC_UNARY_VECTORIZE
(_CLC_OVERLOAD _CLC_DEF
, float
, asin
, float
);
92 #pragma OPENCL EXTENSION cl_khr_fp64
: enable
94 _CLC_OVERLOAD _CLC_DEF double asin
(double x
) {
95 // Computes arcsin
(x).
96 // The argument is first reduced by noting that arcsin
(x)
97 // is invalid for abs
(x) > 1 and arcsin
(-x) = -arcsin
(x).
98 // For denormal and small arguments arcsin
(x) = x to machine
99 // accuracy. Remaining argument ranges are handled as follows.
100 // For abs
(x) <= 0.5 use
101 // arcsin
(x) = x
+ x^
3*R
(x^
2)
102 // where R
(x^
2) is a rational minimax approximation to
103 // (arcsin(x) - x
)/x^
3.
104 // For abs
(x) > 0.5 exploit the identity
:
105 // arcsin
(x) = pi
/2 -
2*arcsin
(sqrt(1-x)/2)
106 // together with the above rational approximation
, and
107 // reconstruct the terms carefully.
109 const double piby2_tail
= 6.1232339957367660e-17; /* 0x3c91a62633145c07 */
110 const double hpiby2_head
= 7.8539816339744831e-01; /* 0x3fe921fb54442d18 */
111 const double piby2
= 1.5707963267948965e+00; /* 0x3ff921fb54442d18 */
114 int xneg
= as_int2
(x).hi
< 0;
115 int xexp
= (as_int2(y).hi
>> 20) - EXPBIAS_DP64
;
118 int transform
= xexp
>= -
1;
120 double rt
= 0.5 * (1.0 - y
);
122 double r
= transform ? rt
: y2
;
124 // Use a rational approximation for
[0.0, 0.5]
130 fma
(r, 0.0000482901920344786991880522822991,
131 0.00109242697235074662306043804220),
132 -
0.0549989809235685841612020091328),
133 0.275558175256937652532686256258),
134 -
0.445017216867635649900123110649),
135 0.227485835556935010735943483075);
140 fma
(r, 0.105869422087204370341222318533,
141 -
0.943639137032492685763471240072),
142 2.76568859157270989520376345954),
143 -
3.28431505720958658909889444194),
144 1.36491501334161032038194214209);
146 double u
= r
* MATH_DIVIDE
(un, ud
);
148 // Reconstruct asin carefully in transformed region
150 double sh
= as_double
(as_ulong(s) & 0xffffffff00000000UL
);
151 double c
= MATH_DIVIDE
(fma(-sh, sh
, r
), s
+ sh
);
152 double p
= fma
(2.0
*s
, u
, -fma
(-2.0
, c
, piby2_tail
));
153 double q
= fma
(-2.0
, sh
, hpiby2_head
);
154 double vt
= hpiby2_head -
(p - q
);
155 double v
= fma
(y, u
, y
);
156 v
= transform ? vt
: v
;
158 v
= xexp
< -
28 ? y
: v
;
159 v
= xexp
>= 0 ? as_double
(QNANBITPATT_DP64) : v
;
160 v
= y
== 1.0 ? piby2
: v
;
162 return xneg ? -v
: v
;
165 _CLC_UNARY_VECTORIZE
(_CLC_OVERLOAD _CLC_DEF
, double
, asin
, double
);
167 #endif
// cl_khr_fp64