/*
 * Copyright (c) 2015 Advanced Micro Devices, Inc.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a copy
 * of this software and associated documentation files (the "Software"), to deal
 * in the Software without restriction, including without limitation the rights
 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 * copies of the Software, and to permit persons to whom the Software is
 * furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
 * THE SOFTWARE.
 */
24 #include
"../../../generic/lib/clcmacro.h"
25 #include
"math/clc_sqrt.h"
27 _CLC_DEFINE_UNARY_BUILTIN
(float, sqrt
, __clc_sqrt
, float
)
// Half-precision sqrt. Only built when the compiler advertises cl_khr_fp16,
// so targets without half support neither enable the extension nor see the
// `half` type.
#ifdef cl_khr_fp16

#pragma OPENCL EXTENSION cl_khr_fp16 : enable
// NOTE(review): expansion is defined in clcmacro.h; presumably emits the
// scalar half overload plus its vector forms -- confirm against the macro.
_CLC_DEFINE_UNARY_BUILTIN(half, sqrt, __clc_sqrt, half)

#endif
// Double-precision sqrt. Only built when the compiler advertises cl_khr_fp64.
#ifdef cl_khr_fp64

#pragma OPENCL EXTENSION cl_khr_fp64 : enable

// Pick the reciprocal-square-root builtin that matches the target: the two
// builtins belong to different back ends, so exactly one may be selected.
#ifdef __AMDGCN__
  #define __clc_builtin_rsq __builtin_amdgcn_rsq
#else
  #define __clc_builtin_rsq __builtin_r600_recipsqrt_ieee
#endif

/*
 * sqrt(double): refine a hardware reciprocal-square-root estimate with fused
 * multiply-adds.
 *
 * Inputs below 2^-767 are first scaled up by 2^256 and the final result is
 * scaled back by 2^-128 (the square root of the input scale).
 * NOTE(review): the threshold presumably keeps the estimate and intermediate
 * products away from the subnormal range -- confirm against hardware docs.
 *
 * x == +inf and x == +/-0 bypass the refinement and return the (scaled) input
 * unchanged, because rsq() of those values times the input is inf * 0.
 * Negative and NaN inputs simply propagate whatever the rsq builtin produces
 * through the arithmetic.
 */
_CLC_OVERLOAD _CLC_DEF double sqrt(double x) {
  const int needs_scaling = x < 0x1p-767;
  const int scale_in = needs_scaling ? 256 : 0;    /* multiply input by 2^256 */
  const int scale_out = needs_scaling ? -128 : 0;  /* multiply result by 2^-128 */

  const double xs = ldexp(x, scale_in);

  /* Initial estimates: g ~= sqrt(xs), h ~= 1 / (2 * sqrt(xs)). */
  const double y = __clc_builtin_rsq(xs);
  double g = xs * y;
  double h = y * 0.5;

  /* One coupled step: r = 0.5 - h*g, then g += g*r and h += h*r. */
  const double r = fma(-h, g, 0.5);
  g = fma(g, r, g);
  double d = fma(-g, g, xs); /* residual xs - g*g, using the updated g */
  h = fma(h, r, h);

  /* Two residual corrections: g += (xs - g*g) * h. */
  g = fma(d, h, g);
  d = fma(-g, g, xs);
  g = fma(d, h, g);

  const double result = ldexp(g, scale_out);
  return ((x == __builtin_inf()) || (x == 0.0)) ? xs : result;
}

// NOTE(review): expansion is defined in clcmacro.h; presumably derives the
// vector double overloads from the scalar sqrt above -- confirm.
_CLC_UNARY_VECTORIZE(_CLC_OVERLOAD _CLC_DEF, double, sqrt, double);

#endif