2 * Copyright
(c) 2014 Advanced Micro Devices
, Inc.
4 * Permission is hereby granted
, free of charge
, to any person obtaining a copy
5 * of this software and associated documentation files
(the "Software"), to deal
6 * in the Software without restriction
, including without limitation the rights
7 * to use
, copy
, modify
, merge
, publish
, distribute
, sublicense
, and
/or sell
8 * copies of the Software
, and to permit persons to whom the Software is
9 * furnished to do so
, subject to the following conditions
:
11 * The above copyright notice and this permission notice shall be included in
12 * all copies or substantial portions of the Software.
14 * THE SOFTWARE IS PROVIDED
"AS IS", WITHOUT WARRANTY OF ANY KIND
, EXPRESS OR
15 * IMPLIED
, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY
,
16 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
17 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM
, DAMAGES OR OTHER
18 * LIABILITY
, WHETHER IN AN ACTION OF CONTRACT
, TORT OR OTHERWISE
, ARISING FROM
,
19 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
24 #include
<clc
/clcmacro.h
>
25 #include
<clc
/math
/clc_floor.h
>
26 #include
<clc
/math
/clc_trunc.h
>
27 #include
<clc
/shared
/clc_max.h
>
29 #include
<math
/clc_remainder.h
>
33 _CLC_DEF _CLC_OVERLOAD float __clc_fmod
(float x
, float y
)
36 int ax
= ux
& EXSIGNBIT_SP32
;
37 float xa
= as_float
(ax);
39 int ex
= ax
>> EXPSHIFTBITS_SP32
;
42 int ay
= uy
& EXSIGNBIT_SP32
;
43 float ya
= as_float
(ay);
44 int ey
= ay
>> EXPSHIFTBITS_SP32
;
46 float xr
= as_float
(0x3f800000 |
(ax & 0x007fffff));
47 float yr
= as_float
(0x3f800000 |
(ay & 0x007fffff));
67 float s
= as_float
(ey << EXPSHIFTBITS_SP32
);
73 xr
= as_float
(sx ^ as_int
(xr));
75 c
= ax
> PINFBITPATT_SP32 | ay
> PINFBITPATT_SP32 | ax
== PINFBITPATT_SP32 | ay
== 0;
76 xr
= c ? as_float
(QNANBITPATT_SP32) : xr
;
81 _CLC_BINARY_VECTORIZE
(_CLC_DEF _CLC_OVERLOAD
, float
, __clc_fmod
, float
, float
);
84 _CLC_DEF _CLC_OVERLOAD double __clc_fmod
(double x
, double y
)
86 ulong ux
= as_ulong
(x);
87 ulong ax
= ux
& ~SIGNBIT_DP64
;
89 double dx
= as_double
(ax);
90 int xexp
= convert_int
(ax >> EXPSHIFTBITS_DP64
);
91 int xexp1
= 11 -
(int) clz
(ax & MANTBITS_DP64
);
92 xexp1
= xexp
< 1 ? xexp1
: xexp
;
94 ulong uy
= as_ulong
(y);
95 ulong ay
= uy
& ~SIGNBIT_DP64
;
96 double dy
= as_double
(ay);
97 int yexp
= convert_int
(ay >> EXPSHIFTBITS_DP64
);
98 int yexp1
= 11 -
(int) clz
(ay & MANTBITS_DP64
);
99 yexp1
= yexp
< 1 ? yexp1
: yexp
;
101 // First assume |x|
> |y|
103 // Set ntimes to the number of times we need to do a
104 // partial remainder. If the exponent of x is an exact multiple
105 // of
53 larger than the exponent of y
, and the mantissa of x is
106 // less than the mantissa of y
, ntimes will be one too large
107 // but it doesn
't matter - it just means that we
'll go round
108 // the loop below one extra time.
109 int ntimes
= __clc_max
(0, (xexp1 - yexp1
) / 53);
110 double w
= ldexp
(dy, ntimes
* 53);
111 w
= ntimes
== 0 ? dy
: w
;
112 double scale
= ntimes
== 0 ?
1.0 : 0x1.0p-53
;
114 // Each time round the loop we compute a partial remainder.
115 // This is done by subtracting a large multiple of w
116 // from x each time
, where w is a scaled up version of y.
117 // The subtraction must be performed exactly in quad
118 // precision
, though the result at each stage can
119 // fit exactly in a double precision number.
123 for
(i = 0; i < ntimes; i++) {
124 // Compute integral multiplier
125 t
= __clc_trunc
(dx / w
);
127 // Compute w
* t in quad precision
131 // Subtract w
* t from dx
133 dx
= v
+ (((dx - v
) - p
) - pp
);
135 // If t was one too large
, dx will be negative. Add back one w.
136 dx
+= dx
< 0.0 ? w
: 0.0;
138 // Scale w down by
2^
(-53) for the next iteration
143 // Variable todd says whether the integer t is odd or not
144 t
= __clc_floor
(dx / w
);
151 dx
= v
+ (((dx - v
) - p
) - pp
);
156 // At this point
, dx lies in the range
[0,dy
)
157 double ret
= as_double
(xsgn ^ as_ulong
(dx));
160 // Now handle |x|
== |y|
165 // Next
, handle |x|
< |y|
169 // We don
't need anything special for |x|
== 0
173 ret
= c ? as_double
(QNANBITPATT_DP64) : ret
;
176 c
= yexp
> BIASEDEMAX_DP64
;
181 c
= xexp
> BIASEDEMAX_DP64
;
182 ret
= c ? as_double
(QNANBITPATT_DP64) : ret
;
186 _CLC_BINARY_VECTORIZE
(_CLC_DEF _CLC_OVERLOAD
, double
, __clc_fmod
, double
, double
);