2 * Copyright
(c) 2014 Advanced Micro Devices
, Inc.
4 * Permission is hereby granted
, free of charge
, to any person obtaining a copy
5 * of this software and associated documentation files
(the "Software"), to deal
6 * in the Software without restriction
, including without limitation the rights
7 * to use
, copy
, modify
, merge
, publish
, distribute
, sublicense
, and
/or sell
8 * copies of the Software
, and to permit persons to whom the Software is
9 * furnished to do so
, subject to the following conditions
:
11 * The above copyright notice and this permission notice shall be included in
12 * all copies or substantial portions of the Software.
14 * THE SOFTWARE IS PROVIDED
"AS IS", WITHOUT WARRANTY OF ANY KIND
, EXPRESS OR
15 * IMPLIED
, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY
,
16 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
17 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM
, DAMAGES OR OTHER
18 * LIABILITY
, WHETHER IN AN ACTION OF CONTRACT
, TORT OR OTHERWISE
, ARISING FROM
,
19 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
24 #include
<clc
/clcmacro.h
>
25 #include
<clc
/math
/clc_floor.h
>
26 #include
<clc
/math
/clc_trunc.h
>
27 #include
<clc
/shared
/clc_max.h
>
29 #include
<math
/clc_remainder.h
>
33 _CLC_DEF _CLC_OVERLOAD float __clc_remainder
(float x
, float y
)
36 int ax
= ux
& EXSIGNBIT_SP32
;
37 float xa
= as_float
(ax);
39 int ex
= ax
>> EXPSHIFTBITS_SP32
;
42 int ay
= uy
& EXSIGNBIT_SP32
;
43 float ya
= as_float
(ay);
44 int ey
= ay
>> EXPSHIFTBITS_SP32
;
46 float xr
= as_float
(0x3f800000 |
(ax & 0x007fffff));
47 float yr
= as_float
(0x3f800000 |
(ay & 0x007fffff));
71 c
= (yr < 2.0f
* xr
) |
((yr == 2.0f
* xr
) & ((q & 0x1) == 0x1));
75 float s
= as_float
(ey << EXPSHIFTBITS_SP32
);
81 xr
= as_float
(sx ^ as_int
(xr));
83 c
= ax
> PINFBITPATT_SP32 | ay
> PINFBITPATT_SP32 | ax
== PINFBITPATT_SP32 | ay
== 0;
84 xr
= c ? as_float
(QNANBITPATT_SP32) : xr
;
89 _CLC_BINARY_VECTORIZE
(_CLC_DEF _CLC_OVERLOAD
, float
, __clc_remainder
, float
, float
);
92 _CLC_DEF _CLC_OVERLOAD double __clc_remainder
(double x
, double y
)
94 ulong ux
= as_ulong
(x);
95 ulong ax
= ux
& ~SIGNBIT_DP64
;
97 double dx
= as_double
(ax);
98 int xexp
= convert_int
(ax >> EXPSHIFTBITS_DP64
);
99 int xexp1
= 11 -
(int) clz
(ax & MANTBITS_DP64
);
100 xexp1
= xexp
< 1 ? xexp1
: xexp
;
102 ulong uy
= as_ulong
(y);
103 ulong ay
= uy
& ~SIGNBIT_DP64
;
104 double dy
= as_double
(ay);
105 int yexp
= convert_int
(ay >> EXPSHIFTBITS_DP64
);
106 int yexp1
= 11 -
(int) clz
(ay & MANTBITS_DP64
);
107 yexp1
= yexp
< 1 ? yexp1
: yexp
;
109 int qsgn
= ((ux ^ uy
) & SIGNBIT_DP64
) == 0UL ?
1 : -
1;
111 // First assume |x|
> |y|
113 // Set ntimes to the number of times we need to do a
114 // partial remainder. If the exponent of x is an exact multiple
115 // of
53 larger than the exponent of y
, and the mantissa of x is
116 // less than the mantissa of y
, ntimes will be one too large
117 // but it doesn
't matter - it just means that we
'll go round
118 // the loop below one extra time.
119 int ntimes
= __clc_max
(0, (xexp1 - yexp1
) / 53);
120 double w
= ldexp
(dy, ntimes
* 53);
121 w
= ntimes
== 0 ? dy
: w
;
122 double scale
= ntimes
== 0 ?
1.0 : 0x1.0p-53
;
124 // Each time round the loop we compute a partial remainder.
125 // This is done by subtracting a large multiple of w
126 // from x each time
, where w is a scaled up version of y.
127 // The subtraction must be performed exactly in quad
128 // precision
, though the result at each stage can
129 // fit exactly in a double precision number.
133 for
(i = 0; i < ntimes; i++) {
134 // Compute integral multiplier
135 t
= __clc_trunc
(dx / w
);
137 // Compute w
* t in quad precision
141 // Subtract w
* t from dx
143 dx
= v
+ (((dx - v
) - p
) - pp
);
145 // If t was one too large
, dx will be negative. Add back one w.
146 dx
+= dx
< 0.0 ? w
: 0.0;
148 // Scale w down by
2^
(-53) for the next iteration
153 // Variable todd says whether the integer t is odd or not
154 t
= __clc_floor
(dx / w
);
161 dx
= v
+ (((dx - v
) - p
) - pp
);
166 // At this point
, dx lies in the range
[0,dy
)
168 // For the fmod function
, we
're done apart from setting the correct sign.
170 // For the remainder function
, we need to adjust dx
171 // so that it lies in the range
(-y/2, y
/2] by carefully
172 // subtracting w
(== dy
== y
) if necessary. The rigmarole
173 // with todd is to get the correct sign of the result
174 // when x
/y lies exactly half way between two integers
,
175 // when we need to choose the even integer.
177 int al
= (2.0
*dx
> w
) |
(todd & (2.0
*dx
== w
));
178 double dxl
= dx -
(al ? w
: 0.0);
180 int ag
= (dx > 0.5*w
) |
(todd & (dx == 0.5*w
));
181 double dxg
= dx -
(ag ? w
: 0.0);
183 dx
= dy
< 0x1.0p
+1022 ? dxl
: dxg
;
185 double ret
= as_double
(xsgn ^ as_ulong
(dx));
188 // Now handle |x|
== |y|
193 // Next
, handle |x|
< |y|
197 c
&= (yexp < 1023 & 2.0*dx
> dy
) |
(dx > 0.5*dy
);
198 // we could use a conversion here instead since qsgn
= +-
1
199 p
= qsgn
== 1 ? -
1.0 : 1.0;
203 // We don
't need anything special for |x|
== 0
207 ret
= c ? as_double
(QNANBITPATT_DP64) : ret
;
210 c
= yexp
> BIASEDEMAX_DP64
;
215 c
= xexp
> BIASEDEMAX_DP64
;
216 ret
= c ? as_double
(QNANBITPATT_DP64) : ret
;
220 _CLC_BINARY_VECTORIZE
(_CLC_DEF _CLC_OVERLOAD
, double
, __clc_remainder
, double
, double
);