2 * Copyright
(c) 2014 Advanced Micro Devices
, Inc.
4 * Permission is hereby granted
, free of charge
, to any person obtaining a copy
5 * of this software and associated documentation files
(the "Software"), to deal
6 * in the Software without restriction
, including without limitation the rights
7 * to use
, copy
, modify
, merge
, publish
, distribute
, sublicense
, and
/or sell
8 * copies of the Software
, and to permit persons to whom the Software is
9 * furnished to do so
, subject to the following conditions
:
11 * The above copyright notice and this permission notice shall be included in
12 * all copies or substantial portions of the Software.
14 * THE SOFTWARE IS PROVIDED
"AS IS", WITHOUT WARRANTY OF ANY KIND
, EXPRESS OR
15 * IMPLIED
, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY
,
16 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
17 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM
, DAMAGES OR OTHER
18 * LIABILITY
, WHETHER IN AN ACTION OF CONTRACT
, TORT OR OTHERWISE
, ARISING FROM
,
19 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
25 #include
<math
/clc_remainder.h
>
26 #include
"../clcmacro.h"
30 _CLC_DEF _CLC_OVERLOAD float __clc_remquo
(float x
, float y
, __private int
*quo
)
32 x
= __clc_flush_denormal_if_not_supported
(x);
33 y
= __clc_flush_denormal_if_not_supported
(y);
35 int ax
= ux
& EXSIGNBIT_SP32
;
36 float xa
= as_float
(ax);
38 int ex
= ax
>> EXPSHIFTBITS_SP32
;
41 int ay
= uy
& EXSIGNBIT_SP32
;
42 float ya
= as_float
(ay);
44 int ey
= ay
>> EXPSHIFTBITS_SP32
;
46 float xr
= as_float
(0x3f800000 |
(ax & 0x007fffff));
47 float yr
= as_float
(0x3f800000 |
(ay & 0x007fffff));
71 c
= (yr < 2.0f
* xr
) |
((yr == 2.0f
* xr
) & ((q & 0x1) == 0x1));
75 float s
= as_float
(ey << EXPSHIFTBITS_SP32
);
78 int qsgn
= sx
== sy ?
1 : -
1;
79 int quot
= (q & 0x7f) * qsgn
;
82 quot
= c ? qsgn
: quot
;
85 xr
= as_float
(sx ^ as_int
(xr));
87 c
= ax
> PINFBITPATT_SP32 | ay
> PINFBITPATT_SP32 | ax
== PINFBITPATT_SP32 | ay
== 0;
89 xr
= c ? as_float
(QNANBITPATT_SP32) : xr
;
95 // remquo singature is special
, we don
't have macro for this
96 #define __VEC_REMQUO
(TYPE, VEC_SIZE
, HALF_VEC_SIZE
) \
97 _CLC_DEF _CLC_OVERLOAD TYPE
##VEC_SIZE __clc_remquo
(TYPE##VEC_SIZE x
, TYPE
##VEC_SIZE y
, __private int
##VEC_SIZE
*quo
) \
99 int
##HALF_VEC_SIZE lo
, hi
; \
100 TYPE
##VEC_SIZE ret
; \
101 ret.lo
= __clc_remquo
(x.lo
, y.lo
, &lo
); \
102 ret.hi
= __clc_remquo
(x.hi
, y.hi
, &hi
); \
107 __VEC_REMQUO
(float, 2,)
108 __VEC_REMQUO
(float, 3, 2)
109 __VEC_REMQUO
(float, 4, 2)
110 __VEC_REMQUO
(float, 8, 4)
111 __VEC_REMQUO
(float, 16, 8)
114 _CLC_DEF _CLC_OVERLOAD double __clc_remquo
(double x
, double y
, __private int
*pquo
)
116 ulong ux
= as_ulong
(x);
117 ulong ax
= ux
& ~SIGNBIT_DP64
;
118 ulong xsgn
= ux ^ ax
;
119 double dx
= as_double
(ax);
120 int xexp
= convert_int
(ax >> EXPSHIFTBITS_DP64
);
121 int xexp1
= 11 -
(int) clz
(ax & MANTBITS_DP64
);
122 xexp1
= xexp
< 1 ? xexp1
: xexp
;
124 ulong uy
= as_ulong
(y);
125 ulong ay
= uy
& ~SIGNBIT_DP64
;
126 double dy
= as_double
(ay);
127 int yexp
= convert_int
(ay >> EXPSHIFTBITS_DP64
);
128 int yexp1
= 11 -
(int) clz
(ay & MANTBITS_DP64
);
129 yexp1
= yexp
< 1 ? yexp1
: yexp
;
131 int qsgn
= ((ux ^ uy
) & SIGNBIT_DP64
) == 0UL ?
1 : -
1;
133 // First assume |x|
> |y|
135 // Set ntimes to the number of times we need to do a
136 // partial remainder. If the exponent of x is an exact multiple
137 // of
53 larger than the exponent of y
, and the mantissa of x is
138 // less than the mantissa of y
, ntimes will be one too large
139 // but it doesn
't matter - it just means that we
'll go round
140 // the loop below one extra time.
141 int ntimes
= max
(0, (xexp1 - yexp1
) / 53);
142 double w
= ldexp
(dy, ntimes
* 53);
143 w
= ntimes
== 0 ? dy
: w
;
144 double scale
= ntimes
== 0 ?
1.0 : 0x1.0p-53
;
146 // Each time round the loop we compute a partial remainder.
147 // This is done by subtracting a large multiple of w
148 // from x each time
, where w is a scaled up version of y.
149 // The subtraction must be performed exactly in quad
150 // precision
, though the result at each stage can
151 // fit exactly in a double precision number.
155 for
(i = 0; i < ntimes; i++) {
156 // Compute integral multiplier
159 // Compute w
* t in quad precision
163 // Subtract w
* t from dx
165 dx
= v
+ (((dx - v
) - p
) - pp
);
167 // If t was one too large
, dx will be negative. Add back one w.
168 dx
+= dx
< 0.0 ? w
: 0.0;
170 // Scale w down by
2^
(-53) for the next iteration
175 // Variable todd says whether the integer t is odd or not
183 dx
= v
+ (((dx - v
) - p
) - pp
);
190 // At this point
, dx lies in the range
[0,dy
)
192 // For the remainder function
, we need to adjust dx
193 // so that it lies in the range
(-y/2, y
/2] by carefully
194 // subtracting w
(== dy
== y
) if necessary. The rigmarole
195 // with todd is to get the correct sign of the result
196 // when x
/y lies exactly half way between two integers
,
197 // when we need to choose the even integer.
199 int al
= (2.0
*dx
> w
) |
(todd & (2.0
*dx
== w
));
200 double dxl
= dx -
(al ? w
: 0.0);
202 int ag
= (dx > 0.5*w
) |
(todd & (dx == 0.5*w
));
203 double dxg
= dx -
(ag ? w
: 0.0);
205 dx
= dy
< 0x1.0p
+1022 ? dxl
: dxg
;
206 lt
+= dy
< 0x1.0p
+1022 ? al
: ag
;
207 int quo
= ((int)lt
& 0x7f) * qsgn
;
209 double ret
= as_double
(xsgn ^ as_ulong
(dx));
212 // Now handle |x|
== |y|
215 quo
= c ? qsgn
: quo
;
218 // Next
, handle |x|
< |y|
223 c
&= (yexp < 1023 & 2.0*dx
> dy
) |
(dx > 0.5*dy
);
224 quo
= c ? qsgn
: quo
;
225 // we could use a conversion here instead since qsgn
= +-
1
226 p
= qsgn
== 1 ? -
1.0 : 1.0;
230 // We don
't need anything special for |x|
== 0
235 ret
= c ? as_double
(QNANBITPATT_DP64) : ret
;
238 c
= yexp
> BIASEDEMAX_DP64
;
244 c
= xexp
> BIASEDEMAX_DP64
;
246 ret
= c ? as_double
(QNANBITPATT_DP64) : ret
;
251 __VEC_REMQUO
(double, 2,)
252 __VEC_REMQUO
(double, 3, 2)
253 __VEC_REMQUO
(double, 4, 2)
254 __VEC_REMQUO
(double, 8, 4)
255 __VEC_REMQUO
(double, 16, 8)