2 * IEEE754 floating point arithmetic
3 * double precision: MADDF.f (Fused Multiply Add)
4 * MADDF.fmt: FPR[fd] = FPR[fd] + (FPR[fs] x FPR[ft])
6 * MIPS floating point support
7 * Copyright (C) 2015 Imagination Technologies, Ltd.
8 * Author: Markos Chandras <markos.chandras@imgtec.com>
10 * This program is free software; you can distribute it and/or modify it
11 * under the terms of the GNU General Public License as published by the
12 * Free Software Foundation; version 2 of the License.
15 #include "ieee754dp.h"
18 /* 128 bits shift right logical with rounding. */
19 static void srl128(u64
*hptr
, u64
*lptr
, int count
)
24 *lptr
= *hptr
!= 0 || *lptr
!= 0;
26 } else if (count
>= 64) {
28 *lptr
= *hptr
| (*lptr
!= 0);
31 *lptr
= *hptr
>> (count
- 64);
32 *lptr
|= (*hptr
<< (128 - count
)) != 0 || low
!= 0;
37 *lptr
= low
>> count
| *hptr
<< (64 - count
);
38 *lptr
|= (low
<< (64 - count
)) != 0;
39 *hptr
= *hptr
>> count
;
43 static union ieee754dp
_dp_maddf(union ieee754dp z
, union ieee754dp x
,
44 union ieee754dp y
, enum maddf_flags flags
)
75 * Handle the cases when at least one of x, y or z is a NaN.
76 * Order of precedence is sNaN, qNaN and z, x, y.
78 if (zc
== IEEE754_CLASS_SNAN
)
79 return ieee754dp_nanxcpt(z
);
80 if (xc
== IEEE754_CLASS_SNAN
)
81 return ieee754dp_nanxcpt(x
);
82 if (yc
== IEEE754_CLASS_SNAN
)
83 return ieee754dp_nanxcpt(y
);
84 if (zc
== IEEE754_CLASS_QNAN
)
86 if (xc
== IEEE754_CLASS_QNAN
)
88 if (yc
== IEEE754_CLASS_QNAN
)
91 if (zc
== IEEE754_CLASS_DNORM
)
93 /* ZERO z cases are handled separately below */
95 switch (CLPAIR(xc
, yc
)) {
100 case CLPAIR(IEEE754_CLASS_INF
, IEEE754_CLASS_ZERO
):
101 case CLPAIR(IEEE754_CLASS_ZERO
, IEEE754_CLASS_INF
):
102 ieee754_setcx(IEEE754_INVALID_OPERATION
);
103 return ieee754dp_indef();
105 case CLPAIR(IEEE754_CLASS_NORM
, IEEE754_CLASS_INF
):
106 case CLPAIR(IEEE754_CLASS_DNORM
, IEEE754_CLASS_INF
):
107 case CLPAIR(IEEE754_CLASS_INF
, IEEE754_CLASS_NORM
):
108 case CLPAIR(IEEE754_CLASS_INF
, IEEE754_CLASS_DNORM
):
109 case CLPAIR(IEEE754_CLASS_INF
, IEEE754_CLASS_INF
):
110 if ((zc
== IEEE754_CLASS_INF
) &&
111 ((!(flags
& MADDF_NEGATE_PRODUCT
) && (zs
!= (xs
^ ys
))) ||
112 ((flags
& MADDF_NEGATE_PRODUCT
) && (zs
== (xs
^ ys
))))) {
114 * Cases of addition of infinities with opposite signs
115 * or subtraction of infinities with same signs.
117 ieee754_setcx(IEEE754_INVALID_OPERATION
);
118 return ieee754dp_indef();
121 * z is here either not an infinity, or an infinity having the
122 * same sign as product (x*y) (in case of MADDF.D instruction)
123 * or product -(x*y) (in MSUBF.D case). The result must be an
124 * infinity, and its sign is determined only by the value of
125 * (flags & MADDF_NEGATE_PRODUCT) and the signs of x and y.
127 if (flags
& MADDF_NEGATE_PRODUCT
)
128 return ieee754dp_inf(1 ^ (xs
^ ys
));
130 return ieee754dp_inf(xs
^ ys
);
132 case CLPAIR(IEEE754_CLASS_ZERO
, IEEE754_CLASS_ZERO
):
133 case CLPAIR(IEEE754_CLASS_ZERO
, IEEE754_CLASS_NORM
):
134 case CLPAIR(IEEE754_CLASS_ZERO
, IEEE754_CLASS_DNORM
):
135 case CLPAIR(IEEE754_CLASS_NORM
, IEEE754_CLASS_ZERO
):
136 case CLPAIR(IEEE754_CLASS_DNORM
, IEEE754_CLASS_ZERO
):
137 if (zc
== IEEE754_CLASS_INF
)
138 return ieee754dp_inf(zs
);
139 if (zc
== IEEE754_CLASS_ZERO
) {
140 /* Handle cases +0 + (-0) and similar ones. */
141 if ((!(flags
& MADDF_NEGATE_PRODUCT
)
142 && (zs
== (xs
^ ys
))) ||
143 ((flags
& MADDF_NEGATE_PRODUCT
)
144 && (zs
!= (xs
^ ys
))))
146 * Cases of addition of zeros of equal signs
147 * or subtraction of zeroes of opposite signs.
148 * The sign of the resulting zero is in any
149 * such case determined only by the sign of z.
153 return ieee754dp_zero(ieee754_csr
.rm
== FPU_CSR_RD
);
155 /* x*y is here 0, and z is not 0, so just return z */
158 case CLPAIR(IEEE754_CLASS_DNORM
, IEEE754_CLASS_DNORM
):
162 case CLPAIR(IEEE754_CLASS_NORM
, IEEE754_CLASS_DNORM
):
163 if (zc
== IEEE754_CLASS_INF
)
164 return ieee754dp_inf(zs
);
168 case CLPAIR(IEEE754_CLASS_DNORM
, IEEE754_CLASS_NORM
):
169 if (zc
== IEEE754_CLASS_INF
)
170 return ieee754dp_inf(zs
);
174 case CLPAIR(IEEE754_CLASS_NORM
, IEEE754_CLASS_NORM
):
175 if (zc
== IEEE754_CLASS_INF
)
176 return ieee754dp_inf(zs
);
177 /* continue to real computations */
180 /* Finally get to do some computation */
183 * Do the multiplication bit first
185 * rm = xm * ym, re = xe + ye basically
187 * At this point xm and ym should have been normalized.
189 assert(xm
& DP_HIDDEN_BIT
);
190 assert(ym
& DP_HIDDEN_BIT
);
194 if (flags
& MADDF_NEGATE_PRODUCT
)
197 /* shunt to top of word */
198 xm
<<= 64 - (DP_FBITS
+ 1);
199 ym
<<= 64 - (DP_FBITS
+ 1);
202 * Multiply 64 bits xm and ym to give 128 bits result in hrm:lrm.
210 lrm
= DPXMULT(lxm
, lym
);
211 hrm
= DPXMULT(hxm
, hym
);
213 t
= DPXMULT(lxm
, hym
);
215 at
= lrm
+ (t
<< 32);
219 hrm
= hrm
+ (t
>> 32);
221 t
= DPXMULT(hxm
, lym
);
223 at
= lrm
+ (t
<< 32);
227 hrm
= hrm
+ (t
>> 32);
229 /* Put explicit bit at bit 126 if necessary */
230 if ((int64_t)hrm
< 0) {
231 lrm
= (hrm
<< 63) | (lrm
>> 1);
236 assert(hrm
& (1 << 62));
238 if (zc
== IEEE754_CLASS_ZERO
) {
240 * Move explicit bit from bit 126 to bit 55 since the
241 * ieee754dp_format code expects the mantissa to be
242 * 56 bits wide (53 + 3 rounding bits).
244 srl128(&hrm
, &lrm
, (126 - 55));
245 return ieee754dp_format(rs
, re
, lrm
);
248 /* Move explicit bit from bit 52 to bit 126 */
251 assert(hzm
& (1 << 62));
253 /* Make the exponents the same */
256 * Have to shift y fraction right to align.
259 srl128(&hrm
, &lrm
, s
);
261 } else if (re
> ze
) {
263 * Have to shift x fraction right to align.
266 srl128(&hzm
, &lzm
, s
);
270 assert(ze
<= DP_EMAX
);
272 /* Do the addition */
275 * Generate 128 bit result by adding two 127 bit numbers
276 * leaving result in hzm:lzm, zs and ze.
278 hzm
= hzm
+ hrm
+ (lzm
> (lzm
+ lrm
));
280 if ((int64_t)hzm
< 0) { /* carry out */
281 srl128(&hzm
, &lzm
, 1);
285 if (hzm
> hrm
|| (hzm
== hrm
&& lzm
>= lrm
)) {
286 hzm
= hzm
- hrm
- (lzm
< lrm
);
289 hzm
= hrm
- hzm
- (lrm
< lzm
);
293 if (lzm
== 0 && hzm
== 0)
294 return ieee754dp_zero(ieee754_csr
.rm
== FPU_CSR_RD
);
297 * Put explicit bit at bit 126 if necessary.
300 /* left shift by 63 or 64 bits */
301 if ((int64_t)lzm
< 0) {
302 /* MSB of lzm is the explicit bit */
314 while ((hzm
>> (62 - t
)) == 0)
319 hzm
= hzm
<< t
| lzm
>> (64 - t
);
326 * Move explicit bit from bit 126 to bit 55 since the
327 * ieee754dp_format code expects the mantissa to be
328 * 56 bits wide (53 + 3 rounding bits).
330 srl128(&hzm
, &lzm
, (126 - 55));
332 return ieee754dp_format(zs
, ze
, lzm
);
335 union ieee754dp
ieee754dp_maddf(union ieee754dp z
, union ieee754dp x
,
338 return _dp_maddf(z
, x
, y
, 0);
341 union ieee754dp
ieee754dp_msubf(union ieee754dp z
, union ieee754dp x
,
344 return _dp_maddf(z
, x
, y
, MADDF_NEGATE_PRODUCT
);