1 // SPDX-License-Identifier: GPL-2.0-only
3 * IEEE754 floating point arithmetic
4 * double precision: MADDF.f (Fused Multiply Add)
5 * MADDF.fmt: FPR[fd] = FPR[fd] + (FPR[fs] x FPR[ft])
7 * MIPS floating point support
8 * Copyright (C) 2015 Imagination Technologies, Ltd.
9 * Author: Markos Chandras <markos.chandras@imgtec.com>
12 #include "ieee754dp.h"
15 /* 128 bits shift right logical with rounding. */
16 static void srl128(u64
*hptr
, u64
*lptr
, int count
)
21 *lptr
= *hptr
!= 0 || *lptr
!= 0;
23 } else if (count
>= 64) {
25 *lptr
= *hptr
| (*lptr
!= 0);
28 *lptr
= *hptr
>> (count
- 64);
29 *lptr
|= (*hptr
<< (128 - count
)) != 0 || low
!= 0;
34 *lptr
= low
>> count
| *hptr
<< (64 - count
);
35 *lptr
|= (low
<< (64 - count
)) != 0;
36 *hptr
= *hptr
>> count
;
40 static union ieee754dp
_dp_maddf(union ieee754dp z
, union ieee754dp x
,
41 union ieee754dp y
, enum maddf_flags flags
)
72 if (flags
& MADDF_NEGATE_PRODUCT
)
74 if (flags
& MADDF_NEGATE_ADDITION
)
78 * Handle the cases when at least one of x, y or z is a NaN.
79 * Order of precedence is sNaN, qNaN and z, x, y.
81 if (zc
== IEEE754_CLASS_SNAN
)
82 return ieee754dp_nanxcpt(z
);
83 if (xc
== IEEE754_CLASS_SNAN
)
84 return ieee754dp_nanxcpt(x
);
85 if (yc
== IEEE754_CLASS_SNAN
)
86 return ieee754dp_nanxcpt(y
);
87 if (zc
== IEEE754_CLASS_QNAN
)
89 if (xc
== IEEE754_CLASS_QNAN
)
91 if (yc
== IEEE754_CLASS_QNAN
)
94 if (zc
== IEEE754_CLASS_DNORM
)
96 /* ZERO z cases are handled separately below */
98 switch (CLPAIR(xc
, yc
)) {
103 case CLPAIR(IEEE754_CLASS_INF
, IEEE754_CLASS_ZERO
):
104 case CLPAIR(IEEE754_CLASS_ZERO
, IEEE754_CLASS_INF
):
105 ieee754_setcx(IEEE754_INVALID_OPERATION
);
106 return ieee754dp_indef();
108 case CLPAIR(IEEE754_CLASS_NORM
, IEEE754_CLASS_INF
):
109 case CLPAIR(IEEE754_CLASS_DNORM
, IEEE754_CLASS_INF
):
110 case CLPAIR(IEEE754_CLASS_INF
, IEEE754_CLASS_NORM
):
111 case CLPAIR(IEEE754_CLASS_INF
, IEEE754_CLASS_DNORM
):
112 case CLPAIR(IEEE754_CLASS_INF
, IEEE754_CLASS_INF
):
113 if ((zc
== IEEE754_CLASS_INF
) && (zs
!= rs
)) {
115 * Cases of addition of infinities with opposite signs
116 * or subtraction of infinities with same signs.
118 ieee754_setcx(IEEE754_INVALID_OPERATION
);
119 return ieee754dp_indef();
122 * z is here either not an infinity, or an infinity having the
123 * same sign as product (x*y). The result must be an infinity,
124 * and its sign is determined only by the sign of product (x*y).
126 return ieee754dp_inf(rs
);
128 case CLPAIR(IEEE754_CLASS_ZERO
, IEEE754_CLASS_ZERO
):
129 case CLPAIR(IEEE754_CLASS_ZERO
, IEEE754_CLASS_NORM
):
130 case CLPAIR(IEEE754_CLASS_ZERO
, IEEE754_CLASS_DNORM
):
131 case CLPAIR(IEEE754_CLASS_NORM
, IEEE754_CLASS_ZERO
):
132 case CLPAIR(IEEE754_CLASS_DNORM
, IEEE754_CLASS_ZERO
):
133 if (zc
== IEEE754_CLASS_INF
)
134 return ieee754dp_inf(zs
);
135 if (zc
== IEEE754_CLASS_ZERO
) {
136 /* Handle cases +0 + (-0) and similar ones. */
139 * Cases of addition of zeros of equal signs
140 * or subtraction of zeroes of opposite signs.
141 * The sign of the resulting zero is in any
142 * such case determined only by the sign of z.
146 return ieee754dp_zero(ieee754_csr
.rm
== FPU_CSR_RD
);
148 /* x*y is here 0, and z is not 0, so just return z */
151 case CLPAIR(IEEE754_CLASS_DNORM
, IEEE754_CLASS_DNORM
):
154 case CLPAIR(IEEE754_CLASS_NORM
, IEEE754_CLASS_DNORM
):
155 if (zc
== IEEE754_CLASS_INF
)
156 return ieee754dp_inf(zs
);
160 case CLPAIR(IEEE754_CLASS_DNORM
, IEEE754_CLASS_NORM
):
161 if (zc
== IEEE754_CLASS_INF
)
162 return ieee754dp_inf(zs
);
166 case CLPAIR(IEEE754_CLASS_NORM
, IEEE754_CLASS_NORM
):
167 if (zc
== IEEE754_CLASS_INF
)
168 return ieee754dp_inf(zs
);
169 /* continue to real computations */
172 /* Finally get to do some computation */
175 * Do the multiplication bit first
177 * rm = xm * ym, re = xe + ye basically
179 * At this point xm and ym should have been normalized.
181 assert(xm
& DP_HIDDEN_BIT
);
182 assert(ym
& DP_HIDDEN_BIT
);
186 /* shunt to top of word */
187 xm
<<= 64 - (DP_FBITS
+ 1);
188 ym
<<= 64 - (DP_FBITS
+ 1);
191 * Multiply 64 bits xm and ym to give 128 bits result in hrm:lrm.
199 lrm
= DPXMULT(lxm
, lym
);
200 hrm
= DPXMULT(hxm
, hym
);
202 t
= DPXMULT(lxm
, hym
);
204 at
= lrm
+ (t
<< 32);
208 hrm
= hrm
+ (t
>> 32);
210 t
= DPXMULT(hxm
, lym
);
212 at
= lrm
+ (t
<< 32);
216 hrm
= hrm
+ (t
>> 32);
218 /* Put explicit bit at bit 126 if necessary */
219 if ((int64_t)hrm
< 0) {
220 lrm
= (hrm
<< 63) | (lrm
>> 1);
225 assert(hrm
& (1 << 62));
227 if (zc
== IEEE754_CLASS_ZERO
) {
229 * Move explicit bit from bit 126 to bit 55 since the
230 * ieee754dp_format code expects the mantissa to be
231 * 56 bits wide (53 + 3 rounding bits).
233 srl128(&hrm
, &lrm
, (126 - 55));
234 return ieee754dp_format(rs
, re
, lrm
);
237 /* Move explicit bit from bit 52 to bit 126 */
240 assert(hzm
& (1 << 62));
242 /* Make the exponents the same */
245 * Have to shift y fraction right to align.
248 srl128(&hrm
, &lrm
, s
);
250 } else if (re
> ze
) {
252 * Have to shift x fraction right to align.
255 srl128(&hzm
, &lzm
, s
);
259 assert(ze
<= DP_EMAX
);
261 /* Do the addition */
264 * Generate 128 bit result by adding two 127 bit numbers
265 * leaving result in hzm:lzm, zs and ze.
267 hzm
= hzm
+ hrm
+ (lzm
> (lzm
+ lrm
));
269 if ((int64_t)hzm
< 0) { /* carry out */
270 srl128(&hzm
, &lzm
, 1);
274 if (hzm
> hrm
|| (hzm
== hrm
&& lzm
>= lrm
)) {
275 hzm
= hzm
- hrm
- (lzm
< lrm
);
278 hzm
= hrm
- hzm
- (lrm
< lzm
);
282 if (lzm
== 0 && hzm
== 0)
283 return ieee754dp_zero(ieee754_csr
.rm
== FPU_CSR_RD
);
286 * Put explicit bit at bit 126 if necessary.
289 /* left shift by 63 or 64 bits */
290 if ((int64_t)lzm
< 0) {
291 /* MSB of lzm is the explicit bit */
303 while ((hzm
>> (62 - t
)) == 0)
308 hzm
= hzm
<< t
| lzm
>> (64 - t
);
315 * Move explicit bit from bit 126 to bit 55 since the
316 * ieee754dp_format code expects the mantissa to be
317 * 56 bits wide (53 + 3 rounding bits).
319 srl128(&hzm
, &lzm
, (126 - 55));
321 return ieee754dp_format(zs
, ze
, lzm
);
324 union ieee754dp
ieee754dp_maddf(union ieee754dp z
, union ieee754dp x
,
327 return _dp_maddf(z
, x
, y
, 0);
330 union ieee754dp
ieee754dp_msubf(union ieee754dp z
, union ieee754dp x
,
333 return _dp_maddf(z
, x
, y
, MADDF_NEGATE_PRODUCT
);
336 union ieee754dp
ieee754dp_madd(union ieee754dp z
, union ieee754dp x
,
339 return _dp_maddf(z
, x
, y
, 0);
342 union ieee754dp
ieee754dp_msub(union ieee754dp z
, union ieee754dp x
,
345 return _dp_maddf(z
, x
, y
, MADDF_NEGATE_ADDITION
);
348 union ieee754dp
ieee754dp_nmadd(union ieee754dp z
, union ieee754dp x
,
351 return _dp_maddf(z
, x
, y
, MADDF_NEGATE_PRODUCT
|MADDF_NEGATE_ADDITION
);
354 union ieee754dp
ieee754dp_nmsub(union ieee754dp z
, union ieee754dp x
,
357 return _dp_maddf(z
, x
, y
, MADDF_NEGATE_PRODUCT
);