1 // SPDX-License-Identifier: GPL-2.0-only
3 * IEEE754 floating point arithmetic
4 * double precision: MADDF.f (Fused Multiply Add)
5 * MADDF.fmt: FPR[fd] = FPR[fd] + (FPR[fs] x FPR[ft])
7 * MIPS floating point support
8 * Copyright (C) 2015 Imagination Technologies, Ltd.
9 * Author: Markos Chandras <markos.chandras@imgtec.com>
12 #include "ieee754dp.h"
15 /* 128 bits shift right logical with rounding. */
16 static void srl128(u64
*hptr
, u64
*lptr
, int count
)
21 *lptr
= *hptr
!= 0 || *lptr
!= 0;
23 } else if (count
>= 64) {
25 *lptr
= *hptr
| (*lptr
!= 0);
28 *lptr
= *hptr
>> (count
- 64);
29 *lptr
|= (*hptr
<< (128 - count
)) != 0 || low
!= 0;
34 *lptr
= low
>> count
| *hptr
<< (64 - count
);
35 *lptr
|= (low
<< (64 - count
)) != 0;
36 *hptr
= *hptr
>> count
;
40 static union ieee754dp
_dp_maddf(union ieee754dp z
, union ieee754dp x
,
41 union ieee754dp y
, enum maddf_flags flags
)
72 if (flags
& MADDF_NEGATE_PRODUCT
)
74 if (flags
& MADDF_NEGATE_ADDITION
)
78 * Handle the cases when at least one of x, y or z is a NaN.
79 * Order of precedence is sNaN, qNaN and z, x, y.
81 if (zc
== IEEE754_CLASS_SNAN
)
82 return ieee754dp_nanxcpt(z
);
83 if (xc
== IEEE754_CLASS_SNAN
)
84 return ieee754dp_nanxcpt(x
);
85 if (yc
== IEEE754_CLASS_SNAN
)
86 return ieee754dp_nanxcpt(y
);
87 if (zc
== IEEE754_CLASS_QNAN
)
89 if (xc
== IEEE754_CLASS_QNAN
)
91 if (yc
== IEEE754_CLASS_QNAN
)
94 if (zc
== IEEE754_CLASS_DNORM
)
96 /* ZERO z cases are handled separately below */
98 switch (CLPAIR(xc
, yc
)) {
103 case CLPAIR(IEEE754_CLASS_INF
, IEEE754_CLASS_ZERO
):
104 case CLPAIR(IEEE754_CLASS_ZERO
, IEEE754_CLASS_INF
):
105 ieee754_setcx(IEEE754_INVALID_OPERATION
);
106 return ieee754dp_indef();
108 case CLPAIR(IEEE754_CLASS_NORM
, IEEE754_CLASS_INF
):
109 case CLPAIR(IEEE754_CLASS_DNORM
, IEEE754_CLASS_INF
):
110 case CLPAIR(IEEE754_CLASS_INF
, IEEE754_CLASS_NORM
):
111 case CLPAIR(IEEE754_CLASS_INF
, IEEE754_CLASS_DNORM
):
112 case CLPAIR(IEEE754_CLASS_INF
, IEEE754_CLASS_INF
):
113 if ((zc
== IEEE754_CLASS_INF
) && (zs
!= rs
)) {
115 * Cases of addition of infinities with opposite signs
116 * or subtraction of infinities with same signs.
118 ieee754_setcx(IEEE754_INVALID_OPERATION
);
119 return ieee754dp_indef();
122 * z is here either not an infinity, or an infinity having the
123 * same sign as product (x*y). The result must be an infinity,
124 * and its sign is determined only by the sign of product (x*y).
126 return ieee754dp_inf(rs
);
128 case CLPAIR(IEEE754_CLASS_ZERO
, IEEE754_CLASS_ZERO
):
129 case CLPAIR(IEEE754_CLASS_ZERO
, IEEE754_CLASS_NORM
):
130 case CLPAIR(IEEE754_CLASS_ZERO
, IEEE754_CLASS_DNORM
):
131 case CLPAIR(IEEE754_CLASS_NORM
, IEEE754_CLASS_ZERO
):
132 case CLPAIR(IEEE754_CLASS_DNORM
, IEEE754_CLASS_ZERO
):
133 if (zc
== IEEE754_CLASS_INF
)
134 return ieee754dp_inf(zs
);
135 if (zc
== IEEE754_CLASS_ZERO
) {
136 /* Handle cases +0 + (-0) and similar ones. */
139 * Cases of addition of zeros of equal signs
140 * or subtraction of zeroes of opposite signs.
141 * The sign of the resulting zero is in any
142 * such case determined only by the sign of z.
146 return ieee754dp_zero(ieee754_csr
.rm
== FPU_CSR_RD
);
148 /* x*y is here 0, and z is not 0, so just return z */
151 case CLPAIR(IEEE754_CLASS_DNORM
, IEEE754_CLASS_DNORM
):
155 case CLPAIR(IEEE754_CLASS_NORM
, IEEE754_CLASS_DNORM
):
156 if (zc
== IEEE754_CLASS_INF
)
157 return ieee754dp_inf(zs
);
161 case CLPAIR(IEEE754_CLASS_DNORM
, IEEE754_CLASS_NORM
):
162 if (zc
== IEEE754_CLASS_INF
)
163 return ieee754dp_inf(zs
);
167 case CLPAIR(IEEE754_CLASS_NORM
, IEEE754_CLASS_NORM
):
168 if (zc
== IEEE754_CLASS_INF
)
169 return ieee754dp_inf(zs
);
170 /* continue to real computations */
173 /* Finally get to do some computation */
176 * Do the multiplication bit first
178 * rm = xm * ym, re = xe + ye basically
180 * At this point xm and ym should have been normalized.
182 assert(xm
& DP_HIDDEN_BIT
);
183 assert(ym
& DP_HIDDEN_BIT
);
187 /* shunt to top of word */
188 xm
<<= 64 - (DP_FBITS
+ 1);
189 ym
<<= 64 - (DP_FBITS
+ 1);
192 * Multiply 64 bits xm and ym to give 128 bits result in hrm:lrm.
200 lrm
= DPXMULT(lxm
, lym
);
201 hrm
= DPXMULT(hxm
, hym
);
203 t
= DPXMULT(lxm
, hym
);
205 at
= lrm
+ (t
<< 32);
209 hrm
= hrm
+ (t
>> 32);
211 t
= DPXMULT(hxm
, lym
);
213 at
= lrm
+ (t
<< 32);
217 hrm
= hrm
+ (t
>> 32);
219 /* Put explicit bit at bit 126 if necessary */
220 if ((int64_t)hrm
< 0) {
221 lrm
= (hrm
<< 63) | (lrm
>> 1);
226 assert(hrm
& (1 << 62));
228 if (zc
== IEEE754_CLASS_ZERO
) {
230 * Move explicit bit from bit 126 to bit 55 since the
231 * ieee754dp_format code expects the mantissa to be
232 * 56 bits wide (53 + 3 rounding bits).
234 srl128(&hrm
, &lrm
, (126 - 55));
235 return ieee754dp_format(rs
, re
, lrm
);
238 /* Move explicit bit from bit 52 to bit 126 */
241 assert(hzm
& (1 << 62));
243 /* Make the exponents the same */
246 * Have to shift y fraction right to align.
249 srl128(&hrm
, &lrm
, s
);
251 } else if (re
> ze
) {
253 * Have to shift x fraction right to align.
256 srl128(&hzm
, &lzm
, s
);
260 assert(ze
<= DP_EMAX
);
262 /* Do the addition */
265 * Generate 128 bit result by adding two 127 bit numbers
266 * leaving result in hzm:lzm, zs and ze.
268 hzm
= hzm
+ hrm
+ (lzm
> (lzm
+ lrm
));
270 if ((int64_t)hzm
< 0) { /* carry out */
271 srl128(&hzm
, &lzm
, 1);
275 if (hzm
> hrm
|| (hzm
== hrm
&& lzm
>= lrm
)) {
276 hzm
= hzm
- hrm
- (lzm
< lrm
);
279 hzm
= hrm
- hzm
- (lrm
< lzm
);
283 if (lzm
== 0 && hzm
== 0)
284 return ieee754dp_zero(ieee754_csr
.rm
== FPU_CSR_RD
);
287 * Put explicit bit at bit 126 if necessary.
290 /* left shift by 63 or 64 bits */
291 if ((int64_t)lzm
< 0) {
292 /* MSB of lzm is the explicit bit */
304 while ((hzm
>> (62 - t
)) == 0)
309 hzm
= hzm
<< t
| lzm
>> (64 - t
);
316 * Move explicit bit from bit 126 to bit 55 since the
317 * ieee754dp_format code expects the mantissa to be
318 * 56 bits wide (53 + 3 rounding bits).
320 srl128(&hzm
, &lzm
, (126 - 55));
322 return ieee754dp_format(zs
, ze
, lzm
);
325 union ieee754dp
ieee754dp_maddf(union ieee754dp z
, union ieee754dp x
,
328 return _dp_maddf(z
, x
, y
, 0);
331 union ieee754dp
ieee754dp_msubf(union ieee754dp z
, union ieee754dp x
,
334 return _dp_maddf(z
, x
, y
, MADDF_NEGATE_PRODUCT
);
337 union ieee754dp
ieee754dp_madd(union ieee754dp z
, union ieee754dp x
,
340 return _dp_maddf(z
, x
, y
, 0);
343 union ieee754dp
ieee754dp_msub(union ieee754dp z
, union ieee754dp x
,
346 return _dp_maddf(z
, x
, y
, MADDF_NEGATE_ADDITION
);
349 union ieee754dp
ieee754dp_nmadd(union ieee754dp z
, union ieee754dp x
,
352 return _dp_maddf(z
, x
, y
, MADDF_NEGATE_PRODUCT
|MADDF_NEGATE_ADDITION
);
355 union ieee754dp
ieee754dp_nmsub(union ieee754dp z
, union ieee754dp x
,
358 return _dp_maddf(z
, x
, y
, MADDF_NEGATE_PRODUCT
);