2 * IEEE754 floating point arithmetic
3 * double precision: MADDF.f (Fused Multiply Add)
4 * MADDF.fmt: FPR[fd] = FPR[fd] + (FPR[fs] x FPR[ft])
6 * MIPS floating point support
7 * Copyright (C) 2015 Imagination Technologies, Ltd.
8 * Author: Markos Chandras <markos.chandras@imgtec.com>
10 * This program is free software; you can distribute it and/or modify it
11 * under the terms of the GNU General Public License as published by the
12 * Free Software Foundation; version 2 of the License.
15 #include "ieee754dp.h"
18 maddf_negate_product
= 1 << 0,
21 static union ieee754dp
_dp_maddf(union ieee754dp z
, union ieee754dp x
,
22 union ieee754dp y
, enum maddf_flags flags
)
52 case IEEE754_CLASS_SNAN
:
53 ieee754_setcx(IEEE754_INVALID_OPERATION
);
54 return ieee754dp_nanxcpt(z
);
55 case IEEE754_CLASS_DNORM
:
57 /* QNAN and ZERO cases are handled separately below */
60 switch (CLPAIR(xc
, yc
)) {
61 case CLPAIR(IEEE754_CLASS_QNAN
, IEEE754_CLASS_SNAN
):
62 case CLPAIR(IEEE754_CLASS_ZERO
, IEEE754_CLASS_SNAN
):
63 case CLPAIR(IEEE754_CLASS_NORM
, IEEE754_CLASS_SNAN
):
64 case CLPAIR(IEEE754_CLASS_DNORM
, IEEE754_CLASS_SNAN
):
65 case CLPAIR(IEEE754_CLASS_INF
, IEEE754_CLASS_SNAN
):
66 return ieee754dp_nanxcpt(y
);
68 case CLPAIR(IEEE754_CLASS_SNAN
, IEEE754_CLASS_SNAN
):
69 case CLPAIR(IEEE754_CLASS_SNAN
, IEEE754_CLASS_QNAN
):
70 case CLPAIR(IEEE754_CLASS_SNAN
, IEEE754_CLASS_ZERO
):
71 case CLPAIR(IEEE754_CLASS_SNAN
, IEEE754_CLASS_NORM
):
72 case CLPAIR(IEEE754_CLASS_SNAN
, IEEE754_CLASS_DNORM
):
73 case CLPAIR(IEEE754_CLASS_SNAN
, IEEE754_CLASS_INF
):
74 return ieee754dp_nanxcpt(x
);
76 case CLPAIR(IEEE754_CLASS_ZERO
, IEEE754_CLASS_QNAN
):
77 case CLPAIR(IEEE754_CLASS_NORM
, IEEE754_CLASS_QNAN
):
78 case CLPAIR(IEEE754_CLASS_DNORM
, IEEE754_CLASS_QNAN
):
79 case CLPAIR(IEEE754_CLASS_INF
, IEEE754_CLASS_QNAN
):
82 case CLPAIR(IEEE754_CLASS_QNAN
, IEEE754_CLASS_QNAN
):
83 case CLPAIR(IEEE754_CLASS_QNAN
, IEEE754_CLASS_ZERO
):
84 case CLPAIR(IEEE754_CLASS_QNAN
, IEEE754_CLASS_NORM
):
85 case CLPAIR(IEEE754_CLASS_QNAN
, IEEE754_CLASS_DNORM
):
86 case CLPAIR(IEEE754_CLASS_QNAN
, IEEE754_CLASS_INF
):
93 case CLPAIR(IEEE754_CLASS_INF
, IEEE754_CLASS_ZERO
):
94 case CLPAIR(IEEE754_CLASS_ZERO
, IEEE754_CLASS_INF
):
95 if (zc
== IEEE754_CLASS_QNAN
)
97 ieee754_setcx(IEEE754_INVALID_OPERATION
);
98 return ieee754dp_indef();
100 case CLPAIR(IEEE754_CLASS_NORM
, IEEE754_CLASS_INF
):
101 case CLPAIR(IEEE754_CLASS_DNORM
, IEEE754_CLASS_INF
):
102 case CLPAIR(IEEE754_CLASS_INF
, IEEE754_CLASS_NORM
):
103 case CLPAIR(IEEE754_CLASS_INF
, IEEE754_CLASS_DNORM
):
104 case CLPAIR(IEEE754_CLASS_INF
, IEEE754_CLASS_INF
):
105 if (zc
== IEEE754_CLASS_QNAN
)
107 return ieee754dp_inf(xs
^ ys
);
109 case CLPAIR(IEEE754_CLASS_ZERO
, IEEE754_CLASS_ZERO
):
110 case CLPAIR(IEEE754_CLASS_ZERO
, IEEE754_CLASS_NORM
):
111 case CLPAIR(IEEE754_CLASS_ZERO
, IEEE754_CLASS_DNORM
):
112 case CLPAIR(IEEE754_CLASS_NORM
, IEEE754_CLASS_ZERO
):
113 case CLPAIR(IEEE754_CLASS_DNORM
, IEEE754_CLASS_ZERO
):
114 if (zc
== IEEE754_CLASS_INF
)
115 return ieee754dp_inf(zs
);
116 /* Multiplication is 0 so just return z */
119 case CLPAIR(IEEE754_CLASS_DNORM
, IEEE754_CLASS_DNORM
):
122 case CLPAIR(IEEE754_CLASS_NORM
, IEEE754_CLASS_DNORM
):
123 if (zc
== IEEE754_CLASS_QNAN
)
125 else if (zc
== IEEE754_CLASS_INF
)
126 return ieee754dp_inf(zs
);
130 case CLPAIR(IEEE754_CLASS_DNORM
, IEEE754_CLASS_NORM
):
131 if (zc
== IEEE754_CLASS_QNAN
)
133 else if (zc
== IEEE754_CLASS_INF
)
134 return ieee754dp_inf(zs
);
138 case CLPAIR(IEEE754_CLASS_NORM
, IEEE754_CLASS_NORM
):
139 if (zc
== IEEE754_CLASS_QNAN
)
141 else if (zc
== IEEE754_CLASS_INF
)
142 return ieee754dp_inf(zs
);
143 /* fall through to real computations */
146 /* Finally get to do some computation */
149 * Do the multiplication bit first
151 * rm = xm * ym, re = xe + ye basically
153 * At this point xm and ym should have been normalized.
155 assert(xm
& DP_HIDDEN_BIT
);
156 assert(ym
& DP_HIDDEN_BIT
);
160 if (flags
& maddf_negate_product
)
163 /* shunt to top of word */
164 xm
<<= 64 - (DP_FBITS
+ 1);
165 ym
<<= 64 - (DP_FBITS
+ 1);
168 * Multiply 64 bits xm, ym to give high 64 bits rm with stickness.
172 #define DPXMULT(x, y) ((u64)(x) * (u64)y)
179 lrm
= DPXMULT(lxm
, lym
);
180 hrm
= DPXMULT(hxm
, hym
);
182 t
= DPXMULT(lxm
, hym
);
184 at
= lrm
+ (t
<< 32);
188 hrm
= hrm
+ (t
>> 32);
190 t
= DPXMULT(hxm
, lym
);
192 at
= lrm
+ (t
<< 32);
196 hrm
= hrm
+ (t
>> 32);
198 rm
= hrm
| (lrm
!= 0);
201 * Sticky shift down to normal rounding precision.
204 rm
= (rm
>> (64 - (DP_FBITS
+ 1 + 3))) |
205 ((rm
<< (DP_FBITS
+ 1 + 3)) != 0);
208 rm
= (rm
>> (64 - (DP_FBITS
+ 1 + 3 + 1))) |
209 ((rm
<< (DP_FBITS
+ 1 + 3 + 1)) != 0);
211 assert(rm
& (DP_HIDDEN_BIT
<< 3));
213 if (zc
== IEEE754_CLASS_ZERO
)
214 return ieee754dp_format(rs
, re
, rm
);
216 /* And now the addition */
217 assert(zm
& DP_HIDDEN_BIT
);
220 * Provide guard,round and stick bit space.
226 * Have to shift y fraction right to align.
231 } else if (re
> ze
) {
233 * Have to shift x fraction right to align.
240 assert(ze
<= DP_EMAX
);
244 * Generate 28 bit result of adding two 27 bit numbers
245 * leaving result in xm, xs and xe.
249 if (zm
>> (DP_FBITS
+ 1 + 3)) { /* carry out */
261 return ieee754dp_zero(ieee754_csr
.rm
== FPU_CSR_RD
);
264 * Normalize to rounding precision.
266 while ((zm
>> (DP_FBITS
+ 3)) == 0) {
272 return ieee754dp_format(zs
, ze
, zm
);
275 union ieee754dp
ieee754dp_maddf(union ieee754dp z
, union ieee754dp x
,
278 return _dp_maddf(z
, x
, y
, 0);
281 union ieee754dp
ieee754dp_msubf(union ieee754dp z
, union ieee754dp x
,
284 return _dp_maddf(z
, x
, y
, maddf_negate_product
);