2 * IEEE754 floating point arithmetic
3 * single precision: MADDF.f (Fused Multiply Add)
4 * MADDF.fmt: FPR[fd] = FPR[fd] + (FPR[fs] x FPR[ft])
6 * MIPS floating point support
7 * Copyright (C) 2015 Imagination Technologies, Ltd.
8 * Author: Markos Chandras <markos.chandras@imgtec.com>
10 * This program is free software; you can distribute it and/or modify it
11 * under the terms of the GNU General Public License as published by the
12 * Free Software Foundation; version 2 of the License.
15 #include "ieee754sp.h"
18 maddf_negate_product
= 1 << 0,
21 static union ieee754sp
_sp_maddf(union ieee754sp z
, union ieee754sp x
,
22 union ieee754sp y
, enum maddf_flags flags
)
52 case IEEE754_CLASS_SNAN
:
53 ieee754_setcx(IEEE754_INVALID_OPERATION
);
54 return ieee754sp_nanxcpt(z
);
55 case IEEE754_CLASS_DNORM
:
57 /* QNAN is handled separately below */
60 switch (CLPAIR(xc
, yc
)) {
61 case CLPAIR(IEEE754_CLASS_QNAN
, IEEE754_CLASS_SNAN
):
62 case CLPAIR(IEEE754_CLASS_ZERO
, IEEE754_CLASS_SNAN
):
63 case CLPAIR(IEEE754_CLASS_NORM
, IEEE754_CLASS_SNAN
):
64 case CLPAIR(IEEE754_CLASS_DNORM
, IEEE754_CLASS_SNAN
):
65 case CLPAIR(IEEE754_CLASS_INF
, IEEE754_CLASS_SNAN
):
66 return ieee754sp_nanxcpt(y
);
68 case CLPAIR(IEEE754_CLASS_SNAN
, IEEE754_CLASS_SNAN
):
69 case CLPAIR(IEEE754_CLASS_SNAN
, IEEE754_CLASS_QNAN
):
70 case CLPAIR(IEEE754_CLASS_SNAN
, IEEE754_CLASS_ZERO
):
71 case CLPAIR(IEEE754_CLASS_SNAN
, IEEE754_CLASS_NORM
):
72 case CLPAIR(IEEE754_CLASS_SNAN
, IEEE754_CLASS_DNORM
):
73 case CLPAIR(IEEE754_CLASS_SNAN
, IEEE754_CLASS_INF
):
74 return ieee754sp_nanxcpt(x
);
76 case CLPAIR(IEEE754_CLASS_ZERO
, IEEE754_CLASS_QNAN
):
77 case CLPAIR(IEEE754_CLASS_NORM
, IEEE754_CLASS_QNAN
):
78 case CLPAIR(IEEE754_CLASS_DNORM
, IEEE754_CLASS_QNAN
):
79 case CLPAIR(IEEE754_CLASS_INF
, IEEE754_CLASS_QNAN
):
82 case CLPAIR(IEEE754_CLASS_QNAN
, IEEE754_CLASS_QNAN
):
83 case CLPAIR(IEEE754_CLASS_QNAN
, IEEE754_CLASS_ZERO
):
84 case CLPAIR(IEEE754_CLASS_QNAN
, IEEE754_CLASS_NORM
):
85 case CLPAIR(IEEE754_CLASS_QNAN
, IEEE754_CLASS_DNORM
):
86 case CLPAIR(IEEE754_CLASS_QNAN
, IEEE754_CLASS_INF
):
92 case CLPAIR(IEEE754_CLASS_INF
, IEEE754_CLASS_ZERO
):
93 case CLPAIR(IEEE754_CLASS_ZERO
, IEEE754_CLASS_INF
):
94 if (zc
== IEEE754_CLASS_QNAN
)
96 ieee754_setcx(IEEE754_INVALID_OPERATION
);
97 return ieee754sp_indef();
99 case CLPAIR(IEEE754_CLASS_NORM
, IEEE754_CLASS_INF
):
100 case CLPAIR(IEEE754_CLASS_DNORM
, IEEE754_CLASS_INF
):
101 case CLPAIR(IEEE754_CLASS_INF
, IEEE754_CLASS_NORM
):
102 case CLPAIR(IEEE754_CLASS_INF
, IEEE754_CLASS_DNORM
):
103 case CLPAIR(IEEE754_CLASS_INF
, IEEE754_CLASS_INF
):
104 if (zc
== IEEE754_CLASS_QNAN
)
106 return ieee754sp_inf(xs
^ ys
);
108 case CLPAIR(IEEE754_CLASS_ZERO
, IEEE754_CLASS_ZERO
):
109 case CLPAIR(IEEE754_CLASS_ZERO
, IEEE754_CLASS_NORM
):
110 case CLPAIR(IEEE754_CLASS_ZERO
, IEEE754_CLASS_DNORM
):
111 case CLPAIR(IEEE754_CLASS_NORM
, IEEE754_CLASS_ZERO
):
112 case CLPAIR(IEEE754_CLASS_DNORM
, IEEE754_CLASS_ZERO
):
113 if (zc
== IEEE754_CLASS_INF
)
114 return ieee754sp_inf(zs
);
115 /* Multiplication is 0 so just return z */
118 case CLPAIR(IEEE754_CLASS_DNORM
, IEEE754_CLASS_DNORM
):
121 case CLPAIR(IEEE754_CLASS_NORM
, IEEE754_CLASS_DNORM
):
122 if (zc
== IEEE754_CLASS_QNAN
)
124 else if (zc
== IEEE754_CLASS_INF
)
125 return ieee754sp_inf(zs
);
129 case CLPAIR(IEEE754_CLASS_DNORM
, IEEE754_CLASS_NORM
):
130 if (zc
== IEEE754_CLASS_QNAN
)
132 else if (zc
== IEEE754_CLASS_INF
)
133 return ieee754sp_inf(zs
);
137 case CLPAIR(IEEE754_CLASS_NORM
, IEEE754_CLASS_NORM
):
138 if (zc
== IEEE754_CLASS_QNAN
)
140 else if (zc
== IEEE754_CLASS_INF
)
141 return ieee754sp_inf(zs
);
142 /* fall through to real computations */
145 /* Finally get to do some computation */
148 * Do the multiplication bit first
150 * rm = xm * ym, re = xe + ye basically
152 * At this point xm and ym should have been normalized.
155 /* rm = xm * ym, re = xe+ye basically */
156 assert(xm
& SP_HIDDEN_BIT
);
157 assert(ym
& SP_HIDDEN_BIT
);
161 if (flags
& maddf_negate_product
)
164 /* shunt to top of word */
165 xm
<<= 32 - (SP_FBITS
+ 1);
166 ym
<<= 32 - (SP_FBITS
+ 1);
169 * Multiply 32 bits xm, ym to give high 32 bits rm with stickness.
176 lrm
= lxm
* lym
; /* 16 * 16 => 32 */
177 hrm
= hxm
* hym
; /* 16 * 16 => 32 */
179 t
= lxm
* hym
; /* 16 * 16 => 32 */
180 at
= lrm
+ (t
<< 16);
183 hrm
= hrm
+ (t
>> 16);
185 t
= hxm
* lym
; /* 16 * 16 => 32 */
186 at
= lrm
+ (t
<< 16);
189 hrm
= hrm
+ (t
>> 16);
191 rm
= hrm
| (lrm
!= 0);
194 * Sticky shift down to normal rounding precision.
197 rm
= (rm
>> (32 - (SP_FBITS
+ 1 + 3))) |
198 ((rm
<< (SP_FBITS
+ 1 + 3)) != 0);
201 rm
= (rm
>> (32 - (SP_FBITS
+ 1 + 3 + 1))) |
202 ((rm
<< (SP_FBITS
+ 1 + 3 + 1)) != 0);
204 assert(rm
& (SP_HIDDEN_BIT
<< 3));
206 /* And now the addition */
208 assert(zm
& SP_HIDDEN_BIT
);
211 * Provide guard,round and stick bit space.
217 * Have to shift r fraction right to align.
222 } else if (re
> ze
) {
224 * Have to shift z fraction right to align.
231 assert(ze
<= SP_EMAX
);
235 * Generate 28 bit result of adding two 27 bit numbers
236 * leaving result in zm, zs and ze.
240 if (zm
>> (SP_FBITS
+ 1 + 3)) { /* carry out */
252 return ieee754sp_zero(ieee754_csr
.rm
== FPU_CSR_RD
);
255 * Normalize in extended single precision
257 while ((zm
>> (SP_MBITS
+ 3)) == 0) {
263 return ieee754sp_format(zs
, ze
, zm
);
266 union ieee754sp
ieee754sp_maddf(union ieee754sp z
, union ieee754sp x
,
269 return _sp_maddf(z
, x
, y
, 0);
272 union ieee754sp
ieee754sp_msubf(union ieee754sp z
, union ieee754sp x
,
275 return _sp_maddf(z
, x
, y
, maddf_negate_product
);