2 * IEEE754 floating point arithmetic
3 * single precision: MSUB.f (Fused Multiply Subtract)
4 * MSUBF.fmt: FPR[fd] = FPR[fd] - (FPR[fs] x FPR[ft])
6 * MIPS floating point support
7 * Copyright (C) 2015 Imagination Technologies, Ltd.
8 * Author: Markos Chandras <markos.chandras@imgtec.com>
10 * This program is free software; you can distribute it and/or modify it
11 * under the terms of the GNU General Public License as published by the
12 * Free Software Foundation; version 2 of the License.
15 #include "ieee754sp.h"
17 union ieee754sp
ieee754sp_msubf(union ieee754sp z
, union ieee754sp x
,
35 u32 zm
; int ze
; int zs __maybe_unused
; int zc
;
39 EXPLODESP(z
, zc
, zs
, ze
, zm
)
43 FLUSHSP(z
, zc
, zs
, ze
, zm
);
48 case IEEE754_CLASS_SNAN
:
49 ieee754_setcx(IEEE754_INVALID_OPERATION
);
50 return ieee754sp_nanxcpt(z
);
51 case IEEE754_CLASS_DNORM
:
53 /* QNAN is handled separately below */
56 switch (CLPAIR(xc
, yc
)) {
57 case CLPAIR(IEEE754_CLASS_QNAN
, IEEE754_CLASS_SNAN
):
58 case CLPAIR(IEEE754_CLASS_ZERO
, IEEE754_CLASS_SNAN
):
59 case CLPAIR(IEEE754_CLASS_NORM
, IEEE754_CLASS_SNAN
):
60 case CLPAIR(IEEE754_CLASS_DNORM
, IEEE754_CLASS_SNAN
):
61 case CLPAIR(IEEE754_CLASS_INF
, IEEE754_CLASS_SNAN
):
62 return ieee754sp_nanxcpt(y
);
64 case CLPAIR(IEEE754_CLASS_SNAN
, IEEE754_CLASS_SNAN
):
65 case CLPAIR(IEEE754_CLASS_SNAN
, IEEE754_CLASS_QNAN
):
66 case CLPAIR(IEEE754_CLASS_SNAN
, IEEE754_CLASS_ZERO
):
67 case CLPAIR(IEEE754_CLASS_SNAN
, IEEE754_CLASS_NORM
):
68 case CLPAIR(IEEE754_CLASS_SNAN
, IEEE754_CLASS_DNORM
):
69 case CLPAIR(IEEE754_CLASS_SNAN
, IEEE754_CLASS_INF
):
70 return ieee754sp_nanxcpt(x
);
72 case CLPAIR(IEEE754_CLASS_ZERO
, IEEE754_CLASS_QNAN
):
73 case CLPAIR(IEEE754_CLASS_NORM
, IEEE754_CLASS_QNAN
):
74 case CLPAIR(IEEE754_CLASS_DNORM
, IEEE754_CLASS_QNAN
):
75 case CLPAIR(IEEE754_CLASS_INF
, IEEE754_CLASS_QNAN
):
78 case CLPAIR(IEEE754_CLASS_QNAN
, IEEE754_CLASS_QNAN
):
79 case CLPAIR(IEEE754_CLASS_QNAN
, IEEE754_CLASS_ZERO
):
80 case CLPAIR(IEEE754_CLASS_QNAN
, IEEE754_CLASS_NORM
):
81 case CLPAIR(IEEE754_CLASS_QNAN
, IEEE754_CLASS_DNORM
):
82 case CLPAIR(IEEE754_CLASS_QNAN
, IEEE754_CLASS_INF
):
88 case CLPAIR(IEEE754_CLASS_INF
, IEEE754_CLASS_ZERO
):
89 case CLPAIR(IEEE754_CLASS_ZERO
, IEEE754_CLASS_INF
):
90 if (zc
== IEEE754_CLASS_QNAN
)
92 ieee754_setcx(IEEE754_INVALID_OPERATION
);
93 return ieee754sp_indef();
95 case CLPAIR(IEEE754_CLASS_NORM
, IEEE754_CLASS_INF
):
96 case CLPAIR(IEEE754_CLASS_DNORM
, IEEE754_CLASS_INF
):
97 case CLPAIR(IEEE754_CLASS_INF
, IEEE754_CLASS_NORM
):
98 case CLPAIR(IEEE754_CLASS_INF
, IEEE754_CLASS_DNORM
):
99 case CLPAIR(IEEE754_CLASS_INF
, IEEE754_CLASS_INF
):
100 if (zc
== IEEE754_CLASS_QNAN
)
102 return ieee754sp_inf(xs
^ ys
);
104 case CLPAIR(IEEE754_CLASS_ZERO
, IEEE754_CLASS_ZERO
):
105 case CLPAIR(IEEE754_CLASS_ZERO
, IEEE754_CLASS_NORM
):
106 case CLPAIR(IEEE754_CLASS_ZERO
, IEEE754_CLASS_DNORM
):
107 case CLPAIR(IEEE754_CLASS_NORM
, IEEE754_CLASS_ZERO
):
108 case CLPAIR(IEEE754_CLASS_DNORM
, IEEE754_CLASS_ZERO
):
109 if (zc
== IEEE754_CLASS_INF
)
110 return ieee754sp_inf(zs
);
111 /* Multiplication is 0 so just return z */
114 case CLPAIR(IEEE754_CLASS_DNORM
, IEEE754_CLASS_DNORM
):
117 case CLPAIR(IEEE754_CLASS_NORM
, IEEE754_CLASS_DNORM
):
118 if (zc
== IEEE754_CLASS_QNAN
)
120 else if (zc
== IEEE754_CLASS_INF
)
121 return ieee754sp_inf(zs
);
125 case CLPAIR(IEEE754_CLASS_DNORM
, IEEE754_CLASS_NORM
):
126 if (zc
== IEEE754_CLASS_QNAN
)
128 else if (zc
== IEEE754_CLASS_INF
)
129 return ieee754sp_inf(zs
);
133 case CLPAIR(IEEE754_CLASS_NORM
, IEEE754_CLASS_NORM
):
134 if (zc
== IEEE754_CLASS_QNAN
)
136 else if (zc
== IEEE754_CLASS_INF
)
137 return ieee754sp_inf(zs
);
138 /* fall through to real compuation */
141 /* Finally get to do some computation */
144 * Do the multiplication bit first
146 * rm = xm * ym, re = xe + ye basically
148 * At this point xm and ym should have been normalized.
151 /* rm = xm * ym, re = xe+ye basically */
152 assert(xm
& SP_HIDDEN_BIT
);
153 assert(ym
& SP_HIDDEN_BIT
);
158 /* shunt to top of word */
159 xm
<<= 32 - (SP_FBITS
+ 1);
160 ym
<<= 32 - (SP_FBITS
+ 1);
163 * Multiply 32 bits xm, ym to give high 32 bits rm with stickness.
170 lrm
= lxm
* lym
; /* 16 * 16 => 32 */
171 hrm
= hxm
* hym
; /* 16 * 16 => 32 */
173 t
= lxm
* hym
; /* 16 * 16 => 32 */
174 at
= lrm
+ (t
<< 16);
177 hrm
= hrm
+ (t
>> 16);
179 t
= hxm
* lym
; /* 16 * 16 => 32 */
180 at
= lrm
+ (t
<< 16);
183 hrm
= hrm
+ (t
>> 16);
185 rm
= hrm
| (lrm
!= 0);
188 * Sticky shift down to normal rounding precision.
191 rm
= (rm
>> (32 - (SP_FBITS
+ 1 + 3))) |
192 ((rm
<< (SP_FBITS
+ 1 + 3)) != 0);
195 rm
= (rm
>> (32 - (SP_FBITS
+ 1 + 3 + 1))) |
196 ((rm
<< (SP_FBITS
+ 1 + 3 + 1)) != 0);
198 assert(rm
& (SP_HIDDEN_BIT
<< 3));
200 /* And now the subtraction */
202 /* Flip sign of r and handle as add */
205 assert(zm
& SP_HIDDEN_BIT
);
208 * Provide guard,round and stick bit space.
214 * Have to shift y fraction right to align.
218 } else if (re
> ze
) {
220 * Have to shift x fraction right to align.
226 assert(ze
<= SP_EMAX
);
230 * Generate 28 bit result of adding two 27 bit numbers
231 * leaving result in zm, zs and ze.
235 if (zm
>> (SP_FBITS
+ 1 + 3)) { /* carry out */
236 SPXSRSX1(); /* shift preserving sticky */
246 return ieee754sp_zero(ieee754_csr
.rm
== FPU_CSR_RD
);
249 * Normalize in extended single precision
251 while ((zm
>> (SP_MBITS
+ 3)) == 0) {
257 return ieee754sp_format(zs
, ze
, zm
);