Merge branch 'perf-urgent-for-linus' of git://git.kernel.org/pub/scm/linux/kernel...
[cris-mirror.git] / arch / mips / math-emu / sp_maddf.c
bloba8cd8b4f235eb810db8c0472dea29c82dc2cd173
1 /*
2 * IEEE754 floating point arithmetic
3 * single precision: MADDF.f (Fused Multiply Add)
4 * MADDF.fmt: FPR[fd] = FPR[fd] + (FPR[fs] x FPR[ft])
6 * MIPS floating point support
7 * Copyright (C) 2015 Imagination Technologies, Ltd.
8 * Author: Markos Chandras <markos.chandras@imgtec.com>
10 * This program is free software; you can distribute it and/or modify it
11 * under the terms of the GNU General Public License as published by the
12 * Free Software Foundation; version 2 of the License.
15 #include "ieee754sp.h"
17 enum maddf_flags {
18 maddf_negate_product = 1 << 0,
21 static union ieee754sp _sp_maddf(union ieee754sp z, union ieee754sp x,
22 union ieee754sp y, enum maddf_flags flags)
24 int re;
25 int rs;
26 unsigned rm;
27 unsigned short lxm;
28 unsigned short hxm;
29 unsigned short lym;
30 unsigned short hym;
31 unsigned lrm;
32 unsigned hrm;
33 unsigned t;
34 unsigned at;
35 int s;
37 COMPXSP;
38 COMPYSP;
39 COMPZSP;
41 EXPLODEXSP;
42 EXPLODEYSP;
43 EXPLODEZSP;
45 FLUSHXSP;
46 FLUSHYSP;
47 FLUSHZSP;
49 ieee754_clearcx();
51 switch (zc) {
52 case IEEE754_CLASS_SNAN:
53 ieee754_setcx(IEEE754_INVALID_OPERATION);
54 return ieee754sp_nanxcpt(z);
55 case IEEE754_CLASS_DNORM:
56 SPDNORMZ;
57 /* QNAN is handled separately below */
60 switch (CLPAIR(xc, yc)) {
61 case CLPAIR(IEEE754_CLASS_QNAN, IEEE754_CLASS_SNAN):
62 case CLPAIR(IEEE754_CLASS_ZERO, IEEE754_CLASS_SNAN):
63 case CLPAIR(IEEE754_CLASS_NORM, IEEE754_CLASS_SNAN):
64 case CLPAIR(IEEE754_CLASS_DNORM, IEEE754_CLASS_SNAN):
65 case CLPAIR(IEEE754_CLASS_INF, IEEE754_CLASS_SNAN):
66 return ieee754sp_nanxcpt(y);
68 case CLPAIR(IEEE754_CLASS_SNAN, IEEE754_CLASS_SNAN):
69 case CLPAIR(IEEE754_CLASS_SNAN, IEEE754_CLASS_QNAN):
70 case CLPAIR(IEEE754_CLASS_SNAN, IEEE754_CLASS_ZERO):
71 case CLPAIR(IEEE754_CLASS_SNAN, IEEE754_CLASS_NORM):
72 case CLPAIR(IEEE754_CLASS_SNAN, IEEE754_CLASS_DNORM):
73 case CLPAIR(IEEE754_CLASS_SNAN, IEEE754_CLASS_INF):
74 return ieee754sp_nanxcpt(x);
76 case CLPAIR(IEEE754_CLASS_ZERO, IEEE754_CLASS_QNAN):
77 case CLPAIR(IEEE754_CLASS_NORM, IEEE754_CLASS_QNAN):
78 case CLPAIR(IEEE754_CLASS_DNORM, IEEE754_CLASS_QNAN):
79 case CLPAIR(IEEE754_CLASS_INF, IEEE754_CLASS_QNAN):
80 return y;
82 case CLPAIR(IEEE754_CLASS_QNAN, IEEE754_CLASS_QNAN):
83 case CLPAIR(IEEE754_CLASS_QNAN, IEEE754_CLASS_ZERO):
84 case CLPAIR(IEEE754_CLASS_QNAN, IEEE754_CLASS_NORM):
85 case CLPAIR(IEEE754_CLASS_QNAN, IEEE754_CLASS_DNORM):
86 case CLPAIR(IEEE754_CLASS_QNAN, IEEE754_CLASS_INF):
87 return x;
90 * Infinity handling
92 case CLPAIR(IEEE754_CLASS_INF, IEEE754_CLASS_ZERO):
93 case CLPAIR(IEEE754_CLASS_ZERO, IEEE754_CLASS_INF):
94 if (zc == IEEE754_CLASS_QNAN)
95 return z;
96 ieee754_setcx(IEEE754_INVALID_OPERATION);
97 return ieee754sp_indef();
99 case CLPAIR(IEEE754_CLASS_NORM, IEEE754_CLASS_INF):
100 case CLPAIR(IEEE754_CLASS_DNORM, IEEE754_CLASS_INF):
101 case CLPAIR(IEEE754_CLASS_INF, IEEE754_CLASS_NORM):
102 case CLPAIR(IEEE754_CLASS_INF, IEEE754_CLASS_DNORM):
103 case CLPAIR(IEEE754_CLASS_INF, IEEE754_CLASS_INF):
104 if (zc == IEEE754_CLASS_QNAN)
105 return z;
106 return ieee754sp_inf(xs ^ ys);
108 case CLPAIR(IEEE754_CLASS_ZERO, IEEE754_CLASS_ZERO):
109 case CLPAIR(IEEE754_CLASS_ZERO, IEEE754_CLASS_NORM):
110 case CLPAIR(IEEE754_CLASS_ZERO, IEEE754_CLASS_DNORM):
111 case CLPAIR(IEEE754_CLASS_NORM, IEEE754_CLASS_ZERO):
112 case CLPAIR(IEEE754_CLASS_DNORM, IEEE754_CLASS_ZERO):
113 if (zc == IEEE754_CLASS_INF)
114 return ieee754sp_inf(zs);
115 /* Multiplication is 0 so just return z */
116 return z;
118 case CLPAIR(IEEE754_CLASS_DNORM, IEEE754_CLASS_DNORM):
119 SPDNORMX;
121 case CLPAIR(IEEE754_CLASS_NORM, IEEE754_CLASS_DNORM):
122 if (zc == IEEE754_CLASS_QNAN)
123 return z;
124 else if (zc == IEEE754_CLASS_INF)
125 return ieee754sp_inf(zs);
126 SPDNORMY;
127 break;
129 case CLPAIR(IEEE754_CLASS_DNORM, IEEE754_CLASS_NORM):
130 if (zc == IEEE754_CLASS_QNAN)
131 return z;
132 else if (zc == IEEE754_CLASS_INF)
133 return ieee754sp_inf(zs);
134 SPDNORMX;
135 break;
137 case CLPAIR(IEEE754_CLASS_NORM, IEEE754_CLASS_NORM):
138 if (zc == IEEE754_CLASS_QNAN)
139 return z;
140 else if (zc == IEEE754_CLASS_INF)
141 return ieee754sp_inf(zs);
142 /* fall through to real computations */
145 /* Finally get to do some computation */
148 * Do the multiplication bit first
150 * rm = xm * ym, re = xe + ye basically
152 * At this point xm and ym should have been normalized.
155 /* rm = xm * ym, re = xe+ye basically */
156 assert(xm & SP_HIDDEN_BIT);
157 assert(ym & SP_HIDDEN_BIT);
159 re = xe + ye;
160 rs = xs ^ ys;
161 if (flags & maddf_negate_product)
162 rs ^= 1;
164 /* shunt to top of word */
165 xm <<= 32 - (SP_FBITS + 1);
166 ym <<= 32 - (SP_FBITS + 1);
169 * Multiply 32 bits xm, ym to give high 32 bits rm with stickness.
171 lxm = xm & 0xffff;
172 hxm = xm >> 16;
173 lym = ym & 0xffff;
174 hym = ym >> 16;
176 lrm = lxm * lym; /* 16 * 16 => 32 */
177 hrm = hxm * hym; /* 16 * 16 => 32 */
179 t = lxm * hym; /* 16 * 16 => 32 */
180 at = lrm + (t << 16);
181 hrm += at < lrm;
182 lrm = at;
183 hrm = hrm + (t >> 16);
185 t = hxm * lym; /* 16 * 16 => 32 */
186 at = lrm + (t << 16);
187 hrm += at < lrm;
188 lrm = at;
189 hrm = hrm + (t >> 16);
191 rm = hrm | (lrm != 0);
194 * Sticky shift down to normal rounding precision.
196 if ((int) rm < 0) {
197 rm = (rm >> (32 - (SP_FBITS + 1 + 3))) |
198 ((rm << (SP_FBITS + 1 + 3)) != 0);
199 re++;
200 } else {
201 rm = (rm >> (32 - (SP_FBITS + 1 + 3 + 1))) |
202 ((rm << (SP_FBITS + 1 + 3 + 1)) != 0);
204 assert(rm & (SP_HIDDEN_BIT << 3));
206 /* And now the addition */
208 assert(zm & SP_HIDDEN_BIT);
211 * Provide guard,round and stick bit space.
213 zm <<= 3;
215 if (ze > re) {
217 * Have to shift r fraction right to align.
219 s = ze - re;
220 rm = XSPSRS(rm, s);
221 re += s;
222 } else if (re > ze) {
224 * Have to shift z fraction right to align.
226 s = re - ze;
227 zm = XSPSRS(zm, s);
228 ze += s;
230 assert(ze == re);
231 assert(ze <= SP_EMAX);
233 if (zs == rs) {
235 * Generate 28 bit result of adding two 27 bit numbers
236 * leaving result in zm, zs and ze.
238 zm = zm + rm;
240 if (zm >> (SP_FBITS + 1 + 3)) { /* carry out */
241 zm = XSPSRS1(zm);
242 ze++;
244 } else {
245 if (zm >= rm) {
246 zm = zm - rm;
247 } else {
248 zm = rm - zm;
249 zs = rs;
251 if (zm == 0)
252 return ieee754sp_zero(ieee754_csr.rm == FPU_CSR_RD);
255 * Normalize in extended single precision
257 while ((zm >> (SP_MBITS + 3)) == 0) {
258 zm <<= 1;
259 ze--;
263 return ieee754sp_format(zs, ze, zm);
266 union ieee754sp ieee754sp_maddf(union ieee754sp z, union ieee754sp x,
267 union ieee754sp y)
269 return _sp_maddf(z, x, y, 0);
272 union ieee754sp ieee754sp_msubf(union ieee754sp z, union ieee754sp x,
273 union ieee754sp y)
275 return _sp_maddf(z, x, y, maddf_negate_product);