Linux 4.8-rc8
[linux/fpc-iii.git] / arch / sh / kernel / cpu / sh4 / softfloat.c
blob42edf2e54e85430b1df930625cb0378d48711bc0
1 /*
 * Floating point emulation support for subnormalised numbers on the SH4
 * architecture. This file is derived from the SoftFloat IEC/IEEE
 * Floating-point Arithmetic Package, Release 2, the original license of
 * which is reproduced below.
7 * ========================================================================
9 * This C source file is part of the SoftFloat IEC/IEEE Floating-point
10 * Arithmetic Package, Release 2.
12 * Written by John R. Hauser. This work was made possible in part by the
13 * International Computer Science Institute, located at Suite 600, 1947 Center
14 * Street, Berkeley, California 94704. Funding was partially provided by the
15 * National Science Foundation under grant MIP-9311980. The original version
16 * of this code was written as part of a project to build a fixed-point vector
17 * processor in collaboration with the University of California at Berkeley,
18 * overseen by Profs. Nelson Morgan and John Wawrzynek. More information
19 * is available through the web page `http://HTTP.CS.Berkeley.EDU/~jhauser/
20 * arithmetic/softfloat.html'.
22 * THIS SOFTWARE IS DISTRIBUTED AS IS, FOR FREE. Although reasonable effort
23 * has been made to avoid it, THIS SOFTWARE MAY CONTAIN FAULTS THAT WILL AT
24 * TIMES RESULT IN INCORRECT BEHAVIOR. USE OF THIS SOFTWARE IS RESTRICTED TO
25 * PERSONS AND ORGANIZATIONS WHO CAN AND WILL TAKE FULL RESPONSIBILITY FOR ANY
26 * AND ALL LOSSES, COSTS, OR OTHER PROBLEMS ARISING FROM ITS USE.
28 * Derivative works are acceptable, even for commercial purposes, so long as
29 * (1) they include prominent notice that the work is derivative, and (2) they
30 * include prominent notice akin to these three paragraphs for those parts of
31 * this code that are retained.
33 * ========================================================================
35 * SH4 modifications by Ismail Dhaoui <ismail.dhaoui@st.com>
36 * and Kamel Khelifi <kamel.khelifi@st.com>
38 #include <linux/kernel.h>
39 #include <cpu/fpu.h>
40 #include <asm/div64.h>
/* Appends the `LL' suffix so that `a' is parsed as a long long constant. */
#define LIT64( a ) a##LL

/* Small truth value; used for sign bits and rounding/tininess flags. */
typedef char flag;

/*
 * Integer types named after a bit count.  Note that `uint16' and `int16'
 * are both plain `int' here, so the names are not exact widths.
 * NOTE(review): presumably these follow the SoftFloat convention of
 * "at least N bits" - confirm before relying on unsigned wrap-around.
 */
typedef unsigned char uint8;
typedef signed char int8;
typedef int uint16;
typedef int int16;
typedef unsigned int uint32;
typedef signed int int32;
typedef unsigned long long int uint64;
typedef signed long long int int64;

/* Unsigned (`bitsN') and signed (`sbitsN') types used for raw bit patterns. */
typedef unsigned char bits8;
typedef signed char sbits8;
typedef unsigned short int bits16;
typedef signed short int sbits16;
typedef unsigned int bits32;
typedef signed int sbits32;
typedef unsigned long long int bits64;
typedef signed long long int sbits64;

/* Raw encodings of single- and double-precision values. */
typedef unsigned long int float32;
typedef unsigned long long float64;
68 extern void float_raise(unsigned int flags); /* in fpu.c */
69 extern int float_rounding_mode(void); /* in fpu.c */
71 bits64 extractFloat64Frac(float64 a);
72 flag extractFloat64Sign(float64 a);
73 int16 extractFloat64Exp(float64 a);
74 int16 extractFloat32Exp(float32 a);
75 flag extractFloat32Sign(float32 a);
76 bits32 extractFloat32Frac(float32 a);
77 float64 packFloat64(flag zSign, int16 zExp, bits64 zSig);
78 void shift64RightJamming(bits64 a, int16 count, bits64 * zPtr);
79 float32 packFloat32(flag zSign, int16 zExp, bits32 zSig);
80 void shift32RightJamming(bits32 a, int16 count, bits32 * zPtr);
81 float64 float64_sub(float64 a, float64 b);
82 float32 float32_sub(float32 a, float32 b);
83 float32 float32_add(float32 a, float32 b);
84 float64 float64_add(float64 a, float64 b);
85 float64 float64_div(float64 a, float64 b);
86 float32 float32_div(float32 a, float32 b);
87 float32 float32_mul(float32 a, float32 b);
88 float64 float64_mul(float64 a, float64 b);
89 float32 float64_to_float32(float64 a);
90 void add128(bits64 a0, bits64 a1, bits64 b0, bits64 b1, bits64 * z0Ptr,
91 bits64 * z1Ptr);
92 void sub128(bits64 a0, bits64 a1, bits64 b0, bits64 b1, bits64 * z0Ptr,
93 bits64 * z1Ptr);
94 void mul64To128(bits64 a, bits64 b, bits64 * z0Ptr, bits64 * z1Ptr);
96 static int8 countLeadingZeros32(bits32 a);
97 static int8 countLeadingZeros64(bits64 a);
98 static float64 normalizeRoundAndPackFloat64(flag zSign, int16 zExp,
99 bits64 zSig);
100 static float64 subFloat64Sigs(float64 a, float64 b, flag zSign);
101 static float64 addFloat64Sigs(float64 a, float64 b, flag zSign);
102 static float32 roundAndPackFloat32(flag zSign, int16 zExp, bits32 zSig);
103 static float32 normalizeRoundAndPackFloat32(flag zSign, int16 zExp,
104 bits32 zSig);
105 static float64 roundAndPackFloat64(flag zSign, int16 zExp, bits64 zSig);
106 static float32 subFloat32Sigs(float32 a, float32 b, flag zSign);
107 static float32 addFloat32Sigs(float32 a, float32 b, flag zSign);
108 static void normalizeFloat64Subnormal(bits64 aSig, int16 * zExpPtr,
109 bits64 * zSigPtr);
110 static bits64 estimateDiv128To64(bits64 a0, bits64 a1, bits64 b);
111 static void normalizeFloat32Subnormal(bits32 aSig, int16 * zExpPtr,
112 bits32 * zSigPtr);
114 bits64 extractFloat64Frac(float64 a)
116 return a & LIT64(0x000FFFFFFFFFFFFF);
119 flag extractFloat64Sign(float64 a)
121 return a >> 63;
124 int16 extractFloat64Exp(float64 a)
126 return (a >> 52) & 0x7FF;
129 int16 extractFloat32Exp(float32 a)
131 return (a >> 23) & 0xFF;
134 flag extractFloat32Sign(float32 a)
136 return a >> 31;
139 bits32 extractFloat32Frac(float32 a)
141 return a & 0x007FFFFF;
144 float64 packFloat64(flag zSign, int16 zExp, bits64 zSig)
146 return (((bits64) zSign) << 63) + (((bits64) zExp) << 52) + zSig;
149 void shift64RightJamming(bits64 a, int16 count, bits64 * zPtr)
151 bits64 z;
153 if (count == 0) {
154 z = a;
155 } else if (count < 64) {
156 z = (a >> count) | ((a << ((-count) & 63)) != 0);
157 } else {
158 z = (a != 0);
160 *zPtr = z;
163 static int8 countLeadingZeros32(bits32 a)
165 static const int8 countLeadingZerosHigh[] = {
166 8, 7, 6, 6, 5, 5, 5, 5, 4, 4, 4, 4, 4, 4, 4, 4,
167 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
168 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
169 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
170 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
171 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
172 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
173 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
174 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
175 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
176 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
177 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
178 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
179 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
180 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
181 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
183 int8 shiftCount;
185 shiftCount = 0;
186 if (a < 0x10000) {
187 shiftCount += 16;
188 a <<= 16;
190 if (a < 0x1000000) {
191 shiftCount += 8;
192 a <<= 8;
194 shiftCount += countLeadingZerosHigh[a >> 24];
195 return shiftCount;
199 static int8 countLeadingZeros64(bits64 a)
201 int8 shiftCount;
203 shiftCount = 0;
204 if (a < ((bits64) 1) << 32) {
205 shiftCount += 32;
206 } else {
207 a >>= 32;
209 shiftCount += countLeadingZeros32(a);
210 return shiftCount;
214 static float64 normalizeRoundAndPackFloat64(flag zSign, int16 zExp, bits64 zSig)
216 int8 shiftCount;
218 shiftCount = countLeadingZeros64(zSig) - 1;
219 return roundAndPackFloat64(zSign, zExp - shiftCount,
220 zSig << shiftCount);
224 static float64 subFloat64Sigs(float64 a, float64 b, flag zSign)
226 int16 aExp, bExp, zExp;
227 bits64 aSig, bSig, zSig;
228 int16 expDiff;
230 aSig = extractFloat64Frac(a);
231 aExp = extractFloat64Exp(a);
232 bSig = extractFloat64Frac(b);
233 bExp = extractFloat64Exp(b);
234 expDiff = aExp - bExp;
235 aSig <<= 10;
236 bSig <<= 10;
237 if (0 < expDiff)
238 goto aExpBigger;
239 if (expDiff < 0)
240 goto bExpBigger;
241 if (aExp == 0) {
242 aExp = 1;
243 bExp = 1;
245 if (bSig < aSig)
246 goto aBigger;
247 if (aSig < bSig)
248 goto bBigger;
249 return packFloat64(float_rounding_mode() == FPSCR_RM_ZERO, 0, 0);
250 bExpBigger:
251 if (bExp == 0x7FF) {
252 return packFloat64(zSign ^ 1, 0x7FF, 0);
254 if (aExp == 0) {
255 ++expDiff;
256 } else {
257 aSig |= LIT64(0x4000000000000000);
259 shift64RightJamming(aSig, -expDiff, &aSig);
260 bSig |= LIT64(0x4000000000000000);
261 bBigger:
262 zSig = bSig - aSig;
263 zExp = bExp;
264 zSign ^= 1;
265 goto normalizeRoundAndPack;
266 aExpBigger:
267 if (aExp == 0x7FF) {
268 return a;
270 if (bExp == 0) {
271 --expDiff;
272 } else {
273 bSig |= LIT64(0x4000000000000000);
275 shift64RightJamming(bSig, expDiff, &bSig);
276 aSig |= LIT64(0x4000000000000000);
277 aBigger:
278 zSig = aSig - bSig;
279 zExp = aExp;
280 normalizeRoundAndPack:
281 --zExp;
282 return normalizeRoundAndPackFloat64(zSign, zExp, zSig);
285 static float64 addFloat64Sigs(float64 a, float64 b, flag zSign)
287 int16 aExp, bExp, zExp;
288 bits64 aSig, bSig, zSig;
289 int16 expDiff;
291 aSig = extractFloat64Frac(a);
292 aExp = extractFloat64Exp(a);
293 bSig = extractFloat64Frac(b);
294 bExp = extractFloat64Exp(b);
295 expDiff = aExp - bExp;
296 aSig <<= 9;
297 bSig <<= 9;
298 if (0 < expDiff) {
299 if (aExp == 0x7FF) {
300 return a;
302 if (bExp == 0) {
303 --expDiff;
304 } else {
305 bSig |= LIT64(0x2000000000000000);
307 shift64RightJamming(bSig, expDiff, &bSig);
308 zExp = aExp;
309 } else if (expDiff < 0) {
310 if (bExp == 0x7FF) {
311 return packFloat64(zSign, 0x7FF, 0);
313 if (aExp == 0) {
314 ++expDiff;
315 } else {
316 aSig |= LIT64(0x2000000000000000);
318 shift64RightJamming(aSig, -expDiff, &aSig);
319 zExp = bExp;
320 } else {
321 if (aExp == 0x7FF) {
322 return a;
324 if (aExp == 0)
325 return packFloat64(zSign, 0, (aSig + bSig) >> 9);
326 zSig = LIT64(0x4000000000000000) + aSig + bSig;
327 zExp = aExp;
328 goto roundAndPack;
330 aSig |= LIT64(0x2000000000000000);
331 zSig = (aSig + bSig) << 1;
332 --zExp;
333 if ((sbits64) zSig < 0) {
334 zSig = aSig + bSig;
335 ++zExp;
337 roundAndPack:
338 return roundAndPackFloat64(zSign, zExp, zSig);
342 float32 packFloat32(flag zSign, int16 zExp, bits32 zSig)
344 return (((bits32) zSign) << 31) + (((bits32) zExp) << 23) + zSig;
347 void shift32RightJamming(bits32 a, int16 count, bits32 * zPtr)
349 bits32 z;
350 if (count == 0) {
351 z = a;
352 } else if (count < 32) {
353 z = (a >> count) | ((a << ((-count) & 31)) != 0);
354 } else {
355 z = (a != 0);
357 *zPtr = z;
360 static float32 roundAndPackFloat32(flag zSign, int16 zExp, bits32 zSig)
362 flag roundNearestEven;
363 int8 roundIncrement, roundBits;
364 flag isTiny;
366 /* SH4 has only 2 rounding modes - round to nearest and round to zero */
367 roundNearestEven = (float_rounding_mode() == FPSCR_RM_NEAREST);
368 roundIncrement = 0x40;
369 if (!roundNearestEven) {
370 roundIncrement = 0;
372 roundBits = zSig & 0x7F;
373 if (0xFD <= (bits16) zExp) {
374 if ((0xFD < zExp)
375 || ((zExp == 0xFD)
376 && ((sbits32) (zSig + roundIncrement) < 0))
378 float_raise(FPSCR_CAUSE_OVERFLOW | FPSCR_CAUSE_INEXACT);
379 return packFloat32(zSign, 0xFF,
380 0) - (roundIncrement == 0);
382 if (zExp < 0) {
383 isTiny = (zExp < -1)
384 || (zSig + roundIncrement < 0x80000000);
385 shift32RightJamming(zSig, -zExp, &zSig);
386 zExp = 0;
387 roundBits = zSig & 0x7F;
388 if (isTiny && roundBits)
389 float_raise(FPSCR_CAUSE_UNDERFLOW);
392 if (roundBits)
393 float_raise(FPSCR_CAUSE_INEXACT);
394 zSig = (zSig + roundIncrement) >> 7;
395 zSig &= ~(((roundBits ^ 0x40) == 0) & roundNearestEven);
396 if (zSig == 0)
397 zExp = 0;
398 return packFloat32(zSign, zExp, zSig);
402 static float32 normalizeRoundAndPackFloat32(flag zSign, int16 zExp, bits32 zSig)
404 int8 shiftCount;
406 shiftCount = countLeadingZeros32(zSig) - 1;
407 return roundAndPackFloat32(zSign, zExp - shiftCount,
408 zSig << shiftCount);
411 static float64 roundAndPackFloat64(flag zSign, int16 zExp, bits64 zSig)
413 flag roundNearestEven;
414 int16 roundIncrement, roundBits;
415 flag isTiny;
417 /* SH4 has only 2 rounding modes - round to nearest and round to zero */
418 roundNearestEven = (float_rounding_mode() == FPSCR_RM_NEAREST);
419 roundIncrement = 0x200;
420 if (!roundNearestEven) {
421 roundIncrement = 0;
423 roundBits = zSig & 0x3FF;
424 if (0x7FD <= (bits16) zExp) {
425 if ((0x7FD < zExp)
426 || ((zExp == 0x7FD)
427 && ((sbits64) (zSig + roundIncrement) < 0))
429 float_raise(FPSCR_CAUSE_OVERFLOW | FPSCR_CAUSE_INEXACT);
430 return packFloat64(zSign, 0x7FF,
431 0) - (roundIncrement == 0);
433 if (zExp < 0) {
434 isTiny = (zExp < -1)
435 || (zSig + roundIncrement <
436 LIT64(0x8000000000000000));
437 shift64RightJamming(zSig, -zExp, &zSig);
438 zExp = 0;
439 roundBits = zSig & 0x3FF;
440 if (isTiny && roundBits)
441 float_raise(FPSCR_CAUSE_UNDERFLOW);
444 if (roundBits)
445 float_raise(FPSCR_CAUSE_INEXACT);
446 zSig = (zSig + roundIncrement) >> 10;
447 zSig &= ~(((roundBits ^ 0x200) == 0) & roundNearestEven);
448 if (zSig == 0)
449 zExp = 0;
450 return packFloat64(zSign, zExp, zSig);
454 static float32 subFloat32Sigs(float32 a, float32 b, flag zSign)
456 int16 aExp, bExp, zExp;
457 bits32 aSig, bSig, zSig;
458 int16 expDiff;
460 aSig = extractFloat32Frac(a);
461 aExp = extractFloat32Exp(a);
462 bSig = extractFloat32Frac(b);
463 bExp = extractFloat32Exp(b);
464 expDiff = aExp - bExp;
465 aSig <<= 7;
466 bSig <<= 7;
467 if (0 < expDiff)
468 goto aExpBigger;
469 if (expDiff < 0)
470 goto bExpBigger;
471 if (aExp == 0) {
472 aExp = 1;
473 bExp = 1;
475 if (bSig < aSig)
476 goto aBigger;
477 if (aSig < bSig)
478 goto bBigger;
479 return packFloat32(float_rounding_mode() == FPSCR_RM_ZERO, 0, 0);
480 bExpBigger:
481 if (bExp == 0xFF) {
482 return packFloat32(zSign ^ 1, 0xFF, 0);
484 if (aExp == 0) {
485 ++expDiff;
486 } else {
487 aSig |= 0x40000000;
489 shift32RightJamming(aSig, -expDiff, &aSig);
490 bSig |= 0x40000000;
491 bBigger:
492 zSig = bSig - aSig;
493 zExp = bExp;
494 zSign ^= 1;
495 goto normalizeRoundAndPack;
496 aExpBigger:
497 if (aExp == 0xFF) {
498 return a;
500 if (bExp == 0) {
501 --expDiff;
502 } else {
503 bSig |= 0x40000000;
505 shift32RightJamming(bSig, expDiff, &bSig);
506 aSig |= 0x40000000;
507 aBigger:
508 zSig = aSig - bSig;
509 zExp = aExp;
510 normalizeRoundAndPack:
511 --zExp;
512 return normalizeRoundAndPackFloat32(zSign, zExp, zSig);
516 static float32 addFloat32Sigs(float32 a, float32 b, flag zSign)
518 int16 aExp, bExp, zExp;
519 bits32 aSig, bSig, zSig;
520 int16 expDiff;
522 aSig = extractFloat32Frac(a);
523 aExp = extractFloat32Exp(a);
524 bSig = extractFloat32Frac(b);
525 bExp = extractFloat32Exp(b);
526 expDiff = aExp - bExp;
527 aSig <<= 6;
528 bSig <<= 6;
529 if (0 < expDiff) {
530 if (aExp == 0xFF) {
531 return a;
533 if (bExp == 0) {
534 --expDiff;
535 } else {
536 bSig |= 0x20000000;
538 shift32RightJamming(bSig, expDiff, &bSig);
539 zExp = aExp;
540 } else if (expDiff < 0) {
541 if (bExp == 0xFF) {
542 return packFloat32(zSign, 0xFF, 0);
544 if (aExp == 0) {
545 ++expDiff;
546 } else {
547 aSig |= 0x20000000;
549 shift32RightJamming(aSig, -expDiff, &aSig);
550 zExp = bExp;
551 } else {
552 if (aExp == 0xFF) {
553 return a;
555 if (aExp == 0)
556 return packFloat32(zSign, 0, (aSig + bSig) >> 6);
557 zSig = 0x40000000 + aSig + bSig;
558 zExp = aExp;
559 goto roundAndPack;
561 aSig |= 0x20000000;
562 zSig = (aSig + bSig) << 1;
563 --zExp;
564 if ((sbits32) zSig < 0) {
565 zSig = aSig + bSig;
566 ++zExp;
568 roundAndPack:
569 return roundAndPackFloat32(zSign, zExp, zSig);
573 float64 float64_sub(float64 a, float64 b)
575 flag aSign, bSign;
577 aSign = extractFloat64Sign(a);
578 bSign = extractFloat64Sign(b);
579 if (aSign == bSign) {
580 return subFloat64Sigs(a, b, aSign);
581 } else {
582 return addFloat64Sigs(a, b, aSign);
587 float32 float32_sub(float32 a, float32 b)
589 flag aSign, bSign;
591 aSign = extractFloat32Sign(a);
592 bSign = extractFloat32Sign(b);
593 if (aSign == bSign) {
594 return subFloat32Sigs(a, b, aSign);
595 } else {
596 return addFloat32Sigs(a, b, aSign);
601 float32 float32_add(float32 a, float32 b)
603 flag aSign, bSign;
605 aSign = extractFloat32Sign(a);
606 bSign = extractFloat32Sign(b);
607 if (aSign == bSign) {
608 return addFloat32Sigs(a, b, aSign);
609 } else {
610 return subFloat32Sigs(a, b, aSign);
615 float64 float64_add(float64 a, float64 b)
617 flag aSign, bSign;
619 aSign = extractFloat64Sign(a);
620 bSign = extractFloat64Sign(b);
621 if (aSign == bSign) {
622 return addFloat64Sigs(a, b, aSign);
623 } else {
624 return subFloat64Sigs(a, b, aSign);
628 static void
629 normalizeFloat64Subnormal(bits64 aSig, int16 * zExpPtr, bits64 * zSigPtr)
631 int8 shiftCount;
633 shiftCount = countLeadingZeros64(aSig) - 11;
634 *zSigPtr = aSig << shiftCount;
635 *zExpPtr = 1 - shiftCount;
638 void add128(bits64 a0, bits64 a1, bits64 b0, bits64 b1, bits64 * z0Ptr,
639 bits64 * z1Ptr)
641 bits64 z1;
643 z1 = a1 + b1;
644 *z1Ptr = z1;
645 *z0Ptr = a0 + b0 + (z1 < a1);
648 void
649 sub128(bits64 a0, bits64 a1, bits64 b0, bits64 b1, bits64 * z0Ptr,
650 bits64 * z1Ptr)
652 *z1Ptr = a1 - b1;
653 *z0Ptr = a0 - b0 - (a1 < b1);
656 static bits64 estimateDiv128To64(bits64 a0, bits64 a1, bits64 b)
658 bits64 b0, b1;
659 bits64 rem0, rem1, term0, term1;
660 bits64 z, tmp;
661 if (b <= a0)
662 return LIT64(0xFFFFFFFFFFFFFFFF);
663 b0 = b >> 32;
664 tmp = a0;
665 do_div(tmp, b0);
667 z = (b0 << 32 <= a0) ? LIT64(0xFFFFFFFF00000000) : tmp << 32;
668 mul64To128(b, z, &term0, &term1);
669 sub128(a0, a1, term0, term1, &rem0, &rem1);
670 while (((sbits64) rem0) < 0) {
671 z -= LIT64(0x100000000);
672 b1 = b << 32;
673 add128(rem0, rem1, b0, b1, &rem0, &rem1);
675 rem0 = (rem0 << 32) | (rem1 >> 32);
676 tmp = rem0;
677 do_div(tmp, b0);
678 z |= (b0 << 32 <= rem0) ? 0xFFFFFFFF : tmp;
679 return z;
682 void mul64To128(bits64 a, bits64 b, bits64 * z0Ptr, bits64 * z1Ptr)
684 bits32 aHigh, aLow, bHigh, bLow;
685 bits64 z0, zMiddleA, zMiddleB, z1;
687 aLow = a;
688 aHigh = a >> 32;
689 bLow = b;
690 bHigh = b >> 32;
691 z1 = ((bits64) aLow) * bLow;
692 zMiddleA = ((bits64) aLow) * bHigh;
693 zMiddleB = ((bits64) aHigh) * bLow;
694 z0 = ((bits64) aHigh) * bHigh;
695 zMiddleA += zMiddleB;
696 z0 += (((bits64) (zMiddleA < zMiddleB)) << 32) + (zMiddleA >> 32);
697 zMiddleA <<= 32;
698 z1 += zMiddleA;
699 z0 += (z1 < zMiddleA);
700 *z1Ptr = z1;
701 *z0Ptr = z0;
705 static void normalizeFloat32Subnormal(bits32 aSig, int16 * zExpPtr,
706 bits32 * zSigPtr)
708 int8 shiftCount;
710 shiftCount = countLeadingZeros32(aSig) - 8;
711 *zSigPtr = aSig << shiftCount;
712 *zExpPtr = 1 - shiftCount;
716 float64 float64_div(float64 a, float64 b)
718 flag aSign, bSign, zSign;
719 int16 aExp, bExp, zExp;
720 bits64 aSig, bSig, zSig;
721 bits64 rem0, rem1;
722 bits64 term0, term1;
724 aSig = extractFloat64Frac(a);
725 aExp = extractFloat64Exp(a);
726 aSign = extractFloat64Sign(a);
727 bSig = extractFloat64Frac(b);
728 bExp = extractFloat64Exp(b);
729 bSign = extractFloat64Sign(b);
730 zSign = aSign ^ bSign;
731 if (aExp == 0x7FF) {
732 if (bExp == 0x7FF) {
734 return packFloat64(zSign, 0x7FF, 0);
736 if (bExp == 0x7FF) {
737 return packFloat64(zSign, 0, 0);
739 if (bExp == 0) {
740 if (bSig == 0) {
741 if ((aExp | aSig) == 0) {
742 float_raise(FPSCR_CAUSE_INVALID);
744 return packFloat64(zSign, 0x7FF, 0);
746 normalizeFloat64Subnormal(bSig, &bExp, &bSig);
748 if (aExp == 0) {
749 if (aSig == 0)
750 return packFloat64(zSign, 0, 0);
751 normalizeFloat64Subnormal(aSig, &aExp, &aSig);
753 zExp = aExp - bExp + 0x3FD;
754 aSig = (aSig | LIT64(0x0010000000000000)) << 10;
755 bSig = (bSig | LIT64(0x0010000000000000)) << 11;
756 if (bSig <= (aSig + aSig)) {
757 aSig >>= 1;
758 ++zExp;
760 zSig = estimateDiv128To64(aSig, 0, bSig);
761 if ((zSig & 0x1FF) <= 2) {
762 mul64To128(bSig, zSig, &term0, &term1);
763 sub128(aSig, 0, term0, term1, &rem0, &rem1);
764 while ((sbits64) rem0 < 0) {
765 --zSig;
766 add128(rem0, rem1, 0, bSig, &rem0, &rem1);
768 zSig |= (rem1 != 0);
770 return roundAndPackFloat64(zSign, zExp, zSig);
774 float32 float32_div(float32 a, float32 b)
776 flag aSign, bSign, zSign;
777 int16 aExp, bExp, zExp;
778 bits32 aSig, bSig;
779 uint64_t zSig;
781 aSig = extractFloat32Frac(a);
782 aExp = extractFloat32Exp(a);
783 aSign = extractFloat32Sign(a);
784 bSig = extractFloat32Frac(b);
785 bExp = extractFloat32Exp(b);
786 bSign = extractFloat32Sign(b);
787 zSign = aSign ^ bSign;
788 if (aExp == 0xFF) {
789 if (bExp == 0xFF) {
791 return packFloat32(zSign, 0xFF, 0);
793 if (bExp == 0xFF) {
794 return packFloat32(zSign, 0, 0);
796 if (bExp == 0) {
797 if (bSig == 0) {
798 return packFloat32(zSign, 0xFF, 0);
800 normalizeFloat32Subnormal(bSig, &bExp, &bSig);
802 if (aExp == 0) {
803 if (aSig == 0)
804 return packFloat32(zSign, 0, 0);
805 normalizeFloat32Subnormal(aSig, &aExp, &aSig);
807 zExp = aExp - bExp + 0x7D;
808 aSig = (aSig | 0x00800000) << 7;
809 bSig = (bSig | 0x00800000) << 8;
810 if (bSig <= (aSig + aSig)) {
811 aSig >>= 1;
812 ++zExp;
814 zSig = (((bits64) aSig) << 32);
815 do_div(zSig, bSig);
817 if ((zSig & 0x3F) == 0) {
818 zSig |= (((bits64) bSig) * zSig != ((bits64) aSig) << 32);
820 return roundAndPackFloat32(zSign, zExp, (bits32)zSig);
824 float32 float32_mul(float32 a, float32 b)
826 char aSign, bSign, zSign;
827 int aExp, bExp, zExp;
828 unsigned int aSig, bSig;
829 unsigned long long zSig64;
830 unsigned int zSig;
832 aSig = extractFloat32Frac(a);
833 aExp = extractFloat32Exp(a);
834 aSign = extractFloat32Sign(a);
835 bSig = extractFloat32Frac(b);
836 bExp = extractFloat32Exp(b);
837 bSign = extractFloat32Sign(b);
838 zSign = aSign ^ bSign;
839 if (aExp == 0) {
840 if (aSig == 0)
841 return packFloat32(zSign, 0, 0);
842 normalizeFloat32Subnormal(aSig, &aExp, &aSig);
844 if (bExp == 0) {
845 if (bSig == 0)
846 return packFloat32(zSign, 0, 0);
847 normalizeFloat32Subnormal(bSig, &bExp, &bSig);
849 if ((bExp == 0xff && bSig == 0) || (aExp == 0xff && aSig == 0))
850 return roundAndPackFloat32(zSign, 0xff, 0);
852 zExp = aExp + bExp - 0x7F;
853 aSig = (aSig | 0x00800000) << 7;
854 bSig = (bSig | 0x00800000) << 8;
855 shift64RightJamming(((unsigned long long)aSig) * bSig, 32, &zSig64);
856 zSig = zSig64;
857 if (0 <= (signed int)(zSig << 1)) {
858 zSig <<= 1;
859 --zExp;
861 return roundAndPackFloat32(zSign, zExp, zSig);
865 float64 float64_mul(float64 a, float64 b)
867 char aSign, bSign, zSign;
868 int aExp, bExp, zExp;
869 unsigned long long int aSig, bSig, zSig0, zSig1;
871 aSig = extractFloat64Frac(a);
872 aExp = extractFloat64Exp(a);
873 aSign = extractFloat64Sign(a);
874 bSig = extractFloat64Frac(b);
875 bExp = extractFloat64Exp(b);
876 bSign = extractFloat64Sign(b);
877 zSign = aSign ^ bSign;
879 if (aExp == 0) {
880 if (aSig == 0)
881 return packFloat64(zSign, 0, 0);
882 normalizeFloat64Subnormal(aSig, &aExp, &aSig);
884 if (bExp == 0) {
885 if (bSig == 0)
886 return packFloat64(zSign, 0, 0);
887 normalizeFloat64Subnormal(bSig, &bExp, &bSig);
889 if ((aExp == 0x7ff && aSig == 0) || (bExp == 0x7ff && bSig == 0))
890 return roundAndPackFloat64(zSign, 0x7ff, 0);
892 zExp = aExp + bExp - 0x3FF;
893 aSig = (aSig | 0x0010000000000000LL) << 10;
894 bSig = (bSig | 0x0010000000000000LL) << 11;
895 mul64To128(aSig, bSig, &zSig0, &zSig1);
896 zSig0 |= (zSig1 != 0);
897 if (0 <= (signed long long int)(zSig0 << 1)) {
898 zSig0 <<= 1;
899 --zExp;
901 return roundAndPackFloat64(zSign, zExp, zSig0);
905 * -------------------------------------------------------------------------------
906 * Returns the result of converting the double-precision floating-point value
907 * `a' to the single-precision floating-point format. The conversion is
908 * performed according to the IEC/IEEE Standard for Binary Floating-point
909 * Arithmetic.
910 * -------------------------------------------------------------------------------
911 * */
912 float32 float64_to_float32(float64 a)
914 flag aSign;
915 int16 aExp;
916 bits64 aSig;
917 bits32 zSig;
919 aSig = extractFloat64Frac( a );
920 aExp = extractFloat64Exp( a );
921 aSign = extractFloat64Sign( a );
923 shift64RightJamming( aSig, 22, &aSig );
924 zSig = aSig;
925 if ( aExp || zSig ) {
926 zSig |= 0x40000000;
927 aExp -= 0x381;
929 return roundAndPackFloat32(aSign, aExp, zSig);