/*
 * Floating point emulation support for subnormalised numbers on SH4
 * architecture. This file is derived from the SoftFloat IEC/IEEE
 * Floating-point Arithmetic Package, Release 2, the original license of
 * which is reproduced below.
 *
 * ========================================================================
 *
 * This C source file is part of the SoftFloat IEC/IEEE Floating-point
 * Arithmetic Package, Release 2.
 *
 * Written by John R. Hauser. This work was made possible in part by the
 * International Computer Science Institute, located at Suite 600, 1947 Center
 * Street, Berkeley, California 94704. Funding was partially provided by the
 * National Science Foundation under grant MIP-9311980. The original version
 * of this code was written as part of a project to build a fixed-point vector
 * processor in collaboration with the University of California at Berkeley,
 * overseen by Profs. Nelson Morgan and John Wawrzynek. More information
 * is available through the web page `http://HTTP.CS.Berkeley.EDU/~jhauser/
 * arithmetic/softfloat.html'.
 *
 * THIS SOFTWARE IS DISTRIBUTED AS IS, FOR FREE. Although reasonable effort
 * has been made to avoid it, THIS SOFTWARE MAY CONTAIN FAULTS THAT WILL AT
 * TIMES RESULT IN INCORRECT BEHAVIOR. USE OF THIS SOFTWARE IS RESTRICTED TO
 * PERSONS AND ORGANIZATIONS WHO CAN AND WILL TAKE FULL RESPONSIBILITY FOR ANY
 * AND ALL LOSSES, COSTS, OR OTHER PROBLEMS ARISING FROM ITS USE.
 *
 * Derivative works are acceptable, even for commercial purposes, so long as
 * (1) they include prominent notice that the work is derivative, and (2) they
 * include prominent notice akin to these three paragraphs for those parts of
 * this code that are retained.
 *
 * ========================================================================
 *
 * SH4 modifications by Ismail Dhaoui <ismail.dhaoui@st.com>
 * and Kamel Khelifi <kamel.khelifi@st.com>
 */
38 #include <linux/kernel.h>
40 #include <asm/div64.h>
42 #define LIT64( a ) a##LL
45 typedef unsigned char uint8
;
46 typedef signed char int8
;
49 typedef unsigned int uint32
;
50 typedef signed int int32
;
52 typedef unsigned long long int bits64
;
53 typedef signed long long int sbits64
;
55 typedef unsigned char bits8
;
56 typedef signed char sbits8
;
57 typedef unsigned short int bits16
;
58 typedef signed short int sbits16
;
59 typedef unsigned int bits32
;
60 typedef signed int sbits32
;
62 typedef unsigned long long int uint64
;
63 typedef signed long long int int64
;
65 typedef unsigned long int float32
;
66 typedef unsigned long long float64
;
68 extern void float_raise(unsigned int flags
); /* in fpu.c */
69 extern int float_rounding_mode(void); /* in fpu.c */
71 bits64
extractFloat64Frac(float64 a
);
72 flag
extractFloat64Sign(float64 a
);
73 int16
extractFloat64Exp(float64 a
);
74 int16
extractFloat32Exp(float32 a
);
75 flag
extractFloat32Sign(float32 a
);
76 bits32
extractFloat32Frac(float32 a
);
77 float64
packFloat64(flag zSign
, int16 zExp
, bits64 zSig
);
78 void shift64RightJamming(bits64 a
, int16 count
, bits64
* zPtr
);
79 float32
packFloat32(flag zSign
, int16 zExp
, bits32 zSig
);
80 void shift32RightJamming(bits32 a
, int16 count
, bits32
* zPtr
);
81 float64
float64_sub(float64 a
, float64 b
);
82 float32
float32_sub(float32 a
, float32 b
);
83 float32
float32_add(float32 a
, float32 b
);
84 float64
float64_add(float64 a
, float64 b
);
85 float64
float64_div(float64 a
, float64 b
);
86 float32
float32_div(float32 a
, float32 b
);
87 float32
float32_mul(float32 a
, float32 b
);
88 float64
float64_mul(float64 a
, float64 b
);
89 float32
float64_to_float32(float64 a
);
90 void add128(bits64 a0
, bits64 a1
, bits64 b0
, bits64 b1
, bits64
* z0Ptr
,
92 void sub128(bits64 a0
, bits64 a1
, bits64 b0
, bits64 b1
, bits64
* z0Ptr
,
94 void mul64To128(bits64 a
, bits64 b
, bits64
* z0Ptr
, bits64
* z1Ptr
);
96 static int8
countLeadingZeros32(bits32 a
);
97 static int8
countLeadingZeros64(bits64 a
);
98 static float64
normalizeRoundAndPackFloat64(flag zSign
, int16 zExp
,
100 static float64
subFloat64Sigs(float64 a
, float64 b
, flag zSign
);
101 static float64
addFloat64Sigs(float64 a
, float64 b
, flag zSign
);
102 static float32
roundAndPackFloat32(flag zSign
, int16 zExp
, bits32 zSig
);
103 static float32
normalizeRoundAndPackFloat32(flag zSign
, int16 zExp
,
105 static float64
roundAndPackFloat64(flag zSign
, int16 zExp
, bits64 zSig
);
106 static float32
subFloat32Sigs(float32 a
, float32 b
, flag zSign
);
107 static float32
addFloat32Sigs(float32 a
, float32 b
, flag zSign
);
108 static void normalizeFloat64Subnormal(bits64 aSig
, int16
* zExpPtr
,
110 static bits64
estimateDiv128To64(bits64 a0
, bits64 a1
, bits64 b
);
111 static void normalizeFloat32Subnormal(bits32 aSig
, int16
* zExpPtr
,
114 bits64
extractFloat64Frac(float64 a
)
116 return a
& LIT64(0x000FFFFFFFFFFFFF);
119 flag
extractFloat64Sign(float64 a
)
124 int16
extractFloat64Exp(float64 a
)
126 return (a
>> 52) & 0x7FF;
129 int16
extractFloat32Exp(float32 a
)
131 return (a
>> 23) & 0xFF;
134 flag
extractFloat32Sign(float32 a
)
139 bits32
extractFloat32Frac(float32 a
)
141 return a
& 0x007FFFFF;
144 float64
packFloat64(flag zSign
, int16 zExp
, bits64 zSig
)
146 return (((bits64
) zSign
) << 63) + (((bits64
) zExp
) << 52) + zSig
;
149 void shift64RightJamming(bits64 a
, int16 count
, bits64
* zPtr
)
155 } else if (count
< 64) {
156 z
= (a
>> count
) | ((a
<< ((-count
) & 63)) != 0);
163 static int8
countLeadingZeros32(bits32 a
)
165 static const int8 countLeadingZerosHigh
[] = {
166 8, 7, 6, 6, 5, 5, 5, 5, 4, 4, 4, 4, 4, 4, 4, 4,
167 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
168 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
169 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
170 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
171 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
172 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
173 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
174 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
175 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
176 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
177 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
178 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
179 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
180 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
181 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
194 shiftCount
+= countLeadingZerosHigh
[a
>> 24];
199 static int8
countLeadingZeros64(bits64 a
)
204 if (a
< ((bits64
) 1) << 32) {
209 shiftCount
+= countLeadingZeros32(a
);
214 static float64
normalizeRoundAndPackFloat64(flag zSign
, int16 zExp
, bits64 zSig
)
218 shiftCount
= countLeadingZeros64(zSig
) - 1;
219 return roundAndPackFloat64(zSign
, zExp
- shiftCount
,
224 static float64
subFloat64Sigs(float64 a
, float64 b
, flag zSign
)
226 int16 aExp
, bExp
, zExp
;
227 bits64 aSig
, bSig
, zSig
;
230 aSig
= extractFloat64Frac(a
);
231 aExp
= extractFloat64Exp(a
);
232 bSig
= extractFloat64Frac(b
);
233 bExp
= extractFloat64Exp(b
);
234 expDiff
= aExp
- bExp
;
249 return packFloat64(float_rounding_mode() == FPSCR_RM_ZERO
, 0, 0);
252 return packFloat64(zSign
^ 1, 0x7FF, 0);
257 aSig
|= LIT64(0x4000000000000000);
259 shift64RightJamming(aSig
, -expDiff
, &aSig
);
260 bSig
|= LIT64(0x4000000000000000);
265 goto normalizeRoundAndPack
;
273 bSig
|= LIT64(0x4000000000000000);
275 shift64RightJamming(bSig
, expDiff
, &bSig
);
276 aSig
|= LIT64(0x4000000000000000);
280 normalizeRoundAndPack
:
282 return normalizeRoundAndPackFloat64(zSign
, zExp
, zSig
);
285 static float64
addFloat64Sigs(float64 a
, float64 b
, flag zSign
)
287 int16 aExp
, bExp
, zExp
;
288 bits64 aSig
, bSig
, zSig
;
291 aSig
= extractFloat64Frac(a
);
292 aExp
= extractFloat64Exp(a
);
293 bSig
= extractFloat64Frac(b
);
294 bExp
= extractFloat64Exp(b
);
295 expDiff
= aExp
- bExp
;
305 bSig
|= LIT64(0x2000000000000000);
307 shift64RightJamming(bSig
, expDiff
, &bSig
);
309 } else if (expDiff
< 0) {
311 return packFloat64(zSign
, 0x7FF, 0);
316 aSig
|= LIT64(0x2000000000000000);
318 shift64RightJamming(aSig
, -expDiff
, &aSig
);
325 return packFloat64(zSign
, 0, (aSig
+ bSig
) >> 9);
326 zSig
= LIT64(0x4000000000000000) + aSig
+ bSig
;
330 aSig
|= LIT64(0x2000000000000000);
331 zSig
= (aSig
+ bSig
) << 1;
333 if ((sbits64
) zSig
< 0) {
338 return roundAndPackFloat64(zSign
, zExp
, zSig
);
342 float32
packFloat32(flag zSign
, int16 zExp
, bits32 zSig
)
344 return (((bits32
) zSign
) << 31) + (((bits32
) zExp
) << 23) + zSig
;
347 void shift32RightJamming(bits32 a
, int16 count
, bits32
* zPtr
)
352 } else if (count
< 32) {
353 z
= (a
>> count
) | ((a
<< ((-count
) & 31)) != 0);
360 static float32
roundAndPackFloat32(flag zSign
, int16 zExp
, bits32 zSig
)
362 flag roundNearestEven
;
363 int8 roundIncrement
, roundBits
;
366 /* SH4 has only 2 rounding modes - round to nearest and round to zero */
367 roundNearestEven
= (float_rounding_mode() == FPSCR_RM_NEAREST
);
368 roundIncrement
= 0x40;
369 if (!roundNearestEven
) {
372 roundBits
= zSig
& 0x7F;
373 if (0xFD <= (bits16
) zExp
) {
376 && ((sbits32
) (zSig
+ roundIncrement
) < 0))
378 float_raise(FPSCR_CAUSE_OVERFLOW
| FPSCR_CAUSE_INEXACT
);
379 return packFloat32(zSign
, 0xFF,
380 0) - (roundIncrement
== 0);
384 || (zSig
+ roundIncrement
< 0x80000000);
385 shift32RightJamming(zSig
, -zExp
, &zSig
);
387 roundBits
= zSig
& 0x7F;
388 if (isTiny
&& roundBits
)
389 float_raise(FPSCR_CAUSE_UNDERFLOW
);
393 float_raise(FPSCR_CAUSE_INEXACT
);
394 zSig
= (zSig
+ roundIncrement
) >> 7;
395 zSig
&= ~(((roundBits
^ 0x40) == 0) & roundNearestEven
);
398 return packFloat32(zSign
, zExp
, zSig
);
402 static float32
normalizeRoundAndPackFloat32(flag zSign
, int16 zExp
, bits32 zSig
)
406 shiftCount
= countLeadingZeros32(zSig
) - 1;
407 return roundAndPackFloat32(zSign
, zExp
- shiftCount
,
411 static float64
roundAndPackFloat64(flag zSign
, int16 zExp
, bits64 zSig
)
413 flag roundNearestEven
;
414 int16 roundIncrement
, roundBits
;
417 /* SH4 has only 2 rounding modes - round to nearest and round to zero */
418 roundNearestEven
= (float_rounding_mode() == FPSCR_RM_NEAREST
);
419 roundIncrement
= 0x200;
420 if (!roundNearestEven
) {
423 roundBits
= zSig
& 0x3FF;
424 if (0x7FD <= (bits16
) zExp
) {
427 && ((sbits64
) (zSig
+ roundIncrement
) < 0))
429 float_raise(FPSCR_CAUSE_OVERFLOW
| FPSCR_CAUSE_INEXACT
);
430 return packFloat64(zSign
, 0x7FF,
431 0) - (roundIncrement
== 0);
435 || (zSig
+ roundIncrement
<
436 LIT64(0x8000000000000000));
437 shift64RightJamming(zSig
, -zExp
, &zSig
);
439 roundBits
= zSig
& 0x3FF;
440 if (isTiny
&& roundBits
)
441 float_raise(FPSCR_CAUSE_UNDERFLOW
);
445 float_raise(FPSCR_CAUSE_INEXACT
);
446 zSig
= (zSig
+ roundIncrement
) >> 10;
447 zSig
&= ~(((roundBits
^ 0x200) == 0) & roundNearestEven
);
450 return packFloat64(zSign
, zExp
, zSig
);
454 static float32
subFloat32Sigs(float32 a
, float32 b
, flag zSign
)
456 int16 aExp
, bExp
, zExp
;
457 bits32 aSig
, bSig
, zSig
;
460 aSig
= extractFloat32Frac(a
);
461 aExp
= extractFloat32Exp(a
);
462 bSig
= extractFloat32Frac(b
);
463 bExp
= extractFloat32Exp(b
);
464 expDiff
= aExp
- bExp
;
479 return packFloat32(float_rounding_mode() == FPSCR_RM_ZERO
, 0, 0);
482 return packFloat32(zSign
^ 1, 0xFF, 0);
489 shift32RightJamming(aSig
, -expDiff
, &aSig
);
495 goto normalizeRoundAndPack
;
505 shift32RightJamming(bSig
, expDiff
, &bSig
);
510 normalizeRoundAndPack
:
512 return normalizeRoundAndPackFloat32(zSign
, zExp
, zSig
);
516 static float32
addFloat32Sigs(float32 a
, float32 b
, flag zSign
)
518 int16 aExp
, bExp
, zExp
;
519 bits32 aSig
, bSig
, zSig
;
522 aSig
= extractFloat32Frac(a
);
523 aExp
= extractFloat32Exp(a
);
524 bSig
= extractFloat32Frac(b
);
525 bExp
= extractFloat32Exp(b
);
526 expDiff
= aExp
- bExp
;
538 shift32RightJamming(bSig
, expDiff
, &bSig
);
540 } else if (expDiff
< 0) {
542 return packFloat32(zSign
, 0xFF, 0);
549 shift32RightJamming(aSig
, -expDiff
, &aSig
);
556 return packFloat32(zSign
, 0, (aSig
+ bSig
) >> 6);
557 zSig
= 0x40000000 + aSig
+ bSig
;
562 zSig
= (aSig
+ bSig
) << 1;
564 if ((sbits32
) zSig
< 0) {
569 return roundAndPackFloat32(zSign
, zExp
, zSig
);
573 float64
float64_sub(float64 a
, float64 b
)
577 aSign
= extractFloat64Sign(a
);
578 bSign
= extractFloat64Sign(b
);
579 if (aSign
== bSign
) {
580 return subFloat64Sigs(a
, b
, aSign
);
582 return addFloat64Sigs(a
, b
, aSign
);
587 float32
float32_sub(float32 a
, float32 b
)
591 aSign
= extractFloat32Sign(a
);
592 bSign
= extractFloat32Sign(b
);
593 if (aSign
== bSign
) {
594 return subFloat32Sigs(a
, b
, aSign
);
596 return addFloat32Sigs(a
, b
, aSign
);
601 float32
float32_add(float32 a
, float32 b
)
605 aSign
= extractFloat32Sign(a
);
606 bSign
= extractFloat32Sign(b
);
607 if (aSign
== bSign
) {
608 return addFloat32Sigs(a
, b
, aSign
);
610 return subFloat32Sigs(a
, b
, aSign
);
615 float64
float64_add(float64 a
, float64 b
)
619 aSign
= extractFloat64Sign(a
);
620 bSign
= extractFloat64Sign(b
);
621 if (aSign
== bSign
) {
622 return addFloat64Sigs(a
, b
, aSign
);
624 return subFloat64Sigs(a
, b
, aSign
);
629 normalizeFloat64Subnormal(bits64 aSig
, int16
* zExpPtr
, bits64
* zSigPtr
)
633 shiftCount
= countLeadingZeros64(aSig
) - 11;
634 *zSigPtr
= aSig
<< shiftCount
;
635 *zExpPtr
= 1 - shiftCount
;
638 void add128(bits64 a0
, bits64 a1
, bits64 b0
, bits64 b1
, bits64
* z0Ptr
,
645 *z0Ptr
= a0
+ b0
+ (z1
< a1
);
649 sub128(bits64 a0
, bits64 a1
, bits64 b0
, bits64 b1
, bits64
* z0Ptr
,
653 *z0Ptr
= a0
- b0
- (a1
< b1
);
656 static bits64
estimateDiv128To64(bits64 a0
, bits64 a1
, bits64 b
)
659 bits64 rem0
, rem1
, term0
, term1
;
662 return LIT64(0xFFFFFFFFFFFFFFFF);
667 z
= (b0
<< 32 <= a0
) ? LIT64(0xFFFFFFFF00000000) : tmp
<< 32;
668 mul64To128(b
, z
, &term0
, &term1
);
669 sub128(a0
, a1
, term0
, term1
, &rem0
, &rem1
);
670 while (((sbits64
) rem0
) < 0) {
671 z
-= LIT64(0x100000000);
673 add128(rem0
, rem1
, b0
, b1
, &rem0
, &rem1
);
675 rem0
= (rem0
<< 32) | (rem1
>> 32);
678 z
|= (b0
<< 32 <= rem0
) ? 0xFFFFFFFF : tmp
;
682 void mul64To128(bits64 a
, bits64 b
, bits64
* z0Ptr
, bits64
* z1Ptr
)
684 bits32 aHigh
, aLow
, bHigh
, bLow
;
685 bits64 z0
, zMiddleA
, zMiddleB
, z1
;
691 z1
= ((bits64
) aLow
) * bLow
;
692 zMiddleA
= ((bits64
) aLow
) * bHigh
;
693 zMiddleB
= ((bits64
) aHigh
) * bLow
;
694 z0
= ((bits64
) aHigh
) * bHigh
;
695 zMiddleA
+= zMiddleB
;
696 z0
+= (((bits64
) (zMiddleA
< zMiddleB
)) << 32) + (zMiddleA
>> 32);
699 z0
+= (z1
< zMiddleA
);
705 static void normalizeFloat32Subnormal(bits32 aSig
, int16
* zExpPtr
,
710 shiftCount
= countLeadingZeros32(aSig
) - 8;
711 *zSigPtr
= aSig
<< shiftCount
;
712 *zExpPtr
= 1 - shiftCount
;
716 float64
float64_div(float64 a
, float64 b
)
718 flag aSign
, bSign
, zSign
;
719 int16 aExp
, bExp
, zExp
;
720 bits64 aSig
, bSig
, zSig
;
724 aSig
= extractFloat64Frac(a
);
725 aExp
= extractFloat64Exp(a
);
726 aSign
= extractFloat64Sign(a
);
727 bSig
= extractFloat64Frac(b
);
728 bExp
= extractFloat64Exp(b
);
729 bSign
= extractFloat64Sign(b
);
730 zSign
= aSign
^ bSign
;
734 return packFloat64(zSign
, 0x7FF, 0);
737 return packFloat64(zSign
, 0, 0);
741 if ((aExp
| aSig
) == 0) {
742 float_raise(FPSCR_CAUSE_INVALID
);
744 return packFloat64(zSign
, 0x7FF, 0);
746 normalizeFloat64Subnormal(bSig
, &bExp
, &bSig
);
750 return packFloat64(zSign
, 0, 0);
751 normalizeFloat64Subnormal(aSig
, &aExp
, &aSig
);
753 zExp
= aExp
- bExp
+ 0x3FD;
754 aSig
= (aSig
| LIT64(0x0010000000000000)) << 10;
755 bSig
= (bSig
| LIT64(0x0010000000000000)) << 11;
756 if (bSig
<= (aSig
+ aSig
)) {
760 zSig
= estimateDiv128To64(aSig
, 0, bSig
);
761 if ((zSig
& 0x1FF) <= 2) {
762 mul64To128(bSig
, zSig
, &term0
, &term1
);
763 sub128(aSig
, 0, term0
, term1
, &rem0
, &rem1
);
764 while ((sbits64
) rem0
< 0) {
766 add128(rem0
, rem1
, 0, bSig
, &rem0
, &rem1
);
770 return roundAndPackFloat64(zSign
, zExp
, zSig
);
774 float32
float32_div(float32 a
, float32 b
)
776 flag aSign
, bSign
, zSign
;
777 int16 aExp
, bExp
, zExp
;
781 aSig
= extractFloat32Frac(a
);
782 aExp
= extractFloat32Exp(a
);
783 aSign
= extractFloat32Sign(a
);
784 bSig
= extractFloat32Frac(b
);
785 bExp
= extractFloat32Exp(b
);
786 bSign
= extractFloat32Sign(b
);
787 zSign
= aSign
^ bSign
;
791 return packFloat32(zSign
, 0xFF, 0);
794 return packFloat32(zSign
, 0, 0);
798 return packFloat32(zSign
, 0xFF, 0);
800 normalizeFloat32Subnormal(bSig
, &bExp
, &bSig
);
804 return packFloat32(zSign
, 0, 0);
805 normalizeFloat32Subnormal(aSig
, &aExp
, &aSig
);
807 zExp
= aExp
- bExp
+ 0x7D;
808 aSig
= (aSig
| 0x00800000) << 7;
809 bSig
= (bSig
| 0x00800000) << 8;
810 if (bSig
<= (aSig
+ aSig
)) {
814 zSig
= (((bits64
) aSig
) << 32);
817 if ((zSig
& 0x3F) == 0) {
818 zSig
|= (((bits64
) bSig
) * zSig
!= ((bits64
) aSig
) << 32);
820 return roundAndPackFloat32(zSign
, zExp
, (bits32
)zSig
);
824 float32
float32_mul(float32 a
, float32 b
)
826 char aSign
, bSign
, zSign
;
827 int aExp
, bExp
, zExp
;
828 unsigned int aSig
, bSig
;
829 unsigned long long zSig64
;
832 aSig
= extractFloat32Frac(a
);
833 aExp
= extractFloat32Exp(a
);
834 aSign
= extractFloat32Sign(a
);
835 bSig
= extractFloat32Frac(b
);
836 bExp
= extractFloat32Exp(b
);
837 bSign
= extractFloat32Sign(b
);
838 zSign
= aSign
^ bSign
;
841 return packFloat32(zSign
, 0, 0);
842 normalizeFloat32Subnormal(aSig
, &aExp
, &aSig
);
846 return packFloat32(zSign
, 0, 0);
847 normalizeFloat32Subnormal(bSig
, &bExp
, &bSig
);
849 if ((bExp
== 0xff && bSig
== 0) || (aExp
== 0xff && aSig
== 0))
850 return roundAndPackFloat32(zSign
, 0xff, 0);
852 zExp
= aExp
+ bExp
- 0x7F;
853 aSig
= (aSig
| 0x00800000) << 7;
854 bSig
= (bSig
| 0x00800000) << 8;
855 shift64RightJamming(((unsigned long long)aSig
) * bSig
, 32, &zSig64
);
857 if (0 <= (signed int)(zSig
<< 1)) {
861 return roundAndPackFloat32(zSign
, zExp
, zSig
);
865 float64
float64_mul(float64 a
, float64 b
)
867 char aSign
, bSign
, zSign
;
868 int aExp
, bExp
, zExp
;
869 unsigned long long int aSig
, bSig
, zSig0
, zSig1
;
871 aSig
= extractFloat64Frac(a
);
872 aExp
= extractFloat64Exp(a
);
873 aSign
= extractFloat64Sign(a
);
874 bSig
= extractFloat64Frac(b
);
875 bExp
= extractFloat64Exp(b
);
876 bSign
= extractFloat64Sign(b
);
877 zSign
= aSign
^ bSign
;
881 return packFloat64(zSign
, 0, 0);
882 normalizeFloat64Subnormal(aSig
, &aExp
, &aSig
);
886 return packFloat64(zSign
, 0, 0);
887 normalizeFloat64Subnormal(bSig
, &bExp
, &bSig
);
889 if ((aExp
== 0x7ff && aSig
== 0) || (bExp
== 0x7ff && bSig
== 0))
890 return roundAndPackFloat64(zSign
, 0x7ff, 0);
892 zExp
= aExp
+ bExp
- 0x3FF;
893 aSig
= (aSig
| 0x0010000000000000LL
) << 10;
894 bSig
= (bSig
| 0x0010000000000000LL
) << 11;
895 mul64To128(aSig
, bSig
, &zSig0
, &zSig1
);
896 zSig0
|= (zSig1
!= 0);
897 if (0 <= (signed long long int)(zSig0
<< 1)) {
901 return roundAndPackFloat64(zSign
, zExp
, zSig0
);
905 * -------------------------------------------------------------------------------
906 * Returns the result of converting the double-precision floating-point value
907 * `a' to the single-precision floating-point format. The conversion is
908 * performed according to the IEC/IEEE Standard for Binary Floating-point
910 * -------------------------------------------------------------------------------
912 float32
float64_to_float32(float64 a
)
919 aSig
= extractFloat64Frac( a
);
920 aExp
= extractFloat64Exp( a
);
921 aSign
= extractFloat64Sign( a
);
923 shift64RightJamming( aSig
, 22, &aSig
);
925 if ( aExp
|| zSig
) {
929 return roundAndPackFloat32(aSign
, aExp
, zSig
);