1 /* $NetBSD: softfloat-macros,v 1.3 2012/03/21 02:32:26 christos Exp $ */
4 ===============================================================================
6 This C source fragment is part of the SoftFloat IEC/IEEE Floating-point
7 Arithmetic Package, Release 2a.
9 Written by John R. Hauser. This work was made possible in part by the
10 International Computer Science Institute, located at Suite 600, 1947 Center
11 Street, Berkeley, California 94704. Funding was partially provided by the
12 National Science Foundation under grant MIP-9311980. The original version
13 of this code was written as part of a project to build a fixed-point vector
14 processor in collaboration with the University of California at Berkeley,
15 overseen by Profs. Nelson Morgan and John Wawrzynek. More information
16 is available through the Web page `http://HTTP.CS.Berkeley.EDU/~jhauser/
17 arithmetic/SoftFloat.html'.
19 THIS SOFTWARE IS DISTRIBUTED AS IS, FOR FREE. Although reasonable effort
20 has been made to avoid it, THIS SOFTWARE MAY CONTAIN FAULTS THAT WILL AT
21 TIMES RESULT IN INCORRECT BEHAVIOR. USE OF THIS SOFTWARE IS RESTRICTED TO
22 PERSONS AND ORGANIZATIONS WHO CAN AND WILL TAKE FULL RESPONSIBILITY FOR ANY
23 AND ALL LOSSES, COSTS, OR OTHER PROBLEMS ARISING FROM ITS USE.
25 Derivative works are acceptable, even for commercial purposes, so long as
26 (1) they include prominent notice that the work is derivative, and (2) they
27 include prominent notice akin to these four paragraphs for those parts of
28 this code that are retained.
30 ===============================================================================
34 -------------------------------------------------------------------------------
35 Shifts `a' right by the number of bits given in `count'. If any nonzero
36 bits are shifted off, they are ``jammed'' into the least significant bit of
37 the result by setting the least significant bit to 1. The value of `count'
38 can be arbitrarily large; in particular, if `count' is greater than 32, the
39 result will be either 0 or 1, depending on whether `a' is zero or nonzero.
40 The result is stored in the location pointed to by `zPtr'.
41 -------------------------------------------------------------------------------
43 INLINE void shift32RightJamming( bits32 a, int16 count, bits32 *zPtr )
50 else if ( count < 32 ) {
51 z = ( a>>count ) | ( ( a<<( ( - count ) & 31 ) ) != 0 );
61 -------------------------------------------------------------------------------
62 Shifts `a' right by the number of bits given in `count'. If any nonzero
63 bits are shifted off, they are ``jammed'' into the least significant bit of
64 the result by setting the least significant bit to 1. The value of `count'
65 can be arbitrarily large; in particular, if `count' is greater than 64, the
66 result will be either 0 or 1, depending on whether `a' is zero or nonzero.
67 The result is stored in the location pointed to by `zPtr'.
68 -------------------------------------------------------------------------------
70 INLINE void shift64RightJamming( bits64 a, int16 count, bits64 *zPtr )
77 else if ( count < 64 ) {
78 z = ( a>>count ) | ( ( a<<( ( - count ) & 63 ) ) != 0 );
88 -------------------------------------------------------------------------------
89 Shifts the 128-bit value formed by concatenating `a0' and `a1' right by 64
90 _plus_ the number of bits given in `count'. The shifted result is at most
91 64 nonzero bits; this is stored at the location pointed to by `z0Ptr'. The
92 bits shifted off form a second 64-bit result as follows: The _last_ bit
93 shifted off is the most-significant bit of the extra result, and the other
94 63 bits of the extra result are all zero if and only if _all_but_the_last_
95 bits shifted off were all zero. This extra result is stored in the location
96 pointed to by `z1Ptr'. The value of `count' can be arbitrarily large.
97 (This routine makes more sense if `a0' and `a1' are considered to form a
98 fixed-point value with binary point between `a0' and `a1'. This fixed-point
99 value is shifted right by the number of bits given in `count', and the
100 integer part of the result is returned at the location pointed to by
101 `z0Ptr'. The fractional part of the result may be slightly corrupted as
102 described above, and is returned at the location pointed to by `z1Ptr'.)
103 -------------------------------------------------------------------------------
106 shift64ExtraRightJamming(
107 bits64 a0, bits64 a1, int16 count, bits64 *z0Ptr, bits64 *z1Ptr )
110 int8 negCount = ( - count ) & 63;
116 else if ( count < 64 ) {
117 z1 = ( a0<<negCount ) | ( a1 != 0 );
122 z1 = a0 | ( a1 != 0 );
125 z1 = ( ( a0 | a1 ) != 0 );
135 -------------------------------------------------------------------------------
136 Shifts the 128-bit value formed by concatenating `a0' and `a1' right by the
137 number of bits given in `count'. Any bits shifted off are lost. The value
138 of `count' can be arbitrarily large; in particular, if `count' is greater
139 than 128, the result will be 0. The result is broken into two 64-bit pieces
140 which are stored at the locations pointed to by `z0Ptr' and `z1Ptr'.
141 -------------------------------------------------------------------------------
145 bits64 a0, bits64 a1, int16 count, bits64 *z0Ptr, bits64 *z1Ptr )
148 int8 negCount = ( - count ) & 63;
154 else if ( count < 64 ) {
155 z1 = ( a0<<negCount ) | ( a1>>count );
159 z1 = ( count < 64 ) ? ( a0>>( count & 63 ) ) : 0;
168 -------------------------------------------------------------------------------
169 Shifts the 128-bit value formed by concatenating `a0' and `a1' right by the
170 number of bits given in `count'. If any nonzero bits are shifted off, they
171 are ``jammed'' into the least significant bit of the result by setting the
172 least significant bit to 1. The value of `count' can be arbitrarily large;
173 in particular, if `count' is greater than 128, the result will be either
174 0 or 1, depending on whether the concatenation of `a0' and `a1' is zero or
175 nonzero. The result is broken into two 64-bit pieces which are stored at
176 the locations pointed to by `z0Ptr' and `z1Ptr'.
177 -------------------------------------------------------------------------------
180 shift128RightJamming(
181 bits64 a0, bits64 a1, int16 count, bits64 *z0Ptr, bits64 *z1Ptr )
184 int8 negCount = ( - count ) & 63;
190 else if ( count < 64 ) {
191 z1 = ( a0<<negCount ) | ( a1>>count ) | ( ( a1<<negCount ) != 0 );
196 z1 = a0 | ( a1 != 0 );
198 else if ( count < 128 ) {
199 z1 = ( a0>>( count & 63 ) ) | ( ( ( a0<<negCount ) | a1 ) != 0 );
202 z1 = ( ( a0 | a1 ) != 0 );
212 -------------------------------------------------------------------------------
213 Shifts the 192-bit value formed by concatenating `a0', `a1', and `a2' right
214 by 64 _plus_ the number of bits given in `count'. The shifted result is
215 at most 128 nonzero bits; these are broken into two 64-bit pieces which are
216 stored at the locations pointed to by `z0Ptr' and `z1Ptr'. The bits shifted
217 off form a third 64-bit result as follows: The _last_ bit shifted off is
218 the most-significant bit of the extra result, and the other 63 bits of the
219 extra result are all zero if and only if _all_but_the_last_ bits shifted off
220 were all zero. This extra result is stored in the location pointed to by
221 `z2Ptr'. The value of `count' can be arbitrarily large.
222 (This routine makes more sense if `a0', `a1', and `a2' are considered
223 to form a fixed-point value with binary point between `a1' and `a2'. This
224 fixed-point value is shifted right by the number of bits given in `count',
225 and the integer part of the result is returned at the locations pointed to
226 by `z0Ptr' and `z1Ptr'. The fractional part of the result may be slightly
227 corrupted as described above, and is returned at the location pointed to by
229 -------------------------------------------------------------------------------
232 shift128ExtraRightJamming(
243 int8 negCount = ( - count ) & 63;
253 z1 = ( a0<<negCount ) | ( a1>>count );
265 z1 = a0>>( count & 63 );
268 z2 = ( count == 128 ) ? a0 : ( a0 != 0 );
283 -------------------------------------------------------------------------------
284 Shifts the 128-bit value formed by concatenating `a0' and `a1' left by the
285 number of bits given in `count'. Any bits shifted off are lost. The value
286 of `count' must be less than 64. The result is broken into two 64-bit
287 pieces which are stored at the locations pointed to by `z0Ptr' and `z1Ptr'.
288 -------------------------------------------------------------------------------
292 bits64 a0, bits64 a1, int16 count, bits64 *z0Ptr, bits64 *z1Ptr )
297 ( count == 0 ) ? a0 : ( a0<<count ) | ( a1>>( ( - count ) & 63 ) );
302 -------------------------------------------------------------------------------
303 Shifts the 192-bit value formed by concatenating `a0', `a1', and `a2' left
304 by the number of bits given in `count'. Any bits shifted off are lost.
305 The value of `count' must be less than 64. The result is broken into three
306 64-bit pieces which are stored at the locations pointed to by `z0Ptr',
307 `z1Ptr', and `z2Ptr'.
308 -------------------------------------------------------------------------------
328 negCount = ( ( - count ) & 63 );
339 -------------------------------------------------------------------------------
340 Adds the 128-bit value formed by concatenating `a0' and `a1' to the 128-bit
341 value formed by concatenating `b0' and `b1'. Addition is modulo 2^128, so
342 any carry out is lost. The result is broken into two 64-bit pieces which
343 are stored at the locations pointed to by `z0Ptr' and `z1Ptr'.
344 -------------------------------------------------------------------------------
348 bits64 a0, bits64 a1, bits64 b0, bits64 b1, bits64 *z0Ptr, bits64 *z1Ptr )
354 *z0Ptr = a0 + b0 + ( z1 < a1 );
359 -------------------------------------------------------------------------------
360 Adds the 192-bit value formed by concatenating `a0', `a1', and `a2' to the
361 192-bit value formed by concatenating `b0', `b1', and `b2'. Addition is
362 modulo 2^192, so any carry out is lost. The result is broken into three
363 64-bit pieces which are stored at the locations pointed to by `z0Ptr',
364 `z1Ptr', and `z2Ptr'.
365 -------------------------------------------------------------------------------
384 carry1 = ( z2 < a2 );
386 carry0 = ( z1 < a1 );
389 z0 += ( z1 < (bits64)carry1 );
398 -------------------------------------------------------------------------------
399 Subtracts the 128-bit value formed by concatenating `b0' and `b1' from the
400 128-bit value formed by concatenating `a0' and `a1'. Subtraction is modulo
401 2^128, so any borrow out (carry out) is lost. The result is broken into two
402 64-bit pieces which are stored at the locations pointed to by `z0Ptr' and
404 -------------------------------------------------------------------------------
408 bits64 a0, bits64 a1, bits64 b0, bits64 b1, bits64 *z0Ptr, bits64 *z1Ptr )
412 *z0Ptr = a0 - b0 - ( a1 < b1 );
417 -------------------------------------------------------------------------------
418 Subtracts the 192-bit value formed by concatenating `b0', `b1', and `b2'
419 from the 192-bit value formed by concatenating `a0', `a1', and `a2'.
420 Subtraction is modulo 2^192, so any borrow out (carry out) is lost. The
421 result is broken into three 64-bit pieces which are stored at the locations
422 pointed to by `z0Ptr', `z1Ptr', and `z2Ptr'.
423 -------------------------------------------------------------------------------
439 int8 borrow0, borrow1;
442 borrow1 = ( a2 < b2 );
444 borrow0 = ( a1 < b1 );
446 z0 -= ( z1 < (bits64)borrow1 );
456 -------------------------------------------------------------------------------
457 Multiplies `a' by `b' to obtain a 128-bit product. The product is broken
458 into two 64-bit pieces which are stored at the locations pointed to by
460 -------------------------------------------------------------------------------
462 INLINE void mul64To128( bits64 a, bits64 b, bits64 *z0Ptr, bits64 *z1Ptr )
464 bits32 aHigh, aLow, bHigh, bLow;
465 bits64 z0, zMiddleA, zMiddleB, z1;
468 aHigh = (bits32)(a>>32);
470 bHigh = (bits32)(b>>32);
471 z1 = ( (bits64) aLow ) * bLow;
472 zMiddleA = ( (bits64) aLow ) * bHigh;
473 zMiddleB = ( (bits64) aHigh ) * bLow;
474 z0 = ( (bits64) aHigh ) * bHigh;
475 zMiddleA += zMiddleB;
476 z0 += ( ( (bits64) ( zMiddleA < zMiddleB ) )<<32 ) + ( zMiddleA>>32 );
479 z0 += ( z1 < zMiddleA );
486 -------------------------------------------------------------------------------
487 Multiplies the 128-bit value formed by concatenating `a0' and `a1' by
488 `b' to obtain a 192-bit product. The product is broken into three 64-bit
489 pieces which are stored at the locations pointed to by `z0Ptr', `z1Ptr', and
491 -------------------------------------------------------------------------------
503 bits64 z0, z1, z2, more1;
505 mul64To128( a1, b, &z1, &z2 );
506 mul64To128( a0, b, &z0, &more1 );
507 add128( z0, more1, 0, z1, &z0, &z1 );
515 -------------------------------------------------------------------------------
516 Multiplies the 128-bit value formed by concatenating `a0' and `a1' to the
517 128-bit value formed by concatenating `b0' and `b1' to obtain a 256-bit
518 product. The product is broken into four 64-bit pieces which are stored at
519 the locations pointed to by `z0Ptr', `z1Ptr', `z2Ptr', and `z3Ptr'.
520 -------------------------------------------------------------------------------
534 bits64 z0, z1, z2, z3;
537 mul64To128( a1, b1, &z2, &z3 );
538 mul64To128( a1, b0, &z1, &more2 );
539 add128( z1, more2, 0, z2, &z1, &z2 );
540 mul64To128( a0, b0, &z0, &more1 );
541 add128( z0, more1, 0, z1, &z0, &z1 );
542 mul64To128( a0, b1, &more1, &more2 );
543 add128( more1, more2, 0, z2, &more1, &z2 );
544 add128( z0, z1, 0, more1, &z0, &z1 );
553 -------------------------------------------------------------------------------
554 Returns an approximation to the 64-bit integer quotient obtained by dividing
555 `b' into the 128-bit value formed by concatenating `a0' and `a1'. The
556 divisor `b' must be at least 2^63. If q is the exact quotient truncated
557 toward zero, the approximation returned lies between q and q + 2 inclusive.
558 If the exact quotient q is larger than 64 bits, the maximum positive 64-bit
559 unsigned integer is returned.
560 -------------------------------------------------------------------------------
562 static bits64 estimateDiv128To64( bits64 a0, bits64 a1, bits64 b )
565 bits64 rem0, rem1, term0, term1;
568 if ( b <= a0 ) return LIT64( 0xFFFFFFFFFFFFFFFF );
570 z = ( b0<<32 <= a0 ) ? LIT64( 0xFFFFFFFF00000000 ) : ( a0 / b0 )<<32;
571 mul64To128( b, z, &term0, &term1 );
572 sub128( a0, a1, term0, term1, &rem0, &rem1 );
573 while ( ( (sbits64) rem0 ) < 0 ) {
574 z -= LIT64( 0x100000000 );
576 add128( rem0, rem1, b0, b1, &rem0, &rem1 );
578 rem0 = ( rem0<<32 ) | ( rem1>>32 );
579 z |= ( b0<<32 <= rem0 ) ? 0xFFFFFFFF : rem0 / b0;
584 #if !defined(SOFTFLOAT_FOR_GCC) || defined(FLOATX80) || defined(FLOAT128)
586 -------------------------------------------------------------------------------
587 Returns an approximation to the square root of the 32-bit significand given
588 by `a'. Considered as an integer, `a' must be at least 2^31. If bit 0 of
589 `aExp' (the least significant bit) is 1, the integer returned approximates
590 2^31*sqrt(`a'/2^31), where `a' is considered an integer. If bit 0 of `aExp'
591 is 0, the integer returned approximates 2^31*sqrt(`a'/2^30). In either
592 case, the approximation returned lies strictly within +/-2 of the exact
594 -------------------------------------------------------------------------------
596 static bits32 estimateSqrt32( int16 aExp, bits32 a )
598 static const bits16 sqrtOddAdjustments[] = {
599 0x0004, 0x0022, 0x005D, 0x00B1, 0x011D, 0x019F, 0x0236, 0x02E0,
600 0x039C, 0x0468, 0x0545, 0x0631, 0x072B, 0x0832, 0x0946, 0x0A67
602 static const bits16 sqrtEvenAdjustments[] = {
603 0x0A2D, 0x08AF, 0x075A, 0x0629, 0x051A, 0x0429, 0x0356, 0x029E,
604 0x0200, 0x0179, 0x0109, 0x00AF, 0x0068, 0x0034, 0x0012, 0x0002
609 idx = ( a>>27 ) & 15;
611 z = 0x4000 + ( a>>17 ) - sqrtOddAdjustments[ idx ];
612 z = ( ( a / z )<<14 ) + ( z<<15 );
616 z = 0x8000 + ( a>>17 ) - sqrtEvenAdjustments[ idx ];
618 z = ( 0x20000 <= z ) ? 0xFFFF8000 : ( z<<15 );
619 if ( z <= a ) return (bits32) ( ( (bits32) a )>>1 );
621 return ( (bits32) ( ( ( (bits64) a )<<31 ) / z ) ) + ( z>>1 );
627 -------------------------------------------------------------------------------
628 Returns the number of leading 0 bits before the most-significant 1 bit of
629 `a'. If `a' is zero, 32 is returned.
630 -------------------------------------------------------------------------------
632 static int8 countLeadingZeros32( bits32 a )
634 static const int8 countLeadingZerosHigh[] = {
635 8, 7, 6, 6, 5, 5, 5, 5, 4, 4, 4, 4, 4, 4, 4, 4,
636 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
637 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
638 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
639 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
640 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
641 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
642 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
643 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
644 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
645 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
646 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
647 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
648 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
649 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
650 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
659 if ( a < 0x1000000 ) {
663 shiftCount += countLeadingZerosHigh[ a>>24 ];
669 -------------------------------------------------------------------------------
670 Returns the number of leading 0 bits before the most-significant 1 bit of
671 `a'. If `a' is zero, 64 is returned.
672 -------------------------------------------------------------------------------
674 static int8 countLeadingZeros64( bits64 a )
679 if ( a < ( (bits64) 1 )<<32 ) {
685 shiftCount += (int8)countLeadingZeros32( (bits32)a );
691 -------------------------------------------------------------------------------
692 Returns 1 if the 128-bit value formed by concatenating `a0' and `a1'
693 is equal to the 128-bit value formed by concatenating `b0' and `b1'.
694 Otherwise, returns 0.
695 -------------------------------------------------------------------------------
697 INLINE flag eq128( bits64 a0, bits64 a1, bits64 b0, bits64 b1 )
700 return ( a0 == b0 ) && ( a1 == b1 );
705 -------------------------------------------------------------------------------
706 Returns 1 if the 128-bit value formed by concatenating `a0' and `a1' is less
707 than or equal to the 128-bit value formed by concatenating `b0' and `b1'.
708 Otherwise, returns 0.
709 -------------------------------------------------------------------------------
711 INLINE flag le128( bits64 a0, bits64 a1, bits64 b0, bits64 b1 )
714 return ( a0 < b0 ) || ( ( a0 == b0 ) && ( a1 <= b1 ) );
719 -------------------------------------------------------------------------------
720 Returns 1 if the 128-bit value formed by concatenating `a0' and `a1' is less
721 than the 128-bit value formed by concatenating `b0' and `b1'. Otherwise,
723 -------------------------------------------------------------------------------
725 INLINE flag lt128( bits64 a0, bits64 a1, bits64 b0, bits64 b1 )
728 return ( a0 < b0 ) || ( ( a0 == b0 ) && ( a1 < b1 ) );
733 -------------------------------------------------------------------------------
734 Returns 1 if the 128-bit value formed by concatenating `a0' and `a1' is
735 not equal to the 128-bit value formed by concatenating `b0' and `b1'.
736 Otherwise, returns 0.
737 -------------------------------------------------------------------------------
739 INLINE flag ne128( bits64 a0, bits64 a1, bits64 b0, bits64 b1 )
742 return ( a0 != b0 ) || ( a1 != b1 );