1 /*============================================================================
2 This C source fragment is part of the SoftFloat IEC/IEEE Floating-point
3 Arithmetic Package, Release 2b.
5 Written by John R. Hauser. This work was made possible in part by the
6 International Computer Science Institute, located at Suite 600, 1947 Center
7 Street, Berkeley, California 94704. Funding was partially provided by the
8 National Science Foundation under grant MIP-9311980. The original version
9 of this code was written as part of a project to build a fixed-point vector
10 processor in collaboration with the University of California at Berkeley,
11 overseen by Profs. Nelson Morgan and John Wawrzynek. More information
12 is available through the Web page `http://www.cs.berkeley.edu/~jhauser/
13 arithmetic/SoftFloat.html'.
15 THIS SOFTWARE IS DISTRIBUTED AS IS, FOR FREE. Although reasonable effort has
16 been made to avoid it, THIS SOFTWARE MAY CONTAIN FAULTS THAT WILL AT TIMES
17 RESULT IN INCORRECT BEHAVIOR. USE OF THIS SOFTWARE IS RESTRICTED TO PERSONS
18 AND ORGANIZATIONS WHO CAN AND WILL TAKE FULL RESPONSIBILITY FOR ALL LOSSES,
19 COSTS, OR OTHER PROBLEMS THEY INCUR DUE TO THE SOFTWARE, AND WHO FURTHERMORE
20 EFFECTIVELY INDEMNIFY JOHN HAUSER AND THE INTERNATIONAL COMPUTER SCIENCE
21 INSTITUTE (possibly via similar legal notice) AGAINST ALL LOSSES, COSTS, OR
22 OTHER PROBLEMS INCURRED BY THEIR CUSTOMERS AND CLIENTS DUE TO THE SOFTWARE.
24 Derivative works are acceptable, even for commercial purposes, so long as
25 (1) the source code for the derivative work includes prominent notice that
26 the work is derivative, and (2) the source code includes prominent notice with
27 these four paragraphs for those parts of this code that are retained.
28 =============================================================================*/
30 /*============================================================================
31 * Adapted for Bochs (x86 architecture simulator) by
32 * Stanislav Shwartsman [sshwarts at sourceforge net]
33 * ==========================================================================*/
35 #ifndef _SOFTFLOAT_MACROS_H_
36 #define _SOFTFLOAT_MACROS_H_
38 /*----------------------------------------------------------------------------
39 | Shifts `a' right by the number of bits given in `count'. If any nonzero
40 | bits are shifted off, they are ``jammed'' into the least significant bit of
41 | the result by setting the least significant bit to 1. The value of `count'
42 | can be arbitrarily large; in particular, if `count' is greater than 32, the
43 | result will be either 0 or 1, depending on whether `a' is zero or nonzero.
44 | The result is returned as the function value.
45 *----------------------------------------------------------------------------*/
/* Jamming right shift of a 32-bit value; the result type is Bit32u.
 * NOTE(review): this extract shows only the signature and the `count < 32'
 * branch.  The `if' that this `else if' continues, the declaration of `z',
 * the remaining branch(es) and the return statement are not visible here. */
47 BX_CPP_INLINE Bit32u
shift32RightJamming(Bit32u a
, int count
)
54 else if (count
< 32) {
/* (-count) & 31 equals (32 - count) modulo 32.  For count in 1..31 the left
 * shift therefore keeps exactly the low `count' bits that `a >> count'
 * drops; if any of them is set the comparison yields 1, which is OR-ed into
 * bit 0 of the result (the "jam"). */
55 z
= (a
>>count
) | ((a
<<((-count
) & 31)) != 0);
64 /*----------------------------------------------------------------------------
65 | Shifts `a' right by the number of bits given in `count'. If any nonzero
66 | bits are shifted off, they are ``jammed'' into the least significant bit of
67 | the result by setting the least significant bit to 1. The value of `count'
68 | can be arbitrarily large; in particular, if `count' is greater than 64, the
69 | result will be either 0 or 1, depending on whether `a' is zero or nonzero.
70 | The result is returned as the function value.
71 *----------------------------------------------------------------------------*/
/* Jamming right shift of a 64-bit value; the result type is Bit64u.
 * NOTE(review): this extract shows only the signature and the `count < 64'
 * branch.  The `if' that this `else if' continues, the declaration of `z',
 * the remaining branch(es) and the return statement are not visible here. */
73 BX_CPP_INLINE Bit64u
shift64RightJamming(Bit64u a
, int count
)
80 else if (count
< 64) {
/* (-count) & 63 equals (64 - count) modulo 64.  For count in 1..63 the left
 * shift therefore keeps exactly the low `count' bits that `a >> count'
 * drops; if any of them is set the comparison yields 1, which is OR-ed into
 * bit 0 of the result (the "jam"). */
81 z
= (a
>>count
) | ((a
<< ((-count
) & 63)) != 0);
90 /*----------------------------------------------------------------------------
91 | Shifts the 128-bit value formed by concatenating `a0' and `a1' right by 64
92 | _plus_ the number of bits given in `count'. The shifted result is at most
93 | 64 nonzero bits; this is stored at the location pointed to by `z0Ptr'. The
94 | bits shifted off form a second 64-bit result as follows: The _last_ bit
95 | shifted off is the most-significant bit of the extra result, and the other
96 | 63 bits of the extra result are all zero if and only if _all_but_the_last_
97 | bits shifted off were all zero. This extra result is stored in the location
98 | pointed to by `z1Ptr'. The value of `count' can be arbitrarily large.
99 | (This routine makes more sense if `a0' and `a1' are considered to form
100 | a fixed-point value with binary point between `a0' and `a1'. This fixed-
101 | point value is shifted right by the number of bits given in `count', and
102 | the integer part of the result is returned at the location pointed to by
103 | `z0Ptr'. The fractional part of the result may be slightly corrupted as
104 | described above, and is returned at the location pointed to by `z1Ptr'.)
105 *----------------------------------------------------------------------------*/
/* NOTE(review): parts of this definition are not visible in this extract
 * (for example the `if' that the `else if' below continues, and whatever
 * encloses the last assignment to `z1'). */
108 shift64ExtraRightJamming(
109 Bit64u a0
, Bit64u a1
, int count
, Bit64u
*z0Ptr
, Bit64u
*z1Ptr
)
/* negCount is (64 - count) modulo 64: the complementary shift distance. */
112 int negCount
= (-count
) & 63;
118 else if (count
< 64) {
/* The bits shifted out of `a0' land at the top of the extra word (the last
 * bit shifted out becomes its most-significant bit); a nonzero `a1' is
 * reduced to a single 1 in bit 0. */
119 z1
= (a0
<<negCount
) | (a1
!= 0);
/* Here the extra word collapses to a 0/1 flag telling whether anything in
 * a0:a1 was nonzero.  NOTE(review): the condition guarding this assignment
 * is not visible in this extract. */
127 z1
= ((a0
| a1
) != 0);
135 /*----------------------------------------------------------------------------
136 | Adds the 128-bit value formed by concatenating `a0' and `a1' to the 128-bit
137 | value formed by concatenating `b0' and `b1'. Addition is modulo 2^128, so
138 | any carry out is lost. The result is broken into two 64-bit pieces which
139 | are stored at the locations pointed to by `z0Ptr' and `z1Ptr'.
140 *----------------------------------------------------------------------------*/
/* NOTE(review): the return-type line, the braces and the computation of the
 * low word are not visible in this extract. */
143 add128(Bit64u a0
, Bit64u a1
, Bit64u b0
, Bit64u b1
, Bit64u
*z0Ptr
, Bit64u
*z1Ptr
)
/* High word = a0 + b0 + carry.  NOTE(review): `z1' is neither declared nor
 * assigned in the visible lines; presumably it holds a1 + b1 modulo 2^64, in
 * which case `z1 < a1' is 1 exactly when that low-word addition wrapped,
 * i.e. it is the carry into the high word -- confirm against the full
 * source. */
147 *z0Ptr
= a0
+ b0
+ (z1
< a1
);
150 /*----------------------------------------------------------------------------
151 | Subtracts the 128-bit value formed by concatenating `b0' and `b1' from the
152 | 128-bit value formed by concatenating `a0' and `a1'. Subtraction is modulo
153 | 2^128, so any borrow out (carry out) is lost. The result is broken into two
154 | 64-bit pieces which are stored at the locations pointed to by `z0Ptr' and `z1Ptr'.
156 *----------------------------------------------------------------------------*/
/* NOTE(review): the return-type line, the braces and the store through
 * `z1Ptr' are not visible in this extract. */
159 sub128(Bit64u a0
, Bit64u a1
, Bit64u b0
, Bit64u b1
, Bit64u
*z0Ptr
, Bit64u
*z1Ptr
)
/* High word = a0 - b0 - borrow.  `a1 < b1' is 1 exactly when subtracting the
 * low words (a1 - b1) would need to borrow from the high word. */
162 *z0Ptr
= a0
- b0
- (a1
< b1
);
165 /*----------------------------------------------------------------------------
166 | Multiplies `a' by `b' to obtain a 128-bit product. The product is broken
167 | into two 64-bit pieces which are stored at the locations pointed to by
168 | `z0Ptr' and `z1Ptr'.
169 *----------------------------------------------------------------------------*/
/* 64x64 -> 128-bit multiplication built from 32-bit halves of `a' and `b'.
 * NOTE(review): the assignments of `aLow'/`bLow', the step that folds the
 * middle sum into `z1', and the final stores through `z0Ptr'/`z1Ptr' are not
 * visible in this extract. */
171 BX_CPP_INLINE
void mul64To128(Bit64u a
, Bit64u b
, Bit64u
*z0Ptr
, Bit64u
*z1Ptr
)
173 Bit32u aHigh
, aLow
, bHigh
, bLow
;
174 Bit64u z0
, zMiddleA
, zMiddleB
, z1
;
/* Upper 32 bits of each operand. */
177 aHigh
= (Bit32u
)(a
>>32);
179 bHigh
= (Bit32u
)(b
>>32);
/* Four partial products.  Each left operand is widened to Bit64u before the
 * multiplication so that the full 64-bit product is kept. */
180 z1
= ((Bit64u
) aLow
) * bLow
;
181 zMiddleA
= ((Bit64u
) aLow
) * bHigh
;
182 zMiddleB
= ((Bit64u
) aHigh
) * bLow
;
183 z0
= ((Bit64u
) aHigh
) * bHigh
;
/* Sum of the two middle products; this addition may wrap modulo 2^64. */
184 zMiddleA
+= zMiddleB
;
/* `zMiddleA < zMiddleB' is 1 exactly when the addition above wrapped.  The
 * middle terms carry a weight of 2^32, so that lost 2^64 corresponds to bit
 * 32 of the high word; `zMiddleA >> 32' is the part of the middle sum that
 * belongs to the high word. */
185 z0
+= (((Bit64u
) (zMiddleA
< zMiddleB
))<<32) + (zMiddleA
>>32);
/* NOTE(review): presumably `zMiddleA' has been shifted left by 32 and added
 * into `z1' on lines not shown here, which would make `z1 < zMiddleA' the
 * carry out of the low word -- confirm against the full source. */
188 z0
+= (z1
< zMiddleA
);
193 /*----------------------------------------------------------------------------
194 | Returns an approximation to the 64-bit integer quotient obtained by dividing
195 | `b' into the 128-bit value formed by concatenating `a0' and `a1'. The
196 | divisor `b' must be at least 2^63. If q is the exact quotient truncated
197 | toward zero, the approximation returned lies between q and q + 2 inclusive.
198 | If the exact quotient q is larger than 64 bits, the maximum positive 64-bit
199 | unsigned integer is returned.
200 *----------------------------------------------------------------------------*/
202 #ifdef USE_estimateDiv128To64
/* NOTE(review): `z', `b0' and `b1' are used below but their declarations and
 * assignments are not visible in this extract; neither are the braces or the
 * final return statement. */
203 static Bit64u
estimateDiv128To64(Bit64u a0
, Bit64u a1
, Bit64u b
)
206 Bit64u rem0
, rem1
, term0
, term1
;
/* If b <= a0 the quotient of a0:a1 by b is at least 2^64 and cannot be
 * represented, so the all-ones value is returned. */
209 if (b
<= a0
) return BX_CONST64(0xFFFFFFFFFFFFFFFF);
/* First estimate, placed in the upper 32 bits of `z' and saturated at
 * 0xFFFFFFFF00000000.  NOTE(review): `b0' presumably holds the upper 32 bits
 * of `b' -- its assignment is not shown. */
211 z
= (b0
<<32 <= a0
) ? BX_CONST64(0xFFFFFFFF00000000) : (a0
/ b0
)<<32;
/* rem0:rem1 = a0:a1 - b * z. */
212 mul64To128(b
, z
, &term0
, &term1
);
213 sub128(a0
, a1
, term0
, term1
, &rem0
, &rem1
);
/* A set sign bit in rem0 means the estimate was too large: lower `z' by one
 * unit of its upper half (2^32) and add b0:b1 back to the remainder. */
214 while (((Bit64s
) rem0
) < 0) {
215 z
-= BX_CONST64(0x100000000);
217 add128(rem0
, rem1
, b0
, b1
, &rem0
, &rem1
);
/* Keep bits 32..95 of the 128-bit remainder for the second division. */
219 rem0
= (rem0
<<32) | (rem1
>>32);
/* Fill in the lower 32 bits of `z', saturating at 0xFFFFFFFF. */
220 z
|= (b0
<<32 <= rem0
) ? 0xFFFFFFFF : rem0
/ b0
;
225 /*----------------------------------------------------------------------------
226 | Returns an approximation to the square root of the 32-bit significand given
227 | by `a'. Considered as an integer, `a' must be at least 2^31. If bit 0 of
228 | `aExp' (the least significant bit) is 1, the integer returned approximates
229 | 2^31*sqrt(`a'/2^31), where `a' is considered an integer. If bit 0 of `aExp'
230 | is 0, the integer returned approximates 2^31*sqrt(`a'/2^30). In either
231 | case, the approximation returned lies strictly within +/-2 of the exact value.
233 *----------------------------------------------------------------------------*/
235 #ifdef USE_estimateSqrt32
/* NOTE(review): the declaration of `z', the test that selects between the
 * odd and even tables, the closing braces of the tables and at least one
 * statement between the visible lines are missing from this extract. */
236 static Bit32u
estimateSqrt32(Bit16s aExp
, Bit32u a
)
/* Two 16-entry adjustment tables, both indexed by `index' below. */
238 static const Bit16u sqrtOddAdjustments
[] = {
239 0x0004, 0x0022, 0x005D, 0x00B1, 0x011D, 0x019F, 0x0236, 0x02E0,
240 0x039C, 0x0468, 0x0545, 0x0631, 0x072B, 0x0832, 0x0946, 0x0A67
242 static const Bit16u sqrtEvenAdjustments
[] = {
243 0x0A2D, 0x08AF, 0x075A, 0x0629, 0x051A, 0x0429, 0x0356, 0x029E,
244 0x0200, 0x0179, 0x0109, 0x00AF, 0x0068, 0x0034, 0x0012, 0x0002
/* Bits 27..30 of `a' select the table entry (0..15). */
248 int index
= (a
>>27) & 15;
/* First estimate using the odd-exponent table, then one refinement. */
250 z
= 0x4000 + (a
>>17) - sqrtOddAdjustments
[index
];
251 z
= ((a
/ z
)<<14) + (z
<<15);
/* First estimate using the even-exponent table. */
255 z
= 0x8000 + (a
>>17) - sqrtEvenAdjustments
[index
];
/* Scale the estimate by 2^15, saturating at 0xFFFF8000 when z >= 0x20000
 * (the shift would no longer fit in 32 bits). */
257 z
= (0x20000 <= z
) ? 0xFFFF8000 : (z
<<15);
/* NOTE(review): `a' is documented as >= 2^31, so (Bit32s) a is presumably
 * negative; right-shifting a negative signed value is implementation-defined
 * in C -- confirm the intended result on the target compilers. */
258 if (z
<= a
) return (Bit32u
) (((Bit32s
) a
)>>1);
/* (a << 31) / z + z / 2 has the shape of an averaging (Newton-Raphson style)
 * step, (z + (a << 32) / z) / 2, up to rounding. */
260 return ((Bit32u
) ((((Bit64u
) a
)<<31) / z
)) + (z
>>1);
264 /*----------------------------------------------------------------------------
265 | Returns the number of leading 0 bits before the most-significant 1 bit of
266 | `a'. If `a' is zero, 32 is returned.
267 *----------------------------------------------------------------------------*/
/* NOTE(review): the declaration of `shiftCount', any statements between the
 * table and the lookup, and the return statement are not visible in this
 * extract. */
269 static int countLeadingZeros32(Bit32u a
)
/* countLeadingZerosHigh[v] is the number of leading zero bits of the 8-bit
 * value v: 8 for v == 0, 7 for 1, 6 for 2..3, ... and 0 for 128..255
 * (256 entries in total). */
271 static const int countLeadingZerosHigh
[] = {
272 8, 7, 6, 6, 5, 5, 5, 5, 4, 4, 4, 4, 4, 4, 4, 4,
273 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
274 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
275 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
276 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
277 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
278 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
279 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
280 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
281 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
282 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
283 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
284 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
285 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
286 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
287 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
/* The table is indexed with the top byte of `a' as it stands at this point;
 * that byte's leading-zero count is added to the running total. */
298 shiftCount
+= countLeadingZerosHigh
[ a
>>24 ];
302 /*----------------------------------------------------------------------------
303 | Returns the number of leading 0 bits before the most-significant 1 bit of
304 | `a'. If `a' is zero, 64 is returned.
305 *----------------------------------------------------------------------------*/
/* NOTE(review): the declaration of `shiftCount', the bodies of the branches
 * and the return statement are not visible in this extract. */
307 BX_CPP_INLINE
int countLeadingZeros64(Bit64u a
)
/* True exactly when the upper 32 bits of `a' are all zero. */
310 if (a
< ((Bit64u
) 1)<<32) {
/* NOTE(review): the argument is cast to `int' although countLeadingZeros32
 * takes a Bit32u; a (Bit32u) cast would avoid the round-trip through a
 * signed type for values with bit 31 set. */
316 shiftCount
+= countLeadingZeros32((int)(a
));
322 /*----------------------------------------------------------------------------
323 | Shifts the 128-bit value formed by concatenating `a0' and `a1' right by the
324 | number of bits given in `count'. Any bits shifted off are lost. The value
325 | of `count' can be arbitrarily large; in particular, if `count' is greater
326 | than 128, the result will be 0. The result is broken into two 64-bit pieces
327 | which are stored at the locations pointed to by `z0Ptr' and `z1Ptr'.
328 *----------------------------------------------------------------------------*/
331 shift128Right(Bit64u a0
, Bit64u a1
, int count
, Bit64u
*z0Ptr
, Bit64u
*z1Ptr
)
334 int negCount
= (-count
) & 63;
340 else if (count
< 64) {
341 z1
= (a0
<<negCount
) | (a1
>>count
);
345 z1
= (count
< 64) ? (a0
>>(count
& 63)) : 0;
352 /*----------------------------------------------------------------------------
353 | Shifts the 128-bit value formed by concatenating `a0' and `a1' right by the
354 | number of bits given in `count'. If any nonzero bits are shifted off, they
355 | are ``jammed'' into the least significant bit of the result by setting the
356 | least significant bit to 1. The value of `count' can be arbitrarily large;
357 | in particular, if `count' is greater than 128, the result will be either
358 | 0 or 1, depending on whether the concatenation of `a0' and `a1' is zero or
359 | nonzero. The result is broken into two 64-bit pieces which are stored at
360 | the locations pointed to by `z0Ptr' and `z1Ptr'.
361 *----------------------------------------------------------------------------*/
/* NOTE(review): the return-type line, the declarations of `z0'/`z1', the
 * first `if', any branch between the two visible `else if's, and the final
 * stores are not visible in this extract. */
364 shift128RightJamming(
365 Bit64u a0
, Bit64u a1
, int count
, Bit64u
*z0Ptr
, Bit64u
*z1Ptr
)
/* negCount is (64 - count) modulo 64: the complementary shift distance. */
368 int negCount
= (-count
) & 63;
374 else if (count
< 64) {
/* Low word = bits moving down from a0, the surviving bits of a1, and a
 * sticky 1 in bit 0 if any of the bits shifted out of a1 was set. */
375 z1
= (a0
<<negCount
) | (a1
>>count
) | ((a1
<<negCount
) != 0);
382 else if (count
< 128) {
/* For 64 <= count < 128, `count & 63' equals count - 64.  The sticky bit
 * covers the bits of a0 that are shifted out plus all of a1.
 * NOTE(review): for count == 64 negCount is 0 and `a0 << negCount' would be
 * all of a0; presumably that case is handled by a branch on the lines
 * missing before this one -- confirm. */
383 z1
= (a0
>>(count
& 63)) | (((a0
<<negCount
) | a1
) != 0);
/* Remaining case: the low word is just the 0/1 sticky flag for a0:a1. */
386 z1
= ((a0
| a1
) != 0);
394 /*----------------------------------------------------------------------------
395 | Shifts the 128-bit value formed by concatenating `a0' and `a1' left by the
396 | number of bits given in `count'. Any bits shifted off are lost. The value
397 | of `count' must be less than 64. The result is broken into two 64-bit
398 | pieces which are stored at the locations pointed to by `z0Ptr' and `z1Ptr'.
399 *----------------------------------------------------------------------------*/
403 Bit64u a0
, Bit64u a1
, int count
, Bit64u
*z0Ptr
, Bit64u
*z1Ptr
)
/* NOTE(review): the function name, return type and the store through
 * `z1Ptr' are not visible in this extract.
 * High word of the left-shifted value: the bits of a0 moved up by `count',
 * OR-ed with the top `count' bits of a1 ((-count) & 63 is 64 - count for
 * count in 1..63).  count == 0 is special-cased because (-0) & 63 is 0 and
 * `a1 >> 0' would merge all of a1 into the high word. */
406 *z0Ptr
= (count
== 0) ? a0
: (a0
<<count
) | (a1
>>((-count
) & 63));
409 /*----------------------------------------------------------------------------
410 | Adds the 192-bit value formed by concatenating `a0', `a1', and `a2' to the
411 | 192-bit value formed by concatenating `b0', `b1', and `b2'. Addition is
412 | modulo 2^192, so any carry out is lost. The result is broken into three
413 | 64-bit pieces which are stored at the locations pointed to by `z0Ptr',
414 | `z1Ptr', and `z2Ptr'.
415 *----------------------------------------------------------------------------*/
/* NOTE(review): only the start of the header and one declaration of this
 * function are visible in this extract; the parameter list and the rest of
 * the body are missing. */
417 BX_CPP_INLINE
void add192(
/* Two locals named as carries; how they are computed is not shown here. */
430 unsigned carry0
, carry1
;
445 /*----------------------------------------------------------------------------
446 | Subtracts the 192-bit value formed by concatenating `b0', `b1', and `b2'
447 | from the 192-bit value formed by concatenating `a0', `a1', and `a2'.
448 | Subtraction is modulo 2^192, so any borrow out (carry out) is lost. The
449 | result is broken into three 64-bit pieces which are stored at the locations
450 | pointed to by `z0Ptr', `z1Ptr', and `z2Ptr'.
451 *----------------------------------------------------------------------------*/
/* NOTE(review): only the start of the header, one declaration and one
 * statement of this function are visible in this extract. */
453 BX_CPP_INLINE
void sub192(
466 unsigned borrow0
, borrow1
;
/* `z1 < borrow1' is 1 only when z1 is smaller than the pending borrow, i.e.
 * when taking `borrow1' out of `z1' would itself underflow; that secondary
 * borrow is removed from the top word.  NOTE(review): this reading assumes
 * `borrow1' is 0 or 1 and is subtracted from `z1' on a line not shown. */
473 z0
-= (z1
< borrow1
);
481 /*----------------------------------------------------------------------------
482 | Returns 1 if the 128-bit value formed by concatenating `a0' and `a1'
483 | is equal to the 128-bit value formed by concatenating `b0' and `b1'.
484 | Otherwise, returns 0.
485 *----------------------------------------------------------------------------*/
487 BX_CPP_INLINE
int eq128(Bit64u a0
, Bit64u a1
, Bit64u b0
, Bit64u b1
)
489 return (a0
== b0
) && (a1
== b1
);
492 /*----------------------------------------------------------------------------
493 | Returns 1 if the 128-bit value formed by concatenating `a0' and `a1' is less
494 | than or equal to the 128-bit value formed by concatenating `b0' and `b1'.
495 | Otherwise, returns 0.
496 *----------------------------------------------------------------------------*/
498 BX_CPP_INLINE
int le128(Bit64u a0
, Bit64u a1
, Bit64u b0
, Bit64u b1
)
500 return (a0
< b0
) || ((a0
== b0
) && (a1
<= b1
));
503 /*----------------------------------------------------------------------------
504 | Returns 1 if the 128-bit value formed by concatenating `a0' and `a1' is less
505 | than the 128-bit value formed by concatenating `b0' and `b1'. Otherwise, returns 0.
507 *----------------------------------------------------------------------------*/
509 BX_CPP_INLINE
int lt128(Bit64u a0
, Bit64u a1
, Bit64u b0
, Bit64u b1
)
511 return (a0
< b0
) || ((a0
== b0
) && (a1
< b1
));
514 #endif /* FLOATX80 */
516 /*----------------------------------------------------------------------------
517 | Multiplies the 128-bit value formed by concatenating `a0' and `a1' by
518 | `b' to obtain a 192-bit product. The product is broken into three 64-bit
519 | pieces which are stored at the locations pointed to by `z0Ptr', `z1Ptr', and `z2Ptr'.
521 *----------------------------------------------------------------------------*/
/* NOTE(review): the parameter list, the braces and the final stores of this
 * function are not visible in this extract. */
523 BX_CPP_INLINE
void mul128By64To192(
532 Bit64u z0
, z1
, z2
, more1
;
/* a1 * b -> z1:z2 (z2 is the lowest word of the product). */
534 mul64To128(a1
, b
, &z1
, &z2
);
/* a0 * b -> z0:more1. */
535 mul64To128(a0
, b
, &z0
, &more1
);
/* Add the upper word of the first product (z1) to the lower word of the
 * second (more1); add128 propagates the carry into z0.  Result: z0:z1. */
536 add128(z0
, more1
, 0, z1
, &z0
, &z1
);
544 /*----------------------------------------------------------------------------
545 | Multiplies the 128-bit value formed by concatenating `a0' and `a1' by the
546 | 128-bit value formed by concatenating `b0' and `b1' to obtain a 256-bit
547 | product. The product is broken into four 64-bit pieces which are stored at
548 | the locations pointed to by `z0Ptr', `z1Ptr', `z2Ptr', and `z3Ptr'.
549 *----------------------------------------------------------------------------*/
/* NOTE(review): the parameter list, the declarations of `more1'/`more2', the
 * braces and the final stores of this function are not visible in this
 * extract. */
551 BX_CPP_INLINE
void mul128To256(
562 Bit64u z0
, z1
, z2
, z3
;
/* a1 * b1 -> z2:z3 (z3 is the lowest word of the product). */
565 mul64To128(a1
, b1
, &z2
, &z3
);
/* a1 * b0 -> z1:more2, then fold more2 into z2 with the carry going to z1. */
566 mul64To128(a1
, b0
, &z1
, &more2
);
567 add128(z1
, more2
, 0, z2
, &z1
, &z2
);
/* a0 * b0 -> z0:more1, then fold more1 into z1 with the carry going to z0. */
568 mul64To128(a0
, b0
, &z0
, &more1
);
569 add128(z0
, more1
, 0, z1
, &z0
, &z1
);
/* a0 * b1 -> more1:more2; more2 is added into z2 (carry into more1), and
 * the resulting more1 is then added into z1 (carry into z0). */
570 mul64To128(a0
, b1
, &more1
, &more2
);
571 add128(more1
, more2
, 0, z2
, &more1
, &z2
);
572 add128(z0
, z1
, 0, more1
, &z0
, &z1
);
580 /*----------------------------------------------------------------------------
581 | Shifts the 192-bit value formed by concatenating `a0', `a1', and `a2' right
582 | by 64 _plus_ the number of bits given in `count'. The shifted result is
583 | at most 128 nonzero bits; these are broken into two 64-bit pieces which are
584 | stored at the locations pointed to by `z0Ptr' and `z1Ptr'. The bits shifted
585 | off form a third 64-bit result as follows: The _last_ bit shifted off is
586 | the most-significant bit of the extra result, and the other 63 bits of the
587 | extra result are all zero if and only if _all_but_the_last_ bits shifted off
588 | were all zero. This extra result is stored in the location pointed to by
589 | `z2Ptr'. The value of `count' can be arbitrarily large.
590 | (This routine makes more sense if `a0', `a1', and `a2' are considered
591 | to form a fixed-point value with binary point between `a1' and `a2'. This
592 | fixed-point value is shifted right by the number of bits given in `count',
593 | and the integer part of the result is returned at the locations pointed to
594 | by `z0Ptr' and `z1Ptr'. The fractional part of the result may be slightly
595 | corrupted as described above, and is returned at the location pointed to by `z2Ptr'.)
597 *----------------------------------------------------------------------------*/
/* NOTE(review): the parameter list, every branch condition, the remaining
 * assignments and the final stores of this function are not visible in this
 * extract; only four statements are shown. */
599 BX_CPP_INLINE
void shift128ExtraRightJamming(
/* negCount is (64 - count) modulo 64: the complementary shift distance. */
610 int negCount
= (-count
) & 63;
/* Low result word when bits cross from a0 into a1's position: the bits
 * leaving a0 are merged with the surviving bits of a1. */
620 z1
= (a0
<<negCount
) | (a1
>>count
);
/* Low result word taken from a0 alone; `count & 63' reduces the distance
 * modulo 64. */
632 z1
= a0
>>(count
& 63);
/* Extra word: all of a0 when count is exactly 128, otherwise only a 0/1
 * flag recording whether a0 was nonzero. */
635 z2
= (count
== 128) ? a0
: (a0
!= 0);
648 #endif /* FLOAT128 */