/*============================================================================
This C source file is part of the SoftFloat IEC/IEEE Floating-point Arithmetic
Package, Release 2b.

Written by John R. Hauser. This work was made possible in part by the
International Computer Science Institute, located at Suite 600, 1947 Center
Street, Berkeley, California 94704. Funding was partially provided by the
National Science Foundation under grant MIP-9311980. The original version
of this code was written as part of a project to build a fixed-point vector
processor in collaboration with the University of California at Berkeley,
overseen by Profs. Nelson Morgan and John Wawrzynek. More information
is available through the Web page `http://www.cs.berkeley.edu/~jhauser/
arithmetic/SoftFloat.html'.

THIS SOFTWARE IS DISTRIBUTED AS IS, FOR FREE. Although reasonable effort has
been made to avoid it, THIS SOFTWARE MAY CONTAIN FAULTS THAT WILL AT TIMES
RESULT IN INCORRECT BEHAVIOR. USE OF THIS SOFTWARE IS RESTRICTED TO PERSONS
AND ORGANIZATIONS WHO CAN AND WILL TAKE FULL RESPONSIBILITY FOR ALL LOSSES,
COSTS, OR OTHER PROBLEMS THEY INCUR DUE TO THE SOFTWARE, AND WHO FURTHERMORE
EFFECTIVELY INDEMNIFY JOHN HAUSER AND THE INTERNATIONAL COMPUTER SCIENCE
INSTITUTE (possibly via similar legal warning) AGAINST ALL LOSSES, COSTS, OR
OTHER PROBLEMS INCURRED BY THEIR CUSTOMERS AND CLIENTS DUE TO THE SOFTWARE.

Derivative works are acceptable, even for commercial purposes, so long as
(1) the source code for the derivative work includes prominent notice that
the work is derivative, and (2) the source code includes prominent notice with
these four paragraphs for those parts of this code that are retained.
=============================================================================*/
/*============================================================================
 * Adapted for Bochs (x86 architecture simulator) by
 *            Stanislav Shwartsman [sshwarts at sourceforge net]
 * ==========================================================================*/
37 #include "softfloat.h"
38 #include "softfloat-round-pack.h"
/*----------------------------------------------------------------------------
| Primitive arithmetic functions, including multi-word arithmetic, and
| division and square root approximations. (Can be specialized to target
| if desired.)
*----------------------------------------------------------------------------*/
45 #include "softfloat-macros.h"
/*----------------------------------------------------------------------------
| Functions and definitions to determine: (1) whether tininess for underflow
| is detected before or after rounding by default, (2) what (if anything)
| happens when exceptions are raised, (3) how signaling NaNs are distinguished
| from quiet NaNs, (4) the default generated quiet NaNs, and (5) how NaNs
| are propagated from function inputs to output. These details are target-
| specific.
*----------------------------------------------------------------------------*/
55 #include "softfloat-specialize.h"
57 /*----------------------------------------------------------------------------
58 | Takes a 64-bit fixed-point value `absZ' with binary point between bits 6
59 | and 7, and returns the properly rounded 32-bit integer corresponding to the
60 | input. If `zSign' is 1, the input is negated before being converted to an
61 | integer. Bit 63 of `absZ' must be zero. Ordinarily, the fixed-point input
62 | is simply rounded to an integer, with the inexact exception raised if the
63 | input cannot be represented exactly as an integer. However, if the fixed-
64 | point input is too large, the invalid exception is raised and the integer
65 | indefinite value is returned.
66 *----------------------------------------------------------------------------*/
68 Bit32s
roundAndPackInt32(int zSign
, Bit64u absZ
, float_status_t
&status
)
70 int roundingMode
= get_float_rounding_mode(status
);
71 int roundNearestEven
= (roundingMode
== float_round_nearest_even
);
72 int roundIncrement
= 0x40;
73 if (! roundNearestEven
) {
74 if (roundingMode
== float_round_to_zero
) roundIncrement
= 0;
76 roundIncrement
= 0x7F;
78 if (roundingMode
== float_round_up
) roundIncrement
= 0;
81 if (roundingMode
== float_round_down
) roundIncrement
= 0;
85 int roundBits
= absZ
& 0x7F;
86 absZ
= (absZ
+ roundIncrement
)>>7;
87 absZ
&= ~(((roundBits
^ 0x40) == 0) & roundNearestEven
);
90 if ((absZ
>>32) || (z
&& ((z
< 0) ^ zSign
))) {
91 float_raise(status
, float_flag_invalid
);
92 return (Bit32s
)(int32_indefinite
);
94 if (roundBits
) float_raise(status
, float_flag_inexact
);
98 /*----------------------------------------------------------------------------
99 | Takes the 128-bit fixed-point value formed by concatenating `absZ0' and
100 | `absZ1', with binary point between bits 63 and 64 (between the input words),
101 | and returns the properly rounded 64-bit integer corresponding to the input.
102 | If `zSign' is 1, the input is negated before being converted to an integer.
103 | Ordinarily, the fixed-point input is simply rounded to an integer, with
104 | the inexact exception raised if the input cannot be represented exactly as
105 | an integer. However, if the fixed-point input is too large, the invalid
106 | exception is raised and the integer indefinite value is returned.
107 *----------------------------------------------------------------------------*/
109 Bit64s
roundAndPackInt64(int zSign
, Bit64u absZ0
, Bit64u absZ1
, float_status_t
&status
)
112 int roundingMode
= get_float_rounding_mode(status
);
113 int roundNearestEven
= (roundingMode
== float_round_nearest_even
);
114 int increment
= ((Bit64s
) absZ1
< 0);
115 if (! roundNearestEven
) {
116 if (roundingMode
== float_round_to_zero
) increment
= 0;
119 increment
= (roundingMode
== float_round_down
) && absZ1
;
122 increment
= (roundingMode
== float_round_up
) && absZ1
;
128 if (absZ0
== 0) goto overflow
;
129 absZ0
&= ~(((Bit64u
) (absZ1
<<1) == 0) & roundNearestEven
);
133 if (z
&& ((z
< 0) ^ zSign
)) {
135 float_raise(status
, float_flag_invalid
);
136 return (Bit64s
)(int64_indefinite
);
138 if (absZ1
) float_raise(status
, float_flag_inexact
);
142 /*----------------------------------------------------------------------------
143 | Normalizes the subnormal single-precision floating-point value represented
144 | by the denormalized significand `aSig'. The normalized exponent and
145 | significand are stored at the locations pointed to by `zExpPtr' and
146 | `zSigPtr', respectively.
147 *----------------------------------------------------------------------------*/
149 void normalizeFloat32Subnormal(Bit32u aSig
, Bit16s
*zExpPtr
, Bit32u
*zSigPtr
)
151 int shiftCount
= countLeadingZeros32(aSig
) - 8;
152 *zSigPtr
= aSig
<<shiftCount
;
153 *zExpPtr
= 1 - shiftCount
;
156 /*----------------------------------------------------------------------------
157 | Takes an abstract floating-point value having sign `zSign', exponent `zExp',
158 | and significand `zSig', and returns the proper single-precision floating-
159 | point value corresponding to the abstract input. Ordinarily, the abstract
160 | value is simply rounded and packed into the single-precision format, with
161 | the inexact exception raised if the abstract input cannot be represented
162 | exactly. However, if the abstract value is too large, the overflow and
163 | inexact exceptions are raised and an infinity or maximal finite value is
164 | returned. If the abstract value is too small, the input value is rounded to
165 | a subnormal number, and the underflow and inexact exceptions are raised if
166 | the abstract input cannot be represented exactly as a subnormal single-
167 | precision floating-point number.
168 | The input significand `zSig' has its binary point between bits 30
169 | and 29, which is 7 bits to the left of the usual location. This shifted
170 | significand must be normalized or smaller. If `zSig' is not normalized,
171 | `zExp' must be 0; in that case, the result returned is a subnormal number,
172 | and it must not require rounding. In the usual case that `zSig' is
173 | normalized, `zExp' must be 1 less than the ``true'' floating-point exponent.
174 | The handling of underflow and overflow follows the IEC/IEEE Standard for
175 | Binary Floating-Point Arithmetic.
176 *----------------------------------------------------------------------------*/
178 float32
roundAndPackFloat32(int zSign
, Bit16s zExp
, Bit32u zSig
, float_status_t
&status
)
180 Bit32s roundIncrement
, roundBits
, roundMask
;
182 int roundingMode
= get_float_rounding_mode(status
);
183 int roundNearestEven
= (roundingMode
== float_round_nearest_even
);
184 roundIncrement
= 0x40;
187 if (! roundNearestEven
) {
188 if (roundingMode
== float_round_to_zero
) roundIncrement
= 0;
190 roundIncrement
= roundMask
;
192 if (roundingMode
== float_round_up
) roundIncrement
= 0;
195 if (roundingMode
== float_round_down
) roundIncrement
= 0;
199 roundBits
= zSig
& roundMask
;
200 if (0xFD <= (Bit16u
) zExp
) {
203 && ((Bit32s
) (zSig
+ roundIncrement
) < 0)))
205 float_raise(status
, float_flag_overflow
| float_flag_inexact
);
206 return packFloat32(zSign
, 0xFF, 0) - (roundIncrement
== 0);
209 int isTiny
= (zExp
< -1) || (zSig
+ roundIncrement
< 0x80000000);
210 shift32RightJamming(zSig
, -zExp
, &zSig
);
212 roundBits
= zSig
& roundMask
;
213 if (isTiny
&& roundBits
) {
214 float_raise(status
, float_flag_underflow
);
215 if(get_flush_underflow_to_zero(status
)) {
216 float_raise(status
, float_flag_inexact
);
217 return packFloat32(zSign
, 0, 0);
222 if (roundBits
) float_raise(status
, float_flag_inexact
);
223 zSig
= ((zSig
+ roundIncrement
) & ~roundMask
) >> 7;
224 zSig
&= ~(((roundBits
^ 0x40) == 0) & roundNearestEven
);
225 if (zSig
== 0) zExp
= 0;
226 return packFloat32(zSign
, zExp
, zSig
);
229 /*----------------------------------------------------------------------------
230 | Takes an abstract floating-point value having sign `zSign', exponent `zExp',
231 | and significand `zSig', and returns the proper single-precision floating-
232 | point value corresponding to the abstract input. This routine is just like
233 | `roundAndPackFloat32' except that `zSig' does not have to be normalized.
234 | Bit 31 of `zSig' must be zero, and `zExp' must be 1 less than the ``true''
235 | floating-point exponent.
236 *----------------------------------------------------------------------------*/
238 float32
normalizeRoundAndPackFloat32(int zSign
, Bit16s zExp
, Bit32u zSig
, float_status_t
&status
)
240 int shiftCount
= countLeadingZeros32(zSig
) - 1;
241 return roundAndPackFloat32(zSign
, zExp
- shiftCount
, zSig
<<shiftCount
, status
);
244 /*----------------------------------------------------------------------------
245 | Normalizes the subnormal double-precision floating-point value represented
246 | by the denormalized significand `aSig'. The normalized exponent and
247 | significand are stored at the locations pointed to by `zExpPtr' and
248 | `zSigPtr', respectively.
249 *----------------------------------------------------------------------------*/
251 void normalizeFloat64Subnormal(Bit64u aSig
, Bit16s
*zExpPtr
, Bit64u
*zSigPtr
)
253 int shiftCount
= countLeadingZeros64(aSig
) - 11;
254 *zSigPtr
= aSig
<<shiftCount
;
255 *zExpPtr
= 1 - shiftCount
;
258 /*----------------------------------------------------------------------------
259 | Takes an abstract floating-point value having sign `zSign', exponent `zExp',
260 | and significand `zSig', and returns the proper double-precision floating-
261 | point value corresponding to the abstract input. Ordinarily, the abstract
262 | value is simply rounded and packed into the double-precision format, with
263 | the inexact exception raised if the abstract input cannot be represented
264 | exactly. However, if the abstract value is too large, the overflow and
265 | inexact exceptions are raised and an infinity or maximal finite value is
266 | returned. If the abstract value is too small, the input value is rounded
267 | to a subnormal number, and the underflow and inexact exceptions are raised
268 | if the abstract input cannot be represented exactly as a subnormal double-
269 | precision floating-point number.
270 | The input significand `zSig' has its binary point between bits 62
271 | and 61, which is 10 bits to the left of the usual location. This shifted
272 | significand must be normalized or smaller. If `zSig' is not normalized,
273 | `zExp' must be 0; in that case, the result returned is a subnormal number,
274 | and it must not require rounding. In the usual case that `zSig' is
275 | normalized, `zExp' must be 1 less than the ``true'' floating-point exponent.
276 | The handling of underflow and overflow follows the IEC/IEEE Standard for
277 | Binary Floating-Point Arithmetic.
278 *----------------------------------------------------------------------------*/
280 float64
roundAndPackFloat64(int zSign
, Bit16s zExp
, Bit64u zSig
, float_status_t
&status
)
282 Bit16s roundIncrement
, roundBits
;
283 int roundingMode
= get_float_rounding_mode(status
);
284 int roundNearestEven
= (roundingMode
== float_round_nearest_even
);
285 roundIncrement
= 0x200;
286 if (! roundNearestEven
) {
287 if (roundingMode
== float_round_to_zero
) roundIncrement
= 0;
289 roundIncrement
= 0x3FF;
291 if (roundingMode
== float_round_up
) roundIncrement
= 0;
294 if (roundingMode
== float_round_down
) roundIncrement
= 0;
298 roundBits
= zSig
& 0x3FF;
299 if (0x7FD <= (Bit16u
) zExp
) {
302 && ((Bit64s
) (zSig
+ roundIncrement
) < 0)))
304 float_raise(status
, float_flag_overflow
| float_flag_inexact
);
305 return packFloat64(zSign
, 0x7FF, 0) - (roundIncrement
== 0);
308 int isTiny
= (zExp
< -1) || (zSig
+ roundIncrement
< BX_CONST64(0x8000000000000000));
309 shift64RightJamming(zSig
, -zExp
, &zSig
);
311 roundBits
= zSig
& 0x3FF;
312 if (isTiny
&& roundBits
) {
313 float_raise(status
, float_flag_underflow
);
314 if(get_flush_underflow_to_zero(status
)) {
315 float_raise(status
, float_flag_inexact
);
316 return packFloat64(zSign
, 0, 0);
321 if (roundBits
) float_raise(status
, float_flag_inexact
);
322 zSig
= (zSig
+ roundIncrement
)>>10;
323 zSig
&= ~(((roundBits
^ 0x200) == 0) & roundNearestEven
);
324 if (zSig
== 0) zExp
= 0;
325 return packFloat64(zSign
, zExp
, zSig
);
328 /*----------------------------------------------------------------------------
329 | Takes an abstract floating-point value having sign `zSign', exponent `zExp',
330 | and significand `zSig', and returns the proper double-precision floating-
331 | point value corresponding to the abstract input. This routine is just like
332 | `roundAndPackFloat64' except that `zSig' does not have to be normalized.
333 | Bit 63 of `zSig' must be zero, and `zExp' must be 1 less than the ``true''
334 | floating-point exponent.
335 *----------------------------------------------------------------------------*/
337 float64
normalizeRoundAndPackFloat64(int zSign
, Bit16s zExp
, Bit64u zSig
, float_status_t
&status
)
339 int shiftCount
= countLeadingZeros64(zSig
) - 1;
340 return roundAndPackFloat64(zSign
, zExp
- shiftCount
, zSig
<<shiftCount
, status
);
345 /*----------------------------------------------------------------------------
346 | Normalizes the subnormal extended double-precision floating-point value
347 | represented by the denormalized significand `aSig'. The normalized exponent
348 | and significand are stored at the locations pointed to by `zExpPtr' and
349 | `zSigPtr', respectively.
350 *----------------------------------------------------------------------------*/
352 void normalizeFloatx80Subnormal(Bit64u aSig
, Bit32s
*zExpPtr
, Bit64u
*zSigPtr
)
354 int shiftCount
= countLeadingZeros64(aSig
);
355 *zSigPtr
= aSig
<<shiftCount
;
356 *zExpPtr
= 1 - shiftCount
;
359 /*----------------------------------------------------------------------------
360 | Takes an abstract floating-point value having sign `zSign', exponent `zExp',
361 | and extended significand formed by the concatenation of `zSig0' and `zSig1',
362 | and returns the proper extended double-precision floating-point value
363 | corresponding to the abstract input. Ordinarily, the abstract value is
364 | rounded and packed into the extended double-precision format, with the
365 | inexact exception raised if the abstract input cannot be represented
366 | exactly. However, if the abstract value is too large, the overflow and
367 | inexact exceptions are raised and an infinity or maximal finite value is
368 | returned. If the abstract value is too small, the input value is rounded to
369 | a subnormal number, and the underflow and inexact exceptions are raised if
370 | the abstract input cannot be represented exactly as a subnormal extended
371 | double-precision floating-point number.
372 | If `roundingPrecision' is 32 or 64, the result is rounded to the same
373 | number of bits as single or double precision, respectively. Otherwise, the
374 | result is rounded to the full precision of the extended double-precision
376 | The input significand must be normalized or smaller. If the input
377 | significand is not normalized, `zExp' must be 0; in that case, the result
378 | returned is a subnormal number, and it must not require rounding. The
379 | handling of underflow and overflow follows the IEC/IEEE Standard for Binary
380 | Floating-Point Arithmetic.
381 *----------------------------------------------------------------------------*/
383 floatx80
roundAndPackFloatx80(int roundingPrecision
,
384 int zSign
, Bit32s zExp
, Bit64u zSig0
, Bit64u zSig1
, float_status_t
&status
)
386 Bit64u roundIncrement
, roundMask
, roundBits
;
388 Bit64u zSigExact
; /* support rounding-up response */
390 Bit8u roundingMode
= get_float_rounding_mode(status
);
391 int roundNearestEven
= (roundingMode
== float_round_nearest_even
);
392 if (roundingPrecision
== 64) {
393 roundIncrement
= BX_CONST64(0x0000000000000400);
394 roundMask
= BX_CONST64(0x00000000000007FF);
396 else if (roundingPrecision
== 32) {
397 roundIncrement
= BX_CONST64(0x0000008000000000);
398 roundMask
= BX_CONST64(0x000000FFFFFFFFFF);
400 else goto precision80
;
402 zSig0
|= (zSig1
!= 0);
403 if (! roundNearestEven
) {
404 if (roundingMode
== float_round_to_zero
) roundIncrement
= 0;
406 roundIncrement
= roundMask
;
408 if (roundingMode
== float_round_up
) roundIncrement
= 0;
411 if (roundingMode
== float_round_down
) roundIncrement
= 0;
415 roundBits
= zSig0
& roundMask
;
416 if (0x7FFD <= (Bit32u
) (zExp
- 1)) {
418 || ((zExp
== 0x7FFE) && (zSig0
+ roundIncrement
< zSig0
)))
423 int isTiny
= (zExp
< 0) || (zSig0
<= zSig0
+ roundIncrement
);
424 shift64RightJamming(zSig0
, 1 - zExp
, &zSig0
);
427 roundBits
= zSig0
& roundMask
;
428 if (isTiny
&& roundBits
) float_raise(status
, float_flag_underflow
);
429 if (roundBits
) float_raise(status
, float_flag_inexact
);
430 zSig0
+= roundIncrement
;
431 if ((Bit64s
) zSig0
< 0) zExp
= 1;
432 roundIncrement
= roundMask
+ 1;
433 if (roundNearestEven
&& (roundBits
<<1 == roundIncrement
))
434 roundMask
|= roundIncrement
;
436 if (zSig0
> zSigExact
) set_float_rounding_up(status
);
437 return packFloatx80(zSign
, zExp
, zSig0
);
440 if (roundBits
) float_raise(status
, float_flag_inexact
);
442 zSig0
+= roundIncrement
;
443 if (zSig0
< roundIncrement
) {
444 // Basically scale by shifting right and keep overflow
446 zSig0
= BX_CONST64(0x8000000000000000);
447 zSigExact
>>= 1; // must scale also, or else later tests will fail
449 roundIncrement
= roundMask
+ 1;
450 if (roundNearestEven
&& (roundBits
<<1 == roundIncrement
))
451 roundMask
|= roundIncrement
;
453 if (zSig0
> zSigExact
) set_float_rounding_up(status
);
454 if (zSig0
== 0) zExp
= 0;
455 return packFloatx80(zSign
, zExp
, zSig0
);
457 increment
= ((Bit64s
) zSig1
< 0);
458 if (! roundNearestEven
) {
459 if (roundingMode
== float_round_to_zero
) increment
= 0;
462 increment
= (roundingMode
== float_round_down
) && zSig1
;
465 increment
= (roundingMode
== float_round_up
) && zSig1
;
469 if (0x7FFD <= (Bit32u
) (zExp
- 1)) {
472 && (zSig0
== BX_CONST64(0xFFFFFFFFFFFFFFFF))
477 float_raise(status
, float_flag_overflow
| float_flag_inexact
);
478 if ((roundingMode
== float_round_to_zero
)
479 || (zSign
&& (roundingMode
== float_round_up
))
480 || (! zSign
&& (roundingMode
== float_round_down
)))
482 return packFloatx80(zSign
, 0x7FFE, ~roundMask
);
484 set_float_rounding_up(status
);
485 return packFloatx80(zSign
, 0x7FFF, BX_CONST64(0x8000000000000000));
488 int isTiny
= (zExp
< 0) || (! increment
)
489 || (zSig0
< BX_CONST64(0xFFFFFFFFFFFFFFFF));
490 shift64ExtraRightJamming(zSig0
, zSig1
, 1 - zExp
, &zSig0
, &zSig1
);
492 if (isTiny
&& zSig1
) float_raise(status
, float_flag_underflow
);
493 if (zSig1
) float_raise(status
, float_flag_inexact
);
494 if (roundNearestEven
) increment
= ((Bit64s
) zSig1
< 0);
497 increment
= (roundingMode
== float_round_down
) && zSig1
;
499 increment
= (roundingMode
== float_round_up
) && zSig1
;
504 zSig0
&= ~(((Bit64u
) (zSig1
<<1) == 0) & roundNearestEven
);
505 if (zSig0
> zSigExact
) set_float_rounding_up(status
);
506 if ((Bit64s
) zSig0
< 0) zExp
= 1;
508 return packFloatx80(zSign
, zExp
, zSig0
);
511 if (zSig1
) float_raise(status
, float_flag_inexact
);
516 zSig0
= BX_CONST64(0x8000000000000000);
517 zSigExact
>>= 1; // must scale also, or else later tests will fail
520 zSig0
&= ~(((Bit64u
) (zSig1
<<1) == 0) & roundNearestEven
);
522 if (zSig0
> zSigExact
) set_float_rounding_up(status
);
525 if (zSig0
== 0) zExp
= 0;
527 return packFloatx80(zSign
, zExp
, zSig0
);
530 /*----------------------------------------------------------------------------
531 | Takes an abstract floating-point value having sign `zSign', exponent
532 | `zExp', and significand formed by the concatenation of `zSig0' and `zSig1',
533 | and returns the proper extended double-precision floating-point value
534 | corresponding to the abstract input. This routine is just like
535 | `roundAndPackFloatx80' except that the input significand does not have to be
537 *----------------------------------------------------------------------------*/
539 floatx80
normalizeRoundAndPackFloatx80(int roundingPrecision
,
540 int zSign
, Bit32s zExp
, Bit64u zSig0
, Bit64u zSig1
, float_status_t
&status
)
547 int shiftCount
= countLeadingZeros64(zSig0
);
548 shortShift128Left(zSig0
, zSig1
, shiftCount
, &zSig0
, &zSig1
);
551 roundAndPackFloatx80(roundingPrecision
, zSign
, zExp
, zSig0
, zSig1
, status
);
558 /*----------------------------------------------------------------------------
559 | Normalizes the subnormal quadruple-precision floating-point value
560 | represented by the denormalized significand formed by the concatenation of
561 | `aSig0' and `aSig1'. The normalized exponent is stored at the location
562 | pointed to by `zExpPtr'. The most significant 49 bits of the normalized
563 | significand are stored at the location pointed to by `zSig0Ptr', and the
564 | least significant 64 bits of the normalized significand are stored at the
565 | location pointed to by `zSig1Ptr'.
566 *----------------------------------------------------------------------------*/
568 void normalizeFloat128Subnormal(
569 Bit64u aSig0
, Bit64u aSig1
, Bit32s
*zExpPtr
, Bit64u
*zSig0Ptr
, Bit64u
*zSig1Ptr
)
574 shiftCount
= countLeadingZeros64(aSig1
) - 15;
575 if (shiftCount
< 0) {
576 *zSig0Ptr
= aSig1
>>(-shiftCount
);
577 *zSig1Ptr
= aSig1
<< (shiftCount
& 63);
580 *zSig0Ptr
= aSig1
<< shiftCount
;
583 *zExpPtr
= - shiftCount
- 63;
586 shiftCount
= countLeadingZeros64(aSig0
) - 15;
587 shortShift128Left(aSig0
, aSig1
, shiftCount
, zSig0Ptr
, zSig1Ptr
);
588 *zExpPtr
= 1 - shiftCount
;
592 /*----------------------------------------------------------------------------
593 | Takes an abstract floating-point value having sign `zSign', exponent `zExp',
594 | and extended significand formed by the concatenation of `zSig0', `zSig1',
595 | and `zSig2', and returns the proper quadruple-precision floating-point value
596 | corresponding to the abstract input. Ordinarily, the abstract value is
597 | simply rounded and packed into the quadruple-precision format, with the
598 | inexact exception raised if the abstract input cannot be represented
599 | exactly. However, if the abstract value is too large, the overflow and
600 | inexact exceptions are raised and an infinity or maximal finite value is
601 | returned. If the abstract value is too small, the input value is rounded to
602 | a subnormal number, and the underflow and inexact exceptions are raised if
603 | the abstract input cannot be represented exactly as a subnormal quadruple-
604 | precision floating-point number.
605 | The input significand must be normalized or smaller. If the input
606 | significand is not normalized, `zExp' must be 0; in that case, the result
607 | returned is a subnormal number, and it must not require rounding. In the
608 | usual case that the input significand is normalized, `zExp' must be 1 less
609 | than the ``true'' floating-point exponent. The handling of underflow and
610 | overflow follows the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
611 *----------------------------------------------------------------------------*/
613 float128
roundAndPackFloat128(
614 int zSign
, Bit32s zExp
, Bit64u zSig0
, Bit64u zSig1
, Bit64u zSig2
, float_status_t
&status
)
616 int increment
= ((Bit64s
) zSig2
< 0);
617 if (0x7FFD <= (Bit32u
) zExp
) {
620 && eq128(BX_CONST64(0x0001FFFFFFFFFFFF),
621 BX_CONST64(0xFFFFFFFFFFFFFFFF), zSig0
, zSig1
)
624 float_raise(status
, float_flag_overflow
| float_flag_inexact
);
625 return packFloat128(zSign
, 0x7FFF, 0, 0);
628 int isTiny
= (zExp
< -1)
630 || lt128(zSig0
, zSig1
,
631 BX_CONST64(0x0001FFFFFFFFFFFF),
632 BX_CONST64(0xFFFFFFFFFFFFFFFF));
633 shift128ExtraRightJamming(
634 zSig0
, zSig1
, zSig2
, -zExp
, &zSig0
, &zSig1
, &zSig2
);
636 if (isTiny
&& zSig2
) float_raise(status
, float_flag_underflow
);
637 increment
= ((Bit64s
) zSig2
< 0);
640 if (zSig2
) float_raise(status
, float_flag_inexact
);
642 add128(zSig0
, zSig1
, 0, 1, &zSig0
, &zSig1
);
643 zSig1
&= ~((zSig2
+ zSig2
== 0) & 1);
646 if ((zSig0
| zSig1
) == 0) zExp
= 0;
648 return packFloat128(zSign
, zExp
, zSig0
, zSig1
);
651 /*----------------------------------------------------------------------------
652 | Takes an abstract floating-point value having sign `zSign', exponent `zExp',
653 | and significand formed by the concatenation of `zSig0' and `zSig1', and
654 | returns the proper quadruple-precision floating-point value corresponding
655 | to the abstract input. This routine is just like `roundAndPackFloat128'
656 | except that the input significand has fewer bits and does not have to be
657 | normalized. In all cases, `zExp' must be 1 less than the ``true'' floating-
659 *----------------------------------------------------------------------------*/
661 float128
normalizeRoundAndPackFloat128(
662 int zSign
, Bit32s zExp
, Bit64u zSig0
, Bit64u zSig1
, float_status_t
&status
)
671 int shiftCount
= countLeadingZeros64(zSig0
) - 15;
672 if (0 <= shiftCount
) {
674 shortShift128Left(zSig0
, zSig1
, shiftCount
, &zSig0
, &zSig1
);
677 shift128ExtraRightJamming(
678 zSig0
, zSig1
, 0, -shiftCount
, &zSig0
, &zSig1
, &zSig2
);
681 return roundAndPackFloat128(zSign
, zExp
, zSig0
, zSig1
, zSig2
, status
);