fpu/softfloat-round-pack.cc

   1 /*============================================================================
   2 This C source file is part of the SoftFloat IEC/IEEE Floating-point Arithmetic
   3 Package, Release 2b.
   4
   5 Written by John R. Hauser.  This work was made possible in part by the
   6 International Computer Science Institute, located at Suite 600, 1947 Center
   7 Street, Berkeley, California 94704.  Funding was partially provided by the
   8 National Science Foundation under grant MIP-9311980.  The original version
   9 of this code was written as part of a project to build a fixed-point vector
  10 processor in collaboration with the University of California at Berkeley,
  11 overseen by Profs. Nelson Morgan and John Wawrzynek.  More information
  12 is available through the Web page `http://www.cs.berkeley.edu/~jhauser/
  13 arithmetic/SoftFloat.html'.
  14
  15 THIS SOFTWARE IS DISTRIBUTED AS IS, FOR FREE.  Although reasonable effort has
  16 been made to avoid it, THIS SOFTWARE MAY CONTAIN FAULTS THAT WILL AT TIMES
  17 RESULT IN INCORRECT BEHAVIOR.  USE OF THIS SOFTWARE IS RESTRICTED TO PERSONS
  18 AND ORGANIZATIONS WHO CAN AND WILL TAKE FULL RESPONSIBILITY FOR ALL LOSSES,
  19 COSTS, OR OTHER PROBLEMS THEY INCUR DUE TO THE SOFTWARE, AND WHO FURTHERMORE
  20 EFFECTIVELY INDEMNIFY JOHN HAUSER AND THE INTERNATIONAL COMPUTER SCIENCE
  21 INSTITUTE (possibly via similar legal warning) AGAINST ALL LOSSES, COSTS, OR
  22 OTHER PROBLEMS INCURRED BY THEIR CUSTOMERS AND CLIENTS DUE TO THE SOFTWARE.
  23
  24 Derivative works are acceptable, even for commercial purposes, so long as
  25 (1) the source code for the derivative work includes prominent notice that
  26 the work is derivative, and (2) the source code includes prominent notice with
  27 these four paragraphs for those parts of this code that are retained.
  28 =============================================================================*/
  29
  30 #define FLOAT128
  31
  32 /*============================================================================
  33  * Adapted for Bochs (x86 architecture simulator) by
  34  *            Stanislav Shwartsman [sshwarts at sourceforge net]
  35  * ==========================================================================*/
  36
  37 #include "softfloat.h"
  38 #include "softfloat-round-pack.h"
  39
  40 /*----------------------------------------------------------------------------
  41 | Primitive arithmetic functions, including multi-word arithmetic, and
  42 | division and square root approximations. (Can be specialized to target
  43 | if desired).
  44 *----------------------------------------------------------------------------*/
  45 #include "softfloat-macros.h"
  46
  47 /*----------------------------------------------------------------------------
  48 | Functions and definitions to determine:  (1) whether tininess for underflow
  49 | is detected before or after rounding by default, (2) what (if anything)
  50 | happens when exceptions are raised, (3) how signaling NaNs are distinguished
  51 | from quiet NaNs, (4) the default generated quiet NaNs, and (5) how NaNs
  52 | are propagated from function inputs to output.  These details are target-
  53 | specific.
  54 *----------------------------------------------------------------------------*/
  55 #include "softfloat-specialize.h"
  56
  57 /*----------------------------------------------------------------------------
  58 | Takes a 64-bit fixed-point value `absZ' with binary point between bits 6
  59 | and 7, and returns the properly rounded 32-bit integer corresponding to the
  60 | input.  If `zSign' is 1, the input is negated before being converted to an
  61 | integer.  Bit 63 of `absZ' must be zero.  Ordinarily, the fixed-point input
  62 | is simply rounded to an integer, with the inexact exception raised if the
  63 | input cannot be represented exactly as an integer.  However, if the fixed-
  64 | point input is too large, the invalid exception is raised and the integer
  65 | indefinite value is returned.
  66 *----------------------------------------------------------------------------*/
  67
  68 Bit32s roundAndPackInt32(int zSign, Bit64u absZ, float_status_t &status)
  69 {
  70     int roundingMode = get_float_rounding_mode(status);
  71     int roundNearestEven = (roundingMode == float_round_nearest_even);
  72     int roundIncrement = 0x40;
  73     if (! roundNearestEven) {
  74         if (roundingMode == float_round_to_zero) roundIncrement = 0;
  75         else {
  76             roundIncrement = 0x7F;
  77             if (zSign) {
  78                 if (roundingMode == float_round_up) roundIncrement = 0;
  79             }
  80             else {
  81                 if (roundingMode == float_round_down) roundIncrement = 0;
  82             }
  83         }
  84     }
  85     int roundBits = absZ & 0x7F;
  86     absZ = (absZ + roundIncrement)>>7;
  87     absZ &= ~(((roundBits ^ 0x40) == 0) & roundNearestEven);
  88     Bit32s z = absZ;
  89     if (zSign) z = -z;
  90     if ((absZ>>32) || (z && ((z < 0) ^ zSign))) {
  91         float_raise(status, float_flag_invalid);
  92         return (Bit32s)(int32_indefinite);
  93     }
  94     if (roundBits) float_raise(status, float_flag_inexact);
  95     return z;
  96 }
  97
  98 /*----------------------------------------------------------------------------
  99 | Takes the 128-bit fixed-point value formed by concatenating `absZ0' and
 100 | `absZ1', with binary point between bits 63 and 64 (between the input words),
 101 | and returns the properly rounded 64-bit integer corresponding to the input.
 102 | If `zSign' is 1, the input is negated before being converted to an integer.
 103 | Ordinarily, the fixed-point input is simply rounded to an integer, with
 104 | the inexact exception raised if the input cannot be represented exactly as
 105 | an integer.  However, if the fixed-point input is too large, the invalid
 106 | exception is raised and the integer indefinite value is returned.
 107 *----------------------------------------------------------------------------*/
 108
 109 Bit64s roundAndPackInt64(int zSign, Bit64u absZ0, Bit64u absZ1, float_status_t &status)
 110 {
 111     Bit64s z;
 112     int roundingMode = get_float_rounding_mode(status);
 113     int roundNearestEven = (roundingMode == float_round_nearest_even);
 114     int increment = ((Bit64s) absZ1 < 0);
 115     if (! roundNearestEven) {
 116         if (roundingMode == float_round_to_zero) increment = 0;
 117         else {
 118             if (zSign) {
 119                 increment = (roundingMode == float_round_down) && absZ1;
 120             }
 121             else {
 122                 increment = (roundingMode == float_round_up) && absZ1;
 123             }
 124         }
 125     }
 126     if (increment) {
 127         ++absZ0;
 128         if (absZ0 == 0) goto overflow;
 129         absZ0 &= ~(((Bit64u) (absZ1<<1) == 0) & roundNearestEven);
 130     }
 131     z = absZ0;
 132     if (zSign) z = -z;
 133     if (z && ((z < 0) ^ zSign)) {
 134  overflow:
 135         float_raise(status, float_flag_invalid);
 136         return (Bit64s)(int64_indefinite);
 137     }
 138     if (absZ1) float_raise(status, float_flag_inexact);
 139     return z;
 140 }
 141
 142 /*----------------------------------------------------------------------------
 143 | Normalizes the subnormal single-precision floating-point value represented
 144 | by the denormalized significand `aSig'.  The normalized exponent and
 145 | significand are stored at the locations pointed to by `zExpPtr' and
 146 | `zSigPtr', respectively.
 147 *----------------------------------------------------------------------------*/
 148
 149 void normalizeFloat32Subnormal(Bit32u aSig, Bit16s *zExpPtr, Bit32u *zSigPtr)
 150 {
 151     int shiftCount = countLeadingZeros32(aSig) - 8;
 152     *zSigPtr = aSig<<shiftCount;
 153     *zExpPtr = 1 - shiftCount;
 154 }
 155
 156 /*----------------------------------------------------------------------------
 157 | Takes an abstract floating-point value having sign `zSign', exponent `zExp',
 158 | and significand `zSig', and returns the proper single-precision floating-
 159 | point value corresponding to the abstract input.  Ordinarily, the abstract
 160 | value is simply rounded and packed into the single-precision format, with
 161 | the inexact exception raised if the abstract input cannot be represented
 162 | exactly.  However, if the abstract value is too large, the overflow and
 163 | inexact exceptions are raised and an infinity or maximal finite value is
 164 | returned.  If the abstract value is too small, the input value is rounded to
 165 | a subnormal number, and the underflow and inexact exceptions are raised if
 166 | the abstract input cannot be represented exactly as a subnormal single-
 167 | precision floating-point number.
 168 |     The input significand `zSig' has its binary point between bits 30
 169 | and 29, which is 7 bits to the left of the usual location.  This shifted
 170 | significand must be normalized or smaller.  If `zSig' is not normalized,
 171 | `zExp' must be 0; in that case, the result returned is a subnormal number,
 172 | and it must not require rounding.  In the usual case that `zSig' is
 173 | normalized, `zExp' must be 1 less than the ``true'' floating-point exponent.
 174 | The handling of underflow and overflow follows the IEC/IEEE Standard for
 175 | Binary Floating-Point Arithmetic.
 176 *----------------------------------------------------------------------------*/
 177
 178 float32 roundAndPackFloat32(int zSign, Bit16s zExp, Bit32u zSig, float_status_t &status)
 179 {
 180     Bit32s roundIncrement, roundBits, roundMask;
 181
 182     int roundingMode = get_float_rounding_mode(status);
 183     int roundNearestEven = (roundingMode == float_round_nearest_even);
 184     roundIncrement = 0x40;
 185     roundMask = 0x7F;
 186
 187     if (! roundNearestEven) {
 188         if (roundingMode == float_round_to_zero) roundIncrement = 0;
 189         else {
 190             roundIncrement = roundMask;
 191             if (zSign) {
 192                 if (roundingMode == float_round_up) roundIncrement = 0;
 193             }
 194             else {
 195                 if (roundingMode == float_round_down) roundIncrement = 0;
 196             }
 197         }
 198     }
 199     roundBits = zSig & roundMask;
 200     if (0xFD <= (Bit16u) zExp) {
 201         if ((0xFD < zExp)
 202              || ((zExp == 0xFD)
 203                   && ((Bit32s) (zSig + roundIncrement) < 0)))
 204         {
 205             float_raise(status, float_flag_overflow | float_flag_inexact);
 206             return packFloat32(zSign, 0xFF, 0) - (roundIncrement == 0);
 207         }
 208         if (zExp < 0) {
 209             int isTiny = (zExp < -1) || (zSig + roundIncrement < 0x80000000);
 210             shift32RightJamming(zSig, -zExp, &zSig);
 211             zExp = 0;
 212             roundBits = zSig & roundMask;
 213             if (isTiny && roundBits) {
 214                 float_raise(status, float_flag_underflow);
 215                 if(get_flush_underflow_to_zero(status)) {
 216                     float_raise(status, float_flag_inexact);
 217                     return packFloat32(zSign, 0, 0);
 218                 }
 219             }
 220         }
 221     }
 222     if (roundBits) float_raise(status, float_flag_inexact);
 223     zSig = ((zSig + roundIncrement) & ~roundMask) >> 7;
 224     zSig &= ~(((roundBits ^ 0x40) == 0) & roundNearestEven);
 225     if (zSig == 0) zExp = 0;
 226     return packFloat32(zSign, zExp, zSig);
 227 }
 228
 229 /*----------------------------------------------------------------------------
 230 | Takes an abstract floating-point value having sign `zSign', exponent `zExp',
 231 | and significand `zSig', and returns the proper single-precision floating-
 232 | point value corresponding to the abstract input.  This routine is just like
 233 | `roundAndPackFloat32' except that `zSig' does not have to be normalized.
 234 | Bit 31 of `zSig' must be zero, and `zExp' must be 1 less than the ``true''
 235 | floating-point exponent.
 236 *----------------------------------------------------------------------------*/
 237
 238 float32 normalizeRoundAndPackFloat32(int zSign, Bit16s zExp, Bit32u zSig, float_status_t &status)
 239 {
 240     int shiftCount = countLeadingZeros32(zSig) - 1;
 241     return roundAndPackFloat32(zSign, zExp - shiftCount, zSig<<shiftCount, status);
 242 }
 243
 244 /*----------------------------------------------------------------------------
 245 | Normalizes the subnormal double-precision floating-point value represented
 246 | by the denormalized significand `aSig'.  The normalized exponent and
 247 | significand are stored at the locations pointed to by `zExpPtr' and
 248 | `zSigPtr', respectively.
 249 *----------------------------------------------------------------------------*/
 250
 251 void normalizeFloat64Subnormal(Bit64u aSig, Bit16s *zExpPtr, Bit64u *zSigPtr)
 252 {
 253     int shiftCount = countLeadingZeros64(aSig) - 11;
 254     *zSigPtr = aSig<<shiftCount;
 255     *zExpPtr = 1 - shiftCount;
 256 }
 257
 258 /*----------------------------------------------------------------------------
 259 | Takes an abstract floating-point value having sign `zSign', exponent `zExp',
 260 | and significand `zSig', and returns the proper double-precision floating-
 261 | point value corresponding to the abstract input.  Ordinarily, the abstract
 262 | value is simply rounded and packed into the double-precision format, with
 263 | the inexact exception raised if the abstract input cannot be represented
 264 | exactly.  However, if the abstract value is too large, the overflow and
 265 | inexact exceptions are raised and an infinity or maximal finite value is
 266 | returned.  If the abstract value is too small, the input value is rounded
 267 | to a subnormal number, and the underflow and inexact exceptions are raised
 268 | if the abstract input cannot be represented exactly as a subnormal double-
 269 | precision floating-point number.
 270 |     The input significand `zSig' has its binary point between bits 62
 271 | and 61, which is 10 bits to the left of the usual location.  This shifted
 272 | significand must be normalized or smaller.  If `zSig' is not normalized,
 273 | `zExp' must be 0; in that case, the result returned is a subnormal number,
 274 | and it must not require rounding.  In the usual case that `zSig' is
 275 | normalized, `zExp' must be 1 less than the ``true'' floating-point exponent.
 276 | The handling of underflow and overflow follows the IEC/IEEE Standard for
 277 | Binary Floating-Point Arithmetic.
 278 *----------------------------------------------------------------------------*/
 279
 280 float64 roundAndPackFloat64(int zSign, Bit16s zExp, Bit64u zSig, float_status_t &status)
 281 {
 282     Bit16s roundIncrement, roundBits;
 283     int roundingMode = get_float_rounding_mode(status);
 284     int roundNearestEven = (roundingMode == float_round_nearest_even);
 285     roundIncrement = 0x200;
 286     if (! roundNearestEven) {
 287         if (roundingMode == float_round_to_zero) roundIncrement = 0;
 288         else {
 289             roundIncrement = 0x3FF;
 290             if (zSign) {
 291                 if (roundingMode == float_round_up) roundIncrement = 0;
 292             }
 293             else {
 294                 if (roundingMode == float_round_down) roundIncrement = 0;
 295             }
 296         }
 297     }
 298     roundBits = zSig & 0x3FF;
 299     if (0x7FD <= (Bit16u) zExp) {
 300         if ((0x7FD < zExp)
 301              || ((zExp == 0x7FD)
 302                   && ((Bit64s) (zSig + roundIncrement) < 0)))
 303         {
 304             float_raise(status, float_flag_overflow | float_flag_inexact);
 305             return packFloat64(zSign, 0x7FF, 0) - (roundIncrement == 0);
 306         }
 307         if (zExp < 0) {
 308             int isTiny = (zExp < -1) || (zSig + roundIncrement < BX_CONST64(0x8000000000000000));
 309             shift64RightJamming(zSig, -zExp, &zSig);
 310             zExp = 0;
 311             roundBits = zSig & 0x3FF;
 312             if (isTiny && roundBits) {
 313                 float_raise(status, float_flag_underflow);
 314                 if(get_flush_underflow_to_zero(status)) {
 315                     float_raise(status, float_flag_inexact);
 316                     return packFloat64(zSign, 0, 0);
 317                 }
 318             }
 319         }
 320     }
 321     if (roundBits) float_raise(status, float_flag_inexact);
 322     zSig = (zSig + roundIncrement)>>10;
 323     zSig &= ~(((roundBits ^ 0x200) == 0) & roundNearestEven);
 324     if (zSig == 0) zExp = 0;
 325     return packFloat64(zSign, zExp, zSig);
 326 }
 327
 328 /*----------------------------------------------------------------------------
 329 | Takes an abstract floating-point value having sign `zSign', exponent `zExp',
 330 | and significand `zSig', and returns the proper double-precision floating-
 331 | point value corresponding to the abstract input.  This routine is just like
 332 | `roundAndPackFloat64' except that `zSig' does not have to be normalized.
 333 | Bit 63 of `zSig' must be zero, and `zExp' must be 1 less than the ``true''
 334 | floating-point exponent.
 335 *----------------------------------------------------------------------------*/
 336
 337 float64 normalizeRoundAndPackFloat64(int zSign, Bit16s zExp, Bit64u zSig, float_status_t &status)
 338 {
 339     int shiftCount = countLeadingZeros64(zSig) - 1;
 340     return roundAndPackFloat64(zSign, zExp - shiftCount, zSig<<shiftCount, status);
 341 }
 342
 343 #ifdef FLOATX80
 344
 345 /*----------------------------------------------------------------------------
 346 | Normalizes the subnormal extended double-precision floating-point value
 347 | represented by the denormalized significand `aSig'.  The normalized exponent
 348 | and significand are stored at the locations pointed to by `zExpPtr' and
 349 | `zSigPtr', respectively.
 350 *----------------------------------------------------------------------------*/
 351
 352 void normalizeFloatx80Subnormal(Bit64u aSig, Bit32s *zExpPtr, Bit64u *zSigPtr)
 353 {
 354     int shiftCount = countLeadingZeros64(aSig);
 355     *zSigPtr = aSig<<shiftCount;
 356     *zExpPtr = 1 - shiftCount;
 357 }
 358
 359 /*----------------------------------------------------------------------------
 360 | Takes an abstract floating-point value having sign `zSign', exponent `zExp',
 361 | and extended significand formed by the concatenation of `zSig0' and `zSig1',
 362 | and returns the proper extended double-precision floating-point value
 363 | corresponding to the abstract input.  Ordinarily, the abstract value is
 364 | rounded and packed into the extended double-precision format, with the
 365 | inexact exception raised if the abstract input cannot be represented
 366 | exactly.  However, if the abstract value is too large, the overflow and
 367 | inexact exceptions are raised and an infinity or maximal finite value is
 368 | returned.  If the abstract value is too small, the input value is rounded to
 369 | a subnormal number, and the underflow and inexact exceptions are raised if
 370 | the abstract input cannot be represented exactly as a subnormal extended
 371 | double-precision floating-point number.
 372 |     If `roundingPrecision' is 32 or 64, the result is rounded to the same
 373 | number of bits as single or double precision, respectively.  Otherwise, the
 374 | result is rounded to the full precision of the extended double-precision
 375 | format.
 376 |     The input significand must be normalized or smaller.  If the input
 377 | significand is not normalized, `zExp' must be 0; in that case, the result
 378 | returned is a subnormal number, and it must not require rounding.  The
 379 | handling of underflow and overflow follows the IEC/IEEE Standard for Binary
 380 | Floating-Point Arithmetic.
 381 *----------------------------------------------------------------------------*/
 382
 383 floatx80 roundAndPackFloatx80(int roundingPrecision,
 384         int zSign, Bit32s zExp, Bit64u zSig0, Bit64u zSig1, float_status_t &status)
 385 {
 386     Bit64u roundIncrement, roundMask, roundBits;
 387     int increment;
 388     Bit64u zSigExact; /* support rounding-up response */
 389
 390     Bit8u roundingMode = get_float_rounding_mode(status);
 391     int roundNearestEven = (roundingMode == float_round_nearest_even);
 392     if (roundingPrecision == 64) {
 393         roundIncrement = BX_CONST64(0x0000000000000400);
 394         roundMask = BX_CONST64(0x00000000000007FF);
 395     }
 396     else if (roundingPrecision == 32) {
 397         roundIncrement = BX_CONST64(0x0000008000000000);
 398         roundMask = BX_CONST64(0x000000FFFFFFFFFF);
 399     }
 400     else goto precision80;
 401
 402     zSig0 |= (zSig1 != 0);
 403     if (! roundNearestEven) {
 404         if (roundingMode == float_round_to_zero) roundIncrement = 0;
 405         else {
 406             roundIncrement = roundMask;
 407             if (zSign) {
 408                 if (roundingMode == float_round_up) roundIncrement = 0;
 409             }
 410             else {
 411                 if (roundingMode == float_round_down) roundIncrement = 0;
 412             }
 413         }
 414     }
 415     roundBits = zSig0 & roundMask;
 416     if (0x7FFD <= (Bit32u) (zExp - 1)) {
 417         if ((0x7FFE < zExp)
 418              || ((zExp == 0x7FFE) && (zSig0 + roundIncrement < zSig0)))
 419         {
 420             goto overflow;
 421         }
 422         if (zExp <= 0) {
 423             int isTiny = (zExp < 0) || (zSig0 <= zSig0 + roundIncrement);
 424             shift64RightJamming(zSig0, 1 - zExp, &zSig0);
 425             zSigExact = zSig0;
 426             zExp = 0;
 427             roundBits = zSig0 & roundMask;
 428             if (isTiny && roundBits) float_raise(status, float_flag_underflow);
 429             if (roundBits) float_raise(status, float_flag_inexact);
 430             zSig0 += roundIncrement;
 431             if ((Bit64s) zSig0 < 0) zExp = 1;
 432             roundIncrement = roundMask + 1;
 433             if (roundNearestEven && (roundBits<<1 == roundIncrement))
 434                 roundMask |= roundIncrement;
 435             zSig0 &= ~roundMask;
 436             if (zSig0 > zSigExact) set_float_rounding_up(status);
 437             return packFloatx80(zSign, zExp, zSig0);
 438         }
 439     }
 440     if (roundBits) float_raise(status, float_flag_inexact);
 441     zSigExact = zSig0;
 442     zSig0 += roundIncrement;
 443     if (zSig0 < roundIncrement) {
 444         // Basically scale by shifting right and keep overflow
 445         ++zExp;
 446         zSig0 = BX_CONST64(0x8000000000000000);
 447         zSigExact >>= 1; // must scale also, or else later tests will fail
 448     }
 449     roundIncrement = roundMask + 1;
 450     if (roundNearestEven && (roundBits<<1 == roundIncrement))
 451         roundMask |= roundIncrement;
 452     zSig0 &= ~roundMask;
 453     if (zSig0 > zSigExact) set_float_rounding_up(status);
 454     if (zSig0 == 0) zExp = 0;
 455     return packFloatx80(zSign, zExp, zSig0);
 456  precision80:
 457     increment = ((Bit64s) zSig1 < 0);
 458     if (! roundNearestEven) {
 459         if (roundingMode == float_round_to_zero) increment = 0;
 460         else {
 461             if (zSign) {
 462                 increment = (roundingMode == float_round_down) && zSig1;
 463             }
 464             else {
 465                 increment = (roundingMode == float_round_up) && zSig1;
 466             }
 467         }
 468     }
 469     if (0x7FFD <= (Bit32u) (zExp - 1)) {
 470         if ((0x7FFE < zExp)
 471              || ((zExp == 0x7FFE)
 472                   && (zSig0 == BX_CONST64(0xFFFFFFFFFFFFFFFF))
 473                   && increment))
 474         {
 475             roundMask = 0;
 476  overflow:
 477             float_raise(status, float_flag_overflow | float_flag_inexact);
 478             if ((roundingMode == float_round_to_zero)
 479                  || (zSign && (roundingMode == float_round_up))
 480                  || (! zSign && (roundingMode == float_round_down)))
 481             {
 482                 return packFloatx80(zSign, 0x7FFE, ~roundMask);
 483             }
 484             set_float_rounding_up(status);
 485             return packFloatx80(zSign, 0x7FFF, BX_CONST64(0x8000000000000000));
 486         }
 487         if (zExp <= 0) {
 488             int isTiny = (zExp < 0) || (! increment)
 489                 || (zSig0 < BX_CONST64(0xFFFFFFFFFFFFFFFF));
 490             shift64ExtraRightJamming(zSig0, zSig1, 1 - zExp, &zSig0, &zSig1);
 491             zExp = 0;
 492             if (isTiny && zSig1) float_raise(status, float_flag_underflow);
 493             if (zSig1) float_raise(status, float_flag_inexact);
 494             if (roundNearestEven) increment = ((Bit64s) zSig1 < 0);
 495             else {
 496                 if (zSign) {
 497                     increment = (roundingMode == float_round_down) && zSig1;
 498                 } else {
 499                     increment = (roundingMode == float_round_up) && zSig1;
 500                 }
 501             }
 502             if (increment) {
 503                 zSigExact = zSig0++;
 504                 zSig0 &= ~(((Bit64u) (zSig1<<1) == 0) & roundNearestEven);
 505                 if (zSig0 > zSigExact) set_float_rounding_up(status);
 506                 if ((Bit64s) zSig0 < 0) zExp = 1;
 507             }
 508             return packFloatx80(zSign, zExp, zSig0);
 509         }
 510     }
 511     if (zSig1) float_raise(status, float_flag_inexact);
 512     if (increment) {
 513         zSigExact = zSig0++;
 514         if (zSig0 == 0) {
 515             zExp++;
 516             zSig0 = BX_CONST64(0x8000000000000000);
 517             zSigExact >>= 1;  // must scale also, or else later tests will fail
 518         }
 519         else {
 520             zSig0 &= ~(((Bit64u) (zSig1<<1) == 0) & roundNearestEven);
 521         }
 522         if (zSig0 > zSigExact) set_float_rounding_up(status);
 523     }
 524     else {
 525         if (zSig0 == 0) zExp = 0;
 526     }
 527     return packFloatx80(zSign, zExp, zSig0);
 528 }
 529
 530 /*----------------------------------------------------------------------------
 531 | Takes an abstract floating-point value having sign `zSign', exponent
 532 | `zExp', and significand formed by the concatenation of `zSig0' and `zSig1',
 533 | and returns the proper extended double-precision floating-point value
 534 | corresponding to the abstract input.  This routine is just like
 535 | `roundAndPackFloatx80' except that the input significand does not have to be
 536 | normalized.
 537 *----------------------------------------------------------------------------*/
 538
 539 floatx80 normalizeRoundAndPackFloatx80(int roundingPrecision,
 540         int zSign, Bit32s zExp, Bit64u zSig0, Bit64u zSig1, float_status_t &status)
 541 {
 542     if (zSig0 == 0) {
 543         zSig0 = zSig1;
 544         zSig1 = 0;
 545         zExp -= 64;
 546     }
 547     int shiftCount = countLeadingZeros64(zSig0);
 548     shortShift128Left(zSig0, zSig1, shiftCount, &zSig0, &zSig1);
 549     zExp -= shiftCount;
 550     return
 551         roundAndPackFloatx80(roundingPrecision, zSign, zExp, zSig0, zSig1, status);
 552 }
 553
 554 #endif
 555
 556 #ifdef FLOAT128
 557
 558 /*----------------------------------------------------------------------------
 559 | Normalizes the subnormal quadruple-precision floating-point value
 560 | represented by the denormalized significand formed by the concatenation of
 561 | `aSig0' and `aSig1'.  The normalized exponent is stored at the location
 562 | pointed to by `zExpPtr'.  The most significant 49 bits of the normalized
 563 | significand are stored at the location pointed to by `zSig0Ptr', and the
 564 | least significant 64 bits of the normalized significand are stored at the
 565 | location pointed to by `zSig1Ptr'.
 566 *----------------------------------------------------------------------------*/
 567
 568 void normalizeFloat128Subnormal(
 569      Bit64u aSig0, Bit64u aSig1, Bit32s *zExpPtr, Bit64u *zSig0Ptr, Bit64u *zSig1Ptr)
 570 {
 571     int shiftCount;
 572
 573     if (aSig0 == 0) {
 574         shiftCount = countLeadingZeros64(aSig1) - 15;
 575         if (shiftCount < 0) {
 576             *zSig0Ptr = aSig1 >>(-shiftCount);
 577             *zSig1Ptr = aSig1 << (shiftCount & 63);
 578         }
 579         else {
 580             *zSig0Ptr = aSig1 << shiftCount;
 581             *zSig1Ptr = 0;
 582         }
 583         *zExpPtr = - shiftCount - 63;
 584     }
 585     else {
 586         shiftCount = countLeadingZeros64(aSig0) - 15;
 587         shortShift128Left(aSig0, aSig1, shiftCount, zSig0Ptr, zSig1Ptr);
 588         *zExpPtr = 1 - shiftCount;
 589     }
 590 }
 591
 592 /*----------------------------------------------------------------------------
 593 | Takes an abstract floating-point value having sign `zSign', exponent `zExp',
 594 | and extended significand formed by the concatenation of `zSig0', `zSig1',
 595 | and `zSig2', and returns the proper quadruple-precision floating-point value
 596 | corresponding to the abstract input.  Ordinarily, the abstract value is
 597 | simply rounded and packed into the quadruple-precision format, with the
 598 | inexact exception raised if the abstract input cannot be represented
 599 | exactly.  However, if the abstract value is too large, the overflow and
 600 | inexact exceptions are raised and an infinity or maximal finite value is
 601 | returned.  If the abstract value is too small, the input value is rounded to
 602 | a subnormal number, and the underflow and inexact exceptions are raised if
 603 | the abstract input cannot be represented exactly as a subnormal quadruple-
 604 | precision floating-point number.
 605 |     The input significand must be normalized or smaller.  If the input
 606 | significand is not normalized, `zExp' must be 0; in that case, the result
 607 | returned is a subnormal number, and it must not require rounding.  In the
 608 | usual case that the input significand is normalized, `zExp' must be 1 less
 609 | than the ``true'' floating-point exponent.  The handling of underflow and
 610 | overflow follows the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
 611 *----------------------------------------------------------------------------*/
 612
 613 float128 roundAndPackFloat128(
 614      int zSign, Bit32s zExp, Bit64u zSig0, Bit64u zSig1, Bit64u zSig2, float_status_t &status)
 615 {
 616     int increment = ((Bit64s) zSig2 < 0);
 617     if (0x7FFD <= (Bit32u) zExp) {
 618         if ((0x7FFD < zExp)
 619              || ((zExp == 0x7FFD)
 620                   && eq128(BX_CONST64(0x0001FFFFFFFFFFFF),
 621                          BX_CONST64(0xFFFFFFFFFFFFFFFF), zSig0, zSig1)
 622                   && increment))
 623         {
 624             float_raise(status, float_flag_overflow | float_flag_inexact);
 625             return packFloat128(zSign, 0x7FFF, 0, 0);
 626         }
 627         if (zExp < 0) {
 628             int isTiny = (zExp < -1)
 629                 || ! increment
 630                 || lt128(zSig0, zSig1,
 631                        BX_CONST64(0x0001FFFFFFFFFFFF),
 632                        BX_CONST64(0xFFFFFFFFFFFFFFFF));
 633             shift128ExtraRightJamming(
 634                 zSig0, zSig1, zSig2, -zExp, &zSig0, &zSig1, &zSig2);
 635             zExp = 0;
 636             if (isTiny && zSig2) float_raise(status, float_flag_underflow);
 637             increment = ((Bit64s) zSig2 < 0);
 638         }
 639     }
 640     if (zSig2) float_raise(status, float_flag_inexact);
 641     if (increment) {
 642         add128(zSig0, zSig1, 0, 1, &zSig0, &zSig1);
 643         zSig1 &= ~((zSig2 + zSig2 == 0) & 1);
 644     }
 645     else {
 646         if ((zSig0 | zSig1) == 0) zExp = 0;
 647     }
 648     return packFloat128(zSign, zExp, zSig0, zSig1);
 649 }
 650
 651 /*----------------------------------------------------------------------------
 652 | Takes an abstract floating-point value having sign `zSign', exponent `zExp',
 653 | and significand formed by the concatenation of `zSig0' and `zSig1', and
 654 | returns the proper quadruple-precision floating-point value corresponding
 655 | to the abstract input.  This routine is just like `roundAndPackFloat128'
 656 | except that the input significand has fewer bits and does not have to be
 657 | normalized.  In all cases, `zExp' must be 1 less than the ``true'' floating-
 658 | point exponent.
 659 *----------------------------------------------------------------------------*/
 660
 661 float128 normalizeRoundAndPackFloat128(
 662      int zSign, Bit32s zExp, Bit64u zSig0, Bit64u zSig1, float_status_t &status)
 663 {
 664     Bit64u zSig2;
 665
 666     if (zSig0 == 0) {
 667         zSig0 = zSig1;
 668         zSig1 = 0;
 669         zExp -= 64;
 670     }
 671     int shiftCount = countLeadingZeros64(zSig0) - 15;
 672     if (0 <= shiftCount) {
 673         zSig2 = 0;
 674         shortShift128Left(zSig0, zSig1, shiftCount, &zSig0, &zSig1);
 675     }
 676     else {
 677         shift128ExtraRightJamming(
 678             zSig0, zSig1, 0, -shiftCount, &zSig0, &zSig1, &zSig2);
 679     }
 680     zExp -= shiftCount;
 681     return roundAndPackFloat128(zSign, zExp, zSig0, zSig1, zSig2, status);
 682 }
 683
 684 #endif