fpu/softfloat-macros.h

   1 /*============================================================================
   2 This C source fragment is part of the SoftFloat IEC/IEEE Floating-point
   3 Arithmetic Package, Release 2b.
   4
   5 Written by John R. Hauser.  This work was made possible in part by the
   6 International Computer Science Institute, located at Suite 600, 1947 Center
   7 Street, Berkeley, California 94704.  Funding was partially provided by the
   8 National Science Foundation under grant MIP-9311980.  The original version
   9 of this code was written as part of a project to build a fixed-point vector
  10 processor in collaboration with the University of California at Berkeley,
  11 overseen by Profs. Nelson Morgan and John Wawrzynek.  More information
  12 is available through the Web page `http://www.cs.berkeley.edu/~jhauser/
  13 arithmetic/SoftFloat.html'.
  14
  15 THIS SOFTWARE IS DISTRIBUTED AS IS, FOR FREE.  Although reasonable effort has
  16 been made to avoid it, THIS SOFTWARE MAY CONTAIN FAULTS THAT WILL AT TIMES
  17 RESULT IN INCORRECT BEHAVIOR.  USE OF THIS SOFTWARE IS RESTRICTED TO PERSONS
  18 AND ORGANIZATIONS WHO CAN AND WILL TAKE FULL RESPONSIBILITY FOR ALL LOSSES,
  19 COSTS, OR OTHER PROBLEMS THEY INCUR DUE TO THE SOFTWARE, AND WHO FURTHERMORE
  20 EFFECTIVELY INDEMNIFY JOHN HAUSER AND THE INTERNATIONAL COMPUTER SCIENCE
  21 INSTITUTE (possibly via similar legal notice) AGAINST ALL LOSSES, COSTS, OR
  22 OTHER PROBLEMS INCURRED BY THEIR CUSTOMERS AND CLIENTS DUE TO THE SOFTWARE.
  23
  24 Derivative works are acceptable, even for commercial purposes, so long as
  25 (1) the source code for the derivative work includes prominent notice that
  26 the work is derivative, and (2) the source code includes prominent notice with
  27 these four paragraphs for those parts of this code that are retained.
  28 =============================================================================*/
  29
  30 /*============================================================================
   31  * Adapted for Bochs (x86 architecture simulator) by
  32  *            Stanislav Shwartsman [sshwarts at sourceforge net]
  33  * ==========================================================================*/
  34
  35 #ifndef _SOFTFLOAT_MACROS_H_
  36 #define _SOFTFLOAT_MACROS_H_
  37
  38 /*----------------------------------------------------------------------------
  39 | Shifts `a' right by the number of bits given in `count'.  If any nonzero
  40 | bits are shifted off, they are ``jammed'' into the least significant bit of
  41 | the result by setting the least significant bit to 1.  The value of `count'
  42 | can be arbitrarily large; in particular, if `count' is greater than 32, the
  43 | result will be either 0 or 1, depending on whether `a' is zero or nonzero.
  44 | The result is stored in the location pointed to by `zPtr'.
  45 *----------------------------------------------------------------------------*/
  46
  47 BX_CPP_INLINE void shift32RightJamming(Bit32u a, int count, Bit32u *zPtr)
  48 {
  49     Bit32u z;
  50
  51     if (count == 0) {
  52         z = a;
  53     }
  54     else if (count < 32) {
  55         z = (a>>count) | ((a<<((-count) & 31)) != 0);
  56     }
  57     else {
  58         z = (a != 0);
  59     }
  60     *zPtr = z;
  61 }
  62
  63 /*----------------------------------------------------------------------------
  64 | Shifts `a' right by the number of bits given in `count'.  If any nonzero
  65 | bits are shifted off, they are ``jammed'' into the least significant bit of
  66 | the result by setting the least significant bit to 1.  The value of `count'
  67 | can be arbitrarily large; in particular, if `count' is greater than 64, the
  68 | result will be either 0 or 1, depending on whether `a' is zero or nonzero.
  69 | The result is stored in the location pointed to by `zPtr'.
  70 *----------------------------------------------------------------------------*/
  71
  72 BX_CPP_INLINE void shift64RightJamming(Bit64u a, int count, Bit64u *zPtr)
  73 {
  74     if (count == 0) {
  75         *zPtr = a;
  76     }
  77     else if (count < 64) {
  78         *zPtr = (a>>count) | ((a<<((-count) & 63)) != 0);
  79     }
  80     else {
  81         *zPtr = (a != 0);
  82     }
  83 }
  84
  85 /*----------------------------------------------------------------------------
  86 | Shifts the 128-bit value formed by concatenating `a0' and `a1' right by 64
  87 | _plus_ the number of bits given in `count'.  The shifted result is at most
  88 | 64 nonzero bits; this is stored at the location pointed to by `z0Ptr'.  The
  89 | bits shifted off form a second 64-bit result as follows:  The _last_ bit
  90 | shifted off is the most-significant bit of the extra result, and the other
  91 | 63 bits of the extra result are all zero if and only if _all_but_the_last_
  92 | bits shifted off were all zero.  This extra result is stored in the location
  93 | pointed to by `z1Ptr'.  The value of `count' can be arbitrarily large.
  94 |     (This routine makes more sense if `a0' and `a1' are considered to form
  95 | a fixed-point value with binary point between `a0' and `a1'.  This fixed-
  96 | point value is shifted right by the number of bits given in `count', and
  97 | the integer part of the result is returned at the location pointed to by
  98 | `z0Ptr'.  The fractional part of the result may be slightly corrupted as
  99 | described above, and is returned at the location pointed to by `z1Ptr'.)
 100 *----------------------------------------------------------------------------*/
 101
 102 BX_CPP_INLINE void
 103  shift64ExtraRightJamming(
 104      Bit64u a0, Bit64u a1, int count, Bit64u *z0Ptr, Bit64u *z1Ptr)
 105 {
 106     Bit64u z0, z1;
 107     int negCount = (-count) & 63;
 108
 109     if (count == 0) {
 110         z1 = a1;
 111         z0 = a0;
 112     }
 113     else if (count < 64) {
 114         z1 = (a0<<negCount) | (a1 != 0);
 115         z0 = a0>>count;
 116     }
 117     else {
 118         if (count == 64) {
 119             z1 = a0 | (a1 != 0);
 120         }
 121         else {
 122             z1 = ((a0 | a1) != 0);
 123         }
 124         z0 = 0;
 125     }
 126     *z1Ptr = z1;
 127     *z0Ptr = z0;
 128 }
 129
 130 /*----------------------------------------------------------------------------
 131 | Adds the 128-bit value formed by concatenating `a0' and `a1' to the 128-bit
 132 | value formed by concatenating `b0' and `b1'.  Addition is modulo 2^128, so
 133 | any carry out is lost.  The result is broken into two 64-bit pieces which
 134 | are stored at the locations pointed to by `z0Ptr' and `z1Ptr'.
 135 *----------------------------------------------------------------------------*/
 136
 137 BX_CPP_INLINE void
 138  add128(Bit64u a0, Bit64u a1, Bit64u b0, Bit64u b1, Bit64u *z0Ptr, Bit64u *z1Ptr)
 139 {
 140     Bit64u z1 = a1 + b1;
 141     *z1Ptr = z1;
 142     *z0Ptr = a0 + b0 + (z1 < a1);
 143 }
 144
 145 /*----------------------------------------------------------------------------
 146 | Subtracts the 128-bit value formed by concatenating `b0' and `b1' from the
 147 | 128-bit value formed by concatenating `a0' and `a1'.  Subtraction is modulo
 148 | 2^128, so any borrow out (carry out) is lost.  The result is broken into two
 149 | 64-bit pieces which are stored at the locations pointed to by `z0Ptr' and
 150 | `z1Ptr'.
 151 *----------------------------------------------------------------------------*/
 152
 153 BX_CPP_INLINE void
 154  sub128(Bit64u a0, Bit64u a1, Bit64u b0, Bit64u b1, Bit64u *z0Ptr, Bit64u *z1Ptr)
 155 {
 156     *z1Ptr = a1 - b1;
 157     *z0Ptr = a0 - b0 - (a1 < b1);
 158 }
 159
 160 /*----------------------------------------------------------------------------
 161 | Multiplies `a' by `b' to obtain a 128-bit product.  The product is broken
 162 | into two 64-bit pieces which are stored at the locations pointed to by
 163 | `z0Ptr' and `z1Ptr'.
 164 *----------------------------------------------------------------------------*/
 165
 166 BX_CPP_INLINE void mul64To128(Bit64u a, Bit64u b, Bit64u *z0Ptr, Bit64u *z1Ptr)
 167 {
 168     Bit32u aHigh, aLow, bHigh, bLow;
 169     Bit64u z0, zMiddleA, zMiddleB, z1;
 170
 171     aLow = (Bit32u) a;
 172     aHigh = (Bit32u)(a>>32);
 173     bLow = (Bit32u) b;
 174     bHigh = (Bit32u)(b>>32);
 175     z1 = ((Bit64u) aLow) * bLow;
 176     zMiddleA = ((Bit64u) aLow) * bHigh;
 177     zMiddleB = ((Bit64u) aHigh) * bLow;
 178     z0 = ((Bit64u) aHigh) * bHigh;
 179     zMiddleA += zMiddleB;
 180     z0 += (((Bit64u) (zMiddleA < zMiddleB))<<32) + (zMiddleA>>32);
 181     zMiddleA <<= 32;
 182     z1 += zMiddleA;
 183     z0 += (z1 < zMiddleA);
 184     *z1Ptr = z1;
 185     *z0Ptr = z0;
 186 }
 187
 188 /*----------------------------------------------------------------------------
 189 | Returns an approximation to the 64-bit integer quotient obtained by dividing
 190 | `b' into the 128-bit value formed by concatenating `a0' and `a1'.  The
 191 | divisor `b' must be at least 2^63.  If q is the exact quotient truncated
 192 | toward zero, the approximation returned lies between q and q + 2 inclusive.
 193 | If the exact quotient q is larger than 64 bits, the maximum positive 64-bit
 194 | unsigned integer is returned.
 195 *----------------------------------------------------------------------------*/
 196
 197 static Bit64u estimateDiv128To64(Bit64u a0, Bit64u a1, Bit64u b)
 198 {
 199     Bit64u b0, b1;
 200     Bit64u rem0, rem1, term0, term1;
 201     Bit64u z;
 202
 203     if (b <= a0) return BX_CONST64(0xFFFFFFFFFFFFFFFF);
 204     b0 = b>>32;
 205     z = (b0<<32 <= a0) ? BX_CONST64(0xFFFFFFFF00000000) : (a0 / b0)<<32;
 206     mul64To128(b, z, &term0, &term1);
 207     sub128(a0, a1, term0, term1, &rem0, &rem1);
 208     while (((Bit64s) rem0) < 0) {
 209         z -= BX_CONST64(0x100000000);
 210         b1 = b<<32;
 211         add128(rem0, rem1, b0, b1, &rem0, &rem1);
 212     }
 213     rem0 = (rem0<<32) | (rem1>>32);
 214     z |= (b0<<32 <= rem0) ? 0xFFFFFFFF : rem0 / b0;
 215     return z;
 216 }
 217
 218 /*----------------------------------------------------------------------------
 219 | Returns an approximation to the square root of the 32-bit significand given
 220 | by `a'.  Considered as an integer, `a' must be at least 2^31.  If bit 0 of
 221 | `aExp' (the least significant bit) is 1, the integer returned approximates
 222 | 2^31*sqrt(`a'/2^31), where `a' is considered an integer.  If bit 0 of `aExp'
 223 | is 0, the integer returned approximates 2^31*sqrt(`a'/2^30).  In either
 224 | case, the approximation returned lies strictly within +/-2 of the exact
 225 | value.
 226 *----------------------------------------------------------------------------*/
 227
 228 static Bit32u estimateSqrt32(Bit16s aExp, Bit32u a)
 229 {
 230     static const Bit16u sqrtOddAdjustments[] = {
 231         0x0004, 0x0022, 0x005D, 0x00B1, 0x011D, 0x019F, 0x0236, 0x02E0,
 232         0x039C, 0x0468, 0x0545, 0x0631, 0x072B, 0x0832, 0x0946, 0x0A67
 233     };
 234     static const Bit16u sqrtEvenAdjustments[] = {
 235         0x0A2D, 0x08AF, 0x075A, 0x0629, 0x051A, 0x0429, 0x0356, 0x029E,
 236         0x0200, 0x0179, 0x0109, 0x00AF, 0x0068, 0x0034, 0x0012, 0x0002
 237     };
 238     Bit32u z;
 239
 240     int index = (a>>27) & 15;
 241     if (aExp & 1) {
 242         z = 0x4000 + (a>>17) - sqrtOddAdjustments[index];
 243         z = ((a / z)<<14) + (z<<15);
 244         a >>= 1;
 245     }
 246     else {
 247         z = 0x8000 + (a>>17) - sqrtEvenAdjustments[index];
 248         z = a / z + z;
 249         z = (0x20000 <= z) ? 0xFFFF8000 : (z<<15);
 250         if (z <= a) return (Bit32u) (((Bit32s) a)>>1);
 251     }
 252     return ((Bit32u) ((((Bit64u) a)<<31) / z)) + (z>>1);
 253 }
 254
 255 /*----------------------------------------------------------------------------
 256 | Returns the number of leading 0 bits before the most-significant 1 bit of
 257 | `a'.  If `a' is zero, 32 is returned.
 258 *----------------------------------------------------------------------------*/
 259
 260 static int countLeadingZeros32(Bit32u a)
 261 {
 262     static const int countLeadingZerosHigh[] = {
 263         8, 7, 6, 6, 5, 5, 5, 5, 4, 4, 4, 4, 4, 4, 4, 4,
 264         3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
 265         2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
 266         2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
 267         1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
 268         1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
 269         1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
 270         1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
 271         0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
 272         0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
 273         0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
 274         0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
 275         0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
 276         0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
 277         0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
 278         0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
 279     };
 280     int shiftCount = 0;
 281     if (a < 0x10000) {
 282         shiftCount += 16;
 283         a <<= 16;
 284     }
 285     if (a < 0x1000000) {
 286         shiftCount += 8;
 287         a <<= 8;
 288     }
 289     shiftCount += countLeadingZerosHigh[ a>>24 ];
 290     return shiftCount;
 291 }
 292
 293 /*----------------------------------------------------------------------------
 294 | Returns the number of leading 0 bits before the most-significant 1 bit of
 295 | `a'.  If `a' is zero, 64 is returned.
 296 *----------------------------------------------------------------------------*/
 297
 298 BX_CPP_INLINE int countLeadingZeros64(Bit64u a)
 299 {
 300     int shiftCount = 0;
 301     if (a < ((Bit64u) 1)<<32) {
 302         shiftCount += 32;
 303     }
 304     else {
 305         a >>= 32;
 306     }
 307     shiftCount += countLeadingZeros32((int)(a));
 308     return shiftCount;
 309 }
 310
 311 #ifdef FLOATX80
 312
 313 /*----------------------------------------------------------------------------
 314 | Shifts the 128-bit value formed by concatenating `a0' and `a1' right by the
 315 | number of bits given in `count'.  Any bits shifted off are lost.  The value
 316 | of `count' can be arbitrarily large; in particular, if `count' is greater
 317 | than 128, the result will be 0.  The result is broken into two 64-bit pieces
 318 | which are stored at the locations pointed to by `z0Ptr' and `z1Ptr'.
 319 *----------------------------------------------------------------------------*/
 320
 321 BX_CPP_INLINE void
 322  shift128Right(Bit64u a0, Bit64u a1, int count, Bit64u *z0Ptr, Bit64u *z1Ptr)
 323 {
 324     Bit64u z0, z1;
 325     int negCount = (-count) & 63;
 326
 327     if (count == 0) {
 328         z1 = a1;
 329         z0 = a0;
 330     }
 331     else if (count < 64) {
 332         z1 = (a0<<negCount) | (a1>>count);
 333         z0 = a0>>count;
 334     }
 335     else {
 336         z1 = (count < 64) ? (a0>>(count & 63)) : 0;
 337         z0 = 0;
 338     }
 339     *z1Ptr = z1;
 340     *z0Ptr = z0;
 341 }
 342
 343 /*----------------------------------------------------------------------------
 344 | Shifts the 128-bit value formed by concatenating `a0' and `a1' right by the
 345 | number of bits given in `count'.  If any nonzero bits are shifted off, they
 346 | are ``jammed'' into the least significant bit of the result by setting the
 347 | least significant bit to 1.  The value of `count' can be arbitrarily large;
 348 | in particular, if `count' is greater than 128, the result will be either
 349 | 0 or 1, depending on whether the concatenation of `a0' and `a1' is zero or
 350 | nonzero.  The result is broken into two 64-bit pieces which are stored at
 351 | the locations pointed to by `z0Ptr' and `z1Ptr'.
 352 *----------------------------------------------------------------------------*/
 353
 354 BX_CPP_INLINE void
 355  shift128RightJamming(
 356      Bit64u a0, Bit64u a1, int count, Bit64u *z0Ptr, Bit64u *z1Ptr)
 357 {
 358     Bit64u z0, z1;
 359     int negCount = (-count) & 63;
 360
 361     if (count == 0) {
 362         z1 = a1;
 363         z0 = a0;
 364     }
 365     else if (count < 64) {
 366         z1 = (a0<<negCount) | (a1>>count) | ((a1<<negCount) != 0);
 367         z0 = a0>>count;
 368     }
 369     else {
 370         if (count == 64) {
 371             z1 = a0 | (a1 != 0);
 372         }
 373         else if (count < 128) {
 374             z1 = (a0>>(count & 63)) | (((a0<<negCount) | a1) != 0);
 375         }
 376         else {
 377             z1 = ((a0 | a1) != 0);
 378         }
 379         z0 = 0;
 380     }
 381     *z1Ptr = z1;
 382     *z0Ptr = z0;
 383 }
 384
 385 /*----------------------------------------------------------------------------
 386 | Shifts the 128-bit value formed by concatenating `a0' and `a1' left by the
 387 | number of bits given in `count'.  Any bits shifted off are lost.  The value
 388 | of `count' must be less than 64.  The result is broken into two 64-bit
 389 | pieces which are stored at the locations pointed to by `z0Ptr' and `z1Ptr'.
 390 *----------------------------------------------------------------------------*/
 391
 392 BX_CPP_INLINE void
 393  shortShift128Left(
 394      Bit64u a0, Bit64u a1, int count, Bit64u *z0Ptr, Bit64u *z1Ptr)
 395 {
 396     *z1Ptr = a1<<count;
 397     *z0Ptr = (count == 0) ? a0 : (a0<<count) | (a1>>((-count) & 63));
 398 }
 399
 400 /*----------------------------------------------------------------------------
 401 | Adds the 192-bit value formed by concatenating `a0', `a1', and `a2' to the
 402 | 192-bit value formed by concatenating `b0', `b1', and `b2'.  Addition is
 403 | modulo 2^192, so any carry out is lost.  The result is broken into three
 404 | 64-bit pieces which are stored at the locations pointed to by `z0Ptr',
 405 | `z1Ptr', and `z2Ptr'.
 406 *----------------------------------------------------------------------------*/
 407
 408 BX_CPP_INLINE void add192(
 409      Bit64u a0,
 410      Bit64u a1,
 411      Bit64u a2,
 412      Bit64u b0,
 413      Bit64u b1,
 414      Bit64u b2,
 415      Bit64u *z0Ptr,
 416      Bit64u *z1Ptr,
 417      Bit64u *z2Ptr
 418 )
 419 {
 420     Bit64u z0, z1, z2;
 421     unsigned carry0, carry1;
 422
 423     z2 = a2 + b2;
 424     carry1 = (z2 < a2);
 425     z1 = a1 + b1;
 426     carry0 = (z1 < a1);
 427     z0 = a0 + b0;
 428     z1 += carry1;
 429     z0 += (z1 < carry1);
 430     z0 += carry0;
 431     *z2Ptr = z2;
 432     *z1Ptr = z1;
 433     *z0Ptr = z0;
 434 }
 435
 436 /*----------------------------------------------------------------------------
 437 | Subtracts the 192-bit value formed by concatenating `b0', `b1', and `b2'
 438 | from the 192-bit value formed by concatenating `a0', `a1', and `a2'.
 439 | Subtraction is modulo 2^192, so any borrow out (carry out) is lost.  The
 440 | result is broken into three 64-bit pieces which are stored at the locations
 441 | pointed to by `z0Ptr', `z1Ptr', and `z2Ptr'.
 442 *----------------------------------------------------------------------------*/
 443
 444 BX_CPP_INLINE void sub192(
 445      Bit64u a0,
 446      Bit64u a1,
 447      Bit64u a2,
 448      Bit64u b0,
 449      Bit64u b1,
 450      Bit64u b2,
 451      Bit64u *z0Ptr,
 452      Bit64u *z1Ptr,
 453      Bit64u *z2Ptr
 454 )
 455 {
 456     Bit64u z0, z1, z2;
 457     unsigned borrow0, borrow1;
 458
 459     z2 = a2 - b2;
 460     borrow1 = (a2 < b2);
 461     z1 = a1 - b1;
 462     borrow0 = (a1 < b1);
 463     z0 = a0 - b0;
 464     z0 -= (z1 < borrow1);
 465     z1 -= borrow1;
 466     z0 -= borrow0;
 467     *z2Ptr = z2;
 468     *z1Ptr = z1;
 469     *z0Ptr = z0;
 470 }
 471
 472 /*----------------------------------------------------------------------------
 473 | Returns 1 if the 128-bit value formed by concatenating `a0' and `a1'
 474 | is equal to the 128-bit value formed by concatenating `b0' and `b1'.
 475 | Otherwise, returns 0.
 476 *----------------------------------------------------------------------------*/
 477
 478 BX_CPP_INLINE int eq128(Bit64u a0, Bit64u a1, Bit64u b0, Bit64u b1)
 479 {
 480     return (a0 == b0) && (a1 == b1);
 481 }
 482
 483 /*----------------------------------------------------------------------------
 484 | Returns 1 if the 128-bit value formed by concatenating `a0' and `a1' is less
 485 | than or equal to the 128-bit value formed by concatenating `b0' and `b1'.
 486 | Otherwise, returns 0.
 487 *----------------------------------------------------------------------------*/
 488
 489 BX_CPP_INLINE int le128(Bit64u a0, Bit64u a1, Bit64u b0, Bit64u b1)
 490 {
 491     return (a0 < b0) || ((a0 == b0) && (a1 <= b1));
 492 }
 493
 494 /*----------------------------------------------------------------------------
 495 | Returns 1 if the 128-bit value formed by concatenating `a0' and `a1' is less
 496 | than the 128-bit value formed by concatenating `b0' and `b1'.  Otherwise,
 497 | returns 0.
 498 *----------------------------------------------------------------------------*/
 499
 500 BX_CPP_INLINE int lt128(Bit64u a0, Bit64u a1, Bit64u b0, Bit64u b1)
 501 {
 502     return (a0 < b0) || ((a0 == b0) && (a1 < b1));
 503 }
 504
 505 #endif  /* FLOATX80 */
 506
 507 /*----------------------------------------------------------------------------
 508 | Multiplies the 128-bit value formed by concatenating `a0' and `a1' by
 509 | `b' to obtain a 192-bit product.  The product is broken into three 64-bit
 510 | pieces which are stored at the locations pointed to by `z0Ptr', `z1Ptr', and
 511 | `z2Ptr'.
 512 *----------------------------------------------------------------------------*/
 513
 514 BX_CPP_INLINE void mul128By64To192(
 515      Bit64u a0,
 516      Bit64u a1,
 517      Bit64u b,
 518      Bit64u *z0Ptr,
 519      Bit64u *z1Ptr,
 520      Bit64u *z2Ptr
 521 )
 522 {
 523     Bit64u z0, z1, z2, more1;
 524
 525     mul64To128(a1, b, &z1, &z2);
 526     mul64To128(a0, b, &z0, &more1);
 527     add128(z0, more1, 0, z1, &z0, &z1);
 528     *z2Ptr = z2;
 529     *z1Ptr = z1;
 530     *z0Ptr = z0;
 531 }
 532
 533 #ifdef FLOAT128
 534
 535 /*----------------------------------------------------------------------------
 536 | Multiplies the 128-bit value formed by concatenating `a0' and `a1' to the
 537 | 128-bit value formed by concatenating `b0' and `b1' to obtain a 256-bit
 538 | product.  The product is broken into four 64-bit pieces which are stored at
 539 | the locations pointed to by `z0Ptr', `z1Ptr', `z2Ptr', and `z3Ptr'.
 540 *----------------------------------------------------------------------------*/
 541
 542 BX_CPP_INLINE void mul128To256(
 543      Bit64u a0,
 544      Bit64u a1,
 545      Bit64u b0,
 546      Bit64u b1,
 547      Bit64u *z0Ptr,
 548      Bit64u *z1Ptr,
 549      Bit64u *z2Ptr,
 550      Bit64u *z3Ptr
 551 )
 552 {
 553     Bit64u z0, z1, z2, z3;
 554     Bit64u more1, more2;
 555
 556     mul64To128(a1, b1, &z2, &z3);
 557     mul64To128(a1, b0, &z1, &more2);
 558     add128(z1, more2, 0, z2, &z1, &z2);
 559     mul64To128(a0, b0, &z0, &more1);
 560     add128(z0, more1, 0, z1, &z0, &z1);
 561     mul64To128(a0, b1, &more1, &more2);
 562     add128(more1, more2, 0, z2, &more1, &z2);
 563     add128(z0, z1, 0, more1, &z0, &z1);
 564     *z3Ptr = z3;
 565     *z2Ptr = z2;
 566     *z1Ptr = z1;
 567     *z0Ptr = z0;
 568 }
 569
 570
 571 /*----------------------------------------------------------------------------
 572 | Shifts the 192-bit value formed by concatenating `a0', `a1', and `a2' right
 573 | by 64 _plus_ the number of bits given in `count'.  The shifted result is
 574 | at most 128 nonzero bits; these are broken into two 64-bit pieces which are
 575 | stored at the locations pointed to by `z0Ptr' and `z1Ptr'.  The bits shifted
 576 | off form a third 64-bit result as follows:  The _last_ bit shifted off is
 577 | the most-significant bit of the extra result, and the other 63 bits of the
 578 | extra result are all zero if and only if _all_but_the_last_ bits shifted off
 579 | were all zero.  This extra result is stored in the location pointed to by
 580 | `z2Ptr'.  The value of `count' can be arbitrarily large.
 581 |     (This routine makes more sense if `a0', `a1', and `a2' are considered
 582 | to form a fixed-point value with binary point between `a1' and `a2'.  This
 583 | fixed-point value is shifted right by the number of bits given in `count',
 584 | and the integer part of the result is returned at the locations pointed to
 585 | by `z0Ptr' and `z1Ptr'.  The fractional part of the result may be slightly
 586 | corrupted as described above, and is returned at the location pointed to by
 587 | `z2Ptr'.)
 588 *----------------------------------------------------------------------------*/
 589
 590 BX_CPP_INLINE void shift128ExtraRightJamming(
 591      Bit64u a0,
 592      Bit64u a1,
 593      Bit64u a2,
 594      int count,
 595      Bit64u *z0Ptr,
 596      Bit64u *z1Ptr,
 597      Bit64u *z2Ptr
 598 )
 599 {
 600     Bit64u z0, z1, z2;
 601     int negCount = (-count) & 63;
 602
 603     if (count == 0) {
 604         z2 = a2;
 605         z1 = a1;
 606         z0 = a0;
 607     }
 608     else {
 609         if (count < 64) {
 610             z2 = a1<<negCount;
 611             z1 = (a0<<negCount) | (a1>>count);
 612             z0 = a0>>count;
 613         }
 614         else {
 615             if (count == 64) {
 616                 z2 = a1;
 617                 z1 = a0;
 618             }
 619             else {
 620                 a2 |= a1;
 621                 if (count < 128) {
 622                     z2 = a0<<negCount;
 623                     z1 = a0>>(count & 63);
 624                 }
 625                 else {
 626                     z2 = (count == 128) ? a0 : (a0 != 0);
 627                     z1 = 0;
 628                 }
 629             }
 630             z0 = 0;
 631         }
 632         z2 |= (a2 != 0);
 633     }
 634     *z2Ptr = z2;
 635     *z1Ptr = z1;
 636     *z0Ptr = z0;
 637 }
 638
 639 #endif  /* FLOAT128 */
 640
 641 #endif