fpu/softfloat-macros.h

   1 /*============================================================================
   2 This C source fragment is part of the SoftFloat IEC/IEEE Floating-point
   3 Arithmetic Package, Release 2b.
   4
   5 Written by John R. Hauser.  This work was made possible in part by the
   6 International Computer Science Institute, located at Suite 600, 1947 Center
   7 Street, Berkeley, California 94704.  Funding was partially provided by the
   8 National Science Foundation under grant MIP-9311980.  The original version
   9 of this code was written as part of a project to build a fixed-point vector
  10 processor in collaboration with the University of California at Berkeley,
  11 overseen by Profs. Nelson Morgan and John Wawrzynek.  More information
  12 is available through the Web page `http://www.cs.berkeley.edu/~jhauser/
  13 arithmetic/SoftFloat.html'.
  14
  15 THIS SOFTWARE IS DISTRIBUTED AS IS, FOR FREE.  Although reasonable effort has
  16 been made to avoid it, THIS SOFTWARE MAY CONTAIN FAULTS THAT WILL AT TIMES
  17 RESULT IN INCORRECT BEHAVIOR.  USE OF THIS SOFTWARE IS RESTRICTED TO PERSONS
  18 AND ORGANIZATIONS WHO CAN AND WILL TAKE FULL RESPONSIBILITY FOR ALL LOSSES,
  19 COSTS, OR OTHER PROBLEMS THEY INCUR DUE TO THE SOFTWARE, AND WHO FURTHERMORE
  20 EFFECTIVELY INDEMNIFY JOHN HAUSER AND THE INTERNATIONAL COMPUTER SCIENCE
  21 INSTITUTE (possibly via similar legal notice) AGAINST ALL LOSSES, COSTS, OR
  22 OTHER PROBLEMS INCURRED BY THEIR CUSTOMERS AND CLIENTS DUE TO THE SOFTWARE.
  23
  24 Derivative works are acceptable, even for commercial purposes, so long as
  25 (1) the source code for the derivative work includes prominent notice that
  26 the work is derivative, and (2) the source code includes prominent notice with
  27 these four paragraphs for those parts of this code that are retained.
  28 =============================================================================*/
  29
  30 /*============================================================================
  31  * Adapted for Bochs (x86 architecture simulator) by
  32  *            Stanislav Shwartsman [sshwarts at sourceforge net]
  33  * ==========================================================================*/
  34
  35 #ifndef _SOFTFLOAT_MACROS_H_
  36 #define _SOFTFLOAT_MACROS_H_
  37
  38 /*----------------------------------------------------------------------------
  39 | Shifts `a' right by the number of bits given in `count'.  If any nonzero
  40 | bits are shifted off, they are ``jammed'' into the least significant bit of
  41 | the result by setting the least significant bit to 1.  The value of `count'
  42 | can be arbitrarily large; in particular, if `count' is 32 or greater, the
  43 | result will be either 0 or 1, depending on whether `a' is zero or nonzero.
  44 | The shifted value is returned as the function result.
  45 *----------------------------------------------------------------------------*/
  46
  47 BX_CPP_INLINE Bit32u shift32RightJamming(Bit32u a, int count)
  48 {
  49     Bit32u z;
  50
  51     if (count == 0) {
  52         z = a;
  53     }
  54     else if (count < 32) {
  55         z = (a>>count) | ((a<<((-count) & 31)) != 0);
  56     }
  57     else {
  58         z = (a != 0);
  59     }
  60
  61     return z;
  62 }
  63
  64 /*----------------------------------------------------------------------------
  65 | Shifts `a' right by the number of bits given in `count'.  If any nonzero
  66 | bits are shifted off, they are ``jammed'' into the least significant bit of
  67 | the result by setting the least significant bit to 1.  The value of `count'
  68 | can be arbitrarily large; in particular, if `count' is 64 or greater, the
  69 | result will be either 0 or 1, depending on whether `a' is zero or nonzero.
  70 | The shifted value is returned as the function result.
  71 *----------------------------------------------------------------------------*/
  72
  73 BX_CPP_INLINE Bit64u shift64RightJamming(Bit64u a, int count)
  74 {
  75     Bit64u z;
  76
  77     if (count == 0) {
  78         z = a;
  79     }
  80     else if (count < 64) {
  81         z = (a>>count) | ((a << ((-count) & 63)) != 0);
  82     }
  83     else {
  84         z = (a != 0);
  85     }
  86
  87     return z;
  88 }
  89
  90 /*----------------------------------------------------------------------------
  91 | Shifts the 128-bit value formed by concatenating `a0' and `a1' right by 64
  92 | _plus_ the number of bits given in `count'.  The shifted result is at most
  93 | 64 nonzero bits; this is stored at the location pointed to by `z0Ptr'.  The
  94 | bits shifted off form a second 64-bit result as follows:  The _last_ bit
  95 | shifted off is the most-significant bit of the extra result, and the other
  96 | 63 bits of the extra result are all zero if and only if _all_but_the_last_
  97 | bits shifted off were all zero.  This extra result is stored in the location
  98 | pointed to by `z1Ptr'.  The value of `count' can be arbitrarily large.
  99 |     (This routine makes more sense if `a0' and `a1' are considered to form
 100 | a fixed-point value with binary point between `a0' and `a1'.  This fixed-
 101 | point value is shifted right by the number of bits given in `count', and
 102 | the integer part of the result is returned at the location pointed to by
 103 | `z0Ptr'.  The fractional part of the result may be slightly corrupted as
 104 | described above, and is returned at the location pointed to by `z1Ptr'.)
 105 *----------------------------------------------------------------------------*/
 106
 107 BX_CPP_INLINE void
 108  shift64ExtraRightJamming(
 109      Bit64u a0, Bit64u a1, int count, Bit64u *z0Ptr, Bit64u *z1Ptr)
 110 {
 111     Bit64u z0, z1;
 112     int negCount = (-count) & 63;
 113
 114     if (count == 0) {
 115         z1 = a1;
 116         z0 = a0;
 117     }
 118     else if (count < 64) {
 119         z1 = (a0<<negCount) | (a1 != 0);
 120         z0 = a0>>count;
 121     }
 122     else {
 123         if (count == 64) {
 124             z1 = a0 | (a1 != 0);
 125         }
 126         else {
 127             z1 = ((a0 | a1) != 0);
 128         }
 129         z0 = 0;
 130     }
 131     *z1Ptr = z1;
 132     *z0Ptr = z0;
 133 }
 134
 135 /*----------------------------------------------------------------------------
 136 | Adds the 128-bit value formed by concatenating `a0' and `a1' to the 128-bit
 137 | value formed by concatenating `b0' and `b1'.  Addition is modulo 2^128, so
 138 | any carry out is lost.  The result is broken into two 64-bit pieces which
 139 | are stored at the locations pointed to by `z0Ptr' and `z1Ptr'.
 140 *----------------------------------------------------------------------------*/
 141
 142 BX_CPP_INLINE void
 143  add128(Bit64u a0, Bit64u a1, Bit64u b0, Bit64u b1, Bit64u *z0Ptr, Bit64u *z1Ptr)
 144 {
 145     Bit64u z1 = a1 + b1;
 146     *z1Ptr = z1;
 147     *z0Ptr = a0 + b0 + (z1 < a1);
 148 }
 149
 150 /*----------------------------------------------------------------------------
 151 | Subtracts the 128-bit value formed by concatenating `b0' and `b1' from the
 152 | 128-bit value formed by concatenating `a0' and `a1'.  Subtraction is modulo
 153 | 2^128, so any borrow out (carry out) is lost.  The result is broken into two
 154 | 64-bit pieces which are stored at the locations pointed to by `z0Ptr' and
 155 | `z1Ptr'.
 156 *----------------------------------------------------------------------------*/
 157
 158 BX_CPP_INLINE void
 159  sub128(Bit64u a0, Bit64u a1, Bit64u b0, Bit64u b1, Bit64u *z0Ptr, Bit64u *z1Ptr)
 160 {
 161     *z1Ptr = a1 - b1;
 162     *z0Ptr = a0 - b0 - (a1 < b1);
 163 }
 164
 165 /*----------------------------------------------------------------------------
 166 | Multiplies `a' by `b' to obtain a 128-bit product.  The product is broken
 167 | into two 64-bit pieces which are stored at the locations pointed to by
 168 | `z0Ptr' and `z1Ptr'.
 169 *----------------------------------------------------------------------------*/
 170
 171 BX_CPP_INLINE void mul64To128(Bit64u a, Bit64u b, Bit64u *z0Ptr, Bit64u *z1Ptr)
 172 {
 173     Bit32u aHigh, aLow, bHigh, bLow;
 174     Bit64u z0, zMiddleA, zMiddleB, z1;
 175
 176     aLow = (Bit32u) a;
 177     aHigh = (Bit32u)(a>>32);
 178     bLow = (Bit32u) b;
 179     bHigh = (Bit32u)(b>>32);
 180     z1 = ((Bit64u) aLow) * bLow;
 181     zMiddleA = ((Bit64u) aLow) * bHigh;
 182     zMiddleB = ((Bit64u) aHigh) * bLow;
 183     z0 = ((Bit64u) aHigh) * bHigh;
 184     zMiddleA += zMiddleB;
 185     z0 += (((Bit64u) (zMiddleA < zMiddleB))<<32) + (zMiddleA>>32);
 186     zMiddleA <<= 32;
 187     z1 += zMiddleA;
 188     z0 += (z1 < zMiddleA);
 189     *z1Ptr = z1;
 190     *z0Ptr = z0;
 191 }
 192
 193 /*----------------------------------------------------------------------------
 194 | Returns an approximation to the 64-bit integer quotient obtained by dividing
 195 | `b' into the 128-bit value formed by concatenating `a0' and `a1'.  The
 196 | divisor `b' must be at least 2^63.  If q is the exact quotient truncated
 197 | toward zero, the approximation returned lies between q and q + 2 inclusive.
 198 | If the exact quotient q is larger than 64 bits, the maximum positive 64-bit
 199 | unsigned integer is returned.
 200 *----------------------------------------------------------------------------*/
 201
 202 #ifdef USE_estimateDiv128To64
 203 static Bit64u estimateDiv128To64(Bit64u a0, Bit64u a1, Bit64u b)
 204 {
 205     Bit64u b0, b1;
 206     Bit64u rem0, rem1, term0, term1;
 207     Bit64u z;
 208
 209     if (b <= a0) return BX_CONST64(0xFFFFFFFFFFFFFFFF);
 210     b0 = b>>32;
 211     z = (b0<<32 <= a0) ? BX_CONST64(0xFFFFFFFF00000000) : (a0 / b0)<<32;
 212     mul64To128(b, z, &term0, &term1);
 213     sub128(a0, a1, term0, term1, &rem0, &rem1);
 214     while (((Bit64s) rem0) < 0) {
 215         z -= BX_CONST64(0x100000000);
 216         b1 = b<<32;
 217         add128(rem0, rem1, b0, b1, &rem0, &rem1);
 218     }
 219     rem0 = (rem0<<32) | (rem1>>32);
 220     z |= (b0<<32 <= rem0) ? 0xFFFFFFFF : rem0 / b0;
 221     return z;
 222 }
 223 #endif
 224
 225 /*----------------------------------------------------------------------------
 226 | Returns an approximation to the square root of the 32-bit significand given
 227 | by `a'.  Considered as an integer, `a' must be at least 2^31.  If bit 0 of
 228 | `aExp' (the least significant bit) is 1, the integer returned approximates
 229 | 2^31*sqrt(`a'/2^31), where `a' is considered an integer.  If bit 0 of `aExp'
 230 | is 0, the integer returned approximates 2^31*sqrt(`a'/2^30).  In either
 231 | case, the approximation returned lies strictly within +/-2 of the exact
 232 | value.
 233 *----------------------------------------------------------------------------*/
 234
 235 #ifdef USE_estimateSqrt32
 236 static Bit32u estimateSqrt32(Bit16s aExp, Bit32u a)
 237 {
 238     static const Bit16u sqrtOddAdjustments[] = {
 239         0x0004, 0x0022, 0x005D, 0x00B1, 0x011D, 0x019F, 0x0236, 0x02E0,
 240         0x039C, 0x0468, 0x0545, 0x0631, 0x072B, 0x0832, 0x0946, 0x0A67
 241     };
 242     static const Bit16u sqrtEvenAdjustments[] = {
 243         0x0A2D, 0x08AF, 0x075A, 0x0629, 0x051A, 0x0429, 0x0356, 0x029E,
 244         0x0200, 0x0179, 0x0109, 0x00AF, 0x0068, 0x0034, 0x0012, 0x0002
 245     };
 246     Bit32u z;
 247
 248     int index = (a>>27) & 15;
 249     if (aExp & 1) {
 250         z = 0x4000 + (a>>17) - sqrtOddAdjustments[index];
 251         z = ((a / z)<<14) + (z<<15);
 252         a >>= 1;
 253     }
 254     else {
 255         z = 0x8000 + (a>>17) - sqrtEvenAdjustments[index];
 256         z = a / z + z;
 257         z = (0x20000 <= z) ? 0xFFFF8000 : (z<<15);
 258         if (z <= a) return (Bit32u) (((Bit32s) a)>>1);
 259     }
 260     return ((Bit32u) ((((Bit64u) a)<<31) / z)) + (z>>1);
 261 }
 262 #endif
 263
 264 /*----------------------------------------------------------------------------
 265 | Returns the number of leading 0 bits before the most-significant 1 bit of
 266 | `a'.  If `a' is zero, 32 is returned.
 267 *----------------------------------------------------------------------------*/
 268
 269 static int countLeadingZeros32(Bit32u a)
 270 {
 271     static const int countLeadingZerosHigh[] = {
 272         8, 7, 6, 6, 5, 5, 5, 5, 4, 4, 4, 4, 4, 4, 4, 4,
 273         3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
 274         2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
 275         2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
 276         1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
 277         1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
 278         1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
 279         1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
 280         0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
 281         0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
 282         0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
 283         0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
 284         0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
 285         0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
 286         0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
 287         0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
 288     };
 289     int shiftCount = 0;
 290     if (a < 0x10000) {
 291         shiftCount += 16;
 292         a <<= 16;
 293     }
 294     if (a < 0x1000000) {
 295         shiftCount += 8;
 296         a <<= 8;
 297     }
 298     shiftCount += countLeadingZerosHigh[ a>>24 ];
 299     return shiftCount;
 300 }
 301
 302 /*----------------------------------------------------------------------------
 303 | Returns the number of leading 0 bits before the most-significant 1 bit of
 304 | `a'.  If `a' is zero, 64 is returned.
 305 *----------------------------------------------------------------------------*/
 306
 307 BX_CPP_INLINE int countLeadingZeros64(Bit64u a)
 308 {
 309     int shiftCount = 0;
 310     if (a < ((Bit64u) 1)<<32) {
 311         shiftCount += 32;
 312     }
 313     else {
 314         a >>= 32;
 315     }
 316     shiftCount += countLeadingZeros32((int)(a));
 317     return shiftCount;
 318 }
 319
 320 #ifdef FLOATX80
 321
 322 /*----------------------------------------------------------------------------
 323 | Shifts the 128-bit value formed by concatenating `a0' and `a1' right by the
 324 | number of bits given in `count'.  Any bits shifted off are lost.  The value
 325 | of `count' can be arbitrarily large; in particular, if `count' is greater
 326 | than 128, the result will be 0.  The result is broken into two 64-bit pieces
 327 | which are stored at the locations pointed to by `z0Ptr' and `z1Ptr'.
 328 *----------------------------------------------------------------------------*/
 329
 330 BX_CPP_INLINE void
 331  shift128Right(Bit64u a0, Bit64u a1, int count, Bit64u *z0Ptr, Bit64u *z1Ptr)
 332 {
 333     Bit64u z0, z1;
 334     int negCount = (-count) & 63;
 335
 336     if (count == 0) {
 337         z1 = a1;
 338         z0 = a0;
 339     }
 340     else if (count < 64) {
 341         z1 = (a0<<negCount) | (a1>>count);
 342         z0 = a0>>count;
 343     }
 344     else {
 345         z1 = (count < 64) ? (a0>>(count & 63)) : 0;
 346         z0 = 0;
 347     }
 348     *z1Ptr = z1;
 349     *z0Ptr = z0;
 350 }
 351
 352 /*----------------------------------------------------------------------------
 353 | Shifts the 128-bit value formed by concatenating `a0' and `a1' right by the
 354 | number of bits given in `count'.  If any nonzero bits are shifted off, they
 355 | are ``jammed'' into the least significant bit of the result by setting the
 356 | least significant bit to 1.  The value of `count' can be arbitrarily large;
 357 | in particular, if `count' is greater than 128, the result will be either
 358 | 0 or 1, depending on whether the concatenation of `a0' and `a1' is zero or
 359 | nonzero.  The result is broken into two 64-bit pieces which are stored at
 360 | the locations pointed to by `z0Ptr' and `z1Ptr'.
 361 *----------------------------------------------------------------------------*/
 362
 363 BX_CPP_INLINE void
 364  shift128RightJamming(
 365      Bit64u a0, Bit64u a1, int count, Bit64u *z0Ptr, Bit64u *z1Ptr)
 366 {
 367     Bit64u z0, z1;
 368     int negCount = (-count) & 63;
 369
 370     if (count == 0) {
 371         z1 = a1;
 372         z0 = a0;
 373     }
 374     else if (count < 64) {
 375         z1 = (a0<<negCount) | (a1>>count) | ((a1<<negCount) != 0);
 376         z0 = a0>>count;
 377     }
 378     else {
 379         if (count == 64) {
 380             z1 = a0 | (a1 != 0);
 381         }
 382         else if (count < 128) {
 383             z1 = (a0>>(count & 63)) | (((a0<<negCount) | a1) != 0);
 384         }
 385         else {
 386             z1 = ((a0 | a1) != 0);
 387         }
 388         z0 = 0;
 389     }
 390     *z1Ptr = z1;
 391     *z0Ptr = z0;
 392 }
 393
 394 /*----------------------------------------------------------------------------
 395 | Shifts the 128-bit value formed by concatenating `a0' and `a1' left by the
 396 | number of bits given in `count'.  Any bits shifted off are lost.  The value
 397 | of `count' must be less than 64.  The result is broken into two 64-bit
 398 | pieces which are stored at the locations pointed to by `z0Ptr' and `z1Ptr'.
 399 *----------------------------------------------------------------------------*/
 400
 401 BX_CPP_INLINE void
 402  shortShift128Left(
 403      Bit64u a0, Bit64u a1, int count, Bit64u *z0Ptr, Bit64u *z1Ptr)
 404 {
 405     *z1Ptr = a1<<count;
 406     *z0Ptr = (count == 0) ? a0 : (a0<<count) | (a1>>((-count) & 63));
 407 }
 408
 409 /*----------------------------------------------------------------------------
 410 | Adds the 192-bit value formed by concatenating `a0', `a1', and `a2' to the
 411 | 192-bit value formed by concatenating `b0', `b1', and `b2'.  Addition is
 412 | modulo 2^192, so any carry out is lost.  The result is broken into three
 413 | 64-bit pieces which are stored at the locations pointed to by `z0Ptr',
 414 | `z1Ptr', and `z2Ptr'.
 415 *----------------------------------------------------------------------------*/
 416
 417 BX_CPP_INLINE void add192(
 418      Bit64u a0,
 419      Bit64u a1,
 420      Bit64u a2,
 421      Bit64u b0,
 422      Bit64u b1,
 423      Bit64u b2,
 424      Bit64u *z0Ptr,
 425      Bit64u *z1Ptr,
 426      Bit64u *z2Ptr
 427 )
 428 {
 429     Bit64u z0, z1, z2;
 430     unsigned carry0, carry1;
 431
 432     z2 = a2 + b2;
 433     carry1 = (z2 < a2);
 434     z1 = a1 + b1;
 435     carry0 = (z1 < a1);
 436     z0 = a0 + b0;
 437     z1 += carry1;
 438     z0 += (z1 < carry1);
 439     z0 += carry0;
 440     *z2Ptr = z2;
 441     *z1Ptr = z1;
 442     *z0Ptr = z0;
 443 }
 444
 445 /*----------------------------------------------------------------------------
 446 | Subtracts the 192-bit value formed by concatenating `b0', `b1', and `b2'
 447 | from the 192-bit value formed by concatenating `a0', `a1', and `a2'.
 448 | Subtraction is modulo 2^192, so any borrow out (carry out) is lost.  The
 449 | result is broken into three 64-bit pieces which are stored at the locations
 450 | pointed to by `z0Ptr', `z1Ptr', and `z2Ptr'.
 451 *----------------------------------------------------------------------------*/
 452
 453 BX_CPP_INLINE void sub192(
 454      Bit64u a0,
 455      Bit64u a1,
 456      Bit64u a2,
 457      Bit64u b0,
 458      Bit64u b1,
 459      Bit64u b2,
 460      Bit64u *z0Ptr,
 461      Bit64u *z1Ptr,
 462      Bit64u *z2Ptr
 463 )
 464 {
 465     Bit64u z0, z1, z2;
 466     unsigned borrow0, borrow1;
 467
 468     z2 = a2 - b2;
 469     borrow1 = (a2 < b2);
 470     z1 = a1 - b1;
 471     borrow0 = (a1 < b1);
 472     z0 = a0 - b0;
 473     z0 -= (z1 < borrow1);
 474     z1 -= borrow1;
 475     z0 -= borrow0;
 476     *z2Ptr = z2;
 477     *z1Ptr = z1;
 478     *z0Ptr = z0;
 479 }
 480
 481 /*----------------------------------------------------------------------------
 482 | Returns 1 if the 128-bit value formed by concatenating `a0' and `a1'
 483 | is equal to the 128-bit value formed by concatenating `b0' and `b1'.
 484 | Otherwise, returns 0.
 485 *----------------------------------------------------------------------------*/
 486
 487 BX_CPP_INLINE int eq128(Bit64u a0, Bit64u a1, Bit64u b0, Bit64u b1)
 488 {
 489     return (a0 == b0) && (a1 == b1);
 490 }
 491
 492 /*----------------------------------------------------------------------------
 493 | Returns 1 if the 128-bit value formed by concatenating `a0' and `a1' is less
 494 | than or equal to the 128-bit value formed by concatenating `b0' and `b1'.
 495 | Otherwise, returns 0.
 496 *----------------------------------------------------------------------------*/
 497
 498 BX_CPP_INLINE int le128(Bit64u a0, Bit64u a1, Bit64u b0, Bit64u b1)
 499 {
 500     return (a0 < b0) || ((a0 == b0) && (a1 <= b1));
 501 }
 502
 503 /*----------------------------------------------------------------------------
 504 | Returns 1 if the 128-bit value formed by concatenating `a0' and `a1' is less
 505 | than the 128-bit value formed by concatenating `b0' and `b1'.  Otherwise,
 506 | returns 0.
 507 *----------------------------------------------------------------------------*/
 508
 509 BX_CPP_INLINE int lt128(Bit64u a0, Bit64u a1, Bit64u b0, Bit64u b1)
 510 {
 511     return (a0 < b0) || ((a0 == b0) && (a1 < b1));
 512 }
 513
 514 #endif  /* FLOATX80 */
 515
 516 /*----------------------------------------------------------------------------
 517 | Multiplies the 128-bit value formed by concatenating `a0' and `a1' by
 518 | `b' to obtain a 192-bit product.  The product is broken into three 64-bit
 519 | pieces which are stored at the locations pointed to by `z0Ptr', `z1Ptr', and
 520 | `z2Ptr'.
 521 *----------------------------------------------------------------------------*/
 522
 523 BX_CPP_INLINE void mul128By64To192(
 524      Bit64u a0,
 525      Bit64u a1,
 526      Bit64u b,
 527      Bit64u *z0Ptr,
 528      Bit64u *z1Ptr,
 529      Bit64u *z2Ptr
 530 )
 531 {
 532     Bit64u z0, z1, z2, more1;
 533
 534     mul64To128(a1, b, &z1, &z2);
 535     mul64To128(a0, b, &z0, &more1);
 536     add128(z0, more1, 0, z1, &z0, &z1);
 537     *z2Ptr = z2;
 538     *z1Ptr = z1;
 539     *z0Ptr = z0;
 540 }
 541
 542 #ifdef FLOAT128
 543
 544 /*----------------------------------------------------------------------------
 545 | Multiplies the 128-bit value formed by concatenating `a0' and `a1' to the
 546 | 128-bit value formed by concatenating `b0' and `b1' to obtain a 256-bit
 547 | product.  The product is broken into four 64-bit pieces which are stored at
 548 | the locations pointed to by `z0Ptr', `z1Ptr', `z2Ptr', and `z3Ptr'.
 549 *----------------------------------------------------------------------------*/
 550
 551 BX_CPP_INLINE void mul128To256(
 552      Bit64u a0,
 553      Bit64u a1,
 554      Bit64u b0,
 555      Bit64u b1,
 556      Bit64u *z0Ptr,
 557      Bit64u *z1Ptr,
 558      Bit64u *z2Ptr,
 559      Bit64u *z3Ptr
 560 )
 561 {
 562     Bit64u z0, z1, z2, z3;
 563     Bit64u more1, more2;
 564
 565     mul64To128(a1, b1, &z2, &z3);
 566     mul64To128(a1, b0, &z1, &more2);
 567     add128(z1, more2, 0, z2, &z1, &z2);
 568     mul64To128(a0, b0, &z0, &more1);
 569     add128(z0, more1, 0, z1, &z0, &z1);
 570     mul64To128(a0, b1, &more1, &more2);
 571     add128(more1, more2, 0, z2, &more1, &z2);
 572     add128(z0, z1, 0, more1, &z0, &z1);
 573     *z3Ptr = z3;
 574     *z2Ptr = z2;
 575     *z1Ptr = z1;
 576     *z0Ptr = z0;
 577 }
 578
 579
 580 /*----------------------------------------------------------------------------
 581 | Shifts the 192-bit value formed by concatenating `a0', `a1', and `a2' right
 582 | by 64 _plus_ the number of bits given in `count'.  The shifted result is
 583 | at most 128 nonzero bits; these are broken into two 64-bit pieces which are
 584 | stored at the locations pointed to by `z0Ptr' and `z1Ptr'.  The bits shifted
 585 | off form a third 64-bit result as follows:  The _last_ bit shifted off is
 586 | the most-significant bit of the extra result, and the other 63 bits of the
 587 | extra result are all zero if and only if _all_but_the_last_ bits shifted off
 588 | were all zero.  This extra result is stored in the location pointed to by
 589 | `z2Ptr'.  The value of `count' can be arbitrarily large.
 590 |     (This routine makes more sense if `a0', `a1', and `a2' are considered
 591 | to form a fixed-point value with binary point between `a1' and `a2'.  This
 592 | fixed-point value is shifted right by the number of bits given in `count',
 593 | and the integer part of the result is returned at the locations pointed to
 594 | by `z0Ptr' and `z1Ptr'.  The fractional part of the result may be slightly
 595 | corrupted as described above, and is returned at the location pointed to by
 596 | `z2Ptr'.)
 597 *----------------------------------------------------------------------------*/
 598
 599 BX_CPP_INLINE void shift128ExtraRightJamming(
 600      Bit64u a0,
 601      Bit64u a1,
 602      Bit64u a2,
 603      int count,
 604      Bit64u *z0Ptr,
 605      Bit64u *z1Ptr,
 606      Bit64u *z2Ptr
 607 )
 608 {
 609     Bit64u z0, z1, z2;
 610     int negCount = (-count) & 63;
 611
 612     if (count == 0) {
 613         z2 = a2;
 614         z1 = a1;
 615         z0 = a0;
 616     }
 617     else {
 618         if (count < 64) {
 619             z2 = a1<<negCount;
 620             z1 = (a0<<negCount) | (a1>>count);
 621             z0 = a0>>count;
 622         }
 623         else {
 624             if (count == 64) {
 625                 z2 = a1;
 626                 z1 = a0;
 627             }
 628             else {
 629                 a2 |= a1;
 630                 if (count < 128) {
 631                     z2 = a0<<negCount;
 632                     z1 = a0>>(count & 63);
 633                 }
 634                 else {
 635                     z2 = (count == 128) ? a0 : (a0 != 0);
 636                     z1 = 0;
 637                 }
 638             }
 639             z0 = 0;
 640         }
 641         z2 |= (a2 != 0);
 642     }
 643     *z2Ptr = z2;
 644     *z1Ptr = z1;
 645     *z0Ptr = z0;
 646 }
 647
 648 #endif  /* FLOAT128 */
 649
 650 #endif