fpu/fyl2x.cc

   1 /*============================================================================
   2 This source file is an extension to the SoftFloat IEC/IEEE Floating-point
   3 Arithmetic Package, Release 2b, written for Bochs (x86 achitecture simulator)
   4 floating point emulation.
   5
   6 THIS SOFTWARE IS DISTRIBUTED AS IS, FOR FREE.  Although reasonable effort has
   7 been made to avoid it, THIS SOFTWARE MAY CONTAIN FAULTS THAT WILL AT TIMES
   8 RESULT IN INCORRECT BEHAVIOR.  USE OF THIS SOFTWARE IS RESTRICTED TO PERSONS
   9 AND ORGANIZATIONS WHO CAN AND WILL TAKE FULL RESPONSIBILITY FOR ALL LOSSES,
  10 COSTS, OR OTHER PROBLEMS THEY INCUR DUE TO THE SOFTWARE, AND WHO FURTHERMORE
  11 EFFECTIVELY INDEMNIFY JOHN HAUSER AND THE INTERNATIONAL COMPUTER SCIENCE
  12 INSTITUTE (possibly via similar legal warning) AGAINST ALL LOSSES, COSTS, OR
  13 OTHER PROBLEMS INCURRED BY THEIR CUSTOMERS AND CLIENTS DUE TO THE SOFTWARE.
  14
  15 Derivative works are acceptable, even for commercial purposes, so long as
  16 (1) the source code for the derivative work includes prominent notice that
  17 the work is derivative, and (2) the source code includes prominent notice with
  18 these four paragraphs for those parts of this code that are retained.
  19 =============================================================================*/
  20
  21 /*============================================================================
  22  * Written for Bochs (x86 achitecture simulator) by
  23  *            Stanislav Shwartsman [sshwarts at sourceforge net]
  24  * ==========================================================================*/
  25
  26 #define FLOAT128
  27
  28 #include "softfloatx80.h"
  29 #include "softfloat-round-pack.h"
  30 #include "fpu_constant.h"
  31
  32 static const floatx80 floatx80_one =
  33     packFloatx80(0, 0x3fff, BX_CONST64(0x8000000000000000));
  34
  35 static const float128 float128_one =
  36     packFloat128(BX_CONST64(0x3fff000000000000), BX_CONST64(0x0000000000000000));
  37 static const float128 float128_two =
  38     packFloat128(BX_CONST64(0x4000000000000000), BX_CONST64(0x0000000000000000));
  39
  40 static const float128 float128_ln2inv2 =
  41     packFloat128(BX_CONST64(0x400071547652b82f), BX_CONST64(0xe1777d0ffda0d23a));
  42
  43 #define SQRT2_HALF_SIG  BX_CONST64(0xb504f333f9de6484)
  44
  45 extern float128 OddPoly(float128 x, float128 *arr, unsigned n, float_status_t &status);
  46
  47 #define L2_ARR_SIZE 9
  48
  49 static float128 ln_arr[L2_ARR_SIZE] =
  50 {
  51     PACK_FLOAT_128(0x3fff000000000000, 0x0000000000000000), /*  1 */
  52     PACK_FLOAT_128(0x3ffd555555555555, 0x5555555555555555), /*  3 */
  53     PACK_FLOAT_128(0x3ffc999999999999, 0x999999999999999a), /*  5 */
  54     PACK_FLOAT_128(0x3ffc249249249249, 0x2492492492492492), /*  7 */
  55     PACK_FLOAT_128(0x3ffbc71c71c71c71, 0xc71c71c71c71c71c), /*  9 */
  56     PACK_FLOAT_128(0x3ffb745d1745d174, 0x5d1745d1745d1746), /* 11 */
  57     PACK_FLOAT_128(0x3ffb3b13b13b13b1, 0x3b13b13b13b13b14), /* 13 */
  58     PACK_FLOAT_128(0x3ffb111111111111, 0x1111111111111111), /* 15 */
  59     PACK_FLOAT_128(0x3ffae1e1e1e1e1e1, 0xe1e1e1e1e1e1e1e2)  /* 17 */
  60 };
  61
  62 static float128 poly_ln(float128 x1, float_status_t &status)
  63 {
  64 /*
  65     //
  66     //                     3     5     7     9     11     13     15
  67     //        1+u         u     u     u     u     u      u      u
  68     // 1/2 ln ---  ~ u + --- + --- + --- + --- + ---- + ---- + ---- =
  69     //        1-u         3     5     7     9     11     13     15
  70     //
  71     //                     2     4     6     8     10     12     14
  72     //                    u     u     u     u     u      u      u
  73     //       = u * [ 1 + --- + --- + --- + --- + ---- + ---- + ---- ] =
  74     //                    3     5     7     9     11     13     15
  75     //
  76     //           3                          3
  77     //          --       4k                --        4k+2
  78     //   p(u) = >  C  * u           q(u) = >  C   * u
  79     //          --  2k                     --  2k+1
  80     //          k=0                        k=0
  81     //
  82     //          1+u                 2
  83     //   1/2 ln --- ~ u * [ p(u) + u * q(u) ]
  84     //          1-u
  85     //
  86 */
  87     return OddPoly(x1, ln_arr, L2_ARR_SIZE, status);
  88 }
  89
  90 /* required sqrt(2)/2 < x < sqrt(2) */
  91 static float128 poly_l2(float128 x, float_status_t &status)
  92 {
  93     /* using float128 for approximation */
  94     float128 x_p1 = float128_add(x, float128_one, status);
  95     float128 x_m1 = float128_sub(x, float128_one, status);
  96     x = float128_div(x_m1, x_p1, status);
  97     x = poly_ln(x, status);
  98     x = float128_mul(x, float128_ln2inv2, status);
  99     return x;
 100 }
 101
 102 static float128 poly_l2p1(float128 x, float_status_t &status)
 103 {
 104     /* using float128 for approximation */
 105     float128 x_p2 = float128_add(x, float128_two, status);
 106     x = float128_div(x, x_p2, status);
 107     x = poly_ln(x, status);
 108     x = float128_mul(x, float128_ln2inv2, status);
 109     return x;
 110 }
 111
 112 // =================================================
 113 // FYL2X                   Compute y * log (x)
 114 //                                        2
 115 // =================================================
 116
 117 //
 118 // Uses the following identities:
 119 //
 120 // 1. ----------------------------------------------------------
 121 //              ln(x)
 122 //   log (x) = -------,  ln (x*y) = ln(x) + ln(y)
 123 //      2       ln(2)
 124 //
 125 // 2. ----------------------------------------------------------
 126 //                1+u             x-1
 127 //   ln (x) = ln -----, when u = -----
 128 //                1-u             x+1
 129 //
 130 // 3. ----------------------------------------------------------
 131 //                        3     5     7           2n+1
 132 //       1+u             u     u     u           u
 133 //   ln ----- = 2 [ u + --- + --- + --- + ... + ------ + ... ]
 134 //       1-u             3     5     7           2n+1
 135 //
 136
 137 floatx80 fyl2x(floatx80 a, floatx80 b, float_status_t &status)
 138 {
 139     // handle unsupported extended double-precision floating encodings
 140     if (floatx80_is_unsupported(a)) {
 141 invalid:
 142         float_raise(status, float_flag_invalid);
 143         return floatx80_default_nan;
 144     }
 145
 146     Bit64u aSig = extractFloatx80Frac(a);
 147     Bit32s aExp = extractFloatx80Exp(a);
 148     int aSign = extractFloatx80Sign(a);
 149     Bit64u bSig = extractFloatx80Frac(b);
 150     Bit32s bExp = extractFloatx80Exp(b);
 151     int bSign = extractFloatx80Sign(b);
 152
 153     int zSign = bSign ^ 1;
 154
 155     if (aExp == 0x7FFF) {
 156         if ((Bit64u) (aSig<<1)
 157              || ((bExp == 0x7FFF) && (Bit64u) (bSig<<1)))
 158         {
 159             return propagateFloatx80NaN(a, b, status);
 160         }
 161         if (aSign) goto invalid;
 162         else {
 163             if (bExp == 0) {
 164                 if (bSig == 0) goto invalid;
 165                 float_raise(status, float_flag_denormal);
 166             }
 167             return packFloatx80(bSign, 0x7FFF, BX_CONST64(0x8000000000000000));
 168         }
 169     }
 170     if (bExp == 0x7FFF)
 171     {
 172         if ((Bit64u) (bSig<<1)) return propagateFloatx80NaN(a, b, status);
 173         if (aSign && (Bit64u)(aExp | aSig)) goto invalid;
 174         if (aSig && (aExp == 0))
 175             float_raise(status, float_flag_denormal);
 176         if (aExp < 0x3FFF) {
 177             return packFloatx80(zSign, 0x7FFF, BX_CONST64(0x8000000000000000));
 178         }
 179         if (aExp == 0x3FFF && ((Bit64u) (aSig<<1) == 0)) goto invalid;
 180         return packFloatx80(bSign, 0x7FFF, BX_CONST64(0x8000000000000000));
 181     }
 182     if (aExp == 0) {
 183         if (aSig == 0) {
 184             if ((bExp | bSig) == 0) goto invalid;
 185             float_raise(status, float_flag_divbyzero);
 186             return packFloatx80(zSign, 0x7FFF, BX_CONST64(0x8000000000000000));
 187         }
 188         if (aSign) goto invalid;
 189         float_raise(status, float_flag_denormal);
 190         normalizeFloatx80Subnormal(aSig, &aExp, &aSig);
 191     }
 192     if (aSign) goto invalid;
 193     if (bExp == 0) {
 194         if (bSig == 0) {
 195             if (aExp < 0x3FFF) return packFloatx80(zSign, 0, 0);
 196             return packFloatx80(bSign, 0, 0);
 197         }
 198         float_raise(status, float_flag_denormal);
 199         normalizeFloatx80Subnormal(bSig, &bExp, &bSig);
 200     }
 201     if (aExp == 0x3FFF && ((Bit64u) (aSig<<1) == 0))
 202         return packFloatx80(bSign, 0, 0);
 203
 204     float_raise(status, float_flag_inexact);
 205
 206     int ExpDiff = aExp - 0x3FFF;
 207     aExp = 0;
 208     if (aSig >= SQRT2_HALF_SIG) {
 209         ExpDiff++;
 210         aExp--;
 211     }
 212
 213     /* ******************************** */
 214     /* using float128 for approximation */
 215     /* ******************************** */
 216
 217     Bit64u zSig0, zSig1;
 218     shift128Right(aSig<<1, 0, 16, &zSig0, &zSig1);
 219     float128 x = packFloat128(0, aExp+0x3FFF, zSig0, zSig1);
 220     x = poly_l2(x, status);
 221     x = float128_add(x, floatx80_to_float128(int32_to_floatx80(ExpDiff), status), status);
 222     return floatx80_mul(b, x, status);
 223 }
 224
 225 // =================================================
 226 // FYL2XP1                 Compute y * log (x + 1)
 227 //                                        2
 228 // =================================================
 229
 230 //
 231 // Uses the following identities:
 232 //
 233 // 1. ----------------------------------------------------------
 234 //              ln(x)
 235 //   log (x) = -------
 236 //      2       ln(2)
 237 //
 238 // 2. ----------------------------------------------------------
 239 //                  1+u              x
 240 //   ln (x+1) = ln -----, when u = -----
 241 //                  1-u             x+2
 242 //
 243 // 3. ----------------------------------------------------------
 244 //                        3     5     7           2n+1
 245 //       1+u             u     u     u           u
 246 //   ln ----- = 2 [ u + --- + --- + --- + ... + ------ + ... ]
 247 //       1-u             3     5     7           2n+1
 248 //
 249
 250 floatx80 fyl2xp1(floatx80 a, floatx80 b, float_status_t &status)
 251 {
 252     Bit32s aExp, bExp;
 253     Bit64u aSig, bSig, zSig0, zSig1, zSig2;
 254     int aSign, bSign;
 255
 256     // handle unsupported extended double-precision floating encodings
 257     if (floatx80_is_unsupported(a)) {
 258 invalid:
 259         float_raise(status, float_flag_invalid);
 260         return floatx80_default_nan;
 261     }
 262
 263     aSig = extractFloatx80Frac(a);
 264     aExp = extractFloatx80Exp(a);
 265     aSign = extractFloatx80Sign(a);
 266     bSig = extractFloatx80Frac(b);
 267     bExp = extractFloatx80Exp(b);
 268     bSign = extractFloatx80Sign(b);
 269     int zSign = aSign ^ bSign;
 270
 271     if (aExp == 0x7FFF) {
 272         if ((Bit64u) (aSig<<1)
 273              || ((bExp == 0x7FFF) && (Bit64u) (bSig<<1)))
 274         {
 275             return propagateFloatx80NaN(a, b, status);
 276         }
 277         if (aSign) goto invalid;
 278         else {
 279             if (bExp == 0) {
 280                 if (bSig == 0) goto invalid;
 281                 float_raise(status, float_flag_denormal);
 282             }
 283             return packFloatx80(bSign, 0x7FFF, BX_CONST64(0x8000000000000000));
 284         }
 285     }
 286     if (bExp == 0x7FFF)
 287     {
 288         if ((Bit64u) (bSig<<1))
 289             return propagateFloatx80NaN(a, b, status);
 290
 291         if (aExp == 0) {
 292             if (aSig == 0) goto invalid;
 293             float_raise(status, float_flag_denormal);
 294         }
 295
 296         return packFloatx80(zSign, 0x7FFF, BX_CONST64(0x8000000000000000));
 297     }
 298     if (aExp == 0) {
 299         if (aSig == 0) {
 300             if (bSig && (bExp == 0)) float_raise(status, float_flag_denormal);
 301             return packFloatx80(zSign, 0, 0);
 302         }
 303         float_raise(status, float_flag_denormal);
 304         normalizeFloatx80Subnormal(aSig, &aExp, &aSig);
 305     }
 306     if (bExp == 0) {
 307         if (bSig == 0) return packFloatx80(zSign, 0, 0);
 308         float_raise(status, float_flag_denormal);
 309         normalizeFloatx80Subnormal(bSig, &bExp, &bSig);
 310     }
 311
 312     float_raise(status, float_flag_inexact);
 313
 314     if (aSign && aExp >= 0x3FFF)
 315         return a;
 316
 317     if (aExp >= 0x3FFC) // big argument
 318     {
 319         return fyl2x(floatx80_add(a, floatx80_one, status), b, status);
 320     }
 321
 322     // handle tiny argument
 323     if (aExp < EXP_BIAS-70)
 324     {
 325         // first order approximation, return (a*b)/ln(2)
 326         Bit32s zExp = aExp + FLOAT_LN2INV_EXP - 0x3FFE;
 327
 328         mul128By64To192(FLOAT_LN2INV_HI, FLOAT_LN2INV_LO, aSig, &zSig0, &zSig1, &zSig2);
 329         if (0 < (Bit64s) zSig0) {
 330             shortShift128Left(zSig0, zSig1, 1, &zSig0, &zSig1);
 331             --zExp;
 332         }
 333
 334         zExp = zExp + bExp - 0x3FFE;
 335         mul128By64To192(zSig0, zSig1, bSig, &zSig0, &zSig1, &zSig2);
 336         if (0 < (Bit64s) zSig0) {
 337             shortShift128Left(zSig0, zSig1, 1, &zSig0, &zSig1);
 338             --zExp;
 339         }
 340
 341         return
 342             roundAndPackFloatx80(80, aSign ^ bSign, zExp, zSig0, zSig1, status);
 343     }
 344
 345     /* ******************************** */
 346     /* using float128 for approximation */
 347     /* ******************************** */
 348
 349     shift128Right(aSig<<1, 0, 16, &zSig0, &zSig1);
 350     float128 x = packFloat128(aSign, aExp, zSig0, zSig1);
 351     x = poly_l2p1(x, status);
 352     return floatx80_mul(b, x, status);
 353 }