fpu/fsincos.cc

   1 /*============================================================================
   2 This source file is an extension to the SoftFloat IEC/IEEE Floating-point
   3 Arithmetic Package, Release 2b, written for Bochs (x86 achitecture simulator)
   4 floating point emulation.
   5
   6 THIS SOFTWARE IS DISTRIBUTED AS IS, FOR FREE.  Although reasonable effort has
   7 been made to avoid it, THIS SOFTWARE MAY CONTAIN FAULTS THAT WILL AT TIMES
   8 RESULT IN INCORRECT BEHAVIOR.  USE OF THIS SOFTWARE IS RESTRICTED TO PERSONS
   9 AND ORGANIZATIONS WHO CAN AND WILL TAKE FULL RESPONSIBILITY FOR ALL LOSSES,
  10 COSTS, OR OTHER PROBLEMS THEY INCUR DUE TO THE SOFTWARE, AND WHO FURTHERMORE
  11 EFFECTIVELY INDEMNIFY JOHN HAUSER AND THE INTERNATIONAL COMPUTER SCIENCE
  12 INSTITUTE (possibly via similar legal warning) AGAINST ALL LOSSES, COSTS, OR
  13 OTHER PROBLEMS INCURRED BY THEIR CUSTOMERS AND CLIENTS DUE TO THE SOFTWARE.
  14
  15 Derivative works are acceptable, even for commercial purposes, so long as
  16 (1) the source code for the derivative work includes prominent notice that
  17 the work is derivative, and (2) the source code includes prominent notice with
  18 these four paragraphs for those parts of this code that are retained.
  19 =============================================================================*/
  20
  21 /*============================================================================
  22  * Written for Bochs (x86 achitecture simulator) by
  23  *            Stanislav Shwartsman [sshwarts at sourceforge net]
  24  * ==========================================================================*/
  25
  26 #define FLOAT128
  27
  28 #define USE_estimateDiv128To64
  29 #include "softfloatx80.h"
  30 #include "softfloat-round-pack.h"
  31 #include "fpu_constant.h"
  32
  33 static const floatx80 floatx80_one = packFloatx80(0, 0x3fff, BX_CONST64(0x8000000000000000));
  34
  35 /* reduce trigonometric function argument using 128-bit precision
  36    M_PI approximation */
  37 static Bit64u argument_reduction_kernel(Bit64u aSig0, int Exp, Bit64u *zSig0, Bit64u *zSig1)
  38 {
  39     Bit64u term0, term1, term2;
  40     Bit64u aSig1 = 0;
  41
  42     shortShift128Left(aSig1, aSig0, Exp, &aSig1, &aSig0);
  43     Bit64u q = estimateDiv128To64(aSig1, aSig0, FLOAT_PI_HI);
  44     mul128By64To192(FLOAT_PI_HI, FLOAT_PI_LO, q, &term0, &term1, &term2);
  45     sub128(aSig1, aSig0, term0, term1, zSig1, zSig0);
  46     while ((Bit64s)(*zSig1) < 0) {
  47         --q;
  48         add192(*zSig1, *zSig0, term2, 0, FLOAT_PI_HI, FLOAT_PI_LO, zSig1, zSig0, &term2);
  49     }
  50     *zSig1 = term2;
  51     return q;
  52 }
  53
  54 static int reduce_trig_arg(int expDiff, int &zSign, Bit64u &aSig0, Bit64u &aSig1)
  55 {
  56     Bit64u term0, term1, q = 0;
  57
  58     if (expDiff < 0) {
  59         shift128Right(aSig0, 0, 1, &aSig0, &aSig1);
  60         expDiff = 0;
  61     }
  62     if (expDiff > 0) {
  63         q = argument_reduction_kernel(aSig0, expDiff, &aSig0, &aSig1);
  64     }
  65     else {
  66         if (FLOAT_PI_HI <= aSig0) {
  67             aSig0 -= FLOAT_PI_HI;
  68             q = 1;
  69         }
  70     }
  71
  72     shift128Right(FLOAT_PI_HI, FLOAT_PI_LO, 1, &term0, &term1);
  73     if (! lt128(aSig0, aSig1, term0, term1))
  74     {
  75         int lt = lt128(term0, term1, aSig0, aSig1);
  76         int eq = eq128(aSig0, aSig1, term0, term1);
  77
  78         if ((eq && (q & 1)) || lt) {
  79             zSign = !zSign;
  80             ++q;
  81         }
  82         if (lt) sub128(FLOAT_PI_HI, FLOAT_PI_LO, aSig0, aSig1, &aSig0, &aSig1);
  83     }
  84
  85     return (int)(q & 3);
  86 }
  87
  88 #define SIN_ARR_SIZE 9
  89 #define COS_ARR_SIZE 9
  90
  91 static float128 sin_arr[SIN_ARR_SIZE] =
  92 {
  93     PACK_FLOAT_128(0x3fff000000000000, 0x0000000000000000), /*  1 */
  94     PACK_FLOAT_128(0xbffc555555555555, 0x5555555555555555), /*  3 */
  95     PACK_FLOAT_128(0x3ff8111111111111, 0x1111111111111111), /*  5 */
  96     PACK_FLOAT_128(0xbff2a01a01a01a01, 0xa01a01a01a01a01a), /*  7 */
  97     PACK_FLOAT_128(0x3fec71de3a556c73, 0x38faac1c88e50017), /*  9 */
  98     PACK_FLOAT_128(0xbfe5ae64567f544e, 0x38fe747e4b837dc7), /* 11 */
  99     PACK_FLOAT_128(0x3fde6124613a86d0, 0x97ca38331d23af68), /* 13 */
 100     PACK_FLOAT_128(0xbfd6ae7f3e733b81, 0xf11d8656b0ee8cb0), /* 15 */
 101     PACK_FLOAT_128(0x3fce952c77030ad4, 0xa6b2605197771b00)  /* 17 */
 102 };
 103
 104 static float128 cos_arr[COS_ARR_SIZE] =
 105 {
 106     PACK_FLOAT_128(0x3fff000000000000, 0x0000000000000000), /*  0 */
 107     PACK_FLOAT_128(0xbffe000000000000, 0x0000000000000000), /*  2 */
 108     PACK_FLOAT_128(0x3ffa555555555555, 0x5555555555555555), /*  4 */
 109     PACK_FLOAT_128(0xbff56c16c16c16c1, 0x6c16c16c16c16c17), /*  6 */
 110     PACK_FLOAT_128(0x3fefa01a01a01a01, 0xa01a01a01a01a01a), /*  8 */
 111     PACK_FLOAT_128(0xbfe927e4fb7789f5, 0xc72ef016d3ea6679), /* 10 */
 112     PACK_FLOAT_128(0x3fe21eed8eff8d89, 0x7b544da987acfe85), /* 12 */
 113     PACK_FLOAT_128(0xbfda93974a8c07c9, 0xd20badf145dfa3e5), /* 14 */
 114     PACK_FLOAT_128(0x3fd2ae7f3e733b81, 0xf11d8656b0ee8cb0)  /* 16 */
 115 };
 116
 117 extern float128 OddPoly (float128 x, float128 *arr, unsigned n, float_status_t &status);
 118
 119 /* 0 <= x <= pi/4 */
 120 BX_CPP_INLINE float128 poly_sin(float128 x, float_status_t &status)
 121 {
 122     //                 3     5     7     9     11     13     15
 123     //                x     x     x     x     x      x      x
 124     // sin (x) ~ x - --- + --- - --- + --- - ---- + ---- - ---- =
 125     //                3!    5!    7!    9!    11!    13!    15!
 126     //
 127     //                 2     4     6     8     10     12     14
 128     //                x     x     x     x     x      x      x
 129     //   = x * [ 1 - --- + --- - --- + --- - ---- + ---- - ---- ] =
 130     //                3!    5!    7!    9!    11!    13!    15!
 131     //
 132     //           3                          3
 133     //          --       4k                --        4k+2
 134     //   p(x) = >  C  * x   > 0     q(x) = >  C   * x     < 0
 135     //          --  2k                     --  2k+1
 136     //          k=0                        k=0
 137     //
 138     //                          2
 139     //   sin(x) ~ x * [ p(x) + x * q(x) ]
 140     //
 141
 142     return OddPoly(x, sin_arr, SIN_ARR_SIZE, status);
 143 }
 144
 145 extern float128 EvenPoly(float128 x, float128 *arr, unsigned n, float_status_t &status);
 146
 147 /* 0 <= x <= pi/4 */
 148 BX_CPP_INLINE float128 poly_cos(float128 x, float_status_t &status)
 149 {
 150     //                 2     4     6     8     10     12     14
 151     //                x     x     x     x     x      x      x
 152     // cos (x) ~ 1 - --- + --- - --- + --- - ---- + ---- - ----
 153     //                2!    4!    6!    8!    10!    12!    14!
 154     //
 155     //           3                          3
 156     //          --       4k                --        4k+2
 157     //   p(x) = >  C  * x   > 0     q(x) = >  C   * x     < 0
 158     //          --  2k                     --  2k+1
 159     //          k=0                        k=0
 160     //
 161     //                      2
 162     //   cos(x) ~ [ p(x) + x * q(x) ]
 163     //
 164
 165     return EvenPoly(x, cos_arr, COS_ARR_SIZE, status);
 166 }
 167
 168 BX_CPP_INLINE void sincos_invalid(floatx80 *sin_a, floatx80 *cos_a, floatx80 a)
 169 {
 170     if (sin_a) *sin_a = a;
 171     if (cos_a) *cos_a = a;
 172 }
 173
 174 BX_CPP_INLINE void sincos_tiny_argument(floatx80 *sin_a, floatx80 *cos_a, floatx80 a)
 175 {
 176     if (sin_a) *sin_a = a;
 177     if (cos_a) *cos_a = floatx80_one;
 178 }
 179
 180 static floatx80 sincos_approximation(int neg, float128 r, Bit64u quotient, float_status_t &status)
 181 {
 182     if (quotient & 0x1) {
 183         r = poly_cos(r, status);
 184         neg = 0;
 185     } else  {
 186         r = poly_sin(r, status);
 187     }
 188
 189     floatx80 result = float128_to_floatx80(r, status);
 190     if (quotient & 0x2)
 191         neg = ! neg;
 192
 193     if (neg)
 194         floatx80_chs(result);
 195
 196     return result;
 197 }
 198
 199 // =================================================
 200 // FSINCOS               Compute sin(x) and cos(x)
 201 // =================================================
 202
 203 //
 204 // Uses the following identities:
 205 // ----------------------------------------------------------
 206 //
 207 //  sin(-x) = -sin(x)
 208 //  cos(-x) =  cos(x)
 209 //
 210 //  sin(x+y) = sin(x)*cos(y)+cos(x)*sin(y)
 211 //  cos(x+y) = sin(x)*sin(y)+cos(x)*cos(y)
 212 //
 213 //  sin(x+ pi/2)  =  cos(x)
 214 //  sin(x+ pi)    = -sin(x)
 215 //  sin(x+3pi/2)  = -cos(x)
 216 //  sin(x+2pi)    =  sin(x)
 217 //
 218
 219 int fsincos(floatx80 a, floatx80 *sin_a, floatx80 *cos_a, float_status_t &status)
 220 {
 221     Bit64u aSig0, aSig1 = 0;
 222     Bit32s aExp, zExp, expDiff;
 223     int aSign, zSign;
 224     int q = 0;
 225
 226     // handle unsupported extended double-precision floating encodings
 227     if (floatx80_is_unsupported(a))
 228     {
 229         goto invalid;
 230     }
 231
 232     aSig0 = extractFloatx80Frac(a);
 233     aExp = extractFloatx80Exp(a);
 234     aSign = extractFloatx80Sign(a);
 235
 236     /* invalid argument */
 237     if (aExp == 0x7FFF) {
 238         if ((Bit64u) (aSig0<<1)) {
 239             sincos_invalid(sin_a, cos_a, propagateFloatx80NaN(a, status));
 240             return 0;
 241         }
 242
 243     invalid:
 244         float_raise(status, float_flag_invalid);
 245         sincos_invalid(sin_a, cos_a, floatx80_default_nan);
 246         return 0;
 247     }
 248
 249     if (aExp == 0) {
 250         if (aSig0 == 0) {
 251             sincos_tiny_argument(sin_a, cos_a, a);
 252             return 0;
 253         }
 254
 255         float_raise(status, float_flag_denormal);
 256
 257         /* handle pseudo denormals */
 258         if (! (aSig0 & BX_CONST64(0x8000000000000000)))
 259         {
 260             float_raise(status, float_flag_inexact);
 261             if (sin_a)
 262                 float_raise(status, float_flag_underflow);
 263             sincos_tiny_argument(sin_a, cos_a, a);
 264             return 0;
 265         }
 266
 267         normalizeFloatx80Subnormal(aSig0, &aExp, &aSig0);
 268     }
 269
 270     zSign = aSign;
 271     zExp = EXP_BIAS;
 272     expDiff = aExp - zExp;
 273
 274     /* argument is out-of-range */
 275     if (expDiff >= 63)
 276         return -1;
 277
 278     float_raise(status, float_flag_inexact);
 279
 280     if (expDiff < -1) {    // doesn't require reduction
 281         if (expDiff <= -68) {
 282             a = packFloatx80(aSign, aExp, aSig0);
 283             sincos_tiny_argument(sin_a, cos_a, a);
 284             return 0;
 285         }
 286         zExp = aExp;
 287     }
 288     else {
 289         q = reduce_trig_arg(expDiff, zSign, aSig0, aSig1);
 290     }
 291
 292     /* **************************** */
 293     /* argument reduction completed */
 294     /* **************************** */
 295
 296     /* using float128 for approximation */
 297     float128 r = normalizeRoundAndPackFloat128(0, zExp-0x10, aSig0, aSig1, status);
 298
 299     if (aSign) q = -q;
 300     if (sin_a) *sin_a = sincos_approximation(zSign, r,   q, status);
 301     if (cos_a) *cos_a = sincos_approximation(zSign, r, q+1, status);
 302
 303     return 0;
 304 }
 305
 306 int fsin(floatx80 &a, float_status_t &status)
 307 {
 308     return fsincos(a, &a, 0, status);
 309 }
 310
 311 int fcos(floatx80 &a, float_status_t &status)
 312 {
 313     return fsincos(a, 0, &a, status);
 314 }
 315
 316 // =================================================
 317 // FPTAN                 Compute tan(x)
 318 // =================================================
 319
 320 //
 321 // Uses the following identities:
 322 //
 323 // 1. ----------------------------------------------------------
 324 //
 325 //  sin(-x) = -sin(x)
 326 //  cos(-x) =  cos(x)
 327 //
 328 //  sin(x+y) = sin(x)*cos(y)+cos(x)*sin(y)
 329 //  cos(x+y) = sin(x)*sin(y)+cos(x)*cos(y)
 330 //
 331 //  sin(x+ pi/2)  =  cos(x)
 332 //  sin(x+ pi)    = -sin(x)
 333 //  sin(x+3pi/2)  = -cos(x)
 334 //  sin(x+2pi)    =  sin(x)
 335 //
 336 // 2. ----------------------------------------------------------
 337 //
 338 //           sin(x)
 339 //  tan(x) = ------
 340 //           cos(x)
 341 //
 342
 343 int ftan(floatx80 &a, float_status_t &status)
 344 {
 345     Bit64u aSig0, aSig1 = 0;
 346     Bit32s aExp, zExp, expDiff;
 347     int aSign, zSign;
 348     int q = 0;
 349
 350     // handle unsupported extended double-precision floating encodings
 351     if (floatx80_is_unsupported(a))
 352     {
 353         goto invalid;
 354     }
 355
 356     aSig0 = extractFloatx80Frac(a);
 357     aExp = extractFloatx80Exp(a);
 358     aSign = extractFloatx80Sign(a);
 359
 360     /* invalid argument */
 361     if (aExp == 0x7FFF) {
 362         if ((Bit64u) (aSig0<<1))
 363         {
 364             a = propagateFloatx80NaN(a, status);
 365             return 0;
 366         }
 367
 368     invalid:
 369         float_raise(status, float_flag_invalid);
 370         a = floatx80_default_nan;
 371         return 0;
 372     }
 373
 374     if (aExp == 0) {
 375         if (aSig0 == 0) return 0;
 376         float_raise(status, float_flag_denormal);
 377         /* handle pseudo denormals */
 378         if (! (aSig0 & BX_CONST64(0x8000000000000000)))
 379         {
 380             float_raise(status, float_flag_inexact | float_flag_underflow);
 381             return 0;
 382         }
 383         normalizeFloatx80Subnormal(aSig0, &aExp, &aSig0);
 384     }
 385
 386     zSign = aSign;
 387     zExp = EXP_BIAS;
 388     expDiff = aExp - zExp;
 389
 390     /* argument is out-of-range */
 391     if (expDiff >= 63)
 392         return -1;
 393
 394     float_raise(status, float_flag_inexact);
 395
 396     if (expDiff < -1) {    // doesn't require reduction
 397         if (expDiff <= -68) {
 398             a = packFloatx80(aSign, aExp, aSig0);
 399             return 0;
 400         }
 401         zExp = aExp;
 402     }
 403     else {
 404         q = reduce_trig_arg(expDiff, zSign, aSig0, aSig1);
 405     }
 406
 407     /* **************************** */
 408     /* argument reduction completed */
 409     /* **************************** */
 410
 411     /* using float128 for approximation */
 412     float128 r = normalizeRoundAndPackFloat128(0, zExp-0x10, aSig0, aSig1, status);
 413
 414     float128 sin_r = poly_sin(r, status);
 415     float128 cos_r = poly_cos(r, status);
 416
 417     if (q & 0x1) {
 418         r = float128_div(cos_r, sin_r, status);
 419         zSign = ! zSign;
 420     } else {
 421         r = float128_div(sin_r, cos_r, status);
 422     }
 423
 424     a = float128_to_floatx80(r, status);
 425     if (zSign)
 426         floatx80_chs(a);
 427
 428     return 0;
 429 }