arch/x86/math-emu/poly_tan.c

   1 // SPDX-License-Identifier: GPL-2.0
   2 /*---------------------------------------------------------------------------+
   3  |  poly_tan.c                                                               |
   4  |                                                                           |
   5  | Compute the tan of a FPU_REG, using a polynomial approximation.           |
   6  |                                                                           |
   7  | Copyright (C) 1992,1993,1994,1997,1999                                    |
   8  |                       W. Metzenthen, 22 Parker St, Ormond, Vic 3163,      |
   9  |                       Australia.  E-mail   billm@melbpc.org.au            |
  10  |                                                                           |
  11  |                                                                           |
  12  +---------------------------------------------------------------------------*/
  13
  14 #include "exception.h"
  15 #include "reg_constant.h"
  16 #include "fpu_emu.h"
  17 #include "fpu_system.h"
  18 #include "control_w.h"
  19 #include "poly.h"
  20
  21 #define HiPOWERop       3       /* odd poly, positive terms */
  22 static const unsigned long long oddplterm[HiPOWERop] = {
  23         0x0000000000000000LL,
  24         0x0051a1cf08fca228LL,
  25         0x0000000071284ff7LL
  26 };
  27
  28 #define HiPOWERon       2       /* odd poly, negative terms */
  29 static const unsigned long long oddnegterm[HiPOWERon] = {
  30         0x1291a9a184244e80LL,
  31         0x0000583245819c21LL
  32 };
  33
  34 #define HiPOWERep       2       /* even poly, positive terms */
  35 static const unsigned long long evenplterm[HiPOWERep] = {
  36         0x0e848884b539e888LL,
  37         0x00003c7f18b887daLL
  38 };
  39
  40 #define HiPOWERen       2       /* even poly, negative terms */
  41 static const unsigned long long evennegterm[HiPOWERen] = {
  42         0xf1f0200fd51569ccLL,
  43         0x003afb46105c4432LL
  44 };
  45
  46 static const unsigned long long twothirds = 0xaaaaaaaaaaaaaaabLL;
  47
  48 /*--- poly_tan() ------------------------------------------------------------+
  49  |                                                                           |
  50  +---------------------------------------------------------------------------*/
  51 void poly_tan(FPU_REG *st0_ptr)
  52 {
  53         long int exponent;
  54         int invert;
  55         Xsig argSq, argSqSq, accumulatoro, accumulatore, accum,
  56             argSignif, fix_up;
  57         unsigned long adj;
  58
  59         exponent = exponent(st0_ptr);
  60
  61 #ifdef PARANOID
  62         if (signnegative(st0_ptr)) {    /* Can't hack a number < 0.0 */
  63                 arith_invalid(0);
  64                 return;
  65         }                       /* Need a positive number */
  66 #endif /* PARANOID */
  67
  68         /* Split the problem into two domains, smaller and larger than pi/4 */
  69         if ((exponent == 0)
  70             || ((exponent == -1) && (st0_ptr->sigh > 0xc90fdaa2))) {
  71                 /* The argument is greater than (approx) pi/4 */
  72                 invert = 1;
  73                 accum.lsw = 0;
  74                 XSIG_LL(accum) = significand(st0_ptr);
  75
  76                 if (exponent == 0) {
  77                         /* The argument is >= 1.0 */
  78                         /* Put the binary point at the left. */
  79                         XSIG_LL(accum) <<= 1;
  80                 }
  81                 /* pi/2 in hex is: 1.921fb54442d18469 898CC51701B839A2 52049C1 */
  82                 XSIG_LL(accum) = 0x921fb54442d18469LL - XSIG_LL(accum);
  83                 /* This is a special case which arises due to rounding. */
  84                 if (XSIG_LL(accum) == 0xffffffffffffffffLL) {
  85                         FPU_settag0(TAG_Valid);
  86                         significand(st0_ptr) = 0x8a51e04daabda360LL;
  87                         setexponent16(st0_ptr,
  88                                       (0x41 + EXTENDED_Ebias) | SIGN_Negative);
  89                         return;
  90                 }
  91
  92                 argSignif.lsw = accum.lsw;
  93                 XSIG_LL(argSignif) = XSIG_LL(accum);
  94                 exponent = -1 + norm_Xsig(&argSignif);
  95         } else {
  96                 invert = 0;
  97                 argSignif.lsw = 0;
  98                 XSIG_LL(accum) = XSIG_LL(argSignif) = significand(st0_ptr);
  99
 100                 if (exponent < -1) {
 101                         /* shift the argument right by the required places */
 102                         if (FPU_shrx(&XSIG_LL(accum), -1 - exponent) >=
 103                             0x80000000U)
 104                                 XSIG_LL(accum)++;       /* round up */
 105                 }
 106         }
 107
 108         XSIG_LL(argSq) = XSIG_LL(accum);
 109         argSq.lsw = accum.lsw;
 110         mul_Xsig_Xsig(&argSq, &argSq);
 111         XSIG_LL(argSqSq) = XSIG_LL(argSq);
 112         argSqSq.lsw = argSq.lsw;
 113         mul_Xsig_Xsig(&argSqSq, &argSqSq);
 114
 115         /* Compute the negative terms for the numerator polynomial */
 116         accumulatoro.msw = accumulatoro.midw = accumulatoro.lsw = 0;
 117         polynomial_Xsig(&accumulatoro, &XSIG_LL(argSqSq), oddnegterm,
 118                         HiPOWERon - 1);
 119         mul_Xsig_Xsig(&accumulatoro, &argSq);
 120         negate_Xsig(&accumulatoro);
 121         /* Add the positive terms */
 122         polynomial_Xsig(&accumulatoro, &XSIG_LL(argSqSq), oddplterm,
 123                         HiPOWERop - 1);
 124
 125         /* Compute the positive terms for the denominator polynomial */
 126         accumulatore.msw = accumulatore.midw = accumulatore.lsw = 0;
 127         polynomial_Xsig(&accumulatore, &XSIG_LL(argSqSq), evenplterm,
 128                         HiPOWERep - 1);
 129         mul_Xsig_Xsig(&accumulatore, &argSq);
 130         negate_Xsig(&accumulatore);
 131         /* Add the negative terms */
 132         polynomial_Xsig(&accumulatore, &XSIG_LL(argSqSq), evennegterm,
 133                         HiPOWERen - 1);
 134         /* Multiply by arg^2 */
 135         mul64_Xsig(&accumulatore, &XSIG_LL(argSignif));
 136         mul64_Xsig(&accumulatore, &XSIG_LL(argSignif));
 137         /* de-normalize and divide by 2 */
 138         shr_Xsig(&accumulatore, -2 * (1 + exponent) + 1);
 139         negate_Xsig(&accumulatore);     /* This does 1 - accumulator */
 140
 141         /* Now find the ratio. */
 142         if (accumulatore.msw == 0) {
 143                 /* accumulatoro must contain 1.0 here, (actually, 0) but it
 144                    really doesn't matter what value we use because it will
 145                    have negligible effect in later calculations
 146                  */
 147                 XSIG_LL(accum) = 0x8000000000000000LL;
 148                 accum.lsw = 0;
 149         } else {
 150                 div_Xsig(&accumulatoro, &accumulatore, &accum);
 151         }
 152
 153         /* Multiply by 1/3 * arg^3 */
 154         mul64_Xsig(&accum, &XSIG_LL(argSignif));
 155         mul64_Xsig(&accum, &XSIG_LL(argSignif));
 156         mul64_Xsig(&accum, &XSIG_LL(argSignif));
 157         mul64_Xsig(&accum, &twothirds);
 158         shr_Xsig(&accum, -2 * (exponent + 1));
 159
 160         /* tan(arg) = arg + accum */
 161         add_two_Xsig(&accum, &argSignif, &exponent);
 162
 163         if (invert) {
 164                 /* We now have the value of tan(pi_2 - arg) where pi_2 is an
 165                    approximation for pi/2
 166                  */
 167                 /* The next step is to fix the answer to compensate for the
 168                    error due to the approximation used for pi/2
 169                  */
 170
 171                 /* This is (approx) delta, the error in our approx for pi/2
 172                    (see above). It has an exponent of -65
 173                  */
 174                 XSIG_LL(fix_up) = 0x898cc51701b839a2LL;
 175                 fix_up.lsw = 0;
 176
 177                 if (exponent == 0)
 178                         adj = 0xffffffff;       /* We want approx 1.0 here, but
 179                                                    this is close enough. */
 180                 else if (exponent > -30) {
 181                         adj = accum.msw >> -(exponent + 1);     /* tan */
 182                         adj = mul_32_32(adj, adj);      /* tan^2 */
 183                 } else
 184                         adj = 0;
 185                 adj = mul_32_32(0x898cc517, adj);       /* delta * tan^2 */
 186
 187                 fix_up.msw += adj;
 188                 if (!(fix_up.msw & 0x80000000)) {       /* did fix_up overflow ? */
 189                         /* Yes, we need to add an msb */
 190                         shr_Xsig(&fix_up, 1);
 191                         fix_up.msw |= 0x80000000;
 192                         shr_Xsig(&fix_up, 64 + exponent);
 193                 } else
 194                         shr_Xsig(&fix_up, 65 + exponent);
 195
 196                 add_two_Xsig(&accum, &fix_up, &exponent);
 197
 198                 /* accum now contains tan(pi/2 - arg).
 199                    Use tan(arg) = 1.0 / tan(pi/2 - arg)
 200                  */
 201                 accumulatoro.lsw = accumulatoro.midw = 0;
 202                 accumulatoro.msw = 0x80000000;
 203                 div_Xsig(&accumulatoro, &accum, &accum);
 204                 exponent = -exponent - 1;
 205         }
 206
 207         /* Transfer the result */
 208         round_Xsig(&accum);
 209         FPU_settag0(TAG_Valid);
 210         significand(st0_ptr) = XSIG_LL(accum);
 211         setexponent16(st0_ptr, exponent + EXTENDED_Ebias);      /* Result is positive. */
 212
 213 }