usr/src/lib/libc/sparc/fp/_Q_div.c

   1 /*
   2  * CDDL HEADER START
   3  *
   4  * The contents of this file are subject to the terms of the
   5  * Common Development and Distribution License, Version 1.0 only
   6  * (the "License").  You may not use this file except in compliance
   7  * with the License.
   8  *
   9  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
  10  * or http://www.opensolaris.org/os/licensing.
  11  * See the License for the specific language governing permissions
  12  * and limitations under the License.
  13  *
  14  * When distributing Covered Code, include this CDDL HEADER in each
  15  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
  16  * If applicable, add the following below this CDDL HEADER, with the
  17  * fields enclosed by brackets "[]" replaced with your own identifying
  18  * information: Portions Copyright [yyyy] [name of copyright owner]
  19  *
  20  * CDDL HEADER END
  21  */
  22 /*
  23  * Copyright 2003 Sun Microsystems, Inc.  All rights reserved.
  24  * Use is subject to license terms.
  25  */
  26
  27 #pragma ident   "%Z%%M% %I%     %E% SMI"
  28
  29 #include "quad.h"
  30
  31 static const double C[] = {
  32         0.0,
  33         1.0,
  34         68719476736.0,
  35         402653184.0,
  36         24.0,
  37         16.0,
  38         3.66210937500000000000e-04,
  39         1.52587890625000000000e-05,
  40         1.43051147460937500000e-06,
  41         5.96046447753906250000e-08,
  42         3.72529029846191406250e-09,
  43         2.18278728425502777100e-11,
  44         8.52651282912120223045e-14,
  45         3.55271367880050092936e-15,
  46         1.30104260698260532081e-18,
  47         8.67361737988403547206e-19,
  48         2.16840434497100886801e-19,
  49         3.17637355220362627151e-22,
  50         7.75481824268463445192e-26,
  51         4.62223186652936604733e-33,
  52         9.62964972193617926528e-35,
  53         4.70197740328915003187e-38,
  54         2.75506488473973634680e-40,
  55 };
  56
  57 #define zero            C[0]
  58 #define one             C[1]
  59 #define two36           C[2]
  60 #define three2p27       C[3]
  61 #define three2p3        C[4]
  62 #define two4            C[5]
  63 #define three2m13       C[6]
  64 #define twom16          C[7]
  65 #define three2m21       C[8]
  66 #define twom24          C[9]
  67 #define twom28          C[10]
  68 #define three2m37       C[11]
  69 #define three2m45       C[12]
  70 #define twom48          C[13]
  71 #define three2m61       C[14]
  72 #define twom60          C[15]
  73 #define twom62          C[16]
  74 #define three2m73       C[17]
  75 #define three2m85       C[18]
  76 #define three2m109      C[19]
  77 #define twom113         C[20]
  78 #define twom124         C[21]
  79 #define three2m133      C[22]
  80
  81 static const unsigned int
  82         fsr_re = 0x00000000u,
  83         fsr_rn = 0xc0000000u;
  84
  85 #ifdef __sparcv9
  86
  87 /*
  88  * _Qp_div(pz, x, y) sets *pz = *x / *y.
  89  */
  90 void
  91 _Qp_div(union longdouble *pz, const union longdouble *x,
  92         const union longdouble *y)
  93
  94 #else
  95
  96 /*
  97  * _Q_div(x, y) returns *x / *y.
  98  */
  99 union longdouble
 100 _Q_div(const union longdouble *x, const union longdouble *y)
 101
 102 #endif /* __sparcv9 */
 103
 104 {
 105         union longdouble        z;
 106         union xdouble           u;
 107         double                  c, d, ry, xx[4], yy[5], zz[5];
 108         unsigned int            xm, ym, fsr, lx, ly, wx[3], wy[3];
 109         unsigned int            msw, frac2, frac3, frac4, rm;
 110         int                     ibit, ex, ey, ez, sign;
 111
 112         xm = x->l.msw & 0x7fffffff;
 113         ym = y->l.msw & 0x7fffffff;
 114         sign = (x->l.msw ^ y->l.msw) & ~0x7fffffff;
 115
 116         __quad_getfsrp(&fsr);
 117
 118         /* handle nan and inf cases */
 119         if (xm >= 0x7fff0000 || ym >= 0x7fff0000) {
 120                 /* handle nan cases according to V9 app. B */
 121                 if (QUAD_ISNAN(*y)) {
 122                         if (!(y->l.msw & 0x8000)) {
 123                                 /* snan, signal invalid */
 124                                 if (fsr & FSR_NVM) {
 125                                         __quad_fdivq(x, y, &Z);
 126                                 } else {
 127                                         Z = *y;
 128                                         Z.l.msw |= 0x8000;
 129                                         fsr = (fsr & ~FSR_CEXC) | FSR_NVA |
 130                                             FSR_NVC;
 131                                         __quad_setfsrp(&fsr);
 132                                 }
 133                                 QUAD_RETURN(Z);
 134                         } else if (QUAD_ISNAN(*x) && !(x->l.msw & 0x8000)) {
 135                                 /* snan, signal invalid */
 136                                 if (fsr & FSR_NVM) {
 137                                         __quad_fdivq(x, y, &Z);
 138                                 } else {
 139                                         Z = *x;
 140                                         Z.l.msw |= 0x8000;
 141                                         fsr = (fsr & ~FSR_CEXC) | FSR_NVA |
 142                                             FSR_NVC;
 143                                         __quad_setfsrp(&fsr);
 144                                 }
 145                                 QUAD_RETURN(Z);
 146                         }
 147                         Z = *y;
 148                         QUAD_RETURN(Z);
 149                 }
 150                 if (QUAD_ISNAN(*x)) {
 151                         if (!(x->l.msw & 0x8000)) {
 152                                 /* snan, signal invalid */
 153                                 if (fsr & FSR_NVM) {
 154                                         __quad_fdivq(x, y, &Z);
 155                                 } else {
 156                                         Z = *x;
 157                                         Z.l.msw |= 0x8000;
 158                                         fsr = (fsr & ~FSR_CEXC) | FSR_NVA |
 159                                             FSR_NVC;
 160                                         __quad_setfsrp(&fsr);
 161                                 }
 162                                 QUAD_RETURN(Z);
 163                         }
 164                         Z = *x;
 165                         QUAD_RETURN(Z);
 166                 }
 167                 if (xm == 0x7fff0000) {
 168                         /* x is inf */
 169                         if (ym == 0x7fff0000) {
 170                                 /* inf / inf, signal invalid */
 171                                 if (fsr & FSR_NVM) {
 172                                         __quad_fdivq(x, y, &Z);
 173                                 } else {
 174                                         Z.l.msw = 0x7fffffff;
 175                                         Z.l.frac2 = Z.l.frac3 =
 176                                             Z.l.frac4 = 0xffffffff;
 177                                         fsr = (fsr & ~FSR_CEXC) | FSR_NVA |
 178                                             FSR_NVC;
 179                                         __quad_setfsrp(&fsr);
 180                                 }
 181                                 QUAD_RETURN(Z);
 182                         }
 183                         /* inf / finite, return inf */
 184                         Z.l.msw = sign | 0x7fff0000;
 185                         Z.l.frac2 = Z.l.frac3 = Z.l.frac4 = 0;
 186                         QUAD_RETURN(Z);
 187                 }
 188                 /* y is inf */
 189                 Z.l.msw = sign;
 190                 Z.l.frac2 = Z.l.frac3 = Z.l.frac4 = 0;
 191                 QUAD_RETURN(Z);
 192         }
 193
 194         /* handle zero cases */
 195         if (xm == 0 || ym == 0) {
 196                 if (QUAD_ISZERO(*x)) {
 197                         if (QUAD_ISZERO(*y)) {
 198                                 /* zero / zero, signal invalid */
 199                                 if (fsr & FSR_NVM) {
 200                                         __quad_fdivq(x, y, &Z);
 201                                 } else {
 202                                         Z.l.msw = 0x7fffffff;
 203                                         Z.l.frac2 = Z.l.frac3 =
 204                                             Z.l.frac4 = 0xffffffff;
 205                                         fsr = (fsr & ~FSR_CEXC) | FSR_NVA |
 206                                             FSR_NVC;
 207                                         __quad_setfsrp(&fsr);
 208                                 }
 209                                 QUAD_RETURN(Z);
 210                         }
 211                         /* zero / nonzero, return zero */
 212                         Z.l.msw = sign;
 213                         Z.l.frac2 = Z.l.frac3 = Z.l.frac4 = 0;
 214                         QUAD_RETURN(Z);
 215                 }
 216                 if (QUAD_ISZERO(*y)) {
 217                         /* nonzero / zero, signal zero divide */
 218                         if (fsr & FSR_DZM) {
 219                                 __quad_fdivq(x, y, &Z);
 220                         } else {
 221                                 Z.l.msw = sign | 0x7fff0000;
 222                                 Z.l.frac2 = Z.l.frac3 = Z.l.frac4 = 0;
 223                                 fsr = (fsr & ~FSR_CEXC) | FSR_DZA | FSR_DZC;
 224                                 __quad_setfsrp(&fsr);
 225                         }
 226                         QUAD_RETURN(Z);
 227                 }
 228         }
 229
 230         /* now x and y are finite, nonzero */
 231         __quad_setfsrp(&fsr_re);
 232
 233         /* get their normalized significands and exponents */
 234         ex = (int)(xm >> 16);
 235         lx = xm & 0xffff;
 236         if (ex) {
 237                 lx |= 0x10000;
 238                 wx[0] = x->l.frac2;
 239                 wx[1] = x->l.frac3;
 240                 wx[2] = x->l.frac4;
 241         } else {
 242                 if (lx | (x->l.frac2 & 0xfffe0000)) {
 243                         wx[0] = x->l.frac2;
 244                         wx[1] = x->l.frac3;
 245                         wx[2] = x->l.frac4;
 246                         ex = 1;
 247                 } else if (x->l.frac2 | (x->l.frac3 & 0xfffe0000)) {
 248                         lx = x->l.frac2;
 249                         wx[0] = x->l.frac3;
 250                         wx[1] = x->l.frac4;
 251                         wx[2] = 0;
 252                         ex = -31;
 253                 } else if (x->l.frac3 | (x->l.frac4 & 0xfffe0000)) {
 254                         lx = x->l.frac3;
 255                         wx[0] = x->l.frac4;
 256                         wx[1] = wx[2] = 0;
 257                         ex = -63;
 258                 } else {
 259                         lx = x->l.frac4;
 260                         wx[0] = wx[1] = wx[2] = 0;
 261                         ex = -95;
 262                 }
 263                 while ((lx & 0x10000) == 0) {
 264                         lx = (lx << 1) | (wx[0] >> 31);
 265                         wx[0] = (wx[0] << 1) | (wx[1] >> 31);
 266                         wx[1] = (wx[1] << 1) | (wx[2] >> 31);
 267                         wx[2] <<= 1;
 268                         ex--;
 269                 }
 270         }
 271         ez = ex;
 272
 273         ey = (int)(ym >> 16);
 274         ly = ym & 0xffff;
 275         if (ey) {
 276                 ly |= 0x10000;
 277                 wy[0] = y->l.frac2;
 278                 wy[1] = y->l.frac3;
 279                 wy[2] = y->l.frac4;
 280         } else {
 281                 if (ly | (y->l.frac2 & 0xfffe0000)) {
 282                         wy[0] = y->l.frac2;
 283                         wy[1] = y->l.frac3;
 284                         wy[2] = y->l.frac4;
 285                         ey = 1;
 286                 } else if (y->l.frac2 | (y->l.frac3 & 0xfffe0000)) {
 287                         ly = y->l.frac2;
 288                         wy[0] = y->l.frac3;
 289                         wy[1] = y->l.frac4;
 290                         wy[2] = 0;
 291                         ey = -31;
 292                 } else if (y->l.frac3 | (y->l.frac4 & 0xfffe0000)) {
 293                         ly = y->l.frac3;
 294                         wy[0] = y->l.frac4;
 295                         wy[1] = wy[2] = 0;
 296                         ey = -63;
 297                 } else {
 298                         ly = y->l.frac4;
 299                         wy[0] = wy[1] = wy[2] = 0;
 300                         ey = -95;
 301                 }
 302                 while ((ly & 0x10000) == 0) {
 303                         ly = (ly << 1) | (wy[0] >> 31);
 304                         wy[0] = (wy[0] << 1) | (wy[1] >> 31);
 305                         wy[1] = (wy[1] << 1) | (wy[2] >> 31);
 306                         wy[2] <<= 1;
 307                         ey--;
 308                 }
 309         }
 310         ez -= ey - 0x3fff;
 311
 312         /* extract the significands into doubles */
 313         c = twom16;
 314         xx[0] = (double)((int)lx) * c;
 315         yy[0] = (double)((int)ly) * c;
 316
 317         c *= twom24;
 318         xx[0] += (double)((int)(wx[0] >> 8)) * c;
 319         yy[1] = (double)((int)(wy[0] >> 8)) * c;
 320
 321         c *= twom24;
 322         xx[1] = (double)((int)(((wx[0] << 16) |
 323             (wx[1] >> 16)) & 0xffffff)) * c;
 324         yy[2] = (double)((int)(((wy[0] << 16) |
 325             (wy[1] >> 16)) & 0xffffff)) * c;
 326
 327         c *= twom24;
 328         xx[2] = (double)((int)(((wx[1] << 8) |
 329             (wx[2] >> 24)) & 0xffffff)) * c;
 330         yy[3] = (double)((int)(((wy[1] << 8) |
 331             (wy[2] >> 24)) & 0xffffff)) * c;
 332
 333         c *= twom24;
 334         xx[3] = (double)((int)(wx[2] & 0xffffff)) * c;
 335         yy[4] = (double)((int)(wy[2] & 0xffffff)) * c;
 336
 337         /* approximate the reciprocal of y */
 338         ry = one / ((yy[0] + yy[1]) + yy[2]);
 339
 340         /* compute the first five "digits" of the quotient */
 341         zz[0] = (ry * (xx[0] + xx[1]) + three2p27) - three2p27;
 342         xx[0] = ((xx[0] - zz[0] * yy[0]) - zz[0] * yy[1]) + xx[1];
 343         d = zz[0] * yy[2];
 344         c = (d + three2m13) - three2m13;
 345         xx[0] -= c;
 346         xx[1] = xx[2] - (d - c);
 347         d = zz[0] * yy[3];
 348         c = (d + three2m37) - three2m37;
 349         xx[1] -= c;
 350         xx[2] = xx[3] - (d - c);
 351         d = zz[0] * yy[4];
 352         c = (d + three2m61) - three2m61;
 353         xx[2] -= c;
 354         xx[3] = c - d;
 355
 356         zz[1] = (ry * (xx[0] + xx[1]) + three2p3) - three2p3;
 357         xx[0] = ((xx[0] - zz[1] * yy[0]) - zz[1] * yy[1]) + xx[1];
 358         d = zz[1] * yy[2];
 359         c = (d + three2m37) - three2m37;
 360         xx[0] -= c;
 361         xx[1] = xx[2] - (d - c);
 362         d = zz[1] * yy[3];
 363         c = (d + three2m61) - three2m61;
 364         xx[1] -= c;
 365         xx[2] = xx[3] - (d - c);
 366         d = zz[1] * yy[4];
 367         c = (d + three2m85) - three2m85;
 368         xx[2] -= c;
 369         xx[3] = c - d;
 370
 371         zz[2] = (ry * (xx[0] + xx[1]) + three2m21) - three2m21;
 372         xx[0] = ((xx[0] - zz[2] * yy[0]) - zz[2] * yy[1]) + xx[1];
 373         d = zz[2] * yy[2];
 374         c = (d + three2m61) - three2m61;
 375         xx[0] -= c;
 376         xx[1] = xx[2] - (d - c);
 377         d = zz[2] * yy[3];
 378         c = (d + three2m85) - three2m85;
 379         xx[1] -= c;
 380         xx[2] = xx[3] - (d - c);
 381         d = zz[2] * yy[4];
 382         c = (d + three2m109) - three2m109;
 383         xx[2] -= c;
 384         xx[3] = c - d;
 385
 386         zz[3] = (ry * (xx[0] + xx[1]) + three2m45) - three2m45;
 387         xx[0] = ((xx[0] - zz[3] * yy[0]) - zz[3] * yy[1]) + xx[1];
 388         d = zz[3] * yy[2];
 389         c = (d + three2m85) - three2m85;
 390         xx[0] -= c;
 391         xx[1] = xx[2] - (d - c);
 392         d = zz[3] * yy[3];
 393         c = (d + three2m109) - three2m109;
 394         xx[1] -= c;
 395         xx[2] = xx[3] - (d - c);
 396         d = zz[3] * yy[4];
 397         c = (d + three2m133) - three2m133;
 398         xx[2] -= c;
 399         xx[3] = c - d;
 400
 401         zz[4] = (ry * (xx[0] + xx[1]) + three2m73) - three2m73;
 402
 403         /* reduce to three doubles, making sure zz[1] is positive */
 404         zz[0] += zz[1] - twom48;
 405         zz[1] = twom48 + zz[2] + zz[3];
 406         zz[2] = zz[4];
 407
 408         /* if the third term might lie on a rounding boundary, perturb it */
 409         if (zz[2] == (twom62 + zz[2]) - twom62) {
 410                 /* here we just need to get the sign of the remainder */
 411                 c = (((((xx[0] - zz[4] * yy[0]) - zz[4] * yy[1]) + xx[1]) +
 412                     (xx[2] - zz[4] * yy[2])) + (xx[3] - zz[4] * yy[3]))
 413                     - zz[4] * yy[4];
 414                 if (c < zero)
 415                         zz[2] -= twom124;
 416                 else if (c > zero)
 417                         zz[2] += twom124;
 418         }
 419
 420         /*
 421          * propagate carries/borrows, using round-to-negative-infinity mode
 422          * to make all terms nonnegative (note that we can't encounter a
 423          * borrow so large that the roundoff is unrepresentable because
 424          * we took care to make zz[1] positive above)
 425          */
 426         __quad_setfsrp(&fsr_rn);
 427         c = zz[1] + zz[2];
 428         zz[2] += (zz[1] - c);
 429         zz[1] = c;
 430         c = zz[0] + zz[1];
 431         zz[1] += (zz[0] - c);
 432         zz[0] = c;
 433
 434         /* check for borrow */
 435         if (c < one) {
 436                 /* postnormalize */
 437                 zz[0] += zz[0];
 438                 zz[1] += zz[1];
 439                 zz[2] += zz[2];
 440                 ez--;
 441         }
 442
 443         /* if exponent > 0 strip off integer bit, else denormalize */
 444         if (ez > 0) {
 445                 ibit = 1;
 446                 zz[0] -= one;
 447         } else {
 448                 ibit = 0;
 449                 if (ez > -128)
 450                         u.l.hi = (unsigned int)(0x3fe + ez) << 20;
 451                 else
 452                         u.l.hi = 0x37e00000;
 453                 u.l.lo = 0;
 454                 zz[0] *= u.d;
 455                 zz[1] *= u.d;
 456                 zz[2] *= u.d;
 457                 ez = 0;
 458         }
 459
 460         /* the first 48 bits of fraction come from zz[0] */
 461         u.d = d = two36 + zz[0];
 462         msw = u.l.lo;
 463         zz[0] -= (d - two36);
 464
 465         u.d = d = two4 + zz[0];
 466         frac2 = u.l.lo;
 467         zz[0] -= (d - two4);
 468
 469         /* the next 32 come from zz[0] and zz[1] */
 470         u.d = d = twom28 + (zz[0] + zz[1]);
 471         frac3 = u.l.lo;
 472         zz[0] -= (d - twom28);
 473
 474         /* condense the remaining fraction; errors here won't matter */
 475         c = zz[0] + zz[1];
 476         zz[1] = ((zz[0] - c) + zz[1]) + zz[2];
 477         zz[0] = c;
 478
 479         /* get the last word of fraction */
 480         u.d = d = twom60 + (zz[0] + zz[1]);
 481         frac4 = u.l.lo;
 482         zz[0] -= (d - twom60);
 483
 484         /* keep track of what's left for rounding; note that the error */
 485         /* in computing c will be non-negative due to rounding mode */
 486         c = zz[0] + zz[1];
 487
 488         /* get the rounding mode, fudging directed rounding modes */
 489         /* as though the result were positive */
 490         rm = fsr >> 30;
 491         if (sign)
 492                 rm ^= (rm >> 1);
 493
 494         /* round and raise exceptions */
 495         fsr &= ~FSR_CEXC;
 496         if (c != zero) {
 497                 fsr |= FSR_NXC;
 498
 499                 /* decide whether to round the fraction up */
 500                 if (rm == FSR_RP || (rm == FSR_RN && (c > twom113 ||
 501                     (c == twom113 && ((frac4 & 1) || (c - zz[0] !=
 502                     zz[1])))))) {
 503                         /* round up and renormalize if necessary */
 504                         if (++frac4 == 0)
 505                                 if (++frac3 == 0)
 506                                         if (++frac2 == 0)
 507                                                 if (++msw == 0x10000) {
 508                                                         msw = 0;
 509                                                         ez++;
 510                                                 }
 511                 }
 512         }
 513
 514         /* check for under/overflow */
 515         if (ez >= 0x7fff) {
 516                 if (rm == FSR_RN || rm == FSR_RP) {
 517                         z.l.msw = sign | 0x7fff0000;
 518                         z.l.frac2 = z.l.frac3 = z.l.frac4 = 0;
 519                 } else {
 520                         z.l.msw = sign | 0x7ffeffff;
 521                         z.l.frac2 = z.l.frac3 = z.l.frac4 = 0xffffffff;
 522                 }
 523                 fsr |= FSR_OFC | FSR_NXC;
 524         } else {
 525                 z.l.msw = sign | (ez << 16) | msw;
 526                 z.l.frac2 = frac2;
 527                 z.l.frac3 = frac3;
 528                 z.l.frac4 = frac4;
 529
 530                 /* !ibit => exact result was tiny before rounding, */
 531                 /* t nonzero => result delivered is inexact */
 532                 if (!ibit) {
 533                         if (c != zero)
 534                                 fsr |= FSR_UFC | FSR_NXC;
 535                         else if (fsr & FSR_UFM)
 536                                 fsr |= FSR_UFC;
 537                 }
 538         }
 539
 540         if ((fsr & FSR_CEXC) & (fsr >> 23)) {
 541                 __quad_setfsrp(&fsr);
 542                 __quad_fdivq(x, y, &Z);
 543         } else {
 544                 Z = z;
 545                 fsr |= (fsr & 0x1f) << 5;
 546                 __quad_setfsrp(&fsr);
 547         }
 548         QUAD_RETURN(Z);
 549 }