usr/src/lib/libmvec/common/__vcos.c

   1 /*
   2  * CDDL HEADER START
   3  *
   4  * The contents of this file are subject to the terms of the
   5  * Common Development and Distribution License (the "License").
   6  * You may not use this file except in compliance with the License.
   7  *
   8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
   9  * or http://www.opensolaris.org/os/licensing.
  10  * See the License for the specific language governing permissions
  11  * and limitations under the License.
  12  *
  13  * When distributing Covered Code, include this CDDL HEADER in each
  14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
  15  * If applicable, add the following below this CDDL HEADER, with the
  16  * fields enclosed by brackets "[]" replaced with your own identifying
  17  * information: Portions Copyright [yyyy] [name of copyright owner]
  18  *
  19  * CDDL HEADER END
  20  */
  21
  22 /*
  23  * Copyright 2011 Nexenta Systems, Inc.  All rights reserved.
  24  */
  25 /*
  26  * Copyright 2006 Sun Microsystems, Inc.  All rights reserved.
  27  * Use is subject to license terms.
  28  */
  29
  30 #include <sys/isa_defs.h>
  31 #include <sys/ccompile.h>
  32
  33 #ifdef _LITTLE_ENDIAN /* pick the 32-bit word order inside a double */
  34 #define HI(x)   *(1+(int*)x) /* word holding sign/exponent; x is a pointer and is not parenthesized, so pass a simple expression */
  35 #define LO(x)   *(unsigned*)x /* the other (low-order) 32-bit word */
  36 #else /* big-endian: the high-order word comes first */
  37 #define HI(x)   *(int*)x
  38 #define LO(x)   *(1+(unsigned*)x)
  39 #endif /* _LITTLE_ENDIAN */
  40
  41 #ifdef __RESTRICT /* build advertises restrict-qualified pointer support */
  42 #define restrict _Restrict /* map the qualifier onto the _Restrict spelling */
  43 #else
  44 #define restrict /* otherwise the qualifier expands to nothing */
  45 #endif /* __RESTRICT */
  46
  47 /*
  48  * vcos.1.c
  49  *
  50  * Vector cosine function.  Just slight modifications to vsin.8.c, mainly
  51  * in the primary range part.
  52  *
  53  * Modification to primary range processing.  If an argument that does not
  54  * fall in the primary range is encountered, then processing is continued
  55  * in the medium range.
  56  *
  57  */
  58
  59 extern const double __vlibm_TBL_sincos_hi[], __vlibm_TBL_sincos_lo[];
  60
  61 static const double
  62         half[2] = { 0.5, -0.5 }, /* round-to-nearest bias, indexed by the sign bit of x */
  63         one             = 1.0,
  64         invpio2 = 0.636619772367581343075535,  /* 53 bits of 2/pi */
  65         pio2_1  = 1.570796326734125614166,      /* first 33 bits of pi/2 */
  66         pio2_2  = 6.077100506303965976596e-11, /* second 33 bits of pi/2 */
  67         pio2_3  = 2.022266248711166455796e-21, /* third 33 bits of pi/2 */
  68         pio2_3t = 8.478427660368899643959e-32, /* pi/2 - (pio2_1+pio2_2+pio2_3) */
  69         pp1             = -1.666666666605760465276263943134982554676e-0001, /* pp1, pp2: w = x*(one + z*(pp1 + z*pp2)), z = x*x */
  70         pp2             =  8.333261209690963126718376566146180944442e-0003,
  71         qq1             = -4.999999999977710986407023955908711557870e-0001, /* qq1, qq2: t = z*(qq1 + z*qq2) */
  72         qq2             =  4.166654863857219350645055881018842089580e-0002,
  73         poly1[2]= { -1.666666666666629669805215138920301589656e-0001, /* polyN: t = z*(poly1 + z*(poly2 + z*(poly3 + z*poly4))) */
  74                                 -4.999999999999931701464060878888294524481e-0001 }, /* element [1] is the set used for cos: result = one + t */
  75         poly2[2]= {  8.333333332390951295683993455280336376663e-0003, /* element [0] looks like the matching sine set -- confirm */
  76                                  4.166666666394861917535640593963708222319e-0002 },
  77         poly3[2]= { -1.984126237997976692791551778230098403960e-0004, /* after argument reduction the element is chosen by n&1 */
  78                                 -1.388888552656142867832756687736851681462e-0003 },
  79         poly4[2]= {  2.753403624854277237649987622848330351110e-0006,
  80                                  2.478519423681460796618128289454530524759e-0005 };
  81
  82 static const unsigned thresh[2] = { 0x3fc90000, 0x3fc40000 }; /* high-word cutoffs (0.1953125, 0.15625) indexed by n&1; a reduced arg below its cutoff takes the polynomial path instead of the table */
  83
  84 /* Don't __ the following; acomp will handle it */
  85 extern double fabs(double);
  86 extern void __vlibm_vcos_big(int, double *, int, double *, int, int); /* per the __vcos header comment, handles elts with abs >~ 1.647e+06 */
  87
  88 /*
  89  * y[i*stridey] := cos( x[i*stridex] ), for i = 0..n-1.
  90  *
  91  * Calls __vlibm_vcos_big to handle all elts which have abs >~ 1.647e+06.
  92  * Argument reduction is done here for elts pi/4 < arg < 1.647e+06.
  93  *
  94  * elts with abs < 2^-27 use the approximation 1.0 ~ cos(x).
  95  */
  96 void
  97 __vcos(int n, double * restrict x, int stridex, double * restrict y,
  98         int stridey)
  99 {
 100         double          x0_or_one[4], x1_or_one[4], x2_or_one[4];
 101         double          y0_or_zero[4], y1_or_zero[4], y2_or_zero[4];
 102         double          x0, x1, x2, *py0 = 0, *py1 = 0, *py2, *xsave, *ysave;
 103         unsigned        hx0, hx1, hx2, xsb0, xsb1 = 0, xsb2;
 104         int             i, biguns, nsave, sxsave, sysave;
 105         volatile int    v __GNU_UNUSED;
 106         nsave = n;
 107         xsave = x;
 108         sxsave = stridex;
 109         ysave = y;
 110         sysave = stridey;
 111         biguns = 0;
 112
 113         do /* MAIN LOOP */
 114         {
 115                 /* Gotos here so _break_ exits MAIN LOOP. */
 116 LOOP0:  /* Find first arg in right range. */
 117                 xsb0 = HI(x); /* get most significant word */
 118                 hx0 = xsb0 & ~0x80000000; /* mask off sign bit */
 119                 if (hx0 > 0x3fe921fb) {
 120                         /* Too big: arg reduction needed, so leave for second part */
 121                         biguns = 1;
 122                         goto MEDIUM;
 123                 }
 124                 if (hx0 < 0x3e400000) {
 125                         /* Too small.  cos x ~ 1. */
 126                         v = *x;
 127                         *y = 1.0;
 128                         x += stridex;
 129                         y += stridey;
 130                         i = 0;
 131                         if (--n <= 0)
 132                                 break;
 133                         goto LOOP0;
 134                 }
 135                 x0 = *x;
 136                 py0 = y;
 137                 x += stridex;
 138                 y += stridey;
 139                 i = 1;
 140                 if (--n <= 0)
 141                         break;
 142
 143 LOOP1: /* Get second arg, same as above. */
 144                 xsb1 = HI(x);
 145                 hx1 = xsb1 & ~0x80000000;
 146                 if (hx1 > 0x3fe921fb)
 147                 {
 148                         biguns = 2;
 149                         goto MEDIUM;
 150                 }
 151                 if (hx1 < 0x3e400000)
 152                 {
 153                         v = *x;
 154                         *y = 1.0;
 155                         x += stridex;
 156                         y += stridey;
 157                         i = 1;
 158                         if (--n <= 0)
 159                                 break;
 160                         goto LOOP1;
 161                 }
 162                 x1 = *x;
 163                 py1 = y;
 164                 x += stridex;
 165                 y += stridey;
 166                 i = 2;
 167                 if (--n <= 0)
 168                         break;
 169
 170 LOOP2: /* Get third arg, same as above. */
 171                 xsb2 = HI(x);
 172                 hx2 = xsb2 & ~0x80000000;
 173                 if (hx2 > 0x3fe921fb)
 174                 {
 175                         biguns = 3;
 176                         goto MEDIUM;
 177                 }
 178                 if (hx2 < 0x3e400000)
 179                 {
 180                         v = *x;
 181                         *y = 1.0;
 182                         x += stridex;
 183                         y += stridey;
 184                         i = 2;
 185                         if (--n <= 0)
 186                                 break;
 187                         goto LOOP2;
 188                 }
 189                 x2 = *x;
 190                 py2 = y;
 191
 192                 /*
 193                  * 0x3fc40000 = 5/32 ~ 0.15625
 194                  * Get msb after subtraction.  Will be 1 only if
 195                  * hx0 - 5/32 is negative.
 196                  */
 197                 i = (hx0 - 0x3fc40000) >> 31;
 198                 i |= ((hx1 - 0x3fc40000) >> 30) & 2;
 199                 i |= ((hx2 - 0x3fc40000) >> 29) & 4;
 200                 switch (i)
 201                 {
 202                         double          a0, a1, a2, w0, w1, w2;
 203                         double          t0, t1, t2, z0, z1, z2;
 204                         unsigned        j0, j1, j2;
 205
 206                 case 0: /* All are > 5/32 */
 207                         j0 = (xsb0 + 0x4000) & 0xffff8000;
 208                         j1 = (xsb1 + 0x4000) & 0xffff8000;
 209                         j2 = (xsb2 + 0x4000) & 0xffff8000;
 210                         HI(&t0) = j0;
 211                         HI(&t1) = j1;
 212                         HI(&t2) = j2;
 213                         LO(&t0) = 0;
 214                         LO(&t1) = 0;
 215                         LO(&t2) = 0;
 216                         x0 -= t0;
 217                         x1 -= t1;
 218                         x2 -= t2;
 219                         z0 = x0 * x0;
 220                         z1 = x1 * x1;
 221                         z2 = x2 * x2;
 222                         t0 = z0 * (qq1 + z0 * qq2);
 223                         t1 = z1 * (qq1 + z1 * qq2);
 224                         t2 = z2 * (qq1 + z2 * qq2);
 225                         w0 = x0 * (one + z0 * (pp1 + z0 * pp2));
 226                         w1 = x1 * (one + z1 * (pp1 + z1 * pp2));
 227                         w2 = x2 * (one + z2 * (pp1 + z2 * pp2));
 228                         j0 = (((j0 & ~0x80000000) - 0x3fc40000) >> 13) & ~0x3;
 229                         j1 = (((j1 & ~0x80000000) - 0x3fc40000) >> 13) & ~0x3;
 230                         j2 = (((j2 & ~0x80000000) - 0x3fc40000) >> 13) & ~0x3;
 231                         xsb0 = (xsb0 >> 30) & 2;
 232                         xsb1 = (xsb1 >> 30) & 2;
 233                         xsb2 = (xsb2 >> 30) & 2;
 234                         a0 = __vlibm_TBL_sincos_hi[j0+1]; /* cos_hi(t) */
 235                         a1 = __vlibm_TBL_sincos_hi[j1+1];
 236                         a2 = __vlibm_TBL_sincos_hi[j2+1];
 237                            /*   cos_lo(t)                        sin_hi(t) */
 238                         t0 = __vlibm_TBL_sincos_lo[j0+1] - (__vlibm_TBL_sincos_hi[j0+xsb0]*w0 - a0*t0);
 239                         t1 = __vlibm_TBL_sincos_lo[j1+1] - (__vlibm_TBL_sincos_hi[j1+xsb1]*w1 - a1*t1);
 240                         t2 = __vlibm_TBL_sincos_lo[j2+1] - (__vlibm_TBL_sincos_hi[j2+xsb2]*w2 - a2*t2);
 241
 242                         *py0 = a0 + t0;
 243                         *py1 = a1 + t1;
 244                         *py2 = a2 + t2;
 245                         break;
 246
 247                 case 1:
 248                         j1 = (xsb1 + 0x4000) & 0xffff8000;
 249                         j2 = (xsb2 + 0x4000) & 0xffff8000;
 250                         HI(&t1) = j1;
 251                         HI(&t2) = j2;
 252                         LO(&t1) = 0;
 253                         LO(&t2) = 0;
 254                         x1 -= t1;
 255                         x2 -= t2;
 256                         z0 = x0 * x0;
 257                         z1 = x1 * x1;
 258                         z2 = x2 * x2;
 259                         t0 = z0 * (poly3[1] + z0 * poly4[1]);
 260                         t1 = z1 * (qq1 + z1 * qq2);
 261                         t2 = z2 * (qq1 + z2 * qq2);
 262                         t0 = z0 * (poly1[1] + z0 * (poly2[1] + t0));
 263                         w1 = x1 * (one + z1 * (pp1 + z1 * pp2));
 264                         w2 = x2 * (one + z2 * (pp1 + z2 * pp2));
 265                         j1 = (((j1 & ~0x80000000) - 0x3fc40000) >> 13) & ~0x3;
 266                         j2 = (((j2 & ~0x80000000) - 0x3fc40000) >> 13) & ~0x3;
 267                         xsb1 = (xsb1 >> 30) & 2;
 268                         xsb2 = (xsb2 >> 30) & 2;
 269                         a1 = __vlibm_TBL_sincos_hi[j1+1];
 270                         a2 = __vlibm_TBL_sincos_hi[j2+1];
 271                         t1 = __vlibm_TBL_sincos_lo[j1+1] - (__vlibm_TBL_sincos_hi[j1+xsb1]*w1 - a1*t1);
 272                         t2 = __vlibm_TBL_sincos_lo[j2+1] - (__vlibm_TBL_sincos_hi[j2+xsb2]*w2 - a2*t2);
 273                         *py0 = one + t0;
 274                         *py1 = a1 + t1;
 275                         *py2 = a2 + t2;
 276                         break;
 277
 278                 case 2:
 279                         j0 = (xsb0 + 0x4000) & 0xffff8000;
 280                         j2 = (xsb2 + 0x4000) & 0xffff8000;
 281                         HI(&t0) = j0;
 282                         HI(&t2) = j2;
 283                         LO(&t0) = 0;
 284                         LO(&t2) = 0;
 285                         x0 -= t0;
 286                         x2 -= t2;
 287                         z0 = x0 * x0;
 288                         z1 = x1 * x1;
 289                         z2 = x2 * x2;
 290                         t0 = z0 * (qq1 + z0 * qq2);
 291                         t1 = z1 * (poly3[1] + z1 * poly4[1]);
 292                         t2 = z2 * (qq1 + z2 * qq2);
 293                         w0 = x0 * (one + z0 * (pp1 + z0 * pp2));
 294                         t1 = z1 * (poly1[1] + z1 * (poly2[1] + t1));
 295                         w2 = x2 * (one + z2 * (pp1 + z2 * pp2));
 296                         j0 = (((j0 & ~0x80000000) - 0x3fc40000) >> 13) & ~0x3;
 297                         j2 = (((j2 & ~0x80000000) - 0x3fc40000) >> 13) & ~0x3;
 298                         xsb0 = (xsb0 >> 30) & 2;
 299                         xsb2 = (xsb2 >> 30) & 2;
 300                         a0 = __vlibm_TBL_sincos_hi[j0+1];
 301                         a2 = __vlibm_TBL_sincos_hi[j2+1];
 302                         t0 = __vlibm_TBL_sincos_lo[j0+1] - (__vlibm_TBL_sincos_hi[j0+xsb0]*w0 - a0*t0);
 303                         t2 = __vlibm_TBL_sincos_lo[j2+1] - (__vlibm_TBL_sincos_hi[j2+xsb2]*w2 - a2*t2);
 304                         *py0 = a0 + t0;
 305                         *py1 = one + t1;
 306                         *py2 = a2 + t2;
 307                         break;
 308
 309                 case 3:
 310                         j2 = (xsb2 + 0x4000) & 0xffff8000;
 311                         HI(&t2) = j2;
 312                         LO(&t2) = 0;
 313                         x2 -= t2;
 314                         z0 = x0 * x0;
 315                         z1 = x1 * x1;
 316                         z2 = x2 * x2;
 317                         t0 = z0 * (poly3[1] + z0 * poly4[1]);
 318                         t1 = z1 * (poly3[1] + z1 * poly4[1]);
 319                         t2 = z2 * (qq1 + z2 * qq2);
 320                         t0 = z0 * (poly1[1] + z0 * (poly2[1] + t0));
 321                         t1 = z1 * (poly1[1] + z1 * (poly2[1] + t1));
 322                         w2 = x2 * (one + z2 * (pp1 + z2 * pp2));
 323                         j2 = (((j2 & ~0x80000000) - 0x3fc40000) >> 13) & ~0x3;
 324                         xsb2 = (xsb2 >> 30) & 2;
 325                         a2 = __vlibm_TBL_sincos_hi[j2+1];
 326                         t2 = __vlibm_TBL_sincos_lo[j2+1] - (__vlibm_TBL_sincos_hi[j2+xsb2]*w2 - a2*t2);
 327                         *py0 = one + t0;
 328                         *py1 = one + t1;
 329                         *py2 = a2 + t2;
 330                         break;
 331
 332                 case 4:
 333                         j0 = (xsb0 + 0x4000) & 0xffff8000;
 334                         j1 = (xsb1 + 0x4000) & 0xffff8000;
 335                         HI(&t0) = j0;
 336                         HI(&t1) = j1;
 337                         LO(&t0) = 0;
 338                         LO(&t1) = 0;
 339                         x0 -= t0;
 340                         x1 -= t1;
 341                         z0 = x0 * x0;
 342                         z1 = x1 * x1;
 343                         z2 = x2 * x2;
 344                         t0 = z0 * (qq1 + z0 * qq2);
 345                         t1 = z1 * (qq1 + z1 * qq2);
 346                         t2 = z2 * (poly3[1] + z2 * poly4[1]);
 347                         w0 = x0 * (one + z0 * (pp1 + z0 * pp2));
 348                         w1 = x1 * (one + z1 * (pp1 + z1 * pp2));
 349                         t2 = z2 * (poly1[1] + z2 * (poly2[1] + t2));
 350                         j0 = (((j0 & ~0x80000000) - 0x3fc40000) >> 13) & ~0x3;
 351                         j1 = (((j1 & ~0x80000000) - 0x3fc40000) >> 13) & ~0x3;
 352                         xsb0 = (xsb0 >> 30) & 2;
 353                         xsb1 = (xsb1 >> 30) & 2;
 354                         a0 = __vlibm_TBL_sincos_hi[j0+1];
 355                         a1 = __vlibm_TBL_sincos_hi[j1+1];
 356                         t0 = __vlibm_TBL_sincos_lo[j0+1] - (__vlibm_TBL_sincos_hi[j0+xsb0]*w0 - a0*t0);
 357                         t1 = __vlibm_TBL_sincos_lo[j1+1] - (__vlibm_TBL_sincos_hi[j1+xsb1]*w1 - a1*t1);
 358                         *py0 = a0 + t0;
 359                         *py1 = a1 + t1;
 360                         *py2 = one + t2;
 361                         break;
 362
 363                 case 5:
 364                         j1 = (xsb1 + 0x4000) & 0xffff8000;
 365                         HI(&t1) = j1;
 366                         LO(&t1) = 0;
 367                         x1 -= t1;
 368                         z0 = x0 * x0;
 369                         z1 = x1 * x1;
 370                         z2 = x2 * x2;
 371                         t0 = z0 * (poly3[1] + z0 * poly4[1]);
 372                         t1 = z1 * (qq1 + z1 * qq2);
 373                         t2 = z2 * (poly3[1] + z2 * poly4[1]);
 374                         t0 = z0 * (poly1[1] + z0 * (poly2[1] + t0));
 375                         w1 = x1 * (one + z1 * (pp1 + z1 * pp2));
 376                         t2 = z2 * (poly1[1] + z2 * (poly2[1] + t2));
 377                         j1 = (((j1 & ~0x80000000) - 0x3fc40000) >> 13) & ~0x3;
 378                         xsb1 = (xsb1 >> 30) & 2;
 379                         a1 = __vlibm_TBL_sincos_hi[j1+1];
 380                         t1 = __vlibm_TBL_sincos_lo[j1+1] - (__vlibm_TBL_sincos_hi[j1+xsb1]*w1 - a1*t1);
 381                         *py0 = one + t0;
 382                         *py1 = a1 + t1;
 383                         *py2 = one + t2;
 384                         break;
 385
 386                 case 6:
 387                         j0 = (xsb0 + 0x4000) & 0xffff8000;
 388                         HI(&t0) = j0;
 389                         LO(&t0) = 0;
 390                         x0 -= t0;
 391                         z0 = x0 * x0;
 392                         z1 = x1 * x1;
 393                         z2 = x2 * x2;
 394                         t0 = z0 * (qq1 + z0 * qq2);
 395                         t1 = z1 * (poly3[1] + z1 * poly4[1]);
 396                         t2 = z2 * (poly3[1] + z2 * poly4[1]);
 397                         w0 = x0 * (one + z0 * (pp1 + z0 * pp2));
 398                         t1 = z1 * (poly1[1] + z1 * (poly2[1] + t1));
 399                         t2 = z2 * (poly1[1] + z2 * (poly2[1] + t2));
 400                         j0 = (((j0 & ~0x80000000) - 0x3fc40000) >> 13) & ~0x3;
 401                         xsb0 = (xsb0 >> 30) & 2;
 402                         a0 = __vlibm_TBL_sincos_hi[j0+1];
 403                         t0 = __vlibm_TBL_sincos_lo[j0+1] - (__vlibm_TBL_sincos_hi[j0+xsb0]*w0 - a0*t0);
 404                         *py0 = a0 + t0;
 405                         *py1 = one + t1;
 406                         *py2 = one + t2;
 407                         break;
 408
 409                 case 7: /* All are < 5/32 */
 410                         z0 = x0 * x0;
 411                         z1 = x1 * x1;
 412                         z2 = x2 * x2;
 413                         t0 = z0 * (poly3[1] + z0 * poly4[1]);
 414                         t1 = z1 * (poly3[1] + z1 * poly4[1]);
 415                         t2 = z2 * (poly3[1] + z2 * poly4[1]);
 416                         t0 = z0 * (poly1[1] + z0 * (poly2[1] + t0));
 417                         t1 = z1 * (poly1[1] + z1 * (poly2[1] + t1));
 418                         t2 = z2 * (poly1[1] + z2 * (poly2[1] + t2));
 419                         *py0 = one + t0;
 420                         *py1 = one + t1;
 421                         *py2 = one + t2;
 422                         break;
 423                 }
 424
 425                 x += stridex;
 426                 y += stridey;
 427                 i = 0;
 428         } while (--n > 0); /* END MAIN LOOP */
 429
 430         /*
 431          * CLEAN UP last 0, 1, or 2 elts.
 432          */
 433         if (i > 0) /* Clean up elts at tail.  i < 3. */
 434         {
 435                 double          a0, a1, w0, w1;
 436                 double          t0, t1, z0, z1;
 437                 unsigned        j0, j1;
 438
 439                 if (i > 1)
 440                 {
 441                         if (hx1 < 0x3fc40000)
 442                         {
 443                                 z1 = x1 * x1;
 444                                 t1 = z1 * (poly3[1] + z1 * poly4[1]);
 445                                 t1 = z1 * (poly1[1] + z1 * (poly2[1] + t1));
 446                                 t1 = one + t1;
 447                                 *py1 = t1;
 448                         }
 449                         else
 450                         {
 451                                 j1 = (xsb1 + 0x4000) & 0xffff8000;
 452                                 HI(&t1) = j1;
 453                                 LO(&t1) = 0;
 454                                 x1 -= t1;
 455                                 z1 = x1 * x1;
 456                                 t1 = z1 * (qq1 + z1 * qq2);
 457                                 w1 = x1 * (one + z1 * (pp1 + z1 * pp2));
 458                                 j1 = (((j1 & ~0x80000000) - 0x3fc40000) >> 13) & ~0x3;
 459                                 xsb1 = (xsb1 >> 30) & 2;
 460                                 a1 = __vlibm_TBL_sincos_hi[j1+1];
 461                                 t1 = __vlibm_TBL_sincos_lo[j1+1]
 462                                         - (__vlibm_TBL_sincos_hi[j1+xsb1]*w1 - a1*t1);
 463                                 *py1 = a1 + t1;
 464                         }
 465                 }
 466                 if (hx0 < 0x3fc40000)
 467                 {
 468                         z0 = x0 * x0;
 469                         t0 = z0 * (poly3[1] + z0 * poly4[1]);
 470                         t0 = z0 * (poly1[1] + z0 * (poly2[1] + t0));
 471                         t0 = one + t0;
 472                         *py0 = t0;
 473                 }
 474                 else
 475                 {
 476                         j0 = (xsb0 + 0x4000) & 0xffff8000;
 477                         HI(&t0) = j0;
 478                         LO(&t0) = 0;
 479                         x0 -= t0;
 480                         z0 = x0 * x0;
 481                         t0 = z0 * (qq1 + z0 * qq2);
 482                         w0 = x0 * (one + z0 * (pp1 + z0 * pp2));
 483                         j0 = (((j0 & ~0x80000000) - 0x3fc40000) >> 13) & ~0x3;
 484                         xsb0 = (xsb0 >> 30) & 2;
 485                         a0 = __vlibm_TBL_sincos_hi[j0+1];
 486                         t0 = __vlibm_TBL_sincos_lo[j0+1] - (__vlibm_TBL_sincos_hi[j0+xsb0]*w0 - a0*t0);
 487                         *py0 = a0 + t0;
 488                 }
 489         } /* END CLEAN UP */
 490
 491         return;
 492
 493         /*
 494          * Take care of BIGUNS.
 495          *
 496          * We have jumped here in the middle of processing after having
 497          * encountered a medium range argument.  Therefore things are in a
 498          * bit of a tizzy.
 499          */
 500
 501 MEDIUM:
 502
 503         x0_or_one[1] = 1.0;
 504         x1_or_one[1] = 1.0;
 505         x2_or_one[1] = 1.0;
 506         x0_or_one[3] = -1.0;
 507         x1_or_one[3] = -1.0;
 508         x2_or_one[3] = -1.0;
 509         y0_or_zero[1] = 0.0;
 510         y1_or_zero[1] = 0.0;
 511         y2_or_zero[1] = 0.0;
 512         y0_or_zero[3] = 0.0;
 513         y1_or_zero[3] = 0.0;
 514         y2_or_zero[3] = 0.0;
 515
 516         if (biguns == 3)
 517         {
 518                 biguns = 0;
 519                 xsb0 = xsb0 >> 31;
 520                 xsb1 = xsb1 >> 31;
 521                 goto loop2;
 522         }
 523         else if (biguns == 2)
 524         {
 525                 xsb0 = xsb0 >> 31;
 526                 biguns = 0;
 527                 goto loop1;
 528         }
 529         biguns = 0;
 530
 531         do
 532         {
 533                 double          fn0, fn1, fn2, a0, a1, a2, w0, w1, w2, y0, y1, y2;
 534                 unsigned        hx;
 535                 int                     n0, n1, n2;
 536
 537                 /*
 538                  * Find 3 more to work on: Not already done, not too big.
 539                  */
 540
 541 loop0:
 542                 hx = HI(x);
 543                 xsb0 = hx >> 31;
 544                 hx &= ~0x80000000;
 545                 if (hx > 0x413921fb) /* (1.6471e+06) Too big: leave it. */
 546                 {
 547                         if (hx >= 0x7ff00000) /* Inf or NaN */
 548                         {
 549                                 x0 = *x;
 550                                 *y = x0 - x0;
 551                         }
 552                         else
 553                                 biguns = 1;
 554                         x += stridex;
 555                         y += stridey;
 556                         i = 0;
 557                         if (--n <= 0)
 558                                 break;
 559                         goto loop0;
 560                 }
 561                 x0 = *x;
 562                 py0 = y;
 563                 x += stridex;
 564                 y += stridey;
 565                 i = 1;
 566                 if (--n <= 0)
 567                         break;
 568
 569 loop1:
 570                 hx = HI(x);
 571                 xsb1 = hx >> 31;
 572                 hx &= ~0x80000000;
 573                 if (hx > 0x413921fb)
 574                 {
 575                         if (hx >= 0x7ff00000)
 576                         {
 577                                 x1 = *x;
 578                                 *y = x1 - x1;
 579                         }
 580                         else
 581                                 biguns = 1;
 582                         x += stridex;
 583                         y += stridey;
 584                         i = 1;
 585                         if (--n <= 0)
 586                                 break;
 587                         goto loop1;
 588                 }
 589                 x1 = *x;
 590                 py1 = y;
 591                 x += stridex;
 592                 y += stridey;
 593                 i = 2;
 594                 if (--n <= 0)
 595                         break;
 596
 597 loop2:
 598                 hx = HI(x);
 599                 xsb2 = hx >> 31;
 600                 hx &= ~0x80000000;
 601                 if (hx > 0x413921fb)
 602                 {
 603                         if (hx >= 0x7ff00000)
 604                         {
 605                                 x2 = *x;
 606                                 *y = x2 - x2;
 607                         }
 608                         else
 609                                 biguns = 1;
 610                         x += stridex;
 611                         y += stridey;
 612                         i = 2;
 613                         if (--n <= 0)
 614                                 break;
 615                         goto loop2;
 616                 }
 617                 x2 = *x;
 618                 py2 = y;
 619
 620                 n0 = (int) (x0 * invpio2 + half[xsb0]);
 621                 n1 = (int) (x1 * invpio2 + half[xsb1]);
 622                 n2 = (int) (x2 * invpio2 + half[xsb2]);
 623                 fn0 = (double) n0;
 624                 fn1 = (double) n1;
 625                 fn2 = (double) n2;
 626                 n0 = (n0 + 1) & 3; /* Add 1 (before the mod) to make sin into cos */
 627                 n1 = (n1 + 1) & 3;
 628                 n2 = (n2 + 1) & 3;
 629                 a0 = x0 - fn0 * pio2_1;
 630                 a1 = x1 - fn1 * pio2_1;
 631                 a2 = x2 - fn2 * pio2_1;
 632                 w0 = fn0 * pio2_2;
 633                 w1 = fn1 * pio2_2;
 634                 w2 = fn2 * pio2_2;
 635                 x0 = a0 - w0;
 636                 x1 = a1 - w1;
 637                 x2 = a2 - w2;
 638                 y0 = (a0 - x0) - w0;
 639                 y1 = (a1 - x1) - w1;
 640                 y2 = (a2 - x2) - w2;
 641                 a0 = x0;
 642                 a1 = x1;
 643                 a2 = x2;
 644                 w0 = fn0 * pio2_3 - y0;
 645                 w1 = fn1 * pio2_3 - y1;
 646                 w2 = fn2 * pio2_3 - y2;
 647                 x0 = a0 - w0;
 648                 x1 = a1 - w1;
 649                 x2 = a2 - w2;
 650                 y0 = (a0 - x0) - w0;
 651                 y1 = (a1 - x1) - w1;
 652                 y2 = (a2 - x2) - w2;
 653                 a0 = x0;
 654                 a1 = x1;
 655                 a2 = x2;
 656                 w0 = fn0 * pio2_3t - y0;
 657                 w1 = fn1 * pio2_3t - y1;
 658                 w2 = fn2 * pio2_3t - y2;
 659                 x0 = a0 - w0;
 660                 x1 = a1 - w1;
 661                 x2 = a2 - w2;
 662                 y0 = (a0 - x0) - w0;
 663                 y1 = (a1 - x1) - w1;
 664                 y2 = (a2 - x2) - w2;
 665                 xsb0 = HI(&x0);
 666                 i = ((xsb0 & ~0x80000000) - thresh[n0&1]) >> 31;
 667                 xsb1 = HI(&x1);
 668                 i |= (((xsb1 & ~0x80000000) - thresh[n1&1]) >> 30) & 2;
 669                 xsb2 = HI(&x2);
 670                 i |= (((xsb2 & ~0x80000000) - thresh[n2&1]) >> 29) & 4;
 671                 switch (i)
 672                 {
 673                         double          t0, t1, t2, z0, z1, z2;
 674                         unsigned        j0, j1, j2;
 675
 676                 case 0:
 677                         j0 = (xsb0 + 0x4000) & 0xffff8000;
 678                         j1 = (xsb1 + 0x4000) & 0xffff8000;
 679                         j2 = (xsb2 + 0x4000) & 0xffff8000;
 680                         HI(&t0) = j0;
 681                         HI(&t1) = j1;
 682                         HI(&t2) = j2;
 683                         LO(&t0) = 0;
 684                         LO(&t1) = 0;
 685                         LO(&t2) = 0;
 686                         x0 = (x0 - t0) + y0;
 687                         x1 = (x1 - t1) + y1;
 688                         x2 = (x2 - t2) + y2;
 689                         z0 = x0 * x0;
 690                         z1 = x1 * x1;
 691                         z2 = x2 * x2;
 692                         t0 = z0 * (qq1 + z0 * qq2);
 693                         t1 = z1 * (qq1 + z1 * qq2);
 694                         t2 = z2 * (qq1 + z2 * qq2);
 695                         w0 = x0 * (one + z0 * (pp1 + z0 * pp2));
 696                         w1 = x1 * (one + z1 * (pp1 + z1 * pp2));
 697                         w2 = x2 * (one + z2 * (pp1 + z2 * pp2));
 698                         j0 = (((j0 & ~0x80000000) - 0x3fc40000) >> 13) & ~0x3;
 699                         j1 = (((j1 & ~0x80000000) - 0x3fc40000) >> 13) & ~0x3;
 700                         j2 = (((j2 & ~0x80000000) - 0x3fc40000) >> 13) & ~0x3;
 701                         xsb0 = (xsb0 >> 30) & 2;
 702                         xsb1 = (xsb1 >> 30) & 2;
 703                         xsb2 = (xsb2 >> 30) & 2;
 704                         n0 ^= (xsb0 & ~(n0 << 1));
 705                         n1 ^= (xsb1 & ~(n1 << 1));
 706                         n2 ^= (xsb2 & ~(n2 << 1));
 707                         xsb0 |= 1;
 708                         xsb1 |= 1;
 709                         xsb2 |= 1;
 710                         a0 = __vlibm_TBL_sincos_hi[j0+n0];
 711                         a1 = __vlibm_TBL_sincos_hi[j1+n1];
 712                         a2 = __vlibm_TBL_sincos_hi[j2+n2];
 713                         t0 = (__vlibm_TBL_sincos_hi[j0+((n0+xsb0)&3)] * w0 + a0 * t0) + __vlibm_TBL_sincos_lo[j0+n0];
 714                         t1 = (__vlibm_TBL_sincos_hi[j1+((n1+xsb1)&3)] * w1 + a1 * t1) + __vlibm_TBL_sincos_lo[j1+n1];
 715                         t2 = (__vlibm_TBL_sincos_hi[j2+((n2+xsb2)&3)] * w2 + a2 * t2) + __vlibm_TBL_sincos_lo[j2+n2];
 716                         *py0 = ( a0 + t0 );
 717                         *py1 = ( a1 + t1 );
 718                         *py2 = ( a2 + t2 );
 719                         break;
 720
 721                 case 1:
 722                         j0 = n0 & 1;
 723                         j1 = (xsb1 + 0x4000) & 0xffff8000;
 724                         j2 = (xsb2 + 0x4000) & 0xffff8000;
 725                         HI(&t1) = j1;
 726                         HI(&t2) = j2;
 727                         LO(&t1) = 0;
 728                         LO(&t2) = 0;
 729                         x0_or_one[0] = x0;
 730                         x0_or_one[2] = -x0;
 731                         y0_or_zero[0] = y0;
 732                         y0_or_zero[2] = -y0;
 733                         x1 = (x1 - t1) + y1;
 734                         x2 = (x2 - t2) + y2;
 735                         z0 = x0 * x0;
 736                         z1 = x1 * x1;
 737                         z2 = x2 * x2;
 738                         t0 = z0 * (poly3[j0] + z0 * poly4[j0]);
 739                         t1 = z1 * (qq1 + z1 * qq2);
 740                         t2 = z2 * (qq1 + z2 * qq2);
 741                         t0 = z0 * (poly1[j0] + z0 * (poly2[j0] + t0));
 742                         w1 = x1 * (one + z1 * (pp1 + z1 * pp2));
 743                         w2 = x2 * (one + z2 * (pp1 + z2 * pp2));
 744                         j1 = (((j1 & ~0x80000000) - 0x3fc40000) >> 13) & ~0x3;
 745                         j2 = (((j2 & ~0x80000000) - 0x3fc40000) >> 13) & ~0x3;
 746                         xsb1 = (xsb1 >> 30) & 2;
 747                         xsb2 = (xsb2 >> 30) & 2;
 748                         n1 ^= (xsb1 & ~(n1 << 1));
 749                         n2 ^= (xsb2 & ~(n2 << 1));
 750                         xsb1 |= 1;
 751                         xsb2 |= 1;
 752                         a1 = __vlibm_TBL_sincos_hi[j1+n1];
 753                         a2 = __vlibm_TBL_sincos_hi[j2+n2];
 754                         t0 = x0_or_one[n0] + (y0_or_zero[n0] + x0_or_one[n0] * t0);
 755                         t1 = (__vlibm_TBL_sincos_hi[j1+((n1+xsb1)&3)] * w1 + a1 * t1) + __vlibm_TBL_sincos_lo[j1+n1];
 756                         t2 = (__vlibm_TBL_sincos_hi[j2+((n2+xsb2)&3)] * w2 + a2 * t2) + __vlibm_TBL_sincos_lo[j2+n2];
 757                         *py0 = t0;
 758                         *py1 = ( a1 + t1 );
 759                         *py2 = ( a2 + t2 );
 760                         break;
 761
 762                 case 2:
 763                         j0 = (xsb0 + 0x4000) & 0xffff8000;
 764                         j1 = n1 & 1;
 765                         j2 = (xsb2 + 0x4000) & 0xffff8000;
 766                         HI(&t0) = j0;
 767                         HI(&t2) = j2;
 768                         LO(&t0) = 0;
 769                         LO(&t2) = 0;
 770                         x1_or_one[0] = x1;
 771                         x1_or_one[2] = -x1;
 772                         x0 = (x0 - t0) + y0;
 773                         y1_or_zero[0] = y1;
 774                         y1_or_zero[2] = -y1;
 775                         x2 = (x2 - t2) + y2;
 776                         z0 = x0 * x0;
 777                         z1 = x1 * x1;
 778                         z2 = x2 * x2;
 779                         t0 = z0 * (qq1 + z0 * qq2);
 780                         t1 = z1 * (poly3[j1] + z1 * poly4[j1]);
 781                         t2 = z2 * (qq1 + z2 * qq2);
 782                         w0 = x0 * (one + z0 * (pp1 + z0 * pp2));
 783                         t1 = z1 * (poly1[j1] + z1 * (poly2[j1] + t1));
 784                         w2 = x2 * (one + z2 * (pp1 + z2 * pp2));
 785                         j0 = (((j0 & ~0x80000000) - 0x3fc40000) >> 13) & ~0x3;
 786                         j2 = (((j2 & ~0x80000000) - 0x3fc40000) >> 13) & ~0x3;
 787                         xsb0 = (xsb0 >> 30) & 2;
 788                         xsb2 = (xsb2 >> 30) & 2;
 789                         n0 ^= (xsb0 & ~(n0 << 1));
 790                         n2 ^= (xsb2 & ~(n2 << 1));
 791                         xsb0 |= 1;
 792                         xsb2 |= 1;
 793                         a0 = __vlibm_TBL_sincos_hi[j0+n0];
 794                         a2 = __vlibm_TBL_sincos_hi[j2+n2];
 795                         t0 = (__vlibm_TBL_sincos_hi[j0+((n0+xsb0)&3)] * w0 + a0 * t0) + __vlibm_TBL_sincos_lo[j0+n0];
 796                         t1 = x1_or_one[n1] + (y1_or_zero[n1] + x1_or_one[n1] * t1);
 797                         t2 = (__vlibm_TBL_sincos_hi[j2+((n2+xsb2)&3)] * w2 + a2 * t2) + __vlibm_TBL_sincos_lo[j2+n2];
 798                         *py0 = ( a0 + t0 );
 799                         *py1 = t1;
 800                         *py2 = ( a2 + t2 );
 801                         break;
 802
 803                 case 3:
 804                         j0 = n0 & 1;
 805                         j1 = n1 & 1;
 806                         j2 = (xsb2 + 0x4000) & 0xffff8000;
 807                         HI(&t2) = j2;
 808                         LO(&t2) = 0;
 809                         x0_or_one[0] = x0;
 810                         x0_or_one[2] = -x0;
 811                         x1_or_one[0] = x1;
 812                         x1_or_one[2] = -x1;
 813                         y0_or_zero[0] = y0;
 814                         y0_or_zero[2] = -y0;
 815                         y1_or_zero[0] = y1;
 816                         y1_or_zero[2] = -y1;
 817                         x2 = (x2 - t2) + y2;
 818                         z0 = x0 * x0;
 819                         z1 = x1 * x1;
 820                         z2 = x2 * x2;
 821                         t0 = z0 * (poly3[j0] + z0 * poly4[j0]);
 822                         t1 = z1 * (poly3[j1] + z1 * poly4[j1]);
 823                         t2 = z2 * (qq1 + z2 * qq2);
 824                         t0 = z0 * (poly1[j0] + z0 * (poly2[j0] + t0));
 825                         t1 = z1 * (poly1[j1] + z1 * (poly2[j1] + t1));
 826                         w2 = x2 * (one + z2 * (pp1 + z2 * pp2));
 827                         j2 = (((j2 & ~0x80000000) - 0x3fc40000) >> 13) & ~0x3;
 828                         xsb2 = (xsb2 >> 30) & 2;
 829                         n2 ^= (xsb2 & ~(n2 << 1));
 830                         xsb2 |= 1;
 831                         a2 = __vlibm_TBL_sincos_hi[j2+n2];
 832                         t0 = x0_or_one[n0] + (y0_or_zero[n0] + x0_or_one[n0] * t0);
 833                         t1 = x1_or_one[n1] + (y1_or_zero[n1] + x1_or_one[n1] * t1);
 834                         t2 = (__vlibm_TBL_sincos_hi[j2+((n2+xsb2)&3)] * w2 + a2 * t2) + __vlibm_TBL_sincos_lo[j2+n2];
 835                         *py0 = t0;
 836                         *py1 = t1;
 837                         *py2 = ( a2 + t2 );
 838                         break;
 839
 840                 case 4:
 841                         j0 = (xsb0 + 0x4000) & 0xffff8000;
 842                         j1 = (xsb1 + 0x4000) & 0xffff8000;
 843                         j2 = n2 & 1;
 844                         HI(&t0) = j0;
 845                         HI(&t1) = j1;
 846                         LO(&t0) = 0;
 847                         LO(&t1) = 0;
 848                         x2_or_one[0] = x2;
 849                         x2_or_one[2] = -x2;
 850                         x0 = (x0 - t0) + y0;
 851                         x1 = (x1 - t1) + y1;
 852                         y2_or_zero[0] = y2;
 853                         y2_or_zero[2] = -y2;
 854                         z0 = x0 * x0;
 855                         z1 = x1 * x1;
 856                         z2 = x2 * x2;
 857                         t0 = z0 * (qq1 + z0 * qq2);
 858                         t1 = z1 * (qq1 + z1 * qq2);
 859                         t2 = z2 * (poly3[j2] + z2 * poly4[j2]);
 860                         w0 = x0 * (one + z0 * (pp1 + z0 * pp2));
 861                         w1 = x1 * (one + z1 * (pp1 + z1 * pp2));
 862                         t2 = z2 * (poly1[j2] + z2 * (poly2[j2] + t2));
 863                         j0 = (((j0 & ~0x80000000) - 0x3fc40000) >> 13) & ~0x3;
 864                         j1 = (((j1 & ~0x80000000) - 0x3fc40000) >> 13) & ~0x3;
 865                         xsb0 = (xsb0 >> 30) & 2;
 866                         xsb1 = (xsb1 >> 30) & 2;
 867                         n0 ^= (xsb0 & ~(n0 << 1));
 868                         n1 ^= (xsb1 & ~(n1 << 1));
 869                         xsb0 |= 1;
 870                         xsb1 |= 1;
 871                         a0 = __vlibm_TBL_sincos_hi[j0+n0];
 872                         a1 = __vlibm_TBL_sincos_hi[j1+n1];
 873                         t0 = (__vlibm_TBL_sincos_hi[j0+((n0+xsb0)&3)] * w0 + a0 * t0) + __vlibm_TBL_sincos_lo[j0+n0];
 874                         t1 = (__vlibm_TBL_sincos_hi[j1+((n1+xsb1)&3)] * w1 + a1 * t1) + __vlibm_TBL_sincos_lo[j1+n1];
 875                         t2 = x2_or_one[n2] + (y2_or_zero[n2] + x2_or_one[n2] * t2);
 876                         *py0 = ( a0 + t0 );
 877                         *py1 = ( a1 + t1 );
 878                         *py2 = t2;
 879                         break;
 880
 881                 case 5:
 882                         j0 = n0 & 1;
 883                         j1 = (xsb1 + 0x4000) & 0xffff8000;
 884                         j2 = n2 & 1;
 885                         HI(&t1) = j1;
 886                         LO(&t1) = 0;
 887                         x0_or_one[0] = x0;
 888                         x0_or_one[2] = -x0;
 889                         x2_or_one[0] = x2;
 890                         x2_or_one[2] = -x2;
 891                         y0_or_zero[0] = y0;
 892                         y0_or_zero[2] = -y0;
 893                         x1 = (x1 - t1) + y1;
 894                         y2_or_zero[0] = y2;
 895                         y2_or_zero[2] = -y2;
 896                         z0 = x0 * x0;
 897                         z1 = x1 * x1;
 898                         z2 = x2 * x2;
 899                         t0 = z0 * (poly3[j0] + z0 * poly4[j0]);
 900                         t1 = z1 * (qq1 + z1 * qq2);
 901                         t2 = z2 * (poly3[j2] + z2 * poly4[j2]);
 902                         t0 = z0 * (poly1[j0] + z0 * (poly2[j0] + t0));
 903                         w1 = x1 * (one + z1 * (pp1 + z1 * pp2));
 904                         t2 = z2 * (poly1[j2] + z2 * (poly2[j2] + t2));
 905                         j1 = (((j1 & ~0x80000000) - 0x3fc40000) >> 13) & ~0x3;
 906                         xsb1 = (xsb1 >> 30) & 2;
 907                         n1 ^= (xsb1 & ~(n1 << 1));
 908                         xsb1 |= 1;
 909                         a1 = __vlibm_TBL_sincos_hi[j1+n1];
 910                         t0 = x0_or_one[n0] + (y0_or_zero[n0] + x0_or_one[n0] * t0);
 911                         t1 = (__vlibm_TBL_sincos_hi[j1+((n1+xsb1)&3)] * w1 + a1 * t1) + __vlibm_TBL_sincos_lo[j1+n1];
 912                         t2 = x2_or_one[n2] + (y2_or_zero[n2] + x2_or_one[n2] * t2);
 913                         *py0 = t0;
 914                         *py1 = ( a1 + t1 );
 915                         *py2 = t2;
 916                         break;
 917
 918                 case 6:
 919                         j0 = (xsb0 + 0x4000) & 0xffff8000;
 920                         j1 = n1 & 1;
 921                         j2 = n2 & 1;
 922                         HI(&t0) = j0;
 923                         LO(&t0) = 0;
 924                         x1_or_one[0] = x1;
 925                         x1_or_one[2] = -x1;
 926                         x2_or_one[0] = x2;
 927                         x2_or_one[2] = -x2;
 928                         x0 = (x0 - t0) + y0;
 929                         y1_or_zero[0] = y1;
 930                         y1_or_zero[2] = -y1;
 931                         y2_or_zero[0] = y2;
 932                         y2_or_zero[2] = -y2;
 933                         z0 = x0 * x0;
 934                         z1 = x1 * x1;
 935                         z2 = x2 * x2;
 936                         t0 = z0 * (qq1 + z0 * qq2);
 937                         t1 = z1 * (poly3[j1] + z1 * poly4[j1]);
 938                         t2 = z2 * (poly3[j2] + z2 * poly4[j2]);
 939                         w0 = x0 * (one + z0 * (pp1 + z0 * pp2));
 940                         t1 = z1 * (poly1[j1] + z1 * (poly2[j1] + t1));
 941                         t2 = z2 * (poly1[j2] + z2 * (poly2[j2] + t2));
 942                         j0 = (((j0 & ~0x80000000) - 0x3fc40000) >> 13) & ~0x3;
 943                         xsb0 = (xsb0 >> 30) & 2;
 944                         n0 ^= (xsb0 & ~(n0 << 1));
 945                         xsb0 |= 1;
 946                         a0 = __vlibm_TBL_sincos_hi[j0+n0];
 947                         t0 = (__vlibm_TBL_sincos_hi[j0+((n0+xsb0)&3)] * w0 + a0 * t0) + __vlibm_TBL_sincos_lo[j0+n0];
 948                         t1 = x1_or_one[n1] + (y1_or_zero[n1] + x1_or_one[n1] * t1);
 949                         t2 = x2_or_one[n2] + (y2_or_zero[n2] + x2_or_one[n2] * t2);
 950                         *py0 = ( a0 + t0 );
 951                         *py1 = t1;
 952                         *py2 = t2;
 953                         break;
 954
 955                 case 7:
 956                         j0 = n0 & 1;
 957                         j1 = n1 & 1;
 958                         j2 = n2 & 1;
 959                         x0_or_one[0] = x0;
 960                         x0_or_one[2] = -x0;
 961                         x1_or_one[0] = x1;
 962                         x1_or_one[2] = -x1;
 963                         x2_or_one[0] = x2;
 964                         x2_or_one[2] = -x2;
 965                         y0_or_zero[0] = y0;
 966                         y0_or_zero[2] = -y0;
 967                         y1_or_zero[0] = y1;
 968                         y1_or_zero[2] = -y1;
 969                         y2_or_zero[0] = y2;
 970                         y2_or_zero[2] = -y2;
 971                         z0 = x0 * x0;
 972                         z1 = x1 * x1;
 973                         z2 = x2 * x2;
 974                         t0 = z0 * (poly3[j0] + z0 * poly4[j0]);
 975                         t1 = z1 * (poly3[j1] + z1 * poly4[j1]);
 976                         t2 = z2 * (poly3[j2] + z2 * poly4[j2]);
 977                         t0 = z0 * (poly1[j0] + z0 * (poly2[j0] + t0));
 978                         t1 = z1 * (poly1[j1] + z1 * (poly2[j1] + t1));
 979                         t2 = z2 * (poly1[j2] + z2 * (poly2[j2] + t2));
 980                         t0 = x0_or_one[n0] + (y0_or_zero[n0] + x0_or_one[n0] * t0);
 981                         t1 = x1_or_one[n1] + (y1_or_zero[n1] + x1_or_one[n1] * t1);
 982                         t2 = x2_or_one[n2] + (y2_or_zero[n2] + x2_or_one[n2] * t2);
 983                         *py0 = t0;
 984                         *py1 = t1;
 985                         *py2 = t2;
 986                         break;
 987                 }
 988
 989                 x += stridex;
 990                 y += stridey;
 991                 i = 0;
 992         } while (--n > 0);
 993
 994         if (i > 0)
 995         {
 996                 double          fn0, fn1, a0, a1, w0, w1, y0, y1;
 997                 double          t0, t1, z0, z1;
 998                 unsigned        j0, j1;
 999                 int                     n0, n1;
1000
1001                 if (i > 1)
1002                 {
1003                         n1 = (int) (x1 * invpio2 + half[xsb1]);
1004                         fn1 = (double) n1;
1005                         n1 = (n1 + 1) & 3; /* Add 1 (before the mod) to make sin into cos */
1006                         a1 = x1 - fn1 * pio2_1;
1007                         w1 = fn1 * pio2_2;
1008                         x1 = a1 - w1;
1009                         y1 = (a1 - x1) - w1;
1010                         a1 = x1;
1011                         w1 = fn1 * pio2_3 - y1;
1012                         x1 = a1 - w1;
1013                         y1 = (a1 - x1) - w1;
1014                         a1 = x1;
1015                         w1 = fn1 * pio2_3t - y1;
1016                         x1 = a1 - w1;
1017                         y1 = (a1 - x1) - w1;
1018                         xsb1 = HI(&x1);
1019                         if ((xsb1 & ~0x80000000) < thresh[n1&1])
1020                         {
1021                                 j1 = n1 & 1;
1022                                 x1_or_one[0] = x1;
1023                                 x1_or_one[2] = -x1;
1024                                 y1_or_zero[0] = y1;
1025                                 y1_or_zero[2] = -y1;
1026                                 z1 = x1 * x1;
1027                                 t1 = z1 * (poly3[j1] + z1 * poly4[j1]);
1028                                 t1 = z1 * (poly1[j1] + z1 * (poly2[j1] + t1));
1029                                 t1 = x1_or_one[n1] + (y1_or_zero[n1] + x1_or_one[n1] * t1);
1030                                 *py1 = t1;
1031                         }
1032                         else
1033                         {
1034                                 j1 = (xsb1 + 0x4000) & 0xffff8000;
1035                                 HI(&t1) = j1;
1036                                 LO(&t1) = 0;
1037                                 x1 = (x1 - t1) + y1;
1038                                 z1 = x1 * x1;
1039                                 t1 = z1 * (qq1 + z1 * qq2);
1040                                 w1 = x1 * (one + z1 * (pp1 + z1 * pp2));
1041                                 j1 = (((j1 & ~0x80000000) - 0x3fc40000) >> 13) & ~0x3;
1042                                 xsb1 = (xsb1 >> 30) & 2;
1043                                 n1 ^= (xsb1 & ~(n1 << 1));
1044                                 xsb1 |= 1;
1045                                 a1 = __vlibm_TBL_sincos_hi[j1+n1];
1046                                 t1 = (__vlibm_TBL_sincos_hi[j1+((n1+xsb1)&3)] * w1 + a1 * t1) + __vlibm_TBL_sincos_lo[j1+n1];
1047                                 *py1 = ( a1 + t1 );
1048                         }
1049                 }
1050                 n0 = (int) (x0 * invpio2 + half[xsb0]);
1051                 fn0 = (double) n0;
1052                 n0 = (n0 + 1) & 3; /* Add 1 (before the mod) to make sin into cos */
1053                 a0 = x0 - fn0 * pio2_1;
1054                 w0 = fn0 * pio2_2;
1055                 x0 = a0 - w0;
1056                 y0 = (a0 - x0) - w0;
1057                 a0 = x0;
1058                 w0 = fn0 * pio2_3 - y0;
1059                 x0 = a0 - w0;
1060                 y0 = (a0 - x0) - w0;
1061                 a0 = x0;
1062                 w0 = fn0 * pio2_3t - y0;
1063                 x0 = a0 - w0;
1064                 y0 = (a0 - x0) - w0;
1065                 xsb0 = HI(&x0);
1066                 if ((xsb0 & ~0x80000000) < thresh[n0&1])
1067                 {
1068                         j0 = n0 & 1;
1069                         x0_or_one[0] = x0;
1070                         x0_or_one[2] = -x0;
1071                         y0_or_zero[0] = y0;
1072                         y0_or_zero[2] = -y0;
1073                         z0 = x0 * x0;
1074                         t0 = z0 * (poly3[j0] + z0 * poly4[j0]);
1075                         t0 = z0 * (poly1[j0] + z0 * (poly2[j0] + t0));
1076                         t0 = x0_or_one[n0] + (y0_or_zero[n0] + x0_or_one[n0] * t0);
1077                         *py0 = t0;
1078                 }
1079                 else
1080                 {
1081                         j0 = (xsb0 + 0x4000) & 0xffff8000;
1082                         HI(&t0) = j0;
1083                         LO(&t0) = 0;
1084                         x0 = (x0 - t0) + y0;
1085                         z0 = x0 * x0;
1086                         t0 = z0 * (qq1 + z0 * qq2);
1087                         w0 = x0 * (one + z0 * (pp1 + z0 * pp2));
1088                         j0 = (((j0 & ~0x80000000) - 0x3fc40000) >> 13) & ~0x3;
1089                         xsb0 = (xsb0 >> 30) & 2;
1090                         n0 ^= (xsb0 & ~(n0 << 1));
1091                         xsb0 |= 1;
1092                         a0 = __vlibm_TBL_sincos_hi[j0+n0];
1093                         t0 = (__vlibm_TBL_sincos_hi[j0+((n0+xsb0)&3)] * w0 + a0 * t0) + __vlibm_TBL_sincos_lo[j0+n0];
1094                         *py0 = ( a0 + t0 );
1095                 }
1096         }
1097
1098         if (biguns)
1099                 __vlibm_vcos_big(nsave, xsave, sxsave, ysave, sysave, 0x413921fb);
1100 }