usr/src/lib/libmvec/common/__vatanf.c

   1 /*
   2  * CDDL HEADER START
   3  *
   4  * The contents of this file are subject to the terms of the
   5  * Common Development and Distribution License (the "License").
   6  * You may not use this file except in compliance with the License.
   7  *
   8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
   9  * or http://www.opensolaris.org/os/licensing.
  10  * See the License for the specific language governing permissions
  11  * and limitations under the License.
  12  *
  13  * When distributing Covered Code, include this CDDL HEADER in each
  14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
  15  * If applicable, add the following below this CDDL HEADER, with the
  16  * fields enclosed by brackets "[]" replaced with your own identifying
  17  * information: Portions Copyright [yyyy] [name of copyright owner]
  18  *
  19  * CDDL HEADER END
  20  */
  21
  22 /*
  23  * Copyright 2011 Nexenta Systems, Inc.  All rights reserved.
  24  */
  25 /*
  26  * Copyright 2006 Sun Microsystems, Inc.  All rights reserved.
  27  * Use is subject to license terms.
  28  */
  29
  30 #ifdef __RESTRICT
  31 #define restrict _Restrict
  32 #else
  33 #define restrict
  34 #endif
  35
  36 void
  37 __vatanf(int n, float * restrict x, int stridex, float * restrict y, int stridey)
  38 {
  39   extern const double __vlibm_TBL_atan1[];
  40   double  conup0, conup1, conup2;
  41   float dummy, ansf = 0.0;
  42   float f0, f1, f2;
  43   float ans0, ans1, ans2;
  44   float poly0, poly1, poly2;
  45   float sign0, sign1, sign2;
  46   int intf, intz, argcount;
  47   int index0, index1, index2;
  48   float z,*yaddr0,*yaddr1,*yaddr2;
  49   int *pz = (int *) &z;
  50 #ifdef UNROLL4
  51   double conup3;
  52   int index3;
  53   float f3, ans3, poly3, sign3, *yaddr3;
  54 #endif
  55
  56 /*    Power series  atan(x) = x + p1*x**3 + p2*x**5 + p3*x**7
  57  *    Error =  -3.08254E-18   On the interval  |x| < 1/64 */
  58
  59   static const float p1 = -0.33329644f /* -3.333333333329292858E-01f */ ;
  60   static const float pone = 1.0f;
  61
  62   if (n <= 0) return;           /* if no. of elements is 0 or neg, do nothing */
  63   do
  64   {
  65   LOOP0:
  66
  67         intf     = *(int *) x;          /* upper half of x, as integer */
  68         f0 = *x;
  69         sign0 = pone;
  70         if (intf < 0) {
  71                 intf = intf & ~0x80000000; /* abs(upper argument) */
  72                 f0 = -f0;
  73                 sign0 = -sign0;
  74         }
  75
  76     if ((intf > 0x5B000000) || (intf < 0x31800000)) /* filter out special cases */
  77     {
  78       if (intf > 0x7f800000)
  79       {
  80         ansf  = f0- f0;                                 /* return NaN if x=NaN*/
  81       }
  82       else if (intf < 0x31800000)               /* avoid underflow for small arg */
  83       {
  84         dummy = 1.0e37 + f0;
  85         dummy = dummy;
  86         ansf  = f0;
  87       }
  88       else if (intf > 0x5B000000)               /* avoid underflow for big arg  */
  89       {
  90         index0= 2;
  91         ansf  = __vlibm_TBL_atan1[index0];/* pi/2 up */
  92       }
  93       *y      = sign0*ansf;             /* store answer, with sign bit  */
  94       x      += stridex;
  95       y      += stridey;
  96       argcount = 0;                             /* initialize argcount          */
  97       if (--n <=0) break;                       /* we are done                  */
  98       goto LOOP0;                               /* otherwise, examine next arg  */
  99     }
 100
 101     if (intf > 0x42800000)                      /* if (|x| > 64                 */
 102     {
 103     f0 = -pone/f0;
 104         index0 = 2;                             /* point to pi/2 upper, lower   */
 105     }
 106     else if (intf >= 0x3C800000)                /* if |x| >= (1/64)...          */
 107     {
 108       intz   = (intf + 0x00040000) & 0x7ff80000;/* round arg, keep upper        */
 109       pz[0]  = intz;                            /* store as a float (z)         */
 110     f0 = (f0 - z)/(pone + f0*z);
 111         index0 = (intz - 0x3C800000) >> 18;     /* (index >> 19) << 1)          */
 112         index0 = index0+ 4;                     /* skip over 0,0,pi/2,pi/2      */
 113     }
 114     else                                        /* |x| < 1/64 */
 115     {
 116         index0   = 0;                           /* points to 0,0 in table       */
 117     }
 118     yaddr0   = y;                               /* address to store this answer */
 119     x       += stridex;                         /* point to next arg            */
 120     y       += stridey;                         /* point to next result         */
 121     argcount = 1;                               /* we now have 1 good argument  */
 122     if (--n <=0)
 123     {
 124       goto UNROLL;                              /* finish up with 1 good arg    */
 125     }
 126
 127     /*--------------------------------------------------------------------------*/
 128     /*--------------------------------------------------------------------------*/
 129     /*--------------------------------------------------------------------------*/
 130
 131   LOOP1:
 132
 133         intf     = *(int *) x;          /* upper half of x, as integer */
 134         f1 = *x;
 135         sign1 = pone;
 136         if (intf < 0) {
 137                 intf = intf & ~0x80000000; /* abs(upper argument) */
 138                 f1 = -f1;
 139                 sign1 = -sign1;
 140         }
 141
 142     if ((intf > 0x5B000000) || (intf < 0x31800000)) /* filter out special cases */
 143     {
 144       if (intf > 0x7f800000)
 145       {
 146         ansf   = f1 - f1;                       /* return NaN if x=NaN*/
 147       }
 148       else if (intf < 0x31800000)               /* avoid underflow for small arg */
 149       {
 150         dummy = 1.0e37 + f1;
 151         dummy = dummy;
 152         ansf   = f1;
 153       }
 154       else if (intf > 0x5B000000)               /* avoid underflow for big arg  */
 155       {
 156         index1 = 2;
 157         ansf   = __vlibm_TBL_atan1[index1] ;/* pi/2 up */
 158       }
 159       *y      = sign1 * ansf;           /* store answer, with sign bit  */
 160       x      += stridex;
 161       y      += stridey;
 162       argcount = 1;                             /* we still have 1 good arg     */
 163       if (--n <=0)
 164       {
 165         goto UNROLL;                            /* finish up with 1 good arg    */
 166       }
 167       goto LOOP1;                               /* otherwise, examine next arg  */
 168     }
 169
 170     if (intf > 0x42800000)                      /* if (|x| > 64                 */
 171     {
 172     f1 = -pone/f1;
 173       index1 = 2;                               /* point to pi/2 upper, lower   */
 174     }
 175     else if (intf >= 0x3C800000)                /* if |x| >= (1/64)...          */
 176     {
 177       intz   = (intf + 0x00040000) & 0x7ff80000;/* round arg, keep upper        */
 178       pz[0]  = intz;                            /* store as a float (z)         */
 179     f1 = (f1 - z)/(pone + f1*z);
 180       index1 = (intz - 0x3C800000) >> 18;       /* (index >> 19) << 1)          */
 181       index1 = index1 + 4;                      /* skip over 0,0,pi/2,pi/2      */
 182     }
 183     else
 184     {
 185         index1   = 0;                           /* points to 0,0 in table       */
 186     }
 187
 188     yaddr1   = y;                               /* address to store this answer */
 189     x       += stridex;                         /* point to next arg            */
 190     y       += stridey;                         /* point to next result         */
 191     argcount = 2;                               /* we now have 2 good arguments */
 192     if (--n <=0)
 193     {
 194       goto UNROLL;                              /* finish up with 2 good args   */
 195     }
 196
 197     /*--------------------------------------------------------------------------*/
 198     /*--------------------------------------------------------------------------*/
 199     /*--------------------------------------------------------------------------*/
 200
 201   LOOP2:
 202
 203         intf     = *(int *) x;          /* upper half of x, as integer */
 204         f2 = *x;
 205         sign2 = pone;
 206         if (intf < 0) {
 207                 intf = intf & ~0x80000000; /* abs(upper argument) */
 208                 f2 = -f2;
 209                 sign2 = -sign2;
 210         }
 211
 212     if ((intf > 0x5B000000) || (intf < 0x31800000)) /* filter out special cases */
 213     {
 214       if (intf > 0x7f800000)
 215       {
 216         ansf   = f2 - f2;                       /* return NaN if x=NaN*/
 217       }
 218       else if (intf < 0x31800000)               /* avoid underflow for small arg */
 219       {
 220         dummy = 1.0e37 + f2;
 221         dummy = dummy;
 222         ansf   = f2;
 223       }
 224       else if (intf > 0x5B000000)               /* avoid underflow for big arg  */
 225       {
 226         index2 = 2;
 227         ansf   = __vlibm_TBL_atan1[index2] ;/* pi/2 up */
 228       }
 229       *y      = sign2 * ansf;           /* store answer, with sign bit  */
 230       x      += stridex;
 231       y      += stridey;
 232       argcount = 2;                             /* we still have 2 good args    */
 233       if (--n <=0)
 234       {
 235         goto UNROLL;                            /* finish up with 2 good args   */
 236       }
 237       goto LOOP2;                               /* otherwise, examine next arg  */
 238     }
 239
 240     if (intf > 0x42800000)                      /* if (|x| > 64                 */
 241     {
 242     f2 = -pone/f2;
 243       index2 = 2;                               /* point to pi/2 upper, lower   */
 244     }
 245     else if (intf >= 0x3C800000)                /* if |x| >= (1/64)...          */
 246     {
 247       intz   = (intf + 0x00040000) & 0x7ff80000;/* round arg, keep upper        */
 248       pz[0]  = intz;                            /* store as a float (z)         */
 249     f2 = (f2 - z)/(pone + f2*z);
 250       index2 = (intz - 0x3C800000) >> 18;       /* (index >> 19) << 1)          */
 251       index2 = index2 + 4;                      /* skip over 0,0,pi/2,pi/2      */
 252     }
 253     else
 254     {
 255         index2   = 0;                           /* points to 0,0 in table       */
 256     }
 257     yaddr2   = y;                               /* address to store this answer */
 258     x       += stridex;                         /* point to next arg            */
 259     y       += stridey;                         /* point to next result         */
 260     argcount = 3;                               /* we now have 3 good arguments */
 261     if (--n <=0)
 262     {
 263       goto UNROLL;                              /* finish up with 2 good args   */
 264     }
 265
 266
 267     /*--------------------------------------------------------------------------*/
 268     /*--------------------------------------------------------------------------*/
 269     /*--------------------------------------------------------------------------*/
 270
 271 #ifdef UNROLL4
 272   LOOP3:
 273
 274         intf     = *(int *) x;          /* upper half of x, as integer */
 275         f3 = *x;
 276         sign3 = pone;
 277         if (intf < 0) {
 278                 intf = intf & ~0x80000000; /* abs(upper argument) */
 279                 f3 = -f3;
 280                 sign3 = -sign3;
 281         }
 282
 283     if ((intf > 0x5B000000) || (intf < 0x31800000)) /* filter out special cases */
 284     {
 285       if (intf > 0x7f800000)
 286       {
 287         ansf   = f3 - f3;                       /* return NaN if x=NaN*/
 288       }
 289       else if (intf < 0x31800000)               /* avoid underflow for small arg */
 290       {
 291         dummy = 1.0e37 + f3;
 292         dummy = dummy;
 293         ansf   = f3;
 294       }
 295       else if (intf > 0x5B000000)               /* avoid underflow for big arg  */
 296       {
 297         index3 = 2;
 298         ansf   = __vlibm_TBL_atan1[index3] ;/* pi/2 up */
 299       }
 300       *y      = sign3 * ansf;           /* store answer, with sign bit  */
 301       x      += stridex;
 302       y      += stridey;
 303       argcount = 3;                             /* we still have 3 good args    */
 304       if (--n <=0)
 305       {
 306         goto UNROLL;                            /* finish up with 3 good args   */
 307       }
 308       goto LOOP3;                               /* otherwise, examine next arg  */
 309     }
 310
 311     if (intf > 0x42800000)                      /* if (|x| > 64                 */
 312     {
 313         n3 = -pone;
 314         d3 = f3;
 315     f3 = n3/d3;
 316       index3 = 2;                               /* point to pi/2 upper, lower   */
 317     }
 318     else if (intf >= 0x3C800000)                /* if |x| >= (1/64)...          */
 319     {
 320       intz   = (intf + 0x00040000) & 0x7ff80000;/* round arg, keep upper        */
 321       pz[0]  = intz;                            /* store as a float (z)         */
 322         n3     = (f3 - z);
 323         d3     = (pone + f3*z);                 /* get reduced argument         */
 324     f3 = n3/d3;
 325       index3 = (intz - 0x3C800000) >> 18;       /* (index >> 19) << 1)          */
 326       index3 = index3 + 4;                      /* skip over 0,0,pi/2,pi/2      */
 327     }
 328     else
 329     {
 330         n3 = f3;
 331         d3 = pone;
 332         index3   = 0;                           /* points to 0,0 in table       */
 333     }
 334     yaddr3   = y;                               /* address to store this answer */
 335     x       += stridex;                         /* point to next arg            */
 336     y       += stridey;                         /* point to next result         */
 337     argcount = 4;                               /* we now have 4 good arguments */
 338     if (--n <=0)
 339     {
 340       goto UNROLL;                              /* finish up with 3 good args   */
 341     }
 342 #endif /* UNROLL4 */
 343
 344 /* here is the n-way unrolled section,
 345    but we may actually have less than n
 346    arguments at this point
 347 */
 348
 349 UNROLL:
 350
 351 #ifdef UNROLL4
 352     if (argcount == 4)
 353     {
 354     conup0   = __vlibm_TBL_atan1[index0];
 355     conup1   = __vlibm_TBL_atan1[index1];
 356     conup2   = __vlibm_TBL_atan1[index2];
 357     conup3   = __vlibm_TBL_atan1[index3];
 358     poly0    = p1*f0*f0*f0 + f0;
 359     ans0     = sign0 * (float)(conup0 + poly0);
 360     poly1    = p1*f1*f1*f1 + f1;
 361     ans1     = sign1 * (float)(conup1 + poly1);
 362     poly2    = p1*f2*f2*f2 + f2;
 363     ans2     = sign2 * (float)(conup2 + poly2);
 364     poly3    = p1*f3*f3*f3 + f3;
 365     ans3     = sign3 * (float)(conup3 + poly3);
 366     *yaddr0  = ans0;
 367     *yaddr1  = ans1;
 368     *yaddr2  = ans2;
 369     *yaddr3  = ans3;
 370     }
 371     else
 372 #endif
 373     if (argcount == 3)
 374     {
 375     conup0   = __vlibm_TBL_atan1[index0];
 376     conup1   = __vlibm_TBL_atan1[index1];
 377     conup2   = __vlibm_TBL_atan1[index2];
 378     poly0    = p1*f0*f0*f0 + f0;
 379     poly1    = p1*f1*f1*f1 + f1;
 380     poly2    = p1*f2*f2*f2 + f2;
 381     ans0     = sign0 * (float)(conup0 + poly0);
 382     ans1     = sign1 * (float)(conup1 + poly1);
 383     ans2     = sign2 * (float)(conup2 + poly2);
 384     *yaddr0  = ans0;
 385     *yaddr1  = ans1;
 386     *yaddr2  = ans2;
 387     }
 388     else
 389     if (argcount == 2)
 390     {
 391     conup0   = __vlibm_TBL_atan1[index0];
 392     conup1   = __vlibm_TBL_atan1[index1];
 393     poly0    = p1*f0*f0*f0 + f0;
 394     poly1    = p1*f1*f1*f1 + f1;
 395     ans0     = sign0 * (float)(conup0 + poly0);
 396     ans1     = sign1 * (float)(conup1 + poly1);
 397     *yaddr0  = ans0;
 398     *yaddr1  = ans1;
 399     }
 400     else
 401     if (argcount == 1)
 402     {
 403     conup0   = __vlibm_TBL_atan1[index0];
 404     poly0    = p1*f0*f0*f0 + f0;
 405     ans0     = sign0 * (float)(conup0 + poly0);
 406     *yaddr0  = ans0;
 407      }
 408
 409   }  while (n > 0);
 410
 411 }