usr/src/lib/libmvec/common/__vatan.c

   1 /*
   2  * CDDL HEADER START
   3  *
   4  * The contents of this file are subject to the terms of the
   5  * Common Development and Distribution License (the "License").
   6  * You may not use this file except in compliance with the License.
   7  *
   8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
   9  * or http://www.opensolaris.org/os/licensing.
  10  * See the License for the specific language governing permissions
  11  * and limitations under the License.
  12  *
  13  * When distributing Covered Code, include this CDDL HEADER in each
  14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
  15  * If applicable, add the following below this CDDL HEADER, with the
  16  * fields enclosed by brackets "[]" replaced with your own identifying
  17  * information: Portions Copyright [yyyy] [name of copyright owner]
  18  *
  19  * CDDL HEADER END
  20  */
  21
  22 /*
  23  * Copyright 2011 Nexenta Systems, Inc.  All rights reserved.
  24  */
  25 /*
  26  * Copyright 2006 Sun Microsystems, Inc.  All rights reserved.
  27  * Use is subject to license terms.
  28  */
  29
  30 #include <sys/isa_defs.h>
  31 #include "libm_inlines.h"
  32
  33 #ifdef _LITTLE_ENDIAN
  34 #define HI(x)   *(1+(int*)x)
  35 #define LO(x)   *(unsigned*)x
  36 #else
  37 #define HI(x)   *(int*)x
  38 #define LO(x)   *(1+(unsigned*)x)
  39 #endif
  40
  41 #ifdef __RESTRICT
  42 #define restrict _Restrict
  43 #else
  44 #define restrict
  45 #endif
  46
  47 void
  48 __vatan(int n, double * restrict x, int stridex, double * restrict y, int stridey)
  49 {
  50   double  f, z, ans = 0.0L, ansu, ansl, tmp, poly, conup, conlo, dummy;
  51   double  f1,   ans1, ansu1, ansl1, tmp1, poly1, conup1, conlo1;
  52   double  f2,   ans2, ansu2, ansl2, tmp2, poly2, conup2, conlo2;
  53   int index, sign, intf, intflo, intz, argcount;
  54   int index1, sign1 = 0;
  55   int index2, sign2 = 0;
  56   double *yaddr,*yaddr1 = 0,*yaddr2 = 0;
  57   extern const double __vlibm_TBL_atan1[];
  58   extern double fabs(double);
  59
  60 /*    Power series  atan(x) = x + p1*x**3 + p2*x**5 + p3*x**7
  61  *    Error =  -3.08254E-18   On the interval  |x| < 1/64 */
  62
  63 /* define dummy names for readability.  Use parray to help compiler optimize loads */
  64 #define p3    parray[0]
  65 #define p2    parray[1]
  66 #define p1    parray[2]
  67
  68   static const double parray[] = {
  69    -1.428029046844299722E-01,           /* p[3]         */
  70     1.999999917247000615E-01,           /* p[2]         */
  71    -3.333333333329292858E-01,           /* p[1]         */
  72     1.0,                                /* not used for p[0], though            */
  73    -1.0,                                /* used to flip sign of answer          */
  74   };
  75
  76   if (n <= 0) return;           /* if no. of elements is 0 or neg, do nothing */
  77   do
  78   {
  79   LOOP0:
  80
  81     f        = fabs(*x);                        /* fetch argument               */
  82     intf     = HI(x);                   /* upper half of x, as integer  */
  83     intflo   = LO(x);                   /* lower half of x, as integer  */
  84     sign     = intf &  0x80000000;              /* sign of argument             */
  85     intf     = intf & ~0x80000000;              /* abs(upper argument)          */
  86
  87     if ((intf > 0x43600000) || (intf < 0x3e300000)) /* filter out special cases */
  88     {
  89       if ( (intf > 0x7ff00000) || ((intf == 0x7ff00000) &&  (intflo !=0)))
  90       {
  91         ans   = f - f;                          /* return NaN if x=NaN*/
  92       }
  93       else if (intf < 0x3e300000)               /* avoid underflow for small arg */
  94       {
  95         dummy = 1.0e37 + f;
  96         dummy = dummy;
  97         ans   = f;
  98       }
  99       else if (intf > 0x43600000)               /* avoid underflow for big arg  */
 100       {
 101         index = 2;
 102         ans   = __vlibm_TBL_atan1[index] + __vlibm_TBL_atan1[index+1];/* pi/2 up + pi/2 low   */
 103       }
 104       *y      = (sign) ? -ans: ans;             /* store answer, with sign bit  */
 105       x      += stridex;
 106       y      += stridey;
 107       argcount = 0;                             /* initialize argcount          */
 108       if (--n <=0) break;                       /* we are done                  */
 109       goto LOOP0;                               /* otherwise, examine next arg  */
 110     }
 111
 112     index    = 0;                               /* points to 0,0 in table       */
 113     if (intf > 0x40500000)                      /* if (|x| > 64                 */
 114     { f = -1.0/f;
 115       index  = 2;                               /* point to pi/2 upper, lower   */
 116     }
 117     else if (intf >= 0x3f900000)                /* if |x| >= (1/64)...          */
 118     {
 119       intz   = (intf + 0x00008000) & 0x7fff0000;/* round arg, keep upper        */
 120       HI(&z)  = intz;                           /* store as a double (z)        */
 121       LO(&z)  = 0;                              /* ...lower                     */
 122       f      = (f - z)/(1.0 + f*z);             /* get reduced argument         */
 123       index  = (intz - 0x3f900000) >> 15;       /* (index >> 16) << 1)          */
 124       index  = index + 4;                       /* skip over 0,0,pi/2,pi/2      */
 125     }
 126     yaddr    = y;                               /* address to store this answer */
 127     x       += stridex;                         /* point to next arg            */
 128     y       += stridey;                         /* point to next result         */
 129     argcount = 1;                               /* we now have 1 good argument  */
 130     if (--n <=0)
 131     {
 132       f1      = 0.0;                            /* put dummy values in args 1,2 */
 133       f2      = 0.0;
 134       index1  = 0;
 135       index2  = 0;
 136       goto UNROLL3;                             /* finish up with 1 good arg    */
 137     }
 138
 139     /*--------------------------------------------------------------------------*/
 140     /*--------------------------------------------------------------------------*/
 141     /*--------------------------------------------------------------------------*/
 142
 143   LOOP1:
 144
 145     f1       = fabs(*x);                        /* fetch argument               */
 146     intf     = HI(x);                   /* upper half of x, as integer  */
 147     intflo   = LO(x);                   /* lower half of x, as integer  */
 148     sign1    = intf &  0x80000000;              /* sign of argument             */
 149     intf     = intf & ~0x80000000;              /* abs(upper argument)          */
 150
 151     if ((intf > 0x43600000) || (intf < 0x3e300000)) /* filter out special cases */
 152     {
 153       if ( (intf > 0x7ff00000) || ((intf == 0x7ff00000) &&  (intflo !=0)))
 154       {
 155         ans   = f1 - f1;                        /* return NaN if x=NaN*/
 156       }
 157       else if (intf < 0x3e300000)               /* avoid underflow for small arg */
 158       {
 159         dummy = 1.0e37 + f1;
 160         dummy = dummy;
 161         ans   = f1;
 162       }
 163       else if (intf > 0x43600000)               /* avoid underflow for big arg  */
 164       {
 165         index1 = 2;
 166         ans   = __vlibm_TBL_atan1[index1] + __vlibm_TBL_atan1[index1+1];/* pi/2 up + pi/2 low   */
 167       }
 168       *y      = (sign1) ? -ans: ans;            /* store answer, with sign bit  */
 169       x      += stridex;
 170       y      += stridey;
 171       argcount = 1;                             /* we still have 1 good arg     */
 172       if (--n <=0)
 173       {
 174         f1      = 0.0;                          /* put dummy values in args 1,2 */
 175         f2      = 0.0;
 176         index1  = 0;
 177         index2  = 0;
 178         goto UNROLL3;                           /* finish up with 1 good arg    */
 179       }
 180       goto LOOP1;                               /* otherwise, examine next arg  */
 181     }
 182
 183     index1   = 0;                               /* points to 0,0 in table       */
 184     if (intf > 0x40500000)                      /* if (|x| > 64                 */
 185     { f1 = -1.0/f1;
 186       index1 = 2;                               /* point to pi/2 upper, lower   */
 187     }
 188     else if (intf >= 0x3f900000)                /* if |x| >= (1/64)...          */
 189     {
 190       intz   = (intf + 0x00008000) & 0x7fff0000;/* round arg, keep upper        */
 191       HI(&z) = intz;                            /* store as a double (z)        */
 192       LO(&z) = 0;                               /* ...lower                     */
 193       f1     = (f1 - z)/(1.0 + f1*z);           /* get reduced argument         */
 194       index1 = (intz - 0x3f900000) >> 15;       /* (index >> 16) << 1)          */
 195       index1 = index1 + 4;                      /* skip over 0,0,pi/2,pi/2      */
 196     }
 197     yaddr1   = y;                               /* address to store this answer */
 198     x       += stridex;                         /* point to next arg            */
 199     y       += stridey;                         /* point to next result         */
 200     argcount = 2;                               /* we now have 2 good arguments */
 201     if (--n <=0)
 202     {
 203       f2      = 0.0;                            /* put dummy value in arg 2 */
 204       index2  = 0;
 205       goto UNROLL3;                             /* finish up with 2 good args   */
 206     }
 207
 208     /*--------------------------------------------------------------------------*/
 209     /*--------------------------------------------------------------------------*/
 210     /*--------------------------------------------------------------------------*/
 211
 212   LOOP2:
 213
 214     f2       = fabs(*x);                        /* fetch argument               */
 215     intf     = HI(x);                   /* upper half of x, as integer  */
 216     intflo   = LO(x);                   /* lower half of x, as integer  */
 217     sign2    = intf &  0x80000000;              /* sign of argument             */
 218     intf     = intf & ~0x80000000;              /* abs(upper argument)          */
 219
 220     if ((intf > 0x43600000) || (intf < 0x3e300000)) /* filter out special cases */
 221     {
 222       if ( (intf > 0x7ff00000) || ((intf == 0x7ff00000) &&  (intflo !=0)))
 223       {
 224         ans   = f2 - f2;                        /* return NaN if x=NaN*/
 225       }
 226       else if (intf < 0x3e300000)               /* avoid underflow for small arg */
 227       {
 228         dummy = 1.0e37 + f2;
 229         dummy = dummy;
 230         ans   = f2;
 231       }
 232       else if (intf > 0x43600000)               /* avoid underflow for big arg  */
 233       {
 234         index2 = 2;
 235         ans   = __vlibm_TBL_atan1[index2] + __vlibm_TBL_atan1[index2+1];/* pi/2 up + pi/2 low   */
 236       }
 237       *y      = (sign2) ? -ans: ans;            /* store answer, with sign bit  */
 238       x      += stridex;
 239       y      += stridey;
 240       argcount = 2;                             /* we still have 2 good args    */
 241       if (--n <=0)
 242       {
 243         f2      = 0.0;                          /* put dummy value in arg 2 */
 244         index2  = 0;
 245         goto UNROLL3;                           /* finish up with 2 good args   */
 246       }
 247       goto LOOP2;                               /* otherwise, examine next arg  */
 248     }
 249
 250     index2   = 0;                               /* points to 0,0 in table       */
 251     if (intf > 0x40500000)                      /* if (|x| > 64                 */
 252     { f2 = -1.0/f2;
 253       index2 = 2;                               /* point to pi/2 upper, lower   */
 254     }
 255     else if (intf >= 0x3f900000)                /* if |x| >= (1/64)...          */
 256     {
 257       intz   = (intf + 0x00008000) & 0x7fff0000;/* round arg, keep upper        */
 258       HI(&z) = intz;                            /* store as a double (z)        */
 259       LO(&z) = 0;                               /* ...lower                     */
 260       f2     = (f2 - z)/(1.0 + f2*z);           /* get reduced argument         */
 261       index2 = (intz - 0x3f900000) >> 15;       /* (index >> 16) << 1)          */
 262       index2 = index2 + 4;                      /* skip over 0,0,pi/2,pi/2      */
 263     }
 264     yaddr2   = y;                               /* address to store this answer */
 265     x       += stridex;                         /* point to next arg            */
 266     y       += stridey;                         /* point to next result         */
 267     argcount = 3;                               /* we now have 3 good arguments */
 268
 269
 270 /* here is the 3 way unrolled section,
 271    note, we may actually only have
 272    1,2, or 3 'real' arguments at this point
 273 */
 274
 275 UNROLL3:
 276
 277     conup    = __vlibm_TBL_atan1[index ];       /* upper table                  */
 278     conup1   = __vlibm_TBL_atan1[index1];       /* upper table                  */
 279     conup2   = __vlibm_TBL_atan1[index2];       /* upper table                  */
 280
 281     conlo    = __vlibm_TBL_atan1[index +1];     /* lower table                  */
 282     conlo1   = __vlibm_TBL_atan1[index1+1];     /* lower table                  */
 283     conlo2   = __vlibm_TBL_atan1[index2+1];     /* lower table                  */
 284
 285     tmp      = f *f ;
 286     tmp1     = f1*f1;
 287     tmp2     = f2*f2;
 288
 289     poly     = f *((p3*tmp  + p2)*tmp  + p1)*tmp ;
 290     poly1    = f1*((p3*tmp1 + p2)*tmp1 + p1)*tmp1;
 291     poly2    = f2*((p3*tmp2 + p2)*tmp2 + p1)*tmp2;
 292
 293     ansu     = conup  + f ;                     /* compute atan(f)  upper       */
 294     ansu1    = conup1 + f1;                     /* compute atan(f)  upper       */
 295     ansu2    = conup2 + f2;                     /* compute atan(f)  upper       */
 296
 297     ansl     = (((conup  - ansu) + f) + poly) + conlo ;
 298     ansl1    = (((conup1 - ansu1) + f1) + poly1) + conlo1;
 299     ansl2    = (((conup2 - ansu2) + f2) + poly2) + conlo2;
 300
 301     ans      = ansu  + ansl ;
 302     ans1     = ansu1 + ansl1;
 303     ans2     = ansu2 + ansl2;
 304
 305 /* now check to see if these are 'real' or 'dummy' arguments BEFORE storing */
 306
 307    *yaddr    = sign ? -ans: ans;                /* this one is always good      */
 308    if (argcount < 3) break;                     /* end loop and finish up       */
 309      *yaddr1   = sign1 ? -ans1: ans1;
 310      *yaddr2   = sign2 ? -ans2: ans2;
 311
 312   }  while (--n > 0);
 313
 314  if (argcount == 2)
 315    {  *yaddr1  = sign1 ? -ans1: ans1;
 316    }
 317 }