newlib/libm/machine/amdgcn/v64sf_tan.c

   1 /*
   2  * Copyright 2023 Siemens
   3  *
   4  * The authors hereby grant permission to use, copy, modify, distribute,
   5  * and license this software and its documentation for any purpose, provided
   6  * that existing copyright notices are retained in all copies and that this
   7  * notice is included verbatim in any distributions.  No written agreement,
   8  * license, or royalty fee is required for any of the authorized uses.
   9  * Modifications to this software may be copyrighted by their authors
  10  * and need not follow the licensing terms described here, provided that
  11  * the new terms are clearly indicated on the first page of each file where
  12  * they apply.
  13  */
  14
  15 /*
  16  * ====================================================
  17  * Copyright (C) 1993 by Sun Microsystems, Inc. All rights reserved.
  18  *
  19  * Developed at SunPro, a Sun Microsystems, Inc. business.
  20  * Permission to use, copy, modify, and distribute this
  21  * software is freely granted, provided that this notice
  22  * is preserved.
  23  * ====================================================
  24  */
  25
  26 /* Based on newlib/libm/math/kf_tan.c in Newlib.  */
  27
  28 #include "amdgcnmach.h"
  29
  30 static const float
  31 one   =  1.0000000000e+00, /* 0x3f800000 */
  32 pio4  =  7.8539812565e-01, /* 0x3f490fda */
  33 pio4lo=  3.7748947079e-08, /* 0x33222168 */
  34 T[] =  {
  35   3.3333334327e-01, /* 0x3eaaaaab */
  36   1.3333334029e-01, /* 0x3e088889 */
  37   5.3968254477e-02, /* 0x3d5d0dd1 */
  38   2.1869488060e-02, /* 0x3cb327a4 */
  39   8.8632395491e-03, /* 0x3c11371f */
  40   3.5920790397e-03, /* 0x3b6b6916 */
  41   1.4562094584e-03, /* 0x3abede48 */
  42   5.8804126456e-04, /* 0x3a1a26c8 */
  43   2.4646313977e-04, /* 0x398137b9 */
  44   7.8179444245e-05, /* 0x38a3f445 */
  45   7.1407252108e-05, /* 0x3895c07a */
  46  -1.8558637748e-05, /* 0xb79bae5f */
  47   2.5907305826e-05, /* 0x37d95384 */
  48 };
  49
  50 #if defined (__has_builtin) && __has_builtin (__builtin_gcn_fabsvf)
  51
  52 static v64sf
  53 v64sf_kernel_tanf (v64sf x, v64sf y, v64si iy, v64si __mask)
  54 {
  55   FUNCTION_INIT (v64sf);
  56
  57   v64si hx;
  58   GET_FLOAT_WORD(hx, x, NO_COND);
  59   v64si ix = hx & 0x7fffffff;   /* high word of |x| */
  60
  61   VECTOR_IF(ix<0x31800000, cond)                        /* x < 2**-28 */
  62     VECTOR_IF2(__builtin_convertvector (x, v64si)==0, cond2, cond)                      /* generate inexact */
  63       VECTOR_RETURN (1.0f / __builtin_gcn_fabsvf (x), (ix|(iy+1))==0);
  64       VECTOR_RETURN (x, cond2 & (iy == 1));
  65       VECTOR_RETURN (-1.0f / x, cond2);
  66     VECTOR_ENDIF
  67   VECTOR_ENDIF
  68   VECTOR_IF(ix>=0x3f2ca140, cond)                       /* |x|>=0.6744 */
  69     VECTOR_COND_MOVE (x, -x, cond & (hx < 0));
  70     VECTOR_COND_MOVE (y, -y, cond & (hx < 0));
  71     v64sf z = pio4-x;
  72     v64sf w = pio4lo-y;
  73     VECTOR_COND_MOVE (x, z+w, cond);
  74     VECTOR_COND_MOVE (y, VECTOR_INIT (0.0f), cond);
  75   VECTOR_ENDIF
  76   v64sf z       = x*x;
  77   v64sf w = z*z;
  78   /* Break x^5*(T[1]+x^2*T[2]+...) into
  79     *     x^5(T[1]+x^4*T[3]+...+x^20*T[11]) +
  80     *     x^5(x^2*(T[2]+x^4*T[4]+...+x^22*[T12]))
  81     */
  82   v64sf r = T[1]+w*(T[3]+w*(T[5]+w*(T[7]+w*(T[9]+w*T[11]))));
  83   v64sf v = z*(T[2]+w*(T[4]+w*(T[6]+w*(T[8]+w*(T[10]+w*T[12])))));
  84   v64sf s = z*x;
  85   r = y + z*(s*(r+v)+y);
  86   r += T[0]*s;
  87   w = x+r;
  88   VECTOR_IF(ix>=0x3f2ca140, cond)
  89     v = __builtin_convertvector (iy, v64sf);
  90     VECTOR_RETURN (__builtin_convertvector (1-((hx>>30)&2), v64sf)
  91                    * (v-2.0f*(x-(w*w/(w+v)-r))), cond);
  92   VECTOR_ENDIF
  93   VECTOR_RETURN (w, iy == 1);
  94   /* if allow error up to 2 ulp,
  95      simply return -1.0/(x+r) here */
  96   /*  compute -1.0/(x+r) accurately */
  97   z = w;
  98   v64si i;
  99   GET_FLOAT_WORD(i,z, NO_COND);
 100   SET_FLOAT_WORD(z,i&0xfffff000, NO_COND);
 101   v  = r - (z - x);     /* z+v = r+x */
 102   v64sf a, t;
 103   t = a  = -1.0f/w;     /* a = -1.0/w */
 104   GET_FLOAT_WORD(i,t, NO_COND);
 105   SET_FLOAT_WORD(t,i&0xfffff000, NO_COND);
 106   s  = 1.0f+t*z;
 107   VECTOR_RETURN (t+a*(s+t*v), NO_COND);
 108
 109   FUNCTION_RETURN;
 110 }
 111
 112 static v64si
 113 v64sf_rem_pio2f (v64sf x, v64sf *y)
 114 {
 115   /* Work in double-precision for better accuracy.  */
 116   v64df dx = __builtin_convertvector (x, v64df);
 117   v64df r = dx * __INV_PI_OVER_TWO_2_24;
 118   v64si n = (__builtin_convertvector (r, v64si) + 0x800000) >> 24;
 119   dx = dx - __builtin_convertvector (n, v64df) * __PI_OVER_TWO;
 120
 121   y[0] = __builtin_convertvector (dx, v64sf);
 122   y[1] = __builtin_convertvector (dx, v64sf) - y[0];
 123   return n;
 124 }
 125
 126 DEF_VS_MATH_FUNC (v64sf, tanf, v64sf x)
 127 {
 128   FUNCTION_INIT (v64sf);
 129
 130   v64si ix;
 131   GET_FLOAT_WORD (ix, x, NO_COND);
 132
 133   /* |x| ~< pi/4 */
 134   ix &= 0x7fffffff;
 135   VECTOR_RETURN (v64sf_kernel_tanf (x, VECTOR_INIT (0.0f), VECTOR_INIT (1), __mask),
 136                  ix <= 0x3f490fda);
 137
 138   /* tan(Inf or NaN) is NaN */
 139   VECTOR_RETURN (x-x, ~FLT_UWORD_IS_FINITE(ix));  /* NaN */
 140
 141   /* argument reduction needed */
 142   v64sf y[2];
 143   v64si n = v64sf_rem_pio2f (x,y);
 144   VECTOR_RETURN (v64sf_kernel_tanf (y[0], y[1], 1-((n&1)<<1), __mask),  //  1 -- n even
 145                  NO_COND);                                              // -1 -- n odd
 146
 147   FUNCTION_RETURN;
 148 }
 149
 150 DEF_VARIANTS (tanf, sf, sf)
 151
 152 #endif