newlib/libm/machine/amdgcn/v64sf_atangent.c

   1 /*
   2  * Copyright 2023 Siemens
   3  *
   4  * The authors hereby grant permission to use, copy, modify, distribute,
   5  * and license this software and its documentation for any purpose, provided
   6  * that existing copyright notices are retained in all copies and that this
   7  * notice is included verbatim in any distributions.  No written agreement,
   8  * license, or royalty fee is required for any of the authorized uses.
   9  * Modifications to this software may be copyrighted by their authors
  10  * and need not follow the licensing terms described here, provided that
  11  * the new terms are clearly indicated on the first page of each file where
  12  * they apply.
  13  */
  14
  15 /*
  16  * Copyright (c) 1994-2009  Red Hat, Inc. All rights reserved.
  17  *
  18  * This copyrighted material is made available to anyone wishing to use,
  19  * modify, copy, or redistribute it subject to the terms and conditions
  20  * of the BSD License.   This program is distributed in the hope that
  21  * it will be useful, but WITHOUT ANY WARRANTY expressed or implied,
  22  * including the implied warranties of MERCHANTABILITY or FITNESS FOR
  23  * A PARTICULAR PURPOSE.  A copy of this license is available at
  24  * http://www.opensource.org/licenses. Any Red Hat trademarks that are
  25  * incorporated in the source code or documentation are not subject to
  26  * the BSD License and may only be used or replicated with the express
  27  * permission of Red Hat, Inc.
  28  */
  29
  30 /******************************************************************
  31  * The following routines are coded directly from the algorithms
  32  * and coefficients given in "Software Manual for the Elementary
  33  * Functions" by William J. Cody, Jr. and William Waite, Prentice
  34  * Hall, 1980.
  35  ******************************************************************/
  36
  37 /* Based on newlib/libm/mathfp/sf_atangent.c in Newlib.  */
  38
  39 #include <float.h>
  40 #include "amdgcnmach.h"
  41
  42 static const float ROOT3 = 1.732050807;  /* Approximately sqrt (3).  */
     /* Offsets added to the reduced result in atangentf, selected by the
        reduction state N: approximately 0, pi/6, pi/2 and pi/3.  */
  43 static const float a[] = { 0.0, 0.523598775, 1.570796326,
  44                         1.047197551 };
     /* Coefficients of the approximation evaluated in atangentf with
        g = f * f: Q = g + q[0] and P = (p[1] * g + p[0]) * g.  */
  45 static const float q[] = { 0.1412500740e+1 };
  46 static const float p[] = { -0.4708325141, -0.5090958253e-1 };
  47
  48 #if defined (__has_builtin) \
  49         && __has_builtin (__builtin_gcn_frexpvf_exp) \
  50         && __has_builtin (__builtin_gcn_fabsvf)
  51
  52 DEF_VS_MATH_FUNC (v64sf, atangentf, v64sf x, v64sf v, v64sf u, int arctan2)
  53 {
       /* Arctangent kernel on v64sf vectors, modelled on the scalar routine
          in newlib/libm/mathfp/sf_atangent.c.

          x        argument used when ARCTAN2 is zero.
          v, u     used when ARCTAN2 is nonzero; the reduction then works on
                   the absolute value of v / u and the signs of u and v
                   drive the final adjustment.
          arctan2  scalar flag selecting between the two modes above.

          The value passed to the final VECTOR_RETURN is RES after the sign
          and quadrant adjustments.  In arctan2 mode, where both u and v
          compare equal to zero, errno is set to ERANGE and 0.0f is handed
          to VECTOR_RETURN instead.

          NOTE(review): DEF_VS_MATH_FUNC, FUNCTION_INIT, VECTOR_IF*,
          VECTOR_COND_MOVE, VECTOR_MERGE and VECTOR_RETURN are not defined
          in this file (presumably they come from amdgcnmach.h).  The
          comments below assume they implement per-lane masked control
          flow -- confirm against their definitions.  */
  54   FUNCTION_INIT (v64sf);
  55
  56   v64sf zero = VECTOR_INIT (0.0f);
       /* RES has no initializer; it is first written by the conditional
          moves below.  NOTE(review): whether that is safe depends on how
          VECTOR_COND_MOVE treats the previous value -- confirm.  */
  57   v64sf res;
       /* BRANCH starts as 0 and is set to -1 where RES has already been
          decided, so that ~branch selects what still needs computing.  */
  58   v64si branch = VECTOR_INIT (0);
  59
  60   /* Preparation for calculating arctan2. */
  61   if (arctan2)
  62     {
  63       VECTOR_IF (u == 0.0f, cond)
  64         VECTOR_IF2 (v == 0.0f, cond2, cond)
               /* Both inputs are zero: report a range error and return 0.  */
  65           errno = ERANGE;
  66           VECTOR_RETURN (VECTOR_INIT (0.0f), cond2);
  67         VECTOR_ELSE2 (cond2, cond)
               /* u is zero but v is not: the result is pi/2 before the sign
                  fix-up at the end.  NOTE(review): assumes VECTOR_ELSE2
                  makes cond2 cover the complementary lanes -- confirm.  */
  68           VECTOR_COND_MOVE (branch, VECTOR_INIT (-1), cond2);
  69           VECTOR_COND_MOVE (res, VECTOR_INIT ((float) __PI_OVER_TWO),  cond2);
  70         VECTOR_ENDIF
  71       VECTOR_ENDIF
  72
  73       VECTOR_IF (~branch, cond)
  74         /* Get the exponent values of the inputs. */
  75         v64si expv = __builtin_gcn_frexpvf_exp (v);
  76         v64si expu = __builtin_gcn_frexpvf_exp (u);
  77
  78         /* See if a divide will overflow. */
  79         v64si e = expv - expu;
  80
             /* Exponent gap too large: skip the division and use pi/2.  */
  81         VECTOR_IF2 (e > FLT_MAX_EXP, cond2, cond)
  82           VECTOR_COND_MOVE (branch, VECTOR_INIT (-1), cond2);
  83           VECTOR_COND_MOVE (res, VECTOR_INIT ((float) __PI_OVER_TWO), cond2);
  84         VECTOR_ENDIF
  85
  86         /* Also check for underflow. */
             /* Exponent gap too small: skip the division and use 0.  */
  87         VECTOR_IF2 (e < FLT_MIN_EXP, cond2, cond)
  88           VECTOR_COND_MOVE (branch, VECTOR_INIT (-1), cond2);
  89           VECTOR_COND_MOVE (res, zero, cond2);
  90         VECTOR_ENDIF
  91       VECTOR_ENDIF
  92     }
  93
       /* Main computation, for everything not already decided above.  */
  94   VECTOR_IF (~branch, cond)
  95     v64sf f;
         /* N records which reductions were applied: 2 for the reciprocal
            step, plus 1 for the second step; it later selects a[N].  */
  96     v64si N = VECTOR_INIT (0);
  97
  98     if (arctan2)
  99       f = __builtin_gcn_fabsvf (v / u);
 100     else
 101       f = __builtin_gcn_fabsvf (x);
 102
         /* First reduction: replace f by its reciprocal when f > 1.  */
 103     VECTOR_IF2 (f > 1.0f, cond2, cond)
 104       VECTOR_COND_MOVE (f, 1.0f / f, cond2);
 105       VECTOR_COND_MOVE (N, VECTOR_INIT (2), cond2);
 106     VECTOR_ENDIF
 107
         /* Second reduction, applied when f > 2 - ROOT3.  */
 108     VECTOR_IF2 (f > (2.0f - ROOT3), cond2, cond)
 109       float A = ROOT3 - 1.0f;
 110       VECTOR_COND_MOVE (f, (((A * f - 0.5f) - 0.5f) + f) / (ROOT3 + f),
 111                         cond2);
           /* Adds 1 to N wherever the low bit of cond2 is set (presumably
              the lanes where the condition held -- confirm mask encoding).  */
 112       N += cond2 & 1;
 113     VECTOR_ENDIF
 114
 115     /* Check for values that are too small. */
         /* When |f| is below z_rooteps_f the result is f itself.  */
 116     VECTOR_IF2 ((-z_rooteps_f < f) & (f < z_rooteps_f), cond2, cond)
 117       VECTOR_COND_MOVE (res, f, cond2);
 118
 119     /* Otherwise evaluate f + f * P / Q, with P and Q built from g = f * f. */
 120     VECTOR_ELSE2 (cond2, cond)
 121       v64sf g = f * f;
 122       v64sf P = (p[1] * g + p[0]) * g;
 123       v64sf Q = g + q[0];
 124       v64sf R = P / Q;
 125
 126       VECTOR_COND_MOVE (res, f + f * R, cond2);
 127     VECTOR_ENDIF
 128
         /* Undo the reductions: negate where the reciprocal step was taken
            (N > 1), then add the offset a[N].  Nothing is added for
            N == 0, where a[0] is 0.0.  */
 129     VECTOR_COND_MOVE (res, -res, cond & (N > 1));
 130
 131     res += VECTOR_MERGE (VECTOR_INIT (a[1]), zero, cond & (N == 1));
 132     res += VECTOR_MERGE (VECTOR_INIT (a[2]), zero, cond & (N == 2));
 133     res += VECTOR_MERGE (VECTOR_INIT (a[3]), zero, cond & (N == 3));
 134   VECTOR_ENDIF
 135
       /* Final adjustment.  In arctan2 mode: use pi - res where u is
          negative, then negate where v is negative.  Otherwise: negate
          where x is negative.  The bracketed comments below appear to be
          the equivalent scalar conditions.  */
 136   if (arctan2)
 137     {
 138       /*if (u < 0.0)*/
 139         VECTOR_COND_MOVE (res, (float) __PI - res, u < 0.0f);
 140       /*if (v < 0.0)*/
 141         VECTOR_COND_MOVE (res, -res, v < 0.0f);
 142     }
 143   /*else if (x < 0.0) */
 144   else
 145     VECTOR_COND_MOVE (res, -res, x < 0.0f);
 146
 147   VECTOR_RETURN (res, NO_COND);
 148
 149   FUNCTION_RETURN;
 150 }
 151
 152 #endif