newlib/libm/machine/amdgcn/v64sf_exp.c

   1 /*
   2  * Copyright 2023 Siemens
   3  *
   4  * The authors hereby grant permission to use, copy, modify, distribute,
   5  * and license this software and its documentation for any purpose, provided
   6  * that existing copyright notices are retained in all copies and that this
   7  * notice is included verbatim in any distributions.  No written agreement,
   8  * license, or royalty fee is required for any of the authorized uses.
   9  * Modifications to this software may be copyrighted by their authors
  10  * and need not follow the licensing terms described here, provided that
  11  * the new terms are clearly indicated on the first page of each file where
  12  * they apply.
  13  */
  14
  15 /*
  16  * ====================================================
  17  * Copyright (C) 1993 by Sun Microsystems, Inc. All rights reserved.
  18  *
  19  * Developed at SunPro, a Sun Microsystems, Inc. business.
  20  * Permission to use, copy, modify, and distribute this
  21  * software is freely granted, provided that this notice
  22  * is preserved.
  23  * ====================================================
  24  */
  25
  26 /* Based on newlib/libm/math/ef_exp.c in Newlib.  */
  27
  28 #include "amdgcnmach.h"
  29
  30 static const float
  31 huge    = 1.0e+30,
  32 twom100 = 7.8886090522e-31,      /* 2**-100=0x0d800000 */
  33 ln2HI[2]   ={ 6.9313812256e-01,         /* 0x3f317180 */
  34              -6.9313812256e-01,},       /* 0xbf317180 */
  35 ln2LO[2]   ={ 9.0580006145e-06,         /* 0x3717f7d1 */
  36              -9.0580006145e-06,},       /* 0xb717f7d1 */
  37 invln2 =  1.4426950216e+00,             /* 0x3fb8aa3b */
  38 P1   =  1.6666667163e-01, /* 0x3e2aaaab */
  39 P2   = -2.7777778450e-03, /* 0xbb360b61 */
  40 P3   =  6.6137559770e-05, /* 0x388ab355 */
  41 P4   = -1.6533901999e-06, /* 0xb5ddea0e */
  42 P5   =  4.1381369442e-08; /* 0x3331bb4c */
  43
  44 DEF_VS_MATH_FUNC (v64sf, expf, v64sf x)
  45 {
  46   FUNCTION_INIT (v64sf);
  47
  48   v64si k = VECTOR_INIT (0);
  49   v64si sx;
  50   GET_FLOAT_WORD(sx, x, NO_COND);
  51   v64si xsb = (sx>>31)&1;               /* sign bit of x */
  52   v64si hx = sx & 0x7fffffff;           /* high word of |x| */
  53
  54   /* filter out non-finite argument */
  55   VECTOR_RETURN (x+x, FLT_UWORD_IS_NAN(hx));     /* NaN */
  56   VECTOR_RETURN (x, FLT_UWORD_IS_INFINITE(hx) & (xsb == 0));
  57   VECTOR_RETURN (VECTOR_INIT (0.0f), FLT_UWORD_IS_INFINITE (hx)); /* exp(+-inf)={inf,0} */
  58   VECTOR_RETURN (v64sf_math_oflowf (VECTOR_INIT (0)), sx > FLT_UWORD_LOG_MAX);  /* overflow */
  59   VECTOR_RETURN (v64sf_math_uflowf (VECTOR_INIT (0)), (sx < 0) & (hx > FLT_UWORD_LOG_MIN));   /* underflow */
  60
  61   /* argument reduction */
  62   v64sf hi, lo;
  63   VECTOR_IF (hx > 0x3eb17218, cond)             /* if  |x| > 0.5 ln2 */
  64     VECTOR_IF2 (hx < 0x3F851592, cond2, cond)   /* and |x| < 1.5 ln2 */
  65       VECTOR_COND_MOVE (hi, x-ln2HI[0], cond2 & (xsb == 0));
  66       VECTOR_COND_MOVE (hi, x-ln2HI[1], cond2 & (xsb == 1));
  67       VECTOR_COND_MOVE (lo, VECTOR_INIT (ln2LO[0]), cond2 & (xsb == 0));
  68       VECTOR_COND_MOVE (lo, VECTOR_INIT (ln2LO[1]), cond2 & (xsb == 1));
  69       VECTOR_COND_MOVE (k, 1-xsb-xsb, cond2);
  70     VECTOR_ELSE2 (cond2, cond)
  71       VECTOR_COND_MOVE (k, __builtin_convertvector (invln2*x + 0.5f, v64si), cond2 & (xsb == 0));
  72       VECTOR_COND_MOVE (k, __builtin_convertvector (invln2*x - 0.5f, v64si), cond2 & (xsb == 1));
  73       v64sf t = __builtin_convertvector (k, v64sf);
  74       VECTOR_COND_MOVE (hi, x - t*ln2HI[0], cond2);     /* t*ln2HI is exact here */
  75       VECTOR_COND_MOVE (lo, t*ln2LO[0], cond2);
  76     VECTOR_ENDIF
  77     VECTOR_COND_MOVE (x, hi - lo, cond);
  78   VECTOR_ELSEIF (hx < 0x34000000, cond) /* when |x|<2**-23 */
  79     VECTOR_RETURN (1.0f+x, cond & (huge+x > 1.0f));  /* trigger inexact */
  80   VECTOR_ENDIF
  81
  82     /* x is now in primary range */
  83   v64sf t = x*x;
  84   v64sf c = x - t*(P1+t*(P2+t*(P3+t*(P4+t*P5))));
  85   VECTOR_RETURN (1.0f - ((x*c)/(c-2.0f)-x), k==0);
  86   v64sf y = 1.0f - ((lo-(x*c)/(2.0f-c))-hi);
  87   VECTOR_IF (k >= -125, cond)
  88     v64si hy;
  89     GET_FLOAT_WORD(hy, y, cond);
  90     SET_FLOAT_WORD(y,hy+(k<<23), cond); /* add k to y's exponent */
  91     VECTOR_RETURN (y, cond);
  92   VECTOR_ELSE (cond)
  93     v64si hy;
  94     GET_FLOAT_WORD(hy, y, cond);
  95     SET_FLOAT_WORD(y, hy+((k+100)<<23), cond);  /* add k to y's exponent */
  96     VECTOR_RETURN (y*twom100, cond);
  97   VECTOR_ENDIF
  98
  99   FUNCTION_RETURN;
 100 }
 101
 102 DEF_VARIANTS (expf, sf, sf)