2 * Copyright 2023 Siemens
4 * The authors hereby grant permission to use, copy, modify, distribute,
5 * and license this software and its documentation for any purpose, provided
6 * that existing copyright notices are retained in all copies and that this
7 * notice is included verbatim in any distributions. No written agreement,
8 * license, or royalty fee is required for any of the authorized uses.
9 * Modifications to this software may be copyrighted by their authors
10 * and need not follow the licensing terms described here, provided that
11 * the new terms are clearly indicated on the first page of each file where
15 /* Common header file for AMD GCN vector math routines. */
18 * ====================================================
19 * Copyright (C) 1993 by Sun Microsystems, Inc. All rights reserved.
21 * Developed at SunPro, a Sun Microsystems, Inc. business.
22 * Permission to use, copy, modify, and distribute this
23 * software is freely granted, provided that this notice
25 * ====================================================
28 /* Copyright (c) 2017-2018 Arm Ltd. All rights reserved.
30 SPDX-License-Identifier: BSD-3-Clause
32 Redistribution and use in source and binary forms, with or without
33 modification, are permitted provided that the following conditions
35 1. Redistributions of source code must retain the above copyright
36 notice, this list of conditions and the following disclaimer.
37 2. Redistributions in binary form must reproduce the above copyright
38 notice, this list of conditions and the following disclaimer in the
39 documentation and/or other materials provided with the distribution.
40 3. The name of the company may not be used to endorse or promote
41 products derived from this software without specific prior written
44 THIS SOFTWARE IS PROVIDED BY ARM LTD ``AS IS'' AND ANY EXPRESS OR IMPLIED
45 WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
46 MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
47 IN NO EVENT SHALL ARM LTD BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
48 SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED
49 TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
50 PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
51 LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
52 NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
53 SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */
55 /* This header is partially based on:
57 newlib/libm/common/fdlibm.h
58 newlib/libm/mathfp/zmath.h
59 newlib/libm/common/math_errf.c
60 newlib/libm/common/math_config.h */
63 #include <sys/types.h>
64 #include <machine/ieeefp.h>
65 #include "amdgcn_veclib.h"
67 /* Vectorized versions of macros from newlib/libm/common/fdlibm.h */
/* Split every double lane of X into two 32-bit words.  HI receives bits
   63..32 and LO bits 31..0, each converted to the element type of its
   destination.  This accessor takes no lane mask: HI and LO are assigned
   outright, so both must be modifiable vector lvalues.  */
#define EXTRACT_WORDS(hi, lo, x) \
do { \
  /* Scratch union used to view the double lanes as 64-bit integers.  */ \
  vector_union __tmp; \
  __tmp.t_v64df = (x); \
  hi = __builtin_convertvector (__tmp.t_v64di >> 32, typeof (hi)); \
  lo = __builtin_convertvector (__tmp.t_v64di & 0xffffffff, typeof (lo)); \
} while (0)
/* Assemble double lanes from a high word HI and a low word LO and move
   them into X through VECTOR_COND_MOVE under COND.  LO is masked to 32
   bits after widening so that a sign-extended value cannot disturb the
   high word.  */
#define INSERT_WORDS(x, hi, lo, cond) \
do { \
  /* Scratch union used to view the 64-bit integers as double lanes.  */ \
  vector_union __tmp; \
  __tmp.t_v64di = (__builtin_convertvector ((hi), v64di) << 32) \
		  | (__builtin_convertvector ((lo), v64di) & 0xffffffff); \
  VECTOR_COND_MOVE (x, __tmp.t_v64df, (cond)); \
} while (0)
/* Move bits 63..32 of every double lane of Y, converted to v64si, into X
   through VECTOR_COND_MOVE under COND.  */
#define GET_HIGH_WORD(x, y, cond) \
do { \
  /* Scratch union used to view the double lanes as 64-bit integers.  */ \
  vector_union __tmp; \
  __tmp.t_v64df = (y); \
  VECTOR_COND_MOVE (x, __builtin_convertvector (__tmp.t_v64di >> 32, v64si), \
		    (cond)); \
} while (0)
/* Move bits 31..0 of every double lane of Y, converted to v64si, into X
   through VECTOR_COND_MOVE under COND.  */
#define GET_LOW_WORD(x, y, cond) \
do { \
  /* Scratch union used to view the double lanes as 64-bit integers.  */ \
  vector_union __tmp; \
  __tmp.t_v64df = (y); \
  VECTOR_COND_MOVE (x, __builtin_convertvector (__tmp.t_v64di & 0xffffffff, \
						v64si), (cond)); \
} while (0)
/* Replace bits 63..32 of every double lane of X with the matching lane
   of Y, keeping the low 32 bits, and move the result back into X through
   VECTOR_COND_MOVE under COND.  */
#define SET_HIGH_WORD(x, y, cond) \
do { \
  vector_union __tmp; \
  /* Start from the current value of X so the low word is preserved.  */ \
  __tmp.t_v64df = (x); \
  __tmp.t_v64di &= 0xffffffff; \
  __tmp.t_v64di |= __builtin_convertvector ((y), v64di) << 32; \
  VECTOR_COND_MOVE (x, __tmp.t_v64df, (cond)); \
} while (0)
/* Replace bits 31..0 of every double lane of X with the matching lane of
   Y, keeping the high 32 bits, and move the result back into X through
   VECTOR_COND_MOVE under COND.  */
#define SET_LOW_WORD(x, y, cond) \
do { \
  vector_union __tmp; \
  /* Start from the current value of X so the high word is preserved.  */ \
  __tmp.t_v64df = (x); \
  __tmp.t_v64di &= 0xffffffff00000000ULL; \
  /* Mask after widening: a signed lane of Y with bit 31 set would \
     otherwise sign-extend and overwrite the high word kept above.  */ \
  __tmp.t_v64di |= __builtin_convertvector ((y), v64di) & 0xffffffff; \
  VECTOR_COND_MOVE (x, __tmp.t_v64df, (cond)); \
} while (0)
/* Single-precision counterpart of the word accessors: pass SRC through
   CAST_VECTOR to v64si and move it into DST via VECTOR_COND_MOVE under
   MASK.  */
#define GET_FLOAT_WORD(dst, src, mask) \
  VECTOR_COND_MOVE (dst, CAST_VECTOR (v64si, (src)), (mask))
/* Inverse of GET_FLOAT_WORD: pass SRC through CAST_VECTOR to v64sf and
   move it into DST via VECTOR_COND_MOVE under MASK.  */
#define SET_FLOAT_WORD(dst, src, mask) \
  VECTOR_COND_MOVE (dst, CAST_VECTOR (v64sf, (src)), (mask))
125 /* Definitions from newlib/libm/common/fdlibm.h */
/* Classification helpers and limits for the 32-bit word of a float.  The
   range comparisons only classify correctly when X holds the word with
   its sign bit cleared.

   When _FLT_LARGEST_EXPONENT_IS_NORMAL is defined the format has no
   Inf/NaN encodings, so the predicates are constant: (x) == (x) is always
   true and (x) != (x) always false for integer operands.
   NOTE(review): presumably they are spelled as comparisons on X so the
   result keeps the operand's (vector) type -- confirm with the callers.  */
#ifdef _FLT_LARGEST_EXPONENT_IS_NORMAL
#define FLT_UWORD_IS_FINITE(x) ((x) == (x))
#define FLT_UWORD_IS_NAN(x) ((x) != (x))
#define FLT_UWORD_IS_INFINITE(x) ((x) != (x))
#define FLT_UWORD_MAX 0x7fffffff
#define FLT_UWORD_EXP_MAX 0x43010000
#define FLT_UWORD_LOG_MAX 0x42b2d4fc
#define FLT_UWORD_LOG_2MAX 0x42b437e0
#define HUGE ((float)0X1.FFFFFEP128)
#else
/* IEEE binary32: 0x7f800000 is the word of +Inf; larger words are NaNs.  */
#define FLT_UWORD_IS_FINITE(x) ((x)<0x7f800000)
#define FLT_UWORD_IS_NAN(x) ((x)>0x7f800000)
#define FLT_UWORD_IS_INFINITE(x) ((x)==0x7f800000)
#define FLT_UWORD_MAX 0x7f7fffffL
#define FLT_UWORD_EXP_MAX 0x43000000
#define FLT_UWORD_LOG_MAX 0x42b17217
#define FLT_UWORD_LOG_2MAX 0x42b2d4fc
#define HUGE ((float)3.40282346638528860e+38)
#endif
/* Largest word with the exponent field lowered by one (half of MAX).  */
#define FLT_UWORD_HALF_MAX (FLT_UWORD_MAX-(1L<<23))
/* Biased exponent field of the largest finite value.  */
#define FLT_LARGEST_EXP (FLT_UWORD_MAX>>23)
/* Lower-range classification helpers and limits for the 32-bit word of a
   float.  As with the upper-range helpers, the comparisons assume X holds
   the word with its sign bit cleared.

   With _FLT_NO_DENORMALS every word below 0x00800000 (the smallest
   normal) counts as zero and nothing is subnormal; (x) != (x) is always
   false for integer operands.  */
#ifdef _FLT_NO_DENORMALS
#define FLT_UWORD_IS_ZERO(x) ((x)<0x00800000)
#define FLT_UWORD_IS_SUBNORMAL(x) ((x) != (x))
#define FLT_UWORD_MIN 0x00800000
#define FLT_UWORD_EXP_MIN 0x42fc0000
#define FLT_UWORD_LOG_MIN 0x42aeac50
#define FLT_SMALLEST_EXP 1
#else
#define FLT_UWORD_IS_ZERO(x) ((x)==0)
#define FLT_UWORD_IS_SUBNORMAL(x) ((x)<0x00800000)
#define FLT_UWORD_MIN 0x00000001
#define FLT_UWORD_EXP_MIN 0x43160000
#define FLT_UWORD_LOG_MIN 0x42cff1b5
/* Parenthesized so the negative constant stays a single operand wherever
   the macro is expanded.  */
#define FLT_SMALLEST_EXP (-22)
#endif
165 /* Definitions from newlib/libm/mathfp/zmath.h */
/* Double-precision constants: pi, sqrt(1/2), pi/2, and 2^24 * (2/pi).  */
#define __PI                    3.14159265358979323846
#define __SQRT_HALF             0.70710678118654752440
#define __PI_OVER_TWO           1.57079632679489661923132
#define __INV_PI_OVER_TWO_2_24  10680707.430881743590348355907974
189 extern double SMALLX
;
191 extern udouble z_infinity
;
192 extern udouble z_notanum
;
193 extern double z_rooteps
;
195 extern ufloat z_infinity_f
;
196 extern ufloat z_notanum_f
;
197 extern float z_rooteps_f
;
199 /* Vectorized versions of functions from newlib/libm/common/math_errf.c */
201 static v64sf
v64sf_math_oflowf (v64si sign
)
204 return VECTOR_MERGE (VECTOR_INIT (-0x1p
97f
),
205 VECTOR_INIT (0x1p
97f
), sign
) * 0x1p
97f
;
208 static v64sf
v64sf_math_uflowf (v64si sign
)
211 return VECTOR_MERGE (VECTOR_INIT (-0x1p
-95f
),
212 VECTOR_INIT (0x1p
-95f
), sign
) * 0x1p
-95f
;
215 /* Vectorized versions of functions from newlib/libm/common/math_config.h */
217 static v64si
v64sf_issignalingf_inline (v64sf x
)
219 v64si __mask
= VECTOR_INIT (-1);
221 GET_FLOAT_WORD (ix
, x
, NO_COND
);
222 /* Use IEEE-754 2008 encoding - i.e. exponent bits all 1, MSB of
223 significand is 0 for signalling NaN. */
224 return ((ix
& 0x7f800000) == 0x7f800000) & ((ix
& 0x00400000) == 0);
227 /* Vector extensions to sys/reent.h */
230 v64si _v64si_gamma_signgam
;
233 extern struct v64_reent
*_v64_reent
;
234 #define _V64_REENT _v64_reent
236 #define _REENT_V64SI_SIGNGAM(ptr) ((ptr)->_v64si_gamma_signgam)
238 /* Vector extensions to math.h */
240 #define v64si_signgam (*__v64si_signgam())
241 extern v64si
* __v64si_signgam (void);
242 #define __v64si_signgam_r(ptr) _REENT_V64SI_SIGNGAM(ptr)