2 * Copyright 2023 Siemens
4 * The authors hereby grant permission to use, copy, modify, distribute,
5 * and license this software and its documentation for any purpose, provided
6 * that existing copyright notices are retained in all copies and that this
7 * notice is included verbatim in any distributions. No written agreement,
8 * license, or royalty fee is required for any of the authorized uses.
9 * Modifications to this software may be copyrighted by their authors
10 * and need not follow the licensing terms described here, provided that
11 * the new terms are clearly indicated on the first page of each file where
16 * Copyright (c) 1994-2009 Red Hat, Inc. All rights reserved.
18 * This copyrighted material is made available to anyone wishing to use,
19 * modify, copy, or redistribute it subject to the terms and conditions
20 * of the BSD License. This program is distributed in the hope that
21 * it will be useful, but WITHOUT ANY WARRANTY expressed or implied,
22 * including the implied warranties of MERCHANTABILITY or FITNESS FOR
23 * A PARTICULAR PURPOSE. A copy of this license is available at
24 * http://www.opensource.org/licenses. Any Red Hat trademarks that are
25 * incorporated in the source code or documentation are not subject to
26 * the BSD License and may only be used or replicated with the express
27 * permission of Red Hat, Inc.
30 /******************************************************************
31 * The following routines are coded directly from the algorithms
32 * and coefficients given in "Software Manual for the Elementary
33 * Functions" by William J. Cody, Jr. and William Waite, Prentice
35 ******************************************************************/
37 /* Based on newlib/libm/mathfp/s_atangent.c in Newlib. */
40 #include "amdgcnmach.h"
42 #if defined (__has_builtin) \
43 && __has_builtin (__builtin_gcn_fabsv) \
44 && __has_builtin (__builtin_gcn_frexpv_exp)
46 DEF_VD_MATH_FUNC (v64df
, atangent
, v64df x
, v64df v
, v64df u
, int arctan2
)
48 static const double ROOT3
= 1.73205080756887729353;
49 static const double a
[] = { 0.0, 0.52359877559829887308, 1.57079632679489661923,
50 1.04719755119659774615 };
51 static const double q
[] = { 0.41066306682575781263e+2,
52 0.86157349597130242515e+2,
53 0.59578436142597344465e+2,
54 0.15024001160028576121e+2 };
55 static const double p
[] = { -0.13688768894191926929e+2,
56 -0.20505855195861651981e+2,
57 -0.84946240351320683534e+1,
58 -0.83758299368150059274 };
59 static const float z_rooteps
= 7.4505859692e-9;
61 FUNCTION_INIT (v64df
);
63 v64df zero
= VECTOR_INIT (0.0);
64 v64df pi
= VECTOR_INIT (__PI
);
65 v64df pi_over_two
= VECTOR_INIT (__PI_OVER_TWO
);
67 v64si branch
= VECTOR_INIT (0);
69 /* Preparation for calculating arctan2. */
72 VECTOR_IF (u
== 0.0, cond
)
73 VECTOR_IF2 (v
== 0.0, cond2
, cond
)
75 VECTOR_RETURN (VECTOR_INIT (0.0), cond2
);
76 VECTOR_ELSE2 (cond2
, cond
)
77 VECTOR_COND_MOVE (branch
, VECTOR_INIT (-1), cond2
);
78 VECTOR_COND_MOVE (res
, pi_over_two
, cond2
);
82 VECTOR_IF (~branch
, cond
)
83 /* Get the exponent values of the inputs. */
84 v64si expv
= __builtin_gcn_frexpv_exp (v
);
85 v64si expu
= __builtin_gcn_frexpv_exp (u
);
87 /* See if a divide will overflow. */
88 v64si e
= expv
- expu
;
90 VECTOR_IF2 (e
> DBL_MAX_EXP
, cond2
, cond
)
91 VECTOR_COND_MOVE (branch
, VECTOR_INIT (-1), cond2
);
92 VECTOR_COND_MOVE (res
, pi_over_two
, cond2
);
95 /* Also check for underflow. */
96 VECTOR_IF2 (e
< DBL_MIN_EXP
, cond2
, cond
)
97 VECTOR_COND_MOVE (branch
, VECTOR_INIT (-1), cond2
);
98 VECTOR_COND_MOVE (res
, zero
, cond2
);
103 VECTOR_IF (~branch
, cond
)
105 v64si N
= VECTOR_INIT (0);
108 f
= __builtin_gcn_fabsv (v
/ u
);
110 f
= __builtin_gcn_fabsv (x
);
112 VECTOR_IF2 (__builtin_convertvector(f
> 1.0, v64si
), cond2
, cond
)
113 VECTOR_COND_MOVE (f
, 1.0 / f
, cond2
);
114 VECTOR_COND_MOVE (N
, VECTOR_INIT (2), cond2
);
117 VECTOR_IF2 (__builtin_convertvector(f
> (2.0 - ROOT3
), v64si
), cond2
, cond
)
118 double A
= ROOT3
- 1.0;
119 VECTOR_COND_MOVE (f
, (((A
* f
- 0.5) - 0.5) + f
) / (ROOT3
+ f
),
124 /* Check for values that are too small. */
125 VECTOR_IF2 (__builtin_convertvector((-z_rooteps
< f
) & (f
< z_rooteps
), v64si
), cond2
, cond
)
126 VECTOR_COND_MOVE (res
, f
, cond2
);
128 /* Calculate the Taylor series. */
129 VECTOR_ELSE2 (cond2
, cond
)
131 v64df P
= (((p
[3] * g
+ p
[2]) * g
+ p
[1]) * g
+ p
[0]) * g
;
132 v64df Q
= (((g
+ q
[3]) * g
+ q
[2]) * g
+ q
[1]) * g
+ q
[0];
135 VECTOR_COND_MOVE (res
, f
+ f
* R
, cond2
);
138 VECTOR_COND_MOVE (res
, -res
, cond
& (N
> 1));
140 res
+= VECTOR_MERGE (VECTOR_INIT (a
[1]), zero
, cond
& (N
== 1));
141 res
+= VECTOR_MERGE (VECTOR_INIT (a
[2]), zero
, cond
& (N
== 2));
142 res
+= VECTOR_MERGE (VECTOR_INIT (a
[3]), zero
, cond
& (N
== 3));
148 VECTOR_COND_MOVE (res
, pi
- res
, u
< 0.0);
150 VECTOR_COND_MOVE (res
, -res
, v
< 0.0);
152 /*else if (x < 0.0) */
154 VECTOR_COND_MOVE (res
, -res
, x
< 0.0);
156 VECTOR_RETURN (res
, NO_COND
);