4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License (the "License").
6 * You may not use this file except in compliance with the License.
8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 * or http://www.opensolaris.org/os/licensing.
10 * See the License for the specific language governing permissions
11 * and limitations under the License.
13 * When distributing Covered Code, include this CDDL HEADER in each
14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 * If applicable, add the following below this CDDL HEADER, with the
16 * fields enclosed by brackets "[]" replaced with your own identifying
17 * information: Portions Copyright [yyyy] [name of copyright owner]
23 * Copyright 2011 Nexenta Systems, Inc. All rights reserved.
26 * Copyright 2006 Sun Microsystems, Inc. All rights reserved.
27 * Use is subject to license terms.
30 #include <sys/isa_defs.h>
31 #include "libm_inlines.h"
34 #define HI(x) *(1+(int*)x)
35 #define LO(x) *(unsigned*)x
37 #define HI(x) *(int*)x
38 #define LO(x) *(1+(unsigned*)x)
42 #define restrict _Restrict
48 __vatan(int n
, double * restrict x
, int stridex
, double * restrict y
, int stridey
)
50 double f
, z
, ans
= 0.0L, ansu
, ansl
, tmp
, poly
, conup
, conlo
, dummy
;
51 double f1
, ans1
, ansu1
, ansl1
, tmp1
, poly1
, conup1
, conlo1
;
52 double f2
, ans2
, ansu2
, ansl2
, tmp2
, poly2
, conup2
, conlo2
;
53 int index
, sign
, intf
, intflo
, intz
, argcount
;
54 int index1
, sign1
= 0;
55 int index2
, sign2
= 0;
56 double *yaddr
,*yaddr1
= 0,*yaddr2
= 0;
57 extern const double __vlibm_TBL_atan1
[];
58 extern double fabs(double);
60 /* Power series atan(x) = x + p1*x**3 + p2*x**5 + p3*x**7
61 * Error = -3.08254E-18 On the interval |x| < 1/64 */
63 /* define dummy names for readability. Use parray to help compiler optimize loads */
68 static const double parray
[] = {
69 -1.428029046844299722E-01, /* p[3] */
70 1.999999917247000615E-01, /* p[2] */
71 -3.333333333329292858E-01, /* p[1] */
72 1.0, /* not used for p[0], though */
73 -1.0, /* used to flip sign of answer */
76 if (n
<= 0) return; /* if no. of elements is 0 or neg, do nothing */
81 f
= fabs(*x
); /* fetch argument */
82 intf
= HI(x
); /* upper half of x, as integer */
83 intflo
= LO(x
); /* lower half of x, as integer */
84 sign
= intf
& 0x80000000; /* sign of argument */
85 intf
= intf
& ~0x80000000; /* abs(upper argument) */
87 if ((intf
> 0x43600000) || (intf
< 0x3e300000)) /* filter out special cases */
89 if ( (intf
> 0x7ff00000) || ((intf
== 0x7ff00000) && (intflo
!=0)))
91 ans
= f
- f
; /* return NaN if x=NaN*/
93 else if (intf
< 0x3e300000) /* avoid underflow for small arg */
99 else if (intf
> 0x43600000) /* avoid underflow for big arg */
102 ans
= __vlibm_TBL_atan1
[index
] + __vlibm_TBL_atan1
[index
+1];/* pi/2 up + pi/2 low */
104 *y
= (sign
) ? -ans
: ans
; /* store answer, with sign bit */
107 argcount
= 0; /* initialize argcount */
108 if (--n
<=0) break; /* we are done */
109 goto LOOP0
; /* otherwise, examine next arg */
112 index
= 0; /* points to 0,0 in table */
113 if (intf
> 0x40500000) /* if (|x| > 64 */
115 index
= 2; /* point to pi/2 upper, lower */
117 else if (intf
>= 0x3f900000) /* if |x| >= (1/64)... */
119 intz
= (intf
+ 0x00008000) & 0x7fff0000;/* round arg, keep upper */
120 HI(&z
) = intz
; /* store as a double (z) */
121 LO(&z
) = 0; /* ...lower */
122 f
= (f
- z
)/(1.0 + f
*z
); /* get reduced argument */
123 index
= (intz
- 0x3f900000) >> 15; /* (index >> 16) << 1) */
124 index
= index
+ 4; /* skip over 0,0,pi/2,pi/2 */
126 yaddr
= y
; /* address to store this answer */
127 x
+= stridex
; /* point to next arg */
128 y
+= stridey
; /* point to next result */
129 argcount
= 1; /* we now have 1 good argument */
132 f1
= 0.0; /* put dummy values in args 1,2 */
136 goto UNROLL3
; /* finish up with 1 good arg */
139 /*--------------------------------------------------------------------------*/
140 /*--------------------------------------------------------------------------*/
141 /*--------------------------------------------------------------------------*/
145 f1
= fabs(*x
); /* fetch argument */
146 intf
= HI(x
); /* upper half of x, as integer */
147 intflo
= LO(x
); /* lower half of x, as integer */
148 sign1
= intf
& 0x80000000; /* sign of argument */
149 intf
= intf
& ~0x80000000; /* abs(upper argument) */
151 if ((intf
> 0x43600000) || (intf
< 0x3e300000)) /* filter out special cases */
153 if ( (intf
> 0x7ff00000) || ((intf
== 0x7ff00000) && (intflo
!=0)))
155 ans
= f1
- f1
; /* return NaN if x=NaN*/
157 else if (intf
< 0x3e300000) /* avoid underflow for small arg */
163 else if (intf
> 0x43600000) /* avoid underflow for big arg */
166 ans
= __vlibm_TBL_atan1
[index1
] + __vlibm_TBL_atan1
[index1
+1];/* pi/2 up + pi/2 low */
168 *y
= (sign1
) ? -ans
: ans
; /* store answer, with sign bit */
171 argcount
= 1; /* we still have 1 good arg */
174 f1
= 0.0; /* put dummy values in args 1,2 */
178 goto UNROLL3
; /* finish up with 1 good arg */
180 goto LOOP1
; /* otherwise, examine next arg */
183 index1
= 0; /* points to 0,0 in table */
184 if (intf
> 0x40500000) /* if (|x| > 64 */
186 index1
= 2; /* point to pi/2 upper, lower */
188 else if (intf
>= 0x3f900000) /* if |x| >= (1/64)... */
190 intz
= (intf
+ 0x00008000) & 0x7fff0000;/* round arg, keep upper */
191 HI(&z
) = intz
; /* store as a double (z) */
192 LO(&z
) = 0; /* ...lower */
193 f1
= (f1
- z
)/(1.0 + f1
*z
); /* get reduced argument */
194 index1
= (intz
- 0x3f900000) >> 15; /* (index >> 16) << 1) */
195 index1
= index1
+ 4; /* skip over 0,0,pi/2,pi/2 */
197 yaddr1
= y
; /* address to store this answer */
198 x
+= stridex
; /* point to next arg */
199 y
+= stridey
; /* point to next result */
200 argcount
= 2; /* we now have 2 good arguments */
203 f2
= 0.0; /* put dummy value in arg 2 */
205 goto UNROLL3
; /* finish up with 2 good args */
208 /*--------------------------------------------------------------------------*/
209 /*--------------------------------------------------------------------------*/
210 /*--------------------------------------------------------------------------*/
214 f2
= fabs(*x
); /* fetch argument */
215 intf
= HI(x
); /* upper half of x, as integer */
216 intflo
= LO(x
); /* lower half of x, as integer */
217 sign2
= intf
& 0x80000000; /* sign of argument */
218 intf
= intf
& ~0x80000000; /* abs(upper argument) */
220 if ((intf
> 0x43600000) || (intf
< 0x3e300000)) /* filter out special cases */
222 if ( (intf
> 0x7ff00000) || ((intf
== 0x7ff00000) && (intflo
!=0)))
224 ans
= f2
- f2
; /* return NaN if x=NaN*/
226 else if (intf
< 0x3e300000) /* avoid underflow for small arg */
232 else if (intf
> 0x43600000) /* avoid underflow for big arg */
235 ans
= __vlibm_TBL_atan1
[index2
] + __vlibm_TBL_atan1
[index2
+1];/* pi/2 up + pi/2 low */
237 *y
= (sign2
) ? -ans
: ans
; /* store answer, with sign bit */
240 argcount
= 2; /* we still have 2 good args */
243 f2
= 0.0; /* put dummy value in arg 2 */
245 goto UNROLL3
; /* finish up with 2 good args */
247 goto LOOP2
; /* otherwise, examine next arg */
250 index2
= 0; /* points to 0,0 in table */
251 if (intf
> 0x40500000) /* if (|x| > 64 */
253 index2
= 2; /* point to pi/2 upper, lower */
255 else if (intf
>= 0x3f900000) /* if |x| >= (1/64)... */
257 intz
= (intf
+ 0x00008000) & 0x7fff0000;/* round arg, keep upper */
258 HI(&z
) = intz
; /* store as a double (z) */
259 LO(&z
) = 0; /* ...lower */
260 f2
= (f2
- z
)/(1.0 + f2
*z
); /* get reduced argument */
261 index2
= (intz
- 0x3f900000) >> 15; /* (index >> 16) << 1) */
262 index2
= index2
+ 4; /* skip over 0,0,pi/2,pi/2 */
264 yaddr2
= y
; /* address to store this answer */
265 x
+= stridex
; /* point to next arg */
266 y
+= stridey
; /* point to next result */
267 argcount
= 3; /* we now have 3 good arguments */
270 /* here is the 3 way unrolled section,
271 note, we may actually only have
272 1,2, or 3 'real' arguments at this point
277 conup
= __vlibm_TBL_atan1
[index
]; /* upper table */
278 conup1
= __vlibm_TBL_atan1
[index1
]; /* upper table */
279 conup2
= __vlibm_TBL_atan1
[index2
]; /* upper table */
281 conlo
= __vlibm_TBL_atan1
[index
+1]; /* lower table */
282 conlo1
= __vlibm_TBL_atan1
[index1
+1]; /* lower table */
283 conlo2
= __vlibm_TBL_atan1
[index2
+1]; /* lower table */
289 poly
= f
*((p3
*tmp
+ p2
)*tmp
+ p1
)*tmp
;
290 poly1
= f1
*((p3
*tmp1
+ p2
)*tmp1
+ p1
)*tmp1
;
291 poly2
= f2
*((p3
*tmp2
+ p2
)*tmp2
+ p1
)*tmp2
;
293 ansu
= conup
+ f
; /* compute atan(f) upper */
294 ansu1
= conup1
+ f1
; /* compute atan(f) upper */
295 ansu2
= conup2
+ f2
; /* compute atan(f) upper */
297 ansl
= (((conup
- ansu
) + f
) + poly
) + conlo
;
298 ansl1
= (((conup1
- ansu1
) + f1
) + poly1
) + conlo1
;
299 ansl2
= (((conup2
- ansu2
) + f2
) + poly2
) + conlo2
;
302 ans1
= ansu1
+ ansl1
;
303 ans2
= ansu2
+ ansl2
;
305 /* now check to see if these are 'real' or 'dummy' arguments BEFORE storing */
307 *yaddr
= sign
? -ans
: ans
; /* this one is always good */
308 if (argcount
< 3) break; /* end loop and finish up */
309 *yaddr1
= sign1
? -ans1
: ans1
;
310 *yaddr2
= sign2
? -ans2
: ans2
;
315 { *yaddr1
= sign1
? -ans1
: ans1
;