4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License (the "License").
6 * You may not use this file except in compliance with the License.
8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 * or http://www.opensolaris.org/os/licensing.
10 * See the License for the specific language governing permissions
11 * and limitations under the License.
13 * When distributing Covered Code, include this CDDL HEADER in each
14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 * If applicable, add the following below this CDDL HEADER, with the
16 * fields enclosed by brackets "[]" replaced with your own identifying
17 * information: Portions Copyright [yyyy] [name of copyright owner]
23 * Copyright 2011 Nexenta Systems, Inc. All rights reserved.
26 * Copyright 2006 Sun Microsystems, Inc. All rights reserved.
27 * Use is subject to license terms.
31 #define restrict _Restrict
37 __vatanf(int n
, float * restrict x
, int stridex
, float * restrict y
, int stridey
)
39 extern const double __vlibm_TBL_atan1
[];
40 double conup0
, conup1
, conup2
;
41 float dummy
, ansf
= 0.0;
43 float ans0
, ans1
, ans2
;
44 float poly0
, poly1
, poly2
;
45 float sign0
, sign1
, sign2
;
46 int intf
, intz
, argcount
;
47 int index0
, index1
, index2
;
48 float z
,*yaddr0
,*yaddr1
,*yaddr2
;
53 float f3
, ans3
, poly3
, sign3
, *yaddr3
;
56 /* Power series atan(x) = x + p1*x**3 + p2*x**5 + p3*x**7
57 * Error = -3.08254E-18 On the interval |x| < 1/64 */
59 static const float p1
= -0.33329644f
/* -3.333333333329292858E-01f */ ;
60 static const float pone
= 1.0f
;
62 if (n
<= 0) return; /* if no. of elements is 0 or neg, do nothing */
67 intf
= *(int *) x
; /* upper half of x, as integer */
71 intf
= intf
& ~0x80000000; /* abs(upper argument) */
76 if ((intf
> 0x5B000000) || (intf
< 0x31800000)) /* filter out special cases */
78 if (intf
> 0x7f800000)
80 ansf
= f0
- f0
; /* return NaN if x=NaN*/
82 else if (intf
< 0x31800000) /* avoid underflow for small arg */
88 else if (intf
> 0x5B000000) /* avoid underflow for big arg */
91 ansf
= __vlibm_TBL_atan1
[index0
];/* pi/2 up */
93 *y
= sign0
*ansf
; /* store answer, with sign bit */
96 argcount
= 0; /* initialize argcount */
97 if (--n
<=0) break; /* we are done */
98 goto LOOP0
; /* otherwise, examine next arg */
101 if (intf
> 0x42800000) /* if (|x| > 64 */
104 index0
= 2; /* point to pi/2 upper, lower */
106 else if (intf
>= 0x3C800000) /* if |x| >= (1/64)... */
108 intz
= (intf
+ 0x00040000) & 0x7ff80000;/* round arg, keep upper */
109 pz
[0] = intz
; /* store as a float (z) */
110 f0
= (f0
- z
)/(pone
+ f0
*z
);
111 index0
= (intz
- 0x3C800000) >> 18; /* (index >> 19) << 1) */
112 index0
= index0
+ 4; /* skip over 0,0,pi/2,pi/2 */
114 else /* |x| < 1/64 */
116 index0
= 0; /* points to 0,0 in table */
118 yaddr0
= y
; /* address to store this answer */
119 x
+= stridex
; /* point to next arg */
120 y
+= stridey
; /* point to next result */
121 argcount
= 1; /* we now have 1 good argument */
124 goto UNROLL
; /* finish up with 1 good arg */
127 /*--------------------------------------------------------------------------*/
128 /*--------------------------------------------------------------------------*/
129 /*--------------------------------------------------------------------------*/
133 intf
= *(int *) x
; /* upper half of x, as integer */
137 intf
= intf
& ~0x80000000; /* abs(upper argument) */
142 if ((intf
> 0x5B000000) || (intf
< 0x31800000)) /* filter out special cases */
144 if (intf
> 0x7f800000)
146 ansf
= f1
- f1
; /* return NaN if x=NaN*/
148 else if (intf
< 0x31800000) /* avoid underflow for small arg */
154 else if (intf
> 0x5B000000) /* avoid underflow for big arg */
157 ansf
= __vlibm_TBL_atan1
[index1
] ;/* pi/2 up */
159 *y
= sign1
* ansf
; /* store answer, with sign bit */
162 argcount
= 1; /* we still have 1 good arg */
165 goto UNROLL
; /* finish up with 1 good arg */
167 goto LOOP1
; /* otherwise, examine next arg */
170 if (intf
> 0x42800000) /* if (|x| > 64 */
173 index1
= 2; /* point to pi/2 upper, lower */
175 else if (intf
>= 0x3C800000) /* if |x| >= (1/64)... */
177 intz
= (intf
+ 0x00040000) & 0x7ff80000;/* round arg, keep upper */
178 pz
[0] = intz
; /* store as a float (z) */
179 f1
= (f1
- z
)/(pone
+ f1
*z
);
180 index1
= (intz
- 0x3C800000) >> 18; /* (index >> 19) << 1) */
181 index1
= index1
+ 4; /* skip over 0,0,pi/2,pi/2 */
185 index1
= 0; /* points to 0,0 in table */
188 yaddr1
= y
; /* address to store this answer */
189 x
+= stridex
; /* point to next arg */
190 y
+= stridey
; /* point to next result */
191 argcount
= 2; /* we now have 2 good arguments */
194 goto UNROLL
; /* finish up with 2 good args */
197 /*--------------------------------------------------------------------------*/
198 /*--------------------------------------------------------------------------*/
199 /*--------------------------------------------------------------------------*/
203 intf
= *(int *) x
; /* upper half of x, as integer */
207 intf
= intf
& ~0x80000000; /* abs(upper argument) */
212 if ((intf
> 0x5B000000) || (intf
< 0x31800000)) /* filter out special cases */
214 if (intf
> 0x7f800000)
216 ansf
= f2
- f2
; /* return NaN if x=NaN*/
218 else if (intf
< 0x31800000) /* avoid underflow for small arg */
224 else if (intf
> 0x5B000000) /* avoid underflow for big arg */
227 ansf
= __vlibm_TBL_atan1
[index2
] ;/* pi/2 up */
229 *y
= sign2
* ansf
; /* store answer, with sign bit */
232 argcount
= 2; /* we still have 2 good args */
235 goto UNROLL
; /* finish up with 2 good args */
237 goto LOOP2
; /* otherwise, examine next arg */
240 if (intf
> 0x42800000) /* if (|x| > 64 */
243 index2
= 2; /* point to pi/2 upper, lower */
245 else if (intf
>= 0x3C800000) /* if |x| >= (1/64)... */
247 intz
= (intf
+ 0x00040000) & 0x7ff80000;/* round arg, keep upper */
248 pz
[0] = intz
; /* store as a float (z) */
249 f2
= (f2
- z
)/(pone
+ f2
*z
);
250 index2
= (intz
- 0x3C800000) >> 18; /* (index >> 19) << 1) */
251 index2
= index2
+ 4; /* skip over 0,0,pi/2,pi/2 */
255 index2
= 0; /* points to 0,0 in table */
257 yaddr2
= y
; /* address to store this answer */
258 x
+= stridex
; /* point to next arg */
259 y
+= stridey
; /* point to next result */
260 argcount
= 3; /* we now have 3 good arguments */
263 goto UNROLL
; /* finish up with 2 good args */
267 /*--------------------------------------------------------------------------*/
268 /*--------------------------------------------------------------------------*/
269 /*--------------------------------------------------------------------------*/
274 intf
= *(int *) x
; /* upper half of x, as integer */
278 intf
= intf
& ~0x80000000; /* abs(upper argument) */
283 if ((intf
> 0x5B000000) || (intf
< 0x31800000)) /* filter out special cases */
285 if (intf
> 0x7f800000)
287 ansf
= f3
- f3
; /* return NaN if x=NaN*/
289 else if (intf
< 0x31800000) /* avoid underflow for small arg */
295 else if (intf
> 0x5B000000) /* avoid underflow for big arg */
298 ansf
= __vlibm_TBL_atan1
[index3
] ;/* pi/2 up */
300 *y
= sign3
* ansf
; /* store answer, with sign bit */
303 argcount
= 3; /* we still have 3 good args */
306 goto UNROLL
; /* finish up with 3 good args */
308 goto LOOP3
; /* otherwise, examine next arg */
311 if (intf
> 0x42800000) /* if (|x| > 64 */
316 index3
= 2; /* point to pi/2 upper, lower */
318 else if (intf
>= 0x3C800000) /* if |x| >= (1/64)... */
320 intz
= (intf
+ 0x00040000) & 0x7ff80000;/* round arg, keep upper */
321 pz
[0] = intz
; /* store as a float (z) */
323 d3
= (pone
+ f3
*z
); /* get reduced argument */
325 index3
= (intz
- 0x3C800000) >> 18; /* (index >> 19) << 1) */
326 index3
= index3
+ 4; /* skip over 0,0,pi/2,pi/2 */
332 index3
= 0; /* points to 0,0 in table */
334 yaddr3
= y
; /* address to store this answer */
335 x
+= stridex
; /* point to next arg */
336 y
+= stridey
; /* point to next result */
337 argcount
= 4; /* we now have 4 good arguments */
340 goto UNROLL
; /* finish up with 3 good args */
344 /* here is the n-way unrolled section,
345 but we may actually have less than n
346 arguments at this point
354 conup0
= __vlibm_TBL_atan1
[index0
];
355 conup1
= __vlibm_TBL_atan1
[index1
];
356 conup2
= __vlibm_TBL_atan1
[index2
];
357 conup3
= __vlibm_TBL_atan1
[index3
];
358 poly0
= p1
*f0
*f0
*f0
+ f0
;
359 ans0
= sign0
* (float)(conup0
+ poly0
);
360 poly1
= p1
*f1
*f1
*f1
+ f1
;
361 ans1
= sign1
* (float)(conup1
+ poly1
);
362 poly2
= p1
*f2
*f2
*f2
+ f2
;
363 ans2
= sign2
* (float)(conup2
+ poly2
);
364 poly3
= p1
*f3
*f3
*f3
+ f3
;
365 ans3
= sign3
* (float)(conup3
+ poly3
);
375 conup0
= __vlibm_TBL_atan1
[index0
];
376 conup1
= __vlibm_TBL_atan1
[index1
];
377 conup2
= __vlibm_TBL_atan1
[index2
];
378 poly0
= p1
*f0
*f0
*f0
+ f0
;
379 poly1
= p1
*f1
*f1
*f1
+ f1
;
380 poly2
= p1
*f2
*f2
*f2
+ f2
;
381 ans0
= sign0
* (float)(conup0
+ poly0
);
382 ans1
= sign1
* (float)(conup1
+ poly1
);
383 ans2
= sign2
* (float)(conup2
+ poly2
);
391 conup0
= __vlibm_TBL_atan1
[index0
];
392 conup1
= __vlibm_TBL_atan1
[index1
];
393 poly0
= p1
*f0
*f0
*f0
+ f0
;
394 poly1
= p1
*f1
*f1
*f1
+ f1
;
395 ans0
= sign0
* (float)(conup0
+ poly0
);
396 ans1
= sign1
* (float)(conup1
+ poly1
);
403 conup0
= __vlibm_TBL_atan1
[index0
];
404 poly0
= p1
*f0
*f0
*f0
+ f0
;
405 ans0
= sign0
* (float)(conup0
+ poly0
);