dmake: do not set MAKEFLAGS=k
[unleashed/tickless.git] / usr / src / lib / libmvec / common / __vatanf.c
blobbf14dd9ffb27144c8988b14c00bc6b2d80c9696d
1 /*
2 * CDDL HEADER START
4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License (the "License").
6 * You may not use this file except in compliance with the License.
8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 * or http://www.opensolaris.org/os/licensing.
10 * See the License for the specific language governing permissions
11 * and limitations under the License.
13 * When distributing Covered Code, include this CDDL HEADER in each
14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 * If applicable, add the following below this CDDL HEADER, with the
16 * fields enclosed by brackets "[]" replaced with your own identifying
17 * information: Portions Copyright [yyyy] [name of copyright owner]
19 * CDDL HEADER END
23 * Copyright 2011 Nexenta Systems, Inc. All rights reserved.
26 * Copyright 2006 Sun Microsystems, Inc. All rights reserved.
27 * Use is subject to license terms.
30 #ifdef __RESTRICT
31 #define restrict _Restrict
32 #else
33 #define restrict
34 #endif
36 void
37 __vatanf(int n, float * restrict x, int stridex, float * restrict y, int stridey)
39 extern const double __vlibm_TBL_atan1[];
40 double conup0, conup1, conup2;
41 float dummy, ansf = 0.0;
42 float f0, f1, f2;
43 float ans0, ans1, ans2;
44 float poly0, poly1, poly2;
45 float sign0, sign1, sign2;
46 int intf, intz, argcount;
47 int index0, index1, index2;
48 float z,*yaddr0,*yaddr1,*yaddr2;
49 int *pz = (int *) &z;
50 #ifdef UNROLL4
51 double conup3;
52 int index3;
53 float f3, ans3, poly3, sign3, *yaddr3;
54 #endif
56 /* Power series atan(x) = x + p1*x**3 + p2*x**5 + p3*x**7
57 * Error = -3.08254E-18 On the interval |x| < 1/64 */
59 static const float p1 = -0.33329644f /* -3.333333333329292858E-01f */ ;
60 static const float pone = 1.0f;
62 if (n <= 0) return; /* if no. of elements is 0 or neg, do nothing */
65 LOOP0:
67 intf = *(int *) x; /* upper half of x, as integer */
68 f0 = *x;
69 sign0 = pone;
70 if (intf < 0) {
71 intf = intf & ~0x80000000; /* abs(upper argument) */
72 f0 = -f0;
73 sign0 = -sign0;
76 if ((intf > 0x5B000000) || (intf < 0x31800000)) /* filter out special cases */
78 if (intf > 0x7f800000)
80 ansf = f0- f0; /* return NaN if x=NaN*/
82 else if (intf < 0x31800000) /* avoid underflow for small arg */
84 dummy = 1.0e37 + f0;
85 dummy = dummy;
86 ansf = f0;
88 else if (intf > 0x5B000000) /* avoid underflow for big arg */
90 index0= 2;
91 ansf = __vlibm_TBL_atan1[index0];/* pi/2 up */
93 *y = sign0*ansf; /* store answer, with sign bit */
94 x += stridex;
95 y += stridey;
96 argcount = 0; /* initialize argcount */
97 if (--n <=0) break; /* we are done */
98 goto LOOP0; /* otherwise, examine next arg */
101 if (intf > 0x42800000) /* if (|x| > 64 */
103 f0 = -pone/f0;
104 index0 = 2; /* point to pi/2 upper, lower */
106 else if (intf >= 0x3C800000) /* if |x| >= (1/64)... */
108 intz = (intf + 0x00040000) & 0x7ff80000;/* round arg, keep upper */
109 pz[0] = intz; /* store as a float (z) */
110 f0 = (f0 - z)/(pone + f0*z);
111 index0 = (intz - 0x3C800000) >> 18; /* (index >> 19) << 1) */
112 index0 = index0+ 4; /* skip over 0,0,pi/2,pi/2 */
114 else /* |x| < 1/64 */
116 index0 = 0; /* points to 0,0 in table */
118 yaddr0 = y; /* address to store this answer */
119 x += stridex; /* point to next arg */
120 y += stridey; /* point to next result */
121 argcount = 1; /* we now have 1 good argument */
122 if (--n <=0)
124 goto UNROLL; /* finish up with 1 good arg */
127 /*--------------------------------------------------------------------------*/
128 /*--------------------------------------------------------------------------*/
129 /*--------------------------------------------------------------------------*/
131 LOOP1:
133 intf = *(int *) x; /* upper half of x, as integer */
134 f1 = *x;
135 sign1 = pone;
136 if (intf < 0) {
137 intf = intf & ~0x80000000; /* abs(upper argument) */
138 f1 = -f1;
139 sign1 = -sign1;
142 if ((intf > 0x5B000000) || (intf < 0x31800000)) /* filter out special cases */
144 if (intf > 0x7f800000)
146 ansf = f1 - f1; /* return NaN if x=NaN*/
148 else if (intf < 0x31800000) /* avoid underflow for small arg */
150 dummy = 1.0e37 + f1;
151 dummy = dummy;
152 ansf = f1;
154 else if (intf > 0x5B000000) /* avoid underflow for big arg */
156 index1 = 2;
157 ansf = __vlibm_TBL_atan1[index1] ;/* pi/2 up */
159 *y = sign1 * ansf; /* store answer, with sign bit */
160 x += stridex;
161 y += stridey;
162 argcount = 1; /* we still have 1 good arg */
163 if (--n <=0)
165 goto UNROLL; /* finish up with 1 good arg */
167 goto LOOP1; /* otherwise, examine next arg */
170 if (intf > 0x42800000) /* if (|x| > 64 */
172 f1 = -pone/f1;
173 index1 = 2; /* point to pi/2 upper, lower */
175 else if (intf >= 0x3C800000) /* if |x| >= (1/64)... */
177 intz = (intf + 0x00040000) & 0x7ff80000;/* round arg, keep upper */
178 pz[0] = intz; /* store as a float (z) */
179 f1 = (f1 - z)/(pone + f1*z);
180 index1 = (intz - 0x3C800000) >> 18; /* (index >> 19) << 1) */
181 index1 = index1 + 4; /* skip over 0,0,pi/2,pi/2 */
183 else
185 index1 = 0; /* points to 0,0 in table */
188 yaddr1 = y; /* address to store this answer */
189 x += stridex; /* point to next arg */
190 y += stridey; /* point to next result */
191 argcount = 2; /* we now have 2 good arguments */
192 if (--n <=0)
194 goto UNROLL; /* finish up with 2 good args */
197 /*--------------------------------------------------------------------------*/
198 /*--------------------------------------------------------------------------*/
199 /*--------------------------------------------------------------------------*/
201 LOOP2:
203 intf = *(int *) x; /* upper half of x, as integer */
204 f2 = *x;
205 sign2 = pone;
206 if (intf < 0) {
207 intf = intf & ~0x80000000; /* abs(upper argument) */
208 f2 = -f2;
209 sign2 = -sign2;
212 if ((intf > 0x5B000000) || (intf < 0x31800000)) /* filter out special cases */
214 if (intf > 0x7f800000)
216 ansf = f2 - f2; /* return NaN if x=NaN*/
218 else if (intf < 0x31800000) /* avoid underflow for small arg */
220 dummy = 1.0e37 + f2;
221 dummy = dummy;
222 ansf = f2;
224 else if (intf > 0x5B000000) /* avoid underflow for big arg */
226 index2 = 2;
227 ansf = __vlibm_TBL_atan1[index2] ;/* pi/2 up */
229 *y = sign2 * ansf; /* store answer, with sign bit */
230 x += stridex;
231 y += stridey;
232 argcount = 2; /* we still have 2 good args */
233 if (--n <=0)
235 goto UNROLL; /* finish up with 2 good args */
237 goto LOOP2; /* otherwise, examine next arg */
240 if (intf > 0x42800000) /* if (|x| > 64 */
242 f2 = -pone/f2;
243 index2 = 2; /* point to pi/2 upper, lower */
245 else if (intf >= 0x3C800000) /* if |x| >= (1/64)... */
247 intz = (intf + 0x00040000) & 0x7ff80000;/* round arg, keep upper */
248 pz[0] = intz; /* store as a float (z) */
249 f2 = (f2 - z)/(pone + f2*z);
250 index2 = (intz - 0x3C800000) >> 18; /* (index >> 19) << 1) */
251 index2 = index2 + 4; /* skip over 0,0,pi/2,pi/2 */
253 else
255 index2 = 0; /* points to 0,0 in table */
257 yaddr2 = y; /* address to store this answer */
258 x += stridex; /* point to next arg */
259 y += stridey; /* point to next result */
260 argcount = 3; /* we now have 3 good arguments */
261 if (--n <=0)
263 goto UNROLL; /* finish up with 2 good args */
267 /*--------------------------------------------------------------------------*/
268 /*--------------------------------------------------------------------------*/
269 /*--------------------------------------------------------------------------*/
271 #ifdef UNROLL4
272 LOOP3:
274 intf = *(int *) x; /* upper half of x, as integer */
275 f3 = *x;
276 sign3 = pone;
277 if (intf < 0) {
278 intf = intf & ~0x80000000; /* abs(upper argument) */
279 f3 = -f3;
280 sign3 = -sign3;
283 if ((intf > 0x5B000000) || (intf < 0x31800000)) /* filter out special cases */
285 if (intf > 0x7f800000)
287 ansf = f3 - f3; /* return NaN if x=NaN*/
289 else if (intf < 0x31800000) /* avoid underflow for small arg */
291 dummy = 1.0e37 + f3;
292 dummy = dummy;
293 ansf = f3;
295 else if (intf > 0x5B000000) /* avoid underflow for big arg */
297 index3 = 2;
298 ansf = __vlibm_TBL_atan1[index3] ;/* pi/2 up */
300 *y = sign3 * ansf; /* store answer, with sign bit */
301 x += stridex;
302 y += stridey;
303 argcount = 3; /* we still have 3 good args */
304 if (--n <=0)
306 goto UNROLL; /* finish up with 3 good args */
308 goto LOOP3; /* otherwise, examine next arg */
311 if (intf > 0x42800000) /* if (|x| > 64 */
313 n3 = -pone;
314 d3 = f3;
315 f3 = n3/d3;
316 index3 = 2; /* point to pi/2 upper, lower */
318 else if (intf >= 0x3C800000) /* if |x| >= (1/64)... */
320 intz = (intf + 0x00040000) & 0x7ff80000;/* round arg, keep upper */
321 pz[0] = intz; /* store as a float (z) */
322 n3 = (f3 - z);
323 d3 = (pone + f3*z); /* get reduced argument */
324 f3 = n3/d3;
325 index3 = (intz - 0x3C800000) >> 18; /* (index >> 19) << 1) */
326 index3 = index3 + 4; /* skip over 0,0,pi/2,pi/2 */
328 else
330 n3 = f3;
331 d3 = pone;
332 index3 = 0; /* points to 0,0 in table */
334 yaddr3 = y; /* address to store this answer */
335 x += stridex; /* point to next arg */
336 y += stridey; /* point to next result */
337 argcount = 4; /* we now have 4 good arguments */
338 if (--n <=0)
340 goto UNROLL; /* finish up with 3 good args */
342 #endif /* UNROLL4 */
344 /* here is the n-way unrolled section,
345 but we may actually have less than n
346 arguments at this point
349 UNROLL:
351 #ifdef UNROLL4
352 if (argcount == 4)
354 conup0 = __vlibm_TBL_atan1[index0];
355 conup1 = __vlibm_TBL_atan1[index1];
356 conup2 = __vlibm_TBL_atan1[index2];
357 conup3 = __vlibm_TBL_atan1[index3];
358 poly0 = p1*f0*f0*f0 + f0;
359 ans0 = sign0 * (float)(conup0 + poly0);
360 poly1 = p1*f1*f1*f1 + f1;
361 ans1 = sign1 * (float)(conup1 + poly1);
362 poly2 = p1*f2*f2*f2 + f2;
363 ans2 = sign2 * (float)(conup2 + poly2);
364 poly3 = p1*f3*f3*f3 + f3;
365 ans3 = sign3 * (float)(conup3 + poly3);
366 *yaddr0 = ans0;
367 *yaddr1 = ans1;
368 *yaddr2 = ans2;
369 *yaddr3 = ans3;
371 else
372 #endif
373 if (argcount == 3)
375 conup0 = __vlibm_TBL_atan1[index0];
376 conup1 = __vlibm_TBL_atan1[index1];
377 conup2 = __vlibm_TBL_atan1[index2];
378 poly0 = p1*f0*f0*f0 + f0;
379 poly1 = p1*f1*f1*f1 + f1;
380 poly2 = p1*f2*f2*f2 + f2;
381 ans0 = sign0 * (float)(conup0 + poly0);
382 ans1 = sign1 * (float)(conup1 + poly1);
383 ans2 = sign2 * (float)(conup2 + poly2);
384 *yaddr0 = ans0;
385 *yaddr1 = ans1;
386 *yaddr2 = ans2;
388 else
389 if (argcount == 2)
391 conup0 = __vlibm_TBL_atan1[index0];
392 conup1 = __vlibm_TBL_atan1[index1];
393 poly0 = p1*f0*f0*f0 + f0;
394 poly1 = p1*f1*f1*f1 + f1;
395 ans0 = sign0 * (float)(conup0 + poly0);
396 ans1 = sign1 * (float)(conup1 + poly1);
397 *yaddr0 = ans0;
398 *yaddr1 = ans1;
400 else
401 if (argcount == 1)
403 conup0 = __vlibm_TBL_atan1[index0];
404 poly0 = p1*f0*f0*f0 + f0;
405 ans0 = sign0 * (float)(conup0 + poly0);
406 *yaddr0 = ans0;
409 } while (n > 0);