Merge remote-tracking branch 'origin/master'
[unleashed/lotheac.git] / usr / src / lib / libmvec / common / __vatan2.c
blob49500b8f9185d068cac97c245ccde1abb10e1526
1 /*
2 * CDDL HEADER START
4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License (the "License").
6 * You may not use this file except in compliance with the License.
8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 * or http://www.opensolaris.org/os/licensing.
10 * See the License for the specific language governing permissions
11 * and limitations under the License.
13 * When distributing Covered Code, include this CDDL HEADER in each
14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 * If applicable, add the following below this CDDL HEADER, with the
16 * fields enclosed by brackets "[]" replaced with your own identifying
17 * information: Portions Copyright [yyyy] [name of copyright owner]
19 * CDDL HEADER END
23 * Copyright 2011 Nexenta Systems, Inc. All rights reserved.
26 * Copyright 2006 Sun Microsystems, Inc. All rights reserved.
27 * Use is subject to license terms.
30 #include <sys/isa_defs.h>
31 #include "libm_inlines.h"
33 #ifdef _LITTLE_ENDIAN
34 #define HI(x) *(1+(int*)x)
35 #define LO(x) *(unsigned*)x
36 #else
37 #define HI(x) *(int*)x
38 #define LO(x) *(1+(unsigned*)x)
39 #endif
41 #ifdef __RESTRICT
42 #define restrict _Restrict
43 #else
44 #define restrict
45 #endif
47 extern const double __vlibm_TBL_atan2[];
49 static const double
50 zero = 0.0,
51 twom3 = 0.125,
52 one = 1.0,
53 two110 = 1.2980742146337069071e+33,
54 pio4 = 7.8539816339744827900e-01,
55 pio2 = 1.5707963267948965580e+00,
56 pio2_lo = 6.1232339957367658860e-17,
57 pi = 3.1415926535897931160e+00,
58 pi_lo = 1.2246467991473531772e-16,
59 p1 = -3.33333333333327571893331786354179101074860633009e-0001,
60 p2 = 1.99999999942671624230086497610394721817438631379e-0001,
61 p3 = -1.42856965565428636896183013324727205980484158356e-0001,
62 p4 = 1.10894981496317081405107718475040168084164825641e-0001;
64 /* Don't __ the following; acomp will handle it */
65 extern double fabs(double);
67 void
68 __vatan2(int n, double * restrict y, int stridey, double * restrict x,
69 int stridex, double * restrict z, int stridez)
71 double x0, x1, x2, y0, y1, y2, *pz0, *pz1, *pz2;
72 double ah0, ah1, ah2, al0, al1, al2, t0, t1, t2;
73 double z0, z1, z2, sign0, sign1, sign2, xh;
74 int i, k, hx, hy, sx, sy;
78 loop0:
79 hy = HI(y);
80 sy = hy & 0x80000000;
81 hy &= ~0x80000000;
82 sign0 = (sy)? -one : one;
84 hx = HI(x);
85 sx = hx & 0x80000000;
86 hx &= ~0x80000000;
88 if (hy > hx || (hy == hx && LO(y) > LO(x)))
90 i = hx;
91 hx = hy;
92 hy = i;
93 x0 = fabs(*y);
94 y0 = fabs(*x);
95 if (sx)
97 ah0 = pio2;
98 al0 = pio2_lo;
100 else
102 ah0 = -pio2;
103 al0 = -pio2_lo;
104 sign0 = -sign0;
107 else
109 x0 = fabs(*x);
110 y0 = fabs(*y);
111 if (sx)
113 ah0 = -pi;
114 al0 = -pi_lo;
115 sign0 = -sign0;
117 else
118 ah0 = al0 = zero;
121 if (hx >= 0x7fe00000 || hx - hy >= 0x03600000)
123 if (hx >= 0x7ff00000)
125 if ((hx ^ 0x7ff00000) | LO(&x0)) /* nan */
126 ah0 = x0 + y0;
127 else if (hy >= 0x7ff00000)
128 ah0 += pio4;
129 *z = sign0 * ah0;
130 x += stridex;
131 y += stridey;
132 z += stridez;
133 i = 0;
134 if (--n <= 0)
135 break;
136 goto loop0;
138 if (hx - hy >= 0x03600000)
140 if ((int) ah0 == 0)
141 ah0 = y0 / x0;
142 *z = sign0 * ah0;
143 x += stridex;
144 y += stridey;
145 z += stridez;
146 i = 0;
147 if (--n <= 0)
148 break;
149 goto loop0;
151 y0 *= twom3;
152 x0 *= twom3;
153 hy -= 0x00300000;
154 hx -= 0x00300000;
156 else if (hy < 0x00100000)
158 if ((hy | LO(&y0)) == 0)
160 *z = sign0 * ah0;
161 x += stridex;
162 y += stridey;
163 z += stridez;
164 i = 0;
165 if (--n <= 0)
166 break;
167 goto loop0;
169 y0 *= two110;
170 x0 *= two110;
171 hy = HI(&y0);
172 hx = HI(&x0);
175 k = (((hx - hy) + 0x00004000) >> 13) & ~0x3;
176 if (k > 644)
177 k = 644;
178 ah0 += __vlibm_TBL_atan2[k];
179 al0 += __vlibm_TBL_atan2[k+1];
180 t0 = __vlibm_TBL_atan2[k+2];
182 xh = x0;
183 LO(&xh) = 0;
184 z0 = ((y0 - t0 * xh) - t0 * (x0 - xh)) / (x0 + y0 * t0);
185 pz0 = z;
186 x += stridex;
187 y += stridey;
188 z += stridez;
189 i = 1;
190 if (--n <= 0)
191 break;
193 loop1:
194 hy = HI(y);
195 sy = hy & 0x80000000;
196 hy &= ~0x80000000;
197 sign1 = (sy)? -one : one;
199 hx = HI(x);
200 sx = hx & 0x80000000;
201 hx &= ~0x80000000;
203 if (hy > hx || (hy == hx && LO(y) > LO(x)))
205 i = hx;
206 hx = hy;
207 hy = i;
208 x1 = fabs(*y);
209 y1 = fabs(*x);
210 if (sx)
212 ah1 = pio2;
213 al1 = pio2_lo;
215 else
217 ah1 = -pio2;
218 al1 = -pio2_lo;
219 sign1 = -sign1;
222 else
224 x1 = fabs(*x);
225 y1 = fabs(*y);
226 if (sx)
228 ah1 = -pi;
229 al1 = -pi_lo;
230 sign1 = -sign1;
232 else
233 ah1 = al1 = zero;
236 if (hx >= 0x7fe00000 || hx - hy >= 0x03600000)
238 if (hx >= 0x7ff00000)
240 if ((hx ^ 0x7ff00000) | LO(&x1)) /* nan */
241 ah1 = x1 + y1;
242 else if (hy >= 0x7ff00000)
243 ah1 += pio4;
244 *z = sign1 * ah1;
245 x += stridex;
246 y += stridey;
247 z += stridez;
248 i = 1;
249 if (--n <= 0)
250 break;
251 goto loop1;
253 if (hx - hy >= 0x03600000)
255 if ((int) ah1 == 0)
256 ah1 = y1 / x1;
257 *z = sign1 * ah1;
258 x += stridex;
259 y += stridey;
260 z += stridez;
261 i = 1;
262 if (--n <= 0)
263 break;
264 goto loop1;
266 y1 *= twom3;
267 x1 *= twom3;
268 hy -= 0x00300000;
269 hx -= 0x00300000;
271 else if (hy < 0x00100000)
273 if ((hy | LO(&y1)) == 0)
275 *z = sign1 * ah1;
276 x += stridex;
277 y += stridey;
278 z += stridez;
279 i = 1;
280 if (--n <= 0)
281 break;
282 goto loop1;
284 y1 *= two110;
285 x1 *= two110;
286 hy = HI(&y1);
287 hx = HI(&x1);
290 k = (((hx - hy) + 0x00004000) >> 13) & ~0x3;
291 if (k > 644)
292 k = 644;
293 ah1 += __vlibm_TBL_atan2[k];
294 al1 += __vlibm_TBL_atan2[k+1];
295 t1 = __vlibm_TBL_atan2[k+2];
297 xh = x1;
298 LO(&xh) = 0;
299 z1 = ((y1 - t1 * xh) - t1 * (x1 - xh)) / (x1 + y1 * t1);
300 pz1 = z;
301 x += stridex;
302 y += stridey;
303 z += stridez;
304 i = 2;
305 if (--n <= 0)
306 break;
308 loop2:
309 hy = HI(y);
310 sy = hy & 0x80000000;
311 hy &= ~0x80000000;
312 sign2 = (sy)? -one : one;
314 hx = HI(x);
315 sx = hx & 0x80000000;
316 hx &= ~0x80000000;
318 if (hy > hx || (hy == hx && LO(y) > LO(x)))
320 i = hx;
321 hx = hy;
322 hy = i;
323 x2 = fabs(*y);
324 y2 = fabs(*x);
325 if (sx)
327 ah2 = pio2;
328 al2 = pio2_lo;
330 else
332 ah2 = -pio2;
333 al2 = -pio2_lo;
334 sign2 = -sign2;
337 else
339 x2 = fabs(*x);
340 y2 = fabs(*y);
341 if (sx)
343 ah2 = -pi;
344 al2 = -pi_lo;
345 sign2 = -sign2;
347 else
348 ah2 = al2 = zero;
351 if (hx >= 0x7fe00000 || hx - hy >= 0x03600000)
353 if (hx >= 0x7ff00000)
355 if ((hx ^ 0x7ff00000) | LO(&x2)) /* nan */
356 ah2 = x2 + y2;
357 else if (hy >= 0x7ff00000)
358 ah2 += pio4;
359 *z = sign2 * ah2;
360 x += stridex;
361 y += stridey;
362 z += stridez;
363 i = 2;
364 if (--n <= 0)
365 break;
366 goto loop2;
368 if (hx - hy >= 0x03600000)
370 if ((int) ah2 == 0)
371 ah2 = y2 / x2;
372 *z = sign2 * ah2;
373 x += stridex;
374 y += stridey;
375 z += stridez;
376 i = 2;
377 if (--n <= 0)
378 break;
379 goto loop2;
381 y2 *= twom3;
382 x2 *= twom3;
383 hy -= 0x00300000;
384 hx -= 0x00300000;
386 else if (hy < 0x00100000)
388 if ((hy | LO(&y2)) == 0)
390 *z = sign2 * ah2;
391 x += stridex;
392 y += stridey;
393 z += stridez;
394 i = 2;
395 if (--n <= 0)
396 break;
397 goto loop2;
399 y2 *= two110;
400 x2 *= two110;
401 hy = HI(&y2);
402 hx = HI(&x2);
405 k = (((hx - hy) + 0x00004000) >> 13) & ~0x3;
406 if (k > 644)
407 k = 644;
408 ah2 += __vlibm_TBL_atan2[k];
409 al2 += __vlibm_TBL_atan2[k+1];
410 t2 = __vlibm_TBL_atan2[k+2];
412 xh = x2;
413 LO(&xh) = 0;
414 z2 = ((y2 - t2 * xh) - t2 * (x2 - xh)) / (x2 + y2 * t2);
415 pz2 = z;
417 x0 = z0 * z0;
418 x1 = z1 * z1;
419 x2 = z2 * z2;
421 t0 = ah0 + (z0 + (al0 + (z0 * x0) * (p1 + x0 *
422 (p2 + x0 * (p3 + x0 * p4)))));
423 t1 = ah1 + (z1 + (al1 + (z1 * x1) * (p1 + x1 *
424 (p2 + x1 * (p3 + x1 * p4)))));
425 t2 = ah2 + (z2 + (al2 + (z2 * x2) * (p1 + x2 *
426 (p2 + x2 * (p3 + x2 * p4)))));
428 *pz0 = sign0 * t0;
429 *pz1 = sign1 * t1;
430 *pz2 = sign2 * t2;
432 x += stridex;
433 y += stridey;
434 z += stridez;
435 i = 0;
436 } while (--n > 0);
438 if (i > 0)
440 if (i > 1)
442 x1 = z1 * z1;
443 t1 = ah1 + (z1 + (al1 + (z1 * x1) * (p1 + x1 *
444 (p2 + x1 * (p3 + x1 * p4)))));
445 *pz1 = sign1 * t1;
448 x0 = z0 * z0;
449 t0 = ah0 + (z0 + (al0 + (z0 * x0) * (p1 + x0 *
450 (p2 + x0 * (p3 + x0 * p4)))));
451 *pz0 = sign0 * t0;