Merge remote-tracking branch 'origin/master'
[unleashed/lotheac.git] / usr / src / lib / libmvec / common / __vsin.c
blobf679b8089de9a065b16f13a66f54d75d7b708b2a
1 /*
2 * CDDL HEADER START
4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License (the "License").
6 * You may not use this file except in compliance with the License.
8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 * or http://www.opensolaris.org/os/licensing.
10 * See the License for the specific language governing permissions
11 * and limitations under the License.
13 * When distributing Covered Code, include this CDDL HEADER in each
14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 * If applicable, add the following below this CDDL HEADER, with the
16 * fields enclosed by brackets "[]" replaced with your own identifying
17 * information: Portions Copyright [yyyy] [name of copyright owner]
19 * CDDL HEADER END
23 * Copyright 2011 Nexenta Systems, Inc. All rights reserved.
26 * Copyright 2006 Sun Microsystems, Inc. All rights reserved.
27 * Use is subject to license terms.
30 #include <sys/isa_defs.h>
31 #include <sys/ccompile.h>
33 #ifdef _LITTLE_ENDIAN
34 #define HI(x) *(1+(int*)x)
35 #define LO(x) *(unsigned*)x
36 #else
37 #define HI(x) *(int*)x
38 #define LO(x) *(1+(unsigned*)x)
39 #endif
41 #ifdef __RESTRICT
42 #define restrict _Restrict
43 #else
44 #define restrict
45 #endif
47 extern const double __vlibm_TBL_sincos_hi[], __vlibm_TBL_sincos_lo[];
49 static const double
50 half[2] = { 0.5, -0.5 },
51 one = 1.0,
52 invpio2 = 0.636619772367581343075535,
53 pio2_1 = 1.570796326734125614166,
54 pio2_2 = 6.077100506303965976596e-11,
55 pio2_3 = 2.022266248711166455796e-21,
56 pio2_3t = 8.478427660368899643959e-32,
57 pp1 = -1.666666666605760465276263943134982554676e-0001,
58 pp2 = 8.333261209690963126718376566146180944442e-0003,
59 qq1 = -4.999999999977710986407023955908711557870e-0001,
60 qq2 = 4.166654863857219350645055881018842089580e-0002,
61 poly1[2]= { -1.666666666666629669805215138920301589656e-0001,
62 -4.999999999999931701464060878888294524481e-0001 },
63 poly2[2]= { 8.333333332390951295683993455280336376663e-0003,
64 4.166666666394861917535640593963708222319e-0002 },
65 poly3[2]= { -1.984126237997976692791551778230098403960e-0004,
66 -1.388888552656142867832756687736851681462e-0003 },
67 poly4[2]= { 2.753403624854277237649987622848330351110e-0006,
68 2.478519423681460796618128289454530524759e-0005 };
70 static const unsigned thresh[2] = { 0x3fc90000, 0x3fc40000 };
72 /* Don't __ the following; acomp will handle it */
73 extern double fabs(double);
74 extern void __vlibm_vsin_big(int, double *, int, double *, int, int);
76 void
77 __vsin(int n, double * restrict x, int stridex, double * restrict y,
78 int stridey)
80 double x0_or_one[4], x1_or_one[4], x2_or_one[4];
81 double y0_or_zero[4], y1_or_zero[4], y2_or_zero[4];
82 double x0, x1, x2, *py0 = 0, *py1 = 0, *py2, *xsave, *ysave;
83 unsigned hx0, hx1, hx2, xsb0, xsb1 = 0, xsb2;
84 int i, biguns, nsave, sxsave, sysave;
85 volatile int v __unused;
86 nsave = n;
87 xsave = x;
88 sxsave = stridex;
89 ysave = y;
90 sysave = stridey;
91 biguns = 0;
93 x0 = *x; /* error: 'x0' may be used uninitialized */
96 LOOP0:
97 xsb0 = HI(x);
98 hx0 = xsb0 & ~0x80000000;
99 if (hx0 > 0x3fe921fb)
101 biguns = 1;
102 goto MEDIUM;
104 if (hx0 < 0x3e400000)
106 v = *x;
107 *y = *x;
108 x += stridex;
109 y += stridey;
110 i = 0;
111 if (--n <= 0)
112 break;
113 goto LOOP0;
115 x0 = *x;
116 py0 = y;
117 x += stridex;
118 y += stridey;
119 i = 1;
120 if (--n <= 0)
121 break;
123 LOOP1:
124 xsb1 = HI(x);
125 hx1 = xsb1 & ~0x80000000;
126 if (hx1 > 0x3fe921fb)
128 biguns = 2;
129 goto MEDIUM;
131 if (hx1 < 0x3e400000)
133 v = *x;
134 *y = *x;
135 x += stridex;
136 y += stridey;
137 i = 1;
138 if (--n <= 0)
139 break;
140 goto LOOP1;
142 x1 = *x;
143 py1 = y;
144 x += stridex;
145 y += stridey;
146 i = 2;
147 if (--n <= 0)
148 break;
150 LOOP2:
151 xsb2 = HI(x);
152 hx2 = xsb2 & ~0x80000000;
153 if (hx2 > 0x3fe921fb)
155 biguns = 3;
156 goto MEDIUM;
158 if (hx2 < 0x3e400000)
160 v = *x;
161 *y = *x;
162 x += stridex;
163 y += stridey;
164 i = 2;
165 if (--n <= 0)
166 break;
167 goto LOOP2;
169 x2 = *x;
170 py2 = y;
172 i = (hx0 - 0x3fc90000) >> 31;
173 i |= ((hx1 - 0x3fc90000) >> 30) & 2;
174 i |= ((hx2 - 0x3fc90000) >> 29) & 4;
175 switch (i)
177 double a0, a1, a2, w0, w1, w2;
178 double t0, t1, t2, z0, z1, z2;
179 unsigned j0, j1, j2;
181 case 0:
182 j0 = (xsb0 + 0x4000) & 0xffff8000;
183 j1 = (xsb1 + 0x4000) & 0xffff8000;
184 j2 = (xsb2 + 0x4000) & 0xffff8000;
185 HI(&t0) = j0;
186 HI(&t1) = j1;
187 HI(&t2) = j2;
188 LO(&t0) = 0;
189 LO(&t1) = 0;
190 LO(&t2) = 0;
191 x0 -= t0;
192 x1 -= t1;
193 x2 -= t2;
194 z0 = x0 * x0;
195 z1 = x1 * x1;
196 z2 = x2 * x2;
197 t0 = z0 * (qq1 + z0 * qq2);
198 t1 = z1 * (qq1 + z1 * qq2);
199 t2 = z2 * (qq1 + z2 * qq2);
200 w0 = x0 * (one + z0 * (pp1 + z0 * pp2));
201 w1 = x1 * (one + z1 * (pp1 + z1 * pp2));
202 w2 = x2 * (one + z2 * (pp1 + z2 * pp2));
203 j0 = (((j0 & ~0x80000000) - 0x3fc40000) >> 13) & ~0x3;
204 j1 = (((j1 & ~0x80000000) - 0x3fc40000) >> 13) & ~0x3;
205 j2 = (((j2 & ~0x80000000) - 0x3fc40000) >> 13) & ~0x3;
206 xsb0 = (xsb0 >> 30) & 2;
207 xsb1 = (xsb1 >> 30) & 2;
208 xsb2 = (xsb2 >> 30) & 2;
209 a0 = __vlibm_TBL_sincos_hi[j0+xsb0];
210 a1 = __vlibm_TBL_sincos_hi[j1+xsb1];
211 a2 = __vlibm_TBL_sincos_hi[j2+xsb2];
212 t0 = (__vlibm_TBL_sincos_hi[j0+1] * w0 + a0 * t0) + __vlibm_TBL_sincos_lo[j0+xsb0];
213 t1 = (__vlibm_TBL_sincos_hi[j1+1] * w1 + a1 * t1) + __vlibm_TBL_sincos_lo[j1+xsb1];
214 t2 = (__vlibm_TBL_sincos_hi[j2+1] * w2 + a2 * t2) + __vlibm_TBL_sincos_lo[j2+xsb2];
215 *py0 = a0 + t0;
216 *py1 = a1 + t1;
217 *py2 = a2 + t2;
218 break;
220 case 1:
221 j1 = (xsb1 + 0x4000) & 0xffff8000;
222 j2 = (xsb2 + 0x4000) & 0xffff8000;
223 HI(&t1) = j1;
224 HI(&t2) = j2;
225 LO(&t1) = 0;
226 LO(&t2) = 0;
227 x1 -= t1;
228 x2 -= t2;
229 z0 = x0 * x0;
230 z1 = x1 * x1;
231 z2 = x2 * x2;
232 t0 = z0 * (poly3[0] + z0 * poly4[0]);
233 t1 = z1 * (qq1 + z1 * qq2);
234 t2 = z2 * (qq1 + z2 * qq2);
235 t0 = z0 * (poly1[0] + z0 * (poly2[0] + t0));
236 w1 = x1 * (one + z1 * (pp1 + z1 * pp2));
237 w2 = x2 * (one + z2 * (pp1 + z2 * pp2));
238 j1 = (((j1 & ~0x80000000) - 0x3fc40000) >> 13) & ~0x3;
239 j2 = (((j2 & ~0x80000000) - 0x3fc40000) >> 13) & ~0x3;
240 xsb1 = (xsb1 >> 30) & 2;
241 xsb2 = (xsb2 >> 30) & 2;
242 a1 = __vlibm_TBL_sincos_hi[j1+xsb1];
243 a2 = __vlibm_TBL_sincos_hi[j2+xsb2];
244 t0 = x0 + x0 * t0;
245 t1 = (__vlibm_TBL_sincos_hi[j1+1] * w1 + a1 * t1) + __vlibm_TBL_sincos_lo[j1+xsb1];
246 t2 = (__vlibm_TBL_sincos_hi[j2+1] * w2 + a2 * t2) + __vlibm_TBL_sincos_lo[j2+xsb2];
247 *py0 = t0;
248 *py1 = a1 + t1;
249 *py2 = a2 + t2;
250 break;
252 case 2:
253 j0 = (xsb0 + 0x4000) & 0xffff8000;
254 j2 = (xsb2 + 0x4000) & 0xffff8000;
255 HI(&t0) = j0;
256 HI(&t2) = j2;
257 LO(&t0) = 0;
258 LO(&t2) = 0;
259 x0 -= t0;
260 x2 -= t2;
261 z0 = x0 * x0;
262 z1 = x1 * x1;
263 z2 = x2 * x2;
264 t0 = z0 * (qq1 + z0 * qq2);
265 t1 = z1 * (poly3[0] + z1 * poly4[0]);
266 t2 = z2 * (qq1 + z2 * qq2);
267 w0 = x0 * (one + z0 * (pp1 + z0 * pp2));
268 t1 = z1 * (poly1[0] + z1 * (poly2[0] + t1));
269 w2 = x2 * (one + z2 * (pp1 + z2 * pp2));
270 j0 = (((j0 & ~0x80000000) - 0x3fc40000) >> 13) & ~0x3;
271 j2 = (((j2 & ~0x80000000) - 0x3fc40000) >> 13) & ~0x3;
272 xsb0 = (xsb0 >> 30) & 2;
273 xsb2 = (xsb2 >> 30) & 2;
274 a0 = __vlibm_TBL_sincos_hi[j0+xsb0];
275 a2 = __vlibm_TBL_sincos_hi[j2+xsb2];
276 t0 = (__vlibm_TBL_sincos_hi[j0+1] * w0 + a0 * t0) + __vlibm_TBL_sincos_lo[j0+xsb0];
277 t1 = x1 + x1 * t1;
278 t2 = (__vlibm_TBL_sincos_hi[j2+1] * w2 + a2 * t2) + __vlibm_TBL_sincos_lo[j2+xsb2];
279 *py0 = a0 + t0;
280 *py1 = t1;
281 *py2 = a2 + t2;
282 break;
284 case 3:
285 j2 = (xsb2 + 0x4000) & 0xffff8000;
286 HI(&t2) = j2;
287 LO(&t2) = 0;
288 x2 -= t2;
289 z0 = x0 * x0;
290 z1 = x1 * x1;
291 z2 = x2 * x2;
292 t0 = z0 * (poly3[0] + z0 * poly4[0]);
293 t1 = z1 * (poly3[0] + z1 * poly4[0]);
294 t2 = z2 * (qq1 + z2 * qq2);
295 t0 = z0 * (poly1[0] + z0 * (poly2[0] + t0));
296 t1 = z1 * (poly1[0] + z1 * (poly2[0] + t1));
297 w2 = x2 * (one + z2 * (pp1 + z2 * pp2));
298 j2 = (((j2 & ~0x80000000) - 0x3fc40000) >> 13) & ~0x3;
299 xsb2 = (xsb2 >> 30) & 2;
300 a2 = __vlibm_TBL_sincos_hi[j2+xsb2];
301 t0 = x0 + x0 * t0;
302 t1 = x1 + x1 * t1;
303 t2 = (__vlibm_TBL_sincos_hi[j2+1] * w2 + a2 * t2) + __vlibm_TBL_sincos_lo[j2+xsb2];
304 *py0 = t0;
305 *py1 = t1;
306 *py2 = a2 + t2;
307 break;
309 case 4:
310 j0 = (xsb0 + 0x4000) & 0xffff8000;
311 j1 = (xsb1 + 0x4000) & 0xffff8000;
312 HI(&t0) = j0;
313 HI(&t1) = j1;
314 LO(&t0) = 0;
315 LO(&t1) = 0;
316 x0 -= t0;
317 x1 -= t1;
318 z0 = x0 * x0;
319 z1 = x1 * x1;
320 z2 = x2 * x2;
321 t0 = z0 * (qq1 + z0 * qq2);
322 t1 = z1 * (qq1 + z1 * qq2);
323 t2 = z2 * (poly3[0] + z2 * poly4[0]);
324 w0 = x0 * (one + z0 * (pp1 + z0 * pp2));
325 w1 = x1 * (one + z1 * (pp1 + z1 * pp2));
326 t2 = z2 * (poly1[0] + z2 * (poly2[0] + t2));
327 j0 = (((j0 & ~0x80000000) - 0x3fc40000) >> 13) & ~0x3;
328 j1 = (((j1 & ~0x80000000) - 0x3fc40000) >> 13) & ~0x3;
329 xsb0 = (xsb0 >> 30) & 2;
330 xsb1 = (xsb1 >> 30) & 2;
331 a0 = __vlibm_TBL_sincos_hi[j0+xsb0];
332 a1 = __vlibm_TBL_sincos_hi[j1+xsb1];
333 t0 = (__vlibm_TBL_sincos_hi[j0+1] * w0 + a0 * t0) + __vlibm_TBL_sincos_lo[j0+xsb0];
334 t1 = (__vlibm_TBL_sincos_hi[j1+1] * w1 + a1 * t1) + __vlibm_TBL_sincos_lo[j1+xsb1];
335 t2 = x2 + x2 * t2;
336 *py0 = a0 + t0;
337 *py1 = a1 + t1;
338 *py2 = t2;
339 break;
341 case 5:
342 j1 = (xsb1 + 0x4000) & 0xffff8000;
343 HI(&t1) = j1;
344 LO(&t1) = 0;
345 x1 -= t1;
346 z0 = x0 * x0;
347 z1 = x1 * x1;
348 z2 = x2 * x2;
349 t0 = z0 * (poly3[0] + z0 * poly4[0]);
350 t1 = z1 * (qq1 + z1 * qq2);
351 t2 = z2 * (poly3[0] + z2 * poly4[0]);
352 t0 = z0 * (poly1[0] + z0 * (poly2[0] + t0));
353 w1 = x1 * (one + z1 * (pp1 + z1 * pp2));
354 t2 = z2 * (poly1[0] + z2 * (poly2[0] + t2));
355 j1 = (((j1 & ~0x80000000) - 0x3fc40000) >> 13) & ~0x3;
356 xsb1 = (xsb1 >> 30) & 2;
357 a1 = __vlibm_TBL_sincos_hi[j1+xsb1];
358 t0 = x0 + x0 * t0;
359 t1 = (__vlibm_TBL_sincos_hi[j1+1] * w1 + a1 * t1) + __vlibm_TBL_sincos_lo[j1+xsb1];
360 t2 = x2 + x2 * t2;
361 *py0 = t0;
362 *py1 = a1 + t1;
363 *py2 = t2;
364 break;
366 case 6:
367 j0 = (xsb0 + 0x4000) & 0xffff8000;
368 HI(&t0) = j0;
369 LO(&t0) = 0;
370 x0 -= t0;
371 z0 = x0 * x0;
372 z1 = x1 * x1;
373 z2 = x2 * x2;
374 t0 = z0 * (qq1 + z0 * qq2);
375 t1 = z1 * (poly3[0] + z1 * poly4[0]);
376 t2 = z2 * (poly3[0] + z2 * poly4[0]);
377 w0 = x0 * (one + z0 * (pp1 + z0 * pp2));
378 t1 = z1 * (poly1[0] + z1 * (poly2[0] + t1));
379 t2 = z2 * (poly1[0] + z2 * (poly2[0] + t2));
380 j0 = (((j0 & ~0x80000000) - 0x3fc40000) >> 13) & ~0x3;
381 xsb0 = (xsb0 >> 30) & 2;
382 a0 = __vlibm_TBL_sincos_hi[j0+xsb0];
383 t0 = (__vlibm_TBL_sincos_hi[j0+1] * w0 + a0 * t0) + __vlibm_TBL_sincos_lo[j0+xsb0];
384 t1 = x1 + x1 * t1;
385 t2 = x2 + x2 * t2;
386 *py0 = a0 + t0;
387 *py1 = t1;
388 *py2 = t2;
389 break;
391 case 7:
392 z0 = x0 * x0;
393 z1 = x1 * x1;
394 z2 = x2 * x2;
395 t0 = z0 * (poly3[0] + z0 * poly4[0]);
396 t1 = z1 * (poly3[0] + z1 * poly4[0]);
397 t2 = z2 * (poly3[0] + z2 * poly4[0]);
398 t0 = z0 * (poly1[0] + z0 * (poly2[0] + t0));
399 t1 = z1 * (poly1[0] + z1 * (poly2[0] + t1));
400 t2 = z2 * (poly1[0] + z2 * (poly2[0] + t2));
401 t0 = x0 + x0 * t0;
402 t1 = x1 + x1 * t1;
403 t2 = x2 + x2 * t2;
404 *py0 = t0;
405 *py1 = t1;
406 *py2 = t2;
407 break;
410 x += stridex;
411 y += stridey;
412 i = 0;
413 } while (--n > 0);
415 if (i > 0)
417 double a0, a1, w0, w1;
418 double t0, t1, z0, z1;
419 unsigned j0, j1;
421 if (i > 1)
423 if (hx1 < 0x3fc90000)
425 z1 = x1 * x1;
426 t1 = z1 * (poly3[0] + z1 * poly4[0]);
427 t1 = z1 * (poly1[0] + z1 * (poly2[0] + t1));
428 t1 = x1 + x1 * t1;
429 *py1 = t1;
431 else
433 j1 = (xsb1 + 0x4000) & 0xffff8000;
434 HI(&t1) = j1;
435 LO(&t1) = 0;
436 x1 -= t1;
437 z1 = x1 * x1;
438 t1 = z1 * (qq1 + z1 * qq2);
439 w1 = x1 * (one + z1 * (pp1 + z1 * pp2));
440 j1 = (((j1 & ~0x80000000) - 0x3fc40000) >> 13) & ~0x3;
441 xsb1 = (xsb1 >> 30) & 2;
442 a1 = __vlibm_TBL_sincos_hi[j1+xsb1];
443 t1 = (__vlibm_TBL_sincos_hi[j1+1] * w1 + a1 * t1) + __vlibm_TBL_sincos_lo[j1+xsb1];
444 *py1 = a1 + t1;
447 if (hx0 < 0x3fc90000)
449 z0 = x0 * x0;
450 t0 = z0 * (poly3[0] + z0 * poly4[0]);
451 t0 = z0 * (poly1[0] + z0 * (poly2[0] + t0));
452 t0 = x0 + x0 * t0;
453 *py0 = t0;
455 else
457 j0 = (xsb0 + 0x4000) & 0xffff8000;
458 HI(&t0) = j0;
459 LO(&t0) = 0;
460 x0 -= t0;
461 z0 = x0 * x0;
462 t0 = z0 * (qq1 + z0 * qq2);
463 w0 = x0 * (one + z0 * (pp1 + z0 * pp2));
464 j0 = (((j0 & ~0x80000000) - 0x3fc40000) >> 13) & ~0x3;
465 xsb0 = (xsb0 >> 30) & 2;
466 a0 = __vlibm_TBL_sincos_hi[j0+xsb0];
467 t0 = (__vlibm_TBL_sincos_hi[j0+1] * w0 + a0 * t0) + __vlibm_TBL_sincos_lo[j0+xsb0];
468 *py0 = a0 + t0;
472 return;
475 * MEDIUM RANGE PROCESSING
476 * Jump here at first sign of medium range argument. We are a bit
477 * confused due to the jump.. fix up several variables and jump into
478 * the nth loop, same as was being processed above.
481 MEDIUM:
483 x0_or_one[1] = 1.0;
484 x1_or_one[1] = 1.0;
485 x2_or_one[1] = 1.0;
486 x0_or_one[3] = -1.0;
487 x1_or_one[3] = -1.0;
488 x2_or_one[3] = -1.0;
489 y0_or_zero[1] = 0.0;
490 y1_or_zero[1] = 0.0;
491 y2_or_zero[1] = 0.0;
492 y0_or_zero[3] = 0.0;
493 y1_or_zero[3] = 0.0;
494 y2_or_zero[3] = 0.0;
496 if (biguns == 3)
498 biguns = 0;
499 xsb0 = xsb0 >> 31;
500 xsb1 = xsb1 >> 31;
501 goto loop2;
503 else if (biguns == 2)
505 xsb0 = xsb0 >> 31;
506 biguns = 0;
507 goto loop1;
509 biguns = 0;
513 double fn0, fn1, fn2, a0, a1, a2, w0, w1, w2, y0, y1, y2;
514 unsigned hx;
515 int n0, n1, n2;
517 loop0:
518 hx = HI(x);
519 xsb0 = hx >> 31;
520 hx &= ~0x80000000;
521 if (hx < 0x3e400000)
523 v = *x;
524 *y = *x;
525 x += stridex;
526 y += stridey;
527 i = 0;
528 if (--n <= 0)
529 break;
530 goto loop0;
532 if (hx > 0x413921fb)
534 if (hx >= 0x7ff00000)
536 x0 = *x;
537 *y = x0 - x0;
539 else
540 biguns = 1;
541 x += stridex;
542 y += stridey;
543 i = 0;
544 if (--n <= 0)
545 break;
546 goto loop0;
548 x0 = *x;
549 py0 = y;
550 x += stridex;
551 y += stridey;
552 i = 1;
553 if (--n <= 0)
554 break;
556 loop1:
557 hx = HI(x);
558 xsb1 = hx >> 31;
559 hx &= ~0x80000000;
560 if (hx < 0x3e400000)
562 v = *x;
563 *y = *x;
564 x += stridex;
565 y += stridey;
566 i = 1;
567 if (--n <= 0)
568 break;
569 goto loop1;
571 if (hx > 0x413921fb)
573 if (hx >= 0x7ff00000)
575 x1 = *x;
576 *y = x1 - x1;
578 else
579 biguns = 1;
580 x += stridex;
581 y += stridey;
582 i = 1;
583 if (--n <= 0)
584 break;
585 goto loop1;
587 x1 = *x;
588 py1 = y;
589 x += stridex;
590 y += stridey;
591 i = 2;
592 if (--n <= 0)
593 break;
595 loop2:
596 hx = HI(x);
597 xsb2 = hx >> 31;
598 hx &= ~0x80000000;
599 if (hx < 0x3e400000)
601 v = *x;
602 *y = *x;
603 x += stridex;
604 y += stridey;
605 i = 2;
606 if (--n <= 0)
607 break;
608 goto loop2;
610 if (hx > 0x413921fb)
612 if (hx >= 0x7ff00000)
614 x2 = *x;
615 *y = x2 - x2;
617 else
618 biguns = 1;
619 x += stridex;
620 y += stridey;
621 i = 2;
622 if (--n <= 0)
623 break;
624 goto loop2;
626 x2 = *x;
627 py2 = y;
629 n0 = (int) (x0 * invpio2 + half[xsb0]);
630 n1 = (int) (x1 * invpio2 + half[xsb1]);
631 n2 = (int) (x2 * invpio2 + half[xsb2]);
632 fn0 = (double) n0;
633 fn1 = (double) n1;
634 fn2 = (double) n2;
635 n0 &= 3;
636 n1 &= 3;
637 n2 &= 3;
638 a0 = x0 - fn0 * pio2_1;
639 a1 = x1 - fn1 * pio2_1;
640 a2 = x2 - fn2 * pio2_1;
641 w0 = fn0 * pio2_2;
642 w1 = fn1 * pio2_2;
643 w2 = fn2 * pio2_2;
644 x0 = a0 - w0;
645 x1 = a1 - w1;
646 x2 = a2 - w2;
647 y0 = (a0 - x0) - w0;
648 y1 = (a1 - x1) - w1;
649 y2 = (a2 - x2) - w2;
650 a0 = x0;
651 a1 = x1;
652 a2 = x2;
653 w0 = fn0 * pio2_3 - y0;
654 w1 = fn1 * pio2_3 - y1;
655 w2 = fn2 * pio2_3 - y2;
656 x0 = a0 - w0;
657 x1 = a1 - w1;
658 x2 = a2 - w2;
659 y0 = (a0 - x0) - w0;
660 y1 = (a1 - x1) - w1;
661 y2 = (a2 - x2) - w2;
662 a0 = x0;
663 a1 = x1;
664 a2 = x2;
665 w0 = fn0 * pio2_3t - y0;
666 w1 = fn1 * pio2_3t - y1;
667 w2 = fn2 * pio2_3t - y2;
668 x0 = a0 - w0;
669 x1 = a1 - w1;
670 x2 = a2 - w2;
671 y0 = (a0 - x0) - w0;
672 y1 = (a1 - x1) - w1;
673 y2 = (a2 - x2) - w2;
674 xsb0 = HI(&x0);
675 i = ((xsb0 & ~0x80000000) - thresh[n0&1]) >> 31;
676 xsb1 = HI(&x1);
677 i |= (((xsb1 & ~0x80000000) - thresh[n1&1]) >> 30) & 2;
678 xsb2 = HI(&x2);
679 i |= (((xsb2 & ~0x80000000) - thresh[n2&1]) >> 29) & 4;
680 switch (i)
682 double t0, t1, t2, z0, z1, z2;
683 unsigned j0, j1, j2;
685 case 0:
686 j0 = (xsb0 + 0x4000) & 0xffff8000;
687 j1 = (xsb1 + 0x4000) & 0xffff8000;
688 j2 = (xsb2 + 0x4000) & 0xffff8000;
689 HI(&t0) = j0;
690 HI(&t1) = j1;
691 HI(&t2) = j2;
692 LO(&t0) = 0;
693 LO(&t1) = 0;
694 LO(&t2) = 0;
695 x0 = (x0 - t0) + y0;
696 x1 = (x1 - t1) + y1;
697 x2 = (x2 - t2) + y2;
698 z0 = x0 * x0;
699 z1 = x1 * x1;
700 z2 = x2 * x2;
701 t0 = z0 * (qq1 + z0 * qq2);
702 t1 = z1 * (qq1 + z1 * qq2);
703 t2 = z2 * (qq1 + z2 * qq2);
704 w0 = x0 * (one + z0 * (pp1 + z0 * pp2));
705 w1 = x1 * (one + z1 * (pp1 + z1 * pp2));
706 w2 = x2 * (one + z2 * (pp1 + z2 * pp2));
707 j0 = (((j0 & ~0x80000000) - 0x3fc40000) >> 13) & ~0x3;
708 j1 = (((j1 & ~0x80000000) - 0x3fc40000) >> 13) & ~0x3;
709 j2 = (((j2 & ~0x80000000) - 0x3fc40000) >> 13) & ~0x3;
710 xsb0 = (xsb0 >> 30) & 2;
711 xsb1 = (xsb1 >> 30) & 2;
712 xsb2 = (xsb2 >> 30) & 2;
713 n0 ^= (xsb0 & ~(n0 << 1));
714 n1 ^= (xsb1 & ~(n1 << 1));
715 n2 ^= (xsb2 & ~(n2 << 1));
716 xsb0 |= 1;
717 xsb1 |= 1;
718 xsb2 |= 1;
719 a0 = __vlibm_TBL_sincos_hi[j0+n0];
720 a1 = __vlibm_TBL_sincos_hi[j1+n1];
721 a2 = __vlibm_TBL_sincos_hi[j2+n2];
722 t0 = (__vlibm_TBL_sincos_hi[j0+((n0+xsb0)&3)] * w0 + a0 * t0) + __vlibm_TBL_sincos_lo[j0+n0];
723 t1 = (__vlibm_TBL_sincos_hi[j1+((n1+xsb1)&3)] * w1 + a1 * t1) + __vlibm_TBL_sincos_lo[j1+n1];
724 t2 = (__vlibm_TBL_sincos_hi[j2+((n2+xsb2)&3)] * w2 + a2 * t2) + __vlibm_TBL_sincos_lo[j2+n2];
725 *py0 = ( a0 + t0 );
726 *py1 = ( a1 + t1 );
727 *py2 = ( a2 + t2 );
728 break;
730 case 1:
731 j0 = n0 & 1;
732 j1 = (xsb1 + 0x4000) & 0xffff8000;
733 j2 = (xsb2 + 0x4000) & 0xffff8000;
734 HI(&t1) = j1;
735 HI(&t2) = j2;
736 LO(&t1) = 0;
737 LO(&t2) = 0;
738 x0_or_one[0] = x0;
739 x0_or_one[2] = -x0;
740 y0_or_zero[0] = y0;
741 y0_or_zero[2] = -y0;
742 x1 = (x1 - t1) + y1;
743 x2 = (x2 - t2) + y2;
744 z0 = x0 * x0;
745 z1 = x1 * x1;
746 z2 = x2 * x2;
747 t0 = z0 * (poly3[j0] + z0 * poly4[j0]);
748 t1 = z1 * (qq1 + z1 * qq2);
749 t2 = z2 * (qq1 + z2 * qq2);
750 t0 = z0 * (poly1[j0] + z0 * (poly2[j0] + t0));
751 w1 = x1 * (one + z1 * (pp1 + z1 * pp2));
752 w2 = x2 * (one + z2 * (pp1 + z2 * pp2));
753 j1 = (((j1 & ~0x80000000) - 0x3fc40000) >> 13) & ~0x3;
754 j2 = (((j2 & ~0x80000000) - 0x3fc40000) >> 13) & ~0x3;
755 xsb1 = (xsb1 >> 30) & 2;
756 xsb2 = (xsb2 >> 30) & 2;
757 n1 ^= (xsb1 & ~(n1 << 1));
758 n2 ^= (xsb2 & ~(n2 << 1));
759 xsb1 |= 1;
760 xsb2 |= 1;
761 a1 = __vlibm_TBL_sincos_hi[j1+n1];
762 a2 = __vlibm_TBL_sincos_hi[j2+n2];
763 t0 = x0_or_one[n0] + (y0_or_zero[n0] + x0_or_one[n0] * t0);
764 t1 = (__vlibm_TBL_sincos_hi[j1+((n1+xsb1)&3)] * w1 + a1 * t1) + __vlibm_TBL_sincos_lo[j1+n1];
765 t2 = (__vlibm_TBL_sincos_hi[j2+((n2+xsb2)&3)] * w2 + a2 * t2) + __vlibm_TBL_sincos_lo[j2+n2];
766 *py0 = t0;
767 *py1 = ( a1 + t1 );
768 *py2 = ( a2 + t2 );
769 break;
771 case 2:
772 j0 = (xsb0 + 0x4000) & 0xffff8000;
773 j1 = n1 & 1;
774 j2 = (xsb2 + 0x4000) & 0xffff8000;
775 HI(&t0) = j0;
776 HI(&t2) = j2;
777 LO(&t0) = 0;
778 LO(&t2) = 0;
779 x1_or_one[0] = x1;
780 x1_or_one[2] = -x1;
781 x0 = (x0 - t0) + y0;
782 y1_or_zero[0] = y1;
783 y1_or_zero[2] = -y1;
784 x2 = (x2 - t2) + y2;
785 z0 = x0 * x0;
786 z1 = x1 * x1;
787 z2 = x2 * x2;
788 t0 = z0 * (qq1 + z0 * qq2);
789 t1 = z1 * (poly3[j1] + z1 * poly4[j1]);
790 t2 = z2 * (qq1 + z2 * qq2);
791 w0 = x0 * (one + z0 * (pp1 + z0 * pp2));
792 t1 = z1 * (poly1[j1] + z1 * (poly2[j1] + t1));
793 w2 = x2 * (one + z2 * (pp1 + z2 * pp2));
794 j0 = (((j0 & ~0x80000000) - 0x3fc40000) >> 13) & ~0x3;
795 j2 = (((j2 & ~0x80000000) - 0x3fc40000) >> 13) & ~0x3;
796 xsb0 = (xsb0 >> 30) & 2;
797 xsb2 = (xsb2 >> 30) & 2;
798 n0 ^= (xsb0 & ~(n0 << 1));
799 n2 ^= (xsb2 & ~(n2 << 1));
800 xsb0 |= 1;
801 xsb2 |= 1;
802 a0 = __vlibm_TBL_sincos_hi[j0+n0];
803 a2 = __vlibm_TBL_sincos_hi[j2+n2];
804 t0 = (__vlibm_TBL_sincos_hi[j0+((n0+xsb0)&3)] * w0 + a0 * t0) + __vlibm_TBL_sincos_lo[j0+n0];
805 t1 = x1_or_one[n1] + (y1_or_zero[n1] + x1_or_one[n1] * t1);
806 t2 = (__vlibm_TBL_sincos_hi[j2+((n2+xsb2)&3)] * w2 + a2 * t2) + __vlibm_TBL_sincos_lo[j2+n2];
807 *py0 = ( a0 + t0 );
808 *py1 = t1;
809 *py2 = ( a2 + t2 );
810 break;
812 case 3:
813 j0 = n0 & 1;
814 j1 = n1 & 1;
815 j2 = (xsb2 + 0x4000) & 0xffff8000;
816 HI(&t2) = j2;
817 LO(&t2) = 0;
818 x0_or_one[0] = x0;
819 x0_or_one[2] = -x0;
820 x1_or_one[0] = x1;
821 x1_or_one[2] = -x1;
822 y0_or_zero[0] = y0;
823 y0_or_zero[2] = -y0;
824 y1_or_zero[0] = y1;
825 y1_or_zero[2] = -y1;
826 x2 = (x2 - t2) + y2;
827 z0 = x0 * x0;
828 z1 = x1 * x1;
829 z2 = x2 * x2;
830 t0 = z0 * (poly3[j0] + z0 * poly4[j0]);
831 t1 = z1 * (poly3[j1] + z1 * poly4[j1]);
832 t2 = z2 * (qq1 + z2 * qq2);
833 t0 = z0 * (poly1[j0] + z0 * (poly2[j0] + t0));
834 t1 = z1 * (poly1[j1] + z1 * (poly2[j1] + t1));
835 w2 = x2 * (one + z2 * (pp1 + z2 * pp2));
836 j2 = (((j2 & ~0x80000000) - 0x3fc40000) >> 13) & ~0x3;
837 xsb2 = (xsb2 >> 30) & 2;
838 n2 ^= (xsb2 & ~(n2 << 1));
839 xsb2 |= 1;
840 a2 = __vlibm_TBL_sincos_hi[j2+n2];
841 t0 = x0_or_one[n0] + (y0_or_zero[n0] + x0_or_one[n0] * t0);
842 t1 = x1_or_one[n1] + (y1_or_zero[n1] + x1_or_one[n1] * t1);
843 t2 = (__vlibm_TBL_sincos_hi[j2+((n2+xsb2)&3)] * w2 + a2 * t2) + __vlibm_TBL_sincos_lo[j2+n2];
844 *py0 = t0;
845 *py1 = t1;
846 *py2 = ( a2 + t2 );
847 break;
849 case 4:
850 j0 = (xsb0 + 0x4000) & 0xffff8000;
851 j1 = (xsb1 + 0x4000) & 0xffff8000;
852 j2 = n2 & 1;
853 HI(&t0) = j0;
854 HI(&t1) = j1;
855 LO(&t0) = 0;
856 LO(&t1) = 0;
857 x2_or_one[0] = x2;
858 x2_or_one[2] = -x2;
859 x0 = (x0 - t0) + y0;
860 x1 = (x1 - t1) + y1;
861 y2_or_zero[0] = y2;
862 y2_or_zero[2] = -y2;
863 z0 = x0 * x0;
864 z1 = x1 * x1;
865 z2 = x2 * x2;
866 t0 = z0 * (qq1 + z0 * qq2);
867 t1 = z1 * (qq1 + z1 * qq2);
868 t2 = z2 * (poly3[j2] + z2 * poly4[j2]);
869 w0 = x0 * (one + z0 * (pp1 + z0 * pp2));
870 w1 = x1 * (one + z1 * (pp1 + z1 * pp2));
871 t2 = z2 * (poly1[j2] + z2 * (poly2[j2] + t2));
872 j0 = (((j0 & ~0x80000000) - 0x3fc40000) >> 13) & ~0x3;
873 j1 = (((j1 & ~0x80000000) - 0x3fc40000) >> 13) & ~0x3;
874 xsb0 = (xsb0 >> 30) & 2;
875 xsb1 = (xsb1 >> 30) & 2;
876 n0 ^= (xsb0 & ~(n0 << 1));
877 n1 ^= (xsb1 & ~(n1 << 1));
878 xsb0 |= 1;
879 xsb1 |= 1;
880 a0 = __vlibm_TBL_sincos_hi[j0+n0];
881 a1 = __vlibm_TBL_sincos_hi[j1+n1];
882 t0 = (__vlibm_TBL_sincos_hi[j0+((n0+xsb0)&3)] * w0 + a0 * t0) + __vlibm_TBL_sincos_lo[j0+n0];
883 t1 = (__vlibm_TBL_sincos_hi[j1+((n1+xsb1)&3)] * w1 + a1 * t1) + __vlibm_TBL_sincos_lo[j1+n1];
884 t2 = x2_or_one[n2] + (y2_or_zero[n2] + x2_or_one[n2] * t2);
885 *py0 = ( a0 + t0 );
886 *py1 = ( a1 + t1 );
887 *py2 = t2;
888 break;
890 case 5:
891 j0 = n0 & 1;
892 j1 = (xsb1 + 0x4000) & 0xffff8000;
893 j2 = n2 & 1;
894 HI(&t1) = j1;
895 LO(&t1) = 0;
896 x0_or_one[0] = x0;
897 x0_or_one[2] = -x0;
898 x2_or_one[0] = x2;
899 x2_or_one[2] = -x2;
900 y0_or_zero[0] = y0;
901 y0_or_zero[2] = -y0;
902 x1 = (x1 - t1) + y1;
903 y2_or_zero[0] = y2;
904 y2_or_zero[2] = -y2;
905 z0 = x0 * x0;
906 z1 = x1 * x1;
907 z2 = x2 * x2;
908 t0 = z0 * (poly3[j0] + z0 * poly4[j0]);
909 t1 = z1 * (qq1 + z1 * qq2);
910 t2 = z2 * (poly3[j2] + z2 * poly4[j2]);
911 t0 = z0 * (poly1[j0] + z0 * (poly2[j0] + t0));
912 w1 = x1 * (one + z1 * (pp1 + z1 * pp2));
913 t2 = z2 * (poly1[j2] + z2 * (poly2[j2] + t2));
914 j1 = (((j1 & ~0x80000000) - 0x3fc40000) >> 13) & ~0x3;
915 xsb1 = (xsb1 >> 30) & 2;
916 n1 ^= (xsb1 & ~(n1 << 1));
917 xsb1 |= 1;
918 a1 = __vlibm_TBL_sincos_hi[j1+n1];
919 t0 = x0_or_one[n0] + (y0_or_zero[n0] + x0_or_one[n0] * t0);
920 t1 = (__vlibm_TBL_sincos_hi[j1+((n1+xsb1)&3)] * w1 + a1 * t1) + __vlibm_TBL_sincos_lo[j1+n1];
921 t2 = x2_or_one[n2] + (y2_or_zero[n2] + x2_or_one[n2] * t2);
922 *py0 = t0;
923 *py1 = ( a1 + t1 );
924 *py2 = t2;
925 break;
927 case 6:
928 j0 = (xsb0 + 0x4000) & 0xffff8000;
929 j1 = n1 & 1;
930 j2 = n2 & 1;
931 HI(&t0) = j0;
932 LO(&t0) = 0;
933 x1_or_one[0] = x1;
934 x1_or_one[2] = -x1;
935 x2_or_one[0] = x2;
936 x2_or_one[2] = -x2;
937 x0 = (x0 - t0) + y0;
938 y1_or_zero[0] = y1;
939 y1_or_zero[2] = -y1;
940 y2_or_zero[0] = y2;
941 y2_or_zero[2] = -y2;
942 z0 = x0 * x0;
943 z1 = x1 * x1;
944 z2 = x2 * x2;
945 t0 = z0 * (qq1 + z0 * qq2);
946 t1 = z1 * (poly3[j1] + z1 * poly4[j1]);
947 t2 = z2 * (poly3[j2] + z2 * poly4[j2]);
948 w0 = x0 * (one + z0 * (pp1 + z0 * pp2));
949 t1 = z1 * (poly1[j1] + z1 * (poly2[j1] + t1));
950 t2 = z2 * (poly1[j2] + z2 * (poly2[j2] + t2));
951 j0 = (((j0 & ~0x80000000) - 0x3fc40000) >> 13) & ~0x3;
952 xsb0 = (xsb0 >> 30) & 2;
953 n0 ^= (xsb0 & ~(n0 << 1));
954 xsb0 |= 1;
955 a0 = __vlibm_TBL_sincos_hi[j0+n0];
956 t0 = (__vlibm_TBL_sincos_hi[j0+((n0+xsb0)&3)] * w0 + a0 * t0) + __vlibm_TBL_sincos_lo[j0+n0];
957 t1 = x1_or_one[n1] + (y1_or_zero[n1] + x1_or_one[n1] * t1);
958 t2 = x2_or_one[n2] + (y2_or_zero[n2] + x2_or_one[n2] * t2);
959 *py0 = ( a0 + t0 );
960 *py1 = t1;
961 *py2 = t2;
962 break;
964 case 7:
965 j0 = n0 & 1;
966 j1 = n1 & 1;
967 j2 = n2 & 1;
968 x0_or_one[0] = x0;
969 x0_or_one[2] = -x0;
970 x1_or_one[0] = x1;
971 x1_or_one[2] = -x1;
972 x2_or_one[0] = x2;
973 x2_or_one[2] = -x2;
974 y0_or_zero[0] = y0;
975 y0_or_zero[2] = -y0;
976 y1_or_zero[0] = y1;
977 y1_or_zero[2] = -y1;
978 y2_or_zero[0] = y2;
979 y2_or_zero[2] = -y2;
980 z0 = x0 * x0;
981 z1 = x1 * x1;
982 z2 = x2 * x2;
983 t0 = z0 * (poly3[j0] + z0 * poly4[j0]);
984 t1 = z1 * (poly3[j1] + z1 * poly4[j1]);
985 t2 = z2 * (poly3[j2] + z2 * poly4[j2]);
986 t0 = z0 * (poly1[j0] + z0 * (poly2[j0] + t0));
987 t1 = z1 * (poly1[j1] + z1 * (poly2[j1] + t1));
988 t2 = z2 * (poly1[j2] + z2 * (poly2[j2] + t2));
989 t0 = x0_or_one[n0] + (y0_or_zero[n0] + x0_or_one[n0] * t0);
990 t1 = x1_or_one[n1] + (y1_or_zero[n1] + x1_or_one[n1] * t1);
991 t2 = x2_or_one[n2] + (y2_or_zero[n2] + x2_or_one[n2] * t2);
992 *py0 = t0;
993 *py1 = t1;
994 *py2 = t2;
995 break;
998 x += stridex;
999 y += stridey;
1000 i = 0;
1001 } while (--n > 0);
1003 if (i > 0)
1005 double fn0, fn1, a0, a1, w0, w1, y0, y1;
1006 double t0, t1, z0, z1;
1007 unsigned j0, j1;
1008 int n0, n1;
1010 if (i > 1)
1012 n1 = (int) (x1 * invpio2 + half[xsb1]);
1013 fn1 = (double) n1;
1014 n1 &= 3;
1015 a1 = x1 - fn1 * pio2_1;
1016 w1 = fn1 * pio2_2;
1017 x1 = a1 - w1;
1018 y1 = (a1 - x1) - w1;
1019 a1 = x1;
1020 w1 = fn1 * pio2_3 - y1;
1021 x1 = a1 - w1;
1022 y1 = (a1 - x1) - w1;
1023 a1 = x1;
1024 w1 = fn1 * pio2_3t - y1;
1025 x1 = a1 - w1;
1026 y1 = (a1 - x1) - w1;
1027 xsb1 = HI(&x1);
1028 if ((xsb1 & ~0x80000000) < thresh[n1&1])
1030 j1 = n1 & 1;
1031 x1_or_one[0] = x1;
1032 x1_or_one[2] = -x1;
1033 y1_or_zero[0] = y1;
1034 y1_or_zero[2] = -y1;
1035 z1 = x1 * x1;
1036 t1 = z1 * (poly3[j1] + z1 * poly4[j1]);
1037 t1 = z1 * (poly1[j1] + z1 * (poly2[j1] + t1));
1038 t1 = x1_or_one[n1] + (y1_or_zero[n1] + x1_or_one[n1] * t1);
1039 *py1 = t1;
1041 else
1043 j1 = (xsb1 + 0x4000) & 0xffff8000;
1044 HI(&t1) = j1;
1045 LO(&t1) = 0;
1046 x1 = (x1 - t1) + y1;
1047 z1 = x1 * x1;
1048 t1 = z1 * (qq1 + z1 * qq2);
1049 w1 = x1 * (one + z1 * (pp1 + z1 * pp2));
1050 j1 = (((j1 & ~0x80000000) - 0x3fc40000) >> 13) & ~0x3;
1051 xsb1 = (xsb1 >> 30) & 2;
1052 n1 ^= (xsb1 & ~(n1 << 1));
1053 xsb1 |= 1;
1054 a1 = __vlibm_TBL_sincos_hi[j1+n1];
1055 t1 = (__vlibm_TBL_sincos_hi[j1+((n1+xsb1)&3)] * w1 + a1 * t1) + __vlibm_TBL_sincos_lo[j1+n1];
1056 *py1 = ( a1 + t1 );
1059 n0 = (int) (x0 * invpio2 + half[xsb0]);
1060 fn0 = (double) n0;
1061 n0 &= 3;
1062 a0 = x0 - fn0 * pio2_1;
1063 w0 = fn0 * pio2_2;
1064 x0 = a0 - w0;
1065 y0 = (a0 - x0) - w0;
1066 a0 = x0;
1067 w0 = fn0 * pio2_3 - y0;
1068 x0 = a0 - w0;
1069 y0 = (a0 - x0) - w0;
1070 a0 = x0;
1071 w0 = fn0 * pio2_3t - y0;
1072 x0 = a0 - w0;
1073 y0 = (a0 - x0) - w0;
1074 xsb0 = HI(&x0);
1075 if ((xsb0 & ~0x80000000) < thresh[n0&1])
1077 j0 = n0 & 1;
1078 x0_or_one[0] = x0;
1079 x0_or_one[2] = -x0;
1080 y0_or_zero[0] = y0;
1081 y0_or_zero[2] = -y0;
1082 z0 = x0 * x0;
1083 t0 = z0 * (poly3[j0] + z0 * poly4[j0]);
1084 t0 = z0 * (poly1[j0] + z0 * (poly2[j0] + t0));
1085 t0 = x0_or_one[n0] + (y0_or_zero[n0] + x0_or_one[n0] * t0);
1086 *py0 = t0;
1088 else
1090 j0 = (xsb0 + 0x4000) & 0xffff8000;
1091 HI(&t0) = j0;
1092 LO(&t0) = 0;
1093 x0 = (x0 - t0) + y0;
1094 z0 = x0 * x0;
1095 t0 = z0 * (qq1 + z0 * qq2);
1096 w0 = x0 * (one + z0 * (pp1 + z0 * pp2));
1097 j0 = (((j0 & ~0x80000000) - 0x3fc40000) >> 13) & ~0x3;
1098 xsb0 = (xsb0 >> 30) & 2;
1099 n0 ^= (xsb0 & ~(n0 << 1));
1100 xsb0 |= 1;
1101 a0 = __vlibm_TBL_sincos_hi[j0+n0];
1102 t0 = (__vlibm_TBL_sincos_hi[j0+((n0+xsb0)&3)] * w0 + a0 * t0) + __vlibm_TBL_sincos_lo[j0+n0];
1103 *py0 = ( a0 + t0 );
1107 if (biguns)
1108 __vlibm_vsin_big(nsave, xsave, sxsave, ysave, sysave, 0x413921fb);