Merge remote-tracking branch 'origin/master'
[unleashed/lotheac.git] / usr / src / lib / libmvec / common / __vcosbig_ultra3.c
blob04b1c9ec82b0c0a2e68f061c6eb9c263a0b29ae1
1 /*
2 * CDDL HEADER START
4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License (the "License").
6 * You may not use this file except in compliance with the License.
8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 * or http://www.opensolaris.org/os/licensing.
10 * See the License for the specific language governing permissions
11 * and limitations under the License.
13 * When distributing Covered Code, include this CDDL HEADER in each
14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 * If applicable, add the following below this CDDL HEADER, with the
16 * fields enclosed by brackets "[]" replaced with your own identifying
17 * information: Portions Copyright [yyyy] [name of copyright owner]
19 * CDDL HEADER END
23 * Copyright 2011 Nexenta Systems, Inc. All rights reserved.
26 * Copyright 2006 Sun Microsystems, Inc. All rights reserved.
27 * Use is subject to license terms.
30 #include <sys/isa_defs.h>
32 #ifdef _LITTLE_ENDIAN
33 #define HI(x) *(1+(int*)x)
34 #define LO(x) *(unsigned*)x
35 #else
36 #define HI(x) *(int*)x
37 #define LO(x) *(1+(unsigned*)x)
38 #endif
40 #ifdef __RESTRICT
41 #define restrict _Restrict
42 #else
43 #define restrict
44 #endif
46 extern const double __vlibm_TBL_sincos_hi[], __vlibm_TBL_sincos_lo[];
48 static const double
49 half[2] = { 0.5, -0.5 },
50 one = 1.0,
51 invpio2 = 0.636619772367581343075535,
52 pio2_1 = 1.570796326734125614166,
53 pio2_2 = 6.077100506303965976596e-11,
54 pio2_3 = 2.022266248711166455796e-21,
55 pio2_3t = 8.478427660368899643959e-32,
56 pp1 = -1.666666666605760465276263943134982554676e-0001,
57 pp2 = 8.333261209690963126718376566146180944442e-0003,
58 qq1 = -4.999999999977710986407023955908711557870e-0001,
59 qq2 = 4.166654863857219350645055881018842089580e-0002,
60 poly1[2]= { -1.666666666666629669805215138920301589656e-0001,
61 -4.999999999999931701464060878888294524481e-0001 },
62 poly2[2]= { 8.333333332390951295683993455280336376663e-0003,
63 4.166666666394861917535640593963708222319e-0002 },
64 poly3[2]= { -1.984126237997976692791551778230098403960e-0004,
65 -1.388888552656142867832756687736851681462e-0003 },
66 poly4[2]= { 2.753403624854277237649987622848330351110e-0006,
67 2.478519423681460796618128289454530524759e-0005 };
69 static const unsigned thresh[2] = { 0x3fc90000, 0x3fc40000 };
71 extern void __vlibm_vcos_big(int, double *, int, double *, int, int);
73 void
74 __vlibm_vcos_big_ultra3(int n, double * restrict x, int stridex, double * restrict y,
75 int stridey, int pthresh)
77 double x0_or_one[4], x1_or_one[4], x2_or_one[4];
78 double y0_or_zero[4], y1_or_zero[4], y2_or_zero[4];
79 double x0, x1, x2, *py0, *py1, *py2, *xsave, *ysave;
80 unsigned xsb0, xsb1, xsb2;
81 int i, biguns, nsave, sxsave, sysave;
83 nsave = n;
84 xsave = x;
85 sxsave = stridex;
86 ysave = y;
87 sysave = stridey;
88 biguns = 0;
90 x0_or_one[1] = 1.0;
91 x1_or_one[1] = 1.0;
92 x2_or_one[1] = 1.0;
93 x0_or_one[3] = -1.0;
94 x1_or_one[3] = -1.0;
95 x2_or_one[3] = -1.0;
96 y0_or_zero[1] = 0.0;
97 y1_or_zero[1] = 0.0;
98 y2_or_zero[1] = 0.0;
99 y0_or_zero[3] = 0.0;
100 y1_or_zero[3] = 0.0;
101 y2_or_zero[3] = 0.0;
105 double fn0, fn1, fn2, a0, a1, a2, w0, w1, w2, y0, y1, y2;
106 unsigned hx;
107 int n0, n1, n2;
109 loop0:
110 hx = HI(x);
111 xsb0 = hx >> 31;
112 hx &= ~0x80000000;
113 if (hx <= pthresh || hx > 0x413921fb)
115 if (hx > 0x413921fb && hx < 0x7ff00000)
116 biguns = 1;
117 x += stridex;
118 y += stridey;
119 i = 0;
120 if (--n <= 0)
121 break;
122 goto loop0;
124 x0 = *x;
125 py0 = y;
126 x += stridex;
127 y += stridey;
128 i = 1;
129 if (--n <= 0)
130 break;
132 loop1:
133 hx = HI(x);
134 xsb1 = hx >> 31;
135 hx &= ~0x80000000;
136 if (hx <= pthresh || hx > 0x413921fb)
138 if (hx > 0x413921fb && hx < 0x7ff00000)
139 biguns = 1;
140 x += stridex;
141 y += stridey;
142 i = 1;
143 if (--n <= 0)
144 break;
145 goto loop1;
147 x1 = *x;
148 py1 = y;
149 x += stridex;
150 y += stridey;
151 i = 2;
152 if (--n <= 0)
153 break;
155 loop2:
156 hx = HI(x);
157 xsb2 = hx >> 31;
158 hx &= ~0x80000000;
159 if (hx <= pthresh || hx > 0x413921fb)
161 if (hx > 0x413921fb && hx < 0x7ff00000)
162 biguns = 1;
163 x += stridex;
164 y += stridey;
165 i = 2;
166 if (--n <= 0)
167 break;
168 goto loop2;
170 x2 = *x;
171 py2 = y;
173 n0 = (int) (x0 * invpio2 + half[xsb0]);
174 n1 = (int) (x1 * invpio2 + half[xsb1]);
175 n2 = (int) (x2 * invpio2 + half[xsb2]);
176 fn0 = (double) n0;
177 fn1 = (double) n1;
178 fn2 = (double) n2;
179 n0 = (n0 + 1) & 3; /* Add 1 (before the mod) to make sin into cos */
180 n1 = (n1 + 1) & 3;
181 n2 = (n2 + 1) & 3;
182 a0 = x0 - fn0 * pio2_1;
183 a1 = x1 - fn1 * pio2_1;
184 a2 = x2 - fn2 * pio2_1;
185 w0 = fn0 * pio2_2;
186 w1 = fn1 * pio2_2;
187 w2 = fn2 * pio2_2;
188 x0 = a0 - w0;
189 x1 = a1 - w1;
190 x2 = a2 - w2;
191 y0 = (a0 - x0) - w0;
192 y1 = (a1 - x1) - w1;
193 y2 = (a2 - x2) - w2;
194 a0 = x0;
195 a1 = x1;
196 a2 = x2;
197 w0 = fn0 * pio2_3 - y0;
198 w1 = fn1 * pio2_3 - y1;
199 w2 = fn2 * pio2_3 - y2;
200 x0 = a0 - w0;
201 x1 = a1 - w1;
202 x2 = a2 - w2;
203 y0 = (a0 - x0) - w0;
204 y1 = (a1 - x1) - w1;
205 y2 = (a2 - x2) - w2;
206 a0 = x0;
207 a1 = x1;
208 a2 = x2;
209 w0 = fn0 * pio2_3t - y0;
210 w1 = fn1 * pio2_3t - y1;
211 w2 = fn2 * pio2_3t - y2;
212 x0 = a0 - w0;
213 x1 = a1 - w1;
214 x2 = a2 - w2;
215 y0 = (a0 - x0) - w0;
216 y1 = (a1 - x1) - w1;
217 y2 = (a2 - x2) - w2;
218 xsb0 = HI(&x0);
219 i = ((xsb0 & ~0x80000000) - thresh[n0&1]) >> 31;
220 xsb1 = HI(&x1);
221 i |= (((xsb1 & ~0x80000000) - thresh[n1&1]) >> 30) & 2;
222 xsb2 = HI(&x2);
223 i |= (((xsb2 & ~0x80000000) - thresh[n2&1]) >> 29) & 4;
224 switch (i)
226 double t0, t1, t2, z0, z1, z2;
227 unsigned j0, j1, j2;
229 case 0:
230 j0 = (xsb0 + 0x4000) & 0xffff8000;
231 j1 = (xsb1 + 0x4000) & 0xffff8000;
232 j2 = (xsb2 + 0x4000) & 0xffff8000;
233 HI(&t0) = j0;
234 HI(&t1) = j1;
235 HI(&t2) = j2;
236 LO(&t0) = 0;
237 LO(&t1) = 0;
238 LO(&t2) = 0;
239 x0 = (x0 - t0) + y0;
240 x1 = (x1 - t1) + y1;
241 x2 = (x2 - t2) + y2;
242 z0 = x0 * x0;
243 z1 = x1 * x1;
244 z2 = x2 * x2;
245 t0 = z0 * (qq1 + z0 * qq2);
246 t1 = z1 * (qq1 + z1 * qq2);
247 t2 = z2 * (qq1 + z2 * qq2);
248 w0 = x0 * (one + z0 * (pp1 + z0 * pp2));
249 w1 = x1 * (one + z1 * (pp1 + z1 * pp2));
250 w2 = x2 * (one + z2 * (pp1 + z2 * pp2));
251 j0 = (((j0 & ~0x80000000) - 0x3fc40000) >> 13) & ~0x3;
252 j1 = (((j1 & ~0x80000000) - 0x3fc40000) >> 13) & ~0x3;
253 j2 = (((j2 & ~0x80000000) - 0x3fc40000) >> 13) & ~0x3;
254 xsb0 = (xsb0 >> 30) & 2;
255 xsb1 = (xsb1 >> 30) & 2;
256 xsb2 = (xsb2 >> 30) & 2;
257 n0 ^= (xsb0 & ~(n0 << 1));
258 n1 ^= (xsb1 & ~(n1 << 1));
259 n2 ^= (xsb2 & ~(n2 << 1));
260 xsb0 |= 1;
261 xsb1 |= 1;
262 xsb2 |= 1;
263 a0 = __vlibm_TBL_sincos_hi[j0+n0];
264 a1 = __vlibm_TBL_sincos_hi[j1+n1];
265 a2 = __vlibm_TBL_sincos_hi[j2+n2];
266 t0 = (__vlibm_TBL_sincos_hi[j0+((n0+xsb0)&3)] * w0 + a0 * t0) + __vlibm_TBL_sincos_lo[j0+n0];
267 t1 = (__vlibm_TBL_sincos_hi[j1+((n1+xsb1)&3)] * w1 + a1 * t1) + __vlibm_TBL_sincos_lo[j1+n1];
268 t2 = (__vlibm_TBL_sincos_hi[j2+((n2+xsb2)&3)] * w2 + a2 * t2) + __vlibm_TBL_sincos_lo[j2+n2];
269 *py0 = ( a0 + t0 );
270 *py1 = ( a1 + t1 );
271 *py2 = ( a2 + t2 );
272 break;
274 case 1:
275 j0 = n0 & 1;
276 j1 = (xsb1 + 0x4000) & 0xffff8000;
277 j2 = (xsb2 + 0x4000) & 0xffff8000;
278 HI(&t1) = j1;
279 HI(&t2) = j2;
280 LO(&t1) = 0;
281 LO(&t2) = 0;
282 x0_or_one[0] = x0;
283 x0_or_one[2] = -x0;
284 y0_or_zero[0] = y0;
285 y0_or_zero[2] = -y0;
286 x1 = (x1 - t1) + y1;
287 x2 = (x2 - t2) + y2;
288 z0 = x0 * x0;
289 z1 = x1 * x1;
290 z2 = x2 * x2;
291 t0 = z0 * (poly3[j0] + z0 * poly4[j0]);
292 t1 = z1 * (qq1 + z1 * qq2);
293 t2 = z2 * (qq1 + z2 * qq2);
294 t0 = z0 * (poly1[j0] + z0 * (poly2[j0] + t0));
295 w1 = x1 * (one + z1 * (pp1 + z1 * pp2));
296 w2 = x2 * (one + z2 * (pp1 + z2 * pp2));
297 j1 = (((j1 & ~0x80000000) - 0x3fc40000) >> 13) & ~0x3;
298 j2 = (((j2 & ~0x80000000) - 0x3fc40000) >> 13) & ~0x3;
299 xsb1 = (xsb1 >> 30) & 2;
300 xsb2 = (xsb2 >> 30) & 2;
301 n1 ^= (xsb1 & ~(n1 << 1));
302 n2 ^= (xsb2 & ~(n2 << 1));
303 xsb1 |= 1;
304 xsb2 |= 1;
305 a1 = __vlibm_TBL_sincos_hi[j1+n1];
306 a2 = __vlibm_TBL_sincos_hi[j2+n2];
307 t0 = x0_or_one[n0] + (y0_or_zero[n0] + x0_or_one[n0] * t0);
308 t1 = (__vlibm_TBL_sincos_hi[j1+((n1+xsb1)&3)] * w1 + a1 * t1) + __vlibm_TBL_sincos_lo[j1+n1];
309 t2 = (__vlibm_TBL_sincos_hi[j2+((n2+xsb2)&3)] * w2 + a2 * t2) + __vlibm_TBL_sincos_lo[j2+n2];
310 *py0 = t0;
311 *py1 = ( a1 + t1 );
312 *py2 = ( a2 + t2 );
313 break;
315 case 2:
316 j0 = (xsb0 + 0x4000) & 0xffff8000;
317 j1 = n1 & 1;
318 j2 = (xsb2 + 0x4000) & 0xffff8000;
319 HI(&t0) = j0;
320 HI(&t2) = j2;
321 LO(&t0) = 0;
322 LO(&t2) = 0;
323 x1_or_one[0] = x1;
324 x1_or_one[2] = -x1;
325 x0 = (x0 - t0) + y0;
326 y1_or_zero[0] = y1;
327 y1_or_zero[2] = -y1;
328 x2 = (x2 - t2) + y2;
329 z0 = x0 * x0;
330 z1 = x1 * x1;
331 z2 = x2 * x2;
332 t0 = z0 * (qq1 + z0 * qq2);
333 t1 = z1 * (poly3[j1] + z1 * poly4[j1]);
334 t2 = z2 * (qq1 + z2 * qq2);
335 w0 = x0 * (one + z0 * (pp1 + z0 * pp2));
336 t1 = z1 * (poly1[j1] + z1 * (poly2[j1] + t1));
337 w2 = x2 * (one + z2 * (pp1 + z2 * pp2));
338 j0 = (((j0 & ~0x80000000) - 0x3fc40000) >> 13) & ~0x3;
339 j2 = (((j2 & ~0x80000000) - 0x3fc40000) >> 13) & ~0x3;
340 xsb0 = (xsb0 >> 30) & 2;
341 xsb2 = (xsb2 >> 30) & 2;
342 n0 ^= (xsb0 & ~(n0 << 1));
343 n2 ^= (xsb2 & ~(n2 << 1));
344 xsb0 |= 1;
345 xsb2 |= 1;
346 a0 = __vlibm_TBL_sincos_hi[j0+n0];
347 a2 = __vlibm_TBL_sincos_hi[j2+n2];
348 t0 = (__vlibm_TBL_sincos_hi[j0+((n0+xsb0)&3)] * w0 + a0 * t0) + __vlibm_TBL_sincos_lo[j0+n0];
349 t1 = x1_or_one[n1] + (y1_or_zero[n1] + x1_or_one[n1] * t1);
350 t2 = (__vlibm_TBL_sincos_hi[j2+((n2+xsb2)&3)] * w2 + a2 * t2) + __vlibm_TBL_sincos_lo[j2+n2];
351 *py0 = ( a0 + t0 );
352 *py1 = t1;
353 *py2 = ( a2 + t2 );
354 break;
356 case 3:
357 j0 = n0 & 1;
358 j1 = n1 & 1;
359 j2 = (xsb2 + 0x4000) & 0xffff8000;
360 HI(&t2) = j2;
361 LO(&t2) = 0;
362 x0_or_one[0] = x0;
363 x0_or_one[2] = -x0;
364 x1_or_one[0] = x1;
365 x1_or_one[2] = -x1;
366 y0_or_zero[0] = y0;
367 y0_or_zero[2] = -y0;
368 y1_or_zero[0] = y1;
369 y1_or_zero[2] = -y1;
370 x2 = (x2 - t2) + y2;
371 z0 = x0 * x0;
372 z1 = x1 * x1;
373 z2 = x2 * x2;
374 t0 = z0 * (poly3[j0] + z0 * poly4[j0]);
375 t1 = z1 * (poly3[j1] + z1 * poly4[j1]);
376 t2 = z2 * (qq1 + z2 * qq2);
377 t0 = z0 * (poly1[j0] + z0 * (poly2[j0] + t0));
378 t1 = z1 * (poly1[j1] + z1 * (poly2[j1] + t1));
379 w2 = x2 * (one + z2 * (pp1 + z2 * pp2));
380 j2 = (((j2 & ~0x80000000) - 0x3fc40000) >> 13) & ~0x3;
381 xsb2 = (xsb2 >> 30) & 2;
382 n2 ^= (xsb2 & ~(n2 << 1));
383 xsb2 |= 1;
384 a2 = __vlibm_TBL_sincos_hi[j2+n2];
385 t0 = x0_or_one[n0] + (y0_or_zero[n0] + x0_or_one[n0] * t0);
386 t1 = x1_or_one[n1] + (y1_or_zero[n1] + x1_or_one[n1] * t1);
387 t2 = (__vlibm_TBL_sincos_hi[j2+((n2+xsb2)&3)] * w2 + a2 * t2) + __vlibm_TBL_sincos_lo[j2+n2];
388 *py0 = t0;
389 *py1 = t1;
390 *py2 = ( a2 + t2 );
391 break;
393 case 4:
394 j0 = (xsb0 + 0x4000) & 0xffff8000;
395 j1 = (xsb1 + 0x4000) & 0xffff8000;
396 j2 = n2 & 1;
397 HI(&t0) = j0;
398 HI(&t1) = j1;
399 LO(&t0) = 0;
400 LO(&t1) = 0;
401 x2_or_one[0] = x2;
402 x2_or_one[2] = -x2;
403 x0 = (x0 - t0) + y0;
404 x1 = (x1 - t1) + y1;
405 y2_or_zero[0] = y2;
406 y2_or_zero[2] = -y2;
407 z0 = x0 * x0;
408 z1 = x1 * x1;
409 z2 = x2 * x2;
410 t0 = z0 * (qq1 + z0 * qq2);
411 t1 = z1 * (qq1 + z1 * qq2);
412 t2 = z2 * (poly3[j2] + z2 * poly4[j2]);
413 w0 = x0 * (one + z0 * (pp1 + z0 * pp2));
414 w1 = x1 * (one + z1 * (pp1 + z1 * pp2));
415 t2 = z2 * (poly1[j2] + z2 * (poly2[j2] + t2));
416 j0 = (((j0 & ~0x80000000) - 0x3fc40000) >> 13) & ~0x3;
417 j1 = (((j1 & ~0x80000000) - 0x3fc40000) >> 13) & ~0x3;
418 xsb0 = (xsb0 >> 30) & 2;
419 xsb1 = (xsb1 >> 30) & 2;
420 n0 ^= (xsb0 & ~(n0 << 1));
421 n1 ^= (xsb1 & ~(n1 << 1));
422 xsb0 |= 1;
423 xsb1 |= 1;
424 a0 = __vlibm_TBL_sincos_hi[j0+n0];
425 a1 = __vlibm_TBL_sincos_hi[j1+n1];
426 t0 = (__vlibm_TBL_sincos_hi[j0+((n0+xsb0)&3)] * w0 + a0 * t0) + __vlibm_TBL_sincos_lo[j0+n0];
427 t1 = (__vlibm_TBL_sincos_hi[j1+((n1+xsb1)&3)] * w1 + a1 * t1) + __vlibm_TBL_sincos_lo[j1+n1];
428 t2 = x2_or_one[n2] + (y2_or_zero[n2] + x2_or_one[n2] * t2);
429 *py0 = ( a0 + t0 );
430 *py1 = ( a1 + t1 );
431 *py2 = t2;
432 break;
434 case 5:
435 j0 = n0 & 1;
436 j1 = (xsb1 + 0x4000) & 0xffff8000;
437 j2 = n2 & 1;
438 HI(&t1) = j1;
439 LO(&t1) = 0;
440 x0_or_one[0] = x0;
441 x0_or_one[2] = -x0;
442 x2_or_one[0] = x2;
443 x2_or_one[2] = -x2;
444 y0_or_zero[0] = y0;
445 y0_or_zero[2] = -y0;
446 x1 = (x1 - t1) + y1;
447 y2_or_zero[0] = y2;
448 y2_or_zero[2] = -y2;
449 z0 = x0 * x0;
450 z1 = x1 * x1;
451 z2 = x2 * x2;
452 t0 = z0 * (poly3[j0] + z0 * poly4[j0]);
453 t1 = z1 * (qq1 + z1 * qq2);
454 t2 = z2 * (poly3[j2] + z2 * poly4[j2]);
455 t0 = z0 * (poly1[j0] + z0 * (poly2[j0] + t0));
456 w1 = x1 * (one + z1 * (pp1 + z1 * pp2));
457 t2 = z2 * (poly1[j2] + z2 * (poly2[j2] + t2));
458 j1 = (((j1 & ~0x80000000) - 0x3fc40000) >> 13) & ~0x3;
459 xsb1 = (xsb1 >> 30) & 2;
460 n1 ^= (xsb1 & ~(n1 << 1));
461 xsb1 |= 1;
462 a1 = __vlibm_TBL_sincos_hi[j1+n1];
463 t0 = x0_or_one[n0] + (y0_or_zero[n0] + x0_or_one[n0] * t0);
464 t1 = (__vlibm_TBL_sincos_hi[j1+((n1+xsb1)&3)] * w1 + a1 * t1) + __vlibm_TBL_sincos_lo[j1+n1];
465 t2 = x2_or_one[n2] + (y2_or_zero[n2] + x2_or_one[n2] * t2);
466 *py0 = t0;
467 *py1 = ( a1 + t1 );
468 *py2 = t2;
469 break;
471 case 6:
472 j0 = (xsb0 + 0x4000) & 0xffff8000;
473 j1 = n1 & 1;
474 j2 = n2 & 1;
475 HI(&t0) = j0;
476 LO(&t0) = 0;
477 x1_or_one[0] = x1;
478 x1_or_one[2] = -x1;
479 x2_or_one[0] = x2;
480 x2_or_one[2] = -x2;
481 x0 = (x0 - t0) + y0;
482 y1_or_zero[0] = y1;
483 y1_or_zero[2] = -y1;
484 y2_or_zero[0] = y2;
485 y2_or_zero[2] = -y2;
486 z0 = x0 * x0;
487 z1 = x1 * x1;
488 z2 = x2 * x2;
489 t0 = z0 * (qq1 + z0 * qq2);
490 t1 = z1 * (poly3[j1] + z1 * poly4[j1]);
491 t2 = z2 * (poly3[j2] + z2 * poly4[j2]);
492 w0 = x0 * (one + z0 * (pp1 + z0 * pp2));
493 t1 = z1 * (poly1[j1] + z1 * (poly2[j1] + t1));
494 t2 = z2 * (poly1[j2] + z2 * (poly2[j2] + t2));
495 j0 = (((j0 & ~0x80000000) - 0x3fc40000) >> 13) & ~0x3;
496 xsb0 = (xsb0 >> 30) & 2;
497 n0 ^= (xsb0 & ~(n0 << 1));
498 xsb0 |= 1;
499 a0 = __vlibm_TBL_sincos_hi[j0+n0];
500 t0 = (__vlibm_TBL_sincos_hi[j0+((n0+xsb0)&3)] * w0 + a0 * t0) + __vlibm_TBL_sincos_lo[j0+n0];
501 t1 = x1_or_one[n1] + (y1_or_zero[n1] + x1_or_one[n1] * t1);
502 t2 = x2_or_one[n2] + (y2_or_zero[n2] + x2_or_one[n2] * t2);
503 *py0 = ( a0 + t0 );
504 *py1 = t1;
505 *py2 = t2;
506 break;
508 case 7:
509 j0 = n0 & 1;
510 j1 = n1 & 1;
511 j2 = n2 & 1;
512 x0_or_one[0] = x0;
513 x0_or_one[2] = -x0;
514 x1_or_one[0] = x1;
515 x1_or_one[2] = -x1;
516 x2_or_one[0] = x2;
517 x2_or_one[2] = -x2;
518 y0_or_zero[0] = y0;
519 y0_or_zero[2] = -y0;
520 y1_or_zero[0] = y1;
521 y1_or_zero[2] = -y1;
522 y2_or_zero[0] = y2;
523 y2_or_zero[2] = -y2;
524 z0 = x0 * x0;
525 z1 = x1 * x1;
526 z2 = x2 * x2;
527 t0 = z0 * (poly3[j0] + z0 * poly4[j0]);
528 t1 = z1 * (poly3[j1] + z1 * poly4[j1]);
529 t2 = z2 * (poly3[j2] + z2 * poly4[j2]);
530 t0 = z0 * (poly1[j0] + z0 * (poly2[j0] + t0));
531 t1 = z1 * (poly1[j1] + z1 * (poly2[j1] + t1));
532 t2 = z2 * (poly1[j2] + z2 * (poly2[j2] + t2));
533 t0 = x0_or_one[n0] + (y0_or_zero[n0] + x0_or_one[n0] * t0);
534 t1 = x1_or_one[n1] + (y1_or_zero[n1] + x1_or_one[n1] * t1);
535 t2 = x2_or_one[n2] + (y2_or_zero[n2] + x2_or_one[n2] * t2);
536 *py0 = t0;
537 *py1 = t1;
538 *py2 = t2;
539 break;
542 x += stridex;
543 y += stridey;
544 i = 0;
545 } while (--n > 0);
547 if (i > 0)
549 double fn0, fn1, a0, a1, w0, w1, y0, y1;
550 double t0, t1, z0, z1;
551 unsigned j0, j1;
552 int n0, n1;
554 if (i > 1)
556 n1 = (int) (x1 * invpio2 + half[xsb1]);
557 fn1 = (double) n1;
558 n1 = (n1 + 1) & 3; /* Add 1 (before the mod) to make sin into cos */
559 a1 = x1 - fn1 * pio2_1;
560 w1 = fn1 * pio2_2;
561 x1 = a1 - w1;
562 y1 = (a1 - x1) - w1;
563 a1 = x1;
564 w1 = fn1 * pio2_3 - y1;
565 x1 = a1 - w1;
566 y1 = (a1 - x1) - w1;
567 a1 = x1;
568 w1 = fn1 * pio2_3t - y1;
569 x1 = a1 - w1;
570 y1 = (a1 - x1) - w1;
571 xsb1 = HI(&x1);
572 if ((xsb1 & ~0x80000000) < thresh[n1&1])
574 j1 = n1 & 1;
575 x1_or_one[0] = x1;
576 x1_or_one[2] = -x1;
577 y1_or_zero[0] = y1;
578 y1_or_zero[2] = -y1;
579 z1 = x1 * x1;
580 t1 = z1 * (poly3[j1] + z1 * poly4[j1]);
581 t1 = z1 * (poly1[j1] + z1 * (poly2[j1] + t1));
582 t1 = x1_or_one[n1] + (y1_or_zero[n1] + x1_or_one[n1] * t1);
583 *py1 = t1;
585 else
587 j1 = (xsb1 + 0x4000) & 0xffff8000;
588 HI(&t1) = j1;
589 LO(&t1) = 0;
590 x1 = (x1 - t1) + y1;
591 z1 = x1 * x1;
592 t1 = z1 * (qq1 + z1 * qq2);
593 w1 = x1 * (one + z1 * (pp1 + z1 * pp2));
594 j1 = (((j1 & ~0x80000000) - 0x3fc40000) >> 13) & ~0x3;
595 xsb1 = (xsb1 >> 30) & 2;
596 n1 ^= (xsb1 & ~(n1 << 1));
597 xsb1 |= 1;
598 a1 = __vlibm_TBL_sincos_hi[j1+n1];
599 t1 = (__vlibm_TBL_sincos_hi[j1+((n1+xsb1)&3)] * w1 + a1 * t1) + __vlibm_TBL_sincos_lo[j1+n1];
600 *py1 = ( a1 + t1 );
603 n0 = (int) (x0 * invpio2 + half[xsb0]);
604 fn0 = (double) n0;
605 n0 = (n0 + 1) & 3; /* Add 1 (before the mod) to make sin into cos */
606 a0 = x0 - fn0 * pio2_1;
607 w0 = fn0 * pio2_2;
608 x0 = a0 - w0;
609 y0 = (a0 - x0) - w0;
610 a0 = x0;
611 w0 = fn0 * pio2_3 - y0;
612 x0 = a0 - w0;
613 y0 = (a0 - x0) - w0;
614 a0 = x0;
615 w0 = fn0 * pio2_3t - y0;
616 x0 = a0 - w0;
617 y0 = (a0 - x0) - w0;
618 xsb0 = HI(&x0);
619 if ((xsb0 & ~0x80000000) < thresh[n0&1])
621 j0 = n0 & 1;
622 x0_or_one[0] = x0;
623 x0_or_one[2] = -x0;
624 y0_or_zero[0] = y0;
625 y0_or_zero[2] = -y0;
626 z0 = x0 * x0;
627 t0 = z0 * (poly3[j0] + z0 * poly4[j0]);
628 t0 = z0 * (poly1[j0] + z0 * (poly2[j0] + t0));
629 t0 = x0_or_one[n0] + (y0_or_zero[n0] + x0_or_one[n0] * t0);
630 *py0 = t0;
632 else
634 j0 = (xsb0 + 0x4000) & 0xffff8000;
635 HI(&t0) = j0;
636 LO(&t0) = 0;
637 x0 = (x0 - t0) + y0;
638 z0 = x0 * x0;
639 t0 = z0 * (qq1 + z0 * qq2);
640 w0 = x0 * (one + z0 * (pp1 + z0 * pp2));
641 j0 = (((j0 & ~0x80000000) - 0x3fc40000) >> 13) & ~0x3;
642 xsb0 = (xsb0 >> 30) & 2;
643 n0 ^= (xsb0 & ~(n0 << 1));
644 xsb0 |= 1;
645 a0 = __vlibm_TBL_sincos_hi[j0+n0];
646 t0 = (__vlibm_TBL_sincos_hi[j0+((n0+xsb0)&3)] * w0 + a0 * t0) + __vlibm_TBL_sincos_lo[j0+n0];
647 *py0 = ( a0 + t0 );
651 if (biguns)
652 __vlibm_vcos_big(nsave, xsave, sxsave, ysave, sysave, 0x413921fb);