4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License (the "License").
6 * You may not use this file except in compliance with the License.
8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 * or http://www.opensolaris.org/os/licensing.
10 * See the License for the specific language governing permissions
11 * and limitations under the License.
13 * When distributing Covered Code, include this CDDL HEADER in each
14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 * If applicable, add the following below this CDDL HEADER, with the
16 * fields enclosed by brackets "[]" replaced with your own identifying
17 * information: Portions Copyright [yyyy] [name of copyright owner]
23 * Copyright 2011 Nexenta Systems, Inc. All rights reserved.
26 * Copyright 2006 Sun Microsystems, Inc. All rights reserved.
27 * Use is subject to license terms.
30 #include <sys/isa_defs.h>
33 #define HI(x) *(1+(int*)x)
34 #define LO(x) *(unsigned*)x
36 #define HI(x) *(int*)x
37 #define LO(x) *(1+(unsigned*)x)
41 #define restrict _Restrict
46 extern const double __vlibm_TBL_sincos_hi
[], __vlibm_TBL_sincos_lo
[];
49 half
[2] = { 0.5, -0.5 },
51 invpio2
= 0.636619772367581343075535,
52 pio2_1
= 1.570796326734125614166,
53 pio2_2
= 6.077100506303965976596e-11,
54 pio2_3
= 2.022266248711166455796e-21,
55 pio2_3t
= 8.478427660368899643959e-32,
56 pp1
= -1.666666666605760465276263943134982554676e-0001,
57 pp2
= 8.333261209690963126718376566146180944442e-0003,
58 qq1
= -4.999999999977710986407023955908711557870e-0001,
59 qq2
= 4.166654863857219350645055881018842089580e-0002,
60 poly1
[2]= { -1.666666666666629669805215138920301589656e-0001,
61 -4.999999999999931701464060878888294524481e-0001 },
62 poly2
[2]= { 8.333333332390951295683993455280336376663e-0003,
63 4.166666666394861917535640593963708222319e-0002 },
64 poly3
[2]= { -1.984126237997976692791551778230098403960e-0004,
65 -1.388888552656142867832756687736851681462e-0003 },
66 poly4
[2]= { 2.753403624854277237649987622848330351110e-0006,
67 2.478519423681460796618128289454530524759e-0005 };
69 static const unsigned thresh
[2] = { 0x3fc90000, 0x3fc40000 };
71 extern void __vlibm_vcos_big(int, double *, int, double *, int, int);
74 __vlibm_vcos_big_ultra3(int n
, double * restrict x
, int stridex
, double * restrict y
,
75 int stridey
, int pthresh
)
77 double x0_or_one
[4], x1_or_one
[4], x2_or_one
[4];
78 double y0_or_zero
[4], y1_or_zero
[4], y2_or_zero
[4];
79 double x0
, x1
, x2
, *py0
, *py1
, *py2
, *xsave
, *ysave
;
80 unsigned xsb0
, xsb1
, xsb2
;
81 int i
, biguns
, nsave
, sxsave
, sysave
;
105 double fn0
, fn1
, fn2
, a0
, a1
, a2
, w0
, w1
, w2
, y0
, y1
, y2
;
113 if (hx
<= pthresh
|| hx
> 0x413921fb)
115 if (hx
> 0x413921fb && hx
< 0x7ff00000)
136 if (hx
<= pthresh
|| hx
> 0x413921fb)
138 if (hx
> 0x413921fb && hx
< 0x7ff00000)
159 if (hx
<= pthresh
|| hx
> 0x413921fb)
161 if (hx
> 0x413921fb && hx
< 0x7ff00000)
173 n0
= (int) (x0
* invpio2
+ half
[xsb0
]);
174 n1
= (int) (x1
* invpio2
+ half
[xsb1
]);
175 n2
= (int) (x2
* invpio2
+ half
[xsb2
]);
179 n0
= (n0
+ 1) & 3; /* Add 1 (before the mod) to make sin into cos */
182 a0
= x0
- fn0
* pio2_1
;
183 a1
= x1
- fn1
* pio2_1
;
184 a2
= x2
- fn2
* pio2_1
;
197 w0
= fn0
* pio2_3
- y0
;
198 w1
= fn1
* pio2_3
- y1
;
199 w2
= fn2
* pio2_3
- y2
;
209 w0
= fn0
* pio2_3t
- y0
;
210 w1
= fn1
* pio2_3t
- y1
;
211 w2
= fn2
* pio2_3t
- y2
;
219 i
= ((xsb0
& ~0x80000000) - thresh
[n0
&1]) >> 31;
221 i
|= (((xsb1
& ~0x80000000) - thresh
[n1
&1]) >> 30) & 2;
223 i
|= (((xsb2
& ~0x80000000) - thresh
[n2
&1]) >> 29) & 4;
226 double t0
, t1
, t2
, z0
, z1
, z2
;
230 j0
= (xsb0
+ 0x4000) & 0xffff8000;
231 j1
= (xsb1
+ 0x4000) & 0xffff8000;
232 j2
= (xsb2
+ 0x4000) & 0xffff8000;
245 t0
= z0
* (qq1
+ z0
* qq2
);
246 t1
= z1
* (qq1
+ z1
* qq2
);
247 t2
= z2
* (qq1
+ z2
* qq2
);
248 w0
= x0
* (one
+ z0
* (pp1
+ z0
* pp2
));
249 w1
= x1
* (one
+ z1
* (pp1
+ z1
* pp2
));
250 w2
= x2
* (one
+ z2
* (pp1
+ z2
* pp2
));
251 j0
= (((j0
& ~0x80000000) - 0x3fc40000) >> 13) & ~0x3;
252 j1
= (((j1
& ~0x80000000) - 0x3fc40000) >> 13) & ~0x3;
253 j2
= (((j2
& ~0x80000000) - 0x3fc40000) >> 13) & ~0x3;
254 xsb0
= (xsb0
>> 30) & 2;
255 xsb1
= (xsb1
>> 30) & 2;
256 xsb2
= (xsb2
>> 30) & 2;
257 n0
^= (xsb0
& ~(n0
<< 1));
258 n1
^= (xsb1
& ~(n1
<< 1));
259 n2
^= (xsb2
& ~(n2
<< 1));
263 a0
= __vlibm_TBL_sincos_hi
[j0
+n0
];
264 a1
= __vlibm_TBL_sincos_hi
[j1
+n1
];
265 a2
= __vlibm_TBL_sincos_hi
[j2
+n2
];
266 t0
= (__vlibm_TBL_sincos_hi
[j0
+((n0
+xsb0
)&3)] * w0
+ a0
* t0
) + __vlibm_TBL_sincos_lo
[j0
+n0
];
267 t1
= (__vlibm_TBL_sincos_hi
[j1
+((n1
+xsb1
)&3)] * w1
+ a1
* t1
) + __vlibm_TBL_sincos_lo
[j1
+n1
];
268 t2
= (__vlibm_TBL_sincos_hi
[j2
+((n2
+xsb2
)&3)] * w2
+ a2
* t2
) + __vlibm_TBL_sincos_lo
[j2
+n2
];
276 j1
= (xsb1
+ 0x4000) & 0xffff8000;
277 j2
= (xsb2
+ 0x4000) & 0xffff8000;
291 t0
= z0
* (poly3
[j0
] + z0
* poly4
[j0
]);
292 t1
= z1
* (qq1
+ z1
* qq2
);
293 t2
= z2
* (qq1
+ z2
* qq2
);
294 t0
= z0
* (poly1
[j0
] + z0
* (poly2
[j0
] + t0
));
295 w1
= x1
* (one
+ z1
* (pp1
+ z1
* pp2
));
296 w2
= x2
* (one
+ z2
* (pp1
+ z2
* pp2
));
297 j1
= (((j1
& ~0x80000000) - 0x3fc40000) >> 13) & ~0x3;
298 j2
= (((j2
& ~0x80000000) - 0x3fc40000) >> 13) & ~0x3;
299 xsb1
= (xsb1
>> 30) & 2;
300 xsb2
= (xsb2
>> 30) & 2;
301 n1
^= (xsb1
& ~(n1
<< 1));
302 n2
^= (xsb2
& ~(n2
<< 1));
305 a1
= __vlibm_TBL_sincos_hi
[j1
+n1
];
306 a2
= __vlibm_TBL_sincos_hi
[j2
+n2
];
307 t0
= x0_or_one
[n0
] + (y0_or_zero
[n0
] + x0_or_one
[n0
] * t0
);
308 t1
= (__vlibm_TBL_sincos_hi
[j1
+((n1
+xsb1
)&3)] * w1
+ a1
* t1
) + __vlibm_TBL_sincos_lo
[j1
+n1
];
309 t2
= (__vlibm_TBL_sincos_hi
[j2
+((n2
+xsb2
)&3)] * w2
+ a2
* t2
) + __vlibm_TBL_sincos_lo
[j2
+n2
];
316 j0
= (xsb0
+ 0x4000) & 0xffff8000;
318 j2
= (xsb2
+ 0x4000) & 0xffff8000;
332 t0
= z0
* (qq1
+ z0
* qq2
);
333 t1
= z1
* (poly3
[j1
] + z1
* poly4
[j1
]);
334 t2
= z2
* (qq1
+ z2
* qq2
);
335 w0
= x0
* (one
+ z0
* (pp1
+ z0
* pp2
));
336 t1
= z1
* (poly1
[j1
] + z1
* (poly2
[j1
] + t1
));
337 w2
= x2
* (one
+ z2
* (pp1
+ z2
* pp2
));
338 j0
= (((j0
& ~0x80000000) - 0x3fc40000) >> 13) & ~0x3;
339 j2
= (((j2
& ~0x80000000) - 0x3fc40000) >> 13) & ~0x3;
340 xsb0
= (xsb0
>> 30) & 2;
341 xsb2
= (xsb2
>> 30) & 2;
342 n0
^= (xsb0
& ~(n0
<< 1));
343 n2
^= (xsb2
& ~(n2
<< 1));
346 a0
= __vlibm_TBL_sincos_hi
[j0
+n0
];
347 a2
= __vlibm_TBL_sincos_hi
[j2
+n2
];
348 t0
= (__vlibm_TBL_sincos_hi
[j0
+((n0
+xsb0
)&3)] * w0
+ a0
* t0
) + __vlibm_TBL_sincos_lo
[j0
+n0
];
349 t1
= x1_or_one
[n1
] + (y1_or_zero
[n1
] + x1_or_one
[n1
] * t1
);
350 t2
= (__vlibm_TBL_sincos_hi
[j2
+((n2
+xsb2
)&3)] * w2
+ a2
* t2
) + __vlibm_TBL_sincos_lo
[j2
+n2
];
359 j2
= (xsb2
+ 0x4000) & 0xffff8000;
374 t0
= z0
* (poly3
[j0
] + z0
* poly4
[j0
]);
375 t1
= z1
* (poly3
[j1
] + z1
* poly4
[j1
]);
376 t2
= z2
* (qq1
+ z2
* qq2
);
377 t0
= z0
* (poly1
[j0
] + z0
* (poly2
[j0
] + t0
));
378 t1
= z1
* (poly1
[j1
] + z1
* (poly2
[j1
] + t1
));
379 w2
= x2
* (one
+ z2
* (pp1
+ z2
* pp2
));
380 j2
= (((j2
& ~0x80000000) - 0x3fc40000) >> 13) & ~0x3;
381 xsb2
= (xsb2
>> 30) & 2;
382 n2
^= (xsb2
& ~(n2
<< 1));
384 a2
= __vlibm_TBL_sincos_hi
[j2
+n2
];
385 t0
= x0_or_one
[n0
] + (y0_or_zero
[n0
] + x0_or_one
[n0
] * t0
);
386 t1
= x1_or_one
[n1
] + (y1_or_zero
[n1
] + x1_or_one
[n1
] * t1
);
387 t2
= (__vlibm_TBL_sincos_hi
[j2
+((n2
+xsb2
)&3)] * w2
+ a2
* t2
) + __vlibm_TBL_sincos_lo
[j2
+n2
];
394 j0
= (xsb0
+ 0x4000) & 0xffff8000;
395 j1
= (xsb1
+ 0x4000) & 0xffff8000;
410 t0
= z0
* (qq1
+ z0
* qq2
);
411 t1
= z1
* (qq1
+ z1
* qq2
);
412 t2
= z2
* (poly3
[j2
] + z2
* poly4
[j2
]);
413 w0
= x0
* (one
+ z0
* (pp1
+ z0
* pp2
));
414 w1
= x1
* (one
+ z1
* (pp1
+ z1
* pp2
));
415 t2
= z2
* (poly1
[j2
] + z2
* (poly2
[j2
] + t2
));
416 j0
= (((j0
& ~0x80000000) - 0x3fc40000) >> 13) & ~0x3;
417 j1
= (((j1
& ~0x80000000) - 0x3fc40000) >> 13) & ~0x3;
418 xsb0
= (xsb0
>> 30) & 2;
419 xsb1
= (xsb1
>> 30) & 2;
420 n0
^= (xsb0
& ~(n0
<< 1));
421 n1
^= (xsb1
& ~(n1
<< 1));
424 a0
= __vlibm_TBL_sincos_hi
[j0
+n0
];
425 a1
= __vlibm_TBL_sincos_hi
[j1
+n1
];
426 t0
= (__vlibm_TBL_sincos_hi
[j0
+((n0
+xsb0
)&3)] * w0
+ a0
* t0
) + __vlibm_TBL_sincos_lo
[j0
+n0
];
427 t1
= (__vlibm_TBL_sincos_hi
[j1
+((n1
+xsb1
)&3)] * w1
+ a1
* t1
) + __vlibm_TBL_sincos_lo
[j1
+n1
];
428 t2
= x2_or_one
[n2
] + (y2_or_zero
[n2
] + x2_or_one
[n2
] * t2
);
436 j1
= (xsb1
+ 0x4000) & 0xffff8000;
452 t0
= z0
* (poly3
[j0
] + z0
* poly4
[j0
]);
453 t1
= z1
* (qq1
+ z1
* qq2
);
454 t2
= z2
* (poly3
[j2
] + z2
* poly4
[j2
]);
455 t0
= z0
* (poly1
[j0
] + z0
* (poly2
[j0
] + t0
));
456 w1
= x1
* (one
+ z1
* (pp1
+ z1
* pp2
));
457 t2
= z2
* (poly1
[j2
] + z2
* (poly2
[j2
] + t2
));
458 j1
= (((j1
& ~0x80000000) - 0x3fc40000) >> 13) & ~0x3;
459 xsb1
= (xsb1
>> 30) & 2;
460 n1
^= (xsb1
& ~(n1
<< 1));
462 a1
= __vlibm_TBL_sincos_hi
[j1
+n1
];
463 t0
= x0_or_one
[n0
] + (y0_or_zero
[n0
] + x0_or_one
[n0
] * t0
);
464 t1
= (__vlibm_TBL_sincos_hi
[j1
+((n1
+xsb1
)&3)] * w1
+ a1
* t1
) + __vlibm_TBL_sincos_lo
[j1
+n1
];
465 t2
= x2_or_one
[n2
] + (y2_or_zero
[n2
] + x2_or_one
[n2
] * t2
);
472 j0
= (xsb0
+ 0x4000) & 0xffff8000;
489 t0
= z0
* (qq1
+ z0
* qq2
);
490 t1
= z1
* (poly3
[j1
] + z1
* poly4
[j1
]);
491 t2
= z2
* (poly3
[j2
] + z2
* poly4
[j2
]);
492 w0
= x0
* (one
+ z0
* (pp1
+ z0
* pp2
));
493 t1
= z1
* (poly1
[j1
] + z1
* (poly2
[j1
] + t1
));
494 t2
= z2
* (poly1
[j2
] + z2
* (poly2
[j2
] + t2
));
495 j0
= (((j0
& ~0x80000000) - 0x3fc40000) >> 13) & ~0x3;
496 xsb0
= (xsb0
>> 30) & 2;
497 n0
^= (xsb0
& ~(n0
<< 1));
499 a0
= __vlibm_TBL_sincos_hi
[j0
+n0
];
500 t0
= (__vlibm_TBL_sincos_hi
[j0
+((n0
+xsb0
)&3)] * w0
+ a0
* t0
) + __vlibm_TBL_sincos_lo
[j0
+n0
];
501 t1
= x1_or_one
[n1
] + (y1_or_zero
[n1
] + x1_or_one
[n1
] * t1
);
502 t2
= x2_or_one
[n2
] + (y2_or_zero
[n2
] + x2_or_one
[n2
] * t2
);
527 t0
= z0
* (poly3
[j0
] + z0
* poly4
[j0
]);
528 t1
= z1
* (poly3
[j1
] + z1
* poly4
[j1
]);
529 t2
= z2
* (poly3
[j2
] + z2
* poly4
[j2
]);
530 t0
= z0
* (poly1
[j0
] + z0
* (poly2
[j0
] + t0
));
531 t1
= z1
* (poly1
[j1
] + z1
* (poly2
[j1
] + t1
));
532 t2
= z2
* (poly1
[j2
] + z2
* (poly2
[j2
] + t2
));
533 t0
= x0_or_one
[n0
] + (y0_or_zero
[n0
] + x0_or_one
[n0
] * t0
);
534 t1
= x1_or_one
[n1
] + (y1_or_zero
[n1
] + x1_or_one
[n1
] * t1
);
535 t2
= x2_or_one
[n2
] + (y2_or_zero
[n2
] + x2_or_one
[n2
] * t2
);
549 double fn0
, fn1
, a0
, a1
, w0
, w1
, y0
, y1
;
550 double t0
, t1
, z0
, z1
;
556 n1
= (int) (x1
* invpio2
+ half
[xsb1
]);
558 n1
= (n1
+ 1) & 3; /* Add 1 (before the mod) to make sin into cos */
559 a1
= x1
- fn1
* pio2_1
;
564 w1
= fn1
* pio2_3
- y1
;
568 w1
= fn1
* pio2_3t
- y1
;
572 if ((xsb1
& ~0x80000000) < thresh
[n1
&1])
580 t1
= z1
* (poly3
[j1
] + z1
* poly4
[j1
]);
581 t1
= z1
* (poly1
[j1
] + z1
* (poly2
[j1
] + t1
));
582 t1
= x1_or_one
[n1
] + (y1_or_zero
[n1
] + x1_or_one
[n1
] * t1
);
587 j1
= (xsb1
+ 0x4000) & 0xffff8000;
592 t1
= z1
* (qq1
+ z1
* qq2
);
593 w1
= x1
* (one
+ z1
* (pp1
+ z1
* pp2
));
594 j1
= (((j1
& ~0x80000000) - 0x3fc40000) >> 13) & ~0x3;
595 xsb1
= (xsb1
>> 30) & 2;
596 n1
^= (xsb1
& ~(n1
<< 1));
598 a1
= __vlibm_TBL_sincos_hi
[j1
+n1
];
599 t1
= (__vlibm_TBL_sincos_hi
[j1
+((n1
+xsb1
)&3)] * w1
+ a1
* t1
) + __vlibm_TBL_sincos_lo
[j1
+n1
];
603 n0
= (int) (x0
* invpio2
+ half
[xsb0
]);
605 n0
= (n0
+ 1) & 3; /* Add 1 (before the mod) to make sin into cos */
606 a0
= x0
- fn0
* pio2_1
;
611 w0
= fn0
* pio2_3
- y0
;
615 w0
= fn0
* pio2_3t
- y0
;
619 if ((xsb0
& ~0x80000000) < thresh
[n0
&1])
627 t0
= z0
* (poly3
[j0
] + z0
* poly4
[j0
]);
628 t0
= z0
* (poly1
[j0
] + z0
* (poly2
[j0
] + t0
));
629 t0
= x0_or_one
[n0
] + (y0_or_zero
[n0
] + x0_or_one
[n0
] * t0
);
634 j0
= (xsb0
+ 0x4000) & 0xffff8000;
639 t0
= z0
* (qq1
+ z0
* qq2
);
640 w0
= x0
* (one
+ z0
* (pp1
+ z0
* pp2
));
641 j0
= (((j0
& ~0x80000000) - 0x3fc40000) >> 13) & ~0x3;
642 xsb0
= (xsb0
>> 30) & 2;
643 n0
^= (xsb0
& ~(n0
<< 1));
645 a0
= __vlibm_TBL_sincos_hi
[j0
+n0
];
646 t0
= (__vlibm_TBL_sincos_hi
[j0
+((n0
+xsb0
)&3)] * w0
+ a0
* t0
) + __vlibm_TBL_sincos_lo
[j0
+n0
];
652 __vlibm_vcos_big(nsave
, xsave
, sxsave
, ysave
, sysave
, 0x413921fb);