Merge tag 'pull-loongarch-20241016' of https://gitlab.com/gaosong/qemu into staging
[qemu/armbru.git] / target / m68k / softfloat.c
blob02dcc03d15d8a8164c0b30a01787c77545302291
1 /*
2 * Ported from a work by Andreas Grabher for Previous, NeXT Computer Emulator,
3 * derived from NetBSD M68040 FPSP functions,
4 * derived from release 2a of the SoftFloat IEC/IEEE Floating-point Arithmetic
5 * Package. Those parts of the code (and some later contributions) are
6 * provided under that license, as detailed below.
7 * It has subsequently been modified by contributors to the QEMU Project,
8 * so some portions are provided under:
9 * the SoftFloat-2a license
10 * the BSD license
11 * GPL-v2-or-later
13 * Any future contributions to this file will be taken to be licensed under
14 * the Softfloat-2a license unless specifically indicated otherwise.
18 * Portions of this work are licensed under the terms of the GNU GPL,
19 * version 2 or later. See the COPYING file in the top-level directory.
22 #include "qemu/osdep.h"
23 #include "softfloat.h"
24 #include "fpu/softfloat-macros.h"
25 #include "softfloat_fpsp_tables.h"
27 #define pi_exp 0x4000
28 #define piby2_exp 0x3FFF
29 #define pi_sig UINT64_C(0xc90fdaa22168c235)
31 static floatx80 propagateFloatx80NaNOneArg(floatx80 a, float_status *status)
33 if (floatx80_is_signaling_nan(a, status)) {
34 float_raise(float_flag_invalid, status);
35 a = floatx80_silence_nan(a, status);
38 if (status->default_nan_mode) {
39 return floatx80_default_nan(status);
42 return a;
46 * Returns the mantissa of the extended double-precision floating-point
47 * value `a'.
50 floatx80 floatx80_getman(floatx80 a, float_status *status)
52 bool aSign;
53 int32_t aExp;
54 uint64_t aSig;
56 aSig = extractFloatx80Frac(a);
57 aExp = extractFloatx80Exp(a);
58 aSign = extractFloatx80Sign(a);
60 if (aExp == 0x7FFF) {
61 if ((uint64_t) (aSig << 1)) {
62 return propagateFloatx80NaNOneArg(a , status);
64 float_raise(float_flag_invalid , status);
65 return floatx80_default_nan(status);
68 if (aExp == 0) {
69 if (aSig == 0) {
70 return packFloatx80(aSign, 0, 0);
72 normalizeFloatx80Subnormal(aSig, &aExp, &aSig);
75 return roundAndPackFloatx80(status->floatx80_rounding_precision, aSign,
76 0x3FFF, aSig, 0, status);
80 * Returns the exponent of the extended double-precision floating-point
81 * value `a' as an extended double-precision value.
84 floatx80 floatx80_getexp(floatx80 a, float_status *status)
86 bool aSign;
87 int32_t aExp;
88 uint64_t aSig;
90 aSig = extractFloatx80Frac(a);
91 aExp = extractFloatx80Exp(a);
92 aSign = extractFloatx80Sign(a);
94 if (aExp == 0x7FFF) {
95 if ((uint64_t) (aSig << 1)) {
96 return propagateFloatx80NaNOneArg(a , status);
98 float_raise(float_flag_invalid , status);
99 return floatx80_default_nan(status);
102 if (aExp == 0) {
103 if (aSig == 0) {
104 return packFloatx80(aSign, 0, 0);
106 normalizeFloatx80Subnormal(aSig, &aExp, &aSig);
109 return int32_to_floatx80(aExp - 0x3FFF, status);
113 * Scales extended double-precision floating-point value in operand `a' by
114 * value `b'. The function truncates the value in the second operand 'b' to
115 * an integral value and adds that value to the exponent of the operand 'a'.
116 * The operation performed according to the IEC/IEEE Standard for Binary
117 * Floating-Point Arithmetic.
120 floatx80 floatx80_scale(floatx80 a, floatx80 b, float_status *status)
122 bool aSign, bSign;
123 int32_t aExp, bExp, shiftCount;
124 uint64_t aSig, bSig;
126 aSig = extractFloatx80Frac(a);
127 aExp = extractFloatx80Exp(a);
128 aSign = extractFloatx80Sign(a);
129 bSig = extractFloatx80Frac(b);
130 bExp = extractFloatx80Exp(b);
131 bSign = extractFloatx80Sign(b);
133 if (bExp == 0x7FFF) {
134 if ((uint64_t) (bSig << 1) ||
135 ((aExp == 0x7FFF) && (uint64_t) (aSig << 1))) {
136 return propagateFloatx80NaN(a, b, status);
138 float_raise(float_flag_invalid , status);
139 return floatx80_default_nan(status);
141 if (aExp == 0x7FFF) {
142 if ((uint64_t) (aSig << 1)) {
143 return propagateFloatx80NaN(a, b, status);
145 return packFloatx80(aSign, floatx80_infinity.high,
146 floatx80_infinity.low);
148 if (aExp == 0) {
149 if (aSig == 0) {
150 return packFloatx80(aSign, 0, 0);
152 if (bExp < 0x3FFF) {
153 return a;
155 normalizeFloatx80Subnormal(aSig, &aExp, &aSig);
158 if (bExp < 0x3FFF) {
159 return a;
162 if (0x400F < bExp) {
163 aExp = bSign ? -0x6001 : 0xE000;
164 return roundAndPackFloatx80(status->floatx80_rounding_precision,
165 aSign, aExp, aSig, 0, status);
168 shiftCount = 0x403E - bExp;
169 bSig >>= shiftCount;
170 aExp = bSign ? (aExp - bSig) : (aExp + bSig);
172 return roundAndPackFloatx80(status->floatx80_rounding_precision,
173 aSign, aExp, aSig, 0, status);
176 floatx80 floatx80_move(floatx80 a, float_status *status)
178 bool aSign;
179 int32_t aExp;
180 uint64_t aSig;
182 aSig = extractFloatx80Frac(a);
183 aExp = extractFloatx80Exp(a);
184 aSign = extractFloatx80Sign(a);
186 if (aExp == 0x7FFF) {
187 if ((uint64_t)(aSig << 1)) {
188 return propagateFloatx80NaNOneArg(a, status);
190 return a;
192 if (aExp == 0) {
193 if (aSig == 0) {
194 return a;
196 normalizeRoundAndPackFloatx80(status->floatx80_rounding_precision,
197 aSign, aExp, aSig, 0, status);
199 return roundAndPackFloatx80(status->floatx80_rounding_precision, aSign,
200 aExp, aSig, 0, status);
204 * Algorithms for transcendental functions supported by MC68881 and MC68882
205 * mathematical coprocessors. The functions are derived from FPSP library.
208 #define one_exp 0x3FFF
209 #define one_sig UINT64_C(0x8000000000000000)
212 * Function for compactifying extended double-precision floating point values.
215 static int32_t floatx80_make_compact(int32_t aExp, uint64_t aSig)
217 return (aExp << 16) | (aSig >> 48);
221 * Log base e of x plus 1
224 floatx80 floatx80_lognp1(floatx80 a, float_status *status)
226 bool aSign;
227 int32_t aExp;
228 uint64_t aSig, fSig;
230 FloatRoundMode user_rnd_mode;
231 FloatX80RoundPrec user_rnd_prec;
233 int32_t compact, j, k;
234 floatx80 fp0, fp1, fp2, fp3, f, logof2, klog2, saveu;
236 aSig = extractFloatx80Frac(a);
237 aExp = extractFloatx80Exp(a);
238 aSign = extractFloatx80Sign(a);
240 if (aExp == 0x7FFF) {
241 if ((uint64_t) (aSig << 1)) {
242 propagateFloatx80NaNOneArg(a, status);
244 if (aSign) {
245 float_raise(float_flag_invalid, status);
246 return floatx80_default_nan(status);
248 return packFloatx80(0, floatx80_infinity.high, floatx80_infinity.low);
251 if (aExp == 0 && aSig == 0) {
252 return packFloatx80(aSign, 0, 0);
255 if (aSign && aExp >= one_exp) {
256 if (aExp == one_exp && aSig == one_sig) {
257 float_raise(float_flag_divbyzero, status);
258 return packFloatx80(aSign, floatx80_infinity.high,
259 floatx80_infinity.low);
261 float_raise(float_flag_invalid, status);
262 return floatx80_default_nan(status);
265 if (aExp < 0x3f99 || (aExp == 0x3f99 && aSig == one_sig)) {
266 /* <= min threshold */
267 float_raise(float_flag_inexact, status);
268 return floatx80_move(a, status);
271 user_rnd_mode = status->float_rounding_mode;
272 user_rnd_prec = status->floatx80_rounding_precision;
273 status->float_rounding_mode = float_round_nearest_even;
274 status->floatx80_rounding_precision = floatx80_precision_x;
276 compact = floatx80_make_compact(aExp, aSig);
278 fp0 = a; /* Z */
279 fp1 = a;
281 fp0 = floatx80_add(fp0, float32_to_floatx80(make_float32(0x3F800000),
282 status), status); /* X = (1+Z) */
284 aExp = extractFloatx80Exp(fp0);
285 aSig = extractFloatx80Frac(fp0);
287 compact = floatx80_make_compact(aExp, aSig);
289 if (compact < 0x3FFE8000 || compact > 0x3FFFC000) {
290 /* |X| < 1/2 or |X| > 3/2 */
291 k = aExp - 0x3FFF;
292 fp1 = int32_to_floatx80(k, status);
294 fSig = (aSig & UINT64_C(0xFE00000000000000)) | UINT64_C(0x0100000000000000);
295 j = (fSig >> 56) & 0x7E; /* DISPLACEMENT FOR 1/F */
297 f = packFloatx80(0, 0x3FFF, fSig); /* F */
298 fp0 = packFloatx80(0, 0x3FFF, aSig); /* Y */
300 fp0 = floatx80_sub(fp0, f, status); /* Y-F */
302 lp1cont1:
303 /* LP1CONT1 */
304 fp0 = floatx80_mul(fp0, log_tbl[j], status); /* FP0 IS U = (Y-F)/F */
305 logof2 = packFloatx80(0, 0x3FFE, UINT64_C(0xB17217F7D1CF79AC));
306 klog2 = floatx80_mul(fp1, logof2, status); /* FP1 IS K*LOG2 */
307 fp2 = floatx80_mul(fp0, fp0, status); /* FP2 IS V=U*U */
309 fp3 = fp2;
310 fp1 = fp2;
312 fp1 = floatx80_mul(fp1, float64_to_floatx80(
313 make_float64(0x3FC2499AB5E4040B), status),
314 status); /* V*A6 */
315 fp2 = floatx80_mul(fp2, float64_to_floatx80(
316 make_float64(0xBFC555B5848CB7DB), status),
317 status); /* V*A5 */
318 fp1 = floatx80_add(fp1, float64_to_floatx80(
319 make_float64(0x3FC99999987D8730), status),
320 status); /* A4+V*A6 */
321 fp2 = floatx80_add(fp2, float64_to_floatx80(
322 make_float64(0xBFCFFFFFFF6F7E97), status),
323 status); /* A3+V*A5 */
324 fp1 = floatx80_mul(fp1, fp3, status); /* V*(A4+V*A6) */
325 fp2 = floatx80_mul(fp2, fp3, status); /* V*(A3+V*A5) */
326 fp1 = floatx80_add(fp1, float64_to_floatx80(
327 make_float64(0x3FD55555555555A4), status),
328 status); /* A2+V*(A4+V*A6) */
329 fp2 = floatx80_add(fp2, float64_to_floatx80(
330 make_float64(0xBFE0000000000008), status),
331 status); /* A1+V*(A3+V*A5) */
332 fp1 = floatx80_mul(fp1, fp3, status); /* V*(A2+V*(A4+V*A6)) */
333 fp2 = floatx80_mul(fp2, fp3, status); /* V*(A1+V*(A3+V*A5)) */
334 fp1 = floatx80_mul(fp1, fp0, status); /* U*V*(A2+V*(A4+V*A6)) */
335 fp0 = floatx80_add(fp0, fp2, status); /* U+V*(A1+V*(A3+V*A5)) */
337 fp1 = floatx80_add(fp1, log_tbl[j + 1],
338 status); /* LOG(F)+U*V*(A2+V*(A4+V*A6)) */
339 fp0 = floatx80_add(fp0, fp1, status); /* FP0 IS LOG(F) + LOG(1+U) */
341 status->float_rounding_mode = user_rnd_mode;
342 status->floatx80_rounding_precision = user_rnd_prec;
344 a = floatx80_add(fp0, klog2, status);
346 float_raise(float_flag_inexact, status);
348 return a;
349 } else if (compact < 0x3FFEF07D || compact > 0x3FFF8841) {
350 /* |X| < 1/16 or |X| > -1/16 */
351 /* LP1CARE */
352 fSig = (aSig & UINT64_C(0xFE00000000000000)) | UINT64_C(0x0100000000000000);
353 f = packFloatx80(0, 0x3FFF, fSig); /* F */
354 j = (fSig >> 56) & 0x7E; /* DISPLACEMENT FOR 1/F */
356 if (compact >= 0x3FFF8000) { /* 1+Z >= 1 */
357 /* KISZERO */
358 fp0 = floatx80_sub(float32_to_floatx80(make_float32(0x3F800000),
359 status), f, status); /* 1-F */
360 fp0 = floatx80_add(fp0, fp1, status); /* FP0 IS Y-F = (1-F)+Z */
361 fp1 = packFloatx80(0, 0, 0); /* K = 0 */
362 } else {
363 /* KISNEG */
364 fp0 = floatx80_sub(float32_to_floatx80(make_float32(0x40000000),
365 status), f, status); /* 2-F */
366 fp1 = floatx80_add(fp1, fp1, status); /* 2Z */
367 fp0 = floatx80_add(fp0, fp1, status); /* FP0 IS Y-F = (2-F)+2Z */
368 fp1 = packFloatx80(1, one_exp, one_sig); /* K = -1 */
370 goto lp1cont1;
371 } else {
372 /* LP1ONE16 */
373 fp1 = floatx80_add(fp1, fp1, status); /* FP1 IS 2Z */
374 fp0 = floatx80_add(fp0, float32_to_floatx80(make_float32(0x3F800000),
375 status), status); /* FP0 IS 1+X */
377 /* LP1CONT2 */
378 fp1 = floatx80_div(fp1, fp0, status); /* U */
379 saveu = fp1;
380 fp0 = floatx80_mul(fp1, fp1, status); /* FP0 IS V = U*U */
381 fp1 = floatx80_mul(fp0, fp0, status); /* FP1 IS W = V*V */
383 fp3 = float64_to_floatx80(make_float64(0x3F175496ADD7DAD6),
384 status); /* B5 */
385 fp2 = float64_to_floatx80(make_float64(0x3F3C71C2FE80C7E0),
386 status); /* B4 */
387 fp3 = floatx80_mul(fp3, fp1, status); /* W*B5 */
388 fp2 = floatx80_mul(fp2, fp1, status); /* W*B4 */
389 fp3 = floatx80_add(fp3, float64_to_floatx80(
390 make_float64(0x3F624924928BCCFF), status),
391 status); /* B3+W*B5 */
392 fp2 = floatx80_add(fp2, float64_to_floatx80(
393 make_float64(0x3F899999999995EC), status),
394 status); /* B2+W*B4 */
395 fp1 = floatx80_mul(fp1, fp3, status); /* W*(B3+W*B5) */
396 fp2 = floatx80_mul(fp2, fp0, status); /* V*(B2+W*B4) */
397 fp1 = floatx80_add(fp1, float64_to_floatx80(
398 make_float64(0x3FB5555555555555), status),
399 status); /* B1+W*(B3+W*B5) */
401 fp0 = floatx80_mul(fp0, saveu, status); /* FP0 IS U*V */
402 fp1 = floatx80_add(fp1, fp2,
403 status); /* B1+W*(B3+W*B5) + V*(B2+W*B4) */
404 fp0 = floatx80_mul(fp0, fp1,
405 status); /* U*V*([B1+W*(B3+W*B5)] + [V*(B2+W*B4)]) */
407 status->float_rounding_mode = user_rnd_mode;
408 status->floatx80_rounding_precision = user_rnd_prec;
410 a = floatx80_add(fp0, saveu, status);
412 /*if (!floatx80_is_zero(a)) { */
413 float_raise(float_flag_inexact, status);
414 /*} */
416 return a;
421 * Log base e
424 floatx80 floatx80_logn(floatx80 a, float_status *status)
426 bool aSign;
427 int32_t aExp;
428 uint64_t aSig, fSig;
430 FloatRoundMode user_rnd_mode;
431 FloatX80RoundPrec user_rnd_prec;
433 int32_t compact, j, k, adjk;
434 floatx80 fp0, fp1, fp2, fp3, f, logof2, klog2, saveu;
436 aSig = extractFloatx80Frac(a);
437 aExp = extractFloatx80Exp(a);
438 aSign = extractFloatx80Sign(a);
440 if (aExp == 0x7FFF) {
441 if ((uint64_t) (aSig << 1)) {
442 propagateFloatx80NaNOneArg(a, status);
444 if (aSign == 0) {
445 return packFloatx80(0, floatx80_infinity.high,
446 floatx80_infinity.low);
450 adjk = 0;
452 if (aExp == 0) {
453 if (aSig == 0) { /* zero */
454 float_raise(float_flag_divbyzero, status);
455 return packFloatx80(1, floatx80_infinity.high,
456 floatx80_infinity.low);
458 if ((aSig & one_sig) == 0) { /* denormal */
459 normalizeFloatx80Subnormal(aSig, &aExp, &aSig);
460 adjk = -100;
461 aExp += 100;
462 a = packFloatx80(aSign, aExp, aSig);
466 if (aSign) {
467 float_raise(float_flag_invalid, status);
468 return floatx80_default_nan(status);
471 user_rnd_mode = status->float_rounding_mode;
472 user_rnd_prec = status->floatx80_rounding_precision;
473 status->float_rounding_mode = float_round_nearest_even;
474 status->floatx80_rounding_precision = floatx80_precision_x;
476 compact = floatx80_make_compact(aExp, aSig);
478 if (compact < 0x3FFEF07D || compact > 0x3FFF8841) {
479 /* |X| < 15/16 or |X| > 17/16 */
480 k = aExp - 0x3FFF;
481 k += adjk;
482 fp1 = int32_to_floatx80(k, status);
484 fSig = (aSig & UINT64_C(0xFE00000000000000)) | UINT64_C(0x0100000000000000);
485 j = (fSig >> 56) & 0x7E; /* DISPLACEMENT FOR 1/F */
487 f = packFloatx80(0, 0x3FFF, fSig); /* F */
488 fp0 = packFloatx80(0, 0x3FFF, aSig); /* Y */
490 fp0 = floatx80_sub(fp0, f, status); /* Y-F */
492 /* LP1CONT1 */
493 fp0 = floatx80_mul(fp0, log_tbl[j], status); /* FP0 IS U = (Y-F)/F */
494 logof2 = packFloatx80(0, 0x3FFE, UINT64_C(0xB17217F7D1CF79AC));
495 klog2 = floatx80_mul(fp1, logof2, status); /* FP1 IS K*LOG2 */
496 fp2 = floatx80_mul(fp0, fp0, status); /* FP2 IS V=U*U */
498 fp3 = fp2;
499 fp1 = fp2;
501 fp1 = floatx80_mul(fp1, float64_to_floatx80(
502 make_float64(0x3FC2499AB5E4040B), status),
503 status); /* V*A6 */
504 fp2 = floatx80_mul(fp2, float64_to_floatx80(
505 make_float64(0xBFC555B5848CB7DB), status),
506 status); /* V*A5 */
507 fp1 = floatx80_add(fp1, float64_to_floatx80(
508 make_float64(0x3FC99999987D8730), status),
509 status); /* A4+V*A6 */
510 fp2 = floatx80_add(fp2, float64_to_floatx80(
511 make_float64(0xBFCFFFFFFF6F7E97), status),
512 status); /* A3+V*A5 */
513 fp1 = floatx80_mul(fp1, fp3, status); /* V*(A4+V*A6) */
514 fp2 = floatx80_mul(fp2, fp3, status); /* V*(A3+V*A5) */
515 fp1 = floatx80_add(fp1, float64_to_floatx80(
516 make_float64(0x3FD55555555555A4), status),
517 status); /* A2+V*(A4+V*A6) */
518 fp2 = floatx80_add(fp2, float64_to_floatx80(
519 make_float64(0xBFE0000000000008), status),
520 status); /* A1+V*(A3+V*A5) */
521 fp1 = floatx80_mul(fp1, fp3, status); /* V*(A2+V*(A4+V*A6)) */
522 fp2 = floatx80_mul(fp2, fp3, status); /* V*(A1+V*(A3+V*A5)) */
523 fp1 = floatx80_mul(fp1, fp0, status); /* U*V*(A2+V*(A4+V*A6)) */
524 fp0 = floatx80_add(fp0, fp2, status); /* U+V*(A1+V*(A3+V*A5)) */
526 fp1 = floatx80_add(fp1, log_tbl[j + 1],
527 status); /* LOG(F)+U*V*(A2+V*(A4+V*A6)) */
528 fp0 = floatx80_add(fp0, fp1, status); /* FP0 IS LOG(F) + LOG(1+U) */
530 status->float_rounding_mode = user_rnd_mode;
531 status->floatx80_rounding_precision = user_rnd_prec;
533 a = floatx80_add(fp0, klog2, status);
535 float_raise(float_flag_inexact, status);
537 return a;
538 } else { /* |X-1| >= 1/16 */
539 fp0 = a;
540 fp1 = a;
541 fp1 = floatx80_sub(fp1, float32_to_floatx80(make_float32(0x3F800000),
542 status), status); /* FP1 IS X-1 */
543 fp0 = floatx80_add(fp0, float32_to_floatx80(make_float32(0x3F800000),
544 status), status); /* FP0 IS X+1 */
545 fp1 = floatx80_add(fp1, fp1, status); /* FP1 IS 2(X-1) */
547 /* LP1CONT2 */
548 fp1 = floatx80_div(fp1, fp0, status); /* U */
549 saveu = fp1;
550 fp0 = floatx80_mul(fp1, fp1, status); /* FP0 IS V = U*U */
551 fp1 = floatx80_mul(fp0, fp0, status); /* FP1 IS W = V*V */
553 fp3 = float64_to_floatx80(make_float64(0x3F175496ADD7DAD6),
554 status); /* B5 */
555 fp2 = float64_to_floatx80(make_float64(0x3F3C71C2FE80C7E0),
556 status); /* B4 */
557 fp3 = floatx80_mul(fp3, fp1, status); /* W*B5 */
558 fp2 = floatx80_mul(fp2, fp1, status); /* W*B4 */
559 fp3 = floatx80_add(fp3, float64_to_floatx80(
560 make_float64(0x3F624924928BCCFF), status),
561 status); /* B3+W*B5 */
562 fp2 = floatx80_add(fp2, float64_to_floatx80(
563 make_float64(0x3F899999999995EC), status),
564 status); /* B2+W*B4 */
565 fp1 = floatx80_mul(fp1, fp3, status); /* W*(B3+W*B5) */
566 fp2 = floatx80_mul(fp2, fp0, status); /* V*(B2+W*B4) */
567 fp1 = floatx80_add(fp1, float64_to_floatx80(
568 make_float64(0x3FB5555555555555), status),
569 status); /* B1+W*(B3+W*B5) */
571 fp0 = floatx80_mul(fp0, saveu, status); /* FP0 IS U*V */
572 fp1 = floatx80_add(fp1, fp2, status); /* B1+W*(B3+W*B5) + V*(B2+W*B4) */
573 fp0 = floatx80_mul(fp0, fp1,
574 status); /* U*V*([B1+W*(B3+W*B5)] + [V*(B2+W*B4)]) */
576 status->float_rounding_mode = user_rnd_mode;
577 status->floatx80_rounding_precision = user_rnd_prec;
579 a = floatx80_add(fp0, saveu, status);
581 /*if (!floatx80_is_zero(a)) { */
582 float_raise(float_flag_inexact, status);
583 /*} */
585 return a;
590 * Log base 10
593 floatx80 floatx80_log10(floatx80 a, float_status *status)
595 bool aSign;
596 int32_t aExp;
597 uint64_t aSig;
599 FloatRoundMode user_rnd_mode;
600 FloatX80RoundPrec user_rnd_prec;
602 floatx80 fp0, fp1;
604 aSig = extractFloatx80Frac(a);
605 aExp = extractFloatx80Exp(a);
606 aSign = extractFloatx80Sign(a);
608 if (aExp == 0x7FFF) {
609 if ((uint64_t) (aSig << 1)) {
610 propagateFloatx80NaNOneArg(a, status);
612 if (aSign == 0) {
613 return packFloatx80(0, floatx80_infinity.high,
614 floatx80_infinity.low);
618 if (aExp == 0 && aSig == 0) {
619 float_raise(float_flag_divbyzero, status);
620 return packFloatx80(1, floatx80_infinity.high,
621 floatx80_infinity.low);
624 if (aSign) {
625 float_raise(float_flag_invalid, status);
626 return floatx80_default_nan(status);
629 user_rnd_mode = status->float_rounding_mode;
630 user_rnd_prec = status->floatx80_rounding_precision;
631 status->float_rounding_mode = float_round_nearest_even;
632 status->floatx80_rounding_precision = floatx80_precision_x;
634 fp0 = floatx80_logn(a, status);
635 fp1 = packFloatx80(0, 0x3FFD, UINT64_C(0xDE5BD8A937287195)); /* INV_L10 */
637 status->float_rounding_mode = user_rnd_mode;
638 status->floatx80_rounding_precision = user_rnd_prec;
640 a = floatx80_mul(fp0, fp1, status); /* LOGN(X)*INV_L10 */
642 float_raise(float_flag_inexact, status);
644 return a;
648 * Log base 2
651 floatx80 floatx80_log2(floatx80 a, float_status *status)
653 bool aSign;
654 int32_t aExp;
655 uint64_t aSig;
657 FloatRoundMode user_rnd_mode;
658 FloatX80RoundPrec user_rnd_prec;
660 floatx80 fp0, fp1;
662 aSig = extractFloatx80Frac(a);
663 aExp = extractFloatx80Exp(a);
664 aSign = extractFloatx80Sign(a);
666 if (aExp == 0x7FFF) {
667 if ((uint64_t) (aSig << 1)) {
668 propagateFloatx80NaNOneArg(a, status);
670 if (aSign == 0) {
671 return packFloatx80(0, floatx80_infinity.high,
672 floatx80_infinity.low);
676 if (aExp == 0) {
677 if (aSig == 0) {
678 float_raise(float_flag_divbyzero, status);
679 return packFloatx80(1, floatx80_infinity.high,
680 floatx80_infinity.low);
682 normalizeFloatx80Subnormal(aSig, &aExp, &aSig);
685 if (aSign) {
686 float_raise(float_flag_invalid, status);
687 return floatx80_default_nan(status);
690 user_rnd_mode = status->float_rounding_mode;
691 user_rnd_prec = status->floatx80_rounding_precision;
692 status->float_rounding_mode = float_round_nearest_even;
693 status->floatx80_rounding_precision = floatx80_precision_x;
695 if (aSig == one_sig) { /* X is 2^k */
696 status->float_rounding_mode = user_rnd_mode;
697 status->floatx80_rounding_precision = user_rnd_prec;
699 a = int32_to_floatx80(aExp - 0x3FFF, status);
700 } else {
701 fp0 = floatx80_logn(a, status);
702 fp1 = packFloatx80(0, 0x3FFF, UINT64_C(0xB8AA3B295C17F0BC)); /* INV_L2 */
704 status->float_rounding_mode = user_rnd_mode;
705 status->floatx80_rounding_precision = user_rnd_prec;
707 a = floatx80_mul(fp0, fp1, status); /* LOGN(X)*INV_L2 */
710 float_raise(float_flag_inexact, status);
712 return a;
716 * e to x
719 floatx80 floatx80_etox(floatx80 a, float_status *status)
721 bool aSign;
722 int32_t aExp;
723 uint64_t aSig;
725 FloatRoundMode user_rnd_mode;
726 FloatX80RoundPrec user_rnd_prec;
728 int32_t compact, n, j, k, m, m1;
729 floatx80 fp0, fp1, fp2, fp3, l2, scale, adjscale;
730 bool adjflag;
732 aSig = extractFloatx80Frac(a);
733 aExp = extractFloatx80Exp(a);
734 aSign = extractFloatx80Sign(a);
736 if (aExp == 0x7FFF) {
737 if ((uint64_t) (aSig << 1)) {
738 return propagateFloatx80NaNOneArg(a, status);
740 if (aSign) {
741 return packFloatx80(0, 0, 0);
743 return packFloatx80(0, floatx80_infinity.high,
744 floatx80_infinity.low);
747 if (aExp == 0 && aSig == 0) {
748 return packFloatx80(0, one_exp, one_sig);
751 user_rnd_mode = status->float_rounding_mode;
752 user_rnd_prec = status->floatx80_rounding_precision;
753 status->float_rounding_mode = float_round_nearest_even;
754 status->floatx80_rounding_precision = floatx80_precision_x;
756 adjflag = 0;
758 if (aExp >= 0x3FBE) { /* |X| >= 2^(-65) */
759 compact = floatx80_make_compact(aExp, aSig);
761 if (compact < 0x400CB167) { /* |X| < 16380 log2 */
762 fp0 = a;
763 fp1 = a;
764 fp0 = floatx80_mul(fp0, float32_to_floatx80(
765 make_float32(0x42B8AA3B), status),
766 status); /* 64/log2 * X */
767 adjflag = 0;
768 n = floatx80_to_int32(fp0, status); /* int(64/log2*X) */
769 fp0 = int32_to_floatx80(n, status);
771 j = n & 0x3F; /* J = N mod 64 */
772 m = n / 64; /* NOTE: this is really arithmetic right shift by 6 */
773 if (n < 0 && j) {
775 * arithmetic right shift is division and
776 * round towards minus infinity
778 m--;
780 m += 0x3FFF; /* biased exponent of 2^(M) */
782 expcont1:
783 fp2 = fp0; /* N */
784 fp0 = floatx80_mul(fp0, float32_to_floatx80(
785 make_float32(0xBC317218), status),
786 status); /* N * L1, L1 = lead(-log2/64) */
787 l2 = packFloatx80(0, 0x3FDC, UINT64_C(0x82E308654361C4C6));
788 fp2 = floatx80_mul(fp2, l2, status); /* N * L2, L1+L2 = -log2/64 */
789 fp0 = floatx80_add(fp0, fp1, status); /* X + N*L1 */
790 fp0 = floatx80_add(fp0, fp2, status); /* R */
792 fp1 = floatx80_mul(fp0, fp0, status); /* S = R*R */
793 fp2 = float32_to_floatx80(make_float32(0x3AB60B70),
794 status); /* A5 */
795 fp2 = floatx80_mul(fp2, fp1, status); /* fp2 is S*A5 */
796 fp3 = floatx80_mul(float32_to_floatx80(make_float32(0x3C088895),
797 status), fp1,
798 status); /* fp3 is S*A4 */
799 fp2 = floatx80_add(fp2, float64_to_floatx80(make_float64(
800 0x3FA5555555554431), status),
801 status); /* fp2 is A3+S*A5 */
802 fp3 = floatx80_add(fp3, float64_to_floatx80(make_float64(
803 0x3FC5555555554018), status),
804 status); /* fp3 is A2+S*A4 */
805 fp2 = floatx80_mul(fp2, fp1, status); /* fp2 is S*(A3+S*A5) */
806 fp3 = floatx80_mul(fp3, fp1, status); /* fp3 is S*(A2+S*A4) */
807 fp2 = floatx80_add(fp2, float32_to_floatx80(
808 make_float32(0x3F000000), status),
809 status); /* fp2 is A1+S*(A3+S*A5) */
810 fp3 = floatx80_mul(fp3, fp0, status); /* fp3 IS R*S*(A2+S*A4) */
811 fp2 = floatx80_mul(fp2, fp1,
812 status); /* fp2 IS S*(A1+S*(A3+S*A5)) */
813 fp0 = floatx80_add(fp0, fp3, status); /* fp0 IS R+R*S*(A2+S*A4) */
814 fp0 = floatx80_add(fp0, fp2, status); /* fp0 IS EXP(R) - 1 */
816 fp1 = exp_tbl[j];
817 fp0 = floatx80_mul(fp0, fp1, status); /* 2^(J/64)*(Exp(R)-1) */
818 fp0 = floatx80_add(fp0, float32_to_floatx80(exp_tbl2[j], status),
819 status); /* accurate 2^(J/64) */
820 fp0 = floatx80_add(fp0, fp1,
821 status); /* 2^(J/64) + 2^(J/64)*(Exp(R)-1) */
823 scale = packFloatx80(0, m, one_sig);
824 if (adjflag) {
825 adjscale = packFloatx80(0, m1, one_sig);
826 fp0 = floatx80_mul(fp0, adjscale, status);
829 status->float_rounding_mode = user_rnd_mode;
830 status->floatx80_rounding_precision = user_rnd_prec;
832 a = floatx80_mul(fp0, scale, status);
834 float_raise(float_flag_inexact, status);
836 return a;
837 } else { /* |X| >= 16380 log2 */
838 if (compact > 0x400CB27C) { /* |X| >= 16480 log2 */
839 status->float_rounding_mode = user_rnd_mode;
840 status->floatx80_rounding_precision = user_rnd_prec;
841 if (aSign) {
842 a = roundAndPackFloatx80(
843 status->floatx80_rounding_precision,
844 0, -0x1000, aSig, 0, status);
845 } else {
846 a = roundAndPackFloatx80(
847 status->floatx80_rounding_precision,
848 0, 0x8000, aSig, 0, status);
850 float_raise(float_flag_inexact, status);
852 return a;
853 } else {
854 fp0 = a;
855 fp1 = a;
856 fp0 = floatx80_mul(fp0, float32_to_floatx80(
857 make_float32(0x42B8AA3B), status),
858 status); /* 64/log2 * X */
859 adjflag = 1;
860 n = floatx80_to_int32(fp0, status); /* int(64/log2*X) */
861 fp0 = int32_to_floatx80(n, status);
863 j = n & 0x3F; /* J = N mod 64 */
864 /* NOTE: this is really arithmetic right shift by 6 */
865 k = n / 64;
866 if (n < 0 && j) {
867 /* arithmetic right shift is division and
868 * round towards minus infinity
870 k--;
872 /* NOTE: this is really arithmetic right shift by 1 */
873 m1 = k / 2;
874 if (k < 0 && (k & 1)) {
875 /* arithmetic right shift is division and
876 * round towards minus infinity
878 m1--;
880 m = k - m1;
881 m1 += 0x3FFF; /* biased exponent of 2^(M1) */
882 m += 0x3FFF; /* biased exponent of 2^(M) */
884 goto expcont1;
887 } else { /* |X| < 2^(-65) */
888 status->float_rounding_mode = user_rnd_mode;
889 status->floatx80_rounding_precision = user_rnd_prec;
891 a = floatx80_add(a, float32_to_floatx80(make_float32(0x3F800000),
892 status), status); /* 1 + X */
894 float_raise(float_flag_inexact, status);
896 return a;
901 * 2 to x
904 floatx80 floatx80_twotox(floatx80 a, float_status *status)
906 bool aSign;
907 int32_t aExp;
908 uint64_t aSig;
910 FloatRoundMode user_rnd_mode;
911 FloatX80RoundPrec user_rnd_prec;
913 int32_t compact, n, j, l, m, m1;
914 floatx80 fp0, fp1, fp2, fp3, adjfact, fact1, fact2;
916 aSig = extractFloatx80Frac(a);
917 aExp = extractFloatx80Exp(a);
918 aSign = extractFloatx80Sign(a);
920 if (aExp == 0x7FFF) {
921 if ((uint64_t) (aSig << 1)) {
922 return propagateFloatx80NaNOneArg(a, status);
924 if (aSign) {
925 return packFloatx80(0, 0, 0);
927 return packFloatx80(0, floatx80_infinity.high,
928 floatx80_infinity.low);
931 if (aExp == 0 && aSig == 0) {
932 return packFloatx80(0, one_exp, one_sig);
935 user_rnd_mode = status->float_rounding_mode;
936 user_rnd_prec = status->floatx80_rounding_precision;
937 status->float_rounding_mode = float_round_nearest_even;
938 status->floatx80_rounding_precision = floatx80_precision_x;
940 fp0 = a;
942 compact = floatx80_make_compact(aExp, aSig);
944 if (compact < 0x3FB98000 || compact > 0x400D80C0) {
945 /* |X| > 16480 or |X| < 2^(-70) */
946 if (compact > 0x3FFF8000) { /* |X| > 16480 */
947 status->float_rounding_mode = user_rnd_mode;
948 status->floatx80_rounding_precision = user_rnd_prec;
950 if (aSign) {
951 return roundAndPackFloatx80(status->floatx80_rounding_precision,
952 0, -0x1000, aSig, 0, status);
953 } else {
954 return roundAndPackFloatx80(status->floatx80_rounding_precision,
955 0, 0x8000, aSig, 0, status);
957 } else { /* |X| < 2^(-70) */
958 status->float_rounding_mode = user_rnd_mode;
959 status->floatx80_rounding_precision = user_rnd_prec;
961 a = floatx80_add(fp0, float32_to_floatx80(
962 make_float32(0x3F800000), status),
963 status); /* 1 + X */
965 float_raise(float_flag_inexact, status);
967 return a;
969 } else { /* 2^(-70) <= |X| <= 16480 */
970 fp1 = fp0; /* X */
971 fp1 = floatx80_mul(fp1, float32_to_floatx80(
972 make_float32(0x42800000), status),
973 status); /* X * 64 */
974 n = floatx80_to_int32(fp1, status);
975 fp1 = int32_to_floatx80(n, status);
976 j = n & 0x3F;
977 l = n / 64; /* NOTE: this is really arithmetic right shift by 6 */
978 if (n < 0 && j) {
980 * arithmetic right shift is division and
981 * round towards minus infinity
983 l--;
985 m = l / 2; /* NOTE: this is really arithmetic right shift by 1 */
986 if (l < 0 && (l & 1)) {
988 * arithmetic right shift is division and
989 * round towards minus infinity
991 m--;
993 m1 = l - m;
994 m1 += 0x3FFF; /* ADJFACT IS 2^(M') */
996 adjfact = packFloatx80(0, m1, one_sig);
997 fact1 = exp2_tbl[j];
998 fact1.high += m;
999 fact2.high = exp2_tbl2[j] >> 16;
1000 fact2.high += m;
1001 fact2.low = (uint64_t)(exp2_tbl2[j] & 0xFFFF);
1002 fact2.low <<= 48;
1004 fp1 = floatx80_mul(fp1, float32_to_floatx80(
1005 make_float32(0x3C800000), status),
1006 status); /* (1/64)*N */
1007 fp0 = floatx80_sub(fp0, fp1, status); /* X - (1/64)*INT(64 X) */
1008 fp2 = packFloatx80(0, 0x3FFE, UINT64_C(0xB17217F7D1CF79AC)); /* LOG2 */
1009 fp0 = floatx80_mul(fp0, fp2, status); /* R */
1011 /* EXPR */
1012 fp1 = floatx80_mul(fp0, fp0, status); /* S = R*R */
1013 fp2 = float64_to_floatx80(make_float64(0x3F56C16D6F7BD0B2),
1014 status); /* A5 */
1015 fp3 = float64_to_floatx80(make_float64(0x3F811112302C712C),
1016 status); /* A4 */
1017 fp2 = floatx80_mul(fp2, fp1, status); /* S*A5 */
1018 fp3 = floatx80_mul(fp3, fp1, status); /* S*A4 */
1019 fp2 = floatx80_add(fp2, float64_to_floatx80(
1020 make_float64(0x3FA5555555554CC1), status),
1021 status); /* A3+S*A5 */
1022 fp3 = floatx80_add(fp3, float64_to_floatx80(
1023 make_float64(0x3FC5555555554A54), status),
1024 status); /* A2+S*A4 */
1025 fp2 = floatx80_mul(fp2, fp1, status); /* S*(A3+S*A5) */
1026 fp3 = floatx80_mul(fp3, fp1, status); /* S*(A2+S*A4) */
1027 fp2 = floatx80_add(fp2, float64_to_floatx80(
1028 make_float64(0x3FE0000000000000), status),
1029 status); /* A1+S*(A3+S*A5) */
1030 fp3 = floatx80_mul(fp3, fp0, status); /* R*S*(A2+S*A4) */
1032 fp2 = floatx80_mul(fp2, fp1, status); /* S*(A1+S*(A3+S*A5)) */
1033 fp0 = floatx80_add(fp0, fp3, status); /* R+R*S*(A2+S*A4) */
1034 fp0 = floatx80_add(fp0, fp2, status); /* EXP(R) - 1 */
1036 fp0 = floatx80_mul(fp0, fact1, status);
1037 fp0 = floatx80_add(fp0, fact2, status);
1038 fp0 = floatx80_add(fp0, fact1, status);
1040 status->float_rounding_mode = user_rnd_mode;
1041 status->floatx80_rounding_precision = user_rnd_prec;
1043 a = floatx80_mul(fp0, adjfact, status);
1045 float_raise(float_flag_inexact, status);
1047 return a;
1052 * 10 to x
1055 floatx80 floatx80_tentox(floatx80 a, float_status *status)
1057 bool aSign;
1058 int32_t aExp;
1059 uint64_t aSig;
1061 FloatRoundMode user_rnd_mode;
1062 FloatX80RoundPrec user_rnd_prec;
1064 int32_t compact, n, j, l, m, m1;
1065 floatx80 fp0, fp1, fp2, fp3, adjfact, fact1, fact2;
1067 aSig = extractFloatx80Frac(a);
1068 aExp = extractFloatx80Exp(a);
1069 aSign = extractFloatx80Sign(a);
1071 if (aExp == 0x7FFF) {
1072 if ((uint64_t) (aSig << 1)) {
1073 return propagateFloatx80NaNOneArg(a, status);
1075 if (aSign) {
1076 return packFloatx80(0, 0, 0);
1078 return packFloatx80(0, floatx80_infinity.high,
1079 floatx80_infinity.low);
1082 if (aExp == 0 && aSig == 0) {
1083 return packFloatx80(0, one_exp, one_sig);
1086 user_rnd_mode = status->float_rounding_mode;
1087 user_rnd_prec = status->floatx80_rounding_precision;
1088 status->float_rounding_mode = float_round_nearest_even;
1089 status->floatx80_rounding_precision = floatx80_precision_x;
1091 fp0 = a;
1093 compact = floatx80_make_compact(aExp, aSig);
1095 if (compact < 0x3FB98000 || compact > 0x400B9B07) {
1096 /* |X| > 16480 LOG2/LOG10 or |X| < 2^(-70) */
1097 if (compact > 0x3FFF8000) { /* |X| > 16480 */
1098 status->float_rounding_mode = user_rnd_mode;
1099 status->floatx80_rounding_precision = user_rnd_prec;
1101 if (aSign) {
1102 return roundAndPackFloatx80(status->floatx80_rounding_precision,
1103 0, -0x1000, aSig, 0, status);
1104 } else {
1105 return roundAndPackFloatx80(status->floatx80_rounding_precision,
1106 0, 0x8000, aSig, 0, status);
1108 } else { /* |X| < 2^(-70) */
1109 status->float_rounding_mode = user_rnd_mode;
1110 status->floatx80_rounding_precision = user_rnd_prec;
1112 a = floatx80_add(fp0, float32_to_floatx80(
1113 make_float32(0x3F800000), status),
1114 status); /* 1 + X */
1116 float_raise(float_flag_inexact, status);
1118 return a;
1120 } else { /* 2^(-70) <= |X| <= 16480 LOG 2 / LOG 10 */
1121 fp1 = fp0; /* X */
1122 fp1 = floatx80_mul(fp1, float64_to_floatx80(
1123 make_float64(0x406A934F0979A371),
1124 status), status); /* X*64*LOG10/LOG2 */
1125 n = floatx80_to_int32(fp1, status); /* N=INT(X*64*LOG10/LOG2) */
1126 fp1 = int32_to_floatx80(n, status);
1128 j = n & 0x3F;
1129 l = n / 64; /* NOTE: this is really arithmetic right shift by 6 */
1130 if (n < 0 && j) {
1132 * arithmetic right shift is division and
1133 * round towards minus infinity
1135 l--;
1137 m = l / 2; /* NOTE: this is really arithmetic right shift by 1 */
1138 if (l < 0 && (l & 1)) {
1140 * arithmetic right shift is division and
1141 * round towards minus infinity
1143 m--;
1145 m1 = l - m;
1146 m1 += 0x3FFF; /* ADJFACT IS 2^(M') */
1148 adjfact = packFloatx80(0, m1, one_sig);
1149 fact1 = exp2_tbl[j];
1150 fact1.high += m;
1151 fact2.high = exp2_tbl2[j] >> 16;
1152 fact2.high += m;
1153 fact2.low = (uint64_t)(exp2_tbl2[j] & 0xFFFF);
1154 fact2.low <<= 48;
1156 fp2 = fp1; /* N */
1157 fp1 = floatx80_mul(fp1, float64_to_floatx80(
1158 make_float64(0x3F734413509F8000), status),
1159 status); /* N*(LOG2/64LOG10)_LEAD */
1160 fp3 = packFloatx80(1, 0x3FCD, UINT64_C(0xC0219DC1DA994FD2));
1161 fp2 = floatx80_mul(fp2, fp3, status); /* N*(LOG2/64LOG10)_TRAIL */
1162 fp0 = floatx80_sub(fp0, fp1, status); /* X - N L_LEAD */
1163 fp0 = floatx80_sub(fp0, fp2, status); /* X - N L_TRAIL */
1164 fp2 = packFloatx80(0, 0x4000, UINT64_C(0x935D8DDDAAA8AC17)); /* LOG10 */
1165 fp0 = floatx80_mul(fp0, fp2, status); /* R */
1167 /* EXPR */
1168 fp1 = floatx80_mul(fp0, fp0, status); /* S = R*R */
1169 fp2 = float64_to_floatx80(make_float64(0x3F56C16D6F7BD0B2),
1170 status); /* A5 */
1171 fp3 = float64_to_floatx80(make_float64(0x3F811112302C712C),
1172 status); /* A4 */
1173 fp2 = floatx80_mul(fp2, fp1, status); /* S*A5 */
1174 fp3 = floatx80_mul(fp3, fp1, status); /* S*A4 */
1175 fp2 = floatx80_add(fp2, float64_to_floatx80(
1176 make_float64(0x3FA5555555554CC1), status),
1177 status); /* A3+S*A5 */
1178 fp3 = floatx80_add(fp3, float64_to_floatx80(
1179 make_float64(0x3FC5555555554A54), status),
1180 status); /* A2+S*A4 */
1181 fp2 = floatx80_mul(fp2, fp1, status); /* S*(A3+S*A5) */
1182 fp3 = floatx80_mul(fp3, fp1, status); /* S*(A2+S*A4) */
1183 fp2 = floatx80_add(fp2, float64_to_floatx80(
1184 make_float64(0x3FE0000000000000), status),
1185 status); /* A1+S*(A3+S*A5) */
1186 fp3 = floatx80_mul(fp3, fp0, status); /* R*S*(A2+S*A4) */
1188 fp2 = floatx80_mul(fp2, fp1, status); /* S*(A1+S*(A3+S*A5)) */
1189 fp0 = floatx80_add(fp0, fp3, status); /* R+R*S*(A2+S*A4) */
1190 fp0 = floatx80_add(fp0, fp2, status); /* EXP(R) - 1 */
1192 fp0 = floatx80_mul(fp0, fact1, status);
1193 fp0 = floatx80_add(fp0, fact2, status);
1194 fp0 = floatx80_add(fp0, fact1, status);
1196 status->float_rounding_mode = user_rnd_mode;
1197 status->floatx80_rounding_precision = user_rnd_prec;
1199 a = floatx80_mul(fp0, adjfact, status);
1201 float_raise(float_flag_inexact, status);
1203 return a;
1208 * Tangent
1211 floatx80 floatx80_tan(floatx80 a, float_status *status)
1213 bool aSign, xSign;
1214 int32_t aExp, xExp;
1215 uint64_t aSig, xSig;
1217 FloatRoundMode user_rnd_mode;
1218 FloatX80RoundPrec user_rnd_prec;
1220 int32_t compact, l, n, j;
1221 floatx80 fp0, fp1, fp2, fp3, fp4, fp5, invtwopi, twopi1, twopi2;
1222 float32 twoto63;
1223 bool endflag;
1225 aSig = extractFloatx80Frac(a);
1226 aExp = extractFloatx80Exp(a);
1227 aSign = extractFloatx80Sign(a);
1229 if (aExp == 0x7FFF) {
1230 if ((uint64_t) (aSig << 1)) {
1231 return propagateFloatx80NaNOneArg(a, status);
1233 float_raise(float_flag_invalid, status);
1234 return floatx80_default_nan(status);
1237 if (aExp == 0 && aSig == 0) {
1238 return packFloatx80(aSign, 0, 0);
1241 user_rnd_mode = status->float_rounding_mode;
1242 user_rnd_prec = status->floatx80_rounding_precision;
1243 status->float_rounding_mode = float_round_nearest_even;
1244 status->floatx80_rounding_precision = floatx80_precision_x;
1246 compact = floatx80_make_compact(aExp, aSig);
1248 fp0 = a;
1250 if (compact < 0x3FD78000 || compact > 0x4004BC7E) {
1251 /* 2^(-40) > |X| > 15 PI */
1252 if (compact > 0x3FFF8000) { /* |X| >= 15 PI */
1253 /* REDUCEX */
1254 fp1 = packFloatx80(0, 0, 0);
1255 if (compact == 0x7FFEFFFF) {
1256 twopi1 = packFloatx80(aSign ^ 1, 0x7FFE,
1257 UINT64_C(0xC90FDAA200000000));
1258 twopi2 = packFloatx80(aSign ^ 1, 0x7FDC,
1259 UINT64_C(0x85A308D300000000));
1260 fp0 = floatx80_add(fp0, twopi1, status);
1261 fp1 = fp0;
1262 fp0 = floatx80_add(fp0, twopi2, status);
1263 fp1 = floatx80_sub(fp1, fp0, status);
1264 fp1 = floatx80_add(fp1, twopi2, status);
1266 loop:
1267 xSign = extractFloatx80Sign(fp0);
1268 xExp = extractFloatx80Exp(fp0);
1269 xExp -= 0x3FFF;
1270 if (xExp <= 28) {
1271 l = 0;
1272 endflag = true;
1273 } else {
1274 l = xExp - 27;
1275 endflag = false;
1277 invtwopi = packFloatx80(0, 0x3FFE - l,
1278 UINT64_C(0xA2F9836E4E44152A)); /* INVTWOPI */
1279 twopi1 = packFloatx80(0, 0x3FFF + l, UINT64_C(0xC90FDAA200000000));
1280 twopi2 = packFloatx80(0, 0x3FDD + l, UINT64_C(0x85A308D300000000));
1282 /* SIGN(INARG)*2^63 IN SGL */
1283 twoto63 = packFloat32(xSign, 0xBE, 0);
1285 fp2 = floatx80_mul(fp0, invtwopi, status);
1286 fp2 = floatx80_add(fp2, float32_to_floatx80(twoto63, status),
1287 status); /* THE FRACT PART OF FP2 IS ROUNDED */
1288 fp2 = floatx80_sub(fp2, float32_to_floatx80(twoto63, status),
1289 status); /* FP2 is N */
1290 fp4 = floatx80_mul(twopi1, fp2, status); /* W = N*P1 */
1291 fp5 = floatx80_mul(twopi2, fp2, status); /* w = N*P2 */
1292 fp3 = floatx80_add(fp4, fp5, status); /* FP3 is P */
1293 fp4 = floatx80_sub(fp4, fp3, status); /* W-P */
1294 fp0 = floatx80_sub(fp0, fp3, status); /* FP0 is A := R - P */
1295 fp4 = floatx80_add(fp4, fp5, status); /* FP4 is p = (W-P)+w */
1296 fp3 = fp0; /* FP3 is A */
1297 fp1 = floatx80_sub(fp1, fp4, status); /* FP1 is a := r - p */
1298 fp0 = floatx80_add(fp0, fp1, status); /* FP0 is R := A+a */
1300 if (endflag) {
1301 n = floatx80_to_int32(fp2, status);
1302 goto tancont;
1304 fp3 = floatx80_sub(fp3, fp0, status); /* A-R */
1305 fp1 = floatx80_add(fp1, fp3, status); /* FP1 is r := (A-R)+a */
1306 goto loop;
1307 } else {
1308 status->float_rounding_mode = user_rnd_mode;
1309 status->floatx80_rounding_precision = user_rnd_prec;
1311 a = floatx80_move(a, status);
1313 float_raise(float_flag_inexact, status);
1315 return a;
1317 } else {
1318 fp1 = floatx80_mul(fp0, float64_to_floatx80(
1319 make_float64(0x3FE45F306DC9C883), status),
1320 status); /* X*2/PI */
1322 n = floatx80_to_int32(fp1, status);
1323 j = 32 + n;
1325 fp0 = floatx80_sub(fp0, pi_tbl[j], status); /* X-Y1 */
1326 fp0 = floatx80_sub(fp0, float32_to_floatx80(pi_tbl2[j], status),
1327 status); /* FP0 IS R = (X-Y1)-Y2 */
1329 tancont:
1330 if (n & 1) {
1331 /* NODD */
1332 fp1 = fp0; /* R */
1333 fp0 = floatx80_mul(fp0, fp0, status); /* S = R*R */
1334 fp3 = float64_to_floatx80(make_float64(0x3EA0B759F50F8688),
1335 status); /* Q4 */
1336 fp2 = float64_to_floatx80(make_float64(0xBEF2BAA5A8924F04),
1337 status); /* P3 */
1338 fp3 = floatx80_mul(fp3, fp0, status); /* SQ4 */
1339 fp2 = floatx80_mul(fp2, fp0, status); /* SP3 */
1340 fp3 = floatx80_add(fp3, float64_to_floatx80(
1341 make_float64(0xBF346F59B39BA65F), status),
1342 status); /* Q3+SQ4 */
1343 fp4 = packFloatx80(0, 0x3FF6, UINT64_C(0xE073D3FC199C4A00));
1344 fp2 = floatx80_add(fp2, fp4, status); /* P2+SP3 */
1345 fp3 = floatx80_mul(fp3, fp0, status); /* S(Q3+SQ4) */
1346 fp2 = floatx80_mul(fp2, fp0, status); /* S(P2+SP3) */
1347 fp4 = packFloatx80(0, 0x3FF9, UINT64_C(0xD23CD68415D95FA1));
1348 fp3 = floatx80_add(fp3, fp4, status); /* Q2+S(Q3+SQ4) */
1349 fp4 = packFloatx80(1, 0x3FFC, UINT64_C(0x8895A6C5FB423BCA));
1350 fp2 = floatx80_add(fp2, fp4, status); /* P1+S(P2+SP3) */
1351 fp3 = floatx80_mul(fp3, fp0, status); /* S(Q2+S(Q3+SQ4)) */
1352 fp2 = floatx80_mul(fp2, fp0, status); /* S(P1+S(P2+SP3)) */
1353 fp4 = packFloatx80(1, 0x3FFD, UINT64_C(0xEEF57E0DA84BC8CE));
1354 fp3 = floatx80_add(fp3, fp4, status); /* Q1+S(Q2+S(Q3+SQ4)) */
1355 fp2 = floatx80_mul(fp2, fp1, status); /* RS(P1+S(P2+SP3)) */
1356 fp0 = floatx80_mul(fp0, fp3, status); /* S(Q1+S(Q2+S(Q3+SQ4))) */
1357 fp1 = floatx80_add(fp1, fp2, status); /* R+RS(P1+S(P2+SP3)) */
1358 fp0 = floatx80_add(fp0, float32_to_floatx80(
1359 make_float32(0x3F800000), status),
1360 status); /* 1+S(Q1+S(Q2+S(Q3+SQ4))) */
1362 xSign = extractFloatx80Sign(fp1);
1363 xExp = extractFloatx80Exp(fp1);
1364 xSig = extractFloatx80Frac(fp1);
1365 xSign ^= 1;
1366 fp1 = packFloatx80(xSign, xExp, xSig);
1368 status->float_rounding_mode = user_rnd_mode;
1369 status->floatx80_rounding_precision = user_rnd_prec;
1371 a = floatx80_div(fp0, fp1, status);
1373 float_raise(float_flag_inexact, status);
1375 return a;
1376 } else {
1377 fp1 = floatx80_mul(fp0, fp0, status); /* S = R*R */
1378 fp3 = float64_to_floatx80(make_float64(0x3EA0B759F50F8688),
1379 status); /* Q4 */
1380 fp2 = float64_to_floatx80(make_float64(0xBEF2BAA5A8924F04),
1381 status); /* P3 */
1382 fp3 = floatx80_mul(fp3, fp1, status); /* SQ4 */
1383 fp2 = floatx80_mul(fp2, fp1, status); /* SP3 */
1384 fp3 = floatx80_add(fp3, float64_to_floatx80(
1385 make_float64(0xBF346F59B39BA65F), status),
1386 status); /* Q3+SQ4 */
1387 fp4 = packFloatx80(0, 0x3FF6, UINT64_C(0xE073D3FC199C4A00));
1388 fp2 = floatx80_add(fp2, fp4, status); /* P2+SP3 */
1389 fp3 = floatx80_mul(fp3, fp1, status); /* S(Q3+SQ4) */
1390 fp2 = floatx80_mul(fp2, fp1, status); /* S(P2+SP3) */
1391 fp4 = packFloatx80(0, 0x3FF9, UINT64_C(0xD23CD68415D95FA1));
1392 fp3 = floatx80_add(fp3, fp4, status); /* Q2+S(Q3+SQ4) */
1393 fp4 = packFloatx80(1, 0x3FFC, UINT64_C(0x8895A6C5FB423BCA));
1394 fp2 = floatx80_add(fp2, fp4, status); /* P1+S(P2+SP3) */
1395 fp3 = floatx80_mul(fp3, fp1, status); /* S(Q2+S(Q3+SQ4)) */
1396 fp2 = floatx80_mul(fp2, fp1, status); /* S(P1+S(P2+SP3)) */
1397 fp4 = packFloatx80(1, 0x3FFD, UINT64_C(0xEEF57E0DA84BC8CE));
1398 fp3 = floatx80_add(fp3, fp4, status); /* Q1+S(Q2+S(Q3+SQ4)) */
1399 fp2 = floatx80_mul(fp2, fp0, status); /* RS(P1+S(P2+SP3)) */
1400 fp1 = floatx80_mul(fp1, fp3, status); /* S(Q1+S(Q2+S(Q3+SQ4))) */
1401 fp0 = floatx80_add(fp0, fp2, status); /* R+RS(P1+S(P2+SP3)) */
1402 fp1 = floatx80_add(fp1, float32_to_floatx80(
1403 make_float32(0x3F800000), status),
1404 status); /* 1+S(Q1+S(Q2+S(Q3+SQ4))) */
1406 status->float_rounding_mode = user_rnd_mode;
1407 status->floatx80_rounding_precision = user_rnd_prec;
1409 a = floatx80_div(fp0, fp1, status);
1411 float_raise(float_flag_inexact, status);
1413 return a;
1419 * Sine
1422 floatx80 floatx80_sin(floatx80 a, float_status *status)
1424 bool aSign, xSign;
1425 int32_t aExp, xExp;
1426 uint64_t aSig, xSig;
1428 FloatRoundMode user_rnd_mode;
1429 FloatX80RoundPrec user_rnd_prec;
1431 int32_t compact, l, n, j;
1432 floatx80 fp0, fp1, fp2, fp3, fp4, fp5, x, invtwopi, twopi1, twopi2;
1433 float32 posneg1, twoto63;
1434 bool endflag;
1436 aSig = extractFloatx80Frac(a);
1437 aExp = extractFloatx80Exp(a);
1438 aSign = extractFloatx80Sign(a);
1440 if (aExp == 0x7FFF) {
1441 if ((uint64_t) (aSig << 1)) {
1442 return propagateFloatx80NaNOneArg(a, status);
1444 float_raise(float_flag_invalid, status);
1445 return floatx80_default_nan(status);
1448 if (aExp == 0 && aSig == 0) {
1449 return packFloatx80(aSign, 0, 0);
1452 user_rnd_mode = status->float_rounding_mode;
1453 user_rnd_prec = status->floatx80_rounding_precision;
1454 status->float_rounding_mode = float_round_nearest_even;
1455 status->floatx80_rounding_precision = floatx80_precision_x;
1457 compact = floatx80_make_compact(aExp, aSig);
1459 fp0 = a;
1461 if (compact < 0x3FD78000 || compact > 0x4004BC7E) {
1462 /* 2^(-40) > |X| > 15 PI */
1463 if (compact > 0x3FFF8000) { /* |X| >= 15 PI */
1464 /* REDUCEX */
1465 fp1 = packFloatx80(0, 0, 0);
1466 if (compact == 0x7FFEFFFF) {
1467 twopi1 = packFloatx80(aSign ^ 1, 0x7FFE,
1468 UINT64_C(0xC90FDAA200000000));
1469 twopi2 = packFloatx80(aSign ^ 1, 0x7FDC,
1470 UINT64_C(0x85A308D300000000));
1471 fp0 = floatx80_add(fp0, twopi1, status);
1472 fp1 = fp0;
1473 fp0 = floatx80_add(fp0, twopi2, status);
1474 fp1 = floatx80_sub(fp1, fp0, status);
1475 fp1 = floatx80_add(fp1, twopi2, status);
1477 loop:
1478 xSign = extractFloatx80Sign(fp0);
1479 xExp = extractFloatx80Exp(fp0);
1480 xExp -= 0x3FFF;
1481 if (xExp <= 28) {
1482 l = 0;
1483 endflag = true;
1484 } else {
1485 l = xExp - 27;
1486 endflag = false;
1488 invtwopi = packFloatx80(0, 0x3FFE - l,
1489 UINT64_C(0xA2F9836E4E44152A)); /* INVTWOPI */
1490 twopi1 = packFloatx80(0, 0x3FFF + l, UINT64_C(0xC90FDAA200000000));
1491 twopi2 = packFloatx80(0, 0x3FDD + l, UINT64_C(0x85A308D300000000));
1493 /* SIGN(INARG)*2^63 IN SGL */
1494 twoto63 = packFloat32(xSign, 0xBE, 0);
1496 fp2 = floatx80_mul(fp0, invtwopi, status);
1497 fp2 = floatx80_add(fp2, float32_to_floatx80(twoto63, status),
1498 status); /* THE FRACT PART OF FP2 IS ROUNDED */
1499 fp2 = floatx80_sub(fp2, float32_to_floatx80(twoto63, status),
1500 status); /* FP2 is N */
1501 fp4 = floatx80_mul(twopi1, fp2, status); /* W = N*P1 */
1502 fp5 = floatx80_mul(twopi2, fp2, status); /* w = N*P2 */
1503 fp3 = floatx80_add(fp4, fp5, status); /* FP3 is P */
1504 fp4 = floatx80_sub(fp4, fp3, status); /* W-P */
1505 fp0 = floatx80_sub(fp0, fp3, status); /* FP0 is A := R - P */
1506 fp4 = floatx80_add(fp4, fp5, status); /* FP4 is p = (W-P)+w */
1507 fp3 = fp0; /* FP3 is A */
1508 fp1 = floatx80_sub(fp1, fp4, status); /* FP1 is a := r - p */
1509 fp0 = floatx80_add(fp0, fp1, status); /* FP0 is R := A+a */
1511 if (endflag) {
1512 n = floatx80_to_int32(fp2, status);
1513 goto sincont;
1515 fp3 = floatx80_sub(fp3, fp0, status); /* A-R */
1516 fp1 = floatx80_add(fp1, fp3, status); /* FP1 is r := (A-R)+a */
1517 goto loop;
1518 } else {
1519 /* SINSM */
1520 fp0 = float32_to_floatx80(make_float32(0x3F800000),
1521 status); /* 1 */
1523 status->float_rounding_mode = user_rnd_mode;
1524 status->floatx80_rounding_precision = user_rnd_prec;
1526 /* SINTINY */
1527 a = floatx80_move(a, status);
1528 float_raise(float_flag_inexact, status);
1530 return a;
1532 } else {
1533 fp1 = floatx80_mul(fp0, float64_to_floatx80(
1534 make_float64(0x3FE45F306DC9C883), status),
1535 status); /* X*2/PI */
1537 n = floatx80_to_int32(fp1, status);
1538 j = 32 + n;
1540 fp0 = floatx80_sub(fp0, pi_tbl[j], status); /* X-Y1 */
1541 fp0 = floatx80_sub(fp0, float32_to_floatx80(pi_tbl2[j], status),
1542 status); /* FP0 IS R = (X-Y1)-Y2 */
1544 sincont:
1545 if (n & 1) {
1546 /* COSPOLY */
1547 fp0 = floatx80_mul(fp0, fp0, status); /* FP0 IS S */
1548 fp1 = floatx80_mul(fp0, fp0, status); /* FP1 IS T */
1549 fp2 = float64_to_floatx80(make_float64(0x3D2AC4D0D6011EE3),
1550 status); /* B8 */
1551 fp3 = float64_to_floatx80(make_float64(0xBDA9396F9F45AC19),
1552 status); /* B7 */
1554 xSign = extractFloatx80Sign(fp0); /* X IS S */
1555 xExp = extractFloatx80Exp(fp0);
1556 xSig = extractFloatx80Frac(fp0);
1558 if ((n >> 1) & 1) {
1559 xSign ^= 1;
1560 posneg1 = make_float32(0xBF800000); /* -1 */
1561 } else {
1562 xSign ^= 0;
1563 posneg1 = make_float32(0x3F800000); /* 1 */
1564 } /* X IS NOW R'= SGN*R */
1566 fp2 = floatx80_mul(fp2, fp1, status); /* TB8 */
1567 fp3 = floatx80_mul(fp3, fp1, status); /* TB7 */
1568 fp2 = floatx80_add(fp2, float64_to_floatx80(
1569 make_float64(0x3E21EED90612C972), status),
1570 status); /* B6+TB8 */
1571 fp3 = floatx80_add(fp3, float64_to_floatx80(
1572 make_float64(0xBE927E4FB79D9FCF), status),
1573 status); /* B5+TB7 */
1574 fp2 = floatx80_mul(fp2, fp1, status); /* T(B6+TB8) */
1575 fp3 = floatx80_mul(fp3, fp1, status); /* T(B5+TB7) */
1576 fp2 = floatx80_add(fp2, float64_to_floatx80(
1577 make_float64(0x3EFA01A01A01D423), status),
1578 status); /* B4+T(B6+TB8) */
1579 fp4 = packFloatx80(1, 0x3FF5, UINT64_C(0xB60B60B60B61D438));
1580 fp3 = floatx80_add(fp3, fp4, status); /* B3+T(B5+TB7) */
1581 fp2 = floatx80_mul(fp2, fp1, status); /* T(B4+T(B6+TB8)) */
1582 fp1 = floatx80_mul(fp1, fp3, status); /* T(B3+T(B5+TB7)) */
1583 fp4 = packFloatx80(0, 0x3FFA, UINT64_C(0xAAAAAAAAAAAAAB5E));
1584 fp2 = floatx80_add(fp2, fp4, status); /* B2+T(B4+T(B6+TB8)) */
1585 fp1 = floatx80_add(fp1, float32_to_floatx80(
1586 make_float32(0xBF000000), status),
1587 status); /* B1+T(B3+T(B5+TB7)) */
1588 fp0 = floatx80_mul(fp0, fp2, status); /* S(B2+T(B4+T(B6+TB8))) */
1589 fp0 = floatx80_add(fp0, fp1, status); /* [B1+T(B3+T(B5+TB7))]+
1590 * [S(B2+T(B4+T(B6+TB8)))]
1593 x = packFloatx80(xSign, xExp, xSig);
1594 fp0 = floatx80_mul(fp0, x, status);
1596 status->float_rounding_mode = user_rnd_mode;
1597 status->floatx80_rounding_precision = user_rnd_prec;
1599 a = floatx80_add(fp0, float32_to_floatx80(posneg1, status), status);
1601 float_raise(float_flag_inexact, status);
1603 return a;
1604 } else {
1605 /* SINPOLY */
1606 xSign = extractFloatx80Sign(fp0); /* X IS R */
1607 xExp = extractFloatx80Exp(fp0);
1608 xSig = extractFloatx80Frac(fp0);
1610 xSign ^= (n >> 1) & 1; /* X IS NOW R'= SGN*R */
1612 fp0 = floatx80_mul(fp0, fp0, status); /* FP0 IS S */
1613 fp1 = floatx80_mul(fp0, fp0, status); /* FP1 IS T */
1614 fp3 = float64_to_floatx80(make_float64(0xBD6AAA77CCC994F5),
1615 status); /* A7 */
1616 fp2 = float64_to_floatx80(make_float64(0x3DE612097AAE8DA1),
1617 status); /* A6 */
1618 fp3 = floatx80_mul(fp3, fp1, status); /* T*A7 */
1619 fp2 = floatx80_mul(fp2, fp1, status); /* T*A6 */
1620 fp3 = floatx80_add(fp3, float64_to_floatx80(
1621 make_float64(0xBE5AE6452A118AE4), status),
1622 status); /* A5+T*A7 */
1623 fp2 = floatx80_add(fp2, float64_to_floatx80(
1624 make_float64(0x3EC71DE3A5341531), status),
1625 status); /* A4+T*A6 */
1626 fp3 = floatx80_mul(fp3, fp1, status); /* T(A5+TA7) */
1627 fp2 = floatx80_mul(fp2, fp1, status); /* T(A4+TA6) */
1628 fp3 = floatx80_add(fp3, float64_to_floatx80(
1629 make_float64(0xBF2A01A01A018B59), status),
1630 status); /* A3+T(A5+TA7) */
1631 fp4 = packFloatx80(0, 0x3FF8, UINT64_C(0x88888888888859AF));
1632 fp2 = floatx80_add(fp2, fp4, status); /* A2+T(A4+TA6) */
1633 fp1 = floatx80_mul(fp1, fp3, status); /* T(A3+T(A5+TA7)) */
1634 fp2 = floatx80_mul(fp2, fp0, status); /* S(A2+T(A4+TA6)) */
1635 fp4 = packFloatx80(1, 0x3FFC, UINT64_C(0xAAAAAAAAAAAAAA99));
1636 fp1 = floatx80_add(fp1, fp4, status); /* A1+T(A3+T(A5+TA7)) */
1637 fp1 = floatx80_add(fp1, fp2,
1638 status); /* [A1+T(A3+T(A5+TA7))]+
1639 * [S(A2+T(A4+TA6))]
1642 x = packFloatx80(xSign, xExp, xSig);
1643 fp0 = floatx80_mul(fp0, x, status); /* R'*S */
1644 fp0 = floatx80_mul(fp0, fp1, status); /* SIN(R')-R' */
1646 status->float_rounding_mode = user_rnd_mode;
1647 status->floatx80_rounding_precision = user_rnd_prec;
1649 a = floatx80_add(fp0, x, status);
1651 float_raise(float_flag_inexact, status);
1653 return a;
1659 * Cosine
1662 floatx80 floatx80_cos(floatx80 a, float_status *status)
1664 bool aSign, xSign;
1665 int32_t aExp, xExp;
1666 uint64_t aSig, xSig;
1668 FloatRoundMode user_rnd_mode;
1669 FloatX80RoundPrec user_rnd_prec;
1671 int32_t compact, l, n, j;
1672 floatx80 fp0, fp1, fp2, fp3, fp4, fp5, x, invtwopi, twopi1, twopi2;
1673 float32 posneg1, twoto63;
1674 bool endflag;
1676 aSig = extractFloatx80Frac(a);
1677 aExp = extractFloatx80Exp(a);
1678 aSign = extractFloatx80Sign(a);
1680 if (aExp == 0x7FFF) {
1681 if ((uint64_t) (aSig << 1)) {
1682 return propagateFloatx80NaNOneArg(a, status);
1684 float_raise(float_flag_invalid, status);
1685 return floatx80_default_nan(status);
1688 if (aExp == 0 && aSig == 0) {
1689 return packFloatx80(0, one_exp, one_sig);
1692 user_rnd_mode = status->float_rounding_mode;
1693 user_rnd_prec = status->floatx80_rounding_precision;
1694 status->float_rounding_mode = float_round_nearest_even;
1695 status->floatx80_rounding_precision = floatx80_precision_x;
1697 compact = floatx80_make_compact(aExp, aSig);
1699 fp0 = a;
1701 if (compact < 0x3FD78000 || compact > 0x4004BC7E) {
1702 /* 2^(-40) > |X| > 15 PI */
1703 if (compact > 0x3FFF8000) { /* |X| >= 15 PI */
1704 /* REDUCEX */
1705 fp1 = packFloatx80(0, 0, 0);
1706 if (compact == 0x7FFEFFFF) {
1707 twopi1 = packFloatx80(aSign ^ 1, 0x7FFE,
1708 UINT64_C(0xC90FDAA200000000));
1709 twopi2 = packFloatx80(aSign ^ 1, 0x7FDC,
1710 UINT64_C(0x85A308D300000000));
1711 fp0 = floatx80_add(fp0, twopi1, status);
1712 fp1 = fp0;
1713 fp0 = floatx80_add(fp0, twopi2, status);
1714 fp1 = floatx80_sub(fp1, fp0, status);
1715 fp1 = floatx80_add(fp1, twopi2, status);
1717 loop:
1718 xSign = extractFloatx80Sign(fp0);
1719 xExp = extractFloatx80Exp(fp0);
1720 xExp -= 0x3FFF;
1721 if (xExp <= 28) {
1722 l = 0;
1723 endflag = true;
1724 } else {
1725 l = xExp - 27;
1726 endflag = false;
1728 invtwopi = packFloatx80(0, 0x3FFE - l,
1729 UINT64_C(0xA2F9836E4E44152A)); /* INVTWOPI */
1730 twopi1 = packFloatx80(0, 0x3FFF + l, UINT64_C(0xC90FDAA200000000));
1731 twopi2 = packFloatx80(0, 0x3FDD + l, UINT64_C(0x85A308D300000000));
1733 /* SIGN(INARG)*2^63 IN SGL */
1734 twoto63 = packFloat32(xSign, 0xBE, 0);
1736 fp2 = floatx80_mul(fp0, invtwopi, status);
1737 fp2 = floatx80_add(fp2, float32_to_floatx80(twoto63, status),
1738 status); /* THE FRACT PART OF FP2 IS ROUNDED */
1739 fp2 = floatx80_sub(fp2, float32_to_floatx80(twoto63, status),
1740 status); /* FP2 is N */
1741 fp4 = floatx80_mul(twopi1, fp2, status); /* W = N*P1 */
1742 fp5 = floatx80_mul(twopi2, fp2, status); /* w = N*P2 */
1743 fp3 = floatx80_add(fp4, fp5, status); /* FP3 is P */
1744 fp4 = floatx80_sub(fp4, fp3, status); /* W-P */
1745 fp0 = floatx80_sub(fp0, fp3, status); /* FP0 is A := R - P */
1746 fp4 = floatx80_add(fp4, fp5, status); /* FP4 is p = (W-P)+w */
1747 fp3 = fp0; /* FP3 is A */
1748 fp1 = floatx80_sub(fp1, fp4, status); /* FP1 is a := r - p */
1749 fp0 = floatx80_add(fp0, fp1, status); /* FP0 is R := A+a */
1751 if (endflag) {
1752 n = floatx80_to_int32(fp2, status);
1753 goto sincont;
1755 fp3 = floatx80_sub(fp3, fp0, status); /* A-R */
1756 fp1 = floatx80_add(fp1, fp3, status); /* FP1 is r := (A-R)+a */
1757 goto loop;
1758 } else {
1759 /* SINSM */
1760 fp0 = float32_to_floatx80(make_float32(0x3F800000), status); /* 1 */
1762 status->float_rounding_mode = user_rnd_mode;
1763 status->floatx80_rounding_precision = user_rnd_prec;
1765 /* COSTINY */
1766 a = floatx80_sub(fp0, float32_to_floatx80(
1767 make_float32(0x00800000), status),
1768 status);
1769 float_raise(float_flag_inexact, status);
1771 return a;
1773 } else {
1774 fp1 = floatx80_mul(fp0, float64_to_floatx80(
1775 make_float64(0x3FE45F306DC9C883), status),
1776 status); /* X*2/PI */
1778 n = floatx80_to_int32(fp1, status);
1779 j = 32 + n;
1781 fp0 = floatx80_sub(fp0, pi_tbl[j], status); /* X-Y1 */
1782 fp0 = floatx80_sub(fp0, float32_to_floatx80(pi_tbl2[j], status),
1783 status); /* FP0 IS R = (X-Y1)-Y2 */
1785 sincont:
1786 if ((n + 1) & 1) {
1787 /* COSPOLY */
1788 fp0 = floatx80_mul(fp0, fp0, status); /* FP0 IS S */
1789 fp1 = floatx80_mul(fp0, fp0, status); /* FP1 IS T */
1790 fp2 = float64_to_floatx80(make_float64(0x3D2AC4D0D6011EE3),
1791 status); /* B8 */
1792 fp3 = float64_to_floatx80(make_float64(0xBDA9396F9F45AC19),
1793 status); /* B7 */
1795 xSign = extractFloatx80Sign(fp0); /* X IS S */
1796 xExp = extractFloatx80Exp(fp0);
1797 xSig = extractFloatx80Frac(fp0);
1799 if (((n + 1) >> 1) & 1) {
1800 xSign ^= 1;
1801 posneg1 = make_float32(0xBF800000); /* -1 */
1802 } else {
1803 xSign ^= 0;
1804 posneg1 = make_float32(0x3F800000); /* 1 */
1805 } /* X IS NOW R'= SGN*R */
1807 fp2 = floatx80_mul(fp2, fp1, status); /* TB8 */
1808 fp3 = floatx80_mul(fp3, fp1, status); /* TB7 */
1809 fp2 = floatx80_add(fp2, float64_to_floatx80(
1810 make_float64(0x3E21EED90612C972), status),
1811 status); /* B6+TB8 */
1812 fp3 = floatx80_add(fp3, float64_to_floatx80(
1813 make_float64(0xBE927E4FB79D9FCF), status),
1814 status); /* B5+TB7 */
1815 fp2 = floatx80_mul(fp2, fp1, status); /* T(B6+TB8) */
1816 fp3 = floatx80_mul(fp3, fp1, status); /* T(B5+TB7) */
1817 fp2 = floatx80_add(fp2, float64_to_floatx80(
1818 make_float64(0x3EFA01A01A01D423), status),
1819 status); /* B4+T(B6+TB8) */
1820 fp4 = packFloatx80(1, 0x3FF5, UINT64_C(0xB60B60B60B61D438));
1821 fp3 = floatx80_add(fp3, fp4, status); /* B3+T(B5+TB7) */
1822 fp2 = floatx80_mul(fp2, fp1, status); /* T(B4+T(B6+TB8)) */
1823 fp1 = floatx80_mul(fp1, fp3, status); /* T(B3+T(B5+TB7)) */
1824 fp4 = packFloatx80(0, 0x3FFA, UINT64_C(0xAAAAAAAAAAAAAB5E));
1825 fp2 = floatx80_add(fp2, fp4, status); /* B2+T(B4+T(B6+TB8)) */
1826 fp1 = floatx80_add(fp1, float32_to_floatx80(
1827 make_float32(0xBF000000), status),
1828 status); /* B1+T(B3+T(B5+TB7)) */
1829 fp0 = floatx80_mul(fp0, fp2, status); /* S(B2+T(B4+T(B6+TB8))) */
1830 fp0 = floatx80_add(fp0, fp1, status);
1831 /* [B1+T(B3+T(B5+TB7))]+[S(B2+T(B4+T(B6+TB8)))] */
1833 x = packFloatx80(xSign, xExp, xSig);
1834 fp0 = floatx80_mul(fp0, x, status);
1836 status->float_rounding_mode = user_rnd_mode;
1837 status->floatx80_rounding_precision = user_rnd_prec;
1839 a = floatx80_add(fp0, float32_to_floatx80(posneg1, status), status);
1841 float_raise(float_flag_inexact, status);
1843 return a;
1844 } else {
1845 /* SINPOLY */
1846 xSign = extractFloatx80Sign(fp0); /* X IS R */
1847 xExp = extractFloatx80Exp(fp0);
1848 xSig = extractFloatx80Frac(fp0);
1850 xSign ^= ((n + 1) >> 1) & 1; /* X IS NOW R'= SGN*R */
1852 fp0 = floatx80_mul(fp0, fp0, status); /* FP0 IS S */
1853 fp1 = floatx80_mul(fp0, fp0, status); /* FP1 IS T */
1854 fp3 = float64_to_floatx80(make_float64(0xBD6AAA77CCC994F5),
1855 status); /* A7 */
1856 fp2 = float64_to_floatx80(make_float64(0x3DE612097AAE8DA1),
1857 status); /* A6 */
1858 fp3 = floatx80_mul(fp3, fp1, status); /* T*A7 */
1859 fp2 = floatx80_mul(fp2, fp1, status); /* T*A6 */
1860 fp3 = floatx80_add(fp3, float64_to_floatx80(
1861 make_float64(0xBE5AE6452A118AE4), status),
1862 status); /* A5+T*A7 */
1863 fp2 = floatx80_add(fp2, float64_to_floatx80(
1864 make_float64(0x3EC71DE3A5341531), status),
1865 status); /* A4+T*A6 */
1866 fp3 = floatx80_mul(fp3, fp1, status); /* T(A5+TA7) */
1867 fp2 = floatx80_mul(fp2, fp1, status); /* T(A4+TA6) */
1868 fp3 = floatx80_add(fp3, float64_to_floatx80(
1869 make_float64(0xBF2A01A01A018B59), status),
1870 status); /* A3+T(A5+TA7) */
1871 fp4 = packFloatx80(0, 0x3FF8, UINT64_C(0x88888888888859AF));
1872 fp2 = floatx80_add(fp2, fp4, status); /* A2+T(A4+TA6) */
1873 fp1 = floatx80_mul(fp1, fp3, status); /* T(A3+T(A5+TA7)) */
1874 fp2 = floatx80_mul(fp2, fp0, status); /* S(A2+T(A4+TA6)) */
1875 fp4 = packFloatx80(1, 0x3FFC, UINT64_C(0xAAAAAAAAAAAAAA99));
1876 fp1 = floatx80_add(fp1, fp4, status); /* A1+T(A3+T(A5+TA7)) */
1877 fp1 = floatx80_add(fp1, fp2, status);
1878 /* [A1+T(A3+T(A5+TA7))]+[S(A2+T(A4+TA6))] */
1880 x = packFloatx80(xSign, xExp, xSig);
1881 fp0 = floatx80_mul(fp0, x, status); /* R'*S */
1882 fp0 = floatx80_mul(fp0, fp1, status); /* SIN(R')-R' */
1884 status->float_rounding_mode = user_rnd_mode;
1885 status->floatx80_rounding_precision = user_rnd_prec;
1887 a = floatx80_add(fp0, x, status);
1889 float_raise(float_flag_inexact, status);
1891 return a;
1897 * Arc tangent
1900 floatx80 floatx80_atan(floatx80 a, float_status *status)
1902 bool aSign;
1903 int32_t aExp;
1904 uint64_t aSig;
1906 FloatRoundMode user_rnd_mode;
1907 FloatX80RoundPrec user_rnd_prec;
1909 int32_t compact, tbl_index;
1910 floatx80 fp0, fp1, fp2, fp3, xsave;
1912 aSig = extractFloatx80Frac(a);
1913 aExp = extractFloatx80Exp(a);
1914 aSign = extractFloatx80Sign(a);
1916 if (aExp == 0x7FFF) {
1917 if ((uint64_t) (aSig << 1)) {
1918 return propagateFloatx80NaNOneArg(a, status);
1920 a = packFloatx80(aSign, piby2_exp, pi_sig);
1921 float_raise(float_flag_inexact, status);
1922 return floatx80_move(a, status);
1925 if (aExp == 0 && aSig == 0) {
1926 return packFloatx80(aSign, 0, 0);
1929 compact = floatx80_make_compact(aExp, aSig);
1931 user_rnd_mode = status->float_rounding_mode;
1932 user_rnd_prec = status->floatx80_rounding_precision;
1933 status->float_rounding_mode = float_round_nearest_even;
1934 status->floatx80_rounding_precision = floatx80_precision_x;
1936 if (compact < 0x3FFB8000 || compact > 0x4002FFFF) {
1937 /* |X| >= 16 or |X| < 1/16 */
1938 if (compact > 0x3FFF8000) { /* |X| >= 16 */
1939 if (compact > 0x40638000) { /* |X| > 2^(100) */
1940 fp0 = packFloatx80(aSign, piby2_exp, pi_sig);
1941 fp1 = packFloatx80(aSign, 0x0001, one_sig);
1943 status->float_rounding_mode = user_rnd_mode;
1944 status->floatx80_rounding_precision = user_rnd_prec;
1946 a = floatx80_sub(fp0, fp1, status);
1948 float_raise(float_flag_inexact, status);
1950 return a;
1951 } else {
1952 fp0 = a;
1953 fp1 = packFloatx80(1, one_exp, one_sig); /* -1 */
1954 fp1 = floatx80_div(fp1, fp0, status); /* X' = -1/X */
1955 xsave = fp1;
1956 fp0 = floatx80_mul(fp1, fp1, status); /* Y = X'*X' */
1957 fp1 = floatx80_mul(fp0, fp0, status); /* Z = Y*Y */
1958 fp3 = float64_to_floatx80(make_float64(0xBFB70BF398539E6A),
1959 status); /* C5 */
1960 fp2 = float64_to_floatx80(make_float64(0x3FBC7187962D1D7D),
1961 status); /* C4 */
1962 fp3 = floatx80_mul(fp3, fp1, status); /* Z*C5 */
1963 fp2 = floatx80_mul(fp2, fp1, status); /* Z*C4 */
1964 fp3 = floatx80_add(fp3, float64_to_floatx80(
1965 make_float64(0xBFC24924827107B8), status),
1966 status); /* C3+Z*C5 */
1967 fp2 = floatx80_add(fp2, float64_to_floatx80(
1968 make_float64(0x3FC999999996263E), status),
1969 status); /* C2+Z*C4 */
1970 fp1 = floatx80_mul(fp1, fp3, status); /* Z*(C3+Z*C5) */
1971 fp2 = floatx80_mul(fp2, fp0, status); /* Y*(C2+Z*C4) */
1972 fp1 = floatx80_add(fp1, float64_to_floatx80(
1973 make_float64(0xBFD5555555555536), status),
1974 status); /* C1+Z*(C3+Z*C5) */
1975 fp0 = floatx80_mul(fp0, xsave, status); /* X'*Y */
1976 /* [Y*(C2+Z*C4)]+[C1+Z*(C3+Z*C5)] */
1977 fp1 = floatx80_add(fp1, fp2, status);
1978 /* X'*Y*([B1+Z*(B3+Z*B5)]+[Y*(B2+Z*(B4+Z*B6))]) ?? */
1979 fp0 = floatx80_mul(fp0, fp1, status);
1980 fp0 = floatx80_add(fp0, xsave, status);
1981 fp1 = packFloatx80(aSign, piby2_exp, pi_sig);
1983 status->float_rounding_mode = user_rnd_mode;
1984 status->floatx80_rounding_precision = user_rnd_prec;
1986 a = floatx80_add(fp0, fp1, status);
1988 float_raise(float_flag_inexact, status);
1990 return a;
1992 } else { /* |X| < 1/16 */
1993 if (compact < 0x3FD78000) { /* |X| < 2^(-40) */
1994 status->float_rounding_mode = user_rnd_mode;
1995 status->floatx80_rounding_precision = user_rnd_prec;
1997 a = floatx80_move(a, status);
1999 float_raise(float_flag_inexact, status);
2001 return a;
2002 } else {
2003 fp0 = a;
2004 xsave = a;
2005 fp0 = floatx80_mul(fp0, fp0, status); /* Y = X*X */
2006 fp1 = floatx80_mul(fp0, fp0, status); /* Z = Y*Y */
2007 fp2 = float64_to_floatx80(make_float64(0x3FB344447F876989),
2008 status); /* B6 */
2009 fp3 = float64_to_floatx80(make_float64(0xBFB744EE7FAF45DB),
2010 status); /* B5 */
2011 fp2 = floatx80_mul(fp2, fp1, status); /* Z*B6 */
2012 fp3 = floatx80_mul(fp3, fp1, status); /* Z*B5 */
2013 fp2 = floatx80_add(fp2, float64_to_floatx80(
2014 make_float64(0x3FBC71C646940220), status),
2015 status); /* B4+Z*B6 */
2016 fp3 = floatx80_add(fp3, float64_to_floatx80(
2017 make_float64(0xBFC24924921872F9),
2018 status), status); /* B3+Z*B5 */
2019 fp2 = floatx80_mul(fp2, fp1, status); /* Z*(B4+Z*B6) */
2020 fp1 = floatx80_mul(fp1, fp3, status); /* Z*(B3+Z*B5) */
2021 fp2 = floatx80_add(fp2, float64_to_floatx80(
2022 make_float64(0x3FC9999999998FA9), status),
2023 status); /* B2+Z*(B4+Z*B6) */
2024 fp1 = floatx80_add(fp1, float64_to_floatx80(
2025 make_float64(0xBFD5555555555555), status),
2026 status); /* B1+Z*(B3+Z*B5) */
2027 fp2 = floatx80_mul(fp2, fp0, status); /* Y*(B2+Z*(B4+Z*B6)) */
2028 fp0 = floatx80_mul(fp0, xsave, status); /* X*Y */
2029 /* [B1+Z*(B3+Z*B5)]+[Y*(B2+Z*(B4+Z*B6))] */
2030 fp1 = floatx80_add(fp1, fp2, status);
2031 /* X*Y*([B1+Z*(B3+Z*B5)]+[Y*(B2+Z*(B4+Z*B6))]) */
2032 fp0 = floatx80_mul(fp0, fp1, status);
2034 status->float_rounding_mode = user_rnd_mode;
2035 status->floatx80_rounding_precision = user_rnd_prec;
2037 a = floatx80_add(fp0, xsave, status);
2039 float_raise(float_flag_inexact, status);
2041 return a;
2044 } else {
2045 aSig &= UINT64_C(0xF800000000000000);
2046 aSig |= UINT64_C(0x0400000000000000);
2047 xsave = packFloatx80(aSign, aExp, aSig); /* F */
2048 fp0 = a;
2049 fp1 = a; /* X */
2050 fp2 = packFloatx80(0, one_exp, one_sig); /* 1 */
2051 fp1 = floatx80_mul(fp1, xsave, status); /* X*F */
2052 fp0 = floatx80_sub(fp0, xsave, status); /* X-F */
2053 fp1 = floatx80_add(fp1, fp2, status); /* 1 + X*F */
2054 fp0 = floatx80_div(fp0, fp1, status); /* U = (X-F)/(1+X*F) */
2056 tbl_index = compact;
2058 tbl_index &= 0x7FFF0000;
2059 tbl_index -= 0x3FFB0000;
2060 tbl_index >>= 1;
2061 tbl_index += compact & 0x00007800;
2062 tbl_index >>= 11;
2064 fp3 = atan_tbl[tbl_index];
2066 fp3.high |= aSign ? 0x8000 : 0; /* ATAN(F) */
2068 fp1 = floatx80_mul(fp0, fp0, status); /* V = U*U */
2069 fp2 = float64_to_floatx80(make_float64(0xBFF6687E314987D8),
2070 status); /* A3 */
2071 fp2 = floatx80_add(fp2, fp1, status); /* A3+V */
2072 fp2 = floatx80_mul(fp2, fp1, status); /* V*(A3+V) */
2073 fp1 = floatx80_mul(fp1, fp0, status); /* U*V */
2074 fp2 = floatx80_add(fp2, float64_to_floatx80(
2075 make_float64(0x4002AC6934A26DB3), status),
2076 status); /* A2+V*(A3+V) */
2077 fp1 = floatx80_mul(fp1, float64_to_floatx80(
2078 make_float64(0xBFC2476F4E1DA28E), status),
2079 status); /* A1+U*V */
2080 fp1 = floatx80_mul(fp1, fp2, status); /* A1*U*V*(A2+V*(A3+V)) */
2081 fp0 = floatx80_add(fp0, fp1, status); /* ATAN(U) */
2083 status->float_rounding_mode = user_rnd_mode;
2084 status->floatx80_rounding_precision = user_rnd_prec;
2086 a = floatx80_add(fp0, fp3, status); /* ATAN(X) */
2088 float_raise(float_flag_inexact, status);
2090 return a;
2095 * Arc sine
2098 floatx80 floatx80_asin(floatx80 a, float_status *status)
2100 bool aSign;
2101 int32_t aExp;
2102 uint64_t aSig;
2104 FloatRoundMode user_rnd_mode;
2105 FloatX80RoundPrec user_rnd_prec;
2107 int32_t compact;
2108 floatx80 fp0, fp1, fp2, one;
2110 aSig = extractFloatx80Frac(a);
2111 aExp = extractFloatx80Exp(a);
2112 aSign = extractFloatx80Sign(a);
2114 if (aExp == 0x7FFF && (uint64_t) (aSig << 1)) {
2115 return propagateFloatx80NaNOneArg(a, status);
2118 if (aExp == 0 && aSig == 0) {
2119 return packFloatx80(aSign, 0, 0);
2122 compact = floatx80_make_compact(aExp, aSig);
2124 if (compact >= 0x3FFF8000) { /* |X| >= 1 */
2125 if (aExp == one_exp && aSig == one_sig) { /* |X| == 1 */
2126 float_raise(float_flag_inexact, status);
2127 a = packFloatx80(aSign, piby2_exp, pi_sig);
2128 return floatx80_move(a, status);
2129 } else { /* |X| > 1 */
2130 float_raise(float_flag_invalid, status);
2131 return floatx80_default_nan(status);
2134 } /* |X| < 1 */
2136 user_rnd_mode = status->float_rounding_mode;
2137 user_rnd_prec = status->floatx80_rounding_precision;
2138 status->float_rounding_mode = float_round_nearest_even;
2139 status->floatx80_rounding_precision = floatx80_precision_x;
2141 one = packFloatx80(0, one_exp, one_sig);
2142 fp0 = a;
2144 fp1 = floatx80_sub(one, fp0, status); /* 1 - X */
2145 fp2 = floatx80_add(one, fp0, status); /* 1 + X */
2146 fp1 = floatx80_mul(fp2, fp1, status); /* (1+X)*(1-X) */
2147 fp1 = floatx80_sqrt(fp1, status); /* SQRT((1+X)*(1-X)) */
2148 fp0 = floatx80_div(fp0, fp1, status); /* X/SQRT((1+X)*(1-X)) */
2150 status->float_rounding_mode = user_rnd_mode;
2151 status->floatx80_rounding_precision = user_rnd_prec;
2153 a = floatx80_atan(fp0, status); /* ATAN(X/SQRT((1+X)*(1-X))) */
2155 float_raise(float_flag_inexact, status);
2157 return a;
2161 * Arc cosine
2164 floatx80 floatx80_acos(floatx80 a, float_status *status)
2166 bool aSign;
2167 int32_t aExp;
2168 uint64_t aSig;
2170 FloatRoundMode user_rnd_mode;
2171 FloatX80RoundPrec user_rnd_prec;
2173 int32_t compact;
2174 floatx80 fp0, fp1, one;
2176 aSig = extractFloatx80Frac(a);
2177 aExp = extractFloatx80Exp(a);
2178 aSign = extractFloatx80Sign(a);
2180 if (aExp == 0x7FFF && (uint64_t) (aSig << 1)) {
2181 return propagateFloatx80NaNOneArg(a, status);
2183 if (aExp == 0 && aSig == 0) {
2184 float_raise(float_flag_inexact, status);
2185 return roundAndPackFloatx80(status->floatx80_rounding_precision, 0,
2186 piby2_exp, pi_sig, 0, status);
2189 compact = floatx80_make_compact(aExp, aSig);
2191 if (compact >= 0x3FFF8000) { /* |X| >= 1 */
2192 if (aExp == one_exp && aSig == one_sig) { /* |X| == 1 */
2193 if (aSign) { /* X == -1 */
2194 a = packFloatx80(0, pi_exp, pi_sig);
2195 float_raise(float_flag_inexact, status);
2196 return floatx80_move(a, status);
2197 } else { /* X == +1 */
2198 return packFloatx80(0, 0, 0);
2200 } else { /* |X| > 1 */
2201 float_raise(float_flag_invalid, status);
2202 return floatx80_default_nan(status);
2204 } /* |X| < 1 */
2206 user_rnd_mode = status->float_rounding_mode;
2207 user_rnd_prec = status->floatx80_rounding_precision;
2208 status->float_rounding_mode = float_round_nearest_even;
2209 status->floatx80_rounding_precision = floatx80_precision_x;
2211 one = packFloatx80(0, one_exp, one_sig);
2212 fp0 = a;
2214 fp1 = floatx80_add(one, fp0, status); /* 1 + X */
2215 fp0 = floatx80_sub(one, fp0, status); /* 1 - X */
2216 fp0 = floatx80_div(fp0, fp1, status); /* (1-X)/(1+X) */
2217 fp0 = floatx80_sqrt(fp0, status); /* SQRT((1-X)/(1+X)) */
2218 fp0 = floatx80_atan(fp0, status); /* ATAN(SQRT((1-X)/(1+X))) */
2220 status->float_rounding_mode = user_rnd_mode;
2221 status->floatx80_rounding_precision = user_rnd_prec;
2223 a = floatx80_add(fp0, fp0, status); /* 2 * ATAN(SQRT((1-X)/(1+X))) */
2225 float_raise(float_flag_inexact, status);
2227 return a;
2231 * Hyperbolic arc tangent
2234 floatx80 floatx80_atanh(floatx80 a, float_status *status)
2236 bool aSign;
2237 int32_t aExp;
2238 uint64_t aSig;
2240 FloatRoundMode user_rnd_mode;
2241 FloatX80RoundPrec user_rnd_prec;
2243 int32_t compact;
2244 floatx80 fp0, fp1, fp2, one;
2246 aSig = extractFloatx80Frac(a);
2247 aExp = extractFloatx80Exp(a);
2248 aSign = extractFloatx80Sign(a);
2250 if (aExp == 0x7FFF && (uint64_t) (aSig << 1)) {
2251 return propagateFloatx80NaNOneArg(a, status);
2254 if (aExp == 0 && aSig == 0) {
2255 return packFloatx80(aSign, 0, 0);
2258 compact = floatx80_make_compact(aExp, aSig);
2260 if (compact >= 0x3FFF8000) { /* |X| >= 1 */
2261 if (aExp == one_exp && aSig == one_sig) { /* |X| == 1 */
2262 float_raise(float_flag_divbyzero, status);
2263 return packFloatx80(aSign, floatx80_infinity.high,
2264 floatx80_infinity.low);
2265 } else { /* |X| > 1 */
2266 float_raise(float_flag_invalid, status);
2267 return floatx80_default_nan(status);
2269 } /* |X| < 1 */
2271 user_rnd_mode = status->float_rounding_mode;
2272 user_rnd_prec = status->floatx80_rounding_precision;
2273 status->float_rounding_mode = float_round_nearest_even;
2274 status->floatx80_rounding_precision = floatx80_precision_x;
2276 one = packFloatx80(0, one_exp, one_sig);
2277 fp2 = packFloatx80(aSign, 0x3FFE, one_sig); /* SIGN(X) * (1/2) */
2278 fp0 = packFloatx80(0, aExp, aSig); /* Y = |X| */
2279 fp1 = packFloatx80(1, aExp, aSig); /* -Y */
2280 fp0 = floatx80_add(fp0, fp0, status); /* 2Y */
2281 fp1 = floatx80_add(fp1, one, status); /* 1-Y */
2282 fp0 = floatx80_div(fp0, fp1, status); /* Z = 2Y/(1-Y) */
2283 fp0 = floatx80_lognp1(fp0, status); /* LOG1P(Z) */
2285 status->float_rounding_mode = user_rnd_mode;
2286 status->floatx80_rounding_precision = user_rnd_prec;
2288 a = floatx80_mul(fp0, fp2,
2289 status); /* ATANH(X) = SIGN(X) * (1/2) * LOG1P(Z) */
2291 float_raise(float_flag_inexact, status);
2293 return a;
2297 * e to x minus 1
2300 floatx80 floatx80_etoxm1(floatx80 a, float_status *status)
2302 bool aSign;
2303 int32_t aExp;
2304 uint64_t aSig;
2306 FloatRoundMode user_rnd_mode;
2307 FloatX80RoundPrec user_rnd_prec;
2309 int32_t compact, n, j, m, m1;
2310 floatx80 fp0, fp1, fp2, fp3, l2, sc, onebysc;
2312 aSig = extractFloatx80Frac(a);
2313 aExp = extractFloatx80Exp(a);
2314 aSign = extractFloatx80Sign(a);
2316 if (aExp == 0x7FFF) {
2317 if ((uint64_t) (aSig << 1)) {
2318 return propagateFloatx80NaNOneArg(a, status);
2320 if (aSign) {
2321 return packFloatx80(aSign, one_exp, one_sig);
2323 return packFloatx80(0, floatx80_infinity.high,
2324 floatx80_infinity.low);
2327 if (aExp == 0 && aSig == 0) {
2328 return packFloatx80(aSign, 0, 0);
2331 user_rnd_mode = status->float_rounding_mode;
2332 user_rnd_prec = status->floatx80_rounding_precision;
2333 status->float_rounding_mode = float_round_nearest_even;
2334 status->floatx80_rounding_precision = floatx80_precision_x;
2336 if (aExp >= 0x3FFD) { /* |X| >= 1/4 */
2337 compact = floatx80_make_compact(aExp, aSig);
2339 if (compact <= 0x4004C215) { /* |X| <= 70 log2 */
2340 fp0 = a;
2341 fp1 = a;
2342 fp0 = floatx80_mul(fp0, float32_to_floatx80(
2343 make_float32(0x42B8AA3B), status),
2344 status); /* 64/log2 * X */
2345 n = floatx80_to_int32(fp0, status); /* int(64/log2*X) */
2346 fp0 = int32_to_floatx80(n, status);
2348 j = n & 0x3F; /* J = N mod 64 */
2349 m = n / 64; /* NOTE: this is really arithmetic right shift by 6 */
2350 if (n < 0 && j) {
2352 * arithmetic right shift is division and
2353 * round towards minus infinity
2355 m--;
2357 m1 = -m;
2358 /*m += 0x3FFF; // biased exponent of 2^(M) */
2359 /*m1 += 0x3FFF; // biased exponent of -2^(-M) */
2361 fp2 = fp0; /* N */
2362 fp0 = floatx80_mul(fp0, float32_to_floatx80(
2363 make_float32(0xBC317218), status),
2364 status); /* N * L1, L1 = lead(-log2/64) */
2365 l2 = packFloatx80(0, 0x3FDC, UINT64_C(0x82E308654361C4C6));
2366 fp2 = floatx80_mul(fp2, l2, status); /* N * L2, L1+L2 = -log2/64 */
2367 fp0 = floatx80_add(fp0, fp1, status); /* X + N*L1 */
2368 fp0 = floatx80_add(fp0, fp2, status); /* R */
2370 fp1 = floatx80_mul(fp0, fp0, status); /* S = R*R */
2371 fp2 = float32_to_floatx80(make_float32(0x3950097B),
2372 status); /* A6 */
2373 fp2 = floatx80_mul(fp2, fp1, status); /* fp2 is S*A6 */
2374 fp3 = floatx80_mul(float32_to_floatx80(make_float32(0x3AB60B6A),
2375 status), fp1, status); /* fp3 is S*A5 */
2376 fp2 = floatx80_add(fp2, float64_to_floatx80(
2377 make_float64(0x3F81111111174385), status),
2378 status); /* fp2 IS A4+S*A6 */
2379 fp3 = floatx80_add(fp3, float64_to_floatx80(
2380 make_float64(0x3FA5555555554F5A), status),
2381 status); /* fp3 is A3+S*A5 */
2382 fp2 = floatx80_mul(fp2, fp1, status); /* fp2 IS S*(A4+S*A6) */
2383 fp3 = floatx80_mul(fp3, fp1, status); /* fp3 IS S*(A3+S*A5) */
2384 fp2 = floatx80_add(fp2, float64_to_floatx80(
2385 make_float64(0x3FC5555555555555), status),
2386 status); /* fp2 IS A2+S*(A4+S*A6) */
2387 fp3 = floatx80_add(fp3, float32_to_floatx80(
2388 make_float32(0x3F000000), status),
2389 status); /* fp3 IS A1+S*(A3+S*A5) */
2390 fp2 = floatx80_mul(fp2, fp1,
2391 status); /* fp2 IS S*(A2+S*(A4+S*A6)) */
2392 fp1 = floatx80_mul(fp1, fp3,
2393 status); /* fp1 IS S*(A1+S*(A3+S*A5)) */
2394 fp2 = floatx80_mul(fp2, fp0,
2395 status); /* fp2 IS R*S*(A2+S*(A4+S*A6)) */
2396 fp0 = floatx80_add(fp0, fp1,
2397 status); /* fp0 IS R+S*(A1+S*(A3+S*A5)) */
2398 fp0 = floatx80_add(fp0, fp2, status); /* fp0 IS EXP(R) - 1 */
2400 fp0 = floatx80_mul(fp0, exp_tbl[j],
2401 status); /* 2^(J/64)*(Exp(R)-1) */
2403 if (m >= 64) {
2404 fp1 = float32_to_floatx80(exp_tbl2[j], status);
2405 onebysc = packFloatx80(1, m1 + 0x3FFF, one_sig); /* -2^(-M) */
2406 fp1 = floatx80_add(fp1, onebysc, status);
2407 fp0 = floatx80_add(fp0, fp1, status);
2408 fp0 = floatx80_add(fp0, exp_tbl[j], status);
2409 } else if (m < -3) {
2410 fp0 = floatx80_add(fp0, float32_to_floatx80(exp_tbl2[j],
2411 status), status);
2412 fp0 = floatx80_add(fp0, exp_tbl[j], status);
2413 onebysc = packFloatx80(1, m1 + 0x3FFF, one_sig); /* -2^(-M) */
2414 fp0 = floatx80_add(fp0, onebysc, status);
2415 } else { /* -3 <= m <= 63 */
2416 fp1 = exp_tbl[j];
2417 fp0 = floatx80_add(fp0, float32_to_floatx80(exp_tbl2[j],
2418 status), status);
2419 onebysc = packFloatx80(1, m1 + 0x3FFF, one_sig); /* -2^(-M) */
2420 fp1 = floatx80_add(fp1, onebysc, status);
2421 fp0 = floatx80_add(fp0, fp1, status);
2424 sc = packFloatx80(0, m + 0x3FFF, one_sig);
2426 status->float_rounding_mode = user_rnd_mode;
2427 status->floatx80_rounding_precision = user_rnd_prec;
2429 a = floatx80_mul(fp0, sc, status);
2431 float_raise(float_flag_inexact, status);
2433 return a;
2434 } else { /* |X| > 70 log2 */
2435 if (aSign) {
2436 fp0 = float32_to_floatx80(make_float32(0xBF800000),
2437 status); /* -1 */
2439 status->float_rounding_mode = user_rnd_mode;
2440 status->floatx80_rounding_precision = user_rnd_prec;
2442 a = floatx80_add(fp0, float32_to_floatx80(
2443 make_float32(0x00800000), status),
2444 status); /* -1 + 2^(-126) */
2446 float_raise(float_flag_inexact, status);
2448 return a;
2449 } else {
2450 status->float_rounding_mode = user_rnd_mode;
2451 status->floatx80_rounding_precision = user_rnd_prec;
2453 return floatx80_etox(a, status);
2456 } else { /* |X| < 1/4 */
2457 if (aExp >= 0x3FBE) {
2458 fp0 = a;
2459 fp0 = floatx80_mul(fp0, fp0, status); /* S = X*X */
2460 fp1 = float32_to_floatx80(make_float32(0x2F30CAA8),
2461 status); /* B12 */
2462 fp1 = floatx80_mul(fp1, fp0, status); /* S * B12 */
2463 fp2 = float32_to_floatx80(make_float32(0x310F8290),
2464 status); /* B11 */
2465 fp1 = floatx80_add(fp1, float32_to_floatx80(
2466 make_float32(0x32D73220), status),
2467 status); /* B10 */
2468 fp2 = floatx80_mul(fp2, fp0, status);
2469 fp1 = floatx80_mul(fp1, fp0, status);
2470 fp2 = floatx80_add(fp2, float32_to_floatx80(
2471 make_float32(0x3493F281), status),
2472 status); /* B9 */
2473 fp1 = floatx80_add(fp1, float64_to_floatx80(
2474 make_float64(0x3EC71DE3A5774682), status),
2475 status); /* B8 */
2476 fp2 = floatx80_mul(fp2, fp0, status);
2477 fp1 = floatx80_mul(fp1, fp0, status);
2478 fp2 = floatx80_add(fp2, float64_to_floatx80(
2479 make_float64(0x3EFA01A019D7CB68), status),
2480 status); /* B7 */
2481 fp1 = floatx80_add(fp1, float64_to_floatx80(
2482 make_float64(0x3F2A01A01A019DF3), status),
2483 status); /* B6 */
2484 fp2 = floatx80_mul(fp2, fp0, status);
2485 fp1 = floatx80_mul(fp1, fp0, status);
2486 fp2 = floatx80_add(fp2, float64_to_floatx80(
2487 make_float64(0x3F56C16C16C170E2), status),
2488 status); /* B5 */
2489 fp1 = floatx80_add(fp1, float64_to_floatx80(
2490 make_float64(0x3F81111111111111), status),
2491 status); /* B4 */
2492 fp2 = floatx80_mul(fp2, fp0, status);
2493 fp1 = floatx80_mul(fp1, fp0, status);
2494 fp2 = floatx80_add(fp2, float64_to_floatx80(
2495 make_float64(0x3FA5555555555555), status),
2496 status); /* B3 */
2497 fp3 = packFloatx80(0, 0x3FFC, UINT64_C(0xAAAAAAAAAAAAAAAB));
2498 fp1 = floatx80_add(fp1, fp3, status); /* B2 */
2499 fp2 = floatx80_mul(fp2, fp0, status);
2500 fp1 = floatx80_mul(fp1, fp0, status);
2502 fp2 = floatx80_mul(fp2, fp0, status);
2503 fp1 = floatx80_mul(fp1, a, status);
2505 fp0 = floatx80_mul(fp0, float32_to_floatx80(
2506 make_float32(0x3F000000), status),
2507 status); /* S*B1 */
2508 fp1 = floatx80_add(fp1, fp2, status); /* Q */
2509 fp0 = floatx80_add(fp0, fp1, status); /* S*B1+Q */
2511 status->float_rounding_mode = user_rnd_mode;
2512 status->floatx80_rounding_precision = user_rnd_prec;
2514 a = floatx80_add(fp0, a, status);
2516 float_raise(float_flag_inexact, status);
2518 return a;
2519 } else { /* |X| < 2^(-65) */
2520 sc = packFloatx80(1, 1, one_sig);
2521 fp0 = a;
2523 if (aExp < 0x0033) { /* |X| < 2^(-16382) */
2524 fp0 = floatx80_mul(fp0, float64_to_floatx80(
2525 make_float64(0x48B0000000000000), status),
2526 status);
2527 fp0 = floatx80_add(fp0, sc, status);
2529 status->float_rounding_mode = user_rnd_mode;
2530 status->floatx80_rounding_precision = user_rnd_prec;
2532 a = floatx80_mul(fp0, float64_to_floatx80(
2533 make_float64(0x3730000000000000), status),
2534 status);
2535 } else {
2536 status->float_rounding_mode = user_rnd_mode;
2537 status->floatx80_rounding_precision = user_rnd_prec;
2539 a = floatx80_add(fp0, sc, status);
2542 float_raise(float_flag_inexact, status);
2544 return a;
2550 * Hyperbolic tangent
2553 floatx80 floatx80_tanh(floatx80 a, float_status *status)
2555 bool aSign, vSign;
2556 int32_t aExp, vExp;
2557 uint64_t aSig, vSig;
2559 FloatRoundMode user_rnd_mode;
2560 FloatX80RoundPrec user_rnd_prec;
2562 int32_t compact;
2563 floatx80 fp0, fp1;
2564 uint32_t sign;
2566 aSig = extractFloatx80Frac(a);
2567 aExp = extractFloatx80Exp(a);
2568 aSign = extractFloatx80Sign(a);
2570 if (aExp == 0x7FFF) {
2571 if ((uint64_t) (aSig << 1)) {
2572 return propagateFloatx80NaNOneArg(a, status);
2574 return packFloatx80(aSign, one_exp, one_sig);
2577 if (aExp == 0 && aSig == 0) {
2578 return packFloatx80(aSign, 0, 0);
2581 user_rnd_mode = status->float_rounding_mode;
2582 user_rnd_prec = status->floatx80_rounding_precision;
2583 status->float_rounding_mode = float_round_nearest_even;
2584 status->floatx80_rounding_precision = floatx80_precision_x;
2586 compact = floatx80_make_compact(aExp, aSig);
2588 if (compact < 0x3FD78000 || compact > 0x3FFFDDCE) {
2589 /* TANHBORS */
2590 if (compact < 0x3FFF8000) {
2591 /* TANHSM */
2592 status->float_rounding_mode = user_rnd_mode;
2593 status->floatx80_rounding_precision = user_rnd_prec;
2595 a = floatx80_move(a, status);
2597 float_raise(float_flag_inexact, status);
2599 return a;
2600 } else {
2601 if (compact > 0x40048AA1) {
2602 /* TANHHUGE */
2603 sign = 0x3F800000;
2604 sign |= aSign ? 0x80000000 : 0x00000000;
2605 fp0 = float32_to_floatx80(make_float32(sign), status);
2606 sign &= 0x80000000;
2607 sign ^= 0x80800000; /* -SIGN(X)*EPS */
2609 status->float_rounding_mode = user_rnd_mode;
2610 status->floatx80_rounding_precision = user_rnd_prec;
2612 a = floatx80_add(fp0, float32_to_floatx80(make_float32(sign),
2613 status), status);
2615 float_raise(float_flag_inexact, status);
2617 return a;
2618 } else {
2619 fp0 = packFloatx80(0, aExp + 1, aSig); /* Y = 2|X| */
2620 fp0 = floatx80_etox(fp0, status); /* FP0 IS EXP(Y) */
2621 fp0 = floatx80_add(fp0, float32_to_floatx80(
2622 make_float32(0x3F800000),
2623 status), status); /* EXP(Y)+1 */
2624 sign = aSign ? 0x80000000 : 0x00000000;
2625 fp1 = floatx80_div(float32_to_floatx80(make_float32(
2626 sign ^ 0xC0000000), status), fp0,
2627 status); /* -SIGN(X)*2 / [EXP(Y)+1] */
2628 fp0 = float32_to_floatx80(make_float32(sign | 0x3F800000),
2629 status); /* SIGN */
2631 status->float_rounding_mode = user_rnd_mode;
2632 status->floatx80_rounding_precision = user_rnd_prec;
2634 a = floatx80_add(fp1, fp0, status);
2636 float_raise(float_flag_inexact, status);
2638 return a;
2641 } else { /* 2**(-40) < |X| < (5/2)LOG2 */
2642 fp0 = packFloatx80(0, aExp + 1, aSig); /* Y = 2|X| */
2643 fp0 = floatx80_etoxm1(fp0, status); /* FP0 IS Z = EXPM1(Y) */
2644 fp1 = floatx80_add(fp0, float32_to_floatx80(make_float32(0x40000000),
2645 status),
2646 status); /* Z+2 */
2648 vSign = extractFloatx80Sign(fp1);
2649 vExp = extractFloatx80Exp(fp1);
2650 vSig = extractFloatx80Frac(fp1);
2652 fp1 = packFloatx80(vSign ^ aSign, vExp, vSig);
2654 status->float_rounding_mode = user_rnd_mode;
2655 status->floatx80_rounding_precision = user_rnd_prec;
2657 a = floatx80_div(fp0, fp1, status);
2659 float_raise(float_flag_inexact, status);
2661 return a;
2666 * Hyperbolic sine
2669 floatx80 floatx80_sinh(floatx80 a, float_status *status)
2671 bool aSign;
2672 int32_t aExp;
2673 uint64_t aSig;
2675 FloatRoundMode user_rnd_mode;
2676 FloatX80RoundPrec user_rnd_prec;
2678 int32_t compact;
2679 floatx80 fp0, fp1, fp2;
2680 float32 fact;
2682 aSig = extractFloatx80Frac(a);
2683 aExp = extractFloatx80Exp(a);
2684 aSign = extractFloatx80Sign(a);
2686 if (aExp == 0x7FFF) {
2687 if ((uint64_t) (aSig << 1)) {
2688 return propagateFloatx80NaNOneArg(a, status);
2690 return packFloatx80(aSign, floatx80_infinity.high,
2691 floatx80_infinity.low);
2694 if (aExp == 0 && aSig == 0) {
2695 return packFloatx80(aSign, 0, 0);
2698 user_rnd_mode = status->float_rounding_mode;
2699 user_rnd_prec = status->floatx80_rounding_precision;
2700 status->float_rounding_mode = float_round_nearest_even;
2701 status->floatx80_rounding_precision = floatx80_precision_x;
2703 compact = floatx80_make_compact(aExp, aSig);
2705 if (compact > 0x400CB167) {
2706 /* SINHBIG */
2707 if (compact > 0x400CB2B3) {
2708 status->float_rounding_mode = user_rnd_mode;
2709 status->floatx80_rounding_precision = user_rnd_prec;
2711 return roundAndPackFloatx80(status->floatx80_rounding_precision,
2712 aSign, 0x8000, aSig, 0, status);
2713 } else {
2714 fp0 = floatx80_abs(a); /* Y = |X| */
2715 fp0 = floatx80_sub(fp0, float64_to_floatx80(
2716 make_float64(0x40C62D38D3D64634), status),
2717 status); /* (|X|-16381LOG2_LEAD) */
2718 fp0 = floatx80_sub(fp0, float64_to_floatx80(
2719 make_float64(0x3D6F90AEB1E75CC7), status),
2720 status); /* |X| - 16381 LOG2, ACCURATE */
2721 fp0 = floatx80_etox(fp0, status);
2722 fp2 = packFloatx80(aSign, 0x7FFB, one_sig);
2724 status->float_rounding_mode = user_rnd_mode;
2725 status->floatx80_rounding_precision = user_rnd_prec;
2727 a = floatx80_mul(fp0, fp2, status);
2729 float_raise(float_flag_inexact, status);
2731 return a;
2733 } else { /* |X| < 16380 LOG2 */
2734 fp0 = floatx80_abs(a); /* Y = |X| */
2735 fp0 = floatx80_etoxm1(fp0, status); /* FP0 IS Z = EXPM1(Y) */
2736 fp1 = floatx80_add(fp0, float32_to_floatx80(make_float32(0x3F800000),
2737 status), status); /* 1+Z */
2738 fp2 = fp0;
2739 fp0 = floatx80_div(fp0, fp1, status); /* Z/(1+Z) */
2740 fp0 = floatx80_add(fp0, fp2, status);
2742 fact = packFloat32(aSign, 0x7E, 0);
2744 status->float_rounding_mode = user_rnd_mode;
2745 status->floatx80_rounding_precision = user_rnd_prec;
2747 a = floatx80_mul(fp0, float32_to_floatx80(fact, status), status);
2749 float_raise(float_flag_inexact, status);
2751 return a;
2756 * Hyperbolic cosine
2759 floatx80 floatx80_cosh(floatx80 a, float_status *status)
2761 int32_t aExp;
2762 uint64_t aSig;
2764 FloatRoundMode user_rnd_mode;
2765 FloatX80RoundPrec user_rnd_prec;
2767 int32_t compact;
2768 floatx80 fp0, fp1;
2770 aSig = extractFloatx80Frac(a);
2771 aExp = extractFloatx80Exp(a);
2773 if (aExp == 0x7FFF) {
2774 if ((uint64_t) (aSig << 1)) {
2775 return propagateFloatx80NaNOneArg(a, status);
2777 return packFloatx80(0, floatx80_infinity.high,
2778 floatx80_infinity.low);
2781 if (aExp == 0 && aSig == 0) {
2782 return packFloatx80(0, one_exp, one_sig);
2785 user_rnd_mode = status->float_rounding_mode;
2786 user_rnd_prec = status->floatx80_rounding_precision;
2787 status->float_rounding_mode = float_round_nearest_even;
2788 status->floatx80_rounding_precision = floatx80_precision_x;
2790 compact = floatx80_make_compact(aExp, aSig);
2792 if (compact > 0x400CB167) {
2793 if (compact > 0x400CB2B3) {
2794 status->float_rounding_mode = user_rnd_mode;
2795 status->floatx80_rounding_precision = user_rnd_prec;
2796 return roundAndPackFloatx80(status->floatx80_rounding_precision, 0,
2797 0x8000, one_sig, 0, status);
2798 } else {
2799 fp0 = packFloatx80(0, aExp, aSig);
2800 fp0 = floatx80_sub(fp0, float64_to_floatx80(
2801 make_float64(0x40C62D38D3D64634), status),
2802 status);
2803 fp0 = floatx80_sub(fp0, float64_to_floatx80(
2804 make_float64(0x3D6F90AEB1E75CC7), status),
2805 status);
2806 fp0 = floatx80_etox(fp0, status);
2807 fp1 = packFloatx80(0, 0x7FFB, one_sig);
2809 status->float_rounding_mode = user_rnd_mode;
2810 status->floatx80_rounding_precision = user_rnd_prec;
2812 a = floatx80_mul(fp0, fp1, status);
2814 float_raise(float_flag_inexact, status);
2816 return a;
2820 fp0 = packFloatx80(0, aExp, aSig); /* |X| */
2821 fp0 = floatx80_etox(fp0, status); /* EXP(|X|) */
2822 fp0 = floatx80_mul(fp0, float32_to_floatx80(make_float32(0x3F000000),
2823 status), status); /* (1/2)*EXP(|X|) */
2824 fp1 = float32_to_floatx80(make_float32(0x3E800000), status); /* 1/4 */
2825 fp1 = floatx80_div(fp1, fp0, status); /* 1/(2*EXP(|X|)) */
2827 status->float_rounding_mode = user_rnd_mode;
2828 status->floatx80_rounding_precision = user_rnd_prec;
2830 a = floatx80_add(fp0, fp1, status);
2832 float_raise(float_flag_inexact, status);
2834 return a;