1 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
2 MOTOROLA MICROPROCESSOR & MEMORY TECHNOLOGY GROUP
3 M68000 Hi-Performance Microprocessor Division
4 M68060 Software Package
5 Production Release P1.00 -- October 10, 1994
7 M68060 Software Package Copyright © 1993, 1994 Motorola Inc. All rights reserved.
9 THE SOFTWARE is provided on an "AS IS" basis and without warranty.
10 To the maximum extent permitted by applicable law,
11 MOTOROLA DISCLAIMS ALL WARRANTIES WHETHER EXPRESS OR IMPLIED,
12 INCLUDING IMPLIED WARRANTIES OF MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE
13 and any warranty against infringement with regard to the SOFTWARE
14 (INCLUDING ANY MODIFIED VERSIONS THEREOF) and any accompanying written materials.
16 To the maximum extent permitted by applicable law,
17 IN NO EVENT SHALL MOTOROLA BE LIABLE FOR ANY DAMAGES WHATSOEVER
18 (INCLUDING WITHOUT LIMITATION, DAMAGES FOR LOSS OF BUSINESS PROFITS,
19 BUSINESS INTERRUPTION, LOSS OF BUSINESS INFORMATION, OR OTHER PECUNIARY LOSS)
20 ARISING OF THE USE OR INABILITY TO USE THE SOFTWARE.
21 Motorola assumes no responsibility for the maintenance and support of the SOFTWARE.
23 You are hereby granted a copyright license to use, modify, and distribute the SOFTWARE
24 so long as this entire notice is retained without alteration in any modified and/or
25 redistributed versions, and that such modified versions are clearly identified as such.
26 No licenses are granted by implication, estoppel or otherwise under any patents
27 or trademarks of Motorola, Inc.
28 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
31 # This file is appended to the top of the 060ILSP package
32 # and contains the entry points into the package. The user, in
33 # effect, branches to one of the branch table entries located here.
267 # leave room for future possible additions
271 # This file contains a set of define statements for constants
272 # in order to promote readability within the corecode itself.
# Stack-frame layout. All offsets are relative to the frame pointer a6 as
# established by "link %a6,&-LOCAL_SIZE"; the local area runs from LV
# (= -LOCAL_SIZE) up to -1.
275 set LOCAL_SIZE, 192 # stack frame size(bytes)
276 set LV, -LOCAL_SIZE # stack offset
# Non-negative offsets: items at or above the frame pointer.
278 set EXC_SR, 0x4 # stack status register
279 set EXC_PC, 0x6 # stack pc
280 set EXC_VOFF, 0xa # stacked vector offset
281 set EXC_EA, 0xc # stacked <ea>
283 set EXC_FP, 0x0 # frame pointer
# Register save areas. They are contiguous: 8 data regs (32 bytes) from -100,
# 8 address regs (32 bytes) from -68, 3 fp regs (3*12 bytes) from -36 to -1.
285 set EXC_AREGS, -68 # offset of all address regs
286 set EXC_DREGS, -100 # offset of all data regs
287 set EXC_FPREGS, -36 # offset of all fp regs
289 set EXC_A7, EXC_AREGS+(7*4) # offset of saved a7
# NOTE(review): OLD_A7 evaluates to the same offset as EXC_A6 (EXC_AREGS+24).
# This looks like deliberate reuse of the a6 slot -- confirm before changing.
290 set OLD_A7, EXC_AREGS+(6*4) # extra copy of saved a7
291 set EXC_A6, EXC_AREGS+(6*4) # offset of saved a6
292 set EXC_A5, EXC_AREGS+(5*4)
293 set EXC_A4, EXC_AREGS+(4*4)
294 set EXC_A3, EXC_AREGS+(3*4)
295 set EXC_A2, EXC_AREGS+(2*4)
296 set EXC_A1, EXC_AREGS+(1*4)
297 set EXC_A0, EXC_AREGS+(0*4)
298 set EXC_D7, EXC_DREGS+(7*4)
299 set EXC_D6, EXC_DREGS+(6*4)
300 set EXC_D5, EXC_DREGS+(5*4)
301 set EXC_D4, EXC_DREGS+(4*4)
302 set EXC_D3, EXC_DREGS+(3*4)
303 set EXC_D2, EXC_DREGS+(2*4)
304 set EXC_D1, EXC_DREGS+(1*4)
305 set EXC_D0, EXC_DREGS+(0*4)
307 set EXC_FP0, EXC_FPREGS+(0*12) # offset of saved fp0
308 set EXC_FP1, EXC_FPREGS+(1*12) # offset of saved fp1
309 set EXC_FP2, EXC_FPREGS+(2*12) # offset of saved fp2 (not used)
# Four 12-byte floating-point work areas (FP_SCR1, FP_SCR0, FP_DST, FP_SRC),
# laid out back to back from LV+44 to LV+91. Each has sub-fields at +0 (_EX),
# +2 (_SGN), +4 (_HI) and +8 (_LO); presumably exponent word, sign, and the
# upper/lower mantissa longwords -- verify against the users of these fields.
311 set FP_SCR1, LV+80 # fp scratch 1
312 set FP_SCR1_EX, FP_SCR1+0
313 set FP_SCR1_SGN, FP_SCR1+2
314 set FP_SCR1_HI, FP_SCR1+4
315 set FP_SCR1_LO, FP_SCR1+8
317 set FP_SCR0, LV+68 # fp scratch 0
318 set FP_SCR0_EX, FP_SCR0+0
319 set FP_SCR0_SGN, FP_SCR0+2
320 set FP_SCR0_HI, FP_SCR0+4
321 set FP_SCR0_LO, FP_SCR0+8
323 set FP_DST, LV+56 # fp destination operand
324 set FP_DST_EX, FP_DST+0
325 set FP_DST_SGN, FP_DST+2
326 set FP_DST_HI, FP_DST+4
327 set FP_DST_LO, FP_DST+8
329 set FP_SRC, LV+44 # fp source operand
330 set FP_SRC_EX, FP_SRC+0
331 set FP_SRC_SGN, FP_SRC+2
332 set FP_SRC_HI, FP_SRC+4
333 set FP_SRC_LO, FP_SRC+8
# Saved FP control registers: three consecutive longwords (FPCR at LV+32,
# FPSR at LV+36, FPIAR at LV+40), with byte-level aliases into FPSR and FPCR.
335 set USER_FPIAR, LV+40 # FP instr address register
337 set USER_FPSR, LV+36 # FP status register
338 set FPSR_CC, USER_FPSR+0 # FPSR condition codes
339 set FPSR_QBYTE, USER_FPSR+1 # FPSR quotient byte
340 set FPSR_EXCEPT, USER_FPSR+2 # FPSR exception status byte
341 set FPSR_AEXCEPT, USER_FPSR+3 # FPSR accrued exception byte
343 set USER_FPCR, LV+32 # FP control register
344 set FPCR_ENABLE, USER_FPCR+2 # FPCR exception enable
345 set FPCR_MODE, USER_FPCR+3 # FPCR rounding mode control
347 set L_SCR3, LV+28 # integer scratch 3
348 set L_SCR2, LV+24 # integer scratch 2
349 set L_SCR1, LV+20 # integer scratch 1
351 set STORE_FLG, LV+19 # flag: operand store (ie. not fcmp/ftst)
# NOTE(review): EXC_TEMP2 shares offset LV+24 with L_SCR2 -- presumably the
# two are never live at the same time; verify before using both.
353 set EXC_TEMP2, LV+24 # temporary space
354 set EXC_TEMP, LV+16 # temporary space
356 set DTAG, LV+15 # destination operand type
357 set STAG, LV+14 # source operand type
359 set SPCOND_FLG, LV+10 # flag: special case (see below)
361 set EXC_CC, LV+8 # saved condition codes
362 set EXC_EXTWPTR, LV+4 # saved current PC (active)
# EXC_EXTWORD and EXC_CMDREG are two names for the same word at LV+2.
363 set EXC_EXTWORD, LV+2 # saved extension word
364 set EXC_CMDREG, LV+2 # saved extension word
365 set EXC_OPWORD, LV+0 # saved operation word
367 ################################
# Field offsets within an extended-precision value held in memory, relative
# to the value's base address. FTEMP, LOCAL, DST and SRC are parallel naming
# families for the same layout; only the members listed here are visible in
# this view (base and _EX at 0, _SGN at 2, _HI at 4).
371 set FTEMP, 0 # offsets within an
372 set FTEMP_EX, 0 # extended precision
373 set FTEMP_SGN, 2 # value saved in memory.
378 set LOCAL, 0 # offsets within an
379 set LOCAL_EX, 0 # extended precision
380 set LOCAL_SGN, 2 # value saved in memory.
385 set DST, 0 # offsets within an
386 set DST_EX, 0 # extended precision
387 set DST_HI, 4 # value saved in memory.
390 set SRC, 0 # offsets within an
391 set SRC_EX, 0 # extended precision
392 set SRC_HI, 4 # value saved in memory.
# Exponent limits per precision, expressed with the extended-precision bias:
#   SGL_LO = EXT_BIAS-126, SGL_HI = EXT_BIAS+127,
#   DBL_LO = EXT_BIAS-1022, DBL_HI = EXT_BIAS+1023.
395 set SGL_LO, 0x3f81 # min sgl prec exponent
396 set SGL_HI, 0x407e # max sgl prec exponent
397 set DBL_LO, 0x3c01 # min dbl prec exponent
398 set DBL_HI, 0x43fe # max dbl prec exponent
399 set EXT_LO, 0x0 # min ext prec exponent
400 set EXT_HI, 0x7ffe # max ext prec exponent
# Exponent biases of the three formats.
402 set EXT_BIAS, 0x3fff # extended precision bias
403 set SGL_BIAS, 0x007f # single precision bias
404 set DBL_BIAS, 0x03ff # double precision bias
# Operand classification codes stored in STAG/DTAG. NORM is 0, so a plain
# zero test identifies a normalized operand.
406 set NORM, 0x00 # operand type for STAG/DTAG
407 set ZERO, 0x01 # operand type for STAG/DTAG
408 set INF, 0x02 # operand type for STAG/DTAG
409 set QNAN, 0x03 # operand type for STAG/DTAG
410 set DENORM, 0x04 # operand type for STAG/DTAG
411 set SNAN, 0x05 # operand type for STAG/DTAG
412 set UNNORM, 0x06 # operand type for STAG/DTAG
# FPSR bit numbers, each counted within its own byte of the FPSR
# (condition-code byte, quotient byte, exception status byte, accrued
# exception byte). The matching masks are 1<<bit within that byte.
# Condition-code byte:
417 set neg_bit, 0x3 # negative result
418 set z_bit, 0x2 # zero result
419 set inf_bit, 0x1 # infinite result
420 set nan_bit, 0x0 # NAN result
# Quotient byte:
422 set q_sn_bit, 0x7 # sign bit of quotient byte
# Exception status byte:
424 set bsun_bit, 7 # branch on unordered
425 set snan_bit, 6 # signalling NAN
426 set operr_bit, 5 # operand error
427 set ovfl_bit, 4 # overflow
428 set unfl_bit, 3 # underflow
429 set dz_bit, 2 # divide by zero
430 set inex2_bit, 1 # inexact result 2
431 set inex1_bit, 0 # inexact result 1
# Accrued exception byte:
433 set aiop_bit, 7 # accrued invalid operation bit
434 set aovfl_bit, 6 # accrued overflow bit
435 set aunfl_bit, 5 # accrued underflow bit
436 set adz_bit, 4 # accrued dz bit
437 set ainex_bit, 3 # accrued inexact bit
439 #############################
440 # FPSR individual bit masks #
441 #############################
# Longword masks place each flag at its position within the full 32-bit FPSR
# (condition codes in bits 31-24, exception status in bits 15-8, accrued
# exceptions in bits 7-0); the *_bmask values are the same condition-code
# flags expressed within the condition-code byte alone.
442 set neg_mask, 0x08000000 # negative bit mask (lw)
443 set inf_mask, 0x02000000 # infinity bit mask (lw)
444 set z_mask, 0x04000000 # zero bit mask (lw)
445 set nan_mask, 0x01000000 # nan bit mask (lw)
447 set neg_bmask, 0x08 # negative bit mask (byte)
448 set inf_bmask, 0x02 # infinity bit mask (byte)
449 set z_bmask, 0x04 # zero bit mask (byte)
450 set nan_bmask, 0x01 # nan bit mask (byte)
452 set bsun_mask, 0x00008000 # bsun exception mask
453 set snan_mask, 0x00004000 # snan exception mask
454 set operr_mask, 0x00002000 # operr exception mask
455 set ovfl_mask, 0x00001000 # overflow exception mask
456 set unfl_mask, 0x00000800 # underflow exception mask
457 set dz_mask, 0x00000400 # dz exception mask
458 set inex2_mask, 0x00000200 # inex2 exception mask
459 set inex1_mask, 0x00000100 # inex1 exception mask
461 set aiop_mask, 0x00000080 # accrued invalid operation
462 set aovfl_mask, 0x00000040 # accrued overflow
463 set aunfl_mask, 0x00000020 # accrued underflow
464 set adz_mask, 0x00000010 # accrued divide by zero
465 set ainex_mask, 0x00000008 # accrued inexact
467 ######################################
468 # FPSR combinations used in the FPSP #
469 ######################################
# Each combination is the sum of disjoint single-bit masks, so "+" acts as a
# bitwise OR here.
470 set dzinf_mask, inf_mask+dz_mask+adz_mask
471 set opnan_mask, nan_mask+operr_mask+aiop_mask
# nzi_mask is an AND mask: it keeps every FPSR bit except N, Z and I.
472 set nzi_mask, 0x01ffffff #clears N, Z, and I
473 set unfinx_mask, unfl_mask+inex2_mask+aunfl_mask+ainex_mask
# unf2inx_mask differs from unfinx_mask only in omitting aunfl_mask.
474 set unf2inx_mask, unfl_mask+inex2_mask+ainex_mask
475 set ovfinx_mask, ovfl_mask+inex2_mask+aovfl_mask+ainex_mask
476 set inx1a_mask, inex1_mask+ainex_mask
477 set inx2a_mask, inex2_mask+ainex_mask
478 set snaniop_mask, nan_mask+snan_mask+aiop_mask
479 set snaniop2_mask, snan_mask+aiop_mask
480 set naniop_mask, nan_mask+aiop_mask
481 set neginf_mask, neg_mask+inf_mask
482 set infaiop_mask, inf_mask+aiop_mask
483 set negz_mask, neg_mask+z_mask
484 set opaop_mask, operr_mask+aiop_mask
485 set unfl_inx_mask, unfl_mask+aunfl_mask+ainex_mask
486 set ovfl_inx_mask, ovfl_mask+aovfl_mask+ainex_mask
# Miscellaneous constants: rounding helpers, precision/rounding-mode codes,
# operand sizes and floating-point exception vector offsets.
491 set rnd_stky_bit, 29 # stky bit pos in longword
493 set sign_bit, 0x7 # sign bit
494 set signan_bit, 0x6 # signalling nan bit
# Same values as SGL_LO and DBL_LO.
496 set sgl_thresh, 0x3f81 # minimum sgl exponent
497 set dbl_thresh, 0x3c01 # minimum dbl exponent
# Rounding precision codes.
499 set x_mode, 0x0 # extended precision
500 set s_mode, 0x4 # single precision
501 set d_mode, 0x8 # double precision
# Rounding mode codes.
503 set rn_mode, 0x0 # round-to-nearest
504 set rz_mode, 0x1 # round-to-zero
505 set rm_mode, 0x2 # round-to-minus-infinity
506 set rp_mode, 0x3 # round-to-plus-infinity
508 set mantissalen, 64 # length of mantissa in bits
# Operand sizes in bytes.
510 set BYTE, 1 # len(byte) == 1 byte
511 set WORD, 2 # len(word) == 2 bytes
512 set LONG, 4 # len(longword) == 4 bytes
# Exception vector offsets; consecutive longword entries from 0xc0 to 0xd8
# (offset = vector number * 4, i.e. vectors 48 through 54).
514 set BSUN_VEC, 0xc0 # bsun vector offset
515 set INEX_VEC, 0xc4 # inexact vector offset
516 set DZ_VEC, 0xc8 # dz vector offset
517 set UNFL_VEC, 0xcc # unfl vector offset
518 set OPERR_VEC, 0xd0 # operr vector offset
519 set OVFL_VEC, 0xd4 # ovfl vector offset
520 set SNAN_VEC, 0xd8 # snan vector offset
522 ###########################
523 # SPecial CONDition FLaGs #
524 ###########################
# Special-condition flag values; each is a distinct single bit
# (0x01, 0x02, 0x04, 0x08, 0x40, 0x80). Presumably these are the values
# stored in SPCOND_FLG -- verify against its users.
525 set ftrapcc_flg, 0x01 # flag bit: ftrapcc exception
526 set fbsun_flg, 0x02 # flag bit: bsun exception
527 set mia7_flg, 0x04 # flag bit: (a7)+ <ea>
528 set mda7_flg, 0x08 # flag bit: -(a7) <ea>
529 set fmovm_flg, 0x40 # flag bit: fmovm instruction
530 set immed_flg, 0x80 # flag bit: &<data> <ea>
538 ##################################
539 # TRANSCENDENTAL "LAST-OP" FLAGS #
540 ##################################
# Small enumeration (0..3) identifying the last arithmetic operation
# performed; unlike the flags above these are codes, not bit masks.
541 set FMUL_OP, 0x0 # fmul instr performed last
542 set FDIV_OP, 0x1 # fdiv performed last
543 set FADD_OP, 0x2 # fadd performed last
544 set FMOV_OP, 0x3 # fmov performed last
# Constant tables.
# T1/T2: two 64-bit values; read as IEEE doubles they are the leading and
# trailing parts of 16381*ln(2) (about 11354.4), as the comments indicate.
549 T1: long 0x40C62D38,0xD3D64634 # 16381 LOG2 LEAD
550 T2: long 0x3D6F90AE,0xB1E75CC7 # 16381 LOG2 TRAIL
# PI and PIBY2: extended-precision pi and pi/2. Same 64-bit mantissa, with
# exponent words 0x4000 and 0x3FFF; each is padded with a fourth zero
# longword to 16 bytes.
552 PI: long 0x40000000,0xC90FDAA2,0x2168C235,0x00000000
553 PIBY2: long 0x3FFF0000,0xC90FDAA2,0x2168C235,0x00000000
# Unlabelled in this view; the bit pattern matches the IEEE double for 2/pi
# (about 0.63662). NOTE(review): its label is presumably on an elided line.
556 long 0x3FE45F30,0x6DC9C883
558 #########################################################################
560 #########################################################################
# Single-precision entry wrapper around ssin (the entry label is not visible
# in this view). Sets up a LOCAL_SIZE-byte frame, saves d0-d1/a0-a1,
# FPCR/FPSR and fp0/fp1, stores the stacked argument in FP_SRC as an extended
# value, classifies it via tag and checks the operand type (NORM, ZERO, INF,
# QNAN, DENORM). The visible calls are ssin for a NORM and ssind for a
# DENORM. The result is returned in fp0, so only fp1 is restored on exit.
# NOTE(review): the embedded line numbers skip here, so the conditional
# branches between the type checks and the closing unlk/rts are presumably
# elided from this view -- confirm against the full file.
563 link %a6,&-LOCAL_SIZE
565 movm.l &0x0303,EXC_DREGS(%a6) # save d0-d1/a0-a1
566 fmovm.l %fpcr,%fpsr,USER_FPCR(%a6) # save ctrl regs
567 fmovm.x &0xc0,EXC_FP0(%a6) # save fp0/fp1
569 fmov.l &0x0,%fpcr # zero FPCR
572 # copy, convert, and tag input argument
574 fmov.s 0x8(%a6),%fp0 # load sgl input
575 fmov.x %fp0,FP_SRC(%a6)
577 bsr.l tag # fetch operand type
# Clear the condition-code and exception-status bytes of the saved FPSR;
# the quotient and accrued-exception bytes are kept.
581 andi.l &0x00ff00ff,USER_FPSR(%a6)
584 mov.b FPCR_MODE(%a6),%d0 # pass rnd mode,prec
588 bsr.l ssin # operand is a NORM
591 cmpi.b %d1,&ZERO # is operand a ZERO?
596 cmpi.b %d1,&INF # is operand an INF?
601 cmpi.b %d1,&QNAN # is operand a QNAN?
606 bsr.l ssind # operand is a DENORM
610 # Result is now in FP0
612 movm.l EXC_DREGS(%a6),&0x0303 # restore d0-d1/a0-a1
613 fmovm.l USER_FPCR(%a6),%fpcr,%fpsr # restore ctrl regs
614 fmovm.x EXC_FP1(%a6),&0x40 # restore fp1
# Double-precision entry wrapper around ssin (the entry label is not visible
# in this view). Sets up a LOCAL_SIZE-byte frame, saves d0-d1/a0-a1,
# FPCR/FPSR and fp0/fp1, stores the stacked argument in FP_SRC as an extended
# value, classifies it via tag and checks the operand type (NORM, ZERO, INF,
# QNAN, DENORM). The visible calls are ssin for a NORM and ssind for a
# DENORM. The result is returned in fp0, so only fp1 is restored on exit.
# NOTE(review): the embedded line numbers skip here, so the conditional
# branches between the type checks and the closing unlk/rts are presumably
# elided from this view -- confirm against the full file.
620 link %a6,&-LOCAL_SIZE
622 movm.l &0x0303,EXC_DREGS(%a6) # save d0-d1/a0-a1
623 fmovm.l %fpcr,%fpsr,USER_FPCR(%a6) # save ctrl regs
624 fmovm.x &0xc0,EXC_FP0(%a6) # save fp0/fp1
626 fmov.l &0x0,%fpcr # zero FPCR
629 # copy, convert, and tag input argument
631 fmov.d 0x8(%a6),%fp0 # load dbl input
632 fmov.x %fp0,FP_SRC(%a6)
634 bsr.l tag # fetch operand type
# Clear the condition-code and exception-status bytes of the saved FPSR;
# the quotient and accrued-exception bytes are kept.
638 andi.l &0x00ff00ff,USER_FPSR(%a6)
641 mov.b FPCR_MODE(%a6),%d0 # pass rnd mode,prec
646 bsr.l ssin # operand is a NORM
649 cmpi.b %d1,&ZERO # is operand a ZERO?
654 cmpi.b %d1,&INF # is operand an INF?
659 cmpi.b %d1,&QNAN # is operand a QNAN?
664 bsr.l ssind # operand is a DENORM
668 # Result is now in FP0
670 movm.l EXC_DREGS(%a6),&0x0303 # restore d0-d1/a0-a1
671 fmovm.l USER_FPCR(%a6),%fpcr,%fpsr # restore ctrl regs
672 fmovm.x EXC_FP1(%a6),&0x40 # restore fp1
# Extended-precision entry wrapper around ssin (the entry label is not
# visible in this view). Sets up a LOCAL_SIZE-byte frame, saves d0-d1/a0-a1,
# FPCR/FPSR and fp0/fp1, copies the 12-byte stacked argument (three longwords
# starting at 0x8(a6)) to the buffer addressed by a0, classifies it via tag
# and checks the operand type. The visible calls are ssin for a NORM and
# ssind for a DENORM. The result is returned in fp0, so only fp1 is restored.
# NOTE(review): a0 is used as the copy destination but the instruction that
# loads it is not visible here; the embedded line numbers skip, so that
# line, the conditional branches and the closing unlk/rts are presumably
# elided from this view -- confirm against the full file.
678 link %a6,&-LOCAL_SIZE
680 movm.l &0x0303,EXC_DREGS(%a6) # save d0-d1/a0-a1
681 fmovm.l %fpcr,%fpsr,USER_FPCR(%a6) # save ctrl regs
682 fmovm.x &0xc0,EXC_FP0(%a6) # save fp0/fp1
684 fmov.l &0x0,%fpcr # zero FPCR
687 # copy, convert, and tag input argument
690 mov.l 0x8+0x0(%a6),0x0(%a0) # load ext input
691 mov.l 0x8+0x4(%a6),0x4(%a0)
692 mov.l 0x8+0x8(%a6),0x8(%a0)
693 bsr.l tag # fetch operand type
# Clear the condition-code and exception-status bytes of the saved FPSR;
# the quotient and accrued-exception bytes are kept.
697 andi.l &0x00ff00ff,USER_FPSR(%a6)
700 mov.b FPCR_MODE(%a6),%d0 # pass rnd mode,prec
704 bsr.l ssin # operand is a NORM
707 cmpi.b %d1,&ZERO # is operand a ZERO?
712 cmpi.b %d1,&INF # is operand an INF?
717 cmpi.b %d1,&QNAN # is operand a QNAN?
722 bsr.l ssind # operand is a DENORM
726 # Result is now in FP0
728 movm.l EXC_DREGS(%a6),&0x0303 # restore d0-d1/a0-a1
729 fmovm.l USER_FPCR(%a6),%fpcr,%fpsr # restore ctrl regs
730 fmovm.x EXC_FP1(%a6),&0x40 # restore fp1
735 #########################################################################
737 #########################################################################
# Single-precision entry wrapper around scos (the entry label is not visible
# in this view). Sets up a LOCAL_SIZE-byte frame, saves d0-d1/a0-a1,
# FPCR/FPSR and fp0/fp1, stores the stacked argument in FP_SRC as an extended
# value, classifies it via tag and checks the operand type (NORM, ZERO, INF,
# QNAN, DENORM). The visible calls are scos for a NORM and scosd for a
# DENORM. The result is returned in fp0, so only fp1 is restored on exit.
# NOTE(review): the embedded line numbers skip here, so the conditional
# branches between the type checks and the closing unlk/rts are presumably
# elided from this view -- confirm against the full file.
740 link %a6,&-LOCAL_SIZE
742 movm.l &0x0303,EXC_DREGS(%a6) # save d0-d1/a0-a1
743 fmovm.l %fpcr,%fpsr,USER_FPCR(%a6) # save ctrl regs
744 fmovm.x &0xc0,EXC_FP0(%a6) # save fp0/fp1
746 fmov.l &0x0,%fpcr # zero FPCR
749 # copy, convert, and tag input argument
751 fmov.s 0x8(%a6),%fp0 # load sgl input
752 fmov.x %fp0,FP_SRC(%a6)
754 bsr.l tag # fetch operand type
# Clear the condition-code and exception-status bytes of the saved FPSR;
# the quotient and accrued-exception bytes are kept.
758 andi.l &0x00ff00ff,USER_FPSR(%a6)
761 mov.b FPCR_MODE(%a6),%d0 # pass rnd mode,prec
765 bsr.l scos # operand is a NORM
768 cmpi.b %d1,&ZERO # is operand a ZERO?
773 cmpi.b %d1,&INF # is operand an INF?
778 cmpi.b %d1,&QNAN # is operand a QNAN?
783 bsr.l scosd # operand is a DENORM
787 # Result is now in FP0
789 movm.l EXC_DREGS(%a6),&0x0303 # restore d0-d1/a0-a1
790 fmovm.l USER_FPCR(%a6),%fpcr,%fpsr # restore ctrl regs
791 fmovm.x EXC_FP1(%a6),&0x40 # restore fp1
# Double-precision entry wrapper around scos (the entry label is not visible
# in this view). Sets up a LOCAL_SIZE-byte frame, saves d0-d1/a0-a1,
# FPCR/FPSR and fp0/fp1, stores the stacked argument in FP_SRC as an extended
# value, classifies it via tag and checks the operand type (NORM, ZERO, INF,
# QNAN, DENORM). The visible calls are scos for a NORM and scosd for a
# DENORM. The result is returned in fp0, so only fp1 is restored on exit.
# NOTE(review): the embedded line numbers skip here, so the conditional
# branches between the type checks and the closing unlk/rts are presumably
# elided from this view -- confirm against the full file.
797 link %a6,&-LOCAL_SIZE
799 movm.l &0x0303,EXC_DREGS(%a6) # save d0-d1/a0-a1
800 fmovm.l %fpcr,%fpsr,USER_FPCR(%a6) # save ctrl regs
801 fmovm.x &0xc0,EXC_FP0(%a6) # save fp0/fp1
803 fmov.l &0x0,%fpcr # zero FPCR
806 # copy, convert, and tag input argument
808 fmov.d 0x8(%a6),%fp0 # load dbl input
809 fmov.x %fp0,FP_SRC(%a6)
811 bsr.l tag # fetch operand type
# Clear the condition-code and exception-status bytes of the saved FPSR;
# the quotient and accrued-exception bytes are kept.
815 andi.l &0x00ff00ff,USER_FPSR(%a6)
818 mov.b FPCR_MODE(%a6),%d0 # pass rnd mode,prec
823 bsr.l scos # operand is a NORM
826 cmpi.b %d1,&ZERO # is operand a ZERO?
831 cmpi.b %d1,&INF # is operand an INF?
836 cmpi.b %d1,&QNAN # is operand a QNAN?
841 bsr.l scosd # operand is a DENORM
845 # Result is now in FP0
847 movm.l EXC_DREGS(%a6),&0x0303 # restore d0-d1/a0-a1
848 fmovm.l USER_FPCR(%a6),%fpcr,%fpsr # restore ctrl regs
849 fmovm.x EXC_FP1(%a6),&0x40 # restore fp1
# Extended-precision entry wrapper around scos (the entry label is not
# visible in this view). Sets up a LOCAL_SIZE-byte frame, saves d0-d1/a0-a1,
# FPCR/FPSR and fp0/fp1, copies the 12-byte stacked argument (three longwords
# starting at 0x8(a6)) to the buffer addressed by a0, classifies it via tag
# and checks the operand type. The visible calls are scos for a NORM and
# scosd for a DENORM. The result is returned in fp0, so only fp1 is restored.
# NOTE(review): a0 is used as the copy destination but the instruction that
# loads it is not visible here; the embedded line numbers skip, so that
# line, the conditional branches and the closing unlk/rts are presumably
# elided from this view -- confirm against the full file.
855 link %a6,&-LOCAL_SIZE
857 movm.l &0x0303,EXC_DREGS(%a6) # save d0-d1/a0-a1
858 fmovm.l %fpcr,%fpsr,USER_FPCR(%a6) # save ctrl regs
859 fmovm.x &0xc0,EXC_FP0(%a6) # save fp0/fp1
861 fmov.l &0x0,%fpcr # zero FPCR
864 # copy, convert, and tag input argument
867 mov.l 0x8+0x0(%a6),0x0(%a0) # load ext input
868 mov.l 0x8+0x4(%a6),0x4(%a0)
869 mov.l 0x8+0x8(%a6),0x8(%a0)
870 bsr.l tag # fetch operand type
# Clear the condition-code and exception-status bytes of the saved FPSR;
# the quotient and accrued-exception bytes are kept.
874 andi.l &0x00ff00ff,USER_FPSR(%a6)
877 mov.b FPCR_MODE(%a6),%d0 # pass rnd mode,prec
881 bsr.l scos # operand is a NORM
884 cmpi.b %d1,&ZERO # is operand a ZERO?
889 cmpi.b %d1,&INF # is operand an INF?
894 cmpi.b %d1,&QNAN # is operand a QNAN?
899 bsr.l scosd # operand is a DENORM
903 # Result is now in FP0
905 movm.l EXC_DREGS(%a6),&0x0303 # restore d0-d1/a0-a1
906 fmovm.l USER_FPCR(%a6),%fpcr,%fpsr # restore ctrl regs
907 fmovm.x EXC_FP1(%a6),&0x40 # restore fp1
912 #########################################################################
914 #########################################################################
# Single-precision entry wrapper around ssinh (the entry label is not visible
# in this view). Sets up a LOCAL_SIZE-byte frame, saves d0-d1/a0-a1,
# FPCR/FPSR and fp0/fp1, stores the stacked argument in FP_SRC as an extended
# value, classifies it via tag and checks the operand type (NORM, ZERO, INF,
# QNAN, DENORM). The visible calls are ssinh for a NORM and ssinhd for a
# DENORM. The result is returned in fp0, so only fp1 is restored on exit.
# NOTE(review): the embedded line numbers skip here, so the conditional
# branches between the type checks and the closing unlk/rts are presumably
# elided from this view -- confirm against the full file.
917 link %a6,&-LOCAL_SIZE
919 movm.l &0x0303,EXC_DREGS(%a6) # save d0-d1/a0-a1
920 fmovm.l %fpcr,%fpsr,USER_FPCR(%a6) # save ctrl regs
921 fmovm.x &0xc0,EXC_FP0(%a6) # save fp0/fp1
923 fmov.l &0x0,%fpcr # zero FPCR
926 # copy, convert, and tag input argument
928 fmov.s 0x8(%a6),%fp0 # load sgl input
929 fmov.x %fp0,FP_SRC(%a6)
931 bsr.l tag # fetch operand type
# Clear the condition-code and exception-status bytes of the saved FPSR;
# the quotient and accrued-exception bytes are kept.
935 andi.l &0x00ff00ff,USER_FPSR(%a6)
938 mov.b FPCR_MODE(%a6),%d0 # pass rnd mode,prec
942 bsr.l ssinh # operand is a NORM
945 cmpi.b %d1,&ZERO # is operand a ZERO?
950 cmpi.b %d1,&INF # is operand an INF?
955 cmpi.b %d1,&QNAN # is operand a QNAN?
960 bsr.l ssinhd # operand is a DENORM
964 # Result is now in FP0
966 movm.l EXC_DREGS(%a6),&0x0303 # restore d0-d1/a0-a1
967 fmovm.l USER_FPCR(%a6),%fpcr,%fpsr # restore ctrl regs
968 fmovm.x EXC_FP1(%a6),&0x40 # restore fp1
# Double-precision entry wrapper around ssinh (the entry label is not visible
# in this view). Sets up a LOCAL_SIZE-byte frame, saves d0-d1/a0-a1,
# FPCR/FPSR and fp0/fp1, stores the stacked argument in FP_SRC as an extended
# value, classifies it via tag and checks the operand type. The visible calls
# are ssinh (NORM), src_zero (ZERO), src_qnan (QNAN) and ssinhd (DENORM).
# The result is returned in fp0, so only fp1 is restored on exit.
# NOTE(review): the embedded line numbers skip here, so the conditional
# branches, the INF handler call and the closing unlk/rts are presumably
# elided from this view -- confirm against the full file.
974 link %a6,&-LOCAL_SIZE
976 movm.l &0x0303,EXC_DREGS(%a6) # save d0-d1/a0-a1
977 fmovm.l %fpcr,%fpsr,USER_FPCR(%a6) # save ctrl regs
978 fmovm.x &0xc0,EXC_FP0(%a6) # save fp0/fp1
980 fmov.l &0x0,%fpcr # zero FPCR
983 # copy, convert, and tag input argument
985 fmov.d 0x8(%a6),%fp0 # load dbl input
986 fmov.x %fp0,FP_SRC(%a6)
988 bsr.l tag # fetch operand type
# Clear the condition-code and exception-status bytes of the saved FPSR;
# the quotient and accrued-exception bytes are kept.
992 andi.l &0x00ff00ff,USER_FPSR(%a6)
995 mov.b FPCR_MODE(%a6),%d0 # pass rnd mode,prec
1000 bsr.l ssinh # operand is a NORM
1003 cmpi.b %d1,&ZERO # is operand a ZERO?
1005 bsr.l src_zero # yes
1008 cmpi.b %d1,&INF # is operand an INF?
1013 cmpi.b %d1,&QNAN # is operand a QNAN?
1015 bsr.l src_qnan # yes
1018 bsr.l ssinhd # operand is a DENORM
1022 # Result is now in FP0
1024 movm.l EXC_DREGS(%a6),&0x0303 # restore d0-d1/a0-a1
1025 fmovm.l USER_FPCR(%a6),%fpcr,%fpsr # restore ctrl regs
1026 fmovm.x EXC_FP1(%a6),&0x40 # restore fp1
# Extended-precision entry wrapper around ssinh (the entry label is not
# visible in this view). Sets up a LOCAL_SIZE-byte frame, saves d0-d1/a0-a1,
# FPCR/FPSR and fp0/fp1, copies the 12-byte stacked argument (three longwords
# starting at 0x8(a6)) to the buffer addressed by a0, classifies it via tag
# and checks the operand type. The visible calls are ssinh (NORM), src_zero
# (ZERO), src_qnan (QNAN) and ssinhd (DENORM). The result is returned in fp0,
# so only fp1 is restored on exit.
# NOTE(review): a0 is used as the copy destination but the instruction that
# loads it is not visible here; the embedded line numbers skip, so that
# line, the conditional branches, the INF handler call and the closing
# unlk/rts are presumably elided -- confirm against the full file.
1032 link %a6,&-LOCAL_SIZE
1034 movm.l &0x0303,EXC_DREGS(%a6) # save d0-d1/a0-a1
1035 fmovm.l %fpcr,%fpsr,USER_FPCR(%a6) # save ctrl regs
1036 fmovm.x &0xc0,EXC_FP0(%a6) # save fp0/fp1
1038 fmov.l &0x0,%fpcr # zero FPCR
1041 # copy, convert, and tag input argument
1044 mov.l 0x8+0x0(%a6),0x0(%a0) # load ext input
1045 mov.l 0x8+0x4(%a6),0x4(%a0)
1046 mov.l 0x8+0x8(%a6),0x8(%a0)
1047 bsr.l tag # fetch operand type
# Clear the condition-code and exception-status bytes of the saved FPSR;
# the quotient and accrued-exception bytes are kept.
1051 andi.l &0x00ff00ff,USER_FPSR(%a6)
1054 mov.b FPCR_MODE(%a6),%d0 # pass rnd mode,prec
1058 bsr.l ssinh # operand is a NORM
1061 cmpi.b %d1,&ZERO # is operand a ZERO?
1063 bsr.l src_zero # yes
1066 cmpi.b %d1,&INF # is operand an INF?
1071 cmpi.b %d1,&QNAN # is operand a QNAN?
1073 bsr.l src_qnan # yes
1076 bsr.l ssinhd # operand is a DENORM
1080 # Result is now in FP0
1082 movm.l EXC_DREGS(%a6),&0x0303 # restore d0-d1/a0-a1
1083 fmovm.l USER_FPCR(%a6),%fpcr,%fpsr # restore ctrl regs
1084 fmovm.x EXC_FP1(%a6),&0x40 # restore fp1
1089 #########################################################################
1090 # MONADIC TEMPLATE #
1091 #########################################################################
# Single-precision entry wrapper around slognp1 (the entry label is not
# visible in this view). Sets up a LOCAL_SIZE-byte frame, saves d0-d1/a0-a1,
# FPCR/FPSR and fp0/fp1, stores the stacked argument in FP_SRC as an extended
# value, classifies it via tag and calls the routine for the operand type:
# slognp1 (NORM), src_zero (ZERO), sopr_inf (INF), src_qnan (QNAN) or
# slognp1d (DENORM). The result is returned in fp0, so only fp1 is restored.
# NOTE(review): the embedded line numbers skip here, so the conditional
# branches between the type checks and the closing unlk/rts are presumably
# elided from this view -- confirm against the full file.
1094 link %a6,&-LOCAL_SIZE
1096 movm.l &0x0303,EXC_DREGS(%a6) # save d0-d1/a0-a1
1097 fmovm.l %fpcr,%fpsr,USER_FPCR(%a6) # save ctrl regs
1098 fmovm.x &0xc0,EXC_FP0(%a6) # save fp0/fp1
1100 fmov.l &0x0,%fpcr # zero FPCR
1103 # copy, convert, and tag input argument
1105 fmov.s 0x8(%a6),%fp0 # load sgl input
1106 fmov.x %fp0,FP_SRC(%a6)
1108 bsr.l tag # fetch operand type
# Clear the condition-code and exception-status bytes of the saved FPSR;
# the quotient and accrued-exception bytes are kept.
1112 andi.l &0x00ff00ff,USER_FPSR(%a6)
1115 mov.b FPCR_MODE(%a6),%d0 # pass rnd mode,prec
1119 bsr.l slognp1 # operand is a NORM
1122 cmpi.b %d1,&ZERO # is operand a ZERO?
1124 bsr.l src_zero # yes
1127 cmpi.b %d1,&INF # is operand an INF?
1129 bsr.l sopr_inf # yes
1132 cmpi.b %d1,&QNAN # is operand a QNAN?
1134 bsr.l src_qnan # yes
1137 bsr.l slognp1d # operand is a DENORM
1141 # Result is now in FP0
1143 movm.l EXC_DREGS(%a6),&0x0303 # restore d0-d1/a0-a1
1144 fmovm.l USER_FPCR(%a6),%fpcr,%fpsr # restore ctrl regs
1145 fmovm.x EXC_FP1(%a6),&0x40 # restore fp1
# Double-precision entry wrapper around slognp1 (the entry label is not
# visible in this view). Sets up a LOCAL_SIZE-byte frame, saves d0-d1/a0-a1,
# FPCR/FPSR and fp0/fp1, stores the stacked argument in FP_SRC as an extended
# value, classifies it via tag and calls the routine for the operand type:
# slognp1 (NORM), src_zero (ZERO), sopr_inf (INF), src_qnan (QNAN) or
# slognp1d (DENORM). The result is returned in fp0, so only fp1 is restored.
# NOTE(review): the embedded line numbers skip here, so the conditional
# branches between the type checks and the closing unlk/rts are presumably
# elided from this view -- confirm against the full file.
1151 link %a6,&-LOCAL_SIZE
1153 movm.l &0x0303,EXC_DREGS(%a6) # save d0-d1/a0-a1
1154 fmovm.l %fpcr,%fpsr,USER_FPCR(%a6) # save ctrl regs
1155 fmovm.x &0xc0,EXC_FP0(%a6) # save fp0/fp1
1157 fmov.l &0x0,%fpcr # zero FPCR
1160 # copy, convert, and tag input argument
1162 fmov.d 0x8(%a6),%fp0 # load dbl input
1163 fmov.x %fp0,FP_SRC(%a6)
1165 bsr.l tag # fetch operand type
# Clear the condition-code and exception-status bytes of the saved FPSR;
# the quotient and accrued-exception bytes are kept.
1169 andi.l &0x00ff00ff,USER_FPSR(%a6)
1172 mov.b FPCR_MODE(%a6),%d0 # pass rnd mode,prec
1177 bsr.l slognp1 # operand is a NORM
1180 cmpi.b %d1,&ZERO # is operand a ZERO?
1182 bsr.l src_zero # yes
1185 cmpi.b %d1,&INF # is operand an INF?
1187 bsr.l sopr_inf # yes
1190 cmpi.b %d1,&QNAN # is operand a QNAN?
1192 bsr.l src_qnan # yes
1195 bsr.l slognp1d # operand is a DENORM
1199 # Result is now in FP0
1201 movm.l EXC_DREGS(%a6),&0x0303 # restore d0-d1/a0-a1
1202 fmovm.l USER_FPCR(%a6),%fpcr,%fpsr # restore ctrl regs
1203 fmovm.x EXC_FP1(%a6),&0x40 # restore fp1
# Extended-precision entry wrapper around slognp1 (the entry label is not
# visible in this view). Sets up a LOCAL_SIZE-byte frame, saves d0-d1/a0-a1,
# FPCR/FPSR and fp0/fp1, copies the 12-byte stacked argument (three longwords
# starting at 0x8(a6)) to the buffer addressed by a0, classifies it via tag
# and calls the routine for the operand type: slognp1 (NORM), src_zero
# (ZERO), sopr_inf (INF), src_qnan (QNAN) or slognp1d (DENORM). The result is
# returned in fp0, so only fp1 is restored on exit.
# NOTE(review): a0 is used as the copy destination but the instruction that
# loads it is not visible here; the embedded line numbers skip, so that
# line, the conditional branches and the closing unlk/rts are presumably
# elided from this view -- confirm against the full file.
1209 link %a6,&-LOCAL_SIZE
1211 movm.l &0x0303,EXC_DREGS(%a6) # save d0-d1/a0-a1
1212 fmovm.l %fpcr,%fpsr,USER_FPCR(%a6) # save ctrl regs
1213 fmovm.x &0xc0,EXC_FP0(%a6) # save fp0/fp1
1215 fmov.l &0x0,%fpcr # zero FPCR
1218 # copy, convert, and tag input argument
1221 mov.l 0x8+0x0(%a6),0x0(%a0) # load ext input
1222 mov.l 0x8+0x4(%a6),0x4(%a0)
1223 mov.l 0x8+0x8(%a6),0x8(%a0)
1224 bsr.l tag # fetch operand type
# Clear the condition-code and exception-status bytes of the saved FPSR;
# the quotient and accrued-exception bytes are kept.
1228 andi.l &0x00ff00ff,USER_FPSR(%a6)
1231 mov.b FPCR_MODE(%a6),%d0 # pass rnd mode,prec
1235 bsr.l slognp1 # operand is a NORM
1238 cmpi.b %d1,&ZERO # is operand a ZERO?
1240 bsr.l src_zero # yes
1243 cmpi.b %d1,&INF # is operand an INF?
1245 bsr.l sopr_inf # yes
1248 cmpi.b %d1,&QNAN # is operand a QNAN?
1250 bsr.l src_qnan # yes
1253 bsr.l slognp1d # operand is a DENORM
1257 # Result is now in FP0
1259 movm.l EXC_DREGS(%a6),&0x0303 # restore d0-d1/a0-a1
1260 fmovm.l USER_FPCR(%a6),%fpcr,%fpsr # restore ctrl regs
1261 fmovm.x EXC_FP1(%a6),&0x40 # restore fp1
1266 #########################################################################
1267 # MONADIC TEMPLATE #
1268 #########################################################################
# Single-precision entry wrapper around setoxm1 (the entry label is not
# visible in this view). Sets up a LOCAL_SIZE-byte frame, saves d0-d1/a0-a1,
# FPCR/FPSR and fp0/fp1, stores the stacked argument in FP_SRC as an extended
# value, classifies it via tag and calls the routine for the operand type:
# setoxm1 (NORM), src_zero (ZERO), setoxm1i (INF), src_qnan (QNAN) or
# setoxm1d (DENORM). The result is returned in fp0, so only fp1 is restored.
# NOTE(review): the embedded line numbers skip here, so the conditional
# branches between the type checks and the closing unlk/rts are presumably
# elided from this view -- confirm against the full file.
1271 link %a6,&-LOCAL_SIZE
1273 movm.l &0x0303,EXC_DREGS(%a6) # save d0-d1/a0-a1
1274 fmovm.l %fpcr,%fpsr,USER_FPCR(%a6) # save ctrl regs
1275 fmovm.x &0xc0,EXC_FP0(%a6) # save fp0/fp1
1277 fmov.l &0x0,%fpcr # zero FPCR
1280 # copy, convert, and tag input argument
1282 fmov.s 0x8(%a6),%fp0 # load sgl input
1283 fmov.x %fp0,FP_SRC(%a6)
1285 bsr.l tag # fetch operand type
# Clear the condition-code and exception-status bytes of the saved FPSR;
# the quotient and accrued-exception bytes are kept.
1289 andi.l &0x00ff00ff,USER_FPSR(%a6)
1292 mov.b FPCR_MODE(%a6),%d0 # pass rnd mode,prec
1296 bsr.l setoxm1 # operand is a NORM
1299 cmpi.b %d1,&ZERO # is operand a ZERO?
1301 bsr.l src_zero # yes
1304 cmpi.b %d1,&INF # is operand an INF?
1306 bsr.l setoxm1i # yes
1309 cmpi.b %d1,&QNAN # is operand a QNAN?
1311 bsr.l src_qnan # yes
1314 bsr.l setoxm1d # operand is a DENORM
1318 # Result is now in FP0
1320 movm.l EXC_DREGS(%a6),&0x0303 # restore d0-d1/a0-a1
1321 fmovm.l USER_FPCR(%a6),%fpcr,%fpsr # restore ctrl regs
1322 fmovm.x EXC_FP1(%a6),&0x40 # restore fp1
# Double-precision entry wrapper around setoxm1 (the entry label is not
# visible in this view). Sets up a LOCAL_SIZE-byte frame, saves d0-d1/a0-a1,
# FPCR/FPSR and fp0/fp1, stores the stacked argument in FP_SRC as an extended
# value, classifies it via tag and calls the routine for the operand type:
# setoxm1 (NORM), src_zero (ZERO), setoxm1i (INF), src_qnan (QNAN) or
# setoxm1d (DENORM). The result is returned in fp0, so only fp1 is restored.
# NOTE(review): the embedded line numbers skip here, so the conditional
# branches between the type checks and the closing unlk/rts are presumably
# elided from this view -- confirm against the full file.
1328 link %a6,&-LOCAL_SIZE
1330 movm.l &0x0303,EXC_DREGS(%a6) # save d0-d1/a0-a1
1331 fmovm.l %fpcr,%fpsr,USER_FPCR(%a6) # save ctrl regs
1332 fmovm.x &0xc0,EXC_FP0(%a6) # save fp0/fp1
1334 fmov.l &0x0,%fpcr # zero FPCR
1337 # copy, convert, and tag input argument
1339 fmov.d 0x8(%a6),%fp0 # load dbl input
1340 fmov.x %fp0,FP_SRC(%a6)
1342 bsr.l tag # fetch operand type
# Clear the condition-code and exception-status bytes of the saved FPSR;
# the quotient and accrued-exception bytes are kept.
1346 andi.l &0x00ff00ff,USER_FPSR(%a6)
1349 mov.b FPCR_MODE(%a6),%d0 # pass rnd mode,prec
1354 bsr.l setoxm1 # operand is a NORM
1357 cmpi.b %d1,&ZERO # is operand a ZERO?
1359 bsr.l src_zero # yes
1362 cmpi.b %d1,&INF # is operand an INF?
1364 bsr.l setoxm1i # yes
1367 cmpi.b %d1,&QNAN # is operand a QNAN?
1369 bsr.l src_qnan # yes
1372 bsr.l setoxm1d # operand is a DENORM
1376 # Result is now in FP0
1378 movm.l EXC_DREGS(%a6),&0x0303 # restore d0-d1/a0-a1
1379 fmovm.l USER_FPCR(%a6),%fpcr,%fpsr # restore ctrl regs
1380 fmovm.x EXC_FP1(%a6),&0x40 # restore fp1
# Extended-precision entry wrapper around setoxm1 (the entry label is not
# visible in this view). Sets up a LOCAL_SIZE-byte frame, saves d0-d1/a0-a1,
# FPCR/FPSR and fp0/fp1, copies the 12-byte stacked argument (three longwords
# starting at 0x8(a6)) to the buffer addressed by a0, classifies it via tag
# and calls the routine for the operand type: setoxm1 (NORM), src_zero
# (ZERO), setoxm1i (INF), src_qnan (QNAN) or setoxm1d (DENORM). The result is
# returned in fp0, so only fp1 is restored on exit.
# NOTE(review): a0 is used as the copy destination but the instruction that
# loads it is not visible here; the embedded line numbers skip, so that
# line, the conditional branches and the closing unlk/rts are presumably
# elided from this view -- confirm against the full file.
1386 link %a6,&-LOCAL_SIZE
1388 movm.l &0x0303,EXC_DREGS(%a6) # save d0-d1/a0-a1
1389 fmovm.l %fpcr,%fpsr,USER_FPCR(%a6) # save ctrl regs
1390 fmovm.x &0xc0,EXC_FP0(%a6) # save fp0/fp1
1392 fmov.l &0x0,%fpcr # zero FPCR
1395 # copy, convert, and tag input argument
1398 mov.l 0x8+0x0(%a6),0x0(%a0) # load ext input
1399 mov.l 0x8+0x4(%a6),0x4(%a0)
1400 mov.l 0x8+0x8(%a6),0x8(%a0)
1401 bsr.l tag # fetch operand type
# Clear the condition-code and exception-status bytes of the saved FPSR;
# the quotient and accrued-exception bytes are kept.
1405 andi.l &0x00ff00ff,USER_FPSR(%a6)
1408 mov.b FPCR_MODE(%a6),%d0 # pass rnd mode,prec
1412 bsr.l setoxm1 # operand is a NORM
1415 cmpi.b %d1,&ZERO # is operand a ZERO?
1417 bsr.l src_zero # yes
1420 cmpi.b %d1,&INF # is operand an INF?
1422 bsr.l setoxm1i # yes
1425 cmpi.b %d1,&QNAN # is operand a QNAN?
1427 bsr.l src_qnan # yes
1430 bsr.l setoxm1d # operand is a DENORM
1434 # Result is now in FP0
1436 movm.l EXC_DREGS(%a6),&0x0303 # restore d0-d1/a0-a1
1437 fmovm.l USER_FPCR(%a6),%fpcr,%fpsr # restore ctrl regs
1438 fmovm.x EXC_FP1(%a6),&0x40 # restore fp1
1443 #########################################################################
1444 # MONADIC TEMPLATE #
1445 #########################################################################
# Single-precision entry wrapper around stanh (the entry label is not visible
# in this view). Sets up a LOCAL_SIZE-byte frame, saves d0-d1/a0-a1,
# FPCR/FPSR and fp0/fp1, stores the stacked argument in FP_SRC as an extended
# value, classifies it via tag and checks the operand type. The visible calls
# are stanh (NORM), src_zero (ZERO), src_qnan (QNAN) and stanhd (DENORM).
# The result is returned in fp0, so only fp1 is restored on exit.
# NOTE(review): the embedded line numbers skip here, so the conditional
# branches, the INF handler call and the closing unlk/rts are presumably
# elided from this view -- confirm against the full file.
1448 link %a6,&-LOCAL_SIZE
1450 movm.l &0x0303,EXC_DREGS(%a6) # save d0-d1/a0-a1
1451 fmovm.l %fpcr,%fpsr,USER_FPCR(%a6) # save ctrl regs
1452 fmovm.x &0xc0,EXC_FP0(%a6) # save fp0/fp1
1454 fmov.l &0x0,%fpcr # zero FPCR
1457 # copy, convert, and tag input argument
1459 fmov.s 0x8(%a6),%fp0 # load sgl input
1460 fmov.x %fp0,FP_SRC(%a6)
1462 bsr.l tag # fetch operand type
# Clear the condition-code and exception-status bytes of the saved FPSR;
# the quotient and accrued-exception bytes are kept.
1466 andi.l &0x00ff00ff,USER_FPSR(%a6)
1469 mov.b FPCR_MODE(%a6),%d0 # pass rnd mode,prec
1473 bsr.l stanh # operand is a NORM
1476 cmpi.b %d1,&ZERO # is operand a ZERO?
1478 bsr.l src_zero # yes
1481 cmpi.b %d1,&INF # is operand an INF?
1486 cmpi.b %d1,&QNAN # is operand a QNAN?
1488 bsr.l src_qnan # yes
1491 bsr.l stanhd # operand is a DENORM
1495 # Result is now in FP0
1497 movm.l EXC_DREGS(%a6),&0x0303 # restore d0-d1/a0-a1
1498 fmovm.l USER_FPCR(%a6),%fpcr,%fpsr # restore ctrl regs
1499 fmovm.x EXC_FP1(%a6),&0x40 # restore fp1
# Double-precision entry wrapper around stanh (the entry label is not visible
# in this view). Sets up a LOCAL_SIZE-byte frame, saves d0-d1/a0-a1,
# FPCR/FPSR and fp0/fp1, stores the stacked argument in FP_SRC as an extended
# value, classifies it via tag and checks the operand type. The visible calls
# are stanh (NORM), src_zero (ZERO), src_qnan (QNAN) and stanhd (DENORM).
# The result is returned in fp0, so only fp1 is restored on exit.
# NOTE(review): the embedded line numbers skip here, so the conditional
# branches, the INF handler call and the closing unlk/rts are presumably
# elided from this view -- confirm against the full file.
1505 link %a6,&-LOCAL_SIZE
1507 movm.l &0x0303,EXC_DREGS(%a6) # save d0-d1/a0-a1
1508 fmovm.l %fpcr,%fpsr,USER_FPCR(%a6) # save ctrl regs
1509 fmovm.x &0xc0,EXC_FP0(%a6) # save fp0/fp1
1511 fmov.l &0x0,%fpcr # zero FPCR
1514 # copy, convert, and tag input argument
1516 fmov.d 0x8(%a6),%fp0 # load dbl input
1517 fmov.x %fp0,FP_SRC(%a6)
1519 bsr.l tag # fetch operand type
# Clear the condition-code and exception-status bytes of the saved FPSR;
# the quotient and accrued-exception bytes are kept.
1523 andi.l &0x00ff00ff,USER_FPSR(%a6)
1526 mov.b FPCR_MODE(%a6),%d0 # pass rnd mode,prec
1531 bsr.l stanh # operand is a NORM
1534 cmpi.b %d1,&ZERO # is operand a ZERO?
1536 bsr.l src_zero # yes
1539 cmpi.b %d1,&INF # is operand an INF?
1544 cmpi.b %d1,&QNAN # is operand a QNAN?
1546 bsr.l src_qnan # yes
1549 bsr.l stanhd # operand is a DENORM
1553 # Result is now in FP0
1555 movm.l EXC_DREGS(%a6),&0x0303 # restore d0-d1/a0-a1
1556 fmovm.l USER_FPCR(%a6),%fpcr,%fpsr # restore ctrl regs
1557 fmovm.x EXC_FP1(%a6),&0x40 # restore fp1
# Extended-precision entry wrapper around stanh (the entry label is not
# visible in this view). Sets up a LOCAL_SIZE-byte frame, saves d0-d1/a0-a1,
# FPCR/FPSR and fp0/fp1, copies the 12-byte stacked argument (three longwords
# starting at 0x8(a6)) to the buffer addressed by a0, classifies it via tag
# and checks the operand type. The visible calls are stanh (NORM), src_zero
# (ZERO), src_qnan (QNAN) and stanhd (DENORM). The result is returned in fp0,
# so only fp1 is restored on exit.
# NOTE(review): a0 is used as the copy destination but the instruction that
# loads it is not visible here; the embedded line numbers skip, so that
# line, the conditional branches, the INF handler call and the closing
# unlk/rts are presumably elided -- confirm against the full file.
1563 link %a6,&-LOCAL_SIZE
1565 movm.l &0x0303,EXC_DREGS(%a6) # save d0-d1/a0-a1
1566 fmovm.l %fpcr,%fpsr,USER_FPCR(%a6) # save ctrl regs
1567 fmovm.x &0xc0,EXC_FP0(%a6) # save fp0/fp1
1569 fmov.l &0x0,%fpcr # zero FPCR
1572 # copy, convert, and tag input argument
1575 mov.l 0x8+0x0(%a6),0x0(%a0) # load ext input
1576 mov.l 0x8+0x4(%a6),0x4(%a0)
1577 mov.l 0x8+0x8(%a6),0x8(%a0)
1578 bsr.l tag # fetch operand type
# Clear the condition-code and exception-status bytes of the saved FPSR;
# the quotient and accrued-exception bytes are kept.
1582 andi.l &0x00ff00ff,USER_FPSR(%a6)
1585 mov.b FPCR_MODE(%a6),%d0 # pass rnd mode,prec
1589 bsr.l stanh # operand is a NORM
1592 cmpi.b %d1,&ZERO # is operand a ZERO?
1594 bsr.l src_zero # yes
1597 cmpi.b %d1,&INF # is operand an INF?
1602 cmpi.b %d1,&QNAN # is operand a QNAN?
1604 bsr.l src_qnan # yes
1607 bsr.l stanhd # operand is a DENORM
1611 # Result is now in FP0
1613 movm.l EXC_DREGS(%a6),&0x0303 # restore d0-d1/a0-a1
1614 fmovm.l USER_FPCR(%a6),%fpcr,%fpsr # restore ctrl regs
1615 fmovm.x EXC_FP1(%a6),&0x40 # restore fp1
1620 #########################################################################
1621 # MONADIC TEMPLATE #
1622 #########################################################################
# Single-precision entry wrapper around satan (the entry label is not visible
# in this view). Sets up a LOCAL_SIZE-byte frame, saves d0-d1/a0-a1,
# FPCR/FPSR and fp0/fp1, stores the stacked argument in FP_SRC as an extended
# value, classifies it via tag and checks the operand type. The visible calls
# are satan (NORM), src_zero (ZERO), src_qnan (QNAN) and satand (DENORM).
# The result is returned in fp0, so only fp1 is restored on exit.
# NOTE(review): the embedded line numbers skip here, so the conditional
# branches, the INF handler call and the closing unlk/rts are presumably
# elided from this view -- confirm against the full file.
1625 link %a6,&-LOCAL_SIZE
1627 movm.l &0x0303,EXC_DREGS(%a6) # save d0-d1/a0-a1
1628 fmovm.l %fpcr,%fpsr,USER_FPCR(%a6) # save ctrl regs
1629 fmovm.x &0xc0,EXC_FP0(%a6) # save fp0/fp1
1631 fmov.l &0x0,%fpcr # zero FPCR
1634 # copy, convert, and tag input argument
1636 fmov.s 0x8(%a6),%fp0 # load sgl input
1637 fmov.x %fp0,FP_SRC(%a6)
1639 bsr.l tag # fetch operand type
# Clear the condition-code and exception-status bytes of the saved FPSR;
# the quotient and accrued-exception bytes are kept.
1643 andi.l &0x00ff00ff,USER_FPSR(%a6)
1646 mov.b FPCR_MODE(%a6),%d0 # pass rnd mode,prec
1650 bsr.l satan # operand is a NORM
1653 cmpi.b %d1,&ZERO # is operand a ZERO?
1655 bsr.l src_zero # yes
1658 cmpi.b %d1,&INF # is operand an INF?
1663 cmpi.b %d1,&QNAN # is operand a QNAN?
1665 bsr.l src_qnan # yes
1668 bsr.l satand # operand is a DENORM
1672 # Result is now in FP0
1674 movm.l EXC_DREGS(%a6),&0x0303 # restore d0-d1/a0-a1
1675 fmovm.l USER_FPCR(%a6),%fpcr,%fpsr # restore ctrl regs
1676 fmovm.x EXC_FP1(%a6),&0x40 # restore fp1
# Double-precision entry wrapper around satan (the entry label is not visible
# in this view). Sets up a LOCAL_SIZE-byte frame, saves d0-d1/a0-a1,
# FPCR/FPSR and fp0/fp1, stores the stacked argument in FP_SRC as an extended
# value, classifies it via tag and checks the operand type. The visible calls
# are satan (NORM), src_zero (ZERO), src_qnan (QNAN) and satand (DENORM).
# The result is returned in fp0, so only fp1 is restored on exit.
# NOTE(review): the embedded line numbers skip here, so the conditional
# branches, the INF handler call and the closing unlk/rts are presumably
# elided from this view -- confirm against the full file.
1682 link %a6,&-LOCAL_SIZE
1684 movm.l &0x0303,EXC_DREGS(%a6) # save d0-d1/a0-a1
1685 fmovm.l %fpcr,%fpsr,USER_FPCR(%a6) # save ctrl regs
1686 fmovm.x &0xc0,EXC_FP0(%a6) # save fp0/fp1
1688 fmov.l &0x0,%fpcr # zero FPCR
1691 # copy, convert, and tag input argument
1693 fmov.d 0x8(%a6),%fp0 # load dbl input
1694 fmov.x %fp0,FP_SRC(%a6)
1696 bsr.l tag # fetch operand type
# Clear the condition-code and exception-status bytes of the saved FPSR;
# the quotient and accrued-exception bytes are kept.
1700 andi.l &0x00ff00ff,USER_FPSR(%a6)
1703 mov.b FPCR_MODE(%a6),%d0 # pass rnd mode,prec
1708 bsr.l satan # operand is a NORM
1711 cmpi.b %d1,&ZERO # is operand a ZERO?
1713 bsr.l src_zero # yes
1716 cmpi.b %d1,&INF # is operand an INF?
1721 cmpi.b %d1,&QNAN # is operand a QNAN?
1723 bsr.l src_qnan # yes
1726 bsr.l satand # operand is a DENORM
1730 # Result is now in FP0
1732 movm.l EXC_DREGS(%a6),&0x0303 # restore d0-d1/a0-a1
1733 fmovm.l USER_FPCR(%a6),%fpcr,%fpsr # restore ctrl regs
1734 fmovm.x EXC_FP1(%a6),&0x40 # restore fp1
# Extended-precision entry wrapper around satan (the entry label is not
# visible in this view). Sets up a LOCAL_SIZE-byte frame, saves d0-d1/a0-a1,
# FPCR/FPSR and fp0/fp1, copies the 12-byte stacked argument (three longwords
# starting at 0x8(a6)) to the buffer addressed by a0, classifies it via tag
# and checks the operand type. The visible calls are satan (NORM), src_zero
# (ZERO), src_qnan (QNAN) and satand (DENORM). The result is returned in fp0,
# so only fp1 is restored on exit.
# NOTE(review): a0 is used as the copy destination but the instruction that
# loads it is not visible here; the embedded line numbers skip, so that
# line, the conditional branches, the INF handler call and the closing
# unlk/rts are presumably elided -- confirm against the full file.
1740 link %a6,&-LOCAL_SIZE
1742 movm.l &0x0303,EXC_DREGS(%a6) # save d0-d1/a0-a1
1743 fmovm.l %fpcr,%fpsr,USER_FPCR(%a6) # save ctrl regs
1744 fmovm.x &0xc0,EXC_FP0(%a6) # save fp0/fp1
1746 fmov.l &0x0,%fpcr # zero FPCR
1749 # copy, convert, and tag input argument
1752 mov.l 0x8+0x0(%a6),0x0(%a0) # load ext input
1753 mov.l 0x8+0x4(%a6),0x4(%a0)
1754 mov.l 0x8+0x8(%a6),0x8(%a0)
1755 bsr.l tag # fetch operand type
# Clear the condition-code and exception-status bytes of the saved FPSR;
# the quotient and accrued-exception bytes are kept.
1759 andi.l &0x00ff00ff,USER_FPSR(%a6)
1762 mov.b FPCR_MODE(%a6),%d0 # pass rnd mode,prec
1766 bsr.l satan # operand is a NORM
1769 cmpi.b %d1,&ZERO # is operand a ZERO?
1771 bsr.l src_zero # yes
1774 cmpi.b %d1,&INF # is operand an INF?
1779 cmpi.b %d1,&QNAN # is operand a QNAN?
1781 bsr.l src_qnan # yes
1784 bsr.l satand # operand is a DENORM
1788 # Result is now in FP0
1790 movm.l EXC_DREGS(%a6),&0x0303 # restore d0-d1/a0-a1
1791 fmovm.l USER_FPCR(%a6),%fpcr,%fpsr # restore ctrl regs
1792 fmovm.x EXC_FP1(%a6),&0x40 # restore fp1
1797 #########################################################################
1798 # MONADIC TEMPLATE #
1799 #########################################################################
# Single-precision entry wrapper around sasin (the entry label is not visible
# in this view). Sets up a LOCAL_SIZE-byte frame, saves d0-d1/a0-a1,
# FPCR/FPSR and fp0/fp1, stores the stacked argument in FP_SRC as an extended
# value, classifies it via tag and checks the operand type. The visible calls
# are sasin (NORM), src_zero (ZERO), src_qnan (QNAN) and sasind (DENORM).
# The result is returned in fp0, so only fp1 is restored on exit.
# NOTE(review): the embedded line numbers skip here, so the conditional
# branches, the INF handler call and the closing unlk/rts are presumably
# elided from this view -- confirm against the full file.
1802 link %a6,&-LOCAL_SIZE
1804 movm.l &0x0303,EXC_DREGS(%a6) # save d0-d1/a0-a1
1805 fmovm.l %fpcr,%fpsr,USER_FPCR(%a6) # save ctrl regs
1806 fmovm.x &0xc0,EXC_FP0(%a6) # save fp0/fp1
1808 fmov.l &0x0,%fpcr # zero FPCR
1811 # copy, convert, and tag input argument
1813 fmov.s 0x8(%a6),%fp0 # load sgl input
1814 fmov.x %fp0,FP_SRC(%a6)
1816 bsr.l tag # fetch operand type
# Clear the condition-code and exception-status bytes of the saved FPSR;
# the quotient and accrued-exception bytes are kept.
1820 andi.l &0x00ff00ff,USER_FPSR(%a6)
1823 mov.b FPCR_MODE(%a6),%d0 # pass rnd mode,prec
1827 bsr.l sasin # operand is a NORM
1830 cmpi.b %d1,&ZERO # is operand a ZERO?
1832 bsr.l src_zero # yes
1835 cmpi.b %d1,&INF # is operand an INF?
1840 cmpi.b %d1,&QNAN # is operand a QNAN?
1842 bsr.l src_qnan # yes
1845 bsr.l sasind # operand is a DENORM
1849 # Result is now in FP0
1851 movm.l EXC_DREGS(%a6),&0x0303 # restore d0-d1/a0-a1
1852 fmovm.l USER_FPCR(%a6),%fpcr,%fpsr # restore ctrl regs
1853 fmovm.x EXC_FP1(%a6),&0x40 # restore fp1
# Entry stub: double-precision argument -> sasin / sasind.
# Builds a LOCAL_SIZE-byte frame on %a6, saves d0-d1/a0-a1, FPCR/FPSR
# and fp0/fp1 in it, and zeroes the FPCR.  The double at 0x8(%a6) is
# loaded into fp0 and stored in extended format at FP_SRC(%a6), then
# tag is called to classify it.  Visible dispatch on the type byte in
# %d1: NORM -> sasin, ZERO -> src_zero, QNAN -> src_qnan,
# DENORM -> sasind.  INF is tested, but no handler call is visible.
# The result stays in fp0, so only d0-d1/a0-a1, the control registers
# and fp1 are restored.
# NOTE(review): the embedded line numbers skip - the entry label, the
# branches between the type tests and any frame teardown/return are not
# visible in this excerpt.
1859 link %a6,&-LOCAL_SIZE
1861 movm.l &0x0303,EXC_DREGS(%a6) # save d0-d1/a0-a1
1862 fmovm.l %fpcr,%fpsr,USER_FPCR(%a6) # save ctrl regs
1863 fmovm.x &0xc0,EXC_FP0(%a6) # save fp0/fp1
1865 fmov.l &0x0,%fpcr # zero FPCR
1868 # copy, convert, and tag input argument
1870 fmov.d 0x8(%a6),%fp0 # load dbl input
1871 fmov.x %fp0,FP_SRC(%a6) # store it in extended format
1873 bsr.l tag # fetch operand type
1877 andi.l &0x00ff00ff,USER_FPSR(%a6) # clear ccode and exc-status bytes of saved FPSR
1880 mov.b FPCR_MODE(%a6),%d0 # pass rnd mode,prec
1885 bsr.l sasin # operand is a NORM
1888 cmpi.b %d1,&ZERO # is operand a ZERO?
1890 bsr.l src_zero # yes
1893 cmpi.b %d1,&INF # is operand an INF?
1898 cmpi.b %d1,&QNAN # is operand a QNAN?
1900 bsr.l src_qnan # yes
1903 bsr.l sasind # operand is a DENORM
1907 # Result is now in FP0
1909 movm.l EXC_DREGS(%a6),&0x0303 # restore d0-d1/a0-a1
1910 fmovm.l USER_FPCR(%a6),%fpcr,%fpsr # restore ctrl regs
1911 fmovm.x EXC_FP1(%a6),&0x40 # restore fp1
# Entry stub: extended-precision argument -> sasin / sasind.
# Builds a LOCAL_SIZE-byte frame on %a6, saves d0-d1/a0-a1, FPCR/FPSR
# and fp0/fp1 in it, and zeroes the FPCR.  The three longwords of the
# argument at 0x8(%a6) are copied to the buffer addressed by %a0, then
# tag is called to classify the operand.  Visible dispatch on the type
# byte in %d1: NORM -> sasin, ZERO -> src_zero, QNAN -> src_qnan,
# DENORM -> sasind.  INF is tested, but no handler call is visible.
# The result stays in fp0, so only d0-d1/a0-a1, the control registers
# and fp1 are restored.
# NOTE(review): the embedded line numbers skip - the entry label, the
# branches between the type tests and any frame teardown/return are not
# visible in this excerpt.  Nothing visible loads %a0 before it is used
# as the copy destination; presumably that instruction was dropped from
# the excerpt as well - confirm against the full file.
1917 link %a6,&-LOCAL_SIZE
1919 movm.l &0x0303,EXC_DREGS(%a6) # save d0-d1/a0-a1
1920 fmovm.l %fpcr,%fpsr,USER_FPCR(%a6) # save ctrl regs
1921 fmovm.x &0xc0,EXC_FP0(%a6) # save fp0/fp1
1923 fmov.l &0x0,%fpcr # zero FPCR
1926 # copy, convert, and tag input argument
1929 mov.l 0x8+0x0(%a6),0x0(%a0) # load ext input
1930 mov.l 0x8+0x4(%a6),0x4(%a0) # second longword of the operand
1931 mov.l 0x8+0x8(%a6),0x8(%a0) # third longword of the operand
1932 bsr.l tag # fetch operand type
1936 andi.l &0x00ff00ff,USER_FPSR(%a6) # clear ccode and exc-status bytes of saved FPSR
1939 mov.b FPCR_MODE(%a6),%d0 # pass rnd mode,prec
1943 bsr.l sasin # operand is a NORM
1946 cmpi.b %d1,&ZERO # is operand a ZERO?
1948 bsr.l src_zero # yes
1951 cmpi.b %d1,&INF # is operand an INF?
1956 cmpi.b %d1,&QNAN # is operand a QNAN?
1958 bsr.l src_qnan # yes
1961 bsr.l sasind # operand is a DENORM
1965 # Result is now in FP0
1967 movm.l EXC_DREGS(%a6),&0x0303 # restore d0-d1/a0-a1
1968 fmovm.l USER_FPCR(%a6),%fpcr,%fpsr # restore ctrl regs
1969 fmovm.x EXC_FP1(%a6),&0x40 # restore fp1
1974 #########################################################################
1975 # MONADIC TEMPLATE #
1976 #########################################################################
# Entry stub: single-precision argument -> satanh / satanhd.
# Builds a LOCAL_SIZE-byte frame on %a6, saves d0-d1/a0-a1, FPCR/FPSR
# and fp0/fp1 in it, and zeroes the FPCR.  The single at 0x8(%a6) is
# loaded into fp0 and stored in extended format at FP_SRC(%a6), then
# tag is called to classify it.  Visible dispatch on the type byte in
# %d1: NORM -> satanh, ZERO -> src_zero, QNAN -> src_qnan,
# DENORM -> satanhd.  INF is tested, but no handler call is visible.
# The result stays in fp0, so only d0-d1/a0-a1, the control registers
# and fp1 are restored.
# NOTE(review): the embedded line numbers skip - the entry label, the
# branches between the type tests and any frame teardown/return are not
# visible in this excerpt.
1979 link %a6,&-LOCAL_SIZE
1981 movm.l &0x0303,EXC_DREGS(%a6) # save d0-d1/a0-a1
1982 fmovm.l %fpcr,%fpsr,USER_FPCR(%a6) # save ctrl regs
1983 fmovm.x &0xc0,EXC_FP0(%a6) # save fp0/fp1
1985 fmov.l &0x0,%fpcr # zero FPCR
1988 # copy, convert, and tag input argument
1990 fmov.s 0x8(%a6),%fp0 # load sgl input
1991 fmov.x %fp0,FP_SRC(%a6) # store it in extended format
1993 bsr.l tag # fetch operand type
1997 andi.l &0x00ff00ff,USER_FPSR(%a6) # clear ccode and exc-status bytes of saved FPSR
2000 mov.b FPCR_MODE(%a6),%d0 # pass rnd mode,prec
2004 bsr.l satanh # operand is a NORM
2007 cmpi.b %d1,&ZERO # is operand a ZERO?
2009 bsr.l src_zero # yes
2012 cmpi.b %d1,&INF # is operand an INF?
2017 cmpi.b %d1,&QNAN # is operand a QNAN?
2019 bsr.l src_qnan # yes
2022 bsr.l satanhd # operand is a DENORM
2026 # Result is now in FP0
2028 movm.l EXC_DREGS(%a6),&0x0303 # restore d0-d1/a0-a1
2029 fmovm.l USER_FPCR(%a6),%fpcr,%fpsr # restore ctrl regs
2030 fmovm.x EXC_FP1(%a6),&0x40 # restore fp1
# Entry stub: double-precision argument -> satanh / satanhd.
# Builds a LOCAL_SIZE-byte frame on %a6, saves d0-d1/a0-a1, FPCR/FPSR
# and fp0/fp1 in it, and zeroes the FPCR.  The double at 0x8(%a6) is
# loaded into fp0 and stored in extended format at FP_SRC(%a6), then
# tag is called to classify it.  Visible dispatch on the type byte in
# %d1: NORM -> satanh, ZERO -> src_zero, QNAN -> src_qnan,
# DENORM -> satanhd.  INF is tested, but no handler call is visible.
# The result stays in fp0, so only d0-d1/a0-a1, the control registers
# and fp1 are restored.
# NOTE(review): the embedded line numbers skip - the entry label, the
# branches between the type tests and any frame teardown/return are not
# visible in this excerpt.
2036 link %a6,&-LOCAL_SIZE
2038 movm.l &0x0303,EXC_DREGS(%a6) # save d0-d1/a0-a1
2039 fmovm.l %fpcr,%fpsr,USER_FPCR(%a6) # save ctrl regs
2040 fmovm.x &0xc0,EXC_FP0(%a6) # save fp0/fp1
2042 fmov.l &0x0,%fpcr # zero FPCR
2045 # copy, convert, and tag input argument
2047 fmov.d 0x8(%a6),%fp0 # load dbl input
2048 fmov.x %fp0,FP_SRC(%a6) # store it in extended format
2050 bsr.l tag # fetch operand type
2054 andi.l &0x00ff00ff,USER_FPSR(%a6) # clear ccode and exc-status bytes of saved FPSR
2057 mov.b FPCR_MODE(%a6),%d0 # pass rnd mode,prec
2062 bsr.l satanh # operand is a NORM
2065 cmpi.b %d1,&ZERO # is operand a ZERO?
2067 bsr.l src_zero # yes
2070 cmpi.b %d1,&INF # is operand an INF?
2075 cmpi.b %d1,&QNAN # is operand a QNAN?
2077 bsr.l src_qnan # yes
2080 bsr.l satanhd # operand is a DENORM
2084 # Result is now in FP0
2086 movm.l EXC_DREGS(%a6),&0x0303 # restore d0-d1/a0-a1
2087 fmovm.l USER_FPCR(%a6),%fpcr,%fpsr # restore ctrl regs
2088 fmovm.x EXC_FP1(%a6),&0x40 # restore fp1
# Entry stub: extended-precision argument -> satanh / satanhd.
# Builds a LOCAL_SIZE-byte frame on %a6, saves d0-d1/a0-a1, FPCR/FPSR
# and fp0/fp1 in it, and zeroes the FPCR.  The three longwords of the
# argument at 0x8(%a6) are copied to the buffer addressed by %a0, then
# tag is called to classify the operand.  Visible dispatch on the type
# byte in %d1: NORM -> satanh, ZERO -> src_zero, QNAN -> src_qnan,
# DENORM -> satanhd.  INF is tested, but no handler call is visible.
# The result stays in fp0, so only d0-d1/a0-a1, the control registers
# and fp1 are restored.
# NOTE(review): the embedded line numbers skip - the entry label, the
# branches between the type tests and any frame teardown/return are not
# visible in this excerpt.  Nothing visible loads %a0 before it is used
# as the copy destination; presumably that instruction was dropped from
# the excerpt as well - confirm against the full file.
2094 link %a6,&-LOCAL_SIZE
2096 movm.l &0x0303,EXC_DREGS(%a6) # save d0-d1/a0-a1
2097 fmovm.l %fpcr,%fpsr,USER_FPCR(%a6) # save ctrl regs
2098 fmovm.x &0xc0,EXC_FP0(%a6) # save fp0/fp1
2100 fmov.l &0x0,%fpcr # zero FPCR
2103 # copy, convert, and tag input argument
2106 mov.l 0x8+0x0(%a6),0x0(%a0) # load ext input
2107 mov.l 0x8+0x4(%a6),0x4(%a0) # second longword of the operand
2108 mov.l 0x8+0x8(%a6),0x8(%a0) # third longword of the operand
2109 bsr.l tag # fetch operand type
2113 andi.l &0x00ff00ff,USER_FPSR(%a6) # clear ccode and exc-status bytes of saved FPSR
2116 mov.b FPCR_MODE(%a6),%d0 # pass rnd mode,prec
2120 bsr.l satanh # operand is a NORM
2123 cmpi.b %d1,&ZERO # is operand a ZERO?
2125 bsr.l src_zero # yes
2128 cmpi.b %d1,&INF # is operand an INF?
2133 cmpi.b %d1,&QNAN # is operand a QNAN?
2135 bsr.l src_qnan # yes
2138 bsr.l satanhd # operand is a DENORM
2142 # Result is now in FP0
2144 movm.l EXC_DREGS(%a6),&0x0303 # restore d0-d1/a0-a1
2145 fmovm.l USER_FPCR(%a6),%fpcr,%fpsr # restore ctrl regs
2146 fmovm.x EXC_FP1(%a6),&0x40 # restore fp1
2151 #########################################################################
2152 # MONADIC TEMPLATE #
2153 #########################################################################
# Entry stub: single-precision argument -> stan / stand.
# Builds a LOCAL_SIZE-byte frame on %a6, saves d0-d1/a0-a1, FPCR/FPSR
# and fp0/fp1 in it, and zeroes the FPCR.  The single at 0x8(%a6) is
# loaded into fp0 and stored in extended format at FP_SRC(%a6), then
# tag is called to classify it.  Visible dispatch on the type byte in
# %d1: NORM -> stan, ZERO -> src_zero, QNAN -> src_qnan,
# DENORM -> stand.  INF is tested, but no handler call is visible.
# The result stays in fp0, so only d0-d1/a0-a1, the control registers
# and fp1 are restored.
# NOTE(review): the embedded line numbers skip - the entry label, the
# branches between the type tests and any frame teardown/return are not
# visible in this excerpt.
2156 link %a6,&-LOCAL_SIZE
2158 movm.l &0x0303,EXC_DREGS(%a6) # save d0-d1/a0-a1
2159 fmovm.l %fpcr,%fpsr,USER_FPCR(%a6) # save ctrl regs
2160 fmovm.x &0xc0,EXC_FP0(%a6) # save fp0/fp1
2162 fmov.l &0x0,%fpcr # zero FPCR
2165 # copy, convert, and tag input argument
2167 fmov.s 0x8(%a6),%fp0 # load sgl input
2168 fmov.x %fp0,FP_SRC(%a6) # store it in extended format
2170 bsr.l tag # fetch operand type
2174 andi.l &0x00ff00ff,USER_FPSR(%a6) # clear ccode and exc-status bytes of saved FPSR
2177 mov.b FPCR_MODE(%a6),%d0 # pass rnd mode,prec
2181 bsr.l stan # operand is a NORM
2184 cmpi.b %d1,&ZERO # is operand a ZERO?
2186 bsr.l src_zero # yes
2189 cmpi.b %d1,&INF # is operand an INF?
2194 cmpi.b %d1,&QNAN # is operand a QNAN?
2196 bsr.l src_qnan # yes
2199 bsr.l stand # operand is a DENORM
2203 # Result is now in FP0
2205 movm.l EXC_DREGS(%a6),&0x0303 # restore d0-d1/a0-a1
2206 fmovm.l USER_FPCR(%a6),%fpcr,%fpsr # restore ctrl regs
2207 fmovm.x EXC_FP1(%a6),&0x40 # restore fp1
# Entry stub: double-precision argument -> stan / stand.
# Builds a LOCAL_SIZE-byte frame on %a6, saves d0-d1/a0-a1, FPCR/FPSR
# and fp0/fp1 in it, and zeroes the FPCR.  The double at 0x8(%a6) is
# loaded into fp0 and stored in extended format at FP_SRC(%a6), then
# tag is called to classify it.  Visible dispatch on the type byte in
# %d1: NORM -> stan, ZERO -> src_zero, QNAN -> src_qnan,
# DENORM -> stand.  INF is tested, but no handler call is visible.
# The result stays in fp0, so only d0-d1/a0-a1, the control registers
# and fp1 are restored.
# NOTE(review): the embedded line numbers skip - the entry label, the
# branches between the type tests and any frame teardown/return are not
# visible in this excerpt.
2213 link %a6,&-LOCAL_SIZE
2215 movm.l &0x0303,EXC_DREGS(%a6) # save d0-d1/a0-a1
2216 fmovm.l %fpcr,%fpsr,USER_FPCR(%a6) # save ctrl regs
2217 fmovm.x &0xc0,EXC_FP0(%a6) # save fp0/fp1
2219 fmov.l &0x0,%fpcr # zero FPCR
2222 # copy, convert, and tag input argument
2224 fmov.d 0x8(%a6),%fp0 # load dbl input
2225 fmov.x %fp0,FP_SRC(%a6) # store it in extended format
2227 bsr.l tag # fetch operand type
2231 andi.l &0x00ff00ff,USER_FPSR(%a6) # clear ccode and exc-status bytes of saved FPSR
2234 mov.b FPCR_MODE(%a6),%d0 # pass rnd mode,prec
2239 bsr.l stan # operand is a NORM
2242 cmpi.b %d1,&ZERO # is operand a ZERO?
2244 bsr.l src_zero # yes
2247 cmpi.b %d1,&INF # is operand an INF?
2252 cmpi.b %d1,&QNAN # is operand a QNAN?
2254 bsr.l src_qnan # yes
2257 bsr.l stand # operand is a DENORM
2261 # Result is now in FP0
2263 movm.l EXC_DREGS(%a6),&0x0303 # restore d0-d1/a0-a1
2264 fmovm.l USER_FPCR(%a6),%fpcr,%fpsr # restore ctrl regs
2265 fmovm.x EXC_FP1(%a6),&0x40 # restore fp1
# Entry stub: extended-precision argument -> stan / stand.
# Builds a LOCAL_SIZE-byte frame on %a6, saves d0-d1/a0-a1, FPCR/FPSR
# and fp0/fp1 in it, and zeroes the FPCR.  The three longwords of the
# argument at 0x8(%a6) are copied to the buffer addressed by %a0, then
# tag is called to classify the operand.  Visible dispatch on the type
# byte in %d1: NORM -> stan, ZERO -> src_zero, QNAN -> src_qnan,
# DENORM -> stand.  INF is tested, but no handler call is visible.
# The result stays in fp0, so only d0-d1/a0-a1, the control registers
# and fp1 are restored.
# NOTE(review): the embedded line numbers skip - the entry label, the
# branches between the type tests and any frame teardown/return are not
# visible in this excerpt.  Nothing visible loads %a0 before it is used
# as the copy destination; presumably that instruction was dropped from
# the excerpt as well - confirm against the full file.
2271 link %a6,&-LOCAL_SIZE
2273 movm.l &0x0303,EXC_DREGS(%a6) # save d0-d1/a0-a1
2274 fmovm.l %fpcr,%fpsr,USER_FPCR(%a6) # save ctrl regs
2275 fmovm.x &0xc0,EXC_FP0(%a6) # save fp0/fp1
2277 fmov.l &0x0,%fpcr # zero FPCR
2280 # copy, convert, and tag input argument
2283 mov.l 0x8+0x0(%a6),0x0(%a0) # load ext input
2284 mov.l 0x8+0x4(%a6),0x4(%a0) # second longword of the operand
2285 mov.l 0x8+0x8(%a6),0x8(%a0) # third longword of the operand
2286 bsr.l tag # fetch operand type
2290 andi.l &0x00ff00ff,USER_FPSR(%a6) # clear ccode and exc-status bytes of saved FPSR
2293 mov.b FPCR_MODE(%a6),%d0 # pass rnd mode,prec
2297 bsr.l stan # operand is a NORM
2300 cmpi.b %d1,&ZERO # is operand a ZERO?
2302 bsr.l src_zero # yes
2305 cmpi.b %d1,&INF # is operand an INF?
2310 cmpi.b %d1,&QNAN # is operand a QNAN?
2312 bsr.l src_qnan # yes
2315 bsr.l stand # operand is a DENORM
2319 # Result is now in FP0
2321 movm.l EXC_DREGS(%a6),&0x0303 # restore d0-d1/a0-a1
2322 fmovm.l USER_FPCR(%a6),%fpcr,%fpsr # restore ctrl regs
2323 fmovm.x EXC_FP1(%a6),&0x40 # restore fp1
2328 #########################################################################
2329 # MONADIC TEMPLATE #
2330 #########################################################################
# Entry stub: single-precision argument -> setox / setoxd.
# Builds a LOCAL_SIZE-byte frame on %a6, saves d0-d1/a0-a1, FPCR/FPSR
# and fp0/fp1 in it, and zeroes the FPCR.  The single at 0x8(%a6) is
# loaded into fp0 and stored in extended format at FP_SRC(%a6), then
# tag is called to classify it.  Visible dispatch on the type byte in
# %d1: NORM -> setox, QNAN -> src_qnan, DENORM -> setoxd.  ZERO and INF
# are tested, but no handler call for either is visible.
# The result stays in fp0, so only d0-d1/a0-a1, the control registers
# and fp1 are restored.
# NOTE(review): the embedded line numbers skip - the entry label, the
# branches between the type tests and any frame teardown/return are not
# visible in this excerpt.
2333 link %a6,&-LOCAL_SIZE
2335 movm.l &0x0303,EXC_DREGS(%a6) # save d0-d1/a0-a1
2336 fmovm.l %fpcr,%fpsr,USER_FPCR(%a6) # save ctrl regs
2337 fmovm.x &0xc0,EXC_FP0(%a6) # save fp0/fp1
2339 fmov.l &0x0,%fpcr # zero FPCR
2342 # copy, convert, and tag input argument
2344 fmov.s 0x8(%a6),%fp0 # load sgl input
2345 fmov.x %fp0,FP_SRC(%a6) # store it in extended format
2347 bsr.l tag # fetch operand type
2351 andi.l &0x00ff00ff,USER_FPSR(%a6) # clear ccode and exc-status bytes of saved FPSR
2354 mov.b FPCR_MODE(%a6),%d0 # pass rnd mode,prec
2358 bsr.l setox # operand is a NORM
2361 cmpi.b %d1,&ZERO # is operand a ZERO?
2366 cmpi.b %d1,&INF # is operand an INF?
2371 cmpi.b %d1,&QNAN # is operand a QNAN?
2373 bsr.l src_qnan # yes
2376 bsr.l setoxd # operand is a DENORM
2380 # Result is now in FP0
2382 movm.l EXC_DREGS(%a6),&0x0303 # restore d0-d1/a0-a1
2383 fmovm.l USER_FPCR(%a6),%fpcr,%fpsr # restore ctrl regs
2384 fmovm.x EXC_FP1(%a6),&0x40 # restore fp1
# Entry stub: double-precision argument -> setox / setoxd.
# Builds a LOCAL_SIZE-byte frame on %a6, saves d0-d1/a0-a1, FPCR/FPSR
# and fp0/fp1 in it, and zeroes the FPCR.  The double at 0x8(%a6) is
# loaded into fp0 and stored in extended format at FP_SRC(%a6), then
# tag is called to classify it.  Visible dispatch on the type byte in
# %d1: NORM -> setox, QNAN -> src_qnan, DENORM -> setoxd.  ZERO and INF
# are tested, but no handler call for either is visible.
# The result stays in fp0, so only d0-d1/a0-a1, the control registers
# and fp1 are restored.
# NOTE(review): the embedded line numbers skip - the entry label, the
# branches between the type tests and any frame teardown/return are not
# visible in this excerpt.
2390 link %a6,&-LOCAL_SIZE
2392 movm.l &0x0303,EXC_DREGS(%a6) # save d0-d1/a0-a1
2393 fmovm.l %fpcr,%fpsr,USER_FPCR(%a6) # save ctrl regs
2394 fmovm.x &0xc0,EXC_FP0(%a6) # save fp0/fp1
2396 fmov.l &0x0,%fpcr # zero FPCR
2399 # copy, convert, and tag input argument
2401 fmov.d 0x8(%a6),%fp0 # load dbl input
2402 fmov.x %fp0,FP_SRC(%a6) # store it in extended format
2404 bsr.l tag # fetch operand type
2408 andi.l &0x00ff00ff,USER_FPSR(%a6) # clear ccode and exc-status bytes of saved FPSR
2411 mov.b FPCR_MODE(%a6),%d0 # pass rnd mode,prec
2416 bsr.l setox # operand is a NORM
2419 cmpi.b %d1,&ZERO # is operand a ZERO?
2424 cmpi.b %d1,&INF # is operand an INF?
2429 cmpi.b %d1,&QNAN # is operand a QNAN?
2431 bsr.l src_qnan # yes
2434 bsr.l setoxd # operand is a DENORM
2438 # Result is now in FP0
2440 movm.l EXC_DREGS(%a6),&0x0303 # restore d0-d1/a0-a1
2441 fmovm.l USER_FPCR(%a6),%fpcr,%fpsr # restore ctrl regs
2442 fmovm.x EXC_FP1(%a6),&0x40 # restore fp1
# Entry stub: extended-precision argument -> setox / setoxd.
# Builds a LOCAL_SIZE-byte frame on %a6, saves d0-d1/a0-a1, FPCR/FPSR
# and fp0/fp1 in it, and zeroes the FPCR.  The three longwords of the
# argument at 0x8(%a6) are copied to the buffer addressed by %a0, then
# tag is called to classify the operand.  Visible dispatch on the type
# byte in %d1: NORM -> setox, QNAN -> src_qnan, DENORM -> setoxd.  ZERO
# and INF are tested, but no handler call for either is visible.
# The result stays in fp0, so only d0-d1/a0-a1, the control registers
# and fp1 are restored.
# NOTE(review): the embedded line numbers skip - the entry label, the
# branches between the type tests and any frame teardown/return are not
# visible in this excerpt.  Nothing visible loads %a0 before it is used
# as the copy destination; presumably that instruction was dropped from
# the excerpt as well - confirm against the full file.
2448 link %a6,&-LOCAL_SIZE
2450 movm.l &0x0303,EXC_DREGS(%a6) # save d0-d1/a0-a1
2451 fmovm.l %fpcr,%fpsr,USER_FPCR(%a6) # save ctrl regs
2452 fmovm.x &0xc0,EXC_FP0(%a6) # save fp0/fp1
2454 fmov.l &0x0,%fpcr # zero FPCR
2457 # copy, convert, and tag input argument
2460 mov.l 0x8+0x0(%a6),0x0(%a0) # load ext input
2461 mov.l 0x8+0x4(%a6),0x4(%a0) # second longword of the operand
2462 mov.l 0x8+0x8(%a6),0x8(%a0) # third longword of the operand
2463 bsr.l tag # fetch operand type
2467 andi.l &0x00ff00ff,USER_FPSR(%a6) # clear ccode and exc-status bytes of saved FPSR
2470 mov.b FPCR_MODE(%a6),%d0 # pass rnd mode,prec
2474 bsr.l setox # operand is a NORM
2477 cmpi.b %d1,&ZERO # is operand a ZERO?
2482 cmpi.b %d1,&INF # is operand an INF?
2487 cmpi.b %d1,&QNAN # is operand a QNAN?
2489 bsr.l src_qnan # yes
2492 bsr.l setoxd # operand is a DENORM
2496 # Result is now in FP0
2498 movm.l EXC_DREGS(%a6),&0x0303 # restore d0-d1/a0-a1
2499 fmovm.l USER_FPCR(%a6),%fpcr,%fpsr # restore ctrl regs
2500 fmovm.x EXC_FP1(%a6),&0x40 # restore fp1
2505 #########################################################################
2506 # MONADIC TEMPLATE #
2507 #########################################################################
# Entry stub: single-precision argument -> stwotox / stwotoxd.
# Builds a LOCAL_SIZE-byte frame on %a6, saves d0-d1/a0-a1, FPCR/FPSR
# and fp0/fp1 in it, and zeroes the FPCR.  The single at 0x8(%a6) is
# loaded into fp0 and stored in extended format at FP_SRC(%a6), then
# tag is called to classify it.  Visible dispatch on the type byte in
# %d1: NORM -> stwotox, QNAN -> src_qnan, DENORM -> stwotoxd.  ZERO and
# INF are tested, but no handler call for either is visible.
# The result stays in fp0, so only d0-d1/a0-a1, the control registers
# and fp1 are restored.
# NOTE(review): the embedded line numbers skip - the entry label, the
# branches between the type tests and any frame teardown/return are not
# visible in this excerpt.
2510 link %a6,&-LOCAL_SIZE
2512 movm.l &0x0303,EXC_DREGS(%a6) # save d0-d1/a0-a1
2513 fmovm.l %fpcr,%fpsr,USER_FPCR(%a6) # save ctrl regs
2514 fmovm.x &0xc0,EXC_FP0(%a6) # save fp0/fp1
2516 fmov.l &0x0,%fpcr # zero FPCR
2519 # copy, convert, and tag input argument
2521 fmov.s 0x8(%a6),%fp0 # load sgl input
2522 fmov.x %fp0,FP_SRC(%a6) # store it in extended format
2524 bsr.l tag # fetch operand type
2528 andi.l &0x00ff00ff,USER_FPSR(%a6) # clear ccode and exc-status bytes of saved FPSR
2531 mov.b FPCR_MODE(%a6),%d0 # pass rnd mode,prec
2535 bsr.l stwotox # operand is a NORM
2538 cmpi.b %d1,&ZERO # is operand a ZERO?
2543 cmpi.b %d1,&INF # is operand an INF?
2548 cmpi.b %d1,&QNAN # is operand a QNAN?
2550 bsr.l src_qnan # yes
2553 bsr.l stwotoxd # operand is a DENORM
2557 # Result is now in FP0
2559 movm.l EXC_DREGS(%a6),&0x0303 # restore d0-d1/a0-a1
2560 fmovm.l USER_FPCR(%a6),%fpcr,%fpsr # restore ctrl regs
2561 fmovm.x EXC_FP1(%a6),&0x40 # restore fp1
# Entry stub: double-precision argument -> stwotox / stwotoxd.
# Builds a LOCAL_SIZE-byte frame on %a6, saves d0-d1/a0-a1, FPCR/FPSR
# and fp0/fp1 in it, and zeroes the FPCR.  The double at 0x8(%a6) is
# loaded into fp0 and stored in extended format at FP_SRC(%a6), then
# tag is called to classify it.  Visible dispatch on the type byte in
# %d1: NORM -> stwotox, QNAN -> src_qnan, DENORM -> stwotoxd.  ZERO and
# INF are tested, but no handler call for either is visible.
# The result stays in fp0, so only d0-d1/a0-a1, the control registers
# and fp1 are restored.
# NOTE(review): the embedded line numbers skip - the entry label, the
# branches between the type tests and any frame teardown/return are not
# visible in this excerpt.
2567 link %a6,&-LOCAL_SIZE
2569 movm.l &0x0303,EXC_DREGS(%a6) # save d0-d1/a0-a1
2570 fmovm.l %fpcr,%fpsr,USER_FPCR(%a6) # save ctrl regs
2571 fmovm.x &0xc0,EXC_FP0(%a6) # save fp0/fp1
2573 fmov.l &0x0,%fpcr # zero FPCR
2576 # copy, convert, and tag input argument
2578 fmov.d 0x8(%a6),%fp0 # load dbl input
2579 fmov.x %fp0,FP_SRC(%a6) # store it in extended format
2581 bsr.l tag # fetch operand type
2585 andi.l &0x00ff00ff,USER_FPSR(%a6) # clear ccode and exc-status bytes of saved FPSR
2588 mov.b FPCR_MODE(%a6),%d0 # pass rnd mode,prec
2593 bsr.l stwotox # operand is a NORM
2596 cmpi.b %d1,&ZERO # is operand a ZERO?
2601 cmpi.b %d1,&INF # is operand an INF?
2606 cmpi.b %d1,&QNAN # is operand a QNAN?
2608 bsr.l src_qnan # yes
2611 bsr.l stwotoxd # operand is a DENORM
2615 # Result is now in FP0
2617 movm.l EXC_DREGS(%a6),&0x0303 # restore d0-d1/a0-a1
2618 fmovm.l USER_FPCR(%a6),%fpcr,%fpsr # restore ctrl regs
2619 fmovm.x EXC_FP1(%a6),&0x40 # restore fp1
# Entry stub: extended-precision argument -> stwotox / stwotoxd.
# Builds a LOCAL_SIZE-byte frame on %a6, saves d0-d1/a0-a1, FPCR/FPSR
# and fp0/fp1 in it, and zeroes the FPCR.  The three longwords of the
# argument at 0x8(%a6) are copied to the buffer addressed by %a0, then
# tag is called to classify the operand.  Visible dispatch on the type
# byte in %d1: NORM -> stwotox, QNAN -> src_qnan, DENORM -> stwotoxd.
# ZERO and INF are tested, but no handler call for either is visible.
# The result stays in fp0, so only d0-d1/a0-a1, the control registers
# and fp1 are restored.
# NOTE(review): the embedded line numbers skip - the entry label, the
# branches between the type tests and any frame teardown/return are not
# visible in this excerpt.  Nothing visible loads %a0 before it is used
# as the copy destination; presumably that instruction was dropped from
# the excerpt as well - confirm against the full file.
2625 link %a6,&-LOCAL_SIZE
2627 movm.l &0x0303,EXC_DREGS(%a6) # save d0-d1/a0-a1
2628 fmovm.l %fpcr,%fpsr,USER_FPCR(%a6) # save ctrl regs
2629 fmovm.x &0xc0,EXC_FP0(%a6) # save fp0/fp1
2631 fmov.l &0x0,%fpcr # zero FPCR
2634 # copy, convert, and tag input argument
2637 mov.l 0x8+0x0(%a6),0x0(%a0) # load ext input
2638 mov.l 0x8+0x4(%a6),0x4(%a0) # second longword of the operand
2639 mov.l 0x8+0x8(%a6),0x8(%a0) # third longword of the operand
2640 bsr.l tag # fetch operand type
2644 andi.l &0x00ff00ff,USER_FPSR(%a6) # clear ccode and exc-status bytes of saved FPSR
2647 mov.b FPCR_MODE(%a6),%d0 # pass rnd mode,prec
2651 bsr.l stwotox # operand is a NORM
2654 cmpi.b %d1,&ZERO # is operand a ZERO?
2659 cmpi.b %d1,&INF # is operand an INF?
2664 cmpi.b %d1,&QNAN # is operand a QNAN?
2666 bsr.l src_qnan # yes
2669 bsr.l stwotoxd # operand is a DENORM
2673 # Result is now in FP0
2675 movm.l EXC_DREGS(%a6),&0x0303 # restore d0-d1/a0-a1
2676 fmovm.l USER_FPCR(%a6),%fpcr,%fpsr # restore ctrl regs
2677 fmovm.x EXC_FP1(%a6),&0x40 # restore fp1
2682 #########################################################################
2683 # MONADIC TEMPLATE #
2684 #########################################################################
# Entry stub: single-precision argument -> stentox / stentoxd.
# Builds a LOCAL_SIZE-byte frame on %a6, saves d0-d1/a0-a1, FPCR/FPSR
# and fp0/fp1 in it, and zeroes the FPCR.  The single at 0x8(%a6) is
# loaded into fp0 and stored in extended format at FP_SRC(%a6), then
# tag is called to classify it.  Visible dispatch on the type byte in
# %d1: NORM -> stentox, QNAN -> src_qnan, DENORM -> stentoxd.  ZERO and
# INF are tested, but no handler call for either is visible.
# The result stays in fp0, so only d0-d1/a0-a1, the control registers
# and fp1 are restored.
# NOTE(review): the embedded line numbers skip - the entry label, the
# branches between the type tests and any frame teardown/return are not
# visible in this excerpt.
2687 link %a6,&-LOCAL_SIZE
2689 movm.l &0x0303,EXC_DREGS(%a6) # save d0-d1/a0-a1
2690 fmovm.l %fpcr,%fpsr,USER_FPCR(%a6) # save ctrl regs
2691 fmovm.x &0xc0,EXC_FP0(%a6) # save fp0/fp1
2693 fmov.l &0x0,%fpcr # zero FPCR
2696 # copy, convert, and tag input argument
2698 fmov.s 0x8(%a6),%fp0 # load sgl input
2699 fmov.x %fp0,FP_SRC(%a6) # store it in extended format
2701 bsr.l tag # fetch operand type
2705 andi.l &0x00ff00ff,USER_FPSR(%a6) # clear ccode and exc-status bytes of saved FPSR
2708 mov.b FPCR_MODE(%a6),%d0 # pass rnd mode,prec
2712 bsr.l stentox # operand is a NORM
2715 cmpi.b %d1,&ZERO # is operand a ZERO?
2720 cmpi.b %d1,&INF # is operand an INF?
2725 cmpi.b %d1,&QNAN # is operand a QNAN?
2727 bsr.l src_qnan # yes
2730 bsr.l stentoxd # operand is a DENORM
2734 # Result is now in FP0
2736 movm.l EXC_DREGS(%a6),&0x0303 # restore d0-d1/a0-a1
2737 fmovm.l USER_FPCR(%a6),%fpcr,%fpsr # restore ctrl regs
2738 fmovm.x EXC_FP1(%a6),&0x40 # restore fp1
# Entry stub: double-precision argument -> stentox / stentoxd.
# Builds a LOCAL_SIZE-byte frame on %a6, saves d0-d1/a0-a1, FPCR/FPSR
# and fp0/fp1 in it, and zeroes the FPCR.  The double at 0x8(%a6) is
# loaded into fp0 and stored in extended format at FP_SRC(%a6), then
# tag is called to classify it.  Visible dispatch on the type byte in
# %d1: NORM -> stentox, QNAN -> src_qnan, DENORM -> stentoxd.  ZERO and
# INF are tested, but no handler call for either is visible.
# The result stays in fp0, so only d0-d1/a0-a1, the control registers
# and fp1 are restored.
# NOTE(review): the embedded line numbers skip - the entry label, the
# branches between the type tests and any frame teardown/return are not
# visible in this excerpt.
2744 link %a6,&-LOCAL_SIZE
2746 movm.l &0x0303,EXC_DREGS(%a6) # save d0-d1/a0-a1
2747 fmovm.l %fpcr,%fpsr,USER_FPCR(%a6) # save ctrl regs
2748 fmovm.x &0xc0,EXC_FP0(%a6) # save fp0/fp1
2750 fmov.l &0x0,%fpcr # zero FPCR
2753 # copy, convert, and tag input argument
2755 fmov.d 0x8(%a6),%fp0 # load dbl input
2756 fmov.x %fp0,FP_SRC(%a6) # store it in extended format
2758 bsr.l tag # fetch operand type
2762 andi.l &0x00ff00ff,USER_FPSR(%a6) # clear ccode and exc-status bytes of saved FPSR
2765 mov.b FPCR_MODE(%a6),%d0 # pass rnd mode,prec
2770 bsr.l stentox # operand is a NORM
2773 cmpi.b %d1,&ZERO # is operand a ZERO?
2778 cmpi.b %d1,&INF # is operand an INF?
2783 cmpi.b %d1,&QNAN # is operand a QNAN?
2785 bsr.l src_qnan # yes
2788 bsr.l stentoxd # operand is a DENORM
2792 # Result is now in FP0
2794 movm.l EXC_DREGS(%a6),&0x0303 # restore d0-d1/a0-a1
2795 fmovm.l USER_FPCR(%a6),%fpcr,%fpsr # restore ctrl regs
2796 fmovm.x EXC_FP1(%a6),&0x40 # restore fp1
# Entry stub: extended-precision argument -> stentox / stentoxd.
# Builds a LOCAL_SIZE-byte frame on %a6, saves d0-d1/a0-a1, FPCR/FPSR
# and fp0/fp1 in it, and zeroes the FPCR.  The three longwords of the
# argument at 0x8(%a6) are copied to the buffer addressed by %a0, then
# tag is called to classify the operand.  Visible dispatch on the type
# byte in %d1: NORM -> stentox, QNAN -> src_qnan, DENORM -> stentoxd.
# ZERO and INF are tested, but no handler call for either is visible.
# The result stays in fp0, so only d0-d1/a0-a1, the control registers
# and fp1 are restored.
# NOTE(review): the embedded line numbers skip - the entry label, the
# branches between the type tests and any frame teardown/return are not
# visible in this excerpt.  Nothing visible loads %a0 before it is used
# as the copy destination; presumably that instruction was dropped from
# the excerpt as well - confirm against the full file.
2802 link %a6,&-LOCAL_SIZE
2804 movm.l &0x0303,EXC_DREGS(%a6) # save d0-d1/a0-a1
2805 fmovm.l %fpcr,%fpsr,USER_FPCR(%a6) # save ctrl regs
2806 fmovm.x &0xc0,EXC_FP0(%a6) # save fp0/fp1
2808 fmov.l &0x0,%fpcr # zero FPCR
2811 # copy, convert, and tag input argument
2814 mov.l 0x8+0x0(%a6),0x0(%a0) # load ext input
2815 mov.l 0x8+0x4(%a6),0x4(%a0) # second longword of the operand
2816 mov.l 0x8+0x8(%a6),0x8(%a0) # third longword of the operand
2817 bsr.l tag # fetch operand type
2821 andi.l &0x00ff00ff,USER_FPSR(%a6) # clear ccode and exc-status bytes of saved FPSR
2824 mov.b FPCR_MODE(%a6),%d0 # pass rnd mode,prec
2828 bsr.l stentox # operand is a NORM
2831 cmpi.b %d1,&ZERO # is operand a ZERO?
2836 cmpi.b %d1,&INF # is operand an INF?
2841 cmpi.b %d1,&QNAN # is operand a QNAN?
2843 bsr.l src_qnan # yes
2846 bsr.l stentoxd # operand is a DENORM
2850 # Result is now in FP0
2852 movm.l EXC_DREGS(%a6),&0x0303 # restore d0-d1/a0-a1
2853 fmovm.l USER_FPCR(%a6),%fpcr,%fpsr # restore ctrl regs
2854 fmovm.x EXC_FP1(%a6),&0x40 # restore fp1
2859 #########################################################################
2860 # MONADIC TEMPLATE #
2861 #########################################################################
# Entry stub: single-precision argument -> slogn / slognd.
# Builds a LOCAL_SIZE-byte frame on %a6, saves d0-d1/a0-a1, FPCR/FPSR
# and fp0/fp1 in it, and zeroes the FPCR.  The single at 0x8(%a6) is
# loaded into fp0 and stored in extended format at FP_SRC(%a6), then
# tag is called to classify it.  Visible dispatch on the type byte in
# %d1: NORM -> slogn, INF -> sopr_inf, QNAN -> src_qnan,
# DENORM -> slognd.  ZERO is tested, but no handler call is visible.
# The result stays in fp0, so only d0-d1/a0-a1, the control registers
# and fp1 are restored.
# NOTE(review): the embedded line numbers skip - the entry label, the
# branches between the type tests and any frame teardown/return are not
# visible in this excerpt.
2864 link %a6,&-LOCAL_SIZE
2866 movm.l &0x0303,EXC_DREGS(%a6) # save d0-d1/a0-a1
2867 fmovm.l %fpcr,%fpsr,USER_FPCR(%a6) # save ctrl regs
2868 fmovm.x &0xc0,EXC_FP0(%a6) # save fp0/fp1
2870 fmov.l &0x0,%fpcr # zero FPCR
2873 # copy, convert, and tag input argument
2875 fmov.s 0x8(%a6),%fp0 # load sgl input
2876 fmov.x %fp0,FP_SRC(%a6) # store it in extended format
2878 bsr.l tag # fetch operand type
2882 andi.l &0x00ff00ff,USER_FPSR(%a6) # clear ccode and exc-status bytes of saved FPSR
2885 mov.b FPCR_MODE(%a6),%d0 # pass rnd mode,prec
2889 bsr.l slogn # operand is a NORM
2892 cmpi.b %d1,&ZERO # is operand a ZERO?
2897 cmpi.b %d1,&INF # is operand an INF?
2899 bsr.l sopr_inf # yes
2902 cmpi.b %d1,&QNAN # is operand a QNAN?
2904 bsr.l src_qnan # yes
2907 bsr.l slognd # operand is a DENORM
2911 # Result is now in FP0
2913 movm.l EXC_DREGS(%a6),&0x0303 # restore d0-d1/a0-a1
2914 fmovm.l USER_FPCR(%a6),%fpcr,%fpsr # restore ctrl regs
2915 fmovm.x EXC_FP1(%a6),&0x40 # restore fp1
# Entry stub: double-precision argument -> slogn / slognd.
# Builds a LOCAL_SIZE-byte frame on %a6, saves d0-d1/a0-a1, FPCR/FPSR
# and fp0/fp1 in it, and zeroes the FPCR.  The double at 0x8(%a6) is
# loaded into fp0 and stored in extended format at FP_SRC(%a6), then
# tag is called to classify it.  Visible dispatch on the type byte in
# %d1: NORM -> slogn, INF -> sopr_inf, QNAN -> src_qnan,
# DENORM -> slognd.  ZERO is tested, but no handler call is visible.
# The result stays in fp0, so only d0-d1/a0-a1, the control registers
# and fp1 are restored.
# NOTE(review): the embedded line numbers skip - the entry label, the
# branches between the type tests and any frame teardown/return are not
# visible in this excerpt.
2921 link %a6,&-LOCAL_SIZE
2923 movm.l &0x0303,EXC_DREGS(%a6) # save d0-d1/a0-a1
2924 fmovm.l %fpcr,%fpsr,USER_FPCR(%a6) # save ctrl regs
2925 fmovm.x &0xc0,EXC_FP0(%a6) # save fp0/fp1
2927 fmov.l &0x0,%fpcr # zero FPCR
2930 # copy, convert, and tag input argument
2932 fmov.d 0x8(%a6),%fp0 # load dbl input
2933 fmov.x %fp0,FP_SRC(%a6) # store it in extended format
2935 bsr.l tag # fetch operand type
2939 andi.l &0x00ff00ff,USER_FPSR(%a6) # clear ccode and exc-status bytes of saved FPSR
2942 mov.b FPCR_MODE(%a6),%d0 # pass rnd mode,prec
2947 bsr.l slogn # operand is a NORM
2950 cmpi.b %d1,&ZERO # is operand a ZERO?
2955 cmpi.b %d1,&INF # is operand an INF?
2957 bsr.l sopr_inf # yes
2960 cmpi.b %d1,&QNAN # is operand a QNAN?
2962 bsr.l src_qnan # yes
2965 bsr.l slognd # operand is a DENORM
2969 # Result is now in FP0
2971 movm.l EXC_DREGS(%a6),&0x0303 # restore d0-d1/a0-a1
2972 fmovm.l USER_FPCR(%a6),%fpcr,%fpsr # restore ctrl regs
2973 fmovm.x EXC_FP1(%a6),&0x40 # restore fp1
# Entry stub: extended-precision argument -> slogn / slognd.
# Builds a LOCAL_SIZE-byte frame on %a6, saves d0-d1/a0-a1, FPCR/FPSR
# and fp0/fp1 in it, and zeroes the FPCR.  The three longwords of the
# argument at 0x8(%a6) are copied to the buffer addressed by %a0, then
# tag is called to classify the operand.  Visible dispatch on the type
# byte in %d1: NORM -> slogn, INF -> sopr_inf, QNAN -> src_qnan,
# DENORM -> slognd.  ZERO is tested, but no handler call is visible.
# The result stays in fp0, so only d0-d1/a0-a1, the control registers
# and fp1 are restored.
# NOTE(review): the embedded line numbers skip - the entry label, the
# branches between the type tests and any frame teardown/return are not
# visible in this excerpt.  Nothing visible loads %a0 before it is used
# as the copy destination; presumably that instruction was dropped from
# the excerpt as well - confirm against the full file.
2979 link %a6,&-LOCAL_SIZE
2981 movm.l &0x0303,EXC_DREGS(%a6) # save d0-d1/a0-a1
2982 fmovm.l %fpcr,%fpsr,USER_FPCR(%a6) # save ctrl regs
2983 fmovm.x &0xc0,EXC_FP0(%a6) # save fp0/fp1
2985 fmov.l &0x0,%fpcr # zero FPCR
2988 # copy, convert, and tag input argument
2991 mov.l 0x8+0x0(%a6),0x0(%a0) # load ext input
2992 mov.l 0x8+0x4(%a6),0x4(%a0) # second longword of the operand
2993 mov.l 0x8+0x8(%a6),0x8(%a0) # third longword of the operand
2994 bsr.l tag # fetch operand type
2998 andi.l &0x00ff00ff,USER_FPSR(%a6) # clear ccode and exc-status bytes of saved FPSR
3001 mov.b FPCR_MODE(%a6),%d0 # pass rnd mode,prec
3005 bsr.l slogn # operand is a NORM
3008 cmpi.b %d1,&ZERO # is operand a ZERO?
3013 cmpi.b %d1,&INF # is operand an INF?
3015 bsr.l sopr_inf # yes
3018 cmpi.b %d1,&QNAN # is operand a QNAN?
3020 bsr.l src_qnan # yes
3023 bsr.l slognd # operand is a DENORM
3027 # Result is now in FP0
3029 movm.l EXC_DREGS(%a6),&0x0303 # restore d0-d1/a0-a1
3030 fmovm.l USER_FPCR(%a6),%fpcr,%fpsr # restore ctrl regs
3031 fmovm.x EXC_FP1(%a6),&0x40 # restore fp1
3036 #########################################################################
3037 # MONADIC TEMPLATE #
3038 #########################################################################
# Entry stub: single-precision argument -> slog10 / slog10d.
# Builds a LOCAL_SIZE-byte frame on %a6, saves d0-d1/a0-a1, FPCR/FPSR
# and fp0/fp1 in it, and zeroes the FPCR.  The single at 0x8(%a6) is
# loaded into fp0 and stored in extended format at FP_SRC(%a6), then
# tag is called to classify it.  Visible dispatch on the type byte in
# %d1: NORM -> slog10, INF -> sopr_inf, QNAN -> src_qnan,
# DENORM -> slog10d.  ZERO is tested, but no handler call is visible.
# The result stays in fp0, so only d0-d1/a0-a1, the control registers
# and fp1 are restored.
# NOTE(review): the embedded line numbers skip - the entry label, the
# branches between the type tests and any frame teardown/return are not
# visible in this excerpt.
3041 link %a6,&-LOCAL_SIZE
3043 movm.l &0x0303,EXC_DREGS(%a6) # save d0-d1/a0-a1
3044 fmovm.l %fpcr,%fpsr,USER_FPCR(%a6) # save ctrl regs
3045 fmovm.x &0xc0,EXC_FP0(%a6) # save fp0/fp1
3047 fmov.l &0x0,%fpcr # zero FPCR
3050 # copy, convert, and tag input argument
3052 fmov.s 0x8(%a6),%fp0 # load sgl input
3053 fmov.x %fp0,FP_SRC(%a6) # store it in extended format
3055 bsr.l tag # fetch operand type
3059 andi.l &0x00ff00ff,USER_FPSR(%a6) # clear ccode and exc-status bytes of saved FPSR
3062 mov.b FPCR_MODE(%a6),%d0 # pass rnd mode,prec
3066 bsr.l slog10 # operand is a NORM
3069 cmpi.b %d1,&ZERO # is operand a ZERO?
3074 cmpi.b %d1,&INF # is operand an INF?
3076 bsr.l sopr_inf # yes
3079 cmpi.b %d1,&QNAN # is operand a QNAN?
3081 bsr.l src_qnan # yes
3084 bsr.l slog10d # operand is a DENORM
3088 # Result is now in FP0
3090 movm.l EXC_DREGS(%a6),&0x0303 # restore d0-d1/a0-a1
3091 fmovm.l USER_FPCR(%a6),%fpcr,%fpsr # restore ctrl regs
3092 fmovm.x EXC_FP1(%a6),&0x40 # restore fp1
# Entry stub: double-precision argument -> slog10 / slog10d.
# Builds a LOCAL_SIZE-byte frame on %a6, saves d0-d1/a0-a1, FPCR/FPSR
# and fp0/fp1 in it, and zeroes the FPCR.  The double at 0x8(%a6) is
# loaded into fp0 and stored in extended format at FP_SRC(%a6), then
# tag is called to classify it.  Visible dispatch on the type byte in
# %d1: NORM -> slog10, INF -> sopr_inf, QNAN -> src_qnan,
# DENORM -> slog10d.  ZERO is tested, but no handler call is visible.
# The result stays in fp0, so only d0-d1/a0-a1, the control registers
# and fp1 are restored.
# NOTE(review): the embedded line numbers skip - the entry label, the
# branches between the type tests and any frame teardown/return are not
# visible in this excerpt.
3098 link %a6,&-LOCAL_SIZE
3100 movm.l &0x0303,EXC_DREGS(%a6) # save d0-d1/a0-a1
3101 fmovm.l %fpcr,%fpsr,USER_FPCR(%a6) # save ctrl regs
3102 fmovm.x &0xc0,EXC_FP0(%a6) # save fp0/fp1
3104 fmov.l &0x0,%fpcr # zero FPCR
3107 # copy, convert, and tag input argument
3109 fmov.d 0x8(%a6),%fp0 # load dbl input
3110 fmov.x %fp0,FP_SRC(%a6) # store it in extended format
3112 bsr.l tag # fetch operand type
3116 andi.l &0x00ff00ff,USER_FPSR(%a6) # clear ccode and exc-status bytes of saved FPSR
3119 mov.b FPCR_MODE(%a6),%d0 # pass rnd mode,prec
3124 bsr.l slog10 # operand is a NORM
3127 cmpi.b %d1,&ZERO # is operand a ZERO?
3132 cmpi.b %d1,&INF # is operand an INF?
3134 bsr.l sopr_inf # yes
3137 cmpi.b %d1,&QNAN # is operand a QNAN?
3139 bsr.l src_qnan # yes
3142 bsr.l slog10d # operand is a DENORM
3146 # Result is now in FP0
3148 movm.l EXC_DREGS(%a6),&0x0303 # restore d0-d1/a0-a1
3149 fmovm.l USER_FPCR(%a6),%fpcr,%fpsr # restore ctrl regs
3150 fmovm.x EXC_FP1(%a6),&0x40 # restore fp1
# Entry stub: extended-precision argument -> slog10 / slog10d.
# Builds a LOCAL_SIZE-byte frame on %a6, saves d0-d1/a0-a1, FPCR/FPSR
# and fp0/fp1 in it, and zeroes the FPCR.  The three longwords of the
# argument at 0x8(%a6) are copied to the buffer addressed by %a0, then
# tag is called to classify the operand.  Visible dispatch on the type
# byte in %d1: NORM -> slog10, INF -> sopr_inf, QNAN -> src_qnan,
# DENORM -> slog10d.  ZERO is tested, but no handler call is visible.
# The result stays in fp0, so only d0-d1/a0-a1, the control registers
# and fp1 are restored.
# NOTE(review): the embedded line numbers skip - the entry label, the
# branches between the type tests and any frame teardown/return are not
# visible in this excerpt.  Nothing visible loads %a0 before it is used
# as the copy destination; presumably that instruction was dropped from
# the excerpt as well - confirm against the full file.
3156 link %a6,&-LOCAL_SIZE
3158 movm.l &0x0303,EXC_DREGS(%a6) # save d0-d1/a0-a1
3159 fmovm.l %fpcr,%fpsr,USER_FPCR(%a6) # save ctrl regs
3160 fmovm.x &0xc0,EXC_FP0(%a6) # save fp0/fp1
3162 fmov.l &0x0,%fpcr # zero FPCR
3165 # copy, convert, and tag input argument
3168 mov.l 0x8+0x0(%a6),0x0(%a0) # load ext input
3169 mov.l 0x8+0x4(%a6),0x4(%a0) # second longword of the operand
3170 mov.l 0x8+0x8(%a6),0x8(%a0) # third longword of the operand
3171 bsr.l tag # fetch operand type
3175 andi.l &0x00ff00ff,USER_FPSR(%a6) # clear ccode and exc-status bytes of saved FPSR
3178 mov.b FPCR_MODE(%a6),%d0 # pass rnd mode,prec
3182 bsr.l slog10 # operand is a NORM
3185 cmpi.b %d1,&ZERO # is operand a ZERO?
3190 cmpi.b %d1,&INF # is operand an INF?
3192 bsr.l sopr_inf # yes
3195 cmpi.b %d1,&QNAN # is operand a QNAN?
3197 bsr.l src_qnan # yes
3200 bsr.l slog10d # operand is a DENORM
3204 # Result is now in FP0
3206 movm.l EXC_DREGS(%a6),&0x0303 # restore d0-d1/a0-a1
3207 fmovm.l USER_FPCR(%a6),%fpcr,%fpsr # restore ctrl regs
3208 fmovm.x EXC_FP1(%a6),&0x40 # restore fp1
3213 #########################################################################
3214 # MONADIC TEMPLATE #
3215 #########################################################################
# Entry stub: single-precision argument -> slog2 / slog2d.
# Builds a LOCAL_SIZE-byte frame on %a6, saves d0-d1/a0-a1, FPCR/FPSR
# and fp0/fp1 in it, and zeroes the FPCR.  The single at 0x8(%a6) is
# loaded into fp0 and stored in extended format at FP_SRC(%a6), then
# tag is called to classify it.  Visible dispatch on the type byte in
# %d1: NORM -> slog2, INF -> sopr_inf, QNAN -> src_qnan,
# DENORM -> slog2d.  ZERO is tested, but no handler call is visible.
# The result stays in fp0, so only d0-d1/a0-a1, the control registers
# and fp1 are restored.
# NOTE(review): the embedded line numbers skip - the entry label, the
# branches between the type tests and any frame teardown/return are not
# visible in this excerpt.
3218 link %a6,&-LOCAL_SIZE
3220 movm.l &0x0303,EXC_DREGS(%a6) # save d0-d1/a0-a1
3221 fmovm.l %fpcr,%fpsr,USER_FPCR(%a6) # save ctrl regs
3222 fmovm.x &0xc0,EXC_FP0(%a6) # save fp0/fp1
3224 fmov.l &0x0,%fpcr # zero FPCR
3227 # copy, convert, and tag input argument
3229 fmov.s 0x8(%a6),%fp0 # load sgl input
3230 fmov.x %fp0,FP_SRC(%a6) # store it in extended format
3232 bsr.l tag # fetch operand type
3236 andi.l &0x00ff00ff,USER_FPSR(%a6) # clear ccode and exc-status bytes of saved FPSR
3239 mov.b FPCR_MODE(%a6),%d0 # pass rnd mode,prec
3243 bsr.l slog2 # operand is a NORM
3246 cmpi.b %d1,&ZERO # is operand a ZERO?
3251 cmpi.b %d1,&INF # is operand an INF?
3253 bsr.l sopr_inf # yes
3256 cmpi.b %d1,&QNAN # is operand a QNAN?
3258 bsr.l src_qnan # yes
3261 bsr.l slog2d # operand is a DENORM
3265 # Result is now in FP0
3267 movm.l EXC_DREGS(%a6),&0x0303 # restore d0-d1/a0-a1
3268 fmovm.l USER_FPCR(%a6),%fpcr,%fpsr # restore ctrl regs
3269 fmovm.x EXC_FP1(%a6),&0x40 # restore fp1
# Entry stub: double-precision argument -> slog2 / slog2d.
# Builds a LOCAL_SIZE-byte frame on %a6, saves d0-d1/a0-a1, FPCR/FPSR
# and fp0/fp1 in it, and zeroes the FPCR.  The double at 0x8(%a6) is
# loaded into fp0 and stored in extended format at FP_SRC(%a6), then
# tag is called to classify it.  Visible dispatch on the type byte in
# %d1: NORM -> slog2, INF -> sopr_inf, QNAN -> src_qnan,
# DENORM -> slog2d.  ZERO is tested, but no handler call is visible.
# The result stays in fp0, so only d0-d1/a0-a1, the control registers
# and fp1 are restored.
# NOTE(review): the embedded line numbers skip - the entry label, the
# branches between the type tests and any frame teardown/return are not
# visible in this excerpt.
3275 link %a6,&-LOCAL_SIZE
3277 movm.l &0x0303,EXC_DREGS(%a6) # save d0-d1/a0-a1
3278 fmovm.l %fpcr,%fpsr,USER_FPCR(%a6) # save ctrl regs
3279 fmovm.x &0xc0,EXC_FP0(%a6) # save fp0/fp1
3281 fmov.l &0x0,%fpcr # zero FPCR
3284 # copy, convert, and tag input argument
3286 fmov.d 0x8(%a6),%fp0 # load dbl input
3287 fmov.x %fp0,FP_SRC(%a6) # store it in extended format
3289 bsr.l tag # fetch operand type
3293 andi.l &0x00ff00ff,USER_FPSR(%a6) # clear ccode and exc-status bytes of saved FPSR
3296 mov.b FPCR_MODE(%a6),%d0 # pass rnd mode,prec
3301 bsr.l slog2 # operand is a NORM
3304 cmpi.b %d1,&ZERO # is operand a ZERO?
3309 cmpi.b %d1,&INF # is operand an INF?
3311 bsr.l sopr_inf # yes
3314 cmpi.b %d1,&QNAN # is operand a QNAN?
3316 bsr.l src_qnan # yes
3319 bsr.l slog2d # operand is a DENORM
3323 # Result is now in FP0
3325 movm.l EXC_DREGS(%a6),&0x0303 # restore d0-d1/a0-a1
3326 fmovm.l USER_FPCR(%a6),%fpcr,%fpsr # restore ctrl regs
3327 fmovm.x EXC_FP1(%a6),&0x40 # restore fp1
# Entry stub: extended-precision argument -> slog2 / slog2d.
# Builds a LOCAL_SIZE-byte frame on %a6, saves d0-d1/a0-a1, FPCR/FPSR
# and fp0/fp1 in it, and zeroes the FPCR.  The three longwords of the
# argument at 0x8(%a6) are copied to the buffer addressed by %a0, then
# tag is called to classify the operand.  Visible dispatch on the type
# byte in %d1: NORM -> slog2, INF -> sopr_inf, QNAN -> src_qnan,
# DENORM -> slog2d.  ZERO is tested, but no handler call is visible.
# The result stays in fp0, so only d0-d1/a0-a1, the control registers
# and fp1 are restored.
# NOTE(review): the embedded line numbers skip - the entry label, the
# branches between the type tests and any frame teardown/return are not
# visible in this excerpt.  Nothing visible loads %a0 before it is used
# as the copy destination; presumably that instruction was dropped from
# the excerpt as well - confirm against the full file.
3333 link %a6,&-LOCAL_SIZE
3335 movm.l &0x0303,EXC_DREGS(%a6) # save d0-d1/a0-a1
3336 fmovm.l %fpcr,%fpsr,USER_FPCR(%a6) # save ctrl regs
3337 fmovm.x &0xc0,EXC_FP0(%a6) # save fp0/fp1
3339 fmov.l &0x0,%fpcr # zero FPCR
3342 # copy, convert, and tag input argument
3345 mov.l 0x8+0x0(%a6),0x0(%a0) # load ext input
3346 mov.l 0x8+0x4(%a6),0x4(%a0) # second longword of the operand
3347 mov.l 0x8+0x8(%a6),0x8(%a0) # third longword of the operand
3348 bsr.l tag # fetch operand type
3352 andi.l &0x00ff00ff,USER_FPSR(%a6) # clear ccode and exc-status bytes of saved FPSR
3355 mov.b FPCR_MODE(%a6),%d0 # pass rnd mode,prec
3359 bsr.l slog2 # operand is a NORM
3362 cmpi.b %d1,&ZERO # is operand a ZERO?
3367 cmpi.b %d1,&INF # is operand an INF?
3369 bsr.l sopr_inf # yes
3372 cmpi.b %d1,&QNAN # is operand a QNAN?
3374 bsr.l src_qnan # yes
3377 bsr.l slog2d # operand is a DENORM
3381 # Result is now in FP0
3383 movm.l EXC_DREGS(%a6),&0x0303 # restore d0-d1/a0-a1
3384 fmovm.l USER_FPCR(%a6),%fpcr,%fpsr # restore ctrl regs
3385 fmovm.x EXC_FP1(%a6),&0x40 # restore fp1
3390 #########################################################################
3391 # MONADIC TEMPLATE #
3392 #########################################################################
# Entry stub: single-precision argument -> scosh / scoshd.
# Builds a LOCAL_SIZE-byte frame on %a6, saves d0-d1/a0-a1, FPCR/FPSR
# and fp0/fp1 in it, and zeroes the FPCR.  The single at 0x8(%a6) is
# loaded into fp0 and stored in extended format at FP_SRC(%a6), then
# tag is called to classify it.  Visible dispatch on the type byte in
# %d1: NORM -> scosh, QNAN -> src_qnan, DENORM -> scoshd.  ZERO and INF
# are tested, but no handler call for either is visible.
# The result stays in fp0, so only d0-d1/a0-a1, the control registers
# and fp1 are restored.
# NOTE(review): the embedded line numbers skip - the entry label, the
# branches between the type tests and any frame teardown/return are not
# visible in this excerpt.
3395 link %a6,&-LOCAL_SIZE
3397 movm.l &0x0303,EXC_DREGS(%a6) # save d0-d1/a0-a1
3398 fmovm.l %fpcr,%fpsr,USER_FPCR(%a6) # save ctrl regs
3399 fmovm.x &0xc0,EXC_FP0(%a6) # save fp0/fp1
3401 fmov.l &0x0,%fpcr # zero FPCR
3404 # copy, convert, and tag input argument
3406 fmov.s 0x8(%a6),%fp0 # load sgl input
3407 fmov.x %fp0,FP_SRC(%a6) # store it in extended format
3409 bsr.l tag # fetch operand type
3413 andi.l &0x00ff00ff,USER_FPSR(%a6) # clear ccode and exc-status bytes of saved FPSR
3416 mov.b FPCR_MODE(%a6),%d0 # pass rnd mode,prec
3420 bsr.l scosh # operand is a NORM
3423 cmpi.b %d1,&ZERO # is operand a ZERO?
3428 cmpi.b %d1,&INF # is operand an INF?
3433 cmpi.b %d1,&QNAN # is operand a QNAN?
3435 bsr.l src_qnan # yes
3438 bsr.l scoshd # operand is a DENORM
3442 # Result is now in FP0
3444 movm.l EXC_DREGS(%a6),&0x0303 # restore d0-d1/a0-a1
3445 fmovm.l USER_FPCR(%a6),%fpcr,%fpsr # restore ctrl regs
3446 fmovm.x EXC_FP1(%a6),&0x40 # restore fp1
# Entry stub: double-precision argument -> scosh / scoshd.
# Builds a LOCAL_SIZE-byte frame on %a6, saves d0-d1/a0-a1, FPCR/FPSR
# and fp0/fp1 in it, and zeroes the FPCR.  The double at 0x8(%a6) is
# loaded into fp0 and stored in extended format at FP_SRC(%a6), then
# tag is called to classify it.  Visible dispatch on the type byte in
# %d1: NORM -> scosh, QNAN -> src_qnan, DENORM -> scoshd.  ZERO and INF
# are tested, but no handler call for either is visible.
# The result stays in fp0, so only d0-d1/a0-a1, the control registers
# and fp1 are restored.
# NOTE(review): the embedded line numbers skip - the entry label, the
# branches between the type tests and any frame teardown/return are not
# visible in this excerpt.
3452 link %a6,&-LOCAL_SIZE
3454 movm.l &0x0303,EXC_DREGS(%a6) # save d0-d1/a0-a1
3455 fmovm.l %fpcr,%fpsr,USER_FPCR(%a6) # save ctrl regs
3456 fmovm.x &0xc0,EXC_FP0(%a6) # save fp0/fp1
3458 fmov.l &0x0,%fpcr # zero FPCR
3461 # copy, convert, and tag input argument
3463 fmov.d 0x8(%a6),%fp0 # load dbl input
3464 fmov.x %fp0,FP_SRC(%a6) # store it in extended format
3466 bsr.l tag # fetch operand type
3470 andi.l &0x00ff00ff,USER_FPSR(%a6) # clear ccode and exc-status bytes of saved FPSR
3473 mov.b FPCR_MODE(%a6),%d0 # pass rnd mode,prec
3478 bsr.l scosh # operand is a NORM
3481 cmpi.b %d1,&ZERO # is operand a ZERO?
3486 cmpi.b %d1,&INF # is operand an INF?
3491 cmpi.b %d1,&QNAN # is operand a QNAN?
3493 bsr.l src_qnan # yes
3496 bsr.l scoshd # operand is a DENORM
3500 # Result is now in FP0
3502 movm.l EXC_DREGS(%a6),&0x0303 # restore d0-d1/a0-a1
3503 fmovm.l USER_FPCR(%a6),%fpcr,%fpsr # restore ctrl regs
3504 fmovm.x EXC_FP1(%a6),&0x40 # restore fp1
# Entry stub: extended-precision argument -> scosh / scoshd.
# Builds a LOCAL_SIZE-byte frame on %a6, saves d0-d1/a0-a1, FPCR/FPSR
# and fp0/fp1 in it, and zeroes the FPCR.  The three longwords of the
# argument at 0x8(%a6) are copied to the buffer addressed by %a0, then
# tag is called to classify the operand.  Visible dispatch on the type
# byte in %d1: NORM -> scosh, QNAN -> src_qnan, DENORM -> scoshd.  ZERO
# and INF are tested, but no handler call for either is visible.
# The result stays in fp0, so only d0-d1/a0-a1, the control registers
# and fp1 are restored.
# NOTE(review): the embedded line numbers skip - the entry label, the
# branches between the type tests and any frame teardown/return are not
# visible in this excerpt.  Nothing visible loads %a0 before it is used
# as the copy destination; presumably that instruction was dropped from
# the excerpt as well - confirm against the full file.
3510 link %a6,&-LOCAL_SIZE
3512 movm.l &0x0303,EXC_DREGS(%a6) # save d0-d1/a0-a1
3513 fmovm.l %fpcr,%fpsr,USER_FPCR(%a6) # save ctrl regs
3514 fmovm.x &0xc0,EXC_FP0(%a6) # save fp0/fp1
3516 fmov.l &0x0,%fpcr # zero FPCR
3519 # copy, convert, and tag input argument
3522 mov.l 0x8+0x0(%a6),0x0(%a0) # load ext input
3523 mov.l 0x8+0x4(%a6),0x4(%a0) # second longword of the operand
3524 mov.l 0x8+0x8(%a6),0x8(%a0) # third longword of the operand
3525 bsr.l tag # fetch operand type
3529 andi.l &0x00ff00ff,USER_FPSR(%a6) # clear ccode and exc-status bytes of saved FPSR
3532 mov.b FPCR_MODE(%a6),%d0 # pass rnd mode,prec
3536 bsr.l scosh # operand is a NORM
3539 cmpi.b %d1,&ZERO # is operand a ZERO?
3544 cmpi.b %d1,&INF # is operand an INF?
3549 cmpi.b %d1,&QNAN # is operand a QNAN?
3551 bsr.l src_qnan # yes
3554 bsr.l scoshd # operand is a DENORM
3558 # Result is now in FP0
3560 movm.l EXC_DREGS(%a6),&0x0303 # restore d0-d1/a0-a1
3561 fmovm.l USER_FPCR(%a6),%fpcr,%fpsr # restore ctrl regs
3562 fmovm.x EXC_FP1(%a6),&0x40 # restore fp1
3567 #########################################################################
3568 # MONADIC TEMPLATE #
3569 #########################################################################
# Entry stub: single-precision argument -> sacos / sacosd.
# Builds a LOCAL_SIZE-byte frame on %a6, saves d0-d1/a0-a1, FPCR/FPSR
# and fp0/fp1 in it, and zeroes the FPCR.  The single at 0x8(%a6) is
# loaded into fp0 and stored in extended format at FP_SRC(%a6), then
# tag is called to classify it.  Visible dispatch on the type byte in
# %d1: NORM -> sacos, QNAN -> src_qnan, DENORM -> sacosd.  ZERO and INF
# are tested, but no handler call for either is visible.
# The result stays in fp0, so only d0-d1/a0-a1, the control registers
# and fp1 are restored.
# NOTE(review): the embedded line numbers skip - the entry label, the
# branches between the type tests and any frame teardown/return are not
# visible in this excerpt.
3572 link %a6,&-LOCAL_SIZE
3574 movm.l &0x0303,EXC_DREGS(%a6) # save d0-d1/a0-a1
3575 fmovm.l %fpcr,%fpsr,USER_FPCR(%a6) # save ctrl regs
3576 fmovm.x &0xc0,EXC_FP0(%a6) # save fp0/fp1
3578 fmov.l &0x0,%fpcr # zero FPCR
3581 # copy, convert, and tag input argument
3583 fmov.s 0x8(%a6),%fp0 # load sgl input
3584 fmov.x %fp0,FP_SRC(%a6) # store it in extended format
3586 bsr.l tag # fetch operand type
3590 andi.l &0x00ff00ff,USER_FPSR(%a6) # clear ccode and exc-status bytes of saved FPSR
3593 mov.b FPCR_MODE(%a6),%d0 # pass rnd mode,prec
3597 bsr.l sacos # operand is a NORM
3600 cmpi.b %d1,&ZERO # is operand a ZERO?
3605 cmpi.b %d1,&INF # is operand an INF?
3610 cmpi.b %d1,&QNAN # is operand a QNAN?
3612 bsr.l src_qnan # yes
3615 bsr.l sacosd # operand is a DENORM
3619 # Result is now in FP0
3621 movm.l EXC_DREGS(%a6),&0x0303 # restore d0-d1/a0-a1
3622 fmovm.l USER_FPCR(%a6),%fpcr,%fpsr # restore ctrl regs
3623 fmovm.x EXC_FP1(%a6),&0x40 # restore fp1
# Entry stub: double-precision argument -> sacos / sacosd.
# Builds a LOCAL_SIZE-byte frame on %a6, saves d0-d1/a0-a1, FPCR/FPSR
# and fp0/fp1 in it, and zeroes the FPCR.  The double at 0x8(%a6) is
# loaded into fp0 and stored in extended format at FP_SRC(%a6), then
# tag is called to classify it.  Visible dispatch on the type byte in
# %d1: NORM -> sacos, QNAN -> src_qnan, DENORM -> sacosd.  ZERO and INF
# are tested, but no handler call for either is visible.
# The result stays in fp0, so only d0-d1/a0-a1, the control registers
# and fp1 are restored.
# NOTE(review): the embedded line numbers skip - the entry label, the
# branches between the type tests and any frame teardown/return are not
# visible in this excerpt.
3629 link %a6,&-LOCAL_SIZE
3631 movm.l &0x0303,EXC_DREGS(%a6) # save d0-d1/a0-a1
3632 fmovm.l %fpcr,%fpsr,USER_FPCR(%a6) # save ctrl regs
3633 fmovm.x &0xc0,EXC_FP0(%a6) # save fp0/fp1
3635 fmov.l &0x0,%fpcr # zero FPCR
3638 # copy, convert, and tag input argument
3640 fmov.d 0x8(%a6),%fp0 # load dbl input
3641 fmov.x %fp0,FP_SRC(%a6) # store it in extended format
3643 bsr.l tag # fetch operand type
3647 andi.l &0x00ff00ff,USER_FPSR(%a6) # clear ccode and exc-status bytes of saved FPSR
3650 mov.b FPCR_MODE(%a6),%d0 # pass rnd mode,prec
3655 bsr.l sacos # operand is a NORM
3658 cmpi.b %d1,&ZERO # is operand a ZERO?
3663 cmpi.b %d1,&INF # is operand an INF?
3668 cmpi.b %d1,&QNAN # is operand a QNAN?
3670 bsr.l src_qnan # yes
3673 bsr.l sacosd # operand is a DENORM
3677 # Result is now in FP0
3679 movm.l EXC_DREGS(%a6),&0x0303 # restore d0-d1/a0-a1
3680 fmovm.l USER_FPCR(%a6),%fpcr,%fpsr # restore ctrl regs
3681 fmovm.x EXC_FP1(%a6),&0x40 # restore fp1
# Entry stub: extended-precision argument -> sacos / sacosd.
# Builds a LOCAL_SIZE-byte frame on %a6, saves d0-d1/a0-a1, FPCR/FPSR
# and fp0/fp1 in it, and zeroes the FPCR.  The three longwords of the
# argument at 0x8(%a6) are copied to the buffer addressed by %a0, then
# tag is called to classify the operand.  Visible dispatch on the type
# byte in %d1: NORM -> sacos, QNAN -> src_qnan, DENORM -> sacosd.  ZERO
# and INF are tested, but no handler call for either is visible.
# The result stays in fp0, so only d0-d1/a0-a1, the control registers
# and fp1 are restored.
# NOTE(review): the embedded line numbers skip - the entry label, the
# branches between the type tests and any frame teardown/return are not
# visible in this excerpt.  Nothing visible loads %a0 before it is used
# as the copy destination; presumably that instruction was dropped from
# the excerpt as well - confirm against the full file.
3687 link %a6,&-LOCAL_SIZE
3689 movm.l &0x0303,EXC_DREGS(%a6) # save d0-d1/a0-a1
3690 fmovm.l %fpcr,%fpsr,USER_FPCR(%a6) # save ctrl regs
3691 fmovm.x &0xc0,EXC_FP0(%a6) # save fp0/fp1
3693 fmov.l &0x0,%fpcr # zero FPCR
3696 # copy, convert, and tag input argument
3699 mov.l 0x8+0x0(%a6),0x0(%a0) # load ext input
3700 mov.l 0x8+0x4(%a6),0x4(%a0) # second longword of the operand
3701 mov.l 0x8+0x8(%a6),0x8(%a0) # third longword of the operand
3702 bsr.l tag # fetch operand type
3706 andi.l &0x00ff00ff,USER_FPSR(%a6) # clear ccode and exc-status bytes of saved FPSR
3709 mov.b FPCR_MODE(%a6),%d0 # pass rnd mode,prec
3713 bsr.l sacos # operand is a NORM
3716 cmpi.b %d1,&ZERO # is operand a ZERO?
3721 cmpi.b %d1,&INF # is operand an INF?
3726 cmpi.b %d1,&QNAN # is operand a QNAN?
3728 bsr.l src_qnan # yes
3731 bsr.l sacosd # operand is a DENORM
3735 # Result is now in FP0
3737 movm.l EXC_DREGS(%a6),&0x0303 # restore d0-d1/a0-a1
3738 fmovm.l USER_FPCR(%a6),%fpcr,%fpsr # restore ctrl regs
3739 fmovm.x EXC_FP1(%a6),&0x40 # restore fp1
3744 #########################################################################
3745 # MONADIC TEMPLATE #
3746 #########################################################################
# Entry stub: single-precision argument -> sgetexp / sgetexpd.
# Builds a LOCAL_SIZE-byte frame on %a6, saves d0-d1/a0-a1, FPCR/FPSR
# and fp0/fp1 in it, and zeroes the FPCR.  The single at 0x8(%a6) is
# loaded into fp0 and stored in extended format at FP_SRC(%a6), then
# tag is called to classify it.  Visible dispatch on the type byte in
# %d1: NORM -> sgetexp, ZERO -> src_zero, QNAN -> src_qnan,
# DENORM -> sgetexpd.  INF is tested, but no handler call is visible.
# The result stays in fp0, so only d0-d1/a0-a1, the control registers
# and fp1 are restored.
# NOTE(review): the embedded line numbers skip - the entry label, the
# branches between the type tests and any frame teardown/return are not
# visible in this excerpt.
3749 link %a6,&-LOCAL_SIZE
3751 movm.l &0x0303,EXC_DREGS(%a6) # save d0-d1/a0-a1
3752 fmovm.l %fpcr,%fpsr,USER_FPCR(%a6) # save ctrl regs
3753 fmovm.x &0xc0,EXC_FP0(%a6) # save fp0/fp1
3755 fmov.l &0x0,%fpcr # zero FPCR
3758 # copy, convert, and tag input argument
3760 fmov.s 0x8(%a6),%fp0 # load sgl input
3761 fmov.x %fp0,FP_SRC(%a6) # store it in extended format
3763 bsr.l tag # fetch operand type
3767 andi.l &0x00ff00ff,USER_FPSR(%a6) # clear ccode and exc-status bytes of saved FPSR
3770 mov.b FPCR_MODE(%a6),%d0 # pass rnd mode,prec
3774 bsr.l sgetexp # operand is a NORM
3777 cmpi.b %d1,&ZERO # is operand a ZERO?
3779 bsr.l src_zero # yes
3782 cmpi.b %d1,&INF # is operand an INF?
3787 cmpi.b %d1,&QNAN # is operand a QNAN?
3789 bsr.l src_qnan # yes
3792 bsr.l sgetexpd # operand is a DENORM
3796 # Result is now in FP0
3798 movm.l EXC_DREGS(%a6),&0x0303 # restore d0-d1/a0-a1
3799 fmovm.l USER_FPCR(%a6),%fpcr,%fpsr # restore ctrl regs
3800 fmovm.x EXC_FP1(%a6),&0x40 # restore fp1
# Double-precision wrapper around sgetexp (NORM) / sgetexpd (DENORM).
# NOTE(review): the entry label, the conditional branches and a few
# set-up instructions are not visible in this excerpt.
#   in : double-precision operand at 0x8(%a6) after the link
#   out: result in fp0; d0-d1/a0-a1 and fp1 are restored, FPCR/FPSR are
#        reloaded from the frame image at USER_FPCR
3806 link %a6,&-LOCAL_SIZE
3808 movm.l &0x0303,EXC_DREGS(%a6) # save d0-d1/a0-a1
3809 fmovm.l %fpcr,%fpsr,USER_FPCR(%a6) # save ctrl regs
3810 fmovm.x &0xc0,EXC_FP0(%a6) # save fp0/fp1
3812 fmov.l &0x0,%fpcr # zero FPCR
3815 # copy, convert, and tag input argument
3817 fmov.d 0x8(%a6),%fp0 # load dbl input
3818 fmov.x %fp0,FP_SRC(%a6) # keep an extended-precision copy in FP_SRC
3820 bsr.l tag # fetch operand type
# keep only bits 23-16 and 7-0 of the saved FPSR image
3824 andi.l &0x00ff00ff,USER_FPSR(%a6)
3827 mov.b FPCR_MODE(%a6),%d0 # pass rnd mode,prec
# dispatch on the operand tag held in d1; one handler is called per type
3832 bsr.l sgetexp # operand is a NORM
3835 cmpi.b %d1,&ZERO # is operand a ZERO?
3837 bsr.l src_zero # yes
3840 cmpi.b %d1,&INF # is operand an INF?
3845 cmpi.b %d1,&QNAN # is operand a QNAN?
3847 bsr.l src_qnan # yes
3850 bsr.l sgetexpd # operand is a DENORM
3854 # Result is now in FP0
3856 movm.l EXC_DREGS(%a6),&0x0303 # restore d0-d1/a0-a1
3857 fmovm.l USER_FPCR(%a6),%fpcr,%fpsr # restore ctrl regs
3858 fmovm.x EXC_FP1(%a6),&0x40 # restore fp1
# Extended-precision wrapper around sgetexp (NORM) / sgetexpd (DENORM).
# NOTE(review): the entry label, the conditional branches and a few
# set-up instructions are not visible in this excerpt.
#   in : 12-byte extended operand at 0x8(%a6) after the link
#   out: result in fp0; d0-d1/a0-a1 and fp1 are restored, FPCR/FPSR are
#        reloaded from the frame image at USER_FPCR
3864 link %a6,&-LOCAL_SIZE
3866 movm.l &0x0303,EXC_DREGS(%a6) # save d0-d1/a0-a1
3867 fmovm.l %fpcr,%fpsr,USER_FPCR(%a6) # save ctrl regs
3868 fmovm.x &0xc0,EXC_FP0(%a6) # save fp0/fp1
3870 fmov.l &0x0,%fpcr # zero FPCR
3873 # copy, convert, and tag input argument
# the operand is copied as three longwords through %a0 (the instruction
# that loads %a0 is not visible here)
3876 mov.l 0x8+0x0(%a6),0x0(%a0) # load ext input
3877 mov.l 0x8+0x4(%a6),0x4(%a0)
3878 mov.l 0x8+0x8(%a6),0x8(%a0)
3879 bsr.l tag # fetch operand type
# keep only bits 23-16 and 7-0 of the saved FPSR image
3883 andi.l &0x00ff00ff,USER_FPSR(%a6)
3886 mov.b FPCR_MODE(%a6),%d0 # pass rnd mode,prec
# dispatch on the operand tag held in d1; one handler is called per type
3890 bsr.l sgetexp # operand is a NORM
3893 cmpi.b %d1,&ZERO # is operand a ZERO?
3895 bsr.l src_zero # yes
3898 cmpi.b %d1,&INF # is operand an INF?
3903 cmpi.b %d1,&QNAN # is operand a QNAN?
3905 bsr.l src_qnan # yes
3908 bsr.l sgetexpd # operand is a DENORM
3912 # Result is now in FP0
3914 movm.l EXC_DREGS(%a6),&0x0303 # restore d0-d1/a0-a1
3915 fmovm.l USER_FPCR(%a6),%fpcr,%fpsr # restore ctrl regs
3916 fmovm.x EXC_FP1(%a6),&0x40 # restore fp1
3921 #########################################################################
3922 # MONADIC TEMPLATE #
3923 #########################################################################
# Single-precision wrapper around sgetman (NORM) / sgetmand (DENORM).
# NOTE(review): the entry label, the conditional branches and a few
# set-up instructions are not visible in this excerpt.
#   in : single-precision operand at 0x8(%a6) after the link
#   out: result in fp0; d0-d1/a0-a1 and fp1 are restored, FPCR/FPSR are
#        reloaded from the frame image at USER_FPCR
3926 link %a6,&-LOCAL_SIZE
3928 movm.l &0x0303,EXC_DREGS(%a6) # save d0-d1/a0-a1
3929 fmovm.l %fpcr,%fpsr,USER_FPCR(%a6) # save ctrl regs
3930 fmovm.x &0xc0,EXC_FP0(%a6) # save fp0/fp1
3932 fmov.l &0x0,%fpcr # zero FPCR
3935 # copy, convert, and tag input argument
3937 fmov.s 0x8(%a6),%fp0 # load sgl input
3938 fmov.x %fp0,FP_SRC(%a6) # keep an extended-precision copy in FP_SRC
3940 bsr.l tag # fetch operand type
# keep only bits 23-16 and 7-0 of the saved FPSR image
3944 andi.l &0x00ff00ff,USER_FPSR(%a6)
3947 mov.b FPCR_MODE(%a6),%d0 # pass rnd mode,prec
# dispatch on the operand tag held in d1; one handler is called per type
3951 bsr.l sgetman # operand is a NORM
3954 cmpi.b %d1,&ZERO # is operand a ZERO?
3956 bsr.l src_zero # yes
3959 cmpi.b %d1,&INF # is operand an INF?
3964 cmpi.b %d1,&QNAN # is operand a QNAN?
3966 bsr.l src_qnan # yes
3969 bsr.l sgetmand # operand is a DENORM
3973 # Result is now in FP0
3975 movm.l EXC_DREGS(%a6),&0x0303 # restore d0-d1/a0-a1
3976 fmovm.l USER_FPCR(%a6),%fpcr,%fpsr # restore ctrl regs
3977 fmovm.x EXC_FP1(%a6),&0x40 # restore fp1
# Double-precision wrapper around sgetman (NORM) / sgetmand (DENORM).
# NOTE(review): the entry label, the conditional branches and a few
# set-up instructions are not visible in this excerpt.
#   in : double-precision operand at 0x8(%a6) after the link
#   out: result in fp0; d0-d1/a0-a1 and fp1 are restored, FPCR/FPSR are
#        reloaded from the frame image at USER_FPCR
3983 link %a6,&-LOCAL_SIZE
3985 movm.l &0x0303,EXC_DREGS(%a6) # save d0-d1/a0-a1
3986 fmovm.l %fpcr,%fpsr,USER_FPCR(%a6) # save ctrl regs
3987 fmovm.x &0xc0,EXC_FP0(%a6) # save fp0/fp1
3989 fmov.l &0x0,%fpcr # zero FPCR
3992 # copy, convert, and tag input argument
3994 fmov.d 0x8(%a6),%fp0 # load dbl input
3995 fmov.x %fp0,FP_SRC(%a6) # keep an extended-precision copy in FP_SRC
3997 bsr.l tag # fetch operand type
# keep only bits 23-16 and 7-0 of the saved FPSR image
4001 andi.l &0x00ff00ff,USER_FPSR(%a6)
4004 mov.b FPCR_MODE(%a6),%d0 # pass rnd mode,prec
# dispatch on the operand tag held in d1; one handler is called per type
4009 bsr.l sgetman # operand is a NORM
4012 cmpi.b %d1,&ZERO # is operand a ZERO?
4014 bsr.l src_zero # yes
4017 cmpi.b %d1,&INF # is operand an INF?
4022 cmpi.b %d1,&QNAN # is operand a QNAN?
4024 bsr.l src_qnan # yes
4027 bsr.l sgetmand # operand is a DENORM
4031 # Result is now in FP0
4033 movm.l EXC_DREGS(%a6),&0x0303 # restore d0-d1/a0-a1
4034 fmovm.l USER_FPCR(%a6),%fpcr,%fpsr # restore ctrl regs
4035 fmovm.x EXC_FP1(%a6),&0x40 # restore fp1
# Extended-precision wrapper around sgetman (NORM) / sgetmand (DENORM).
# NOTE(review): the entry label, the conditional branches and a few
# set-up instructions are not visible in this excerpt.
#   in : 12-byte extended operand at 0x8(%a6) after the link
#   out: result in fp0; d0-d1/a0-a1 and fp1 are restored, FPCR/FPSR are
#        reloaded from the frame image at USER_FPCR
4041 link %a6,&-LOCAL_SIZE
4043 movm.l &0x0303,EXC_DREGS(%a6) # save d0-d1/a0-a1
4044 fmovm.l %fpcr,%fpsr,USER_FPCR(%a6) # save ctrl regs
4045 fmovm.x &0xc0,EXC_FP0(%a6) # save fp0/fp1
4047 fmov.l &0x0,%fpcr # zero FPCR
4050 # copy, convert, and tag input argument
# the operand is copied as three longwords through %a0 (the instruction
# that loads %a0 is not visible here)
4053 mov.l 0x8+0x0(%a6),0x0(%a0) # load ext input
4054 mov.l 0x8+0x4(%a6),0x4(%a0)
4055 mov.l 0x8+0x8(%a6),0x8(%a0)
4056 bsr.l tag # fetch operand type
# keep only bits 23-16 and 7-0 of the saved FPSR image
4060 andi.l &0x00ff00ff,USER_FPSR(%a6)
4063 mov.b FPCR_MODE(%a6),%d0 # pass rnd mode,prec
# dispatch on the operand tag held in d1; one handler is called per type
4067 bsr.l sgetman # operand is a NORM
4070 cmpi.b %d1,&ZERO # is operand a ZERO?
4072 bsr.l src_zero # yes
4075 cmpi.b %d1,&INF # is operand an INF?
4080 cmpi.b %d1,&QNAN # is operand a QNAN?
4082 bsr.l src_qnan # yes
4085 bsr.l sgetmand # operand is a DENORM
4089 # Result is now in FP0
4091 movm.l EXC_DREGS(%a6),&0x0303 # restore d0-d1/a0-a1
4092 fmovm.l USER_FPCR(%a6),%fpcr,%fpsr # restore ctrl regs
4093 fmovm.x EXC_FP1(%a6),&0x40 # restore fp1
4098 #########################################################################
4099 # MONADIC TEMPLATE #
4100 #########################################################################
# Single-precision wrapper around the ssincos family (ssincos, ssincosz,
# ssincosi, ssincosqnan, ssincosd).
# NOTE(review): the entry label, the conditional branches and a few
# set-up instructions are not visible in this excerpt.
#   in : single-precision operand at 0x8(%a6) after the link
#   out: two results, in fp0 and fp1; unlike the one-result wrappers,
#        fp1 is not reloaded from the frame, and the final three
#        instructions exchange fp0 and fp1 through the stack
4103 link %a6,&-LOCAL_SIZE
4105 movm.l &0x0303,EXC_DREGS(%a6) # save d0-d1/a0-a1
4106 fmovm.l %fpcr,%fpsr,USER_FPCR(%a6) # save ctrl regs
4107 fmovm.x &0xc0,EXC_FP0(%a6) # save fp0/fp1
4109 fmov.l &0x0,%fpcr # zero FPCR
4112 # copy, convert, and tag input argument
4114 fmov.s 0x8(%a6),%fp0 # load sgl input
4115 fmov.x %fp0,FP_SRC(%a6) # keep an extended-precision copy in FP_SRC
4117 bsr.l tag # fetch operand type
# keep only bits 23-16 and 7-0 of the saved FPSR image
4121 andi.l &0x00ff00ff,USER_FPSR(%a6)
4124 mov.b FPCR_MODE(%a6),%d0 # pass rnd mode,prec
# dispatch on the operand tag held in d1; one handler is called per type
4128 bsr.l ssincos # operand is a NORM
4131 cmpi.b %d1,&ZERO # is operand a ZERO?
4133 bsr.l ssincosz # yes
4136 cmpi.b %d1,&INF # is operand an INF?
4138 bsr.l ssincosi # yes
4141 cmpi.b %d1,&QNAN # is operand a QNAN?
4143 bsr.l ssincosqnan # yes
4146 bsr.l ssincosd # operand is a DENORM
4150 # Result is now in FP0
4152 movm.l EXC_DREGS(%a6),&0x0303 # restore d0-d1/a0-a1
4153 fmovm.l USER_FPCR(%a6),%fpcr,%fpsr # restore ctrl regs
# swap fp0 and fp1: push both, then pop them into the opposite registers
4154 fmovm.x &0x03,-(%sp) # store off fp0/fp1
4155 fmovm.x (%sp)+,&0x40 # fp0 now in fp1
4156 fmovm.x (%sp)+,&0x80 # fp1 now in fp0
# Double-precision wrapper around the ssincos family (ssincos, ssincosz,
# ssincosi, ssincosqnan, ssincosd).
# NOTE(review): the entry label, the conditional branches and a few
# set-up instructions are not visible in this excerpt.
#   in : double-precision operand at 0x8(%a6) after the link
#   out: two results, in fp0 and fp1; unlike the one-result wrappers,
#        fp1 is not reloaded from the frame, and the final three
#        instructions exchange fp0 and fp1 through the stack
4162 link %a6,&-LOCAL_SIZE
4164 movm.l &0x0303,EXC_DREGS(%a6) # save d0-d1/a0-a1
4165 fmovm.l %fpcr,%fpsr,USER_FPCR(%a6) # save ctrl regs
4166 fmovm.x &0xc0,EXC_FP0(%a6) # save fp0/fp1
4168 fmov.l &0x0,%fpcr # zero FPCR
4171 # copy, convert, and tag input argument
4173 fmov.d 0x8(%a6),%fp0 # load dbl input
4174 fmov.x %fp0,FP_SRC(%a6) # keep an extended-precision copy in FP_SRC
4176 bsr.l tag # fetch operand type
# keep only bits 23-16 and 7-0 of the saved FPSR image
4180 andi.l &0x00ff00ff,USER_FPSR(%a6)
4183 mov.b FPCR_MODE(%a6),%d0 # pass rnd mode,prec
# dispatch on the operand tag held in d1; one handler is called per type
4188 bsr.l ssincos # operand is a NORM
4191 cmpi.b %d1,&ZERO # is operand a ZERO?
4193 bsr.l ssincosz # yes
4196 cmpi.b %d1,&INF # is operand an INF?
4198 bsr.l ssincosi # yes
4201 cmpi.b %d1,&QNAN # is operand a QNAN?
4203 bsr.l ssincosqnan # yes
4206 bsr.l ssincosd # operand is a DENORM
4210 # Result is now in FP0
4212 movm.l EXC_DREGS(%a6),&0x0303 # restore d0-d1/a0-a1
4213 fmovm.l USER_FPCR(%a6),%fpcr,%fpsr # restore ctrl regs
# swap fp0 and fp1: push both, then pop them into the opposite registers
4214 fmovm.x &0x03,-(%sp) # store off fp0/fp1
4215 fmovm.x (%sp)+,&0x40 # fp0 now in fp1
4216 fmovm.x (%sp)+,&0x80 # fp1 now in fp0
# Extended-precision wrapper around the ssincos family (ssincos,
# ssincosz, ssincosi, ssincosqnan, ssincosd).
# NOTE(review): the entry label, the conditional branches and a few
# set-up instructions are not visible in this excerpt.
#   in : 12-byte extended operand at 0x8(%a6) after the link
#   out: two results, in fp0 and fp1; unlike the one-result wrappers,
#        fp1 is not reloaded from the frame, and the final three
#        instructions exchange fp0 and fp1 through the stack
4222 link %a6,&-LOCAL_SIZE
4224 movm.l &0x0303,EXC_DREGS(%a6) # save d0-d1/a0-a1
4225 fmovm.l %fpcr,%fpsr,USER_FPCR(%a6) # save ctrl regs
4226 fmovm.x &0xc0,EXC_FP0(%a6) # save fp0/fp1
4228 fmov.l &0x0,%fpcr # zero FPCR
4231 # copy, convert, and tag input argument
# the operand is copied as three longwords through %a0 (the instruction
# that loads %a0 is not visible here)
4234 mov.l 0x8+0x0(%a6),0x0(%a0) # load ext input
4235 mov.l 0x8+0x4(%a6),0x4(%a0)
4236 mov.l 0x8+0x8(%a6),0x8(%a0)
4237 bsr.l tag # fetch operand type
# keep only bits 23-16 and 7-0 of the saved FPSR image
4241 andi.l &0x00ff00ff,USER_FPSR(%a6)
4244 mov.b FPCR_MODE(%a6),%d0 # pass rnd mode,prec
# dispatch on the operand tag held in d1; one handler is called per type
4248 bsr.l ssincos # operand is a NORM
4251 cmpi.b %d1,&ZERO # is operand a ZERO?
4253 bsr.l ssincosz # yes
4256 cmpi.b %d1,&INF # is operand an INF?
4258 bsr.l ssincosi # yes
4261 cmpi.b %d1,&QNAN # is operand a QNAN?
4263 bsr.l ssincosqnan # yes
4266 bsr.l ssincosd # operand is a DENORM
4270 # Result is now in FP0
4272 movm.l EXC_DREGS(%a6),&0x0303 # restore d0-d1/a0-a1
4273 fmovm.l USER_FPCR(%a6),%fpcr,%fpsr # restore ctrl regs
# swap fp0 and fp1: push both, then pop them into the opposite registers
4274 fmovm.x &0x03,-(%sp) # store off fp0/fp1
4275 fmovm.x (%sp)+,&0x40 # fp0 now in fp1
4276 fmovm.x (%sp)+,&0x80 # fp1 now in fp0
4281 #########################################################################
4283 #########################################################################
# Single-precision two-operand wrapper around the srem_* handlers.
# NOTE(review): the entry label, the conditional branches and a few
# set-up instructions are not visible in this excerpt.
#   in : dst (single) at 0x8(%a6), src (single) at 0xc(%a6)
#   out: result in fp0; d0-d1/a0-a1 and fp1 are restored, FPCR/FPSR are
#        reloaded from the frame image at USER_FPCR
# Handlers receive a0 -> FP_SRC, a1 -> FP_DST and d0 = rnd mode,prec.
4286 link %a6,&-LOCAL_SIZE
4288 movm.l &0x0303,EXC_DREGS(%a6) # save d0-d1/a0-a1
4289 fmovm.l %fpcr,%fpsr,USER_FPCR(%a6) # save ctrl regs
4290 fmovm.x &0xc0,EXC_FP0(%a6) # save fp0/fp1
4292 fmov.l &0x0,%fpcr # zero FPCR
4295 # copy, convert, and tag input argument
4297 fmov.s 0x8(%a6),%fp0 # load sgl dst
4298 fmov.x %fp0,FP_DST(%a6) # extended-precision copy of dst
4300 bsr.l tag # fetch operand type
4303 fmov.s 0xc(%a6),%fp0 # load sgl src
4304 fmov.x %fp0,FP_SRC(%a6) # extended-precision copy of src
4306 bsr.l tag # fetch operand type
# keep only bits 23-16 and 7-0 of the saved FPSR image
4310 andi.l &0x00ff00ff,USER_FPSR(%a6)
4313 mov.b FPCR_MODE(%a6),%d0 # pass rnd mode,prec
4315 lea FP_SRC(%a6),%a0 # pass ptr to src
4316 lea FP_DST(%a6),%a1 # pass ptr to dst
# dispatch on the operand tag held in d1; one handler is called per type
4320 bsr.l srem_snorm # operand is a NORM
4323 cmpi.b %d1,&ZERO # is operand a ZERO?
4325 bsr.l srem_szero # yes
4328 cmpi.b %d1,&INF # is operand an INF?
4330 bsr.l srem_sinf # yes
4333 cmpi.b %d1,&QNAN # is operand a QNAN?
4335 bsr.l sop_sqnan # yes
4338 bsr.l srem_sdnrm # operand is a DENORM
4342 # Result is now in FP0
4344 movm.l EXC_DREGS(%a6),&0x0303 # restore d0-d1/a0-a1
4345 fmovm.l USER_FPCR(%a6),%fpcr,%fpsr # restore ctrl regs
4346 fmovm.x EXC_FP1(%a6),&0x40 # restore fp1
# Double-precision two-operand wrapper around the srem_* handlers.
# NOTE(review): the entry label, the conditional branches and a few
# set-up instructions are not visible in this excerpt.
#   in : dst (double) at 0x8(%a6), src (double) at 0x10(%a6)
#   out: result in fp0; d0-d1/a0-a1 and fp1 are restored, FPCR/FPSR are
#        reloaded from the frame image at USER_FPCR
# Handlers receive a0 -> FP_SRC, a1 -> FP_DST and d0 = rnd mode,prec.
4352 link %a6,&-LOCAL_SIZE
4354 movm.l &0x0303,EXC_DREGS(%a6) # save d0-d1/a0-a1
4355 fmovm.l %fpcr,%fpsr,USER_FPCR(%a6) # save ctrl regs
4356 fmovm.x &0xc0,EXC_FP0(%a6) # save fp0/fp1
4358 fmov.l &0x0,%fpcr # zero FPCR
4361 # copy, convert, and tag input argument
4363 fmov.d 0x8(%a6),%fp0 # load dbl dst
4364 fmov.x %fp0,FP_DST(%a6) # extended-precision copy of dst
4366 bsr.l tag # fetch operand type
4369 fmov.d 0x10(%a6),%fp0 # load dbl src
4370 fmov.x %fp0,FP_SRC(%a6) # extended-precision copy of src
4372 bsr.l tag # fetch operand type
# keep only bits 23-16 and 7-0 of the saved FPSR image
4376 andi.l &0x00ff00ff,USER_FPSR(%a6)
4379 mov.b FPCR_MODE(%a6),%d0 # pass rnd mode,prec
4381 lea FP_SRC(%a6),%a0 # pass ptr to src
4382 lea FP_DST(%a6),%a1 # pass ptr to dst
# dispatch on the operand tag held in d1; one handler is called per type
4386 bsr.l srem_snorm # operand is a NORM
4389 cmpi.b %d1,&ZERO # is operand a ZERO?
4391 bsr.l srem_szero # yes
4394 cmpi.b %d1,&INF # is operand an INF?
4396 bsr.l srem_sinf # yes
4399 cmpi.b %d1,&QNAN # is operand a QNAN?
4401 bsr.l sop_sqnan # yes
4404 bsr.l srem_sdnrm # operand is a DENORM
4408 # Result is now in FP0
4410 movm.l EXC_DREGS(%a6),&0x0303 # restore d0-d1/a0-a1
4411 fmovm.l USER_FPCR(%a6),%fpcr,%fpsr # restore ctrl regs
4412 fmovm.x EXC_FP1(%a6),&0x40 # restore fp1
# Extended-precision two-operand wrapper around the srem_* handlers.
# NOTE(review): the entry label, the conditional branches and a few
# set-up instructions are not visible in this excerpt.
#   in : dst (12-byte extended) at 0x8(%a6), src at 0x14(%a6)
#   out: result in fp0; d0-d1/a0-a1 and fp1 are restored, FPCR/FPSR are
#        reloaded from the frame image at USER_FPCR
# Handlers receive a0 -> FP_SRC, a1 -> FP_DST and d0 = rnd mode,prec.
4418 link %a6,&-LOCAL_SIZE
4420 movm.l &0x0303,EXC_DREGS(%a6) # save d0-d1/a0-a1
4421 fmovm.l %fpcr,%fpsr,USER_FPCR(%a6) # save ctrl regs
4422 fmovm.x &0xc0,EXC_FP0(%a6) # save fp0/fp1
4424 fmov.l &0x0,%fpcr # zero FPCR
4427 # copy, convert, and tag input argument
# each operand is copied as three longwords through %a0 (the
# instructions that load %a0 are not visible here)
4430 mov.l 0x8+0x0(%a6),0x0(%a0) # load ext dst
4431 mov.l 0x8+0x4(%a6),0x4(%a0)
4432 mov.l 0x8+0x8(%a6),0x8(%a0)
4433 bsr.l tag # fetch operand type
4437 mov.l 0x14+0x0(%a6),0x0(%a0) # load ext src
4438 mov.l 0x14+0x4(%a6),0x4(%a0)
4439 mov.l 0x14+0x8(%a6),0x8(%a0)
4440 bsr.l tag # fetch operand type
# keep only bits 23-16 and 7-0 of the saved FPSR image
4444 andi.l &0x00ff00ff,USER_FPSR(%a6)
4447 mov.b FPCR_MODE(%a6),%d0 # pass rnd mode,prec
4449 lea FP_SRC(%a6),%a0 # pass ptr to src
4450 lea FP_DST(%a6),%a1 # pass ptr to dst
# dispatch on the operand tag held in d1; one handler is called per type
4454 bsr.l srem_snorm # operand is a NORM
4457 cmpi.b %d1,&ZERO # is operand a ZERO?
4459 bsr.l srem_szero # yes
4462 cmpi.b %d1,&INF # is operand an INF?
4464 bsr.l srem_sinf # yes
4467 cmpi.b %d1,&QNAN # is operand a QNAN?
4469 bsr.l sop_sqnan # yes
4472 bsr.l srem_sdnrm # operand is a DENORM
4476 # Result is now in FP0
4478 movm.l EXC_DREGS(%a6),&0x0303 # restore d0-d1/a0-a1
4479 fmovm.l USER_FPCR(%a6),%fpcr,%fpsr # restore ctrl regs
4480 fmovm.x EXC_FP1(%a6),&0x40 # restore fp1
4485 #########################################################################
4487 #########################################################################
# Single-precision two-operand wrapper around the smod_* handlers.
# NOTE(review): the entry label, the conditional branches and a few
# set-up instructions are not visible in this excerpt.
#   in : dst (single) at 0x8(%a6), src (single) at 0xc(%a6)
#   out: result in fp0; d0-d1/a0-a1 and fp1 are restored, FPCR/FPSR are
#        reloaded from the frame image at USER_FPCR
# Handlers receive a0 -> FP_SRC, a1 -> FP_DST and d0 = rnd mode,prec.
4490 link %a6,&-LOCAL_SIZE
4492 movm.l &0x0303,EXC_DREGS(%a6) # save d0-d1/a0-a1
4493 fmovm.l %fpcr,%fpsr,USER_FPCR(%a6) # save ctrl regs
4494 fmovm.x &0xc0,EXC_FP0(%a6) # save fp0/fp1
4496 fmov.l &0x0,%fpcr # zero FPCR
4499 # copy, convert, and tag input argument
4501 fmov.s 0x8(%a6),%fp0 # load sgl dst
4502 fmov.x %fp0,FP_DST(%a6) # extended-precision copy of dst
4504 bsr.l tag # fetch operand type
4507 fmov.s 0xc(%a6),%fp0 # load sgl src
4508 fmov.x %fp0,FP_SRC(%a6) # extended-precision copy of src
4510 bsr.l tag # fetch operand type
# keep only bits 23-16 and 7-0 of the saved FPSR image
4514 andi.l &0x00ff00ff,USER_FPSR(%a6)
4517 mov.b FPCR_MODE(%a6),%d0 # pass rnd mode,prec
4519 lea FP_SRC(%a6),%a0 # pass ptr to src
4520 lea FP_DST(%a6),%a1 # pass ptr to dst
# dispatch on the operand tag held in d1; one handler is called per type
4524 bsr.l smod_snorm # operand is a NORM
4527 cmpi.b %d1,&ZERO # is operand a ZERO?
4529 bsr.l smod_szero # yes
4532 cmpi.b %d1,&INF # is operand an INF?
4534 bsr.l smod_sinf # yes
4537 cmpi.b %d1,&QNAN # is operand a QNAN?
4539 bsr.l sop_sqnan # yes
4542 bsr.l smod_sdnrm # operand is a DENORM
4546 # Result is now in FP0
4548 movm.l EXC_DREGS(%a6),&0x0303 # restore d0-d1/a0-a1
4549 fmovm.l USER_FPCR(%a6),%fpcr,%fpsr # restore ctrl regs
4550 fmovm.x EXC_FP1(%a6),&0x40 # restore fp1
# Double-precision two-operand wrapper around the smod_* handlers.
# NOTE(review): the entry label, the conditional branches and a few
# set-up instructions are not visible in this excerpt.
#   in : dst (double) at 0x8(%a6), src (double) at 0x10(%a6)
#   out: result in fp0; d0-d1/a0-a1 and fp1 are restored, FPCR/FPSR are
#        reloaded from the frame image at USER_FPCR
# Handlers receive a0 -> FP_SRC, a1 -> FP_DST and d0 = rnd mode,prec.
4556 link %a6,&-LOCAL_SIZE
4558 movm.l &0x0303,EXC_DREGS(%a6) # save d0-d1/a0-a1
4559 fmovm.l %fpcr,%fpsr,USER_FPCR(%a6) # save ctrl regs
4560 fmovm.x &0xc0,EXC_FP0(%a6) # save fp0/fp1
4562 fmov.l &0x0,%fpcr # zero FPCR
4565 # copy, convert, and tag input argument
4567 fmov.d 0x8(%a6),%fp0 # load dbl dst
4568 fmov.x %fp0,FP_DST(%a6) # extended-precision copy of dst
4570 bsr.l tag # fetch operand type
4573 fmov.d 0x10(%a6),%fp0 # load dbl src
4574 fmov.x %fp0,FP_SRC(%a6) # extended-precision copy of src
4576 bsr.l tag # fetch operand type
# keep only bits 23-16 and 7-0 of the saved FPSR image
4580 andi.l &0x00ff00ff,USER_FPSR(%a6)
4583 mov.b FPCR_MODE(%a6),%d0 # pass rnd mode,prec
4585 lea FP_SRC(%a6),%a0 # pass ptr to src
4586 lea FP_DST(%a6),%a1 # pass ptr to dst
# dispatch on the operand tag held in d1; one handler is called per type
4590 bsr.l smod_snorm # operand is a NORM
4593 cmpi.b %d1,&ZERO # is operand a ZERO?
4595 bsr.l smod_szero # yes
4598 cmpi.b %d1,&INF # is operand an INF?
4600 bsr.l smod_sinf # yes
4603 cmpi.b %d1,&QNAN # is operand a QNAN?
4605 bsr.l sop_sqnan # yes
4608 bsr.l smod_sdnrm # operand is a DENORM
4612 # Result is now in FP0
4614 movm.l EXC_DREGS(%a6),&0x0303 # restore d0-d1/a0-a1
4615 fmovm.l USER_FPCR(%a6),%fpcr,%fpsr # restore ctrl regs
4616 fmovm.x EXC_FP1(%a6),&0x40 # restore fp1
# Extended-precision two-operand wrapper around the smod_* handlers.
# NOTE(review): the entry label, the conditional branches and a few
# set-up instructions are not visible in this excerpt.
#   in : dst (12-byte extended) at 0x8(%a6), src at 0x14(%a6)
#   out: result in fp0; d0-d1/a0-a1 and fp1 are restored, FPCR/FPSR are
#        reloaded from the frame image at USER_FPCR
# Handlers receive a0 -> FP_SRC, a1 -> FP_DST and d0 = rnd mode,prec.
4622 link %a6,&-LOCAL_SIZE
4624 movm.l &0x0303,EXC_DREGS(%a6) # save d0-d1/a0-a1
4625 fmovm.l %fpcr,%fpsr,USER_FPCR(%a6) # save ctrl regs
4626 fmovm.x &0xc0,EXC_FP0(%a6) # save fp0/fp1
4628 fmov.l &0x0,%fpcr # zero FPCR
4631 # copy, convert, and tag input argument
# each operand is copied as three longwords through %a0 (the
# instructions that load %a0 are not visible here)
4634 mov.l 0x8+0x0(%a6),0x0(%a0) # load ext dst
4635 mov.l 0x8+0x4(%a6),0x4(%a0)
4636 mov.l 0x8+0x8(%a6),0x8(%a0)
4637 bsr.l tag # fetch operand type
4641 mov.l 0x14+0x0(%a6),0x0(%a0) # load ext src
4642 mov.l 0x14+0x4(%a6),0x4(%a0)
4643 mov.l 0x14+0x8(%a6),0x8(%a0)
4644 bsr.l tag # fetch operand type
# keep only bits 23-16 and 7-0 of the saved FPSR image
4648 andi.l &0x00ff00ff,USER_FPSR(%a6)
4651 mov.b FPCR_MODE(%a6),%d0 # pass rnd mode,prec
4653 lea FP_SRC(%a6),%a0 # pass ptr to src
4654 lea FP_DST(%a6),%a1 # pass ptr to dst
# dispatch on the operand tag held in d1; one handler is called per type
4658 bsr.l smod_snorm # operand is a NORM
4661 cmpi.b %d1,&ZERO # is operand a ZERO?
4663 bsr.l smod_szero # yes
4666 cmpi.b %d1,&INF # is operand an INF?
4668 bsr.l smod_sinf # yes
4671 cmpi.b %d1,&QNAN # is operand a QNAN?
4673 bsr.l sop_sqnan # yes
4676 bsr.l smod_sdnrm # operand is a DENORM
4680 # Result is now in FP0
4682 movm.l EXC_DREGS(%a6),&0x0303 # restore d0-d1/a0-a1
4683 fmovm.l USER_FPCR(%a6),%fpcr,%fpsr # restore ctrl regs
4684 fmovm.x EXC_FP1(%a6),&0x40 # restore fp1
4689 #########################################################################
4691 #########################################################################
# Single-precision two-operand wrapper around the sscale_* handlers.
# NOTE(review): the entry label, the conditional branches and a few
# set-up instructions are not visible in this excerpt.
#   in : dst (single) at 0x8(%a6), src (single) at 0xc(%a6)
#   out: result in fp0; d0-d1/a0-a1 and fp1 are restored, FPCR/FPSR are
#        reloaded from the frame image at USER_FPCR
# Handlers receive a0 -> FP_SRC, a1 -> FP_DST and d0 = rnd mode,prec.
4694 link %a6,&-LOCAL_SIZE
4696 movm.l &0x0303,EXC_DREGS(%a6) # save d0-d1/a0-a1
4697 fmovm.l %fpcr,%fpsr,USER_FPCR(%a6) # save ctrl regs
4698 fmovm.x &0xc0,EXC_FP0(%a6) # save fp0/fp1
4700 fmov.l &0x0,%fpcr # zero FPCR
4703 # copy, convert, and tag input argument
4705 fmov.s 0x8(%a6),%fp0 # load sgl dst
4706 fmov.x %fp0,FP_DST(%a6) # extended-precision copy of dst
4708 bsr.l tag # fetch operand type
4711 fmov.s 0xc(%a6),%fp0 # load sgl src
4712 fmov.x %fp0,FP_SRC(%a6) # extended-precision copy of src
4714 bsr.l tag # fetch operand type
# keep only bits 23-16 and 7-0 of the saved FPSR image
4718 andi.l &0x00ff00ff,USER_FPSR(%a6)
4721 mov.b FPCR_MODE(%a6),%d0 # pass rnd mode,prec
4723 lea FP_SRC(%a6),%a0 # pass ptr to src
4724 lea FP_DST(%a6),%a1 # pass ptr to dst
# dispatch on the operand tag held in d1; one handler is called per type
4728 bsr.l sscale_snorm # operand is a NORM
4731 cmpi.b %d1,&ZERO # is operand a ZERO?
4733 bsr.l sscale_szero # yes
4736 cmpi.b %d1,&INF # is operand an INF?
4738 bsr.l sscale_sinf # yes
4741 cmpi.b %d1,&QNAN # is operand a QNAN?
4743 bsr.l sop_sqnan # yes
4746 bsr.l sscale_sdnrm # operand is a DENORM
4750 # Result is now in FP0
4752 movm.l EXC_DREGS(%a6),&0x0303 # restore d0-d1/a0-a1
4753 fmovm.l USER_FPCR(%a6),%fpcr,%fpsr # restore ctrl regs
4754 fmovm.x EXC_FP1(%a6),&0x40 # restore fp1
# Double-precision two-operand wrapper around the sscale_* handlers.
# NOTE(review): the entry label, the conditional branches and a few
# set-up instructions are not visible in this excerpt.
#   in : dst (double) at 0x8(%a6), src (double) at 0x10(%a6)
#   out: result in fp0; d0-d1/a0-a1 and fp1 are restored, FPCR/FPSR are
#        reloaded from the frame image at USER_FPCR
# Handlers receive a0 -> FP_SRC, a1 -> FP_DST and d0 = rnd mode,prec.
4760 link %a6,&-LOCAL_SIZE
4762 movm.l &0x0303,EXC_DREGS(%a6) # save d0-d1/a0-a1
4763 fmovm.l %fpcr,%fpsr,USER_FPCR(%a6) # save ctrl regs
4764 fmovm.x &0xc0,EXC_FP0(%a6) # save fp0/fp1
4766 fmov.l &0x0,%fpcr # zero FPCR
4769 # copy, convert, and tag input argument
4771 fmov.d 0x8(%a6),%fp0 # load dbl dst
4772 fmov.x %fp0,FP_DST(%a6) # extended-precision copy of dst
4774 bsr.l tag # fetch operand type
4777 fmov.d 0x10(%a6),%fp0 # load dbl src
4778 fmov.x %fp0,FP_SRC(%a6) # extended-precision copy of src
4780 bsr.l tag # fetch operand type
# keep only bits 23-16 and 7-0 of the saved FPSR image
4784 andi.l &0x00ff00ff,USER_FPSR(%a6)
4787 mov.b FPCR_MODE(%a6),%d0 # pass rnd mode,prec
4789 lea FP_SRC(%a6),%a0 # pass ptr to src
4790 lea FP_DST(%a6),%a1 # pass ptr to dst
# dispatch on the operand tag held in d1; one handler is called per type
4794 bsr.l sscale_snorm # operand is a NORM
4797 cmpi.b %d1,&ZERO # is operand a ZERO?
4799 bsr.l sscale_szero # yes
4802 cmpi.b %d1,&INF # is operand an INF?
4804 bsr.l sscale_sinf # yes
4807 cmpi.b %d1,&QNAN # is operand a QNAN?
4809 bsr.l sop_sqnan # yes
4812 bsr.l sscale_sdnrm # operand is a DENORM
4816 # Result is now in FP0
4818 movm.l EXC_DREGS(%a6),&0x0303 # restore d0-d1/a0-a1
4819 fmovm.l USER_FPCR(%a6),%fpcr,%fpsr # restore ctrl regs
4820 fmovm.x EXC_FP1(%a6),&0x40 # restore fp1
# Extended-precision two-operand wrapper around the sscale_* handlers.
# NOTE(review): the entry label, the conditional branches and a few
# set-up instructions are not visible in this excerpt.
#   in : dst (12-byte extended) at 0x8(%a6), src at 0x14(%a6)
#   out: result in fp0; d0-d1/a0-a1 and fp1 are restored, FPCR/FPSR are
#        reloaded from the frame image at USER_FPCR
# Handlers receive a0 -> FP_SRC, a1 -> FP_DST and d0 = rnd mode,prec.
4826 link %a6,&-LOCAL_SIZE
4828 movm.l &0x0303,EXC_DREGS(%a6) # save d0-d1/a0-a1
4829 fmovm.l %fpcr,%fpsr,USER_FPCR(%a6) # save ctrl regs
4830 fmovm.x &0xc0,EXC_FP0(%a6) # save fp0/fp1
4832 fmov.l &0x0,%fpcr # zero FPCR
4835 # copy, convert, and tag input argument
# each operand is copied as three longwords through %a0 (the
# instructions that load %a0 are not visible here)
4838 mov.l 0x8+0x0(%a6),0x0(%a0) # load ext dst
4839 mov.l 0x8+0x4(%a6),0x4(%a0)
4840 mov.l 0x8+0x8(%a6),0x8(%a0)
4841 bsr.l tag # fetch operand type
4845 mov.l 0x14+0x0(%a6),0x0(%a0) # load ext src
4846 mov.l 0x14+0x4(%a6),0x4(%a0)
4847 mov.l 0x14+0x8(%a6),0x8(%a0)
4848 bsr.l tag # fetch operand type
# keep only bits 23-16 and 7-0 of the saved FPSR image
4852 andi.l &0x00ff00ff,USER_FPSR(%a6)
4855 mov.b FPCR_MODE(%a6),%d0 # pass rnd mode,prec
4857 lea FP_SRC(%a6),%a0 # pass ptr to src
4858 lea FP_DST(%a6),%a1 # pass ptr to dst
# dispatch on the operand tag held in d1; one handler is called per type
4862 bsr.l sscale_snorm # operand is a NORM
4865 cmpi.b %d1,&ZERO # is operand a ZERO?
4867 bsr.l sscale_szero # yes
4870 cmpi.b %d1,&INF # is operand an INF?
4872 bsr.l sscale_sinf # yes
4875 cmpi.b %d1,&QNAN # is operand a QNAN?
4877 bsr.l sop_sqnan # yes
4880 bsr.l sscale_sdnrm # operand is a DENORM
4884 # Result is now in FP0
4886 movm.l EXC_DREGS(%a6),&0x0303 # restore d0-d1/a0-a1
4887 fmovm.l USER_FPCR(%a6),%fpcr,%fpsr # restore ctrl regs
4888 fmovm.x EXC_FP1(%a6),&0x40 # restore fp1
4893 #########################################################################
4894 # ssin(): computes the sine of a normalized input #
4895 # ssind(): computes the sine of a denormalized input #
4896 # scos(): computes the cosine of a normalized input #
4897 # scosd(): computes the cosine of a denormalized input #
4898 # ssincos(): computes the sine and cosine of a normalized input #
4899 # ssincosd(): computes the sine and cosine of a denormalized input #
4901 # INPUT *************************************************************** #
4902 # a0 = pointer to extended precision input #
4903 # d0 = round precision,mode #
4905 # OUTPUT ************************************************************** #
4906 # fp0 = sin(X) or cos(X) #
4912 # ACCURACY and MONOTONICITY ******************************************* #
4913 # The returned result is within 1 ulp in 64 significant bits, i.e. #
4914 # within 0.5001 ulp to 53 bits if the result is subsequently #
4915 # rounded to double precision. The result is provably monotonic #
4916 # in double precision. #
4918 # ALGORITHM *********************************************************** #
4921 # 1. If SIN is invoked, set AdjN := 0; otherwise, set AdjN := 1. #
4923 # 2. If |X| >= 15Pi or |X| < 2**(-40), go to 7. #
4925 # 3. Decompose X as X = N(Pi/2) + r where |r| <= Pi/4. Let #
4926 # k = N mod 4, so in particular, k = 0,1,2,or 3. #
4927 # Overwrite k by k := k + AdjN. #
4929 # 4. If k is even, go to 6. #
4931 # 5. (k is odd) Set j := (k-1)/2, sgn := (-1)**j. #
4932 # Return sgn*cos(r) where cos(r) is approximated by an #
4933 # even polynomial in r, 1 + r*r*(B1+s*(B2+ ... + s*B8)), #
4937 # 6. (k is even) Set j := k/2, sgn := (-1)**j. Return sgn*sin(r) #
4938 # where sin(r) is approximated by an odd polynomial in r #
4939 # r + r*s*(A1+s*(A2+ ... + s*A7)), s = r*r. #
4942 # 7. If |X| > 1, go to 9. #
4944 # 8. (|X|<2**(-40)) If SIN is invoked, return X; #
4945 # otherwise return 1. #
4947 # 9. Overwrite X by X := X rem 2Pi. Now that |X| <= Pi, #
4951 # 1. If |X| >= 15Pi or |X| < 2**(-40), go to 6. #
4953 # 2. Decompose X as X = N(Pi/2) + r where |r| <= Pi/4. Let #
4954 # k = N mod 4, so in particular, k = 0,1,2,or 3. #
4956 # 3. If k is even, go to 5. #
4958 # 4. (k is odd) Set j1 := (k-1)/2, j2 := j1 (EOR) (k mod 2), i.e. #
4959 # j1 exclusive or with the l.s.b. of k. #
4960 # sgn1 := (-1)**j1, sgn2 := (-1)**j2. #
4961 # SIN(X) = sgn1 * cos(r) and COS(X) = sgn2*sin(r) where #
4962 # sin(r) and cos(r) are computed as odd and even #
4963 # polynomials in r, respectively. Exit #
4965 # 5. (k is even) Set j1 := k/2, sgn1 := (-1)**j1. #
4966 # SIN(X) = sgn1 * sin(r) and COS(X) = sgn1*cos(r) where #
4967 # sin(r) and cos(r) are computed as odd and even #
4968 # polynomials in r, respectively. Exit #
4970 # 6. If |X| > 1, go to 8. #
4972 # 7. (|X|<2**(-40)) SIN(X) = X and COS(X) = 1. Exit. #
4974 # 8. Overwrite X by X := X rem 2Pi. Now that |X| <= Pi, #
4977 #########################################################################
# Polynomial coefficients for the sine (SINAn) and cosine (COSBn)
# kernels that follow. Storage format, as used by the code below:
#   SINA7..SINA4, COSB8..COSB5 : two longwords, read as doubles
#   SINA3, COSB4               : read as doubles; the two trailing
#                                zero longwords are not read as part
#                                of the double
#   SINA2, SINA1, COSB3, COSB2 : read as extended precision (fadd.x)
#   COSB1                      : one longword, read as a single (-1/2)
4979 SINA7: long 0xBD6AAA77,0xCCC994F5
4980 SINA6: long 0x3DE61209,0x7AAE8DA1
4981 SINA5: long 0xBE5AE645,0x2A118AE4
4982 SINA4: long 0x3EC71DE3,0xA5341531
4983 SINA3: long 0xBF2A01A0,0x1A018B59,0x00000000,0x00000000
4984 SINA2: long 0x3FF80000,0x88888888,0x888859AF,0x00000000
4985 SINA1: long 0xBFFC0000,0xAAAAAAAA,0xAAAAAA99,0x00000000
4987 COSB8: long 0x3D2AC4D0,0xD6011EE3
4988 COSB7: long 0xBDA9396F,0x9F45AC19
4989 COSB6: long 0x3E21EED9,0x0612C972
4990 COSB5: long 0xBE927E4F,0xB79D9FCF
4991 COSB4: long 0x3EFA01A0,0x1A01D423,0x00000000,0x00000000
4992 COSB3: long 0xBFF50000,0xB60B60B6,0x0B61D438,0x00000000
4993 COSB2: long 0x3FFA0000,0xAAAAAAAA,0xAAAAAB5E
4994 COSB1: long 0xBF000000
# Sine / cosine of a normalized operand.
# NOTE(review): the entry labels and all conditional branches are not
# visible in this excerpt; comments describe the visible instructions.
#   in : a0 -> extended-precision operand, d0 = rounding precision,mode
#   out: fp0 = sin(X) or cos(X)
# ADJN(%a6) is set to 0 or 1 by one of the two stores below and is later
# added to the quadrant count N in d1; the parity of the sum selects the
# sine or the cosine polynomial. fp2/fp3 are saved and restored around
# each polynomial evaluation.
5013 ############################################
5016 mov.l &0,ADJN(%a6) # yes; SET ADJN TO 0
5019 ############################################
5022 mov.l &1,ADJN(%a6) # yes; SET ADJN TO 1
5024 ############################################
5026 #--SAVE FPCR, FP1. CHECK IF |X| IS TOO SMALL OR LARGE
5028 fmov.x (%a0),%fp0 # LOAD INPUT
5029 fmov.x %fp0,X(%a6) # save input at X
# build the "compact form" of X in d1: exponent word in the upper half,
# top 16 mantissa bits in the lower half, sign bit cleared
5032 mov.l (%a0),%d1 # put exp in hi word
5033 mov.w 4(%a0),%d1 # fetch hi(man)
5034 and.l &0x7FFFFFFF,%d1 # strip sign
5036 cmpi.l %d1,&0x3FD78000 # is |X| >= 2**(-40)?
5038 bra.w SINSM # yes; input is very small
5041 cmp.l %d1,&0x4004BC7E # is |X| < 15 PI?
5043 bra.w SREDUCEX # yes; input is very large
5045 #--THIS IS THE USUAL CASE, |X| <= 15 PI.
5046 #--THE ARGUMENT REDUCTION IS DONE BY TABLE LOOK UP.
5049 fmul.d TWOBYPI(%pc),%fp1 # X*2/PI
# 0x200 = 32 entries * 16 bytes, so a1 addresses the N = 0 entry
5051 lea PITBL+0x200(%pc),%a1 # TABLE OF N*PI/2, N = -32,...,32
5053 fmov.l %fp1,INT(%a6) # CONVERT TO INTEGER
5055 mov.l INT(%a6),%d1 # make a copy of N
5056 asl.l &4,%d1 # N *= 16
5057 add.l %d1,%a1 # tbl_addr = a1 + (N*16)
5059 # A1 IS THE ADDRESS OF N*PIBY2
5060 # ...WHICH IS IN TWO PIECES Y1 & Y2
5061 fsub.x (%a1)+,%fp0 # X-Y1
5062 fsub.s (%a1),%fp0 # fp0 = R = (X-Y1)-Y2
5065 #--continuation from REDUCEX
5067 #--GET N+ADJN AND SEE IF SIN(R) OR COS(R) IS NEEDED
5069 add.l ADJN(%a6),%d1 # D1 = N+ADJN; SEE IF IT IS ODD OR EVEN
5070 ror.l &1,%d1 # BIT 0 MOVES TO BIT 31: D1 WAS ODD IFF D1 IS NOW NEGATIVE
5074 #--LET J BE THE LEAST SIG. BIT OF D1, LET SGN := (-1)**J.
5075 #--THEN WE RETURN SGN*SIN(R). SGN*SIN(R) IS COMPUTED BY
5076 #--R' + R'*S*(A1 + S(A2 + S(A3 + S(A4 + ... + SA7)))), WHERE
5077 #--R' = SGN*R, S=R*R. THIS CAN BE REWRITTEN AS
5078 #--R' + R'*S*( [A1+T(A3+T(A5+TA7))] + [S(A2+T(A4+TA6))])
5080 #--NOTE THAT A3 THROUGH A7 ARE STORED IN DOUBLE PRECISION
5081 #--WHILE A1 AND A2 ARE IN DOUBLE-EXTENDED FORMAT.
5083 fmovm.x &0x0c,-(%sp) # save fp2/fp3
5085 fmov.x %fp0,X(%a6) # X IS R
5086 fmul.x %fp0,%fp0 # FP0 IS S
5088 fmov.d SINA7(%pc),%fp3
5089 fmov.d SINA6(%pc),%fp2
5092 fmul.x %fp1,%fp1 # FP1 IS T
5095 and.l &0x80000000,%d1
5096 # ...LEAST SIG. BIT OF D1 IN SIGN POSITION
5097 eor.l %d1,X(%a6) # X IS NOW R'= SGN*R
5099 fmul.x %fp1,%fp3 # TA7
5100 fmul.x %fp1,%fp2 # TA6
5102 fadd.d SINA5(%pc),%fp3 # A5+TA7
5103 fadd.d SINA4(%pc),%fp2 # A4+TA6
5105 fmul.x %fp1,%fp3 # T(A5+TA7)
5106 fmul.x %fp1,%fp2 # T(A4+TA6)
5108 fadd.d SINA3(%pc),%fp3 # A3+T(A5+TA7)
5109 fadd.x SINA2(%pc),%fp2 # A2+T(A4+TA6)
5111 fmul.x %fp3,%fp1 # T(A3+T(A5+TA7))
5113 fmul.x %fp0,%fp2 # S(A2+T(A4+TA6))
5114 fadd.x SINA1(%pc),%fp1 # A1+T(A3+T(A5+TA7))
5115 fmul.x X(%a6),%fp0 # R'*S
5117 fadd.x %fp2,%fp1 # [A1+T(A3+T(A5+TA7))]+[S(A2+T(A4+TA6))]
5119 fmul.x %fp1,%fp0 # SIN(R')-R'
5121 fmovm.x (%sp)+,&0x30 # restore fp2/fp3
# the user's FPCR is reinstated only for the final add, so just that
# last operation uses the caller's rounding mode and precision
5123 fmov.l %d0,%fpcr # restore users round mode,prec
5124 fadd.x X(%a6),%fp0 # last inst - possible exception set
5127 #--LET J BE THE LEAST SIG. BIT OF D1, LET SGN := (-1)**J.
5128 #--THEN WE RETURN SGN*COS(R). SGN*COS(R) IS COMPUTED BY
5129 #--SGN + S'*(B1 + S(B2 + S(B3 + S(B4 + ... + SB8)))), WHERE
5130 #--S=R*R AND S'=SGN*S. THIS CAN BE REWRITTEN AS
5131 #--SGN + S'*([B1+T(B3+T(B5+TB7))] + [S(B2+T(B4+T(B6+TB8)))])
5133 #--NOTE THAT B4 THROUGH B8 ARE STORED IN DOUBLE PRECISION
5134 #--WHILE B2 AND B3 ARE IN DOUBLE-EXTENDED FORMAT, B1 IS -1/2
5135 #--AND IS THEREFORE STORED AS SINGLE PRECISION.
5137 fmovm.x &0x0c,-(%sp) # save fp2/fp3
5139 fmul.x %fp0,%fp0 # FP0 IS S
5141 fmov.d COSB8(%pc),%fp2
5142 fmov.d COSB7(%pc),%fp3
5145 fmul.x %fp1,%fp1 # FP1 IS T
5147 fmov.x %fp0,X(%a6) # X IS S
5149 and.l &0x80000000,%d1
5150 # ...LEAST SIG. BIT OF D1 IN SIGN POSITION
5152 fmul.x %fp1,%fp2 # TB8
5154 eor.l %d1,X(%a6) # X IS NOW S'= SGN*S
5155 and.l &0x80000000,%d1
5157 fmul.x %fp1,%fp3 # TB7
5159 or.l &0x3F800000,%d1 # D1 IS SGN (+1.0 OR -1.0) IN SINGLE
5160 mov.l %d1,POSNEG1(%a6)
5162 fadd.d COSB6(%pc),%fp2 # B6+TB8
5163 fadd.d COSB5(%pc),%fp3 # B5+TB7
5165 fmul.x %fp1,%fp2 # T(B6+TB8)
5166 fmul.x %fp1,%fp3 # T(B5+TB7)
5168 fadd.d COSB4(%pc),%fp2 # B4+T(B6+TB8)
5169 fadd.x COSB3(%pc),%fp3 # B3+T(B5+TB7)
5171 fmul.x %fp1,%fp2 # T(B4+T(B6+TB8))
5172 fmul.x %fp3,%fp1 # T(B3+T(B5+TB7))
5174 fadd.x COSB2(%pc),%fp2 # B2+T(B4+T(B6+TB8))
5175 fadd.s COSB1(%pc),%fp1 # B1+T(B3+T(B5+TB7))
5177 fmul.x %fp2,%fp0 # S(B2+T(B4+T(B6+TB8)))
5183 fmovm.x (%sp)+,&0x30 # restore fp2/fp3
5185 fmov.l %d0,%fpcr # restore users round mode,prec
5186 fadd.s POSNEG1(%a6),%fp0 # last inst - possible exception set
# Out-of-range tail of the sine/cosine routine: |X| is either large
# (general argument reduction) or below 2**(-40) (return X or 1).
# NOTE(review): labels and conditional branches are not visible in this
# excerpt, so the exact path between the instructions is not shown.
5189 ##############################################
5191 # SINe: Big OR Small?
5192 #--IF |X| > 15PI, WE USE THE GENERAL ARGUMENT REDUCTION.
5193 #--IF |X| < 2**(-40), RETURN X OR 1.
# 0x3FFF8000 is 1.0 in the compact form (exponent 0x3FFF, top mantissa
# word 0x8000) that d1 holds
5195 cmp.l %d1,&0x3FFF8000
5203 # here, the operation may underflow iff the precision is sgl or dbl.
5204 # extended denorms are handled through another entry point.
5206 # mov.w &0x0000,XDCARE(%a6) # JUST IN CASE
5208 fmov.l %d0,%fpcr # restore users round mode,prec
5209 mov.b &FMOV_OP,%d1 # last inst is MOVE
5210 fmov.x X(%a6),%fp0 # last inst - possible exception set
5214 fmov.s &0x3F800000,%fp0 # fp0 = 1.0
5215 fmov.l %d0,%fpcr # restore users round mode,prec
# 0x80800000 is the single-precision value -2**(-126); presumably added
# so that the returned 1.0 is flagged inexact -- TODO confirm
5216 fadd.s &0x80800000,%fp0 # last inst - possible exception set
# Sine and cosine of a denormalized operand. Only the cosine constant
# load is visible; the labels and the remaining instructions of both
# routines fall outside this excerpt.
5219 ################################################
5221 #--SIN(X) = X FOR DENORMALIZED X
5225 ############################################
5227 #--COS(X) = 1 FOR DENORMALIZED X
5229 fmov.s &0x3F800000,%fp0 # fp0 = 1.0
# Simultaneous sine and cosine of a normalized operand.
# NOTE(review): the entry label, the conditional branches and several
# integer set-up instructions (including those that prepare d1 and d2)
# are not visible in this excerpt.
#   in : a0 -> extended-precision operand
#   out: sine in fp0; the cosine is computed in fp1 and passed to
#        sto_cos
# After table-driven reduction to R, both polynomials are evaluated in
# parallel (fp1 and fp2 as accumulators, fp0 = S = R*R). There are two
# kernels, one for odd N and one for even N; they differ in which
# polynomial supplies the sine and which the cosine.
5232 ##################################################
5239 fmov.x (%a0),%fp0 # LOAD INPUT
5244 and.l &0x7FFFFFFF,%d1 # COMPACTIFY X
5246 cmp.l %d1,&0x3FD78000 # |X| >= 2**(-40)?
5251 cmp.l %d1,&0x4004BC7E # |X| < 15 PI?
5256 #--THIS IS THE USUAL CASE, |X| <= 15 PI.
5257 #--THE ARGUMENT REDUCTION IS DONE BY TABLE LOOK UP.
5261 fmul.d TWOBYPI(%pc),%fp1 # X*2/PI
# 0x200 = 32 entries * 16 bytes, so a1 addresses the N = 0 entry
5263 lea PITBL+0x200(%pc),%a1 # TABLE OF N*PI/2, N = -32,...,32
5265 fmov.l %fp1,INT(%a6) # CONVERT TO INTEGER
5269 add.l %d1,%a1 # ADDRESS OF N*PIBY2, IN Y1, Y2
5271 fsub.x (%a1)+,%fp0 # X-Y1
5272 fsub.s (%a1),%fp0 # FP0 IS R = (X-Y1)-Y2
5275 #--continuation point from REDUCEX
5279 cmp.l %d1,&0 # D1 < 0 IFF N IS ODD
# ---- N odd: SIN(X) = sgn1*cos(r), COS(X) = sgn2*sin(r) ----
5283 #--REGISTERS SAVED SO FAR: D0, A0, FP2.
5284 fmovm.x &0x04,-(%sp) # save fp2
5286 fmov.x %fp0,RPRIME(%a6)
5287 fmul.x %fp0,%fp0 # FP0 IS S = R*R
5288 fmov.d SINA7(%pc),%fp1 # A7
5289 fmov.d COSB8(%pc),%fp2 # B8
5290 fmul.x %fp0,%fp1 # SA7
5291 fmul.x %fp0,%fp2 # SB8
# (the instructions that load and adjust d2 around these two masks are
# not visible in this excerpt)
5296 and.l &0x80000000,%d2
5298 and.l &0x80000000,%d2
5300 fadd.d SINA6(%pc),%fp1 # A6+SA7
5301 fadd.d COSB7(%pc),%fp2 # B7+SB8
5303 fmul.x %fp0,%fp1 # S(A6+SA7)
5304 eor.l %d2,RPRIME(%a6) # apply the sign bit in d2 to R'
5306 fmul.x %fp0,%fp2 # S(B7+SB8)
5308 and.l &0x80000000,%d1
# POSNEG1 = single-precision 1.0 with the sign bit taken from d1
5309 mov.l &0x3F800000,POSNEG1(%a6)
5310 eor.l %d1,POSNEG1(%a6)
5312 fadd.d SINA5(%pc),%fp1 # A5+S(A6+SA7)
5313 fadd.d COSB6(%pc),%fp2 # B6+S(B7+SB8)
5315 fmul.x %fp0,%fp1 # S(A5+S(A6+SA7))
5316 fmul.x %fp0,%fp2 # S(B6+S(B7+SB8))
5317 fmov.x %fp0,SPRIME(%a6)
5319 fadd.d SINA4(%pc),%fp1 # A4+S(A5+S(A6+SA7))
5320 eor.l %d1,SPRIME(%a6) # apply the sign bit in d1 to S'
5321 fadd.d COSB5(%pc),%fp2 # B5+S(B6+S(B7+SB8))
5323 fmul.x %fp0,%fp1 # S(A4+...)
5324 fmul.x %fp0,%fp2 # S(B5+...)
5326 fadd.d SINA3(%pc),%fp1 # A3+S(A4+...)
5327 fadd.d COSB4(%pc),%fp2 # B4+S(B5+...)
5329 fmul.x %fp0,%fp1 # S(A3+...)
5330 fmul.x %fp0,%fp2 # S(B4+...)
5332 fadd.x SINA2(%pc),%fp1 # A2+S(A3+...)
5333 fadd.x COSB3(%pc),%fp2 # B3+S(B4+...)
5335 fmul.x %fp0,%fp1 # S(A2+...)
5336 fmul.x %fp0,%fp2 # S(B3+...)
5338 fadd.x SINA1(%pc),%fp1 # A1+S(A2+...)
5339 fadd.x COSB2(%pc),%fp2 # B2+S(B3+...)
5341 fmul.x %fp0,%fp1 # S(A1+...)
5342 fmul.x %fp2,%fp0 # S(B2+...)
5344 fmul.x RPRIME(%a6),%fp1 # R'S(A1+...)
5345 fadd.s COSB1(%pc),%fp0 # B1+S(B2...)
5346 fmul.x SPRIME(%a6),%fp0 # S'(B1+S(B2+...))
5348 fmovm.x (%sp)+,&0x20 # restore fp2
5351 fadd.x RPRIME(%a6),%fp1 # COS(X)
5352 bsr sto_cos # store cosine result
5353 fadd.s POSNEG1(%a6),%fp0 # SIN(X)
# ---- N even: SIN(X) = sgn1*sin(r), COS(X) = sgn1*cos(r) ----
5357 #--REGISTERS SAVED SO FAR: FP2.
5358 fmovm.x &0x04,-(%sp) # save fp2
5360 fmov.x %fp0,RPRIME(%a6)
5361 fmul.x %fp0,%fp0 # FP0 IS S = R*R
5363 fmov.d COSB8(%pc),%fp1 # B8
5364 fmov.d SINA7(%pc),%fp2 # A7
5366 fmul.x %fp0,%fp1 # SB8
5367 fmov.x %fp0,SPRIME(%a6)
5368 fmul.x %fp0,%fp2 # SA7
5371 and.l &0x80000000,%d1
5373 fadd.d COSB7(%pc),%fp1 # B7+SB8
5374 fadd.d SINA6(%pc),%fp2 # A6+SA7
# the same sign bit is applied to both R' and S'
5376 eor.l %d1,RPRIME(%a6)
5377 eor.l %d1,SPRIME(%a6)
5379 fmul.x %fp0,%fp1 # S(B7+SB8)
# POSNEG1 = single-precision 1.0 carrying the sign bit held in d1
5381 or.l &0x3F800000,%d1
5382 mov.l %d1,POSNEG1(%a6)
5384 fmul.x %fp0,%fp2 # S(A6+SA7)
5386 fadd.d COSB6(%pc),%fp1 # B6+S(B7+SB8)
5387 fadd.d SINA5(%pc),%fp2 # A5+S(A6+SA7)
5389 fmul.x %fp0,%fp1 # S(B6+S(B7+SB8))
5390 fmul.x %fp0,%fp2 # S(A5+S(A6+SA7))
5392 fadd.d COSB5(%pc),%fp1 # B5+S(B6+S(B7+SB8))
5393 fadd.d SINA4(%pc),%fp2 # A4+S(A5+S(A6+SA7))
5395 fmul.x %fp0,%fp1 # S(B5+...)
5396 fmul.x %fp0,%fp2 # S(A4+...)
5398 fadd.d COSB4(%pc),%fp1 # B4+S(B5+...)
5399 fadd.d SINA3(%pc),%fp2 # A3+S(A4+...)
5401 fmul.x %fp0,%fp1 # S(B4+...)
5402 fmul.x %fp0,%fp2 # S(A3+...)
5404 fadd.x COSB3(%pc),%fp1 # B3+S(B4+...)
5405 fadd.x SINA2(%pc),%fp2 # A2+S(A3+...)
5407 fmul.x %fp0,%fp1 # S(B3+...)
5408 fmul.x %fp0,%fp2 # S(A2+...)
5410 fadd.x COSB2(%pc),%fp1 # B2+S(B3+...)
5411 fadd.x SINA1(%pc),%fp2 # A1+S(A2+...)
5413 fmul.x %fp0,%fp1 # S(B2+...)
5414 fmul.x %fp2,%fp0 # S(A1+...)
5417 fadd.s COSB1(%pc),%fp1 # B1+S(B2...)
5418 fmul.x RPRIME(%a6),%fp0 # R'S(A1+...)
5419 fmul.x SPRIME(%a6),%fp1 # S'(B1+S(B2+...))
5421 fmovm.x (%sp)+,&0x20 # restore fp2
5424 fadd.s POSNEG1(%a6),%fp1 # COS(X)
5425 bsr sto_cos # store cosine result
5426 fadd.x RPRIME(%a6),%fp0 # SIN(X)
5429 ################################################
# Range dispatch and tiny-argument tail of the combined sine/cosine routine.
# The labels and the branch that consumes the compare below are not visible
# in this excerpt.
5432 cmp.l %d1,&0x3FFF8000 # d1 vs. 1.0 in compact form (exp 0x3FFF, top fraction word 0x8000)
5435 ################################################
5438 # mov.w &0x0000,XDCARE(%a6)
5439 fmov.s &0x3F800000,%fp1 # fp1 = 1.0 (single)
5442 fsub.s &0x00800000,%fp1 # fp1 = 1.0 - 2**(-126): cosine for a tiny argument
5443 bsr sto_cos # store cosine result
5444 fmov.l %fpcr,%d0 # d0 must have fpcr,too
5445 mov.b &FMOV_OP,%d1 # last inst is MOVE
5449 ##############################################
5452 #--SIN AND COS OF X FOR DENORMALIZED X
# The cosine is exactly 1.0 and is handed to sto_cos. d0 is preserved across
# that call by pushing and popping it. The sine result is produced by code
# not visible in this excerpt.
5454 mov.l %d0,-(%sp) # save d0
5455 fmov.s &0x3F800000,%fp1 # fp1 = 1.0 (single)
5456 bsr sto_cos # store cosine result
5457 mov.l (%sp)+,%d0 # restore d0
5460 ############################################
# General argument reduction for the sine/cosine routine: X is reduced modulo
# PI/2 in steps, carrying the remainder as a two-piece value (R,r) in
# (fp0,fp1). The integer quotient of the last step is stored in INT(%a6).
# fp2-fp5 and d2 are saved on entry and restored at the end. The loop labels
# and branches are not visible in this excerpt.
5462 #--WHEN REDUCEX IS USED, THE CODE WILL INEVITABLY BE SLOW.
5463 #--THIS REDUCTION METHOD, HOWEVER, IS MUCH FASTER THAN USING
5464 #--THE REMAINDER INSTRUCTION WHICH IS NOW IN SOFTWARE.
5466 fmovm.x &0x3c,-(%sp) # save {fp2-fp5}
5467 mov.l %d2,-(%sp) # save d2
5468 fmov.s &0x00000000,%fp1 # fp1 = 0
5470 #--If compact form of abs(arg) in d1=$7ffeffff, argument is so large that
5471 #--there is a danger of unwanted overflow in first LOOP iteration. In this
5472 #--case, reduce argument by one remainder step to make subsequent reduction
5474 cmp.l %d1,&0x7ffeffff # is arg dangerously large?
5477 # yes; create 2**16383*PI/2
5478 mov.w &0x7ffe,FP_SCR0_EX(%a6)
5479 mov.l &0xc90fdaa2,FP_SCR0_HI(%a6)
5480 clr.l FP_SCR0_LO(%a6)
5482 # create low half of 2**16383*PI/2 at FP_SCR1
5483 mov.w &0x7fdc,FP_SCR1_EX(%a6)
5484 mov.l &0x85a308d3,FP_SCR1_HI(%a6)
5485 clr.l FP_SCR1_LO(%a6)
5487 ftest.x %fp0 # test sign of argument
# For a positive argument both constants get their sign bit set, so the adds
# below subtract them. The conditional branch that skips the next two
# instructions for a negative argument is not visible in this excerpt.
5490 or.b &0x80,FP_SCR0_EX(%a6) # positive arg
5491 or.b &0x80,FP_SCR1_EX(%a6)
5493 fadd.x FP_SCR0(%a6),%fp0 # high part of reduction is exact
5494 fmov.x %fp0,%fp1 # save high result in fp1
5495 fadd.x FP_SCR1(%a6),%fp0 # low part of reduction
5496 fsub.x %fp0,%fp1 # determine low component of result
5497 fadd.x FP_SCR1(%a6),%fp1 # fp0/fp1 are reduced argument.
5499 #--ON ENTRY, FP0 IS X, ON RETURN, FP0 IS X REM PI/2, |X| <= PI/4.
5500 #--integer quotient will be stored in N
5501 #--Intermediate remainder is 66-bit long; (R,r) in (FP0,FP1)
5503 fmov.x %fp0,INARG(%a6) # +-2**K * F, 1 <= F < 2
5504 mov.w INARG(%a6),%d1
5505 mov.l %d1,%a1 # save a copy of d1
5506 and.l &0x00007FFF,%d1
5507 sub.l &0x00003FFF,%d1 # d1 = K
# Two alternatives follow: L = K-27 with ENDFLAG = 0, or L = 0 with
# ENDFLAG = 1. The compare/branch selecting between them is not visible in
# this excerpt.
5511 sub.l &27,%d1 # d1 = L := K-27
5512 mov.b &0,ENDFLAG(%a6)
5515 clr.l %d1 # d1 = L := 0
5516 mov.b &1,ENDFLAG(%a6)
5519 #--FIND THE REMAINDER OF (R,r) W.R.T. 2**L * (PI/2). L IS SO CHOSEN
5520 #--THAT INT( X * (2/PI) / 2**(L) ) < 2**29.
5522 #--CREATE 2**(-L) * (2/PI), SIGN(INARG)*2**(63),
5523 #--2**L * (PIby2_1), 2**L * (PIby2_2)
5525 mov.l &0x00003FFE,%d2 # BIASED EXP OF 2/PI
5526 sub.l %d1,%d2 # BIASED EXP OF 2**(-L)*(2/PI)
5528 mov.l &0xA2F9836E,FP_SCR0_HI(%a6)
5529 mov.l &0x4E44152A,FP_SCR0_LO(%a6)
5530 mov.w %d2,FP_SCR0_EX(%a6) # FP_SCR0 = 2**(-L)*(2/PI)
5533 fmul.x FP_SCR0(%a6),%fp2 # fp2 = X * 2**(-L)*(2/PI)
5535 #--WE MUST NOW FIND INT(FP2). SINCE WE NEED THIS VALUE IN
5536 #--FLOATING POINT FORMAT, THE TWO FMOVE'S FMOVE.L FP <--> N
5537 #--WILL BE TOO INEFFICIENT. THE WAY AROUND IT IS THAT
5538 #--(SIGN(INARG)*2**63 + FP2) - SIGN(INARG)*2**63 WILL GIVE
5539 #--US THE DESIRED VALUE IN FLOATING POINT.
5542 and.l &0x80000000,%d2
5543 or.l &0x5F000000,%d2 # d2 = SIGN(INARG)*2**63 IN SGL
5544 mov.l %d2,TWOTO63(%a6)
5545 fadd.s TWOTO63(%a6),%fp2 # THE FRACTIONAL PART OF FP2 IS ROUNDED
5546 fsub.s TWOTO63(%a6),%fp2 # fp2 = N
5549 #--CREATING 2**(L)*Piby2_1 and 2**(L)*Piby2_2
5550 mov.l %d1,%d2 # d2 = L
5552 add.l &0x00003FFF,%d2 # BIASED EXP OF 2**L * (PI/2)
5553 mov.w %d2,FP_SCR0_EX(%a6)
5554 mov.l &0xC90FDAA2,FP_SCR0_HI(%a6)
5555 clr.l FP_SCR0_LO(%a6) # FP_SCR0 = 2**(L) * Piby2_1
5557 add.l &0x00003FDD,%d1
5558 mov.w %d1,FP_SCR1_EX(%a6)
5559 mov.l &0x85A308D3,FP_SCR1_HI(%a6)
5560 clr.l FP_SCR1_LO(%a6) # FP_SCR1 = 2**(L) * Piby2_2
5562 mov.b ENDFLAG(%a6),%d1 # d1 = last-iteration flag (L is no longer needed)
5564 #--We are now ready to perform (R+r) - N*P1 - N*P2, P1 = 2**(L) * Piby2_1 and
5565 #--P2 = 2**(L) * Piby2_2
5566 fmov.x %fp2,%fp4 # fp4 = N
5567 fmul.x FP_SCR0(%a6),%fp4 # fp4 = W = N*P1
5568 fmov.x %fp2,%fp5 # fp5 = N
5569 fmul.x FP_SCR1(%a6),%fp5 # fp5 = w = N*P2
5570 fmov.x %fp4,%fp3 # fp3 = W = N*P1
5572 #--we want P+p = W+w but |p| <= half ulp of P
5573 #--Then, we need to compute A := R-P and a := r-p
5574 fadd.x %fp5,%fp3 # fp3 = P
5575 fsub.x %fp3,%fp4 # fp4 = W-P
5577 fsub.x %fp3,%fp0 # fp0 = A := R - P
5578 fadd.x %fp5,%fp4 # fp4 = p = (W-P)+w
5580 fmov.x %fp0,%fp3 # fp3 = A
5581 fsub.x %fp4,%fp1 # fp1 = a := r - p
5583 #--Now we need to normalize (A,a) to "new (R,r)" where R+r = A+a but
5584 #--|r| <= half ulp of R.
5585 fadd.x %fp1,%fp0 # fp0 = R := A+a
5586 #--No need to calculate r if this is the last loop
5590 #--Need to calculate r
5591 fsub.x %fp0,%fp3 # fp3 = A-R
5592 fadd.x %fp3,%fp1 # fp1 = r := (A-R)+a
5596 fmov.l %fp2,INT(%a6) # INT = N converted to a 32-bit integer
5597 mov.l (%sp)+,%d2 # restore d2
5598 fmovm.x (%sp)+,&0x3c # restore {fp2-fp5}
5606 #########################################################################
5607 # stan(): computes the tangent of a normalized input #
5608 # stand(): computes the tangent of a denormalized input #
5610 # INPUT *************************************************************** #
5611 # a0 = pointer to extended precision input #
5612 # d0 = round precision,mode #
5614 # OUTPUT ************************************************************** #
5617 # ACCURACY and MONOTONICITY ******************************************* #
5618 # The returned result is within 3 ulp in 64 significant bit, i.e. #
5619 # within 0.5001 ulp to 53 bits if the result is subsequently #
5620 # rounded to double precision. The result is provably monotonic #
5621 # in double precision. #
5623 # ALGORITHM *********************************************************** #
5625 # 1. If |X| >= 15Pi or |X| < 2**(-40), go to 6. #
5627 # 2. Decompose X as X = N(Pi/2) + r where |r| <= Pi/4. Let #
5628 # k = N mod 2, so in particular, k = 0 or 1. #
5630 # 3. If k is odd, go to 5. #
5632 # 4. (k is even) Tan(X) = tan(r) and tan(r) is approximated by a #
5633 # rational function U/V where #
5634 # U = r + r*s*(P1 + s*(P2 + s*P3)), and #
5635 # V = 1 + s*(Q1 + s*(Q2 + s*(Q3 + s*Q4))), s = r*r. #
5638 # 5. (k is odd) Tan(X) = -cot(r). Since tan(r) is approximated by #
5639 # a rational function U/V where #
5640 # U = r + r*s*(P1 + s*(P2 + s*P3)), and #
5641 # V = 1 + s*(Q1 + s*(Q2 + s*(Q3 + s*Q4))), s = r*r, #
5642 # -Cot(r) = -V/U. Exit. #
5644 # 6. If |X| > 1, go to 8. #
5646 # 7. (|X|<2**(-40)) Tan(X) = X. Exit. #
5648 # 8. Overwrite X by X := X rem 2Pi. Now that |X| <= Pi, go back #
5651 #########################################################################
# Constants for the tangent routine. The label lines are not visible in this
# excerpt. The tangent code references TANP1-TANP3 and TANQ1-TANQ4: TANQ4,
# TANP3 and TANQ3 are read as doubles, while TANP2, TANP1, TANQ2 and TANQ1 are
# read as extended values.
# NOTE(review): confirm the label-to-value mapping against the full file.
5654 long 0x3EA0B759,0xF50F8688
5656 long 0xBEF2BAA5,0xA8924F04
5659 long 0xBF346F59,0xB39BA65F,0x00000000,0x00000000
5662 long 0x3FF60000,0xE073D3FC,0x199C4A00,0x00000000
5665 long 0x3FF90000,0xD23CD684,0x15D95FA1,0x00000000
5668 long 0xBFFC0000,0x8895A6C5,0xFB423BCA,0x00000000
5671 long 0xBFFD0000,0xEEF57E0D,0xA84BC8CE,0x00000000
# Same fraction as the 2/PI constant built by the reduction code (biased
# exponent 0x3FFE there); exponent 0x3FFC makes this value 1/(2*PI).
5674 long 0x3FFC0000,0xA2F9836E,0x4E44152A,0x00000000
# Fraction 0xC90FDAA2 with exponent 0x3FFF is the leading part of PI/2 in the
# reduction code; exponent 0x4001 makes this the leading part of 2*PI.
5677 long 0x40010000,0xC90FDAA2,0x00000000,0x00000000
# Trailing part matching the entry above. Its fraction is 0x85A308D4, whereas
# the reduction code uses 0x85A308D3 for the PI/2 trailing part.
# The last three entries are not referenced by the code visible here.
5679 long 0x3FDF0000,0x85A308D4,0x00000000,0x00000000
5681 #--N*PI/2, -32 <= N <= 32, IN A LEADING TERM IN EXT. AND TRAILING
5682 #--TERM IN SGL. NOTE THAT PI IS 64-BIT LONG, THUS N*PI/2 IS AT
5683 #--MOST 69 BITS LONG.
# Layout: 65 entries of 16 bytes each. The first three longs are the
# extended-precision leading term (sign/exponent word, then 64 fraction
# bits). The fourth long is the single-precision trailing term.
# The tangent code uses PITBL+0x200 as its base, i.e. entry 32, the all-zero
# N = 0 row, so a signed byte offset of N*16 selects the row for N.
5686 long 0xC0040000,0xC90FDAA2,0x2168C235,0x21800000
5687 long 0xC0040000,0xC2C75BCD,0x105D7C23,0xA0D00000
5688 long 0xC0040000,0xBC7EDCF7,0xFF523611,0xA1E80000
5689 long 0xC0040000,0xB6365E22,0xEE46F000,0x21480000
5690 long 0xC0040000,0xAFEDDF4D,0xDD3BA9EE,0xA1200000
5691 long 0xC0040000,0xA9A56078,0xCC3063DD,0x21FC0000
5692 long 0xC0040000,0xA35CE1A3,0xBB251DCB,0x21100000
5693 long 0xC0040000,0x9D1462CE,0xAA19D7B9,0xA1580000
5694 long 0xC0040000,0x96CBE3F9,0x990E91A8,0x21E00000
5695 long 0xC0040000,0x90836524,0x88034B96,0x20B00000
5696 long 0xC0040000,0x8A3AE64F,0x76F80584,0xA1880000
5697 long 0xC0040000,0x83F2677A,0x65ECBF73,0x21C40000
5698 long 0xC0030000,0xFB53D14A,0xA9C2F2C2,0x20000000
5699 long 0xC0030000,0xEEC2D3A0,0x87AC669F,0x21380000
5700 long 0xC0030000,0xE231D5F6,0x6595DA7B,0xA1300000
5701 long 0xC0030000,0xD5A0D84C,0x437F4E58,0x9FC00000
5702 long 0xC0030000,0xC90FDAA2,0x2168C235,0x21000000
5703 long 0xC0030000,0xBC7EDCF7,0xFF523611,0xA1680000
5704 long 0xC0030000,0xAFEDDF4D,0xDD3BA9EE,0xA0A00000
5705 long 0xC0030000,0xA35CE1A3,0xBB251DCB,0x20900000
5706 long 0xC0030000,0x96CBE3F9,0x990E91A8,0x21600000
5707 long 0xC0030000,0x8A3AE64F,0x76F80584,0xA1080000
5708 long 0xC0020000,0xFB53D14A,0xA9C2F2C2,0x1F800000
5709 long 0xC0020000,0xE231D5F6,0x6595DA7B,0xA0B00000
5710 long 0xC0020000,0xC90FDAA2,0x2168C235,0x20800000
5711 long 0xC0020000,0xAFEDDF4D,0xDD3BA9EE,0xA0200000
5712 long 0xC0020000,0x96CBE3F9,0x990E91A8,0x20E00000
5713 long 0xC0010000,0xFB53D14A,0xA9C2F2C2,0x1F000000
5714 long 0xC0010000,0xC90FDAA2,0x2168C235,0x20000000
5715 long 0xC0010000,0x96CBE3F9,0x990E91A8,0x20600000
5716 long 0xC0000000,0xC90FDAA2,0x2168C235,0x1F800000
5717 long 0xBFFF0000,0xC90FDAA2,0x2168C235,0x1F000000
5718 long 0x00000000,0x00000000,0x00000000,0x00000000
5719 long 0x3FFF0000,0xC90FDAA2,0x2168C235,0x9F000000
5720 long 0x40000000,0xC90FDAA2,0x2168C235,0x9F800000
5721 long 0x40010000,0x96CBE3F9,0x990E91A8,0xA0600000
5722 long 0x40010000,0xC90FDAA2,0x2168C235,0xA0000000
5723 long 0x40010000,0xFB53D14A,0xA9C2F2C2,0x9F000000
5724 long 0x40020000,0x96CBE3F9,0x990E91A8,0xA0E00000
5725 long 0x40020000,0xAFEDDF4D,0xDD3BA9EE,0x20200000
5726 long 0x40020000,0xC90FDAA2,0x2168C235,0xA0800000
5727 long 0x40020000,0xE231D5F6,0x6595DA7B,0x20B00000
5728 long 0x40020000,0xFB53D14A,0xA9C2F2C2,0x9F800000
5729 long 0x40030000,0x8A3AE64F,0x76F80584,0x21080000
5730 long 0x40030000,0x96CBE3F9,0x990E91A8,0xA1600000
5731 long 0x40030000,0xA35CE1A3,0xBB251DCB,0xA0900000
5732 long 0x40030000,0xAFEDDF4D,0xDD3BA9EE,0x20A00000
5733 long 0x40030000,0xBC7EDCF7,0xFF523611,0x21680000
5734 long 0x40030000,0xC90FDAA2,0x2168C235,0xA1000000
5735 long 0x40030000,0xD5A0D84C,0x437F4E58,0x1FC00000
5736 long 0x40030000,0xE231D5F6,0x6595DA7B,0x21300000
5737 long 0x40030000,0xEEC2D3A0,0x87AC669F,0xA1380000
5738 long 0x40030000,0xFB53D14A,0xA9C2F2C2,0xA0000000
5739 long 0x40040000,0x83F2677A,0x65ECBF73,0xA1C40000
5740 long 0x40040000,0x8A3AE64F,0x76F80584,0x21880000
5741 long 0x40040000,0x90836524,0x88034B96,0xA0B00000
5742 long 0x40040000,0x96CBE3F9,0x990E91A8,0xA1E00000
5743 long 0x40040000,0x9D1462CE,0xAA19D7B9,0x21580000
5744 long 0x40040000,0xA35CE1A3,0xBB251DCB,0xA1100000
5745 long 0x40040000,0xA9A56078,0xCC3063DD,0xA1FC0000
5746 long 0x40040000,0xAFEDDF4D,0xDD3BA9EE,0x21200000
5747 long 0x40040000,0xB6365E22,0xEE46F000,0xA1480000
5748 long 0x40040000,0xBC7EDCF7,0xFF523611,0x21E80000
5749 long 0x40040000,0xC2C75BCD,0x105D7C23,0x20D00000
5750 long 0x40040000,0xC90FDAA2,0x2168C235,0xA1800000
# Tangent main path. Input: a0 -> extended-precision X, d0 = user rounding
# precision/mode (written to FPCR just before the final instruction).
# d1 holds |X| in compact form for the range checks. After the table-based
# reduction R = X - N*PI/2, tan(R) is evaluated as U/V for one parity of N
# and as -V/U for the other. Labels and branches are not visible in this
# excerpt.
5760 fmov.x (%a0),%fp0 # LOAD INPUT
5764 and.l &0x7FFFFFFF,%d1 # drop the sign: d1 = compact |X|
5766 cmp.l %d1,&0x3FD78000 # |X| >= 2**(-40)?
5770 cmp.l %d1,&0x4004BC7E # |X| < 15 PI?
5775 #--THIS IS THE USUAL CASE, |X| <= 15 PI.
5776 #--THE ARGUMENT REDUCTION IS DONE BY TABLE LOOK UP.
5778 fmul.d TWOBYPI(%pc),%fp1 # X*2/PI
5780 lea.l PITBL+0x200(%pc),%a1 # TABLE OF N*PI/2, N = -32,...,32
5782 fmov.l %fp1,%d1 # CONVERT TO INTEGER
5785 add.l %d1,%a1 # ADDRESS N*PIBY2 IN Y1, Y2
# The post-increment on the extended operand steps a1 past the 12-byte
# leading term, leaving it at the single-precision trailing term.
5787 fsub.x (%a1)+,%fp0 # X-Y1
5789 fsub.s (%a1),%fp0 # FP0 IS R = (X-Y1)-Y2
5792 and.l &0x80000000,%d1 # D1 WAS ODD IFF D1 < 0
5795 fmovm.x &0x0c,-(%sp) # save fp2,fp3
# First evaluation: R stays in fp0 and S = R*R is in fp1.
# Result is U/V with U = R+RS(P1+S(P2+SP3)) and V = 1+S(Q1+S(Q2+S(Q3+SQ4))).
5801 fmul.x %fp1,%fp1 # S = R*R
5803 fmov.d TANQ4(%pc),%fp3
5804 fmov.d TANP3(%pc),%fp2
5806 fmul.x %fp1,%fp3 # SQ4
5807 fmul.x %fp1,%fp2 # SP3
5809 fadd.d TANQ3(%pc),%fp3 # Q3+SQ4
5810 fadd.x TANP2(%pc),%fp2 # P2+SP3
5812 fmul.x %fp1,%fp3 # S(Q3+SQ4)
5813 fmul.x %fp1,%fp2 # S(P2+SP3)
5815 fadd.x TANQ2(%pc),%fp3 # Q2+S(Q3+SQ4)
5816 fadd.x TANP1(%pc),%fp2 # P1+S(P2+SP3)
5818 fmul.x %fp1,%fp3 # S(Q2+S(Q3+SQ4))
5819 fmul.x %fp1,%fp2 # S(P1+S(P2+SP3))
5821 fadd.x TANQ1(%pc),%fp3 # Q1+S(Q2+S(Q3+SQ4))
5822 fmul.x %fp0,%fp2 # RS(P1+S(P2+SP3))
5824 fmul.x %fp3,%fp1 # S(Q1+S(Q2+S(Q3+SQ4)))
5826 fadd.x %fp2,%fp0 # R+RS(P1+S(P2+SP3))
5828 fadd.s &0x3F800000,%fp1 # 1+S(Q1+...)
5830 fmovm.x (%sp)+,&0x30 # restore fp2,fp3
5832 fmov.l %d0,%fpcr # restore users round mode,prec
5833 fdiv.x %fp1,%fp0 # last inst - possible exception set
# Second evaluation: the roles are swapped, so S is in fp0 and R is in fp1.
# fp0 ends as V and fp1 as U, and the result is V divided by the
# sign-flipped stacked operand.
5838 fmul.x %fp0,%fp0 # S = R*R
5840 fmov.d TANQ4(%pc),%fp3
5841 fmov.d TANP3(%pc),%fp2
5843 fmul.x %fp0,%fp3 # SQ4
5844 fmul.x %fp0,%fp2 # SP3
5846 fadd.d TANQ3(%pc),%fp3 # Q3+SQ4
5847 fadd.x TANP2(%pc),%fp2 # P2+SP3
5849 fmul.x %fp0,%fp3 # S(Q3+SQ4)
5850 fmul.x %fp0,%fp2 # S(P2+SP3)
5852 fadd.x TANQ2(%pc),%fp3 # Q2+S(Q3+SQ4)
5853 fadd.x TANP1(%pc),%fp2 # P1+S(P2+SP3)
5855 fmul.x %fp0,%fp3 # S(Q2+S(Q3+SQ4))
5856 fmul.x %fp0,%fp2 # S(P1+S(P2+SP3))
5858 fadd.x TANQ1(%pc),%fp3 # Q1+S(Q2+S(Q3+SQ4))
5859 fmul.x %fp1,%fp2 # RS(P1+S(P2+SP3))
5861 fmul.x %fp3,%fp0 # S(Q1+S(Q2+S(Q3+SQ4)))
5863 fadd.x %fp2,%fp1 # R+RS(P1+S(P2+SP3))
5864 fadd.s &0x3F800000,%fp0 # 1+S(Q1+...)
5866 fmovm.x (%sp)+,&0x30 # restore fp2,fp3
# Flip the sign bit of the extended operand on top of the stack. Its push is
# not visible in this excerpt; presumably it is U from fp1.
5869 eor.l &0x80000000,(%sp)
5871 fmov.l %d0,%fpcr # restore users round mode,prec
5872 fdiv.x (%sp)+,%fp0 # last inst - possible exception set
5876 #--IF |X| > 15PI, WE USE THE GENERAL ARGUMENT REDUCTION.
5877 #--IF |X| < 2**(-40), RETURN X.
5878 cmp.l %d1,&0x3FFF8000 # d1 vs. 1.0 in compact form
# Tiny |X|: the result is X itself, reloaded under the user's rounding
# mode/precision so the move is the last (possibly exception-raising) op.
5883 fmov.l %d0,%fpcr # restore users round mode,prec
5884 mov.b &FMOV_OP,%d1 # last inst is MOVE
5885 fmov.x (%sp)+,%fp0 # last inst - possible exception set
5889 #--TAN(X) = X FOR DENORMALIZED X
# General argument reduction for the tangent routine: X is reduced modulo
# PI/2 in steps, carrying the remainder as a two-piece value (R,r) in
# (fp0,fp1). The integer quotient of the last step is stored in INT(%a6).
# fp2-fp5 and d2 are saved on entry and restored at the end. The loop labels
# and branches are not visible in this excerpt.
5893 #--WHEN REDUCEX IS USED, THE CODE WILL INEVITABLY BE SLOW.
5894 #--THIS REDUCTION METHOD, HOWEVER, IS MUCH FASTER THAN USING
5895 #--THE REMAINDER INSTRUCTION WHICH IS NOW IN SOFTWARE.
5897 fmovm.x &0x3c,-(%sp) # save {fp2-fp5}
5898 mov.l %d2,-(%sp) # save d2
5899 fmov.s &0x00000000,%fp1 # fp1 = 0
5901 #--If compact form of abs(arg) in d1=$7ffeffff, argument is so large that
5902 #--there is a danger of unwanted overflow in first LOOP iteration. In this
5903 #--case, reduce argument by one remainder step to make subsequent reduction
5905 cmp.l %d1,&0x7ffeffff # is arg dangerously large?
5908 # yes; create 2**16383*PI/2
5909 mov.w &0x7ffe,FP_SCR0_EX(%a6)
5910 mov.l &0xc90fdaa2,FP_SCR0_HI(%a6)
5911 clr.l FP_SCR0_LO(%a6)
5913 # create low half of 2**16383*PI/2 at FP_SCR1
5914 mov.w &0x7fdc,FP_SCR1_EX(%a6)
5915 mov.l &0x85a308d3,FP_SCR1_HI(%a6)
5916 clr.l FP_SCR1_LO(%a6)
5918 ftest.x %fp0 # test sign of argument
# For a positive argument both constants get their sign bit set, so the adds
# below subtract them. The conditional branch that skips the next two
# instructions for a negative argument is not visible in this excerpt.
5921 or.b &0x80,FP_SCR0_EX(%a6) # positive arg
5922 or.b &0x80,FP_SCR1_EX(%a6)
5924 fadd.x FP_SCR0(%a6),%fp0 # high part of reduction is exact
5925 fmov.x %fp0,%fp1 # save high result in fp1
5926 fadd.x FP_SCR1(%a6),%fp0 # low part of reduction
5927 fsub.x %fp0,%fp1 # determine low component of result
5928 fadd.x FP_SCR1(%a6),%fp1 # fp0/fp1 are reduced argument.
5930 #--ON ENTRY, FP0 IS X, ON RETURN, FP0 IS X REM PI/2, |X| <= PI/4.
5931 #--integer quotient will be stored in N
5932 #--Intermediate remainder is 66-bit long; (R,r) in (FP0,FP1)
5934 fmov.x %fp0,INARG(%a6) # +-2**K * F, 1 <= F < 2
5935 mov.w INARG(%a6),%d1
5936 mov.l %d1,%a1 # save a copy of d1
5937 and.l &0x00007FFF,%d1
5938 sub.l &0x00003FFF,%d1 # d1 = K
# Two alternatives follow: L = K-27 with ENDFLAG = 0, or L = 0 with
# ENDFLAG = 1. The compare/branch selecting between them is not visible in
# this excerpt.
5942 sub.l &27,%d1 # d1 = L := K-27
5943 mov.b &0,ENDFLAG(%a6)
5946 clr.l %d1 # d1 = L := 0
5947 mov.b &1,ENDFLAG(%a6)
5950 #--FIND THE REMAINDER OF (R,r) W.R.T. 2**L * (PI/2). L IS SO CHOSEN
5951 #--THAT INT( X * (2/PI) / 2**(L) ) < 2**29.
5953 #--CREATE 2**(-L) * (2/PI), SIGN(INARG)*2**(63),
5954 #--2**L * (PIby2_1), 2**L * (PIby2_2)
5956 mov.l &0x00003FFE,%d2 # BIASED EXP OF 2/PI
5957 sub.l %d1,%d2 # BIASED EXP OF 2**(-L)*(2/PI)
5959 mov.l &0xA2F9836E,FP_SCR0_HI(%a6)
5960 mov.l &0x4E44152A,FP_SCR0_LO(%a6)
5961 mov.w %d2,FP_SCR0_EX(%a6) # FP_SCR0 = 2**(-L)*(2/PI)
5964 fmul.x FP_SCR0(%a6),%fp2 # fp2 = X * 2**(-L)*(2/PI)
5966 #--WE MUST NOW FIND INT(FP2). SINCE WE NEED THIS VALUE IN
5967 #--FLOATING POINT FORMAT, THE TWO FMOVE'S FMOVE.L FP <--> N
5968 #--WILL BE TOO INEFFICIENT. THE WAY AROUND IT IS THAT
5969 #--(SIGN(INARG)*2**63 + FP2) - SIGN(INARG)*2**63 WILL GIVE
5970 #--US THE DESIRED VALUE IN FLOATING POINT.
5973 and.l &0x80000000,%d2
5974 or.l &0x5F000000,%d2 # d2 = SIGN(INARG)*2**63 IN SGL
5975 mov.l %d2,TWOTO63(%a6)
5976 fadd.s TWOTO63(%a6),%fp2 # THE FRACTIONAL PART OF FP2 IS ROUNDED
5977 fsub.s TWOTO63(%a6),%fp2 # fp2 = N
5978 # fintrz.x %fp2,%fp2
5980 #--CREATING 2**(L)*Piby2_1 and 2**(L)*Piby2_2
5981 mov.l %d1,%d2 # d2 = L
5983 add.l &0x00003FFF,%d2 # BIASED EXP OF 2**L * (PI/2)
5984 mov.w %d2,FP_SCR0_EX(%a6)
5985 mov.l &0xC90FDAA2,FP_SCR0_HI(%a6)
5986 clr.l FP_SCR0_LO(%a6) # FP_SCR0 = 2**(L) * Piby2_1
5988 add.l &0x00003FDD,%d1
5989 mov.w %d1,FP_SCR1_EX(%a6)
5990 mov.l &0x85A308D3,FP_SCR1_HI(%a6)
5991 clr.l FP_SCR1_LO(%a6) # FP_SCR1 = 2**(L) * Piby2_2
5993 mov.b ENDFLAG(%a6),%d1 # d1 = last-iteration flag (L is no longer needed)
5995 #--We are now ready to perform (R+r) - N*P1 - N*P2, P1 = 2**(L) * Piby2_1 and
5996 #--P2 = 2**(L) * Piby2_2
5997 fmov.x %fp2,%fp4 # fp4 = N
5998 fmul.x FP_SCR0(%a6),%fp4 # fp4 = W = N*P1
5999 fmov.x %fp2,%fp5 # fp5 = N
6000 fmul.x FP_SCR1(%a6),%fp5 # fp5 = w = N*P2
6001 fmov.x %fp4,%fp3 # fp3 = W = N*P1
6003 #--we want P+p = W+w but |p| <= half ulp of P
6004 #--Then, we need to compute A := R-P and a := r-p
6005 fadd.x %fp5,%fp3 # fp3 = P
6006 fsub.x %fp3,%fp4 # fp4 = W-P
6008 fsub.x %fp3,%fp0 # fp0 = A := R - P
6009 fadd.x %fp5,%fp4 # fp4 = p = (W-P)+w
6011 fmov.x %fp0,%fp3 # fp3 = A
6012 fsub.x %fp4,%fp1 # fp1 = a := r - p
6014 #--Now we need to normalize (A,a) to "new (R,r)" where R+r = A+a but
6015 #--|r| <= half ulp of R.
6016 fadd.x %fp1,%fp0 # fp0 = R := A+a
6017 #--No need to calculate r if this is the last loop
6021 #--Need to calculate r
6022 fsub.x %fp0,%fp3 # fp3 = A-R
6023 fadd.x %fp3,%fp1 # fp1 = r := (A-R)+a
6027 fmov.l %fp2,INT(%a6) # INT = N converted to a 32-bit integer
6028 mov.l (%sp)+,%d2 # restore d2
6029 fmovm.x (%sp)+,&0x3c # restore {fp2-fp5}
6036 #########################################################################
6037 # satan(): computes the arctangent of a normalized number #
6038 # satand(): computes the arctangent of a denormalized number #
6040 # INPUT *************************************************************** #
6041 # a0 = pointer to extended precision input #
6042 # d0 = round precision,mode #
6044 # OUTPUT ************************************************************** #
6047 # ACCURACY and MONOTONICITY ******************************************* #
6048 # The returned result is within 2 ulps in 64 significant bit, #
6049 # i.e. within 0.5001 ulp to 53 bits if the result is subsequently #
6050 # rounded to double precision. The result is provably monotonic #
6051 # in double precision. #
6053 # ALGORITHM *********************************************************** #
6054 # Step 1. If |X| >= 16 or |X| < 1/16, go to Step 5. #
6056 # Step 2. Let X = sgn * 2**k * 1.xxxxxxxx...x. #
6057 # Note that k = -4, -3,..., or 3. #
6058 # Define F = sgn * 2**k * 1.xxxx1, i.e. the first 5 #
6059 # significant bits of X with a bit-1 attached at the 6-th #
6060 # bit position. Define u to be u = (X-F) / (1 + X*F). #
6062 # Step 3. Approximate arctan(u) by a polynomial poly. #
6064 # Step 4. Return arctan(F) + poly, arctan(F) is fetched from a #
6065 # table of values calculated beforehand. Exit. #
6067 # Step 5. If |X| >= 16, go to Step 7. #
6069 # Step 6. Approximate arctan(X) by an odd polynomial in X. Exit. #
6071 # Step 7. Define X' = -1/X. Approximate arctan(X') by an odd #
6072 # polynomial in X'. #
6073 # Arctan(X) = sign(X)*Pi/2 + arctan(X'). Exit. #
6075 #########################################################################
# Constants for the arctangent routine.
# ATANA1-ATANA3: double-precision coefficients of the rearranged form
# U + A1*U*V*(A2 + V*(A3 + V)), V = U*U, used on the table path.
6077 ATANA3: long 0xBFF6687E,0x314987D8
6078 ATANA2: long 0x4002AC69,0x34A26DB3
6079 ATANA1: long 0xBFC2476F,0x4E1DA28E
# ATANB1-ATANB6: double-precision coefficients of the odd polynomial
# X + X*Y*(B1+Y*(B2+...+Y*B6)), Y = X*X, used for small |X|.
6081 ATANB6: long 0x3FB34444,0x7F876989
6082 ATANB5: long 0xBFB744EE,0x7FAF45DB
6083 ATANB4: long 0x3FBC71C6,0x46940220
6084 ATANB3: long 0xBFC24924,0x921872F9
6085 ATANB2: long 0x3FC99999,0x99998FA9
6086 ATANB1: long 0xBFD55555,0x55555555
# ATANC1-ATANC5: double-precision coefficients of the odd polynomial in
# X' = -1/X used for large |X|.
6088 ATANC5: long 0xBFB70BF3,0x98539E6A
6089 ATANC4: long 0x3FBC7187,0x962D1D7D
6090 ATANC3: long 0xBFC24924,0x827107B8
6091 ATANC2: long 0x3FC99999,0x9996263E
6092 ATANC1: long 0xBFD55555,0x55555536
# +PI/2 and -PI/2 in extended precision (exponent 0x3FFF, fraction
# 0xC90FDAA22168C235).
6094 PPIBY2: long 0x3FFF0000,0xC90FDAA2,0x2168C235,0x00000000
6095 NPIBY2: long 0xBFFF0000,0xC90FDAA2,0x2168C235,0x00000000
# +/- the smallest normalized extended magnitude (biased exponent 1, only the
# integer bit set). Presumably the TINY in SIGN(X)*(PI/2 - TINY) for huge |X|.
6097 PTINY: long 0x00010000,0x80000000,0x00000000,0x00000000
6098 NTINY: long 0x80010000,0x80000000,0x00000000,0x00000000
# Table of ATAN(|F|) values: 128 entries of 16 bytes each (sign/exponent
# word, 64 fraction bits, one zero pad long), all positive and increasing.
# Presumably this is the ATANTBL referenced by the arctangent code; the label
# line is not visible in this excerpt. That code adds a byte offset derived
# from the exponent and four fraction bits of X to the table base, then
# copies the first three longs of the selected entry into
# ATANF/ATANFHI/ATANFLO.
6101 long 0x3FFB0000,0x83D152C5,0x060B7A51,0x00000000
6102 long 0x3FFB0000,0x8BC85445,0x65498B8B,0x00000000
6103 long 0x3FFB0000,0x93BE4060,0x17626B0D,0x00000000
6104 long 0x3FFB0000,0x9BB3078D,0x35AEC202,0x00000000
6105 long 0x3FFB0000,0xA3A69A52,0x5DDCE7DE,0x00000000
6106 long 0x3FFB0000,0xAB98E943,0x62765619,0x00000000
6107 long 0x3FFB0000,0xB389E502,0xF9C59862,0x00000000
6108 long 0x3FFB0000,0xBB797E43,0x6B09E6FB,0x00000000
6109 long 0x3FFB0000,0xC367A5C7,0x39E5F446,0x00000000
6110 long 0x3FFB0000,0xCB544C61,0xCFF7D5C6,0x00000000
6111 long 0x3FFB0000,0xD33F62F8,0x2488533E,0x00000000
6112 long 0x3FFB0000,0xDB28DA81,0x62404C77,0x00000000
6113 long 0x3FFB0000,0xE310A407,0x8AD34F18,0x00000000
6114 long 0x3FFB0000,0xEAF6B0A8,0x188EE1EB,0x00000000
6115 long 0x3FFB0000,0xF2DAF194,0x9DBE79D5,0x00000000
6116 long 0x3FFB0000,0xFABD5813,0x61D47E3E,0x00000000
6117 long 0x3FFC0000,0x8346AC21,0x0959ECC4,0x00000000
6118 long 0x3FFC0000,0x8B232A08,0x304282D8,0x00000000
6119 long 0x3FFC0000,0x92FB70B8,0xD29AE2F9,0x00000000
6120 long 0x3FFC0000,0x9ACF476F,0x5CCD1CB4,0x00000000
6121 long 0x3FFC0000,0xA29E7630,0x4954F23F,0x00000000
6122 long 0x3FFC0000,0xAA68C5D0,0x8AB85230,0x00000000
6123 long 0x3FFC0000,0xB22DFFFD,0x9D539F83,0x00000000
6124 long 0x3FFC0000,0xB9EDEF45,0x3E900EA5,0x00000000
6125 long 0x3FFC0000,0xC1A85F1C,0xC75E3EA5,0x00000000
6126 long 0x3FFC0000,0xC95D1BE8,0x28138DE6,0x00000000
6127 long 0x3FFC0000,0xD10BF300,0x840D2DE4,0x00000000
6128 long 0x3FFC0000,0xD8B4B2BA,0x6BC05E7A,0x00000000
6129 long 0x3FFC0000,0xE0572A6B,0xB42335F6,0x00000000
6130 long 0x3FFC0000,0xE7F32A70,0xEA9CAA8F,0x00000000
6131 long 0x3FFC0000,0xEF888432,0x64ECEFAA,0x00000000
6132 long 0x3FFC0000,0xF7170A28,0xECC06666,0x00000000
6133 long 0x3FFD0000,0x812FD288,0x332DAD32,0x00000000
6134 long 0x3FFD0000,0x88A8D1B1,0x218E4D64,0x00000000
6135 long 0x3FFD0000,0x9012AB3F,0x23E4AEE8,0x00000000
6136 long 0x3FFD0000,0x976CC3D4,0x11E7F1B9,0x00000000
6137 long 0x3FFD0000,0x9EB68949,0x3889A227,0x00000000
6138 long 0x3FFD0000,0xA5EF72C3,0x4487361B,0x00000000
6139 long 0x3FFD0000,0xAD1700BA,0xF07A7227,0x00000000
6140 long 0x3FFD0000,0xB42CBCFA,0xFD37EFB7,0x00000000
6141 long 0x3FFD0000,0xBB303A94,0x0BA80F89,0x00000000
6142 long 0x3FFD0000,0xC22115C6,0xFCAEBBAF,0x00000000
6143 long 0x3FFD0000,0xC8FEF3E6,0x86331221,0x00000000
6144 long 0x3FFD0000,0xCFC98330,0xB4000C70,0x00000000
6145 long 0x3FFD0000,0xD6807AA1,0x102C5BF9,0x00000000
6146 long 0x3FFD0000,0xDD2399BC,0x31252AA3,0x00000000
6147 long 0x3FFD0000,0xE3B2A855,0x6B8FC517,0x00000000
6148 long 0x3FFD0000,0xEA2D764F,0x64315989,0x00000000
6149 long 0x3FFD0000,0xF3BF5BF8,0xBAD1A21D,0x00000000
6150 long 0x3FFE0000,0x801CE39E,0x0D205C9A,0x00000000
6151 long 0x3FFE0000,0x8630A2DA,0xDA1ED066,0x00000000
6152 long 0x3FFE0000,0x8C1AD445,0xF3E09B8C,0x00000000
6153 long 0x3FFE0000,0x91DB8F16,0x64F350E2,0x00000000
6154 long 0x3FFE0000,0x97731420,0x365E538C,0x00000000
6155 long 0x3FFE0000,0x9CE1C8E6,0xA0B8CDBA,0x00000000
6156 long 0x3FFE0000,0xA22832DB,0xCADAAE09,0x00000000
6157 long 0x3FFE0000,0xA746F2DD,0xB7602294,0x00000000
6158 long 0x3FFE0000,0xAC3EC0FB,0x997DD6A2,0x00000000
6159 long 0x3FFE0000,0xB110688A,0xEBDC6F6A,0x00000000
6160 long 0x3FFE0000,0xB5BCC490,0x59ECC4B0,0x00000000
6161 long 0x3FFE0000,0xBA44BC7D,0xD470782F,0x00000000
6162 long 0x3FFE0000,0xBEA94144,0xFD049AAC,0x00000000
6163 long 0x3FFE0000,0xC2EB4ABB,0x661628B6,0x00000000
6164 long 0x3FFE0000,0xC70BD54C,0xE602EE14,0x00000000
6165 long 0x3FFE0000,0xCD000549,0xADEC7159,0x00000000
6166 long 0x3FFE0000,0xD48457D2,0xD8EA4EA3,0x00000000
6167 long 0x3FFE0000,0xDB948DA7,0x12DECE3B,0x00000000
6168 long 0x3FFE0000,0xE23855F9,0x69E8096A,0x00000000
6169 long 0x3FFE0000,0xE8771129,0xC4353259,0x00000000
6170 long 0x3FFE0000,0xEE57C16E,0x0D379C0D,0x00000000
6171 long 0x3FFE0000,0xF3E10211,0xA87C3779,0x00000000
6172 long 0x3FFE0000,0xF919039D,0x758B8D41,0x00000000
6173 long 0x3FFE0000,0xFE058B8F,0x64935FB3,0x00000000
6174 long 0x3FFF0000,0x8155FB49,0x7B685D04,0x00000000
6175 long 0x3FFF0000,0x83889E35,0x49D108E1,0x00000000
6176 long 0x3FFF0000,0x859CFA76,0x511D724B,0x00000000
6177 long 0x3FFF0000,0x87952ECF,0xFF8131E7,0x00000000
6178 long 0x3FFF0000,0x89732FD1,0x9557641B,0x00000000
6179 long 0x3FFF0000,0x8B38CAD1,0x01932A35,0x00000000
6180 long 0x3FFF0000,0x8CE7A8D8,0x301EE6B5,0x00000000
6181 long 0x3FFF0000,0x8F46A39E,0x2EAE5281,0x00000000
6182 long 0x3FFF0000,0x922DA7D7,0x91888487,0x00000000
6183 long 0x3FFF0000,0x94D19FCB,0xDEDF5241,0x00000000
6184 long 0x3FFF0000,0x973AB944,0x19D2A08B,0x00000000
6185 long 0x3FFF0000,0x996FF00E,0x08E10B96,0x00000000
6186 long 0x3FFF0000,0x9B773F95,0x12321DA7,0x00000000
6187 long 0x3FFF0000,0x9D55CC32,0x0F935624,0x00000000
6188 long 0x3FFF0000,0x9F100575,0x006CC571,0x00000000
6189 long 0x3FFF0000,0xA0A9C290,0xD97CC06C,0x00000000
6190 long 0x3FFF0000,0xA22659EB,0xEBC0630A,0x00000000
6191 long 0x3FFF0000,0xA388B4AF,0xF6EF0EC9,0x00000000
6192 long 0x3FFF0000,0xA4D35F10,0x61D292C4,0x00000000
6193 long 0x3FFF0000,0xA60895DC,0xFBE3187E,0x00000000
6194 long 0x3FFF0000,0xA72A51DC,0x7367BEAC,0x00000000
6195 long 0x3FFF0000,0xA83A5153,0x0956168F,0x00000000
6196 long 0x3FFF0000,0xA93A2007,0x7539546E,0x00000000
6197 long 0x3FFF0000,0xAA9E7245,0x023B2605,0x00000000
6198 long 0x3FFF0000,0xAC4C84BA,0x6FE4D58F,0x00000000
6199 long 0x3FFF0000,0xADCE4A4A,0x606B9712,0x00000000
6200 long 0x3FFF0000,0xAF2A2DCD,0x8D263C9C,0x00000000
6201 long 0x3FFF0000,0xB0656F81,0xF22265C7,0x00000000
6202 long 0x3FFF0000,0xB1846515,0x0F71496A,0x00000000
6203 long 0x3FFF0000,0xB28AAA15,0x6F9ADA35,0x00000000
6204 long 0x3FFF0000,0xB37B44FF,0x3766B895,0x00000000
6205 long 0x3FFF0000,0xB458C3DC,0xE9630433,0x00000000
6206 long 0x3FFF0000,0xB525529D,0x562246BD,0x00000000
6207 long 0x3FFF0000,0xB5E2CCA9,0x5F9D88CC,0x00000000
6208 long 0x3FFF0000,0xB692CADA,0x7ACA1ADA,0x00000000
6209 long 0x3FFF0000,0xB736AEA7,0xA6925838,0x00000000
6210 long 0x3FFF0000,0xB7CFAB28,0x7E9F7B36,0x00000000
6211 long 0x3FFF0000,0xB85ECC66,0xCB219835,0x00000000
6212 long 0x3FFF0000,0xB8E4FD5A,0x20A593DA,0x00000000
6213 long 0x3FFF0000,0xB99F41F6,0x4AFF9BB5,0x00000000
6214 long 0x3FFF0000,0xBA7F1E17,0x842BBE7B,0x00000000
6215 long 0x3FFF0000,0xBB471285,0x7637E17D,0x00000000
6216 long 0x3FFF0000,0xBBFABE8A,0x4788DF6F,0x00000000
6217 long 0x3FFF0000,0xBC9D0FAD,0x2B689D79,0x00000000
6218 long 0x3FFF0000,0xBD306A39,0x471ECD86,0x00000000
6219 long 0x3FFF0000,0xBDB6C731,0x856AF18A,0x00000000
6220 long 0x3FFF0000,0xBE31CAC5,0x02E80D70,0x00000000
6221 long 0x3FFF0000,0xBEA2D55C,0xE33194E2,0x00000000
6222 long 0x3FFF0000,0xBF0B10B7,0xC03128F0,0x00000000
6223 long 0x3FFF0000,0xBF6B7A18,0xDACB778D,0x00000000
6224 long 0x3FFF0000,0xBFC4EA46,0x63FA18F6,0x00000000
6225 long 0x3FFF0000,0xC0181BDE,0x8B89A454,0x00000000
6226 long 0x3FFF0000,0xC065B066,0xCFBF6439,0x00000000
6227 long 0x3FFF0000,0xC0AE345F,0x56340AE6,0x00000000
6228 long 0x3FFF0000,0xC0F22291,0x9CB9E6A7,0x00000000
# Arctangent main path. Input: a0 -> extended-precision X, d0 = user rounding
# precision/mode (written to FPCR just before the final add). d1 holds |X| in
# compact form for the range checks. For 1/16 <= |X| < 16 the result is
# ATAN(F) from the table plus a short polynomial in U = (X-F)/(1+X*F).
# Labels and branches are not visible in this excerpt.
6240 #--ENTRY POINT FOR ATAN(X), HERE X IS FINITE, NON-ZERO, AND NOT NAN'S
6242 fmov.x (%a0),%fp0 # LOAD INPUT
6247 and.l &0x7FFFFFFF,%d1 # drop the sign: d1 = compact |X|
6249 cmp.l %d1,&0x3FFB8000 # |X| >= 1/16?
6254 cmp.l %d1,&0x4002FFFF # |X| < 16 ?
6258 #--THE MOST LIKELY CASE, |X| IN [1/16, 16). WE USE TABLE TECHNIQUE
6259 #--THE IDEA IS ATAN(X) = ATAN(F) + ATAN( [X-F] / [1+XF] ).
6260 #--SO IF F IS CHOSEN TO BE CLOSE TO X AND ATAN(F) IS STORED IN
6261 #--A TABLE, ALL WE NEED IS TO APPROXIMATE ATAN(U) WHERE
6262 #--U = (X-F)/(1+XF) IS SMALL (REMEMBER F IS CLOSE TO X). IT IS
6263 #--TRUE THAT A DIVIDE IS NOW NEEDED, BUT THE APPROXIMATION FOR
6264 #--ATAN(U) IS A VERY SHORT POLYNOMIAL AND THE INDEXING TO
6265 #--FETCH F AND SAVING OF REGISTERS CAN ALL BE HIDDEN UNDER THE
6266 #--DIVIDE. IN THE END THIS METHOD IS MUCH FASTER THAN A TRADITIONAL
6267 #--ONE. NOTE ALSO THAT THE TRADITIONAL SCHEME THAT APPROXIMATES
6268 #--ATAN(X) DIRECTLY WILL NEED TO USE A RATIONAL APPROXIMATION
6269 #--(DIVISION NEEDED) ANYWAY BECAUSE A POLYNOMIAL APPROXIMATION
6270 #--WILL INVOLVE A VERY LONG POLYNOMIAL.
6272 #--NOW WE SEE X AS +-2^K * 1.BBBBBBB....B <- 1. + 63 BITS
6273 #--WE CHOOSE F TO BE +-2^K * 1.BBBB1
6274 #--THAT IS IT MATCHES THE EXPONENT AND FIRST 5 BITS OF X, THE
6275 #--SIXTH BIT IS SET TO BE 1. SINCE K = -4, -3, ..., 3, THERE
6276 #--ARE ONLY 8 TIMES 16 = 2^7 = 128 |F|'S. SINCE ATAN(-|F|) IS
6277 #-- -ATAN(|F|), WE NEED TO STORE ONLY ATAN(|F|).
6281 and.l &0xF8000000,XFRAC(%a6) # FIRST 5 BITS
6282 or.l &0x04000000,XFRAC(%a6) # SET 6-TH BIT TO 1
6283 mov.l &0x00000000,XFRACLO(%a6) # LOCATION OF X IS NOW F
6285 fmov.x %fp0,%fp1 # FP1 IS X
6286 fmul.x X(%a6),%fp1 # FP1 IS X*F, NOTE THAT X*F > 0
6287 fsub.x X(%a6),%fp0 # FP0 IS X-F
6288 fadd.s &0x3F800000,%fp1 # FP1 IS 1 + X*F
6289 fdiv.x %fp1,%fp0 # FP0 IS U = (X-F)/(1+X*F)
6291 #--WHILE THE DIVISION IS TAKING ITS TIME, WE FETCH ATAN(|F|)
6292 #--CREATE ATAN(F) AND STORE IT IN ATANF, AND
6293 #--SAVE REGISTERS FP2.
6295 mov.l %d2,-(%sp) # SAVE d2 TEMPORARILY
6296 mov.l %d1,%d2 # THE EXP AND 16 BITS OF X
6297 and.l &0x00007800,%d1 # 4 VARYING BITS OF F'S FRACTION
6298 and.l &0x7FFF0000,%d2 # EXPONENT OF F
6299 sub.l &0x3FFB0000,%d2 # K+4
6301 add.l %d2,%d1 # THE 7 BITS IDENTIFYING F
6302 asr.l &7,%d1 # INDEX INTO TBL OF ATAN(|F|)
6303 lea ATANTBL(%pc),%a1
6304 add.l %d1,%a1 # ADDRESS OF ATAN(|F|)
6305 mov.l (%a1)+,ATANF(%a6)
6306 mov.l (%a1)+,ATANFHI(%a6)
6307 mov.l (%a1)+,ATANFLO(%a6) # ATANF IS NOW ATAN(|F|)
6308 mov.l X(%a6),%d1 # LOAD SIGN AND EXPO. AGAIN
6309 and.l &0x80000000,%d1 # SIGN(F)
6310 or.l %d1,ATANF(%a6) # ATANF IS NOW SIGN(F)*ATAN(|F|)
6311 mov.l (%sp)+,%d2 # RESTORE d2
6313 #--THAT'S ALL I HAVE TO DO FOR NOW,
6314 #--BUT ALAS, THE DIVIDE IS STILL CRANKING!
6316 #--U IN FP0, WE ARE NOW READY TO COMPUTE ATAN(U) AS
6317 #--U + A1*U*V*(A2 + V*(A3 + V)), V = U*U
6318 #--THE POLYNOMIAL MAY LOOK STRANGE, BUT IS NEVERTHELESS CORRECT.
6319 #--THE NATURAL FORM IS U + U*V*(A1 + V*(A2 + V*A3))
6320 #--WHAT WE HAVE HERE IS MERELY A1 = A3, A2 = A1/A3, A3 = A2/A3.
6321 #--THE REASON FOR THIS REARRANGEMENT IS TO MAKE THE INDEPENDENT
6322 #--PARTS A1*U*V AND (A2 + ... STUFF) MORE LOAD-BALANCED
6324 fmovm.x &0x04,-(%sp) # save fp2
# fp1 is expected to hold V = U*U at this point; the instructions that form
# it are not visible in this excerpt.
6328 fmov.d ATANA3(%pc),%fp2
6329 fadd.x %fp1,%fp2 # A3+V
6330 fmul.x %fp1,%fp2 # V*(A3+V)
6331 fmul.x %fp0,%fp1 # U*V
6332 fadd.d ATANA2(%pc),%fp2 # A2+V*(A3+V)
6333 fmul.d ATANA1(%pc),%fp1 # A1*U*V
6334 fmul.x %fp2,%fp1 # A1*U*V*(A2+V*(A3+V))
6335 fadd.x %fp1,%fp0 # ATAN(U), FP1 RELEASED
6337 fmovm.x (%sp)+,&0x20 # restore fp2
6339 fmov.l %d0,%fpcr # restore users rnd mode,prec
6340 fadd.x ATANF(%a6),%fp0 # ATAN(X)
6344 #--|X| IS IN d1 IN COMPACT FORM. FP1, d0 SAVED.
6345 #--FP0 IS X AND |X| <= 1/16 OR |X| >= 16.
# Only those two ranges reach this point, so comparing against 1.0
# (0x3FFF8000 in compact form) is enough to tell them apart.
6346 cmp.l %d1,&0x3FFF8000
6347 bgt.w ATANBIG # I.E. |X| >= 16
6351 #--IF |X| < 2^(-40), RETURN X AS ANSWER. OTHERWISE, APPROXIMATE
6352 #--ATAN(X) BY X + X*Y*(B1+Y*(B2+Y*(B3+Y*(B4+Y*(B5+Y*B6)))))
6353 #--WHICH IS X + X*Y*( [B1+Z*(B3+Z*B5)] + [Y*(B2+Z*(B4+Z*B6))] )
6354 #--WHERE Y = X*X, AND Z = Y*Y.
6356 cmp.l %d1,&0x3FD78000 # compact |X| vs. 2^(-40)
6359 #--COMPUTE POLYNOMIAL
6360 fmovm.x &0x0c,-(%sp) # save fp2/fp3
6362 fmul.x %fp0,%fp0 # FP0 IS Y = X*X
# fp1 is expected to hold a copy of Y before the next instruction squares it;
# that copy is not visible in this excerpt.
6365 fmul.x %fp1,%fp1 # FP1 IS Z = Y*Y
6367 fmov.d ATANB6(%pc),%fp2
6368 fmov.d ATANB5(%pc),%fp3
6370 fmul.x %fp1,%fp2 # Z*B6
6371 fmul.x %fp1,%fp3 # Z*B5
6373 fadd.d ATANB4(%pc),%fp2 # B4+Z*B6
6374 fadd.d ATANB3(%pc),%fp3 # B3+Z*B5
6376 fmul.x %fp1,%fp2 # Z*(B4+Z*B6)
6377 fmul.x %fp3,%fp1 # Z*(B3+Z*B5)
6379 fadd.d ATANB2(%pc),%fp2 # B2+Z*(B4+Z*B6)
6380 fadd.d ATANB1(%pc),%fp1 # B1+Z*(B3+Z*B5)
6382 fmul.x %fp0,%fp2 # Y*(B2+Z*(B4+Z*B6))
6383 fmul.x X(%a6),%fp0 # X*Y
6385 fadd.x %fp2,%fp1 # [B1+Z*(B3+Z*B5)]+[Y*(B2+Z*(B4+Z*B6))]
6387 fmul.x %fp1,%fp0 # X*Y*([B1+Z*(B3+Z*B5)]+[Y*(B2+Z*(B4+Z*B6))])
6389 fmovm.x (%sp)+,&0x30 # restore fp2/fp3
6391 fmov.l %d0,%fpcr # restore users rnd mode,prec
6396 #--|X| < 2^(-40), ATAN(X) = X
6398 fmov.l %d0,%fpcr # restore users rnd mode,prec
6399 mov.b &FMOV_OP,%d1 # last inst is MOVE
6400 fmov.x X(%a6),%fp0 # last inst - possible exception set
6405 #--IF |X| > 2^(100), RETURN SIGN(X)*(PI/2 - TINY). OTHERWISE,
6406 #--RETURN SIGN(X)*PI/2 + ATAN(-1/X).
6407 cmp.l %d1,&0x40638000
6410 #--APPROXIMATE ATAN(-1/X) BY
6411 #--X'+X'*Y*(C1+Y*(C2+Y*(C3+Y*(C4+Y*C5)))), X' = -1/X, Y = X'*X'
6412 #--THIS CAN BE RE-WRITTEN AS
6413 #--X'+X'*Y*( [C1+Z*(C3+Z*C5)] + [Y*(C2+Z*C4)] ), Z = Y*Y.
6415 fmovm.x &0x0c,-(%sp) # save fp2/fp3
6417 fmov.s &0xBF800000,%fp1 # LOAD -1
6418 fdiv.x %fp0,%fp1 # FP1 IS -1/X
6420 #--DIVIDE IS STILL CRANKING
6422 fmov.x %fp1,%fp0 # FP0 IS X'
6423 fmul.x %fp0,%fp0 # FP0 IS Y = X'*X'
6424 fmov.x %fp1,X(%a6) # X IS REALLY X'
6427 fmul.x %fp1,%fp1 # FP1 IS Z = Y*Y
6429 fmov.d ATANC5(%pc),%fp3
6430 fmov.d ATANC4(%pc),%fp2
6432 fmul.x %fp1,%fp3 # Z*C5
6433 fmul.x %fp1,%fp2 # Z*B4
6435 fadd.d ATANC3(%pc),%fp3 # C3+Z*C5
6436 fadd.d ATANC2(%pc),%fp2 # C2+Z*C4
6438 fmul.x %fp3,%fp1 # Z*(C3+Z*C5), FP3 RELEASED
6439 fmul.x %fp0,%fp2 # Y*(C2+Z*C4)
6441 fadd.d ATANC1(%pc),%fp1 # C1+Z*(C3+Z*C5)
6442 fmul.x X(%a6),%fp0 # X'*Y
6444 fadd.x %fp2,%fp1 # [Y*(C2+Z*C4)]+[C1+Z*(C3+Z*C5)]
6446 fmul.x %fp1,%fp0 # X'*Y*([B1+Z*(B3+Z*B5)]
6447 # ... +[Y*(B2+Z*(B4+Z*B6))])
6450 fmovm.x (%sp)+,&0x30 # restore fp2/fp3
6452 fmov.l %d0,%fpcr # restore users rnd mode,prec
6457 fadd.x NPIBY2(%pc),%fp0
6461 fadd.x PPIBY2(%pc),%fp0
6465 #--RETURN SIGN(X)*(PIBY2 - TINY) = SIGN(X)*PIBY2 - SIGN(X)*TINY
6470 fmov.x NPIBY2(%pc),%fp0
6472 fadd.x PTINY(%pc),%fp0
6476 fmov.x PPIBY2(%pc),%fp0
6478 fadd.x NTINY(%pc),%fp0
6482 #--ENTRY POINT FOR ATAN(X) FOR DENORMALIZED ARGUMENT
6486 #########################################################################
6487 # sasin(): computes the inverse sine of a normalized input #
6488 # sasind(): computes the inverse sine of a denormalized input #
6490 # INPUT *************************************************************** #
6491 # a0 = pointer to extended precision input #
6492 # d0 = round precision,mode #
6494 # OUTPUT ************************************************************** #
6497 # ACCURACY and MONOTONICITY ******************************************* #
6498 # The returned result is within 3 ulps in 64 significant bits, #
6499 # i.e. within 0.5001 ulp to 53 bits if the result is subsequently #
6500 # rounded to double precision. The result is provably monotonic #
6501 # in double precision. #
6503 # ALGORITHM *********************************************************** #
6506 # 1. If |X| >= 1, go to 3. #
6508 # 2. (|X| < 1) Calculate asin(X) by #
6509 # z := sqrt( [1-X][1+X] ) #
6510 # asin(X) = atan( x / z ). #
6513 # 3. If |X| > 1, go to 5. #
6515 # 4. (|X| = 1) sgn := sign(X), return asin(X) := sgn * Pi/2. Exit.#
6517 # 5. (|X| > 1) Generate an invalid operation by 0 * infinity. #
6520 #########################################################################
6524 fmov.x (%a0),%fp0 # LOAD INPUT
6528 and.l &0x7FFFFFFF,%d1 # clear the sign bit of d1
6529 cmp.l %d1,&0x3FFF8000 # compare with 1.0 (expo 0x3FFF, leading fraction 0x8000)
6532 # This catch is added here for the '060 QSP. Originally, the call to
6533 # satan() would handle this case by causing the exception which would
6534 # not be caught until gen_except(). Now, with the exceptions being
6535 # detected inside of satan(), the exception would have been handled there
6536 # instead of inside sasin() as expected.
6537 cmp.l %d1,&0x3FD78000 # compare with 2^(-40) (expo 0x3FD7 = 0x3FFF - 40)
6540 #--THIS IS THE USUAL CASE, |X| < 1
6541 #--ASIN(X) = ATAN( X / SQRT( (1-X)(1+X) ) )
6544 fmov.s &0x3F800000,%fp1 # fp1 = 1.0 (single-precision immediate)
6545 fsub.x %fp0,%fp1 # 1-X
6546 fmovm.x &0x4,-(%sp) # save fp2 {fp2}
6547 fmov.s &0x3F800000,%fp2 # fp2 = 1.0 (single-precision immediate)
6548 fadd.x %fp0,%fp2 # 1+X
6549 fmul.x %fp2,%fp1 # (1+X)(1-X)
6550 fmovm.x (%sp)+,&0x20 # restore fp2 {fp2}
6551 fsqrt.x %fp1 # SQRT([1-X][1+X])
6552 fdiv.x %fp1,%fp0 # X/SQRT([1-X][1+X])
6553 fmovm.x &0x01,-(%sp) # save X/SQRT(...)
6554 lea (%sp),%a0 # pass ptr to X/SQRT(...)
6556 add.l &0xc,%sp # clear X/SQRT(...) from stack (12 bytes = one extended value)
6561 fcmp.s %fp0,&0x3F800000 # compare fp0 with 1.0
6562 fbgt t_operr # cause an operr exception
6564 #--|X| = 1, ASIN(X) = +- PI/2.
6566 fmov.x PIBY2(%pc),%fp0 # fp0 = PI/2
6568 and.l &0x80000000,%d1 # SIGN BIT OF X
6569 or.l &0x3F800000,%d1 # +-1 IN SGL FORMAT
6570 mov.l %d1,-(%sp) # push SIGN(X) IN SGL-FMT
6575 #--|X| < 2^(-40), ASIN(X) = X
6577 fmov.l %d0,%fpcr # restore users rnd mode,prec
6578 mov.b &FMOV_OP,%d1 # last inst is MOVE
6579 fmov.x (%a0),%fp0 # last inst - possible exception
6583 #--ASIN(X) = X FOR DENORMALIZED X
6587 #########################################################################
6588 # sacos(): computes the inverse cosine of a normalized input #
6589 # sacosd(): computes the inverse cosine of a denormalized input #
6591 # INPUT *************************************************************** #
6592 # a0 = pointer to extended precision input #
6593 # d0 = round precision,mode #
6595 # OUTPUT ************************************************************** #
6598 # ACCURACY and MONOTONICITY ******************************************* #
6599 # The returned result is within 3 ulps in 64 significant bits, #
6600 # i.e. within 0.5001 ulp to 53 bits if the result is subsequently #
6601 # rounded to double precision. The result is provably monotonic #
6602 # in double precision. #
6604 # ALGORITHM *********************************************************** #
6607 # 1. If |X| >= 1, go to 3. #
6609 # 2. (|X| < 1) Calculate acos(X) by #
6610 # z := (1-X) / (1+X) #
6611 # acos(X) = 2 * atan( sqrt(z) ). #
6614 # 3. If |X| > 1, go to 5. #
6616 # 4. (|X| = 1) If X > 0, return 0. Otherwise, return Pi. Exit. #
6618 # 5. (|X| > 1) Generate an invalid operation by 0 * infinity. #
6621 #########################################################################
6625 fmov.x (%a0),%fp0 # LOAD INPUT
6627 mov.l (%a0),%d1 # pack exp w/ upper 16 fraction
6629 and.l &0x7FFFFFFF,%d1 # clear the sign bit of d1
6630 cmp.l %d1,&0x3FFF8000 # compare with 1.0 (expo 0x3FFF, leading fraction 0x8000)
6633 #--THIS IS THE USUAL CASE, |X| < 1
6634 #--ACOS(X) = 2 * ATAN( SQRT( (1-X)/(1+X) ) )
6637 fmov.s &0x3F800000,%fp1 # fp1 = 1.0 (single-precision immediate)
6638 fadd.x %fp0,%fp1 # 1+X
6640 fadd.s &0x3F800000,%fp0 # 1-X
6641 fdiv.x %fp1,%fp0 # (1-X)/(1+X)
6642 fsqrt.x %fp0 # SQRT((1-X)/(1+X))
6643 mov.l %d0,-(%sp) # save original users fpcr
6645 fmovm.x &0x01,-(%sp) # save SQRT(...) to stack
6646 lea (%sp),%a0 # pass ptr to sqrt
6647 bsr satan # ATAN(SQRT([1-X]/[1+X]))
6648 add.l &0xc,%sp # clear SQRT(...) from stack (12 bytes = one extended value)
6650 fmov.l (%sp)+,%fpcr # restore users round prec,mode
6651 fadd.x %fp0,%fp0 # 2 * ATAN( STUFF )
6656 fcmp.s %fp0,&0x3F800000 # compare fp0 with 1.0
6657 fbgt t_operr # cause an operr exception
6659 #--|X| = 1, ACOS(X) = 0 OR PI
6660 tst.b (%a0) # is X positive or negative?
6664 #Returns PI and inexact exception
6666 fmov.x PI(%pc),%fp0 # load PI
6667 fmov.l %d0,%fpcr # load round mode,prec
6668 fadd.s &0x00800000,%fp0 # add a small value (2^(-126) in single) under the user mode
6672 bra ld_pzero # answer is positive zero
6675 #--ACOS(X) = PI/2 FOR DENORMALIZED X
6677 fmov.l %d0,%fpcr # load user's rnd mode/prec
6678 fmov.x PIBY2(%pc),%fp0 # fp0 = PI/2
6681 #########################################################################
6682 # setox(): computes the exponential for a normalized input #
6683 # setoxd(): computes the exponential for a denormalized input #
6684 # setoxm1(): computes the exponential minus 1 for a normalized input #
6685 # setoxm1d(): computes the exponential minus 1 for a denormalized input #
6687 # INPUT *************************************************************** #
6688 # a0 = pointer to extended precision input #
6689 # d0 = round precision,mode #
6691 # OUTPUT ************************************************************** #
6692 # fp0 = exp(X) or exp(X)-1 #
6694 # ACCURACY and MONOTONICITY ******************************************* #
6695 # The returned result is within 0.85 ulps in 64 significant bits, #
6696 # i.e. within 0.5001 ulp to 53 bits if the result is subsequently #
6697 # rounded to double precision. The result is provably monotonic #
6698 # in double precision. #
6700 # ALGORITHM and IMPLEMENTATION **************************************** #
6704 # Step 1. Set ans := 1.0 #
6706 # Step 2. Return ans := ans + sign(X)*2^(-126). Exit. #
6707 # Notes: This will always generate one exception -- inexact. #
6713 # Step 1. Filter out extreme cases of input argument. #
6714 # 1.1 If |X| >= 2^(-65), go to Step 1.3. #
6715 # 1.2 Go to Step 7. #
6716 # 1.3 If |X| < 16380 log(2), go to Step 2. #
6717 # 1.4 Go to Step 8. #
6718 # Notes: The usual case should take the branches 1.1 -> 1.3 -> 2.#
6719 # To avoid the use of floating-point comparisons, a #
6720 # compact representation of |X| is used. This format is a #
6721 # 32-bit integer, the upper (more significant) 16 bits #
6722 # are the sign and biased exponent field of |X|; the #
6723 # lower 16 bits are the 16 most significant fraction #
6724 # (including the explicit bit) bits of |X|. Consequently, #
6725 # the comparisons in Steps 1.1 and 1.3 can be performed #
6726 # by integer comparison. Note also that the constant #
6727 # 16380 log(2) used in Step 1.3 is also in the compact #
6728 # form. Thus taking the branch to Step 2 guarantees #
6729 # |X| < 16380 log(2). There is no harm to have a small #
6730 # number of cases where |X| is less than, but close to, #
6731 # 16380 log(2) and the branch to Step 8 is taken. #
6733 # Step 2. Calculate N = round-to-nearest-int( X * 64/log2 ). #
6734 # 2.1 Set AdjFlag := 0 (indicates the branch 1.3 -> 2 #
6736 # 2.2 N := round-to-nearest-integer( X * 64/log2 ). #
6737 # 2.3 Calculate J = N mod 64; so J = 0,1,2,..., #
6739 # 2.4 Calculate M = (N - J)/64; so N = 64M + J. #
6740 # 2.5 Calculate the address of the stored value of #
6742 # 2.6 Create the value Scale = 2^M. #
6743 # Notes: The calculation in 2.2 is really performed by #
6744 # Z := X * constant #
6745 # N := round-to-nearest-integer(Z) #
6747 # constant := single-precision( 64/log 2 ). #
6749 # Using a single-precision constant avoids memory #
6750 # access. Another effect of using a single-precision #
6751 # "constant" is that the calculated value Z is #
6753 # Z = X*(64/log2)*(1+eps), |eps| <= 2^(-24). #
6755 # This error has to be considered later in Steps 3 and 4. #
6757 # Step 3. Calculate X - N*log2/64. #
6758 # 3.1 R := X + N*L1, #
6759 # where L1 := single-precision(-log2/64). #
6760 # 3.2 R := R + N*L2, #
6761 # L2 := extended-precision(-log2/64 - L1).#
6762 # Notes: a) The way L1 and L2 are chosen ensures L1+L2 #
6763 # approximate the value -log2/64 to 88 bits of accuracy. #
6764 # b) N*L1 is exact because N is no longer than 22 bits #
6765 # and L1 is no longer than 24 bits. #
6766 # c) The calculation X+N*L1 is also exact due to #
6767 # cancellation. Thus, R is practically X+N(L1+L2) to full #
6769 # d) It is important to estimate how large can |R| be #
6772 # N = rnd-to-int( X*64/log2 (1+eps) ), |eps|<=2^(-24) #
6773 # X*64/log2 (1+eps) = N + f, |f| <= 0.5 #
6774 # X*64/log2 - N = f - eps*X 64/log2 #
6775 # X - N*log2/64 = f*log2/64 - eps*X #
6778 # Now |X| <= 16446 log2, thus #
6780 # |X - N*log2/64| <= (0.5 + 16446/2^(18))*log2/64 #
6781 # <= 0.57 log2/64. #
6782 # This bound will be used in Step 4. #
6784 # Step 4. Approximate exp(R)-1 by a polynomial #
6785 # p = R + R*R*(A1 + R*(A2 + R*(A3 + R*(A4 + R*A5)))) #
6786 # Notes: a) In order to reduce memory access, the coefficients #
6787 # are made as "short" as possible: A1 (which is 1/2), A4 #
6788 # and A5 are single precision; A2 and A3 are double #
6790 # b) Even with the restrictions above, #
6791 # |p - (exp(R)-1)| < 2^(-68.8) for all |R| <= 0.0062. #
6792 # Note that 0.0062 is slightly bigger than 0.57 log2/64. #
6793 # c) To fully utilize the pipeline, p is separated into #
6794 # two independent pieces of roughly equal complexities #
6795 # p = [ R + R*S*(A2 + S*A4) ] + #
6796 # [ S*(A1 + S*(A3 + S*A5)) ] #
6799 # Step 5. Compute 2^(J/64)*exp(R) = 2^(J/64)*(1+p) by #
6800 # ans := T + ( T*p + t) #
6801 # where T and t are the stored values for 2^(J/64). #
6802 # Notes: 2^(J/64) is stored as T and t where T+t approximates #
6803 # 2^(J/64) to roughly 85 bits; T is in extended precision #
6804 # and t is in single precision. Note also that T is #
6805 # rounded to 62 bits so that the last two bits of T are #
6806 # zero. The reason for such a special form is that T-1, #
6807 # T-2, and T-8 will all be exact --- a property that will #
6808 # give much more accurate computation of the function #
6811 # Step 6. Reconstruction of exp(X) #
6812 # exp(X) = 2^M * 2^(J/64) * exp(R). #
6813 # 6.1 If AdjFlag = 0, go to 6.3 #
6814 # 6.2 ans := ans * AdjScale #
6815 # 6.3 Restore the user FPCR #
6816 # 6.4 Return ans := ans * Scale. Exit. #
6817 # Notes: If AdjFlag = 0, we have X = Mlog2 + Jlog2/64 + R, #
6818 # |M| <= 16380, and Scale = 2^M. Moreover, exp(X) will #
6819 # neither overflow nor underflow. If AdjFlag = 1, that #
6821 # X = (M1+M)log2 + Jlog2/64 + R, |M1+M| >= 16380. #
6822 # Hence, exp(X) may overflow or underflow or neither. #
6823 # When that is the case, AdjScale = 2^(M1) where M1 is #
6824 # approximately M. Thus 6.2 will never cause #
6825 # over/underflow. Possible exception in 6.4 is overflow #
6826 # or underflow. The inexact exception is not generated in #
6827 # 6.4. Although one can argue that the inexact flag #
6828 # should always be raised, to simulate that exception #
6829 # costs much more than the flag is worth in practical uses. #
6831 # Step 7. Return 1 + X. #
6833 # 7.2 Restore user FPCR. #
6834 # 7.3 Return ans := 1 + ans. Exit #
6835 # Notes: For non-zero X, the inexact exception will always be #
6836 # raised by 7.3. That is the only exception raised by 7.3.#
6837 # Note also that we use the FMOVEM instruction to move X #
6838 # in Step 7.1 to avoid unnecessary trapping. (Although #
6839 # the FMOVEM may not seem relevant since X is normalized, #
6840 # the precaution will be useful in the library version of #
6841 # this code where the separate entry for denormalized #
6842 # inputs will be done away with.) #
6844 # Step 8. Handle exp(X) where |X| >= 16380log2. #
6845 # 8.1 If |X| > 16480 log2, go to Step 9. #
6846 # (mimic 2.2 - 2.6) #
6847 # 8.2 N := round-to-integer( X * 64/log2 ) #
6848 # 8.3 Calculate J = N mod 64, J = 0,1,...,63 #
6849 # 8.4 K := (N-J)/64, M1 := truncate(K/2), M = K-M1, #
6851 # 8.5 Calculate the address of the stored value #
6853 # 8.6 Create the values Scale = 2^M, AdjScale = 2^M1. #
6854 # 8.7 Go to Step 3. #
6855 # Notes: Refer to notes for 2.2 - 2.6. #
6857 # Step 9. Handle exp(X), |X| > 16480 log2. #
6858 # 9.1 If X < 0, go to 9.3 #
6859 # 9.2 ans := Huge, go to 9.4 #
6860 # 9.3 ans := Tiny. #
6861 # 9.4 Restore user FPCR. #
6862 # 9.5 Return ans := ans * ans. Exit. #
6863 # Notes: Exp(X) will surely overflow or underflow, depending on #
6864 # X's sign. "Huge" and "Tiny" are respectively large/tiny #
6865 # extended-precision numbers whose square over/underflow #
6866 # with an inexact result. Thus, 9.5 always raises the #
6867 # inexact together with either overflow or underflow. #
6872 # Step 1. Set ans := 0 #
6874 # Step 2. Return ans := X + ans. Exit. #
6875 # Notes: This will return X with the appropriate rounding #
6876 # precision prescribed by the user FPCR. #
6881 # Step 1. Check |X| #
6882 # 1.1 If |X| >= 1/4, go to Step 1.3. #
6883 # 1.2 Go to Step 7. #
6884 # 1.3 If |X| < 70 log(2), go to Step 2. #
6885 # 1.4 Go to Step 10. #
6886 # Notes: The usual case should take the branches 1.1 -> 1.3 -> 2.#
6887 # However, it is conceivable |X| can be small very often #
6888 # because EXPM1 is intended to evaluate exp(X)-1 #
6889 # accurately when |X| is small. For further details on #
6890 # the comparisons, see the notes on Step 1 of setox. #
6892 # Step 2. Calculate N = round-to-nearest-int( X * 64/log2 ). #
6893 # 2.1 N := round-to-nearest-integer( X * 64/log2 ). #
6894 # 2.2 Calculate J = N mod 64; so J = 0,1,2,..., #
6896 # 2.3 Calculate M = (N - J)/64; so N = 64M + J. #
6897 # 2.4 Calculate the address of the stored value of #
6899 # 2.5 Create the values Sc = 2^M and #
6900 # OnebySc := -2^(-M). #
6901 # Notes: See the notes on Step 2 of setox. #
6903 # Step 3. Calculate X - N*log2/64. #
6904 # 3.1 R := X + N*L1, #
6905 # where L1 := single-precision(-log2/64). #
6906 # 3.2 R := R + N*L2, #
6907 # L2 := extended-precision(-log2/64 - L1).#
6908 # Notes: Applying the analysis of Step 3 of setox in this case #
6909 # shows that |R| <= 0.0055 (note that |X| <= 70 log2 in #
6912 # Step 4. Approximate exp(R)-1 by a polynomial #
6913 # p = R+R*R*(A1+R*(A2+R*(A3+R*(A4+R*(A5+R*A6))))) #
6914 # Notes: a) In order to reduce memory access, the coefficients #
6915 # are made as "short" as possible: A1 (which is 1/2), A5 #
6916 # and A6 are single precision; A2, A3 and A4 are double #
6918 # b) Even with the restriction above, #
6919 # |p - (exp(R)-1)| < |R| * 2^(-72.7) #
6920 # for all |R| <= 0.0055. #
6921 # c) To fully utilize the pipeline, p is separated into #
6922 # two independent pieces of roughly equal complexity #
6923 # p = [ R*S*(A2 + S*(A4 + S*A6)) ] + #
6924 # [ R + S*(A1 + S*(A3 + S*A5)) ] #
6927 # Step 5. Compute 2^(J/64)*p by #
6929 # where T and t are the stored values for 2^(J/64). #
6930 # Notes: 2^(J/64) is stored as T and t where T+t approximates #
6931 # 2^(J/64) to roughly 85 bits; T is in extended precision #
6932 # and t is in single precision. Note also that T is #
6933 # rounded to 62 bits so that the last two bits of T are #
6934 # zero. The reason for such a special form is that T-1, #
6935 # T-2, and T-8 will all be exact --- a property that will #
6936 # be exploited in Step 6 below. The total relative error #
6937 # in p is no bigger than 2^(-67.7) compared to the final #
6940 # Step 6. Reconstruction of exp(X)-1 #
6941 # exp(X)-1 = 2^M * ( 2^(J/64) + p - 2^(-M) ). #
6942 # 6.1 If M <= 63, go to Step 6.3. #
6943 # 6.2 ans := T + (p + (t + OnebySc)). Go to 6.6 #
6944 # 6.3 If M >= -3, go to 6.5. #
6945 # 6.4 ans := (T + (p + t)) + OnebySc. Go to 6.6 #
6946 # 6.5 ans := (T + OnebySc) + (p + t). #
6947 # 6.6 Restore user FPCR. #
6948 # 6.7 Return ans := Sc * ans. Exit. #
6949 # Notes: The various arrangements of the expressions give #
6950 # accurate evaluations. #
6952 # Step 7. exp(X)-1 for |X| < 1/4. #
6953 # 7.1 If |X| >= 2^(-65), go to Step 9. #
6954 # 7.2 Go to Step 8. #
6956 # Step 8. Calculate exp(X)-1, |X| < 2^(-65). #
6957 # 8.1 If |X| < 2^(-16312), goto 8.3 #
6958 # 8.2 Restore FPCR; return ans := X - 2^(-16382). #
6960 # 8.3 X := X * 2^(140). #
6961 # 8.4 Restore FPCR; ans := ans - 2^(-16382). #
6962 # Return ans := ans*2^(140). Exit #
6963 # Notes: The idea is to return "X - tiny" under the user #
6964 # precision and rounding modes. To avoid unnecessary #
6965 # inefficiency, we stay away from denormalized numbers #
6966 # the best we can. For |X| >= 2^(-16312), the #
6967 # straightforward 8.2 generates the inexact exception as #
6968 # the case warrants. #
6970 # Step 9. Calculate exp(X)-1, |X| < 1/4, by a polynomial #
6971 # p = X + X*X*(B1 + X*(B2 + ... + X*B12)) #
6972 # Notes: a) In order to reduce memory access, the coefficients #
6973 # are made as "short" as possible: B1 (which is 1/2), B9 #
6974 # to B12 are single precision; B3 to B8 are double #
6975 # precision; and B2 is double extended. #
6976 # b) Even with the restriction above, #
6977 # |p - (exp(X)-1)| < |X| 2^(-70.6) #
6978 # for all |X| <= 0.251. #
6979 # Note that 0.251 is slightly bigger than 1/4. #
6980 # c) To fully preserve accuracy, the polynomial is #
6982 # X + ( S*B1 + Q ) where S = X*X and #
6983 # Q = X*S*(B2 + X*(B3 + ... + X*B12)) #
6984 # d) To fully utilize the pipeline, Q is separated into #
6985 # two independent pieces of roughly equal complexity #
6986 # Q = [ X*S*(B2 + S*(B4 + ... + S*B12)) ] + #
6987 # [ S*S*(B3 + S*(B5 + ... + S*B11)) ] #
6989 # Step 10. Calculate exp(X)-1 for |X| >= 70 log 2. #
6990 # 10.1 If X >= 70log2 , exp(X) - 1 = exp(X) for all #
6991 # practical purposes. Therefore, go to Step 1 of setox. #
6992 # 10.2 If X <= -70log2, exp(X) - 1 = -1 for all practical #
6995 # Restore user FPCR #
6996 # Return ans := ans + 2^(-126). Exit. #
6997 # Notes: 10.2 will always create an inexact and return -1 + tiny #
6998 # in the user rounding precision and mode. #
7000 #########################################################################
# Constants for the exp / expm1 routines below. Each one is read PC-relative
# with the operand size noted on its line.
7002 L2: long 0x3FDC0000,0x82E30865,0x4361C4C6,0x00000000 # extended (fmul.x): L2, where L1+L2 = -log2/64
7004 EEXPA3: long 0x3FA55555,0x55554CC1 # double (fadd.d): A3 of the setox polynomial
7005 EEXPA2: long 0x3FC55555,0x55554A54 # double (fadd.d): A2 of the setox polynomial
7007 EM1A4: long 0x3F811111,0x11174385 # double (fadd.d): A4 of the setoxm1 polynomial
7008 EM1A3: long 0x3FA55555,0x55554F5A # double (fadd.d): A3 of the setoxm1 polynomial
7010 EM1A2: long 0x3FC55555,0x55555555,0x00000000,0x00000000 # read as a double (fadd.d): A2 of the setoxm1 polynomial
7012 EM1B8: long 0x3EC71DE3,0xA5774682 # double (fadd.d): B8 of the |X| < 1/4 polynomial
7013 EM1B7: long 0x3EFA01A0,0x19D7CB68 # double (fadd.d): B7
7015 EM1B6: long 0x3F2A01A0,0x1A019DF3 # double (fadd.d): B6
7016 EM1B5: long 0x3F56C16C,0x16C170E2 # double (fadd.d): B5
7018 EM1B4: long 0x3F811111,0x11111111 # double (fadd.d): B4
7019 EM1B3: long 0x3FA55555,0x55555555 # double (fadd.d): B3
7021 EM1B2: long 0x3FFC0000,0xAAAAAAAA,0xAAAAAAAB # extended (fadd.x): B2
7024 TWO140: long 0x48B00000,0x00000000 # double: 2^(140) (biased expo 0x48B = 1023+140)
7026 long 0x37300000,0x00000000 # double: 2^(-140) (biased expo 0x373 = 1023-140); presumably TWON140, label not visible here
# Table of 2^(J/64), J = 0..63, one 16-byte entry per J (presumably EEXPTBL;
# the label line is not visible here). Each entry is:
#   longs 0-2: T, the leading part of 2^(J/64) in extended precision
#   long 3:    t, a single-precision correction, so that T+t approximates 2^(J/64)
# Entry 0 is exactly 1.0; entry 32 (0xB504F333...) is sqrt(2).
7029 long 0x3FFF0000,0x80000000,0x00000000,0x00000000
7030 long 0x3FFF0000,0x8164D1F3,0xBC030774,0x9F841A9B
7031 long 0x3FFF0000,0x82CD8698,0xAC2BA1D8,0x9FC1D5B9
7032 long 0x3FFF0000,0x843A28C3,0xACDE4048,0xA0728369
7033 long 0x3FFF0000,0x85AAC367,0xCC487B14,0x1FC5C95C
7034 long 0x3FFF0000,0x871F6196,0x9E8D1010,0x1EE85C9F
7035 long 0x3FFF0000,0x88980E80,0x92DA8528,0x9FA20729
7036 long 0x3FFF0000,0x8A14D575,0x496EFD9C,0xA07BF9AF
7037 long 0x3FFF0000,0x8B95C1E3,0xEA8BD6E8,0xA0020DCF
7038 long 0x3FFF0000,0x8D1ADF5B,0x7E5BA9E4,0x205A63DA
7039 long 0x3FFF0000,0x8EA4398B,0x45CD53C0,0x1EB70051
7040 long 0x3FFF0000,0x9031DC43,0x1466B1DC,0x1F6EB029
7041 long 0x3FFF0000,0x91C3D373,0xAB11C338,0xA0781494
7042 long 0x3FFF0000,0x935A2B2F,0x13E6E92C,0x9EB319B0
7043 long 0x3FFF0000,0x94F4EFA8,0xFEF70960,0x2017457D
7044 long 0x3FFF0000,0x96942D37,0x20185A00,0x1F11D537
7045 long 0x3FFF0000,0x9837F051,0x8DB8A970,0x9FB952DD
7046 long 0x3FFF0000,0x99E04593,0x20B7FA64,0x1FE43087
7047 long 0x3FFF0000,0x9B8D39B9,0xD54E5538,0x1FA2A818
7048 long 0x3FFF0000,0x9D3ED9A7,0x2CFFB750,0x1FDE494D
7049 long 0x3FFF0000,0x9EF53260,0x91A111AC,0x20504890
7050 long 0x3FFF0000,0xA0B0510F,0xB9714FC4,0xA073691C
7051 long 0x3FFF0000,0xA2704303,0x0C496818,0x1F9B7A05
7052 long 0x3FFF0000,0xA43515AE,0x09E680A0,0xA0797126
7053 long 0x3FFF0000,0xA5FED6A9,0xB15138EC,0xA071A140
7054 long 0x3FFF0000,0xA7CD93B4,0xE9653568,0x204F62DA
7055 long 0x3FFF0000,0xA9A15AB4,0xEA7C0EF8,0x1F283C4A
7056 long 0x3FFF0000,0xAB7A39B5,0xA93ED338,0x9F9A7FDC
7057 long 0x3FFF0000,0xAD583EEA,0x42A14AC8,0xA05B3FAC
7058 long 0x3FFF0000,0xAF3B78AD,0x690A4374,0x1FDF2610
7059 long 0x3FFF0000,0xB123F581,0xD2AC2590,0x9F705F90
7060 long 0x3FFF0000,0xB311C412,0xA9112488,0x201F678A
7061 long 0x3FFF0000,0xB504F333,0xF9DE6484,0x1F32FB13
7062 long 0x3FFF0000,0xB6FD91E3,0x28D17790,0x20038B30
7063 long 0x3FFF0000,0xB8FBAF47,0x62FB9EE8,0x200DC3CC
7064 long 0x3FFF0000,0xBAFF5AB2,0x133E45FC,0x9F8B2AE6
7065 long 0x3FFF0000,0xBD08A39F,0x580C36C0,0xA02BBF70
7066 long 0x3FFF0000,0xBF1799B6,0x7A731084,0xA00BF518
7067 long 0x3FFF0000,0xC12C4CCA,0x66709458,0xA041DD41
7068 long 0x3FFF0000,0xC346CCDA,0x24976408,0x9FDF137B
7069 long 0x3FFF0000,0xC5672A11,0x5506DADC,0x201F1568
7070 long 0x3FFF0000,0xC78D74C8,0xABB9B15C,0x1FC13A2E
7071 long 0x3FFF0000,0xC9B9BD86,0x6E2F27A4,0xA03F8F03
7072 long 0x3FFF0000,0xCBEC14FE,0xF2727C5C,0x1FF4907D
7073 long 0x3FFF0000,0xCE248C15,0x1F8480E4,0x9E6E53E4
7074 long 0x3FFF0000,0xD06333DA,0xEF2B2594,0x1FD6D45C
7075 long 0x3FFF0000,0xD2A81D91,0xF12AE45C,0xA076EDB9
7076 long 0x3FFF0000,0xD4F35AAB,0xCFEDFA20,0x9FA6DE21
7077 long 0x3FFF0000,0xD744FCCA,0xD69D6AF4,0x1EE69A2F
7078 long 0x3FFF0000,0xD99D15C2,0x78AFD7B4,0x207F439F
7079 long 0x3FFF0000,0xDBFBB797,0xDAF23754,0x201EC207
7080 long 0x3FFF0000,0xDE60F482,0x5E0E9124,0x9E8BE175
7081 long 0x3FFF0000,0xE0CCDEEC,0x2A94E110,0x20032C4B
7082 long 0x3FFF0000,0xE33F8972,0xBE8A5A50,0x2004DFF5
7083 long 0x3FFF0000,0xE5B906E7,0x7C8348A8,0x1E72F47A
7084 long 0x3FFF0000,0xE8396A50,0x3C4BDC68,0x1F722F22
7085 long 0x3FFF0000,0xEAC0C6E7,0xDD243930,0xA017E945
7086 long 0x3FFF0000,0xED4F301E,0xD9942B84,0x1F401A5B
7087 long 0x3FFF0000,0xEFE4B99B,0xDCDAF5CC,0x9FB9A9E3
7088 long 0x3FFF0000,0xF281773C,0x59FFB138,0x20744C05
7089 long 0x3FFF0000,0xF5257D15,0x2486CC2C,0x1F773A19
7090 long 0x3FFF0000,0xF7D0DF73,0x0AD13BB8,0x1FFE90D5
7091 long 0x3FFF0000,0xFA83B2DB,0x722A033C,0xA041ED22
7092 long 0x3FFF0000,0xFD3E0C0C,0xF486C174,0x1F853F3A
7096 set ADJSCALE,FP_SCR1 # ADJSCALE is an alias for the FP_SCR1 slot
7102 #--entry point for EXP(X), here X is finite, non-zero, and not NaN's
# Register use in this routine: a0 = pointer to X, d0 = user rnd mode/prec
# (written back to the fpcr before the last instruction), d1 = integer
# scratch, a1 = pointer into the 2^(J/64) table.
7105 mov.l (%a0),%d1 # load part of input X
7106 and.l &0x7FFF0000,%d1 # biased expo. of X
7107 cmp.l %d1,&0x3FBE0000 # 2^(-65)
7108 bge.b EXPC1 # normal case
7112 #--The case |X| >= 2^(-65)
7113 mov.w 4(%a0),%d1 # expo. and partial sig. of |X|
7114 cmp.l %d1,&0x400CB167 # 16380 log2 trunc. 16 bits
7115 blt.b EXPMAIN # normal case
7120 #--This is the normal branch: 2^(-65) <= |X| < 16380 log2.
7121 fmov.x (%a0),%fp0 # load input from (a0)
7124 fmul.s &0x42B8AA3B,%fp0 # 64/log2 * X
7125 fmovm.x &0xc,-(%sp) # save fp2/fp3 {%fp2/%fp3}
7126 mov.l &0,ADJFLAG(%a6) # AdjFlag := 0 (Step 2.1)
7127 fmov.l %fp0,%d1 # N = int( X * 64/log2 )
7128 lea EEXPTBL(%pc),%a1 # a1 = base of the 2^(J/64) table
7129 fmov.l %d1,%fp0 # convert to floating-format
7131 mov.l %d1,L_SCR1(%a6) # save N temporarily
7132 and.l &0x3F,%d1 # d1 is J = N mod 64
7134 add.l %d1,%a1 # address of 2^(J/64)
7135 mov.l L_SCR1(%a6),%d1 # reload N
7136 asr.l &6,%d1 # d1 is M
7137 add.w &0x3FFF,%d1 # biased expo. of 2^(M)
7138 mov.w L2(%pc),L_SCR1(%a6) # prefetch L2, no need in CB
7142 #--fp2,fp3 saved on the stack. fp0 is N, fp1 is X,
7143 #--a1 points to 2^(J/64), d1 is biased expo. of 2^(M)
7145 fmul.s &0xBC317218,%fp0 # N * L1, L1 = lead(-log2/64)
7146 fmul.x L2(%pc),%fp2 # N * L2, L1+L2 = -log2/64
7147 fadd.x %fp1,%fp0 # X + N*L1
7148 fadd.x %fp2,%fp0 # fp0 is R, reduced arg.
7151 #--WE NOW COMPUTE EXP(R)-1 BY A POLYNOMIAL
7152 #-- R + R*R*(A1 + R*(A2 + R*(A3 + R*(A4 + R*A5))))
7153 #--TO FULLY UTILIZE THE PIPELINE, WE COMPUTE S = R*R
7154 #--[R+R*S*(A2+S*A4)] + [S*(A1+S*(A3+S*A5))]
7157 fmul.x %fp1,%fp1 # fp1 IS S = R*R
7159 fmov.s &0x3AB60B70,%fp2 # fp2 IS A5
7161 fmul.x %fp1,%fp2 # fp2 IS S*A5
7163 fmul.s &0x3C088895,%fp3 # fp3 IS S*A4
7165 fadd.d EEXPA3(%pc),%fp2 # fp2 IS A3+S*A5
7166 fadd.d EEXPA2(%pc),%fp3 # fp3 IS A2+S*A4
7168 fmul.x %fp1,%fp2 # fp2 IS S*(A3+S*A5)
7169 mov.w %d1,SCALE(%a6) # SCALE is 2^(M) in extended
7170 mov.l &0x80000000,SCALE+4(%a6) # upper mantissa longword: only the explicit integer bit set
7173 fmul.x %fp1,%fp3 # fp3 IS S*(A2+S*A4)
7175 fadd.s &0x3F000000,%fp2 # fp2 IS A1+S*(A3+S*A5)
7176 fmul.x %fp0,%fp3 # fp3 IS R*S*(A2+S*A4)
7178 fmul.x %fp1,%fp2 # fp2 IS S*(A1+S*(A3+S*A5))
7179 fadd.x %fp3,%fp0 # fp0 IS R+R*S*(A2+S*A4),
7181 fmov.x (%a1)+,%fp1 # fp1 is lead. pt. of 2^(J/64)
7182 fadd.x %fp2,%fp0 # fp0 is EXP(R) - 1
7185 #--final reconstruction process
7186 #--EXP(X) = 2^M * ( 2^(J/64) + 2^(J/64)*(EXP(R)-1) )
7188 fmul.x %fp1,%fp0 # 2^(J/64)*(Exp(R)-1)
7189 fmovm.x (%sp)+,&0x30 # fp2/fp3 restored {%fp2/%fp3}
7190 fadd.s (%a1),%fp0 # accurate 2^(J/64)
7192 fadd.x %fp1,%fp0 # 2^(J/64) + 2^(J/64)*...
7193 mov.l ADJFLAG(%a6),%d1 # fetch AdjFlag (Step 6.1)
7199 fmul.x ADJSCALE(%a6),%fp0 # ans := ans * AdjScale (Step 6.2)
7201 fmov.l %d0,%fpcr # restore user FPCR
7202 mov.b &FMUL_OP,%d1 # last inst is MUL
7203 fmul.x SCALE(%a6),%fp0 # multiply 2^(M)
7208 fmovm.x (%a0),&0x80 # load X (FMOVEM, to avoid unnecessary trapping; see Step 7 notes)
7210 fadd.s &0x3F800000,%fp0 # 1+X in user mode
7215 cmp.l %d1,&0x400CB27C # 16480 log2
7218 fmov.x (%a0),%fp0 # load input from (a0)
7221 fmul.s &0x42B8AA3B,%fp0 # 64/log2 * X
7222 fmovm.x &0xc,-(%sp) # save fp2/fp3 {%fp2/%fp3}
7223 mov.l &1,ADJFLAG(%a6) # AdjFlag := 1, so ADJSCALE is applied in Step 6.2
7224 fmov.l %fp0,%d1 # N = int( X * 64/log2 )
7225 lea EEXPTBL(%pc),%a1 # a1 = base of the 2^(J/64) table
7226 fmov.l %d1,%fp0 # convert to floating-format
7227 mov.l %d1,L_SCR1(%a6) # save N temporarily
7228 and.l &0x3F,%d1 # d1 is J = N mod 64
7230 add.l %d1,%a1 # address of 2^(J/64)
7231 mov.l L_SCR1(%a6),%d1 # reload N
7232 asr.l &6,%d1 # d1 is K
7233 mov.l %d1,L_SCR1(%a6) # save K temporarily
7234 asr.l &1,%d1 # d1 is M1
7235 sub.l %d1,L_SCR1(%a6) # L_SCR1 is M = K - M1
7236 add.w &0x3FFF,%d1 # biased expo. of 2^(M1)
7237 mov.w %d1,ADJSCALE(%a6) # ADJSCALE := 2^(M1)
7238 mov.l &0x80000000,ADJSCALE+4(%a6) # upper mantissa longword: only the explicit integer bit set
7239 clr.l ADJSCALE+8(%a6) # lower mantissa longword = 0
7240 mov.l L_SCR1(%a6),%d1 # d1 is M
7241 add.w &0x3FFF,%d1 # biased expo. of 2^(M)
7242 bra.w EXPCONT1 # go back to Step 3
7246 tst.b (%a0) # is X positive or negative?
7252 #--entry point for EXP(X), X is denormalized
7254 andi.l &0x80000000,(%sp) # keep only the sign bit of the longword on top of the stack
7255 ori.l &0x00800000,(%sp) # sign(X)*2^(-126)
7257 fmov.s &0x3F800000,%fp0 # ans := 1.0 (Step 1 of the denormalized case)
7265 #--entry point for EXPM1(X), here X is finite, non-zero, non-NaN
7269 mov.l (%a0),%d1 # load part of input X
7270 and.l &0x7FFF0000,%d1 # biased expo. of X
7271 cmp.l %d1,&0x3FFD0000 # 1/4
7272 bge.b EM1CON1 # |X| >= 1/4
7277 #--The case |X| >= 1/4
7278 mov.w 4(%a0),%d1 # expo. and partial sig. of |X|
7279 cmp.l %d1,&0x4004C215 # 70log2 rounded up to 16 bits
7280 ble.b EM1MAIN # 1/4 <= |X| <= 70log2
7285 #--This is the case: 1/4 <= |X| <= 70 log2.
7286 fmov.x (%a0),%fp0 # load input from (a0)
7289 fmul.s &0x42B8AA3B,%fp0 # 64/log2 * X
7290 fmovm.x &0xc,-(%sp) # save fp2 {%fp2/%fp3}
7291 fmov.l %fp0,%d1 # N = int( X * 64/log2 )
7292 lea EEXPTBL(%pc),%a1
7293 fmov.l %d1,%fp0 # convert to floating-format
7295 mov.l %d1,L_SCR1(%a6) # save N temporarily
7296 and.l &0x3F,%d1 # D0 is J = N mod 64
7298 add.l %d1,%a1 # address of 2^(J/64)
7299 mov.l L_SCR1(%a6),%d1
7300 asr.l &6,%d1 # D0 is M
7301 mov.l %d1,L_SCR1(%a6) # save a copy of M
7304 #--fp1,fp2 saved on the stack. fp0 is N, fp1 is X,
7305 #--a0 points to 2^(J/64), D0 and a1 both contain M
7307 fmul.s &0xBC317218,%fp0 # N * L1, L1 = lead(-log2/64)
7308 fmul.x L2(%pc),%fp2 # N * L2, L1+L2 = -log2/64
7309 fadd.x %fp1,%fp0 # X + N*L1
7310 fadd.x %fp2,%fp0 # fp0 is R, reduced arg.
7311 add.w &0x3FFF,%d1 # D0 is biased expo. of 2^M
7314 #--WE NOW COMPUTE EXP(R)-1 BY A POLYNOMIAL
7315 #-- R + R*R*(A1 + R*(A2 + R*(A3 + R*(A4 + R*(A5 + R*A6)))))
7316 #--TO FULLY UTILIZE THE PIPELINE, WE COMPUTE S = R*R
7317 #--[R*S*(A2+S*(A4+S*A6))] + [R+S*(A1+S*(A3+S*A5))]
7320 fmul.x %fp1,%fp1 # fp1 IS S = R*R
7322 fmov.s &0x3950097B,%fp2 # fp2 IS a6
7324 fmul.x %fp1,%fp2 # fp2 IS S*A6
7326 fmul.s &0x3AB60B6A,%fp3 # fp3 IS S*A5
7328 fadd.d EM1A4(%pc),%fp2 # fp2 IS A4+S*A6
7329 fadd.d EM1A3(%pc),%fp3 # fp3 IS A3+S*A5
7330 mov.w %d1,SC(%a6) # SC is 2^(M) in extended
7331 mov.l &0x80000000,SC+4(%a6)
7334 fmul.x %fp1,%fp2 # fp2 IS S*(A4+S*A6)
7335 mov.l L_SCR1(%a6),%d1 # D0 is M
7336 neg.w %d1 # D0 is -M
7337 fmul.x %fp1,%fp3 # fp3 IS S*(A3+S*A5)
7338 add.w &0x3FFF,%d1 # biased expo. of 2^(-M)
7339 fadd.d EM1A2(%pc),%fp2 # fp2 IS A2+S*(A4+S*A6)
7340 fadd.s &0x3F000000,%fp3 # fp3 IS A1+S*(A3+S*A5)
7342 fmul.x %fp1,%fp2 # fp2 IS S*(A2+S*(A4+S*A6))
7343 or.w &0x8000,%d1 # signed/expo. of -2^(-M)
7344 mov.w %d1,ONEBYSC(%a6) # OnebySc is -2^(-M)
7345 mov.l &0x80000000,ONEBYSC+4(%a6)
7346 clr.l ONEBYSC+8(%a6)
7347 fmul.x %fp3,%fp1 # fp1 IS S*(A1+S*(A3+S*A5))
7349 fmul.x %fp0,%fp2 # fp2 IS R*S*(A2+S*(A4+S*A6))
7350 fadd.x %fp1,%fp0 # fp0 IS R+S*(A1+S*(A3+S*A5))
7352 fadd.x %fp2,%fp0 # fp0 IS EXP(R)-1
7354 fmovm.x (%sp)+,&0x30 # fp2 restored {%fp2/%fp3}
7357 #--Compute 2^(J/64)*p
7359 fmul.x (%a1),%fp0 # 2^(J/64)*(Exp(R)-1)
7363 mov.l L_SCR1(%a6),%d1 # retrieve M
7367 fmov.s 12(%a1),%fp1 # fp1 is t
7368 fadd.x ONEBYSC(%a6),%fp1 # fp1 is t+OnebySc
7369 fadd.x %fp1,%fp0 # p+(t+OnebySc), fp1 released
7370 fadd.x (%a1),%fp0 # T+(p+(t+OnebySc))
7378 fadd.s 12(%a1),%fp0 # p+t
7379 fadd.x (%a1),%fp0 # T+(p+t)
7380 fadd.x ONEBYSC(%a6),%fp0 # OnebySc + (T+(p+t))
7383 #--Step 6.5 -3 <= M <= 63
7384 fmov.x (%a1)+,%fp1 # fp1 is T
7385 fadd.s (%a1),%fp0 # fp0 is p+t
7386 fadd.x ONEBYSC(%a6),%fp1 # fp1 is T+OnebySc
7387 fadd.x %fp1,%fp0 # (T+OnebySc)+(p+t)
7396 #--Step 7 |X| < 1/4.
7397 cmp.l %d1,&0x3FBE0000 # 2^(-65)
7401 #--Step 8 |X| < 2^(-65)
7402 cmp.l %d1,&0x00330000 # 2^(-16312)
7405 mov.l &0x80010000,SC(%a6) # SC is -2^(-16382)
7406 mov.l &0x80000000,SC+4(%a6)
7410 mov.b &FADD_OP,%d1 # last inst is ADD
7417 fmul.d TWO140(%pc),%fp0
7418 mov.l &0x80010000,SC(%a6)
7419 mov.l &0x80000000,SC+4(%a6)
7423 mov.b &FMUL_OP,%d1 # last inst is MUL
7424 fmul.d TWON140(%pc),%fp0
7428 #--Step 9 exp(X)-1 by a simple polynomial
7429 fmov.x (%a0),%fp0 # fp0 is X
7430 fmul.x %fp0,%fp0 # fp0 is S := X*X
7431 fmovm.x &0xc,-(%sp) # save fp2 {%fp2/%fp3}
7432 fmov.s &0x2F30CAA8,%fp1 # fp1 is B12
7433 fmul.x %fp0,%fp1 # fp1 is S*B12
7434 fmov.s &0x310F8290,%fp2 # fp2 is B11
7435 fadd.s &0x32D73220,%fp1 # fp1 is B10+S*B12
7437 fmul.x %fp0,%fp2 # fp2 is S*B11
7438 fmul.x %fp0,%fp1 # fp1 is S*(B10 + ...
7440 fadd.s &0x3493F281,%fp2 # fp2 is B9+S*...
7441 fadd.d EM1B8(%pc),%fp1 # fp1 is B8+S*...
7443 fmul.x %fp0,%fp2 # fp2 is S*(B9+...
7444 fmul.x %fp0,%fp1 # fp1 is S*(B8+...
7446 fadd.d EM1B7(%pc),%fp2 # fp2 is B7+S*...
7447 fadd.d EM1B6(%pc),%fp1 # fp1 is B6+S*...
7449 fmul.x %fp0,%fp2 # fp2 is S*(B7+...
7450 fmul.x %fp0,%fp1 # fp1 is S*(B6+...
7452 fadd.d EM1B5(%pc),%fp2 # fp2 is B5+S*...
7453 fadd.d EM1B4(%pc),%fp1 # fp1 is B4+S*...
7455 fmul.x %fp0,%fp2 # fp2 is S*(B5+...
7456 fmul.x %fp0,%fp1 # fp1 is S*(B4+...
7458 fadd.d EM1B3(%pc),%fp2 # fp2 is B3+S*...
7459 fadd.x EM1B2(%pc),%fp1 # fp1 is B2+S*...
7461 fmul.x %fp0,%fp2 # fp2 is S*(B3+...
7462 fmul.x %fp0,%fp1 # fp1 is S*(B2+...
7464 fmul.x %fp0,%fp2 # fp2 is S*S*(B3+...)
7465 fmul.x (%a0),%fp1 # fp1 is X*S*(B2...
7467 fmul.s &0x3F000000,%fp0 # fp0 is S*B1
7468 fadd.x %fp2,%fp1 # fp1 is Q
7470 fmovm.x (%sp)+,&0x30 # fp2 restored {%fp2/%fp3}
7472 fadd.x %fp1,%fp0 # fp0 is S*B1+Q
7479 #--Step 10 |X| > 70 log2
7484 fmov.s &0xBF800000,%fp0 # fp0 is -1
7486 fadd.s &0x00800000,%fp0 # -1 + 2^(-126)
7491 #--entry point for EXPM1(X), here X is denormalized
7495 #########################################################################
7496 # sgetexp(): returns the exponent portion of the input argument. #
7497 # The exponent bias is removed and the exponent value is #
7498 # returned as an extended precision number in fp0. #
7499 # sgetexpd(): handles denormalized numbers. #
7501 # sgetman(): extracts the mantissa of the input argument. The #
7502 # mantissa is converted to an extended precision number w/ #
7503 # an exponent of $3fff and is returned in fp0. The range of #
7504 # the result is [1.0 - 2.0). #
7505 # sgetmand(): handles denormalized numbers. #
7507 # INPUT *************************************************************** #
7508 # a0 = pointer to extended precision input #
7510 # OUTPUT ************************************************************** #
7511 # fp0 = exponent(X) or mantissa(X) #
7513 #########################################################################
7517 mov.w SRC_EX(%a0),%d0 # get the sign/exponent word of the input
7518 bclr &0xf,%d0 # clear the sign bit
7519 subi.w &0x3fff,%d0 # subtract off the bias
7520 fmov.w %d0,%fp0 # return exp in fp0
7521 blt.b sgetexpn # unbiased exponent < 0 (CCR from subi.w)
7525 mov.b &neg_bmask,FPSR_CC(%a6) # set 'N' ccode bit
7530 bsr.l norm # normalize; d0 is used as the shift amount below
7531 neg.w %d0 # new exp = -(shft amt)
7532 subi.w &0x3fff,%d0 # subtract off the bias
7533 fmov.w %d0,%fp0 # return exp in fp0
7534 mov.b &neg_bmask,FPSR_CC(%a6) # set 'N' ccode bit
7539 mov.w SRC_EX(%a0),%d0 # get the sign/exponent word
7540 ori.w &0x7fff,%d0 # force all 15 exponent bits to 1, sign bit untouched
7541 bclr &0xe,%d0 # clear bit 14: exponent becomes $3fff, sign kept
7543 # here, we build the result in a tmp location so as not to disturb the input
7544 mov.l SRC_HI(%a0),FP_SCR0_HI(%a6) # copy mantissa (hi) to tmp loc
7545 mov.l SRC_LO(%a0),FP_SCR0_LO(%a6) # copy mantissa (lo) to tmp loc
7546 mov.w %d0,FP_SCR0_EX(%a6) # insert new exponent
7547 fmov.x FP_SCR0(%a6),%fp0 # put new value back in fp0
7548 bmi.b sgetmann # sign bit of d0 is set (CCR from the mov.w above)
7552 mov.b &neg_bmask,FPSR_CC(%a6) # set 'N' ccode bit
7556 # For denormalized numbers, shift the mantissa until the j-bit = 1,
7557 # then load the exponent with +/- $3fff.
7561 bsr.l norm # normalize exponent
7564 #########################################################################
7565 # scosh(): computes the hyperbolic cosine of a normalized input #
7566 # scoshd(): computes the hyperbolic cosine of a denormalized input #
7568 # INPUT *************************************************************** #
7569 # a0 = pointer to extended precision input #
7570 # d0 = round precision,mode #
7572 # OUTPUT ************************************************************** #
7575 # ACCURACY and MONOTONICITY ******************************************* #
7576 # The returned result is within 3 ulps in 64 significant bit, #
7577 # i.e. within 0.5001 ulp to 53 bits if the result is subsequently #
7578 # rounded to double precision. The result is provably monotonic #
7579 # in double precision. #
7581 # ALGORITHM *********************************************************** #
7584 # 1. If |X| > 16380 log2, go to 3. #
7586 # 2. (|X| <= 16380 log2) Cosh(X) is obtained by the formulae #
7587 # y = |X|, z = exp(Y), and #
7588 # cosh(X) = (1/2)*( z + 1/z ). #
7591 # 3. (|X| > 16380 log2). If |X| > 16480 log2, go to 5. #
7593 # 4. (16380 log2 < |X| <= 16480 log2) #
7594 # cosh(X) = exp(|X|)/2. #
7595 # However, invoking exp(|X|) may cause premature #
7596 # overflow. Thus, we calculate cosh(X) as follows: #
7598 # Fact := 2**(16380) #
7599 # Y' := Y - 16381 log2 #
7600 # cosh(X) := Fact * exp(Y'). #
7603 # 5. (|X| > 16480 log2) cosh(X) must overflow. Return #
7604 # Huge*Huge to generate overflow and an infinity with #
7605 # the appropriate sign. Huge is the largest finite number #
7606 # in extended format. Exit. #
7608 #########################################################################
7611 long 0x7FFB0000,0x80000000,0x00000000,0x00000000 # 2^16380 in extended precision (biased exp 0x7FFB, mantissa 1.0)
7615 fmov.x (%a0),%fp0 # LOAD INPUT
7619 and.l &0x7FFFFFFF,%d1 # clear bit 31 (sign): d1 now describes |X|
7620 cmp.l %d1,&0x400CB167 # compare |X| with 16380 log2
7623 #--THIS IS THE USUAL CASE, |X| < 16380 LOG2
7624 #--COSH(X) = (1/2) * ( EXP(X) + 1/EXP(X) )
7630 fmovm.x &0x01,-(%sp) # save |X| to stack
7631 lea (%sp),%a0 # pass ptr to |X|
7632 bsr setox # FP0 IS EXP(|X|)
7633 add.l &0xc,%sp # erase |X| from stack (12-byte extended value)
7634 fmul.s &0x3F000000,%fp0 # (1/2)EXP(|X|)
7637 fmov.s &0x3E800000,%fp1 # (1/4)
7638 fdiv.x %fp0,%fp1 # (1/4)/((1/2)EXP(|X|)) = 1/(2 EXP(|X|))
7641 mov.b &FADD_OP,%d1 # last inst is ADD
7646 cmp.l %d1,&0x400CB2B3 # second range check (step 3 of the header)
7650 fsub.d T1(%pc),%fp0 # (|X|-16381LOG2_LEAD)
7651 fsub.d T2(%pc),%fp0 # |X| - 16381 LOG2, ACCURATE
7655 fmovm.x &0x01,-(%sp) # save fp0 to stack
7656 lea (%sp),%a0 # pass ptr to fp0
7658 add.l &0xc,%sp # clear fp0 from stack
7662 mov.b &FMUL_OP,%d1 # last inst is MUL
7663 fmul.x TWO16380(%pc),%fp0 # scale by 2^16380: Fact * exp(Y')
7670 #--COSH(X) = 1 FOR DENORMALIZED X
7672 fmov.s &0x3F800000,%fp0 # fp0 = 1.0 (single format)
7675 fadd.s &0x00800000,%fp0 # 1 + 2^(-126)
7678 #########################################################################
7679 # ssinh(): computes the hyperbolic sine of a normalized input #
7680 # ssinhd(): computes the hyperbolic sine of a denormalized input #
7682 # INPUT *************************************************************** #
7683 # a0 = pointer to extended precision input #
7684 # d0 = round precision,mode #
7686 # OUTPUT ************************************************************** #
7689 # ACCURACY and MONOTONICITY ******************************************* #
7690 # The returned result is within 3 ulps in 64 significant bit, #
7691 # i.e. within 0.5001 ulp to 53 bits if the result is subsequently #
7692 # rounded to double precision. The result is provably monotonic #
7693 # in double precision. #
7695 # ALGORITHM *********************************************************** #
7698 # 1. If |X| > 16380 log2, go to 3. #
7700 # 2. (|X| <= 16380 log2) Sinh(X) is obtained by the formula #
7701 # y = |X|, sgn = sign(X), and z = expm1(Y), #
7702 # sinh(X) = sgn*(1/2)*( z + z/(1+z) ). #
7705 # 3. If |X| > 16480 log2, go to 5. #
7707 # 4. (16380 log2 < |X| <= 16480 log2) #
7708 # sinh(X) = sign(X) * exp(|X|)/2. #
7709 # However, invoking exp(|X|) may cause premature overflow. #
7710 # Thus, we calculate sinh(X) as follows: #
7713 # sgnFact := sgn * 2**(16380) #
7714 # Y' := Y - 16381 log2 #
7715 # sinh(X) := sgnFact * exp(Y'). #
7718 # 5. (|X| > 16480 log2) sinh(X) must overflow. Return #
7719 # sign(X)*Huge*Huge to generate overflow and an infinity with #
7720 # the appropriate sign. Huge is the largest finite number in #
7721 # extended format. Exit. #
7723 #########################################################################
7727 fmov.x (%a0),%fp0 # LOAD INPUT
7731 mov.l %d1,%a1 # save (compacted) operand
7732 and.l &0x7FFFFFFF,%d1 # clear bit 31 (sign): d1 now describes |X|
7733 cmp.l %d1,&0x400CB167 # compare |X| with 16380 log2
7736 #--THIS IS THE USUAL CASE, |X| < 16380 LOG2
7737 #--Y = |X|, Z = EXPM1(Y), SINH(X) = SIGN(X)*(1/2)*( Z + Z/(1+Z) )
7739 fabs.x %fp0 # Y = |X|
7741 movm.l &0x8040,-(%sp) # preserve {a1/d0} across the setoxm1 call
7742 fmovm.x &0x01,-(%sp) # save Y on stack
7743 lea (%sp),%a0 # pass ptr to Y
7745 bsr setoxm1 # FP0 IS Z = EXPM1(Y)
7746 add.l &0xc,%sp # clear Y from stack
7748 movm.l (%sp)+,&0x0201 # restore {a1/d0}
7751 fadd.s &0x3F800000,%fp1 # 1+Z
7753 fdiv.x %fp1,%fp0 # Z/(1+Z)
7755 and.l &0x80000000,%d1 # keep only bit 31 (the sign)
7756 or.l &0x3F000000,%d1 # sign | 0.5 in single format = SIGN(X)*(1/2)
7761 mov.b &FMUL_OP,%d1 # last inst is MUL
7762 fmul.s (%sp)+,%fp0 # last fp inst - possible exceptions set
7766 cmp.l %d1,&0x400CB2B3 # second range check (step 3 of the header)
7769 fsub.d T1(%pc),%fp0 # (|X|-16381LOG2_LEAD)
7771 mov.l &0x80000000,-(%sp) # push mantissa longword with the integer bit set (1.0)
7773 and.l &0x80000000,%d1 # keep only bit 31 (the sign)
7774 or.l &0x7FFB0000,%d1 # sign | biased exponent of 2^16380
7775 mov.l %d1,-(%sp) # sgnFact sign/exponent word, EXTENDED FMT
7776 fsub.d T2(%pc),%fp0 # |X| - 16381 LOG2, ACCURATE
7780 fmovm.x &0x01,-(%sp) # save fp0 on stack
7781 lea (%sp),%a0 # pass ptr to fp0
7783 add.l &0xc,%sp # clear fp0 from stack
7787 mov.b &FMUL_OP,%d1 # last inst is MUL
7788 fmul.x (%sp)+,%fp0 # multiply by the value built on the stack and pop it; possible exception
7792 #--SINH(X) = X FOR DENORMALIZED X
7796 #########################################################################
7797 # stanh(): computes the hyperbolic tangent of a normalized input #
7798 # stanhd(): computes the hyperbolic tangent of a denormalized input #
7800 # INPUT *************************************************************** #
7801 # a0 = pointer to extended precision input #
7802 # d0 = round precision,mode #
7804 # OUTPUT ************************************************************** #
7807 # ACCURACY and MONOTONICITY ******************************************* #
7808 # The returned result is within 3 ulps in 64 significant bit, #
7809 # i.e. within 0.5001 ulp to 53 bits if the result is subsequently #
7810 # rounded to double precision. The result is provably monotonic #
7811 # in double precision. #
7813 # ALGORITHM *********************************************************** #
7816 # 1. If |X| >= (5/2) log2 or |X| <= 2**(-40), go to 3. #
7818 # 2. (2**(-40) < |X| < (5/2) log2) Calculate tanh(X) by #
7819 # sgn := sign(X), y := 2|X|, z := expm1(Y), and #
7820 # tanh(X) = sgn*( z/(2+z) ). #
7823 # 3. (|X| <= 2**(-40) or |X| >= (5/2) log2). If |X| < 1, #
7826 # 4. (|X| >= (5/2) log2) If |X| >= 50 log2, go to 6. #
7828 # 5. ((5/2) log2 <= |X| < 50 log2) Calculate tanh(X) by #
7829 # sgn := sign(X), y := 2|X|, z := exp(Y), #
7830 # tanh(X) = sgn - [ sgn*2/(1+z) ]. #
7833 # 6. (|X| >= 50 log2) Tanh(X) = +-1 (round to nearest). Thus, we #
7834 # calculate Tanh(X) by #
7835 # sgn := sign(X), Tiny := 2**(-126), #
7836 # tanh(X) := sgn - sgn*Tiny. #
7839 # 7. (|X| < 2**(-40)). Tanh(X) = X. Exit. #
7841 #########################################################################
7852 fmov.x (%a0),%fp0 # LOAD INPUT
7858 and.l &0x7FFFFFFF,%d1 # clear bit 31 (sign): d1 now describes |X|
7859 cmp.l %d1, &0x3fd78000 # is |X| < 2^(-40)?
7860 blt.w TANHBORS # yes
7861 cmp.l %d1, &0x3fffddce # is |X| > (5/2)LOG2?
7862 bgt.w TANHBORS # yes
7864 #--THIS IS THE USUAL CASE
7865 #--Y = 2|X|, Z = EXPM1(Y), TANH(X) = SIGN(X) * Z / (Z+2).
7869 and.l &0x7FFF0000,%d1 # isolate the biased exponent field
7870 add.l &0x00010000,%d1 # EXPONENT OF 2|X| (exponent + 1)
7872 and.l &0x80000000,SGN(%a6) # SGN keeps only the sign bit
7873 fmov.x X(%a6),%fp0 # FP0 IS Y = 2|X|
7877 fmovm.x &0x1,-(%sp) # save Y on stack
7878 lea (%sp),%a0 # pass ptr to Y
7879 bsr setoxm1 # FP0 IS Z = EXPM1(Y)
7880 add.l &0xc,%sp # clear Y from stack
7884 fadd.s &0x40000000,%fp1 # Z+2
7889 fmov.l %d0,%fpcr # restore user's round prec,mode
7894 cmp.l %d1,&0x3FFF8000 # compare |X| with 1.0 (header step 3)
7897 cmp.l %d1,&0x40048AA1 # compare |X| with 50 log2 (header step 4)
7900 #-- (5/2) LOG2 < |X| < 50 LOG2,
7901 #--TANH(X) = 1 - (2/[EXP(2X)+1]). LET Y = 2|X|, SGN = SIGN(X),
7902 #--TANH(X) = SGN - SGN*2/[EXP(Y)+1].
7906 and.l &0x7FFF0000,%d1 # isolate the biased exponent field
7907 add.l &0x00010000,%d1 # EXPO OF 2|X|
7908 mov.l %d1,X(%a6) # Y = 2|X|
7909 and.l &0x80000000,SGN(%a6) # SGN keeps only the sign bit
7911 fmov.x X(%a6),%fp0 # Y = 2|X|
7915 fmovm.x &0x01,-(%sp) # save Y on stack
7916 lea (%sp),%a0 # pass ptr to Y
7917 bsr setox # FP0 IS EXP(Y)
7918 add.l &0xc,%sp # clear Y from stack
7921 fadd.s &0x3F800000,%fp0 # EXP(Y)+1
7923 eor.l &0xC0000000,%d1 # -SIGN(X)*2
7924 fmov.s %d1,%fp1 # -SIGN(X)*2 IN SGL FMT
7925 fdiv.x %fp0,%fp1 # -SIGN(X)*2 / [EXP(Y)+1]
7928 or.l &0x3F800000,%d1 # SGN
7929 fmov.s %d1,%fp0 # SGN IN SGL FMT
7931 fmov.l %d0,%fpcr # restore user's round prec,mode
7932 mov.b &FADD_OP,%d1 # last inst is ADD
7937 fmov.l %d0,%fpcr # restore user's round prec,mode
7938 mov.b &FMOV_OP,%d1 # last inst is MOVE
7939 fmov.x X(%a6),%fp0 # last inst - possible exception set
7942 #---RETURN SGN(X) - SGN(X)EPS
7945 and.l &0x80000000,%d1 # keep only bit 31 (the sign)
7946 or.l &0x3F800000,%d1 # sign | 1.0 in single format = SGN(X)
7948 and.l &0x80000000,%d1 # keep only bit 31 (the sign)
7949 eor.l &0x80800000,%d1 # -SIGN(X)*EPS
7951 fmov.l %d0,%fpcr # restore user's round prec,mode
7956 #--TANH(X) = X FOR DENORMALIZED X
7960 #########################################################################
7961 # slogn(): computes the natural logarithm of a normalized input #
7962 # slognd(): computes the natural logarithm of a denormalized input #
7963 # slognp1(): computes the log(1+X) of a normalized input #
7964 # slognp1d(): computes the log(1+X) of a denormalized input #
7966 # INPUT *************************************************************** #
7967 # a0 = pointer to extended precision input #
7968 # d0 = round precision,mode #
7970 # OUTPUT ************************************************************** #
7971 # fp0 = log(X) or log(1+X) #
7973 # ACCURACY and MONOTONICITY ******************************************* #
7974 # The returned result is within 2 ulps in 64 significant bit, #
7975 # i.e. within 0.5001 ulp to 53 bits if the result is subsequently #
7976 # rounded to double precision. The result is provably monotonic #
7977 # in double precision. #
7979 # ALGORITHM *********************************************************** #
7981 # Step 1. If |X-1| < 1/16, approximate log(X) by an odd #
7982 # polynomial in u, where u = 2(X-1)/(X+1). Otherwise, #
7983 # move on to Step 2. #
7985 # Step 2. X = 2**k * Y where 1 <= Y < 2. Define F to be the first #
7986 # seven significant bits of Y plus 2**(-7), i.e. #
7987 # F = 1.xxxxxx1 in base 2 where the six "x" match those #
7988 # of Y. Note that |Y-F| <= 2**(-7). #
7990 # Step 3. Define u = (Y-F)/F. Approximate log(1+u) by a #
7991 # polynomial in u, log(1+u) = poly. #
7993 # Step 4. Reconstruct #
7994 # log(X) = log( 2**k * Y ) = k*log(2) + log(F) + log(1+u) #
7995 # by k*log(2) + (log(F) + poly). The values of log(F) are #
7996 # calculated beforehand and stored in the program. #
7999 # Step 1: If |X| < 1/16, approximate log(1+X) by an odd #
8000 # polynomial in u where u = 2X/(2+X). Otherwise, move on #
8003 # Step 2: Let 1+X = 2**k * Y, where 1 <= Y < 2. Define F as done #
8004 # in Step 2 of the algorithm for LOGN and compute #
8005 # log(1+X) as k*log(2) + log(F) + poly where poly #
8006 # approximates log(1+u), u = (Y-F)/F. #
8008 # Implementation Notes: #
8009 # Note 1. There are 64 different possible values for F, thus 64 #
8010 # log(F)'s need to be tabulated. Moreover, the values of #
8011 # 1/F are also tabulated so that the division in (Y-F)/F #
8012 # can be performed by a multiplication. #
8014 # Note 2. In Step 2 of lognp1, in order to preserve accuracy, #
8015 # the value Y-F has to be calculated carefully when #
8018 # Note 3. To fully exploit the pipeline, polynomials are usually #
8019 # separated into two parts evaluated independently before #
8022 #########################################################################
8024 long 0x3FFE0000,0xB17217F7,0xD1CF79AC,0x00000000
8036 long 0x3FC2499A,0xB5E4040B
8038 long 0xBFC555B5,0x848CB7DB
8041 long 0x3FC99999,0x987D8730
8043 long 0xBFCFFFFF,0xFF6F7E97
8046 long 0x3FD55555,0x555555A4
8048 long 0xBFE00000,0x00000008
8051 long 0x3F175496,0xADD7DAD6
8053 long 0x3F3C71C2,0xFE80C7E0
8056 long 0x3F624924,0x928BCCFF
8058 long 0x3F899999,0x999995EC
8061 long 0x3FB55555,0x55555555
8063 long 0x40000000,0x00000000
8066 long 0x3f990000,0x80000000,0x00000000,0x00000000
8069 long 0x3FFE0000,0xFE03F80F,0xE03F80FE,0x00000000
8070 long 0x3FF70000,0xFF015358,0x833C47E2,0x00000000
8071 long 0x3FFE0000,0xFA232CF2,0x52138AC0,0x00000000
8072 long 0x3FF90000,0xBDC8D83E,0xAD88D549,0x00000000
8073 long 0x3FFE0000,0xF6603D98,0x0F6603DA,0x00000000
8074 long 0x3FFA0000,0x9CF43DCF,0xF5EAFD48,0x00000000
8075 long 0x3FFE0000,0xF2B9D648,0x0F2B9D65,0x00000000
8076 long 0x3FFA0000,0xDA16EB88,0xCB8DF614,0x00000000
8077 long 0x3FFE0000,0xEF2EB71F,0xC4345238,0x00000000
8078 long 0x3FFB0000,0x8B29B775,0x1BD70743,0x00000000
8079 long 0x3FFE0000,0xEBBDB2A5,0xC1619C8C,0x00000000
8080 long 0x3FFB0000,0xA8D839F8,0x30C1FB49,0x00000000
8081 long 0x3FFE0000,0xE865AC7B,0x7603A197,0x00000000
8082 long 0x3FFB0000,0xC61A2EB1,0x8CD907AD,0x00000000
8083 long 0x3FFE0000,0xE525982A,0xF70C880E,0x00000000
8084 long 0x3FFB0000,0xE2F2A47A,0xDE3A18AF,0x00000000
8085 long 0x3FFE0000,0xE1FC780E,0x1FC780E2,0x00000000
8086 long 0x3FFB0000,0xFF64898E,0xDF55D551,0x00000000
8087 long 0x3FFE0000,0xDEE95C4C,0xA037BA57,0x00000000
8088 long 0x3FFC0000,0x8DB956A9,0x7B3D0148,0x00000000
8089 long 0x3FFE0000,0xDBEB61EE,0xD19C5958,0x00000000
8090 long 0x3FFC0000,0x9B8FE100,0xF47BA1DE,0x00000000
8091 long 0x3FFE0000,0xD901B203,0x6406C80E,0x00000000
8092 long 0x3FFC0000,0xA9372F1D,0x0DA1BD17,0x00000000
8093 long 0x3FFE0000,0xD62B80D6,0x2B80D62C,0x00000000
8094 long 0x3FFC0000,0xB6B07F38,0xCE90E46B,0x00000000
8095 long 0x3FFE0000,0xD3680D36,0x80D3680D,0x00000000
8096 long 0x3FFC0000,0xC3FD0329,0x06488481,0x00000000
8097 long 0x3FFE0000,0xD0B69FCB,0xD2580D0B,0x00000000
8098 long 0x3FFC0000,0xD11DE0FF,0x15AB18CA,0x00000000
8099 long 0x3FFE0000,0xCE168A77,0x25080CE1,0x00000000
8100 long 0x3FFC0000,0xDE1433A1,0x6C66B150,0x00000000
8101 long 0x3FFE0000,0xCB8727C0,0x65C393E0,0x00000000
8102 long 0x3FFC0000,0xEAE10B5A,0x7DDC8ADD,0x00000000
8103 long 0x3FFE0000,0xC907DA4E,0x871146AD,0x00000000
8104 long 0x3FFC0000,0xF7856E5E,0xE2C9B291,0x00000000
8105 long 0x3FFE0000,0xC6980C69,0x80C6980C,0x00000000
8106 long 0x3FFD0000,0x82012CA5,0xA68206D7,0x00000000
8107 long 0x3FFE0000,0xC4372F85,0x5D824CA6,0x00000000
8108 long 0x3FFD0000,0x882C5FCD,0x7256A8C5,0x00000000
8109 long 0x3FFE0000,0xC1E4BBD5,0x95F6E947,0x00000000
8110 long 0x3FFD0000,0x8E44C60B,0x4CCFD7DE,0x00000000
8111 long 0x3FFE0000,0xBFA02FE8,0x0BFA02FF,0x00000000
8112 long 0x3FFD0000,0x944AD09E,0xF4351AF6,0x00000000
8113 long 0x3FFE0000,0xBD691047,0x07661AA3,0x00000000
8114 long 0x3FFD0000,0x9A3EECD4,0xC3EAA6B2,0x00000000
8115 long 0x3FFE0000,0xBB3EE721,0xA54D880C,0x00000000
8116 long 0x3FFD0000,0xA0218434,0x353F1DE8,0x00000000
8117 long 0x3FFE0000,0xB92143FA,0x36F5E02E,0x00000000
8118 long 0x3FFD0000,0xA5F2FCAB,0xBBC506DA,0x00000000
8119 long 0x3FFE0000,0xB70FBB5A,0x19BE3659,0x00000000
8120 long 0x3FFD0000,0xABB3B8BA,0x2AD362A5,0x00000000
8121 long 0x3FFE0000,0xB509E68A,0x9B94821F,0x00000000
8122 long 0x3FFD0000,0xB1641795,0xCE3CA97B,0x00000000
8123 long 0x3FFE0000,0xB30F6352,0x8917C80B,0x00000000
8124 long 0x3FFD0000,0xB7047551,0x5D0F1C61,0x00000000
8125 long 0x3FFE0000,0xB11FD3B8,0x0B11FD3C,0x00000000
8126 long 0x3FFD0000,0xBC952AFE,0xEA3D13E1,0x00000000
8127 long 0x3FFE0000,0xAF3ADDC6,0x80AF3ADE,0x00000000
8128 long 0x3FFD0000,0xC2168ED0,0xF458BA4A,0x00000000
8129 long 0x3FFE0000,0xAD602B58,0x0AD602B6,0x00000000
8130 long 0x3FFD0000,0xC788F439,0xB3163BF1,0x00000000
8131 long 0x3FFE0000,0xAB8F69E2,0x8359CD11,0x00000000
8132 long 0x3FFD0000,0xCCECAC08,0xBF04565D,0x00000000
8133 long 0x3FFE0000,0xA9C84A47,0xA07F5638,0x00000000
8134 long 0x3FFD0000,0xD2420487,0x2DD85160,0x00000000
8135 long 0x3FFE0000,0xA80A80A8,0x0A80A80B,0x00000000
8136 long 0x3FFD0000,0xD7894992,0x3BC3588A,0x00000000
8137 long 0x3FFE0000,0xA655C439,0x2D7B73A8,0x00000000
8138 long 0x3FFD0000,0xDCC2C4B4,0x9887DACC,0x00000000
8139 long 0x3FFE0000,0xA4A9CF1D,0x96833751,0x00000000
8140 long 0x3FFD0000,0xE1EEBD3E,0x6D6A6B9E,0x00000000
8141 long 0x3FFE0000,0xA3065E3F,0xAE7CD0E0,0x00000000
8142 long 0x3FFD0000,0xE70D785C,0x2F9F5BDC,0x00000000
8143 long 0x3FFE0000,0xA16B312E,0xA8FC377D,0x00000000
8144 long 0x3FFD0000,0xEC1F392C,0x5179F283,0x00000000
8145 long 0x3FFE0000,0x9FD809FD,0x809FD80A,0x00000000
8146 long 0x3FFD0000,0xF12440D3,0xE36130E6,0x00000000
8147 long 0x3FFE0000,0x9E4CAD23,0xDD5F3A20,0x00000000
8148 long 0x3FFD0000,0xF61CCE92,0x346600BB,0x00000000
8149 long 0x3FFE0000,0x9CC8E160,0xC3FB19B9,0x00000000
8150 long 0x3FFD0000,0xFB091FD3,0x8145630A,0x00000000
8151 long 0x3FFE0000,0x9B4C6F9E,0xF03A3CAA,0x00000000
8152 long 0x3FFD0000,0xFFE97042,0xBFA4C2AD,0x00000000
8153 long 0x3FFE0000,0x99D722DA,0xBDE58F06,0x00000000
8154 long 0x3FFE0000,0x825EFCED,0x49369330,0x00000000
8155 long 0x3FFE0000,0x9868C809,0x868C8098,0x00000000
8156 long 0x3FFE0000,0x84C37A7A,0xB9A905C9,0x00000000
8157 long 0x3FFE0000,0x97012E02,0x5C04B809,0x00000000
8158 long 0x3FFE0000,0x87224C2E,0x8E645FB7,0x00000000
8159 long 0x3FFE0000,0x95A02568,0x095A0257,0x00000000
8160 long 0x3FFE0000,0x897B8CAC,0x9F7DE298,0x00000000
8161 long 0x3FFE0000,0x94458094,0x45809446,0x00000000
8162 long 0x3FFE0000,0x8BCF55DE,0xC4CD05FE,0x00000000
8163 long 0x3FFE0000,0x92F11384,0x0497889C,0x00000000
8164 long 0x3FFE0000,0x8E1DC0FB,0x89E125E5,0x00000000
8165 long 0x3FFE0000,0x91A2B3C4,0xD5E6F809,0x00000000
8166 long 0x3FFE0000,0x9066E68C,0x955B6C9B,0x00000000
8167 long 0x3FFE0000,0x905A3863,0x3E06C43B,0x00000000
8168 long 0x3FFE0000,0x92AADE74,0xC7BE59E0,0x00000000
8169 long 0x3FFE0000,0x8F1779D9,0xFDC3A219,0x00000000
8170 long 0x3FFE0000,0x94E9BFF6,0x15845643,0x00000000
8171 long 0x3FFE0000,0x8DDA5202,0x37694809,0x00000000
8172 long 0x3FFE0000,0x9723A1B7,0x20134203,0x00000000
8173 long 0x3FFE0000,0x8CA29C04,0x6514E023,0x00000000
8174 long 0x3FFE0000,0x995899C8,0x90EB8990,0x00000000
8175 long 0x3FFE0000,0x8B70344A,0x139BC75A,0x00000000
8176 long 0x3FFE0000,0x9B88BDAA,0x3A3DAE2F,0x00000000
8177 long 0x3FFE0000,0x8A42F870,0x5669DB46,0x00000000
8178 long 0x3FFE0000,0x9DB4224F,0xFFE1157C,0x00000000
8179 long 0x3FFE0000,0x891AC73A,0xE9819B50,0x00000000
8180 long 0x3FFE0000,0x9FDADC26,0x8B7A12DA,0x00000000
8181 long 0x3FFE0000,0x87F78087,0xF78087F8,0x00000000
8182 long 0x3FFE0000,0xA1FCFF17,0xCE733BD4,0x00000000
8183 long 0x3FFE0000,0x86D90544,0x7A34ACC6,0x00000000
8184 long 0x3FFE0000,0xA41A9E8F,0x5446FB9F,0x00000000
8185 long 0x3FFE0000,0x85BF3761,0x2CEE3C9B,0x00000000
8186 long 0x3FFE0000,0xA633CD7E,0x6771CD8B,0x00000000
8187 long 0x3FFE0000,0x84A9F9C8,0x084A9F9D,0x00000000
8188 long 0x3FFE0000,0xA8489E60,0x0B435A5E,0x00000000
8189 long 0x3FFE0000,0x83993052,0x3FBE3368,0x00000000
8190 long 0x3FFE0000,0xAA59233C,0xCCA4BD49,0x00000000
8191 long 0x3FFE0000,0x828CBFBE,0xB9A020A3,0x00000000
8192 long 0x3FFE0000,0xAC656DAE,0x6BCC4985,0x00000000
8193 long 0x3FFE0000,0x81848DA8,0xFAF0D277,0x00000000
8194 long 0x3FFE0000,0xAE6D8EE3,0x60BB2468,0x00000000
8195 long 0x3FFE0000,0x80808080,0x80808081,0x00000000
8196 long 0x3FFE0000,0xB07197A2,0x3C46C654,0x00000000
8212 #--ENTRY POINT FOR LOG(X) FOR X FINITE, NON-ZERO, NOT NAN'S
8214 fmov.x (%a0),%fp0 # LOAD INPUT
8215 mov.l &0x00000000,ADJK(%a6) # ADJK = 0: NO EXPONENT ADJUSTMENT ON THIS ENTRY
8218 #--FPCR SAVED AND CLEARED, INPUT IS 2^(ADJK)*FP0, FP0 CONTAINS
8219 #--A FINITE, NON-ZERO, NORMALIZED NUMBER.
8225 mov.l 4(%a0),X+4(%a6) # COPY SECOND LONGWORD OF THE INPUT INTO X
8226 mov.l 8(%a0),X+8(%a6) # COPY THIRD LONGWORD OF THE INPUT INTO X
8228 cmp.l %d1,&0 # CHECK IF X IS NEGATIVE
8229 blt.w LOGNEG # LOG OF NEGATIVE ARGUMENT IS INVALID
8230 # X IS POSITIVE, CHECK IF X IS NEAR 1
8231 cmp.l %d1,&0x3ffef07d # IS X < 15/16?
8233 cmp.l %d1,&0x3fff8841 # IS X > 17/16?
8237 #--THIS SHOULD BE THE USUAL CASE, X NOT VERY CLOSE TO 1
8239 #--X = 2^(K) * Y, 1 <= Y < 2. THUS, Y = 1.XXXXXXXX....XX IN BINARY.
8240 #--WE DEFINE F = 1.XXXXXX1, I.E. FIRST 7 BITS OF Y AND ATTACH A 1.
8241 #--THE IDEA IS THAT LOG(X) = K*LOG2 + LOG(Y)
8242 #-- = K*LOG2 + LOG(F) + LOG(1 + (Y-F)/F).
8243 #--NOTE THAT U = (Y-F)/F IS VERY SMALL AND THUS APPROXIMATING
8244 #--LOG(1+U) CAN BE VERY EFFICIENT.
8245 #--ALSO NOTE THAT THE VALUE 1/F IS STORED IN A TABLE SO THAT NO
8246 #--DIVISION IS NEEDED TO CALCULATE (Y-F)/F.
8248 #--GET K, Y, F, AND ADDRESS OF 1/F.
8250 asr.l &8,%d1 # SHIFTED 16 BITS, BIASED EXPO. OF X
8251 sub.l &0x3FFF,%d1 # THIS IS K
8252 add.l ADJK(%a6),%d1 # ADJUST K, ORIGINAL INPUT MAY BE DENORM.
8253 lea LOGTBL(%pc),%a0 # BASE ADDRESS OF 1/F AND LOG(F)
8254 fmov.l %d1,%fp1 # CONVERT K TO FLOATING-POINT FORMAT
8256 #--WHILE THE CONVERSION IS GOING ON, WE GET F AND ADDRESS OF 1/F
8257 mov.l &0x3FFF0000,X(%a6) # X IS NOW Y, I.E. 2^(-K)*X
8258 mov.l XFRAC(%a6),FFRAC(%a6) # START F FROM THE HIGH MANTISSA LONGWORD OF Y
8259 and.l &0xFE000000,FFRAC(%a6) # FIRST 7 BITS OF Y
8260 or.l &0x01000000,FFRAC(%a6) # GET F: ATTACH A 1 AT THE EIGHTH BIT
8261 mov.l FFRAC(%a6),%d1 # READY TO GET ADDRESS OF 1/F
8262 and.l &0x7E000000,%d1 # KEEP THE 6 FRACTION BITS THAT SELECT THE TABLE ENTRY
8265 asr.l &4,%d1 # SHIFTED 20, D1 IS THE DISPLACEMENT
8266 add.l %d1,%a0 # A0 IS THE ADDRESS FOR 1/F
8269 mov.l &0x3fff0000,F(%a6) # F GETS THE BIASED EXPONENT OF 1.0, LIKE Y
8271 fsub.x F(%a6),%fp0 # Y-F
8272 fmovm.x &0xc,-(%sp) # SAVE FP2-3 WHILE FP0 IS NOT READY
8273 #--SUMMARY: FP0 IS Y-F, A0 IS ADDRESS OF 1/F, FP1 IS K
8274 #--REGISTERS SAVED: FPCR, FP2, FP3
8277 #--A RE-ENTRY POINT FOR LOGNP1
8278 fmul.x (%a0),%fp0 # FP0 IS U = (Y-F)/F
8279 fmul.x LOGOF2(%pc),%fp1 # GET K*LOG2 WHILE FP0 IS NOT READY
8281 fmul.x %fp2,%fp2 # FP2 IS V=U*U
8282 fmov.x %fp1,KLOG2(%a6) # PUT K*LOG2 IN MEMORY, FREE FP1
8284 #--LOG(1+U) IS APPROXIMATED BY
8285 #--U + V*(A1+U*(A2+U*(A3+U*(A4+U*(A5+U*A6))))) WHICH IS
8286 #--[U + V*(A1+V*(A3+V*A5))] + [U*V*(A2+V*(A4+V*A6))]
8291 fmul.d LOGA6(%pc),%fp1 # V*A6
8292 fmul.d LOGA5(%pc),%fp2 # V*A5
8294 fadd.d LOGA4(%pc),%fp1 # A4+V*A6
8295 fadd.d LOGA3(%pc),%fp2 # A3+V*A5
8297 fmul.x %fp3,%fp1 # V*(A4+V*A6)
8298 fmul.x %fp3,%fp2 # V*(A3+V*A5)
8300 fadd.d LOGA2(%pc),%fp1 # A2+V*(A4+V*A6)
8301 fadd.d LOGA1(%pc),%fp2 # A1+V*(A3+V*A5)
8303 fmul.x %fp3,%fp1 # V*(A2+V*(A4+V*A6))
8304 add.l &16,%a0 # ADDRESS OF LOG(F) (NEXT 16-BYTE TABLE ENTRY AFTER 1/F)
8305 fmul.x %fp3,%fp2 # V*(A1+V*(A3+V*A5))
8307 fmul.x %fp0,%fp1 # U*V*(A2+V*(A4+V*A6))
8308 fadd.x %fp2,%fp0 # U+V*(A1+V*(A3+V*A5))
8310 fadd.x (%a0),%fp1 # LOG(F)+U*V*(A2+V*(A4+V*A6))
8311 fmovm.x (%sp)+,&0x30 # RESTORE FP2-3
8312 fadd.x %fp1,%fp0 # FP0 IS LOG(F) + LOG(1+U)
8315 fadd.x KLOG2(%a6),%fp0 # FINAL ADD: K*LOG2 + (LOG(F) + LOG(1+U))
8321 # if the input is exactly equal to one, then exit through ld_pzero.
8322 # if these 2 lines weren't here, the correct answer would be returned
8323 # but the INEX2 bit would be set.
8324 fcmp.b %fp0,&0x1 # is it equal to one?
8325 fbeq.l ld_pzero # yes
8327 #--REGISTERS SAVED: FPCR, FP1. FP0 CONTAINS THE INPUT.
8329 fsub.s one(%pc),%fp1 # FP1 IS X-1
8330 fadd.s one(%pc),%fp0 # FP0 IS X+1
8331 fadd.x %fp1,%fp1 # FP1 IS 2(X-1)
8332 #--LOG(X) = LOG(1+U/2)-LOG(1-U/2) WHICH IS AN ODD POLYNOMIAL
8333 #--IN U, U = 2(X-1)/(X+1) = FP1/FP0
8336 #--THIS IS A RE-ENTRY POINT FOR LOGNP1
8337 fdiv.x %fp0,%fp1 # FP1 IS U
8338 fmovm.x &0xc,-(%sp) # SAVE FP2-3
8339 #--REGISTERS SAVED ARE NOW FPCR,FP1,FP2,FP3
8340 #--LET V=U*U, W=V*V, CALCULATE
8341 #--U + U*V*(B1 + V*(B2 + V*(B3 + V*(B4 + V*B5)))) BY
8342 #--U + U*V*( [B1 + W*(B3 + W*B5)] + [V*(B2 + W*B4)] )
8344 fmul.x %fp0,%fp0 # FP0 IS V
8345 fmov.x %fp1,SAVEU(%a6) # STORE U IN MEMORY, FREE FP1
8347 fmul.x %fp1,%fp1 # FP1 IS W
8349 fmov.d LOGB5(%pc),%fp3 # FP3 IS B5
8350 fmov.d LOGB4(%pc),%fp2 # FP2 IS B4
8352 fmul.x %fp1,%fp3 # W*B5
8353 fmul.x %fp1,%fp2 # W*B4
8355 fadd.d LOGB3(%pc),%fp3 # B3+W*B5
8356 fadd.d LOGB2(%pc),%fp2 # B2+W*B4
8358 fmul.x %fp3,%fp1 # W*(B3+W*B5), FP3 RELEASED
8360 fmul.x %fp0,%fp2 # V*(B2+W*B4)
8362 fadd.d LOGB1(%pc),%fp1 # B1+W*(B3+W*B5)
8363 fmul.x SAVEU(%a6),%fp0 # FP0 IS U*V
8365 fadd.x %fp2,%fp1 # B1+W*(B3+W*B5) + V*(B2+W*B4), FP2 RELEASED
8366 fmovm.x (%sp)+,&0x30 # FP2-3 RESTORED
8368 fmul.x %fp1,%fp0 # U*V*( [B1+W*(B3+W*B5)] + [V*(B2+W*B4)] )
8371 fadd.x SAVEU(%a6),%fp0 # ADD THE LEADING U TERM: U + U*V*(...)
8374 #--REGISTERS SAVED FPCR. LOG(-VE) IS INVALID
8380 #--ENTRY POINT FOR LOG(X) FOR DENORMALIZED INPUT
8382 mov.l &-100,ADJK(%a6) # INPUT = 2^(ADJK) * FP0
8384 #----normalize the input value by left shifting k bits (k to be determined
8385 #----below), adjusting exponent and storing -k to ADJK
8386 #----the value TWOTO100 is no longer needed.
8387 #----Note that this code assumes the denormalized input is NON-ZERO.
8389 movm.l &0x3f00,-(%sp) # save some registers {d2-d7}
8390 mov.l (%a0),%d3 # D3 = first longword of X (original note: exponent of smallest norm. number)
8392 mov.l 8(%a0),%d5 # (D4,D5) is (Hi_X,Lo_X)
8393 clr.l %d2 # D2 used for holding K
8403 bfffo %d4{&0:&32},%d6 # find first 1 in D4
8405 add.l %d6,%d2 # (D3,D4,D5) is normalized
8408 mov.l %d4,XFRAC(%a6) # store normalized mantissa (hi)
8409 mov.l %d5,XFRAC+4(%a6) # store normalized mantissa (lo)
8413 movm.l (%sp)+,&0xfc # restore registers {d2-d7}
8415 bra.w LOGBGN # begin regular log(X)
8419 bfffo %d4{&0:&32},%d6 # find first 1
8420 mov.l %d6,%d2 # get k
8422 mov.l %d5,%d7 # a copy of D5
8427 or.l %d7,%d4 # (D3,D4,D5) normalized
8430 mov.l %d4,XFRAC(%a6) # store normalized mantissa (hi)
8431 mov.l %d5,XFRAC+4(%a6) # store normalized mantissa (lo)
8435 movm.l (%sp)+,&0xfc # restore registers {d2-d7}
8437 bra.w LOGBGN # begin regular log(X)
8440 #--ENTRY POINT FOR LOG(1+X) FOR X FINITE, NON-ZERO, NOT NAN'S
8442 fmov.x (%a0),%fp0 # LOAD INPUT
8443 fabs.x %fp0 # test magnitude
8444 fcmp.x %fp0,LTHOLD(%pc) # compare with min threshold
8445 fbgt.w LP1REAL # if greater, continue
8447 mov.b &FMOV_OP,%d1 # last inst is MOVE
8448 fmov.x (%a0),%fp0 # return signed argument
8452 fmov.x (%a0),%fp0 # LOAD INPUT
8453 mov.l &0x00000000,ADJK(%a6) # ADJK = 0: NO EXPONENT ADJUSTMENT
8454 fmov.x %fp0,%fp1 # FP1 IS INPUT Z
8455 fadd.s one(%pc),%fp0 # X := ROUND(1+Z)
8457 mov.w XFRAC(%a6),XDCARE(%a6)
8460 ble.w LP1NEG0 # LOG OF ZERO OR -VE
8461 cmp.l %d1,&0x3ffe8000 # IS X WITHIN [1/2,3/2]? LOWER BOUND 1/2
8463 cmp.l %d1,&0x3fffc000 # UPPER BOUND 3/2
8465 #--IF 1+Z > 3/2 OR 1+Z < 1/2, THEN X, WHICH IS ROUNDING 1+Z,
8466 #--CONTAINS AT LEAST 63 BITS OF INFORMATION OF Z. IN THAT CASE,
8467 #--SIMPLY INVOKE LOG(X) FOR LOG(1+Z).
8470 #--NEXT SEE IF EXP(-1/16) < X < EXP(1/16)
8471 cmp.l %d1,&0x3ffef07d # LOWER BOUND EXP(-1/16)
8473 cmp.l %d1,&0x3fff8841 # UPPER BOUND EXP(1/16)
8477 #--EXP(-1/16) < X < EXP(1/16). LOG(1+Z) = LOG(1+U/2) - LOG(1-U/2)
8478 #--WHERE U = 2Z/(2+Z) = 2Z/(1+X).
8479 fadd.x %fp1,%fp1 # FP1 IS 2Z
8480 fadd.s one(%pc),%fp0 # FP0 IS 1+X
8485 #--HERE WE USE THE USUAL TABLE DRIVEN APPROACH. CARE HAS TO BE
8486 #--TAKEN BECAUSE 1+Z CAN HAVE 67 BITS OF INFORMATION AND WE MUST
8487 #--PRESERVE ALL THE INFORMATION. BECAUSE 1+Z IS IN [1/2,3/2],
8488 #--THERE ARE ONLY TWO CASES.
8489 #--CASE 1: 1+Z < 1, THEN K = -1 AND Y-F = (2-F) + 2Z
8490 #--CASE 2: 1+Z > 1, THEN K = 0 AND Y-F = (1-F) + Z
8491 #--ON RETURNING TO LP1CONT1, WE MUST HAVE K IN FP1, ADDRESS OF
8492 #--(1/F) IN A0, Y-F IN FP0, AND FP2 SAVED.
8494 mov.l XFRAC(%a6),FFRAC(%a6) # START F FROM THE HIGH MANTISSA LONGWORD
8495 and.l &0xFE000000,FFRAC(%a6) # KEEP THE FIRST 7 BITS
8496 or.l &0x01000000,FFRAC(%a6) # F OBTAINED
8497 cmp.l %d1,&0x3FFF8000 # SEE IF 1+Z > 1
8501 fmov.s TWO(%pc),%fp0 # FP0 IS 2
8502 mov.l &0x3fff0000,F(%a6) # F GETS THE BIASED EXPONENT OF 1.0
8504 fsub.x F(%a6),%fp0 # 2-F
8505 mov.l FFRAC(%a6),%d1 # READY TO GET ADDRESS OF 1/F
8506 and.l &0x7E000000,%d1 # KEEP THE 6 FRACTION BITS THAT SELECT THE TABLE ENTRY
8509 asr.l &4,%d1 # D1 CONTAINS DISPLACEMENT FOR 1/F
8510 fadd.x %fp1,%fp1 # GET 2Z
8511 fmovm.x &0xc,-(%sp) # SAVE FP2 {%fp2/%fp3}
8512 fadd.x %fp1,%fp0 # FP0 IS Y-F = (2-F)+2Z
8513 lea LOGTBL(%pc),%a0 # A0 IS ADDRESS OF 1/F
8515 fmov.s negone(%pc),%fp1 # FP1 IS K = -1
8519 fmov.s one(%pc),%fp0 # FP0 IS 1
8520 mov.l &0x3fff0000,F(%a6) # F GETS THE BIASED EXPONENT OF 1.0
8522 fsub.x F(%a6),%fp0 # 1-F
8523 mov.l FFRAC(%a6),%d1 # READY TO GET ADDRESS OF 1/F
8524 and.l &0x7E000000,%d1 # KEEP THE 6 FRACTION BITS THAT SELECT THE TABLE ENTRY
8528 fadd.x %fp1,%fp0 # FP0 IS Y-F
8529 fmovm.x &0xc,-(%sp) # FP2 SAVED {%fp2/%fp3}
8531 add.l %d1,%a0 # A0 IS ADDRESS OF 1/F
8532 fmov.s zero(%pc),%fp1 # FP1 IS K = 0
8536 #--FPCR SAVED. D1 IS X IN COMPACT FORM.
8540 fmov.s negone(%pc),%fp0 # FP0 IS -1
8546 fmov.s zero(%pc),%fp0 # FP0 IS 0
8552 #--ENTRY POINT FOR LOG(1+Z) FOR DENORMALIZED INPUT
8553 # Simply return the denorm
8557 #########################################################################
8558 # satanh(): computes the inverse hyperbolic tangent of a norm input #
8559 # satanhd(): computes the inverse hyperbolic tangent of a denorm input #
8561 # INPUT *************************************************************** #
8562 # a0 = pointer to extended precision input #
8563 # d0 = round precision,mode #
8565 # OUTPUT ************************************************************** #
8566 # fp0 = arctanh(X) #
8568 # ACCURACY and MONOTONICITY ******************************************* #
8569 # The returned result is within 3 ulps in 64 significant bit, #
8570 # i.e. within 0.5001 ulp to 53 bits if the result is subsequently #
8571 # rounded to double precision. The result is provably monotonic #
8572 # in double precision. #
8574 # ALGORITHM *********************************************************** #
8577 # 1. If |X| >= 1, go to 3. #
8579 # 2. (|X| < 1) With sgn := sign(X), y := |X|, z := 2y/(1-y), calculate atanh(X) by #
8583 # atanh(X) := sgn * (1/2) * logp1(z) #
8586 # 3. If |X| > 1, go to 5. #
8588 # 4. (|X| = 1) Generate infinity with an appropriate sign and #
8589 # divide-by-zero by #
8591 # atanh(X) := sgn / (+0). #
8594 # 5. (|X| > 1) Generate an invalid operation by 0 * infinity. #
8597 #########################################################################
# satanh body: d1 (presumably X in compact form, loaded on a line not shown)
# is compared with 1.0; for |X| < 1 the result is formed via slognp1.
8603 and.l &0x7FFFFFFF,%d1 # clear the sign bit of d1
8604 cmp.l %d1,&0x3FFF8000 # compare with 0x3FFF8000 (exponent 0x3FFF, top mantissa word 0x8000, i.e. 1.0)
8607 #--THIS IS THE USUAL CASE, |X| < 1
8608 #--Y = |X|, Z = 2Y/(1-Y), ATANH(X) = SIGN(X) * (1/2) * LOG1P(Z).
8610 fabs.x (%a0),%fp0 # Y = |X|
8613 fadd.x %fp0,%fp0 # 2Y
8614 fadd.s &0x3F800000,%fp1 # 1-Y (adds single 1.0; assumes fp1 = -Y from a line not shown -- TODO confirm)
8615 fdiv.x %fp1,%fp0 # 2Y/(1-Y)
8617 and.l &0x80000000,%d1 # keep only the sign bit
8618 or.l &0x3F000000,%d1 # SIGN(X)*HALF (0x3F000000 is single-precision 0.5)
8621 mov.l %d0,-(%sp) # save rnd prec,mode
8622 clr.l %d0 # pass ext prec,RN
8623 fmovm.x &0x01,-(%sp) # save Z on stack
8624 lea (%sp),%a0 # pass ptr to Z
8625 bsr slognp1 # LOG1P(Z)
8626 add.l &0xc,%sp # clear Z from stack (0xc = 12 bytes pushed by the fmovm.x)
8628 mov.l (%sp)+,%d0 # fetch old prec,mode
8629 fmov.l %d0,%fpcr # load it
8630 mov.b &FMUL_OP,%d1 # last inst is MUL
8635 fabs.x (%a0),%fp0 # |X|
8636 fcmp.s %fp0,&0x3F800000 # compare |X| with single-precision 1.0
8641 #--ATANH(X) = X FOR DENORMALIZED X
8645 #########################################################################
8646 # slog10(): computes the base-10 logarithm of a normalized input #
8647 # slog10d(): computes the base-10 logarithm of a denormalized input #
8648 # slog2(): computes the base-2 logarithm of a normalized input #
8649 # slog2d(): computes the base-2 logarithm of a denormalized input #
8651 # INPUT *************************************************************** #
8652 # a0 = pointer to extended precision input #
8653 # d0 = round precision,mode #
8655 # OUTPUT ************************************************************** #
8656 # fp0 = log_10(X) or log_2(X) #
8658 # ACCURACY and MONOTONICITY ******************************************* #
8659 # The returned result is within 1.7 ulps in 64 significant bit, #
8660 # i.e. within 0.5003 ulp to 53 bits if the result is subsequently #
8661 # rounded to double precision. The result is provably monotonic #
8662 # in double precision. #
8664 # ALGORITHM *********************************************************** #
8668 # Step 0. If X < 0, create a NaN and raise the invalid operation #
8669 # flag. Otherwise, save FPCR in D1; set FpCR to default. #
8670 # Notes: Default means round-to-nearest mode, no floating-point #
8671 # traps, and precision control = double extended. #
8673 # Step 1. Call slognd to obtain Y = log(X), the natural log of X. #
8674 # Notes: Even if X is denormalized, log(X) is always normalized. #
8676 # Step 2. Compute log_10(X) = log(X) * (1/log(10)). #
8677 # 2.1 Restore the user FPCR #
8678 # 2.2 Return ans := Y * INV_L10. #
8682 # Step 0. If X < 0, create a NaN and raise the invalid operation #
8683 # flag. Otherwise, save FPCR in D1; set FpCR to default. #
8684 # Notes: Default means round-to-nearest mode, no floating-point #
8685 # traps, and precision control = double extended. #
8687 # Step 1. Call sLogN to obtain Y = log(X), the natural log of X. #
8689 # Step 2. Compute log_10(X) = log(X) * (1/log(10)). #
8690 # 2.1 Restore the user FPCR #
8691 # 2.2 Return ans := Y * INV_L10. #
8695 # Step 0. If X < 0, create a NaN and raise the invalid operation #
8696 # flag. Otherwise, save FPCR in D1; set FpCR to default. #
8697 # Notes: Default means round-to-nearest mode, no floating-point #
8698 # traps, and precision control = double extended. #
8700 # Step 1. Call slognd to obtain Y = log(X), the natural log of X. #
8701 # Notes: Even if X is denormalized, log(X) is always normalized. #
8703 # Step 2. Compute log_2(X) = log(X) * (1/log(2)). #
8704 # 2.1 Restore the user FPCR #
8705 # 2.2 Return ans := Y * INV_L2. #
8709 # Step 0. If X < 0, create a NaN and raise the invalid operation #
8710 # flag. Otherwise, save FPCR in D1; set FpCR to default. #
8711 # Notes: Default means round-to-nearest mode, no floating-point #
8712 # traps, and precision control = double extended. #
8714 # Step 1. If X is not an integer power of two, i.e., X != 2^k, go to Step 3. #
8717 # Step 2. Return k. #
8718 # 2.1 Get integer k, X = 2^k. #
8719 # 2.2 Restore the user FPCR. #
8720 # 2.3 Return ans := convert-to-double-extended(k). #
8722 # Step 3. Call sLogN to obtain Y = log(X), the natural log of X. #
8724 # Step 4. Compute log_2(X) = log(X) * (1/log(2)). #
8725 # 4.1 Restore the user FPCR #
8726 # 4.2 Return ans := Y * INV_L2. #
8728 #########################################################################
# Constants and entry points for log10 / log2: each entry calls the
# natural-log routine (slogn or slognd) and scales fp0 by 1/ln(10) or 1/ln(2).
8731 long 0x3FFD0000,0xDE5BD8A9,0x37287195,0x00000000 # extended 1/ln(10) ~ 0.4343 (presumably INV_L10; label not visible here)
8734 long 0x3FFF0000,0xB8AA3B29,0x5C17F0BC,0x00000000 # extended 1/ln(2) ~ 1.4427 (presumably INV_L2; label not visible here)
8737 #--entry point for Log10(X), X is normalized
8740 fcmp.x %fp0,(%a0) # if operand == 1,
8741 fbeq.l ld_pzero # return an EXACT zero
8747 bsr slogn # log(X), X normal.
8749 fmul.x INV_L10(%pc),%fp0 # log10(X) = log(X) * INV_L10
8753 #--entry point for Log10(X), X is denormalized
8759 bsr slognd # log(X), X denorm.
8761 fmul.x INV_L10(%pc),%fp0 # log10(X) = log(X) * INV_L10
8765 #--entry point for Log2(X), X is normalized
8771 bne.b continue # X is not 2^k
8774 and.l &0x7FFFFFFF,%d1 # clear the sign bit of d1
8779 and.l &0x00007FFF,%d1 # keep the low 15 bits (width of the exponent field)
8789 bsr slogn # log(X), X normal.
8791 fmul.x INV_L2(%pc),%fp0 # log2(X) = log(X) * INV_L2
8798 #--entry point for Log2(X), X is denormalized
8804 bsr slognd # log(X), X denorm.
8806 fmul.x INV_L2(%pc),%fp0 # log2(X) = log(X) * INV_L2
8809 #########################################################################
8810 # stwotox(): computes 2**X for a normalized input #
8811 # stwotoxd(): computes 2**X for a denormalized input #
8812 # stentox(): computes 10**X for a normalized input #
8813 # stentoxd(): computes 10**X for a denormalized input #
8815 # INPUT *************************************************************** #
8816 # a0 = pointer to extended precision input #
8817 # d0 = round precision,mode #
8819 # OUTPUT ************************************************************** #
8820 # fp0 = 2**X or 10**X #
8822 # ACCURACY and MONOTONICITY ******************************************* #
8823 # The returned result is within 2 ulps in 64 significant bit, #
8824 # i.e. within 0.5001 ulp to 53 bits if the result is subsequently #
8825 # rounded to double precision. The result is provably monotonic #
8826 # in double precision. #
8828 # ALGORITHM *********************************************************** #
8831 # 1. If |X| > 16480, go to ExpBig. #
8833 # 2. If |X| < 2**(-70), go to ExpSm. #
8835 # 3. Decompose X as X = N/64 + r where |r| <= 1/128. Furthermore, decompose N as #
8837 # N = 64(M + M') + j, j = 0,1,2,...,63. #
8839 # 4. Overwrite r := r * log2. Then #
8840 # 2**X = 2**(M') * 2**(M) * 2**(j/64) * exp(r). #
8841 # Go to expr to compute that expression. #
8844 # 1. If |X| > 16480*log_10(2) (base 10 log of 2), go to ExpBig. #
8846 # 2. If |X| < 2**(-70), go to ExpSm. #
8848 # 3. Set y := X*log_2(10)*64 (base 2 log of 10). Set #
8849 # N := round-to-int(y). Decompose N as #
8850 # N = 64(M + M') + j, j = 0,1,2,...,63. #
8853 # r := ((X - N*L1)-N*L2) * L10 #
8854 # where L1, L2 are the leading and trailing parts of #
8855 # log_10(2)/64 and L10 is the natural log of 10. Then #
8856 # 10**X = 2**(M') * 2**(M) * 2**(j/64) * exp(r). #
8857 # Go to expr to compute that expression. #
8860 # 1. Fetch 2**(j/64) from table as Fact1 and Fact2. #
8862 # 2. Overwrite Fact1 and Fact2 by #
8863 # Fact1 := 2**(M) * Fact1 #
8864 # Fact2 := 2**(M) * Fact2 #
8865 # Thus Fact1 + Fact2 = 2**(M) * 2**(j/64). #
8867 # 3. Calculate P where 1 + P approximates exp(r): #
8868 # P = r + r*r*(A1+r*(A2+...+r*A5)). #
8870 # 4. Let AdjFact := 2**(M'). Return #
8871 # AdjFact * ( Fact1 + ((Fact1*P) + Fact2) ). #
8875 # 1. Generate overflow by Huge * Huge if X > 0; otherwise, #
8876 # generate underflow by Tiny * Tiny. #
8879 # 1. Return 1 + X. #
8881 #########################################################################
# Constants used by the 2**X / 10**X routines. The first three are
# referenced below as L2TEN64, L10TWO1 and L10TWO2 (presumably in that
# order; their labels are not visible here).
8884 long 0x406A934F,0x0979A371 # 64LOG10/LOG2 (double precision)
8886 long 0x3F734413,0x509F8000 # LOG2/64LOG10 (double precision)
8889 long 0xBFCD0000,0xC0219DC1,0xDA994FD2,0x00000000 # extended precision, negative sign (exponent word 0xBFCD)
8891 LOG10: long 0x40000000,0x935D8DDD,0xAAA8AC17,0x00000000 # extended ln(10) ~ 2.3026
8893 LOG2: long 0x3FFE0000,0xB17217F7,0xD1CF79AC,0x00000000 # extended ln(2) ~ 0.6931
8895 EXPA5: long 0x3F56C16D,0x6F7BD0B2 # double, ~1/720
8896 EXPA4: long 0x3F811112,0x302C712C # double, ~1/120
8897 EXPA3: long 0x3FA55555,0x55554CC1 # double, ~1/24
8898 EXPA2: long 0x3FC55555,0x55554A54 # double, ~1/6
8899 EXPA1: long 0x3FE00000,0x00000000,0x00000000,0x00000000 # double 0.5 in the first two longs (read with fadd.d)
# Table of 2^(j/64), j = 0..63, one 16-byte entry per j (presumably the
# TEXPTBL table referenced by the code; its label is not visible here).
# The first three longs of an entry are copied to FACT1/FACT1HI/FACT1LOW;
# the fourth long is read as two words into FACT2 and FACT2HI.
8902 long 0x3FFF0000,0x80000000,0x00000000,0x3F738000
8903 long 0x3FFF0000,0x8164D1F3,0xBC030773,0x3FBEF7CA
8904 long 0x3FFF0000,0x82CD8698,0xAC2BA1D7,0x3FBDF8A9
8905 long 0x3FFF0000,0x843A28C3,0xACDE4046,0x3FBCD7C9
8906 long 0x3FFF0000,0x85AAC367,0xCC487B15,0xBFBDE8DA
8907 long 0x3FFF0000,0x871F6196,0x9E8D1010,0x3FBDE85C
8908 long 0x3FFF0000,0x88980E80,0x92DA8527,0x3FBEBBF1
8909 long 0x3FFF0000,0x8A14D575,0x496EFD9A,0x3FBB80CA
8910 long 0x3FFF0000,0x8B95C1E3,0xEA8BD6E7,0xBFBA8373
8911 long 0x3FFF0000,0x8D1ADF5B,0x7E5BA9E6,0xBFBE9670
8912 long 0x3FFF0000,0x8EA4398B,0x45CD53C0,0x3FBDB700
8913 long 0x3FFF0000,0x9031DC43,0x1466B1DC,0x3FBEEEB0
8914 long 0x3FFF0000,0x91C3D373,0xAB11C336,0x3FBBFD6D
8915 long 0x3FFF0000,0x935A2B2F,0x13E6E92C,0xBFBDB319
8916 long 0x3FFF0000,0x94F4EFA8,0xFEF70961,0x3FBDBA2B
8917 long 0x3FFF0000,0x96942D37,0x20185A00,0x3FBE91D5
8918 long 0x3FFF0000,0x9837F051,0x8DB8A96F,0x3FBE8D5A
8919 long 0x3FFF0000,0x99E04593,0x20B7FA65,0xBFBCDE7B
8920 long 0x3FFF0000,0x9B8D39B9,0xD54E5539,0xBFBEBAAF
8921 long 0x3FFF0000,0x9D3ED9A7,0x2CFFB751,0xBFBD86DA
8922 long 0x3FFF0000,0x9EF53260,0x91A111AE,0xBFBEBEDD
8923 long 0x3FFF0000,0xA0B0510F,0xB9714FC2,0x3FBCC96E
8924 long 0x3FFF0000,0xA2704303,0x0C496819,0xBFBEC90B
8925 long 0x3FFF0000,0xA43515AE,0x09E6809E,0x3FBBD1DB
8926 long 0x3FFF0000,0xA5FED6A9,0xB15138EA,0x3FBCE5EB
8927 long 0x3FFF0000,0xA7CD93B4,0xE965356A,0xBFBEC274
8928 long 0x3FFF0000,0xA9A15AB4,0xEA7C0EF8,0x3FBEA83C
8929 long 0x3FFF0000,0xAB7A39B5,0xA93ED337,0x3FBECB00
8930 long 0x3FFF0000,0xAD583EEA,0x42A14AC6,0x3FBE9301
8931 long 0x3FFF0000,0xAF3B78AD,0x690A4375,0xBFBD8367
8932 long 0x3FFF0000,0xB123F581,0xD2AC2590,0xBFBEF05F
8933 long 0x3FFF0000,0xB311C412,0xA9112489,0x3FBDFB3C
8934 long 0x3FFF0000,0xB504F333,0xF9DE6484,0x3FBEB2FB
8935 long 0x3FFF0000,0xB6FD91E3,0x28D17791,0x3FBAE2CB
8936 long 0x3FFF0000,0xB8FBAF47,0x62FB9EE9,0x3FBCDC3C
8937 long 0x3FFF0000,0xBAFF5AB2,0x133E45FB,0x3FBEE9AA
8938 long 0x3FFF0000,0xBD08A39F,0x580C36BF,0xBFBEAEFD
8939 long 0x3FFF0000,0xBF1799B6,0x7A731083,0xBFBCBF51
8940 long 0x3FFF0000,0xC12C4CCA,0x66709456,0x3FBEF88A
8941 long 0x3FFF0000,0xC346CCDA,0x24976407,0x3FBD83B2
8942 long 0x3FFF0000,0xC5672A11,0x5506DADD,0x3FBDF8AB
8943 long 0x3FFF0000,0xC78D74C8,0xABB9B15D,0xBFBDFB17
8944 long 0x3FFF0000,0xC9B9BD86,0x6E2F27A3,0xBFBEFE3C
8945 long 0x3FFF0000,0xCBEC14FE,0xF2727C5D,0xBFBBB6F8
8946 long 0x3FFF0000,0xCE248C15,0x1F8480E4,0xBFBCEE53
8947 long 0x3FFF0000,0xD06333DA,0xEF2B2595,0xBFBDA4AE
8948 long 0x3FFF0000,0xD2A81D91,0xF12AE45A,0x3FBC9124
8949 long 0x3FFF0000,0xD4F35AAB,0xCFEDFA1F,0x3FBEB243
8950 long 0x3FFF0000,0xD744FCCA,0xD69D6AF4,0x3FBDE69A
8951 long 0x3FFF0000,0xD99D15C2,0x78AFD7B6,0xBFB8BC61
8952 long 0x3FFF0000,0xDBFBB797,0xDAF23755,0x3FBDF610
8953 long 0x3FFF0000,0xDE60F482,0x5E0E9124,0xBFBD8BE1
8954 long 0x3FFF0000,0xE0CCDEEC,0x2A94E111,0x3FBACB12
8955 long 0x3FFF0000,0xE33F8972,0xBE8A5A51,0x3FBB9BFE
8956 long 0x3FFF0000,0xE5B906E7,0x7C8348A8,0x3FBCF2F4
8957 long 0x3FFF0000,0xE8396A50,0x3C4BDC68,0x3FBEF22F
8958 long 0x3FFF0000,0xEAC0C6E7,0xDD24392F,0xBFBDBF4A
8959 long 0x3FFF0000,0xED4F301E,0xD9942B84,0x3FBEC01A
8960 long 0x3FFF0000,0xEFE4B99B,0xDCDAF5CB,0x3FBE8CAC
8961 long 0x3FFF0000,0xF281773C,0x59FFB13A,0xBFBCBB3F
8962 long 0x3FFF0000,0xF5257D15,0x2486CC2C,0x3FBEF73A
8963 long 0x3FFF0000,0xF7D0DF73,0x0AD13BB9,0xBFB8B795
8964 long 0x3FFF0000,0xFA83B2DB,0x722A033A,0x3FBEF84B
8965 long 0x3FFF0000,0xFD3E0C0C,0xF486C175,0xBFBEF581
# 2**X for normalized and denormalized input. The usual path splits
# 64*X into N = 64(M+M') + J, fetches 2^(J/64) from the table into
# FACT1/FACT2, and leaves R = (X - N/64)*ln(2) in fp0.
8977 set FACT1LOW,FACT1+8 # third longword of the FACT1 image
8981 set FACT2LOW,FACT2+8 # third longword of the FACT2 image
8984 #--ENTRY POINT FOR 2**(X), HERE X IS FINITE, NON-ZERO, AND NOT NAN'S
8986 fmovm.x (%a0),&0x80 # LOAD INPUT
8991 and.l &0x7FFFFFFF,%d1 # clear the sign bit of d1
8993 cmp.l %d1,&0x3FB98000 # |X| >= 2**(-70)?
8998 cmp.l %d1,&0x400D80C0 # |X| > 16480?
9003 #--USUAL CASE, 2^(-70) <= |X| <= 16480
9006 fmul.s &0x42800000,%fp1 # 64 * X (0x42800000 is single 64.0)
9007 fmov.l %fp1,INT(%a6) # N = ROUND-TO-INT(64 X)
9009 lea TEXPTBL(%pc),%a1 # LOAD ADDRESS OF TABLE OF 2^(J/64)
9010 fmov.l INT(%a6),%fp1 # N --> FLOATING FMT
9013 and.l &0x3F,%d1 # d1 IS J (low 6 bits)
9014 asl.l &4,%d1 # DISPLACEMENT FOR 2^(J/64) (16 bytes per entry)
9015 add.l %d1,%a1 # ADDRESS FOR 2^(J/64)
9016 asr.l &6,%d2 # d2 IS L, N = 64L + J
9018 asr.l &1,%d1 # d1 IS M
9019 sub.l %d1,%d2 # d2 IS M', N = 64(M+M') + J
9022 #--SUMMARY: a1 IS ADDRESS FOR THE LEADING PORTION OF 2^(J/64),
9023 #--d1 IS M WHERE N = 64(M+M') + J. NOTE THAT |M| <= 16140 BY DESIGN.
9024 #--ADJFACT = 2^(M').
9025 #--REGISTERS SAVED SO FAR ARE (IN ORDER) FPCR, D0, FP1, a1, AND FP2.
9027 fmovm.x &0x0c,-(%sp) # save fp2/fp3
9029 fmul.s &0x3C800000,%fp1 # (1/64)*N (0x3C800000 is single 1/64)
9030 mov.l (%a1)+,FACT1(%a6) # copy 12-byte leading part of 2^(J/64)
9031 mov.l (%a1)+,FACT1HI(%a6)
9032 mov.l (%a1)+,FACT1LOW(%a6)
9033 mov.w (%a1)+,FACT2(%a6) # first word of the entry's fourth long
9035 fsub.x %fp1,%fp0 # X - (1/64)*INT(64 X)
9037 mov.w (%a1)+,FACT2HI(%a6) # second word of the entry's fourth long
9038 clr.w FACT2HI+2(%a6) # zero the following word
9040 add.w %d1,FACT1(%a6) # add d1 to the first word of FACT1
9041 fmul.x LOG2(%pc),%fp0 # FP0 IS R
9042 add.w %d1,FACT2(%a6) # add d1 to the first word of FACT2
9048 cmp.l %d1,&0x3FFF8000 # compare d1 with 0x3FFF8000 (1.0 in exponent/top-mantissa form)
9051 #--|X| IS SMALL, RETURN 1 + X
9053 fmov.l %d0,%fpcr # restore users round prec,mode
9054 fadd.s &0x3F800000,%fp0 # RETURN 1 + X
9058 #--|X| IS LARGE, GENERATE OVERFLOW IF X > 0; ELSE GENERATE UNDERFLOW
9059 #--REGISTERS SAVED SO FAR ARE FPCR AND D0
9064 bra t_ovfl2 # t_ovfl expects positive value
9067 bra t_unfl2 # t_unfl expects positive value
9071 #--ENTRY POINT FOR 2**(X) FOR DENORMALIZED ARGUMENT
9073 fmov.l %d0,%fpcr # set user's rounding mode/precision
9074 fmov.s &0x3F800000,%fp0 # RETURN 1 + X (loads single 1.0)
9076 or.l &0x00800001,%d1 # NOTE(review): sets bits 23 and 0 of d1; its use is on lines not shown here
# 10**X for normalized input. The usual path forms N = round(X*64*log2(10)),
# splits N = 64(M+M') + J, fetches 2^(J/64) into FACT1/FACT2, and leaves
# R = ((X - N*L_LEAD) - N*L_TRAIL) * ln(10) in fp0.
9081 #--ENTRY POINT FOR 10**(X), HERE X IS FINITE, NON-ZERO, AND NOT NAN'S
9083 fmovm.x (%a0),&0x80 # LOAD INPUT
9088 and.l &0x7FFFFFFF,%d1 # clear the sign bit of d1
9090 cmp.l %d1,&0x3FB98000 # |X| >= 2**(-70)?
9095 cmp.l %d1,&0x400B9B07 # |X| <= 16480*log2/log10 ?
9100 #--USUAL CASE, 2^(-70) <= |X| <= 16480 LOG 2 / LOG 10
9103 fmul.d L2TEN64(%pc),%fp1 # X*64*LOG10/LOG2
9104 fmov.l %fp1,INT(%a6) # N=INT(X*64*LOG10/LOG2)
9106 lea TEXPTBL(%pc),%a1 # LOAD ADDRESS OF TABLE OF 2^(J/64)
9107 fmov.l INT(%a6),%fp1 # N --> FLOATING FMT
9110 and.l &0x3F,%d1 # d1 IS J (low 6 bits)
9111 asl.l &4,%d1 # DISPLACEMENT FOR 2^(J/64) (16 bytes per entry)
9112 add.l %d1,%a1 # ADDRESS FOR 2^(J/64)
9113 asr.l &6,%d2 # d2 IS L, N = 64L + J
9115 asr.l &1,%d1 # d1 IS M
9116 sub.l %d1,%d2 # d2 IS M', N = 64(M+M') + J
9119 #--SUMMARY: a1 IS ADDRESS FOR THE LEADING PORTION OF 2^(J/64),
9120 #--d1 IS M WHERE N = 64(M+M') + J. NOTE THAT |M| <= 16140 BY DESIGN.
9121 #--ADJFACT = 2^(M').
9122 #--REGISTERS SAVED SO FAR ARE (IN ORDER) FPCR, D0, FP1, a1, AND FP2.
9123 fmovm.x &0x0c,-(%sp) # save fp2/fp3
9127 fmul.d L10TWO1(%pc),%fp1 # N*(LOG2/64LOG10)_LEAD
9128 mov.l (%a1)+,FACT1(%a6) # copy 12-byte leading part of 2^(J/64)
9130 fmul.x L10TWO2(%pc),%fp2 # N*(LOG2/64LOG10)_TRAIL
9132 mov.l (%a1)+,FACT1HI(%a6)
9133 mov.l (%a1)+,FACT1LOW(%a6)
9134 fsub.x %fp1,%fp0 # X - N L_LEAD
9135 mov.w (%a1)+,FACT2(%a6) # first word of the entry's fourth long
9137 fsub.x %fp2,%fp0 # (X - N L_LEAD) - N L_TRAIL
9139 mov.w (%a1)+,FACT2HI(%a6) # second word of the entry's fourth long
9140 clr.w FACT2HI+2(%a6) # zero the following word
9143 fmul.x LOG10(%pc),%fp0 # FP0 IS R
9144 add.w %d1,FACT1(%a6) # add d1 to the first word of FACT1
9145 add.w %d1,FACT2(%a6) # add d1 to the first word of FACT2
# Shared tail for 2**X and 10**X: evaluates the polynomial for EXP(R) - 1,
# combines it with FACT1/FACT2, then multiplies by ADJFACT = 2**(M').
9148 #--FPCR, FP2, FP3 ARE SAVED IN ORDER AS SHOWN.
9149 #--ADJFACT CONTAINS 2**(M'), FACT1 + FACT2 = 2**(M) * 2**(J/64).
9150 #--FP0 IS R. THE FOLLOWING CODE COMPUTES
9151 #-- 2**(M'+M) * 2**(J/64) * EXP(R)
9154 fmul.x %fp1,%fp1 # FP1 IS S = R*R (assumes fp1 = R from a line not shown -- TODO confirm)
9156 fmov.d EXPA5(%pc),%fp2 # FP2 IS A5
9157 fmov.d EXPA4(%pc),%fp3 # FP3 IS A4
9159 fmul.x %fp1,%fp2 # FP2 IS S*A5
9160 fmul.x %fp1,%fp3 # FP3 IS S*A4
9162 fadd.d EXPA3(%pc),%fp2 # FP2 IS A3+S*A5
9163 fadd.d EXPA2(%pc),%fp3 # FP3 IS A2+S*A4
9165 fmul.x %fp1,%fp2 # FP2 IS S*(A3+S*A5)
9166 fmul.x %fp1,%fp3 # FP3 IS S*(A2+S*A4)
9168 fadd.d EXPA1(%pc),%fp2 # FP2 IS A1+S*(A3+S*A5)
9169 fmul.x %fp0,%fp3 # FP3 IS R*S*(A2+S*A4)
9171 fmul.x %fp1,%fp2 # FP2 IS S*(A1+S*(A3+S*A5))
9172 fadd.x %fp3,%fp0 # FP0 IS R+R*S*(A2+S*A4)
9173 fadd.x %fp2,%fp0 # FP0 IS EXP(R) - 1
9175 fmovm.x (%sp)+,&0x30 # restore fp2/fp3
9177 #--FINAL RECONSTRUCTION PROCESS
9178 #--EXP(X) = 2^M*2^(J/64) + 2^M*2^(J/64)*(EXP(R)-1) - (1 OR 0)
9180 fmul.x FACT1(%a6),%fp0 # FACT1 * (EXP(R)-1)
9181 fadd.x FACT2(%a6),%fp0 # ... + FACT2
9182 fadd.x FACT1(%a6),%fp0 # ... + FACT1
9184 fmov.l %d0,%fpcr # restore users round prec,mode
9185 mov.w %d2,ADJFACT(%a6) # INSERT EXPONENT
9187 mov.l &0x80000000,ADJFACT+4(%a6) # high mantissa longword = 0x80000000
9188 clr.l ADJFACT+8(%a6) # low mantissa longword = 0
9189 mov.b &FMUL_OP,%d1 # last inst is MUL
9190 fmul.x ADJFACT(%a6),%fp0 # FINAL ADJUSTMENT
9195 #--ENTRY POINT FOR 10**(X) FOR DENORMALIZED ARGUMENT
9197 fmov.l %d0,%fpcr # set user's rounding mode/precision
9198 fmov.s &0x3F800000,%fp0 # RETURN 1 + X (loads single 1.0)
9200 or.l &0x00800001,%d1 # NOTE(review): sets bits 23 and 0 of d1; its use is on lines not shown here
9204 #########################################################################
9205 # sscale(): computes the destination operand scaled by the source #
9206 # operand. If the absolute value of the source operand is #
9207 # >= 2^14, an overflow or underflow is returned. #
9209 # INPUT *************************************************************** #
9210 # a0 = pointer to double-extended source operand X #
9211 # a1 = pointer to double-extended destination operand Y #
9213 # OUTPUT ************************************************************** #
9214 # fp0 = scale(X,Y) #
9216 #########################################################################
# sscale body: a0 points at the src operand, a1 at the dst operand.
# The in-range path builds an extended-precision scale factor on the
# stack and multiplies fp0 by it; out-of-range and small-src cases branch
# to the exception/return helpers named below.
9222 mov.l %d0,-(%sp) # store off ctrl bits for now
9224 mov.w DST_EX(%a1),%d1 # get dst exponent
9225 smi.b SIGN(%a6) # use SIGN to hold dst sign
9226 andi.l &0x00007fff,%d1 # strip sign from dst exp
9228 mov.w SRC_EX(%a0),%d0 # check src bounds
9229 andi.w &0x7fff,%d0 # clr src sign bit
9230 cmpi.w %d0,&0x3fff # is src ~ ZERO?
9231 blt.w src_small # yes
9232 cmpi.w %d0,&0x400c # no; is src too big?
9236 # Source is within 2^14 range.
9239 fintrz.x SRC(%a0),%fp0 # calc int of src
9240 fmov.l %fp0,%d0 # int src to d0
9241 # don't want any accrued bits from the fintrz showing up later since
9242 # we may need to read the fpsr for the last fp op in t_catch2().
9245 tst.b DST_HI(%a1) # is dst denormalized?
9248 # the dst is a DENORM. normalize the DENORM and add the adjustment to
9249 # the src value. then, jump to the norm part of the routine.
9251 mov.l %d0,-(%sp) # save src for now
9253 mov.w DST_EX(%a1),FP_SCR0_EX(%a6) # make a copy
9254 mov.l DST_HI(%a1),FP_SCR0_HI(%a6)
9255 mov.l DST_LO(%a1),FP_SCR0_LO(%a6)
9257 lea FP_SCR0(%a6),%a0 # pass ptr to DENORM
9258 bsr.l norm # normalize the DENORM
9260 add.l (%sp)+,%d0 # add adjustment to src
9262 fmovm.x FP_SCR0(%a6),&0x80 # load normalized DENORM
9264 cmpi.w %d0,&-0x3fff # is the shft amt really low?
9265 bge.b sok_norm2 # thank goodness no
9267 # the multiply factor that we're trying to create should be a denorm
9268 # for the multiply to work. therefore, we're going to actually do a
9269 # multiply with a denorm which will cause an unimplemented data type
9270 # exception to be put into the machine which will be caught and corrected
9271 # later. we don't do this with the DENORMs above because this method
9272 # is slower. but, don't fret, I don't see it being used much either.
9273 fmov.l (%sp)+,%fpcr # restore user fpcr
9274 mov.l &0x80000000,%d1 # load normalized mantissa
9275 subi.l &-0x3fff,%d0 # how many should we shift?
9276 neg.l %d0 # make it positive
9277 cmpi.b %d0,&0x20 # is it >= 32?
9278 bge.b sok_dnrm_32 # yes
9279 lsr.l %d0,%d1 # no; bit stays in upper lw
9280 clr.l -(%sp) # insert zero low mantissa
9281 mov.l %d1,-(%sp) # insert new high mantissa
9282 clr.l -(%sp) # make zero exponent
9285 subi.b &0x20,%d0 # get shift count
9286 lsr.l %d0,%d1 # make low mantissa longword
9287 mov.l %d1,-(%sp) # insert new low mantissa
9288 clr.l -(%sp) # insert zero high mantissa
9289 clr.l -(%sp) # make zero exponent
9292 # the src will force the dst to a DENORM value or worse. so, let's
9293 # create an fp multiply that will create the result.
9295 fmovm.x DST(%a1),&0x80 # load fp0 with the dst operand
9297 fmov.l (%sp)+,%fpcr # restore user fpcr
9299 addi.w &0x3fff,%d0 # turn src amt into exp value
9300 swap %d0 # put exponent in high word
9301 clr.l -(%sp) # insert zero lo mantissa (pushed first, highest address)
9302 mov.l &0x80000000,-(%sp) # insert new high mantissa
9303 mov.l %d0,-(%sp) # insert new exponent (pushed last, lowest address)
9306 fmov.l %fpcr,%d0 # d0 needs fpcr for t_catch2
9307 mov.b &FMUL_OP,%d1 # last inst is MUL
9308 fmul.x (%sp)+,%fp0 # do the multiply
9309 bra t_catch2 # catch any exceptions
9312 # Source is outside of 2^14 range. Test the sign and branch
9313 # to the appropriate exception handler.
9316 mov.l (%sp)+,%d0 # restore ctrl bits
9317 exg %a0,%a1 # swap src,dst ptrs
9318 tst.b SRC_EX(%a1) # is src negative? (a1 is the src ptr after the exg)
9319 bmi t_unfl # yes; underflow
9320 bra t_ovfl_sc # no; overflow
9323 # The source input is below 1, so we check for denormalized numbers
9327 tst.b DST_HI(%a1) # is dst denormalized?
9328 bpl.b ssmall_done # yes
9331 fmov.l %d0,%fpcr # no; load control bits
9332 mov.b &FMOV_OP,%d1 # last inst is MOVE
9333 fmov.x DST(%a1),%fp0 # simply return dest
9336 mov.l (%sp)+,%d0 # load control bits into d0
9337 mov.l %a1,%a0 # pass ptr to dst
9340 #########################################################################
9341 # smod(): computes the fp MOD of the input values X,Y. #
9342 # srem(): computes the fp (IEEE) REM of the input values X,Y. #
9344 # INPUT *************************************************************** #
9345 # a0 = pointer to extended precision input X #
9346 # a1 = pointer to extended precision input Y #
9347 # d0 = round precision,mode #
9349 # The input operands X and Y can be either normalized or denormalized. #
9352 # OUTPUT ************************************************************** #
9353 # fp0 = FREM(X,Y) or FMOD(X,Y) #
9355 # ALGORITHM *********************************************************** #
9357 # Step 1. Save and strip signs of X and Y: signX := sign(X), #
9358 # signY := sign(Y), X := |X|, Y := |Y|, #
9359 # signQ := signX EOR signY. Record whether MOD or REM is requested. #
9362 # Step 2. Set L := expo(X)-expo(Y), k := 0, Q := 0. #
9364 # If L < 0, then R := X, go to Step 4. #
9366 # Else R := 2^(-L)X, j := L. #
9369 # Step 3. Perform MOD(X,Y) #
9370 # 3.1 If R = Y, go to Step 9. #
9371 # 3.2 If R > Y, then { R := R - Y, Q := Q + 1} #
9372 # 3.3 If j = 0, go to Step 4. #
9373 # 3.4 k := k + 1, j := j - 1, Q := 2Q, R := 2R. Go to Step 3.1. #
9376 # Step 4. At this point, R = X - QY = MOD(X,Y). Set #
9377 # Last_Subtract := false (used in Step 7 below). If #
9378 # MOD is requested, go to Step 6. #
9380 # Step 5. R = MOD(X,Y), but REM(X,Y) is requested. #
9381 # 5.1 If R < Y/2, then R = MOD(X,Y) = REM(X,Y). Go to Step 6. #
9383 # 5.2 If R > Y/2, then { set Last_Subtract := true, #
9384 # Q := Q + 1, Y := signY*Y }. Go to Step 6. #
9385 # 5.3 This is the tricky case of R = Y/2. If Q is odd, #
9386 # then { Q := Q + 1, signX := -signX }. #
9388 # Step 6. R := signX*R. #
9390 # Step 7. If Last_Subtract = true, R := R - Y. #
9392 # Step 8. Return signQ, last 7 bits of Q, and R as required. #
9394 # Step 9. At this point, R = 2^(-j)*X - Q Y = Y. Thus, #
9395 # X = 2^(j)*(Q+1)Y. set Q := 2^(j)*(Q+1), #
9396 # R := 0. Return signQ, last 7 bits of Q, and R. #
9398 #########################################################################
# smod/srem body: a0 points at Y (read via SRC_*), a1 at X (read via DST_*).
# Signs are saved in SignX/SignY/SignQ, |Y| is held in (D3,D4,D5) and |X|
# in (D0,D1,D2), then a shift-and-subtract loop reduces R while building
# the quotient Q in d3; the low 7 bits and sign of Q go to FPSR_QBYTE.
9401 set Sc_Flag,L_SCR3+1
9416 long 0x00010000,0x80000000,0x00000000,0x00000000 # extended value: exponent 0x0001, mantissa 0x80000000_00000000 (presumably Scale; label not visible here)
9420 clr.b FPSR_QBYTE(%a6) # clear the quotient byte
9421 mov.l %d0,-(%sp) # save ctrl bits
9427 clr.b FPSR_QBYTE(%a6) # clear the quotient byte
9428 mov.l %d0,-(%sp) # save ctrl bits
9429 mov.b &0x1,Mod_Flag(%a6) # NOTE(review): this entry sets Mod_Flag to 1; which entry point it is cannot be seen here
9432 #..Save sign of X and Y
9433 movm.l &0x3f00,-(%sp) # save data registers {%d2-%d7}
9434 mov.w SRC_EX(%a0),%d3
9435 mov.w %d3,SignY(%a6)
9436 and.l &0x00007FFF,%d3 # Y := |Y|
9439 mov.l SRC_HI(%a0),%d4
9440 mov.l SRC_LO(%a0),%d5 # (D3,D4,D5) is |Y|
9445 mov.l &0x00003FFE,%d3 # $3FFD + 1
9454 bfffo %d4{&0:&32},%d6 # d6 = bit offset of the first set bit in d4
9456 sub.l %d6,%d3 # (D3,D4,D5) is normalized
9457 # ...with bias $7FFD
9462 bfffo %d4{&0:&32},%d6 # d6 = bit offset of the first set bit in d4
9465 mov.l %d5,%d7 # a copy of D5
9470 or.l %d7,%d4 # (D3,D4,D5) normalized
9471 # ...with bias $7FFD
9475 add.l &0x00003FFE,%d3 # (D3,D4,D5) normalized
9476 # ...with bias $7FFD
9479 mov.w DST_EX(%a1),%d0
9480 mov.w %d0,SignX(%a6)
9481 mov.w SignY(%a6),%d1
9483 and.l &0x00008000,%d1 # keep only the sign bit
9484 mov.w %d1,SignQ(%a6) # sign(Q) obtained
9485 and.l &0x00007FFF,%d0 # X := |X|
9486 mov.l DST_HI(%a1),%d1
9487 mov.l DST_LO(%a1),%d2 # (D0,D1,D2) is |X|
9490 mov.l &0x00003FFE,%d0
9499 bfffo %d1{&0:&32},%d6 # d6 = bit offset of the first set bit in d1
9501 sub.l %d6,%d0 # (D0,D1,D2) is normalized
9502 # ...with bias $7FFD
9507 bfffo %d1{&0:&32},%d6 # d6 = bit offset of the first set bit in d1
9510 mov.l %d2,%d7 # a copy of D2
9515 or.l %d7,%d1 # (D0,D1,D2) normalized
9516 # ...with bias $7FFD
9520 add.l &0x00003FFE,%d0 # (D0,D1,D2) normalized
9521 # ...with bias $7FFD
9525 mov.l %d3,L_SCR1(%a6) # save biased exp(Y)
9526 mov.l %d0,-(%sp) # save biased exp(X)
9527 sub.l %d3,%d0 # L := expo(X)-expo(Y)
9529 clr.l %d6 # D6 := carry <- 0
9531 mov.l &0,%a1 # A1 is k; j+k=L, Q=0
9533 #..(Carry,D1,D2) is R
9537 #..expo(X) < expo(Y). Thus X = mod(X,Y)
9539 mov.l (%sp)+,%d0 # restore d0
9543 addq.l &0x4,%sp # erase exp(X)
9544 #..At this point R = 2^(-L)X; Q = 0; k = 0; and k+j = L
9546 tst.l %d6 # test carry bit
9549 #..At this point carry = 0, R = (D1,D2), Y = (D4,D5)
9550 cmp.l %d1,%d4 # compare hi(R) and hi(Y)
9552 cmp.l %d2,%d5 # compare lo(R) and lo(Y)
9555 #..At this point, R = Y
9559 #..use the borrow of the previous compare
9560 bcs.b R_LT_Y # borrow is set iff R < Y
9563 #..If Carry is set, then Y < (Carry,D1,D2) < 2Y. Otherwise, Carry = 0
9564 #..and Y < (D1,D2) < 2Y. Either way, perform R - Y
9565 sub.l %d5,%d2 # lo(R) - lo(Y)
9566 subx.l %d4,%d1 # hi(R) - hi(Y)
9567 clr.l %d6 # clear carry
9568 addq.l &1,%d3 # Q := Q + 1
9571 #..At this point, Carry=0, R < Y. R = 2^(k-L)X - QY; k+j = L; j >= 0.
9572 tst.l %d0 # see if j = 0.
9575 add.l %d3,%d3 # Q := 2Q
9576 add.l %d2,%d2 # lo(R) = 2lo(R)
9577 roxl.l &1,%d1 # hi(R) = 2hi(R) + carry
9578 scs %d6 # set Carry if 2(R) overflows
9579 addq.l &1,%a1 # k := k+1
9580 subq.l &1,%d0 # j := j - 1
9581 #..At this point, R=(Carry,D1,D2) = 2^(k-L)X - QY, j+k=L, j >= 0, R < 2Y.
9586 #..k = L, j = 0, Carry = 0, R = (D1,D2) = X - QY, R < Y.
9589 mov.l L_SCR1(%a6),%d0 # new biased expo of R
9598 bfffo %d1{&0:&32},%d6 # d6 = bit offset of the first set bit in d1
9600 sub.l %d6,%d0 # (D0,D1,D2) is normalized
9601 # ...with bias $7FFD
9606 bfffo %d1{&0:&32},%d6 # d6 = bit offset of the first set bit in d1
9607 bmi.b Get_Mod # already normalized
9610 mov.l %d2,%d7 # a copy of D2
9615 or.l %d7,%d1 # (D0,D1,D2) normalized
9619 cmp.l %d0,&0x000041FE
9625 mov.l L_SCR1(%a6),%d6
9629 fmov.x R(%a6),%fp0 # no exception
9630 mov.b &1,Sc_Flag(%a6)
9637 mov.l L_SCR1(%a6),%d6
9639 mov.l %d6,L_SCR1(%a6)
9651 mov.l L_SCR1(%a6),%d6 # new biased expo(Y)
9652 subq.l &1,%d6 # biased expo(Y/2)
9668 fsub.x Y(%a6),%fp0 # no exceptions
9669 addq.l &1,%d3 # Q := Q + 1
9674 mov.w SignX(%a6),%d6
9682 mov.w SignQ(%a6),%d6 # D6 is sign(Q)
9685 and.l &0x0000007F,%d3 # 7 bits of Q
9686 or.l %d6,%d3 # sign and bits of Q
9689 # and.l &0xFF00FFFF,%d6
9691 # fmov.l %d6,%fpsr # put Q in fpsr
9692 mov.b %d3,FPSR_QBYTE(%a6) # put Q in fpsr
9696 movm.l (%sp)+,&0xfc # {%d2-%d7}
9701 mov.b &FMUL_OP,%d1 # last inst is MUL
9702 fmul.x Scale(%pc),%fp0 # may cause underflow
9704 # the '040 package did this apparently to see if the dst operand for the
9705 # preceding fmul was a denorm. but, it better not have been since the
9706 # algorithm just got done playing with fp0 and expected no exceptions
9707 # as a result. trust me...
9708 # bra t_avoid_unsupp # check for denorm as a
9709 # ;result of the scaling
9712 mov.b &FMOV_OP,%d1 # last inst is MOVE
9713 fmov.x %fp0,%fp0 # capture exceptions & round
9717 #..R = 2^(-j)X - Q Y = Y, thus R = 0 and quotient = 2^j (Q+1)
9719 cmp.l %d0,&8 # D0 is j
9729 fmov.s &0x00000000,%fp0 # R := 0 (loads single-precision +0.0)
9734 #..Check parity of Q
9736 and.l &0x00000001,%d6 # keep only the low bit
9738 beq.w Fix_Sign # Q is even
9740 #..Q is odd, Q := Q + 1, signX := -signX
9742 mov.w SignX(%a6),%d6
9743 eor.l &0x00008000,%d6 # toggle the sign bit
9744 mov.w %d6,SignX(%a6)
9747 #########################################################################
9748 # XDEF **************************************************************** #
9749 # tag(): return the optype of the input ext fp number #
9751 # This routine is used by the 060FPLSP. #
9753 # XREF **************************************************************** #
9756 # INPUT *************************************************************** #
9757 # a0 = pointer to extended precision operand #
9759 # OUTPUT ************************************************************** #
9760 # d0 = value of type tag #
9761 # one of: NORM, INF, QNAN, SNAN, DENORM, ZERO #
9763 # ALGORITHM *********************************************************** #
9764 # Simply test the exponent, j-bit, and mantissa values to #
9765 # determine the type of operand. #
9766 # If it's an unnormalized zero, alter the operand and force it #
9767 # to be a normal zero. #
9769 #########################################################################
# tag body: classifies the extended operand at (a0) from its exponent
# field and mantissa; the type tag is returned in d0 (see header above).
9773 mov.w FTEMP_EX(%a0), %d0 # extract exponent
9774 andi.w &0x7fff, %d0 # strip off sign
9775 cmpi.w %d0, &0x7fff # is (EXP == MAX)?
9778 btst &0x7,FTEMP_HI(%a0) # test bit 7 of the first mantissa byte (mantissa msb / j-bit)
9784 tst.w %d0 # is exponent = 0?
9798 bsr.l unnorm_fix # convert to norm,denorm,or zero
9806 mov.l FTEMP_HI(%a0), %d0 # fetch high mantissa longword
9807 and.l &0x7fffffff, %d0 # msb is a don't care!
9816 #############################################################
9818 qnan: long 0x7fff0000, 0xffffffff, 0xffffffff # extended-precision image: exponent 0x7fff, all-ones mantissa
9820 #########################################################################
9821 # XDEF **************************************************************** #
9822 # t_dz(): Handle 060FPLSP dz exception for "flogn" emulation. #
9823 # t_dz2(): Handle 060FPLSP dz exception for "fatanh" emulation. #
9825 # These routines are used by the 060FPLSP package. #
9827 # XREF **************************************************************** #
9830 # INPUT *************************************************************** #
9831 # a0 = pointer to extended precision source operand. #
9833 # OUTPUT ************************************************************** #
9834 # fp0 = default DZ result. #
9836 # ALGORITHM *********************************************************** #
9837 # Transcendental emulation for the 060FPLSP has detected that #
9838 # a DZ exception should occur for the instruction. If DZ is disabled, #
9839 # return the default result. #
9840 # If DZ is enabled, the dst operand should be returned unscathed #
9841 # in fp0 while fp1 is used to create a DZ exception so that the #
9842 # operating system can log that such an event occurred. #
9844 #########################################################################
# t_dz body: picks -INF or +INF from the sign of the source operand, sets
# the DZ-related bits in USER_FPSR, and either returns the infinity (DZ
# disabled) or performs a real divide by zero in fp1 (DZ enabled).
9848 tst.b SRC_EX(%a0) # check sign for neg or pos
9849 bpl.b dz_pinf # branch if pos sign
9853 ori.l &dzinf_mask+neg_mask,USER_FPSR(%a6) # set N/I/DZ/ADZ
9855 btst &dz_bit,FPCR_ENABLE(%a6) # is the dz exception enabled?
9858 # dz is disabled. return a -INF.
9859 fmov.s &0xff800000,%fp0 # return -INF
9862 # dz is enabled. create a dz exception so the user can record it
9863 # but use fp1 instead. return the dst operand unscathed in fp0.
9865 fmovm.x EXC_FP0(%a6),&0x80 # return fp0 unscathed
9866 fmov.l USER_FPCR(%a6),%fpcr # load the user's fpcr
9867 fmov.s &0xbf800000,%fp1 # load -1
9868 fdiv.s &0x00000000,%fp1 # -1 / 0
9872 ori.l &dzinf_mask,USER_FPSR(%a6) # set I/DZ/ADZ
9874 btst &dz_bit,FPCR_ENABLE(%a6) # is the dz exception enabled?
9877 # dz is disabled. return a +INF.
9878 fmov.s &0x7f800000,%fp0 # return +INF
9881 # dz is enabled. create a dz exception so the user can record it
9882 # but use fp1 instead. return the dst operand unscathed in fp0.
9884 fmovm.x EXC_FP0(%a6),&0x80 # return fp0 unscathed
9885 fmov.l USER_FPCR(%a6),%fpcr # load the user's fpcr
9886 fmov.s &0x3f800000,%fp1 # load +1
9887 fdiv.s &0x00000000,%fp1 # +1 / 0
9890 #########################################################################
9891 # XDEF **************************************************************** #
9892 # t_operr(): Handle 060FPLSP OPERR exception during emulation. #
9894 # This routine is used by the 060FPLSP package. #
9896 # XREF **************************************************************** #
9899 # INPUT *************************************************************** #
9900 # fp1 = source operand #
9902 # OUTPUT ************************************************************** #
9903 # fp0 = default result #
9906 # ALGORITHM *********************************************************** #
9907 # An operand error should occur as the result of transcendental #
9908 # emulation in the 060FPLSP. If OPERR is disabled, just return a NAN #
9909 # in fp0. If OPERR is enabled, return the dst operand unscathed in fp0 #
9910 # and the source operand in fp1. Use fp2 to create an OPERR exception #
9911 # so that the operating system can log the event. #
9913 #########################################################################
9917 ori.l &opnan_mask,USER_FPSR(%a6) # set NAN/OPERR/AIOP
9919 btst &operr_bit,FPCR_ENABLE(%a6) # is operr enabled?
9922 # operr is disabled. return a QNAN in fp0
9923 fmovm.x qnan(%pc),&0x80 # return QNAN
9926 # operr is enabled. create an operr exception so the user can record it
9927 # but use fp2 instead. return the dst operand unscathed in fp0.
9929 fmovm.x EXC_FP0(%a6),&0x80 # return fp0 unscathed
9930 fmov.l USER_FPCR(%a6),%fpcr # load user's FPCR
9931 fmovm.x &0x04,-(%sp) # save fp2
9932 fmov.s &0x7f800000,%fp2 # load +INF
9933 fmul.s &0x00000000,%fp2 # +INF x 0
9934 fmovm.x (%sp)+,&0x20 # restore fp2
9938 long 0x7ffe0000,0xffffffff,0xffffffff # sign = 0, exp = 0x7ffe, mantissa all ones
9940 long 0xfffe0000,0xffffffff,0xffffffff # sign = 1, exp = 0x7ffe, mantissa all ones
9942 long 0x00000000,0x80000000,0x00000000 # sign = 0, exp = 0, only msb of mantissa set
9944 long 0x80000000,0x80000000,0x00000000 # sign = 1, exp = 0, only msb of mantissa set
9946 #########################################################################
9947 # XDEF **************************************************************** #
9948 # t_unfl(): Handle 060FPLSP underflow exception during emulation. #
9949 # t_unfl2(): Handle 060FPLSP underflow exception during #
9950 # emulation. result always positive. #
9952 # This routine is used by the 060FPLSP package. #
9954 # XREF **************************************************************** #
9957 # INPUT *************************************************************** #
9958 # a0 = pointer to extended precision source operand #
9960 # OUTPUT ************************************************************** #
9961 # fp0 = default underflow result #
9963 # ALGORITHM *********************************************************** #
9964 # An underflow should occur as the result of transcendental #
9965 # emulation in the 060FPLSP. Create an underflow by using "fmul" #
9966 # and two very small numbers of appropriate sign so the operating #
9967 # system can log the event. #
9969 #########################################################################
9978 ori.l &unfinx_mask+neg_mask,USER_FPSR(%a6) # set N/UNFL/INEX2/AUNFL/AINEX
9980 fmov.l USER_FPCR(%a6),%fpcr # load user's FPCR
9981 fmovm.x mns_tiny(%pc),&0x80 # fp0 = mns_tiny
9982 fmul.x pls_tiny(%pc),%fp0 # mns_tiny x pls_tiny; creates the underflow
9986 mov.b %d0,FPSR_CC(%a6) # store d0.b as the FPSR ccode byte
9989 ori.w &unfinx_mask,FPSR_EXCEPT(%a6) # set UNFL/INEX2/AUNFL/AINEX
9991 fmov.l USER_FPCR(%a6),%fpcr # load user's FPCR
9992 fmovm.x pls_tiny(%pc),&0x80 # fp0 = pls_tiny
9997 mov.b %d0,FPSR_CC(%a6) # store d0.b as the FPSR ccode byte
10000 #########################################################################
10001 # XDEF **************************************************************** #
10002 # t_ovfl(): Handle 060FPLSP overflow exception during emulation. #
10004 # t_ovfl2(): Handle 060FPLSP overflow exception during #
10005 # emulation. result always positive. (dyadic) #
10006 # t_ovfl_sc(): Handle 060FPLSP overflow exception during #
10007 # emulation for "fscale". #
10009 # This routine is used by the 060FPLSP package. #
10011 # XREF **************************************************************** #
10014 # INPUT *************************************************************** #
10015 # a0 = pointer to extended precision source operand #
10017 # OUTPUT ************************************************************** #
10018 # fp0 = default overflow result #
10020 # ALGORITHM *********************************************************** #
10021 # An overflow should occur as the result of transcendental #
10022 # emulation in the 060FPLSP. Create an overflow by using "fmul" #
10023 # and two very large numbers of appropriate sign so the operating #
10024 # system can log the event. #
10025 # For t_ovfl_sc() we take special care not to lose the INEX2 bit. #
10027 #########################################################################
10031 ori.l &ovfl_inx_mask,USER_FPSR(%a6) # set OVFL/AOVFL/AINEX
10033 mov.b %d0,%d1 # fetch rnd prec,mode
10034 andi.b &0xc0,%d1 # extract prec
10037 # dst op is a DENORM. we have to normalize the mantissa to see if the
10038 # result would be inexact for the given precision. make a copy of the
10039 # dst so we don't screw up the version passed to us.
10040 mov.w LOCAL_EX(%a0),FP_SCR0_EX(%a6) # copy EX word of dst
10041 mov.l LOCAL_HI(%a0),FP_SCR0_HI(%a6) # copy HI longword of dst
10042 mov.l LOCAL_LO(%a0),FP_SCR0_LO(%a6) # copy LO longword of dst
10043 lea FP_SCR0(%a6),%a0 # pass ptr to FP_SCR0
10044 movm.l &0xc080,-(%sp) # save d0-d1/a0
10045 bsr.l norm # normalize mantissa
10046 movm.l (%sp)+,&0x0103 # restore d0-d1/a0
10048 cmpi.b %d1,&0x40 # is precision sgl?
10049 bne.b ovfl_sc_dbl # no; dbl
10051 tst.l LOCAL_LO(%a0) # is lo lw of sgl set?
10052 bne.b ovfl_sc_inx # yes
10053 tst.b 3+LOCAL_HI(%a0) # is lo byte of hi lw set?
10054 bne.b ovfl_sc_inx # yes
10055 bra.w ovfl_work # don't set INEX2
10057 mov.l LOCAL_LO(%a0),%d1 # are any of lo 11 bits of
10058 andi.l &0x7ff,%d1 # dbl mantissa set?
10059 beq.w ovfl_work # no; don't set INEX2
10061 ori.l &inex2_mask,USER_FPSR(%a6) # set INEX2
10062 bra.b ovfl_work # continue
10066 ori.w &ovfinx_mask,FPSR_EXCEPT(%a6) # set OVFL/INEX2/AOVFL/AINEX
10071 fmov.l USER_FPCR(%a6),%fpcr # load user's FPCR
10072 fmovm.x mns_huge(%pc),&0x80 # fp0 = mns_huge
10073 fmul.x pls_huge(%pc),%fp0 # mns_huge x pls_huge; creates the overflow
10077 ori.b &neg_bmask,%d0 # set 'N' ccode bit (byte-wide mask for a byte op)
10078 mov.b %d0,FPSR_CC(%a6) # store d0.b as the FPSR ccode byte
10081 fmov.l USER_FPCR(%a6),%fpcr # load user's FPCR
10082 fmovm.x pls_huge(%pc),&0x80 # fp0 = pls_huge
10083 fmul.x pls_huge(%pc),%fp0 # pls_huge x pls_huge; creates the overflow
10087 mov.b %d0,FPSR_CC(%a6) # store d0.b as the FPSR ccode byte
10092 ori.w &ovfinx_mask,FPSR_EXCEPT(%a6) # set OVFL/INEX2/AOVFL/AINEX
10093 fmov.l USER_FPCR(%a6),%fpcr # load user's FPCR
10094 fmovm.x pls_huge(%pc),&0x80 # fp0 = pls_huge
10095 fmul.x pls_huge(%pc),%fp0 # pls_huge x pls_huge; creates the overflow
10099 mov.b %d0,FPSR_CC(%a6) # store d0.b as the FPSR ccode byte
10102 #########################################################################
10103 # XDEF **************************************************************** #
10104 # t_catch(): Handle 060FPLSP OVFL,UNFL,or INEX2 exception during #
10106 # t_catch2(): Handle 060FPLSP OVFL,UNFL,or INEX2 exception during #
10109 # These routines are used by the 060FPLSP package. #
10111 # XREF **************************************************************** #
10114 # INPUT *************************************************************** #
10115 # fp0 = default underflow or overflow result #
10117 # OUTPUT ************************************************************** #
10118 # fp0 = default result #
10120 # ALGORITHM *********************************************************** #
10121 # If an overflow or underflow occurred during the last #
10122 # instruction of transcendental 060FPLSP emulation, then it has already #
10123 # occurred and has been logged. Now we need to see if an inexact #
10124 # exception should occur. #
10126 #########################################################################
10131 or.l %d0,USER_FPSR(%a6) # merge d0 bits into the user's FPSR image
10137 or.l %d0,USER_FPSR(%a6) # merge d0 bits into the user's FPSR image
10139 #########################################################################
10140 # XDEF **************************************************************** #
10141 # t_inx2(): Handle inexact 060FPLSP exception during emulation. #
10142 # t_pinx2(): Handle inexact 060FPLSP exception for "+" results. #
10143 # t_minx2(): Handle inexact 060FPLSP exception for "-" results. #
10145 # XREF **************************************************************** #
10148 # INPUT *************************************************************** #
10149 # fp0 = default result #
10151 # OUTPUT ************************************************************** #
10152 # fp0 = default result #
10154 # ALGORITHM *********************************************************** #
10155 # The last instruction of transcendental emulation for the #
10156 # 060FPLSP should be inexact. So, if inexact is enabled, then we create #
10157 # the event here by adding a large and very small number together #
10158 # so that the operating system can log the event. #
10159 # Must check, too, if the result was zero, in which case we just #
10160 # set the FPSR bits and return. #
10162 #########################################################################
10171 ori.w &inx2a_mask,FPSR_EXCEPT(%a6) # set INEX2/AINEX
10176 ori.l &inx2a_mask+neg_mask,USER_FPSR(%a6) # set N/INEX2/AINEX
10179 btst &inex2_bit,FPCR_ENABLE(%a6) # is inexact enabled?
10180 bne.b inx2_work_ena # yes
10183 fmov.l USER_FPCR(%a6),%fpcr # insert user's exceptions
10184 fmov.s &0x3f800000,%fp1 # load +1
10185 fadd.x pls_tiny(%pc),%fp1 # cause exception
10189 mov.b &z_bmask,FPSR_CC(%a6) # set 'Z' ccode bit
10190 ori.w &inx2a_mask,2+USER_FPSR(%a6) # set INEX2/AINEX
10193 #########################################################################
10194 # XDEF **************************************************************** #
10195 # t_extdnrm(): Handle DENORM inputs in 060FPLSP. #
10196 # t_resdnrm(): Handle DENORM inputs in 060FPLSP for "fscale". #
10198 # This routine is used by the 060FPLSP package. #
10200 # XREF **************************************************************** #
10203 # INPUT *************************************************************** #
10204 # a0 = pointer to extended precision input operand #
10206 # OUTPUT ************************************************************** #
10207 # fp0 = default result #
10209 # ALGORITHM *********************************************************** #
10210 # For all functions that have a denormalized input and that #
10211 # f(x)=x, this is the entry point. #
10212 # DENORM value is moved using "fmove" which triggers an exception #
10213 # if enabled so the operating system can log the event. #
10215 #########################################################################
10219 fmov.l USER_FPCR(%a6),%fpcr # load user's FPCR
10220 fmov.x SRC_EX(%a0),%fp0 # move the DENORM input into fp0
10222 ori.l &unfinx_mask,%d0 # add UNFL/INEX2/AUNFL/AINEX to d0
10223 or.l %d0,USER_FPSR(%a6) # merge d0 bits into the user's FPSR image
10228 fmov.l USER_FPCR(%a6),%fpcr # load user's FPCR
10229 fmov.x SRC_EX(%a0),%fp0 # move the DENORM input into fp0
10231 or.l %d0,USER_FPSR(%a6) # merge d0 bits into the user's FPSR image
10234 ##########################################
10238 # This is used by fsincos library emulation. The correct
10239 # values are already in fp0 and fp1 so we do nothing here.
10245 ##########################################
10248 # dst_qnan --- force result when destination is a NaN
10252 fmov.x DST(%a1),%fp0 # fp0 = dst operand (the NaN)
10256 mov.b &nan_bmask,FPSR_CC(%a6) # set 'NAN' ccode bit
10259 mov.b &nan_bmask+neg_bmask,FPSR_CC(%a6) # set 'NAN','N' ccode bits
10263 # src_qnan --- force result when source is a NaN
10267 fmov.x SRC(%a0),%fp0 # fp0 = src operand (the NaN)
10271 mov.b &nan_bmask,FPSR_CC(%a6) # set 'NAN' ccode bit
10274 mov.b &nan_bmask+neg_bmask,FPSR_CC(%a6) # set 'NAN','N' ccode bits
10277 ##########################################
10280 # Native instruction support
10282 # Some systems may need entry points even for 68060 native
10283 # instructions. These routines are provided for
10288 fmov.l %fpcr,-(%sp) # save fpcr on the stack
10289 fmov.l &0x00000000,%fpcr # clear fpcr for load
10290 fmov.s 0x8(%sp),%fp0 # load sgl dst (sp moved 4 by the push)
10291 fmov.l (%sp)+,%fpcr # restore fpcr
10292 fadd.s 0x8(%sp),%fp0 # fadd w/ sgl src
10297 fmov.l %fpcr,-(%sp) # save fpcr on the stack
10298 fmov.l &0x00000000,%fpcr # clear fpcr for load
10299 fmov.d 0x8(%sp),%fp0 # load dbl dst (sp moved 4 by the push)
10300 fmov.l (%sp)+,%fpcr # restore fpcr
10301 fadd.d 0xc(%sp),%fp0 # fadd w/ dbl src
10306 fmovm.x 0x4(%sp),&0x80 # load ext dst into fp0
10307 fadd.x 0x10(%sp),%fp0 # fadd w/ ext src
10312 fmov.l %fpcr,-(%sp) # save fpcr on the stack
10313 fmov.l &0x00000000,%fpcr # clear fpcr for load
10314 fmov.s 0x8(%sp),%fp0 # load sgl dst (sp moved 4 by the push)
10315 fmov.l (%sp)+,%fpcr # restore fpcr
10316 fsub.s 0x8(%sp),%fp0 # fsub w/ sgl src
10321 fmov.l %fpcr,-(%sp) # save fpcr on the stack
10322 fmov.l &0x00000000,%fpcr # clear fpcr for load
10323 fmov.d 0x8(%sp),%fp0 # load dbl dst (sp moved 4 by the push)
10324 fmov.l (%sp)+,%fpcr # restore fpcr
10325 fsub.d 0xc(%sp),%fp0 # fsub w/ dbl src
10330 fmovm.x 0x4(%sp),&0x80 # load ext dst into fp0
10331 fsub.x 0x10(%sp),%fp0 # fsub w/ ext src
10336 fmov.l %fpcr,-(%sp) # save fpcr on the stack
10337 fmov.l &0x00000000,%fpcr # clear fpcr for load
10338 fmov.s 0x8(%sp),%fp0 # load sgl dst (sp moved 4 by the push)
10339 fmov.l (%sp)+,%fpcr # restore fpcr
10340 fmul.s 0x8(%sp),%fp0 # fmul w/ sgl src
10345 fmov.l %fpcr,-(%sp) # save fpcr on the stack
10346 fmov.l &0x00000000,%fpcr # clear fpcr for load
10347 fmov.d 0x8(%sp),%fp0 # load dbl dst (sp moved 4 by the push)
10348 fmov.l (%sp)+,%fpcr # restore fpcr
10349 fmul.d 0xc(%sp),%fp0 # fmul w/ dbl src
10354 fmovm.x 0x4(%sp),&0x80 # load ext dst into fp0
10355 fmul.x 0x10(%sp),%fp0 # fmul w/ ext src
10360 fmov.l %fpcr,-(%sp) # save fpcr on the stack
10361 fmov.l &0x00000000,%fpcr # clear fpcr for load
10362 fmov.s 0x8(%sp),%fp0 # load sgl dst (sp moved 4 by the push)
10363 fmov.l (%sp)+,%fpcr # restore fpcr
10364 fdiv.s 0x8(%sp),%fp0 # fdiv w/ sgl src
10369 fmov.l %fpcr,-(%sp) # save fpcr on the stack
10370 fmov.l &0x00000000,%fpcr # clear fpcr for load
10371 fmov.d 0x8(%sp),%fp0 # load dbl dst (sp moved 4 by the push)
10372 fmov.l (%sp)+,%fpcr # restore fpcr
10373 fdiv.d 0xc(%sp),%fp0 # fdiv w/ dbl src
10378 fmovm.x 0x4(%sp),&0x80 # load ext dst into fp0
10379 fdiv.x 0x10(%sp),%fp0 # fdiv w/ ext src
10384 fabs.s 0x4(%sp),%fp0 # fabs w/ sgl src
10389 fabs.d 0x4(%sp),%fp0 # fabs w/ dbl src
10394 fabs.x 0x4(%sp),%fp0 # fabs w/ ext src
10399 fneg.s 0x4(%sp),%fp0 # fneg w/ sgl src
10404 fneg.d 0x4(%sp),%fp0 # fneg w/ dbl src
10409 fneg.x 0x4(%sp),%fp0 # fneg w/ ext src
10414 fsqrt.s 0x4(%sp),%fp0 # fsqrt w/ sgl src
10419 fsqrt.d 0x4(%sp),%fp0 # fsqrt w/ dbl src
10424 fsqrt.x 0x4(%sp),%fp0 # fsqrt w/ ext src
10429 fint.s 0x4(%sp),%fp0 # fint w/ sgl src
10434 fint.d 0x4(%sp),%fp0 # fint w/ dbl src
10439 fint.x 0x4(%sp),%fp0 # fint w/ ext src
10444 fintrz.s 0x4(%sp),%fp0 # fintrz w/ sgl src
10449 fintrz.d 0x4(%sp),%fp0 # fintrz w/ dbl src
10454 fintrz.x 0x4(%sp),%fp0 # fintrz w/ ext src
10457 ########################################################################
10459 #########################################################################
10460 # src_zero(): Return signed zero according to sign of src operand. #
10461 #########################################################################
10464 tst.b SRC_EX(%a0) # test sign (msb of first byte) of src operand
10465 bmi.b ld_mzero # if neg, load neg zero
10468 # ld_pzero(): return a positive zero.
10472 fmov.s &0x00000000,%fp0 # load +0
10473 mov.b &z_bmask,FPSR_CC(%a6) # set 'Z' ccode bit
10476 # ld_mzero(): return a negative zero.
10479 fmov.s &0x80000000,%fp0 # load -0
10480 mov.b &neg_bmask+z_bmask,FPSR_CC(%a6) # set 'N','Z' ccode bits
10483 #########################################################################
10484 # dst_zero(): Return signed zero according to sign of dst operand. #
10485 #########################################################################
10488 tst.b DST_EX(%a1) # test sign (msb of first byte) of dst operand
10489 bmi.b ld_mzero # if neg, load neg zero
10490 bra.b ld_pzero # load positive zero
10492 #########################################################################
10493 # src_inf(): Return signed inf according to sign of src operand. #
10494 #########################################################################
10497 tst.b SRC_EX(%a0) # test sign (msb of first byte) of src operand
10498 bmi.b ld_minf # if negative branch
10501 # ld_pinf(): return a positive infinity.
10505 fmov.s &0x7f800000,%fp0 # load +INF
10506 mov.b &inf_bmask,FPSR_CC(%a6) # set 'INF' ccode bit
10510 # ld_minf():return a negative infinity.
10514 fmov.s &0xff800000,%fp0 # load -INF
10515 mov.b &neg_bmask+inf_bmask,FPSR_CC(%a6) # set 'N','I' ccode bits
10518 #########################################################################
10519 # dst_inf(): Return signed inf according to sign of dst operand. #
10520 #########################################################################
10523 tst.b DST_EX(%a1) # test sign (msb of first byte) of dst operand
10524 bmi.b ld_minf # if negative branch
10528 #################################################################
10529 # szr_inf(): Return +ZERO for a negative src operand or #
10530 # +INF for a positive src operand. #
10531 # Routine used for fetox, ftwotox, and ftentox. #
10532 #################################################################
10534 tst.b SRC_EX(%a0) # check sign of source (N set if src is negative)
10538 #########################################################################
10539 # sopr_inf(): Return +INF for a positive src operand or #
10540 # jump to operand error routine for a negative src operand. #
10541 # Routine used for flogn, flognp1, flog10, and flog2. #
10542 #########################################################################
10545 tst.b SRC_EX(%a0) # check sign of source (N set if src is negative)
10549 #################################################################
10550 # setoxm1i(): Return minus one for a negative src operand or #
10551 # positive infinity for a positive src operand. #
10552 # Routine used for fetoxm1. #
10553 #################################################################
10556 tst.b SRC_EX(%a0) # check sign of source (N set if src is negative)
10560 #########################################################################
10561 # src_one(): Return signed one according to sign of src operand. #
10562 #########################################################################
10565 tst.b SRC_EX(%a0) # check sign of source (N set if src is negative)
10569 # ld_pone(): return positive one.
10573 fmov.s &0x3f800000,%fp0 # load +1 (sgl image)
10578 # ld_mone(): return negative one.
10582 fmov.s &0xbf800000,%fp0 # load -1 (sgl image)
10583 mov.b &neg_bmask,FPSR_CC(%a6) # set 'N' ccode bit
10586 ppiby2: long 0x3fff0000, 0xc90fdaa2, 0x2168c235 # +pi/2 (ext prec image)
10587 mpiby2: long 0xbfff0000, 0xc90fdaa2, 0x2168c235 # -pi/2 (ext prec image)
10589 #################################################################
10590 # spi_2(): Return signed PI/2 according to sign of src operand. #
10591 #################################################################
10594 tst.b SRC_EX(%a0) # check sign of source (N set if src is negative)
10598 # ld_ppi2(): return positive PI/2.
10603 fmov.x ppiby2(%pc),%fp0 # load +pi/2
10604 bra.w t_pinx2 # set INEX2 via the handler for "+" results
10607 # ld_mpi2(): return negative PI/2.
10612 fmov.x mpiby2(%pc),%fp0 # load -pi/2
10613 bra.w t_minx2 # set INEX2 via the handler for "-" results
10615 ####################################################
10616 # The following routines give support for fsincos. #
10617 ####################################################
10620 # ssincosz(): When the src operand is ZERO, store a one in the
10621 # cosine register and return a ZERO in fp0 w/ the same sign
10622 # as the src operand.
10626 fmov.s &0x3f800000,%fp1 # cosine result: fp1 = +1
10627 tst.b SRC_EX(%a0) # test sign
10629 fmov.s &0x80000000,%fp0 # return sin result (-0) in fp0
10630 mov.b &z_bmask+neg_bmask,FPSR_CC(%a6) # set 'N','Z' ccode bits
10633 fmov.s &0x00000000,%fp0 # return sin result (+0) in fp0
10634 mov.b &z_bmask,FPSR_CC(%a6) # set 'Z' ccode bit
10638 # ssincosi(): When the src operand is INF, store a QNAN in the cosine
10639 # register and jump to the operand error routine for negative
10644 fmov.x qnan(%pc),%fp1 # load NAN into the cosine register (fp1)
10648 # ssincosqnan(): When the src operand is a QNAN, store the QNAN in the cosine
10649 # register and branch to the src QNAN routine.
10653 fmov.x LOCAL_EX(%a0),%fp1 # copy the src QNAN into the cosine register (fp1)
10656 ########################################################################
10662 mov.b DTAG(%a6),%d1 # d1 = DTAG operand-type tag
10674 mov.b DTAG(%a6),%d1 # d1 = DTAG operand-type tag
10686 mov.b DTAG(%a6),%d1 # d1 = DTAG operand-type tag
10698 mov.b SRC_EX(%a0),%d1 # get src sign
10699 mov.b DST_EX(%a1),%d0 # get dst sign
10700 eor.b %d0,%d1 # get qbyte sign
10702 mov.b %d1,FPSR_QBYTE(%a6) # save it in FPSR_QBYTE
10709 clr.b FPSR_QBYTE(%a6) # clear FPSR_QBYTE
10711 mov.b SRC_EX(%a0),%d1 # get src sign
10712 mov.b DST_EX(%a1),%d0 # get dst sign
10713 eor.b %d0,%d1 # get qbyte sign
10715 mov.b %d1,FPSR_QBYTE(%a6) # save it in FPSR_QBYTE
10716 cmpi.b DTAG(%a6),&DENORM # is DTAG == DENORM?
10722 fmov.l (%sp)+,%fpcr # pop a longword from the stack into fpcr
10723 fmov.x DST(%a1),%fp0 # fp0 = dst operand
10729 mov.b &neg_bmask,FPSR_CC(%a6) # set 'N' code
10732 #########################################################################
10737 mov.b DTAG(%a6),%d1 # d1 = DTAG operand-type tag
10749 mov.b DTAG(%a6),%d1 # d1 = DTAG operand-type tag
10761 mov.b DTAG(%a6),%d1 # d1 = DTAG operand-type tag
10771 #########################################################################
10773 global sscale_snorm # export symbol sscale_snorm
10774 global sscale_sdnrm # export symbol sscale_sdnrm
10777 mov.b DTAG(%a6),%d1 # d1 = DTAG operand-type tag
10787 global sscale_szero # export symbol sscale_szero
10789 mov.b DTAG(%a6),%d1 # d1 = DTAG operand-type tag
10801 mov.b DTAG(%a6),%d1 # d1 = DTAG operand-type tag
10807 ########################################################################
10811 mov.b DTAG(%a6),%d1 # d1 = DTAG operand-type tag
10816 #########################################################################
10817 # norm(): normalize the mantissa of an extended precision input. the #
10818 # input operand should not be normalized already. #
10820 # XDEF **************************************************************** #
10823 # XREF **************************************************************** #
10826 # INPUT *************************************************************** #
10827 # a0 = pointer to fp extended precision operand to normalize #
10829 # OUTPUT ************************************************************** #
10830 # d0 = number of bit positions the mantissa was shifted #
10831 # a0 = the input operand's mantissa is normalized; the exponent #
10834 #########################################################################
10837 mov.l %d2, -(%sp) # create some temp regs (push d2)
10840 mov.l FTEMP_HI(%a0), %d0 # load hi(mantissa)
10841 mov.l FTEMP_LO(%a0), %d1 # load lo(mantissa)
10843 bfffo %d0{&0:&32}, %d2 # d2 = offset of first set bit in hi(man) = places to shift
10844 beq.b norm_lo # hi(man) is all zeroes!
10847 lsl.l %d2, %d0 # left shift hi(man)
10848 bfextu %d1{&0:%d2}, %d3 # d3 = top d2 bits of lo(man)
10850 or.l %d3, %d0 # create hi(man): shifted hi | bits carried up from lo
10851 lsl.l %d2, %d1 # create lo(man)
10853 mov.l %d0, FTEMP_HI(%a0) # store new hi(man)
10854 mov.l %d1, FTEMP_LO(%a0) # store new lo(man)
10856 mov.l %d2, %d0 # return shift amount
10858 mov.l (%sp)+, %d3 # restore temp regs (pop d3)
10864 bfffo %d1{&0:&32}, %d2 # d2 = offset of first set bit in lo(man) = places to shift
10865 lsl.l %d2, %d1 # shift lo(man)
10866 add.l &32, %d2 # add 32 to shft amount (hi(man) was all zeroes)
10868 mov.l %d1, FTEMP_HI(%a0) # store hi(man)
10869 clr.l FTEMP_LO(%a0) # lo(man) is now zero
10871 mov.l %d2, %d0 # return shift amount
10873 mov.l (%sp)+, %d3 # restore temp regs (pop d3)
10878 #########################################################################
10879 # unnorm_fix(): - changes an UNNORM to one of NORM, DENORM, or ZERO #
10880 # - returns corresponding optype tag #
10882 # XDEF **************************************************************** #
10885 # XREF **************************************************************** #
10886 # norm() - normalize the mantissa #
10888 # INPUT *************************************************************** #
10889 # a0 = pointer to unnormalized extended precision number #
10891 # OUTPUT ************************************************************** #
10892 # d0 = optype tag - is corrected to one of NORM, DENORM, or ZERO #
10893 # a0 = input operand has been converted to a norm, denorm, or #
10894 # zero; both the exponent and mantissa are changed. #
10896 #########################################################################
10900 bfffo FTEMP_HI(%a0){&0:&32}, %d0 # how many shifts are needed?
10901 bne.b unnorm_shift # hi(man) is not all zeroes
10904 # hi(man) is all zeroes so see if any bits in lo(man) are set
10907 bfffo FTEMP_LO(%a0){&0:&32}, %d0 # is operand really a zero?
10908 beq.w unnorm_zero # yes
10910 add.w &32, %d0 # no; fix shift distance (skip the 32 zero bits of hi(man))
10913 # d0 = # shifts needed for complete normalization
10916 clr.l %d1 # clear top word
10917 mov.w FTEMP_EX(%a0), %d1 # extract exponent
10918 and.w &0x7fff, %d1 # strip off sgn
10920 cmp.w %d0, %d1 # will denorm push exp < 0?
10921 bgt.b unnorm_nrm_zero # yes; denorm only until exp = 0
10924 # exponent would not go < 0. therefore, number stays normalized
10926 sub.w %d0, %d1 # shift exponent value
10927 mov.w FTEMP_EX(%a0), %d0 # load old exponent
10928 and.w &0x8000, %d0 # save old sign
10929 or.w %d0, %d1 # {sgn,new exp}
10930 mov.w %d1, FTEMP_EX(%a0) # insert new exponent
10932 bsr.l norm # normalize UNNORM
10934 mov.b &NORM, %d0 # return new optype tag
10938 # exponent would go < 0, so only denormalize until exp = 0
10941 cmp.b %d1, &32 # is exp <= 32?
10942 bgt.b unnorm_nrm_zero_lrg # no; go handle large exponent
10944 bfextu FTEMP_HI(%a0){%d1:&32}, %d0 # extract new hi(man): 32 bits starting d1 bits in
10945 mov.l %d0, FTEMP_HI(%a0) # save new hi(man)
10947 mov.l FTEMP_LO(%a0), %d0 # fetch old lo(man)
10948 lsl.l %d1, %d0 # extract new lo(man)
10949 mov.l %d0, FTEMP_LO(%a0) # save new lo(man)
10951 and.w &0x8000, FTEMP_EX(%a0) # set exp = 0 (keep only the sign bit)
10953 mov.b &DENORM, %d0 # return new optype tag
10957 # only mantissa bits set are in lo(man)
10959 unnorm_nrm_zero_lrg:
10960 sub.w &32, %d1 # adjust shft amt by 32
10962 mov.l FTEMP_LO(%a0), %d0 # fetch old lo(man)
10963 lsl.l %d1, %d0 # left shift lo(man)
10965 mov.l %d0, FTEMP_HI(%a0) # store new hi(man)
10966 clr.l FTEMP_LO(%a0) # lo(man) = 0
10968 and.w &0x8000, FTEMP_EX(%a0) # set exp = 0 (keep only the sign bit)
10970 mov.b &DENORM, %d0 # return new optype tag
10974 # whole mantissa is zero so this UNNORM is actually a zero
10977 and.w &0x8000, FTEMP_EX(%a0) # force exponent to zero
10979 mov.b &ZERO, %d0 # fix optype tag