2 # $NetBSD: fplsp.s,v 1.1 2000/04/14 20:24:37 is Exp $
5 #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
6 # MOTOROLA MICROPROCESSOR & MEMORY TECHNOLOGY GROUP
7 # M68000 Hi-Performance Microprocessor Division
8 # M68060 Software Package Production Release
10 # M68060 Software Package Copyright (C) 1993, 1994, 1995, 1996 Motorola Inc.
11 # All rights reserved.
13 # THE SOFTWARE is provided on an "AS IS" basis and without warranty.
14 # To the maximum extent permitted by applicable law,
15 # MOTOROLA DISCLAIMS ALL WARRANTIES WHETHER EXPRESS OR IMPLIED,
16 # INCLUDING IMPLIED WARRANTIES OF MERCHANTABILITY OR FITNESS
17 # FOR A PARTICULAR PURPOSE and any warranty against infringement with
18 # regard to the SOFTWARE (INCLUDING ANY MODIFIED VERSIONS THEREOF)
19 # and any accompanying written materials.
21 # To the maximum extent permitted by applicable law,
22 # IN NO EVENT SHALL MOTOROLA BE LIABLE FOR ANY DAMAGES WHATSOEVER
23 # (INCLUDING WITHOUT LIMITATION, DAMAGES FOR LOSS OF BUSINESS PROFITS,
24 # BUSINESS INTERRUPTION, LOSS OF BUSINESS INFORMATION, OR OTHER PECUNIARY LOSS)
25 # ARISING OF THE USE OR INABILITY TO USE THE SOFTWARE.
27 # Motorola assumes no responsibility for the maintenance and support
30 # You are hereby granted a copyright license to use, modify, and distribute the
31 # SOFTWARE so long as this entire notice is retained without alteration
32 # in any modified and/or redistributed versions, and that such modified
33 # versions are clearly identified as such.
34 # No licenses are granted by implication, estoppel or otherwise under any
35 # patents or trademarks of Motorola, Inc.
36 #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
40 # This file is appended to the top of the 060ILSP package
41 # and contains the entry points into the package. The user, in
42 # effect, branches to one of the branch table entries located here.
276 # leave room for future possible additions
280 # This file contains a set of define statements for constants
281 # in order to promote readability within the corecode itself.
# Frame size and register-save offsets.  The entry stubs in this file use
# these symbols as displacements from the frame pointer, e.g. EXC_DREGS(%a6).
        set     LOCAL_SIZE, 192                 # size of the local frame in bytes
        set     LV, -LOCAL_SIZE                 # base offset of the local area

# Fields of the stacked frame (positive offsets).
        set     EXC_SR, 0x4                     # stacked status register
        set     EXC_PC, 0x6                     # stacked pc
        set     EXC_VOFF, 0xa                   # stacked vector offset
        set     EXC_EA, 0xc                     # stacked <ea>

        set     EXC_FP, 0x0                     # frame pointer

# Start of each register save area (negative offsets).
        set     EXC_AREGS, -68                  # address registers
        set     EXC_DREGS, -100                 # data registers
        set     EXC_FPREGS, -36                 # fp registers

# Individual address registers, 4 bytes each.
        set     EXC_A7, EXC_AREGS+(7*4)         # saved a7
        set     OLD_A7, EXC_AREGS+(6*4)         # extra copy of saved a7 (same offset as EXC_A6)
        set     EXC_A6, EXC_AREGS+(6*4)         # saved a6
        set     EXC_A5, EXC_AREGS+(5*4)
        set     EXC_A4, EXC_AREGS+(4*4)
        set     EXC_A3, EXC_AREGS+(3*4)
        set     EXC_A2, EXC_AREGS+(2*4)
        set     EXC_A1, EXC_AREGS+(1*4)
        set     EXC_A0, EXC_AREGS+(0*4)

# Individual data registers, 4 bytes each.
        set     EXC_D7, EXC_DREGS+(7*4)
        set     EXC_D6, EXC_DREGS+(6*4)
        set     EXC_D5, EXC_DREGS+(5*4)
        set     EXC_D4, EXC_DREGS+(4*4)
        set     EXC_D3, EXC_DREGS+(3*4)
        set     EXC_D2, EXC_DREGS+(2*4)
        set     EXC_D1, EXC_DREGS+(1*4)
        set     EXC_D0, EXC_DREGS+(0*4)

# Individual fp registers, 12 bytes each.
        set     EXC_FP0, EXC_FPREGS+(0*12)      # saved fp0
        set     EXC_FP1, EXC_FPREGS+(1*12)      # saved fp1
        set     EXC_FP2, EXC_FPREGS+(2*12)      # saved fp2 (not used)
# Offsets of the local variables, all relative to LV (the base of the
# local area).  The _EX/_SGN/_HI/_LO symbols address the pieces of a
# 12-byte extended-precision slot.
        set     FP_SCR1, LV+80                  # fp scratch 1
        set     FP_SCR1_EX, FP_SCR1+0
        set     FP_SCR1_SGN, FP_SCR1+2
        set     FP_SCR1_HI, FP_SCR1+4
        set     FP_SCR1_LO, FP_SCR1+8

        set     FP_SCR0, LV+68                  # fp scratch 0
        set     FP_SCR0_EX, FP_SCR0+0
        set     FP_SCR0_SGN, FP_SCR0+2
        set     FP_SCR0_HI, FP_SCR0+4
        set     FP_SCR0_LO, FP_SCR0+8

        set     FP_DST, LV+56                   # fp destination operand
        set     FP_DST_EX, FP_DST+0
        set     FP_DST_SGN, FP_DST+2
        set     FP_DST_HI, FP_DST+4
        set     FP_DST_LO, FP_DST+8

        set     FP_SRC, LV+44                   # fp source operand
        set     FP_SRC_EX, FP_SRC+0
        set     FP_SRC_SGN, FP_SRC+2
        set     FP_SRC_HI, FP_SRC+4
        set     FP_SRC_LO, FP_SRC+8

# Saved copies of the FPU control registers and their individual bytes.
        set     USER_FPIAR, LV+40               # FP instr address register

        set     USER_FPSR, LV+36                # FP status register
        set     FPSR_CC, USER_FPSR+0            # FPSR condition codes
        set     FPSR_QBYTE, USER_FPSR+1         # FPSR quotient byte
        set     FPSR_EXCEPT, USER_FPSR+2        # FPSR exception status byte
        set     FPSR_AEXCEPT, USER_FPSR+3       # FPSR accrued exception byte

        set     USER_FPCR, LV+32                # FP control register
        set     FPCR_ENABLE, USER_FPCR+2        # FPCR exception enable
        set     FPCR_MODE, USER_FPCR+3          # FPCR rounding mode control

# Integer scratch space and flags.
        set     L_SCR3, LV+28                   # integer scratch 3
        set     L_SCR2, LV+24                   # integer scratch 2
        set     L_SCR1, LV+20                   # integer scratch 1

        set     STORE_FLG, LV+19                # flag: operand store (ie. not fcmp/ftst)

        set     EXC_TEMP2, LV+24                # temporary space (same offset as L_SCR2)
        set     EXC_TEMP, LV+16                 # temporary space

        set     DTAG, LV+15                     # destination operand type
        set     STAG, LV+14                     # source operand type

        set     SPCOND_FLG, LV+10               # flag: special case (see below)

        set     EXC_CC, LV+8                    # saved condition codes
        set     EXC_EXTWPTR, LV+4               # saved current PC (active)
        set     EXC_EXTWORD, LV+2               # saved extension word
        set     EXC_CMDREG, LV+2                # saved extension word (same offset as EXC_EXTWORD)
        set     EXC_OPWORD, LV+0                # saved operation word
376 ################################
# Field offsets within an extended-precision value saved in memory.
# Four naming families are defined for the same kind of layout.
        set     FTEMP, 0
        set     FTEMP_EX, 0
        set     FTEMP_SGN, 2

        set     LOCAL, 0
        set     LOCAL_EX, 0
        set     LOCAL_SGN, 2

        set     DST, 0
        set     DST_EX, 0
        set     DST_HI, 4

        set     SRC, 0
        set     SRC_EX, 0
        set     SRC_HI, 4

# Exponent limits for each precision.
        set     SGL_LO, 0x3f81                  # min sgl prec exponent
        set     SGL_HI, 0x407e                  # max sgl prec exponent
        set     DBL_LO, 0x3c01                  # min dbl prec exponent
        set     DBL_HI, 0x43fe                  # max dbl prec exponent
        set     EXT_LO, 0x0                     # min ext prec exponent
        set     EXT_HI, 0x7ffe                  # max ext prec exponent

# Exponent biases.
        set     EXT_BIAS, 0x3fff                # extended precision bias
        set     SGL_BIAS, 0x007f                # single precision bias
        set     DBL_BIAS, 0x03ff                # double precision bias

# Operand-type codes stored in STAG/DTAG.
        set     NORM, 0x00
        set     ZERO, 0x01
        set     INF, 0x02
        set     QNAN, 0x03
        set     DENORM, 0x04
        set     SNAN, 0x05
        set     UNNORM, 0x06
# Bit numbers within the individual FPSR bytes.

# Condition-code byte.
        set     neg_bit, 0x3                    # negative result
        set     z_bit, 0x2                      # zero result
        set     inf_bit, 0x1                    # infinite result
        set     nan_bit, 0x0                    # NAN result

# Quotient byte.
        set     q_sn_bit, 0x7                   # sign bit of quotient byte

# Exception status byte.
        set     bsun_bit, 7                     # branch on unordered
        set     snan_bit, 6                     # signalling NAN
        set     operr_bit, 5                    # operand error
        set     ovfl_bit, 4                     # overflow
        set     unfl_bit, 3                     # underflow
        set     dz_bit, 2                       # divide by zero
        set     inex2_bit, 1                    # inexact result 2
        set     inex1_bit, 0                    # inexact result 1

# Accrued exception byte.
        set     aiop_bit, 7                     # accrued invalid operation bit
        set     aovfl_bit, 6                    # accrued overflow bit
        set     aunfl_bit, 5                    # accrued underflow bit
        set     adz_bit, 4                      # accrued dz bit
        set     ainex_bit, 3                    # accrued inexact bit
#############################
# FPSR individual bit masks #
#############################
# Condition codes as longword masks.
        set     neg_mask, 0x08000000            # negative bit mask (lw)
        set     inf_mask, 0x02000000            # infinity bit mask (lw)
        set     z_mask, 0x04000000              # zero bit mask (lw)
        set     nan_mask, 0x01000000            # nan bit mask (lw)

# Condition codes as byte masks.
        set     neg_bmask, 0x08                 # negative bit mask (byte)
        set     inf_bmask, 0x02                 # infinity bit mask (byte)
        set     z_bmask, 0x04                   # zero bit mask (byte)
        set     nan_bmask, 0x01                 # nan bit mask (byte)

# Exception status bits.
        set     bsun_mask, 0x00008000           # bsun exception mask
        set     snan_mask, 0x00004000           # snan exception mask
        set     operr_mask, 0x00002000          # operr exception mask
        set     ovfl_mask, 0x00001000           # overflow exception mask
        set     unfl_mask, 0x00000800           # underflow exception mask
        set     dz_mask, 0x00000400             # dz exception mask
        set     inex2_mask, 0x00000200          # inex2 exception mask
        set     inex1_mask, 0x00000100          # inex1 exception mask

# Accrued exception bits.
        set     aiop_mask, 0x00000080           # accrued illegal operation
        set     aovfl_mask, 0x00000040          # accrued overflow
        set     aunfl_mask, 0x00000020          # accrued underflow
        set     adz_mask, 0x00000010            # accrued divide by zero
        set     ainex_mask, 0x00000008          # accrued inexact
######################################
# FPSR combinations used in the FPSP #
######################################
# Each symbol is the sum of the single-bit FPSR masks named on its line.
        set     dzinf_mask, inf_mask+dz_mask+adz_mask
        set     opnan_mask, nan_mask+operr_mask+aiop_mask
        set     nzi_mask, 0x01ffffff            # clears N, Z, and I
        set     unfinx_mask, unfl_mask+inex2_mask+aunfl_mask+ainex_mask
        set     unf2inx_mask, unfl_mask+inex2_mask+ainex_mask
        set     ovfinx_mask, ovfl_mask+inex2_mask+aovfl_mask+ainex_mask
        set     inx1a_mask, inex1_mask+ainex_mask
        set     inx2a_mask, inex2_mask+ainex_mask
        set     snaniop_mask, nan_mask+snan_mask+aiop_mask
        set     snaniop2_mask, snan_mask+aiop_mask
        set     naniop_mask, nan_mask+aiop_mask
        set     neginf_mask, neg_mask+inf_mask
        set     infaiop_mask, inf_mask+aiop_mask
        set     negz_mask, neg_mask+z_mask
        set     opaop_mask, operr_mask+aiop_mask
        set     unfl_inx_mask, unfl_mask+aunfl_mask+ainex_mask
        set     ovfl_inx_mask, ovfl_mask+aovfl_mask+ainex_mask
# Miscellaneous constants: bit positions, thresholds, FPCR mode codes,
# operand lengths and exception vector offsets.
        set     rnd_stky_bit, 29                # stky bit pos in longword

        set     sign_bit, 0x7                   # sign bit
        set     signan_bit, 0x6                 # signalling nan bit

        set     sgl_thresh, 0x3f81              # minimum sgl exponent
        set     dbl_thresh, 0x3c01              # minimum dbl exponent

# Rounding precision codes.
        set     x_mode, 0x0                     # extended precision
        set     s_mode, 0x4                     # single precision
        set     d_mode, 0x8                     # double precision

# Rounding mode codes.
        set     rn_mode, 0x0                    # round-to-nearest
        set     rz_mode, 0x1                    # round-to-zero
        set     rm_mode, 0x2                    # round-to-minus-infinity
        set     rp_mode, 0x3                    # round-to-plus-infinity

        set     mantissalen, 64                 # length of mantissa in bits

# Operand lengths.
        set     BYTE, 1                         # len(byte) == 1 byte
        set     WORD, 2                         # len(word) == 2 bytes
        set     LONG, 4                         # len(longword) == 4 bytes

# Exception vector offsets.
        set     BSUN_VEC, 0xc0                  # bsun vector offset
        set     INEX_VEC, 0xc4                  # inexact vector offset
        set     DZ_VEC, 0xc8                    # dz vector offset
        set     UNFL_VEC, 0xcc                  # unfl vector offset
        set     OPERR_VEC, 0xd0                 # operr vector offset
        set     OVFL_VEC, 0xd4                  # ovfl vector offset
        set     SNAN_VEC, 0xd8                  # snan vector offset
###########################
# SPecial CONDition FLaGs #
###########################
# One flag bit per special condition.
        set     ftrapcc_flg, 0x01               # flag bit: ftrapcc exception
        set     fbsun_flg, 0x02                 # flag bit: bsun exception
        set     mia7_flg, 0x04                  # flag bit: (a7)+ <ea>
        set     mda7_flg, 0x08                  # flag bit: -(a7) <ea>
        set     fmovm_flg, 0x40                 # flag bit: fmovm instruction
        set     immed_flg, 0x80                 # flag bit: &<data> <ea>
##################################
# TRANSCENDENTAL "LAST-OP" FLAGS #
##################################
# Codes naming which arithmetic instruction was performed last.
        set     FMUL_OP, 0x0                    # fmul instr performed last
        set     FDIV_OP, 0x1                    # fdiv performed last
        set     FADD_OP, 0x2                    # fadd performed last
        set     FMOV_OP, 0x3                    # fmov performed last
# Constant tables, stored as raw longwords.
T1:     long    0x40C62D38,0xD3D64634           # 16381 LOG2 LEAD
T2:     long    0x3D6F90AE,0xB1E75CC7           # 16381 LOG2 TRAIL

# Four longwords each.
PI:     long    0x40000000,0xC90FDAA2,0x2168C235,0x00000000
PIBY2:  long    0x3FFF0000,0xC90FDAA2,0x2168C235,0x00000000

# NOTE(review): no label is visible for the next pair in this listing;
# the bits match the IEEE double for 2/pi -- confirm against the full file.
        long    0x3FE45F30,0x6DC9C883
567 #########################################################################
569 #########################################################################
# Single-precision entry stub in front of ssin/ssind.
#   in:  single-precision argument at 0x8(%a6)
#   out: result in %fp0
# NOTE(review): this listing shows no entry label, no branches between the
# operand-type tests and the handler calls, and no unlk/rts; how %d1 is
# loaded from tag's result is also not visible.  Confirm the control flow
# against the complete file.
        link    %a6,&-LOCAL_SIZE                # carve out the local frame

        movm.l  &0x0303,EXC_DREGS(%a6)          # preserve d0-d1/a0-a1
        fmovm.l %fpcr,%fpsr,USER_FPCR(%a6)      # preserve control regs
        fmovm.x &0xc0,EXC_FP0(%a6)              # preserve fp0/fp1

        fmov.l  &0x0,%fpcr                      # clear FPCR

# Widen the argument to extended precision and classify it.
        fmov.s  0x8(%a6),%fp0                   # fetch sgl argument
        fmov.x  %fp0,FP_SRC(%a6)                # extended copy into FP_SRC
        bsr.l   tag                             # get operand type

        andi.l  &0x00ff00ff,USER_FPSR(%a6)      # clear cc and exception-status bytes of saved FPSR

        mov.b   FPCR_MODE(%a6),%d0              # hand over rnd mode,prec

# Handler calls and operand-type tests, in listing order.
        bsr.l   ssin                            # NORM operand
        cmpi.b  %d1,&ZERO                       # ZERO operand?
        cmpi.b  %d1,&INF                        # INF operand?
        cmpi.b  %d1,&QNAN                       # QNAN operand?
        bsr.l   ssind                           # DENORM operand

# The result is in FP0, so only fp1 is reloaded.
        movm.l  EXC_DREGS(%a6),&0x0303          # recover d0-d1/a0-a1
        fmovm.l USER_FPCR(%a6),%fpcr,%fpsr      # recover control regs
        fmovm.x EXC_FP1(%a6),&0x40              # recover fp1
# Double-precision entry stub in front of ssin/ssind.
#   in:  double-precision argument at 0x8(%a6)
#   out: result in %fp0
# NOTE(review): this listing shows no entry label, no branches between the
# operand-type tests and the handler calls, and no unlk/rts; how %d1 is
# loaded from tag's result is also not visible.  Confirm the control flow
# against the complete file.
        link    %a6,&-LOCAL_SIZE                # carve out the local frame

        movm.l  &0x0303,EXC_DREGS(%a6)          # preserve d0-d1/a0-a1
        fmovm.l %fpcr,%fpsr,USER_FPCR(%a6)      # preserve control regs
        fmovm.x &0xc0,EXC_FP0(%a6)              # preserve fp0/fp1

        fmov.l  &0x0,%fpcr                      # clear FPCR

# Widen the argument to extended precision and classify it.
        fmov.d  0x8(%a6),%fp0                   # fetch dbl argument
        fmov.x  %fp0,FP_SRC(%a6)                # extended copy into FP_SRC
        bsr.l   tag                             # get operand type

        andi.l  &0x00ff00ff,USER_FPSR(%a6)      # clear cc and exception-status bytes of saved FPSR

        mov.b   FPCR_MODE(%a6),%d0              # hand over rnd mode,prec

# Handler calls and operand-type tests, in listing order.
        bsr.l   ssin                            # NORM operand
        cmpi.b  %d1,&ZERO                       # ZERO operand?
        cmpi.b  %d1,&INF                        # INF operand?
        cmpi.b  %d1,&QNAN                       # QNAN operand?
        bsr.l   ssind                           # DENORM operand

# The result is in FP0, so only fp1 is reloaded.
        movm.l  EXC_DREGS(%a6),&0x0303          # recover d0-d1/a0-a1
        fmovm.l USER_FPCR(%a6),%fpcr,%fpsr      # recover control regs
        fmovm.x EXC_FP1(%a6),&0x40              # recover fp1
# Extended-precision entry stub in front of ssin/ssind.
#   in:  three-longword extended argument starting at 0x8(%a6)
#   out: result in %fp0
# NOTE(review): this listing shows no entry label, no branches between the
# operand-type tests and the handler calls, and no unlk/rts; the loads of
# %a0 and %d1 are not visible either (presumably %a0 -> FP_SRC(%a6)).
# Confirm against the complete file.
        link    %a6,&-LOCAL_SIZE                # carve out the local frame

        movm.l  &0x0303,EXC_DREGS(%a6)          # preserve d0-d1/a0-a1
        fmovm.l %fpcr,%fpsr,USER_FPCR(%a6)      # preserve control regs
        fmovm.x &0xc0,EXC_FP0(%a6)              # preserve fp0/fp1

        fmov.l  &0x0,%fpcr                      # clear FPCR

# Copy the argument to the buffer addressed by %a0 and classify it.
        mov.l   0x8+0x0(%a6),0x0(%a0)           # ext argument, first longword
        mov.l   0x8+0x4(%a6),0x4(%a0)           # second longword
        mov.l   0x8+0x8(%a6),0x8(%a0)           # third longword
        bsr.l   tag                             # get operand type

        andi.l  &0x00ff00ff,USER_FPSR(%a6)      # clear cc and exception-status bytes of saved FPSR

        mov.b   FPCR_MODE(%a6),%d0              # hand over rnd mode,prec

# Handler calls and operand-type tests, in listing order.
        bsr.l   ssin                            # NORM operand
        cmpi.b  %d1,&ZERO                       # ZERO operand?
        cmpi.b  %d1,&INF                        # INF operand?
        cmpi.b  %d1,&QNAN                       # QNAN operand?
        bsr.l   ssind                           # DENORM operand

# The result is in FP0, so only fp1 is reloaded.
        movm.l  EXC_DREGS(%a6),&0x0303          # recover d0-d1/a0-a1
        fmovm.l USER_FPCR(%a6),%fpcr,%fpsr      # recover control regs
        fmovm.x EXC_FP1(%a6),&0x40              # recover fp1
744 #########################################################################
746 #########################################################################
# Single-precision entry stub in front of scos/scosd.
#   in:  single-precision argument at 0x8(%a6)
#   out: result in %fp0
# NOTE(review): this listing shows no entry label, no branches between the
# operand-type tests and the handler calls, and no unlk/rts; how %d1 is
# loaded from tag's result is also not visible.  Confirm the control flow
# against the complete file.
        link    %a6,&-LOCAL_SIZE                # carve out the local frame

        movm.l  &0x0303,EXC_DREGS(%a6)          # preserve d0-d1/a0-a1
        fmovm.l %fpcr,%fpsr,USER_FPCR(%a6)      # preserve control regs
        fmovm.x &0xc0,EXC_FP0(%a6)              # preserve fp0/fp1

        fmov.l  &0x0,%fpcr                      # clear FPCR

# Widen the argument to extended precision and classify it.
        fmov.s  0x8(%a6),%fp0                   # fetch sgl argument
        fmov.x  %fp0,FP_SRC(%a6)                # extended copy into FP_SRC
        bsr.l   tag                             # get operand type

        andi.l  &0x00ff00ff,USER_FPSR(%a6)      # clear cc and exception-status bytes of saved FPSR

        mov.b   FPCR_MODE(%a6),%d0              # hand over rnd mode,prec

# Handler calls and operand-type tests, in listing order.
        bsr.l   scos                            # NORM operand
        cmpi.b  %d1,&ZERO                       # ZERO operand?
        cmpi.b  %d1,&INF                        # INF operand?
        cmpi.b  %d1,&QNAN                       # QNAN operand?
        bsr.l   scosd                           # DENORM operand

# The result is in FP0, so only fp1 is reloaded.
        movm.l  EXC_DREGS(%a6),&0x0303          # recover d0-d1/a0-a1
        fmovm.l USER_FPCR(%a6),%fpcr,%fpsr      # recover control regs
        fmovm.x EXC_FP1(%a6),&0x40              # recover fp1
# Double-precision entry stub in front of scos/scosd.
#   in:  double-precision argument at 0x8(%a6)
#   out: result in %fp0
# NOTE(review): this listing shows no entry label, no branches between the
# operand-type tests and the handler calls, and no unlk/rts; how %d1 is
# loaded from tag's result is also not visible.  Confirm the control flow
# against the complete file.
        link    %a6,&-LOCAL_SIZE                # carve out the local frame

        movm.l  &0x0303,EXC_DREGS(%a6)          # preserve d0-d1/a0-a1
        fmovm.l %fpcr,%fpsr,USER_FPCR(%a6)      # preserve control regs
        fmovm.x &0xc0,EXC_FP0(%a6)              # preserve fp0/fp1

        fmov.l  &0x0,%fpcr                      # clear FPCR

# Widen the argument to extended precision and classify it.
        fmov.d  0x8(%a6),%fp0                   # fetch dbl argument
        fmov.x  %fp0,FP_SRC(%a6)                # extended copy into FP_SRC
        bsr.l   tag                             # get operand type

        andi.l  &0x00ff00ff,USER_FPSR(%a6)      # clear cc and exception-status bytes of saved FPSR

        mov.b   FPCR_MODE(%a6),%d0              # hand over rnd mode,prec

# Handler calls and operand-type tests, in listing order.
        bsr.l   scos                            # NORM operand
        cmpi.b  %d1,&ZERO                       # ZERO operand?
        cmpi.b  %d1,&INF                        # INF operand?
        cmpi.b  %d1,&QNAN                       # QNAN operand?
        bsr.l   scosd                           # DENORM operand

# The result is in FP0, so only fp1 is reloaded.
        movm.l  EXC_DREGS(%a6),&0x0303          # recover d0-d1/a0-a1
        fmovm.l USER_FPCR(%a6),%fpcr,%fpsr      # recover control regs
        fmovm.x EXC_FP1(%a6),&0x40              # recover fp1
# Extended-precision entry stub in front of scos/scosd.
#   in:  three-longword extended argument starting at 0x8(%a6)
#   out: result in %fp0
# NOTE(review): this listing shows no entry label, no branches between the
# operand-type tests and the handler calls, and no unlk/rts; the loads of
# %a0 and %d1 are not visible either (presumably %a0 -> FP_SRC(%a6)).
# Confirm against the complete file.
        link    %a6,&-LOCAL_SIZE                # carve out the local frame

        movm.l  &0x0303,EXC_DREGS(%a6)          # preserve d0-d1/a0-a1
        fmovm.l %fpcr,%fpsr,USER_FPCR(%a6)      # preserve control regs
        fmovm.x &0xc0,EXC_FP0(%a6)              # preserve fp0/fp1

        fmov.l  &0x0,%fpcr                      # clear FPCR

# Copy the argument to the buffer addressed by %a0 and classify it.
        mov.l   0x8+0x0(%a6),0x0(%a0)           # ext argument, first longword
        mov.l   0x8+0x4(%a6),0x4(%a0)           # second longword
        mov.l   0x8+0x8(%a6),0x8(%a0)           # third longword
        bsr.l   tag                             # get operand type

        andi.l  &0x00ff00ff,USER_FPSR(%a6)      # clear cc and exception-status bytes of saved FPSR

        mov.b   FPCR_MODE(%a6),%d0              # hand over rnd mode,prec

# Handler calls and operand-type tests, in listing order.
        bsr.l   scos                            # NORM operand
        cmpi.b  %d1,&ZERO                       # ZERO operand?
        cmpi.b  %d1,&INF                        # INF operand?
        cmpi.b  %d1,&QNAN                       # QNAN operand?
        bsr.l   scosd                           # DENORM operand

# The result is in FP0, so only fp1 is reloaded.
        movm.l  EXC_DREGS(%a6),&0x0303          # recover d0-d1/a0-a1
        fmovm.l USER_FPCR(%a6),%fpcr,%fpsr      # recover control regs
        fmovm.x EXC_FP1(%a6),&0x40              # recover fp1
921 #########################################################################
923 #########################################################################
# Single-precision entry stub in front of ssinh/ssinhd.
#   in:  single-precision argument at 0x8(%a6)
#   out: result in %fp0
# NOTE(review): this listing shows no entry label, no branches between the
# operand-type tests and the handler calls, and no unlk/rts; how %d1 is
# loaded from tag's result is also not visible.  Confirm the control flow
# against the complete file.
        link    %a6,&-LOCAL_SIZE                # carve out the local frame

        movm.l  &0x0303,EXC_DREGS(%a6)          # preserve d0-d1/a0-a1
        fmovm.l %fpcr,%fpsr,USER_FPCR(%a6)      # preserve control regs
        fmovm.x &0xc0,EXC_FP0(%a6)              # preserve fp0/fp1

        fmov.l  &0x0,%fpcr                      # clear FPCR

# Widen the argument to extended precision and classify it.
        fmov.s  0x8(%a6),%fp0                   # fetch sgl argument
        fmov.x  %fp0,FP_SRC(%a6)                # extended copy into FP_SRC
        bsr.l   tag                             # get operand type

        andi.l  &0x00ff00ff,USER_FPSR(%a6)      # clear cc and exception-status bytes of saved FPSR

        mov.b   FPCR_MODE(%a6),%d0              # hand over rnd mode,prec

# Handler calls and operand-type tests, in listing order.
        bsr.l   ssinh                           # NORM operand
        cmpi.b  %d1,&ZERO                       # ZERO operand?
        cmpi.b  %d1,&INF                        # INF operand?
        cmpi.b  %d1,&QNAN                       # QNAN operand?
        bsr.l   ssinhd                          # DENORM operand

# The result is in FP0, so only fp1 is reloaded.
        movm.l  EXC_DREGS(%a6),&0x0303          # recover d0-d1/a0-a1
        fmovm.l USER_FPCR(%a6),%fpcr,%fpsr      # recover control regs
        fmovm.x EXC_FP1(%a6),&0x40              # recover fp1
# Double-precision entry stub in front of ssinh/ssinhd.
#   in:  double-precision argument at 0x8(%a6)
#   out: result in %fp0
# NOTE(review): this listing shows no entry label, no branches between the
# operand-type tests and the handler calls, and no unlk/rts; how %d1 is
# loaded from tag's result is also not visible.  Confirm the control flow
# against the complete file.
        link    %a6,&-LOCAL_SIZE                # carve out the local frame

        movm.l  &0x0303,EXC_DREGS(%a6)          # preserve d0-d1/a0-a1
        fmovm.l %fpcr,%fpsr,USER_FPCR(%a6)      # preserve control regs
        fmovm.x &0xc0,EXC_FP0(%a6)              # preserve fp0/fp1

        fmov.l  &0x0,%fpcr                      # clear FPCR

# Widen the argument to extended precision and classify it.
        fmov.d  0x8(%a6),%fp0                   # fetch dbl argument
        fmov.x  %fp0,FP_SRC(%a6)                # extended copy into FP_SRC
        bsr.l   tag                             # get operand type

        andi.l  &0x00ff00ff,USER_FPSR(%a6)      # clear cc and exception-status bytes of saved FPSR

        mov.b   FPCR_MODE(%a6),%d0              # hand over rnd mode,prec

# Handler calls and operand-type tests, in listing order.
        bsr.l   ssinh                           # NORM operand
        cmpi.b  %d1,&ZERO                       # ZERO operand?
        bsr.l   src_zero                        #   yes
        cmpi.b  %d1,&INF                        # INF operand?
        cmpi.b  %d1,&QNAN                       # QNAN operand?
        bsr.l   src_qnan                        #   yes
        bsr.l   ssinhd                          # DENORM operand

# The result is in FP0, so only fp1 is reloaded.
        movm.l  EXC_DREGS(%a6),&0x0303          # recover d0-d1/a0-a1
        fmovm.l USER_FPCR(%a6),%fpcr,%fpsr      # recover control regs
        fmovm.x EXC_FP1(%a6),&0x40              # recover fp1
# Extended-precision entry stub in front of ssinh/ssinhd.
#   in:  three-longword extended argument starting at 0x8(%a6)
#   out: result in %fp0
# NOTE(review): this listing shows no entry label, no branches between the
# operand-type tests and the handler calls, and no unlk/rts; the loads of
# %a0 and %d1 are not visible either (presumably %a0 -> FP_SRC(%a6)).
# Confirm against the complete file.
        link    %a6,&-LOCAL_SIZE                # carve out the local frame

        movm.l  &0x0303,EXC_DREGS(%a6)          # preserve d0-d1/a0-a1
        fmovm.l %fpcr,%fpsr,USER_FPCR(%a6)      # preserve control regs
        fmovm.x &0xc0,EXC_FP0(%a6)              # preserve fp0/fp1

        fmov.l  &0x0,%fpcr                      # clear FPCR

# Copy the argument to the buffer addressed by %a0 and classify it.
        mov.l   0x8+0x0(%a6),0x0(%a0)           # ext argument, first longword
        mov.l   0x8+0x4(%a6),0x4(%a0)           # second longword
        mov.l   0x8+0x8(%a6),0x8(%a0)           # third longword
        bsr.l   tag                             # get operand type

        andi.l  &0x00ff00ff,USER_FPSR(%a6)      # clear cc and exception-status bytes of saved FPSR

        mov.b   FPCR_MODE(%a6),%d0              # hand over rnd mode,prec

# Handler calls and operand-type tests, in listing order.
        bsr.l   ssinh                           # NORM operand
        cmpi.b  %d1,&ZERO                       # ZERO operand?
        bsr.l   src_zero                        #   yes
        cmpi.b  %d1,&INF                        # INF operand?
        cmpi.b  %d1,&QNAN                       # QNAN operand?
        bsr.l   src_qnan                        #   yes
        bsr.l   ssinhd                          # DENORM operand

# The result is in FP0, so only fp1 is reloaded.
        movm.l  EXC_DREGS(%a6),&0x0303          # recover d0-d1/a0-a1
        fmovm.l USER_FPCR(%a6),%fpcr,%fpsr      # recover control regs
        fmovm.x EXC_FP1(%a6),&0x40              # recover fp1
1098 #########################################################################
1099 # MONADIC TEMPLATE #
1100 #########################################################################
# Single-precision entry stub in front of slognp1/slognp1d.
#   in:  single-precision argument at 0x8(%a6)
#   out: result in %fp0
# NOTE(review): this listing shows no entry label, no branches between the
# operand-type tests and the handler calls, and no unlk/rts; how %d1 is
# loaded from tag's result is also not visible.  Confirm the control flow
# against the complete file.
        link    %a6,&-LOCAL_SIZE                # carve out the local frame

        movm.l  &0x0303,EXC_DREGS(%a6)          # preserve d0-d1/a0-a1
        fmovm.l %fpcr,%fpsr,USER_FPCR(%a6)      # preserve control regs
        fmovm.x &0xc0,EXC_FP0(%a6)              # preserve fp0/fp1

        fmov.l  &0x0,%fpcr                      # clear FPCR

# Widen the argument to extended precision and classify it.
        fmov.s  0x8(%a6),%fp0                   # fetch sgl argument
        fmov.x  %fp0,FP_SRC(%a6)                # extended copy into FP_SRC
        bsr.l   tag                             # get operand type

        andi.l  &0x00ff00ff,USER_FPSR(%a6)      # clear cc and exception-status bytes of saved FPSR

        mov.b   FPCR_MODE(%a6),%d0              # hand over rnd mode,prec

# Handler calls and operand-type tests, in listing order.
        bsr.l   slognp1                         # NORM operand
        cmpi.b  %d1,&ZERO                       # ZERO operand?
        bsr.l   src_zero                        #   yes
        cmpi.b  %d1,&INF                        # INF operand?
        bsr.l   sopr_inf                        #   yes
        cmpi.b  %d1,&QNAN                       # QNAN operand?
        bsr.l   src_qnan                        #   yes
        bsr.l   slognp1d                        # DENORM operand

# The result is in FP0, so only fp1 is reloaded.
        movm.l  EXC_DREGS(%a6),&0x0303          # recover d0-d1/a0-a1
        fmovm.l USER_FPCR(%a6),%fpcr,%fpsr      # recover control regs
        fmovm.x EXC_FP1(%a6),&0x40              # recover fp1
# Double-precision entry stub in front of slognp1/slognp1d.
#   in:  double-precision argument at 0x8(%a6)
#   out: result in %fp0
# NOTE(review): this listing shows no entry label, no branches between the
# operand-type tests and the handler calls, and no unlk/rts; how %d1 is
# loaded from tag's result is also not visible.  Confirm the control flow
# against the complete file.
        link    %a6,&-LOCAL_SIZE                # carve out the local frame

        movm.l  &0x0303,EXC_DREGS(%a6)          # preserve d0-d1/a0-a1
        fmovm.l %fpcr,%fpsr,USER_FPCR(%a6)      # preserve control regs
        fmovm.x &0xc0,EXC_FP0(%a6)              # preserve fp0/fp1

        fmov.l  &0x0,%fpcr                      # clear FPCR

# Widen the argument to extended precision and classify it.
        fmov.d  0x8(%a6),%fp0                   # fetch dbl argument
        fmov.x  %fp0,FP_SRC(%a6)                # extended copy into FP_SRC
        bsr.l   tag                             # get operand type

        andi.l  &0x00ff00ff,USER_FPSR(%a6)      # clear cc and exception-status bytes of saved FPSR

        mov.b   FPCR_MODE(%a6),%d0              # hand over rnd mode,prec

# Handler calls and operand-type tests, in listing order.
        bsr.l   slognp1                         # NORM operand
        cmpi.b  %d1,&ZERO                       # ZERO operand?
        bsr.l   src_zero                        #   yes
        cmpi.b  %d1,&INF                        # INF operand?
        bsr.l   sopr_inf                        #   yes
        cmpi.b  %d1,&QNAN                       # QNAN operand?
        bsr.l   src_qnan                        #   yes
        bsr.l   slognp1d                        # DENORM operand

# The result is in FP0, so only fp1 is reloaded.
        movm.l  EXC_DREGS(%a6),&0x0303          # recover d0-d1/a0-a1
        fmovm.l USER_FPCR(%a6),%fpcr,%fpsr      # recover control regs
        fmovm.x EXC_FP1(%a6),&0x40              # recover fp1
# Extended-precision entry stub in front of slognp1/slognp1d.
#   in:  three-longword extended argument starting at 0x8(%a6)
#   out: result in %fp0
# NOTE(review): this listing shows no entry label, no branches between the
# operand-type tests and the handler calls, and no unlk/rts; the loads of
# %a0 and %d1 are not visible either (presumably %a0 -> FP_SRC(%a6)).
# Confirm against the complete file.
        link    %a6,&-LOCAL_SIZE                # carve out the local frame

        movm.l  &0x0303,EXC_DREGS(%a6)          # preserve d0-d1/a0-a1
        fmovm.l %fpcr,%fpsr,USER_FPCR(%a6)      # preserve control regs
        fmovm.x &0xc0,EXC_FP0(%a6)              # preserve fp0/fp1

        fmov.l  &0x0,%fpcr                      # clear FPCR

# Copy the argument to the buffer addressed by %a0 and classify it.
        mov.l   0x8+0x0(%a6),0x0(%a0)           # ext argument, first longword
        mov.l   0x8+0x4(%a6),0x4(%a0)           # second longword
        mov.l   0x8+0x8(%a6),0x8(%a0)           # third longword
        bsr.l   tag                             # get operand type

        andi.l  &0x00ff00ff,USER_FPSR(%a6)      # clear cc and exception-status bytes of saved FPSR

        mov.b   FPCR_MODE(%a6),%d0              # hand over rnd mode,prec

# Handler calls and operand-type tests, in listing order.
        bsr.l   slognp1                         # NORM operand
        cmpi.b  %d1,&ZERO                       # ZERO operand?
        bsr.l   src_zero                        #   yes
        cmpi.b  %d1,&INF                        # INF operand?
        bsr.l   sopr_inf                        #   yes
        cmpi.b  %d1,&QNAN                       # QNAN operand?
        bsr.l   src_qnan                        #   yes
        bsr.l   slognp1d                        # DENORM operand

# The result is in FP0, so only fp1 is reloaded.
        movm.l  EXC_DREGS(%a6),&0x0303          # recover d0-d1/a0-a1
        fmovm.l USER_FPCR(%a6),%fpcr,%fpsr      # recover control regs
        fmovm.x EXC_FP1(%a6),&0x40              # recover fp1
1275 #########################################################################
1276 # MONADIC TEMPLATE #
1277 #########################################################################
# Single-precision entry stub in front of setoxm1/setoxm1d.
#   in:  single-precision argument at 0x8(%a6)
#   out: result in %fp0
# NOTE(review): this listing shows no entry label, no branches between the
# operand-type tests and the handler calls, and no unlk/rts; how %d1 is
# loaded from tag's result is also not visible.  Confirm the control flow
# against the complete file.
        link    %a6,&-LOCAL_SIZE                # carve out the local frame

        movm.l  &0x0303,EXC_DREGS(%a6)          # preserve d0-d1/a0-a1
        fmovm.l %fpcr,%fpsr,USER_FPCR(%a6)      # preserve control regs
        fmovm.x &0xc0,EXC_FP0(%a6)              # preserve fp0/fp1

        fmov.l  &0x0,%fpcr                      # clear FPCR

# Widen the argument to extended precision and classify it.
        fmov.s  0x8(%a6),%fp0                   # fetch sgl argument
        fmov.x  %fp0,FP_SRC(%a6)                # extended copy into FP_SRC
        bsr.l   tag                             # get operand type

        andi.l  &0x00ff00ff,USER_FPSR(%a6)      # clear cc and exception-status bytes of saved FPSR

        mov.b   FPCR_MODE(%a6),%d0              # hand over rnd mode,prec

# Handler calls and operand-type tests, in listing order.
        bsr.l   setoxm1                         # NORM operand
        cmpi.b  %d1,&ZERO                       # ZERO operand?
        bsr.l   src_zero                        #   yes
        cmpi.b  %d1,&INF                        # INF operand?
        bsr.l   setoxm1i                        #   yes
        cmpi.b  %d1,&QNAN                       # QNAN operand?
        bsr.l   src_qnan                        #   yes
        bsr.l   setoxm1d                        # DENORM operand

# The result is in FP0, so only fp1 is reloaded.
        movm.l  EXC_DREGS(%a6),&0x0303          # recover d0-d1/a0-a1
        fmovm.l USER_FPCR(%a6),%fpcr,%fpsr      # recover control regs
        fmovm.x EXC_FP1(%a6),&0x40              # recover fp1
# Double-precision entry stub in front of setoxm1/setoxm1d.
#   in:  double-precision argument at 0x8(%a6)
#   out: result in %fp0
# NOTE(review): this listing shows no entry label, no branches between the
# operand-type tests and the handler calls, and no unlk/rts; how %d1 is
# loaded from tag's result is also not visible.  Confirm the control flow
# against the complete file.
        link    %a6,&-LOCAL_SIZE                # carve out the local frame

        movm.l  &0x0303,EXC_DREGS(%a6)          # preserve d0-d1/a0-a1
        fmovm.l %fpcr,%fpsr,USER_FPCR(%a6)      # preserve control regs
        fmovm.x &0xc0,EXC_FP0(%a6)              # preserve fp0/fp1

        fmov.l  &0x0,%fpcr                      # clear FPCR

# Widen the argument to extended precision and classify it.
        fmov.d  0x8(%a6),%fp0                   # fetch dbl argument
        fmov.x  %fp0,FP_SRC(%a6)                # extended copy into FP_SRC
        bsr.l   tag                             # get operand type

        andi.l  &0x00ff00ff,USER_FPSR(%a6)      # clear cc and exception-status bytes of saved FPSR

        mov.b   FPCR_MODE(%a6),%d0              # hand over rnd mode,prec

# Handler calls and operand-type tests, in listing order.
        bsr.l   setoxm1                         # NORM operand
        cmpi.b  %d1,&ZERO                       # ZERO operand?
        bsr.l   src_zero                        #   yes
        cmpi.b  %d1,&INF                        # INF operand?
        bsr.l   setoxm1i                        #   yes
        cmpi.b  %d1,&QNAN                       # QNAN operand?
        bsr.l   src_qnan                        #   yes
        bsr.l   setoxm1d                        # DENORM operand

# The result is in FP0, so only fp1 is reloaded.
        movm.l  EXC_DREGS(%a6),&0x0303          # recover d0-d1/a0-a1
        fmovm.l USER_FPCR(%a6),%fpcr,%fpsr      # recover control regs
        fmovm.x EXC_FP1(%a6),&0x40              # recover fp1
# Extended-precision entry stub in front of setoxm1/setoxm1d.
#   in:  three-longword extended argument starting at 0x8(%a6)
#   out: result in %fp0
# NOTE(review): this listing shows no entry label, no branches between the
# operand-type tests and the handler calls, and no unlk/rts; the loads of
# %a0 and %d1 are not visible either (presumably %a0 -> FP_SRC(%a6)).
# Confirm against the complete file.
        link    %a6,&-LOCAL_SIZE                # carve out the local frame

        movm.l  &0x0303,EXC_DREGS(%a6)          # preserve d0-d1/a0-a1
        fmovm.l %fpcr,%fpsr,USER_FPCR(%a6)      # preserve control regs
        fmovm.x &0xc0,EXC_FP0(%a6)              # preserve fp0/fp1

        fmov.l  &0x0,%fpcr                      # clear FPCR

# Copy the argument to the buffer addressed by %a0 and classify it.
        mov.l   0x8+0x0(%a6),0x0(%a0)           # ext argument, first longword
        mov.l   0x8+0x4(%a6),0x4(%a0)           # second longword
        mov.l   0x8+0x8(%a6),0x8(%a0)           # third longword
        bsr.l   tag                             # get operand type

        andi.l  &0x00ff00ff,USER_FPSR(%a6)      # clear cc and exception-status bytes of saved FPSR

        mov.b   FPCR_MODE(%a6),%d0              # hand over rnd mode,prec

# Handler calls and operand-type tests, in listing order.
        bsr.l   setoxm1                         # NORM operand
        cmpi.b  %d1,&ZERO                       # ZERO operand?
        bsr.l   src_zero                        #   yes
        cmpi.b  %d1,&INF                        # INF operand?
        bsr.l   setoxm1i                        #   yes
        cmpi.b  %d1,&QNAN                       # QNAN operand?
        bsr.l   src_qnan                        #   yes
        bsr.l   setoxm1d                        # DENORM operand

# The result is in FP0, so only fp1 is reloaded.
        movm.l  EXC_DREGS(%a6),&0x0303          # recover d0-d1/a0-a1
        fmovm.l USER_FPCR(%a6),%fpcr,%fpsr      # recover control regs
        fmovm.x EXC_FP1(%a6),&0x40              # recover fp1
1452 #########################################################################
1453 # MONADIC TEMPLATE #
1454 #########################################################################
# Single-precision entry stub in front of stanh/stanhd.
#   in:  single-precision argument at 0x8(%a6)
#   out: result in %fp0
# NOTE(review): this listing shows no entry label, no branches between the
# operand-type tests and the handler calls, and no unlk/rts; how %d1 is
# loaded from tag's result is also not visible.  Confirm the control flow
# against the complete file.
        link    %a6,&-LOCAL_SIZE                # carve out the local frame

        movm.l  &0x0303,EXC_DREGS(%a6)          # preserve d0-d1/a0-a1
        fmovm.l %fpcr,%fpsr,USER_FPCR(%a6)      # preserve control regs
        fmovm.x &0xc0,EXC_FP0(%a6)              # preserve fp0/fp1

        fmov.l  &0x0,%fpcr                      # clear FPCR

# Widen the argument to extended precision and classify it.
        fmov.s  0x8(%a6),%fp0                   # fetch sgl argument
        fmov.x  %fp0,FP_SRC(%a6)                # extended copy into FP_SRC
        bsr.l   tag                             # get operand type

        andi.l  &0x00ff00ff,USER_FPSR(%a6)      # clear cc and exception-status bytes of saved FPSR

        mov.b   FPCR_MODE(%a6),%d0              # hand over rnd mode,prec

# Handler calls and operand-type tests, in listing order.
        bsr.l   stanh                           # NORM operand
        cmpi.b  %d1,&ZERO                       # ZERO operand?
        bsr.l   src_zero                        #   yes
        cmpi.b  %d1,&INF                        # INF operand?
        cmpi.b  %d1,&QNAN                       # QNAN operand?
        bsr.l   src_qnan                        #   yes
        bsr.l   stanhd                          # DENORM operand

# The result is in FP0, so only fp1 is reloaded.
        movm.l  EXC_DREGS(%a6),&0x0303          # recover d0-d1/a0-a1
        fmovm.l USER_FPCR(%a6),%fpcr,%fpsr      # recover control regs
        fmovm.x EXC_FP1(%a6),&0x40              # recover fp1
# Double-precision entry stub in front of stanh/stanhd.
#   in:  double-precision argument at 0x8(%a6)
#   out: result in %fp0
# NOTE(review): this listing shows no entry label, no branches between the
# operand-type tests and the handler calls, and no unlk/rts; how %d1 is
# loaded from tag's result is also not visible.  Confirm the control flow
# against the complete file.
        link    %a6,&-LOCAL_SIZE                # carve out the local frame

        movm.l  &0x0303,EXC_DREGS(%a6)          # preserve d0-d1/a0-a1
        fmovm.l %fpcr,%fpsr,USER_FPCR(%a6)      # preserve control regs
        fmovm.x &0xc0,EXC_FP0(%a6)              # preserve fp0/fp1

        fmov.l  &0x0,%fpcr                      # clear FPCR

# Widen the argument to extended precision and classify it.
        fmov.d  0x8(%a6),%fp0                   # fetch dbl argument
        fmov.x  %fp0,FP_SRC(%a6)                # extended copy into FP_SRC
        bsr.l   tag                             # get operand type

        andi.l  &0x00ff00ff,USER_FPSR(%a6)      # clear cc and exception-status bytes of saved FPSR

        mov.b   FPCR_MODE(%a6),%d0              # hand over rnd mode,prec

# Handler calls and operand-type tests, in listing order.
        bsr.l   stanh                           # NORM operand
        cmpi.b  %d1,&ZERO                       # ZERO operand?
        bsr.l   src_zero                        #   yes
        cmpi.b  %d1,&INF                        # INF operand?
        cmpi.b  %d1,&QNAN                       # QNAN operand?
        bsr.l   src_qnan                        #   yes
        bsr.l   stanhd                          # DENORM operand

# The result is in FP0, so only fp1 is reloaded.
        movm.l  EXC_DREGS(%a6),&0x0303          # recover d0-d1/a0-a1
        fmovm.l USER_FPCR(%a6),%fpcr,%fpsr      # recover control regs
        fmovm.x EXC_FP1(%a6),&0x40              # recover fp1
# Extended-precision entry stub in front of stanh/stanhd.
#   in:  three-longword extended argument starting at 0x8(%a6)
#   out: result in %fp0
# NOTE(review): this listing shows no entry label, no branches between the
# operand-type tests and the handler calls, and no unlk/rts; the loads of
# %a0 and %d1 are not visible either (presumably %a0 -> FP_SRC(%a6)).
# Confirm against the complete file.
        link    %a6,&-LOCAL_SIZE                # carve out the local frame

        movm.l  &0x0303,EXC_DREGS(%a6)          # preserve d0-d1/a0-a1
        fmovm.l %fpcr,%fpsr,USER_FPCR(%a6)      # preserve control regs
        fmovm.x &0xc0,EXC_FP0(%a6)              # preserve fp0/fp1

        fmov.l  &0x0,%fpcr                      # clear FPCR

# Copy the argument to the buffer addressed by %a0 and classify it.
        mov.l   0x8+0x0(%a6),0x0(%a0)           # ext argument, first longword
        mov.l   0x8+0x4(%a6),0x4(%a0)           # second longword
        mov.l   0x8+0x8(%a6),0x8(%a0)           # third longword
        bsr.l   tag                             # get operand type

        andi.l  &0x00ff00ff,USER_FPSR(%a6)      # clear cc and exception-status bytes of saved FPSR

        mov.b   FPCR_MODE(%a6),%d0              # hand over rnd mode,prec

# Handler calls and operand-type tests, in listing order.
        bsr.l   stanh                           # NORM operand
        cmpi.b  %d1,&ZERO                       # ZERO operand?
        bsr.l   src_zero                        #   yes
        cmpi.b  %d1,&INF                        # INF operand?
        cmpi.b  %d1,&QNAN                       # QNAN operand?
        bsr.l   src_qnan                        #   yes
        bsr.l   stanhd                          # DENORM operand

# The result is in FP0, so only fp1 is reloaded.
        movm.l  EXC_DREGS(%a6),&0x0303          # recover d0-d1/a0-a1
        fmovm.l USER_FPCR(%a6),%fpcr,%fpsr      # recover control regs
        fmovm.x EXC_FP1(%a6),&0x40              # recover fp1
1629 #########################################################################
1630 # MONADIC TEMPLATE #
1631 #########################################################################
# Single-precision entry stub in front of satan/satand.
#   in:  single-precision argument at 0x8(%a6)
#   out: result in %fp0
# NOTE(review): this listing shows no entry label, no branches between the
# operand-type tests and the handler calls, and no unlk/rts; how %d1 is
# loaded from tag's result is also not visible.  Confirm the control flow
# against the complete file.
        link    %a6,&-LOCAL_SIZE                # carve out the local frame

        movm.l  &0x0303,EXC_DREGS(%a6)          # preserve d0-d1/a0-a1
        fmovm.l %fpcr,%fpsr,USER_FPCR(%a6)      # preserve control regs
        fmovm.x &0xc0,EXC_FP0(%a6)              # preserve fp0/fp1

        fmov.l  &0x0,%fpcr                      # clear FPCR

# Widen the argument to extended precision and classify it.
        fmov.s  0x8(%a6),%fp0                   # fetch sgl argument
        fmov.x  %fp0,FP_SRC(%a6)                # extended copy into FP_SRC
        bsr.l   tag                             # get operand type

        andi.l  &0x00ff00ff,USER_FPSR(%a6)      # clear cc and exception-status bytes of saved FPSR

        mov.b   FPCR_MODE(%a6),%d0              # hand over rnd mode,prec

# Handler calls and operand-type tests, in listing order.
        bsr.l   satan                           # NORM operand
        cmpi.b  %d1,&ZERO                       # ZERO operand?
        bsr.l   src_zero                        #   yes
        cmpi.b  %d1,&INF                        # INF operand?
        cmpi.b  %d1,&QNAN                       # QNAN operand?
        bsr.l   src_qnan                        #   yes
        bsr.l   satand                          # DENORM operand

# The result is in FP0, so only fp1 is reloaded.
        movm.l  EXC_DREGS(%a6),&0x0303          # recover d0-d1/a0-a1
        fmovm.l USER_FPCR(%a6),%fpcr,%fpsr      # recover control regs
        fmovm.x EXC_FP1(%a6),&0x40              # recover fp1
# Double-precision entry stub in front of satan/satand.
#   in:  double-precision argument at 0x8(%a6)
#   out: result in %fp0
# NOTE(review): this listing shows no entry label, no branches between the
# operand-type tests and the handler calls, and no unlk/rts; how %d1 is
# loaded from tag's result is also not visible.  Confirm the control flow
# against the complete file.
        link    %a6,&-LOCAL_SIZE                # carve out the local frame

        movm.l  &0x0303,EXC_DREGS(%a6)          # preserve d0-d1/a0-a1
        fmovm.l %fpcr,%fpsr,USER_FPCR(%a6)      # preserve control regs
        fmovm.x &0xc0,EXC_FP0(%a6)              # preserve fp0/fp1

        fmov.l  &0x0,%fpcr                      # clear FPCR

# Widen the argument to extended precision and classify it.
        fmov.d  0x8(%a6),%fp0                   # fetch dbl argument
        fmov.x  %fp0,FP_SRC(%a6)                # extended copy into FP_SRC
        bsr.l   tag                             # get operand type

        andi.l  &0x00ff00ff,USER_FPSR(%a6)      # clear cc and exception-status bytes of saved FPSR

        mov.b   FPCR_MODE(%a6),%d0              # hand over rnd mode,prec

# Handler calls and operand-type tests, in listing order.
        bsr.l   satan                           # NORM operand
        cmpi.b  %d1,&ZERO                       # ZERO operand?
        bsr.l   src_zero                        #   yes
        cmpi.b  %d1,&INF                        # INF operand?
        cmpi.b  %d1,&QNAN                       # QNAN operand?
        bsr.l   src_qnan                        #   yes
        bsr.l   satand                          # DENORM operand

# The result is in FP0, so only fp1 is reloaded.
        movm.l  EXC_DREGS(%a6),&0x0303          # recover d0-d1/a0-a1
        fmovm.l USER_FPCR(%a6),%fpcr,%fpsr      # recover control regs
        fmovm.x EXC_FP1(%a6),&0x40              # recover fp1
# Extended-precision entry stub in front of satan/satand.
#   in:  three-longword extended argument starting at 0x8(%a6)
#   out: result in %fp0
# NOTE(review): this listing shows no entry label, no branches between the
# operand-type tests and the handler calls, and no unlk/rts; the loads of
# %a0 and %d1 are not visible either (presumably %a0 -> FP_SRC(%a6)).
# Confirm against the complete file.
        link    %a6,&-LOCAL_SIZE                # carve out the local frame

        movm.l  &0x0303,EXC_DREGS(%a6)          # preserve d0-d1/a0-a1
        fmovm.l %fpcr,%fpsr,USER_FPCR(%a6)      # preserve control regs
        fmovm.x &0xc0,EXC_FP0(%a6)              # preserve fp0/fp1

        fmov.l  &0x0,%fpcr                      # clear FPCR

# Copy the argument to the buffer addressed by %a0 and classify it.
        mov.l   0x8+0x0(%a6),0x0(%a0)           # ext argument, first longword
        mov.l   0x8+0x4(%a6),0x4(%a0)           # second longword
        mov.l   0x8+0x8(%a6),0x8(%a0)           # third longword
        bsr.l   tag                             # get operand type

        andi.l  &0x00ff00ff,USER_FPSR(%a6)      # clear cc and exception-status bytes of saved FPSR

        mov.b   FPCR_MODE(%a6),%d0              # hand over rnd mode,prec

# Handler calls and operand-type tests, in listing order.
        bsr.l   satan                           # NORM operand
        cmpi.b  %d1,&ZERO                       # ZERO operand?
        bsr.l   src_zero                        #   yes
        cmpi.b  %d1,&INF                        # INF operand?
        cmpi.b  %d1,&QNAN                       # QNAN operand?
        bsr.l   src_qnan                        #   yes
        bsr.l   satand                          # DENORM operand

# The result is in FP0, so only fp1 is reloaded.
        movm.l  EXC_DREGS(%a6),&0x0303          # recover d0-d1/a0-a1
        fmovm.l USER_FPCR(%a6),%fpcr,%fpsr      # recover control regs
        fmovm.x EXC_FP1(%a6),&0x40              # recover fp1
1806 #########################################################################
1807 # MONADIC TEMPLATE #
1808 #########################################################################
# Monadic wrapper, single-precision input, built around sasin/sasind.
# Prologue: link reserves LOCAL_SIZE bytes below %a6; d0-d1/a0-a1,
# FPCR/FPSR and fp0/fp1 are stored in that frame and the FPCR is zeroed.
# NOTE(review): no entry label, conditional branch or unlk/rts is
# visible in this listing; confirm the dispatch against the full file.
1811 link
%a6
,&-LOCAL_SIZE
1813 movm.
l &0x0303,EXC_DREGS
(%a6
) # save d0-d1/a0-a1
1814 fmovm.
l %fpcr
,%fpsr
,USER_FPCR
(%a6
) # save ctrl regs
1815 fmovm.x
&0xc0,EXC_FP0
(%a6
) # save fp0/fp1
1817 fmov.
l &0x0,%fpcr
# zero FPCR
1820 # copy, convert, and tag input argument
# the input at 0x8(%a6) is widened to extended precision into FP_SRC
1822 fmov.s
0x8(%a6
),%fp0
# load sgl input
1823 fmov.x
%fp0
,FP_SRC
(%a6
)
1825 bsr.
l tag
# fetch operand type
# mask USER_FPSR: bits 23-16 and 7-0 are kept, the other two bytes cleared
1829 andi.l &0x00ff00ff,USER_FPSR
(%a6
)
1832 mov.
b FPCR_MODE
(%a6
),%d0
# pass rnd mode,prec
1836 bsr.
l sasin
# operand is a NORM
1839 cmpi.
b %d1
,&ZERO
# is operand a ZERO?
1841 bsr.
l src_zero
# yes
1844 cmpi.
b %d1
,&INF
# is operand an INF?
# NOTE(review): no call for the INF case is visible in this listing
1849 cmpi.
b %d1
,&QNAN
# is operand a QNAN?
1851 bsr.
l src_qnan
# yes
1854 bsr.
l sasind
# operand is a DENORM
1858 # Result is now in FP0
1860 movm.
l EXC_DREGS
(%a6
),&0x0303 # restore d0-d1/a0-a1
1861 fmovm.
l USER_FPCR
(%a6
),%fpcr
,%fpsr
# restore ctrl regs
# only fp1 is reloaded; fp0 is left holding the result
1862 fmovm.x EXC_FP1
(%a6
),&0x40 # restore fp1
# Monadic wrapper, double-precision input, built around sasin/sasind.
# Prologue: link reserves LOCAL_SIZE bytes below %a6; d0-d1/a0-a1,
# FPCR/FPSR and fp0/fp1 are stored in that frame and the FPCR is zeroed.
# NOTE(review): no entry label, conditional branch or unlk/rts is
# visible in this listing; confirm the dispatch against the full file.
1868 link
%a6
,&-LOCAL_SIZE
1870 movm.
l &0x0303,EXC_DREGS
(%a6
) # save d0-d1/a0-a1
1871 fmovm.
l %fpcr
,%fpsr
,USER_FPCR
(%a6
) # save ctrl regs
1872 fmovm.x
&0xc0,EXC_FP0
(%a6
) # save fp0/fp1
1874 fmov.
l &0x0,%fpcr
# zero FPCR
1877 # copy, convert, and tag input argument
# the input at 0x8(%a6) is widened to extended precision into FP_SRC
1879 fmov.d
0x8(%a6
),%fp0
# load dbl input
1880 fmov.x
%fp0
,FP_SRC
(%a6
)
1882 bsr.
l tag
# fetch operand type
# mask USER_FPSR: bits 23-16 and 7-0 are kept, the other two bytes cleared
1886 andi.l &0x00ff00ff,USER_FPSR
(%a6
)
1889 mov.
b FPCR_MODE
(%a6
),%d0
# pass rnd mode,prec
1894 bsr.
l sasin
# operand is a NORM
1897 cmpi.
b %d1
,&ZERO
# is operand a ZERO?
1899 bsr.
l src_zero
# yes
1902 cmpi.
b %d1
,&INF
# is operand an INF?
# NOTE(review): no call for the INF case is visible in this listing
1907 cmpi.
b %d1
,&QNAN
# is operand a QNAN?
1909 bsr.
l src_qnan
# yes
1912 bsr.
l sasind
# operand is a DENORM
1916 # Result is now in FP0
1918 movm.
l EXC_DREGS
(%a6
),&0x0303 # restore d0-d1/a0-a1
1919 fmovm.
l USER_FPCR
(%a6
),%fpcr
,%fpsr
# restore ctrl regs
# only fp1 is reloaded; fp0 is left holding the result
1920 fmovm.x EXC_FP1
(%a6
),&0x40 # restore fp1
# Monadic wrapper, extended-precision input, built around sasin/sasind.
# Prologue: link reserves LOCAL_SIZE bytes below %a6; d0-d1/a0-a1,
# FPCR/FPSR and fp0/fp1 are stored in that frame and the FPCR is zeroed.
# NOTE(review): no entry label, conditional branch or unlk/rts is
# visible in this listing; confirm the dispatch against the full file.
1926 link
%a6
,&-LOCAL_SIZE
1928 movm.
l &0x0303,EXC_DREGS
(%a6
) # save d0-d1/a0-a1
1929 fmovm.
l %fpcr
,%fpsr
,USER_FPCR
(%a6
) # save ctrl regs
1930 fmovm.x
&0xc0,EXC_FP0
(%a6
) # save fp0/fp1
1932 fmov.
l &0x0,%fpcr
# zero FPCR
1935 # copy, convert, and tag input argument
# copy the three longwords of the input at 0x8(%a6) to 0x0-0x8(%a0)
# NOTE(review): the instruction that loads %a0 is not visible here
1938 mov.
l 0x8+0x0(%a6
),0x0(%a0
) # load ext input
1939 mov.
l 0x8+0x4(%a6
),0x4(%a0
)
1940 mov.
l 0x8+0x8(%a6
),0x8(%a0
)
1941 bsr.
l tag
# fetch operand type
# mask USER_FPSR: bits 23-16 and 7-0 are kept, the other two bytes cleared
1945 andi.l &0x00ff00ff,USER_FPSR
(%a6
)
1948 mov.
b FPCR_MODE
(%a6
),%d0
# pass rnd mode,prec
1952 bsr.
l sasin
# operand is a NORM
1955 cmpi.
b %d1
,&ZERO
# is operand a ZERO?
1957 bsr.
l src_zero
# yes
1960 cmpi.
b %d1
,&INF
# is operand an INF?
# NOTE(review): no call for the INF case is visible in this listing
1965 cmpi.
b %d1
,&QNAN
# is operand a QNAN?
1967 bsr.
l src_qnan
# yes
1970 bsr.
l sasind
# operand is a DENORM
1974 # Result is now in FP0
1976 movm.
l EXC_DREGS
(%a6
),&0x0303 # restore d0-d1/a0-a1
1977 fmovm.
l USER_FPCR
(%a6
),%fpcr
,%fpsr
# restore ctrl regs
# only fp1 is reloaded; fp0 is left holding the result
1978 fmovm.x EXC_FP1
(%a6
),&0x40 # restore fp1
1983 #########################################################################
1984 # MONADIC TEMPLATE #
1985 #########################################################################
# Monadic wrapper, single-precision input, built around satanh/satanhd.
# Prologue: link reserves LOCAL_SIZE bytes below %a6; d0-d1/a0-a1,
# FPCR/FPSR and fp0/fp1 are stored in that frame and the FPCR is zeroed.
# NOTE(review): no entry label, conditional branch or unlk/rts is
# visible in this listing; confirm the dispatch against the full file.
1988 link
%a6
,&-LOCAL_SIZE
1990 movm.
l &0x0303,EXC_DREGS
(%a6
) # save d0-d1/a0-a1
1991 fmovm.
l %fpcr
,%fpsr
,USER_FPCR
(%a6
) # save ctrl regs
1992 fmovm.x
&0xc0,EXC_FP0
(%a6
) # save fp0/fp1
1994 fmov.
l &0x0,%fpcr
# zero FPCR
1997 # copy, convert, and tag input argument
# the input at 0x8(%a6) is widened to extended precision into FP_SRC
1999 fmov.s
0x8(%a6
),%fp0
# load sgl input
2000 fmov.x
%fp0
,FP_SRC
(%a6
)
2002 bsr.
l tag
# fetch operand type
# mask USER_FPSR: bits 23-16 and 7-0 are kept, the other two bytes cleared
2006 andi.l &0x00ff00ff,USER_FPSR
(%a6
)
2009 mov.
b FPCR_MODE
(%a6
),%d0
# pass rnd mode,prec
2013 bsr.
l satanh
# operand is a NORM
2016 cmpi.
b %d1
,&ZERO
# is operand a ZERO?
2018 bsr.
l src_zero
# yes
2021 cmpi.
b %d1
,&INF
# is operand an INF?
# NOTE(review): no call for the INF case is visible in this listing
2026 cmpi.
b %d1
,&QNAN
# is operand a QNAN?
2028 bsr.
l src_qnan
# yes
2031 bsr.
l satanhd
# operand is a DENORM
2035 # Result is now in FP0
2037 movm.
l EXC_DREGS
(%a6
),&0x0303 # restore d0-d1/a0-a1
2038 fmovm.
l USER_FPCR
(%a6
),%fpcr
,%fpsr
# restore ctrl regs
# only fp1 is reloaded; fp0 is left holding the result
2039 fmovm.x EXC_FP1
(%a6
),&0x40 # restore fp1
# Monadic wrapper, double-precision input, built around satanh/satanhd.
# Prologue: link reserves LOCAL_SIZE bytes below %a6; d0-d1/a0-a1,
# FPCR/FPSR and fp0/fp1 are stored in that frame and the FPCR is zeroed.
# NOTE(review): no entry label, conditional branch or unlk/rts is
# visible in this listing; confirm the dispatch against the full file.
2045 link
%a6
,&-LOCAL_SIZE
2047 movm.
l &0x0303,EXC_DREGS
(%a6
) # save d0-d1/a0-a1
2048 fmovm.
l %fpcr
,%fpsr
,USER_FPCR
(%a6
) # save ctrl regs
2049 fmovm.x
&0xc0,EXC_FP0
(%a6
) # save fp0/fp1
2051 fmov.
l &0x0,%fpcr
# zero FPCR
2054 # copy, convert, and tag input argument
# the input at 0x8(%a6) is widened to extended precision into FP_SRC
2056 fmov.d
0x8(%a6
),%fp0
# load dbl input
2057 fmov.x
%fp0
,FP_SRC
(%a6
)
2059 bsr.
l tag
# fetch operand type
# mask USER_FPSR: bits 23-16 and 7-0 are kept, the other two bytes cleared
2063 andi.l &0x00ff00ff,USER_FPSR
(%a6
)
2066 mov.
b FPCR_MODE
(%a6
),%d0
# pass rnd mode,prec
2071 bsr.
l satanh
# operand is a NORM
2074 cmpi.
b %d1
,&ZERO
# is operand a ZERO?
2076 bsr.
l src_zero
# yes
2079 cmpi.
b %d1
,&INF
# is operand an INF?
# NOTE(review): no call for the INF case is visible in this listing
2084 cmpi.
b %d1
,&QNAN
# is operand a QNAN?
2086 bsr.
l src_qnan
# yes
2089 bsr.
l satanhd
# operand is a DENORM
2093 # Result is now in FP0
2095 movm.
l EXC_DREGS
(%a6
),&0x0303 # restore d0-d1/a0-a1
2096 fmovm.
l USER_FPCR
(%a6
),%fpcr
,%fpsr
# restore ctrl regs
# only fp1 is reloaded; fp0 is left holding the result
2097 fmovm.x EXC_FP1
(%a6
),&0x40 # restore fp1
# Monadic wrapper, extended-precision input, built around satanh/satanhd.
# Prologue: link reserves LOCAL_SIZE bytes below %a6; d0-d1/a0-a1,
# FPCR/FPSR and fp0/fp1 are stored in that frame and the FPCR is zeroed.
# NOTE(review): no entry label, conditional branch or unlk/rts is
# visible in this listing; confirm the dispatch against the full file.
2103 link
%a6
,&-LOCAL_SIZE
2105 movm.
l &0x0303,EXC_DREGS
(%a6
) # save d0-d1/a0-a1
2106 fmovm.
l %fpcr
,%fpsr
,USER_FPCR
(%a6
) # save ctrl regs
2107 fmovm.x
&0xc0,EXC_FP0
(%a6
) # save fp0/fp1
2109 fmov.
l &0x0,%fpcr
# zero FPCR
2112 # copy, convert, and tag input argument
# copy the three longwords of the input at 0x8(%a6) to 0x0-0x8(%a0)
# NOTE(review): the instruction that loads %a0 is not visible here
2115 mov.
l 0x8+0x0(%a6
),0x0(%a0
) # load ext input
2116 mov.
l 0x8+0x4(%a6
),0x4(%a0
)
2117 mov.
l 0x8+0x8(%a6
),0x8(%a0
)
2118 bsr.
l tag
# fetch operand type
# mask USER_FPSR: bits 23-16 and 7-0 are kept, the other two bytes cleared
2122 andi.l &0x00ff00ff,USER_FPSR
(%a6
)
2125 mov.
b FPCR_MODE
(%a6
),%d0
# pass rnd mode,prec
2129 bsr.
l satanh
# operand is a NORM
2132 cmpi.
b %d1
,&ZERO
# is operand a ZERO?
2134 bsr.
l src_zero
# yes
2137 cmpi.
b %d1
,&INF
# is operand an INF?
# NOTE(review): no call for the INF case is visible in this listing
2142 cmpi.
b %d1
,&QNAN
# is operand a QNAN?
2144 bsr.
l src_qnan
# yes
2147 bsr.
l satanhd
# operand is a DENORM
2151 # Result is now in FP0
2153 movm.
l EXC_DREGS
(%a6
),&0x0303 # restore d0-d1/a0-a1
2154 fmovm.
l USER_FPCR
(%a6
),%fpcr
,%fpsr
# restore ctrl regs
# only fp1 is reloaded; fp0 is left holding the result
2155 fmovm.x EXC_FP1
(%a6
),&0x40 # restore fp1
2160 #########################################################################
2161 # MONADIC TEMPLATE #
2162 #########################################################################
# Monadic wrapper, single-precision input, built around stan/stand.
# Prologue: link reserves LOCAL_SIZE bytes below %a6; d0-d1/a0-a1,
# FPCR/FPSR and fp0/fp1 are stored in that frame and the FPCR is zeroed.
# NOTE(review): no entry label, conditional branch or unlk/rts is
# visible in this listing; confirm the dispatch against the full file.
2165 link
%a6
,&-LOCAL_SIZE
2167 movm.
l &0x0303,EXC_DREGS
(%a6
) # save d0-d1/a0-a1
2168 fmovm.
l %fpcr
,%fpsr
,USER_FPCR
(%a6
) # save ctrl regs
2169 fmovm.x
&0xc0,EXC_FP0
(%a6
) # save fp0/fp1
2171 fmov.
l &0x0,%fpcr
# zero FPCR
2174 # copy, convert, and tag input argument
# the input at 0x8(%a6) is widened to extended precision into FP_SRC
2176 fmov.s
0x8(%a6
),%fp0
# load sgl input
2177 fmov.x
%fp0
,FP_SRC
(%a6
)
2179 bsr.
l tag
# fetch operand type
# mask USER_FPSR: bits 23-16 and 7-0 are kept, the other two bytes cleared
2183 andi.l &0x00ff00ff,USER_FPSR
(%a6
)
2186 mov.
b FPCR_MODE
(%a6
),%d0
# pass rnd mode,prec
2190 bsr.
l stan
# operand is a NORM
2193 cmpi.
b %d1
,&ZERO
# is operand a ZERO?
2195 bsr.
l src_zero
# yes
2198 cmpi.
b %d1
,&INF
# is operand an INF?
# NOTE(review): no call for the INF case is visible in this listing
2203 cmpi.
b %d1
,&QNAN
# is operand a QNAN?
2205 bsr.
l src_qnan
# yes
2208 bsr.
l stand
# operand is a DENORM
2212 # Result is now in FP0
2214 movm.
l EXC_DREGS
(%a6
),&0x0303 # restore d0-d1/a0-a1
2215 fmovm.
l USER_FPCR
(%a6
),%fpcr
,%fpsr
# restore ctrl regs
# only fp1 is reloaded; fp0 is left holding the result
2216 fmovm.x EXC_FP1
(%a6
),&0x40 # restore fp1
# Monadic wrapper, double-precision input, built around stan/stand.
# Prologue: link reserves LOCAL_SIZE bytes below %a6; d0-d1/a0-a1,
# FPCR/FPSR and fp0/fp1 are stored in that frame and the FPCR is zeroed.
# NOTE(review): no entry label, conditional branch or unlk/rts is
# visible in this listing; confirm the dispatch against the full file.
2222 link
%a6
,&-LOCAL_SIZE
2224 movm.
l &0x0303,EXC_DREGS
(%a6
) # save d0-d1/a0-a1
2225 fmovm.
l %fpcr
,%fpsr
,USER_FPCR
(%a6
) # save ctrl regs
2226 fmovm.x
&0xc0,EXC_FP0
(%a6
) # save fp0/fp1
2228 fmov.
l &0x0,%fpcr
# zero FPCR
2231 # copy, convert, and tag input argument
# the input at 0x8(%a6) is widened to extended precision into FP_SRC
2233 fmov.d
0x8(%a6
),%fp0
# load dbl input
2234 fmov.x
%fp0
,FP_SRC
(%a6
)
2236 bsr.
l tag
# fetch operand type
# mask USER_FPSR: bits 23-16 and 7-0 are kept, the other two bytes cleared
2240 andi.l &0x00ff00ff,USER_FPSR
(%a6
)
2243 mov.
b FPCR_MODE
(%a6
),%d0
# pass rnd mode,prec
2248 bsr.
l stan
# operand is a NORM
2251 cmpi.
b %d1
,&ZERO
# is operand a ZERO?
2253 bsr.
l src_zero
# yes
2256 cmpi.
b %d1
,&INF
# is operand an INF?
# NOTE(review): no call for the INF case is visible in this listing
2261 cmpi.
b %d1
,&QNAN
# is operand a QNAN?
2263 bsr.
l src_qnan
# yes
2266 bsr.
l stand
# operand is a DENORM
2270 # Result is now in FP0
2272 movm.
l EXC_DREGS
(%a6
),&0x0303 # restore d0-d1/a0-a1
2273 fmovm.
l USER_FPCR
(%a6
),%fpcr
,%fpsr
# restore ctrl regs
# only fp1 is reloaded; fp0 is left holding the result
2274 fmovm.x EXC_FP1
(%a6
),&0x40 # restore fp1
# Monadic wrapper, extended-precision input, built around stan/stand.
# Prologue: link reserves LOCAL_SIZE bytes below %a6; d0-d1/a0-a1,
# FPCR/FPSR and fp0/fp1 are stored in that frame and the FPCR is zeroed.
# NOTE(review): no entry label, conditional branch or unlk/rts is
# visible in this listing; confirm the dispatch against the full file.
2280 link
%a6
,&-LOCAL_SIZE
2282 movm.
l &0x0303,EXC_DREGS
(%a6
) # save d0-d1/a0-a1
2283 fmovm.
l %fpcr
,%fpsr
,USER_FPCR
(%a6
) # save ctrl regs
2284 fmovm.x
&0xc0,EXC_FP0
(%a6
) # save fp0/fp1
2286 fmov.
l &0x0,%fpcr
# zero FPCR
2289 # copy, convert, and tag input argument
# copy the three longwords of the input at 0x8(%a6) to 0x0-0x8(%a0)
# NOTE(review): the instruction that loads %a0 is not visible here
2292 mov.
l 0x8+0x0(%a6
),0x0(%a0
) # load ext input
2293 mov.
l 0x8+0x4(%a6
),0x4(%a0
)
2294 mov.
l 0x8+0x8(%a6
),0x8(%a0
)
2295 bsr.
l tag
# fetch operand type
# mask USER_FPSR: bits 23-16 and 7-0 are kept, the other two bytes cleared
2299 andi.l &0x00ff00ff,USER_FPSR
(%a6
)
2302 mov.
b FPCR_MODE
(%a6
),%d0
# pass rnd mode,prec
2306 bsr.
l stan
# operand is a NORM
2309 cmpi.
b %d1
,&ZERO
# is operand a ZERO?
2311 bsr.
l src_zero
# yes
2314 cmpi.
b %d1
,&INF
# is operand an INF?
# NOTE(review): no call for the INF case is visible in this listing
2319 cmpi.
b %d1
,&QNAN
# is operand a QNAN?
2321 bsr.
l src_qnan
# yes
2324 bsr.
l stand
# operand is a DENORM
2328 # Result is now in FP0
2330 movm.
l EXC_DREGS
(%a6
),&0x0303 # restore d0-d1/a0-a1
2331 fmovm.
l USER_FPCR
(%a6
),%fpcr
,%fpsr
# restore ctrl regs
# only fp1 is reloaded; fp0 is left holding the result
2332 fmovm.x EXC_FP1
(%a6
),&0x40 # restore fp1
2337 #########################################################################
2338 # MONADIC TEMPLATE #
2339 #########################################################################
# Monadic wrapper, single-precision input, built around setox/setoxd.
# Prologue: link reserves LOCAL_SIZE bytes below %a6; d0-d1/a0-a1,
# FPCR/FPSR and fp0/fp1 are stored in that frame and the FPCR is zeroed.
# NOTE(review): no entry label, conditional branch or unlk/rts is
# visible in this listing; confirm the dispatch against the full file.
2342 link
%a6
,&-LOCAL_SIZE
2344 movm.
l &0x0303,EXC_DREGS
(%a6
) # save d0-d1/a0-a1
2345 fmovm.
l %fpcr
,%fpsr
,USER_FPCR
(%a6
) # save ctrl regs
2346 fmovm.x
&0xc0,EXC_FP0
(%a6
) # save fp0/fp1
2348 fmov.
l &0x0,%fpcr
# zero FPCR
2351 # copy, convert, and tag input argument
# the input at 0x8(%a6) is widened to extended precision into FP_SRC
2353 fmov.s
0x8(%a6
),%fp0
# load sgl input
2354 fmov.x
%fp0
,FP_SRC
(%a6
)
2356 bsr.
l tag
# fetch operand type
# mask USER_FPSR: bits 23-16 and 7-0 are kept, the other two bytes cleared
2360 andi.l &0x00ff00ff,USER_FPSR
(%a6
)
2363 mov.
b FPCR_MODE
(%a6
),%d0
# pass rnd mode,prec
2367 bsr.
l setox
# operand is a NORM
2370 cmpi.
b %d1
,&ZERO
# is operand a ZERO?
# NOTE(review): no call for the ZERO case is visible in this listing
2375 cmpi.
b %d1
,&INF
# is operand an INF?
# NOTE(review): no call for the INF case is visible in this listing
2380 cmpi.
b %d1
,&QNAN
# is operand a QNAN?
2382 bsr.
l src_qnan
# yes
2385 bsr.
l setoxd
# operand is a DENORM
2389 # Result is now in FP0
2391 movm.
l EXC_DREGS
(%a6
),&0x0303 # restore d0-d1/a0-a1
2392 fmovm.
l USER_FPCR
(%a6
),%fpcr
,%fpsr
# restore ctrl regs
# only fp1 is reloaded; fp0 is left holding the result
2393 fmovm.x EXC_FP1
(%a6
),&0x40 # restore fp1
# Monadic wrapper, double-precision input, built around setox/setoxd.
# Prologue: link reserves LOCAL_SIZE bytes below %a6; d0-d1/a0-a1,
# FPCR/FPSR and fp0/fp1 are stored in that frame and the FPCR is zeroed.
# NOTE(review): no entry label, conditional branch or unlk/rts is
# visible in this listing; confirm the dispatch against the full file.
2399 link
%a6
,&-LOCAL_SIZE
2401 movm.
l &0x0303,EXC_DREGS
(%a6
) # save d0-d1/a0-a1
2402 fmovm.
l %fpcr
,%fpsr
,USER_FPCR
(%a6
) # save ctrl regs
2403 fmovm.x
&0xc0,EXC_FP0
(%a6
) # save fp0/fp1
2405 fmov.
l &0x0,%fpcr
# zero FPCR
2408 # copy, convert, and tag input argument
# the input at 0x8(%a6) is widened to extended precision into FP_SRC
2410 fmov.d
0x8(%a6
),%fp0
# load dbl input
2411 fmov.x
%fp0
,FP_SRC
(%a6
)
2413 bsr.
l tag
# fetch operand type
# mask USER_FPSR: bits 23-16 and 7-0 are kept, the other two bytes cleared
2417 andi.l &0x00ff00ff,USER_FPSR
(%a6
)
2420 mov.
b FPCR_MODE
(%a6
),%d0
# pass rnd mode,prec
2425 bsr.
l setox
# operand is a NORM
2428 cmpi.
b %d1
,&ZERO
# is operand a ZERO?
# NOTE(review): no call for the ZERO case is visible in this listing
2433 cmpi.
b %d1
,&INF
# is operand an INF?
# NOTE(review): no call for the INF case is visible in this listing
2438 cmpi.
b %d1
,&QNAN
# is operand a QNAN?
2440 bsr.
l src_qnan
# yes
2443 bsr.
l setoxd
# operand is a DENORM
2447 # Result is now in FP0
2449 movm.
l EXC_DREGS
(%a6
),&0x0303 # restore d0-d1/a0-a1
2450 fmovm.
l USER_FPCR
(%a6
),%fpcr
,%fpsr
# restore ctrl regs
# only fp1 is reloaded; fp0 is left holding the result
2451 fmovm.x EXC_FP1
(%a6
),&0x40 # restore fp1
# Monadic wrapper, extended-precision input, built around setox/setoxd.
# Prologue: link reserves LOCAL_SIZE bytes below %a6; d0-d1/a0-a1,
# FPCR/FPSR and fp0/fp1 are stored in that frame and the FPCR is zeroed.
# NOTE(review): no entry label, conditional branch or unlk/rts is
# visible in this listing; confirm the dispatch against the full file.
2457 link
%a6
,&-LOCAL_SIZE
2459 movm.
l &0x0303,EXC_DREGS
(%a6
) # save d0-d1/a0-a1
2460 fmovm.
l %fpcr
,%fpsr
,USER_FPCR
(%a6
) # save ctrl regs
2461 fmovm.x
&0xc0,EXC_FP0
(%a6
) # save fp0/fp1
2463 fmov.
l &0x0,%fpcr
# zero FPCR
2466 # copy, convert, and tag input argument
# copy the three longwords of the input at 0x8(%a6) to 0x0-0x8(%a0)
# NOTE(review): the instruction that loads %a0 is not visible here
2469 mov.
l 0x8+0x0(%a6
),0x0(%a0
) # load ext input
2470 mov.
l 0x8+0x4(%a6
),0x4(%a0
)
2471 mov.
l 0x8+0x8(%a6
),0x8(%a0
)
2472 bsr.
l tag
# fetch operand type
# mask USER_FPSR: bits 23-16 and 7-0 are kept, the other two bytes cleared
2476 andi.l &0x00ff00ff,USER_FPSR
(%a6
)
2479 mov.
b FPCR_MODE
(%a6
),%d0
# pass rnd mode,prec
2483 bsr.
l setox
# operand is a NORM
2486 cmpi.
b %d1
,&ZERO
# is operand a ZERO?
# NOTE(review): no call for the ZERO case is visible in this listing
2491 cmpi.
b %d1
,&INF
# is operand an INF?
# NOTE(review): no call for the INF case is visible in this listing
2496 cmpi.
b %d1
,&QNAN
# is operand a QNAN?
2498 bsr.
l src_qnan
# yes
2501 bsr.
l setoxd
# operand is a DENORM
2505 # Result is now in FP0
2507 movm.
l EXC_DREGS
(%a6
),&0x0303 # restore d0-d1/a0-a1
2508 fmovm.
l USER_FPCR
(%a6
),%fpcr
,%fpsr
# restore ctrl regs
# only fp1 is reloaded; fp0 is left holding the result
2509 fmovm.x EXC_FP1
(%a6
),&0x40 # restore fp1
2514 #########################################################################
2515 # MONADIC TEMPLATE #
2516 #########################################################################
# Monadic wrapper, single-precision input, built around stwotox/stwotoxd.
# Prologue: link reserves LOCAL_SIZE bytes below %a6; d0-d1/a0-a1,
# FPCR/FPSR and fp0/fp1 are stored in that frame and the FPCR is zeroed.
# NOTE(review): no entry label, conditional branch or unlk/rts is
# visible in this listing; confirm the dispatch against the full file.
2519 link
%a6
,&-LOCAL_SIZE
2521 movm.
l &0x0303,EXC_DREGS
(%a6
) # save d0-d1/a0-a1
2522 fmovm.
l %fpcr
,%fpsr
,USER_FPCR
(%a6
) # save ctrl regs
2523 fmovm.x
&0xc0,EXC_FP0
(%a6
) # save fp0/fp1
2525 fmov.
l &0x0,%fpcr
# zero FPCR
2528 # copy, convert, and tag input argument
# the input at 0x8(%a6) is widened to extended precision into FP_SRC
2530 fmov.s
0x8(%a6
),%fp0
# load sgl input
2531 fmov.x
%fp0
,FP_SRC
(%a6
)
2533 bsr.
l tag
# fetch operand type
# mask USER_FPSR: bits 23-16 and 7-0 are kept, the other two bytes cleared
2537 andi.l &0x00ff00ff,USER_FPSR
(%a6
)
2540 mov.
b FPCR_MODE
(%a6
),%d0
# pass rnd mode,prec
2544 bsr.
l stwotox
# operand is a NORM
2547 cmpi.
b %d1
,&ZERO
# is operand a ZERO?
# NOTE(review): no call for the ZERO case is visible in this listing
2552 cmpi.
b %d1
,&INF
# is operand an INF?
# NOTE(review): no call for the INF case is visible in this listing
2557 cmpi.
b %d1
,&QNAN
# is operand a QNAN?
2559 bsr.
l src_qnan
# yes
2562 bsr.
l stwotoxd
# operand is a DENORM
2566 # Result is now in FP0
2568 movm.
l EXC_DREGS
(%a6
),&0x0303 # restore d0-d1/a0-a1
2569 fmovm.
l USER_FPCR
(%a6
),%fpcr
,%fpsr
# restore ctrl regs
# only fp1 is reloaded; fp0 is left holding the result
2570 fmovm.x EXC_FP1
(%a6
),&0x40 # restore fp1
# Monadic wrapper, double-precision input, built around stwotox/stwotoxd.
# Prologue: link reserves LOCAL_SIZE bytes below %a6; d0-d1/a0-a1,
# FPCR/FPSR and fp0/fp1 are stored in that frame and the FPCR is zeroed.
# NOTE(review): no entry label, conditional branch or unlk/rts is
# visible in this listing; confirm the dispatch against the full file.
2576 link
%a6
,&-LOCAL_SIZE
2578 movm.
l &0x0303,EXC_DREGS
(%a6
) # save d0-d1/a0-a1
2579 fmovm.
l %fpcr
,%fpsr
,USER_FPCR
(%a6
) # save ctrl regs
2580 fmovm.x
&0xc0,EXC_FP0
(%a6
) # save fp0/fp1
2582 fmov.
l &0x0,%fpcr
# zero FPCR
2585 # copy, convert, and tag input argument
# the input at 0x8(%a6) is widened to extended precision into FP_SRC
2587 fmov.d
0x8(%a6
),%fp0
# load dbl input
2588 fmov.x
%fp0
,FP_SRC
(%a6
)
2590 bsr.
l tag
# fetch operand type
# mask USER_FPSR: bits 23-16 and 7-0 are kept, the other two bytes cleared
2594 andi.l &0x00ff00ff,USER_FPSR
(%a6
)
2597 mov.
b FPCR_MODE
(%a6
),%d0
# pass rnd mode,prec
2602 bsr.
l stwotox
# operand is a NORM
2605 cmpi.
b %d1
,&ZERO
# is operand a ZERO?
# NOTE(review): no call for the ZERO case is visible in this listing
2610 cmpi.
b %d1
,&INF
# is operand an INF?
# NOTE(review): no call for the INF case is visible in this listing
2615 cmpi.
b %d1
,&QNAN
# is operand a QNAN?
2617 bsr.
l src_qnan
# yes
2620 bsr.
l stwotoxd
# operand is a DENORM
2624 # Result is now in FP0
2626 movm.
l EXC_DREGS
(%a6
),&0x0303 # restore d0-d1/a0-a1
2627 fmovm.
l USER_FPCR
(%a6
),%fpcr
,%fpsr
# restore ctrl regs
# only fp1 is reloaded; fp0 is left holding the result
2628 fmovm.x EXC_FP1
(%a6
),&0x40 # restore fp1
# Monadic wrapper, extended-precision input, built around stwotox/stwotoxd.
# Prologue: link reserves LOCAL_SIZE bytes below %a6; d0-d1/a0-a1,
# FPCR/FPSR and fp0/fp1 are stored in that frame and the FPCR is zeroed.
# NOTE(review): no entry label, conditional branch or unlk/rts is
# visible in this listing; confirm the dispatch against the full file.
2634 link
%a6
,&-LOCAL_SIZE
2636 movm.
l &0x0303,EXC_DREGS
(%a6
) # save d0-d1/a0-a1
2637 fmovm.
l %fpcr
,%fpsr
,USER_FPCR
(%a6
) # save ctrl regs
2638 fmovm.x
&0xc0,EXC_FP0
(%a6
) # save fp0/fp1
2640 fmov.
l &0x0,%fpcr
# zero FPCR
2643 # copy, convert, and tag input argument
# copy the three longwords of the input at 0x8(%a6) to 0x0-0x8(%a0)
# NOTE(review): the instruction that loads %a0 is not visible here
2646 mov.
l 0x8+0x0(%a6
),0x0(%a0
) # load ext input
2647 mov.
l 0x8+0x4(%a6
),0x4(%a0
)
2648 mov.
l 0x8+0x8(%a6
),0x8(%a0
)
2649 bsr.
l tag
# fetch operand type
# mask USER_FPSR: bits 23-16 and 7-0 are kept, the other two bytes cleared
2653 andi.l &0x00ff00ff,USER_FPSR
(%a6
)
2656 mov.
b FPCR_MODE
(%a6
),%d0
# pass rnd mode,prec
2660 bsr.
l stwotox
# operand is a NORM
2663 cmpi.
b %d1
,&ZERO
# is operand a ZERO?
# NOTE(review): no call for the ZERO case is visible in this listing
2668 cmpi.
b %d1
,&INF
# is operand an INF?
# NOTE(review): no call for the INF case is visible in this listing
2673 cmpi.
b %d1
,&QNAN
# is operand a QNAN?
2675 bsr.
l src_qnan
# yes
2678 bsr.
l stwotoxd
# operand is a DENORM
2682 # Result is now in FP0
2684 movm.
l EXC_DREGS
(%a6
),&0x0303 # restore d0-d1/a0-a1
2685 fmovm.
l USER_FPCR
(%a6
),%fpcr
,%fpsr
# restore ctrl regs
# only fp1 is reloaded; fp0 is left holding the result
2686 fmovm.x EXC_FP1
(%a6
),&0x40 # restore fp1
2691 #########################################################################
2692 # MONADIC TEMPLATE #
2693 #########################################################################
# Monadic wrapper, single-precision input, built around stentox/stentoxd.
# Prologue: link reserves LOCAL_SIZE bytes below %a6; d0-d1/a0-a1,
# FPCR/FPSR and fp0/fp1 are stored in that frame and the FPCR is zeroed.
# NOTE(review): no entry label, conditional branch or unlk/rts is
# visible in this listing; confirm the dispatch against the full file.
2696 link
%a6
,&-LOCAL_SIZE
2698 movm.
l &0x0303,EXC_DREGS
(%a6
) # save d0-d1/a0-a1
2699 fmovm.
l %fpcr
,%fpsr
,USER_FPCR
(%a6
) # save ctrl regs
2700 fmovm.x
&0xc0,EXC_FP0
(%a6
) # save fp0/fp1
2702 fmov.
l &0x0,%fpcr
# zero FPCR
2705 # copy, convert, and tag input argument
# the input at 0x8(%a6) is widened to extended precision into FP_SRC
2707 fmov.s
0x8(%a6
),%fp0
# load sgl input
2708 fmov.x
%fp0
,FP_SRC
(%a6
)
2710 bsr.
l tag
# fetch operand type
# mask USER_FPSR: bits 23-16 and 7-0 are kept, the other two bytes cleared
2714 andi.l &0x00ff00ff,USER_FPSR
(%a6
)
2717 mov.
b FPCR_MODE
(%a6
),%d0
# pass rnd mode,prec
2721 bsr.
l stentox
# operand is a NORM
2724 cmpi.
b %d1
,&ZERO
# is operand a ZERO?
# NOTE(review): no call for the ZERO case is visible in this listing
2729 cmpi.
b %d1
,&INF
# is operand an INF?
# NOTE(review): no call for the INF case is visible in this listing
2734 cmpi.
b %d1
,&QNAN
# is operand a QNAN?
2736 bsr.
l src_qnan
# yes
2739 bsr.
l stentoxd
# operand is a DENORM
2743 # Result is now in FP0
2745 movm.
l EXC_DREGS
(%a6
),&0x0303 # restore d0-d1/a0-a1
2746 fmovm.
l USER_FPCR
(%a6
),%fpcr
,%fpsr
# restore ctrl regs
# only fp1 is reloaded; fp0 is left holding the result
2747 fmovm.x EXC_FP1
(%a6
),&0x40 # restore fp1
# Monadic wrapper, double-precision input, built around stentox/stentoxd.
# Prologue: link reserves LOCAL_SIZE bytes below %a6; d0-d1/a0-a1,
# FPCR/FPSR and fp0/fp1 are stored in that frame and the FPCR is zeroed.
# NOTE(review): no entry label, conditional branch or unlk/rts is
# visible in this listing; confirm the dispatch against the full file.
2753 link
%a6
,&-LOCAL_SIZE
2755 movm.
l &0x0303,EXC_DREGS
(%a6
) # save d0-d1/a0-a1
2756 fmovm.
l %fpcr
,%fpsr
,USER_FPCR
(%a6
) # save ctrl regs
2757 fmovm.x
&0xc0,EXC_FP0
(%a6
) # save fp0/fp1
2759 fmov.
l &0x0,%fpcr
# zero FPCR
2762 # copy, convert, and tag input argument
# the input at 0x8(%a6) is widened to extended precision into FP_SRC
2764 fmov.d
0x8(%a6
),%fp0
# load dbl input
2765 fmov.x
%fp0
,FP_SRC
(%a6
)
2767 bsr.
l tag
# fetch operand type
# mask USER_FPSR: bits 23-16 and 7-0 are kept, the other two bytes cleared
2771 andi.l &0x00ff00ff,USER_FPSR
(%a6
)
2774 mov.
b FPCR_MODE
(%a6
),%d0
# pass rnd mode,prec
2779 bsr.
l stentox
# operand is a NORM
2782 cmpi.
b %d1
,&ZERO
# is operand a ZERO?
# NOTE(review): no call for the ZERO case is visible in this listing
2787 cmpi.
b %d1
,&INF
# is operand an INF?
# NOTE(review): no call for the INF case is visible in this listing
2792 cmpi.
b %d1
,&QNAN
# is operand a QNAN?
2794 bsr.
l src_qnan
# yes
2797 bsr.
l stentoxd
# operand is a DENORM
2801 # Result is now in FP0
2803 movm.
l EXC_DREGS
(%a6
),&0x0303 # restore d0-d1/a0-a1
2804 fmovm.
l USER_FPCR
(%a6
),%fpcr
,%fpsr
# restore ctrl regs
# only fp1 is reloaded; fp0 is left holding the result
2805 fmovm.x EXC_FP1
(%a6
),&0x40 # restore fp1
# Monadic wrapper, extended-precision input, built around stentox/stentoxd.
# Prologue: link reserves LOCAL_SIZE bytes below %a6; d0-d1/a0-a1,
# FPCR/FPSR and fp0/fp1 are stored in that frame and the FPCR is zeroed.
# NOTE(review): no entry label, conditional branch or unlk/rts is
# visible in this listing; confirm the dispatch against the full file.
2811 link
%a6
,&-LOCAL_SIZE
2813 movm.
l &0x0303,EXC_DREGS
(%a6
) # save d0-d1/a0-a1
2814 fmovm.
l %fpcr
,%fpsr
,USER_FPCR
(%a6
) # save ctrl regs
2815 fmovm.x
&0xc0,EXC_FP0
(%a6
) # save fp0/fp1
2817 fmov.
l &0x0,%fpcr
# zero FPCR
2820 # copy, convert, and tag input argument
# copy the three longwords of the input at 0x8(%a6) to 0x0-0x8(%a0)
# NOTE(review): the instruction that loads %a0 is not visible here
2823 mov.
l 0x8+0x0(%a6
),0x0(%a0
) # load ext input
2824 mov.
l 0x8+0x4(%a6
),0x4(%a0
)
2825 mov.
l 0x8+0x8(%a6
),0x8(%a0
)
2826 bsr.
l tag
# fetch operand type
# mask USER_FPSR: bits 23-16 and 7-0 are kept, the other two bytes cleared
2830 andi.l &0x00ff00ff,USER_FPSR
(%a6
)
2833 mov.
b FPCR_MODE
(%a6
),%d0
# pass rnd mode,prec
2837 bsr.
l stentox
# operand is a NORM
2840 cmpi.
b %d1
,&ZERO
# is operand a ZERO?
# NOTE(review): no call for the ZERO case is visible in this listing
2845 cmpi.
b %d1
,&INF
# is operand an INF?
# NOTE(review): no call for the INF case is visible in this listing
2850 cmpi.
b %d1
,&QNAN
# is operand a QNAN?
2852 bsr.
l src_qnan
# yes
2855 bsr.
l stentoxd
# operand is a DENORM
2859 # Result is now in FP0
2861 movm.
l EXC_DREGS
(%a6
),&0x0303 # restore d0-d1/a0-a1
2862 fmovm.
l USER_FPCR
(%a6
),%fpcr
,%fpsr
# restore ctrl regs
# only fp1 is reloaded; fp0 is left holding the result
2863 fmovm.x EXC_FP1
(%a6
),&0x40 # restore fp1
2868 #########################################################################
2869 # MONADIC TEMPLATE #
2870 #########################################################################
# Monadic wrapper, single-precision input, built around slogn/slognd.
# Prologue: link reserves LOCAL_SIZE bytes below %a6; d0-d1/a0-a1,
# FPCR/FPSR and fp0/fp1 are stored in that frame and the FPCR is zeroed.
# NOTE(review): no entry label, conditional branch or unlk/rts is
# visible in this listing; confirm the dispatch against the full file.
2873 link
%a6
,&-LOCAL_SIZE
2875 movm.
l &0x0303,EXC_DREGS
(%a6
) # save d0-d1/a0-a1
2876 fmovm.
l %fpcr
,%fpsr
,USER_FPCR
(%a6
) # save ctrl regs
2877 fmovm.x
&0xc0,EXC_FP0
(%a6
) # save fp0/fp1
2879 fmov.
l &0x0,%fpcr
# zero FPCR
2882 # copy, convert, and tag input argument
# the input at 0x8(%a6) is widened to extended precision into FP_SRC
2884 fmov.s
0x8(%a6
),%fp0
# load sgl input
2885 fmov.x
%fp0
,FP_SRC
(%a6
)
2887 bsr.
l tag
# fetch operand type
# mask USER_FPSR: bits 23-16 and 7-0 are kept, the other two bytes cleared
2891 andi.l &0x00ff00ff,USER_FPSR
(%a6
)
2894 mov.
b FPCR_MODE
(%a6
),%d0
# pass rnd mode,prec
2898 bsr.
l slogn
# operand is a NORM
2901 cmpi.
b %d1
,&ZERO
# is operand a ZERO?
# NOTE(review): no call for the ZERO case is visible in this listing
2906 cmpi.
b %d1
,&INF
# is operand an INF?
2908 bsr.
l sopr_inf
# yes
2911 cmpi.
b %d1
,&QNAN
# is operand a QNAN?
2913 bsr.
l src_qnan
# yes
2916 bsr.
l slognd
# operand is a DENORM
2920 # Result is now in FP0
2922 movm.
l EXC_DREGS
(%a6
),&0x0303 # restore d0-d1/a0-a1
2923 fmovm.
l USER_FPCR
(%a6
),%fpcr
,%fpsr
# restore ctrl regs
# only fp1 is reloaded; fp0 is left holding the result
2924 fmovm.x EXC_FP1
(%a6
),&0x40 # restore fp1
# Monadic wrapper, double-precision input, built around slogn/slognd.
# Prologue: link reserves LOCAL_SIZE bytes below %a6; d0-d1/a0-a1,
# FPCR/FPSR and fp0/fp1 are stored in that frame and the FPCR is zeroed.
# NOTE(review): no entry label, conditional branch or unlk/rts is
# visible in this listing; confirm the dispatch against the full file.
2930 link
%a6
,&-LOCAL_SIZE
2932 movm.
l &0x0303,EXC_DREGS
(%a6
) # save d0-d1/a0-a1
2933 fmovm.
l %fpcr
,%fpsr
,USER_FPCR
(%a6
) # save ctrl regs
2934 fmovm.x
&0xc0,EXC_FP0
(%a6
) # save fp0/fp1
2936 fmov.
l &0x0,%fpcr
# zero FPCR
2939 # copy, convert, and tag input argument
# the input at 0x8(%a6) is widened to extended precision into FP_SRC
2941 fmov.d
0x8(%a6
),%fp0
# load dbl input
2942 fmov.x
%fp0
,FP_SRC
(%a6
)
2944 bsr.
l tag
# fetch operand type
# mask USER_FPSR: bits 23-16 and 7-0 are kept, the other two bytes cleared
2948 andi.l &0x00ff00ff,USER_FPSR
(%a6
)
2951 mov.
b FPCR_MODE
(%a6
),%d0
# pass rnd mode,prec
2956 bsr.
l slogn
# operand is a NORM
2959 cmpi.
b %d1
,&ZERO
# is operand a ZERO?
# NOTE(review): no call for the ZERO case is visible in this listing
2964 cmpi.
b %d1
,&INF
# is operand an INF?
2966 bsr.
l sopr_inf
# yes
2969 cmpi.
b %d1
,&QNAN
# is operand a QNAN?
2971 bsr.
l src_qnan
# yes
2974 bsr.
l slognd
# operand is a DENORM
2978 # Result is now in FP0
2980 movm.
l EXC_DREGS
(%a6
),&0x0303 # restore d0-d1/a0-a1
2981 fmovm.
l USER_FPCR
(%a6
),%fpcr
,%fpsr
# restore ctrl regs
# only fp1 is reloaded; fp0 is left holding the result
2982 fmovm.x EXC_FP1
(%a6
),&0x40 # restore fp1
# Monadic wrapper, extended-precision input, built around slogn/slognd.
# Prologue: link reserves LOCAL_SIZE bytes below %a6; d0-d1/a0-a1,
# FPCR/FPSR and fp0/fp1 are stored in that frame and the FPCR is zeroed.
# NOTE(review): no entry label, conditional branch or unlk/rts is
# visible in this listing; confirm the dispatch against the full file.
2988 link
%a6
,&-LOCAL_SIZE
2990 movm.
l &0x0303,EXC_DREGS
(%a6
) # save d0-d1/a0-a1
2991 fmovm.
l %fpcr
,%fpsr
,USER_FPCR
(%a6
) # save ctrl regs
2992 fmovm.x
&0xc0,EXC_FP0
(%a6
) # save fp0/fp1
2994 fmov.
l &0x0,%fpcr
# zero FPCR
2997 # copy, convert, and tag input argument
# copy the three longwords of the input at 0x8(%a6) to 0x0-0x8(%a0)
# NOTE(review): the instruction that loads %a0 is not visible here
3000 mov.
l 0x8+0x0(%a6
),0x0(%a0
) # load ext input
3001 mov.
l 0x8+0x4(%a6
),0x4(%a0
)
3002 mov.
l 0x8+0x8(%a6
),0x8(%a0
)
3003 bsr.
l tag
# fetch operand type
# mask USER_FPSR: bits 23-16 and 7-0 are kept, the other two bytes cleared
3007 andi.l &0x00ff00ff,USER_FPSR
(%a6
)
3010 mov.
b FPCR_MODE
(%a6
),%d0
# pass rnd mode,prec
3014 bsr.
l slogn
# operand is a NORM
3017 cmpi.
b %d1
,&ZERO
# is operand a ZERO?
# NOTE(review): no call for the ZERO case is visible in this listing
3022 cmpi.
b %d1
,&INF
# is operand an INF?
3024 bsr.
l sopr_inf
# yes
3027 cmpi.
b %d1
,&QNAN
# is operand a QNAN?
3029 bsr.
l src_qnan
# yes
3032 bsr.
l slognd
# operand is a DENORM
3036 # Result is now in FP0
3038 movm.
l EXC_DREGS
(%a6
),&0x0303 # restore d0-d1/a0-a1
3039 fmovm.
l USER_FPCR
(%a6
),%fpcr
,%fpsr
# restore ctrl regs
# only fp1 is reloaded; fp0 is left holding the result
3040 fmovm.x EXC_FP1
(%a6
),&0x40 # restore fp1
3045 #########################################################################
3046 # MONADIC TEMPLATE #
3047 #########################################################################
# Monadic wrapper, single-precision input, built around slog10/slog10d.
# Prologue: link reserves LOCAL_SIZE bytes below %a6; d0-d1/a0-a1,
# FPCR/FPSR and fp0/fp1 are stored in that frame and the FPCR is zeroed.
# NOTE(review): no entry label, conditional branch or unlk/rts is
# visible in this listing; confirm the dispatch against the full file.
3050 link
%a6
,&-LOCAL_SIZE
3052 movm.
l &0x0303,EXC_DREGS
(%a6
) # save d0-d1/a0-a1
3053 fmovm.
l %fpcr
,%fpsr
,USER_FPCR
(%a6
) # save ctrl regs
3054 fmovm.x
&0xc0,EXC_FP0
(%a6
) # save fp0/fp1
3056 fmov.
l &0x0,%fpcr
# zero FPCR
3059 # copy, convert, and tag input argument
# the input at 0x8(%a6) is widened to extended precision into FP_SRC
3061 fmov.s
0x8(%a6
),%fp0
# load sgl input
3062 fmov.x
%fp0
,FP_SRC
(%a6
)
3064 bsr.
l tag
# fetch operand type
# mask USER_FPSR: bits 23-16 and 7-0 are kept, the other two bytes cleared
3068 andi.l &0x00ff00ff,USER_FPSR
(%a6
)
3071 mov.
b FPCR_MODE
(%a6
),%d0
# pass rnd mode,prec
3075 bsr.
l slog10
# operand is a NORM
3078 cmpi.
b %d1
,&ZERO
# is operand a ZERO?
# NOTE(review): no call for the ZERO case is visible in this listing
3083 cmpi.
b %d1
,&INF
# is operand an INF?
3085 bsr.
l sopr_inf
# yes
3088 cmpi.
b %d1
,&QNAN
# is operand a QNAN?
3090 bsr.
l src_qnan
# yes
3093 bsr.
l slog10d
# operand is a DENORM
3097 # Result is now in FP0
3099 movm.
l EXC_DREGS
(%a6
),&0x0303 # restore d0-d1/a0-a1
3100 fmovm.
l USER_FPCR
(%a6
),%fpcr
,%fpsr
# restore ctrl regs
# only fp1 is reloaded; fp0 is left holding the result
3101 fmovm.x EXC_FP1
(%a6
),&0x40 # restore fp1
# Monadic wrapper, double-precision input, built around slog10/slog10d.
# Prologue: link reserves LOCAL_SIZE bytes below %a6; d0-d1/a0-a1,
# FPCR/FPSR and fp0/fp1 are stored in that frame and the FPCR is zeroed.
# NOTE(review): no entry label, conditional branch or unlk/rts is
# visible in this listing; confirm the dispatch against the full file.
3107 link
%a6
,&-LOCAL_SIZE
3109 movm.
l &0x0303,EXC_DREGS
(%a6
) # save d0-d1/a0-a1
3110 fmovm.
l %fpcr
,%fpsr
,USER_FPCR
(%a6
) # save ctrl regs
3111 fmovm.x
&0xc0,EXC_FP0
(%a6
) # save fp0/fp1
3113 fmov.
l &0x0,%fpcr
# zero FPCR
3116 # copy, convert, and tag input argument
# the input at 0x8(%a6) is widened to extended precision into FP_SRC
3118 fmov.d
0x8(%a6
),%fp0
# load dbl input
3119 fmov.x
%fp0
,FP_SRC
(%a6
)
3121 bsr.
l tag
# fetch operand type
# mask USER_FPSR: bits 23-16 and 7-0 are kept, the other two bytes cleared
3125 andi.l &0x00ff00ff,USER_FPSR
(%a6
)
3128 mov.
b FPCR_MODE
(%a6
),%d0
# pass rnd mode,prec
3133 bsr.
l slog10
# operand is a NORM
3136 cmpi.
b %d1
,&ZERO
# is operand a ZERO?
# NOTE(review): no call for the ZERO case is visible in this listing
3141 cmpi.
b %d1
,&INF
# is operand an INF?
3143 bsr.
l sopr_inf
# yes
3146 cmpi.
b %d1
,&QNAN
# is operand a QNAN?
3148 bsr.
l src_qnan
# yes
3151 bsr.
l slog10d
# operand is a DENORM
3155 # Result is now in FP0
3157 movm.
l EXC_DREGS
(%a6
),&0x0303 # restore d0-d1/a0-a1
3158 fmovm.
l USER_FPCR
(%a6
),%fpcr
,%fpsr
# restore ctrl regs
# only fp1 is reloaded; fp0 is left holding the result
3159 fmovm.x EXC_FP1
(%a6
),&0x40 # restore fp1
# Monadic wrapper, extended-precision input, built around slog10/slog10d.
# Prologue: link reserves LOCAL_SIZE bytes below %a6; d0-d1/a0-a1,
# FPCR/FPSR and fp0/fp1 are stored in that frame and the FPCR is zeroed.
# NOTE(review): no entry label, conditional branch or unlk/rts is
# visible in this listing; confirm the dispatch against the full file.
3165 link
%a6
,&-LOCAL_SIZE
3167 movm.
l &0x0303,EXC_DREGS
(%a6
) # save d0-d1/a0-a1
3168 fmovm.
l %fpcr
,%fpsr
,USER_FPCR
(%a6
) # save ctrl regs
3169 fmovm.x
&0xc0,EXC_FP0
(%a6
) # save fp0/fp1
3171 fmov.
l &0x0,%fpcr
# zero FPCR
3174 # copy, convert, and tag input argument
# copy the three longwords of the input at 0x8(%a6) to 0x0-0x8(%a0)
# NOTE(review): the instruction that loads %a0 is not visible here
3177 mov.
l 0x8+0x0(%a6
),0x0(%a0
) # load ext input
3178 mov.
l 0x8+0x4(%a6
),0x4(%a0
)
3179 mov.
l 0x8+0x8(%a6
),0x8(%a0
)
3180 bsr.
l tag
# fetch operand type
# mask USER_FPSR: bits 23-16 and 7-0 are kept, the other two bytes cleared
3184 andi.l &0x00ff00ff,USER_FPSR
(%a6
)
3187 mov.
b FPCR_MODE
(%a6
),%d0
# pass rnd mode,prec
3191 bsr.
l slog10
# operand is a NORM
3194 cmpi.
b %d1
,&ZERO
# is operand a ZERO?
# NOTE(review): no call for the ZERO case is visible in this listing
3199 cmpi.
b %d1
,&INF
# is operand an INF?
3201 bsr.
l sopr_inf
# yes
3204 cmpi.
b %d1
,&QNAN
# is operand a QNAN?
3206 bsr.
l src_qnan
# yes
3209 bsr.
l slog10d
# operand is a DENORM
3213 # Result is now in FP0
3215 movm.
l EXC_DREGS
(%a6
),&0x0303 # restore d0-d1/a0-a1
3216 fmovm.
l USER_FPCR
(%a6
),%fpcr
,%fpsr
# restore ctrl regs
# only fp1 is reloaded; fp0 is left holding the result
3217 fmovm.x EXC_FP1
(%a6
),&0x40 # restore fp1
3222 #########################################################################
3223 # MONADIC TEMPLATE #
3224 #########################################################################
# Monadic wrapper, single-precision input, built around slog2/slog2d.
# Prologue: link reserves LOCAL_SIZE bytes below %a6; d0-d1/a0-a1,
# FPCR/FPSR and fp0/fp1 are stored in that frame and the FPCR is zeroed.
# NOTE(review): no entry label, conditional branch or unlk/rts is
# visible in this listing; confirm the dispatch against the full file.
3227 link
%a6
,&-LOCAL_SIZE
3229 movm.
l &0x0303,EXC_DREGS
(%a6
) # save d0-d1/a0-a1
3230 fmovm.
l %fpcr
,%fpsr
,USER_FPCR
(%a6
) # save ctrl regs
3231 fmovm.x
&0xc0,EXC_FP0
(%a6
) # save fp0/fp1
3233 fmov.
l &0x0,%fpcr
# zero FPCR
3236 # copy, convert, and tag input argument
# the input at 0x8(%a6) is widened to extended precision into FP_SRC
3238 fmov.s
0x8(%a6
),%fp0
# load sgl input
3239 fmov.x
%fp0
,FP_SRC
(%a6
)
3241 bsr.
l tag
# fetch operand type
# mask USER_FPSR: bits 23-16 and 7-0 are kept, the other two bytes cleared
3245 andi.l &0x00ff00ff,USER_FPSR
(%a6
)
3248 mov.
b FPCR_MODE
(%a6
),%d0
# pass rnd mode,prec
3252 bsr.
l slog2
# operand is a NORM
3255 cmpi.
b %d1
,&ZERO
# is operand a ZERO?
# NOTE(review): no call for the ZERO case is visible in this listing
3260 cmpi.
b %d1
,&INF
# is operand an INF?
3262 bsr.
l sopr_inf
# yes
3265 cmpi.
b %d1
,&QNAN
# is operand a QNAN?
3267 bsr.
l src_qnan
# yes
3270 bsr.
l slog2d
# operand is a DENORM
3274 # Result is now in FP0
3276 movm.
l EXC_DREGS
(%a6
),&0x0303 # restore d0-d1/a0-a1
3277 fmovm.
l USER_FPCR
(%a6
),%fpcr
,%fpsr
# restore ctrl regs
# only fp1 is reloaded; fp0 is left holding the result
3278 fmovm.x EXC_FP1
(%a6
),&0x40 # restore fp1
# Monadic wrapper, double-precision input, built around slog2/slog2d.
# Prologue: link reserves LOCAL_SIZE bytes below %a6; d0-d1/a0-a1,
# FPCR/FPSR and fp0/fp1 are stored in that frame and the FPCR is zeroed.
# NOTE(review): no entry label, conditional branch or unlk/rts is
# visible in this listing; confirm the dispatch against the full file.
3284 link
%a6
,&-LOCAL_SIZE
3286 movm.
l &0x0303,EXC_DREGS
(%a6
) # save d0-d1/a0-a1
3287 fmovm.
l %fpcr
,%fpsr
,USER_FPCR
(%a6
) # save ctrl regs
3288 fmovm.x
&0xc0,EXC_FP0
(%a6
) # save fp0/fp1
3290 fmov.
l &0x0,%fpcr
# zero FPCR
3293 # copy, convert, and tag input argument
# the input at 0x8(%a6) is widened to extended precision into FP_SRC
3295 fmov.d
0x8(%a6
),%fp0
# load dbl input
3296 fmov.x
%fp0
,FP_SRC
(%a6
)
3298 bsr.
l tag
# fetch operand type
# mask USER_FPSR: bits 23-16 and 7-0 are kept, the other two bytes cleared
3302 andi.l &0x00ff00ff,USER_FPSR
(%a6
)
3305 mov.
b FPCR_MODE
(%a6
),%d0
# pass rnd mode,prec
3310 bsr.
l slog2
# operand is a NORM
3313 cmpi.
b %d1
,&ZERO
# is operand a ZERO?
# NOTE(review): no call for the ZERO case is visible in this listing
3318 cmpi.
b %d1
,&INF
# is operand an INF?
3320 bsr.
l sopr_inf
# yes
3323 cmpi.
b %d1
,&QNAN
# is operand a QNAN?
3325 bsr.
l src_qnan
# yes
3328 bsr.
l slog2d
# operand is a DENORM
3332 # Result is now in FP0
3334 movm.
l EXC_DREGS
(%a6
),&0x0303 # restore d0-d1/a0-a1
3335 fmovm.
l USER_FPCR
(%a6
),%fpcr
,%fpsr
# restore ctrl regs
# only fp1 is reloaded; fp0 is left holding the result
3336 fmovm.x EXC_FP1
(%a6
),&0x40 # restore fp1
# Monadic wrapper, extended-precision input, built around slog2/slog2d.
# Prologue: link reserves LOCAL_SIZE bytes below %a6; d0-d1/a0-a1,
# FPCR/FPSR and fp0/fp1 are stored in that frame and the FPCR is zeroed.
# NOTE(review): no entry label, conditional branch or unlk/rts is
# visible in this listing; confirm the dispatch against the full file.
3342 link
%a6
,&-LOCAL_SIZE
3344 movm.
l &0x0303,EXC_DREGS
(%a6
) # save d0-d1/a0-a1
3345 fmovm.
l %fpcr
,%fpsr
,USER_FPCR
(%a6
) # save ctrl regs
3346 fmovm.x
&0xc0,EXC_FP0
(%a6
) # save fp0/fp1
3348 fmov.
l &0x0,%fpcr
# zero FPCR
3351 # copy, convert, and tag input argument
# copy the three longwords of the input at 0x8(%a6) to 0x0-0x8(%a0)
# NOTE(review): the instruction that loads %a0 is not visible here
3354 mov.
l 0x8+0x0(%a6
),0x0(%a0
) # load ext input
3355 mov.
l 0x8+0x4(%a6
),0x4(%a0
)
3356 mov.
l 0x8+0x8(%a6
),0x8(%a0
)
3357 bsr.
l tag
# fetch operand type
# mask USER_FPSR: bits 23-16 and 7-0 are kept, the other two bytes cleared
3361 andi.l &0x00ff00ff,USER_FPSR
(%a6
)
3364 mov.
b FPCR_MODE
(%a6
),%d0
# pass rnd mode,prec
3368 bsr.
l slog2
# operand is a NORM
3371 cmpi.
b %d1
,&ZERO
# is operand a ZERO?
# NOTE(review): no call for the ZERO case is visible in this listing
3376 cmpi.
b %d1
,&INF
# is operand an INF?
3378 bsr.
l sopr_inf
# yes
3381 cmpi.
b %d1
,&QNAN
# is operand a QNAN?
3383 bsr.
l src_qnan
# yes
3386 bsr.
l slog2d
# operand is a DENORM
3390 # Result is now in FP0
3392 movm.
l EXC_DREGS
(%a6
),&0x0303 # restore d0-d1/a0-a1
3393 fmovm.
l USER_FPCR
(%a6
),%fpcr
,%fpsr
# restore ctrl regs
# only fp1 is reloaded; fp0 is left holding the result
3394 fmovm.x EXC_FP1
(%a6
),&0x40 # restore fp1
3399 #########################################################################
3400 # MONADIC TEMPLATE #
3401 #########################################################################
# Monadic wrapper, single-precision input, built around scosh/scoshd.
# Prologue: link reserves LOCAL_SIZE bytes below %a6; d0-d1/a0-a1,
# FPCR/FPSR and fp0/fp1 are stored in that frame and the FPCR is zeroed.
# NOTE(review): no entry label, conditional branch or unlk/rts is
# visible in this listing; confirm the dispatch against the full file.
3404 link
%a6
,&-LOCAL_SIZE
3406 movm.
l &0x0303,EXC_DREGS
(%a6
) # save d0-d1/a0-a1
3407 fmovm.
l %fpcr
,%fpsr
,USER_FPCR
(%a6
) # save ctrl regs
3408 fmovm.x
&0xc0,EXC_FP0
(%a6
) # save fp0/fp1
3410 fmov.
l &0x0,%fpcr
# zero FPCR
3413 # copy, convert, and tag input argument
# the input at 0x8(%a6) is widened to extended precision into FP_SRC
3415 fmov.s
0x8(%a6
),%fp0
# load sgl input
3416 fmov.x
%fp0
,FP_SRC
(%a6
)
3418 bsr.
l tag
# fetch operand type
# mask USER_FPSR: bits 23-16 and 7-0 are kept, the other two bytes cleared
3422 andi.l &0x00ff00ff,USER_FPSR
(%a6
)
3425 mov.
b FPCR_MODE
(%a6
),%d0
# pass rnd mode,prec
3429 bsr.
l scosh
# operand is a NORM
3432 cmpi.
b %d1
,&ZERO
# is operand a ZERO?
# NOTE(review): no call for the ZERO case is visible in this listing
3437 cmpi.
b %d1
,&INF
# is operand an INF?
# NOTE(review): no call for the INF case is visible in this listing
3442 cmpi.
b %d1
,&QNAN
# is operand a QNAN?
3444 bsr.
l src_qnan
# yes
3447 bsr.
l scoshd
# operand is a DENORM
3451 # Result is now in FP0
3453 movm.
l EXC_DREGS
(%a6
),&0x0303 # restore d0-d1/a0-a1
3454 fmovm.
l USER_FPCR
(%a6
),%fpcr
,%fpsr
# restore ctrl regs
# only fp1 is reloaded; fp0 is left holding the result
3455 fmovm.x EXC_FP1
(%a6
),&0x40 # restore fp1
# Monadic wrapper, double-precision input, built around scosh/scoshd.
# Prologue: link reserves LOCAL_SIZE bytes below %a6; d0-d1/a0-a1,
# FPCR/FPSR and fp0/fp1 are stored in that frame and the FPCR is zeroed.
# NOTE(review): no entry label, conditional branch or unlk/rts is
# visible in this listing; confirm the dispatch against the full file.
3461 link
%a6
,&-LOCAL_SIZE
3463 movm.
l &0x0303,EXC_DREGS
(%a6
) # save d0-d1/a0-a1
3464 fmovm.
l %fpcr
,%fpsr
,USER_FPCR
(%a6
) # save ctrl regs
3465 fmovm.x
&0xc0,EXC_FP0
(%a6
) # save fp0/fp1
3467 fmov.
l &0x0,%fpcr
# zero FPCR
3470 # copy, convert, and tag input argument
# the input at 0x8(%a6) is widened to extended precision into FP_SRC
3472 fmov.d
0x8(%a6
),%fp0
# load dbl input
3473 fmov.x
%fp0
,FP_SRC
(%a6
)
3475 bsr.
l tag
# fetch operand type
# mask USER_FPSR: bits 23-16 and 7-0 are kept, the other two bytes cleared
3479 andi.l &0x00ff00ff,USER_FPSR
(%a6
)
3482 mov.
b FPCR_MODE
(%a6
),%d0
# pass rnd mode,prec
3487 bsr.
l scosh
# operand is a NORM
3490 cmpi.
b %d1
,&ZERO
# is operand a ZERO?
# NOTE(review): no call for the ZERO case is visible in this listing
3495 cmpi.
b %d1
,&INF
# is operand an INF?
# NOTE(review): no call for the INF case is visible in this listing
3500 cmpi.
b %d1
,&QNAN
# is operand a QNAN?
3502 bsr.
l src_qnan
# yes
3505 bsr.
l scoshd
# operand is a DENORM
3509 # Result is now in FP0
3511 movm.
l EXC_DREGS
(%a6
),&0x0303 # restore d0-d1/a0-a1
3512 fmovm.
l USER_FPCR
(%a6
),%fpcr
,%fpsr
# restore ctrl regs
# only fp1 is reloaded; fp0 is left holding the result
3513 fmovm.x EXC_FP1
(%a6
),&0x40 # restore fp1
# Monadic wrapper, extended-precision input, built around scosh/scoshd.
# Prologue: link reserves LOCAL_SIZE bytes below %a6; d0-d1/a0-a1,
# FPCR/FPSR and fp0/fp1 are stored in that frame and the FPCR is zeroed.
# NOTE(review): no entry label, conditional branch or unlk/rts is
# visible in this listing; confirm the dispatch against the full file.
3519 link
%a6
,&-LOCAL_SIZE
3521 movm.
l &0x0303,EXC_DREGS
(%a6
) # save d0-d1/a0-a1
3522 fmovm.
l %fpcr
,%fpsr
,USER_FPCR
(%a6
) # save ctrl regs
3523 fmovm.x
&0xc0,EXC_FP0
(%a6
) # save fp0/fp1
3525 fmov.
l &0x0,%fpcr
# zero FPCR
3528 # copy, convert, and tag input argument
# copy the three longwords of the input at 0x8(%a6) to 0x0-0x8(%a0)
# NOTE(review): the instruction that loads %a0 is not visible here
3531 mov.
l 0x8+0x0(%a6
),0x0(%a0
) # load ext input
3532 mov.
l 0x8+0x4(%a6
),0x4(%a0
)
3533 mov.
l 0x8+0x8(%a6
),0x8(%a0
)
3534 bsr.
l tag
# fetch operand type
# mask USER_FPSR: bits 23-16 and 7-0 are kept, the other two bytes cleared
3538 andi.l &0x00ff00ff,USER_FPSR
(%a6
)
3541 mov.
b FPCR_MODE
(%a6
),%d0
# pass rnd mode,prec
3545 bsr.
l scosh
# operand is a NORM
3548 cmpi.
b %d1
,&ZERO
# is operand a ZERO?
# NOTE(review): no call for the ZERO case is visible in this listing
3553 cmpi.
b %d1
,&INF
# is operand an INF?
# NOTE(review): no call for the INF case is visible in this listing
3558 cmpi.
b %d1
,&QNAN
# is operand a QNAN?
3560 bsr.
l src_qnan
# yes
3563 bsr.
l scoshd
# operand is a DENORM
3567 # Result is now in FP0
3569 movm.
l EXC_DREGS
(%a6
),&0x0303 # restore d0-d1/a0-a1
3570 fmovm.
l USER_FPCR
(%a6
),%fpcr
,%fpsr
# restore ctrl regs
# only fp1 is reloaded; fp0 is left holding the result
3571 fmovm.x EXC_FP1
(%a6
),&0x40 # restore fp1
3576 #########################################################################
3577 # MONADIC TEMPLATE #
3578 #########################################################################
# Monadic wrapper, single-precision input, built around sacos/sacosd.
# Prologue: link reserves LOCAL_SIZE bytes below %a6; d0-d1/a0-a1,
# FPCR/FPSR and fp0/fp1 are stored in that frame and the FPCR is zeroed.
# NOTE(review): no entry label, conditional branch or unlk/rts is
# visible in this listing; confirm the dispatch against the full file.
3581 link
%a6
,&-LOCAL_SIZE
3583 movm.
l &0x0303,EXC_DREGS
(%a6
) # save d0-d1/a0-a1
3584 fmovm.
l %fpcr
,%fpsr
,USER_FPCR
(%a6
) # save ctrl regs
3585 fmovm.x
&0xc0,EXC_FP0
(%a6
) # save fp0/fp1
3587 fmov.
l &0x0,%fpcr
# zero FPCR
3590 # copy, convert, and tag input argument
# the input at 0x8(%a6) is widened to extended precision into FP_SRC
3592 fmov.s
0x8(%a6
),%fp0
# load sgl input
3593 fmov.x
%fp0
,FP_SRC
(%a6
)
3595 bsr.
l tag
# fetch operand type
# mask USER_FPSR: bits 23-16 and 7-0 are kept, the other two bytes cleared
3599 andi.l &0x00ff00ff,USER_FPSR
(%a6
)
3602 mov.
b FPCR_MODE
(%a6
),%d0
# pass rnd mode,prec
3606 bsr.
l sacos
# operand is a NORM
3609 cmpi.
b %d1
,&ZERO
# is operand a ZERO?
# NOTE(review): no call for the ZERO case is visible in this listing
3614 cmpi.
b %d1
,&INF
# is operand an INF?
# NOTE(review): no call for the INF case is visible in this listing
3619 cmpi.
b %d1
,&QNAN
# is operand a QNAN?
3621 bsr.
l src_qnan
# yes
3624 bsr.
l sacosd
# operand is a DENORM
3628 # Result is now in FP0
3630 movm.
l EXC_DREGS
(%a6
),&0x0303 # restore d0-d1/a0-a1
3631 fmovm.
l USER_FPCR
(%a6
),%fpcr
,%fpsr
# restore ctrl regs
# only fp1 is reloaded; fp0 is left holding the result
3632 fmovm.x EXC_FP1
(%a6
),&0x40 # restore fp1
3638 link
%a6
,&-LOCAL_SIZE
3640 movm.
l &0x0303,EXC_DREGS
(%a6
) # save d0-d1/a0-a1
3641 fmovm.
l %fpcr
,%fpsr
,USER_FPCR
(%a6
) # save ctrl regs
3642 fmovm.x
&0xc0,EXC_FP0
(%a6
) # save fp0/fp1
3644 fmov.
l &0x0,%fpcr
# zero FPCR
3647 # copy, convert, and tag input argument
3649 fmov.d
0x8(%a6
),%fp0
# load dbl input
3650 fmov.x
%fp0
,FP_SRC
(%a6
)
3652 bsr.
l tag
# fetch operand type
3656 andi.l &0x00ff00ff,USER_FPSR
(%a6
)
3659 mov.
b FPCR_MODE
(%a6
),%d0
# pass rnd mode,prec
3664 bsr.
l sacos
# operand is a NORM
3667 cmpi.
b %d1
,&ZERO
# is operand a ZERO?
3672 cmpi.
b %d1
,&INF
# is operand an INF?
3677 cmpi.
b %d1
,&QNAN
# is operand a QNAN?
3679 bsr.
l src_qnan
# yes
3682 bsr.
l sacosd
# operand is a DENORM
3686 # Result is now in FP0
3688 movm.
l EXC_DREGS
(%a6
),&0x0303 # restore d0-d1/a0-a1
3689 fmovm.
l USER_FPCR
(%a6
),%fpcr
,%fpsr
# restore ctrl regs
3690 fmovm.x EXC_FP1
(%a6
),&0x40 # restore fp1
3696 link
%a6
,&-LOCAL_SIZE
3698 movm.
l &0x0303,EXC_DREGS
(%a6
) # save d0-d1/a0-a1
3699 fmovm.
l %fpcr
,%fpsr
,USER_FPCR
(%a6
) # save ctrl regs
3700 fmovm.x
&0xc0,EXC_FP0
(%a6
) # save fp0/fp1
3702 fmov.
l &0x0,%fpcr
# zero FPCR
3705 # copy, convert, and tag input argument
3708 mov.
l 0x8+0x0(%a6
),0x0(%a0
) # load ext input
3709 mov.
l 0x8+0x4(%a6
),0x4(%a0
)
3710 mov.
l 0x8+0x8(%a6
),0x8(%a0
)
3711 bsr.
l tag
# fetch operand type
3715 andi.l &0x00ff00ff,USER_FPSR
(%a6
)
3718 mov.
b FPCR_MODE
(%a6
),%d0
# pass rnd mode,prec
3722 bsr.
l sacos
# operand is a NORM
3725 cmpi.
b %d1
,&ZERO
# is operand a ZERO?
3730 cmpi.
b %d1
,&INF
# is operand an INF?
3735 cmpi.
b %d1
,&QNAN
# is operand a QNAN?
3737 bsr.
l src_qnan
# yes
3740 bsr.
l sacosd
# operand is a DENORM
3744 # Result is now in FP0
3746 movm.
l EXC_DREGS
(%a6
),&0x0303 # restore d0-d1/a0-a1
3747 fmovm.
l USER_FPCR
(%a6
),%fpcr
,%fpsr
# restore ctrl regs
3748 fmovm.x EXC_FP1
(%a6
),&0x40 # restore fp1
3753 #########################################################################
3754 # MONADIC TEMPLATE #
3755 #########################################################################
3758 link
%a6
,&-LOCAL_SIZE
3760 movm.
l &0x0303,EXC_DREGS
(%a6
) # save d0-d1/a0-a1
3761 fmovm.
l %fpcr
,%fpsr
,USER_FPCR
(%a6
) # save ctrl regs
3762 fmovm.x
&0xc0,EXC_FP0
(%a6
) # save fp0/fp1
3764 fmov.
l &0x0,%fpcr
# zero FPCR
3767 # copy, convert, and tag input argument
3769 fmov.s
0x8(%a6
),%fp0
# load sgl input
3770 fmov.x
%fp0
,FP_SRC
(%a6
)
3772 bsr.
l tag
# fetch operand type
3776 andi.l &0x00ff00ff,USER_FPSR
(%a6
)
3779 mov.
b FPCR_MODE
(%a6
),%d0
# pass rnd mode,prec
3783 bsr.
l sgetexp
# operand is a NORM
3786 cmpi.
b %d1
,&ZERO
# is operand a ZERO?
3788 bsr.
l src_zero
# yes
3791 cmpi.
b %d1
,&INF
# is operand an INF?
3796 cmpi.
b %d1
,&QNAN
# is operand a QNAN?
3798 bsr.
l src_qnan
# yes
3801 bsr.
l sgetexpd
# operand is a DENORM
3805 # Result is now in FP0
3807 movm.
l EXC_DREGS
(%a6
),&0x0303 # restore d0-d1/a0-a1
3808 fmovm.
l USER_FPCR
(%a6
),%fpcr
,%fpsr
# restore ctrl regs
3809 fmovm.x EXC_FP1
(%a6
),&0x40 # restore fp1
3815 link
%a6
,&-LOCAL_SIZE
3817 movm.
l &0x0303,EXC_DREGS
(%a6
) # save d0-d1/a0-a1
3818 fmovm.
l %fpcr
,%fpsr
,USER_FPCR
(%a6
) # save ctrl regs
3819 fmovm.x
&0xc0,EXC_FP0
(%a6
) # save fp0/fp1
3821 fmov.
l &0x0,%fpcr
# zero FPCR
3824 # copy, convert, and tag input argument
3826 fmov.d
0x8(%a6
),%fp0
# load dbl input
3827 fmov.x
%fp0
,FP_SRC
(%a6
)
3829 bsr.
l tag
# fetch operand type
3833 andi.l &0x00ff00ff,USER_FPSR
(%a6
)
3836 mov.
b FPCR_MODE
(%a6
),%d0
# pass rnd mode,prec
3841 bsr.
l sgetexp
# operand is a NORM
3844 cmpi.
b %d1
,&ZERO
# is operand a ZERO?
3846 bsr.
l src_zero
# yes
3849 cmpi.
b %d1
,&INF
# is operand an INF?
3854 cmpi.
b %d1
,&QNAN
# is operand a QNAN?
3856 bsr.
l src_qnan
# yes
3859 bsr.
l sgetexpd
# operand is a DENORM
3863 # Result is now in FP0
3865 movm.
l EXC_DREGS
(%a6
),&0x0303 # restore d0-d1/a0-a1
3866 fmovm.
l USER_FPCR
(%a6
),%fpcr
,%fpsr
# restore ctrl regs
3867 fmovm.x EXC_FP1
(%a6
),&0x40 # restore fp1
3873 link
%a6
,&-LOCAL_SIZE
3875 movm.
l &0x0303,EXC_DREGS
(%a6
) # save d0-d1/a0-a1
3876 fmovm.
l %fpcr
,%fpsr
,USER_FPCR
(%a6
) # save ctrl regs
3877 fmovm.x
&0xc0,EXC_FP0
(%a6
) # save fp0/fp1
3879 fmov.
l &0x0,%fpcr
# zero FPCR
3882 # copy, convert, and tag input argument
3885 mov.
l 0x8+0x0(%a6
),0x0(%a0
) # load ext input
3886 mov.
l 0x8+0x4(%a6
),0x4(%a0
)
3887 mov.
l 0x8+0x8(%a6
),0x8(%a0
)
3888 bsr.
l tag
# fetch operand type
3892 andi.l &0x00ff00ff,USER_FPSR
(%a6
)
3895 mov.
b FPCR_MODE
(%a6
),%d0
# pass rnd mode,prec
3899 bsr.
l sgetexp
# operand is a NORM
3902 cmpi.
b %d1
,&ZERO
# is operand a ZERO?
3904 bsr.
l src_zero
# yes
3907 cmpi.
b %d1
,&INF
# is operand an INF?
3912 cmpi.
b %d1
,&QNAN
# is operand a QNAN?
3914 bsr.
l src_qnan
# yes
3917 bsr.
l sgetexpd
# operand is a DENORM
3921 # Result is now in FP0
3923 movm.
l EXC_DREGS
(%a6
),&0x0303 # restore d0-d1/a0-a1
3924 fmovm.
l USER_FPCR
(%a6
),%fpcr
,%fpsr
# restore ctrl regs
3925 fmovm.x EXC_FP1
(%a6
),&0x40 # restore fp1
3930 #########################################################################
3931 # MONADIC TEMPLATE #
3932 #########################################################################
3935 link
%a6
,&-LOCAL_SIZE
3937 movm.
l &0x0303,EXC_DREGS
(%a6
) # save d0-d1/a0-a1
3938 fmovm.
l %fpcr
,%fpsr
,USER_FPCR
(%a6
) # save ctrl regs
3939 fmovm.x
&0xc0,EXC_FP0
(%a6
) # save fp0/fp1
3941 fmov.
l &0x0,%fpcr
# zero FPCR
3944 # copy, convert, and tag input argument
3946 fmov.s
0x8(%a6
),%fp0
# load sgl input
3947 fmov.x
%fp0
,FP_SRC
(%a6
)
3949 bsr.
l tag
# fetch operand type
3953 andi.l &0x00ff00ff,USER_FPSR
(%a6
)
3956 mov.
b FPCR_MODE
(%a6
),%d0
# pass rnd mode,prec
3960 bsr.
l sgetman
# operand is a NORM
3963 cmpi.
b %d1
,&ZERO
# is operand a ZERO?
3965 bsr.
l src_zero
# yes
3968 cmpi.
b %d1
,&INF
# is operand an INF?
3973 cmpi.
b %d1
,&QNAN
# is operand a QNAN?
3975 bsr.
l src_qnan
# yes
3978 bsr.
l sgetmand
# operand is a DENORM
3982 # Result is now in FP0
3984 movm.
l EXC_DREGS
(%a6
),&0x0303 # restore d0-d1/a0-a1
3985 fmovm.
l USER_FPCR
(%a6
),%fpcr
,%fpsr
# restore ctrl regs
3986 fmovm.x EXC_FP1
(%a6
),&0x40 # restore fp1
3992 link
%a6
,&-LOCAL_SIZE
3994 movm.
l &0x0303,EXC_DREGS
(%a6
) # save d0-d1/a0-a1
3995 fmovm.
l %fpcr
,%fpsr
,USER_FPCR
(%a6
) # save ctrl regs
3996 fmovm.x
&0xc0,EXC_FP0
(%a6
) # save fp0/fp1
3998 fmov.
l &0x0,%fpcr
# zero FPCR
4001 # copy, convert, and tag input argument
4003 fmov.d
0x8(%a6
),%fp0
# load dbl input
4004 fmov.x
%fp0
,FP_SRC
(%a6
)
4006 bsr.
l tag
# fetch operand type
4010 andi.l &0x00ff00ff,USER_FPSR
(%a6
)
4013 mov.
b FPCR_MODE
(%a6
),%d0
# pass rnd mode,prec
4018 bsr.
l sgetman
# operand is a NORM
4021 cmpi.
b %d1
,&ZERO
# is operand a ZERO?
4023 bsr.
l src_zero
# yes
4026 cmpi.
b %d1
,&INF
# is operand an INF?
4031 cmpi.
b %d1
,&QNAN
# is operand a QNAN?
4033 bsr.
l src_qnan
# yes
4036 bsr.
l sgetmand
# operand is a DENORM
4040 # Result is now in FP0
4042 movm.
l EXC_DREGS
(%a6
),&0x0303 # restore d0-d1/a0-a1
4043 fmovm.
l USER_FPCR
(%a6
),%fpcr
,%fpsr
# restore ctrl regs
4044 fmovm.x EXC_FP1
(%a6
),&0x40 # restore fp1
4050 link
%a6
,&-LOCAL_SIZE
4052 movm.
l &0x0303,EXC_DREGS
(%a6
) # save d0-d1/a0-a1
4053 fmovm.
l %fpcr
,%fpsr
,USER_FPCR
(%a6
) # save ctrl regs
4054 fmovm.x
&0xc0,EXC_FP0
(%a6
) # save fp0/fp1
4056 fmov.
l &0x0,%fpcr
# zero FPCR
4059 # copy, convert, and tag input argument
4062 mov.
l 0x8+0x0(%a6
),0x0(%a0
) # load ext input
4063 mov.
l 0x8+0x4(%a6
),0x4(%a0
)
4064 mov.
l 0x8+0x8(%a6
),0x8(%a0
)
4065 bsr.
l tag
# fetch operand type
4069 andi.l &0x00ff00ff,USER_FPSR
(%a6
)
4072 mov.
b FPCR_MODE
(%a6
),%d0
# pass rnd mode,prec
4076 bsr.
l sgetman
# operand is a NORM
4079 cmpi.
b %d1
,&ZERO
# is operand a ZERO?
4081 bsr.
l src_zero
# yes
4084 cmpi.
b %d1
,&INF
# is operand an INF?
4089 cmpi.
b %d1
,&QNAN
# is operand a QNAN?
4091 bsr.
l src_qnan
# yes
4094 bsr.
l sgetmand
# operand is a DENORM
4098 # Result is now in FP0
4100 movm.
l EXC_DREGS
(%a6
),&0x0303 # restore d0-d1/a0-a1
4101 fmovm.
l USER_FPCR
(%a6
),%fpcr
,%fpsr
# restore ctrl regs
4102 fmovm.x EXC_FP1
(%a6
),&0x40 # restore fp1
4107 #########################################################################
4108 # MONADIC TEMPLATE #
4109 #########################################################################
4112 link
%a6
,&-LOCAL_SIZE
4114 movm.
l &0x0303,EXC_DREGS
(%a6
) # save d0-d1/a0-a1
4115 fmovm.
l %fpcr
,%fpsr
,USER_FPCR
(%a6
) # save ctrl regs
4116 fmovm.x
&0xc0,EXC_FP0
(%a6
) # save fp0/fp1
4118 fmov.
l &0x0,%fpcr
# zero FPCR
4121 # copy, convert, and tag input argument
4123 fmov.s
0x8(%a6
),%fp0
# load sgl input
4124 fmov.x
%fp0
,FP_SRC
(%a6
)
4126 bsr.
l tag
# fetch operand type
4130 andi.l &0x00ff00ff,USER_FPSR
(%a6
)
4133 mov.
b FPCR_MODE
(%a6
),%d0
# pass rnd mode,prec
4137 bsr.
l ssincos
# operand is a NORM
4140 cmpi.
b %d1
,&ZERO
# is operand a ZERO?
4142 bsr.
l ssincosz
# yes
4145 cmpi.
b %d1
,&INF
# is operand an INF?
4147 bsr.
l ssincosi
# yes
4150 cmpi.
b %d1
,&QNAN
# is operand a QNAN?
4152 bsr.
l ssincosqnan
# yes
4155 bsr.
l ssincosd
# operand is a DENORM
4159 # Result is now in FP0
4161 movm.
l EXC_DREGS
(%a6
),&0x0303 # restore d0-d1/a0-a1
4162 fmovm.
l USER_FPCR
(%a6
),%fpcr
,%fpsr
# restore ctrl regs
4163 fmovm.x
&0x03,-(%sp
) # store off fp0/fp1
4164 fmovm.x
(%sp
)+,&0x40 # fp0 now in fp1
4165 fmovm.x
(%sp
)+,&0x80 # fp1 now in fp0
4171 link
%a6
,&-LOCAL_SIZE
4173 movm.
l &0x0303,EXC_DREGS
(%a6
) # save d0-d1/a0-a1
4174 fmovm.
l %fpcr
,%fpsr
,USER_FPCR
(%a6
) # save ctrl regs
4175 fmovm.x
&0xc0,EXC_FP0
(%a6
) # save fp0/fp1
4177 fmov.
l &0x0,%fpcr
# zero FPCR
4180 # copy, convert, and tag input argument
4182 fmov.d
0x8(%a6
),%fp0
# load dbl input
4183 fmov.x
%fp0
,FP_SRC
(%a6
)
4185 bsr.
l tag
# fetch operand type
4189 andi.l &0x00ff00ff,USER_FPSR
(%a6
)
4192 mov.
b FPCR_MODE
(%a6
),%d0
# pass rnd mode,prec
4197 bsr.
l ssincos
# operand is a NORM
4200 cmpi.
b %d1
,&ZERO
# is operand a ZERO?
4202 bsr.
l ssincosz
# yes
4205 cmpi.
b %d1
,&INF
# is operand an INF?
4207 bsr.
l ssincosi
# yes
4210 cmpi.
b %d1
,&QNAN
# is operand a QNAN?
4212 bsr.
l ssincosqnan
# yes
4215 bsr.
l ssincosd
# operand is a DENORM
4219 # Result is now in FP0
4221 movm.
l EXC_DREGS
(%a6
),&0x0303 # restore d0-d1/a0-a1
4222 fmovm.
l USER_FPCR
(%a6
),%fpcr
,%fpsr
# restore ctrl regs
4223 fmovm.x
&0x03,-(%sp
) # store off fp0/fp1
4224 fmovm.x
(%sp
)+,&0x40 # fp0 now in fp1
4225 fmovm.x
(%sp
)+,&0x80 # fp1 now in fp0
4231 link
%a6
,&-LOCAL_SIZE
4233 movm.
l &0x0303,EXC_DREGS
(%a6
) # save d0-d1/a0-a1
4234 fmovm.
l %fpcr
,%fpsr
,USER_FPCR
(%a6
) # save ctrl regs
4235 fmovm.x
&0xc0,EXC_FP0
(%a6
) # save fp0/fp1
4237 fmov.
l &0x0,%fpcr
# zero FPCR
4240 # copy, convert, and tag input argument
4243 mov.
l 0x8+0x0(%a6
),0x0(%a0
) # load ext input
4244 mov.
l 0x8+0x4(%a6
),0x4(%a0
)
4245 mov.
l 0x8+0x8(%a6
),0x8(%a0
)
4246 bsr.
l tag
# fetch operand type
4250 andi.l &0x00ff00ff,USER_FPSR
(%a6
)
4253 mov.
b FPCR_MODE
(%a6
),%d0
# pass rnd mode,prec
4257 bsr.
l ssincos
# operand is a NORM
4260 cmpi.
b %d1
,&ZERO
# is operand a ZERO?
4262 bsr.
l ssincosz
# yes
4265 cmpi.
b %d1
,&INF
# is operand an INF?
4267 bsr.
l ssincosi
# yes
4270 cmpi.
b %d1
,&QNAN
# is operand a QNAN?
4272 bsr.
l ssincosqnan
# yes
4275 bsr.
l ssincosd
# operand is a DENORM
4279 # Result is now in FP0
4281 movm.
l EXC_DREGS
(%a6
),&0x0303 # restore d0-d1/a0-a1
4282 fmovm.
l USER_FPCR
(%a6
),%fpcr
,%fpsr
# restore ctrl regs
4283 fmovm.x
&0x03,-(%sp
) # store off fp0/fp1
4284 fmovm.x
(%sp
)+,&0x40 # fp0 now in fp1
4285 fmovm.x
(%sp
)+,&0x80 # fp1 now in fp0
4290 #########################################################################
4292 #########################################################################
4295 link
%a6
,&-LOCAL_SIZE
4297 movm.
l &0x0303,EXC_DREGS
(%a6
) # save d0-d1/a0-a1
4298 fmovm.
l %fpcr
,%fpsr
,USER_FPCR
(%a6
) # save ctrl regs
4299 fmovm.x
&0xc0,EXC_FP0
(%a6
) # save fp0/fp1
4301 fmov.
l &0x0,%fpcr
# zero FPCR
4304 # copy, convert, and tag input argument
4306 fmov.s
0x8(%a6
),%fp0
# load sgl dst
4307 fmov.x
%fp0
,FP_DST
(%a6
)
4309 bsr.
l tag
# fetch operand type
4312 fmov.s
0xc(%a6
),%fp0
# load sgl src
4313 fmov.x
%fp0
,FP_SRC
(%a6
)
4315 bsr.
l tag
# fetch operand type
4319 andi.l &0x00ff00ff,USER_FPSR
(%a6
)
4322 mov.
b FPCR_MODE
(%a6
),%d0
# pass rnd mode,prec
4324 lea FP_SRC
(%a6
),%a0
# pass ptr to src
4325 lea FP_DST
(%a6
),%a1
# pass ptr to dst
4329 bsr.
l srem_snorm
# operand is a NORM
4332 cmpi.
b %d1
,&ZERO
# is operand a ZERO?
4334 bsr.
l srem_szero
# yes
4337 cmpi.
b %d1
,&INF
# is operand an INF?
4339 bsr.
l srem_sinf
# yes
4342 cmpi.
b %d1
,&QNAN
# is operand a QNAN?
4344 bsr.
l sop_sqnan
# yes
4347 bsr.
l srem_sdnrm
# operand is a DENORM
4351 # Result is now in FP0
4353 movm.
l EXC_DREGS
(%a6
),&0x0303 # restore d0-d1/a0-a1
4354 fmovm.
l USER_FPCR
(%a6
),%fpcr
,%fpsr
# restore ctrl regs
4355 fmovm.x EXC_FP1
(%a6
),&0x40 # restore fp1
4361 link
%a6
,&-LOCAL_SIZE
4363 movm.
l &0x0303,EXC_DREGS
(%a6
) # save d0-d1/a0-a1
4364 fmovm.
l %fpcr
,%fpsr
,USER_FPCR
(%a6
) # save ctrl regs
4365 fmovm.x
&0xc0,EXC_FP0
(%a6
) # save fp0/fp1
4367 fmov.
l &0x0,%fpcr
# zero FPCR
4370 # copy, convert, and tag input argument
4372 fmov.d
0x8(%a6
),%fp0
# load dbl dst
4373 fmov.x
%fp0
,FP_DST
(%a6
)
4375 bsr.
l tag
# fetch operand type
4378 fmov.d
0x10(%a6
),%fp0
# load dbl src
4379 fmov.x
%fp0
,FP_SRC
(%a6
)
4381 bsr.
l tag
# fetch operand type
4385 andi.l &0x00ff00ff,USER_FPSR
(%a6
)
4388 mov.
b FPCR_MODE
(%a6
),%d0
# pass rnd mode,prec
4390 lea FP_SRC
(%a6
),%a0
# pass ptr to src
4391 lea FP_DST
(%a6
),%a1
# pass ptr to dst
4395 bsr.
l srem_snorm
# operand is a NORM
4398 cmpi.
b %d1
,&ZERO
# is operand a ZERO?
4400 bsr.
l srem_szero
# yes
4403 cmpi.
b %d1
,&INF
# is operand an INF?
4405 bsr.
l srem_sinf
# yes
4408 cmpi.
b %d1
,&QNAN
# is operand a QNAN?
4410 bsr.
l sop_sqnan
# yes
4413 bsr.
l srem_sdnrm
# operand is a DENORM
4417 # Result is now in FP0
4419 movm.
l EXC_DREGS
(%a6
),&0x0303 # restore d0-d1/a0-a1
4420 fmovm.
l USER_FPCR
(%a6
),%fpcr
,%fpsr
# restore ctrl regs
4421 fmovm.x EXC_FP1
(%a6
),&0x40 # restore fp1
4427 link
%a6
,&-LOCAL_SIZE
4429 movm.
l &0x0303,EXC_DREGS
(%a6
) # save d0-d1/a0-a1
4430 fmovm.
l %fpcr
,%fpsr
,USER_FPCR
(%a6
) # save ctrl regs
4431 fmovm.x
&0xc0,EXC_FP0
(%a6
) # save fp0/fp1
4433 fmov.
l &0x0,%fpcr
# zero FPCR
4436 # copy, convert, and tag input argument
4439 mov.
l 0x8+0x0(%a6
),0x0(%a0
) # load ext dst
4440 mov.
l 0x8+0x4(%a6
),0x4(%a0
)
4441 mov.
l 0x8+0x8(%a6
),0x8(%a0
)
4442 bsr.
l tag
# fetch operand type
4446 mov.
l 0x14+0x0(%a6
),0x0(%a0
) # load ext src
4447 mov.
l 0x14+0x4(%a6
),0x4(%a0
)
4448 mov.
l 0x14+0x8(%a6
),0x8(%a0
)
4449 bsr.
l tag
# fetch operand type
4453 andi.l &0x00ff00ff,USER_FPSR
(%a6
)
4456 mov.
b FPCR_MODE
(%a6
),%d0
# pass rnd mode,prec
4458 lea FP_SRC
(%a6
),%a0
# pass ptr to src
4459 lea FP_DST
(%a6
),%a1
# pass ptr to dst
4463 bsr.
l srem_snorm
# operand is a NORM
4466 cmpi.
b %d1
,&ZERO
# is operand a ZERO?
4468 bsr.
l srem_szero
# yes
4471 cmpi.
b %d1
,&INF
# is operand an INF?
4473 bsr.
l srem_sinf
# yes
4476 cmpi.
b %d1
,&QNAN
# is operand a QNAN?
4478 bsr.
l sop_sqnan
# yes
4481 bsr.
l srem_sdnrm
# operand is a DENORM
4485 # Result is now in FP0
4487 movm.
l EXC_DREGS
(%a6
),&0x0303 # restore d0-d1/a0-a1
4488 fmovm.
l USER_FPCR
(%a6
),%fpcr
,%fpsr
# restore ctrl regs
4489 fmovm.x EXC_FP1
(%a6
),&0x40 # restore fp1
4494 #########################################################################
4496 #########################################################################
4499 link
%a6
,&-LOCAL_SIZE
4501 movm.
l &0x0303,EXC_DREGS
(%a6
) # save d0-d1/a0-a1
4502 fmovm.
l %fpcr
,%fpsr
,USER_FPCR
(%a6
) # save ctrl regs
4503 fmovm.x
&0xc0,EXC_FP0
(%a6
) # save fp0/fp1
4505 fmov.
l &0x0,%fpcr
# zero FPCR
4508 # copy, convert, and tag input argument
4510 fmov.s
0x8(%a6
),%fp0
# load sgl dst
4511 fmov.x
%fp0
,FP_DST
(%a6
)
4513 bsr.
l tag
# fetch operand type
4516 fmov.s
0xc(%a6
),%fp0
# load sgl src
4517 fmov.x
%fp0
,FP_SRC
(%a6
)
4519 bsr.
l tag
# fetch operand type
4523 andi.l &0x00ff00ff,USER_FPSR
(%a6
)
4526 mov.
b FPCR_MODE
(%a6
),%d0
# pass rnd mode,prec
4528 lea FP_SRC
(%a6
),%a0
# pass ptr to src
4529 lea FP_DST
(%a6
),%a1
# pass ptr to dst
4533 bsr.
l smod_snorm
# operand is a NORM
4536 cmpi.
b %d1
,&ZERO
# is operand a ZERO?
4538 bsr.
l smod_szero
# yes
4541 cmpi.
b %d1
,&INF
# is operand an INF?
4543 bsr.
l smod_sinf
# yes
4546 cmpi.
b %d1
,&QNAN
# is operand a QNAN?
4548 bsr.
l sop_sqnan
# yes
4551 bsr.
l smod_sdnrm
# operand is a DENORM
4555 # Result is now in FP0
4557 movm.
l EXC_DREGS
(%a6
),&0x0303 # restore d0-d1/a0-a1
4558 fmovm.
l USER_FPCR
(%a6
),%fpcr
,%fpsr
# restore ctrl regs
4559 fmovm.x EXC_FP1
(%a6
),&0x40 # restore fp1
4565 link
%a6
,&-LOCAL_SIZE
4567 movm.
l &0x0303,EXC_DREGS
(%a6
) # save d0-d1/a0-a1
4568 fmovm.
l %fpcr
,%fpsr
,USER_FPCR
(%a6
) # save ctrl regs
4569 fmovm.x
&0xc0,EXC_FP0
(%a6
) # save fp0/fp1
4571 fmov.
l &0x0,%fpcr
# zero FPCR
4574 # copy, convert, and tag input argument
4576 fmov.d
0x8(%a6
),%fp0
# load dbl dst
4577 fmov.x
%fp0
,FP_DST
(%a6
)
4579 bsr.
l tag
# fetch operand type
4582 fmov.d
0x10(%a6
),%fp0
# load dbl src
4583 fmov.x
%fp0
,FP_SRC
(%a6
)
4585 bsr.
l tag
# fetch operand type
4589 andi.l &0x00ff00ff,USER_FPSR
(%a6
)
4592 mov.
b FPCR_MODE
(%a6
),%d0
# pass rnd mode,prec
4594 lea FP_SRC
(%a6
),%a0
# pass ptr to src
4595 lea FP_DST
(%a6
),%a1
# pass ptr to dst
4599 bsr.
l smod_snorm
# operand is a NORM
4602 cmpi.
b %d1
,&ZERO
# is operand a ZERO?
4604 bsr.
l smod_szero
# yes
4607 cmpi.
b %d1
,&INF
# is operand an INF?
4609 bsr.
l smod_sinf
# yes
4612 cmpi.
b %d1
,&QNAN
# is operand a QNAN?
4614 bsr.
l sop_sqnan
# yes
4617 bsr.
l smod_sdnrm
# operand is a DENORM
4621 # Result is now in FP0
4623 movm.
l EXC_DREGS
(%a6
),&0x0303 # restore d0-d1/a0-a1
4624 fmovm.
l USER_FPCR
(%a6
),%fpcr
,%fpsr
# restore ctrl regs
4625 fmovm.x EXC_FP1
(%a6
),&0x40 # restore fp1
4631 link
%a6
,&-LOCAL_SIZE
4633 movm.
l &0x0303,EXC_DREGS
(%a6
) # save d0-d1/a0-a1
4634 fmovm.
l %fpcr
,%fpsr
,USER_FPCR
(%a6
) # save ctrl regs
4635 fmovm.x
&0xc0,EXC_FP0
(%a6
) # save fp0/fp1
4637 fmov.
l &0x0,%fpcr
# zero FPCR
4640 # copy, convert, and tag input argument
4643 mov.
l 0x8+0x0(%a6
),0x0(%a0
) # load ext dst
4644 mov.
l 0x8+0x4(%a6
),0x4(%a0
)
4645 mov.
l 0x8+0x8(%a6
),0x8(%a0
)
4646 bsr.
l tag
# fetch operand type
4650 mov.
l 0x14+0x0(%a6
),0x0(%a0
) # load ext src
4651 mov.
l 0x14+0x4(%a6
),0x4(%a0
)
4652 mov.
l 0x14+0x8(%a6
),0x8(%a0
)
4653 bsr.
l tag
# fetch operand type
4657 andi.l &0x00ff00ff,USER_FPSR
(%a6
)
4660 mov.
b FPCR_MODE
(%a6
),%d0
# pass rnd mode,prec
4662 lea FP_SRC
(%a6
),%a0
# pass ptr to src
4663 lea FP_DST
(%a6
),%a1
# pass ptr to dst
4667 bsr.
l smod_snorm
# operand is a NORM
4670 cmpi.
b %d1
,&ZERO
# is operand a ZERO?
4672 bsr.
l smod_szero
# yes
4675 cmpi.
b %d1
,&INF
# is operand an INF?
4677 bsr.
l smod_sinf
# yes
4680 cmpi.
b %d1
,&QNAN
# is operand a QNAN?
4682 bsr.
l sop_sqnan
# yes
4685 bsr.
l smod_sdnrm
# operand is a DENORM
4689 # Result is now in FP0
4691 movm.
l EXC_DREGS
(%a6
),&0x0303 # restore d0-d1/a0-a1
4692 fmovm.
l USER_FPCR
(%a6
),%fpcr
,%fpsr
# restore ctrl regs
4693 fmovm.x EXC_FP1
(%a6
),&0x40 # restore fp1
4698 #########################################################################
4700 #########################################################################
4703 link
%a6
,&-LOCAL_SIZE
4705 movm.
l &0x0303,EXC_DREGS
(%a6
) # save d0-d1/a0-a1
4706 fmovm.
l %fpcr
,%fpsr
,USER_FPCR
(%a6
) # save ctrl regs
4707 fmovm.x
&0xc0,EXC_FP0
(%a6
) # save fp0/fp1
4709 fmov.
l &0x0,%fpcr
# zero FPCR
4712 # copy, convert, and tag input argument
4714 fmov.s
0x8(%a6
),%fp0
# load sgl dst
4715 fmov.x
%fp0
,FP_DST
(%a6
)
4717 bsr.
l tag
# fetch operand type
4720 fmov.s
0xc(%a6
),%fp0
# load sgl src
4721 fmov.x
%fp0
,FP_SRC
(%a6
)
4723 bsr.
l tag
# fetch operand type
4727 andi.l &0x00ff00ff,USER_FPSR
(%a6
)
4730 mov.
b FPCR_MODE
(%a6
),%d0
# pass rnd mode,prec
4732 lea FP_SRC
(%a6
),%a0
# pass ptr to src
4733 lea FP_DST
(%a6
),%a1
# pass ptr to dst
4737 bsr.
l sscale_snorm
# operand is a NORM
4740 cmpi.
b %d1
,&ZERO
# is operand a ZERO?
4742 bsr.
l sscale_szero
# yes
4745 cmpi.
b %d1
,&INF
# is operand an INF?
4747 bsr.
l sscale_sinf
# yes
4750 cmpi.
b %d1
,&QNAN
# is operand a QNAN?
4752 bsr.
l sop_sqnan
# yes
4755 bsr.
l sscale_sdnrm
# operand is a DENORM
4759 # Result is now in FP0
4761 movm.
l EXC_DREGS
(%a6
),&0x0303 # restore d0-d1/a0-a1
4762 fmovm.
l USER_FPCR
(%a6
),%fpcr
,%fpsr
# restore ctrl regs
4763 fmovm.x EXC_FP1
(%a6
),&0x40 # restore fp1
4769 link
%a6
,&-LOCAL_SIZE
4771 movm.
l &0x0303,EXC_DREGS
(%a6
) # save d0-d1/a0-a1
4772 fmovm.
l %fpcr
,%fpsr
,USER_FPCR
(%a6
) # save ctrl regs
4773 fmovm.x
&0xc0,EXC_FP0
(%a6
) # save fp0/fp1
4775 fmov.
l &0x0,%fpcr
# zero FPCR
4778 # copy, convert, and tag input argument
4780 fmov.d
0x8(%a6
),%fp0
# load dbl dst
4781 fmov.x
%fp0
,FP_DST
(%a6
)
4783 bsr.
l tag
# fetch operand type
4786 fmov.d
0x10(%a6
),%fp0
# load dbl src
4787 fmov.x
%fp0
,FP_SRC
(%a6
)
4789 bsr.
l tag
# fetch operand type
4793 andi.l &0x00ff00ff,USER_FPSR
(%a6
)
4796 mov.
b FPCR_MODE
(%a6
),%d0
# pass rnd mode,prec
4798 lea FP_SRC
(%a6
),%a0
# pass ptr to src
4799 lea FP_DST
(%a6
),%a1
# pass ptr to dst
4803 bsr.
l sscale_snorm
# operand is a NORM
4806 cmpi.
b %d1
,&ZERO
# is operand a ZERO?
4808 bsr.
l sscale_szero
# yes
4811 cmpi.
b %d1
,&INF
# is operand an INF?
4813 bsr.
l sscale_sinf
# yes
4816 cmpi.
b %d1
,&QNAN
# is operand a QNAN?
4818 bsr.
l sop_sqnan
# yes
4821 bsr.
l sscale_sdnrm
# operand is a DENORM
4825 # Result is now in FP0
4827 movm.
l EXC_DREGS
(%a6
),&0x0303 # restore d0-d1/a0-a1
4828 fmovm.
l USER_FPCR
(%a6
),%fpcr
,%fpsr
# restore ctrl regs
4829 fmovm.x EXC_FP1
(%a6
),&0x40 # restore fp1
4835 link
%a6
,&-LOCAL_SIZE
4837 movm.
l &0x0303,EXC_DREGS
(%a6
) # save d0-d1/a0-a1
4838 fmovm.
l %fpcr
,%fpsr
,USER_FPCR
(%a6
) # save ctrl regs
4839 fmovm.x
&0xc0,EXC_FP0
(%a6
) # save fp0/fp1
4841 fmov.
l &0x0,%fpcr
# zero FPCR
4844 # copy, convert, and tag input argument
4847 mov.
l 0x8+0x0(%a6
),0x0(%a0
) # load ext dst
4848 mov.
l 0x8+0x4(%a6
),0x4(%a0
)
4849 mov.
l 0x8+0x8(%a6
),0x8(%a0
)
4850 bsr.
l tag
# fetch operand type
4854 mov.
l 0x14+0x0(%a6
),0x0(%a0
) # load ext src
4855 mov.
l 0x14+0x4(%a6
),0x4(%a0
)
4856 mov.
l 0x14+0x8(%a6
),0x8(%a0
)
4857 bsr.
l tag
# fetch operand type
4861 andi.l &0x00ff00ff,USER_FPSR
(%a6
)
4864 mov.
b FPCR_MODE
(%a6
),%d0
# pass rnd mode,prec
4866 lea FP_SRC
(%a6
),%a0
# pass ptr to src
4867 lea FP_DST
(%a6
),%a1
# pass ptr to dst
4871 bsr.
l sscale_snorm
# operand is a NORM
4874 cmpi.
b %d1
,&ZERO
# is operand a ZERO?
4876 bsr.
l sscale_szero
# yes
4879 cmpi.
b %d1
,&INF
# is operand an INF?
4881 bsr.
l sscale_sinf
# yes
4884 cmpi.
b %d1
,&QNAN
# is operand a QNAN?
4886 bsr.
l sop_sqnan
# yes
4889 bsr.
l sscale_sdnrm
# operand is a DENORM
4893 # Result is now in FP0
4895 movm.
l EXC_DREGS
(%a6
),&0x0303 # restore d0-d1/a0-a1
4896 fmovm.
l USER_FPCR
(%a6
),%fpcr
,%fpsr
# restore ctrl regs
4897 fmovm.x EXC_FP1
(%a6
),&0x40 # restore fp1
4902 #########################################################################
4903 # ssin(): computes the sine of a normalized input #
4904 # ssind(): computes the sine of a denormalized input #
4905 # scos(): computes the cosine of a normalized input #
4906 # scosd(): computes the cosine of a denormalized input #
4907 # ssincos(): computes the sine and cosine of a normalized input #
4908 # ssincosd(): computes the sine and cosine of a denormalized input #
4910 # INPUT *************************************************************** #
4911 # a0 = pointer to extended precision input #
4912 # d0 = round precision,mode #
4914 # OUTPUT ************************************************************** #
4915 # fp0 = sin(X) or cos(X) #
4921 # ACCURACY and MONOTONICITY ******************************************* #
4922 # The returned result is within 1 ulp in 64 significant bits, i.e. #
4923 # within 0.5001 ulp to 53 bits if the result is subsequently #
4924 # rounded to double precision. The result is provably monotonic #
4925 # in double precision. #
4927 # ALGORITHM *********************************************************** #
4930 # 1. If SIN is invoked, set AdjN := 0; otherwise, set AdjN := 1. #
4932 # 2. If |X| >= 15Pi or |X| < 2**(-40), go to 7. #
4934 # 3. Decompose X as X = N(Pi/2) + r where |r| <= Pi/4. Let #
4935 # k = N mod 4, so in particular, k = 0,1,2,or 3. #
4936 # Overwrite k by k := k + AdjN. #
4938 # 4. If k is even, go to 6. #
4940 # 5. (k is odd) Set j := (k-1)/2, sgn := (-1)**j. #
4941 # Return sgn*cos(r) where cos(r) is approximated by an #
4942 # even polynomial in r, 1 + r*r*(B1+s*(B2+ ... + s*B8)), #
4946 # 6. (k is even) Set j := k/2, sgn := (-1)**j. Return sgn*sin(r) #
4947 # where sin(r) is approximated by an odd polynomial in r #
4948 # r + r*s*(A1+s*(A2+ ... + s*A7)), s = r*r. #
4951 # 7. If |X| > 1, go to 9. #
4953 # 8. (|X|<2**(-40)) If SIN is invoked, return X; #
4954 # otherwise return 1. #
4956 # 9. Overwrite X by X := X rem 2Pi. Now that |X| <= Pi, #
4960 # 1. If |X| >= 15Pi or |X| < 2**(-40), go to 6. #
4962 # 2. Decompose X as X = N(Pi/2) + r where |r| <= Pi/4. Let #
4963 # k = N mod 4, so in particular, k = 0,1,2,or 3. #
4965 # 3. If k is even, go to 5. #
4967 # 4. (k is odd) Set j1 := (k-1)/2, j2 := j1 (EOR) (k mod 2), ie. #
4968 # j1 exclusive or with the l.s.b. of k. #
4969 # sgn1 := (-1)**j1, sgn2 := (-1)**j2. #
4970 # SIN(X) = sgn1 * cos(r) and COS(X) = sgn2*sin(r) where #
4971 # sin(r) and cos(r) are computed as odd and even #
4972 # polynomials in r, respectively. Exit #
4974 # 5. (k is even) Set j1 := k/2, sgn1 := (-1)**j1. #
4975 # SIN(X) = sgn1 * sin(r) and COS(X) = sgn1*cos(r) where #
4976 # sin(r) and cos(r) are computed as odd and even #
4977 # polynomials in r, respectively. Exit #
4979 # 6. If |X| > 1, go to 8. #
4981 # 7. (|X|<2**(-40)) SIN(X) = X and COS(X) = 1. Exit. #
4983 # 8. Overwrite X by X := X rem 2Pi. Now that |X| <= Pi, #
4986 #########################################################################
# Coefficient table for the odd sine polynomial
#   r + r*s*(A1 + s*(A2 + ... + s*A7)),  s = r*r.
# SINA7..SINA3 are read as double precision (fmov.d/fadd.d); SINA3 is
# followed by two zero longwords. SINA2 and SINA1 are read as extended
# precision (fadd.x), each followed by one zero longword.
4988 SINA7
: long
0xBD6AAA77,0xCCC994F5
4989 SINA6
: long
0x3DE61209,0x7AAE8DA1
4990 SINA5
: long
0xBE5AE645,0x2A118AE4
4991 SINA4
: long
0x3EC71DE3,0xA5341531
4992 SINA3
: long
0xBF2A01A0,0x1A018B59,0x00000000,0x00000000
4993 SINA2
: long
0x3FF80000,0x88888888,0x888859AF,0x00000000
4994 SINA1
: long
0xBFFC0000,0xAAAAAAAA,0xAAAAAA99,0x00000000
# Coefficient table for the even cosine polynomial
#   1 + s*(B1 + s*(B2 + ... + s*B8)),  s = r*r.
# COSB8..COSB4 are read as double precision (fmov.d/fadd.d); COSB4 is
# followed by two zero longwords. COSB3 and COSB2 are read as extended
# precision (fadd.x). COSB1 is the single-precision encoding of -1/2
# and is read with fadd.s.
4996 COSB8
: long
0x3D2AC4D0,0xD6011EE3
4997 COSB7
: long
0xBDA9396F,0x9F45AC19
4998 COSB6
: long
0x3E21EED9,0x0612C972
4999 COSB5
: long
0xBE927E4F,0xB79D9FCF
5000 COSB4
: long
0x3EFA01A0,0x1A01D423,0x00000000,0x00000000
5001 COSB3
: long
0xBFF50000,0xB60B60B6,0x0B61D438,0x00000000
5002 COSB2
: long
0x3FFA0000,0xAAAAAAAA,0xAAAAAB5E
5003 COSB1
: long
0xBF000000
5022 ############################################
5025 mov.
l &0,ADJN
(%a6
) # yes; SET ADJN TO 0
5028 ############################################
5031 mov.
l &1,ADJN
(%a6
) # yes; SET ADJN TO 1
5033 ############################################
5035 #--SAVE FPCR, FP1. CHECK IF |X| IS TOO SMALL OR LARGE
5037 fmov.x
(%a0
),%fp0
# LOAD INPUT
5038 fmov.x
%fp0
,X
(%a6
) # save input at X
5041 mov.
l (%a0
),%d1
# put exp in hi word
5042 mov.w
4(%a0
),%d1
# fetch hi(man)
5043 and.l &0x7FFFFFFF,%d1
# strip sign
5045 cmpi.
l %d1
,&0x3FD78000 # is |X| >= 2**(-40)?
5047 bra.w SINSM
# |X| < 2**(-40); input is very small
5050 cmp.
l %d1
,&0x4004BC7E # is |X| < 15 PI?
5052 bra.w SREDUCEX
# |X| >= 15 PI; input is very large
5054 #--THIS IS THE USUAL CASE, |X| <= 15 PI.
5055 #--THE ARGUMENT REDUCTION IS DONE BY TABLE LOOK UP.
5058 fmul.d TWOBYPI
(%pc
),%fp1
# X*2/PI
5060 lea PITBL+
0x200(%pc
),%a1
# TABLE OF N*PI/2, N = -32,...,32
5062 fmov.
l %fp1
,INT
(%a6
) # CONVERT TO INTEGER
5064 mov.
l INT
(%a6
),%d1
# make a copy of N
5065 asl.
l &4,%d1
# N *= 16
5066 add.l %d1
,%a1
# tbl_addr = a1 + (N*16)
5068 # A1 IS THE ADDRESS OF N*PIBY2
5069 # ...WHICH IS IN TWO PIECES Y1 & Y2
5070 fsub.x
(%a1
)+,%fp0
# X-Y1
5071 fsub.s
(%a1
),%fp0
# fp0 = R = (X-Y1)-Y2
5074 #--continuation from REDUCEX
5076 #--GET N+ADJN AND SEE IF SIN(R) OR COS(R) IS NEEDED
5078 add.l ADJN
(%a6
),%d1
# SEE IF D1 IS ODD OR EVEN
5079 ror.
l &1,%d1
# D1 WAS ODD IFF D1 IS NOW NEGATIVE
5083 #--LET J BE THE LEAST SIG. BIT OF D1, LET SGN := (-1)**J.
5084 #--THEN WE RETURN SGN*SIN(R). SGN*SIN(R) IS COMPUTED BY
5085 #--R' + R'*S*(A1 + S(A2 + S(A3 + S(A4 + ... + SA7)))), WHERE
5086 #--R' = SGN*R, S=R*R. THIS CAN BE REWRITTEN AS
5087 #--R' + R'*S*( [A1+T(A3+T(A5+TA7))] + [S(A2+T(A4+TA6))])
5089 #--NOTE THAT A3 THROUGH A7 ARE STORED IN DOUBLE PRECISION
5090 #--WHILE A1 AND A2 ARE IN DOUBLE-EXTENDED FORMAT.
5092 fmovm.x
&0x0c,-(%sp
) # save fp2/fp3
5094 fmov.x
%fp0
,X
(%a6
) # X IS R
5095 fmul.x
%fp0
,%fp0
# FP0 IS S
5097 fmov.d SINA7
(%pc
),%fp3
5098 fmov.d SINA6
(%pc
),%fp2
5101 fmul.x
%fp1
,%fp1
# FP1 IS T
5104 and.l &0x80000000,%d1
5105 # ...LEAST SIG. BIT OF D1 IN SIGN POSITION
5106 eor.
l %d1
,X
(%a6
) # X IS NOW R'= SGN*R
5108 fmul.x
%fp1
,%fp3
# TA7
5109 fmul.x
%fp1
,%fp2
# TA6
5111 fadd.d SINA5
(%pc
),%fp3
# A5+TA7
5112 fadd.d SINA4
(%pc
),%fp2
# A4+TA6
5114 fmul.x
%fp1
,%fp3
# T(A5+TA7)
5115 fmul.x
%fp1
,%fp2
# T(A4+TA6)
5117 fadd.d SINA3
(%pc
),%fp3
# A3+T(A5+TA7)
5118 fadd.x SINA2
(%pc
),%fp2
# A2+T(A4+TA6)
5120 fmul.x
%fp3
,%fp1
# T(A3+T(A5+TA7))
5122 fmul.x
%fp0
,%fp2
# S(A2+T(A4+TA6))
5123 fadd.x SINA1
(%pc
),%fp1
# A1+T(A3+T(A5+TA7))
5124 fmul.x X
(%a6
),%fp0
# R'*S
5126 fadd.x
%fp2
,%fp1
# [A1+T(A3+T(A5+TA7))]+[S(A2+T(A4+TA6))]
5128 fmul.x
%fp1
,%fp0
# SIN(R')-R'
5130 fmovm.x
(%sp
)+,&0x30 # restore fp2/fp3
5132 fmov.
l %d0
,%fpcr
# restore users round mode,prec
5133 fadd.x X
(%a6
),%fp0
# last inst - possible exception set
5136 #--LET J BE THE LEAST SIG. BIT OF D1, LET SGN := (-1)**J.
5137 #--THEN WE RETURN SGN*COS(R). SGN*COS(R) IS COMPUTED BY
5138 #--SGN + S'*(B1 + S(B2 + S(B3 + S(B4 + ... + SB8)))), WHERE
5139 #--S=R*R AND S'=SGN*S. THIS CAN BE REWRITTEN AS
5140 #--SGN + S'*([B1+T(B3+T(B5+TB7))] + [S(B2+T(B4+T(B6+TB8)))])
5142 #--NOTE THAT B4 THROUGH B8 ARE STORED IN DOUBLE PRECISION
5143 #--WHILE B2 AND B3 ARE IN DOUBLE-EXTENDED FORMAT, B1 IS -1/2
5144 #--AND IS THEREFORE STORED AS SINGLE PRECISION.
5146 fmovm.x
&0x0c,-(%sp
) # save fp2/fp3
5148 fmul.x
%fp0
,%fp0
# FP0 IS S
5150 fmov.d COSB8
(%pc
),%fp2
5151 fmov.d COSB7
(%pc
),%fp3
5154 fmul.x
%fp1
,%fp1
# FP1 IS T
5156 fmov.x
%fp0
,X
(%a6
) # X IS S
5158 and.l &0x80000000,%d1
5159 # ...LEAST SIG. BIT OF D1 IN SIGN POSITION
5161 fmul.x
%fp1
,%fp2
# TB8
5163 eor.
l %d1
,X
(%a6
) # X IS NOW S'= SGN*S
5164 and.l &0x80000000,%d1
5166 fmul.x
%fp1
,%fp3
# TB7
5168 or.l &0x3F800000,%d1
# D1 IS SGN (+-1.0) IN SINGLE
5169 mov.
l %d1
,POSNEG1
(%a6
)
5171 fadd.d COSB6
(%pc
),%fp2
# B6+TB8
5172 fadd.d COSB5
(%pc
),%fp3
# B5+TB7
5174 fmul.x
%fp1
,%fp2
# T(B6+TB8)
5175 fmul.x
%fp1
,%fp3
# T(B5+TB7)
5177 fadd.d COSB4
(%pc
),%fp2
# B4+T(B6+TB8)
5178 fadd.x COSB3
(%pc
),%fp3
# B3+T(B5+TB7)
5180 fmul.x
%fp1
,%fp2
# T(B4+T(B6+TB8))
5181 fmul.x
%fp3
,%fp1
# T(B3+T(B5+TB7))
5183 fadd.x COSB2
(%pc
),%fp2
# B2+T(B4+T(B6+TB8))
5184 fadd.s COSB1
(%pc
),%fp1
# B1+T(B3+T(B5+TB7))
5186 fmul.x
%fp2
,%fp0
# S(B2+T(B4+T(B6+TB8)))
5192 fmovm.x
(%sp
)+,&0x30 # restore fp2/fp3
5194 fmov.
l %d0
,%fpcr
# restore users round mode,prec
5195 fadd.s POSNEG1
(%a6
),%fp0
# last inst - possible exception set
5198 ##############################################
5200 # SINe: Big OR Small?
5201 #--IF |X| > 15PI, WE USE THE GENERAL ARGUMENT REDUCTION.
5202 #--IF |X| < 2**(-40), RETURN X OR 1.
5204 cmp.
l %d1
,&0x3FFF8000
5212 # here, the operation may underflow iff the precision is sgl or dbl.
5213 # extended denorms are handled through another entry point.
5215 # mov.w &0x0000,XDCARE(%a6) # JUST IN CASE
5217 fmov.
l %d0
,%fpcr
# restore users round mode,prec
5218 mov.
b &FMOV_OP
,%d1
# last inst is MOVE
5219 fmov.x X
(%a6
),%fp0
# last inst - possible exception set
5223 fmov.s
&0x3F800000,%fp0
# fp0 = 1.0
5224 fmov.
l %d0
,%fpcr
# restore users round mode,prec
5225 fadd.s
&0x80800000,%fp0
# last inst - possible exception set
5228 ################################################
5230 #--SIN(X) = X FOR DENORMALIZED X
5234 ############################################
5236 #--COS(X) = 1 FOR DENORMALIZED X
5238 fmov.s
&0x3F800000,%fp0
# fp0 = 1.0
5241 ##################################################
5248 fmov.x
(%a0
),%fp0
# LOAD INPUT
5253 and.l &0x7FFFFFFF,%d1
# COMPACTIFY X
5255 cmp.
l %d1
,&0x3FD78000 # |X| >= 2**(-40)?
5260 cmp.
l %d1
,&0x4004BC7E # |X| < 15 PI?
5265 #--THIS IS THE USUAL CASE, |X| <= 15 PI.
5266 #--THE ARGUMENT REDUCTION IS DONE BY TABLE LOOK UP.
5270 fmul.d TWOBYPI
(%pc
),%fp1
# X*2/PI
5272 lea PITBL+
0x200(%pc
),%a1
# TABLE OF N*PI/2, N = -32,...,32
5274 fmov.
l %fp1
,INT
(%a6
) # CONVERT TO INTEGER
5278 add.l %d1
,%a1
# ADDRESS OF N*PIBY2, IN Y1, Y2
5280 fsub.x
(%a1
)+,%fp0
# X-Y1
5281 fsub.s
(%a1
),%fp0
# FP0 IS R = (X-Y1)-Y2
5284 #--continuation point from REDUCEX
5288 cmp.
l %d1
,&0 # D0 < 0 IFF N IS ODD
5292 #--REGISTERS SAVED SO FAR: D0, A0, FP2.
#
# Evaluates the sine series (coefficients SINA7..SINA1) in fp1 and the
# cosine series (COSB8..COSB1) in fp2/fp0 side by side, both as Horner
# chains in S = R*R.  In this path the sine-series value in fp1 is the
# one labelled COS(X) (followed by the call to sto_cos), and the
# cosine-series value left in fp0 is the one labelled SIN(X).
#   RPRIME(%a6)  - R, then R' after its sign bit is XORed with bit 31 of d2
#   SPRIME(%a6)  - S, then S' after its sign bit is XORed with bit 31 of d1
#   POSNEG1(%a6) - single-precision 1.0 (0x3F800000) with bit 31 of d1
#                  XORed in, i.e. +-1.0
# fp2 is pushed with mask 0x04 and popped with mask 0x20; the masks
# differ because the fmovm register-list bit order is reversed for the
# predecrement addressing mode.
5293 fmovm.x
&0x04,-(%sp
) # save fp2
5295 fmov.x
%fp0
,RPRIME
(%a6
)
5296 fmul.x
%fp0
,%fp0
# FP0 IS S = R*R
5297 fmov.d SINA7
(%pc
),%fp1
# A7
5298 fmov.d COSB8
(%pc
),%fp2
# B8
5299 fmul.x
%fp0
,%fp1
# SA7
5300 fmul.x
%fp0
,%fp2
# SB8
5305 and.l &0x80000000,%d2
5307 and.l &0x80000000,%d2
5309 fadd.d SINA6
(%pc
),%fp1
# A6+SA7
5310 fadd.d COSB7
(%pc
),%fp2
# B7+SB8
5312 fmul.x
%fp0
,%fp1
# S(A6+SA7)
5313 eor.
l %d2
,RPRIME
(%a6
)
5315 fmul.x
%fp0
,%fp2
# S(B7+SB8)
5317 and.l &0x80000000,%d1
5318 mov.
l &0x3F800000,POSNEG1
(%a6
)
5319 eor.
l %d1
,POSNEG1
(%a6
)
5321 fadd.d SINA5
(%pc
),%fp1
# A5+S(A6+SA7)
5322 fadd.d COSB6
(%pc
),%fp2
# B6+S(B7+SB8)
5324 fmul.x
%fp0
,%fp1
# S(A5+S(A6+SA7))
5325 fmul.x
%fp0
,%fp2
# S(B6+S(B7+SB8))
5326 fmov.x
%fp0
,SPRIME
(%a6
)
5328 fadd.d SINA4
(%pc
),%fp1
# A4+S(A5+S(A6+SA7))
5329 eor.
l %d1
,SPRIME
(%a6
)
5330 fadd.d COSB5
(%pc
),%fp2
# B5+S(B6+S(B7+SB8))
5332 fmul.x
%fp0
,%fp1
# S(A4+...)
5333 fmul.x
%fp0
,%fp2
# S(B5+...)
5335 fadd.d SINA3
(%pc
),%fp1
# A3+S(A4+...)
5336 fadd.d COSB4
(%pc
),%fp2
# B4+S(B5+...)
5338 fmul.x
%fp0
,%fp1
# S(A3+...)
5339 fmul.x
%fp0
,%fp2
# S(B4+...)
5341 fadd.x SINA2
(%pc
),%fp1
# A2+S(A3+...)
5342 fadd.x COSB3
(%pc
),%fp2
# B3+S(B4+...)
5344 fmul.x
%fp0
,%fp1
# S(A2+...)
5345 fmul.x
%fp0
,%fp2
# S(B3+...)
5347 fadd.x SINA1
(%pc
),%fp1
# A1+S(A2+...)
5348 fadd.x COSB2
(%pc
),%fp2
# B2+S(B3+...)
5350 fmul.x
%fp0
,%fp1
# S(A1+...)
5351 fmul.x
%fp2
,%fp0
# S(B2+...)
5353 fmul.x RPRIME
(%a6
),%fp1
# R'S(A1+...)
5354 fadd.s COSB1
(%pc
),%fp0
# B1+S(B2...)
5355 fmul.x SPRIME
(%a6
),%fp0
# S'(B1+S(B2+...))
5357 fmovm.x
(%sp
)+,&0x20 # restore fp2
5360 fadd.x RPRIME
(%a6
),%fp1
# COS(X)
5361 bsr sto_cos
# store cosine result
5362 fadd.s POSNEG1
(%a6
),%fp0
# SIN(X)
5366 #--REGISTERS SAVED SO FAR: FP2.
#
# Evaluates the cosine series (coefficients COSB8..COSB1) in fp1 and the
# sine series (SINA7..SINA1) in fp2/fp0 side by side, both as Horner
# chains in S = R*R.  In this path the cosine-series value in fp1 is the
# one labelled COS(X) (followed by the call to sto_cos), and the
# sine-series value left in fp0 is the one labelled SIN(X).
#   RPRIME(%a6)  - R, then R' after its sign bit is XORed with bit 31 of d1
#   SPRIME(%a6)  - S, then S' after its sign bit is XORed with bit 31 of d1
#   POSNEG1(%a6) - bit 31 of d1 ORed with 0x3F800000, i.e. +-1.0 in
#                  single precision
# fp2 is pushed with mask 0x04 and popped with mask 0x20; the masks
# differ because the fmovm register-list bit order is reversed for the
# predecrement addressing mode.
5367 fmovm.x
&0x04,-(%sp
) # save fp2
5369 fmov.x
%fp0
,RPRIME
(%a6
)
5370 fmul.x
%fp0
,%fp0
# FP0 IS S = R*R
5372 fmov.d COSB8
(%pc
),%fp1
# B8
5373 fmov.d SINA7
(%pc
),%fp2
# A7
5375 fmul.x
%fp0
,%fp1
# SB8
5376 fmov.x
%fp0
,SPRIME
(%a6
)
5377 fmul.x
%fp0
,%fp2
# SA7
5380 and.l &0x80000000,%d1
5382 fadd.d COSB7
(%pc
),%fp1
# B7+SB8
5383 fadd.d SINA6
(%pc
),%fp2
# A6+SA7
5385 eor.
l %d1
,RPRIME
(%a6
)
5386 eor.
l %d1
,SPRIME
(%a6
)
5388 fmul.x
%fp0
,%fp1
# S(B7+SB8)
5390 or.l &0x3F800000,%d1
5391 mov.
l %d1
,POSNEG1
(%a6
)
5393 fmul.x
%fp0
,%fp2
# S(A6+SA7)
5395 fadd.d COSB6
(%pc
),%fp1
# B6+S(B7+SB8)
5396 fadd.d SINA5
(%pc
),%fp2
# A5+S(A6+SA7)
5398 fmul.x
%fp0
,%fp1
# S(B6+S(B7+SB8))
5399 fmul.x
%fp0
,%fp2
# S(A5+S(A6+SA7))
5401 fadd.d COSB5
(%pc
),%fp1
# B5+S(B6+S(B7+SB8))
5402 fadd.d SINA4
(%pc
),%fp2
# A4+S(A5+S(A6+SA7))
5404 fmul.x
%fp0
,%fp1
# S(B5+...)
5405 fmul.x
%fp0
,%fp2
# S(A4+...)
5407 fadd.d COSB4
(%pc
),%fp1
# B4+S(B5+...)
5408 fadd.d SINA3
(%pc
),%fp2
# A3+S(A4+...)
5410 fmul.x
%fp0
,%fp1
# S(B4+...)
5411 fmul.x
%fp0
,%fp2
# S(A3+...)
5413 fadd.x COSB3
(%pc
),%fp1
# B3+S(B4+...)
5414 fadd.x SINA2
(%pc
),%fp2
# A2+S(A3+...)
5416 fmul.x
%fp0
,%fp1
# S(B3+...)
5417 fmul.x
%fp0
,%fp2
# S(A2+...)
5419 fadd.x COSB2
(%pc
),%fp1
# B2+S(B3+...)
5420 fadd.x SINA1
(%pc
),%fp2
# A1+S(A2+...)
5422 fmul.x
%fp0
,%fp1
# S(B2+...)
5423 fmul.x
%fp2
,%fp0
# S(A1+...)
5426 fadd.s COSB1
(%pc
),%fp1
# B1+S(B2...)
5427 fmul.x RPRIME
(%a6
),%fp0
# R'S(A1+...)
5428 fmul.x SPRIME
(%a6
),%fp1
# S'(B1+S(B2+...))
5430 fmovm.x
(%sp
)+,&0x20 # restore fp2
5433 fadd.s POSNEG1
(%a6
),%fp1
# COS(X)
5434 bsr sto_cos
# store cosine result
5435 fadd.x RPRIME
(%a6
),%fp0
# SIN(X)
5438 ################################################
5441 cmp.
l %d1
,&0x3FFF8000
5444 ################################################
5447 # mov.w &0x0000,XDCARE(%a6)
5448 fmov.s
&0x3F800000,%fp1
5451 fsub.s
&0x00800000,%fp1
5452 bsr sto_cos
# store cosine result
5453 fmov.
l %fpcr
,%d0
# d0 must have fpcr,too
5454 mov.
b &FMOV_OP
,%d1
# last inst is MOVE
5458 ##############################################
5461 #--SIN AND COS OF X FOR DENORMALIZED X
5463 mov.
l %d0
,-(%sp
) # save d0
5464 fmov.s
&0x3F800000,%fp1
5465 bsr sto_cos
# store cosine result
5466 mov.
l (%sp
)+,%d0
# restore d0
5469 ############################################
5471 #--WHEN REDUCEX IS USED, THE CODE WILL INEVITABLY BE SLOW.
5472 #--THIS REDUCTION METHOD, HOWEVER, IS MUCH FASTER THAN USING
5473 #--THE REMAINDER INSTRUCTION WHICH IS NOW IN SOFTWARE.
5475 fmovm.x
&0x3c,-(%sp
) # save {fp2-fp5}
5476 mov.
l %d2
,-(%sp
) # save d2
5477 fmov.s
&0x00000000,%fp1
# fp1 = 0
5479 #--If compact form of abs(arg) in d1=$7ffeffff, argument is so large that
5480 #--there is a danger of unwanted overflow in first LOOP iteration. In this
5481 #--case, reduce argument by one remainder step to make subsequent reduction
5483 cmp.
l %d1
,&0x7ffeffff # is arg dangerously large?
5486 # yes; create 2**16383*PI/2
5487 mov.w
&0x7ffe,FP_SCR0_EX
(%a6
)
5488 mov.
l &0xc90fdaa2,FP_SCR0_HI
(%a6
)
5489 clr.
l FP_SCR0_LO
(%a6
)
5491 # create low half of 2**16383*PI/2 at FP_SCR1
5492 mov.w
&0x7fdc,FP_SCR1_EX
(%a6
)
5493 mov.
l &0x85a308d3,FP_SCR1_HI
(%a6
)
5494 clr.
l FP_SCR1_LO
(%a6
)
5496 ftest.x
%fp0
# test sign of argument
5499 or.b &0x80,FP_SCR0_EX
(%a6
) # positive arg
5500 or.b &0x80,FP_SCR1_EX
(%a6
)
5502 fadd.x FP_SCR0
(%a6
),%fp0
# high part of reduction is exact
5503 fmov.x
%fp0
,%fp1
# save high result in fp1
5504 fadd.x FP_SCR1
(%a6
),%fp0
# low part of reduction
5505 fsub.x
%fp0
,%fp1
# determine low component of result
5506 fadd.x FP_SCR1
(%a6
),%fp1
# fp0/fp1 are reduced argument.
5508 #--ON ENTRY, FP0 IS X, ON RETURN, FP0 IS X REM PI/2, |X| <= PI/4.
5509 #--integer quotient will be stored in N
5510 #--Intermediate remainder is 66-bit long; (R,r) in (FP0,FP1)
5512 fmov.x
%fp0
,INARG
(%a6
) # +-2**K * F, 1 <= F < 2
5513 mov.w INARG
(%a6
),%d1
5514 mov.
l %d1
,%a1
# save a copy of D1
5515 and.l &0x00007FFF,%d1
5516 sub.l &0x00003FFF,%d1
# d1 = K
5520 sub.l &27,%d1
# d1 = L := K-27
5521 mov.
b &0,ENDFLAG
(%a6
)
5524 clr.
l %d1
# d1 = L := 0
5525 mov.
b &1,ENDFLAG
(%a6
)
5528 #--FIND THE REMAINDER OF (R,r) W.R.T. 2**L * (PI/2). L IS SO CHOSEN
5529 #--THAT INT( X * (2/PI) / 2**(L) ) < 2**29.
5531 #--CREATE 2**(-L) * (2/PI), SIGN(INARG)*2**(63),
5532 #--2**L * (PIby2_1), 2**L * (PIby2_2)
5534 mov.
l &0x00003FFE,%d2
# BIASED EXP OF 2/PI
5535 sub.l %d1
,%d2
# BIASED EXP OF 2**(-L)*(2/PI)
5537 mov.
l &0xA2F9836E,FP_SCR0_HI
(%a6
)
5538 mov.
l &0x4E44152A,FP_SCR0_LO
(%a6
)
5539 mov.w
%d2
,FP_SCR0_EX
(%a6
) # FP_SCR0 = 2**(-L)*(2/PI)
5542 fmul.x FP_SCR0
(%a6
),%fp2
# fp2 = X * 2**(-L)*(2/PI)
5544 #--WE MUST NOW FIND INT(FP2). SINCE WE NEED THIS VALUE IN
5545 #--FLOATING POINT FORMAT, THE TWO FMOVE'S FMOVE.L FP <--> N
5546 #--WILL BE TOO INEFFICIENT. THE WAY AROUND IT IS THAT
5547 #--(SIGN(INARG)*2**63 + FP2) - SIGN(INARG)*2**63 WILL GIVE
5548 #--US THE DESIRED VALUE IN FLOATING POINT.
5551 and.l &0x80000000,%d2
5552 or.l &0x5F000000,%d2
# d2 = SIGN(INARG)*2**63 IN SGL
5553 mov.
l %d2
,TWOTO63
(%a6
)
5554 fadd.s TWOTO63
(%a6
),%fp2
# THE FRACTIONAL PART OF FP2 IS ROUNDED
5555 fsub.s TWOTO63
(%a6
),%fp2
# fp2 = N
5558 #--CREATING 2**(L)*Piby2_1 and 2**(L)*Piby2_2
5559 mov.
l %d1
,%d2
# d2 = L
5561 add.l &0x00003FFF,%d2
# BIASED EXP OF 2**L * (PI/2)
5562 mov.w
%d2
,FP_SCR0_EX
(%a6
)
5563 mov.
l &0xC90FDAA2,FP_SCR0_HI
(%a6
)
5564 clr.
l FP_SCR0_LO
(%a6
) # FP_SCR0 = 2**(L) * Piby2_1
5566 add.l &0x00003FDD,%d1
5567 mov.w
%d1
,FP_SCR1_EX
(%a6
)
5568 mov.
l &0x85A308D3,FP_SCR1_HI
(%a6
)
5569 clr.
l FP_SCR1_LO
(%a6
) # FP_SCR1 = 2**(L) * Piby2_2
5571 mov.
b ENDFLAG
(%a6
),%d1
5573 #--We are now ready to perform (R+r) - N*P1 - N*P2, P1 = 2**(L) * Piby2_1 and
5574 #--P2 = 2**(L) * Piby2_2
5575 fmov.x
%fp2
,%fp4
# fp4 = N
5576 fmul.x FP_SCR0
(%a6
),%fp4
# fp4 = W = N*P1
5577 fmov.x
%fp2
,%fp5
# fp5 = N
5578 fmul.x FP_SCR1
(%a6
),%fp5
# fp5 = w = N*P2
5579 fmov.x
%fp4
,%fp3
# fp3 = W = N*P1
5581 #--we want P+p = W+w but |p| <= half ulp of P
5582 #--Then, we need to compute A := R-P and a := r-p
5583 fadd.x
%fp5
,%fp3
# fp3 = P
5584 fsub.x
%fp3
,%fp4
# fp4 = W-P
5586 fsub.x
%fp3
,%fp0
# fp0 = A := R - P
5587 fadd.x
%fp5
,%fp4
# fp4 = p = (W-P)+w
5589 fmov.x
%fp0
,%fp3
# fp3 = A
5590 fsub.x
%fp4
,%fp1
# fp1 = a := r - p
5592 #--Now we need to normalize (A,a) to "new (R,r)" where R+r = A+a but
5593 #--|r| <= half ulp of R.
5594 fadd.x
%fp1
,%fp0
# fp0 = R := A+a
5595 #--No need to calculate r if this is the last loop
5599 #--Need to calculate r
5600 fsub.x
%fp0
,%fp3
# fp3 = A-R
5601 fadd.x
%fp3
,%fp1
# fp1 = r := (A-R)+a
5605 fmov.
l %fp2
,INT
(%a6
)
5606 mov.
l (%sp
)+,%d2
# restore d2
5607 fmovm.x
(%sp
)+,&0x3c # restore {fp2-fp5}
5615 #########################################################################
5616 # stan(): computes the tangent of a normalized input #
5617 # stand(): computes the tangent of a denormalized input #
5619 # INPUT *************************************************************** #
5620 # a0 = pointer to extended precision input #
5621 # d0 = round precision,mode #
5623 # OUTPUT ************************************************************** #
5626 # ACCURACY and MONOTONICITY ******************************************* #
5627 # The returned result is within 3 ulp in 64 significant bit, i.e. #
5628 # within 0.5001 ulp to 53 bits if the result is subsequently #
5629 # rounded to double precision. The result is provably monotonic #
5630 # in double precision. #
5632 # ALGORITHM *********************************************************** #
5634 # 1. If |X| >= 15Pi or |X| < 2**(-40), go to 6. #
5636 # 2. Decompose X as X = N(Pi/2) + r where |r| <= Pi/4. Let #
5637 # k = N mod 2, so in particular, k = 0 or 1. #
5639 # 3. If k is odd, go to 5. #
5641 # 4. (k is even) Tan(X) = tan(r) and tan(r) is approximated by a #
5642 # rational function U/V where #
5643 # U = r + r*s*(P1 + s*(P2 + s*P3)), and #
5644 # V = 1 + s*(Q1 + s*(Q2 + s*(Q3 + s*Q4))), s = r*r. #
5647 # 5. (k is odd) Tan(X) = -cot(r). Since tan(r) is approximated by #
5648 # a rational function U/V where #
5649 # U = r + r*s*(P1 + s*(P2 + s*P3)), and #
5650 # V = 1 + s*(Q1 + s*(Q2 + s*(Q3 + s*Q4))), s = r*r, #
5651 # -Cot(r) = -V/U. Exit. #
5653 # 6. If |X| > 1, go to 8. #
5655 # 7. (|X|<2**(-40)) Tan(X) = X. Exit. #
5657 # 8. Overwrite X by X := X rem 2Pi. Now that |X| <= Pi, go back #
5660 #########################################################################
5663 long
0x3EA0B759,0xF50F8688
5665 long
0xBEF2BAA5,0xA8924F04
5668 long
0xBF346F59,0xB39BA65F,0x00000000,0x00000000
5671 long
0x3FF60000,0xE073D3FC,0x199C4A00,0x00000000
5674 long
0x3FF90000,0xD23CD684,0x15D95FA1,0x00000000
5677 long
0xBFFC0000,0x8895A6C5,0xFB423BCA,0x00000000
5680 long
0xBFFD0000,0xEEF57E0D,0xA84BC8CE,0x00000000
5683 long
0x3FFC0000,0xA2F9836E,0x4E44152A,0x00000000
5686 long
0x40010000,0xC90FDAA2,0x00000000,0x00000000
5688 long
0x3FDF0000,0x85A308D4,0x00000000,0x00000000
5690 #--N*PI/2, -32 <= N <= 32, IN A LEADING TERM IN EXT. AND TRAILING
5691 #--TERM IN SGL. NOTE THAT PI IS 64-BIT LONG, THUS N*PI/2 IS AT
5692 #--MOST 69 BITS LONG.
5695 long
0xC0040000,0xC90FDAA2,0x2168C235,0x21800000
5696 long
0xC0040000,0xC2C75BCD,0x105D7C23,0xA0D00000
5697 long
0xC0040000,0xBC7EDCF7,0xFF523611,0xA1E80000
5698 long
0xC0040000,0xB6365E22,0xEE46F000,0x21480000
5699 long
0xC0040000,0xAFEDDF4D,0xDD3BA9EE,0xA1200000
5700 long
0xC0040000,0xA9A56078,0xCC3063DD,0x21FC0000
5701 long
0xC0040000,0xA35CE1A3,0xBB251DCB,0x21100000
5702 long
0xC0040000,0x9D1462CE,0xAA19D7B9,0xA1580000
5703 long
0xC0040000,0x96CBE3F9,0x990E91A8,0x21E00000
5704 long
0xC0040000,0x90836524,0x88034B96,0x20B00000
5705 long
0xC0040000,0x8A3AE64F,0x76F80584,0xA1880000
5706 long
0xC0040000,0x83F2677A,0x65ECBF73,0x21C40000
5707 long
0xC0030000,0xFB53D14A,0xA9C2F2C2,0x20000000
5708 long
0xC0030000,0xEEC2D3A0,0x87AC669F,0x21380000
5709 long
0xC0030000,0xE231D5F6,0x6595DA7B,0xA1300000
5710 long
0xC0030000,0xD5A0D84C,0x437F4E58,0x9FC00000
5711 long
0xC0030000,0xC90FDAA2,0x2168C235,0x21000000
5712 long
0xC0030000,0xBC7EDCF7,0xFF523611,0xA1680000
5713 long
0xC0030000,0xAFEDDF4D,0xDD3BA9EE,0xA0A00000
5714 long
0xC0030000,0xA35CE1A3,0xBB251DCB,0x20900000
5715 long
0xC0030000,0x96CBE3F9,0x990E91A8,0x21600000
5716 long
0xC0030000,0x8A3AE64F,0x76F80584,0xA1080000
5717 long
0xC0020000,0xFB53D14A,0xA9C2F2C2,0x1F800000
5718 long
0xC0020000,0xE231D5F6,0x6595DA7B,0xA0B00000
5719 long
0xC0020000,0xC90FDAA2,0x2168C235,0x20800000
5720 long
0xC0020000,0xAFEDDF4D,0xDD3BA9EE,0xA0200000
5721 long
0xC0020000,0x96CBE3F9,0x990E91A8,0x20E00000
5722 long
0xC0010000,0xFB53D14A,0xA9C2F2C2,0x1F000000
5723 long
0xC0010000,0xC90FDAA2,0x2168C235,0x20000000
5724 long
0xC0010000,0x96CBE3F9,0x990E91A8,0x20600000
5725 long
0xC0000000,0xC90FDAA2,0x2168C235,0x1F800000
5726 long
0xBFFF0000,0xC90FDAA2,0x2168C235,0x1F000000
5727 long
0x00000000,0x00000000,0x00000000,0x00000000
5728 long
0x3FFF0000,0xC90FDAA2,0x2168C235,0x9F000000
5729 long
0x40000000,0xC90FDAA2,0x2168C235,0x9F800000
5730 long
0x40010000,0x96CBE3F9,0x990E91A8,0xA0600000
5731 long
0x40010000,0xC90FDAA2,0x2168C235,0xA0000000
5732 long
0x40010000,0xFB53D14A,0xA9C2F2C2,0x9F000000
5733 long
0x40020000,0x96CBE3F9,0x990E91A8,0xA0E00000
5734 long
0x40020000,0xAFEDDF4D,0xDD3BA9EE,0x20200000
5735 long
0x40020000,0xC90FDAA2,0x2168C235,0xA0800000
5736 long
0x40020000,0xE231D5F6,0x6595DA7B,0x20B00000
5737 long
0x40020000,0xFB53D14A,0xA9C2F2C2,0x9F800000
5738 long
0x40030000,0x8A3AE64F,0x76F80584,0x21080000
5739 long
0x40030000,0x96CBE3F9,0x990E91A8,0xA1600000
5740 long
0x40030000,0xA35CE1A3,0xBB251DCB,0xA0900000
5741 long
0x40030000,0xAFEDDF4D,0xDD3BA9EE,0x20A00000
5742 long
0x40030000,0xBC7EDCF7,0xFF523611,0x21680000
5743 long
0x40030000,0xC90FDAA2,0x2168C235,0xA1000000
5744 long
0x40030000,0xD5A0D84C,0x437F4E58,0x1FC00000
5745 long
0x40030000,0xE231D5F6,0x6595DA7B,0x21300000
5746 long
0x40030000,0xEEC2D3A0,0x87AC669F,0xA1380000
5747 long
0x40030000,0xFB53D14A,0xA9C2F2C2,0xA0000000
5748 long
0x40040000,0x83F2677A,0x65ECBF73,0xA1C40000
5749 long
0x40040000,0x8A3AE64F,0x76F80584,0x21880000
5750 long
0x40040000,0x90836524,0x88034B96,0xA0B00000
5751 long
0x40040000,0x96CBE3F9,0x990E91A8,0xA1E00000
5752 long
0x40040000,0x9D1462CE,0xAA19D7B9,0x21580000
5753 long
0x40040000,0xA35CE1A3,0xBB251DCB,0xA1100000
5754 long
0x40040000,0xA9A56078,0xCC3063DD,0xA1FC0000
5755 long
0x40040000,0xAFEDDF4D,0xDD3BA9EE,0x21200000
5756 long
0x40040000,0xB6365E22,0xEE46F000,0xA1480000
5757 long
0x40040000,0xBC7EDCF7,0xFF523611,0x21E80000
5758 long
0x40040000,0xC2C75BCD,0x105D7C23,0x20D00000
5759 long
0x40040000,0xC90FDAA2,0x2168C235,0xA1800000
5769 fmov.x
(%a0
),%fp0
# LOAD INPUT
5773 and.l &0x7FFFFFFF,%d1
5775 cmp.
l %d1
,&0x3FD78000 # |X| >= 2**(-40)?
5779 cmp.
l %d1
,&0x4004BC7E # |X| < 15 PI?
5784 #--THIS IS THE USUAL CASE, |X| <= 15 PI.
5785 #--THE ARGUMENT REDUCTION IS DONE BY TABLE LOOK UP.
5787 fmul.d TWOBYPI
(%pc
),%fp1
# X*2/PI
5789 lea.
l PITBL+
0x200(%pc
),%a1
# TABLE OF N*PI/2, N = -32,...,32
5791 fmov.
l %fp1
,%d1
# CONVERT TO INTEGER
5794 add.l %d1
,%a1
# ADDRESS N*PIBY2 IN Y1, Y2
5796 fsub.x
(%a1
)+,%fp0
# X-Y1
5798 fsub.s
(%a1
),%fp0
# FP0 IS R = (X-Y1)-Y2
5801 and.l &0x80000000,%d1
# D0 WAS ODD IFF D0 < 0
# Rational approximation tan(r) ~ U/V, where
#   U = R + R*S*(P1 + S*(P2 + S*P3))
#   V = 1 + S*(Q1 + S*(Q2 + S*(Q3 + S*Q4))),  S = R*R
# fp2 carries the P chain (TANP3..TANP1) and fp3 the Q chain
# (TANQ4..TANQ1).  Both are saved once here and restored in each of the
# two instruction sequences below (masks 0x0c / 0x30 name the same
# registers; the bit order is reversed for predecrement mode).
#
# First sequence: S is formed in fp1, U ends up in fp0 and V in fp1,
# and the final divide leaves fp0 = U/V.
5804 fmovm.x
&0x0c,-(%sp
) # save fp2,fp3
5810 fmul.x
%fp1
,%fp1
# S = R*R
5812 fmov.d TANQ4
(%pc
),%fp3
5813 fmov.d TANP3
(%pc
),%fp2
5815 fmul.x
%fp1
,%fp3
# SQ4
5816 fmul.x
%fp1
,%fp2
# SP3
5818 fadd.d TANQ3
(%pc
),%fp3
# Q3+SQ4
5819 fadd.x TANP2
(%pc
),%fp2
# P2+SP3
5821 fmul.x
%fp1
,%fp3
# S(Q3+SQ4)
5822 fmul.x
%fp1
,%fp2
# S(P2+SP3)
5824 fadd.x TANQ2
(%pc
),%fp3
# Q2+S(Q3+SQ4)
5825 fadd.x TANP1
(%pc
),%fp2
# P1+S(P2+SP3)
5827 fmul.x
%fp1
,%fp3
# S(Q2+S(Q3+SQ4))
5828 fmul.x
%fp1
,%fp2
# S(P1+S(P2+SP3))
5830 fadd.x TANQ1
(%pc
),%fp3
# Q1+S(Q2+S(Q3+SQ4))
5831 fmul.x
%fp0
,%fp2
# RS(P1+S(P2+SP3))
5833 fmul.x
%fp3
,%fp1
# S(Q1+S(Q2+S(Q3+SQ4)))
5835 fadd.x
%fp2
,%fp0
# R+RS(P1+S(P2+SP3))
5837 fadd.s
&0x3F800000,%fp1
# 1+S(Q1+...)
5839 fmovm.x
(%sp
)+,&0x30 # restore fp2,fp3
5841 fmov.
l %d0
,%fpcr
# restore users round mode,prec
5842 fdiv.x
%fp1
,%fp0
# last inst - possible exception set
# Second sequence: S is formed in fp0 and fp1 supplies R, so U ends up
# in fp1 and V in fp0.  The divisor is the extended-precision operand at
# the top of the stack; its sign bit (bit 31 of its first longword) is
# toggled before the divide, which negates the quotient.
# NOTE(review): presumably that stacked operand is U, giving -V/U =
# -cot(r) as in the algorithm description -- confirm where it is pushed.
5847 fmul.x
%fp0
,%fp0
# S = R*R
5849 fmov.d TANQ4
(%pc
),%fp3
5850 fmov.d TANP3
(%pc
),%fp2
5852 fmul.x
%fp0
,%fp3
# SQ4
5853 fmul.x
%fp0
,%fp2
# SP3
5855 fadd.d TANQ3
(%pc
),%fp3
# Q3+SQ4
5856 fadd.x TANP2
(%pc
),%fp2
# P2+SP3
5858 fmul.x
%fp0
,%fp3
# S(Q3+SQ4)
5859 fmul.x
%fp0
,%fp2
# S(P2+SP3)
5861 fadd.x TANQ2
(%pc
),%fp3
# Q2+S(Q3+SQ4)
5862 fadd.x TANP1
(%pc
),%fp2
# P1+S(P2+SP3)
5864 fmul.x
%fp0
,%fp3
# S(Q2+S(Q3+SQ4))
5865 fmul.x
%fp0
,%fp2
# S(P1+S(P2+SP3))
5867 fadd.x TANQ1
(%pc
),%fp3
# Q1+S(Q2+S(Q3+SQ4))
5868 fmul.x
%fp1
,%fp2
# RS(P1+S(P2+SP3))
5870 fmul.x
%fp3
,%fp0
# S(Q1+S(Q2+S(Q3+SQ4)))
5872 fadd.x
%fp2
,%fp1
# R+RS(P1+S(P2+SP3))
5873 fadd.s
&0x3F800000,%fp0
# 1+S(Q1+...)
5875 fmovm.x
(%sp
)+,&0x30 # restore fp2,fp3
5878 eor.
l &0x80000000,(%sp
)
5880 fmov.
l %d0
,%fpcr
# restore users round mode,prec
5881 fdiv.x
(%sp
)+,%fp0
# last inst - possible exception set
5885 #--IF |X| > 15PI, WE USE THE GENERAL ARGUMENT REDUCTION.
5886 #--IF |X| < 2**(-40), RETURN X.
5887 cmp.
l %d1
,&0x3FFF8000
5892 fmov.
l %d0
,%fpcr
# restore users round mode,prec
5893 mov.
b &FMOV_OP
,%d1
# last inst is MOVE
5894 fmov.x
(%sp
)+,%fp0
# last inst - possible exception set
5898 #--TAN(X) = X FOR DENORMALIZED X
5902 #--WHEN REDUCEX IS USED, THE CODE WILL INEVITABLY BE SLOW.
5903 #--THIS REDUCTION METHOD, HOWEVER, IS MUCH FASTER THAN USING
5904 #--THE REMAINDER INSTRUCTION WHICH IS NOW IN SOFTWARE.
5906 fmovm.x
&0x3c,-(%sp
) # save {fp2-fp5}
5907 mov.
l %d2
,-(%sp
) # save d2
5908 fmov.s
&0x00000000,%fp1
# fp1 = 0
5910 #--If compact form of abs(arg) in d1=$7ffeffff, argument is so large that
5911 #--there is a danger of unwanted overflow in first LOOP iteration. In this
5912 #--case, reduce argument by one remainder step to make subsequent reduction
5914 cmp.
l %d1
,&0x7ffeffff # is arg dangerously large?
5917 # yes; create 2**16383*PI/2
5918 mov.w
&0x7ffe,FP_SCR0_EX
(%a6
)
5919 mov.
l &0xc90fdaa2,FP_SCR0_HI
(%a6
)
5920 clr.
l FP_SCR0_LO
(%a6
)
5922 # create low half of 2**16383*PI/2 at FP_SCR1
5923 mov.w
&0x7fdc,FP_SCR1_EX
(%a6
)
5924 mov.
l &0x85a308d3,FP_SCR1_HI
(%a6
)
5925 clr.
l FP_SCR1_LO
(%a6
)
5927 ftest.x
%fp0
# test sign of argument
5930 or.b &0x80,FP_SCR0_EX
(%a6
) # positive arg
5931 or.b &0x80,FP_SCR1_EX
(%a6
)
5933 fadd.x FP_SCR0
(%a6
),%fp0
# high part of reduction is exact
5934 fmov.x
%fp0
,%fp1
# save high result in fp1
5935 fadd.x FP_SCR1
(%a6
),%fp0
# low part of reduction
5936 fsub.x
%fp0
,%fp1
# determine low component of result
5937 fadd.x FP_SCR1
(%a6
),%fp1
# fp0/fp1 are reduced argument.
5939 #--ON ENTRY, FP0 IS X, ON RETURN, FP0 IS X REM PI/2, |X| <= PI/4.
5940 #--integer quotient will be stored in N
5941 #--Intermediate remainder is 66-bit long; (R,r) in (FP0,FP1)
5943 fmov.x
%fp0
,INARG
(%a6
) # +-2**K * F, 1 <= F < 2
5944 mov.w INARG
(%a6
),%d1
5945 mov.
l %d1
,%a1
# save a copy of D1
5946 and.l &0x00007FFF,%d1
5947 sub.l &0x00003FFF,%d1
# d1 = K
5951 sub.l &27,%d1
# d1 = L := K-27
5952 mov.
b &0,ENDFLAG
(%a6
)
5955 clr.
l %d1
# d1 = L := 0
5956 mov.
b &1,ENDFLAG
(%a6
)
5959 #--FIND THE REMAINDER OF (R,r) W.R.T. 2**L * (PI/2). L IS SO CHOSEN
5960 #--THAT INT( X * (2/PI) / 2**(L) ) < 2**29.
5962 #--CREATE 2**(-L) * (2/PI), SIGN(INARG)*2**(63),
5963 #--2**L * (PIby2_1), 2**L * (PIby2_2)
5965 mov.
l &0x00003FFE,%d2
# BIASED EXP OF 2/PI
5966 sub.l %d1
,%d2
# BIASED EXP OF 2**(-L)*(2/PI)
5968 mov.
l &0xA2F9836E,FP_SCR0_HI
(%a6
)
5969 mov.
l &0x4E44152A,FP_SCR0_LO
(%a6
)
5970 mov.w
%d2
,FP_SCR0_EX
(%a6
) # FP_SCR0 = 2**(-L)*(2/PI)
5973 fmul.x FP_SCR0
(%a6
),%fp2
# fp2 = X * 2**(-L)*(2/PI)
5975 #--WE MUST NOW FIND INT(FP2). SINCE WE NEED THIS VALUE IN
5976 #--FLOATING POINT FORMAT, THE TWO FMOVE'S FMOVE.L FP <--> N
5977 #--WILL BE TOO INEFFICIENT. THE WAY AROUND IT IS THAT
5978 #--(SIGN(INARG)*2**63 + FP2) - SIGN(INARG)*2**63 WILL GIVE
5979 #--US THE DESIRED VALUE IN FLOATING POINT.
5982 and.l &0x80000000,%d2
5983 or.l &0x5F000000,%d2
# d2 = SIGN(INARG)*2**63 IN SGL
5984 mov.
l %d2
,TWOTO63
(%a6
)
5985 fadd.s TWOTO63
(%a6
),%fp2
# THE FRACTIONAL PART OF FP2 IS ROUNDED
5986 fsub.s TWOTO63
(%a6
),%fp2
# fp2 = N
5987 # fintrz.x %fp2,%fp2
5989 #--CREATING 2**(L)*Piby2_1 and 2**(L)*Piby2_2
5990 mov.
l %d1
,%d2
# d2 = L
5992 add.l &0x00003FFF,%d2
# BIASED EXP OF 2**L * (PI/2)
5993 mov.w
%d2
,FP_SCR0_EX
(%a6
)
5994 mov.
l &0xC90FDAA2,FP_SCR0_HI
(%a6
)
5995 clr.
l FP_SCR0_LO
(%a6
) # FP_SCR0 = 2**(L) * Piby2_1
5997 add.l &0x00003FDD,%d1
5998 mov.w
%d1
,FP_SCR1_EX
(%a6
)
5999 mov.
l &0x85A308D3,FP_SCR1_HI
(%a6
)
6000 clr.
l FP_SCR1_LO
(%a6
) # FP_SCR1 = 2**(L) * Piby2_2
6002 mov.
b ENDFLAG
(%a6
),%d1
6004 #--We are now ready to perform (R+r) - N*P1 - N*P2, P1 = 2**(L) * Piby2_1 and
6005 #--P2 = 2**(L) * Piby2_2
6006 fmov.x
%fp2
,%fp4
# fp4 = N
6007 fmul.x FP_SCR0
(%a6
),%fp4
# fp4 = W = N*P1
6008 fmov.x
%fp2
,%fp5
# fp5 = N
6009 fmul.x FP_SCR1
(%a6
),%fp5
# fp5 = w = N*P2
6010 fmov.x
%fp4
,%fp3
# fp3 = W = N*P1
6012 #--we want P+p = W+w but |p| <= half ulp of P
6013 #--Then, we need to compute A := R-P and a := r-p
6014 fadd.x
%fp5
,%fp3
# fp3 = P
6015 fsub.x
%fp3
,%fp4
# fp4 = W-P
6017 fsub.x
%fp3
,%fp0
# fp0 = A := R - P
6018 fadd.x
%fp5
,%fp4
# fp4 = p = (W-P)+w
6020 fmov.x
%fp0
,%fp3
# fp3 = A
6021 fsub.x
%fp4
,%fp1
# fp1 = a := r - p
6023 #--Now we need to normalize (A,a) to "new (R,r)" where R+r = A+a but
6024 #--|r| <= half ulp of R.
6025 fadd.x
%fp1
,%fp0
# fp0 = R := A+a
6026 #--No need to calculate r if this is the last loop
6030 #--Need to calculate r
6031 fsub.x
%fp0
,%fp3
# fp3 = A-R
6032 fadd.x
%fp3
,%fp1
# fp1 = r := (A-R)+a
6036 fmov.
l %fp2
,INT
(%a6
)
6037 mov.
l (%sp
)+,%d2
# restore d2
6038 fmovm.x
(%sp
)+,&0x3c # restore {fp2-fp5}
6045 #########################################################################
6046 # satan(): computes the arctangent of a normalized number #
6047 # satand(): computes the arctangent of a denormalized number #
6049 # INPUT *************************************************************** #
6050 # a0 = pointer to extended precision input #
6051 # d0 = round precision,mode #
6053 # OUTPUT ************************************************************** #
6056 # ACCURACY and MONOTONICITY ******************************************* #
6057 # The returned result is within 2 ulps in 64 significant bit, #
6058 # i.e. within 0.5001 ulp to 53 bits if the result is subsequently #
6059 # rounded to double precision. The result is provably monotonic #
6060 # in double precision. #
6062 # ALGORITHM *********************************************************** #
6063 # Step 1. If |X| >= 16 or |X| < 1/16, go to Step 5. #
6065 # Step 2. Let X = sgn * 2**k * 1.xxxxxxxx...x. #
6066 # Note that k = -4, -3,..., or 3. #
6067 # Define F = sgn * 2**k * 1.xxxx1, i.e. the first 5 #
6068 # significant bits of X with a bit-1 attached at the 6-th #
6069 # bit position. Define u to be u = (X-F) / (1 + X*F). #
6071 # Step 3. Approximate arctan(u) by a polynomial poly. #
6073 # Step 4. Return arctan(F) + poly, arctan(F) is fetched from a #
6074 # table of values calculated beforehand. Exit. #
6076 # Step 5. If |X| >= 16, go to Step 7. #
6078 # Step 6. Approximate arctan(X) by an odd polynomial in X. Exit. #
6080 # Step 7. Define X' = -1/X. Approximate arctan(X') by an odd #
6081 # polynomial in X'. #
6082 # Arctan(X) = sign(X)*Pi/2 + arctan(X'). Exit. #
6084 #########################################################################
# Constants for the arctangent routine.
#
# ATANA*, ATANB*, ATANC*: polynomial coefficients, two longwords
# (64 bits) each -- presumably IEEE double precision; confirm against
# the instructions that load them.
# NOTE(review): which polynomial step of the algorithm description each
# group feeds is not established here -- confirm at the use sites.
6086 ATANA3
: long
0xBFF6687E,0x314987D8
6087 ATANA2
: long
0x4002AC69,0x34A26DB3
6088 ATANA1
: long
0xBFC2476F,0x4E1DA28E
# Read as IEEE doubles, ATANB1 is the double nearest -1/3 and ATANB2 is
# close to 1/5.
6090 ATANB6
: long
0x3FB34444,0x7F876989
6091 ATANB5
: long
0xBFB744EE,0x7FAF45DB
6092 ATANB4
: long
0x3FBC71C6,0x46940220
6093 ATANB3
: long
0xBFC24924,0x921872F9
6094 ATANB2
: long
0x3FC99999,0x99998FA9
6095 ATANB1
: long
0xBFD55555,0x55555555
6097 ATANC5
: long
0xBFB70BF3,0x98539E6A
6098 ATANC4
: long
0x3FBC7187,0x962D1D7D
6099 ATANC3
: long
0xBFC24924,0x827107B8
6100 ATANC2
: long
0x3FC99999,0x9996263E
6101 ATANC1
: long
0xBFD55555,0x55555536
# PPIBY2 / NPIBY2: +pi/2 and -pi/2 in extended precision (sign/exponent
# word 0x3FFF / 0xBFFF, mantissa 0xC90FDAA2 2168C235), padded to four
# longwords.
6103 PPIBY2
: long
0x3FFF0000,0xC90FDAA2,0x2168C235,0x00000000
6104 NPIBY2
: long
0xBFFF0000,0xC90FDAA2,0x2168C235,0x00000000
# PTINY / NTINY: +-2**(-16382) in extended precision (biased exponent
# 0x0001, only the explicit integer bit of the mantissa set).
6106 PTINY
: long
0x00010000,0x80000000,0x00000000,0x00000000
6107 NTINY
: long
0x80010000,0x80000000,0x00000000,0x00000000
6110 long
0x3FFB0000,0x83D152C5,0x060B7A51,0x00000000
6111 long
0x3FFB0000,0x8BC85445,0x65498B8B,0x00000000
6112 long
0x3FFB0000,0x93BE4060,0x17626B0D,0x00000000
6113 long
0x3FFB0000,0x9BB3078D,0x35AEC202,0x00000000
6114 long
0x3FFB0000,0xA3A69A52,0x5DDCE7DE,0x00000000
6115 long
0x3FFB0000,0xAB98E943,0x62765619,0x00000000
6116 long
0x3FFB0000,0xB389E502,0xF9C59862,0x00000000
6117 long
0x3FFB0000,0xBB797E43,0x6B09E6FB,0x00000000
6118 long
0x3FFB0000,0xC367A5C7,0x39E5F446,0x00000000
6119 long
0x3FFB0000,0xCB544C61,0xCFF7D5C6,0x00000000
6120 long
0x3FFB0000,0xD33F62F8,0x2488533E,0x00000000
6121 long
0x3FFB0000,0xDB28DA81,0x62404C77,0x00000000
6122 long
0x3FFB0000,0xE310A407,0x8AD34F18,0x00000000
6123 long
0x3FFB0000,0xEAF6B0A8,0x188EE1EB,0x00000000
6124 long
0x3FFB0000,0xF2DAF194,0x9DBE79D5,0x00000000
6125 long
0x3FFB0000,0xFABD5813,0x61D47E3E,0x00000000
6126 long
0x3FFC0000,0x8346AC21,0x0959ECC4,0x00000000
6127 long
0x3FFC0000,0x8B232A08,0x304282D8,0x00000000
6128 long
0x3FFC0000,0x92FB70B8,0xD29AE2F9,0x00000000
6129 long
0x3FFC0000,0x9ACF476F,0x5CCD1CB4,0x00000000
6130 long
0x3FFC0000,0xA29E7630,0x4954F23F,0x00000000
6131 long
0x3FFC0000,0xAA68C5D0,0x8AB85230,0x00000000
6132 long
0x3FFC0000,0xB22DFFFD,0x9D539F83,0x00000000
6133 long
0x3FFC0000,0xB9EDEF45,0x3E900EA5,0x00000000
6134 long
0x3FFC0000,0xC1A85F1C,0xC75E3EA5,0x00000000
6135 long
0x3FFC0000,0xC95D1BE8,0x28138DE6,0x00000000
6136 long
0x3FFC0000,0xD10BF300,0x840D2DE4,0x00000000
6137 long
0x3FFC0000,0xD8B4B2BA,0x6BC05E7A,0x00000000
6138 long
0x3FFC0000,0xE0572A6B,0xB42335F6,0x00000000
6139 long
0x3FFC0000,0xE7F32A70,0xEA9CAA8F,0x00000000
6140 long
0x3FFC0000,0xEF888432,0x64ECEFAA,0x00000000
6141 long
0x3FFC0000,0xF7170A28,0xECC06666,0x00000000
6142 long
0x3FFD0000,0x812FD288,0x332DAD32,0x00000000
6143 long
0x3FFD0000,0x88A8D1B1,0x218E4D64,0x00000000
6144 long
0x3FFD0000,0x9012AB3F,0x23E4AEE8,0x00000000
6145 long
0x3FFD0000,0x976CC3D4,0x11E7F1B9,0x00000000
6146 long
0x3FFD0000,0x9EB68949,0x3889A227,0x00000000
6147 long
0x3FFD0000,0xA5EF72C3,0x4487361B,0x00000000
6148 long
0x3FFD0000,0xAD1700BA,0xF07A7227,0x00000000
6149 long
0x3FFD0000,0xB42CBCFA,0xFD37EFB7,0x00000000
6150 long
0x3FFD0000,0xBB303A94,0x0BA80F89,0x00000000
6151 long
0x3FFD0000,0xC22115C6,0xFCAEBBAF,0x00000000
6152 long
0x3FFD0000,0xC8FEF3E6,0x86331221,0x00000000
6153 long
0x3FFD0000,0xCFC98330,0xB4000C70,0x00000000
6154 long
0x3FFD0000,0xD6807AA1,0x102C5BF9,0x00000000
6155 long
0x3FFD0000,0xDD2399BC,0x31252AA3,0x00000000
6156 long
0x3FFD0000,0xE3B2A855,0x6B8FC517,0x00000000
6157 long
0x3FFD0000,0xEA2D764F,0x64315989,0x00000000
6158 long
0x3FFD0000,0xF3BF5BF8,0xBAD1A21D,0x00000000
6159 long
0x3FFE0000,0x801CE39E,0x0D205C9A,0x00000000
6160 long
0x3FFE0000,0x8630A2DA,0xDA1ED066,0x00000000
6161 long
0x3FFE0000,0x8C1AD445,0xF3E09B8C,0x00000000
6162 long
0x3FFE0000,0x91DB8F16,0x64F350E2,0x00000000
6163 long
0x3FFE0000,0x97731420,0x365E538C,0x00000000
6164 long
0x3FFE0000,0x9CE1C8E6,0xA0B8CDBA,0x00000000
6165 long
0x3FFE0000,0xA22832DB,0xCADAAE09,0x00000000
6166 long
0x3FFE0000,0xA746F2DD,0xB7602294,0x00000000
6167 long
0x3FFE0000,0xAC3EC0FB,0x997DD6A2,0x00000000
6168 long
0x3FFE0000,0xB110688A,0xEBDC6F6A,0x00000000
6169 long
0x3FFE0000,0xB5BCC490,0x59ECC4B0,0x00000000
6170 long
0x3FFE0000,0xBA44BC7D,0xD470782F,0x00000000
6171 long
0x3FFE0000,0xBEA94144,0xFD049AAC,0x00000000
6172 long
0x3FFE0000,0xC2EB4ABB,0x661628B6,0x00000000
6173 long
0x3FFE0000,0xC70BD54C,0xE602EE14,0x00000000
6174 long
0x3FFE0000,0xCD000549,0xADEC7159,0x00000000
6175 long
0x3FFE0000,0xD48457D2,0xD8EA4EA3,0x00000000
6176 long
0x3FFE0000,0xDB948DA7,0x12DECE3B,0x00000000
6177 long
0x3FFE0000,0xE23855F9,0x69E8096A,0x00000000
6178 long
0x3FFE0000,0xE8771129,0xC4353259,0x00000000
6179 long
0x3FFE0000,0xEE57C16E,0x0D379C0D,0x00000000
6180 long
0x3FFE0000,0xF3E10211,0xA87C3779,0x00000000
6181 long
0x3FFE0000,0xF919039D,0x758B8D41,0x00000000
6182 long
0x3FFE0000,0xFE058B8F,0x64935FB3,0x00000000
6183 long
0x3FFF0000,0x8155FB49,0x7B685D04,0x00000000
6184 long
0x3FFF0000,0x83889E35,0x49D108E1,0x00000000
6185 long
0x3FFF0000,0x859CFA76,0x511D724B,0x00000000
6186 long
0x3FFF0000,0x87952ECF,0xFF8131E7,0x00000000
6187 long
0x3FFF0000,0x89732FD1,0x9557641B,0x00000000
6188 long
0x3FFF0000,0x8B38CAD1,0x01932A35,0x00000000
6189 long
0x3FFF0000,0x8CE7A8D8,0x301EE6B5,0x00000000
6190 long
0x3FFF0000,0x8F46A39E,0x2EAE5281,0x00000000
6191 long
0x3FFF0000,0x922DA7D7,0x91888487,0x00000000
6192 long
0x3FFF0000,0x94D19FCB,0xDEDF5241,0x00000000
6193 long
0x3FFF0000,0x973AB944,0x19D2A08B,0x00000000
6194 long
0x3FFF0000,0x996FF00E,0x08E10B96,0x00000000
6195 long
0x3FFF0000,0x9B773F95,0x12321DA7,0x00000000
6196 long
0x3FFF0000,0x9D55CC32,0x0F935624,0x00000000
6197 long
0x3FFF0000,0x9F100575,0x006CC571,0x00000000
6198 long
0x3FFF0000,0xA0A9C290,0xD97CC06C,0x00000000
6199 long
0x3FFF0000,0xA22659EB,0xEBC0630A,0x00000000
6200 long
0x3FFF0000,0xA388B4AF,0xF6EF0EC9,0x00000000
6201 long
0x3FFF0000,0xA4D35F10,0x61D292C4,0x00000000
6202 long
0x3FFF0000,0xA60895DC,0xFBE3187E,0x00000000
6203 long
0x3FFF0000,0xA72A51DC,0x7367BEAC,0x00000000
6204 long
0x3FFF0000,0xA83A5153,0x0956168F,0x00000000
6205 long
0x3FFF0000,0xA93A2007,0x7539546E,0x00000000
6206 long
0x3FFF0000,0xAA9E7245,0x023B2605,0x00000000
6207 long
0x3FFF0000,0xAC4C84BA,0x6FE4D58F,0x00000000
6208 long
0x3FFF0000,0xADCE4A4A,0x606B9712,0x00000000
6209 long
0x3FFF0000,0xAF2A2DCD,0x8D263C9C,0x00000000
6210 long
0x3FFF0000,0xB0656F81,0xF22265C7,0x00000000
6211 long
0x3FFF0000,0xB1846515,0x0F71496A,0x00000000
6212 long
0x3FFF0000,0xB28AAA15,0x6F9ADA35,0x00000000
6213 long
0x3FFF0000,0xB37B44FF,0x3766B895,0x00000000
6214 long
0x3FFF0000,0xB458C3DC,0xE9630433,0x00000000
6215 long
0x3FFF0000,0xB525529D,0x562246BD,0x00000000
6216 long
0x3FFF0000,0xB5E2CCA9,0x5F9D88CC,0x00000000
6217 long
0x3FFF0000,0xB692CADA,0x7ACA1ADA,0x00000000
6218 long
0x3FFF0000,0xB736AEA7,0xA6925838,0x00000000
6219 long
0x3FFF0000,0xB7CFAB28,0x7E9F7B36,0x00000000
6220 long
0x3FFF0000,0xB85ECC66,0xCB219835,0x00000000
6221 long
0x3FFF0000,0xB8E4FD5A,0x20A593DA,0x00000000
6222 long
0x3FFF0000,0xB99F41F6,0x4AFF9BB5,0x00000000
6223 long
0x3FFF0000,0xBA7F1E17,0x842BBE7B,0x00000000
6224 long
0x3FFF0000,0xBB471285,0x7637E17D,0x00000000
6225 long
0x3FFF0000,0xBBFABE8A,0x4788DF6F,0x00000000
6226 long
0x3FFF0000,0xBC9D0FAD,0x2B689D79,0x00000000
6227 long
0x3FFF0000,0xBD306A39,0x471ECD86,0x00000000
6228 long
0x3FFF0000,0xBDB6C731,0x856AF18A,0x00000000
6229 long
0x3FFF0000,0xBE31CAC5,0x02E80D70,0x00000000
6230 long
0x3FFF0000,0xBEA2D55C,0xE33194E2,0x00000000
6231 long
0x3FFF0000,0xBF0B10B7,0xC03128F0,0x00000000
6232 long
0x3FFF0000,0xBF6B7A18,0xDACB778D,0x00000000
6233 long
0x3FFF0000,0xBFC4EA46,0x63FA18F6,0x00000000
6234 long
0x3FFF0000,0xC0181BDE,0x8B89A454,0x00000000
6235 long
0x3FFF0000,0xC065B066,0xCFBF6439,0x00000000
6236 long
0x3FFF0000,0xC0AE345F,0x56340AE6,0x00000000
6237 long
0x3FFF0000,0xC0F22291,0x9CB9E6A7,0x00000000
6249 #--ENTRY POINT FOR ATAN(X), HERE X IS FINITE, NON-ZERO, AND NOT NAN'S
6251 fmov.x
(%a0
),%fp0
# LOAD INPUT
6256 and.l &0x7FFFFFFF,%d1
6258 cmp.
l %d1
,&0x3FFB8000 # |X| >= 1/16?
6263 cmp.
l %d1
,&0x4002FFFF # |X| < 16 ?
6267 #--THE MOST LIKELY CASE, |X| IN [1/16, 16). WE USE TABLE TECHNIQUE
6268 #--THE IDEA IS ATAN(X) = ATAN(F) + ATAN( [X-F] / [1+XF] ).
6269 #--SO IF F IS CHOSEN TO BE CLOSE TO X AND ATAN(F) IS STORED IN
6270 #--A TABLE, ALL WE NEED IS TO APPROXIMATE ATAN(U) WHERE
6271 #--U = (X-F)/(1+XF) IS SMALL (REMEMBER F IS CLOSE TO X). IT IS
6272 #--TRUE THAT A DIVIDE IS NOW NEEDED, BUT THE APPROXIMATION FOR
6273 #--ATAN(U) IS A VERY SHORT POLYNOMIAL AND THE INDEXING TO
6274 #--FETCH F AND SAVING OF REGISTERS CAN ALL BE HIDDEN UNDER THE
6275 #--DIVIDE. IN THE END THIS METHOD IS MUCH FASTER THAN A TRADITIONAL
6276 #--ONE. NOTE ALSO THAT THE TRADITIONAL SCHEME THAT APPROXIMATES
6277 #--ATAN(X) DIRECTLY WILL NEED TO USE A RATIONAL APPROXIMATION
6278 #--(DIVISION NEEDED) ANYWAY BECAUSE A POLYNOMIAL APPROXIMATION
6279 #--WILL INVOLVE A VERY LONG POLYNOMIAL.
6281 #--NOW WE SEE X AS +-2^K * 1.BBBBBBB....B <- 1. + 63 BITS
6282 #--WE CHOSE F TO BE +-2^K * 1.BBBB1
6283 #--THAT IS, IT MATCHES THE EXPONENT AND FIRST 5 BITS OF X, AND THE
6284 #--SIXTH BIT IS SET TO 1. SINCE K = -4, -3, ..., 3, THERE
6285 #--ARE ONLY 8 TIMES 16 = 2^7 = 128 |F|'S. SINCE ATAN(-|F|) IS
6286 #-- -ATAN(|F|), WE NEED TO STORE ONLY ATAN(|F|).
6290 and.l &0xF8000000,XFRAC
(%a6
) # FIRST 5 BITS
6291 or.l &0x04000000,XFRAC
(%a6
) # SET 6-TH BIT TO 1
6292 mov.
l &0x00000000,XFRACLO
(%a6
) # LOCATION OF X IS NOW F
6294 fmov.x
%fp0
,%fp1
# FP1 IS X
6295 fmul.x X
(%a6
),%fp1
# FP1 IS X*F, NOTE THAT X*F > 0
6296 fsub.x X
(%a6
),%fp0
# FP0 IS X-F
6297 fadd.s
&0x3F800000,%fp1
# FP1 IS 1 + X*F
6298 fdiv.x
%fp1
,%fp0
# FP0 IS U = (X-F)/(1+X*F)
6300 #--WHILE THE DIVISION IS TAKING ITS TIME, WE FETCH ATAN(|F|)
6301 #--CREATE ATAN(F) AND STORE IT IN ATANF, AND
6302 #--SAVE REGISTERS FP2.
6304 mov.
l %d2
,-(%sp
) # SAVE d2 TEMPORARILY
6305 mov.
l %d1
,%d2
# THE EXP AND 16 BITS OF X
6306 and.l &0x00007800,%d1
# 4 VARYING BITS OF F'S FRACTION
6307 and.l &0x7FFF0000,%d2
# EXPONENT OF F
6308 sub.l &0x3FFB0000,%d2
# K+4
6310 add.l %d2
,%d1
# THE 7 BITS IDENTIFYING F
6311 asr.
l &7,%d1
# INDEX INTO TBL OF ATAN(|F|)
6312 lea ATANTBL
(%pc
),%a1
6313 add.l %d1
,%a1
# ADDRESS OF ATAN(|F|)
6314 mov.
l (%a1
)+,ATANF
(%a6
)
6315 mov.
l (%a1
)+,ATANFHI
(%a6
)
6316 mov.
l (%a1
)+,ATANFLO
(%a6
) # ATANF IS NOW ATAN(|F|)
6317 mov.
l X
(%a6
),%d1
# LOAD SIGN AND EXPO. AGAIN
6318 and.l &0x80000000,%d1
# SIGN(F)
6319 or.l %d1
,ATANF
(%a6
) # ATANF IS NOW SIGN(F)*ATAN(|F|)
6320 mov.
l (%sp
)+,%d2
# RESTORE d2
6322 #--THAT'S ALL I HAVE TO DO FOR NOW,
6323 #--BUT ALAS, THE DIVIDE IS STILL CRANKING!
6325 #--U IN FP0, WE ARE NOW READY TO COMPUTE ATAN(U) AS
6326 #--U + A1*U*V*(A2 + V*(A3 + V)), V = U*U
6327 #--THE POLYNOMIAL MAY LOOK STRANGE, BUT IS NEVERTHELESS CORRECT.
6328 #--THE NATURAL FORM IS U + U*V*(A1 + V*(A2 + V*A3))
6329 #--WHAT WE HAVE HERE IS MERELY A1 = A3, A2 = A1/A3, A3 = A2/A3.
6330 #--THE REASON FOR THIS REARRANGEMENT IS TO MAKE THE INDEPENDENT
6331 #--PARTS A1*U*V AND (A2 + ... STUFF) MORE LOAD-BALANCED
6333 fmovm.x
&0x04,-(%sp
) # save fp2
6337 fmov.d ATANA3
(%pc
),%fp2
6338 fadd.x
%fp1
,%fp2
# A3+V
6339 fmul.x
%fp1
,%fp2
# V*(A3+V)
6340 fmul.x
%fp0
,%fp1
# U*V
6341 fadd.d ATANA2
(%pc
),%fp2
# A2+V*(A3+V)
6342 fmul.d ATANA1
(%pc
),%fp1
# A1*U*V
6343 fmul.x
%fp2
,%fp1
# A1*U*V*(A2+V*(A3+V))
6344 fadd.x
%fp1
,%fp0
# ATAN(U), FP1 RELEASED
6346 fmovm.x
(%sp
)+,&0x20 # restore fp2
6348 fmov.
l %d0
,%fpcr
# restore users rnd mode,prec
6349 fadd.x ATANF
(%a6
),%fp0
# ATAN(X)
6353 #--|X| IS IN d1 IN COMPACT FORM. FP1, d0 SAVED.
6354 #--FP0 IS X AND |X| <= 1/16 OR |X| >= 16.
6355 cmp.
l %d1
,&0x3FFF8000
6356 bgt.w ATANBIG
# I.E. |X| >= 16
6360 #--IF |X| < 2^(-40), RETURN X AS ANSWER. OTHERWISE, APPROXIMATE
6361 #--ATAN(X) BY X + X*Y*(B1+Y*(B2+Y*(B3+Y*(B4+Y*(B5+Y*B6)))))
6362 #--WHICH IS X + X*Y*( [B1+Z*(B3+Z*B5)] + [Y*(B2+Z*(B4+Z*B6))] )
6363 #--WHERE Y = X*X, AND Z = Y*Y.
6365 cmp.
l %d1
,&0x3FD78000
6368 #--COMPUTE POLYNOMIAL
6369 fmovm.x
&0x0c,-(%sp
) # save fp2/fp3
6371 fmul.x
%fp0
,%fp0
# FP0 IS Y = X*X
6374 fmul.x
%fp1
,%fp1
# FP1 IS Z = Y*Y
6376 fmov.d ATANB6
(%pc
),%fp2
6377 fmov.d ATANB5
(%pc
),%fp3
6379 fmul.x
%fp1
,%fp2
# Z*B6
6380 fmul.x
%fp1
,%fp3
# Z*B5
6382 fadd.d ATANB4
(%pc
),%fp2
# B4+Z*B6
6383 fadd.d ATANB3
(%pc
),%fp3
# B3+Z*B5
6385 fmul.x
%fp1
,%fp2
# Z*(B4+Z*B6)
6386 fmul.x
%fp3
,%fp1
# Z*(B3+Z*B5)
6388 fadd.d ATANB2
(%pc
),%fp2
# B2+Z*(B4+Z*B6)
6389 fadd.d ATANB1
(%pc
),%fp1
# B1+Z*(B3+Z*B5)
6391 fmul.x
%fp0
,%fp2
# Y*(B2+Z*(B4+Z*B6))
6392 fmul.x X
(%a6
),%fp0
# X*Y
6394 fadd.x
%fp2
,%fp1
# [B1+Z*(B3+Z*B5)]+[Y*(B2+Z*(B4+Z*B6))]
6396 fmul.x
%fp1
,%fp0
# X*Y*([B1+Z*(B3+Z*B5)]+[Y*(B2+Z*(B4+Z*B6))])
6398 fmovm.x
(%sp
)+,&0x30 # restore fp2/fp3
6400 fmov.
l %d0
,%fpcr
# restore users rnd mode,prec
6405 #--|X| < 2^(-40), ATAN(X) = X
6407 fmov.
l %d0
,%fpcr
# restore users rnd mode,prec
6408 mov.
b &FMOV_OP
,%d1
# last inst is MOVE
6409 fmov.x X
(%a6
),%fp0
# last inst - possible exception set
6414 #--IF |X| > 2^(100), RETURN SIGN(X)*(PI/2 - TINY). OTHERWISE,
6415 #--RETURN SIGN(X)*PI/2 + ATAN(-1/X).
6416 cmp.
l %d1
,&0x40638000
6419 #--APPROXIMATE ATAN(-1/X) BY
6420 #--X'+X'*Y*(C1+Y*(C2+Y*(C3+Y*(C4+Y*C5)))), X' = -1/X, Y = X'*X'
6421 #--THIS CAN BE RE-WRITTEN AS
6422 #--X'+X'*Y*( [C1+Z*(C3+Z*C5)] + [Y*(C2+Z*C4)] ), Z = Y*Y.
6424 fmovm.x
&0x0c,-(%sp
) # save fp2/fp3
6426 fmov.s
&0xBF800000,%fp1
# LOAD -1
6427 fdiv.x
%fp0
,%fp1
# FP1 IS -1/X
6429 #--DIVIDE IS STILL CRANKING
6431 fmov.x
%fp1
,%fp0
# FP0 IS X'
6432 fmul.x
%fp0
,%fp0
# FP0 IS Y = X'*X'
6433 fmov.x
%fp1
,X
(%a6
) # X IS REALLY X'
6436 fmul.x
%fp1
,%fp1
# FP1 IS Z = Y*Y
6438 fmov.d ATANC5
(%pc
),%fp3
6439 fmov.d ATANC4
(%pc
),%fp2
6441 fmul.x
%fp1
,%fp3
# Z*C5
6442 fmul.x
%fp1
,%fp2
# Z*C4
6444 fadd.d ATANC3
(%pc
),%fp3
# C3+Z*C5
6445 fadd.d ATANC2
(%pc
),%fp2
# C2+Z*C4
6447 fmul.x
%fp3
,%fp1
# Z*(C3+Z*C5), FP3 RELEASED
6448 fmul.x
%fp0
,%fp2
# Y*(C2+Z*C4)
6450 fadd.d ATANC1
(%pc
),%fp1
# C1+Z*(C3+Z*C5)
6451 fmul.x X
(%a6
),%fp0
# X'*Y
6453 fadd.x
%fp2
,%fp1
# [Y*(C2+Z*C4)]+[C1+Z*(C3+Z*C5)]
6455 fmul.x
%fp1
,%fp0
# X'*Y*([C1+Z*(C3+Z*C5)]
6456 # ... +[Y*(C2+Z*C4)])
6459 fmovm.x
(%sp
)+,&0x30 # restore fp2/fp3
6461 fmov.
l %d0
,%fpcr
# restore users rnd mode,prec
6466 fadd.x NPIBY2
(%pc
),%fp0
6470 fadd.x PPIBY2
(%pc
),%fp0
6474 #--RETURN SIGN(X)*(PIBY2 - TINY) = SIGN(X)*PIBY2 - SIGN(X)*TINY
6479 fmov.x NPIBY2
(%pc
),%fp0
6481 fadd.x PTINY
(%pc
),%fp0
6485 fmov.x PPIBY2
(%pc
),%fp0
6487 fadd.x NTINY
(%pc
),%fp0
6491 #--ENTRY POINT FOR ATAN(X) FOR DENORMALIZED ARGUMENT
6495 #########################################################################
6496 # sasin(): computes the inverse sine of a normalized input #
6497 # sasind(): computes the inverse sine of a denormalized input #
6499 # INPUT *************************************************************** #
6500 # a0 = pointer to extended precision input #
6501 # d0 = round precision,mode #
6503 # OUTPUT ************************************************************** #
6506 # ACCURACY and MONOTONICITY ******************************************* #
6507 # The returned result is within 3 ulps in 64 significant bits, #
6508 # i.e. within 0.5001 ulp to 53 bits if the result is subsequently #
6509 # rounded to double precision. The result is provably monotonic #
6510 # in double precision. #
6512 # ALGORITHM *********************************************************** #
6515 # 1. If |X| >= 1, go to 3. #
6517 # 2. (|X| < 1) Calculate asin(X) by #
6518 # z := sqrt( [1-X][1+X] ) #
6519 # asin(X) = atan( x / z ). #
6522 # 3. If |X| > 1, go to 5. #
6524 # 4. (|X| = 1) sgn := sign(X), return asin(X) := sgn * Pi/2. Exit.#
6526 # 5. (|X| > 1) Generate an invalid operation by 0 * infinity. #
6529 #########################################################################
6533 fmov.x
(%a0
),%fp0
# LOAD INPUT
6537 and.l &0x7FFFFFFF,%d1
6538 cmp.
l %d1
,&0x3FFF8000
6541 # This catch is added here for the '060 QSP. Originally, the call to
6542 # satan() would handle this case by causing the exception which would
6543 # not be caught until gen_except(). Now, with the exceptions being
6544 # detected inside of satan(), the exception would have been handled there
6545 # instead of inside sasin() as expected.
6546 cmp.
l %d1
,&0x3FD78000
6549 #--THIS IS THE USUAL CASE, |X| < 1
6550 #--ASIN(X) = ATAN( X / SQRT( (1-X)(1+X) ) )
6553 fmov.s
&0x3F800000,%fp1
6554 fsub.x
%fp0
,%fp1
# 1-X
6555 fmovm.x
&0x4,-(%sp
) # {fp2}
6556 fmov.s
&0x3F800000,%fp2
6557 fadd.x
%fp0
,%fp2
# 1+X
6558 fmul.x
%fp2
,%fp1
# (1+X)(1-X)
6559 fmovm.x
(%sp
)+,&0x20 # {fp2}
6560 fsqrt.x
%fp1
# SQRT([1-X][1+X])
6561 fdiv.x
%fp1
,%fp0
# X/SQRT([1-X][1+X])
6562 fmovm.x
&0x01,-(%sp
) # save X/SQRT(...)
6563 lea
(%sp
),%a0
# pass ptr to X/SQRT(...)
6565 add.l &0xc,%sp
# clear X/SQRT(...) from stack
6570 fcmp.s
%fp0
,&0x3F800000
6571 fbgt t_operr
# cause an operr exception
6573 #--|X| = 1, ASIN(X) = +- PI/2.
6575 fmov.x PIBY2
(%pc
),%fp0
6577 and.l &0x80000000,%d1
# SIGN BIT OF X
6578 or.l &0x3F800000,%d1
# +-1 IN SGL FORMAT
6579 mov.
l %d1
,-(%sp
) # push SIGN(X) IN SGL-FMT
6584 #--|X| < 2^(-40), ASIN(X) = X
6586 fmov.
l %d0
,%fpcr
# restore users rnd mode,prec
6587 mov.
b &FMOV_OP
,%d1
# last inst is MOVE
6588 fmov.x
(%a0
),%fp0
# last inst - possible exception
6592 #--ASIN(X) = X FOR DENORMALIZED X
6596 #########################################################################
6597 # sacos(): computes the inverse cosine of a normalized input #
6598 # sacosd(): computes the inverse cosine of a denormalized input #
6600 # INPUT *************************************************************** #
6601 # a0 = pointer to extended precision input #
6602 # d0 = round precision,mode #
6604 # OUTPUT ************************************************************** #
6607 # ACCURACY and MONOTONICITY ******************************************* #
6608 # The returned result is within 3 ulps in 64 significant bits, #
6609 # i.e. within 0.5001 ulp to 53 bits if the result is subsequently #
6610 # rounded to double precision. The result is provably monotonic #
6611 # in double precision. #
6613 # ALGORITHM *********************************************************** #
6616 # 1. If |X| >= 1, go to 3. #
6618 # 2. (|X| < 1) Calculate acos(X) by #
6619 # z := (1-X) / (1+X) #
6620 # acos(X) = 2 * atan( sqrt(z) ). #
6623 # 3. If |X| > 1, go to 5. #
6625 # 4. (|X| = 1) If X > 0, return 0. Otherwise, return Pi. Exit. #
6627 # 5. (|X| > 1) Generate an invalid operation by 0 * infinity. #
6630 #########################################################################
6634 fmov.x
(%a0
),%fp0
# LOAD INPUT
6636 mov.
l (%a0
),%d1
# pack exp w/ upper 16 fraction
6638 and.l &0x7FFFFFFF,%d1
6639 cmp.
l %d1
,&0x3FFF8000
6642 #--THIS IS THE USUAL CASE, |X| < 1
6643 #--ACOS(X) = 2 * ATAN( SQRT( (1-X)/(1+X) ) )
6646 fmov.s
&0x3F800000,%fp1
6647 fadd.x
%fp0
,%fp1
# 1+X
6649 fadd.s
&0x3F800000,%fp0
# 1-X
6650 fdiv.x
%fp1
,%fp0
# (1-X)/(1+X)
6651 fsqrt.x
%fp0
# SQRT((1-X)/(1+X))
6652 mov.
l %d0
,-(%sp
) # save original users fpcr
6654 fmovm.x
&0x01,-(%sp
) # save SQRT(...) to stack
6655 lea
(%sp
),%a0
# pass ptr to sqrt
6656 bsr satan
# ATAN(SQRT([1-X]/[1+X]))
6657 add.l &0xc,%sp
# clear SQRT(...) from stack
6659 fmov.
l (%sp
)+,%fpcr
# restore users round prec,mode
6660 fadd.x
%fp0
,%fp0
# 2 * ATAN( STUFF )
6665 fcmp.s
%fp0
,&0x3F800000
6666 fbgt t_operr
# cause an operr exception
6668 #--|X| = 1, ACOS(X) = 0 OR PI
6669 tst.
b (%a0
) # is X positive or negative?
6673 #Returns PI and inexact exception
6675 fmov.x PI
(%pc
),%fp0
# load PI
6676 fmov.
l %d0
,%fpcr
# load round mode,prec
6677 fadd.s
&0x00800000,%fp0
# add a small value
6681 bra ld_pzero
# answer is positive zero
6684 #--ACOS(X) = PI/2 FOR DENORMALIZED X
6686 fmov.
l %d0
,%fpcr
# load user's rnd mode/prec
6687 fmov.x PIBY2
(%pc
),%fp0
6690 #########################################################################
6691 # setox(): computes the exponential for a normalized input #
6692 # setoxd(): computes the exponential for a denormalized input #
6693 # setoxm1(): computes the exponential minus 1 for a normalized input #
6694 # setoxm1d(): computes the exponential minus 1 for a denormalized input #
6696 # INPUT *************************************************************** #
6697 # a0 = pointer to extended precision input #
6698 # d0 = round precision,mode #
6700 # OUTPUT ************************************************************** #
6701 # fp0 = exp(X) or exp(X)-1 #
6703 # ACCURACY and MONOTONICITY ******************************************* #
6704 # The returned result is within 0.85 ulps in 64 significant bits, #
6705 # i.e. within 0.5001 ulp to 53 bits if the result is subsequently #
6706 # rounded to double precision. The result is provably monotonic #
6707 # in double precision. #
6709 # ALGORITHM and IMPLEMENTATION **************************************** #
6713 # Step 1. Set ans := 1.0 #
6715 # Step 2. Return ans := ans + sign(X)*2^(-126). Exit. #
6716 # Notes: This will always generate one exception -- inexact. #
6722 # Step 1. Filter out extreme cases of input argument. #
6723 # 1.1 If |X| >= 2^(-65), go to Step 1.3. #
6724 # 1.2 Go to Step 7. #
6725 # 1.3 If |X| < 16380 log(2), go to Step 2. #
6726 # 1.4 Go to Step 8. #
6727 # Notes: The usual case should take the branches 1.1 -> 1.3 -> 2.#
6728 # To avoid the use of floating-point comparisons, a #
6729 # compact representation of |X| is used. This format is a #
6730 # 32-bit integer, the upper (more significant) 16 bits #
6731 # are the sign and biased exponent field of |X|; the #
6732 # lower 16 bits are the 16 most significant fraction #
6733 # (including the explicit bit) bits of |X|. Consequently, #
6734 # the comparisons in Steps 1.1 and 1.3 can be performed #
6735 # by integer comparison. Note also that the constant #
6736 # 16380 log(2) used in Step 1.3 is also in the compact #
6737 # form. Thus taking the branch to Step 2 guarantees #
6738 # |X| < 16380 log(2). There is no harm to have a small #
6739 # number of cases where |X| is less than, but close to, #
6740 # 16380 log(2) and the branch to Step 8 is taken. #
6742 # Step 2. Calculate N = round-to-nearest-int( X * 64/log2 ). #
6743 # 2.1 Set AdjFlag := 0 (indicates the branch 1.3 -> 2 #
6745 # 2.2 N := round-to-nearest-integer( X * 64/log2 ). #
6746 # 2.3 Calculate J = N mod 64; so J = 0,1,2,..., #
6748 # 2.4 Calculate M = (N - J)/64; so N = 64M + J. #
6749 # 2.5 Calculate the address of the stored value of #
6751 # 2.6 Create the value Scale = 2^M. #
6752 # Notes: The calculation in 2.2 is really performed by #
6753 # Z := X * constant #
6754 # N := round-to-nearest-integer(Z) #
6756 # constant := single-precision( 64/log 2 ). #
6758 # Using a single-precision constant avoids memory #
6759 # access. Another effect of using a single-precision #
6760 # "constant" is that the calculated value Z is #
6762 # Z = X*(64/log2)*(1+eps), |eps| <= 2^(-24). #
6764 # This error has to be considered later in Steps 3 and 4. #
6766 # Step 3. Calculate X - N*log2/64. #
6767 # 3.1 R := X + N*L1, #
6768 # where L1 := single-precision(-log2/64). #
6769 # 3.2 R := R + N*L2, #
6770 # L2 := extended-precision(-log2/64 - L1).#
6771 # Notes: a) The way L1 and L2 are chosen ensures L1+L2 #
6772 # approximate the value -log2/64 to 88 bits of accuracy. #
6773 # b) N*L1 is exact because N is no longer than 22 bits #
6774 # and L1 is no longer than 24 bits. #
6775 # c) The calculation X+N*L1 is also exact due to #
6776 # cancellation. Thus, R is practically X+N(L1+L2) to full #
6778 # d) It is important to estimate how large can |R| be #
6781 # N = rnd-to-int( X*64/log2 (1+eps) ), |eps|<=2^(-24) #
6782 # X*64/log2 (1+eps) = N + f, |f| <= 0.5 #
6783 # X*64/log2 - N = f - eps*X 64/log2 #
6784 # X - N*log2/64 = f*log2/64 - eps*X #
6787 # Now |X| <= 16446 log2, thus #
6789 # |X - N*log2/64| <= (0.5 + 16446/2^(18))*log2/64 #
6790 # <= 0.57 log2/64. #
6791 # This bound will be used in Step 4. #
6793 # Step 4. Approximate exp(R)-1 by a polynomial #
6794 # p = R + R*R*(A1 + R*(A2 + R*(A3 + R*(A4 + R*A5)))) #
6795 # Notes: a) In order to reduce memory access, the coefficients #
6796 # are made as "short" as possible: A1 (which is 1/2), A4 #
6797 # and A5 are single precision; A2 and A3 are double #
6799 # b) Even with the restrictions above, #
6800 # |p - (exp(R)-1)| < 2^(-68.8) for all |R| <= 0.0062. #
6801 # Note that 0.0062 is slightly bigger than 0.57 log2/64. #
6802 # c) To fully use the pipeline, p is separated into #
6803 # two independent pieces of roughly equal complexities #
6804 # p = [ R + R*S*(A2 + S*A4) ] + #
6805 # [ S*(A1 + S*(A3 + S*A5)) ] #
6808 # Step 5. Compute 2^(J/64)*exp(R) = 2^(J/64)*(1+p) by #
6809 # ans := T + ( T*p + t) #
6810 # where T and t are the stored values for 2^(J/64). #
6811 # Notes: 2^(J/64) is stored as T and t where T+t approximates #
6812 # 2^(J/64) to roughly 85 bits; T is in extended precision #
6813 # and t is in single precision. Note also that T is #
6814 # rounded to 62 bits so that the last two bits of T are #
6815 # zero. The reason for such a special form is that T-1, #
6816 # T-2, and T-8 will all be exact --- a property that will #
6817 # give much more accurate computation of the function #
6820 # Step 6. Reconstruction of exp(X) #
6821 # exp(X) = 2^M * 2^(J/64) * exp(R). #
6822 # 6.1 If AdjFlag = 0, go to 6.3 #
6823 # 6.2 ans := ans * AdjScale #
6824 # 6.3 Restore the user FPCR #
6825 # 6.4 Return ans := ans * Scale. Exit. #
6826 # Notes: If AdjFlag = 0, we have X = Mlog2 + Jlog2/64 + R, #
6827 # |M| <= 16380, and Scale = 2^M. Moreover, exp(X) will #
6828 # neither overflow nor underflow. If AdjFlag = 1, that #
6830 # X = (M1+M)log2 + Jlog2/64 + R, |M1+M| >= 16380. #
6831 # Hence, exp(X) may overflow or underflow or neither. #
6832 # When that is the case, AdjScale = 2^(M1) where M1 is #
6833 # approximately M. Thus 6.2 will never cause #
6834 # over/underflow. Possible exception in 6.4 is overflow #
6835 # or underflow. The inexact exception is not generated in #
6836 # 6.4. Although one can argue that the inexact flag #
6837 # should always be raised, to simulate that exception #
6838 # costs much more than the flag is worth in practical uses. #
6840 # Step 7. Return 1 + X. #
6842 # 7.2 Restore user FPCR. #
6843 # 7.3 Return ans := 1 + ans. Exit #
6844 # Notes: For non-zero X, the inexact exception will always be #
6845 # raised by 7.3. That is the only exception raised by 7.3.#
6846 # Note also that we use the FMOVEM instruction to move X #
6847 # in Step 7.1 to avoid unnecessary trapping. (Although #
6848 # the FMOVEM may not seem relevant since X is normalized, #
6849 # the precaution will be useful in the library version of #
6850 # this code where the separate entry for denormalized #
6851 # inputs will be done away with.) #
6853 # Step 8. Handle exp(X) where |X| >= 16380log2. #
6854 # 8.1 If |X| > 16480 log2, go to Step 9. #
6855 # (mimic 2.2 - 2.6) #
6856 # 8.2 N := round-to-integer( X * 64/log2 ) #
6857 # 8.3 Calculate J = N mod 64, J = 0,1,...,63 #
6858 # 8.4 K := (N-J)/64, M1 := truncate(K/2), M = K-M1, #
6860 # 8.5 Calculate the address of the stored value #
6862 # 8.6 Create the values Scale = 2^M, AdjScale = 2^M1. #
6863 # 8.7 Go to Step 3. #
6864 # Notes: Refer to notes for 2.2 - 2.6. #
6866 # Step 9. Handle exp(X), |X| > 16480 log2. #
6867 # 9.1 If X < 0, go to 9.3 #
6868 # 9.2 ans := Huge, go to 9.4 #
6869 # 9.3 ans := Tiny. #
6870 # 9.4 Restore user FPCR. #
6871 # 9.5 Return ans := ans * ans. Exit. #
6872 # Notes: Exp(X) will surely overflow or underflow, depending on #
6873 # X's sign. "Huge" and "Tiny" are respectively large/tiny #
6874 # extended-precision numbers whose square over/underflow #
6875 # with an inexact result. Thus, 9.5 always raises the #
6876 # inexact together with either overflow or underflow. #
6881 # Step 1. Set ans := 0 #
6883 # Step 2. Return ans := X + ans. Exit. #
6884 # Notes: This will return X with the appropriate rounding #
6885 # precision prescribed by the user FPCR. #
6890 # Step 1. Check |X| #
6891 # 1.1 If |X| >= 1/4, go to Step 1.3. #
6892 # 1.2 Go to Step 7. #
6893 # 1.3 If |X| < 70 log(2), go to Step 2. #
6894 # 1.4 Go to Step 10. #
6895 # Notes: The usual case should take the branches 1.1 -> 1.3 -> 2.#
6896 # However, it is conceivable |X| can be small very often #
6897 # because EXPM1 is intended to evaluate exp(X)-1 #
6898 # accurately when |X| is small. For further details on #
6899 # the comparisons, see the notes on Step 1 of setox. #
6901 # Step 2. Calculate N = round-to-nearest-int( X * 64/log2 ). #
6902 # 2.1 N := round-to-nearest-integer( X * 64/log2 ). #
6903 # 2.2 Calculate J = N mod 64; so J = 0,1,2,..., #
6905 # 2.3 Calculate M = (N - J)/64; so N = 64M + J. #
6906 # 2.4 Calculate the address of the stored value of #
6908 # 2.5 Create the values Sc = 2^M and #
6909 # OnebySc := -2^(-M). #
6910 # Notes: See the notes on Step 2 of setox. #
6912 # Step 3. Calculate X - N*log2/64. #
6913 # 3.1 R := X + N*L1, #
6914 # where L1 := single-precision(-log2/64). #
6915 # 3.2 R := R + N*L2, #
6916 # L2 := extended-precision(-log2/64 - L1).#
6917 # Notes: Applying the analysis of Step 3 of setox in this case #
6918 # shows that |R| <= 0.0055 (note that |X| <= 70 log2 in #
6921 # Step 4. Approximate exp(R)-1 by a polynomial #
6922 # p = R+R*R*(A1+R*(A2+R*(A3+R*(A4+R*(A5+R*A6))))) #
6923 # Notes: a) In order to reduce memory access, the coefficients #
6924 # are made as "short" as possible: A1 (which is 1/2), A5 #
6925 # and A6 are single precision; A2, A3 and A4 are double #
6927 # b) Even with the restriction above, #
6928 # |p - (exp(R)-1)| < |R| * 2^(-72.7) #
6929 # for all |R| <= 0.0055. #
6930 # c) To fully use the pipeline, p is separated into #
6931 # two independent pieces of roughly equal complexity #
6932 # p = [ R*S*(A2 + S*(A4 + S*A6)) ] + #
6933 # [ R + S*(A1 + S*(A3 + S*A5)) ] #
6936 # Step 5. Compute 2^(J/64)*p by #
6938 # where T and t are the stored values for 2^(J/64). #
6939 # Notes: 2^(J/64) is stored as T and t where T+t approximates #
6940 # 2^(J/64) to roughly 85 bits; T is in extended precision #
6941 # and t is in single precision. Note also that T is #
6942 # rounded to 62 bits so that the last two bits of T are #
6943 # zero. The reason for such a special form is that T-1, #
6944 # T-2, and T-8 will all be exact --- a property that will #
6945 # be exploited in Step 6 below. The total relative error #
6946 # in p is no bigger than 2^(-67.7) compared to the final #
6949 # Step 6. Reconstruction of exp(X)-1 #
6950 # exp(X)-1 = 2^M * ( 2^(J/64) + p - 2^(-M) ). #
6951 # 6.1 If M <= 63, go to Step 6.3. #
6952 # 6.2 ans := T + (p + (t + OnebySc)). Go to 6.6 #
6953 # 6.3 If M >= -3, go to 6.5. #
6954 # 6.4 ans := (T + (p + t)) + OnebySc. Go to 6.6 #
6955 # 6.5 ans := (T + OnebySc) + (p + t). #
6956 # 6.6 Restore user FPCR. #
6957 # 6.7 Return ans := Sc * ans. Exit. #
6958 # Notes: The various arrangements of the expressions give #
6959 # accurate evaluations. #
6961 # Step 7. exp(X)-1 for |X| < 1/4. #
6962 # 7.1 If |X| >= 2^(-65), go to Step 9. #
6963 # 7.2 Go to Step 8. #
6965 # Step 8. Calculate exp(X)-1, |X| < 2^(-65). #
6966 # 8.1 If |X| < 2^(-16312), goto 8.3 #
6967 # 8.2 Restore FPCR; return ans := X - 2^(-16382). #
6969 # 8.3 X := X * 2^(140). #
6970 # 8.4 Restore FPCR; ans := ans - 2^(-16382). #
6971 # Return ans := ans*2^(140). Exit #
6972 # Notes: The idea is to return "X - tiny" under the user #
6973 # precision and rounding modes. To avoid unnecessary #
6974 # inefficiency, we stay away from denormalized numbers #
6975 # the best we can. For |X| >= 2^(-16312), the #
6976 # straightforward 8.2 generates the inexact exception as #
6977 # the case warrants. #
6979 # Step 9. Calculate exp(X)-1, |X| < 1/4, by a polynomial #
6980 # p = X + X*X*(B1 + X*(B2 + ... + X*B12)) #
6981 # Notes: a) In order to reduce memory access, the coefficients #
6982 # are made as "short" as possible: B1 (which is 1/2), B9 #
6983 # to B12 are single precision; B3 to B8 are double #
6984 # precision; and B2 is double extended. #
6985 # b) Even with the restriction above, #
6986 # |p - (exp(X)-1)| < |X| 2^(-70.6) #
6987 # for all |X| <= 0.251. #
6988 # Note that 0.251 is slightly bigger than 1/4. #
6989 # c) To fully preserve accuracy, the polynomial is #
6991 # X + ( S*B1 + Q ) where S = X*X and #
6992 # Q = X*S*(B2 + X*(B3 + ... + X*B12)) #
6993 # d) To fully use the pipeline, Q is separated into #
6994 # two independent pieces of roughly equal complexity #
6995 # Q = [ X*S*(B2 + S*(B4 + ... + S*B12)) ] + #
6996 # [ S*S*(B3 + S*(B5 + ... + S*B11)) ] #
6998 # Step 10. Calculate exp(X)-1 for |X| >= 70 log 2. #
6999 # 10.1 If X >= 70log2 , exp(X) - 1 = exp(X) for all #
7000 # practical purposes. Therefore, go to Step 1 of setox. #
7001 # 10.2 If X <= -70log2, exp(X) - 1 = -1 for all practical #
7004 # Restore user FPCR #
7005 # Return ans := ans + 2^(-126). Exit. #
7006 # Notes: 10.2 will always create an inexact and return -1 + tiny #
7007 # in the user rounding precision and mode. #
7009 #########################################################################
7011 L2
: long
0x3FDC0000,0x82E30865,0x4361C4C6,0x00000000
7013 EEXPA3
: long
0x3FA55555,0x55554CC1
7014 EEXPA2
: long
0x3FC55555,0x55554A54
7016 EM1A4
: long
0x3F811111,0x11174385
7017 EM1A3
: long
0x3FA55555,0x55554F5A
7019 EM1A2
: long
0x3FC55555,0x55555555,0x00000000,0x00000000
7021 EM1B8
: long
0x3EC71DE3,0xA5774682
7022 EM1B7
: long
0x3EFA01A0,0x19D7CB68
7024 EM1B6
: long
0x3F2A01A0,0x1A019DF3
7025 EM1B5
: long
0x3F56C16C,0x16C170E2
7027 EM1B4
: long
0x3F811111,0x11111111
7028 EM1B3
: long
0x3FA55555,0x55555555
7030 EM1B2
: long
0x3FFC0000,0xAAAAAAAA,0xAAAAAAAB
7033 TWO140
: long
0x48B00000,0x00000000
7035 long
0x37300000,0x00000000
7038 long
0x3FFF0000,0x80000000,0x00000000,0x00000000
7039 long
0x3FFF0000,0x8164D1F3,0xBC030774,0x9F841A9B
7040 long
0x3FFF0000,0x82CD8698,0xAC2BA1D8,0x9FC1D5B9
7041 long
0x3FFF0000,0x843A28C3,0xACDE4048,0xA0728369
7042 long
0x3FFF0000,0x85AAC367,0xCC487B14,0x1FC5C95C
7043 long
0x3FFF0000,0x871F6196,0x9E8D1010,0x1EE85C9F
7044 long
0x3FFF0000,0x88980E80,0x92DA8528,0x9FA20729
7045 long
0x3FFF0000,0x8A14D575,0x496EFD9C,0xA07BF9AF
7046 long
0x3FFF0000,0x8B95C1E3,0xEA8BD6E8,0xA0020DCF
7047 long
0x3FFF0000,0x8D1ADF5B,0x7E5BA9E4,0x205A63DA
7048 long
0x3FFF0000,0x8EA4398B,0x45CD53C0,0x1EB70051
7049 long
0x3FFF0000,0x9031DC43,0x1466B1DC,0x1F6EB029
7050 long
0x3FFF0000,0x91C3D373,0xAB11C338,0xA0781494
7051 long
0x3FFF0000,0x935A2B2F,0x13E6E92C,0x9EB319B0
7052 long
0x3FFF0000,0x94F4EFA8,0xFEF70960,0x2017457D
7053 long
0x3FFF0000,0x96942D37,0x20185A00,0x1F11D537
7054 long
0x3FFF0000,0x9837F051,0x8DB8A970,0x9FB952DD
7055 long
0x3FFF0000,0x99E04593,0x20B7FA64,0x1FE43087
7056 long
0x3FFF0000,0x9B8D39B9,0xD54E5538,0x1FA2A818
7057 long
0x3FFF0000,0x9D3ED9A7,0x2CFFB750,0x1FDE494D
7058 long
0x3FFF0000,0x9EF53260,0x91A111AC,0x20504890
7059 long
0x3FFF0000,0xA0B0510F,0xB9714FC4,0xA073691C
7060 long
0x3FFF0000,0xA2704303,0x0C496818,0x1F9B7A05
7061 long
0x3FFF0000,0xA43515AE,0x09E680A0,0xA0797126
7062 long
0x3FFF0000,0xA5FED6A9,0xB15138EC,0xA071A140
7063 long
0x3FFF0000,0xA7CD93B4,0xE9653568,0x204F62DA
7064 long
0x3FFF0000,0xA9A15AB4,0xEA7C0EF8,0x1F283C4A
7065 long
0x3FFF0000,0xAB7A39B5,0xA93ED338,0x9F9A7FDC
7066 long
0x3FFF0000,0xAD583EEA,0x42A14AC8,0xA05B3FAC
7067 long
0x3FFF0000,0xAF3B78AD,0x690A4374,0x1FDF2610
7068 long
0x3FFF0000,0xB123F581,0xD2AC2590,0x9F705F90
7069 long
0x3FFF0000,0xB311C412,0xA9112488,0x201F678A
7070 long
0x3FFF0000,0xB504F333,0xF9DE6484,0x1F32FB13
7071 long
0x3FFF0000,0xB6FD91E3,0x28D17790,0x20038B30
7072 long
0x3FFF0000,0xB8FBAF47,0x62FB9EE8,0x200DC3CC
7073 long
0x3FFF0000,0xBAFF5AB2,0x133E45FC,0x9F8B2AE6
7074 long
0x3FFF0000,0xBD08A39F,0x580C36C0,0xA02BBF70
7075 long
0x3FFF0000,0xBF1799B6,0x7A731084,0xA00BF518
7076 long
0x3FFF0000,0xC12C4CCA,0x66709458,0xA041DD41
7077 long
0x3FFF0000,0xC346CCDA,0x24976408,0x9FDF137B
7078 long
0x3FFF0000,0xC5672A11,0x5506DADC,0x201F1568
7079 long
0x3FFF0000,0xC78D74C8,0xABB9B15C,0x1FC13A2E
7080 long
0x3FFF0000,0xC9B9BD86,0x6E2F27A4,0xA03F8F03
7081 long
0x3FFF0000,0xCBEC14FE,0xF2727C5C,0x1FF4907D
7082 long
0x3FFF0000,0xCE248C15,0x1F8480E4,0x9E6E53E4
7083 long
0x3FFF0000,0xD06333DA,0xEF2B2594,0x1FD6D45C
7084 long
0x3FFF0000,0xD2A81D91,0xF12AE45C,0xA076EDB9
7085 long
0x3FFF0000,0xD4F35AAB,0xCFEDFA20,0x9FA6DE21
7086 long
0x3FFF0000,0xD744FCCA,0xD69D6AF4,0x1EE69A2F
7087 long
0x3FFF0000,0xD99D15C2,0x78AFD7B4,0x207F439F
7088 long
0x3FFF0000,0xDBFBB797,0xDAF23754,0x201EC207
7089 long
0x3FFF0000,0xDE60F482,0x5E0E9124,0x9E8BE175
7090 long
0x3FFF0000,0xE0CCDEEC,0x2A94E110,0x20032C4B
7091 long
0x3FFF0000,0xE33F8972,0xBE8A5A50,0x2004DFF5
7092 long
0x3FFF0000,0xE5B906E7,0x7C8348A8,0x1E72F47A
7093 long
0x3FFF0000,0xE8396A50,0x3C4BDC68,0x1F722F22
7094 long
0x3FFF0000,0xEAC0C6E7,0xDD243930,0xA017E945
7095 long
0x3FFF0000,0xED4F301E,0xD9942B84,0x1F401A5B
7096 long
0x3FFF0000,0xEFE4B99B,0xDCDAF5CC,0x9FB9A9E3
7097 long
0x3FFF0000,0xF281773C,0x59FFB138,0x20744C05
7098 long
0x3FFF0000,0xF5257D15,0x2486CC2C,0x1F773A19
7099 long
0x3FFF0000,0xF7D0DF73,0x0AD13BB8,0x1FFE90D5
7100 long
0x3FFF0000,0xFA83B2DB,0x722A033C,0xA041ED22
7101 long
0x3FFF0000,0xFD3E0C0C,0xF486C174,0x1F853F3A
7105 set ADJSCALE
,FP_SCR1
7111 #--entry point for EXP(X), here X is finite, non-zero, and not NaN's
7114 mov.
l (%a0
),%d1
# load part of input X
7115 and.l &0x7FFF0000,%d1
# biased expo. of X
7116 cmp.
l %d1
,&0x3FBE0000 # 2^(-65)
7117 bge.
b EXPC1
# normal case
7121 #--The case |X| >= 2^(-65)
7122 mov.w
4(%a0
),%d1
# expo. and partial sig. of |X|
7123 cmp.
l %d1
,&0x400CB167 # 16380 log2 trunc. 16 bits
7124 blt.
b EXPMAIN
# normal case
7129 #--This is the normal branch: 2^(-65) <= |X| < 16380 log2.
7130 fmov.x
(%a0
),%fp0
# load input from (a0)
7133 fmul.s
&0x42B8AA3B,%fp0
# 64/log2 * X
7134 fmovm.x
&0xc,-(%sp
) # save fp2 {%fp2/%fp3}
7135 mov.
l &0,ADJFLAG
(%a6
)
7136 fmov.
l %fp0
,%d1
# N = int( X * 64/log2 )
7137 lea EEXPTBL
(%pc
),%a1
7138 fmov.
l %d1
,%fp0
# convert to floating-format
7140 mov.
l %d1
,L_SCR1
(%a6
) # save N temporarily
7141 and.l &0x3F,%d1
# d1 is J = N mod 64
7143 add.l %d1
,%a1
# address of 2^(J/64)
7144 mov.
l L_SCR1
(%a6
),%d1
7145 asr.
l &6,%d1
# d1 is M
7146 add.w
&0x3FFF,%d1
# biased expo. of 2^(M)
7147 mov.w L2
(%pc
),L_SCR1
(%a6
) # prefetch L2, no need in CB
7151 #--fp2,fp3 saved on the stack. fp0 is N, fp1 is X,
7152 #--a1 points to 2^(J/64), d1 is biased expo. of 2^(M)
7154 fmul.s
&0xBC317218,%fp0
# N * L1, L1 = lead(-log2/64)
7155 fmul.x L2
(%pc
),%fp2
# N * L2, L1+L2 = -log2/64
7156 fadd.x
%fp1
,%fp0
# X + N*L1
7157 fadd.x
%fp2
,%fp0
# fp0 is R, reduced arg.
7160 #--WE NOW COMPUTE EXP(R)-1 BY A POLYNOMIAL
7161 #-- R + R*R*(A1 + R*(A2 + R*(A3 + R*(A4 + R*A5))))
7162 #--TO FULLY USE THE PIPELINE, WE COMPUTE S = R*R
7163 #--[R+R*S*(A2+S*A4)] + [S*(A1+S*(A3+S*A5))]
7166 fmul.x
%fp1
,%fp1
# fp1 IS S = R*R
7168 fmov.s
&0x3AB60B70,%fp2
# fp2 IS A5
7170 fmul.x
%fp1
,%fp2
# fp2 IS S*A5
7172 fmul.s
&0x3C088895,%fp3
# fp3 IS S*A4
7174 fadd.d EEXPA3
(%pc
),%fp2
# fp2 IS A3+S*A5
7175 fadd.d EEXPA2
(%pc
),%fp3
# fp3 IS A2+S*A4
7177 fmul.x
%fp1
,%fp2
# fp2 IS S*(A3+S*A5)
7178 mov.w
%d1
,SCALE
(%a6
) # SCALE is 2^(M) in extended
7179 mov.
l &0x80000000,SCALE+
4(%a6
)
7182 fmul.x
%fp1
,%fp3
# fp3 IS S*(A2+S*A4)
7184 fadd.s
&0x3F000000,%fp2
# fp2 IS A1+S*(A3+S*A5)
7185 fmul.x
%fp0
,%fp3
# fp3 IS R*S*(A2+S*A4)
7187 fmul.x
%fp1
,%fp2
# fp2 IS S*(A1+S*(A3+S*A5))
7188 fadd.x
%fp3
,%fp0
# fp0 IS R+R*S*(A2+S*A4),
7190 fmov.x
(%a1
)+,%fp1
# fp1 is lead. pt. of 2^(J/64)
7191 fadd.x
%fp2
,%fp0
# fp0 is EXP(R) - 1
7194 #--final reconstruction process
7195 #--EXP(X) = 2^M * ( 2^(J/64) + 2^(J/64)*(EXP(R)-1) )
7197 fmul.x
%fp1
,%fp0
# 2^(J/64)*(Exp(R)-1)
7198 fmovm.x
(%sp
)+,&0x30 # fp2 restored {%fp2/%fp3}
7199 fadd.s
(%a1
),%fp0
# accurate 2^(J/64)
7201 fadd.x
%fp1
,%fp0
# 2^(J/64) + 2^(J/64)*...
7202 mov.
l ADJFLAG
(%a6
),%d1
7208 fmul.x ADJSCALE
(%a6
),%fp0
7210 fmov.
l %d0
,%fpcr
# restore user FPCR
7211 mov.
b &FMUL_OP
,%d1
# last inst is MUL
7212 fmul.x SCALE
(%a6
),%fp0
# multiply 2^(M)
7217 fmovm.x
(%a0
),&0x80 # load X
7219 fadd.s
&0x3F800000,%fp0
# 1+X in user mode
# exp(X), large-|X| reduction: same N = int(X*64/log2) and J = N mod 64
# as the normal branch, but the exponent K = N >> 6 is split into
# M1 = K >> 1 and M = K - M1. ADJSCALE is built as 2^(M1), ADJFLAG is
# set to 1, and control rejoins the common code at EXPCONT1 with d1
# holding the biased exponent of 2^(M).
7224 cmp.
l %d1
,&0x400CB27C # 16480 log2
7227 fmov.x
(%a0
),%fp0
# load input from (a0)
7230 fmul.s
&0x42B8AA3B,%fp0
# 64/log2 * X
7231 fmovm.x
&0xc,-(%sp
) # save fp2 and fp3 {%fp2/%fp3}
7232 mov.
l &1,ADJFLAG
(%a6
) # ADJFLAG := 1
7233 fmov.
l %fp0
,%d1
# N = int( X * 64/log2 )
7234 lea EEXPTBL
(%pc
),%a1
# a1 = address of EEXPTBL
7235 fmov.
l %d1
,%fp0
# convert to floating-format
7236 mov.
l %d1
,L_SCR1
(%a6
) # save N temporarily
7237 and.l &0x3F,%d1
# d1 is J = N mod 64
7239 add.l %d1
,%a1
# address of 2^(J/64)
7240 mov.
l L_SCR1
(%a6
),%d1
# reload N
7241 asr.
l &6,%d1
# d1 is K
7242 mov.
l %d1
,L_SCR1
(%a6
) # save K temporarily
7243 asr.
l &1,%d1
# d1 is M1
7244 sub.l %d1
,L_SCR1
(%a6
) # L_SCR1 is M = K - M1
7245 add.w
&0x3FFF,%d1
# biased expo. of 2^(M1)
7246 mov.w
%d1
,ADJSCALE
(%a6
) # ADJSCALE := 2^(M1)
7247 mov.
l &0x80000000,ADJSCALE+
4(%a6
) # upper mantissa longword of ADJSCALE
7248 clr.
l ADJSCALE+
8(%a6
) # lower mantissa longword of ADJSCALE = 0
7249 mov.
l L_SCR1
(%a6
),%d1
# d1 is M
7250 add.w
&0x3FFF,%d1
# biased expo. of 2^(M)
7251 bra.w EXPCONT1
# go back to Step 3
7255 tst.
b (%a0
) # is X positive or negative?
7261 #--entry point for EXP(X), X is denormalized
7263 andi.l &0x80000000,(%sp
) # keep only the sign bit of the longword at (%sp)
7264 ori.
l &0x00800000,(%sp
) # sign(X)*2^(-126)
7266 fmov.s
&0x3F800000,%fp0
# fp0 = 1.0 (single-precision constant)
# expm1(X) for 1/4 <= |X| <= 70 log2: same argument reduction as exp
# (N = int(X*64/log2), J = N mod 64, M = N >> 6), a degree-6 polynomial
# for exp(R)-1, then a sum of T (extended, at (%a1)), t (single, at
# 12(%a1)), the scaled polynomial p and OnebySc = -2^(-M). Several
# summation orders appear below; one is labelled Step 6.5 (-3 <= M <= 63).
7274 #--entry point for EXPM1(X), here X is finite, non-zero, non-NaN
7278 mov.
l (%a0
),%d1
# load part of input X
7279 and.l &0x7FFF0000,%d1
# biased expo. of X
7280 cmp.
l %d1
,&0x3FFD0000 # 1/4
7281 bge.
b EM1CON1
# |X| >= 1/4
7286 #--The case |X| >= 1/4
7287 mov.w
4(%a0
),%d1
# expo. and partial sig. of |X|
7288 cmp.
l %d1
,&0x4004C215 # 70log2 rounded up to 16 bits
7289 ble.
b EM1MAIN
# 1/4 <= |X| <= 70log2
7294 #--This is the case: 1/4 <= |X| <= 70 log2.
7295 fmov.x
(%a0
),%fp0
# load input from (a0)
7298 fmul.s
&0x42B8AA3B,%fp0
# 64/log2 * X
7299 fmovm.x
&0xc,-(%sp
) # save fp2 and fp3 {%fp2/%fp3}
7300 fmov.
l %fp0
,%d1
# N = int( X * 64/log2 )
7301 lea EEXPTBL
(%pc
),%a1
# a1 = address of EEXPTBL
7302 fmov.
l %d1
,%fp0
# convert to floating-format
7304 mov.
l %d1
,L_SCR1
(%a6
) # save N temporarily
7305 and.l &0x3F,%d1
# d1 is J = N mod 64
7307 add.l %d1
,%a1
# address of 2^(J/64)
7308 mov.
l L_SCR1
(%a6
),%d1
# reload N
7309 asr.
l &6,%d1
# d1 is M
7310 mov.
l %d1
,L_SCR1
(%a6
) # save a copy of M
7313 #--fp2,fp3 saved on the stack. fp0 is N, fp1 is X,
7314 #--a1 points to 2^(J/64), d1 and L_SCR1 both contain M
7316 fmul.s
&0xBC317218,%fp0
# N * L1, L1 = lead(-log2/64)
7317 fmul.x L2
(%pc
),%fp2
# N * L2, L1+L2 = -log2/64
7318 fadd.x
%fp1
,%fp0
# X + N*L1
7319 fadd.x
%fp2
,%fp0
# fp0 is R, reduced arg.
7320 add.w
&0x3FFF,%d1
# d1 is biased expo. of 2^M
7323 #--WE NOW COMPUTE EXP(R)-1 BY A POLYNOMIAL
7324 #-- R + R*R*(A1 + R*(A2 + R*(A3 + R*(A4 + R*(A5 + R*A6)))))
7325 #--TO FULLY USE THE PIPELINE, WE COMPUTE S = R*R
7326 #--[R*S*(A2+S*(A4+S*A6))] + [R+S*(A1+S*(A3+S*A5))]
7329 fmul.x
%fp1
,%fp1
# fp1 IS S = R*R
7331 fmov.s
&0x3950097B,%fp2
# fp2 IS A6
7333 fmul.x
%fp1
,%fp2
# fp2 IS S*A6
7335 fmul.s
&0x3AB60B6A,%fp3
# fp3 IS S*A5
7337 fadd.d EM1A4
(%pc
),%fp2
# fp2 IS A4+S*A6
7338 fadd.d EM1A3
(%pc
),%fp3
# fp3 IS A3+S*A5
7339 mov.w
%d1
,SC(%a6
) # SC is 2^(M) in extended
7340 mov.
l &0x80000000,SC+
4(%a6
) # upper mantissa longword of SC
7343 fmul.x
%fp1
,%fp2
# fp2 IS S*(A4+S*A6)
7344 mov.
l L_SCR1
(%a6
),%d1
# d1 is M
7345 neg.w
%d1
# d1 is -M
7346 fmul.x
%fp1
,%fp3
# fp3 IS S*(A3+S*A5)
7347 add.w
&0x3FFF,%d1
# biased expo. of 2^(-M)
7348 fadd.d EM1A2
(%pc
),%fp2
# fp2 IS A2+S*(A4+S*A6)
7349 fadd.s
&0x3F000000,%fp3
# fp3 IS A1+S*(A3+S*A5)
7351 fmul.x
%fp1
,%fp2
# fp2 IS S*(A2+S*(A4+S*A6))
7352 or.w
&0x8000,%d1
# signed/expo. of -2^(-M)
7353 mov.w
%d1
,ONEBYSC
(%a6
) # OnebySc is -2^(-M)
7354 mov.
l &0x80000000,ONEBYSC+
4(%a6
) # upper mantissa longword of OnebySc
7355 clr.
l ONEBYSC+
8(%a6
) # lower mantissa longword of OnebySc = 0
7356 fmul.x
%fp3
,%fp1
# fp1 IS S*(A1+S*(A3+S*A5))
7358 fmul.x
%fp0
,%fp2
# fp2 IS R*S*(A2+S*(A4+S*A6))
7359 fadd.x
%fp1
,%fp0
# fp0 IS R+S*(A1+S*(A3+S*A5))
7361 fadd.x
%fp2
,%fp0
# fp0 IS EXP(R)-1
7363 fmovm.x
(%sp
)+,&0x30 # fp2 and fp3 restored {%fp2/%fp3}
7366 #--Compute 2^(J/64)*p
7368 fmul.x
(%a1
),%fp0
# 2^(J/64)*(Exp(R)-1)
7372 mov.
l L_SCR1
(%a6
),%d1
# retrieve M
7376 fmov.s
12(%a1
),%fp1
# fp1 is t
7377 fadd.x ONEBYSC
(%a6
),%fp1
# fp1 is t+OnebySc
7378 fadd.x
%fp1
,%fp0
# p+(t+OnebySc), fp1 released
7379 fadd.x
(%a1
),%fp0
# T+(p+(t+OnebySc))
7387 fadd.s
12(%a1
),%fp0
# p+t
7388 fadd.x
(%a1
),%fp0
# T+(p+t)
7389 fadd.x ONEBYSC
(%a6
),%fp0
# OnebySc + (T+(p+t))
7392 #--Step 6.5 -3 <= M <= 63
7393 fmov.x
(%a1
)+,%fp1
# fp1 is T
7394 fadd.s
(%a1
),%fp0
# fp0 is p+t
7395 fadd.x ONEBYSC
(%a6
),%fp1
# fp1 is T+OnebySc
7396 fadd.x
%fp1
,%fp0
# (T+OnebySc)+(p+t)
7405 #--Step 7 |X| < 1/4.
7406 cmp.
l %d1
,&0x3FBE0000 # 2^(-65)
7410 #--Step 8 |X| < 2^(-65)
# NOTE(review): with the 0x3FFF exponent bias used in this file, the
# biased exponent 0x0033 below is 2^(0x33-0x3FFF) = 2^(-16332), while
# the comment says 2^(-16312) -- confirm which value is intended.
7411 cmp.
l %d1
,&0x00330000 # 2^(-16312)
7414 mov.
l &0x80010000,SC(%a6
) # SC is -2^(-16382)
7415 mov.
l &0x80000000,SC+
4(%a6
) # upper mantissa longword of SC
7419 mov.
b &FADD_OP
,%d1
# last inst is ADD
7426 fmul.d TWO140
(%pc
),%fp0
# scale fp0 by the constant TWO140
7427 mov.
l &0x80010000,SC(%a6
) # SC is -2^(-16382)
7428 mov.
l &0x80000000,SC+
4(%a6
) # upper mantissa longword of SC
7432 mov.
b &FMUL_OP
,%d1
# last inst is MUL
7433 fmul.d TWON140
(%pc
),%fp0
# scale fp0 by the constant TWON140
# Step 9: polynomial in S = X*X evaluated as two interleaved Horner
# chains. fp1 carries the even-numbered coefficients (B12, B10, ..., B2)
# and fp2 the odd-numbered ones (B11, B9, ..., B3); they are combined
# into Q and added to S*B1.
7437 #--Step 9 exp(X)-1 by a simple polynomial
7438 fmov.x
(%a0
),%fp0
# fp0 is X
7439 fmul.x
%fp0
,%fp0
# fp0 is S := X*X
7440 fmovm.x
&0xc,-(%sp
) # save fp2 and fp3 {%fp2/%fp3}
7441 fmov.s
&0x2F30CAA8,%fp1
# fp1 is B12
7442 fmul.x
%fp0
,%fp1
# fp1 is S*B12
7443 fmov.s
&0x310F8290,%fp2
# fp2 is B11
7444 fadd.s
&0x32D73220,%fp1
# fp1 is B10+S*B12
7446 fmul.x
%fp0
,%fp2
# fp2 is S*B11
7447 fmul.x
%fp0
,%fp1
# fp1 is S*(B10 + ...
7449 fadd.s
&0x3493F281,%fp2
# fp2 is B9+S*...
7450 fadd.d EM1B8
(%pc
),%fp1
# fp1 is B8+S*...
7452 fmul.x
%fp0
,%fp2
# fp2 is S*(B9+...
7453 fmul.x
%fp0
,%fp1
# fp1 is S*(B8+...
7455 fadd.d EM1B7
(%pc
),%fp2
# fp2 is B7+S*...
7456 fadd.d EM1B6
(%pc
),%fp1
# fp1 is B6+S*...
7458 fmul.x
%fp0
,%fp2
# fp2 is S*(B7+...
7459 fmul.x
%fp0
,%fp1
# fp1 is S*(B6+...
7461 fadd.d EM1B5
(%pc
),%fp2
# fp2 is B5+S*...
7462 fadd.d EM1B4
(%pc
),%fp1
# fp1 is B4+S*...
7464 fmul.x
%fp0
,%fp2
# fp2 is S*(B5+...
7465 fmul.x
%fp0
,%fp1
# fp1 is S*(B4+...
7467 fadd.d EM1B3
(%pc
),%fp2
# fp2 is B3+S*...
7468 fadd.x EM1B2
(%pc
),%fp1
# fp1 is B2+S*...
7470 fmul.x
%fp0
,%fp2
# fp2 is S*(B3+...
7471 fmul.x
%fp0
,%fp1
# fp1 is S*(B2+...
7473 fmul.x
%fp0
,%fp2
# fp2 is S*S*(B3+...)
7474 fmul.x
(%a0
),%fp1
# fp1 is X*S*(B2...
7476 fmul.s
&0x3F000000,%fp0
# fp0 is S*B1 (B1 = 0.5 as a single-precision immediate)
7477 fadd.x
%fp2
,%fp1
# fp1 is Q
7479 fmovm.x
(%sp
)+,&0x30 # fp2 and fp3 restored {%fp2/%fp3}
7481 fadd.x
%fp1
,%fp0
# fp0 is S*B1+Q
7488 #--Step 10 |X| > 70 log2
7493 fmov.s
&0xBF800000,%fp0
# fp0 is -1
7495 fadd.s
&0x00800000,%fp0
# -1 + 2^(-126)
7500 #--entry point for EXPM1(X), here X is denormalized
7504 #########################################################################
7505 # sgetexp(): returns the exponent portion of the input argument. #
7506 # The exponent bias is removed and the exponent value is #
7507 # returned as an extended precision number in fp0. #
7508 # sgetexpd(): handles denormalized numbers. #
7510 # sgetman(): extracts the mantissa of the input argument. The #
7511 # mantissa is converted to an extended precision number w/ #
7512 # an exponent of $3fff and is returned in fp0. The range of #
7513 # the result is [1.0 - 2.0). #
7514 # sgetmand(): handles denormalized numbers. #
7516 # INPUT *************************************************************** #
7517 # a0 = pointer to extended precision input #
7519 # OUTPUT ************************************************************** #
7520 # fp0 = exponent(X) or mantissa(X) #
7522 #########################################################################
# sgetexp: fetch the exponent word of the operand at (a0), drop the
# sign bit, remove the 0x3fff bias and return the result in fp0.
7526 mov.w SRC_EX
(%a0
),%d0
# get the exponent
7527 bclr &0xf,%d0
# clear the sign bit
7528 subi.w
&0x3fff,%d0
# subtract off the bias
7529 fmov.w
%d0
,%fp0
# return exp in fp0
7530 blt.
b sgetexpn
# it's negative
7534 mov.
b &neg_bmask
,FPSR_CC
(%a6
) # set 'N' ccode bit
# denormalized input: norm is called first and the exponent is derived
# from the value it leaves in d0 (the shift amount, per the comment below)
7539 bsr.
l norm
# normalize
7540 neg.w
%d0
# new exp = -(shft amt)
7541 subi.w
&0x3fff,%d0
# subtract off the bias
7542 fmov.w
%d0
,%fp0
# return exp in fp0
7543 mov.
b &neg_bmask
,FPSR_CC
(%a6
) # set 'N' ccode bit
# sgetman: keep the sign bit of the operand, force its exponent field
# to 0x3fff and return the rebuilt value in fp0.
7548 mov.w SRC_EX
(%a0
),%d0
# get the exp
7549 ori.w
&0x7fff,%d0
# set all 15 exponent bits (sign bit is kept)
7550 bclr &0xe,%d0
# clearing bit 14 leaves exponent 0x3fff, i.e. +-3fff
7552 # here, we build the result in a tmp location so as not to disturb the input
7553 mov.
l SRC_HI
(%a0
),FP_SCR0_HI
(%a6
) # copy to tmp loc
7554 mov.
l SRC_LO
(%a0
),FP_SCR0_LO
(%a6
) # copy to tmp loc
7555 mov.w
%d0
,FP_SCR0_EX
(%a6
) # insert new exponent
7556 fmov.x FP_SCR0
(%a6
),%fp0
# put new value back in fp0
7557 bmi.
b sgetmann
# it's negative
7561 mov.
b &neg_bmask
,FPSR_CC
(%a6
) # set 'N' ccode bit
7565 # For denormalized numbers, shift the mantissa until the j-bit = 1,
7566 # then load the exponent with +/- $3fff.
7570 bsr.
l norm
# normalize exponent
7573 #########################################################################
7574 # scosh(): computes the hyperbolic cosine of a normalized input #
7575 # scoshd(): computes the hyperbolic cosine of a denormalized input #
7577 # INPUT *************************************************************** #
7578 # a0 = pointer to extended precision input #
7579 # d0 = round precision,mode #
7581 # OUTPUT ************************************************************** #
7584 # ACCURACY and MONOTONICITY ******************************************* #
7585 # The returned result is within 3 ulps in 64 significant bits, #
7586 # i.e. within 0.5001 ulp to 53 bits if the result is subsequently #
7587 # rounded to double precision. The result is provably monotonic #
7588 # in double precision. #
7590 # ALGORITHM *********************************************************** #
7593 # 1. If |X| > 16380 log2, go to 3. #
7595 # 2. (|X| <= 16380 log2) Cosh(X) is obtained by the formulae #
7596 # y = |X|, z = exp(Y), and #
7597 # cosh(X) = (1/2)*( z + 1/z ). #
7600 # 3. (|X| > 16380 log2). If |X| > 16480 log2, go to 5. #
7602 # 4. (16380 log2 < |X| <= 16480 log2) #
7603 # cosh(X) = exp(|X|)/2. #
7604 # However, invoking exp(|X|) may cause premature #
7605 # overflow. Thus, we calculate cosh(X) as follows: #
7607 # Fact := 2**(16380) #
7608 # Y' := Y - 16381 log2 #
7609 # cosh(X) := Fact * exp(Y'). #
7612 # 5. (|X| > 16480 log2) cosh(X) must overflow. Return #
7613 # Huge*Huge to generate overflow and an infinity with #
7614 # the appropriate sign. Huge is the largest finite number #
7615 # in extended format. Exit. #
7617 #########################################################################
# extended-precision constant: biased exponent 0x7FFB, i.e. 2^(16380)
7620 long
0x7FFB0000,0x80000000,0x00000000,0x00000000
7624 fmov.x
(%a0
),%fp0
# LOAD INPUT
7628 and.l &0x7FFFFFFF,%d1
# clear the sign bit of d1
7629 cmp.
l %d1
,&0x400CB167
# same constant that exp() uses for 16380 log2
7632 #--THIS IS THE USUAL CASE, |X| < 16380 LOG2
7633 #--COSH(X) = (1/2) * ( EXP(X) + 1/EXP(X) )
7639 fmovm.x
&0x01,-(%sp
) # save |X| to stack
7640 lea
(%sp
),%a0
# pass ptr to |X|
7641 bsr setox
# FP0 IS EXP(|X|)
7642 add.l &0xc,%sp
# erase |X| from stack
7643 fmul.s
&0x3F000000,%fp0
# (1/2)EXP(|X|)
7646 fmov.s
&0x3E800000,%fp1
# (1/4)
7647 fdiv.x
%fp0
,%fp1
# 1/(2 EXP(|X|))
7650 mov.
b &FADD_OP
,%d1
# last inst is ADD
# NOTE(review): the algorithm text names 16480 log2 as the overflow
# bound, but exp() uses 0x400CB27C for 16480 log2 -- confirm the value
# 0x400CB2B3 compared against here.
7655 cmp.
l %d1
,&0x400CB2B3
7659 fsub.d T1
(%pc
),%fp0
# (|X|-16381LOG2_LEAD)
7660 fsub.d T2
(%pc
),%fp0
# |X| - 16381 LOG2, ACCURATE
7664 fmovm.x
&0x01,-(%sp
) # save fp0 to stack
7665 lea
(%sp
),%a0
# pass ptr to fp0
7667 add.l &0xc,%sp
# clear fp0 from stack
7671 mov.
b &FMUL_OP
,%d1
# last inst is MUL
7672 fmul.x TWO16380
(%pc
),%fp0
# multiply by TWO16380 (Fact in the algorithm description)
7679 #--COSH(X) = 1 FOR DENORMALIZED X
7681 fmov.s
&0x3F800000,%fp0
# fp0 = 1.0
7684 fadd.s
&0x00800000,%fp0
# 1 + 2^(-126)
7687 #########################################################################
7688 # ssinh(): computes the hyperbolic sine of a normalized input #
7689 # ssinhd(): computes the hyperbolic sine of a denormalized input #
7691 # INPUT *************************************************************** #
7692 # a0 = pointer to extended precision input #
7693 # d0 = round precision,mode #
7695 # OUTPUT ************************************************************** #
7698 # ACCURACY and MONOTONICITY ******************************************* #
7699 # The returned result is within 3 ulps in 64 significant bits, #
7700 # i.e. within 0.5001 ulp to 53 bits if the result is subsequently #
7701 # rounded to double precision. The result is provably monotonic #
7702 # in double precision. #
7704 # ALGORITHM *********************************************************** #
7707 # 1. If |X| > 16380 log2, go to 3. #
7709 # 2. (|X| <= 16380 log2) Sinh(X) is obtained by the formula #
7710 # y = |X|, sgn = sign(X), and z = expm1(Y), #
7711 # sinh(X) = sgn*(1/2)*( z + z/(1+z) ). #
7714 # 3. If |X| > 16480 log2, go to 5. #
7716 # 4. (16380 log2 < |X| <= 16480 log2) #
7717 # sinh(X) = sign(X) * exp(|X|)/2. #
7718 # However, invoking exp(|X|) may cause premature overflow. #
7719 # Thus, we calculate sinh(X) as follows: #
7722 # sgnFact := sgn * 2**(16380) #
7723 # Y' := Y - 16381 log2 #
7724 # sinh(X) := sgnFact * exp(Y'). #
7727 # 5. (|X| > 16480 log2) sinh(X) must overflow. Return #
7728 # sign(X)*Huge*Huge to generate overflow and an infinity with #
7729 # the appropriate sign. Huge is the largest finite number in #
7730 # extended format. Exit. #
7732 #########################################################################
7736 fmov.x
(%a0
),%fp0
# LOAD INPUT
7740 mov.
l %d1
,%a1
# save (compacted) operand
7741 and.l &0x7FFFFFFF,%d1
# clear the sign bit of d1
7742 cmp.
l %d1
,&0x400CB167
# same constant that exp() uses for 16380 log2
7745 #--THIS IS THE USUAL CASE, |X| < 16380 LOG2
7746 #--Y = |X|, Z = EXPM1(Y), SINH(X) = SIGN(X)*(1/2)*( Z + Z/(1+Z) )
7748 fabs.x
%fp0
# Y = |X|
7750 movm.
l &0x8040,-(%sp
) # {a1/d0}
7751 fmovm.x
&0x01,-(%sp
) # save Y on stack
7752 lea
(%sp
),%a0
# pass ptr to Y
7754 bsr setoxm1
# FP0 IS Z = EXPM1(Y)
7755 add.l &0xc,%sp
# clear Y from stack
7757 movm.
l (%sp
)+,&0x0201 # {a1/d0}
7760 fadd.s
&0x3F800000,%fp1
# 1+Z
7762 fdiv.x
%fp1
,%fp0
# Z/(1+Z)
7764 and.l &0x80000000,%d1
# keep only the sign bit
7765 or.l &0x3F000000,%d1
# sign * 0.5 as a single-precision bit pattern
7770 mov.
b &FMUL_OP
,%d1
# last inst is MUL
7771 fmul.s
(%sp
)+,%fp0
# last fp inst - possible exceptions set
# NOTE(review): the algorithm text names 16480 log2 as the overflow
# bound, but exp() uses 0x400CB27C for 16480 log2 -- confirm the value
# 0x400CB2B3 compared against here.
7775 cmp.
l %d1
,&0x400CB2B3
7778 fsub.d T1
(%pc
),%fp0
# (|X|-16381LOG2_LEAD)
7780 mov.
l &0x80000000,-(%sp
) # push a mantissa longword of sgnFact
7782 and.l &0x80000000,%d1
# keep only the sign bit
7783 or.l &0x7FFB0000,%d1
# sign with biased exponent 0x7FFB, i.e. sgn * 2^(16380)
7784 mov.
l %d1
,-(%sp
) # EXTENDED FMT
7785 fsub.d T2
(%pc
),%fp0
# |X| - 16381 LOG2, ACCURATE
7789 fmovm.x
&0x01,-(%sp
) # save fp0 on stack
7790 lea
(%sp
),%a0
# pass ptr to fp0
7792 add.l &0xc,%sp
# clear fp0 from stack
7796 mov.
b &FMUL_OP
,%d1
# last inst is MUL
7797 fmul.x
(%sp
)+,%fp0
# possible exception
7801 #--SINH(X) = X FOR DENORMALIZED X
7805 #########################################################################
7806 # stanh(): computes the hyperbolic tangent of a normalized input #
7807 # stanhd(): computes the hyperbolic tangent of a denormalized input #
7809 # INPUT *************************************************************** #
7810 # a0 = pointer to extended precision input #
7811 # d0 = round precision,mode #
7813 # OUTPUT ************************************************************** #
7816 # ACCURACY and MONOTONICITY ******************************************* #
7817 # The returned result is within 3 ulps in 64 significant bits, #
7818 # i.e. within 0.5001 ulp to 53 bits if the result is subsequently #
7819 # rounded to double precision. The result is provably monotonic #
7820 # in double precision. #
7822 # ALGORITHM *********************************************************** #
7825 # 1. If |X| >= (5/2) log2 or |X| <= 2**(-40), go to 3. #
7827 # 2. (2**(-40) < |X| < (5/2) log2) Calculate tanh(X) by #
7828 # sgn := sign(X), y := 2|X|, z := expm1(Y), and #
7829 # tanh(X) = sgn*( z/(2+z) ). #
7832 # 3. (|X| <= 2**(-40) or |X| >= (5/2) log2). If |X| < 1, #
7835 # 4. (|X| >= (5/2) log2) If |X| >= 50 log2, go to 6. #
7837 # 5. ((5/2) log2 <= |X| < 50 log2) Calculate tanh(X) by #
7838 # sgn := sign(X), y := 2|X|, z := exp(Y), #
7839 # tanh(X) = sgn - [ sgn*2/(1+z) ]. #
7842 # 6. (|X| >= 50 log2) Tanh(X) = +-1 (round to nearest). Thus, we #
7843 # calculate Tanh(X) by #
7844 # sgn := sign(X), Tiny := 2**(-126), #
7845 # tanh(X) := sgn - sgn*Tiny. #
7848 # 7. (|X| < 2**(-40)). Tanh(X) = X. Exit. #
7850 #########################################################################
7861 fmov.x
(%a0
),%fp0
# LOAD INPUT
7867 and.l &0x7FFFFFFF,%d1
# clear the sign bit of d1
7868 cmp.
l %d1
, &0x3fd78000 # is |X| < 2^(-40)?
7869 blt.w TANHBORS
# yes
7870 cmp.
l %d1
, &0x3fffddce # is |X| > (5/2)LOG2?
7871 bgt.w TANHBORS
# yes
7873 #--THIS IS THE USUAL CASE
7874 #--Y = 2|X|, Z = EXPM1(Y), TANH(X) = SIGN(X) * Z / (Z+2).
7878 and.l &0x7FFF0000,%d1
# isolate the biased exponent field
7879 add.l &0x00010000,%d1
# EXPONENT OF 2|X|
7881 and.l &0x80000000,SGN
(%a6
) # keep only the sign bit in SGN
7882 fmov.x X
(%a6
),%fp0
# FP0 IS Y = 2|X|
7886 fmovm.x
&0x1,-(%sp
) # save Y on stack
7887 lea
(%sp
),%a0
# pass ptr to Y
7888 bsr setoxm1
# FP0 IS Z = EXPM1(Y)
7889 add.l &0xc,%sp
# clear Y from stack
7893 fadd.s
&0x40000000,%fp1
# Z+2
7898 fmov.
l %d0
,%fpcr
# restore user's round prec,mode
7903 cmp.
l %d1
,&0x3FFF8000
# compare |X| with 1 (biased exponent 0x3FFF, leading mantissa bit set)
7906 cmp.
l %d1
,&0x40048AA1
# compare |X| with 50 log2
7909 #-- (5/2) LOG2 < |X| < 50 LOG2,
7910 #--TANH(X) = 1 - (2/[EXP(2X)+1]). LET Y = 2|X|, SGN = SIGN(X),
7911 #--TANH(X) = SGN - SGN*2/[EXP(Y)+1].
7915 and.l &0x7FFF0000,%d1
# isolate the biased exponent field
7916 add.l &0x00010000,%d1
# EXPO OF 2|X|
7917 mov.
l %d1
,X
(%a6
) # Y = 2|X|
7918 and.l &0x80000000,SGN
(%a6
) # keep only the sign bit in SGN
7920 fmov.x X
(%a6
),%fp0
# Y = 2|X|
7924 fmovm.x
&0x01,-(%sp
) # save Y on stack
7925 lea
(%sp
),%a0
# pass ptr to Y
7926 bsr setox
# FP0 IS EXP(Y)
7927 add.l &0xc,%sp
# clear Y from stack
7930 fadd.s
&0x3F800000,%fp0
# EXP(Y)+1
7932 eor.
l &0xC0000000,%d1
# -SIGN(X)*2
7933 fmov.s
%d1
,%fp1
# -SIGN(X)*2 IN SGL FMT
7934 fdiv.x
%fp0
,%fp1
# -SIGN(X)*2 / [EXP(Y)+1]
7937 or.l &0x3F800000,%d1
# SGN (exponent bits of 1.0 in sgl fmt OR-ed in)
7938 fmov.s
%d1
,%fp0
# SGN IN SGL FMT
7940 fmov.
l %d0
,%fpcr
# restore user's round prec,mode
7941 mov.
b &FADD_OP
,%d1
# last inst is ADD
7946 fmov.
l %d0
,%fpcr
# restore user's round prec,mode
7947 mov.
b &FMOV_OP
,%d1
# last inst is MOVE
7948 fmov.x X
(%a6
),%fp0
# last inst - possible exception set
7951 #---RETURN SGN(X) - SGN(X)EPS
7954 and.l &0x80000000,%d1
# keep only the sign bit
7955 or.l &0x3F800000,%d1
# SGN(X) * 1.0 in sgl fmt
7957 and.l &0x80000000,%d1
# keep only the sign bit
7958 eor.
l &0x80800000,%d1
# -SIGN(X)*EPS, EPS = 2^(-126) in sgl fmt
7960 fmov.
l %d0
,%fpcr
# restore user's round prec,mode
7965 #--TANH(X) = X FOR DENORMALIZED X
7969 #########################################################################
7970 # slogn(): computes the natural logarithm of a normalized input #
7971 # slognd(): computes the natural logarithm of a denormalized input #
7972 # slognp1(): computes the log(1+X) of a normalized input #
7973 # slognp1d(): computes the log(1+X) of a denormalized input #
7975 # INPUT *************************************************************** #
7976 # a0 = pointer to extended precision input #
7977 # d0 = round precision,mode #
7979 # OUTPUT ************************************************************** #
7980 # fp0 = log(X) or log(1+X) #
7982 # ACCURACY and MONOTONICITY ******************************************* #
7983 # The returned result is within 2 ulps in 64 significant bits, #
7984 # i.e. within 0.5001 ulp to 53 bits if the result is subsequently #
7985 # rounded to double precision. The result is provably monotonic #
7986 # in double precision. #
7988 # ALGORITHM *********************************************************** #
7990 # Step 1. If |X-1| < 1/16, approximate log(X) by an odd #
7991 # polynomial in u, where u = 2(X-1)/(X+1). Otherwise, #
7992 # move on to Step 2. #
7994 # Step 2. X = 2**k * Y where 1 <= Y < 2. Define F to be the first #
7995 # seven significant bits of Y plus 2**(-7), i.e. #
7996 # F = 1.xxxxxx1 in base 2 where the six "x" match those #
7997 # of Y. Note that |Y-F| <= 2**(-7). #
7999 # Step 3. Define u = (Y-F)/F. Approximate log(1+u) by a #
8000 # polynomial in u, log(1+u) = poly. #
8002 # Step 4. Reconstruct #
8003 # log(X) = log( 2**k * Y ) = k*log(2) + log(F) + log(1+u) #
8004 # by k*log(2) + (log(F) + poly). The values of log(F) are #
8005 # calculated beforehand and stored in the program. #
8008 # Step 1: If |X| < 1/16, approximate log(1+X) by an odd #
8009 # polynomial in u where u = 2X/(2+X). Otherwise, move on #
8012 # Step 2: Let 1+X = 2**k * Y, where 1 <= Y < 2. Define F as done #
8013 # in Step 2 of the algorithm for LOGN and compute #
8014 # log(1+X) as k*log(2) + log(F) + poly where poly #
8015 # approximates log(1+u), u = (Y-F)/F. #
8017 # Implementation Notes: #
8018 # Note 1. There are 64 different possible values for F, thus 64 #
8019 # log(F)'s need to be tabulated. Moreover, the values of #
8020 # 1/F are also tabulated so that the division in (Y-F)/F #
8021 # can be performed by a multiplication. #
8023 # Note 2. In Step 2 of lognp1, in order to preserve accuracy, #
8024 # the value Y-F has to be calculated carefully when #
8027 # Note 3. To fully exploit the pipeline, polynomials are usually #
8028 # separated into two parts evaluated independently before #
8031 #########################################################################
8033 long
0x3FFE0000,0xB17217F7,0xD1CF79AC,0x00000000
8045 long
0x3FC2499A,0xB5E4040B
8047 long
0xBFC555B5,0x848CB7DB
8050 long
0x3FC99999,0x987D8730
8052 long
0xBFCFFFFF,0xFF6F7E97
8055 long
0x3FD55555,0x555555A4
8057 long
0xBFE00000,0x00000008
8060 long
0x3F175496,0xADD7DAD6
8062 long
0x3F3C71C2,0xFE80C7E0
8065 long
0x3F624924,0x928BCCFF
8067 long
0x3F899999,0x999995EC
8070 long
0x3FB55555,0x55555555
8072 long
0x40000000,0x00000000
8075 long
0x3f990000,0x80000000,0x00000000,0x00000000
8078 long
0x3FFE0000,0xFE03F80F,0xE03F80FE,0x00000000
8079 long
0x3FF70000,0xFF015358,0x833C47E2,0x00000000
8080 long
0x3FFE0000,0xFA232CF2,0x52138AC0,0x00000000
8081 long
0x3FF90000,0xBDC8D83E,0xAD88D549,0x00000000
8082 long
0x3FFE0000,0xF6603D98,0x0F6603DA,0x00000000
8083 long
0x3FFA0000,0x9CF43DCF,0xF5EAFD48,0x00000000
8084 long
0x3FFE0000,0xF2B9D648,0x0F2B9D65,0x00000000
8085 long
0x3FFA0000,0xDA16EB88,0xCB8DF614,0x00000000
8086 long
0x3FFE0000,0xEF2EB71F,0xC4345238,0x00000000
8087 long
0x3FFB0000,0x8B29B775,0x1BD70743,0x00000000
8088 long
0x3FFE0000,0xEBBDB2A5,0xC1619C8C,0x00000000
8089 long
0x3FFB0000,0xA8D839F8,0x30C1FB49,0x00000000
8090 long
0x3FFE0000,0xE865AC7B,0x7603A197,0x00000000
8091 long
0x3FFB0000,0xC61A2EB1,0x8CD907AD,0x00000000
8092 long
0x3FFE0000,0xE525982A,0xF70C880E,0x00000000
8093 long
0x3FFB0000,0xE2F2A47A,0xDE3A18AF,0x00000000
8094 long
0x3FFE0000,0xE1FC780E,0x1FC780E2,0x00000000
8095 long
0x3FFB0000,0xFF64898E,0xDF55D551,0x00000000
8096 long
0x3FFE0000,0xDEE95C4C,0xA037BA57,0x00000000
8097 long
0x3FFC0000,0x8DB956A9,0x7B3D0148,0x00000000
8098 long
0x3FFE0000,0xDBEB61EE,0xD19C5958,0x00000000
8099 long
0x3FFC0000,0x9B8FE100,0xF47BA1DE,0x00000000
8100 long
0x3FFE0000,0xD901B203,0x6406C80E,0x00000000
8101 long
0x3FFC0000,0xA9372F1D,0x0DA1BD17,0x00000000
8102 long
0x3FFE0000,0xD62B80D6,0x2B80D62C,0x00000000
8103 long
0x3FFC0000,0xB6B07F38,0xCE90E46B,0x00000000
8104 long
0x3FFE0000,0xD3680D36,0x80D3680D,0x00000000
8105 long
0x3FFC0000,0xC3FD0329,0x06488481,0x00000000
8106 long
0x3FFE0000,0xD0B69FCB,0xD2580D0B,0x00000000
8107 long
0x3FFC0000,0xD11DE0FF,0x15AB18CA,0x00000000
8108 long
0x3FFE0000,0xCE168A77,0x25080CE1,0x00000000
8109 long
0x3FFC0000,0xDE1433A1,0x6C66B150,0x00000000
8110 long
0x3FFE0000,0xCB8727C0,0x65C393E0,0x00000000
8111 long
0x3FFC0000,0xEAE10B5A,0x7DDC8ADD,0x00000000
8112 long
0x3FFE0000,0xC907DA4E,0x871146AD,0x00000000
8113 long
0x3FFC0000,0xF7856E5E,0xE2C9B291,0x00000000
8114 long
0x3FFE0000,0xC6980C69,0x80C6980C,0x00000000
8115 long
0x3FFD0000,0x82012CA5,0xA68206D7,0x00000000
8116 long
0x3FFE0000,0xC4372F85,0x5D824CA6,0x00000000
8117 long
0x3FFD0000,0x882C5FCD,0x7256A8C5,0x00000000
8118 long
0x3FFE0000,0xC1E4BBD5,0x95F6E947,0x00000000
8119 long
0x3FFD0000,0x8E44C60B,0x4CCFD7DE,0x00000000
8120 long
0x3FFE0000,0xBFA02FE8,0x0BFA02FF,0x00000000
8121 long
0x3FFD0000,0x944AD09E,0xF4351AF6,0x00000000
8122 long
0x3FFE0000,0xBD691047,0x07661AA3,0x00000000
8123 long
0x3FFD0000,0x9A3EECD4,0xC3EAA6B2,0x00000000
8124 long
0x3FFE0000,0xBB3EE721,0xA54D880C,0x00000000
8125 long
0x3FFD0000,0xA0218434,0x353F1DE8,0x00000000
8126 long
0x3FFE0000,0xB92143FA,0x36F5E02E,0x00000000
8127 long
0x3FFD0000,0xA5F2FCAB,0xBBC506DA,0x00000000
8128 long
0x3FFE0000,0xB70FBB5A,0x19BE3659,0x00000000
8129 long
0x3FFD0000,0xABB3B8BA,0x2AD362A5,0x00000000
8130 long
0x3FFE0000,0xB509E68A,0x9B94821F,0x00000000
8131 long
0x3FFD0000,0xB1641795,0xCE3CA97B,0x00000000
8132 long
0x3FFE0000,0xB30F6352,0x8917C80B,0x00000000
8133 long
0x3FFD0000,0xB7047551,0x5D0F1C61,0x00000000
8134 long
0x3FFE0000,0xB11FD3B8,0x0B11FD3C,0x00000000
8135 long
0x3FFD0000,0xBC952AFE,0xEA3D13E1,0x00000000
8136 long
0x3FFE0000,0xAF3ADDC6,0x80AF3ADE,0x00000000
8137 long
0x3FFD0000,0xC2168ED0,0xF458BA4A,0x00000000
8138 long
0x3FFE0000,0xAD602B58,0x0AD602B6,0x00000000
8139 long
0x3FFD0000,0xC788F439,0xB3163BF1,0x00000000
8140 long
0x3FFE0000,0xAB8F69E2,0x8359CD11,0x00000000
8141 long
0x3FFD0000,0xCCECAC08,0xBF04565D,0x00000000
8142 long
0x3FFE0000,0xA9C84A47,0xA07F5638,0x00000000
8143 long
0x3FFD0000,0xD2420487,0x2DD85160,0x00000000
8144 long
0x3FFE0000,0xA80A80A8,0x0A80A80B,0x00000000
8145 long
0x3FFD0000,0xD7894992,0x3BC3588A,0x00000000
8146 long
0x3FFE0000,0xA655C439,0x2D7B73A8,0x00000000
8147 long
0x3FFD0000,0xDCC2C4B4,0x9887DACC,0x00000000
8148 long
0x3FFE0000,0xA4A9CF1D,0x96833751,0x00000000
8149 long
0x3FFD0000,0xE1EEBD3E,0x6D6A6B9E,0x00000000
8150 long
0x3FFE0000,0xA3065E3F,0xAE7CD0E0,0x00000000
8151 long
0x3FFD0000,0xE70D785C,0x2F9F5BDC,0x00000000
8152 long
0x3FFE0000,0xA16B312E,0xA8FC377D,0x00000000
8153 long
0x3FFD0000,0xEC1F392C,0x5179F283,0x00000000
8154 long
0x3FFE0000,0x9FD809FD,0x809FD80A,0x00000000
8155 long
0x3FFD0000,0xF12440D3,0xE36130E6,0x00000000
8156 long
0x3FFE0000,0x9E4CAD23,0xDD5F3A20,0x00000000
8157 long
0x3FFD0000,0xF61CCE92,0x346600BB,0x00000000
8158 long
0x3FFE0000,0x9CC8E160,0xC3FB19B9,0x00000000
8159 long
0x3FFD0000,0xFB091FD3,0x8145630A,0x00000000
8160 long
0x3FFE0000,0x9B4C6F9E,0xF03A3CAA,0x00000000
8161 long
0x3FFD0000,0xFFE97042,0xBFA4C2AD,0x00000000
8162 long
0x3FFE0000,0x99D722DA,0xBDE58F06,0x00000000
8163 long
0x3FFE0000,0x825EFCED,0x49369330,0x00000000
8164 long
0x3FFE0000,0x9868C809,0x868C8098,0x00000000
8165 long
0x3FFE0000,0x84C37A7A,0xB9A905C9,0x00000000
8166 long
0x3FFE0000,0x97012E02,0x5C04B809,0x00000000
8167 long
0x3FFE0000,0x87224C2E,0x8E645FB7,0x00000000
8168 long
0x3FFE0000,0x95A02568,0x095A0257,0x00000000
8169 long
0x3FFE0000,0x897B8CAC,0x9F7DE298,0x00000000
8170 long
0x3FFE0000,0x94458094,0x45809446,0x00000000
8171 long
0x3FFE0000,0x8BCF55DE,0xC4CD05FE,0x00000000
8172 long
0x3FFE0000,0x92F11384,0x0497889C,0x00000000
8173 long
0x3FFE0000,0x8E1DC0FB,0x89E125E5,0x00000000
8174 long
0x3FFE0000,0x91A2B3C4,0xD5E6F809,0x00000000
8175 long
0x3FFE0000,0x9066E68C,0x955B6C9B,0x00000000
8176 long
0x3FFE0000,0x905A3863,0x3E06C43B,0x00000000
8177 long
0x3FFE0000,0x92AADE74,0xC7BE59E0,0x00000000
8178 long
0x3FFE0000,0x8F1779D9,0xFDC3A219,0x00000000
8179 long
0x3FFE0000,0x94E9BFF6,0x15845643,0x00000000
8180 long
0x3FFE0000,0x8DDA5202,0x37694809,0x00000000
8181 long
0x3FFE0000,0x9723A1B7,0x20134203,0x00000000
8182 long
0x3FFE0000,0x8CA29C04,0x6514E023,0x00000000
8183 long
0x3FFE0000,0x995899C8,0x90EB8990,0x00000000
8184 long
0x3FFE0000,0x8B70344A,0x139BC75A,0x00000000
8185 long
0x3FFE0000,0x9B88BDAA,0x3A3DAE2F,0x00000000
8186 long
0x3FFE0000,0x8A42F870,0x5669DB46,0x00000000
8187 long
0x3FFE0000,0x9DB4224F,0xFFE1157C,0x00000000
8188 long
0x3FFE0000,0x891AC73A,0xE9819B50,0x00000000
8189 long
0x3FFE0000,0x9FDADC26,0x8B7A12DA,0x00000000
8190 long
0x3FFE0000,0x87F78087,0xF78087F8,0x00000000
8191 long
0x3FFE0000,0xA1FCFF17,0xCE733BD4,0x00000000
8192 long
0x3FFE0000,0x86D90544,0x7A34ACC6,0x00000000
8193 long
0x3FFE0000,0xA41A9E8F,0x5446FB9F,0x00000000
8194 long
0x3FFE0000,0x85BF3761,0x2CEE3C9B,0x00000000
8195 long
0x3FFE0000,0xA633CD7E,0x6771CD8B,0x00000000
8196 long
0x3FFE0000,0x84A9F9C8,0x084A9F9D,0x00000000
8197 long
0x3FFE0000,0xA8489E60,0x0B435A5E,0x00000000
8198 long
0x3FFE0000,0x83993052,0x3FBE3368,0x00000000
8199 long
0x3FFE0000,0xAA59233C,0xCCA4BD49,0x00000000
8200 long
0x3FFE0000,0x828CBFBE,0xB9A020A3,0x00000000
8201 long
0x3FFE0000,0xAC656DAE,0x6BCC4985,0x00000000
8202 long
0x3FFE0000,0x81848DA8,0xFAF0D277,0x00000000
8203 long
0x3FFE0000,0xAE6D8EE3,0x60BB2468,0x00000000
8204 long
0x3FFE0000,0x80808080,0x80808081,0x00000000
8205 long
0x3FFE0000,0xB07197A2,0x3C46C654,0x00000000
8221 #--ENTRY POINT FOR LOG(X) FOR X FINITE, NON-ZERO, NOT NAN'S
8223 fmov.x
(%a0
),%fp0
# LOAD INPUT
8224 mov.
l &0x00000000,ADJK
(%a6
) # ADJK := 0 ON THIS ENTRY
8227 #--FPCR SAVED AND CLEARED, INPUT IS 2^(ADJK)*FP0, FP0 CONTAINS
8228 #--A FINITE, NON-ZERO, NORMALIZED NUMBER.
8234 mov.
l 4(%a0
),X+
4(%a6
) # COPY UPPER MANTISSA LONGWORD OF INPUT INTO X
8235 mov.
l 8(%a0
),X+
8(%a6
) # COPY LOWER MANTISSA LONGWORD OF INPUT INTO X
8237 cmp.
l %d1
,&0 # CHECK IF X IS NEGATIVE
8238 blt.w LOGNEG
# LOG OF NEGATIVE ARGUMENT IS INVALID
8239 # X IS POSITIVE, CHECK IF X IS NEAR 1
8240 cmp.
l %d1
,&0x3ffef07d # IS X < 15/16?
8242 cmp.
l %d1
,&0x3fff8841 # IS X > 17/16?
8246 #--THIS SHOULD BE THE USUAL CASE, X NOT VERY CLOSE TO 1
8248 #--X = 2^(K) * Y, 1 <= Y < 2. THUS, Y = 1.XXXXXXXX....XX IN BINARY.
8249 #--WE DEFINE F = 1.XXXXXX1, I.E. FIRST 7 BITS OF Y AND ATTACH A 1.
8250 #--THE IDEA IS THAT LOG(X) = K*LOG2 + LOG(Y)
8251 #-- = K*LOG2 + LOG(F) + LOG(1 + (Y-F)/F).
8252 #--NOTE THAT U = (Y-F)/F IS VERY SMALL AND THUS APPROXIMATING
8253 #--LOG(1+U) CAN BE VERY EFFICIENT.
8254 #--ALSO NOTE THAT THE VALUE 1/F IS STORED IN A TABLE SO THAT NO
8255 #--DIVISION IS NEEDED TO CALCULATE (Y-F)/F.
8257 #--GET K, Y, F, AND ADDRESS OF 1/F.
8259 asr.
l &8,%d1
# SHIFTED 16 BITS, BIASED EXPO. OF X
8260 sub.l &0x3FFF,%d1
# THIS IS K
8261 add.l ADJK
(%a6
),%d1
# ADJUST K, ORIGINAL INPUT MAY BE DENORM.
8262 lea LOGTBL
(%pc
),%a0
# BASE ADDRESS OF 1/F AND LOG(F)
8263 fmov.
l %d1
,%fp1
# CONVERT K TO FLOATING-POINT FORMAT
8265 #--WHILE THE CONVERSION IS GOING ON, WE GET F AND ADDRESS OF 1/F
8266 mov.
l &0x3FFF0000,X
(%a6
) # X IS NOW Y, I.E. 2^(-K)*X
8267 mov.
l XFRAC
(%a6
),FFRAC
(%a6
) # START F FROM THE FRACTION OF Y
8268 and.l &0xFE000000,FFRAC
(%a6
) # FIRST 7 BITS OF Y
8269 or.l &0x01000000,FFRAC
(%a6
) # GET F: ATTACH A 1 AT THE EIGHTH BIT
8270 mov.
l FFRAC
(%a6
),%d1
# READY TO GET ADDRESS OF 1/F
8271 and.l &0x7E000000,%d1
# KEEP THE SIX "X" BITS OF F = 1.XXXXXX1
8274 asr.
l &4,%d1
# SHIFTED 20, D1 IS THE DISPLACEMENT
8275 add.l %d1
,%a0
# A0 IS THE ADDRESS FOR 1/F
8278 mov.
l &0x3fff0000,F
(%a6
) # F GETS BIASED EXPONENT 0x3FFF, SIGN POSITIVE
8280 fsub.x F
(%a6
),%fp0
# Y-F
8281 fmovm.x
&0xc,-(%sp
) # SAVE FP2-3 WHILE FP0 IS NOT READY
8282 #--SUMMARY: FP0 IS Y-F, A0 IS ADDRESS OF 1/F, FP1 IS K
8283 #--REGISTERS SAVED: FPCR, FP1, FP2
# NOTE(review): the fmovm above saves FP2 and FP3; the register list on
# the previous line looks stale -- confirm.
8286 #--A RE-ENTRY POINT FOR LOGNP1
8287 fmul.x
(%a0
),%fp0
# FP0 IS U = (Y-F)/F
8288 fmul.x LOGOF2
(%pc
),%fp1
# GET K*LOG2 WHILE FP0 IS NOT READY
8290 fmul.x
%fp2
,%fp2
# FP2 IS V=U*U
8291 fmov.x
%fp1
,KLOG2
(%a6
) # PUT K*LOG2 IN MEMORY, FREE FP1
8293 #--LOG(1+U) IS APPROXIMATED BY
8294 #--U + V*(A1+U*(A2+U*(A3+U*(A4+U*(A5+U*A6))))) WHICH IS
8295 #--[U + V*(A1+V*(A3+V*A5))] + [U*V*(A2+V*(A4+V*A6))]
8300 fmul.d LOGA6
(%pc
),%fp1
# V*A6
8301 fmul.d LOGA5
(%pc
),%fp2
# V*A5
8303 fadd.d LOGA4
(%pc
),%fp1
# A4+V*A6
8304 fadd.d LOGA3
(%pc
),%fp2
# A3+V*A5
8306 fmul.x
%fp3
,%fp1
# V*(A4+V*A6)
8307 fmul.x
%fp3
,%fp2
# V*(A3+V*A5)
8309 fadd.d LOGA2
(%pc
),%fp1
# A2+V*(A4+V*A6)
8310 fadd.d LOGA1
(%pc
),%fp2
# A1+V*(A3+V*A5)
8312 fmul.x
%fp3
,%fp1
# V*(A2+V*(A4+V*A6))
8313 add.l &16,%a0
# ADDRESS OF LOG(F)
8314 fmul.x
%fp3
,%fp2
# V*(A1+V*(A3+V*A5))
8316 fmul.x
%fp0
,%fp1
# U*V*(A2+V*(A4+V*A6))
8317 fadd.x
%fp2
,%fp0
# U+V*(A1+V*(A3+V*A5))
8319 fadd.x
(%a0
),%fp1
# LOG(F)+U*V*(A2+V*(A4+V*A6))
8320 fmovm.x
(%sp
)+,&0x30 # RESTORE FP2-3
8321 fadd.x
%fp1
,%fp0
# FP0 IS LOG(F) + LOG(1+U)
8324 fadd.x KLOG2
(%a6
),%fp0
# FINAL ADD
8330 # if the input is exactly equal to one, then exit through ld_pzero.
8331 # if these 2 lines weren't here, the correct answer would be returned
8332 # but the INEX2 bit would be set.
8333 fcmp.
b %fp0
,&0x1 # is it equal to one?
8334 fbeq.
l ld_pzero
# yes
8336 #--REGISTERS SAVED: FPCR, FP1. FP0 CONTAINS THE INPUT.
8338 fsub.s one
(%pc
),%fp1
# FP1 IS X-1
8339 fadd.s one
(%pc
),%fp0
# FP0 IS X+1
8340 fadd.x
%fp1
,%fp1
# FP1 IS 2(X-1)
8341 #--LOG(X) = LOG(1+U/2)-LOG(1-U/2) WHICH IS AN ODD POLYNOMIAL
8342 #--IN U, U = 2(X-1)/(X+1) = FP1/FP0
8345 #--THIS IS A RE-ENTRY POINT FOR LOGNP1
8346 fdiv.x
%fp0
,%fp1
# FP1 IS U
8347 fmovm.x
&0xc,-(%sp
) # SAVE FP2-3
8348 #--REGISTERS SAVED ARE NOW FPCR,FP1,FP2,FP3
8349 #--LET V=U*U, W=V*V, CALCULATE
8350 #--U + U*V*(B1 + V*(B2 + V*(B3 + V*(B4 + V*B5)))) BY
8351 #--U + U*V*( [B1 + W*(B3 + W*B5)] + [V*(B2 + W*B4)] )
8353 fmul.x
%fp0
,%fp0
# FP0 IS V
8354 fmov.x
%fp1
,SAVEU
(%a6
) # STORE U IN MEMORY, FREE FP1
8356 fmul.x
%fp1
,%fp1
# FP1 IS W
8358 fmov.d LOGB5
(%pc
),%fp3
8359 fmov.d LOGB4
(%pc
),%fp2
8361 fmul.x
%fp1
,%fp3
# W*B5
8362 fmul.x
%fp1
,%fp2
# W*B4
8364 fadd.d LOGB3
(%pc
),%fp3
# B3+W*B5
8365 fadd.d LOGB2
(%pc
),%fp2
# B2+W*B4
8367 fmul.x
%fp3
,%fp1
# W*(B3+W*B5), FP3 RELEASED
8369 fmul.x
%fp0
,%fp2
# V*(B2+W*B4)
8371 fadd.d LOGB1
(%pc
),%fp1
# B1+W*(B3+W*B5)
8372 fmul.x SAVEU
(%a6
),%fp0
# FP0 IS U*V
8374 fadd.x
%fp2
,%fp1
# B1+W*(B3+W*B5) + V*(B2+W*B4), FP2 RELEASED
8375 fmovm.x
(%sp
)+,&0x30 # FP2-3 RESTORED
8377 fmul.x
%fp1
,%fp0
# U*V*( [B1+W*(B3+W*B5)] + [V*(B2+W*B4)] )
8380 fadd.x SAVEU
(%a6
),%fp0
8383 #--REGISTERS SAVED FPCR. LOG(-VE) IS INVALID
8389 #--ENTRY POINT FOR LOG(X) FOR DENORMALIZED INPUT
# The operand at (a0) is normalized by hand: per the notes below its mantissa
# is shifted left until the leading 1 is in the top bit, the shifted mantissa
# is written to XFRAC/XFRAC+4, and control continues at LOGBGN (regular log(X)).
# There are two exits; both store (D4,D5), pop d2-d7 and branch to LOGBGN.
8391 mov.
l &-100,ADJK
(%a6
) # INPUT = 2^(ADJK) * FP0
8393 #----normalize the input value by left shifting k bits (k to be determined
8394 #----below), adjusting exponent and storing -k to ADJK
8395 #----the value TWOTO100 is no longer needed.
8396 #----Note that this code assumes the denormalized input is NON-ZERO.
# d2-d7 are scratch here: mask 0x3f00 (predecrement form) and mask 0xfc
# (postincrement form, used at both exits) select the same six registers.
8398 movm.
l &0x3f00,-(%sp
) # save some registers {d2-d7}
8399 mov.
l (%a0
),%d3
# D3 is exponent of smallest norm. #
8401 mov.
l 8(%a0
),%d5
# (D4,D5) is (Hi_X,Lo_X)
8402 clr.
l %d2
# D2 used for holding K
# First exit: bfffo writes to D6 the offset (counted from the most significant
# bit) of the first set bit in the 32-bit field of D4; that offset is ADDED to
# the shift count K already accumulated in D2.
8412 bfffo
%d4
{&0:&32},%d6
8414 add.l %d6
,%d2
# (D3,D4,D5) is normalized
8417 mov.
l %d4
,XFRAC
(%a6
)
8418 mov.
l %d5
,XFRAC+
4(%a6
)
8422 movm.
l (%sp
)+,&0xfc # restore registers {d2-d7}
8424 bra.w LOGBGN
# begin regular log(X)
# Second exit: here the bfffo offset itself becomes K (D2 := D6), and a copy of
# D5 (in D7) is OR-ed into D4 so the bits shifted out of the low longword are
# carried into the high longword.
8428 bfffo
%d4
{&0:&32},%d6
# find first 1
8429 mov.
l %d6
,%d2
# get k
8431 mov.
l %d5
,%d7
# a copy of D5
8436 or.l %d7
,%d4
# (D3,D4,D5) normalized
8439 mov.
l %d4
,XFRAC
(%a6
)
8440 mov.
l %d5
,XFRAC+
4(%a6
)
8444 movm.
l (%sp
)+,&0xfc # restore registers {d2-d7}
8446 bra.w LOGBGN
# begin regular log(X)
8449 #--ENTRY POINT FOR LOG(1+X) FOR X FINITE, NON-ZERO, NOT NAN'S
# a0 points to the extended-precision input Z. Three strategies are used:
#   * |Z| not above LTHOLD        -> return the signed argument itself;
#   * 1+Z outside [1/2,3/2]       -> per the notes below, hand X = round(1+Z)
#                                    to the regular LOG(X) code;
#   * EXP(-1/16) < 1+Z < EXP(1/16) -> odd polynomial in U = 2Z/(1+X);
#   * otherwise                   -> table-driven path with K = -1 or K = 0.
8451 fmov.x
(%a0
),%fp0
# LOAD INPUT
8452 fabs.x
%fp0
# test magnitude
8453 fcmp.x
%fp0
,LTHOLD
(%pc
) # compare with min threshold
8454 fbgt.w LP1REAL
# if greater, continue
# |Z| is not greater than LTHOLD: reload the original (signed) operand as the result.
8456 mov.
b &FMOV_OP
,%d1
# last inst is MOVE
8457 fmov.x
(%a0
),%fp0
# return signed argument
8461 fmov.x
(%a0
),%fp0
# LOAD INPUT
8462 mov.
l &0x00000000,ADJK
(%a6
)
8463 fmov.x
%fp0
,%fp1
# FP1 IS INPUT Z
8464 fadd.s one
(%pc
),%fp0
# X := ROUND(1+Z)
8466 mov.w XFRAC
(%a6
),XDCARE
(%a6
)
8469 ble.w LP1NEG0
# LOG OF ZERO OR -VE
# The constants compared against %d1 below read as <biased exponent>:<top 16
# mantissa bits>: 0x3ffe8000 = 1/2, 0x3fffc000 = 3/2 (presumably %d1 holds X
# in that compact form at this point - confirm against the full routine).
8470 cmp.
l %d1
,&0x3ffe8000 # IS BOUNDS [1/2,3/2]?
8472 cmp.
l %d1
,&0x3fffc000
8474 #--IF 1+Z > 3/2 OR 1+Z < 1/2, THEN X, WHICH IS ROUNDING 1+Z,
8475 #--CONTAINS AT LEAST 63 BITS OF INFORMATION OF Z. IN THAT CASE,
8476 #--SIMPLY INVOKE LOG(X) FOR LOG(1+Z).
8479 #--NEXT SEE IF EXP(-1/16) < X < EXP(1/16)
# 0x3ffef07d ~ 0.9394 = EXP(-1/16); 0x3fff8841 ~ 1.0645 = EXP(1/16)
8480 cmp.
l %d1
,&0x3ffef07d
8482 cmp.
l %d1
,&0x3fff8841
8486 #--EXP(-1/16) < X < EXP(1/16). LOG(1+Z) = LOG(1+U/2) - LOG(1-U/2)
8487 #--WHERE U = 2Z/(2+Z) = 2Z/(1+X).
8488 fadd.x
%fp1
,%fp1
# FP1 IS 2Z
8489 fadd.s one
(%pc
),%fp0
# FP0 IS 1+X
8494 #--HERE WE USE THE USUAL TABLE DRIVEN APPROACH. CARE HAS TO BE
8495 #--TAKEN BECAUSE 1+Z CAN HAVE 67 BITS OF INFORMATION AND WE MUST
8496 #--PRESERVE ALL THE INFORMATION. BECAUSE 1+Z IS IN [1/2,3/2],
8497 #--THERE ARE ONLY TWO CASES.
8498 #--CASE 1: 1+Z < 1, THEN K = -1 AND Y-F = (2-F) + 2Z
8499 #--CASE 2: 1+Z > 1, THEN K = 0 AND Y-F = (1-F) + Z
8500 #--ON RETURNING TO LP1CONT1, WE MUST HAVE K IN FP1, ADDRESS OF
8501 #--(1/F) IN A0, Y-F IN FP0, AND FP2 SAVED.
# FFRAC := leading 7 mantissa bits of X (mask 0xFE000000) with a 1 forced
# into the eighth bit (0x01000000); this is the mantissa of F.
8503 mov.
l XFRAC
(%a6
),FFRAC
(%a6
)
8504 and.l &0xFE000000,FFRAC
(%a6
)
8505 or.l &0x01000000,FFRAC
(%a6
) # F OBTAINED
# 0x3FFF8000 is 1.0 in the same exponent:mantissa encoding used above.
8506 cmp.
l %d1
,&0x3FFF8000 # SEE IF 1+Z > 1
# ---- CASE 1 (K = -1): FP0 := (2-F) + 2Z ----
8510 fmov.s TWO
(%pc
),%fp0
# exponent word of F set to the bias 0x3fff, i.e. F is scaled as 1.xxx * 2^0
8511 mov.
l &0x3fff0000,F
(%a6
)
8513 fsub.x F
(%a6
),%fp0
# 2-F
# mask 0x7E000000 keeps the six mantissa bits below the leading 1 of F;
# they are turned into the byte displacement into the 1/F table.
8514 mov.
l FFRAC
(%a6
),%d1
8515 and.l &0x7E000000,%d1
8518 asr.
l &4,%d1
# D1 CONTAINS DISPLACEMENT FOR 1/F
8519 fadd.x
%fp1
,%fp1
# GET 2Z
8520 fmovm.x
&0xc,-(%sp
) # SAVE FP2 {%fp2/%fp3}
8521 fadd.x
%fp1
,%fp0
# FP0 IS Y-F = (2-F)+2Z
8522 lea LOGTBL
(%pc
),%a0
# A0 IS ADDRESS OF 1/F
8524 fmov.s negone
(%pc
),%fp1
# FP1 IS K = -1
# ---- CASE 2 (K = 0): FP0 := (1-F) + Z ----
8528 fmov.s one
(%pc
),%fp0
8529 mov.
l &0x3fff0000,F
(%a6
)
8531 fsub.x F
(%a6
),%fp0
# 1-F
8532 mov.
l FFRAC
(%a6
),%d1
8533 and.l &0x7E000000,%d1
8537 fadd.x
%fp1
,%fp0
# FP0 IS Y-F
8538 fmovm.x
&0xc,-(%sp
) # FP2 SAVED {%fp2/%fp3}
8540 add.l %d1
,%a0
# A0 IS ADDRESS OF 1/F
8541 fmov.s zero
(%pc
),%fp1
# FP1 IS K = 0
8545 #--FPCR SAVED. D0 IS X IN COMPACT FORM.
# operands for the zero / negative argument exits: -1.0 and 0.0 (single precision)
8549 fmov.s negone
(%pc
),%fp0
8555 fmov.s zero
(%pc
),%fp0
8561 #--ENTRY POINT FOR LOG(1+Z) FOR DENORMALIZED INPUT
8562 # Simply return the denorm
8566 #########################################################################
8567 # satanh(): computes the inverse hyperbolic tangent of a norm input #
8568 # satanhd(): computes the inverse hyperbolic tangent of a denorm input #
8570 # INPUT *************************************************************** #
8571 # a0 = pointer to extended precision input #
8572 # d0 = round precision,mode #
8574 # OUTPUT ************************************************************** #
8575 # fp0 = arctanh(X) #
8577 # ACCURACY and MONOTONICITY ******************************************* #
8578 # The returned result is within 3 ulps in 64 significant bit, #
8579 # i.e. within 0.5001 ulp to 53 bits if the result is subsequently #
8580 # rounded to double precision. The result is provably monotonic #
8581 # in double precision. #
8583 # ALGORITHM *********************************************************** #
8586 # 1. If |X| >= 1, go to 3. #
8588 # 2. (|X| < 1) Calculate atanh(X) by #
8592 # atanh(X) := sgn * (1/2) * logp1(z) #
8595 # 3. If |X| > 1, go to 5. #
8597 # 4. (|X| = 1) Generate infinity with an appropriate sign and #
8598 # divide-by-zero by #
8600 # atanh(X) := sgn / (+0). #
8603 # 5. (|X| > 1) Generate an invalid operation by 0 * infinity. #
8606 #########################################################################
# satanh: a0 = pointer to extended-precision X, d0 = rounding precision/mode.
# Computes atanh(X) = sign(X) * (1/2) * log1p(2|X| / (1-|X|)) for |X| < 1 by
# calling slognp1 on a temporary pushed on the stack.
# Clear the sign bit of %d1 and compare against 0x3FFF8000, which encodes 1.0
# as <biased exponent 0x3FFF>:<top mantissa bits 0x8000>.
8612 and.l &0x7FFFFFFF,%d1
8613 cmp.
l %d1
,&0x3FFF8000
8616 #--THIS IS THE USUAL CASE, |X| < 1
8617 #--Y = |X|, Z = 2Y/(1-Y), ATANH(X) = SIGN(X) * (1/2) * LOG1P(Z).
8619 fabs.x
(%a0
),%fp0
# Y = |X|
8622 fadd.x
%fp0
,%fp0
# 2Y
# &0x3F800000 is 1.0 as a single-precision immediate; the sum is 1-Y
# provided FP1 holds -Y when this instruction executes.
8623 fadd.s
&0x3F800000,%fp1
# 1-Y
8624 fdiv.x
%fp1
,%fp0
# 2Y/(1-Y)
# Keep only the sign bit in %d1 and OR in 0x3F000000 (single-precision 0.5):
# the result is the single-precision value +0.5 or -0.5.
8626 and.l &0x80000000,%d1
8627 or.l &0x3F000000,%d1
# SIGN(X)*HALF
8630 mov.
l %d0
,-(%sp
) # save rnd prec,mode
8631 clr.
l %d0
# pass ext prec,RN
# mask 0x01 in the predecrement form of fmovm pushes one register (12 bytes)
8632 fmovm.x
&0x01,-(%sp
) # save Z on stack
8633 lea
(%sp
),%a0
# pass ptr to Z
8634 bsr slognp1
# LOG1P(Z)
# 0xc = 12 bytes = the single extended-precision value pushed above
8635 add.l &0xc,%sp
# clear Z from stack
8637 mov.
l (%sp
)+,%d0
# fetch old prec,mode
8638 fmov.
l %d0
,%fpcr
# load it
8639 mov.
b &FMUL_OP
,%d1
# last inst is MUL
# |X| >= 1 handling: compare |X| with single-precision 1.0 (0x3F800000)
8644 fabs.x
(%a0
),%fp0
# |X|
8645 fcmp.s
%fp0
,&0x3F800000
8650 #--ATANH(X) = X FOR DENORMALIZED X
8654 #########################################################################
8655 # slog10(): computes the base-10 logarithm of a normalized input #
8656 # slog10d(): computes the base-10 logarithm of a denormalized input #
8657 # slog2(): computes the base-2 logarithm of a normalized input #
8658 # slog2d(): computes the base-2 logarithm of a denormalized input #
8660 # INPUT *************************************************************** #
8661 # a0 = pointer to extended precision input #
8662 # d0 = round precision,mode #
8664 # OUTPUT ************************************************************** #
8665 # fp0 = log_10(X) or log_2(X) #
8667 # ACCURACY and MONOTONICITY ******************************************* #
8668 # The returned result is within 1.7 ulps in 64 significant bit, #
8669 # i.e. within 0.5003 ulp to 53 bits if the result is subsequently #
8670 # rounded to double precision. The result is provably monotonic #
8671 # in double precision. #
8673 # ALGORITHM *********************************************************** #
8677 # Step 0. If X < 0, create a NaN and raise the invalid operation #
8678 # flag. Otherwise, save FPCR in D1; set FpCR to default. #
8679 # Notes: Default means round-to-nearest mode, no floating-point #
8680 # traps, and precision control = double extended. #
8682 # Step 1. Call slognd to obtain Y = log(X), the natural log of X. #
8683 # Notes: Even if X is denormalized, log(X) is always normalized. #
8685 # Step 2. Compute log_10(X) = log(X) * (1/log(10)). #
8686 # 2.1 Restore the user FPCR #
8687 # 2.2 Return ans := Y * INV_L10. #
8691 # Step 0. If X < 0, create a NaN and raise the invalid operation #
8692 # flag. Otherwise, save FPCR in D1; set FpCR to default. #
8693 # Notes: Default means round-to-nearest mode, no floating-point #
8694 # traps, and precision control = double extended. #
8696 # Step 1. Call sLogN to obtain Y = log(X), the natural log of X. #
8698 # Step 2. Compute log_10(X) = log(X) * (1/log(10)). #
8699 # 2.1 Restore the user FPCR #
8700 # 2.2 Return ans := Y * INV_L10. #
8704 # Step 0. If X < 0, create a NaN and raise the invalid operation #
8705 # flag. Otherwise, save FPCR in D1; set FpCR to default. #
8706 # Notes: Default means round-to-nearest mode, no floating-point #
8707 # traps, and precision control = double extended. #
8709 # Step 1. Call slognd to obtain Y = log(X), the natural log of X. #
8710 # Notes: Even if X is denormalized, log(X) is always normalized. #
8712 # Step 2. Compute log_2(X) = log(X) * (1/log(2)). #
8713 # 2.1 Restore the user FPCR #
8714 # 2.2 Return ans := Y * INV_L2. #
8718 # Step 0. If X < 0, create a NaN and raise the invalid operation #
8719 # flag. Otherwise, save FPCR in D1; set FpCR to default. #
8720 # Notes: Default means round-to-nearest mode, no floating-point #
8721 # traps, and precision control = double extended. #
8723 # Step 1. If X is not an integer power of two, i.e., X != 2^k, #
8726 # Step 2. Return k. #
8727 # 2.1 Get integer k, X = 2^k. #
8728 # 2.2 Restore the user FPCR. #
8729 # 2.3 Return ans := convert-to-double-extended(k). #
8731 # Step 3. Call sLogN to obtain Y = log(X), the natural log of X. #
8733 # Step 4. Compute log_2(X) = log(X) * (1/log(2)). #
8734 # 4.1 Restore the user FPCR #
8735 # 4.2 Return ans := Y * INV_L2. #
8737 #########################################################################
# Conversion factors, extended precision (exponent word, 64-bit mantissa, pad):
#   0x3FFD DE5BD8A9 37287195 ~ 0.43429 = 1/ln(10)  (value matches INV_L10 as used below)
#   0x3FFF B8AA3B29 5C17F0BC ~ 1.44270 = 1/ln(2)   (value matches INV_L2 as used below)
8740 long
0x3FFD0000,0xDE5BD8A9,0x37287195,0x00000000
8743 long
0x3FFF0000,0xB8AA3B29,0x5C17F0BC,0x00000000
8746 #--entry point for Log10(X), X is normalized
# An operand equal to FP0 (expected to hold 1 here) exits through ld_pzero;
# otherwise log10(X) = ln(X) * (1/ln 10), with ln(X) obtained from slogn.
8749 fcmp.x
%fp0
,(%a0
) # if operand == 1,
8750 fbeq.
l ld_pzero
# return an EXACT zero
8756 bsr slogn
# log(X), X normal.
8758 fmul.x INV_L10
(%pc
),%fp0
8762 #--entry point for Log10(X), X is denormalized
# same scaling, but ln(X) comes from the denormalized-input entry slognd
8768 bsr slognd
# log(X), X denorm.
8770 fmul.x INV_L10
(%pc
),%fp0
8774 #--entry point for Log2(X), X is normalized
# Exact powers of two are answered without calling slogn; everything else
# goes to "continue" and is computed as ln(X) * (1/ln 2).
8780 bne.
b continue
# X is not 2^k
8783 and.l &0x7FFFFFFF,%d1
8788 and.l &0x00007FFF,%d1
8798 bsr slogn
# log(X), X normal.
8800 fmul.x INV_L2
(%pc
),%fp0
8807 #--entry point for Log2(X), X is denormalized
8813 bsr slognd
# log(X), X denorm.
8815 fmul.x INV_L2
(%pc
),%fp0
8818 #########################################################################
8819 # stwotox(): computes 2**X for a normalized input #
8820 # stwotoxd(): computes 2**X for a denormalized input #
8821 # stentox(): computes 10**X for a normalized input #
8822 # stentoxd(): computes 10**X for a denormalized input #
8824 # INPUT *************************************************************** #
8825 # a0 = pointer to extended precision input #
8826 # d0 = round precision,mode #
8828 # OUTPUT ************************************************************** #
8829 # fp0 = 2**X or 10**X #
8831 # ACCURACY and MONOTONICITY ******************************************* #
8832 # The returned result is within 2 ulps in 64 significant bit, #
8833 # i.e. within 0.5001 ulp to 53 bits if the result is subsequently #
8834 # rounded to double precision. The result is provably monotonic #
8835 # in double precision. #
8837 # ALGORITHM *********************************************************** #
8840 # 1. If |X| > 16480, go to ExpBig. #
8842 # 2. If |X| < 2**(-70), go to ExpSm. #
8844 # 3. Decompose X as X = N/64 + r where |r| <= 1/128. Furthermore #
8846 # N = 64(M + M') + j, j = 0,1,2,...,63. #
8848 # 4. Overwrite r := r * log2. Then #
8849 # 2**X = 2**(M') * 2**(M) * 2**(j/64) * exp(r). #
8850 # Go to expr to compute that expression. #
8853 # 1. If |X| > 16480*log_10(2) (base 10 log of 2), go to ExpBig. #
8855 # 2. If |X| < 2**(-70), go to ExpSm. #
8857 # 3. Set y := X*log_2(10)*64 (base 2 log of 10). Set #
8858 # N := round-to-int(y). Decompose N as #
8859 # N = 64(M + M') + j, j = 0,1,2,...,63. #
8862 # r := ((X - N*L1)-N*L2) * L10 #
8863 # where L1, L2 are the leading and trailing parts of #
8864 # log_10(2)/64 and L10 is the natural log of 10. Then #
8865 # 10**X = 2**(M') * 2**(M) * 2**(j/64) * exp(r). #
8866 # Go to expr to compute that expression. #
8869 # 1. Fetch 2**(j/64) from table as Fact1 and Fact2. #
8871 # 2. Overwrite Fact1 and Fact2 by #
8872 # Fact1 := 2**(M) * Fact1 #
8873 # Fact2 := 2**(M) * Fact2 #
8874 # Thus Fact1 + Fact2 = 2**(M) * 2**(j/64). #
8876 # 3. Calculate P where 1 + P approximates exp(r): #
8877 # P = r + r*r*(A1+r*(A2+...+r*A5)). #
8879 # 4. Let AdjFact := 2**(M'). Return #
8880 # AdjFact * ( Fact1 + ((Fact1*P) + Fact2) ). #
8884 # 1. Generate overflow by Huge * Huge if X > 0; otherwise, #
8885 # generate underflow by Tiny * Tiny. #
8888 # 1. Return 1 + X. #
8890 #########################################################################
# Constants for the 2**X / 10**X routines.
# Double-precision (two longwords): 64*log2(10) ~ 212.6 and log10(2)/64 ~ 0.004703.
8893 long
0x406A934F,0x0979A371 # 64LOG10/LOG2
8895 long
0x3F734413,0x509F8000 # LOG2/64LOG10
# Extended precision, negative, magnitude about 2^-50: presumably the trailing
# part of log10(2)/64 that complements the leading part above (TODO confirm label).
8898 long
0xBFCD0000,0xC0219DC1,0xDA994FD2,0x00000000
# ln(10) ~ 2.302585 in extended precision
8900 LOG10
: long
0x40000000,0x935D8DDD,0xAAA8AC17,0x00000000
# ln(2) ~ 0.693147 in extended precision
8902 LOG2
: long
0x3FFE0000,0xB17217F7,0xD1CF79AC,0x00000000
# Polynomial coefficients for exp(r)-1, double precision (loaded with fmov.d/fadd.d):
# EXPA5 ~ 1/720, EXPA4 ~ 1/120, EXPA3 ~ 1/24, EXPA2 ~ 1/6, EXPA1 = 1/2 exactly.
8904 EXPA5
: long
0x3F56C16D,0x6F7BD0B2
8905 EXPA4
: long
0x3F811112,0x302C712C
8906 EXPA3
: long
0x3FA55555,0x55554CC1
8907 EXPA2
: long
0x3FC55555,0x55554A54
# only the first two longwords form the double 0.5; the last two are zero padding
8908 EXPA1
: long
0x3FE00000,0x00000000,0x00000000,0x00000000
8911 long
0x3FFF0000,0x80000000,0x00000000,0x3F738000
8912 long
0x3FFF0000,0x8164D1F3,0xBC030773,0x3FBEF7CA
8913 long
0x3FFF0000,0x82CD8698,0xAC2BA1D7,0x3FBDF8A9
8914 long
0x3FFF0000,0x843A28C3,0xACDE4046,0x3FBCD7C9
8915 long
0x3FFF0000,0x85AAC367,0xCC487B15,0xBFBDE8DA
8916 long
0x3FFF0000,0x871F6196,0x9E8D1010,0x3FBDE85C
8917 long
0x3FFF0000,0x88980E80,0x92DA8527,0x3FBEBBF1
8918 long
0x3FFF0000,0x8A14D575,0x496EFD9A,0x3FBB80CA
8919 long
0x3FFF0000,0x8B95C1E3,0xEA8BD6E7,0xBFBA8373
8920 long
0x3FFF0000,0x8D1ADF5B,0x7E5BA9E6,0xBFBE9670
8921 long
0x3FFF0000,0x8EA4398B,0x45CD53C0,0x3FBDB700
8922 long
0x3FFF0000,0x9031DC43,0x1466B1DC,0x3FBEEEB0
8923 long
0x3FFF0000,0x91C3D373,0xAB11C336,0x3FBBFD6D
8924 long
0x3FFF0000,0x935A2B2F,0x13E6E92C,0xBFBDB319
8925 long
0x3FFF0000,0x94F4EFA8,0xFEF70961,0x3FBDBA2B
8926 long
0x3FFF0000,0x96942D37,0x20185A00,0x3FBE91D5
8927 long
0x3FFF0000,0x9837F051,0x8DB8A96F,0x3FBE8D5A
8928 long
0x3FFF0000,0x99E04593,0x20B7FA65,0xBFBCDE7B
8929 long
0x3FFF0000,0x9B8D39B9,0xD54E5539,0xBFBEBAAF
8930 long
0x3FFF0000,0x9D3ED9A7,0x2CFFB751,0xBFBD86DA
8931 long
0x3FFF0000,0x9EF53260,0x91A111AE,0xBFBEBEDD
8932 long
0x3FFF0000,0xA0B0510F,0xB9714FC2,0x3FBCC96E
8933 long
0x3FFF0000,0xA2704303,0x0C496819,0xBFBEC90B
8934 long
0x3FFF0000,0xA43515AE,0x09E6809E,0x3FBBD1DB
8935 long
0x3FFF0000,0xA5FED6A9,0xB15138EA,0x3FBCE5EB
8936 long
0x3FFF0000,0xA7CD93B4,0xE965356A,0xBFBEC274
8937 long
0x3FFF0000,0xA9A15AB4,0xEA7C0EF8,0x3FBEA83C
8938 long
0x3FFF0000,0xAB7A39B5,0xA93ED337,0x3FBECB00
8939 long
0x3FFF0000,0xAD583EEA,0x42A14AC6,0x3FBE9301
8940 long
0x3FFF0000,0xAF3B78AD,0x690A4375,0xBFBD8367
8941 long
0x3FFF0000,0xB123F581,0xD2AC2590,0xBFBEF05F
8942 long
0x3FFF0000,0xB311C412,0xA9112489,0x3FBDFB3C
8943 long
0x3FFF0000,0xB504F333,0xF9DE6484,0x3FBEB2FB
8944 long
0x3FFF0000,0xB6FD91E3,0x28D17791,0x3FBAE2CB
8945 long
0x3FFF0000,0xB8FBAF47,0x62FB9EE9,0x3FBCDC3C
8946 long
0x3FFF0000,0xBAFF5AB2,0x133E45FB,0x3FBEE9AA
8947 long
0x3FFF0000,0xBD08A39F,0x580C36BF,0xBFBEAEFD
8948 long
0x3FFF0000,0xBF1799B6,0x7A731083,0xBFBCBF51
8949 long
0x3FFF0000,0xC12C4CCA,0x66709456,0x3FBEF88A
8950 long
0x3FFF0000,0xC346CCDA,0x24976407,0x3FBD83B2
8951 long
0x3FFF0000,0xC5672A11,0x5506DADD,0x3FBDF8AB
8952 long
0x3FFF0000,0xC78D74C8,0xABB9B15D,0xBFBDFB17
8953 long
0x3FFF0000,0xC9B9BD86,0x6E2F27A3,0xBFBEFE3C
8954 long
0x3FFF0000,0xCBEC14FE,0xF2727C5D,0xBFBBB6F8
8955 long
0x3FFF0000,0xCE248C15,0x1F8480E4,0xBFBCEE53
8956 long
0x3FFF0000,0xD06333DA,0xEF2B2595,0xBFBDA4AE
8957 long
0x3FFF0000,0xD2A81D91,0xF12AE45A,0x3FBC9124
8958 long
0x3FFF0000,0xD4F35AAB,0xCFEDFA1F,0x3FBEB243
8959 long
0x3FFF0000,0xD744FCCA,0xD69D6AF4,0x3FBDE69A
8960 long
0x3FFF0000,0xD99D15C2,0x78AFD7B6,0xBFB8BC61
8961 long
0x3FFF0000,0xDBFBB797,0xDAF23755,0x3FBDF610
8962 long
0x3FFF0000,0xDE60F482,0x5E0E9124,0xBFBD8BE1
8963 long
0x3FFF0000,0xE0CCDEEC,0x2A94E111,0x3FBACB12
8964 long
0x3FFF0000,0xE33F8972,0xBE8A5A51,0x3FBB9BFE
8965 long
0x3FFF0000,0xE5B906E7,0x7C8348A8,0x3FBCF2F4
8966 long
0x3FFF0000,0xE8396A50,0x3C4BDC68,0x3FBEF22F
8967 long
0x3FFF0000,0xEAC0C6E7,0xDD24392F,0xBFBDBF4A
8968 long
0x3FFF0000,0xED4F301E,0xD9942B84,0x3FBEC01A
8969 long
0x3FFF0000,0xEFE4B99B,0xDCDAF5CB,0x3FBE8CAC
8970 long
0x3FFF0000,0xF281773C,0x59FFB13A,0xBFBCBB3F
8971 long
0x3FFF0000,0xF5257D15,0x2486CC2C,0x3FBEF73A
8972 long
0x3FFF0000,0xF7D0DF73,0x0AD13BB9,0xBFB8B795
8973 long
0x3FFF0000,0xFA83B2DB,0x722A033A,0x3FBEF84B
8974 long
0x3FFF0000,0xFD3E0C0C,0xF486C175,0xBFBEF581
8986 set FACT1LOW
,FACT1+
8
8990 set FACT2LOW
,FACT2+
8
8993 #--ENTRY POINT FOR 2**(X), HERE X IS FINITE, NON-ZERO, AND NOT NAN'S
# a0 = pointer to extended-precision X, d0 = user rounding precision/mode.
# Usual case: N = round(64*X), r = (X - N/64)*ln(2), then the shared
# reconstruction computes 2^(M') * 2^(M) * 2^(J/64) * exp(r).
8995 fmovm.x
(%a0
),&0x80 # LOAD INPUT
# compare |X| (sign bit cleared) against range limits given in
# <biased exponent>:<top 16 mantissa bits> form
9000 and.l &0x7FFFFFFF,%d1
9002 cmp.
l %d1
,&0x3FB98000 # |X| >= 2**(-70)?
9007 cmp.
l %d1
,&0x400D80C0 # |X| > 16480?
9012 #--USUAL CASE, 2^(-70) <= |X| <= 16480
# &0x42800000 is 64.0 in single precision
9015 fmul.s
&0x42800000,%fp1
# 64 * X
9016 fmov.
l %fp1
,INT
(%a6
) # N = ROUND-TO-INT(64 X)
9018 lea TEXPTBL
(%pc
),%a1
# LOAD ADDRESS OF TABLE OF 2^(J/64)
9019 fmov.
l INT
(%a6
),%fp1
# N --> FLOATING FMT
# J = low six bits of N; each table entry is 16 bytes, hence the shift by 4
9022 and.l &0x3F,%d1
# D1 IS J
9023 asl.
l &4,%d1
# DISPLACEMENT FOR 2^(J/64)
9024 add.l %d1
,%a1
# ADDRESS FOR 2^(J/64)
9025 asr.
l &6,%d2
# d2 IS L, N = 64L + J
9027 asr.
l &1,%d1
# D1 IS M
9028 sub.l %d1
,%d2
# d2 IS M', N = 64(M+M') + J
9031 #--SUMMARY: a1 IS ADDRESS FOR THE LEADING PORTION OF 2^(J/64),
9032 #--D1 IS M WHERE N = 64(M+M') + J. NOTE THAT |M| <= 16140 BY DESIGN.
9033 #--ADJFACT = 2^(M').
9034 #--REGISTERS SAVED SO FAR ARE (IN ORDER) FPCR, D0, FP1, a1, AND FP2.
9036 fmovm.x
&0x0c,-(%sp
) # save fp2/fp3
# &0x3C800000 is 1/64 in single precision
9038 fmul.s
&0x3C800000,%fp1
# (1/64)*N
# Copy the 16-byte table entry: three longwords form the extended-precision
# leading part FACT1; the fourth longword is split into two words, the first
# going to FACT2 and the second to FACT2HI, with the following word cleared.
9039 mov.
l (%a1
)+,FACT1
(%a6
)
9040 mov.
l (%a1
)+,FACT1HI
(%a6
)
9041 mov.
l (%a1
)+,FACT1LOW
(%a6
)
9042 mov.w
(%a1
)+,FACT2
(%a6
)
9044 fsub.x
%fp1
,%fp0
# X - (1/64)*INT(64 X)
9046 mov.w
(%a1
)+,FACT2HI
(%a6
)
9047 clr.w FACT2HI+
2(%a6
)
# adding M to the leading (exponent) word scales FACT1 and FACT2 by 2^M
9049 add.w
%d1
,FACT1
(%a6
)
9050 fmul.x LOG2
(%pc
),%fp0
# FP0 IS R
9051 add.w
%d1
,FACT2
(%a6
)
9057 cmp.
l %d1
,&0x3FFF8000
9060 #--|X| IS SMALL, RETURN 1 + X
9062 fmov.
l %d0
,%fpcr
# restore users round prec,mode
# &0x3F800000 is 1.0 in single precision
9063 fadd.s
&0x3F800000,%fp0
# RETURN 1 + X
9067 #--|X| IS LARGE, GENERATE OVERFLOW IF X > 0; ELSE GENERATE UNDERFLOW
9068 #--REGISTERS SAVE SO FAR ARE FPCR AND D0
9073 bra t_ovfl2
# t_ovfl expects positive value
9076 bra t_unfl2
# t_unfl expects positive value
9080 #--ENTRY POINT FOR 2**(X) FOR DENORMALIZED ARGUMENT
9082 fmov.
l %d0
,%fpcr
# set user's rounding mode/precision
# FP0 is loaded with single-precision 1.0 as the starting value of the result
9083 fmov.s
&0x3F800000,%fp0
# RETURN 1 + X
9085 or.l &0x00800001,%d1
9090 #--ENTRY POINT FOR 10**(X), HERE X IS FINITE, NON-ZERO, AND NOT NAN'S
# a0 = pointer to extended-precision X, d0 = user rounding precision/mode.
# Usual case: N = round(X*64*log2(10)), r = ((X - N*L_LEAD) - N*L_TRAIL)*ln(10),
# followed by the reconstruction 2^(M') * 2^(M) * 2^(J/64) * exp(r).
9092 fmovm.x
(%a0
),&0x80 # LOAD INPUT
9097 and.l &0x7FFFFFFF,%d1
9099 cmp.
l %d1
,&0x3FB98000 # |X| >= 2**(-70)?
9104 cmp.
l %d1
,&0x400B9B07 # |X| <= 16480*log2/log10 ?
9109 #--USUAL CASE, 2^(-70) <= |X| <= 16480 LOG 2 / LOG 10
9112 fmul.d L2TEN64
(%pc
),%fp1
# X*64*LOG10/LOG2
9113 fmov.
l %fp1
,INT
(%a6
) # N=INT(X*64*LOG10/LOG2)
9115 lea TEXPTBL
(%pc
),%a1
# LOAD ADDRESS OF TABLE OF 2^(J/64)
9116 fmov.
l INT
(%a6
),%fp1
# N --> FLOATING FMT
# J = low six bits of N; each table entry is 16 bytes, hence the shift by 4
9119 and.l &0x3F,%d1
# D1 IS J
9120 asl.
l &4,%d1
# DISPLACEMENT FOR 2^(J/64)
9121 add.l %d1
,%a1
# ADDRESS FOR 2^(J/64)
9122 asr.
l &6,%d2
# d2 IS L, N = 64L + J
9124 asr.
l &1,%d1
# D1 IS M
9125 sub.l %d1
,%d2
# d2 IS M', N = 64(M+M') + J
9128 #--SUMMARY: a1 IS ADDRESS FOR THE LEADING PORTION OF 2^(J/64),
9129 #--D1 IS M WHERE N = 64(M+M') + J. NOTE THAT |M| <= 16140 BY DESIGN.
9130 #--ADJFACT = 2^(M').
9131 #--REGISTERS SAVED SO FAR ARE (IN ORDER) FPCR, D0, FP1, a1, AND FP2.
9132 fmovm.x
&0x0c,-(%sp
) # save fp2/fp3
# Integer moves that copy the table entry into FACT1/FACT2 are interleaved
# with the floating-point argument reduction.
9136 fmul.d L10TWO1
(%pc
),%fp1
# N*(LOG2/64LOG10)_LEAD
9137 mov.
l (%a1
)+,FACT1
(%a6
)
9139 fmul.x L10TWO2
(%pc
),%fp2
# N*(LOG2/64LOG10)_TRAIL
9141 mov.
l (%a1
)+,FACT1HI
(%a6
)
9142 mov.
l (%a1
)+,FACT1LOW
(%a6
)
9143 fsub.x
%fp1
,%fp0
# X - N L_LEAD
9144 mov.w
(%a1
)+,FACT2
(%a6
)
9146 fsub.x
%fp2
,%fp0
# (X - N L_LEAD) - N L_TRAIL
9148 mov.w
(%a1
)+,FACT2HI
(%a6
)
9149 clr.w FACT2HI+
2(%a6
)
9152 fmul.x LOG10
(%pc
),%fp0
# FP0 IS R
# adding M to the leading (exponent) word scales FACT1 and FACT2 by 2^M
9153 add.w
%d1
,FACT1
(%a6
)
9154 add.w
%d1
,FACT2
(%a6
)
9157 #--FPCR, FP2, FP3 ARE SAVED IN ORDER AS SHOWN.
9158 #--ADJFACT CONTAINS 2**(M'), FACT1 + FACT2 = 2**(M) * 2**(J/64).
9159 #--FP0 IS R. THE FOLLOWING CODE COMPUTES
9160 #-- 2**(M'+M) * 2**(J/64) * EXP(R)
# The polynomial is evaluated as two interleaved chains in S = R*R:
# FP2 carries the A1/A3/A5 terms and FP3 the A2/A4 terms.
9163 fmul.x
%fp1
,%fp1
# FP1 IS S = R*R
9165 fmov.d EXPA5
(%pc
),%fp2
# FP2 IS A5
9166 fmov.d EXPA4
(%pc
),%fp3
# FP3 IS A4
9168 fmul.x
%fp1
,%fp2
# FP2 IS S*A5
9169 fmul.x
%fp1
,%fp3
# FP3 IS S*A4
9171 fadd.d EXPA3
(%pc
),%fp2
# FP2 IS A3+S*A5
9172 fadd.d EXPA2
(%pc
),%fp3
# FP3 IS A2+S*A4
9174 fmul.x
%fp1
,%fp2
# FP2 IS S*(A3+S*A5)
9175 fmul.x
%fp1
,%fp3
# FP3 IS S*(A2+S*A4)
9177 fadd.d EXPA1
(%pc
),%fp2
# FP2 IS A1+S*(A3+S*A5)
9178 fmul.x
%fp0
,%fp3
# FP3 IS R*S*(A2+S*A4)
9180 fmul.x
%fp1
,%fp2
# FP2 IS S*(A1+S*(A3+S*A5))
9181 fadd.x
%fp3
,%fp0
# FP0 IS R+R*S*(A2+S*A4)
9182 fadd.x
%fp2
,%fp0
# FP0 IS EXP(R) - 1
9184 fmovm.x
(%sp
)+,&0x30 # restore fp2/fp3
9186 #--FINAL RECONSTRUCTION PROCESS
9187 #--EXP(X) = 2^M*2^(J/64) + 2^M*2^(J/64)*(EXP(R)-1) - (1 OR 0)
# With P = EXP(R)-1 in FP0, the next three instructions form
# FACT1 + ((FACT1*P) + FACT2), adding the small terms first.
9189 fmul.x FACT1
(%a6
),%fp0
# FP0 IS FACT1*P
9190 fadd.x FACT2
(%a6
),%fp0
# FP0 IS FACT1*P + FACT2
9191 fadd.x FACT1
(%a6
),%fp0
# FP0 IS FACT1 + (FACT1*P + FACT2)
9193 fmov.
l %d0
,%fpcr
# restore users round prec,mode
# Build ADJFACT as an extended-precision power of two: exponent word from d2,
# mantissa 0x80000000_00000000 (i.e. 1.0).
9194 mov.w
%d2
,ADJFACT
(%a6
) # INSERT EXPONENT
9196 mov.
l &0x80000000,ADJFACT+
4(%a6
)
9197 clr.
l ADJFACT+
8(%a6
)
9198 mov.
b &FMUL_OP
,%d1
# last inst is MUL
9199 fmul.x ADJFACT
(%a6
),%fp0
# FINAL ADJUSTMENT
9204 #--ENTRY POINT FOR 10**(X) FOR DENORMALIZED ARGUMENT
9206 fmov.
l %d0
,%fpcr
# set user's rounding mode/precision
# FP0 is loaded with single-precision 1.0 as the starting value of the result
9207 fmov.s
&0x3F800000,%fp0
# RETURN 1 + X
9209 or.l &0x00800001,%d1
9213 #########################################################################
9214 # sscale(): computes the destination operand scaled by the source #
9215 # operand. If the absolute value of the source operand is #
9216 # >= 2^14, an overflow or underflow is returned. #
9218 # INPUT *************************************************************** #
9219 # a0 = pointer to double-extended source operand X #
9220 # a1 = pointer to double-extended destination operand Y #
9222 # OUTPUT ************************************************************** #
9223 # fp0 = scale(X,Y) #
9225 #########################################################################
# sscale: a0 = pointer to source operand X (the scale amount), a1 = pointer to
# destination operand Y, d0 = control bits. The integer part of X is turned
# into a power-of-two multiplier built on the stack, and Y is multiplied by it.
9231 mov.
l %d0
,-(%sp
) # store off ctrl bits for now
9233 mov.w DST_EX
(%a1
),%d1
# get dst exponent
9234 smi.
b SIGN
(%a6
) # use SIGN to hold dst sign
9235 andi.l &0x00007fff,%d1
# strip sign from dst exp
9237 mov.w SRC_EX
(%a0
),%d0
# check src bounds
9238 andi.w
&0x7fff,%d0
# clr src sign bit
# biased exponent below 0x3fff means |src| < 1; 0x400c is compared as the upper bound
9239 cmpi.w
%d0
,&0x3fff # is src ~ ZERO?
9240 blt.w src_small
# yes
9241 cmpi.w
%d0
,&0x400c # no; is src too big?
9245 # Source is within 2^14 range.
9248 fintrz.x SRC
(%a0
),%fp0
# calc int of src
9249 fmov.
l %fp0
,%d0
# int src to d0
9250 # don't want any accrued bits from the fintrz showing up later since
9251 # we may need to read the fpsr for the last fp op in t_catch2().
9254 tst.
b DST_HI
(%a1
) # is dst denormalized?
9257 # the dst is a DENORM. normalize the DENORM and add the adjustment to
9258 # the src value. then, jump to the norm part of the routine.
9260 mov.
l %d0
,-(%sp
) # save src for now
9262 mov.w DST_EX
(%a1
),FP_SCR0_EX
(%a6
) # make a copy
9263 mov.
l DST_HI
(%a1
),FP_SCR0_HI
(%a6
)
9264 mov.
l DST_LO
(%a1
),FP_SCR0_LO
(%a6
)
9266 lea FP_SCR0
(%a6
),%a0
# pass ptr to DENORM
9267 bsr.
l norm
# normalize the DENORM
# the saved integer src is popped and added to the value norm left in d0
9269 add.l (%sp
)+,%d0
# add adjustment to src
9271 fmovm.x FP_SCR0
(%a6
),&0x80 # load normalized DENORM
9273 cmpi.w
%d0
,&-0x3fff # is the shft amt really low?
9274 bge.
b sok_norm2
# thank goodness no
9276 # the multiply factor that we're trying to create should be a denorm
9277 # for the multiply to work. therefore, we're going to actually do a
9278 # multiply with a denorm which will cause an unimplemented data type
9279 # exception to be put into the machine which will be caught and corrected
9280 # later. we don't do this with the DENORMs above because this method
9281 # is slower. but, don't fret, I don't see it being used much either.
9282 fmov.
l (%sp
)+,%fpcr
# restore user fpcr
9283 mov.
l &0x80000000,%d1
# load normalized mantissa
# d0 := -(d0 + 0x3fff): the right-shift count for the mantissa bit
9284 subi.
l &-0x3fff,%d0
# how many should we shift?
9285 neg.l %d0
# make it positive
9286 cmpi.
b %d0
,&0x20 # is it >= 32?
9287 bge.
b sok_dnrm_32
# yes
9288 lsr.
l %d0
,%d1
# no; bit stays in upper lw
# The 12-byte operand is pushed lowest field last: low mantissa first,
# then high mantissa, then the exponent longword (which ends up at (%sp)).
9289 clr.
l -(%sp
) # insert zero low mantissa
9290 mov.
l %d1
,-(%sp
) # insert new high mantissa
9291 clr.
l -(%sp
) # make zero exponent
# shift count >= 32: the bit lands in the low mantissa longword instead
9294 subi.
b &0x20,%d0
# get shift count
9295 lsr.
l %d0
,%d1
# make low mantissa longword
9296 mov.
l %d1
,-(%sp
) # insert new low mantissa
9297 clr.
l -(%sp
) # insert zero high mantissa
9298 clr.
l -(%sp
) # make zero exponent
9301 # the src will force the dst to a DENORM value or worse. so, let's
9302 # create an fp multiply that will create the result.
9304 fmovm.x
DST(%a1
),&0x80 # load fp0 with the dst operand
9306 fmov.
l (%sp
)+,%fpcr
# restore user fpcr
9308 addi.w
&0x3fff,%d0
# turn src amt into exp value
9309 swap
%d0
# put exponent in high word
# Build the multiplier 1.0 * 2^(src) on the stack, same push order as the
# denorm sequences above: low mantissa, high mantissa, exponent.
9310 clr.
l -(%sp
) # insert zero lo mantissa (pushed first)
9311 mov.
l &0x80000000,-(%sp
) # insert new high mantissa
9312 mov.
l %d0
,-(%sp
) # insert new exponent (pushed last, at (%sp))
9315 fmov.
l %fpcr
,%d0
# d0 needs fpcr for t_catch2
9316 mov.
b &FMUL_OP
,%d1
# last inst is MUL
9317 fmul.x
(%sp
)+,%fp0
# do the multiply
9318 bra t_catch2
# catch any exceptions
9321 # Source is outside of 2^14 range. Test the sign and branch
9322 # to the appropriate exception handler.
9325 mov.
l (%sp
)+,%d0
# restore ctrl bits
# after the exchange a1 points to the src operand and a0 to the dst operand
9326 exg
%a0
,%a1
# swap src,dst ptrs
9327 tst.
b SRC_EX
(%a1
) # is src negative?
9328 bmi t_unfl
# yes; underflow
9329 bra t_ovfl_sc
# no; overflow
9332 # The source input is below 1, so we check for denormalized numbers
9336 tst.
b DST_HI
(%a1
) # is dst denormalized?
9337 bpl.
b ssmall_done
# yes
9340 fmov.
l %d0
,%fpcr
# no; load control bits
9341 mov.
b &FMOV_OP
,%d1
# last inst is MOVE
9342 fmov.x
DST(%a1
),%fp0
# simply return dest
9345 mov.
l (%sp
)+,%d0
# load control bits into d0
9346 mov.
l %a1
,%a0
# pass ptr to dst
9349 #########################################################################
9350 # smod(): computes the fp MOD of the input values X,Y. #
9351 # srem(): computes the fp (IEEE) REM of the input values X,Y. #
9353 # INPUT *************************************************************** #
9354 # a0 = pointer to extended precision input X #
9355 # a1 = pointer to extended precision input Y #
9356 # d0 = round precision,mode #
9358 # The input operands X and Y can be either normalized or #
9361 # OUTPUT ************************************************************** #
9362 # fp0 = FREM(X,Y) or FMOD(X,Y) #
9364 # ALGORITHM *********************************************************** #
9366 # Step 1. Save and strip signs of X and Y: signX := sign(X), #
9367 # signY := sign(Y), X := |X|, Y := |Y|, #
9368 # signQ := signX EOR signY. Record whether MOD or REM #
9371 # Step 2. Set L := expo(X)-expo(Y), k := 0, Q := 0. #
9373 # R := X, go to Step 4. #
9375 # R := 2^(-L)X, j := L. #
9378 # Step 3. Perform MOD(X,Y) #
9379 # 3.1 If R = Y, go to Step 9. #
9380 # 3.2 If R > Y, then { R := R - Y, Q := Q + 1} #
9381 # 3.3 If j = 0, go to Step 4. #
9382 # 3.4 k := k + 1, j := j - 1, Q := 2Q, R := 2R. Go to #
9385 # Step 4. At this point, R = X - QY = MOD(X,Y). Set #
9386 # Last_Subtract := false (used in Step 7 below). If #
9387 # MOD is requested, go to Step 6. #
9389 # Step 5. R = MOD(X,Y), but REM(X,Y) is requested. #
9390 # 5.1 If R < Y/2, then R = MOD(X,Y) = REM(X,Y). Go to #
9392 # 5.2 If R > Y/2, then { set Last_Subtract := true, #
9393 # Q := Q + 1, Y := signY*Y }. Go to Step 6. #
9394 # 5.3 This is the tricky case of R = Y/2. If Q is odd, #
9395 # then { Q := Q + 1, signX := -signX }. #
9397 # Step 6. R := signX*R. #
9399 # Step 7. If Last_Subtract = true, R := R - Y. #
9401 # Step 8. Return signQ, last 7 bits of Q, and R as required. #
9403 # Step 9. At this point, R = 2^(-j)*X - Q Y = Y. Thus, #
9404 # X = 2^(j)*(Q+1)Y. set Q := 2^(j)*(Q+1), #
9405 # R := 0. Return signQ, last 7 bits of Q, and R. #
9407 #########################################################################
9410 set Sc_Flag
,L_SCR3+
1
9425 long
0x00010000,0x80000000,0x00000000,0x00000000
9429 clr.
b FPSR_QBYTE
(%a6
)
9430 mov.
l %d0
,-(%sp
) # save ctrl bits
9436 clr.
b FPSR_QBYTE
(%a6
)
9437 mov.
l %d0
,-(%sp
) # save ctrl bits
9438 mov.
b &0x1,Mod_Flag
(%a6
)
9441 #..Save sign of X and Y
9442 movm.
l &0x3f00,-(%sp
) # save data registers
9443 mov.w SRC_EX
(%a0
),%d3
9444 mov.w
%d3
,SignY
(%a6
)
9445 and.l &0x00007FFF,%d3
# Y := |Y|
9448 mov.
l SRC_HI
(%a0
),%d4
9449 mov.
l SRC_LO
(%a0
),%d5
# (D3,D4,D5) is |Y|
9454 mov.
l &0x00003FFE,%d3
# $3FFD + 1
9463 bfffo
%d4
{&0:&32},%d6
9465 sub.l %d6
,%d3
# (D3,D4,D5) is normalized
9466 # ...with bias $7FFD
9471 bfffo
%d4
{&0:&32},%d6
9474 mov.
l %d5
,%d7
# a copy of D5
9479 or.l %d7
,%d4
# (D3,D4,D5) normalized
9480 # ...with bias $7FFD
9484 add.l &0x00003FFE,%d3
# (D3,D4,D5) normalized
9485 # ...with bias $7FFD
9488 mov.w DST_EX
(%a1
),%d0
9489 mov.w
%d0
,SignX
(%a6
)
9490 mov.w SignY
(%a6
),%d1
9492 and.l &0x00008000,%d1
9493 mov.w
%d1
,SignQ
(%a6
) # sign(Q) obtained
9494 and.l &0x00007FFF,%d0
9495 mov.
l DST_HI
(%a1
),%d1
9496 mov.
l DST_LO
(%a1
),%d2
# (D0,D1,D2) is |X|
9499 mov.
l &0x00003FFE,%d0
9508 bfffo
%d1
{&0:&32},%d6
9510 sub.l %d6
,%d0
# (D0,D1,D2) is normalized
9511 # ...with bias $7FFD
9516 bfffo
%d1
{&0:&32},%d6
9519 mov.
l %d2
,%d7
# a copy of D2
9524 or.l %d7
,%d1
# (D0,D1,D2) normalized
9525 # ...with bias $7FFD
9529 add.l &0x00003FFE,%d0
# (D0,D1,D2) normalized
9530 # ...with bias $7FFD
9534 mov.
l %d3
,L_SCR1
(%a6
) # save biased exp(Y)
9535 mov.
l %d0
,-(%sp
) # save biased exp(X)
9536 sub.l %d3
,%d0
# L := expo(X)-expo(Y)
9538 clr.
l %d6
# D6 := carry <- 0
9540 mov.
l &0,%a1
# A1 is k; j+k=L, Q=0
9542 #..(Carry,D1,D2) is R
9546 #..expo(X) < expo(Y). Thus X = mod(X,Y)
9548 mov.
l (%sp
)+,%d0
# restore d0
9552 addq.
l &0x4,%sp
# erase exp(X)
9553 #..At this point R = 2^(-L)X; Q = 0; k = 0; and k+j = L
9555 tst.
l %d6
# test carry bit
9558 #..At this point carry = 0, R = (D1,D2), Y = (D4,D5)
9559 cmp.
l %d1
,%d4
# compare hi(R) and hi(Y)
9561 cmp.
l %d2
,%d5
# compare lo(R) and lo(Y)
9564 #..At this point, R = Y
9568 #..use the borrow of the previous compare
9569 bcs.
b R_LT_Y
# borrow is set iff R < Y
9572 #..If Carry is set, then Y < (Carry,D1,D2) < 2Y. Otherwise, Carry = 0
9573 #..and Y < (D1,D2) < 2Y. Either way, perform R - Y
9574 sub.l %d5
,%d2
# lo(R) - lo(Y)
9575 subx.
l %d4
,%d1
# hi(R) - hi(Y)
9576 clr.
l %d6
# clear carry
9577 addq.
l &1,%d3
# Q := Q + 1
9580 #..At this point, Carry=0, R < Y. R = 2^(k-L)X - QY; k+j = L; j >= 0.
9581 tst.
l %d0
# see if j = 0.
9584 add.l %d3
,%d3
# Q := 2Q
9585 add.l %d2
,%d2
# lo(R) = 2lo(R)
9586 roxl.
l &1,%d1
# hi(R) = 2hi(R) + carry
9587 scs
%d6
# set Carry if 2(R) overflows
9588 addq.
l &1,%a1
# k := k+1
9589 subq.
l &1,%d0
# j := j - 1
9590 #..At this point, R=(Carry,D1,D2) = 2^(k-L)X - QY, j+k=L, j >= 0, R < 2Y.
9595 #..k = L, j = 0, Carry = 0, R = (D1,D2) = X - QY, R < Y.
9598 mov.
l L_SCR1
(%a6
),%d0
# new biased expo of R
9607 bfffo
%d1
{&0:&32},%d6
9609 sub.l %d6
,%d0
# (D0,D1,D2) is normalized
9610 # ...with bias $7FFD
9615 bfffo
%d1
{&0:&32},%d6
9616 bmi.
b Get_Mod
# already normalized
9619 mov.
l %d2
,%d7
# a copy of D2
9624 or.l %d7
,%d1
# (D0,D1,D2) normalized
9628 cmp.
l %d0
,&0x000041FE
9634 mov.
l L_SCR1
(%a6
),%d6
9638 fmov.x R
(%a6
),%fp0
# no exception
9639 mov.
b &1,Sc_Flag
(%a6
)
9646 mov.
l L_SCR1
(%a6
),%d6
9648 mov.
l %d6
,L_SCR1
(%a6
)
9660 mov.
l L_SCR1
(%a6
),%d6
# new biased expo(Y)
9661 subq.
l &1,%d6
# biased expo(Y/2)
9677 fsub.x Y
(%a6
),%fp0
# no exceptions
9678 addq.
l &1,%d3
# Q := Q + 1
9683 mov.w SignX
(%a6
),%d6
9691 mov.w SignQ
(%a6
),%d6
# D6 is sign(Q)
9694 and.l &0x0000007F,%d3
# 7 bits of Q
9695 or.l %d6
,%d3
# sign and bits of Q
9698 # and.l &0xFF00FFFF,%d6
9700 # fmov.l %d6,%fpsr # put Q in fpsr
9701 mov.
b %d3
,FPSR_QBYTE
(%a6
) # put Q in fpsr
9705 movm.
l (%sp
)+,&0xfc # {%d2-%d7}
9710 mov.
b &FMUL_OP
,%d1
# last inst is MUL
9711 fmul.x Scale
(%pc
),%fp0
# may cause underflow
9713 # the '040 package did this apparently to see if the dst operand for the
9714 # preceding fmul was a denorm. but, it better not have been since the
9715 # algorithm just got done playing with fp0 and expected no exceptions
9716 # as a result. trust me...
9717 # bra t_avoid_unsupp # check for denorm as a
9718 # ;result of the scaling
9721 mov.
b &FMOV_OP
,%d1
# last inst is MOVE
9722 fmov.x
%fp0
,%fp0
# capture exceptions & round
9726 #..R = 2^(-j)X - Q Y = Y, thus R = 0 and quotient = 2^j (Q+1)
9728 cmp.
l %d0
,&8 # D0 is j
9738 fmov.s
&0x00000000,%fp0
9743 #..Check parity of Q
9745 and.l &0x00000001,%d6
9747 beq.w Fix_Sign
# Q is even
9749 #..Q is odd, Q := Q + 1, signX := -signX
9751 mov.w SignX
(%a6
),%d6
9752 eor.
l &0x00008000,%d6
9753 mov.w
%d6
,SignX
(%a6
)
9756 #########################################################################
9757 # XDEF **************************************************************** #
9758 # tag(): return the optype of the input ext fp number #
9760 # This routine is used by the 060FPLSP. #
9762 # XREF **************************************************************** #
9765 # INPUT *************************************************************** #
9766 # a0 = pointer to extended precision operand #
9768 # OUTPUT ************************************************************** #
9769 # d0 = value of type tag #
9770 # one of: NORM, INF, QNAN, SNAN, DENORM, ZERO #
9772 # ALGORITHM *********************************************************** #
9773 # Simply test the exponent, j-bit, and mantissa values to #
9774 # determine the type of operand. #
9775 # If it's an unnormalized zero, alter the operand and force it #
9776 # to be a normal zero. #
9778 #########################################################################
9782 mov.w FTEMP_EX
(%a0
), %d0
# extract exponent
9783 andi.w
&0x7fff, %d0
# strip off sign
9784 cmpi.w
%d0
, &0x7fff # is (EXP == MAX)?
9787 btst
&0x7,FTEMP_HI
(%a0
)
9793 tst.w
%d0
# is exponent = 0?
9807 bsr.
l unnorm_fix
# convert to norm,denorm,or zero
9815 mov.
l FTEMP_HI
(%a0
), %d0
9816 and.l &0x7fffffff, %d0
# msb is a don't care!
9825 #############################################################
# Default quiet NaN in extended precision: maximum exponent (0x7fff) with an
# all-ones mantissa. Loaded PC-relative wherever a QNAN result is returned.
9827 qnan
: long
0x7fff0000, 0xffffffff, 0xffffffff
9829 #########################################################################
9830 # XDEF **************************************************************** #
9831 # t_dz(): Handle 060FPLSP dz exception for "flogn" emulation. #
9832 # t_dz2(): Handle 060FPLSP dz exception for "fatanh" emulation. #
9834 # These routines are used by the 060FPLSP package. #
9836 # XREF **************************************************************** #
9839 # INPUT *************************************************************** #
9840 # a0 = pointer to extended precision source operand. #
9842 # OUTPUT ************************************************************** #
9843 # fp0 = default DZ result. #
9845 # ALGORITHM *********************************************************** #
9846 # Transcendental emulation for the 060FPLSP has detected that #
9847 # a DZ exception should occur for the instruction. If DZ is disabled, #
9848 # return the default result. #
9849 # If DZ is enabled, the dst operand should be returned unscathed #
9850 # in fp0 while fp1 is used to create a DZ exception so that the #
9851 # operating system can log that such an event occurred. #
9853 #########################################################################
9857 tst.
b SRC_EX
(%a0
) # check sign for neg or pos
9858 bpl.
b dz_pinf
# branch if pos sign
9862 ori.
l &dzinf_mask+neg_mask
,USER_FPSR
(%a6
) # set N/I/DZ/ADZ
9864 btst
&dz_bit
,FPCR_ENABLE
(%a6
)
9867 # dz is disabled. return a -INF.
9868 fmov.s
&0xff800000,%fp0
# return -INF
9871 # dz is enabled. create a dz exception so the user can record it
9872 # but use fp1 instead. return the dst operand unscathed in fp0.
9874 fmovm.x EXC_FP0
(%a6
),&0x80 # return fp0 unscathed
9875 fmov.
l USER_FPCR
(%a6
),%fpcr
9876 fmov.s
&0xbf800000,%fp1
# load -1
9877 fdiv.s
&0x00000000,%fp1
# -1 / 0
9881 ori.
l &dzinf_mask
,USER_FPSR
(%a6
) # set I/DZ/ADZ
9883 btst
&dz_bit
,FPCR_ENABLE
(%a6
)
9886 # dz is disabled. return a +INF.
9887 fmov.s
&0x7f800000,%fp0
# return +INF
9890 # dz is enabled. create a dz exception so the user can record it
9891 # but use fp1 instead. return the dst operand unscathed in fp0.
9893 fmovm.x EXC_FP0
(%a6
),&0x80 # return fp0 unscathed
9894 fmov.
l USER_FPCR
(%a6
),%fpcr
9895 fmov.s
&0x3f800000,%fp1
# load +1
9896 fdiv.s
&0x00000000,%fp1
# +1 / 0
9899 #########################################################################
9900 # XDEF **************************************************************** #
9901 # t_operr(): Handle 060FPLSP OPERR exception during emulation. #
9903 # This routine is used by the 060FPLSP package. #
9905 # XREF **************************************************************** #
9908 # INPUT *************************************************************** #
9909 # fp1 = source operand #
9911 # OUTPUT ************************************************************** #
9912 # fp0 = default result #
9915 # ALGORITHM *********************************************************** #
9916 # An operand error should occur as the result of transcendental #
9917 # emulation in the 060FPLSP. If OPERR is disabled, just return a NAN #
9918 # in fp0. If OPERR is enabled, return the dst operand unscathed in fp0 #
9919 # and the source operand in fp1. Use fp2 to create an OPERR exception #
9920 # so that the operating system can log the event. #
9922 #########################################################################
9926 ori.
l &opnan_mask
,USER_FPSR
(%a6
) # set NAN/OPERR/AIOP
9928 btst
&operr_bit
,FPCR_ENABLE
(%a6
)
9931 # operr is disabled. return a QNAN in fp0
9932 fmovm.x qnan
(%pc
),&0x80 # return QNAN
9935 # operr is enabled. create an operr exception so the user can record it
9936 # but use fp2 instead. return the dst operand unscathed in fp0.
9938 fmovm.x EXC_FP0
(%a6
),&0x80 # return fp0 unscathed
9939 fmov.
l USER_FPCR
(%a6
),%fpcr
9940 fmovm.x
&0x04,-(%sp
) # save fp2
9941 fmov.s
&0x7f800000,%fp2
# load +INF
9942 fmul.s
&0x00000000,%fp2
# +INF x 0
9943 fmovm.x
(%sp
)+,&0x20 # restore fp2
# Extended-precision extreme values, three longwords each.
# NOTE(review): the labels for these four constants are not visible in this
# excerpt. Other code in this file refers to pls_huge, mns_huge, pls_tiny and
# mns_tiny -- presumably in this order; confirm against the full file.
# positive, exponent 0x7ffe, mantissa all ones (largest finite magnitude)
9947 long
0x7ffe0000,0xffffffff,0xffffffff
# same magnitude with the sign bit set
9949 long
0xfffe0000,0xffffffff,0xffffffff
# positive, exponent field 0, only the explicit integer bit of the mantissa set
9951 long
0x00000000,0x80000000,0x00000000
# same magnitude with the sign bit set
9953 long
0x80000000,0x80000000,0x00000000
9955 #########################################################################
9956 # XDEF **************************************************************** #
9957 # t_unfl(): Handle 060FPLSP underflow exception during emulation. #
9958 # t_unfl2(): Handle 060FPLSP underflow exception during #
9959 # emulation. result always positive. #
9961 # This routine is used by the 060FPLSP package. #
9963 # XREF **************************************************************** #
9966 # INPUT *************************************************************** #
9967 # a0 = pointer to extended precision source operand #
9969 # OUTPUT ************************************************************** #
9970 # fp0 = default underflow result #
9972 # ALGORITHM *********************************************************** #
9973 # An underflow should occur as the result of transcendental #
9974 # emulation in the 060FPLSP. Create an underflow by using "fmul" #
9975 # and two very small numbers of appropriate sign so that the operating #
9976 # system can log the event. #
9978 #########################################################################
9987 ori.
l &unfinx_mask+neg_mask
,USER_FPSR
(%a6
) # set N/UNFL/INEX2/AUNFL/AINEX
9989 fmov.
l USER_FPCR
(%a6
),%fpcr
9990 fmovm.x mns_tiny
(%pc
),&0x80
9991 fmul.x pls_tiny
(%pc
),%fp0
9995 mov.
b %d0
,FPSR_CC
(%a6
)
9998 ori.w
&unfinx_mask
,FPSR_EXCEPT
(%a6
) # set UNFL/INEX2/AUNFL/AINEX
10000 fmov.
l USER_FPCR
(%a6
),%fpcr
10001 fmovm.x pls_tiny
(%pc
),&0x80
10006 mov.
b %d0
,FPSR_CC
(%a6
)
10009 #########################################################################
10010 # XDEF **************************************************************** #
10011 # t_ovfl(): Handle 060FPLSP overflow exception during emulation. #
10013 # t_ovfl2(): Handle 060FPLSP overflow exception during #
10014 # emulation. result always positive. (dyadic) #
10015 # t_ovfl_sc(): Handle 060FPLSP overflow exception during #
10016 # emulation for "fscale". #
10018 # This routine is used by the 060FPLSP package. #
10020 # XREF **************************************************************** #
10023 # INPUT *************************************************************** #
10024 # a0 = pointer to extended precision source operand #
10026 # OUTPUT ************************************************************** #
10027 # fp0 = default overflow result #
10029 # ALGORITHM *********************************************************** #
10030 # An overflow should occur as the result of transcendental #
10031 # emulation in the 060FPLSP. Create an overflow by using "fmul" #
10032 # and two very large numbers of appropriate sign so that the operating #
10033 # system can log the event. #
10034 # For t_ovfl_sc() we take special care not to lose the INEX2 bit. #
10036 #########################################################################
10040 ori.
l &ovfl_inx_mask
,USER_FPSR
(%a6
) # set OVFL/AOVFL/AINEX
10042 mov.
b %d0
,%d1
# fetch rnd prec,mode
10043 andi.b &0xc0,%d1
# extract prec
10046 # dst op is a DENORM. we have to normalize the mantissa to see if the
10047 # result would be inexact for the given precision. make a copy of the
10048 # dst so we don't screw up the version passed to us.
10049 mov.w LOCAL_EX
(%a0
),FP_SCR0_EX
(%a6
)
10050 mov.
l LOCAL_HI
(%a0
),FP_SCR0_HI
(%a6
)
10051 mov.
l LOCAL_LO
(%a0
),FP_SCR0_LO
(%a6
)
10052 lea FP_SCR0
(%a6
),%a0
# pass ptr to FP_SCR0
10053 movm.
l &0xc080,-(%sp
) # save d0-d1/a0
10054 bsr.
l norm
# normalize mantissa
10055 movm.
l (%sp
)+,&0x0103 # restore d0-d1/a0
10057 cmpi.
b %d1
,&0x40 # is precision sgl?
10058 bne.
b ovfl_sc_dbl
# no; dbl
10060 tst.
l LOCAL_LO
(%a0
) # is lo lw of sgl set?
10061 bne.
b ovfl_sc_inx
# yes
10062 tst.
b 3+LOCAL_HI
(%a0
) # is lo byte of hi lw set?
10063 bne.
b ovfl_sc_inx
# yes
10064 bra.w ovfl_work
# don't set INEX2
10066 mov.
l LOCAL_LO
(%a0
),%d1
# are any of lo 11 bits of
10067 andi.l &0x7ff,%d1
# dbl mantissa set?
10068 beq.w ovfl_work
# no; don't set INEX2
10070 ori.
l &inex2_mask
,USER_FPSR
(%a6
) # set INEX2
10071 bra.
b ovfl_work
# continue
10075 ori.w
&ovfinx_mask
,FPSR_EXCEPT
(%a6
) # set OVFL/INEX2/AOVFL/AINEX
10080 fmov.
l USER_FPCR
(%a6
),%fpcr
10081 fmovm.x mns_huge
(%pc
),&0x80
10082 fmul.x pls_huge
(%pc
),%fp0
10086 ori.
b &neg_mask
,%d0
10087 mov.
b %d0
,FPSR_CC
(%a6
)
10090 fmov.
l USER_FPCR
(%a6
),%fpcr
10091 fmovm.x pls_huge
(%pc
),&0x80
10092 fmul.x pls_huge
(%pc
),%fp0
10096 mov.
b %d0
,FPSR_CC
(%a6
)
10101 ori.w
&ovfinx_mask
,FPSR_EXCEPT
(%a6
) # set OVFL/INEX2/AOVFL/AINEX
10102 fmov.
l USER_FPCR
(%a6
),%fpcr
10103 fmovm.x pls_huge
(%pc
),&0x80
10104 fmul.x pls_huge
(%pc
),%fp0
10108 mov.
b %d0
,FPSR_CC
(%a6
)
10111 #########################################################################
10112 # XDEF **************************************************************** #
10113 # t_catch(): Handle 060FPLSP OVFL,UNFL,or INEX2 exception during #
10115 # t_catch2(): Handle 060FPLSP OVFL,UNFL,or INEX2 exception during #
10118 # These routines are used by the 060FPLSP package. #
10120 # XREF **************************************************************** #
10123 # INPUT *************************************************************** #
10124 # fp0 = default underflow or overflow result #
10126 # OUTPUT ************************************************************** #
10127 # fp0 = default result #
10129 # ALGORITHM *********************************************************** #
10130 # If an overflow or underflow occurred during the last #
10131 # instruction of transcendental 060FPLSP emulation, then it has already #
10132 # occurred and has been logged. Now we need to see if an inexact #
10133 # exception should occur. #
10135 #########################################################################
10140 or.l %d0
,USER_FPSR
(%a6
)
10146 or.l %d0
,USER_FPSR
(%a6
)
10148 #########################################################################
10149 # XDEF **************************************************************** #
10150 # t_inx2(): Handle inexact 060FPLSP exception during emulation. #
10151 # t_pinx2(): Handle inexact 060FPLSP exception for "+" results. #
10152 # t_minx2(): Handle inexact 060FPLSP exception for "-" results. #
10154 # XREF **************************************************************** #
10157 # INPUT *************************************************************** #
10158 # fp0 = default result #
10160 # OUTPUT ************************************************************** #
10161 # fp0 = default result #
10163 # ALGORITHM *********************************************************** #
10164 # The last instruction of transcendental emulation for the #
10165 # 060FPLSP should be inexact. So, if inexact is enabled, then we create #
10166 # the event here by adding a large and very small number together #
10167 # so that the operating system can log the event. #
10168 # Must check, too, if the result was zero, in which case we just #
10169 # set the FPSR bits and return. #
10171 #########################################################################
10180 ori.w
&inx2a_mask
,FPSR_EXCEPT
(%a6
) # set INEX2/AINEX
10185 ori.
l &inx2a_mask+neg_mask
,USER_FPSR
(%a6
)
10188 btst
&inex2_bit
,FPCR_ENABLE
(%a6
) # is inexact enabled?
10189 bne.
b inx2_work_ena
# yes
10192 fmov.
l USER_FPCR
(%a6
),%fpcr
# insert user's exceptions
10193 fmov.s
&0x3f800000,%fp1
# load +1
10194 fadd.x pls_tiny
(%pc
),%fp1
# cause exception
10198 mov.
b &z_bmask
,FPSR_CC
(%a6
)
10199 ori.w
&inx2a_mask
,2+USER_FPSR
(%a6
) # set INEX/AINEX
10202 #########################################################################
10203 # XDEF **************************************************************** #
10204 # t_extdnrm(): Handle DENORM inputs in 060FPLSP. #
10205 # t_resdnrm(): Handle DENORM inputs in 060FPLSP for "fscale". #
10207 # This routine is used by the 060FPLSP package. #
10209 # XREF **************************************************************** #
10212 # INPUT *************************************************************** #
10213 # a0 = pointer to extended precision input operand #
10215 # OUTPUT ************************************************************** #
10216 # fp0 = default result #
10218 # ALGORITHM *********************************************************** #
10219 # For all functions that have a denormalized input and that #
10220 # f(x)=x, this is the entry point. #
10221 # DENORM value is moved using "fmove" which triggers an exception #
10222 # if enabled so the operating system can log the event. #
10224 #########################################################################
10228 fmov.
l USER_FPCR
(%a6
),%fpcr
10229 fmov.x SRC_EX
(%a0
),%fp0
10231 ori.
l &unfinx_mask
,%d0
10232 or.l %d0
,USER_FPSR
(%a6
)
10237 fmov.
l USER_FPCR
(%a6
),%fpcr
10238 fmov.x SRC_EX
(%a0
),%fp0
10240 or.l %d0
,USER_FPSR
(%a6
)
10243 ##########################################
10247 # This is used by fsincos library emulation. The correct
10248 # values are already in fp0 and fp1 so we do nothing here.
10254 ##########################################
10257 # dst_qnan --- force result when destination is a NaN
10261 fmov.x
DST(%a1
),%fp0
10265 mov.
b &nan_bmask
,FPSR_CC
(%a6
)
10268 mov.
b &nan_bmask+neg_bmask
,FPSR_CC
(%a6
)
10272 # src_qnan --- force result when source is a NaN
10276 fmov.x SRC
(%a0
),%fp0
10280 mov.
b &nan_bmask
,FPSR_CC
(%a6
)
10283 mov.
b &nan_bmask+neg_bmask
,FPSR_CC
(%a6
)
10286 ##########################################
10289 # Native instruction support
10291 # Some systems may need entry points even for 68060 native
10292 # instructions. These routines are provided for
10297 fmov.
l %fpcr
,-(%sp
) # save fpcr
10298 fmov.
l &0x00000000,%fpcr
# clear fpcr for load
10299 fmov.s
0x8(%sp
),%fp0
# load sgl dst
10300 fmov.
l (%sp
)+,%fpcr
# restore fpcr
10301 fadd.s
0x8(%sp
),%fp0
# fadd w/ sgl src
10306 fmov.
l %fpcr
,-(%sp
) # save fpcr
10307 fmov.
l &0x00000000,%fpcr
# clear fpcr for load
10308 fmov.d
0x8(%sp
),%fp0
# load dbl dst
10309 fmov.
l (%sp
)+,%fpcr
# restore fpcr
10310 fadd.d
0xc(%sp
),%fp0
# fadd w/ dbl src
10315 fmovm.x
0x4(%sp
),&0x80 # load ext dst
10316 fadd.x
0x10(%sp
),%fp0
# fadd w/ ext src
10321 fmov.
l %fpcr
,-(%sp
) # save fpcr
10322 fmov.
l &0x00000000,%fpcr
# clear fpcr for load
10323 fmov.s
0x8(%sp
),%fp0
# load sgl dst
10324 fmov.
l (%sp
)+,%fpcr
# restore fpcr
10325 fsub.s
0x8(%sp
),%fp0
# fsub w/ sgl src
10330 fmov.
l %fpcr
,-(%sp
) # save fpcr
10331 fmov.
l &0x00000000,%fpcr
# clear fpcr for load
10332 fmov.d
0x8(%sp
),%fp0
# load dbl dst
10333 fmov.
l (%sp
)+,%fpcr
# restore fpcr
10334 fsub.d
0xc(%sp
),%fp0
# fsub w/ dbl src
10339 fmovm.x
0x4(%sp
),&0x80 # load ext dst
10340 fsub.x
0x10(%sp
),%fp0
# fsub w/ ext src
10345 fmov.
l %fpcr
,-(%sp
) # save fpcr
10346 fmov.
l &0x00000000,%fpcr
# clear fpcr for load
10347 fmov.s
0x8(%sp
),%fp0
# load sgl dst
10348 fmov.
l (%sp
)+,%fpcr
# restore fpcr
10349 fmul.s
0x8(%sp
),%fp0
# fmul w/ sgl src
10354 fmov.
l %fpcr
,-(%sp
) # save fpcr
10355 fmov.
l &0x00000000,%fpcr
# clear fpcr for load
10356 fmov.d
0x8(%sp
),%fp0
# load dbl dst
10357 fmov.
l (%sp
)+,%fpcr
# restore fpcr
10358 fmul.d
0xc(%sp
),%fp0
# fmul w/ dbl src
10363 fmovm.x
0x4(%sp
),&0x80 # load ext dst
10364 fmul.x
0x10(%sp
),%fp0
# fmul w/ ext src
10369 fmov.
l %fpcr
,-(%sp
) # save fpcr
10370 fmov.
l &0x00000000,%fpcr
# clear fpcr for load
10371 fmov.s
0x8(%sp
),%fp0
# load sgl dst
10372 fmov.
l (%sp
)+,%fpcr
# restore fpcr
10373 fdiv.s
0x8(%sp
),%fp0
# fdiv w/ sgl src
10378 fmov.
l %fpcr
,-(%sp
) # save fpcr
10379 fmov.
l &0x00000000,%fpcr
# clear fpcr for load
10380 fmov.d
0x8(%sp
),%fp0
# load dbl dst
10381 fmov.
l (%sp
)+,%fpcr
# restore fpcr
10382 fdiv.d
0xc(%sp
),%fp0
# fdiv w/ dbl src
10387 fmovm.x
0x4(%sp
),&0x80 # load ext dst
10388 fdiv.x
0x10(%sp
),%fp0
# fdiv w/ ext src
10393 fabs.s
0x4(%sp
),%fp0
# fabs w/ sgl src
10398 fabs.d
0x4(%sp
),%fp0
# fabs w/ dbl src
10403 fabs.x
0x4(%sp
),%fp0
# fabs w/ ext src
10408 fneg.s
0x4(%sp
),%fp0
# fneg w/ sgl src
10413 fneg.d
0x4(%sp
),%fp0
# fneg w/ dbl src
10418 fneg.x
0x4(%sp
),%fp0
# fneg w/ ext src
10423 fsqrt.s
0x4(%sp
),%fp0
# fsqrt w/ sgl src
10428 fsqrt.d
0x4(%sp
),%fp0
# fsqrt w/ dbl src
10433 fsqrt.x
0x4(%sp
),%fp0
# fsqrt w/ ext src
10438 fint.s
0x4(%sp
),%fp0
# fint w/ sgl src
10443 fint.d
0x4(%sp
),%fp0
# fint w/ dbl src
10448 fint.x
0x4(%sp
),%fp0
# fint w/ ext src
10453 fintrz.s
0x4(%sp
),%fp0
# fintrz w/ sgl src
10458 fintrz.d
0x4(%sp
),%fp0
# fintrz w/ dbl src
10463 fintrz.x
0x4(%sp
),%fp0
# fintrz w/ ext src
10466 ########################################################################
10468 #########################################################################
10469 # src_zero(): Return signed zero according to sign of src operand. #
10470 #########################################################################
10473 tst.
b SRC_EX
(%a0
) # get sign of src operand
10474 bmi.
b ld_mzero
# if neg, load neg zero
10477 # ld_pzero(): return a positive zero.
10481 fmov.s
&0x00000000,%fp0
# load +0
10482 mov.
b &z_bmask
,FPSR_CC
(%a6
) # set 'Z' ccode bit
10485 # ld_mzero(): return a negative zero.
10488 fmov.s
&0x80000000,%fp0
# load -0
10489 mov.
b &neg_bmask+z_bmask
,FPSR_CC
(%a6
) # set 'N','Z' ccode bits
10492 #########################################################################
10493 # dst_zero(): Return signed zero according to sign of dst operand. #
10494 #########################################################################
10497 tst.
b DST_EX
(%a1
) # get sign of dst operand
10498 bmi.
b ld_mzero
# if neg, load neg zero
10499 bra.
b ld_pzero
# load positive zero
10501 #########################################################################
10502 # src_inf(): Return signed inf according to sign of src operand. #
10503 #########################################################################
10506 tst.
b SRC_EX
(%a0
) # get sign of src operand
10507 bmi.
b ld_minf
# if negative branch
10510 # ld_pinf(): return a positive infinity.
10514 fmov.s
&0x7f800000,%fp0
# load +INF
10515 mov.
b &inf_bmask
,FPSR_CC
(%a6
) # set 'INF' ccode bit
10519 # ld_minf():return a negative infinity.
10523 fmov.s
&0xff800000,%fp0
# load -INF
10524 mov.
b &neg_bmask+inf_bmask
,FPSR_CC
(%a6
) # set 'N','I' ccode bits
10527 #########################################################################
10528 # dst_inf(): Return signed inf according to sign of dst operand. #
10529 #########################################################################
10532 tst.
b DST_EX
(%a1
) # get sign of dst operand
10533 bmi.
b ld_minf
# if negative branch
10537 #################################################################
10538 # szr_inf(): Return +ZERO for a negative src operand or #
10539 # +INF for a positive src operand. #
10540 # Routine used for fetox, ftwotox, and ftentox. #
10541 #################################################################
10543 tst.
b SRC_EX
(%a0
) # check sign of source
10547 #########################################################################
10548 # sopr_inf(): Return +INF for a positive src operand or #
10549 # jump to operand error routine for a negative src operand. #
10550 # Routine used for flogn, flognp1, flog10, and flog2. #
10551 #########################################################################
10554 tst.
b SRC_EX
(%a0
) # check sign of source
10558 #################################################################
10559 # setoxm1i(): Return minus one for a negative src operand or #
10560 # positive infinity for a positive src operand. #
10561 # Routine used for fetoxm1. #
10562 #################################################################
10565 tst.
b SRC_EX
(%a0
) # check sign of source
10569 #########################################################################
10570 # src_one(): Return signed one according to sign of src operand. #
10571 #########################################################################
10574 tst.
b SRC_EX
(%a0
) # check sign of source
10578 # ld_pone(): return positive one.
10582 fmov.s
&0x3f800000,%fp0
# load +1
10587 # ld_mone(): return negative one.
10591 fmov.s
&0xbf800000,%fp0
# load -1
10592 mov.
b &neg_bmask
,FPSR_CC
(%a6
) # set 'N' ccode bit
# Extended-precision +pi/2 and -pi/2: identical exponent and mantissa, only
# the sign bit differs. Loaded PC-relative by the ld_ppi2/ld_mpi2 code below.
10595 ppiby2
: long
0x3fff0000, 0xc90fdaa2, 0x2168c235
10596 mpiby2
: long
0xbfff0000, 0xc90fdaa2, 0x2168c235
10598 #################################################################
10599 # spi_2(): Return signed PI/2 according to sign of src operand. #
10600 #################################################################
10603 tst.
b SRC_EX
(%a0
) # check sign of source
10607 # ld_ppi2(): return positive PI/2.
10612 fmov.x ppiby2
(%pc
),%fp0
# load +pi/2
10613 bra.w t_pinx2
# set INEX2
10616 # ld_mpi2(): return negative PI/2.
10621 fmov.x mpiby2
(%pc
),%fp0
# load -pi/2
10622 bra.w t_minx2
# set INEX2
10624 ####################################################
10625 # The following routines give support for fsincos. #
10626 ####################################################
10629 # ssincosz(): When the src operand is ZERO, store a one in the
10630 # cosine register and return a ZERO in fp0 w/ the same sign
10631 # as the src operand.
10635 fmov.s
&0x3f800000,%fp1
10636 tst.
b SRC_EX
(%a0
) # test sign
10638 fmov.s
&0x80000000,%fp0
# return sin result in fp0
10639 mov.
b &z_bmask+neg_bmask
,FPSR_CC
(%a6
)
10642 fmov.s
&0x00000000,%fp0
# return sin result in fp0
10643 mov.
b &z_bmask
,FPSR_CC
(%a6
)
10647 # ssincosi(): When the src operand is INF, store a QNAN in the cosine
10648 # register and jump to the operand error routine for negative
10653 fmov.x qnan
(%pc
),%fp1
# load NAN
10657 # ssincosqnan(): When the src operand is a QNAN, store the QNAN in the cosine
10658 # register and branch to the src QNAN routine.
10662 fmov.x LOCAL_EX
(%a0
),%fp1
10665 ########################################################################
10671 mov.
b DTAG
(%a6
),%d1
10683 mov.
b DTAG
(%a6
),%d1
10695 mov.
b DTAG
(%a6
),%d1
10707 mov.
b SRC_EX
(%a0
),%d1
# get src sign
10708 mov.
b DST_EX
(%a1
),%d0
# get dst sign
10709 eor.
b %d0
,%d1
# get qbyte sign
10711 mov.
b %d1
,FPSR_QBYTE
(%a6
)
10718 clr.
b FPSR_QBYTE
(%a6
)
10720 mov.
b SRC_EX
(%a0
),%d1
# get src sign
10721 mov.
b DST_EX
(%a1
),%d0
# get dst sign
10722 eor.
b %d0
,%d1
# get qbyte sign
10724 mov.
b %d1
,FPSR_QBYTE
(%a6
)
10725 cmpi.
b DTAG
(%a6
),&DENORM
10731 fmov.
l (%sp
)+,%fpcr
10732 fmov.x
DST(%a1
),%fp0
10738 mov.
b &neg_bmask
,FPSR_CC
(%a6
) # set 'N' code
10741 #########################################################################
10746 mov.
b DTAG
(%a6
),%d1
10758 mov.
b DTAG
(%a6
),%d1
10770 mov.
b DTAG
(%a6
),%d1
10780 #########################################################################
10782 global sscale_snorm
10783 global sscale_sdnrm
10786 mov.
b DTAG
(%a6
),%d1
10796 global sscale_szero
10798 mov.
b DTAG
(%a6
),%d1
10810 mov.
b DTAG
(%a6
),%d1
10816 ########################################################################
10820 mov.
b DTAG
(%a6
),%d1
10825 #########################################################################
10826 # norm(): normalize the mantissa of an extended precision input. the #
10827 # input operand should not be normalized already. #
10829 # XDEF **************************************************************** #
10832 # XREF **************************************************************** #
10835 # INPUT *************************************************************** #
10836 # a0 = pointer fp extended precision operand to normalize #
10838 # OUTPUT ************************************************************** #
10839 # d0 = number of bit positions the mantissa was shifted #
10840 # a0 = the input operand's mantissa is normalized; the exponent #
10843 #########################################################################
10846 mov.
l %d2
, -(%sp
) # create some temp regs
10849 mov.
l FTEMP_HI
(%a0
), %d0
# load hi(mantissa)
10850 mov.
l FTEMP_LO
(%a0
), %d1
# load lo(mantissa)
10852 bfffo
%d0
{&0:&32}, %d2
# how many places to shift?
10853 beq.
b norm_lo
# hi(man) is all zeroes!
10856 lsl.
l %d2
, %d0
# left shift hi(man)
10857 bfextu
%d1
{&0:%d2
}, %d3
# extract lo bits
10859 or.l %d3
, %d0
# create hi(man)
10860 lsl.
l %d2
, %d1
# create lo(man)
10862 mov.
l %d0
, FTEMP_HI
(%a0
) # store new hi(man)
10863 mov.
l %d1
, FTEMP_LO
(%a0
) # store new lo(man)
10865 mov.
l %d2
, %d0
# return shift amount
10867 mov.
l (%sp
)+, %d3
# restore temp regs
10873 bfffo
%d1
{&0:&32}, %d2
# how many places to shift?
10874 lsl.
l %d2
, %d1
# shift lo(man)
10875 add.l &32, %d2
# add 32 to shft amount
10877 mov.
l %d1
, FTEMP_HI
(%a0
) # store hi(man)
10878 clr.
l FTEMP_LO
(%a0
) # lo(man) is now zero
10880 mov.
l %d2
, %d0
# return shift amount
10882 mov.
l (%sp
)+, %d3
# restore temp regs
10887 #########################################################################
10888 # unnorm_fix(): - changes an UNNORM to one of NORM, DENORM, or ZERO #
10889 # - returns corresponding optype tag #
10891 # XDEF **************************************************************** #
10894 # XREF **************************************************************** #
10895 # norm() - normalize the mantissa #
10897 # INPUT *************************************************************** #
10898 # a0 = pointer to unnormalized extended precision number #
10900 # OUTPUT ************************************************************** #
10901 # d0 = optype tag - is corrected to one of NORM, DENORM, or ZERO #
10902 # a0 = input operand has been converted to a norm, denorm, or #
10903 # zero; both the exponent and mantissa are changed. #
10905 #########################################################################
10909 bfffo FTEMP_HI
(%a0
){&0:&32}, %d0
# how many shifts are needed?
10910 bne.
b unnorm_shift
# hi(man) is not all zeroes
10913 # hi(man) is all zeroes so see if any bits in lo(man) are set
10916 bfffo FTEMP_LO
(%a0
){&0:&32}, %d0
# is operand really a zero?
10917 beq.w unnorm_zero
# yes
10919 add.w
&32, %d0
# no; fix shift distance
10922 # d0 = # shifts needed for complete normalization
10925 clr.
l %d1
# clear top word
10926 mov.w FTEMP_EX
(%a0
), %d1
# extract exponent
10927 and.w
&0x7fff, %d1
# strip off sgn
10929 cmp.w
%d0
, %d1
# will denorm push exp < 0?
10930 bgt.
b unnorm_nrm_zero
# yes; denorm only until exp = 0
10933 # exponent would not go < 0. therefore, number stays normalized
10935 sub.w
%d0
, %d1
# shift exponent value
10936 mov.w FTEMP_EX
(%a0
), %d0
# load old exponent
10937 and.w
&0x8000, %d0
# save old sign
10938 or.w
%d0
, %d1
# {sgn,new exp}
10939 mov.w
%d1
, FTEMP_EX
(%a0
) # insert new exponent
10941 bsr.
l norm
# normalize UNNORM
10943 mov.
b &NORM
, %d0
# return new optype tag
10947 # exponent would go < 0, so only denormalize until exp = 0
10950 cmp.
b %d1
, &32 # is exp <= 32?
10951 bgt.
b unnorm_nrm_zero_lrg
# no; go handle large exponent
10953 bfextu FTEMP_HI
(%a0
){%d1
:&32}, %d0
# extract new hi(man)
10954 mov.
l %d0
, FTEMP_HI
(%a0
) # save new hi(man)
10956 mov.
l FTEMP_LO
(%a0
), %d0
# fetch old lo(man)
10957 lsl.
l %d1
, %d0
# extract new lo(man)
10958 mov.
l %d0
, FTEMP_LO
(%a0
) # save new lo(man)
10960 and.w
&0x8000, FTEMP_EX
(%a0
) # set exp = 0
10962 mov.
b &DENORM
, %d0
# return new optype tag
10966 # only mantissa bits set are in lo(man)
# Reached when the exponent held in d1 is greater than 32. The mantissa is
# shifted left by the exponent value: lo(man) shifted by (exp - 32) becomes
# the new hi(man), lo(man) is cleared, the exponent field is zeroed with the
# sign bit kept, and the DENORM tag is placed in d0.
10968 unnorm_nrm_zero_lrg
:
10969 sub.w
&32, %d1
# adjust shft amt by 32
10971 mov.
l FTEMP_LO
(%a0
), %d0
# fetch old lo(man)
10972 lsl.
l %d1
, %d0
# left shift lo(man)
10974 mov.
l %d0
, FTEMP_HI
(%a0
) # store new hi(man)
10975 clr.
l FTEMP_LO
(%a0
) # lo(man) = 0
10977 and.w
&0x8000, FTEMP_EX
(%a0
) # set exp = 0
10979 mov.
b &DENORM
, %d0
# return new optype tag
10983 # whole mantissa is zero so this UNNORM is actually a zero
10986 and.w
&0x8000, FTEMP_EX
(%a0
) # force exponent to zero
10988 mov.
b &ZERO
, %d0
# fix optype tag