1 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
2 MOTOROLA MICROPROCESSOR & MEMORY TECHNOLOGY GROUP
3 M68000 Hi-Performance Microprocessor Division
4 M68060 Software Package
5 Production Release P1.00 -- October 10, 1994
7 M68060 Software Package Copyright © 1993, 1994 Motorola Inc. All rights reserved.
9 THE SOFTWARE is provided on an "AS IS" basis and without warranty.
10 To the maximum extent permitted by applicable law,
11 MOTOROLA DISCLAIMS ALL WARRANTIES WHETHER EXPRESS OR IMPLIED,
12 INCLUDING IMPLIED WARRANTIES OF MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE
13 and any warranty against infringement with regard to the SOFTWARE
14 (INCLUDING ANY MODIFIED VERSIONS THEREOF) and any accompanying written materials.
16 To the maximum extent permitted by applicable law,
17 IN NO EVENT SHALL MOTOROLA BE LIABLE FOR ANY DAMAGES WHATSOEVER
18 (INCLUDING WITHOUT LIMITATION, DAMAGES FOR LOSS OF BUSINESS PROFITS,
19 BUSINESS INTERRUPTION, LOSS OF BUSINESS INFORMATION, OR OTHER PECUNIARY LOSS)
20 ARISING OF THE USE OR INABILITY TO USE THE SOFTWARE.
21 Motorola assumes no responsibility for the maintenance and support of the SOFTWARE.
23 You are hereby granted a copyright license to use, modify, and distribute the SOFTWARE
24 so long as this entire notice is retained without alteration in any modified and/or
25 redistributed versions, and that such modified versions are clearly identified as such.
26 No licenses are granted by implication, estoppel or otherwise under any patents
27 or trademarks of Motorola, Inc.
28 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
31 # This file is appended to the top of the 060FPSP package
32 # and contains the entry points into the package. The user, in
33 # effect, branches to one of the branch table entries located
34 # after _060FPSP_TABLE.
35 # Also, subroutine stubs exist in this file (_fpsp_done for
36 # example) that are referenced by the FPSP package itself in order
37 # to call a given routine. The stub routine actually performs the
38 # callout. The FPSP code does a "bsr" to the stub routine. This
39 # extra layer of hierarchy adds a slight performance penalty but
40 # it makes the FPSP code easier to read and more maintainable.
51 set _off_fpu_dis, 0x20 # offset (from _060FPSP_TABLE-0x80) of the entry read by the _real_fpu_disabled stub
71 ###############################################################
73 # Here's the table of ENTRY POINTS for those linking the package.
95 ###############################################################
# Callout stubs. Only the two-instruction core of each stub is visible here:
#   mov.l  loads the longword stored at _060FPSP_TABLE-0x80+_off_<name> into d0
#   pea.l  pushes the address (_060FPSP_TABLE-0x80)+d0 onto the stack
# i.e. each table entry is used as an offset relative to _060FPSP_TABLE-0x80.
# NOTE(review): the stub labels, and whatever saves/restores d0 and transfers
# control to the pushed address, are not visible in this excerpt.
99 mov.l (_060FPSP_TABLE-0x80+_off_done,%pc),%d0
100 pea.l (_060FPSP_TABLE-0x80,%pc,%d0)
107 mov.l (_060FPSP_TABLE-0x80+_off_ovfl,%pc),%d0
108 pea.l (_060FPSP_TABLE-0x80,%pc,%d0)
115 mov.l (_060FPSP_TABLE-0x80+_off_unfl,%pc),%d0
116 pea.l (_060FPSP_TABLE-0x80,%pc,%d0)
123 mov.l (_060FPSP_TABLE-0x80+_off_inex,%pc),%d0
124 pea.l (_060FPSP_TABLE-0x80,%pc,%d0)
131 mov.l (_060FPSP_TABLE-0x80+_off_bsun,%pc),%d0
132 pea.l (_060FPSP_TABLE-0x80,%pc,%d0)
139 mov.l (_060FPSP_TABLE-0x80+_off_operr,%pc),%d0
140 pea.l (_060FPSP_TABLE-0x80,%pc,%d0)
147 mov.l (_060FPSP_TABLE-0x80+_off_snan,%pc),%d0
148 pea.l (_060FPSP_TABLE-0x80,%pc,%d0)
155 mov.l (_060FPSP_TABLE-0x80+_off_dz,%pc),%d0
156 pea.l (_060FPSP_TABLE-0x80,%pc,%d0)
163 mov.l (_060FPSP_TABLE-0x80+_off_fline,%pc),%d0
164 pea.l (_060FPSP_TABLE-0x80,%pc,%d0)
168 global _real_fpu_disabled
171 mov.l (_060FPSP_TABLE-0x80+_off_fpu_dis,%pc),%d0
172 pea.l (_060FPSP_TABLE-0x80,%pc,%d0)
179 mov.l (_060FPSP_TABLE-0x80+_off_trap,%pc),%d0
180 pea.l (_060FPSP_TABLE-0x80,%pc,%d0)
187 mov.l (_060FPSP_TABLE-0x80+_off_trace,%pc),%d0
188 pea.l (_060FPSP_TABLE-0x80,%pc,%d0)
195 mov.l (_060FPSP_TABLE-0x80+_off_access,%pc),%d0
196 pea.l (_060FPSP_TABLE-0x80,%pc,%d0)
200 #######################################
205 mov.l (_060FPSP_TABLE-0x80+_off_imr,%pc),%d0
206 pea.l (_060FPSP_TABLE-0x80,%pc,%d0)
213 mov.l (_060FPSP_TABLE-0x80+_off_dmr,%pc),%d0
214 pea.l (_060FPSP_TABLE-0x80,%pc,%d0)
221 mov.l (_060FPSP_TABLE-0x80+_off_dmw,%pc),%d0
222 pea.l (_060FPSP_TABLE-0x80,%pc,%d0)
226 global _imem_read_word
229 mov.l (_060FPSP_TABLE-0x80+_off_irw,%pc),%d0
230 pea.l (_060FPSP_TABLE-0x80,%pc,%d0)
234 global _imem_read_long
237 mov.l (_060FPSP_TABLE-0x80+_off_irl,%pc),%d0
238 pea.l (_060FPSP_TABLE-0x80,%pc,%d0)
242 global _dmem_read_byte
245 mov.l (_060FPSP_TABLE-0x80+_off_drb,%pc),%d0
246 pea.l (_060FPSP_TABLE-0x80,%pc,%d0)
250 global _dmem_read_word
253 mov.l (_060FPSP_TABLE-0x80+_off_drw,%pc),%d0
254 pea.l (_060FPSP_TABLE-0x80,%pc,%d0)
258 global _dmem_read_long
261 mov.l (_060FPSP_TABLE-0x80+_off_drl,%pc),%d0
262 pea.l (_060FPSP_TABLE-0x80,%pc,%d0)
266 global _dmem_write_byte
269 mov.l (_060FPSP_TABLE-0x80+_off_dwb,%pc),%d0
270 pea.l (_060FPSP_TABLE-0x80,%pc,%d0)
274 global _dmem_write_word
277 mov.l (_060FPSP_TABLE-0x80+_off_dww,%pc),%d0
278 pea.l (_060FPSP_TABLE-0x80,%pc,%d0)
282 global _dmem_write_long
285 mov.l (_060FPSP_TABLE-0x80+_off_dwl,%pc),%d0
286 pea.l (_060FPSP_TABLE-0x80,%pc,%d0)
291 # This file contains a set of define statements for constants
292 # in order to promote readability within the corecode itself.
# Exception-handler stack frame layout. All offsets are applied to a6, which the
# handlers set up with "link.w %a6,&-LOCAL_SIZE". LV is the offset of the lowest
# byte of the local area; the positive EXC_* offsets address the exception
# stack frame that sits above the saved a6.
295 set LOCAL_SIZE, 192 # stack frame size(bytes)
296 set LV, -LOCAL_SIZE # stack offset
298 set EXC_SR, 0x4 # stack status register
299 set EXC_PC, 0x6 # stack pc
300 set EXC_VOFF, 0xa # stacked vector offset
301 set EXC_EA, 0xc # stacked <ea>
303 set EXC_FP, 0x0 # frame pointer
305 set EXC_AREGS, -68 # offset of all address regs
306 set EXC_DREGS, -100 # offset of all data regs
307 set EXC_FPREGS, -36 # offset of all fp regs
309 set EXC_A7, EXC_AREGS+(7*4) # offset of saved a7
310 set OLD_A7, EXC_AREGS+(6*4) # extra copy of saved a7 (NOTE: same offset as EXC_A6)
311 set EXC_A6, EXC_AREGS+(6*4) # offset of saved a6
312 set EXC_A5, EXC_AREGS+(5*4)
313 set EXC_A4, EXC_AREGS+(4*4)
314 set EXC_A3, EXC_AREGS+(3*4)
315 set EXC_A2, EXC_AREGS+(2*4)
316 set EXC_A1, EXC_AREGS+(1*4)
317 set EXC_A0, EXC_AREGS+(0*4)
318 set EXC_D7, EXC_DREGS+(7*4)
319 set EXC_D6, EXC_DREGS+(6*4)
320 set EXC_D5, EXC_DREGS+(5*4)
321 set EXC_D4, EXC_DREGS+(4*4)
322 set EXC_D3, EXC_DREGS+(3*4)
323 set EXC_D2, EXC_DREGS+(2*4)
324 set EXC_D1, EXC_DREGS+(1*4)
325 set EXC_D0, EXC_DREGS+(0*4)
327 set EXC_FP0, EXC_FPREGS+(0*12) # offset of saved fp0
328 set EXC_FP1, EXC_FPREGS+(1*12) # offset of saved fp1
329 set EXC_FP2, EXC_FPREGS+(2*12) # offset of saved fp2 (not used)
331 set FP_SCR1, LV+80 # fp scratch 1
332 set FP_SCR1_EX, FP_SCR1+0
333 set FP_SCR1_SGN, FP_SCR1+2
334 set FP_SCR1_HI, FP_SCR1+4
335 set FP_SCR1_LO, FP_SCR1+8
337 set FP_SCR0, LV+68 # fp scratch 0
338 set FP_SCR0_EX, FP_SCR0+0
339 set FP_SCR0_SGN, FP_SCR0+2
340 set FP_SCR0_HI, FP_SCR0+4
341 set FP_SCR0_LO, FP_SCR0+8
343 set FP_DST, LV+56 # fp destination operand
344 set FP_DST_EX, FP_DST+0
345 set FP_DST_SGN, FP_DST+2
346 set FP_DST_HI, FP_DST+4
347 set FP_DST_LO, FP_DST+8
349 set FP_SRC, LV+44 # fp source operand
350 set FP_SRC_EX, FP_SRC+0
351 set FP_SRC_SGN, FP_SRC+2
352 set FP_SRC_HI, FP_SRC+4
353 set FP_SRC_LO, FP_SRC+8
355 set USER_FPIAR, LV+40 # FP instr address register
357 set USER_FPSR, LV+36 # FP status register
358 set FPSR_CC, USER_FPSR+0 # FPSR condition codes
359 set FPSR_QBYTE, USER_FPSR+1 # FPSR quotient byte
360 set FPSR_EXCEPT, USER_FPSR+2 # FPSR exception status byte
361 set FPSR_AEXCEPT, USER_FPSR+3 # FPSR accrued exception byte
363 set USER_FPCR, LV+32 # FP control register
364 set FPCR_ENABLE, USER_FPCR+2 # FPCR exception enable
365 set FPCR_MODE, USER_FPCR+3 # FPCR rounding mode control
367 set L_SCR3, LV+28 # integer scratch 3
368 set L_SCR2, LV+24 # integer scratch 2
369 set L_SCR1, LV+20 # integer scratch 1
371 set STORE_FLG, LV+19 # flag: operand store (ie. not fcmp/ftst)
373 set EXC_TEMP2, LV+24 # temporary space (NOTE: same offset as L_SCR2)
374 set EXC_TEMP, LV+16 # temporary space
376 set DTAG, LV+15 # destination operand type
377 set STAG, LV+14 # source operand type
379 set SPCOND_FLG, LV+10 # flag: special case (see below)
381 set EXC_CC, LV+8 # saved condition codes
382 set EXC_EXTWPTR, LV+4 # saved current PC (active)
383 set EXC_EXTWORD, LV+2 # saved extension word
384 set EXC_CMDREG, LV+2 # saved extension word (alias of EXC_EXTWORD)
385 set EXC_OPWORD, LV+0 # saved operation word
387 ################################
# FTEMP_*, LOCAL_*, DST_* and SRC_* name the same byte offsets
# (_EX = 0, _SGN = 2, _HI = 4) within an extended-precision value in memory.
391 set FTEMP, 0 # offsets within an
392 set FTEMP_EX, 0 # extended precision
393 set FTEMP_SGN, 2 # value saved in memory.
398 set LOCAL, 0 # offsets within an
399 set LOCAL_EX, 0 # extended precision
400 set LOCAL_SGN, 2 # value saved in memory.
405 set DST, 0 # offsets within an
406 set DST_EX, 0 # extended precision
407 set DST_HI, 4 # value saved in memory.
410 set SRC, 0 # offsets within an
411 set SRC_EX, 0 # extended precision
412 set SRC_HI, 4 # value saved in memory.
415 set SGL_LO, 0x3f81 # min sgl prec exponent
416 set SGL_HI, 0x407e # max sgl prec exponent
417 set DBL_LO, 0x3c01 # min dbl prec exponent
418 set DBL_HI, 0x43fe # max dbl prec exponent
419 set EXT_LO, 0x0 # min ext prec exponent
420 set EXT_HI, 0x7ffe # max ext prec exponent
422 set EXT_BIAS, 0x3fff # extended precision bias
423 set SGL_BIAS, 0x007f # single precision bias
424 set DBL_BIAS, 0x03ff # double precision bias
426 set NORM, 0x00 # operand type for STAG/DTAG
427 set ZERO, 0x01 # operand type for STAG/DTAG
428 set INF, 0x02 # operand type for STAG/DTAG
429 set QNAN, 0x03 # operand type for STAG/DTAG
430 set DENORM, 0x04 # operand type for STAG/DTAG
431 set SNAN, 0x05 # operand type for STAG/DTAG
432 set UNNORM, 0x06 # operand type for STAG/DTAG
# condition-code bit numbers; they correspond to the byte masks
# neg_bmask/z_bmask/inf_bmask/nan_bmask (1 << bit number).
437 set neg_bit, 0x3 # negative result
438 set z_bit, 0x2 # zero result
439 set inf_bit, 0x1 # infinite result
440 set nan_bit, 0x0 # NAN result
442 set q_sn_bit, 0x7 # sign bit of quotient byte
# exception bit numbers, used with btst on the FPSR_EXCEPT and FPCR_ENABLE bytes
444 set bsun_bit, 7 # branch on unordered
445 set snan_bit, 6 # signalling NAN
446 set operr_bit, 5 # operand error
447 set ovfl_bit, 4 # overflow
448 set unfl_bit, 3 # underflow
449 set dz_bit, 2 # divide by zero
450 set inex2_bit, 1 # inexact result 2
451 set inex1_bit, 0 # inexact result 1
# accrued-exception bit numbers (cf. aiop_mask..ainex_mask = 1 << bit number)
453 set aiop_bit, 7 # accrued illegal operation bit
454 set aovfl_bit, 6 # accrued overflow bit
455 set aunfl_bit, 5 # accrued underflow bit
456 set adz_bit, 4 # accrued dz bit
457 set ainex_bit, 3 # accrued inexact bit
459 #############################
460 # FPSR individual bit masks #
461 #############################
# NOTE(review): the "(lw)" masks appear positioned for a longword access to
# USER_FPSR and the "(byte)" masks for the FPSR_CC byte alone - confirm at use sites.
462 set neg_mask, 0x08000000 # negative bit mask (lw)
463 set inf_mask, 0x02000000 # infinity bit mask (lw)
464 set z_mask, 0x04000000 # zero bit mask (lw)
465 set nan_mask, 0x01000000 # nan bit mask (lw)
467 set neg_bmask, 0x08 # negative bit mask (byte)
468 set inf_bmask, 0x02 # infinity bit mask (byte)
469 set z_bmask, 0x04 # zero bit mask (byte)
470 set nan_bmask, 0x01 # nan bit mask (byte)
472 set bsun_mask, 0x00008000 # bsun exception mask
473 set snan_mask, 0x00004000 # snan exception mask
474 set operr_mask, 0x00002000 # operr exception mask
475 set ovfl_mask, 0x00001000 # overflow exception mask
476 set unfl_mask, 0x00000800 # underflow exception mask
477 set dz_mask, 0x00000400 # dz exception mask
478 set inex2_mask, 0x00000200 # inex2 exception mask
479 set inex1_mask, 0x00000100 # inex1 exception mask
481 set aiop_mask, 0x00000080 # accrued illegal operation
482 set aovfl_mask, 0x00000040 # accrued overflow
483 set aunfl_mask, 0x00000020 # accrued underflow
484 set adz_mask, 0x00000010 # accrued divide by zero
485 set ainex_mask, 0x00000008 # accrued inexact
487 ######################################
488 # FPSR combinations used in the FPSP #
489 ######################################
490 set dzinf_mask, inf_mask+dz_mask+adz_mask
491 set opnan_mask, nan_mask+operr_mask+aiop_mask
492 set nzi_mask, 0x01ffffff # AND mask: clears N, Z, and I; keeps NAN and all lower bits
493 set unfinx_mask, unfl_mask+inex2_mask+aunfl_mask+ainex_mask
494 set unf2inx_mask, unfl_mask+inex2_mask+ainex_mask # like unfinx_mask but without aunfl_mask
495 set ovfinx_mask, ovfl_mask+inex2_mask+aovfl_mask+ainex_mask
496 set inx1a_mask, inex1_mask+ainex_mask
497 set inx2a_mask, inex2_mask+ainex_mask
498 set snaniop_mask, nan_mask+snan_mask+aiop_mask
499 set snaniop2_mask, snan_mask+aiop_mask # like snaniop_mask but without nan_mask
500 set naniop_mask, nan_mask+aiop_mask
501 set neginf_mask, neg_mask+inf_mask
502 set infaiop_mask, inf_mask+aiop_mask
503 set negz_mask, neg_mask+z_mask
504 set opaop_mask, operr_mask+aiop_mask
505 set unfl_inx_mask, unfl_mask+aunfl_mask+ainex_mask # like unfinx_mask but without inex2_mask
506 set ovfl_inx_mask, ovfl_mask+aovfl_mask+ainex_mask # like ovfinx_mask but without inex2_mask
511 set rnd_stky_bit, 29 # stky bit pos in longword
513 set sign_bit, 0x7 # sign bit
514 set signan_bit, 0x6 # signalling nan bit
516 set sgl_thresh, 0x3f81 # minimum sgl exponent (same value as SGL_LO)
517 set dbl_thresh, 0x3c01 # minimum dbl exponent (same value as DBL_LO)
519 set x_mode, 0x0 # extended precision
520 set s_mode, 0x4 # single precision
521 set d_mode, 0x8 # double precision
523 set rn_mode, 0x0 # round-to-nearest
524 set rz_mode, 0x1 # round-to-zero
525 set rm_mode, 0x2 # round-to-minus-infinity
526 set rp_mode, 0x3 # round-to-plus-infinity
528 set mantissalen, 64 # length of mantissa in bits
530 set BYTE, 1 # len(byte) == 1 byte
531 set WORD, 2 # len(word) == 2 bytes
532 set LONG, 4 # len(longword) == 4 bytes
534 set BSUN_VEC, 0xc0 # bsun vector offset
535 set INEX_VEC, 0xc4 # inexact vector offset
536 set DZ_VEC, 0xc8 # dz vector offset
537 set UNFL_VEC, 0xcc # unfl vector offset
538 set OPERR_VEC, 0xd0 # operr vector offset
539 set OVFL_VEC, 0xd4 # ovfl vector offset
540 set SNAN_VEC, 0xd8 # snan vector offset
542 ###########################
543 # SPecial CONDition FLaGs #
544 ###########################
545 set ftrapcc_flg, 0x01 # flag bit: ftrapcc exception
546 set fbsun_flg, 0x02 # flag bit: bsun exception
547 set mia7_flg, 0x04 # flag bit: (a7)+ <ea>
548 set mda7_flg, 0x08 # flag bit: -(a7) <ea>
549 set fmovm_flg, 0x40 # flag bit: fmovm instruction
550 set immed_flg, 0x80 # flag bit: &<data> <ea>
558 ##################################
559 # TRANSCENDENTAL "LAST-OP" FLAGS #
560 ##################################
561 set FMUL_OP, 0x0 # fmul instr performed last
562 set FDIV_OP, 0x1 # fdiv performed last
563 set FADD_OP, 0x2 # fadd performed last
564 set FMOV_OP, 0x3 # fmov performed last
569 T1: long 0x40C62D38,0xD3D64634 # 16381 LOG2 LEAD
570 T2: long 0x3D6F90AE,0xB1E75CC7 # 16381 LOG2 TRAIL
572 PI: long 0x40000000,0xC90FDAA2,0x2168C235,0x00000000
573 PIBY2: long 0x3FFF0000,0xC90FDAA2,0x2168C235,0x00000000
# NOTE(review): the label for the following constant is not visible in this excerpt.
576 long 0x3FE45F30,0x6DC9C883
578 #########################################################################
579 # XDEF **************************************************************** #
580 # _fpsp_ovfl(): 060FPSP entry point for FP Overflow exception. #
582 # This handler should be the first code executed upon taking the #
583 # FP Overflow exception in an operating system. #
585 # XREF **************************************************************** #
586 # _imem_read_long() - read instruction longword #
587 # fix_skewed_ops() - adjust src operand in fsave frame #
588 # set_tag_x() - determine optype of src/dst operands #
589 # store_fpreg() - store opclass 0 or 2 result to FP regfile #
590 # unnorm_fix() - change UNNORM operands to NORM or ZERO #
591 # load_fpn2() - load dst operand from FP regfile #
592 # fout() - emulate an opclass 3 instruction #
593 # tbl_unsupp - addr of table of emulation routines for opclass 0,2 #
594 # _fpsp_done() - "callout" for 060FPSP exit (all work done!) #
595 # _real_ovfl() - "callout" for Overflow exception enabled code #
596 # _real_inex() - "callout" for Inexact exception enabled code #
597 # _real_trace() - "callout" for Trace exception code #
599 # INPUT *************************************************************** #
600 # - The system stack contains the FP Ovfl exception stack frame #
601 # - The fsave frame contains the source operand #
603 # OUTPUT ************************************************************** #
604 # Overflow Exception enabled: #
605 # - The system stack is unchanged #
606 # - The fsave frame contains the adjusted src op for opclass 0,2 #
607 # Overflow Exception disabled: #
608 # - The system stack is unchanged #
609 # - The "exception present" flag in the fsave frame is cleared #
611 # ALGORITHM *********************************************************** #
612 # On the 060, if an FP overflow is present as the result of any #
613 # instruction, the 060 will take an overflow exception whether the #
614 # exception is enabled or disabled in the FPCR. For the disabled case, #
615 # This handler emulates the instruction to determine what the correct #
616 # default result should be for the operation. This default result is #
617 # then stored in either the FP regfile, data regfile, or memory. #
618 # Finally, the handler exits through the "callout" _fpsp_done() #
619 # denoting that no exceptional conditions exist within the machine. #
620 # If the exception is enabled, then this handler must create the #
621 # exceptional operand and place it in the fsave state frame, and store #
622 # the default result (only if the instruction is opclass 3). For #
623 # exceptions enabled, this handler must exit through the "callout" #
624 # _real_ovfl() so that the operating system enabled overflow handler #
625 # can handle this case. #
626 # Two other conditions exist. First, if overflow was disabled #
627 # but the inexact exception was enabled, this handler must exit #
628 # through the "callout" _real_inex() regardless of whether the result was inexact. #
630 # Also, in the case of an opclass three instruction where #
631 # overflow was disabled and the trace exception was enabled, this #
632 # handler must exit through the "callout" _real_trace(). #
634 #########################################################################
# Body of the FP Overflow handler described in the header above.
# NOTE(review): the entry label, several local labels and some instructions
# (e.g. the branches that follow the btst tests) are not visible in this excerpt.
639 #$# sub.l &24,%sp # make room for src/dst
641 link.w %a6,&-LOCAL_SIZE # init stack frame
643 fsave FP_SRC(%a6) # grab the "busy" frame
645 movm.l &0x0303,EXC_DREGS(%a6) # save d0-d1/a0-a1
646 fmovm.l %fpcr,%fpsr,%fpiar,USER_FPCR(%a6) # save ctrl regs
647 fmovm.x &0xc0,EXC_FPREGS(%a6) # save fp0-fp1 on stack
649 # the FPIAR holds the "current PC" of the faulting instruction
650 mov.l USER_FPIAR(%a6),EXC_EXTWPTR(%a6)
651 mov.l EXC_EXTWPTR(%a6),%a0 # fetch instruction addr
652 addq.l &0x4,EXC_EXTWPTR(%a6) # incr instruction ptr
653 bsr.l _imem_read_long # fetch the instruction words
654 mov.l %d0,EXC_OPWORD(%a6) # store opword + extension word (EXC_CMDREG = EXC_OPWORD+2)
656 ##############################################################################
658 btst &0x5,EXC_CMDREG(%a6) # is instr an fmove out?
662 lea FP_SRC(%a6),%a0 # pass: ptr to src op
663 bsr.l fix_skewed_ops # fix src op
665 # since, I believe, only NORMs and DENORMs can come through here,
666 # maybe we can avoid the subroutine call.
667 lea FP_SRC(%a6),%a0 # pass: ptr to src op
668 bsr.l set_tag_x # tag the operand type
669 mov.b %d0,STAG(%a6) # maybe NORM,DENORM
671 # bit five of the fp extension word separates the monadic and dyadic operations
672 # that can pass through fpsp_ovfl(). remember that fcmp, ftst, and fsincos
673 # will never take this exception.
674 btst &0x5,1+EXC_CMDREG(%a6) # is operation monadic or dyadic?
675 beq.b fovfl_extract # monadic
677 bfextu EXC_CMDREG(%a6){&6:&3},%d0 # dyadic; load dst reg
678 bsr.l load_fpn2 # load dst into FP_DST
680 lea FP_DST(%a6),%a0 # pass: ptr to dst op
681 bsr.l set_tag_x # tag the operand type
682 cmpi.b %d0,&UNNORM # is operand an UNNORM?
683 bne.b fovfl_op2_done # no
684 bsr.l unnorm_fix # yes; convert to NORM,DENORM,or ZERO
686 mov.b %d0,DTAG(%a6) # save dst optype tag
690 #$# mov.l FP_SRC_EX(%a6),TRAP_SRCOP_EX(%a6)
691 #$# mov.l FP_SRC_HI(%a6),TRAP_SRCOP_HI(%a6)
692 #$# mov.l FP_SRC_LO(%a6),TRAP_SRCOP_LO(%a6)
693 #$# mov.l FP_DST_EX(%a6),TRAP_DSTOP_EX(%a6)
694 #$# mov.l FP_DST_HI(%a6),TRAP_DSTOP_HI(%a6)
695 #$# mov.l FP_DST_LO(%a6),TRAP_DSTOP_LO(%a6)
698 mov.b FPCR_MODE(%a6),%d0 # pass rnd prec/mode
700 mov.b 1+EXC_CMDREG(%a6),%d1 # second byte of the extension word
701 andi.w &0x007f,%d1 # extract extension (low 7 bits)
703 andi.l &0x00ff01ff,USER_FPSR(%a6) # clear cc byte and exception bits except inex1; keep quotient and accrued bytes
705 fmov.l &0x0,%fpcr # zero current control regs
711 # maybe we can make these entry points ONLY the OVFL entry points of each routine.
712 mov.l (tbl_unsupp.l,%pc,%d1.w*4),%d1 # fetch routine offset from tbl_unsupp
713 jsr (tbl_unsupp.l,%pc,%d1.l*1) # call routine at tbl_unsupp + offset
715 # the operation has been emulated. the result is in fp0.
716 # the EXOP, if an exception occurred, is in fp1.
717 # we must save the default result regardless of whether
718 # traps are enabled or disabled.
719 bfextu EXC_CMDREG(%a6){&6:&3},%d0 # d0 = same 3-bit dst reg field as extracted above
722 # the exceptional possibilities we have left ourselves with are ONLY overflow
723 # and inexact. and, the inexact is such that overflow occurred and was disabled
724 # but inexact was enabled.
725 btst &ovfl_bit,FPCR_ENABLE(%a6) # is overflow enabled?
728 btst &inex2_bit,FPCR_ENABLE(%a6) # is inexact enabled?
731 fmovm.x EXC_FPREGS(%a6),&0xc0 # restore fp0-fp1
732 fmovm.l USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs
733 movm.l EXC_DREGS(%a6),&0x0303 # restore d0-d1/a0-a1
739 # overflow is enabled AND overflow, of course, occurred. so, we have the EXOP
740 # in fp1. now, simply jump to _real_ovfl()!
742 fmovm.x &0x40,FP_SRC(%a6) # save EXOP (fp1) to stack
744 mov.w &0xe005,2+FP_SRC(%a6) # save exc status
746 fmovm.x EXC_FPREGS(%a6),&0xc0 # restore fp0-fp1
747 fmovm.l USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs
748 movm.l EXC_DREGS(%a6),&0x0303 # restore d0-d1/a0-a1
750 frestore FP_SRC(%a6) # do this after fmovm,other f<op>s!
756 # overflow occurred but is disabled. meanwhile, inexact is enabled. Therefore,
757 # we must jump to real_inex().
760 fmovm.x &0x40,FP_SRC(%a6) # save EXOP (fp1) to stack
762 mov.b &0xc4,1+EXC_VOFF(%a6) # vector offset = 0xc4 (INEX_VEC)
763 mov.w &0xe001,2+FP_SRC(%a6) # save exc status
765 fmovm.x EXC_FPREGS(%a6),&0xc0 # restore fp0-fp1
766 fmovm.l USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs
767 movm.l EXC_DREGS(%a6),&0x0303 # restore d0-d1/a0-a1
769 frestore FP_SRC(%a6) # do this after fmovm,other f<op>s!
775 ########################################################################
779 #$# mov.l FP_SRC_EX(%a6),TRAP_SRCOP_EX(%a6)
780 #$# mov.l FP_SRC_HI(%a6),TRAP_SRCOP_HI(%a6)
781 #$# mov.l FP_SRC_LO(%a6),TRAP_SRCOP_LO(%a6)
783 # the src operand is definitely a NORM(!), so tag it as such
784 mov.b &NORM,STAG(%a6) # set src optype tag
787 mov.b FPCR_MODE(%a6),%d0 # pass rnd prec/mode
789 and.l &0xffff00ff,USER_FPSR(%a6) # clear exception status byte; keep cc, quotient and accrued bytes
791 fmov.l &0x0,%fpcr # zero current control regs
794 lea FP_SRC(%a6),%a0 # pass ptr to src operand
798 btst &ovfl_bit,FPCR_ENABLE(%a6) # is overflow enabled?
801 btst &inex2_bit,FPCR_ENABLE(%a6) # is inexact enabled?
804 fmovm.x EXC_FPREGS(%a6),&0xc0 # restore fp0-fp1
805 fmovm.l USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs
806 movm.l EXC_DREGS(%a6),&0x0303 # restore d0-d1/a0-a1
811 btst &0x7,(%sp) # is trace on?
812 beq.l _fpsp_done # no
814 fmov.l %fpiar,0x8(%sp) # "Current PC" is in FPIAR
815 mov.w &0x2024,0x6(%sp) # stk fmt = 0x2; voff = 0x024
818 #########################################################################
819 # XDEF **************************************************************** #
820 # _fpsp_unfl(): 060FPSP entry point for FP Underflow exception. #
822 # This handler should be the first code executed upon taking the #
823 # FP Underflow exception in an operating system. #
825 # XREF **************************************************************** #
826 # _imem_read_long() - read instruction longword #
827 # fix_skewed_ops() - adjust src operand in fsave frame #
828 # set_tag_x() - determine optype of src/dst operands #
829 # store_fpreg() - store opclass 0 or 2 result to FP regfile #
830 # unnorm_fix() - change UNNORM operands to NORM or ZERO #
831 # load_fpn2() - load dst operand from FP regfile #
832 # fout() - emulate an opclass 3 instruction #
833 # tbl_unsupp - addr of table of emulation routines for opclass 0,2 #
834 # _fpsp_done() - "callout" for 060FPSP exit (all work done!) #
835 # _real_unfl() - "callout" for Underflow exception enabled code #
836 # _real_inex() - "callout" for Inexact exception enabled code #
837 # _real_trace() - "callout" for Trace exception code #
839 # INPUT *************************************************************** #
840 # - The system stack contains the FP Unfl exception stack frame #
841 # - The fsave frame contains the source operand #
843 # OUTPUT ************************************************************** #
844 # Underflow Exception enabled: #
845 # - The system stack is unchanged #
846 # - The fsave frame contains the adjusted src op for opclass 0,2 #
847 # Underflow Exception disabled: #
848 # - The system stack is unchanged #
849 # - The "exception present" flag in the fsave frame is cleared #
851 # ALGORITHM *********************************************************** #
852 # On the 060, if an FP underflow is present as the result of any #
853 # instruction, the 060 will take an underflow exception whether the #
854 # exception is enabled or disabled in the FPCR. For the disabled case, #
855 # This handler emulates the instruction to determine what the correct #
856 # default result should be for the operation. This default result is #
857 # then stored in either the FP regfile, data regfile, or memory. #
858 # Finally, the handler exits through the "callout" _fpsp_done() #
859 # denoting that no exceptional conditions exist within the machine. #
860 # If the exception is enabled, then this handler must create the #
861 # exceptional operand and place it in the fsave state frame, and store #
862 # the default result (only if the instruction is opclass 3). For #
863 # exceptions enabled, this handler must exit through the "callout" #
864 # _real_unfl() so that the operating system enabled underflow handler #
865 # can handle this case. #
866 # Two other conditions exist. First, if underflow was disabled #
867 # but the inexact exception was enabled and the result was inexact, #
868 # this handler must exit through the "callout" _real_inex(). #
870 # Also, in the case of an opclass three instruction where #
871 # underflow was disabled and the trace exception was enabled, this #
872 # handler must exit through the "callout" _real_trace(). #
874 #########################################################################
# Body of the FP Underflow handler described in the header above.
# NOTE(review): the entry label, several local labels and some instructions
# (e.g. the branches that follow most btst tests) are not visible in this excerpt.
879 #$# sub.l &24,%sp # make room for src/dst
881 link.w %a6,&-LOCAL_SIZE # init stack frame
883 fsave FP_SRC(%a6) # grab the "busy" frame
885 movm.l &0x0303,EXC_DREGS(%a6) # save d0-d1/a0-a1
886 fmovm.l %fpcr,%fpsr,%fpiar,USER_FPCR(%a6) # save ctrl regs
887 fmovm.x &0xc0,EXC_FPREGS(%a6) # save fp0-fp1 on stack
889 # the FPIAR holds the "current PC" of the faulting instruction
890 mov.l USER_FPIAR(%a6),EXC_EXTWPTR(%a6)
891 mov.l EXC_EXTWPTR(%a6),%a0 # fetch instruction addr
892 addq.l &0x4,EXC_EXTWPTR(%a6) # incr instruction ptr
893 bsr.l _imem_read_long # fetch the instruction words
894 mov.l %d0,EXC_OPWORD(%a6) # store opword + extension word (EXC_CMDREG = EXC_OPWORD+2)
896 ##############################################################################
898 btst &0x5,EXC_CMDREG(%a6) # is instr an fmove out?
902 lea FP_SRC(%a6),%a0 # pass: ptr to src op
903 bsr.l fix_skewed_ops # fix src op
905 lea FP_SRC(%a6),%a0 # pass: ptr to src op
906 bsr.l set_tag_x # tag the operand type
907 mov.b %d0,STAG(%a6) # maybe NORM,DENORM
909 # bit five of the fp ext word separates the monadic and dyadic operations
910 # that can pass through fpsp_unfl(). remember that fcmp, and ftst
911 # will never take this exception.
912 btst &0x5,1+EXC_CMDREG(%a6) # is op monadic or dyadic?
913 beq.b funfl_extract # monadic
915 # now, what's left that's not dyadic is fsincos. we can distinguish it
916 # from all dyadics by the '0110xxx pattern
917 btst &0x4,1+EXC_CMDREG(%a6) # is op an fsincos?
918 bne.b funfl_extract # yes
920 bfextu EXC_CMDREG(%a6){&6:&3},%d0 # dyadic; load dst reg
921 bsr.l load_fpn2 # load dst into FP_DST
923 lea FP_DST(%a6),%a0 # pass: ptr to dst op
924 bsr.l set_tag_x # tag the operand type
925 cmpi.b %d0,&UNNORM # is operand an UNNORM?
926 bne.b funfl_op2_done # no
927 bsr.l unnorm_fix # yes; convert to NORM,DENORM,or ZERO
929 mov.b %d0,DTAG(%a6) # save dst optype tag
933 #$# mov.l FP_SRC_EX(%a6),TRAP_SRCOP_EX(%a6)
934 #$# mov.l FP_SRC_HI(%a6),TRAP_SRCOP_HI(%a6)
935 #$# mov.l FP_SRC_LO(%a6),TRAP_SRCOP_LO(%a6)
936 #$# mov.l FP_DST_EX(%a6),TRAP_DSTOP_EX(%a6)
937 #$# mov.l FP_DST_HI(%a6),TRAP_DSTOP_HI(%a6)
938 #$# mov.l FP_DST_LO(%a6),TRAP_DSTOP_LO(%a6)
941 mov.b FPCR_MODE(%a6),%d0 # pass rnd prec/mode
943 mov.b 1+EXC_CMDREG(%a6),%d1 # second byte of the extension word
944 andi.w &0x007f,%d1 # extract extension (low 7 bits)
946 andi.l &0x00ff01ff,USER_FPSR(%a6) # clear cc byte and exception bits except inex1; keep quotient and accrued bytes
948 fmov.l &0x0,%fpcr # zero current control regs
954 # maybe we can make these entry points ONLY the OVFL entry points of each routine.
955 mov.l (tbl_unsupp.l,%pc,%d1.w*4),%d1 # fetch routine offset from tbl_unsupp
956 jsr (tbl_unsupp.l,%pc,%d1.l*1) # call routine at tbl_unsupp + offset
958 bfextu EXC_CMDREG(%a6){&6:&3},%d0 # d0 = same 3-bit dst reg field as extracted above
961 # The `060 FPU multiplier hardware is such that if the result of a
962 # multiply operation is the smallest possible normalized number
963 # (0x00000000_80000000_00000000), then the machine will take an
964 # underflow exception. Since this is incorrect, we need to check
965 # if our emulation, after re-doing the operation, decided that
966 # no underflow was called for. We do these checks only in
967 # funfl_{unfl,inex}_on() because w/ both exceptions disabled, this
968 # special case will simply exit gracefully with the correct result.
970 # the exceptional possibilities we have left ourselves with are ONLY underflow
971 # and inexact. and, the inexact is such that underflow occurred and was disabled
972 # but inexact was enabled.
973 btst &unfl_bit,FPCR_ENABLE(%a6) # is underflow enabled?
977 btst &inex2_bit,FPCR_ENABLE(%a6) # is inexact enabled?
981 fmovm.x EXC_FPREGS(%a6),&0xc0 # restore fp0-fp1
982 fmovm.l USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs
983 movm.l EXC_DREGS(%a6),&0x0303 # restore d0-d1/a0-a1
989 # underflow is enabled AND the machine signalled underflow. so, we have the EXOP
990 # in fp1 (don't forget to save fp0). what to do now?
991 # well, we simply have to get to go to _real_unfl()!
994 # The `060 FPU multiplier hardware is such that if the result of a
995 # multiply operation is the smallest possible normalized number
996 # (0x00000000_80000000_00000000), then the machine will take an
997 # underflow exception. Since this is incorrect, we check here to see
998 # if our emulation, after re-doing the operation, decided that
999 # no underflow was called for.
1000 btst &unfl_bit,FPSR_EXCEPT(%a6) # did the emulation flag underflow?
1004 fmovm.x &0x40,FP_SRC(%a6) # save EXOP (fp1) to stack
1006 mov.w &0xe003,2+FP_SRC(%a6) # save exc status
1008 fmovm.x EXC_FPREGS(%a6),&0xc0 # restore fp0-fp1
1009 fmovm.l USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs
1010 movm.l EXC_DREGS(%a6),&0x0303 # restore d0-d1/a0-a1
1012 frestore FP_SRC(%a6) # do this after fmovm,other f<op>s!
1018 # underflow occurred but is disabled. meanwhile, inexact is enabled. Therefore,
1019 # we must jump to real_inex().
1022 # The `060 FPU multiplier hardware is such that if the result of a
1023 # multiply operation is the smallest possible normalized number
1024 # (0x00000000_80000000_00000000), then the machine will take an
1025 # underflow exception.
1026 # But, whether bogus or not, if inexact is enabled AND it occurred,
1027 # then we have to branch to real_inex.
1029 btst &inex2_bit,FPSR_EXCEPT(%a6) # did the emulation flag inexact?
1034 fmovm.x &0x40,FP_SRC(%a6) # save EXOP to stack
1036 mov.b &0xc4,1+EXC_VOFF(%a6) # vector offset = 0xc4 (INEX_VEC)
1037 mov.w &0xe001,2+FP_SRC(%a6) # save exc status
1039 fmovm.x EXC_FPREGS(%a6),&0xc0 # restore fp0-fp1
1040 fmovm.l USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs
1041 movm.l EXC_DREGS(%a6),&0x0303 # restore d0-d1/a0-a1
1043 frestore FP_SRC(%a6) # do this after fmovm,other f<op>s!
1049 #######################################################################
1053 #$# mov.l FP_SRC_EX(%a6),TRAP_SRCOP_EX(%a6)
1054 #$# mov.l FP_SRC_HI(%a6),TRAP_SRCOP_HI(%a6)
1055 #$# mov.l FP_SRC_LO(%a6),TRAP_SRCOP_LO(%a6)
1057 # the src operand is definitely a NORM(!), so tag it as such
1058 mov.b &NORM,STAG(%a6) # set src optype tag
1061 mov.b FPCR_MODE(%a6),%d0 # pass rnd prec/mode
1063 and.l &0xffff00ff,USER_FPSR(%a6) # clear exception status byte; keep cc, quotient and accrued bytes
1065 fmov.l &0x0,%fpcr # zero current control regs
1068 lea FP_SRC(%a6),%a0 # pass ptr to src operand
1072 btst &unfl_bit,FPCR_ENABLE(%a6) # is underflow enabled?
1073 bne.w funfl_unfl_on2
1075 btst &inex2_bit,FPCR_ENABLE(%a6) # is inexact enabled?
1076 bne.w funfl_inex_on2
1078 fmovm.x EXC_FPREGS(%a6),&0xc0 # restore fp0-fp1
1079 fmovm.l USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs
1080 movm.l EXC_DREGS(%a6),&0x0303 # restore d0-d1/a0-a1
1085 btst &0x7,(%sp) # is trace on?
1086 beq.l _fpsp_done # no
1088 fmov.l %fpiar,0x8(%sp) # "Current PC" is in FPIAR
1089 mov.w &0x2024,0x6(%sp) # stk fmt = 0x2; voff = 0x024
1092 #########################################################################
1093 # XDEF **************************************************************** #
1094 # _fpsp_unsupp(): 060FPSP entry point for FP "Unimplemented #
1095 # Data Type" exception. #
1097 # This handler should be the first code executed upon taking the #
1098 # FP Unimplemented Data Type exception in an operating system. #
1100 # XREF **************************************************************** #
1101 # _imem_read_{word,long}() - read instruction word/longword #
1102 # fix_skewed_ops() - adjust src operand in fsave frame #
1103 # set_tag_x() - determine optype of src/dst operands #
1104 # store_fpreg() - store opclass 0 or 2 result to FP regfile #
1105 # unnorm_fix() - change UNNORM operands to NORM or ZERO #
1106 # load_fpn2() - load dst operand from FP regfile #
1107 # load_fpn1() - load src operand from FP regfile #
1108 # fout() - emulate an opclass 3 instruction #
1109 # tbl_unsupp - addr of table of emulation routines for opclass 0,2 #
1110 # _real_inex() - "callout" to operating system inexact handler #
1111 # _fpsp_done() - "callout" for exit; work all done #
1112 # _real_trace() - "callout" for Trace enabled exception #
1113 # funimp_skew() - adjust fsave src ops to "incorrect" value #
1114 # _real_snan() - "callout" for SNAN exception #
1115 # _real_operr() - "callout" for OPERR exception #
1116 # _real_ovfl() - "callout" for OVFL exception #
1117 # _real_unfl() - "callout" for UNFL exception #
1118 # get_packed() - fetch packed operand from memory #
1120 # INPUT *************************************************************** #
1121 # - The system stack contains the "Unimp Data Type" stk frame #
1122 # - The fsave frame contains the src op (for UNNORM/DENORM) #
1124 # OUTPUT ************************************************************** #
1125 # If Inexact exception (opclass 3): #
1126 # - The system stack is changed to an Inexact exception stk frame #
1127 # If SNAN exception (opclass 3): #
1128 # - The system stack is changed to an SNAN exception stk frame #
1129 # If OPERR exception (opclass 3): #
1130 # - The system stack is changed to an OPERR exception stk frame #
1131 # If OVFL exception (opclass 3): #
1132 # - The system stack is changed to an OVFL exception stk frame #
1133 # If UNFL exception (opclass 3): #
1134 # - The system stack is changed to an UNFL exception stack frame #
1135 # If Trace exception enabled: #
1136 # - The system stack is changed to a Trace exception stack frame #
1137 # Else: (normal case) #
1138 # - Correct result has been stored as appropriate #
1140 # ALGORITHM *********************************************************** #
1141 # Two main instruction types can enter here: (1) DENORM or UNNORM #
1142 # unimplemented data types. These can be either opclass 0,2 or 3 #
1143 # instructions, and (2) PACKED unimplemented data format instructions #
1144 # also of opclasses 0,2, or 3. #
1145 # For UNNORM/DENORM opclass 0 and 2, the handler fetches the src #
1146 # operand from the fsave state frame and the dst operand (if dyadic) #
1147 # from the FP register file. The instruction is then emulated by #
1148 # choosing an emulation routine from a table of routines indexed by #
1149 # instruction type. Once the instruction has been emulated and result #
1150 # saved, then we check to see if any enabled exceptions resulted from #
1151 # instruction emulation. If none, then we exit through the "callout" #
1152 # _fpsp_done(). If there is an enabled FP exception, then we insert #
1153 # this exception into the FPU in the fsave state frame and then exit #
1154 # through _fpsp_done(). #
1155 # PACKED opclass 0 and 2 is similar in how the instruction is #
1156 # emulated and exceptions handled. The differences occur in how the #
1157 # handler loads the packed op (by calling get_packed() routine) and #
1158 # by the fact that a Trace exception could be pending for PACKED ops. #
1159 # If a Trace exception is pending, then the current exception stack #
1160 # frame is changed to a Trace exception stack frame and an exit is #
1161 # made through _real_trace(). #
1162 # For UNNORM/DENORM opclass 3, the actual move out to memory is #
1163 # performed by calling the routine fout(). If no exception should occur #
1164 # as the result of emulation, then an exit either occurs through #
1165 # _fpsp_done() or through _real_trace() if a Trace exception is pending #
1166 # (a Trace stack frame must be created here, too). If an FP exception #
1167 # should occur, then we must create an exception stack frame of that #
1168 # type and jump to either _real_snan(), _real_operr(), _real_inex(), #
1169 # _real_unfl(), or _real_ovfl() as appropriate. PACKED opclass 3 #
1170 # emulation is performed in a similar manner. #
1172 #########################################################################
1175 # (1) DENORM and UNNORM (unimplemented) data types:
1180 # pre-instruction * *
1181 # ***************** *****************
1182 # * 0x0 * 0x0dc * * 0x3 * 0x0dc *
1183 # ***************** *****************
1186 # ***************** *****************
1188 # ***************** *****************
1190 # (2) PACKED format (unsupported) opclasses two and three:
1206 link.w %a6,&-LOCAL_SIZE # init stack frame: reserve LOCAL_SIZE bytes, a6 = frame ptr
1208 fsave FP_SRC(%a6) # save fp state (fsave frame lives at FP_SRC)
1210 movm.l &0x0303,EXC_DREGS(%a6) # save d0-d1/a0-a1
1211 fmovm.l %fpcr,%fpsr,%fpiar,USER_FPCR(%a6) # save ctrl regs
1212 fmovm.x &0xc0,EXC_FPREGS(%a6) # save fp0-fp1 on stack
1214 btst &0x5,EXC_SR(%a6) # user or supervisor mode?
1217 mov.l %usp,%a0 # fetch user stack pointer
1218 mov.l %a0,EXC_A7(%a6) # save user a7 in EXC_A7
1220 # if the exception is an opclass zero or two unimplemented data type
1221 # exception, then the a7' calculated here is wrong since it doesn't
1222 # stack an ea. however, we don't need an a7' for this case anyways.
1224 lea 0x4+EXC_EA(%a6),%a0 # load old a7' (address 4 bytes past EXC_EA)
1225 mov.l %a0,EXC_A7(%a6) # save a7' in EXC_A7
1229 # the FPIAR holds the "current PC" of the faulting instruction.
1230 # the FPIAR should be set correctly for ALL exceptions passing through here.
1232 mov.l USER_FPIAR(%a6),EXC_EXTWPTR(%a6) # ext word ptr = saved FPIAR
1233 mov.l EXC_EXTWPTR(%a6),%a0 # fetch instruction addr
1234 addq.l &0x4,EXC_EXTWPTR(%a6) # incr instruction ptr past the longword read below
1235 bsr.l _imem_read_long # fetch the instruction words
1236 mov.l %d0,EXC_OPWORD(%a6) # store OPWORD and EXTWORD
1238 ############################
1240 clr.b SPCOND_FLG(%a6) # clear special condition flag
1242 # Separate opclass three (fpn-to-mem) ops since they have a different
1243 # stack frame and protocol.
1244 btst &0x5,EXC_CMDREG(%a6) # is it an fmove out?
1247 # Separate packed opclass two instructions.
1248 bfextu EXC_CMDREG(%a6){&0:&6},%d0 # d0 = top 6 bits of cmd word (opclass,src fmt)
1253 # I'm not sure at this point what FPSR bits are valid for this instruction.
1254 # so, since the emulation routines re-create them anyways, zero exception field
1255 andi.l &0x00ff00ff,USER_FPSR(%a6) # zero exception field (mask also clears bits 31-24)
1257 fmov.l &0x0,%fpcr # zero current control regs
1260 # Opclass two w/ memory-to-fpn operation will have an incorrect extended
1261 # precision format if the src format was single or double and the
1262 # source data type was an INF, NAN, DENORM, or UNNORM
1263 lea FP_SRC(%a6),%a0 # pass ptr to input
1264 bsr.l fix_skewed_ops # correct a skewed sgl/dbl src operand in place
1266 # we don't know whether the src operand or the dst operand (or both) is the
1267 # UNNORM or DENORM. call the function that tags the operand type. if the
1268 # input is an UNNORM, then convert it to a NORM, DENORM, or ZERO.
1269 lea FP_SRC(%a6),%a0 # pass: ptr to src op
1270 bsr.l set_tag_x # tag the operand type
1271 cmpi.b %d0,&UNNORM # is operand an UNNORM?
1273 bsr.l unnorm_fix # yes; convert to NORM,DENORM,or ZERO
1276 mov.b %d0,STAG(%a6) # save src optype tag
1278 bfextu EXC_CMDREG(%a6){&6:&3},%d0 # dyadic; load dst reg (3-bit field at bit offset 6)
1280 # bit five of the fp extension word separates the monadic and dyadic operations
1282 btst &0x5,1+EXC_CMDREG(%a6) # is operation monadic or dyadic?
1283 beq.b fu_extract # monadic
1284 cmpi.b 1+EXC_CMDREG(%a6),&0x3a # is operation an ftst?
1285 beq.b fu_extract # yes, so it's monadic, too
1287 bsr.l load_fpn2 # load dst into FP_DST
1289 lea FP_DST(%a6),%a0 # pass: ptr to dst op
1290 bsr.l set_tag_x # tag the operand type
1291 cmpi.b %d0,&UNNORM # is operand an UNNORM?
1292 bne.b fu_op2_done # no
1293 bsr.l unnorm_fix # yes; convert to NORM,DENORM,or ZERO
1295 mov.b %d0,DTAG(%a6) # save dst optype tag
1299 mov.b FPCR_MODE(%a6),%d0 # fetch rnd mode/prec
1301 bfextu 1+EXC_CMDREG(%a6){&1:&7},%d1 # extract extension (low 7 bits of 2nd cmd byte)
1306 mov.l (tbl_unsupp.l,%pc,%d1.l*4),%d1 # fetch routine addr (longword entry d1 of tbl_unsupp)
1307 jsr (tbl_unsupp.l,%pc,%d1.l*1) # call routine at tbl_unsupp + fetched offset
1310 # Exceptions in order of precedence:
1312 # SNAN : all dyadic ops
1313 # OPERR : fsqrt(-NORM)
1314 # OVFL : all except ftst,fcmp
1315 # UNFL : all except ftst,fcmp
1317 # INEX2 : all except ftst,fcmp
1318 # INEX1 : none (packed doesn't go through here)
1321 # we determine the highest priority exception(if any) set by the
1322 # emulation routine that has also been enabled by the user.
1323 mov.b FPCR_ENABLE(%a6),%d0 # fetch exceptions enabled
1324 bne.b fu_in_ena # some are enabled
1327 # fcmp and ftst do not store any result.
1328 mov.b 1+EXC_CMDREG(%a6),%d0 # fetch extension
1329 andi.b &0x38,%d0 # extract bits 3-5
1330 cmpi.b %d0,&0x38 # is instr fcmp or ftst?
1331 beq.b fu_in_exit # yes
1333 bfextu EXC_CMDREG(%a6){&6:&3},%d0 # d0 = dst reg field of cmd word
1334 bsr.l store_fpreg # store the result
1338 fmovm.x EXC_FPREGS(%a6),&0xc0 # restore fp0/fp1
1339 fmovm.l USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs
1340 movm.l EXC_DREGS(%a6),&0x0303 # restore d0-d1/a0-a1
1347 and.b FPSR_EXCEPT(%a6),%d0 # keep only ones enabled (d0 = enabled & set)
1348 bfffo %d0{&24:&8},%d0 # find highest priority exception
1349 bne.b fu_in_exc # there is at least one set
1352 # No exceptions occurred that were also enabled. Now:
1354 # if (OVFL && ovfl_disabled && inexact_enabled) {
1355 # branch to _real_inex() (even if the result was exact!);
1357 # save the result in the proper fp reg (unless the op is fcmp or ftst);
1361 btst &ovfl_bit,FPSR_EXCEPT(%a6) # was overflow set?
1362 beq.b fu_in_cont # no
1365 btst &inex2_bit,FPCR_ENABLE(%a6) # was inexact enabled?
1366 beq.b fu_in_cont # no
1367 bra.w fu_in_exc_ovfl # go insert overflow frame
1370 # An exception occurred and that exception was enabled:
1372 # shift enabled exception field into lo byte of d0;
1373 # if (((INEX2 || INEX1) && inex_enabled && OVFL && ovfl_disabled) ||
1374 # ((INEX2 || INEX1) && inex_enabled && UNFL && unfl_disabled)) {
1376 # * this is the case where we must call _real_inex() now or else
1377 # * there will be no other way to pass it the exceptional operand
1379 # call _real_inex();
1381 # restore exc state (SNAN||OPERR||OVFL||UNFL||DZ||INEX) into the FPU;
1385 subi.l &24,%d0 # bfffo offset 24-31 -> table index 0-7
1386 cmpi.b %d0,&0x6 # is exception INEX? (6)
1387 bne.b fu_in_exc_exit # no
1389 # the enabled exception was inexact
1390 btst &unfl_bit,FPSR_EXCEPT(%a6) # did disabled underflow occur?
1391 bne.w fu_in_exc_unfl # yes
1392 btst &ovfl_bit,FPSR_EXCEPT(%a6) # did disabled overflow occur?
1393 bne.w fu_in_exc_ovfl # yes
1395 # here, we insert the correct fsave status value into the fsave frame for the
1396 # corresponding exception. the operand in the fsave frame should be the original src operand.
1399 mov.l %d0,-(%sp) # save d0 (exception index) across the call
1400 bsr.l funimp_skew # skew sgl or dbl inputs
1401 mov.l (%sp)+,%d0 # restore d0
1403 mov.w (tbl_except.b,%pc,%d0.w*2),2+FP_SRC(%a6) # create exc status
1405 fmovm.x EXC_FPREGS(%a6),&0xc0 # restore fp0/fp1
1406 fmovm.l USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs
1407 movm.l EXC_DREGS(%a6),&0x0303 # restore d0-d1/a0-a1
1409 frestore FP_SRC(%a6) # restore src op
1416 short 0xe000,0xe006,0xe004,0xe005 # tbl_except[0-3]: BSUN,SNAN,OPERR,OVFL
1417 short 0xe003,0xe002,0xe001,0xe001 # tbl_except[4-7]: UNFL,DZ,INEX2,INEX1
1421 bra.b fu_in_exc_exit
1424 bra.b fu_in_exc_exit
1426 # If the input operand to this operation was opclass two and a single
1427 # or double precision denorm, inf, or nan, the operand needs to be
1428 # "corrected" in order to have the proper equivalent extended precision number.
1430 global fix_skewed_ops # in: a0 = ptr to operand (LOCAL_EX/HI/LO), a6 = frame ptr; d0 is overwritten
1432 bfextu EXC_CMDREG(%a6){&0:&6},%d0 # extract opclass,src fmt
1433 cmpi.b %d0,&0x11 # is class = 2 & fmt = sgl?
1435 cmpi.b %d0,&0x15 # is class = 2 & fmt = dbl?
1440 mov.w LOCAL_EX(%a0),%d0 # fetch src exponent
1441 andi.w &0x7fff,%d0 # strip sign
1442 cmpi.w %d0,&0x3f80 # is |exp| == $3f80? (skewed sgl denorm/zero)
1443 beq.b fso_sgl_dnrm_zero # yes
1444 cmpi.w %d0,&0x407f # no; is |exp| == $407f? (skewed sgl inf/nan)
1445 beq.b fso_infnan # yes
1449 andi.l &0x7fffffff,LOCAL_HI(%a0) # clear j-bit
1450 beq.b fso_zero # it's a skewed zero
1452 # here, we count on norm not to alter a0...
1453 bsr.l norm # normalize mantissa
1454 neg.w %d0 # -shft amt
1455 addi.w &0x3f81,%d0 # adjust new exponent: $3f81 - shft amt
1456 andi.w &0x8000,LOCAL_EX(%a0) # clear old exponent (keep sign)
1457 or.w %d0,LOCAL_EX(%a0) # insert new exponent
1461 andi.w &0x8000,LOCAL_EX(%a0) # clear bogus exponent (keep sign)
1465 andi.b &0x7f,LOCAL_HI(%a0) # clear j-bit
1466 ori.w &0x7fff,LOCAL_EX(%a0) # make exponent = $7fff (sign untouched)
1470 mov.w LOCAL_EX(%a0),%d0 # fetch src exponent
1471 andi.w &0x7fff,%d0 # strip sign
1472 cmpi.w %d0,&0x3c00 # is |exp| == $3c00? (skewed dbl denorm/zero)
1473 beq.b fso_dbl_dnrm_zero # yes
1474 cmpi.w %d0,&0x43ff # no; is |exp| == $43ff? (skewed dbl inf/nan)
1475 beq.b fso_infnan # yes
1479 andi.l &0x7fffffff,LOCAL_HI(%a0) # clear j-bit
1480 bne.b fso_dbl_dnrm # it's a skewed denorm
1481 tst.l LOCAL_LO(%a0) # is it a zero?
1482 beq.b fso_zero # yes
1484 # here, we count on norm not to alter a0...
1485 bsr.l norm # normalize mantissa
1486 neg.w %d0 # -shft amt
1487 addi.w &0x3c01,%d0 # adjust new exponent: $3c01 - shft amt
1488 andi.w &0x8000,LOCAL_EX(%a0) # clear old exponent (keep sign)
1489 or.w %d0,LOCAL_EX(%a0) # insert new exponent
1492 #################################################################
1494 # fmove out took an unimplemented data type exception.
1495 # the src operand is in FP_SRC. Call fout() to write out the result and
1496 # to determine which exceptions, if any, to take.
1499 # Separate packed move outs from the UNNORM and DENORM move outs.
1500 bfextu EXC_CMDREG(%a6){&3:&3},%d0 # d0 = 3-bit field at bit offset 3 of cmd word
1507 # I'm not sure at this point what FPSR bits are valid for this instruction.
1508 # so, since the emulation routines re-create them anyways, zero exception field.
1509 # fmove out doesn't affect ccodes.
1510 and.l &0xffff00ff,USER_FPSR(%a6) # zero exception field
1512 fmov.l &0x0,%fpcr # zero current control regs
1515 # the src can ONLY be a DENORM or an UNNORM! so, don't make any big subroutine
1516 # call here. just figure out what it is...
1517 mov.w FP_SRC_EX(%a6),%d0 # get exponent
1518 andi.w &0x7fff,%d0 # strip sign
1519 beq.b fu_out_denorm # it's a DENORM
1522 bsr.l unnorm_fix # exponent non-zero, so UNNORM; fix it
1528 mov.b &DENORM,STAG(%a6) # tag src as DENORM
1532 mov.b FPCR_MODE(%a6),%d0 # fetch rnd mode/prec
1534 lea FP_SRC(%a6),%a0 # pass ptr to src operand
1536 mov.l (%a6),EXC_A6(%a6) # in case a6 changes: copy the link-saved a6 to EXC_A6
1537 bsr.l fout # call fmove out routine
1539 # Exceptions in order of precedence:
1542 # OPERR : fmove.{b,w,l} out of large UNNORM
1543 # OVFL : fmove.{s,d}
1544 # UNFL : fmove.{s,d,x}
1547 # INEX1 : none (packed doesn't travel through here)
1549 # determine the highest priority exception(if any) set by the
1550 # emulation routine that has also been enabled by the user.
1551 mov.b FPCR_ENABLE(%a6),%d0 # fetch exceptions enabled
1552 bne.w fu_out_ena # some are enabled
1556 mov.l EXC_A6(%a6),(%a6) # in case a6 changed: write EXC_A6 back to the saved-a6 slot
1558 # on extended precision opclass three instructions using pre-decrement or
1559 # post-increment addressing mode, the address register is not updated. if the
1560 # address register was the stack pointer used from user mode, then let's update
1561 # it here. if it was used from supervisor mode, then we have to handle this
1562 # as a special case.
1563 btst &0x5,EXC_SR(%a6) # user or supervisor?
1566 mov.l EXC_A7(%a6),%a0 # restore a7
1570 fmovm.x EXC_FPREGS(%a6),&0xc0 # restore fp0/fp1
1571 fmovm.l USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs
1572 movm.l EXC_DREGS(%a6),&0x0303 # restore d0-d1/a0-a1
1576 btst &0x7,(%sp) # is trace on?
1577 bne.b fu_out_trace # yes
1581 # is the ea mode pre-decrement of the stack pointer from supervisor mode?
1582 # ("fmov.x fpm,-(a7)") if so, the exception frame is shifted "down" below.
1584 cmpi.b SPCOND_FLG(%a6),&mda7_flg # was the <ea> mode -(a7)?
1585 bne.b fu_out_done_cont # no
1587 # the extended precision result is still in fp0. but, we need to save it
1588 # somewhere on the stack until we can copy it to its final resting place.
1589 # here, we're counting on the top of the stack to be the old place-holders
1590 # for fp0/fp1 which have already been restored. that way, we can write
1591 # over those destinations with the shifted stack frame.
1592 fmovm.x &0x80,FP_SRC(%a6) # put answer on stack
1594 fmovm.x EXC_FPREGS(%a6),&0xc0 # restore fp0/fp1
1595 fmovm.l USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs
1596 movm.l EXC_DREGS(%a6),&0x0303 # restore d0-d1/a0-a1
1598 mov.l (%a6),%a6 # restore frame pointer
1600 mov.l LOCAL_SIZE+EXC_SR(%sp),LOCAL_SIZE+EXC_SR-0xc(%sp) # copy longword at EXC_SR 0xc bytes lower
1601 mov.l LOCAL_SIZE+2+EXC_PC(%sp),LOCAL_SIZE+2+EXC_PC-0xc(%sp) # copy longword at 2+EXC_PC 0xc bytes lower
1603 # now, copy the result to the proper place on the stack
1604 mov.l LOCAL_SIZE+FP_SRC_EX(%sp),LOCAL_SIZE+EXC_SR+0x0(%sp)
1605 mov.l LOCAL_SIZE+FP_SRC_HI(%sp),LOCAL_SIZE+EXC_SR+0x4(%sp)
1606 mov.l LOCAL_SIZE+FP_SRC_LO(%sp),LOCAL_SIZE+EXC_SR+0x8(%sp)
1608 add.l &LOCAL_SIZE-0x8,%sp # sp += LOCAL_SIZE-0x8
1616 and.b FPSR_EXCEPT(%a6),%d0 # keep only ones enabled (d0 = enabled & set)
1617 bfffo %d0{&24:&8},%d0 # find highest priority exception
1618 bne.b fu_out_exc # there is at least one set
1620 # no exceptions were set.
1621 # if a disabled overflow occurred and inexact was enabled but the result
1622 # was exact, then a branch to _real_inex() is made.
1623 btst &ovfl_bit,FPSR_EXCEPT(%a6) # was overflow set?
1624 beq.w fu_out_done # no
1627 btst &inex2_bit,FPCR_ENABLE(%a6) # was inexact enabled?
1628 beq.w fu_out_done # no
1632 # The fp move out that took the "Unimplemented Data Type" exception was
1633 # being traced. Since the stack frames are similar, get the "current" PC
1634 # from FPIAR and put it in the trace stack frame then jump to _real_trace().
1636 # UNSUPP FRAME TRACE FRAME
1637 # ***************** *****************
1638 # * EA * * Current *
1640 # ***************** *****************
1641 # * 0x3 * 0x0dc * * 0x2 * 0x024 *
1642 # ***************** *****************
1645 # ***************** *****************
1647 # ***************** *****************
1650 mov.w &0x2024,0x6(%sp) # stk fmt = 0x2; voff = 0x024
1651 fmov.l %fpiar,0x8(%sp) # "Current PC" is in FPIAR
1654 # an exception occurred and that exception was enabled.
1656 subi.l &24,%d0 # bfffo offset 24-31 -> table index 0-7
1658 # we don't mess with the existing fsave frame. just re-insert it and
1659 # jump to the "_real_{}()" handler...
1660 mov.w (tbl_fu_out.b,%pc,%d0.w*2),%d0 # fetch handler offset from tbl_fu_out
1661 jmp (tbl_fu_out.b,%pc,%d0.w*1) # jump to tbl_fu_out + offset
1665 short tbl_fu_out - tbl_fu_out # BSUN can't happen
1666 short tbl_fu_out - tbl_fu_out # SNAN can't happen
1667 short fu_operr - tbl_fu_out # OPERR
1668 short fu_ovfl - tbl_fu_out # OVFL
1669 short fu_unfl - tbl_fu_out # UNFL
1670 short tbl_fu_out - tbl_fu_out # DZ can't happen
1671 short fu_inex - tbl_fu_out # INEX2
1672 short tbl_fu_out - tbl_fu_out # INEX1 won't make it here
1674 # for snan,operr,ovfl,unfl, src op is still in FP_SRC so just frestore it.
1677 fmovm.x EXC_FPREGS(%a6),&0xc0 # restore fp0/fp1
1678 fmovm.l USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs
1679 movm.l EXC_DREGS(%a6),&0x0303 # restore d0-d1/a0-a1
1681 mov.w &0x30d8,EXC_VOFF(%a6) # vector offset = 0xd8
1682 mov.w &0xe006,2+FP_SRC(%a6) # set fsave status (SNAN value in tbl_except)
1684 frestore FP_SRC(%a6) # re-insert the fsave frame with the src op
1692 fmovm.x EXC_FPREGS(%a6),&0xc0 # restore fp0/fp1
1693 fmovm.l USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs
1694 movm.l EXC_DREGS(%a6),&0x0303 # restore d0-d1/a0-a1
1696 mov.w &0x30d0,EXC_VOFF(%a6) # vector offset = 0xd0
1697 mov.w &0xe004,2+FP_SRC(%a6) # set fsave status (OPERR value in tbl_except)
1699 frestore FP_SRC(%a6) # re-insert the fsave frame with the src op
1707 fmovm.x &0x40,FP_SRC(%a6) # save EXOP (fp1) to the stack
1709 fmovm.x EXC_FPREGS(%a6),&0xc0 # restore fp0/fp1
1710 fmovm.l USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs
1711 movm.l EXC_DREGS(%a6),&0x0303 # restore d0-d1/a0-a1
1713 mov.w &0x30d4,EXC_VOFF(%a6) # vector offset = 0xd4
1714 mov.w &0xe005,2+FP_SRC(%a6) # set fsave status (OVFL value in tbl_except)
1716 frestore FP_SRC(%a6) # restore EXOP
1722 # underflow can happen for extended precision. extended precision opclass
1723 # three instruction exceptions don't update the stack pointer. so, if the
1724 # exception occurred from user mode, then simply update a7 and exit normally.
1725 # if the exception occurred from supervisor mode, check if the <ea> mode was -(sp).
1727 mov.l EXC_A6(%a6),(%a6) # restore a6
1729 btst &0x5,EXC_SR(%a6) # user or supervisor?
1732 mov.l EXC_A7(%a6),%a0 # restore a7 whether we need
1733 mov.l %a0,%usp # to or not...
1736 fmovm.x &0x40,FP_SRC(%a6) # save EXOP to the stack
1738 fmovm.x EXC_FPREGS(%a6),&0xc0 # restore fp0/fp1
1739 fmovm.l USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs
1740 movm.l EXC_DREGS(%a6),&0x0303 # restore d0-d1/a0-a1
1742 mov.w &0x30cc,EXC_VOFF(%a6) # vector offset = 0xcc
1743 mov.w &0xe003,2+FP_SRC(%a6) # set fsave status (UNFL value in tbl_except)
1745 frestore FP_SRC(%a6) # restore EXOP
1752 cmpi.b SPCOND_FLG(%a6),&mda7_flg # was the <ea> mode -(sp)?
1755 # the extended precision result is still in fp0. but, we need to save it
1756 # somewhere on the stack until we can copy it to its final resting place
1757 # (where the exc frame is currently). make sure it's not at the top of the
1758 # frame or it will get overwritten when the exc stack frame is shifted "down".
1759 fmovm.x &0x80,FP_SRC(%a6) # put answer on stack
1760 fmovm.x &0x40,FP_DST(%a6) # put EXOP on stack
1762 fmovm.x EXC_FPREGS(%a6),&0xc0 # restore fp0/fp1
1763 fmovm.l USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs
1764 movm.l EXC_DREGS(%a6),&0x0303 # restore d0-d1/a0-a1
1766 mov.w &0x30cc,EXC_VOFF(%a6) # vector offset = 0xcc
1767 mov.w &0xe003,2+FP_DST(%a6) # set fsave status in the EXOP frame at FP_DST
1769 frestore FP_DST(%a6) # restore EXOP
1771 mov.l (%a6),%a6 # restore frame pointer
1773 mov.l LOCAL_SIZE+EXC_SR(%sp),LOCAL_SIZE+EXC_SR-0xc(%sp) # copy longword at EXC_SR 0xc bytes lower
1774 mov.l LOCAL_SIZE+2+EXC_PC(%sp),LOCAL_SIZE+2+EXC_PC-0xc(%sp) # copy longword at 2+EXC_PC 0xc bytes lower
1775 mov.l LOCAL_SIZE+EXC_EA(%sp),LOCAL_SIZE+EXC_EA-0xc(%sp) # copy <ea> longword 0xc bytes lower
1777 # now, copy the result to the proper place on the stack
1778 mov.l LOCAL_SIZE+FP_SRC_EX(%sp),LOCAL_SIZE+EXC_SR+0x0(%sp)
1779 mov.l LOCAL_SIZE+FP_SRC_HI(%sp),LOCAL_SIZE+EXC_SR+0x4(%sp)
1780 mov.l LOCAL_SIZE+FP_SRC_LO(%sp),LOCAL_SIZE+EXC_SR+0x8(%sp)
1782 add.l &LOCAL_SIZE-0x8,%sp # sp += LOCAL_SIZE-0x8
1786 # fmove in and out enter here.
1788 fmovm.x &0x40,FP_SRC(%a6) # save EXOP (fp1) to the stack
1790 fmovm.x EXC_FPREGS(%a6),&0xc0 # restore fp0/fp1
1791 fmovm.l USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs
1792 movm.l EXC_DREGS(%a6),&0x0303 # restore d0-d1/a0-a1
1794 mov.w &0x30c4,EXC_VOFF(%a6) # vector offset = 0xc4
1795 mov.w &0xe001,2+FP_SRC(%a6) # set fsave status (INEX value in tbl_except)
1797 frestore FP_SRC(%a6) # restore EXOP
1804 #########################################################################
1805 #########################################################################
1809 # I'm not sure at this point what FPSR bits are valid for this instruction.
1810 # so, since the emulation routines re-create them anyways, zero exception field
1811 andi.l &0x0ff00ff,USER_FPSR(%a6) # zero exception field (mask also clears bits 31-24)
1813 fmov.l &0x0,%fpcr # zero current control regs
1816 bsr.l get_packed # fetch packed src operand
1818 lea FP_SRC(%a6),%a0 # pass ptr to src
1819 bsr.l set_tag_x # set src optype tag
1821 mov.b %d0,STAG(%a6) # save src optype tag
1823 bfextu EXC_CMDREG(%a6){&6:&3},%d0 # dyadic; load dst reg (3-bit field at bit offset 6)
1825 # bit five of the fp extension word separates the monadic and dyadic operations
1827 btst &0x5,1+EXC_CMDREG(%a6) # is operation monadic or dyadic?
1828 beq.b fu_extract_p # monadic
1829 cmpi.b 1+EXC_CMDREG(%a6),&0x3a # is operation an ftst?
1830 beq.b fu_extract_p # yes, so it's monadic, too
1832 bsr.l load_fpn2 # load dst into FP_DST
1834 lea FP_DST(%a6),%a0 # pass: ptr to dst op
1835 bsr.l set_tag_x # tag the operand type
1836 cmpi.b %d0,&UNNORM # is operand an UNNORM?
1837 bne.b fu_op2_done_p # no
1838 bsr.l unnorm_fix # yes; convert to NORM,DENORM,or ZERO
1840 mov.b %d0,DTAG(%a6) # save dst optype tag
1844 mov.b FPCR_MODE(%a6),%d0 # fetch rnd mode/prec
1846 bfextu 1+EXC_CMDREG(%a6){&1:&7},%d1 # extract extension (low 7 bits of 2nd cmd byte)
1851 mov.l (tbl_unsupp.l,%pc,%d1.l*4),%d1 # fetch routine addr (longword entry d1 of tbl_unsupp)
1852 jsr (tbl_unsupp.l,%pc,%d1.l*1) # call routine at tbl_unsupp + fetched offset
1855 # Exceptions in order of precedence:
1857 # SNAN : all dyadic ops
1858 # OPERR : fsqrt(-NORM)
1859 # OVFL : all except ftst,fcmp
1860 # UNFL : all except ftst,fcmp
1862 # INEX2 : all except ftst,fcmp
1866 # we determine the highest priority exception(if any) set by the
1867 # emulation routine that has also been enabled by the user.
1868 mov.b FPCR_ENABLE(%a6),%d0 # fetch exceptions enabled
1869 bne.w fu_in_ena_p # some are enabled
1872 # fcmp and ftst do not store any result.
1873 mov.b 1+EXC_CMDREG(%a6),%d0 # fetch extension
1874 andi.b &0x38,%d0 # extract bits 3-5
1875 cmpi.b %d0,&0x38 # is instr fcmp or ftst?
1876 beq.b fu_in_exit_p # yes
1878 bfextu EXC_CMDREG(%a6){&6:&3},%d0 # d0 = dst reg field of cmd word
1879 bsr.l store_fpreg # store the result
1883 btst &0x5,EXC_SR(%a6) # user or supervisor?
1884 bne.w fu_in_exit_s_p # supervisor
1886 mov.l EXC_A7(%a6),%a0 # update user a7
1890 fmovm.x EXC_FPREGS(%a6),&0xc0 # restore fp0/fp1
1891 fmovm.l USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs
1892 movm.l EXC_DREGS(%a6),&0x0303 # restore d0-d1/a0-a1
1894 unlk %a6 # unravel stack frame
1896 btst &0x7,(%sp) # is trace on?
1897 bne.w fu_trace_p # yes
1899 bra.l _fpsp_done # exit to os
1901 # the exception occurred in supervisor mode. check to see if the
1902 # addressing mode was (a7)+. if so, we'll need to shift the stack frame "up".
1905 btst &mia7_bit,SPCOND_FLG(%a6) # was ea mode (a7)+
1906 beq.b fu_in_exit_cont_p # no
1908 fmovm.x EXC_FPREGS(%a6),&0xc0 # restore fp0/fp1
1909 fmovm.l USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs
1910 movm.l EXC_DREGS(%a6),&0x0303 # restore d0-d1/a0-a1
1912 unlk %a6 # unravel stack frame
1914 # shift the stack frame "up". we don't really care about the <ea> field.
1915 mov.l 0x4(%sp),0x10(%sp) # copy frame longword at 0x4 up by 0xc bytes
1916 mov.l 0x0(%sp),0xc(%sp) # copy frame longword at 0x0 up by 0xc bytes
1919 btst &0x7,(%sp) # is trace on?
1920 bne.w fu_trace_p # yes
1922 bra.l _fpsp_done # exit to os
1925 and.b FPSR_EXCEPT(%a6),%d0 # keep only ones enabled & set
1926 bfffo %d0{&24:&8},%d0 # find highest priority exception
1927 bne.b fu_in_exc_p # at least one was set
1930 # No exceptions occurred that were also enabled. Now:
1932 # if (OVFL && ovfl_disabled && inexact_enabled) {
1933 # branch to _real_inex() (even if the result was exact!);
1935 # save the result in the proper fp reg (unless the op is fcmp or ftst);
1939 btst &ovfl_bit,FPSR_EXCEPT(%a6) # was overflow set?
1940 beq.w fu_in_cont_p # no
1943 btst &inex2_bit,FPCR_ENABLE(%a6) # was inexact enabled?
1944 beq.w fu_in_cont_p # no
1945 bra.w fu_in_exc_ovfl_p # do _real_inex() now
1948 # An exception occurred and that exception was enabled:
1950 # shift enabled exception field into lo byte of d0;
1951 # if (((INEX2 || INEX1) && inex_enabled && OVFL && ovfl_disabled) ||
1952 # ((INEX2 || INEX1) && inex_enabled && UNFL && unfl_disabled)) {
1954 # * this is the case where we must call _real_inex() now or else
1955 # * there will be no other way to pass it the exceptional operand
1957 # call _real_inex();
1959 # restore exc state (SNAN||OPERR||OVFL||UNFL||DZ||INEX) into the FPU;
1963 subi.l &24,%d0 # bfffo offset 24-31 -> table index 0-7
1964 cmpi.b %d0,&0x6 # is exception INEX? (6 or 7)
1965 blt.b fu_in_exc_exit_p # no
1967 # the enabled exception was inexact
1968 btst &unfl_bit,FPSR_EXCEPT(%a6) # did disabled underflow occur?
1969 bne.w fu_in_exc_unfl_p # yes
1970 btst &ovfl_bit,FPSR_EXCEPT(%a6) # did disabled overflow occur?
1971 bne.w fu_in_exc_ovfl_p # yes
1973 # here, we insert the correct fsave status value into the fsave frame for the
1974 # corresponding exception. the operand in the fsave frame should be the original src operand.
1976 # as a reminder for future predicted pain and agony, we are passing in fsave the
1977 # "non-skewed" operand for cases of sgl and dbl src INFs,NANs, and DENORMs.
1978 # this is INCORRECT for enabled SNAN which would give to the user the skewed SNAN!!!
1980 btst &0x5,EXC_SR(%a6) # user or supervisor?
1981 bne.w fu_in_exc_exit_s_p # supervisor
1983 mov.l EXC_A7(%a6),%a0 # update user a7
1986 fu_in_exc_exit_cont_p:
1987 mov.w (tbl_except_p.b,%pc,%d0.w*2),2+FP_SRC(%a6) # create exc status from tbl_except_p[d0]
1989 fmovm.x EXC_FPREGS(%a6),&0xc0 # restore fp0/fp1
1990 fmovm.l USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs
1991 movm.l EXC_DREGS(%a6),&0x0303 # restore d0-d1/a0-a1
1993 frestore FP_SRC(%a6) # restore src op
1997 btst &0x7,(%sp) # is trace enabled?
1998 bne.w fu_trace_p # yes
2003 short 0xe000,0xe006,0xe004,0xe005 # tbl_except_p[0-3]: BSUN,SNAN,OPERR,OVFL
2004 short 0xe003,0xe002,0xe001,0xe001 # tbl_except_p[4-7]: UNFL,DZ,INEX2,INEX1
2008 bra.w fu_in_exc_exit_p
2012 bra.w fu_in_exc_exit_p
2015 btst &mia7_bit,SPCOND_FLG(%a6) # was ea mode (a7)+
2016 beq.b fu_in_exc_exit_cont_p # no
2018 mov.w (tbl_except_p.b,%pc,%d0.w*2),2+FP_SRC(%a6) # create exc status from tbl_except_p[d0]
2020 fmovm.x EXC_FPREGS(%a6),&0xc0 # restore fp0/fp1
2021 fmovm.l USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs
2022 movm.l EXC_DREGS(%a6),&0x0303 # restore d0-d1/a0-a1
2024 frestore FP_SRC(%a6) # restore src op
2026 unlk %a6 # unravel stack frame
2028 # shift stack frame "up". who cares about <ea> field.
2029 mov.l 0x4(%sp),0x10(%sp) # copy frame longword at 0x4 up by 0xc bytes
2030 mov.l 0x0(%sp),0xc(%sp) # copy frame longword at 0x0 up by 0xc bytes
2033 btst &0x7,(%sp) # is trace on?
2034 bne.b fu_trace_p # yes
2036 bra.l _fpsp_done # exit to os
2039 # The opclass two PACKED instruction that took an "Unimplemented Data Type"
2040 # exception was being traced. Make the "current" PC the FPIAR and put it in the
2041 # trace stack frame then jump to _real_trace().
2043 # UNSUPP FRAME TRACE FRAME
2044 # ***************** *****************
2045 # * EA * * Current *
2047 # ***************** *****************
2048 # * 0x2 * 0x0dc * * 0x2 * 0x024 *
2049 # ***************** *****************
2052 # ***************** *****************
2054 # ***************** *****************
2056 mov.w &0x2024,0x6(%sp) # stk fmt = 0x2; voff = 0x024
2057 fmov.l %fpiar,0x8(%sp) # "Current PC" is in FPIAR
2061 #########################################################
2062 #########################################################
2066 # I'm not sure at this point what FPSR bits are valid for this instruction.
2067 # so, since the emulation routines re-create them anyways, zero exception field.
2068 # fmove out doesn't affect ccodes.
2069 and.l &0xffff00ff,USER_FPSR(%a6) # zero exception field
2071 fmov.l &0x0,%fpcr # zero current control regs
2074 bfextu EXC_CMDREG(%a6){&6:&3},%d0 # d0 = 3-bit register field of cmd word
2077 # unlike other opclass 3, unimplemented data type exceptions, packed must be
2078 # able to detect all operand types.
2080 bsr.l set_tag_x # tag the operand type
2081 cmpi.b %d0,&UNNORM # is operand an UNNORM?
2083 bsr.l unnorm_fix # yes; convert to NORM,DENORM,or ZERO
2086 mov.b %d0,STAG(%a6) # save src optype tag
2089 mov.b FPCR_MODE(%a6),%d0 # fetch rnd mode/prec
2091 lea FP_SRC(%a6),%a0 # pass ptr to src operand
2093 mov.l (%a6),EXC_A6(%a6) # in case a6 changes: copy the link-saved a6 to EXC_A6
2094 bsr.l fout # call fmove out routine
2096 # Exceptions in order of precedence:
2099 # OPERR : if ((k_factor > +17) || (dec. exp exceeds 3 digits))
2106 # determine the highest priority exception(if any) set by the
2107 # emulation routine that has also been enabled by the user.
2108 mov.b FPCR_ENABLE(%a6),%d0 # fetch exceptions enabled
2109 bne.w fu_out_ena_p # some are enabled
2112 mov.l EXC_A6(%a6),(%a6) # restore a6
2114 btst &0x5,EXC_SR(%a6) # user or supervisor?
2115 bne.b fu_out_exit_s_p # supervisor
2117 mov.l EXC_A7(%a6),%a0 # update user a7
2121 fmovm.x EXC_FPREGS(%a6),&0xc0 # restore fp0/fp1
2122 fmovm.l USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs
2123 movm.l EXC_DREGS(%a6),&0x0303 # restore d0-d1/a0-a1
2125 unlk %a6 # unravel stack frame
2127 btst &0x7,(%sp) # is trace on?
2128 bne.w fu_trace_p # yes
2130 bra.l _fpsp_done # exit to os
2132 # the exception occurred in supervisor mode. check to see if the
2133 # addressing mode was -(a7). if so, we'll need to shift the
2134 # stack frame "down".
2136 btst &mda7_bit,SPCOND_FLG(%a6) # was ea mode -(a7)
2137 beq.b fu_out_exit_cont_p # no
2139 fmovm.x EXC_FPREGS(%a6),&0xc0 # restore fp0/fp1
2140 fmovm.l USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs
2141 movm.l EXC_DREGS(%a6),&0x0303 # restore d0-d1/a0-a1
2143 mov.l (%a6),%a6 # restore frame pointer
2145 mov.l LOCAL_SIZE+EXC_SR(%sp),LOCAL_SIZE+EXC_SR-0xc(%sp) # copy longword at EXC_SR 0xc bytes lower
2146 mov.l LOCAL_SIZE+2+EXC_PC(%sp),LOCAL_SIZE+2+EXC_PC-0xc(%sp) # copy longword at 2+EXC_PC 0xc bytes lower
2148 # now, copy the result to the proper place on the stack
2149 mov.l LOCAL_SIZE+FP_DST_EX(%sp),LOCAL_SIZE+EXC_SR+0x0(%sp)
2150 mov.l LOCAL_SIZE+FP_DST_HI(%sp),LOCAL_SIZE+EXC_SR+0x4(%sp)
2151 mov.l LOCAL_SIZE+FP_DST_LO(%sp),LOCAL_SIZE+EXC_SR+0x8(%sp)
2153 add.l &LOCAL_SIZE-0x8,%sp # sp += LOCAL_SIZE-0x8
# mask the requested exception set in d0 with the exceptions actually raised,
# then locate the highest-priority one (bfffo scans the low byte of d0).
2161 and.b FPSR_EXCEPT(%a6),%d0 # keep only ones enabled
2162 bfffo %d0{&24:&8},%d0 # find highest priority exception
2165 mov.l EXC_A6(%a6),(%a6) # restore a6
2167 # an exception occurred and that exception was enabled.
2168 # the only exception possible on packed move out are INEX, OPERR, and SNAN.
# NOTE(review): the labels and conditional branches separating the three
# per-exception paths below are not visible in this listing. This first path
# uses fsave status 0xe006, which is the SNAN slot of tbl_iea_except.
2175 btst &0x5,EXC_SR(%a6)
2178 mov.l EXC_A7(%a6),%a0
2183 cmpi.b SPCOND_FLG(%a6),&mda7_flg
2186 # the instruction was "fmove.p fpn,-(a7)" from supervisor mode.
2187 # the strategy is to move the exception frame "down" 12 bytes. then, we
2188 # can store the default result where the exception frame was.
2189 fmovm.x EXC_FPREGS(%a6),&0xc0 # restore fp0/fp1
2190 fmovm.l USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs
2191 movm.l EXC_DREGS(%a6),&0x0303 # restore d0-d1/a0-a1
2193 mov.w &0x30d8,EXC_VOFF(%a6) # vector offset = 0xd8
2194 mov.w &0xe006,2+FP_SRC(%a6) # set fsave status
2196 frestore FP_SRC(%a6) # restore src operand
2198 mov.l (%a6),%a6 # restore frame pointer
# a6 is gone: address the frame from sp (LOCAL_SIZE + offset) and copy the
# SR, PC/format word and EA longwords 0xc bytes lower.
2200 mov.l LOCAL_SIZE+EXC_SR(%sp),LOCAL_SIZE+EXC_SR-0xc(%sp)
2201 mov.l LOCAL_SIZE+2+EXC_PC(%sp),LOCAL_SIZE+2+EXC_PC-0xc(%sp)
2202 mov.l LOCAL_SIZE+EXC_EA(%sp),LOCAL_SIZE+EXC_EA-0xc(%sp)
2204 # now, we copy the default result to its proper location
2205 mov.l LOCAL_SIZE+FP_DST_EX(%sp),LOCAL_SIZE+0x4(%sp)
2206 mov.l LOCAL_SIZE+FP_DST_HI(%sp),LOCAL_SIZE+0x8(%sp)
2207 mov.l LOCAL_SIZE+FP_DST_LO(%sp),LOCAL_SIZE+0xc(%sp)
# release the local area, keeping the relocated frame on the stack
2209 add.l &LOCAL_SIZE-0x8,%sp
# Second enabled-exception path. It uses fsave status 0xe004 (the OPERR slot
# of tbl_iea_except) and vector offset 0xd0.
# NOTE(review): the branches that consume the two tests below are not visible
# in this listing.
2215 btst &0x5,EXC_SR(%a6)
2218 mov.l EXC_A7(%a6),%a0
2223 cmpi.b SPCOND_FLG(%a6),&mda7_flg
2226 # the instruction was "fmove.p fpn,-(a7)" from supervisor mode.
2227 # the strategy is to move the exception frame "down" 12 bytes. then, we
2228 # can store the default result where the exception frame was.
2229 fmovm.x EXC_FPREGS(%a6),&0xc0 # restore fp0/fp1
2230 fmovm.l USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs
2231 movm.l EXC_DREGS(%a6),&0x0303 # restore d0-d1/a0-a1
2233 mov.w &0x30d0,EXC_VOFF(%a6) # vector offset = 0xd0
2234 mov.w &0xe004,2+FP_SRC(%a6) # set fsave status
2236 frestore FP_SRC(%a6) # restore src operand
2238 mov.l (%a6),%a6 # restore frame pointer
# frame is addressed from sp from here on; copy it 0xc bytes lower
2240 mov.l LOCAL_SIZE+EXC_SR(%sp),LOCAL_SIZE+EXC_SR-0xc(%sp)
2241 mov.l LOCAL_SIZE+2+EXC_PC(%sp),LOCAL_SIZE+2+EXC_PC-0xc(%sp)
2242 mov.l LOCAL_SIZE+EXC_EA(%sp),LOCAL_SIZE+EXC_EA-0xc(%sp)
2244 # now, we copy the default result to its proper location
2245 mov.l LOCAL_SIZE+FP_DST_EX(%sp),LOCAL_SIZE+0x4(%sp)
2246 mov.l LOCAL_SIZE+FP_DST_HI(%sp),LOCAL_SIZE+0x8(%sp)
2247 mov.l LOCAL_SIZE+FP_DST_LO(%sp),LOCAL_SIZE+0xc(%sp)
# release the local area, keeping the relocated frame on the stack
2249 add.l &LOCAL_SIZE-0x8,%sp
# Third enabled-exception path. It uses fsave status 0xe001 (the INEX slots
# of tbl_iea_except) and vector offset 0xc4.
# NOTE(review): the branches that consume the two tests below are not visible
# in this listing.
2255 btst &0x5,EXC_SR(%a6)
2258 mov.l EXC_A7(%a6),%a0
2263 cmpi.b SPCOND_FLG(%a6),&mda7_flg
2266 # the instruction was "fmove.p fpn,-(a7)" from supervisor mode.
2267 # the strategy is to move the exception frame "down" 12 bytes. then, we
2268 # can store the default result where the exception frame was.
2269 fmovm.x EXC_FPREGS(%a6),&0xc0 # restore fp0/fp1
2270 fmovm.l USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs
2271 movm.l EXC_DREGS(%a6),&0x0303 # restore d0-d1/a0-a1
2273 mov.w &0x30c4,EXC_VOFF(%a6) # vector offset = 0xc4
2274 mov.w &0xe001,2+FP_SRC(%a6) # set fsave status
2276 frestore FP_SRC(%a6) # restore src operand
2278 mov.l (%a6),%a6 # restore frame pointer
# frame is addressed from sp from here on; copy it 0xc bytes lower
2280 mov.l LOCAL_SIZE+EXC_SR(%sp),LOCAL_SIZE+EXC_SR-0xc(%sp)
2281 mov.l LOCAL_SIZE+2+EXC_PC(%sp),LOCAL_SIZE+2+EXC_PC-0xc(%sp)
2282 mov.l LOCAL_SIZE+EXC_EA(%sp),LOCAL_SIZE+EXC_EA-0xc(%sp)
2284 # now, we copy the default result to its proper location
2285 mov.l LOCAL_SIZE+FP_DST_EX(%sp),LOCAL_SIZE+0x4(%sp)
2286 mov.l LOCAL_SIZE+FP_DST_HI(%sp),LOCAL_SIZE+0x8(%sp)
2287 mov.l LOCAL_SIZE+FP_DST_LO(%sp),LOCAL_SIZE+0xc(%sp)
# release the local area, keeping the relocated frame on the stack
2289 add.l &LOCAL_SIZE-0x8,%sp
2294 #########################################################################
2297 # if we're stuffing a source operand back into an fsave frame then we
2298 # have to make sure that for single or double source operands that the
2299 # format stuffed is as weird as the hardware usually makes it.
# dispatch on the 3-bit source specifier of the extension word:
# 1 = single, 5 = double; any other value takes neither branch.
2303 bfextu EXC_EXTWORD(%a6){&3:&3},%d0 # extract src specifier
2304 cmpi.b %d0,&0x1 # was src sgl?
2305 beq.b funimp_skew_sgl # yes
2306 cmpi.b %d0,&0x5 # was src dbl?
2307 beq.b funimp_skew_dbl # yes
# --- single-precision source ---
2311 mov.w FP_SRC_EX(%a6),%d0 # fetch DENORM exponent
2312 andi.w &0x7fff,%d0 # strip sign
2313 beq.b funimp_skew_sgl_not
# NOTE(review): no compare is visible between the beq above and the bgt
# below in this listing -- confirm the threshold test against the full source.
2315 bgt.b funimp_skew_sgl_not
# d0 = 0x3f81 - exponent: the right-shift count applied to the high mantissa
2316 neg.w %d0 # make exponent negative
2317 addi.w &0x3f81,%d0 # find amt to shift
2318 mov.l FP_SRC_HI(%a6),%d1 # fetch DENORM hi(man)
2319 lsr.l %d0,%d1 # shift it
2320 bset &31,%d1 # set j-bit
2321 mov.l %d1,FP_SRC_HI(%a6) # insert new hi(man)
# keep only the sign bit of the exponent word, then OR in 0x3f80
2322 andi.w &0x8000,FP_SRC_EX(%a6) # clear old exponent
2323 ori.w &0x3f80,FP_SRC_EX(%a6) # insert new "skewed" exponent
2324 funimp_skew_sgl_not:
# --- double-precision source ---
2328 mov.w FP_SRC_EX(%a6),%d0 # fetch DENORM exponent
2329 andi.w &0x7fff,%d0 # strip sign
2330 beq.b funimp_skew_dbl_not
# NOTE(review): as in the single case, the compare feeding this bgt is not
# visible in this listing.
2332 bgt.b funimp_skew_dbl_not
# record the sign in the byte at FP_SRC+2 (0xff if negative, 0x00 otherwise)
# and store the exponent without its sign bit before calling dnrm_lp.
2334 tst.b FP_SRC_EX(%a6) # make "internal format"
2335 smi.b 0x2+FP_SRC(%a6)
2336 mov.w %d0,FP_SRC_EX(%a6) # insert exponent with cleared sign
2337 clr.l %d0 # clear g,r,s
2338 lea FP_SRC(%a6),%a0 # pass ptr to src op
2339 mov.w &0x3c01,%d1 # pass denorm threshold
2340 bsr.l dnrm_lp # denorm it
# rebuild the exponent word: 0x3c00 plus the sign saved at FP_SRC+2
2341 mov.w &0x3c00,%d0 # new exponent
2342 tst.b 0x2+FP_SRC(%a6) # is sign set?
2343 beq.b fss_dbl_denorm_done # no
2344 bset &15,%d0 # set sign
2345 fss_dbl_denorm_done:
# bit 7 of the first byte of FP_SRC_HI is bit 31 of the high mantissa longword
2346 bset &0x7,FP_SRC_HI(%a6) # set j-bit
2347 mov.w %d0,FP_SRC_EX(%a6) # insert new exponent
2348 funimp_skew_dbl_not:
2351 #########################################################################
2354 btst &0x5,EXC_SR(%a6)
2356 mov.l 0x0(%a0),FP_DST_EX(%a6)
2357 mov.l 0x4(%a0),FP_DST_HI(%a6)
2358 mov.l 0x8(%a0),FP_DST_LO(%a6)
2362 #########################################################################
2363 # XDEF **************************************************************** #
2364 # _fpsp_effadd(): 060FPSP entry point for FP "Unimplemented #
2365 # effective address" exception. #
2367 # This handler should be the first code executed upon taking the #
2368 # FP Unimplemented Effective Address exception in an operating system. #
2371 # XREF **************************************************************** #
2372 # _imem_read_long() - read instruction longword #
2373 # fix_skewed_ops() - adjust src operand in fsave frame #
2374 # set_tag_x() - determine optype of src/dst operands #
2375 # store_fpreg() - store opclass 0 or 2 result to FP regfile #
2376 # unnorm_fix() - change UNNORM operands to NORM or ZERO #
2377 # load_fpn2() - load dst operand from FP regfile #
2378 # tbl_unsupp - addr of table of emulation routines for opclass 0,2 #
2379 # decbin() - convert packed data to FP binary data #
2380 # _real_fpu_disabled() - "callout" for "FPU disabled" exception #
2381 # _real_access() - "callout" for access error exception #
2382 # _mem_read() - read extended immediate operand from memory #
2383 # _fpsp_done() - "callout" for exit; work all done #
2384 # _real_trace() - "callout" for Trace enabled exception #
2385 # fmovm_dynamic() - emulate dynamic fmovm instruction #
2386 # fmovm_ctrl() - emulate fmovm control instruction #
2388 # INPUT *************************************************************** #
2389 # - The system stack contains the "Unimplemented <ea>" stk frame #
2391 # OUTPUT ************************************************************** #
2392 # If access error: #
2393 # - The system stack is changed to an access error stack frame #
2394 # If FPU disabled: #
2395 # - The system stack is changed to an FPU disabled stack frame #
2396 # If Trace exception enabled: #
2397 # - The system stack is changed to a Trace exception stack frame #
2398 # Else: (normal case) #
2399 # - None (correct result has been stored as appropriate) #
2401 # ALGORITHM *********************************************************** #
2402 # This exception handles 3 types of operations: #
2403 # (1) FP Instructions using extended precision or packed immediate #
2404 # addressing mode. #
2405 # (2) The "fmovm.x" instruction w/ dynamic register specification. #
2406 # (3) The "fmovm.l" instruction w/ 2 or 3 control registers. #
2408 # For immediate data operations, the data is read in w/ a #
2409 # _mem_read() "callout", converted to FP binary (if packed), and used #
2410 # as the source operand to the instruction specified by the instruction #
2411 # word. If no FP exception should be reported as a result of the #
2412 # emulation, then the result is stored to the destination register and #
2413 # the handler exits through _fpsp_done(). If an enabled exc has been #
2414 # signalled as a result of emulation, then an fsave state frame #
2415 # corresponding to the FP exception type must be entered into the 060 #
2416 # FPU before exiting. In either the enabled or disabled cases, we #
2417 # must also check if a Trace exception is pending, in which case, we #
2418 # must create a Trace exception stack frame from the current exception #
2419 # stack frame. If no Trace is pending, we simply exit through _fpsp_done(). #
2421 # For "fmovm.x", call the routine fmovm_dynamic() which will #
2422 # decode and emulate the instruction. No FP exceptions can be pending #
2423 # as a result of this operation emulation. A Trace exception can be #
2424 # pending, though, which means the current stack frame must be changed #
2425 # to a Trace stack frame and an exit made through _real_trace(). #
2426 # For the case of "fmovm.x Dn,-(a7)", where the offending instruction #
2427 # was executed from supervisor mode, this handler must store the FP #
2428 # register file values to the system stack by itself since #
2429 # fmovm_dynamic() can't handle this. A normal exit is made through _fpsp_done(). #
2431 # For "fmovm.l", fmovm_ctrl() is used to emulate the instruction. #
2432 # Again, a Trace exception may be pending and an exit made through #
2433 # _real_trace(). Else, a normal exit is made through _fpsp_done(). #
2435 # Before any of the above is attempted, it must be checked to #
2436 # see if the FPU is disabled. Since the "Unimp <ea>" exception is taken #
2437 # before the "FPU disabled" exception, but the "FPU disabled" exception #
2438 # has higher priority, we check the disabled bit in the PCR. If set, #
2439 # then we must create an 8 word "FPU disabled" exception stack frame #
2440 # from the current 4 word exception stack frame. This includes #
2441 # reproducing the effective address of the instruction to put on the #
2442 # new stack frame. #
2444 # In the process of all emulation work, if a _mem_read() #
2445 # "callout" returns a failing result indicating an access error, then #
2446 # we must create an access error stack frame from the current stack #
2447 # frame. This information includes a faulting address and a fault- #
2448 # status-longword. These are created within this handler. #
2450 #########################################################################
# Entry code: bail out to iea_disabled if the PCR says the FPU is disabled,
# otherwise build the local frame, save the volatile state and fetch the
# opword/extension word of the faulting instruction.
2455 # This exception type takes priority over the "Line F Emulator"
2456 # exception. Therefore, the FPU could be disabled when entering here.
2457 # So, we must check to see if it's disabled and handle that case separately.
2458 mov.l %d0,-(%sp) # save d0
2459 movc %pcr,%d0 # load proc cr
2460 btst &0x1,%d0 # is FPU disabled?
# d0 is still on the stack when this branch is taken; iea_disabled pops it
2461 bne.w iea_disabled # yes
2462 mov.l (%sp)+,%d0 # restore d0
2464 link %a6,&-LOCAL_SIZE # init stack frame
2466 movm.l &0x0303,EXC_DREGS(%a6) # save d0-d1/a0-a1
2467 fmovm.l %fpcr,%fpsr,%fpiar,USER_FPCR(%a6) # save ctrl regs
2468 fmovm.x &0xc0,EXC_FPREGS(%a6) # save fp0-fp1 on stack
2470 # PC of instruction that took the exception is the PC in the frame
2471 mov.l EXC_PC(%a6),EXC_EXTWPTR(%a6)
# EXC_EXTWPTR is advanced past the 4 bytes being read (opword + extension)
2473 mov.l EXC_EXTWPTR(%a6),%a0 # fetch instruction addr
2474 addq.l &0x4,EXC_EXTWPTR(%a6) # incr instruction ptr
2475 bsr.l _imem_read_long # fetch the instruction words
2476 mov.l %d0,EXC_OPWORD(%a6) # store OPWORD and EXTWORD
2478 #########################################################################
# tst.w looks at the low word of d0 only; its sign bit decides the branch
2480 tst.w %d0 # is operation fmovem?
2481 bmi.w iea_fmovm # yes
2484 # here, we will have:
2485 # fabs fdabs fsabs facos fmod
2486 # fadd fdadd fsadd fasin frem
2488 # fdiv fddiv fsdiv fatanh fsin
2490 # fintrz fcosh fsinh
2491 # fmove fdmove fsmove fetox ftan
2492 # fmul fdmul fsmul fetoxm1 ftanh
2493 # fneg fdneg fsneg fgetexp ftentox
2494 # fsgldiv fgetman ftwotox
2497 # fsub fdsub fssub flogn
2499 # which can all use f<op>.{x,p}
2500 # so, now it's immediate data extended precision AND PACKED FORMAT!
# mask 0x00ff00ff clears bits 31-24 and 15-8 of the saved FPSR
# (bits 15-8 are the exception field; 31-24 presumably the condition codes).
2503 andi.l &0x00ff00ff,USER_FPSR(%a6)
2505 btst &0xa,%d0 # is src fmt x or p?
2506 bne.b iea_op_pack # packed
# extended-precision immediate: copy the 12 bytes that follow the extension
# word into FP_SRC; a non-zero d1 from _imem_read means the fetch failed.
2509 mov.l EXC_EXTWPTR(%a6),%a0 # pass: ptr to #<data>
2510 lea FP_SRC(%a6),%a1 # pass: ptr to super addr
2511 mov.l &0xc,%d0 # pass: 12 bytes
2512 bsr.l _imem_read # read extended immediate
2514 tst.l %d1 # did ifetch fail?
2515 bne.w iea_iacc # yes
# packed immediate: same 12-byte fetch into FP_SRC, then classify the operand
2521 mov.l EXC_EXTWPTR(%a6),%a0 # pass: ptr to #<data>
2522 lea FP_SRC(%a6),%a1 # pass: ptr to super dst
2523 mov.l &0xc,%d0 # pass: 12 bytes
2524 bsr.l _imem_read # read packed operand
2526 tst.l %d1 # did ifetch fail?
2527 bne.w iea_iacc # yes
2529 # The packed operand is an INF or a NAN if the exponent field is all ones.
2530 bfextu FP_SRC(%a6){&1:&15},%d0 # get exp
2531 cmpi.w %d0,&0x7fff # INF or NAN?
2532 beq.b iea_op_setsrc # operand is an INF or NAN
2534 # The packed operand is a zero if the mantissa is all zero, else it's
2535 # a normal packed op.
2536 mov.b 3+FP_SRC(%a6),%d0 # get byte 4
2537 andi.b &0x0f,%d0 # clear all but last nybble
2538 bne.b iea_op_gp_not_spec # not a zero
2539 tst.l FP_SRC_HI(%a6) # is lw 2 zero?
2540 bne.b iea_op_gp_not_spec # not a zero
2541 tst.l FP_SRC_LO(%a6) # is lw 3 zero?
2542 beq.b iea_op_setsrc # operand is a ZERO
# ordinary packed operand: convert it and store fp0 (register mask 0x80)
# back over FP_SRC as the extended-precision source.
2544 lea FP_SRC(%a6),%a0 # pass: ptr to packed op
2545 bsr.l decbin # convert to extended
2546 fmovm.x &0x80,FP_SRC(%a6) # make this the srcop
# step past the 12-byte immediate so EXC_EXTWPTR ends up holding the "Next PC"
2549 addi.l &0xc,EXC_EXTWPTR(%a6) # update extension word pointer
2551 # FP_SRC now holds the src operand.
2552 lea FP_SRC(%a6),%a0 # pass: ptr to src op
2553 bsr.l set_tag_x # tag the operand type
2554 mov.b %d0,STAG(%a6) # could be ANYTHING!!!
2555 cmpi.b %d0,&UNNORM # is operand an UNNORM?
2556 bne.b iea_op_getdst # no
2557 bsr.l unnorm_fix # yes; convert to NORM/DENORM/ZERO
2558 mov.b %d0,STAG(%a6) # set new optype tag
# Decide whether a destination operand must be loaded, then dispatch to the
# emulation routine selected by the low 7 bits of the command word.
2560 clr.b STORE_FLG(%a6) # clear "store result" boolean
2562 btst &0x5,1+EXC_CMDREG(%a6) # is operation monadic or dyadic?
2563 beq.b iea_op_extract # monadic
2564 btst &0x4,1+EXC_CMDREG(%a6) # is operation fsincos,ftst,fcmp?
2565 bne.b iea_op_spec # yes
# dyadic operation: load the destination register into FP_DST and tag it
2568 bfextu EXC_CMDREG(%a6){&6:&3},%d0 # fetch dst regno
2569 bsr.l load_fpn2 # load dst operand
2571 lea FP_DST(%a6),%a0 # pass: ptr to dst op
2572 bsr.l set_tag_x # tag the operand type
2573 mov.b %d0,DTAG(%a6) # could be ANYTHING!!!
2574 cmpi.b %d0,&UNNORM # is operand an UNNORM?
2575 bne.b iea_op_extract # no
2576 bsr.l unnorm_fix # yes; convert to NORM/DENORM/ZERO
2577 mov.b %d0,DTAG(%a6) # set new optype tag
2578 bra.b iea_op_extract
2580 # the operation is fsincos, ftst, or fcmp. only fcmp is dyadic
2582 btst &0x3,1+EXC_CMDREG(%a6) # is operation fsincos?
2583 beq.b iea_op_extract # yes
2584 # now, we're left with ftst and fcmp. so, first let's tag them so that they don't
2585 # store a result. then, only fcmp will branch back and pick up a dst operand.
2586 st STORE_FLG(%a6) # don't store a final result
2587 btst &0x1,1+EXC_CMDREG(%a6) # is operation fcmp?
2588 beq.b iea_op_loaddst # yes
2592 mov.b FPCR_MODE(%a6),%d0 # pass: rnd mode,prec
2594 mov.b 1+EXC_CMDREG(%a6),%d1
2595 andi.w &0x007f,%d1 # extract extension
# tbl_unsupp is indexed by d1 (4 bytes per entry); the fetched longword is
# then used as an offset from tbl_unsupp itself for the jsr.
2603 mov.l (tbl_unsupp.l,%pc,%d1.w*4),%d1 # fetch routine addr
2604 jsr (tbl_unsupp.l,%pc,%d1.l*1)
2607 # Exceptions in order of precedence:
2609 # SNAN : all operations
2610 # OPERR : all reg-reg or mem-reg operations that can normally operr
2611 # OVFL : same as OPERR
2612 # UNFL : same as OPERR
2613 # DZ : same as OPERR
2614 # INEX2 : same as OPERR
2615 # INEX1 : all packed immediate operations
2618 # we determine the highest priority exception(if any) set by the
2619 # emulation routine that has also been enabled by the user.
2620 mov.b FPCR_ENABLE(%a6),%d0 # fetch exceptions enabled
2621 bne.b iea_op_ena # some are enabled
2623 # now, we save the result, unless, of course, the operation was ftst or fcmp.
2624 # these don't save results.
# STORE_FLG is set (with st) only on the ftst/fcmp path
2626 tst.b STORE_FLG(%a6) # does this op store a result?
2627 bne.b iea_op_exit1 # exit with no frestore
2630 bfextu EXC_CMDREG(%a6){&6:&3},%d0 # fetch dst regno
2631 bsr.l store_fpreg # store the result
# exit without frestore: FPIAR gets the faulting instruction's PC and the
# stacked PC is replaced by the address following the immediate operand.
2634 mov.l EXC_PC(%a6),USER_FPIAR(%a6) # set FPIAR to "Current PC"
2635 mov.l EXC_EXTWPTR(%a6),EXC_PC(%a6) # set "Next PC" in exc frame
2637 fmovm.x EXC_FPREGS(%a6),&0xc0 # restore fp0-fp1
2638 fmovm.l USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs
2639 movm.l EXC_DREGS(%a6),&0x0303 # restore d0-d1/a0-a1
2641 unlk %a6 # unravel the frame
2643 btst &0x7,(%sp) # is trace on?
2644 bne.w iea_op_trace # yes
2646 bra.l _fpsp_done # exit to os
# Enabled-exception handling: pick the highest-priority exception that is
# both enabled and set, choose the matching fsave status word, and exit
# through frestore.
2649 and.b FPSR_EXCEPT(%a6),%d0 # keep only ones enabled and set
2650 bfffo %d0{&24:&8},%d0 # find highest priority exception
2651 bne.b iea_op_exc # at least one was set
2653 # no exception occurred. now, did a disabled, exact overflow occur with inexact
2654 # enabled? if so, then we have to stuff an overflow frame into the FPU.
2655 btst &ovfl_bit,FPSR_EXCEPT(%a6) # did overflow occur?
# NOTE(review): the branch consuming the overflow test above is not visible
# in this listing.
2659 btst &inex2_bit,FPCR_ENABLE(%a6) # is inexact enabled?
2660 beq.b iea_op_store # no
2661 bra.b iea_op_exc_ovfl # yes
2663 # an enabled exception occurred. we have to insert the exception type back into
# the fsave frame (status word at 2+FP_SRC) that is frestore'd on exit.
# bfffo scanned bits 24-31 of d0, so subtracting 24 gives the table index.
2666 subi.l &24,%d0 # fix offset to be 0-8
2667 cmpi.b %d0,&0x6 # is exception INEX?
2668 bne.b iea_op_exc_force # no
2670 # the enabled exception was inexact. so, if it occurs with an overflow
2671 # or underflow that was disabled, then we have to force an overflow or
# underflow status word instead of the inexact one.
2673 btst &ovfl_bit,FPSR_EXCEPT(%a6) # did overflow occur?
2674 bne.b iea_op_exc_ovfl # yes
2675 btst &unfl_bit,FPSR_EXCEPT(%a6) # did underflow occur?
2676 bne.b iea_op_exc_unfl # yes
# look up the status word for exception index d0 (2 bytes per entry)
2679 mov.w (tbl_iea_except.b,%pc,%d0.w*2),2+FP_SRC(%a6)
2680 bra.b iea_op_exit2 # exit with frestore
# status-word table, one 16-bit entry per exception index 0-7
2683 short 0xe002, 0xe006, 0xe004, 0xe005
2684 short 0xe003, 0xe002, 0xe001, 0xe001
# forced overflow status (same value as table entry 3)
2687 mov.w &0xe005,2+FP_SRC(%a6)
# forced underflow status (same value as table entry 4)
2691 mov.w &0xe003,2+FP_SRC(%a6)
# exit with frestore: same register/PC fix-up as the plain exit, then reload
# the FPU from the fsave image at FP_SRC so the exception becomes pending.
2694 mov.l EXC_PC(%a6),USER_FPIAR(%a6) # set FPIAR to "Current PC"
2695 mov.l EXC_EXTWPTR(%a6),EXC_PC(%a6) # set "Next PC" in exc frame
2697 fmovm.x EXC_FPREGS(%a6),&0xc0 # restore fp0-fp1
2698 fmovm.l USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs
2699 movm.l EXC_DREGS(%a6),&0x0303 # restore d0-d1/a0-a1
2701 frestore FP_SRC(%a6) # restore exceptional state
2703 unlk %a6 # unravel the frame
2705 btst &0x7,(%sp) # is trace on?
2706 bne.b iea_op_trace # yes
2708 bra.l _fpsp_done # exit to os
2711 # The opclass two instruction that took an "Unimplemented Effective Address"
2712 # exception was being traced. Make the "current" PC the FPIAR and put it in
2713 # the trace stack frame then jump to _real_trace().
2715 # UNIMP EA FRAME TRACE FRAME
2716 # ***************** *****************
2717 # * 0x0 * 0x0f0 * * Current *
2718 # ***************** * PC *
2719 # * Current * *****************
2720 # * PC * * 0x2 * 0x024 *
2721 # ***************** *****************
2723 # ***************** * PC *
# grow the frame by 4 bytes: the first longword (SR + high word of PC) is
# pushed as a copy, then the low word of PC is moved down next to it.
2728 mov.l (%sp),-(%sp) # shift stack frame "down"
2729 mov.w 0x8(%sp),0x4(%sp)
2730 mov.w &0x2024,0x6(%sp) # stk fmt = 0x2; voff = 0x024
# the freed longword at 0x8 receives the address held in FPIAR
2731 fmov.l %fpiar,0x8(%sp) # "Current PC" is in FPIAR
2735 #########################################################################
# fmovm dispatch: bit 14 of the extension word (in d0) separates the control
# register form from the data register form; the data form is then split by
# user/supervisor mode because the stack pointer in use differs.
2737 btst &14,%d0 # ctrl or data reg
2738 beq.w iea_fmovm_ctrl
2742 btst &0x5,EXC_SR(%a6) # user or supervisor mode
2743 bne.b iea_fmovm_data_s
# user mode: run the emulation with a7 kept in EXC_A7, then write the
# possibly modified value back to the usp.
2747 mov.l %a0,EXC_A7(%a6) # store current a7
2748 bsr.l fmovm_dynamic # do dynamic fmovm
2749 mov.l EXC_A7(%a6),%a0 # load possibly new a7
2750 mov.l %a0,%usp # update usp
2751 bra.w iea_fmovm_exit
# supervisor mode: the a7 value handed to the emulation is the address
# 2 bytes past EXC_VOFF (presumably the first byte above the 4-word
# exception frame -- TODO confirm against the frame equates).
2754 clr.b SPCOND_FLG(%a6)
2755 lea 0x2+EXC_VOFF(%a6),%a0
2756 mov.l %a0,EXC_A7(%a6)
2757 bsr.l fmovm_dynamic # do dynamic fmovm
# SPCOND_FLG reports whether the <ea> was -(a7) or (a7)+; both need the
# exception frame relocated, anything else exits normally.
2759 cmpi.b SPCOND_FLG(%a6),&mda7_flg
2760 beq.w iea_fmovm_data_predec
2761 cmpi.b SPCOND_FLG(%a6),&mia7_flg
2762 bne.w iea_fmovm_exit
2764 # right now, d0 = the size.
2765 # the data has been fetched from the supervisor stack, but we have not
2766 # incremented the stack pointer by the appropriate number of bytes.
2768 iea_fmovm_data_postinc:
2769 btst &0x7,EXC_SR(%a6)
2770 bne.b iea_fmovm_data_pi_trace
# not traced: write a fresh 4-word frame (SR, Next PC from EXC_EXTWPTR,
# format/offset word 0x00f0) d0 bytes above the current one.
2772 mov.w EXC_SR(%a6),(EXC_SR,%a6,%d0)
2773 mov.l EXC_EXTWPTR(%a6),(EXC_PC,%a6,%d0)
2774 mov.w &0x00f0,(EXC_VOFF,%a6,%d0)
# the address of the new frame overwrites the old EXC_SR slot; presumably it
# is loaded into sp after the unlk (that code is not visible in this listing).
2776 lea (EXC_SR,%a6,%d0),%a0
2777 mov.l %a0,EXC_SR(%a6)
2779 fmovm.x EXC_FP0(%a6),&0xc0 # restore fp0-fp1
2780 fmovm.l USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs
2781 movm.l EXC_DREGS(%a6),&0x0303 # restore d0-d1/a0-a1
# Traced variant of the (a7)+ case: the relocated frame is a trace frame
# (format/offset word 0x2024) that is 4 bytes longer, so it starts 4 bytes
# lower than in the untraced case. d0 holds the operand size.
2787 iea_fmovm_data_pi_trace:
2788 mov.w EXC_SR(%a6),(EXC_SR-0x4,%a6,%d0)
# EXC_EXTWPTR (the Next PC) goes in the frame's PC slot
2789 mov.l EXC_EXTWPTR(%a6),(EXC_PC-0x4,%a6,%d0)
2790 mov.w &0x2024,(EXC_VOFF-0x4,%a6,%d0)
# the original stacked PC is stored in the extra longword after the
# format/offset word
2791 mov.l EXC_PC(%a6),(EXC_VOFF+0x2-0x4,%a6,%d0)
# remember the new frame's address in the old EXC_SR slot
2793 lea (EXC_SR-0x4,%a6,%d0),%a0
2794 mov.l %a0,EXC_SR(%a6)
2796 fmovm.x EXC_FP0(%a6),&0xc0 # restore fp0-fp1
2797 fmovm.l USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs
2798 movm.l EXC_DREGS(%a6),&0x0303 # restore d0-d1/a0-a1
2804 # right now, d0 = size and d1 = the strg.
2805 iea_fmovm_data_predec:
# park strg and size in the two bytes of EXC_VOFF while the registers are
# restored; both are read back from there below.
2806 mov.b %d1,EXC_VOFF(%a6) # store strg
2807 mov.b %d0,0x1+EXC_VOFF(%a6) # store size
2809 fmovm.x EXC_FP0(%a6),&0xc0 # restore fp0-fp1
2810 fmovm.l USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs
2811 movm.l EXC_DREGS(%a6),&0x0303 # restore d0-d1/a0-a1
# push the values that must survive the frame relocation
2813 mov.l (%a6),-(%sp) # make a copy of a6
2814 mov.l %d0,-(%sp) # save d0
2815 mov.l %d1,-(%sp) # save d1
2816 mov.l EXC_EXTWPTR(%a6),-(%sp) # make a copy of Next PC
# d0 = -size, used as a negative index so the new frame is built below
# the current one.
2819 mov.b 0x1+EXC_VOFF(%a6),%d0 # fetch size
2820 neg.l %d0 # get negative of size
2822 btst &0x7,EXC_SR(%a6) # is trace enabled?
2823 beq.b iea_fmovm_data_p2
# traced: build a trace frame (0x2024) with the Next PC popped from the
# stack in the PC slot and the original PC in the extra longword.
2825 mov.w EXC_SR(%a6),(EXC_SR-0x4,%a6,%d0)
2826 mov.l EXC_PC(%a6),(EXC_VOFF-0x2,%a6,%d0)
2827 mov.l (%sp)+,(EXC_PC-0x4,%a6,%d0)
2828 mov.w &0x2024,(EXC_VOFF-0x4,%a6,%d0)
2830 pea (%a6,%d0) # create final sp
2831 bra.b iea_fmovm_data_p3
# not traced: build a 4-word frame (0x00f0) with the Next PC popped from
# the stack in the PC slot.
2834 mov.w EXC_SR(%a6),(EXC_SR,%a6,%d0)
2835 mov.l (%sp)+,(EXC_PC,%a6,%d0)
2836 mov.w &0x00f0,(EXC_VOFF,%a6,%d0)
2838 pea (0x4,%a6,%d0) # create final sp
2842 mov.b EXC_VOFF(%a6),%d1 # fetch strg
# one store per FP register, fp0 (mask 0x80) through fp7 (mask 0x01).
# NOTE(review): the per-register tests and d0 adjustments between these
# stores are not visible in this listing.
2846 fmovm.x &0x80,(0x4+0x8,%a6,%d0)
2851 fmovm.x &0x40,(0x4+0x8,%a6,%d0)
2856 fmovm.x &0x20,(0x4+0x8,%a6,%d0)
2861 fmovm.x &0x10,(0x4+0x8,%a6,%d0)
2866 fmovm.x &0x08,(0x4+0x8,%a6,%d0)
2871 fmovm.x &0x04,(0x4+0x8,%a6,%d0)
2876 fmovm.x &0x02,(0x4+0x8,%a6,%d0)
2881 fmovm.x &0x01,(0x4+0x8,%a6,%d0)
2888 btst &0x7,(%sp) # is trace enabled?
2892 #########################################################################
# Control-register fmovm: emulate the load, restore the saved state, then
# either leave normally with the stacked PC advanced to the next instruction
# or convert the frame into a trace frame.
2895 bsr.l fmovm_ctrl # load ctrl regs
2898 fmovm.x EXC_FPREGS(%a6),&0xc0 # restore fp0-fp1
2899 fmovm.l USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs
2900 movm.l EXC_DREGS(%a6),&0x0303 # restore d0-d1/a0-a1
2902 btst &0x7,EXC_SR(%a6) # is trace on?
2903 bne.b iea_fmovm_trace # yes
2905 mov.l EXC_EXTWPTR(%a6),EXC_PC(%a6) # set Next PC
2907 unlk %a6 # unravel the frame
2909 bra.l _fpsp_done # exit to os
2912 # The control reg instruction that took an "Unimplemented Effective Address"
2913 # exception was being traced. The "Current PC" for the trace frame is the
2914 # PC stacked for Unimp EA. The "Next PC" is in EXC_EXTWPTR.
2915 # After fixing the stack frame, jump to _real_trace().
2917 # UNIMP EA FRAME TRACE FRAME
2918 # ***************** *****************
2919 # * 0x0 * 0x0f0 * * Current *
2920 # ***************** * PC *
2921 # * Current * *****************
2922 # * PC * * 0x2 * 0x024 *
2923 # ***************** *****************
2925 # ***************** * PC *
2929 # this ain't a pretty solution, but it works:
2930 # -restore a6 (not with unlk)
2931 # -shift stack frame down over where old a6 used to be
2932 # -add LOCAL_SIZE to stack pointer
2934 mov.l (%a6),%a6 # restore frame pointer
# the new frame starts at offset LOCAL_SIZE from sp, i.e. on top of the
# longword that held the saved a6: SR at +0x0, Next PC at +0x2, format/offset
# at +0x6, Current PC at +0x8.
2935 mov.w EXC_SR+LOCAL_SIZE(%sp),0x0+LOCAL_SIZE(%sp)
# the Current PC is moved first; presumably because the Next PC store below
# lands on its old slot -- TODO confirm against the frame equates.
2936 mov.l EXC_PC+LOCAL_SIZE(%sp),0x8+LOCAL_SIZE(%sp)
2937 mov.l EXC_EXTWPTR+LOCAL_SIZE(%sp),0x2+LOCAL_SIZE(%sp)
2938 mov.w &0x2024,0x6+LOCAL_SIZE(%sp) # stk fmt = 0x2; voff = 0x024
2939 add.l &LOCAL_SIZE,%sp # clear stack frame
2943 #########################################################################
2944 # The FPU is disabled and so we should really have taken the "Line
2945 # F Emulator" exception. So, here we create an 8-word stack frame
2946 # from our 4-word stack frame. This means we must calculate the length
2947 # of the faulting instruction to get the "next PC". This is trivial for
2948 # immediate operands but requires some extra work for fmovm dynamic
2949 # which can use most addressing modes.
# FPU-disabled path: work out the length of the faulting instruction, then
# turn the 4-word frame into an 8-word frame (format/offset word 0x402c) and
# leave through _real_fpu_disabled.
2951 mov.l (%sp)+,%d0 # restore d0
2953 link %a6,&-LOCAL_SIZE # init stack frame
2955 movm.l &0x0303,EXC_DREGS(%a6) # save d0-d1/a0-a1
2957 # PC of instruction that took the exception is the PC in the frame
2958 mov.l EXC_PC(%a6),EXC_EXTWPTR(%a6)
2959 mov.l EXC_EXTWPTR(%a6),%a0 # fetch instruction addr
2960 addq.l &0x4,EXC_EXTWPTR(%a6) # incr instruction ptr
2961 bsr.l _imem_read_long # fetch the instruction words
2962 mov.l %d0,EXC_OPWORD(%a6) # store OPWORD and EXTWORD
2964 tst.w %d0 # is instr fmovm?
2965 bmi.b iea_dis_fmovm # yes
2966 # instruction is using an extended precision immediate operand. Therefore,
2967 # the total instruction length is 16 bytes.
2969 mov.l &0x10,%d0 # 16 bytes of instruction
2972 btst &0xe,%d0 # is instr fmovm ctrl
2973 bne.b iea_dis_fmovm_data # no
2974 # the instruction is a fmovm.l with 2 or 3 registers.
# extract the 3-bit field at bit offset 19 of d0; the value 7 is treated
# as "all registers".
2975 bfextu %d0{&19:&3},%d1
2977 cmpi.b %d1,&0x7 # move all regs?
2981 # the instruction is an fmovm.x dynamic which can use many addressing
2982 # modes and thus can have several different total instruction lengths.
2983 # call fmovm_calc_ea which will go through the ea calc process and,
2984 # as a by-product, will tell us how long the instruction is.
# instruction length = final extension-word pointer - PC of the instruction
2988 mov.l EXC_EXTWPTR(%a6),%d0
2989 sub.l EXC_PC(%a6),%d0
2991 mov.w %d0,EXC_VOFF(%a6) # store stack shift value
2993 movm.l EXC_DREGS(%a6),&0x0303 # restore d0-d1/a0-a1
2997 # here, we actually create the 8-word frame from the 4-word frame,
2998 # with the "next PC" as additional info.
2999 # the <ea> field is left as undefined.
3000 subq.l &0x8,%sp # make room for new stack
3001 mov.l %d0,-(%sp) # save d0
# offsets below include the 4 bytes of the saved d0 on top of the stack
3002 mov.w 0xc(%sp),0x4(%sp) # move SR
3003 mov.l 0xe(%sp),0x6(%sp) # move Current PC
# NOTE(review): the code that loads the instruction length into d0 before
# the add below is not visible in this listing.
3006 mov.l 0x6(%sp),0x10(%sp) # move Current PC
3007 add.l %d0,0x6(%sp) # make Next PC
3008 mov.w &0x402c,0xa(%sp) # insert offset,frame format
3009 mov.l (%sp)+,%d0 # restore d0
3011 bra.l _real_fpu_disabled
# Instruction-fetch access error: restore the saved state, then grow the
# 4-word frame by 8 bytes into an access error frame (format/offset word
# 0x4008) whose fault address is the stacked PC.
3019 fmovm.l USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs
3020 fmovm.x EXC_FPREGS(%a6),&0xc0 # restore fp0-fp1 from stack
3022 movm.l EXC_DREGS(%a6),&0x0303 # restore d0-d1/a0-a1
3026 subq.w &0x8,%sp # make stack frame bigger
3027 mov.l 0x8(%sp),(%sp) # store SR,hi(PC)
3028 mov.w 0xc(%sp),0x4(%sp) # store lo(PC)
3029 mov.w &0x4008,0x6(%sp) # store voff
# 0x2(%sp) is the PC just copied, so the reported address is that PC
3030 mov.l 0x2(%sp),0x8(%sp) # store ea
3031 mov.l &0x09428001,0xc(%sp) # store fslw
# bit 5 of the first SR byte selects supervisor; in that case bit 2 of the
# second fslw byte is set as well.
3034 btst &0x5,(%sp) # user or supervisor mode?
3035 beq.b iea_acc_done2 # user
3036 bset &0x2,0xd(%sp) # set supervisor TM bit
# Second access-error exit: rebuild sp from a6, restore the saved state and
# build the access error frame 8 bytes below the current exception frame.
# a0 is written to the slot the instruction-fetch variant calls "ea", and d0
# to the upper word of the slot it calls "fslw"; their exact contents come
# from the caller -- TODO confirm.
3042 lea -LOCAL_SIZE(%a6),%sp
3047 fmovm.x EXC_FPREGS(%a6),&0xc0 # restore fp0-fp1 from stack
3048 fmovm.l LOCAL_SIZE+USER_FPCR(%sp),%fpcr,%fpsr,%fpiar # restore ctrl regs
# each destination is written as (-0x8 + new-frame offset + 0x4 + LOCAL_SIZE)
3052 mov.l 0x4+LOCAL_SIZE(%sp),-0x8+0x4+LOCAL_SIZE(%sp)
3053 mov.w 0x8+LOCAL_SIZE(%sp),-0x8+0x8+LOCAL_SIZE(%sp)
3054 mov.w &0x4008,-0x8+0xa+LOCAL_SIZE(%sp)
3055 mov.l %a0,-0x8+0xc+LOCAL_SIZE(%sp)
3056 mov.w %d0,-0x8+0x10+LOCAL_SIZE(%sp)
3057 mov.w &0x0001,-0x8+0x12+LOCAL_SIZE(%sp)
3059 movm.l LOCAL_SIZE+EXC_DREGS(%sp),&0x0303 # restore d0-d1/a0-a1
# release the local area, leaving sp on the first word written above
3060 add.w &LOCAL_SIZE-0x4,%sp
3064 #########################################################################
3065 # XDEF **************************************************************** #
3066 # _fpsp_operr(): 060FPSP entry point for FP Operr exception. #
3068 # This handler should be the first code executed upon taking the #
3069 # FP Operand Error exception in an operating system. #
3071 # XREF **************************************************************** #
3072 # _imem_read_long() - read instruction longword #
3073 # fix_skewed_ops() - adjust src operand in fsave frame #
3074 # _real_operr() - "callout" to operating system operr handler #
3075 # _dmem_write_{byte,word,long}() - store data to mem (opclass 3) #
3076 # store_dreg_{b,w,l}() - store data to data regfile (opclass 3) #
3077 # facc_out_{b,w,l}() - store to memory took access error (opcl 3) #
3079 # INPUT *************************************************************** #
3080 # - The system stack contains the FP Operr exception frame #
3081 # - The fsave frame contains the source operand #
3083 # OUTPUT ************************************************************** #
3084 # No access error: #
3085 # - The system stack is unchanged #
3086 # - The fsave frame contains the adjusted src op for opclass 0,2 #
3088 # ALGORITHM *********************************************************** #
3089 # In a system where the FP Operr exception is enabled, the goal #
3090 # is to get to the handler specified at _real_operr(). But, on the 060, #
3091 # for opclass zero and two instruction taking this exception, the #
3092 # input operand in the fsave frame may be incorrect for some cases #
3093 # and needs to be corrected. This handler calls fix_skewed_ops() to #
3094 # do just this and then exits through _real_operr(). #
3095 # For opclass 3 instructions, the 060 doesn't store the default #
3096 # operr result out to memory or data register file as it should. #
3097 # This code must emulate the move out before finally exiting through #
3098 # _real_operr(). The move out, if to memory, is performed using #
3099 # _mem_write() "callout" routines that may return a failing result. #
3100 # In this special case, the handler must exit through facc_out() #
3101 # which creates an access error stack frame from the current operr stack frame. #
3104 #########################################################################
# Entry code: build the local frame, capture the FPU state with fsave, save
# the volatile registers and fetch the faulting instruction (addressed by
# the saved FPIAR) to decide between the move-out and the in/reg paths.
3109 link.w %a6,&-LOCAL_SIZE # init stack frame
3111 fsave FP_SRC(%a6) # grab the "busy" frame
3113 movm.l &0x0303,EXC_DREGS(%a6) # save d0-d1/a0-a1
3114 fmovm.l %fpcr,%fpsr,%fpiar,USER_FPCR(%a6) # save ctrl regs
3115 fmovm.x &0xc0,EXC_FPREGS(%a6) # save fp0-fp1 on stack
3117 # the FPIAR holds the "current PC" of the faulting instruction
3118 mov.l USER_FPIAR(%a6),EXC_EXTWPTR(%a6)
3120 mov.l EXC_EXTWPTR(%a6),%a0 # fetch instruction addr
3121 addq.l &0x4,EXC_EXTWPTR(%a6) # incr instruction ptr
3122 bsr.l _imem_read_long # fetch the instruction words
3123 mov.l %d0,EXC_OPWORD(%a6)
3125 ##############################################################################
# bit 13 of d0 lies in the low word, i.e. in the extension word
3127 btst &13,%d0 # is instr an fmove out?
3128 bne.b foperr_out # fmove out
3131 # here, we simply see if the operand in the fsave frame needs to be "unskewed".
3132 # this would be the case for opclass two operations with a source infinity or
3133 # denorm operand in the sgl or dbl format. NANs also become skewed, but can't
3134 # cause an operr so we don't need to check for them here.
3135 lea FP_SRC(%a6),%a0 # pass: ptr to src op
3136 bsr.l fix_skewed_ops # fix src op
# common exit: restore the saved registers and reload the FPU from the
# (possibly corrected) fsave image at FP_SRC.
3139 fmovm.x EXC_FPREGS(%a6),&0xc0 # restore fp0-fp1
3140 fmovm.l USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs
3141 movm.l EXC_DREGS(%a6),&0x0303 # restore d0-d1/a0-a1
3143 frestore FP_SRC(%a6)
3148 ########################################################################
3151 # the hardware does not save the default result to memory on enabled
3152 # operand error exceptions. we do this here before passing control to
3153 # the user operand error handler.
3155 # byte, word, and long destination format operations can pass
3156 # through here. we simply need to test the sign of the src
3157 # operand and save the appropriate minimum or maximum integer value
3158 # to the effective address as pointed to by the stacked effective address.
3160 # although packed opclass three operations can take operand error
3161 # exceptions, they won't pass through here since they are caught
3162 # first by the unsupported data format exception handler. that handler
3163 # sends them directly to _real_operr() if necessary.
3167 mov.w FP_SRC_EX(%a6),%d1 # fetch exponent
3170 bne.b foperr_out_not_qnan
3171 # the operand is either an infinity or a QNAN.
3172 tst.l FP_SRC_LO(%a6)
3173 bne.b foperr_out_qnan
3174 mov.l FP_SRC_HI(%a6),%d1
3175 andi.l &0x7fffffff,%d1
3176 beq.b foperr_out_not_qnan
3178 mov.l FP_SRC_HI(%a6),L_SCR1(%a6)
3179 bra.b foperr_out_jmp
3181 foperr_out_not_qnan:
3182 mov.l &0x7fffffff,%d1
3183 tst.b FP_SRC_EX(%a6)
3184 bpl.b foperr_out_not_qnan2
3186 foperr_out_not_qnan2:
3187 mov.l %d1,L_SCR1(%a6)
3190 bfextu %d0{&19:&3},%d0 # extract dst format field
3191 mov.b 1+EXC_OPWORD(%a6),%d1 # extract <ea> mode,reg
3192 mov.w (tbl_operr.b,%pc,%d0.w*2),%a0
3193 jmp (tbl_operr.b,%pc,%a0)
3196 short foperr_out_l - tbl_operr # long word integer
3197 short tbl_operr - tbl_operr # sgl prec shouldn't happen
3198 short tbl_operr - tbl_operr # ext prec shouldn't happen
3199 short foperr_exit - tbl_operr # packed won't enter here
3200 short foperr_out_w - tbl_operr # word integer
3201 short tbl_operr - tbl_operr # dbl prec shouldn't happen
3202 short foperr_out_b - tbl_operr # byte integer
3203 short tbl_operr - tbl_operr # packed won't enter here
3206 mov.b L_SCR1(%a6),%d0 # load positive default result
3207 cmpi.b %d1,&0x7 # is <ea> mode a data reg?
3208 ble.b foperr_out_b_save_dn # yes
3209 mov.l EXC_EA(%a6),%a0 # pass: <ea> of default result
3210 bsr.l _dmem_write_byte # write the default result
3212 tst.l %d1 # did dstore fail?
3213 bne.l facc_out_b # yes
3216 foperr_out_b_save_dn:
3218 bsr.l store_dreg_b # store result to regfile
3222 mov.w L_SCR1(%a6),%d0 # load positive default result
3223 cmpi.b %d1,&0x7 # is <ea> mode a data reg?
3224 ble.b foperr_out_w_save_dn # yes
3225 mov.l EXC_EA(%a6),%a0 # pass: <ea> of default result
3226 bsr.l _dmem_write_word # write the default result
3228 tst.l %d1 # did dstore fail?
3229 bne.l facc_out_w # yes
3232 foperr_out_w_save_dn:
3234 bsr.l store_dreg_w # store result to regfile
3238 mov.l L_SCR1(%a6),%d0 # load positive default result
3239 cmpi.b %d1,&0x7 # is <ea> mode a data reg?
3240 ble.b foperr_out_l_save_dn # yes
3241 mov.l EXC_EA(%a6),%a0 # pass: <ea> of default result
3242 bsr.l _dmem_write_long # write the default result
3244 tst.l %d1 # did dstore fail?
3245 bne.l facc_out_l # yes
3248 foperr_out_l_save_dn:
3250 bsr.l store_dreg_l # store result to regfile
3253 #########################################################################
3254 # XDEF **************************************************************** #
3255 # _fpsp_snan(): 060FPSP entry point for FP SNAN exception. #
3257 # This handler should be the first code executed upon taking the #
3258 # FP Signalling NAN exception in an operating system. #
3260 # XREF **************************************************************** #
3261 # _imem_read_long() - read instruction longword #
3262 # fix_skewed_ops() - adjust src operand in fsave frame #
3263 # _real_snan() - "callout" to operating system SNAN handler #
3264 # _dmem_write_{byte,word,long}() - store data to mem (opclass 3) #
3265 # store_dreg_{b,w,l}() - store data to data regfile (opclass 3) #
3266 # facc_out_{b,w,l,d,x}() - store to mem took acc error (opcl 3) #
3267 # _calc_ea_fout() - fix An if <ea> is -() or ()+; also get <ea> #
3269 # INPUT *************************************************************** #
3270 # - The system stack contains the FP SNAN exception frame #
3271 # - The fsave frame contains the source operand #
3273 # OUTPUT ************************************************************** #
3274 # No access error: #
3275 # - The system stack is unchanged #
3276 # - The fsave frame contains the adjusted src op for opclass 0,2 #
3278 # ALGORITHM *********************************************************** #
3279 # In a system where the FP SNAN exception is enabled, the goal #
3280 # is to get to the handler specified at _real_snan(). But, on the 060, #
3281 # for opclass zero and two instructions taking this exception, the #
3282 # input operand in the fsave frame may be incorrect for some cases #
3283 # and needs to be corrected. This handler calls fix_skewed_ops() to #
3284 # do just this and then exits through _real_snan(). #
3285 # For opclass 3 instructions, the 060 doesn't store the default #
3286 # SNAN result out to memory or data register file as it should. #
3287 # This code must emulate the move out before finally exiting through #
3288 # _real_snan(). The move out, if to memory, is performed using #
3289 # _mem_write() "callout" routines that may return a failing result. #
3290 # In this special case, the handler must exit through facc_out() #
3291 # which creates an access error stack frame from the current SNAN exception frame. #
3293 # For the case of an extended precision opclass 3 instruction, #
3294 # if the effective addressing mode was -() or ()+, then the address #
3295 # register must get updated by calling _calc_ea_fout(). If the <ea> #
3296 # was -(a7) from supervisor mode, then the exception frame currently #
3297 # on the system stack must be carefully moved "down" to make room #
3298 # for the operand being moved. #
3300 #########################################################################
# Signalling NAN handler body: save state, fetch the faulting instruction,
# then either fix the fsave source operand (opclass 0/2) or emulate the
# move out of the SNAN result in the destination format (opclass 3).
# NOTE(review): the entry label, most branch-target labels and the exit
# branches used by this code are not visible in this listing -- confirm the
# control flow against the full source.
3305 link.w %a6,&-LOCAL_SIZE # init stack frame
3307 fsave FP_SRC(%a6) # grab the "busy" frame
3309 movm.l &0x0303,EXC_DREGS(%a6) # save d0-d1/a0-a1
3310 fmovm.l %fpcr,%fpsr,%fpiar,USER_FPCR(%a6) # save ctrl regs
3311 fmovm.x &0xc0,EXC_FPREGS(%a6) # save fp0-fp1 on stack
3313 # the FPIAR holds the "current PC" of the faulting instruction
3314 mov.l USER_FPIAR(%a6),EXC_EXTWPTR(%a6) # start instruction ptr at saved FPIAR
3316 mov.l EXC_EXTWPTR(%a6),%a0 # fetch instruction addr
3317 addq.l &0x4,EXC_EXTWPTR(%a6) # incr instruction ptr
3318 bsr.l _imem_read_long # fetch the instruction words
3319 mov.l %d0,EXC_OPWORD(%a6) # save the longword returned in d0
3321 ##############################################################################
3323 btst &13,%d0 # is instr an fmove out?
3324 bne.w fsnan_out # fmove out
3327 # here, we simply see if the operand in the fsave frame needs to be "unskewed".
3328 # this would be the case for opclass two operations with a source infinity or
3329 # denorm operand in the sgl or dbl format. NANs also become skewed and must be fixed here.
3331 lea FP_SRC(%a6),%a0 # pass: ptr to src op
3332 bsr.l fix_skewed_ops # fix src op
3335 fmovm.x EXC_FPREGS(%a6),&0xc0 # restore fp0-fp1
3336 fmovm.l USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs
3337 movm.l EXC_DREGS(%a6),&0x0303 # restore d0-d1/a0-a1
3339 frestore FP_SRC(%a6) # reload the fsave frame saved at FP_SRC
3344 ########################################################################
3347 # the hardware does not save the default result to memory on enabled
3348 # snan exceptions. we do this here before passing control to
3349 # the user snan handler.
3351 # byte, word, long, sgl, dbl, ext, and packed destination format operations
3352 # are dispatched from here. since packed format operations already were handled by
3353 # fpsp_unsupp(), then we need to do nothing else for them here.
3354 # for byte, word, and long, we take the upper byte, word, or longword of
3355 # the source SNAN mantissa, set the SNAN bit, and save that value to a data
3356 # register or to the effective address as pointed to by the stacked effective address.
3360 bfextu %d0{&19:&3},%d0 # extract dst format field
3361 mov.b 1+EXC_OPWORD(%a6),%d1 # extract <ea> mode,reg
3362 mov.w (tbl_snan.b,%pc,%d0.w*2),%a0 # fetch 16-bit table entry for this format
3363 jmp (tbl_snan.b,%pc,%a0) # dispatch; entries are offsets from tbl_snan
3366 short fsnan_out_l - tbl_snan # long word integer
3367 short fsnan_out_s - tbl_snan # single precision
3368 short fsnan_out_x - tbl_snan # extended precision
3369 short tbl_snan - tbl_snan # packed needs no help
3370 short fsnan_out_w - tbl_snan # word integer
3371 short fsnan_out_d - tbl_snan # double precision
3372 short fsnan_out_b - tbl_snan # byte integer
3373 short tbl_snan - tbl_snan # packed needs no help
3376 mov.b FP_SRC_HI(%a6),%d0 # load upper byte of SNAN
3377 bset &6,%d0 # set SNAN bit
3378 cmpi.b %d1,&0x7 # is <ea> mode a data reg?
3379 ble.b fsnan_out_b_dn # yes
3380 mov.l EXC_EA(%a6),%a0 # pass: <ea> of default result
3381 bsr.l _dmem_write_byte # write the default result
3383 tst.l %d1 # did dstore fail?
3384 bne.l facc_out_b # yes
3389 bsr.l store_dreg_b # store result to regfile
3393 mov.w FP_SRC_HI(%a6),%d0 # load upper word of SNAN
3394 bset &14,%d0 # set SNAN bit
3395 cmpi.b %d1,&0x7 # is <ea> mode a data reg?
3396 ble.b fsnan_out_w_dn # yes
3397 mov.l EXC_EA(%a6),%a0 # pass: <ea> of default result
3398 bsr.l _dmem_write_word # write the default result
3400 tst.l %d1 # did dstore fail?
3401 bne.l facc_out_w # yes
3406 bsr.l store_dreg_w # store result to regfile
3410 mov.l FP_SRC_HI(%a6),%d0 # load upper longword of SNAN
3411 bset &30,%d0 # set SNAN bit
3412 cmpi.b %d1,&0x7 # is <ea> mode a data reg?
3413 ble.b fsnan_out_l_dn # yes
3414 mov.l EXC_EA(%a6),%a0 # pass: <ea> of default result
3415 bsr.l _dmem_write_long # write the default result
3417 tst.l %d1 # did dstore fail?
3418 bne.l facc_out_l # yes
3423 bsr.l store_dreg_l # store result to regfile
3427 cmpi.b %d1,&0x7 # is <ea> mode a data reg?
3428 ble.b fsnan_out_d_dn # yes
3429 mov.l FP_SRC_EX(%a6),%d0 # fetch SNAN sign
3430 andi.l &0x80000000,%d0 # keep sign
3431 ori.l &0x7fc00000,%d0 # insert new exponent,SNAN bit
3432 mov.l FP_SRC_HI(%a6),%d1 # load mantissa
3433 lsr.l &0x8,%d1 # shift mantissa for sgl
3434 or.l %d1,%d0 # create sgl SNAN
3435 mov.l EXC_EA(%a6),%a0 # pass: <ea> of default result
3436 bsr.l _dmem_write_long # write the default result
3438 tst.l %d1 # did dstore fail?
3439 bne.l facc_out_l # yes
3443 mov.l FP_SRC_EX(%a6),%d0 # fetch SNAN sign
3444 andi.l &0x80000000,%d0 # keep sign
3445 ori.l &0x7fc00000,%d0 # insert new exponent,SNAN bit
3447 mov.l FP_SRC_HI(%a6),%d1 # load mantissa
3448 lsr.l &0x8,%d1 # shift mantissa for sgl
3449 or.l %d1,%d0 # create sgl SNAN
3452 bsr.l store_dreg_l # store result to regfile
3456 mov.l FP_SRC_EX(%a6),%d0 # fetch SNAN sign
3457 andi.l &0x80000000,%d0 # keep sign
3458 ori.l &0x7ff80000,%d0 # insert new exponent,SNAN bit
3459 mov.l FP_SRC_HI(%a6),%d1 # load hi mantissa
3460 mov.l %d0,FP_SCR0_EX(%a6) # store to temp space
3461 mov.l &11,%d0 # load shift amt
# NOTE(review): no shift or rotate using the amount in d0 is visible between
# here and the or.l instructions below -- confirm against the full source.
3463 or.l %d1,FP_SCR0_EX(%a6) # create dbl hi
3464 mov.l FP_SRC_HI(%a6),%d1 # load hi mantissa
3465 andi.l &0x000007ff,%d1 # keep its low 11 bits
3467 mov.l %d1,FP_SCR0_HI(%a6) # store to temp space
3468 mov.l FP_SRC_LO(%a6),%d1 # load lo mantissa
3470 or.l %d1,FP_SCR0_HI(%a6) # create dbl lo
3471 lea FP_SCR0(%a6),%a0 # pass: ptr to operand
3472 mov.l EXC_EA(%a6),%a1 # pass: dst addr
3473 movq.l &0x8,%d0 # pass: size of 8 bytes
3474 bsr.l _dmem_write # write the default result
3476 tst.l %d1 # did dstore fail?
3477 bne.l facc_out_d # yes
3481 # for extended precision, if the addressing mode is pre-decrement or
3482 # post-increment, then the address register did not get updated.
3483 # in addition, for pre-decrement, the stacked <ea> is incorrect.
3485 clr.b SPCOND_FLG(%a6) # clear special case flag
3487 mov.w FP_SRC_EX(%a6),FP_SCR0_EX(%a6) # copy the exponent word to scratch
3488 clr.w 2+FP_SCR0(%a6) # zero the word that follows it
3489 mov.l FP_SRC_HI(%a6),%d0 # load hi mantissa
3491 mov.l %d0,FP_SCR0_HI(%a6) # store hi mantissa to scratch
3492 mov.l FP_SRC_LO(%a6),FP_SCR0_LO(%a6) # copy lo mantissa to scratch
3494 btst &0x5,EXC_SR(%a6) # supervisor mode exception?
3495 bne.b fsnan_out_x_s # yes
3497 mov.l %usp,%a0 # fetch user stack pointer
3498 mov.l %a0,EXC_A7(%a6) # save on stack for calc_ea()
3499 mov.l (%a6),EXC_A6(%a6) # copy the a6 value saved by link.w into EXC_A6
3501 bsr.l _calc_ea_fout # find the correct ea,update An
3503 mov.l %a0,EXC_EA(%a6) # stack correct <ea>
3505 mov.l EXC_A7(%a6),%a0 # reload EXC_A7 after calc_ea
3506 mov.l %a0,%usp # restore user stack pointer
3507 mov.l EXC_A6(%a6),(%a6) # write EXC_A6 back to the link.w save slot
3510 lea FP_SCR0(%a6),%a0 # pass: ptr to operand
3511 movq.l &0xc,%d0 # pass: size of extended
3512 bsr.l _dmem_write # write the default result
3514 tst.l %d1 # did dstore fail?
3515 bne.l facc_out_x # yes
3520 mov.l (%a6),EXC_A6(%a6) # copy the a6 value saved by link.w into EXC_A6
3522 bsr.l _calc_ea_fout # find the correct ea,update An
3524 mov.l %a0,EXC_EA(%a6) # stack correct <ea>
3526 mov.l EXC_A6(%a6),(%a6) # write EXC_A6 back to the link.w save slot
3528 cmpi.b SPCOND_FLG(%a6),&mda7_flg # is <ea> mode -(a7)?
3529 bne.b fsnan_out_x_save # no
3531 # the operation was "fmove.x SNAN,-(a7)" from supervisor mode.
3532 fmovm.x EXC_FPREGS(%a6),&0xc0 # restore fp0-fp1
3533 fmovm.l USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs
3534 movm.l EXC_DREGS(%a6),&0x0303 # restore d0-d1/a0-a1
3536 frestore FP_SRC(%a6) # reload the fsave frame saved at FP_SRC
3538 mov.l EXC_A6(%a6),%a6 # restore frame pointer
# copy three longwords of the exception frame 0xc bytes lower in memory
3540 mov.l LOCAL_SIZE+EXC_SR(%sp),LOCAL_SIZE+EXC_SR-0xc(%sp)
3541 mov.l LOCAL_SIZE+EXC_PC+0x2(%sp),LOCAL_SIZE+EXC_PC+0x2-0xc(%sp)
3542 mov.l LOCAL_SIZE+EXC_EA(%sp),LOCAL_SIZE+EXC_EA-0xc(%sp)
# then write the three scratch longwords (the operand) where the frame was
3544 mov.l LOCAL_SIZE+FP_SCR0_EX(%sp),LOCAL_SIZE+EXC_SR(%sp)
3545 mov.l LOCAL_SIZE+FP_SCR0_HI(%sp),LOCAL_SIZE+EXC_PC+0x2(%sp)
3546 mov.l LOCAL_SIZE+FP_SCR0_LO(%sp),LOCAL_SIZE+EXC_EA(%sp)
3548 add.l &LOCAL_SIZE-0x8,%sp # release the local area, less 8 bytes
3552 #########################################################################
3553 # XDEF **************************************************************** #
3554 # _fpsp_inex(): 060FPSP entry point for FP Inexact exception. #
3556 # This handler should be the first code executed upon taking the #
3557 # FP Inexact exception in an operating system. #
3559 # XREF **************************************************************** #
3560 # _imem_read_long() - read instruction longword #
3561 # fix_skewed_ops() - adjust src operand in fsave frame #
3562 # set_tag_x() - determine optype of src/dst operands #
3563 # store_fpreg() - store opclass 0 or 2 result to FP regfile #
3564 # unnorm_fix() - change UNNORM operands to NORM or ZERO #
3565 # load_fpn2() - load dst operand from FP regfile #
3566 # smovcr() - emulate an "fmovcr" instruction #
3567 # fout() - emulate an opclass 3 instruction #
3568 # tbl_unsupp - addr of table of emulation routines for opclass 0,2 #
3569 # _real_inex() - "callout" to operating system inexact handler #
3571 # INPUT *************************************************************** #
3572 # - The system stack contains the FP Inexact exception frame #
3573 # - The fsave frame contains the source operand #
3575 # OUTPUT ************************************************************** #
3576 # - The system stack is unchanged #
3577 # - The fsave frame contains the adjusted src op for opclass 0,2 #
3579 # ALGORITHM *********************************************************** #
3580 # In a system where the FP Inexact exception is enabled, the goal #
3581 # is to get to the handler specified at _real_inex(). But, on the 060, #
3582 # for opclass zero and two instructions taking this exception, the #
3583 # hardware doesn't store the correct result to the destination FP #
3584 # register as did the '040 and '881/2. This handler must emulate the #
3585 # instruction in order to get this value and then store it to the #
3586 # correct register before calling _real_inex(). #
3587 # For opclass 3 instructions, the 060 doesn't store the default #
3588 # inexact result out to memory or data register file as it should. #
3589 # This code must emulate the move out by calling fout() before finally #
3590 # exiting through _real_inex(). #
3592 #########################################################################
# Inexact handler body: save state, fetch the faulting instruction, then
# emulate it -- via the tbl_unsupp routines for opclass 0/2, or via fout()
# for an opclass 3 "fmove out".
# NOTE(review): the entry label, the branch-target labels, some calls listed
# in the XREF header and the exit branches are not visible in this listing --
# confirm the control flow against the full source.
3597 link.w %a6,&-LOCAL_SIZE # init stack frame
3599 fsave FP_SRC(%a6) # grab the "busy" frame
3601 movm.l &0x0303,EXC_DREGS(%a6) # save d0-d1/a0-a1
3602 fmovm.l %fpcr,%fpsr,%fpiar,USER_FPCR(%a6) # save ctrl regs
3603 fmovm.x &0xc0,EXC_FPREGS(%a6) # save fp0-fp1 on stack
3605 # the FPIAR holds the "current PC" of the faulting instruction
3606 mov.l USER_FPIAR(%a6),EXC_EXTWPTR(%a6) # start instruction ptr at saved FPIAR
3608 mov.l EXC_EXTWPTR(%a6),%a0 # fetch instruction addr
3609 addq.l &0x4,EXC_EXTWPTR(%a6) # incr instruction ptr
3610 bsr.l _imem_read_long # fetch the instruction words
3611 mov.l %d0,EXC_OPWORD(%a6) # save the longword returned in d0
3613 ##############################################################################
3615 btst &13,%d0 # is instr an fmove out?
3616 bne.w finex_out # fmove out
3619 # the hardware, for "fabs" and "fneg" w/ a long source format, puts the
3620 # longword integer directly into the upper longword of the mantissa along
3621 # w/ an exponent value of 0x401e. we convert this to extended precision here.
3622 bfextu %d0{&19:&3},%d0 # fetch instr size
3623 bne.b finex_cont # instr size is not long
3624 cmpi.w FP_SRC_EX(%a6),&0x401e # is exponent 0x401e?
3625 bne.b finex_cont # no
3627 fmov.l FP_SRC_HI(%a6),%fp0 # load integer src
3628 fmov.x %fp0,FP_SRC(%a6) # store integer as extended precision
3629 mov.w &0xe001,0x2+FP_SRC(%a6)
3632 lea FP_SRC(%a6),%a0 # pass: ptr to src op
3633 bsr.l fix_skewed_ops # fix src op
3635 # Here, we zero the ccode and exception byte field since we're going to
3636 # emulate the whole instruction. Notice, though, that we don't kill the
3637 # INEX1 bit. This is because a packed op has long since been converted
3638 # to extended before arriving here. Therefore, we need to retain the
3639 # INEX1 bit from when the operand was first converted.
3640 andi.l &0x00ff01ff,USER_FPSR(%a6) # zero ccode byte and exc byte except INEX1; accrued byte kept
3642 fmov.l &0x0,%fpcr # zero current control regs
3645 bfextu EXC_EXTWORD(%a6){&0:&6},%d1 # extract upper 6 of cmdreg
3646 cmpi.b %d1,&0x17 # is op an fmovecr?
3647 beq.w finex_fmovcr # yes
3649 lea FP_SRC(%a6),%a0 # pass: ptr to src op
3650 bsr.l set_tag_x # tag the operand type
3651 mov.b %d0,STAG(%a6) # maybe NORM,DENORM
3653 # bits four and five of the fp extension word separate the monadic and dyadic
3654 # operations that can pass through fpsp_inex(). remember that fcmp and ftst
3655 # will never take this exception, but fsincos will.
3656 btst &0x5,1+EXC_CMDREG(%a6) # is operation monadic or dyadic?
3657 beq.b finex_extract # monadic
3659 btst &0x4,1+EXC_CMDREG(%a6) # is operation an fsincos?
3660 bne.b finex_extract # yes
3662 bfextu EXC_CMDREG(%a6){&6:&3},%d0 # dyadic; load dst reg
3663 bsr.l load_fpn2 # load dst into FP_DST
3665 lea FP_DST(%a6),%a0 # pass: ptr to dst op
3666 bsr.l set_tag_x # tag the operand type
3667 cmpi.b %d0,&UNNORM # is operand an UNNORM?
3668 bne.b finex_op2_done # no
3669 bsr.l unnorm_fix # yes; convert to NORM,DENORM,or ZERO
3671 mov.b %d0,DTAG(%a6) # save dst optype tag
3675 mov.b FPCR_MODE(%a6),%d0 # pass rnd prec/mode
3677 mov.b 1+EXC_CMDREG(%a6),%d1 # fetch low byte of cmdreg
3678 andi.w &0x007f,%d1 # extract extension
3683 mov.l (tbl_unsupp.l,%pc,%d1.w*4),%d1 # fetch routine addr
3684 jsr (tbl_unsupp.l,%pc,%d1.l*1) # call it; entry is an offset from tbl_unsupp
3686 # the operation has been emulated. the result is in fp0.
3688 bfextu EXC_CMDREG(%a6){&6:&3},%d0 # same cmdreg field used above for the dst reg
3692 fmovm.x EXC_FPREGS(%a6),&0xc0 # restore fp0-fp1
3693 fmovm.l USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs
3694 movm.l EXC_DREGS(%a6),&0x0303 # restore d0-d1/a0-a1
3696 frestore FP_SRC(%a6) # reload the fsave frame saved at FP_SRC
3703 mov.b FPCR_MODE(%a6),%d0 # pass rnd prec,mode
3704 mov.b 1+EXC_CMDREG(%a6),%d1 # fetch low byte of cmdreg
3705 andi.l &0x0000007f,%d1 # pass rom offset
3709 ########################################################################
3712 # the hardware does not save the default result to memory on enabled
3713 # inexact exceptions. we do this here before passing control to
3714 # the user inexact handler.
3716 # byte, word, and long destination format operations can pass
3717 # through here. so can double and single precision.
3718 # although packed opclass three operations can take inexact
3719 # exceptions, they won't pass through here since they are caught
3720 # first by the unsupported data format exception handler. that handler
3721 # sends them directly to _real_inex() if necessary.
3725 mov.b &NORM,STAG(%a6) # src is a NORM
3728 mov.b FPCR_MODE(%a6),%d0 # pass rnd prec,mode
3730 andi.l &0xffff00ff,USER_FPSR(%a6) # zero exception field
3732 lea FP_SRC(%a6),%a0 # pass ptr to src operand
3734 bsr.l fout # store the default result
3738 #########################################################################
3739 # XDEF **************************************************************** #
3740 # _fpsp_dz(): 060FPSP entry point for FP DZ exception. #
3742 # This handler should be the first code executed upon taking #
3743 # the FP DZ exception in an operating system. #
3745 # XREF **************************************************************** #
3746 # _imem_read_long() - read instruction longword from memory #
3747 # fix_skewed_ops() - adjust fsave operand #
3748 # _real_dz() - "callout" exit point from FP DZ handler #
3750 # INPUT *************************************************************** #
3751 # - The system stack contains the FP DZ exception stack. #
3752 # - The fsave frame contains the source operand. #
3754 # OUTPUT ************************************************************** #
3755 # - The system stack contains the FP DZ exception stack. #
3756 # - The fsave frame contains the adjusted source operand. #
3758 # ALGORITHM *********************************************************** #
3759 # In a system where the DZ exception is enabled, the goal is to #
3760 # get to the handler specified at _real_dz(). But, on the 060, when the #
3761 # exception is taken, the input operand in the fsave state frame may #
3762 # be incorrect for some cases and needs to be adjusted. So, this package #
3763 # adjusts the operand using fix_skewed_ops() and then branches to _real_dz(). #
3766 #########################################################################
# Divide-by-zero handler body: save state, fetch the faulting instruction,
# correct the fsave source operand with fix_skewed_ops(), then restore state.
# NOTE(review): the entry label and the exit branch are not visible in this
# listing -- confirm against the full source.
3771 link.w %a6,&-LOCAL_SIZE # init stack frame
3773 fsave FP_SRC(%a6) # grab the "busy" frame
3775 movm.l &0x0303,EXC_DREGS(%a6) # save d0-d1/a0-a1
3776 fmovm.l %fpcr,%fpsr,%fpiar,USER_FPCR(%a6) # save ctrl regs
3777 fmovm.x &0xc0,EXC_FPREGS(%a6) # save fp0-fp1 on stack
3779 # the FPIAR holds the "current PC" of the faulting instruction
3780 mov.l USER_FPIAR(%a6),EXC_EXTWPTR(%a6) # start instruction ptr at saved FPIAR
3782 mov.l EXC_EXTWPTR(%a6),%a0 # fetch instruction addr
3783 addq.l &0x4,EXC_EXTWPTR(%a6) # incr instruction ptr
3784 bsr.l _imem_read_long # fetch the instruction words
3785 mov.l %d0,EXC_OPWORD(%a6) # save the longword returned in d0
3787 ##############################################################################
3790 # here, we simply see if the operand in the fsave frame needs to be "unskewed".
3791 # this would be the case for opclass two operations with a source zero
3792 # in the sgl or dbl format.
3793 lea FP_SRC(%a6),%a0 # pass: ptr to src op
3794 bsr.l fix_skewed_ops # fix src op
3797 fmovm.x EXC_FPREGS(%a6),&0xc0 # restore fp0-fp1
3798 fmovm.l USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs
3799 movm.l EXC_DREGS(%a6),&0x0303 # restore d0-d1/a0-a1
3801 frestore FP_SRC(%a6) # reload the fsave frame saved at FP_SRC
3806 #########################################################################
3807 # XDEF **************************************************************** #
3808 # _fpsp_fline(): 060FPSP entry point for "Line F emulator" exc. #
3810 # This handler should be the first code executed upon taking the #
3811 # "Line F Emulator" exception in an operating system. #
3813 # XREF **************************************************************** #
3814 # _fpsp_unimp() - handle "FP Unimplemented" exceptions #
3815 # _real_fpu_disabled() - handle "FPU disabled" exceptions #
3816 # _real_fline() - handle "FLINE" exceptions #
3817 # _imem_read_long() - read instruction longword #
3819 # INPUT *************************************************************** #
3820 # - The system stack contains a "Line F Emulator" exception stack frame #
3823 # OUTPUT ************************************************************** #
3824 # - The system stack is unchanged #
3826 # ALGORITHM *********************************************************** #
3827 # When a "Line F Emulator" exception occurs, there are 3 possible #
3828 # exception types, denoted by the exception stack frame format number: #
3829 # (1) FPU unimplemented instruction (6 word stack frame) #
3830 # (2) FPU disabled (8 word stack frame) #
3831 # (3) Line F (4 word stack frame) #
3833 # This module determines which and forks the flow off to the #
3834 # appropriate "callout" (for "disabled" and "Line F") or to the #
3835 # correct emulation code (for "FPU unimplemented"). #
3836 # This code also must check for "fmovecr" instructions w/ a #
3837 # non-zero <ea> field. These may get flagged as "Line F" but should #
3838 # really be flagged as "FPU Unimplemented". (This is a "feature" on the 060.) #
3841 #########################################################################
# Line F emulator handler body: classify the exception by the frame
# format/vector word at 0x6(%sp), and for an "fmovecr" that arrived as a
# plain F-Line exception, rebuild the stack frame before handing off.
# NOTE(review): the entry label, several conditional branches, compares and
# the unlk/exit instructions are not visible in this listing -- confirm the
# control flow against the full source.
3846 # check to see if this exception is a "FP Unimplemented Instruction"
3847 # exception. if so, branch directly to that handler's entry point.
3848 cmpi.w 0x6(%sp),&0x202c # frame format 0x2, vector offset 0x02c?
3851 # check to see if the FPU is disabled. if so, jump to the OS entry
3852 # point for that condition.
3853 cmpi.w 0x6(%sp),&0x402c # frame format 0x4, vector offset 0x02c?
3854 beq.l _real_fpu_disabled
3856 # the exception was an "F-Line Illegal" exception. we check to see
3857 # if the F-Line instruction is an "fmovecr" w/ a non-zero <ea>. if
3858 # so, convert the F-Line exception stack frame to an FP Unimplemented
3859 # Instruction exception stack frame else branch to the OS entry
3860 # point for the F-Line exception handler.
3861 link.w %a6,&-LOCAL_SIZE # init stack frame
3863 movm.l &0x0303,EXC_DREGS(%a6) # save d0-d1/a0-a1
3865 mov.l EXC_PC(%a6),EXC_EXTWPTR(%a6) # start instruction ptr at stacked PC
3866 mov.l EXC_EXTWPTR(%a6),%a0 # fetch instruction addr
3867 addq.l &0x4,EXC_EXTWPTR(%a6) # incr instruction ptr
3868 bsr.l _imem_read_long # fetch instruction words
3870 bfextu %d0{&0:&10},%d1 # is it an fmovecr?
3872 bne.b fline_fline # no
3874 bfextu %d0{&16:&6},%d1 # is it an fmovecr?
3876 bne.b fline_fline # no
3878 # it's an fmovecr w/ a non-zero <ea> that has entered through
3879 # the F-Line Illegal exception.
3880 # so, we need to convert the F-Line exception stack frame into an
3881 # FP Unimplemented Instruction stack frame and jump to that entry point.
3884 # but, if the FPU is disabled, then we need to jump to the FPU disabled entry point.
3890 movm.l EXC_DREGS(%a6),&0x0303 # restore d0-d1/a0-a1
3894 sub.l &0x8,%sp # make room for "Next PC", <ea>
3895 mov.w 0x8(%sp),(%sp) # move the word at the old frame top (SR)
3896 mov.l 0xa(%sp),0x2(%sp) # move "Current PC"
3897 mov.w &0x402c,0x6(%sp) # frame format 0x4, vector offset 0x02c
3898 mov.l 0x2(%sp),0xc(%sp) # copy "Current PC" to offset 0xc
3899 addq.l &0x4,0x2(%sp) # set "Next PC"
3901 bra.l _real_fpu_disabled
3904 movm.l EXC_DREGS(%a6),&0x0303 # restore d0-d1/a0-a1
3908 fmov.l 0x2(%sp),%fpiar # set current PC
3909 addq.l &0x4,0x2(%sp) # set Next PC
3912 mov.l 0x8(%sp),0x4(%sp) # move longword at 0x8 down to 0x4
3913 mov.b &0x20,0x6(%sp) # set high byte of format/vector word to 0x20
3918 movm.l EXC_DREGS(%a6),&0x0303 # restore d0-d1/a0-a1
3924 #########################################################################
3925 # XDEF **************************************************************** #
3926 # _fpsp_unimp(): 060FPSP entry point for FP "Unimplemented #
3927 # Instruction" exception. #
3929 # This handler should be the first code executed upon taking the #
3930 # FP Unimplemented Instruction exception in an operating system. #
3932 # XREF **************************************************************** #
3933 # _imem_read_{word,long}() - read instruction word/longword #
3934 # load_fop() - load src/dst ops from memory and/or FP regfile #
3935 # store_fpreg() - store opclass 0 or 2 result to FP regfile #
3936 # tbl_trans - addr of table of emulation routines for trnscndls #
3937 # _real_access() - "callout" for access error exception #
3938 # _fpsp_done() - "callout" for exit; work all done #
3939 # _real_trace() - "callout" for Trace enabled exception #
3940 # smovcr() - emulate "fmovecr" instruction #
3941 # funimp_skew() - adjust fsave src ops to "incorrect" value #
3942 # _ftrapcc() - emulate an "ftrapcc" instruction #
3943 # _fdbcc() - emulate an "fdbcc" instruction #
3944 # _fscc() - emulate an "fscc" instruction #
3945 # _real_trap() - "callout" for Trap exception #
3946 # _real_bsun() - "callout" for enabled Bsun exception #
3948 # INPUT *************************************************************** #
3949 # - The system stack contains the "Unimplemented Instr" stk frame #
3951 # OUTPUT ************************************************************** #
3952 # If access error: #
3953 # - The system stack is changed to an access error stack frame #
3954 # If Trace exception enabled: #
3955 # - The system stack is changed to a Trace exception stack frame #
3956 # Else: (normal case) #
3957 # - Correct result has been stored as appropriate #
3959 # ALGORITHM *********************************************************** #
3960 # There are two main cases of instructions that may enter here to #
3961 # be emulated: (1) the FPgen instructions, most of which were also #
3962 # unimplemented on the 040, and (2) "ftrapcc", "fscc", and "fdbcc". #
3963 # For the first set, this handler calls the routine load_fop() #
3964 # to load the source and destination (for dyadic) operands to be used #
3965 # for instruction emulation. The correct emulation routine is then #
3966 # chosen by decoding the instruction type and indexing into an #
3967 # emulation subroutine index table. After emulation returns, this #
3968 # handler checks to see if an exception should occur as a result of the #
3969 # FP instruction emulation. If so, then an FP exception of the correct #
3970 # type is inserted into the FPU state frame using the "frestore" #
3971 # instruction before exiting through _fpsp_done(). In either the #
3972 # exceptional or non-exceptional cases, we must check to see if the #
3973 # Trace exception is enabled. If so, then we must create a Trace #
3974 # exception frame from the current exception frame and exit through _real_trace(). #
3976 # For "fdbcc", "ftrapcc", and "fscc", the emulation subroutines #
3977 # _fdbcc(), _ftrapcc(), and _fscc() respectively are used. All three #
3978 # may flag that a BSUN exception should be taken. If so, then the #
3979 # current exception stack frame is converted into a BSUN exception #
3980 # stack frame and an exit is made through _real_bsun(). If the #
3981 # instruction was "ftrapcc" and a Trap exception should result, a Trap #
3982 # exception stack frame is created from the current frame and an exit #
3983 # is made through _real_trap(). If a Trace exception is pending, then #
3984 # a Trace exception frame is created from the current frame and a jump #
3985 # is made to _real_trace(). Finally, if none of these conditions exist, #
3986 # then the handler exits through the callout _fpsp_done(). #
3988 # In any of the above scenarios, if a _mem_read() or _mem_write() #
3989 # "callout" returns a failing value, then an access error stack frame #
3990 # is created from the current stack frame and an exit is made through _real_access(). #
3993 #########################################################################
3996 # FP UNIMPLEMENTED INSTRUCTION STACK FRAME:
3999 # * * => <ea> of fp unimp instr.
4003 # * 0x2 * 0x02c * => frame format and vector offset(vector #11)
4006 # - Next PC - => PC of instr to execute after exc handling
4009 # * SR * => SR at the time the exception was taken
4012 # Note: the !NULL bit does not get set in the fsave frame when the
4013 # machine encounters an fp unimp exception. Therefore, it must be set
4014 # before leaving this handler.
# Handler entry: build the local frame, save d0-d1/a0-a1, the FP control
# registers and fp0-fp1, record the pre-exception stack pointer in EXC_A7,
# fetch the faulting instruction via the saved FPIAR and dispatch on its type.
# NOTE(review): this listing appears to be missing lines here -- funimp_s (the
# bne.b target below) is not visible, and nothing visible keeps the user-mode
# path from falling into the supervisor-mode stores that overwrite EXC_A7.
# Confirm against the reference source.
4019 link.w %a6,&-LOCAL_SIZE # init stack frame
4021 movm.l &0x0303,EXC_DREGS(%a6) # save d0-d1/a0-a1
4022 fmovm.l %fpcr,%fpsr,%fpiar,USER_FPCR(%a6) # save ctrl regs
4023 fmovm.x &0xc0,EXC_FPREGS(%a6) # save fp0-fp1
4025 btst &0x5,EXC_SR(%a6) # user mode exception? (bit 5 of SR's high byte = S bit)
4026 bne.b funimp_s # no; supervisor mode
4028 # save the value of the user stack pointer onto the stack frame
4030 mov.l %usp,%a0 # fetch user stack pointer
4031 mov.l %a0,EXC_A7(%a6) # store in stack frame
4034 # store the value of the supervisor stack pointer BEFORE the exc occurred.
4035 # old_sp is address just above stacked effective address.
4037 lea 4+EXC_EA(%a6),%a0 # load old a7'
4038 mov.l %a0,EXC_A7(%a6) # store a7'
4039 mov.l %a0,OLD_A7(%a6) # make a copy
4043 # the FPIAR holds the "current PC" of the faulting instruction.
4044 mov.l USER_FPIAR(%a6),EXC_EXTWPTR(%a6) # instruction ptr starts at the faulting instr
4046 mov.l EXC_EXTWPTR(%a6),%a0 # fetch instruction addr
4047 addq.l &0x4,EXC_EXTWPTR(%a6) # incr instruction ptr past the longword read next
4048 bsr.l _imem_read_long # fetch the instruction words
4049 mov.l %d0,EXC_OPWORD(%a6) # save the fetched longword (taken from d0)
# NOTE(review): no fetch-failure test follows this call, whereas the fdb<cc>
# path tests d1 after _imem_read_word -- confirm this is intended.
4051 ############################################################################
4053 fmov.l &0x0,%fpcr # clear FPCR
4054 fmov.l &0x0,%fpsr # clear FPSR
4056 clr.b SPCOND_FLG(%a6) # clear "special case" flag
4058 # Divide the fp instructions into 8 types based on the TYPE field in
4059 # bits 6-8 of the opword(classes 6,7 are undefined).
4060 # (for the '060, only two types can take this exception)
4061 # bftst %d0{&7:&3} # test TYPE
4062 btst &22,%d0 # type 0 or 1 ? (d0 bit 22 = opword bit 6, opword in d0's high word)
4063 bne.w funimp_misc # type 1
4065 #########################################
4066 # TYPE == 0: General instructions #
4067 #########################################
# Flow: call _load_fop, call the emulation routine selected through tbl_trans
# by ((extension bits << 3) | source tag), then either store the result and
# exit, or go to funimp_ena when any FPCR exception enable is set.
# NOTE(review): this listing appears to be missing lines -- labels that are
# branched to from here (funimp_fmovcr, funimp_store, funimp_gen_exit_a7),
# any unlk of a6, and the final jumps are not visible. The comments describe
# only what is shown; confirm against the reference source.
4070 clr.b STORE_FLG(%a6) # clear "store result" flag
4072 # clear the ccode byte and exception status byte
4073 andi.l &0x00ff00ff,USER_FPSR(%a6) # mask zeroes bits 31-24 and 15-8
4075 bfextu %d0{&16:&6},%d1 # extract upper 6 of cmdreg
4076 cmpi.b %d1,&0x17 # is op an fmovecr?
4077 beq.w funimp_fmovcr # yes
4080 bsr.l _load_fop # load
4083 mov.b FPCR_MODE(%a6),%d0 # fetch rnd mode
4085 mov.b 1+EXC_CMDREG(%a6),%d1 # low byte of the command word
4086 andi.w &0x003f,%d1 # extract extension bits
4087 lsl.w &0x3,%d1 # shift left 3 bits (8 table slots per op)
4088 or.b STAG(%a6),%d1 # insert src optag bits
4090 lea FP_DST(%a6),%a1 # pass dst ptr in a1
4091 lea FP_SRC(%a6),%a0 # pass src ptr in a0
4093 mov.w (tbl_trans.w,%pc,%d1.w*2),%d1 # fetch 16-bit routine offset from tbl_trans
4094 jsr (tbl_trans.w,%pc,%d1.w*1) # emulate: call tbl_trans + offset
4097 mov.b FPCR_ENABLE(%a6),%d0 # fetch exceptions enabled
4098 bne.w funimp_ena # some are enabled
4101 bfextu EXC_CMDREG(%a6){&6:&3},%d0 # fetch register number (cmdreg bits 9-7)
4102 bsr.l store_fpreg # store result to fp regfile
4105 fmovm.x EXC_FP0(%a6),&0xc0 # restore fp0-fp1
4106 fmovm.l USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs
4107 movm.l EXC_DREGS(%a6),&0x0303 # restore d0-d1/a0-a1
4109 funimp_gen_exit_cmp:
4110 cmpi.b SPCOND_FLG(%a6),&mia7_flg # was the ea mode (sp)+ ?
4111 beq.b funimp_gen_exit_a7 # yes
4113 cmpi.b SPCOND_FLG(%a6),&mda7_flg # was the ea mode -(sp) ?
4114 beq.b funimp_gen_exit_a7 # yes
4116 funimp_gen_exit_cont:
# NOTE(review): from here the exception frame is addressed through (%sp),
# which requires a6 to have been unlinked; no unlk is visible in this listing.
4119 funimp_gen_exit_cont2:
4120 btst &0x7,(%sp) # is trace on? (bit 7 of the byte at (%sp))
4121 beq.l _fpsp_done # no
4123 # this catches a problem with the case where an exception will be re-inserted
4124 # into the machine. the frestore has already been executed...so, the fmov.l
4125 # alone of the control register would trigger an unwanted exception.
4126 # until I feel like fixing this, we'll sidestep the exception.
# NOTE(review): the other trace exits in this handler write the current PC at
# 0x8(%sp); 0x14 is 0xc bytes further, so something must have pushed 0xc bytes
# first. No such push is visible in this listing -- confirm.
4128 fmov.l %fpiar,0x14(%sp) # "Current PC" is in FPIAR
4130 mov.w &0x2024,0x6(%sp) # stk fmt = 0x2; voff = 0x24
4134 btst &0x5,EXC_SR(%a6) # supervisor or user mode?
4135 bne.b funimp_gen_exit_a7_s # supervisor
4138 mov.l EXC_A7(%a6),%a0 # load the a7 value kept in the frame (no write to %usp is visible here)
4141 bra.b funimp_gen_exit_cont
4143 # if the instruction was executed from supervisor mode and the addressing
4144 # mode was (a7)+, then the stack frame for the rte must be shifted "up"
4145 # "n" bytes where "n" is the size of the src operand type.
4146 # f<op>.{b,w,l,s,d,x,p}
4147 funimp_gen_exit_a7_s:
4148 mov.l %d0,-(%sp) # save d0
4149 mov.l EXC_A7(%a6),%d0 # load new a7'
4150 sub.l OLD_A7(%a6),%d0 # subtract old a7' (d0 = shift amount)
4151 mov.l 0x2+EXC_PC(%a6),(0x2+EXC_PC,%a6,%d0) # shift stack frame
4152 mov.l EXC_SR(%a6),(EXC_SR,%a6,%d0) # shift stack frame
4153 mov.w %d0,EXC_SR(%a6) # store incr number in the old SR slot
4154 mov.l (%sp)+,%d0 # restore d0
4158 add.w (%sp),%sp # stack frame shifted: add the word at (%sp) to sp
4159 bra.b funimp_gen_exit_cont2
4161 ######################
4162 # fmovecr.x #ccc,fpn #
4163 ######################
# Only the argument setup is visible here: rounding mode in d0, ROM offset in
# d1. NOTE(review): the call that consumes them is not shown in this listing.
4166 mov.b FPCR_MODE(%a6),%d0 # fetch rnd mode
4167 mov.b 1+EXC_CMDREG(%a6),%d1 # low byte of the command word
4168 andi.l &0x0000007f,%d1 # pass rom offset in d1
4172 #########################################################################
4175 # the user has enabled some exceptions. we figure not to see this too
4176 # often so that's why it gets lower priority.
# On entry d0 holds the FPCR exception-enable byte (loaded just before the
# bne.w funimp_ena in the general path).
# NOTE(review): the labels branched to within this section (funimp_exc,
# funimp_exc_force, funimp_exc_ovfl, funimp_exc_unfl, funimp_gen_exit2,
# tbl_funimp_except) are not visible in this listing -- confirm against the
# reference source.
4180 # was an exception set that was also enabled?
4181 and.b FPSR_EXCEPT(%a6),%d0 # keep only ones enabled and set
4182 bfffo %d0{&24:&8},%d0 # find highest priority exception (d0 = 24-31; Z set if none)
4183 bne.b funimp_exc # at least one was set
4185 # no exception that was enabled was set BUT if we got an exact overflow
4186 # and overflow wasn't enabled but inexact was (yech!) then this is
4187 # an inexact exception; otherwise, return to normal non-exception flow.
4188 btst &ovfl_bit,FPSR_EXCEPT(%a6) # did overflow occur?
4189 beq.w funimp_store # no; return to normal flow
4191 # the overflow w/ exact result happened but was inexact set in the FPCR?
4193 btst &inex2_bit,FPCR_ENABLE(%a6) # is inexact enabled?
4194 beq.w funimp_store # no; return to normal flow
4195 bra.b funimp_exc_ovfl # yes
4197 # some exception happened that was actually enabled.
4198 # we'll insert this new exception into the FPU and then return.
4200 subi.l &24,%d0 # fix offset to be 0-7 (index into the 8-entry table below)
4201 cmpi.b %d0,&0x6 # is exception INEX?
4202 bne.b funimp_exc_force # no
4204 # the enabled exception was inexact. so, if it occurs with an overflow
4205 # or underflow that was disabled, then we have to force an overflow or
4206 # underflow frame. the eventual overflow or underflow handler will see that
4207 # it's actually an inexact and act appropriately. this is the only easy
4208 # way to have the EXOP available for the enabled inexact handler when
4209 # a disabled overflow or underflow has also happened.
4210 btst &ovfl_bit,FPSR_EXCEPT(%a6) # did overflow occur?
4211 bne.b funimp_exc_ovfl # yes
4212 btst &unfl_bit,FPSR_EXCEPT(%a6) # did underflow occur?
4213 bne.b funimp_exc_unfl # yes
4215 # force the fsave exception status bits to signal an exception of the
4216 # appropriate type. don't forget to "skew" the source operand in case we
4217 # "unskewed" the one the hardware initially gave us.
4219 mov.l %d0,-(%sp) # save d0
4220 bsr.l funimp_skew # check for special case
4221 mov.l (%sp)+,%d0 # restore d0
4222 mov.w (tbl_funimp_except.b,%pc,%d0.w*2),2+FP_SRC(%a6) # status word for exception index d0
4223 bra.b funimp_gen_exit2 # exit with frestore
# status words indexed by the 0-7 value computed above; with the FPCR enable
# byte scanned MSB first the order is bsun, snan, operr, ovfl, unfl, dz,
# inex2, inex1. Entries 3 and 4 equal the overflow/underflow words used below.
4226 short 0xe002, 0xe006, 0xe004, 0xe005 # idx 0-3
4227 short 0xe003, 0xe002, 0xe001, 0xe001 # idx 4-7
4229 # insert an overflow frame
4231 bsr.l funimp_skew # check for special case
4232 mov.w &0xe005,2+FP_SRC(%a6) # same word as table index 3
4233 bra.b funimp_gen_exit2
4235 # insert an underflow frame
4237 bsr.l funimp_skew # check for special case
4238 mov.w &0xe003,2+FP_SRC(%a6) # same word as table index 4
4240 # this is the general exit point for an enabled exception that will be
4241 # restored into the machine for the instruction just emulated.
4243 fmovm.x EXC_FP0(%a6),&0xc0 # restore fp0-fp1
4244 fmovm.l USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs
4245 movm.l EXC_DREGS(%a6),&0x0303 # restore d0-d1/a0-a1
4247 frestore FP_SRC(%a6) # insert exceptional status
4249 bra.w funimp_gen_exit_cmp # rejoin the common exit checks
4251 ############################################################################
4254 # TYPE == 1: FDB<cc>, FS<cc>, FTRAP<cc>
4256 # These instructions were implemented on the '881/2 and '040 in hardware but
4257 # are emulated in software on the '060.
# Decode: mode 1 -> fdb<cc>; any mode other than 7 -> fs<cc>; mode 7 with
# register field < 2 -> fs<cc>; everything else falls through to ftrap<cc>.
# NOTE(review): this listing appears to be missing lines -- branch-target
# labels (funimp_fdbcc, funimp_fscc, funimp_fscc_s, funimp_bsun, funimp_done,
# funimp_trace), the branches after two of the fbsun_flg compares, any unlk or
# stack-pointer adjustment, and the final jumps are not visible. The comments
# describe only what is shown; confirm against the reference source.
4260 bfextu %d0{&10:&3},%d1 # extract mode field (opword bits 5-3)
4261 cmpi.b %d1,&0x1 # is it an fdb<cc>?
4262 beq.w funimp_fdbcc # yes
4263 cmpi.b %d1,&0x7 # is the mode field 7?
4264 bne.w funimp_fscc # no; any other mode is an fs<cc>
4265 bfextu %d0{&13:&3},%d1 # extract register field (opword bits 2-0)
4266 cmpi.b %d1,&0x2 # mode 7 with reg < 2: is it an fs<cc>?
4267 blt.w funimp_fscc # yes
4269 #########################
4271 # ftrap<cc>.w #<data> #
4272 # ftrap<cc>.l #<data> #
4273 #########################
4276 bsr.l _ftrapcc # FTRAP<cc>()
4278 cmpi.b SPCOND_FLG(%a6),&fbsun_flg # is enabled bsun occurring?
4279 beq.w funimp_bsun # yes
4281 cmpi.b SPCOND_FLG(%a6),&ftrapcc_flg # should a trap occur?
4282 bne.w funimp_done # no
4284 # FP UNIMP FRAME TRAP FRAME
4285 # ***************** *****************
4286 # ** <EA> ** ** Current PC **
4287 # ***************** *****************
4288 # * 0x2 * 0x02c * * 0x2 * 0x01c *
4289 # ***************** *****************
4290 # ** Next PC ** ** Next PC **
4291 # ***************** *****************
4293 # ***************** *****************
4294 # (6 words) (6 words)
4296 # the ftrapcc instruction should take a trap. so, here we must create a
4297 # trap stack frame from an unimplemented fp instruction stack frame and
4298 # jump to the user supplied entry point for the trap exception
4300 mov.l USER_FPIAR(%a6),EXC_EA(%a6) # Address = Current PC
4301 mov.w &0x201c,EXC_VOFF(%a6) # Format = 0x2; Vector Offset = 0x01c
4303 fmovm.x EXC_FP0(%a6),&0xc0 # restore fp0-fp1
4304 fmovm.l USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs
4305 movm.l EXC_DREGS(%a6),&0x0303 # restore d0-d1/a0-a1
4310 #########################
4311 # fdb<cc> Dn,<label> #
4312 #########################
4315 mov.l EXC_EXTWPTR(%a6),%a0 # fetch instruction addr
4316 addq.l &0x2,EXC_EXTWPTR(%a6) # incr instruction ptr past the displacement word
4317 bsr.l _imem_read_word # read displacement
4319 tst.l %d1 # did ifetch fail?
4320 bne.w funimp_iacc # yes
4322 ext.l %d0 # sign extend displacement
4324 bsr.l _fdbcc # FDB<cc>()
4326 cmpi.b SPCOND_FLG(%a6),&fbsun_flg # is enabled bsun occurring? (no branch on this compare is visible here)
4329 bra.w funimp_done # branch to finish
4336 bsr.l _fscc # FS<cc>()
4338 # I am assuming here that an "fs<cc>.b -(An)" or "fs<cc>.b (An)+" instruction
4339 # does not need to update "An" before taking a bsun exception.
4340 cmpi.b SPCOND_FLG(%a6),&fbsun_flg # is enabled bsun occurring? (no branch on this compare is visible here)
4343 btst &0x5,EXC_SR(%a6) # yes; is it a user mode exception?
4344 bne.b funimp_fscc_s # no
4347 mov.l EXC_A7(%a6),%a0 # yes; set new USP (only the load into a0 is visible here)
4349 bra.w funimp_done # branch to finish
4351 # remember, I'm assuming that post-increment is bogus...(it IS!!!)
4352 # so, the least significant WORD of the stacked effective address got
4353 # overwritten by the "fs<cc> -(An)". We must shift the stack frame "down"
4354 # so that the rte will work correctly without destroying the result.
4355 # even though the operation size is byte, the stack ptr is decr by 2.
4357 # remember, also, this instruction may be traced.
4359 cmpi.b SPCOND_FLG(%a6),&mda7_flg # was a7 modified?
4360 bne.w funimp_done # no
4362 fmovm.x EXC_FP0(%a6),&0xc0 # restore fp0-fp1
4363 fmovm.l USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs
4364 movm.l EXC_DREGS(%a6),&0x0303 # restore d0-d1/a0-a1
4368 btst &0x7,(%sp) # is trace enabled?
4369 bne.b funimp_fscc_s_trace # yes
# not traced: copy the frame contents two bytes lower
4372 mov.l 0x2(%sp),(%sp) # shift SR,hi(PC) "down"
4373 mov.l 0x6(%sp),0x4(%sp) # shift lo(PC),voff "down"
4376 funimp_fscc_s_trace:
# traced: copy SR/PC two bytes lower and rewrite fmt/voff and the address slot
4378 mov.l 0x2(%sp),(%sp) # shift SR,hi(PC) "down"
4379 mov.w 0x6(%sp),0x4(%sp) # shift lo(PC)
4380 mov.w &0x2024,0x6(%sp) # fmt/voff = $2024
4381 fmov.l %fpiar,0x8(%sp) # insert "current PC"
4386 # The ftrap<cc>, fs<cc>, or fdb<cc> is to take an enabled bsun. we must convert
4387 # the fp unimplemented instruction exception stack frame into a bsun stack frame,
4388 # restore a bsun exception into the machine, and branch to the user
4389 # supplied bsun hook.
4391 # FP UNIMP FRAME BSUN FRAME
4392 # ***************** *****************
4393 # ** <EA> ** * 0x0 * 0x0c0 *
4394 # ***************** *****************
4395 # * 0x2 * 0x02c * ** Current PC **
4396 # ***************** *****************
4397 # ** Next PC ** * SR *
4398 # ***************** *****************
4404 mov.w &0x00c0,2+EXC_EA(%a6) # Fmt = 0x0; Vector Offset = 0x0c0
4405 mov.l USER_FPIAR(%a6),EXC_VOFF(%a6) # PC = Current PC
4406 mov.w EXC_SR(%a6),2+EXC_PC(%a6) # shift SR "up"
4408 mov.w &0xe000,2+FP_SRC(%a6) # bsun exception enabled
4410 fmovm.x EXC_FP0(%a6),&0xc0 # restore fp0-fp1
4411 fmovm.l USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs
4412 movm.l EXC_DREGS(%a6),&0x0303 # restore d0-d1/a0-a1
4414 frestore FP_SRC(%a6) # restore bsun exception
4418 addq.l &0x4,%sp # erase sludge (skip 4 bytes; the bsun frame drawn above is shorter)
4420 bra.l _real_bsun # branch to user bsun hook
4423 # all ftrapcc/fscc/fdbcc processing has been completed. unwind the stack frame
4426 # as usual, we have to check for trace mode being on here. since instructions
4427 # modifying the supervisor stack frame don't pass through here, this is a
4428 # relatively easy task.
4431 fmovm.x EXC_FP0(%a6),&0xc0 # restore fp0-fp1
4432 fmovm.l USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs
4433 movm.l EXC_DREGS(%a6),&0x0303 # restore d0-d1/a0-a1
4437 btst &0x7,(%sp) # is trace enabled?
4438 bne.b funimp_trace # yes
4442 # FP UNIMP FRAME TRACE FRAME
4443 # ***************** *****************
4444 # ** <EA> ** ** Current PC **
4445 # ***************** *****************
4446 # * 0x2 * 0x02c * * 0x2 * 0x024 *
4447 # ***************** *****************
4448 # ** Next PC ** ** Next PC **
4449 # ***************** *****************
4451 # ***************** *****************
4452 # (6 words) (6 words)
4454 # the ftrap<cc>/fdb<cc>/fs<cc> instruction should take a trace trap. so, here we must create a
4455 # trace stack frame from an unimplemented fp instruction stack frame and
4456 # jump to the user supplied entry point for the trace exception
4458 fmov.l %fpiar,0x8(%sp) # current PC is in fpiar
4459 mov.b &0x24,0x7(%sp) # vector offset = 0x024 (only the low byte of fmt/voff is rewritten)
4463 ################################################################
4468 short tbl_trans - tbl_trans # $00-0 fmovecr all
4469 short tbl_trans - tbl_trans # $00-1 fmovecr all
4470 short tbl_trans - tbl_trans # $00-2 fmovecr all
4471 short tbl_trans - tbl_trans # $00-3 fmovecr all
4472 short tbl_trans - tbl_trans # $00-4 fmovecr all
4473 short tbl_trans - tbl_trans # $00-5 fmovecr all
4474 short tbl_trans - tbl_trans # $00-6 fmovecr all
4475 short tbl_trans - tbl_trans # $00-7 fmovecr all
4477 short tbl_trans - tbl_trans # $01-0 fint norm
4478 short tbl_trans - tbl_trans # $01-1 fint zero
4479 short tbl_trans - tbl_trans # $01-2 fint inf
4480 short tbl_trans - tbl_trans # $01-3 fint qnan
4481 short tbl_trans - tbl_trans # $01-5 fint denorm
4482 short tbl_trans - tbl_trans # $01-4 fint snan
4483 short tbl_trans - tbl_trans # $01-6 fint unnorm
4484 short tbl_trans - tbl_trans # $01-7 ERROR
4486 short ssinh - tbl_trans # $02-0 fsinh norm
4487 short src_zero - tbl_trans # $02-1 fsinh zero
4488 short src_inf - tbl_trans # $02-2 fsinh inf
4489 short src_qnan - tbl_trans # $02-3 fsinh qnan
4490 short ssinhd - tbl_trans # $02-5 fsinh denorm
4491 short src_snan - tbl_trans # $02-4 fsinh snan
4492 short tbl_trans - tbl_trans # $02-6 fsinh unnorm
4493 short tbl_trans - tbl_trans # $02-7 ERROR
4495 short tbl_trans - tbl_trans # $03-0 fintrz norm
4496 short tbl_trans - tbl_trans # $03-1 fintrz zero
4497 short tbl_trans - tbl_trans # $03-2 fintrz inf
4498 short tbl_trans - tbl_trans # $03-3 fintrz qnan
4499 short tbl_trans - tbl_trans # $03-5 fintrz denorm
4500 short tbl_trans - tbl_trans # $03-4 fintrz snan
4501 short tbl_trans - tbl_trans # $03-6 fintrz unnorm
4502 short tbl_trans - tbl_trans # $03-7 ERROR
4504 short tbl_trans - tbl_trans # $04-0 fsqrt norm
4505 short tbl_trans - tbl_trans # $04-1 fsqrt zero
4506 short tbl_trans - tbl_trans # $04-2 fsqrt inf
4507 short tbl_trans - tbl_trans # $04-3 fsqrt qnan
4508 short tbl_trans - tbl_trans # $04-5 fsqrt denorm
4509 short tbl_trans - tbl_trans # $04-4 fsqrt snan
4510 short tbl_trans - tbl_trans # $04-6 fsqrt unnorm
4511 short tbl_trans - tbl_trans # $04-7 ERROR
4513 short tbl_trans - tbl_trans # $05-0 ERROR
4514 short tbl_trans - tbl_trans # $05-1 ERROR
4515 short tbl_trans - tbl_trans # $05-2 ERROR
4516 short tbl_trans - tbl_trans # $05-3 ERROR
4517 short tbl_trans - tbl_trans # $05-4 ERROR
4518 short tbl_trans - tbl_trans # $05-5 ERROR
4519 short tbl_trans - tbl_trans # $05-6 ERROR
4520 short tbl_trans - tbl_trans # $05-7 ERROR
4522 short slognp1 - tbl_trans # $06-0 flognp1 norm
4523 short src_zero - tbl_trans # $06-1 flognp1 zero
4524 short sopr_inf - tbl_trans # $06-2 flognp1 inf
4525 short src_qnan - tbl_trans # $06-3 flognp1 qnan
4526 short slognp1d - tbl_trans # $06-5 flognp1 denorm
4527 short src_snan - tbl_trans # $06-4 flognp1 snan
4528 short tbl_trans - tbl_trans # $06-6 flognp1 unnorm
4529 short tbl_trans - tbl_trans # $06-7 ERROR
4531 short tbl_trans - tbl_trans # $07-0 ERROR
4532 short tbl_trans - tbl_trans # $07-1 ERROR
4533 short tbl_trans - tbl_trans # $07-2 ERROR
4534 short tbl_trans - tbl_trans # $07-3 ERROR
4535 short tbl_trans - tbl_trans # $07-4 ERROR
4536 short tbl_trans - tbl_trans # $07-5 ERROR
4537 short tbl_trans - tbl_trans # $07-6 ERROR
4538 short tbl_trans - tbl_trans # $07-7 ERROR
4540 short setoxm1 - tbl_trans # $08-0 fetoxm1 norm
4541 short src_zero - tbl_trans # $08-1 fetoxm1 zero
4542 short setoxm1i - tbl_trans # $08-2 fetoxm1 inf
4543 short src_qnan - tbl_trans # $08-3 fetoxm1 qnan
4544 short setoxm1d - tbl_trans # $08-5 fetoxm1 denorm
4545 short src_snan - tbl_trans # $08-4 fetoxm1 snan
4546 short tbl_trans - tbl_trans # $08-6 fetoxm1 unnorm
4547 short tbl_trans - tbl_trans # $08-7 ERROR
4549 short stanh - tbl_trans # $09-0 ftanh norm
4550 short src_zero - tbl_trans # $09-1 ftanh zero
4551 short src_one - tbl_trans # $09-2 ftanh inf
4552 short src_qnan - tbl_trans # $09-3 ftanh qnan
4553 short stanhd - tbl_trans # $09-5 ftanh denorm
4554 short src_snan - tbl_trans # $09-4 ftanh snan
4555 short tbl_trans - tbl_trans # $09-6 ftanh unnorm
4556 short tbl_trans - tbl_trans # $09-7 ERROR
4558 short satan - tbl_trans # $0a-0 fatan norm
4559 short src_zero - tbl_trans # $0a-1 fatan zero
4560 short spi_2 - tbl_trans # $0a-2 fatan inf
4561 short src_qnan - tbl_trans # $0a-3 fatan qnan
4562 short satand - tbl_trans # $0a-5 fatan denorm
4563 short src_snan - tbl_trans # $0a-4 fatan snan
4564 short tbl_trans - tbl_trans # $0a-6 fatan unnorm
4565 short tbl_trans - tbl_trans # $0a-7 ERROR
4567 short tbl_trans - tbl_trans # $0b-0 ERROR
4568 short tbl_trans - tbl_trans # $0b-1 ERROR
4569 short tbl_trans - tbl_trans # $0b-2 ERROR
4570 short tbl_trans - tbl_trans # $0b-3 ERROR
4571 short tbl_trans - tbl_trans # $0b-4 ERROR
4572 short tbl_trans - tbl_trans # $0b-5 ERROR
4573 short tbl_trans - tbl_trans # $0b-6 ERROR
4574 short tbl_trans - tbl_trans # $0b-7 ERROR
4576 short sasin - tbl_trans # $0c-0 fasin norm
4577 short src_zero - tbl_trans # $0c-1 fasin zero
4578 short t_operr - tbl_trans # $0c-2 fasin inf
4579 short src_qnan - tbl_trans # $0c-3 fasin qnan
4580 short sasind - tbl_trans # $0c-5 fasin denorm
4581 short src_snan - tbl_trans # $0c-4 fasin snan
4582 short tbl_trans - tbl_trans # $0c-6 fasin unnorm
4583 short tbl_trans - tbl_trans # $0c-7 ERROR
4585 short satanh - tbl_trans # $0d-0 fatanh norm
4586 short src_zero - tbl_trans # $0d-1 fatanh zero
4587 short t_operr - tbl_trans # $0d-2 fatanh inf
4588 short src_qnan - tbl_trans # $0d-3 fatanh qnan
4589 short satanhd - tbl_trans # $0d-5 fatanh denorm
4590 short src_snan - tbl_trans # $0d-4 fatanh snan
4591 short tbl_trans - tbl_trans # $0d-6 fatanh unnorm
4592 short tbl_trans - tbl_trans # $0d-7 ERROR
4594 short ssin - tbl_trans # $0e-0 fsin norm
4595 short src_zero - tbl_trans # $0e-1 fsin zero
4596 short t_operr - tbl_trans # $0e-2 fsin inf
4597 short src_qnan - tbl_trans # $0e-3 fsin qnan
4598 short ssind - tbl_trans # $0e-5 fsin denorm
4599 short src_snan - tbl_trans # $0e-4 fsin snan
4600 short tbl_trans - tbl_trans # $0e-6 fsin unnorm
4601 short tbl_trans - tbl_trans # $0e-7 ERROR
4603 short stan - tbl_trans # $0f-0 ftan norm
4604 short src_zero - tbl_trans # $0f-1 ftan zero
4605 short t_operr - tbl_trans # $0f-2 ftan inf
4606 short src_qnan - tbl_trans # $0f-3 ftan qnan
4607 short stand - tbl_trans # $0f-5 ftan denorm
4608 short src_snan - tbl_trans # $0f-4 ftan snan
4609 short tbl_trans - tbl_trans # $0f-6 ftan unnorm
4610 short tbl_trans - tbl_trans # $0f-7 ERROR
4612 short setox - tbl_trans # $10-0 fetox norm
4613 short ld_pone - tbl_trans # $10-1 fetox zero
4614 short szr_inf - tbl_trans # $10-2 fetox inf
4615 short src_qnan - tbl_trans # $10-3 fetox qnan
4616 short setoxd - tbl_trans # $10-5 fetox denorm
4617 short src_snan - tbl_trans # $10-4 fetox snan
4618 short tbl_trans - tbl_trans # $10-6 fetox unnorm
4619 short tbl_trans - tbl_trans # $10-7 ERROR
4621 short stwotox - tbl_trans # $11-0 ftwotox norm
4622 short ld_pone - tbl_trans # $11-1 ftwotox zero
4623 short szr_inf - tbl_trans # $11-2 ftwotox inf
4624 short src_qnan - tbl_trans # $11-3 ftwotox qnan
4625 short stwotoxd - tbl_trans # $11-5 ftwotox denorm
4626 short src_snan - tbl_trans # $11-4 ftwotox snan
4627 short tbl_trans - tbl_trans # $11-6 ftwotox unnorm
4628 short tbl_trans - tbl_trans # $11-7 ERROR
4630 short stentox - tbl_trans # $12-0 ftentox norm
4631 short ld_pone - tbl_trans # $12-1 ftentox zero
4632 short szr_inf - tbl_trans # $12-2 ftentox inf
4633 short src_qnan - tbl_trans # $12-3 ftentox qnan
4634 short stentoxd - tbl_trans # $12-5 ftentox denorm
4635 short src_snan - tbl_trans # $12-4 ftentox snan
4636 short tbl_trans - tbl_trans # $12-6 ftentox unnorm
4637 short tbl_trans - tbl_trans # $12-7 ERROR
4639 short tbl_trans - tbl_trans # $13-0 ERROR
4640 short tbl_trans - tbl_trans # $13-1 ERROR
4641 short tbl_trans - tbl_trans # $13-2 ERROR
4642 short tbl_trans - tbl_trans # $13-3 ERROR
4643 short tbl_trans - tbl_trans # $13-4 ERROR
4644 short tbl_trans - tbl_trans # $13-5 ERROR
4645 short tbl_trans - tbl_trans # $13-6 ERROR
4646 short tbl_trans - tbl_trans # $13-7 ERROR
4648 short slogn - tbl_trans # $14-0 flogn norm
4649 short t_dz2 - tbl_trans # $14-1 flogn zero
4650 short sopr_inf - tbl_trans # $14-2 flogn inf
4651 short src_qnan - tbl_trans # $14-3 flogn qnan
4652 short slognd - tbl_trans # $14-5 flogn denorm
4653 short src_snan - tbl_trans # $14-4 flogn snan
4654 short tbl_trans - tbl_trans # $14-6 flogn unnorm
4655 short tbl_trans - tbl_trans # $14-7 ERROR
4657 short slog10 - tbl_trans # $15-0 flog10 norm
4658 short t_dz2 - tbl_trans # $15-1 flog10 zero
4659 short sopr_inf - tbl_trans # $15-2 flog10 inf
4660 short src_qnan - tbl_trans # $15-3 flog10 qnan
4661 short slog10d - tbl_trans # $15-5 flog10 denorm
4662 short src_snan - tbl_trans # $15-4 flog10 snan
4663 short tbl_trans - tbl_trans # $15-6 flog10 unnorm
4664 short tbl_trans - tbl_trans # $15-7 ERROR
4666 short slog2 - tbl_trans # $16-0 flog2 norm
4667 short t_dz2 - tbl_trans # $16-1 flog2 zero
4668 short sopr_inf - tbl_trans # $16-2 flog2 inf
4669 short src_qnan - tbl_trans # $16-3 flog2 qnan
4670 short slog2d - tbl_trans # $16-5 flog2 denorm
4671 short src_snan - tbl_trans # $16-4 flog2 snan
4672 short tbl_trans - tbl_trans # $16-6 flog2 unnorm
4673 short tbl_trans - tbl_trans # $16-7 ERROR
4675 short tbl_trans - tbl_trans # $17-0 ERROR
4676 short tbl_trans - tbl_trans # $17-1 ERROR
4677 short tbl_trans - tbl_trans # $17-2 ERROR
4678 short tbl_trans - tbl_trans # $17-3 ERROR
4679 short tbl_trans - tbl_trans # $17-4 ERROR
4680 short tbl_trans - tbl_trans # $17-5 ERROR
4681 short tbl_trans - tbl_trans # $17-6 ERROR
4682 short tbl_trans - tbl_trans # $17-7 ERROR
4684 short tbl_trans - tbl_trans # $18-0 fabs norm
4685 short tbl_trans - tbl_trans # $18-1 fabs zero
4686 short tbl_trans - tbl_trans # $18-2 fabs inf
4687 short tbl_trans - tbl_trans # $18-3 fabs qnan
4688 short tbl_trans - tbl_trans # $18-5 fabs denorm
4689 short tbl_trans - tbl_trans # $18-4 fabs snan
4690 short tbl_trans - tbl_trans # $18-6 fabs unnorm
4691 short tbl_trans - tbl_trans # $18-7 ERROR
4693 short scosh - tbl_trans # $19-0 fcosh norm
4694 short ld_pone - tbl_trans # $19-1 fcosh zero
4695 short ld_pinf - tbl_trans # $19-2 fcosh inf
4696 short src_qnan - tbl_trans # $19-3 fcosh qnan
4697 short scoshd - tbl_trans # $19-5 fcosh denorm
4698 short src_snan - tbl_trans # $19-4 fcosh snan
4699 short tbl_trans - tbl_trans # $19-6 fcosh unnorm
4700 short tbl_trans - tbl_trans # $19-7 ERROR
4702 short tbl_trans - tbl_trans # $1a-0 fneg norm
4703 short tbl_trans - tbl_trans # $1a-1 fneg zero
4704 short tbl_trans - tbl_trans # $1a-2 fneg inf
4705 short tbl_trans - tbl_trans # $1a-3 fneg qnan
4706 short tbl_trans - tbl_trans # $1a-5 fneg denorm
4707 short tbl_trans - tbl_trans # $1a-4 fneg snan
4708 short tbl_trans - tbl_trans # $1a-6 fneg unnorm
4709 short tbl_trans - tbl_trans # $1a-7 ERROR
4711 short tbl_trans - tbl_trans # $1b-0 ERROR
4712 short tbl_trans - tbl_trans # $1b-1 ERROR
4713 short tbl_trans - tbl_trans # $1b-2 ERROR
4714 short tbl_trans - tbl_trans # $1b-3 ERROR
4715 short tbl_trans - tbl_trans # $1b-4 ERROR
4716 short tbl_trans - tbl_trans # $1b-5 ERROR
4717 short tbl_trans - tbl_trans # $1b-6 ERROR
4718 short tbl_trans - tbl_trans # $1b-7 ERROR
4720 short sacos - tbl_trans # $1c-0 facos norm
4721 short ld_ppi2 - tbl_trans # $1c-1 facos zero
4722 short t_operr - tbl_trans # $1c-2 facos inf
4723 short src_qnan - tbl_trans # $1c-3 facos qnan
4724 short sacosd - tbl_trans # $1c-5 facos denorm
4725 short src_snan - tbl_trans # $1c-4 facos snan
4726 short tbl_trans - tbl_trans # $1c-6 facos unnorm
4727 short tbl_trans - tbl_trans # $1c-7 ERROR
4729 short scos - tbl_trans # $1d-0 fcos norm
4730 short ld_pone - tbl_trans # $1d-1 fcos zero
4731 short t_operr - tbl_trans # $1d-2 fcos inf
4732 short src_qnan - tbl_trans # $1d-3 fcos qnan
4733 short scosd - tbl_trans # $1d-5 fcos denorm
4734 short src_snan - tbl_trans # $1d-4 fcos snan
4735 short tbl_trans - tbl_trans # $1d-6 fcos unnorm
4736 short tbl_trans - tbl_trans # $1d-7 ERROR
4738 short sgetexp - tbl_trans # $1e-0 fgetexp norm
4739 short src_zero - tbl_trans # $1e-1 fgetexp zero
4740 short t_operr - tbl_trans # $1e-2 fgetexp inf
4741 short src_qnan - tbl_trans # $1e-3 fgetexp qnan
4742 short sgetexpd - tbl_trans # $1e-5 fgetexp denorm
4743 short src_snan - tbl_trans # $1e-4 fgetexp snan
4744 short tbl_trans - tbl_trans # $1e-6 fgetexp unnorm
4745 short tbl_trans - tbl_trans # $1e-7 ERROR
4747 short sgetman - tbl_trans # $1f-0 fgetman norm
4748 short src_zero - tbl_trans # $1f-1 fgetman zero
4749 short t_operr - tbl_trans # $1f-2 fgetman inf
4750 short src_qnan - tbl_trans # $1f-3 fgetman qnan
4751 short sgetmand - tbl_trans # $1f-5 fgetman denorm
4752 short src_snan - tbl_trans # $1f-4 fgetman snan
4753 short tbl_trans - tbl_trans # $1f-6 fgetman unnorm
4754 short tbl_trans - tbl_trans # $1f-7 ERROR
4756 short tbl_trans - tbl_trans # $20-0 fdiv norm
4757 short tbl_trans - tbl_trans # $20-1 fdiv zero
4758 short tbl_trans - tbl_trans # $20-2 fdiv inf
4759 short tbl_trans - tbl_trans # $20-3 fdiv qnan
4760 short tbl_trans - tbl_trans # $20-5 fdiv denorm
4761 short tbl_trans - tbl_trans # $20-4 fdiv snan
4762 short tbl_trans - tbl_trans # $20-6 fdiv unnorm
4763 short tbl_trans - tbl_trans # $20-7 ERROR
4765 short smod_snorm - tbl_trans # $21-0 fmod norm
4766 short smod_szero - tbl_trans # $21-1 fmod zero
4767 short smod_sinf - tbl_trans # $21-2 fmod inf
4768 short sop_sqnan - tbl_trans # $21-3 fmod qnan
4769 short smod_sdnrm - tbl_trans # $21-5 fmod denorm
4770 short sop_ssnan - tbl_trans # $21-4 fmod snan
4771 short tbl_trans - tbl_trans # $21-6 fmod unnorm
4772 short tbl_trans - tbl_trans # $21-7 ERROR
4774 short tbl_trans - tbl_trans # $22-0 fadd norm
4775 short tbl_trans - tbl_trans # $22-1 fadd zero
4776 short tbl_trans - tbl_trans # $22-2 fadd inf
4777 short tbl_trans - tbl_trans # $22-3 fadd qnan
4778 short tbl_trans - tbl_trans # $22-5 fadd denorm
4779 short tbl_trans - tbl_trans # $22-4 fadd snan
4780 short tbl_trans - tbl_trans # $22-6 fadd unnorm
4781 short tbl_trans - tbl_trans # $22-7 ERROR
4783 short tbl_trans - tbl_trans # $23-0 fmul norm
4784 short tbl_trans - tbl_trans # $23-1 fmul zero
4785 short tbl_trans - tbl_trans # $23-2 fmul inf
4786 short tbl_trans - tbl_trans # $23-3 fmul qnan
4787 short tbl_trans - tbl_trans # $23-5 fmul denorm
4788 short tbl_trans - tbl_trans # $23-4 fmul snan
4789 short tbl_trans - tbl_trans # $23-6 fmul unnorm
4790 short tbl_trans - tbl_trans # $23-7 ERROR
4792 short tbl_trans - tbl_trans # $24-0 fsgldiv norm
4793 short tbl_trans - tbl_trans # $24-1 fsgldiv zero
4794 short tbl_trans - tbl_trans # $24-2 fsgldiv inf
4795 short tbl_trans - tbl_trans # $24-3 fsgldiv qnan
4796 short tbl_trans - tbl_trans # $24-5 fsgldiv denorm
4797 short tbl_trans - tbl_trans # $24-4 fsgldiv snan
4798 short tbl_trans - tbl_trans # $24-6 fsgldiv unnorm
4799 short tbl_trans - tbl_trans # $24-7 ERROR
4801 short srem_snorm - tbl_trans # $25-0 frem norm
4802 short srem_szero - tbl_trans # $25-1 frem zero
4803 short srem_sinf - tbl_trans # $25-2 frem inf
4804 short sop_sqnan - tbl_trans # $25-3 frem qnan
4805 short srem_sdnrm - tbl_trans # $25-5 frem denorm
4806 short sop_ssnan - tbl_trans # $25-4 frem snan
4807 short tbl_trans - tbl_trans # $25-6 frem unnorm
4808 short tbl_trans - tbl_trans # $25-7 ERROR
4810 short sscale_snorm - tbl_trans # $26-0 fscale norm
4811 short sscale_szero - tbl_trans # $26-1 fscale zero
4812 short sscale_sinf - tbl_trans # $26-2 fscale inf
4813 short sop_sqnan - tbl_trans # $26-3 fscale qnan
4814 short sscale_sdnrm - tbl_trans # $26-5 fscale denorm
4815 short sop_ssnan - tbl_trans # $26-4 fscale snan
4816 short tbl_trans - tbl_trans # $26-6 fscale unnorm
4817 short tbl_trans - tbl_trans # $26-7 ERROR
4819 short tbl_trans - tbl_trans # $27-0 fsglmul norm
4820 short tbl_trans - tbl_trans # $27-1 fsglmul zero
4821 short tbl_trans - tbl_trans # $27-2 fsglmul inf
4822 short tbl_trans - tbl_trans # $27-3 fsglmul qnan
4823 short tbl_trans - tbl_trans # $27-5 fsglmul denorm
4824 short tbl_trans - tbl_trans # $27-4 fsglmul snan
4825 short tbl_trans - tbl_trans # $27-6 fsglmul unnorm
4826 short tbl_trans - tbl_trans # $27-7 ERROR
4828 short tbl_trans - tbl_trans # $28-0 fsub norm
4829 short tbl_trans - tbl_trans # $28-1 fsub zero
4830 short tbl_trans - tbl_trans # $28-2 fsub inf
4831 short tbl_trans - tbl_trans # $28-3 fsub qnan
4832 short tbl_trans - tbl_trans # $28-5 fsub denorm
4833 short tbl_trans - tbl_trans # $28-4 fsub snan
4834 short tbl_trans - tbl_trans # $28-6 fsub unnorm
4835 short tbl_trans - tbl_trans # $28-7 ERROR
4837 short tbl_trans - tbl_trans # $29-0 ERROR
4838 short tbl_trans - tbl_trans # $29-1 ERROR
4839 short tbl_trans - tbl_trans # $29-2 ERROR
4840 short tbl_trans - tbl_trans # $29-3 ERROR
4841 short tbl_trans - tbl_trans # $29-4 ERROR
4842 short tbl_trans - tbl_trans # $29-5 ERROR
4843 short tbl_trans - tbl_trans # $29-6 ERROR
4844 short tbl_trans - tbl_trans # $29-7 ERROR
4846 short tbl_trans - tbl_trans # $2a-0 ERROR
4847 short tbl_trans - tbl_trans # $2a-1 ERROR
4848 short tbl_trans - tbl_trans # $2a-2 ERROR
4849 short tbl_trans - tbl_trans # $2a-3 ERROR
4850 short tbl_trans - tbl_trans # $2a-4 ERROR
4851 short tbl_trans - tbl_trans # $2a-5 ERROR
4852 short tbl_trans - tbl_trans # $2a-6 ERROR
4853 short tbl_trans - tbl_trans # $2a-7 ERROR
4855 short tbl_trans - tbl_trans # $2b-0 ERROR
4856 short tbl_trans - tbl_trans # $2b-1 ERROR
4857 short tbl_trans - tbl_trans # $2b-2 ERROR
4858 short tbl_trans - tbl_trans # $2b-3 ERROR
4859 short tbl_trans - tbl_trans # $2b-4 ERROR
4860 short tbl_trans - tbl_trans # $2b-5 ERROR
4861 short tbl_trans - tbl_trans # $2b-6 ERROR
4862 short tbl_trans - tbl_trans # $2b-7 ERROR
4864 short tbl_trans - tbl_trans # $2c-0 ERROR
4865 short tbl_trans - tbl_trans # $2c-1 ERROR
4866 short tbl_trans - tbl_trans # $2c-2 ERROR
4867 short tbl_trans - tbl_trans # $2c-3 ERROR
4868 short tbl_trans - tbl_trans # $2c-4 ERROR
4869 short tbl_trans - tbl_trans # $2c-5 ERROR
4870 short tbl_trans - tbl_trans # $2c-6 ERROR
4871 short tbl_trans - tbl_trans # $2c-7 ERROR
4873 short tbl_trans - tbl_trans # $2d-0 ERROR
4874 short tbl_trans - tbl_trans # $2d-1 ERROR
4875 short tbl_trans - tbl_trans # $2d-2 ERROR
4876 short tbl_trans - tbl_trans # $2d-3 ERROR
4877 short tbl_trans - tbl_trans # $2d-4 ERROR
4878 short tbl_trans - tbl_trans # $2d-5 ERROR
4879 short tbl_trans - tbl_trans # $2d-6 ERROR
4880 short tbl_trans - tbl_trans # $2d-7 ERROR
4882 short tbl_trans - tbl_trans # $2e-0 ERROR
4883 short tbl_trans - tbl_trans # $2e-1 ERROR
4884 short tbl_trans - tbl_trans # $2e-2 ERROR
4885 short tbl_trans - tbl_trans # $2e-3 ERROR
4886 short tbl_trans - tbl_trans # $2e-4 ERROR
4887 short tbl_trans - tbl_trans # $2e-5 ERROR
4888 short tbl_trans - tbl_trans # $2e-6 ERROR
4889 short tbl_trans - tbl_trans # $2e-7 ERROR
4891 short tbl_trans - tbl_trans # $2f-0 ERROR
4892 short tbl_trans - tbl_trans # $2f-1 ERROR
4893 short tbl_trans - tbl_trans # $2f-2 ERROR
4894 short tbl_trans - tbl_trans # $2f-3 ERROR
4895 short tbl_trans - tbl_trans # $2f-4 ERROR
4896 short tbl_trans - tbl_trans # $2f-5 ERROR
4897 short tbl_trans - tbl_trans # $2f-6 ERROR
4898 short tbl_trans - tbl_trans # $2f-7 ERROR
4900 short ssincos - tbl_trans # $30-0 fsincos norm
4901 short ssincosz - tbl_trans # $30-1 fsincos zero
4902 short ssincosi - tbl_trans # $30-2 fsincos inf
4903 short ssincosqnan - tbl_trans # $30-3 fsincos qnan
4904 short ssincosd - tbl_trans # $30-5 fsincos denorm
4905 short ssincossnan - tbl_trans # $30-4 fsincos snan
4906 short tbl_trans - tbl_trans # $30-6 fsincos unnorm
4907 short tbl_trans - tbl_trans # $30-7 ERROR
4909 short ssincos - tbl_trans # $31-0 fsincos norm
4910 short ssincosz - tbl_trans # $31-1 fsincos zero
4911 short ssincosi - tbl_trans # $31-2 fsincos inf
4912 short ssincosqnan - tbl_trans # $31-3 fsincos qnan
4913 short ssincosd - tbl_trans # $31-5 fsincos denorm
4914 short ssincossnan - tbl_trans # $31-4 fsincos snan
4915 short tbl_trans - tbl_trans # $31-6 fsincos unnorm
4916 short tbl_trans - tbl_trans # $31-7 ERROR
4918 short ssincos - tbl_trans # $32-0 fsincos norm
4919 short ssincosz - tbl_trans # $32-1 fsincos zero
4920 short ssincosi - tbl_trans # $32-2 fsincos inf
4921 short ssincosqnan - tbl_trans # $32-3 fsincos qnan
4922 short ssincosd - tbl_trans # $32-5 fsincos denorm
4923 short ssincossnan - tbl_trans # $32-4 fsincos snan
4924 short tbl_trans - tbl_trans # $32-6 fsincos unnorm
4925 short tbl_trans - tbl_trans # $32-7 ERROR
4927 short ssincos - tbl_trans # $33-0 fsincos norm
4928 short ssincosz - tbl_trans # $33-1 fsincos zero
4929 short ssincosi - tbl_trans # $33-2 fsincos inf
4930 short ssincosqnan - tbl_trans # $33-3 fsincos qnan
4931 short ssincosd - tbl_trans # $33-5 fsincos denorm
4932 short ssincossnan - tbl_trans # $33-4 fsincos snan
4933 short tbl_trans - tbl_trans # $33-6 fsincos unnorm
4934 short tbl_trans - tbl_trans # $33-7 ERROR
4936 short ssincos - tbl_trans # $34-0 fsincos norm
4937 short ssincosz - tbl_trans # $34-1 fsincos zero
4938 short ssincosi - tbl_trans # $34-2 fsincos inf
4939 short ssincosqnan - tbl_trans # $34-3 fsincos qnan
4940 short ssincosd - tbl_trans # $34-5 fsincos denorm
4941 short ssincossnan - tbl_trans # $34-4 fsincos snan
4942 short tbl_trans - tbl_trans # $34-6 fsincos unnorm
4943 short tbl_trans - tbl_trans # $34-7 ERROR
4945 short ssincos - tbl_trans # $35-0 fsincos norm
4946 short ssincosz - tbl_trans # $35-1 fsincos zero
4947 short ssincosi - tbl_trans # $35-2 fsincos inf
4948 short ssincosqnan - tbl_trans # $35-3 fsincos qnan
4949 short ssincosd - tbl_trans # $35-5 fsincos denorm
4950 short ssincossnan - tbl_trans # $35-4 fsincos snan
4951 short tbl_trans - tbl_trans # $35-6 fsincos unnorm
4952 short tbl_trans - tbl_trans # $35-7 ERROR
4954 short ssincos - tbl_trans # $36-0 fsincos norm
4955 short ssincosz - tbl_trans # $36-1 fsincos zero
4956 short ssincosi - tbl_trans # $36-2 fsincos inf
4957 short ssincosqnan - tbl_trans # $36-3 fsincos qnan
4958 short ssincosd - tbl_trans # $36-5 fsincos denorm
4959 short ssincossnan - tbl_trans # $36-4 fsincos snan
4960 short tbl_trans - tbl_trans # $36-6 fsincos unnorm
4961 short tbl_trans - tbl_trans # $36-7 ERROR
4963 short ssincos - tbl_trans # $37-0 fsincos norm
4964 short ssincosz - tbl_trans # $37-1 fsincos zero
4965 short ssincosi - tbl_trans # $37-2 fsincos inf
4966 short ssincosqnan - tbl_trans # $37-3 fsincos qnan
4967 short ssincosd - tbl_trans # $37-5 fsincos denorm
4968 short ssincossnan - tbl_trans # $37-4 fsincos snan
4969 short tbl_trans - tbl_trans # $37-6 fsincos unnorm
4970 short tbl_trans - tbl_trans # $37-7 ERROR
4974 # the instruction fetch access for the displacement word for the
4975 # fdbcc emulation failed. here, we create an access error frame
4976 # from the current frame and branch to _real_access().
#
# Steps performed by the code below:
#   - reload d0-d1/a0-a1, the FP control registers and fp0-fp1 from the
#     a6-relative save area;
#   - copy USER_FPIAR into the frame's PC slot (EXC_PC);
#   - push a copy of the top longword (this opens up 4 more bytes), then
#     rewrite the words above it in place: lo(PC), voff, EA and FSLW;
#   - set the supervisor TM bit in the FSLW only when the stacked SR says
#     the fault happened in supervisor mode.
4978 movm.l EXC_DREGS(%a6),&0x0303 # restore d0-d1/a0-a1
4979 fmovm.l USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs
4980 fmovm.x EXC_FPREGS(%a6),&0xc0 # restore fp0-fp1
4982 mov.l USER_FPIAR(%a6),EXC_PC(%a6) # store current PC
4986 mov.l (%sp),-(%sp) # store SR,hi(PC); frame grows by one longword
4987 mov.w 0x8(%sp),0x4(%sp) # store lo(PC)
4988 mov.w &0x4008,0x6(%sp) # store voff
4989 mov.l 0x2(%sp),0x8(%sp) # store EA (copied from the PC field just rebuilt)
4990 mov.l &0x09428001,0xc(%sp) # store FSLW
4992 btst &0x5,(%sp) # user or supervisor mode? (bit 5 of the SR high byte)
4993 beq.b funimp_iacc_end # user
4994 bset &0x2,0xd(%sp) # set supervisor TM bit (bit 2 of the FSLW's second byte)
4999 #########################################################################
5000 # ssin(): computes the sine of a normalized input #
5001 # ssind(): computes the sine of a denormalized input #
5002 # scos(): computes the cosine of a normalized input #
5003 # scosd(): computes the cosine of a denormalized input #
5004 # ssincos(): computes the sine and cosine of a normalized input #
5005 # ssincosd(): computes the sine and cosine of a denormalized input #
5007 # INPUT *************************************************************** #
5008 # a0 = pointer to extended precision input #
5009 # d0 = round precision,mode #
5011 # OUTPUT ************************************************************** #
5012 # fp0 = sin(X) or cos(X) #
5018 # ACCURACY and MONOTONICITY ******************************************* #
5019 # The returned result is within 1 ulp in 64 significant bits, i.e. #
5020 # within 0.5001 ulp to 53 bits if the result is subsequently #
5021 # rounded to double precision. The result is provably monotonic #
5022 # in double precision. #
5024 # ALGORITHM *********************************************************** #
5027 # 1. If SIN is invoked, set AdjN := 0; otherwise, set AdjN := 1. #
5029 # 2. If |X| >= 15Pi or |X| < 2**(-40), go to 7. #
5031 # 3. Decompose X as X = N(Pi/2) + r where |r| <= Pi/4. Let #
5032 # k = N mod 4, so in particular, k = 0,1,2,or 3. #
5033 # Overwrite k by k := k + AdjN. #
5035 # 4. If k is even, go to 6. #
5037 # 5. (k is odd) Set j := (k-1)/2, sgn := (-1)**j. #
5038 # Return sgn*cos(r) where cos(r) is approximated by an #
5039 # even polynomial in r, 1 + r*r*(B1+s*(B2+ ... + s*B8)), s = r*r. #
5043 # 6. (k is even) Set j := k/2, sgn := (-1)**j. Return sgn*sin(r) #
5044 # where sin(r) is approximated by an odd polynomial in r #
5045 # r + r*s*(A1+s*(A2+ ... + s*A7)), s = r*r. #
5048 # 7. If |X| > 1, go to 9. #
5050 # 8. (|X|<2**(-40)) If SIN is invoked, return X; #
5051 # otherwise return 1. #
5053 # 9. Overwrite X by X := X rem 2Pi. Now that |X| <= Pi, go back to 3. #
5057 # 1. If |X| >= 15Pi or |X| < 2**(-40), go to 6. #
5059 # 2. Decompose X as X = N(Pi/2) + r where |r| <= Pi/4. Let #
5060 # k = N mod 4, so in particular, k = 0,1,2,or 3. #
5062 # 3. If k is even, go to 5. #
5064 # 4. (k is odd) Set j1 := (k-1)/2, j2 := j1 (EOR) (k mod 2), ie. #
5065 # j1 exclusive or with the l.s.b. of k. #
5066 # sgn1 := (-1)**j1, sgn2 := (-1)**j2. #
5067 # SIN(X) = sgn1 * cos(r) and COS(X) = sgn2*sin(r) where #
5068 # sin(r) and cos(r) are computed as odd and even #
5069 # polynomials in r, respectively. Exit #
5071 # 5. (k is even) Set j1 := k/2, sgn1 := (-1)**j1. #
5072 # SIN(X) = sgn1 * sin(r) and COS(X) = sgn1*cos(r) where #
5073 # sin(r) and cos(r) are computed as odd and even #
5074 # polynomials in r, respectively. Exit #
5076 # 6. If |X| > 1, go to 8. #
5078 # 7. (|X|<2**(-40)) SIN(X) = X and COS(X) = 1. Exit. #
5080 # 8. Overwrite X by X := X rem 2Pi. Now that |X| <= Pi, go back to 2. #
5083 #########################################################################
#--Polynomial coefficients for the sine (SINA1..SINA7) and cosine
#--(COSB1..COSB8) approximations used by the routines that follow.
#--The two-long entries are read as doubles (.d). SINA3 and COSB4 are
#--also read with .d, so only their first two longs are used; the
#--trailing zero longs are never loaded. SINA1, SINA2, COSB2 and COSB3
#--are read as extended (.x), and COSB1 is read as single (.s).
5085 SINA7: long 0xBD6AAA77,0xCCC994F5
5086 SINA6: long 0x3DE61209,0x7AAE8DA1
5087 SINA5: long 0xBE5AE645,0x2A118AE4
5088 SINA4: long 0x3EC71DE3,0xA5341531
5089 SINA3: long 0xBF2A01A0,0x1A018B59,0x00000000,0x00000000
5090 SINA2: long 0x3FF80000,0x88888888,0x888859AF,0x00000000
5091 SINA1: long 0xBFFC0000,0xAAAAAAAA,0xAAAAAA99,0x00000000
5093 COSB8: long 0x3D2AC4D0,0xD6011EE3
5094 COSB7: long 0xBDA9396F,0x9F45AC19
5095 COSB6: long 0x3E21EED9,0x0612C972
5096 COSB5: long 0xBE927E4F,0xB79D9FCF
5097 COSB4: long 0x3EFA01A0,0x1A01D423,0x00000000,0x00000000
5098 COSB3: long 0xBFF50000,0xB60B60B6,0x0B61D438,0x00000000
5099 COSB2: long 0x3FFA0000,0xAAAAAAAA,0xAAAAAB5E
5100 COSB1: long 0xBF000000
5119 ############################################
5122 mov.l &0,ADJN(%a6) # sine requested: SET ADJN TO 0
5125 ############################################
5128 mov.l &1,ADJN(%a6) # cosine requested: SET ADJN TO 1
5130 ############################################
5132 #--LOAD X AND CHECK IF |X| IS TOO SMALL OR LARGE
#--The range checks compare a "compact" form of |X| held in d1:
#--high word = exponent word of X with the sign bit cleared,
#--low word  = upper 16 bits of the mantissa.
5134 fmov.x (%a0),%fp0 # LOAD INPUT
5135 fmov.x %fp0,X(%a6) # save input at X
5138 mov.l (%a0),%d1 # put exp in hi word
5139 mov.w 4(%a0),%d1 # fetch hi(man)
5140 and.l &0x7FFFFFFF,%d1 # strip sign
5142 cmpi.l %d1,&0x3FD78000 # is |X| >= 2**(-40)?
5144 bra.w SINSM # |X| < 2**(-40); input is very small
5147 cmp.l %d1,&0x4004BC7E # is |X| < 15 PI?
5149 bra.w SREDUCEX # |X| >= 15 PI; input is very large
5151 #--THIS IS THE USUAL CASE, 2**(-40) <= |X| < 15 PI.
5152 #--THE ARGUMENT REDUCTION IS DONE BY TABLE LOOK UP.
5155 fmul.d TWOBYPI(%pc),%fp1 # X*2/PI
5157 lea PITBL+0x200(%pc),%a1 # TABLE OF N*PI/2, N = -32,...,32
5159 fmov.l %fp1,INT(%a6) # CONVERT TO INTEGER
5161 mov.l INT(%a6),%d1 # make a copy of N
5162 asl.l &4,%d1 # N *= 16 (byte offset of a 16-byte table entry)
5163 add.l %d1,%a1 # tbl_addr = a1 + (N*16)
5165 # A1 IS THE ADDRESS OF N*PIBY2
5166 # ...WHICH IS IN TWO PIECES Y1 & Y2
5167 fsub.x (%a1)+,%fp0 # X-Y1 (Y1 read as extended; a1 then points at Y2)
5168 fsub.s (%a1),%fp0 # fp0 = R = (X-Y1)-Y2 (Y2 read as single)
5171 #--continuation from REDUCEX
5173 #--GET N+ADJN AND SEE IF SIN(R) OR COS(R) IS NEEDED
5175 add.l ADJN(%a6),%d1 # SEE IF D1 IS ODD OR EVEN
5176 ror.l &1,%d1 # D1 WAS ODD IFF D1 IS NOW NEGATIVE (bit 0 moved to bit 31)
5180 #--LET J BE THE LEAST SIG. BIT OF D1, LET SGN := (-1)**J.
5181 #--THEN WE RETURN SGN*SIN(R). SGN*SIN(R) IS COMPUTED BY
5182 #--R' + R'*S*(A1 + S(A2 + S(A3 + S(A4 + ... + SA7)))), WHERE
5183 #--R' = SGN*R, S=R*R. THIS CAN BE REWRITTEN AS
5184 #--R' + R'*S*( [A1+T(A3+T(A5+TA7))] + [S(A2+T(A4+TA6))])
5186 #--NOTE THAT A3 THROUGH A7 ARE STORED IN DOUBLE PRECISION
5187 #--WHILE A1 AND A2 ARE IN DOUBLE-EXTENDED FORMAT.
#--The odd-index chain is accumulated in fp3 and the even-index chain
#--in fp2; both registers are saved on entry and restored before the
#--final add.
5189 fmovm.x &0x0c,-(%sp) # save fp2/fp3
5191 fmov.x %fp0,X(%a6) # X IS R
5192 fmul.x %fp0,%fp0 # FP0 IS S
5194 fmov.d SINA7(%pc),%fp3
5195 fmov.d SINA6(%pc),%fp2
5198 fmul.x %fp1,%fp1 # FP1 IS T
5201 and.l &0x80000000,%d1
5202 # ...LEAST SIG. BIT OF D1 IN SIGN POSITION
5203 eor.l %d1,X(%a6) # X IS NOW R'= SGN*R (sign bit of the stored value flipped)
5205 fmul.x %fp1,%fp3 # TA7
5206 fmul.x %fp1,%fp2 # TA6
5208 fadd.d SINA5(%pc),%fp3 # A5+TA7
5209 fadd.d SINA4(%pc),%fp2 # A4+TA6
5211 fmul.x %fp1,%fp3 # T(A5+TA7)
5212 fmul.x %fp1,%fp2 # T(A4+TA6)
5214 fadd.d SINA3(%pc),%fp3 # A3+T(A5+TA7)
5215 fadd.x SINA2(%pc),%fp2 # A2+T(A4+TA6)
5217 fmul.x %fp3,%fp1 # T(A3+T(A5+TA7))
5219 fmul.x %fp0,%fp2 # S(A2+T(A4+TA6))
5220 fadd.x SINA1(%pc),%fp1 # A1+T(A3+T(A5+TA7))
5221 fmul.x X(%a6),%fp0 # R'*S
5223 fadd.x %fp2,%fp1 # [A1+T(A3+T(A5+TA7))]+[S(A2+T(A4+TA6))]
5225 fmul.x %fp1,%fp0 # SIN(R')-R'
5227 fmovm.x (%sp)+,&0x30 # restore fp2/fp3
5229 fmov.l %d0,%fpcr # restore users round mode,prec
5230 fadd.x X(%a6),%fp0 # last inst - possible exception set
5233 #--LET J BE THE LEAST SIG. BIT OF D1, LET SGN := (-1)**J.
5234 #--THEN WE RETURN SGN*COS(R). SGN*COS(R) IS COMPUTED BY
5235 #--SGN + S'*(B1 + S(B2 + S(B3 + S(B4 + ... + SB8)))), WHERE
5236 #--S=R*R AND S'=SGN*S. THIS CAN BE REWRITTEN AS
5237 #--SGN + S'*([B1+T(B3+T(B5+TB7))] + [S(B2+T(B4+T(B6+TB8)))])
5239 #--NOTE THAT B4 THROUGH B8 ARE STORED IN DOUBLE PRECISION
5240 #--WHILE B2 AND B3 ARE IN DOUBLE-EXTENDED FORMAT, B1 IS -1/2
5241 #--AND IS THEREFORE STORED AS SINGLE PRECISION.
#--The even-index chain is accumulated in fp2 and the odd-index chain
#--in fp3; SGN is built as a single-precision +-1.0 in POSNEG1.
5243 fmovm.x &0x0c,-(%sp) # save fp2/fp3
5245 fmul.x %fp0,%fp0 # FP0 IS S
5247 fmov.d COSB8(%pc),%fp2
5248 fmov.d COSB7(%pc),%fp3
5251 fmul.x %fp1,%fp1 # FP1 IS T
5253 fmov.x %fp0,X(%a6) # X IS S
5255 and.l &0x80000000,%d1
5256 # ...LEAST SIG. BIT OF D1 IN SIGN POSITION
5258 fmul.x %fp1,%fp2 # TB8
5260 eor.l %d1,X(%a6) # X IS NOW S'= SGN*S
5261 and.l &0x80000000,%d1
5263 fmul.x %fp1,%fp3 # TB7
5265 or.l &0x3F800000,%d1 # D1 IS SGN (+-1.0) IN SINGLE
5266 mov.l %d1,POSNEG1(%a6)
5268 fadd.d COSB6(%pc),%fp2 # B6+TB8
5269 fadd.d COSB5(%pc),%fp3 # B5+TB7
5271 fmul.x %fp1,%fp2 # T(B6+TB8)
5272 fmul.x %fp1,%fp3 # T(B5+TB7)
5274 fadd.d COSB4(%pc),%fp2 # B4+T(B6+TB8)
5275 fadd.x COSB3(%pc),%fp3 # B3+T(B5+TB7)
5277 fmul.x %fp1,%fp2 # T(B4+T(B6+TB8))
5278 fmul.x %fp3,%fp1 # T(B3+T(B5+TB7))
5280 fadd.x COSB2(%pc),%fp2 # B2+T(B4+T(B6+TB8))
5281 fadd.s COSB1(%pc),%fp1 # B1+T(B3+T(B5+TB7))
5283 fmul.x %fp2,%fp0 # S(B2+T(B4+T(B6+TB8)))
5289 fmovm.x (%sp)+,&0x30 # restore fp2/fp3
5291 fmov.l %d0,%fpcr # restore users round mode,prec
5292 fadd.s POSNEG1(%a6),%fp0 # last inst - possible exception set
5295 ##############################################
5297 # SINe: Big OR Small?
5298 #--IF |X| > 15PI, WE USE THE GENERAL ARGUMENT REDUCTION.
5299 #--IF |X| < 2**(-40), RETURN X OR 1.
5301 cmp.l %d1,&0x3FFF8000 # compact |X| vs. compact form of 1.0
5309 # here, the operation may underflow iff the precision is sgl or dbl.
5310 # extended denorms are handled through another entry point.
5312 # mov.w &0x0000,XDCARE(%a6) # JUST IN CASE
5314 fmov.l %d0,%fpcr # restore users round mode,prec
5315 mov.b &FMOV_OP,%d1 # last inst is MOVE
5316 fmov.x X(%a6),%fp0 # last inst - possible exception set; sin(X) = X here
5320 fmov.s &0x3F800000,%fp0 # fp0 = 1.0
5321 fmov.l %d0,%fpcr # restore users round mode,prec
# 0x80800000 is -2**(-126) in single precision, so the add below yields
# 1.0 minus a tiny amount under the user's rounding mode and precision.
5322 fadd.s &0x80800000,%fp0 # last inst - possible exception set
5325 ################################################
5327 #--SIN(X) = X FOR DENORMALIZED X
5331 ############################################
5333 #--COS(X) = 1 FOR DENORMALIZED X
5335 fmov.s &0x3F800000,%fp0 # fp0 = 1.0
5338 ##################################################
#--Combined sine/cosine path: same range checks and table-driven
#--reduction as the single-function path, using the compact form of
#--|X| in d1. Both results are produced further down.
5345 fmov.x (%a0),%fp0 # LOAD INPUT
5350 and.l &0x7FFFFFFF,%d1 # COMPACTIFY X (clear the sign bit)
5352 cmp.l %d1,&0x3FD78000 # |X| >= 2**(-40)?
5357 cmp.l %d1,&0x4004BC7E # |X| < 15 PI?
5362 #--THIS IS THE USUAL CASE, 2**(-40) <= |X| < 15 PI.
5363 #--THE ARGUMENT REDUCTION IS DONE BY TABLE LOOK UP.
5367 fmul.d TWOBYPI(%pc),%fp1 # X*2/PI
5369 lea PITBL+0x200(%pc),%a1 # TABLE OF N*PI/2, N = -32,...,32
5371 fmov.l %fp1,INT(%a6) # CONVERT TO INTEGER
5375 add.l %d1,%a1 # ADDRESS OF N*PIBY2, IN Y1, Y2
5377 fsub.x (%a1)+,%fp0 # X-Y1 (Y1 read as extended)
5378 fsub.s (%a1),%fp0 # FP0 IS R = (X-Y1)-Y2 (Y2 read as single)
5381 #--continuation point from REDUCEX
5385 cmp.l %d1,&0 # D1 < 0 IFF N IS ODD
#--N-odd case: the sine polynomial (A coefficients, in fp1) ends up as
#--COS(X) and the cosine polynomial (B coefficients, in fp2/fp0) ends up
#--as SIN(X). RPRIME takes its sign from d2; SPRIME and POSNEG1 take
#--theirs from d1.
5389 #--REGISTERS SAVED SO FAR: D0, A0, FP2.
5390 fmovm.x &0x04,-(%sp) # save fp2
5392 fmov.x %fp0,RPRIME(%a6)
5393 fmul.x %fp0,%fp0 # FP0 IS S = R*R
5394 fmov.d SINA7(%pc),%fp1 # A7
5395 fmov.d COSB8(%pc),%fp2 # B8
5396 fmul.x %fp0,%fp1 # SA7
5397 fmul.x %fp0,%fp2 # SB8
5402 and.l &0x80000000,%d2
5404 and.l &0x80000000,%d2
5406 fadd.d SINA6(%pc),%fp1 # A6+SA7
5407 fadd.d COSB7(%pc),%fp2 # B7+SB8
5409 fmul.x %fp0,%fp1 # S(A6+SA7)
5410 eor.l %d2,RPRIME(%a6) # apply sign bit from d2 to R'
5412 fmul.x %fp0,%fp2 # S(B7+SB8)
5414 and.l &0x80000000,%d1
5415 mov.l &0x3F800000,POSNEG1(%a6) # POSNEG1 = 1.0 in single
5416 eor.l %d1,POSNEG1(%a6) # ... with the sign bit taken from d1
5418 fadd.d SINA5(%pc),%fp1 # A5+S(A6+SA7)
5419 fadd.d COSB6(%pc),%fp2 # B6+S(B7+SB8)
5421 fmul.x %fp0,%fp1 # S(A5+S(A6+SA7))
5422 fmul.x %fp0,%fp2 # S(B6+S(B7+SB8))
5423 fmov.x %fp0,SPRIME(%a6)
5425 fadd.d SINA4(%pc),%fp1 # A4+S(A5+S(A6+SA7))
5426 eor.l %d1,SPRIME(%a6) # apply sign bit from d1 to S'
5427 fadd.d COSB5(%pc),%fp2 # B5+S(B6+S(B7+SB8))
5429 fmul.x %fp0,%fp1 # S(A4+...)
5430 fmul.x %fp0,%fp2 # S(B5+...)
5432 fadd.d SINA3(%pc),%fp1 # A3+S(A4+...)
5433 fadd.d COSB4(%pc),%fp2 # B4+S(B5+...)
5435 fmul.x %fp0,%fp1 # S(A3+...)
5436 fmul.x %fp0,%fp2 # S(B4+...)
5438 fadd.x SINA2(%pc),%fp1 # A2+S(A3+...)
5439 fadd.x COSB3(%pc),%fp2 # B3+S(B4+...)
5441 fmul.x %fp0,%fp1 # S(A2+...)
5442 fmul.x %fp0,%fp2 # S(B3+...)
5444 fadd.x SINA1(%pc),%fp1 # A1+S(A2+...)
5445 fadd.x COSB2(%pc),%fp2 # B2+S(B3+...)
5447 fmul.x %fp0,%fp1 # S(A1+...)
5448 fmul.x %fp2,%fp0 # S(B2+...)
5450 fmul.x RPRIME(%a6),%fp1 # R'S(A1+...)
5451 fadd.s COSB1(%pc),%fp0 # B1+S(B2...)
5452 fmul.x SPRIME(%a6),%fp0 # S'(B1+S(B2+...))
5454 fmovm.x (%sp)+,&0x20 # restore fp2
5457 fadd.x RPRIME(%a6),%fp1 # COS(X)
5458 bsr sto_cos # store cosine result
5459 fadd.s POSNEG1(%a6),%fp0 # SIN(X)
#--N-even case: the cosine polynomial (B coefficients) is accumulated in
#--fp1 and becomes COS(X); the sine polynomial (A coefficients) is
#--accumulated in fp2/fp0 and becomes SIN(X). One sign bit from d1 is
#--applied to RPRIME, SPRIME and POSNEG1 alike.
5463 #--REGISTERS SAVED SO FAR: FP2.
5464 fmovm.x &0x04,-(%sp) # save fp2
5466 fmov.x %fp0,RPRIME(%a6)
5467 fmul.x %fp0,%fp0 # FP0 IS S = R*R
5469 fmov.d COSB8(%pc),%fp1 # B8
5470 fmov.d SINA7(%pc),%fp2 # A7
5472 fmul.x %fp0,%fp1 # SB8
5473 fmov.x %fp0,SPRIME(%a6)
5474 fmul.x %fp0,%fp2 # SA7
5477 and.l &0x80000000,%d1 # keep only the sign bit
5479 fadd.d COSB7(%pc),%fp1 # B7+SB8
5480 fadd.d SINA6(%pc),%fp2 # A6+SA7
5482 eor.l %d1,RPRIME(%a6) # apply sign to R'
5483 eor.l %d1,SPRIME(%a6) # apply sign to S'
5485 fmul.x %fp0,%fp1 # S(B7+SB8)
5487 or.l &0x3F800000,%d1 # d1 = +-1.0 in single
5488 mov.l %d1,POSNEG1(%a6)
5490 fmul.x %fp0,%fp2 # S(A6+SA7)
5492 fadd.d COSB6(%pc),%fp1 # B6+S(B7+SB8)
5493 fadd.d SINA5(%pc),%fp2 # A5+S(A6+SA7)
5495 fmul.x %fp0,%fp1 # S(B6+S(B7+SB8))
5496 fmul.x %fp0,%fp2 # S(A5+S(A6+SA7))
5498 fadd.d COSB5(%pc),%fp1 # B5+S(B6+S(B7+SB8))
5499 fadd.d SINA4(%pc),%fp2 # A4+S(A5+S(A6+SA7))
5501 fmul.x %fp0,%fp1 # S(B5+...)
5502 fmul.x %fp0,%fp2 # S(A4+...)
5504 fadd.d COSB4(%pc),%fp1 # B4+S(B5+...)
5505 fadd.d SINA3(%pc),%fp2 # A3+S(A4+...)
5507 fmul.x %fp0,%fp1 # S(B4+...)
5508 fmul.x %fp0,%fp2 # S(A3+...)
5510 fadd.x COSB3(%pc),%fp1 # B3+S(B4+...)
5511 fadd.x SINA2(%pc),%fp2 # A2+S(A3+...)
5513 fmul.x %fp0,%fp1 # S(B3+...)
5514 fmul.x %fp0,%fp2 # S(A2+...)
5516 fadd.x COSB2(%pc),%fp1 # B2+S(B3+...)
5517 fadd.x SINA1(%pc),%fp2 # A1+S(A2+...)
5519 fmul.x %fp0,%fp1 # S(B2+...)
5520 fmul.x %fp2,%fp0 # S(A1+...)
5523 fadd.s COSB1(%pc),%fp1 # B1+S(B2...)
5524 fmul.x RPRIME(%a6),%fp0 # R'S(A1+...)
5525 fmul.x SPRIME(%a6),%fp1 # S'(B1+S(B2+...))
5527 fmovm.x (%sp)+,&0x20 # restore fp2
5530 fadd.s POSNEG1(%a6),%fp1 # COS(X)
5531 bsr sto_cos # store cosine result
5532 fadd.x RPRIME(%a6),%fp0 # SIN(X)
5535 ################################################
#--Big-or-small decision for the combined sine/cosine path.
5538 cmp.l %d1,&0x3FFF8000 # compact |X| vs. compact form of 1.0
5541 ################################################
#--|X| < 2**(-40): the cosine result is built in fp1 from 1.0 and
#--passed to sto_cos.
5544 # mov.w &0x0000,XDCARE(%a6)
5545 fmov.s &0x3F800000,%fp1 # fp1 = 1.0
5548 fsub.s &0x00800000,%fp1 # subtract 2**(-126) (single)
5549 bsr sto_cos # store cosine result
5550 fmov.l %fpcr,%d0 # d0 must have fpcr,too
5551 mov.b &FMOV_OP,%d1 # last inst is MOVE
5555 ##############################################
5558 #--SIN AND COS OF X FOR DENORMALIZED X
5560 mov.l %d0,-(%sp) # save d0
5561 fmov.s &0x3F800000,%fp1 # fp1 = 1.0 (cosine result)
5562 bsr sto_cos # store cosine result
5563 mov.l (%sp)+,%d0 # restore d0
5566 ############################################
5568 #--WHEN REDUCEX IS USED, THE CODE WILL INEVITABLY BE SLOW.
5569 #--THIS REDUCTION METHOD, HOWEVER, IS MUCH FASTER THAN USING
5570 #--THE REMAINDER INSTRUCTION WHICH IS NOW IN SOFTWARE.
#--fp2-fp5 and d2 are used as scratch here; they are saved first and
#--restored at the end of the block.
5572 fmovm.x &0x3c,-(%sp) # save {fp2-fp5}
5573 mov.l %d2,-(%sp) # save d2
5574 fmov.s &0x00000000,%fp1 # fp1 = 0
5576 #--If compact form of abs(arg) in d1=$7ffeffff, argument is so large that
5577 #--there is a danger of unwanted overflow in first LOOP iteration. In this
5578 #--case, reduce argument by one remainder step to make subsequent reduction
#--safe.
5580 cmp.l %d1,&0x7ffeffff # is arg dangerously large?
5583 # yes; create 2**16383*PI/2
5584 mov.w &0x7ffe,FP_SCR0_EX(%a6)
5585 mov.l &0xc90fdaa2,FP_SCR0_HI(%a6)
5586 clr.l FP_SCR0_LO(%a6)
5588 # create low half of 2**16383*PI/2 at FP_SCR1
5589 mov.w &0x7fdc,FP_SCR1_EX(%a6)
5590 mov.l &0x85a308d3,FP_SCR1_HI(%a6)
5591 clr.l FP_SCR1_LO(%a6)
5593 ftest.x %fp0 # test sign of argument
# for a positive argument both constants are negated (sign bit set) so
# that the adds below subtract them
5596 or.b &0x80,FP_SCR0_EX(%a6) # positive arg
5597 or.b &0x80,FP_SCR1_EX(%a6)
5599 fadd.x FP_SCR0(%a6),%fp0 # high part of reduction is exact
5600 fmov.x %fp0,%fp1 # save high result in fp1
5601 fadd.x FP_SCR1(%a6),%fp0 # low part of reduction
5602 fsub.x %fp0,%fp1 # determine low component of result
5603 fadd.x FP_SCR1(%a6),%fp1 # fp0/fp1 are reduced argument.
5605 #--ON ENTRY, FP0 IS X, ON RETURN, FP0 IS X REM PI/2, |X| <= PI/4.
5606 #--integer quotient will be stored in N
5607 #--Intermediate remainder is 66-bit long; (R,r) in (FP0,FP1)
5609 fmov.x %fp0,INARG(%a6) # +-2**K * F, 1 <= F < 2
5610 mov.w INARG(%a6),%d1 # d1 = sign/exponent word of the argument
5611 mov.l %d1,%a1 # save a copy of D1
5612 and.l &0x00007FFF,%d1 # strip sign, keep biased exponent
5613 sub.l &0x00003FFF,%d1 # d1 = K
5617 sub.l &27,%d1 # d1 = L := K-27
5618 mov.b &0,ENDFLAG(%a6)
5621 clr.l %d1 # d1 = L := 0
5622 mov.b &1,ENDFLAG(%a6)
5625 #--FIND THE REMAINDER OF (R,r) W.R.T. 2**L * (PI/2). L IS SO CHOSEN
5626 #--THAT INT( X * (2/PI) / 2**(L) ) < 2**29.
5628 #--CREATE 2**(-L) * (2/PI), SIGN(INARG)*2**(63),
5629 #--2**L * (PIby2_1), 2**L * (PIby2_2)
5631 mov.l &0x00003FFE,%d2 # BIASED EXP OF 2/PI
5632 sub.l %d1,%d2 # BIASED EXP OF 2**(-L)*(2/PI)
5634 mov.l &0xA2F9836E,FP_SCR0_HI(%a6)
5635 mov.l &0x4E44152A,FP_SCR0_LO(%a6)
5636 mov.w %d2,FP_SCR0_EX(%a6) # FP_SCR0 = 2**(-L)*(2/PI)
5639 fmul.x FP_SCR0(%a6),%fp2 # fp2 = X * 2**(-L)*(2/PI)
5641 #--WE MUST NOW FIND INT(FP2). SINCE WE NEED THIS VALUE IN
5642 #--FLOATING POINT FORMAT, THE TWO FMOVE'S FMOVE.L FP <--> N
5643 #--WILL BE TOO INEFFICIENT. THE WAY AROUND IT IS THAT
5644 #--(SIGN(INARG)*2**63 + FP2) - SIGN(INARG)*2**63 WILL GIVE
5645 #--US THE DESIRED VALUE IN FLOATING POINT.
5648 and.l &0x80000000,%d2
5649 or.l &0x5F000000,%d2 # d2 = SIGN(INARG)*2**63 IN SGL
5650 mov.l %d2,TWOTO63(%a6)
5651 fadd.s TWOTO63(%a6),%fp2 # THE FRACTIONAL PART OF FP2 IS ROUNDED
5652 fsub.s TWOTO63(%a6),%fp2 # fp2 = N
5655 #--CREATING 2**(L)*Piby2_1 and 2**(L)*Piby2_2
5656 mov.l %d1,%d2 # d2 = L
5658 add.l &0x00003FFF,%d2 # BIASED EXP OF 2**L * (PI/2)
5659 mov.w %d2,FP_SCR0_EX(%a6)
5660 mov.l &0xC90FDAA2,FP_SCR0_HI(%a6)
5661 clr.l FP_SCR0_LO(%a6) # FP_SCR0 = 2**(L) * Piby2_1
5663 add.l &0x00003FDD,%d1 # biased exponent for 2**L * Piby2_2
5664 mov.w %d1,FP_SCR1_EX(%a6)
5665 mov.l &0x85A308D3,FP_SCR1_HI(%a6)
5666 clr.l FP_SCR1_LO(%a6) # FP_SCR1 = 2**(L) * Piby2_2
5668 mov.b ENDFLAG(%a6),%d1 # d1 = ENDFLAG (L is no longer needed in d1)
5670 #--We are now ready to perform (R+r) - N*P1 - N*P2, P1 = 2**(L) * Piby2_1 and
5671 #--P2 = 2**(L) * Piby2_2
5672 fmov.x %fp2,%fp4 # fp4 = N
5673 fmul.x FP_SCR0(%a6),%fp4 # fp4 = W = N*P1
5674 fmov.x %fp2,%fp5 # fp5 = N
5675 fmul.x FP_SCR1(%a6),%fp5 # fp5 = w = N*P2
5676 fmov.x %fp4,%fp3 # fp3 = W = N*P1
5678 #--we want P+p = W+w but |p| <= half ulp of P
5679 #--Then, we need to compute A := R-P and a := r-p
5680 fadd.x %fp5,%fp3 # fp3 = P
5681 fsub.x %fp3,%fp4 # fp4 = W-P
5683 fsub.x %fp3,%fp0 # fp0 = A := R - P
5684 fadd.x %fp5,%fp4 # fp4 = p = (W-P)+w
5686 fmov.x %fp0,%fp3 # fp3 = A
5687 fsub.x %fp4,%fp1 # fp1 = a := r - p
5689 #--Now we need to normalize (A,a) to "new (R,r)" where R+r = A+a but
5690 #--|r| <= half ulp of R.
5691 fadd.x %fp1,%fp0 # fp0 = R := A+a
5692 #--No need to calculate r if this is the last loop
5696 #--Need to calculate r
5697 fsub.x %fp0,%fp3 # fp3 = A-R
5698 fadd.x %fp3,%fp1 # fp1 = r := (A-R)+a
5702 fmov.l %fp2,INT(%a6) # store the integer quotient N
5703 mov.l (%sp)+,%d2 # restore d2
5704 fmovm.x (%sp)+,&0x3c # restore {fp2-fp5}
5712 #########################################################################
5713 # stan(): computes the tangent of a normalized input #
5714 # stand(): computes the tangent of a denormalized input #
5716 # INPUT *************************************************************** #
5717 # a0 = pointer to extended precision input #
5718 # d0 = round precision,mode #
5720 # OUTPUT ************************************************************** #
5723 # ACCURACY and MONOTONICITY ******************************************* #
5724 # The returned result is within 3 ulp in 64 significant bits, i.e. #
5725 # within 0.5001 ulp to 53 bits if the result is subsequently #
5726 # rounded to double precision. The result is provably monotonic #
5727 # in double precision. #
5729 # ALGORITHM *********************************************************** #
5731 # 1. If |X| >= 15Pi or |X| < 2**(-40), go to 6. #
5733 # 2. Decompose X as X = N(Pi/2) + r where |r| <= Pi/4. Let #
5734 # k = N mod 2, so in particular, k = 0 or 1. #
5736 # 3. If k is odd, go to 5. #
5738 # 4. (k is even) Tan(X) = tan(r) and tan(r) is approximated by a #
5739 # rational function U/V where #
5740 # U = r + r*s*(P1 + s*(P2 + s*P3)), and #
5741 # V = 1 + s*(Q1 + s*(Q2 + s*(Q3 + s*Q4))), s = r*r. #
5744 # 5. (k is odd) Tan(X) = -cot(r). Since tan(r) is approximated by #
5745 # a rational function U/V where #
5746 # U = r + r*s*(P1 + s*(P2 + s*P3)), and #
5747 # V = 1 + s*(Q1 + s*(Q2 + s*(Q3 + s*Q4))), s = r*r, #
5748 # -Cot(r) = -V/U. Exit. #
5750 # 6. If |X| > 1, go to 8. #
5752 # 7. (|X|<2**(-40)) Tan(X) = X. Exit. #
5754 # 8. Overwrite X by X := X rem 2Pi. Now that |X| <= Pi, go back to 2. #
5757 #########################################################################
5760 long 0x3EA0B759,0xF50F8688
5762 long 0xBEF2BAA5,0xA8924F04
5765 long 0xBF346F59,0xB39BA65F,0x00000000,0x00000000
5768 long 0x3FF60000,0xE073D3FC,0x199C4A00,0x00000000
5771 long 0x3FF90000,0xD23CD684,0x15D95FA1,0x00000000
5774 long 0xBFFC0000,0x8895A6C5,0xFB423BCA,0x00000000
5777 long 0xBFFD0000,0xEEF57E0D,0xA84BC8CE,0x00000000
5780 long 0x3FFC0000,0xA2F9836E,0x4E44152A,0x00000000
5783 long 0x40010000,0xC90FDAA2,0x00000000,0x00000000
5785 long 0x3FDF0000,0x85A308D4,0x00000000,0x00000000
5787 #--TABLE OF N*PI/2, -32 <= N <= 32. EACH ENTRY HOLDS A LEADING TERM IN
5788 #--EXT. AND A TRAILING TERM IN SGL. NOTE THAT PI IS 64-BIT LONG, THUS
5789 #--N*PI/2 IS AT MOST 69 BITS LONG.
5792 long 0xC0040000,0xC90FDAA2,0x2168C235,0x21800000
5793 long 0xC0040000,0xC2C75BCD,0x105D7C23,0xA0D00000
5794 long 0xC0040000,0xBC7EDCF7,0xFF523611,0xA1E80000
5795 long 0xC0040000,0xB6365E22,0xEE46F000,0x21480000
5796 long 0xC0040000,0xAFEDDF4D,0xDD3BA9EE,0xA1200000
5797 long 0xC0040000,0xA9A56078,0xCC3063DD,0x21FC0000
5798 long 0xC0040000,0xA35CE1A3,0xBB251DCB,0x21100000
5799 long 0xC0040000,0x9D1462CE,0xAA19D7B9,0xA1580000
5800 long 0xC0040000,0x96CBE3F9,0x990E91A8,0x21E00000
5801 long 0xC0040000,0x90836524,0x88034B96,0x20B00000
5802 long 0xC0040000,0x8A3AE64F,0x76F80584,0xA1880000
5803 long 0xC0040000,0x83F2677A,0x65ECBF73,0x21C40000
5804 long 0xC0030000,0xFB53D14A,0xA9C2F2C2,0x20000000
5805 long 0xC0030000,0xEEC2D3A0,0x87AC669F,0x21380000
5806 long 0xC0030000,0xE231D5F6,0x6595DA7B,0xA1300000
5807 long 0xC0030000,0xD5A0D84C,0x437F4E58,0x9FC00000
5808 long 0xC0030000,0xC90FDAA2,0x2168C235,0x21000000
5809 long 0xC0030000,0xBC7EDCF7,0xFF523611,0xA1680000
5810 long 0xC0030000,0xAFEDDF4D,0xDD3BA9EE,0xA0A00000
5811 long 0xC0030000,0xA35CE1A3,0xBB251DCB,0x20900000
5812 long 0xC0030000,0x96CBE3F9,0x990E91A8,0x21600000
5813 long 0xC0030000,0x8A3AE64F,0x76F80584,0xA1080000
5814 long 0xC0020000,0xFB53D14A,0xA9C2F2C2,0x1F800000
5815 long 0xC0020000,0xE231D5F6,0x6595DA7B,0xA0B00000
5816 long 0xC0020000,0xC90FDAA2,0x2168C235,0x20800000
5817 long 0xC0020000,0xAFEDDF4D,0xDD3BA9EE,0xA0200000
5818 long 0xC0020000,0x96CBE3F9,0x990E91A8,0x20E00000
5819 long 0xC0010000,0xFB53D14A,0xA9C2F2C2,0x1F000000
5820 long 0xC0010000,0xC90FDAA2,0x2168C235,0x20000000
5821 long 0xC0010000,0x96CBE3F9,0x990E91A8,0x20600000
5822 long 0xC0000000,0xC90FDAA2,0x2168C235,0x1F800000
5823 long 0xBFFF0000,0xC90FDAA2,0x2168C235,0x1F000000
5824 long 0x00000000,0x00000000,0x00000000,0x00000000
5825 long 0x3FFF0000,0xC90FDAA2,0x2168C235,0x9F000000
5826 long 0x40000000,0xC90FDAA2,0x2168C235,0x9F800000
5827 long 0x40010000,0x96CBE3F9,0x990E91A8,0xA0600000
5828 long 0x40010000,0xC90FDAA2,0x2168C235,0xA0000000
5829 long 0x40010000,0xFB53D14A,0xA9C2F2C2,0x9F000000
5830 long 0x40020000,0x96CBE3F9,0x990E91A8,0xA0E00000
5831 long 0x40020000,0xAFEDDF4D,0xDD3BA9EE,0x20200000
5832 long 0x40020000,0xC90FDAA2,0x2168C235,0xA0800000
5833 long 0x40020000,0xE231D5F6,0x6595DA7B,0x20B00000
5834 long 0x40020000,0xFB53D14A,0xA9C2F2C2,0x9F800000
5835 long 0x40030000,0x8A3AE64F,0x76F80584,0x21080000
5836 long 0x40030000,0x96CBE3F9,0x990E91A8,0xA1600000
5837 long 0x40030000,0xA35CE1A3,0xBB251DCB,0xA0900000
5838 long 0x40030000,0xAFEDDF4D,0xDD3BA9EE,0x20A00000
5839 long 0x40030000,0xBC7EDCF7,0xFF523611,0x21680000
5840 long 0x40030000,0xC90FDAA2,0x2168C235,0xA1000000
5841 long 0x40030000,0xD5A0D84C,0x437F4E58,0x1FC00000
5842 long 0x40030000,0xE231D5F6,0x6595DA7B,0x21300000
5843 long 0x40030000,0xEEC2D3A0,0x87AC669F,0xA1380000
5844 long 0x40030000,0xFB53D14A,0xA9C2F2C2,0xA0000000
5845 long 0x40040000,0x83F2677A,0x65ECBF73,0xA1C40000
5846 long 0x40040000,0x8A3AE64F,0x76F80584,0x21880000
5847 long 0x40040000,0x90836524,0x88034B96,0xA0B00000
5848 long 0x40040000,0x96CBE3F9,0x990E91A8,0xA1E00000
5849 long 0x40040000,0x9D1462CE,0xAA19D7B9,0x21580000
5850 long 0x40040000,0xA35CE1A3,0xBB251DCB,0xA1100000
5851 long 0x40040000,0xA9A56078,0xCC3063DD,0xA1FC0000
5852 long 0x40040000,0xAFEDDF4D,0xDD3BA9EE,0x21200000
5853 long 0x40040000,0xB6365E22,0xEE46F000,0xA1480000
5854 long 0x40040000,0xBC7EDCF7,0xFF523611,0x21E80000
5855 long 0x40040000,0xC2C75BCD,0x105D7C23,0x20D00000
5856 long 0x40040000,0xC90FDAA2,0x2168C235,0xA1800000
#--Tangent main path: range-check the compact form of |X| in d1, reduce
#--by table look-up, then evaluate the rational approximation U/V
#--(N even) or -V/U (N odd) with fp2 accumulating the P chain and fp3
#--the Q chain.
5866 fmov.x (%a0),%fp0 # LOAD INPUT
5870 and.l &0x7FFFFFFF,%d1 # clear the sign bit of the compact form
5872 cmp.l %d1,&0x3FD78000 # |X| >= 2**(-40)?
5876 cmp.l %d1,&0x4004BC7E # |X| < 15 PI?
5881 #--THIS IS THE USUAL CASE, 2**(-40) <= |X| < 15 PI.
5882 #--THE ARGUMENT REDUCTION IS DONE BY TABLE LOOK UP.
5884 fmul.d TWOBYPI(%pc),%fp1 # X*2/PI
5886 lea.l PITBL+0x200(%pc),%a1 # TABLE OF N*PI/2, N = -32,...,32
5888 fmov.l %fp1,%d1 # CONVERT TO INTEGER
5891 add.l %d1,%a1 # ADDRESS N*PIBY2 IN Y1, Y2
5893 fsub.x (%a1)+,%fp0 # X-Y1
5895 fsub.s (%a1),%fp0 # FP0 IS R = (X-Y1)-Y2
5898 and.l &0x80000000,%d1 # D1 WAS ODD IFF D1 < 0
#--N even: tan(X) = U/V with R kept in fp0 and S built in fp1.
5901 fmovm.x &0x0c,-(%sp) # save fp2,fp3
5907 fmul.x %fp1,%fp1 # S = R*R
5909 fmov.d TANQ4(%pc),%fp3
5910 fmov.d TANP3(%pc),%fp2
5912 fmul.x %fp1,%fp3 # SQ4
5913 fmul.x %fp1,%fp2 # SP3
5915 fadd.d TANQ3(%pc),%fp3 # Q3+SQ4
5916 fadd.x TANP2(%pc),%fp2 # P2+SP3
5918 fmul.x %fp1,%fp3 # S(Q3+SQ4)
5919 fmul.x %fp1,%fp2 # S(P2+SP3)
5921 fadd.x TANQ2(%pc),%fp3 # Q2+S(Q3+SQ4)
5922 fadd.x TANP1(%pc),%fp2 # P1+S(P2+SP3)
5924 fmul.x %fp1,%fp3 # S(Q2+S(Q3+SQ4))
5925 fmul.x %fp1,%fp2 # S(P1+S(P2+SP3))
5927 fadd.x TANQ1(%pc),%fp3 # Q1+S(Q2+S(Q3+SQ4))
5928 fmul.x %fp0,%fp2 # RS(P1+S(P2+SP3))
5930 fmul.x %fp3,%fp1 # S(Q1+S(Q2+S(Q3+SQ4)))
5932 fadd.x %fp2,%fp0 # R+RS(P1+S(P2+SP3)) = U
5934 fadd.s &0x3F800000,%fp1 # 1+S(Q1+...) = V
5936 fmovm.x (%sp)+,&0x30 # restore fp2,fp3
5938 fmov.l %d0,%fpcr # restore users round mode,prec
5939 fdiv.x %fp1,%fp0 # last inst - possible exception set
#--N odd: same polynomials with the roles of fp0 and fp1 swapped
#--(S in fp0, R in fp1); the result is -V/U.
5944 fmul.x %fp0,%fp0 # S = R*R
5946 fmov.d TANQ4(%pc),%fp3
5947 fmov.d TANP3(%pc),%fp2
5949 fmul.x %fp0,%fp3 # SQ4
5950 fmul.x %fp0,%fp2 # SP3
5952 fadd.d TANQ3(%pc),%fp3 # Q3+SQ4
5953 fadd.x TANP2(%pc),%fp2 # P2+SP3
5955 fmul.x %fp0,%fp3 # S(Q3+SQ4)
5956 fmul.x %fp0,%fp2 # S(P2+SP3)
5958 fadd.x TANQ2(%pc),%fp3 # Q2+S(Q3+SQ4)
5959 fadd.x TANP1(%pc),%fp2 # P1+S(P2+SP3)
5961 fmul.x %fp0,%fp3 # S(Q2+S(Q3+SQ4))
5962 fmul.x %fp0,%fp2 # S(P1+S(P2+SP3))
5964 fadd.x TANQ1(%pc),%fp3 # Q1+S(Q2+S(Q3+SQ4))
5965 fmul.x %fp1,%fp2 # RS(P1+S(P2+SP3))
5967 fmul.x %fp3,%fp0 # S(Q1+S(Q2+S(Q3+SQ4)))
5969 fadd.x %fp2,%fp1 # R+RS(P1+S(P2+SP3)) = U
5970 fadd.s &0x3F800000,%fp0 # 1+S(Q1+...) = V
5972 fmovm.x (%sp)+,&0x30 # restore fp2,fp3
5975 eor.l &0x80000000,(%sp) # flip the sign of the extended value on top of the stack (presumably U)
5977 fmov.l %d0,%fpcr # restore users round mode,prec
5978 fdiv.x (%sp)+,%fp0 # last inst - possible exception set
5982 #--IF |X| > 15PI, WE USE THE GENERAL ARGUMENT REDUCTION.
5983 #--IF |X| < 2**(-40), RETURN X.
5984 cmp.l %d1,&0x3FFF8000 # compact |X| vs. compact form of 1.0
5989 fmov.l %d0,%fpcr # restore users round mode,prec
5990 mov.b &FMOV_OP,%d1 # last inst is MOVE
5991 fmov.x (%sp)+,%fp0 # last inst - possible exception set
5995 #--TAN(X) = X FOR DENORMALIZED X
5999 #--WHEN REDUCEX IS USED, THE CODE WILL INEVITABLY BE SLOW.
6000 #--THIS REDUCTION METHOD, HOWEVER, IS MUCH FASTER THAN USING
6001 #--THE REMAINDER INSTRUCTION WHICH IS NOW IN SOFTWARE.
6003 fmovm.x &0x3c,-(%sp) # save {fp2-fp5}
6004 mov.l %d2,-(%sp) # save d2
6005 fmov.s &0x00000000,%fp1 # fp1 = 0
6007 #--If compact form of abs(arg) in d0=$7ffeffff, argument is so large that
6008 #--there is a danger of unwanted overflow in first LOOP iteration. In this
6009 #--case, reduce argument by one remainder step to make subsequent reduction
6011 cmp.l %d1,&0x7ffeffff # is arg dangerously large?
6014 # yes; create 2**16383*PI/2
6015 mov.w &0x7ffe,FP_SCR0_EX(%a6)
6016 mov.l &0xc90fdaa2,FP_SCR0_HI(%a6)
6017 clr.l FP_SCR0_LO(%a6)
6019 # create low half of 2**16383*PI/2 at FP_SCR1
6020 mov.w &0x7fdc,FP_SCR1_EX(%a6)
6021 mov.l &0x85a308d3,FP_SCR1_HI(%a6)
6022 clr.l FP_SCR1_LO(%a6)
6024 ftest.x %fp0 # test sign of argument
6027 or.b &0x80,FP_SCR0_EX(%a6) # positive arg
6028 or.b &0x80,FP_SCR1_EX(%a6)
6030 fadd.x FP_SCR0(%a6),%fp0 # high part of reduction is exact
6031 fmov.x %fp0,%fp1 # save high result in fp1
6032 fadd.x FP_SCR1(%a6),%fp0 # low part of reduction
6033 fsub.x %fp0,%fp1 # determine low component of result
6034 fadd.x FP_SCR1(%a6),%fp1 # fp0/fp1 are reduced argument.
6036 #--ON ENTRY, FP0 IS X, ON RETURN, FP0 IS X REM PI/2, |X| <= PI/4.
6037 #--integer quotient will be stored in N
6038 #--Intermediate remainder is 66 bits long; (R,r) in (FP0,FP1)
6040 fmov.x %fp0,INARG(%a6) # +-2**K * F, 1 <= F < 2
6041 mov.w INARG(%a6),%d1
6042 mov.l %d1,%a1 # save a copy of D0
6043 and.l &0x00007FFF,%d1
6044 sub.l &0x00003FFF,%d1 # d0 = K
6048 sub.l &27,%d1 # d0 = L := K-27
6049 mov.b &0,ENDFLAG(%a6)
6052 clr.l %d1 # d0 = L := 0
6053 mov.b &1,ENDFLAG(%a6)
6056 #--FIND THE REMAINDER OF (R,r) W.R.T. 2**L * (PI/2). L IS SO CHOSEN
6057 #--THAT INT( X * (2/PI) / 2**(L) ) < 2**29.
6059 #--CREATE 2**(-L) * (2/PI), SIGN(INARG)*2**(63),
6060 #--2**L * (PIby2_1), 2**L * (PIby2_2)
6062 mov.l &0x00003FFE,%d2 # BIASED EXP OF 2/PI
6063 sub.l %d1,%d2 # BIASED EXP OF 2**(-L)*(2/PI)
6065 mov.l &0xA2F9836E,FP_SCR0_HI(%a6)
6066 mov.l &0x4E44152A,FP_SCR0_LO(%a6)
6067 mov.w %d2,FP_SCR0_EX(%a6) # FP_SCR0 = 2**(-L)*(2/PI)
6070 fmul.x FP_SCR0(%a6),%fp2 # fp2 = X * 2**(-L)*(2/PI)
6072 #--WE MUST NOW FIND INT(FP2). SINCE WE NEED THIS VALUE IN
6073 #--FLOATING POINT FORMAT, THE TWO FMOVE'S FMOVE.L FP <--> N
6074 #--WILL BE TOO INEFFICIENT. THE WAY AROUND IT IS THAT
6075 #--(SIGN(INARG)*2**63 + FP2) - SIGN(INARG)*2**63 WILL GIVE
6076 #--US THE DESIRED VALUE IN FLOATING POINT.
6079 and.l &0x80000000,%d2
6080 or.l &0x5F000000,%d2 # d2 = SIGN(INARG)*2**63 IN SGL
6081 mov.l %d2,TWOTO63(%a6)
6082 fadd.s TWOTO63(%a6),%fp2 # THE FRACTIONAL PART OF FP1 IS ROUNDED
6083 fsub.s TWOTO63(%a6),%fp2 # fp2 = N
6084 # fintrz.x %fp2,%fp2
6086 #--CREATING 2**(L)*Piby2_1 and 2**(L)*Piby2_2
6087 mov.l %d1,%d2 # d2 = L
6089 add.l &0x00003FFF,%d2 # BIASED EXP OF 2**L * (PI/2)
6090 mov.w %d2,FP_SCR0_EX(%a6)
6091 mov.l &0xC90FDAA2,FP_SCR0_HI(%a6)
6092 clr.l FP_SCR0_LO(%a6) # FP_SCR0 = 2**(L) * Piby2_1
6094 add.l &0x00003FDD,%d1
6095 mov.w %d1,FP_SCR1_EX(%a6)
6096 mov.l &0x85A308D3,FP_SCR1_HI(%a6)
6097 clr.l FP_SCR1_LO(%a6) # FP_SCR1 = 2**(L) * Piby2_2
6099 mov.b ENDFLAG(%a6),%d1
6101 #--We are now ready to perform (R+r) - N*P1 - N*P2, P1 = 2**(L) * Piby2_1 and
6102 #--P2 = 2**(L) * Piby2_2
6103 fmov.x %fp2,%fp4 # fp4 = N
6104 fmul.x FP_SCR0(%a6),%fp4 # fp4 = W = N*P1
6105 fmov.x %fp2,%fp5 # fp5 = N
6106 fmul.x FP_SCR1(%a6),%fp5 # fp5 = w = N*P2
6107 fmov.x %fp4,%fp3 # fp3 = W = N*P1
6109 #--we want P+p = W+w but |p| <= half ulp of P
6110 #--Then, we need to compute A := R-P and a := r-p
6111 fadd.x %fp5,%fp3 # fp3 = P
6112 fsub.x %fp3,%fp4 # fp4 = W-P
6114 fsub.x %fp3,%fp0 # fp0 = A := R - P
6115 fadd.x %fp5,%fp4 # fp4 = p = (W-P)+w
6117 fmov.x %fp0,%fp3 # fp3 = A
6118 fsub.x %fp4,%fp1 # fp1 = a := r - p
6120 #--Now we need to normalize (A,a) to "new (R,r)" where R+r = A+a but
6121 #--|r| <= half ulp of R.
6122 fadd.x %fp1,%fp0 # fp0 = R := A+a
6123 #--No need to calculate r if this is the last loop
6127 #--Need to calculate r
6128 fsub.x %fp0,%fp3 # fp3 = A-R
6129 fadd.x %fp3,%fp1 # fp1 = r := (A-R)+a
6133 fmov.l %fp2,INT(%a6)
6134 mov.l (%sp)+,%d2 # restore d2
6135 fmovm.x (%sp)+,&0x3c # restore {fp2-fp5}
6142 #########################################################################
6143 # satan(): computes the arctangent of a normalized number #
6144 # satand(): computes the arctangent of a denormalized number #
6146 # INPUT *************************************************************** #
6147 # a0 = pointer to extended precision input #
6148 # d0 = round precision,mode #
6150 # OUTPUT ************************************************************** #
6153 # ACCURACY and MONOTONICITY ******************************************* #
6154 # The returned result is within 2 ulps in 64 significant bits, #
6155 # i.e. within 0.5001 ulp to 53 bits if the result is subsequently #
6156 # rounded to double precision. The result is provably monotonic #
6157 # in double precision. #
6159 # ALGORITHM *********************************************************** #
6160 # Step 1. If |X| >= 16 or |X| < 1/16, go to Step 5. #
6162 # Step 2. Let X = sgn * 2**k * 1.xxxxxxxx...x. #
6163 # Note that k = -4, -3,..., or 3. #
6164 # Define F = sgn * 2**k * 1.xxxx1, i.e. the first 5 #
6165 # significant bits of X with a bit-1 attached at the 6-th #
6166 # bit position. Define u to be u = (X-F) / (1 + X*F). #
6168 # Step 3. Approximate arctan(u) by a polynomial poly. #
6170 # Step 4. Return arctan(F) + poly, arctan(F) is fetched from a #
6171 # table of values calculated beforehand. Exit. #
6173 # Step 5. If |X| >= 16, go to Step 7. #
6175 # Step 6. Approximate arctan(X) by an odd polynomial in X. Exit. #
6177 # Step 7. Define X' = -1/X. Approximate arctan(X') by an odd #
6178 # polynomial in X'. #
6179 # Arctan(X) = sign(X)*Pi/2 + arctan(X'). Exit. #
6181 #########################################################################
# Constant data for the arctangent code. Each coefficient below occupies
# two longs and is read with double-precision (.d) operands by the code
# that uses it.
#
# A1..A3: used when evaluating U + A1*U*V*(A2 + V*(A3 + V)), V = U*U.
6183 ATANA3: long 0xBFF6687E,0x314987D8
6184 ATANA2: long 0x4002AC69,0x34A26DB3
6185 ATANA1: long 0xBFC2476F,0x4E1DA28E
# B1..B6: used when evaluating X + X*Y*(B1+Y*(B2+Y*(B3+Y*(B4+Y*(B5+Y*B6))))),
# Y = X*X.
6187 ATANB6: long 0x3FB34444,0x7F876989
6188 ATANB5: long 0xBFB744EE,0x7FAF45DB
6189 ATANB4: long 0x3FBC71C6,0x46940220
6190 ATANB3: long 0xBFC24924,0x921872F9
6191 ATANB2: long 0x3FC99999,0x99998FA9
6192 ATANB1: long 0xBFD55555,0x55555555
# C1..C5: used when evaluating X'+X'*Y*(C1+Y*(C2+Y*(C3+Y*(C4+Y*C5)))),
# X' = -1/X, Y = X'*X'.
6194 ATANC5: long 0xBFB70BF3,0x98539E6A
6195 ATANC4: long 0x3FBC7187,0x962D1D7D
6196 ATANC3: long 0xBFC24924,0x827107B8
6197 ATANC2: long 0x3FC99999,0x9996263E
6198 ATANC1: long 0xBFD55555,0x55555536
# +PI/2 and -PI/2, four longs each, read with extended-precision (.x) operands.
6200 PPIBY2: long 0x3FFF0000,0xC90FDAA2,0x2168C235,0x00000000
6201 NPIBY2: long 0xBFFF0000,0xC90FDAA2,0x2168C235,0x00000000
# +TINY and -TINY (exponent word 0x0001, only the top mantissa bit set).
# PTINY is added to NPIBY2 and NTINY to PPIBY2 to form SIGN(X)*(PIBY2 - TINY).
6203 PTINY: long 0x00010000,0x80000000,0x00000000,0x00000000
6204 NTINY: long 0x80010000,0x80000000,0x00000000,0x00000000
# NOTE(review): the 128 four-long rows that follow carry no visible label in
# this listing; presumably they are the ATAN(|F|) table that the arctangent
# code addresses as ATANTBL -- confirm against the full source.
6207 long 0x3FFB0000,0x83D152C5,0x060B7A51,0x00000000
6208 long 0x3FFB0000,0x8BC85445,0x65498B8B,0x00000000
6209 long 0x3FFB0000,0x93BE4060,0x17626B0D,0x00000000
6210 long 0x3FFB0000,0x9BB3078D,0x35AEC202,0x00000000
6211 long 0x3FFB0000,0xA3A69A52,0x5DDCE7DE,0x00000000
6212 long 0x3FFB0000,0xAB98E943,0x62765619,0x00000000
6213 long 0x3FFB0000,0xB389E502,0xF9C59862,0x00000000
6214 long 0x3FFB0000,0xBB797E43,0x6B09E6FB,0x00000000
6215 long 0x3FFB0000,0xC367A5C7,0x39E5F446,0x00000000
6216 long 0x3FFB0000,0xCB544C61,0xCFF7D5C6,0x00000000
6217 long 0x3FFB0000,0xD33F62F8,0x2488533E,0x00000000
6218 long 0x3FFB0000,0xDB28DA81,0x62404C77,0x00000000
6219 long 0x3FFB0000,0xE310A407,0x8AD34F18,0x00000000
6220 long 0x3FFB0000,0xEAF6B0A8,0x188EE1EB,0x00000000
6221 long 0x3FFB0000,0xF2DAF194,0x9DBE79D5,0x00000000
6222 long 0x3FFB0000,0xFABD5813,0x61D47E3E,0x00000000
6223 long 0x3FFC0000,0x8346AC21,0x0959ECC4,0x00000000
6224 long 0x3FFC0000,0x8B232A08,0x304282D8,0x00000000
6225 long 0x3FFC0000,0x92FB70B8,0xD29AE2F9,0x00000000
6226 long 0x3FFC0000,0x9ACF476F,0x5CCD1CB4,0x00000000
6227 long 0x3FFC0000,0xA29E7630,0x4954F23F,0x00000000
6228 long 0x3FFC0000,0xAA68C5D0,0x8AB85230,0x00000000
6229 long 0x3FFC0000,0xB22DFFFD,0x9D539F83,0x00000000
6230 long 0x3FFC0000,0xB9EDEF45,0x3E900EA5,0x00000000
6231 long 0x3FFC0000,0xC1A85F1C,0xC75E3EA5,0x00000000
6232 long 0x3FFC0000,0xC95D1BE8,0x28138DE6,0x00000000
6233 long 0x3FFC0000,0xD10BF300,0x840D2DE4,0x00000000
6234 long 0x3FFC0000,0xD8B4B2BA,0x6BC05E7A,0x00000000
6235 long 0x3FFC0000,0xE0572A6B,0xB42335F6,0x00000000
6236 long 0x3FFC0000,0xE7F32A70,0xEA9CAA8F,0x00000000
6237 long 0x3FFC0000,0xEF888432,0x64ECEFAA,0x00000000
6238 long 0x3FFC0000,0xF7170A28,0xECC06666,0x00000000
6239 long 0x3FFD0000,0x812FD288,0x332DAD32,0x00000000
6240 long 0x3FFD0000,0x88A8D1B1,0x218E4D64,0x00000000
6241 long 0x3FFD0000,0x9012AB3F,0x23E4AEE8,0x00000000
6242 long 0x3FFD0000,0x976CC3D4,0x11E7F1B9,0x00000000
6243 long 0x3FFD0000,0x9EB68949,0x3889A227,0x00000000
6244 long 0x3FFD0000,0xA5EF72C3,0x4487361B,0x00000000
6245 long 0x3FFD0000,0xAD1700BA,0xF07A7227,0x00000000
6246 long 0x3FFD0000,0xB42CBCFA,0xFD37EFB7,0x00000000
6247 long 0x3FFD0000,0xBB303A94,0x0BA80F89,0x00000000
6248 long 0x3FFD0000,0xC22115C6,0xFCAEBBAF,0x00000000
6249 long 0x3FFD0000,0xC8FEF3E6,0x86331221,0x00000000
6250 long 0x3FFD0000,0xCFC98330,0xB4000C70,0x00000000
6251 long 0x3FFD0000,0xD6807AA1,0x102C5BF9,0x00000000
6252 long 0x3FFD0000,0xDD2399BC,0x31252AA3,0x00000000
6253 long 0x3FFD0000,0xE3B2A855,0x6B8FC517,0x00000000
6254 long 0x3FFD0000,0xEA2D764F,0x64315989,0x00000000
6255 long 0x3FFD0000,0xF3BF5BF8,0xBAD1A21D,0x00000000
6256 long 0x3FFE0000,0x801CE39E,0x0D205C9A,0x00000000
6257 long 0x3FFE0000,0x8630A2DA,0xDA1ED066,0x00000000
6258 long 0x3FFE0000,0x8C1AD445,0xF3E09B8C,0x00000000
6259 long 0x3FFE0000,0x91DB8F16,0x64F350E2,0x00000000
6260 long 0x3FFE0000,0x97731420,0x365E538C,0x00000000
6261 long 0x3FFE0000,0x9CE1C8E6,0xA0B8CDBA,0x00000000
6262 long 0x3FFE0000,0xA22832DB,0xCADAAE09,0x00000000
6263 long 0x3FFE0000,0xA746F2DD,0xB7602294,0x00000000
6264 long 0x3FFE0000,0xAC3EC0FB,0x997DD6A2,0x00000000
6265 long 0x3FFE0000,0xB110688A,0xEBDC6F6A,0x00000000
6266 long 0x3FFE0000,0xB5BCC490,0x59ECC4B0,0x00000000
6267 long 0x3FFE0000,0xBA44BC7D,0xD470782F,0x00000000
6268 long 0x3FFE0000,0xBEA94144,0xFD049AAC,0x00000000
6269 long 0x3FFE0000,0xC2EB4ABB,0x661628B6,0x00000000
6270 long 0x3FFE0000,0xC70BD54C,0xE602EE14,0x00000000
6271 long 0x3FFE0000,0xCD000549,0xADEC7159,0x00000000
6272 long 0x3FFE0000,0xD48457D2,0xD8EA4EA3,0x00000000
6273 long 0x3FFE0000,0xDB948DA7,0x12DECE3B,0x00000000
6274 long 0x3FFE0000,0xE23855F9,0x69E8096A,0x00000000
6275 long 0x3FFE0000,0xE8771129,0xC4353259,0x00000000
6276 long 0x3FFE0000,0xEE57C16E,0x0D379C0D,0x00000000
6277 long 0x3FFE0000,0xF3E10211,0xA87C3779,0x00000000
6278 long 0x3FFE0000,0xF919039D,0x758B8D41,0x00000000
6279 long 0x3FFE0000,0xFE058B8F,0x64935FB3,0x00000000
6280 long 0x3FFF0000,0x8155FB49,0x7B685D04,0x00000000
6281 long 0x3FFF0000,0x83889E35,0x49D108E1,0x00000000
6282 long 0x3FFF0000,0x859CFA76,0x511D724B,0x00000000
6283 long 0x3FFF0000,0x87952ECF,0xFF8131E7,0x00000000
6284 long 0x3FFF0000,0x89732FD1,0x9557641B,0x00000000
6285 long 0x3FFF0000,0x8B38CAD1,0x01932A35,0x00000000
6286 long 0x3FFF0000,0x8CE7A8D8,0x301EE6B5,0x00000000
6287 long 0x3FFF0000,0x8F46A39E,0x2EAE5281,0x00000000
6288 long 0x3FFF0000,0x922DA7D7,0x91888487,0x00000000
6289 long 0x3FFF0000,0x94D19FCB,0xDEDF5241,0x00000000
6290 long 0x3FFF0000,0x973AB944,0x19D2A08B,0x00000000
6291 long 0x3FFF0000,0x996FF00E,0x08E10B96,0x00000000
6292 long 0x3FFF0000,0x9B773F95,0x12321DA7,0x00000000
6293 long 0x3FFF0000,0x9D55CC32,0x0F935624,0x00000000
6294 long 0x3FFF0000,0x9F100575,0x006CC571,0x00000000
6295 long 0x3FFF0000,0xA0A9C290,0xD97CC06C,0x00000000
6296 long 0x3FFF0000,0xA22659EB,0xEBC0630A,0x00000000
6297 long 0x3FFF0000,0xA388B4AF,0xF6EF0EC9,0x00000000
6298 long 0x3FFF0000,0xA4D35F10,0x61D292C4,0x00000000
6299 long 0x3FFF0000,0xA60895DC,0xFBE3187E,0x00000000
6300 long 0x3FFF0000,0xA72A51DC,0x7367BEAC,0x00000000
6301 long 0x3FFF0000,0xA83A5153,0x0956168F,0x00000000
6302 long 0x3FFF0000,0xA93A2007,0x7539546E,0x00000000
6303 long 0x3FFF0000,0xAA9E7245,0x023B2605,0x00000000
6304 long 0x3FFF0000,0xAC4C84BA,0x6FE4D58F,0x00000000
6305 long 0x3FFF0000,0xADCE4A4A,0x606B9712,0x00000000
6306 long 0x3FFF0000,0xAF2A2DCD,0x8D263C9C,0x00000000
6307 long 0x3FFF0000,0xB0656F81,0xF22265C7,0x00000000
6308 long 0x3FFF0000,0xB1846515,0x0F71496A,0x00000000
6309 long 0x3FFF0000,0xB28AAA15,0x6F9ADA35,0x00000000
6310 long 0x3FFF0000,0xB37B44FF,0x3766B895,0x00000000
6311 long 0x3FFF0000,0xB458C3DC,0xE9630433,0x00000000
6312 long 0x3FFF0000,0xB525529D,0x562246BD,0x00000000
6313 long 0x3FFF0000,0xB5E2CCA9,0x5F9D88CC,0x00000000
6314 long 0x3FFF0000,0xB692CADA,0x7ACA1ADA,0x00000000
6315 long 0x3FFF0000,0xB736AEA7,0xA6925838,0x00000000
6316 long 0x3FFF0000,0xB7CFAB28,0x7E9F7B36,0x00000000
6317 long 0x3FFF0000,0xB85ECC66,0xCB219835,0x00000000
6318 long 0x3FFF0000,0xB8E4FD5A,0x20A593DA,0x00000000
6319 long 0x3FFF0000,0xB99F41F6,0x4AFF9BB5,0x00000000
6320 long 0x3FFF0000,0xBA7F1E17,0x842BBE7B,0x00000000
6321 long 0x3FFF0000,0xBB471285,0x7637E17D,0x00000000
6322 long 0x3FFF0000,0xBBFABE8A,0x4788DF6F,0x00000000
6323 long 0x3FFF0000,0xBC9D0FAD,0x2B689D79,0x00000000
6324 long 0x3FFF0000,0xBD306A39,0x471ECD86,0x00000000
6325 long 0x3FFF0000,0xBDB6C731,0x856AF18A,0x00000000
6326 long 0x3FFF0000,0xBE31CAC5,0x02E80D70,0x00000000
6327 long 0x3FFF0000,0xBEA2D55C,0xE33194E2,0x00000000
6328 long 0x3FFF0000,0xBF0B10B7,0xC03128F0,0x00000000
6329 long 0x3FFF0000,0xBF6B7A18,0xDACB778D,0x00000000
6330 long 0x3FFF0000,0xBFC4EA46,0x63FA18F6,0x00000000
6331 long 0x3FFF0000,0xC0181BDE,0x8B89A454,0x00000000
6332 long 0x3FFF0000,0xC065B066,0xCFBF6439,0x00000000
6333 long 0x3FFF0000,0xC0AE345F,0x56340AE6,0x00000000
6334 long 0x3FFF0000,0xC0F22291,0x9CB9E6A7,0x00000000
6346 #--ENTRY POINT FOR ATAN(X), HERE X IS FINITE, NON-ZERO, AND NOT NAN'S
#--THE INPUT IS READ THROUGH a0. d0 IS WRITTEN TO FPCR ("RESTORE USERS RND
#--MODE,PREC") AHEAD OF THE LAST OPERATION ON THE PATHS WHERE THAT IS SHOWN.
#--NOTE(review): SEVERAL COMPARES BELOW HAVE NO VISIBLE CONDITIONAL BRANCH OR
#--TARGET LABEL IN THIS LISTING; CONFIRM CONTROL FLOW AGAINST THE FULL SOURCE.
6348 fmov.x (%a0),%fp0 # LOAD INPUT
6353 and.l &0x7FFFFFFF,%d1 # CLEAR BIT 31 (THE SIGN BIT) OF d1
6355 cmp.l %d1,&0x3FFB8000 # |X| >= 1/16?
6360 cmp.l %d1,&0x4002FFFF # |X| < 16 ?
6364 #--THE MOST LIKELY CASE, |X| IN [1/16, 16). WE USE TABLE TECHNIQUE
6365 #--THE IDEA IS ATAN(X) = ATAN(F) + ATAN( [X-F] / [1+XF] ).
6366 #--SO IF F IS CHOSEN TO BE CLOSE TO X AND ATAN(F) IS STORED IN
6367 #--A TABLE, ALL WE NEED IS TO APPROXIMATE ATAN(U) WHERE
6368 #--U = (X-F)/(1+XF) IS SMALL (REMEMBER F IS CLOSE TO X). IT IS
6369 #--TRUE THAT A DIVIDE IS NOW NEEDED, BUT THE APPROXIMATION FOR
6370 #--ATAN(U) IS A VERY SHORT POLYNOMIAL AND THE INDEXING TO
6371 #--FETCH F AND SAVING OF REGISTERS CAN ALL BE HIDDEN UNDER THE
6372 #--DIVIDE. IN THE END THIS METHOD IS MUCH FASTER THAN A TRADITIONAL
6373 #--ONE. NOTE ALSO THAT THE TRADITIONAL SCHEME THAT APPROXIMATES
6374 #--ATAN(X) DIRECTLY WILL NEED TO USE A RATIONAL APPROXIMATION
6375 #--(DIVISION NEEDED) ANYWAY BECAUSE A POLYNOMIAL APPROXIMATION
6376 #--WILL INVOLVE A VERY LONG POLYNOMIAL.
6378 #--NOW WE SEE X AS +-2^K * 1.BBBBBBB....B <- 1. + 63 BITS
6379 #--WE CHOOSE F TO BE +-2^K * 1.BBBB1
6380 #--THAT IS, IT MATCHES THE EXPONENT AND FIRST 5 BITS OF X, AND THE
6381 #--SIXTH BIT IS SET TO 1. SINCE K = -4, -3, ..., 3, THERE
6382 #--ARE ONLY 8 TIMES 16 = 2^7 = 128 |F|'S. SINCE ATAN(-|F|) IS
6383 #-- -ATAN(|F|), WE NEED TO STORE ONLY ATAN(|F|).
6387 and.l &0xF8000000,XFRAC(%a6) # FIRST 5 BITS
6388 or.l &0x04000000,XFRAC(%a6) # SET 6-TH BIT TO 1
6389 mov.l &0x00000000,XFRACLO(%a6) # LOCATION OF X IS NOW F
6391 fmov.x %fp0,%fp1 # FP1 IS X
6392 fmul.x X(%a6),%fp1 # FP1 IS X*F, NOTE THAT X*F > 0
6393 fsub.x X(%a6),%fp0 # FP0 IS X-F
6394 fadd.s &0x3F800000,%fp1 # FP1 IS 1 + X*F
6395 fdiv.x %fp1,%fp0 # FP0 IS U = (X-F)/(1+X*F)
6397 #--WHILE THE DIVISION IS TAKING ITS TIME, WE FETCH ATAN(|F|)
6398 #--CREATE ATAN(F) AND STORE IT IN ATANF, AND
6399 #--SAVE REGISTERS FP2.
6401 mov.l %d2,-(%sp) # SAVE d2 TEMPORARILY
6402 mov.l %d1,%d2 # THE EXP AND 16 BITS OF X
6403 and.l &0x00007800,%d1 # 4 VARYING BITS OF F'S FRACTION
6404 and.l &0x7FFF0000,%d2 # EXPONENT OF F
6405 sub.l &0x3FFB0000,%d2 # K+4
6407 add.l %d2,%d1 # THE 7 BITS IDENTIFYING F
6408 asr.l &7,%d1 # INDEX INTO TBL OF ATAN(|F|)
6409 lea ATANTBL(%pc),%a1
6410 add.l %d1,%a1 # ADDRESS OF ATAN(|F|)
#--THE THREE LONGS OF THE TABLE ENTRY ARE COPIED INTO ATANF/ATANFHI/ATANFLO.
6411 mov.l (%a1)+,ATANF(%a6)
6412 mov.l (%a1)+,ATANFHI(%a6)
6413 mov.l (%a1)+,ATANFLO(%a6) # ATANF IS NOW ATAN(|F|)
6414 mov.l X(%a6),%d1 # LOAD SIGN AND EXPO. AGAIN
6415 and.l &0x80000000,%d1 # SIGN(F)
6416 or.l %d1,ATANF(%a6) # ATANF IS NOW SIGN(F)*ATAN(|F|)
6417 mov.l (%sp)+,%d2 # RESTORE d2
6419 #--THAT'S ALL I HAVE TO DO FOR NOW,
6420 #--BUT ALAS, THE DIVIDE IS STILL CRANKING!
6422 #--U IN FP0, WE ARE NOW READY TO COMPUTE ATAN(U) AS
6423 #--U + A1*U*V*(A2 + V*(A3 + V)), V = U*U
6424 #--THE POLYNOMIAL MAY LOOK STRANGE, BUT IS NEVERTHELESS CORRECT.
6425 #--THE NATURAL FORM IS U + U*V*(A1 + V*(A2 + V*A3))
6426 #--WHAT WE HAVE HERE IS MERELY A1 = A3, A2 = A1/A3, A3 = A2/A3.
6427 #--THE REASON FOR THIS REARRANGEMENT IS TO MAKE THE INDEPENDENT
6428 #--PARTS A1*U*V AND (A2 + ... STUFF) MORE LOAD-BALANCED
6430 fmovm.x &0x04,-(%sp) # save fp2
#--NOTE(review): THE COMMENTS BELOW TREAT FP1 AS V = U*U; THE INSTRUCTIONS
#--THAT FORM V ARE NOT VISIBLE IN THIS LISTING -- CONFIRM.
6434 fmov.d ATANA3(%pc),%fp2
6435 fadd.x %fp1,%fp2 # A3+V
6436 fmul.x %fp1,%fp2 # V*(A3+V)
6437 fmul.x %fp0,%fp1 # U*V
6438 fadd.d ATANA2(%pc),%fp2 # A2+V*(A3+V)
6439 fmul.d ATANA1(%pc),%fp1 # A1*U*V
6440 fmul.x %fp2,%fp1 # A1*U*V*(A2+V*(A3+V))
6441 fadd.x %fp1,%fp0 # ATAN(U), FP1 RELEASED
6443 fmovm.x (%sp)+,&0x20 # restore fp2
6445 fmov.l %d0,%fpcr # restore users rnd mode,prec
6446 fadd.x ATANF(%a6),%fp0 # ATAN(X)
6450 #--|X| IS IN d1 IN COMPACT FORM. FP1, d0 SAVED.
6451 #--FP0 IS X AND |X| <= 1/16 OR |X| >= 16.
6452 cmp.l %d1,&0x3FFF8000
6453 bgt.w ATANBIG # I.E. |X| >= 16
6457 #--IF |X| < 2^(-40), RETURN X AS ANSWER. OTHERWISE, APPROXIMATE
6458 #--ATAN(X) BY X + X*Y*(B1+Y*(B2+Y*(B3+Y*(B4+Y*(B5+Y*B6)))))
6459 #--WHICH IS X + X*Y*( [B1+Z*(B3+Z*B5)] + [Y*(B2+Z*(B4+Z*B6))] )
6460 #--WHERE Y = X*X, AND Z = Y*Y.
6462 cmp.l %d1,&0x3FD78000 # |X| < 2^(-40) ?
6465 #--COMPUTE POLYNOMIAL
6466 fmovm.x &0x0c,-(%sp) # save fp2/fp3
6468 fmul.x %fp0,%fp0 # FP0 IS Y = X*X
6471 fmul.x %fp1,%fp1 # FP1 IS Z = Y*Y
6473 fmov.d ATANB6(%pc),%fp2
6474 fmov.d ATANB5(%pc),%fp3
6476 fmul.x %fp1,%fp2 # Z*B6
6477 fmul.x %fp1,%fp3 # Z*B5
6479 fadd.d ATANB4(%pc),%fp2 # B4+Z*B6
6480 fadd.d ATANB3(%pc),%fp3 # B3+Z*B5
6482 fmul.x %fp1,%fp2 # Z*(B4+Z*B6)
6483 fmul.x %fp3,%fp1 # Z*(B3+Z*B5)
6485 fadd.d ATANB2(%pc),%fp2 # B2+Z*(B4+Z*B6)
6486 fadd.d ATANB1(%pc),%fp1 # B1+Z*(B3+Z*B5)
6488 fmul.x %fp0,%fp2 # Y*(B2+Z*(B4+Z*B6))
6489 fmul.x X(%a6),%fp0 # X*Y
6491 fadd.x %fp2,%fp1 # [B1+Z*(B3+Z*B5)]+[Y*(B2+Z*(B4+Z*B6))]
6493 fmul.x %fp1,%fp0 # X*Y*([B1+Z*(B3+Z*B5)]+[Y*(B2+Z*(B4+Z*B6))])
6495 fmovm.x (%sp)+,&0x30 # restore fp2/fp3
6497 fmov.l %d0,%fpcr # restore users rnd mode,prec
6502 #--|X| < 2^(-40), ATAN(X) = X
6504 fmov.l %d0,%fpcr # restore users rnd mode,prec
6505 mov.b &FMOV_OP,%d1 # last inst is MOVE
6506 fmov.x X(%a6),%fp0 # last inst - possible exception set
6511 #--IF |X| > 2^(100), RETURN SIGN(X)*(PI/2 - TINY). OTHERWISE,
6512 #--RETURN SIGN(X)*PI/2 + ATAN(-1/X).
6513 cmp.l %d1,&0x40638000 # |X| > 2^(100) ?
6516 #--APPROXIMATE ATAN(-1/X) BY
6517 #--X'+X'*Y*(C1+Y*(C2+Y*(C3+Y*(C4+Y*C5)))), X' = -1/X, Y = X'*X'
6518 #--THIS CAN BE RE-WRITTEN AS
6519 #--X'+X'*Y*( [C1+Z*(C3+Z*C5)] + [Y*(C2+Z*C4)] ), Z = Y*Y.
6521 fmovm.x &0x0c,-(%sp) # save fp2/fp3
6523 fmov.s &0xBF800000,%fp1 # LOAD -1
6524 fdiv.x %fp0,%fp1 # FP1 IS -1/X
6526 #--DIVIDE IS STILL CRANKING
6528 fmov.x %fp1,%fp0 # FP0 IS X'
6529 fmul.x %fp0,%fp0 # FP0 IS Y = X'*X'
6530 fmov.x %fp1,X(%a6) # X IS REALLY X'
6533 fmul.x %fp1,%fp1 # FP1 IS Z = Y*Y
6535 fmov.d ATANC5(%pc),%fp3
6536 fmov.d ATANC4(%pc),%fp2
6538 fmul.x %fp1,%fp3 # Z*C5
6539 fmul.x %fp1,%fp2 # Z*C4
6541 fadd.d ATANC3(%pc),%fp3 # C3+Z*C5
6542 fadd.d ATANC2(%pc),%fp2 # C2+Z*C4
6544 fmul.x %fp3,%fp1 # Z*(C3+Z*C5), FP3 RELEASED
6545 fmul.x %fp0,%fp2 # Y*(C2+Z*C4)
6547 fadd.d ATANC1(%pc),%fp1 # C1+Z*(C3+Z*C5)
6548 fmul.x X(%a6),%fp0 # X'*Y
6550 fadd.x %fp2,%fp1 # [Y*(C2+Z*C4)]+[C1+Z*(C3+Z*C5)]
6552 fmul.x %fp1,%fp0 # X'*Y*([C1+Z*(C3+Z*C5)]
6553 # ... +[Y*(C2+Z*C4)])
6556 fmovm.x (%sp)+,&0x30 # restore fp2/fp3
6558 fmov.l %d0,%fpcr # restore users rnd mode,prec
6563 fadd.x NPIBY2(%pc),%fp0 # ADD -PI/2
6567 fadd.x PPIBY2(%pc),%fp0 # ADD +PI/2
6571 #--RETURN SIGN(X)*(PIBY2 - TINY) = SIGN(X)*PIBY2 - SIGN(X)*TINY
6576 fmov.x NPIBY2(%pc),%fp0 # -PIBY2 ...
6578 fadd.x PTINY(%pc),%fp0 # ... + TINY
6582 fmov.x PPIBY2(%pc),%fp0 # +PIBY2 ...
6584 fadd.x NTINY(%pc),%fp0 # ... - TINY
6588 #--ENTRY POINT FOR ATAN(X) FOR DENORMALIZED ARGUMENT
6592 #########################################################################
6593 # sasin(): computes the inverse sine of a normalized input #
6594 # sasind(): computes the inverse sine of a denormalized input #
6596 # INPUT *************************************************************** #
6597 # a0 = pointer to extended precision input #
6598 # d0 = round precision,mode #
6600 # OUTPUT ************************************************************** #
# The code below leaves its result in fp0. #
6603 # ACCURACY and MONOTONICITY ******************************************* #
6604 # The returned result is within 3 ulps in 64 significant bits, #
6605 # i.e. within 0.5001 ulp to 53 bits if the result is subsequently #
6606 # rounded to double precision. The result is provably monotonic #
6607 # in double precision. #
6609 # ALGORITHM *********************************************************** #
6612 # 1. If |X| >= 1, go to 3. #
6614 # 2. (|X| < 1) Calculate asin(X) by #
6615 # z := sqrt( [1-X][1+X] ) #
6616 # asin(X) = atan( X / z ). #
6619 # 3. If |X| > 1, go to 5. #
6621 # 4. (|X| = 1) sgn := sign(X), return asin(X) := sgn * Pi/2. Exit.#
6623 # 5. (|X| > 1) Generate an invalid operation by 0 * infinity. #
6626 #########################################################################
# NOTE(review): the compares below have no visible conditional branch or
# target label in this listing; confirm control flow against the full source.
6630 fmov.x (%a0),%fp0 # LOAD INPUT
6634 and.l &0x7FFFFFFF,%d1 # clear bit 31 (the sign bit) of d1
6635 cmp.l %d1,&0x3FFF8000 # |X| >= 1 ? (algorithm step 1)
6638 # This catch is added here for the '060 QSP. Originally, the call to
6639 # satan() would handle this case by causing the exception which would
6640 # not be caught until gen_except(). Now, with the exceptions being
6641 # detected inside of satan(), the exception would have been handled there
6642 # instead of inside sasin() as expected.
6643 cmp.l %d1,&0x3FD78000 # |X| < 2^(-40) ?
6646 #--THIS IS THE USUAL CASE, |X| < 1
6647 #--ASIN(X) = ATAN( X / SQRT( (1-X)(1+X) ) )
6650 fmov.s &0x3F800000,%fp1 # fp1 = 1.0 (single-precision immediate)
6651 fsub.x %fp0,%fp1 # 1-X
6652 fmovm.x &0x4,-(%sp) # {fp2}
6653 fmov.s &0x3F800000,%fp2 # fp2 = 1.0
6654 fadd.x %fp0,%fp2 # 1+X
6655 fmul.x %fp2,%fp1 # (1+X)(1-X)
6656 fmovm.x (%sp)+,&0x20 # {fp2}
6657 fsqrt.x %fp1 # SQRT([1-X][1+X])
6658 fdiv.x %fp1,%fp0 # X/SQRT([1-X][1+X])
6659 fmovm.x &0x01,-(%sp) # save X/SQRT(...)
6660 lea (%sp),%a0 # pass ptr to X/SQRT(...)
# NOTE(review): no call is visible here between passing the pointer and
# popping the operand; the algorithm header says atan() is applied -- confirm.
6662 add.l &0xc,%sp # clear X/SQRT(...) from stack
# NOTE(review): per the algorithm header this compare is meant to test |X|
# against 1.0; no instruction taking the absolute value of fp0 is visible here.
6667 fcmp.s %fp0,&0x3F800000
6668 fbgt t_operr # cause an operr exception
6670 #--|X| = 1, ASIN(X) = +- PI/2.
6672 fmov.x PIBY2(%pc),%fp0 # load PIBY2; the sign is built in d1 below
6674 and.l &0x80000000,%d1 # SIGN BIT OF X
6675 or.l &0x3F800000,%d1 # +-1 IN SGL FORMAT
6676 mov.l %d1,-(%sp) # push SIGN(X) IN SGL-FMT
6681 #--|X| < 2^(-40), ASIN(X) = X
6683 fmov.l %d0,%fpcr # restore users rnd mode,prec
6684 mov.b &FMOV_OP,%d1 # last inst is MOVE
6685 fmov.x (%a0),%fp0 # last inst - possible exception
6689 #--ASIN(X) = X FOR DENORMALIZED X
6693 #########################################################################
6694 # sacos(): computes the inverse cosine of a normalized input #
6695 # sacosd(): computes the inverse cosine of a denormalized input #
6697 # INPUT *************************************************************** #
6698 # a0 = pointer to extended precision input #
6699 # d0 = round precision,mode #
6701 # OUTPUT ************************************************************** #
# The code below leaves its result in fp0. #
6704 # ACCURACY and MONOTONICITY ******************************************* #
6705 # The returned result is within 3 ulps in 64 significant bits, #
6706 # i.e. within 0.5001 ulp to 53 bits if the result is subsequently #
6707 # rounded to double precision. The result is provably monotonic #
6708 # in double precision. #
6710 # ALGORITHM *********************************************************** #
6713 # 1. If |X| >= 1, go to 3. #
6715 # 2. (|X| < 1) Calculate acos(X) by #
6716 # z := (1-X) / (1+X) #
6717 # acos(X) = 2 * atan( sqrt(z) ). #
6720 # 3. If |X| > 1, go to 5. #
6722 # 4. (|X| = 1) If X > 0, return 0. Otherwise, return Pi. Exit. #
6724 # 5. (|X| > 1) Generate an invalid operation by 0 * infinity. #
6727 #########################################################################
# NOTE(review): the compares below have no visible conditional branch or
# target label in this listing; confirm control flow against the full source.
6731 fmov.x (%a0),%fp0 # LOAD INPUT
6733 mov.l (%a0),%d1 # pack exp w/ upper 16 fraction
6735 and.l &0x7FFFFFFF,%d1 # clear bit 31 (the sign bit) of d1
6736 cmp.l %d1,&0x3FFF8000 # |X| >= 1 ? (algorithm step 1)
6739 #--THIS IS THE USUAL CASE, |X| < 1
6740 #--ACOS(X) = 2 * ATAN( SQRT( (1-X)/(1+X) ) )
6743 fmov.s &0x3F800000,%fp1 # fp1 = 1.0 (single-precision immediate)
6744 fadd.x %fp0,%fp1 # 1+X
# NOTE(review): the "1-X" comment on the next line requires fp0 to hold -X at
# this point; no negation of fp0 is visible in this listing -- confirm.
6746 fadd.s &0x3F800000,%fp0 # 1-X
6747 fdiv.x %fp1,%fp0 # (1-X)/(1+X)
6748 fsqrt.x %fp0 # SQRT((1-X)/(1+X))
6749 mov.l %d0,-(%sp) # save original users fpcr
6751 fmovm.x &0x01,-(%sp) # save SQRT(...) to stack
6752 lea (%sp),%a0 # pass ptr to sqrt
6753 bsr satan # ATAN(SQRT([1-X]/[1+X]))
6754 add.l &0xc,%sp # clear SQRT(...) from stack
6756 fmov.l (%sp)+,%fpcr # restore users round prec,mode
6757 fadd.x %fp0,%fp0 # 2 * ATAN( STUFF )
# NOTE(review): per the algorithm header this compare is meant to test |X|
# against 1.0; no instruction taking the absolute value of fp0 is visible here.
6762 fcmp.s %fp0,&0x3F800000
6763 fbgt t_operr # cause an operr exception
6765 #--|X| = 1, ACOS(X) = 0 OR PI
6766 tst.b (%a0) # is X positive or negative?
6770 #Returns PI and inexact exception
6772 fmov.x PI(%pc),%fp0 # load PI
6773 fmov.l %d0,%fpcr # load round mode,prec
6774 fadd.s &0x00800000,%fp0 # add a small value (single-precision immediate)
6778 bra ld_pzero # answer is positive zero
6781 #--ACOS(X) = PI/2 FOR DENORMALIZED X
6783 fmov.l %d0,%fpcr # load user's rnd mode/prec
6784 fmov.x PIBY2(%pc),%fp0 # result is PIBY2
6787 #########################################################################
6788 # setox(): computes the exponential for a normalized input #
6789 # setoxd(): computes the exponential for a denormalized input #
6790 # setoxm1(): computes the exponential minus 1 for a normalized input #
6791 # setoxm1d(): computes the exponential minus 1 for a denormalized input #
6793 # INPUT *************************************************************** #
6794 # a0 = pointer to extended precision input #
6795 # d0 = round precision,mode #
6797 # OUTPUT ************************************************************** #
6798 # fp0 = exp(X) or exp(X)-1 #
6800 # ACCURACY and MONOTONICITY ******************************************* #
6801 # The returned result is within 0.85 ulps in 64 significant bits, #
6802 # i.e. within 0.5001 ulp to 53 bits if the result is subsequently #
6803 # rounded to double precision. The result is provably monotonic #
6804 # in double precision. #
6806 # ALGORITHM and IMPLEMENTATION **************************************** #
6810 # Step 1. Set ans := 1.0 #
6812 # Step 2. Return ans := ans + sign(X)*2^(-126). Exit. #
6813 # Notes: This will always generate one exception -- inexact. #
6819 # Step 1. Filter out extreme cases of input argument. #
6820 # 1.1 If |X| >= 2^(-65), go to Step 1.3. #
6821 # 1.2 Go to Step 7. #
6822 # 1.3 If |X| < 16380 log(2), go to Step 2. #
6823 # 1.4 Go to Step 8. #
6824 # Notes: The usual case should take the branches 1.1 -> 1.3 -> 2.#
6825 # To avoid the use of floating-point comparisons, a #
6826 # compact representation of |X| is used. This format is a #
6827 # 32-bit integer, the upper (more significant) 16 bits #
6828 # are the sign and biased exponent field of |X|; the #
6829 # lower 16 bits are the 16 most significant fraction #
6830 # (including the explicit bit) bits of |X|. Consequently, #
6831 # the comparisons in Steps 1.1 and 1.3 can be performed #
6832 # by integer comparison. Note also that the constant #
6833 # 16380 log(2) used in Step 1.3 is also in the compact #
6834 # form. Thus taking the branch to Step 2 guarantees #
6835 # |X| < 16380 log(2). There is no harm to have a small #
6836 # number of cases where |X| is less than, but close to, #
6837 # 16380 log(2) and the branch to Step 8 is taken. #
6839 # Step 2. Calculate N = round-to-nearest-int( X * 64/log2 ). #
6840 # 2.1 Set AdjFlag := 0 (indicates the branch 1.3 -> 2 #
6842 # 2.2 N := round-to-nearest-integer( X * 64/log2 ). #
6843 # 2.3 Calculate J = N mod 64; so J = 0,1,2,..., #
6845 # 2.4 Calculate M = (N - J)/64; so N = 64M + J. #
6846 # 2.5 Calculate the address of the stored value of #
6848 # 2.6 Create the value Scale = 2^M. #
6849 # Notes: The calculation in 2.2 is really performed by #
6850 # Z := X * constant #
6851 # N := round-to-nearest-integer(Z) #
6853 # constant := single-precision( 64/log 2 ). #
6855 # Using a single-precision constant avoids memory #
6856 # access. Another effect of using a single-precision #
6857 # "constant" is that the calculated value Z is #
6859 # Z = X*(64/log2)*(1+eps), |eps| <= 2^(-24). #
6861 # This error has to be considered later in Steps 3 and 4. #
6863 # Step 3. Calculate X - N*log2/64. #
6864 # 3.1 R := X + N*L1, #
6865 # where L1 := single-precision(-log2/64). #
6866 # 3.2 R := R + N*L2, #
6867 # L2 := extended-precision(-log2/64 - L1).#
6868 # Notes: a) The way L1 and L2 are chosen ensures L1+L2 #
6869 # approximate the value -log2/64 to 88 bits of accuracy. #
6870 # b) N*L1 is exact because N is no longer than 22 bits #
6871 # and L1 is no longer than 24 bits. #
6872 # c) The calculation X+N*L1 is also exact due to #
6873 # cancellation. Thus, R is practically X+N(L1+L2) to full #
6875 # d) It is important to estimate how large can |R| be #
6878 # N = rnd-to-int( X*64/log2 (1+eps) ), |eps|<=2^(-24) #
6879 # X*64/log2 (1+eps) = N + f, |f| <= 0.5 #
6880 # X*64/log2 - N = f - eps*X 64/log2 #
6881 # X - N*log2/64 = f*log2/64 - eps*X #
6884 # Now |X| <= 16446 log2, thus #
6886 # |X - N*log2/64| <= (0.5 + 16446/2^(18))*log2/64 #
6887 # <= 0.57 log2/64. #
6888 # This bound will be used in Step 4. #
6890 # Step 4. Approximate exp(R)-1 by a polynomial #
6891 # p = R + R*R*(A1 + R*(A2 + R*(A3 + R*(A4 + R*A5)))) #
6892 # Notes: a) In order to reduce memory access, the coefficients #
6893 # are made as "short" as possible: A1 (which is 1/2), A4 #
6894 # and A5 are single precision; A2 and A3 are double #
6896 # b) Even with the restrictions above, #
6897 # |p - (exp(R)-1)| < 2^(-68.8) for all |R| <= 0.0062. #
6898 # Note that 0.0062 is slightly bigger than 0.57 log2/64. #
6899 # c) To fully utilize the pipeline, p is separated into #
6900 # two independent pieces of roughly equal complexities #
6901 # p = [ R + R*S*(A2 + S*A4) ] + #
6902 # [ S*(A1 + S*(A3 + S*A5)) ] #
6905 # Step 5. Compute 2^(J/64)*exp(R) = 2^(J/64)*(1+p) by #
6906 # ans := T + ( T*p + t) #
6907 # where T and t are the stored values for 2^(J/64). #
6908 # Notes: 2^(J/64) is stored as T and t where T+t approximates #
6909 # 2^(J/64) to roughly 85 bits; T is in extended precision #
6910 # and t is in single precision. Note also that T is #
6911 # rounded to 62 bits so that the last two bits of T are #
6912 # zero. The reason for such a special form is that T-1, #
6913 # T-2, and T-8 will all be exact --- a property that will #
6914 # give much more accurate computation of the function #
6917 # Step 6. Reconstruction of exp(X) #
6918 # exp(X) = 2^M * 2^(J/64) * exp(R). #
6919 # 6.1 If AdjFlag = 0, go to 6.3 #
6920 # 6.2 ans := ans * AdjScale #
6921 # 6.3 Restore the user FPCR #
6922 # 6.4 Return ans := ans * Scale. Exit. #
6923 # Notes: If AdjFlag = 0, we have X = Mlog2 + Jlog2/64 + R, #
6924 # |M| <= 16380, and Scale = 2^M. Moreover, exp(X) will #
6925 # neither overflow nor underflow. If AdjFlag = 1, that #
6927 # X = (M1+M)log2 + Jlog2/64 + R, |M1+M| >= 16380. #
6928 # Hence, exp(X) may overflow or underflow or neither. #
6929 # When that is the case, AdjScale = 2^(M1) where M1 is #
6930 # approximately M. Thus 6.2 will never cause #
6931 # over/underflow. Possible exception in 6.4 is overflow #
6932 # or underflow. The inexact exception is not generated in #
6933 # 6.4. Although one can argue that the inexact flag #
6934 # should always be raised, simulating that exception #
6935 # costs more than the flag is worth in practical uses. #
6937 # Step 7. Return 1 + X. #
6939 # 7.2 Restore user FPCR. #
6940 # 7.3 Return ans := 1 + ans. Exit #
6941 # Notes: For non-zero X, the inexact exception will always be #
6942 # raised by 7.3. That is the only exception raised by 7.3.#
6943 # Note also that we use the FMOVEM instruction to move X #
6944 # in Step 7.1 to avoid unnecessary trapping. (Although #
6945 # the FMOVEM may not seem relevant since X is normalized, #
6946 # the precaution will be useful in the library version of #
6947 # this code where the separate entry for denormalized #
6948 # inputs will be done away with.) #
6950 # Step 8. Handle exp(X) where |X| >= 16380log2. #
6951 # 8.1 If |X| > 16480 log2, go to Step 9. #
6952 # (mimic 2.2 - 2.6) #
6953 # 8.2 N := round-to-integer( X * 64/log2 ) #
6954 # 8.3 Calculate J = N mod 64, J = 0,1,...,63 #
6955 # 8.4 K := (N-J)/64, M1 := truncate(K/2), M = K-M1, #
6957 # 8.5 Calculate the address of the stored value #
6959 # 8.6 Create the values Scale = 2^M, AdjScale = 2^M1. #
6960 # 8.7 Go to Step 3. #
6961 # Notes: Refer to notes for 2.2 - 2.6. #
6963 # Step 9. Handle exp(X), |X| > 16480 log2. #
6964 # 9.1 If X < 0, go to 9.3 #
6965 # 9.2 ans := Huge, go to 9.4 #
6966 # 9.3 ans := Tiny. #
6967 # 9.4 Restore user FPCR. #
6968 # 9.5 Return ans := ans * ans. Exit. #
6969 # Notes: Exp(X) will surely overflow or underflow, depending on #
6970 # X's sign. "Huge" and "Tiny" are respectively large/tiny #
6971 # extended-precision numbers whose square over/underflow #
6972 # with an inexact result. Thus, 9.5 always raises the #
6973 # inexact together with either overflow or underflow. #
6978 # Step 1. Set ans := 0 #
6980 # Step 2. Return ans := X + ans. Exit. #
6981 # Notes: This will return X with the appropriate rounding #
6982 # precision prescribed by the user FPCR. #
6987 # Step 1. Check |X| #
6988 # 1.1 If |X| >= 1/4, go to Step 1.3. #
6989 # 1.2 Go to Step 7. #
6990 # 1.3 If |X| < 70 log(2), go to Step 2. #
6991 # 1.4 Go to Step 10. #
6992 # Notes: The usual case should take the branches 1.1 -> 1.3 -> 2.#
6993 # However, it is conceivable |X| can be small very often #
6994 # because EXPM1 is intended to evaluate exp(X)-1 #
6995 # accurately when |X| is small. For further details on #
6996 # the comparisons, see the notes on Step 1 of setox. #
6998 # Step 2. Calculate N = round-to-nearest-int( X * 64/log2 ). #
6999 # 2.1 N := round-to-nearest-integer( X * 64/log2 ). #
7000 # 2.2 Calculate J = N mod 64; so J = 0,1,2,..., #
7002 # 2.3 Calculate M = (N - J)/64; so N = 64M + J. #
7003 # 2.4 Calculate the address of the stored value of #
7005 # 2.5 Create the values Sc = 2^M and #
7006 # OnebySc := -2^(-M). #
7007 # Notes: See the notes on Step 2 of setox. #
7009 # Step 3. Calculate X - N*log2/64. #
7010 # 3.1 R := X + N*L1, #
7011 # where L1 := single-precision(-log2/64). #
7012 # 3.2 R := R + N*L2, #
7013 # L2 := extended-precision(-log2/64 - L1).#
7014 # Notes: Applying the analysis of Step 3 of setox in this case #
7015 # shows that |R| <= 0.0055 (note that |X| <= 70 log2 in #
7018 # Step 4. Approximate exp(R)-1 by a polynomial #
7019 # p = R+R*R*(A1+R*(A2+R*(A3+R*(A4+R*(A5+R*A6))))) #
7020 # Notes: a) In order to reduce memory access, the coefficients #
7021 # are made as "short" as possible: A1 (which is 1/2), A5 #
7022 # and A6 are single precision; A2, A3 and A4 are double #
7024 # b) Even with the restriction above, #
7025 # |p - (exp(R)-1)| < |R| * 2^(-72.7) #
7026 # for all |R| <= 0.0055. #
7027 # c) To fully utilize the pipeline, p is separated into #
7028 # two independent pieces of roughly equal complexity #
7029 # p = [ R*S*(A2 + S*(A4 + S*A6)) ] + #
7030 # [ R + S*(A1 + S*(A3 + S*A5)) ] #
7033 # Step 5. Compute 2^(J/64)*p by #
7035 # where T and t are the stored values for 2^(J/64). #
7036 # Notes: 2^(J/64) is stored as T and t where T+t approximates #
7037 # 2^(J/64) to roughly 85 bits; T is in extended precision #
7038 # and t is in single precision. Note also that T is #
7039 # rounded to 62 bits so that the last two bits of T are #
7040 # zero. The reason for such a special form is that T-1, #
7041 # T-2, and T-8 will all be exact --- a property that will #
7042 # be exploited in Step 6 below. The total relative error #
7043 # in p is no bigger than 2^(-67.7) compared to the final #
7046 # Step 6. Reconstruction of exp(X)-1 #
7047 # exp(X)-1 = 2^M * ( 2^(J/64) + p - 2^(-M) ). #
7048 # 6.1 If M <= 63, go to Step 6.3. #
7049 # 6.2 ans := T + (p + (t + OnebySc)). Go to 6.6 #
7050 # 6.3 If M >= -3, go to 6.5. #
7051 # 6.4 ans := (T + (p + t)) + OnebySc. Go to 6.6 #
7052 # 6.5 ans := (T + OnebySc) + (p + t). #
7053 # 6.6 Restore user FPCR. #
7054 # 6.7 Return ans := Sc * ans. Exit. #
7055 # Notes: The various arrangements of the expressions give #
7056 # accurate evaluations. #
7058 # Step 7. exp(X)-1 for |X| < 1/4. #
7059 # 7.1 If |X| >= 2^(-65), go to Step 9. #
7060 # 7.2 Go to Step 8. #
7062 # Step 8. Calculate exp(X)-1, |X| < 2^(-65). #
7063 # 8.1 If |X| < 2^(-16312), goto 8.3 #
7064 # 8.2 Restore FPCR; return ans := X - 2^(-16382). #
7066 # 8.3 X := X * 2^(140). #
7067 # 8.4 Restore FPCR; ans := ans - 2^(-16382). #
7068 # Return ans := ans*2^(140). Exit #
7069 # Notes: The idea is to return "X - tiny" under the user #
7070 # precision and rounding modes. To avoid unnecessary #
7071 # inefficiency, we stay away from denormalized numbers #
7072 # the best we can. For |X| >= 2^(-16312), the #
7073 # straightforward 8.2 generates the inexact exception as #
7074 # the case warrants. #
7076 # Step 9. Calculate exp(X)-1, |X| < 1/4, by a polynomial #
7077 # p = X + X*X*(B1 + X*(B2 + ... + X*B12)) #
7078 # Notes: a) In order to reduce memory access, the coefficients #
7079 # are made as "short" as possible: B1 (which is 1/2), B9 #
7080 # to B12 are single precision; B3 to B8 are double #
7081 # precision; and B2 is double extended. #
7082 # b) Even with the restriction above, #
7083 # |p - (exp(X)-1)| < |X| 2^(-70.6) #
7084 # for all |X| <= 0.251. #
7085 # Note that 0.251 is slightly bigger than 1/4. #
7086 # c) To fully preserve accuracy, the polynomial is #
7088 # X + ( S*B1 + Q ) where S = X*X and #
7089 # Q = X*S*(B2 + X*(B3 + ... + X*B12)) #
7090 # d) To fully utilize the pipeline, Q is separated into #
7091 # two independent pieces of roughly equal complexity #
7092 # Q = [ X*S*(B2 + S*(B4 + ... + S*B12)) ] + #
7093 # [ S*S*(B3 + S*(B5 + ... + S*B11)) ] #
7095 # Step 10. Calculate exp(X)-1 for |X| >= 70 log 2. #
7096 # 10.1 If X >= 70log2 , exp(X) - 1 = exp(X) for all #
7097 # practical purposes. Therefore, go to Step 1 of setox. #
7098 # 10.2 If X <= -70log2, exp(X) - 1 = -1 for all practical #
7101 # Restore user FPCR #
7102 # Return ans := ans + 2^(-126). Exit. #
7103 # Notes: 10.2 will always create an inexact and return -1 + tiny #
7104 # in the user rounding precision and mode. #
7106 #########################################################################
# Constants used by the exp(X) / exp(X)-1 code.  The precision noted on each
# line is the one implied by the instruction that reads the constant
# (fmul.x/fadd.x = extended, fadd.d/fmul.d = double).
7108 L2: long 0x3FDC0000,0x82E30865,0x4361C4C6,0x00000000 # extended; trailing part of -log2/64 (L1+L2 = -log2/64)
7110 EEXPA3: long 0x3FA55555,0x55554CC1 # double; A3 of the EXP polynomial
7111 EEXPA2: long 0x3FC55555,0x55554A54 # double; A2 of the EXP polynomial
7113 EM1A4: long 0x3F811111,0x11174385 # double; A4 of the EXPM1 polynomial
7114 EM1A3: long 0x3FA55555,0x55554F5A # double; A3 of the EXPM1 polynomial
7116 EM1A2: long 0x3FC55555,0x55555555,0x00000000,0x00000000 # A2 of EXPM1; read with fadd.d, so only the first two longwords are used (trailing zeros look like padding -- TODO confirm)
7118 EM1B8: long 0x3EC71DE3,0xA5774682 # double; B8 of the |X| < 1/4 polynomial
7119 EM1B7: long 0x3EFA01A0,0x19D7CB68 # double; B7
7121 EM1B6: long 0x3F2A01A0,0x1A019DF3 # double; B6
7122 EM1B5: long 0x3F56C16C,0x16C170E2 # double; B5
7124 EM1B4: long 0x3F811111,0x11111111 # double; B4
7125 EM1B3: long 0x3FA55555,0x55555555 # double; B3
7127 EM1B2: long 0x3FFC0000,0xAAAAAAAA,0xAAAAAAAB # extended (read with fadd.x); B2
7130 TWO140: long 0x48B00000,0x00000000 # double; 2^(140) (biased exponent 0x48B)
# NOTE(review): the next constant has no visible label in this listing; its
# bit pattern is the double 2^(-140) and the Step 8 code reads a symbol named
# TWON140 -- confirm the label exists in the full source.
7132 long 0x37300000,0x00000000
# Table of 2^(J/64) for J = 0..63: 64 entries of four longwords each.
# As the algorithm notes and the reading code (fmov.x (%a1) / fadd.s 12(%a1))
# show, the first three longwords of an entry are T in extended precision and
# the fourth is t in single precision, with T + t approximating 2^(J/64).
# NOTE(review): the table's label is not visible in this listing; the code
# addresses the table as EEXPTBL -- confirm against the full source.
7135 long 0x3FFF0000,0x80000000,0x00000000,0x00000000
7136 long 0x3FFF0000,0x8164D1F3,0xBC030774,0x9F841A9B
7137 long 0x3FFF0000,0x82CD8698,0xAC2BA1D8,0x9FC1D5B9
7138 long 0x3FFF0000,0x843A28C3,0xACDE4048,0xA0728369
7139 long 0x3FFF0000,0x85AAC367,0xCC487B14,0x1FC5C95C
7140 long 0x3FFF0000,0x871F6196,0x9E8D1010,0x1EE85C9F
7141 long 0x3FFF0000,0x88980E80,0x92DA8528,0x9FA20729
7142 long 0x3FFF0000,0x8A14D575,0x496EFD9C,0xA07BF9AF
7143 long 0x3FFF0000,0x8B95C1E3,0xEA8BD6E8,0xA0020DCF
7144 long 0x3FFF0000,0x8D1ADF5B,0x7E5BA9E4,0x205A63DA
7145 long 0x3FFF0000,0x8EA4398B,0x45CD53C0,0x1EB70051
7146 long 0x3FFF0000,0x9031DC43,0x1466B1DC,0x1F6EB029
7147 long 0x3FFF0000,0x91C3D373,0xAB11C338,0xA0781494
7148 long 0x3FFF0000,0x935A2B2F,0x13E6E92C,0x9EB319B0
7149 long 0x3FFF0000,0x94F4EFA8,0xFEF70960,0x2017457D
7150 long 0x3FFF0000,0x96942D37,0x20185A00,0x1F11D537
7151 long 0x3FFF0000,0x9837F051,0x8DB8A970,0x9FB952DD
7152 long 0x3FFF0000,0x99E04593,0x20B7FA64,0x1FE43087
7153 long 0x3FFF0000,0x9B8D39B9,0xD54E5538,0x1FA2A818
7154 long 0x3FFF0000,0x9D3ED9A7,0x2CFFB750,0x1FDE494D
7155 long 0x3FFF0000,0x9EF53260,0x91A111AC,0x20504890
7156 long 0x3FFF0000,0xA0B0510F,0xB9714FC4,0xA073691C
7157 long 0x3FFF0000,0xA2704303,0x0C496818,0x1F9B7A05
7158 long 0x3FFF0000,0xA43515AE,0x09E680A0,0xA0797126
7159 long 0x3FFF0000,0xA5FED6A9,0xB15138EC,0xA071A140
7160 long 0x3FFF0000,0xA7CD93B4,0xE9653568,0x204F62DA
7161 long 0x3FFF0000,0xA9A15AB4,0xEA7C0EF8,0x1F283C4A
7162 long 0x3FFF0000,0xAB7A39B5,0xA93ED338,0x9F9A7FDC
7163 long 0x3FFF0000,0xAD583EEA,0x42A14AC8,0xA05B3FAC
7164 long 0x3FFF0000,0xAF3B78AD,0x690A4374,0x1FDF2610
7165 long 0x3FFF0000,0xB123F581,0xD2AC2590,0x9F705F90
7166 long 0x3FFF0000,0xB311C412,0xA9112488,0x201F678A
7167 long 0x3FFF0000,0xB504F333,0xF9DE6484,0x1F32FB13
7168 long 0x3FFF0000,0xB6FD91E3,0x28D17790,0x20038B30
7169 long 0x3FFF0000,0xB8FBAF47,0x62FB9EE8,0x200DC3CC
7170 long 0x3FFF0000,0xBAFF5AB2,0x133E45FC,0x9F8B2AE6
7171 long 0x3FFF0000,0xBD08A39F,0x580C36C0,0xA02BBF70
7172 long 0x3FFF0000,0xBF1799B6,0x7A731084,0xA00BF518
7173 long 0x3FFF0000,0xC12C4CCA,0x66709458,0xA041DD41
7174 long 0x3FFF0000,0xC346CCDA,0x24976408,0x9FDF137B
7175 long 0x3FFF0000,0xC5672A11,0x5506DADC,0x201F1568
7176 long 0x3FFF0000,0xC78D74C8,0xABB9B15C,0x1FC13A2E
7177 long 0x3FFF0000,0xC9B9BD86,0x6E2F27A4,0xA03F8F03
7178 long 0x3FFF0000,0xCBEC14FE,0xF2727C5C,0x1FF4907D
7179 long 0x3FFF0000,0xCE248C15,0x1F8480E4,0x9E6E53E4
7180 long 0x3FFF0000,0xD06333DA,0xEF2B2594,0x1FD6D45C
7181 long 0x3FFF0000,0xD2A81D91,0xF12AE45C,0xA076EDB9
7182 long 0x3FFF0000,0xD4F35AAB,0xCFEDFA20,0x9FA6DE21
7183 long 0x3FFF0000,0xD744FCCA,0xD69D6AF4,0x1EE69A2F
7184 long 0x3FFF0000,0xD99D15C2,0x78AFD7B4,0x207F439F
7185 long 0x3FFF0000,0xDBFBB797,0xDAF23754,0x201EC207
7186 long 0x3FFF0000,0xDE60F482,0x5E0E9124,0x9E8BE175
7187 long 0x3FFF0000,0xE0CCDEEC,0x2A94E110,0x20032C4B
7188 long 0x3FFF0000,0xE33F8972,0xBE8A5A50,0x2004DFF5
7189 long 0x3FFF0000,0xE5B906E7,0x7C8348A8,0x1E72F47A
7190 long 0x3FFF0000,0xE8396A50,0x3C4BDC68,0x1F722F22
7191 long 0x3FFF0000,0xEAC0C6E7,0xDD243930,0xA017E945
7192 long 0x3FFF0000,0xED4F301E,0xD9942B84,0x1F401A5B
7193 long 0x3FFF0000,0xEFE4B99B,0xDCDAF5CC,0x9FB9A9E3
7194 long 0x3FFF0000,0xF281773C,0x59FFB138,0x20744C05
7195 long 0x3FFF0000,0xF5257D15,0x2486CC2C,0x1F773A19
7196 long 0x3FFF0000,0xF7D0DF73,0x0AD13BB8,0x1FFE90D5
7197 long 0x3FFF0000,0xFA83B2DB,0x722A033C,0xA041ED22
7198 long 0x3FFF0000,0xFD3E0C0C,0xF486C174,0x1F853F3A
# setox main path: exp(X) for finite, non-zero, non-NaN X.
#   in:  a0 = pointer to the extended-precision input X
#        d0 = value written to %fpcr just before the final multiply
#             (the "user FPCR" of the algorithm notes)
#   out: fp0 = result; d1.b = FMUL_OP before the final multiply
# NOTE(review): the labels targeted by the branches below (EXPC1, EXPMAIN)
# and some instructions the comments rely on (e.g. the copies of X into fp1
# and of N into fp2) are not visible in this listing -- check against the
# complete source before changing anything here.
7202 set ADJSCALE,FP_SCR1 # ADJSCALE shares the FP_SCR1 scratch slot
7208 #--entry point for EXP(X), here X is finite, non-zero, and not a NaN
7211 mov.l (%a0),%d1 # load first longword of X (sign/exponent)
7212 and.l &0x7FFF0000,%d1 # biased expo. of X
7213 cmp.l %d1,&0x3FBE0000 # 2^(-65)
7214 bge.b EXPC1 # normal case
7218 #--The case |X| >= 2^(-65)
7219 mov.w 4(%a0),%d1 # expo. and partial sig. of |X|
7220 cmp.l %d1,&0x400CB167 # 16380 log2 trunc. 16 bits
7221 blt.b EXPMAIN # normal case
7226 #--This is the normal branch: 2^(-65) <= |X| < 16380 log2.
7227 fmov.x (%a0),%fp0 # load input from (a0)
7230 fmul.s &0x42B8AA3B,%fp0 # 64/log2 * X
7231 fmovm.x &0xc,-(%sp) # save fp2/fp3 {%fp2/%fp3}
7232 mov.l &0,ADJFLAG(%a6) # AdjFlag = 0: Step 6.2 is skipped
7233 fmov.l %fp0,%d1 # N = int( X * 64/log2 )
7234 lea EEXPTBL(%pc),%a1 # a1 = base of the 2^(J/64) table
7235 fmov.l %d1,%fp0 # convert to floating-format
7237 mov.l %d1,L_SCR1(%a6) # save N temporarily
7238 and.l &0x3F,%d1 # d1 is J = N mod 64
# NOTE(review): table entries are four longwords (16 bytes); no instruction
# scaling J by 16 is visible before this add -- confirm in the full source.
7240 add.l %d1,%a1 # address of 2^(J/64)
7241 mov.l L_SCR1(%a6),%d1 # reload N
7242 asr.l &6,%d1 # d1 is M
7243 add.w &0x3FFF,%d1 # biased expo. of 2^(M)
7244 mov.w L2(%pc),L_SCR1(%a6) # prefetch L2, no need in CB
7248 #--fp2,fp3 saved on the stack. fp0 is N, fp1 is X,
7249 #--a1 points to 2^(J/64), d1 is biased expo. of 2^(M)
7251 fmul.s &0xBC317218,%fp0 # N * L1, L1 = lead(-log2/64)
7252 fmul.x L2(%pc),%fp2 # N * L2, L1+L2 = -log2/64
7253 fadd.x %fp1,%fp0 # X + N*L1
7254 fadd.x %fp2,%fp0 # fp0 is R, reduced arg.
7257 #--WE NOW COMPUTE EXP(R)-1 BY A POLYNOMIAL
7258 #-- R + R*R*(A1 + R*(A2 + R*(A3 + R*(A4 + R*A5))))
7259 #--TO FULLY UTILIZE THE PIPELINE, WE COMPUTE S = R*R
7260 #--[R+R*S*(A2+S*A4)] + [S*(A1+S*(A3+S*A5))]
7263 fmul.x %fp1,%fp1 # fp1 IS S = R*R
7265 fmov.s &0x3AB60B70,%fp2 # fp2 IS A5
7267 fmul.x %fp1,%fp2 # fp2 IS S*A5
7269 fmul.s &0x3C088895,%fp3 # fp3 IS S*A4
7271 fadd.d EEXPA3(%pc),%fp2 # fp2 IS A3+S*A5
7272 fadd.d EEXPA2(%pc),%fp3 # fp3 IS A2+S*A4
7274 fmul.x %fp1,%fp2 # fp2 IS S*(A3+S*A5)
7275 mov.w %d1,SCALE(%a6) # SCALE is 2^(M) in extended
7276 mov.l &0x80000000,SCALE+4(%a6) # high significand longword of 1.0
7279 fmul.x %fp1,%fp3 # fp3 IS S*(A2+S*A4)
7281 fadd.s &0x3F000000,%fp2 # fp2 IS A1+S*(A3+S*A5), A1 = 1/2
7282 fmul.x %fp0,%fp3 # fp3 IS R*S*(A2+S*A4)
7284 fmul.x %fp1,%fp2 # fp2 IS S*(A1+S*(A3+S*A5))
7285 fadd.x %fp3,%fp0 # fp0 IS R+R*S*(A2+S*A4),
7287 fmov.x (%a1)+,%fp1 # fp1 is lead. pt. of 2^(J/64); a1 now points at t
7288 fadd.x %fp2,%fp0 # fp0 is EXP(R) - 1
7291 #--final reconstruction process
7292 #--EXP(X) = 2^M * ( 2^(J/64) + 2^(J/64)*(EXP(R)-1) )
7294 fmul.x %fp1,%fp0 # 2^(J/64)*(Exp(R)-1)
7295 fmovm.x (%sp)+,&0x30 # restore fp2/fp3 {%fp2/%fp3}
7296 fadd.s (%a1),%fp0 # accurate 2^(J/64): add the single-precision tail t
7298 fadd.x %fp1,%fp0 # 2^(J/64) + 2^(J/64)*...
7299 mov.l ADJFLAG(%a6),%d1 # Step 6.1: fetch AdjFlag
7305 fmul.x ADJSCALE(%a6),%fp0 # Step 6.2: ans := ans * AdjScale
7307 fmov.l %d0,%fpcr # restore user FPCR
7308 mov.b &FMUL_OP,%d1 # last inst is MUL
7309 fmul.x SCALE(%a6),%fp0 # multiply 2^(M)
7314 fmovm.x (%a0),&0x80 # Step 7: load X into fp0; fmovm avoids unnecessary trapping (see notes)
7316 fadd.s &0x3F800000,%fp0 # 1+X in user mode
# Steps 8 and 9 of the exp(X) algorithm (|X| >= 16380 log2) followed by the
# denormalized-input entry.  Step 8 repeats the Step 2 reduction but splits
# the exponent K into M1 = K>>1 and M = K-M1, so that the result is scaled
# by two separate multiplies (AdjScale = 2^M1, Scale = 2^M); it then rejoins
# the main path at EXPCONT1.
# NOTE(review): the labels of this region and some of its instructions are
# not visible in this listing -- check against the complete source.
7321 cmp.l %d1,&0x400CB27C # 16480 log2
7324 fmov.x (%a0),%fp0 # load input from (a0)
7327 fmul.s &0x42B8AA3B,%fp0 # 64/log2 * X
7328 fmovm.x &0xc,-(%sp) # save fp2/fp3 {%fp2/%fp3}
7329 mov.l &1,ADJFLAG(%a6) # AdjFlag = 1: Step 6.2 applies AdjScale
7330 fmov.l %fp0,%d1 # N = int( X * 64/log2 )
7331 lea EEXPTBL(%pc),%a1 # a1 = base of the 2^(J/64) table
7332 fmov.l %d1,%fp0 # convert to floating-format
7333 mov.l %d1,L_SCR1(%a6) # save N temporarily
7334 and.l &0x3F,%d1 # d1 is J = N mod 64
# NOTE(review): table entries are four longwords (16 bytes); no instruction
# scaling J by 16 is visible before this add -- confirm in the full source.
7336 add.l %d1,%a1 # address of 2^(J/64)
7337 mov.l L_SCR1(%a6),%d1 # reload N
7338 asr.l &6,%d1 # d1 is K
7339 mov.l %d1,L_SCR1(%a6) # save K temporarily
7340 asr.l &1,%d1 # d1 is M1 = K>>1 (arithmetic shift)
7341 sub.l %d1,L_SCR1(%a6) # L_SCR1 is M = K - M1
7342 add.w &0x3FFF,%d1 # biased expo. of 2^(M1)
7343 mov.w %d1,ADJSCALE(%a6) # ADJSCALE := 2^(M1)
7344 mov.l &0x80000000,ADJSCALE+4(%a6) # high significand longword of 1.0
7345 clr.l ADJSCALE+8(%a6) # low significand longword = 0
7346 mov.l L_SCR1(%a6),%d1 # d1 is M
7347 add.w &0x3FFF,%d1 # biased expo. of 2^(M)
7348 bra.w EXPCONT1 # go back to Step 3
7352 tst.b (%a0) # Step 9.1: is X positive or negative?
7358 #--entry point for EXP(X), X is denormalized
7360 andi.l &0x80000000,(%sp) # keep only the sign bit of the longword on top of the stack
7361 ori.l &0x00800000,(%sp) # sign(X)*2^(-126)
7363 fmov.s &0x3F800000,%fp0 # fp0 = 1.0
# setoxm1 main path: exp(X)-1 for finite, non-zero, non-NaN X with
# 1/4 <= |X| <= 70 log2 (Steps 1-4 of the EXPM1 algorithm).
#   in:  a0 = pointer to the extended-precision input X
#   on exit from this part: fp0 = p ~ exp(R)-1, a1 = address of the 2^(J/64)
#   table entry, L_SCR1(a6) = M, SC(a6) = 2^M, ONEBYSC(a6) = -2^(-M)
# NOTE(review): the labels targeted by the branches below (EM1CON1, EM1MAIN)
# and some instructions the comments rely on (e.g. the copies of X into fp1
# and of N into fp2) are not visible in this listing -- check against the
# complete source before changing anything here.
7371 #--entry point for EXPM1(X), here X is finite, non-zero, non-NaN
7375 mov.l (%a0),%d1 # load first longword of X (sign/exponent)
7376 and.l &0x7FFF0000,%d1 # biased expo. of X
7377 cmp.l %d1,&0x3FFD0000 # 1/4
7378 bge.b EM1CON1 # |X| >= 1/4
7383 #--The case |X| >= 1/4
7384 mov.w 4(%a0),%d1 # expo. and partial sig. of |X|
7385 cmp.l %d1,&0x4004C215 # 70log2 rounded up to 16 bits
7386 ble.b EM1MAIN # 1/4 <= |X| <= 70log2
7391 #--This is the case: 1/4 <= |X| <= 70 log2.
7392 fmov.x (%a0),%fp0 # load input from (a0)
7395 fmul.s &0x42B8AA3B,%fp0 # 64/log2 * X
7396 fmovm.x &0xc,-(%sp) # save fp2/fp3 {%fp2/%fp3}
7397 fmov.l %fp0,%d1 # N = int( X * 64/log2 )
7398 lea EEXPTBL(%pc),%a1 # a1 = base of the 2^(J/64) table
7399 fmov.l %d1,%fp0 # convert to floating-format
7401 mov.l %d1,L_SCR1(%a6) # save N temporarily
7402 and.l &0x3F,%d1 # d1 is J = N mod 64
# NOTE(review): table entries are four longwords (16 bytes); no instruction
# scaling J by 16 is visible before this add -- confirm in the full source.
7404 add.l %d1,%a1 # address of 2^(J/64)
7405 mov.l L_SCR1(%a6),%d1 # reload N
7406 asr.l &6,%d1 # d1 is M
7407 mov.l %d1,L_SCR1(%a6) # save a copy of M
7410 #--fp2,fp3 saved on the stack. fp0 is N, fp1 is X,
7411 #--a1 points to 2^(J/64), d1 and L_SCR1 both contain M
7413 fmul.s &0xBC317218,%fp0 # N * L1, L1 = lead(-log2/64)
7414 fmul.x L2(%pc),%fp2 # N * L2, L1+L2 = -log2/64
7415 fadd.x %fp1,%fp0 # X + N*L1
7416 fadd.x %fp2,%fp0 # fp0 is R, reduced arg.
7417 add.w &0x3FFF,%d1 # d1 is biased expo. of 2^M
7420 #--WE NOW COMPUTE EXP(R)-1 BY A POLYNOMIAL
7421 #-- R + R*R*(A1 + R*(A2 + R*(A3 + R*(A4 + R*(A5 + R*A6)))))
7422 #--TO FULLY UTILIZE THE PIPELINE, WE COMPUTE S = R*R
7423 #--[R*S*(A2+S*(A4+S*A6))] + [R+S*(A1+S*(A3+S*A5))]
7426 fmul.x %fp1,%fp1 # fp1 IS S = R*R
7428 fmov.s &0x3950097B,%fp2 # fp2 IS A6
7430 fmul.x %fp1,%fp2 # fp2 IS S*A6
7432 fmul.s &0x3AB60B6A,%fp3 # fp3 IS S*A5
7434 fadd.d EM1A4(%pc),%fp2 # fp2 IS A4+S*A6
7435 fadd.d EM1A3(%pc),%fp3 # fp3 IS A3+S*A5
7436 mov.w %d1,SC(%a6) # SC is 2^(M) in extended
7437 mov.l &0x80000000,SC+4(%a6) # high significand longword of 1.0
7440 fmul.x %fp1,%fp2 # fp2 IS S*(A4+S*A6)
7441 mov.l L_SCR1(%a6),%d1 # d1 is M
7442 neg.w %d1 # d1 is -M
7443 fmul.x %fp1,%fp3 # fp3 IS S*(A3+S*A5)
7444 add.w &0x3FFF,%d1 # biased expo. of 2^(-M)
7445 fadd.d EM1A2(%pc),%fp2 # fp2 IS A2+S*(A4+S*A6)
7446 fadd.s &0x3F000000,%fp3 # fp3 IS A1+S*(A3+S*A5), A1 = 1/2
7448 fmul.x %fp1,%fp2 # fp2 IS S*(A2+S*(A4+S*A6))
7449 or.w &0x8000,%d1 # set sign bit: sign/expo. of -2^(-M)
7450 mov.w %d1,ONEBYSC(%a6) # OnebySc is -2^(-M)
7451 mov.l &0x80000000,ONEBYSC+4(%a6) # high significand longword of 1.0
7452 clr.l ONEBYSC+8(%a6) # low significand longword = 0
7453 fmul.x %fp3,%fp1 # fp1 IS S*(A1+S*(A3+S*A5))
7455 fmul.x %fp0,%fp2 # fp2 IS R*S*(A2+S*(A4+S*A6))
7456 fadd.x %fp1,%fp0 # fp0 IS R+S*(A1+S*(A3+S*A5))
7458 fadd.x %fp2,%fp0 # fp0 IS EXP(R)-1
7460 fmovm.x (%sp)+,&0x30 # restore fp2/fp3 {%fp2/%fp3}
# Steps 5 and 6 of the EXPM1 algorithm: scale p by 2^(J/64) and add T, t and
# OnebySc (= -2^(-M)) in one of three orders, as listed in Step 6 of the
# algorithm notes.  a1 points at the table entry: T (extended) at 0(a1) and
# t (single) at 12(a1).
# NOTE(review): the comparisons on M and the branches/labels that select and
# terminate each of the three sequences are not visible in this listing.
7463 #--Compute 2^(J/64)*p
7465 fmul.x (%a1),%fp0 # 2^(J/64)*(Exp(R)-1), using the leading part T
7469 mov.l L_SCR1(%a6),%d1 # retrieve M
#--Sequence matching Step 6.2: T+(p+(t+OnebySc))
7473 fmov.s 12(%a1),%fp1 # fp1 is t
7474 fadd.x ONEBYSC(%a6),%fp1 # fp1 is t+OnebySc
7475 fadd.x %fp1,%fp0 # p+(t+OnebySc), fp1 released
7476 fadd.x (%a1),%fp0 # T+(p+(t+OnebySc))
#--Sequence matching Step 6.4: (T+(p+t))+OnebySc
7484 fadd.s 12(%a1),%fp0 # p+t
7485 fadd.x (%a1),%fp0 # T+(p+t)
7486 fadd.x ONEBYSC(%a6),%fp0 # OnebySc + (T+(p+t))
7489 #--Step 6.5 -3 <= M <= 63
7490 fmov.x (%a1)+,%fp1 # fp1 is T; a1 now points at t
7491 fadd.s (%a1),%fp0 # fp0 is p+t
7492 fadd.x ONEBYSC(%a6),%fp1 # fp1 is T+OnebySc
7493 fadd.x %fp1,%fp0 # (T+OnebySc)+(p+t)
# Steps 7 and 8 of the EXPM1 algorithm: |X| < 1/4, and the sub-case
# |X| < 2^(-65) where the result is "X - tiny" (tiny = 2^(-16382), built in
# SC(a6) with the sign bit set).  For the smallest inputs X is first scaled
# up by TWO140 and the result scaled back down by TWON140.
# NOTE(review): the branches, labels and the add of SC that belong to these
# steps are not visible in this listing.
7502 #--Step 7 |X| < 1/4.
7503 cmp.l %d1,&0x3FBE0000 # 2^(-65)
7507 #--Step 8 |X| < 2^(-65)
# NOTE(review): 0x0033 is biased exponent 51, i.e. 2^(51-0x3FFF) = 2^(-16332),
# while the comment below and the algorithm notes say 2^(-16312) -- confirm
# which value is intended.
7508 cmp.l %d1,&0x00330000 # 2^(-16312)
7511 mov.l &0x80010000,SC(%a6) # SC is -2^(-16382)
7512 mov.l &0x80000000,SC+4(%a6) # high significand longword of 1.0
7516 mov.b &FADD_OP,%d1 # last inst is ADD
7523 fmul.d TWO140(%pc),%fp0 # Step 8.3: X := X * 2^(140)
7524 mov.l &0x80010000,SC(%a6) # SC is -2^(-16382)
7525 mov.l &0x80000000,SC+4(%a6) # high significand longword of 1.0
7529 mov.b &FMUL_OP,%d1 # last inst is MUL
7530 fmul.d TWON140(%pc),%fp0 # scale the result back down (TWON140)
# Step 9 of the EXPM1 algorithm: for |X| < 1/4 evaluate
#   exp(X)-1 ~ X + ( S*B1 + Q ),  S = X*X,
# with Q accumulated as two interleaved Horner chains in S: the even-index
# coefficients (B12, B10, ..., B2) in fp1 and the odd-index ones
# (B11, B9, ..., B3) in fp2.  a0 points to X; fp2/fp3 are saved and restored
# around the computation.
# NOTE(review): the final "X + (S*B1+Q)" add is not visible in this listing.
7534 #--Step 9 exp(X)-1 by a simple polynomial
7535 fmov.x (%a0),%fp0 # fp0 is X
7536 fmul.x %fp0,%fp0 # fp0 is S := X*X
7537 fmovm.x &0xc,-(%sp) # save fp2/fp3 {%fp2/%fp3}
7538 fmov.s &0x2F30CAA8,%fp1 # fp1 is B12
7539 fmul.x %fp0,%fp1 # fp1 is S*B12
7540 fmov.s &0x310F8290,%fp2 # fp2 is B11
7541 fadd.s &0x32D73220,%fp1 # fp1 is B10+S*B12
7543 fmul.x %fp0,%fp2 # fp2 is S*B11
7544 fmul.x %fp0,%fp1 # fp1 is S*(B10 + ...
7546 fadd.s &0x3493F281,%fp2 # fp2 is B9+S*...
7547 fadd.d EM1B8(%pc),%fp1 # fp1 is B8+S*...
7549 fmul.x %fp0,%fp2 # fp2 is S*(B9+...
7550 fmul.x %fp0,%fp1 # fp1 is S*(B8+...
7552 fadd.d EM1B7(%pc),%fp2 # fp2 is B7+S*...
7553 fadd.d EM1B6(%pc),%fp1 # fp1 is B6+S*...
7555 fmul.x %fp0,%fp2 # fp2 is S*(B7+...
7556 fmul.x %fp0,%fp1 # fp1 is S*(B6+...
7558 fadd.d EM1B5(%pc),%fp2 # fp2 is B5+S*...
7559 fadd.d EM1B4(%pc),%fp1 # fp1 is B4+S*...
7561 fmul.x %fp0,%fp2 # fp2 is S*(B5+...
7562 fmul.x %fp0,%fp1 # fp1 is S*(B4+...
7564 fadd.d EM1B3(%pc),%fp2 # fp2 is B3+S*...
7565 fadd.x EM1B2(%pc),%fp1 # fp1 is B2+S*... (B2 is extended precision)
7567 fmul.x %fp0,%fp2 # fp2 is S*(B3+...
7568 fmul.x %fp0,%fp1 # fp1 is S*(B2+...
7570 fmul.x %fp0,%fp2 # fp2 is S*S*(B3+...)
7571 fmul.x (%a0),%fp1 # fp1 is X*S*(B2...
7573 fmul.s &0x3F000000,%fp0 # fp0 is S*B1, B1 = 1/2
7574 fadd.x %fp2,%fp1 # fp1 is Q
7576 fmovm.x (%sp)+,&0x30 # restore fp2/fp3 {%fp2/%fp3}
7578 fadd.x %fp1,%fp0 # fp0 is S*B1+Q
# Step 10 of the EXPM1 algorithm (|X| > 70 log2).  The two instructions shown
# produce -1 + 2^(-126), the Step 10.2 result for large negative X; per the
# algorithm notes this add always raises inexact.
# NOTE(review): the sign test, the branch to setox for positive X and the
# FPCR restore are not visible in this listing.
7585 #--Step 10 |X| > 70 log2
7590 fmov.s &0xBF800000,%fp0 # fp0 is -1
7592 fadd.s &0x00800000,%fp0 # -1 + 2^(-126)
7597 #--entry point for EXPM1(X), here X is denormalized
# NOTE(review): the body of the denormalized entry is not visible in this listing.
7601 #########################################################################
7602 # sgetexp(): returns the exponent portion of the input argument. #
7603 # The exponent bias is removed and the exponent value is #
7604 # returned as an extended precision number in fp0. #
7605 # sgetexpd(): handles denormalized numbers. #
7607 # sgetman(): extracts the mantissa of the input argument. The #
7608 # mantissa is converted to an extended precision number w/ #
7609 # an exponent of $3fff and is returned in fp0. The range of #
7610 # the result is [1.0 - 2.0). #
7611 # sgetmand(): handles denormalized numbers. #
7613 # INPUT *************************************************************** #
7614 # a0 = pointer to extended precision input #
7616 # OUTPUT ************************************************************** #
7617 # fp0 = exponent(X) or mantissa(X) #
7619 #########################################################################
# Code paths of sgetexp / sgetexpd / sgetman / sgetmand.
#   in:  a0 = pointer to the extended-precision input
#   out: fp0 = unbiased exponent of X (as a number), or the mantissa of X
#        re-tagged with exponent $3fff; FPSR_CC(a6) is set to neg_bmask on
#        the paths taken for a negative result
# NOTE(review): the entry labels, the branch targets sgetexpn/sgetmann and
# the return instructions are not visible in this listing.
#--sgetexp: normalized input
7623 mov.w SRC_EX(%a0),%d0 # get the exponent
7624 bclr &0xf,%d0 # clear the sign bit
7625 subi.w &0x3fff,%d0 # subtract off the bias
7626 fmov.w %d0,%fp0 # return exp in fp0
7627 blt.b sgetexpn # it's negative
7631 mov.b &neg_bmask,FPSR_CC(%a6) # set 'N' ccode bit
#--sgetexpd: denormalized input
7636 bsr.l norm # normalize; presumably returns the shift count in d0 (see next line)
7637 neg.w %d0 # new exp = -(shft amt)
7638 subi.w &0x3fff,%d0 # subtract off the bias
7639 fmov.w %d0,%fp0 # return exp in fp0
7640 mov.b &neg_bmask,FPSR_CC(%a6) # set 'N' ccode bit
#--sgetman: normalized input
7645 mov.w SRC_EX(%a0),%d0 # get the exp
7646 ori.w &0x7fff,%d0 # set all exponent bits (sign bit untouched)
7647 bclr &0xe,%d0 # clear bit 14: exponent becomes $3fff, sign kept
7649 # here, we build the result in a tmp location so as not to disturb the input
7650 mov.l SRC_HI(%a0),FP_SCR0_HI(%a6) # copy to tmp loc
7651 mov.l SRC_LO(%a0),FP_SCR0_LO(%a6) # copy to tmp loc
7652 mov.w %d0,FP_SCR0_EX(%a6) # insert new exponent
7653 fmov.x FP_SCR0(%a6),%fp0 # put new value back in fp0
7654 bmi.b sgetmann # it's negative
7658 mov.b &neg_bmask,FPSR_CC(%a6) # set 'N' ccode bit
7662 # For denormalized numbers, shift the mantissa until the j-bit = 1,
7663 # then load the exponent with +/- $3fff.
7667 bsr.l norm # normalize exponent
7670 #########################################################################
7671 # scosh(): computes the hyperbolic cosine of a normalized input #
7672 # scoshd(): computes the hyperbolic cosine of a denormalized input #
7674 # INPUT *************************************************************** #
7675 # a0 = pointer to extended precision input #
7676 # d0 = round precision,mode #
7678 # OUTPUT ************************************************************** #
7681 # ACCURACY and MONOTONICITY ******************************************* #
7682 # The returned result is within 3 ulps in 64 significant bits, #
7683 # i.e. within 0.5001 ulp to 53 bits if the result is subsequently #
7684 # rounded to double precision. The result is provably monotonic #
7685 # in double precision. #
7687 # ALGORITHM *********************************************************** #
7690 # 1. If |X| > 16380 log2, go to 3. #
7692 # 2. (|X| <= 16380 log2) Cosh(X) is obtained by the formulae #
7693 # Y = |X|, z = exp(Y), and #
7694 # cosh(X) = (1/2)*( z + 1/z ). #
7697 # 3. (|X| > 16380 log2). If |X| > 16480 log2, go to 5. #
7699 # 4. (16380 log2 < |X| <= 16480 log2) #
7700 # cosh(X) = exp(|X|)/2. #
7701 # However, invoking exp(|X|) may cause premature #
7702 # overflow. Thus, we calculate cosh(X) as follows: #
7704 # Fact := 2**(16380) #
7705 # Y' := Y - 16381 log2 #
7706 # cosh(X) := Fact * exp(Y'). #
7709 # 5. (|X| > 16480 log2) cosh(X) must overflow. Return #
7710 # Huge*Huge to generate overflow and an infinity with #
7711 # the appropriate sign. Huge is the largest finite number #
7712 # in extended format. Exit. #
7714 #########################################################################
# scosh / scoshd code paths: hyperbolic cosine.
#   in:  a0 = pointer to the extended-precision input, d0 = round precision,mode
#   out: fp0 = cosh(X); d1.b = FADD_OP or FMUL_OP, naming the last operation
# NOTE(review): the entry labels, the load of d1, the branches and some
# instructions (including the call made between "pass ptr to fp0" and
# "clear fp0 from stack" on the large-argument path) are not visible in this
# listing -- check against the complete source.
#
# Extended-precision 2^16380 (biased exponent 0x7FFB).  Its label is not
# visible here; the code below reads a symbol named TWO16380 -- TODO confirm.
7717 long 0x7FFB0000,0x80000000,0x00000000,0x00000000
7721 fmov.x (%a0),%fp0 # LOAD INPUT
7725 and.l &0x7FFFFFFF,%d1 # drop the sign bit
7726 cmp.l %d1,&0x400CB167 # 16380 log2 (same constant as in setox)
7729 #--THIS IS THE USUAL CASE, |X| < 16380 LOG2
7730 #--COSH(X) = (1/2) * ( EXP(X) + 1/EXP(X) )
7736 fmovm.x &0x01,-(%sp) # save |X| to stack
7737 lea (%sp),%a0 # pass ptr to |X|
7738 bsr setox # FP0 IS EXP(|X|)
7739 add.l &0xc,%sp # erase |X| from stack
7740 fmul.s &0x3F000000,%fp0 # (1/2)EXP(|X|)
7743 fmov.s &0x3E800000,%fp1 # (1/4)
7744 fdiv.x %fp0,%fp1 # (1/4) / ((1/2)EXP(|X|)) = 1/(2 EXP(|X|))
7747 mov.b &FADD_OP,%d1 # last inst is ADD
# NOTE(review): setox compares against 0x400CB27C for "16480 log2"; the
# constant used here is slightly larger -- confirm this is intended.
7752 cmp.l %d1,&0x400CB2B3
7756 fsub.d T1(%pc),%fp0 # (|X|-16381LOG2_LEAD)
7757 fsub.d T2(%pc),%fp0 # |X| - 16381 LOG2, ACCURATE
7761 fmovm.x &0x01,-(%sp) # save fp0 to stack
7762 lea (%sp),%a0 # pass ptr to fp0
7764 add.l &0xc,%sp # clear fp0 from stack
7768 mov.b &FMUL_OP,%d1 # last inst is MUL
7769 fmul.x TWO16380(%pc),%fp0 # scale by Fact = 2^16380
7776 #--COSH(X) = 1 FOR DENORMALIZED X
7778 fmov.s &0x3F800000,%fp0 # fp0 = 1.0
7781 fadd.s &0x00800000,%fp0 # 1 + 2^(-126)
7784 #########################################################################
7785 # ssinh(): computes the hyperbolic sine of a normalized input #
7786 # ssinhd(): computes the hyperbolic sine of a denormalized input #
7788 # INPUT *************************************************************** #
7789 # a0 = pointer to extended precision input #
7790 # d0 = round precision,mode #
7792 # OUTPUT ************************************************************** #
7795 # ACCURACY and MONOTONICITY ******************************************* #
7796 # The returned result is within 3 ulps in 64 significant bits, #
7797 # i.e. within 0.5001 ulp to 53 bits if the result is subsequently #
7798 # rounded to double precision. The result is provably monotonic #
7799 # in double precision. #
7801 # ALGORITHM *********************************************************** #
7804 # 1. If |X| > 16380 log2, go to 3. #
7806 # 2. (|X| <= 16380 log2) Sinh(X) is obtained by the formula #
7807 # Y = |X|, sgn = sign(X), and z = expm1(Y), #
7808 # sinh(X) = sgn*(1/2)*( z + z/(1+z) ). #
7811 # 3. If |X| > 16480 log2, go to 5. #
7813 # 4. (16380 log2 < |X| <= 16480 log2) #
7814 # sinh(X) = sign(X) * exp(|X|)/2. #
7815 # However, invoking exp(|X|) may cause premature overflow. #
7816 # Thus, we calculate sinh(X) as follows: #
7819 # sgnFact := sgn * 2**(16380) #
7820 # Y' := Y - 16381 log2 #
7821 # sinh(X) := sgnFact * exp(Y'). #
7824 # 5. (|X| > 16480 log2) sinh(X) must overflow. Return #
7825 # sign(X)*Huge*Huge to generate overflow and an infinity with #
7826 # the appropriate sign. Huge is the largest finite number in #
7827 # extended format. Exit. #
7829 #########################################################################
# ssinh / ssinhd code paths: hyperbolic sine.
#   in:  a0 = pointer to the extended-precision input, d0 = round precision,mode
#   out: fp0 = sinh(X); d1.b = FMUL_OP, naming the last operation
# a1 keeps the compacted operand (with its sign) across the call to setoxm1.
# NOTE(review): the entry labels, the load of d1, the branches and some
# instructions (e.g. the copy of Z into fp1, the pushes that pair with the
# "(%sp)+" operands, and the call on the large-argument path) are not
# visible in this listing -- check against the complete source.
7833 fmov.x (%a0),%fp0 # LOAD INPUT
7837 mov.l %d1,%a1 # save (compacted) operand
7838 and.l &0x7FFFFFFF,%d1 # drop the sign bit
7839 cmp.l %d1,&0x400CB167 # 16380 log2 (same constant as in setox)
7842 #--THIS IS THE USUAL CASE, |X| < 16380 LOG2
7843 #--Y = |X|, Z = EXPM1(Y), SINH(X) = SIGN(X)*(1/2)*( Z + Z/(1+Z) )
7845 fabs.x %fp0 # Y = |X|
7847 movm.l &0x8040,-(%sp) # save {a1/d0} across the call
7848 fmovm.x &0x01,-(%sp) # save Y on stack
7849 lea (%sp),%a0 # pass ptr to Y
7851 bsr setoxm1 # FP0 IS Z = EXPM1(Y)
7852 add.l &0xc,%sp # clear Y from stack
7854 movm.l (%sp)+,&0x0201 # restore {a1/d0}
7857 fadd.s &0x3F800000,%fp1 # 1+Z
7859 fdiv.x %fp1,%fp0 # Z/(1+Z)
7861 and.l &0x80000000,%d1 # keep only the sign bit
7862 or.l &0x3F000000,%d1 # single-precision 1/2 carrying that sign
7867 mov.b &FMUL_OP,%d1 # last inst is MUL
7868 fmul.s (%sp)+,%fp0 # last fp inst - possible exceptions set
7872 cmp.l %d1,&0x400CB2B3
7875 fsub.d T1(%pc),%fp0 # (|X|-16381LOG2_LEAD)
7877 mov.l &0x80000000,-(%sp) # high significand longword of sgnFact
7879 and.l &0x80000000,%d1 # keep only the sign bit
7880 or.l &0x7FFB0000,%d1 # sign plus biased exponent of 2^16380
7881 mov.l %d1,-(%sp) # EXTENDED FMT
7882 fsub.d T2(%pc),%fp0 # |X| - 16381 LOG2, ACCURATE
7886 fmovm.x &0x01,-(%sp) # save fp0 on stack
7887 lea (%sp),%a0 # pass ptr to fp0
7889 add.l &0xc,%sp # clear fp0 from stack
7893 mov.b &FMUL_OP,%d1 # last inst is MUL
7894 fmul.x (%sp)+,%fp0 # multiply by sgnFact; possible exception
7898 #--SINH(X) = X FOR DENORMALIZED X
7902 #########################################################################
7903 # stanh(): computes the hyperbolic tangent of a normalized input #
7904 # stanhd(): computes the hyperbolic tangent of a denormalized input #
7906 # INPUT *************************************************************** #
7907 # a0 = pointer to extended precision input #
7908 # d0 = round precision,mode #
7910 # OUTPUT ************************************************************** #
7913 # ACCURACY and MONOTONICITY ******************************************* #
7914 # The returned result is within 3 ulps in 64 significant bits, #
7915 # i.e. within 0.5001 ulp to 53 bits if the result is subsequently #
7916 # rounded to double precision. The result is provably monotonic #
7917 # in double precision. #
7919 # ALGORITHM *********************************************************** #
7922 # 1. If |X| >= (5/2) log2 or |X| <= 2**(-40), go to 3. #
7924 # 2. (2**(-40) < |X| < (5/2) log2) Calculate tanh(X) by #
7925 # sgn := sign(X), Y := 2|X|, z := expm1(Y), and #
7926 # tanh(X) = sgn*( z/(2+z) ). #
7929 # 3. (|X| <= 2**(-40) or |X| >= (5/2) log2). If |X| < 1, #
7932 # 4. (|X| >= (5/2) log2) If |X| >= 50 log2, go to 6. #
7934 # 5. ((5/2) log2 <= |X| < 50 log2) Calculate tanh(X) by #
7935 # sgn := sign(X), Y := 2|X|, z := exp(Y), #
7936 # tanh(X) = sgn - [ sgn*2/(1+z) ]. #
7939 # 6. (|X| >= 50 log2) Tanh(X) = +-1 (round to nearest). Thus, we #
7940 # calculate Tanh(X) by #
7941 # sgn := sign(X), Tiny := 2**(-126), #
7942 # tanh(X) := sgn - sgn*Tiny. #
7945 # 7. (|X| < 2**(-40)). Tanh(X) = X. Exit. #
7947 #########################################################################
# stanh main path: hyperbolic tangent for 2^(-40) <= |X| <= (5/2) log2,
# computed as sign(X) * Z/(Z+2) with Z = expm1(2|X|).
#   in:  a0 = pointer to the extended-precision input
#        d0 = round precision,mode (written to %fpcr before the last operation)
# Inputs outside that range branch to TANHBORS.
# NOTE(review): the entry label, the load of d1, the stores into X(a6)/SGN(a6),
# the copy of Z into fp1 and the final divide/sign fix-up are not visible in
# this listing -- check against the complete source.
7958 fmov.x (%a0),%fp0 # LOAD INPUT
7964 and.l &0x7FFFFFFF,%d1 # drop the sign bit
7965 cmp.l %d1, &0x3fd78000 # is |X| < 2^(-40)?
7966 blt.w TANHBORS # yes
7967 cmp.l %d1, &0x3fffddce # is |X| > (5/2)LOG2?
7968 bgt.w TANHBORS # yes
7970 #--THIS IS THE USUAL CASE
7971 #--Y = 2|X|, Z = EXPM1(Y), TANH(X) = SIGN(X) * Z / (Z+2).
7975 and.l &0x7FFF0000,%d1 # isolate the biased exponent
7976 add.l &0x00010000,%d1 # EXPONENT OF 2|X| (exponent + 1)
7978 and.l &0x80000000,SGN(%a6) # SGN keeps only the sign bit
7979 fmov.x X(%a6),%fp0 # FP0 IS Y = 2|X|
7983 fmovm.x &0x1,-(%sp) # save Y on stack
7984 lea (%sp),%a0 # pass ptr to Y
7985 bsr setoxm1 # FP0 IS Z = EXPM1(Y)
7986 add.l &0xc,%sp # clear Y from stack
7990 fadd.s &0x40000000,%fp1 # Z+2
7995 fmov.l %d0,%fpcr # restore users round prec,mode
# stanh out-of-range paths (Steps 3-7 of the algorithm notes):
#   |X| < 1 here means |X| < 2^(-40): return X itself (move under user FPCR);
#   (5/2) log2 < |X| < 50 log2:  SGN - SGN*2/(EXP(2|X|)+1);
#   |X| >= 50 log2:              SGN - SGN*EPS, with EPS = 2^(-126).
#   d0 = round precision,mode (written to %fpcr before the last operation).
# NOTE(review): the labels, the branches after each compare, the loads of d1
# from SGN(a6) and the final add instructions are not visible in this
# listing -- check against the complete source.
8000 cmp.l %d1,&0x3FFF8000 # compare |X| with 1.0
8003 cmp.l %d1,&0x40048AA1 # compare |X| with 50 log2
8006 #-- (5/2) LOG2 < |X| < 50 LOG2,
8007 #--TANH(X) = 1 - (2/[EXP(2X)+1]). LET Y = 2|X|, SGN = SIGN(X),
8008 #--TANH(X) = SGN - SGN*2/[EXP(Y)+1].
8012 and.l &0x7FFF0000,%d1 # isolate the biased exponent
8013 add.l &0x00010000,%d1 # EXPO OF 2|X| (exponent + 1)
8014 mov.l %d1,X(%a6) # Y = 2|X|
8015 and.l &0x80000000,SGN(%a6) # SGN keeps only the sign bit
8017 fmov.x X(%a6),%fp0 # Y = 2|X|
8021 fmovm.x &0x01,-(%sp) # save Y on stack
8022 lea (%sp),%a0 # pass ptr to Y
8023 bsr setox # FP0 IS EXP(Y)
8024 add.l &0xc,%sp # clear Y from stack
8027 fadd.s &0x3F800000,%fp0 # EXP(Y)+1
8029 eor.l &0xC0000000,%d1 # -SIGN(X)*2
8030 fmov.s %d1,%fp1 # -SIGN(X)*2 IN SGL FMT
8031 fdiv.x %fp0,%fp1 # -SIGN(X)2 / [EXP(Y)+1 ]
8034 or.l &0x3F800000,%d1 # SGN
8035 fmov.s %d1,%fp0 # SGN IN SGL FMT
8037 fmov.l %d0,%fpcr # restore users round prec,mode
8038 mov.b &FADD_OP,%d1 # last inst is ADD
#--|X| < 2^(-40): return X
8043 fmov.l %d0,%fpcr # restore users round prec,mode
8044 mov.b &FMOV_OP,%d1 # last inst is MOVE
8045 fmov.x X(%a6),%fp0 # last inst - possible exception set
8048 #---RETURN SGN(X) - SGN(X)EPS
8051 and.l &0x80000000,%d1 # keep only the sign bit
8052 or.l &0x3F800000,%d1 # single-precision SGN (+/-1.0)
8054 and.l &0x80000000,%d1 # keep only the sign bit
8055 eor.l &0x80800000,%d1 # -SIGN(X)*EPS
8057 fmov.l %d0,%fpcr # restore users round prec,mode
8062 #--TANH(X) = X FOR DENORMALIZED X
8066 #########################################################################
8067 # slogn(): computes the natural logarithm of a normalized input #
8068 # slognd(): computes the natural logarithm of a denormalized input #
8069 # slognp1(): computes the log(1+X) of a normalized input #
8070 # slognp1d(): computes the log(1+X) of a denormalized input #
8072 # INPUT *************************************************************** #
8073 # a0 = pointer to extended precision input #
8074 # d0 = round precision,mode #
8076 # OUTPUT ************************************************************** #
8077 # fp0 = log(X) or log(1+X) #
8079 # ACCURACY and MONOTONICITY ******************************************* #
8080 # The returned result is within 2 ulps in 64 significant bits, #
8081 # i.e. within 0.5001 ulp to 53 bits if the result is subsequently #
8082 # rounded to double precision. The result is provably monotonic #
8083 # in double precision. #
8085 # ALGORITHM *********************************************************** #
8087 # Step 1. If |X-1| < 1/16, approximate log(X) by an odd #
8088 # polynomial in u, where u = 2(X-1)/(X+1). Otherwise, #
8089 # move on to Step 2. #
8091 # Step 2. X = 2**k * Y where 1 <= Y < 2. Define F to be the first #
8092 # seven significant bits of Y plus 2**(-7), i.e. #
8093 # F = 1.xxxxxx1 in base 2 where the six "x" match those #
8094 # of Y. Note that |Y-F| <= 2**(-7). #
8096 # Step 3. Define u = (Y-F)/F. Approximate log(1+u) by a #
8097 # polynomial in u, log(1+u) = poly. #
8099 # Step 4. Reconstruct #
8100 # log(X) = log( 2**k * Y ) = k*log(2) + log(F) + log(1+u) #
8101 # by k*log(2) + (log(F) + poly). The values of log(F) are #
8102 # calculated beforehand and stored in the program. #
8105 # Step 1: If |X| < 1/16, approximate log(1+X) by an odd #
8106 # polynomial in u where u = 2X/(2+X). Otherwise, move on #
8109 # Step 2: Let 1+X = 2**k * Y, where 1 <= Y < 2. Define F as done #
8110 # in Step 2 of the algorithm for LOGN and compute #
8111 # log(1+X) as k*log(2) + log(F) + poly where poly #
8112 # approximates log(1+u), u = (Y-F)/F. #
8114 # Implementation Notes: #
8115 # Note 1. There are 64 different possible values for F, thus 64 #
8116 # log(F)'s need to be tabulated. Moreover, the values of #
8117 # 1/F are also tabulated so that the division in (Y-F)/F #
8118 # can be performed by a multiplication. #
8120 # Note 2. In Step 2 of lognp1, in order to preserve accuracy, #
8121 # the value Y-F has to be calculated carefully when #
8124 # Note 3. To fully exploit the pipeline, polynomials are usually #
8125 # separated into two parts evaluated independently before #
8128 #########################################################################
# Constants used by the log routines.
# NOTE(review): the labels for these words are not present on these lines;
# the names given below are presumed from the order in which the code
# references LOGOF2, LOGA6..LOGA1, LOGB5..LOGB1, TWO and LTHOLD -- confirm.
# presumably LOGOF2 (same bit pattern as the LOG2 constant used by the 2**X code)
8130 long 0x3FFE0000,0xB17217F7,0xD1CF79AC,0x00000000
# presumably LOGA6 .. LOGA1: 8-byte coefficients read with fmul.d / fadd.d
8142 long 0x3FC2499A,0xB5E4040B
8144 long 0xBFC555B5,0x848CB7DB
8147 long 0x3FC99999,0x987D8730
8149 long 0xBFCFFFFF,0xFF6F7E97
8152 long 0x3FD55555,0x555555A4
8154 long 0xBFE00000,0x00000008
# presumably LOGB5 .. LOGB1: 8-byte coefficients read with fmov.d / fadd.d
8157 long 0x3F175496,0xADD7DAD6
8159 long 0x3F3C71C2,0xFE80C7E0
8162 long 0x3F624924,0x928BCCFF
8164 long 0x3F899999,0x999995EC
8167 long 0x3FB55555,0x55555555
# presumably TWO: read with fmov.s, so only the first longword is consumed
8169 long 0x40000000,0x00000000
# presumably LTHOLD: extended-precision threshold compared with |X| by log(1+X)
8172 long 0x3f990000,0x80000000,0x00000000,0x00000000
# Table of (1/F, log(F)) pairs used by the log routines (referenced in the
# code as LOGTBL; the label itself is not present on these lines).
# Each entry occupies 16 bytes (four longwords).  The code multiplies by the
# entry at the computed address to form U = (Y-F)/F and then steps 16 bytes
# forward to reach the matching log(F), so entries alternate 1/F, log(F).
# The banner's Note 1 states there are 64 values of F; the table holds
# 64 such pairs (128 entries).
8175 long 0x3FFE0000,0xFE03F80F,0xE03F80FE,0x00000000
8176 long 0x3FF70000,0xFF015358,0x833C47E2,0x00000000
8177 long 0x3FFE0000,0xFA232CF2,0x52138AC0,0x00000000
8178 long 0x3FF90000,0xBDC8D83E,0xAD88D549,0x00000000
8179 long 0x3FFE0000,0xF6603D98,0x0F6603DA,0x00000000
8180 long 0x3FFA0000,0x9CF43DCF,0xF5EAFD48,0x00000000
8181 long 0x3FFE0000,0xF2B9D648,0x0F2B9D65,0x00000000
8182 long 0x3FFA0000,0xDA16EB88,0xCB8DF614,0x00000000
8183 long 0x3FFE0000,0xEF2EB71F,0xC4345238,0x00000000
8184 long 0x3FFB0000,0x8B29B775,0x1BD70743,0x00000000
8185 long 0x3FFE0000,0xEBBDB2A5,0xC1619C8C,0x00000000
8186 long 0x3FFB0000,0xA8D839F8,0x30C1FB49,0x00000000
8187 long 0x3FFE0000,0xE865AC7B,0x7603A197,0x00000000
8188 long 0x3FFB0000,0xC61A2EB1,0x8CD907AD,0x00000000
8189 long 0x3FFE0000,0xE525982A,0xF70C880E,0x00000000
8190 long 0x3FFB0000,0xE2F2A47A,0xDE3A18AF,0x00000000
8191 long 0x3FFE0000,0xE1FC780E,0x1FC780E2,0x00000000
8192 long 0x3FFB0000,0xFF64898E,0xDF55D551,0x00000000
8193 long 0x3FFE0000,0xDEE95C4C,0xA037BA57,0x00000000
8194 long 0x3FFC0000,0x8DB956A9,0x7B3D0148,0x00000000
8195 long 0x3FFE0000,0xDBEB61EE,0xD19C5958,0x00000000
8196 long 0x3FFC0000,0x9B8FE100,0xF47BA1DE,0x00000000
8197 long 0x3FFE0000,0xD901B203,0x6406C80E,0x00000000
8198 long 0x3FFC0000,0xA9372F1D,0x0DA1BD17,0x00000000
8199 long 0x3FFE0000,0xD62B80D6,0x2B80D62C,0x00000000
8200 long 0x3FFC0000,0xB6B07F38,0xCE90E46B,0x00000000
8201 long 0x3FFE0000,0xD3680D36,0x80D3680D,0x00000000
8202 long 0x3FFC0000,0xC3FD0329,0x06488481,0x00000000
8203 long 0x3FFE0000,0xD0B69FCB,0xD2580D0B,0x00000000
8204 long 0x3FFC0000,0xD11DE0FF,0x15AB18CA,0x00000000
8205 long 0x3FFE0000,0xCE168A77,0x25080CE1,0x00000000
8206 long 0x3FFC0000,0xDE1433A1,0x6C66B150,0x00000000
8207 long 0x3FFE0000,0xCB8727C0,0x65C393E0,0x00000000
8208 long 0x3FFC0000,0xEAE10B5A,0x7DDC8ADD,0x00000000
8209 long 0x3FFE0000,0xC907DA4E,0x871146AD,0x00000000
8210 long 0x3FFC0000,0xF7856E5E,0xE2C9B291,0x00000000
8211 long 0x3FFE0000,0xC6980C69,0x80C6980C,0x00000000
8212 long 0x3FFD0000,0x82012CA5,0xA68206D7,0x00000000
8213 long 0x3FFE0000,0xC4372F85,0x5D824CA6,0x00000000
8214 long 0x3FFD0000,0x882C5FCD,0x7256A8C5,0x00000000
8215 long 0x3FFE0000,0xC1E4BBD5,0x95F6E947,0x00000000
8216 long 0x3FFD0000,0x8E44C60B,0x4CCFD7DE,0x00000000
8217 long 0x3FFE0000,0xBFA02FE8,0x0BFA02FF,0x00000000
8218 long 0x3FFD0000,0x944AD09E,0xF4351AF6,0x00000000
8219 long 0x3FFE0000,0xBD691047,0x07661AA3,0x00000000
8220 long 0x3FFD0000,0x9A3EECD4,0xC3EAA6B2,0x00000000
8221 long 0x3FFE0000,0xBB3EE721,0xA54D880C,0x00000000
8222 long 0x3FFD0000,0xA0218434,0x353F1DE8,0x00000000
8223 long 0x3FFE0000,0xB92143FA,0x36F5E02E,0x00000000
8224 long 0x3FFD0000,0xA5F2FCAB,0xBBC506DA,0x00000000
8225 long 0x3FFE0000,0xB70FBB5A,0x19BE3659,0x00000000
8226 long 0x3FFD0000,0xABB3B8BA,0x2AD362A5,0x00000000
8227 long 0x3FFE0000,0xB509E68A,0x9B94821F,0x00000000
8228 long 0x3FFD0000,0xB1641795,0xCE3CA97B,0x00000000
8229 long 0x3FFE0000,0xB30F6352,0x8917C80B,0x00000000
8230 long 0x3FFD0000,0xB7047551,0x5D0F1C61,0x00000000
8231 long 0x3FFE0000,0xB11FD3B8,0x0B11FD3C,0x00000000
8232 long 0x3FFD0000,0xBC952AFE,0xEA3D13E1,0x00000000
8233 long 0x3FFE0000,0xAF3ADDC6,0x80AF3ADE,0x00000000
8234 long 0x3FFD0000,0xC2168ED0,0xF458BA4A,0x00000000
8235 long 0x3FFE0000,0xAD602B58,0x0AD602B6,0x00000000
8236 long 0x3FFD0000,0xC788F439,0xB3163BF1,0x00000000
8237 long 0x3FFE0000,0xAB8F69E2,0x8359CD11,0x00000000
8238 long 0x3FFD0000,0xCCECAC08,0xBF04565D,0x00000000
8239 long 0x3FFE0000,0xA9C84A47,0xA07F5638,0x00000000
8240 long 0x3FFD0000,0xD2420487,0x2DD85160,0x00000000
8241 long 0x3FFE0000,0xA80A80A8,0x0A80A80B,0x00000000
8242 long 0x3FFD0000,0xD7894992,0x3BC3588A,0x00000000
8243 long 0x3FFE0000,0xA655C439,0x2D7B73A8,0x00000000
8244 long 0x3FFD0000,0xDCC2C4B4,0x9887DACC,0x00000000
8245 long 0x3FFE0000,0xA4A9CF1D,0x96833751,0x00000000
8246 long 0x3FFD0000,0xE1EEBD3E,0x6D6A6B9E,0x00000000
8247 long 0x3FFE0000,0xA3065E3F,0xAE7CD0E0,0x00000000
8248 long 0x3FFD0000,0xE70D785C,0x2F9F5BDC,0x00000000
8249 long 0x3FFE0000,0xA16B312E,0xA8FC377D,0x00000000
8250 long 0x3FFD0000,0xEC1F392C,0x5179F283,0x00000000
8251 long 0x3FFE0000,0x9FD809FD,0x809FD80A,0x00000000
8252 long 0x3FFD0000,0xF12440D3,0xE36130E6,0x00000000
8253 long 0x3FFE0000,0x9E4CAD23,0xDD5F3A20,0x00000000
8254 long 0x3FFD0000,0xF61CCE92,0x346600BB,0x00000000
8255 long 0x3FFE0000,0x9CC8E160,0xC3FB19B9,0x00000000
8256 long 0x3FFD0000,0xFB091FD3,0x8145630A,0x00000000
8257 long 0x3FFE0000,0x9B4C6F9E,0xF03A3CAA,0x00000000
8258 long 0x3FFD0000,0xFFE97042,0xBFA4C2AD,0x00000000
8259 long 0x3FFE0000,0x99D722DA,0xBDE58F06,0x00000000
8260 long 0x3FFE0000,0x825EFCED,0x49369330,0x00000000
8261 long 0x3FFE0000,0x9868C809,0x868C8098,0x00000000
8262 long 0x3FFE0000,0x84C37A7A,0xB9A905C9,0x00000000
8263 long 0x3FFE0000,0x97012E02,0x5C04B809,0x00000000
8264 long 0x3FFE0000,0x87224C2E,0x8E645FB7,0x00000000
8265 long 0x3FFE0000,0x95A02568,0x095A0257,0x00000000
8266 long 0x3FFE0000,0x897B8CAC,0x9F7DE298,0x00000000
8267 long 0x3FFE0000,0x94458094,0x45809446,0x00000000
8268 long 0x3FFE0000,0x8BCF55DE,0xC4CD05FE,0x00000000
8269 long 0x3FFE0000,0x92F11384,0x0497889C,0x00000000
8270 long 0x3FFE0000,0x8E1DC0FB,0x89E125E5,0x00000000
8271 long 0x3FFE0000,0x91A2B3C4,0xD5E6F809,0x00000000
8272 long 0x3FFE0000,0x9066E68C,0x955B6C9B,0x00000000
8273 long 0x3FFE0000,0x905A3863,0x3E06C43B,0x00000000
8274 long 0x3FFE0000,0x92AADE74,0xC7BE59E0,0x00000000
8275 long 0x3FFE0000,0x8F1779D9,0xFDC3A219,0x00000000
8276 long 0x3FFE0000,0x94E9BFF6,0x15845643,0x00000000
8277 long 0x3FFE0000,0x8DDA5202,0x37694809,0x00000000
8278 long 0x3FFE0000,0x9723A1B7,0x20134203,0x00000000
8279 long 0x3FFE0000,0x8CA29C04,0x6514E023,0x00000000
8280 long 0x3FFE0000,0x995899C8,0x90EB8990,0x00000000
8281 long 0x3FFE0000,0x8B70344A,0x139BC75A,0x00000000
8282 long 0x3FFE0000,0x9B88BDAA,0x3A3DAE2F,0x00000000
8283 long 0x3FFE0000,0x8A42F870,0x5669DB46,0x00000000
8284 long 0x3FFE0000,0x9DB4224F,0xFFE1157C,0x00000000
8285 long 0x3FFE0000,0x891AC73A,0xE9819B50,0x00000000
8286 long 0x3FFE0000,0x9FDADC26,0x8B7A12DA,0x00000000
8287 long 0x3FFE0000,0x87F78087,0xF78087F8,0x00000000
8288 long 0x3FFE0000,0xA1FCFF17,0xCE733BD4,0x00000000
8289 long 0x3FFE0000,0x86D90544,0x7A34ACC6,0x00000000
8290 long 0x3FFE0000,0xA41A9E8F,0x5446FB9F,0x00000000
8291 long 0x3FFE0000,0x85BF3761,0x2CEE3C9B,0x00000000
8292 long 0x3FFE0000,0xA633CD7E,0x6771CD8B,0x00000000
8293 long 0x3FFE0000,0x84A9F9C8,0x084A9F9D,0x00000000
8294 long 0x3FFE0000,0xA8489E60,0x0B435A5E,0x00000000
8295 long 0x3FFE0000,0x83993052,0x3FBE3368,0x00000000
8296 long 0x3FFE0000,0xAA59233C,0xCCA4BD49,0x00000000
8297 long 0x3FFE0000,0x828CBFBE,0xB9A020A3,0x00000000
8298 long 0x3FFE0000,0xAC656DAE,0x6BCC4985,0x00000000
8299 long 0x3FFE0000,0x81848DA8,0xFAF0D277,0x00000000
8300 long 0x3FFE0000,0xAE6D8EE3,0x60BB2468,0x00000000
8301 long 0x3FFE0000,0x80808080,0x80808081,0x00000000
8302 long 0x3FFE0000,0xB07197A2,0x3C46C654,0x00000000
8318 #--ENTRY POINT FOR LOG(X) FOR X FINITE, NON-ZERO, NOT NAN'S
#--a0 points to the extended-precision input; the result is built up in fp0.
8320 fmov.x (%a0),%fp0 # LOAD INPUT
8321 mov.l &0x00000000,ADJK(%a6) # no exponent adjustment on this entry (ADJK is added to K below)
8324 #--FPCR SAVED AND CLEARED, INPUT IS 2^(ADJK)*FP0, FP0 CONTAINS
8325 #--A FINITE, NON-ZERO, NORMALIZED NUMBER.
8331 mov.l 4(%a0),X+4(%a6) # copy input longword at offset 4 into local X
8332 mov.l 8(%a0),X+8(%a6) # copy input longword at offset 8 into local X
8334 cmp.l %d1,&0 # CHECK IF X IS NEGATIVE
8335 blt.w LOGNEG # LOG OF NEGATIVE ARGUMENT IS INVALID
8336 # X IS POSITIVE, CHECK IF X IS NEAR 1
8337 cmp.l %d1,&0x3ffef07d # IS X < 15/16?
8339 cmp.l %d1,&0x3fff8841 # IS X > 17/16?
8343 #--THIS SHOULD BE THE USUAL CASE, X NOT VERY CLOSE TO 1
8345 #--X = 2^(K) * Y, 1 <= Y < 2. THUS, Y = 1.XXXXXXXX....XX IN BINARY.
8346 #--WE DEFINE F = 1.XXXXXX1, I.E. FIRST 7 BITS OF Y AND ATTACH A 1.
8347 #--THE IDEA IS THAT LOG(X) = K*LOG2 + LOG(Y)
8348 #-- = K*LOG2 + LOG(F) + LOG(1 + (Y-F)/F).
8349 #--NOTE THAT U = (Y-F)/F IS VERY SMALL AND THUS APPROXIMATING
8350 #--LOG(1+U) CAN BE VERY EFFICIENT.
8351 #--ALSO NOTE THAT THE VALUE 1/F IS STORED IN A TABLE SO THAT NO
8352 #--DIVISION IS NEEDED TO CALCULATE (Y-F)/F.
8354 #--GET K, Y, F, AND ADDRESS OF 1/F.
8356 asr.l &8,%d1 # SHIFTED 16 BITS, BIASED EXPO. OF X
8357 sub.l &0x3FFF,%d1 # THIS IS K
8358 add.l ADJK(%a6),%d1 # ADJUST K, ORIGINAL INPUT MAY BE DENORM.
8359 lea LOGTBL(%pc),%a0 # BASE ADDRESS OF 1/F AND LOG(F)
8360 fmov.l %d1,%fp1 # CONVERT K TO FLOATING-POINT FORMAT
8362 #--WHILE THE CONVERSION IS GOING ON, WE GET F AND ADDRESS OF 1/F
8363 mov.l &0x3FFF0000,X(%a6) # X IS NOW Y, I.E. 2^(-K)*X
8364 mov.l XFRAC(%a6),FFRAC(%a6)
8365 and.l &0xFE000000,FFRAC(%a6) # FIRST 7 BITS OF Y
8366 or.l &0x01000000,FFRAC(%a6) # GET F: ATTACH A 1 AT THE EIGHTH BIT
8367 mov.l FFRAC(%a6),%d1 # READY TO GET ADDRESS OF 1/F
8368 and.l &0x7E000000,%d1 # keep the six variable bits of F (the table index)
8371 asr.l &4,%d1 # SHIFTED 20, D1 IS THE DISPLACEMENT
8372 add.l %d1,%a0 # A0 IS THE ADDRESS FOR 1/F
8375 mov.l &0x3fff0000,F(%a6) # F gets the same exponent word as Y (0x3FFF)
8377 fsub.x F(%a6),%fp0 # Y-F
8378 fmovm.x &0xc,-(%sp) # SAVE FP2-3 WHILE FP0 IS NOT READY
8379 #--SUMMARY: FP0 IS Y-F, A0 IS ADDRESS OF 1/F, FP1 IS K
8380 #--REGISTERS SAVED: FPCR, FP1, FP2
#--(THE FMOVM JUST ABOVE IS THE ONE THAT PUSHES FP2 AND FP3.)
8383 #--A RE-ENTRY POINT FOR LOGNP1
8384 fmul.x (%a0),%fp0 # FP0 IS U = (Y-F)/F
8385 fmul.x LOGOF2(%pc),%fp1 # GET K*LOG2 WHILE FP0 IS NOT READY
8387 fmul.x %fp2,%fp2 # FP2 IS V=U*U
8388 fmov.x %fp1,KLOG2(%a6) # PUT K*LOG2 IN MEMORY, FREE FP1
8390 #--LOG(1+U) IS APPROXIMATED BY
8391 #--U + V*(A1+U*(A2+U*(A3+U*(A4+U*(A5+U*A6))))) WHICH IS
8392 #--[U + V*(A1+V*(A3+V*A5))] + [U*V*(A2+V*(A4+V*A6))]
#--THE TWO BRACKETED HALVES ARE EVALUATED SIDE BY SIDE IN FP2 AND FP1.
8397 fmul.d LOGA6(%pc),%fp1 # V*A6
8398 fmul.d LOGA5(%pc),%fp2 # V*A5
8400 fadd.d LOGA4(%pc),%fp1 # A4+V*A6
8401 fadd.d LOGA3(%pc),%fp2 # A3+V*A5
8403 fmul.x %fp3,%fp1 # V*(A4+V*A6)
8404 fmul.x %fp3,%fp2 # V*(A3+V*A5)
8406 fadd.d LOGA2(%pc),%fp1 # A2+V*(A4+V*A6)
8407 fadd.d LOGA1(%pc),%fp2 # A1+V*(A3+V*A5)
8409 fmul.x %fp3,%fp1 # V*(A2+V*(A4+V*A6))
8410 add.l &16,%a0 # ADDRESS OF LOG(F)
8411 fmul.x %fp3,%fp2 # V*(A1+V*(A3+V*A5))
8413 fmul.x %fp0,%fp1 # U*V*(A2+V*(A4+V*A6))
8414 fadd.x %fp2,%fp0 # U+V*(A1+V*(A3+V*A5))
8416 fadd.x (%a0),%fp1 # LOG(F)+U*V*(A2+V*(A4+V*A6))
8417 fmovm.x (%sp)+,&0x30 # RESTORE FP2-3
8418 fadd.x %fp1,%fp0 # FP0 IS LOG(F) + LOG(1+U)
8421 fadd.x KLOG2(%a6),%fp0 # FINAL ADD
8427 # if the input is exactly equal to one, then exit through ld_pzero.
8428 # if these 2 lines weren't here, the correct answer would be returned
8429 # but the INEX2 bit would be set.
8430 fcmp.b %fp0,&0x1 # is it equal to one?
8431 fbeq.l ld_pzero # yes
8433 #--REGISTERS SAVED: FPCR, FP1. FP0 CONTAINS THE INPUT.
8435 fsub.s one(%pc),%fp1 # FP1 IS X-1
8436 fadd.s one(%pc),%fp0 # FP0 IS X+1
8437 fadd.x %fp1,%fp1 # FP1 IS 2(X-1)
8438 #--LOG(X) = LOG(1+U/2)-LOG(1-U/2) WHICH IS AN ODD POLYNOMIAL
8439 #--IN U, U = 2(X-1)/(X+1) = FP1/FP0
8442 #--THIS IS A RE-ENTRY POINT FOR LOGNP1
8443 fdiv.x %fp0,%fp1 # FP1 IS U
8444 fmovm.x &0xc,-(%sp) # SAVE FP2-3
8445 #--REGISTERS SAVED ARE NOW FPCR,FP1,FP2,FP3
8446 #--LET V=U*U, W=V*V, CALCULATE
8447 #--U + U*V*(B1 + V*(B2 + V*(B3 + V*(B4 + V*B5)))) BY
8448 #--U + U*V*( [B1 + W*(B3 + W*B5)] + [V*(B2 + W*B4)] )
8450 fmul.x %fp0,%fp0 # FP0 IS V
8451 fmov.x %fp1,SAVEU(%a6) # STORE U IN MEMORY, FREE FP1
8453 fmul.x %fp1,%fp1 # FP1 IS W
8455 fmov.d LOGB5(%pc),%fp3 # FP3 IS B5
8456 fmov.d LOGB4(%pc),%fp2 # FP2 IS B4
8458 fmul.x %fp1,%fp3 # W*B5
8459 fmul.x %fp1,%fp2 # W*B4
8461 fadd.d LOGB3(%pc),%fp3 # B3+W*B5
8462 fadd.d LOGB2(%pc),%fp2 # B2+W*B4
8464 fmul.x %fp3,%fp1 # W*(B3+W*B5), FP3 RELEASED
8466 fmul.x %fp0,%fp2 # V*(B2+W*B4)
8468 fadd.d LOGB1(%pc),%fp1 # B1+W*(B3+W*B5)
8469 fmul.x SAVEU(%a6),%fp0 # FP0 IS U*V
8471 fadd.x %fp2,%fp1 # B1+W*(B3+W*B5) + V*(B2+W*B4), FP2 RELEASED
8472 fmovm.x (%sp)+,&0x30 # FP2-3 RESTORED
8474 fmul.x %fp1,%fp0 # U*V*( [B1+W*(B3+W*B5)] + [V*(B2+W*B4)] )
8477 fadd.x SAVEU(%a6),%fp0 # ADD U BACK IN: U + U*V*( ... )
8480 #--REGISTERS SAVED FPCR. LOG(-VE) IS INVALID
8486 #--ENTRY POINT FOR LOG(X) FOR DENORMALIZED INPUT
#--The input mantissa is shifted left until its leading 1 is in the top
#--bit, the result is written back to XFRAC, and control then joins the
#--regular log(X) path at LOGBGN.
8488 mov.l &-100,ADJK(%a6) # INPUT = 2^(ADJK) * FP0
8490 #----normalize the input value by left shifting k bits (k to be determined
8491 #----below), adjusting exponent and storing -k to ADJK
8492 #----the value TWOTO100 is no longer needed.
8493 #----Note that this code assumes the denormalized input is NON-ZERO.
8495 movm.l &0x3f00,-(%sp) # save some registers {d2-d7}
8496 mov.l (%a0),%d3 # D3 is exponent of smallest norm. #
8498 mov.l 8(%a0),%d5 # (D4,D5) is (Hi_X,Lo_X)
8499 clr.l %d2 # D2 used for holding K
8509 bfffo %d4{&0:&32},%d6 # d6 = offset of the first 1 bit in d4
8511 add.l %d6,%d2 # (D3,D4,D5) is normalized
8514 mov.l %d4,XFRAC(%a6) # store high mantissa longword
8515 mov.l %d5,XFRAC+4(%a6) # store low mantissa longword
8519 movm.l (%sp)+,&0xfc # restore registers {d2-d7}
8521 bra.w LOGBGN # begin regular log(X)
8525 bfffo %d4{&0:&32},%d6 # find first 1
8526 mov.l %d6,%d2 # get k
8528 mov.l %d5,%d7 # a copy of D5
8533 or.l %d7,%d4 # (D3,D4,D5) normalized
8536 mov.l %d4,XFRAC(%a6) # store high mantissa longword
8537 mov.l %d5,XFRAC+4(%a6) # store low mantissa longword
8541 movm.l (%sp)+,&0xfc # restore registers {d2-d7}
8543 bra.w LOGBGN # begin regular log(X)
8546 #--ENTRY POINT FOR LOG(1+X) FOR X FINITE, NON-ZERO, NOT NAN'S
#--a0 points to the extended-precision input Z; the result is built in fp0.
#--Inputs whose magnitude does not exceed LTHOLD are returned unchanged.
8548 fmov.x (%a0),%fp0 # LOAD INPUT
8549 fabs.x %fp0 # test magnitude
8550 fcmp.x %fp0,LTHOLD(%pc) # compare with min threshold
8551 fbgt.w LP1REAL # if greater, continue
8553 mov.b &FMOV_OP,%d1 # last inst is MOVE
8554 fmov.x (%a0),%fp0 # return signed argument
8558 fmov.x (%a0),%fp0 # LOAD INPUT
8559 mov.l &0x00000000,ADJK(%a6) # no exponent adjustment (ADJK is added to K in the LOG(X) code)
8560 fmov.x %fp0,%fp1 # FP1 IS INPUT Z
8561 fadd.s one(%pc),%fp0 # X := ROUND(1+Z)
8563 mov.w XFRAC(%a6),XDCARE(%a6)
8566 ble.w LP1NEG0 # LOG OF ZERO OR -VE
8567 cmp.l %d1,&0x3ffe8000 # IS 1+Z WITHIN BOUNDS [1/2,3/2]? (LOWER BOUND 1/2)
8569 cmp.l %d1,&0x3fffc000 # UPPER BOUND 3/2
8571 #--IF 1+Z > 3/2 OR 1+Z < 1/2, THEN X, WHICH IS ROUNDING 1+Z,
8572 #--CONTAINS AT LEAST 63 BITS OF INFORMATION OF Z. IN THAT CASE,
8573 #--SIMPLY INVOKE LOG(X) FOR LOG(1+Z).
8576 #--NEXT SEE IF EXP(-1/16) < X < EXP(1/16)
8577 cmp.l %d1,&0x3ffef07d # same constant as the "X < 15/16" test in LOG(X)
8579 cmp.l %d1,&0x3fff8841 # same constant as the "X > 17/16" test in LOG(X)
8583 #--EXP(-1/16) < X < EXP(1/16). LOG(1+Z) = LOG(1+U/2) - LOG(1-U/2)
8584 #--WHERE U = 2Z/(2+Z) = 2Z/(1+X).
8585 fadd.x %fp1,%fp1 # FP1 IS 2Z
8586 fadd.s one(%pc),%fp0 # FP0 IS 1+X
8591 #--HERE WE USE THE USUAL TABLE DRIVEN APPROACH. CARE HAS TO BE
8592 #--TAKEN BECAUSE 1+Z CAN HAVE 67 BITS OF INFORMATION AND WE MUST
8593 #--PRESERVE ALL THE INFORMATION. BECAUSE 1+Z IS IN [1/2,3/2],
8594 #--THERE ARE ONLY TWO CASES.
8595 #--CASE 1: 1+Z < 1, THEN K = -1 AND Y-F = (2-F) + 2Z
8596 #--CASE 2: 1+Z > 1, THEN K = 0 AND Y-F = (1-F) + Z
8597 #--ON RETURNING TO LP1CONT1, WE MUST HAVE K IN FP1, ADDRESS OF
8598 #--(1/F) IN A0, Y-F IN FP0, AND FP2 SAVED.
8600 mov.l XFRAC(%a6),FFRAC(%a6)
8601 and.l &0xFE000000,FFRAC(%a6) # first 7 bits of the mantissa
8602 or.l &0x01000000,FFRAC(%a6) # F OBTAINED
8603 cmp.l %d1,&0x3FFF8000 # SEE IF 1+Z > 1
#--CASE 1 (K = -1): Y-F = (2-F) + 2Z
8607 fmov.s TWO(%pc),%fp0
8608 mov.l &0x3fff0000,F(%a6)
8610 fsub.x F(%a6),%fp0 # 2-F
8611 mov.l FFRAC(%a6),%d1
8612 and.l &0x7E000000,%d1
8615 asr.l &4,%d1 # D1 CONTAINS DISPLACEMENT FOR 1/F
8616 fadd.x %fp1,%fp1 # GET 2Z
8617 fmovm.x &0xc,-(%sp) # SAVE FP2 {%fp2/%fp3}
8618 fadd.x %fp1,%fp0 # FP0 IS Y-F = (2-F)+2Z
8619 lea LOGTBL(%pc),%a0 # A0 IS ADDRESS OF 1/F
8621 fmov.s negone(%pc),%fp1 # FP1 IS K = -1
#--CASE 2 (K = 0): Y-F = (1-F) + Z
8625 fmov.s one(%pc),%fp0
8626 mov.l &0x3fff0000,F(%a6)
8628 fsub.x F(%a6),%fp0 # 1-F
8629 mov.l FFRAC(%a6),%d1
8630 and.l &0x7E000000,%d1
8634 fadd.x %fp1,%fp0 # FP0 IS Y-F
8635 fmovm.x &0xc,-(%sp) # FP2 SAVED {%fp2/%fp3}
8637 add.l %d1,%a0 # A0 IS ADDRESS OF 1/F
8638 fmov.s zero(%pc),%fp1 # FP1 IS K = 0
8642 #--FPCR SAVED. D1 IS X IN COMPACT FORM (THE COMPARES ABOVE USE D1).
8646 fmov.s negone(%pc),%fp0 # fp0 = -1.0
8652 fmov.s zero(%pc),%fp0 # fp0 = 0.0
8658 #--ENTRY POINT FOR LOG(1+Z) FOR DENORMALIZED INPUT
8659 # Simply return the denorm
8663 #########################################################################
8664 # satanh(): computes the inverse hyperbolic tangent of a norm input #
8665 # satanhd(): computes the inverse hyperbolic tangent of a denorm input #
8667 # INPUT *************************************************************** #
8668 # a0 = pointer to extended precision input #
8669 # d0 = round precision,mode #
8671 # OUTPUT ************************************************************** #
8672 # fp0 = arctanh(X) #
8674 # ACCURACY and MONOTONICITY ******************************************* #
8675 # The returned result is within 3 ulps in 64 significant bits, #
8676 # i.e. within 0.5001 ulp to 53 bits if the result is subsequently #
8677 # rounded to double precision. The result is provably monotonic #
8678 # in double precision. #
8680 # ALGORITHM *********************************************************** #
8683 # 1. If |X| >= 1, go to 3. #
8685 # 2. (|X| < 1) Calculate atanh(X) by #
8689 # atanh(X) := sgn * (1/2) * logp1(z) #
8692 # 3. If |X| > 1, go to 5. #
8694 # 4. (|X| = 1) Generate infinity with an appropriate sign and #
8695 # divide-by-zero by #
8697 # atanh(X) := sgn / (+0). #
8700 # 5. (|X| > 1) Generate an invalid operation by 0 * infinity. #
8703 #########################################################################
#--atanh(X) for the extended-precision input at (%a0); the usual case is
#--computed as SIGN(X) * (1/2) * LOG1P(2|X|/(1-|X|)) by calling slognp1.
#--presumably d1 holds X in compact form at this point -- confirm.
8709 and.l &0x7FFFFFFF,%d1 # drop bit 31 (the sign)
8710 cmp.l %d1,&0x3FFF8000 # compare with 0x3FFF8000 (used as 1.0 in the log(1+X) code)
8713 #--THIS IS THE USUAL CASE, |X| < 1
8714 #--Y = |X|, Z = 2Y/(1-Y), ATANH(X) = SIGN(X) * (1/2) * LOG1P(Z).
8716 fabs.x (%a0),%fp0 # Y = |X|
8719 fadd.x %fp0,%fp0 # 2Y
8720 fadd.s &0x3F800000,%fp1 # 1-Y
8721 fdiv.x %fp1,%fp0 # 2Y/(1-Y)
8723 and.l &0x80000000,%d1 # keep only the sign bit
8724 or.l &0x3F000000,%d1 # SIGN(X)*HALF
8727 mov.l %d0,-(%sp) # save rnd prec,mode
8728 clr.l %d0 # pass ext prec,RN
8729 fmovm.x &0x01,-(%sp) # save Z on stack
8730 lea (%sp),%a0 # pass ptr to Z
8731 bsr slognp1 # LOG1P(Z)
8732 add.l &0xc,%sp # clear Z from stack
8734 mov.l (%sp)+,%d0 # fetch old prec,mode
8735 fmov.l %d0,%fpcr # load it
8736 mov.b &FMUL_OP,%d1 # last inst is MUL
#--|X| >= 1: the comparison below separates |X| = 1 from |X| > 1
#--(steps 3-5 of the algorithm description)
8741 fabs.x (%a0),%fp0 # |X|
8742 fcmp.s %fp0,&0x3F800000 # compare |X| with 1.0 (single)
8747 #--ATANH(X) = X FOR DENORMALIZED X
8751 #########################################################################
8752 # slog10(): computes the base-10 logarithm of a normalized input #
8753 # slog10d(): computes the base-10 logarithm of a denormalized input #
8754 # slog2(): computes the base-2 logarithm of a normalized input #
8755 # slog2d(): computes the base-2 logarithm of a denormalized input #
8757 # INPUT *************************************************************** #
8758 # a0 = pointer to extended precision input #
8759 # d0 = round precision,mode #
8761 # OUTPUT ************************************************************** #
8762 # fp0 = log_10(X) or log_2(X) #
8764 # ACCURACY and MONOTONICITY ******************************************* #
8765 # The returned result is within 1.7 ulps in 64 significant bits, #
8766 # i.e. within 0.5003 ulp to 53 bits if the result is subsequently #
8767 # rounded to double precision. The result is provably monotonic #
8768 # in double precision. #
8770 # ALGORITHM *********************************************************** #
8774 # Step 0. If X < 0, create a NaN and raise the invalid operation #
8775 # flag. Otherwise, save FPCR in D1; set FpCR to default. #
8776 # Notes: Default means round-to-nearest mode, no floating-point #
8777 # traps, and precision control = double extended. #
8779 # Step 1. Call slognd to obtain Y = log(X), the natural log of X. #
8780 # Notes: Even if X is denormalized, log(X) is always normalized. #
8782 # Step 2. Compute log_10(X) = log(X) * (1/log(10)). #
8783 # 2.1 Restore the user FPCR #
8784 # 2.2 Return ans := Y * INV_L10. #
8788 # Step 0. If X < 0, create a NaN and raise the invalid operation #
8789 # flag. Otherwise, save FPCR in D1; set FpCR to default. #
8790 # Notes: Default means round-to-nearest mode, no floating-point #
8791 # traps, and precision control = double extended. #
8793 # Step 1. Call sLogN to obtain Y = log(X), the natural log of X. #
8795 # Step 2. Compute log_10(X) = log(X) * (1/log(10)). #
8796 # 2.1 Restore the user FPCR #
8797 # 2.2 Return ans := Y * INV_L10. #
8801 # Step 0. If X < 0, create a NaN and raise the invalid operation #
8802 # flag. Otherwise, save FPCR in D1; set FpCR to default. #
8803 # Notes: Default means round-to-nearest mode, no floating-point #
8804 # traps, and precision control = double extended. #
8806 # Step 1. Call slognd to obtain Y = log(X), the natural log of X. #
8807 # Notes: Even if X is denormalized, log(X) is always normalized. #
8809 # Step 2. Compute log_2(X) = log(X) * (1/log(2)). #
8810 # 2.1 Restore the user FPCR #
8811 # 2.2 Return ans := Y * INV_L2. #
8815 # Step 0. If X < 0, create a NaN and raise the invalid operation #
8816 # flag. Otherwise, save FPCR in D1; set FpCR to default. #
8817 # Notes: Default means round-to-nearest mode, no floating-point #
8818 # traps, and precision control = double extended. #
8820 # Step 1. If X is not an integer power of two, i.e., X != 2^k, #
8823 # Step 2. Return k. #
8824 # 2.1 Get integer k, X = 2^k. #
8825 # 2.2 Restore the user FPCR. #
8826 # 2.3 Return ans := convert-to-double-extended(k). #
8828 # Step 3. Call sLogN to obtain Y = log(X), the natural log of X. #
8830 # Step 4. Compute log_2(X) = log(X) * (1/log(2)). #
8831 # 4.1 Restore the user FPCR #
8832 # 4.2 Return ans := Y * INV_L2. #
8834 #########################################################################
# Extended-precision scale factors for the base-10 and base-2 logarithms.
# NOTE(review): the labels are not present on these lines; presumably the
# first is INV_L10 (1/log(10)) and the second INV_L2 (1/log(2)), matching
# the order in which the routines below use them -- confirm.
8837 long 0x3FFD0000,0xDE5BD8A9,0x37287195,0x00000000
8840 long 0x3FFF0000,0xB8AA3B29,0x5C17F0BC,0x00000000
8843 #--entry point for Log10(X), X is normalized
#--An input equal to one exits through ld_pzero; otherwise the natural log
#--from slogn is scaled by INV_L10.
8846 fcmp.x %fp0,(%a0) # if operand == 1,
8847 fbeq.l ld_pzero # return an EXACT zero
8853 bsr slogn # log(X), X normal.
8855 fmul.x INV_L10(%pc),%fp0 # log_10(X) = log(X) * (1/log(10))
8859 #--entry point for Log10(X), X is denormalized
#--Same scaling as the normalized entry, but the natural log comes from slognd.
8865 bsr slognd # log(X), X denorm.
8867 fmul.x INV_L10(%pc),%fp0 # log_10(X) = log(X) * (1/log(10))
8871 #--entry point for Log2(X), X is normalized
#--Per the algorithm description, an exact power of two returns its exponent
#--k directly; any other input goes through slogn and is scaled by INV_L2.
8877 bne.b continue # X is not 2^k
8880 and.l &0x7FFFFFFF,%d1 # clear bit 31 of d1
8885 and.l &0x00007FFF,%d1 # keep the low 15 bits (presumably the biased exponent -- confirm)
8895 bsr slogn # log(X), X normal.
8897 fmul.x INV_L2(%pc),%fp0 # log_2(X) = log(X) * (1/log(2))
8904 #--entry point for Log2(X), X is denormalized
#--The natural log from slognd is scaled by INV_L2.
8910 bsr slognd # log(X), X denorm.
8912 fmul.x INV_L2(%pc),%fp0 # log_2(X) = log(X) * (1/log(2))
8915 #########################################################################
8916 # stwotox(): computes 2**X for a normalized input #
8917 # stwotoxd(): computes 2**X for a denormalized input #
8918 # stentox(): computes 10**X for a normalized input #
8919 # stentoxd(): computes 10**X for a denormalized input #
8921 # INPUT *************************************************************** #
8922 # a0 = pointer to extended precision input #
8923 # d0 = round precision,mode #
8925 # OUTPUT ************************************************************** #
8926 # fp0 = 2**X or 10**X #
8928 # ACCURACY and MONOTONICITY ******************************************* #
8929 # The returned result is within 2 ulps in 64 significant bits, #
8930 # i.e. within 0.5001 ulp to 53 bits if the result is subsequently #
8931 # rounded to double precision. The result is provably monotonic #
8932 # in double precision. #
8934 # ALGORITHM *********************************************************** #
8937 # 1. If |X| > 16480, go to ExpBig. #
8939 # 2. If |X| < 2**(-70), go to ExpSm. #
8941 # 3. Decompose X as X = N/64 + r where |r| <= 1/128. Furthermore #
8943 # N = 64(M + M') + j, j = 0,1,2,...,63. #
8945 # 4. Overwrite r := r * log2. Then #
8946 # 2**X = 2**(M') * 2**(M) * 2**(j/64) * exp(r). #
8947 # Go to expr to compute that expression. #
8950 # 1. If |X| > 16480*log_10(2) (base 10 log of 2), go to ExpBig. #
8952 # 2. If |X| < 2**(-70), go to ExpSm. #
8954 # 3. Set y := X*log_2(10)*64 (base 2 log of 10). Set #
8955 # N := round-to-int(y). Decompose N as #
8956 # N = 64(M + M') + j, j = 0,1,2,...,63. #
8959 # r := ((X - N*L1)-N*L2) * L10 #
8960 # where L1, L2 are the leading and trailing parts of #
8961 # log_10(2)/64 and L10 is the natural log of 10. Then #
8962 # 10**X = 2**(M') * 2**(M) * 2**(j/64) * exp(r). #
8963 # Go to expr to compute that expression. #
8966 # 1. Fetch 2**(j/64) from table as Fact1 and Fact2. #
8968 # 2. Overwrite Fact1 and Fact2 by #
8969 # Fact1 := 2**(M) * Fact1 #
8970 # Fact2 := 2**(M) * Fact2 #
8971 # Thus Fact1 + Fact2 = 2**(M) * 2**(j/64). #
8973 # 3. Calculate P where 1 + P approximates exp(r): #
8974 # P = r + r*r*(A1+r*(A2+...+r*A5)). #
8976 # 4. Let AdjFact := 2**(M'). Return #
8977 # AdjFact * ( Fact1 + ((Fact1*P) + Fact2) ). #
8981 # 1. Generate overflow by Huge * Huge if X > 0; otherwise, #
8982 # generate underflow by Tiny * Tiny. #
8985 # 1. Return 1 + X. #
8987 #########################################################################
# Constants for the 2**X and 10**X routines.
# NOTE(review): the first three words carry no label on these lines;
# presumably they are L2TEN64, L10TWO1 and L10TWO2 as referenced by the
# 10**X code -- confirm.
# presumably L2TEN64 (read with fmul.d)
8990 long 0x406A934F,0x0979A371 # 64LOG10/LOG2
# presumably L10TWO1, the leading part (read with fmul.d)
8992 long 0x3F734413,0x509F8000 # LOG2/64LOG10
# presumably L10TWO2, the trailing part of LOG2/64LOG10 (read with fmul.x)
8995 long 0xBFCD0000,0xC0219DC1,0xDA994FD2,0x00000000
# multiplier applied to the reduced argument in the 10**X path
8997 LOG10: long 0x40000000,0x935D8DDD,0xAAA8AC17,0x00000000
# multiplier applied to the reduced argument in the 2**X path
8999 LOG2: long 0x3FFE0000,0xB17217F7,0xD1CF79AC,0x00000000
# polynomial coefficients A5..A1 for the exp(r) approximation
9001 EXPA5: long 0x3F56C16D,0x6F7BD0B2
9002 EXPA4: long 0x3F811112,0x302C712C
9003 EXPA3: long 0x3FA55555,0x55554CC1
9004 EXPA2: long 0x3FC55555,0x55554A54
9005 EXPA1: long 0x3FE00000,0x00000000,0x00000000,0x00000000
# Table of 2^(J/64), J = 0..63 (referenced in the code as TEXPTBL; the label
# itself is not present on these lines).  Each entry is 16 bytes and is
# indexed by J*16.  The first three longwords are copied into FACT1; the
# fourth longword is read as two words that go to FACT2 and FACT2HI.
# The banner describes these as Fact1 and Fact2, with Fact1 + Fact2
# representing 2**(j/64).
9008 long 0x3FFF0000,0x80000000,0x00000000,0x3F738000
9009 long 0x3FFF0000,0x8164D1F3,0xBC030773,0x3FBEF7CA
9010 long 0x3FFF0000,0x82CD8698,0xAC2BA1D7,0x3FBDF8A9
9011 long 0x3FFF0000,0x843A28C3,0xACDE4046,0x3FBCD7C9
9012 long 0x3FFF0000,0x85AAC367,0xCC487B15,0xBFBDE8DA
9013 long 0x3FFF0000,0x871F6196,0x9E8D1010,0x3FBDE85C
9014 long 0x3FFF0000,0x88980E80,0x92DA8527,0x3FBEBBF1
9015 long 0x3FFF0000,0x8A14D575,0x496EFD9A,0x3FBB80CA
9016 long 0x3FFF0000,0x8B95C1E3,0xEA8BD6E7,0xBFBA8373
9017 long 0x3FFF0000,0x8D1ADF5B,0x7E5BA9E6,0xBFBE9670
9018 long 0x3FFF0000,0x8EA4398B,0x45CD53C0,0x3FBDB700
9019 long 0x3FFF0000,0x9031DC43,0x1466B1DC,0x3FBEEEB0
9020 long 0x3FFF0000,0x91C3D373,0xAB11C336,0x3FBBFD6D
9021 long 0x3FFF0000,0x935A2B2F,0x13E6E92C,0xBFBDB319
9022 long 0x3FFF0000,0x94F4EFA8,0xFEF70961,0x3FBDBA2B
9023 long 0x3FFF0000,0x96942D37,0x20185A00,0x3FBE91D5
9024 long 0x3FFF0000,0x9837F051,0x8DB8A96F,0x3FBE8D5A
9025 long 0x3FFF0000,0x99E04593,0x20B7FA65,0xBFBCDE7B
9026 long 0x3FFF0000,0x9B8D39B9,0xD54E5539,0xBFBEBAAF
9027 long 0x3FFF0000,0x9D3ED9A7,0x2CFFB751,0xBFBD86DA
9028 long 0x3FFF0000,0x9EF53260,0x91A111AE,0xBFBEBEDD
9029 long 0x3FFF0000,0xA0B0510F,0xB9714FC2,0x3FBCC96E
9030 long 0x3FFF0000,0xA2704303,0x0C496819,0xBFBEC90B
9031 long 0x3FFF0000,0xA43515AE,0x09E6809E,0x3FBBD1DB
9032 long 0x3FFF0000,0xA5FED6A9,0xB15138EA,0x3FBCE5EB
9033 long 0x3FFF0000,0xA7CD93B4,0xE965356A,0xBFBEC274
9034 long 0x3FFF0000,0xA9A15AB4,0xEA7C0EF8,0x3FBEA83C
9035 long 0x3FFF0000,0xAB7A39B5,0xA93ED337,0x3FBECB00
9036 long 0x3FFF0000,0xAD583EEA,0x42A14AC6,0x3FBE9301
9037 long 0x3FFF0000,0xAF3B78AD,0x690A4375,0xBFBD8367
9038 long 0x3FFF0000,0xB123F581,0xD2AC2590,0xBFBEF05F
9039 long 0x3FFF0000,0xB311C412,0xA9112489,0x3FBDFB3C
9040 long 0x3FFF0000,0xB504F333,0xF9DE6484,0x3FBEB2FB
9041 long 0x3FFF0000,0xB6FD91E3,0x28D17791,0x3FBAE2CB
9042 long 0x3FFF0000,0xB8FBAF47,0x62FB9EE9,0x3FBCDC3C
9043 long 0x3FFF0000,0xBAFF5AB2,0x133E45FB,0x3FBEE9AA
9044 long 0x3FFF0000,0xBD08A39F,0x580C36BF,0xBFBEAEFD
9045 long 0x3FFF0000,0xBF1799B6,0x7A731083,0xBFBCBF51
9046 long 0x3FFF0000,0xC12C4CCA,0x66709456,0x3FBEF88A
9047 long 0x3FFF0000,0xC346CCDA,0x24976407,0x3FBD83B2
9048 long 0x3FFF0000,0xC5672A11,0x5506DADD,0x3FBDF8AB
9049 long 0x3FFF0000,0xC78D74C8,0xABB9B15D,0xBFBDFB17
9050 long 0x3FFF0000,0xC9B9BD86,0x6E2F27A3,0xBFBEFE3C
9051 long 0x3FFF0000,0xCBEC14FE,0xF2727C5D,0xBFBBB6F8
9052 long 0x3FFF0000,0xCE248C15,0x1F8480E4,0xBFBCEE53
9053 long 0x3FFF0000,0xD06333DA,0xEF2B2595,0xBFBDA4AE
9054 long 0x3FFF0000,0xD2A81D91,0xF12AE45A,0x3FBC9124
9055 long 0x3FFF0000,0xD4F35AAB,0xCFEDFA1F,0x3FBEB243
9056 long 0x3FFF0000,0xD744FCCA,0xD69D6AF4,0x3FBDE69A
9057 long 0x3FFF0000,0xD99D15C2,0x78AFD7B6,0xBFB8BC61
9058 long 0x3FFF0000,0xDBFBB797,0xDAF23755,0x3FBDF610
9059 long 0x3FFF0000,0xDE60F482,0x5E0E9124,0xBFBD8BE1
9060 long 0x3FFF0000,0xE0CCDEEC,0x2A94E111,0x3FBACB12
9061 long 0x3FFF0000,0xE33F8972,0xBE8A5A51,0x3FBB9BFE
9062 long 0x3FFF0000,0xE5B906E7,0x7C8348A8,0x3FBCF2F4
9063 long 0x3FFF0000,0xE8396A50,0x3C4BDC68,0x3FBEF22F
9064 long 0x3FFF0000,0xEAC0C6E7,0xDD24392F,0xBFBDBF4A
9065 long 0x3FFF0000,0xED4F301E,0xD9942B84,0x3FBEC01A
9066 long 0x3FFF0000,0xEFE4B99B,0xDCDAF5CB,0x3FBE8CAC
9067 long 0x3FFF0000,0xF281773C,0x59FFB13A,0xBFBCBB3F
9068 long 0x3FFF0000,0xF5257D15,0x2486CC2C,0x3FBEF73A
9069 long 0x3FFF0000,0xF7D0DF73,0x0AD13BB9,0xBFB8B795
9070 long 0x3FFF0000,0xFA83B2DB,0x722A033A,0x3FBEF84B
9071 long 0x3FFF0000,0xFD3E0C0C,0xF486C175,0xBFBEF581
# Offsets of the longword 8 bytes into each of the FACT1 / FACT2 work areas.
# FACT1LOW receives the third longword of a table entry in the 2**X / 10**X code.
9083 set FACT1LOW,FACT1+8
9087 set FACT2LOW,FACT2+8
9090 #--ENTRY POINT FOR 2**(X), HERE X IS FINITE, NON-ZERO, AND NOT NAN'S
#--a0 points to the extended-precision input.  The argument is range-checked
#--against 2**(-70) and 16480, then reduced as X = N/64 + r with
#--N = 64(M+M') + J, and the table entry for 2^(J/64) is scaled by 2^M.
9092 fmovm.x (%a0),&0x80 # LOAD INPUT
9097 and.l &0x7FFFFFFF,%d1 # drop the sign bit: |X| in compact form
9099 cmp.l %d1,&0x3FB98000 # |X| >= 2**(-70)?
9104 cmp.l %d1,&0x400D80C0 # |X| > 16480?
9109 #--USUAL CASE, 2^(-70) <= |X| <= 16480
9112 fmul.s &0x42800000,%fp1 # 64 * X
9113 fmov.l %fp1,INT(%a6) # N = ROUND-TO-INT(64 X)
9115 lea TEXPTBL(%pc),%a1 # LOAD ADDRESS OF TABLE OF 2^(J/64)
9116 fmov.l INT(%a6),%fp1 # N --> FLOATING FMT
9119 and.l &0x3F,%d1 # D1 IS J
9120 asl.l &4,%d1 # DISPLACEMENT FOR 2^(J/64)
9121 add.l %d1,%a1 # ADDRESS FOR 2^(J/64)
9122 asr.l &6,%d2 # d2 IS L, N = 64L + J
9124 asr.l &1,%d1 # D1 IS M
9125 sub.l %d1,%d2 # d2 IS M', N = 64(M+M') + J
9128 #--SUMMARY: a1 IS ADDRESS FOR THE LEADING PORTION OF 2^(J/64),
9129 #--D1 IS M WHERE N = 64(M+M') + J. NOTE THAT |M| <= 16140 BY DESIGN.
9130 #--ADJFACT = 2^(M').
9131 #--REGISTERS SAVED SO FAR ARE (IN ORDER) FPCR, D0, FP1, a1, AND FP2.
9133 fmovm.x &0x0c,-(%sp) # save fp2/fp3
9135 fmul.s &0x3C800000,%fp1 # (1/64)*N
9136 mov.l (%a1)+,FACT1(%a6) # copy the first three table longwords ...
9137 mov.l (%a1)+,FACT1HI(%a6) # ... into FACT1 ...
9138 mov.l (%a1)+,FACT1LOW(%a6) # ... (12 bytes in all)
9139 mov.w (%a1)+,FACT2(%a6) # next table word -> FACT2
9141 fsub.x %fp1,%fp0 # X - (1/64)*INT(64 X)
9143 mov.w (%a1)+,FACT2HI(%a6) # last table word -> FACT2HI
9144 clr.w FACT2HI+2(%a6) # zero the word that follows it
9146 add.w %d1,FACT1(%a6) # FACT1 := 2**(M) * FACT1 (add M to its first word)
9147 fmul.x LOG2(%pc),%fp0 # FP0 IS R
9148 add.w %d1,FACT2(%a6) # FACT2 := 2**(M) * FACT2
#--|X| is outside the usual range; presumably this compare separates the
#--small-|X| case from the large-|X| case handled below -- confirm.
9154 cmp.l %d1,&0x3FFF8000
9157 #--|X| IS SMALL, RETURN 1 + X
9159 fmov.l %d0,%fpcr # restore users round prec,mode
9160 fadd.s &0x3F800000,%fp0 # RETURN 1 + X
9164 #--|X| IS LARGE, GENERATE OVERFLOW IF X > 0; ELSE GENERATE UNDERFLOW
9165 #--REGISTERS SAVE SO FAR ARE FPCR AND D0
9170 bra t_ovfl2 # t_ovfl expects positive value
9173 bra t_unfl2 # t_unfl expects positive value
9177 #--ENTRY POINT FOR 2**(X) FOR DENORMALIZED ARGUMENT
9179 fmov.l %d0,%fpcr # set user's rounding mode/precision
9180 fmov.s &0x3F800000,%fp0 # fp0 = 1.0 (single), the "1" of RETURN 1 + X
9182 or.l &0x00800001,%d1 # set bits 0x00800001 in d1
9187 #--ENTRY POINT FOR 10**(X), HERE X IS FINITE, NON-ZERO, AND NOT NAN'S
#--a0 POINTS TO THE INPUT X. THE ARGUMENT IS REDUCED TO N AND R, AND THE
#--2^(J/64) TABLE ENTRY IS COPIED INTO FACT1/FACT2 AS IN THE 2**(X) PATH.
9189 fmovm.x (%a0),&0x80 # LOAD INPUT
9194 and.l &0x7FFFFFFF,%d1 # CLEAR BIT 31 (SIGN) OF THE COMPARE KEY IN d1
9196 cmp.l %d1,&0x3FB98000 # |X| >= 2**(-70)?
9201 cmp.l %d1,&0x400B9B07 # |X| <= 16480*log2/log10 ?
9206 #--USUAL CASE, 2^(-70) <= |X| <= 16480 LOG 2 / LOG 10
9209 fmul.d L2TEN64(%pc),%fp1 # X*64*LOG10/LOG2
9210 fmov.l %fp1,INT(%a6) # N=INT(X*64*LOG10/LOG2)
9212 lea TEXPTBL(%pc),%a1 # LOAD ADDRESS OF TABLE OF 2^(J/64)
9213 fmov.l INT(%a6),%fp1 # N --> FLOATING FMT
9216 and.l &0x3F,%d1 # d1 IS J (LOW 6 BITS)
9217 asl.l &4,%d1 # DISPLACEMENT FOR 2^(J/64) (16 BYTES PER TABLE ENTRY)
9218 add.l %d1,%a1 # ADDRESS FOR 2^(J/64)
9219 asr.l &6,%d2 # d2 IS L, N = 64L + J
9221 asr.l &1,%d1 # d1 IS M
9222 sub.l %d1,%d2 # d2 IS M', N = 64(M+M') + J
9225 #--SUMMARY: a1 IS ADDRESS FOR THE LEADING PORTION OF 2^(J/64),
9226 #--d1 IS M WHERE N = 64(M+M') + J. NOTE THAT |M| <= 16140 BY DESIGN.
9227 #--ADJFACT = 2^(M').
9228 #--REGISTERS SAVED SO FAR ARE (IN ORDER) FPCR, D0, FP1, a1, AND FP2.
9229 fmovm.x &0x0c,-(%sp) # save fp2/fp3
9233 fmul.d L10TWO1(%pc),%fp1 # N*(LOG2/64LOG10)_LEAD
9234 mov.l (%a1)+,FACT1(%a6) # COPY 1ST LONGWORD OF THE TABLE ENTRY
9236 fmul.x L10TWO2(%pc),%fp2 # N*(LOG2/64LOG10)_TRAIL
9238 mov.l (%a1)+,FACT1HI(%a6) # COPY 2ND LONGWORD OF THE TABLE ENTRY
9239 mov.l (%a1)+,FACT1LOW(%a6) # COPY 3RD LONGWORD OF THE TABLE ENTRY
9240 fsub.x %fp1,%fp0 # X - N L_LEAD
9241 mov.w (%a1)+,FACT2(%a6) # COPY NEXT WORD OF THE TABLE ENTRY INTO FACT2
9243 fsub.x %fp2,%fp0 # X - N L_TRAIL
9245 mov.w (%a1)+,FACT2HI(%a6) # COPY LAST WORD OF THE TABLE ENTRY INTO FACT2HI
9246 clr.w FACT2HI+2(%a6) # ZERO THE WORD FOLLOWING IT
9249 fmul.x LOG10(%pc),%fp0 # FP0 IS R
9250 add.w %d1,FACT1(%a6) # ADD M (d1) TO THE FIRST WORD OF FACT1
9251 add.w %d1,FACT2(%a6) # ADD M (d1) TO THE FIRST WORD OF FACT2
9254 #--FPCR, FP2, FP3 ARE SAVED IN ORDER AS SHOWN.
9255 #--ADJFACT CONTAINS 2**(M'), FACT1 + FACT2 = 2**(M) * 2**(J/64).
9256 #--FP0 IS R. THE FOLLOWING CODE COMPUTES
9257 #-- 2**(M'+M) * 2**(J/64) * EXP(R)
#--THE POLYNOMIAL IS EVALUATED AS TWO INTERLEAVED CHAINS: ODD COEFFICIENTS
#--(A5,A3,A1) ACCUMULATE IN FP2 AND EVEN COEFFICIENTS (A4,A2) IN FP3.
9260 fmul.x %fp1,%fp1 # FP1 IS S = R*R
9262 fmov.d EXPA5(%pc),%fp2 # FP2 IS A5
9263 fmov.d EXPA4(%pc),%fp3 # FP3 IS A4
9265 fmul.x %fp1,%fp2 # FP2 IS S*A5
9266 fmul.x %fp1,%fp3 # FP3 IS S*A4
9268 fadd.d EXPA3(%pc),%fp2 # FP2 IS A3+S*A5
9269 fadd.d EXPA2(%pc),%fp3 # FP3 IS A2+S*A4
9271 fmul.x %fp1,%fp2 # FP2 IS S*(A3+S*A5)
9272 fmul.x %fp1,%fp3 # FP3 IS S*(A2+S*A4)
9274 fadd.d EXPA1(%pc),%fp2 # FP2 IS A1+S*(A3+S*A5)
9275 fmul.x %fp0,%fp3 # FP3 IS R*S*(A2+S*A4)
9277 fmul.x %fp1,%fp2 # FP2 IS S*(A1+S*(A3+S*A5))
9278 fadd.x %fp3,%fp0 # FP0 IS R+R*S*(A2+S*A4)
9279 fadd.x %fp2,%fp0 # FP0 IS EXP(R) - 1
9281 fmovm.x (%sp)+,&0x30 # restore fp2/fp3
9283 #--FINAL RECONSTRUCTION PROCESS
9284 #--EXP(X) = 2^M*2^(J/64) + 2^M*2^(J/64)*(EXP(R)-1) - (1 OR 0)
9286 fmul.x FACT1(%a6),%fp0 # FP0 = FACT1*(EXP(R)-1)
9287 fadd.x FACT2(%a6),%fp0 # ADD THE TRAILING PART FACT2
9288 fadd.x FACT1(%a6),%fp0 # ADD THE LEADING PART FACT1 LAST
9290 fmov.l %d0,%fpcr # restore users round prec,mode
9291 mov.w %d2,ADJFACT(%a6) # INSERT EXPONENT
9293 mov.l &0x80000000,ADJFACT+4(%a6) # 2ND LONGWORD: ONLY THE TOP BIT SET
9294 clr.l ADJFACT+8(%a6) # 3RD LONGWORD: ZERO
9295 mov.b &FMUL_OP,%d1 # last inst is MUL
9296 fmul.x ADJFACT(%a6),%fp0 # FINAL ADJUSTMENT
9301 #--ENTRY POINT FOR 10**(X) FOR DENORMALIZED ARGUMENT
#--d0 HOLDS THE USER'S ROUNDING PRECISION/MODE; IT IS LOADED INTO THE FPCR.
9303 fmov.l %d0,%fpcr # set user's rounding mode/precision
9304 fmov.s &0x3F800000,%fp0 # fp0 = 1.0 (FIRST TERM OF 1 + X)
# NOTE(review): the OR below forces bits 23 and 0 of d1; presumably d1 is then
# used as a tiny single-precision addend so the result is flagged inexact -
# the instruction that consumes d1 is not visible here, confirm.
9306 or.l &0x00800001,%d1
9310 #########################################################################
9311 # smovcr(): returns the ROM constant at the offset specified in d1 #
9312 # rounded to the mode and precision specified in d0. #
9314 # INPUT *************************************************************** #
9315 # d0 = rnd prec,mode #
# d1 = ROM constant offset (only the low byte is range-checked) #
9318 # OUTPUT ************************************************************** #
9319 # fp0 = the ROM constant rounded to the user's rounding mode,prec #
# INEX2/AINEX are set in USER_FPSR on the set_finx paths below #
9321 #########################################################################
9325 mov.l %d1,-(%sp) # save rom offset for a sec
9327 lsr.b &0x4,%d0 # shift ctrl bits to lo
9328 mov.l %d0,%d1 # make a copy
9329 andi.w &0x3,%d1 # extract rnd mode
9330 andi.w &0xc,%d0 # extract rnd prec
9331 swap %d0 # put rnd prec in hi
9332 mov.w %d1,%d0 # put rnd mode in lo
9334 mov.l (%sp)+,%d1 # get rom offset
9337 # check range of offset
# (byte compares followed by ble, i.e. signed comparisons on the low byte)
9339 tst.b %d1 # if zero, offset is to pi
9340 beq.b pi_tbl # it is pi
9341 cmpi.b %d1,&0x0a # check range $01 - $0a
9342 ble.b z_val # if in this range, return zero
9343 cmpi.b %d1,&0x0e # check range $0b - $0e
9344 ble.b sm_tbl # valid constants in this range
9345 cmpi.b %d1,&0x2f # check range $0f - $2f
9346 ble.b z_val # if in this range, return zero
9347 cmpi.b %d1,&0x3f # check range $30 - $3f
9348 ble.b bg_tbl # valid constants in this range
9351 bra.l ld_pzero # return a zero
9354 # the answer is PI rounded to the proper precision.
9356 # fetch a pointer to the answer table relating to the proper rounding
9360 tst.b %d0 # is rmode RN?
9361 bne.b pi_not_rn # no
9363 lea.l PIRN(%pc),%a0 # yes; load PI RN table addr
9366 cmpi.b %d0,&rp_mode # is rmode RP?
9369 lea.l PIRZRM(%pc),%a0 # no; load PI RZ,RM table addr
9372 lea.l PIRP(%pc),%a0 # load PI RP table addr
9376 # the answer is one of:
9377 # $0B log10(2) (inexact)
9379 # $0D log2(e) (inexact)
9380 # $0E log10(e) (exact)
9382 # fetch a pointer to the answer table relating to the proper rounding
9386 subi.b &0xb,%d1 # make offset in 0-3 range
9387 tst.b %d0 # is rmode RN?
9388 bne.b sm_not_rn # no
9390 lea.l SMALRN(%pc),%a0 # yes; load RN table addr
9392 cmpi.b %d1,&0x2 # is result log10(e)?
9393 ble.b set_finx # no; answer is inexact
9394 bra.b no_finx # yes; answer is exact
9396 cmpi.b %d0,&rp_mode # is rmode RP?
9399 lea.l SMALRZRM(%pc),%a0 # no; load RZ,RM table addr
9402 lea.l SMALRP(%pc),%a0 # load RP table addr
9406 # the answer is one of:
9407 # $30 ln(2) (inexact)
9408 # $31 ln(10) (inexact)
9415 # $38 10^32 (inexact)
9416 # $39 10^64 (inexact)
9417 # $3A 10^128 (inexact)
9418 # $3B 10^256 (inexact)
9419 # $3C 10^512 (inexact)
9420 # $3D 10^1024 (inexact)
9421 # $3E 10^2048 (inexact)
9422 # $3F 10^4096 (inexact)
9424 # fetch a pointer to the answer table relating to the proper rounding
9428 subi.b &0x30,%d1 # make offset in 0-f range
9429 tst.b %d0 # is rmode RN?
9430 bne.b bg_not_rn # no
9432 lea.l BIGRN(%pc),%a0 # yes; load RN table addr
9434 cmpi.b %d1,&0x1 # is offset <= $31?
9435 ble.b set_finx # yes; answer is inexact
9436 cmpi.b %d1,&0x7 # is $32 <= offset <= $37?
9437 ble.b no_finx # yes; answer is exact
9438 bra.b set_finx # no; answer is inexact
9440 cmpi.b %d0,&rp_mode # is rmode RP?
9443 lea.l BIGRZRM(%pc),%a0 # no; load RZ,RM table addr
9446 lea.l BIGRP(%pc),%a0 # load RP table addr
9449 # answer is inexact, so set INEX2 and AINEX in the user's FPSR.
9451 ori.l &inx2a_mask,USER_FPSR(%a6) # set INEX2/AINEX
9453 mulu.w &0xc,%d1 # offset points into tables (12 bytes per entry)
9454 swap %d0 # put rnd prec in lo word
9455 tst.b %d0 # is precision extended?
9457 bne.b not_ext # no; result must be rounded to sgl/dbl
9459 # Precision is extended
9460 fmovm.x (%a0,%d1.w),&0x80 # return result in fp0
9463 # Precision is single or double
9465 swap %d0 # rnd prec in upper word
9467 # call round() to round the answer to the proper precision.
9468 # exponents out of range for single or double DO NOT cause underflow
9470 mov.w 0x0(%a0,%d1.w),FP_SCR1_EX(%a6) # copy first word of entry (exponent slot)
9471 mov.l 0x4(%a0,%d1.w),FP_SCR1_HI(%a6) # copy second longword (hi mantissa slot)
9472 mov.l 0x8(%a0,%d1.w),FP_SCR1_LO(%a6) # copy third longword (lo mantissa slot)
9474 clr.l %d0 # clear g,r,s
9475 lea FP_SCR1(%a6),%a0 # pass ptr to answer
9476 clr.w LOCAL_SGN(%a0) # sign always positive
9477 bsr.l _round # round the mantissa
9479 fmovm.x (%a0),&0x80 # return rounded result in fp0
# Constant tables used by smovcr(). Every entry is three longwords (12 bytes),
# which matches the "mulu.w &0xc" index scaling in smovcr(). The same list of
# constants is repeated once per rounding-mode group; entries for inexact
# constants may differ by one unit in the last mantissa place between groups.
9484 PIRN: long 0x40000000,0xc90fdaa2,0x2168c235 # pi
9485 PIRZRM: long 0x40000000,0xc90fdaa2,0x2168c234 # pi
9486 PIRP: long 0x40000000,0xc90fdaa2,0x2168c235 # pi
9488 SMALRN: long 0x3ffd0000,0x9a209a84,0xfbcff798 # log10(2)
9489 long 0x40000000,0xadf85458,0xa2bb4a9a # e
9490 long 0x3fff0000,0xb8aa3b29,0x5c17f0bc # log2(e)
9491 long 0x3ffd0000,0xde5bd8a9,0x37287195 # log10(e)
9492 long 0x00000000,0x00000000,0x00000000 # 0.0
9495 long 0x3ffd0000,0x9a209a84,0xfbcff798 # log10(2)
9496 long 0x40000000,0xadf85458,0xa2bb4a9a # e
9497 long 0x3fff0000,0xb8aa3b29,0x5c17f0bb # log2(e)
9498 long 0x3ffd0000,0xde5bd8a9,0x37287195 # log10(e)
9499 long 0x00000000,0x00000000,0x00000000 # 0.0
9501 SMALRP: long 0x3ffd0000,0x9a209a84,0xfbcff799 # log10(2)
9502 long 0x40000000,0xadf85458,0xa2bb4a9b # e
9503 long 0x3fff0000,0xb8aa3b29,0x5c17f0bc # log2(e)
9504 long 0x3ffd0000,0xde5bd8a9,0x37287195 # log10(e)
9505 long 0x00000000,0x00000000,0x00000000 # 0.0
9507 BIGRN: long 0x3ffe0000,0xb17217f7,0xd1cf79ac # ln(2)
9508 long 0x40000000,0x935d8ddd,0xaaa8ac17 # ln(10)
9510 long 0x3fff0000,0x80000000,0x00000000 # 10 ^ 0
9511 long 0x40020000,0xA0000000,0x00000000 # 10 ^ 1
9512 long 0x40050000,0xC8000000,0x00000000 # 10 ^ 2
9513 long 0x400C0000,0x9C400000,0x00000000 # 10 ^ 4
9514 long 0x40190000,0xBEBC2000,0x00000000 # 10 ^ 8
9515 long 0x40340000,0x8E1BC9BF,0x04000000 # 10 ^ 16
9516 long 0x40690000,0x9DC5ADA8,0x2B70B59E # 10 ^ 32
9517 long 0x40D30000,0xC2781F49,0xFFCFA6D5 # 10 ^ 64
9518 long 0x41A80000,0x93BA47C9,0x80E98CE0 # 10 ^ 128
9519 long 0x43510000,0xAA7EEBFB,0x9DF9DE8E # 10 ^ 256
9520 long 0x46A30000,0xE319A0AE,0xA60E91C7 # 10 ^ 512
9521 long 0x4D480000,0xC9767586,0x81750C17 # 10 ^ 1024
9522 long 0x5A920000,0x9E8B3B5D,0xC53D5DE5 # 10 ^ 2048
9523 long 0x75250000,0xC4605202,0x8A20979B # 10 ^ 4096
9526 long 0x3ffe0000,0xb17217f7,0xd1cf79ab # ln(2)
9527 long 0x40000000,0x935d8ddd,0xaaa8ac16 # ln(10)
9529 long 0x3fff0000,0x80000000,0x00000000 # 10 ^ 0
9530 long 0x40020000,0xA0000000,0x00000000 # 10 ^ 1
9531 long 0x40050000,0xC8000000,0x00000000 # 10 ^ 2
9532 long 0x400C0000,0x9C400000,0x00000000 # 10 ^ 4
9533 long 0x40190000,0xBEBC2000,0x00000000 # 10 ^ 8
9534 long 0x40340000,0x8E1BC9BF,0x04000000 # 10 ^ 16
9535 long 0x40690000,0x9DC5ADA8,0x2B70B59D # 10 ^ 32
9536 long 0x40D30000,0xC2781F49,0xFFCFA6D5 # 10 ^ 64
9537 long 0x41A80000,0x93BA47C9,0x80E98CDF # 10 ^ 128
9538 long 0x43510000,0xAA7EEBFB,0x9DF9DE8D # 10 ^ 256
9539 long 0x46A30000,0xE319A0AE,0xA60E91C6 # 10 ^ 512
9540 long 0x4D480000,0xC9767586,0x81750C17 # 10 ^ 1024
9541 long 0x5A920000,0x9E8B3B5D,0xC53D5DE4 # 10 ^ 2048
9542 long 0x75250000,0xC4605202,0x8A20979A # 10 ^ 4096
9545 long 0x3ffe0000,0xb17217f7,0xd1cf79ac # ln(2)
9546 long 0x40000000,0x935d8ddd,0xaaa8ac17 # ln(10)
9548 long 0x3fff0000,0x80000000,0x00000000 # 10 ^ 0
9549 long 0x40020000,0xA0000000,0x00000000 # 10 ^ 1
9550 long 0x40050000,0xC8000000,0x00000000 # 10 ^ 2
9551 long 0x400C0000,0x9C400000,0x00000000 # 10 ^ 4
9552 long 0x40190000,0xBEBC2000,0x00000000 # 10 ^ 8
9553 long 0x40340000,0x8E1BC9BF,0x04000000 # 10 ^ 16
9554 long 0x40690000,0x9DC5ADA8,0x2B70B59E # 10 ^ 32
9555 long 0x40D30000,0xC2781F49,0xFFCFA6D6 # 10 ^ 64
9556 long 0x41A80000,0x93BA47C9,0x80E98CE0 # 10 ^ 128
9557 long 0x43510000,0xAA7EEBFB,0x9DF9DE8E # 10 ^ 256
9558 long 0x46A30000,0xE319A0AE,0xA60E91C7 # 10 ^ 512
9559 long 0x4D480000,0xC9767586,0x81750C18 # 10 ^ 1024
9560 long 0x5A920000,0x9E8B3B5D,0xC53D5DE5 # 10 ^ 2048
9561 long 0x75250000,0xC4605202,0x8A20979B # 10 ^ 4096
9563 #########################################################################
9564 # sscale(): computes the destination operand scaled by the source #
9565 # operand. If the absolute value of the source operand is #
9566 # >= 2^14, an overflow or underflow is returned. #
9568 # INPUT *************************************************************** #
9569 # a0 = pointer to double-extended source operand X #
9570 # a1 = pointer to double-extended destination operand Y #
# d0 = control bits (saved on the stack on entry, later loaded to FPCR) #
9572 # OUTPUT ************************************************************** #
9573 # fp0 = scale(X,Y) #
9575 #########################################################################
9581 mov.l %d0,-(%sp) # store off ctrl bits for now
9583 mov.w DST_EX(%a1),%d1 # get dst exponent
9584 smi.b SIGN(%a6) # use SIGN to hold dst sign
9585 andi.l &0x00007fff,%d1 # strip sign from dst exp
9587 mov.w SRC_EX(%a0),%d0 # check src bounds
9588 andi.w &0x7fff,%d0 # clr src sign bit
9589 cmpi.w %d0,&0x3fff # is src ~ ZERO?
9590 blt.w src_small # yes
9591 cmpi.w %d0,&0x400c # no; is src too big?
9595 # Source is within 2^14 range.
9598 fintrz.x SRC(%a0),%fp0 # calc int of src
9599 fmov.l %fp0,%d0 # int src to d0
9600 # don't want any accrued bits from the fintrz showing up later since
9601 # we may need to read the fpsr for the last fp op in t_catch2().
9604 tst.b DST_HI(%a1) # is dst denormalized?
9607 # the dst is a DENORM. normalize the DENORM and add the adjustment to
9608 # the src value. then, jump to the norm part of the routine.
9610 mov.l %d0,-(%sp) # save src for now
9612 mov.w DST_EX(%a1),FP_SCR0_EX(%a6) # make a copy
9613 mov.l DST_HI(%a1),FP_SCR0_HI(%a6)
9614 mov.l DST_LO(%a1),FP_SCR0_LO(%a6)
9616 lea FP_SCR0(%a6),%a0 # pass ptr to DENORM
9617 bsr.l norm # normalize the DENORM
9619 add.l (%sp)+,%d0 # add adjustment to src
9621 fmovm.x FP_SCR0(%a6),&0x80 # load normalized DENORM
9623 cmpi.w %d0,&-0x3fff # is the shft amt really low?
9624 bge.b sok_norm2 # thank goodness no
9626 # the multiply factor that we're trying to create should be a denorm
9627 # for the multiply to work. Therefore, we're going to actually do a
9628 # multiply with a denorm which will cause an unimplemented data type
9629 # exception to be put into the machine which will be caught and corrected
9630 # later. we don't do this with the DENORMs above because this method
9631 # is slower. but, don't fret, I don't see it being used much either.
9632 fmov.l (%sp)+,%fpcr # restore user fpcr
9633 mov.l &0x80000000,%d1 # load normalized mantissa
9634 subi.l &-0x3fff,%d0 # how many should we shift?
9635 neg.l %d0 # make it positive
9636 cmpi.b %d0,&0x20 # is it >= 32?
9637 bge.b sok_dnrm_32 # yes
# the three pushes below build the multiplier on the stack; the value pushed
# last ends up at the lowest address, so the order is lo, hi, exponent.
9638 lsr.l %d0,%d1 # no; bit stays in upper lw
9639 clr.l -(%sp) # insert zero low mantissa
9640 mov.l %d1,-(%sp) # insert new high mantissa
9641 clr.l -(%sp) # make zero exponent
9644 subi.b &0x20,%d0 # get shift count
9645 lsr.l %d0,%d1 # make low mantissa longword
9646 mov.l %d1,-(%sp) # insert new low mantissa
9647 clr.l -(%sp) # insert zero high mantissa
9648 clr.l -(%sp) # make zero exponent
9651 # the src will force the dst to a DENORM value or worse. so, let's
9652 # create an fp multiply that will create the result.
9654 fmovm.x DST(%a1),&0x80 # load fp0 with dst operand
9656 fmov.l (%sp)+,%fpcr # restore user fpcr
9658 addi.w &0x3fff,%d0 # turn src amt into exp value
9659 swap %d0 # put exponent in high word
9660 clr.l -(%sp) # insert zero lo mantissa (pushed first)
9661 mov.l &0x80000000,-(%sp) # insert new high mantissa
9662 mov.l %d0,-(%sp) # insert new exponent (pushed last, lowest addr)
9665 fmov.l %fpcr,%d0 # d0 needs fpcr for t_catch2
9666 mov.b &FMUL_OP,%d1 # last inst is MUL
9667 fmul.x (%sp)+,%fp0 # do the multiply
9668 bra t_catch2 # catch any exceptions
9671 # Source is outside of 2^14 range. Test the sign and branch
9672 # to the appropriate exception handler.
9675 mov.l (%sp)+,%d0 # restore ctrl bits
9676 exg %a0,%a1 # swap src,dst ptrs
9677 tst.b SRC_EX(%a1) # is src negative?
9678 bmi t_unfl # yes; underflow
9679 bra t_ovfl_sc # no; overflow
9682 # The source input is below 1, so we check for denormalized numbers
9686 tst.b DST_HI(%a1) # is dst denormalized?
9687 bpl.b ssmall_done # yes
9690 fmov.l %d0,%fpcr # no; load control bits
9691 mov.b &FMOV_OP,%d1 # last inst is MOVE
9692 fmov.x DST(%a1),%fp0 # simply return dest
9695 mov.l (%sp)+,%d0 # load control bits into d0
9696 mov.l %a1,%a0 # pass ptr to dst
9699 #########################################################################
9700 # smod(): computes the fp MOD of the input values X,Y. #
9701 # srem(): computes the fp (IEEE) REM of the input values X,Y. #
9703 # INPUT *************************************************************** #
9704 # a0 = pointer to extended precision input X #
9705 # a1 = pointer to extended precision input Y #
9706 # d0 = round precision,mode #
9708 # The input operands X and Y can be either normalized or #
9711 # OUTPUT ************************************************************** #
9712 # fp0 = FREM(X,Y) or FMOD(X,Y) #
9714 # ALGORITHM *********************************************************** #
9716 # Step 1. Save and strip signs of X and Y: signX := sign(X), #
9717 # signY := sign(Y), X := |X|, Y := |Y|, #
9718 # signQ := signX EOR signY. Record whether MOD or REM #
9721 # Step 2. Set L := expo(X)-expo(Y), k := 0, Q := 0. #
9723 # R := X, go to Step 4. #
9725 # R := 2^(-L)X, j := L. #
9728 # Step 3. Perform MOD(X,Y) #
9729 # 3.1 If R = Y, go to Step 9. #
9730 # 3.2 If R > Y, then { R := R - Y, Q := Q + 1} #
9731 # 3.3 If j = 0, go to Step 4. #
9732 # 3.4 k := k + 1, j := j - 1, Q := 2Q, R := 2R. Go to #
9735 # Step 4. At this point, R = X - QY = MOD(X,Y). Set #
9736 # Last_Subtract := false (used in Step 7 below). If #
9737 # MOD is requested, go to Step 6. #
9739 # Step 5. R = MOD(X,Y), but REM(X,Y) is requested. #
9740 # 5.1 If R < Y/2, then R = MOD(X,Y) = REM(X,Y). Go to #
9742 # 5.2 If R > Y/2, then { set Last_Subtract := true, #
9743 # Q := Q + 1, Y := signY*Y }. Go to Step 6. #
9744 # 5.3 This is the tricky case of R = Y/2. If Q is odd, #
9745 # then { Q := Q + 1, signX := -signX }. #
9747 # Step 6. R := signX*R. #
9749 # Step 7. If Last_Subtract = true, R := R - Y. #
9751 # Step 8. Return signQ, last 7 bits of Q, and R as required. #
9753 # Step 9. At this point, R = 2^(-j)*X - Q Y = Y. Thus, #
9754 # X = 2^(j)*(Q+1)Y. set Q := 2^(j)*(Q+1), #
9755 # R := 0. Return signQ, last 7 bits of Q, and R. #
9757 #########################################################################
9760 set Sc_Flag,L_SCR3+1
# constant with exponent word $0001 and only the top mantissa bit set;
# the final "fmul.x Scale(%pc)" below multiplies by a constant of this kind.
9775 long 0x00010000,0x80000000,0x00000000,0x00000000
9779 clr.b FPSR_QBYTE(%a6) # start with a cleared quotient byte
9780 mov.l %d0,-(%sp) # save ctrl bits
9786 clr.b FPSR_QBYTE(%a6) # start with a cleared quotient byte
9787 mov.l %d0,-(%sp) # save ctrl bits
9788 mov.b &0x1,Mod_Flag(%a6) # this entry sets Mod_Flag to 1 (tested later)
9791 #..Save sign of X and Y
9792 movm.l &0x3f00,-(%sp) # save data registers
9793 mov.w SRC_EX(%a0),%d3
9794 mov.w %d3,SignY(%a6)
9795 and.l &0x00007FFF,%d3 # Y := |Y|
9798 mov.l SRC_HI(%a0),%d4
9799 mov.l SRC_LO(%a0),%d5 # (D3,D4,D5) is |Y|
9804 mov.l &0x00003FFE,%d3 # $3FFD + 1
9813 bfffo %d4{&0:&32},%d6 # d6 = offset of first set bit in d4
9815 sub.l %d6,%d3 # (D3,D4,D5) is normalized
9816 # ...with bias $7FFD
9821 bfffo %d4{&0:&32},%d6 # d6 = offset of first set bit in d4
9824 mov.l %d5,%d7 # a copy of D5
9829 or.l %d7,%d4 # (D3,D4,D5) normalized
9830 # ...with bias $7FFD
9834 add.l &0x00003FFE,%d3 # (D3,D4,D5) normalized
9835 # ...with bias $7FFD
9838 mov.w DST_EX(%a1),%d0
9839 mov.w %d0,SignX(%a6)
9840 mov.w SignY(%a6),%d1
9842 and.l &0x00008000,%d1 # keep only the sign bit
9843 mov.w %d1,SignQ(%a6) # sign(Q) obtained
9844 and.l &0x00007FFF,%d0 # X := |X|
9845 mov.l DST_HI(%a1),%d1
9846 mov.l DST_LO(%a1),%d2 # (D0,D1,D2) is |X|
9849 mov.l &0x00003FFE,%d0
9858 bfffo %d1{&0:&32},%d6 # d6 = offset of first set bit in d1
9860 sub.l %d6,%d0 # (D0,D1,D2) is normalized
9861 # ...with bias $7FFD
9866 bfffo %d1{&0:&32},%d6 # d6 = offset of first set bit in d1
9869 mov.l %d2,%d7 # a copy of D2
9874 or.l %d7,%d1 # (D0,D1,D2) normalized
9875 # ...with bias $7FFD
9879 add.l &0x00003FFE,%d0 # (D0,D1,D2) normalized
9880 # ...with bias $7FFD
9884 mov.l %d3,L_SCR1(%a6) # save biased exp(Y)
9885 mov.l %d0,-(%sp) # save biased exp(X)
9886 sub.l %d3,%d0 # L := expo(X)-expo(Y)
9888 clr.l %d6 # D6 := carry <- 0
9890 mov.l &0,%a1 # A1 is k; j+k=L, Q=0
9892 #..(Carry,D1,D2) is R
9896 #..expo(X) < expo(Y). Thus X = mod(X,Y)
9898 mov.l (%sp)+,%d0 # restore d0
9902 addq.l &0x4,%sp # erase exp(X)
9903 #..At this point R = 2^(-L)X; Q = 0; k = 0; and k+j = L
9905 tst.l %d6 # test carry bit
9908 #..At this point carry = 0, R = (D1,D2), Y = (D4,D5)
9909 cmp.l %d1,%d4 # compare hi(R) and hi(Y)
9911 cmp.l %d2,%d5 # compare lo(R) and lo(Y)
9914 #..At this point, R = Y
9918 #..use the borrow of the previous compare
9919 bcs.b R_LT_Y # borrow is set iff R < Y
9922 #..If Carry is set, then Y < (Carry,D1,D2) < 2Y. Otherwise, Carry = 0
9923 #..and Y < (D1,D2) < 2Y. Either way, perform R - Y
9924 sub.l %d5,%d2 # lo(R) - lo(Y)
9925 subx.l %d4,%d1 # hi(R) - hi(Y)
9926 clr.l %d6 # clear carry
9927 addq.l &1,%d3 # Q := Q + 1
9930 #..At this point, Carry=0, R < Y. R = 2^(k-L)X - QY; k+j = L; j >= 0.
9931 tst.l %d0 # see if j = 0.
9934 add.l %d3,%d3 # Q := 2Q
9935 add.l %d2,%d2 # lo(R) = 2lo(R)
9936 roxl.l &1,%d1 # hi(R) = 2hi(R) + carry
9937 scs %d6 # set Carry if 2(R) overflows
9938 addq.l &1,%a1 # k := k+1
9939 subq.l &1,%d0 # j := j - 1
9940 #..At this point, R=(Carry,D1,D2) = 2^(k-L)X - QY, j+k=L, j >= 0, R < 2Y.
9945 #..k = L, j = 0, Carry = 0, R = (D1,D2) = X - QY, R < Y.
9948 mov.l L_SCR1(%a6),%d0 # new biased expo of R
9957 bfffo %d1{&0:&32},%d6 # d6 = offset of first set bit in d1
9959 sub.l %d6,%d0 # (D0,D1,D2) is normalized
9960 # ...with bias $7FFD
9965 bfffo %d1{&0:&32},%d6 # d6 = offset of first set bit in d1
9966 bmi.b Get_Mod # already normalized
9969 mov.l %d2,%d7 # a copy of D2
9974 or.l %d7,%d1 # (D0,D1,D2) normalized
9978 cmp.l %d0,&0x000041FE
9984 mov.l L_SCR1(%a6),%d6
9988 fmov.x R(%a6),%fp0 # no exception
9989 mov.b &1,Sc_Flag(%a6) # set Sc_Flag
9996 mov.l L_SCR1(%a6),%d6
9998 mov.l %d6,L_SCR1(%a6)
10001 mov.l %d4,Y_Hi(%a6)
10002 mov.l %d5,Y_Lo(%a6)
10007 tst.b Mod_Flag(%a6)
10010 mov.l L_SCR1(%a6),%d6 # new biased expo(Y)
10011 subq.l &1,%d6 # biased expo(Y/2)
10027 fsub.x Y(%a6),%fp0 # no exceptions
10028 addq.l &1,%d3 # Q := Q + 1
10033 mov.w SignX(%a6),%d6
10041 mov.w SignQ(%a6),%d6 # D6 is sign(Q)
10044 and.l &0x0000007F,%d3 # 7 bits of Q
10045 or.l %d6,%d3 # sign and bits of Q
10048 # and.l &0xFF00FFFF,%d6
10050 # fmov.l %d6,%fpsr # put Q in fpsr
10051 mov.b %d3,FPSR_QBYTE(%a6) # put Q in fpsr
10055 movm.l (%sp)+,&0xfc # {%d2-%d7}
10060 mov.b &FMUL_OP,%d1 # last inst is MUL
10061 fmul.x Scale(%pc),%fp0 # may cause underflow
10063 # the '040 package did this apparently to see if the dst operand for the
10064 # preceding fmul was a denorm. but, it better not have been since the
10065 # algorithm just got done playing with fp0 and expected no exceptions
10066 # as a result. trust me...
10067 # bra t_avoid_unsupp # check for denorm as a
10068 # ;result of the scaling
10071 mov.b &FMOV_OP,%d1 # last inst is MOVE
10072 fmov.x %fp0,%fp0 # capture exceptions & round
10076 #..R = 2^(-j)X - Q Y = Y, thus R = 0 and quotient = 2^j (Q+1)
10078 cmp.l %d0,&8 # D0 is j
10088 fmov.s &0x00000000,%fp0 # R := 0
10093 #..Check parity of Q
10095 and.l &0x00000001,%d6 # isolate bit 0
10097 beq.w Fix_Sign # Q is even
10099 #..Q is odd, Q := Q + 1, signX := -signX
10101 mov.w SignX(%a6),%d6
10102 eor.l &0x00008000,%d6 # flip the sign bit
10103 mov.w %d6,SignX(%a6)
# default NAN image; the OPERR handler loads it into fp0 via qnan(%pc).
10106 qnan: long 0x7fff0000, 0xffffffff, 0xffffffff
10108 #########################################################################
10109 # XDEF **************************************************************** #
10110 # t_dz(): Handle DZ exception during transcendental emulation. #
10111 # Sets N bit according to sign of source operand. #
10112 # t_dz2(): Handle DZ exception during transcendental emulation. #
10113 # Sets N bit always. #
10115 # XREF **************************************************************** #
10118 # INPUT *************************************************************** #
10119 # a0 = pointer to source operand #
10121 # OUTPUT ************************************************************** #
10122 # fp0 = default result #
10124 # ALGORITHM *********************************************************** #
10125 # - Store properly signed INF into fp0. #
10126 # - Set FPSR exception status dz bit, ccode inf bit, and #
10127 # accrued dz bit. #
10129 #########################################################################
10133 tst.b SRC_EX(%a0) # is src negative?
10137 fmov.s &0x7f800000,%fp0 # return +INF in fp0
10138 ori.l &dzinf_mask,USER_FPSR(%a6) # set I/DZ/ADZ
10143 fmov.s &0xff800000,%fp0 # return -INF in fp0
10144 ori.l &dzinf_mask+neg_mask,USER_FPSR(%a6) # set N/I/DZ/ADZ
10147 #################################################################
10148 # OPERR exception: #
10149 # - set FPSR exception status operr bit, condition code #
10150 # nan bit; Store default NAN into fp0 #
10151 #################################################################
10154 ori.l &opnan_mask,USER_FPSR(%a6) # set NaN/OPERR/AIOP
10155 fmovm.x qnan(%pc),&0x80 # return default NAN in fp0 (from the qnan constant)
10158 #################################################################
10159 # Extended DENORM: #
10160 # - For all functions that have a denormalized input and #
10161 # that f(x)=x, this is the entry point. #
10162 # - we only return the EXOP here if either underflow or #
10163 # inexact is enabled. #
# - a0 points to the denormalized operand; d0 holds the #
# rnd prec,mode. #
10164 #################################################################
10166 # Entry point for scale w/ extended denorm. The function does
10167 # NOT set INEX2/AUNFL/AINEX.
10170 ori.l &unfl_mask,USER_FPSR(%a6) # set UNFL
10175 ori.l &unfinx_mask,USER_FPSR(%a6) # set UNFL/INEX2/AUNFL/AINEX
10178 mov.l %a0,%a1 # make copy of src ptr
10179 mov.l %d0,%d1 # make copy of rnd prec,mode
10180 andi.b &0xc0,%d1 # extended precision?
10181 bne.b xdnrm_sd # no
10183 # result precision is extended.
10184 tst.b LOCAL_EX(%a0) # is denorm negative?
10185 bpl.b xdnrm_exit # no
10187 bset &neg_bit,FPSR_CC(%a6) # yes; set 'N' ccode bit
10190 # result precision is single or double
10193 tst.b LOCAL_EX(%a0) # is denorm pos or neg?
10194 smi.b %d1 # set d1 accordingly
10198 fmovm.x (%a0),&0x80 # return default result in fp0
10200 mov.b FPCR_ENABLE(%a6),%d0
10201 andi.b &0x0a,%d0 # is UNFL or INEX enabled?
10202 bne.b xdnrm_ena # yes
10208 # we have a DENORM that needs to be converted into an EXOP.
10209 # so, normalize the mantissa, add 0x6000 to the new exponent,
10210 # and return the result in fp1.
10212 mov.w LOCAL_EX(%a1),FP_SCR0_EX(%a6) # work on a copy in FP_SCR0
10213 mov.l LOCAL_HI(%a1),FP_SCR0_HI(%a6)
10214 mov.l LOCAL_LO(%a1),FP_SCR0_LO(%a6)
10216 lea FP_SCR0(%a6),%a0 # pass ptr to the copy
10217 bsr.l norm # normalize mantissa
10218 addi.l &0x6000,%d0 # add extra bias
10219 andi.w &0x8000,FP_SCR0_EX(%a6) # keep old sign
10220 or.w %d0,FP_SCR0_EX(%a6) # insert new exponent
10222 fmovm.x FP_SCR0(%a6),&0x40 # return EXOP in fp1
10225 #################################################################
10226 # UNFL exception: #
10227 # - This routine is for cases where even an EXOP isn't #
10228 # large enough to hold the range of this result. #
10229 # In such a case, the EXOP equals zero. #
10230 # - Return the default result to the proper precision #
10231 # with the sign of this result being the same as that #
10232 # of the src operand. #
10233 # - t_unfl2() is provided to force the result sign to #
10234 # positive which is the desired result for fetox(). #
10235 #################################################################
10238 ori.l &unfinx_mask,USER_FPSR(%a6) # set UNFL/INEX2/AUNFL/AINEX
10240 tst.b (%a0) # is result pos or neg?
10241 smi.b %d1 # set d1 accordingly
10242 bsr.l unf_sub # calc default unfl result
10243 fmovm.x (%a0),&0x80 # return default result in fp0
10245 fmov.s &0x00000000,%fp1 # return EXOP in fp1
10248 # t_unfl2 ALWAYS tells unf_sub to create a positive result
10251 ori.l &unfinx_mask,USER_FPSR(%a6) # set UNFL/INEX2/AUNFL/AINEX
10253 sf.b %d1 # clear d1 to represent positive
10254 bsr.l unf_sub # calc default unfl result
10255 fmovm.x (%a0),&0x80 # return default result in fp0
10257 fmov.s &0x0000000,%fp1 # return EXOP in fp1
10260 #################################################################
10261 # OVFL exception: #
10262 # - This routine is for cases where even an EXOP isn't #
10263 # large enough to hold the range of this result. #
10264 # - Return the default result to the proper precision #
10265 # with the sign of this result being the same as that #
10266 # of the src operand. #
10267 # - t_ovfl2() is provided to force the result sign to #
10268 # positive which is the desired result for fcosh(). #
10269 # - t_ovfl_sc() is provided for scale() which only sets #
10270 # the inexact bits if the number is inexact for the #
10271 # precision indicated. #
10272 #################################################################
10276 ori.l &ovfl_inx_mask,USER_FPSR(%a6) # set OVFL/AOVFL/AINEX
10278 mov.b %d0,%d1 # fetch rnd mode/prec
10279 andi.b &0xc0,%d1 # extract rnd prec
10280 beq.b ovfl_work # prec is extended
10282 tst.b LOCAL_HI(%a0) # is dst a DENORM?
10283 bmi.b ovfl_sc_norm # no
10285 # dst op is a DENORM. we have to normalize the mantissa to see if the
10286 # result would be inexact for the given precision. make a copy of the
10287 # dst so we don't screw up the version passed to us.
10288 mov.w LOCAL_EX(%a0),FP_SCR0_EX(%a6)
10289 mov.l LOCAL_HI(%a0),FP_SCR0_HI(%a6)
10290 mov.l LOCAL_LO(%a0),FP_SCR0_LO(%a6)
10291 lea FP_SCR0(%a6),%a0 # pass ptr to FP_SCR0
10292 movm.l &0xc080,-(%sp) # save d0-d1/a0
10293 bsr.l norm # normalize mantissa
10294 movm.l (%sp)+,&0x0103 # restore d0-d1/a0
10297 cmpi.b %d1,&0x40 # is prec sgl?
10298 bne.b ovfl_sc_dbl # no; dbl
10300 tst.l LOCAL_LO(%a0) # is lo lw of sgl set?
10301 bne.b ovfl_sc_inx # yes
10302 tst.b 3+LOCAL_HI(%a0) # is lo byte of hi lw set?
10303 bne.b ovfl_sc_inx # yes
10304 bra.b ovfl_work # don't set INEX2
10306 mov.l LOCAL_LO(%a0),%d1 # are any of lo 11 bits of
10307 andi.l &0x7ff,%d1 # dbl mantissa set?
10308 beq.b ovfl_work # no; don't set INEX2
10310 ori.l &inex2_mask,USER_FPSR(%a6) # set INEX2
10311 bra.b ovfl_work # continue
10315 ori.l &ovfinx_mask,USER_FPSR(%a6) # set OVFL/INEX2/AOVFL/AINEX
10318 tst.b LOCAL_EX(%a0) # what is the sign?
10319 smi.b %d1 # set d1 accordingly
10320 bsr.l ovf_res # calc default ovfl result
10321 mov.b %d0,FPSR_CC(%a6) # insert new ccodes
10322 fmovm.x (%a0),&0x80 # return default result in fp0
10324 fmov.s &0x00000000,%fp1 # return EXOP in fp1
10327 # t_ovfl2 ALWAYS tells ovf_res to create a positive result
10330 ori.l &ovfinx_mask,USER_FPSR(%a6) # set OVFL/INEX2/AOVFL/AINEX
10332 sf.b %d1 # clear sign flag for positive
10333 bsr.l ovf_res # calc default ovfl result
10334 mov.b %d0,FPSR_CC(%a6) # insert new ccodes
10335 fmovm.x (%a0),&0x80 # return default result in fp0
10337 fmov.s &0x00000000,%fp1 # return EXOP in fp1
10340 #################################################################
10342 # - the last operation of a transcendental emulation #
10343 # routine may have caused an underflow or overflow. #
10344 # we find out if this occurred by doing an fsave and #
10345 # checking the exception bit. if one did occur, then we #
10346 # jump to fgen_except() which creates the default #
10347 # result and EXOP for us. #
10348 #################################################################
10357 #################################################################
10358 # INEX2 exception: #
10359 # - The inex2 and ainex bits are set. #
10360 #################################################################
10368 ori.w &inx2a_mask,2+USER_FPSR(%a6) # set INEX2/AINEX
10373 ori.l &inx2a_mask+neg_mask,USER_FPSR(%a6) # set N/INEX2/AINEX
10377 mov.b &z_bmask,FPSR_CC(%a6) # ccode byte := z_bmask
10378 ori.w &inx2a_mask,2+USER_FPSR(%a6) # set INEX2/AINEX
10381 # an underflow or overflow exception occurred.
10382 # we must set INEX/AINEX since the fmul/fdiv/fmov emulation may not!
10384 ori.w &inx2a_mask,FPSR_EXCEPT(%a6) # set INEX2/AINEX
10400 or.l %d0,USER_FPSR(%a6) # merge the bits held in d0 into the user's FPSR
10404 #########################################################################
10406 #########################################################################
10407 # unf_res(): underflow default result calculation for transcendentals #
# NOTE(review): callers in this file reach this code with #
# "bsr.l unf_sub"; the unf_res name in this header may be stale. #
10410 # d0 : rnd mode,precision #
10411 # d1.b : sign bit of result ('11111111 = (-) ; '00000000 = (+)) #
10413 # a0 : points to result (in instruction memory) #
10414 #########################################################################
10416 ori.l &unfinx_mask,USER_FPSR(%a6)
10418 andi.w &0x10,%d1 # keep sign bit in 4th spot
10420 lsr.b &0x4,%d0 # shift rnd prec,mode to lo bits
10421 andi.b &0xf,%d0 # strip hi rnd mode bit
10422 or.b %d1,%d0 # concat {sgn,mode,prec}
10424 mov.l %d0,%d1 # make a copy
10425 lsl.b &0x1,%d1 # mult index 2 by 2 (x8 in the lea below = 16-byte entries)
10427 mov.b (tbl_unf_cc.b,%pc,%d0.w*1),FPSR_CC(%a6) # insert ccode bits
10428 lea (tbl_unf_result.b,%pc,%d1.w*8),%a0 # grab result ptr
# condition-code bytes, one per index value (byte-indexed by d0 above)
10432 byte 0x4, 0x4, 0x4, 0x0
10433 byte 0x4, 0x4, 0x4, 0x0
10434 byte 0x4, 0x4, 0x4, 0x0
10435 byte 0x0, 0x0, 0x0, 0x0
10436 byte 0x8+0x4, 0x8+0x4, 0x8, 0x8+0x4
10437 byte 0x8+0x4, 0x8+0x4, 0x8, 0x8+0x4
10438 byte 0x8+0x4, 0x8+0x4, 0x8, 0x8+0x4
# default results, four longwords (16 bytes) per entry
10441 long 0x00000000, 0x00000000, 0x00000000, 0x0 # ZERO;ext
10442 long 0x00000000, 0x00000000, 0x00000000, 0x0 # ZERO;ext
10443 long 0x00000000, 0x00000000, 0x00000000, 0x0 # ZERO;ext
10444 long 0x00000000, 0x00000000, 0x00000001, 0x0 # MIN; ext
10446 long 0x3f810000, 0x00000000, 0x00000000, 0x0 # ZERO;sgl
10447 long 0x3f810000, 0x00000000, 0x00000000, 0x0 # ZERO;sgl
10448 long 0x3f810000, 0x00000000, 0x00000000, 0x0 # ZERO;sgl
10449 long 0x3f810000, 0x00000100, 0x00000000, 0x0 # MIN; sgl
10451 long 0x3c010000, 0x00000000, 0x00000000, 0x0 # ZERO;dbl
10452 long 0x3c010000, 0x00000000, 0x00000000, 0x0 # ZERO;dbl
10453 long 0x3c010000, 0x00000000, 0x00000000, 0x0 # ZERO;dbl
10454 long 0x3c010000, 0x00000000, 0x00000800, 0x0 # MIN; dbl
10456 long 0x0,0x0,0x0,0x0
10457 long 0x0,0x0,0x0,0x0
10458 long 0x0,0x0,0x0,0x0
10459 long 0x0,0x0,0x0,0x0
10461 long 0x80000000, 0x00000000, 0x00000000, 0x0 # ZERO;ext
10462 long 0x80000000, 0x00000000, 0x00000000, 0x0 # ZERO;ext
10463 long 0x80000000, 0x00000000, 0x00000001, 0x0 # MIN; ext
10464 long 0x80000000, 0x00000000, 0x00000000, 0x0 # ZERO;ext
10466 long 0xbf810000, 0x00000000, 0x00000000, 0x0 # ZERO;sgl
10467 long 0xbf810000, 0x00000000, 0x00000000, 0x0 # ZERO;sgl
10468 long 0xbf810000, 0x00000100, 0x00000000, 0x0 # MIN; sgl
10469 long 0xbf810000, 0x00000000, 0x00000000, 0x0 # ZERO;sgl
10471 long 0xbc010000, 0x00000000, 0x00000000, 0x0 # ZERO;dbl
10472 long 0xbc010000, 0x00000000, 0x00000000, 0x0 # ZERO;dbl
10473 long 0xbc010000, 0x00000000, 0x00000800, 0x0 # MIN; dbl
10474 long 0xbc010000, 0x00000000, 0x00000000, 0x0 # ZERO;dbl
10476 ############################################################
10478 #########################################################################
10479 # src_zero(): Return signed zero according to sign of src operand. #
10480 #########################################################################
10483 tst.b SRC_EX(%a0) # get sign of src operand
10484 bmi.b ld_mzero # if neg, load neg zero
10487 # ld_pzero(): return a positive zero.
10491 fmov.s &0x00000000,%fp0 # load +0
10492 mov.b &z_bmask,FPSR_CC(%a6) # set 'Z' ccode bit
10495 # ld_mzero(): return a negative zero.
10498 fmov.s &0x80000000,%fp0 # load -0
10499 mov.b &neg_bmask+z_bmask,FPSR_CC(%a6) # set 'N','Z' ccode bits
10502 #########################################################################
10503 # dst_zero(): Return signed zero according to sign of dst operand. #
10504 #########################################################################
10507 tst.b DST_EX(%a1) # get sign of dst operand
10508 bmi.b ld_mzero # if neg, load neg zero
10509 bra.b ld_pzero # load positive zero
10511 #########################################################################
10512 # src_inf(): Return signed inf according to sign of src operand. #
10513 #########################################################################
10516 tst.b SRC_EX(%a0) # get sign of src operand
10517 bmi.b ld_minf # if negative branch
10520 # ld_pinf(): return a positive infinity.
10524 fmov.s &0x7f800000,%fp0 # load +INF
10525 mov.b &inf_bmask,FPSR_CC(%a6) # set 'INF' ccode bit
10529 # ld_minf():return a negative infinity.
10533 fmov.s &0xff800000,%fp0 # load -INF
10534 mov.b &neg_bmask+inf_bmask,FPSR_CC(%a6) # set 'N','I' ccode bits
10537 #########################################################################
10538 # dst_inf(): Return signed inf according to sign of dst operand. #
10539 #########################################################################
10542 tst.b DST_EX(%a1) # get sign of dst operand
10543 bmi.b ld_minf # if negative branch
10547 #################################################################
10548 # szr_inf(): Return +ZERO for a negative src operand or #
10549 # +INF for a positive src operand. #
10550 # Routine used for fetox, ftwotox, and ftentox. #
10551 #################################################################
10553 tst.b SRC_EX(%a0) # check sign of source
10557 #########################################################################
10558 # sopr_inf(): Return +INF for a positive src operand or #
10559 # jump to operand error routine for a negative src operand. #
10560 # Routine used for flogn, flognp1, flog10, and flog2. #
10561 #########################################################################
10564 tst.b SRC_EX(%a0) # check sign of source
10568 #################################################################
10569 # setoxm1i(): Return minus one for a negative src operand or #
10570 # positive infinity for a positive src operand. #
10571 # Routine used for fetoxm1. #
10572 #################################################################
10575 tst.b SRC_EX(%a0) # check sign of source
10579 #########################################################################
10580 # src_one(): Return signed one according to sign of src operand. #
10581 #########################################################################
10584 tst.b SRC_EX(%a0) # check sign of source
10588 # ld_pone(): return positive one.
10592 fmov.s &0x3f800000,%fp0 # load +1
10597 # ld_mone(): return negative one.
10601 fmov.s &0xbf800000,%fp0 # load -1
10602 mov.b &neg_bmask,FPSR_CC(%a6) # set 'N' ccode bit
10605 ppiby2: long 0x3fff0000, 0xc90fdaa2, 0x2168c235
10606 mpiby2: long 0xbfff0000, 0xc90fdaa2, 0x2168c235
10608 #################################################################
10609 # spi_2(): Return signed PI/2 according to sign of src operand. #
10610 #################################################################
10613 tst.b SRC_EX(%a0) # check sign of source
10617 # ld_ppi2(): return positive PI/2.
10622 fmov.x ppiby2(%pc),%fp0 # load +pi/2
10623 bra.w t_pinx2 # set INEX2
10626 # ld_mpi2(): return negative PI/2.
10631 fmov.x mpiby2(%pc),%fp0 # load -pi/2
10632 bra.w t_minx2 # set INEX2
10634 ####################################################
10635 # The following routines give support for fsincos. #
10636 ####################################################
10639 # ssincosz(): When the src operand is ZERO, store a one in the
10640 # cosine register and return a ZERO in fp0 w/ the same sign
10641 # as the src operand.
10645 fmov.s &0x3f800000,%fp1
10646 tst.b SRC_EX(%a0) # test sign
10648 fmov.s &0x80000000,%fp0 # return sin result in fp0
10649 mov.b &z_bmask+neg_bmask,FPSR_CC(%a6)
10650 bra.b sto_cos # store cosine result
10652 fmov.s &0x00000000,%fp0 # return sin result in fp0
10653 mov.b &z_bmask,FPSR_CC(%a6)
10654 bra.b sto_cos # store cosine result
10657 # ssincosi(): When the src operand is INF, store a QNAN in the cosine
10658 # register and jump to the operand error routine for negative
10663 fmov.x qnan(%pc),%fp1 # load NAN
10664 bsr.l sto_cos # store cosine result
10668 # ssincosqnan(): When the src operand is a QNAN, store the QNAN in the cosine
10669 # register and branch to the src QNAN routine.
10673 fmov.x LOCAL_EX(%a0),%fp1
10678 # ssincossnan(): When the src operand is an SNAN, store the SNAN w/ the SNAN bit set
10679 # in the cosine register and branch to the src SNAN routine.
10683 fmov.x LOCAL_EX(%a0),%fp1
10687 ########################################################################
10689 #########################################################################
10690 # sto_cos(): store fp1 to the fpreg designated by the CMDREG dst field. #
10691 # fp1 holds the result of the cosine portion of ssincos(). #
10692 # the value in fp1 will not take any exceptions when moved. #
10694 # fp1 : fp value to store #
10697 #########################################################################
10700 mov.b 1+EXC_CMDREG(%a6),%d0
10702 mov.w (tbl_sto_cos.b,%pc,%d0.w*2),%d0
10703 jmp (tbl_sto_cos.b,%pc,%d0.w*1)
10706 short sto_cos_0 - tbl_sto_cos
10707 short sto_cos_1 - tbl_sto_cos
10708 short sto_cos_2 - tbl_sto_cos
10709 short sto_cos_3 - tbl_sto_cos
10710 short sto_cos_4 - tbl_sto_cos
10711 short sto_cos_5 - tbl_sto_cos
10712 short sto_cos_6 - tbl_sto_cos
10713 short sto_cos_7 - tbl_sto_cos
10716 fmovm.x &0x40,EXC_FP0(%a6)
10719 fmovm.x &0x40,EXC_FP1(%a6)
10740 ##################################################################
10745 mov.b DTAG(%a6),%d1
10759 mov.b DTAG(%a6),%d1
10773 mov.b DTAG(%a6),%d1
10787 mov.b SRC_EX(%a0),%d1 # get src sign
10788 mov.b DST_EX(%a1),%d0 # get dst sign
10789 eor.b %d0,%d1 # get qbyte sign
10791 mov.b %d1,FPSR_QBYTE(%a6)
10798 clr.b FPSR_QBYTE(%a6)
10800 mov.b SRC_EX(%a0),%d1 # get src sign
10801 mov.b DST_EX(%a1),%d0 # get dst sign
10802 eor.b %d0,%d1 # get qbyte sign
10804 mov.b %d1,FPSR_QBYTE(%a6)
10805 cmpi.b DTAG(%a6),&DENORM
10811 fmov.l (%sp)+,%fpcr
10812 fmov.x DST(%a1),%fp0
10818 mov.b &neg_bmask,FPSR_CC(%a6) # set 'N' ccode
10821 #########################################################################
10826 mov.b DTAG(%a6),%d1
10840 mov.b DTAG(%a6),%d1
10854 mov.b DTAG(%a6),%d1
10866 #########################################################################
10867 global sscale_snorm
10868 global sscale_sdnrm
10871 mov.b DTAG(%a6),%d1
10883 global sscale_szero
10885 mov.b DTAG(%a6),%d1
10899 mov.b DTAG(%a6),%d1
10907 ########################################################################
10910 # sop_sqnan(): The src op for frem/fmod/fscale was a QNAN.
10914 mov.b DTAG(%a6),%d1
10922 # sop_ssnan(): The src op for frem/fmod/fscale was an SNAN.
10926 mov.b DTAG(%a6),%d1
10928 beq.b dst_qnan_src_snan
10934 ori.l &snaniop_mask,USER_FPSR(%a6) # set NAN/SNAN/AIOP
10938 # dst_snan(): Return the dst SNAN w/ the SNAN bit set.
10942 fmov.x DST(%a1),%fp0 # the fmove sets the SNAN bit
10943 fmov.l %fpsr,%d0 # catch resulting status
10944 or.l %d0,USER_FPSR(%a6) # store status
10948 # dst_qnan(): Return the dst QNAN.
10952 fmov.x DST(%a1),%fp0 # return the non-signalling nan
10953 tst.b DST_EX(%a1) # set ccodes according to QNAN sign
10956 mov.b &nan_bmask,FPSR_CC(%a6)
10959 mov.b &neg_bmask+nan_bmask,FPSR_CC(%a6)
10963 # src_snan(): Return the src SNAN w/ the SNAN bit set.
10967 fmov.x SRC(%a0),%fp0 # the fmove sets the SNAN bit
10968 fmov.l %fpsr,%d0 # catch resulting status
10969 or.l %d0,USER_FPSR(%a6) # store status
10973 # src_qnan(): Return the src QNAN.
10977 fmov.x SRC(%a0),%fp0 # return the non-signalling nan
10978 tst.b SRC_EX(%a0) # set ccodes according to QNAN sign
10981 mov.b &nan_bmask,FPSR_CC(%a6)
10984 mov.b &neg_bmask+nan_bmask,FPSR_CC(%a6)
10989 # These entry points are used by the exception handler
10990 # routines where an instruction is selected by an index into
10991 # a large jump table corresponding to a given instruction which
10992 # has been decoded. Flow continues here where we now decode
10993 # further according to the source operand type.
10998 mov.b STAG(%a6),%d1
11012 mov.b STAG(%a6),%d1
11026 mov.b STAG(%a6),%d1
11040 mov.b STAG(%a6),%d1
11054 mov.b STAG(%a6),%d1
11068 mov.b STAG(%a6),%d1
11082 mov.b STAG(%a6),%d1
11096 mov.b STAG(%a6),%d1
11110 mov.b STAG(%a6),%d1
11124 mov.b STAG(%a6),%d1
11138 mov.b STAG(%a6),%d1
11152 mov.b STAG(%a6),%d1
11166 mov.b STAG(%a6),%d1
11180 mov.b STAG(%a6),%d1
11194 mov.b STAG(%a6),%d1
11208 mov.b STAG(%a6),%d1
11222 mov.b STAG(%a6),%d1
11236 mov.b STAG(%a6),%d1
11250 mov.b STAG(%a6),%d1
11264 mov.b STAG(%a6),%d1
11278 mov.b STAG(%a6),%d1
11292 mov.b STAG(%a6),%d1
11306 mov.b STAG(%a6),%d1
11320 mov.b STAG(%a6),%d1
11332 #########################################################################
11333 # XDEF **************************************************************** #
11334 # fgen_except(): catch an exception during transcendental #
11337 # XREF **************************************************************** #
11338 # fmul() - emulate a multiply instruction #
11339 # fadd() - emulate an add instruction #
11340 # fin() - emulate an fmove instruction #
11342 # INPUT *************************************************************** #
11343 # fp0 = destination operand #
11344 # d0 = type of instruction that took exception #
11345 # fsave frame = source operand #
11347 # OUTPUT ************************************************************** #
11351 # ALGORITHM *********************************************************** #
11352 # An exception occurred on the last instruction of the #
11353 # transcendental emulation. hopefully, this won't be happening much #
11354 # because it will be VERY slow. #
11355 # The only exceptions capable of passing through here are #
11356 # Overflow, Underflow, and Unsupported Data Type. #
11358 #########################################################################
11362 cmpi.b 0x3(%sp),&0x7 # is exception UNSUPP?
11363 beq.b fge_unsupp # yes
11365 mov.b &NORM,STAG(%a6)
11368 mov.b &NORM,DTAG(%a6)
11370 # ok, I have a problem with putting the dst op at FP_DST. the emulation
11371 # routines aren't supposed to alter the operands but we've just squashed
11374 # 8/17/93 - this turns out to be more of a "cleanliness" standpoint
11375 # than a potential bug. to begin with, only the dyadic functions
11376 # frem,fmod, and fscale would get the dst trashed here. But, for
11377 # the 060SP, the FP_DST is never used again anyways.
11378 fmovm.x &0x80,FP_DST(%a6) # dst op is in fp0
11380 lea 0x4(%sp),%a0 # pass: ptr to src op
11381 lea FP_DST(%a6),%a1 # pass: ptr to dst op
11383 cmpi.b %d1,&FMOV_OP
11384 beq.b fge_fin # it was an "fmov"
11385 cmpi.b %d1,&FADD_OP
11386 beq.b fge_fadd # it was an "fadd"
11398 mov.b &DENORM,STAG(%a6)
11402 # This table holds the offsets of the emulation routines for each individual
11403 # math operation relative to the address of this table. Included are
11404 # routines like fadd/fmul/fabs as well as the transcendentals.
11405 # The location within the table is determined by the extension bits of the
11406 # operation longword.
11411 long fin - tbl_unsupp # 00: fmove
11412 long fint - tbl_unsupp # 01: fint
11413 long fsinh - tbl_unsupp # 02: fsinh
11414 long fintrz - tbl_unsupp # 03: fintrz
11415 long fsqrt - tbl_unsupp # 04: fsqrt
11416 long tbl_unsupp - tbl_unsupp
11417 long flognp1 - tbl_unsupp # 06: flognp1
11418 long tbl_unsupp - tbl_unsupp
11419 long fetoxm1 - tbl_unsupp # 08: fetoxm1
11420 long ftanh - tbl_unsupp # 09: ftanh
11421 long fatan - tbl_unsupp # 0a: fatan
11422 long tbl_unsupp - tbl_unsupp
11423 long fasin - tbl_unsupp # 0c: fasin
11424 long fatanh - tbl_unsupp # 0d: fatanh
11425 long fsine - tbl_unsupp # 0e: fsin
11426 long ftan - tbl_unsupp # 0f: ftan
11427 long fetox - tbl_unsupp # 10: fetox
11428 long ftwotox - tbl_unsupp # 11: ftwotox
11429 long ftentox - tbl_unsupp # 12: ftentox
11430 long tbl_unsupp - tbl_unsupp
11431 long flogn - tbl_unsupp # 14: flogn
11432 long flog10 - tbl_unsupp # 15: flog10
11433 long flog2 - tbl_unsupp # 16: flog2
11434 long tbl_unsupp - tbl_unsupp
11435 long fabs - tbl_unsupp # 18: fabs
11436 long fcosh - tbl_unsupp # 19: fcosh
11437 long fneg - tbl_unsupp # 1a: fneg
11438 long tbl_unsupp - tbl_unsupp
11439 long facos - tbl_unsupp # 1c: facos
11440 long fcos - tbl_unsupp # 1d: fcos
11441 long fgetexp - tbl_unsupp # 1e: fgetexp
11442 long fgetman - tbl_unsupp # 1f: fgetman
11443 long fdiv - tbl_unsupp # 20: fdiv
11444 long fmod - tbl_unsupp # 21: fmod
11445 long fadd - tbl_unsupp # 22: fadd
11446 long fmul - tbl_unsupp # 23: fmul
11447 long fsgldiv - tbl_unsupp # 24: fsgldiv
11448 long frem - tbl_unsupp # 25: frem
11449 long fscale - tbl_unsupp # 26: fscale
11450 long fsglmul - tbl_unsupp # 27: fsglmul
11451 long fsub - tbl_unsupp # 28: fsub
11452 long tbl_unsupp - tbl_unsupp
11453 long tbl_unsupp - tbl_unsupp
11454 long tbl_unsupp - tbl_unsupp
11455 long tbl_unsupp - tbl_unsupp
11456 long tbl_unsupp - tbl_unsupp
11457 long tbl_unsupp - tbl_unsupp
11458 long tbl_unsupp - tbl_unsupp
11459 long fsincos - tbl_unsupp # 30: fsincos
11460 long fsincos - tbl_unsupp # 31: fsincos
11461 long fsincos - tbl_unsupp # 32: fsincos
11462 long fsincos - tbl_unsupp # 33: fsincos
11463 long fsincos - tbl_unsupp # 34: fsincos
11464 long fsincos - tbl_unsupp # 35: fsincos
11465 long fsincos - tbl_unsupp # 36: fsincos
11466 long fsincos - tbl_unsupp # 37: fsincos
11467 long fcmp - tbl_unsupp # 38: fcmp
11468 long tbl_unsupp - tbl_unsupp
11469 long ftst - tbl_unsupp # 3a: ftst
11470 long tbl_unsupp - tbl_unsupp
11471 long tbl_unsupp - tbl_unsupp
11472 long tbl_unsupp - tbl_unsupp
11473 long tbl_unsupp - tbl_unsupp
11474 long tbl_unsupp - tbl_unsupp
11475 long fsin - tbl_unsupp # 40: fsmove
11476 long fssqrt - tbl_unsupp # 41: fssqrt
11477 long tbl_unsupp - tbl_unsupp
11478 long tbl_unsupp - tbl_unsupp
11479 long fdin - tbl_unsupp # 44: fdmove
11480 long fdsqrt - tbl_unsupp # 45: fdsqrt
11481 long tbl_unsupp - tbl_unsupp
11482 long tbl_unsupp - tbl_unsupp
11483 long tbl_unsupp - tbl_unsupp
11484 long tbl_unsupp - tbl_unsupp
11485 long tbl_unsupp - tbl_unsupp
11486 long tbl_unsupp - tbl_unsupp
11487 long tbl_unsupp - tbl_unsupp
11488 long tbl_unsupp - tbl_unsupp
11489 long tbl_unsupp - tbl_unsupp
11490 long tbl_unsupp - tbl_unsupp
11491 long tbl_unsupp - tbl_unsupp
11492 long tbl_unsupp - tbl_unsupp
11493 long tbl_unsupp - tbl_unsupp
11494 long tbl_unsupp - tbl_unsupp
11495 long tbl_unsupp - tbl_unsupp
11496 long tbl_unsupp - tbl_unsupp
11497 long tbl_unsupp - tbl_unsupp
11498 long tbl_unsupp - tbl_unsupp
11499 long fsabs - tbl_unsupp # 58: fsabs
11500 long tbl_unsupp - tbl_unsupp
11501 long fsneg - tbl_unsupp # 5a: fsneg
11502 long tbl_unsupp - tbl_unsupp
11503 long fdabs - tbl_unsupp # 5c: fdabs
11504 long tbl_unsupp - tbl_unsupp
11505 long fdneg - tbl_unsupp # 5e: fdneg
11506 long tbl_unsupp - tbl_unsupp
11507 long fsdiv - tbl_unsupp # 60: fsdiv
11508 long tbl_unsupp - tbl_unsupp
11509 long fsadd - tbl_unsupp # 62: fsadd
11510 long fsmul - tbl_unsupp # 63: fsmul
11511 long fddiv - tbl_unsupp # 64: fddiv
11512 long tbl_unsupp - tbl_unsupp
11513 long fdadd - tbl_unsupp # 66: fdadd
11514 long fdmul - tbl_unsupp # 67: fdmul
11515 long fssub - tbl_unsupp # 68: fssub
11516 long tbl_unsupp - tbl_unsupp
11517 long tbl_unsupp - tbl_unsupp
11518 long tbl_unsupp - tbl_unsupp
11519 long fdsub - tbl_unsupp # 6c: fdsub
11521 #########################################################################
11522 # XDEF **************************************************************** #
11523 # fmul(): emulates the fmul instruction #
11524 # fsmul(): emulates the fsmul instruction #
11525 # fdmul(): emulates the fdmul instruction #
11527 # XREF **************************************************************** #
11528 # scale_to_zero_src() - scale src exponent to zero #
11529 # scale_to_zero_dst() - scale dst exponent to zero #
11530 # unf_res() - return default underflow result #
11531 # ovf_res() - return default overflow result #
11532 # res_qnan() - return QNAN result #
11533 # res_snan() - return SNAN result #
11535 # INPUT *************************************************************** #
11536 # a0 = pointer to extended precision source operand #
11537 # a1 = pointer to extended precision destination operand #
11538 # d0 rnd prec,mode #
11540 # OUTPUT ************************************************************** #
11542 # fp1 = EXOP (if exception occurred) #
11544 # ALGORITHM *********************************************************** #
11545 # Handle NANs, infinities, and zeroes as special cases. Divide #
11546 # norms/denorms into ext/sgl/dbl precision. #
11547 # For norms/denorms, scale the exponents such that a multiply #
11548 # instruction won't cause an exception. Use the regular fmul to #
11549 # compute a result. Check if the regular operands would have taken #
11550 # an exception. If so, return the default overflow/underflow result #
11551 # and return the EXOP if exceptions are enabled. Else, scale the #
11552 # result operand to the proper exponent. #
11554 #########################################################################
11558 long 0x3fff - 0x7ffe # ext_max
11559 long 0x3fff - 0x407e # sgl_max
11560 long 0x3fff - 0x43fe # dbl_max
11562 long 0x3fff + 0x0001 # ext_unfl
11563 long 0x3fff - 0x3f80 # sgl_unfl
11564 long 0x3fff - 0x3c00 # dbl_unfl
11568 andi.b &0x30,%d0 # clear rnd prec
11569 ori.b &s_mode*0x10,%d0 # insert sgl prec
11575 ori.b &d_mode*0x10,%d0 # insert dbl prec
11579 mov.l %d0,L_SCR3(%a6) # store rnd info
11582 mov.b DTAG(%a6),%d1
11584 or.b STAG(%a6),%d1 # combine src tags
11585 bne.w fmul_not_norm # optimize on non-norm input
11588 mov.w DST_EX(%a1),FP_SCR1_EX(%a6)
11589 mov.l DST_HI(%a1),FP_SCR1_HI(%a6)
11590 mov.l DST_LO(%a1),FP_SCR1_LO(%a6)
11592 mov.w SRC_EX(%a0),FP_SCR0_EX(%a6)
11593 mov.l SRC_HI(%a0),FP_SCR0_HI(%a6)
11594 mov.l SRC_LO(%a0),FP_SCR0_LO(%a6)
11596 bsr.l scale_to_zero_src # scale src exponent
11597 mov.l %d0,-(%sp) # save scale factor 1
11599 bsr.l scale_to_zero_dst # scale dst exponent
11601 add.l %d0,(%sp) # SCALE_FACTOR = scale1 + scale2
11603 mov.w 2+L_SCR3(%a6),%d1 # fetch precision
11604 lsr.b &0x6,%d1 # shift to lo bits
11605 mov.l (%sp)+,%d0 # load S.F.
11606 cmp.l %d0,(tbl_fmul_ovfl.w,%pc,%d1.w*4) # would result ovfl?
11607 beq.w fmul_may_ovfl # result may rnd to overflow
11608 blt.w fmul_ovfl # result will overflow
11610 cmp.l %d0,(tbl_fmul_unfl.w,%pc,%d1.w*4) # would result unfl?
11611 beq.w fmul_may_unfl # result may rnd to no unfl
11612 bgt.w fmul_unfl # result will underflow
11616 # - the result of the multiply operation will neither overflow nor underflow.
11617 # - do the multiply to the proper precision and rounding mode.
11618 # - scale the result exponent using the scale factor. if both operands were
11619 # normalized then we really don't need to go through this scaling. but for now,
# Normal path: multiply the scaled operands under the rounding info saved in
# L_SCR3, merge the resulting FPSR bits into USER_FPSR, then rewrite the
# result exponent by subtracting the scale factor in d0 while keeping the sign.
11623 fmovm.x FP_SCR1(%a6),&0x80 # load dst operand
11625 fmov.l L_SCR3(%a6),%fpcr # set FPCR
11626 fmov.l &0x0,%fpsr # clear FPSR
11628 fmul.x FP_SCR0(%a6),%fp0 # execute multiply
11630 fmov.l %fpsr,%d1 # save status
11631 fmov.l &0x0,%fpcr # clear FPCR
11633 or.l %d1,USER_FPSR(%a6) # save INEX2,N
11636 fmovm.x &0x80,FP_SCR0(%a6) # store out result
11637 mov.l %d2,-(%sp) # save d2
11638 mov.w FP_SCR0_EX(%a6),%d1 # load {sgn,exp}
11639 mov.l %d1,%d2 # make a copy
11640 andi.l &0x7fff,%d1 # strip sign
11641 andi.w &0x8000,%d2 # keep old sign
11642 sub.l %d0,%d1 # subtract scale factor (undo the operand scaling)
11643 or.w %d2,%d1 # concat old sign,new exp
11644 mov.w %d1,FP_SCR0_EX(%a6) # insert new exponent
11645 mov.l (%sp)+,%d2 # restore d2
11646 fmovm.x FP_SCR0(%a6),&0x80 # return rescaled result in fp0
11651 # - the result of the multiply operation is an overflow.
11652 # - do the multiply to the proper precision and rounding mode in order to
11653 # set the inexact bits.
11654 # - calculate the default result and return it in fp0.
11655 # - if overflow or inexact is enabled, we need a multiply result rounded to
11656 # extended precision. if the original operation was extended, then we have this
11657 # result. if the original operation was single or double, we have to do another
11658 # multiply using extended precision and the correct rounding mode. the result
11659 # of this operation then has its exponent scaled by -0x6000 to create the
11660 # exceptional operand.
11663 fmovm.x FP_SCR1(%a6),&0x80 # load dst operand
11665 fmov.l L_SCR3(%a6),%fpcr # set FPCR
11666 fmov.l &0x0,%fpsr # clear FPSR
11668 fmul.x FP_SCR0(%a6),%fp0 # execute multiply
11670 fmov.l %fpsr,%d1 # save status
11671 fmov.l &0x0,%fpcr # clear FPCR
11673 or.l %d1,USER_FPSR(%a6) # save INEX2,N
11675 # save setting this until now because this is where fmul_may_ovfl may jump in
11677 or.l &ovfl_inx_mask,USER_FPSR(%a6) # set ovfl/aovfl/ainex
11679 mov.b FPCR_ENABLE(%a6),%d1
11680 andi.b &0x13,%d1 # is OVFL or INEX enabled?
11681 bne.b fmul_ovfl_ena # yes
11683 # calculate the default result
11685 btst &neg_bit,FPSR_CC(%a6) # is result negative?
11686 sne %d1 # set sign param accordingly
11687 mov.l L_SCR3(%a6),%d0 # pass rnd prec,mode
11688 bsr.l ovf_res # calculate default result
11689 or.b %d0,FPSR_CC(%a6) # set INF,N if applicable
11690 fmovm.x (%a0),&0x80 # return default result in fp0
11694 # OVFL is enabled; Create EXOP:
11695 # - if precision is extended, then we have the EXOP. simply bias the exponent
11696 # with an extra -0x6000. if the precision is single or double, we need to
11697 # calculate a result rounded to extended precision.
# Build the overflow EXOP in fp1: take the extended-precision product in fp0
# (recomputed first when the operation was sgl or dbl), subtract the scale
# factor in d0 and a further 0x6000 from its exponent, and keep the sign.
11700 mov.l L_SCR3(%a6),%d1 # fetch rnd prec,mode
11701 andi.b &0xc0,%d1 # test the rnd prec
11702 bne.b fmul_ovfl_ena_sd # it's sgl or dbl
11704 fmul_ovfl_ena_cont:
11705 fmovm.x &0x80,FP_SCR0(%a6) # move result to stack
11707 mov.l %d2,-(%sp) # save d2
11708 mov.w FP_SCR0_EX(%a6),%d1 # fetch {sgn,exp}
11709 mov.w %d1,%d2 # make a copy
11710 andi.l &0x7fff,%d1 # strip sign
11711 sub.l %d0,%d1 # subtract scale factor
11712 subi.l &0x6000,%d1 # subtract bias
11713 andi.w &0x7fff,%d1 # clear sign bit
11714 andi.w &0x8000,%d2 # keep old sign
11715 or.w %d2,%d1 # concat old sign,new exp
11716 mov.w %d1,FP_SCR0_EX(%a6) # insert new exponent
11717 mov.l (%sp)+,%d2 # restore d2
11718 fmovm.x FP_SCR0(%a6),&0x40 # return EXOP in fp1
11719 bra.b fmul_ovfl_dis # EXOP done; continue at fmul_ovfl_dis
11722 fmovm.x FP_SCR1(%a6),&0x80 # load dst operand
11724 mov.l L_SCR3(%a6),%d1
11725 andi.b &0x30,%d1 # keep rnd mode only
11726 fmov.l %d1,%fpcr # set FPCR
11728 fmul.x FP_SCR0(%a6),%fp0 # execute multiply
11730 fmov.l &0x0,%fpcr # clear FPCR
11731 bra.b fmul_ovfl_ena_cont
11735 # - the result of the multiply operation MAY overflow.
11736 # - do the multiply to the proper precision and rounding mode in order to
11737 # set the inexact bits.
11738 # - calculate the default result and return it in fp0.
11741 fmovm.x FP_SCR1(%a6),&0x80 # load dst op
11743 fmov.l L_SCR3(%a6),%fpcr # set FPCR
11744 fmov.l &0x0,%fpsr # clear FPSR
11746 fmul.x FP_SCR0(%a6),%fp0 # execute multiply
11748 fmov.l %fpsr,%d1 # save status
11749 fmov.l &0x0,%fpcr # clear FPCR
11751 or.l %d1,USER_FPSR(%a6) # save INEX2,N
11753 fabs.x %fp0,%fp1 # make a copy of result
11754 fcmp.b %fp1,&0x2 # is |result| >= 2.b?
11755 fbge.w fmul_ovfl_tst # yes; overflow has occurred
11757 # no, it didn't overflow; we have correct result
11758 bra.w fmul_normal_exit
11762 # - the result of the multiply operation is an underflow.
11763 # - do the multiply to the proper precision and rounding mode in order to
11764 # set the inexact bits.
11765 # - calculate the default result and return it in fp0.
11766 # - if underflow or inexact is enabled, we need a multiply result rounded to
11767 # extended precision. if the original operation was extended, then we have this
11768 # result. if the original operation was single or double, we have to do another
11769 # multiply using extended precision and the correct rounding mode. the result
11770 # of this operation then has its exponent scaled by +0x6000 to create the
11771 # exceptional operand.
# Underflow path: set the UNFL exception bit, multiply in extended precision
# with round-to-zero, merge the FPSR bits into USER_FPSR, and either branch
# off to build an EXOP (UNFL or INEX enabled) or call unf_res() on the stored
# product to obtain the default result.
11774 bset &unfl_bit,FPSR_EXCEPT(%a6) # set unfl exc bit
11776 # for fun, let's use only extended precision, round to zero. then, let
11777 # the unf_res() routine figure out all the rest.
11778 # will we get the correct answer?
11779 fmovm.x FP_SCR1(%a6),&0x80 # load dst operand
11781 fmov.l &rz_mode*0x10,%fpcr # set FPCR
11782 fmov.l &0x0,%fpsr # clear FPSR
11784 fmul.x FP_SCR0(%a6),%fp0 # execute multiply
11786 fmov.l %fpsr,%d1 # save status
11787 fmov.l &0x0,%fpcr # clear FPCR
11789 or.l %d1,USER_FPSR(%a6) # save INEX2,N
11791 mov.b FPCR_ENABLE(%a6),%d1 # fetch exception enable byte
11792 andi.b &0x0b,%d1 # is UNFL or INEX enabled?
11793 bne.b fmul_unfl_ena # yes
11796 fmovm.x &0x80,FP_SCR0(%a6) # store out result
11798 lea FP_SCR0(%a6),%a0 # pass: result addr
11799 mov.l L_SCR3(%a6),%d1 # pass: rnd prec,mode
11800 bsr.l unf_res # calculate default result
11801 or.b %d0,FPSR_CC(%a6) # unf_res may have set 'Z'
11802 fmovm.x FP_SCR0(%a6),&0x80 # return default result in fp0
# Build the underflow EXOP in fp1: redo the multiply into fp1 in extended
# precision with the user's rounding mode, subtract the scale factor in d0
# from the exponent, add 0x6000, and keep the sign.
11809 fmovm.x FP_SCR1(%a6),&0x40 # load dst op
11811 mov.l L_SCR3(%a6),%d1 # fetch rnd prec,mode
11812 andi.b &0xc0,%d1 # is precision extended?
11813 bne.b fmul_unfl_ena_sd # no, sgl or dbl
11815 # if the rnd mode is anything but RZ, then we have to re-do the above
11816 # multiplication because we used RZ for all.
11817 fmov.l L_SCR3(%a6),%fpcr # set FPCR
11819 fmul_unfl_ena_cont:
11820 fmov.l &0x0,%fpsr # clear FPSR
11822 fmul.x FP_SCR0(%a6),%fp1 # execute multiply
11824 fmov.l &0x0,%fpcr # clear FPCR
11826 fmovm.x &0x40,FP_SCR0(%a6) # save result to stack
11827 mov.l %d2,-(%sp) # save d2
11828 mov.w FP_SCR0_EX(%a6),%d1 # fetch {sgn,exp}
11829 mov.l %d1,%d2 # make a copy
11830 andi.l &0x7fff,%d1 # strip sign
11831 andi.w &0x8000,%d2 # keep old sign
11832 sub.l %d0,%d1 # subtract scale factor
11833 addi.l &0x6000,%d1 # add bias
11835 or.w %d2,%d1 # concat old sign,new exp
11836 mov.w %d1,FP_SCR0_EX(%a6) # insert new exponent
11837 mov.l (%sp)+,%d2 # restore d2
11838 fmovm.x FP_SCR0(%a6),&0x40 # return EXOP in fp1
11839 bra.w fmul_unfl_dis # EXOP done; continue at fmul_unfl_dis
11842 mov.l L_SCR3(%a6),%d1
11843 andi.b &0x30,%d1 # use only rnd mode
11844 fmov.l %d1,%fpcr # set FPCR
11846 bra.b fmul_unfl_ena_cont
11849 # -use the correct rounding mode and precision. this code favors operations
11850 # that do not underflow.
# Borderline underflow: multiply with the user's precision and rounding mode,
# then compare |result| with 2. Greater means no underflow, less means
# underflow; on equality the multiply is repeated with round-to-zero to see
# which side of 2 the unrounded result was on.
11852 fmovm.x FP_SCR1(%a6),&0x80 # load dst operand
11854 fmov.l L_SCR3(%a6),%fpcr # set FPCR
11855 fmov.l &0x0,%fpsr # clear FPSR
11857 fmul.x FP_SCR0(%a6),%fp0 # execute multiply
11859 fmov.l %fpsr,%d1 # save status
11860 fmov.l &0x0,%fpcr # clear FPCR
11862 or.l %d1,USER_FPSR(%a6) # save INEX2,N
11864 fabs.x %fp0,%fp1 # make a copy of result
11865 fcmp.b %fp1,&0x2 # compare |result| with 2
11866 fbgt.w fmul_normal_exit # |result| > 2; no underflow occurred
11867 fblt.w fmul_unfl # |result| < 2; underflow occurred
11870 # we still don't know if underflow occurred. result is ~ equal to 2. but,
11871 # we don't know if the result was an underflow that rounded up to a 2 or
11872 # a normalized number that rounded down to a 2. so, redo the entire operation
11873 # using RZ as the rounding mode to see what the pre-rounded result is.
11874 # this case should be relatively rare.
11876 fmovm.x FP_SCR1(%a6),&0x40 # load dst operand
11878 mov.l L_SCR3(%a6),%d1 # fetch rnd prec,mode
11879 andi.b &0xc0,%d1 # keep rnd prec
11880 ori.b &rz_mode*0x10,%d1 # insert RZ
11882 fmov.l %d1,%fpcr # set FPCR
11883 fmov.l &0x0,%fpsr # clear FPSR
11885 fmul.x FP_SCR0(%a6),%fp1 # execute multiply
11887 fmov.l &0x0,%fpcr # clear FPCR
11888 fabs.x %fp1 # make absolute value
11889 fcmp.b %fp1,&0x2 # is |result| < 2.b?
11890 fbge.w fmul_normal_exit # no; no underflow occurred
11891 bra.w fmul_unfl # yes, underflow occurred
11893 ################################################################################
11896 # Multiply: inputs are not both normalized; what are they?
11899 mov.w (tbl_fmul_op.b,%pc,%d1.w*2),%d1
11900 jmp (tbl_fmul_op.b,%pc,%d1.w)
# Dispatch offsets relative to tbl_fmul_op, eight 16-bit slots per group.
# Groups run NORM, ZERO, INF, QNAN, DENORM, SNAN for the first operand named
# in each comment; the same order applies inside a group for the second.
# The two trailing slots of every group hold a zero offset.
11904 short fmul_norm - tbl_fmul_op # NORM x NORM
11905 short fmul_zero - tbl_fmul_op # NORM x ZERO
11906 short fmul_inf_src - tbl_fmul_op # NORM x INF
11907 short fmul_res_qnan - tbl_fmul_op # NORM x QNAN
11908 short fmul_norm - tbl_fmul_op # NORM x DENORM
11909 short fmul_res_snan - tbl_fmul_op # NORM x SNAN
11910 short tbl_fmul_op - tbl_fmul_op #
11911 short tbl_fmul_op - tbl_fmul_op #
11913 short fmul_zero - tbl_fmul_op # ZERO x NORM
11914 short fmul_zero - tbl_fmul_op # ZERO x ZERO
11915 short fmul_res_operr - tbl_fmul_op # ZERO x INF
11916 short fmul_res_qnan - tbl_fmul_op # ZERO x QNAN
11917 short fmul_zero - tbl_fmul_op # ZERO x DENORM
11918 short fmul_res_snan - tbl_fmul_op # ZERO x SNAN
11919 short tbl_fmul_op - tbl_fmul_op #
11920 short tbl_fmul_op - tbl_fmul_op #
11922 short fmul_inf_dst - tbl_fmul_op # INF x NORM
11923 short fmul_res_operr - tbl_fmul_op # INF x ZERO
11924 short fmul_inf_dst - tbl_fmul_op # INF x INF
11925 short fmul_res_qnan - tbl_fmul_op # INF x QNAN
11926 short fmul_inf_dst - tbl_fmul_op # INF x DENORM
11927 short fmul_res_snan - tbl_fmul_op # INF x SNAN
11928 short tbl_fmul_op - tbl_fmul_op #
11929 short tbl_fmul_op - tbl_fmul_op #
11931 short fmul_res_qnan - tbl_fmul_op # QNAN x NORM
11932 short fmul_res_qnan - tbl_fmul_op # QNAN x ZERO
11933 short fmul_res_qnan - tbl_fmul_op # QNAN x INF
11934 short fmul_res_qnan - tbl_fmul_op # QNAN x QNAN
11935 short fmul_res_qnan - tbl_fmul_op # QNAN x DENORM
11936 short fmul_res_snan - tbl_fmul_op # QNAN x SNAN
11937 short tbl_fmul_op - tbl_fmul_op #
11938 short tbl_fmul_op - tbl_fmul_op #
11940 short fmul_norm - tbl_fmul_op # DENORM x NORM
11941 short fmul_zero - tbl_fmul_op # DENORM x ZERO
11942 short fmul_inf_src - tbl_fmul_op # DENORM x INF
11943 short fmul_res_qnan - tbl_fmul_op # DENORM x QNAN
11944 short fmul_norm - tbl_fmul_op # DENORM x DENORM
11945 short fmul_res_snan - tbl_fmul_op # DENORM x SNAN
11946 short tbl_fmul_op - tbl_fmul_op #
11947 short tbl_fmul_op - tbl_fmul_op #
11949 short fmul_res_snan - tbl_fmul_op # SNAN x NORM
11950 short fmul_res_snan - tbl_fmul_op # SNAN x ZERO
11951 short fmul_res_snan - tbl_fmul_op # SNAN x INF
11952 short fmul_res_snan - tbl_fmul_op # SNAN x QNAN
11953 short fmul_res_snan - tbl_fmul_op # SNAN x DENORM
11954 short fmul_res_snan - tbl_fmul_op # SNAN x SNAN
11955 short tbl_fmul_op - tbl_fmul_op #
11956 short tbl_fmul_op - tbl_fmul_op #
11966 # Multiply: (Zero x Zero) || (Zero x norm) || (Zero x denorm)
# Returns a signed ZERO in fp0 and writes the matching condition codes
# (Z, plus N for a negative result) to FPSR_CC(%a6). The result sign is
# meant to be the exclusive-or of the two operand signs.
# NOTE(review): no instruction combining %d0 and %d1 is visible before
# the bpl, and the fmul_zero_p target label is not visible in this
# listing (embedded line numbers skip) -- confirm against the full source.
11968 global fmul_zero # global for fsglmul
11970 mov.b SRC_EX(%a0),%d0 # fetch src sign byte
11971 mov.b DST_EX(%a1),%d1 # fetch dst sign byte
11973 bpl.b fmul_zero_p # result ZERO is pos.
11975 fmov.s &0x80000000,%fp0 # load -ZERO
11976 mov.b &z_bmask+neg_bmask,FPSR_CC(%a6) # set Z/N
11979 fmov.s &0x00000000,%fp0 # load +ZERO
11980 mov.b &z_bmask,FPSR_CC(%a6) # set Z
11984 # Multiply: (inf x inf) || (inf x norm) || (inf x denorm)
11986 # Note: The j-bit for an infinity is a don't-care. However, to be
11987 # strictly compatible w/ the 68881/882, we make sure to return an
11988 # INF w/ the j-bit set if the input INF j-bit was set. Destination
11989 # INFs take priority.
# The destination INF itself is loaded into fp0 (so its mantissa bits are
# preserved); only its sign is then forced with fabs/fneg, and the INF
# (and N) condition codes are written to FPSR_CC(%a6).
# NOTE(review): no instruction combining %d0 and %d1 is visible before
# the bpl, and the fmul_inf_dst_p / fmul_inf_dst_n labels are not visible
# in this listing (embedded line numbers skip) -- confirm against the
# full source.
11991 global fmul_inf_dst # global for fsglmul
11993 fmovm.x DST(%a1),&0x80 # return INF result in fp0
11994 mov.b SRC_EX(%a0),%d0 # fetch src sign byte
11995 mov.b DST_EX(%a1),%d1 # fetch dst sign byte
11997 bpl.b fmul_inf_dst_p # result INF is pos.
11999 fabs.x %fp0 # clear result sign
12000 fneg.x %fp0 # set result sign
12001 mov.b &inf_bmask+neg_bmask,FPSR_CC(%a6) # set INF/N
12004 fabs.x %fp0 # clear result sign
12005 mov.b &inf_bmask,FPSR_CC(%a6) # set INF
# Multiply: the source operand is the INF (dispatched from tbl_fmul_op
# for a NORM or DENORM destination with an INF source).
# Loads the source INF into fp0, then branches to fmul_inf_dst_p or
# fmul_inf_dst_n (presumably the sign-fixup/ccode tails of fmul_inf_dst
# -- those labels are not visible in this listing).
# NOTE(review): no instruction combining %d0 and %d1 is visible before
# the bpl here -- confirm against the full source.
12008 global fmul_inf_src # global for fsglmul
12010 fmovm.x SRC(%a0),&0x80 # return INF result in fp0
12011 mov.b SRC_EX(%a0),%d0 # fetch src sign byte
12012 mov.b DST_EX(%a1),%d1 # fetch dst sign byte
12014 bpl.b fmul_inf_dst_p # result INF is pos.
12015 bra.b fmul_inf_dst_n # result INF is neg.
12017 #########################################################################
12018 # XDEF **************************************************************** #
12019 # fin(): emulates the fmove instruction #
12020 # fsin(): emulates the fsmove instruction #
12021 # fdin(): emulates the fdmove instruction #
12023 # XREF **************************************************************** #
12024 # norm() - normalize mantissa for EXOP on denorm #
12025 # scale_to_zero_src() - scale src exponent to zero #
12026 # ovf_res() - return default overflow result #
12027 # unf_res() - return default underflow result #
12028 # res_qnan_1op() - return QNAN result #
12029 # res_snan_1op() - return SNAN result #
12031 # INPUT *************************************************************** #
12032 # a0 = pointer to extended precision source operand #
12033 # d0 = round prec/mode #
12035 # OUTPUT ************************************************************** #
# fp0 = result #
12037 # fp1 = EXOP (if exception occurred) #
12039 # ALGORITHM *********************************************************** #
12040 # Handle NANs, infinities, and zeroes as special cases. Divide #
12041 # norms into extended, single, and double precision. #
12042 # Norms can be emulated w/ a regular fmove instruction. For #
12043 # sgl/dbl, must scale exponent and perform an "fmove". Check to see #
12044 # if the result would have overflowed/underflowed. If so, use unf_res() #
12045 # or ovf_res() to return the default result. Also return EXOP if #
12046 # exception is enabled. If no exception, return the default result. #
12047 # Unnorms don't pass through here. #
12049 #########################################################################
# The first two instruction pairs below force the rounding precision to
# single and to double respectively (presumably the fsin and fdin entry
# points; their labels are not visible in this listing). The rounding
# mode in bits 4-5 of %d0 is kept.
12053 andi.b &0x30,%d0 # keep rnd mode; clear rnd prec
12054 ori.b &s_mode*0x10,%d0 # insert sgl precision
12059 andi.b &0x30,%d0 # keep rnd mode; clear rnd prec
12060 ori.b &d_mode*0x10,%d0 # insert dbl precision
12064 mov.l %d0,L_SCR3(%a6) # store rnd info
12066 mov.b STAG(%a6),%d1 # fetch src optype tag
12067 bne.w fin_not_norm # optimize on non-norm input
12070 # FP MOVE IN: NORMs and DENORMs ONLY!
# %d0 still holds the rnd prec/mode byte; bits 6-7 are the precision and
# a value of zero there selects extended precision.
12073 andi.b &0xc0,%d0 # isolate rnd prec; is it extended?
12074 bne.w fin_not_ext # no, so go handle dbl or sgl
12077 # precision selected is extended. so...we cannot get an underflow
12078 # or overflow because of rounding to the correct precision. so...
12079 # skip the scaling and unscaling...
12081 tst.b SRC_EX(%a0) # is the operand negative?
12082 bpl.b fin_norm_done # no
12083 bset &neg_bit,FPSR_CC(%a6) # yes, so set 'N' ccode bit
12085 fmovm.x SRC(%a0),&0x80 # return result in fp0
12089 # for an extended precision DENORM, the UNFL exception bit is set
12090 # the accrued bit is NOT set in this instance(no inexactness!)
12093 andi.b &0xc0,%d0 # isolate rnd prec; is it extended?
12094 bne.w fin_not_ext # no, so go handle dbl or sgl
12096 bset &unfl_bit,FPSR_EXCEPT(%a6) # set unfl exc bit
12097 tst.b SRC_EX(%a0) # is the operand negative?
12098 bpl.b fin_denorm_done # no
12099 bset &neg_bit,FPSR_CC(%a6) # yes, so set 'N' ccode bit
12101 fmovm.x SRC(%a0),&0x80 # return result in fp0
12102 btst &unfl_bit,FPCR_ENABLE(%a6) # is UNFL enabled?
12103 bne.b fin_denorm_unfl_ena # yes
12107 # the input is an extended DENORM and underflow is enabled in the FPCR.
12108 # normalize the mantissa and add the bias of 0x6000 to the resulting negative
12109 # exponent and insert back into the operand.
# The operand is first copied to the FP_SCR0 scratch area so that the
# caller's source operand is not modified; the EXOP is returned in fp1.
12111 fin_denorm_unfl_ena:
12112 mov.w SRC_EX(%a0),FP_SCR0_EX(%a6) # copy sign,exp to scratch
12113 mov.l SRC_HI(%a0),FP_SCR0_HI(%a6) # copy hi mantissa
12114 mov.l SRC_LO(%a0),FP_SCR0_LO(%a6) # copy lo mantissa
12115 lea FP_SCR0(%a6),%a0 # pass: ptr to operand
12116 bsr.l norm # normalize result
12117 neg.w %d0 # new exponent = -(shft val)
12118 addi.w &0x6000,%d0 # add new bias to exponent
12119 mov.w FP_SCR0_EX(%a6),%d1 # fetch old sign,exp
12120 andi.w &0x8000,%d1 # keep old sign
12121 andi.w &0x7fff,%d0 # clear sign position
12122 or.w %d1,%d0 # concat new exp,old sign
12123 mov.w %d0,FP_SCR0_EX(%a6) # insert new exponent
12124 fmovm.x FP_SCR0(%a6),&0x40 # return EXOP in fp1
12128 # operand is to be rounded to single or double precision
# %d0 holds only the rnd prec bits here (masked with 0xc0 before the
# branch to this point).
12131 cmpi.b %d0,&s_mode*0x10 # separate sgl/dbl prec
12135 # operand is to be rounded to single precision
# Copy the operand to FP_SCR0, scale it, and classify the returned scale
# factor (%d0) against the single precision underflow/overflow limits.
12138 mov.w SRC_EX(%a0),FP_SCR0_EX(%a6) # copy sign,exp to scratch
12139 mov.l SRC_HI(%a0),FP_SCR0_HI(%a6) # copy hi mantissa
12140 mov.l SRC_LO(%a0),FP_SCR0_LO(%a6) # copy lo mantissa
12141 bsr.l scale_to_zero_src # calculate scale factor
12143 cmpi.l %d0,&0x3fff-0x3f80 # will move in underflow?
12144 bge.w fin_sd_unfl # yes; go handle underflow
12145 cmpi.l %d0,&0x3fff-0x407e # will move in overflow?
12146 beq.w fin_sd_may_ovfl # maybe; go check
12147 blt.w fin_sd_ovfl # yes; go handle overflow
12150 # operand will NOT overflow or underflow when moved into the fp reg file
12153 fmov.l &0x0,%fpsr # clear FPSR
12154 fmov.l L_SCR3(%a6),%fpcr # set FPCR
12156 fmov.x FP_SCR0(%a6),%fp0 # perform move
12158 fmov.l %fpsr,%d1 # save FPSR
12159 fmov.l &0x0,%fpcr # clear FPCR
12161 or.l %d1,USER_FPSR(%a6) # save INEX2,N
# Undo the scaling: rewrite the exponent field of the rounded result as
# (exponent - scale factor in %d0), keeping the sign, and reload fp0.
12163 fin_sd_normal_exit:
12164 mov.l %d2,-(%sp) # save d2
12165 fmovm.x &0x80,FP_SCR0(%a6) # store out result
12166 mov.w FP_SCR0_EX(%a6),%d1 # load {sgn,exp}
12167 mov.w %d1,%d2 # make a copy
12168 andi.l &0x7fff,%d1 # strip sign
12169 sub.l %d0,%d1 # subtract scale factor
12170 andi.w &0x8000,%d2 # keep old sign
12171 or.w %d1,%d2 # concat old sign,new exponent
12172 mov.w %d2,FP_SCR0_EX(%a6) # insert new exponent
12173 mov.l (%sp)+,%d2 # restore d2
12174 fmovm.x FP_SCR0(%a6),&0x80 # return result in fp0
12178 # operand is to be rounded to double precision
# Same flow as the single precision case, but the scale factor is
# compared against the double precision underflow/overflow limits.
12181 mov.w SRC_EX(%a0),FP_SCR0_EX(%a6) # copy sign,exp to scratch
12182 mov.l SRC_HI(%a0),FP_SCR0_HI(%a6) # copy hi mantissa
12183 mov.l SRC_LO(%a0),FP_SCR0_LO(%a6) # copy lo mantissa
12184 bsr.l scale_to_zero_src # calculate scale factor
12186 cmpi.l %d0,&0x3fff-0x3c00 # will move in underflow?
12187 bge.w fin_sd_unfl # yes; go handle underflow
12188 cmpi.l %d0,&0x3fff-0x43fe # will move in overflow?
12189 beq.w fin_sd_may_ovfl # maybe; go check
12190 blt.w fin_sd_ovfl # yes; go handle overflow
12191 bra.w fin_sd_normal # no; go handle normalized op
12194 # operand WILL underflow when moved in to the fp register file
# On entry %d0 holds the scale factor and FP_SCR0 the scaled operand.
12197 bset &unfl_bit,FPSR_EXCEPT(%a6) # set unfl exc bit
12199 tst.b FP_SCR0_EX(%a6) # is operand negative?
12200 bpl.b fin_sd_unfl_tst # no
12201 bset &neg_bit,FPSR_CC(%a6) # set 'N' ccode bit
12203 # if underflow or inexact is enabled, then go calculate the EXOP first.
12205 mov.b FPCR_ENABLE(%a6),%d1 # fetch exception enable byte
12206 andi.b &0x0b,%d1 # is UNFL or INEX enabled?
12207 bne.b fin_sd_unfl_ena # yes
# default result: unf_res works on the operand at (%a0) and returns
# condition code bits in %d0, which are merged into FPSR_CC.
12210 lea FP_SCR0(%a6),%a0 # pass: result addr
12211 mov.l L_SCR3(%a6),%d1 # pass: rnd prec,mode
12212 bsr.l unf_res # calculate default result
12213 or.b %d0,FPSR_CC(%a6) # unf_res may have set 'Z'
12214 fmovm.x FP_SCR0(%a6),&0x80 # return default result in fp0
12218 # operand will underflow AND underflow or inexact is enabled.
12219 # Therefore, we must return the result rounded to extended precision.
# The EXOP is built in FP_SCR1: same mantissa and sign as the scaled
# operand, exponent = (exponent - scale factor + 0x6000).
12222 mov.l FP_SCR0_HI(%a6),FP_SCR1_HI(%a6) # copy hi mantissa
12223 mov.l FP_SCR0_LO(%a6),FP_SCR1_LO(%a6) # copy lo mantissa
12224 mov.w FP_SCR0_EX(%a6),%d1 # load current exponent
12226 mov.l %d2,-(%sp) # save d2
12227 mov.w %d1,%d2 # make a copy
12228 andi.l &0x7fff,%d1 # strip sign
12229 sub.l %d0,%d1 # subtract scale factor
12230 andi.w &0x8000,%d2 # extract old sign
12231 addi.l &0x6000,%d1 # add new bias
12233 or.w %d1,%d2 # concat old sign,new exp
12234 mov.w %d2,FP_SCR1_EX(%a6) # insert new exponent
12235 fmovm.x FP_SCR1(%a6),&0x40 # return EXOP in fp1
12236 mov.l (%sp)+,%d2 # restore d2
12237 bra.b fin_sd_unfl_dis # continue at fin_sd_unfl_dis
12240 # operand WILL overflow.
# Perform the move at the user's precision/mode only to collect INEX2/N,
# then record the overflow in USER_FPSR.
12243 fmov.l &0x0,%fpsr # clear FPSR
12244 fmov.l L_SCR3(%a6),%fpcr # set FPCR
12246 fmov.x FP_SCR0(%a6),%fp0 # perform move
12248 fmov.l &0x0,%fpcr # clear FPCR
12249 fmov.l %fpsr,%d1 # save FPSR
12251 or.l %d1,USER_FPSR(%a6) # save INEX2,N
12254 or.l &ovfl_inx_mask,USER_FPSR(%a6) # set ovfl/aovfl/ainex
12256 mov.b FPCR_ENABLE(%a6),%d1 # fetch exception enable byte
12257 andi.b &0x13,%d1 # is OVFL or INEX enabled?
12258 bne.b fin_sd_ovfl_ena # yes
12261 # Neither OVFL nor INEX is enabled; therefore, we must create the default
12262 # result by calling ovf_res().
# ovf_res takes the result sign in %d1 and prec,mode in %d0; it returns
# condition code bits in %d0 and (presumably -- confirm) a pointer to the
# default result in %a0.
12265 btst &neg_bit,FPSR_CC(%a6) # is result negative?
12266 sne %d1 # set sign param accordingly
12267 mov.l L_SCR3(%a6),%d0 # pass: prec,mode
12268 bsr.l ovf_res # calculate default result
12269 or.b %d0,FPSR_CC(%a6) # set INF,N if applicable
12270 fmovm.x (%a0),&0x80 # return default result in fp0
12275 # the INEX2 bit has already been updated by the round to the correct precision.
12276 # now, round to extended(and don't alter the FPSR).
# The EXOP exponent is (exponent - scale factor in %d0 - 0x6000).
12279 mov.l %d2,-(%sp) # save d2
12280 mov.w FP_SCR0_EX(%a6),%d1 # fetch {sgn,exp}
12281 mov.l %d1,%d2 # make a copy
12282 andi.l &0x7fff,%d1 # strip sign
12283 andi.w &0x8000,%d2 # keep old sign
12284 sub.l %d0,%d1 # subtract scale factor
12285 sub.l &0x6000,%d1 # subtract bias
12288 mov.w %d1,FP_SCR0_EX(%a6) # insert new exponent
12289 mov.l (%sp)+,%d2 # restore d2
12290 fmovm.x FP_SCR0(%a6),&0x40 # return EXOP in fp1
12291 bra.b fin_sd_ovfl_dis # continue at fin_sd_ovfl_dis
12294 # the move in MAY overflow. so...
# Do the move on the scaled operand at the user's precision/mode and look
# at the rounded magnitude: a value of 2 or more means the rounding
# carried out of the mantissa, i.e. the unscaled result overflows.
12297 fmov.l &0x0,%fpsr # clear FPSR
12298 fmov.l L_SCR3(%a6),%fpcr # set FPCR
12300 fmov.x FP_SCR0(%a6),%fp0 # perform the move
12302 fmov.l %fpsr,%d1 # save status
12303 fmov.l &0x0,%fpcr # clear FPCR
12305 or.l %d1,USER_FPSR(%a6) # save INEX2,N
12307 fabs.x %fp0,%fp1 # fp1 = |result|
12308 fcmp.b %fp1,&0x2 # is |result| >= 2.b?
12309 fbge.w fin_sd_ovfl_tst # yes; overflow has occurred
12311 # no, it didn't overflow; we have correct result
12312 bra.w fin_sd_normal_exit # unscale exponent and return
12314 ##########################################################################
12317 # operand is not a NORM: check its optype and branch accordingly
# %d1 holds the source optype tag (STAG). The conditional branches that
# follow each compare are not visible in this listing.
12320 cmpi.b %d1,&DENORM # weed out DENORM
12322 cmpi.b %d1,&SNAN # weed out SNANs
12324 cmpi.b %d1,&QNAN # weed out QNANs
12328 # do the fmove in; at this point, only possible ops are ZERO and INF.
12329 # use fmov to determine ccodes.
12330 # prec:mode should be zero at this point but it won't affect answer anyways.
12332 fmov.x SRC(%a0),%fp0 # do fmove in
12333 fmov.l %fpsr,%d0 # no exceptions possible
12334 rol.l &0x8,%d0 # rotate top (ccode) byte into lo byte
12335 mov.b %d0,FPSR_CC(%a6) # insert correct ccodes
12338 #########################################################################
12339 # XDEF **************************************************************** #
12340 # fdiv(): emulates the fdiv instruction #
12341 # fsdiv(): emulates the fsdiv instruction #
12342 # fddiv(): emulates the fddiv instruction #
12344 # XREF **************************************************************** #
12345 # scale_to_zero_src() - scale src exponent to zero #
12346 # scale_to_zero_dst() - scale dst exponent to zero #
12347 # unf_res() - return default underflow result #
12348 # ovf_res() - return default overflow result #
12349 # res_qnan() - return QNAN result #
12350 # res_snan() - return SNAN result #
12352 # INPUT *************************************************************** #
12353 # a0 = pointer to extended precision source operand #
12354 # a1 = pointer to extended precision destination operand #
12355 # d0 = rnd prec,mode #
12357 # OUTPUT ************************************************************** #
# fp0 = result #
12359 # fp1 = EXOP (if exception occurred) #
12361 # ALGORITHM *********************************************************** #
12362 # Handle NANs, infinities, and zeroes as special cases. Divide #
12363 # norms/denorms into ext/sgl/dbl precision. #
12364 # For norms/denorms, scale the exponents such that a divide #
12365 # instruction won't cause an exception. Use the regular fdiv to #
12366 # compute a result. Check if the regular operands would have taken #
12367 # an exception. If so, return the default overflow/underflow result #
12368 # and return the EXOP if exceptions are enabled. Else, scale the #
12369 # result operand to the proper exponent. #
12371 #########################################################################
# Scale-factor thresholds, one longword per rounding precision in the
# order ext, sgl, dbl. The divide code compares the combined scale factor
# against tbl_fdiv_unfl and tbl_fdiv_ovfl, indexed by precision
# (presumably these two tables; their labels are not visible here).
12375 long 0x3fff - 0x0000 # ext underflow threshold
12376 long 0x3fff - 0x3f81 # sgl underflow threshold
12377 long 0x3fff - 0x3c01 # dbl underflow threshold
12380 long 0x3fff - 0x7ffe # ext overflow exponent
12381 long 0x3fff - 0x407e # sgl overflow exponent
12382 long 0x3fff - 0x43fe # dbl overflow exponent
# The first two instruction pairs below force the rounding precision to
# single and to double respectively (presumably the fsdiv and fddiv entry
# points; their labels are not visible in this listing). The rounding
# mode in bits 4-5 of %d0 is kept.
12386 andi.b &0x30,%d0 # keep rnd mode; clear rnd prec
12387 ori.b &s_mode*0x10,%d0 # insert sgl prec
12392 andi.b &0x30,%d0 # keep rnd mode; clear rnd prec
12393 ori.b &d_mode*0x10,%d0 # insert dbl prec
12397 mov.l %d0,L_SCR3(%a6) # store rnd info
12400 mov.b DTAG(%a6),%d1 # fetch dst optype tag
12402 or.b STAG(%a6),%d1 # combine dst and src tags
12404 bne.w fdiv_not_norm # optimize on non-norm input
12407 # DIVIDE: NORMs and DENORMs ONLY!
# Copy dst to FP_SCR1 and src to FP_SCR0, scale both, and use the
# combined scale factor to decide between the normal, may-overflow,
# may-underflow and underflow paths for the selected precision.
12410 mov.w DST_EX(%a1),FP_SCR1_EX(%a6) # copy dst sign,exp to scratch
12411 mov.l DST_HI(%a1),FP_SCR1_HI(%a6) # copy dst hi mantissa
12412 mov.l DST_LO(%a1),FP_SCR1_LO(%a6) # copy dst lo mantissa
12414 mov.w SRC_EX(%a0),FP_SCR0_EX(%a6) # copy src sign,exp to scratch
12415 mov.l SRC_HI(%a0),FP_SCR0_HI(%a6) # copy src hi mantissa
12416 mov.l SRC_LO(%a0),FP_SCR0_LO(%a6) # copy src lo mantissa
12418 bsr.l scale_to_zero_src # scale src exponent
12419 mov.l %d0,-(%sp) # save scale factor 1
12421 bsr.l scale_to_zero_dst # scale dst exponent
12423 neg.l (%sp) # SCALE FACTOR = scale1 - scale2
12426 mov.w 2+L_SCR3(%a6),%d1 # fetch lo word of rnd info
12427 lsr.b &0x6,%d1 # rnd prec -> table index
12428 mov.l (%sp)+,%d0 # load S.F.
12429 cmp.l %d0,(tbl_fdiv_ovfl.b,%pc,%d1.w*4) # will result overflow?
12430 ble.w fdiv_may_ovfl # result may overflow; go check
12432 cmp.l %d0,(tbl_fdiv_unfl.w,%pc,%d1.w*4) # will result underflow?
12433 beq.w fdiv_may_unfl # maybe
12434 bgt.w fdiv_unfl # yes; go handle underflow
# normal case: divide the scaled operands at the user's precision/mode.
12437 fmovm.x FP_SCR1(%a6),&0x80 # load dst op
12439 fmov.l L_SCR3(%a6),%fpcr # set FPCR
12440 fmov.l &0x0,%fpsr # clear FPSR
12442 fdiv.x FP_SCR0(%a6),%fp0 # perform divide
12444 fmov.l %fpsr,%d1 # save FPSR
12445 fmov.l &0x0,%fpcr # clear FPCR
12447 or.l %d1,USER_FPSR(%a6) # save INEX2,N
# undo the scaling: exponent = (exponent - scale factor), sign kept.
12450 fmovm.x &0x80,FP_SCR0(%a6) # store result in FP_SCR0
12451 mov.l %d2,-(%sp) # store d2
12452 mov.w FP_SCR0_EX(%a6),%d1 # load {sgn,exp}
12453 mov.l %d1,%d2 # make a copy
12454 andi.l &0x7fff,%d1 # strip sign
12455 andi.w &0x8000,%d2 # keep old sign
12456 sub.l %d0,%d1 # subtract scale factor
12457 or.w %d2,%d1 # concat old sign,new exp
12458 mov.w %d1,FP_SCR0_EX(%a6) # insert new exponent
12459 mov.l (%sp)+,%d2 # restore d2
12460 fmovm.x FP_SCR0(%a6),&0x80 # return result in fp0
# No overflow after all: pop the saved scale factor and finish through
# the normal exit.
12469 mov.l (%sp)+,%d0 # restore scale factor
12470 bra.b fdiv_normal_exit # unscale exponent and return
# Divide: the result MAY overflow. Perform the divide on the scaled
# operands, rebuild the unscaled exponent of the rounded result and
# compare it with the tbl_fdiv_ovfl2 entry selected by %d1.
# NOTE(review): tbl_fdiv_ovfl2, the instruction that loads %d0 before the
# or.l below, and the branch that consumes the final compare are not
# visible in this listing (embedded line numbers skip) -- confirm against
# the full source.
12473 mov.l %d0,-(%sp) # save scale factor
12475 fmovm.x FP_SCR1(%a6),&0x80 # load dst op
12477 fmov.l L_SCR3(%a6),%fpcr # set FPCR
12478 fmov.l &0x0,%fpsr # clear FPSR
12480 fdiv.x FP_SCR0(%a6),%fp0 # execute divide
12485 or.l %d0,USER_FPSR(%a6) # save INEX,N
12487 fmovm.x &0x01,-(%sp) # save result to stack
12488 mov.w (%sp),%d0 # fetch new exponent
12489 add.l &0xc,%sp # pop the 12-byte result from stack
12490 andi.l &0x7fff,%d0 # strip sign
12491 sub.l (%sp),%d0 # subtract saved scale factor
12492 cmp.l %d0,(tbl_fdiv_ovfl2.b,%pc,%d1.w*4) # compare with tbl_fdiv_ovfl2[%d1]
# Divide: overflow has occurred. Record OVFL/AOVFL/AINEX in USER_FPSR.
# If OVFL or INEX is enabled, build the EXOP in fp1 first; in every case
# the default overflow result from ovf_res is returned in fp0.
12497 or.l &ovfl_inx_mask,USER_FPSR(%a6) # set ovfl/aovfl/ainex
12499 mov.b FPCR_ENABLE(%a6),%d1 # fetch exception enable byte
12500 andi.b &0x13,%d1 # is OVFL or INEX enabled?
12501 bne.b fdiv_ovfl_ena # yes
# default result: ovf_res takes the result sign in %d1 and prec:rnd in
# %d0; it returns condition code bits in %d0 and (presumably -- confirm)
# a pointer to the default result in %a0.
12504 btst &neg_bit,FPSR_CC(%a6) # is result negative?
12505 sne %d1 # set sign param accordingly
12506 mov.l L_SCR3(%a6),%d0 # pass prec:rnd
12507 bsr.l ovf_res # calculate default result
12508 or.b %d0,FPSR_CC(%a6) # set INF if applicable
12509 fmovm.x (%a0),&0x80 # return default result in fp0
# OVFL or INEX enabled: for extended precision the result already in fp0
# is used for the EXOP; for sgl/dbl the divide is redone below.
12513 mov.l L_SCR3(%a6),%d1 # fetch rnd info
12514 andi.b &0xc0,%d1 # is precision extended?
12515 bne.b fdiv_ovfl_ena_sd # no, do sgl or dbl
# EXOP exponent = (exponent - scale factor in %d0 - 0x6000), sign kept.
12517 fdiv_ovfl_ena_cont:
12518 fmovm.x &0x80,FP_SCR0(%a6) # move result to FP_SCR0
12520 mov.l %d2,-(%sp) # save d2
12521 mov.w FP_SCR0_EX(%a6),%d1 # fetch {sgn,exp}
12522 mov.w %d1,%d2 # make a copy
12523 andi.l &0x7fff,%d1 # strip sign
12524 sub.l %d0,%d1 # subtract scale factor
12525 subi.l &0x6000,%d1 # subtract bias
12526 andi.w &0x7fff,%d1 # clear sign bit
12527 andi.w &0x8000,%d2 # keep old sign
12528 or.w %d2,%d1 # concat old sign,new exp
12529 mov.w %d1,FP_SCR0_EX(%a6) # insert new exponent
12530 mov.l (%sp)+,%d2 # restore d2
12531 fmovm.x FP_SCR0(%a6),&0x40 # return EXOP in fp1
12532 bra.b fdiv_ovfl_dis # go create the default result
# sgl/dbl: redo the divide with only the rounding mode in the FPCR (the
# precision bits are cleared, which selects extended precision).
12535 fmovm.x FP_SCR1(%a6),&0x80 # load dst operand
12537 mov.l L_SCR3(%a6),%d1 # fetch rnd info
12538 andi.b &0x30,%d1 # keep rnd mode
12539 fmov.l %d1,%fpcr # set FPCR
12541 fdiv.x FP_SCR0(%a6),%fp0 # execute divide
12543 fmov.l &0x0,%fpcr # clear FPCR
12544 bra.b fdiv_ovfl_ena_cont # go build the EXOP
# Divide: the result WILL underflow. The divide is first done with
# round-to-zero and extended precision to collect INEX2/N. If UNFL or
# INEX is enabled the EXOP is built in fp1; in every case the default
# underflow result from unf_res is returned in fp0.
12547 bset &unfl_bit,FPSR_EXCEPT(%a6) # set unfl exc bit
12549 fmovm.x FP_SCR1(%a6),&0x80 # load dst op
12551 fmov.l &rz_mode*0x10,%fpcr # set FPCR: RZ, ext prec
12552 fmov.l &0x0,%fpsr # clear FPSR
12554 fdiv.x FP_SCR0(%a6),%fp0 # execute divide
12556 fmov.l %fpsr,%d1 # save status
12557 fmov.l &0x0,%fpcr # clear FPCR
12559 or.l %d1,USER_FPSR(%a6) # save INEX2,N
12561 mov.b FPCR_ENABLE(%a6),%d1 # fetch exception enable byte
12562 andi.b &0x0b,%d1 # is UNFL or INEX enabled?
12563 bne.b fdiv_unfl_ena # yes
# default result: unf_res works on the operand at (%a0) and returns
# condition code bits in %d0.
12566 fmovm.x &0x80,FP_SCR0(%a6) # store out result
12568 lea FP_SCR0(%a6),%a0 # pass: result addr
12569 mov.l L_SCR3(%a6),%d1 # pass: rnd prec,mode
12570 bsr.l unf_res # calculate default result
12571 or.b %d0,FPSR_CC(%a6) # 'Z' may have been set
12572 fmovm.x FP_SCR0(%a6),&0x80 # return default result in fp0
# UNFL or INEX enabled: redo the divide in fp1 to produce the EXOP. For
# extended precision the user's FPCR is used as is; for sgl/dbl only the
# rounding mode is kept.
12579 fmovm.x FP_SCR1(%a6),&0x40 # load dst op into fp1
12581 mov.l L_SCR3(%a6),%d1 # fetch rnd info
12582 andi.b &0xc0,%d1 # is precision extended?
12583 bne.b fdiv_unfl_ena_sd # no, sgl or dbl
12585 fmov.l L_SCR3(%a6),%fpcr # set FPCR
12587 fdiv_unfl_ena_cont:
12588 fmov.l &0x0,%fpsr # clear FPSR
12590 fdiv.x FP_SCR0(%a6),%fp1 # execute divide
12592 fmov.l &0x0,%fpcr # clear FPCR
# EXOP exponent = (exponent - scale factor in %d0 + 0x6000), sign kept.
12594 fmovm.x &0x40,FP_SCR0(%a6) # save result to FP_SCR0
12595 mov.l %d2,-(%sp) # save d2
12596 mov.w FP_SCR0_EX(%a6),%d1 # fetch {sgn,exp}
12597 mov.l %d1,%d2 # make a copy
12598 andi.l &0x7fff,%d1 # strip sign
12599 andi.w &0x8000,%d2 # keep old sign
12600 sub.l %d0,%d1 # subtract scale factor
12601 addi.l &0x6000,%d1 # add bias
12603 or.w %d2,%d1 # concat old sign,new exp
12604 mov.w %d1,FP_SCR0_EX(%a6) # insert new exp
12605 mov.l (%sp)+,%d2 # restore d2
12606 fmovm.x FP_SCR0(%a6),&0x40 # return EXOP in fp1
12607 bra.w fdiv_unfl_dis # go create the default result
# sgl/dbl: keep only the rounding mode (precision bits cleared, which
# selects extended precision) for the EXOP divide.
12610 mov.l L_SCR3(%a6),%d1 # fetch rnd info
12611 andi.b &0x30,%d1 # use only rnd mode
12612 fmov.l %d1,%fpcr # set FPCR
12614 bra.b fdiv_unfl_ena_cont # go do the EXOP divide
12617 # the divide operation MAY underflow:
# Divide the scaled operands at the user's precision/mode and compare the
# magnitude of the rounded result with 1.
12620 fmovm.x FP_SCR1(%a6),&0x80 # load dst op
12622 fmov.l L_SCR3(%a6),%fpcr # set FPCR
12623 fmov.l &0x0,%fpsr # clear FPSR
12625 fdiv.x FP_SCR0(%a6),%fp0 # execute divide
12627 fmov.l %fpsr,%d1 # save status
12628 fmov.l &0x0,%fpcr # clear FPCR
12630 or.l %d1,USER_FPSR(%a6) # save INEX2,N
12632 fabs.x %fp0,%fp1 # fp1 = |result|
12633 fcmp.b %fp1,&0x1 # compare |result| with 1.b
12634 fbgt.w fdiv_normal_exit # |result| > 1; no underflow occurred
12635 fblt.w fdiv_unfl # |result| < 1; underflow occurred
12638 # we still don't know if underflow occurred. result is ~ equal to 1. but,
12639 # we don't know if the result was an underflow that rounded up to a 1
12640 # or a normalized number that rounded down to a 1. so, redo the entire
12641 # operation using RZ as the rounding mode to see what the pre-rounded
12642 # result is. this case should be relatively rare.
12644 fmovm.x FP_SCR1(%a6),&0x40 # load dst op into fp1
12646 mov.l L_SCR3(%a6),%d1 # fetch rnd info
12647 andi.b &0xc0,%d1 # keep rnd prec
12648 ori.b &rz_mode*0x10,%d1 # insert RZ
12650 fmov.l %d1,%fpcr # set FPCR
12651 fmov.l &0x0,%fpsr # clear FPSR
12653 fdiv.x FP_SCR0(%a6),%fp1 # execute divide
12655 fmov.l &0x0,%fpcr # clear FPCR
12656 fabs.x %fp1 # make absolute value
12657 fcmp.b %fp1,&0x1 # is |result| < 1.b?
12658 fbge.w fdiv_normal_exit # no; no underflow occurred
12659 bra.w fdiv_unfl # yes; underflow occurred
12661 ############################################################################
12664 # Divide: inputs are not both normalized; what are they?
# %d1 holds the table index built from the dst and src optype tags. The
# 16-bit entry at tbl_fdiv_op[%d1] is an offset from tbl_fdiv_op to the
# handler. Each row is 8 entries wide (6 cases + 2 pad words); entry
# comments read "DST / SRC" (e.g. "NORM / ZERO" -> fdiv_inf_load).
12667 mov.w (tbl_fdiv_op.b,%pc,%d1.w*2),%d1 # fetch handler offset
12668 jmp (tbl_fdiv_op.b,%pc,%d1.w*1) # jump to tbl_fdiv_op + offset
12672 short fdiv_norm - tbl_fdiv_op # NORM / NORM
12673 short fdiv_inf_load - tbl_fdiv_op # NORM / ZERO
12674 short fdiv_zero_load - tbl_fdiv_op # NORM / INF
12675 short fdiv_res_qnan - tbl_fdiv_op # NORM / QNAN
12676 short fdiv_norm - tbl_fdiv_op # NORM / DENORM
12677 short fdiv_res_snan - tbl_fdiv_op # NORM / SNAN
12678 short tbl_fdiv_op - tbl_fdiv_op # pad (offset 0)
12679 short tbl_fdiv_op - tbl_fdiv_op # pad (offset 0)
12681 short fdiv_zero_load - tbl_fdiv_op # ZERO / NORM
12682 short fdiv_res_operr - tbl_fdiv_op # ZERO / ZERO
12683 short fdiv_zero_load - tbl_fdiv_op # ZERO / INF
12684 short fdiv_res_qnan - tbl_fdiv_op # ZERO / QNAN
12685 short fdiv_zero_load - tbl_fdiv_op # ZERO / DENORM
12686 short fdiv_res_snan - tbl_fdiv_op # ZERO / SNAN
12687 short tbl_fdiv_op - tbl_fdiv_op # pad (offset 0)
12688 short tbl_fdiv_op - tbl_fdiv_op # pad (offset 0)
12690 short fdiv_inf_dst - tbl_fdiv_op # INF / NORM
12691 short fdiv_inf_dst - tbl_fdiv_op # INF / ZERO
12692 short fdiv_res_operr - tbl_fdiv_op # INF / INF
12693 short fdiv_res_qnan - tbl_fdiv_op # INF / QNAN
12694 short fdiv_inf_dst - tbl_fdiv_op # INF / DENORM
12695 short fdiv_res_snan - tbl_fdiv_op # INF / SNAN
12696 short tbl_fdiv_op - tbl_fdiv_op # pad (offset 0)
12697 short tbl_fdiv_op - tbl_fdiv_op # pad (offset 0)
12699 short fdiv_res_qnan - tbl_fdiv_op # QNAN / NORM
12700 short fdiv_res_qnan - tbl_fdiv_op # QNAN / ZERO
12701 short fdiv_res_qnan - tbl_fdiv_op # QNAN / INF
12702 short fdiv_res_qnan - tbl_fdiv_op # QNAN / QNAN
12703 short fdiv_res_qnan - tbl_fdiv_op # QNAN / DENORM
12704 short fdiv_res_snan - tbl_fdiv_op # QNAN / SNAN
12705 short tbl_fdiv_op - tbl_fdiv_op # pad (offset 0)
12706 short tbl_fdiv_op - tbl_fdiv_op # pad (offset 0)
12708 short fdiv_norm - tbl_fdiv_op # DENORM / NORM
12709 short fdiv_inf_load - tbl_fdiv_op # DENORM / ZERO
12710 short fdiv_zero_load - tbl_fdiv_op # DENORM / INF
12711 short fdiv_res_qnan - tbl_fdiv_op # DENORM / QNAN
12712 short fdiv_norm - tbl_fdiv_op # DENORM / DENORM
12713 short fdiv_res_snan - tbl_fdiv_op # DENORM / SNAN
12714 short tbl_fdiv_op - tbl_fdiv_op # pad (offset 0)
12715 short tbl_fdiv_op - tbl_fdiv_op # pad (offset 0)
12717 short fdiv_res_snan - tbl_fdiv_op # SNAN / NORM
12718 short fdiv_res_snan - tbl_fdiv_op # SNAN / ZERO
12719 short fdiv_res_snan - tbl_fdiv_op # SNAN / INF
12720 short fdiv_res_snan - tbl_fdiv_op # SNAN / QNAN
12721 short fdiv_res_snan - tbl_fdiv_op # SNAN / DENORM
12722 short fdiv_res_snan - tbl_fdiv_op # SNAN / SNAN
12723 short tbl_fdiv_op - tbl_fdiv_op # pad (offset 0)
12724 short tbl_fdiv_op - tbl_fdiv_op # pad (offset 0)
# Divide: the result is a ZERO (dispatched from tbl_fdiv_op for a ZERO
# dst with a NORM, INF or DENORM src, and for a NORM or DENORM dst with
# an INF src).
# Returns a signed ZERO in fp0 and writes Z (plus N when negative) to
# FPSR_CC(%a6). The result sign is meant to be the exclusive-or of the
# two operand signs.
# NOTE(review): no instruction combining %d0 and %d1 is visible before
# the bpl, and the fdiv_zero_load_p target label is not visible in this
# listing (embedded line numbers skip) -- confirm against the full source.
12733 global fdiv_zero_load # global for fsgldiv
12735 mov.b SRC_EX(%a0),%d0 # fetch src sign byte
12736 mov.b DST_EX(%a1),%d1 # fetch dst sign byte
12738 bpl.b fdiv_zero_load_p # result is positive
12739 fmov.s &0x80000000,%fp0 # load a -ZERO
12740 mov.b &z_bmask+neg_bmask,FPSR_CC(%a6) # set Z/N
12743 fmov.s &0x00000000,%fp0 # load a +ZERO
12744 mov.b &z_bmask,FPSR_CC(%a6) # set Z
12748 # The destination was In Range and the source was a ZERO. The result,
12749 # Therefore, is an INF w/ the proper sign.
12750 # So, determine the sign and return a new INF (w/ the j-bit cleared).
# This is the divide-by-zero case, so DZ and ADZ are also set in the
# user's FPSR.
# NOTE(review): no instruction combining %d0 and %d1 is visible before
# the bpl, and the fdiv_inf_load_p target label is not visible in this
# listing (embedded line numbers skip) -- confirm against the full source.
12752 global fdiv_inf_load # global for fsgldiv
12754 ori.w &dz_mask+adz_mask,2+USER_FPSR(%a6) # set DZ/ADZ
12755 mov.b SRC_EX(%a0),%d0 # load both signs
12756 mov.b DST_EX(%a1),%d1
12758 bpl.b fdiv_inf_load_p # result is positive
12759 fmov.s &0xff800000,%fp0 # make result -INF
12760 mov.b &inf_bmask+neg_bmask,FPSR_CC(%a6) # set INF/N
12763 fmov.s &0x7f800000,%fp0 # make result +INF
12764 mov.b &inf_bmask,FPSR_CC(%a6) # set INF
12768 # The destination was an INF w/ an In Range or ZERO source, the result is
12769 # an INF w/ the proper sign.
12770 # The 68881/882 returns the destination INF w/ the new sign(if the j-bit of the
12771 # dst INF is set, then the j-bit of the result INF is also set).
# The destination INF itself is loaded into fp0 and only its sign is
# forced with fabs/fneg; INF (plus N when negative) is written to
# FPSR_CC(%a6).
# NOTE(review): no instruction combining %d0 and %d1 is visible before
# the bpl, and the fdiv_inf_dst_p target label is not visible in this
# listing (embedded line numbers skip) -- confirm against the full source.
12773 global fdiv_inf_dst # global for fsgldiv
12775 mov.b DST_EX(%a1),%d0 # load both signs
12776 mov.b SRC_EX(%a0),%d1
12778 bpl.b fdiv_inf_dst_p # result is positive
12780 fmovm.x DST(%a1),&0x80 # return result in fp0
12781 fabs.x %fp0 # clear sign bit
12782 fneg.x %fp0 # set sign bit
12783 mov.b &inf_bmask+neg_bmask,FPSR_CC(%a6) # set INF/NEG
12787 fmovm.x DST(%a1),&0x80 # return result in fp0
12788 fabs.x %fp0 # return positive INF
12789 mov.b &inf_bmask,FPSR_CC(%a6) # set INF
12792 #########################################################################
12793 # XDEF **************************************************************** #
12794 # fneg(): emulates the fneg instruction #
12795 # fsneg(): emulates the fsneg instruction #
12796 # fdneg(): emulates the fdneg instruction #
12798 # XREF **************************************************************** #
12799 # norm() - normalize a denorm to provide EXOP #
12800 # scale_to_zero_src() - scale sgl/dbl source exponent #
12801 # ovf_res() - return default overflow result #
12802 # unf_res() - return default underflow result #
12803 # res_qnan_1op() - return QNAN result #
12804 # res_snan_1op() - return SNAN result #
12806 # INPUT *************************************************************** #
12807 # a0 = pointer to extended precision source operand #
12808 # d0 = rnd prec,mode #
12810 # OUTPUT ************************************************************** #
12812 # fp1 = EXOP (if exception occurred) #
12814 # ALGORITHM *********************************************************** #
12815 # Handle NANs, zeroes, and infinities as special cases. Separate #
12816 # norms/denorms into ext/sgl/dbl precisions. Extended precision can be #
12817 # emulated by simply setting sign bit. Sgl/dbl operands must be scaled #
12818 # and an actual fneg performed to see if overflow/underflow would have #
12819 # occurred. If so, return default underflow/overflow result. Else, #
12820 # scale the result exponent and return result. FPSR gets set based on #
12821 # the result value. #
12823 #########################################################################
12827 andi.b &0x30,%d0 # clear rnd prec
12828 ori.b &s_mode*0x10,%d0 # insert sgl precision
12833 andi.b &0x30,%d0 # clear rnd prec
12834 ori.b &d_mode*0x10,%d0 # insert dbl prec
12838 mov.l %d0,L_SCR3(%a6) # store rnd info
12839 mov.b STAG(%a6),%d1
12840 bne.w fneg_not_norm # optimize on non-norm input
12843 # NEGATE SIGN : norms and denorms ONLY!
12846 andi.b &0xc0,%d0 # is precision extended?
12847 bne.w fneg_not_ext # no; go handle sgl or dbl
12850 # precision selected is extended. so...we can not get an underflow
12851 # or overflow because of rounding to the correct precision. so...
12852 # skip the scaling and unscaling...
12854 mov.l SRC_HI(%a0),FP_SCR0_HI(%a6)
12855 mov.l SRC_LO(%a0),FP_SCR0_LO(%a6)
12856 mov.w SRC_EX(%a0),%d0
12857 eori.w &0x8000,%d0 # negate sign
12858 bpl.b fneg_norm_load # sign is positive
12859 mov.b &neg_bmask,FPSR_CC(%a6) # set 'N' ccode bit
12861 mov.w %d0,FP_SCR0_EX(%a6)
12862 fmovm.x FP_SCR0(%a6),&0x80 # return result in fp0
12866 # for an extended precision DENORM, the UNFL exception bit is set
12867 # the accrued bit is NOT set in this instance(no inexactness!)
12870 andi.b &0xc0,%d0 # is precision extended?
12871 bne.b fneg_not_ext # no; go handle sgl or dbl
12873 bset &unfl_bit,FPSR_EXCEPT(%a6) # set unfl exc bit
12875 mov.l SRC_HI(%a0),FP_SCR0_HI(%a6)
12876 mov.l SRC_LO(%a0),FP_SCR0_LO(%a6)
12877 mov.w SRC_EX(%a0),%d0
12878 eori.w &0x8000,%d0 # negate sign
12879 bpl.b fneg_denorm_done # no
12880 mov.b &neg_bmask,FPSR_CC(%a6) # yes, set 'N' ccode bit
12882 mov.w %d0,FP_SCR0_EX(%a6)
12883 fmovm.x FP_SCR0(%a6),&0x80 # return default result in fp0
12885 btst &unfl_bit,FPCR_ENABLE(%a6) # is UNFL enabled?
12886 bne.b fneg_ext_unfl_ena # yes
12890 # the input is an extended DENORM and underflow is enabled in the FPCR.
12891 # normalize the mantissa and add the bias of 0x6000 to the resulting negative
12892 # exponent and insert back into the operand.
12895 lea FP_SCR0(%a6),%a0 # pass: ptr to operand
12896 bsr.l norm # normalize result
12897 neg.w %d0 # new exponent = -(shft val)
12898 addi.w &0x6000,%d0 # add new bias to exponent
12899 mov.w FP_SCR0_EX(%a6),%d1 # fetch old sign,exp
12900 andi.w &0x8000,%d1 # keep old sign
12901 andi.w &0x7fff,%d0 # clear sign position
12902 or.w %d1,%d0 # concat old sign, new exponent
12903 mov.w %d0,FP_SCR0_EX(%a6) # insert new exponent
12904 fmovm.x FP_SCR0(%a6),&0x40 # return EXOP in fp1
12908 # operand is either single or double
12911 cmpi.b %d0,&s_mode*0x10 # separate sgl/dbl prec
12915 # operand is to be rounded to single precision
12918 mov.w SRC_EX(%a0),FP_SCR0_EX(%a6)
12919 mov.l SRC_HI(%a0),FP_SCR0_HI(%a6)
12920 mov.l SRC_LO(%a0),FP_SCR0_LO(%a6)
12921 bsr.l scale_to_zero_src # calculate scale factor
12923 cmpi.l %d0,&0x3fff-0x3f80 # will move in underflow?
12924 bge.w fneg_sd_unfl # yes; go handle underflow
12925 cmpi.l %d0,&0x3fff-0x407e # will move in overflow?
12926 beq.w fneg_sd_may_ovfl # maybe; go check
12927 blt.w fneg_sd_ovfl # yes; go handle overflow
12930 # operand will NOT overflow or underflow when moved in to the fp reg file
12933 fmov.l &0x0,%fpsr # clear FPSR
12934 fmov.l L_SCR3(%a6),%fpcr # set FPCR
12936 fneg.x FP_SCR0(%a6),%fp0 # perform negation
12938 fmov.l %fpsr,%d1 # save FPSR
12939 fmov.l &0x0,%fpcr # clear FPCR
12941 or.l %d1,USER_FPSR(%a6) # save INEX2,N
12943 fneg_sd_normal_exit:
12944 mov.l %d2,-(%sp) # save d2
12945 fmovm.x &0x80,FP_SCR0(%a6) # store out result
12946 mov.w FP_SCR0_EX(%a6),%d1 # load sgn,exp
12947 mov.w %d1,%d2 # make a copy
12948 andi.l &0x7fff,%d1 # strip sign
12949 sub.l %d0,%d1 # add scale factor
12950 andi.w &0x8000,%d2 # keep old sign
12951 or.w %d1,%d2 # concat old sign,new exp
12952 mov.w %d2,FP_SCR0_EX(%a6) # insert new exponent
12953 mov.l (%sp)+,%d2 # restore d2
12954 fmovm.x FP_SCR0(%a6),&0x80 # return result in fp0
12958 # operand is to be rounded to double precision
12961 mov.w SRC_EX(%a0),FP_SCR0_EX(%a6)
12962 mov.l SRC_HI(%a0),FP_SCR0_HI(%a6)
12963 mov.l SRC_LO(%a0),FP_SCR0_LO(%a6)
12964 bsr.l scale_to_zero_src # calculate scale factor
12966 cmpi.l %d0,&0x3fff-0x3c00 # will move in underflow?
12967 bge.b fneg_sd_unfl # yes; go handle underflow
12968 cmpi.l %d0,&0x3fff-0x43fe # will move in overflow?
12969 beq.w fneg_sd_may_ovfl # maybe; go check
12970 blt.w fneg_sd_ovfl # yes; go handle overflow
12971 bra.w fneg_sd_normal # no; ho handle normalized op
12974 # operand WILL underflow when moved in to the fp register file
12977 bset &unfl_bit,FPSR_EXCEPT(%a6) # set unfl exc bit
12979 eori.b &0x80,FP_SCR0_EX(%a6) # negate sign
12980 bpl.b fneg_sd_unfl_tst
12981 bset &neg_bit,FPSR_CC(%a6) # set 'N' ccode bit
12983 # if underflow or inexact is enabled, go calculate EXOP first.
12985 mov.b FPCR_ENABLE(%a6),%d1
12986 andi.b &0x0b,%d1 # is UNFL or INEX enabled?
12987 bne.b fneg_sd_unfl_ena # yes
12990 lea FP_SCR0(%a6),%a0 # pass: result addr
12991 mov.l L_SCR3(%a6),%d1 # pass: rnd prec,mode
12992 bsr.l unf_res # calculate default result
12993 or.b %d0,FPSR_CC(%a6) # unf_res may have set 'Z'
12994 fmovm.x FP_SCR0(%a6),&0x80 # return default result in fp0
12998 # operand will underflow AND underflow is enabled.
12999 # Therefore, we must return the result rounded to extended precision.
13002 mov.l FP_SCR0_HI(%a6),FP_SCR1_HI(%a6)
13003 mov.l FP_SCR0_LO(%a6),FP_SCR1_LO(%a6)
13004 mov.w FP_SCR0_EX(%a6),%d1 # load current exponent
13006 mov.l %d2,-(%sp) # save d2
13007 mov.l %d1,%d2 # make a copy
13008 andi.l &0x7fff,%d1 # strip sign
13009 andi.w &0x8000,%d2 # keep old sign
13010 sub.l %d0,%d1 # subtract scale factor
13011 addi.l &0x6000,%d1 # add new bias
13013 or.w %d2,%d1 # concat new sign,new exp
13014 mov.w %d1,FP_SCR1_EX(%a6) # insert new exp
13015 fmovm.x FP_SCR1(%a6),&0x40 # return EXOP in fp1
13016 mov.l (%sp)+,%d2 # restore d2
13017 bra.b fneg_sd_unfl_dis
13020 # operand WILL overflow.
13023 fmov.l &0x0,%fpsr # clear FPSR
13024 fmov.l L_SCR3(%a6),%fpcr # set FPCR
13026 fneg.x FP_SCR0(%a6),%fp0 # perform negation
13028 fmov.l &0x0,%fpcr # clear FPCR
13029 fmov.l %fpsr,%d1 # save FPSR
13031 or.l %d1,USER_FPSR(%a6) # save INEX2,N
13034 or.l &ovfl_inx_mask,USER_FPSR(%a6) # set ovfl/aovfl/ainex
13036 mov.b FPCR_ENABLE(%a6),%d1
13037 andi.b &0x13,%d1 # is OVFL or INEX enabled?
13038 bne.b fneg_sd_ovfl_ena # yes
13041 # OVFL is not enabled; therefore, we must create the default result by
13042 # calling ovf_res().
13045 btst &neg_bit,FPSR_CC(%a6) # is result negative?
13046 sne %d1 # set sign param accordingly
13047 mov.l L_SCR3(%a6),%d0 # pass: prec,mode
13048 bsr.l ovf_res # calculate default result
13049 or.b %d0,FPSR_CC(%a6) # set INF,N if applicable
13050 fmovm.x (%a0),&0x80 # return default result in fp0
13055 # the INEX2 bit has already been updated by the round to the correct precision.
13056 # now, round to extended(and don't alter the FPSR).
13059 mov.l %d2,-(%sp) # save d2
13060 mov.w FP_SCR0_EX(%a6),%d1 # fetch {sgn,exp}
13061 mov.l %d1,%d2 # make a copy
13062 andi.l &0x7fff,%d1 # strip sign
13063 andi.w &0x8000,%d2 # keep old sign
13064 sub.l %d0,%d1 # add scale factor
13065 subi.l &0x6000,%d1 # subtract bias
13067 or.w %d2,%d1 # concat sign,exp
13068 mov.w %d1,FP_SCR0_EX(%a6) # insert new exponent
13069 fmovm.x FP_SCR0(%a6),&0x40 # return EXOP in fp1
13070 mov.l (%sp)+,%d2 # restore d2
13071 bra.b fneg_sd_ovfl_dis
13074 # the move in MAY overflow. so...
13077 fmov.l &0x0,%fpsr # clear FPSR
13078 fmov.l L_SCR3(%a6),%fpcr # set FPCR
13080 fneg.x FP_SCR0(%a6),%fp0 # perform negation
13082 fmov.l %fpsr,%d1 # save status
13083 fmov.l &0x0,%fpcr # clear FPCR
13085 or.l %d1,USER_FPSR(%a6) # save INEX2,N
13087 fabs.x %fp0,%fp1 # make a copy of result
13088 fcmp.b %fp1,&0x2 # is |result| >= 2.b?
13089 fbge.w fneg_sd_ovfl_tst # yes; overflow has occurred
13091 # no, it didn't overflow; we have correct result
13092 bra.w fneg_sd_normal_exit
13094 ##########################################################################
13097 # input is not normalized; what is it?
13100 cmpi.b %d1,&DENORM # weed out DENORM
13102 cmpi.b %d1,&SNAN # weed out SNAN
13104 cmpi.b %d1,&QNAN # weed out QNAN
13108 # do the fneg; at this point, only possible ops are ZERO and INF.
13109 # use fneg to determine ccodes.
13110 # prec:mode should be zero at this point but it won't affect answer anyways.
13112 fneg.x SRC_EX(%a0),%fp0 # do fneg
13114 rol.l &0x8,%d0 # put ccodes in lo byte
13115 mov.b %d0,FPSR_CC(%a6) # insert correct ccodes
13118 #########################################################################
13119 # XDEF **************************************************************** #
13120 # ftst(): emulates the ftst instruction #
13122 # XREF **************************************************************** #
13123 # res_{s,q}nan_1op() - set NAN result for monadic instruction #
13125 # INPUT *************************************************************** #
13126 # a0 = pointer to extended precision source operand #
13128 # OUTPUT ************************************************************** #
13131 # ALGORITHM *********************************************************** #
13132 # Check the source operand tag (STAG) and set the FPSR ccodes according #
13133 # to the operand type and sign. #
13135 #########################################################################
13139 mov.b STAG(%a6),%d1 # fetch source operand tag
13140 bne.b ftst_not_norm # optimize on non-norm input
13146 tst.b SRC_EX(%a0) # is operand negative?
13147 bmi.b ftst_norm_m # yes
13150 mov.b &neg_bmask,FPSR_CC(%a6) # set 'N' ccode bit
13154 # input is not normalized; what is it?
13157 cmpi.b %d1,&ZERO # weed out ZERO
13159 cmpi.b %d1,&INF # weed out INF
13161 cmpi.b %d1,&SNAN # weed out SNAN
13163 cmpi.b %d1,&QNAN # weed out QNAN
13170 tst.b SRC_EX(%a0) # is DENORM negative?
13171 bmi.b ftst_denorm_m # yes
13174 mov.b &neg_bmask,FPSR_CC(%a6) # set 'N' ccode bit
13181 tst.b SRC_EX(%a0) # is INF negative?
13182 bmi.b ftst_inf_m # yes
13184 mov.b &inf_bmask,FPSR_CC(%a6) # set 'I' ccode bit
13187 mov.b &inf_bmask+neg_bmask,FPSR_CC(%a6) # set 'I','N' ccode bits
13194 tst.b SRC_EX(%a0) # is ZERO negative?
13195 bmi.b ftst_zero_m # yes
13197 mov.b &z_bmask,FPSR_CC(%a6) # set 'Z' ccode bit
13200 mov.b &z_bmask+neg_bmask,FPSR_CC(%a6) # set 'Z','N' ccode bits
13203 #########################################################################
13204 # XDEF **************************************************************** #
13205 # fint(): emulates the fint instruction #
13207 # XREF **************************************************************** #
13208 # res_{s,q}nan_1op() - set NAN result for monadic operation #
13210 # INPUT *************************************************************** #
13211 # a0 = pointer to extended precision source operand #
13212 # d0 = round precision/mode #
13214 # OUTPUT ************************************************************** #
13217 # ALGORITHM *********************************************************** #
13218 # Separate according to operand type. Unnorms don't pass through #
13219 # here. For norms, load the rounding mode/prec, execute a "fint", then #
13220 # store the resulting FPSR bits. #
13221 # For denorms, force the j-bit to a one and do the same as for #
13222 # norms. Denorms are so low that the answer will either be a zero or a one. #
13224 # For zeroes/infs/NANs, return the same while setting the FPSR #
13225 # as appropriate. #
13227 #########################################################################
13231 mov.b STAG(%a6),%d1 # fetch source operand tag
13232 bne.b fint_not_norm # optimize on non-norm input
13238 andi.b &0x30,%d0 # keep rnd mode bits only (prec = ext)
13240 fmov.l %d0,%fpcr # set FPCR
13241 fmov.l &0x0,%fpsr # clear FPSR
13243 fint.x SRC(%a0),%fp0 # execute fint
13245 fmov.l &0x0,%fpcr # clear FPCR
13246 fmov.l %fpsr,%d0 # save FPSR
13247 or.l %d0,USER_FPSR(%a6) # set exception bits
13252 # input is not normalized; what is it?
13255 cmpi.b %d1,&ZERO # weed out ZERO
13257 cmpi.b %d1,&INF # weed out INF
13259 cmpi.b %d1,&DENORM # weed out DENORM
13261 cmpi.b %d1,&SNAN # weed out SNAN
13263 bra.l res_qnan_1op # weed out QNAN
13268 # for DENORMs, the result will be either (+/-)ZERO or (+/-)1.
13269 # also, the INEX2 and AINEX exception bits will be set.
13270 # so, we could either set these manually or force the DENORM
13271 # to a very small NORM and ship it to the NORM routine.
13275 mov.w SRC_EX(%a0),FP_SCR0_EX(%a6) # copy sign, zero exp
13276 mov.b &0x80,FP_SCR0_HI(%a6) # set j-bit: force DENORM ==> small NORM
13277 lea FP_SCR0(%a6),%a0 # a0 = ptr to the forced small NORM
13284 tst.b SRC_EX(%a0) # is ZERO negative?
13285 bmi.b fint_zero_m # yes
13287 fmov.s &0x00000000,%fp0 # return +ZERO in fp0
13288 mov.b &z_bmask,FPSR_CC(%a6) # set 'Z' ccode bit
13291 fmov.s &0x80000000,%fp0 # return -ZERO in fp0
13292 mov.b &z_bmask+neg_bmask,FPSR_CC(%a6) # set 'Z','N' ccode bits
13299 fmovm.x SRC(%a0),&0x80 # return result in fp0
13300 tst.b SRC_EX(%a0) # is INF negative?
13301 bmi.b fint_inf_m # yes
13303 mov.b &inf_bmask,FPSR_CC(%a6) # set 'I' ccode bit
13306 mov.b &inf_bmask+neg_bmask,FPSR_CC(%a6) # set 'N','I' ccode bits
13309 #########################################################################
13310 # XDEF **************************************************************** #
13311 # fintrz(): emulates the fintrz instruction #
13313 # XREF **************************************************************** #
13314 # res_{s,q}nan_1op() - set NAN result for monadic operation #
13316 # INPUT *************************************************************** #
13317 # a0 = pointer to extended precision source operand #
13318 # d0 = round precision/mode #
13320 # OUTPUT ************************************************************** #
13323 # ALGORITHM *********************************************************** #
13324 # Separate according to operand type. Unnorms don't pass through #
13325 # here. For norms, clear the FPSR, execute a "fintrz", #
13326 # then store the resulting FPSR bits. #
13327 # For denorms, force the j-bit to a one and do the same as for #
13328 # norms. Denorms are so low that the answer will be a zero. #
13330 # For zeroes/infs/NANs, return the same while setting the FPSR #
13331 # as appropriate. #
13333 #########################################################################
13337 mov.b STAG(%a6),%d1 # fetch source operand tag
13338 bne.b fintrz_not_norm # optimize on non-norm input
13344 fmov.l &0x0,%fpsr # clear FPSR
13346 fintrz.x SRC(%a0),%fp0 # execute fintrz
13348 fmov.l %fpsr,%d0 # save FPSR
13349 or.l %d0,USER_FPSR(%a6) # set exception bits
13354 # input is not normalized; what is it?
13357 cmpi.b %d1,&ZERO # weed out ZERO
13359 cmpi.b %d1,&INF # weed out INF
13361 cmpi.b %d1,&DENORM # weed out DENORM
13362 beq.b fintrz_denorm
13363 cmpi.b %d1,&SNAN # weed out SNAN
13365 bra.l res_qnan_1op # weed out QNAN
13370 # for DENORMs, the result will be (+/-)ZERO.
13371 # also, the INEX2 and AINEX exception bits will be set.
13372 # so, we could either set these manually or force the DENORM
13373 # to a very small NORM and ship it to the NORM routine.
13377 mov.w SRC_EX(%a0),FP_SCR0_EX(%a6) # copy sign, zero exp
13378 mov.b &0x80,FP_SCR0_HI(%a6) # set j-bit: force DENORM ==> small NORM
13379 lea FP_SCR0(%a6),%a0 # a0 = ptr to the forced small NORM
13386 tst.b SRC_EX(%a0) # is ZERO negative?
13387 bmi.b fintrz_zero_m # yes
13389 fmov.s &0x00000000,%fp0 # return +ZERO in fp0
13390 mov.b &z_bmask,FPSR_CC(%a6) # set 'Z' ccode bit
13393 fmov.s &0x80000000,%fp0 # return -ZERO in fp0
13394 mov.b &z_bmask+neg_bmask,FPSR_CC(%a6) # set 'Z','N' ccode bits
13401 fmovm.x SRC(%a0),&0x80 # return result in fp0
13402 tst.b SRC_EX(%a0) # is INF negative?
13403 bmi.b fintrz_inf_m # yes
13405 mov.b &inf_bmask,FPSR_CC(%a6) # set 'I' ccode bit
13408 mov.b &inf_bmask+neg_bmask,FPSR_CC(%a6) # set 'N','I' ccode bits
13411 #########################################################################
13412 # XDEF **************************************************************** #
13413 # fabs(): emulates the fabs instruction #
13414 # fsabs(): emulates the fsabs instruction #
13415 # fdabs(): emulates the fdabs instruction #
13417 # XREF **************************************************************** #
13418 # norm() - normalize denorm mantissa to provide EXOP #
13419 # scale_to_zero_src() - make exponent = 0; get scale factor #
13420 # unf_res() - calculate underflow result #
13421 # ovf_res() - calculate overflow result #
13422 # res_{s,q}nan_1op() - set NAN result for monadic operation #
13424 # INPUT *************************************************************** #
13425 # a0 = pointer to extended precision source operand #
13426 # d0 = rnd precision/mode #
13428 # OUTPUT ************************************************************** #
13430 # fp0 = result; fp1 = EXOP (if exception occurred) #
13432 # ALGORITHM *********************************************************** #
13433 # Handle NANs, infinities, and zeroes as special cases. Divide #
13434 # norms into extended, single, and double precision. #
13435 # Simply clear sign for extended precision norm. Ext prec denorm #
13436 # gets an EXOP created for it since it's an underflow. #
13437 # Double and single precision can overflow and underflow. First, #
13438 # scale the operand such that the exponent is zero. Perform an "fabs" #
13439 # using the correct rnd mode/prec. Check to see if the original #
13440 # exponent would take an exception. If so, use unf_res() or ovf_res() #
13441 # to calculate the default result. Also, create the EXOP for the #
13442 # exceptional case. If no exception should occur, insert the correct #
13443 # result exponent and return. #
13444 # Unnorms don't pass through here. #
13446 #########################################################################
13450 andi.b &0x30,%d0 # clear rnd prec
13451 ori.b &s_mode*0x10,%d0 # insert sgl precision
13456 andi.b &0x30,%d0 # clear rnd prec
13457 ori.b &d_mode*0x10,%d0 # insert dbl precision
13461 mov.l %d0,L_SCR3(%a6) # store rnd info
13462 mov.b STAG(%a6),%d1 # fetch source operand tag
13463 bne.w fabs_not_norm # optimize on non-norm input
13466 # ABSOLUTE VALUE: norms and denorms ONLY!
13469 andi.b &0xc0,%d0 # is precision extended?
13470 bne.b fabs_not_ext # no; go handle sgl or dbl
13473 # precision selected is extended. so...we can not get an underflow
13474 # or overflow because of rounding to the correct precision. so...
13475 # skip the scaling and unscaling...
13477 mov.l SRC_HI(%a0),FP_SCR0_HI(%a6) # copy mantissa (hi) to scratch
13478 mov.l SRC_LO(%a0),FP_SCR0_LO(%a6) # copy mantissa (lo) to scratch
13479 mov.w SRC_EX(%a0),%d1 # fetch {sgn,exp}
13480 bclr &15,%d1 # force absolute value
13481 mov.w %d1,FP_SCR0_EX(%a6) # insert exponent
13482 fmovm.x FP_SCR0(%a6),&0x80 # return result in fp0
13486 # for an extended precision DENORM, the UNFL exception bit is set
13487 # the accrued bit is NOT set in this instance(no inexactness!)
13490 andi.b &0xc0,%d0 # is precision extended?
13491 bne.b fabs_not_ext # no
13493 bset &unfl_bit,FPSR_EXCEPT(%a6) # set unfl exc bit
13495 mov.l SRC_HI(%a0),FP_SCR0_HI(%a6) # copy mantissa (hi) to scratch
13496 mov.l SRC_LO(%a0),FP_SCR0_LO(%a6) # copy mantissa (lo) to scratch
13497 mov.w SRC_EX(%a0),%d0 # fetch {sgn,exp}
13498 bclr &15,%d0 # clear sign
13499 mov.w %d0,FP_SCR0_EX(%a6) # insert exponent
13501 fmovm.x FP_SCR0(%a6),&0x80 # return default result in fp0
13503 btst &unfl_bit,FPCR_ENABLE(%a6) # is UNFL enabled?
13504 bne.b fabs_ext_unfl_ena
13508 # the input is an extended DENORM and underflow is enabled in the FPCR.
13509 # normalize the mantissa and add the bias of 0x6000 to the resulting negative
13510 # exponent and insert back into the operand.
13513 lea FP_SCR0(%a6),%a0 # pass: ptr to operand
13514 bsr.l norm # normalize result
13515 neg.w %d0 # new exponent = -(shft val)
13516 addi.w &0x6000,%d0 # add new bias to exponent
13517 mov.w FP_SCR0_EX(%a6),%d1 # fetch old sign,exp
13518 andi.w &0x8000,%d1 # keep old sign
13519 andi.w &0x7fff,%d0 # clear sign position
13520 or.w %d1,%d0 # concat old sign, new exponent
13521 mov.w %d0,FP_SCR0_EX(%a6) # insert new exponent
13522 fmovm.x FP_SCR0(%a6),&0x40 # return EXOP in fp1
13526 # operand is either single or double
13529 cmpi.b %d0,&s_mode*0x10 # separate sgl/dbl prec
13533 # operand is to be rounded to single precision
13536 mov.w SRC_EX(%a0),FP_SCR0_EX(%a6)
13537 mov.l SRC_HI(%a0),FP_SCR0_HI(%a6)
13538 mov.l SRC_LO(%a0),FP_SCR0_LO(%a6)
13539 bsr.l scale_to_zero_src # calculate scale factor
13541 cmpi.l %d0,&0x3fff-0x3f80 # will move in underflow?
13542 bge.w fabs_sd_unfl # yes; go handle underflow
13543 cmpi.l %d0,&0x3fff-0x407e # will move in overflow?
13544 beq.w fabs_sd_may_ovfl # maybe; go check
13545 blt.w fabs_sd_ovfl # yes; go handle overflow
13548 # operand will NOT overflow or underflow when moved in to the fp reg file
13551 fmov.l &0x0,%fpsr # clear FPSR
13552 fmov.l L_SCR3(%a6),%fpcr # set FPCR
13554 fabs.x FP_SCR0(%a6),%fp0 # perform absolute
13556 fmov.l %fpsr,%d1 # save FPSR
13557 fmov.l &0x0,%fpcr # clear FPCR
13559 or.l %d1,USER_FPSR(%a6) # save INEX2,N
13561 fabs_sd_normal_exit:
13562 mov.l %d2,-(%sp) # save d2
13563 fmovm.x &0x80,FP_SCR0(%a6) # store out result
13564 mov.w FP_SCR0_EX(%a6),%d1 # load sgn,exp
13565 mov.l %d1,%d2 # make a copy
13566 andi.l &0x7fff,%d1 # strip sign
13567 sub.l %d0,%d1 # subtract scale factor
13568 andi.w &0x8000,%d2 # keep old sign
13569 or.w %d1,%d2 # concat old sign,new exp
13570 mov.w %d2,FP_SCR0_EX(%a6) # insert new exponent
13571 mov.l (%sp)+,%d2 # restore d2
13572 fmovm.x FP_SCR0(%a6),&0x80 # return result in fp0
13576 # operand is to be rounded to double precision
13579 mov.w SRC_EX(%a0),FP_SCR0_EX(%a6)
13580 mov.l SRC_HI(%a0),FP_SCR0_HI(%a6)
13581 mov.l SRC_LO(%a0),FP_SCR0_LO(%a6)
13582 bsr.l scale_to_zero_src # calculate scale factor
13584 cmpi.l %d0,&0x3fff-0x3c00 # will move in underflow?
13585 bge.b fabs_sd_unfl # yes; go handle underflow
13586 cmpi.l %d0,&0x3fff-0x43fe # will move in overflow?
13587 beq.w fabs_sd_may_ovfl # maybe; go check
13588 blt.w fabs_sd_ovfl # yes; go handle overflow
13589 bra.w fabs_sd_normal # no; go handle normalized op
13592 # operand WILL underflow when moved in to the fp register file
13595 bset &unfl_bit,FPSR_EXCEPT(%a6) # set unfl exc bit
13597 bclr &0x7,FP_SCR0_EX(%a6) # force absolute value
13599 # if underflow or inexact is enabled, go calculate EXOP first.
13600 mov.b FPCR_ENABLE(%a6),%d1
13601 andi.b &0x0b,%d1 # is UNFL or INEX enabled?
13602 bne.b fabs_sd_unfl_ena # yes
13605 lea FP_SCR0(%a6),%a0 # pass: result addr
13606 mov.l L_SCR3(%a6),%d1 # pass: rnd prec,mode
13607 bsr.l unf_res # calculate default result
13608 or.b %d0,FPSR_CC(%a6) # set possible 'Z' ccode
13609 fmovm.x FP_SCR0(%a6),&0x80 # return default result in fp0
13613 # operand will underflow AND underflow or inexact is enabled.
13614 # Therefore, we must return the result rounded to extended precision.
13617 mov.l FP_SCR0_HI(%a6),FP_SCR1_HI(%a6) # copy mantissa (hi) for EXOP
13618 mov.l FP_SCR0_LO(%a6),FP_SCR1_LO(%a6) # copy mantissa (lo) for EXOP
13619 mov.w FP_SCR0_EX(%a6),%d1 # load current exponent
13621 mov.l %d2,-(%sp) # save d2
13622 mov.l %d1,%d2 # make a copy
13623 andi.l &0x7fff,%d1 # strip sign
13624 andi.w &0x8000,%d2 # keep old sign
13625 sub.l %d0,%d1 # subtract scale factor
13626 addi.l &0x6000,%d1 # add new bias
13628 or.w %d2,%d1 # concat new sign,new exp
13629 mov.w %d1,FP_SCR1_EX(%a6) # insert new exp
13630 fmovm.x FP_SCR1(%a6),&0x40 # return EXOP in fp1
13631 mov.l (%sp)+,%d2 # restore d2
13632 bra.b fabs_sd_unfl_dis
13635 # operand WILL overflow.
13638 fmov.l &0x0,%fpsr # clear FPSR
13639 fmov.l L_SCR3(%a6),%fpcr # set FPCR
13641 fabs.x FP_SCR0(%a6),%fp0 # perform absolute
13643 fmov.l &0x0,%fpcr # clear FPCR
13644 fmov.l %fpsr,%d1 # save FPSR
13646 or.l %d1,USER_FPSR(%a6) # save INEX2,N
13649 or.l &ovfl_inx_mask,USER_FPSR(%a6) # set ovfl/aovfl/ainex
13651 mov.b FPCR_ENABLE(%a6),%d1
13652 andi.b &0x13,%d1 # is OVFL or INEX enabled?
13653 bne.b fabs_sd_ovfl_ena # yes
13656 # neither OVFL nor INEX is enabled; therefore, we must create the default
13657 # result by calling ovf_res().
13660 btst &neg_bit,FPSR_CC(%a6) # is result negative?
13661 sne %d1 # set sign param accordingly
13662 mov.l L_SCR3(%a6),%d0 # pass: prec,mode
13663 bsr.l ovf_res # calculate default result
13664 or.b %d0,FPSR_CC(%a6) # set INF,N if applicable
13665 fmovm.x (%a0),&0x80 # return default result in fp0
13670 # the INEX2 bit has already been updated by the round to the correct precision.
13671 # now, round to extended(and don't alter the FPSR).
13674 mov.l %d2,-(%sp) # save d2
13675 mov.w FP_SCR0_EX(%a6),%d1 # fetch {sgn,exp}
13676 mov.l %d1,%d2 # make a copy
13677 andi.l &0x7fff,%d1 # strip sign
13678 andi.w &0x8000,%d2 # keep old sign
13679 sub.l %d0,%d1 # subtract scale factor
13680 subi.l &0x6000,%d1 # subtract bias
13682 or.w %d2,%d1 # concat sign,exp
13683 mov.w %d1,FP_SCR0_EX(%a6) # insert new exponent
13684 fmovm.x FP_SCR0(%a6),&0x40 # return EXOP in fp1
13685 mov.l (%sp)+,%d2 # restore d2
13686 bra.b fabs_sd_ovfl_dis
13689 # the move in MAY overflow. so...
13692 fmov.l &0x0,%fpsr # clear FPSR
13693 fmov.l L_SCR3(%a6),%fpcr # set FPCR
13695 fabs.x FP_SCR0(%a6),%fp0 # perform absolute
13697 fmov.l %fpsr,%d1 # save status
13698 fmov.l &0x0,%fpcr # clear FPCR
13700 or.l %d1,USER_FPSR(%a6) # save INEX2,N
13702 fabs.x %fp0,%fp1 # make a copy of result
13703 fcmp.b %fp1,&0x2 # is |result| >= 2.b?
13704 fbge.w fabs_sd_ovfl_tst # yes; overflow has occurred
13706 # no, it didn't overflow; we have correct result
13707 bra.w fabs_sd_normal_exit
13709 ##########################################################################
13712 # input is not normalized; what is it?
13715 cmpi.b %d1,&DENORM # weed out DENORM
13717 cmpi.b %d1,&SNAN # weed out SNAN
13719 cmpi.b %d1,&QNAN # weed out QNAN
13722 fabs.x SRC(%a0),%fp0 # force absolute value
13724 cmpi.b %d1,&INF # weed out INF
13727 mov.b &z_bmask,FPSR_CC(%a6) # set 'Z' ccode bit
13730 mov.b &inf_bmask,FPSR_CC(%a6) # set 'I' ccode bit
13733 #########################################################################
13734 # XDEF **************************************************************** #
13735 # fcmp(): fp compare op routine #
13737 # XREF **************************************************************** #
13738 # res_qnan() - return QNAN result #
13739 # res_snan() - return SNAN result #
13741 # INPUT *************************************************************** #
13742 # a0 = pointer to extended precision source operand #
13743 # a1 = pointer to extended precision destination operand #
13744 # d0 = round prec/mode #
13746 # OUTPUT ************************************************************** #
13749 # ALGORITHM *********************************************************** #
13750 # Handle NANs and denorms as special cases. For everything else, #
13751 # just use the actual fcmp instruction to produce the correct condition codes. #
13754 #########################################################################
13759 mov.b DTAG(%a6),%d1
13762 bne.b fcmp_not_norm # optimize on non-norm input
13765 # COMPARE FP OPs : NORMs, ZEROs, INFs, and "corrected" DENORMs
13768 fmovm.x DST(%a1),&0x80 # load dst op
13770 fcmp.x %fp0,SRC(%a0) # do compare
13772 fmov.l %fpsr,%d0 # save FPSR
13773 rol.l &0x8,%d0 # extract ccode bits
13774 mov.b %d0,FPSR_CC(%a6) # set ccode bits(no exc bits are set)
13779 # fcmp: inputs are not both normalized; what are they?
13782 mov.w (tbl_fcmp_op.b,%pc,%d1.w*2),%d1 # fetch 16-bit handler offset from table
13783 jmp (tbl_fcmp_op.b,%pc,%d1.w*1) # jump to tbl_fcmp_op + offset
13787 short fcmp_norm - tbl_fcmp_op # NORM - NORM
13788 short fcmp_norm - tbl_fcmp_op # NORM - ZERO
13789 short fcmp_norm - tbl_fcmp_op # NORM - INF
13790 short fcmp_res_qnan - tbl_fcmp_op # NORM - QNAN
13791 short fcmp_nrm_dnrm - tbl_fcmp_op # NORM - DENORM
13792 short fcmp_res_snan - tbl_fcmp_op # NORM - SNAN
13793 short tbl_fcmp_op - tbl_fcmp_op #
13794 short tbl_fcmp_op - tbl_fcmp_op #
13796 short fcmp_norm - tbl_fcmp_op # ZERO - NORM
13797 short fcmp_norm - tbl_fcmp_op # ZERO - ZERO
13798 short fcmp_norm - tbl_fcmp_op # ZERO - INF
13799 short fcmp_res_qnan - tbl_fcmp_op # ZERO - QNAN
13800 short fcmp_dnrm_s - tbl_fcmp_op # ZERO - DENORM
13801 short fcmp_res_snan - tbl_fcmp_op # ZERO - SNAN
13802 short tbl_fcmp_op - tbl_fcmp_op #
13803 short tbl_fcmp_op - tbl_fcmp_op #
13805 short fcmp_norm - tbl_fcmp_op # INF - NORM
13806 short fcmp_norm - tbl_fcmp_op # INF - ZERO
13807 short fcmp_norm - tbl_fcmp_op # INF - INF
13808 short fcmp_res_qnan - tbl_fcmp_op # INF - QNAN
13809 short fcmp_dnrm_s - tbl_fcmp_op # INF - DENORM
13810 short fcmp_res_snan - tbl_fcmp_op # INF - SNAN
13811 short tbl_fcmp_op - tbl_fcmp_op #
13812 short tbl_fcmp_op - tbl_fcmp_op #
13814 short fcmp_res_qnan - tbl_fcmp_op # QNAN - NORM
13815 short fcmp_res_qnan - tbl_fcmp_op # QNAN - ZERO
13816 short fcmp_res_qnan - tbl_fcmp_op # QNAN - INF
13817 short fcmp_res_qnan - tbl_fcmp_op # QNAN - QNAN
13818 short fcmp_res_qnan - tbl_fcmp_op # QNAN - DENORM
13819 short fcmp_res_snan - tbl_fcmp_op # QNAN - SNAN
13820 short tbl_fcmp_op - tbl_fcmp_op #
13821 short tbl_fcmp_op - tbl_fcmp_op #
13823 short fcmp_dnrm_nrm - tbl_fcmp_op # DENORM - NORM
13824 short fcmp_dnrm_d - tbl_fcmp_op # DENORM - ZERO
13825 short fcmp_dnrm_d - tbl_fcmp_op # DENORM - INF
13826 short fcmp_res_qnan - tbl_fcmp_op # DENORM - QNAN
13827 short fcmp_dnrm_sd - tbl_fcmp_op # DENORM - DENORM
13828 short fcmp_res_snan - tbl_fcmp_op # DENORM - SNAN
13829 short tbl_fcmp_op - tbl_fcmp_op #
13830 short tbl_fcmp_op - tbl_fcmp_op #
13832 short fcmp_res_snan - tbl_fcmp_op # SNAN - NORM
13833 short fcmp_res_snan - tbl_fcmp_op # SNAN - ZERO
13834 short fcmp_res_snan - tbl_fcmp_op # SNAN - INF
13835 short fcmp_res_snan - tbl_fcmp_op # SNAN - QNAN
13836 short fcmp_res_snan - tbl_fcmp_op # SNAN - DENORM
13837 short fcmp_res_snan - tbl_fcmp_op # SNAN - SNAN
13838 short tbl_fcmp_op - tbl_fcmp_op #
13839 short tbl_fcmp_op - tbl_fcmp_op #
13841 # unlike all other functions for QNAN and SNAN, fcmp does NOT set the
13842 # 'N' bit for a negative QNAN or SNAN input so we must squelch it here.
13845 andi.b &0xf7,FPSR_CC(%a6) # squelch 'N' ccode bit
13849 andi.b &0xf7,FPSR_CC(%a6) # squelch 'N' ccode bit
13853 # DENORMs are a little more difficult.
13854 # If you have 2 DENORMs, then you can just force the j-bit to a one
13855 # and use the fcmp_norm routine.
13856 # If you have a DENORM and an INF or ZERO, just force the DENORM's j-bit to a one
13857 # and use the fcmp_norm routine.
13858 # If you have a DENORM and a NORM with opposite signs, then use fcmp_norm, also.
13859 # But with a DENORM and a NORM of the same sign, the neg bit is set if the
13860 # (1) signs are (+) and the DENORM is the dst or
13861 # (2) signs are (-) and the DENORM is the src
13865 mov.w SRC_EX(%a0),FP_SCR0_EX(%a6)
13866 mov.l SRC_HI(%a0),%d0
13867 bset &31,%d0 # DENORM src; make into small norm
13868 mov.l %d0,FP_SCR0_HI(%a6)
13869 mov.l SRC_LO(%a0),FP_SCR0_LO(%a6)
13870 lea FP_SCR0(%a6),%a0 # a0 = ptr to small-norm src copy
13874 mov.l DST_EX(%a1),FP_SCR0_EX(%a6) # NOTE(review): long move here, word move in sibling copies -- confirm intent
13875 mov.l DST_HI(%a1),%d0
13876 bset &31,%d0 # DENORM dst; make into small norm
13877 mov.l %d0,FP_SCR0_HI(%a6)
13878 mov.l DST_LO(%a1),FP_SCR0_LO(%a6)
13879 lea FP_SCR0(%a6),%a1 # a1 = ptr to small-norm dst copy
13883 mov.w DST_EX(%a1),FP_SCR1_EX(%a6)
13884 mov.w SRC_EX(%a0),FP_SCR0_EX(%a6)
13885 mov.l DST_HI(%a1),%d0
13886 bset &31,%d0 # DENORM dst; make into small norm
13887 mov.l %d0,FP_SCR1_HI(%a6)
13888 mov.l SRC_HI(%a0),%d0
13889 bset &31,%d0 # DENORM src; make into small norm
13890 mov.l %d0,FP_SCR0_HI(%a6)
13891 mov.l DST_LO(%a1),FP_SCR1_LO(%a6)
13892 mov.l SRC_LO(%a0),FP_SCR0_LO(%a6)
13893 lea FP_SCR1(%a6),%a1 # a1 = ptr to small-norm dst copy
13894 lea FP_SCR0(%a6),%a0 # a0 = ptr to small-norm src copy
13898 mov.b SRC_EX(%a0),%d0 # determine if like signs
13899 mov.b DST_EX(%a1),%d1
13903 # signs are the same, so must determine the answer ourselves.
13904 tst.b %d0 # is src op negative?
13905 bmi.b fcmp_nrm_dnrm_m # yes
13908 mov.b &neg_bmask,FPSR_CC(%a6) # set 'N' ccode bit
13912 mov.b SRC_EX(%a0),%d0 # determine if like signs
13913 mov.b DST_EX(%a1),%d1
13917 # signs are the same, so must determine the answer ourselves.
13918 tst.b %d0 # is src op negative?
13919 bpl.b fcmp_dnrm_nrm_m # no
13922 mov.b &neg_bmask,FPSR_CC(%a6) # set 'N' ccode bit
13925 #########################################################################
13926 # XDEF **************************************************************** #
13927 # fsglmul(): emulates the fsglmul instruction #
13929 # XREF **************************************************************** #
13930 # scale_to_zero_src() - scale src exponent to zero #
13931 # scale_to_zero_dst() - scale dst exponent to zero #
13932 # unf_res4() - return default underflow result for sglop #
13933 # ovf_res() - return default overflow result #
13934 # res_qnan() - return QNAN result #
13935 # res_snan() - return SNAN result #
13937 # INPUT *************************************************************** #
13938 # a0 = pointer to extended precision source operand #
13939 # a1 = pointer to extended precision destination operand #
13940 # d0 rnd prec,mode #
13942 # OUTPUT ************************************************************** #
13944 # fp1 = EXOP (if exception occurred) #
13946 # ALGORITHM *********************************************************** #
13947 # Handle NANs, infinities, and zeroes as special cases. Divide #
13948 # norms/denorms into ext/sgl/dbl precision. #
13949 # For norms/denorms, scale the exponents such that a multiply #
13950 # instruction won't cause an exception. Use the regular fsglmul to #
13951 # compute a result. Check if the regular operands would have taken #
13952 # an exception. If so, return the default overflow/underflow result #
13953 # and return the EXOP if exceptions are enabled. Else, scale the #
13954 # result operand to the proper exponent. #
13956 #########################################################################
13960 mov.l %d0,L_SCR3(%a6) # store rnd info
13963 mov.b DTAG(%a6),%d1
13967 bne.w fsglmul_not_norm # optimize on non-norm input
13970 mov.w DST_EX(%a1),FP_SCR1_EX(%a6)
13971 mov.l DST_HI(%a1),FP_SCR1_HI(%a6)
13972 mov.l DST_LO(%a1),FP_SCR1_LO(%a6)
13974 mov.w SRC_EX(%a0),FP_SCR0_EX(%a6)
13975 mov.l SRC_HI(%a0),FP_SCR0_HI(%a6)
13976 mov.l SRC_LO(%a0),FP_SCR0_LO(%a6)
13978 bsr.l scale_to_zero_src # scale exponent
13979 mov.l %d0,-(%sp) # save scale factor 1
13981 bsr.l scale_to_zero_dst # scale dst exponent
13983 add.l (%sp)+,%d0 # SCALE_FACTOR = scale1 + scale2
13985 cmpi.l %d0,&0x3fff-0x7ffe # would result ovfl?
13986 beq.w fsglmul_may_ovfl # result may rnd to overflow
13987 blt.w fsglmul_ovfl # result will overflow
13989 cmpi.l %d0,&0x3fff+0x0001 # would result unfl?
13990 beq.w fsglmul_may_unfl # result may rnd to no unfl
13991 bgt.w fsglmul_unfl # result will underflow
13994 fmovm.x FP_SCR1(%a6),&0x80 # load dst op
13996 fmov.l L_SCR3(%a6),%fpcr # set FPCR
13997 fmov.l &0x0,%fpsr # clear FPSR
13999 fsglmul.x FP_SCR0(%a6),%fp0 # execute sgl multiply
14001 fmov.l %fpsr,%d1 # save status
14002 fmov.l &0x0,%fpcr # clear FPCR
14004 or.l %d1,USER_FPSR(%a6) # save INEX2,N
14006 fsglmul_normal_exit:
14007 fmovm.x &0x80,FP_SCR0(%a6) # store out result
14008 mov.l %d2,-(%sp) # save d2
14009 mov.w FP_SCR0_EX(%a6),%d1 # load {sgn,exp}
14010 mov.l %d1,%d2 # make a copy
14011 andi.l &0x7fff,%d1 # strip sign
14012 andi.w &0x8000,%d2 # keep old sign
14013 sub.l %d0,%d1 # add scale factor
14014 or.w %d2,%d1 # concat old sign,new exp
14015 mov.w %d1,FP_SCR0_EX(%a6) # insert new exponent
14016 mov.l (%sp)+,%d2 # restore d2
14017 fmovm.x FP_SCR0(%a6),&0x80 # return result in fp0
14021 fmovm.x FP_SCR1(%a6),&0x80 # load dst op
14023 fmov.l L_SCR3(%a6),%fpcr # set FPCR
14024 fmov.l &0x0,%fpsr # clear FPSR
14026 fsglmul.x FP_SCR0(%a6),%fp0 # execute sgl multiply
14028 fmov.l %fpsr,%d1 # save status
14029 fmov.l &0x0,%fpcr # clear FPCR
14031 or.l %d1,USER_FPSR(%a6) # save INEX2,N
14035 # save setting this until now because this is where fsglmul_may_ovfl may jump in
14036 or.l &ovfl_inx_mask, USER_FPSR(%a6) # set ovfl/aovfl/ainex
14038 mov.b FPCR_ENABLE(%a6),%d1
14039 andi.b &0x13,%d1 # is OVFL or INEX enabled?
14040 bne.b fsglmul_ovfl_ena # yes
14043 btst &neg_bit,FPSR_CC(%a6) # is result negative?
14044 sne %d1 # set sign param accordingly
14045 mov.l L_SCR3(%a6),%d0 # pass prec:rnd
14046 andi.b &0x30,%d0 # force prec = ext
14047 bsr.l ovf_res # calculate default result
14048 or.b %d0,FPSR_CC(%a6) # set INF,N if applicable
14049 fmovm.x (%a0),&0x80 # return default result in fp0
# fsglmul_ovfl_ena: OVFL or INEX is enabled, so an EXOP must be returned in
# fp1. The EXOP is the rounded result with exponent (exp - d0 - 0x6000),
# masked to 15 bits, with the original sign. Control then continues at
# fsglmul_ovfl_dis to build the default result in fp0.
14052 fsglmul_ovfl_ena:
14053 fmovm.x &0x80,FP_SCR0(%a6) # move result to stack
14055 mov.l %d2,-(%sp) # save d2
14056 mov.w FP_SCR0_EX(%a6),%d1 # fetch {sgn,exp}
14057 mov.l %d1,%d2 # make a copy
14058 andi.l &0x7fff,%d1 # strip sign
14059 sub.l %d0,%d1 # add scale factor
14060 subi.l &0x6000,%d1 # subtract bias
14061 andi.w &0x7fff,%d1 # clear ms bit (same masking as fsgldiv_ovfl_ena)
14062 andi.w &0x8000,%d2 # keep old sign
14063 or.w %d2,%d1 # concat old sign,new exp
14064 mov.w %d1,FP_SCR0_EX(%a6) # insert new exponent
14065 mov.l (%sp)+,%d2 # restore d2
14066 fmovm.x FP_SCR0(%a6),&0x40 # return EXOP in fp1
14067 bra.b fsglmul_ovfl_dis
# fsglmul_may_ovfl: the scaled exponents sit exactly on the overflow boundary.
# Execute the multiply with the user's FPCR and look at the scaled result:
# |result| >= 2 means rounding carried out and the true result overflows
# (go to fsglmul_ovfl_tst); otherwise take the normal exit.
14069 fsglmul_may_ovfl:
14070 fmovm.x FP_SCR1(%a6),&0x80 # load dst op
14072 fmov.l L_SCR3(%a6),%fpcr # set FPCR
14073 fmov.l &0x0,%fpsr # clear FPSR
14075 fsglmul.x FP_SCR0(%a6),%fp0 # execute sgl multiply
14077 fmov.l %fpsr,%d1 # save status
14078 fmov.l &0x0,%fpcr # clear FPCR
14080 or.l %d1,USER_FPSR(%a6) # save INEX2,N
14082 fabs.x %fp0,%fp1 # make a copy of result
14083 fcmp.b %fp1,&0x2 # is |result| >= 2.b?
14084 fbge.w fsglmul_ovfl_tst # yes; overflow has occurred
14086 # no, it didn't overflow; we have correct result
14087 bra.w fsglmul_normal_exit
# fsglmul_unfl: the multiply underflows. Set the unfl exception bit, execute
# the multiply in round-to-zero mode, and merge the resulting status into
# USER_FPSR.
#   fsglmul_unfl_dis: UNFL/INEX disabled -> unf_res4 builds the default
#                     underflow result in place at FP_SCR0 (a0 = result addr,
#                     d1 = rnd prec,mode); its d0 return is ORed into FPSR_CC.
14089 fsglmul_unfl:
14090 bset &unfl_bit,FPSR_EXCEPT(%a6) # set unfl exc bit
14092 fmovm.x FP_SCR1(%a6),&0x80 # load dst op
14094 fmov.l &rz_mode*0x10,%fpcr # set FPCR
14095 fmov.l &0x0,%fpsr # clear FPSR
14097 fsglmul.x FP_SCR0(%a6),%fp0 # execute sgl multiply
14099 fmov.l %fpsr,%d1 # save status
14100 fmov.l &0x0,%fpcr # clear FPCR
14102 or.l %d1,USER_FPSR(%a6) # save INEX2,N
14104 mov.b FPCR_ENABLE(%a6),%d1
14105 andi.b &0x0b,%d1 # is UNFL or INEX enabled?
14106 bne.b fsglmul_unfl_ena # yes
14108 fsglmul_unfl_dis:
14109 fmovm.x &0x80,FP_SCR0(%a6) # store out result
14111 lea FP_SCR0(%a6),%a0 # pass: result addr
14112 mov.l L_SCR3(%a6),%d1 # pass: rnd prec,mode
14113 bsr.l unf_res4 # calculate default result
14114 or.b %d0,FPSR_CC(%a6) # 'Z' bit may have been set
14115 fmovm.x FP_SCR0(%a6),&0x80 # return default result in fp0
14116 rts # done; the enabled-exception path follows
# fsglmul_unfl_ena: UNFL or INEX is enabled. Recompute the product into fp1
# with the user's FPCR and return it as the EXOP with exponent
# (exp - d0 + 0x6000), masked to 15 bits, original sign kept. Control then
# continues at fsglmul_unfl_dis, which builds the default result from fp0.
14121 fsglmul_unfl_ena:
14122 fmovm.x FP_SCR1(%a6),&0x40 # load dst op
14124 fmov.l L_SCR3(%a6),%fpcr # set FPCR
14125 fmov.l &0x0,%fpsr # clear FPSR
14127 fsglmul.x FP_SCR0(%a6),%fp1 # execute sgl multiply
14129 fmov.l &0x0,%fpcr # clear FPCR
14131 fmovm.x &0x40,FP_SCR0(%a6) # save result to stack
14132 mov.l %d2,-(%sp) # save d2
14133 mov.w FP_SCR0_EX(%a6),%d1 # fetch {sgn,exp}
14134 mov.l %d1,%d2 # make a copy
14135 andi.l &0x7fff,%d1 # strip sign
14136 andi.w &0x8000,%d2 # keep old sign
14137 sub.l %d0,%d1 # add scale factor
14138 addi.l &0x6000,%d1 # add bias
14139 andi.w &0x7fff,%d1 # clear top bit (same masking as fsgldiv_unfl_ena)
14140 or.w %d2,%d1 # concat old sign,new exp
14141 mov.w %d1,FP_SCR0_EX(%a6) # insert new exponent
14142 mov.l (%sp)+,%d2 # restore d2
14143 fmovm.x FP_SCR0(%a6),&0x40 # return EXOP in fp1
14144 bra.w fsglmul_unfl_dis
# fsglmul_may_unfl: the scaled exponents sit exactly on the underflow
# boundary. Execute the multiply with the user's FPCR and compare the scaled
# |result| with 2: greater -> normal exit, less -> underflow. If it compares
# equal, the multiply is redone in round-to-zero to see the pre-rounded value.
14146 fsglmul_may_unfl:
14147 fmovm.x FP_SCR1(%a6),&0x80 # load dst op
14149 fmov.l L_SCR3(%a6),%fpcr # set FPCR
14150 fmov.l &0x0,%fpsr # clear FPSR
14152 fsglmul.x FP_SCR0(%a6),%fp0 # execute sgl multiply
14154 fmov.l %fpsr,%d1 # save status
14155 fmov.l &0x0,%fpcr # clear FPCR
14157 or.l %d1,USER_FPSR(%a6) # save INEX2,N
14159 fabs.x %fp0,%fp1 # make a copy of result
14160 fcmp.b %fp1,&0x2 # is |result| > 2.b?
14161 fbgt.w fsglmul_normal_exit # yes; no underflow occurred
14162 fblt.w fsglmul_unfl # |result| < 2.b; underflow occurred
14165 # we still don't know if underflow occurred. result is ~ equal to 2. but,
14166 # we don't know if the result was an underflow that rounded up to a 2 or
14167 # a normalized number that rounded down to a 2. so, redo the entire operation
14168 # using RZ as the rounding mode to see what the pre-rounded result is.
14169 # this case should be relatively rare.
14171 fmovm.x FP_SCR1(%a6),&0x40 # load dst op into fp1
14173 mov.l L_SCR3(%a6),%d1
14174 andi.b &0xc0,%d1 # keep rnd prec
14175 ori.b &rz_mode*0x10,%d1 # insert RZ
14177 fmov.l %d1,%fpcr # set FPCR
14178 fmov.l &0x0,%fpsr # clear FPSR
14180 fsglmul.x FP_SCR0(%a6),%fp1 # execute sgl multiply
14182 fmov.l &0x0,%fpcr # clear FPCR
14183 fabs.x %fp1 # make absolute value
14184 fcmp.b %fp1,&0x2 # is |result| < 2.b?
14185 fbge.w fsglmul_normal_exit # no; no underflow occurred
14186 bra.w fsglmul_unfl # yes, underflow occurred
14188 ##############################################################################
14191 # Single Precision Multiply: inputs are not both normalized; what are they?
# d1.w holds the combined operand tag. The table has one 8-entry row per
# destination tag and one column per source tag (NORM, ZERO, INF, QNAN,
# DENORM, SNAN, 2 unused). Each entry is a 16-bit offset from tbl_fsglmul_op.
14193 fsglmul_not_norm:
14194 mov.w (tbl_fsglmul_op.b,%pc,%d1.w*2),%d1
14195 jmp (tbl_fsglmul_op.b,%pc,%d1.w*1)
14197 swbeg &48
14198 tbl_fsglmul_op:
14199 short fsglmul_norm - tbl_fsglmul_op # NORM x NORM
14200 short fsglmul_zero - tbl_fsglmul_op # NORM x ZERO
14201 short fsglmul_inf_src - tbl_fsglmul_op # NORM x INF
14202 short fsglmul_res_qnan - tbl_fsglmul_op # NORM x QNAN
14203 short fsglmul_norm - tbl_fsglmul_op # NORM x DENORM
14204 short fsglmul_res_snan - tbl_fsglmul_op # NORM x SNAN
14205 short tbl_fsglmul_op - tbl_fsglmul_op #
14206 short tbl_fsglmul_op - tbl_fsglmul_op #
14208 short fsglmul_zero - tbl_fsglmul_op # ZERO x NORM
14209 short fsglmul_zero - tbl_fsglmul_op # ZERO x ZERO
14210 short fsglmul_res_operr - tbl_fsglmul_op # ZERO x INF
14211 short fsglmul_res_qnan - tbl_fsglmul_op # ZERO x QNAN
14212 short fsglmul_zero - tbl_fsglmul_op # ZERO x DENORM
14213 short fsglmul_res_snan - tbl_fsglmul_op # ZERO x SNAN
14214 short tbl_fsglmul_op - tbl_fsglmul_op #
14215 short tbl_fsglmul_op - tbl_fsglmul_op #
14217 short fsglmul_inf_dst - tbl_fsglmul_op # INF x NORM
14218 short fsglmul_res_operr - tbl_fsglmul_op # INF x ZERO
14219 short fsglmul_inf_dst - tbl_fsglmul_op # INF x INF
14220 short fsglmul_res_qnan - tbl_fsglmul_op # INF x QNAN
14221 short fsglmul_inf_dst - tbl_fsglmul_op # INF x DENORM
14222 short fsglmul_res_snan - tbl_fsglmul_op # INF x SNAN
14223 short tbl_fsglmul_op - tbl_fsglmul_op #
14224 short tbl_fsglmul_op - tbl_fsglmul_op #
14226 short fsglmul_res_qnan - tbl_fsglmul_op # QNAN x NORM
14227 short fsglmul_res_qnan - tbl_fsglmul_op # QNAN x ZERO
14228 short fsglmul_res_qnan - tbl_fsglmul_op # QNAN x INF
14229 short fsglmul_res_qnan - tbl_fsglmul_op # QNAN x QNAN
14230 short fsglmul_res_qnan - tbl_fsglmul_op # QNAN x DENORM
14231 short fsglmul_res_snan - tbl_fsglmul_op # QNAN x SNAN
14232 short tbl_fsglmul_op - tbl_fsglmul_op #
14233 short tbl_fsglmul_op - tbl_fsglmul_op #
14235 short fsglmul_norm - tbl_fsglmul_op # DENORM x NORM
14236 short fsglmul_zero - tbl_fsglmul_op # DENORM x ZERO
14237 short fsglmul_inf_src - tbl_fsglmul_op # DENORM x INF
14238 short fsglmul_res_qnan - tbl_fsglmul_op # DENORM x QNAN
14239 short fsglmul_norm - tbl_fsglmul_op # DENORM x DENORM
14240 short fsglmul_res_snan - tbl_fsglmul_op # DENORM x SNAN
14241 short tbl_fsglmul_op - tbl_fsglmul_op #
14242 short tbl_fsglmul_op - tbl_fsglmul_op #
14244 short fsglmul_res_snan - tbl_fsglmul_op # SNAN x NORM
14245 short fsglmul_res_snan - tbl_fsglmul_op # SNAN x ZERO
14246 short fsglmul_res_snan - tbl_fsglmul_op # SNAN x INF
14247 short fsglmul_res_snan - tbl_fsglmul_op # SNAN x QNAN
14248 short fsglmul_res_snan - tbl_fsglmul_op # SNAN x DENORM
14249 short fsglmul_res_snan - tbl_fsglmul_op # SNAN x SNAN
14250 short tbl_fsglmul_op - tbl_fsglmul_op #
14251 short tbl_fsglmul_op - tbl_fsglmul_op #
# Special-case targets used by the table above. Each simply forwards to a
# shared handler.
# NOTE(review): the fmul_* target names below follow the fdiv_* forwarding
# pattern visible in the fsgldiv stubs; confirm against the fmul section.
14253 fsglmul_res_operr:
14254 bra.l res_operr
14255 fsglmul_res_snan:
14256 bra.l res_snan
14257 fsglmul_res_qnan:
14258 bra.l res_qnan
14259 fsglmul_zero:
14260 bra.l fmul_zero
14261 fsglmul_inf_src:
14262 bra.l fmul_inf_src
14263 fsglmul_inf_dst:
14264 bra.l fmul_inf_dst
14266 #########################################################################
14267 # XDEF **************************************************************** #
14268 # fsgldiv(): emulates the fsgldiv instruction #
14270 # XREF **************************************************************** #
14271 # scale_to_zero_src() - scale src exponent to zero #
14272 # scale_to_zero_dst() - scale dst exponent to zero #
14273 # unf_res4() - return default underflow result for sglop #
14274 # ovf_res() - return default overflow result #
14275 # res_qnan() - return QNAN result #
14276 # res_snan() - return SNAN result #
14278 # INPUT *************************************************************** #
14279 # a0 = pointer to extended precision source operand #
14280 # a1 = pointer to extended precision destination operand #
14281 # d0 = rnd prec,mode #
14283 # OUTPUT ************************************************************** #
14284 # fp0 = result #
14285 # fp1 = EXOP (if exception occurred) #
14287 # ALGORITHM *********************************************************** #
14288 # Handle NANs, infinities, and zeroes as special cases. Divide #
14289 # norms/denorms into ext/sgl/dbl precision. #
14290 # For norms/denorms, scale the exponents such that a divide #
14291 # instruction won't cause an exception. Use the regular fsgldiv to #
14292 # compute a result. Check if the regular operands would have taken #
14293 # an exception. If so, return the default overflow/underflow result #
14294 # and return the EXOP if exceptions are enabled. Else, scale the #
14295 # result operand to the proper exponent. #
14297 #########################################################################
# fsgldiv entry point.
#   in:  a0 -> extended-precision source, a1 -> extended-precision destination,
#        d0 = rnd prec,mode
# The operand tags are combined into a table index (dst tag * 8 + src tag);
# a non-zero index means at least one operand is not a NORM and is dispatched
# through fsgldiv_not_norm. Otherwise both operands are copied to the scratch
# area, scaled to a zero exponent, and the combined scale factor in d0 selects
# the normal / may-overflow / may-underflow / underflow path.
14299 global fsgldiv
14300 fsgldiv:
14301 mov.l %d0,L_SCR3(%a6) # store rnd info
14303 clr.w %d1 # table index is used as a word; clear the upper byte
14304 mov.b DTAG(%a6),%d1
14305 lsl.b &0x3,%d1 # dst tag selects the 8-entry table row
14306 or.b STAG(%a6),%d1 # combine src tags
14308 bne.w fsgldiv_not_norm # optimize on non-norm input
14311 # DIVIDE: NORMs and DENORMs ONLY!
14313 fsgldiv_norm:
14314 mov.w DST_EX(%a1),FP_SCR1_EX(%a6)
14315 mov.l DST_HI(%a1),FP_SCR1_HI(%a6)
14316 mov.l DST_LO(%a1),FP_SCR1_LO(%a6)
14318 mov.w SRC_EX(%a0),FP_SCR0_EX(%a6)
14319 mov.l SRC_HI(%a0),FP_SCR0_HI(%a6)
14320 mov.l SRC_LO(%a0),FP_SCR0_LO(%a6)
14322 bsr.l scale_to_zero_src # calculate scale factor 1
14323 mov.l %d0,-(%sp) # save scale factor 1
14325 bsr.l scale_to_zero_dst # calculate scale factor 2
14327 neg.l (%sp) # S.F. = scale1 - scale2
14328 add.l %d0,(%sp) # fold scale factor 2 into the saved value
14330 mov.w 2+L_SCR3(%a6),%d1 # fetch precision,mode
14331 lsr.b &0x6,%d1 # reduce to the precision field
14332 mov.l (%sp)+,%d0 # d0 = combined scale factor; stack is balanced again
14333 cmpi.l %d0,&0x3fff-0x7ffe
14334 ble.w fsgldiv_may_ovfl
14336 cmpi.l %d0,&0x3fff-0x0000 # will result underflow?
14337 beq.w fsgldiv_may_unfl # maybe
14338 bgt.w fsgldiv_unfl # yes; go handle underflow
# fsgldiv_normal: neither overflow nor underflow is possible. Execute the
# divide with the user's FPCR, merge the status into USER_FPSR, then fall into
# fsgldiv_normal_exit.
# fsgldiv_normal_exit: rewrite the result exponent as (exp - d0), keeping the
# sign, and return the value in fp0. d1 is scratch; d2 is saved/restored.
14340 fsgldiv_normal:
14341 fmovm.x FP_SCR1(%a6),&0x80 # load dst op
14343 fmov.l L_SCR3(%a6),%fpcr # set FPCR
14344 fmov.l &0x0,%fpsr # clear FPSR
14346 fsgldiv.x FP_SCR0(%a6),%fp0 # perform sgl divide
14348 fmov.l %fpsr,%d1 # save FPSR
14349 fmov.l &0x0,%fpcr # clear FPCR
14351 or.l %d1,USER_FPSR(%a6) # save INEX2,N
14353 fsgldiv_normal_exit:
14354 fmovm.x &0x80,FP_SCR0(%a6) # store result on stack
14355 mov.l %d2,-(%sp) # save d2
14356 mov.w FP_SCR0_EX(%a6),%d1 # load {sgn,exp}
14357 mov.l %d1,%d2 # make a copy
14358 andi.l &0x7fff,%d1 # strip sign
14359 andi.w &0x8000,%d2 # keep old sign
14360 sub.l %d0,%d1 # add scale factor
14361 or.w %d2,%d1 # concat old sign,new exp
14362 mov.w %d1,FP_SCR0_EX(%a6) # insert new exponent
14363 mov.l (%sp)+,%d2 # restore d2
14364 fmovm.x FP_SCR0(%a6),&0x80 # return result in fp0
14365 rts # return here; the may-overflow path follows
# fsgldiv_may_ovfl: the divide may overflow. Execute it with the user's FPCR,
# merge the status into USER_FPSR, then compute the true exponent
# (exp - d0) and compare it with 0x7fff. Below that limit the result is fine
# and the normal exit is taken; otherwise execution continues into the
# overflow test that follows.
# The FPSR must be read into d1 after the divide: d1 otherwise still holds the
# precision/mode value loaded in the entry code, and that would be ORed into
# USER_FPSR. The FPCR is cleared as on every sibling path.
14367 fsgldiv_may_ovfl:
14368 fmovm.x FP_SCR1(%a6),&0x80 # load dst op
14370 fmov.l L_SCR3(%a6),%fpcr # set FPCR
14371 fmov.l &0x0,%fpsr # clear FPSR
14373 fsgldiv.x FP_SCR0(%a6),%fp0 # execute divide
14375 fmov.l %fpsr,%d1 # save status
14376 fmov.l &0x0,%fpcr # clear FPCR
14378 or.l %d1,USER_FPSR(%a6) # save INEX,N
14380 fmovm.x &0x01,-(%sp) # save result to stack
14381 mov.w (%sp),%d1 # fetch new exponent
14382 add.l &0xc,%sp # clear result
14383 andi.l &0x7fff,%d1 # strip sign
14384 sub.l %d0,%d1 # add scale factor
14385 cmp.l %d1,&0x7fff # did divide overflow?
14386 blt.b fsgldiv_normal_exit
# fsgldiv_ovfl_tst: overflow is confirmed. Set ovfl/aovfl/ainex and test the
# enable byte.
# fsgldiv_ovfl_dis: OVFL/INEX disabled -> fp0 = default overflow result from
# ovf_res (d1 = sign flag, d0 = rnd mode with the precision bits cleared);
# the d0 return value is ORed into FPSR_CC and the result is loaded from (a0).
14388 fsgldiv_ovfl_tst:
14389 or.w &ovfl_inx_mask,2+USER_FPSR(%a6) # set ovfl/aovfl/ainex
14391 mov.b FPCR_ENABLE(%a6),%d1
14392 andi.b &0x13,%d1 # is OVFL or INEX enabled?
14393 bne.b fsgldiv_ovfl_ena # yes
14395 fsgldiv_ovfl_dis:
14396 btst &neg_bit,FPSR_CC(%a6) # is result negative
14397 sne %d1 # set sign param accordingly
14398 mov.l L_SCR3(%a6),%d0 # pass prec:rnd
14399 andi.b &0x30,%d0 # kill precision
14400 bsr.l ovf_res # calculate default result
14401 or.b %d0,FPSR_CC(%a6) # set INF if applicable
14402 fmovm.x (%a0),&0x80 # return default result in fp0
14403 rts # done; the enabled-exception path follows
# fsgldiv_ovfl_ena: OVFL or INEX is enabled. Return the EXOP in fp1: the
# rounded quotient with exponent (exp - d0 - 0x6000) masked to 15 bits and the
# original sign. Control then continues at fsgldiv_ovfl_dis for the default
# result in fp0.
14405 fsgldiv_ovfl_ena:
14406 fmovm.x &0x80,FP_SCR0(%a6) # move result to stack
14408 mov.l %d2,-(%sp) # save d2
14409 mov.w FP_SCR0_EX(%a6),%d1 # fetch {sgn,exp}
14410 mov.l %d1,%d2 # make a copy
14411 andi.l &0x7fff,%d1 # strip sign
14412 andi.w &0x8000,%d2 # keep old sign
14413 sub.l %d0,%d1 # add scale factor
14414 subi.l &0x6000,%d1 # subtract new bias
14415 andi.w &0x7fff,%d1 # clear ms bit
14416 or.w %d2,%d1 # concat old sign,new exp
14417 mov.w %d1,FP_SCR0_EX(%a6) # insert new exponent
14418 mov.l (%sp)+,%d2 # restore d2
14419 fmovm.x FP_SCR0(%a6),&0x40 # return EXOP in fp1
14420 bra.b fsgldiv_ovfl_dis
# fsgldiv_unfl: the divide underflows. Set the unfl exception bit, execute the
# divide in round-to-zero mode and merge the status into USER_FPSR.
# fsgldiv_unfl_dis: UNFL/INEX disabled -> unf_res4 builds the default
# underflow result in place at FP_SCR0 (a0 = result addr, d1 = rnd prec,mode);
# its d0 return is ORed into FPSR_CC and the result is returned in fp0.
14422 fsgldiv_unfl:
14423 bset &unfl_bit,FPSR_EXCEPT(%a6) # set unfl exc bit
14425 fmovm.x FP_SCR1(%a6),&0x80 # load dst op
14427 fmov.l &rz_mode*0x10,%fpcr # set FPCR
14428 fmov.l &0x0,%fpsr # clear FPSR
14430 fsgldiv.x FP_SCR0(%a6),%fp0 # execute sgl divide
14432 fmov.l %fpsr,%d1 # save status
14433 fmov.l &0x0,%fpcr # clear FPCR
14435 or.l %d1,USER_FPSR(%a6) # save INEX2,N
14437 mov.b FPCR_ENABLE(%a6),%d1
14438 andi.b &0x0b,%d1 # is UNFL or INEX enabled?
14439 bne.b fsgldiv_unfl_ena # yes
14441 fsgldiv_unfl_dis:
14442 fmovm.x &0x80,FP_SCR0(%a6) # store out result
14444 lea FP_SCR0(%a6),%a0 # pass: result addr
14445 mov.l L_SCR3(%a6),%d1 # pass: rnd prec,mode
14446 bsr.l unf_res4 # calculate default result
14447 or.b %d0,FPSR_CC(%a6) # 'Z' bit may have been set
14448 fmovm.x FP_SCR0(%a6),&0x80 # return default result in fp0
14449 rts # done; the enabled-exception path follows
# fsgldiv_unfl_ena: UNFL or INEX is enabled. Recompute the quotient into fp1
# with the user's FPCR and return it as the EXOP with exponent
# (exp - d0 + 0x6000) masked to 15 bits, original sign kept. Control then
# continues at fsgldiv_unfl_dis for the default result in fp0.
14454 fsgldiv_unfl_ena:
14455 fmovm.x FP_SCR1(%a6),&0x40 # load dst op
14457 fmov.l L_SCR3(%a6),%fpcr # set FPCR
14458 fmov.l &0x0,%fpsr # clear FPSR
14460 fsgldiv.x FP_SCR0(%a6),%fp1 # execute sgl divide
14462 fmov.l &0x0,%fpcr # clear FPCR
14464 fmovm.x &0x40,FP_SCR0(%a6) # save result to stack
14465 mov.l %d2,-(%sp) # save d2
14466 mov.w FP_SCR0_EX(%a6),%d1 # fetch {sgn,exp}
14467 mov.l %d1,%d2 # make a copy
14468 andi.l &0x7fff,%d1 # strip sign
14469 andi.w &0x8000,%d2 # keep old sign
14470 sub.l %d0,%d1 # add scale factor
14471 addi.l &0x6000,%d1 # add bias
14472 andi.w &0x7fff,%d1 # clear top bit
14473 or.w %d2,%d1 # concat old sign, new exp
14474 mov.w %d1,FP_SCR0_EX(%a6) # insert new exponent
14475 mov.l (%sp)+,%d2 # restore d2
14476 fmovm.x FP_SCR0(%a6),&0x40 # return EXOP in fp1
14477 bra.b fsgldiv_unfl_dis
14480 # the divide operation MAY underflow:
# Execute the divide with the user's FPCR and compare the scaled |result|
# with 1: greater -> normal exit, less -> underflow. If it compares equal,
# the divide is redone in round-to-zero (FPCR = RZ only) to inspect the
# pre-rounded value.
14482 fsgldiv_may_unfl:
14483 fmovm.x FP_SCR1(%a6),&0x80 # load dst op
14485 fmov.l L_SCR3(%a6),%fpcr # set FPCR
14486 fmov.l &0x0,%fpsr # clear FPSR
14488 fsgldiv.x FP_SCR0(%a6),%fp0 # execute sgl divide
14490 fmov.l %fpsr,%d1 # save status
14491 fmov.l &0x0,%fpcr # clear FPCR
14493 or.l %d1,USER_FPSR(%a6) # save INEX2,N
14495 fabs.x %fp0,%fp1 # make a copy of result
14496 fcmp.b %fp1,&0x1 # is |result| > 1.b?
14497 fbgt.w fsgldiv_normal_exit # yes; no underflow occurred
14498 fblt.w fsgldiv_unfl # |result| < 1.b; underflow occurred
14501 # we still don't know if underflow occurred. result is ~ equal to 1. but,
14502 # we don't know if the result was an underflow that rounded up to a 1
14503 # or a normalized number that rounded down to a 1. so, redo the entire
14504 # operation using RZ as the rounding mode to see what the pre-rounded
14505 # result is. this case should be relatively rare.
14507 fmovm.x FP_SCR1(%a6),&0x40 # load dst op into %fp1
14509 clr.l %d1 # clear scratch register
14510 ori.b &rz_mode*0x10,%d1 # force RZ rnd mode
14512 fmov.l %d1,%fpcr # set FPCR
14513 fmov.l &0x0,%fpsr # clear FPSR
14515 fsgldiv.x FP_SCR0(%a6),%fp1 # execute sgl divide
14517 fmov.l &0x0,%fpcr # clear FPCR
14518 fabs.x %fp1 # make absolute value
14519 fcmp.b %fp1,&0x1 # is |result| < 1.b?
14520 fbge.w fsgldiv_normal_exit # no; no underflow occurred
14521 bra.w fsgldiv_unfl # yes; underflow occurred
14523 ############################################################################
14526 # Divide: inputs are not both normalized; what are they?
# d1.w holds the combined operand tag. The table has one 8-entry row per
# destination tag and one column per source tag (NORM, ZERO, INF, QNAN,
# DENORM, SNAN, 2 unused). Each entry is a 16-bit offset from tbl_fsgldiv_op.
14528 fsgldiv_not_norm:
14529 mov.w (tbl_fsgldiv_op.b,%pc,%d1.w*2),%d1
14530 jmp (tbl_fsgldiv_op.b,%pc,%d1.w*1)
14532 swbeg &48
14533 tbl_fsgldiv_op:
14534 short fsgldiv_norm - tbl_fsgldiv_op # NORM / NORM
14535 short fsgldiv_inf_load - tbl_fsgldiv_op # NORM / ZERO
14536 short fsgldiv_zero_load - tbl_fsgldiv_op # NORM / INF
14537 short fsgldiv_res_qnan - tbl_fsgldiv_op # NORM / QNAN
14538 short fsgldiv_norm - tbl_fsgldiv_op # NORM / DENORM
14539 short fsgldiv_res_snan - tbl_fsgldiv_op # NORM / SNAN
14540 short tbl_fsgldiv_op - tbl_fsgldiv_op #
14541 short tbl_fsgldiv_op - tbl_fsgldiv_op #
14543 short fsgldiv_zero_load - tbl_fsgldiv_op # ZERO / NORM
14544 short fsgldiv_res_operr - tbl_fsgldiv_op # ZERO / ZERO
14545 short fsgldiv_zero_load - tbl_fsgldiv_op # ZERO / INF
14546 short fsgldiv_res_qnan - tbl_fsgldiv_op # ZERO / QNAN
14547 short fsgldiv_zero_load - tbl_fsgldiv_op # ZERO / DENORM
14548 short fsgldiv_res_snan - tbl_fsgldiv_op # ZERO / SNAN
14549 short tbl_fsgldiv_op - tbl_fsgldiv_op #
14550 short tbl_fsgldiv_op - tbl_fsgldiv_op #
14552 short fsgldiv_inf_dst - tbl_fsgldiv_op # INF / NORM
14553 short fsgldiv_inf_dst - tbl_fsgldiv_op # INF / ZERO
14554 short fsgldiv_res_operr - tbl_fsgldiv_op # INF / INF
14555 short fsgldiv_res_qnan - tbl_fsgldiv_op # INF / QNAN
14556 short fsgldiv_inf_dst - tbl_fsgldiv_op # INF / DENORM
14557 short fsgldiv_res_snan - tbl_fsgldiv_op # INF / SNAN
14558 short tbl_fsgldiv_op - tbl_fsgldiv_op #
14559 short tbl_fsgldiv_op - tbl_fsgldiv_op #
14561 short fsgldiv_res_qnan - tbl_fsgldiv_op # QNAN / NORM
14562 short fsgldiv_res_qnan - tbl_fsgldiv_op # QNAN / ZERO
14563 short fsgldiv_res_qnan - tbl_fsgldiv_op # QNAN / INF
14564 short fsgldiv_res_qnan - tbl_fsgldiv_op # QNAN / QNAN
14565 short fsgldiv_res_qnan - tbl_fsgldiv_op # QNAN / DENORM
14566 short fsgldiv_res_snan - tbl_fsgldiv_op # QNAN / SNAN
14567 short tbl_fsgldiv_op - tbl_fsgldiv_op #
14568 short tbl_fsgldiv_op - tbl_fsgldiv_op #
14570 short fsgldiv_norm - tbl_fsgldiv_op # DENORM / NORM
14571 short fsgldiv_inf_load - tbl_fsgldiv_op # DENORM / ZERO
14572 short fsgldiv_zero_load - tbl_fsgldiv_op # DENORM / INF
14573 short fsgldiv_res_qnan - tbl_fsgldiv_op # DENORM / QNAN
14574 short fsgldiv_norm - tbl_fsgldiv_op # DENORM / DENORM
14575 short fsgldiv_res_snan - tbl_fsgldiv_op # DENORM / SNAN
14576 short tbl_fsgldiv_op - tbl_fsgldiv_op #
14577 short tbl_fsgldiv_op - tbl_fsgldiv_op #
14579 short fsgldiv_res_snan - tbl_fsgldiv_op # SNAN / NORM
14580 short fsgldiv_res_snan - tbl_fsgldiv_op # SNAN / ZERO
14581 short fsgldiv_res_snan - tbl_fsgldiv_op # SNAN / INF
14582 short fsgldiv_res_snan - tbl_fsgldiv_op # SNAN / QNAN
14583 short fsgldiv_res_snan - tbl_fsgldiv_op # SNAN / DENORM
14584 short fsgldiv_res_snan - tbl_fsgldiv_op # SNAN / SNAN
14585 short tbl_fsgldiv_op - tbl_fsgldiv_op #
14586 short tbl_fsgldiv_op - tbl_fsgldiv_op #
# Special-case targets used by the table above. Each forwards to a shared
# handler; every stub needs its own label so the table offsets resolve and
# the second branch is reachable.
# NOTE(review): fdiv_inf_dst is inferred from the fdiv_inf_load /
# fdiv_zero_load pattern; confirm against the fdiv section.
14588 fsgldiv_res_qnan:
14589 bra.l res_qnan
14590 fsgldiv_res_snan:
14591 bra.l res_snan
14592 fsgldiv_res_operr:
14593 bra.l res_operr
14594 fsgldiv_inf_load:
14595 bra.l fdiv_inf_load
14596 fsgldiv_zero_load:
14597 bra.l fdiv_zero_load
14598 fsgldiv_inf_dst:
14599 bra.l fdiv_inf_dst
14601 #########################################################################
14602 # XDEF **************************************************************** #
14603 # fadd(): emulates the fadd instruction #
14604 # fsadd(): emulates the fsadd instruction #
14605 # fdadd(): emulates the fdadd instruction #
14607 # XREF **************************************************************** #
14608 # addsub_scaler2() - scale the operands so they won't take exc #
14609 # ovf_res() - return default overflow result #
14610 # unf_res() - return default underflow result #
14611 # res_qnan() - set QNAN result #
14612 # res_snan() - set SNAN result #
14613 # res_operr() - set OPERR result #
14614 # scale_to_zero_src() - set src operand exponent equal to zero #
14615 # scale_to_zero_dst() - set dst operand exponent equal to zero #
14617 # INPUT *************************************************************** #
14618 # a0 = pointer to extended precision source operand #
14619 # a1 = pointer to extended precision destination operand #
14621 # OUTPUT ************************************************************** #
14622 # fp0 = result #
14623 # fp1 = EXOP (if exception occurred) #
14625 # ALGORITHM *********************************************************** #
14626 # Handle NANs, infinities, and zeroes as special cases. Divide #
14627 # norms into extended, single, and double precision. #
14628 # Do addition after scaling exponents such that exception won't #
14629 # occur. Then, check result exponent to see if exception would have #
14630 # occurred. If so, return default result and maybe EXOP. Else, insert #
14631 # the correct result exponent and return. Set FPSR bits as appropriate. #
14633 #########################################################################
# Entry points for the add emulation. d0 carries rnd prec,mode on entry.
#   fsadd: keep the rounding mode, force single precision, then join fadd.
#   fdadd: keep the rounding mode, force double precision, then fall into fadd.
#   fadd:  store the rounding info in L_SCR3 and build the operand-tag index
#          (dst tag * 8 + src tag) in d1.w; non-zero means at least one
#          operand is not a NORM and goes through fadd_not_norm.
# fsadd must branch around the fdadd override, otherwise its single-precision
# setting is replaced by double precision.
14635 global fsadd
14636 fsadd:
14637 andi.b &0x30,%d0 # clear rnd prec
14638 ori.b &s_mode*0x10,%d0 # insert sgl prec
14639 bra.b fadd # skip the fdadd precision override
14641 global fdadd
14642 fdadd:
14643 andi.b &0x30,%d0 # clear rnd prec
14644 ori.b &d_mode*0x10,%d0 # insert dbl prec
14646 global fadd
14647 fadd:
14648 mov.l %d0,L_SCR3(%a6) # store rnd info
14650 clr.w %d1 # table index is used as a word; clear the upper byte
14651 mov.b DTAG(%a6),%d1
14652 lsl.b &0x3,%d1 # dst tag selects the 8-entry table row
14653 or.b STAG(%a6),%d1 # combine src tags
14655 bne.w fadd_not_norm # optimize on non-norm input
14658 # ADD: norms and denorms
# fadd_norm scales both operands (addsub_scaler2 leaves the scale factor in
# d0), executes the add with the user's FPCR and merges the status into
# USER_FPSR. A zero result returns immediately. Otherwise the result is pushed
# on the stack (after d2), its true exponent (exp - d0) is computed in d2 and
# compared against the overflow/underflow thresholds for the selected
# precision (tables below, indexed by precision 0=ext, 1=sgl, 2=dbl).
# fadd_zero_entry is the join point for the ZERO-operand cases, which arrive
# with the operands already placed in FP_SCR0/FP_SCR1 and d0 set.
14660 fadd_norm:
14661 bsr.l addsub_scaler2 # scale exponents
14663 fadd_zero_entry:
14664 fmovm.x FP_SCR1(%a6),&0x80 # load dst op
14666 fmov.l &0x0,%fpsr # clear FPSR
14667 fmov.l L_SCR3(%a6),%fpcr # set FPCR
14669 fadd.x FP_SCR0(%a6),%fp0 # execute add
14671 fmov.l &0x0,%fpcr # clear FPCR
14672 fmov.l %fpsr,%d1 # fetch INEX2,N,Z
14674 or.l %d1,USER_FPSR(%a6) # save exc and ccode bits
14676 fbeq.w fadd_zero_exit # if result is zero, end now
14678 mov.l %d2,-(%sp) # save d2
14680 fmovm.x &0x01,-(%sp) # save result to stack
14682 mov.w 2+L_SCR3(%a6),%d1
14683 lsr.b &0x6,%d1 # d1 = precision index for the threshold tables
14685 mov.w (%sp),%d2 # fetch new sign, exp
14686 andi.l &0x7fff,%d2 # strip sign
14687 sub.l %d0,%d2 # add scale factor
14689 cmp.l %d2,(tbl_fadd_ovfl.b,%pc,%d1.w*4) # is it an overflow?
14690 bge.b fadd_ovfl # yes
14692 cmp.l %d2,(tbl_fadd_unfl.b,%pc,%d1.w*4) # is it an underflow?
14693 blt.w fadd_unfl # yes
14694 beq.w fadd_may_unfl # maybe; go find out
# fadd_normal: no exception. Insert the true exponent (d2) under the result's
# sign, pop the result into fp0, restore d2 and return.
14696 fadd_normal:
14697 mov.w (%sp),%d1 # reload {sgn,exp} of the saved result
14698 andi.w &0x8000,%d1 # keep sign
14699 or.w %d2,%d1 # concat sign,new exp
14700 mov.w %d1,(%sp) # insert new exponent
14702 fmovm.x (%sp)+,&0x80 # return result in fp0
14704 mov.l (%sp)+,%d2 # restore d2
14705 rts
# fadd_zero_exit: the add produced zero; fp0 and the condition codes are
# already correct, so just return.
14707 fadd_zero_exit:
14708 # fmov.s &0x00000000,%fp0 # return zero in fp0
14709 rts
# Exponent thresholds per precision (ext, sgl, dbl).
14711 tbl_fadd_ovfl:
14712 long 0x7fff # ext ovfl
14713 long 0x407f # sgl ovfl
14714 long 0x43ff # dbl ovfl
14716 tbl_fadd_unfl:
14717 long 0x0000 # ext unfl
14718 long 0x3f81 # sgl unfl
14719 long 0x3c01 # dbl unfl
# fadd_ovfl: the add overflowed. On entry the stack holds the 12-byte rounded
# result on top of the saved d2. Set ovfl/aovfl/ainex and test the enable
# byte.
# fadd_ovfl_dis: OVFL/INEX disabled (also the tail of the enabled path).
# Discard the saved result, return the default overflow result from ovf_res
# in fp0 (d1 = sign flag, d0 = prec:rnd), restore d2 and return.
14721 fadd_ovfl:
14722 or.l &ovfl_inx_mask,USER_FPSR(%a6) # set ovfl/aovfl/ainex
14724 mov.b FPCR_ENABLE(%a6),%d1
14725 andi.b &0x13,%d1 # is OVFL or INEX enabled?
14726 bne.b fadd_ovfl_ena # yes
14728 fadd_ovfl_dis:
14729 add.l &0xc,%sp # drop the saved result so d2 is on top of the stack
14730 btst &neg_bit,FPSR_CC(%a6) # is result negative?
14731 sne %d1 # set sign param accordingly
14732 mov.l L_SCR3(%a6),%d0 # pass prec:rnd
14733 bsr.l ovf_res # calculate default result
14734 or.b %d0,FPSR_CC(%a6) # set INF,N if applicable
14735 fmovm.x (%a0),&0x80 # return default result in fp0
14736 mov.l (%sp)+,%d2 # restore d2
14737 rts
# fadd_ovfl_ena: OVFL or INEX is enabled; an EXOP must be returned in fp1.
# On entry the stack holds the rounded result (12 bytes) above the saved d2,
# and d2 holds the true exponent.
#   extended precision: the saved result is already the EXOP mantissa.
#   sgl/dbl (fadd_ovfl_ena_sd): redo the add with only the rounding mode in
#   FPCR (precision bits cleared) and replace the saved result with it.
# fadd_ovfl_ena_cont then writes exponent (d2 - 0x6000), masked to 15 bits,
# under the result's sign, pops the value into fp1 and joins fadd_ovfl_dis.
14739 fadd_ovfl_ena:
14740 mov.b L_SCR3(%a6),%d1
14741 andi.b &0xc0,%d1 # is precision extended?
14742 bne.b fadd_ovfl_ena_sd # no; prec = sgl or dbl
14744 fadd_ovfl_ena_cont:
14745 mov.w (%sp),%d1 # fetch {sgn,exp} of the saved result
14746 andi.w &0x8000,%d1 # keep sign
14747 subi.l &0x6000,%d2 # add extra bias
14748 andi.w &0x7fff,%d2 # keep the exponent to 15 bits
14749 or.w %d2,%d1 # concat sign,new exp
14750 mov.w %d1,(%sp) # insert new exponent
14752 fmovm.x (%sp)+,&0x40 # return EXOP in fp1
14753 bra.b fadd_ovfl_dis
14755 fadd_ovfl_ena_sd:
14756 fmovm.x FP_SCR1(%a6),&0x80 # load dst op
14758 mov.l L_SCR3(%a6),%d1
14759 andi.b &0x30,%d1 # keep rnd mode
14760 fmov.l %d1,%fpcr # set FPCR
14762 fadd.x FP_SCR0(%a6),%fp0 # execute add
14764 fmov.l &0x0,%fpcr # clear FPCR
14766 add.l &0xc,%sp # discard the previously saved result
14767 fmovm.x &0x01,-(%sp)
14768 bra.b fadd_ovfl_ena_cont
# fadd_unfl: the add underflowed. On entry the stack holds the 12-byte rounded
# result above the saved d2; that copy is not needed and is discarded. The add
# is redone in round-to-zero mode and its status merged into USER_FPSR.
# fadd_unfl_dis: UNFL/INEX disabled (also the tail of the enabled path).
# unf_res builds the default underflow result in place at FP_SCR0
# (a0 = result addr, d1 = rnd prec,mode); its d0 return is ORed into FPSR_CC.
# fp0 receives the result, d2 is restored.
14770 fadd_unfl:
14771 bset &unfl_bit,FPSR_EXCEPT(%a6) # set unfl exc bit
14773 add.l &0xc,%sp # drop the saved result so d2 is on top of the stack
14775 fmovm.x FP_SCR1(%a6),&0x80 # load dst op
14777 fmov.l &rz_mode*0x10,%fpcr # set FPCR
14778 fmov.l &0x0,%fpsr # clear FPSR
14780 fadd.x FP_SCR0(%a6),%fp0 # execute add
14782 fmov.l &0x0,%fpcr # clear FPCR
14783 fmov.l %fpsr,%d1 # save status
14785 or.l %d1,USER_FPSR(%a6) # save INEX,N
14787 mov.b FPCR_ENABLE(%a6),%d1
14788 andi.b &0x0b,%d1 # is UNFL or INEX enabled?
14789 bne.b fadd_unfl_ena # yes
14791 fadd_unfl_dis:
14792 fmovm.x &0x80,FP_SCR0(%a6) # store out result
14794 lea FP_SCR0(%a6),%a0 # pass: result addr
14795 mov.l L_SCR3(%a6),%d1 # pass: rnd prec,mode
14796 bsr.l unf_res # calculate default result
14797 or.b %d0,FPSR_CC(%a6) # 'Z' bit may have been set
14798 fmovm.x FP_SCR0(%a6),&0x80 # return default result in fp0
14799 mov.l (%sp)+,%d2 # restore d2
14800 rts
# fadd_unfl_ena: UNFL or INEX is enabled; return an EXOP in fp1.
# The add is recomputed into fp1: with the user's full FPCR for extended
# precision, or with only the rounding mode (fadd_unfl_ena_sd) for sgl/dbl.
# The EXOP exponent is (exp - d0 + 0x6000) masked to 15 bits, original sign
# kept. d2 is used as scratch here; its caller-saved copy is still on the
# stack and is restored in fadd_unfl_dis, where control continues.
14802 fadd_unfl_ena:
14803 fmovm.x FP_SCR1(%a6),&0x40 # load dst op
14805 mov.l L_SCR3(%a6),%d1
14806 andi.b &0xc0,%d1 # is precision extended?
14807 bne.b fadd_unfl_ena_sd # no; sgl or dbl
14809 fmov.l L_SCR3(%a6),%fpcr # set FPCR
14811 fadd_unfl_ena_cont:
14812 fmov.l &0x0,%fpsr # clear FPSR
14814 fadd.x FP_SCR0(%a6),%fp1 # execute add
14816 fmov.l &0x0,%fpcr # clear FPCR
14818 fmovm.x &0x40,FP_SCR0(%a6) # save result to stack
14819 mov.w FP_SCR0_EX(%a6),%d1 # fetch {sgn,exp}
14820 mov.l %d1,%d2 # make a copy
14821 andi.l &0x7fff,%d1 # strip sign
14822 andi.w &0x8000,%d2 # keep old sign
14823 sub.l %d0,%d1 # add scale factor
14824 addi.l &0x6000,%d1 # add new bias
14825 andi.w &0x7fff,%d1 # clear top bit
14826 or.w %d2,%d1 # concat sign,new exp
14827 mov.w %d1,FP_SCR0_EX(%a6) # insert new exponent
14828 fmovm.x FP_SCR0(%a6),&0x40 # return EXOP in fp1
14829 bra.w fadd_unfl_dis
14831 fadd_unfl_ena_sd:
14832 mov.l L_SCR3(%a6),%d1
14833 andi.b &0x30,%d1 # use only rnd mode
14834 fmov.l %d1,%fpcr # set FPCR
14836 bra.b fadd_unfl_ena_cont
14839 # result is equal to the smallest normalized number in the selected precision
14840 # if the precision is extended, this result could not have come from an
14841 # underflow that rounded up.
# On entry the stack holds the rounded result (12 bytes) above the saved d2,
# and fp0 still holds that result. Only the precision field of L_SCR3 decides
# the extended-precision shortcut, so it is masked before the test.
14843 fadd_may_unfl:
14844 mov.l L_SCR3(%a6),%d1
14845 andi.b &0xc0,%d1 # is precision extended?
14846 beq.w fadd_normal # yes; no underflow occurred
14848 mov.l 0x4(%sp),%d1 # extract hi(man)
14849 cmpi.l %d1,&0x80000000 # is hi(man) = 0x80000000?
14850 bne.w fadd_normal # no; no underflow occurred
14852 tst.l 0x8(%sp) # is lo(man) = 0x0?
14853 bne.w fadd_normal # no; no underflow occurred
14855 btst &inex2_bit,FPSR_EXCEPT(%a6) # is INEX2 set?
14856 beq.w fadd_normal # no; no underflow occurred
14859 # ok, so now the result has a exponent equal to the smallest normalized
14860 # exponent for the selected precision. also, the mantissa is equal to
14861 # 0x8000000000000000 and this mantissa is the result of rounding non-zero
14862 # g,r,s.
14863 # now, we must determine whether the pre-rounded result was an underflow
14864 # rounded "up" or a normalized number rounded "down".
14865 # so, we do this be re-executing the add using RZ as the rounding mode and
14866 # seeing if the new result is smaller or equal to the current result.
14868 fmovm.x FP_SCR1(%a6),&0x40 # load dst op into fp1
14870 mov.l L_SCR3(%a6),%d1
14871 andi.b &0xc0,%d1 # keep rnd prec
14872 ori.b &rz_mode*0x10,%d1 # insert rnd mode
14873 fmov.l %d1,%fpcr # set FPCR
14874 fmov.l &0x0,%fpsr # clear FPSR
14876 fadd.x FP_SCR0(%a6),%fp1 # execute add
14878 fmov.l &0x0,%fpcr # clear FPCR
14880 fabs.x %fp0 # compare absolute values
14881 fabs.x %fp1 # both sides must be magnitudes for the compare below
14882 fcmp.x %fp0,%fp1 # is first result > second?
14884 fbgt.w fadd_unfl # yes; it's an underflow
14885 bra.w fadd_normal # no; it's not an underflow
14887 ##########################################################################
14890 # Add: inputs are not both normalized; what are they?
# d1.w holds the combined operand tag. The table has one 8-entry row per
# destination tag and one column per source tag (NORM, ZERO, INF, QNAN,
# DENORM, SNAN, 2 unused). Each entry is a 16-bit offset from tbl_fadd_op.
14892 fadd_not_norm:
14893 mov.w (tbl_fadd_op.b,%pc,%d1.w*2),%d1
14894 jmp (tbl_fadd_op.b,%pc,%d1.w*1)
14896 swbeg &48
14897 tbl_fadd_op:
14898 short fadd_norm - tbl_fadd_op # NORM + NORM
14899 short fadd_zero_src - tbl_fadd_op # NORM + ZERO
14900 short fadd_inf_src - tbl_fadd_op # NORM + INF
14901 short fadd_res_qnan - tbl_fadd_op # NORM + QNAN
14902 short fadd_norm - tbl_fadd_op # NORM + DENORM
14903 short fadd_res_snan - tbl_fadd_op # NORM + SNAN
14904 short tbl_fadd_op - tbl_fadd_op #
14905 short tbl_fadd_op - tbl_fadd_op #
14907 short fadd_zero_dst - tbl_fadd_op # ZERO + NORM
14908 short fadd_zero_2 - tbl_fadd_op # ZERO + ZERO
14909 short fadd_inf_src - tbl_fadd_op # ZERO + INF
14910 short fadd_res_qnan - tbl_fadd_op # ZERO + QNAN
14911 short fadd_zero_dst - tbl_fadd_op # ZERO + DENORM
14912 short fadd_res_snan - tbl_fadd_op # ZERO + SNAN
14913 short tbl_fadd_op - tbl_fadd_op #
14914 short tbl_fadd_op - tbl_fadd_op #
14916 short fadd_inf_dst - tbl_fadd_op # INF + NORM
14917 short fadd_inf_dst - tbl_fadd_op # INF + ZERO
14918 short fadd_inf_2 - tbl_fadd_op # INF + INF
14919 short fadd_res_qnan - tbl_fadd_op # INF + QNAN
14920 short fadd_inf_dst - tbl_fadd_op # INF + DENORM
14921 short fadd_res_snan - tbl_fadd_op # INF + SNAN
14922 short tbl_fadd_op - tbl_fadd_op #
14923 short tbl_fadd_op - tbl_fadd_op #
14925 short fadd_res_qnan - tbl_fadd_op # QNAN + NORM
14926 short fadd_res_qnan - tbl_fadd_op # QNAN + ZERO
14927 short fadd_res_qnan - tbl_fadd_op # QNAN + INF
14928 short fadd_res_qnan - tbl_fadd_op # QNAN + QNAN
14929 short fadd_res_qnan - tbl_fadd_op # QNAN + DENORM
14930 short fadd_res_snan - tbl_fadd_op # QNAN + SNAN
14931 short tbl_fadd_op - tbl_fadd_op #
14932 short tbl_fadd_op - tbl_fadd_op #
14934 short fadd_norm - tbl_fadd_op # DENORM + NORM
14935 short fadd_zero_src - tbl_fadd_op # DENORM + ZERO
14936 short fadd_inf_src - tbl_fadd_op # DENORM + INF
14937 short fadd_res_qnan - tbl_fadd_op # DENORM + QNAN
14938 short fadd_norm - tbl_fadd_op # DENORM + DENORM
14939 short fadd_res_snan - tbl_fadd_op # DENORM + SNAN
14940 short tbl_fadd_op - tbl_fadd_op #
14941 short tbl_fadd_op - tbl_fadd_op #
14943 short fadd_res_snan - tbl_fadd_op # SNAN + NORM
14944 short fadd_res_snan - tbl_fadd_op # SNAN + ZERO
14945 short fadd_res_snan - tbl_fadd_op # SNAN + INF
14946 short fadd_res_snan - tbl_fadd_op # SNAN + QNAN
14947 short fadd_res_snan - tbl_fadd_op # SNAN + DENORM
14948 short fadd_res_snan - tbl_fadd_op # SNAN + SNAN
14949 short tbl_fadd_op - tbl_fadd_op #
14950 short tbl_fadd_op - tbl_fadd_op #
# NaN targets used by the table above; each forwards to the shared handler
# listed in this routine's XREF header.
14952 fadd_res_qnan:
14953 bra.l res_qnan
14954 fadd_res_snan:
14955 bra.l res_snan
14958 # both operands are ZEROes
# a0 -> source, a1 -> destination. The sign bytes are XORed so that the N
# flag tells whether the signs differ. fp0 receives the signed zero and
# FPSR_CC is set to Z (or NEG+Z); every path returns on its own.
14960 fadd_zero_2:
14961 mov.b SRC_EX(%a0),%d0 # are the signs opposite
14962 mov.b DST_EX(%a1),%d1
14963 eor.b %d0,%d1 # N set <=> the two sign bits differ
14964 bmi.w fadd_zero_2_chk_rm # weed out (-ZERO)+(+ZERO)
14966 # the signs are the same. so determine whether they are positive or negative
14967 # and return the appropriately signed zero.
14968 tst.b %d0 # are ZEROes positive or negative?
14969 bmi.b fadd_zero_rm # negative
14970 fmov.s &0x00000000,%fp0 # return +ZERO
14971 mov.b &z_bmask,FPSR_CC(%a6) # set Z
14972 rts
14975 # the ZEROes have opposite signs:
14976 # - Therefore, we return +ZERO if the rounding modes are RN,RZ, or RP.
14977 # - -ZERO is returned in the case of RM.
14979 fadd_zero_2_chk_rm:
14980 mov.b 3+L_SCR3(%a6),%d1
14981 andi.b &0x30,%d1 # extract rnd mode
14982 cmpi.b %d1,&rm_mode*0x10 # is rnd mode == RM?
14983 beq.b fadd_zero_rm # yes
14984 fmov.s &0x00000000,%fp0 # return +ZERO
14985 mov.b &z_bmask,FPSR_CC(%a6) # set Z
14986 rts
14988 fadd_zero_rm:
14989 fmov.s &0x80000000,%fp0 # return -ZERO
14990 mov.b &neg_bmask+z_bmask,FPSR_CC(%a6) # set NEG/Z
14991 rts
14994 # one operand is a ZERO and the other is a DENORM or NORM. scale
14995 # the DENORM or NORM and jump to the regular fadd routine.
# fadd_zero_dst: the destination is the ZERO. Copy and scale the source into
# FP_SCR0, put a zero in FP_SCR1, and join the add at fadd_zero_entry.
14997 fadd_zero_dst:
14998 mov.w SRC_EX(%a0),FP_SCR0_EX(%a6)
14999 mov.l SRC_HI(%a0),FP_SCR0_HI(%a6)
15000 mov.l SRC_LO(%a0),FP_SCR0_LO(%a6)
15001 bsr.l scale_to_zero_src # scale the operand
15002 clr.w FP_SCR1_EX(%a6)
15003 clr.l FP_SCR1_HI(%a6)
15004 clr.l FP_SCR1_LO(%a6)
15005 bra.w fadd_zero_entry # go execute fadd
# fadd_zero_src: the source is the ZERO. Copy and scale the destination into
# FP_SCR1, put a zero in FP_SCR0, and join the add at fadd_zero_entry.
15007 fadd_zero_src:
15008 mov.w DST_EX(%a1),FP_SCR1_EX(%a6)
15009 mov.l DST_HI(%a1),FP_SCR1_HI(%a6)
15010 mov.l DST_LO(%a1),FP_SCR1_LO(%a6)
15011 bsr.l scale_to_zero_dst # scale the operand
15012 clr.w FP_SCR0_EX(%a6)
15013 clr.l FP_SCR0_HI(%a6)
15014 clr.l FP_SCR0_LO(%a6)
15015 bra.w fadd_zero_entry # go execute fadd
15018 # both operands are INFs. an OPERR will result if the INFs have
15019 # different signs. else, an INF of the same sign is returned
# a0 -> source, a1 -> destination. The sign bytes are XORed so that the N
# flag is set exactly when the signs differ.
15021 fadd_inf_2:
15022 mov.b SRC_EX(%a0),%d0 # exclusive or the signs
15023 mov.b DST_EX(%a1),%d1
15024 eor.b %d0,%d1 # N set <=> the two sign bits differ
15025 bmi.l res_operr # weed out (-INF)+(+INF)
15027 # ok, so it's not an OPERR. but, we do have to remember to return the
15028 # src INF since that's where the 881/882 gets the j-bit from...
15031 # operands are INF and one of {ZERO, INF, DENORM, NORM}
15033 fadd_inf_src:
15034 fmovm.x SRC(%a0),&0x80 # return src INF
15035 tst.b SRC_EX(%a0) # is INF positive?
15036 bpl.b fadd_inf_done # yes; we're done
15037 mov.b &neg_bmask+inf_bmask,FPSR_CC(%a6) # set INF/NEG
15038 rts
15041 # operands are INF and one of {ZERO, INF, DENORM, NORM}
15043 fadd_inf_dst:
15044 fmovm.x DST(%a1),&0x80 # return dst INF
15045 tst.b DST_EX(%a1) # is INF positive?
15046 bpl.b fadd_inf_done # yes; we're done
15047 mov.b &neg_bmask+inf_bmask,FPSR_CC(%a6) # set INF/NEG
15048 rts
15050 fadd_inf_done:
15051 mov.b &inf_bmask,FPSR_CC(%a6) # set INF
15052 rts
15054 #########################################################################
15055 # XDEF **************************************************************** #
15056 # fsub(): emulates the fsub instruction #
15057 # fssub(): emulates the fssub instruction #
15058 # fdsub(): emulates the fdsub instruction #
15060 # XREF **************************************************************** #
15061 # addsub_scaler2() - scale the operands so they won't take exc #
15062 # ovf_res() - return default overflow result #
15063 # unf_res() - return default underflow result #
15064 # res_qnan() - set QNAN result #
15065 # res_snan() - set SNAN result #
15066 # res_operr() - set OPERR result #
15067 # scale_to_zero_src() - set src operand exponent equal to zero #
15068 # scale_to_zero_dst() - set dst operand exponent equal to zero #
15070 # INPUT *************************************************************** #
15071 # a0 = pointer to extended precision source operand #
15072 # a1 = pointer to extended precision destination operand #
15074 # OUTPUT ************************************************************** #
15076 # fp1 = EXOP (if exception occurred) #
15078 # ALGORITHM *********************************************************** #
15079 # Handle NANs, infinities, and zeroes as special cases. Divide #
15080 # norms into extended, single, and double precision. #
15081 # Do subtraction after scaling exponents such that exception won't#
15082 # occur. Then, check result exponent to see if exception would have #
15083 # occurred. If so, return default result and maybe EXOP. Else, insert #
15084 # the correct result exponent and return. Set FPSR bits as appropriate. #
15086 #########################################################################
# Precision-forcing entry sequences followed by the common fsub path.
# Each andi/ori pair keeps only the rounding-mode bits (0x30) of d0 and
# inserts single (first pair) or double (second pair) rounding precision.
# NOTE(review): the entry labels are not visible in this listing; presumably
# the two pairs belong to fssub and fdsub respectively -- confirm.
15090 andi.b &0x30,%d0 # keep rnd mode; clear rnd prec
15091 ori.b &s_mode*0x10,%d0 # insert sgl prec
15096 andi.b &0x30,%d0 # keep rnd mode; clear rnd prec
15097 ori.b &d_mode*0x10,%d0 # insert dbl prec
15101 mov.l %d0,L_SCR3(%a6) # store rnd info (prec:mode in low byte)
15104 mov.b DTAG(%a6),%d1 # fetch dst tag
15106 or.b STAG(%a6),%d1 # combine dst and src tags
15108 bne.w fsub_not_norm # optimize on non-norm input
15111 # SUB: norms and denorms
# Scale both operands, subtract with the user's rounding info, then undo the
# scaling on the result exponent and classify it against the per-precision
# overflow/underflow thresholds below.
15114 bsr.l addsub_scaler2 # scale exponents; d0 = scale factor
15117 fmovm.x FP_SCR1(%a6),&0x80 # load dst op
15119 fmov.l &0x0,%fpsr # clear FPSR
15120 fmov.l L_SCR3(%a6),%fpcr # set FPCR
15122 fsub.x FP_SCR0(%a6),%fp0 # execute subtract
15124 fmov.l &0x0,%fpcr # clear FPCR
15125 fmov.l %fpsr,%d1 # fetch INEX2, N, Z
15127 or.l %d1,USER_FPSR(%a6) # save exc and ccode bits
15129 fbeq.w fsub_zero_exit # if result zero, end now
15131 mov.l %d2,-(%sp) # save d2
15133 fmovm.x &0x01,-(%sp) # save result to stack
15135 mov.w 2+L_SCR3(%a6),%d1 # fetch low word of stored rnd info
15138 mov.w (%sp),%d2 # fetch new exponent
15139 andi.l &0x7fff,%d2 # strip sign
15140 sub.l %d0,%d2 # d2 = exponent - scale factor
15142 cmp.l %d2,(tbl_fsub_ovfl.b,%pc,%d1.w*4) # is it an overflow?
15143 bge.b fsub_ovfl # yes
15145 cmp.l %d2,(tbl_fsub_unfl.b,%pc,%d1.w*4) # is it an underflow?
15146 blt.w fsub_unfl # yes
15147 beq.w fsub_may_unfl # maybe; go find out
15151 andi.w &0x8000,%d1 # keep sign
15152 or.w %d2,%d1 # concat {sgn,new exp}
15153 mov.w %d1,(%sp) # insert new exponent
15155 fmovm.x (%sp)+,&0x80 # return result in fp0
15157 mov.l (%sp)+,%d2 # restore d2
15161 # fmov.s &0x00000000,%fp0 # return zero in fp0
# overflow exponent thresholds, one long per rounding precision
# (indexed by d1.w*4 from tbl_fsub_ovfl in the compare above)
15165 long 0x7fff # ext ovfl
15166 long 0x407f # sgl ovfl
15167 long 0x43ff # dbl ovfl
# underflow exponent thresholds, one long per rounding precision
# (indexed by d1.w*4 from tbl_fsub_unfl in the compare above)
15170 long 0x0000 # ext unfl
15171 long 0x3f81 # sgl unfl
15172 long 0x3c01 # dbl unfl
# fsub overflow handling.
# Sets OVFL/AOVFL/AINEX in USER_FPSR. If neither OVFL nor INEX is enabled,
# the default overflow result from ovf_res() is returned in fp0. Otherwise
# the EXOP (result with the exponent re-biased by -0x6000) is built on the
# stack and returned in fp1 before the default result is produced.
# On entry: d0 = scale factor, d2 = result exponent with scaling removed,
# the 12-byte result is on top of the stack with the saved d2 below it.
15175 or.l &ovfl_inx_mask,USER_FPSR(%a6) # set ovfl/aovfl/ainex
15177 mov.b FPCR_ENABLE(%a6),%d1
15178 andi.b &0x13,%d1 # is OVFL or INEX enabled?
15179 bne.b fsub_ovfl_ena # yes
# traps disabled: return the default overflow result
15183 btst &neg_bit,FPSR_CC(%a6) # is result negative?
15184 sne %d1 # set sign param accordingly
15185 mov.l L_SCR3(%a6),%d0 # pass prec:rnd
15186 bsr.l ovf_res # calculate default result
15187 or.b %d0,FPSR_CC(%a6) # set INF,N if applicable
15188 fmovm.x (%a0),&0x80 # return default result in fp0
15189 mov.l (%sp)+,%d2 # restore d2
# traps enabled: decide whether the result on the stack can be used as the
# EXOP directly (extended precision) or must be recomputed (sgl/dbl).
# L_SCR3 is written with mov.l, so the prec:rnd byte is the low-order byte at
# offset 3 (big-endian); offset 0 is the high-order byte, which the andi.b/ori.b
# precision updates never touch. Reading offset 0 made this test always report
# "extended" and left the sgl/dbl path below unreachable.
15193 mov.b 3+L_SCR3(%a6),%d1 # fetch prec:rnd byte
15194 andi.b &0xc0,%d1 # is precision extended?
15195 bne.b fsub_ovfl_ena_sd # no
15197 fsub_ovfl_ena_cont:
15198 mov.w (%sp),%d1 # fetch {sgn,exp}
15199 andi.w &0x8000,%d1 # keep sign
15200 subi.l &0x6000,%d2 # subtract new bias
15201 andi.w &0x7fff,%d2 # clear top bit
15202 or.w %d2,%d1 # concat sign,exp
15203 mov.w %d1,(%sp) # insert new exponent
15205 fmovm.x (%sp)+,&0x40 # return EXOP in fp1
15206 bra.b fsub_ovfl_dis
# sgl/dbl precision: redo the subtract with extended precision (rounding mode
# kept, precision bits cleared) so the EXOP mantissa is rounded to extended.
15209 fmovm.x FP_SCR1(%a6),&0x80 # load dst op
15211 mov.l L_SCR3(%a6),%d1
15212 andi.b &0x30,%d1 # keep rnd mode; clear rnd prec
15213 fmov.l %d1,%fpcr # set FPCR
15215 fsub.x FP_SCR0(%a6),%fp0 # execute subtract
15217 fmov.l &0x0,%fpcr # clear FPCR
15220 fmovm.x &0x01,-(%sp) # push recomputed result
15221 bra.b fsub_ovfl_ena_cont
# fsub underflow handling.
# Sets the UNFL exception bit, redoes the subtract in round-to-zero mode and
# merges the resulting status into USER_FPSR. With UNFL and INEX both
# disabled, the default underflow result from unf_res() is returned in fp0.
# Otherwise the EXOP (exponent unscaled and re-biased by +0x6000) is returned
# in fp1 first.
15224 bset &unfl_bit,FPSR_EXCEPT(%a6) # set unfl exc bit
15228 fmovm.x FP_SCR1(%a6),&0x80 # load dst op
15230 fmov.l &rz_mode*0x10,%fpcr # set FPCR
15231 fmov.l &0x0,%fpsr # clear FPSR
15233 fsub.x FP_SCR0(%a6),%fp0 # execute subtract
15235 fmov.l &0x0,%fpcr # clear FPCR
15236 fmov.l %fpsr,%d1 # save status
15238 or.l %d1,USER_FPSR(%a6) # merge status into USER_FPSR
15240 mov.b FPCR_ENABLE(%a6),%d1
15241 andi.b &0x0b,%d1 # is UNFL or INEX enabled?
15242 bne.b fsub_unfl_ena # yes
# traps disabled: return the default underflow result
15245 fmovm.x &0x80,FP_SCR0(%a6) # store out result
15247 lea FP_SCR0(%a6),%a0 # pass: result addr
15248 mov.l L_SCR3(%a6),%d1 # pass: rnd prec,mode
15249 bsr.l unf_res # calculate default result
15250 or.b %d0,FPSR_CC(%a6) # 'Z' may have been set
15251 fmovm.x FP_SCR0(%a6),&0x80 # return default result in fp0
15252 mov.l (%sp)+,%d2 # restore d2
# traps enabled: compute the EXOP in fp1
15256 fmovm.x FP_SCR1(%a6),&0x40 # load dst op into fp1
15258 mov.l L_SCR3(%a6),%d1
15259 andi.b &0xc0,%d1 # is precision extended?
15260 bne.b fsub_unfl_ena_sd # no
15262 fmov.l L_SCR3(%a6),%fpcr # set FPCR
15264 fsub_unfl_ena_cont:
15265 fmov.l &0x0,%fpsr # clear FPSR
15267 fsub.x FP_SCR0(%a6),%fp1 # execute subtract
15269 fmov.l &0x0,%fpcr # clear FPCR
15271 fmovm.x &0x40,FP_SCR0(%a6) # store fp1 result to FP_SCR0
15272 mov.w FP_SCR0_EX(%a6),%d1 # fetch {sgn,exp}
15273 mov.l %d1,%d2 # make a copy
15274 andi.l &0x7fff,%d1 # strip sign
15275 andi.w &0x8000,%d2 # keep old sign
15276 sub.l %d0,%d1 # d1 = exponent - scale factor
15277 addi.l &0x6000,%d1 # add new bias
15278 andi.w &0x7fff,%d1 # clear top bit
15279 or.w %d2,%d1 # concat sgn,exp
15280 mov.w %d1,FP_SCR0_EX(%a6) # insert new exponent
15281 fmovm.x FP_SCR0(%a6),&0x40 # return EXOP in fp1
15282 bra.w fsub_unfl_dis
# sgl/dbl precision: use extended precision with the user's rounding mode
15285 mov.l L_SCR3(%a6),%d1
15286 andi.b &0x30,%d1 # keep rnd mode; clear rnd prec
15287 fmov.l %d1,%fpcr # set FPCR
15289 bra.b fsub_unfl_ena_cont
15292 # result is equal to the smallest normalized number in the selected precision
15293 # if the precision is extended, this result could not have come from an
15294 # underflow that rounded up.
15297 mov.l L_SCR3(%a6),%d1
15298 andi.b &0xc0,%d1 # is precision extended?
15299 beq.w fsub_normal # yes; no underflow occurred
15302 cmpi.l %d1,&0x80000000 # is hi(man) = 0x80000000?
15303 bne.w fsub_normal # no; no underflow occurred
15305 tst.l 0x8(%sp) # is lo(man) = 0x0?
15306 bne.w fsub_normal # no; no underflow occurred
15308 btst &inex2_bit,FPSR_EXCEPT(%a6) # is INEX2 set?
15309 beq.w fsub_normal # no; no underflow occurred
15312 # ok, so now the result has an exponent equal to the smallest normalized
15313 # exponent for the selected precision. also, the mantissa is equal to
15314 # 0x8000000000000000 and this mantissa is the result of an inexact rounding
15316 # now, we must determine whether the pre-rounded result was an underflow
15317 # rounded "up" or a normalized number rounded "down".
15318 # so, we do this by re-executing the subtract using RZ as the rounding mode and
15319 # seeing if the new result is smaller or equal to the current result.
15321 fmovm.x FP_SCR1(%a6),&0x40 # load dst op into fp1
15323 mov.l L_SCR3(%a6),%d1
15324 andi.b &0xc0,%d1 # keep rnd prec
15325 ori.b &rz_mode*0x10,%d1 # insert rnd mode
15326 fmov.l %d1,%fpcr # set FPCR
15327 fmov.l &0x0,%fpsr # clear FPSR
15329 fsub.x FP_SCR0(%a6),%fp1 # execute subtract
15331 fmov.l &0x0,%fpcr # clear FPCR
15333 fabs.x %fp0 # compare absolute values
15335 fcmp.x %fp0,%fp1 # is first result > second?
15337 fbgt.w fsub_unfl # yes; it's an underflow
15338 bra.w fsub_normal # no; it's not an underflow
15340 ##########################################################################
15343 # Sub: inputs are not both normalized; what are they?
# Dispatch through tbl_fsub_op: d1 selects a 16-bit entry holding the handler's
# offset from tbl_fsub_op, and the jmp adds that offset back to the table base.
# Entries are grouped eight per dst type; within a group the src type runs
# NORM, ZERO, INF, QNAN, DENORM, SNAN followed by two offset-0 filler slots.
# Row comments read "dst - src".
15346 mov.w (tbl_fsub_op.b,%pc,%d1.w*2),%d1 # d1 = handler offset
15347 jmp (tbl_fsub_op.b,%pc,%d1.w*1) # jump to tbl_fsub_op + offset
15351 short fsub_norm - tbl_fsub_op # NORM - NORM
15352 short fsub_zero_src - tbl_fsub_op # NORM - ZERO
15353 short fsub_inf_src - tbl_fsub_op # NORM - INF
15354 short fsub_res_qnan - tbl_fsub_op # NORM - QNAN
15355 short fsub_norm - tbl_fsub_op # NORM - DENORM
15356 short fsub_res_snan - tbl_fsub_op # NORM - SNAN
15357 short tbl_fsub_op - tbl_fsub_op #
15358 short tbl_fsub_op - tbl_fsub_op #
15360 short fsub_zero_dst - tbl_fsub_op # ZERO - NORM
15361 short fsub_zero_2 - tbl_fsub_op # ZERO - ZERO
15362 short fsub_inf_src - tbl_fsub_op # ZERO - INF
15363 short fsub_res_qnan - tbl_fsub_op # ZERO - QNAN
15364 short fsub_zero_dst - tbl_fsub_op # ZERO - DENORM
15365 short fsub_res_snan - tbl_fsub_op # ZERO - SNAN
15366 short tbl_fsub_op - tbl_fsub_op #
15367 short tbl_fsub_op - tbl_fsub_op #
15369 short fsub_inf_dst - tbl_fsub_op # INF - NORM
15370 short fsub_inf_dst - tbl_fsub_op # INF - ZERO
15371 short fsub_inf_2 - tbl_fsub_op # INF - INF
15372 short fsub_res_qnan - tbl_fsub_op # INF - QNAN
15373 short fsub_inf_dst - tbl_fsub_op # INF - DENORM
15374 short fsub_res_snan - tbl_fsub_op # INF - SNAN
15375 short tbl_fsub_op - tbl_fsub_op #
15376 short tbl_fsub_op - tbl_fsub_op #
15378 short fsub_res_qnan - tbl_fsub_op # QNAN - NORM
15379 short fsub_res_qnan - tbl_fsub_op # QNAN - ZERO
15380 short fsub_res_qnan - tbl_fsub_op # QNAN - INF
15381 short fsub_res_qnan - tbl_fsub_op # QNAN - QNAN
15382 short fsub_res_qnan - tbl_fsub_op # QNAN - DENORM
15383 short fsub_res_snan - tbl_fsub_op # QNAN - SNAN
15384 short tbl_fsub_op - tbl_fsub_op #
15385 short tbl_fsub_op - tbl_fsub_op #
15387 short fsub_norm - tbl_fsub_op # DENORM - NORM
15388 short fsub_zero_src - tbl_fsub_op # DENORM - ZERO
15389 short fsub_inf_src - tbl_fsub_op # DENORM - INF
15390 short fsub_res_qnan - tbl_fsub_op # DENORM - QNAN
15391 short fsub_norm - tbl_fsub_op # DENORM - DENORM
15392 short fsub_res_snan - tbl_fsub_op # DENORM - SNAN
15393 short tbl_fsub_op - tbl_fsub_op #
15394 short tbl_fsub_op - tbl_fsub_op #
15396 short fsub_res_snan - tbl_fsub_op # SNAN - NORM
15397 short fsub_res_snan - tbl_fsub_op # SNAN - ZERO
15398 short fsub_res_snan - tbl_fsub_op # SNAN - INF
15399 short fsub_res_snan - tbl_fsub_op # SNAN - QNAN
15400 short fsub_res_snan - tbl_fsub_op # SNAN - DENORM
15401 short fsub_res_snan - tbl_fsub_op # SNAN - SNAN
15402 short tbl_fsub_op - tbl_fsub_op #
15403 short tbl_fsub_op - tbl_fsub_op #
15411 # both operands are ZEROes
# Result of (dst ZERO) - (src ZERO):
#  - signs differ: a ZERO with the sign of the dst operand
#  - signs equal:  +ZERO, except -ZERO when the rounding mode is RM
# a0 = pointer to src operand, a1 = pointer to dst operand.
15414 mov.b SRC_EX(%a0),%d0 # d0 = src sign byte
15415 mov.b DST_EX(%a1),%d1 # d1 = dst sign byte
15417 bpl.b fsub_zero_2_chk_rm # signs are the same
15419 # the signs are opposite, so, return a ZERO w/ the sign of the dst ZERO
# d0 was loaded from the src operand, so it never holds the dst sign; test
# the dst operand in memory instead so that (+0)-(-0) yields +0 and
# (-0)-(+0) yields -0.
15420 tst.b DST_EX(%a1) # is dst negative?
15421 bmi.b fsub_zero_2_rm # yes
15422 fmov.s &0x00000000,%fp0 # no; return +ZERO
15423 mov.b &z_bmask,FPSR_CC(%a6) # set Z
15427 # the ZEROes have the same signs:
15428 # - Therefore, we return +ZERO if the rounding mode is RN,RZ, or RP
15429 # - -ZERO is returned in the case of RM.
15431 fsub_zero_2_chk_rm:
15432 mov.b 3+L_SCR3(%a6),%d1 # fetch prec:rnd byte
15433 andi.b &0x30,%d1 # extract rnd mode
15434 cmpi.b %d1,&rm_mode*0x10 # is rnd mode = RM?
15435 beq.b fsub_zero_2_rm # yes
15436 fmov.s &0x00000000,%fp0 # no; return +ZERO
15437 mov.b &z_bmask,FPSR_CC(%a6) # set Z
15441 fmov.s &0x80000000,%fp0 # return -ZERO
15442 mov.b &z_bmask+neg_bmask,FPSR_CC(%a6) # set Z/NEG
15446 # one operand is a ZERO and the other is a DENORM or a NORM.
15447 # scale the DENORM or NORM and jump to the regular fsub routine.
# first sequence: the src operand is the non-zero one. copy it to FP_SCR0,
# scale it, and write an all-zero (+ZERO) dst into FP_SCR1.
15450 mov.w SRC_EX(%a0),FP_SCR0_EX(%a6)
15451 mov.l SRC_HI(%a0),FP_SCR0_HI(%a6)
15452 mov.l SRC_LO(%a0),FP_SCR0_LO(%a6)
15453 bsr.l scale_to_zero_src # scale the operand
15454 clr.w FP_SCR1_EX(%a6) # dst = +ZERO
15455 clr.l FP_SCR1_HI(%a6)
15456 clr.l FP_SCR1_LO(%a6)
15457 bra.w fsub_zero_entry # go execute fsub
# second sequence: the dst operand is the non-zero one. copy it to FP_SCR1,
# scale it, and write an all-zero (+ZERO) src into FP_SCR0.
15460 mov.w DST_EX(%a1),FP_SCR1_EX(%a6)
15461 mov.l DST_HI(%a1),FP_SCR1_HI(%a6)
15462 mov.l DST_LO(%a1),FP_SCR1_LO(%a6)
15463 bsr.l scale_to_zero_dst # scale the operand
15464 clr.w FP_SCR0_EX(%a6) # src = +ZERO
15465 clr.l FP_SCR0_HI(%a6)
15466 clr.l FP_SCR0_LO(%a6)
15467 bra.w fsub_zero_entry # go execute fsub
15470 # both operands are INFs. an OPERR will result if the INFs have the
15471 # same signs. else, the src INF with its sign inverted is returned.
15474 mov.b SRC_EX(%a0),%d0 # fetch src sign byte
15475 mov.b DST_EX(%a1),%d1 # fetch dst sign byte
15477 bpl.l res_operr # weed out same-sign INF - INF
15479 # ok, so it's not an OPERR. but we do have to remember to return
15480 # the src INF since that's where the 881/882 gets the j-bit.
15483 fmovm.x SRC(%a0),&0x80 # return src INF
15484 fneg.x %fp0 # invert sign
15485 fbge.w fsub_inf_done # sign is now positive
15486 mov.b &neg_bmask+inf_bmask,FPSR_CC(%a6) # set INF/NEG
# dst is the INF operand: return it unchanged
15490 fmovm.x DST(%a1),&0x80 # return dst INF
15491 tst.b DST_EX(%a1) # is INF negative?
15492 bpl.b fsub_inf_done # no
15493 mov.b &neg_bmask+inf_bmask,FPSR_CC(%a6) # set INF/NEG
# positive INF result: only the INF condition code is set
15497 mov.b &inf_bmask,FPSR_CC(%a6) # set INF
15500 #########################################################################
15501 # XDEF **************************************************************** #
15502 # fsqrt(): emulates the fsqrt instruction #
15503 # fssqrt(): emulates the fssqrt instruction #
15504 # fdsqrt(): emulates the fdsqrt instruction #
15506 # XREF **************************************************************** #
15507 # scale_sqrt() - scale the source operand #
15508 # unf_res() - return default underflow result #
15509 # ovf_res() - return default overflow result #
15510 # res_qnan_1op() - return QNAN result #
15511 # res_snan_1op() - return SNAN result #
15513 # INPUT *************************************************************** #
15514 # a0 = pointer to extended precision source operand #
15515 # d0 rnd prec,mode #
15517 # OUTPUT ************************************************************** #
15519 # fp1 = EXOP (if exception occurred) #
15521 # ALGORITHM *********************************************************** #
15522 # Handle NANs, infinities, and zeroes as special cases. Divide #
15523 # norms/denorms into ext/sgl/dbl precision. #
15524 # For norms/denorms, scale the exponents such that a sqrt #
15525 # instruction won't cause an exception. Use the regular fsqrt to #
15526 # compute a result. Check if the regular operands would have taken #
15527 # an exception. If so, return the default overflow/underflow result #
15528 # and return the EXOP if exceptions are enabled. Else, scale the #
15529 # result operand to the proper exponent. #
15531 #########################################################################
# Precision-forcing entry sequences followed by the common fsqrt code.
# Each andi/ori pair keeps only the rounding-mode bits (0x30) of d0 and
# inserts single (first pair) or double (second pair) rounding precision.
# NOTE(review): the entry labels are not visible in this listing; presumably
# the two pairs belong to fssqrt and fdsqrt respectively -- confirm.
15535 andi.b &0x30,%d0 # keep rnd mode; clear rnd prec
15536 ori.b &s_mode*0x10,%d0 # insert sgl precision
15541 andi.b &0x30,%d0 # keep rnd mode; clear rnd prec
15542 ori.b &d_mode*0x10,%d0 # insert dbl precision
15546 mov.l %d0,L_SCR3(%a6) # store rnd info
15548 mov.b STAG(%a6),%d1 # fetch src tag
15549 bne.w fsqrt_not_norm # optimize on non-norm input
15552 # SQUARE ROOT: norms and denorms ONLY!
# a negative operand is an OPERR. for extended precision the square root is
# executed directly on the operand at (a0).
15555 tst.b SRC_EX(%a0) # is operand negative?
15556 bmi.l res_operr # yes
15558 andi.b &0xc0,%d0 # is precision extended?
15559 bne.b fsqrt_not_ext # no; go handle sgl or dbl
15561 fmov.l L_SCR3(%a6),%fpcr # set FPCR
15562 fmov.l &0x0,%fpsr # clear FPSR
15564 fsqrt.x (%a0),%fp0 # execute square root
15567 or.l %d1,USER_FPSR(%a6) # set N,INEX
# second sequence: same sign and precision checks, but the operand is copied
# to FP_SCR0 and scaled with scale_sqrt before joining fsqrt_sd_normal.
# NOTE(review): presumably the DENORM entry -- its label is not visible here.
15572 tst.b SRC_EX(%a0) # is operand negative?
15573 bmi.l res_operr # yes
15575 andi.b &0xc0,%d0 # is precision extended?
15576 bne.b fsqrt_not_ext # no; go handle sgl or dbl
15578 mov.w SRC_EX(%a0),FP_SCR0_EX(%a6)
15579 mov.l SRC_HI(%a0),FP_SCR0_HI(%a6)
15580 mov.l SRC_LO(%a0),FP_SCR0_LO(%a6)
15582 bsr.l scale_sqrt # calculate scale factor
15584 bra.w fsqrt_sd_normal
15587 # rounding precision is either single or double
15590 cmpi.b %d0,&s_mode*0x10 # separate sgl/dbl prec
15594 # operand is to be rounded to single precision
# copy the operand to FP_SCR0, scale it, and compare the returned scale
# factor (d0) against the single-precision underflow/overflow limits.
15597 mov.w SRC_EX(%a0),FP_SCR0_EX(%a6)
15598 mov.l SRC_HI(%a0),FP_SCR0_HI(%a6)
15599 mov.l SRC_LO(%a0),FP_SCR0_LO(%a6)
15601 bsr.l scale_sqrt # calculate scale factor
15603 cmpi.l %d0,&0x3fff-0x3f81 # will move in underflow?
15604 beq.w fsqrt_sd_may_unfl # maybe; go check
15605 bgt.w fsqrt_sd_unfl # yes; go handle underflow
15606 cmpi.l %d0,&0x3fff-0x407f # will move in overflow?
15607 beq.w fsqrt_sd_may_ovfl # maybe; go check
15608 blt.w fsqrt_sd_ovfl # yes; go handle overflow
15611 # operand will NOT overflow or underflow when moved in to the fp reg file
15614 fmov.l &0x0,%fpsr # clear FPSR
15615 fmov.l L_SCR3(%a6),%fpcr # set FPCR
15617 fsqrt.x FP_SCR0(%a6),%fp0 # perform square root
15619 fmov.l %fpsr,%d1 # save FPSR
15620 fmov.l &0x0,%fpcr # clear FPCR
15622 or.l %d1,USER_FPSR(%a6) # save INEX2,N
# remove the scaling from the result exponent and return the result in fp0
15624 fsqrt_sd_normal_exit:
15625 mov.l %d2,-(%sp) # save d2
15626 fmovm.x &0x80,FP_SCR0(%a6) # store out result
15627 mov.w FP_SCR0_EX(%a6),%d1 # load sgn,exp
15628 mov.l %d1,%d2 # make a copy
15629 andi.l &0x7fff,%d1 # strip sign
15630 sub.l %d0,%d1 # d1 = exponent - scale factor
15631 andi.w &0x8000,%d2 # keep old sign
15632 or.w %d1,%d2 # concat old sign,new exp
15633 mov.w %d2,FP_SCR0_EX(%a6) # insert new exponent
15634 mov.l (%sp)+,%d2 # restore d2
15635 fmovm.x FP_SCR0(%a6),&0x80 # return result in fp0
15639 # operand is to be rounded to double precision
# copy the operand to FP_SCR0, scale it, and compare the returned scale
# factor (d0) against the double-precision underflow/overflow limits.
15642 mov.w SRC_EX(%a0),FP_SCR0_EX(%a6)
15643 mov.l SRC_HI(%a0),FP_SCR0_HI(%a6)
15644 mov.l SRC_LO(%a0),FP_SCR0_LO(%a6)
15646 bsr.l scale_sqrt # calculate scale factor
15648 cmpi.l %d0,&0x3fff-0x3c01 # will move in underflow?
15649 beq.w fsqrt_sd_may_unfl # maybe; go check
15650 bgt.b fsqrt_sd_unfl # yes; go handle underflow
15651 cmpi.l %d0,&0x3fff-0x43ff # will move in overflow?
15652 beq.w fsqrt_sd_may_ovfl # maybe; go check
15653 blt.w fsqrt_sd_ovfl # yes; go handle overflow
15654 bra.w fsqrt_sd_normal # no; go handle normalized op
15656 # we're on the line here and the distinguishing characteristic is whether
15657 # the exponent is 3fff or 3ffe. if it's 3fff, then it's a safe number
15658 # otherwise fall through to underflow.
15660 btst &0x0,1+FP_SCR0_EX(%a6) # is exponent 0x3fff?
15661 bne.w fsqrt_sd_normal # yes, so no underflow
15664 # operand WILL underflow when moved in to the fp register file
# set UNFL, take the square root in round-to-zero mode and merge the status
# into USER_FPSR before choosing between the default result and the EXOP.
15667 bset &unfl_bit,FPSR_EXCEPT(%a6) # set unfl exc bit
15669 fmov.l &rz_mode*0x10,%fpcr # set FPCR
15670 fmov.l &0x0,%fpsr # clear FPSR
15672 fsqrt.x FP_SCR0(%a6),%fp0 # execute square root
15674 fmov.l %fpsr,%d1 # save status
15675 fmov.l &0x0,%fpcr # clear FPCR
15677 or.l %d1,USER_FPSR(%a6) # save INEX2,N
15679 # if underflow or inexact is enabled, go calculate EXOP first.
15680 mov.b FPCR_ENABLE(%a6),%d1
15681 andi.b &0x0b,%d1 # is UNFL or INEX enabled?
15682 bne.b fsqrt_sd_unfl_ena # yes
# traps disabled (also the join point after the EXOP has been built):
# return the default underflow result
15685 fmovm.x &0x80,FP_SCR0(%a6) # store out result
15687 lea FP_SCR0(%a6),%a0 # pass: result addr
15688 mov.l L_SCR3(%a6),%d1 # pass: rnd prec,mode
15689 bsr.l unf_res # calculate default result
15690 or.b %d0,FPSR_CC(%a6) # set possible 'Z' ccode
15691 fmovm.x FP_SCR0(%a6),&0x80 # return default result in fp0
15695 # operand will underflow AND underflow is enabled.
15696 # Therefore, we must return the result rounded to extended precision.
# NOTE(review): on this path FP_SCR0 still holds the scaled source operand
# (the fsqrt result is only in fp0 and has not been stored yet), so the EXOP
# built below takes its mantissa from the operand -- confirm this is intended.
15699 mov.l FP_SCR0_HI(%a6),FP_SCR1_HI(%a6)
15700 mov.l FP_SCR0_LO(%a6),FP_SCR1_LO(%a6)
15701 mov.w FP_SCR0_EX(%a6),%d1 # load current exponent
15703 mov.l %d2,-(%sp) # save d2
15704 mov.l %d1,%d2 # make a copy
15705 andi.l &0x7fff,%d1 # strip sign
15706 andi.w &0x8000,%d2 # keep old sign
15707 sub.l %d0,%d1 # subtract scale factor
15708 addi.l &0x6000,%d1 # add new bias
15710 or.w %d2,%d1 # concat new sign,new exp
15711 mov.w %d1,FP_SCR1_EX(%a6) # insert new exp
15712 fmovm.x FP_SCR1(%a6),&0x40 # return EXOP in fp1
15713 mov.l (%sp)+,%d2 # restore d2
15714 bra.b fsqrt_sd_unfl_dis
15717 # operand WILL overflow.
# take the square root with the user's rounding info, merge the status into
# USER_FPSR, flag the overflow, then return either the default overflow
# result alone or the EXOP in fp1 followed by the default result.
15720 fmov.l &0x0,%fpsr # clear FPSR
15721 fmov.l L_SCR3(%a6),%fpcr # set FPCR
15723 fsqrt.x FP_SCR0(%a6),%fp0 # perform square root
15725 fmov.l &0x0,%fpcr # clear FPCR
15726 fmov.l %fpsr,%d1 # save FPSR
15728 or.l %d1,USER_FPSR(%a6) # save INEX2,N
15731 or.l &ovfl_inx_mask,USER_FPSR(%a6) # set ovfl/aovfl/ainex
15733 mov.b FPCR_ENABLE(%a6),%d1
15734 andi.b &0x13,%d1 # is OVFL or INEX enabled?
15735 bne.b fsqrt_sd_ovfl_ena # yes
15738 # OVFL is not enabled; therefore, we must create the default result by
15739 # calling ovf_res().
15742 btst &neg_bit,FPSR_CC(%a6) # is result negative?
15743 sne %d1 # set sign param accordingly
15744 mov.l L_SCR3(%a6),%d0 # pass: prec,mode
15745 bsr.l ovf_res # calculate default result
15746 or.b %d0,FPSR_CC(%a6) # set INF,N if applicable
15747 fmovm.x (%a0),&0x80 # return default result in fp0
15752 # the INEX2 bit has already been updated by the round to the correct precision.
15753 # now, round to extended(and don't alter the FPSR).
# NOTE(review): FP_SCR0 is not written between the fsqrt above and this point
# in the visible code, so the EXOP loaded into fp1 below appears to be built
# from the scaled source operand rather than the square-root result -- confirm.
15756 mov.l %d2,-(%sp) # save d2
15757 mov.w FP_SCR0_EX(%a6),%d1 # fetch {sgn,exp}
15758 mov.l %d1,%d2 # make a copy
15759 andi.l &0x7fff,%d1 # strip sign
15760 andi.w &0x8000,%d2 # keep old sign
15761 sub.l %d0,%d1 # d1 = exponent - scale factor
15762 subi.l &0x6000,%d1 # subtract bias
15764 or.w %d2,%d1 # concat sign,exp
15765 mov.w %d1,FP_SCR0_EX(%a6) # insert new exponent
15766 fmovm.x FP_SCR0(%a6),&0x40 # return EXOP in fp1
15767 mov.l (%sp)+,%d2 # restore d2
15768 bra.b fsqrt_sd_ovfl_dis
15771 # the move in MAY overflow. so...
# a scaled exponent of 0x3fff (odd) is a certain overflow. otherwise compute
# the square root and treat a result magnitude >= 1 as an overflow.
15774 btst &0x0,1+FP_SCR0_EX(%a6) # is exponent 0x3fff?
15775 bne.w fsqrt_sd_ovfl # yes, so overflow
15777 fmov.l &0x0,%fpsr # clear FPSR
15778 fmov.l L_SCR3(%a6),%fpcr # set FPCR
15780 fsqrt.x FP_SCR0(%a6),%fp0 # perform square root
15782 fmov.l %fpsr,%d1 # save status
15783 fmov.l &0x0,%fpcr # clear FPCR
15785 or.l %d1,USER_FPSR(%a6) # save INEX2,N
15787 fmov.x %fp0,%fp1 # make a copy of result
15788 fcmp.b %fp1,&0x1 # is |result| >= 1.b?
15789 fbge.w fsqrt_sd_ovfl_tst # yes; overflow has occurred
15791 # no, it didn't overflow; we have correct result
15792 bra.w fsqrt_sd_normal_exit
15794 ##########################################################################
15797 # input is not normalized; what is it?
# d1 holds the source tag fetched from STAG at the fsqrt entry.
15800 cmpi.b %d1,&DENORM # weed out DENORM
15802 cmpi.b %d1,&ZERO # weed out ZERO
15804 cmpi.b %d1,&INF # weed out INF
15806 cmpi.b %d1,&SNAN # weed out SNAN
# fsqrt(+ZERO) = +ZERO
# fsqrt(-ZERO) = -ZERO
15813 # fsqrt(+INF) = +INF
15814 # fsqrt(-INF) = OPERR
15817 tst.b SRC_EX(%a0) # is ZERO positive or negative?
15818 bmi.b fsqrt_zero_m # negative
15820 fmov.s &0x00000000,%fp0 # return +ZERO
15821 mov.b &z_bmask,FPSR_CC(%a6) # set 'Z' ccode bit
15824 fmov.s &0x80000000,%fp0 # return -ZERO
15825 mov.b &z_bmask+neg_bmask,FPSR_CC(%a6) # set 'Z','N' ccode bits
15829 tst.b SRC_EX(%a0) # is INF positive or negative?
15830 bmi.l res_operr # negative
15832 fmovm.x SRC(%a0),&0x80 # return +INF in fp0
15833 mov.b &inf_bmask,FPSR_CC(%a6) # set 'I' ccode bit
15836 ##########################################################################
15838 #########################################################################
15839 # XDEF **************************************************************** #
15840 # addsub_scaler2(): scale inputs to fadd/fsub such that no #
15841 # OVFL/UNFL exceptions will result #
15843 # XREF **************************************************************** #
15844 # norm() - normalize mantissa after adjusting exponent #
15846 # INPUT *************************************************************** #
15847 # FP_SRC(a6) = fp op1(src) #
15848 # FP_DST(a6) = fp op2(dst) #
15850 # OUTPUT ************************************************************** #
15851 # FP_SCR0(a6) = fp op1 scaled(src) #
15852 # FP_SCR1(a6) = fp op2 scaled(dst) #
15853 # d0 = scale amount #
15855 # ALGORITHM *********************************************************** #
15856 # If the DST exponent is > the SRC exponent, set the DST exponent #
15857 # equal to 0x3fff and scale the SRC exponent by the value that the #
15858 # DST exponent was scaled by. If the SRC exponent is greater or equal, #
15859 # do the opposite. Return this scale factor in d0. #
15860 # If the two exponents differ by > the number of mantissa bits #
15861 # plus two, then set the smallest exponent to a very small value as a #
15862 # quick shortcut. #
15864 #########################################################################
15866 global addsub_scaler2
# Copies the src operand at (a0) into FP_SCR0 and the dst operand at (a1) into
# FP_SCR1, then scales the operand with the larger exponent to 0x3fff and
# applies the same scale factor to the other one. The scale factor is kept on
# the stack while working and returned in d0.
15868 mov.l SRC_HI(%a0),FP_SCR0_HI(%a6)
15869 mov.l DST_HI(%a1),FP_SCR1_HI(%a6)
15870 mov.l SRC_LO(%a0),FP_SCR0_LO(%a6)
15871 mov.l DST_LO(%a1),FP_SCR1_LO(%a6)
15872 mov.w SRC_EX(%a0),%d0 # d0 = src {sgn,exp}
15873 mov.w DST_EX(%a1),%d1 # d1 = dst {sgn,exp}
15874 mov.w %d0,FP_SCR0_EX(%a6)
15875 mov.w %d1,FP_SCR1_EX(%a6)
15879 mov.w %d0,L_SCR1(%a6) # store src exponent
15880 mov.w %d1,2+L_SCR1(%a6) # store dst exponent
15882 cmp.w %d0, %d1 # is src exp >= dst exp?
15885 # dst exp is > src exp; scale dst to exp = 0x3fff
15887 bsr.l scale_to_zero_dst
15888 mov.l %d0,-(%sp) # save scale factor
15890 cmpi.b STAG(%a6),&DENORM # is src denormalized?
15893 lea FP_SCR0(%a6),%a0 # pass ptr to src op
15894 bsr.l norm # normalize the denorm; result is new exp
15895 neg.w %d0 # new exp = -(shft val)
15896 mov.w %d0,L_SCR1(%a6) # insert new exp
15899 mov.w 2+L_SCR1(%a6),%d0 # fetch dst (larger) exponent
15900 subi.w &mantissalen+2,%d0 # subtract mantissalen+2 from larger exp
15902 cmp.w %d0,L_SCR1(%a6) # is difference >= len(mantissa)+2?
15903 bge.b quick_scale12
15905 mov.w L_SCR1(%a6),%d0 # fetch src exponent
15906 add.w 0x2(%sp),%d0 # scale src exponent by scale factor
15907 mov.w FP_SCR0_EX(%a6),%d1 # fetch src {sgn,exp}
15909 or.w %d1,%d0 # concat {sgn,new exp}
15910 mov.w %d0,FP_SCR0_EX(%a6) # insert new src exponent
15912 mov.l (%sp)+,%d0 # return SCALE factor
# quick shortcut: exponents are too far apart, so give src a tiny exponent
15916 andi.w &0x8000,FP_SCR0_EX(%a6) # zero src exponent
15917 bset &0x0,1+FP_SCR0_EX(%a6) # set exp = 1
15919 mov.l (%sp)+,%d0 # return SCALE factor
15922 # src exp is >= dst exp; scale src to exp = 0x3fff
15924 bsr.l scale_to_zero_src
15925 mov.l %d0,-(%sp) # save scale factor
15927 cmpi.b DTAG(%a6),&DENORM # is dst denormalized?
15929 lea FP_SCR1(%a6),%a0 # pass ptr to dst op
15930 bsr.l norm # normalize the denorm; result is new exp
15931 neg.w %d0 # new exp = -(shft val)
15932 mov.w %d0,2+L_SCR1(%a6) # insert new exp
15935 mov.w L_SCR1(%a6),%d0 # fetch src (larger) exponent
15936 subi.w &mantissalen+2,%d0 # subtract mantissalen+2 from larger exp
15938 cmp.w %d0,2+L_SCR1(%a6) # is difference >= len(mantissa)+2?
15939 bge.b quick_scale22
15941 mov.w 2+L_SCR1(%a6),%d0 # fetch dst exponent
15942 add.w 0x2(%sp),%d0 # scale dst exponent by scale factor
15943 mov.w FP_SCR1_EX(%a6),%d1 # fetch dst {sgn,exp}
15945 or.w %d1,%d0 # concat {sgn,new exp}
15946 mov.w %d0,FP_SCR1_EX(%a6) # insert new dst exponent
15948 mov.l (%sp)+,%d0 # return SCALE factor
# quick shortcut: exponents are too far apart, so give dst a tiny exponent
15952 andi.w &0x8000,FP_SCR1_EX(%a6) # zero dst exponent
15953 bset &0x0,1+FP_SCR1_EX(%a6) # set exp = 1
15955 mov.l (%sp)+,%d0 # return SCALE factor
15958 ##########################################################################
15960 #########################################################################
15961 # XDEF **************************************************************** #
15962 # scale_to_zero_src(): scale the exponent of extended precision #
15963 # value at FP_SCR0(a6). #
15965 # XREF **************************************************************** #
15966 # norm() - normalize the mantissa if the operand was a DENORM #
15968 # INPUT *************************************************************** #
15969 # FP_SCR0(a6) = extended precision operand to be scaled #
15971 # OUTPUT ************************************************************** #
15972 # FP_SCR0(a6) = scaled extended precision operand #
15973 # d0 = scale value #
15975 # ALGORITHM *********************************************************** #
15976 # Set the exponent of the input operand to 0x3fff. Save the value #
15977 # of the difference between the original and new exponent. Then, #
15978 # normalize the operand if it was a DENORM. Add this normalization #
15979 # value to the previous value. Return the result. #
15981 #########################################################################
15983 global scale_to_zero_src
# Replaces the exponent of the operand in FP_SCR0 with 0x3fff (sign kept) and
# computes the scale value from the original exponent. A DENORM operand
# (per STAG) is normalized first and its effective exponent is -(shift count).
15985 mov.w FP_SCR0_EX(%a6),%d1 # extract operand's {sgn,exp}
15986 mov.w %d1,%d0 # make a copy
15988 andi.l &0x7fff,%d1 # extract operand's exponent
15990 andi.w &0x8000,%d0 # extract operand's sgn
15991 or.w &0x3fff,%d0 # insert new operand's exponent(=0)
15993 mov.w %d0,FP_SCR0_EX(%a6) # insert biased exponent
15995 cmpi.b STAG(%a6),&DENORM # is operand a DENORM?
15996 beq.b stzs_denorm # yes; normalize the DENORM
16000 sub.l %d1,%d0 # scale = BIAS + (-exp)
# DENORM path: normalize, then rejoin the scale computation with d1 = new exp
16005 lea FP_SCR0(%a6),%a0 # pass ptr to src op
16006 bsr.l norm # normalize denorm
16007 neg.l %d0 # new exponent = -(shft val)
16008 mov.l %d0,%d1 # d1 = exponent for the scale computation
16009 bra.b stzs_norm # finish scaling
16013 #########################################################################
16014 # XDEF **************************************************************** #
16015 # scale_sqrt(): scale the input operand exponent so a subsequent #
16016 # fsqrt operation won't take an exception. #
16018 # XREF **************************************************************** #
16019 # norm() - normalize the mantissa if the operand was a DENORM #
16021 # INPUT *************************************************************** #
16022 # FP_SCR0(a6) = extended precision operand to be scaled #
16024 # OUTPUT ************************************************************** #
16025 # FP_SCR0(a6) = scaled extended precision operand #
16026 # d0 = scale value #
16028 # ALGORITHM *********************************************************** #
16029 # If the input operand is a DENORM, normalize it. #
16030 # If the exponent of the input operand is even, set the exponent #
16031 # to 0x3ffe and return a scale factor of "(exp-0x3ffe)/2". If the #
16032 # exponent of the input operand is odd, set the exponent to 0x3fff and #
16033 # return a scale factor of "(exp-0x3fff)/2". #
16035 #########################################################################
# Scales the operand in FP_SCR0 for fsqrt: the exponent field is replaced by
# 0x3fff or 0x3ffe depending on whether the original exponent is odd or even,
# and the halved exponent difference is returned in d0. A DENORM operand
# (per STAG) is normalized first and the parity test uses the shift count.
16039 cmpi.b STAG(%a6),&DENORM # is operand a DENORM?
16040 beq.b ss_denorm # yes; normalize the DENORM
16042 mov.w FP_SCR0_EX(%a6),%d1 # extract operand's {sgn,exp}
16043 andi.l &0x7fff,%d1 # extract operand's exponent
16045 andi.w &0x8000,FP_SCR0_EX(%a6) # keep only operand's sgn
16047 btst &0x0,%d1 # is exp even or odd?
16050 ori.w &0x3fff,FP_SCR0_EX(%a6) # insert new operand's exponent(=0)
16053 sub.l %d1,%d0 # scale = BIAS + (-exp)
16054 asr.l &0x1,%d0 # divide scale factor by 2
16058 ori.w &0x3ffe,FP_SCR0_EX(%a6) # insert new operand's exponent (0x3ffe)
16061 sub.l %d1,%d0 # scale = 0x3ffe + (-exp)
16062 asr.l &0x1,%d0 # divide scale factor by 2
# DENORM path: normalize the mantissa; d0 = shift count from norm()
16066 lea FP_SCR0(%a6),%a0 # pass ptr to src op
16067 bsr.l norm # normalize denorm
16069 btst &0x0,%d0 # is exp even or odd?
16070 beq.b ss_denorm_even # even
16072 ori.w &0x3fff,FP_SCR0_EX(%a6) # insert new operand's exponent(=0)
16075 asr.l &0x1,%d0 # divide scale factor by 2
16079 ori.w &0x3ffe,FP_SCR0_EX(%a6) # insert new operand's exponent (0x3ffe)
16082 asr.l &0x1,%d0 # divide scale factor by 2
16087 #########################################################################
16088 # XDEF **************************************************************** #
16089 # scale_to_zero_dst(): scale the exponent of extended precision #
16090 # value at FP_SCR1(a6). #
16092 # XREF **************************************************************** #
16093 # norm() - normalize the mantissa if the operand was a DENORM #
16095 # INPUT *************************************************************** #
16096 # FP_SCR1(a6) = extended precision operand to be scaled #
16098 # OUTPUT ************************************************************** #
16099 # FP_SCR1(a6) = scaled extended precision operand #
16100 # d0 = scale value #
16102 # ALGORITHM *********************************************************** #
16103 # Set the exponent of the input operand to 0x3fff. Save the value #
16104 # of the difference between the original and new exponent. Then, #
16105 # normalize the operand if it was a DENORM. Add this normalization #
16106 # value to the previous value. Return the result. #
16108 #########################################################################
16110 global scale_to_zero_dst
# Replaces the exponent of the operand in FP_SCR1 with 0x3fff (sign kept) and
# computes the scale value from the original exponent. A DENORM operand
# (per DTAG) is normalized first and its effective exponent is -(shift count).
16112 mov.w FP_SCR1_EX(%a6),%d1 # extract operand's {sgn,exp}
16113 mov.w %d1,%d0 # make a copy
16115 andi.l &0x7fff,%d1 # extract operand's exponent
16117 andi.w &0x8000,%d0 # extract operand's sgn
16118 or.w &0x3fff,%d0 # insert new operand's exponent(=0)
16120 mov.w %d0,FP_SCR1_EX(%a6) # insert biased exponent
16122 cmpi.b DTAG(%a6),&DENORM # is operand a DENORM?
16123 beq.b stzd_denorm # yes; normalize the DENORM
16127 sub.l %d1,%d0 # scale = BIAS + (-exp)
# DENORM path: normalize, then rejoin the scale computation with d1 = new exp
16131 lea FP_SCR1(%a6),%a0 # pass ptr to dst op
16132 bsr.l norm # normalize denorm
16133 neg.l %d0 # new exponent = -(shft val)
16134 mov.l %d0,%d1 # d1 = exponent for the scale computation
16135 bra.b stzd_norm # finish scaling
16137 ##########################################################################
16139 #########################################################################
16140 # XDEF **************************************************************** #
16141 # res_qnan(): return default result w/ QNAN operand for dyadic #
16142 # res_snan(): return default result w/ SNAN operand for dyadic #
16143 # res_qnan_1op(): return dflt result w/ QNAN operand for monadic #
16144 # res_snan_1op(): return dflt result w/ SNAN operand for monadic #
16146 # XREF **************************************************************** #
16149 # INPUT *************************************************************** #
16150 # FP_SRC(a6) = pointer to extended precision src operand #
16151 # FP_DST(a6) = pointer to extended precision dst operand #
16153 # OUTPUT ************************************************************** #
16154 # fp0 = default result #
16156 # ALGORITHM *********************************************************** #
16157 # If either operand (but not both operands) of an operation is a #
16158 # nonsignalling NAN, then that NAN is returned as the result. If both #
16159 # operands are nonsignalling NANs, then the destination operand #
16160 # nonsignalling NAN is returned as the result. #
16161 # If either operand to an operation is a signalling NAN (SNAN), #
16162 # then, the SNAN bit is set in the FPSR EXC byte. If the SNAN trap #
16163 # enable bit is set in the FPCR, then the trap is taken and the #
16164 # destination is not modified. If the SNAN trap enable bit is not set, #
16165 # then the SNAN is converted to a nonsignalling NAN (by setting the #
16166 # SNAN bit in the operand to one), and the operation continues as #
16167 # described in the preceding paragraph, for nonsignalling NANs. #
16168 # Make sure the appropriate FPSR bits are set before exiting. #
16170 #########################################################################
16176 cmp.b DTAG(%a6), &SNAN # is the dst an SNAN?
16178 cmp.b DTAG(%a6), &QNAN # is the dst a QNAN?
16181 cmp.b STAG(%a6), &QNAN # is the src a QNAN?
16183 global res_snan_1op
16186 bset &0x6, FP_SRC_HI(%a6) # set SNAN bit
16187 or.l &nan_mask+aiop_mask+snan_mask, USER_FPSR(%a6) # record NAN, AIOP and SNAN in USER_FPSR
16188 lea FP_SRC(%a6), %a0 # a0 = ptr to src operand
16190 global res_qnan_1op
16193 or.l &nan_mask, USER_FPSR(%a6) # record NAN in USER_FPSR
16194 lea FP_SRC(%a6), %a0 # a0 = ptr to src operand
16197 or.l &nan_mask+aiop_mask+snan_mask, USER_FPSR(%a6) # record NAN, AIOP and SNAN in USER_FPSR
16198 bset &0x6, FP_DST_HI(%a6) # set SNAN bit
16199 lea FP_DST(%a6), %a0 # a0 = ptr to dst operand
16202 lea FP_DST(%a6), %a0 # a0 = ptr to dst operand
16203 cmp.b STAG(%a6), &SNAN # is the src an SNAN?
16205 or.l &aiop_mask+snan_mask, USER_FPSR(%a6) # record AIOP and SNAN in USER_FPSR
16207 or.l &nan_mask, USER_FPSR(%a6) # record NAN in USER_FPSR
16209 btst &0x7, FTEMP_EX(%a0) # is NAN neg?
16211 or.l &neg_mask, USER_FPSR(%a6) # record the negative sign in USER_FPSR
16213 fmovm.x (%a0), &0x80 # fp0 = NAN operand pointed to by a0
16216 #########################################################################
16217 # XDEF **************************************************************** #
16218 # res_operr(): return default result during operand error #
16220 # XREF **************************************************************** #
16223 # INPUT *************************************************************** #
16226 # OUTPUT ************************************************************** #
16227 # fp0 = default operand error result #
16229 # ALGORITHM *********************************************************** #
16230 # A nonsignalling NAN is returned as the default result when #
16231 # an operand error occurs for the following cases: #
16233 # Multiply: (Infinity x Zero) #
16234 # Divide : (Zero / Zero) || (Infinity / Infinity) #
16236 #########################################################################
16240 or.l &nan_mask+operr_mask+aiop_mask, USER_FPSR(%a6) # record NAN, OPERR and AIOP in USER_FPSR
16241 fmovm.x nan_return(%pc), &0x80 # fp0 = default result loaded from nan_return
16245 long 0x7fff0000, 0xffffffff, 0xffffffff # presumably the nan_return constant (exponent 0x7fff, mantissa all ones) - confirm
16247 #########################################################################
16248 # fdbcc(): routine to emulate the fdbcc instruction #
16250 # XDEF **************************************************************** #
16253 # XREF **************************************************************** #
16254 # fetch_dreg() - fetch Dn value #
16255 # store_dreg_l() - store updated Dn value #
16257 # INPUT *************************************************************** #
16258 # d0 = displacement #
16260 # OUTPUT ************************************************************** #
16263 # ALGORITHM *********************************************************** #
16264 # This routine checks which conditional predicate is specified by #
16265 # the stacked fdbcc instruction opcode and then branches to a routine #
16266 # for that predicate. The corresponding fbcc instruction is then used #
16267 # to see whether the condition (specified by the stacked FPSR) is true #
16269 # If a BSUN exception should be indicated, the BSUN and AIOP #
16270 # bits are set in the stacked FPSR. If the BSUN exception is enabled, #
16271 # the fbsun_flg is set in the SPCOND_FLG location on the stack. If an #
16272 # enabled BSUN should not be flagged and the predicate is false, then #
16273 # Dn is fetched and decremented by one. If Dn is not equal to -1, add #
16274 # the displacement value to the stacked PC so that when an "rte" is #
16275 # finally executed, the branch occurs. #
16277 #########################################################################
16280 mov.l %d0,L_SCR1(%a6) # save displacement; reloaded from L_SCR1 when the new PC is computed
16282 mov.w EXC_CMDREG(%a6),%d0 # d0 = predicate, used below as the tbl_fdbcc index
16284 clr.l %d1 # clear scratch reg so only the cc byte is non-zero
16285 mov.b FPSR_CC(%a6),%d1 # d1 low byte = stacked fp ccodes
16286 ror.l &0x8,%d1 # rotate cc byte to top byte (bits 31-24)
16287 fmov.l %d1,%fpsr # load into the FPSR so the fbcc tests below use them
16289 mov.w (tbl_fdbcc.b,%pc,%d0.w*2),%d1 # d1 = handler offset from tbl_fdbcc
16290 jmp (tbl_fdbcc.b,%pc,%d1.w) # jump to fdbcc routine for this predicate
16293 short fdbcc_f - tbl_fdbcc # 00
16294 short fdbcc_eq - tbl_fdbcc # 01
16295 short fdbcc_ogt - tbl_fdbcc # 02
16296 short fdbcc_oge - tbl_fdbcc # 03
16297 short fdbcc_olt - tbl_fdbcc # 04
16298 short fdbcc_ole - tbl_fdbcc # 05
16299 short fdbcc_ogl - tbl_fdbcc # 06
16300 short fdbcc_or - tbl_fdbcc # 07
16301 short fdbcc_un - tbl_fdbcc # 08
16302 short fdbcc_ueq - tbl_fdbcc # 09
16303 short fdbcc_ugt - tbl_fdbcc # 10
16304 short fdbcc_uge - tbl_fdbcc # 11
16305 short fdbcc_ult - tbl_fdbcc # 12
16306 short fdbcc_ule - tbl_fdbcc # 13
16307 short fdbcc_neq - tbl_fdbcc # 14
16308 short fdbcc_t - tbl_fdbcc # 15
16309 short fdbcc_sf - tbl_fdbcc # 16
16310 short fdbcc_seq - tbl_fdbcc # 17
16311 short fdbcc_gt - tbl_fdbcc # 18
16312 short fdbcc_ge - tbl_fdbcc # 19
16313 short fdbcc_lt - tbl_fdbcc # 20
16314 short fdbcc_le - tbl_fdbcc # 21
16315 short fdbcc_gl - tbl_fdbcc # 22
16316 short fdbcc_gle - tbl_fdbcc # 23
16317 short fdbcc_ngle - tbl_fdbcc # 24
16318 short fdbcc_ngl - tbl_fdbcc # 25
16319 short fdbcc_nle - tbl_fdbcc # 26
16320 short fdbcc_nlt - tbl_fdbcc # 27
16321 short fdbcc_nge - tbl_fdbcc # 28
16322 short fdbcc_ngt - tbl_fdbcc # 29
16323 short fdbcc_sneq - tbl_fdbcc # 30
16324 short fdbcc_st - tbl_fdbcc # 31
16326 #########################################################################
16328 # IEEE Nonaware tests #
16330 # For the IEEE nonaware tests, only the false branch changes the #
16331 # counter. However, the true branch may set bsun so we check to see #
16332 # if the NAN bit is set, in which case BSUN and AIOP will be set. #
16334 # The cases EQ and NE are shared by the Aware and Nonaware groups #
16335 # and are incapable of setting the BSUN exception bit. #
16337 # Typically, only one of the two possible branch directions could #
16338 # have the NAN bit set. #
16339 # (This is assuming the mutual exclusiveness of FPSR cc bit groupings #
16342 #########################################################################
16350 fbeq.w fdbcc_eq_yes # equal?
16352 bra.w fdbcc_false # no; go handle counter
16362 fbneq.w fdbcc_neq_yes # not equal?
16364 bra.w fdbcc_false # no; go handle counter
16374 fbgt.w fdbcc_gt_yes # greater than?
16375 btst &nan_bit, FPSR_CC(%a6) # is NAN set in cc?
16376 beq.w fdbcc_false # no;go handle counter
16377 ori.l &bsun_mask+aiop_mask, USER_FPSR(%a6) # set BSUN exc bit
16378 btst &bsun_bit, FPCR_ENABLE(%a6) # is BSUN enabled?
16379 bne.w fdbcc_bsun # yes; we have an exception
16380 bra.w fdbcc_false # no; go handle counter
16385 # not greater than:
16390 fbngt.w fdbcc_ngt_yes # not greater than?
16392 bra.w fdbcc_false # no; go handle counter
16394 btst &nan_bit, FPSR_CC(%a6) # is NAN set in cc?
16395 beq.b fdbcc_ngt_done # no;go finish
16396 ori.l &bsun_mask+aiop_mask, USER_FPSR(%a6) # set BSUN exc bit
16397 btst &bsun_bit, FPCR_ENABLE(%a6) # is BSUN enabled?
16398 bne.w fdbcc_bsun # yes; we have an exception
16400 rts # no; do nothing
16403 # greater than or equal:
16408 fbge.w fdbcc_ge_yes # greater than or equal?
16410 btst &nan_bit, FPSR_CC(%a6) # is NAN set in cc?
16411 beq.w fdbcc_false # no;go handle counter
16412 ori.l &bsun_mask+aiop_mask, USER_FPSR(%a6) # set BSUN exc bit
16413 btst &bsun_bit, FPCR_ENABLE(%a6) # is BSUN enabled?
16414 bne.w fdbcc_bsun # yes; we have an exception
16415 bra.w fdbcc_false # no; go handle counter
16417 btst &nan_bit, FPSR_CC(%a6) # is NAN set in cc?
16418 beq.b fdbcc_ge_yes_done # no;go do nothing
16419 ori.l &bsun_mask+aiop_mask, USER_FPSR(%a6) # set BSUN exc bit
16420 btst &bsun_bit, FPCR_ENABLE(%a6) # is BSUN enabled?
16421 bne.w fdbcc_bsun # yes; we have an exception
16426 # not (greater than or equal):
16431 fbnge.w fdbcc_nge_yes # not (greater than or equal)?
16433 bra.w fdbcc_false # no; go handle counter
16435 btst &nan_bit, FPSR_CC(%a6) # is NAN set in cc?
16436 beq.b fdbcc_nge_done # no;go finish
16437 ori.l &bsun_mask+aiop_mask, USER_FPSR(%a6) # set BSUN exc bit
16438 btst &bsun_bit, FPCR_ENABLE(%a6) # is BSUN enabled?
16439 bne.w fdbcc_bsun # yes; we have an exception
16441 rts # no; do nothing
16449 fblt.w fdbcc_lt_yes # less than?
16451 btst &nan_bit, FPSR_CC(%a6) # is NAN set in cc?
16452 beq.w fdbcc_false # no; go handle counter
16453 ori.l &bsun_mask+aiop_mask, USER_FPSR(%a6) # set BSUN exc bit
16454 btst &bsun_bit, FPCR_ENABLE(%a6) # is BSUN enabled?
16455 bne.w fdbcc_bsun # yes; we have an exception
16456 bra.w fdbcc_false # no; go handle counter
16466 fbnlt.w fdbcc_nlt_yes # not less than?
16468 bra.w fdbcc_false # no; go handle counter
16470 btst &nan_bit, FPSR_CC(%a6) # is NAN set in cc?
16471 beq.b fdbcc_nlt_done # no;go finish
16472 ori.l &bsun_mask+aiop_mask, USER_FPSR(%a6) # set BSUN exc bit
16473 btst &bsun_bit, FPCR_ENABLE(%a6) # is BSUN enabled?
16474 bne.w fdbcc_bsun # yes; we have an exception
16476 rts # no; do nothing
16479 # less than or equal:
16484 fble.w fdbcc_le_yes # less than or equal?
16486 btst &nan_bit, FPSR_CC(%a6) # is NAN set in cc?
16487 beq.w fdbcc_false # no; go handle counter
16488 ori.l &bsun_mask+aiop_mask, USER_FPSR(%a6) # set BSUN exc bit
16489 btst &bsun_bit, FPCR_ENABLE(%a6) # is BSUN enabled?
16490 bne.w fdbcc_bsun # yes; we have an exception
16491 bra.w fdbcc_false # no; go handle counter
16493 btst &nan_bit, FPSR_CC(%a6) # is NAN set in cc?
16494 beq.b fdbcc_le_yes_done # no; go do nothing
16495 ori.l &bsun_mask+aiop_mask, USER_FPSR(%a6) # set BSUN exc bit
16496 btst &bsun_bit, FPCR_ENABLE(%a6) # is BSUN enabled?
16497 bne.w fdbcc_bsun # yes; we have an exception
16502 # not (less than or equal):
16507 fbnle.w fdbcc_nle_yes # not (less than or equal)?
16509 bra.w fdbcc_false # no; go handle counter
16511 btst &nan_bit, FPSR_CC(%a6) # is NAN set in cc?
16512 beq.w fdbcc_nle_done # no; go finish
16513 ori.l &bsun_mask+aiop_mask, USER_FPSR(%a6) # set BSUN exc bit
16514 btst &bsun_bit, FPCR_ENABLE(%a6) # is BSUN enabled?
16515 bne.w fdbcc_bsun # yes; we have an exception
16517 rts # no; do nothing
16520 # greater or less than:
16525 fbgl.w fdbcc_gl_yes # greater or less than?
16527 btst &nan_bit, FPSR_CC(%a6) # is NAN set in cc?
16528 beq.w fdbcc_false # no; handle counter
16529 ori.l &bsun_mask+aiop_mask, USER_FPSR(%a6) # set BSUN exc bit
16530 btst &bsun_bit, FPCR_ENABLE(%a6) # is BSUN enabled?
16531 bne.w fdbcc_bsun # yes; we have an exception
16532 bra.w fdbcc_false # no; go handle counter
16537 # not (greater or less than):
16542 fbngl.w fdbcc_ngl_yes # not (greater or less than)?
16544 bra.w fdbcc_false # no; go handle counter
16546 btst &nan_bit, FPSR_CC(%a6) # is NAN set in cc?
16547 beq.b fdbcc_ngl_done # no; go finish
16548 ori.l &bsun_mask+aiop_mask, USER_FPSR(%a6) # set BSUN exc bit
16549 btst &bsun_bit, FPCR_ENABLE(%a6) # is BSUN enabled?
16550 bne.w fdbcc_bsun # yes; we have an exception
16552 rts # no; do nothing
16555 # greater, less, or equal:
16560 fbgle.w fdbcc_gle_yes # greater, less, or equal?
16562 ori.l &bsun_mask+aiop_mask, USER_FPSR(%a6) # no; set BSUN exc bit (no nan_bit test here; presumably GLE false implies NAN - confirm)
16563 btst &bsun_bit, FPCR_ENABLE(%a6) # is BSUN enabled?
16564 bne.w fdbcc_bsun # yes; we have an exception
16565 bra.w fdbcc_false # no; go handle counter
16570 # not (greater, less, or equal):
16575 fbngle.w fdbcc_ngle_yes # not (greater, less, or equal)?
16577 bra.w fdbcc_false # no; go handle counter
16579 ori.l &bsun_mask+aiop_mask, USER_FPSR(%a6) # yes; set BSUN exc bit (no nan_bit test here; presumably NGLE true implies NAN - confirm)
16580 btst &bsun_bit, FPCR_ENABLE(%a6) # is BSUN enabled?
16581 bne.w fdbcc_bsun # yes; we have an exception
16582 rts # no; do nothing
16584 #########################################################################
16586 # Miscellaneous tests #
16588 # For the IEEE miscellaneous tests, all but fdbf and fdbt can set bsun. #
16590 #########################################################################
16597 fdbcc_f: # no bsun possible
16598 bra.w fdbcc_false # go handle counter
16605 fdbcc_t: # no bsun possible
16609 # signalling false:
16614 btst &nan_bit, FPSR_CC(%a6) # is NAN set?
16615 beq.w fdbcc_false # no;go handle counter
16616 ori.l &bsun_mask+aiop_mask, USER_FPSR(%a6) # set BSUN exc bit
16617 btst &bsun_bit, FPCR_ENABLE(%a6) # is BSUN enabled?
16618 bne.w fdbcc_bsun # yes; we have an exception
16619 bra.w fdbcc_false # go handle counter
16627 btst &nan_bit, FPSR_CC(%a6) # is NAN set?
16628 beq.b fdbcc_st_done # no;go finish
16629 ori.l &bsun_mask+aiop_mask, USER_FPSR(%a6) # set BSUN exc bit
16630 btst &bsun_bit, FPCR_ENABLE(%a6) # is BSUN enabled?
16631 bne.w fdbcc_bsun # yes; we have an exception
16636 # signalling equal:
16641 fbseq.w fdbcc_seq_yes # signalling equal?
16643 btst &nan_bit, FPSR_CC(%a6) # is NAN set?
16644 beq.w fdbcc_false # no;go handle counter
16645 ori.l &bsun_mask+aiop_mask, USER_FPSR(%a6) # set BSUN exc bit
16646 btst &bsun_bit, FPCR_ENABLE(%a6) # is BSUN enabled?
16647 bne.w fdbcc_bsun # yes; we have an exception
16648 bra.w fdbcc_false # go handle counter
16650 btst &nan_bit, FPSR_CC(%a6) # is NAN set?
16651 beq.b fdbcc_seq_yes_done # no;go do nothing
16652 ori.l &bsun_mask+aiop_mask, USER_FPSR(%a6) # set BSUN exc bit
16653 btst &bsun_bit, FPCR_ENABLE(%a6) # is BSUN enabled?
16654 bne.w fdbcc_bsun # yes; we have an exception
16655 fdbcc_seq_yes_done:
16656 rts # yes; do nothing
16659 # signalling not equal:
16664 fbsneq.w fdbcc_sneq_yes # signalling not equal?
16666 btst &nan_bit, FPSR_CC(%a6) # is NAN set?
16667 beq.w fdbcc_false # no;go handle counter
16668 ori.l &bsun_mask+aiop_mask, USER_FPSR(%a6) # set BSUN exc bit
16669 btst &bsun_bit, FPCR_ENABLE(%a6) # is BSUN enabled?
16670 bne.w fdbcc_bsun # yes; we have an exception
16671 bra.w fdbcc_false # no; go handle counter
16673 btst &nan_bit, FPSR_CC(%a6) # is NAN set?
16674 beq.w fdbcc_sneq_done # no;go finish
16675 ori.l &bsun_mask+aiop_mask, USER_FPSR(%a6) # set BSUN exc bit
16676 btst &bsun_bit, FPCR_ENABLE(%a6) # is BSUN enabled?
16677 bne.w fdbcc_bsun # yes; we have an exception
16681 #########################################################################
16683 # IEEE Aware tests #
16685 # For the IEEE aware tests, action is only taken if the result is false.#
16686 # Therefore, the opposite branch type is used to jump to the decrement #
16688 # The BSUN exception will not be set for any of these tests. #
16690 #########################################################################
16693 # ordered greater than:
16698 fbogt.w fdbcc_ogt_yes # ordered greater than?
16700 bra.w fdbcc_false # no; go handle counter
16702 rts # yes; do nothing
16705 # unordered or less or equal:
16710 fbule.w fdbcc_ule_yes # unordered or less or equal?
16712 bra.w fdbcc_false # no; go handle counter
16714 rts # yes; do nothing
16717 # ordered greater than or equal:
16722 fboge.w fdbcc_oge_yes # ordered greater than or equal?
16724 bra.w fdbcc_false # no; go handle counter
16726 rts # yes; do nothing
16729 # unordered or less than:
16734 fbult.w fdbcc_ult_yes # unordered or less than?
16736 bra.w fdbcc_false # no; go handle counter
16738 rts # yes; do nothing
16741 # ordered less than:
16746 fbolt.w fdbcc_olt_yes # ordered less than?
16748 bra.w fdbcc_false # no; go handle counter
16750 rts # yes; do nothing
16753 # unordered or greater or equal:
16758 fbuge.w fdbcc_uge_yes # unordered or greater or equal?
16760 bra.w fdbcc_false # no; go handle counter
16762 rts # yes; do nothing
16765 # ordered less than or equal:
16770 fbole.w fdbcc_ole_yes # ordered less than or equal?
16772 bra.w fdbcc_false # no; go handle counter
16774 rts # yes; do nothing
16777 # unordered or greater than:
16782 fbugt.w fdbcc_ugt_yes # unordered or greater than?
16784 bra.w fdbcc_false # no; go handle counter
16786 rts # yes; do nothing
16789 # ordered greater or less than:
16794 fbogl.w fdbcc_ogl_yes # ordered greater or less than?
16796 bra.w fdbcc_false # no; go handle counter
16798 rts # yes; do nothing
16801 # unordered or equal:
16806 fbueq.w fdbcc_ueq_yes # unordered or equal?
16808 bra.w fdbcc_false # no; go handle counter
16810 rts # yes; do nothing
16818 fbor.w fdbcc_or_yes # ordered?
16820 bra.w fdbcc_false # no; go handle counter
16822 rts # yes; do nothing
16830 fbun.w fdbcc_un_yes # unordered?
16832 bra.w fdbcc_false # no; go handle counter
16834 rts # yes; do nothing
16836 #######################################################################
16839 # the bsun exception bit was not set.
16841 # (1) subtract 1 from the count register
16842 # (2) if (cr == -1) then
16843 # pc = pc of next instruction
16845 # otherwise pc += sign_ext(16-bit displacement)
16848 mov.b 1+EXC_OPWORD(%a6), %d1 # fetch lo byte of the opword
16849 andi.w &0x7, %d1 # extract count register number (low 3 bits)
16851 bsr.l fetch_dreg # fetch count value
16852 # make sure that d0 isn't corrupted between calls...
16854 subq.w &0x1, %d0 # Dn - 1 -> Dn (word-sized decrement)
16856 bsr.l store_dreg_l # store new count value
16858 cmpi.w %d0, &-0x1 # is (Dn == -1)?
16859 bne.b fdbcc_false_cont # no; go set the branch target PC
16863 mov.l L_SCR1(%a6),%d0 # fetch displacement saved at entry
16864 add.l USER_FPIAR(%a6),%d0 # add instruction PC
16865 addq.l &0x4,%d0 # add instruction length
16866 mov.l %d0,EXC_PC(%a6) # set new PC
16869 # the emulation routine set bsun and BSUN was enabled. have to
16870 # fix stack and jump to the bsun handler.
16871 # let the caller of this routine shift the stack frame up to
16872 # eliminate the effective address field.
16874 mov.b &fbsun_flg,SPCOND_FLG(%a6) # set fbsun_flg in SPCOND_FLG for the caller
16877 #########################################################################
16878 # ftrapcc(): routine to emulate the ftrapcc instruction #
16880 # XDEF **************************************************************** #
16883 # XREF **************************************************************** #
16886 # INPUT *************************************************************** #
16889 # OUTPUT ************************************************************** #
16892 # ALGORITHM *********************************************************** #
16893 # This routine checks which conditional predicate is specified by #
16894 # the stacked ftrapcc instruction opcode and then branches to a routine #
16895 # for that predicate. The corresponding fbcc instruction is then used #
16896 # to see whether the condition (specified by the stacked FPSR) is true #
16898 # If a BSUN exception should be indicated, the BSUN and AIOP #
16899 # bits are set in the stacked FPSR. If the BSUN exception is enabled, #
16900 # the fbsun_flg is set in the SPCOND_FLG location on the stack. If an #
16901 # enabled BSUN should not be flagged and the predicate is true, then #
16902 # the ftrapcc_flg is set in the SPCOND_FLG location. These special #
16903 # flags indicate to the calling routine to emulate the exceptional #
16906 #########################################################################
16910 mov.w EXC_CMDREG(%a6),%d0 # d0 = predicate, used below as the tbl_ftrapcc index
16912 clr.l %d1 # clear scratch reg so only the cc byte is non-zero
16913 mov.b FPSR_CC(%a6),%d1 # d1 low byte = stacked fp ccodes
16914 ror.l &0x8,%d1 # rotate cc byte to top byte (bits 31-24)
16915 fmov.l %d1,%fpsr # load into the FPSR so the fbcc tests below use them
16917 mov.w (tbl_ftrapcc.b,%pc,%d0.w*2), %d1 # d1 = handler offset from tbl_ftrapcc
16918 jmp (tbl_ftrapcc.b,%pc,%d1.w) # jump to ftrapcc routine for this predicate
16921 short ftrapcc_f - tbl_ftrapcc # 00
16922 short ftrapcc_eq - tbl_ftrapcc # 01
16923 short ftrapcc_ogt - tbl_ftrapcc # 02
16924 short ftrapcc_oge - tbl_ftrapcc # 03
16925 short ftrapcc_olt - tbl_ftrapcc # 04
16926 short ftrapcc_ole - tbl_ftrapcc # 05
16927 short ftrapcc_ogl - tbl_ftrapcc # 06
16928 short ftrapcc_or - tbl_ftrapcc # 07
16929 short ftrapcc_un - tbl_ftrapcc # 08
16930 short ftrapcc_ueq - tbl_ftrapcc # 09
16931 short ftrapcc_ugt - tbl_ftrapcc # 10
16932 short ftrapcc_uge - tbl_ftrapcc # 11
16933 short ftrapcc_ult - tbl_ftrapcc # 12
16934 short ftrapcc_ule - tbl_ftrapcc # 13
16935 short ftrapcc_neq - tbl_ftrapcc # 14
16936 short ftrapcc_t - tbl_ftrapcc # 15
16937 short ftrapcc_sf - tbl_ftrapcc # 16
16938 short ftrapcc_seq - tbl_ftrapcc # 17
16939 short ftrapcc_gt - tbl_ftrapcc # 18
16940 short ftrapcc_ge - tbl_ftrapcc # 19
16941 short ftrapcc_lt - tbl_ftrapcc # 20
16942 short ftrapcc_le - tbl_ftrapcc # 21
16943 short ftrapcc_gl - tbl_ftrapcc # 22
16944 short ftrapcc_gle - tbl_ftrapcc # 23
16945 short ftrapcc_ngle - tbl_ftrapcc # 24
16946 short ftrapcc_ngl - tbl_ftrapcc # 25
16947 short ftrapcc_nle - tbl_ftrapcc # 26
16948 short ftrapcc_nlt - tbl_ftrapcc # 27
16949 short ftrapcc_nge - tbl_ftrapcc # 28
16950 short ftrapcc_ngt - tbl_ftrapcc # 29
16951 short ftrapcc_sneq - tbl_ftrapcc # 30
16952 short ftrapcc_st - tbl_ftrapcc # 31
16954 #########################################################################
16956 # IEEE Nonaware tests #
16958 # For the IEEE nonaware tests, we set the result based on the #
16959 # floating point condition codes. In addition, we check to see #
16960 # if the NAN bit is set, in which case BSUN and AIOP will be set. #
16962 # The cases EQ and NE are shared by the Aware and Nonaware groups #
16963 # and are incapable of setting the BSUN exception bit. #
16965 # Typically, only one of the two possible branch directions could #
16966 # have the NAN bit set. #
16968 #########################################################################
16976 fbeq.w ftrapcc_trap # equal? yes; go take trap
16986 fbneq.w ftrapcc_trap # not equal? yes; go take trap
16996 fbgt.w ftrapcc_trap # greater than? yes; go take trap
16998 btst &nan_bit, FPSR_CC(%a6) # is NAN set in cc?
16999 beq.b ftrapcc_gt_done # no; go finish
17000 ori.l &bsun_mask+aiop_mask, USER_FPSR(%a6) # set BSUN exc bit
17001 btst &bsun_bit, FPCR_ENABLE(%a6) # is BSUN enabled?
17002 bne.w ftrapcc_bsun # yes; we have an exception
17004 rts # no; do nothing
17007 # not greater than:
17012 fbngt.w ftrapcc_ngt_yes # not greater than?
17016 btst &nan_bit, FPSR_CC(%a6) # is NAN set in cc?
17017 beq.w ftrapcc_trap # no; go take trap
17018 ori.l &bsun_mask+aiop_mask, USER_FPSR(%a6) # set BSUN exc bit
17019 btst &bsun_bit, FPCR_ENABLE(%a6) # is BSUN enabled?
17020 bne.w ftrapcc_bsun # yes; we have an exception
17021 bra.w ftrapcc_trap # no; go take trap
17024 # greater than or equal:
17029 fbge.w ftrapcc_ge_yes # greater than or equal?
17031 btst &nan_bit, FPSR_CC(%a6) # is NAN set in cc?
17032 beq.b ftrapcc_ge_done # no; go finish
17033 ori.l &bsun_mask+aiop_mask, USER_FPSR(%a6) # set BSUN exc bit
17034 btst &bsun_bit, FPCR_ENABLE(%a6) # is BSUN enabled?
17035 bne.w ftrapcc_bsun # yes; we have an exception
17037 rts # no; do nothing
17039 btst &nan_bit, FPSR_CC(%a6) # is NAN set in cc?
17040 beq.w ftrapcc_trap # no; go take trap
17041 ori.l &bsun_mask+aiop_mask, USER_FPSR(%a6) # set BSUN exc bit
17042 btst &bsun_bit, FPCR_ENABLE(%a6) # is BSUN enabled?
17043 bne.w ftrapcc_bsun # yes; we have an exception
17044 bra.w ftrapcc_trap # no; go take trap
17047 # not (greater than or equal):
17052 fbnge.w ftrapcc_nge_yes # not (greater than or equal)?
17056 btst &nan_bit, FPSR_CC(%a6) # is NAN set in cc?
17057 beq.w ftrapcc_trap # no; go take trap
17058 ori.l &bsun_mask+aiop_mask, USER_FPSR(%a6) # set BSUN exc bit
17059 btst &bsun_bit, FPCR_ENABLE(%a6) # is BSUN enabled?
17060 bne.w ftrapcc_bsun # yes; we have an exception
17061 bra.w ftrapcc_trap # no; go take trap
17069 fblt.w ftrapcc_trap # less than? yes; go take trap
17071 btst &nan_bit, FPSR_CC(%a6) # is NAN set in cc?
17072 beq.b ftrapcc_lt_done # no; go finish
17073 ori.l &bsun_mask+aiop_mask, USER_FPSR(%a6) # set BSUN exc bit
17074 btst &bsun_bit, FPCR_ENABLE(%a6) # is BSUN enabled?
17075 bne.w ftrapcc_bsun # yes; we have an exception
17077 rts # no; do nothing
17085 fbnlt.w ftrapcc_nlt_yes # not less than?
17089 btst &nan_bit, FPSR_CC(%a6) # is NAN set in cc?
17090 beq.w ftrapcc_trap # no; go take trap
17091 ori.l &bsun_mask+aiop_mask, USER_FPSR(%a6) # set BSUN exc bit
17092 btst &bsun_bit, FPCR_ENABLE(%a6) # is BSUN enabled?
17093 bne.w ftrapcc_bsun # yes; we have an exception
17094 bra.w ftrapcc_trap # no; go take trap
17097 # less than or equal:
17102 fble.w ftrapcc_le_yes # less than or equal?
17104 btst &nan_bit, FPSR_CC(%a6) # is NAN set in cc?
17105 beq.b ftrapcc_le_done # no; go finish
17106 ori.l &bsun_mask+aiop_mask, USER_FPSR(%a6) # set BSUN exc bit
17107 btst &bsun_bit, FPCR_ENABLE(%a6) # is BSUN enabled?
17108 bne.w ftrapcc_bsun # yes; we have an exception
17110 rts # no; do nothing
17112 btst &nan_bit, FPSR_CC(%a6) # is NAN set in cc?
17113 beq.w ftrapcc_trap # no; go take trap
17114 ori.l &bsun_mask+aiop_mask, USER_FPSR(%a6) # set BSUN exc bit
17115 btst &bsun_bit, FPCR_ENABLE(%a6) # is BSUN enabled?
17116 bne.w ftrapcc_bsun # yes; we have an exception
17117 bra.w ftrapcc_trap # no; go take trap
17120 # not (less than or equal):
17125 fbnle.w ftrapcc_nle_yes # not (less than or equal)?
17129 btst &nan_bit, FPSR_CC(%a6) # is NAN set in cc?
17130 beq.w ftrapcc_trap # no; go take trap
17131 ori.l &bsun_mask+aiop_mask, USER_FPSR(%a6) # set BSUN exc bit
17132 btst &bsun_bit, FPCR_ENABLE(%a6) # is BSUN enabled?
17133 bne.w ftrapcc_bsun # yes; we have an exception
17134 bra.w ftrapcc_trap # no; go take trap
17137 # greater or less than:
17142 fbgl.w ftrapcc_trap # greater or less than? yes; go take trap
17144 btst &nan_bit, FPSR_CC(%a6) # is NAN set in cc?
17145 beq.b ftrapcc_gl_done # no; go finish
17146 ori.l &bsun_mask+aiop_mask, USER_FPSR(%a6) # set BSUN exc bit
17147 btst &bsun_bit, FPCR_ENABLE(%a6) # is BSUN enabled?
17148 bne.w ftrapcc_bsun # yes; we have an exception
17150 rts # no; do nothing
17153 # not (greater or less than):
17158 fbngl.w ftrapcc_ngl_yes # not (greater or less than)?
17162 btst &nan_bit, FPSR_CC(%a6) # is NAN set in cc?
17163 beq.w ftrapcc_trap # no; go take trap
17164 ori.l &bsun_mask+aiop_mask, USER_FPSR(%a6) # set BSUN exc bit
17165 btst &bsun_bit, FPCR_ENABLE(%a6) # is BSUN enabled?
17166 bne.w ftrapcc_bsun # yes; we have an exception
17167 bra.w ftrapcc_trap # no; go take trap
17170 # greater, less, or equal:
17175 fbgle.w ftrapcc_trap # greater, less, or equal? yes; go take trap
17177 ori.l &bsun_mask+aiop_mask, USER_FPSR(%a6) # set BSUN exc bit (no nan_bit test here; presumably GLE false implies NAN - confirm)
17178 btst &bsun_bit, FPCR_ENABLE(%a6) # is BSUN enabled?
17179 bne.w ftrapcc_bsun # yes; we have an exception
17180 rts # no; do nothing
17183 # not (greater, less, or equal):
17188 fbngle.w ftrapcc_ngle_yes # not (greater, less, or equal)?
17192 ori.l &bsun_mask+aiop_mask, USER_FPSR(%a6) # set BSUN exc bit (no nan_bit test here; presumably NGLE true implies NAN - confirm)
17193 btst &bsun_bit, FPCR_ENABLE(%a6) # is BSUN enabled?
17194 bne.w ftrapcc_bsun # yes; we have an exception
17195 bra.w ftrapcc_trap # no; go take trap
17197 #########################################################################
17199 # Miscellaneous tests #
17201 # For the miscellaneous tests, the signalling predicates (sf, st, #
17202 # seq, sneq) check the NAN bit in the floating point condition #
17203 # codes and, if it is set, set BSUN and AIOP in the stacked FPSR. #
17205 #########################################################################
17221 bra.w ftrapcc_trap # go take trap
17224 # signalling false:
17229 btst &nan_bit, FPSR_CC(%a6) # is NAN set?
17230 beq.b ftrapcc_sf_done # no; go finish
17231 ori.l &bsun_mask+aiop_mask, USER_FPSR(%a6) # set BSUN exc bit
17232 btst &bsun_bit, FPCR_ENABLE(%a6) # is BSUN enabled?
17233 bne.w ftrapcc_bsun # yes; we have an exception
17235 rts # no; do nothing
17243 btst &nan_bit, FPSR_CC(%a6) # is NAN set?
17244 beq.w ftrapcc_trap # no; go take trap
17245 ori.l &bsun_mask+aiop_mask, USER_FPSR(%a6) # set BSUN exc bit
17246 btst &bsun_bit, FPCR_ENABLE(%a6) # is BSUN enabled?
17247 bne.w ftrapcc_bsun # yes; we have an exception
17248 bra.w ftrapcc_trap # no; go take trap
17251 # signalling equal:
17256 fbseq.w ftrapcc_seq_yes # signalling equal?
17258 btst &nan_bit, FPSR_CC(%a6) # is NAN set?
17259 beq.w ftrapcc_seq_done # no; go finish
17260 ori.l &bsun_mask+aiop_mask, USER_FPSR(%a6) # set BSUN exc bit
17261 btst &bsun_bit, FPCR_ENABLE(%a6) # is BSUN enabled?
17262 bne.w ftrapcc_bsun # yes; we have an exception
17264 rts # no; do nothing
17266 btst &nan_bit, FPSR_CC(%a6) # is NAN set?
17267 beq.w ftrapcc_trap # no; go take trap
17268 ori.l &bsun_mask+aiop_mask, USER_FPSR(%a6) # set BSUN exc bit
17269 btst &bsun_bit, FPCR_ENABLE(%a6) # is BSUN enabled?
17270 bne.w ftrapcc_bsun # yes; we have an exception
17271 bra.w ftrapcc_trap # no; go take trap
17274 # signalling not equal:
17279 fbsneq.w ftrapcc_sneq_yes # signalling not equal?
17281 btst &nan_bit, FPSR_CC(%a6) # is NAN set?
17282 beq.w ftrapcc_sneq_no_done # no; go finish
17283 ori.l &bsun_mask+aiop_mask, USER_FPSR(%a6) # set BSUN exc bit
17284 btst &bsun_bit, FPCR_ENABLE(%a6) # is BSUN enabled?
17285 bne.w ftrapcc_bsun # yes; we have an exception
17286 ftrapcc_sneq_no_done:
17289 btst &nan_bit, FPSR_CC(%a6) # is NAN set?
17290 beq.w ftrapcc_trap # no; go take trap
17291 ori.l &bsun_mask+aiop_mask, USER_FPSR(%a6) # set BSUN exc bit
17292 btst &bsun_bit, FPCR_ENABLE(%a6) # is BSUN enabled?
17293 bne.w ftrapcc_bsun # yes; we have an exception
17294 bra.w ftrapcc_trap # no; go take trap
17296 #########################################################################
17298 # IEEE Aware tests #
17300 # For the IEEE aware tests, we only have to set the result based on the #
17301 # floating point condition codes. The BSUN exception will not be #
17302 # set for any of these tests. #
17304 #########################################################################
17307 # ordered greater than:
17312 fbogt.w ftrapcc_trap # ordered greater than?
17317 # unordered or less or equal:
17322 fbule.w ftrapcc_trap # unordered or less or equal?
17327 # ordered greater than or equal:
17332 fboge.w ftrapcc_trap # ordered greater than or equal?
17337 # unordered or less than:
17342 fbult.w ftrapcc_trap # unordered or less than?
17347 # ordered less than:
17352 fbolt.w ftrapcc_trap # ordered less than?
17357 # unordered or greater or equal:
17362 fbuge.w ftrapcc_trap # unordered or greater or equal?
17367 # ordered less than or equal:
17372 fbole.w ftrapcc_trap # ordered less than or equal?
17377 # unordered or greater than:
17382 fbugt.w ftrapcc_trap # unordered or greater than?
17387 # ordered greater or less than:
17392 fbogl.w ftrapcc_trap # ordered greater or less than?
17397 # unordered or equal:
17402 fbueq.w ftrapcc_trap # unordered or equal?
17412 fbor.w ftrapcc_trap # ordered?
17422 fbun.w ftrapcc_trap # unordered?
17426 #######################################################################
17428 # the bsun exception bit was not set.
17429 # we will need to jump to the ftrapcc vector. the stack frame
17430 # is the same size as that of the fp unimp instruction. the
17431 # only difference is that the <ea> field should hold the PC
17432 # of the ftrapcc instruction and the vector offset field
17433 # should denote the ftrapcc trap.
17435 mov.b &ftrapcc_flg,SPCOND_FLG(%a6) # set ftrapcc_flg in SPCOND_FLG for the caller
17438 # the emulation routine set bsun and BSUN was enabled. have to
17439 # fix stack and jump to the bsun handler.
17440 # let the caller of this routine shift the stack frame up to
17441 # eliminate the effective address field.
17443 mov.b &fbsun_flg,SPCOND_FLG(%a6) # set fbsun_flg in SPCOND_FLG for the caller
17446 #########################################################################
17447 # fscc(): routine to emulate the fscc instruction #
17449 # XDEF **************************************************************** #
17452 # XREF **************************************************************** #
17453 # store_dreg_b() - store result to data register file #
17454 # dec_areg() - decrement an areg for -(an) mode #
17455 # inc_areg() - increment an areg for (an)+ mode #
17456 # _dmem_write_byte() - store result to memory #
17458 # INPUT *************************************************************** #
17461 # OUTPUT ************************************************************** #
17464 # ALGORITHM *********************************************************** #
17465 # This routine checks which conditional predicate is specified by #
17466 # the stacked fscc instruction opcode and then branches to a routine #
17467 # for that predicate. The corresponding fbcc instruction is then used #
17468 # to see whether the condition (specified by the stacked FPSR) is true #
# or false. #
17470 # If a BSUN exception should be indicated, the BSUN and AIOP #
17471 # bits (bsun_mask+aiop_mask) are set in the stacked FPSR. If the BSUN #
17472 # exception is enabled, the fbsun_flg is set in the SPCOND_FLG location #
17473 # on the stack. If an enabled BSUN should not be flagged, then the #
17474 # true/false result byte is stored to the data register file or memory #
17476 #########################################################################
# NOTE(review): the "fscc:" and "tbl_fscc:" label lines are not visible in
# this listing; confirm they are present in the complete source.
17480 mov.w EXC_CMDREG(%a6),%d0 # fetch predicate
# NOTE(review): d0 is used below as the table index with no visible masking;
# confirm the predicate field is isolated before the table lookup.
17482 clr.l %d1 # clear scratch reg
17483 mov.b FPSR_CC(%a6),%d1 # fetch fp ccodes
17484 ror.l &0x8,%d1 # rotate ccodes into the top byte
17485 fmov.l %d1,%fpsr # insert into FPSR so fbcc can test them
17487 mov.w (tbl_fscc.b,%pc,%d0.w*2),%d1 # fetch handler offset from table
17488 jmp (tbl_fscc.b,%pc,%d1.w) # jump to predicate handler
# table of 16-bit handler offsets (handler - tbl_fscc), one entry per
# predicate; the trailing number is the predicate index (decimal).
17491 short fscc_f - tbl_fscc # 00
17492 short fscc_eq - tbl_fscc # 01
17493 short fscc_ogt - tbl_fscc # 02
17494 short fscc_oge - tbl_fscc # 03
17495 short fscc_olt - tbl_fscc # 04
17496 short fscc_ole - tbl_fscc # 05
17497 short fscc_ogl - tbl_fscc # 06
17498 short fscc_or - tbl_fscc # 07
17499 short fscc_un - tbl_fscc # 08
17500 short fscc_ueq - tbl_fscc # 09
17501 short fscc_ugt - tbl_fscc # 10
17502 short fscc_uge - tbl_fscc # 11
17503 short fscc_ult - tbl_fscc # 12
17504 short fscc_ule - tbl_fscc # 13
17505 short fscc_neq - tbl_fscc # 14
17506 short fscc_t - tbl_fscc # 15
17507 short fscc_sf - tbl_fscc # 16
17508 short fscc_seq - tbl_fscc # 17
17509 short fscc_gt - tbl_fscc # 18
17510 short fscc_ge - tbl_fscc # 19
17511 short fscc_lt - tbl_fscc # 20
17512 short fscc_le - tbl_fscc # 21
17513 short fscc_gl - tbl_fscc # 22
17514 short fscc_gle - tbl_fscc # 23
17515 short fscc_ngle - tbl_fscc # 24
17516 short fscc_ngl - tbl_fscc # 25
17517 short fscc_nle - tbl_fscc # 26
17518 short fscc_nlt - tbl_fscc # 27
17519 short fscc_nge - tbl_fscc # 28
17520 short fscc_ngt - tbl_fscc # 29
17521 short fscc_sneq - tbl_fscc # 30
17522 short fscc_st - tbl_fscc # 31
17524 #########################################################################
17526 # IEEE Nonaware tests #
17528 # For the IEEE nonaware tests, we set the result based on the #
17529 # floating point condition codes. In addition, we check to see #
17530 # if the NAN bit is set, in which case BSUN and AIOP will be set. #
17532 # The cases EQ and NE are shared by the Aware and Nonaware groups #
17533 # and are incapable of setting the BSUN exception bit. #
17535 # Typically, only one of the two possible branch directions could #
17536 # have the NAN bit set. #
17538 #########################################################################
# NOTE(review): the handler label lines (fscc_eq, fscc_eq_yes, ...) and any
# instruction that sets the "true" result are not visible in this listing;
# confirm against the complete source before relying on fall-through order.
# equal:
17546 fbeq.w fscc_eq_yes # equal?
17548 clr.b %d0 # set false
17549 bra.w fscc_done # go finish
17552 bra.w fscc_done # go finish
# not equal:
17560 fbneq.w fscc_neq_yes # not equal?
17562 clr.b %d0 # set false
17563 bra.w fscc_done # go finish
17566 bra.w fscc_done # go finish
# greater than:
17574 fbgt.w fscc_gt_yes # greater than?
17576 clr.b %d0 # set false
17577 btst &nan_bit, FPSR_CC(%a6) # is NAN set in cc?
17578 beq.w fscc_done # no;go finish
17579 ori.l &bsun_mask+aiop_mask, USER_FPSR(%a6) # set BSUN and AIOP bits
17580 bra.w fscc_chk_bsun # go check if BSUN is enabled
17583 bra.w fscc_done # go finish
17586 # not greater than:
17591 fbngt.w fscc_ngt_yes # not greater than?
17593 clr.b %d0 # set false
17594 bra.w fscc_done # go finish
17597 btst &nan_bit, FPSR_CC(%a6) # is NAN set in cc?
17598 beq.w fscc_done # no;go finish
17599 ori.l &bsun_mask+aiop_mask, USER_FPSR(%a6) # set BSUN and AIOP bits
17600 bra.w fscc_chk_bsun # go check if BSUN is enabled
17603 # greater than or equal:
17608 fbge.w fscc_ge_yes # greater than or equal?
17610 clr.b %d0 # set false
17611 btst &nan_bit, FPSR_CC(%a6) # is NAN set in cc?
17612 beq.w fscc_done # no;go finish
17613 ori.l &bsun_mask+aiop_mask, USER_FPSR(%a6) # set BSUN and AIOP bits
17614 bra.w fscc_chk_bsun # go check if BSUN is enabled
17617 btst &nan_bit, FPSR_CC(%a6) # is NAN set in cc?
17618 beq.w fscc_done # no;go finish
17619 ori.l &bsun_mask+aiop_mask, USER_FPSR(%a6) # set BSUN and AIOP bits
17620 bra.w fscc_chk_bsun # go check if BSUN is enabled
17623 # not (greater than or equal):
17628 fbnge.w fscc_nge_yes # not (greater than or equal)?
17630 clr.b %d0 # set false
17631 bra.w fscc_done # go finish
17634 btst &nan_bit, FPSR_CC(%a6) # is NAN set in cc?
17635 beq.w fscc_done # no;go finish
17636 ori.l &bsun_mask+aiop_mask, USER_FPSR(%a6) # set BSUN and AIOP bits
17637 bra.w fscc_chk_bsun # go check if BSUN is enabled
# less than:
17645 fblt.w fscc_lt_yes # less than?
17647 clr.b %d0 # set false
17648 btst &nan_bit, FPSR_CC(%a6) # is NAN set in cc?
17649 beq.w fscc_done # no;go finish
17650 ori.l &bsun_mask+aiop_mask, USER_FPSR(%a6) # set BSUN and AIOP bits
17651 bra.w fscc_chk_bsun # go check if BSUN is enabled
17654 bra.w fscc_done # go finish
# not less than:
17662 fbnlt.w fscc_nlt_yes # not less than?
17664 clr.b %d0 # set false
17665 bra.w fscc_done # go finish
17668 btst &nan_bit, FPSR_CC(%a6) # is NAN set in cc?
17669 beq.w fscc_done # no;go finish
17670 ori.l &bsun_mask+aiop_mask, USER_FPSR(%a6) # set BSUN and AIOP bits
17671 bra.w fscc_chk_bsun # go check if BSUN is enabled
17674 # less than or equal:
17679 fble.w fscc_le_yes # less than or equal?
17681 clr.b %d0 # set false
17682 btst &nan_bit, FPSR_CC(%a6) # is NAN set in cc?
17683 beq.w fscc_done # no;go finish
17684 ori.l &bsun_mask+aiop_mask, USER_FPSR(%a6) # set BSUN and AIOP bits
17685 bra.w fscc_chk_bsun # go check if BSUN is enabled
17688 btst &nan_bit, FPSR_CC(%a6) # is NAN set in cc?
17689 beq.w fscc_done # no;go finish
17690 ori.l &bsun_mask+aiop_mask, USER_FPSR(%a6) # set BSUN and AIOP bits
17691 bra.w fscc_chk_bsun # go check if BSUN is enabled
17694 # not (less than or equal):
17699 fbnle.w fscc_nle_yes # not (less than or equal)?
17701 clr.b %d0 # set false
17702 bra.w fscc_done # go finish
17705 btst &nan_bit, FPSR_CC(%a6) # is NAN set in cc?
17706 beq.w fscc_done # no;go finish
17707 ori.l &bsun_mask+aiop_mask, USER_FPSR(%a6) # set BSUN and AIOP bits
17708 bra.w fscc_chk_bsun # go check if BSUN is enabled
17711 # greater or less than:
17716 fbgl.w fscc_gl_yes # greater or less than?
17718 clr.b %d0 # set false
17719 btst &nan_bit, FPSR_CC(%a6) # is NAN set in cc?
17720 beq.w fscc_done # no;go finish
17721 ori.l &bsun_mask+aiop_mask, USER_FPSR(%a6) # set BSUN and AIOP bits
17722 bra.w fscc_chk_bsun # go check if BSUN is enabled
17725 bra.w fscc_done # go finish
17728 # not (greater or less than):
17733 fbngl.w fscc_ngl_yes # not (greater or less than)?
17735 clr.b %d0 # set false
17736 bra.w fscc_done # go finish
17739 btst &nan_bit, FPSR_CC(%a6) # is NAN set in cc?
17740 beq.w fscc_done # no;go finish
17741 ori.l &bsun_mask+aiop_mask, USER_FPSR(%a6) # set BSUN and AIOP bits
17742 bra.w fscc_chk_bsun # go check if BSUN is enabled
17745 # greater, less, or equal:
17750 fbgle.w fscc_gle_yes # greater, less, or equal?
17752 clr.b %d0 # set false
# no NAN test on this path: BSUN/AIOP are set unconditionally when the
# predicate is false.
17753 ori.l &bsun_mask+aiop_mask, USER_FPSR(%a6) # set BSUN and AIOP bits
17754 bra.w fscc_chk_bsun # go check if BSUN is enabled
17757 bra.w fscc_done # go finish
17760 # not (greater, less, or equal):
17765 fbngle.w fscc_ngle_yes # not (greater, less, or equal)?
17767 clr.b %d0 # set false
17768 bra.w fscc_done # go finish
# no NAN test on this path either: BSUN/AIOP are set unconditionally.
17771 ori.l &bsun_mask+aiop_mask, USER_FPSR(%a6) # set BSUN and AIOP bits
17772 bra.w fscc_chk_bsun # go check if BSUN is enabled
17774 #########################################################################
17776 # Miscellaneous tests #
17778 # For the miscellaneous tests, the result is set from the predicate #
17779 # itself or (seq/sneq) from the floating point condition codes. The #
17780 # signalling handlers below set BSUN and AIOP when the NAN bit is set. #
17782 #########################################################################
# NOTE(review): the handler label lines and any instruction that sets the
# "true" result are not visible in this listing; the handler order below is
# presumably false, true, signalling false, signalling true, signalling
# equal, signalling not equal -- confirm against the complete source.
17790 clr.b %d0 # set false
17791 bra.w fscc_done # go finish
17800 bra.w fscc_done # go finish
17803 # signalling false:
17808 clr.b %d0 # set false
17809 btst &nan_bit, FPSR_CC(%a6) # is NAN set in cc?
17810 beq.w fscc_done # no;go finish
17811 ori.l &bsun_mask+aiop_mask, USER_FPSR(%a6) # set BSUN and AIOP bits
17812 bra.w fscc_chk_bsun # go check if BSUN is enabled
17821 btst &nan_bit, FPSR_CC(%a6) # is NAN set in cc?
17822 beq.w fscc_done # no;go finish
17823 ori.l &bsun_mask+aiop_mask, USER_FPSR(%a6) # set BSUN and AIOP bits
17824 bra.w fscc_chk_bsun # go check if BSUN is enabled
17827 # signalling equal:
17832 fbseq.w fscc_seq_yes # signalling equal?
17834 clr.b %d0 # set false
17835 btst &nan_bit, FPSR_CC(%a6) # is NAN set in cc?
17836 beq.w fscc_done # no;go finish
17837 ori.l &bsun_mask+aiop_mask, USER_FPSR(%a6) # set BSUN and AIOP bits
17838 bra.w fscc_chk_bsun # go check if BSUN is enabled
17841 btst &nan_bit, FPSR_CC(%a6) # is NAN set in cc?
17842 beq.w fscc_done # no;go finish
17843 ori.l &bsun_mask+aiop_mask, USER_FPSR(%a6) # set BSUN and AIOP bits
17844 bra.w fscc_chk_bsun # go check if BSUN is enabled
17847 # signalling not equal:
17852 fbsneq.w fscc_sneq_yes # signalling not equal?
17854 clr.b %d0 # set false
17855 btst &nan_bit, FPSR_CC(%a6) # is NAN set in cc?
17856 beq.w fscc_done # no;go finish
17857 ori.l &bsun_mask+aiop_mask, USER_FPSR(%a6) # set BSUN and AIOP bits
17858 bra.w fscc_chk_bsun # go check if BSUN is enabled
17861 btst &nan_bit, FPSR_CC(%a6) # is NAN set in cc?
17862 beq.w fscc_done # no;go finish
17863 ori.l &bsun_mask+aiop_mask, USER_FPSR(%a6) # set BSUN and AIOP bits
17864 bra.w fscc_chk_bsun # go check if BSUN is enabled
17866 #########################################################################
17868 # IEEE Aware tests #
17870 # For the IEEE aware tests, we only have to set the result based on the #
17871 # floating point condition codes. The BSUN exception will not be #
17872 # set for any of these tests. #
17874 #########################################################################
# NOTE(review): the handler label lines and any instruction that sets the
# "true" result are not visible in this listing; confirm against the
# complete source.
17877 # ordered greater than:
17882 fbogt.w fscc_ogt_yes # ordered greater than?
17884 clr.b %d0 # set false
17885 bra.w fscc_done # go finish
17888 bra.w fscc_done # go finish
17891 # unordered or less or equal:
17896 fbule.w fscc_ule_yes # unordered or less or equal?
17898 clr.b %d0 # set false
17899 bra.w fscc_done # go finish
17902 bra.w fscc_done # go finish
17905 # ordered greater than or equal:
17910 fboge.w fscc_oge_yes # ordered greater than or equal?
17912 clr.b %d0 # set false
17913 bra.w fscc_done # go finish
17916 bra.w fscc_done # go finish
17919 # unordered or less than:
17924 fbult.w fscc_ult_yes # unordered or less than?
17926 clr.b %d0 # set false
17927 bra.w fscc_done # go finish
17930 bra.w fscc_done # go finish
17933 # ordered less than:
17938 fbolt.w fscc_olt_yes # ordered less than?
17940 clr.b %d0 # set false
17941 bra.w fscc_done # go finish
17944 bra.w fscc_done # go finish
17947 # unordered or greater or equal:
17952 fbuge.w fscc_uge_yes # unordered or greater or equal?
17954 clr.b %d0 # set false
17955 bra.w fscc_done # go finish
17958 bra.w fscc_done # go finish
17961 # ordered less than or equal:
17966 fbole.w fscc_ole_yes # ordered less than or equal?
17968 clr.b %d0 # set false
17969 bra.w fscc_done # go finish
17972 bra.w fscc_done # go finish
17975 # unordered or greater than:
17980 fbugt.w fscc_ugt_yes # unordered or greater than?
17982 clr.b %d0 # set false
17983 bra.w fscc_done # go finish
17986 bra.w fscc_done # go finish
17989 # ordered greater or less than:
17994 fbogl.w fscc_ogl_yes # ordered greater or less than?
17996 clr.b %d0 # set false
17997 bra.w fscc_done # go finish
18000 bra.w fscc_done # go finish
18003 # unordered or equal:
18008 fbueq.w fscc_ueq_yes # unordered or equal?
18010 clr.b %d0 # set false
18011 bra.w fscc_done # go finish
18014 bra.w fscc_done # go finish
# ordered:
18022 fbor.w fscc_or_yes # ordered?
18024 clr.b %d0 # set false
18025 bra.w fscc_done # go finish
18028 bra.w fscc_done # go finish
# unordered:
18036 fbun.w fscc_un_yes # unordered?
18038 clr.b %d0 # set false
18039 bra.w fscc_done # go finish
18042 bra.w fscc_done # go finish
18044 #######################################################################
18047 # the bsun exception bit was set. now, check to see if BSUN
18048 # is enabled. if so, don't store result and correct stack frame
18049 # for a bsun exception.
18052 btst &bsun_bit,FPCR_ENABLE(%a6) # is BSUN enabled?
# NOTE(review): no conditional branch on this test is visible in this
# listing (a branch to the enabled-BSUN path would be expected here);
# confirm against the complete source.
18056 # the bsun exception bit was not set.
18057 # the result has been selected.
18058 # now, check to see if the result is to be stored in the data register
18059 # file or in memory.
18062 mov.l %d0,%a0 # save result for a moment
18064 mov.b 1+EXC_OPWORD(%a6),%d1 # fetch lo opword
18065 mov.l %d1,%d0 # make a copy
18066 andi.b &0x38,%d1 # extract <ea> mode bits (5-3)
18068 bne.b fscc_mem_op # nonzero mode: it's a memory operation
# NOTE(review): d1 was just masked with 0x38 and found zero, so a reload of
# the opword copy held in d0 would be expected before the register index is
# extracted; no such line is visible here -- confirm against the complete
# source.
18071 andi.w &0x7,%d1 # pass index in d1
18072 mov.l %a0,%d0 # pass result in d0
18073 bsr.l store_dreg_b # save result in regfile
18077 # the stacked <ea> is correct with the exception of:
18078 # -> Dn : <ea> is garbage
18080 # if the addressing mode is post-increment or pre-decrement,
18081 # then the address registers have not been updated.
18084 cmpi.b %d1,&0x18 # is <ea> (An)+ ?
18085 beq.b fscc_mem_inc # yes
18086 cmpi.b %d1,&0x20 # is <ea> -(An) ?
18087 beq.b fscc_mem_dec # yes
# any other memory mode: write the byte, no address register update.
18089 mov.l %a0,%d0 # pass result in d0
18090 mov.l EXC_EA(%a6),%a0 # fetch <ea>
18091 bsr.l _dmem_write_byte # write result byte
18093 tst.l %d1 # did dstore fail?
18094 bne.w fscc_err # yes
18098 # addressing mode is post-increment. write the result byte. if the write
18099 # fails then don't update the address register. if write passes then
18100 # call inc_areg() to update the address register.
18102 mov.l %a0,%d0 # pass result in d0
18103 mov.l EXC_EA(%a6),%a0 # fetch <ea>
18104 bsr.l _dmem_write_byte # write result byte
18106 tst.l %d1 # did dstore fail?
18107 bne.w fscc_err # yes
18109 mov.b 0x1+EXC_OPWORD(%a6),%d1 # re-fetch lo opword (d1 held write status)
18110 andi.w &0x7,%d1 # pass index in d1
18111 movq.l &0x1,%d0 # pass amt to inc by
18112 bsr.l inc_areg # increment address register
18116 # addressing mode is pre-decrement. write the result byte. if the write
18117 # fails then don't update the address register. if the write passes then
18118 # call dec_areg() to update the address register.
18120 mov.l %a0,%d0 # pass result in d0
18121 mov.l EXC_EA(%a6),%a0 # fetch <ea>
18122 bsr.l _dmem_write_byte # write result byte
18124 tst.l %d1 # did dstore fail?
18125 bne.w fscc_err # yes
18127 mov.b 0x1+EXC_OPWORD(%a6),%d1 # re-fetch lo opword (d1 held write status)
18128 andi.w &0x7,%d1 # pass index in d1
18129 movq.l &0x1,%d0 # pass amt to dec by
18130 bsr.l dec_areg # decrement address register
18134 # the emulation routine set bsun and BSUN was enabled. have to
18135 # fix stack and jump to the bsun handler.
18136 # let the caller of this routine shift the stack frame up to
18137 # eliminate the effective address field.
18139 mov.b &fbsun_flg,SPCOND_FLG(%a6) # tell caller: enabled BSUN pending
18142 # the byte write to memory has failed. pass the failing effective address
18143 # and a FSLW to funimp_dacc().
# NOTE(review): only the store of 0x00a1 to EXC_VOFF is visible here; the
# transfer to the access-error exit is not shown in this listing -- confirm.
18145 mov.w &0x00a1,EXC_VOFF(%a6)
18148 #########################################################################
18149 # XDEF **************************************************************** #
18150 # fmovm_dynamic(): emulate "fmovm" dynamic instruction #
18152 # XREF **************************************************************** #
18153 # fetch_dreg() - fetch data register #
18154 # {i,d,}mem_read() - fetch data from memory #
18155 # _mem_write() - write data to memory #
18156 # iea_iacc() - instruction memory access error occurred #
18157 # iea_dacc() - data memory access error occurred #
18158 # restore() - restore An index regs if access error occurred #
18160 # INPUT *************************************************************** #
18163 # OUTPUT ************************************************************** #
18164 # If instr is "fmovm Dn,-(A7)" from supervisor mode, #
18165 # d0 = size of dump #
18167 # Else if instruction access error, #
18169 # Else if data access error, #
18171 # a0 = address of fault #
18175 # ALGORITHM *********************************************************** #
18176 # The effective address must be calculated since this is entered #
18177 # from an "Unimplemented Effective Address" exception handler. So, we #
18178 # have our own fcalc_ea() routine here. If an access error is flagged #
18179 # by a _{i,d,}mem_read() call, we must exit through the special #
18181 # The data register is determined and its value loaded to get the #
18182 # string of FP registers affected. This value is used as an index into #
18183 # a lookup table such that we can determine the number of bytes #
18185 # If the instruction is "fmovm.x <ea>,Dn", a _mem_read() is used #
18186 # to read in all FP values. Again, _mem_read() may fail and require a #
18188 # If the instruction is "fmovm.x DN,<ea>", a _mem_write() is used #
18189 # to write all FP values. _mem_write() may also fail. #
18190 # If the instruction is "fmovm.x DN,-(a7)" from supervisor mode, #
18191 # then we return the size of the dump and the string to the caller #
18192 # so that the move can occur outside of this routine. This special #
18193 # case is required so that moves to the system stack are handled #
18197 # fmovm.x dn, <ea> #
18198 # fmovm.x <ea>, dn #
18200 # <WORD 1> <WORD2> #
18201 # 1111 0010 00 |<ea>| 11@& 1000 0$$$ 0000 #
18203 # & = (0): predecrement addressing mode #
18204 # (1): postincrement or control addressing mode #
18205 # @ = (0): move listed regs from memory to the FPU #
18206 # (1): move listed regs from the FPU to memory #
18207 # $$$ : index of data register holding reg select mask #
18210 # If the data register holds a zero, then the #
18211 # instruction is a nop. #
18213 #########################################################################
18215 global fmovm_dynamic
# NOTE(review): several lines of the header above and some label lines of the
# routine below are not visible in this listing; confirm against the
# complete source.
18218 # extract the data register in which the bit string resides...
18219 mov.b 1+EXC_EXTWORD(%a6),%d1 # fetch lo byte of extword
18220 andi.w &0x70,%d1 # extract reg bits ($$$)
18221 lsr.b &0x4,%d1 # shift into lo bits
18223 # fetch the bit string into d0...
18224 bsr.l fetch_dreg # fetch reg string
18226 andi.l &0x000000ff,%d0 # keep only lo byte
18228 mov.l %d0,-(%sp) # save strg
18229 mov.b (tbl_fmovm_size.w,%pc,%d0),%d0 # d0 = byte count for this strg
18230 mov.l %d0,-(%sp) # save size
18231 bsr.l fmovm_calc_ea # calculate <ea>
18232 mov.l (%sp)+,%d0 # restore size
18233 mov.l (%sp)+,%d1 # restore strg
18235 # if the bit string is a zero, then the operation is a no-op
18236 # but, make sure that we've calculated ea and advanced the opword pointer
18237 beq.w fmovm_data_done # Z comes from restoring strg into d1
18239 # separate move ins from move outs...
18240 btst &0x5,EXC_EXTWORD(%a6) # is it a move in or out?
18241 beq.w fmovm_data_in # it's a move in (@ = 0)
18247 btst &0x4,EXC_EXTWORD(%a6) # control or predecrement?
18248 bne.w fmovm_out_ctrl # control (& = 1)
18250 ############################
18252 # for predecrement mode, the bit string is the opposite of both control
18253 # operations and postincrement mode. (bit7 = FP7 ... bit0 = FP0)
18254 # here, we convert it to be just like the others...
18255 mov.b (tbl_fmovm_convert.w,%pc,%d1.w*1),%d1 # d1 = strg in control order
18257 btst &0x5,EXC_SR(%a6) # user or supervisor mode?
18258 beq.b fmovm_out_ctrl # user
18260 fmovm_out_predec_s:
18261 cmpi.b SPCOND_FLG(%a6),&mda7_flg # is <ea> mode -(a7)?
18262 bne.b fmovm_out_ctrl # no
18264 # the operation was unfortunately an: fmovm.x dn,-(sp)
18265 # called from supervisor mode.
18266 # we're also passing "size" and "strg" back to the calling routine
18269 ############################
# move out (FPU -> memory): an image of the selected FP registers is built
# on the supervisor stack and then handed to _dmem_write.
# on entry: a0 = <ea>, d0 = size in bytes, d1 = strg in control order.
# each tst.b/lsl.b leaves the next register's select bit in bit 7 of d1,
# so "bpl" means "this register is not selected".
# FP0/FP1 are taken from their stacked copies (EXC_FP0/EXC_FP1); FP2-FP7
# are stored with fmovm.x.
# NOTE(review): the "fmovm_out_ctrl:" label, the instruction that points a0
# at the new stack area, and any advance of a0 after each fmovm.x are not
# visible in this listing; confirm against the complete source.
18271 mov.l %a0,%a1 # move <ea> to a1
18273 sub.l %d0,%sp # subtract size of dump
18276 tst.b %d1 # should FP0 be moved?
18277 bpl.b fmovm_out_ctrl_fp1 # no
18279 mov.l 0x0+EXC_FP0(%a6),(%a0)+ # yes
18280 mov.l 0x4+EXC_FP0(%a6),(%a0)+
18281 mov.l 0x8+EXC_FP0(%a6),(%a0)+
18283 fmovm_out_ctrl_fp1:
18284 lsl.b &0x1,%d1 # should FP1 be moved?
18285 bpl.b fmovm_out_ctrl_fp2 # no
18287 mov.l 0x0+EXC_FP1(%a6),(%a0)+ # yes
18288 mov.l 0x4+EXC_FP1(%a6),(%a0)+
18289 mov.l 0x8+EXC_FP1(%a6),(%a0)+
18291 fmovm_out_ctrl_fp2:
18292 lsl.b &0x1,%d1 # should FP2 be moved?
18293 bpl.b fmovm_out_ctrl_fp3 # no
18295 fmovm.x &0x20,(%a0) # yes
18298 fmovm_out_ctrl_fp3:
18299 lsl.b &0x1,%d1 # should FP3 be moved?
18300 bpl.b fmovm_out_ctrl_fp4 # no
18302 fmovm.x &0x10,(%a0) # yes
18305 fmovm_out_ctrl_fp4:
18306 lsl.b &0x1,%d1 # should FP4 be moved?
18307 bpl.b fmovm_out_ctrl_fp5 # no
18309 fmovm.x &0x08,(%a0) # yes
18312 fmovm_out_ctrl_fp5:
18313 lsl.b &0x1,%d1 # should FP5 be moved?
18314 bpl.b fmovm_out_ctrl_fp6 # no
18316 fmovm.x &0x04,(%a0) # yes
18319 fmovm_out_ctrl_fp6:
18320 lsl.b &0x1,%d1 # should FP6 be moved?
18321 bpl.b fmovm_out_ctrl_fp7 # no
18323 fmovm.x &0x02,(%a0) # yes
18326 fmovm_out_ctrl_fp7:
18327 lsl.b &0x1,%d1 # should FP7 be moved?
18328 bpl.b fmovm_out_ctrl_done # no
18330 fmovm.x &0x01,(%a0) # yes
18333 fmovm_out_ctrl_done:
18334 mov.l %a1,L_SCR1(%a6) # remember destination <ea>
18336 lea (%sp),%a0 # pass: supervisor src
18337 mov.l %d0,-(%sp) # save size
18338 bsr.l _dmem_write # copy data to user mem
# NOTE(review): the saved size is pushed above but no matching pop is
# visible before the stack is released; confirm against the complete source.
18341 add.l %d0,%sp # clear fpreg data from stack
18343 tst.l %d1 # did dstore err?
18344 bne.w fmovm_out_err # yes
# move in (memory -> FPU): the source data is copied onto the supervisor
# stack with _dmem_read and then distributed to the selected FP registers.
# on entry: a0 = <ea>, d0 = size in bytes, d1 = strg (bit 7 = FP0).
# FP0/FP1 are written to their stacked copies (EXC_FP0/EXC_FP1); FP2-FP7
# are loaded with fmovm.x.
# NOTE(review): the "fmovm_data_in:" and "fmovm_data_in_fpN:" label lines
# and the setup of the _dmem_read destination are not visible in this
# listing; confirm against the complete source.
18352 mov.l %a0,L_SCR1(%a6) # remember source <ea>
18354 sub.l %d0,%sp # make room for fpregs
18357 mov.l %d1,-(%sp) # save bit string for later
18358 mov.l %d0,-(%sp) # save # of bytes
18360 bsr.l _dmem_read # copy data from user mem
18362 mov.l (%sp)+,%d0 # retrieve # of bytes
18364 tst.l %d1 # did dfetch fail?
18365 bne.w fmovm_in_err # yes
18367 mov.l (%sp)+,%d1 # load bit string
18369 lea (%sp),%a0 # addr of stack
18371 tst.b %d1 # should FP0 be moved?
18372 bpl.b fmovm_data_in_fp1 # no
18374 mov.l (%a0)+,0x0+EXC_FP0(%a6) # yes
18375 mov.l (%a0)+,0x4+EXC_FP0(%a6)
18376 mov.l (%a0)+,0x8+EXC_FP0(%a6)
18379 lsl.b &0x1,%d1 # should FP1 be moved?
18380 bpl.b fmovm_data_in_fp2 # no
18382 mov.l (%a0)+,0x0+EXC_FP1(%a6) # yes
18383 mov.l (%a0)+,0x4+EXC_FP1(%a6)
18384 mov.l (%a0)+,0x8+EXC_FP1(%a6)
18387 lsl.b &0x1,%d1 # should FP2 be moved?
18388 bpl.b fmovm_data_in_fp3 # no
18390 fmovm.x (%a0)+,&0x20 # yes
18393 lsl.b &0x1,%d1 # should FP3 be moved?
18394 bpl.b fmovm_data_in_fp4 # no
18396 fmovm.x (%a0)+,&0x10 # yes
18399 lsl.b &0x1,%d1 # should FP4 be moved?
18400 bpl.b fmovm_data_in_fp5 # no
18402 fmovm.x (%a0)+,&0x08 # yes
18405 lsl.b &0x1,%d1 # should FP5 be moved?
18406 bpl.b fmovm_data_in_fp6 # no
18408 fmovm.x (%a0)+,&0x04 # yes
18411 lsl.b &0x1,%d1 # should FP6 be moved?
18412 bpl.b fmovm_data_in_fp7 # no
18414 fmovm.x (%a0)+,&0x02 # yes
18417 lsl.b &0x1,%d1 # should FP7 be moved?
18418 bpl.b fmovm_data_in_done # no
18420 fmovm.x (%a0)+,&0x01 # yes
18422 fmovm_data_in_done:
18423 add.l %d0,%sp # remove fpregs from stack
18426 #####################################
18431 ##############################################################################
18434 # table indexed by the operation's bit string that gives the number
18435 # of bytes that will be moved.
18437 # number of bytes = (# of 1's in bit string) * 12(bytes/fpreg)
# 256 one-byte entries, 8 per row; e.g. index 0x00 ==> 0x00, index 0x03 ==>
# 0x18 (two registers), index 0xff ==> 0x60 (eight registers).
# NOTE(review): the "tbl_fmovm_size:" label line is not visible in this
# listing; confirm against the complete source.
18440 byte 0x00,0x0c,0x0c,0x18,0x0c,0x18,0x18,0x24
18441 byte 0x0c,0x18,0x18,0x24,0x18,0x24,0x24,0x30
18442 byte 0x0c,0x18,0x18,0x24,0x18,0x24,0x24,0x30
18443 byte 0x18,0x24,0x24,0x30,0x24,0x30,0x30,0x3c
18444 byte 0x0c,0x18,0x18,0x24,0x18,0x24,0x24,0x30
18445 byte 0x18,0x24,0x24,0x30,0x24,0x30,0x30,0x3c
18446 byte 0x18,0x24,0x24,0x30,0x24,0x30,0x30,0x3c
18447 byte 0x24,0x30,0x30,0x3c,0x30,0x3c,0x3c,0x48
18448 byte 0x0c,0x18,0x18,0x24,0x18,0x24,0x24,0x30
18449 byte 0x18,0x24,0x24,0x30,0x24,0x30,0x30,0x3c
18450 byte 0x18,0x24,0x24,0x30,0x24,0x30,0x30,0x3c
18451 byte 0x24,0x30,0x30,0x3c,0x30,0x3c,0x3c,0x48
18452 byte 0x18,0x24,0x24,0x30,0x24,0x30,0x30,0x3c
18453 byte 0x24,0x30,0x30,0x3c,0x30,0x3c,0x3c,0x48
18454 byte 0x24,0x30,0x30,0x3c,0x30,0x3c,0x3c,0x48
18455 byte 0x30,0x3c,0x3c,0x48,0x3c,0x48,0x48,0x54
18456 byte 0x0c,0x18,0x18,0x24,0x18,0x24,0x24,0x30
18457 byte 0x18,0x24,0x24,0x30,0x24,0x30,0x30,0x3c
18458 byte 0x18,0x24,0x24,0x30,0x24,0x30,0x30,0x3c
18459 byte 0x24,0x30,0x30,0x3c,0x30,0x3c,0x3c,0x48
18460 byte 0x18,0x24,0x24,0x30,0x24,0x30,0x30,0x3c
18461 byte 0x24,0x30,0x30,0x3c,0x30,0x3c,0x3c,0x48
18462 byte 0x24,0x30,0x30,0x3c,0x30,0x3c,0x3c,0x48
18463 byte 0x30,0x3c,0x3c,0x48,0x3c,0x48,0x48,0x54
18464 byte 0x18,0x24,0x24,0x30,0x24,0x30,0x30,0x3c
18465 byte 0x24,0x30,0x30,0x3c,0x30,0x3c,0x3c,0x48
18466 byte 0x24,0x30,0x30,0x3c,0x30,0x3c,0x3c,0x48
18467 byte 0x30,0x3c,0x3c,0x48,0x3c,0x48,0x48,0x54
18468 byte 0x24,0x30,0x30,0x3c,0x30,0x3c,0x3c,0x48
18469 byte 0x30,0x3c,0x3c,0x48,0x3c,0x48,0x48,0x54
18470 byte 0x30,0x3c,0x3c,0x48,0x3c,0x48,0x48,0x54
18471 byte 0x3c,0x48,0x48,0x54,0x48,0x54,0x54,0x60
18474 # table to convert a pre-decrement bit string into a post-increment
18475 # or control bit string.
# each entry is the index with its eight bits in reverse order:
18476 # ex: 0x00 ==> 0x00
# 0x01 ==> 0x80
# 0x02 ==> 0x40
# 0x03 ==> 0xc0
# 0xff ==> 0xff
# 256 one-byte entries, 8 per row.
# NOTE(review): the "tbl_fmovm_convert:" label line is not visible in this
# listing; confirm against the complete source.
18486 byte 0x00,0x80,0x40,0xc0,0x20,0xa0,0x60,0xe0
18487 byte 0x10,0x90,0x50,0xd0,0x30,0xb0,0x70,0xf0
18488 byte 0x08,0x88,0x48,0xc8,0x28,0xa8,0x68,0xe8
18489 byte 0x18,0x98,0x58,0xd8,0x38,0xb8,0x78,0xf8
18490 byte 0x04,0x84,0x44,0xc4,0x24,0xa4,0x64,0xe4
18491 byte 0x14,0x94,0x54,0xd4,0x34,0xb4,0x74,0xf4
18492 byte 0x0c,0x8c,0x4c,0xcc,0x2c,0xac,0x6c,0xec
18493 byte 0x1c,0x9c,0x5c,0xdc,0x3c,0xbc,0x7c,0xfc
18494 byte 0x02,0x82,0x42,0xc2,0x22,0xa2,0x62,0xe2
18495 byte 0x12,0x92,0x52,0xd2,0x32,0xb2,0x72,0xf2
18496 byte 0x0a,0x8a,0x4a,0xca,0x2a,0xaa,0x6a,0xea
18497 byte 0x1a,0x9a,0x5a,0xda,0x3a,0xba,0x7a,0xfa
18498 byte 0x06,0x86,0x46,0xc6,0x26,0xa6,0x66,0xe6
18499 byte 0x16,0x96,0x56,0xd6,0x36,0xb6,0x76,0xf6
18500 byte 0x0e,0x8e,0x4e,0xce,0x2e,0xae,0x6e,0xee
18501 byte 0x1e,0x9e,0x5e,0xde,0x3e,0xbe,0x7e,0xfe
18502 byte 0x01,0x81,0x41,0xc1,0x21,0xa1,0x61,0xe1
18503 byte 0x11,0x91,0x51,0xd1,0x31,0xb1,0x71,0xf1
18504 byte 0x09,0x89,0x49,0xc9,0x29,0xa9,0x69,0xe9
18505 byte 0x19,0x99,0x59,0xd9,0x39,0xb9,0x79,0xf9
18506 byte 0x05,0x85,0x45,0xc5,0x25,0xa5,0x65,0xe5
18507 byte 0x15,0x95,0x55,0xd5,0x35,0xb5,0x75,0xf5
18508 byte 0x0d,0x8d,0x4d,0xcd,0x2d,0xad,0x6d,0xed
18509 byte 0x1d,0x9d,0x5d,0xdd,0x3d,0xbd,0x7d,0xfd
18510 byte 0x03,0x83,0x43,0xc3,0x23,0xa3,0x63,0xe3
18511 byte 0x13,0x93,0x53,0xd3,0x33,0xb3,0x73,0xf3
18512 byte 0x0b,0x8b,0x4b,0xcb,0x2b,0xab,0x6b,0xeb
18513 byte 0x1b,0x9b,0x5b,0xdb,0x3b,0xbb,0x7b,0xfb
18514 byte 0x07,0x87,0x47,0xc7,0x27,0xa7,0x67,0xe7
18515 byte 0x17,0x97,0x57,0xd7,0x37,0xb7,0x77,0xf7
18516 byte 0x0f,0x8f,0x4f,0xcf,0x2f,0xaf,0x6f,0xef
18517 byte 0x1f,0x9f,0x5f,0xdf,0x3f,0xbf,0x7f,0xff
18519 global fmovm_calc_ea
18520 ###############################################
18521 # _fmovm_calc_ea: calculate effective address #
18522 ###############################################
# on entry d0 holds the number of bytes to be moved; it is parked in a0 so
# that the (An)+ and -(An) handlers can adjust the address register by it.
# the handler is chosen from the low six bits of EXC_OPWORD via tbl_fea_mode.
# NOTE(review): the "fmovm_calc_ea:" and "tbl_fea_mode:" label lines are not
# visible in this listing; confirm against the complete source.
18524 mov.l %d0,%a0 # move # bytes to a0
18526 # currently, MODE and REG are taken from the EXC_OPWORD. this could be
18527 # easily changed if they were inputs passed in registers.
18528 mov.w EXC_OPWORD(%a6),%d0 # fetch opcode word
18529 mov.w %d0,%d1 # make a copy
18531 andi.w &0x3f,%d0 # extract 6-bit {MODE,REG} field
18532 andi.l &0x7,%d1 # extract reg field (upper bits cleared)
18534 # jump to the corresponding function for each {MODE,REG} pair.
18535 mov.w (tbl_fea_mode.b,%pc,%d0.w*2),%d0 # fetch jmp distance
18536 jmp (tbl_fea_mode.b,%pc,%d0.w*1) # jmp to correct ea mode
# table of 16-bit offsets (handler - tbl_fea_mode), indexed by {MODE,REG}.
# entries written "tbl_fea_mode - tbl_fea_mode" are zero offsets; presumably
# those {MODE,REG} pairs are never dispatched here -- TODO confirm.
# index 0x00-0x07: zero offsets
18540 short tbl_fea_mode - tbl_fea_mode
18541 short tbl_fea_mode - tbl_fea_mode
18542 short tbl_fea_mode - tbl_fea_mode
18543 short tbl_fea_mode - tbl_fea_mode
18544 short tbl_fea_mode - tbl_fea_mode
18545 short tbl_fea_mode - tbl_fea_mode
18546 short tbl_fea_mode - tbl_fea_mode
18547 short tbl_fea_mode - tbl_fea_mode
# index 0x08-0x0f: zero offsets
18549 short tbl_fea_mode - tbl_fea_mode
18550 short tbl_fea_mode - tbl_fea_mode
18551 short tbl_fea_mode - tbl_fea_mode
18552 short tbl_fea_mode - tbl_fea_mode
18553 short tbl_fea_mode - tbl_fea_mode
18554 short tbl_fea_mode - tbl_fea_mode
18555 short tbl_fea_mode - tbl_fea_mode
18556 short tbl_fea_mode - tbl_fea_mode
# index 0x10-0x17: (An)
18558 short faddr_ind_a0 - tbl_fea_mode
18559 short faddr_ind_a1 - tbl_fea_mode
18560 short faddr_ind_a2 - tbl_fea_mode
18561 short faddr_ind_a3 - tbl_fea_mode
18562 short faddr_ind_a4 - tbl_fea_mode
18563 short faddr_ind_a5 - tbl_fea_mode
18564 short faddr_ind_a6 - tbl_fea_mode
18565 short faddr_ind_a7 - tbl_fea_mode
# index 0x18-0x1f: (An)+
18567 short faddr_ind_p_a0 - tbl_fea_mode
18568 short faddr_ind_p_a1 - tbl_fea_mode
18569 short faddr_ind_p_a2 - tbl_fea_mode
18570 short faddr_ind_p_a3 - tbl_fea_mode
18571 short faddr_ind_p_a4 - tbl_fea_mode
18572 short faddr_ind_p_a5 - tbl_fea_mode
18573 short faddr_ind_p_a6 - tbl_fea_mode
18574 short faddr_ind_p_a7 - tbl_fea_mode
# index 0x20-0x27: -(An)
18576 short faddr_ind_m_a0 - tbl_fea_mode
18577 short faddr_ind_m_a1 - tbl_fea_mode
18578 short faddr_ind_m_a2 - tbl_fea_mode
18579 short faddr_ind_m_a3 - tbl_fea_mode
18580 short faddr_ind_m_a4 - tbl_fea_mode
18581 short faddr_ind_m_a5 - tbl_fea_mode
18582 short faddr_ind_m_a6 - tbl_fea_mode
18583 short faddr_ind_m_a7 - tbl_fea_mode
# index 0x28-0x2f: (d16,An)
18585 short faddr_ind_disp_a0 - tbl_fea_mode
18586 short faddr_ind_disp_a1 - tbl_fea_mode
18587 short faddr_ind_disp_a2 - tbl_fea_mode
18588 short faddr_ind_disp_a3 - tbl_fea_mode
18589 short faddr_ind_disp_a4 - tbl_fea_mode
18590 short faddr_ind_disp_a5 - tbl_fea_mode
18591 short faddr_ind_disp_a6 - tbl_fea_mode
18592 short faddr_ind_disp_a7 - tbl_fea_mode
# index 0x30-0x37: one shared handler for all eight registers
18594 short faddr_ind_ext - tbl_fea_mode
18595 short faddr_ind_ext - tbl_fea_mode
18596 short faddr_ind_ext - tbl_fea_mode
18597 short faddr_ind_ext - tbl_fea_mode
18598 short faddr_ind_ext - tbl_fea_mode
18599 short faddr_ind_ext - tbl_fea_mode
18600 short faddr_ind_ext - tbl_fea_mode
18601 short faddr_ind_ext - tbl_fea_mode
# index 0x38-0x3b: absolute and pc-relative handlers; 0x3c-0x3f: zero offsets
18603 short fabs_short - tbl_fea_mode
18604 short fabs_long - tbl_fea_mode
18605 short fpc_ind - tbl_fea_mode
18606 short fpc_ind_ext - tbl_fea_mode
18607 short tbl_fea_mode - tbl_fea_mode
18608 short tbl_fea_mode - tbl_fea_mode
18609 short tbl_fea_mode - tbl_fea_mode
18610 short tbl_fea_mode - tbl_fea_mode
18612 ###################################
18613 # Address register indirect: (An) #
18614 ###################################
# one handler per address register; each loads the register's current value
# into a0 as the effective address. a0/a1 are read from EXC_DREGS, a6 from
# (%a6), a7 from EXC_A7; a2-a5 are read directly from the registers.
# NOTE(review): the faddr_ind_aN label lines and the return after each
# handler are not visible in this listing; confirm against the complete
# source.
18616 mov.l EXC_DREGS+0x8(%a6),%a0 # Get current a0
18620 mov.l EXC_DREGS+0xc(%a6),%a0 # Get current a1
18624 mov.l %a2,%a0 # Get current a2
18628 mov.l %a3,%a0 # Get current a3
18632 mov.l %a4,%a0 # Get current a4
18636 mov.l %a5,%a0 # Get current a5
18640 mov.l (%a6),%a0 # Get current a6
18644 mov.l EXC_A7(%a6),%a0 # Get current a7
18647 #####################################################
18648 # Address register indirect w/ postincrement: (An)+ #
18649 #####################################################
# one handler per address register. a0 holds the number of bytes on entry
# (set at the top of fmovm_calc_ea); the register is advanced by that amount
# and the incremented value is written back.
# NOTE(review): the faddr_ind_p_aN label lines, a copy of d0 into d1 before
# each add, and the hand-off of the address in a0 plus the return are not
# visible in this listing; confirm against the complete source.
18651 mov.l EXC_DREGS+0x8(%a6),%d0 # Get current a0
18653 add.l %a0,%d1 # Increment
18654 mov.l %d1,EXC_DREGS+0x8(%a6) # Save incr value
18659 mov.l EXC_DREGS+0xc(%a6),%d0 # Get current a1
18661 add.l %a0,%d1 # Increment
18662 mov.l %d1,EXC_DREGS+0xc(%a6) # Save incr value
18667 mov.l %a2,%d0 # Get current a2
18669 add.l %a0,%d1 # Increment
18670 mov.l %d1,%a2 # Save incr value
18675 mov.l %a3,%d0 # Get current a3
18677 add.l %a0,%d1 # Increment
18678 mov.l %d1,%a3 # Save incr value
18683 mov.l %a4,%d0 # Get current a4
18685 add.l %a0,%d1 # Increment
18686 mov.l %d1,%a4 # Save incr value
18691 mov.l %a5,%d0 # Get current a5
18693 add.l %a0,%d1 # Increment
18694 mov.l %d1,%a5 # Save incr value
18699 mov.l (%a6),%d0 # Get current a6
18701 add.l %a0,%d1 # Increment
18702 mov.l %d1,(%a6) # Save incr value
# a7 case: additionally records mia7_flg in SPCOND_FLG.
18707 mov.b &mia7_flg,SPCOND_FLG(%a6) # set "special case" flag
18709 mov.l EXC_A7(%a6),%d0 # Get current a7
18711 add.l %a0,%d1 # Increment
18712 mov.l %d1,EXC_A7(%a6) # Save incr value
18716 ####################################################
18717 # Address register indirect w/ predecrement: -(An) #
18718 ####################################################
# one handler per address register. a0 holds the number of bytes on entry
# (set at the top of fmovm_calc_ea); the register is lowered by that amount
# and the decremented value is written back.
# NOTE(review): the faddr_ind_m_aN label lines and the hand-off of the
# address in a0 plus the return are not visible in this listing; confirm
# against the complete source.
18720 mov.l EXC_DREGS+0x8(%a6),%d0 # Get current a0
18721 sub.l %a0,%d0 # Decrement
18722 mov.l %d0,EXC_DREGS+0x8(%a6) # Save decr value
18727 mov.l EXC_DREGS+0xc(%a6),%d0 # Get current a1
18728 sub.l %a0,%d0 # Decrement
18729 mov.l %d0,EXC_DREGS+0xc(%a6) # Save decr value
18734 mov.l %a2,%d0 # Get current a2
18735 sub.l %a0,%d0 # Decrement
18736 mov.l %d0,%a2 # Save decr value
18741 mov.l %a3,%d0 # Get current a3
18742 sub.l %a0,%d0 # Decrement
18743 mov.l %d0,%a3 # Save decr value
18748 mov.l %a4,%d0 # Get current a4
18749 sub.l %a0,%d0 # Decrement
18750 mov.l %d0,%a4 # Save decr value
18755 mov.l %a5,%d0 # Get current a5
18756 sub.l %a0,%d0 # Decrement
18757 mov.l %d0,%a5 # Save decr value
18762 mov.l (%a6),%d0 # Get current a6
18763 sub.l %a0,%d0 # Decrement
18764 mov.l %d0,(%a6) # Save decr value
# a7 case: additionally records mda7_flg in SPCOND_FLG (fmovm_dynamic tests
# this flag to detect a supervisor-mode move to -(a7)).
18769 mov.b &mda7_flg,SPCOND_FLG(%a6) # set "special case" flag
18771 mov.l EXC_A7(%a6),%d0 # Get current a7
18772 sub.l %a0,%d0 # Decrement
18773 mov.l %d0,EXC_A7(%a6) # Save decr value
18777 ########################################################
18778 # Address register indirect w/ displacement: (d16, An) #
18779 ########################################################
# (d16,An) handlers: one group of seven instructions per address register.
# Each group:
#   1. passes the current extension-word pointer EXC_EXTWPTR(%a6) in a0 and
#      advances the stored pointer by 2;
#   2. calls _imem_read_word, which returns the word in d0 and a status in d1;
#   3. leaves through iea_iacc when d1 is non-zero (instruction fetch failed);
#   4. moves the 16-bit displacement into a0 (sign-extended) and adds the
#      base register, leaving the <ea> in a0.
# The base comes from EXC_DREGS+0x8/+0xc(%a6) for a0/a1, from the live
# registers for a2-a5, from (%a6) for a6 and from EXC_A7(%a6) for a7.
# NOTE(review): no labels or return instructions are visible between the
# groups in this listing -- confirm against the complete source.
18781 mov.l EXC_EXTWPTR(%a6),%a0 # fetch instruction addr
18782 addq.l &0x2,EXC_EXTWPTR(%a6) # incr instruction ptr
18783 bsr.l _imem_read_word
18785 tst.l %d1 # did ifetch fail?
18786 bne.l iea_iacc # yes
18788 mov.w %d0,%a0 # sign extend displacement
18790 add.l EXC_DREGS+0x8(%a6),%a0 # a0 + d16
18794 mov.l EXC_EXTWPTR(%a6),%a0 # fetch instruction addr
18795 addq.l &0x2,EXC_EXTWPTR(%a6) # incr instruction ptr
18796 bsr.l _imem_read_word
18798 tst.l %d1 # did ifetch fail?
18799 bne.l iea_iacc # yes
18801 mov.w %d0,%a0 # sign extend displacement
18803 add.l EXC_DREGS+0xc(%a6),%a0 # a1 + d16
18807 mov.l EXC_EXTWPTR(%a6),%a0 # fetch instruction addr
18808 addq.l &0x2,EXC_EXTWPTR(%a6) # incr instruction ptr
18809 bsr.l _imem_read_word
18811 tst.l %d1 # did ifetch fail?
18812 bne.l iea_iacc # yes
18814 mov.w %d0,%a0 # sign extend displacement
18816 add.l %a2,%a0 # a2 + d16
18820 mov.l EXC_EXTWPTR(%a6),%a0 # fetch instruction addr
18821 addq.l &0x2,EXC_EXTWPTR(%a6) # incr instruction ptr
18822 bsr.l _imem_read_word
18824 tst.l %d1 # did ifetch fail?
18825 bne.l iea_iacc # yes
18827 mov.w %d0,%a0 # sign extend displacement
18829 add.l %a3,%a0 # a3 + d16
18833 mov.l EXC_EXTWPTR(%a6),%a0 # fetch instruction addr
18834 addq.l &0x2,EXC_EXTWPTR(%a6) # incr instruction ptr
18835 bsr.l _imem_read_word
18837 tst.l %d1 # did ifetch fail?
18838 bne.l iea_iacc # yes
18840 mov.w %d0,%a0 # sign extend displacement
18842 add.l %a4,%a0 # a4 + d16
18846 mov.l EXC_EXTWPTR(%a6),%a0 # fetch instruction addr
18847 addq.l &0x2,EXC_EXTWPTR(%a6) # incr instruction ptr
18848 bsr.l _imem_read_word
18850 tst.l %d1 # did ifetch fail?
18851 bne.l iea_iacc # yes
18853 mov.w %d0,%a0 # sign extend displacement
18855 add.l %a5,%a0 # a5 + d16
18859 mov.l EXC_EXTWPTR(%a6),%a0 # fetch instruction addr
18860 addq.l &0x2,EXC_EXTWPTR(%a6) # incr instruction ptr
18861 bsr.l _imem_read_word
18863 tst.l %d1 # did ifetch fail?
18864 bne.l iea_iacc # yes
18866 mov.w %d0,%a0 # sign extend displacement
18868 add.l (%a6),%a0 # a6 + d16
18872 mov.l EXC_EXTWPTR(%a6),%a0 # fetch instruction addr
18873 addq.l &0x2,EXC_EXTWPTR(%a6) # incr instruction ptr
18874 bsr.l _imem_read_word
18876 tst.l %d1 # did ifetch fail?
18877 bne.l iea_iacc # yes
18879 mov.w %d0,%a0 # sign extend displacement
18881 add.l EXC_A7(%a6),%a0 # a7 + d16
18884 ########################################################################
18885 # Address register indirect w/ index(8-bit displacement): (d8, An, Xn) #
18886 # " " " w/ " (base displacement): (bd, An, Xn) #
18887 # Memory indirect postindexed: ([bd, An], Xn, od) #
18888 # Memory indirect preindexed: ([bd, An, Xn], od) #
18889 ########################################################################
# Indexed address-register modes. Visible steps: fetch the base register via
# fetch_dreg, read the extension word with _imem_read_word (leaving through
# iea_iacc if the fetch fails), divert to fcalc_mem_ind for the non-8-bit
# formats, otherwise fetch the index register, sign-extend a word index,
# scale it, add the sign-extended 8-bit displacement and add the sum to a0.
# d2 is used as a temporary and is saved/restored on the stack.
# NOTE(review): several instructions (the test feeding the bne.w, the copy of
# the extension word into d1, the word/long branch and the return) are not
# visible in this listing -- confirm against the complete source.
18892 bsr.l fetch_dreg # fetch base areg
18895 mov.l EXC_EXTWPTR(%a6),%a0 # fetch instruction addr
18896 addq.l &0x2,EXC_EXTWPTR(%a6) # incr instruction ptr
18897 bsr.l _imem_read_word # fetch extword in d0
18899 tst.l %d1 # did ifetch fail?
18900 bne.l iea_iacc # yes
18905 bne.w fcalc_mem_ind
18907 mov.l %d0,L_SCR1(%a6) # hold extension word (called "opword" here)
18911 andi.w &0xf,%d1 # extract index regno
18913 # count on fetch_dreg() not to alter a0...
18914 bsr.l fetch_dreg # fetch index
18916 mov.l %d2,-(%sp) # save d2
18917 mov.l L_SCR1(%a6),%d2 # fetch saved extension word
18919 btst &0xb,%d2 # is it word or long?
18921 ext.l %d0 # sign extend word index
18925 andi.l &0x3,%d1 # extract scale value
18927 lsl.l %d1,%d0 # shift index by scale
18929 extb.l %d2 # sign extend displacement
18930 add.l %d2,%d0 # index + disp
18931 add.l %d0,%a0 # An + (index + disp)
18933 mov.l (%sp)+,%d2 # restore old d2
18936 ###########################
18937 # Absolute short: (XXX).W #
18938 ###########################
# Absolute short: read one word from the instruction stream at EXC_EXTWPTR
# (advancing the stored pointer by 2) and return it, sign-extended, in a0.
# A non-zero d1 from _imem_read_word means the fetch failed -> iea_iacc.
18940 mov.l EXC_EXTWPTR(%a6),%a0 # fetch instruction addr
18941 addq.l &0x2,EXC_EXTWPTR(%a6) # incr instruction ptr
18942 bsr.l _imem_read_word # fetch short address
18944 tst.l %d1 # did ifetch fail?
18945 bne.l iea_iacc # yes
18947 mov.w %d0,%a0 # return <ea> in a0 (word is sign-extended)
18950 ##########################
18951 # Absolute long: (XXX).L #
18952 ##########################
# Absolute long: read one longword from the instruction stream at EXC_EXTWPTR
# (advancing the stored pointer by 4) and return it in a0.
# A non-zero d1 from _imem_read_long means the fetch failed -> iea_iacc.
18954 mov.l EXC_EXTWPTR(%a6),%a0 # fetch instruction addr
18955 addq.l &0x4,EXC_EXTWPTR(%a6) # incr instruction ptr
18956 bsr.l _imem_read_long # fetch long address
18958 tst.l %d1 # did ifetch fail?
18959 bne.l iea_iacc # yes
18961 mov.l %d0,%a0 # return <ea> in a0
18964 #######################################################
18965 # Program counter indirect w/ displacement: (d16, PC) #
18966 #######################################################
# (d16,PC): read the 16-bit displacement from the instruction stream and
# return a0 = sign-extended d16 + address of the displacement word itself.
# EXC_EXTWPTR has already been advanced past the displacement by the addq
# below, so 2 is subtracted again after adding it in.
# A non-zero d1 from _imem_read_word means the fetch failed -> iea_iacc.
18968 mov.l EXC_EXTWPTR(%a6),%a0 # fetch instruction addr
18969 addq.l &0x2,EXC_EXTWPTR(%a6) # incr instruction ptr
18970 bsr.l _imem_read_word # fetch word displacement
18972 tst.l %d1 # did ifetch fail?
18973 bne.l iea_iacc # yes
18975 mov.w %d0,%a0 # sign extend displacement
18977 add.l EXC_EXTWPTR(%a6),%a0 # pc + d16
18979 # the extwptr was advanced by 2 before the fetch. need to adjust here.
18980 subq.l &0x2,%a0 # adjust <ea>
18983 ##########################################################
18984 # PC indirect w/ index(8-bit displacement): (d8, PC, Xn) #
18985 # " " w/ " (base displacement): (bd, PC, Xn) #
18986 # PC memory indirect postindexed: ([bd, PC], Xn, od) #
18987 # PC memory indirect preindexed: ([bd, PC, Xn], od) #
18988 ##########################################################
# PC-relative indexed modes. The extension word is read with _imem_read_word
# (failure -> iea_iacc); the base placed in a0 is the address of that
# extension word (EXC_EXTWPTR after the increment, minus 2). If bit 8 of the
# extension word is set the full-format path fcalc_mem_ind is taken.
# Otherwise: the index register number is taken from the top four bits of
# the extension word, the index is fetched with fetch_dreg, sign-extended
# when it is a word index (bit 11 clear), scaled, added to the sign-extended
# 8-bit displacement and finally added to the base in a0.
# d2 is used as a temporary and is saved/restored on the stack.
# NOTE(review): the fpii8_long label, the reload of d1 before the scale
# extraction and the return are not visible in this listing -- confirm.
18990 mov.l EXC_EXTWPTR(%a6),%a0 # fetch instruction addr
18991 addq.l &0x2,EXC_EXTWPTR(%a6) # incr instruction ptr
18992 bsr.l _imem_read_word # fetch ext word
18994 tst.l %d1 # did ifetch fail?
18995 bne.l iea_iacc # yes
18997 mov.l EXC_EXTWPTR(%a6),%a0 # put base in a0
18998 subq.l &0x2,%a0 # adjust base
19000 btst &0x8,%d0 # is disp only 8 bits?
19001 bne.w fcalc_mem_ind # calc memory indirect
19003 mov.l %d0,L_SCR1(%a6) # store extension word (called "opword" here)
19005 mov.l %d0,%d1 # make extword copy
19006 rol.w &0x4,%d1 # rotate reg num into place
19007 andi.w &0xf,%d1 # extract register number
19009 # count on fetch_dreg() not to alter a0...
19010 bsr.l fetch_dreg # fetch index
19012 mov.l %d2,-(%sp) # save d2
19013 mov.l L_SCR1(%a6),%d2 # fetch saved extension word
19015 btst &0xb,%d2 # is index word or long?
19016 bne.b fpii8_long # long
19017 ext.l %d0 # sign extend word index
19020 rol.w &0x7,%d1 # rotate scale value into place
19021 andi.l &0x3,%d1 # extract scale value
19023 lsl.l %d1,%d0 # shift index by scale
19025 extb.l %d2 # sign extend displacement
19026 add.l %d2,%d0 # disp + index
19027 add.l %d0,%a0 # PC base + (index + disp)
19029 mov.l (%sp)+,%d2 # restore temp register
# Full-format extension word handling (reached via "bne.w fcalc_mem_ind").
# On entry d0 holds the extension word and a0 the base address.
# Register roles visible below (d2-d5 are saved on entry and restored on
# each exit path):
#   d2 = index value (0 when the index is suppressed)
#   d3 = base address, later base + bd
#   d4 = outer displacement (added as "od" near the end)
#   d5 = copy of the extension word
# Base and outer displacements are read from the instruction stream with
# _imem_read_word/_imem_read_long (failure -> fcea_iacc); the indirect
# pointer is read with _dmem_read_long (failure -> fcea_err).
# NOTE(review): most labels, conditional branches and returns of this routine
# are not visible in this listing, so the control flow between the groups
# below cannot be confirmed from here.
19037 btst &0x6,%d0 # is the index suppressed?
19040 movm.l &0x3c00,-(%sp) # save d2-d5
19042 mov.l %d0,%d5 # put extword in d5
19043 mov.l %a0,%d3 # put base in d3
19045 clr.l %d2 # yes, so index = 0
19046 bra.b fbase_supp_ck
19050 mov.l %d0,L_SCR1(%a6) # save d0 (extension word)
19051 bfextu %d0{&16:&4},%d1 # fetch index regno (extword bits 15-12)
19054 movm.l &0x3c00,-(%sp) # save d2-d5
19055 mov.l %d0,%d2 # put index in d2
19056 mov.l L_SCR1(%a6),%d5 # reload saved extension word into d5
19059 btst &0xb,%d5 # is index word or long?
19064 bfextu %d5{&21:&2},%d0 # extract extword bits 10-9 (scale field)
19067 # base address (passed as parameter in d3):
19068 # we clear the value here if it should actually be suppressed.
19070 btst &0x7,%d5 # is the base address suppressed?
19074 # base displacement:
19076 bfextu %d5{&26:&2},%d0 # get bd size
19077 # beq.l fmovm_error # if (size == 0) it's reserved
19083 mov.l EXC_EXTWPTR(%a6),%a0 # fetch instruction addr
19084 addq.l &0x4,EXC_EXTWPTR(%a6) # incr instruction ptr
19085 bsr.l _imem_read_long
19087 tst.l %d1 # did ifetch fail?
19088 bne.l fcea_iacc # yes
19093 mov.l EXC_EXTWPTR(%a6),%a0 # fetch instruction addr
19094 addq.l &0x2,EXC_EXTWPTR(%a6) # incr instruction ptr
19095 bsr.l _imem_read_word
19097 tst.l %d1 # did ifetch fail?
19098 bne.l fcea_iacc # yes
19100 ext.l %d0 # sign extend bd
19103 add.l %d0,%d3 # base += bd
19105 # outer displacement:
19107 bfextu %d5{&30:&2},%d0 # is od suppressed?
19114 mov.l EXC_EXTWPTR(%a6),%a0 # fetch instruction addr
19115 addq.l &0x4,EXC_EXTWPTR(%a6) # incr instruction ptr
19116 bsr.l _imem_read_long
19118 tst.l %d1 # did ifetch fail?
19119 bne.l fcea_iacc # yes
19124 mov.l EXC_EXTWPTR(%a6),%a0 # fetch instruction addr
19125 addq.l &0x2,EXC_EXTWPTR(%a6) # incr instruction ptr
19126 bsr.l _imem_read_word
19128 tst.l %d1 # did ifetch fail?
19129 bne.l fcea_iacc # yes
19131 ext.l %d0 # sign extend od
19140 btst &0x2,%d5 # pre or post indexing?
19144 bsr.l _dmem_read_long
19146 tst.l %d1 # did dfetch fail?
19147 bne.w fcea_err # yes
19149 add.l %d2,%d0 # <ea> += index
19150 add.l %d4,%d0 # <ea> += od
19154 add.l %d2,%d3 # preindexing
19156 bsr.l _dmem_read_long
19158 tst.l %d1 # did dfetch fail?
19159 bne.w fcea_err # yes
19161 add.l %d4,%d0 # ea += od
19165 add.l %d2,%d3 # ea = (base + bd) + index
19170 movm.l (%sp)+,&0x003c # restore d2-d5
19173 #########################################################
19177 movm.l (%sp)+,&0x003c # restore d2-d5
19182 movm.l (%sp)+,&0x003c # restore d2-d5
19195 mov.l L_SCR1(%a6),%a0 # a0 = value held in L_SCR1
19198 #########################################################################
19199 # XDEF **************************************************************** #
19200 # fmovm_ctrl(): emulate fmovm.l of control registers instr #
19202 # XREF **************************************************************** #
19203 # _imem_read_long() - read longword from memory #
19204 # iea_iacc() - _imem_read_long() failed; error recovery #
19206 # INPUT *************************************************************** #
19209 # OUTPUT ************************************************************** #
19210 # If _imem_read_long() doesn't fail: #
19211 # USER_FPCR(a6) = new FPCR value #
19212 # USER_FPSR(a6) = new FPSR value #
19213 # USER_FPIAR(a6) = new FPIAR value #
19215 # ALGORITHM *********************************************************** #
19216 # Decode the instruction type by looking at the extension word #
19217 # in order to see how many control registers to fetch from memory. #
19218 # Fetch them using _imem_read_long(). If this fetch fails, exit through #
19219 # the special access error exit handler iea_iacc(). #
19221 # Instruction word decoding: #
19223 # fmovem.l #<data>, {FPIAR&|FPCR&|FPSR} #
19226 # 1111 0010 00 111100 100$ $$00 0000 0000 #
19228 # $$$ (100): FPCR #
19233 #########################################################################
# fmovm_ctrl body: the first byte of EXC_EXTWORD selects which control
# registers are loaded from immediate data:
#   0x9c -> fctrl_in_7 (fpcr/fpsr/fpiar)
#   0x98 -> fctrl_in_6 (fpcr/fpsr)
#   0x94 -> fctrl_in_5 (fpcr/fpiar)
#   anything else falls through to the fpsr/fpiar case.
# Every register value is one longword read with _imem_read_long from
# EXC_EXTWPTR (the stored pointer is advanced by 4 per read) and is stored
# into USER_FPCR/USER_FPSR/USER_FPIAR(%a6). A non-zero d1 after a read means
# the instruction fetch failed and control leaves through iea_iacc.
# NOTE(review): the fctrl_in_* labels and the returns that separate the four
# cases are not visible in this listing -- confirm against the full source.
19237 mov.b EXC_EXTWORD(%a6),%d0 # fetch reg select bits
19238 cmpi.b %d0,&0x9c # fpcr & fpsr & fpiar ?
19239 beq.w fctrl_in_7 # yes
19240 cmpi.b %d0,&0x98 # fpcr & fpsr ?
19241 beq.w fctrl_in_6 # yes
19242 cmpi.b %d0,&0x94 # fpcr & fpiar ?
19243 beq.b fctrl_in_5 # yes
19245 # fmovem.l #<data>, fpsr/fpiar
19247 mov.l EXC_EXTWPTR(%a6),%a0 # fetch instruction addr
19248 addq.l &0x4,EXC_EXTWPTR(%a6) # incr instruction ptr
19249 bsr.l _imem_read_long # fetch FPSR from mem
19251 tst.l %d1 # did ifetch fail?
19252 bne.l iea_iacc # yes
19254 mov.l %d0,USER_FPSR(%a6) # store new FPSR to stack
19255 mov.l EXC_EXTWPTR(%a6),%a0 # fetch instruction addr
19256 addq.l &0x4,EXC_EXTWPTR(%a6) # incr instruction ptr
19257 bsr.l _imem_read_long # fetch FPIAR from mem
19259 tst.l %d1 # did ifetch fail?
19260 bne.l iea_iacc # yes
19262 mov.l %d0,USER_FPIAR(%a6) # store new FPIAR to stack
19265 # fmovem.l #<data>, fpcr/fpiar
19267 mov.l EXC_EXTWPTR(%a6),%a0 # fetch instruction addr
19268 addq.l &0x4,EXC_EXTWPTR(%a6) # incr instruction ptr
19269 bsr.l _imem_read_long # fetch FPCR from mem
19271 tst.l %d1 # did ifetch fail?
19272 bne.l iea_iacc # yes
19274 mov.l %d0,USER_FPCR(%a6) # store new FPCR to stack
19275 mov.l EXC_EXTWPTR(%a6),%a0 # fetch instruction addr
19276 addq.l &0x4,EXC_EXTWPTR(%a6) # incr instruction ptr
19277 bsr.l _imem_read_long # fetch FPIAR from mem
19279 tst.l %d1 # did ifetch fail?
19280 bne.l iea_iacc # yes
19282 mov.l %d0,USER_FPIAR(%a6) # store new FPIAR to stack
19285 # fmovem.l #<data>, fpcr/fpsr
19287 mov.l EXC_EXTWPTR(%a6),%a0 # fetch instruction addr
19288 addq.l &0x4,EXC_EXTWPTR(%a6) # incr instruction ptr
19289 bsr.l _imem_read_long # fetch FPCR from mem
19291 tst.l %d1 # did ifetch fail?
19292 bne.l iea_iacc # yes
19294 mov.l %d0,USER_FPCR(%a6) # store new FPCR to stack
19295 mov.l EXC_EXTWPTR(%a6),%a0 # fetch instruction addr
19296 addq.l &0x4,EXC_EXTWPTR(%a6) # incr instruction ptr
19297 bsr.l _imem_read_long # fetch FPSR from mem
19299 tst.l %d1 # did ifetch fail?
19300 bne.l iea_iacc # yes
19302 mov.l %d0,USER_FPSR(%a6) # store new FPSR to stack
19305 # fmovem.l #<data>, fpcr/fpsr/fpiar
19307 mov.l EXC_EXTWPTR(%a6),%a0 # fetch instruction addr
19308 addq.l &0x4,EXC_EXTWPTR(%a6) # incr instruction ptr
19309 bsr.l _imem_read_long # fetch FPCR from mem
19311 tst.l %d1 # did ifetch fail?
19312 bne.l iea_iacc # yes
19314 mov.l %d0,USER_FPCR(%a6) # store new FPCR to stack
19315 mov.l EXC_EXTWPTR(%a6),%a0 # fetch instruction addr
19316 addq.l &0x4,EXC_EXTWPTR(%a6) # incr instruction ptr
19317 bsr.l _imem_read_long # fetch FPSR from mem
19319 tst.l %d1 # did ifetch fail?
19320 bne.l iea_iacc # yes
19322 mov.l %d0,USER_FPSR(%a6) # store new FPSR to stack
19323 mov.l EXC_EXTWPTR(%a6),%a0 # fetch instruction addr
19324 addq.l &0x4,EXC_EXTWPTR(%a6) # incr instruction ptr
19325 bsr.l _imem_read_long # fetch FPIAR from mem
19327 tst.l %d1 # did ifetch fail?
19328 bne.l iea_iacc # yes
19330 mov.l %d0,USER_FPIAR(%a6) # store new FPIAR to stack
19333 #########################################################################
19334 # XDEF **************************************************************** #
19335 # _dcalc_ea(): calc correct <ea> from <ea> stacked on exception #
19337 # XREF **************************************************************** #
19338 # inc_areg() - increment an address register #
19339 # dec_areg() - decrement an address register #
19341 # INPUT *************************************************************** #
19342 # d0 = number of bytes to adjust <ea> by #
19344 # OUTPUT ************************************************************** #
19347 # ALGORITHM *********************************************************** #
19348 # "Dummy" CALCulate Effective Address: #
19349 # The stacked <ea> for FP unimplemented instructions and opclass #
19350 # two packed instructions is correct with the exception of... #
19352 # 1) -(An) : The register is not updated regardless of size. #
19353 # Also, for extended precision and packed, the #
19354 # stacked <ea> value is 8 bytes too big #
19355 # 2) (An)+ : The register is not updated. #
19356 # 3) #<data> : The upper longword of the immediate operand is #
19357 # stacked. b,w,l, and s sizes are completely stacked; #
19358 # d,x, and p are not. #
19360 #########################################################################
# _dcalc_ea body.
# In:  d0 = number of bytes to adjust <ea> by (kept in a0 while d0/d1 are
#      used to decode the mode and register fields of the opword low byte).
# Out: a0 = effective address.
# Cases:
#   (An)+   -> inc_areg is called with the byte count; stacked <ea> returned
#   -(An)   -> dec_areg is called with the byte count; stacked <ea> returned,
#              reduced by 8 (and rewritten to EXC_EA) when the count is 12
#   #<data> -> immed_flg is written to SPCOND_FLG and a0 is derived from the
#              longword stored at USER_FPIAR(%a6)
#   other   -> stacked EXC_EA(%a6) returned unchanged
# NOTE(review): the dcea_* labels and the returns between the cases are not
# visible in this listing -- confirm against the complete source.
19364 mov.l %d0, %a0 # move # bytes to %a0
19366 mov.b 1+EXC_OPWORD(%a6), %d0 # fetch low byte of opcode word
19367 mov.l %d0, %d1 # make a copy
19369 andi.w &0x38, %d0 # extract mode field
19370 andi.l &0x7, %d1 # extract reg field
19372 cmpi.b %d0,&0x18 # is mode (An)+ ?
19373 beq.b dcea_pi # yes
19375 cmpi.b %d0,&0x20 # is mode -(An) ?
19376 beq.b dcea_pd # yes
19378 or.w %d1,%d0 # concat mode,reg
19379 cmpi.b %d0,&0x3c # is mode #<data>?
19381 beq.b dcea_imm # yes
19383 mov.l EXC_EA(%a6),%a0 # return <ea>
19386 # need to set immediate data flag here since we'll need to do
19387 # an imem_read to fetch this later.
19389 mov.b &immed_flg,SPCOND_FLG(%a6)
19390 lea ([USER_FPIAR,%a6],0x4),%a0 # a0 = (longword at USER_FPIAR(%a6)) + 4
19393 # here, the <ea> is stacked correctly. however, we must update the
19394 # address register...
19396 mov.l %a0,%d0 # pass amt to inc by
19397 bsr.l inc_areg # inc addr register
19399 mov.l EXC_EA(%a6),%a0 # stacked <ea> is correct
19402 # the <ea> is stacked correctly for all but extended and packed which
19403 # the <ea>s are 8 bytes too large.
19404 # it would make no sense to have a pre-decrement to a7 in supervisor
19405 # mode so we don't even worry about this tricky case here : )
19407 mov.l %a0,%d0 # pass amt to dec by
19408 bsr.l dec_areg # dec addr register
19410 mov.l EXC_EA(%a6),%a0 # stacked <ea> is correct
# NOTE(review): the compare below relies on d0 still holding the byte count
# after dec_areg returns -- confirm that dec_areg preserves d0.
19412 cmpi.b %d0,&0xc # is opsize ext or packed?
19413 beq.b dcea_pd2 # yes
19416 sub.l &0x8,%a0 # correct <ea>
19417 mov.l %a0,EXC_EA(%a6) # put correct <ea> on stack
19420 #########################################################################
19421 # XDEF **************************************************************** #
19422 # _calc_ea_fout(): calculate correct stacked <ea> for extended #
19423 # and packed data opclass 3 operations. #
19425 # XREF **************************************************************** #
19428 # INPUT *************************************************************** #
19431 # OUTPUT ************************************************************** #
19432 # a0 = return correct effective address #
19434 # ALGORITHM *********************************************************** #
19435 # For opclass 3 extended and packed data operations, the <ea> #
19436 # stacked for the exception is incorrect for -(an) and (an)+ addressing #
19437 # modes. Also, while we're at it, the index register itself must get #
19439 # So, for -(an), we must subtract 8 off of the stacked <ea> value #
19440 # and return that value as the correct <ea> and store that value in An. #
19441 # For (an)+, the stacked <ea> is correct but we must adjust An by +12. #
19443 #########################################################################
19445 # This calc_ea is currently used to retrieve the correct <ea>
19446 # for fmove outs of type extended and packed.
# _calc_ea_fout body.
# Out: a0 = effective address taken from EXC_EA(%a6).
# The mode and register fields are decoded from the low byte of EXC_OPWORD.
#   (An)+ : dispatch through tbl_ceaf_pi on the register number; the visible
#           targets add 12 (0xc) to the selected address register.
#   -(An) : dispatch through tbl_ceaf_pd on the register number; the stacked
#           EXC_EA is reduced by 8 and the visible targets store a0 into the
#           selected address register.
#   other : the stacked <ea> is returned as is.
# a0/a1 are updated in their frame slots EXC_DREGS+0x8/+0xc(%a6), a6 in
# EXC_A6(%a6) and a7 in EXC_A7(%a6); the a7 cases also write mia7_flg or
# mda7_flg to SPCOND_FLG.
# NOTE(review): the entry label, the table labels, the ceaf_pi*/ceaf_pd*
# labels, the a2-a5 cases and the returns are not visible in this listing.
19447 global _calc_ea_fout
19449 mov.b 1+EXC_OPWORD(%a6),%d0 # fetch low byte of opcode word
19450 mov.l %d0,%d1 # make a copy
19452 andi.w &0x38,%d0 # extract mode field
19453 andi.l &0x7,%d1 # extract reg field
19455 cmpi.b %d0,&0x18 # is mode (An)+ ?
19456 beq.b ceaf_pi # yes
19458 cmpi.b %d0,&0x20 # is mode -(An) ?
19459 beq.w ceaf_pd # yes
19461 mov.l EXC_EA(%a6),%a0 # stacked <ea> is correct
19464 # (An)+ : extended and packed fmove out
19465 # : stacked <ea> is correct
19466 # : "An" not updated
19468 mov.w (tbl_ceaf_pi.b,%pc,%d1.w*2),%d1 # d1 = table offset for register d1
19469 mov.l EXC_EA(%a6),%a0 # a0 = stacked <ea>
19470 jmp (tbl_ceaf_pi.b,%pc,%d1.w*1) # go update the selected An
# 16-bit offsets, relative to tbl_ceaf_pi, of the per-register (An)+ cases.
19474 short ceaf_pi0 - tbl_ceaf_pi
19475 short ceaf_pi1 - tbl_ceaf_pi
19476 short ceaf_pi2 - tbl_ceaf_pi
19477 short ceaf_pi3 - tbl_ceaf_pi
19478 short ceaf_pi4 - tbl_ceaf_pi
19479 short ceaf_pi5 - tbl_ceaf_pi
19480 short ceaf_pi6 - tbl_ceaf_pi
19481 short ceaf_pi7 - tbl_ceaf_pi
19484 addi.l &0xc,EXC_DREGS+0x8(%a6) # stacked a0 += 12
19487 addi.l &0xc,EXC_DREGS+0xc(%a6) # stacked a1 += 12
19502 addi.l &0xc,EXC_A6(%a6) # stacked a6 += 12
19505 mov.b &mia7_flg,SPCOND_FLG(%a6) # flag the (a7)+ special case
19506 addi.l &0xc,EXC_A7(%a6) # stacked a7 += 12
19509 # -(An) : extended and packed fmove out
19510 # : stacked <ea> = actual <ea> + 8
19511 # : "An" not updated
19513 mov.w (tbl_ceaf_pd.b,%pc,%d1.w*2),%d1 # d1 = table offset for register d1
19514 mov.l EXC_EA(%a6),%a0 # a0 = stacked <ea>
19516 sub.l &0x8,EXC_EA(%a6) # stacked <ea> -= 8
19517 jmp (tbl_ceaf_pd.b,%pc,%d1.w*1) # go update the selected An
# 16-bit offsets, relative to tbl_ceaf_pd, of the per-register -(An) cases.
19521 short ceaf_pd0 - tbl_ceaf_pd
19522 short ceaf_pd1 - tbl_ceaf_pd
19523 short ceaf_pd2 - tbl_ceaf_pd
19524 short ceaf_pd3 - tbl_ceaf_pd
19525 short ceaf_pd4 - tbl_ceaf_pd
19526 short ceaf_pd5 - tbl_ceaf_pd
19527 short ceaf_pd6 - tbl_ceaf_pd
19528 short ceaf_pd7 - tbl_ceaf_pd
19531 mov.l %a0,EXC_DREGS+0x8(%a6) # stacked a0 = a0
19534 mov.l %a0,EXC_DREGS+0xc(%a6) # stacked a1 = a0
19549 mov.l %a0,EXC_A6(%a6) # stacked a6 = a0
19552 mov.l %a0,EXC_A7(%a6) # stacked a7 = a0
19553 mov.b &mda7_flg,SPCOND_FLG(%a6) # flag the -(a7) special case
19556 #########################################################################
19557 # XDEF **************************************************************** #
19558 # _load_fop(): load operand for unimplemented FP exception #
19560 # XREF **************************************************************** #
19561 # set_tag_x() - determine ext prec optype tag #
19562 # set_tag_s() - determine sgl prec optype tag #
19563 # set_tag_d() - determine dbl prec optype tag #
19564 # unnorm_fix() - convert normalized number to denorm or zero #
19565 # norm() - normalize a denormalized number #
19566 # get_packed() - fetch a packed operand from memory #
19567 # _dcalc_ea() - calculate <ea>, fixing An in process #
19569 # _imem_read_{word,long}() - read from instruction memory #
19570 # _dmem_read() - read from data memory #
19571 # _dmem_read_{byte,word,long}() - read from data memory #
19573 # facc_in_{b,w,l,d,x}() - mem read failed; special exit point #
19575 # INPUT *************************************************************** #
19578 # OUTPUT ************************************************************** #
19579 # If memory access doesn't fail: #
19580 # FP_SRC(a6) = source operand in extended precision #
19581 # FP_DST(a6) = destination operand in extended precision #
19583 # ALGORITHM *********************************************************** #
19584 # This is called from the Unimplemented FP exception handler in #
19585 # order to load the source and maybe destination operand into #
19586 # FP_SRC(a6) and FP_DST(a6). If the instruction was opclass zero, load #
19587 # the source and destination from the FP register file. Set the optype #
19588 # tags for both if dyadic, one for monadic. If a number is an UNNORM, #
19589 # convert it to a DENORM or a ZERO. #
19590 # If the instruction is opclass two (memory->reg), then fetch #
19591 # the destination from the register file and the source operand from #
19592 # memory. Tag and fix both as above w/ opclass zero instructions. #
19593 # If the source operand is byte,word,long, or single, it may be #
19594 # in the data register file. If it's actually out in memory, use one of #
19595 # the mem_read() routines to fetch it. If the mem_read() access returns #
19596 # a failing value, exit through the special facc_in() routine which #
19597 # will create an access error exception frame from the current exception #
19599 # Immediate data and regular data accesses are separated because #
19600 # if an immediate data access fails, the resulting fault status #
19601 # longword stacked for the access error exception must have the #
19602 # instruction bit set. #
19604 #########################################################################
19609 # 15 13 12 10 9 7 6 0
19611 # ---------------------------------
19612 # | opclass | RX | RY | EXTENSION | (2nd word of general FP instruction)
19613 # ---------------------------------
19616 # bfextu EXC_CMDREG(%a6){&0:&3}, %d0 # extract opclass
19617 # cmpi.b %d0, &0x2 # which class is it? ('000,'010,'011)
19618 # beq.w op010 # handle <ea> -> fpn
19619 # bgt.w op011 # handle fpn -> <ea>
19621 # we're not using op011 for now...
# _load_fop entry and opclass '000 (register to register) path.
# The btst below examines bit 6 of the first (high) byte of EXC_CMDREG, i.e.
# bit 14 of the command word, which is the bit that differs between opclass
# '000 and opclass '010 in the layout shown above.
# NOTE(review): the branch consuming that test and the op000_* labels are
# not visible in this listing -- confirm against the complete source.
# For opclass '000 the low byte of the command word decides whether a
# destination operand is needed:
#   bit 5 clear                     -> monadic, load the source only
#   bit 5 set, bit 4 clear          -> dyadic, load destination then source
#   bits 5 and 4 set, ext == 0x38   -> fcmp, handled like a dyadic operation
#   bits 5 and 4 set, anything else -> load the source only
# Operands are loaded with load_fpn2 (dst) / load_fpn1 (src), tagged with
# set_tag_x, passed through unnorm_fix when tagged UNNORM, and the tags are
# stored in DTAG(%a6) / STAG(%a6).
19622 btst &0x6,EXC_CMDREG(%a6)
19625 ############################
19626 # OPCLASS '000: reg -> reg #
19627 ############################
19629 mov.b 1+EXC_CMDREG(%a6),%d0 # fetch extension word lo
19630 btst &0x5,%d0 # testing extension bits
19631 beq.b op000_src # (bit 5 == 0) => monadic
19632 btst &0x4,%d0 # (bit 5 == 1)
19633 beq.b op000_dst # (bit 4 == 0) => dyadic
19634 and.w &0x007f,%d0 # extract extension bits {6:0}
19635 cmpi.w %d0,&0x0038 # is it an fcmp (dyadic) ?
19636 bne.b op000_src # no; not an fcmp => load src only
19639 bfextu EXC_CMDREG(%a6){&6:&3}, %d0 # extract dst field
19640 bsr.l load_fpn2 # fetch dst fpreg into FP_DST
19642 bsr.l set_tag_x # get dst optype tag
19644 cmpi.b %d0, &UNNORM # is dst fpreg an UNNORM?
19645 beq.b op000_dst_unnorm # yes
19647 mov.b %d0, DTAG(%a6) # store the dst optype tag
19650 bfextu EXC_CMDREG(%a6){&3:&3}, %d0 # extract src field
19651 bsr.l load_fpn1 # fetch src fpreg into FP_SRC
19653 bsr.l set_tag_x # get src optype tag
19655 cmpi.b %d0, &UNNORM # is src fpreg an UNNORM?
19656 beq.b op000_src_unnorm # yes
19658 mov.b %d0, STAG(%a6) # store the src optype tag
19662 bsr.l unnorm_fix # fix the dst UNNORM
19663 bra.b op000_dst_cont
19665 bsr.l unnorm_fix # fix the src UNNORM
19666 bra.b op000_src_cont
19668 #############################
19669 # OPCLASS '010: <ea> -> reg #
19670 #############################
# Opclass '010 (<ea> to register) path.
# The destination decision mirrors opclass '000: bit 5 clear -> source only;
# bit 5 set and bit 4 clear -> dyadic; bits 5 and 4 set -> destination is
# loaded only when the extension field equals 0x38 (fcmp).
# The destination is loaded with load_fpn2, tagged with set_tag_x, passed
# through unnorm_fix when tagged UNNORM, and its tag is stored in DTAG(%a6).
# For the source, d0 receives the source type field of the command word. The
# bfextu of the <ea> mode field sets the condition codes: a non-zero mode
# means the operand is in memory (fetch_from_mem); mode 0 means a data
# register, whose number is extracted into d1 before dispatching through
# tbl_op010_dreg on the source type.
# NOTE(review): the op010_* labels are not visible in this listing.
19672 mov.w EXC_CMDREG(%a6),%d0 # fetch extension word
19673 btst &0x5,%d0 # testing extension bits
19674 beq.b op010_src # (bit 5 == 0) => monadic
19675 btst &0x4,%d0 # (bit 5 == 1)
19676 beq.b op010_dst # (bit 4 == 0) => dyadic
19677 and.w &0x007f,%d0 # extract extension bits {6:0}
19678 cmpi.w %d0,&0x0038 # is it an fcmp (dyadic) ?
19679 bne.b op010_src # no; not an fcmp => load src only
19682 bfextu EXC_CMDREG(%a6){&6:&3}, %d0 # extract dst field
19683 bsr.l load_fpn2 # fetch dst fpreg ptr
19685 bsr.l set_tag_x # get dst type tag
19687 cmpi.b %d0, &UNNORM # is dst fpreg an UNNORM?
19688 beq.b op010_dst_unnorm # yes
19690 mov.b %d0, DTAG(%a6) # store the dst optype tag
19693 bfextu EXC_CMDREG(%a6){&3:&3}, %d0 # extract src type field
19695 bfextu EXC_OPWORD(%a6){&10:&3}, %d1 # extract <ea> mode field
19696 bne.w fetch_from_mem # mode != 0: src op is in memory
19699 clr.b STAG(%a6) # either NORM or ZERO
19700 bfextu EXC_OPWORD(%a6){&13:&3}, %d1 # extract src reg field
19702 mov.w (tbl_op010_dreg.b,%pc,%d0.w*2), %d0 # jmp based on optype
19703 jmp (tbl_op010_dreg.b,%pc,%d0.w*1) # fetch src from dreg
19706 bsr.l unnorm_fix # fix the dst UNNORM
19707 bra.b op010_dst_cont
# Dispatch table indexed by the source type field: entries 0, 1, 4 and 6
# select the long, single, word and byte data-register loaders. The other
# entries are zero offsets (they point at the table itself).
19711 short opd_long - tbl_op010_dreg
19712 short opd_sgl - tbl_op010_dreg
19713 short tbl_op010_dreg - tbl_op010_dreg
19714 short tbl_op010_dreg - tbl_op010_dreg
19715 short opd_word - tbl_op010_dreg
19716 short tbl_op010_dreg - tbl_op010_dreg
19717 short opd_byte - tbl_op010_dreg
19718 short tbl_op010_dreg - tbl_op010_dreg
# Data-register source loaders (targets of tbl_op010_dreg).
# Each one obtains the register contents in d0 from fetch_dreg, converts it
# to extended precision through fp0 and stores the result in FP_SRC(%a6).
# For the integer sizes a zero result (fbeq) leads to STAG being set to ZERO.
# The single-precision loader tags the value with set_tag_s and hands SNAN
# and DENORM inputs to get_sgl_snan / get_sgl_denorm instead of using fmov.
# NOTE(review): the opd_* labels and the returns between the loaders are not
# visible in this listing -- confirm against the complete source.
19721 # LONG: can be either NORM or ZERO...
19724 bsr.l fetch_dreg # fetch long in d0
19725 fmov.l %d0, %fp0 # load a long
19726 fmovm.x &0x80, FP_SRC(%a6) # return src op in FP_SRC
19727 fbeq.w opd_long_zero # long is a ZERO
19730 mov.b &ZERO, STAG(%a6) # set ZERO optype flag
19734 # WORD: can be either NORM or ZERO...
19737 bsr.l fetch_dreg # fetch word in d0
19738 fmov.w %d0, %fp0 # load a word
19739 fmovm.x &0x80, FP_SRC(%a6) # return src op in FP_SRC
19740 fbeq.w opd_word_zero # WORD is a ZERO
19743 mov.b &ZERO, STAG(%a6) # set ZERO optype flag
19747 # BYTE: can be either NORM or ZERO...
19750 bsr.l fetch_dreg # fetch byte in d0
19751 fmov.b %d0, %fp0 # load a byte
19752 fmovm.x &0x80, FP_SRC(%a6) # return src op in FP_SRC
19753 fbeq.w opd_byte_zero # byte is a ZERO
19756 mov.b &ZERO, STAG(%a6) # set ZERO optype flag
19760 # SGL: can be either NORM, DENORM, ZERO, INF, QNAN or SNAN but not UNNORM
19762 # separate SNANs and DENORMs so they can be loaded w/ special care.
19763 # all others can simply be moved "in" using fmove.
19766 bsr.l fetch_dreg # fetch sgl in d0
19767 mov.l %d0,L_SCR1(%a6) # park the sgl in L_SCR1 so it can be addressed
19769 lea L_SCR1(%a6), %a0 # pass: ptr to the sgl
19770 bsr.l set_tag_s # determine sgl type
19771 mov.b %d0, STAG(%a6) # save the src tag
19773 cmpi.b %d0, &SNAN # is it an SNAN?
19774 beq.w get_sgl_snan # yes
19776 cmpi.b %d0, &DENORM # is it a DENORM?
19777 beq.w get_sgl_denorm # yes
# NOTE(review): the load below assumes a0 still points at L_SCR1 after
# set_tag_s returns -- confirm that set_tag_s preserves a0.
19779 fmov.s (%a0), %fp0 # no, so can load it regular
19780 fmovm.x &0x80, FP_SRC(%a6) # return src op in FP_SRC
19783 ##############################################################################
19785 #########################################################################
19786 # fetch_from_mem(): #
19787 # - src is out in memory. must: #
19788 # (1) calc ea - must read AFTER you know the src type since #
19789 # if the ea is -() or ()+, need to know # of bytes. #
19790 # (2) read it in from either user or supervisor space #
19791 # (3) if (b || w || l) then simply read in #
19792 # if (s || d || x) then check for SNAN,UNNORM,DENORM #
19793 # if (packed) then punt for now #
19795 # %d0 : src type field #
19796 #########################################################################
# fetch_from_mem body: the source operand is in memory.
# In: d0 = source type field of the command word.
# STAG is cleared first, then control is dispatched through tbl_fp_type to
# the loader for that source type.
19798 clr.b STAG(%a6) # either NORM or ZERO
19800 mov.w (tbl_fp_type.b,%pc,%d0.w*2), %d0 # index by src type field
19801 jmp (tbl_fp_type.b,%pc,%d0.w*1) # jump to the selected loader
# Dispatch table indexed by source type: 0 long, 1 single, 2 extended,
# 3 packed, 4 word, 5 double, 6 byte. Entry 7 is a zero offset (it points at
# the table itself).
19805 short load_long - tbl_fp_type
19806 short load_sgl - tbl_fp_type
19807 short load_ext - tbl_fp_type
19808 short load_packed - tbl_fp_type
19809 short load_word - tbl_fp_type
19810 short load_dbl - tbl_fp_type
19811 short load_byte - tbl_fp_type
19812 short tbl_fp_type - tbl_fp_type
19814 #########################################
19815 # load a LONG into %fp0: #
19816 # -number can't fault #
19818 # (2) read 4 bytes into L_SCR1 #
19819 # (3) fmov.l into %fp0 #
19820 #########################################
# load_long body: fetch a 4-byte integer source operand and convert it.
# _dcalc_ea is called with the operand size and returns the <ea> in a0. When
# it has set immed_flg in SPCOND_FLG the operand is read with
# _imem_read_long (failure -> funimp_iacc), otherwise with _dmem_read_long
# (failure -> facc_in_l). The value is converted through fp0, stored in
# FP_SRC(%a6), and a zero result leads to STAG being set to ZERO.
# NOTE(review): the load_long* labels and returns are not visible in this
# listing -- confirm against the complete source.
19822 movq.l &0x4, %d0 # pass: 4 (bytes)
19823 bsr.l _dcalc_ea # calc <ea>; <ea> in %a0
19825 cmpi.b SPCOND_FLG(%a6),&immed_flg # is the operand immediate data?
19826 beq.b load_long_immed # yes; fetch it from the instruction stream
19828 bsr.l _dmem_read_long # fetch src operand from memory
19830 tst.l %d1 # did dfetch fail?
19831 bne.l facc_in_l # yes
19834 fmov.l %d0, %fp0 # read into %fp0;convert to xprec
19835 fmovm.x &0x80, FP_SRC(%a6) # return src op in FP_SRC
19837 fbeq.w load_long_zero # src op is a ZERO
19840 mov.b &ZERO, STAG(%a6) # set optype tag to ZERO
19844 bsr.l _imem_read_long # fetch src operand immed data
19846 tst.l %d1 # did ifetch fail?
19847 bne.l funimp_iacc # yes
19848 bra.b load_long_cont
19850 #########################################
19851 # load a WORD into %fp0: #
19852 # -number can't fault #
19854 # (2) read 2 bytes into L_SCR1 #
19855 # (3) fmov.w into %fp0 #
19856 #########################################
# load_word body: fetch a 2-byte integer source operand and convert it.
# _dcalc_ea is called with the operand size and returns the <ea> in a0. When
# it has set immed_flg in SPCOND_FLG the operand is read with
# _imem_read_word (failure -> funimp_iacc), otherwise with _dmem_read_word
# (failure -> facc_in_w). The value is converted through fp0, stored in
# FP_SRC(%a6), and a zero result leads to STAG being set to ZERO.
# NOTE(review): the load_word* labels and returns are not visible in this
# listing -- confirm against the complete source.
19858 movq.l &0x2, %d0 # pass: 2 (bytes)
19859 bsr.l _dcalc_ea # calc <ea>; <ea> in %a0
19861 cmpi.b SPCOND_FLG(%a6),&immed_flg # is the operand immediate data?
19862 beq.b load_word_immed # yes; fetch it from the instruction stream
19864 bsr.l _dmem_read_word # fetch src operand from memory
19866 tst.l %d1 # did dfetch fail?
19867 bne.l facc_in_w # yes
19870 fmov.w %d0, %fp0 # read into %fp0;convert to xprec
19871 fmovm.x &0x80, FP_SRC(%a6) # return src op in FP_SRC
19873 fbeq.w load_word_zero # src op is a ZERO
19876 mov.b &ZERO, STAG(%a6) # set optype tag to ZERO
19880 bsr.l _imem_read_word # fetch src operand immed data
19882 tst.l %d1 # did ifetch fail?
19883 bne.l funimp_iacc # yes
19884 bra.b load_word_cont
19886 #########################################
19887 # load a BYTE into %fp0: #
19888 # -number can't fault #
19890 # (2) read 1 byte into L_SCR1 #
19891 # (3) fmov.b into %fp0 #
19892 #########################################
# load_byte body: fetch a 1-byte integer source operand and convert it.
# _dcalc_ea is called with the operand size and returns the <ea> in a0. When
# it has set immed_flg in SPCOND_FLG the operand is read with
# _imem_read_word (a whole word is fetched; fmov.b then uses the low byte of
# d0; failure -> funimp_iacc), otherwise with _dmem_read_byte
# (failure -> facc_in_b). The value is converted through fp0, stored in
# FP_SRC(%a6), and a zero result leads to STAG being set to ZERO.
# NOTE(review): the load_byte* labels and returns are not visible in this
# listing -- confirm against the complete source.
19894 movq.l &0x1, %d0 # pass: 1 (byte)
19895 bsr.l _dcalc_ea # calc <ea>; <ea> in %a0
19897 cmpi.b SPCOND_FLG(%a6),&immed_flg # is the operand immediate data?
19898 beq.b load_byte_immed # yes; fetch it from the instruction stream
19900 bsr.l _dmem_read_byte # fetch src operand from memory
19902 tst.l %d1 # did dfetch fail?
19903 bne.l facc_in_b # yes
19906 fmov.b %d0, %fp0 # read into %fp0;convert to xprec
19907 fmovm.x &0x80, FP_SRC(%a6) # return src op in FP_SRC
19909 fbeq.w load_byte_zero # src op is a ZERO
19912 mov.b &ZERO, STAG(%a6) # set optype tag to ZERO
19916 bsr.l _imem_read_word # fetch src operand immed data
19918 tst.l %d1 # did ifetch fail?
19919 bne.l funimp_iacc # yes
19920 bra.b load_byte_cont
19922 #########################################
19923 # load a SGL into %fp0: #
19924 # -number can't fault #
19926 # (2) read 4 bytes into L_SCR1 #
19927 # (3) fmov.s into %fp0 #
19928 #########################################
# load_sgl body: fetch a 4-byte single-precision source operand.
# _dcalc_ea is called with the operand size and returns the <ea> in a0. When
# it has set immed_flg in SPCOND_FLG the operand is read with
# _imem_read_long (failure -> funimp_iacc), otherwise with _dmem_read_long
# (failure -> facc_in_l). The raw value is kept in L_SCR1(%a6), tagged with
# set_tag_s (tag saved in STAG), and DENORM / SNAN inputs are handed to
# get_sgl_denorm / get_sgl_snan. Everything else is converted with fmov.s
# and stored in FP_SRC(%a6).
# NOTE(review): the load_sgl* labels and returns are not visible in this
# listing -- confirm against the complete source.
19930 movq.l &0x4, %d0 # pass: 4 (bytes)
19931 bsr.l _dcalc_ea # calc <ea>; <ea> in %a0
19933 cmpi.b SPCOND_FLG(%a6),&immed_flg # is the operand immediate data?
19934 beq.b load_sgl_immed # yes; fetch it from the instruction stream
19936 bsr.l _dmem_read_long # fetch src operand from memory
19937 mov.l %d0, L_SCR1(%a6) # store src op on stack (before the status check)
19939 tst.l %d1 # did dfetch fail?
19940 bne.l facc_in_l # yes
19943 lea L_SCR1(%a6), %a0 # pass: ptr to sgl src op
19944 bsr.l set_tag_s # determine src type tag
19945 mov.b %d0, STAG(%a6) # save src optype tag on stack
19947 cmpi.b %d0, &DENORM # is it a sgl DENORM?
19948 beq.w get_sgl_denorm # yes
19950 cmpi.b %d0, &SNAN # is it a sgl SNAN?
19951 beq.w get_sgl_snan # yes
19953 fmov.s L_SCR1(%a6), %fp0 # read into %fp0;convert to xprec
19954 fmovm.x &0x80, FP_SRC(%a6) # return src op in FP_SRC
19958 bsr.l _imem_read_long # fetch src operand immed data
19960 tst.l %d1 # did ifetch fail?
19961 bne.l funimp_iacc # yes
19962 bra.b load_sgl_cont
19964 # must convert sgl denorm format to an Xprec denorm fmt suitable for
19966 # %a0 : points to sgl denorm
19968 clr.w FP_SRC_EX(%a6)
19969 bfextu (%a0){&9:&23}, %d0 # fetch sgl hi(_mantissa)
19971 mov.l %d0, FP_SRC_HI(%a6) # set ext hi(_mantissa)
19972 clr.l FP_SRC_LO(%a6) # set ext lo(_mantissa)
19974 clr.w FP_SRC_EX(%a6)
19975 btst &0x7, (%a0) # is sgn bit set?
19976 beq.b sgl_dnrm_norm
19977 bset &0x7, FP_SRC_EX(%a6) # set sgn of xprec value
19980 lea FP_SRC(%a6), %a0
19981 bsr.l norm # normalize number
19982 mov.w &0x3f81, %d1 # xprec exp = 0x3f81
19983 sub.w %d0, %d1 # exp = 0x3f81 - shft amt.
19984 or.w %d1, FP_SRC_EX(%a6) # {sgn,exp}
19986 mov.b &NORM, STAG(%a6) # fix src type tag
19989 # convert sgl to ext SNAN
19990 # %a0 : points to sgl SNAN
19992 mov.w &0x7fff, FP_SRC_EX(%a6) # set exp of SNAN
19993 bfextu (%a0){&9:&23}, %d0
19994 lsl.l &0x8, %d0 # extract and insert hi(man)
19995 mov.l %d0, FP_SRC_HI(%a6)
19996 clr.l FP_SRC_LO(%a6)
19998 btst &0x7, (%a0) # see if sign of SNAN is set
19999 beq.b no_sgl_snan_sgn
20000 bset &0x7, FP_SRC_EX(%a6)
20004 #########################################
20005 # load a DBL into %fp0: #
20006 # -number can't fault #
20008 # (2) read 8 bytes into L_SCR(1,2)#
20009 # (3) fmov.d into %fp0 #
20010 #########################################
20012 movq.l &0x8, %d0 # pass: 8 (bytes)
20013 bsr.l _dcalc_ea # calc <ea>; <ea> in %a0
20015 cmpi.b SPCOND_FLG(%a6),&immed_flg # is the operand immediate data?
20016 beq.b load_dbl_immed # yes; use the immediate fetch path
20018 lea L_SCR1(%a6), %a1 # pass: ptr to input dbl tmp space
20019 movq.l &0x8, %d0 # pass: # bytes to read
20020 bsr.l _dmem_read # fetch src operand from memory
20022 tst.l %d1 # did dfetch fail?
20023 bne.l facc_in_d # yes
20026 lea L_SCR1(%a6), %a0 # pass: ptr to input dbl
20027 bsr.l set_tag_d # determine src type tag
20028 mov.b %d0, STAG(%a6) # set src optype tag
20030 cmpi.b %d0, &DENORM # is it a dbl DENORM?
20031 beq.w get_dbl_denorm # yes
20033 cmpi.b %d0, &SNAN # is it a dbl SNAN?
20034 beq.w get_dbl_snan # yes
20036 fmov.d L_SCR1(%a6), %fp0 # read into %fp0;convert to xprec
20037 fmovm.x &0x80, FP_SRC(%a6) # return src op in FP_SRC
20041 lea L_SCR1(%a6), %a1 # pass: ptr to input dbl tmp space
20042 movq.l &0x8, %d0 # pass: # bytes to read
20043 bsr.l _imem_read # fetch src operand immed data
20045 tst.l %d1 # did ifetch fail?
20046 bne.l funimp_iacc # yes
20047 bra.b load_dbl_cont # no; rejoin the tagging path at load_dbl_cont
20049 # must convert dbl denorm format to an Xprec denorm fmt suitable for
20051 # %a0 : loc. of dbl denorm
20053 clr.w FP_SRC_EX(%a6) # clear ext {sgn,exp} word
20054 bfextu (%a0){&12:&31}, %d0 # fetch hi(_mantissa)
20055 mov.l %d0, FP_SRC_HI(%a6) # set ext hi(_mantissa)
20056 bfextu 4(%a0){&11:&21}, %d0 # fetch lo(_mantissa)
20059 mov.l %d0, FP_SRC_LO(%a6) # set ext lo(_mantissa)
20061 btst &0x7, (%a0) # is sgn bit set?
20062 beq.b dbl_dnrm_norm # no; leave xprec sign clear
20063 bset &0x7, FP_SRC_EX(%a6) # set sgn of xprec value
20066 lea FP_SRC(%a6), %a0 # pass: ptr to xprec operand
20067 bsr.l norm # normalize number
20068 mov.w &0x3c01, %d1 # xprec exp = 0x3c01
20069 sub.w %d0, %d1 # exp = 0x3c01 - shft amt.
20070 or.w %d1, FP_SRC_EX(%a6) # {sgn,exp}
20072 mov.b &NORM, STAG(%a6) # fix src type tag
20075 # convert dbl to ext SNAN
20076 # %a0 : points to dbl SNAN
20078 mov.w &0x7fff, FP_SRC_EX(%a6) # set exp of SNAN
20080 bfextu (%a0){&12:&31}, %d0 # fetch hi(_mantissa)
20081 mov.l %d0, FP_SRC_HI(%a6) # set ext hi(_mantissa)
20082 bfextu 4(%a0){&11:&21}, %d0 # fetch lo(_mantissa)
20085 mov.l %d0, FP_SRC_LO(%a6) # set ext lo(_mantissa)
20087 btst &0x7, (%a0) # see if sign of SNAN is set
20088 beq.b no_dbl_snan_sgn # no; leave xprec sign clear
20089 bset &0x7, FP_SRC_EX(%a6) # yes; set sgn of xprec value
20093 #################################################
20094 # load a Xprec into %fp0: #
20095 # -number can't fault #
20097 # (2) read 12 bytes into FP_SRC #
20098 # (3) fmov.x into %fp0 #
20099 #################################################
20101 mov.l &0xc, %d0 # pass: 12 (bytes)
20102 bsr.l _dcalc_ea # calc <ea>
20104 lea FP_SRC(%a6), %a1 # pass: ptr to input ext tmp space
20105 mov.l &0xc, %d0 # pass: # of bytes to read
20106 bsr.l _dmem_read # fetch src operand from memory
20108 tst.l %d1 # did dfetch fail?
20109 bne.l facc_in_x # yes
20111 lea FP_SRC(%a6), %a0 # pass: ptr to src op
20112 bsr.l set_tag_x # determine src type tag
20114 cmpi.b %d0, &UNNORM # is the src op an UNNORM?
20115 beq.b load_ext_unnorm # yes
20117 mov.b %d0, STAG(%a6) # store the src optype tag
20121 bsr.l unnorm_fix # fix the src UNNORM
20122 mov.b %d0, STAG(%a6) # store the tag returned in %d0 by unnorm_fix
20125 #################################################
20126 # load a packed into %fp0: #
20127 # -number can't fault #
20129 # (2) read 12 bytes into L_SCR(1,2,3) #
20130 # (3) fmov.x into %fp0 #
20131 #################################################
20135 lea FP_SRC(%a6),%a0 # pass ptr to src op
20136 bsr.l set_tag_x # determine src type tag
20137 cmpi.b %d0,&UNNORM # is the src op an UNNORM ZERO?
20138 beq.b load_packed_unnorm # yes
20140 mov.b %d0,STAG(%a6) # store the src optype tag
20143 load_packed_unnorm:
20144 bsr.l unnorm_fix # fix the UNNORM ZERO
20145 mov.b %d0,STAG(%a6) # store the tag returned in %d0 by unnorm_fix
20148 #########################################################################
20149 # XDEF **************************************************************** #
20150 # fout(): move from fp register to memory or data register #
20152 # XREF **************************************************************** #
20153 # _round() - needed to create EXOP for sgl/dbl precision #
20154 # norm() - needed to create EXOP for extended precision #
20155 # ovf_res() - create default overflow result for sgl/dbl precision#
20156 # unf_res() - create default underflow result for sgl/dbl prec. #
20157 # dst_dbl() - create rounded dbl precision result. #
20158 # dst_sgl() - create rounded sgl precision result. #
20159 # fetch_dreg() - fetch dynamic k-factor reg for packed. #
20160 # bindec() - convert FP binary number to packed number. #
20161 # _mem_write() - write data to memory. #
20162 # _mem_write2() - write data to memory unless supv mode -(a7) exc.#
20163 # _dmem_write_{byte,word,long}() - write data to memory. #
20164 # store_dreg_{b,w,l}() - store data to data register file. #
20165 # facc_out_{b,w,l,d,x}() - data access error occurred. #
20167 # INPUT *************************************************************** #
20168 # a0 = pointer to extended precision source operand #
20169 # d0 = round prec,mode #
20171 # OUTPUT ************************************************************** #
20172 # fp0 : intermediate underflow or overflow result if #
20173 # OVFL/UNFL occurred for a sgl or dbl operand #
20175 # ALGORITHM *********************************************************** #
20176 # This routine is accessed by many handlers that need to do an #
20177 # opclass three move of an operand out to memory. #
20178 # Decode an fmove out (opclass 3) instruction to determine if #
20179 # it's b,w,l,s,d,x, or p in size. b,w,l can be stored to either a data #
20180 # register or memory. The algorithm uses a standard "fmove" to create #
20181 # the rounded result. Also, since exceptions are disabled, this also #
20182 # creates the correct OPERR default result if appropriate. #
20183 # For sgl or dbl precision, overflow or underflow can occur. If #
20184 # either occurs and is enabled, the EXOP is created and returned in fp1.#
20185 # For extended precision, the stacked <ea> must be fixed along #
20186 # w/ the address index register as appropriate w/ _calc_ea_fout(). If #
20187 # the source is a denorm and if underflow is enabled, an EXOP must be created. #
20189 # For packed, the k-factor must be fetched from the instruction #
20190 # word or a data register. The <ea> must be fixed as w/ extended #
20191 # precision. Then, bindec() is called to create the appropriate packed result. #
20193 # If at any time an access error is flagged by one of the move- #
20194 # to-memory routines, then a special exit must be made so that the #
20195 # access error can be handled properly. #
20197 #########################################################################
20201 bfextu EXC_CMDREG(%a6){&3:&3},%d1 # extract dst fmt
20202 mov.w (tbl_fout.b,%pc,%d1.w*2),%a1 # use as index
20203 jmp (tbl_fout.b,%pc,%a1) # jump to routine
20207 short fout_long - tbl_fout # fmt 0: long
20208 short fout_sgl - tbl_fout # fmt 1: single
20209 short fout_ext - tbl_fout # fmt 2: extended
20210 short fout_pack - tbl_fout # fmt 3: packed
20211 short fout_word - tbl_fout # fmt 4: word
20212 short fout_dbl - tbl_fout # fmt 5: double
20213 short fout_byte - tbl_fout # fmt 6: byte
20214 short fout_pack - tbl_fout # fmt 7: packed
20216 #################################################################
20217 # fmove.b out ###################################################
20218 #################################################################
20220 # Only "Unimplemented Data Type" exceptions enter here. The operand
20221 # is either a DENORM or a NORM.
20223 tst.b STAG(%a6) # is operand normalized?
20224 bne.b fout_byte_denorm # no
20226 fmovm.x SRC(%a0),&0x80 # load value
20229 fmov.l %d0,%fpcr # insert rnd prec,mode
20231 fmov.b %fp0,%d0 # exec move out w/ correct rnd mode
20233 fmov.l &0x0,%fpcr # clear FPCR
20234 fmov.l %fpsr,%d1 # fetch FPSR
20235 or.w %d1,2+USER_FPSR(%a6) # save new exc,accrued bits
20237 mov.b 1+EXC_OPWORD(%a6),%d1 # extract dst mode
20238 andi.b &0x38,%d1 # is mode == 0? (Dreg dst)
20239 beq.b fout_byte_dn # must save to integer regfile
20241 mov.l EXC_EA(%a6),%a0 # stacked <ea> is correct
20242 bsr.l _dmem_write_byte # write byte
20244 tst.l %d1 # did dstore fail?
20245 bne.l facc_out_b # yes
20250 mov.b 1+EXC_OPWORD(%a6),%d1 # extract Dn
20256 mov.l SRC_EX(%a0),%d1 # fetch longword holding the DENORM sign
20257 andi.l &0x80000000,%d1 # keep DENORM sign
20258 ori.l &0x00800000,%d1 # make smallest sgl
20260 bra.b fout_byte_norm # store it via the NORM path
20262 #################################################################
20263 # fmove.w out ###################################################
20264 #################################################################
20266 # Only "Unimplemented Data Type" exceptions enter here. The operand
20267 # is either a DENORM or a NORM.
20269 tst.b STAG(%a6) # is operand normalized?
20270 bne.b fout_word_denorm # no
20272 fmovm.x SRC(%a0),&0x80 # load value
20275 fmov.l %d0,%fpcr # insert rnd prec:mode
20277 fmov.w %fp0,%d0 # exec move out w/ correct rnd mode
20279 fmov.l &0x0,%fpcr # clear FPCR
20280 fmov.l %fpsr,%d1 # fetch FPSR
20281 or.w %d1,2+USER_FPSR(%a6) # save new exc,accrued bits
20283 mov.b 1+EXC_OPWORD(%a6),%d1 # extract dst mode
20284 andi.b &0x38,%d1 # is mode == 0? (Dreg dst)
20285 beq.b fout_word_dn # must save to integer regfile
20287 mov.l EXC_EA(%a6),%a0 # stacked <ea> is correct
20288 bsr.l _dmem_write_word # write word
20290 tst.l %d1 # did dstore fail?
20291 bne.l facc_out_w # yes
20296 mov.b 1+EXC_OPWORD(%a6),%d1 # extract Dn
20302 mov.l SRC_EX(%a0),%d1 # fetch longword holding the DENORM sign
20303 andi.l &0x80000000,%d1 # keep DENORM sign
20304 ori.l &0x00800000,%d1 # make smallest sgl
20306 bra.b fout_word_norm # store it via the NORM path
20308 #################################################################
20309 # fmove.l out ###################################################
20310 #################################################################
20312 # Only "Unimplemented Data Type" exceptions enter here. The operand
20313 # is either a DENORM or a NORM.
20315 tst.b STAG(%a6) # is operand normalized?
20316 bne.b fout_long_denorm # no
20318 fmovm.x SRC(%a0),&0x80 # load value
20321 fmov.l %d0,%fpcr # insert rnd prec:mode
20323 fmov.l %fp0,%d0 # exec move out w/ correct rnd mode
20325 fmov.l &0x0,%fpcr # clear FPCR
20326 fmov.l %fpsr,%d1 # fetch FPSR
20327 or.w %d1,2+USER_FPSR(%a6) # save new exc,accrued bits
20330 mov.b 1+EXC_OPWORD(%a6),%d1 # extract dst mode
20331 andi.b &0x38,%d1 # is mode == 0? (Dreg dst)
20332 beq.b fout_long_dn # must save to integer regfile
20334 mov.l EXC_EA(%a6),%a0 # stacked <ea> is correct
20335 bsr.l _dmem_write_long # write long
20337 tst.l %d1 # did dstore fail?
20338 bne.l facc_out_l # yes
20343 mov.b 1+EXC_OPWORD(%a6),%d1 # extract Dn
20349 mov.l SRC_EX(%a0),%d1 # fetch longword holding the DENORM sign
20350 andi.l &0x80000000,%d1 # keep DENORM sign
20351 ori.l &0x00800000,%d1 # make smallest sgl
20353 bra.b fout_long_norm # store it via the NORM path
20355 #################################################################
20356 # fmove.x out ###################################################
20357 #################################################################
20359 # Only "Unimplemented Data Type" exceptions enter here. The operand
20360 # is either a DENORM or a NORM.
20361 # The DENORM causes an Underflow exception.
20364 # we copy the extended precision result to FP_SCR0 so that the reserved
20365 # 16-bit field gets zeroed. we do this since we promise not to disturb
20366 # what's at SRC(a0).
20367 mov.w SRC_EX(%a0),FP_SCR0_EX(%a6) # copy {sgn,exp}
20368 clr.w 2+FP_SCR0_EX(%a6) # clear reserved field
20369 mov.l SRC_HI(%a0),FP_SCR0_HI(%a6) # copy hi(man)
20370 mov.l SRC_LO(%a0),FP_SCR0_LO(%a6) # copy lo(man)
20372 fmovm.x SRC(%a0),&0x80 # return result
20374 bsr.l _calc_ea_fout # fix stacked <ea>
20376 mov.l %a0,%a1 # pass: dst addr
20377 lea FP_SCR0(%a6),%a0 # pass: src addr
20378 mov.l &0xc,%d0 # pass: opsize is 12 bytes
20380 # we must not yet write the extended precision data to the stack
20381 # in the pre-decrement case from supervisor mode or else we'll corrupt
20382 # the stack frame. so, leave it in FP_SRC for now and deal with it later...
20383 cmpi.b SPCOND_FLG(%a6),&mda7_flg # is this the -(a7) case described above? NOTE(review): the src ptr passed here is FP_SCR0, not FP_SRC -- confirm the comment above
20386 bsr.l _dmem_write # write ext prec number to memory
20388 tst.l %d1 # did dstore fail?
20389 bne.w fout_ext_err # yes
20391 tst.b STAG(%a6) # is operand normalized?
20392 bne.b fout_ext_denorm # no
20395 # the number is a DENORM. must set the underflow exception bit
20397 bset &unfl_bit,FPSR_EXCEPT(%a6) # set underflow exc bit
20399 mov.b FPCR_ENABLE(%a6),%d0 # fetch exception enable byte
20400 andi.b &0x0a,%d0 # is UNFL or INEX enabled?
20401 bne.b fout_ext_exc # yes
20404 # we don't want to do the write if the exception occurred in supervisor mode
20405 # so _mem_write2() handles this for us.
20407 bsr.l _mem_write2 # write ext prec number to memory
20409 tst.l %d1 # did dstore fail?
20410 bne.w fout_ext_err # yes
20412 tst.b STAG(%a6) # is operand normalized?
20413 bne.b fout_ext_denorm # no
20417 lea FP_SCR0(%a6),%a0 # pass: ptr to copy of DENORM
20418 bsr.l norm # normalize the mantissa
20419 neg.w %d0 # new exp = -(shft amt)
20421 andi.w &0x8000,FP_SCR0_EX(%a6) # keep only old sign
20422 or.w %d0,FP_SCR0_EX(%a6) # insert new exponent
20423 fmovm.x FP_SCR0(%a6),&0x40 # return EXOP in fp1
20427 mov.l EXC_A6(%a6),(%a6) # fix stacked a6
20430 #########################################################################
20431 # fmove.s out ###########################################################
20432 #########################################################################
20434 andi.b &0x30,%d0 # clear rnd prec
20435 ori.b &s_mode*0x10,%d0 # insert sgl prec
20436 mov.l %d0,L_SCR3(%a6) # save rnd prec,mode on stack
20439 # operand is a normalized number. first, we check to see if the move out
20440 # would cause either an underflow or overflow. these cases are handled
20441 # separately. otherwise, set the FPCR to the proper rounding mode and
20442 # execute the move.
20444 mov.w SRC_EX(%a0),%d0 # extract exponent
20445 andi.w &0x7fff,%d0 # strip sign
20447 cmpi.w %d0,&SGL_HI # will operand overflow?
20448 bgt.w fout_sgl_ovfl # yes; go handle OVFL
20449 beq.w fout_sgl_may_ovfl # maybe; go handle possible OVFL
20450 cmpi.w %d0,&SGL_LO # will operand underflow?
20451 blt.w fout_sgl_unfl # yes; go handle underflow
20454 # NORMs(in range) can be stored out by a simple "fmov.s"
20455 # Unnormalized inputs can come through this point.
20458 fmovm.x SRC(%a0),&0x80 # fetch fop from stack
20460 fmov.l L_SCR3(%a6),%fpcr # set FPCR
20461 fmov.l &0x0,%fpsr # clear FPSR
20463 fmov.s %fp0,%d0 # store does convert and round
20465 fmov.l &0x0,%fpcr # clear FPCR
20466 fmov.l %fpsr,%d1 # save FPSR
20468 or.w %d1,2+USER_FPSR(%a6) # set possible inex2/ainex
20470 fout_sgl_exg_write:
20471 mov.b 1+EXC_OPWORD(%a6),%d1 # extract dst mode
20472 andi.b &0x38,%d1 # is mode == 0? (Dreg dst)
20473 beq.b fout_sgl_exg_write_dn # must save to integer regfile
20475 mov.l EXC_EA(%a6),%a0 # stacked <ea> is correct
20476 bsr.l _dmem_write_long # write long
20478 tst.l %d1 # did dstore fail?
20479 bne.l facc_out_l # yes
20483 fout_sgl_exg_write_dn:
20484 mov.b 1+EXC_OPWORD(%a6),%d1 # extract Dn
20490 # here, we know that the operand would UNFL if moved out to single prec,
20491 # so, denorm and round and then use generic store single routine to
20492 # write the value to memory.
20495 bset &unfl_bit,FPSR_EXCEPT(%a6) # set UNFL
20497 mov.w SRC_EX(%a0),FP_SCR0_EX(%a6) # copy {sgn,exp} to scratch
20498 mov.l SRC_HI(%a0),FP_SCR0_HI(%a6) # copy hi(man)
20499 mov.l SRC_LO(%a0),FP_SCR0_LO(%a6) # copy lo(man)
20502 clr.l %d0 # pass: S.F. = 0
20504 cmpi.b STAG(%a6),&DENORM # fetch src optype tag
20505 bne.b fout_sgl_unfl_cont # let DENORMs fall through
20507 lea FP_SCR0(%a6),%a0 # pass: ptr to scratch copy
20508 bsr.l norm # normalize the DENORM
20510 fout_sgl_unfl_cont:
20511 lea FP_SCR0(%a6),%a0 # pass: ptr to operand
20512 mov.l L_SCR3(%a6),%d1 # pass: rnd prec,mode
20513 bsr.l unf_res # calc default underflow result
20515 lea FP_SCR0(%a6),%a0 # pass: ptr to fop
20516 bsr.l dst_sgl # convert to single prec
20518 mov.b 1+EXC_OPWORD(%a6),%d1 # extract dst mode
20519 andi.b &0x38,%d1 # is mode == 0? (Dreg dst)
20520 beq.b fout_sgl_unfl_dn # must save to integer regfile
20522 mov.l EXC_EA(%a6),%a0 # stacked <ea> is correct
20523 bsr.l _dmem_write_long # write long
20525 tst.l %d1 # did dstore fail?
20526 bne.l facc_out_l # yes
20528 bra.b fout_sgl_unfl_chkexc # no; go check enabled exceptions
20531 mov.b 1+EXC_OPWORD(%a6),%d1 # extract Dn
20535 fout_sgl_unfl_chkexc:
20536 mov.b FPCR_ENABLE(%a6),%d1 # fetch exception enable byte
20537 andi.b &0x0a,%d1 # is UNFL or INEX enabled?
20538 bne.w fout_sd_exc_unfl # yes
20543 # it's definitely an overflow so call ovf_res to get the correct answer
20546 tst.b 3+SRC_HI(%a0) # is result inexact?
20547 bne.b fout_sgl_ovfl_inex2 # yes; low byte of hi(man) is non-zero
20548 tst.l SRC_LO(%a0) # is result inexact?
20549 bne.b fout_sgl_ovfl_inex2 # yes; lo(man) is non-zero
20550 ori.w &ovfl_inx_mask,2+USER_FPSR(%a6) # set ovfl/aovfl/ainex
20551 bra.b fout_sgl_ovfl_cont
20552 fout_sgl_ovfl_inex2:
20553 ori.w &ovfinx_mask,2+USER_FPSR(%a6) # set ovfl/aovfl/ainex/inex2
20555 fout_sgl_ovfl_cont:
20558 # call ovf_res() w/ sgl prec and the correct rnd mode to create the default
20559 # overflow result. DON'T save the returned ccodes from ovf_res() since
20560 # fmove out doesn't alter them.
20561 tst.b SRC_EX(%a0) # is operand negative?
20562 smi %d1 # set if so
20563 mov.l L_SCR3(%a6),%d0 # pass: sgl prec,rnd mode
20564 bsr.l ovf_res # calc OVFL result
20565 fmovm.x (%a0),&0x80 # load default overflow result
20566 fmov.s %fp0,%d0 # store to single
20568 mov.b 1+EXC_OPWORD(%a6),%d1 # extract dst mode
20569 andi.b &0x38,%d1 # is mode == 0? (Dreg dst)
20570 beq.b fout_sgl_ovfl_dn # must save to integer regfile
20572 mov.l EXC_EA(%a6),%a0 # stacked <ea> is correct
20573 bsr.l _dmem_write_long # write long
20575 tst.l %d1 # did dstore fail?
20576 bne.l facc_out_l # yes
20578 bra.b fout_sgl_ovfl_chkexc # no; go check enabled exceptions
20581 mov.b 1+EXC_OPWORD(%a6),%d1 # extract Dn
20585 fout_sgl_ovfl_chkexc:
20586 mov.b FPCR_ENABLE(%a6),%d1 # fetch exception enable byte
20587 andi.b &0x0a,%d1 # is UNFL or INEX enabled? NOTE(review): overflow path tests the same 0x0a mask as the underflow path -- confirm OVFL enable was not intended
20588 bne.w fout_sd_exc_ovfl # yes
20593 # move out MAY overflow:
20594 # (1) force the exp to 0x3fff
20595 # (2) do a move w/ appropriate rnd mode
20596 # (3) if exp still equals zero, then insert original exponent
20597 # for the correct result.
20598 # if exp now equals one, then it overflowed so call ovf_res.
20601 mov.w SRC_EX(%a0),%d1 # fetch current sign
20602 andi.w &0x8000,%d1 # keep it,clear exp
20603 ori.w &0x3fff,%d1 # insert biased exp 0x3fff (true exp = 0)
20604 mov.w %d1,FP_SCR0_EX(%a6) # insert scaled exp
20605 mov.l SRC_HI(%a0),FP_SCR0_HI(%a6) # copy hi(man)
20606 mov.l SRC_LO(%a0),FP_SCR0_LO(%a6) # copy lo(man)
20608 fmov.l L_SCR3(%a6),%fpcr # set FPCR
20610 fmov.x FP_SCR0(%a6),%fp0 # force fop to be rounded
20611 fmov.l &0x0,%fpcr # clear FPCR
20613 fabs.x %fp0 # need absolute value
20614 fcmp.b %fp0,&0x2 # did exponent increase?
20615 fblt.w fout_sgl_exg # no; go finish NORM
20616 bra.w fout_sgl_ovfl # yes; go handle overflow
20623 mov.w SRC_EX(%a0),FP_SCR0_EX(%a6) # copy {sgn,exp} to scratch
20624 mov.l SRC_HI(%a0),FP_SCR0_HI(%a6) # copy hi(man)
20625 mov.l SRC_LO(%a0),FP_SCR0_LO(%a6) # copy lo(man)
20627 cmpi.b STAG(%a6),&DENORM # was src a DENORM?
20628 bne.b fout_sd_exc_cont # no
20630 lea FP_SCR0(%a6),%a0 # pass: ptr to scratch copy
20634 bfins %d0,FP_SCR0_EX(%a6){&1:&15} # insert low 15 bits of %d0 as exponent; sign bit untouched
20635 bra.b fout_sd_exc_cont
20639 mov.l (%sp)+,%a0 # restore a0
20641 mov.w SRC_EX(%a0),FP_SCR0_EX(%a6) # copy {sgn,exp} to scratch
20642 mov.l SRC_HI(%a0),FP_SCR0_HI(%a6) # copy hi(man)
20643 mov.l SRC_LO(%a0),FP_SCR0_LO(%a6) # copy lo(man)
20646 bclr &0x7,FP_SCR0_EX(%a6) # clear sign bit
20647 sne.b 2+FP_SCR0_EX(%a6) # set internal sign bit
20648 lea FP_SCR0(%a6),%a0 # pass: ptr to DENORM
20650 mov.b 3+L_SCR3(%a6),%d1 # fetch saved rnd prec,mode byte
20654 mov.b 3+L_SCR3(%a6),%d1 # fetch saved rnd prec,mode byte
20657 clr.l %d0 # pass: zero g,r,s
20658 bsr.l _round # round the DENORM
20660 tst.b 2+FP_SCR0_EX(%a6) # is EXOP negative?
20661 beq.b fout_sd_exc_done # no
20662 bset &0x7,FP_SCR0_EX(%a6) # yes
20665 fmovm.x FP_SCR0(%a6),&0x40 # return EXOP in fp1
20668 #################################################################
20669 # fmove.d out ###################################################
20670 #################################################################
20672 andi.b &0x30,%d0 # clear rnd prec
20673 ori.b &d_mode*0x10,%d0 # insert dbl prec
20674 mov.l %d0,L_SCR3(%a6) # save rnd prec,mode on stack
20677 # operand is a normalized number. first, we check to see if the move out
20678 # would cause either an underflow or overflow. these cases are handled
20679 # separately. otherwise, set the FPCR to the proper rounding mode and
20680 # execute the move.
20682 mov.w SRC_EX(%a0),%d0 # extract exponent
20683 andi.w &0x7fff,%d0 # strip sign
20685 cmpi.w %d0,&DBL_HI # will operand overflow?
20686 bgt.w fout_dbl_ovfl # yes; go handle OVFL
20687 beq.w fout_dbl_may_ovfl # maybe; go handle possible OVFL
20688 cmpi.w %d0,&DBL_LO # will operand underflow?
20689 blt.w fout_dbl_unfl # yes; go handle underflow
20692 # NORMs(in range) can be stored out by a simple "fmov.d"
20693 # Unnormalized inputs can come through this point.
20696 fmovm.x SRC(%a0),&0x80 # fetch fop from stack
20698 fmov.l L_SCR3(%a6),%fpcr # set FPCR
20699 fmov.l &0x0,%fpsr # clear FPSR
20701 fmov.d %fp0,L_SCR1(%a6) # store does convert and round
20703 fmov.l &0x0,%fpcr # clear FPCR
20704 fmov.l %fpsr,%d0 # save FPSR
20706 or.w %d0,2+USER_FPSR(%a6) # set possible inex2/ainex
20708 mov.l EXC_EA(%a6),%a1 # pass: dst addr
20709 lea L_SCR1(%a6),%a0 # pass: src addr
20710 movq.l &0x8,%d0 # pass: opsize is 8 bytes
20711 bsr.l _dmem_write # store dbl fop to memory
20713 tst.l %d1 # did dstore fail?
20714 bne.l facc_out_d # yes
20716 rts # no; so we're finished
20719 # here, we know that the operand would UNFL if moved out to double prec,
20720 # so, denorm and round and then use generic store double routine to
20721 # write the value to memory.
20724 bset &unfl_bit,FPSR_EXCEPT(%a6) # set UNFL
20726 mov.w SRC_EX(%a0),FP_SCR0_EX(%a6) # copy {sgn,exp} to scratch
20727 mov.l SRC_HI(%a0),FP_SCR0_HI(%a6) # copy hi(man)
20728 mov.l SRC_LO(%a0),FP_SCR0_LO(%a6) # copy lo(man)
20731 clr.l %d0 # pass: S.F. = 0
20733 cmpi.b STAG(%a6),&DENORM # fetch src optype tag
20734 bne.b fout_dbl_unfl_cont # let DENORMs fall through
20736 lea FP_SCR0(%a6),%a0 # pass: ptr to scratch copy
20737 bsr.l norm # normalize the DENORM
20739 fout_dbl_unfl_cont:
20740 lea FP_SCR0(%a6),%a0 # pass: ptr to operand
20741 mov.l L_SCR3(%a6),%d1 # pass: rnd prec,mode
20742 bsr.l unf_res # calc default underflow result
20744 lea FP_SCR0(%a6),%a0 # pass: ptr to fop
20745 bsr.l dst_dbl # convert to double prec
20746 mov.l %d0,L_SCR1(%a6) # save hi(dbl result)
20747 mov.l %d1,L_SCR2(%a6) # save lo(dbl result)
20749 mov.l EXC_EA(%a6),%a1 # pass: dst addr
20750 lea L_SCR1(%a6),%a0 # pass: src addr
20751 movq.l &0x8,%d0 # pass: opsize is 8 bytes
20752 bsr.l _dmem_write # store dbl fop to memory
20754 tst.l %d1 # did dstore fail?
20755 bne.l facc_out_d # yes
20757 mov.b FPCR_ENABLE(%a6),%d1 # fetch exception enable byte
20758 andi.b &0x0a,%d1 # is UNFL or INEX enabled?
20759 bne.w fout_sd_exc_unfl # yes
20764 # it's definitely an overflow so call ovf_res to get the correct answer
20767 mov.w 2+SRC_LO(%a0),%d0 # fetch low word of lo(man)
20769 bne.b fout_dbl_ovfl_inex2 # non-zero; result is inexact
20771 ori.w &ovfl_inx_mask,2+USER_FPSR(%a6) # set ovfl/aovfl/ainex
20772 bra.b fout_dbl_ovfl_cont
20773 fout_dbl_ovfl_inex2:
20774 ori.w &ovfinx_mask,2+USER_FPSR(%a6) # set ovfl/aovfl/ainex/inex2
20776 fout_dbl_ovfl_cont:
20779 # call ovf_res() w/ dbl prec and the correct rnd mode to create the default
20780 # overflow result. DON'T save the returned ccodes from ovf_res() since
20781 # fmove out doesn't alter them.
20782 tst.b SRC_EX(%a0) # is operand negative?
20783 smi %d1 # set if so
20784 mov.l L_SCR3(%a6),%d0 # pass: dbl prec,rnd mode
20785 bsr.l ovf_res # calc OVFL result
20786 fmovm.x (%a0),&0x80 # load default overflow result
20787 fmov.d %fp0,L_SCR1(%a6) # store to double
20789 mov.l EXC_EA(%a6),%a1 # pass: dst addr
20790 lea L_SCR1(%a6),%a0 # pass: src addr
20791 movq.l &0x8,%d0 # pass: opsize is 8 bytes
20792 bsr.l _dmem_write # store dbl fop to memory
20794 tst.l %d1 # did dstore fail?
20795 bne.l facc_out_d # yes
20797 mov.b FPCR_ENABLE(%a6),%d1 # fetch exception enable byte
20798 andi.b &0x0a,%d1 # is UNFL or INEX enabled? NOTE(review): overflow path tests the same 0x0a mask as the underflow path -- confirm OVFL enable was not intended
20799 bne.w fout_sd_exc_ovfl # yes
20804 # move out MAY overflow:
20805 # (1) force the exp to 0x3fff
20806 # (2) do a move w/ appropriate rnd mode
20807 # (3) if exp still equals zero, then insert original exponent
20808 # for the correct result.
20809 # if exp now equals one, then it overflowed so call ovf_res.
20812 mov.w SRC_EX(%a0),%d1 # fetch current sign
20813 andi.w &0x8000,%d1 # keep it,clear exp
20814 ori.w &0x3fff,%d1 # insert biased exp 0x3fff (true exp = 0)
20815 mov.w %d1,FP_SCR0_EX(%a6) # insert scaled exp
20816 mov.l SRC_HI(%a0),FP_SCR0_HI(%a6) # copy hi(man)
20817 mov.l SRC_LO(%a0),FP_SCR0_LO(%a6) # copy lo(man)
20819 fmov.l L_SCR3(%a6),%fpcr # set FPCR
20821 fmov.x FP_SCR0(%a6),%fp0 # force fop to be rounded
20822 fmov.l &0x0,%fpcr # clear FPCR
20824 fabs.x %fp0 # need absolute value
20825 fcmp.b %fp0,&0x2 # did exponent increase?
20826 fblt.w fout_dbl_exg # no; go finish NORM
20827 bra.w fout_dbl_ovfl # yes; go handle overflow
20829 #########################################################################
20830 # XDEF **************************************************************** #
20831 # dst_dbl(): create double precision value from extended prec. #
20833 # XREF **************************************************************** #
20836 # INPUT *************************************************************** #
20837 # a0 = pointer to source operand in extended precision #
20839 # OUTPUT ************************************************************** #
20840 # d0 = hi(double precision result) #
20841 # d1 = lo(double precision result) #
20843 # ALGORITHM *********************************************************** #
20845 # Changes extended precision to double precision. #
20846 # Note: no attempt is made to round the extended value to double. #
20847 # dbl_sign = ext_sign #
20848 # dbl_exp = ext_exp - $3fff(ext bias) + $7ff(dbl bias) #
20849 # get rid of ext integer bit #
20850 # dbl_mant = ext_mant{62:12} #
20852 # --------------- --------------- --------------- #
20853 # extended -> |s| exp | |1| ms mant | | ls mant | #
20854 # --------------- --------------- --------------- #
20855 # 95 64 63 62 32 31 11 0 #
20860 # --------------- --------------- #
20861 # double -> |s|exp| mant | | mant | #
20862 # --------------- --------------- #
20865 #########################################################################
20868 clr.l %d0 # clear d0
20869 mov.w FTEMP_EX(%a0),%d0 # get exponent
20870 subi.w &EXT_BIAS,%d0 # subtract extended precision bias
20871 addi.w &DBL_BIAS,%d0 # add double precision bias
20872 tst.b FTEMP_HI(%a0) # is number a denorm?
20873 bmi.b dst_get_dupper # no
20874 subq.w &0x1,%d0 # yes; denorm bias = DBL_BIAS - 1
20876 swap %d0 # d0 now in upper word
20877 lsl.l &0x4,%d0 # d0 in proper place for dbl prec exp
20878 tst.b FTEMP_EX(%a0) # test sign
20879 bpl.b dst_get_dman # if positive, go process mantissa
20880 bset &0x1f,%d0 # if negative, set sign
20882 mov.l FTEMP_HI(%a0),%d1 # get ms mantissa
20883 bfextu %d1{&1:&20},%d1 # get upper 20 bits of ms
20884 or.l %d1,%d0 # put these bits in ms word of double
20885 mov.l %d0,L_SCR1(%a6) # put the new exp back on the stack
20886 mov.l FTEMP_HI(%a0),%d1 # get ms mantissa
20887 mov.l &21,%d0 # load shift count
20888 lsl.l %d0,%d1 # put lower 11 bits in upper bits
20889 mov.l %d1,L_SCR2(%a6) # build lower lword in memory
20890 mov.l FTEMP_LO(%a0),%d1 # get ls mantissa
20891 bfextu %d1{&0:&21},%d0 # get ls 21 bits of double
20892 mov.l L_SCR2(%a6),%d1 # reload the lower lword built above
20893 or.l %d0,%d1 # put them in double result
20894 mov.l L_SCR1(%a6),%d0 # reload hi(double) saved above; d0/d1 = hi/lo result
20897 #########################################################################
20898 # XDEF **************************************************************** #
20899 # dst_sgl(): create single precision value from extended prec #
20901 # XREF **************************************************************** #
20903 # INPUT *************************************************************** #
20904 # a0 = pointer to source operand in extended precision #
20906 # OUTPUT ************************************************************** #
20907 # d0 = single precision result #
20909 # ALGORITHM *********************************************************** #
20911 # Changes extended precision to single precision. #
20912 # sgl_sign = ext_sign #
20913 # sgl_exp = ext_exp - $3fff(ext bias) + $7f(sgl bias) #
20914 # get rid of ext integer bit #
20915 # sgl_mant = ext_mant{62:40} #
20917 # --------------- --------------- --------------- #
20918 # extended -> |s| exp | |1| ms mant | | ls mant | #
20919 # --------------- --------------- --------------- #
20920 # 95 64 63 62 40 32 31 12 0 #
20925 # --------------- #
20926 # single -> |s|exp| mant | #
20927 # --------------- #
20930 #########################################################################
20934 mov.w FTEMP_EX(%a0),%d0 # get exponent
20935 subi.w &EXT_BIAS,%d0 # subtract extended precision bias
20936 addi.w &SGL_BIAS,%d0 # add single precision bias
20937 tst.b FTEMP_HI(%a0) # is number a denorm?
20938 bmi.b dst_get_supper # no
20939 subq.w &0x1,%d0 # yes; denorm bias = SGL_BIAS - 1
20941 swap %d0 # put exp in upper word of d0
20942 lsl.l &0x7,%d0 # shift it into single exp bits
20943 tst.b FTEMP_EX(%a0) # test sign
20944 bpl.b dst_get_sman # if positive, continue
20945 bset &0x1f,%d0 # if negative, put in sign first
20947 mov.l FTEMP_HI(%a0),%d1 # get ms mantissa
20948 andi.l &0x7fffff00,%d1 # get upper 23 bits of ms
20949 lsr.l &0x8,%d1 # and put them flush right
20950 or.l %d1,%d0 # put these bits in ms word of single
20953 ##############################################################################
20955 bsr.l _calc_ea_fout # fetch the <ea>
20958 mov.b STAG(%a6),%d0 # fetch input type
20959 bne.w fout_pack_not_norm # input is not NORM
20962 btst &0x4,EXC_CMDREG(%a6) # static or dynamic?
20963 beq.b fout_pack_s # static
20966 mov.b 1+EXC_CMDREG(%a6),%d1 # fetch dynamic reg
20970 bsr.l fetch_dreg # fetch Dn w/ k-factor
20972 bra.b fout_pack_type
20974 mov.b 1+EXC_CMDREG(%a6),%d0 # fetch static field
20977 bfexts %d0{&25:&7},%d0 # extract k-factor
20980 lea FP_SRC(%a6),%a0 # pass: ptr to input
20982 # bindec is currently scrambling FP_SRC for denorm inputs.
20983 # we'll have to change this, but for now, tough luck!!!
20984 bsr.l bindec # convert xprec to packed
20986 # andi.l &0xcfff000f,FP_SCR0(%a6) # clear unused fields
20987 andi.l &0xcffff00f,FP_SCR0(%a6) # clear unused fields
20991 tst.b 3+FP_SCR0_EX(%a6) # is low byte of first lword non-zero?
20992 bne.b fout_pack_set # yes; result is not zero
20993 tst.l FP_SCR0_HI(%a6) # is middle lword non-zero?
20994 bne.b fout_pack_set # yes; result is not zero
20995 tst.l FP_SCR0_LO(%a6) # is last lword non-zero?
20996 bne.b fout_pack_set # yes; result is not zero
20998 # add the extra condition that only if the k-factor was zero, too, should
20999 # we zero the exponent
21001 bne.b fout_pack_set
21002 # "mantissa" is all zero which means that the answer is zero. but, the '040
21003 # algorithm allows the exponent to be non-zero. the 881/2 do not. Therefore,
21004 # if the mantissa is zero, I will zero the exponent, too.
21005 # the question now is whether the exponents sign bit is allowed to be non-zero
21006 # for a zero, also...
21007 andi.w &0xf000,FP_SCR0(%a6) # keep top 4 bits of first word; zero the rest
21011 lea FP_SCR0(%a6),%a0 # pass: src addr
21014 mov.l (%sp)+,%a1 # pass: dst addr
21015 mov.l &0xc,%d0 # pass: opsize is 12 bytes
21017 cmpi.b SPCOND_FLG(%a6),&mda7_flg # does SPCOND_FLG hold mda7_flg?
21020 bsr.l _dmem_write # write packed number to memory
21022 tst.l %d1 # did dstore fail?
21023 bne.w fout_ext_err # yes
21027 # we don't want to do the write if the exception occurred in supervisor mode
21028 # so _mem_write2() handles this for us.
21030 bsr.l _mem_write2 # write packed number to memory
21032 tst.l %d1 # did dstore fail?
21033 bne.w fout_ext_err # yes
21037 fout_pack_not_norm:
21038 cmpi.b %d0,&DENORM # is it a DENORM?
21039 beq.w fout_pack_norm # yes
21040 lea FP_SRC(%a6),%a0 # pass: src addr is FP_SRC
21041 clr.w 2+FP_SRC_EX(%a6) # clear word following {sgn,exp}
21042 cmpi.b %d0,&SNAN # is it an SNAN?
21043 beq.b fout_pack_snan # yes
21044 bra.b fout_pack_write # no
21047 ori.w &snaniop2_mask,FPSR_EXCEPT(%a6) # set SNAN/AIOP
21048 bset &0x6,FP_SRC_HI(%a6) # set snan bit
21049 bra.b fout_pack_write
21051 #########################################################################
21052 # XDEF **************************************************************** #
21053 # fetch_dreg(): fetch register according to index in d1 #
21055 # XREF **************************************************************** #
21058 # INPUT *************************************************************** #
21059 # d1 = index of register to fetch from #
21061 # OUTPUT ************************************************************** #
21062 # d0 = value of register fetched #
21064 # ALGORITHM *********************************************************** #
21065 # According to the index value in d1 which can range from zero #
21066 # to fifteen, load the corresponding register file value (where #
21067 # address register indexes start at 8). D0/D1/A0/A1/A6/A7 are on the #
21068 # stack. The rest should still be in their original places. #
21070 #########################################################################
21072 # this routine leaves d1 intact for subsequent store_dreg calls.
21075 mov.w (tbl_fdreg.b,%pc,%d1.w*2),%d0 # d0 = table entry d1: handler offset from tbl_fdreg
21076 jmp (tbl_fdreg.b,%pc,%d0.w*1) # dispatch to handler; d1 is left untouched
21079 short fdreg0 - tbl_fdreg # index 0x0: d0
21080 short fdreg1 - tbl_fdreg # index 0x1: d1
21081 short fdreg2 - tbl_fdreg # index 0x2: d2
21082 short fdreg3 - tbl_fdreg # index 0x3: d3
21083 short fdreg4 - tbl_fdreg # index 0x4: d4
21084 short fdreg5 - tbl_fdreg # index 0x5: d5
21085 short fdreg6 - tbl_fdreg # index 0x6: d6
21086 short fdreg7 - tbl_fdreg # index 0x7: d7
21087 short fdreg8 - tbl_fdreg # index 0x8: a0 (address registers start at 8)
21088 short fdreg9 - tbl_fdreg # index 0x9: a1
21089 short fdrega - tbl_fdreg # index 0xa: a2
21090 short fdregb - tbl_fdreg # index 0xb: a3
21091 short fdregc - tbl_fdreg # index 0xc: a4
21092 short fdregd - tbl_fdreg # index 0xd: a5
21093 short fdrege - tbl_fdreg # index 0xe: a6
21094 short fdregf - tbl_fdreg # index 0xf: a7
21097 mov.l EXC_DREGS+0x0(%a6),%d0 # stacked copy at EXC_DREGS+0x0 (presumably saved d0)
21100 mov.l EXC_DREGS+0x4(%a6),%d0 # stacked copy at EXC_DREGS+0x4 (presumably saved d1)
21121 mov.l EXC_DREGS+0x8(%a6),%d0 # stacked copy at EXC_DREGS+0x8 (same slot iareg0 updates, i.e. a0)
21124 mov.l EXC_DREGS+0xc(%a6),%d0 # stacked copy at EXC_DREGS+0xc (same slot iareg1 updates, i.e. a1)
21142 mov.l EXC_A7(%a6),%d0 # stacked a7; handlers for live registers are not visible in this listing
21145 #########################################################################
21146 # XDEF **************************************************************** #
21147 # store_dreg_l(): store longword to data register specified by d1 #
21149 # XREF **************************************************************** #
21152 # INPUT *************************************************************** #
21153 # d0 = longword value to store #
21154 # d1 = index of register to store to #
21156 # OUTPUT ************************************************************** #
21157 # (data register is updated) #
21159 # ALGORITHM *********************************************************** #
21160 # According to the index value in d1, store the longword value #
21161 # in d0 to the corresponding data register. D0/D1 are on the stack #
21162 # while the rest are in their initial places. #
21164 #########################################################################
21166 global store_dreg_l
21168 mov.w (tbl_sdregl.b,%pc,%d1.w*2),%d1 # d1 = handler offset from tbl_sdregl (index in d1 is overwritten)
21169 jmp (tbl_sdregl.b,%pc,%d1.w*1) # dispatch to handler
21172 short sdregl0 - tbl_sdregl # index 0: d0
21173 short sdregl1 - tbl_sdregl # index 1: d1
21174 short sdregl2 - tbl_sdregl # index 2: d2
21175 short sdregl3 - tbl_sdregl # index 3: d3
21176 short sdregl4 - tbl_sdregl # index 4: d4
21177 short sdregl5 - tbl_sdregl # index 5: d5
21178 short sdregl6 - tbl_sdregl # index 6: d6
21179 short sdregl7 - tbl_sdregl # index 7: d7
21182 mov.l %d0,EXC_DREGS+0x0(%a6) # d0 lives on the stack: update its saved copy
21185 mov.l %d0,EXC_DREGS+0x4(%a6) # d1 lives on the stack: update its saved copy (d2-d7 handlers not visible here)
21206 #########################################################################
21207 # XDEF **************************************************************** #
21208 # store_dreg_w(): store word to data register specified by d1 #
21210 # XREF **************************************************************** #
21213 # INPUT *************************************************************** #
21214 # d0 = word value to store #
21215 # d1 = index of register to store to #
21217 # OUTPUT ************************************************************** #
21218 # (data register is updated) #
21220 # ALGORITHM *********************************************************** #
21221 # According to the index value in d1, store the word value #
21222 # in d0 to the corresponding data register. D0/D1 are on the stack #
21223 # while the rest are in their initial places. #
21225 #########################################################################
21227 global store_dreg_w
21229 mov.w (tbl_sdregw.b,%pc,%d1.w*2),%d1 # d1 = handler offset from tbl_sdregw (index in d1 is overwritten)
21230 jmp (tbl_sdregw.b,%pc,%d1.w*1) # dispatch to handler
21233 short sdregw0 - tbl_sdregw # index 0: d0
21234 short sdregw1 - tbl_sdregw # index 1: d1
21235 short sdregw2 - tbl_sdregw # index 2: d2
21236 short sdregw3 - tbl_sdregw # index 3: d3
21237 short sdregw4 - tbl_sdregw # index 4: d4
21238 short sdregw5 - tbl_sdregw # index 5: d5
21239 short sdregw6 - tbl_sdregw # index 6: d6
21240 short sdregw7 - tbl_sdregw # index 7: d7
21243 mov.w %d0,2+EXC_DREGS+0x0(%a6) # saved d0: offset 2 = low-order word of the stacked longword
21246 mov.w %d0,2+EXC_DREGS+0x4(%a6) # saved d1: low-order word only (d2-d7 handlers not visible here)
21267 #########################################################################
21268 # XDEF **************************************************************** #
21269 # store_dreg_b(): store byte to data register specified by d1 #
21271 # XREF **************************************************************** #
21274 # INPUT *************************************************************** #
21275 # d0 = byte value to store #
21276 # d1 = index of register to store to #
21278 # OUTPUT ************************************************************** #
21279 # (data register is updated) #
21281 # ALGORITHM *********************************************************** #
21282 # According to the index value in d1, store the byte value #
21283 # in d0 to the corresponding data register. D0/D1 are on the stack #
21284 # while the rest are in their initial places. #
21286 #########################################################################
21288 global store_dreg_b
21290 mov.w (tbl_sdregb.b,%pc,%d1.w*2),%d1 # d1 = handler offset from tbl_sdregb (index in d1 is overwritten)
21291 jmp (tbl_sdregb.b,%pc,%d1.w*1) # dispatch to handler
21294 short sdregb0 - tbl_sdregb # index 0: d0
21295 short sdregb1 - tbl_sdregb # index 1: d1
21296 short sdregb2 - tbl_sdregb # index 2: d2
21297 short sdregb3 - tbl_sdregb # index 3: d3
21298 short sdregb4 - tbl_sdregb # index 4: d4
21299 short sdregb5 - tbl_sdregb # index 5: d5
21300 short sdregb6 - tbl_sdregb # index 6: d6
21301 short sdregb7 - tbl_sdregb # index 7: d7
21304 mov.b %d0,3+EXC_DREGS+0x0(%a6) # saved d0: offset 3 = low-order byte of the stacked longword
21307 mov.b %d0,3+EXC_DREGS+0x4(%a6) # saved d1: low-order byte only (d2-d7 handlers not visible here)
21328 #########################################################################
21329 # XDEF **************************************************************** #
21330 # inc_areg(): increment an address register by the value in d0 #
21332 # XREF **************************************************************** #
21335 # INPUT *************************************************************** #
21336 # d0 = amount to increment by #
21337 # d1 = index of address register to increment #
21339 # OUTPUT ************************************************************** #
21340 # (address register is updated) #
21342 # ALGORITHM *********************************************************** #
21343 # Typically used for an instruction w/ a post-increment <ea>, #
21344 # this routine adds the increment value in d0 to the address register #
21345 # specified by d1. A0/A1/A6/A7 reside on the stack. The rest reside #
21346 # in their original places. #
21347 # For a7, if the increment amount is one, then we have to #
21348 # increment by two. For any a7 update, set mia7_flg so that if #
21349 # an access error exception occurs later in emulation, this address #
21350 # register update can be undone. #
21352 #########################################################################
21356 mov.w (tbl_iareg.b,%pc,%d1.w*2),%d1 # d1 = handler offset from tbl_iareg (index in d1 is overwritten)
21357 jmp (tbl_iareg.b,%pc,%d1.w*1) # dispatch to handler
21360 short iareg0 - tbl_iareg # index 0: a0
21361 short iareg1 - tbl_iareg # index 1: a1
21362 short iareg2 - tbl_iareg # index 2: a2
21363 short iareg3 - tbl_iareg # index 3: a3
21364 short iareg4 - tbl_iareg # index 4: a4
21365 short iareg5 - tbl_iareg # index 5: a5
21366 short iareg6 - tbl_iareg # index 6: a6
21367 short iareg7 - tbl_iareg # index 7: a7
21369 iareg0: add.l %d0,EXC_DREGS+0x8(%a6) # a0 is on the stack: bump its saved copy
21371 iareg1: add.l %d0,EXC_DREGS+0xc(%a6) # a1 is on the stack: bump its saved copy
21373 iareg2: add.l %d0,%a2 # a2 is still live: bump it directly
21375 iareg3: add.l %d0,%a3 # a3 is still live: bump it directly
21377 iareg4: add.l %d0,%a4 # a4 is still live: bump it directly
21379 iareg5: add.l %d0,%a5 # a5 is still live: bump it directly
21381 iareg6: add.l %d0,(%a6) # bump the longword at (a6), where the caller's a6 is kept
21383 iareg7: mov.b &mia7_flg,SPCOND_FLG(%a6) # record the a7 post-increment so it can be undone later
21386 add.l %d0,EXC_A7(%a6) # a7 += d0 (the test selecting this vs. the +2 case is not visible here)
21389 addq.l &0x2,EXC_A7(%a6) # increment of one: a7 moves by two instead
21392 #########################################################################
21393 # XDEF **************************************************************** #
21394 # dec_areg(): decrement an address register by the value in d0 #
21396 # XREF **************************************************************** #
21399 # INPUT *************************************************************** #
21400 # d0 = amount to decrement by #
21401 # d1 = index of address register to decrement #
21403 # OUTPUT ************************************************************** #
21404 # (address register is updated) #
21406 # ALGORITHM *********************************************************** #
21407 # Typically used for an instruction w/ a pre-decrement <ea>, #
21408 # this routine subtracts the decrement value in d0 from the address register #
21409 # specified by d1. A0/A1/A6/A7 reside on the stack. The rest reside #
21410 # in their original places. #
21411 # For a7, if the decrement amount is one, then we have to #
21412 # decrement by two. For any a7 update, set mda7_flg so that if #
21413 # an access error exception occurs later in emulation, this address #
21414 # register update can be undone. #
21416 #########################################################################
21420 mov.w (tbl_dareg.b,%pc,%d1.w*2),%d1 # d1 = handler offset from tbl_dareg (index in d1 is overwritten)
21421 jmp (tbl_dareg.b,%pc,%d1.w*1) # dispatch to handler
21424 short dareg0 - tbl_dareg # index 0: a0
21425 short dareg1 - tbl_dareg # index 1: a1
21426 short dareg2 - tbl_dareg # index 2: a2
21427 short dareg3 - tbl_dareg # index 3: a3
21428 short dareg4 - tbl_dareg # index 4: a4
21429 short dareg5 - tbl_dareg # index 5: a5
21430 short dareg6 - tbl_dareg # index 6: a6
21431 short dareg7 - tbl_dareg # index 7: a7
21433 dareg0: sub.l %d0,EXC_DREGS+0x8(%a6) # a0 is on the stack: lower its saved copy
21435 dareg1: sub.l %d0,EXC_DREGS+0xc(%a6) # a1 is on the stack: lower its saved copy
21437 dareg2: sub.l %d0,%a2 # a2 is still live: lower it directly
21439 dareg3: sub.l %d0,%a3 # a3 is still live: lower it directly
21441 dareg4: sub.l %d0,%a4 # a4 is still live: lower it directly
21443 dareg5: sub.l %d0,%a5 # a5 is still live: lower it directly
21445 dareg6: sub.l %d0,(%a6) # lower the longword at (a6), where the caller's a6 is kept
21447 dareg7: mov.b &mda7_flg,SPCOND_FLG(%a6) # record the a7 pre-decrement so it can be undone later
21450 sub.l %d0,EXC_A7(%a6) # a7 -= d0 (the test selecting this vs. the -2 case is not visible here)
21453 subq.l &0x2,EXC_A7(%a6) # decrement of one: a7 moves by two instead
21456 ##############################################################################
21458 #########################################################################
21459 # XDEF **************************************************************** #
21460 # load_fpn1(): load FP register value into FP_SRC(a6). #
21462 # XREF **************************************************************** #
21465 # INPUT *************************************************************** #
21466 # d0 = index of FP register to load #
21468 # OUTPUT ************************************************************** #
21469 # FP_SRC(a6) = value loaded from FP register file #
21471 # ALGORITHM *********************************************************** #
21472 # Using the index in d0, load FP_SRC(a6) with a number from the #
21473 # FP register file. #
21475 #########################################################################
21479 mov.w (tbl_load_fpn1.b,%pc,%d0.w*2), %d0 # d0 = handler offset from tbl_load_fpn1 (index is overwritten)
21480 jmp (tbl_load_fpn1.b,%pc,%d0.w*1) # dispatch to handler
21483 short load_fpn1_0 - tbl_load_fpn1 # index 0: fp0
21484 short load_fpn1_1 - tbl_load_fpn1 # index 1: fp1
21485 short load_fpn1_2 - tbl_load_fpn1 # index 2: fp2
21486 short load_fpn1_3 - tbl_load_fpn1 # index 3: fp3
21487 short load_fpn1_4 - tbl_load_fpn1 # index 4: fp4
21488 short load_fpn1_5 - tbl_load_fpn1 # index 5: fp5
21489 short load_fpn1_6 - tbl_load_fpn1 # index 6: fp6
21490 short load_fpn1_7 - tbl_load_fpn1 # index 7: fp7
21493 mov.l 0+EXC_FP0(%a6), 0+FP_SRC(%a6) # fp0: copy the 12-byte image saved at EXC_FP0, longword 1 of 3
21494 mov.l 4+EXC_FP0(%a6), 4+FP_SRC(%a6) # longword 2 of 3
21495 mov.l 8+EXC_FP0(%a6), 8+FP_SRC(%a6) # longword 3 of 3
21496 lea FP_SRC(%a6), %a0 # a0 = ptr to FP_SRC
21499 mov.l 0+EXC_FP1(%a6), 0+FP_SRC(%a6) # fp1: copy the 12-byte image saved at EXC_FP1, longword 1 of 3
21500 mov.l 4+EXC_FP1(%a6), 4+FP_SRC(%a6) # longword 2 of 3
21501 mov.l 8+EXC_FP1(%a6), 8+FP_SRC(%a6) # longword 3 of 3
21502 lea FP_SRC(%a6), %a0 # a0 = ptr to FP_SRC
21505 fmovm.x &0x20, FP_SRC(%a6) # fp2 is still live: store it straight to FP_SRC
21506 lea FP_SRC(%a6), %a0 # a0 = ptr to FP_SRC
21509 fmovm.x &0x10, FP_SRC(%a6) # fp3 -> FP_SRC
21510 lea FP_SRC(%a6), %a0 # a0 = ptr to FP_SRC
21513 fmovm.x &0x08, FP_SRC(%a6) # fp4 -> FP_SRC
21514 lea FP_SRC(%a6), %a0 # a0 = ptr to FP_SRC
21517 fmovm.x &0x04, FP_SRC(%a6) # fp5 -> FP_SRC
21518 lea FP_SRC(%a6), %a0 # a0 = ptr to FP_SRC
21521 fmovm.x &0x02, FP_SRC(%a6) # fp6 -> FP_SRC
21522 lea FP_SRC(%a6), %a0 # a0 = ptr to FP_SRC
21525 fmovm.x &0x01, FP_SRC(%a6) # fp7 -> FP_SRC
21526 lea FP_SRC(%a6), %a0 # a0 = ptr to FP_SRC
21529 #############################################################################
21531 #########################################################################
21532 # XDEF **************************************************************** #
21533 # load_fpn2(): load FP register value into FP_DST(a6). #
21535 # XREF **************************************************************** #
21538 # INPUT *************************************************************** #
21539 # d0 = index of FP register to load #
21541 # OUTPUT ************************************************************** #
21542 # FP_DST(a6) = value loaded from FP register file #
21544 # ALGORITHM *********************************************************** #
21545 # Using the index in d0, load FP_DST(a6) with a number from the #
21546 # FP register file. #
21548 #########################################################################
21552 mov.w (tbl_load_fpn2.b,%pc,%d0.w*2), %d0 # d0 = handler offset from tbl_load_fpn2 (index is overwritten)
21553 jmp (tbl_load_fpn2.b,%pc,%d0.w*1) # dispatch to handler
21556 short load_fpn2_0 - tbl_load_fpn2 # index 0: fp0
21557 short load_fpn2_1 - tbl_load_fpn2 # index 1: fp1
21558 short load_fpn2_2 - tbl_load_fpn2 # index 2: fp2
21559 short load_fpn2_3 - tbl_load_fpn2 # index 3: fp3
21560 short load_fpn2_4 - tbl_load_fpn2 # index 4: fp4
21561 short load_fpn2_5 - tbl_load_fpn2 # index 5: fp5
21562 short load_fpn2_6 - tbl_load_fpn2 # index 6: fp6
21563 short load_fpn2_7 - tbl_load_fpn2 # index 7: fp7
21566 mov.l 0+EXC_FP0(%a6), 0+FP_DST(%a6) # fp0: copy the 12-byte image saved at EXC_FP0, longword 1 of 3
21567 mov.l 4+EXC_FP0(%a6), 4+FP_DST(%a6) # longword 2 of 3
21568 mov.l 8+EXC_FP0(%a6), 8+FP_DST(%a6) # longword 3 of 3
21569 lea FP_DST(%a6), %a0 # a0 = ptr to FP_DST
21572 mov.l 0+EXC_FP1(%a6), 0+FP_DST(%a6) # fp1: copy the 12-byte image saved at EXC_FP1, longword 1 of 3
21573 mov.l 4+EXC_FP1(%a6), 4+FP_DST(%a6) # longword 2 of 3
21574 mov.l 8+EXC_FP1(%a6), 8+FP_DST(%a6) # longword 3 of 3
21575 lea FP_DST(%a6), %a0 # a0 = ptr to FP_DST
21578 fmovm.x &0x20, FP_DST(%a6) # fp2 is still live: store it straight to FP_DST
21579 lea FP_DST(%a6), %a0 # a0 = ptr to FP_DST
21582 fmovm.x &0x10, FP_DST(%a6) # fp3 -> FP_DST
21583 lea FP_DST(%a6), %a0 # a0 = ptr to FP_DST
21586 fmovm.x &0x08, FP_DST(%a6) # fp4 -> FP_DST
21587 lea FP_DST(%a6), %a0 # a0 = ptr to FP_DST
21590 fmovm.x &0x04, FP_DST(%a6) # fp5 -> FP_DST
21591 lea FP_DST(%a6), %a0 # a0 = ptr to FP_DST
21594 fmovm.x &0x02, FP_DST(%a6) # fp6 -> FP_DST
21595 lea FP_DST(%a6), %a0 # a0 = ptr to FP_DST
21598 fmovm.x &0x01, FP_DST(%a6) # fp7 -> FP_DST
21599 lea FP_DST(%a6), %a0 # a0 = ptr to FP_DST
21602 #############################################################################
21604 #########################################################################
21605 # XDEF **************************************************************** #
21606 # store_fpreg(): store an fp value to the fpreg designated d0. #
21608 # XREF **************************************************************** #
21611 # INPUT *************************************************************** #
21612 # fp0 = extended precision value to store #
21613 # d0 = index of floating-point register #
21615 # OUTPUT ************************************************************** #
21618 # ALGORITHM *********************************************************** #
21619 # Store the value in fp0 to the FP register designated by the #
21620 # value in d0. The FP number can be DENORM or SNAN so we have to be #
21621 # careful that we don't take an exception here. #
21623 #########################################################################
21627 mov.w (tbl_store_fpreg.b,%pc,%d0.w*2), %d0 # d0 = handler offset from tbl_store_fpreg (index is overwritten)
21628 jmp (tbl_store_fpreg.b,%pc,%d0.w*1) # dispatch to handler
21631 short store_fpreg_0 - tbl_store_fpreg # index 0: fp0
21632 short store_fpreg_1 - tbl_store_fpreg # index 1: fp1
21633 short store_fpreg_2 - tbl_store_fpreg # index 2: fp2
21634 short store_fpreg_3 - tbl_store_fpreg # index 3: fp3
21635 short store_fpreg_4 - tbl_store_fpreg # index 4: fp4
21636 short store_fpreg_5 - tbl_store_fpreg # index 5: fp5
21637 short store_fpreg_6 - tbl_store_fpreg # index 6: fp6
21638 short store_fpreg_7 - tbl_store_fpreg # index 7: fp7
21641 fmovm.x &0x80, EXC_FP0(%a6) # dst fp0: write fp0 into the EXC_FP0 slot of the frame
21644 fmovm.x &0x80, EXC_FP1(%a6) # dst fp1: write fp0 into the EXC_FP1 slot of the frame
21647 fmovm.x &0x01, -(%sp) # dst fp2: push fp0 (fmovm keeps the DENORM/SNAN image untouched) ...
21648 fmovm.x (%sp)+, &0x20 # ... and pop it into fp2
21651 fmovm.x &0x01, -(%sp) # dst fp3: push fp0 ...
21652 fmovm.x (%sp)+, &0x10 # ... and pop it into fp3
21655 fmovm.x &0x01, -(%sp) # dst fp4: push fp0 ...
21656 fmovm.x (%sp)+, &0x08 # ... and pop it into fp4
21659 fmovm.x &0x01, -(%sp) # dst fp5: push fp0 ...
21660 fmovm.x (%sp)+, &0x04 # ... and pop it into fp5
21663 fmovm.x &0x01, -(%sp) # dst fp6: push fp0 ...
21664 fmovm.x (%sp)+, &0x02 # ... and pop it into fp6
21667 fmovm.x &0x01, -(%sp) # dst fp7: push fp0 ...
21668 fmovm.x (%sp)+, &0x01 # ... and pop it into fp7
21671 #########################################################################
21672 # XDEF **************************************************************** #
21673 # _denorm(): denormalize an intermediate result #
21675 # XREF **************************************************************** #
21678 # INPUT *************************************************************** #
21679 # a0 = points to the operand to be denormalized #
21680 # (in the internal extended format) #
21682 # d0 = rounding precision #
21684 # OUTPUT ************************************************************** #
21685 # a0 = pointer to the denormalized result #
21686 # (in the internal extended format) #
21688 # d0 = guard,round,sticky #
21690 # ALGORITHM *********************************************************** #
21691 # According to the exponent underflow threshold for the given #
21692 # precision, shift the mantissa bits to the right in order to raise the #
21693 # exponent of the operand to the threshold value. While shifting the #
21694 # mantissa bits right, maintain the value of the guard, round, and sticky bits. #
21697 # (1) _denorm() is called by the underflow routines #
21698 # (2) _denorm() does NOT affect the status register #
21700 #########################################################################
21703 # table of exponent threshold values for each precision (the table itself is not visible in this listing)
21713 # Load the exponent threshold for the precision selected and check
21714 # to see if (threshold - exponent) is > 65 in which case we can
21715 # simply calculate the sticky bit and zero the mantissa. otherwise
21716 # we have to call the denormalization routine.
21718 lsr.b &0x2, %d0 # shift prec to lo bits
21719 mov.w (tbl_thresh.b,%pc,%d0.w*2), %d1 # load prec threshold
21720 mov.w %d1, %d0 # copy d1 into d0
21721 sub.w FTEMP_EX(%a0), %d0 # diff = threshold - exp
21722 cmpi.w %d0, &66 # is diff > 65? (mant + g,r bits)
21723 bpl.b denorm_set_stky # yes; just calc sticky
21725 clr.l %d0 # diff <= 65: start with g,r,s clear
21726 btst &inex2_bit, FPSR_EXCEPT(%a6) # was INEX2 already set?
21727 beq.b denorm_call # no; don't change anything
21728 bset &29, %d0 # yes; set sticky bit
21731 bsr.l dnrm_lp # denormalize the number (d0 = g,r,s; d1 = threshold)
21735 # all bits would have been shifted off during the denorm so simply
21736 # set the sticky bit and clear the entire mantissa.
21739 mov.l &0x20000000, %d0 # set sticky bit in return value
21740 mov.w %d1, FTEMP_EX(%a0) # load exp with threshold
21741 clr.l FTEMP_HI(%a0) # clear hi(mantissa)
21742 clr.l FTEMP_LO(%a0) # clear lo(mantissa)
21746 # dnrm_lp(): denormalize exponent/mantissa to specified threshold #
21749 # %a0 : points to the operand to be denormalized #
21750 # %d0{31:29} : initial guard,round,sticky #
21751 # %d1{15:0} : denormalization threshold #
21753 # %a0 : points to the denormalized operand #
21754 # %d0{31:29} : final guard,round,sticky #
21757 # *** Local Equates *** #
21758 set GRS, L_SCR2 # g,r,s temp storage
21759 set FTEMP_LO2, L_SCR1 # FTEMP_LO copy
21765 # make a copy of FTEMP_LO and place the g,r,s bits directly after it
21766 # in memory so as to make the bitfield extraction for denormalization easier.
21768 mov.l FTEMP_LO(%a0), FTEMP_LO2(%a6) # make FTEMP_LO copy
21769 mov.l %d0, GRS(%a6) # place g,r,s after it
21772 # check to see how much less than the underflow threshold the operand is.
21775 mov.l %d1, %d0 # copy the denorm threshold
21776 sub.w FTEMP_EX(%a0), %d1 # d1 = threshold - uns exponent
21777 ble.b dnrm_no_lp # d1 <= 0
21778 cmpi.w %d1, &0x20 # is ( 0 <= d1 < 32) ? (branch to the < 32 case is not visible here)
21780 cmpi.w %d1, &0x40 # is (32 <= d1 < 64) ? (branch to the < 64 case is not visible here)
21782 bra.w case_3 # (d1 >= 64)
21785 # No denormalization necessary
21788 mov.l GRS(%a6), %d0 # restore original g,r,s
21794 # %d0 = denorm threshold
21795 # %d1 = "n" = amt to shift (reached from dnrm_lp's "0 <= d1 < 32" test)
21797 # ---------------------------------------------------------
21798 # | FTEMP_HI | FTEMP_LO |grs000.........000|
21799 # ---------------------------------------------------------
21800 # <-(32 - n)-><-(n)-><-(32 - n)-><-(n)-><-(32 - n)-><-(n)->
21809 # <-(n)-><-(32 - n)-><------(32)-------><------(32)------->
21810 # ---------------------------------------------------------
21811 # |0.....0| NEW_HI | NEW_FTEMP_LO |grs |
21812 # ---------------------------------------------------------
21815 mov.l %d2, -(%sp) # create temp storage
21817 mov.w %d0, FTEMP_EX(%a0) # exponent = denorm threshold
21819 sub.w %d1, %d0 # %d0 = 32 - %d1 (the load of 32 into d0 is not visible in this listing)
21821 cmpi.w %d1, &29 # is shft amt >= 29
21822 blt.b case1_extract # no; no fix needed
21823 mov.b GRS(%a6), %d2 # yes; fold the incoming g,r,s byte ...
21824 or.b %d2, 3+FTEMP_LO2(%a6) # ... into the low byte of the FTEMP_LO copy so it is seen as shifted-off bits
21827 bfextu FTEMP_HI(%a0){&0:%d0}, %d2 # %d2 = new FTEMP_HI
21828 bfextu FTEMP_HI(%a0){%d0:&32}, %d1 # %d1 = new FTEMP_LO
21829 bfextu FTEMP_LO2(%a6){%d0:&32}, %d0 # %d0 = new G,R,S
21831 mov.l %d2, FTEMP_HI(%a0) # store new FTEMP_HI
21832 mov.l %d1, FTEMP_LO(%a0) # store new FTEMP_LO
21834 bftst %d0{&2:&30} # were bits shifted off? (anything below the new g,r)
21835 beq.b case1_sticky_clear # no; go finish
21836 bset &rnd_stky_bit, %d0 # yes; set sticky bit
21838 case1_sticky_clear:
21839 and.l &0xe0000000, %d0 # clear all but G,R,S
21840 mov.l (%sp)+, %d2 # restore temp register
21846 # %d0 = denorm threshold
21847 # %d1 = "n" = amt to shift (reached from dnrm_lp's "32 <= d1 < 64" test)
21849 # ---------------------------------------------------------
21850 # | FTEMP_HI | FTEMP_LO |grs000.........000|
21851 # ---------------------------------------------------------
21852 # <-(32 - n)-><-(n)-><-(32 - n)-><-(n)-><-(32 - n)-><-(n)->
21855 # \ \ -------------------
21856 # \ -------------------- \
21857 # ------------------- \ \
21861 # <-------(32)------><-(n)-><-(32 - n)-><------(32)------->
21862 # ---------------------------------------------------------
21863 # |0...............0|0....0| NEW_LO |grs |
21864 # ---------------------------------------------------------
21867 mov.l %d2, -(%sp) # create temp storage
21869 mov.w %d0, FTEMP_EX(%a0) # exponent = denorm threshold
21870 subi.w &0x20, %d1 # %d1 now between 0 and 32
21872 sub.w %d1, %d0 # %d0 = 32 - %d1 (the load of 32 into d0 is not visible in this listing)
21874 # subtle step here; or in the g,r,s at the bottom of FTEMP_LO to minimize
21875 # the number of bits to check for the sticky detect.
21876 # it only plays a role in shift amounts of 61-63.
21877 mov.b GRS(%a6), %d2 # fetch the incoming g,r,s byte
21878 or.b %d2, 3+FTEMP_LO2(%a6) # merge it into the low byte of the FTEMP_LO copy
21880 bfextu FTEMP_HI(%a0){&0:%d0}, %d2 # %d2 = new FTEMP_LO
21881 bfextu FTEMP_HI(%a0){%d0:&32}, %d1 # %d1 = new G,R,S
21883 bftst %d1{&2:&30} # were any bits shifted off?
21884 bne.b case2_set_sticky # yes; set sticky bit
21885 bftst FTEMP_LO2(%a6){%d0:&31} # were any bits shifted off?
21886 bne.b case2_set_sticky # yes; set sticky bit
21888 mov.l %d1, %d0 # move new G,R,S to %d0
21892 mov.l %d1, %d0 # move new G,R,S to %d0
21893 bset &rnd_stky_bit, %d0 # set sticky bit
21896 clr.l FTEMP_HI(%a0) # store FTEMP_HI = 0
21897 mov.l %d2, FTEMP_LO(%a0) # store FTEMP_LO
21898 and.l &0xe0000000, %d0 # clear all but G,R,S
21900 mov.l (%sp)+,%d2 # restore temp register
21906 # %d0 = denorm threshold
21907 # %d1 = amt to shift (>= 64 on entry; reached from dnrm_lp's "bra.w case_3")
21910 mov.w %d0, FTEMP_EX(%a0) # insert denorm threshold
21912 cmpi.w %d1, &65 # is shift amt > 65?
21913 blt.b case3_64 # no; it's == 64
21914 beq.b case3_65 # no; it's == 65
21919 # Shift value is > 65 and out of range. All bits are shifted off.
21920 # Return a zero mantissa with the sticky bit set
21922 clr.l FTEMP_HI(%a0) # clear hi(mantissa)
21923 clr.l FTEMP_LO(%a0) # clear lo(mantissa)
21924 mov.l &0x20000000, %d0 # set sticky bit
21930 # ---------------------------------------------------------
21931 # | FTEMP_HI | FTEMP_LO |grs000.........000|
21932 # ---------------------------------------------------------
21933 # <-------(32)------>
21937 # \ ------------------------------
21938 # ------------------------------- \
21942 # <-------(32)------>
21943 # ---------------------------------------------------------
21944 # |0...............0|0................0|grs |
21945 # ---------------------------------------------------------
21948 mov.l FTEMP_HI(%a0), %d0 # fetch hi(mantissa)
21949 mov.l %d0, %d1 # make a copy
21950 and.l &0xc0000000, %d0 # extract G,R
21951 and.l &0x3fffffff, %d1 # extract other bits
21953 bra.b case3_complete # flags from the last "and" say whether other bits were set
21958 # ---------------------------------------------------------
21959 # | FTEMP_HI | FTEMP_LO |grs000.........000|
21960 # ---------------------------------------------------------
21961 # <-------(32)------>
21965 # \ ------------------------------
21966 # -------------------------------- \
21970 # <-------(31)----->
21971 # ---------------------------------------------------------
21972 # |0...............0|0................0|0rs |
21973 # ---------------------------------------------------------
21976 mov.l FTEMP_HI(%a0), %d0 # fetch hi(mantissa)
21977 and.l &0x80000000, %d0 # extract R bit
21978 lsr.l &0x1, %d0 # shift high bit into R bit
21979 bftst FTEMP_HI(%a0){&1:&31} # test the other 31 bits of hi(mantissa); d1 still holds the shift count here, so it must not be used as the mantissa copy
21982 # the last instruction on either path (the "and" for a shift of 64, the
21983 # bftst for a shift of 65) tested the hi(mantissa) bits that were shifted
21984 # off, so the condition codes are already set; branch accordingly.
21984 bne.b case3_set_sticky # yes; go set new sticky
21985 tst.l FTEMP_LO(%a0) # were any bits shifted off?
21986 bne.b case3_set_sticky # yes; go set new sticky
21987 tst.b GRS(%a6) # were any bits shifted off?
21988 bne.b case3_set_sticky # yes; go set new sticky
21991 # no bits were shifted off so don't set the sticky bit.
21993 # the entire mantissa is zero.
21995 clr.l FTEMP_HI(%a0) # clear hi(mantissa)
21996 clr.l FTEMP_LO(%a0) # clear lo(mantissa)
22000 # some bits were shifted off so set the sticky bit.
22001 # the entire mantissa is zero.
22004 bset &rnd_stky_bit,%d0 # set new sticky bit
22005 clr.l FTEMP_HI(%a0) # clear hi(mantissa)
22006 clr.l FTEMP_LO(%a0) # clear lo(mantissa)
22009 #########################################################################
22010 # XDEF **************************************************************** #
22011 # _round(): round result according to precision/mode #
22013 # XREF **************************************************************** #
22016 # INPUT *************************************************************** #
22017 # a0 = ptr to input operand in internal extended format #
22018 # d1(hi) = contains rounding precision: #
22019 # ext = $0000xxxx #
22020 # sgl = $0004xxxx #
22021 # dbl = $0008xxxx #
22022 # d1(lo) = contains rounding mode: #
22027 # d0{31:29} = contains the g,r,s bits (extended) #
22029 # OUTPUT ************************************************************** #
22030 # a0 = pointer to rounded result #
22032 # ALGORITHM *********************************************************** #
22033 # On return the value pointed to by a0 is correctly rounded, #
22034 # a0 is preserved and the g-r-s bits in d0 are cleared. #
22035 # The result is not typed - the tag field is invalid. The #
22036 # result is still in the internal extended format. #
22038 # The INEX bit of USER_FPSR will be set if the rounded result was #
22039 # inexact (i.e. if any of the g-r-s bits were set). #
22041 #########################################################################
22046 # ext_grs() looks at the rounding precision and sets the appropriate G,R,S bits.
22048 # If (G,R,S == 0) then result is exact and round is done, else set
22049 # the inex flag in status reg and continue.
22051 bsr.l ext_grs # extract G,R,S
22053 tst.l %d0 # are G,R,S zero?
22054 beq.w truncate # yes; round is complete
22056 or.w &inx2a_mask, 2+USER_FPSR(%a6) # set inex2/ainex
22059 # Use rounding mode as an index into a jump table for these modes.
22060 # All of the following assumes grs != 0.
22062 mov.w (tbl_mode.b,%pc,%d1.w*2), %a1 # load jump offset (d1.w = rounding mode)
22063 jmp (tbl_mode.b,%pc,%a1) # jmp to rnd mode handler
22066 short rnd_near - tbl_mode # mode 0: round to nearest
22067 short truncate - tbl_mode # RZ always truncates
22068 short rnd_mnus - tbl_mode # mode 2: round toward minus infinity
22069 short rnd_plus - tbl_mode # mode 3: round toward plus infinity
22071 #################################################################
22072 # ROUND PLUS INFINITY #
22074 # If sign of fp number = 0 (positive), then add 1 to l. #
22075 #################################################################
22077 tst.b FTEMP_SGN(%a0) # check for sign
22078 bmi.w truncate # if negative then truncate
22080 mov.l &0xffffffff, %d0 # force g,r,s to be all f's (so the add_* "rs = 0" test sees non-zero)
22081 swap %d1 # set up d1 for round prec.
22083 cmpi.b %d1, &s_mode # is prec = sgl?
22084 beq.w add_sgl # yes
22085 bgt.w add_dbl # no; it's dbl
22086 bra.w add_ext # no; it's ext
22088 #################################################################
22089 # ROUND MINUS INFINITY #
22091 # If sign of fp number = 1 (negative), then add 1 to l. #
22092 #################################################################
22094 tst.b FTEMP_SGN(%a0) # check for sign
22095 bpl.w truncate # if positive then truncate
22097 mov.l &0xffffffff, %d0 # force g,r,s to be all f's (so the add_* "rs = 0" test sees non-zero)
22098 swap %d1 # set up d1 for round prec.
22100 cmpi.b %d1, &s_mode # is prec = sgl?
22101 beq.w add_sgl # yes
22102 bgt.w add_dbl # no; it's dbl
22103 bra.w add_ext # no; it's ext
22105 #################################################################
22108 # If (g=1), then add 1 to l and if (r=s=0), then clear l #
22109 # Note that this will round to even in case of a tie. #
22110 #################################################################
22112 asl.l &0x1, %d0 # shift g-bit to c-bit; r,s remain in d0 for the add_* "rs = 0" test
22113 bcc.w truncate # g=0: truncate; fall through when g=1
22115 swap %d1 # set up d1 for round prec.
22117 cmpi.b %d1, &s_mode # is prec = sgl?
22118 beq.w add_sgl # yes
22119 bgt.w add_dbl # no; it's dbl
22120 bra.w add_ext # no; it's ext
22122 # *** LOCAL EQUATES ***
22123 set ad_1_sgl, 0x00000100 # constant to add 1 to l-bit in sgl prec
22124 set ad_1_dbl, 0x00000800 # constant to add 1 to l-bit in dbl prec
22126 #########################
22128 #########################
22130 add.l &ad_1_sgl, FTEMP_HI(%a0) # add 1 to the sgl l-bit of hi(mantissa)
22131 bcc.b scc_clr # no mantissa overflow
22132 roxr.w FTEMP_HI(%a0) # overflow: rotate the carry (X) back in, upper word first
22133 roxr.w FTEMP_HI+2(%a0) # then continue the rotate through the lower word
22134 add.w &0x1, FTEMP_EX(%a0) # and incr exponent
22136 tst.l %d0 # test for rs = 0
22138 and.w &0xfe00, FTEMP_HI+2(%a0) # clear the l-bit (the branch that skips this when rs != 0 is not visible here)
22140 and.l &0xffffff00, FTEMP_HI(%a0) # truncate bits beyond sgl limit
22141 clr.l FTEMP_LO(%a0) # clear lo(mantissa)
22144 #########################
22146 #########################
22148 addq.l &1,FTEMP_LO(%a0) # add 1 to l-bit
22149 bcc.b xcc_clr # test for carry out
22150 addq.l &1,FTEMP_HI(%a0) # propagate carry (the carry test after this add is not visible here)
22152 roxr.w FTEMP_HI(%a0) # mantissa overflowed: rotate the carry (X) back in, top word first
22153 roxr.w FTEMP_HI+2(%a0) # continue the rotate through hi(mantissa)
22154 roxr.w FTEMP_LO(%a0) # ... through the upper word of lo(mantissa)
22155 roxr.w FTEMP_LO+2(%a0) # ... and the lower word of lo(mantissa)
22156 add.w &0x1,FTEMP_EX(%a0) # and inc exp
22158 tst.l %d0 # test rs = 0
22160 and.b &0xfe,FTEMP_LO+3(%a0) # clear the l bit (the branch that skips this when rs != 0 is not visible here)
22164 #########################
22166 #########################
22168 add.l &ad_1_dbl, FTEMP_LO(%a0) # add 1 to the dbl l-bit of lo(mantissa)
22169 bcc.b dcc_clr # no carry
22170 addq.l &0x1, FTEMP_HI(%a0) # propagate carry
22171 bcc.b dcc_clr # no carry
22173 roxr.w FTEMP_HI(%a0) # mantissa overflowed: rotate the carry (X) back in, top word first
22174 roxr.w FTEMP_HI+2(%a0) # continue the rotate through hi(mantissa)
22175 roxr.w FTEMP_LO(%a0) # ... through the upper word of lo(mantissa)
22176 roxr.w FTEMP_LO+2(%a0) # ... and the lower word of lo(mantissa)
22177 addq.w &0x1, FTEMP_EX(%a0) # incr exponent
22179 tst.l %d0 # test for rs = 0
22181 and.w &0xf000, FTEMP_LO+2(%a0) # clear the l-bit (the branch that skips this when rs != 0 is not visible here)
22184 and.l &0xfffff800,FTEMP_LO(%a0) # truncate bits beyond dbl limit
22187 ###########################
22188 # Truncate all other bits #
22189 ###########################
22191 swap %d1 # select rnd prec
22193 cmpi.b %d1, &s_mode # is prec sgl?
22194 beq.w sgl_done # yes
22195 bgt.b dbl_done # no; it's dbl
22200 # ext_grs(): extract guard, round and sticky bits according to
22201 # rounding precision.
22204 # in: d0 = extended precision g,r,s (in d0{31:29})
22205 # in: d1 = {PREC,ROUND}
22207 # out: d0{31:29} = guard, round, sticky
22209 # ext_grs extracts the guard/round/sticky bits according to the
22210 # selected rounding precision. It is called by the round subroutine
22211 # only. All registers except d0 are kept intact. d0 becomes the
22212 # updated guard,round,sticky in d0{31:29}.
22214 # Notes: ext_grs uses the round PREC, and therefore has to swap d1
22215 # prior to usage, and needs to restore d1 to original. This
22216 # routine is tightly tied to the round routine and not meant to
22217 # uphold standard subroutine calling practices.
22221 swap %d1 # have d1.w point to round precision
22222 tst.b %d1 # is rnd prec = extended?
22223 bne.b ext_grs_not_ext # no; go handle sgl or dbl
22226 # %d0 already holds the ext g,r,s since _round() had it before calling
22227 # this function. so, as long as we don't disturb it, we are "returning" it.
22230 swap %d1 # yes; put d1 back in its original order
22234 movm.l &0x3000, -(%sp) # save {d2/d3} for use as scratch regs
22236 cmpi.b %d1, &s_mode # is rnd prec = sgl?
22237 bne.b ext_grs_dbl # no; go handle dbl
22242 # -----------------------------------------------------
22243 # | EXP |XXXXXXX| |xx | |grs|
22244 # -----------------------------------------------------
22246 # ee ---------------------
22252 bfextu FTEMP_HI(%a0){&24:&2}, %d3 # sgl g-r: the 2 bits at offset 24,
22253 mov.l &30, %d2 # just past the 24 sgl mantissa bits
22254 lsl.l %d2, %d3 # shift g-r bits to MSB of d3
22255 mov.l FTEMP_HI(%a0), %d2 # get hi(mantissa) for s-bit test
22256 and.l &0x0000003f, %d2 # sticky is the OR of all bits to
22257 bne.b ext_grs_st_stky # the right of g-r (low 6 bits here)
22258 tst.l FTEMP_LO(%a0) # test lower mantissa
22259 bne.b ext_grs_st_stky # if any are set, set sticky
22260 tst.l %d0 # test original ext g,r,s
22261 bne.b ext_grs_st_stky # if any are set, set sticky
22262 bra.b ext_grs_end_sd # nothing set; sticky stays clear
22267 # -----------------------------------------------------
22268 # | EXP |XXXXXXX| | |xx |grs|
22269 # -----------------------------------------------------
22277 bfextu FTEMP_LO(%a0){&21:&2}, %d3 # dbl g-r: the 2 bits at offset 21
22278 mov.l &30, %d2 # of lo(mantissa)
22279 lsl.l %d2, %d3 # shift g-r bits to the MSB of d3
22280 mov.l FTEMP_LO(%a0), %d2 # get lower mantissa for s-bit test
22281 and.l &0x000001ff, %d2 # sticky is the OR of all bits to
22282 bne.b ext_grs_st_stky # the right of g-r (low 9 bits here)
22283 tst.l %d0 # test original ext g,r,s
22284 bne.b ext_grs_st_stky # if any are set, set sticky
22285 bra.b ext_grs_end_sd # if clear, exit
22288 bset &rnd_stky_bit, %d3 # set sticky bit
22290 mov.l %d3, %d0 # return grs to d0
22292 movm.l (%sp)+, &0xc # restore scratch registers {d2/d3}
22294 swap %d1 # restore d1 to original
22297 #########################################################################
22298 # norm(): normalize the mantissa of an extended precision input. The #
22299 # input operand must not be normalized already. #
22301 # XDEF **************************************************************** #
22304 # XREF **************************************************************** #
22307 # INPUT *************************************************************** #
22308 # a0 = pointer to fp extended precision operand to normalize #
22310 # OUTPUT ************************************************************** #
22311 # d0 = number of bit positions the mantissa was shifted #
22312 # a0 = the input operand's mantissa is normalized; exponent not adjusted #
22315 #########################################################################
22318 mov.l %d2, -(%sp) # save d2; used below as shift count
22321 mov.l FTEMP_HI(%a0), %d0 # load hi(mantissa)
22322 mov.l FTEMP_LO(%a0), %d1 # load lo(mantissa)
22324 bfffo %d0{&0:&32}, %d2 # d2 = offset of first set bit in hi(man)
22325 beq.b norm_lo # hi(man) is all zeroes!
22328 lsl.l %d2, %d0 # left shift hi(man)
22329 bfextu %d1{&0:%d2}, %d3 # top d2 bits of lo(man); d2=0 would mean width 32
22331 or.l %d3, %d0 # merge them into the low end of hi(man)
22332 lsl.l %d2, %d1 # left shift lo(man) by the same amount
22334 mov.l %d0, FTEMP_HI(%a0) # store new hi(man)
22335 mov.l %d1, FTEMP_LO(%a0) # store new lo(man)
22337 mov.l %d2, %d0 # return shift amount
22339 mov.l (%sp)+, %d3 # restore temp regs
22345 bfffo %d1{&0:&32}, %d2 # d2 = offset of first set bit in lo(man)
22346 lsl.l %d2, %d1 # shift lo(man)
22347 add.l &32, %d2 # add 32 to shft amount (skipped all of hi)
22349 mov.l %d1, FTEMP_HI(%a0) # shifted lo(man) becomes new hi(man)
22350 clr.l FTEMP_LO(%a0) # lo(man) is now zero
22352 mov.l %d2, %d0 # return shift amount
22354 mov.l (%sp)+, %d3 # restore temp regs
22359 #########################################################################
22360 # unnorm_fix(): - changes an UNNORM to one of NORM, DENORM, or ZERO #
22361 # - returns corresponding optype tag #
22363 # XDEF **************************************************************** #
22366 # XREF **************************************************************** #
22367 # norm() - normalize the mantissa #
22369 # INPUT *************************************************************** #
22370 # a0 = pointer to unnormalized extended precision number #
22372 # OUTPUT ************************************************************** #
22373 # d0 = optype tag - is corrected to one of NORM, DENORM, or ZERO #
22374 # a0 = input operand has been converted to a norm, denorm, or #
22375 # zero; both the exponent and mantissa are changed. #
22377 #########################################################################
22381 bfffo FTEMP_HI(%a0){&0:&32}, %d0 # how many shifts are needed?
22382 bne.b unnorm_shift # hi(man) is not all zeroes
22385 # hi(man) is all zeroes so see if any bits in lo(man) are set
22388 bfffo FTEMP_LO(%a0){&0:&32}, %d0 # is operand really a zero?
22389 beq.w unnorm_zero # yes
22391 add.w &32, %d0 # no; first set bit is in lo(man), so add 32
22394 # d0 = # shifts needed for complete normalization
22397 clr.l %d1 # clear top word
22398 mov.w FTEMP_EX(%a0), %d1 # extract exponent
22399 and.w &0x7fff, %d1 # strip off sgn
22401 cmp.w %d0, %d1 # would a full normalization push exp < 0?
22402 bgt.b unnorm_nrm_zero # yes; shift left only until exp = 0
22405 # exponent would not go < 0. Therefore, number stays normalized
22407 sub.w %d0, %d1 # subtract shift count from exponent
22408 mov.w FTEMP_EX(%a0), %d0 # load old exponent
22409 and.w &0x8000, %d0 # save old sign
22410 or.w %d0, %d1 # {sgn,new exp}
22411 mov.w %d1, FTEMP_EX(%a0) # insert new exponent
22413 bsr.l norm # normalize UNNORM
22415 mov.b &NORM, %d0 # return new optype tag
22419 # exponent would go < 0, so only shift the mantissa left until exp = 0
22422 cmp.b %d1, &32 # is exp <= 32?
22423 bgt.b unnorm_nrm_zero_lrg # no; go handle large exponent
22425 bfextu FTEMP_HI(%a0){%d1:&32}, %d0 # 32 bits starting at bit offset exp
22426 mov.l %d0, FTEMP_HI(%a0) # save new hi(man)
22428 mov.l FTEMP_LO(%a0), %d0 # fetch old lo(man)
22429 lsl.l %d1, %d0 # left shift lo(man) by exp
22430 mov.l %d0, FTEMP_LO(%a0) # save new lo(man)
22432 and.w &0x8000, FTEMP_EX(%a0) # set exp = 0 (keep sign)
22434 mov.b &DENORM, %d0 # return new optype tag
22438 # exp > 32: every bit of the new hi(man) comes from lo(man)
22440 unnorm_nrm_zero_lrg:
22441 sub.w &32, %d1 # adjust shft amt by 32
22443 mov.l FTEMP_LO(%a0), %d0 # fetch old lo(man)
22444 lsl.l %d1, %d0 # left shift lo(man)
22446 mov.l %d0, FTEMP_HI(%a0) # store new hi(man)
22447 clr.l FTEMP_LO(%a0) # lo(man) = 0
22449 and.w &0x8000, FTEMP_EX(%a0) # set exp = 0 (keep sign)
22451 mov.b &DENORM, %d0 # return new optype tag
22455 # whole mantissa is zero so this UNNORM is actually a zero
22458 and.w &0x8000, FTEMP_EX(%a0) # force exponent to zero (keep sign)
22460 mov.b &ZERO, %d0 # fix optype tag
22463 #########################################################################
22464 # XDEF **************************************************************** #
22465 # set_tag_x(): return the optype of the input ext fp number #
22467 # XREF **************************************************************** #
22470 # INPUT *************************************************************** #
22471 # a0 = pointer to extended precision operand #
22473 # OUTPUT ************************************************************** #
22474 # d0 = value of type tag #
22475 # one of: NORM, INF, QNAN, SNAN, DENORM, UNNORM, ZERO #
22477 # ALGORITHM *********************************************************** #
22478 # Simply test the exponent, j-bit, and mantissa values to #
22479 # determine the type of operand. #
22480 # If it's an unnormalized zero, alter the operand and force it #
22481 # to be a normal zero. #
22483 #########################################################################
22487 mov.w FTEMP_EX(%a0), %d0 # extract exponent
22488 andi.w &0x7fff, %d0 # strip off sign
22489 cmpi.w %d0, &0x7fff # is (EXP == MAX)?
22492 btst &0x7,FTEMP_HI(%a0) # is the j-bit (msb of hi(man)) set?
22498 tst.w %d0 # is exponent = 0?
22501 tst.l FTEMP_HI(%a0) # any bits set in hi(man)?
22503 tst.l FTEMP_LO(%a0) # any bits set in lo(man)?
22511 # must distinguish now "Unnormalized zeroes" which we
22512 # must convert to zero.
22514 tst.l FTEMP_HI(%a0) # is hi(man) zero?
22515 bne.b is_unnorm_reg_x # no; mantissa is non-zero
22516 tst.l FTEMP_LO(%a0) # is lo(man) zero?
22517 bne.b is_unnorm_reg_x # no; mantissa is non-zero
22518 # it's an "unnormalized zero". let's convert it to an actual zero...
22519 andi.w &0x8000,FTEMP_EX(%a0) # clear exponent, keep sign
22526 tst.l FTEMP_LO(%a0) # any bits set in lo(man)?
22528 mov.l FTEMP_HI(%a0), %d0 # fetch hi(man)
22529 and.l &0x7fffffff, %d0 # msb is a don't care!
22535 btst &0x6, FTEMP_HI(%a0) # test mantissa bit 62; NOTE(review): presumably the QNAN/SNAN bit - confirm
22543 #########################################################################
22544 # XDEF **************************************************************** #
22545 # set_tag_d(): return the optype of the input dbl fp number #
22547 # XREF **************************************************************** #
22550 # INPUT *************************************************************** #
22551 # a0 = points to double precision operand #
22553 # OUTPUT ************************************************************** #
22554 # d0 = value of type tag #
22555 # one of: NORM, INF, QNAN, SNAN, DENORM, ZERO #
22557 # ALGORITHM *********************************************************** #
22558 # Simply test the exponent and mantissa values to #
22559 # determine the type of operand. #
22561 #########################################################################
22565 mov.l FTEMP(%a0), %d0 # fetch first longword of the dbl operand
22568 andi.l &0x7ff00000, %d0 # keep only bits 30-20 (exponent field)
22569 beq.b zero_or_denorm_d # exponent field is zero
22571 cmpi.l %d0, &0x7ff00000 # is exponent field all ones?
22578 and.l &0x000fffff, %d1 # keep low 20 bits of d1; NOTE(review): d1 presumably copies longword 1 - confirm
22589 and.l &0x000fffff, %d1 # keep low 20 bits of d1; NOTE(review): d1 presumably copies longword 1 - confirm
22606 #########################################################################
22607 # XDEF **************************************************************** #
22608 # set_tag_s(): return the optype of the input sgl fp number #
22610 # XREF **************************************************************** #
22613 # INPUT *************************************************************** #
22614 # a0 = pointer to single precision operand #
22616 # OUTPUT ************************************************************** #
22617 # d0 = value of type tag #
22618 # one of: NORM, INF, QNAN, SNAN, DENORM, ZERO #
22620 # ALGORITHM *********************************************************** #
22621 # Simply test the exponent and mantissa values to #
22622 # determine the type of operand. #
22624 #########################################################################
22628 mov.l FTEMP(%a0), %d0 # fetch the sgl operand longword
22631 andi.l &0x7f800000, %d0 # keep only bits 30-23 (exponent field)
22632 beq.b zero_or_denorm_s # exponent field is zero
22634 cmpi.l %d0, &0x7f800000 # is exponent field all ones?
22641 and.l &0x007fffff, %d1 # keep low 23 bits of d1; NOTE(review): d1 presumably copies the operand - confirm
22650 and.l &0x007fffff, %d1 # keep low 23 bits of d1; NOTE(review): d1 presumably copies the operand - confirm
22665 #########################################################################
22666 # XDEF **************************************************************** #
22667 # unf_res(): routine to produce default underflow result of a #
22668 # scaled extended precision number; this is used by #
22669 # fadd/fdiv/fmul/etc. emulation routines. #
22670 # unf_res4(): same as above but for fsglmul/fsgldiv which use #
22671 # single round prec and extended prec mode. #
22673 # XREF **************************************************************** #
22674 # _denorm() - denormalize according to scale factor #
22675 # _round() - round denormalized number according to rnd prec #
22677 # INPUT *************************************************************** #
22678 # a0 = pointer to extended precision operand #
22679 # d0 = scale factor #
22680 # d1 = rounding precision/mode #
22682 # OUTPUT ************************************************************** #
22683 # a0 = pointer to default underflow result in extended precision #
22684 # d0.b = result FPSR_cc which caller may or may not want to save #
22686 # ALGORITHM *********************************************************** #
22687 # Convert the input operand to "internal format" which means the #
22688 # exponent is extended to 16 bits and the sign is stored in the unused #
22689 # portion of the extended precision operand. Denormalize the number #
22690 # according to the scale factor passed in d0. Then, round the #
22691 # denormalized result. #
22692 # Set the FPSR_exc bits as appropriate but return the cc bits in #
22693 # d0 in case the caller doesn't want to save them (e.g. fmove out). #
22695 # unf_res4() for fsglmul/fsgldiv forces the denorm to extended #
22696 # precision and the rounding precision to single. #
22698 #########################################################################
22701 mov.l %d1, -(%sp) # save rnd prec,mode on stack
22703 btst &0x7, FTEMP_EX(%a0) # test sign bit; start of "internal" format
22706 mov.w FTEMP_EX(%a0), %d1 # extract exponent
22709 mov.w %d1, FTEMP_EX(%a0) # insert 16 bit exponent
22711 mov.l %a0, -(%sp) # save operand ptr during calls
22713 mov.l 0x4(%sp),%d0 # fetch the saved rnd prec,mode (pushed d1)
22716 bsr.l _denorm # denorm result
22719 mov.w 0x6(%sp),%d1 # low word of saved prec,mode into %d1
22720 andi.w &0xc0,%d1 # extract rnd prec
22726 bsr.l _round # round the denorm
22730 # result is now rounded properly. convert back to normal format
22731 bclr &0x7, FTEMP_EX(%a0) # clear sgn first; may have residue
22732 tst.b FTEMP_SGN(%a0) # is "internal result" sign set?
22733 beq.b unf_res_chkifzero # no; result is positive
22734 bset &0x7, FTEMP_EX(%a0) # set result sgn
22735 clr.b FTEMP_SGN(%a0) # clear temp sign
22737 # the number may have become zero after rounding. set ccodes accordingly.
22740 tst.l FTEMP_HI(%a0) # is value now a zero?
22741 bne.b unf_res_cont # no
22742 tst.l FTEMP_LO(%a0) # hi(man) is zero; check lo(man) too
22743 bne.b unf_res_cont # no
22744 # bset &z_bit, FPSR_CC(%a6) # yes; set zero ccode bit
22745 bset &z_bit, %d0 # yes; set zero ccode bit in returned cc
22750 # can inex1 also be set along with unfl and inex2???
22752 # we know that underflow has occurred. aunfl should be set if INEX2 is also set.
22754 btst &inex2_bit, FPSR_EXCEPT(%a6) # is INEX2 set?
22755 beq.b unf_res_end # no
22756 bset &aunfl_bit, FPSR_AEXCEPT(%a6) # yes; set aunfl
22759 add.l &0x4, %sp # discard one saved longword from the stack
22762 # unf_res4(): the unf_res() variant for fsglmul() and fsgldiv().
22765 mov.l %d1,-(%sp) # save rnd prec,mode on stack
22767 btst &0x7,FTEMP_EX(%a0) # test sign bit; start of "internal" format
22770 mov.w FTEMP_EX(%a0),%d1 # extract exponent
22773 mov.w %d1,FTEMP_EX(%a0) # insert 16 bit exponent
22775 mov.l %a0,-(%sp) # save operand ptr during calls
22777 clr.l %d0 # force rnd prec = ext
22778 bsr.l _denorm # denorm result
22781 mov.w &s_mode,%d1 # force rnd prec = sgl
22783 mov.w 0x6(%sp),%d1 # load rnd mode
22784 andi.w &0x30,%d1 # extract rnd mode (0x30 selects the mode bits)
22786 bsr.l _round # round the denorm
22790 # result is now rounded properly. convert back to normal format
22791 bclr &0x7,FTEMP_EX(%a0) # clear sgn first; may have residue
22792 tst.b FTEMP_SGN(%a0) # is "internal result" sign set?
22793 beq.b unf_res4_chkifzero # no; result is positive
22794 bset &0x7,FTEMP_EX(%a0) # set result sgn
22795 clr.b FTEMP_SGN(%a0) # clear temp sign
22797 # the number may have become zero after rounding. set ccodes accordingly.
22798 unf_res4_chkifzero:
22800 tst.l FTEMP_HI(%a0) # is value now a zero?
22801 bne.b unf_res4_cont # no
22802 tst.l FTEMP_LO(%a0) # hi(man) is zero; check lo(man) too
22803 bne.b unf_res4_cont # no
22804 # bset &z_bit,FPSR_CC(%a6) # yes; set zero ccode bit
22805 bset &z_bit,%d0 # yes; set zero ccode bit in returned cc
22810 # can inex1 also be set along with unfl and inex2???
22812 # we know that underflow has occurred. aunfl should be set if INEX2 is also set.
22814 btst &inex2_bit,FPSR_EXCEPT(%a6) # is INEX2 set?
22815 beq.b unf_res4_end # no
22816 bset &aunfl_bit,FPSR_AEXCEPT(%a6) # yes; set aunfl
22819 add.l &0x4,%sp # discard one saved longword from the stack
22822 #########################################################################
22823 # XDEF **************************************************************** #
22824 # ovf_res(): routine to produce the default overflow result of #
22825 # an overflowing number. #
22826 # ovf_res2(): same as above but rnd prec/mode are passed split in d0 #
22829 # XREF **************************************************************** #
22832 # INPUT *************************************************************** #
22833 # d1.b = '-1' => (-); '0' => (+) #
22835 # ovf_res(): d0 = rnd mode/prec #
22837 # ovf_res2(): hi(d0) = rnd prec #
22838 # lo(d0) = rnd mode #
22840 # OUTPUT ************************************************************** #
22841 # a0 = points to extended precision result #
22842 # d0.b = condition code bits #
22844 # ALGORITHM *********************************************************** #
22845 # The default overflow result can be determined by the sign of #
22846 # the result and the rounding mode/prec in effect. These bits are #
22847 # concatenated together to create an index into the default result #
22848 # table. A pointer to the correct result is returned in a0. The #
22849 # resulting condition codes are returned in d0 in case the caller #
22850 # doesn't want FPSR_cc altered (as is the case for fmove out). #
22852 #########################################################################
22856 andi.w &0x10,%d1 # keep result sign (as bit 4 of the index)
22857 lsr.b &0x4,%d0 # shift prec/mode down into bits 3-0
22858 or.b %d0,%d1 # concat the two
22859 mov.w %d1,%d0 # make a copy (index into the cc table)
22860 lsl.b &0x1,%d1 # multiply d1 by 2 (result entries are 16 bytes, scaled *8)
22865 and.w &0x10, %d1 # keep result sign (as bit 4 of the index)
22866 or.b %d0, %d1 # insert rnd mode
22868 or.b %d0, %d1 # insert rnd prec
22869 mov.w %d1, %d0 # make a copy (index into the cc table)
22870 lsl.b &0x1, %d1 # shift left by 1 (result entries are 16 bytes, scaled *8)
22873 # use the rounding mode, precision, and result sign as an index into the
22874 # two tables below to fetch the default result and the result ccodes.
22877 mov.b (tbl_ovfl_cc.b,%pc,%d0.w*1), %d0 # fetch result ccodes
22878 lea (tbl_ovfl_result.b,%pc,%d1.w*8), %a0 # return result ptr
22883 byte 0x2, 0x0, 0x0, 0x2 # + ext: RN,RZ,RM,RP (0x2 where the result is INF)
22884 byte 0x2, 0x0, 0x0, 0x2 # + sgl: RN,RZ,RM,RP
22885 byte 0x2, 0x0, 0x0, 0x2 # + dbl: RN,RZ,RM,RP
22886 byte 0x0, 0x0, 0x0, 0x0 # filler row; pairs with the all-zero result rows
22887 byte 0x2+0x8, 0x8, 0x2+0x8, 0x8 # - ext: 0x8 set on every negative result
22888 byte 0x2+0x8, 0x8, 0x2+0x8, 0x8 # - sgl: RN,RZ,RM,RP
22889 byte 0x2+0x8, 0x8, 0x2+0x8, 0x8 # - dbl: RN,RZ,RM,RP
22892 long 0x7fff0000,0x00000000,0x00000000,0x00000000 # +INF; RN
22893 long 0x7ffe0000,0xffffffff,0xffffffff,0x00000000 # +EXT; RZ
22894 long 0x7ffe0000,0xffffffff,0xffffffff,0x00000000 # +EXT; RM
22895 long 0x7fff0000,0x00000000,0x00000000,0x00000000 # +INF; RP
22897 long 0x7fff0000,0x00000000,0x00000000,0x00000000 # +INF; RN
22898 long 0x407e0000,0xffffff00,0x00000000,0x00000000 # +SGL; RZ
22899 long 0x407e0000,0xffffff00,0x00000000,0x00000000 # +SGL; RM
22900 long 0x7fff0000,0x00000000,0x00000000,0x00000000 # +INF; RP
22902 long 0x7fff0000,0x00000000,0x00000000,0x00000000 # +INF; RN
22903 long 0x43fe0000,0xffffffff,0xfffff800,0x00000000 # +DBL; RZ
22904 long 0x43fe0000,0xffffffff,0xfffff800,0x00000000 # +DBL; RM
22905 long 0x7fff0000,0x00000000,0x00000000,0x00000000 # +INF; RP
22907 long 0x00000000,0x00000000,0x00000000,0x00000000 # filler entry (4th prec group, +)
22908 long 0x00000000,0x00000000,0x00000000,0x00000000 # filler entry (4th prec group, +)
22909 long 0x00000000,0x00000000,0x00000000,0x00000000 # filler entry (4th prec group, +)
22910 long 0x00000000,0x00000000,0x00000000,0x00000000 # filler entry (4th prec group, +)
22912 long 0xffff0000,0x00000000,0x00000000,0x00000000 # -INF; RN
22913 long 0xfffe0000,0xffffffff,0xffffffff,0x00000000 # -EXT; RZ
22914 long 0xffff0000,0x00000000,0x00000000,0x00000000 # -INF; RM
22915 long 0xfffe0000,0xffffffff,0xffffffff,0x00000000 # -EXT; RP
22917 long 0xffff0000,0x00000000,0x00000000,0x00000000 # -INF; RN
22918 long 0xc07e0000,0xffffff00,0x00000000,0x00000000 # -SGL; RZ
22919 long 0xffff0000,0x00000000,0x00000000,0x00000000 # -INF; RM
22920 long 0xc07e0000,0xffffff00,0x00000000,0x00000000 # -SGL; RP
22922 long 0xffff0000,0x00000000,0x00000000,0x00000000 # -INF; RN
22923 long 0xc3fe0000,0xffffffff,0xfffff800,0x00000000 # -DBL; RZ
22924 long 0xffff0000,0x00000000,0x00000000,0x00000000 # -INF; RM
22925 long 0xc3fe0000,0xffffffff,0xfffff800,0x00000000 # -DBL; RP
22927 #########################################################################
22928 # XDEF **************************************************************** #
22929 # get_packed(): fetch a packed operand from memory and then #
22930 # convert it to a floating-point binary number. #
22932 # XREF **************************************************************** #
22933 # _dcalc_ea() - calculate the correct <ea> #
22934 # _dmem_read() - fetch the packed operand from memory #
22935 # facc_in_x() - the fetch failed so jump to special exit code #
22936 # decbin() - convert packed to binary extended precision #
22938 # INPUT *************************************************************** #
22941 # OUTPUT ************************************************************** #
22942 # If no failure on _dmem_read(): #
22943 # FP_SRC(a6) = packed operand now as a binary FP number #
22945 # ALGORITHM *********************************************************** #
22946 # Get the correct <ea> which is the value on the exception stack #
22947 # frame w/ maybe a correction factor if the <ea> is -(an) or (an)+. #
22948 # Then, fetch the operand from memory. If the fetch fails, exit #
22949 # through facc_in_x(). #
22950 # If the packed operand is a ZERO, NAN, or INF, leave it as is and #
22951 # return. Else, call decbin() which will #
22952 # convert the packed value to an extended precision binary value. #
22954 #########################################################################
22956 # the stacked <ea> for packed is correct except for -(An).
22957 # the base reg must be updated for both -(An) and (An)+.
22960 mov.l &0xc,%d0 # packed is 12 bytes
22961 bsr.l _dcalc_ea # fetch <ea>; correct An
22963 lea FP_SRC(%a6),%a1 # pass: ptr to super dst
22964 mov.l &0xc,%d0 # pass: 12 bytes
22965 bsr.l _dmem_read # read packed operand
22967 tst.l %d1 # did the read fail (d1 != 0)?
22968 bne.l facc_in_x # yes
22970 # The packed operand is an INF or a NAN if the exponent field is all ones.
22971 bfextu FP_SRC(%a6){&1:&15},%d0 # get exp (15 bits after the sign bit)
22972 cmpi.w %d0,&0x7fff # INF or NAN?
22973 bne.b gp_try_zero # no
22974 rts # operand is an INF or NAN
22976 # The packed operand is a zero if the mantissa is all zero, else it's
22977 # a normal packed op.
22979 mov.b 3+FP_SRC(%a6),%d0 # get byte 4 (low nybble = integer digit)
22980 andi.b &0x0f,%d0 # clear all but last nybble
22981 bne.b gp_not_spec # not a zero
22982 tst.l FP_SRC_HI(%a6) # is lw 2 zero?
22983 bne.b gp_not_spec # not a zero
22984 tst.l FP_SRC_LO(%a6) # is lw 3 zero?
22985 bne.b gp_not_spec # not a zero
22986 rts # operand is a ZERO
22988 lea FP_SRC(%a6),%a0 # pass: ptr to packed op
22989 bsr.l decbin # convert to extended
22990 fmovm.x &0x80,FP_SRC(%a6) # store fp0 (decbin result) as the srcop
22993 #########################################################################
22994 # decbin(): Converts normalized packed bcd value pointed to by register #
22995 # a0 to extended-precision value in fp0. #
22997 # INPUT *************************************************************** #
22998 # a0 = pointer to normalized packed bcd value #
23000 # OUTPUT ************************************************************** #
23001 # fp0 = exact fp representation of the packed bcd value. #
23003 # ALGORITHM *********************************************************** #
23004 # Expected is a normal bcd (i.e. non-exceptional; all inf, zero, #
23005 # and NaN operands are dispatched without entering this routine) #
23006 # value in 68881/882 format at location (a0). #
23008 # A1. Convert the bcd exponent to binary by successive adds and #
23009 # muls. Set the sign according to SE. Subtract 16 to compensate #
23010 # for the mantissa which is to be interpreted as 17 integer #
23011 # digits, rather than 1 integer and 16 fraction digits. #
23012 # Note: this operation can never overflow. #
23014 # A2. Convert the bcd mantissa to binary by successive #
23015 # adds and muls in FP0. Set the sign according to SM. #
23016 # The mantissa digits will be converted with the decimal point #
23017 # assumed following the least-significant digit. #
23018 # Note: this operation can never overflow. #
23020 # A3. Count the number of leading/trailing zeros in the #
23021 # bcd string. If SE is positive, count the leading zeros; #
23022 # if negative, count the trailing zeros. Set the adjusted #
23023 # exponent equal to the exponent from A1 and the zero count #
23024 # added if SE = 1 and subtracted if SE = 0. Scale the #
23025 # mantissa the equivalent of forcing in the bcd value: #
23027 # SE = 0 a non-zero digit in the integer position #
23028 # SE = 1 a non-zero digit in Mant0, lsd of the fraction #
23030 # this will ensure that any value, regardless of its #
23031 # representation (ex. 0.1E2, 1E1, 10E0, 100E-1), is converted alike. #
23034 # A4. Calculate the factor 10^exp in FP1 using a table of #
23035 # 10^(2^n) values. To reduce the error in forming factors #
23036 # greater than 10^27, a directed rounding scheme is used with #
23037 # tables rounded to RN, RM, and RP, according to the table #
23038 # in the comments of the pwrten section. #
23040 # A5. Form the final binary number by scaling the mantissa by #
23041 # the exponent factor. This is done by multiplying the #
23042 # mantissa in FP0 by the factor in FP1 if the adjusted #
23043 # exponent sign is positive, and dividing FP0 by FP1 if #
23044 # it is negative. #
23046 # Clean up and return. Check if the final mul or div was inexact. #
23047 # If so, set INEX1 in USER_FPSR. #
23049 #########################################################################
23052 # PTENRN, PTENRM, and PTENRP are arrays of powers of 10 rounded
23053 # to nearest, minus, and plus, respectively. The tables include
23054 # 10**{1,2,4,8,16,32,64,128,256,512,1024,2048,4096}. No rounding
23055 # is required until the power is greater than 27, however, all
23056 # tables include the first 5 for ease of indexing.
23072 mov.l 0x0(%a0),FP_SCR0_EX(%a6) # make a copy of input
23073 mov.l 0x4(%a0),FP_SCR0_HI(%a6) # so we don't alter it
23074 mov.l 0x8(%a0),FP_SCR0_LO(%a6)
23076 lea FP_SCR0(%a6),%a0
23078 movm.l &0x3c00,-(%sp) # save d2-d5
23079 fmovm.x &0x1,-(%sp) # save fp1
23081 # Calculate exponent:
23082 # 1. Copy bcd value in memory for use as a working copy.
23083 # 2. Calculate absolute value of exponent in d1 by mul and add.
23084 # 3. Correct for exponent sign.
23085 # 4. Subtract 16 to compensate for interpreting the mant as all integer digits.
23086 # (i.e., all digits assumed left of the decimal point.)
23091 # (*) d0: temp digit storage
23092 # (*) d1: accumulator for binary exponent
23093 # (*) d2: digit count
23094 # (*) d3: offset pointer
23095 # ( ) d4: first word of bcd
23096 # ( ) a0: pointer to working bcd value
23097 # ( ) a6: pointer to original bcd value
23098 # (*) FP_SCR0: working copy of original bcd value
23099 # (*) L_SCR1: copy of original exponent word
23102 mov.l &EDIGITS,%d2 # # of nibbles (digits) in fraction part
23103 mov.l &ESTRT,%d3 # counter to pick up digits
23104 mov.l (%a0),%d4 # get first word of bcd
23105 clr.l %d1 # zero d1 for accumulator
23107 mulu.l &0xa,%d1 # mul partial product by one digit place
23108 bfextu %d4{%d3:&4},%d0 # get the digit and zero extend into d0
23109 add.l %d0,%d1 # d1 = d1 + d0
23110 addq.b &4,%d3 # advance d3 to the next digit
23111 dbf.w %d2,e_gd # if we have used all 3 digits, exit loop
23112 btst &30,%d4 # get SE
23113 beq.b e_pos # don't negate if pos
23114 neg.l %d1 # negate before subtracting
23116 sub.l &16,%d1 # sub to compensate for shift of mant
23117 bge.b e_save # if still pos, do not neg
23118 neg.l %d1 # now negative, make pos and set SE
23119 or.l &0x40000000,%d4 # set SE in d4,
23120 or.l &0x40000000,(%a0) # and in working bcd
23122 mov.l %d1,-(%sp) # save exp on stack
23125 # Calculate mantissa:
23126 # 1. Calculate absolute value of mantissa in fp0 by mul and add.
23127 # 2. Correct for mantissa sign.
23128 # (i.e., all digits assumed left of the decimal point.)
23133 # (*) d0: temp digit storage
23134 # (*) d1: lword counter
23135 # (*) d2: digit count
23136 # (*) d3: offset pointer
23137 # ( ) d4: words 2 and 3 of bcd
23138 # ( ) a0: pointer to working bcd value
23139 # ( ) a6: pointer to original bcd value
23140 # (*) fp0: mantissa accumulator
23141 # ( ) FP_SCR0: working copy of original bcd value
23142 # ( ) L_SCR1: copy of original exponent word
23145 mov.l &1,%d1 # word counter, init to 1
23146 fmov.s &0x00000000,%fp0 # accumulator
23149 # Since the packed number has a long word between the first & second parts,
23150 # get the integer digit then skip down & get the rest of the
23151 # mantissa. We will unroll the loop once.
23153 bfextu (%a0){&28:&4},%d0 # integer part is ls digit in long word
23154 fadd.b %d0,%fp0 # add digit to sum in fp0
23157 # Get the rest of the mantissa.
23160 mov.l (%a0,%d1.L*4),%d4 # load mantissa lonqword into d4
23161 mov.l &FSTRT,%d3 # counter to pick up digits
23162 mov.l &FNIBS,%d2 # reset number of digits per a0 ptr
23164 fmul.s &0x41200000,%fp0 # fp0 = fp0 * 10
23165 bfextu %d4{%d3:&4},%d0 # get the digit and zero extend
23166 fadd.b %d0,%fp0 # fp0 = fp0 + digit
23169 # If all the digits (8) in that long word have been converted (d2=0),
23170 # then inc d1 (=2) to point to the next long word and reset d3 to 0
23171 # to initialize the digit offset, and set d2 to 7 for the digit count;
23172 # else continue with this long word.
23174 addq.b &4,%d3 # advance d3 to the next digit
23175 dbf.w %d2,md2b # check for last digit in this lw
23177 addq.l &1,%d1 # inc lw pointer in mantissa
23178 cmp.l %d1,&2 # test for last lw
23179 ble.b loadlw # if not, get last one
23181 # Check the sign of the mant and make the value in fp0 the same sign.
23184 btst &31,(%a0) # test sign of the mantissa
23185 beq.b ap_st_z # if clear, go to append/strip zeros
23186 fneg.x %fp0 # if set, negate fp0
23188 # Append/strip zeros:
23190 # For adjusted exponents which have an absolute value greater than 27*,
23191 # this routine calculates the amount needed to normalize the mantissa
23192 # for the adjusted exponent. That number is subtracted from the exp
23193 # if the exp was positive, and added if it was negative. The purpose
23194 # of this is to reduce the value of the exponent and the possibility
23195 # of error in calculation of pwrten.
23197 # 1. Branch on the sign of the adjusted exponent.
23198 # 2p.(positive exp)
23199 # 2. Check M16 and the digits in lwords 2 and 3 in descending order.
23200 # 3. Add one for each zero encountered until a non-zero digit.
23201 # 4. Subtract the count from the exp.
23202 # 5. Check if the exp has crossed zero in #3 above; make the exp abs
23204 # 6. Multiply the mantissa by 10**count.
23205 # 2n.(negative exp)
23206 # 2. Check the digits in lwords 3 and 2 in descending order.
23207 # 3. Add one for each zero encountered until a non-zero digit.
23208 # 4. Add the count to the exp.
23209 # 5. Check if the exp has crossed zero in #3 above; clear SE.
23210 # 6. Divide the mantissa by 10**count.
23212 # *Why 27? If the adjusted exponent is within -28 < expA < 28, then
23213 # any adjustment due to append/strip zeros will drive the resultant
23214 # exponent towards zero. Since all pwrten constants with a power
23215 # of 27 or less are exact, there is no need to use this routine to
23216 # attempt to lessen the resultant exponent.
23221 # (*) d0: temp digit storage
23222 # (*) d1: zero count
23223 # (*) d2: digit count
23224 # (*) d3: offset pointer
23225 # ( ) d4: first word of bcd
23226 # (*) d5: lword counter
23227 # ( ) a0: pointer to working bcd value
23228 # ( ) FP_SCR1: working copy of original bcd value
23229 # ( ) L_SCR1: copy of original exponent word
23232 # First check the absolute value of the exponent to see if this
23233 # routine is necessary. If so, then check the sign of the exponent
23234 # and do append (+) or strip (-) zeros accordingly.
23235 # This section handles a positive adjusted exponent.
# Positive adjusted exponent: count the zero digits met before the first
# non-zero mantissa digit (M16 first, then lwords 2 and 3 scanned from the
# most significant nibble) in d1, then subtract that count from the adjusted
# exponent read from (%sp).  If the difference is negative, d1 is replaced
# by its absolute value and SE (bit 30 of lword 1) is set in d4 and memory.
23238 mov.l (%sp),%d1 # load expA for range test
23239 cmp.l %d1,&27 # compare expA with 27
23240 ble.w pwrten # if abs(expA) <28, skip ap/st zeros
23241 btst &30,(%a0) # check sign of exp
23242 bne.b ap_st_n # if neg, go to neg side
23243 clr.l %d1 # zero count reg
23244 mov.l (%a0),%d4 # load lword 1 to d4
23245 bfextu %d4{&28:&4},%d0 # get M16 (low nibble of lword 1) in d0
23246 bne.b ap_p_fx # if M16 is non-zero, go fix exp
23247 addq.l &1,%d1 # M16 was zero: inc zero count
23248 mov.l &1,%d5 # init lword counter
23249 mov.l (%a0,%d5.L*4),%d4 # get lword 2 to d4
23250 bne.b ap_p_cl # if lw 2 is non-zero, go scan its digits
23251 addq.l &8,%d1 # lw 2 is all zeros: skip it, inc count by 8
23252 addq.l &1,%d5 # inc lword counter
23253 mov.l (%a0,%d5.L*4),%d4 # get lword 3 to d4
23255 clr.l %d3 # init offset reg (start at most significant nibble)
23256 mov.l &7,%d2 # init digit counter (8 digits per lword)
23258 bfextu %d4{%d3:&4},%d0 # get digit
23259 bne.b ap_p_fx # if non-zero, go to fix exp
23260 addq.l &4,%d3 # point to next digit
23261 addq.l &1,%d1 # inc zero count
23262 dbf.w %d2,ap_p_gd # get next digit
23264 mov.l %d1,%d0 # copy zero count to d0
23265 mov.l (%sp),%d1 # get adjusted exp from memory
23266 sub.l %d0,%d1 # subtract count from exp
23267 bge.b ap_p_fm # if still non-negative, go fix mantissa
23268 neg.l %d1 # now its neg; get abs
23269 mov.l (%a0),%d4 # load lword 1 to d4
23270 or.l &0x40000000,%d4 # and set SE in d4
23271 or.l &0x40000000,(%a0) # and in memory
23273 # Calculate the mantissa multiplier to compensate for the stripping of
23274 # zeros from the mantissa.
23277 lea.l PTENRN(%pc),%a1 # get address of power-of-ten table
23278 clr.l %d3 # init table index
23279 fmov.s &0x3f800000,%fp1 # init fp1 to 1
23280 mov.l &3,%d2 # init d2 to count bits in counter
23282 asr.l &1,%d0 # shift lsb into carry
23283 bcc.b ap_p_en # if 1, mul fp1 by pwrten factor
23284 fmul.x (%a1,%d3),%fp1 # mul by 10**(d3_bit_no)
23286 add.l &12,%d3 # inc d3 to next rtable entry
23287 tst.l %d0 # check if d0 is zero
23288 bne.b ap_p_el # if not, get next bit
23289 fmul.x %fp1,%fp0 # mul mantissa by 10**(no_bits_shifted)
23290 bra.b pwrten # go calc pwrten
23292 # This section handles a negative adjusted exponent.
# Negative adjusted exponent: count the zero digits met before the first
# non-zero mantissa digit, scanning lword 3 and then lword 2 from the least
# significant nibble (bit-field offset 28) upward, with the count in d1.
# The count is then subtracted from the exponent value read from (%sp); if
# the result is not greater than zero, d1 is negated and SE (bit 30 of
# lword 1) is cleared in d4 and in memory.
23295 clr.l %d1 # clr zero counter
23296 mov.l &2,%d5 # set up d5 to point to lword 3
23297 mov.l (%a0,%d5.L*4),%d4 # get lword 3
23298 bne.b ap_n_cl # if not zero, check digits
23299 sub.l &1,%d5 # dec d5 to point to lword 2
23300 addq.l &8,%d1 # lword 3 was all zeros: inc counter by 8
23301 mov.l (%a0,%d5.L*4),%d4 # get lword 2
23303 mov.l &28,%d3 # point to last digit
23304 mov.l &7,%d2 # init digit counter (8 digits per lword)
23306 bfextu %d4{%d3:&4},%d0 # get digit
23307 bne.b ap_n_fx # if non-zero, go to exp fix
23308 subq.l &4,%d3 # point to previous digit
23309 addq.l &1,%d1 # inc zero count
23310 dbf.w %d2,ap_n_gd # get next digit
23312 mov.l %d1,%d0 # copy zero count to d0
23313 mov.l (%sp),%d1 # get adjusted exp from memory
23314 sub.l %d0,%d1 # subtract count from exp
23315 bgt.b ap_n_fm # if still pos, go fix mantissa
23316 neg.l %d1 # take abs of exp and clr SE
23317 mov.l (%a0),%d4 # load lword 1 to d4
23318 and.l &0xbfffffff,%d4 # and clr SE in d4
23319 and.l &0xbfffffff,(%a0) # and in memory
23321 # Calculate the mantissa multiplier to compensate for the appending of
23322 # zeros to the mantissa.
23325 lea.l PTENRN(%pc),%a1 # get address of power-of-ten table
23326 clr.l %d3 # init table index
23327 fmov.s &0x3f800000,%fp1 # init fp1 to 1
23328 mov.l &3,%d2 # init d2 to count bits in counter
23330 asr.l &1,%d0 # shift lsb into carry
23331 bcc.b ap_n_en # if 1, mul fp1 by pwrten factor
23332 fmul.x (%a1,%d3),%fp1 # mul by 10**(d3_bit_no)
23334 add.l &12,%d3 # inc d3 to next rtable entry
23335 tst.l %d0 # check if d0 is zero
23336 bne.b ap_n_el # if not, get next bit
23337 fdiv.x %fp1,%fp0 # div mantissa by 10**(no_bits_shifted)
23340 # Calculate power-of-ten factor from adjusted and shifted exponent.
23347 # (*) d2: {FPCR[6:5],SM,SE} as index in RTABLE; temp
23348 # (*) d3: FPCR work copy
23349 # ( ) d4: first word of bcd
23350 # (*) a1: RTABLE pointer
23354 # (*) d3: PWRTxx table index
23355 # ( ) a0: pointer to working copy of bcd
23356 # (*) a1: PWRTxx pointer
23357 # (*) fp1: power-of-ten accumulator
23359 # Pwrten calculates the exponent factor in the selected rounding mode
23360 # according to the following table:
23362 # Sign of Mant Sign of Exp Rounding Mode PWRTEN Rounding Mode
23383 mov.l USER_FPCR(%a6),%d3 # get user's FPCR
23384 bfextu %d3{&26:&2},%d2 # isolate rounding mode bits
23385 mov.l (%a0),%d4 # reload 1st bcd word to d4
23386 asl.l &2,%d2 # format d2 to be
23387 bfextu %d4{&0:&2},%d0 # {FPCR[6],FPCR[5],SM,SE}
23388 add.l %d0,%d2 # in d2 as index into RTABLE
23389 lea.l RTABLE(%pc),%a1 # load rtable base
23390 mov.b (%a1,%d2),%d0 # load new rounding bits from table
23391 clr.l %d3 # clear d3 to force no exc and extended
23392 bfins %d0,%d3{&26:&2} # stuff new rounding bits in FPCR
23393 fmov.l %d3,%fpcr # write new FPCR
23394 asr.l &1,%d0 # write correct PTENxx table
23395 bcc.b not_rp # to a1
23396 lea.l PTENRP(%pc),%a1 # it is RP
23397 bra.b calc_p # go to init section
23399 asr.l &1,%d0 # keep checking
23401 lea.l PTENRM(%pc),%a1 # it is RM
23402 bra.b calc_p # go to init section
23404 lea.l PTENRN(%pc),%a1 # it is RN
23406 mov.l %d1,%d0 # copy exp to d0;use d0
23407 bpl.b no_neg # if exp is negative,
23408 neg.l %d0 # invert it
23409 or.l &0x40000000,(%a0) # and set SE bit
23411 clr.l %d3 # table index
23412 fmov.s &0x3f800000,%fp1 # init fp1 to 1
23414 asr.l &1,%d0 # shift next bit into carry
23415 bcc.b e_next # if zero, skip the mul
23416 fmul.x (%a1,%d3),%fp1 # mul by 10**(d3_bit_no)
23418 add.l &12,%d3 # inc d3 to next rtable entry
23419 tst.l %d0 # check if d0 is zero
23420 bne.b e_loop # not zero, continue shifting
23423 # Check the sign of the adjusted exp and make the value in fp0 the
23424 # same sign. If the exp was pos then multiply fp1*fp0;
23425 # else divide fp0/fp1.
23429 # ( ) a0: pointer to working bcd value
23430 # (*) fp0: mantissa accumulator
23431 # ( ) fp1: scaling factor - 10**(abs(exp))
23434 btst &30,(%a0) # test the sign of the exponent
23435 beq.b mul # if clear, go to multiply
23437 fdiv.x %fp1,%fp0 # exp is negative, so divide mant by exp
23440 fmul.x %fp1,%fp0 # exp is positive, so multiply by exp
23443 # Clean up and return with result in fp0.
23445 # If the final mul/div in decbin incurred an inex exception,
23446 # it will be inex2, but will be reported as inex1 by get_op.
23449 fmov.l %fpsr,%d0 # get status register
23450 bclr &inex2_bit+8,%d0 # test for inex2 and clear it
23451 beq.b no_exc # skip this if no exc
23452 ori.w &inx1a_mask,2+USER_FPSR(%a6) # set INEX1/AINEX
23454 add.l &0x4,%sp # clear 1 lw param
23455 fmovm.x (%sp)+,&0x40 # restore fp1
23456 movm.l (%sp)+,&0x3c # restore d2-d5
23461 #########################################################################
23462 # bindec(): Converts an input in extended precision format to bcd format#
23464 # INPUT *************************************************************** #
23465 # a0 = pointer to the input extended precision value in memory. #
23466 # the input may be either normalized, unnormalized, or #
23468 # d0 = contains the k-factor sign-extended to 32-bits. #
23470 # OUTPUT ************************************************************** #
23471 # FP_SCR0(a6) = bcd format result on the stack. #
23473 # ALGORITHM *********************************************************** #
23475 # A1. Set RM and size ext; Set SIGMA = sign of input. #
23476 # The k-factor is saved for use in d7. Clear the #
23477 # BINDEC_FLG for separating normalized/denormalized #
23478 # input. If input is unnormalized or denormalized, #
23481 # A2. Set X = abs(input). #
23483 # A3. Compute ILOG. #
23484 # ILOG is the log base 10 of the input value. It is #
23485 # approximated by adding e + 0.f when the original #
23486 # value is viewed as 2^^e * 1.f in extended precision. #
23487 # This value is stored in d6. #
23489 # A4. Clr INEX bit. #
23490 # The operation in A3 above may have set INEX2. #
23492 # A5. Set ICTR = 0; #
23493 # ICTR is a flag used in A13. It must be set before the #
23496 # A6. Calculate LEN. #
23497 # LEN is the number of digits to be displayed. The #
23498 # k-factor can dictate either the total number of digits, #
23499 # if it is a positive number, or the number of digits #
23500 # after the decimal point which are to be included as #
23501 # significant. See the 68882 manual for examples. #
23502 # If LEN is computed to be greater than 17, set OPERR in #
23503 # USER_FPSR. LEN is stored in d4. #
23505 # A7. Calculate SCALE. #
23506 # SCALE is equal to 10^ISCALE, where ISCALE is the number #
23507 # of decimal places needed to insure LEN integer digits #
23508 # in the output before conversion to bcd. LAMBDA is the #
23509 # sign of ISCALE, used in A9. Fp1 contains #
23510 # 10^^(abs(ISCALE)) using a rounding mode which is a #
23511 # function of the original rounding mode and the signs #
23512 # of ISCALE and X. A table is given in the code. #
23514 # A8. Clr INEX; Force RZ. #
23515 # The operation in A7 above may have set INEX2. #
23516 # RZ mode is forced for the scaling operation to insure #
23517 # only one rounding error. The grs bits are collected in #
23518 # the INEX flag for use in A10. #
23520 # A9. Scale X -> Y. #
23521 # The mantissa is scaled to the desired number of #
23522 # significant digits. The excess digits are collected #
23525 # A10. Or in INEX. #
23526 # If INEX is set, round error occurred. This is #
23527 # compensated for by 'or-ing' in the INEX2 flag to #
23530 # A11. Restore original FPCR; set size ext. #
23531 # Perform FINT operation in the user's rounding mode. #
23532 # Keep the size to extended. #
23534 # A12. Calculate YINT = FINT(Y) according to user's rounding #
23535 # mode. The FPSP routine sintd0 is used. The output #
23538 # A13. Check for LEN digits. #
23539 # If the int operation results in more than LEN digits, #
23540 # or less than LEN -1 digits, adjust ILOG and repeat from #
23541 # A6. This test occurs only on the first pass. If the #
23542 # result is exactly 10^LEN, decrement ILOG and divide #
23543 # the mantissa by 10. #
23545 # A14. Convert the mantissa to bcd. #
23546 # The binstr routine is used to convert the LEN digit #
23547 # mantissa to bcd in memory. The input to binstr is #
23548 # to be a fraction; i.e. (mantissa)/10^LEN and adjusted #
23549 # such that the decimal point is to the left of bit 63. #
23550 # The bcd digits are stored in the correct position in #
23551 # the final string area in memory. #
23553 # A15. Convert the exponent to bcd. #
23554 # As in A14 above, the exp is converted to bcd and the #
23555 # digits are stored in the final string. #
23556 # Test the length of the final exponent string. If the #
23557 # length is 4, set operr. #
23559 # A16. Write sign bits to final string. #
23561 #########################################################################
23563 set BINDEC_FLG, EXC_TEMP # DENORM flag
23565 # Constants in extended precision
23567 long 0x3FFD0000,0x9A209A84,0xFBCFF798,0x00000000
23569 long 0x3FFD0000,0x9A209A84,0xFBCFF799,0x00000000
23571 # Constants in single precision
23573 long 0x3F800000,0x00000000,0x00000000,0x00000000
23575 long 0x40000000,0x00000000,0x00000000,0x00000000
23577 long 0x41200000,0x00000000,0x00000000,0x00000000
23579 long 0x459A2800,0x00000000,0x00000000,0x00000000
23587 # Implementation Notes:
23589 # The registers are used as follows:
23591 # d0: scratch; LEN input to binstr
23593 # d2: upper 32-bits of mantissa for binstr
23594 # d3: scratch;lower 32-bits of mantissa for binstr
23599 # a0: ptr for original operand/final result
23600 # a1: scratch pointer
23601 # a2: pointer to FP_X; abs(original value) in ext
23612 movm.l &0x3f20,-(%sp) # {%d2-%d7/%a2}
23613 fmovm.x &0x7,-(%sp) # {%fp0-%fp2}
23615 # A1. Set RM and size ext. Set SIGMA = sign input;
23616 # The k-factor is saved for use in d7. Clear BINDEC_FLG for
23617 # separating normalized/denormalized input. If the input
23618 # is a denormalized number, set the BINDEC_FLG memory word
23619 # to signal denorm. If the input is unnormalized, normalize
23620 # the input and test for denormalized result.
23622 fmov.l &rm_mode*0x10,%fpcr # set RM and ext
23623 mov.l (%a0),L_SCR2(%a6) # save exponent for sign check
23624 mov.l %d0,%d7 # move k-factor to d7
23626 clr.b BINDEC_FLG(%a6) # clr norm/denorm flag
23627 cmpi.b STAG(%a6),&DENORM # is input a DENORM?
23628 bne.w A2_str # no; input is a NORM
23631 # Normalize the denorm
23635 and.w &0x7fff,%d0 # strip sign of normalized exp
23645 # Test if the normalized input is denormalized
23648 bgt.b pos_exp # if greater than zero, it is a norm
23649 st BINDEC_FLG(%a6) # set flag for denorm
23651 and.w &0x7fff,%d0 # strip sign of normalized exp
23656 # A2. Set X = abs(input).
23659 mov.l (%a0),FP_SCR1(%a6) # move input to work space
23660 mov.l 4(%a0),FP_SCR1+4(%a6) # move input to work space
23661 mov.l 8(%a0),FP_SCR1+8(%a6) # move input to work space
23662 and.l &0x7fffffff,FP_SCR1(%a6) # create abs(X)
23664 # A3. Compute ILOG.
23665 # ILOG is the log base 10 of the input value. It is approx-
23666 # imated by adding e + 0.f when the original value is viewed
23667 # as 2^^e * 1.f in extended precision. This value is stored
23672 # d0: k-factor/exponent
23678 # d7: k-factor/Unchanged
23679 # a0: ptr for original operand/final result
23682 # fp0: x/float(ILOG)
23686 # F_SCR2:Abs(X)/Abs(X) with $3fff exponent
23688 # L_SCR2:first word of X packed/Unchanged
23690 tst.b BINDEC_FLG(%a6) # check for denorm
23691 beq.b A3_cont # if clr, continue with norm
23692 mov.l &-4933,%d6 # force ILOG = -4933
23695 mov.w FP_SCR1(%a6),%d0 # move exp to d0
23696 mov.w &0x3fff,FP_SCR1(%a6) # replace exponent with 0x3fff
23697 fmov.x FP_SCR1(%a6),%fp0 # now fp0 has 1.f
23698 sub.w &0x3fff,%d0 # strip off bias
23699 fadd.w %d0,%fp0 # add in exp
23700 fsub.s FONE(%pc),%fp0 # subtract off 1.0
23701 fbge.w pos_res # if pos, branch
23702 fmul.x PLOG2UP1(%pc),%fp0 # if neg, mul by LOG2UP1
23703 fmov.l %fp0,%d6 # put ILOG in d6 as a lword
23704 bra.b A4_str # go move out ILOG
23706 fmul.x PLOG2(%pc),%fp0 # if pos, mul by LOG2
23707 fmov.l %fp0,%d6 # put ILOG in d6 as a lword
23710 # A4. Clr INEX bit.
23711 # The operation in A3 above may have set INEX2.
23714 fmov.l &0,%fpsr # zero all of fpsr - nothing needed
23717 # A5. Set ICTR = 0;
23718 # ICTR is a flag used in A13. It must be set before the
23719 # loop entry A6. The lower word of d5 is used for ICTR.
23721 clr.w %d5 # clear ICTR
23723 # A6. Calculate LEN.
23724 # LEN is the number of digits to be displayed. The k-factor
23725 # can dictate either the total number of digits, if it is
23726 # a positive number, or the number of digits after the
23727 # original decimal point which are to be included as
23728 # significant. See the 68882 manual for examples.
23729 # If LEN is computed to be greater than 17, set OPERR in
23730 # USER_FPSR. LEN is stored in d4.
23734 # d0: exponent/Unchanged
23737 # d4: exc picture/LEN
23738 # d5: ICTR/Unchanged
23739 # d6: ILOG/Unchanged
23740 # d7: k-factor/Unchanged
23741 # a0: ptr for original operand/final result
23744 # fp0: float(ILOG)/Unchanged
23748 # F_SCR2:Abs(X) with $3fff exponent/Unchanged
23750 # L_SCR2:first word of X packed/Unchanged
23753 tst.l %d7 # branch on sign of k
23754 ble.b k_neg # if k <= 0, LEN = ILOG + 1 - k
23755 mov.l %d7,%d4 # if k > 0, LEN = k
23756 bra.b len_ck # skip to LEN check
23758 mov.l %d6,%d4 # first load ILOG to d4
23759 sub.l %d7,%d4 # subtract off k
23760 addq.l &1,%d4 # add in the 1
23762 tst.l %d4 # LEN check: branch on sign of LEN
23763 ble.b LEN_ng # if neg, set LEN = 1
23764 cmp.l %d4,&17 # test if LEN > 17
23765 ble.b A7_str # if not, forget it
23766 mov.l &17,%d4 # set max LEN = 17
23767 tst.l %d7 # if negative, never set OPERR
23768 ble.b A7_str # if positive, continue
23769 or.l &opaop_mask,USER_FPSR(%a6) # set OPERR & AIOP in USER_FPSR
23770 bra.b A7_str # finished here
23772 mov.l &1,%d4 # min LEN is 1
23775 # A7. Calculate SCALE.
23776 # SCALE is equal to 10^ISCALE, where ISCALE is the number
23777 # of decimal places needed to insure LEN integer digits
23778 # in the output before conversion to bcd. LAMBDA is the sign
23779 # of ISCALE, used in A9. Fp1 contains 10^^(abs(ISCALE)) using
23780 # the rounding mode as given in the following table (see
23781 # Coonen, p. 7.23 as ref.; however, the SCALE variable is
23782 # of opposite sign in bindec.sa from Coonen).
23785 # FPCR[6:5] LAMBDA SIGN(X) FPCR[6:5]
23786 # ----------------------------------------------
23787 # RN 00 0 0 00/0 RN
23788 # RN 00 0 1 00/0 RN
23789 # RN 00 1 0 00/0 RN
23790 # RN 00 1 1 00/0 RN
23791 # RZ 01 0 0 11/3 RP
23792 # RZ 01 0 1 11/3 RP
23793 # RZ 01 1 0 10/2 RM
23794 # RZ 01 1 1 10/2 RM
23795 # RM 10 0 0 11/3 RP
23796 # RM 10 0 1 10/2 RM
23797 # RM 10 1 0 10/2 RM
23798 # RM 10 1 1 11/3 RP
23799 # RP 11 0 0 10/2 RM
23800 # RP 11 0 1 11/3 RP
23801 # RP 11 1 0 11/3 RP
23802 # RP 11 1 1 10/2 RM
23806 # d0: exponent/scratch - final is 0
23807 # d2: x/0 or 24 for A9
23808 # d3: x/scratch - offset ptr into PTENRM array
23809 # d4: LEN/Unchanged
23810 # d5: 0/ICTR:LAMBDA
23811 # d6: ILOG/ILOG or k if ((k<=0)&(ILOG<k))
23812 # d7: k-factor/Unchanged
23813 # a0: ptr for original operand/final result
23814 # a1: x/ptr to PTENRM array
23816 # fp0: float(ILOG)/Unchanged
23820 # F_SCR2:Abs(X) with $3fff exponent/Unchanged
23822 # L_SCR2:first word of X packed/Unchanged
23825 tst.l %d7 # test sign of k
23826 bgt.b k_pos # if pos and > 0, skip this
23827 cmp.l %d7,%d6 # test k - ILOG
23828 blt.b k_pos # if ILOG >= k, skip this
23829 mov.l %d7,%d6 # if ((k<0) & (ILOG < k)) ILOG = k
23831 mov.l %d6,%d0 # calc ILOG + 1 - LEN in d0
23832 addq.l &1,%d0 # add the 1
23833 sub.l %d4,%d0 # sub off LEN
23834 swap %d5 # use upper word of d5 for LAMBDA
23835 clr.w %d5 # set it zero initially
23836 clr.w %d2 # set up d2 for very small case
23837 tst.l %d0 # test sign of ISCALE
23838 bge.b iscale # if pos, skip next inst
23839 addq.w &1,%d5 # if neg, set LAMBDA true
23840 cmp.l %d0,&0xffffecd4 # test iscale <= -4908
23841 bgt.b no_inf # if false, skip rest
23842 add.l &24,%d0 # add in 24 to iscale
23843 mov.l &24,%d2 # put 24 in d2 for A9
23845 neg.l %d0 # and take abs of ISCALE
23847 fmov.s FONE(%pc),%fp1 # init fp1 to 1
23848 bfextu USER_FPCR(%a6){&26:&2},%d1 # get initial rmode bits
23849 lsl.w &1,%d1 # put them in bits 2:1
23850 add.w %d5,%d1 # add in LAMBDA
23851 lsl.w &1,%d1 # put them in bits 3:1
23852 tst.l L_SCR2(%a6) # test sign of original x
23853 bge.b x_pos # if pos, don't set bit 0
23854 addq.l &1,%d1 # if neg, set bit 0
23856 lea.l RBDTBL(%pc),%a2 # load rbdtbl base
23857 mov.b (%a2,%d1),%d3 # load d3 with new rmode
23858 lsl.l &4,%d3 # put bits in proper position
23859 fmov.l %d3,%fpcr # load bits into fpu
23860 lsr.l &4,%d3 # put bits in proper position
23861 tst.b %d3 # decode new rmode for pten table
23862 bne.b not_rn # if zero, it is RN
23863 lea.l PTENRN(%pc),%a1 # load a1 with RN table base
23864 bra.b rmode # exit decode
23866 lsr.b &1,%d3 # get lsb in carry
23867 bcc.b not_rp2 # if carry clear, it is RM
23868 lea.l PTENRP(%pc),%a1 # load a1 with RP table base
23869 bra.b rmode # exit decode
23871 lea.l PTENRM(%pc),%a1 # load a1 with RM table base
23873 clr.l %d3 # clr table index
23875 lsr.l &1,%d0 # shift next bit into carry
23876 bcc.b e_next2 # if zero, skip the mul
23877 fmul.x (%a1,%d3),%fp1 # mul by 10**(d3_bit_no)
23879 add.l &12,%d3 # inc d3 to next pwrten table entry
23880 tst.l %d0 # test if ISCALE is zero
23881 bne.b e_loop2 # if not, loop
23883 # A8. Clr INEX; Force RZ.
23884 # The operation in A7 above may have set INEX2.
23885 # RZ mode is forced for the scaling operation to insure
23886 # only one rounding error. The grs bits are collected in
23887 # the INEX flag for use in A10.
23892 fmov.l &0,%fpsr # clr INEX
23893 fmov.l &rz_mode*0x10,%fpcr # set RZ rounding mode
23895 # A9. Scale X -> Y.
23896 # The mantissa is scaled to the desired number of significant
23897 # digits. The excess digits are collected in INEX2. If mul,
23898 # Check d2 for excess 10 exponential value. If not zero,
23899 # the iscale value would have caused the pwrten calculation
23900 # to overflow. Only a negative iscale can cause this, so
23901 # multiply by 10^(d2), which is now only allowed to be 24,
23902 # with a multiply by 10^8 and 10^16, which is exact since
23903 # 10^24 is exact. If the input was denormalized, we must
23904 # create a busy stack frame with the mul command and the
23905 # two operands, and allow the fpu to complete the multiply.
23909 # d0: FPCR with RZ mode/Unchanged
23910 # d2: 0 or 24/unchanged
23912 # d4: LEN/Unchanged
23914 # d6: ILOG/Unchanged
23915 # d7: k-factor/Unchanged
23916 # a0: ptr for original operand/final result
23917 # a1: ptr to PTENRM array/Unchanged
23919 # fp0: float(ILOG)/X adjusted for SCALE (Y)
23920 # fp1: 10^ISCALE/Unchanged
23923 # F_SCR2:Abs(X) with $3fff exponent/Unchanged
23925 # L_SCR2:first word of X packed/Unchanged
23928 fmov.x (%a0),%fp0 # load X from memory
23929 fabs.x %fp0 # use abs(X)
23930 tst.w %d5 # LAMBDA is in lower word of d5
23931 bne.b sc_mul # if neg (LAMBDA = 1), scale by mul
23932 fdiv.x %fp1,%fp0 # calculate X / SCALE -> Y to fp0
23933 bra.w A10_st # branch to A10
# Scale-by-multiply path.  A normalized input branches to A9_norm; for a
# DENORM input the exponent of the product is accumulated in d3 while the
# mantissas are multiplied with their exponents forced to 0x3fff.
23936 tst.b BINDEC_FLG(%a6) # check for denorm
23937 beq.w A9_norm # if norm, continue with mul
23939 # for DENORM, we must calculate:
23940 # fp0 = input_op * 10^ISCALE * 10^24
23941 # since the input operand is a DENORM, we can't multiply it directly.
23942 # so, we do the multiplication of the exponents and mantissas separately.
23943 # in this way, we avoid underflow on intermediate stages of the
23944 # multiplication and guarantee a result without exception.
23945 fmovm.x &0x2,-(%sp) # save 10^ISCALE (fp1) to stack
23947 mov.w (%sp),%d3 # grab exponent
23948 andi.w &0x7fff,%d3 # clear sign
23949 ori.w &0x8000,(%a0) # make DENORM exp negative
23950 add.w (%a0),%d3 # add DENORM exp to 10^ISCALE exp
23951 subi.w &0x3fff,%d3 # subtract BIAS
23953 subi.w &0x3fff,%d3 # subtract BIAS
23955 subi.w &0x3fff,%d3 # subtract BIAS
23957 bmi.w sc_mul_err # if result is DENORM, punt!!!
23959 andi.w &0x8000,(%sp) # keep sign
23960 or.w %d3,(%sp) # insert new exponent
23961 andi.w &0x7fff,(%a0) # clear sign bit on DENORM again
23962 mov.l 0x8(%a0),-(%sp) # put input op mantissa on stk
23963 mov.l 0x4(%a0),-(%sp)
23964 mov.l &0x3fff0000,-(%sp) # force exp to zero
23965 fmovm.x (%sp)+,&0x80 # load normalized DENORM into fp0
23968 # fmul.x 36(%a1),%fp0 # multiply fp0 by 10^8
23969 # fmul.x 48(%a1),%fp0 # multiply fp0 by 10^16
23970 mov.l 36+8(%a1),-(%sp) # get 10^8 mantissa
23971 mov.l 36+4(%a1),-(%sp)
23972 mov.l &0x3fff0000,-(%sp) # force exp to zero
23973 mov.l 48+8(%a1),-(%sp) # get 10^16 mantissa
23974 mov.l 48+4(%a1),-(%sp)
23975 mov.l &0x3fff0000,-(%sp)# force exp to zero
23976 fmul.x (%sp)+,%fp0 # multiply fp0 by 10^16 mantissa (pushed last, so popped first)
23977 fmul.x (%sp)+,%fp0 # multiply fp0 by 10^8 mantissa
23984 tst.w %d2 # test for small exp case
23985 beq.b A9_con # if zero, continue as normal
23986 fmul.x 36(%a1),%fp0 # multiply fp0 by 10^8
23987 fmul.x 48(%a1),%fp0 # multiply fp0 by 10^16
23989 fmul.x %fp1,%fp0 # calculate X * SCALE -> Y to fp0
23992 # If INEX is set, round error occurred. This is compensated
23993 # for by 'or-ing' in the INEX2 flag to the lsb of Y.
23997 # d0: FPCR with RZ mode/FPSR with INEX2 isolated
24000 # d4: LEN/Unchanged
24002 # d6: ILOG/Unchanged
24003 # d7: k-factor/Unchanged
24004 # a0: ptr for original operand/final result
24005 # a1: ptr to PTENxx array/Unchanged
24006 # a2: x/ptr to FP_SCR1(a6)
24007 # fp0: Y/Y with lsb adjusted
24008 # fp1: 10^ISCALE/Unchanged
24012 fmov.l %fpsr,%d0 # get FPSR
24013 fmov.x %fp0,FP_SCR1(%a6) # move Y to memory
24014 lea.l FP_SCR1(%a6),%a2 # load a2 with ptr to FP_SCR1
24015 btst &9,%d0 # check if INEX2 set
24016 beq.b A11_st # if clear, skip rest
24017 or.l &1,8(%a2) # or in 1 to lsb of mantissa
24018 fmov.x FP_SCR1(%a6),%fp0 # write adjusted Y back to fpu
24021 # A11. Restore original FPCR; set size ext.
24022 # Perform FINT operation in the user's rounding mode. Keep
24023 # the size to extended. The sintdo entry point in the sint
24024 # routine expects the FPCR value to be in USER_FPCR for
24025 # mode and precision. The original FPCR is saved in L_SCR1.
24028 mov.l USER_FPCR(%a6),L_SCR1(%a6) # save it for later
24029 and.l &0x00000030,USER_FPCR(%a6) # set size to ext,
24030 # ;block exceptions
24033 # A12. Calculate YINT = FINT(Y) according to user's rounding mode.
24034 # The FPSP routine sintd0 is used. The output is in fp0.
24038 # d0: FPSR with AINEX cleared/FPCR with size set to ext
24041 # d4: LEN/Unchanged
24042 # d5: ICTR:LAMBDA/Unchanged
24043 # d6: ILOG/Unchanged
24044 # d7: k-factor/Unchanged
24045 # a0: ptr for original operand/src ptr for sintdo
24046 # a1: ptr to PTENxx array/Unchanged
24047 # a2: ptr to FP_SCR1(a6)/Unchanged
24048 # a6: temp pointer to FP_SCR1(a6) - orig value saved and restored
24050 # fp1: 10^ISCALE/Unchanged
24053 # F_SCR2:Y adjusted for inex/Y with original exponent
24054 # L_SCR1:x/original USER_FPCR
24055 # L_SCR2:first word of X packed/Unchanged
24058 movm.l &0xc0c0,-(%sp) # save regs used by sintd0 {%d0-%d1/%a0-%a1}
24059 mov.l L_SCR1(%a6),-(%sp)
24060 mov.l L_SCR2(%a6),-(%sp)
24062 lea.l FP_SCR1(%a6),%a0 # a0 is ptr to FP_SCR1(a6)
24063 fmov.x %fp0,(%a0) # move Y to memory at FP_SCR1(a6)
24064 tst.l L_SCR2(%a6) # test sign of original operand
24065 bge.b do_fint12 # if pos, use Y
24066 or.l &0x80000000,(%a0) # if neg, use -Y
24068 mov.l USER_FPSR(%a6),-(%sp)
24069 # bsr sintdo # sint routine returns int in fp0
24071 fmov.l USER_FPCR(%a6),%fpcr
24072 fmov.l &0x0,%fpsr # clear the AEXC bits!!!
24073 ## mov.l USER_FPCR(%a6),%d0 # ext prec/keep rnd mode
24074 ## andi.l &0x00000030,%d0
24075 ## fmov.l %d0,%fpcr
24076 fint.x FP_SCR1(%a6),%fp0 # do fint()
24078 or.w %d0,FPSR_EXCEPT(%a6)
24079 ## fmov.l &0x0,%fpcr
24080 ## fmov.l %fpsr,%d0 # don't keep ccodes
24081 ## or.w %d0,FPSR_EXCEPT(%a6)
24083 mov.b (%sp),USER_FPSR(%a6)
24086 mov.l (%sp)+,L_SCR2(%a6)
24087 mov.l (%sp)+,L_SCR1(%a6)
24088 movm.l (%sp)+,&0x303 # restore regs used by sint {%d0-%d1/%a0-%a1}
24090 mov.l L_SCR2(%a6),FP_SCR1(%a6) # restore original exponent
24091 mov.l L_SCR1(%a6),USER_FPCR(%a6) # restore user's FPCR
24093 # A13. Check for LEN digits.
24094 # If the int operation results in more than LEN digits,
24095 # or less than LEN -1 digits, adjust ILOG and repeat from
24096 # A6. This test occurs only on the first pass. If the
24097 # result is exactly 10^LEN, decrement ILOG and divide
24098 # the mantissa by 10. The calculation of 10^LEN cannot
24099 # be inexact, since all powers of ten up to 10^27 are exact
24100 # in extended precision, so the use of a previous power-of-ten
24101 # table will introduce no error.
24106 # d0: FPCR with size set to ext/scratch final = 0
24108 # d3: x/scratch final = x
24109 # d4: LEN/LEN adjusted
24110 # d5: ICTR:LAMBDA/LAMBDA:ICTR
24111 # d6: ILOG/ILOG adjusted
24112 # d7: k-factor/Unchanged
24113 # a0: pointer into memory for packed bcd string formation
24114 # a1: ptr to PTENxx array/Unchanged
24115 # a2: ptr to FP_SCR1(a6)/Unchanged
24116 # fp0: int portion of Y/abs(YINT) adjusted
24117 # fp1: 10^ISCALE/Unchanged
24120 # F_SCR2:Y with original exponent/Unchanged
24121 # L_SCR1:original USER_FPCR/Unchanged
24122 # L_SCR2:first word of X packed/Unchanged
24125 swap %d5 # put ICTR in lower word of d5
24126 tst.w %d5 # check if ICTR = 0
24127 bne not_zr # if non-zero, go to second test
24129 # Compute 10^(LEN-1)
24131 fmov.s FONE(%pc),%fp2 # init fp2 to 1.0
24132 mov.l %d4,%d0 # put LEN in d0
24133 subq.l &1,%d0 # d0 = LEN -1
24134 clr.l %d3 # clr table index
24136 lsr.l &1,%d0 # shift next bit into carry
24137 bcc.b l_next # if zero, skip the mul
24138 fmul.x (%a1,%d3),%fp2 # mul by 10**(d3_bit_no)
24140 add.l &12,%d3 # inc d3 to next pwrten table entry
24141 tst.l %d0 # test if LEN is zero
24142 bne.b l_loop # if not, loop
24144 # 10^(LEN-1) is computed for this test and A14. If the input was
24145 # denormalized, check only the case in which YINT > 10^LEN.
24147 tst.b BINDEC_FLG(%a6) # check if input was norm
24148 beq.b A13_con # if norm, continue with checking
24149 fabs.x %fp0 # take abs of YINT
24152 # Compare abs(YINT) to 10^(LEN-1) and 10^LEN
24155 fabs.x %fp0 # take abs of YINT
24156 fcmp.x %fp0,%fp2 # compare abs(YINT) with 10^(LEN-1)
24157 fbge.w test_2 # if greater, do next test
24158 subq.l &1,%d6 # subtract 1 from ILOG
24159 mov.w &1,%d5 # set ICTR
24160 fmov.l &rm_mode*0x10,%fpcr # set rmode to RM
24161 fmul.s FTEN(%pc),%fp2 # compute 10^LEN
24162 bra.w A6_str # return to A6 and recompute YINT
24164 fmul.s FTEN(%pc),%fp2 # compute 10^LEN
24165 fcmp.x %fp0,%fp2 # compare abs(YINT) with 10^LEN
24166 fblt.w A14_st # if less, all is ok, go to A14
24167 fbgt.w fix_ex # if greater, fix and redo
24168 fdiv.s FTEN(%pc),%fp0 # if equal, divide by 10
24169 addq.l &1,%d6 # and inc ILOG
24170 bra.b A14_st # and continue elsewhere
24172 addq.l &1,%d6 # increment ILOG by 1
24173 mov.w &1,%d5 # set ICTR
24174 fmov.l &rm_mode*0x10,%fpcr # set rmode to RM
24175 bra.w A6_str # return to A6 and recompute YINT
24177 # Since ICTR <> 0, we have already been through one adjustment,
24178 # and shouldn't have another; this is to check if abs(YINT) = 10^LEN
24179 # 10^LEN is again computed using whatever table is in a1 since the
24180 # value calculated cannot be inexact.
# Second-pass check (ICTR <> 0): build 10^LEN in fp2 by multiplying the
# power-of-ten table entries at (a1) selected by the set bits of LEN, then
# compare it with abs(YINT).  On equality, YINT is divided by 10, ILOG and
# LEN are incremented, and fp2 is multiplied by 10 to match the new LEN.
24183 fmov.s FONE(%pc),%fp2 # init fp2 to 1.0
24184 mov.l %d4,%d0 # put LEN in d0
24185 clr.l %d3 # clr table index
24187 lsr.l &1,%d0 # shift next bit into carry
24188 bcc.b z_next # if zero, skip the mul
24189 fmul.x (%a1,%d3),%fp2 # mul by 10**(d3_bit_no)
24191 add.l &12,%d3 # inc d3 to next pwrten table entry (12 bytes each)
24192 tst.l %d0 # test if any bits of LEN remain
24193 bne.b z_loop # if so, loop
24194 fabs.x %fp0 # get abs(YINT)
24195 fcmp.x %fp0,%fp2 # check if abs(YINT) = 10^LEN
24196 fbneq.w A14_st # if not, skip this
24197 fdiv.s FTEN(%pc),%fp0 # divide abs(YINT) by 10
24198 addq.l &1,%d6 # and inc ILOG by 1
24199 addq.l &1,%d4 # and inc LEN
24200 fmul.s FTEN(%pc),%fp2 # LEN was incremented, so update fp2 to 10^LEN
24202 # A14. Convert the mantissa to bcd.
24203 # The binstr routine is used to convert the LEN digit
24204 # mantissa to bcd in memory. The input to binstr is
24205 # to be a fraction; i.e. (mantissa)/10^LEN and adjusted
24206 # such that the decimal point is to the left of bit 63.
24207 # The bcd digits are stored in the correct position in
24208 # the final string area in memory.
24213 # d0: x/LEN call to binstr - final is 0
24215 # d2: x/ms 32-bits of mant of abs(YINT)
24216 # d3: x/ls 32-bits of mant of abs(YINT)
24217 # d4: LEN/Unchanged
24218 # d5: ICTR:LAMBDA/LAMBDA:ICTR
24220 # d7: k-factor/Unchanged
24221 # a0: pointer into memory for packed bcd string formation
24222 # /ptr to first mantissa byte in result string
24223 # a1: ptr to PTENxx array/Unchanged
24224 # a2: ptr to FP_SCR1(a6)/Unchanged
24225 # fp0: int portion of Y/abs(YINT) adjusted
24226 # fp1: 10^ISCALE/Unchanged
24227 # fp2: 10^LEN/Unchanged
24228 # F_SCR1:x/Work area for final result
24229 # F_SCR2:Y with original exponent/Unchanged
24230 # L_SCR1:original USER_FPCR/Unchanged
24231 # L_SCR2:first word of X packed/Unchanged
24234 fmov.l &rz_mode*0x10,%fpcr # force rz for conversion
24235 fdiv.x %fp2,%fp0 # divide abs(YINT) by 10^LEN
24236 lea.l FP_SCR0(%a6),%a0 # a0 = work area for the quotient
24237 fmov.x %fp0,(%a0) # move abs(YINT)/10^LEN to memory
24238 mov.l 4(%a0),%d2 # move 2nd lword of FP_SCR0 (ms mantissa) to d2
24239 mov.l 8(%a0),%d3 # move 3rd lword of FP_SCR0 (ls mantissa) to d3
24240 clr.l 4(%a0) # zero lword 2 of FP_SCR0
24241 clr.l 8(%a0) # zero lword 3 of FP_SCR0
24242 mov.l (%a0),%d0 # move exponent lword to d0
24243 swap %d0 # put exponent in lower word
24244 beq.b no_sft # if zero, don't shift
24245 sub.l &0x3ffd,%d0 # sub bias less 2 to make fract
24246 tst.l %d0 # check if result is > 0
24247 bgt.b no_sft # if so, don't shift
24248 neg.l %d0 # make exp positive
24250 lsr.l &1,%d2 # shift d2:d3 right, add 0s
24251 roxr.l &1,%d3 # the number of places
24252 dbf.w %d0,m_loop # given in d0 (dbf counts d0 down to -1)
24254 tst.l %d2 # check for mantissa of zero
24255 bne.b no_zr # if not, go on
24256 tst.l %d3 # continue zero check
24257 beq.b zer_m # if zero, go directly to binstr
24259 clr.l %d1 # put zero in d1 for addx
24260 add.l &0x00000080,%d3 # inc at bit 7
24261 addx.l %d1,%d2 # propagate the carry into d2
24262 and.l &0xffffff80,%d3 # clear the low 7 bits (lsbs not used by 882)
24264 mov.l %d4,%d0 # put LEN in d0 for binstr call
24265 addq.l &3,%a0 # a0 points to M16 byte in result
24266 bsr binstr # call binstr to convert mant
24269 # A15. Convert the exponent to bcd.
24270 # As in A14 above, the exp is converted to bcd and the
24271 # digits are stored in the final string.
24273 # Digits are stored in L_SCR1(a6) on return from binstr as:
24276 # -----------------------------------------
24277 # | 0 | e3 | e2 | e1 | e4 | X | X | X |
24278 # -----------------------------------------
24280 # And are moved into their proper places in FP_SCR0. If digit e4
24281 # is non-zero, OPERR is signaled. In all cases, all 4 digits are
24282 # written as specified in the 881/882 manual for packed decimal.
24286 # d0: x/LEN call to binstr - final is 0
24287 # d1: x/scratch (0);shift count for final exponent packing
24288 # d2: x/ms 32-bits of exp fraction/scratch
24289 # d3: x/ls 32-bits of exp fraction
24290 # d4: LEN/Unchanged
24291 # d5: ICTR:LAMBDA/LAMBDA:ICTR
24293 # d7: k-factor/Unchanged
24294 # a0: ptr to result string/ptr to L_SCR1(a6)
24295 # a1: ptr to PTENxx array/Unchanged
24296 # a2: ptr to FP_SCR1(a6)/Unchanged
24297 # fp0: abs(YINT) adjusted/float(ILOG)
24298 # fp1: 10^ISCALE/Unchanged
24299 # fp2: 10^LEN/Unchanged
24300 # F_SCR1:Work area for final result/BCD result
24301 # F_SCR2:Y with original exponent/ILOG/10^4
24302 # L_SCR1:original USER_FPCR/Exponent digits on return from binstr
24303 # L_SCR2:first word of X packed/Unchanged
24306 tst.b BINDEC_FLG(%a6) # check for denorm
24308 ftest.x %fp0 # test for zero
24309 fbeq.w den_zero # if zero, use k-factor or 4933
24310 fmov.l %d6,%fp0 # float ILOG
24311 fabs.x %fp0 # get abs of ILOG
24314 tst.l %d7 # check sign of the k-factor
24315 blt.b use_ilog # if negative, use ILOG
24316 fmov.s F4933(%pc),%fp0 # force exponent to 4933
24317 bra.b convrt # do it
24319 fmov.l %d6,%fp0 # float ILOG
24320 fabs.x %fp0 # get abs of ILOG
24323 ftest.x %fp0 # test for zero
24324 fbneq.w not_zero # if non-zero, don't force the exponent
24325 fmov.s FONE(%pc),%fp0 # zero: force exponent to 1
24326 bra.b convrt # do it
24328 fmov.l %d6,%fp0 # float ILOG
24329 fabs.x %fp0 # get abs of ILOG
24331 fdiv.x 24(%a1),%fp0 # compute ILOG/10^4 (table entry at byte offset 24)
24332 fmov.x %fp0,FP_SCR1(%a6) # store fp0 in memory
24333 mov.l 4(%a2),%d2 # move lword 2 (ms mantissa) to d2
24334 mov.l 8(%a2),%d3 # move lword 3 (ls mantissa) to d3
24335 mov.w (%a2),%d0 # move exp to d0
24336 beq.b x_loop_fin # if zero, skip the shift
24337 sub.w &0x3ffd,%d0 # subtract bias less 2, as in A14
24338 neg.w %d0 # make exp positive
24340 lsr.l &1,%d2 # shift d2:d3 right
24341 roxr.l &1,%d3 # the number of places
24342 dbf.w %d0,x_loop # given in d0 (dbf counts d0 down to -1)
24344 clr.l %d1 # put zero in d1 for addx
24345 add.l &0x00000080,%d3 # inc at bit 7
24346 addx.l %d1,%d2 # propagate the carry into d2
24347 and.l &0xffffff80,%d3 # clear the low 7 bits (lsbs not used by 882)
24348 mov.l &4,%d0 # put 4 in d0 for binstr call
24349 lea.l L_SCR1(%a6),%a0 # a0 is ptr to L_SCR1 for exp digits
24350 bsr binstr # call binstr to convert exp
24351 mov.l L_SCR1(%a6),%d0 # load L_SCR1 lword to d0
24352 mov.l &12,%d1 # use d1 for shift count
24353 lsr.l %d1,%d0 # shift d0 right by 12
24354 bfins %d0,FP_SCR0(%a6){&4:&12} # put e3:e2:e1 in FP_SCR0
24355 lsr.l %d1,%d0 # shift d0 right by 12 more
24356 bfins %d0,FP_SCR0(%a6){&16:&4} # put e4 in FP_SCR0
24357 tst.b %d0 # check if e4 is zero
24358 beq.b A16_st # if zero, skip rest
24359 or.l &opaop_mask,USER_FPSR(%a6) # set OPERR & AIOP in USER_FPSR
24362 # A16. Write sign bits to final string.
24363 # Sigma is bit 31 of initial value; RHO is bit 31 of d6 (ILOG).
24367 # d0: x/scratch - final is x
24370 # d4: LEN/Unchanged
24371 # d5: ICTR:LAMBDA/LAMBDA:ICTR
24372 # d6: ILOG/ILOG adjusted
24373 # d7: k-factor/Unchanged
24374 # a0: ptr to L_SCR1(a6)/Unchanged
24375 # a1: ptr to PTENxx array/Unchanged
24376 # a2: ptr to FP_SCR1(a6)/Unchanged
24377 # fp0: float(ILOG)/Unchanged
24378 # fp1: 10^ISCALE/Unchanged
24379 # fp2: 10^LEN/Unchanged
24380 # F_SCR1:BCD result with correct signs
24382 # L_SCR1:Exponent digits on return from binstr
24383 # L_SCR2:first word of X packed/Unchanged
24386 clr.l %d0 # clr d0 for collection of signs
24387 and.b &0x0f,FP_SCR0(%a6) # clear first nibble of FP_SCR0
24388 tst.l L_SCR2(%a6) # check sign of original mantissa
24389 bge.b mant_p # if pos, don't set SM
24390 mov.l &2,%d0 # move 2 in to d0 for SM (bit 1)
24392 tst.l %d6 # check sign of ILOG
24393 bge.b wr_sgn # if pos, don't set SE
24394 addq.l &1,%d0 # set bit 0 in d0 for SE
24396 bfins %d0,FP_SCR0(%a6){&0:&2} # insert SM:SE into the top 2 bits of FP_SCR0
24398 # Clean up and restore all registers used.
24400 fmov.l &0,%fpsr # clear possible inex2/ainex bits
24401 fmovm.x (%sp)+,&0xe0 # {%fp0-%fp2}
24402 movm.l (%sp)+,&0x4fc # {%d2-%d7/%a2}
# NOTE(review): the labels of the three 13-entry tables below are not visible
# in this listing. Each entry is three longwords (12 bytes) holding 10^(2^n).
# The tables are identical except that some entries differ by one unit in the
# last place -- presumably one table per rounding mode (the "PTENxx" arrays
# that bindec points a1 at); confirm against the full source.
#
# First table.
24407 long 0x40020000,0xA0000000,0x00000000 # 10 ^ 1
24408 long 0x40050000,0xC8000000,0x00000000 # 10 ^ 2
24409 long 0x400C0000,0x9C400000,0x00000000 # 10 ^ 4
24410 long 0x40190000,0xBEBC2000,0x00000000 # 10 ^ 8
24411 long 0x40340000,0x8E1BC9BF,0x04000000 # 10 ^ 16
24412 long 0x40690000,0x9DC5ADA8,0x2B70B59E # 10 ^ 32
24413 long 0x40D30000,0xC2781F49,0xFFCFA6D5 # 10 ^ 64
24414 long 0x41A80000,0x93BA47C9,0x80E98CE0 # 10 ^ 128
24415 long 0x43510000,0xAA7EEBFB,0x9DF9DE8E # 10 ^ 256
24416 long 0x46A30000,0xE319A0AE,0xA60E91C7 # 10 ^ 512
24417 long 0x4D480000,0xC9767586,0x81750C17 # 10 ^ 1024
24418 long 0x5A920000,0x9E8B3B5D,0xC53D5DE5 # 10 ^ 2048
24419 long 0x75250000,0xC4605202,0x8A20979B # 10 ^ 4096
# Second table: 10^64 and 10^1024 are one unit in the last place above the
# corresponding entries of the first table; all other entries are equal.
24423 long 0x40020000,0xA0000000,0x00000000 # 10 ^ 1
24424 long 0x40050000,0xC8000000,0x00000000 # 10 ^ 2
24425 long 0x400C0000,0x9C400000,0x00000000 # 10 ^ 4
24426 long 0x40190000,0xBEBC2000,0x00000000 # 10 ^ 8
24427 long 0x40340000,0x8E1BC9BF,0x04000000 # 10 ^ 16
24428 long 0x40690000,0x9DC5ADA8,0x2B70B59E # 10 ^ 32
24429 long 0x40D30000,0xC2781F49,0xFFCFA6D6 # 10 ^ 64
24430 long 0x41A80000,0x93BA47C9,0x80E98CE0 # 10 ^ 128
24431 long 0x43510000,0xAA7EEBFB,0x9DF9DE8E # 10 ^ 256
24432 long 0x46A30000,0xE319A0AE,0xA60E91C7 # 10 ^ 512
24433 long 0x4D480000,0xC9767586,0x81750C18 # 10 ^ 1024
24434 long 0x5A920000,0x9E8B3B5D,0xC53D5DE5 # 10 ^ 2048
24435 long 0x75250000,0xC4605202,0x8A20979B # 10 ^ 4096
# Third table: 10^32, 10^128, 10^256, 10^512, 10^2048 and 10^4096 are one unit
# in the last place below the corresponding entries of the first table; all
# other entries are equal.
24439 long 0x40020000,0xA0000000,0x00000000 # 10 ^ 1
24440 long 0x40050000,0xC8000000,0x00000000 # 10 ^ 2
24441 long 0x400C0000,0x9C400000,0x00000000 # 10 ^ 4
24442 long 0x40190000,0xBEBC2000,0x00000000 # 10 ^ 8
24443 long 0x40340000,0x8E1BC9BF,0x04000000 # 10 ^ 16
24444 long 0x40690000,0x9DC5ADA8,0x2B70B59D # 10 ^ 32
24445 long 0x40D30000,0xC2781F49,0xFFCFA6D5 # 10 ^ 64
24446 long 0x41A80000,0x93BA47C9,0x80E98CDF # 10 ^ 128
24447 long 0x43510000,0xAA7EEBFB,0x9DF9DE8D # 10 ^ 256
24448 long 0x46A30000,0xE319A0AE,0xA60E91C6 # 10 ^ 512
24449 long 0x4D480000,0xC9767586,0x81750C17 # 10 ^ 1024
24450 long 0x5A920000,0x9E8B3B5D,0xC53D5DE4 # 10 ^ 2048
24451 long 0x75250000,0xC4605202,0x8A20979A # 10 ^ 4096
24453 #########################################################################
24454 # binstr(): Converts a 64-bit binary fraction to bcd. #
24456 # INPUT *************************************************************** #
24457 # d2:d3 = 64-bit binary fraction (decimal point before bit 63) #
24458 # d0 = desired length (LEN) #
24459 # a0 = pointer to start in memory for bcd characters #
24460 # (This pointer must point to byte 4 of the first #
24461 # lword of the packed decimal memory string.) #
24463 # OUTPUT ************************************************************** #
24464 # LEN bcd digits stored from the entry a0; a0 is advanced past them. #
24466 # ALGORITHM *********************************************************** #
24467 # The 64-bit binary is assumed to have a decimal point before #
24468 # bit 63. The fraction is multiplied by 10 using a mul by 2 #
24469 # shift and a mul by 8 shift. The bits shifted out of the #
24470 # msb form a decimal digit. This process is iterated until #
24471 # LEN digits are formed. #
24473 # A1. Init d7 to 1. D7 is the byte digit counter, and if 1, the #
24474 # digit formed will be assumed the least significant. This is #
24475 # to force the first byte formed to have a 0 in the upper 4 bits. #
24477 # A2. Beginning of the loop: #
24478 # Copy the fraction in d2:d3 to d4:d5. #
24480 # A3. Multiply the fraction in d2:d3 by 8 using bit-field #
24481 # extracts and shifts. The three msbs from d2 will go into d1. #
24483 # A4. Multiply the fraction in d4:d5 by 2 using shifts. The msb #
24484 # will be collected by the carry. #
24486 # A5. Add using the carry the 64-bit quantities in d2:d3 and d4:d5 #
24487 # into d2:d3. D1 will contain the bcd digit formed. #
24489 # A6. Test d7. If zero, the digit formed is the ms digit. If non- #
24490 # zero, it is the ls digit. Put the digit in its place in the #
24491 # upper word of d7. If it is the ls digit, write the byte #
24492 # from d7 to memory. #
24494 # A7. Decrement d0 (LEN counter) and repeat the loop until done. #
24496 #########################################################################
24498 # Implementation Notes:
24500 # The registers are used as follows:
24503 # d1: temp used to form the digit
24504 # d2: upper 32-bits of fraction for mul by 8
24505 # d3: lower 32-bits of fraction for mul by 8
24506 # d4: upper 32-bits of fraction for mul by 2
24507 # d5: lower 32-bits of fraction for mul by 2
24508 # d6: temp for bit-field extracts
24509 # d7: byte digit formation word;digit count {0,1}
24510 # a0: pointer into memory for packed bcd string formation
24515 movm.l &0xff00,-(%sp) # {%d0-%d7}
24520 mov.l &1,%d7 # init d7 for second digit
24521 subq.l &1,%d0 # dbf on LEN itself would make LEN+1 passes
24523 # A2. Copy d2:d3 to d4:d5. Start loop.
24526 mov.l %d2,%d4 # copy the fraction before muls
24527 mov.l %d3,%d5 # to d4:d5
24529 # A3. Multiply d2:d3 by 8; extract msbs into d1.
24531 bfextu %d2{&0:&3},%d1 # copy 3 msbs of d2 into d1
24532 asl.l &3,%d2 # shift d2 left by 3 places
24533 bfextu %d3{&0:&3},%d6 # copy 3 msbs of d3 into d6
24534 asl.l &3,%d3 # shift d3 left by 3 places
24535 or.l %d6,%d2 # or in msbs from d3 into d2
24537 # A4. Multiply d4:d5 by 2; add carry out to d1.
24539 asl.l &1,%d5 # mul d5 by 2
24540 roxl.l &1,%d4 # mul d4 by 2, shifting in the bit from d5
24541 swap %d6 # put 0 in d6 lower word
24542 addx.w %d6,%d1 # add in extend from mul by 2
24544 # A5. Add mul by 8 to mul by 2. D1 contains the digit formed.
24546 add.l %d5,%d3 # add lower 32 bits
24547 nop # ERRATA FIX #13 (Rev. 1.2 6/6/90)
24548 addx.l %d4,%d2 # add with extend upper 32 bits
24549 nop # ERRATA FIX #13 (Rev. 1.2 6/6/90)
24550 addx.w %d6,%d1 # add in extend from add to d1
24551 swap %d6 # with d6 = 0; put 0 in upper word
24553 # A6. Test d7 and branch.
24555 tst.w %d7 # if zero, store digit & to loop
24556 beq.b first_d # if non-zero, form byte & write
24558 swap %d7 # bring first digit to word d7b
24559 asl.w &4,%d7 # first digit in upper 4 bits d7b
24560 add.w %d1,%d7 # add in ls digit to d7b
24561 mov.b %d7,(%a0)+ # store d7b byte in memory
24562 swap %d7 # bring digit-count flag back to word d7a
24563 clr.w %d7 # set d7a to signal no digits done
24564 dbf.w %d0,loop # do loop some more!
24565 bra.b end_bstr # finished, so exit
24567 swap %d7 # put digit word in d7b
24568 mov.w %d1,%d7 # put new digit in d7b
24569 swap %d7 # bring digit-count flag back to word d7a
24570 addq.w &1,%d7 # set d7a to signal first digit done
24571 dbf.w %d0,loop # do loop some more!
24572 swap %d7 # put last digit in string
24573 lsl.w &4,%d7 # move it to upper 4 bits
24574 mov.b %d7,(%a0)+ # store it in memory string
24576 # Clean up and return; the bcd digits have been written to memory via a0.
24579 movm.l (%sp)+,&0xff # {%d0-%d7}
24582 #########################################################################
24583 # XDEF **************************************************************** #
24584 # facc_in_b(): dmem_read_byte failed #
24585 # facc_in_w(): dmem_read_word failed #
24586 # facc_in_l(): dmem_read_long failed #
24587 # facc_in_d(): dmem_read of dbl prec failed #
24588 # facc_in_x(): dmem_read of ext prec failed #
24590 # facc_out_b(): dmem_write_byte failed #
24591 # facc_out_w(): dmem_write_word failed #
24592 # facc_out_l(): dmem_write_long failed #
24593 # facc_out_d(): dmem_write of dbl prec failed #
24594 # facc_out_x(): dmem_write of ext prec failed #
24596 # XREF **************************************************************** #
24597 # _real_access() - exit through access error handler #
24599 # INPUT *************************************************************** #
24602 # OUTPUT ************************************************************** #
24605 # ALGORITHM *********************************************************** #
24606 # Flow jumps here when an FP data fetch call gets an error #
24607 # result. This means the operating system wants an access error frame #
24608 # made out of the current exception stack frame. #
24609 # So, we first call restore() which makes sure that any updated #
24610 # -(An) or (An)+ register gets returned to its pre-exception value and #
24611 # then we change the stack to an access error stack frame. #
24613 #########################################################################
# Read-fault stubs. Each one loads the operand size in bytes into d0 (the
# amount restore() uses to adjust An) and stages a size-specific FSLW value
# in EXC_VOFF.
24616 movq.l &0x1,%d0 # one byte
24617 bsr.w restore # fix An
24619 mov.w &0x0121,EXC_VOFF(%a6) # set FSLW (staged in EXC_VOFF)
24623 movq.l &0x2,%d0 # two bytes
24624 bsr.w restore # fix An
24626 mov.w &0x0141,EXC_VOFF(%a6) # set FSLW (staged in EXC_VOFF)
24630 movq.l &0x4,%d0 # four bytes
24631 bsr.w restore # fix An
24633 mov.w &0x0101,EXC_VOFF(%a6) # set FSLW (staged in EXC_VOFF)
24637 movq.l &0x8,%d0 # eight bytes
24638 bsr.w restore # fix An
24640 mov.w &0x0161,EXC_VOFF(%a6) # set FSLW (staged in EXC_VOFF)
24644 movq.l &0xc,%d0 # twelve bytes
24645 bsr.w restore # fix An
24647 mov.w &0x0161,EXC_VOFF(%a6) # set FSLW (same value as the eight-byte case)
24650 ################################################################
# Write-fault stubs: same shape as the read-fault stubs, with different
# FSLW values.
24653 movq.l &0x1,%d0 # one byte
24654 bsr.w restore # restore An
24656 mov.w &0x00a1,EXC_VOFF(%a6) # set FSLW (staged in EXC_VOFF)
24660 movq.l &0x2,%d0 # two bytes
24661 bsr.w restore # restore An
24663 mov.w &0x00c1,EXC_VOFF(%a6) # set FSLW (staged in EXC_VOFF)
24667 movq.l &0x4,%d0 # four bytes
24668 bsr.w restore # restore An
24670 mov.w &0x0081,EXC_VOFF(%a6) # set FSLW (staged in EXC_VOFF)
24674 movq.l &0x8,%d0 # eight bytes
24675 bsr.w restore # restore An
24677 mov.w &0x00e1,EXC_VOFF(%a6) # set FSLW (staged in EXC_VOFF)
24681 movq.l &0xc,%d0 # twelve bytes: An adjustment used by restore
24682 bsr.w restore # restore An
24684 mov.w &0x00e1,EXC_VOFF(%a6) # set FSLW (same value as the eight-byte write case)
24686 # here's where we actually create the access error frame from the
24687 # current exception stack frame.
24689 mov.l USER_FPIAR(%a6),EXC_PC(%a6) # stacked PC = saved FPIAR
24691 fmovm.x EXC_FPREGS(%a6),&0xc0 # restore fp0-fp1
24692 fmovm.l USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs
24693 movm.l EXC_DREGS(%a6),&0x0303 # restore d0-d1/a0-a1
# The frame is now grown by one longword and its fields are shuffled down
# so that EA and FSLW can be appended.
24697 mov.l (%sp),-(%sp) # store SR, hi(PC) one longword lower
24698 mov.l 0x8(%sp),0x4(%sp) # store lo(PC)
24699 mov.l 0xc(%sp),0x8(%sp) # store EA
24700 mov.l &0x00000001,0xc(%sp) # store FSLW (low word = 0x0001)
24701 mov.w 0x6(%sp),0xc(%sp) # fix FSLW (size): upper word = word at 0x6(sp)
24702 mov.w &0x4008,0x6(%sp) # store voff (format/vector-offset word)
24704 btst &0x5,(%sp) # supervisor or user mode?
24705 beq.b facc_out2 # user
24706 bset &0x2,0xd(%sp) # set supervisor TM bit
24711 ##################################################################
24713 # if the effective addressing mode was predecrement or postincrement,
24714 # the emulation has already changed its value to the correct post-
24715 # instruction value. but since we're exiting to the access error
24716 # handler, then An must be returned to its pre-instruction value.
# d0 holds the operand size in bytes, set by the facc_* stubs before the call.
24719 mov.b EXC_OPWORD+0x1(%a6),%d1 # low byte of the opword
24720 andi.b &0x38,%d1 # extract <ea> mode field (bits 5-3)
24721 cmpi.b %d1,&0x18 # postinc?
24723 cmpi.b %d1,&0x20 # predec?
24728 mov.b EXC_OPWORD+0x1(%a6),%d1 # low byte of the opword again
24729 andi.w &0x0007,%d1 # fetch An number (bits 2-0)
24731 mov.w (tbl_rest_inc.b,%pc,%d1.w*2),%d1 # d1 = 16-bit table entry for An
24732 jmp (tbl_rest_inc.b,%pc,%d1.w*1) # jump to tbl_rest_inc + that offset
24735 short ri_a0 - tbl_rest_inc # offsets of the per-register handlers
24736 short ri_a1 - tbl_rest_inc
24737 short ri_a2 - tbl_rest_inc
24738 short ri_a3 - tbl_rest_inc
24739 short ri_a4 - tbl_rest_inc
24740 short ri_a5 - tbl_rest_inc
24741 short ri_a6 - tbl_rest_inc
24742 short ri_a7 - tbl_rest_inc
24745 sub.l %d0,EXC_DREGS+0x8(%a6) # fix stacked a0
24748 sub.l %d0,EXC_DREGS+0xc(%a6) # fix stacked a1
24751 sub.l %d0,%a2 # fix a2
24754 sub.l %d0,%a3 # fix a3
24757 sub.l %d0,%a4 # fix a4
24760 sub.l %d0,%a5 # fix a5
24763 sub.l %d0,(%a6) # fix stacked a6
24765 # if it's a fmove out instruction, we don't have to fix a7
24766 # because we hadn't changed it yet. if it's an opclass two
24767 # instruction (data moved in) and the exception was in supervisor
24768 # mode, then a7 also wasn't updated. if it was user mode, then
24769 # restore the correct a7 which is in the USP currently.
24771 cmpi.b EXC_VOFF(%a6),&0x30 # move in or out?
24772 bne.b ri_a7_done # out
24774 btst &0x5,EXC_SR(%a6) # user or supervisor?
24775 bne.b ri_a7_done # supervisor
24776 movc %usp,%a0 # read USP into a0 so it can be restored
24782 # need to invert adjustment value if the <ea> was predec