1 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
2 MOTOROLA MICROPROCESSOR & MEMORY TECHNOLOGY GROUP
3 M68000 Hi-Performance Microprocessor Division
4 M68060 Software Package
5 Production Release P1.00 -- October 10, 1994
7 M68060 Software Package Copyright © 1993, 1994 Motorola Inc. All rights reserved.
9 THE SOFTWARE is provided on an "AS IS" basis and without warranty.
10 To the maximum extent permitted by applicable law,
11 MOTOROLA DISCLAIMS ALL WARRANTIES WHETHER EXPRESS OR IMPLIED,
12 INCLUDING IMPLIED WARRANTIES OF MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE
13 and any warranty against infringement with regard to the SOFTWARE
14 (INCLUDING ANY MODIFIED VERSIONS THEREOF) and any accompanying written materials.
16 To the maximum extent permitted by applicable law,
17 IN NO EVENT SHALL MOTOROLA BE LIABLE FOR ANY DAMAGES WHATSOEVER
18 (INCLUDING WITHOUT LIMITATION, DAMAGES FOR LOSS OF BUSINESS PROFITS,
19 BUSINESS INTERRUPTION, LOSS OF BUSINESS INFORMATION, OR OTHER PECUNIARY LOSS)
20 ARISING OF THE USE OR INABILITY TO USE THE SOFTWARE.
21 Motorola assumes no responsibility for the maintenance and support of the SOFTWARE.
23 You are hereby granted a copyright license to use, modify, and distribute the SOFTWARE
24 so long as this entire notice is retained without alteration in any modified and/or
25 redistributed versions, and that such modified versions are clearly identified as such.
26 No licenses are granted by implication, estoppel or otherwise under any patents
27 or trademarks of Motorola, Inc.
28 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
30 # This file is appended to the top of the 060FPSP package
31 # and contains the entry points into the package. The user, in
32 # effect, branches to one of the branch table entries located
33 # after _060FPSP_TABLE.
34 # Also, subroutine stubs exist in this file (_fpsp_done for
35 # example) that are referenced by the FPSP package itself in order
36 # to call a given routine. The stub routine actually performs the
37 # callout. The FPSP code does a "bsr" to the stub routine. This
38 # extra layer of hierarchy adds a slight performance penalty but
39 # it makes the FPSP code easier to read and more maintainable.
50 set _off_fpu_dis, 0x20
70 ###############################################################
72 # Here's the table of ENTRY POINTS for those linking the package.
94 ###############################################################
98 mov.l (_060FPSP_TABLE-0x80+_off_done,%pc),%d0
99 pea.l (_060FPSP_TABLE-0x80,%pc,%d0)
106 mov.l (_060FPSP_TABLE-0x80+_off_ovfl,%pc),%d0
107 pea.l (_060FPSP_TABLE-0x80,%pc,%d0)
114 mov.l (_060FPSP_TABLE-0x80+_off_unfl,%pc),%d0
115 pea.l (_060FPSP_TABLE-0x80,%pc,%d0)
122 mov.l (_060FPSP_TABLE-0x80+_off_inex,%pc),%d0
123 pea.l (_060FPSP_TABLE-0x80,%pc,%d0)
130 mov.l (_060FPSP_TABLE-0x80+_off_bsun,%pc),%d0
131 pea.l (_060FPSP_TABLE-0x80,%pc,%d0)
138 mov.l (_060FPSP_TABLE-0x80+_off_operr,%pc),%d0
139 pea.l (_060FPSP_TABLE-0x80,%pc,%d0)
146 mov.l (_060FPSP_TABLE-0x80+_off_snan,%pc),%d0
147 pea.l (_060FPSP_TABLE-0x80,%pc,%d0)
154 mov.l (_060FPSP_TABLE-0x80+_off_dz,%pc),%d0
155 pea.l (_060FPSP_TABLE-0x80,%pc,%d0)
162 mov.l (_060FPSP_TABLE-0x80+_off_fline,%pc),%d0
163 pea.l (_060FPSP_TABLE-0x80,%pc,%d0)
167 global _real_fpu_disabled
170 mov.l (_060FPSP_TABLE-0x80+_off_fpu_dis,%pc),%d0
171 pea.l (_060FPSP_TABLE-0x80,%pc,%d0)
178 mov.l (_060FPSP_TABLE-0x80+_off_trap,%pc),%d0
179 pea.l (_060FPSP_TABLE-0x80,%pc,%d0)
186 mov.l (_060FPSP_TABLE-0x80+_off_trace,%pc),%d0
187 pea.l (_060FPSP_TABLE-0x80,%pc,%d0)
194 mov.l (_060FPSP_TABLE-0x80+_off_access,%pc),%d0
195 pea.l (_060FPSP_TABLE-0x80,%pc,%d0)
199 #######################################
204 mov.l (_060FPSP_TABLE-0x80+_off_imr,%pc),%d0
205 pea.l (_060FPSP_TABLE-0x80,%pc,%d0)
212 mov.l (_060FPSP_TABLE-0x80+_off_dmr,%pc),%d0
213 pea.l (_060FPSP_TABLE-0x80,%pc,%d0)
220 mov.l (_060FPSP_TABLE-0x80+_off_dmw,%pc),%d0
221 pea.l (_060FPSP_TABLE-0x80,%pc,%d0)
225 global _imem_read_word
228 mov.l (_060FPSP_TABLE-0x80+_off_irw,%pc),%d0
229 pea.l (_060FPSP_TABLE-0x80,%pc,%d0)
233 global _imem_read_long
236 mov.l (_060FPSP_TABLE-0x80+_off_irl,%pc),%d0
237 pea.l (_060FPSP_TABLE-0x80,%pc,%d0)
241 global _dmem_read_byte
244 mov.l (_060FPSP_TABLE-0x80+_off_drb,%pc),%d0
245 pea.l (_060FPSP_TABLE-0x80,%pc,%d0)
249 global _dmem_read_word
252 mov.l (_060FPSP_TABLE-0x80+_off_drw,%pc),%d0
253 pea.l (_060FPSP_TABLE-0x80,%pc,%d0)
257 global _dmem_read_long
260 mov.l (_060FPSP_TABLE-0x80+_off_drl,%pc),%d0
261 pea.l (_060FPSP_TABLE-0x80,%pc,%d0)
265 global _dmem_write_byte
268 mov.l (_060FPSP_TABLE-0x80+_off_dwb,%pc),%d0
269 pea.l (_060FPSP_TABLE-0x80,%pc,%d0)
273 global _dmem_write_word
276 mov.l (_060FPSP_TABLE-0x80+_off_dww,%pc),%d0
277 pea.l (_060FPSP_TABLE-0x80,%pc,%d0)
281 global _dmem_write_long
284 mov.l (_060FPSP_TABLE-0x80+_off_dwl,%pc),%d0
285 pea.l (_060FPSP_TABLE-0x80,%pc,%d0)
290 # This file contains a set of define statements for constants
291 # in order to promote readability within the corecode itself.
294 set LOCAL_SIZE, 192 # stack frame size(bytes)
295 set LV, -LOCAL_SIZE # stack offset
297 set EXC_SR, 0x4 # stack status register
298 set EXC_PC, 0x6 # stack pc
299 set EXC_VOFF, 0xa # stacked vector offset
300 set EXC_EA, 0xc # stacked <ea>
302 set EXC_FP, 0x0 # frame pointer
304 set EXC_AREGS, -68 # offset of all address regs
305 set EXC_DREGS, -100 # offset of all data regs
306 set EXC_FPREGS, -36 # offset of all fp regs
308 set EXC_A7, EXC_AREGS+(7*4) # offset of saved a7
309 set OLD_A7, EXC_AREGS+(6*4) # extra copy of saved a7
310 set EXC_A6, EXC_AREGS+(6*4) # offset of saved a6
311 set EXC_A5, EXC_AREGS+(5*4)
312 set EXC_A4, EXC_AREGS+(4*4)
313 set EXC_A3, EXC_AREGS+(3*4)
314 set EXC_A2, EXC_AREGS+(2*4)
315 set EXC_A1, EXC_AREGS+(1*4)
316 set EXC_A0, EXC_AREGS+(0*4)
317 set EXC_D7, EXC_DREGS+(7*4)
318 set EXC_D6, EXC_DREGS+(6*4)
319 set EXC_D5, EXC_DREGS+(5*4)
320 set EXC_D4, EXC_DREGS+(4*4)
321 set EXC_D3, EXC_DREGS+(3*4)
322 set EXC_D2, EXC_DREGS+(2*4)
323 set EXC_D1, EXC_DREGS+(1*4)
324 set EXC_D0, EXC_DREGS+(0*4)
326 set EXC_FP0, EXC_FPREGS+(0*12) # offset of saved fp0
327 set EXC_FP1, EXC_FPREGS+(1*12) # offset of saved fp1
328 set EXC_FP2, EXC_FPREGS+(2*12) # offset of saved fp2 (not used)
330 set FP_SCR1, LV+80 # fp scratch 1
331 set FP_SCR1_EX, FP_SCR1+0
332 set FP_SCR1_SGN, FP_SCR1+2
333 set FP_SCR1_HI, FP_SCR1+4
334 set FP_SCR1_LO, FP_SCR1+8
336 set FP_SCR0, LV+68 # fp scratch 0
337 set FP_SCR0_EX, FP_SCR0+0
338 set FP_SCR0_SGN, FP_SCR0+2
339 set FP_SCR0_HI, FP_SCR0+4
340 set FP_SCR0_LO, FP_SCR0+8
342 set FP_DST, LV+56 # fp destination operand
343 set FP_DST_EX, FP_DST+0
344 set FP_DST_SGN, FP_DST+2
345 set FP_DST_HI, FP_DST+4
346 set FP_DST_LO, FP_DST+8
348 set FP_SRC, LV+44 # fp source operand
349 set FP_SRC_EX, FP_SRC+0
350 set FP_SRC_SGN, FP_SRC+2
351 set FP_SRC_HI, FP_SRC+4
352 set FP_SRC_LO, FP_SRC+8
354 set USER_FPIAR, LV+40 # FP instr address register
356 set USER_FPSR, LV+36 # FP status register
357 set FPSR_CC, USER_FPSR+0 # FPSR condition codes
358 set FPSR_QBYTE, USER_FPSR+1 # FPSR qoutient byte
359 set FPSR_EXCEPT, USER_FPSR+2 # FPSR exception status byte
360 set FPSR_AEXCEPT, USER_FPSR+3 # FPSR accrued exception byte
362 set USER_FPCR, LV+32 # FP control register
363 set FPCR_ENABLE, USER_FPCR+2 # FPCR exception enable
364 set FPCR_MODE, USER_FPCR+3 # FPCR rounding mode control
366 set L_SCR3, LV+28 # integer scratch 3
367 set L_SCR2, LV+24 # integer scratch 2
368 set L_SCR1, LV+20 # integer scratch 1
370 set STORE_FLG, LV+19 # flag: operand store (ie. not fcmp/ftst)
372 set EXC_TEMP2, LV+24 # temporary space
373 set EXC_TEMP, LV+16 # temporary space
375 set DTAG, LV+15 # destination operand type
376 set STAG, LV+14 # source operand type
378 set SPCOND_FLG, LV+10 # flag: special case (see below)
380 set EXC_CC, LV+8 # saved condition codes
381 set EXC_EXTWPTR, LV+4 # saved current PC (active)
382 set EXC_EXTWORD, LV+2 # saved extension word
383 set EXC_CMDREG, LV+2 # saved extension word
384 set EXC_OPWORD, LV+0 # saved operation word
386 ################################
390 set FTEMP, 0 # offsets within an
391 set FTEMP_EX, 0 # extended precision
392 set FTEMP_SGN, 2 # value saved in memory.
397 set LOCAL, 0 # offsets within an
398 set LOCAL_EX, 0 # extended precision
399 set LOCAL_SGN, 2 # value saved in memory.
404 set DST, 0 # offsets within an
405 set DST_EX, 0 # extended precision
406 set DST_HI, 4 # value saved in memory.
409 set SRC, 0 # offsets within an
410 set SRC_EX, 0 # extended precision
411 set SRC_HI, 4 # value saved in memory.
414 set SGL_LO, 0x3f81 # min sgl prec exponent
415 set SGL_HI, 0x407e # max sgl prec exponent
416 set DBL_LO, 0x3c01 # min dbl prec exponent
417 set DBL_HI, 0x43fe # max dbl prec exponent
418 set EXT_LO, 0x0 # min ext prec exponent
419 set EXT_HI, 0x7ffe # max ext prec exponent
421 set EXT_BIAS, 0x3fff # extended precision bias
422 set SGL_BIAS, 0x007f # single precision bias
423 set DBL_BIAS, 0x03ff # double precision bias
425 set NORM, 0x00 # operand type for STAG/DTAG
426 set ZERO, 0x01 # operand type for STAG/DTAG
427 set INF, 0x02 # operand type for STAG/DTAG
428 set QNAN, 0x03 # operand type for STAG/DTAG
429 set DENORM, 0x04 # operand type for STAG/DTAG
430 set SNAN, 0x05 # operand type for STAG/DTAG
431 set UNNORM, 0x06 # operand type for STAG/DTAG
436 set neg_bit, 0x3 # negative result
437 set z_bit, 0x2 # zero result
438 set inf_bit, 0x1 # infinite result
439 set nan_bit, 0x0 # NAN result
441 set q_sn_bit, 0x7 # sign bit of quotient byte
443 set bsun_bit, 7 # branch on unordered
444 set snan_bit, 6 # signalling NAN
445 set operr_bit, 5 # operand error
446 set ovfl_bit, 4 # overflow
447 set unfl_bit, 3 # underflow
448 set dz_bit, 2 # divide by zero
449 set inex2_bit, 1 # inexact result 2
450 set inex1_bit, 0 # inexact result 1
452 set aiop_bit, 7 # accrued inexact operation bit
453 set aovfl_bit, 6 # accrued overflow bit
454 set aunfl_bit, 5 # accrued underflow bit
455 set adz_bit, 4 # accrued dz bit
456 set ainex_bit, 3 # accrued inexact bit
458 #############################
459 # FPSR individual bit masks #
460 #############################
461 set neg_mask, 0x08000000 # negative bit mask (lw)
462 set inf_mask, 0x02000000 # infinity bit mask (lw)
463 set z_mask, 0x04000000 # zero bit mask (lw)
464 set nan_mask, 0x01000000 # nan bit mask (lw)
466 set neg_bmask, 0x08 # negative bit mask (byte)
467 set inf_bmask, 0x02 # infinity bit mask (byte)
468 set z_bmask, 0x04 # zero bit mask (byte)
469 set nan_bmask, 0x01 # nan bit mask (byte)
471 set bsun_mask, 0x00008000 # bsun exception mask
472 set snan_mask, 0x00004000 # snan exception mask
473 set operr_mask, 0x00002000 # operr exception mask
474 set ovfl_mask, 0x00001000 # overflow exception mask
475 set unfl_mask, 0x00000800 # underflow exception mask
476 set dz_mask, 0x00000400 # dz exception mask
477 set inex2_mask, 0x00000200 # inex2 exception mask
478 set inex1_mask, 0x00000100 # inex1 exception mask
480 set aiop_mask, 0x00000080 # accrued illegal operation
481 set aovfl_mask, 0x00000040 # accrued overflow
482 set aunfl_mask, 0x00000020 # accrued underflow
483 set adz_mask, 0x00000010 # accrued divide by zero
484 set ainex_mask, 0x00000008 # accrued inexact
486 ######################################
487 # FPSR combinations used in the FPSP #
488 ######################################
489 set dzinf_mask, inf_mask+dz_mask+adz_mask
490 set opnan_mask, nan_mask+operr_mask+aiop_mask
491 set nzi_mask, 0x01ffffff #clears N, Z, and I
492 set unfinx_mask, unfl_mask+inex2_mask+aunfl_mask+ainex_mask
493 set unf2inx_mask, unfl_mask+inex2_mask+ainex_mask
494 set ovfinx_mask, ovfl_mask+inex2_mask+aovfl_mask+ainex_mask
495 set inx1a_mask, inex1_mask+ainex_mask
496 set inx2a_mask, inex2_mask+ainex_mask
497 set snaniop_mask, nan_mask+snan_mask+aiop_mask
498 set snaniop2_mask, snan_mask+aiop_mask
499 set naniop_mask, nan_mask+aiop_mask
500 set neginf_mask, neg_mask+inf_mask
501 set infaiop_mask, inf_mask+aiop_mask
502 set negz_mask, neg_mask+z_mask
503 set opaop_mask, operr_mask+aiop_mask
504 set unfl_inx_mask, unfl_mask+aunfl_mask+ainex_mask
505 set ovfl_inx_mask, ovfl_mask+aovfl_mask+ainex_mask
510 set rnd_stky_bit, 29 # stky bit pos in longword
512 set sign_bit, 0x7 # sign bit
513 set signan_bit, 0x6 # signalling nan bit
515 set sgl_thresh, 0x3f81 # minimum sgl exponent
516 set dbl_thresh, 0x3c01 # minimum dbl exponent
518 set x_mode, 0x0 # extended precision
519 set s_mode, 0x4 # single precision
520 set d_mode, 0x8 # double precision
522 set rn_mode, 0x0 # round-to-nearest
523 set rz_mode, 0x1 # round-to-zero
524 set rm_mode, 0x2 # round-tp-minus-infinity
525 set rp_mode, 0x3 # round-to-plus-infinity
527 set mantissalen, 64 # length of mantissa in bits
529 set BYTE, 1 # len(byte) == 1 byte
530 set WORD, 2 # len(word) == 2 bytes
531 set LONG, 4 # len(longword) == 2 bytes
533 set BSUN_VEC, 0xc0 # bsun vector offset
534 set INEX_VEC, 0xc4 # inexact vector offset
535 set DZ_VEC, 0xc8 # dz vector offset
536 set UNFL_VEC, 0xcc # unfl vector offset
537 set OPERR_VEC, 0xd0 # operr vector offset
538 set OVFL_VEC, 0xd4 # ovfl vector offset
539 set SNAN_VEC, 0xd8 # snan vector offset
541 ###########################
542 # SPecial CONDition FLaGs #
543 ###########################
544 set ftrapcc_flg, 0x01 # flag bit: ftrapcc exception
545 set fbsun_flg, 0x02 # flag bit: bsun exception
546 set mia7_flg, 0x04 # flag bit: (a7)+ <ea>
547 set mda7_flg, 0x08 # flag bit: -(a7) <ea>
548 set fmovm_flg, 0x40 # flag bit: fmovm instruction
549 set immed_flg, 0x80 # flag bit: &<data> <ea>
557 ##################################
558 # TRANSCENDENTAL "LAST-OP" FLAGS #
559 ##################################
560 set FMUL_OP, 0x0 # fmul instr performed last
561 set FDIV_OP, 0x1 # fdiv performed last
562 set FADD_OP, 0x2 # fadd performed last
563 set FMOV_OP, 0x3 # fmov performed last
568 T1: long 0x40C62D38,0xD3D64634 # 16381 LOG2 LEAD
569 T2: long 0x3D6F90AE,0xB1E75CC7 # 16381 LOG2 TRAIL
571 PI: long 0x40000000,0xC90FDAA2,0x2168C235,0x00000000
572 PIBY2: long 0x3FFF0000,0xC90FDAA2,0x2168C235,0x00000000
575 long 0x3FE45F30,0x6DC9C883
577 #########################################################################
578 # XDEF **************************************************************** #
579 # _fpsp_ovfl(): 060FPSP entry point for FP Overflow exception. #
581 # This handler should be the first code executed upon taking the #
582 # FP Overflow exception in an operating system. #
584 # XREF **************************************************************** #
585 # _imem_read_long() - read instruction longword #
586 # fix_skewed_ops() - adjust src operand in fsave frame #
587 # set_tag_x() - determine optype of src/dst operands #
588 # store_fpreg() - store opclass 0 or 2 result to FP regfile #
589 # unnorm_fix() - change UNNORM operands to NORM or ZERO #
590 # load_fpn2() - load dst operand from FP regfile #
591 # fout() - emulate an opclass 3 instruction #
592 # tbl_unsupp - addr of table of emulation routines for opclass 0,2 #
593 # _fpsp_done() - "callout" for 060FPSP exit (all work done!) #
594 # _real_ovfl() - "callout" for Overflow exception enabled code #
595 # _real_inex() - "callout" for Inexact exception enabled code #
596 # _real_trace() - "callout" for Trace exception code #
598 # INPUT *************************************************************** #
599 # - The system stack contains the FP Ovfl exception stack frame #
600 # - The fsave frame contains the source operand #
602 # OUTPUT ************************************************************** #
603 # Overflow Exception enabled: #
604 # - The system stack is unchanged #
605 # - The fsave frame contains the adjusted src op for opclass 0,2 #
606 # Overflow Exception disabled: #
607 # - The system stack is unchanged #
608 # - The "exception present" flag in the fsave frame is cleared #
610 # ALGORITHM *********************************************************** #
611 # On the 060, if an FP overflow is present as the result of any #
612 # instruction, the 060 will take an overflow exception whether the #
613 # exception is enabled or disabled in the FPCR. For the disabled case, #
614 # This handler emulates the instruction to determine what the correct #
615 # default result should be for the operation. This default result is #
616 # then stored in either the FP regfile, data regfile, or memory. #
617 # Finally, the handler exits through the "callout" _fpsp_done() #
618 # denoting that no exceptional conditions exist within the machine. #
619 # If the exception is enabled, then this handler must create the #
620 # exceptional operand and place it in the fsave state frame, and store #
621 # the default result (only if the instruction is opclass 3). For #
622 # exceptions enabled, this handler must exit through the "callout" #
623 # _real_ovfl() so that the operating system enabled overflow handler #
624 # can handle this case. #
625 # Two other conditions exist. First, if overflow was disabled #
626 # but the inexact exception was enabled, this handler must exit #
627 # through the "callout" _real_inex() regardless of whether the result #
629 # Also, in the case of an opclass three instruction where #
630 # overflow was disabled and the trace exception was enabled, this #
631 # handler must exit through the "callout" _real_trace(). #
633 #########################################################################
638 #$# sub.l &24,%sp # make room for src/dst
640 link.w %a6,&-LOCAL_SIZE # init stack frame
642 fsave FP_SRC(%a6) # grab the "busy" frame
644 movm.l &0x0303,EXC_DREGS(%a6) # save d0-d1/a0-a1
645 fmovm.l %fpcr,%fpsr,%fpiar,USER_FPCR(%a6) # save ctrl regs
646 fmovm.x &0xc0,EXC_FPREGS(%a6) # save fp0-fp1 on stack
648 # the FPIAR holds the "current PC" of the faulting instruction
649 mov.l USER_FPIAR(%a6),EXC_EXTWPTR(%a6)
650 mov.l EXC_EXTWPTR(%a6),%a0 # fetch instruction addr
651 addq.l &0x4,EXC_EXTWPTR(%a6) # incr instruction ptr
652 bsr.l _imem_read_long # fetch the instruction words
653 mov.l %d0,EXC_OPWORD(%a6)
655 ##############################################################################
657 btst &0x5,EXC_CMDREG(%a6) # is instr an fmove out?
661 lea FP_SRC(%a6),%a0 # pass: ptr to src op
662 bsr.l fix_skewed_ops # fix src op
664 # since, I believe, only NORMs and DENORMs can come through here,
665 # maybe we can avoid the subroutine call.
666 lea FP_SRC(%a6),%a0 # pass: ptr to src op
667 bsr.l set_tag_x # tag the operand type
668 mov.b %d0,STAG(%a6) # maybe NORM,DENORM
670 # bit five of the fp extension word separates the monadic and dyadic operations
671 # that can pass through fpsp_ovfl(). remember that fcmp, ftst, and fsincos
672 # will never take this exception.
673 btst &0x5,1+EXC_CMDREG(%a6) # is operation monadic or dyadic?
674 beq.b fovfl_extract # monadic
676 bfextu EXC_CMDREG(%a6){&6:&3},%d0 # dyadic; load dst reg
677 bsr.l load_fpn2 # load dst into FP_DST
679 lea FP_DST(%a6),%a0 # pass: ptr to dst op
680 bsr.l set_tag_x # tag the operand type
681 cmpi.b %d0,&UNNORM # is operand an UNNORM?
682 bne.b fovfl_op2_done # no
683 bsr.l unnorm_fix # yes; convert to NORM,DENORM,or ZERO
685 mov.b %d0,DTAG(%a6) # save dst optype tag
689 #$# mov.l FP_SRC_EX(%a6),TRAP_SRCOP_EX(%a6)
690 #$# mov.l FP_SRC_HI(%a6),TRAP_SRCOP_HI(%a6)
691 #$# mov.l FP_SRC_LO(%a6),TRAP_SRCOP_LO(%a6)
692 #$# mov.l FP_DST_EX(%a6),TRAP_DSTOP_EX(%a6)
693 #$# mov.l FP_DST_HI(%a6),TRAP_DSTOP_HI(%a6)
694 #$# mov.l FP_DST_LO(%a6),TRAP_DSTOP_LO(%a6)
697 mov.b FPCR_MODE(%a6),%d0 # pass rnd prec/mode
699 mov.b 1+EXC_CMDREG(%a6),%d1
700 andi.w &0x007f,%d1 # extract extension
702 andi.l &0x00ff01ff,USER_FPSR(%a6) # zero all but accured field
704 fmov.l &0x0,%fpcr # zero current control regs
710 # maybe we can make these entry points ONLY the OVFL entry points of each routine.
711 mov.l (tbl_unsupp.l,%pc,%d1.w*4),%d1 # fetch routine addr
712 jsr (tbl_unsupp.l,%pc,%d1.l*1)
714 # the operation has been emulated. the result is in fp0.
715 # the EXOP, if an exception occurred, is in fp1.
716 # we must save the default result regardless of whether
717 # traps are enabled or disabled.
718 bfextu EXC_CMDREG(%a6){&6:&3},%d0
721 # the exceptional possibilities we have left ourselves with are ONLY overflow
722 # and inexact. and, the inexact is such that overflow occurred and was disabled
723 # but inexact was enabled.
724 btst &ovfl_bit,FPCR_ENABLE(%a6)
727 btst &inex2_bit,FPCR_ENABLE(%a6)
730 fmovm.x EXC_FPREGS(%a6),&0xc0 # restore fp0-fp1
731 fmovm.l USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs
732 movm.l EXC_DREGS(%a6),&0x0303 # restore d0-d1/a0-a1
738 # overflow is enabled AND overflow, of course, occurred. so, we have the EXOP
739 # in fp1. now, simply jump to _real_ovfl()!
741 fmovm.x &0x40,FP_SRC(%a6) # save EXOP (fp1) to stack
743 mov.w &0xe005,2+FP_SRC(%a6) # save exc status
745 fmovm.x EXC_FPREGS(%a6),&0xc0 # restore fp0-fp1
746 fmovm.l USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs
747 movm.l EXC_DREGS(%a6),&0x0303 # restore d0-d1/a0-a1
749 frestore FP_SRC(%a6) # do this after fmovm,other f<op>s!
755 # overflow occurred but is disabled. meanwhile, inexact is enabled. Therefore,
756 # we must jump to real_inex().
759 fmovm.x &0x40,FP_SRC(%a6) # save EXOP (fp1) to stack
761 mov.b &0xc4,1+EXC_VOFF(%a6) # vector offset = 0xc4
762 mov.w &0xe001,2+FP_SRC(%a6) # save exc status
764 fmovm.x EXC_FPREGS(%a6),&0xc0 # restore fp0-fp1
765 fmovm.l USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs
766 movm.l EXC_DREGS(%a6),&0x0303 # restore d0-d1/a0-a1
768 frestore FP_SRC(%a6) # do this after fmovm,other f<op>s!
774 ########################################################################
778 #$# mov.l FP_SRC_EX(%a6),TRAP_SRCOP_EX(%a6)
779 #$# mov.l FP_SRC_HI(%a6),TRAP_SRCOP_HI(%a6)
780 #$# mov.l FP_SRC_LO(%a6),TRAP_SRCOP_LO(%a6)
782 # the src operand is definitely a NORM(!), so tag it as such
783 mov.b &NORM,STAG(%a6) # set src optype tag
786 mov.b FPCR_MODE(%a6),%d0 # pass rnd prec/mode
788 and.l &0xffff00ff,USER_FPSR(%a6) # zero all but accured field
790 fmov.l &0x0,%fpcr # zero current control regs
793 lea FP_SRC(%a6),%a0 # pass ptr to src operand
797 btst &ovfl_bit,FPCR_ENABLE(%a6)
800 btst &inex2_bit,FPCR_ENABLE(%a6)
803 fmovm.x EXC_FPREGS(%a6),&0xc0 # restore fp0-fp1
804 fmovm.l USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs
805 movm.l EXC_DREGS(%a6),&0x0303 # restore d0-d1/a0-a1
810 btst &0x7,(%sp) # is trace on?
811 beq.l _fpsp_done # no
813 fmov.l %fpiar,0x8(%sp) # "Current PC" is in FPIAR
814 mov.w &0x2024,0x6(%sp) # stk fmt = 0x2; voff = 0x024
817 #########################################################################
818 # XDEF **************************************************************** #
819 # _fpsp_unfl(): 060FPSP entry point for FP Underflow exception. #
821 # This handler should be the first code executed upon taking the #
822 # FP Underflow exception in an operating system. #
824 # XREF **************************************************************** #
825 # _imem_read_long() - read instruction longword #
826 # fix_skewed_ops() - adjust src operand in fsave frame #
827 # set_tag_x() - determine optype of src/dst operands #
828 # store_fpreg() - store opclass 0 or 2 result to FP regfile #
829 # unnorm_fix() - change UNNORM operands to NORM or ZERO #
830 # load_fpn2() - load dst operand from FP regfile #
831 # fout() - emulate an opclass 3 instruction #
832 # tbl_unsupp - addr of table of emulation routines for opclass 0,2 #
833 # _fpsp_done() - "callout" for 060FPSP exit (all work done!) #
834 # _real_ovfl() - "callout" for Overflow exception enabled code #
835 # _real_inex() - "callout" for Inexact exception enabled code #
836 # _real_trace() - "callout" for Trace exception code #
838 # INPUT *************************************************************** #
839 # - The system stack contains the FP Unfl exception stack frame #
840 # - The fsave frame contains the source operand #
842 # OUTPUT ************************************************************** #
843 # Underflow Exception enabled: #
844 # - The system stack is unchanged #
845 # - The fsave frame contains the adjusted src op for opclass 0,2 #
846 # Underflow Exception disabled: #
847 # - The system stack is unchanged #
848 # - The "exception present" flag in the fsave frame is cleared #
850 # ALGORITHM *********************************************************** #
851 # On the 060, if an FP underflow is present as the result of any #
852 # instruction, the 060 will take an underflow exception whether the #
853 # exception is enabled or disabled in the FPCR. For the disabled case, #
854 # This handler emulates the instruction to determine what the correct #
855 # default result should be for the operation. This default result is #
856 # then stored in either the FP regfile, data regfile, or memory. #
857 # Finally, the handler exits through the "callout" _fpsp_done() #
858 # denoting that no exceptional conditions exist within the machine. #
859 # If the exception is enabled, then this handler must create the #
860 # exceptional operand and place it in the fsave state frame, and store #
861 # the default result (only if the instruction is opclass 3). For #
862 # exceptions enabled, this handler must exit through the "callout" #
863 # _real_unfl() so that the operating system enabled overflow handler #
864 # can handle this case. #
865 # Two other conditions exist. First, if underflow was disabled #
866 # but the inexact exception was enabled and the result was inexact, #
867 # this handler must exit through the "callout" _real_inex(). #
869 # Also, in the case of an opclass three instruction where #
870 # underflow was disabled and the trace exception was enabled, this #
871 # handler must exit through the "callout" _real_trace(). #
873 #########################################################################
878 #$# sub.l &24,%sp # make room for src/dst
880 link.w %a6,&-LOCAL_SIZE # init stack frame
882 fsave FP_SRC(%a6) # grab the "busy" frame
884 movm.l &0x0303,EXC_DREGS(%a6) # save d0-d1/a0-a1
885 fmovm.l %fpcr,%fpsr,%fpiar,USER_FPCR(%a6) # save ctrl regs
886 fmovm.x &0xc0,EXC_FPREGS(%a6) # save fp0-fp1 on stack
888 # the FPIAR holds the "current PC" of the faulting instruction
889 mov.l USER_FPIAR(%a6),EXC_EXTWPTR(%a6)
890 mov.l EXC_EXTWPTR(%a6),%a0 # fetch instruction addr
891 addq.l &0x4,EXC_EXTWPTR(%a6) # incr instruction ptr
892 bsr.l _imem_read_long # fetch the instruction words
893 mov.l %d0,EXC_OPWORD(%a6)
895 ##############################################################################
897 btst &0x5,EXC_CMDREG(%a6) # is instr an fmove out?
901 lea FP_SRC(%a6),%a0 # pass: ptr to src op
902 bsr.l fix_skewed_ops # fix src op
904 lea FP_SRC(%a6),%a0 # pass: ptr to src op
905 bsr.l set_tag_x # tag the operand type
906 mov.b %d0,STAG(%a6) # maybe NORM,DENORM
908 # bit five of the fp ext word separates the monadic and dyadic operations
909 # that can pass through fpsp_unfl(). remember that fcmp, and ftst
910 # will never take this exception.
911 btst &0x5,1+EXC_CMDREG(%a6) # is op monadic or dyadic?
912 beq.b funfl_extract # monadic
914 # now, what's left that's not dyadic is fsincos. we can distinguish it
915 # from all dyadics by the '0110xxx pattern
916 btst &0x4,1+EXC_CMDREG(%a6) # is op an fsincos?
917 bne.b funfl_extract # yes
919 bfextu EXC_CMDREG(%a6){&6:&3},%d0 # dyadic; load dst reg
920 bsr.l load_fpn2 # load dst into FP_DST
922 lea FP_DST(%a6),%a0 # pass: ptr to dst op
923 bsr.l set_tag_x # tag the operand type
924 cmpi.b %d0,&UNNORM # is operand an UNNORM?
925 bne.b funfl_op2_done # no
926 bsr.l unnorm_fix # yes; convert to NORM,DENORM,or ZERO
928 mov.b %d0,DTAG(%a6) # save dst optype tag
932 #$# mov.l FP_SRC_EX(%a6),TRAP_SRCOP_EX(%a6)
933 #$# mov.l FP_SRC_HI(%a6),TRAP_SRCOP_HI(%a6)
934 #$# mov.l FP_SRC_LO(%a6),TRAP_SRCOP_LO(%a6)
935 #$# mov.l FP_DST_EX(%a6),TRAP_DSTOP_EX(%a6)
936 #$# mov.l FP_DST_HI(%a6),TRAP_DSTOP_HI(%a6)
937 #$# mov.l FP_DST_LO(%a6),TRAP_DSTOP_LO(%a6)
940 mov.b FPCR_MODE(%a6),%d0 # pass rnd prec/mode
942 mov.b 1+EXC_CMDREG(%a6),%d1
943 andi.w &0x007f,%d1 # extract extension
945 andi.l &0x00ff01ff,USER_FPSR(%a6)
947 fmov.l &0x0,%fpcr # zero current control regs
953 # maybe we can make these entry points ONLY the OVFL entry points of each routine.
954 mov.l (tbl_unsupp.l,%pc,%d1.w*4),%d1 # fetch routine addr
955 jsr (tbl_unsupp.l,%pc,%d1.l*1)
957 bfextu EXC_CMDREG(%a6){&6:&3},%d0
960 # The `060 FPU multiplier hardware is such that if the result of a
961 # multiply operation is the smallest possible normalized number
962 # (0x00000000_80000000_00000000), then the machine will take an
963 # underflow exception. Since this is incorrect, we need to check
964 # if our emulation, after re-doing the operation, decided that
965 # no underflow was called for. We do these checks only in
966 # funfl_{unfl,inex}_on() because w/ both exceptions disabled, this
967 # special case will simply exit gracefully with the correct result.
969 # the exceptional possibilities we have left ourselves with are ONLY overflow
970 # and inexact. and, the inexact is such that overflow occurred and was disabled
971 # but inexact was enabled.
972 btst &unfl_bit,FPCR_ENABLE(%a6)
976 btst &inex2_bit,FPCR_ENABLE(%a6)
980 fmovm.x EXC_FPREGS(%a6),&0xc0 # restore fp0-fp1
981 fmovm.l USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs
982 movm.l EXC_DREGS(%a6),&0x0303 # restore d0-d1/a0-a1
988 # overflow is enabled AND overflow, of course, occurred. so, we have the EXOP
989 # in fp1 (don't forget to save fp0). what to do now?
990 # well, we simply have to get to go to _real_unfl()!
993 # The `060 FPU multiplier hardware is such that if the result of a
994 # multiply operation is the smallest possible normalized number
995 # (0x00000000_80000000_00000000), then the machine will take an
996 # underflow exception. Since this is incorrect, we check here to see
997 # if our emulation, after re-doing the operation, decided that
998 # no underflow was called for.
999 btst &unfl_bit,FPSR_EXCEPT(%a6)
1003 fmovm.x &0x40,FP_SRC(%a6) # save EXOP (fp1) to stack
1005 mov.w &0xe003,2+FP_SRC(%a6) # save exc status
1007 fmovm.x EXC_FPREGS(%a6),&0xc0 # restore fp0-fp1
1008 fmovm.l USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs
1009 movm.l EXC_DREGS(%a6),&0x0303 # restore d0-d1/a0-a1
1011 frestore FP_SRC(%a6) # do this after fmovm,other f<op>s!
1017 # underflow occurred but is disabled. meanwhile, inexact is enabled. Therefore,
1018 # we must jump to real_inex().
1021 # The `060 FPU multiplier hardware is such that if the result of a
1022 # multiply operation is the smallest possible normalized number
1023 # (0x00000000_80000000_00000000), then the machine will take an
1024 # underflow exception.
1025 # But, whether bogus or not, if inexact is enabled AND it occurred,
1026 # then we have to branch to real_inex.
1028 btst &inex2_bit,FPSR_EXCEPT(%a6)
1033 fmovm.x &0x40,FP_SRC(%a6) # save EXOP to stack
1035 mov.b &0xc4,1+EXC_VOFF(%a6) # vector offset = 0xc4
1036 mov.w &0xe001,2+FP_SRC(%a6) # save exc status
1038 fmovm.x EXC_FPREGS(%a6),&0xc0 # restore fp0-fp1
1039 fmovm.l USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs
1040 movm.l EXC_DREGS(%a6),&0x0303 # restore d0-d1/a0-a1
1042 frestore FP_SRC(%a6) # do this after fmovm,other f<op>s!
1048 #######################################################################
1052 #$# mov.l FP_SRC_EX(%a6),TRAP_SRCOP_EX(%a6)
1053 #$# mov.l FP_SRC_HI(%a6),TRAP_SRCOP_HI(%a6)
1054 #$# mov.l FP_SRC_LO(%a6),TRAP_SRCOP_LO(%a6)
1056 # the src operand is definitely a NORM(!), so tag it as such
1057 mov.b &NORM,STAG(%a6) # set src optype tag
1060 mov.b FPCR_MODE(%a6),%d0 # pass rnd prec/mode
1062 and.l &0xffff00ff,USER_FPSR(%a6) # zero all but accured field
1064 fmov.l &0x0,%fpcr # zero current control regs
1067 lea FP_SRC(%a6),%a0 # pass ptr to src operand
1071 btst &unfl_bit,FPCR_ENABLE(%a6)
1072 bne.w funfl_unfl_on2
1074 btst &inex2_bit,FPCR_ENABLE(%a6)
1075 bne.w funfl_inex_on2
1077 fmovm.x EXC_FPREGS(%a6),&0xc0 # restore fp0-fp1
1078 fmovm.l USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs
1079 movm.l EXC_DREGS(%a6),&0x0303 # restore d0-d1/a0-a1
1084 btst &0x7,(%sp) # is trace on?
1085 beq.l _fpsp_done # no
1087 fmov.l %fpiar,0x8(%sp) # "Current PC" is in FPIAR
1088 mov.w &0x2024,0x6(%sp) # stk fmt = 0x2; voff = 0x024
1091 #########################################################################
1092 # XDEF **************************************************************** #
1093 # _fpsp_unsupp(): 060FPSP entry point for FP "Unimplemented #
1094 # Data Type" exception. #
1096 # This handler should be the first code executed upon taking the #
1097 # FP Unimplemented Data Type exception in an operating system. #
1099 # XREF **************************************************************** #
1100 # _imem_read_{word,long}() - read instruction word/longword #
1101 # fix_skewed_ops() - adjust src operand in fsave frame #
1102 # set_tag_x() - determine optype of src/dst operands #
1103 # store_fpreg() - store opclass 0 or 2 result to FP regfile #
1104 # unnorm_fix() - change UNNORM operands to NORM or ZERO #
1105 # load_fpn2() - load dst operand from FP regfile #
1106 # load_fpn1() - load src operand from FP regfile #
1107 # fout() - emulate an opclass 3 instruction #
1108 # tbl_unsupp - addr of table of emulation routines for opclass 0,2 #
1109 # _real_inex() - "callout" to operating system inexact handler #
1110 # _fpsp_done() - "callout" for exit; work all done #
1111 # _real_trace() - "callout" for Trace enabled exception #
1112 # funimp_skew() - adjust fsave src ops to "incorrect" value #
1113 # _real_snan() - "callout" for SNAN exception #
1114 # _real_operr() - "callout" for OPERR exception #
1115 # _real_ovfl() - "callout" for OVFL exception #
1116 # _real_unfl() - "callout" for UNFL exception #
1117 # get_packed() - fetch packed operand from memory #
1119 # INPUT *************************************************************** #
1120 # - The system stack contains the "Unimp Data Type" stk frame #
1121 # - The fsave frame contains the ssrc op (for UNNORM/DENORM) #
1123 # OUTPUT ************************************************************** #
1124 # If Inexact exception (opclass 3): #
1125 # - The system stack is changed to an Inexact exception stk frame #
1126 # If SNAN exception (opclass 3): #
1127 # - The system stack is changed to an SNAN exception stk frame #
1128 # If OPERR exception (opclass 3): #
1129 # - The system stack is changed to an OPERR exception stk frame #
1130 # If OVFL exception (opclass 3): #
1131 # - The system stack is changed to an OVFL exception stk frame #
1132 # If UNFL exception (opclass 3): #
1133 # - The system stack is changed to an UNFL exception stack frame #
1134 # If Trace exception enabled: #
1135 # - The system stack is changed to a Trace exception stack frame #
1136 # Else: (normal case) #
1137 # - Correct result has been stored as appropriate #
1139 # ALGORITHM *********************************************************** #
1140 # Two main instruction types can enter here: (1) DENORM or UNNORM #
1141 # unimplemented data types. These can be either opclass 0,2 or 3 #
1142 # instructions, and (2) PACKED unimplemented data format instructions #
1143 # also of opclasses 0,2, or 3. #
1144 # For UNNORM/DENORM opclass 0 and 2, the handler fetches the src #
1145 # operand from the fsave state frame and the dst operand (if dyadic) #
1146 # from the FP register file. The instruction is then emulated by #
1147 # choosing an emulation routine from a table of routines indexed by #
1148 # instruction type. Once the instruction has been emulated and result #
1149 # saved, then we check to see if any enabled exceptions resulted from #
1150 # instruction emulation. If none, then we exit through the "callout" #
1151 # _fpsp_done(). If there is an enabled FP exception, then we insert #
1152 # this exception into the FPU in the fsave state frame and then exit #
1153 # through _fpsp_done(). #
1154 # PACKED opclass 0 and 2 is similar in how the instruction is #
1155 # emulated and exceptions handled. The differences occur in how the #
1156 # handler loads the packed op (by calling get_packed() routine) and #
1157 # by the fact that a Trace exception could be pending for PACKED ops. #
1158 # If a Trace exception is pending, then the current exception stack #
1159 # frame is changed to a Trace exception stack frame and an exit is #
1160 # made through _real_trace(). #
1161 # For UNNORM/DENORM opclass 3, the actual move out to memory is #
1162 # performed by calling the routine fout(). If no exception should occur #
1163 # as the result of emulation, then an exit either occurs through #
1164 # _fpsp_done() or through _real_trace() if a Trace exception is pending #
1165 # (a Trace stack frame must be created here, too). If an FP exception #
1166 # should occur, then we must create an exception stack frame of that #
1167 # type and jump to either _real_snan(), _real_operr(), _real_inex(), #
1168 # _real_unfl(), or _real_ovfl() as appropriate. PACKED opclass 3 #
1169 # emulation is performed in a similar manner. #
1171 #########################################################################
1174 # (1) DENORM and UNNORM (unimplemented) data types:
1179 # pre-instruction * *
1180 # ***************** *****************
1181 # * 0x0 * 0x0dc * * 0x3 * 0x0dc *
1182 # ***************** *****************
1185 # ***************** *****************
1187 # ***************** *****************
1189 # (2) PACKED format (unsupported) opclasses two and three:
1205 link.w %a6,&-LOCAL_SIZE # init stack frame
1207 fsave FP_SRC(%a6) # save fp state
1209 movm.l &0x0303,EXC_DREGS(%a6) # save d0-d1/a0-a1
1210 fmovm.l %fpcr,%fpsr,%fpiar,USER_FPCR(%a6) # save ctrl regs
1211 fmovm.x &0xc0,EXC_FPREGS(%a6) # save fp0-fp1 on stack
1213 btst &0x5,EXC_SR(%a6) # user or supervisor mode?
1216 mov.l %usp,%a0 # fetch user stack pointer
1217 mov.l %a0,EXC_A7(%a6) # save on stack
1219 # if the exception is an opclass zero or two unimplemented data type
1220 # exception, then the a7' calculated here is wrong since it doesn't
1221 # stack an ea. however, we don't need an a7' for this case anyways.
1223 lea 0x4+EXC_EA(%a6),%a0 # load old a7'
1224 mov.l %a0,EXC_A7(%a6) # save on stack
1228 # the FPIAR holds the "current PC" of the faulting instruction
1229 # the FPIAR should be set correctly for ALL exceptions passing through
1231 mov.l USER_FPIAR(%a6),EXC_EXTWPTR(%a6)
1232 mov.l EXC_EXTWPTR(%a6),%a0 # fetch instruction addr
1233 addq.l &0x4,EXC_EXTWPTR(%a6) # incr instruction ptr
1234 bsr.l _imem_read_long # fetch the instruction words
1235 mov.l %d0,EXC_OPWORD(%a6) # store OPWORD and EXTWORD
1237 ############################
1239 clr.b SPCOND_FLG(%a6) # clear special condition flag
1241 # Separate opclass three (fpn-to-mem) ops since they have a different
1242 # stack frame and protocol.
1243 btst &0x5,EXC_CMDREG(%a6) # is it an fmove out?
1246 # Separate packed opclass two instructions.
1247 bfextu EXC_CMDREG(%a6){&0:&6},%d0
1252 # I'm not sure at this point what FPSR bits are valid for this instruction.
1253 # so, since the emulation routines re-create them anyways, zero exception field
1254 andi.l &0x00ff00ff,USER_FPSR(%a6) # zero exception field
1256 fmov.l &0x0,%fpcr # zero current control regs
1259 # Opclass two w/ memory-to-fpn operation will have an incorrect extended
1260 # precision format if the src format was single or double and the
1261 # source data type was an INF, NAN, DENORM, or UNNORM
1262 lea FP_SRC(%a6),%a0 # pass ptr to input
1263 bsr.l fix_skewed_ops
1265 # we don't know whether the src operand or the dst operand (or both) is the
1266 # UNNORM or DENORM. call the function that tags the operand type. if the
1267 # input is an UNNORM, then convert it to a NORM, DENORM, or ZERO.
1268 lea FP_SRC(%a6),%a0 # pass: ptr to src op
1269 bsr.l set_tag_x # tag the operand type
1270 cmpi.b %d0,&UNNORM # is operand an UNNORM?
1272 bsr.l unnorm_fix # yes; convert to NORM,DENORM,or ZERO
1275 mov.b %d0,STAG(%a6) # save src optype tag
1277 bfextu EXC_CMDREG(%a6){&6:&3},%d0 # dyadic; load dst reg
1279 # bit five of the fp extension word separates the monadic and dyadic operations
1281 btst &0x5,1+EXC_CMDREG(%a6) # is operation monadic or dyadic?
1282 beq.b fu_extract # monadic
1283 cmpi.b 1+EXC_CMDREG(%a6),&0x3a # is operation an ftst?
1284 beq.b fu_extract # yes, so it's monadic, too
1286 bsr.l load_fpn2 # load dst into FP_DST
1288 lea FP_DST(%a6),%a0 # pass: ptr to dst op
1289 bsr.l set_tag_x # tag the operand type
1290 cmpi.b %d0,&UNNORM # is operand an UNNORM?
1291 bne.b fu_op2_done # no
1292 bsr.l unnorm_fix # yes; convert to NORM,DENORM,or ZERO
1294 mov.b %d0,DTAG(%a6) # save dst optype tag
1298 mov.b FPCR_MODE(%a6),%d0 # fetch rnd mode/prec
1300 bfextu 1+EXC_CMDREG(%a6){&1:&7},%d1 # extract extension
1305 mov.l (tbl_unsupp.l,%pc,%d1.l*4),%d1 # fetch routine addr
1306 jsr (tbl_unsupp.l,%pc,%d1.l*1)
1309 # Exceptions in order of precedence:
1311 # SNAN : all dyadic ops
1312 # OPERR : fsqrt(-NORM)
1313 # OVFL : all except ftst,fcmp
1314 # UNFL : all except ftst,fcmp
1316 # INEX2 : all except ftst,fcmp
1317 # INEX1 : none (packed doesn't go through here)
1320 # we determine the highest priority exception(if any) set by the
1321 # emulation routine that has also been enabled by the user.
1322 mov.b FPCR_ENABLE(%a6),%d0 # fetch exceptions set
1323 bne.b fu_in_ena # some are enabled
1326 # fcmp and ftst do not store any result.
1327 mov.b 1+EXC_CMDREG(%a6),%d0 # fetch extension
1328 andi.b &0x38,%d0 # extract bits 3-5
1329 cmpi.b %d0,&0x38 # is instr fcmp or ftst?
1330 beq.b fu_in_exit # yes
1332 bfextu EXC_CMDREG(%a6){&6:&3},%d0 # dyadic; load dst reg
1333 bsr.l store_fpreg # store the result
1337 fmovm.x EXC_FPREGS(%a6),&0xc0 # restore fp0/fp1
1338 fmovm.l USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs
1339 movm.l EXC_DREGS(%a6),&0x0303 # restore d0-d1/a0-a1
1346 and.b FPSR_EXCEPT(%a6),%d0 # keep only ones enabled
1347 bfffo %d0{&24:&8},%d0 # find highest priority exception
1348 bne.b fu_in_exc # there is at least one set
1351 # No exceptions occurred that were also enabled. Now:
1353 # if (OVFL && ovfl_disabled && inexact_enabled) {
1354 # branch to _real_inex() (even if the result was exact!);
1356 # save the result in the proper fp reg (unless the op is fcmp or ftst);
1360 btst &ovfl_bit,FPSR_EXCEPT(%a6) # was overflow set?
1361 beq.b fu_in_cont # no
1364 btst &inex2_bit,FPCR_ENABLE(%a6) # was inexact enabled?
1365 beq.b fu_in_cont # no
1366 bra.w fu_in_exc_ovfl # go insert overflow frame
1369 # An exception occurred and that exception was enabled:
1371 # shift enabled exception field into lo byte of d0;
1372 # if (((INEX2 || INEX1) && inex_enabled && OVFL && ovfl_disabled) ||
1373 # ((INEX2 || INEX1) && inex_enabled && UNFL && unfl_disabled)) {
1375 # * this is the case where we must call _real_inex() now or else
1376 # * there will be no other way to pass it the exceptional operand
1378 # call _real_inex();
1380 # restore exc state (SNAN||OPERR||OVFL||UNFL||DZ||INEX) into the FPU;
1384 subi.l &24,%d0 # fix offset to be 0-8
1385 cmpi.b %d0,&0x6 # is exception INEX? (6)
1386 bne.b fu_in_exc_exit # no
1388 # the enabled exception was inexact
1389 btst &unfl_bit,FPSR_EXCEPT(%a6) # did disabled underflow occur?
1390 bne.w fu_in_exc_unfl # yes
1391 btst &ovfl_bit,FPSR_EXCEPT(%a6) # did disabled overflow occur?
1392 bne.w fu_in_exc_ovfl # yes
1394 # here, we insert the correct fsave status value into the fsave frame for the
1395 # corresponding exception. the operand in the fsave frame should be the original
1398 mov.l %d0,-(%sp) # save d0
1399 bsr.l funimp_skew # skew sgl or dbl inputs
1400 mov.l (%sp)+,%d0 # restore d0
1402 mov.w (tbl_except.b,%pc,%d0.w*2),2+FP_SRC(%a6) # create exc status
1404 fmovm.x EXC_FPREGS(%a6),&0xc0 # restore fp0/fp1
1405 fmovm.l USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs
1406 movm.l EXC_DREGS(%a6),&0x0303 # restore d0-d1/a0-a1
1408 frestore FP_SRC(%a6) # restore src op
1415 short 0xe000,0xe006,0xe004,0xe005
1416 short 0xe003,0xe002,0xe001,0xe001
1420 bra.b fu_in_exc_exit
1423 bra.b fu_in_exc_exit
1425 # If the input operand to this operation was opclass two and a single
1426 # or double precision denorm, inf, or nan, the operand needs to be
1427 # "corrected" in order to have the proper equivalent extended precision
# fix_skewed_ops():
# An opclass-2 source operand whose memory format was single or double
# precision arrives in the fsave frame as a "skewed" extended-precision
# value when it was an INF, NAN, DENORM, or ZERO (see comment above).
# Patch the value (pointed to by a0) back to its true extended form.
# NOTE(review): this listing elides lines -- including the fso_* label
# definitions -- between the embedded-numbered statements below.
1429 global fix_skewed_ops
1431 bfextu EXC_CMDREG(%a6){&0:&6},%d0 # extract opclass,src fmt
1432 cmpi.b %d0,&0x11 # is class = 2 & fmt = sgl?
1434 cmpi.b %d0,&0x15 # is class = 2 & fmt = dbl?
# --- single precision source ---
1439 mov.w LOCAL_EX(%a0),%d0 # fetch src exponent
1440 andi.w &0x7fff,%d0 # strip sign
1441 cmpi.w %d0,&0x3f80 # is |exp| == $3f80?
1442 beq.b fso_sgl_dnrm_zero # yes
1443 cmpi.w %d0,&0x407f # no; is |exp| == $407f?
1444 beq.b fso_infnan # yes
# skewed sgl DENORM/ZERO: drop the j-bit, then renormalize the mantissa
1448 andi.l &0x7fffffff,LOCAL_HI(%a0) # clear j-bit
1449 beq.b fso_zero # it's a skewed zero
1451 # here, we count on norm not to alter a0...
1452 bsr.l norm # normalize mantissa
1453 neg.w %d0 # -shft amt
1454 addi.w &0x3f81,%d0 # adjust new exponent
1455 andi.w &0x8000,LOCAL_EX(%a0) # clear old exponent
1456 or.w %d0,LOCAL_EX(%a0) # insert new exponent
# skewed zero: keep only the sign bit of the exponent word
1460 andi.w &0x8000,LOCAL_EX(%a0) # clear bogus exponent
# skewed INF/NAN: force the extended-precision INF/NAN encoding
1464 andi.b &0x7f,LOCAL_HI(%a0) # clear j-bit
1465 ori.w &0x7fff,LOCAL_EX(%a0) # make exponent = $7fff
# --- double precision source ---
1469 mov.w LOCAL_EX(%a0),%d0 # fetch src exponent
1470 andi.w &0x7fff,%d0 # strip sign
1471 cmpi.w %d0,&0x3c00 # is |exp| == $3c00?
1472 beq.b fso_dbl_dnrm_zero # yes
1473 cmpi.w %d0,&0x43ff # no; is |exp| == $43ff?
1474 beq.b fso_infnan # yes
# skewed dbl DENORM vs ZERO: both mantissa longs must be checked
1478 andi.l &0x7fffffff,LOCAL_HI(%a0) # clear j-bit
1479 bne.b fso_dbl_dnrm # it's a skewed denorm
1480 tst.l LOCAL_LO(%a0) # is it a zero?
1481 beq.b fso_zero # yes
1483 # here, we count on norm not to alter a0...
1484 bsr.l norm # normalize mantissa
1485 neg.w %d0 # -shft amt
1486 addi.w &0x3c01,%d0 # adjust new exponent
1487 andi.w &0x8000,LOCAL_EX(%a0) # clear old exponent
1488 or.w %d0,LOCAL_EX(%a0) # insert new exponent
1491 #################################################################
1493 # fmove out took an unimplemented data type exception.
1494 # the src operand is in FP_SRC. Call _fout() to write out the result and
1495 # to determine which exceptions, if any, to take.
1498 # Separate packed move outs from the UNNORM and DENORM move outs.
1499 bfextu EXC_CMDREG(%a6){&3:&3},%d0
1506 # I'm not sure at this point what FPSR bits are valid for this instruction.
1507 # so, since the emulation routines re-create them anyways, zero exception field.
1508 # fmove out doesn't affect ccodes.
1509 and.l &0xffff00ff,USER_FPSR(%a6) # zero exception field
1511 fmov.l &0x0,%fpcr # zero current control regs
1514 # the src can ONLY be a DENORM or an UNNORM! so, don't make any big subroutine
1515 # call here. just figure out what it is...
1516 mov.w FP_SRC_EX(%a6),%d0 # get exponent
1517 andi.w &0x7fff,%d0 # strip sign
1518 beq.b fu_out_denorm # it's a DENORM
1521 bsr.l unnorm_fix # yes; fix it
1527 mov.b &DENORM,STAG(%a6)
1531 mov.b FPCR_MODE(%a6),%d0 # fetch rnd mode/prec
1533 lea FP_SRC(%a6),%a0 # pass ptr to src operand
1535 mov.l (%a6),EXC_A6(%a6) # in case a6 changes
1536 bsr.l fout # call fmove out routine
1538 # Exceptions in order of precedence:
1541 # OPERR : fmove.{b,w,l} out of large UNNORM
1542 # OVFL : fmove.{s,d}
1543 # UNFL : fmove.{s,d,x}
1546 # INEX1 : none (packed doesn't travel through here)
1548 # determine the highest priority exception(if any) set by the
1549 # emulation routine that has also been enabled by the user.
1550 mov.b FPCR_ENABLE(%a6),%d0 # fetch exceptions enabled
1551 bne.w fu_out_ena # some are enabled
1555 mov.l EXC_A6(%a6),(%a6) # in case a6 changed
1557 # on extended precision opclass three instructions using pre-decrement or
1558 # post-increment addressing mode, the address register is not updated. if the
1559 # address register was the stack pointer used from user mode, then let's update
1560 # it here. if it was used from supervisor mode, then we have to handle this
1561 # as a special case.
1562 btst &0x5,EXC_SR(%a6)
1565 mov.l EXC_A7(%a6),%a0 # restore a7
1569 fmovm.x EXC_FPREGS(%a6),&0xc0 # restore fp0/fp1
1570 fmovm.l USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs
1571 movm.l EXC_DREGS(%a6),&0x0303 # restore d0-d1/a0-a1
1575 btst &0x7,(%sp) # is trace on?
1576 bne.b fu_out_trace # yes
1580 # is the ea mode pre-decrement of the stack pointer from supervisor mode?
1581 # ("fmov.x fpm,-(a7)") if so,
1583 cmpi.b SPCOND_FLG(%a6),&mda7_flg
1584 bne.b fu_out_done_cont
1586 # the extended precision result is still in fp0. but, we need to save it
1587 # somewhere on the stack until we can copy it to its final resting place.
1588 # here, we're counting on the top of the stack to be the old place-holders
1589 # for fp0/fp1 which have already been restored. that way, we can write
1590 # over those destinations with the shifted stack frame.
1591 fmovm.x &0x80,FP_SRC(%a6) # put answer on stack
1593 fmovm.x EXC_FPREGS(%a6),&0xc0 # restore fp0/fp1
1594 fmovm.l USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs
1595 movm.l EXC_DREGS(%a6),&0x0303 # restore d0-d1/a0-a1
1597 mov.l (%a6),%a6 # restore frame pointer
1599 mov.l LOCAL_SIZE+EXC_SR(%sp),LOCAL_SIZE+EXC_SR-0xc(%sp)
1600 mov.l LOCAL_SIZE+2+EXC_PC(%sp),LOCAL_SIZE+2+EXC_PC-0xc(%sp)
1602 # now, copy the result to the proper place on the stack
1603 mov.l LOCAL_SIZE+FP_SRC_EX(%sp),LOCAL_SIZE+EXC_SR+0x0(%sp)
1604 mov.l LOCAL_SIZE+FP_SRC_HI(%sp),LOCAL_SIZE+EXC_SR+0x4(%sp)
1605 mov.l LOCAL_SIZE+FP_SRC_LO(%sp),LOCAL_SIZE+EXC_SR+0x8(%sp)
1607 add.l &LOCAL_SIZE-0x8,%sp
1615 and.b FPSR_EXCEPT(%a6),%d0 # keep only ones enabled
1616 bfffo %d0{&24:&8},%d0 # find highest priority exception
1617 bne.b fu_out_exc # there is at least one set
1619 # no exceptions were set.
1620 # if a disabled overflow occurred and inexact was enabled but the result
1621 # was exact, then a branch to _real_inex() is made.
1622 btst &ovfl_bit,FPSR_EXCEPT(%a6) # was overflow set?
1623 beq.w fu_out_done # no
1626 btst &inex2_bit,FPCR_ENABLE(%a6) # was inexact enabled?
1627 beq.w fu_out_done # no
1631 # The fp move out that took the "Unimplemented Data Type" exception was
1632 # being traced. Since the stack frames are similar, get the "current" PC
1633 # from FPIAR and put it in the trace stack frame then jump to _real_trace().
1635 # UNSUPP FRAME TRACE FRAME
1636 # ***************** *****************
1637 # * EA * * Current *
1639 # ***************** *****************
1640 # * 0x3 * 0x0dc * * 0x2 * 0x024 *
1641 # ***************** *****************
1644 # ***************** *****************
1646 # ***************** *****************
1649 mov.w &0x2024,0x6(%sp)
1650 fmov.l %fpiar,0x8(%sp)
1653 # an exception occurred and that exception was enabled.
1655 subi.l &24,%d0 # fix offset to be 0-8
1657 # we don't mess with the existing fsave frame. just re-insert it and
1658 # jump to the "_real_{}()" handler...
1659 mov.w (tbl_fu_out.b,%pc,%d0.w*2),%d0
1660 jmp (tbl_fu_out.b,%pc,%d0.w*1)
1664 short tbl_fu_out - tbl_fu_out # BSUN can't happen
1665 short tbl_fu_out - tbl_fu_out # SNAN can't happen
1666 short fu_operr - tbl_fu_out # OPERR
1667 short fu_ovfl - tbl_fu_out # OVFL
1668 short fu_unfl - tbl_fu_out # UNFL
1669 short tbl_fu_out - tbl_fu_out # DZ can't happen
1670 short fu_inex - tbl_fu_out # INEX2
1671 short tbl_fu_out - tbl_fu_out # INEX1 won't make it here
1673 # for snan,operr,ovfl,unfl, src op is still in FP_SRC so just
1676 fmovm.x EXC_FPREGS(%a6),&0xc0 # restore fp0/fp1
1677 fmovm.l USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs
1678 movm.l EXC_DREGS(%a6),&0x0303 # restore d0-d1/a0-a1
1680 mov.w &0x30d8,EXC_VOFF(%a6) # vector offset = 0xd8
1681 mov.w &0xe006,2+FP_SRC(%a6)
1683 frestore FP_SRC(%a6)
1691 fmovm.x EXC_FPREGS(%a6),&0xc0 # restore fp0/fp1
1692 fmovm.l USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs
1693 movm.l EXC_DREGS(%a6),&0x0303 # restore d0-d1/a0-a1
1695 mov.w &0x30d0,EXC_VOFF(%a6) # vector offset = 0xd0
1696 mov.w &0xe004,2+FP_SRC(%a6)
1698 frestore FP_SRC(%a6)
1706 fmovm.x &0x40,FP_SRC(%a6) # save EXOP to the stack
1708 fmovm.x EXC_FPREGS(%a6),&0xc0 # restore fp0/fp1
1709 fmovm.l USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs
1710 movm.l EXC_DREGS(%a6),&0x0303 # restore d0-d1/a0-a1
1712 mov.w &0x30d4,EXC_VOFF(%a6) # vector offset = 0xd4
1713 mov.w &0xe005,2+FP_SRC(%a6)
1715 frestore FP_SRC(%a6) # restore EXOP
1721 # underflow can happen for extended precision. extended precision opclass
1722 # three instruction exceptions don't update the stack pointer. so, if the
1723 # exception occurred from user mode, then simply update a7 and exit normally.
1724 # if the exception occurred from supervisor mode, check if
1726 mov.l EXC_A6(%a6),(%a6) # restore a6
1728 btst &0x5,EXC_SR(%a6)
1731 mov.l EXC_A7(%a6),%a0 # restore a7 whether we need
1732 mov.l %a0,%usp # to or not...
1735 fmovm.x &0x40,FP_SRC(%a6) # save EXOP to the stack
1737 fmovm.x EXC_FPREGS(%a6),&0xc0 # restore fp0/fp1
1738 fmovm.l USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs
1739 movm.l EXC_DREGS(%a6),&0x0303 # restore d0-d1/a0-a1
1741 mov.w &0x30cc,EXC_VOFF(%a6) # vector offset = 0xcc
1742 mov.w &0xe003,2+FP_SRC(%a6)
1744 frestore FP_SRC(%a6) # restore EXOP
1751 cmpi.b SPCOND_FLG(%a6),&mda7_flg # was the <ea> mode -(sp)?
1754 # the extended precision result is still in fp0. but, we need to save it
1755 # somewhere on the stack until we can copy it to its final resting place
1756 # (where the exc frame is currently). make sure it's not at the top of the
1757 # frame or it will get overwritten when the exc stack frame is shifted "down".
1758 fmovm.x &0x80,FP_SRC(%a6) # put answer on stack
1759 fmovm.x &0x40,FP_DST(%a6) # put EXOP on stack
1761 fmovm.x EXC_FPREGS(%a6),&0xc0 # restore fp0/fp1
1762 fmovm.l USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs
1763 movm.l EXC_DREGS(%a6),&0x0303 # restore d0-d1/a0-a1
1765 mov.w &0x30cc,EXC_VOFF(%a6) # vector offset = 0xcc
1766 mov.w &0xe003,2+FP_DST(%a6)
1768 frestore FP_DST(%a6) # restore EXOP
1770 mov.l (%a6),%a6 # restore frame pointer
1772 mov.l LOCAL_SIZE+EXC_SR(%sp),LOCAL_SIZE+EXC_SR-0xc(%sp)
1773 mov.l LOCAL_SIZE+2+EXC_PC(%sp),LOCAL_SIZE+2+EXC_PC-0xc(%sp)
1774 mov.l LOCAL_SIZE+EXC_EA(%sp),LOCAL_SIZE+EXC_EA-0xc(%sp)
1776 # now, copy the result to the proper place on the stack
1777 mov.l LOCAL_SIZE+FP_SRC_EX(%sp),LOCAL_SIZE+EXC_SR+0x0(%sp)
1778 mov.l LOCAL_SIZE+FP_SRC_HI(%sp),LOCAL_SIZE+EXC_SR+0x4(%sp)
1779 mov.l LOCAL_SIZE+FP_SRC_LO(%sp),LOCAL_SIZE+EXC_SR+0x8(%sp)
1781 add.l &LOCAL_SIZE-0x8,%sp
1785 # fmove in and out enter here.
1787 fmovm.x &0x40,FP_SRC(%a6) # save EXOP to the stack
1789 fmovm.x EXC_FPREGS(%a6),&0xc0 # restore fp0/fp1
1790 fmovm.l USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs
1791 movm.l EXC_DREGS(%a6),&0x0303 # restore d0-d1/a0-a1
1793 mov.w &0x30c4,EXC_VOFF(%a6) # vector offset = 0xc4
1794 mov.w &0xe001,2+FP_SRC(%a6)
1796 frestore FP_SRC(%a6) # restore EXOP
1803 #########################################################################
1804 #########################################################################
1808 # I'm not sure at this point what FPSR bits are valid for this instruction.
1809 # so, since the emulation routines re-create them anyways, zero exception field
1810 andi.l &0x0ff00ff,USER_FPSR(%a6) # zero exception field
1812 fmov.l &0x0,%fpcr # zero current control regs
1815 bsr.l get_packed # fetch packed src operand
1817 lea FP_SRC(%a6),%a0 # pass ptr to src
1818 bsr.l set_tag_x # set src optype tag
1820 mov.b %d0,STAG(%a6) # save src optype tag
1822 bfextu EXC_CMDREG(%a6){&6:&3},%d0 # dyadic; load dst reg
1824 # bit five of the fp extension word separates the monadic and dyadic operations
1826 btst &0x5,1+EXC_CMDREG(%a6) # is operation monadic or dyadic?
1827 beq.b fu_extract_p # monadic
1828 cmpi.b 1+EXC_CMDREG(%a6),&0x3a # is operation an ftst?
1829 beq.b fu_extract_p # yes, so it's monadic, too
1831 bsr.l load_fpn2 # load dst into FP_DST
1833 lea FP_DST(%a6),%a0 # pass: ptr to dst op
1834 bsr.l set_tag_x # tag the operand type
1835 cmpi.b %d0,&UNNORM # is operand an UNNORM?
1836 bne.b fu_op2_done_p # no
1837 bsr.l unnorm_fix # yes; convert to NORM,DENORM,or ZERO
1839 mov.b %d0,DTAG(%a6) # save dst optype tag
1843 mov.b FPCR_MODE(%a6),%d0 # fetch rnd mode/prec
1845 bfextu 1+EXC_CMDREG(%a6){&1:&7},%d1 # extract extension
1850 mov.l (tbl_unsupp.l,%pc,%d1.l*4),%d1 # fetch routine addr
1851 jsr (tbl_unsupp.l,%pc,%d1.l*1)
1854 # Exceptions in order of precedence:
1856 # SNAN : all dyadic ops
1857 # OPERR : fsqrt(-NORM)
1858 # OVFL : all except ftst,fcmp
1859 # UNFL : all except ftst,fcmp
1861 # INEX2 : all except ftst,fcmp
1865 # we determine the highest priority exception(if any) set by the
1866 # emulation routine that has also been enabled by the user.
1867 mov.b FPCR_ENABLE(%a6),%d0 # fetch exceptions enabled
1868 bne.w fu_in_ena_p # some are enabled
1871 # fcmp and ftst do not store any result.
1872 mov.b 1+EXC_CMDREG(%a6),%d0 # fetch extension
1873 andi.b &0x38,%d0 # extract bits 3-5
1874 cmpi.b %d0,&0x38 # is instr fcmp or ftst?
1875 beq.b fu_in_exit_p # yes
1877 bfextu EXC_CMDREG(%a6){&6:&3},%d0 # dyadic; load dst reg
1878 bsr.l store_fpreg # store the result
1882 btst &0x5,EXC_SR(%a6) # user or supervisor?
1883 bne.w fu_in_exit_s_p # supervisor
1885 mov.l EXC_A7(%a6),%a0 # update user a7
1889 fmovm.x EXC_FPREGS(%a6),&0xc0 # restore fp0/fp1
1890 fmovm.l USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs
1891 movm.l EXC_DREGS(%a6),&0x0303 # restore d0-d1/a0-a1
1893 unlk %a6 # unravel stack frame
1895 btst &0x7,(%sp) # is trace on?
1896 bne.w fu_trace_p # yes
1898 bra.l _fpsp_done # exit to os
1900 # the exception occurred in supervisor mode. check to see if the
1901 # addressing mode was (a7)+. if so, we'll need to shift the
1904 btst &mia7_bit,SPCOND_FLG(%a6) # was ea mode (a7)+
1905 beq.b fu_in_exit_cont_p # no
1907 fmovm.x EXC_FPREGS(%a6),&0xc0 # restore fp0/fp1
1908 fmovm.l USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs
1909 movm.l EXC_DREGS(%a6),&0x0303 # restore d0-d1/a0-a1
1911 unlk %a6 # unravel stack frame
1913 # shift the stack frame "up". we don't really care about the <ea> field.
1914 mov.l 0x4(%sp),0x10(%sp)
1915 mov.l 0x0(%sp),0xc(%sp)
1918 btst &0x7,(%sp) # is trace on?
1919 bne.w fu_trace_p # yes
1921 bra.l _fpsp_done # exit to os
1924 and.b FPSR_EXCEPT(%a6),%d0 # keep only ones enabled & set
1925 bfffo %d0{&24:&8},%d0 # find highest priority exception
1926 bne.b fu_in_exc_p # at least one was set
1929 # No exceptions occurred that were also enabled. Now:
1931 # if (OVFL && ovfl_disabled && inexact_enabled) {
1932 # branch to _real_inex() (even if the result was exact!);
1934 # save the result in the proper fp reg (unless the op is fcmp or ftst);
1938 btst &ovfl_bit,FPSR_EXCEPT(%a6) # was overflow set?
1939 beq.w fu_in_cont_p # no
1942 btst &inex2_bit,FPCR_ENABLE(%a6) # was inexact enabled?
1943 beq.w fu_in_cont_p # no
1944 bra.w fu_in_exc_ovfl_p # do _real_inex() now
1947 # An exception occurred and that exception was enabled:
1949 # shift enabled exception field into lo byte of d0;
1950 # if (((INEX2 || INEX1) && inex_enabled && OVFL && ovfl_disabled) ||
1951 # ((INEX2 || INEX1) && inex_enabled && UNFL && unfl_disabled)) {
1953 # * this is the case where we must call _real_inex() now or else
1954 # * there will be no other way to pass it the exceptional operand
1956 # call _real_inex();
1958 # restore exc state (SNAN||OPERR||OVFL||UNFL||DZ||INEX) into the FPU;
1962 subi.l &24,%d0 # fix offset to be 0-8
1963 cmpi.b %d0,&0x6 # is exception INEX? (6 or 7)
1964 blt.b fu_in_exc_exit_p # no
1966 # the enabled exception was inexact
1967 btst &unfl_bit,FPSR_EXCEPT(%a6) # did disabled underflow occur?
1968 bne.w fu_in_exc_unfl_p # yes
1969 btst &ovfl_bit,FPSR_EXCEPT(%a6) # did disabled overflow occur?
1970 bne.w fu_in_exc_ovfl_p # yes
1972 # here, we insert the correct fsave status value into the fsave frame for the
1973 # corresponding exception. the operand in the fsave frame should be the original
1975 # as a reminder for future predicted pain and agony, we are passing in fsave the
1976 # "non-skewed" operand for cases of sgl and dbl src INFs,NANs, and DENORMs.
1977 # this is INCORRECT for enabled SNAN which would give to the user the skewed SNAN!!!
1979 btst &0x5,EXC_SR(%a6) # user or supervisor?
1980 bne.w fu_in_exc_exit_s_p # supervisor
1982 mov.l EXC_A7(%a6),%a0 # update user a7
1985 fu_in_exc_exit_cont_p:
1986 mov.w (tbl_except_p.b,%pc,%d0.w*2),2+FP_SRC(%a6)
1988 fmovm.x EXC_FPREGS(%a6),&0xc0 # restore fp0/fp1
1989 fmovm.l USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs
1990 movm.l EXC_DREGS(%a6),&0x0303 # restore d0-d1/a0-a1
1992 frestore FP_SRC(%a6) # restore src op
1996 btst &0x7,(%sp) # is trace enabled?
1997 bne.w fu_trace_p # yes
2002 short 0xe000,0xe006,0xe004,0xe005
2003 short 0xe003,0xe002,0xe001,0xe001
2007 bra.w fu_in_exc_exit_p
2011 bra.w fu_in_exc_exit_p
2014 btst &mia7_bit,SPCOND_FLG(%a6)
2015 beq.b fu_in_exc_exit_cont_p
2017 mov.w (tbl_except_p.b,%pc,%d0.w*2),2+FP_SRC(%a6)
2019 fmovm.x EXC_FPREGS(%a6),&0xc0 # restore fp0/fp1
2020 fmovm.l USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs
2021 movm.l EXC_DREGS(%a6),&0x0303 # restore d0-d1/a0-a1
2023 frestore FP_SRC(%a6) # restore src op
2025 unlk %a6 # unravel stack frame
2027 # shift stack frame "up". who cares about <ea> field.
2028 mov.l 0x4(%sp),0x10(%sp)
2029 mov.l 0x0(%sp),0xc(%sp)
2032 btst &0x7,(%sp) # is trace on?
2033 bne.b fu_trace_p # yes
2035 bra.l _fpsp_done # exit to os
2038 # The opclass two PACKED instruction that took an "Unimplemented Data Type"
2039 # exception was being traced. Make the "current" PC the FPIAR and put it in the
2040 # trace stack frame then jump to _real_trace().
2042 # UNSUPP FRAME TRACE FRAME
2043 # ***************** *****************
2044 # * EA * * Current *
2046 # ***************** *****************
2047 # * 0x2 * 0x0dc * * 0x2 * 0x024 *
2048 # ***************** *****************
2051 # ***************** *****************
2053 # ***************** *****************
2055 mov.w &0x2024,0x6(%sp)
2056 fmov.l %fpiar,0x8(%sp)
2060 #########################################################
2061 #########################################################
2065 # I'm not sure at this point what FPSR bits are valid for this instruction.
2066 # so, since the emulation routines re-create them anyways, zero exception field.
2067 # fmove out doesn't affect ccodes.
2068 and.l &0xffff00ff,USER_FPSR(%a6) # zero exception field
2070 fmov.l &0x0,%fpcr # zero current control regs
2073 bfextu EXC_CMDREG(%a6){&6:&3},%d0
2076 # unlike other opclass 3, unimplemented data type exceptions, packed must be
2077 # able to detect all operand types.
2079 bsr.l set_tag_x # tag the operand type
2080 cmpi.b %d0,&UNNORM # is operand an UNNORM?
2082 bsr.l unnorm_fix # yes; convert to NORM,DENORM,or ZERO
2085 mov.b %d0,STAG(%a6) # save src optype tag
2088 mov.b FPCR_MODE(%a6),%d0 # fetch rnd mode/prec
2090 lea FP_SRC(%a6),%a0 # pass ptr to src operand
2092 mov.l (%a6),EXC_A6(%a6) # in case a6 changes
2093 bsr.l fout # call fmove out routine
2095 # Exceptions in order of precedence:
2098 # OPERR : if ((k_factor > +17) || (dec. exp exceeds 3 digits))
2105 # determine the highest priority exception(if any) set by the
2106 # emulation routine that has also been enabled by the user.
2107 mov.b FPCR_ENABLE(%a6),%d0 # fetch exceptions enabled
2108 bne.w fu_out_ena_p # some are enabled
2111 mov.l EXC_A6(%a6),(%a6) # restore a6
2113 btst &0x5,EXC_SR(%a6) # user or supervisor?
2114 bne.b fu_out_exit_s_p # supervisor
2116 mov.l EXC_A7(%a6),%a0 # update user a7
2120 fmovm.x EXC_FPREGS(%a6),&0xc0 # restore fp0/fp1
2121 fmovm.l USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs
2122 movm.l EXC_DREGS(%a6),&0x0303 # restore d0-d1/a0-a1
2124 unlk %a6 # unravel stack frame
2126 btst &0x7,(%sp) # is trace on?
2127 bne.w fu_trace_p # yes
2129 bra.l _fpsp_done # exit to os
2131 # the exception occurred in supervisor mode. check to see if the
2132 # addressing mode was -(a7). if so, we'll need to shift the
2133 # stack frame "down".
2135 btst &mda7_bit,SPCOND_FLG(%a6) # was ea mode -(a7)
2136 beq.b fu_out_exit_cont_p # no
2138 fmovm.x EXC_FPREGS(%a6),&0xc0 # restore fp0/fp1
2139 fmovm.l USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs
2140 movm.l EXC_DREGS(%a6),&0x0303 # restore d0-d1/a0-a1
2142 mov.l (%a6),%a6 # restore frame pointer
2144 mov.l LOCAL_SIZE+EXC_SR(%sp),LOCAL_SIZE+EXC_SR-0xc(%sp)
2145 mov.l LOCAL_SIZE+2+EXC_PC(%sp),LOCAL_SIZE+2+EXC_PC-0xc(%sp)
2147 # now, copy the result to the proper place on the stack
2148 mov.l LOCAL_SIZE+FP_DST_EX(%sp),LOCAL_SIZE+EXC_SR+0x0(%sp)
2149 mov.l LOCAL_SIZE+FP_DST_HI(%sp),LOCAL_SIZE+EXC_SR+0x4(%sp)
2150 mov.l LOCAL_SIZE+FP_DST_LO(%sp),LOCAL_SIZE+EXC_SR+0x8(%sp)
2152 add.l &LOCAL_SIZE-0x8,%sp
2160 and.b FPSR_EXCEPT(%a6),%d0 # keep only ones enabled
2161 bfffo %d0{&24:&8},%d0 # find highest priority exception
2164 mov.l EXC_A6(%a6),(%a6) # restore a6
2166 # an exception occurred and that exception was enabled.
2167 # the only exception possible on packed move out are INEX, OPERR, and SNAN.
2174 btst &0x5,EXC_SR(%a6)
2177 mov.l EXC_A7(%a6),%a0
2182 cmpi.b SPCOND_FLG(%a6),&mda7_flg
2185 # the instruction was "fmove.p fpn,-(a7)" from supervisor mode.
2186 # the strategy is to move the exception frame "down" 12 bytes. then, we
2187 # can store the default result where the exception frame was.
2188 fmovm.x EXC_FPREGS(%a6),&0xc0 # restore fp0/fp1
2189 fmovm.l USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs
2190 movm.l EXC_DREGS(%a6),&0x0303 # restore d0-d1/a0-a1
2192 mov.w &0x30d8,EXC_VOFF(%a6) # stk fmt = 0x3; vector offset = 0xd8
2193 mov.w &0xe006,2+FP_SRC(%a6) # set fsave status
2195 frestore FP_SRC(%a6) # restore src operand
2197 mov.l (%a6),%a6 # restore frame pointer
2199 mov.l LOCAL_SIZE+EXC_SR(%sp),LOCAL_SIZE+EXC_SR-0xc(%sp) # shift SR "down" 12 bytes
2200 mov.l LOCAL_SIZE+2+EXC_PC(%sp),LOCAL_SIZE+2+EXC_PC-0xc(%sp) # shift PC "down" 12 bytes
2201 mov.l LOCAL_SIZE+EXC_EA(%sp),LOCAL_SIZE+EXC_EA-0xc(%sp) # shift EA "down" 12 bytes
2203 # now, we copy the default result to its proper location
2204 mov.l LOCAL_SIZE+FP_DST_EX(%sp),LOCAL_SIZE+0x4(%sp) # 12-byte default result...
2205 mov.l LOCAL_SIZE+FP_DST_HI(%sp),LOCAL_SIZE+0x8(%sp) # ...goes where the old
2206 mov.l LOCAL_SIZE+FP_DST_LO(%sp),LOCAL_SIZE+0xc(%sp) # ...exception frame was
2208 add.l &LOCAL_SIZE-0x8,%sp # strip local frame
2214 btst &0x5,EXC_SR(%a6)
2217 mov.l EXC_A7(%a6),%a0
2222 cmpi.b SPCOND_FLG(%a6),&mda7_flg
2225 # the instruction was "fmove.p fpn,-(a7)" from supervisor mode.
2226 # the strategy is to move the exception frame "down" 12 bytes. then, we
2227 # can store the default result where the exception frame was.
2228 fmovm.x EXC_FPREGS(%a6),&0xc0 # restore fp0/fp1
2229 fmovm.l USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs
2230 movm.l EXC_DREGS(%a6),&0x0303 # restore d0-d1/a0-a1
2232 mov.w &0x30d0,EXC_VOFF(%a6) # vector offset = 0xd0
2233 mov.w &0xe004,2+FP_SRC(%a6) # set fsave status
2235 frestore FP_SRC(%a6) # restore src operand
2237 mov.l (%a6),%a6 # restore frame pointer
2239 mov.l LOCAL_SIZE+EXC_SR(%sp),LOCAL_SIZE+EXC_SR-0xc(%sp)
2240 mov.l LOCAL_SIZE+2+EXC_PC(%sp),LOCAL_SIZE+2+EXC_PC-0xc(%sp)
2241 mov.l LOCAL_SIZE+EXC_EA(%sp),LOCAL_SIZE+EXC_EA-0xc(%sp)
2243 # now, we copy the default result to its proper location
2244 mov.l LOCAL_SIZE+FP_DST_EX(%sp),LOCAL_SIZE+0x4(%sp)
2245 mov.l LOCAL_SIZE+FP_DST_HI(%sp),LOCAL_SIZE+0x8(%sp)
2246 mov.l LOCAL_SIZE+FP_DST_LO(%sp),LOCAL_SIZE+0xc(%sp)
2248 add.l &LOCAL_SIZE-0x8,%sp
2254 btst &0x5,EXC_SR(%a6)
2257 mov.l EXC_A7(%a6),%a0
2262 cmpi.b SPCOND_FLG(%a6),&mda7_flg
2265 # the instruction was "fmove.p fpn,-(a7)" from supervisor mode.
2266 # the strategy is to move the exception frame "down" 12 bytes. then, we
2267 # can store the default result where the exception frame was.
2268 fmovm.x EXC_FPREGS(%a6),&0xc0 # restore fp0/fp1
2269 fmovm.l USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs
2270 movm.l EXC_DREGS(%a6),&0x0303 # restore d0-d1/a0-a1
2272 mov.w &0x30c4,EXC_VOFF(%a6) # vector offset = 0xc4
2273 mov.w &0xe001,2+FP_SRC(%a6) # set fsave status
2275 frestore FP_SRC(%a6) # restore src operand
2277 mov.l (%a6),%a6 # restore frame pointer
2279 mov.l LOCAL_SIZE+EXC_SR(%sp),LOCAL_SIZE+EXC_SR-0xc(%sp)
2280 mov.l LOCAL_SIZE+2+EXC_PC(%sp),LOCAL_SIZE+2+EXC_PC-0xc(%sp)
2281 mov.l LOCAL_SIZE+EXC_EA(%sp),LOCAL_SIZE+EXC_EA-0xc(%sp)
2283 # now, we copy the default result to its proper location
2284 mov.l LOCAL_SIZE+FP_DST_EX(%sp),LOCAL_SIZE+0x4(%sp)
2285 mov.l LOCAL_SIZE+FP_DST_HI(%sp),LOCAL_SIZE+0x8(%sp)
2286 mov.l LOCAL_SIZE+FP_DST_LO(%sp),LOCAL_SIZE+0xc(%sp)
2288 add.l &LOCAL_SIZE-0x8,%sp
2293 #########################################################################
2296 # if we're stuffing a source operand back into an fsave frame then we
2297 # have to make sure that for single or double source operands that the
2298 # format stuffed is as weird as the hardware usually makes it.
2302 bfextu EXC_EXTWORD(%a6){&3:&3},%d0 # extract src specifier
2303 cmpi.b %d0,&0x1 # was src sgl?
2304 beq.b funimp_skew_sgl # yes
2305 cmpi.b %d0,&0x5 # was src dbl?
2306 beq.b funimp_skew_dbl # yes
2310 mov.w FP_SRC_EX(%a6),%d0 # fetch DENORM exponent
2311 andi.w &0x7fff,%d0 # strip sign
2312 beq.b funimp_skew_sgl_not
2314 bgt.b funimp_skew_sgl_not
2315 neg.w %d0 # make exponent negative
2316 addi.w &0x3f81,%d0 # find amt to shift
2317 mov.l FP_SRC_HI(%a6),%d1 # fetch DENORM hi(man)
2318 lsr.l %d0,%d1 # shift it
2319 bset &31,%d1 # set j-bit
2320 mov.l %d1,FP_SRC_HI(%a6) # insert new hi(man)
2321 andi.w &0x8000,FP_SRC_EX(%a6) # clear old exponent
2322 ori.w &0x3f80,FP_SRC_EX(%a6) # insert new "skewed" exponent
2323 funimp_skew_sgl_not:
2327 mov.w FP_SRC_EX(%a6),%d0 # fetch DENORM exponent
2328 andi.w &0x7fff,%d0 # strip sign
2329 beq.b funimp_skew_dbl_not
2331 bgt.b funimp_skew_dbl_not
2333 tst.b FP_SRC_EX(%a6) # make "internal format"
2334 smi.b 0x2+FP_SRC(%a6)
2335 mov.w %d0,FP_SRC_EX(%a6) # insert exponent with cleared sign
2336 clr.l %d0 # clear g,r,s
2337 lea FP_SRC(%a6),%a0 # pass ptr to src op
2338 mov.w &0x3c01,%d1 # pass denorm threshold
2339 bsr.l dnrm_lp # denorm it
2340 mov.w &0x3c00,%d0 # new exponent
2341 tst.b 0x2+FP_SRC(%a6) # is sign set?
2342 beq.b fss_dbl_denorm_done # no
2343 bset &15,%d0 # set sign
2344 fss_dbl_denorm_done:
2345 bset &0x7,FP_SRC_HI(%a6) # set j-bit
2346 mov.w %d0,FP_SRC_EX(%a6) # insert new exponent
2347 funimp_skew_dbl_not:
2350 #########################################################################
2353 btst &0x5,EXC_SR(%a6)
2355 mov.l 0x0(%a0),FP_DST_EX(%a6)
2356 mov.l 0x4(%a0),FP_DST_HI(%a6)
2357 mov.l 0x8(%a0),FP_DST_LO(%a6)
2361 #########################################################################
2362 # XDEF **************************************************************** #
2363 # _fpsp_effadd(): 060FPSP entry point for FP "Unimplemented #
2364 # effective address" exception. #
2366 # This handler should be the first code executed upon taking the #
2367 # FP Unimplemented Effective Address exception in an operating #
2370 # XREF **************************************************************** #
2371 # _imem_read_long() - read instruction longword #
2372 # fix_skewed_ops() - adjust src operand in fsave frame #
2373 # set_tag_x() - determine optype of src/dst operands #
2374 # store_fpreg() - store opclass 0 or 2 result to FP regfile #
2375 # unnorm_fix() - change UNNORM operands to NORM or ZERO #
2376 # load_fpn2() - load dst operand from FP regfile #
2377 # tbl_unsupp - addr of table of emulation routines for opclass 0,2 #
2378 # decbin() - convert packed data to FP binary data #
2379 # _real_fpu_disabled() - "callout" for "FPU disabled" exception #
2380 # _real_access() - "callout" for access error exception #
2381 # _mem_read() - read extended immediate operand from memory #
2382 # _fpsp_done() - "callout" for exit; work all done #
2383 # _real_trace() - "callout" for Trace enabled exception #
2384 # fmovm_dynamic() - emulate dynamic fmovm instruction #
2385 # fmovm_ctrl() - emulate fmovm control instruction #
2387 # INPUT *************************************************************** #
2388 # - The system stack contains the "Unimplemented <ea>" stk frame #
2390 # OUTPUT ************************************************************** #
2391 # If access error: #
2392 # - The system stack is changed to an access error stack frame #
2393 # If FPU disabled: #
2394 # - The system stack is changed to an FPU disabled stack frame #
2395 # If Trace exception enabled: #
2396 # - The system stack is changed to a Trace exception stack frame #
2397 # Else: (normal case) #
2398 # - None (correct result has been stored as appropriate) #
2400 # ALGORITHM *********************************************************** #
2401 # This exception handles 3 types of operations: #
2402 # (1) FP Instructions using extended precision or packed immediate #
2403 # addressing mode. #
2404 # (2) The "fmovm.x" instruction w/ dynamic register specification. #
2405 # (3) The "fmovm.l" instruction w/ 2 or 3 control registers. #
2407 # For immediate data operations, the data is read in w/ a #
2408 # _mem_read() "callout", converted to FP binary (if packed), and used #
2409 # as the source operand to the instruction specified by the instruction #
2410 # word. If no FP exception should be reported as a result of the #
2411 # emulation, then the result is stored to the destination register and #
2412 # the handler exits through _fpsp_done(). If an enabled exc has been #
2413 # signalled as a result of emulation, then an fsave state frame #
2414 # corresponding to the FP exception type must be entered into the 060 #
2415 # FPU before exiting. In either the enabled or disabled cases, we #
2416 # must also check if a Trace exception is pending, in which case, we #
2417 # must create a Trace exception stack frame from the current exception #
2418 # stack frame. If no Trace is pending, we simply exit through #
2420 # For "fmovm.x", call the routine fmovm_dynamic() which will #
2421 # decode and emulate the instruction. No FP exceptions can be pending #
2422 # as a result of this operation emulation. A Trace exception can be #
2423 # pending, though, which means the current stack frame must be changed #
2424 # to a Trace stack frame and an exit made through _real_trace(). #
2425 # For the case of "fmovm.x Dn,-(a7)", where the offending instruction #
2426 # was executed from supervisor mode, this handler must store the FP #
2427 # register file values to the system stack by itself since #
2428 # fmovm_dynamic() can't handle this. A normal exit is made through #
2430 # For "fmovm.l", fmovm_ctrl() is used to emulate the instruction. #
2431 # Again, a Trace exception may be pending and an exit made through #
2432 # _real_trace(). Else, a normal exit is made through _fpsp_done(). #
2434 # Before any of the above is attempted, it must be checked to #
2435 # see if the FPU is disabled. Since the "Unimp <ea>" exception is taken #
2436 # before the "FPU disabled" exception, but the "FPU disabled" exception #
2437 # has higher priority, we check the disabled bit in the PCR. If set, #
2438 # then we must create an 8 word "FPU disabled" exception stack frame #
2439 # from the current 4 word exception stack frame. This includes #
2440 # reproducing the effective address of the instruction to put on the #
2441 # new stack frame. #
2443 # In the process of all emulation work, if a _mem_read() #
2444 # "callout" returns a failing result indicating an access error, then #
2445 # we must create an access error stack frame from the current stack #
2446 # frame. This information includes a faulting address and a fault- #
2447 # status-longword. These are created within this handler. #
2449 #########################################################################
2454 # This exception type takes priority over the "Line F Emulator"
2455 # exception. Therefore, the FPU could be disabled when entering here.
2456 # So, we must check to see if it's disabled and handle that case separately.
2457 mov.l %d0,-(%sp) # save d0
2458 movc %pcr,%d0 # load proc cr
2459 btst &0x1,%d0 # is FPU disabled?
2460 bne.w iea_disabled # yes
2461 mov.l (%sp)+,%d0 # restore d0
2463 link %a6,&-LOCAL_SIZE # init stack frame
2465 movm.l &0x0303,EXC_DREGS(%a6) # save d0-d1/a0-a1
2466 fmovm.l %fpcr,%fpsr,%fpiar,USER_FPCR(%a6) # save ctrl regs
2467 fmovm.x &0xc0,EXC_FPREGS(%a6) # save fp0-fp1 on stack
2469 # PC of instruction that took the exception is the PC in the frame
2470 mov.l EXC_PC(%a6),EXC_EXTWPTR(%a6)
2472 mov.l EXC_EXTWPTR(%a6),%a0 # fetch instruction addr
2473 addq.l &0x4,EXC_EXTWPTR(%a6) # incr instruction ptr
2474 bsr.l _imem_read_long # fetch the instruction words
2475 mov.l %d0,EXC_OPWORD(%a6) # store OPWORD and EXTWORD
2477 #########################################################################
2479 tst.w %d0 # is operation fmovem?
2480 bmi.w iea_fmovm # yes
2483 # here, we will have:
2484 # fabs fdabs fsabs facos fmod
2485 # fadd fdadd fsadd fasin frem
2487 # fdiv fddiv fsdiv fatanh fsin
2489 # fintrz fcosh fsinh
2490 # fmove fdmove fsmove fetox ftan
2491 # fmul fdmul fsmul fetoxm1 ftanh
2492 # fneg fdneg fsneg fgetexp ftentox
2493 # fsgldiv fgetman ftwotox
2496 # fsub fdsub fssub flogn
2498 # which can all use f<op>.{x,p}
2499 # so, now it's immediate data extended precision AND PACKED FORMAT!
2502 andi.l &0x00ff00ff,USER_FPSR(%a6)
2504 btst &0xa,%d0 # is src fmt x or p?
2505 bne.b iea_op_pack # packed
2508 mov.l EXC_EXTWPTR(%a6),%a0 # pass: ptr to #<data>
2509 lea FP_SRC(%a6),%a1 # pass: ptr to super addr
2510 mov.l &0xc,%d0 # pass: 12 bytes
2511 bsr.l _imem_read # read extended immediate
2513 tst.l %d1 # did ifetch fail?
2514 bne.w iea_iacc # yes
2520 mov.l EXC_EXTWPTR(%a6),%a0 # pass: ptr to #<data>
2521 lea FP_SRC(%a6),%a1 # pass: ptr to super dst
2522 mov.l &0xc,%d0 # pass: 12 bytes
2523 bsr.l _imem_read # read packed operand
2525 tst.l %d1 # did ifetch fail?
2526 bne.w iea_iacc # yes
2528 # The packed operand is an INF or a NAN if the exponent field is all ones.
2529 bfextu FP_SRC(%a6){&1:&15},%d0 # get exp
2530 cmpi.w %d0,&0x7fff # INF or NAN?
2531 beq.b iea_op_setsrc # operand is an INF or NAN
2533 # The packed operand is a zero if the mantissa is all zero, else it's
2534 # a normal packed op.
2535 mov.b 3+FP_SRC(%a6),%d0 # get byte 4
2536 andi.b &0x0f,%d0 # clear all but last nybble
2537 bne.b iea_op_gp_not_spec # not a zero
2538 tst.l FP_SRC_HI(%a6) # is lw 2 zero?
2539 bne.b iea_op_gp_not_spec # not a zero
2540 tst.l FP_SRC_LO(%a6) # is lw 3 zero?
2541 beq.b iea_op_setsrc # operand is a ZERO
2543 lea FP_SRC(%a6),%a0 # pass: ptr to packed op
2544 bsr.l decbin # convert to extended
2545 fmovm.x &0x80,FP_SRC(%a6) # make this the srcop
2548 addi.l &0xc,EXC_EXTWPTR(%a6) # update extension word pointer
2550 # FP_SRC now holds the src operand.
2551 lea FP_SRC(%a6),%a0 # pass: ptr to src op
2552 bsr.l set_tag_x # tag the operand type
2553 mov.b %d0,STAG(%a6) # could be ANYTHING!!!
2554 cmpi.b %d0,&UNNORM # is operand an UNNORM?
2555 bne.b iea_op_getdst # no
2556 bsr.l unnorm_fix # yes; convert to NORM/DENORM/ZERO
2557 mov.b %d0,STAG(%a6) # set new optype tag
2559 clr.b STORE_FLG(%a6) # clear "store result" boolean
2561 btst &0x5,1+EXC_CMDREG(%a6) # is operation monadic or dyadic?
2562 beq.b iea_op_extract # monadic
2563 btst &0x4,1+EXC_CMDREG(%a6) # is operation fsincos,ftst,fcmp?
2564 bne.b iea_op_spec # yes
2567 bfextu EXC_CMDREG(%a6){&6:&3},%d0 # fetch dst regno
2568 bsr.l load_fpn2 # load dst operand
2570 lea FP_DST(%a6),%a0 # pass: ptr to dst op
2571 bsr.l set_tag_x # tag the operand type
2572 mov.b %d0,DTAG(%a6) # could be ANYTHING!!!
2573 cmpi.b %d0,&UNNORM # is operand an UNNORM?
2574 bne.b iea_op_extract # no
2575 bsr.l unnorm_fix # yes; convert to NORM/DENORM/ZERO
2576 mov.b %d0,DTAG(%a6) # set new optype tag
2577 bra.b iea_op_extract
2579 # the operation is fsincos, ftst, or fcmp. only fcmp is dyadic
2581 btst &0x3,1+EXC_CMDREG(%a6) # is operation fsincos?
2582 beq.b iea_op_extract # yes
2583 # now, we're left with ftst and fcmp. so, first let's tag them so that they don't
2584 # store a result. then, only fcmp will branch back and pick up a dst operand.
2585 st STORE_FLG(%a6) # don't store a final result
2586 btst &0x1,1+EXC_CMDREG(%a6) # is operation fcmp?
2587 beq.b iea_op_loaddst # yes
2591 mov.b FPCR_MODE(%a6),%d0 # pass: rnd mode,prec
2593 mov.b 1+EXC_CMDREG(%a6),%d1
2594 andi.w &0x007f,%d1 # extract extension
2602 mov.l (tbl_unsupp.l,%pc,%d1.w*4),%d1 # fetch routine addr
2603 jsr (tbl_unsupp.l,%pc,%d1.l*1)
2606 # Exceptions in order of precedence:
2608 # SNAN : all operations
2609 # OPERR : all reg-reg or mem-reg operations that can normally operr
2610 # OVFL : same as OPERR
2611 # UNFL : same as OPERR
2612 # DZ : same as OPERR
2613 # INEX2 : same as OPERR
2614 # INEX1 : all packed immediate operations
2617 # we determine the highest priority exception(if any) set by the
2618 # emulation routine that has also been enabled by the user.
2619 mov.b FPCR_ENABLE(%a6),%d0 # fetch exceptions enabled
2620 bne.b iea_op_ena # some are enabled
2622 # now, we save the result, unless, of course, the operation was ftst or fcmp.
2623 # these don't save results.
2625 tst.b STORE_FLG(%a6) # does this op store a result?
2626 bne.b iea_op_exit1 # exit with no frestore
2629 bfextu EXC_CMDREG(%a6){&6:&3},%d0 # fetch dst regno
2630 bsr.l store_fpreg # store the result
2633 mov.l EXC_PC(%a6),USER_FPIAR(%a6) # set FPIAR to "Current PC"
2634 mov.l EXC_EXTWPTR(%a6),EXC_PC(%a6) # set "Next PC" in exc frame
2636 fmovm.x EXC_FPREGS(%a6),&0xc0 # restore fp0-fp1
2637 fmovm.l USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs
2638 movm.l EXC_DREGS(%a6),&0x0303 # restore d0-d1/a0-a1
2640 unlk %a6 # unravel the frame
2642 btst &0x7,(%sp) # is trace on?
2643 bne.w iea_op_trace # yes
2645 bra.l _fpsp_done # exit to os
2648 and.b FPSR_EXCEPT(%a6),%d0 # keep only ones enabled and set
2649 bfffo %d0{&24:&8},%d0 # find highest priority exception
2650 bne.b iea_op_exc # at least one was set
2652 # no exception occurred. now, did a disabled, exact overflow occur with inexact
2653 # enabled? if so, then we have to stuff an overflow frame into the FPU.
2654 btst &ovfl_bit,FPSR_EXCEPT(%a6) # did overflow occur?
2658 btst &inex2_bit,FPCR_ENABLE(%a6) # is inexact enabled?
2659 beq.b iea_op_store # no
2660 bra.b iea_op_exc_ovfl # yes; stuff an overflow frame
2662 # an enabled exception occurred. we have to insert the exception type back into
2665 subi.l &24,%d0 # fix offset to be 0-8
2666 cmpi.b %d0,&0x6 # is exception INEX?
2667 bne.b iea_op_exc_force # no
2669 # the enabled exception was inexact. so, if it occurs with an overflow
2670 # or underflow that was disabled, then we have to force an overflow or
2672 btst &ovfl_bit,FPSR_EXCEPT(%a6) # did overflow occur?
2673 bne.b iea_op_exc_ovfl # yes
2674 btst &unfl_bit,FPSR_EXCEPT(%a6) # did underflow occur?
2675 bne.b iea_op_exc_unfl # yes
2678 mov.w (tbl_iea_except.b,%pc,%d0.w*2),2+FP_SRC(%a6)
2679 bra.b iea_op_exit2 # exit with frestore
2682 short 0xe002, 0xe006, 0xe004, 0xe005
2683 short 0xe003, 0xe002, 0xe001, 0xe001
2686 mov.w &0xe005,2+FP_SRC(%a6)
2690 mov.w &0xe003,2+FP_SRC(%a6)
2693 mov.l EXC_PC(%a6),USER_FPIAR(%a6) # set FPIAR to "Current PC"
2694 mov.l EXC_EXTWPTR(%a6),EXC_PC(%a6) # set "Next PC" in exc frame
2696 fmovm.x EXC_FPREGS(%a6),&0xc0 # restore fp0-fp1
2697 fmovm.l USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs
2698 movm.l EXC_DREGS(%a6),&0x0303 # restore d0-d1/a0-a1
2700 frestore FP_SRC(%a6) # restore exceptional state
2702 unlk %a6 # unravel the frame
2704 btst &0x7,(%sp) # is trace on?
2705 bne.b iea_op_trace # yes
2707 bra.l _fpsp_done # exit to os
2710 # The opclass two instruction that took an "Unimplemented Effective Address"
2711 # exception was being traced. Make the "current" PC the FPIAR and put it in
2712 # the trace stack frame then jump to _real_trace().
2714 # UNIMP EA FRAME TRACE FRAME
2715 # ***************** *****************
2716 # * 0x0 * 0x0f0 * * Current *
2717 # ***************** * PC *
2718 # * Current * *****************
2719 # * PC * * 0x2 * 0x024 *
2720 # ***************** *****************
2722 # ***************** * PC *
2727 mov.l (%sp),-(%sp) # shift stack frame "down"
2728 mov.w 0x8(%sp),0x4(%sp)
2729 mov.w &0x2024,0x6(%sp) # stk fmt = 0x2; voff = 0x024
2730 fmov.l %fpiar,0x8(%sp) # "Current PC" is in FPIAR
2734 #########################################################################
2736 btst &14,%d0 # ctrl or data reg
2737 beq.w iea_fmovm_ctrl
2741 btst &0x5,EXC_SR(%a6) # user or supervisor mode
2742 bne.b iea_fmovm_data_s
2746 mov.l %a0,EXC_A7(%a6) # store current a7
2747 bsr.l fmovm_dynamic # do dynamic fmovm
2748 mov.l EXC_A7(%a6),%a0 # load possibly new a7
2749 mov.l %a0,%usp # update usp
2750 bra.w iea_fmovm_exit
2753 clr.b SPCOND_FLG(%a6)
2754 lea 0x2+EXC_VOFF(%a6),%a0
2755 mov.l %a0,EXC_A7(%a6)
2756 bsr.l fmovm_dynamic # do dynamic fmovm
2758 cmpi.b SPCOND_FLG(%a6),&mda7_flg
2759 beq.w iea_fmovm_data_predec
2760 cmpi.b SPCOND_FLG(%a6),&mia7_flg
2761 bne.w iea_fmovm_exit
2763 # right now, d0 = the size.
2764 # the data has been fetched from the supervisor stack, but we have not
2765 # incremented the stack pointer by the appropriate number of bytes.
2767 iea_fmovm_data_postinc:
2768 btst &0x7,EXC_SR(%a6)
2769 bne.b iea_fmovm_data_pi_trace
2771 mov.w EXC_SR(%a6),(EXC_SR,%a6,%d0)
2772 mov.l EXC_EXTWPTR(%a6),(EXC_PC,%a6,%d0)
2773 mov.w &0x00f0,(EXC_VOFF,%a6,%d0)
2775 lea (EXC_SR,%a6,%d0),%a0
2776 mov.l %a0,EXC_SR(%a6)
2778 fmovm.x EXC_FP0(%a6),&0xc0 # restore fp0-fp1
2779 fmovm.l USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs
2780 movm.l EXC_DREGS(%a6),&0x0303 # restore d0-d1/a0-a1
2786 iea_fmovm_data_pi_trace:
2787 mov.w EXC_SR(%a6),(EXC_SR-0x4,%a6,%d0)
2788 mov.l EXC_EXTWPTR(%a6),(EXC_PC-0x4,%a6,%d0)
2789 mov.w &0x2024,(EXC_VOFF-0x4,%a6,%d0)
2790 mov.l EXC_PC(%a6),(EXC_VOFF+0x2-0x4,%a6,%d0)
2792 lea (EXC_SR-0x4,%a6,%d0),%a0
2793 mov.l %a0,EXC_SR(%a6)
2795 fmovm.x EXC_FP0(%a6),&0xc0 # restore fp0-fp1
2796 fmovm.l USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs
2797 movm.l EXC_DREGS(%a6),&0x0303 # restore d0-d1/a0-a1
2803 # right now, d1 = size and d0 = the strg.
2804 iea_fmovm_data_predec:
2805 mov.b %d1,EXC_VOFF(%a6) # store strg
2806 mov.b %d0,0x1+EXC_VOFF(%a6) # store size
2808 fmovm.x EXC_FP0(%a6),&0xc0 # restore fp0-fp1
2809 fmovm.l USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs
2810 movm.l EXC_DREGS(%a6),&0x0303 # restore d0-d1/a0-a1
2812 mov.l (%a6),-(%sp) # make a copy of a6
2813 mov.l %d0,-(%sp) # save d0
2814 mov.l %d1,-(%sp) # save d1
2815 mov.l EXC_EXTWPTR(%a6),-(%sp) # make a copy of Next PC
2818 mov.b 0x1+EXC_VOFF(%a6),%d0 # fetch size
2819 neg.l %d0 # get negative of size
2821 btst &0x7,EXC_SR(%a6) # is trace enabled?
2822 beq.b iea_fmovm_data_p2
2824 mov.w EXC_SR(%a6),(EXC_SR-0x4,%a6,%d0)
2825 mov.l EXC_PC(%a6),(EXC_VOFF-0x2,%a6,%d0)
2826 mov.l (%sp)+,(EXC_PC-0x4,%a6,%d0)
2827 mov.w &0x2024,(EXC_VOFF-0x4,%a6,%d0)
2829 pea (%a6,%d0) # create final sp
2830 bra.b iea_fmovm_data_p3
2833 mov.w EXC_SR(%a6),(EXC_SR,%a6,%d0)
2834 mov.l (%sp)+,(EXC_PC,%a6,%d0)
2835 mov.w &0x00f0,(EXC_VOFF,%a6,%d0)
2837 pea (0x4,%a6,%d0) # create final sp
2841 mov.b EXC_VOFF(%a6),%d1 # fetch strg
2845 fmovm.x &0x80,(0x4+0x8,%a6,%d0)
2850 fmovm.x &0x40,(0x4+0x8,%a6,%d0)
2855 fmovm.x &0x20,(0x4+0x8,%a6,%d0)
2860 fmovm.x &0x10,(0x4+0x8,%a6,%d0)
2865 fmovm.x &0x08,(0x4+0x8,%a6,%d0)
2870 fmovm.x &0x04,(0x4+0x8,%a6,%d0)
2875 fmovm.x &0x02,(0x4+0x8,%a6,%d0)
2880 fmovm.x &0x01,(0x4+0x8,%a6,%d0)
2887 btst &0x7,(%sp) # is trace enabled?
2891 #########################################################################
2894 bsr.l fmovm_ctrl # load ctrl regs
2897 fmovm.x EXC_FPREGS(%a6),&0xc0 # restore fp0-fp1
2898 fmovm.l USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs
2899 movm.l EXC_DREGS(%a6),&0x0303 # restore d0-d1/a0-a1
2901 btst &0x7,EXC_SR(%a6) # is trace on?
2902 bne.b iea_fmovm_trace # yes
2904 mov.l EXC_EXTWPTR(%a6),EXC_PC(%a6) # set Next PC
2906 unlk %a6 # unravel the frame
2908 bra.l _fpsp_done # exit to os
2911 # The control reg instruction that took an "Unimplemented Effective Address"
2912 # exception was being traced. The "Current PC" for the trace frame is the
2913 # PC stacked for Unimp EA. The "Next PC" is in EXC_EXTWPTR.
2914 # After fixing the stack frame, jump to _real_trace().
2916 # UNIMP EA FRAME TRACE FRAME
2917 # ***************** *****************
2918 # * 0x0 * 0x0f0 * * Current *
2919 # ***************** * PC *
2920 # * Current * *****************
2921 # * PC * * 0x2 * 0x024 *
2922 # ***************** *****************
2924 # ***************** * PC *
2928 # this ain't a pretty solution, but it works:
2929 # -restore a6 (not with unlk)
2930 # -shift stack frame down over where old a6 used to be
2931 # -add LOCAL_SIZE to stack pointer
2933 mov.l (%a6),%a6 # restore frame pointer
2934 mov.w EXC_SR+LOCAL_SIZE(%sp),0x0+LOCAL_SIZE(%sp)
2935 mov.l EXC_PC+LOCAL_SIZE(%sp),0x8+LOCAL_SIZE(%sp)
2936 mov.l EXC_EXTWPTR+LOCAL_SIZE(%sp),0x2+LOCAL_SIZE(%sp)
2937 mov.w &0x2024,0x6+LOCAL_SIZE(%sp) # stk fmt = 0x2; voff = 0x024
2938 add.l &LOCAL_SIZE,%sp # clear stack frame
2942 #########################################################################
2943 # The FPU is disabled and so we should really have taken the "Line
2944 # F Emulator" exception. So, here we create an 8-word stack frame
2945 # from our 4-word stack frame. This means we must calculate the length
2946 # of the faulting instruction to get the "next PC". This is trivial for
2947 # immediate operands but requires some extra work for fmovm dynamic
2948 # which can use most addressing modes.
2950 mov.l (%sp)+,%d0 # restore d0
2952 link %a6,&-LOCAL_SIZE # init stack frame
2954 movm.l &0x0303,EXC_DREGS(%a6) # save d0-d1/a0-a1
2956 # PC of instruction that took the exception is the PC in the frame
2957 mov.l EXC_PC(%a6),EXC_EXTWPTR(%a6)
2958 mov.l EXC_EXTWPTR(%a6),%a0 # fetch instruction addr
2959 addq.l &0x4,EXC_EXTWPTR(%a6) # incr instruction ptr
2960 bsr.l _imem_read_long # fetch the instruction words
2961 mov.l %d0,EXC_OPWORD(%a6) # store OPWORD and EXTWORD
2963 tst.w %d0 # is instr fmovm? (extension word bit 15 set)
2964 bmi.b iea_dis_fmovm # yes
2965 # instruction is using an extended precision immediate operand. Therefore,
2966 # the total instruction length is 16 bytes.
2968 mov.l &0x10,%d0 # 16 bytes of instruction
2971 btst &0xe,%d0 # is instr fmovm ctrl
2972 bne.b iea_dis_fmovm_data # no; it's fmovm data
2973 # the instruction is a fmovm.l with 2 or 3 registers.
2974 bfextu %d0{&19:&3},%d1 # extract ctrl register list field
2976 cmpi.b %d1,&0x7 # move all regs?
2980 # the instruction is an fmovm.x dynamic which can use many addressing
2981 # modes and thus can have several different total instruction lengths.
2982 # call fmovm_calc_ea which will go through the ea calc process and,
2983 # as a by-product, will tell us how long the instruction is.
2987 mov.l EXC_EXTWPTR(%a6),%d0
2988 sub.l EXC_PC(%a6),%d0
2990 mov.w %d0,EXC_VOFF(%a6) # store stack shift value
2992 movm.l EXC_DREGS(%a6),&0x0303 # restore d0-d1/a0-a1
2996 # here, we actually create the 8-word frame from the 4-word frame,
2997 # with the "next PC" as additional info.
2998 # the <ea> field is left as undefined.
2999 subq.l &0x8,%sp # make room for new stack
3000 mov.l %d0,-(%sp) # save d0
3001 mov.w 0xc(%sp),0x4(%sp) # move SR
3002 mov.l 0xe(%sp),0x6(%sp) # move Current PC
3005 mov.l 0x6(%sp),0x10(%sp) # move Current PC
3006 add.l %d0,0x6(%sp) # make Next PC
3007 mov.w &0x402c,0xa(%sp) # insert offset,frame format
3008 mov.l (%sp)+,%d0 # restore d0
3010 bra.l _real_fpu_disabled
3018 fmovm.l USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs
3019 fmovm.x EXC_FPREGS(%a6),&0xc0 # restore fp0-fp1 on stack
3021 movm.l EXC_DREGS(%a6),&0x0303 # restore d0-d1/a0-a1
3025 subq.w &0x8,%sp # make stack frame bigger
3026 mov.l 0x8(%sp),(%sp) # store SR,hi(PC)
3027 mov.w 0xc(%sp),0x4(%sp) # store lo(PC)
3028 mov.w &0x4008,0x6(%sp) # store voff
3029 mov.l 0x2(%sp),0x8(%sp) # store ea
3030 mov.l &0x09428001,0xc(%sp) # store fslw
3033 btst &0x5,(%sp) # user or supervisor mode?
3034 beq.b iea_acc_done2 # user
3035 bset &0x2,0xd(%sp) # set supervisor TM bit
3041 lea -LOCAL_SIZE(%a6),%sp
3046 fmovm.x EXC_FPREGS(%a6),&0xc0 # restore fp0-fp1 on stack
3047 fmovm.l LOCAL_SIZE+USER_FPCR(%sp),%fpcr,%fpsr,%fpiar # restore ctrl regs
3051 mov.l 0x4+LOCAL_SIZE(%sp),-0x8+0x4+LOCAL_SIZE(%sp)
3052 mov.w 0x8+LOCAL_SIZE(%sp),-0x8+0x8+LOCAL_SIZE(%sp)
3053 mov.w &0x4008,-0x8+0xa+LOCAL_SIZE(%sp)
3054 mov.l %a0,-0x8+0xc+LOCAL_SIZE(%sp)
3055 mov.w %d0,-0x8+0x10+LOCAL_SIZE(%sp)
3056 mov.w &0x0001,-0x8+0x12+LOCAL_SIZE(%sp)
3058 movm.l LOCAL_SIZE+EXC_DREGS(%sp),&0x0303 # restore d0-d1/a0-a1
3059 add.w &LOCAL_SIZE-0x4,%sp
3063 #########################################################################
3064 # XDEF **************************************************************** #
3065 # _fpsp_operr(): 060FPSP entry point for FP Operr exception. #
3067 # This handler should be the first code executed upon taking the #
3068 # FP Operand Error exception in an operating system. #
3070 # XREF **************************************************************** #
3071 # _imem_read_long() - read instruction longword #
3072 # fix_skewed_ops() - adjust src operand in fsave frame #
3073 # _real_operr() - "callout" to operating system operr handler #
3074 # _dmem_write_{byte,word,long}() - store data to mem (opclass 3) #
3075 # store_dreg_{b,w,l}() - store data to data regfile (opclass 3) #
3076 # facc_out_{b,w,l}() - store to memory took access error (opcl 3) #
3078 # INPUT *************************************************************** #
3079 # - The system stack contains the FP Operr exception frame #
3080 # - The fsave frame contains the source operand #
3082 # OUTPUT ************************************************************** #
3083 # No access error: #
3084 # - The system stack is unchanged #
3085 # - The fsave frame contains the adjusted src op for opclass 0,2 #
3087 # ALGORITHM *********************************************************** #
3088 # In a system where the FP Operr exception is enabled, the goal #
3089 # is to get to the handler specified at _real_operr(). But, on the 060, #
3090 # for opclass zero and two instruction taking this exception, the #
3091 # input operand in the fsave frame may be incorrect for some cases #
3092 # and needs to be corrected. This handler calls fix_skewed_ops() to #
3093 # do just this and then exits through _real_operr(). #
3094 # For opclass 3 instructions, the 060 doesn't store the default #
3095 # operr result out to memory or data register file as it should. #
3096 # This code must emulate the move out before finally exiting through #
3098 # _real_operr(). The move out, if to memory, is performed using #
3098 # _mem_write() "callout" routines that may return a failing result. #
3099 # In this special case, the handler must exit through facc_out() #
3100 # which creates an access error stack frame from the current operr #
3103 #########################################################################
3108 link.w %a6,&-LOCAL_SIZE # init stack frame
3110 fsave FP_SRC(%a6) # grab the "busy" frame
3112 movm.l &0x0303,EXC_DREGS(%a6) # save d0-d1/a0-a1
3113 fmovm.l %fpcr,%fpsr,%fpiar,USER_FPCR(%a6) # save ctrl regs
3114 fmovm.x &0xc0,EXC_FPREGS(%a6) # save fp0-fp1 on stack
3116 # the FPIAR holds the "current PC" of the faulting instruction
3117 mov.l USER_FPIAR(%a6),EXC_EXTWPTR(%a6)
3119 mov.l EXC_EXTWPTR(%a6),%a0 # fetch instruction addr
3120 addq.l &0x4,EXC_EXTWPTR(%a6) # incr instruction ptr
3121 bsr.l _imem_read_long # fetch the instruction words
3122 mov.l %d0,EXC_OPWORD(%a6)
3124 ##############################################################################
3126 btst &13,%d0 # is instr an fmove out?
3127 bne.b foperr_out # fmove out
3130 # here, we simply see if the operand in the fsave frame needs to be "unskewed".
3131 # this would be the case for opclass two operations with a source infinity or
3132 # denorm operand in the sgl or dbl format. NANs also become skewed, but can't
3133 # cause an operr so we don't need to check for them here.
3134 lea FP_SRC(%a6),%a0 # pass: ptr to src op
3135 bsr.l fix_skewed_ops # fix src op
3138 fmovm.x EXC_FPREGS(%a6),&0xc0 # restore fp0-fp1
3139 fmovm.l USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs
3140 movm.l EXC_DREGS(%a6),&0x0303 # restore d0-d1/a0-a1
3142 frestore FP_SRC(%a6)
3147 ########################################################################
3150 # the hardware does not save the default result to memory on enabled
3151 # operand error exceptions. we do this here before passing control to
3152 # the user operand error handler.
3154 # byte, word, and long destination format operations can pass
3155 # through here. we simply need to test the sign of the src
3156 # operand and save the appropriate minimum or maximum integer value
3157 # to the effective address as pointed to by the stacked effective address.
3159 # although packed opclass three operations can take operand error
3160 # exceptions, they won't pass through here since they are caught
3161 # first by the unsupported data format exception handler. that handler
3162 # sends them directly to _real_operr() if necessary.
3166 mov.w FP_SRC_EX(%a6),%d1 # fetch exponent
3169 bne.b foperr_out_not_qnan
3170 # the operand is either an infinity or a QNAN.
3171 tst.l FP_SRC_LO(%a6)
3172 bne.b foperr_out_qnan
3173 mov.l FP_SRC_HI(%a6),%d1
3174 andi.l &0x7fffffff,%d1
3175 beq.b foperr_out_not_qnan
3177 mov.l FP_SRC_HI(%a6),L_SCR1(%a6)
3178 bra.b foperr_out_jmp
3180 foperr_out_not_qnan:
3181 mov.l &0x7fffffff,%d1
3182 tst.b FP_SRC_EX(%a6)
3183 bpl.b foperr_out_not_qnan2
3185 foperr_out_not_qnan2:
3186 mov.l %d1,L_SCR1(%a6)
3189 bfextu %d0{&19:&3},%d0 # extract dst format field
3190 mov.b 1+EXC_OPWORD(%a6),%d1 # extract <ea> mode,reg
3191 mov.w (tbl_operr.b,%pc,%d0.w*2),%a0
3192 jmp (tbl_operr.b,%pc,%a0)
3195 short foperr_out_l - tbl_operr # long word integer
3196 short tbl_operr - tbl_operr # sgl prec shouldn't happen
3197 short tbl_operr - tbl_operr # ext prec shouldn't happen
3198 short foperr_exit - tbl_operr # packed won't enter here
3199 short foperr_out_w - tbl_operr # word integer
3200 short tbl_operr - tbl_operr # dbl prec shouldn't happen
3201 short foperr_out_b - tbl_operr # byte integer
3202 short tbl_operr - tbl_operr # packed won't enter here
3205 mov.b L_SCR1(%a6),%d0 # load positive default result
3206 cmpi.b %d1,&0x7 # is <ea> mode a data reg?
3207 ble.b foperr_out_b_save_dn # yes
3208 mov.l EXC_EA(%a6),%a0 # pass: <ea> of default result
3209 bsr.l _dmem_write_byte # write the default result
3211 tst.l %d1 # did dstore fail?
3212 bne.l facc_out_b # yes
3215 foperr_out_b_save_dn:
3217 bsr.l store_dreg_b # store result to regfile
3221 mov.w L_SCR1(%a6),%d0 # load positive default result
3222 cmpi.b %d1,&0x7 # is <ea> mode a data reg?
3223 ble.b foperr_out_w_save_dn # yes
3224 mov.l EXC_EA(%a6),%a0 # pass: <ea> of default result
3225 bsr.l _dmem_write_word # write the default result
3227 tst.l %d1 # did dstore fail?
3228 bne.l facc_out_w # yes
3231 foperr_out_w_save_dn:
3233 bsr.l store_dreg_w # store result to regfile
3237 mov.l L_SCR1(%a6),%d0 # load positive default result
3238 cmpi.b %d1,&0x7 # is <ea> mode a data reg?
3239 ble.b foperr_out_l_save_dn # yes
3240 mov.l EXC_EA(%a6),%a0 # pass: <ea> of default result
3241 bsr.l _dmem_write_long # write the default result
3243 tst.l %d1 # did dstore fail?
3244 bne.l facc_out_l # yes
3247 foperr_out_l_save_dn:
3249 bsr.l store_dreg_l # store result to regfile
3252 #########################################################################
3253 # XDEF **************************************************************** #
3254 # _fpsp_snan(): 060FPSP entry point for FP SNAN exception. #
3256 # This handler should be the first code executed upon taking the #
3257 # FP Signalling NAN exception in an operating system. #
3259 # XREF **************************************************************** #
3260 # _imem_read_long() - read instruction longword #
3261 # fix_skewed_ops() - adjust src operand in fsave frame #
3262 # _real_snan() - "callout" to operating system SNAN handler #
3263 # _dmem_write_{byte,word,long}() - store data to mem (opclass 3) #
3264 # store_dreg_{b,w,l}() - store data to data regfile (opclass 3) #
3265 # facc_out_{b,w,l,d,x}() - store to mem took acc error (opcl 3) #
3266 # _calc_ea_fout() - fix An if <ea> is -() or ()+; also get <ea> #
3268 # INPUT *************************************************************** #
3269 # - The system stack contains the FP SNAN exception frame #
3270 # - The fsave frame contains the source operand #
3272 # OUTPUT ************************************************************** #
3273 # No access error: #
3274 # - The system stack is unchanged #
3275 # - The fsave frame contains the adjusted src op for opclass 0,2 #
3277 # ALGORITHM *********************************************************** #
3278 # In a system where the FP SNAN exception is enabled, the goal #
3279 # is to get to the handler specified at _real_snan(). But, on the 060, #
3280 # for opclass zero and two instructions taking this exception, the #
3281 # input operand in the fsave frame may be incorrect for some cases #
3282 # and needs to be corrected. This handler calls fix_skewed_ops() to #
3283 # do just this and then exits through _real_snan(). #
3284 # For opclass 3 instructions, the 060 doesn't store the default #
3285 # SNAN result out to memory or data register file as it should. #
3286 # This code must emulate the move out before finally exiting through #
3287 # _real_snan(). The move out, if to memory, is performed using #
3288 # _mem_write() "callout" routines that may return a failing result. #
3289 # In this special case, the handler must exit through facc_out() #
3290 # which creates an access error stack frame from the current SNAN #
3292 # For the case of an extended precision opclass 3 instruction, #
3293 # if the effective addressing mode was -() or ()+, then the address #
3294 # register must get updated by calling _calc_ea_fout(). If the <ea> #
3295 # was -(a7) from supervisor mode, then the exception frame currently #
3296 # on the system stack must be carefully moved "down" to make room #
3297 # for the operand being moved. #
3299 #########################################################################
3304 link.w %a6,&-LOCAL_SIZE # init stack frame
3306 fsave FP_SRC(%a6) # grab the "busy" frame
3308 movm.l &0x0303,EXC_DREGS(%a6) # save d0-d1/a0-a1
3309 fmovm.l %fpcr,%fpsr,%fpiar,USER_FPCR(%a6) # save ctrl regs
3310 fmovm.x &0xc0,EXC_FPREGS(%a6) # save fp0-fp1 on stack
3312 # the FPIAR holds the "current PC" of the faulting instruction
3313 mov.l USER_FPIAR(%a6),EXC_EXTWPTR(%a6)
3315 mov.l EXC_EXTWPTR(%a6),%a0 # fetch instruction addr
3316 addq.l &0x4,EXC_EXTWPTR(%a6) # incr instruction ptr
3317 bsr.l _imem_read_long # fetch the instruction words
3318 mov.l %d0,EXC_OPWORD(%a6)
3320 ##############################################################################
3322 btst &13,%d0 # is instr an fmove out?
3323 bne.w fsnan_out # fmove out
3326 # here, we simply see if the operand in the fsave frame needs to be "unskewed".
3327 # this would be the case for opclass two operations with a source infinity or
3328 # denorm operand in the sgl or dbl format. NANs also become skewed and must be
3330 lea FP_SRC(%a6),%a0 # pass: ptr to src op
3331 bsr.l fix_skewed_ops # fix src op
3334 fmovm.x EXC_FPREGS(%a6),&0xc0 # restore fp0-fp1
3335 fmovm.l USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs
3336 movm.l EXC_DREGS(%a6),&0x0303 # restore d0-d1/a0-a1
3338 frestore FP_SRC(%a6)
3343 ########################################################################
3346 # the hardware does not save the default result to memory on enabled
3347 # snan exceptions. we do this here before passing control to
3348 # the user snan handler.
3350 # byte, word, long, and packed destination format operations can pass
3351 # through here. since packed format operations already were handled by
3352 # fpsp_unsupp(), then we need to do nothing else for them here.
3353 # for byte, word, and long, we simply need to test the sign of the src
3354 # operand and save the appropriate minimum or maximum integer value
3355 # to the effective address as pointed to by the stacked effective address.
3359 bfextu %d0{&19:&3},%d0 # extract dst format field
3360 mov.b 1+EXC_OPWORD(%a6),%d1 # extract <ea> mode,reg
3361 mov.w (tbl_snan.b,%pc,%d0.w*2),%a0
3362 jmp (tbl_snan.b,%pc,%a0)
3365 short fsnan_out_l - tbl_snan # long word integer
3366 short fsnan_out_s - tbl_snan # sgl prec shouldn't happen
3367 short fsnan_out_x - tbl_snan # ext prec shouldn't happen
3368 short tbl_snan - tbl_snan # packed needs no help
3369 short fsnan_out_w - tbl_snan # word integer
3370 short fsnan_out_d - tbl_snan # dbl prec shouldn't happen
3371 short fsnan_out_b - tbl_snan # byte integer
3372 short tbl_snan - tbl_snan # packed needs no help
3375 mov.b FP_SRC_HI(%a6),%d0 # load upper byte of SNAN
3376 bset &6,%d0 # set SNAN bit
3377 cmpi.b %d1,&0x7 # is <ea> mode a data reg?
3378 ble.b fsnan_out_b_dn # yes
3379 mov.l EXC_EA(%a6),%a0 # pass: <ea> of default result
3380 bsr.l _dmem_write_byte # write the default result
3382 tst.l %d1 # did dstore fail?
3383 bne.l facc_out_b # yes
3388 bsr.l store_dreg_b # store result to regfile
3392 mov.w FP_SRC_HI(%a6),%d0 # load upper word of SNAN
3393 bset &14,%d0 # set SNAN bit
3394 cmpi.b %d1,&0x7 # is <ea> mode a data reg?
3395 ble.b fsnan_out_w_dn # yes
3396 mov.l EXC_EA(%a6),%a0 # pass: <ea> of default result
3397 bsr.l _dmem_write_word # write the default result
3399 tst.l %d1 # did dstore fail?
3400 bne.l facc_out_w # yes
3405 bsr.l store_dreg_w # store result to regfile
3409 mov.l FP_SRC_HI(%a6),%d0 # load upper longword of SNAN
3410 bset &30,%d0 # set SNAN bit
3411 cmpi.b %d1,&0x7 # is <ea> mode a data reg?
3412 ble.b fsnan_out_l_dn # yes
3413 mov.l EXC_EA(%a6),%a0 # pass: <ea> of default result
3414 bsr.l _dmem_write_long # write the default result
3416 tst.l %d1 # did dstore fail?
3417 bne.l facc_out_l # yes
3422 bsr.l store_dreg_l # store result to regfile
3426 cmpi.b %d1,&0x7 # is <ea> mode a data reg?
3427 ble.b fsnan_out_d_dn # yes
3428 mov.l FP_SRC_EX(%a6),%d0 # fetch SNAN sign
3429 andi.l &0x80000000,%d0 # keep sign
3430 ori.l &0x7fc00000,%d0 # insert new exponent,SNAN bit
3431 mov.l FP_SRC_HI(%a6),%d1 # load mantissa
3432 lsr.l &0x8,%d1 # shift mantissa for sgl
3433 or.l %d1,%d0 # create sgl SNAN
3434 mov.l EXC_EA(%a6),%a0 # pass: <ea> of default result
3435 bsr.l _dmem_write_long # write the default result
3437 tst.l %d1 # did dstore fail?
3438 bne.l facc_out_l # yes
3442 mov.l FP_SRC_EX(%a6),%d0 # fetch SNAN sign
3443 andi.l &0x80000000,%d0 # keep sign
3444 ori.l &0x7fc00000,%d0 # insert new exponent,SNAN bit
3446 mov.l FP_SRC_HI(%a6),%d1 # load mantissa
3447 lsr.l &0x8,%d1 # shift mantissa for sgl
3448 or.l %d1,%d0 # create sgl SNAN
3451 bsr.l store_dreg_l # store result to regfile
3455 mov.l FP_SRC_EX(%a6),%d0 # fetch SNAN sign
3456 andi.l &0x80000000,%d0 # keep sign
3457 ori.l &0x7ff80000,%d0 # insert new exponent,SNAN bit
3458 mov.l FP_SRC_HI(%a6),%d1 # load hi mantissa
3459 mov.l %d0,FP_SCR0_EX(%a6) # store to temp space
3460 mov.l &11,%d0 # load shift amt
3462 or.l %d1,FP_SCR0_EX(%a6) # create dbl hi
3463 mov.l FP_SRC_HI(%a6),%d1 # load hi mantissa
3464 andi.l &0x000007ff,%d1
3466 mov.l %d1,FP_SCR0_HI(%a6) # store to temp space
3467 mov.l FP_SRC_LO(%a6),%d1 # load lo mantissa
3469 or.l %d1,FP_SCR0_HI(%a6) # create dbl lo
3470 lea FP_SCR0(%a6),%a0 # pass: ptr to operand
3471 mov.l EXC_EA(%a6),%a1 # pass: dst addr
3472 movq.l &0x8,%d0 # pass: size of 8 bytes
3473 bsr.l _dmem_write # write the default result
3475 tst.l %d1 # did dstore fail?
3476 bne.l facc_out_d # yes
3480 # for extended precision, if the addressing mode is pre-decrement or
3481 # post-increment, then the address register did not get updated.
3482 # in addition, for pre-decrement, the stacked <ea> is incorrect.
3484 clr.b SPCOND_FLG(%a6) # clear special case flag
3486 mov.w FP_SRC_EX(%a6),FP_SCR0_EX(%a6)
3487 clr.w 2+FP_SCR0(%a6)
3488 mov.l FP_SRC_HI(%a6),%d0
3490 mov.l %d0,FP_SCR0_HI(%a6)
3491 mov.l FP_SRC_LO(%a6),FP_SCR0_LO(%a6)
3493 btst &0x5,EXC_SR(%a6) # supervisor mode exception?
3494 bne.b fsnan_out_x_s # yes
3496 mov.l %usp,%a0 # fetch user stack pointer
3497 mov.l %a0,EXC_A7(%a6) # save on stack for calc_ea()
3498 mov.l (%a6),EXC_A6(%a6)
3500 bsr.l _calc_ea_fout # find the correct ea,update An
3502 mov.l %a0,EXC_EA(%a6) # stack correct <ea>
3504 mov.l EXC_A7(%a6),%a0
3505 mov.l %a0,%usp # restore user stack pointer
3506 mov.l EXC_A6(%a6),(%a6)
3509 lea FP_SCR0(%a6),%a0 # pass: ptr to operand
3510 movq.l &0xc,%d0 # pass: size of extended
3511 bsr.l _dmem_write # write the default result
3513 tst.l %d1 # did dstore fail?
3514 bne.l facc_out_x # yes
3519 mov.l (%a6),EXC_A6(%a6)
3521 bsr.l _calc_ea_fout # find the correct ea,update An
3523 mov.l %a0,EXC_EA(%a6) # stack correct <ea>
3525 mov.l EXC_A6(%a6),(%a6)
3527 cmpi.b SPCOND_FLG(%a6),&mda7_flg # is <ea> mode -(a7)?
3528 bne.b fsnan_out_x_save # no
3530 # the operation was "fmove.x SNAN,-(a7)" from supervisor mode.
3531 fmovm.x EXC_FPREGS(%a6),&0xc0 # restore fp0-fp1
3532 fmovm.l USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs
3533 movm.l EXC_DREGS(%a6),&0x0303 # restore d0-d1/a0-a1
3535 frestore FP_SRC(%a6)
3537 mov.l EXC_A6(%a6),%a6 # restore frame pointer
3539 mov.l LOCAL_SIZE+EXC_SR(%sp),LOCAL_SIZE+EXC_SR-0xc(%sp)
3540 mov.l LOCAL_SIZE+EXC_PC+0x2(%sp),LOCAL_SIZE+EXC_PC+0x2-0xc(%sp)
3541 mov.l LOCAL_SIZE+EXC_EA(%sp),LOCAL_SIZE+EXC_EA-0xc(%sp)
3543 mov.l LOCAL_SIZE+FP_SCR0_EX(%sp),LOCAL_SIZE+EXC_SR(%sp)
3544 mov.l LOCAL_SIZE+FP_SCR0_HI(%sp),LOCAL_SIZE+EXC_PC+0x2(%sp)
3545 mov.l LOCAL_SIZE+FP_SCR0_LO(%sp),LOCAL_SIZE+EXC_EA(%sp)
3547 add.l &LOCAL_SIZE-0x8,%sp
3551 #########################################################################
3552 # XDEF **************************************************************** #
3553 # _fpsp_inex(): 060FPSP entry point for FP Inexact exception. #
3555 # This handler should be the first code executed upon taking the #
3556 # FP Inexact exception in an operating system. #
3558 # XREF **************************************************************** #
3559 # _imem_read_long() - read instruction longword #
3560 # fix_skewed_ops() - adjust src operand in fsave frame #
3561 # set_tag_x() - determine optype of src/dst operands #
3562 # store_fpreg() - store opclass 0 or 2 result to FP regfile #
3563 # unnorm_fix() - change UNNORM operands to NORM or ZERO #
3564 # load_fpn2() - load dst operand from FP regfile #
3565 # smovcr() - emulate an "fmovcr" instruction #
3566 # fout() - emulate an opclass 3 instruction #
3567 # tbl_unsupp - add of table of emulation routines for opclass 0,2 #
3568 # _real_inex() - "callout" to operating system inexact handler #
3570 # INPUT *************************************************************** #
3571 # - The system stack contains the FP Inexact exception frame #
3572 # - The fsave frame contains the source operand #
3574 # OUTPUT ************************************************************** #
3575 # - The system stack is unchanged #
3576 # - The fsave frame contains the adjusted src op for opclass 0,2 #
3578 # ALGORITHM *********************************************************** #
3579 # In a system where the FP Inexact exception is enabled, the goal #
3580 # is to get to the handler specified at _real_inex(). But, on the 060, #
3581 # for opclass zero and two instruction taking this exception, the #
3582 # hardware doesn't store the correct result to the destination FP #
3583 # register as did the '040 and '881/2. This handler must emulate the #
3584 # instruction in order to get this value and then store it to the #
3585 # correct register before calling _real_inex(). #
3586 # For opclass 3 instructions, the 060 doesn't store the default #
3587 # inexact result out to memory or data register file as it should. #
3588 # This code must emulate the move out by calling fout() before finally #
3589 # exiting through _real_inex(). #
3591 #########################################################################
3596 link.w %a6,&-LOCAL_SIZE # init stack frame
3598 fsave FP_SRC(%a6) # grab the "busy" frame
3600 movm.l &0x0303,EXC_DREGS(%a6) # save d0-d1/a0-a1
3601 fmovm.l %fpcr,%fpsr,%fpiar,USER_FPCR(%a6) # save ctrl regs
3602 fmovm.x &0xc0,EXC_FPREGS(%a6) # save fp0-fp1 on stack
3604 # the FPIAR holds the "current PC" of the faulting instruction
3605 mov.l USER_FPIAR(%a6),EXC_EXTWPTR(%a6)
3607 mov.l EXC_EXTWPTR(%a6),%a0 # fetch instruction addr
3608 addq.l &0x4,EXC_EXTWPTR(%a6) # incr instruction ptr
3609 bsr.l _imem_read_long # fetch the instruction words
3610 mov.l %d0,EXC_OPWORD(%a6)
3612 ##############################################################################
3614 btst &13,%d0 # is instr an fmove out?
3615 bne.w finex_out # fmove out
3618 # the hardware, for "fabs" and "fneg" w/ a long source format, puts the
3619 # longword integer directly into the upper longword of the mantissa along
3620 # w/ an exponent value of 0x401e. we convert this to extended precision here.
3621 bfextu %d0{&19:&3},%d0 # fetch instr size
3622 bne.b finex_cont # instr size is not long
3623 cmpi.w FP_SRC_EX(%a6),&0x401e # is exponent 0x401e?
3624 bne.b finex_cont # no
3626 fmov.l FP_SRC_HI(%a6),%fp0 # load integer src
3627 fmov.x %fp0,FP_SRC(%a6) # store integer as extended precision
3628 mov.w &0xe001,0x2+FP_SRC(%a6)
3631 lea FP_SRC(%a6),%a0 # pass: ptr to src op
3632 bsr.l fix_skewed_ops # fix src op
3634 # Here, we zero the ccode and exception byte field since we're going to
3635 # emulate the whole instruction. Notice, though, that we don't kill the
3636 # INEX1 bit. This is because a packed op has long since been converted
3637 # to extended before arriving here. Therefore, we need to retain the
3638 # INEX1 bit from when the operand was first converted.
3639 andi.l &0x00ff01ff,USER_FPSR(%a6) # zero all but accured field
3641 fmov.l &0x0,%fpcr # zero current control regs
3644 bfextu EXC_EXTWORD(%a6){&0:&6},%d1 # extract upper 6 of cmdreg
3645 cmpi.b %d1,&0x17 # is op an fmovecr?
3646 beq.w finex_fmovcr # yes
3648 lea FP_SRC(%a6),%a0 # pass: ptr to src op
3649 bsr.l set_tag_x # tag the operand type
3650 mov.b %d0,STAG(%a6) # maybe NORM,DENORM
3652 # bits four and five of the fp extension word separate the monadic and dyadic
3653 # operations that can pass through fpsp_inex(). remember that fcmp and ftst
3654 # will never take this exception, but fsincos will.
3655 btst &0x5,1+EXC_CMDREG(%a6) # is operation monadic or dyadic?
3656 beq.b finex_extract # monadic
3658 btst &0x4,1+EXC_CMDREG(%a6) # is operation an fsincos?
3659 bne.b finex_extract # yes
3661 bfextu EXC_CMDREG(%a6){&6:&3},%d0 # dyadic; load dst reg
3662 bsr.l load_fpn2 # load dst into FP_DST
3664 lea FP_DST(%a6),%a0 # pass: ptr to dst op
3665 bsr.l set_tag_x # tag the operand type
3666 cmpi.b %d0,&UNNORM # is operand an UNNORM?
3667 bne.b finex_op2_done # no
3668 bsr.l unnorm_fix # yes; convert to NORM,DENORM,or ZERO
3670 mov.b %d0,DTAG(%a6) # save dst optype tag
3674 mov.b FPCR_MODE(%a6),%d0 # pass rnd prec/mode
3676 mov.b 1+EXC_CMDREG(%a6),%d1
3677 andi.w &0x007f,%d1 # extract extension
3682 mov.l (tbl_unsupp.l,%pc,%d1.w*4),%d1 # fetch routine addr
3683 jsr (tbl_unsupp.l,%pc,%d1.l*1)
3685 # the operation has been emulated. the result is in fp0.
3687 bfextu EXC_CMDREG(%a6){&6:&3},%d0
3691 fmovm.x EXC_FPREGS(%a6),&0xc0 # restore fp0-fp1
3692 fmovm.l USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs
3693 movm.l EXC_DREGS(%a6),&0x0303 # restore d0-d1/a0-a1
3695 frestore FP_SRC(%a6)
3702 mov.b FPCR_MODE(%a6),%d0 # pass rnd prec,mode
3703 mov.b 1+EXC_CMDREG(%a6),%d1
3704 andi.l &0x0000007f,%d1 # pass rom offset
3708 ########################################################################
3711 # the hardware does not save the default result to memory on enabled
3712 # inexact exceptions. we do this here before passing control to
3713 # the user inexact handler.
3715 # byte, word, and long destination format operations can pass
3716 # through here. so can double and single precision.
3717 # although packed opclass three operations can take inexact
3718 # exceptions, they won't pass through here since they are caught
3719 # first by the unsupported data format exception handler. that handler
3720 # sends them directly to _real_inex() if necessary.
3724 mov.b &NORM,STAG(%a6) # src is a NORM
3727 mov.b FPCR_MODE(%a6),%d0 # pass rnd prec,mode
3729 andi.l &0xffff00ff,USER_FPSR(%a6) # zero exception field
3731 lea FP_SRC(%a6),%a0 # pass ptr to src operand
3733 bsr.l fout # store the default result
3737 #########################################################################
3738 # XDEF **************************************************************** #
3739 # _fpsp_dz(): 060FPSP entry point for FP DZ exception. #
3741 # This handler should be the first code executed upon taking #
3742 # the FP DZ exception in an operating system. #
3744 # XREF **************************************************************** #
3745 # _imem_read_long() - read instruction longword from memory #
3746 # fix_skewed_ops() - adjust fsave operand #
3747 # _real_dz() - "callout" exit point from FP DZ handler #
3749 # INPUT *************************************************************** #
3750 # - The system stack contains the FP DZ exception stack. #
3751 # - The fsave frame contains the source operand. #
3753 # OUTPUT ************************************************************** #
3754 # - The system stack contains the FP DZ exception stack. #
3755 # - The fsave frame contains the adjusted source operand. #
3757 # ALGORITHM *********************************************************** #
3758 # In a system where the DZ exception is enabled, the goal is to #
3759 # get to the handler specified at _real_dz(). But, on the 060, when the #
3760 # exception is taken, the input operand in the fsave state frame may #
3761 # be incorrect for some cases and need to be adjusted. So, this package #
3762 # adjusts the operand using fix_skewed_ops() and then branches to #
3765 #########################################################################
3770 link.w %a6,&-LOCAL_SIZE # init stack frame
3772 fsave FP_SRC(%a6) # grab the "busy" frame
3774 movm.l &0x0303,EXC_DREGS(%a6) # save d0-d1/a0-a1
3775 fmovm.l %fpcr,%fpsr,%fpiar,USER_FPCR(%a6) # save ctrl regs
3776 fmovm.x &0xc0,EXC_FPREGS(%a6) # save fp0-fp1 on stack
3778 # the FPIAR holds the "current PC" of the faulting instruction
3779 mov.l USER_FPIAR(%a6),EXC_EXTWPTR(%a6)
3781 mov.l EXC_EXTWPTR(%a6),%a0 # fetch instruction addr
3782 addq.l &0x4,EXC_EXTWPTR(%a6) # incr instruction ptr
3783 bsr.l _imem_read_long # fetch the instruction words
3784 mov.l %d0,EXC_OPWORD(%a6)
3786 ##############################################################################
3789 # here, we simply see if the operand in the fsave frame needs to be "unskewed".
3790 # this would be the case for opclass two operations with a source zero
3791 # in the sgl or dbl format.
3792 lea FP_SRC(%a6),%a0 # pass: ptr to src op
3793 bsr.l fix_skewed_ops # fix src op
3796 fmovm.x EXC_FPREGS(%a6),&0xc0 # restore fp0-fp1
3797 fmovm.l USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs
3798 movm.l EXC_DREGS(%a6),&0x0303 # restore d0-d1/a0-a1
3800 frestore FP_SRC(%a6)
3805 #########################################################################
3806 # XDEF **************************************************************** #
3807 # _fpsp_fline(): 060FPSP entry point for "Line F emulator" #
3808 # exception when the "reduced" version of the #
3809 # FPSP is implemented that does not emulate #
3810 # FP unimplemented instructions. #
3812 # This handler should be the first code executed upon taking a #
3813 # "Line F Emulator" exception in an operating system integrating #
3814 # the reduced version of 060FPSP. #
3816 # XREF **************************************************************** #
3817 # _real_fpu_disabled() - Handle "FPU disabled" exceptions #
3818 # _real_fline() - Handle all other cases (treated equally) #
3820 # INPUT *************************************************************** #
3821 # - The system stack contains a "Line F Emulator" exception #
3824 # OUTPUT ************************************************************** #
3825 # - The system stack is unchanged. #
3827 # ALGORITHM *********************************************************** #
3828 # When a "Line F Emulator" exception occurs in a system where #
3829 # "FPU Unimplemented" instructions will not be emulated, the exception #
3830 # can occur because then FPU is disabled or the instruction is to be #
3831 # classifed as "Line F". This module determines which case exists and #
3832 # calls the appropriate "callout". #
3834 #########################################################################
3839 # check to see if the FPU is disabled. if so, jump to the OS entry
3840 # point for that condition.
3841 cmpi.w 0x6(%sp),&0x402c
3842 beq.l _real_fpu_disabled
3846 #########################################################################
3847 # XDEF **************************************************************** #
3848 # _dcalc_ea(): calc correct <ea> from <ea> stacked on exception #
3850 # XREF **************************************************************** #
3851 # inc_areg() - increment an address register #
3852 # dec_areg() - decrement an address register #
3854 # INPUT *************************************************************** #
3855 # d0 = number of bytes to adjust <ea> by #
3857 # OUTPUT ************************************************************** #
3860 # ALGORITHM *********************************************************** #
3861 # "Dummy" CALCulate Effective Address: #
3862 # The stacked <ea> for FP unimplemented instructions and opclass #
3863 # two packed instructions is correct with the exception of... #
3865 # 1) -(An) : The register is not updated regardless of size. #
3866 # Also, for extended precision and packed, the #
3867 # stacked <ea> value is 8 bytes too big #
3868 # 2) (An)+ : The register is not updated. #
3869 # 3) #<data> : The upper longword of the immediate operand is #
3870 # stacked b,w,l and s sizes are completely stacked. #
3871 # d,x, and p are not. #
3873 #########################################################################
3877 mov.l %d0, %a0 # move # bytes to %a0
3879 mov.b 1+EXC_OPWORD(%a6), %d0 # fetch opcode word
3880 mov.l %d0, %d1 # make a copy
3882 andi.w &0x38, %d0 # extract mode field
3883 andi.l &0x7, %d1 # extract reg field
3885 cmpi.b %d0,&0x18 # is mode (An)+ ?
3888 cmpi.b %d0,&0x20 # is mode -(An) ?
3891 or.w %d1,%d0 # concat mode,reg
3892 cmpi.b %d0,&0x3c # is mode #<data>?
3894 beq.b dcea_imm # yes
3896 mov.l EXC_EA(%a6),%a0 # return <ea>
3899 # need to set immediate data flag here since we'll need to do
3900 # an imem_read to fetch this later.
3902 mov.b &immed_flg,SPCOND_FLG(%a6)
3903 lea ([USER_FPIAR,%a6],0x4),%a0 # no; return <ea>
3906 # here, the <ea> is stacked correctly. however, we must update the
3907 # address register...
3909 mov.l %a0,%d0 # pass amt to inc by
3910 bsr.l inc_areg # inc addr register
3912 mov.l EXC_EA(%a6),%a0 # stacked <ea> is correct
3915 # the <ea> is stacked correctly for all but extended and packed which
3916 # the <ea>s are 8 bytes too large.
3917 # it would make no sense to have a pre-decrement to a7 in supervisor
3918 # mode so we don't even worry about this tricky case here : )
3920 mov.l %a0,%d0 # pass amt to dec by
3921 bsr.l dec_areg # dec addr register
3923 mov.l EXC_EA(%a6),%a0 # stacked <ea> is correct
3925 cmpi.b %d0,&0xc # is opsize ext or packed?
3926 beq.b dcea_pd2 # yes
3929 sub.l &0x8,%a0 # correct <ea>
3930 mov.l %a0,EXC_EA(%a6) # put correct <ea> on stack
3933 #########################################################################
3934 # XDEF **************************************************************** #
3935 # _calc_ea_fout(): calculate correct stacked <ea> for extended #
3936 # and packed data opclass 3 operations. #
3938 # XREF **************************************************************** #
3941 # INPUT *************************************************************** #
3944 # OUTPUT ************************************************************** #
3945 # a0 = return correct effective address #
3947 # ALGORITHM *********************************************************** #
3948 # For opclass 3 extended and packed data operations, the <ea> #
3949 # stacked for the exception is incorrect for -(an) and (an)+ addressing #
3950 # modes. Also, while we're at it, the index register itself must get #
3952 # So, for -(an), we must subtract 8 off of the stacked <ea> value #
3953 # and return that value as the correct <ea> and store that value in An. #
3954 # For (an)+, the stacked <ea> is correct but we must adjust An by +12. #
3956 #########################################################################
3958 # This calc_ea is currently used to retrieve the correct <ea>
3959 # for fmove outs of type extended and packed.
3960 global _calc_ea_fout
3962 mov.b 1+EXC_OPWORD(%a6),%d0 # fetch opcode word
3963 mov.l %d0,%d1 # make a copy
3965 andi.w &0x38,%d0 # extract mode field
3966 andi.l &0x7,%d1 # extract reg field
3968 cmpi.b %d0,&0x18 # is mode (An)+ ?
3971 cmpi.b %d0,&0x20 # is mode -(An) ?
3974 mov.l EXC_EA(%a6),%a0 # stacked <ea> is correct
3977 # (An)+ : extended and packed fmove out
3978 # : stacked <ea> is correct
3979 # : "An" not updated
3981 mov.w (tbl_ceaf_pi.b,%pc,%d1.w*2),%d1
3982 mov.l EXC_EA(%a6),%a0
3983 jmp (tbl_ceaf_pi.b,%pc,%d1.w*1)
3987 short ceaf_pi0 - tbl_ceaf_pi
3988 short ceaf_pi1 - tbl_ceaf_pi
3989 short ceaf_pi2 - tbl_ceaf_pi
3990 short ceaf_pi3 - tbl_ceaf_pi
3991 short ceaf_pi4 - tbl_ceaf_pi
3992 short ceaf_pi5 - tbl_ceaf_pi
3993 short ceaf_pi6 - tbl_ceaf_pi
3994 short ceaf_pi7 - tbl_ceaf_pi
3997 addi.l &0xc,EXC_DREGS+0x8(%a6)
4000 addi.l &0xc,EXC_DREGS+0xc(%a6)
4015 addi.l &0xc,EXC_A6(%a6)
4018 mov.b &mia7_flg,SPCOND_FLG(%a6)
4019 addi.l &0xc,EXC_A7(%a6)
4022 # -(An) : extended and packed fmove out
4023 # : stacked <ea> = actual <ea> + 8
4024 # : "An" not updated
4026 mov.w (tbl_ceaf_pd.b,%pc,%d1.w*2),%d1
4027 mov.l EXC_EA(%a6),%a0
4029 sub.l &0x8,EXC_EA(%a6)
4030 jmp (tbl_ceaf_pd.b,%pc,%d1.w*1)
4034 short ceaf_pd0 - tbl_ceaf_pd
4035 short ceaf_pd1 - tbl_ceaf_pd
4036 short ceaf_pd2 - tbl_ceaf_pd
4037 short ceaf_pd3 - tbl_ceaf_pd
4038 short ceaf_pd4 - tbl_ceaf_pd
4039 short ceaf_pd5 - tbl_ceaf_pd
4040 short ceaf_pd6 - tbl_ceaf_pd
4041 short ceaf_pd7 - tbl_ceaf_pd
4044 mov.l %a0,EXC_DREGS+0x8(%a6)
4047 mov.l %a0,EXC_DREGS+0xc(%a6)
4062 mov.l %a0,EXC_A6(%a6)
4065 mov.l %a0,EXC_A7(%a6)
4066 mov.b &mda7_flg,SPCOND_FLG(%a6)
4070 # This table holds the offsets of the emulation routines for each individual
4071 # math operation relative to the address of this table. Included are
4072 # routines like fadd/fmul/fabs. The transcendentals ARE NOT. This is because
4073 # this table is for the version of the 060FPSP without transcendentals.
4074 # The location within the table is determined by the extension bits of the
4075 # operation longword.
# NOTE(review): the tbl_unsupp label line itself is missing from this
# extract. Entries of the form "tbl_unsupp - tbl_unsupp" (offset 0) mark
# extension-word opcodes NOT emulated by this non-transcendental build
# (transcendentals and undefined encodings). Index comments added below
# for the previously unannotated slots.
4080 long fin - tbl_unsupp # 00: fmove
4081 long fint - tbl_unsupp # 01: fint
4082 long tbl_unsupp - tbl_unsupp # 02: fsinh
4083 long fintrz - tbl_unsupp # 03: fintrz
4084 long fsqrt - tbl_unsupp # 04: fsqrt
4085 long tbl_unsupp - tbl_unsupp # 05: (not emulated)
4086 long tbl_unsupp - tbl_unsupp # 06: flognp1
4087 long tbl_unsupp - tbl_unsupp # 07: (not emulated)
4088 long tbl_unsupp - tbl_unsupp # 08: fetoxm1
4089 long tbl_unsupp - tbl_unsupp # 09: ftanh
4090 long tbl_unsupp - tbl_unsupp # 0a: fatan
4091 long tbl_unsupp - tbl_unsupp # 0b: (not emulated)
4092 long tbl_unsupp - tbl_unsupp # 0c: fasin
4093 long tbl_unsupp - tbl_unsupp # 0d: fatanh
4094 long tbl_unsupp - tbl_unsupp # 0e: fsin
4095 long tbl_unsupp - tbl_unsupp # 0f: ftan
4096 long tbl_unsupp - tbl_unsupp # 10: fetox
4097 long tbl_unsupp - tbl_unsupp # 11: ftwotox
4098 long tbl_unsupp - tbl_unsupp # 12: ftentox
4099 long tbl_unsupp - tbl_unsupp # 13: (not emulated)
4100 long tbl_unsupp - tbl_unsupp # 14: flogn
4101 long tbl_unsupp - tbl_unsupp # 15: flog10
4102 long tbl_unsupp - tbl_unsupp # 16: flog2
4103 long tbl_unsupp - tbl_unsupp # 17: (not emulated)
4104 long fabs - tbl_unsupp # 18: fabs
4105 long tbl_unsupp - tbl_unsupp # 19: fcosh
4106 long fneg - tbl_unsupp # 1a: fneg
4107 long tbl_unsupp - tbl_unsupp # 1b: (not emulated)
4108 long tbl_unsupp - tbl_unsupp # 1c: facos
4109 long tbl_unsupp - tbl_unsupp # 1d: fcos
4110 long tbl_unsupp - tbl_unsupp # 1e: fgetexp
4111 long tbl_unsupp - tbl_unsupp # 1f: fgetman
4112 long fdiv - tbl_unsupp # 20: fdiv
4113 long tbl_unsupp - tbl_unsupp # 21: fmod
4114 long fadd - tbl_unsupp # 22: fadd
4115 long fmul - tbl_unsupp # 23: fmul
4116 long fsgldiv - tbl_unsupp # 24: fsgldiv
4117 long tbl_unsupp - tbl_unsupp # 25: frem
4118 long tbl_unsupp - tbl_unsupp # 26: fscale
4119 long fsglmul - tbl_unsupp # 27: fsglmul
4120 long fsub - tbl_unsupp # 28: fsub
4121 long tbl_unsupp - tbl_unsupp # 29: (not emulated)
4122 long tbl_unsupp - tbl_unsupp # 2a: (not emulated)
4123 long tbl_unsupp - tbl_unsupp # 2b: (not emulated)
4124 long tbl_unsupp - tbl_unsupp # 2c: (not emulated)
4125 long tbl_unsupp - tbl_unsupp # 2d: (not emulated)
4126 long tbl_unsupp - tbl_unsupp # 2e: (not emulated)
4127 long tbl_unsupp - tbl_unsupp # 2f: (not emulated)
4128 long tbl_unsupp - tbl_unsupp # 30: fsincos
4129 long tbl_unsupp - tbl_unsupp # 31: fsincos
4130 long tbl_unsupp - tbl_unsupp # 32: fsincos
4131 long tbl_unsupp - tbl_unsupp # 33: fsincos
4132 long tbl_unsupp - tbl_unsupp # 34: fsincos
4133 long tbl_unsupp - tbl_unsupp # 35: fsincos
4134 long tbl_unsupp - tbl_unsupp # 36: fsincos
4135 long tbl_unsupp - tbl_unsupp # 37: fsincos
4136 long fcmp - tbl_unsupp # 38: fcmp
4137 long tbl_unsupp - tbl_unsupp # 39: (not emulated)
4138 long ftst - tbl_unsupp # 3a: ftst
4139 long tbl_unsupp - tbl_unsupp # 3b: (not emulated)
4140 long tbl_unsupp - tbl_unsupp # 3c: (not emulated)
4141 long tbl_unsupp - tbl_unsupp # 3d: (not emulated)
4142 long tbl_unsupp - tbl_unsupp # 3e: (not emulated)
4143 long tbl_unsupp - tbl_unsupp # 3f: (not emulated)
4144 long fsin - tbl_unsupp # 40: fsmove
4145 long fssqrt - tbl_unsupp # 41: fssqrt
4146 long tbl_unsupp - tbl_unsupp # 42: (not emulated)
4147 long tbl_unsupp - tbl_unsupp # 43: (not emulated)
4148 long fdin - tbl_unsupp # 44: fdmove
4149 long fdsqrt - tbl_unsupp # 45: fdsqrt
4150 long tbl_unsupp - tbl_unsupp # 46: (not emulated)
4151 long tbl_unsupp - tbl_unsupp # 47: (not emulated)
4152 long tbl_unsupp - tbl_unsupp # 48: (not emulated)
4153 long tbl_unsupp - tbl_unsupp # 49: (not emulated)
4154 long tbl_unsupp - tbl_unsupp # 4a: (not emulated)
4155 long tbl_unsupp - tbl_unsupp # 4b: (not emulated)
4156 long tbl_unsupp - tbl_unsupp # 4c: (not emulated)
4157 long tbl_unsupp - tbl_unsupp # 4d: (not emulated)
4158 long tbl_unsupp - tbl_unsupp # 4e: (not emulated)
4159 long tbl_unsupp - tbl_unsupp # 4f: (not emulated)
4160 long tbl_unsupp - tbl_unsupp # 50: (not emulated)
4161 long tbl_unsupp - tbl_unsupp # 51: (not emulated)
4162 long tbl_unsupp - tbl_unsupp # 52: (not emulated)
4163 long tbl_unsupp - tbl_unsupp # 53: (not emulated)
4164 long tbl_unsupp - tbl_unsupp # 54: (not emulated)
4165 long tbl_unsupp - tbl_unsupp # 55: (not emulated)
4166 long tbl_unsupp - tbl_unsupp # 56: (not emulated)
4167 long tbl_unsupp - tbl_unsupp # 57: (not emulated)
4168 long fsabs - tbl_unsupp # 58: fsabs
4169 long tbl_unsupp - tbl_unsupp # 59: (not emulated)
4170 long fsneg - tbl_unsupp # 5a: fsneg
4171 long tbl_unsupp - tbl_unsupp # 5b: (not emulated)
4172 long fdabs - tbl_unsupp # 5c: fdabs
4173 long tbl_unsupp - tbl_unsupp # 5d: (not emulated)
4174 long fdneg - tbl_unsupp # 5e: fdneg
4175 long tbl_unsupp - tbl_unsupp # 5f: (not emulated)
4176 long fsdiv - tbl_unsupp # 60: fsdiv
4177 long tbl_unsupp - tbl_unsupp # 61: (not emulated)
4178 long fsadd - tbl_unsupp # 62: fsadd
4179 long fsmul - tbl_unsupp # 63: fsmul
4180 long fddiv - tbl_unsupp # 64: fddiv
4181 long tbl_unsupp - tbl_unsupp # 65: (not emulated)
4182 long fdadd - tbl_unsupp # 66: fdadd
4183 long fdmul - tbl_unsupp # 67: fdmul
4184 long fssub - tbl_unsupp # 68: fssub
4185 long tbl_unsupp - tbl_unsupp # 69: (not emulated)
4186 long tbl_unsupp - tbl_unsupp # 6a: (not emulated)
4187 long tbl_unsupp - tbl_unsupp # 6b: (not emulated)
4188 long fdsub - tbl_unsupp # 6c: fdsub
4190 #################################################
4191 # Add this here so non-fp modules can compile.
4192 # (smovcr is called from fpsp_inex.)
4197 #########################################################################
4198 # XDEF **************************************************************** #
4199 # fmovm_dynamic(): emulate "fmovm" dynamic instruction #
4201 # XREF **************************************************************** #
4202 # fetch_dreg() - fetch data register #
4203 # {i,d,}mem_read() - fetch data from memory #
4204 # _mem_write() - write data to memory #
4205 # iea_iacc() - instruction memory access error occurred #
4206 # iea_dacc() - data memory access error occurred #
4207 # restore() - restore An index regs if access error occurred #
4209 # INPUT *************************************************************** #
4212 # OUTPUT ************************************************************** #
4213 # If instr is "fmovm Dn,-(A7)" from supervisor mode, #
4214 # d0 = size of dump #
4216 # Else if instruction access error, #
4218 # Else if data access error, #
4220 # a0 = address of fault #
4224 # ALGORITHM *********************************************************** #
4225 # The effective address must be calculated since this is entered #
4226 # from an "Unimplemented Effective Address" exception handler. So, we #
4227 # have our own fcalc_ea() routine here. If an access error is flagged #
4228 # by a _{i,d,}mem_read() call, we must exit through the special #
4230 # The data register is determined and its value loaded to get the #
4231 # string of FP registers affected. This value is used as an index into #
4232 # a lookup table such that we can determine the number of bytes #
4234 # If the instruction is "fmovm.x <ea>,Dn", a _mem_read() is used #
4235 # to read in all FP values. Again, _mem_read() may fail and require a #
4237 # If the instruction is "fmovm.x DN,<ea>", a _mem_write() is used #
4238 # to write all FP values. _mem_write() may also fail. #
4239 # If the instruction is "fmovm.x DN,-(a7)" from supervisor mode, #
4240 # then we return the size of the dump and the string to the caller #
4241 # so that the move can occur outside of this routine. This special #
4242 # case is required so that moves to the system stack are handled #
4246 # fmovm.x dn, <ea> #
4247 # fmovm.x <ea>, dn #
4249 # <WORD 1> <WORD2> #
4250 # 1111 0010 00 |<ea>| 11@& 1000 0$$$ 0000 #
4252 # & = (0): predecrement addressing mode #
4253 # (1): postincrement or control addressing mode #
4254 # @ = (0): move listed regs from memory to the FPU #
4255 # (1): move listed regs from the FPU to memory #
4256 # $$$ : index of data register holding reg select mask #
4259 # If the data register holds a zero, then the #
4260 # instruction is a nop. #
4262 #########################################################################
# fmovm_dynamic: emulate "fmovm.x Dn,<ea>" / "fmovm.x <ea>,Dn" where the
# register-select mask lives in a data register (see header above).
# NOTE(review): several label lines (fmovm_dynamic, fmovm_out_ctrl,
# fmovm_out_ctrl_fpN, fmovm_data_in, fmovm_data_in_fpN, ...) are missing
# from this extract; section comments below are hedged accordingly.
4264 global fmovm_dynamic
4267 # extract the data register in which the bit string resides...
4268 mov.b 1+EXC_EXTWORD(%a6),%d1 # fetch extword
4269 andi.w &0x70,%d1 # extract reg bits
4270 lsr.b &0x4,%d1 # shift into lo bits
4272 # fetch the bit string into d0...
4273 bsr.l fetch_dreg # fetch reg string
4275 andi.l &0x000000ff,%d0 # keep only lo byte
4277 mov.l %d0,-(%sp) # save strg
# size table gives 12 * popcount(mask) bytes to move (tbl_fmovm_size below)
4278 mov.b (tbl_fmovm_size.w,%pc,%d0),%d0
4279 mov.l %d0,-(%sp) # save size
4280 bsr.l fmovm_calc_ea # calculate <ea>
4281 mov.l (%sp)+,%d0 # restore size
4282 mov.l (%sp)+,%d1 # restore strg
4284 # if the bit string is a zero, then the operation is a no-op
4285 # but, make sure that we've calculated ea and advanced the opword pointer
4286 beq.w fmovm_data_done
4288 # separate move ins from move outs...
# extword bit 5 is the direction bit "@" (see header: 0 = memory -> FPU)
4289 btst &0x5,EXC_EXTWORD(%a6) # is it a move in or out?
4290 beq.w fmovm_data_in # bit clear -> it's a move in
# --- move out (FPU -> memory); fall-through path ---
4296 btst &0x4,EXC_EXTWORD(%a6) # control or predecrement?
4297 bne.w fmovm_out_ctrl # control
4299 ############################
4301 # for predecrement mode, the bit string is the opposite of both control
4302 # operations and postincrement mode. (bit7 = FP7 ... bit0 = FP0)
4303 # here, we convert it to be just like the others...
4304 mov.b (tbl_fmovm_convert.w,%pc,%d1.w*1),%d1
4306 btst &0x5,EXC_SR(%a6) # user or supervisor mode?
4307 beq.b fmovm_out_ctrl # user
4310 cmpi.b SPCOND_FLG(%a6),&mda7_flg # is <ea> mode -(a7)?
4311 bne.b fmovm_out_ctrl
4313 # the operation was unfortunately an: fmovm.x dn,-(sp)
4314 # called from supervisor mode.
4315 # we're also passing "size" and "strg" back to the calling routine
4318 ############################
# fmovm_out_ctrl (label missing from extract): dump the selected FP regs
# to a temporary area on the supervisor stack, then copy them to user mem.
4320 mov.l %a0,%a1 # move <ea> to a1
4322 sub.l %d0,%sp # subtract size of dump
# mask is tested MSB-first: bit7 = FP0 ... bit0 = FP7; lsl shifts the
# next register's bit into the sign position for each bpl test.
4325 tst.b %d1 # should FP0 be moved?
4326 bpl.b fmovm_out_ctrl_fp1 # no
4328 mov.l 0x0+EXC_FP0(%a6),(%a0)+ # yes
4329 mov.l 0x4+EXC_FP0(%a6),(%a0)+
4330 mov.l 0x8+EXC_FP0(%a6),(%a0)+
4333 lsl.b &0x1,%d1 # should FP1 be moved?
4334 bpl.b fmovm_out_ctrl_fp2 # no
4336 mov.l 0x0+EXC_FP1(%a6),(%a0)+ # yes
4337 mov.l 0x4+EXC_FP1(%a6),(%a0)+
4338 mov.l 0x8+EXC_FP1(%a6),(%a0)+
# FP2..FP7 still live in the FPU, so dump them with real fmovm.x;
# NOTE(review): the post-fmovm "(%a0)+" adjustment lines are missing here.
4341 lsl.b &0x1,%d1 # should FP2 be moved?
4342 bpl.b fmovm_out_ctrl_fp3 # no
4344 fmovm.x &0x20,(%a0) # yes
4348 lsl.b &0x1,%d1 # should FP3 be moved?
4349 bpl.b fmovm_out_ctrl_fp4 # no
4351 fmovm.x &0x10,(%a0) # yes
4355 lsl.b &0x1,%d1 # should FP4 be moved?
4356 bpl.b fmovm_out_ctrl_fp5 # no
4358 fmovm.x &0x08,(%a0) # yes
4362 lsl.b &0x1,%d1 # should FP5 be moved?
4363 bpl.b fmovm_out_ctrl_fp6 # no
4365 fmovm.x &0x04,(%a0) # yes
4369 lsl.b &0x1,%d1 # should FP6 be moved?
4370 bpl.b fmovm_out_ctrl_fp7 # no
4372 fmovm.x &0x02,(%a0) # yes
4376 lsl.b &0x1,%d1 # should FP7 be moved?
4377 bpl.b fmovm_out_ctrl_done # no
4379 fmovm.x &0x01,(%a0) # yes
4382 fmovm_out_ctrl_done:
4383 mov.l %a1,L_SCR1(%a6)
4385 lea (%sp),%a0 # pass: supervisor src
4386 mov.l %d0,-(%sp) # save size
4387 bsr.l _dmem_write # copy data to user mem
4390 add.l %d0,%sp # clear fpreg data from stack
4392 tst.l %d1 # did dstore err?
4393 bne.w fmovm_out_err # yes
# fmovm_data_in (label missing): read the dump from user memory into a
# temporary stack area, then load the selected FP registers from it.
4401 mov.l %a0,L_SCR1(%a6)
4403 sub.l %d0,%sp # make room for fpregs
4406 mov.l %d1,-(%sp) # save bit string for later
4407 mov.l %d0,-(%sp) # save # of bytes
4409 bsr.l _dmem_read # copy data from user mem
4411 mov.l (%sp)+,%d0 # retrieve # of bytes
4413 tst.l %d1 # did dfetch fail?
4414 bne.w fmovm_in_err # yes
4416 mov.l (%sp)+,%d1 # load bit string
4418 lea (%sp),%a0 # addr of stack
# same MSB-first mask walk as the move-out path above
4420 tst.b %d1 # should FP0 be moved?
4421 bpl.b fmovm_data_in_fp1 # no
4423 mov.l (%a0)+,0x0+EXC_FP0(%a6) # yes
4424 mov.l (%a0)+,0x4+EXC_FP0(%a6)
4425 mov.l (%a0)+,0x8+EXC_FP0(%a6)
4428 lsl.b &0x1,%d1 # should FP1 be moved?
4429 bpl.b fmovm_data_in_fp2 # no
4431 mov.l (%a0)+,0x0+EXC_FP1(%a6) # yes
4432 mov.l (%a0)+,0x4+EXC_FP1(%a6)
4433 mov.l (%a0)+,0x8+EXC_FP1(%a6)
4436 lsl.b &0x1,%d1 # should FP2 be moved?
4437 bpl.b fmovm_data_in_fp3 # no
4439 fmovm.x (%a0)+,&0x20 # yes
4442 lsl.b &0x1,%d1 # should FP3 be moved?
4443 bpl.b fmovm_data_in_fp4 # no
4445 fmovm.x (%a0)+,&0x10 # yes
4448 lsl.b &0x1,%d1 # should FP4 be moved?
4449 bpl.b fmovm_data_in_fp5 # no
4451 fmovm.x (%a0)+,&0x08 # yes
4454 lsl.b &0x1,%d1 # should FP5 be moved?
4455 bpl.b fmovm_data_in_fp6 # no
4457 fmovm.x (%a0)+,&0x04 # yes
4460 lsl.b &0x1,%d1 # should FP6 be moved?
4461 bpl.b fmovm_data_in_fp7 # no
4463 fmovm.x (%a0)+,&0x02 # yes
4466 lsl.b &0x1,%d1 # should FP7 be moved?
4467 bpl.b fmovm_data_in_done # no
4469 fmovm.x (%a0)+,&0x01 # yes
4472 add.l %d0,%sp # remove fpregs from stack
4475 #####################################
4480 ##############################################################################
4483 # table indexed by the operation's bit string that gives the number
4484 # of bytes that will be moved.
4486 # number of bytes = (# of 1's in bit string) * 12(bytes/fpreg)
# tbl_fmovm_size (label line missing from extract): 256-entry table
# indexed by the 8-bit register-select mask; each entry is the transfer
# size in bytes = 12 * popcount(mask), since each extended-precision
# fpreg occupies 12 bytes.
4489 byte 0x00,0x0c,0x0c,0x18,0x0c,0x18,0x18,0x24
4490 byte 0x0c,0x18,0x18,0x24,0x18,0x24,0x24,0x30
4491 byte 0x0c,0x18,0x18,0x24,0x18,0x24,0x24,0x30
4492 byte 0x18,0x24,0x24,0x30,0x24,0x30,0x30,0x3c
4493 byte 0x0c,0x18,0x18,0x24,0x18,0x24,0x24,0x30
4494 byte 0x18,0x24,0x24,0x30,0x24,0x30,0x30,0x3c
4495 byte 0x18,0x24,0x24,0x30,0x24,0x30,0x30,0x3c
4496 byte 0x24,0x30,0x30,0x3c,0x30,0x3c,0x3c,0x48
4497 byte 0x0c,0x18,0x18,0x24,0x18,0x24,0x24,0x30
4498 byte 0x18,0x24,0x24,0x30,0x24,0x30,0x30,0x3c
4499 byte 0x18,0x24,0x24,0x30,0x24,0x30,0x30,0x3c
4500 byte 0x24,0x30,0x30,0x3c,0x30,0x3c,0x3c,0x48
4501 byte 0x18,0x24,0x24,0x30,0x24,0x30,0x30,0x3c
4502 byte 0x24,0x30,0x30,0x3c,0x30,0x3c,0x3c,0x48
4503 byte 0x24,0x30,0x30,0x3c,0x30,0x3c,0x3c,0x48
4504 byte 0x30,0x3c,0x3c,0x48,0x3c,0x48,0x48,0x54
4505 byte 0x0c,0x18,0x18,0x24,0x18,0x24,0x24,0x30
4506 byte 0x18,0x24,0x24,0x30,0x24,0x30,0x30,0x3c
4507 byte 0x18,0x24,0x24,0x30,0x24,0x30,0x30,0x3c
4508 byte 0x24,0x30,0x30,0x3c,0x30,0x3c,0x3c,0x48
4509 byte 0x18,0x24,0x24,0x30,0x24,0x30,0x30,0x3c
4510 byte 0x24,0x30,0x30,0x3c,0x30,0x3c,0x3c,0x48
4511 byte 0x24,0x30,0x30,0x3c,0x30,0x3c,0x3c,0x48
4512 byte 0x30,0x3c,0x3c,0x48,0x3c,0x48,0x48,0x54
4513 byte 0x18,0x24,0x24,0x30,0x24,0x30,0x30,0x3c
4514 byte 0x24,0x30,0x30,0x3c,0x30,0x3c,0x3c,0x48
4515 byte 0x24,0x30,0x30,0x3c,0x30,0x3c,0x3c,0x48
4516 byte 0x30,0x3c,0x3c,0x48,0x3c,0x48,0x48,0x54
4517 byte 0x24,0x30,0x30,0x3c,0x30,0x3c,0x3c,0x48
4518 byte 0x30,0x3c,0x3c,0x48,0x3c,0x48,0x48,0x54
4519 byte 0x30,0x3c,0x3c,0x48,0x3c,0x48,0x48,0x54
4520 byte 0x3c,0x48,0x48,0x54,0x48,0x54,0x54,0x60
4523 # table to convert a pre-decrement bit string into a post-increment
4524 # or control bit string.
# tbl_fmovm_convert (label line missing from extract): 256-entry table
# whose entry[i] is the 8-bit bit-reversal of i (e.g. 0x01 -> 0x80,
# 0x02 -> 0x40, 0x03 -> 0xc0), converting a predecrement-mode mask
# (bit7 = FP7) into the control/postincrement convention (bit7 = FP0).
4535 byte 0x00,0x80,0x40,0xc0,0x20,0xa0,0x60,0xe0
4536 byte 0x10,0x90,0x50,0xd0,0x30,0xb0,0x70,0xf0
4537 byte 0x08,0x88,0x48,0xc8,0x28,0xa8,0x68,0xe8
4538 byte 0x18,0x98,0x58,0xd8,0x38,0xb8,0x78,0xf8
4539 byte 0x04,0x84,0x44,0xc4,0x24,0xa4,0x64,0xe4
4540 byte 0x14,0x94,0x54,0xd4,0x34,0xb4,0x74,0xf4
4541 byte 0x0c,0x8c,0x4c,0xcc,0x2c,0xac,0x6c,0xec
4542 byte 0x1c,0x9c,0x5c,0xdc,0x3c,0xbc,0x7c,0xfc
4543 byte 0x02,0x82,0x42,0xc2,0x22,0xa2,0x62,0xe2
4544 byte 0x12,0x92,0x52,0xd2,0x32,0xb2,0x72,0xf2
4545 byte 0x0a,0x8a,0x4a,0xca,0x2a,0xaa,0x6a,0xea
4546 byte 0x1a,0x9a,0x5a,0xda,0x3a,0xba,0x7a,0xfa
4547 byte 0x06,0x86,0x46,0xc6,0x26,0xa6,0x66,0xe6
4548 byte 0x16,0x96,0x56,0xd6,0x36,0xb6,0x76,0xf6
4549 byte 0x0e,0x8e,0x4e,0xce,0x2e,0xae,0x6e,0xee
4550 byte 0x1e,0x9e,0x5e,0xde,0x3e,0xbe,0x7e,0xfe
4551 byte 0x01,0x81,0x41,0xc1,0x21,0xa1,0x61,0xe1
4552 byte 0x11,0x91,0x51,0xd1,0x31,0xb1,0x71,0xf1
4553 byte 0x09,0x89,0x49,0xc9,0x29,0xa9,0x69,0xe9
4554 byte 0x19,0x99,0x59,0xd9,0x39,0xb9,0x79,0xf9
4555 byte 0x05,0x85,0x45,0xc5,0x25,0xa5,0x65,0xe5
4556 byte 0x15,0x95,0x55,0xd5,0x35,0xb5,0x75,0xf5
4557 byte 0x0d,0x8d,0x4d,0xcd,0x2d,0xad,0x6d,0xed
4558 byte 0x1d,0x9d,0x5d,0xdd,0x3d,0xbd,0x7d,0xfd
4559 byte 0x03,0x83,0x43,0xc3,0x23,0xa3,0x63,0xe3
4560 byte 0x13,0x93,0x53,0xd3,0x33,0xb3,0x73,0xf3
4561 byte 0x0b,0x8b,0x4b,0xcb,0x2b,0xab,0x6b,0xeb
4562 byte 0x1b,0x9b,0x5b,0xdb,0x3b,0xbb,0x7b,0xfb
4563 byte 0x07,0x87,0x47,0xc7,0x27,0xa7,0x67,0xe7
4564 byte 0x17,0x97,0x57,0xd7,0x37,0xb7,0x77,0xf7
4565 byte 0x0f,0x8f,0x4f,0xcf,0x2f,0xaf,0x6f,0xef
4566 byte 0x1f,0x9f,0x5f,0xdf,0x3f,0xbf,0x7f,0xff
# fmovm_calc_ea: compute the effective address for the fmovm emulation.
# In:  d0 = transfer size in bytes (moved to a0 for the pre/post-dec/inc
#      handlers, which use it as the increment/decrement amount)
# Out: a0 = <ea> (set by the per-mode handler jumped to below)
4568 global fmovm_calc_ea
4569 ###############################################
4570 # _fmovm_calc_ea: calculate effective address #
4571 ###############################################
4573 mov.l %d0,%a0 # move # bytes to a0
4575 # currently, MODE and REG are taken from the EXC_OPWORD. this could be
4576 # easily changed if they were inputs passed in registers.
4577 mov.w EXC_OPWORD(%a6),%d0 # fetch opcode word
4578 mov.w %d0,%d1 # make a copy
# d0 keeps the combined 6-bit mode+reg field (bits 5-0), which indexes
# the 64-entry tbl_fea_mode jump table directly (8 modes x 8 regs).
4580 andi.w &0x3f,%d0 # extract mode field
4581 andi.l &0x7,%d1 # extract reg field
4583 # jump to the corresponding function for each {MODE,REG} pair.
4584 mov.w (tbl_fea_mode.b,%pc,%d0.w*2),%d0 # fetch jmp distance
4585 jmp (tbl_fea_mode.b,%pc,%d0.w*1) # jmp to correct ea mode
# tbl_fea_mode (label line missing from extract): 64-entry jump-offset
# table, one row of 8 entries per addressing mode. Entries that encode
# "tbl_fea_mode - tbl_fea_mode" (offset 0) are invalid modes for fmovm.
# mode 0: Dn -- invalid for fmovm
4589 short tbl_fea_mode - tbl_fea_mode
4590 short tbl_fea_mode - tbl_fea_mode
4591 short tbl_fea_mode - tbl_fea_mode
4592 short tbl_fea_mode - tbl_fea_mode
4593 short tbl_fea_mode - tbl_fea_mode
4594 short tbl_fea_mode - tbl_fea_mode
4595 short tbl_fea_mode - tbl_fea_mode
4596 short tbl_fea_mode - tbl_fea_mode
# mode 1: An -- invalid for fmovm
4598 short tbl_fea_mode - tbl_fea_mode
4599 short tbl_fea_mode - tbl_fea_mode
4600 short tbl_fea_mode - tbl_fea_mode
4601 short tbl_fea_mode - tbl_fea_mode
4602 short tbl_fea_mode - tbl_fea_mode
4603 short tbl_fea_mode - tbl_fea_mode
4604 short tbl_fea_mode - tbl_fea_mode
4605 short tbl_fea_mode - tbl_fea_mode
# mode 2: (An)
4607 short faddr_ind_a0 - tbl_fea_mode
4608 short faddr_ind_a1 - tbl_fea_mode
4609 short faddr_ind_a2 - tbl_fea_mode
4610 short faddr_ind_a3 - tbl_fea_mode
4611 short faddr_ind_a4 - tbl_fea_mode
4612 short faddr_ind_a5 - tbl_fea_mode
4613 short faddr_ind_a6 - tbl_fea_mode
4614 short faddr_ind_a7 - tbl_fea_mode
# mode 3: (An)+
4616 short faddr_ind_p_a0 - tbl_fea_mode
4617 short faddr_ind_p_a1 - tbl_fea_mode
4618 short faddr_ind_p_a2 - tbl_fea_mode
4619 short faddr_ind_p_a3 - tbl_fea_mode
4620 short faddr_ind_p_a4 - tbl_fea_mode
4621 short faddr_ind_p_a5 - tbl_fea_mode
4622 short faddr_ind_p_a6 - tbl_fea_mode
4623 short faddr_ind_p_a7 - tbl_fea_mode
# mode 4: -(An)
4625 short faddr_ind_m_a0 - tbl_fea_mode
4626 short faddr_ind_m_a1 - tbl_fea_mode
4627 short faddr_ind_m_a2 - tbl_fea_mode
4628 short faddr_ind_m_a3 - tbl_fea_mode
4629 short faddr_ind_m_a4 - tbl_fea_mode
4630 short faddr_ind_m_a5 - tbl_fea_mode
4631 short faddr_ind_m_a6 - tbl_fea_mode
4632 short faddr_ind_m_a7 - tbl_fea_mode
# mode 5: (d16,An)
4634 short faddr_ind_disp_a0 - tbl_fea_mode
4635 short faddr_ind_disp_a1 - tbl_fea_mode
4636 short faddr_ind_disp_a2 - tbl_fea_mode
4637 short faddr_ind_disp_a3 - tbl_fea_mode
4638 short faddr_ind_disp_a4 - tbl_fea_mode
4639 short faddr_ind_disp_a5 - tbl_fea_mode
4640 short faddr_ind_disp_a6 - tbl_fea_mode
4641 short faddr_ind_disp_a7 - tbl_fea_mode
# mode 6: (d8,An,Xn) and full-extension-word forms (shared handler)
4643 short faddr_ind_ext - tbl_fea_mode
4644 short faddr_ind_ext - tbl_fea_mode
4645 short faddr_ind_ext - tbl_fea_mode
4646 short faddr_ind_ext - tbl_fea_mode
4647 short faddr_ind_ext - tbl_fea_mode
4648 short faddr_ind_ext - tbl_fea_mode
4649 short faddr_ind_ext - tbl_fea_mode
4650 short faddr_ind_ext - tbl_fea_mode
# mode 7: abs.w, abs.l, (d16,PC), PC-indexed; remaining regs invalid
4652 short fabs_short - tbl_fea_mode
4653 short fabs_long - tbl_fea_mode
4654 short fpc_ind - tbl_fea_mode
4655 short fpc_ind_ext - tbl_fea_mode
4656 short tbl_fea_mode - tbl_fea_mode
4657 short tbl_fea_mode - tbl_fea_mode
4658 short tbl_fea_mode - tbl_fea_mode
4659 short tbl_fea_mode - tbl_fea_mode
# Per-register <ea> handlers for modes 2/3/4/5. NOTE(review): every
# faddr_ind_* label line (and some interior lines, e.g. the %d0->%d1
# copies before "add.l %a0,%d1") is missing from this extract; comments
# are hedged accordingly. On entry a0 holds the transfer size in bytes
# (set by fmovm_calc_ea), used as the post-inc/pre-dec adjustment.
4661 ###################################
4662 # Address register indirect: (An) #
4663 ###################################
# one stub per register; a0/a1 come from the saved frame, a2-a5 are live,
# a6 was saved at 0(%a6), a7 at EXC_A7.
4665 mov.l EXC_DREGS+0x8(%a6),%a0 # Get current a0
4669 mov.l EXC_DREGS+0xc(%a6),%a0 # Get current a1
4673 mov.l %a2,%a0 # Get current a2
4677 mov.l %a3,%a0 # Get current a3
4681 mov.l %a4,%a0 # Get current a4
4685 mov.l %a5,%a0 # Get current a5
4689 mov.l (%a6),%a0 # Get current a6
4693 mov.l EXC_A7(%a6),%a0 # Get current a7
4696 #####################################################
4697 # Address register indirect w/ postincrement: (An)+ #
4698 #####################################################
# pattern per register: fetch, add size (a0), write back incremented value
4700 mov.l EXC_DREGS+0x8(%a6),%d0 # Get current a0
4702 add.l %a0,%d1 # Increment
4703 mov.l %d1,EXC_DREGS+0x8(%a6) # Save incr value
4708 mov.l EXC_DREGS+0xc(%a6),%d0 # Get current a1
4710 add.l %a0,%d1 # Increment
4711 mov.l %d1,EXC_DREGS+0xc(%a6) # Save incr value
4716 mov.l %a2,%d0 # Get current a2
4718 add.l %a0,%d1 # Increment
4719 mov.l %d1,%a2 # Save incr value
4724 mov.l %a3,%d0 # Get current a3
4726 add.l %a0,%d1 # Increment
4727 mov.l %d1,%a3 # Save incr value
4732 mov.l %a4,%d0 # Get current a4
4734 add.l %a0,%d1 # Increment
4735 mov.l %d1,%a4 # Save incr value
4740 mov.l %a5,%d0 # Get current a5
4742 add.l %a0,%d1 # Increment
4743 mov.l %d1,%a5 # Save incr value
4748 mov.l (%a6),%d0 # Get current a6
4750 add.l %a0,%d1 # Increment
4751 mov.l %d1,(%a6) # Save incr value
# (a7)+ needs the "(sp)+ special case" flag for later supervisor handling
4756 mov.b &mia7_flg,SPCOND_FLG(%a6) # set "special case" flag
4758 mov.l EXC_A7(%a6),%d0 # Get current a7
4760 add.l %a0,%d1 # Increment
4761 mov.l %d1,EXC_A7(%a6) # Save incr value
4765 ####################################################
4766 # Address register indirect w/ predecrement: -(An) #
4767 ####################################################
# pattern per register: fetch, subtract size (a0), write back; the
# decremented value is also the resulting <ea>.
4769 mov.l EXC_DREGS+0x8(%a6),%d0 # Get current a0
4770 sub.l %a0,%d0 # Decrement
4771 mov.l %d0,EXC_DREGS+0x8(%a6) # Save decr value
4776 mov.l EXC_DREGS+0xc(%a6),%d0 # Get current a1
4777 sub.l %a0,%d0 # Decrement
4778 mov.l %d0,EXC_DREGS+0xc(%a6) # Save decr value
4783 mov.l %a2,%d0 # Get current a2
4784 sub.l %a0,%d0 # Decrement
4785 mov.l %d0,%a2 # Save decr value
4790 mov.l %a3,%d0 # Get current a3
4791 sub.l %a0,%d0 # Decrement
4792 mov.l %d0,%a3 # Save decr value
4797 mov.l %a4,%d0 # Get current a4
4798 sub.l %a0,%d0 # Decrement
4799 mov.l %d0,%a4 # Save decr value
4804 mov.l %a5,%d0 # Get current a5
4805 sub.l %a0,%d0 # Decrement
4806 mov.l %d0,%a5 # Save decr value
4811 mov.l (%a6),%d0 # Get current a6
4812 sub.l %a0,%d0 # Decrement
4813 mov.l %d0,(%a6) # Save decr value
# -(a7) needs the "-(sp) special case" flag for later supervisor handling
4818 mov.b &mda7_flg,SPCOND_FLG(%a6) # set "special case" flag
4820 mov.l EXC_A7(%a6),%d0 # Get current a7
4821 sub.l %a0,%d0 # Decrement
4822 mov.l %d0,EXC_A7(%a6) # Save decr value
4826 ########################################################
4827 # Address register indirect w/ displacement: (d16, An) #
4828 ########################################################
# pattern per register: fetch d16 extension word from the instruction
# stream (via EXC_EXTWPTR), sign-extend it ("mov.w %d0,%a0" sign-extends
# on m68k), then add the base register value.
4830 mov.l EXC_EXTWPTR(%a6),%a0 # fetch instruction addr
4831 addq.l &0x2,EXC_EXTWPTR(%a6) # incr instruction ptr
4832 bsr.l _imem_read_word
4834 tst.l %d1 # did ifetch fail?
4835 bne.l iea_iacc # yes
4837 mov.w %d0,%a0 # sign extend displacement
4839 add.l EXC_DREGS+0x8(%a6),%a0 # a0 + d16
4843 mov.l EXC_EXTWPTR(%a6),%a0 # fetch instruction addr
4844 addq.l &0x2,EXC_EXTWPTR(%a6) # incr instruction ptr
4845 bsr.l _imem_read_word
4847 tst.l %d1 # did ifetch fail?
4848 bne.l iea_iacc # yes
4850 mov.w %d0,%a0 # sign extend displacement
4852 add.l EXC_DREGS+0xc(%a6),%a0 # a1 + d16
4856 mov.l EXC_EXTWPTR(%a6),%a0 # fetch instruction addr
4857 addq.l &0x2,EXC_EXTWPTR(%a6) # incr instruction ptr
4858 bsr.l _imem_read_word
4860 tst.l %d1 # did ifetch fail?
4861 bne.l iea_iacc # yes
4863 mov.w %d0,%a0 # sign extend displacement
4865 add.l %a2,%a0 # a2 + d16
4869 mov.l EXC_EXTWPTR(%a6),%a0 # fetch instruction addr
4870 addq.l &0x2,EXC_EXTWPTR(%a6) # incr instruction ptr
4871 bsr.l _imem_read_word
4873 tst.l %d1 # did ifetch fail?
4874 bne.l iea_iacc # yes
4876 mov.w %d0,%a0 # sign extend displacement
4878 add.l %a3,%a0 # a3 + d16
4882 mov.l EXC_EXTWPTR(%a6),%a0 # fetch instruction addr
4883 addq.l &0x2,EXC_EXTWPTR(%a6) # incr instruction ptr
4884 bsr.l _imem_read_word
4886 tst.l %d1 # did ifetch fail?
4887 bne.l iea_iacc # yes
4889 mov.w %d0,%a0 # sign extend displacement
4891 add.l %a4,%a0 # a4 + d16
4895 mov.l EXC_EXTWPTR(%a6),%a0 # fetch instruction addr
4896 addq.l &0x2,EXC_EXTWPTR(%a6) # incr instruction ptr
4897 bsr.l _imem_read_word
4899 tst.l %d1 # did ifetch fail?
4900 bne.l iea_iacc # yes
4902 mov.w %d0,%a0 # sign extend displacement
4904 add.l %a5,%a0 # a5 + d16
4908 mov.l EXC_EXTWPTR(%a6),%a0 # fetch instruction addr
4909 addq.l &0x2,EXC_EXTWPTR(%a6) # incr instruction ptr
4910 bsr.l _imem_read_word
4912 tst.l %d1 # did ifetch fail?
4913 bne.l iea_iacc # yes
4915 mov.w %d0,%a0 # sign extend displacement
4917 add.l (%a6),%a0 # a6 + d16
4921 mov.l EXC_EXTWPTR(%a6),%a0 # fetch instruction addr
4922 addq.l &0x2,EXC_EXTWPTR(%a6) # incr instruction ptr
4923 bsr.l _imem_read_word
4925 tst.l %d1 # did ifetch fail?
4926 bne.l iea_iacc # yes
4928 mov.w %d0,%a0 # sign extend displacement
4930 add.l EXC_A7(%a6),%a0 # a7 + d16
4933 ########################################################################
4934 # Address register indirect w/ index(8-bit displacement): (d8, An, Xn) #
4935 # " " " w/ " (base displacement): (bd, An, Xn) #
4936 # Memory indirect postindexed: ([bd, An], Xn, od) #
4937 # Memory indirect preindexed: ([bd, An, Xn], od) #
4938 ########################################################################
# NOTE(review): the faddr_ind_ext label, the reg-number setup before the
# first fetch_dreg, and the brief-vs-full extension word test are missing
# from this extract.
4941 bsr.l fetch_dreg # fetch base areg
4944 mov.l EXC_EXTWPTR(%a6),%a0 # fetch instruction addr
4945 addq.l &0x2,EXC_EXTWPTR(%a6) # incr instruction ptr
4946 bsr.l _imem_read_word # fetch extword in d0
4948 tst.l %d1 # did ifetch fail?
4949 bne.l iea_iacc # yes
4956 mov.l %d0,L_SCR1(%a6) # hold opword
4960 andi.w &0xf,%d1 # extract index regno
4962 # count on fetch_dreg() not to alter a0...
4963 bsr.l fetch_dreg # fetch index
4965 mov.l %d2,-(%sp) # save d2
4966 mov.l L_SCR1(%a6),%d2 # fetch opword
# extension-word bit 11: 0 = sign-extended word index, 1 = long index
4968 btst &0xb,%d2 # is it word or long?
4970 ext.l %d0 # sign extend word index
4974 andi.l &0x3,%d1 # extract scale value
4976 lsl.l %d1,%d0 # shift index by scale
# brief-format d8 sits in the extword's low byte (in d2)
4978 extb.l %d2 # sign extend displacement
4979 add.l %d2,%d0 # index + disp
4980 add.l %d0,%a0 # An + (index + disp)
4982 mov.l (%sp)+,%d2 # restore old d2
# Absolute, PC-relative, and full-extension/memory-indirect <ea>
# calculation. NOTE(review): this extract is missing many label lines
# (fabs_short, fabs_long, fpc_ind, fpc_ind_ext, fpii8_long,
# fcalc_mem_ind, the bd/od size-dispatch branches, and the final
# "mov.l %d0,%a0 / rts" epilogues); comments are hedged accordingly.
4985 ###########################
4986 # Absolute short: (XXX).W #
4987 ###########################
4989 mov.l EXC_EXTWPTR(%a6),%a0 # fetch instruction addr
4990 addq.l &0x2,EXC_EXTWPTR(%a6) # incr instruction ptr
4991 bsr.l _imem_read_word # fetch short address
4993 tst.l %d1 # did ifetch fail?
4994 bne.l iea_iacc # yes
# "mov.w %d0,%a0" sign-extends the 16-bit address, per (XXX).W semantics
4996 mov.w %d0,%a0 # return <ea> in a0
4999 ##########################
5000 # Absolute long: (XXX).L #
5001 ##########################
5003 mov.l EXC_EXTWPTR(%a6),%a0 # fetch instruction addr
5004 addq.l &0x4,EXC_EXTWPTR(%a6) # incr instruction ptr
5005 bsr.l _imem_read_long # fetch long address
5007 tst.l %d1 # did ifetch fail?
5008 bne.l iea_iacc # yes
5010 mov.l %d0,%a0 # return <ea> in a0
5013 #######################################################
5014 # Program counter indirect w/ displacement: (d16, PC) #
5015 #######################################################
5017 mov.l EXC_EXTWPTR(%a6),%a0 # fetch instruction addr
5018 addq.l &0x2,EXC_EXTWPTR(%a6) # incr instruction ptr
5019 bsr.l _imem_read_word # fetch word displacement
5021 tst.l %d1 # did ifetch fail?
5022 bne.l iea_iacc # yes
5024 mov.w %d0,%a0 # sign extend displacement
5026 add.l EXC_EXTWPTR(%a6),%a0 # pc + d16
5028 # _imem_read_word() increased the extwptr by 2. need to adjust here.
5029 subq.l &0x2,%a0 # adjust <ea>
5032 ##########################################################
5033 # PC indirect w/ index(8-bit displacement): (d8, PC, An) #
5034 # " " w/ " (base displacement): (bd, PC, An) #
5035 # PC memory indirect postindexed: ([bd, PC], Xn, od) #
5036 # PC memory indirect preindexed: ([bd, PC, Xn], od) #
5037 ##########################################################
5039 mov.l EXC_EXTWPTR(%a6),%a0 # fetch instruction addr
5040 addq.l &0x2,EXC_EXTWPTR(%a6) # incr instruction ptr
5041 bsr.l _imem_read_word # fetch ext word
5043 tst.l %d1 # did ifetch fail?
5044 bne.l iea_iacc # yes
5046 mov.l EXC_EXTWPTR(%a6),%a0 # put base in a0
5047 subq.l &0x2,%a0 # adjust base
# extword bit 8: 0 = brief format (d8), 1 = full format -> memory indirect
5049 btst &0x8,%d0 # is disp only 8 bits?
5050 bne.w fcalc_mem_ind # calc memory indirect
5052 mov.l %d0,L_SCR1(%a6) # store opword
5054 mov.l %d0,%d1 # make extword copy
5055 rol.w &0x4,%d1 # rotate reg num into place
5056 andi.w &0xf,%d1 # extract register number
5058 # count on fetch_dreg() not to alter a0...
5059 bsr.l fetch_dreg # fetch index
5061 mov.l %d2,-(%sp) # save d2
5062 mov.l L_SCR1(%a6),%d2 # fetch opword
5064 btst &0xb,%d2 # is index word or long?
5065 bne.b fpii8_long # long
5066 ext.l %d0 # sign extend word index
5069 rol.w &0x7,%d1 # rotate scale value into place
5070 andi.l &0x3,%d1 # extract scale value
5072 lsl.l %d1,%d0 # shift index by scale
5074 extb.l %d2 # sign extend displacement
5075 add.l %d2,%d0 # disp + index
5076 add.l %d0,%a0 # An + (index + disp)
5078 mov.l (%sp)+,%d2 # restore temp register
# --- fcalc_mem_ind (label missing): full-format extension word.
# Register roles in this section: d2 = index, d3 = base (+bd),
# d4 = outer displacement, d5 = extension word.
5086 btst &0x6,%d0 # is the index suppressed?
5089 movm.l &0x3c00,-(%sp) # save d2-d5
5091 mov.l %d0,%d5 # put extword in d5
5092 mov.l %a0,%d3 # put base in d3
5094 clr.l %d2 # yes, so index = 0
5099 mov.l %d0,L_SCR1(%a6) # save d0 (opword)
5100 bfextu %d0{&16:&4},%d1 # fetch dreg index
5103 movm.l &0x3c00,-(%sp) # save d2-d5
5104 mov.l %d0,%d2 # put index in d2
5105 mov.l L_SCR1(%a6),%d5
5108 btst &0xb,%d5 # is index word or long?
5113 bfextu %d5{&21:&2},%d0
5116 # base address (passed as parameter in d3):
5117 # we clear the value here if it should actually be suppressed.
5119 btst &0x7,%d5 # is the bd suppressed?
5123 # base displacement:
5125 bfextu %d5{&26:&2},%d0 # get bd size
5126 # beq.l fmovm_error # if (size == 0) it's reserved
# bd fetch: long (4-byte) variant, then word (2-byte) variant
5132 mov.l EXC_EXTWPTR(%a6),%a0 # fetch instruction addr
5133 addq.l &0x4,EXC_EXTWPTR(%a6) # incr instruction ptr
5134 bsr.l _imem_read_long
5136 tst.l %d1 # did ifetch fail?
5137 bne.l fcea_iacc # yes
5142 mov.l EXC_EXTWPTR(%a6),%a0 # fetch instruction addr
5143 addq.l &0x2,EXC_EXTWPTR(%a6) # incr instruction ptr
5144 bsr.l _imem_read_word
5146 tst.l %d1 # did ifetch fail?
5147 bne.l fcea_iacc # yes
5149 ext.l %d0 # sign extend bd
5152 add.l %d0,%d3 # base += bd
5154 # outer displacement:
5156 bfextu %d5{&30:&2},%d0 # is od suppressed?
# od fetch: long variant, then word variant
5163 mov.l EXC_EXTWPTR(%a6),%a0 # fetch instruction addr
5164 addq.l &0x4,EXC_EXTWPTR(%a6) # incr instruction ptr
5165 bsr.l _imem_read_long
5167 tst.l %d1 # did ifetch fail?
5168 bne.l fcea_iacc # yes
5173 mov.l EXC_EXTWPTR(%a6),%a0 # fetch instruction addr
5174 addq.l &0x2,EXC_EXTWPTR(%a6) # incr instruction ptr
5175 bsr.l _imem_read_word
5177 tst.l %d1 # did ifetch fail?
5178 bne.l fcea_iacc # yes
5180 ext.l %d0 # sign extend od
# extword bit 2 selects post- vs pre-indexing for memory indirect
5189 btst &0x2,%d5 # pre or post indexing?
# postindexed: fetch [base+bd], then add index and od
5193 bsr.l _dmem_read_long
5195 tst.l %d1 # did dfetch fail?
5196 bne.w fcea_err # yes
5198 add.l %d2,%d0 # <ea> += index
5199 add.l %d4,%d0 # <ea> += od
# preindexed: add index before the memory fetch, then add od
5203 add.l %d2,%d3 # preindexing
5205 bsr.l _dmem_read_long
5207 tst.l %d1 # did dfetch fail?
5208 bne.w fcea_err # yes
5210 add.l %d4,%d0 # ea += od
# no memory indirection: <ea> = (base + bd) + index
5214 add.l %d2,%d3 # ea = (base + bd) + index
5219 movm.l (%sp)+,&0x003c # restore d2-d5
5222 #########################################################
# error exits (labels missing): unwind saved d2-d5 before rerouting
5226 movm.l (%sp)+,&0x003c # restore d2-d5
5231 movm.l (%sp)+,&0x003c # restore d2-d5
5244 mov.l L_SCR1(%a6),%a0
5247 #########################################################################
5248 # XDEF **************************************************************** #
5249 # fmovm_ctrl(): emulate fmovm.l of control registers instr #
5251 # XREF **************************************************************** #
5252 # _imem_read_long() - read longword from memory #
5253 # iea_iacc() - _imem_read_long() failed; error recovery #
5255 # INPUT *************************************************************** #
5258 # OUTPUT ************************************************************** #
5259 # If _imem_read_long() doesn't fail: #
5260 # USER_FPCR(a6) = new FPCR value #
5261 # USER_FPSR(a6) = new FPSR value #
5262 # USER_FPIAR(a6) = new FPIAR value #
5264 # ALGORITHM *********************************************************** #
5265 # Decode the instruction type by looking at the extension word #
5266 # in order to see how many control registers to fetch from memory. #
5267 # Fetch them using _imem_read_long(). If this fetch fails, exit through #
5268 # the special access error exit handler iea_iacc(). #
5270 # Instruction word decoding: #
5272 # fmovem.l #<data>, {FPIAR&|FPCR&|FPSR} #
5275 # 1111 0010 00 111100 100$ $$00 0000 0000 #
5282 #########################################################################
# fmovm_ctrl: emulate "fmovem.l #<data>,<ctrl regs>" (see header above).
# The extword's high bits select which of FPCR/FPSR/FPIAR are loaded;
# values are fetched as immediates from the instruction stream via
# EXC_EXTWPTR. NOTE(review): the fmovm_ctrl and fctrl_in_5/6/7 label
# lines, and the single-register fall-through cases, are missing from
# this extract.
5286 mov.b EXC_EXTWORD(%a6),%d0 # fetch reg select bits
5287 cmpi.b %d0,&0x9c # fpcr & fpsr & fpiar ?
5288 beq.w fctrl_in_7 # yes
5289 cmpi.b %d0,&0x98 # fpcr & fpsr ?
5290 beq.w fctrl_in_6 # yes
5291 cmpi.b %d0,&0x94 # fpcr & fpiar ?
5292 beq.b fctrl_in_5 # yes
5294 # fmovem.l #<data>, fpsr/fpiar
5296 mov.l EXC_EXTWPTR(%a6),%a0 # fetch instruction addr
5297 addq.l &0x4,EXC_EXTWPTR(%a6) # incr instruction ptr
5298 bsr.l _imem_read_long # fetch FPSR from mem
5300 tst.l %d1 # did ifetch fail?
5301 bne.l iea_iacc # yes
5303 mov.l %d0,USER_FPSR(%a6) # store new FPSR to stack
5304 mov.l EXC_EXTWPTR(%a6),%a0 # fetch instruction addr
5305 addq.l &0x4,EXC_EXTWPTR(%a6) # incr instruction ptr
5306 bsr.l _imem_read_long # fetch FPIAR from mem
5308 tst.l %d1 # did ifetch fail?
5309 bne.l iea_iacc # yes
5311 mov.l %d0,USER_FPIAR(%a6) # store new FPIAR to stack
5314 # fmovem.l #<data>, fpcr/fpiar
5316 mov.l EXC_EXTWPTR(%a6),%a0 # fetch instruction addr
5317 addq.l &0x4,EXC_EXTWPTR(%a6) # incr instruction ptr
5318 bsr.l _imem_read_long # fetch FPCR from mem
5320 tst.l %d1 # did ifetch fail?
5321 bne.l iea_iacc # yes
5323 mov.l %d0,USER_FPCR(%a6) # store new FPCR to stack
5324 mov.l EXC_EXTWPTR(%a6),%a0 # fetch instruction addr
5325 addq.l &0x4,EXC_EXTWPTR(%a6) # incr instruction ptr
5326 bsr.l _imem_read_long # fetch FPIAR from mem
5328 tst.l %d1 # did ifetch fail?
5329 bne.l iea_iacc # yes
5331 mov.l %d0,USER_FPIAR(%a6) # store new FPIAR to stack
5334 # fmovem.l #<data>, fpcr/fpsr
5336 mov.l EXC_EXTWPTR(%a6),%a0 # fetch instruction addr
5337 addq.l &0x4,EXC_EXTWPTR(%a6) # incr instruction ptr
5338 bsr.l _imem_read_long # fetch FPCR from mem
5340 tst.l %d1 # did ifetch fail?
5341 bne.l iea_iacc # yes
5343 mov.l %d0,USER_FPCR(%a6) # store new FPCR to mem
5344 mov.l EXC_EXTWPTR(%a6),%a0 # fetch instruction addr
5345 addq.l &0x4,EXC_EXTWPTR(%a6) # incr instruction ptr
5346 bsr.l _imem_read_long # fetch FPSR from mem
5348 tst.l %d1 # did ifetch fail?
5349 bne.l iea_iacc # yes
5351 mov.l %d0,USER_FPSR(%a6) # store new FPSR to mem
5354 # fmovem.l #<data>, fpcr/fpsr/fpiar
5356 mov.l EXC_EXTWPTR(%a6),%a0 # fetch instruction addr
5357 addq.l &0x4,EXC_EXTWPTR(%a6) # incr instruction ptr
5358 bsr.l _imem_read_long # fetch FPCR from mem
5360 tst.l %d1 # did ifetch fail?
5361 bne.l iea_iacc # yes
5363 mov.l %d0,USER_FPCR(%a6) # store new FPCR to mem
5364 mov.l EXC_EXTWPTR(%a6),%a0 # fetch instruction addr
5365 addq.l &0x4,EXC_EXTWPTR(%a6) # incr instruction ptr
5366 bsr.l _imem_read_long # fetch FPSR from mem
5368 tst.l %d1 # did ifetch fail?
5369 bne.l iea_iacc # yes
5371 mov.l %d0,USER_FPSR(%a6) # store new FPSR to mem
5372 mov.l EXC_EXTWPTR(%a6),%a0 # fetch instruction addr
5373 addq.l &0x4,EXC_EXTWPTR(%a6) # incr instruction ptr
5374 bsr.l _imem_read_long # fetch FPIAR from mem
5376 tst.l %d1 # did ifetch fail?
5377 bne.l iea_iacc # yes
5379 mov.l %d0,USER_FPIAR(%a6) # store new FPIAR to mem
5382 ##########################################################################
5384 #########################################################################
5385 # XDEF **************************************************************** #
5386 # addsub_scaler2(): scale inputs to fadd/fsub such that no #
5387 # OVFL/UNFL exceptions will result #
5389 # XREF **************************************************************** #
5390 # norm() - normalize mantissa after adjusting exponent #
5392 # INPUT *************************************************************** #
5393 # FP_SRC(a6) = fp op1(src) #
5394 # FP_DST(a6) = fp op2(dst) #
5396 # OUTPUT ************************************************************** #
5397 # FP_SRC(a6) = fp op1 scaled(src) #
5398 # FP_DST(a6) = fp op2 scaled(dst) #
5399 # d0 = scale amount #
5401 # ALGORITHM *********************************************************** #
5402 # If the DST exponent is > the SRC exponent, set the DST exponent #
5403 # equal to 0x3fff and scale the SRC exponent by the value that the #
5404 # DST exponent was scaled by. If the SRC exponent is greater or equal, #
5405 # do the opposite. Return this scale factor in d0. #
5406 # If the two exponents differ by > the number of mantissa bits #
5407 # plus two, then set the smallest exponent to a very small value as a #
5410 #########################################################################
5412 global addsub_scaler2
# Copy the src operand into scratch FP_SCR0 and the dst operand into
# scratch FP_SCR1 so that scaling does not disturb the original operands.
5414 mov.l SRC_HI(%a0),FP_SCR0_HI(%a6)
5415 mov.l DST_HI(%a1),FP_SCR1_HI(%a6)
5416 mov.l SRC_LO(%a0),FP_SCR0_LO(%a6)
5417 mov.l DST_LO(%a1),FP_SCR1_LO(%a6)
5418 mov.w SRC_EX(%a0),%d0
5419 mov.w DST_EX(%a1),%d1
5420 mov.w %d0,FP_SCR0_EX(%a6)
5421 mov.w %d1,FP_SCR1_EX(%a6)
# Keep the raw {sign,exp} words around: src in L_SCR1, dst in L_SCR1+2.
5425 mov.w %d0,L_SCR1(%a6) # store src exponent
5426 mov.w %d1,2+L_SCR1(%a6) # store dst exponent
5428 cmp.w %d0, %d1 # is src exp >= dst exp?
5431 # dst exp is > src exp; scale dst to exp = 0x3fff
5433 bsr.l scale_to_zero_dst
5434 mov.l %d0,-(%sp) # save scale factor
5436 cmpi.b STAG(%a6),&DENORM # is src denormalized?
5439 lea FP_SCR0(%a6),%a0
5440 bsr.l norm # normalize the denorm; result is new exp
5441 neg.w %d0 # new exp = -(shft val)
5442 mov.w %d0,L_SCR1(%a6) # insert new exp
# If (larger exp - (mantissalen+2)) >= smaller exp, the smaller operand
# cannot affect the result; clamp its exponent to 1 instead of scaling.
5445 mov.w 2+L_SCR1(%a6),%d0
5446 subi.w &mantissalen+2,%d0 # subtract mantissalen+2 from larger exp
5448 cmp.w %d0,L_SCR1(%a6) # is difference >= len(mantissa)+2?
5451 mov.w L_SCR1(%a6),%d0
5452 add.w 0x2(%sp),%d0 # scale src exponent by scale factor
5453 mov.w FP_SCR0_EX(%a6),%d1
5455 or.w %d1,%d0 # concat {sgn,new exp}
5456 mov.w %d0,FP_SCR0_EX(%a6) # insert new src exponent
5458 mov.l (%sp)+,%d0 # return SCALE factor
# src exponent is negligibly small: force the biased exponent to 1.
5462 andi.w &0x8000,FP_SCR0_EX(%a6) # zero src exponent
5463 bset &0x0,1+FP_SCR0_EX(%a6) # set exp = 1
5465 mov.l (%sp)+,%d0 # return SCALE factor
5468 # src exp is >= dst exp; scale src to exp = 0x3fff
5470 bsr.l scale_to_zero_src
5471 mov.l %d0,-(%sp) # save scale factor
5473 cmpi.b DTAG(%a6),&DENORM # is dst denormalized?
5475 lea FP_SCR1(%a6),%a0
5476 bsr.l norm # normalize the denorm; result is new exp
5477 neg.w %d0 # new exp = -(shft val)
5478 mov.w %d0,2+L_SCR1(%a6) # insert new exp
5481 mov.w L_SCR1(%a6),%d0
5482 subi.w &mantissalen+2,%d0 # subtract mantissalen+2 from larger exp
5484 cmp.w %d0,2+L_SCR1(%a6) # is difference >= len(mantissa)+2?
5487 mov.w 2+L_SCR1(%a6),%d0
5488 add.w 0x2(%sp),%d0 # scale dst exponent by scale factor
5489 mov.w FP_SCR1_EX(%a6),%d1
5491 or.w %d1,%d0 # concat {sgn,new exp}
5492 mov.w %d0,FP_SCR1_EX(%a6) # insert new dst exponent
5494 mov.l (%sp)+,%d0 # return SCALE factor
# dst exponent is negligibly small: force the biased exponent to 1.
5498 andi.w &0x8000,FP_SCR1_EX(%a6) # zero dst exponent
5499 bset &0x0,1+FP_SCR1_EX(%a6) # set exp = 1
5501 mov.l (%sp)+,%d0 # return SCALE factor
5504 ##########################################################################
5506 #########################################################################
5507 # XDEF **************************************************************** #
5508 # scale_to_zero_src(): scale the exponent of extended precision #
5509 # value at FP_SCR0(a6). #
5511 # XREF **************************************************************** #
5512 # norm() - normalize the mantissa if the operand was a DENORM #
5514 # INPUT *************************************************************** #
5515 # FP_SCR0(a6) = extended precision operand to be scaled #
5517 # OUTPUT ************************************************************** #
5518 # FP_SCR0(a6) = scaled extended precision operand #
5519 # d0 = scale value #
5521 # ALGORITHM *********************************************************** #
5522 # Set the exponent of the input operand to 0x3fff. Save the value #
5523 # of the difference between the original and new exponent. Then, #
5524 # normalize the operand if it was a DENORM. Add this normalization #
5525 # value to the previous value. Return the result. #
5527 #########################################################################
5529 global scale_to_zero_src
5531 mov.w FP_SCR0_EX(%a6),%d1 # extract operand's {sgn,exp}
5532 mov.w %d1,%d0 # make a copy
5534 andi.l &0x7fff,%d1 # extract operand's exponent
5536 andi.w &0x8000,%d0 # extract operand's sgn
5537 or.w &0x3fff,%d0 # insert new operand's exponent(=0)
5539 mov.w %d0,FP_SCR0_EX(%a6) # insert biased exponent
5541 cmpi.b STAG(%a6),&DENORM # is operand a denorm?
5542 beq.b stzs_denorm # normalize the DENORM
5546 sub.l %d1,%d0 # scale = BIAS + (-exp)
# DENORM path: normalize first; the negated shift count becomes the
# effective exponent, which is folded into the scale computation above.
5551 lea FP_SCR0(%a6),%a0 # pass ptr to src op
5552 bsr.l norm # normalize denorm
5553 neg.l %d0 # new exponent = -(shft val)
5554 mov.l %d0,%d1 # prepare for op_norm call
5555 bra.b stzs_norm # finish scaling
5559 #########################################################################
5560 # XDEF **************************************************************** #
5561 # scale_sqrt(): scale the input operand exponent so a subsequent #
5562 # fsqrt operation won't take an exception. #
5564 # XREF **************************************************************** #
5565 # norm() - normalize the mantissa if the operand was a DENORM #
5567 # INPUT *************************************************************** #
5568 # FP_SCR0(a6) = extended precision operand to be scaled #
5570 # OUTPUT ************************************************************** #
5571 # FP_SCR0(a6) = scaled extended precision operand #
5572 # d0 = scale value #
5574 # ALGORITHM *********************************************************** #
5575 # If the input operand is a DENORM, normalize it. #
5576 # If the exponent of the input operand is even, set the exponent #
5577 # to 0x3ffe and return a scale factor of "(exp-0x3ffe)/2". If the #
5578 # exponent of the input operand is odd, set the exponent to 0x3fff and #
5579 # return a scale factor of "(exp-0x3fff)/2". #
5581 #########################################################################
5585 cmpi.b STAG(%a6),&DENORM # is operand a denorm?
5586 beq.b ss_denorm # normalize the DENORM
5588 mov.w FP_SCR0_EX(%a6),%d1 # extract operand's {sgn,exp}
5589 andi.l &0x7fff,%d1 # extract operand's exponent
5591 andi.w &0x8000,FP_SCR0_EX(%a6) # extract operand's sgn
5593 btst &0x0,%d1 # is exp even or odd?
# odd exponent: force the biased exponent to 0x3fff
5596 ori.w &0x3fff,FP_SCR0_EX(%a6) # insert new operand's exponent
5599 sub.l %d1,%d0 # scale = BIAS + (-exp)
5600 asr.l &0x1,%d0 # divide scale factor by 2
# even exponent: force the biased exponent to 0x3ffe
5604 ori.w &0x3ffe,FP_SCR0_EX(%a6) # insert new operand's exponent
5607 sub.l %d1,%d0 # scale = BIAS + (-exp)
5608 asr.l &0x1,%d0 # divide scale factor by 2
# DENORM path: normalize, then dispatch on the parity of the new exponent.
5612 lea FP_SCR0(%a6),%a0 # pass ptr to src op
5613 bsr.l norm # normalize denorm
5615 btst &0x0,%d0 # is exp even or odd?
5616 beq.b ss_denorm_even
5618 ori.w &0x3fff,FP_SCR0_EX(%a6) # insert new operand's exponent
5621 asr.l &0x1,%d0 # divide scale factor by 2
5625 ori.w &0x3ffe,FP_SCR0_EX(%a6) # insert new operand's exponent
5628 asr.l &0x1,%d0 # divide scale factor by 2
5633 #########################################################################
5634 # XDEF **************************************************************** #
5635 # scale_to_zero_dst(): scale the exponent of extended precision #
5636 # value at FP_SCR1(a6). #
5638 # XREF **************************************************************** #
5639 # norm() - normalize the mantissa if the operand was a DENORM #
5641 # INPUT *************************************************************** #
5642 # FP_SCR1(a6) = extended precision operand to be scaled #
5644 # OUTPUT ************************************************************** #
5645 # FP_SCR1(a6) = scaled extended precision operand #
5646 # d0 = scale value #
5648 # ALGORITHM *********************************************************** #
5649 # Set the exponent of the input operand to 0x3fff. Save the value #
5650 # of the difference between the original and new exponent. Then, #
5651 # normalize the operand if it was a DENORM. Add this normalization #
5652 # value to the previous value. Return the result. #
5654 #########################################################################
5656 global scale_to_zero_dst
5658 mov.w FP_SCR1_EX(%a6),%d1 # extract operand's {sgn,exp}
5659 mov.w %d1,%d0 # make a copy
5661 andi.l &0x7fff,%d1 # extract operand's exponent
5663 andi.w &0x8000,%d0 # extract operand's sgn
5664 or.w &0x3fff,%d0 # insert new operand's exponent(=0)
5666 mov.w %d0,FP_SCR1_EX(%a6) # insert biased exponent
5668 cmpi.b DTAG(%a6),&DENORM # is operand a denorm?
5669 beq.b stzd_denorm # normalize the DENORM
5673 sub.l %d1,%d0 # scale = BIAS + (-exp)
# DENORM path: normalize first, then finish the scale computation above.
5677 lea FP_SCR1(%a6),%a0 # pass ptr to dst op
5678 bsr.l norm # normalize denorm
5679 neg.l %d0 # new exponent = -(shft val)
5680 mov.l %d0,%d1 # prepare for op_norm call
5681 bra.b stzd_norm # finish scaling
5683 ##########################################################################
5685 #########################################################################
5686 # XDEF **************************************************************** #
5687 # res_qnan(): return default result w/ QNAN operand for dyadic #
5688 # res_snan(): return default result w/ SNAN operand for dyadic #
5689 # res_qnan_1op(): return dflt result w/ QNAN operand for monadic #
5690 # res_snan_1op(): return dflt result w/ SNAN operand for monadic #
5692 # XREF **************************************************************** #
5695 # INPUT *************************************************************** #
5696 # FP_SRC(a6) = pointer to extended precision src operand #
5697 # FP_DST(a6) = pointer to extended precision dst operand #
5699 # OUTPUT ************************************************************** #
5700 # fp0 = default result #
5702 # ALGORITHM *********************************************************** #
5703 # If either operand (but not both operands) of an operation is a #
5704 # nonsignalling NAN, then that NAN is returned as the result. If both #
5705 # operands are nonsignalling NANs, then the destination operand #
5706 # nonsignalling NAN is returned as the result. #
5707 # If either operand to an operation is a signalling NAN (SNAN), #
5708 # then, the SNAN bit is set in the FPSR EXC byte. If the SNAN trap #
5709 # enable bit is set in the FPCR, then the trap is taken and the #
5710 # destination is not modified. If the SNAN trap enable bit is not set, #
5711 # then the SNAN is converted to a nonsignalling NAN (by setting the #
5712 # SNAN bit in the operand to one), and the operation continues as #
5713 # described in the preceding paragraph, for nonsignalling NANs. #
5714 # Make sure the appropriate FPSR bits are set before exiting. #
5716 #########################################################################
# Dyadic entry: classify the dst operand, then the src operand. The
# branch instructions between these compares are elided in this extract.
5722 cmp.b DTAG(%a6), &SNAN # is the dst an SNAN?
5724 cmp.b DTAG(%a6), &QNAN # is the dst a QNAN?
5727 cmp.b STAG(%a6), &QNAN
# src SNAN: quiet it (set the mantissa's SNAN bit), post NAN/AIOP/SNAN.
5732 bset &0x6, FP_SRC_HI(%a6) # set SNAN bit
5733 or.l &nan_mask+aiop_mask+snan_mask, USER_FPSR(%a6)
5734 lea FP_SRC(%a6), %a0
# src QNAN: just post the NAN flag and return the src operand.
5739 or.l &nan_mask, USER_FPSR(%a6)
5740 lea FP_SRC(%a6), %a0
# dst SNAN: quiet it and post NAN/AIOP/SNAN; return the dst operand.
5743 or.l &nan_mask+aiop_mask+snan_mask, USER_FPSR(%a6)
5744 bset &0x6, FP_DST_HI(%a6) # set SNAN bit
5745 lea FP_DST(%a6), %a0
# dst QNAN: return it, but if the src was an SNAN still post AIOP/SNAN.
5748 lea FP_DST(%a6), %a0
5749 cmp.b STAG(%a6), &SNAN
5751 or.l &aiop_mask+snan_mask, USER_FPSR(%a6)
# Common exit: set NAN (and N if the returned NAN is negative), then
# load the chosen NAN into fp0 as the default result.
5753 or.l &nan_mask, USER_FPSR(%a6)
5755 btst &0x7, FTEMP_EX(%a0) # is NAN neg?
5757 or.l &neg_mask, USER_FPSR(%a6)
5759 fmovm.x (%a0), &0x80
5762 #########################################################################
5763 # XDEF **************************************************************** #
5764 # res_operr(): return default result during operand error #
5766 # XREF **************************************************************** #
5769 # INPUT *************************************************************** #
5772 # OUTPUT ************************************************************** #
5773 # fp0 = default operand error result #
5775 # ALGORITHM *********************************************************** #
5776 # A nonsignalling NAN is returned as the default result when #
5777 # an operand error occurs for the following cases: #
5779 # Multiply: (Infinity x Zero) #
5780 # Divide : (Zero / Zero) || (Infinity / Infinity) #
5782 #########################################################################
# Post OPERR/AIOP/NAN in the FPSR and return the default QNAN in fp0.
5786 or.l &nan_mask+operr_mask+aiop_mask, USER_FPSR(%a6)
5787 fmovm.x nan_return(%pc), &0x80
# default NAN constant: positive sign, max exponent, all-ones mantissa
5791 long 0x7fff0000, 0xffffffff, 0xffffffff
5793 #########################################################################
5794 # XDEF **************************************************************** #
5795 # _denorm(): denormalize an intermediate result #
5797 # XREF **************************************************************** #
5800 # INPUT *************************************************************** #
5801 # a0 = points to the operand to be denormalized #
5802 # (in the internal extended format) #
5804 # d0 = rounding precision #
5806 # OUTPUT ************************************************************** #
5807 # a0 = pointer to the denormalized result #
5808 # (in the internal extended format) #
5810 # d0 = guard,round,sticky #
5812 # ALGORITHM *********************************************************** #
5813 # According to the exponent underflow threshold for the given #
5814 # precision, shift the mantissa bits to the right in order to raise the #
5815 # exponent of the operand to the threshold value. While shifting the #
5816 # mantissa bits right, maintain the value of the guard, round, and #
5819 # (1) _denorm() is called by the underflow routines #
5820 # (2) _denorm() does NOT affect the status register #
5822 #########################################################################
5825 # table of exponent threshold values for each precision
5835 # Load the exponent threshold for the precision selected and check
5836 # to see if (threshold - exponent) is > 65 in which case we can
5837 # simply calculate the sticky bit and zero the mantissa. otherwise
5838 # we have to call the denormalization routine.
5840 lsr.b &0x2, %d0 # shift prec to lo bits
5841 mov.w (tbl_thresh.b,%pc,%d0.w*2), %d1 # load prec threshold
5842 mov.w %d1, %d0 # copy d1 into d0
5843 sub.w FTEMP_EX(%a0), %d0 # diff = threshold - exp
5844 cmpi.w %d0, &66 # is diff > 65? (mant + g,r bits)
5845 bpl.b denorm_set_stky # yes; just calc sticky
# A real shift is required: seed the g,r,s with the sticky bit if a
# previous inexact result (INEX2) has already been recorded.
5847 clr.l %d0 # clear g,r,s
5848 btst &inex2_bit, FPSR_EXCEPT(%a6) # yes; was INEX2 set?
5849 beq.b denorm_call # no; don't change anything
5850 bset &29, %d0 # yes; set sticky bit
5853 bsr.l dnrm_lp # denormalize the number
5857 # all bits would have been shifted off during the denorm so simply
5858 # calculate if the sticky should be set and clear the entire mantissa.
5861 mov.l &0x20000000, %d0 # set sticky bit in return value
5862 mov.w %d1, FTEMP_EX(%a0) # load exp with threshold
5863 clr.l FTEMP_HI(%a0) # zero hi(mantissa)
5864 clr.l FTEMP_LO(%a0) # zero lo(mantissa)
5868 # dnrm_lp(): normalize exponent/mantissa to specified threshold #
5871 # %a0 : points to the operand to be denormalized #
5872 # %d0{31:29} : initial guard,round,sticky #
5873 # %d1{15:0} : denormalization threshold #
5875 # %a0 : points to the denormalized operand #
5876 # %d0{31:29} : final guard,round,sticky #
5879 # *** Local Equates *** #
5880 set GRS, L_SCR2 # g,r,s temp storage
5881 set FTEMP_LO2, L_SCR1 # FTEMP_LO copy
5887 # make a copy of FTEMP_LO and place the g,r,s bits directly after it
5888 # in memory so as to make the bitfield extraction for denormalization easier.
5890 mov.l FTEMP_LO(%a0), FTEMP_LO2(%a6) # make FTEMP_LO copy
5891 mov.l %d0, GRS(%a6) # place g,r,s after it
5894 # check to see how much less than the underflow threshold the operand
5897 mov.l %d1, %d0 # copy the denorm threshold
5898 sub.w FTEMP_EX(%a0), %d1 # d1 = threshold - uns exponent
5899 ble.b dnrm_no_lp # d1 <= 0
5900 cmpi.w %d1, &0x20 # is ( 0 <= d1 < 32) ?
5902 cmpi.w %d1, &0x40 # is (32 <= d1 < 64) ?
5904 bra.w case_3 # (d1 >= 64)
5907 # No normalization necessary
5910 mov.l GRS(%a6), %d0 # restore original g,r,s
# case 1: shift amount 1-31; part of FTEMP_HI survives in the high word.
5916 # %d0 = denorm threshold
5917 # %d1 = "n" = amt to shift
5919 # ---------------------------------------------------------
5920 # | FTEMP_HI | FTEMP_LO |grs000.........000|
5921 # ---------------------------------------------------------
5922 # <-(32 - n)-><-(n)-><-(32 - n)-><-(n)-><-(32 - n)-><-(n)->
5931 # <-(n)-><-(32 - n)-><------(32)-------><------(32)------->
5932 # ---------------------------------------------------------
5933 # |0.....0| NEW_HI | NEW_FTEMP_LO |grs |
5934 # ---------------------------------------------------------
5937 mov.l %d2, -(%sp) # create temp storage
5939 mov.w %d0, FTEMP_EX(%a0) # exponent = denorm threshold
5941 sub.w %d1, %d0 # %d0 = 32 - %d1
5943 cmpi.w %d1, &29 # is shft amt >= 29
5944 blt.b case1_extract # no; no fix needed
5946 or.b %d2, 3+FTEMP_LO2(%a6)
# Extract the three shifted fields with bitfield ops on the in-memory copy.
5949 bfextu FTEMP_HI(%a0){&0:%d0}, %d2 # %d2 = new FTEMP_HI
5950 bfextu FTEMP_HI(%a0){%d0:&32}, %d1 # %d1 = new FTEMP_LO
5951 bfextu FTEMP_LO2(%a6){%d0:&32}, %d0 # %d0 = new G,R,S
5953 mov.l %d2, FTEMP_HI(%a0) # store new FTEMP_HI
5954 mov.l %d1, FTEMP_LO(%a0) # store new FTEMP_LO
5956 bftst %d0{&2:&30} # were bits shifted off?
5957 beq.b case1_sticky_clear # no; go finish
5958 bset &rnd_stky_bit, %d0 # yes; set sticky bit
5961 and.l &0xe0000000, %d0 # clear all but G,R,S
5962 mov.l (%sp)+, %d2 # restore temp register
# case 2: shift amount 32-63; FTEMP_HI supplies the new FTEMP_LO.
5968 # %d0 = denorm threshold
5969 # %d1 = "n" = amt to shift
5971 # ---------------------------------------------------------
5972 # | FTEMP_HI | FTEMP_LO |grs000.........000|
5973 # ---------------------------------------------------------
5974 # <-(32 - n)-><-(n)-><-(32 - n)-><-(n)-><-(32 - n)-><-(n)->
5977 # \ \ -------------------
5978 # \ -------------------- \
5979 # ------------------- \ \
5983 # <-------(32)------><-(n)-><-(32 - n)-><------(32)------->
5984 # ---------------------------------------------------------
5985 # |0...............0|0....0| NEW_LO |grs |
5986 # ---------------------------------------------------------
5989 mov.l %d2, -(%sp) # create temp storage
5991 mov.w %d0, FTEMP_EX(%a0) # exponent = denorm threshold
5992 subi.w &0x20, %d1 # %d1 now between 0 and 32
5994 sub.w %d1, %d0 # %d0 = 32 - %d1
5996 # subtle step here; or in the g,r,s at the bottom of FTEMP_LO to minimize
5997 # the number of bits to check for the sticky detect.
5998 # it only plays a role in shift amounts of 61-63.
6000 or.b %d2, 3+FTEMP_LO2(%a6)
6002 bfextu FTEMP_HI(%a0){&0:%d0}, %d2 # %d2 = new FTEMP_LO
6003 bfextu FTEMP_HI(%a0){%d0:&32}, %d1 # %d1 = new G,R,S
6005 bftst %d1{&2:&30} # were any bits shifted off?
6006 bne.b case2_set_sticky # yes; set sticky bit
6007 bftst FTEMP_LO2(%a6){%d0:&31} # were any bits shifted off?
6008 bne.b case2_set_sticky # yes; set sticky bit
6010 mov.l %d1, %d0 # move new G,R,S to %d0
6014 mov.l %d1, %d0 # move new G,R,S to %d0
6015 bset &rnd_stky_bit, %d0 # set sticky bit
6018 clr.l FTEMP_HI(%a0) # store FTEMP_HI = 0
6019 mov.l %d2, FTEMP_LO(%a0) # store FTEMP_LO
6020 and.l &0xe0000000, %d0 # clear all but G,R,S
6022 mov.l (%sp)+,%d2 # restore temp register
# case 3: shift amount >= 64; at most the g,r,s bits survive.
6028 # %d0 = denorm threshold
6029 # %d1 = amt to shift
6032 mov.w %d0, FTEMP_EX(%a0) # insert denorm threshold
6034 cmpi.w %d1, &65 # is shift amt > 65?
6035 blt.b case3_64 # no; it's == 64
6036 beq.b case3_65 # no; it's == 65
6041 # Shift value is > 65 and out of range. All bits are shifted off.
6042 # Return a zero mantissa with the sticky bit set
6044 clr.l FTEMP_HI(%a0) # clear hi(mantissa)
6045 clr.l FTEMP_LO(%a0) # clear lo(mantissa)
6046 mov.l &0x20000000, %d0 # set sticky bit
# shift by exactly 64: the top two bits of hi(mantissa) become G,R.
6052 # ---------------------------------------------------------
6053 # | FTEMP_HI | FTEMP_LO |grs000.........000|
6054 # ---------------------------------------------------------
6055 # <-------(32)------>
6059 # \ ------------------------------
6060 # ------------------------------- \
6064 # <-------(32)------>
6065 # ---------------------------------------------------------
6066 # |0...............0|0................0|grs |
6067 # ---------------------------------------------------------
6070 mov.l FTEMP_HI(%a0), %d0 # fetch hi(mantissa)
6071 mov.l %d0, %d1 # make a copy
6072 and.l &0xc0000000, %d0 # extract G,R
6073 and.l &0x3fffffff, %d1 # extract other bits
6075 bra.b case3_complete
# shift by exactly 65: only the top bit of hi(mantissa) survives, as R.
6080 # ---------------------------------------------------------
6081 # | FTEMP_HI | FTEMP_LO |grs000.........000|
6082 # ---------------------------------------------------------
6083 # <-------(32)------>
6087 # \ ------------------------------
6088 # -------------------------------- \
6092 # <-------(31)----->
6093 # ---------------------------------------------------------
6094 # |0...............0|0................0|0rs |
6095 # ---------------------------------------------------------
6098 mov.l FTEMP_HI(%a0), %d0 # fetch hi(mantissa)
6099 and.l &0x80000000, %d0 # extract R bit
6100 lsr.l &0x1, %d0 # shift high bit into R bit
# NOTE(review): %d1 is masked below without a visible reload of
# hi(mantissa) in this extract -- confirm against the full source.
6101 and.l &0x7fffffff, %d1 # extract other bits
6104 # last operation done was an "and" of the bits shifted off so the condition
6105 # codes are already set so branch accordingly.
6106 bne.b case3_set_sticky # yes; go set new sticky
6107 tst.l FTEMP_LO(%a0) # were any bits shifted off?
6108 bne.b case3_set_sticky # yes; go set new sticky
6109 tst.b GRS(%a6) # were any bits shifted off?
6110 bne.b case3_set_sticky # yes; go set new sticky
6113 # no bits were shifted off so don't set the sticky bit.
6115 # the entire mantissa is zero.
6117 clr.l FTEMP_HI(%a0) # clear hi(mantissa)
6118 clr.l FTEMP_LO(%a0) # clear lo(mantissa)
6122 # some bits were shifted off so set the sticky bit.
6123 # the entire mantissa is zero.
6126 bset &rnd_stky_bit,%d0 # set new sticky bit
6127 clr.l FTEMP_HI(%a0) # clear hi(mantissa)
6128 clr.l FTEMP_LO(%a0) # clear lo(mantissa)
6131 #########################################################################
6132 # XDEF **************************************************************** #
6133 # _round(): round result according to precision/mode #
6135 # XREF **************************************************************** #
6138 # INPUT *************************************************************** #
6139 # a0 = ptr to input operand in internal extended format #
6140 # d1(hi) = contains rounding precision: #
6144 # d1(lo) = contains rounding mode: #
6149 # d0{31:29} = contains the g,r,s bits (extended) #
6151 # OUTPUT ************************************************************** #
6152 # a0 = pointer to rounded result #
6154 # ALGORITHM *********************************************************** #
6155 # On return the value pointed to by a0 is correctly rounded, #
6156 # a0 is preserved and the g-r-s bits in d0 are cleared. #
6157 # The result is not typed - the tag field is invalid. The #
6158 # result is still in the internal extended format. #
6160 # The INEX bit of USER_FPSR will be set if the rounded result was #
6161 # inexact (i.e. if any of the g-r-s bits were set). #
6163 #########################################################################
6168 # ext_grs() looks at the rounding precision and sets the appropriate
6170 # If (G,R,S == 0) then result is exact and round is done, else set
6171 # the inex flag in status reg and continue.
6173 bsr.l ext_grs # extract G,R,S
6175 tst.l %d0 # are G,R,S zero?
6176 beq.w truncate # yes; round is complete
6178 or.w &inx2a_mask, 2+USER_FPSR(%a6) # set inex2/ainex
6181 # Use rounding mode as an index into a jump table for these modes.
6182 # All of the following assumes grs != 0.
6184 mov.w (tbl_mode.b,%pc,%d1.w*2), %a1 # load jump offset
6185 jmp (tbl_mode.b,%pc,%a1) # jmp to rnd mode handler
# jump table indexed by rounding mode: RN, RZ, RM, RP
6188 short rnd_near - tbl_mode
6189 short truncate - tbl_mode # RZ always truncates
6190 short rnd_mnus - tbl_mode
6191 short rnd_plus - tbl_mode
6193 #################################################################
6194 # ROUND PLUS INFINITY #
6196 # If sign of fp number = 0 (positive), then add 1 to l. #
6197 #################################################################
6199 tst.b FTEMP_SGN(%a0) # check for sign
6200 bmi.w truncate # if negative then truncate
6202 mov.l &0xffffffff, %d0 # force g,r,s to be all f's
6203 swap %d1 # set up d1 for round prec.
6205 cmpi.b %d1, &s_mode # is prec = sgl?
6207 bgt.w add_dbl # no; it's dbl
6208 bra.w add_ext # no; it's ext
6210 #################################################################
6211 # ROUND MINUS INFINITY #
6213 # If sign of fp number = 1 (negative), then add 1 to l. #
6214 #################################################################
6216 tst.b FTEMP_SGN(%a0) # check for sign
6217 bpl.w truncate # if positive then truncate
6219 mov.l &0xffffffff, %d0 # force g,r,s to be all f's
6220 swap %d1 # set up d1 for round prec.
6222 cmpi.b %d1, &s_mode # is prec = sgl?
6224 bgt.w add_dbl # no; it's dbl
6225 bra.w add_ext # no; it's ext
6227 #################################################################
# ROUND NEAREST (part of this header is elided in this extract)
6230 # If (g=1), then add 1 to l and if (r=s=0), then clear l #
6231 # Note that this will round to even in case of a tie. #
6232 #################################################################
6234 asl.l &0x1, %d0 # shift g-bit to c-bit
6235 bcc.w truncate # if (g=0) then truncate
6237 swap %d1 # set up d1 for round prec.
6239 cmpi.b %d1, &s_mode # is prec = sgl?
6241 bgt.w add_dbl # no; it's dbl
6242 bra.w add_ext # no; it's ext
6244 # *** LOCAL EQUATES ***
6245 set ad_1_sgl, 0x00000100 # constant to add 1 to l-bit in sgl prec
6246 set ad_1_dbl, 0x00000800 # constant to add 1 to l-bit in dbl prec
6248 #########################
# add 1 to the l-bit, single precision
6250 #########################
6252 add.l &ad_1_sgl, FTEMP_HI(%a0)
6253 bcc.b scc_clr # no mantissa overflow
# mantissa overflowed: rotate the carry back in and bump the exponent
6254 roxr.w FTEMP_HI(%a0) # shift v-bit back in
6255 roxr.w FTEMP_HI+2(%a0) # shift v-bit back in
6256 add.w &0x1, FTEMP_EX(%a0) # and incr exponent
6258 tst.l %d0 # test for rs = 0
6260 and.w &0xfe00, FTEMP_HI+2(%a0) # clear the l-bit
6262 and.l &0xffffff00, FTEMP_HI(%a0) # truncate bits beyond sgl limit
6263 clr.l FTEMP_LO(%a0) # clear lo(mantissa)
6266 #########################
# add 1 to the l-bit, extended precision
6268 #########################
6270 addq.l &1,FTEMP_LO(%a0) # add 1 to l-bit
6271 bcc.b xcc_clr # test for carry out
6272 addq.l &1,FTEMP_HI(%a0) # propagate carry
6274 roxr.w FTEMP_HI(%a0) # mant is 0 so restore v-bit
6275 roxr.w FTEMP_HI+2(%a0) # mant is 0 so restore v-bit
6276 roxr.w FTEMP_LO(%a0)
6277 roxr.w FTEMP_LO+2(%a0)
6278 add.w &0x1,FTEMP_EX(%a0) # and inc exp
6280 tst.l %d0 # test rs = 0
6282 and.b &0xfe,FTEMP_LO+3(%a0) # clear the l bit
6286 #########################
# add 1 to the l-bit, double precision
6288 #########################
6290 add.l &ad_1_dbl, FTEMP_LO(%a0) # add 1 to lsb
6291 bcc.b dcc_clr # no carry
6292 addq.l &0x1, FTEMP_HI(%a0) # propagate carry
6293 bcc.b dcc_clr # no carry
6295 roxr.w FTEMP_HI(%a0) # mant is 0 so restore v-bit
6296 roxr.w FTEMP_HI+2(%a0) # mant is 0 so restore v-bit
6297 roxr.w FTEMP_LO(%a0)
6298 roxr.w FTEMP_LO+2(%a0)
6299 addq.w &0x1, FTEMP_EX(%a0) # incr exponent
6301 tst.l %d0 # test for rs = 0
6303 and.w &0xf000, FTEMP_LO+2(%a0) # clear the l-bit
6306 and.l &0xfffff800,FTEMP_LO(%a0) # truncate bits beyond dbl limit
6309 ###########################
6310 # Truncate all other bits #
6311 ###########################
6313 swap %d1 # select rnd prec
6315 cmpi.b %d1, &s_mode # is prec sgl?
6316 beq.w sgl_done # yes
6317 bgt.b dbl_done # no; it's dbl
6322 # ext_grs(): extract guard, round and sticky bits according to
6323 # rounding precision.
6326 # d0 = extended precision g,r,s (in d0{31:29})
6329 # d0{31:29} = guard, round, sticky
6331 # The ext_grs extract the guard/round/sticky bits according to the
6332 # selected rounding precision. It is called by the round subroutine
6333 # only. All registers except d0 are kept intact. d0 becomes an
6334 # updated guard,round,sticky in d0{31:29}
6336 # Notes: the ext_grs uses the round PREC, and therefore has to swap d1
6337 # prior to usage, and needs to restore d1 to original. this
6338 # routine is tightly tied to the round routine and not meant to
6339 # uphold standard subroutine calling practices.
6343 swap %d1 # have d1.w point to round precision
6344 tst.b %d1 # is rnd prec = extended?
6345 bne.b ext_grs_not_ext # no; go handle sgl or dbl
6348 # %d0 actually already holds g,r,s since _round() had it before calling
6349 # this function. so, as long as we don't disturb it, we are "returning" it.
6352 swap %d1 # yes; return to correct positions
6356 movm.l &0x3000, -(%sp) # make some temp registers {d2/d3}
6358 cmpi.b %d1, &s_mode # is rnd prec = sgl?
6359 bne.b ext_grs_dbl # no; go handle dbl
# single precision: g-r sit at bitfield offset 24 of hi(mantissa); all
# bits below them, plus the incoming g,r,s, fold into the sticky bit.
6364 # -----------------------------------------------------
6365 # | EXP |XXXXXXX| |xx | |grs|
6366 # -----------------------------------------------------
6368 # ee ---------------------
6374 bfextu FTEMP_HI(%a0){&24:&2}, %d3 # sgl prec. g-r are 2 bits right
6375 mov.l &30, %d2 # of the sgl prec. limits
6376 lsl.l %d2, %d3 # shift g-r bits to MSB of d3
6377 mov.l FTEMP_HI(%a0), %d2 # get word 2 for s-bit test
6378 and.l &0x0000003f, %d2 # s bit is the or of all other
6379 bne.b ext_grs_st_stky # bits to the right of g-r
6380 tst.l FTEMP_LO(%a0) # test lower mantissa
6381 bne.b ext_grs_st_stky # if any are set, set sticky
6382 tst.l %d0 # test original g,r,s
6383 bne.b ext_grs_st_stky # if any are set, set sticky
6384 bra.b ext_grs_end_sd # if words 3 and 4 are clr, exit
# double precision: g-r sit at bitfield offset 21 of lo(mantissa).
6389 # -----------------------------------------------------
6390 # | EXP |XXXXXXX| | |xx |grs|
6391 # -----------------------------------------------------
6399 bfextu FTEMP_LO(%a0){&21:&2}, %d3 # dbl-prec. g-r are 2 bits right
6400 mov.l &30, %d2 # of the dbl prec. limits
6401 lsl.l %d2, %d3 # shift g-r bits to the MSB of d3
6402 mov.l FTEMP_LO(%a0), %d2 # get lower mantissa for s-bit test
6403 and.l &0x000001ff, %d2 # s bit is the or-ing of all
6404 bne.b ext_grs_st_stky # other bits to the right of g-r
6405 tst.l %d0 # test word original g,r,s
6406 bne.b ext_grs_st_stky # if any are set, set sticky
6407 bra.b ext_grs_end_sd # if clear, exit
6410 bset &rnd_stky_bit, %d3 # set sticky bit
6412 mov.l %d3, %d0 # return grs to d0
6414 movm.l (%sp)+, &0xc # restore scratch registers {d2/d3}
6416 swap %d1 # restore d1 to original
6419 #########################################################################
6420 # norm(): normalize the mantissa of an extended precision input. the #
6421 # input operand should not be normalized already. #
6423 # XDEF **************************************************************** #
6426 # XREF **************************************************************** #
6429 # INPUT *************************************************************** #
6430 # a0 = pointer fp extended precision operand to normalize #
6432 # OUTPUT ************************************************************** #
6433 # d0 = number of bit positions the mantissa was shifted #
6434 # a0 = the input operand's mantissa is normalized; the exponent #
6437 #########################################################################
6440 mov.l %d2, -(%sp) # create some temp regs
6443 mov.l FTEMP_HI(%a0), %d0 # load hi(mantissa)
6444 mov.l FTEMP_LO(%a0), %d1 # load lo(mantissa)
# bfffo finds the offset of the first set bit in hi(man);
# that offset is exactly the left-shift count needed to normalize
6446 bfffo %d0{&0:&32}, %d2 # how many places to shift?
6447 beq.b norm_lo # hi(man) is all zeroes!
# shift the 64-bit mantissa left by d2: the top d2 bits of lo(man)
# are extracted and merged into the low end of hi(man)
6450 lsl.l %d2, %d0 # left shift hi(man)
6451 bfextu %d1{&0:%d2}, %d3 # extract lo bits
6453 or.l %d3, %d0 # create hi(man)
6454 lsl.l %d2, %d1 # create lo(man)
6456 mov.l %d0, FTEMP_HI(%a0) # store new hi(man)
6457 mov.l %d1, FTEMP_LO(%a0) # store new lo(man)
6459 mov.l %d2, %d0 # return shift amount
6461 mov.l (%sp)+, %d3 # restore temp regs
# hi(man) was all zero: normalize from lo(man) alone;
# total shift amount is 32 plus the bfffo offset within lo(man)
6467 bfffo %d1{&0:&32}, %d2 # how many places to shift?
6468 lsl.l %d2, %d1 # shift lo(man)
6469 add.l &32, %d2 # add 32 to shft amount
6471 mov.l %d1, FTEMP_HI(%a0) # store hi(man)
6472 clr.l FTEMP_LO(%a0) # lo(man) is now zero
6474 mov.l %d2, %d0 # return shift amount
6476 mov.l (%sp)+, %d3 # restore temp regs
6481 #########################################################################
6482 # unnorm_fix(): - changes an UNNORM to one of NORM, DENORM, or ZERO #
6483 # - returns corresponding optype tag #
6485 # XDEF **************************************************************** #
6488 # XREF **************************************************************** #
6489 # norm() - normalize the mantissa #
6491 # INPUT *************************************************************** #
6492 # a0 = pointer to unnormalized extended precision number #
6494 # OUTPUT ************************************************************** #
6495 # d0 = optype tag - is corrected to one of NORM, DENORM, or ZERO #
6496 # a0 = input operand has been converted to a norm, denorm, or #
6497 # zero; both the exponent and mantissa are changed. #
6499 #########################################################################
6503 bfffo FTEMP_HI(%a0){&0:&32}, %d0 # how many shifts are needed?
6504 bne.b unnorm_shift # hi(man) is not all zeroes
6507 # hi(man) is all zeroes so see if any bits in lo(man) are set
6510 bfffo FTEMP_LO(%a0){&0:&32}, %d0 # is operand really a zero?
6511 beq.w unnorm_zero # yes
6513 add.w &32, %d0 # no; fix shift distance
6516 # d0 = # shifts needed for complete normalization
6519 clr.l %d1 # clear top word
6520 mov.w FTEMP_EX(%a0), %d1 # extract exponent
6521 and.w &0x7fff, %d1 # strip off sgn
# compare shift count against (biased) exponent: shifting left by d0
# reduces the exponent by d0, so if d0 > exp the result must denorm
6523 cmp.w %d0, %d1 # will denorm push exp < 0?
6524 bgt.b unnorm_nrm_zero # yes; denorm only until exp = 0
6527 # exponent would not go < 0. Therefore, number stays normalized
6529 sub.w %d0, %d1 # shift exponent value
6530 mov.w FTEMP_EX(%a0), %d0 # load old exponent
6531 and.w &0x8000, %d0 # save old sign
6532 or.w %d0, %d1 # {sgn,new exp}
6533 mov.w %d1, FTEMP_EX(%a0) # insert new exponent
6535 bsr.l norm # normalize UNNORM
6537 mov.b &NORM, %d0 # return new optype tag
6541 # exponent would go < 0, so only denormalize until exp = 0
# here d1 = old exponent = number of positions we may shift left
6544 cmp.b %d1, &32 # is exp <= 32?
6545 bgt.b unnorm_nrm_zero_lrg # no; go handle large exponent
# shift <= 32: new hi(man) is the 32 bits starting d1 bits in
6547 bfextu FTEMP_HI(%a0){%d1:&32}, %d0 # extract new hi(man)
6548 mov.l %d0, FTEMP_HI(%a0) # save new hi(man)
6550 mov.l FTEMP_LO(%a0), %d0 # fetch old lo(man)
6551 lsl.l %d1, %d0 # extract new lo(man)
6552 mov.l %d0, FTEMP_LO(%a0) # save new lo(man)
6554 and.w &0x8000, FTEMP_EX(%a0) # set exp = 0
6556 mov.b &DENORM, %d0 # return new optype tag
6560 # only mantissa bits set are in lo(man)
6562 unnorm_nrm_zero_lrg:
6563 sub.w &32, %d1 # adjust shft amt by 32
6565 mov.l FTEMP_LO(%a0), %d0 # fetch old lo(man)
6566 lsl.l %d1, %d0 # left shift lo(man)
6568 mov.l %d0, FTEMP_HI(%a0) # store new hi(man)
6569 clr.l FTEMP_LO(%a0) # lo(man) = 0
6571 and.w &0x8000, FTEMP_EX(%a0) # set exp = 0
6573 mov.b &DENORM, %d0 # return new optype tag
6577 # whole mantissa is zero so this UNNORM is actually a zero
6580 and.w &0x8000, FTEMP_EX(%a0) # force exponent to zero
6582 mov.b &ZERO, %d0 # fix optype tag
6585 #########################################################################
6586 # XDEF **************************************************************** #
6587 # set_tag_x(): return the optype of the input ext fp number #
6589 # XREF **************************************************************** #
6592 # INPUT *************************************************************** #
6593 # a0 = pointer to extended precision operand #
6595 # OUTPUT ************************************************************** #
6596 # d0 = value of type tag #
6597 # one of: NORM, INF, QNAN, SNAN, DENORM, UNNORM, ZERO #
6599 # ALGORITHM *********************************************************** #
6600 # Simply test the exponent, j-bit, and mantissa values to #
6601 # determine the type of operand. #
6602 # If it's an unnormalized zero, alter the operand and force it #
6603 # to be a normal zero. #
6605 #########################################################################
6609 mov.w FTEMP_EX(%a0), %d0 # extract exponent
6610 andi.w &0x7fff, %d0 # strip off sign
6611 cmpi.w %d0, &0x7fff # is (EXP == MAX)?
# test bit 7 of the high mantissa byte; per the header this is the
# j-bit (explicit integer bit) test used to classify the operand
6614 btst &0x7,FTEMP_HI(%a0)
6620 tst.w %d0 # is exponent = 0?
6633 # must distinguish now "Unnormalized zeroes" which we
6634 # must convert to zero.
6637 bne.b is_unnorm_reg_x
6639 bne.b is_unnorm_reg_x
6640 # it's an "unnormalized zero". let's convert it to an actual zero...
6641 andi.w &0x8000,FTEMP_EX(%a0) # clear exponent
6650 mov.l FTEMP_HI(%a0), %d0
6651 and.l &0x7fffffff, %d0 # msb is a don't care!
# NOTE(review): bit 6 of the high mantissa byte — presumably the
# quiet-NAN bit used to separate SNAN from QNAN; confirm vs. caller
6657 btst &0x6, FTEMP_HI(%a0)
6665 #########################################################################
6666 # XDEF **************************************************************** #
6667 # set_tag_d(): return the optype of the input dbl fp number #
6669 # XREF **************************************************************** #
6672 # INPUT *************************************************************** #
6673 # a0 = points to double precision operand #
6675 # OUTPUT ************************************************************** #
6676 # d0 = value of type tag #
6677 # one of: NORM, INF, QNAN, SNAN, DENORM, ZERO #
6679 # ALGORITHM *********************************************************** #
6680 # Simply test the exponent, j-bit, and mantissa values to #
6681 # determine the type of operand. #
6683 #########################################################################
6687 mov.l FTEMP(%a0), %d0
# isolate the 11-bit dbl-precision exponent field (bits 30-20)
6690 andi.l &0x7ff00000, %d0
6691 beq.b zero_or_denorm_d # exp = 0 -> ZERO or DENORM
6693 cmpi.l %d0, &0x7ff00000 # exp = max -> INF or NAN
# mask the upper 20 fraction bits of the dbl mantissa
6700 and.l &0x000fffff, %d1
6711 and.l &0x000fffff, %d1
6728 #########################################################################
6729 # XDEF **************************************************************** #
6730 # set_tag_s(): return the optype of the input sgl fp number #
6732 # XREF **************************************************************** #
6735 # INPUT *************************************************************** #
6736 # a0 = pointer to single precision operand #
6738 # OUTPUT ************************************************************** #
6739 # d0 = value of type tag #
6740 # one of: NORM, INF, QNAN, SNAN, DENORM, ZERO #
6742 # ALGORITHM *********************************************************** #
6743 # Simply test the exponent, j-bit, and mantissa values to #
6744 # determine the type of operand. #
6746 #########################################################################
6750 mov.l FTEMP(%a0), %d0
# isolate the 8-bit sgl-precision exponent field (bits 30-23)
6753 andi.l &0x7f800000, %d0
6754 beq.b zero_or_denorm_s # exp = 0 -> ZERO or DENORM
6756 cmpi.l %d0, &0x7f800000 # exp = max -> INF or NAN
# mask the 23 fraction bits of the sgl mantissa
6763 and.l &0x007fffff, %d1
6772 and.l &0x007fffff, %d1
6787 #########################################################################
6788 # XDEF **************************************************************** #
6789 # unf_res(): routine to produce default underflow result of a #
6790 # scaled extended precision number; this is used by #
6791 # fadd/fdiv/fmul/etc. emulation routines. #
6792 # unf_res4(): same as above but for fsglmul/fsgldiv which use #
6793 # single round prec and extended prec mode. #
6795 # XREF **************************************************************** #
6796 # _denorm() - denormalize according to scale factor #
6797 # _round() - round denormalized number according to rnd prec #
6799 # INPUT *************************************************************** #
6800 # a0 = pointer to extended precison operand #
6801 # d0 = scale factor #
6802 # d1 = rounding precision/mode #
6804 # OUTPUT ************************************************************** #
6805 # a0 = pointer to default underflow result in extended precision #
6806 # d0.b = result FPSR_cc which caller may or may not want to save #
6808 # ALGORITHM *********************************************************** #
6809 # Convert the input operand to "internal format" which means the #
6810 # exponent is extended to 16 bits and the sign is stored in the unused #
6811 # portion of the extended precison operand. Denormalize the number #
6812 # according to the scale factor passed in d0. Then, round the #
6813 # denormalized result. #
6814 # Set the FPSR_exc bits as appropriate but return the cc bits in #
6815 # d0 in case the caller doesn't want to save them (as is the case for #
6817 # unf_res4() for fsglmul/fsgldiv forces the denorm to extended #
6818 # precision and the rounding mode to single. #
6820 #########################################################################
6823 mov.l %d1, -(%sp) # save rnd prec,mode on stack
# "internal" format: sign moved out of the exponent word into
# FTEMP_SGN so the full 16 bits can hold the (scaled) exponent
6825 btst &0x7, FTEMP_EX(%a0) # make "internal" format
6828 mov.w FTEMP_EX(%a0), %d1 # extract exponent
6831 mov.w %d1, FTEMP_EX(%a0) # insert 16 bit exponent
6833 mov.l %a0, -(%sp) # save operand ptr during calls
6835 mov.l 0x4(%sp),%d0 # pass rnd prec.
6838 bsr.l _denorm # denorm result
# recover the saved prec:mode word; 0xc0 masks the precision bits
6841 mov.w 0x6(%sp),%d1 # load prec:mode into %d1
6842 andi.w &0xc0,%d1 # extract rnd prec
6848 bsr.l _round # round the denorm
6852 # result is now rounded properly. convert back to normal format
6853 bclr &0x7, FTEMP_EX(%a0) # clear sgn first; may have residue
6854 tst.b FTEMP_SGN(%a0) # is "internal result" sign set?
6855 beq.b unf_res_chkifzero # no; result is positive
6856 bset &0x7, FTEMP_EX(%a0) # set result sgn
6857 clr.b FTEMP_SGN(%a0) # clear temp sign
6859 # the number may have become zero after rounding. set ccodes accordingly.
6862 tst.l FTEMP_HI(%a0) # is value now a zero?
6863 bne.b unf_res_cont # no
6865 bne.b unf_res_cont # no
6866 # bset &z_bit, FPSR_CC(%a6) # yes; set zero ccode bit
6867 bset &z_bit, %d0 # yes; set zero ccode bit
6872 # can inex1 also be set along with unfl and inex2???
6874 # we know that underflow has occurred. aunfl should be set if INEX2 is also set.
6876 btst &inex2_bit, FPSR_EXCEPT(%a6) # is INEX2 set?
6877 beq.b unf_res_end # no
6878 bset &aunfl_bit, FPSR_AEXCEPT(%a6) # yes; set aunfl
6881 add.l &0x4, %sp # clear stack
6884 # unf_res() for fsglmul() and fsgldiv().
# Same algorithm as unf_res() above, except: _denorm() is forced to
# extended precision and _round() is forced to single precision
# (only the caller's rounding MODE is honored).
6887 mov.l %d1,-(%sp) # save rnd prec,mode on stack
6889 btst &0x7,FTEMP_EX(%a0) # make "internal" format
6892 mov.w FTEMP_EX(%a0),%d1 # extract exponent
6895 mov.w %d1,FTEMP_EX(%a0) # insert 16 bit exponent
6897 mov.l %a0,-(%sp) # save operand ptr during calls
6899 clr.l %d0 # force rnd prec = ext
6900 bsr.l _denorm # denorm result
6903 mov.w &s_mode,%d1 # force rnd prec = sgl
6905 mov.w 0x6(%sp),%d1 # load rnd mode
6906 andi.w &0x30,%d1 # extract rnd mode (0x30 = mode field)
6908 bsr.l _round # round the denorm
6912 # result is now rounded properly. convert back to normal format
6913 bclr &0x7,FTEMP_EX(%a0) # clear sgn first; may have residue
6914 tst.b FTEMP_SGN(%a0) # is "internal result" sign set?
6915 beq.b unf_res4_chkifzero # no; result is positive
6916 bset &0x7,FTEMP_EX(%a0) # set result sgn
6917 clr.b FTEMP_SGN(%a0) # clear temp sign
6919 # the number may have become zero after rounding. set ccodes accordingly.
6922 tst.l FTEMP_HI(%a0) # is value now a zero?
6923 bne.b unf_res4_cont # no
6925 bne.b unf_res4_cont # no
6926 # bset &z_bit,FPSR_CC(%a6) # yes; set zero ccode bit
6927 bset &z_bit,%d0 # yes; set zero ccode bit
6932 # can inex1 also be set along with unfl and inex2???
6934 # we know that underflow has occurred. aunfl should be set if INEX2 is also set.
6936 btst &inex2_bit,FPSR_EXCEPT(%a6) # is INEX2 set?
6937 beq.b unf_res4_end # no
6938 bset &aunfl_bit,FPSR_AEXCEPT(%a6) # yes; set aunfl
6941 add.l &0x4,%sp # clear stack
6944 #########################################################################
6945 # XDEF **************************************************************** #
6946 # ovf_res(): routine to produce the default overflow result of #
6947 # an overflowing number. #
6948 # ovf_res2(): same as above but the rnd mode/prec are passed #
6951 # XREF **************************************************************** #
6954 # INPUT *************************************************************** #
6955 # d1.b = '-1' => (-); '0' => (+) #
6957 # d0 = rnd mode/prec #
6959 # hi(d0) = rnd prec #
6960 # lo(d0) = rnd mode #
6962 # OUTPUT ************************************************************** #
6963 # a0 = points to extended precision result #
6964 # d0.b = condition code bits #
6966 # ALGORITHM *********************************************************** #
6967 # The default overflow result can be determined by the sign of #
6968 # the result and the rounding mode/prec in effect. These bits are #
6969 # concatenated together to create an index into the default result #
6970 # table. A pointer to the correct result is returned in a0. The #
6971 # resulting condition codes are returned in d0 in case the caller #
6972 # doesn't want FPSR_cc altered (as is the case for fmove out). #
6974 #########################################################################
# ovf_res entry: build index = {sign bit, rnd prec, rnd mode}
6978 andi.w &0x10,%d1 # keep result sign
6979 lsr.b &0x4,%d0 # shift prec/mode
6980 or.b %d0,%d1 # concat the two
6981 mov.w %d1,%d0 # make a copy
6982 lsl.b &0x1,%d1 # multiply d1 by 2
# ovf_res2 entry: prec and mode arrive separately and are merged
6987 and.w &0x10, %d1 # keep result sign
6988 or.b %d0, %d1 # insert rnd mode
6990 or.b %d0, %d1 # insert rnd prec
6991 mov.w %d1, %d0 # make a copy
6992 lsl.b &0x1, %d1 # shift left by 1
6995 # use the rounding mode, precision, and result sign as an index into the
6996 # two tables below to fetch the default result and the result ccodes.
# d0 indexes the byte-wide ccode table; d1 (= 2*index) scaled by 8
# gives index*16, the size of one extended-precision table entry
6999 mov.b (tbl_ovfl_cc.b,%pc,%d0.w*1), %d0 # fetch result ccodes
7000 lea (tbl_ovfl_result.b,%pc,%d1.w*8), %a0 # return result ptr
# ccode table rows: one byte per {rnd mode} within each {sign, prec}
7005 byte 0x2, 0x0, 0x0, 0x2
7006 byte 0x2, 0x0, 0x0, 0x2
7007 byte 0x2, 0x0, 0x0, 0x2
7008 byte 0x0, 0x0, 0x0, 0x0
7009 byte 0x2+0x8, 0x8, 0x2+0x8, 0x8
7010 byte 0x2+0x8, 0x8, 0x2+0x8, 0x8
7011 byte 0x2+0x8, 0x8, 0x2+0x8, 0x8
# result table: 16-byte extended-precision default results,
# ordered by {sign, precision, rounding mode} per the comments
7014 long 0x7fff0000,0x00000000,0x00000000,0x00000000 # +INF; RN
7015 long 0x7ffe0000,0xffffffff,0xffffffff,0x00000000 # +EXT; RZ
7016 long 0x7ffe0000,0xffffffff,0xffffffff,0x00000000 # +EXT; RM
7017 long 0x7fff0000,0x00000000,0x00000000,0x00000000 # +INF; RP
7019 long 0x7fff0000,0x00000000,0x00000000,0x00000000 # +INF; RN
7020 long 0x407e0000,0xffffff00,0x00000000,0x00000000 # +SGL; RZ
7021 long 0x407e0000,0xffffff00,0x00000000,0x00000000 # +SGL; RM
7022 long 0x7fff0000,0x00000000,0x00000000,0x00000000 # +INF; RP
7024 long 0x7fff0000,0x00000000,0x00000000,0x00000000 # +INF; RN
7025 long 0x43fe0000,0xffffffff,0xfffff800,0x00000000 # +DBL; RZ
7026 long 0x43fe0000,0xffffffff,0xfffff800,0x00000000 # +DBL; RM
7027 long 0x7fff0000,0x00000000,0x00000000,0x00000000 # +INF; RP
7029 long 0x00000000,0x00000000,0x00000000,0x00000000
7030 long 0x00000000,0x00000000,0x00000000,0x00000000
7031 long 0x00000000,0x00000000,0x00000000,0x00000000
7032 long 0x00000000,0x00000000,0x00000000,0x00000000
7034 long 0xffff0000,0x00000000,0x00000000,0x00000000 # -INF; RN
7035 long 0xfffe0000,0xffffffff,0xffffffff,0x00000000 # -EXT; RZ
7036 long 0xffff0000,0x00000000,0x00000000,0x00000000 # -INF; RM
7037 long 0xfffe0000,0xffffffff,0xffffffff,0x00000000 # -EXT; RP
7039 long 0xffff0000,0x00000000,0x00000000,0x00000000 # -INF; RN
7040 long 0xc07e0000,0xffffff00,0x00000000,0x00000000 # -SGL; RZ
7041 long 0xffff0000,0x00000000,0x00000000,0x00000000 # -INF; RM
7042 long 0xc07e0000,0xffffff00,0x00000000,0x00000000 # -SGL; RP
7044 long 0xffff0000,0x00000000,0x00000000,0x00000000 # -INF; RN
7045 long 0xc3fe0000,0xffffffff,0xfffff800,0x00000000 # -DBL; RZ
7046 long 0xffff0000,0x00000000,0x00000000,0x00000000 # -INF; RM
7047 long 0xc3fe0000,0xffffffff,0xfffff800,0x00000000 # -DBL; RP
7049 #########################################################################
7050 # XDEF **************************************************************** #
7051 # fout(): move from fp register to memory or data register #
7053 # XREF **************************************************************** #
7054 # _round() - needed to create EXOP for sgl/dbl precision #
7055 # norm() - needed to create EXOP for extended precision #
7056 # ovf_res() - create default overflow result for sgl/dbl precision#
7057 # unf_res() - create default underflow result for sgl/dbl prec. #
7058 # dst_dbl() - create rounded dbl precision result. #
7059 # dst_sgl() - create rounded sgl precision result. #
7060 # fetch_dreg() - fetch dynamic k-factor reg for packed. #
7061 # bindec() - convert FP binary number to packed number. #
7062 # _mem_write() - write data to memory. #
7063 # _mem_write2() - write data to memory unless supv mode -(a7) exc.#
7064 # _dmem_write_{byte,word,long}() - write data to memory. #
7065 # store_dreg_{b,w,l}() - store data to data register file. #
7066 # facc_out_{b,w,l,d,x}() - data access error occurred. #
7068 # INPUT *************************************************************** #
7069 # a0 = pointer to extended precision source operand #
7070 # d0 = round prec,mode #
7072 # OUTPUT ************************************************************** #
7073 # fp0 : intermediate underflow or overflow result if #
7074 # OVFL/UNFL occurred for a sgl or dbl operand #
7076 # ALGORITHM *********************************************************** #
7077 # This routine is accessed by many handlers that need to do an #
7078 # opclass three move of an operand out to memory. #
7079 # Decode an fmove out (opclass 3) instruction to determine if #
7080 # it's b,w,l,s,d,x, or p in size. b,w,l can be stored to either a data #
7081 # register or memory. The algorithm uses a standard "fmove" to create #
7082 # the rounded result. Also, since exceptions are disabled, this also #
7083 # create the correct OPERR default result if appropriate. #
7084 # For sgl or dbl precision, overflow or underflow can occur. If #
7085 # either occurs and is enabled, the EXOP. #
7086 # For extended precision, the stacked <ea> must be fixed along #
7087 # w/ the address index register as appropriate w/ _calc_ea_fout(). If #
7088 # the source is a denorm and if underflow is enabled, an EXOP must be #
7090 # For packed, the k-factor must be fetched from the instruction #
7091 # word or a data register. The <ea> must be fixed as w/ extended #
7092 # precision. Then, bindec() is called to create the appropriate #
7094 # If at any time an access error is flagged by one of the move- #
7095 # to-memory routines, then a special exit must be made so that the #
7096 # access error can be handled properly. #
7098 #########################################################################
# dispatch on the 3-bit destination format field of the command word;
# the word table holds offsets from tbl_fout, so jmp lands on the
# per-format handler (long/sgl/ext/pack/word/dbl/byte/pack-dynamic)
7102 bfextu EXC_CMDREG(%a6){&3:&3},%d1 # extract dst fmt
7103 mov.w (tbl_fout.b,%pc,%d1.w*2),%a1 # use as index
7104 jmp (tbl_fout.b,%pc,%a1) # jump to routine
7108 short fout_long - tbl_fout
7109 short fout_sgl - tbl_fout
7110 short fout_ext - tbl_fout
7111 short fout_pack - tbl_fout
7112 short fout_word - tbl_fout
7113 short fout_dbl - tbl_fout
7114 short fout_byte - tbl_fout
7115 short fout_pack - tbl_fout
7117 #################################################################
7118 # fmove.b out ###################################################
7119 #################################################################
7121 # Only "Unimplemented Data Type" exceptions enter here. The operand
7122 # is either a DENORM or a NORM.
7124 tst.b STAG(%a6) # is operand normalized?
7125 bne.b fout_byte_denorm # no
7127 fmovm.x SRC(%a0),&0x80 # load value
# let the FPU itself do the convert+round by executing the move
# with the caller's rounding prec,mode installed in the FPCR
7130 fmov.l %d0,%fpcr # insert rnd prec,mode
7132 fmov.b %fp0,%d0 # exec move out w/ correct rnd mode
7134 fmov.l &0x0,%fpcr # clear FPCR
7135 fmov.l %fpsr,%d1 # fetch FPSR
7136 or.w %d1,2+USER_FPSR(%a6) # save new exc,accrued bits
7138 mov.b 1+EXC_OPWORD(%a6),%d1 # extract dst mode
7139 andi.b &0x38,%d1 # is mode == 0? (Dreg dst)
7140 beq.b fout_byte_dn # must save to integer regfile
7142 mov.l EXC_EA(%a6),%a0 # stacked <ea> is correct
7143 bsr.l _dmem_write_byte # write byte
7145 tst.l %d1 # did dstore fail?
7146 bne.l facc_out_b # yes
7151 mov.b 1+EXC_OPWORD(%a6),%d1 # extract Dn
# DENORM source: substitute the smallest-magnitude sgl value with
# the DENORM's sign, then go through the normal store path
7157 mov.l SRC_EX(%a0),%d1
7158 andi.l &0x80000000,%d1 # keep DENORM sign
7159 ori.l &0x00800000,%d1 # make smallest sgl
7161 bra.b fout_byte_norm
7163 #################################################################
7164 # fmove.w out ###################################################
7165 #################################################################
7167 # Only "Unimplemented Data Type" exceptions enter here. The operand
7168 # is either a DENORM or a NORM. (Same structure as fmove.b above.)
7170 tst.b STAG(%a6) # is operand normalized?
7171 bne.b fout_word_denorm # no
7173 fmovm.x SRC(%a0),&0x80 # load value
7176 fmov.l %d0,%fpcr # insert rnd prec:mode
7178 fmov.w %fp0,%d0 # exec move out w/ correct rnd mode
7180 fmov.l &0x0,%fpcr # clear FPCR
7181 fmov.l %fpsr,%d1 # fetch FPSR
7182 or.w %d1,2+USER_FPSR(%a6) # save new exc,accrued bits
7184 mov.b 1+EXC_OPWORD(%a6),%d1 # extract dst mode
7185 andi.b &0x38,%d1 # is mode == 0? (Dreg dst)
7186 beq.b fout_word_dn # must save to integer regfile
7188 mov.l EXC_EA(%a6),%a0 # stacked <ea> is correct
7189 bsr.l _dmem_write_word # write word
7191 tst.l %d1 # did dstore fail?
7192 bne.l facc_out_w # yes
7197 mov.b 1+EXC_OPWORD(%a6),%d1 # extract Dn
# DENORM source: substitute smallest sgl with the DENORM's sign
7203 mov.l SRC_EX(%a0),%d1
7204 andi.l &0x80000000,%d1 # keep DENORM sign
7205 ori.l &0x00800000,%d1 # make smallest sgl
7207 bra.b fout_word_norm
7209 #################################################################
7210 # fmove.l out ###################################################
7211 #################################################################
7213 # Only "Unimplemented Data Type" exceptions enter here. The operand
7214 # is either a DENORM or a NORM. (Same structure as fmove.b above.)
7216 tst.b STAG(%a6) # is operand normalized?
7217 bne.b fout_long_denorm # no
7219 fmovm.x SRC(%a0),&0x80 # load value
7222 fmov.l %d0,%fpcr # insert rnd prec:mode
7224 fmov.l %fp0,%d0 # exec move out w/ correct rnd mode
7226 fmov.l &0x0,%fpcr # clear FPCR
7227 fmov.l %fpsr,%d1 # fetch FPSR
7228 or.w %d1,2+USER_FPSR(%a6) # save new exc,accrued bits
7231 mov.b 1+EXC_OPWORD(%a6),%d1 # extract dst mode
7232 andi.b &0x38,%d1 # is mode == 0? (Dreg dst)
7233 beq.b fout_long_dn # must save to integer regfile
7235 mov.l EXC_EA(%a6),%a0 # stacked <ea> is correct
7236 bsr.l _dmem_write_long # write long
7238 tst.l %d1 # did dstore fail?
7239 bne.l facc_out_l # yes
7244 mov.b 1+EXC_OPWORD(%a6),%d1 # extract Dn
# DENORM source: substitute smallest sgl with the DENORM's sign
7250 mov.l SRC_EX(%a0),%d1
7251 andi.l &0x80000000,%d1 # keep DENORM sign
7252 ori.l &0x00800000,%d1 # make smallest sgl
7254 bra.b fout_long_norm
7256 #################################################################
7257 # fmove.x out ###################################################
7258 #################################################################
7260 # Only "Unimplemented Data Type" exceptions enter here. The operand
7261 # is either a DENORM or a NORM.
7262 # The DENORM causes an Underflow exception.
7265 # we copy the extended precision result to FP_SCR0 so that the reserved
7266 # 16-bit field gets zeroed. we do this since we promise not to disturb
7267 # what's at SRC(a0).
7268 mov.w SRC_EX(%a0),FP_SCR0_EX(%a6)
7269 clr.w 2+FP_SCR0_EX(%a6) # clear reserved field
7270 mov.l SRC_HI(%a0),FP_SCR0_HI(%a6)
7271 mov.l SRC_LO(%a0),FP_SCR0_LO(%a6)
7273 fmovm.x SRC(%a0),&0x80 # return result
7275 bsr.l _calc_ea_fout # fix stacked <ea>
7277 mov.l %a0,%a1 # pass: dst addr
7278 lea FP_SCR0(%a6),%a0 # pass: src addr
7279 mov.l &0xc,%d0 # pass: opsize is 12 bytes
7281 # we must not yet write the extended precision data to the stack
7282 # in the pre-decrement case from supervisor mode or else we'll corrupt
7283 # the stack frame. so, leave it in FP_SRC for now and deal with it later...
7284 cmpi.b SPCOND_FLG(%a6),&mda7_flg
7287 bsr.l _dmem_write # write ext prec number to memory
7289 tst.l %d1 # did dstore fail?
7290 bne.w fout_ext_err # yes
7292 tst.b STAG(%a6) # is operand normalized?
7293 bne.b fout_ext_denorm # no
7296 # the number is a DENORM. must set the underflow exception bit
7298 bset &unfl_bit,FPSR_EXCEPT(%a6) # set underflow exc bit
7300 mov.b FPCR_ENABLE(%a6),%d0
7301 andi.b &0x0a,%d0 # is UNFL or INEX enabled?
7302 bne.b fout_ext_exc # yes
7305 # we don't want to do the write if the exception occurred in supervisor mode
7306 # so _mem_write2() handles this for us.
7308 bsr.l _mem_write2 # write ext prec number to memory
7310 tst.l %d1 # did dstore fail?
7311 bne.w fout_ext_err # yes
7313 tst.b STAG(%a6) # is operand normalized?
7314 bne.b fout_ext_denorm # no
# UNFL/INEX enabled: build the EXOP by normalizing the DENORM and
# inserting the negated shift count as the (biased-out) exponent
7318 lea FP_SCR0(%a6),%a0
7319 bsr.l norm # normalize the mantissa
7320 neg.w %d0 # new exp = -(shft amt)
7322 andi.w &0x8000,FP_SCR0_EX(%a6) # keep only old sign
7323 or.w %d0,FP_SCR0_EX(%a6) # insert new exponent
7324 fmovm.x FP_SCR0(%a6),&0x40 # return EXOP in fp1
7328 mov.l EXC_A6(%a6),(%a6) # fix stacked a6
7331 #########################################################################
7332 # fmove.s out ###########################################################
7333 #########################################################################
# force the rounding precision to single while keeping the caller's
# rounding mode; the combined prec,mode is parked in L_SCR3
7335 andi.b &0x30,%d0 # clear rnd prec
7336 ori.b &s_mode*0x10,%d0 # insert sgl prec
7337 mov.l %d0,L_SCR3(%a6) # save rnd prec,mode on stack
7340 # operand is a normalized number. first, we check to see if the move out
7341 # would cause either an underflow or overflow. these cases are handled
7342 # separately. otherwise, set the FPCR to the proper rounding mode and
7345 mov.w SRC_EX(%a0),%d0 # extract exponent
7346 andi.w &0x7fff,%d0 # strip sign
7348 cmpi.w %d0,&SGL_HI # will operand overflow?
7349 bgt.w fout_sgl_ovfl # yes; go handle OVFL
7350 beq.w fout_sgl_may_ovfl # maybe; go handle possible OVFL
7351 cmpi.w %d0,&SGL_LO # will operand underflow?
7352 blt.w fout_sgl_unfl # yes; go handle underflow
7355 # NORMs(in range) can be stored out by a simple "fmov.s"
7356 # Unnormalized inputs can come through this point.
7359 fmovm.x SRC(%a0),&0x80 # fetch fop from stack
7361 fmov.l L_SCR3(%a6),%fpcr # set FPCR
7362 fmov.l &0x0,%fpsr # clear FPSR
7364 fmov.s %fp0,%d0 # store does convert and round
7366 fmov.l &0x0,%fpcr # clear FPCR
7367 fmov.l %fpsr,%d1 # save FPSR
7369 or.w %d1,2+USER_FPSR(%a6) # set possible inex2/ainex
7372 mov.b 1+EXC_OPWORD(%a6),%d1 # extract dst mode
7373 andi.b &0x38,%d1 # is mode == 0? (Dreg dst)
7374 beq.b fout_sgl_exg_write_dn # must save to integer regfile
7376 mov.l EXC_EA(%a6),%a0 # stacked <ea> is correct
7377 bsr.l _dmem_write_long # write long
7379 tst.l %d1 # did dstore fail?
7380 bne.l facc_out_l # yes
7384 fout_sgl_exg_write_dn:
7385 mov.b 1+EXC_OPWORD(%a6),%d1 # extract Dn
7391 # here, we know that the operand would UNFL if moved out to single prec,
7392 # so, denorm and round and then use generic store single routine to
7393 # write the value to memory.
7396 bset &unfl_bit,FPSR_EXCEPT(%a6) # set UNFL
7398 mov.w SRC_EX(%a0),FP_SCR0_EX(%a6)
7399 mov.l SRC_HI(%a0),FP_SCR0_HI(%a6)
7400 mov.l SRC_LO(%a0),FP_SCR0_LO(%a6)
7403 clr.l %d0 # pass: S.F. = 0
7405 cmpi.b STAG(%a6),&DENORM # fetch src optype tag
7406 bne.b fout_sgl_unfl_cont # let DENORMs fall through
7408 lea FP_SCR0(%a6),%a0
7409 bsr.l norm # normalize the DENORM
7412 lea FP_SCR0(%a6),%a0 # pass: ptr to operand
7413 mov.l L_SCR3(%a6),%d1 # pass: rnd prec,mode
7414 bsr.l unf_res # calc default underflow result
7416 lea FP_SCR0(%a6),%a0 # pass: ptr to fop
7417 bsr.l dst_sgl # convert to single prec
7419 mov.b 1+EXC_OPWORD(%a6),%d1 # extract dst mode
7420 andi.b &0x38,%d1 # is mode == 0? (Dreg dst)
7421 beq.b fout_sgl_unfl_dn # must save to integer regfile
7423 mov.l EXC_EA(%a6),%a0 # stacked <ea> is correct
7424 bsr.l _dmem_write_long # write long
7426 tst.l %d1 # did dstore fail?
7427 bne.l facc_out_l # yes
7429 bra.b fout_sgl_unfl_chkexc
7432 mov.b 1+EXC_OPWORD(%a6),%d1 # extract Dn
7436 fout_sgl_unfl_chkexc:
7437 mov.b FPCR_ENABLE(%a6),%d1
7438 andi.b &0x0a,%d1 # is UNFL or INEX enabled?
7439 bne.w fout_sd_exc_unfl # yes
7444 # it's definitely an overflow so call ovf_res to get the correct answer
# result is exact only if all mantissa bits below the sgl precision
# boundary are clear; otherwise inex2 must also be flagged
7447 tst.b 3+SRC_HI(%a0) # is result inexact?
7448 bne.b fout_sgl_ovfl_inex2
7449 tst.l SRC_LO(%a0) # is result inexact?
7450 bne.b fout_sgl_ovfl_inex2
7451 ori.w &ovfl_inx_mask,2+USER_FPSR(%a6) # set ovfl/aovfl/ainex
7452 bra.b fout_sgl_ovfl_cont
7453 fout_sgl_ovfl_inex2:
7454 ori.w &ovfinx_mask,2+USER_FPSR(%a6) # set ovfl/aovfl/ainex/inex2
7459 # call ovf_res() w/ sgl prec and the correct rnd mode to create the default
7460 # overflow result. DON'T save the returned ccodes from ovf_res() since
7461 # fmove out doesn't alter them.
7462 tst.b SRC_EX(%a0) # is operand negative?
7464 mov.l L_SCR3(%a6),%d0 # pass: sgl prec,rnd mode
7465 bsr.l ovf_res # calc OVFL result
7466 fmovm.x (%a0),&0x80 # load default overflow result
7467 fmov.s %fp0,%d0 # store to single
7469 mov.b 1+EXC_OPWORD(%a6),%d1 # extract dst mode
7470 andi.b &0x38,%d1 # is mode == 0? (Dreg dst)
7471 beq.b fout_sgl_ovfl_dn # must save to integer regfile
7473 mov.l EXC_EA(%a6),%a0 # stacked <ea> is correct
7474 bsr.l _dmem_write_long # write long
7476 tst.l %d1 # did dstore fail?
7477 bne.l facc_out_l # yes
7479 bra.b fout_sgl_ovfl_chkexc
7482 mov.b 1+EXC_OPWORD(%a6),%d1 # extract Dn
7486 fout_sgl_ovfl_chkexc:
7487 mov.b FPCR_ENABLE(%a6),%d1
7488 andi.b &0x0a,%d1 # is UNFL or INEX enabled?
7489 bne.w fout_sd_exc_ovfl # yes
7494 # move out MAY overflow:
7495 # (1) force the exp to 0x3fff
7496 # (2) do a move w/ appropriate rnd mode
7497 # (3) if exp still equals zero, then insert original exponent
7498 # for the correct result.
7499 # if exp now equals one, then it overflowed so call ovf_res.
7502 mov.w SRC_EX(%a0),%d1 # fetch current sign
7503 andi.w &0x8000,%d1 # keep it,clear exp
7504 ori.w &0x3fff,%d1 # insert exp = 0
7505 mov.w %d1,FP_SCR0_EX(%a6) # insert scaled exp
7506 mov.l SRC_HI(%a0),FP_SCR0_HI(%a6) # copy hi(man)
7507 mov.l SRC_LO(%a0),FP_SCR0_LO(%a6) # copy lo(man)
7509 fmov.l L_SCR3(%a6),%fpcr # set FPCR
7511 fmov.x FP_SCR0(%a6),%fp0 # force fop to be rounded
7512 fmov.l &0x0,%fpcr # clear FPCR
# with exp biased to 0x3fff, |fp0| >= 2.0 exactly when rounding
# bumped the exponent, i.e. the true result overflowed
7514 fabs.x %fp0 # need absolute value
7515 fcmp.b %fp0,&0x2 # did exponent increase?
7516 fblt.w fout_sgl_exg # no; go finish NORM
7517 bra.w fout_sgl_ovfl # yes; go handle overflow
# common UNFL/OVFL-enabled exit: build the EXOP from the source
7524 mov.w SRC_EX(%a0),FP_SCR0_EX(%a6)
7525 mov.l SRC_HI(%a0),FP_SCR0_HI(%a6)
7526 mov.l SRC_LO(%a0),FP_SCR0_LO(%a6)
7528 cmpi.b STAG(%a6),&DENORM # was src a DENORM?
7529 bne.b fout_sd_exc_cont # no
7531 lea FP_SCR0(%a6),%a0
7535 bfins %d0,FP_SCR0_EX(%a6){&1:&15}
7536 bra.b fout_sd_exc_cont
7540 mov.l (%sp)+,%a0 # restore a0
7542 mov.w SRC_EX(%a0),FP_SCR0_EX(%a6)
7543 mov.l SRC_HI(%a0),FP_SCR0_HI(%a6)
7544 mov.l SRC_LO(%a0),FP_SCR0_LO(%a6)
# convert to "internal" sign representation expected by _round():
# sign bit cleared in the exponent word, kept in the byte at +2
7547 bclr &0x7,FP_SCR0_EX(%a6) # clear sign bit
7548 sne.b 2+FP_SCR0_EX(%a6) # set internal sign bit
7549 lea FP_SCR0(%a6),%a0 # pass: ptr to DENORM
7551 mov.b 3+L_SCR3(%a6),%d1
7555 mov.b 3+L_SCR3(%a6),%d1
7558 clr.l %d0 # pass: zero g,r,s
7559 bsr.l _round # round the DENORM
7561 tst.b 2+FP_SCR0_EX(%a6) # is EXOP negative?
7562 beq.b fout_sd_exc_done # no
7563 bset &0x7,FP_SCR0_EX(%a6) # yes
7566 fmovm.x FP_SCR0(%a6),&0x40 # return EXOP in fp1
7569 #################################################################
7570 # fmove.d out ###################################################
7571 #################################################################
# entry: force double-precision rounding into the saved control word,
# then classify the operand's exponent against the double-format range
# (DBL_HI / DBL_LO) to pick the ovfl / may-ovfl / unfl / normal path.
7573 andi.b &0x30,%d0 # clear rnd prec
7574 ori.b &d_mode*0x10,%d0 # insert dbl prec
7575 mov.l %d0,L_SCR3(%a6) # save rnd prec,mode on stack
7578 # operand is a normalized number. first, we check to see if the move out
7579 # would cause either an underflow or overflow. these cases are handled
7580 # separately. otherwise, set the FPCR to the proper rounding mode and
7583 mov.w SRC_EX(%a0),%d0 # extract exponent
7584 andi.w &0x7fff,%d0 # strip sign
7586 cmpi.w %d0,&DBL_HI # will operand overflow?
7587 bgt.w fout_dbl_ovfl # yes; go handle OVFL
7588 beq.w fout_dbl_may_ovfl # maybe; go handle possible OVFL
7589 cmpi.w %d0,&DBL_LO # will operand underflow?
7590 blt.w fout_dbl_unfl # yes; go handle underflow
7593 # NORMs(in range) can be stored out by a simple "fmov.d"
7594 # Unnormalized inputs can come through this point.
7597 fmovm.x SRC(%a0),&0x80 # fetch fop from stack
7599 fmov.l L_SCR3(%a6),%fpcr # set FPCR
7600 fmov.l &0x0,%fpsr # clear FPSR
7602 fmov.d %fp0,L_SCR1(%a6) # store does convert and round
7604 fmov.l &0x0,%fpcr # clear FPCR
7605 fmov.l %fpsr,%d0 # save FPSR
7607 or.w %d0,2+USER_FPSR(%a6) # set possible inex2/ainex
7609 mov.l EXC_EA(%a6),%a1 # pass: dst addr
7610 lea L_SCR1(%a6),%a0 # pass: src addr
7611 movq.l &0x8,%d0 # pass: opsize is 8 bytes
7612 bsr.l _dmem_write # store dbl fop to memory
7614 tst.l %d1 # did dstore fail?
7615 bne.l facc_out_d # yes
7617 rts # no; so we're finished
7620 # here, we know that the operand would UNFL if moved out to double prec,
7621 # so, denorm and round and then use generic store double routine to
7622 # write the value to memory.
7625 bset &unfl_bit,FPSR_EXCEPT(%a6) # set UNFL
7627 mov.w SRC_EX(%a0),FP_SCR0_EX(%a6)
7628 mov.l SRC_HI(%a0),FP_SCR0_HI(%a6)
7629 mov.l SRC_LO(%a0),FP_SCR0_LO(%a6)
7632 clr.l %d0 # pass: S.F. = 0
# a DENORM source must be normalized before unf_res() can denormalize it
# to the double-precision format
7634 cmpi.b STAG(%a6),&DENORM # fetch src optype tag
7635 bne.b fout_dbl_unfl_cont # let DENORMs fall through
7637 lea FP_SCR0(%a6),%a0
7638 bsr.l norm # normalize the DENORM
7641 lea FP_SCR0(%a6),%a0 # pass: ptr to operand
7642 mov.l L_SCR3(%a6),%d1 # pass: rnd prec,mode
7643 bsr.l unf_res # calc default underflow result
# dst_dbl returns hi(double) in d0, lo(double) in d1
7645 lea FP_SCR0(%a6),%a0 # pass: ptr to fop
7646 bsr.l dst_dbl # convert to double prec
7647 mov.l %d0,L_SCR1(%a6)
7648 mov.l %d1,L_SCR2(%a6)
7650 mov.l EXC_EA(%a6),%a1 # pass: dst addr
7651 lea L_SCR1(%a6),%a0 # pass: src addr
7652 movq.l &0x8,%d0 # pass: opsize is 8 bytes
7653 bsr.l _dmem_write # store dbl fop to memory
7655 tst.l %d1 # did dstore fail?
7656 bne.l facc_out_d # yes
7658 mov.b FPCR_ENABLE(%a6),%d1
7659 andi.b &0x0a,%d1 # is UNFL or INEX enabled?
7660 bne.w fout_sd_exc_unfl # yes
7665 # it's definitely an overflow so call ovf_res to get the correct answer
# any nonzero bits below the double-precision mantissa mean the store
# also loses precision, so INEX2 accrues in addition to OVFL
7668 mov.w 2+SRC_LO(%a0),%d0
7670 bne.b fout_dbl_ovfl_inex2
7672 ori.w &ovfl_inx_mask,2+USER_FPSR(%a6) # set ovfl/aovfl/ainex
7673 bra.b fout_dbl_ovfl_cont
7674 fout_dbl_ovfl_inex2:
7675 ori.w &ovfinx_mask,2+USER_FPSR(%a6) # set ovfl/aovfl/ainex/inex2
7680 # call ovf_res() w/ dbl prec and the correct rnd mode to create the default
7681 # overflow result. DON'T save the returned ccodes from ovf_res() since
7682 # fmove out doesn't alter them.
7683 tst.b SRC_EX(%a0) # is operand negative?
7685 mov.l L_SCR3(%a6),%d0 # pass: dbl prec,rnd mode
7686 bsr.l ovf_res # calc OVFL result
7687 fmovm.x (%a0),&0x80 # load default overflow result
7688 fmov.d %fp0,L_SCR1(%a6) # store to double
7690 mov.l EXC_EA(%a6),%a1 # pass: dst addr
7691 lea L_SCR1(%a6),%a0 # pass: src addr
7692 movq.l &0x8,%d0 # pass: opsize is 8 bytes
7693 bsr.l _dmem_write # store dbl fop to memory
7695 tst.l %d1 # did dstore fail?
7696 bne.l facc_out_d # yes
7698 mov.b FPCR_ENABLE(%a6),%d1
7699 andi.b &0x0a,%d1 # is UNFL or INEX enabled?
7700 bne.w fout_sd_exc_ovfl # yes
# fout_dbl_may_ovfl: exponent sits exactly on the double-precision
# threshold — round a sign/exponent-forced copy and see if |result| hit 2.0
7705 # move out MAY overflow:
7706 # (1) force the exp to 0x3fff
7707 # (2) do a move w/ appropriate rnd mode
7708 # (3) if exp still equals zero, then insert original exponent
7709 # for the correct result.
7710 # if exp now equals one, then it overflowed so call ovf_res.
7713 mov.w SRC_EX(%a0),%d1 # fetch current sign
7714 andi.w &0x8000,%d1 # keep it,clear exp
7715 ori.w &0x3fff,%d1 # insert exp = 0
7716 mov.w %d1,FP_SCR0_EX(%a6) # insert scaled exp
7717 mov.l SRC_HI(%a0),FP_SCR0_HI(%a6) # copy hi(man)
7718 mov.l SRC_LO(%a0),FP_SCR0_LO(%a6) # copy lo(man)
7720 fmov.l L_SCR3(%a6),%fpcr # set FPCR
7722 fmov.x FP_SCR0(%a6),%fp0 # force fop to be rounded
7723 fmov.l &0x0,%fpcr # clear FPCR
7725 fabs.x %fp0 # need absolute value
7726 fcmp.b %fp0,&0x2 # did exponent increase?
7727 fblt.w fout_dbl_exg # no; go finish NORM
7728 bra.w fout_dbl_ovfl # yes; go handle overflow
7730 #########################################################################
7731 # XDEF **************************************************************** #
7732 # dst_dbl(): create double precision value from extended prec. #
7734 # XREF **************************************************************** #
7737 # INPUT *************************************************************** #
7738 # a0 = pointer to source operand in extended precision #
7740 # OUTPUT ************************************************************** #
7741 # d0 = hi(double precision result) #
7742 # d1 = lo(double precision result) #
7744 # ALGORITHM *********************************************************** #
7746 # Changes extended precision to double precision. #
7747 # Note: no attempt is made to round the extended value to double. #
7748 # dbl_sign = ext_sign #
7749 # dbl_exp = ext_exp - $3fff(ext bias) + $3ff(dbl bias) #
7750 # get rid of ext integer bit #
7751 # dbl_mant = ext_mant{62:12} #
7753 # --------------- --------------- --------------- #
7754 # extended -> |s| exp | |1| ms mant | | ls mant | #
7755 # --------------- --------------- --------------- #
7756 # 95 64 63 62 32 31 11 0 #
7761 # --------------- --------------- #
7762 # double -> |s|exp| mant | | mant | #
7763 # --------------- --------------- #
7766 #########################################################################
# dst_dbl body (entry label precedes this line): convert the extended
# operand at (%a0) to a 64-bit double, returned as d0 = hi lword,
# d1 = lo lword, per the format diagram in the header above.
7769 clr.l %d0 # clear d0
7770 mov.w FTEMP_EX(%a0),%d0 # get exponent
7771 subi.w &EXT_BIAS,%d0 # subtract extended precision bias
7772 addi.w &DBL_BIAS,%d0 # add double precision bias
# msb of the mantissa clear => denorm (explicit j-bit not set)
7773 tst.b FTEMP_HI(%a0) # is number a denorm?
7774 bmi.b dst_get_dupper # no
7775 subq.w &0x1,%d0 # yes; denorm bias = DBL_BIAS - 1
# position the 11-bit exponent into bits 30:20 of the hi lword
7777 swap %d0 # d0 now in upper word
7778 lsl.l &0x4,%d0 # d0 in proper place for dbl prec exp
7779 tst.b FTEMP_EX(%a0) # test sign
7780 bpl.b dst_get_dman # if positive, go process mantissa
7781 bset &0x1f,%d0 # if negative, set sign
# drop the explicit integer bit, keep the next 20 mantissa bits for the
# hi lword; the remaining 11+21 bits form the lo lword
7783 mov.l FTEMP_HI(%a0),%d1 # get ms mantissa
7784 bfextu %d1{&1:&20},%d1 # get upper 20 bits of ms
7785 or.l %d1,%d0 # put these bits in ms word of double
7786 mov.l %d0,L_SCR1(%a6) # put the new exp back on the stack
7787 mov.l FTEMP_HI(%a0),%d1 # get ms mantissa
7788 mov.l &21,%d0 # load shift count
7789 lsl.l %d0,%d1 # put lower 11 bits in upper bits
7790 mov.l %d1,L_SCR2(%a6) # build lower lword in memory
7791 mov.l FTEMP_LO(%a0),%d1 # get ls mantissa
7792 bfextu %d1{&0:&21},%d0 # get ls 21 bits of double
7793 mov.l L_SCR2(%a6),%d1
7794 or.l %d0,%d1 # put them in double result
7795 mov.l L_SCR1(%a6),%d0
7798 #########################################################################
7799 # XDEF **************************************************************** #
7800 # dst_sgl(): create single precision value from extended prec #
7802 # XREF **************************************************************** #
7804 # INPUT *************************************************************** #
7805 # a0 = pointer to source operand in extended precision #
7807 # OUTPUT ************************************************************** #
7808 # d0 = single precision result #
7810 # ALGORITHM *********************************************************** #
7812 # Changes extended precision to single precision. #
7813 # sgl_sign = ext_sign #
7814 # sgl_exp = ext_exp - $3fff(ext bias) + $7f(sgl bias) #
7815 # get rid of ext integer bit #
7816 # sgl_mant = ext_mant{62:40} #
7818 # --------------- --------------- --------------- #
7819 # extended -> |s| exp | |1| ms mant | | ls mant | #
7820 # --------------- --------------- --------------- #
7821 # 95 64 63 62 40 32 31 12 0 #
7827 # single -> |s|exp| mant | #
7831 #########################################################################
# dst_sgl body (entry label precedes this line): convert the extended
# operand at (%a0) to a 32-bit single returned in d0, per the header above.
7835 mov.w FTEMP_EX(%a0),%d0 # get exponent
7836 subi.w &EXT_BIAS,%d0 # subtract extended precision bias
7837 addi.w &SGL_BIAS,%d0 # add single precision bias
# msb of the mantissa clear => denorm (explicit j-bit not set)
7838 tst.b FTEMP_HI(%a0) # is number a denorm?
7839 bmi.b dst_get_supper # no
7840 subq.w &0x1,%d0 # yes; denorm bias = SGL_BIAS - 1
# position the 8-bit exponent into bits 30:23
7842 swap %d0 # put exp in upper word of d0
7843 lsl.l &0x7,%d0 # shift it into single exp bits
7844 tst.b FTEMP_EX(%a0) # test sign
7845 bpl.b dst_get_sman # if positive, continue
7846 bset &0x1f,%d0 # if negative, put in sign first
# drop the explicit integer bit; keep mantissa bits 62:40 (23 bits)
7848 mov.l FTEMP_HI(%a0),%d1 # get ms mantissa
7849 andi.l &0x7fffff00,%d1 # get upper 23 bits of ms
7850 lsr.l &0x8,%d1 # and put them flush right
7851 or.l %d1,%d0 # put these bits in ms word of single
7854 ##############################################################################
# fmove.p out (packed decimal): compute the <ea>, fetch the k-factor
# (static field or dynamic Dn), convert the extended operand to packed
# BCD with bindec(), clean up unused digit fields, and write the 12-byte
# result to memory. Non-NORM inputs are fixed up first (below).
7856 bsr.l _calc_ea_fout # fetch the <ea>
7859 mov.b STAG(%a6),%d0 # fetch input type
7860 bne.w fout_pack_not_norm # input is not NORM
7863 btst &0x4,EXC_CMDREG(%a6) # static or dynamic?
7864 beq.b fout_pack_s # static
7867 mov.b 1+EXC_CMDREG(%a6),%d1 # fetch dynamic reg
7871 bsr.l fetch_dreg # fetch Dn w/ k-factor
7873 bra.b fout_pack_type
7875 mov.b 1+EXC_CMDREG(%a6),%d0 # fetch static field
7878 bfexts %d0{&25:&7},%d0 # extract k-factor
7881 lea FP_SRC(%a6),%a0 # pass: ptr to input
7883 # bindec is currently scrambling FP_SRC for denorm inputs.
7884 # we'll have to change this, but for now, tough luck!!!
7885 bsr.l bindec # convert xprec to packed
7887 # andi.l &0xcfff000f,FP_SCR0(%a6) # clear unused fields
7888 andi.l &0xcffff00f,FP_SCR0(%a6) # clear unused fields
7892 tst.b 3+FP_SCR0_EX(%a6)
7894 tst.l FP_SCR0_HI(%a6)
7896 tst.l FP_SCR0_LO(%a6)
7899 # add the extra condition that only if the k-factor was zero, too, should
7900 # we zero the exponent
7903 # "mantissa" is all zero which means that the answer is zero. but, the '040
7904 # algorithm allows the exponent to be non-zero. the 881/2 do not. Therefore,
7905 # if the mantissa is zero, I will zero the exponent, too.
7906 # the question now is whether the exponents sign bit is allowed to be non-zero
7907 # for a zero, also...
7908 andi.w &0xf000,FP_SCR0(%a6)
7912 lea FP_SCR0(%a6),%a0 # pass: src addr
7915 mov.l (%sp)+,%a1 # pass: dst addr
7916 mov.l &0xc,%d0 # pass: opsize is 12 bytes
# NOTE(review): mda7_flg presumably marks a -(a7) destination that needs
# the split _mem_write2 path below — confirm against SPCOND definitions
7918 cmpi.b SPCOND_FLG(%a6),&mda7_flg
7921 bsr.l _dmem_write # write ext prec number to memory
7923 tst.l %d1 # did dstore fail?
7924 bne.w fout_ext_err # yes
7928 # we don't want to do the write if the exception occurred in supervisor mode
7929 # so _mem_write2() handles this for us.
7931 bsr.l _mem_write2 # write ext prec number to memory
7933 tst.l %d1 # did dstore fail?
7934 bne.w fout_ext_err # yes
# fout_pack_not_norm: DENORMs go through the normal path; others get the
# exponent low word cleared; SNANs set AIOP and have their snan bit forced
7939 cmpi.b %d0,&DENORM # is it a DENORM?
7940 beq.w fout_pack_norm # yes
7942 clr.w 2+FP_SRC_EX(%a6)
7943 cmpi.b %d0,&SNAN # is it an SNAN?
7944 beq.b fout_pack_snan # yes
7945 bra.b fout_pack_write # no
7948 ori.w &snaniop2_mask,FPSR_EXCEPT(%a6) # set SNAN/AIOP
7949 bset &0x6,FP_SRC_HI(%a6) # set snan bit
7950 bra.b fout_pack_write
7952 #########################################################################
7953 # XDEF **************************************************************** #
7954 # fmul(): emulates the fmul instruction #
7955 # fsmul(): emulates the fsmul instruction #
7956 # fdmul(): emulates the fdmul instruction #
7958 # XREF **************************************************************** #
7959 # scale_to_zero_src() - scale src exponent to zero #
7960 # scale_to_zero_dst() - scale dst exponent to zero #
7961 # unf_res() - return default underflow result #
7962 # ovf_res() - return default overflow result #
7963 # res_qnan() - return QNAN result #
7964 # res_snan() - return SNAN result #
7966 # INPUT *************************************************************** #
7967 # a0 = pointer to extended precision source operand #
7968 # a1 = pointer to extended precision destination operand #
7969 # d0 rnd prec,mode #
7971 # OUTPUT ************************************************************** #
7973 # fp1 = EXOP (if exception occurred) #
7975 # ALGORITHM *********************************************************** #
7976 # Handle NANs, infinities, and zeroes as special cases. Divide #
7977 # norms/denorms into ext/sgl/dbl precision. #
7978 # For norms/denorms, scale the exponents such that a multiply #
7979 # instruction won't cause an exception. Use the regular fmul to #
7980 # compute a result. Check if the regular operands would have taken #
7981 # an exception. If so, return the default overflow/underflow result #
7982 # and return the EXOP if exceptions are enabled. Else, scale the #
7983 # result operand to the proper exponent. #
7985 #########################################################################
# scale-factor thresholds, indexed by rounding precision (ext/sgl/dbl):
# the combined fmul scale factor is compared against these to predict
# overflow (first table) and underflow (second table) before multiplying.
7989 long 0x3fff - 0x7ffe # ext_max
7990 long 0x3fff - 0x407e # sgl_max
7991 long 0x3fff - 0x43fe # dbl_max
7993 long 0x3fff + 0x0001 # ext_unfl
7994 long 0x3fff - 0x3f80 # sgl_unfl
7995 long 0x3fff - 0x3c00 # dbl_unfl
# fsmul/fdmul entry stubs force sgl/dbl rounding precision into d0, then
# fall into the common fmul path: save rnd info, dispatch non-NORM
# operands, scale both exponents to zero, and classify the summed scale
# factor against tbl_fmul_ovfl/unfl before doing the actual multiply.
7999 andi.b &0x30,%d0 # clear rnd prec
8000 ori.b &s_mode*0x10,%d0 # insert sgl prec
8006 ori.b &d_mode*0x10,%d0 # insert dbl prec
8010 mov.l %d0,L_SCR3(%a6) # store rnd info
8015 or.b STAG(%a6),%d1 # combine src tags
8016 bne.w fmul_not_norm # optimize on non-norm input
8019 mov.w DST_EX(%a1),FP_SCR1_EX(%a6)
8020 mov.l DST_HI(%a1),FP_SCR1_HI(%a6)
8021 mov.l DST_LO(%a1),FP_SCR1_LO(%a6)
8023 mov.w SRC_EX(%a0),FP_SCR0_EX(%a6)
8024 mov.l SRC_HI(%a0),FP_SCR0_HI(%a6)
8025 mov.l SRC_LO(%a0),FP_SCR0_LO(%a6)
8027 bsr.l scale_to_zero_src # scale src exponent
8028 mov.l %d0,-(%sp) # save scale factor 1
8030 bsr.l scale_to_zero_dst # scale dst exponent
8032 add.l %d0,(%sp) # SCALE_FACTOR = scale1 + scale2
8034 mov.w 2+L_SCR3(%a6),%d1 # fetch precision
8035 lsr.b &0x6,%d1 # shift to lo bits
8036 mov.l (%sp)+,%d0 # load S.F.
8037 cmp.l %d0,(tbl_fmul_ovfl.w,%pc,%d1.w*4) # would result ovfl?
8038 beq.w fmul_may_ovfl # result may rnd to overflow
8039 blt.w fmul_ovfl # result will overflow
8041 cmp.l %d0,(tbl_fmul_unfl.w,%pc,%d1.w*4) # would result unfl?
8042 beq.w fmul_may_unfl # result may rnd to no unfl
8043 bgt.w fmul_unfl # result will underflow
8047 # - the result of the multiply operation will neither overflow nor underflow.
8048 # - do the multiply to the proper precision and rounding mode.
8049 # - scale the result exponent using the scale factor. if both operands were
8050 # normalized then we really don't need to go through this scaling. but for now,
8054 fmovm.x FP_SCR1(%a6),&0x80 # load dst operand
8056 fmov.l L_SCR3(%a6),%fpcr # set FPCR
8057 fmov.l &0x0,%fpsr # clear FPSR
8059 fmul.x FP_SCR0(%a6),%fp0 # execute multiply
8061 fmov.l %fpsr,%d1 # save status
8062 fmov.l &0x0,%fpcr # clear FPCR
8064 or.l %d1,USER_FPSR(%a6) # save INEX2,N
# undo the operand scaling: subtract the scale factor back out of the
# result's biased exponent while preserving the sign bit
8067 fmovm.x &0x80,FP_SCR0(%a6) # store out result
8068 mov.l %d2,-(%sp) # save d2
8069 mov.w FP_SCR0_EX(%a6),%d1 # load {sgn,exp}
8070 mov.l %d1,%d2 # make a copy
8071 andi.l &0x7fff,%d1 # strip sign
8072 andi.w &0x8000,%d2 # keep old sign
8073 sub.l %d0,%d1 # add scale factor
8074 or.w %d2,%d1 # concat old sign,new exp
8075 mov.w %d1,FP_SCR0_EX(%a6) # insert new exponent
8076 mov.l (%sp)+,%d2 # restore d2
8077 fmovm.x FP_SCR0(%a6),&0x80 # return default result in fp0
8082 # - the result of the multiply operation is an overflow.
8083 # - do the multiply to the proper precision and rounding mode in order to
8084 # set the inexact bits.
8085 # - calculate the default result and return it in fp0.
8086 # - if overflow or inexact is enabled, we need a multiply result rounded to
8087 # extended precision. if the original operation was extended, then we have this
8088 # result. if the original operation was single or double, we have to do another
8089 # multiply using extended precision and the correct rounding mode. the result
8090 # of this operation then has its exponent scaled by -0x6000 to create the
8091 # exceptional operand.
8094 fmovm.x FP_SCR1(%a6),&0x80 # load dst operand
8096 fmov.l L_SCR3(%a6),%fpcr # set FPCR
8097 fmov.l &0x0,%fpsr # clear FPSR
8099 fmul.x FP_SCR0(%a6),%fp0 # execute multiply
8101 fmov.l %fpsr,%d1 # save status
8102 fmov.l &0x0,%fpcr # clear FPCR
8104 or.l %d1,USER_FPSR(%a6) # save INEX2,N
8106 # save setting this until now because this is where fmul_may_ovfl may jump in
8108 or.l &ovfl_inx_mask,USER_FPSR(%a6) # set ovfl/aovfl/ainex
8110 mov.b FPCR_ENABLE(%a6),%d1
8111 andi.b &0x13,%d1 # is OVFL or INEX enabled?
8112 bne.b fmul_ovfl_ena # yes
8114 # calculate the default result
8116 btst &neg_bit,FPSR_CC(%a6) # is result negative?
8117 sne %d1 # set sign param accordingly
8118 mov.l L_SCR3(%a6),%d0 # pass rnd prec,mode
8119 bsr.l ovf_res # calculate default result
8120 or.b %d0,FPSR_CC(%a6) # set INF,N if applicable
8121 fmovm.x (%a0),&0x80 # return default result in fp0
8125 # OVFL is enabled; Create EXOP:
8126 # - if precision is extended, then we have the EXOP. simply bias the exponent
8127 # with an extra -0x6000. if the precision is single or double, we need to
8128 # calculate a result rounded to extended precision.
8131 mov.l L_SCR3(%a6),%d1
8132 andi.b &0xc0,%d1 # test the rnd prec
8133 bne.b fmul_ovfl_ena_sd # it's sgl or dbl
# bias the result exponent by -(scale factor) - 0x6000 to form the EXOP
8136 fmovm.x &0x80,FP_SCR0(%a6) # move result to stack
8138 mov.l %d2,-(%sp) # save d2
8139 mov.w FP_SCR0_EX(%a6),%d1 # fetch {sgn,exp}
8140 mov.w %d1,%d2 # make a copy
8141 andi.l &0x7fff,%d1 # strip sign
8142 sub.l %d0,%d1 # add scale factor
8143 subi.l &0x6000,%d1 # subtract bias
8144 andi.w &0x7fff,%d1 # clear sign bit
8145 andi.w &0x8000,%d2 # keep old sign
8146 or.w %d2,%d1 # concat old sign,new exp
8147 mov.w %d1,FP_SCR0_EX(%a6) # insert new exponent
8148 mov.l (%sp)+,%d2 # restore d2
8149 fmovm.x FP_SCR0(%a6),&0x40 # return EXOP in fp1
# sgl/dbl precision: redo the multiply at extended precision (rnd mode
# only) so the EXOP carries the full-precision result
8153 fmovm.x FP_SCR1(%a6),&0x80 # load dst operand
8155 mov.l L_SCR3(%a6),%d1
8156 andi.b &0x30,%d1 # keep rnd mode only
8157 fmov.l %d1,%fpcr # set FPCR
8159 fmul.x FP_SCR0(%a6),%fp0 # execute multiply
8161 fmov.l &0x0,%fpcr # clear FPCR
8162 bra.b fmul_ovfl_ena_cont
8166 # - the result of the multiply operation MAY overflow.
8167 # - do the multiply to the proper precision and rounding mode in order to
8168 # set the inexact bits.
8169 # - calculate the default result and return it in fp0.
8172 fmovm.x FP_SCR1(%a6),&0x80 # load dst op
8174 fmov.l L_SCR3(%a6),%fpcr # set FPCR
8175 fmov.l &0x0,%fpsr # clear FPSR
8177 fmul.x FP_SCR0(%a6),%fp0 # execute multiply
8179 fmov.l %fpsr,%d1 # save status
8180 fmov.l &0x0,%fpcr # clear FPCR
8182 or.l %d1,USER_FPSR(%a6) # save INEX2,N
# with both operands scaled to zero exponents, |result| >= 2.0 after
# rounding means the true result overflows; join the overflow path past
# its (already-performed) multiply
8184 fabs.x %fp0,%fp1 # make a copy of result
8185 fcmp.b %fp1,&0x2 # is |result| >= 2.b?
8186 fbge.w fmul_ovfl_tst # yes; overflow has occurred
8188 # no, it didn't overflow; we have correct result
8189 bra.w fmul_normal_exit
8193 # - the result of the multiply operation is an underflow.
8194 # - do the multiply to the proper precision and rounding mode in order to
8195 # set the inexact bits.
8196 # - calculate the default result and return it in fp0.
8197 # - if overflow or inexact is enabled, we need a multiply result rounded to
8198 # extended precision. if the original operation was extended, then we have this
8199 # result. if the original operation was single or double, we have to do another
8200 # multiply using extended precision and the correct rounding mode. the result
8201 # of this operation then has its exponent scaled by -0x6000 to create the
8202 # exceptional operand.
8205 bset &unfl_bit,FPSR_EXCEPT(%a6) # set unfl exc bit
8207 # for fun, let's use only extended precision, round to zero. then, let
8208 # the unf_res() routine figure out all the rest.
8209 # will we get the correct answer.
8210 fmovm.x FP_SCR1(%a6),&0x80 # load dst operand
8212 fmov.l &rz_mode*0x10,%fpcr # set FPCR
8213 fmov.l &0x0,%fpsr # clear FPSR
8215 fmul.x FP_SCR0(%a6),%fp0 # execute multiply
8217 fmov.l %fpsr,%d1 # save status
8218 fmov.l &0x0,%fpcr # clear FPCR
8220 or.l %d1,USER_FPSR(%a6) # save INEX2,N
8222 mov.b FPCR_ENABLE(%a6),%d1
8223 andi.b &0x0b,%d1 # is UNFL or INEX enabled?
8224 bne.b fmul_unfl_ena # yes
# exceptions disabled: let unf_res() denormalize/round the default result
8227 fmovm.x &0x80,FP_SCR0(%a6) # store out result
8229 lea FP_SCR0(%a6),%a0 # pass: result addr
8230 mov.l L_SCR3(%a6),%d1 # pass: rnd prec,mode
8231 bsr.l unf_res # calculate default result
8232 or.b %d0,FPSR_CC(%a6) # unf_res2 may have set 'Z'
8233 fmovm.x FP_SCR0(%a6),&0x80 # return default result in fp0
# UNFL/INEX enabled: build the EXOP in fp1
8240 fmovm.x FP_SCR1(%a6),&0x40 # load dst op
8242 mov.l L_SCR3(%a6),%d1
8243 andi.b &0xc0,%d1 # is precision extended?
8244 bne.b fmul_unfl_ena_sd # no, sgl or dbl
8246 # if the rnd mode is anything but RZ, then we have to re-do the above
8247 # multiplication because we used RZ for all.
8248 fmov.l L_SCR3(%a6),%fpcr # set FPCR
8251 fmov.l &0x0,%fpsr # clear FPSR
8253 fmul.x FP_SCR0(%a6),%fp1 # execute multiply
8255 fmov.l &0x0,%fpcr # clear FPCR
# bias the result exponent by -(scale factor) + 0x6000 to form the EXOP
8257 fmovm.x &0x40,FP_SCR0(%a6) # save result to stack
8258 mov.l %d2,-(%sp) # save d2
8259 mov.w FP_SCR0_EX(%a6),%d1 # fetch {sgn,exp}
8260 mov.l %d1,%d2 # make a copy
8261 andi.l &0x7fff,%d1 # strip sign
8262 andi.w &0x8000,%d2 # keep old sign
8263 sub.l %d0,%d1 # add scale factor
8264 addi.l &0x6000,%d1 # add bias
8266 or.w %d2,%d1 # concat old sign,new exp
8267 mov.w %d1,FP_SCR0_EX(%a6) # insert new exponent
8268 mov.l (%sp)+,%d2 # restore d2
8269 fmovm.x FP_SCR0(%a6),&0x40 # return EXOP in fp1
# sgl/dbl precision: redo at the user's rounding mode, extended precision
8273 mov.l L_SCR3(%a6),%d1
8274 andi.b &0x30,%d1 # use only rnd mode
8275 fmov.l %d1,%fpcr # set FPCR
8277 bra.b fmul_unfl_ena_cont
# fmul_may_unfl: the scale factor says the result is on the underflow
# boundary — rounding decides. A second RZ multiply disambiguates a true
# underflow that rounded up to 2.0 from a normal result that rounded down.
8280 # -use the correct rounding mode and precision. this code favors operations
8281 # that do not underflow.
8283 fmovm.x FP_SCR1(%a6),&0x80 # load dst operand
8285 fmov.l L_SCR3(%a6),%fpcr # set FPCR
8286 fmov.l &0x0,%fpsr # clear FPSR
8288 fmul.x FP_SCR0(%a6),%fp0 # execute multiply
8290 fmov.l %fpsr,%d1 # save status
8291 fmov.l &0x0,%fpcr # clear FPCR
8293 or.l %d1,USER_FPSR(%a6) # save INEX2,N
8295 fabs.x %fp0,%fp1 # make a copy of result
8296 fcmp.b %fp1,&0x2 # is |result| > 2.b?
8297 fbgt.w fmul_normal_exit # no; no underflow occurred
8298 fblt.w fmul_unfl # yes; underflow occurred
8301 # we still don't know if underflow occurred. result is ~ equal to 2. but,
8302 # we don't know if the result was an underflow that rounded up to a 2 or
8303 # a normalized number that rounded down to a 2. so, redo the entire operation
8304 # using RZ as the rounding mode to see what the pre-rounded result is.
8305 # this case should be relatively rare.
8307 fmovm.x FP_SCR1(%a6),&0x40 # load dst operand
8309 mov.l L_SCR3(%a6),%d1
8310 andi.b &0xc0,%d1 # keep rnd prec
8311 ori.b &rz_mode*0x10,%d1 # insert RZ
8313 fmov.l %d1,%fpcr # set FPCR
8314 fmov.l &0x0,%fpsr # clear FPSR
8316 fmul.x FP_SCR0(%a6),%fp1 # execute multiply
8318 fmov.l &0x0,%fpcr # clear FPCR
8319 fabs.x %fp1 # make absolute value
8320 fcmp.b %fp1,&0x2 # is |result| < 2.b?
8321 fbge.w fmul_normal_exit # no; no underflow occurred
8322 bra.w fmul_unfl # yes, underflow occurred
8324 ################################################################################
8327 # Multiply: inputs are not both normalized; what are they?
# d1 indexes the 8x8 offset table below (one row per dst operand type,
# one column per src type); each entry is a word offset from tbl_fmul_op
8330 mov.w (tbl_fmul_op.b,%pc,%d1.w*2),%d1
8331 jmp (tbl_fmul_op.b,%pc,%d1.w)
8335 short fmul_norm - tbl_fmul_op # NORM x NORM
8336 short fmul_zero - tbl_fmul_op # NORM x ZERO
8337 short fmul_inf_src - tbl_fmul_op # NORM x INF
8338 short fmul_res_qnan - tbl_fmul_op # NORM x QNAN
8339 short fmul_norm - tbl_fmul_op # NORM x DENORM
8340 short fmul_res_snan - tbl_fmul_op # NORM x SNAN
8341 short tbl_fmul_op - tbl_fmul_op #
8342 short tbl_fmul_op - tbl_fmul_op #
8344 short fmul_zero - tbl_fmul_op # ZERO x NORM
8345 short fmul_zero - tbl_fmul_op # ZERO x ZERO
8346 short fmul_res_operr - tbl_fmul_op # ZERO x INF
8347 short fmul_res_qnan - tbl_fmul_op # ZERO x QNAN
8348 short fmul_zero - tbl_fmul_op # ZERO x DENORM
8349 short fmul_res_snan - tbl_fmul_op # ZERO x SNAN
8350 short tbl_fmul_op - tbl_fmul_op #
8351 short tbl_fmul_op - tbl_fmul_op #
8353 short fmul_inf_dst - tbl_fmul_op # INF x NORM
8354 short fmul_res_operr - tbl_fmul_op # INF x ZERO
8355 short fmul_inf_dst - tbl_fmul_op # INF x INF
8356 short fmul_res_qnan - tbl_fmul_op # INF x QNAN
8357 short fmul_inf_dst - tbl_fmul_op # INF x DENORM
8358 short fmul_res_snan - tbl_fmul_op # INF x SNAN
8359 short tbl_fmul_op - tbl_fmul_op #
8360 short tbl_fmul_op - tbl_fmul_op #
8362 short fmul_res_qnan - tbl_fmul_op # QNAN x NORM
8363 short fmul_res_qnan - tbl_fmul_op # QNAN x ZERO
8364 short fmul_res_qnan - tbl_fmul_op # QNAN x INF
8365 short fmul_res_qnan - tbl_fmul_op # QNAN x QNAN
8366 short fmul_res_qnan - tbl_fmul_op # QNAN x DENORM
8367 short fmul_res_snan - tbl_fmul_op # QNAN x SNAN
8368 short tbl_fmul_op - tbl_fmul_op #
8369 short tbl_fmul_op - tbl_fmul_op #
# NOTE(review): this row appears to be the DENORM-dst row (it reuses the
# NORM handlers) although its comments repeat "NORM x ..." — confirm.
8371 short fmul_norm - tbl_fmul_op # NORM x NORM
8372 short fmul_zero - tbl_fmul_op # NORM x ZERO
8373 short fmul_inf_src - tbl_fmul_op # NORM x INF
8374 short fmul_res_qnan - tbl_fmul_op # NORM x QNAN
8375 short fmul_norm - tbl_fmul_op # NORM x DENORM
8376 short fmul_res_snan - tbl_fmul_op # NORM x SNAN
8377 short tbl_fmul_op - tbl_fmul_op #
8378 short tbl_fmul_op - tbl_fmul_op #
8380 short fmul_res_snan - tbl_fmul_op # SNAN x NORM
8381 short fmul_res_snan - tbl_fmul_op # SNAN x ZERO
8382 short fmul_res_snan - tbl_fmul_op # SNAN x INF
8383 short fmul_res_snan - tbl_fmul_op # SNAN x QNAN
8384 short fmul_res_snan - tbl_fmul_op # SNAN x DENORM
8385 short fmul_res_snan - tbl_fmul_op # SNAN x SNAN
8386 short tbl_fmul_op - tbl_fmul_op #
8387 short tbl_fmul_op - tbl_fmul_op #
8397 # Multiply: (Zero x Zero) || (Zero x norm) || (Zero x denorm)
# result is a zero whose sign is the XOR of the two operand signs;
# set the Z (and possibly N) condition codes accordingly
8399 global fmul_zero # global for fsglmul
8401 mov.b SRC_EX(%a0),%d0 # exclusive or the signs
8402 mov.b DST_EX(%a1),%d1
8404 bpl.b fmul_zero_p # result ZERO is pos.
8406 fmov.s &0x80000000,%fp0 # load -ZERO
8407 mov.b &z_bmask+neg_bmask,FPSR_CC(%a6) # set Z/N
8410 fmov.s &0x00000000,%fp0 # load +ZERO
8411 mov.b &z_bmask,FPSR_CC(%a6) # set Z
8415 # Multiply: (inf x inf) || (inf x norm) || (inf x denorm)
8417 # Note: The j-bit for an infinity is a don't-care. However, to be
8418 # strictly compatible w/ the 68881/882, we make sure to return an
8419 # INF w/ the j-bit set if the input INF j-bit was set. Destination
8420 # INFs take priority.
8422 global fmul_inf_dst # global for fsglmul
8424 fmovm.x DST(%a1),&0x80 # return INF result in fp0
8425 mov.b SRC_EX(%a0),%d0 # exclusive or the signs
8426 mov.b DST_EX(%a1),%d1
8428 bpl.b fmul_inf_dst_p # result INF is pos.
# fabs+fneg forces the sign regardless of which operand carried it
8430 fabs.x %fp0 # clear result sign
8431 fneg.x %fp0 # set result sign
8432 mov.b &inf_bmask+neg_bmask,FPSR_CC(%a6) # set INF/N
8435 fabs.x %fp0 # clear result sign
8436 mov.b &inf_bmask,FPSR_CC(%a6) # set INF
# src is the INF: load it, then share fmul_inf_dst's sign/ccode tail
8439 global fmul_inf_src # global for fsglmul
8441 fmovm.x SRC(%a0),&0x80 # return INF result in fp0
8442 mov.b SRC_EX(%a0),%d0 # exclusive or the signs
8443 mov.b DST_EX(%a1),%d1
8445 bpl.b fmul_inf_dst_p # result INF is pos.
8446 bra.b fmul_inf_dst_n
8448 #########################################################################
8449 # XDEF **************************************************************** #
8450 # fin(): emulates the fmove instruction #
8451 # fsin(): emulates the fsmove instruction #
8452 # fdin(): emulates the fdmove instruction #
8454 # XREF **************************************************************** #
8455 # norm() - normalize mantissa for EXOP on denorm #
8456 # scale_to_zero_src() - scale src exponent to zero #
8457 # ovf_res() - return default overflow result #
8458 # unf_res() - return default underflow result #
8459 # res_qnan_1op() - return QNAN result #
8460 # res_snan_1op() - return SNAN result #
8462 # INPUT *************************************************************** #
8463 # a0 = pointer to extended precision source operand #
8464 # d0 = round prec/mode #
8466 # OUTPUT ************************************************************** #
8468 # fp1 = EXOP (if exception occurred) #
8470 # ALGORITHM *********************************************************** #
8471 # Handle NANs, infinities, and zeroes as special cases. Divide #
8472 # norms into extended, single, and double precision. #
8473 # Norms can be emulated w/ a regular fmove instruction. For #
8474 # sgl/dbl, must scale exponent and perform an "fmove". Check to see #
8475 # if the result would have overflowed/underflowed. If so, use unf_res() #
8476 # or ovf_res() to return the default result. Also return EXOP if #
8477 # exception is enabled. If no exception, return the default result. #
8478 # Unnorms don't pass through here. #
8480 #########################################################################
# fsin/fdin entry stubs force sgl/dbl rounding precision, then fall into
# the common fmove-in path: save rnd info and dispatch non-NORM inputs.
8484 andi.b &0x30,%d0 # clear rnd prec
8485 ori.b &s_mode*0x10,%d0 # insert sgl precision
8490 andi.b &0x30,%d0 # clear rnd prec
8491 ori.b &d_mode*0x10,%d0 # insert dbl precision
8495 mov.l %d0,L_SCR3(%a6) # store rnd info
8497 mov.b STAG(%a6),%d1 # fetch src optype tag
8498 bne.w fin_not_norm # optimize on non-norm input
8501 # FP MOVE IN: NORMs and DENORMs ONLY!
8504 andi.b &0xc0,%d0 # is precision extended?
8505 bne.w fin_not_ext # no, so go handle dbl or sgl
8508 # precision selected is extended. so...we cannot get an underflow
8509 # or overflow because of rounding to the correct precision. so...
8510 # skip the scaling and unscaling...
8512 tst.b SRC_EX(%a0) # is the operand negative?
8513 bpl.b fin_norm_done # no
8514 bset &neg_bit,FPSR_CC(%a6) # yes, so set 'N' ccode bit
8516 fmovm.x SRC(%a0),&0x80 # return result in fp0
8520 # for an extended precision DENORM, the UNFL exception bit is set
8521 # the accrued bit is NOT set in this instance(no inexactness!)
8524 andi.b &0xc0,%d0 # is precision extended?
8525 bne.w fin_not_ext # no, so go handle dbl or sgl
8527 bset &unfl_bit,FPSR_EXCEPT(%a6) # set unfl exc bit
8528 tst.b SRC_EX(%a0) # is the operand negative?
8529 bpl.b fin_denorm_done # no
8530 bset &neg_bit,FPSR_CC(%a6) # yes, so set 'N' ccode bit
8532 fmovm.x SRC(%a0),&0x80 # return result in fp0
8533 btst &unfl_bit,FPCR_ENABLE(%a6) # is UNFL enabled?
8534 bne.b fin_denorm_unfl_ena # yes
8538 # the input is an extended DENORM and underflow is enabled in the FPCR.
8539 # normalize the mantissa and add the bias of 0x6000 to the resulting negative
8540 # exponent and insert back into the operand.
8542 fin_denorm_unfl_ena:
8543 mov.w SRC_EX(%a0),FP_SCR0_EX(%a6)
8544 mov.l SRC_HI(%a0),FP_SCR0_HI(%a6)
8545 mov.l SRC_LO(%a0),FP_SCR0_LO(%a6)
8546 lea FP_SCR0(%a6),%a0 # pass: ptr to operand
8547 bsr.l norm # normalize result
8548 neg.w %d0 # new exponent = -(shft val)
8549 addi.w &0x6000,%d0 # add new bias to exponent
8550 mov.w FP_SCR0_EX(%a6),%d1 # fetch old sign,exp
8551 andi.w &0x8000,%d1 # keep old sign
8552 andi.w &0x7fff,%d0 # clear sign position
8553 or.w %d1,%d0 # concat new exp,old sign
8554 mov.w %d0,FP_SCR0_EX(%a6) # insert new exponent
8555 fmovm.x FP_SCR0(%a6),&0x40 # return EXOP in fp1
8559 # operand is to be rounded to single or double precision
8562 cmpi.b %d0,&s_mode*0x10 # separate sgl/dbl prec
8566 # operand is to be rounded to single precision
8569 mov.w SRC_EX(%a0),FP_SCR0_EX(%a6)
8570 mov.l SRC_HI(%a0),FP_SCR0_HI(%a6)
8571 mov.l SRC_LO(%a0),FP_SCR0_LO(%a6)
8572 bsr.l scale_to_zero_src # calculate scale factor
8574 cmpi.l %d0,&0x3fff-0x3f80 # will move in underflow?
8575 bge.w fin_sd_unfl # yes; go handle underflow
8576 cmpi.l %d0,&0x3fff-0x407e # will move in overflow?
8577 beq.w fin_sd_may_ovfl # maybe; go check
8578 blt.w fin_sd_ovfl # yes; go handle overflow
8581 # operand will NOT overflow or underflow when moved into the fp reg file
8584 fmov.l &0x0,%fpsr # clear FPSR
8585 fmov.l L_SCR3(%a6),%fpcr # set FPCR
8587 fmov.x FP_SCR0(%a6),%fp0 # perform move
8589 fmov.l %fpsr,%d1 # save FPSR
8590 fmov.l &0x0,%fpcr # clear FPCR
8592 or.l %d1,USER_FPSR(%a6) # save INEX2,N
8595 mov.l %d2,-(%sp) # save d2
8596 fmovm.x &0x80,FP_SCR0(%a6) # store out result
8597 mov.w FP_SCR0_EX(%a6),%d1 # load {sgn,exp}
8598 mov.w %d1,%d2 # make a copy
8599 andi.l &0x7fff,%d1 # strip sign
8600 sub.l %d0,%d1 # add scale factor
8601 andi.w &0x8000,%d2 # keep old sign
8602 or.w %d1,%d2 # concat old sign,new exponent
8603 mov.w %d2,FP_SCR0_EX(%a6) # insert new exponent
8604 mov.l (%sp)+,%d2 # restore d2
8605 fmovm.x FP_SCR0(%a6),&0x80 # return result in fp0
8609 # operand is to be rounded to double precision
8612 mov.w SRC_EX(%a0),FP_SCR0_EX(%a6) # copy src operand to
8613 mov.l SRC_HI(%a0),FP_SCR0_HI(%a6) # the FP_SCR0 scratch area
8614 mov.l SRC_LO(%a0),FP_SCR0_LO(%a6)
8615 bsr.l scale_to_zero_src # calculate scale factor
8617 cmpi.l %d0,&0x3fff-0x3c00 # will move in underflow?
8618 bge.w fin_sd_unfl # yes; go handle underflow
8619 cmpi.l %d0,&0x3fff-0x43fe # will move in overflow?
8620 beq.w fin_sd_may_ovfl # maybe; go check
8621 blt.w fin_sd_ovfl # yes; go handle overflow
8622 bra.w fin_sd_normal # no; go handle normalized op
8625 # operand WILL underflow when moved in to the fp register file
8628 bset &unfl_bit,FPSR_EXCEPT(%a6) # set unfl exc bit
8630 tst.b FP_SCR0_EX(%a6) # is operand negative?
8631 bpl.b fin_sd_unfl_tst
8632 bset &neg_bit,FPSR_CC(%a6) # set 'N' ccode bit
8634 # if underflow or inexact is enabled, then go calculate the EXOP first.
8636 mov.b FPCR_ENABLE(%a6),%d1
8637 andi.b &0x0b,%d1 # is UNFL or INEX enabled?
8638 bne.b fin_sd_unfl_ena # yes
8641 lea FP_SCR0(%a6),%a0 # pass: result addr
8642 mov.l L_SCR3(%a6),%d1 # pass: rnd prec,mode
8643 bsr.l unf_res # calculate default result
8644 or.b %d0,FPSR_CC(%a6) # unf_res may have set 'Z'
8645 fmovm.x FP_SCR0(%a6),&0x80 # return default result in fp0
8649 # operand will underflow AND underflow or inexact is enabled.
8650 # Therefore, we must return the result rounded to extended precision.
8653 mov.l FP_SCR0_HI(%a6),FP_SCR1_HI(%a6)
8654 mov.l FP_SCR0_LO(%a6),FP_SCR1_LO(%a6)
8655 mov.w FP_SCR0_EX(%a6),%d1 # load current exponent
8657 mov.l %d2,-(%sp) # save d2
8658 mov.w %d1,%d2 # make a copy
8659 andi.l &0x7fff,%d1 # strip sign
8660 sub.l %d0,%d1 # subtract scale factor
8661 andi.w &0x8000,%d2 # extract old sign
8662 addi.l &0x6000,%d1 # add new bias
8664 or.w %d1,%d2 # concat old sign,new exp
8665 mov.w %d2,FP_SCR1_EX(%a6) # insert new exponent
8666 fmovm.x FP_SCR1(%a6),&0x40 # return EXOP in fp1
8667 mov.l (%sp)+,%d2 # restore d2
8668 bra.b fin_sd_unfl_dis
8671 # operand WILL overflow.
8674 fmov.l &0x0,%fpsr # clear FPSR
8675 fmov.l L_SCR3(%a6),%fpcr # set FPCR
8677 fmov.x FP_SCR0(%a6),%fp0 # perform move
8679 fmov.l &0x0,%fpcr # clear FPCR
8680 fmov.l %fpsr,%d1 # save FPSR
8682 or.l %d1,USER_FPSR(%a6) # save INEX2,N
8685 or.l &ovfl_inx_mask,USER_FPSR(%a6) # set ovfl/aovfl/ainex
8687 mov.b FPCR_ENABLE(%a6),%d1
8688 andi.b &0x13,%d1 # is OVFL or INEX enabled?
8689 bne.b fin_sd_ovfl_ena # yes
8692 # OVFL is not enabled; therefore, we must create the default result by
8693 # calling ovf_res().
8696 btst &neg_bit,FPSR_CC(%a6) # is result negative?
8697 sne %d1 # set sign param accordingly
8698 mov.l L_SCR3(%a6),%d0 # pass: prec,mode
8699 bsr.l ovf_res # calculate default result
8700 or.b %d0,FPSR_CC(%a6) # set INF,N if applicable
8701 fmovm.x (%a0),&0x80 # return default result in fp0
8706 # the INEX2 bit has already been updated by the round to the correct precision.
8707 # now, round to extended(and don't alter the FPSR).
8710 mov.l %d2,-(%sp) # save d2
8711 mov.w FP_SCR0_EX(%a6),%d1 # fetch {sgn,exp}
8712 mov.l %d1,%d2 # make a copy
8713 andi.l &0x7fff,%d1 # strip sign
8714 andi.w &0x8000,%d2 # keep old sign
8715 sub.l %d0,%d1 # add scale factor
8716 sub.l &0x6000,%d1 # subtract bias
8719 mov.w %d1,FP_SCR0_EX(%a6) # insert new exponent
8720 mov.l (%sp)+,%d2 # restore d2
8721 fmovm.x FP_SCR0(%a6),&0x40 # return EXOP in fp1
8722 bra.b fin_sd_ovfl_dis
8725 # the move in MAY overflow. so...
8728 fmov.l &0x0,%fpsr # clear FPSR
8729 fmov.l L_SCR3(%a6),%fpcr # set FPCR
8731 fmov.x FP_SCR0(%a6),%fp0 # perform the move
8733 fmov.l %fpsr,%d1 # save status
8734 fmov.l &0x0,%fpcr # clear FPCR
8736 or.l %d1,USER_FPSR(%a6) # save INEX2,N
8738 fabs.x %fp0,%fp1 # make a copy of result
8739 fcmp.b %fp1,&0x2 # is |result| >= 2.b?
8740 fbge.w fin_sd_ovfl_tst # yes; overflow has occurred
8742 # no, it didn't overflow; we have correct result
8743 bra.w fin_sd_normal_exit
8745 ##########################################################################
8748 # operand is not a NORM: check its optype and branch accordingly
8751 cmpi.b %d1,&DENORM # weed out DENORM
8753 cmpi.b %d1,&SNAN # weed out SNANs
8755 cmpi.b %d1,&QNAN # weed out QNANs
8759 # do the fmove in; at this point, only possible ops are ZERO and INF.
8760 # use fmov to determine ccodes.
8761 # prec:mode should be zero at this point but it won't affect answer anyways.
8763 fmov.x SRC(%a0),%fp0 # do fmove in
8764 fmov.l %fpsr,%d0 # no exceptions possible
8765 rol.l &0x8,%d0 # put ccodes in lo byte
8766 mov.b %d0,FPSR_CC(%a6) # insert correct ccodes
8769 #########################################################################
8770 # XDEF **************************************************************** #
8771 # fdiv(): emulates the fdiv instruction #
8772 # fsdiv(): emulates the fsdiv instruction #
8773 # fddiv(): emulates the fddiv instruction #
8775 # XREF **************************************************************** #
8776 # scale_to_zero_src() - scale src exponent to zero #
8777 # scale_to_zero_dst() - scale dst exponent to zero #
8778 # unf_res() - return default underflow result #
8779 # ovf_res() - return default overflow result #
8780 # res_qnan() - return QNAN result #
8781 # res_snan() - return SNAN result #
8783 # INPUT *************************************************************** #
8784 # a0 = pointer to extended precision source operand #
8785 # a1 = pointer to extended precision destination operand #
8786 # d0 rnd prec,mode #
8788 # OUTPUT ************************************************************** #
8790 # fp1 = EXOP (if exception occurred) #
8792 # ALGORITHM *********************************************************** #
8793 # Handle NANs, infinities, and zeroes as special cases. Divide #
8794 # norms/denorms into ext/sgl/dbl precision. #
8795 # For norms/denorms, scale the exponents such that a divide #
8796 # instruction won't cause an exception. Use the regular fdiv to #
8797 # compute a result. Check if the regular operands would have taken #
8798 # an exception. If so, return the default overflow/underflow result #
8799 # and return the EXOP if exceptions are enabled. Else, scale the #
8800 # result operand to the proper exponent. #
8802 #########################################################################
# scale-factor thresholds, indexed by rnd precision (ext=0, sgl=1, dbl=2)
8806 long 0x3fff - 0x0000 # ext underflow exponent
8807 long 0x3fff - 0x3f81 # sgl underflow exponent
8808 long 0x3fff - 0x3c01 # dbl underflow exponent
8811 long 0x3fff - 0x7ffe # ext overflow exponent
8812 long 0x3fff - 0x407e # sgl overflow exponent
8813 long 0x3fff - 0x43fe # dbl overflow exponent
8817 andi.b &0x30,%d0 # clear rnd prec
8818 ori.b &s_mode*0x10,%d0 # insert sgl prec
8823 andi.b &0x30,%d0 # clear rnd prec
8824 ori.b &d_mode*0x10,%d0 # insert dbl prec
8828 mov.l %d0,L_SCR3(%a6) # store rnd info
8833 or.b STAG(%a6),%d1 # combine src tags
8835 bne.w fdiv_not_norm # optimize on non-norm input
8838 # DIVIDE: NORMs and DENORMs ONLY!
8841 mov.w DST_EX(%a1),FP_SCR1_EX(%a6)
8842 mov.l DST_HI(%a1),FP_SCR1_HI(%a6)
8843 mov.l DST_LO(%a1),FP_SCR1_LO(%a6)
8845 mov.w SRC_EX(%a0),FP_SCR0_EX(%a6)
8846 mov.l SRC_HI(%a0),FP_SCR0_HI(%a6)
8847 mov.l SRC_LO(%a0),FP_SCR0_LO(%a6)
8849 bsr.l scale_to_zero_src # scale src exponent
8850 mov.l %d0,-(%sp) # save scale factor 1
8852 bsr.l scale_to_zero_dst # scale dst exponent
8854 neg.l (%sp) # SCALE FACTOR = scale1 - scale2
8857 mov.w 2+L_SCR3(%a6),%d1 # fetch precision
8858 lsr.b &0x6,%d1 # shift to lo bits
8859 mov.l (%sp)+,%d0 # load S.F.
8860 cmp.l %d0,(tbl_fdiv_ovfl.b,%pc,%d1.w*4) # will result overflow?
8861 ble.w fdiv_may_ovfl # result will overflow
8863 cmp.l %d0,(tbl_fdiv_unfl.w,%pc,%d1.w*4) # will result underflow?
8864 beq.w fdiv_may_unfl # maybe
8865 bgt.w fdiv_unfl # yes; go handle underflow
8868 fmovm.x FP_SCR1(%a6),&0x80 # load dst op
8870 fmov.l L_SCR3(%a6),%fpcr # save FPCR
8871 fmov.l &0x0,%fpsr # clear FPSR
8873 fdiv.x FP_SCR0(%a6),%fp0 # perform divide
8875 fmov.l %fpsr,%d1 # save FPSR
8876 fmov.l &0x0,%fpcr # clear FPCR
8878 or.l %d1,USER_FPSR(%a6) # save INEX2,N
8881 fmovm.x &0x80,FP_SCR0(%a6) # store result on stack
8882 mov.l %d2,-(%sp) # store d2
8883 mov.w FP_SCR0_EX(%a6),%d1 # load {sgn,exp}
8884 mov.l %d1,%d2 # make a copy
8885 andi.l &0x7fff,%d1 # strip sign
8886 andi.w &0x8000,%d2 # keep old sign
8887 sub.l %d0,%d1 # add scale factor
8888 or.w %d2,%d1 # concat old sign,new exp
8889 mov.w %d1,FP_SCR0_EX(%a6) # insert new exponent
8890 mov.l (%sp)+,%d2 # restore d2
8891 fmovm.x FP_SCR0(%a6),&0x80 # return result in fp0
8900 mov.l (%sp)+,%d0 # restore scale factor
8901 bra.b fdiv_normal_exit
8904 mov.l %d0,-(%sp) # save scale factor
8906 fmovm.x FP_SCR1(%a6),&0x80 # load dst op
8908 fmov.l L_SCR3(%a6),%fpcr # set FPCR
8909 fmov.l &0x0,%fpsr # set FPSR
8911 fdiv.x FP_SCR0(%a6),%fp0 # execute divide
8916 or.l %d0,USER_FPSR(%a6) # save INEX,N
8918 fmovm.x &0x01,-(%sp) # save result to stack
8919 mov.w (%sp),%d0 # fetch new exponent
8920 add.l &0xc,%sp # clear result from stack
8921 andi.l &0x7fff,%d0 # strip sign
8922 sub.l (%sp),%d0 # add scale factor
8923 cmp.l %d0,(tbl_fdiv_ovfl2.b,%pc,%d1.w*4)
8928 or.l &ovfl_inx_mask,USER_FPSR(%a6) # set ovfl/aovfl/ainex
8930 mov.b FPCR_ENABLE(%a6),%d1
8931 andi.b &0x13,%d1 # is OVFL or INEX enabled?
8932 bne.b fdiv_ovfl_ena # yes
8935 btst &neg_bit,FPSR_CC(%a6) # is result negative?
8936 sne %d1 # set sign param accordingly
8937 mov.l L_SCR3(%a6),%d0 # pass prec:rnd
8938 bsr.l ovf_res # calculate default result
8939 or.b %d0,FPSR_CC(%a6) # set INF if applicable
8940 fmovm.x (%a0),&0x80 # return default result in fp0
8944 mov.l L_SCR3(%a6),%d1
8945 andi.b &0xc0,%d1 # is precision extended?
8946 bne.b fdiv_ovfl_ena_sd # no, do sgl or dbl
8949 fmovm.x &0x80,FP_SCR0(%a6) # move result to stack
8951 mov.l %d2,-(%sp) # save d2
8952 mov.w FP_SCR0_EX(%a6),%d1 # fetch {sgn,exp}
8953 mov.w %d1,%d2 # make a copy
8954 andi.l &0x7fff,%d1 # strip sign
8955 sub.l %d0,%d1 # add scale factor
8956 subi.l &0x6000,%d1 # subtract bias
8957 andi.w &0x7fff,%d1 # clear sign bit
8958 andi.w &0x8000,%d2 # keep old sign
8959 or.w %d2,%d1 # concat old sign,new exp
8960 mov.w %d1,FP_SCR0_EX(%a6) # insert new exponent
8961 mov.l (%sp)+,%d2 # restore d2
8962 fmovm.x FP_SCR0(%a6),&0x40 # return EXOP in fp1
8966 fmovm.x FP_SCR1(%a6),&0x80 # load dst operand
8968 mov.l L_SCR3(%a6),%d1
8969 andi.b &0x30,%d1 # keep rnd mode
8970 fmov.l %d1,%fpcr # set FPCR
8972 fdiv.x FP_SCR0(%a6),%fp0 # execute divide
8974 fmov.l &0x0,%fpcr # clear FPCR
8975 bra.b fdiv_ovfl_ena_cont
8978 bset &unfl_bit,FPSR_EXCEPT(%a6) # set unfl exc bit
8980 fmovm.x FP_SCR1(%a6),&0x80 # load dst op
8982 fmov.l &rz_mode*0x10,%fpcr # set FPCR
8983 fmov.l &0x0,%fpsr # clear FPSR
8985 fdiv.x FP_SCR0(%a6),%fp0 # execute divide
8987 fmov.l %fpsr,%d1 # save status
8988 fmov.l &0x0,%fpcr # clear FPCR
8990 or.l %d1,USER_FPSR(%a6) # save INEX2,N
8992 mov.b FPCR_ENABLE(%a6),%d1
8993 andi.b &0x0b,%d1 # is UNFL or INEX enabled?
8994 bne.b fdiv_unfl_ena # yes
8997 fmovm.x &0x80,FP_SCR0(%a6) # store out result
8999 lea FP_SCR0(%a6),%a0 # pass: result addr
9000 mov.l L_SCR3(%a6),%d1 # pass: rnd prec,mode
9001 bsr.l unf_res # calculate default result
9002 or.b %d0,FPSR_CC(%a6) # 'Z' may have been set
9003 fmovm.x FP_SCR0(%a6),&0x80 # return default result in fp0
9010 fmovm.x FP_SCR1(%a6),&0x40 # load dst op
9012 mov.l L_SCR3(%a6),%d1
9013 andi.b &0xc0,%d1 # is precision extended?
9014 bne.b fdiv_unfl_ena_sd # no, sgl or dbl
9016 fmov.l L_SCR3(%a6),%fpcr # set FPCR
9019 fmov.l &0x0,%fpsr # clear FPSR
9021 fdiv.x FP_SCR0(%a6),%fp1 # execute divide
9023 fmov.l &0x0,%fpcr # clear FPCR
9025 fmovm.x &0x40,FP_SCR0(%a6) # save result to stack
9026 mov.l %d2,-(%sp) # save d2
9027 mov.w FP_SCR0_EX(%a6),%d1 # fetch {sgn,exp}
9028 mov.l %d1,%d2 # make a copy
9029 andi.l &0x7fff,%d1 # strip sign
9030 andi.w &0x8000,%d2 # keep old sign
9031 sub.l %d0,%d1 # subtract scale factor
9032 addi.l &0x6000,%d1 # add bias
9034 or.w %d2,%d1 # concat old sign,new exp
9035 mov.w %d1,FP_SCR0_EX(%a6) # insert new exp
9036 mov.l (%sp)+,%d2 # restore d2
9037 fmovm.x FP_SCR0(%a6),&0x40 # return EXOP in fp1
9041 mov.l L_SCR3(%a6),%d1
9042 andi.b &0x30,%d1 # use only rnd mode
9043 fmov.l %d1,%fpcr # set FPCR
9045 bra.b fdiv_unfl_ena_cont
9048 # the divide operation MAY underflow:
9051 fmovm.x FP_SCR1(%a6),&0x80 # load dst op
9053 fmov.l L_SCR3(%a6),%fpcr # set FPCR
9054 fmov.l &0x0,%fpsr # clear FPSR
9056 fdiv.x FP_SCR0(%a6),%fp0 # execute divide
9058 fmov.l %fpsr,%d1 # save status
9059 fmov.l &0x0,%fpcr # clear FPCR
9061 or.l %d1,USER_FPSR(%a6) # save INEX2,N
9063 fabs.x %fp0,%fp1 # make a copy of result
9064 fcmp.b %fp1,&0x1 # is |result| > 1.b?
9065 fbgt.w fdiv_normal_exit # no; no underflow occurred
9066 fblt.w fdiv_unfl # yes; underflow occurred
9069 # we still don't know if underflow occurred. result is ~ equal to 1. but,
9070 # we don't know if the result was an underflow that rounded up to a 1
9071 # or a normalized number that rounded down to a 1. so, redo the entire
9072 # operation using RZ as the rounding mode to see what the pre-rounded
9073 # result is. this case should be relatively rare.
9075 fmovm.x FP_SCR1(%a6),&0x40 # load dst op into fp1
9077 mov.l L_SCR3(%a6),%d1
9078 andi.b &0xc0,%d1 # keep rnd prec
9079 ori.b &rz_mode*0x10,%d1 # insert RZ
9081 fmov.l %d1,%fpcr # set FPCR
9082 fmov.l &0x0,%fpsr # clear FPSR
9084 fdiv.x FP_SCR0(%a6),%fp1 # execute divide
9086 fmov.l &0x0,%fpcr # clear FPCR
9087 fabs.x %fp1 # make absolute value
9088 fcmp.b %fp1,&0x1 # is |result| < 1.b?
9089 fbge.w fdiv_normal_exit # no; no underflow occurred
9090 bra.w fdiv_unfl # yes; underflow occurred
9092 ############################################################################
9095 # Divide: inputs are not both normalized; what are they?
9098 mov.w (tbl_fdiv_op.b,%pc,%d1.w*2),%d1 # fetch handler offset for tag pair
9099 jmp (tbl_fdiv_op.b,%pc,%d1.w*1) # jump to special-case handler
9103 short fdiv_norm - tbl_fdiv_op # NORM / NORM
9104 short fdiv_inf_load - tbl_fdiv_op # NORM / ZERO
9105 short fdiv_zero_load - tbl_fdiv_op # NORM / INF
9106 short fdiv_res_qnan - tbl_fdiv_op # NORM / QNAN
9107 short fdiv_norm - tbl_fdiv_op # NORM / DENORM
9108 short fdiv_res_snan - tbl_fdiv_op # NORM / SNAN
9109 short tbl_fdiv_op - tbl_fdiv_op # unused tag combination
9110 short tbl_fdiv_op - tbl_fdiv_op # unused tag combination
9112 short fdiv_zero_load - tbl_fdiv_op # ZERO / NORM
9113 short fdiv_res_operr - tbl_fdiv_op # ZERO / ZERO
9114 short fdiv_zero_load - tbl_fdiv_op # ZERO / INF
9115 short fdiv_res_qnan - tbl_fdiv_op # ZERO / QNAN
9116 short fdiv_zero_load - tbl_fdiv_op # ZERO / DENORM
9117 short fdiv_res_snan - tbl_fdiv_op # ZERO / SNAN
9118 short tbl_fdiv_op - tbl_fdiv_op # unused tag combination
9119 short tbl_fdiv_op - tbl_fdiv_op # unused tag combination
9121 short fdiv_inf_dst - tbl_fdiv_op # INF / NORM
9122 short fdiv_inf_dst - tbl_fdiv_op # INF / ZERO
9123 short fdiv_res_operr - tbl_fdiv_op # INF / INF
9124 short fdiv_res_qnan - tbl_fdiv_op # INF / QNAN
9125 short fdiv_inf_dst - tbl_fdiv_op # INF / DENORM
9126 short fdiv_res_snan - tbl_fdiv_op # INF / SNAN
9127 short tbl_fdiv_op - tbl_fdiv_op # unused tag combination
9128 short tbl_fdiv_op - tbl_fdiv_op # unused tag combination
9130 short fdiv_res_qnan - tbl_fdiv_op # QNAN / NORM
9131 short fdiv_res_qnan - tbl_fdiv_op # QNAN / ZERO
9132 short fdiv_res_qnan - tbl_fdiv_op # QNAN / INF
9133 short fdiv_res_qnan - tbl_fdiv_op # QNAN / QNAN
9134 short fdiv_res_qnan - tbl_fdiv_op # QNAN / DENORM
9135 short fdiv_res_snan - tbl_fdiv_op # QNAN / SNAN
9136 short tbl_fdiv_op - tbl_fdiv_op # unused tag combination
9137 short tbl_fdiv_op - tbl_fdiv_op # unused tag combination
9139 short fdiv_norm - tbl_fdiv_op # DENORM / NORM
9140 short fdiv_inf_load - tbl_fdiv_op # DENORM / ZERO
9141 short fdiv_zero_load - tbl_fdiv_op # DENORM / INF
9142 short fdiv_res_qnan - tbl_fdiv_op # DENORM / QNAN
9143 short fdiv_norm - tbl_fdiv_op # DENORM / DENORM
9144 short fdiv_res_snan - tbl_fdiv_op # DENORM / SNAN
9145 short tbl_fdiv_op - tbl_fdiv_op # unused tag combination
9146 short tbl_fdiv_op - tbl_fdiv_op # unused tag combination
9148 short fdiv_res_snan - tbl_fdiv_op # SNAN / NORM
9149 short fdiv_res_snan - tbl_fdiv_op # SNAN / ZERO
9150 short fdiv_res_snan - tbl_fdiv_op # SNAN / INF
9151 short fdiv_res_snan - tbl_fdiv_op # SNAN / QNAN
9152 short fdiv_res_snan - tbl_fdiv_op # SNAN / DENORM
9153 short fdiv_res_snan - tbl_fdiv_op # SNAN / SNAN
9154 short tbl_fdiv_op - tbl_fdiv_op # unused tag combination
9155 short tbl_fdiv_op - tbl_fdiv_op # unused tag combination
9164 global fdiv_zero_load # global for fsgldiv
9166 mov.b SRC_EX(%a0),%d0 # result sign is exclusive
9167 mov.b DST_EX(%a1),%d1 # or of input signs.
9169 bpl.b fdiv_zero_load_p # result is positive
9170 fmov.s &0x80000000,%fp0 # load a -ZERO
9171 mov.b &z_bmask+neg_bmask,FPSR_CC(%a6) # set Z/N ccode bits
9174 fmov.s &0x00000000,%fp0 # load a +ZERO
9175 mov.b &z_bmask,FPSR_CC(%a6) # set Z ccode bit
9179 # The destination was In Range and the source was a ZERO. The result,
9180 # therefore, is an INF w/ the proper sign.
9181 # So, determine the sign and return a new INF (w/ the j-bit cleared).
9183 global fdiv_inf_load # global for fsgldiv
9185 ori.w &dz_mask+adz_mask,2+USER_FPSR(%a6) # set DZ/ADZ
9186 mov.b SRC_EX(%a0),%d0 # load both signs
9187 mov.b DST_EX(%a1),%d1
9189 bpl.b fdiv_inf_load_p # result is positive
9190 fmov.s &0xff800000,%fp0 # make result -INF
9191 mov.b &inf_bmask+neg_bmask,FPSR_CC(%a6) # set INF/N ccode bits
9194 fmov.s &0x7f800000,%fp0 # make result +INF
9195 mov.b &inf_bmask,FPSR_CC(%a6) # set INF ccode bit
9199 # The destination was an INF w/ an In Range or ZERO source, the result is
9200 # an INF w/ the proper sign.
9201 # The 68881/882 returns the destination INF w/ the new sign(if the j-bit of the
9202 # dst INF is set, then the j-bit of the result INF is also set).
9204 global fdiv_inf_dst # global for fsgldiv
9206 mov.b DST_EX(%a1),%d0 # load both signs
9207 mov.b SRC_EX(%a0),%d1
9209 bpl.b fdiv_inf_dst_p # result is positive
9211 fmovm.x DST(%a1),&0x80 # return result in fp0
9212 fabs.x %fp0 # clear sign bit
9213 fneg.x %fp0 # set sign bit
9214 mov.b &inf_bmask+neg_bmask,FPSR_CC(%a6) # set INF/N ccode bits
9218 fmovm.x DST(%a1),&0x80 # return result in fp0
9219 fabs.x %fp0 # return positive INF
9220 mov.b &inf_bmask,FPSR_CC(%a6) # set INF ccode bit
9223 #########################################################################
9224 # XDEF **************************************************************** #
9225 # fneg(): emulates the fneg instruction #
9226 # fsneg(): emulates the fsneg instruction #
9227 # fdneg(): emulates the fdneg instruction #
9229 # XREF **************************************************************** #
9230 # norm() - normalize a denorm to provide EXOP #
9231 # scale_to_zero_src() - scale sgl/dbl source exponent #
9232 # ovf_res() - return default overflow result #
9233 # unf_res() - return default underflow result #
9234 # res_qnan_1op() - return QNAN result #
9235 # res_snan_1op() - return SNAN result #
9237 # INPUT *************************************************************** #
9238 # a0 = pointer to extended precision source operand #
9239 # d0 = rnd prec,mode #
9241 # OUTPUT ************************************************************** #
9243 # fp1 = EXOP (if exception occurred) #
9245 # ALGORITHM *********************************************************** #
9246 # Handle NANs, zeroes, and infinities as special cases. Separate #
9247 # norms/denorms into ext/sgl/dbl precisions. Extended precision can be #
9248 # emulated by simply setting sign bit. Sgl/dbl operands must be scaled #
9249 # and an actual fneg performed to see if overflow/underflow would have #
9250 # occurred. If so, return default underflow/overflow result. Else, #
9251 # scale the result exponent and return result. FPSR gets set based on #
9252 # the result value. #
9254 #########################################################################
9258 andi.b &0x30,%d0 # clear rnd prec
9259 ori.b &s_mode*0x10,%d0 # insert sgl precision
9264 andi.b &0x30,%d0 # clear rnd prec
9265 ori.b &d_mode*0x10,%d0 # insert dbl prec
9269 mov.l %d0,L_SCR3(%a6) # store rnd info
9271 bne.w fneg_not_norm # optimize on non-norm input
9274 # NEGATE SIGN : norms and denorms ONLY!
9277 andi.b &0xc0,%d0 # is precision extended?
9278 bne.w fneg_not_ext # no; go handle sgl or dbl
9281 # precision selected is extended. so...we can not get an underflow
9282 # or overflow because of rounding to the correct precision. so...
9283 # skip the scaling and unscaling...
9285 mov.l SRC_HI(%a0),FP_SCR0_HI(%a6)
9286 mov.l SRC_LO(%a0),FP_SCR0_LO(%a6)
9287 mov.w SRC_EX(%a0),%d0
9288 eori.w &0x8000,%d0 # negate sign
9289 bpl.b fneg_norm_load # sign is positive
9290 mov.b &neg_bmask,FPSR_CC(%a6) # set 'N' ccode bit
9292 mov.w %d0,FP_SCR0_EX(%a6)
9293 fmovm.x FP_SCR0(%a6),&0x80 # return result in fp0
9297 # for an extended precision DENORM, the UNFL exception bit is set
9298 # the accrued bit is NOT set in this instance(no inexactness!)
9301 andi.b &0xc0,%d0 # is precision extended?
9302 bne.b fneg_not_ext # no; go handle sgl or dbl
9304 bset &unfl_bit,FPSR_EXCEPT(%a6) # set unfl exc bit
9306 mov.l SRC_HI(%a0),FP_SCR0_HI(%a6)
9307 mov.l SRC_LO(%a0),FP_SCR0_LO(%a6)
9308 mov.w SRC_EX(%a0),%d0
9309 eori.w &0x8000,%d0 # negate sign
9310 bpl.b fneg_denorm_done # no
9311 mov.b &neg_bmask,FPSR_CC(%a6) # yes, set 'N' ccode bit
9313 mov.w %d0,FP_SCR0_EX(%a6)
9314 fmovm.x FP_SCR0(%a6),&0x80 # return default result in fp0
9316 btst &unfl_bit,FPCR_ENABLE(%a6) # is UNFL enabled?
9317 bne.b fneg_ext_unfl_ena # yes
9321 # the input is an extended DENORM and underflow is enabled in the FPCR.
9322 # normalize the mantissa and add the bias of 0x6000 to the resulting negative
9323 # exponent and insert back into the operand.
9326 lea FP_SCR0(%a6),%a0 # pass: ptr to operand
9327 bsr.l norm # normalize result
9328 neg.w %d0 # new exponent = -(shft val)
9329 addi.w &0x6000,%d0 # add new bias to exponent
9330 mov.w FP_SCR0_EX(%a6),%d1 # fetch old sign,exp
9331 andi.w &0x8000,%d1 # keep old sign
9332 andi.w &0x7fff,%d0 # clear sign position
9333 or.w %d1,%d0 # concat old sign, new exponent
9334 mov.w %d0,FP_SCR0_EX(%a6) # insert new exponent
9335 fmovm.x FP_SCR0(%a6),&0x40 # return EXOP in fp1
9339 # operand is either single or double
9342 cmpi.b %d0,&s_mode*0x10 # separate sgl/dbl prec
9346 # operand is to be rounded to single precision
9349 mov.w SRC_EX(%a0),FP_SCR0_EX(%a6)
9350 mov.l SRC_HI(%a0),FP_SCR0_HI(%a6)
9351 mov.l SRC_LO(%a0),FP_SCR0_LO(%a6)
9352 bsr.l scale_to_zero_src # calculate scale factor
9354 cmpi.l %d0,&0x3fff-0x3f80 # will move in underflow?
9355 bge.w fneg_sd_unfl # yes; go handle underflow
9356 cmpi.l %d0,&0x3fff-0x407e # will move in overflow?
9357 beq.w fneg_sd_may_ovfl # maybe; go check
9358 blt.w fneg_sd_ovfl # yes; go handle overflow
9361 # operand will NOT overflow or underflow when moved in to the fp reg file
9364 fmov.l &0x0,%fpsr # clear FPSR
9365 fmov.l L_SCR3(%a6),%fpcr # set FPCR
9367 fneg.x FP_SCR0(%a6),%fp0 # perform negation
9369 fmov.l %fpsr,%d1 # save FPSR
9370 fmov.l &0x0,%fpcr # clear FPCR
9372 or.l %d1,USER_FPSR(%a6) # save INEX2,N
9374 fneg_sd_normal_exit:
9375 mov.l %d2,-(%sp) # save d2
9376 fmovm.x &0x80,FP_SCR0(%a6) # store out result
9377 mov.w FP_SCR0_EX(%a6),%d1 # load sgn,exp
9378 mov.w %d1,%d2 # make a copy
9379 andi.l &0x7fff,%d1 # strip sign
9380 sub.l %d0,%d1 # subtract scale factor (unscale)
9381 andi.w &0x8000,%d2 # keep old sign
9382 or.w %d1,%d2 # concat old sign,new exp
9383 mov.w %d2,FP_SCR0_EX(%a6) # insert new exponent
9384 mov.l (%sp)+,%d2 # restore d2
9385 fmovm.x FP_SCR0(%a6),&0x80 # return result in fp0
9389 # operand is to be rounded to double precision
9392 mov.w SRC_EX(%a0),FP_SCR0_EX(%a6) # copy src operand to
9393 mov.l SRC_HI(%a0),FP_SCR0_HI(%a6) # the FP_SCR0 scratch area
9394 mov.l SRC_LO(%a0),FP_SCR0_LO(%a6)
9395 bsr.l scale_to_zero_src # calculate scale factor
9397 cmpi.l %d0,&0x3fff-0x3c00 # will move in underflow?
9398 bge.b fneg_sd_unfl # yes; go handle underflow
9399 cmpi.l %d0,&0x3fff-0x43fe # will move in overflow?
9400 beq.w fneg_sd_may_ovfl # maybe; go check
9401 blt.w fneg_sd_ovfl # yes; go handle overflow
9402 bra.w fneg_sd_normal # no; go handle normalized op
9405 # operand WILL underflow when moved in to the fp register file
9408 bset &unfl_bit,FPSR_EXCEPT(%a6) # set unfl exc bit
9410 eori.b &0x80,FP_SCR0_EX(%a6) # negate sign
9411 bpl.b fneg_sd_unfl_tst
9412 bset &neg_bit,FPSR_CC(%a6) # set 'N' ccode bit
9414 # if underflow or inexact is enabled, go calculate EXOP first.
9416 mov.b FPCR_ENABLE(%a6),%d1
9417 andi.b &0x0b,%d1 # is UNFL or INEX enabled?
9418 bne.b fneg_sd_unfl_ena # yes
9421 lea FP_SCR0(%a6),%a0 # pass: result addr
9422 mov.l L_SCR3(%a6),%d1 # pass: rnd prec,mode
9423 bsr.l unf_res # calculate default result
9424 or.b %d0,FPSR_CC(%a6) # unf_res may have set 'Z'
9425 fmovm.x FP_SCR0(%a6),&0x80 # return default result in fp0
9429 # operand will underflow AND underflow is enabled.
9430 # Therefore, we must return the result rounded to extended precision.
9433 mov.l FP_SCR0_HI(%a6),FP_SCR1_HI(%a6)
9434 mov.l FP_SCR0_LO(%a6),FP_SCR1_LO(%a6)
9435 mov.w FP_SCR0_EX(%a6),%d1 # load current exponent
9437 mov.l %d2,-(%sp) # save d2
9438 mov.l %d1,%d2 # make a copy
9439 andi.l &0x7fff,%d1 # strip sign
9440 andi.w &0x8000,%d2 # keep old sign
9441 sub.l %d0,%d1 # subtract scale factor
9442 addi.l &0x6000,%d1 # add new bias
9444 or.w %d2,%d1 # concat new sign,new exp
9445 mov.w %d1,FP_SCR1_EX(%a6) # insert new exp
9446 fmovm.x FP_SCR1(%a6),&0x40 # return EXOP in fp1
9447 mov.l (%sp)+,%d2 # restore d2
9448 bra.b fneg_sd_unfl_dis
9451 # operand WILL overflow.
9454 fmov.l &0x0,%fpsr # clear FPSR
9455 fmov.l L_SCR3(%a6),%fpcr # set FPCR
9457 fneg.x FP_SCR0(%a6),%fp0 # perform negation
9459 fmov.l &0x0,%fpcr # clear FPCR
9460 fmov.l %fpsr,%d1 # save FPSR
9462 or.l %d1,USER_FPSR(%a6) # save INEX2,N
9465 or.l &ovfl_inx_mask,USER_FPSR(%a6) # set ovfl/aovfl/ainex
9467 mov.b FPCR_ENABLE(%a6),%d1
9468 andi.b &0x13,%d1 # is OVFL or INEX enabled?
9469 bne.b fneg_sd_ovfl_ena # yes
9472 # OVFL is not enabled; therefore, we must create the default result by
9473 # calling ovf_res().
9476 btst &neg_bit,FPSR_CC(%a6) # is result negative?
9477 sne %d1 # set sign param accordingly
9478 mov.l L_SCR3(%a6),%d0 # pass: prec,mode
9479 bsr.l ovf_res # calculate default result
9480 or.b %d0,FPSR_CC(%a6) # set INF,N if applicable
9481 fmovm.x (%a0),&0x80 # return default result in fp0
9486 # the INEX2 bit has already been updated by the round to the correct precision.
9487 # now, round to extended(and don't alter the FPSR).
9490 mov.l %d2,-(%sp) # save d2
9491 mov.w FP_SCR0_EX(%a6),%d1 # fetch {sgn,exp}
9492 mov.l %d1,%d2 # make a copy
9493 andi.l &0x7fff,%d1 # strip sign
9494 andi.w &0x8000,%d2 # keep old sign
9495 sub.l %d0,%d1 # add scale factor
9496 subi.l &0x6000,%d1 # subtract bias
9498 or.w %d2,%d1 # concat sign,exp
9499 mov.w %d1,FP_SCR0_EX(%a6) # insert new exponent
9500 fmovm.x FP_SCR0(%a6),&0x40 # return EXOP in fp1
9501 mov.l (%sp)+,%d2 # restore d2
9502 bra.b fneg_sd_ovfl_dis
9505 # the move in MAY underflow. so...
9508 fmov.l &0x0,%fpsr # clear FPSR
9509 fmov.l L_SCR3(%a6),%fpcr # set FPCR
9511 fneg.x FP_SCR0(%a6),%fp0 # perform negation
9513 fmov.l %fpsr,%d1 # save status
9514 fmov.l &0x0,%fpcr # clear FPCR
9516 or.l %d1,USER_FPSR(%a6) # save INEX2,N
9518 fabs.x %fp0,%fp1 # make a copy of result
9519 fcmp.b %fp1,&0x2 # is |result| >= 2.b?
9520 fbge.w fneg_sd_ovfl_tst # yes; overflow has occurred
9522 # no, it didn't overflow; we have correct result
9523 bra.w fneg_sd_normal_exit
9525 ##########################################################################
9528 # input is not normalized; what is it?
9531 cmpi.b %d1,&DENORM # weed out DENORM
9533 cmpi.b %d1,&SNAN # weed out SNAN
9535 cmpi.b %d1,&QNAN # weed out QNAN
9539 # do the fneg; at this point, only possible ops are ZERO and INF.
9540 # use fneg to determine ccodes.
9541 # prec:mode should be zero at this point but it won't affect answer anyways.
9543 fneg.x SRC_EX(%a0),%fp0 # do fneg
9545 rol.l &0x8,%d0 # put ccodes in lo byte
9546 mov.b %d0,FPSR_CC(%a6) # insert correct ccodes
9549 #########################################################################
9550 # XDEF **************************************************************** #
9551 # ftst(): emulates the ftest instruction #
9553 # XREF **************************************************************** #
9554 # res{s,q}nan_1op() - set NAN result for monadic instruction #
9556 # INPUT *************************************************************** #
9557 # a0 = pointer to extended precision source operand #
9559 # OUTPUT ************************************************************** #
9562 # ALGORITHM *********************************************************** #
9563 # Check the source operand tag (STAG) and set the FPCR according #
9564 # to the operand type and sign. #
9566 #########################################################################
9571 bne.b ftst_not_norm # optimize on non-norm input
9577 tst.b SRC_EX(%a0) # is operand negative?
9578 bmi.b ftst_norm_m # yes
9581 mov.b &neg_bmask,FPSR_CC(%a6) # set 'N' ccode bit
9585 # input is not normalized; what is it?
9588 cmpi.b %d1,&ZERO # weed out ZERO
9590 cmpi.b %d1,&INF # weed out INF
9592 cmpi.b %d1,&SNAN # weed out SNAN
9594 cmpi.b %d1,&QNAN # weed out QNAN
9601 tst.b SRC_EX(%a0) # is operand negative?
9602 bmi.b ftst_denorm_m # yes
9605 mov.b &neg_bmask,FPSR_CC(%a6) # set 'N' ccode bit
9612 tst.b SRC_EX(%a0) # is operand negative?
9613 bmi.b ftst_inf_m # yes
9615 mov.b &inf_bmask,FPSR_CC(%a6) # set 'I' ccode bit
9618 mov.b &inf_bmask+neg_bmask,FPSR_CC(%a6) # set 'I','N' ccode bits
9625 tst.b SRC_EX(%a0) # is operand negative?
9626 bmi.b ftst_zero_m # yes
9628 mov.b &z_bmask,FPSR_CC(%a6) # set 'Z' ccode bit
9631 mov.b &z_bmask+neg_bmask,FPSR_CC(%a6) # set 'Z','N' ccode bits
9634 #########################################################################
9635 # XDEF **************************************************************** #
9636 # fint(): emulates the fint instruction #
9638 # XREF **************************************************************** #
9639 # res_{s,q}nan_1op() - set NAN result for monadic operation #
9641 # INPUT *************************************************************** #
9642 # a0 = pointer to extended precision source operand #
9643 # d0 = round precision/mode #
9645 # OUTPUT ************************************************************** #
9648 # ALGORITHM *********************************************************** #
9649 # Separate according to operand type. Unnorms don't pass through #
9650 # here. For norms, load the rounding mode/prec, execute a "fint", then #
9651 # store the resulting FPSR bits. #
9652 # For denorms, force the j-bit to a one and do the same as for #
9653 # norms. Denorms are so low that the answer will either be a zero or a #
9655 # For zeroes/infs/NANs, return the same while setting the FPSR #
9658 #########################################################################
9663 bne.b fint_not_norm # optimize on non-norm input
9669 andi.b &0x30,%d0 # set prec = ext
9671 fmov.l %d0,%fpcr # set FPCR
9672 fmov.l &0x0,%fpsr # clear FPSR
9674 fint.x SRC(%a0),%fp0 # execute fint
9676 fmov.l &0x0,%fpcr # clear FPCR
9677 fmov.l %fpsr,%d0 # save FPSR
9678 or.l %d0,USER_FPSR(%a6) # set exception bits
9683 # input is not normalized; what is it?
9686 cmpi.b %d1,&ZERO # weed out ZERO
9688 cmpi.b %d1,&INF # weed out INF
9690 cmpi.b %d1,&DENORM # weed out DENORM
9692 cmpi.b %d1,&SNAN # weed out SNAN
9694 bra.l res_qnan_1op # weed out QNAN
9699 # for DENORMs, the result will be either (+/-)ZERO or (+/-)1.
9700 # also, the INEX2 and AINEX exception bits will be set.
9701 # so, we could either set these manually or force the DENORM
9702 # to a very small NORM and ship it to the NORM routine.
9706 mov.w SRC_EX(%a0),FP_SCR0_EX(%a6) # copy sign, zero exp
9707 mov.b &0x80,FP_SCR0_HI(%a6) # force DENORM ==> small NORM
9708 lea FP_SCR0(%a6),%a0
9715 tst.b SRC_EX(%a0) # is ZERO negative?
9716 bmi.b fint_zero_m # yes
9718 fmov.s &0x00000000,%fp0 # return +ZERO in fp0
9719 mov.b &z_bmask,FPSR_CC(%a6) # set 'Z' ccode bit
9722 fmov.s &0x80000000,%fp0 # return -ZERO in fp0
9723 mov.b &z_bmask+neg_bmask,FPSR_CC(%a6) # set 'Z','N' ccode bits
9730 fmovm.x SRC(%a0),&0x80 # return result in fp0
9731 tst.b SRC_EX(%a0) # is INF negative?
9732 bmi.b fint_inf_m # yes
9734 mov.b &inf_bmask,FPSR_CC(%a6) # set 'I' ccode bit
9737 mov.b &inf_bmask+neg_bmask,FPSR_CC(%a6) # set 'N','I' ccode bits
9740 #########################################################################
9741 # XDEF **************************************************************** #
9742 # fintrz(): emulates the fintrz instruction #
9744 # XREF **************************************************************** #
9745 # res_{s,q}nan_1op() - set NAN result for monadic operation #
9747 # INPUT *************************************************************** #
9748 # a0 = pointer to extended precision source operand #
9749 # d0 = round precision/mode #
9751 # OUTPUT ************************************************************** #
9754 # ALGORITHM *********************************************************** #
9755 # Separate according to operand type. Unnorms don't pass through #
9756 # here. For norms, load the rounding mode/prec, execute a "fintrz", #
9757 # then store the resulting FPSR bits. #
9758 # For denorms, force the j-bit to a one and do the same as for #
9759 # norms. Denorms are so low that the answer will either be a zero or a one. #
9761 # For zeroes/infs/NANs, return the same while setting the FPSR #
9764 #########################################################################
# fintrz: emulate the FINTRZ instruction (round source to integer, chopping).
# No FPCR rounding mode is loaded here because FINTRZ always rounds toward
# zero regardless of the FPCR mode bits.
# NOTE(review): label lines (fintrz:, fintrz_not_norm:, fintrz_denorm:,
# fintrz_zero:, fintrz_zero_m:, fintrz_inf:, fintrz_inf_m:) and the beq
# lines after each cmpi appear to have been dropped from this extract;
# verify against original fpsp.S.
9769 bne.b fintrz_not_norm # optimize on non-norm input
# NORM: do the real fintrz; only FPSR status needs capturing
9775 fmov.l &0x0,%fpsr # clear FPSR
9777 fintrz.x SRC(%a0),%fp0 # execute fintrz
9779 fmov.l %fpsr,%d0 # save FPSR
9780 or.l %d0,USER_FPSR(%a6) # set exception bits
9785 # input is not normalized; what is it?
9788 cmpi.b %d1,&ZERO # weed out ZERO
9790 cmpi.b %d1,&INF # weed out INF
9792 cmpi.b %d1,&DENORM # weed out DENORM
9794 cmpi.b %d1,&SNAN # weed out SNAN
9796 bra.l res_qnan_1op # weed out QNAN
9801 # for DENORMs, the result will be (+/-)ZERO.
9802 # also, the INEX2 and AINEX exception bits will be set.
9803 # so, we could either set these manually or force the DENORM
9804 # to a very small NORM and ship it to the NORM routine.
9808 mov.w SRC_EX(%a0),FP_SCR0_EX(%a6) # copy sign, zero exp
9809 mov.b &0x80,FP_SCR0_HI(%a6) # force DENORM ==> small NORM
9810 lea FP_SCR0(%a6),%a0 # pass ptr to the faked-up small NORM
# ZERO operand: return (+/-)ZERO in fp0 and set Z (and N) condition codes
9817 tst.b SRC_EX(%a0) # is ZERO negative?
9818 bmi.b fintrz_zero_m # yes
9820 fmov.s &0x00000000,%fp0 # return +ZERO in fp0
9821 mov.b &z_bmask,FPSR_CC(%a6) # set 'Z' ccode bit
9824 fmov.s &0x80000000,%fp0 # return -ZERO in fp0
9825 mov.b &z_bmask+neg_bmask,FPSR_CC(%a6) # set 'Z','N' ccode bits
# INF operand: return the INF unchanged and set I (and N) condition codes
9832 fmovm.x SRC(%a0),&0x80 # return result in fp0
9833 tst.b SRC_EX(%a0) # is INF negative?
9834 bmi.b fintrz_inf_m # yes
9836 mov.b &inf_bmask,FPSR_CC(%a6) # set 'I' ccode bit
9839 mov.b &inf_bmask+neg_bmask,FPSR_CC(%a6) # set 'N','I' ccode bits
9842 #########################################################################
9843 # XDEF **************************************************************** #
9844 # fabs(): emulates the fabs instruction #
9845 # fsabs(): emulates the fsabs instruction #
9846 # fdabs(): emulates the fdabs instruction #
9848 # XREF **************************************************************** #
9849 # norm() - normalize denorm mantissa to provide EXOP #
9850 # scale_to_zero_src() - make exponent. = 0; get scale factor #
9851 # unf_res() - calculate underflow result #
9852 # ovf_res() - calculate overflow result #
9853 # res_{s,q}nan_1op() - set NAN result for monadic operation #
9855 # INPUT *************************************************************** #
9856 # a0 = pointer to extended precision source operand #
9857 # d0 = rnd precision/mode #
9859 # OUTPUT ************************************************************** #
9861 # fp1 = EXOP (if exception occurred) #
9863 # ALGORITHM *********************************************************** #
9864 # Handle NANs, infinities, and zeroes as special cases. Divide #
9865 # norms into extended, single, and double precision. #
9866 # Simply clear sign for extended precision norm. Ext prec denorm #
9867 # gets an EXOP created for it since it's an underflow. #
9868 # Double and single precision can overflow and underflow. First, #
9869 # scale the operand such that the exponent is zero. Perform an "fabs" #
9870 # using the correct rnd mode/prec. Check to see if the original #
9871 # exponent would take an exception. If so, use unf_res() or ovf_res() #
9872 # to calculate the default result. Also, create the EXOP for the #
9873 # exceptional case. If no exception should occur, insert the correct #
9874 # result exponent and return. #
9875 # Unnorms don't pass through here. #
9877 #########################################################################
# fabs/fsabs/fdabs: emulate the absolute-value instructions.
# NOTE(review): label lines (fsabs:, fdabs:, fabs:, fabs_norm:, fabs_denorm:,
# fabs_not_ext:, fabs_sd_normal:, fabs_sd_dbl:, fabs_sd_unfl:, fabs_sd_ovfl:,
# fabs_sd_may_ovfl:, fabs_not_norm:, etc.) appear to have been dropped from
# this extract -- the branch targets below are not visibly defined here;
# verify against original fpsp.S.
# fsabs entry: force single-precision rounding
9881 andi.b &0x30,%d0 # clear rnd prec
9882 ori.b &s_mode*0x10,%d0 # insert sgl precision
# fdabs entry: force double-precision rounding
9887 andi.b &0x30,%d0 # clear rnd prec
9888 ori.b &d_mode*0x10,%d0 # insert dbl precision
9892 mov.l %d0,L_SCR3(%a6) # store rnd info
9894 bne.w fabs_not_norm # optimize on non-norm input
9897 # ABSOLUTE VALUE: norms and denorms ONLY!
9900 andi.b &0xc0,%d0 # is precision extended?
9901 bne.b fabs_not_ext # no; go handle sgl or dbl
9904 # precision selected is extended. so...we can not get an underflow
9905 # or overflow because of rounding to the correct precision. so...
9906 # skip the scaling and unscaling...
9908 mov.l SRC_HI(%a0),FP_SCR0_HI(%a6)
9909 mov.l SRC_LO(%a0),FP_SCR0_LO(%a6)
9910 mov.w SRC_EX(%a0),%d1
9911 bclr &15,%d1 # force absolute value
9912 mov.w %d1,FP_SCR0_EX(%a6) # insert exponent
9913 fmovm.x FP_SCR0(%a6),&0x80 # return result in fp0
9917 # for an extended precision DENORM, the UNFL exception bit is set
9918 # the accrued bit is NOT set in this instance(no inexactness!)
9921 andi.b &0xc0,%d0 # is precision extended?
9922 bne.b fabs_not_ext # no
9924 bset &unfl_bit,FPSR_EXCEPT(%a6) # set unfl exc bit
9926 mov.l SRC_HI(%a0),FP_SCR0_HI(%a6)
9927 mov.l SRC_LO(%a0),FP_SCR0_LO(%a6)
9928 mov.w SRC_EX(%a0),%d0
9929 bclr &15,%d0 # clear sign
9930 mov.w %d0,FP_SCR0_EX(%a6) # insert exponent
9932 fmovm.x FP_SCR0(%a6),&0x80 # return default result in fp0
9934 btst &unfl_bit,FPCR_ENABLE(%a6) # is UNFL enabled?
9935 bne.b fabs_ext_unfl_ena # yes; must also build the EXOP
9939 # the input is an extended DENORM and underflow is enabled in the FPCR.
9940 # normalize the mantissa and add the bias of 0x6000 to the resulting negative
9941 # exponent and insert back into the operand.
9944 lea FP_SCR0(%a6),%a0 # pass: ptr to operand
9945 bsr.l norm # normalize result
9946 neg.w %d0 # new exponent = -(shft val)
9947 addi.w &0x6000,%d0 # add new bias to exponent
9948 mov.w FP_SCR0_EX(%a6),%d1 # fetch old sign,exp
9949 andi.w &0x8000,%d1 # keep old sign
9950 andi.w &0x7fff,%d0 # clear sign position
9951 or.w %d1,%d0 # concat old sign, new exponent
9952 mov.w %d0,FP_SCR0_EX(%a6) # insert new exponent
9953 fmovm.x FP_SCR0(%a6),&0x40 # return EXOP in fp1
9957 # operand is either single or double
9960 cmpi.b %d0,&s_mode*0x10 # separate sgl/dbl prec
9964 # operand is to be rounded to single precision
9967 mov.w SRC_EX(%a0),FP_SCR0_EX(%a6)
9968 mov.l SRC_HI(%a0),FP_SCR0_HI(%a6)
9969 mov.l SRC_LO(%a0),FP_SCR0_LO(%a6)
9970 bsr.l scale_to_zero_src # calculate scale factor
9972 cmpi.l %d0,&0x3fff-0x3f80 # will move in underflow?
9973 bge.w fabs_sd_unfl # yes; go handle underflow
9974 cmpi.l %d0,&0x3fff-0x407e # will move in overflow?
9975 beq.w fabs_sd_may_ovfl # maybe; go check
9976 blt.w fabs_sd_ovfl # yes; go handle overflow
9979 # operand will NOT overflow or underflow when moved in to the fp reg file
9982 fmov.l &0x0,%fpsr # clear FPSR
9983 fmov.l L_SCR3(%a6),%fpcr # set FPCR
9985 fabs.x FP_SCR0(%a6),%fp0 # perform absolute
9987 fmov.l %fpsr,%d1 # save FPSR
9988 fmov.l &0x0,%fpcr # clear FPCR
9990 or.l %d1,USER_FPSR(%a6) # save INEX2,N
# common exit: unscale the result exponent and return it in fp0
9992 fabs_sd_normal_exit:
9993 mov.l %d2,-(%sp) # save d2
9994 fmovm.x &0x80,FP_SCR0(%a6) # store out result
9995 mov.w FP_SCR0_EX(%a6),%d1 # load sgn,exp
9996 mov.l %d1,%d2 # make a copy
9997 andi.l &0x7fff,%d1 # strip sign
9998 sub.l %d0,%d1 # subtract scale factor
9999 andi.w &0x8000,%d2 # keep old sign
10000 or.w %d1,%d2 # concat old sign,new exp
10001 mov.w %d2,FP_SCR0_EX(%a6) # insert new exponent
10002 mov.l (%sp)+,%d2 # restore d2
10003 fmovm.x FP_SCR0(%a6),&0x80 # return result in fp0
10007 # operand is to be rounded to double precision
10010 mov.w SRC_EX(%a0),FP_SCR0_EX(%a6)
10011 mov.l SRC_HI(%a0),FP_SCR0_HI(%a6)
10012 mov.l SRC_LO(%a0),FP_SCR0_LO(%a6)
10013 bsr.l scale_to_zero_src # calculate scale factor
10015 cmpi.l %d0,&0x3fff-0x3c00 # will move in underflow?
10016 bge.b fabs_sd_unfl # yes; go handle underflow
10017 cmpi.l %d0,&0x3fff-0x43fe # will move in overflow?
10018 beq.w fabs_sd_may_ovfl # maybe; go check
10019 blt.w fabs_sd_ovfl # yes; go handle overflow
10020 bra.w fabs_sd_normal # no; go handle normalized op
10023 # operand WILL underflow when moved in to the fp register file
10026 bset &unfl_bit,FPSR_EXCEPT(%a6) # set unfl exc bit
10028 bclr &0x7,FP_SCR0_EX(%a6) # force absolute value
10030 # if underflow or inexact is enabled, go calculate EXOP first.
10031 mov.b FPCR_ENABLE(%a6),%d1
10032 andi.b &0x0b,%d1 # is UNFL or INEX enabled?
10033 bne.b fabs_sd_unfl_ena # yes
10036 lea FP_SCR0(%a6),%a0 # pass: result addr
10037 mov.l L_SCR3(%a6),%d1 # pass: rnd prec,mode
10038 bsr.l unf_res # calculate default result
10039 or.b %d0,FPSR_CC(%a6) # set possible 'Z' ccode
10040 fmovm.x FP_SCR0(%a6),&0x80 # return default result in fp0
10044 # operand will underflow AND underflow is enabled.
10045 # Therefore, we must return the result rounded to extended precision.
10048 mov.l FP_SCR0_HI(%a6),FP_SCR1_HI(%a6)
10049 mov.l FP_SCR0_LO(%a6),FP_SCR1_LO(%a6)
10050 mov.w FP_SCR0_EX(%a6),%d1 # load current exponent
10052 mov.l %d2,-(%sp) # save d2
10053 mov.l %d1,%d2 # make a copy
10054 andi.l &0x7fff,%d1 # strip sign
10055 andi.w &0x8000,%d2 # keep old sign
10056 sub.l %d0,%d1 # subtract scale factor
10057 addi.l &0x6000,%d1 # add new bias
10059 or.w %d2,%d1 # concat new sign,new exp
10060 mov.w %d1,FP_SCR1_EX(%a6) # insert new exp
10061 fmovm.x FP_SCR1(%a6),&0x40 # return EXOP in fp1
10062 mov.l (%sp)+,%d2 # restore d2
10063 bra.b fabs_sd_unfl_dis
10066 # operand WILL overflow.
10069 fmov.l &0x0,%fpsr # clear FPSR
10070 fmov.l L_SCR3(%a6),%fpcr # set FPCR
10072 fabs.x FP_SCR0(%a6),%fp0 # perform absolute
10074 fmov.l &0x0,%fpcr # clear FPCR
10075 fmov.l %fpsr,%d1 # save FPSR
10077 or.l %d1,USER_FPSR(%a6) # save INEX2,N
10080 or.l &ovfl_inx_mask,USER_FPSR(%a6) # set ovfl/aovfl/ainex
10082 mov.b FPCR_ENABLE(%a6),%d1
10083 andi.b &0x13,%d1 # is OVFL or INEX enabled?
10084 bne.b fabs_sd_ovfl_ena # yes
10087 # OVFL is not enabled; therefore, we must create the default result by
10088 # calling ovf_res().
10091 btst &neg_bit,FPSR_CC(%a6) # is result negative?
10092 sne %d1 # set sign param accordingly
10093 mov.l L_SCR3(%a6),%d0 # pass: prec,mode
10094 bsr.l ovf_res # calculate default result
10095 or.b %d0,FPSR_CC(%a6) # set INF,N if applicable
10096 fmovm.x (%a0),&0x80 # return default result in fp0
10101 # the INEX2 bit has already been updated by the round to the correct precision.
10102 # now, round to extended(and don't alter the FPSR).
10105 mov.l %d2,-(%sp) # save d2
10106 mov.w FP_SCR0_EX(%a6),%d1 # fetch {sgn,exp}
10107 mov.l %d1,%d2 # make a copy
10108 andi.l &0x7fff,%d1 # strip sign
10109 andi.w &0x8000,%d2 # keep old sign
10110 sub.l %d0,%d1 # subtract scale factor
10111 subi.l &0x6000,%d1 # subtract bias
10113 or.w %d2,%d1 # concat sign,exp
10114 mov.w %d1,FP_SCR0_EX(%a6) # insert new exponent
10115 fmovm.x FP_SCR0(%a6),&0x40 # return EXOP in fp1
10116 mov.l (%sp)+,%d2 # restore d2
10117 bra.b fabs_sd_ovfl_dis
10120 # the move in MAY overflow. so...
10123 fmov.l &0x0,%fpsr # clear FPSR
10124 fmov.l L_SCR3(%a6),%fpcr # set FPCR
10126 fabs.x FP_SCR0(%a6),%fp0 # perform absolute
10128 fmov.l %fpsr,%d1 # save status
10129 fmov.l &0x0,%fpcr # clear FPCR
10131 or.l %d1,USER_FPSR(%a6) # save INEX2,N
10133 fabs.x %fp0,%fp1 # make a copy of result
10134 fcmp.b %fp1,&0x2 # is |result| >= 2.b?
10135 fbge.w fabs_sd_ovfl_tst # yes; overflow has occurred
10137 # no, it didn't overflow; we have correct result
10138 bra.w fabs_sd_normal_exit
10140 ##########################################################################
10143 # input is not normalized; what is it?
10146 cmpi.b %d1,&DENORM # weed out DENORM
10148 cmpi.b %d1,&SNAN # weed out SNAN
10150 cmpi.b %d1,&QNAN # weed out QNAN
10153 fabs.x SRC(%a0),%fp0 # force absolute value
10155 cmpi.b %d1,&INF # weed out INF
10158 mov.b &z_bmask,FPSR_CC(%a6) # set 'Z' ccode bit
10161 mov.b &inf_bmask,FPSR_CC(%a6) # set 'I' ccode bit
10164 #########################################################################
10165 # XDEF **************************************************************** #
10166 # fcmp(): fp compare op routine #
10168 # XREF **************************************************************** #
10169 # res_qnan() - return QNAN result #
10170 # res_snan() - return SNAN result #
10172 # INPUT *************************************************************** #
10173 # a0 = pointer to extended precision source operand #
10174 # a1 = pointer to extended precision destination operand #
10175 # d0 = round prec/mode #
10177 # OUTPUT ************************************************************** #
10180 # ALGORITHM *********************************************************** #
10181 # Handle NANs and denorms as special cases. For everything else, #
10182 # just use the actual fcmp instruction to produce the correct condition #
10185 #########################################################################
# fcmp: emulate the FCMP instruction (dst - src compare, condition codes only).
# NOTE(review): label lines (fcmp:, fcmp_norm:, fcmp_not_norm:, tbl_fcmp_op:,
# fcmp_res_qnan:, fcmp_res_snan:, fcmp_dnrm_s:, fcmp_dnrm_d:, fcmp_dnrm_sd:,
# fcmp_nrm_dnrm:, fcmp_dnrm_nrm:, and their _m variants) appear to have been
# dropped from this extract; verify against original fpsp.S.
10190 mov.b DTAG(%a6),%d1
10193 bne.b fcmp_not_norm # optimize on non-norm input
10196 # COMPARE FP OPs : NORMs, ZEROs, INFs, and "corrected" DENORMs
10199 fmovm.x DST(%a1),&0x80 # load dst op
10201 fcmp.x %fp0,SRC(%a0) # do compare
10203 fmov.l %fpsr,%d0 # save FPSR
10204 rol.l &0x8,%d0 # extract ccode bits
10205 mov.b %d0,FPSR_CC(%a6) # set ccode bits(no exc bits are set)
10210 # fcmp: inputs are not both normalized; what are they?
# jump through the dispatch table indexed by {DTAG,STAG}
10213 mov.w (tbl_fcmp_op.b,%pc,%d1.w*2),%d1
10214 jmp (tbl_fcmp_op.b,%pc,%d1.w*1)
# dispatch table: rows by dst tag, columns by src tag (dst - src)
10218 short fcmp_norm - tbl_fcmp_op # NORM - NORM
10219 short fcmp_norm - tbl_fcmp_op # NORM - ZERO
10220 short fcmp_norm - tbl_fcmp_op # NORM - INF
10221 short fcmp_res_qnan - tbl_fcmp_op # NORM - QNAN
10222 short fcmp_nrm_dnrm - tbl_fcmp_op # NORM - DENORM
10223 short fcmp_res_snan - tbl_fcmp_op # NORM - SNAN
10224 short tbl_fcmp_op - tbl_fcmp_op #
10225 short tbl_fcmp_op - tbl_fcmp_op #
10227 short fcmp_norm - tbl_fcmp_op # ZERO - NORM
10228 short fcmp_norm - tbl_fcmp_op # ZERO - ZERO
10229 short fcmp_norm - tbl_fcmp_op # ZERO - INF
10230 short fcmp_res_qnan - tbl_fcmp_op # ZERO - QNAN
10231 short fcmp_dnrm_s - tbl_fcmp_op # ZERO - DENORM
10232 short fcmp_res_snan - tbl_fcmp_op # ZERO - SNAN
10233 short tbl_fcmp_op - tbl_fcmp_op #
10234 short tbl_fcmp_op - tbl_fcmp_op #
10236 short fcmp_norm - tbl_fcmp_op # INF - NORM
10237 short fcmp_norm - tbl_fcmp_op # INF - ZERO
10238 short fcmp_norm - tbl_fcmp_op # INF - INF
10239 short fcmp_res_qnan - tbl_fcmp_op # INF - QNAN
10240 short fcmp_dnrm_s - tbl_fcmp_op # INF - DENORM
10241 short fcmp_res_snan - tbl_fcmp_op # INF - SNAN
10242 short tbl_fcmp_op - tbl_fcmp_op #
10243 short tbl_fcmp_op - tbl_fcmp_op #
10245 short fcmp_res_qnan - tbl_fcmp_op # QNAN - NORM
10246 short fcmp_res_qnan - tbl_fcmp_op # QNAN - ZERO
10247 short fcmp_res_qnan - tbl_fcmp_op # QNAN - INF
10248 short fcmp_res_qnan - tbl_fcmp_op # QNAN - QNAN
10249 short fcmp_res_qnan - tbl_fcmp_op # QNAN - DENORM
10250 short fcmp_res_snan - tbl_fcmp_op # QNAN - SNAN
10251 short tbl_fcmp_op - tbl_fcmp_op #
10252 short tbl_fcmp_op - tbl_fcmp_op #
10254 short fcmp_dnrm_nrm - tbl_fcmp_op # DENORM - NORM
10255 short fcmp_dnrm_d - tbl_fcmp_op # DENORM - ZERO
10256 short fcmp_dnrm_d - tbl_fcmp_op # DENORM - INF
10257 short fcmp_res_qnan - tbl_fcmp_op # DENORM - QNAN
10258 short fcmp_dnrm_sd - tbl_fcmp_op # DENORM - DENORM
10259 short fcmp_res_snan - tbl_fcmp_op # DENORM - SNAN
10260 short tbl_fcmp_op - tbl_fcmp_op #
10261 short tbl_fcmp_op - tbl_fcmp_op #
10263 short fcmp_res_snan - tbl_fcmp_op # SNAN - NORM
10264 short fcmp_res_snan - tbl_fcmp_op # SNAN - ZERO
10265 short fcmp_res_snan - tbl_fcmp_op # SNAN - INF
10266 short fcmp_res_snan - tbl_fcmp_op # SNAN - QNAN
10267 short fcmp_res_snan - tbl_fcmp_op # SNAN - DENORM
10268 short fcmp_res_snan - tbl_fcmp_op # SNAN - SNAN
10269 short tbl_fcmp_op - tbl_fcmp_op #
10270 short tbl_fcmp_op - tbl_fcmp_op #
10272 # unlike all other functions for QNAN and SNAN, fcmp does NOT set the
10273 # 'N' bit for a negative QNAN or SNAN input so we must squelch it here.
10276 andi.b &0xf7,FPSR_CC(%a6)
10280 andi.b &0xf7,FPSR_CC(%a6)
10284 # DENORMs are a little more difficult.
10285 # If you have a 2 DENORMs, then you can just force the j-bit to a one
10286 # and use the fcmp_norm routine.
10287 # If you have a DENORM and an INF or ZERO, just force the DENORM's j-bit to a one
10288 # and use the fcmp_norm routine.
10289 # If you have a DENORM and a NORM with opposite signs, then use fcmp_norm, also.
10290 # But with a DENORM and a NORM of the same sign, the neg bit is set if the
10291 # (1) signs are (+) and the DENORM is the dst or
10292 # (2) signs are (-) and the DENORM is the src
# src is the DENORM: fake it up as a small NORM in FP_SCR0
10296 mov.w SRC_EX(%a0),FP_SCR0_EX(%a6)
10297 mov.l SRC_HI(%a0),%d0
10298 bset &31,%d0 # DENORM src; make into small norm
10299 mov.l %d0,FP_SCR0_HI(%a6)
10300 mov.l SRC_LO(%a0),FP_SCR0_LO(%a6)
10301 lea FP_SCR0(%a6),%a0
# dst is the DENORM: fake it up as a small NORM in FP_SCR0
# NOTE(review): the .l write covers the exponent word plus the 2 pad bytes;
# presumably harmless since the mantissa words are rewritten below -- confirm.
10305 mov.l DST_EX(%a1),FP_SCR0_EX(%a6)
10306 mov.l DST_HI(%a1),%d0
10307 bset &31,%d0 # DENORM dst; make into small norm
10308 mov.l %d0,FP_SCR0_HI(%a6)
10309 mov.l DST_LO(%a1),FP_SCR0_LO(%a6)
10310 lea FP_SCR0(%a6),%a1
# both operands are DENORMs: fake up both as small NORMs
10314 mov.w DST_EX(%a1),FP_SCR1_EX(%a6)
10315 mov.w SRC_EX(%a0),FP_SCR0_EX(%a6)
10316 mov.l DST_HI(%a1),%d0
10317 bset &31,%d0 # DENORM dst; make into small norm
10318 mov.l %d0,FP_SCR1_HI(%a6)
10319 mov.l SRC_HI(%a0),%d0
10320 bset &31,%d0 # DENORM src; make into small norm
10321 mov.l %d0,FP_SCR0_HI(%a6)
10322 mov.l DST_LO(%a1),FP_SCR1_LO(%a6)
10323 mov.l SRC_LO(%a0),FP_SCR0_LO(%a6)
10324 lea FP_SCR1(%a6),%a1
10325 lea FP_SCR0(%a6),%a0
# NORM dst vs DENORM src
10329 mov.b SRC_EX(%a0),%d0 # determine if like signs
10330 mov.b DST_EX(%a1),%d1
10334 # signs are the same, so must determine the answer ourselves.
10335 tst.b %d0 # is src op negative?
10336 bmi.b fcmp_nrm_dnrm_m # yes
10339 mov.b &neg_bmask,FPSR_CC(%a6) # set 'N' ccode bit
# DENORM dst vs NORM src
10343 mov.b SRC_EX(%a0),%d0 # determine if like signs
10344 mov.b DST_EX(%a1),%d1
10348 # signs are the same, so must determine the answer ourselves.
10349 tst.b %d0 # is src op negative?
10350 bpl.b fcmp_dnrm_nrm_m # no
10353 mov.b &neg_bmask,FPSR_CC(%a6) # set 'N' ccode bit
10356 #########################################################################
10357 # XDEF **************************************************************** #
10358 # fsglmul(): emulates the fsglmul instruction #
10360 # XREF **************************************************************** #
10361 # scale_to_zero_src() - scale src exponent to zero #
10362 # scale_to_zero_dst() - scale dst exponent to zero #
10363 # unf_res4() - return default underflow result for sglop #
10364 # ovf_res() - return default overflow result #
10365 # res_qnan() - return QNAN result #
10366 # res_snan() - return SNAN result #
10368 # INPUT *************************************************************** #
10369 # a0 = pointer to extended precision source operand #
10370 # a1 = pointer to extended precision destination operand #
10371 # d0 rnd prec,mode #
10373 # OUTPUT ************************************************************** #
10375 # fp1 = EXOP (if exception occurred) #
10377 # ALGORITHM *********************************************************** #
10378 # Handle NANs, infinities, and zeroes as special cases. Divide #
10379 # norms/denorms into ext/sgl/dbl precision. #
10380 # For norms/denorms, scale the exponents such that a multiply #
10381 # instruction won't cause an exception. Use the regular fsglmul to #
10382 # compute a result. Check if the regular operands would have taken #
10383 # an exception. If so, return the default overflow/underflow result #
10384 # and return the EXOP if exceptions are enabled. Else, scale the #
10385 # result operand to the proper exponent. #
10387 #########################################################################
# fsglmul: emulate the FSGLMUL instruction (single-precision multiply).
# NOTE(review): label lines (fsglmul:, fsglmul_norm:, fsglmul_normal:,
# fsglmul_ovfl:, fsglmul_ovfl_tst:, fsglmul_ovfl_ena:, fsglmul_may_ovfl:,
# fsglmul_unfl:, fsglmul_unfl_ena:, fsglmul_may_unfl:, fsglmul_not_norm:,
# tbl_fsglmul_op:, etc.) appear to have been dropped from this extract;
# verify against original fpsp.S.
10391 mov.l %d0,L_SCR3(%a6) # store rnd info
10394 mov.b DTAG(%a6),%d1
10398 bne.w fsglmul_not_norm # optimize on non-norm input
# copy both operands to the scratch areas, then scale exponents to zero
10401 mov.w DST_EX(%a1),FP_SCR1_EX(%a6)
10402 mov.l DST_HI(%a1),FP_SCR1_HI(%a6)
10403 mov.l DST_LO(%a1),FP_SCR1_LO(%a6)
10405 mov.w SRC_EX(%a0),FP_SCR0_EX(%a6)
10406 mov.l SRC_HI(%a0),FP_SCR0_HI(%a6)
10407 mov.l SRC_LO(%a0),FP_SCR0_LO(%a6)
10409 bsr.l scale_to_zero_src # scale exponent
10410 mov.l %d0,-(%sp) # save scale factor 1
10412 bsr.l scale_to_zero_dst # scale dst exponent
10414 add.l (%sp)+,%d0 # SCALE_FACTOR = scale1 + scale2
# classify the unscaled result against the overflow/underflow thresholds
10416 cmpi.l %d0,&0x3fff-0x7ffe # would result ovfl?
10417 beq.w fsglmul_may_ovfl # result may rnd to overflow
10418 blt.w fsglmul_ovfl # result will overflow
10420 cmpi.l %d0,&0x3fff+0x0001 # would result unfl?
10421 beq.w fsglmul_may_unfl # result may rnd to no unfl
10422 bgt.w fsglmul_unfl # result will underflow
# normal case: multiply the scaled operands, then unscale the result
10425 fmovm.x FP_SCR1(%a6),&0x80 # load dst op
10427 fmov.l L_SCR3(%a6),%fpcr # set FPCR
10428 fmov.l &0x0,%fpsr # clear FPSR
10430 fsglmul.x FP_SCR0(%a6),%fp0 # execute sgl multiply
10432 fmov.l %fpsr,%d1 # save status
10433 fmov.l &0x0,%fpcr # clear FPCR
10435 or.l %d1,USER_FPSR(%a6) # save INEX2,N
10437 fsglmul_normal_exit:
10438 fmovm.x &0x80,FP_SCR0(%a6) # store out result
10439 mov.l %d2,-(%sp) # save d2
10440 mov.w FP_SCR0_EX(%a6),%d1 # load {sgn,exp}
10441 mov.l %d1,%d2 # make a copy
10442 andi.l &0x7fff,%d1 # strip sign
10443 andi.w &0x8000,%d2 # keep old sign
10444 sub.l %d0,%d1 # subtract scale factor
10445 or.w %d2,%d1 # concat old sign,new exp
10446 mov.w %d1,FP_SCR0_EX(%a6) # insert new exponent
10447 mov.l (%sp)+,%d2 # restore d2
10448 fmovm.x FP_SCR0(%a6),&0x80 # return result in fp0
# overflow case: do the multiply, then build default result and/or EXOP
10452 fmovm.x FP_SCR1(%a6),&0x80 # load dst op
10454 fmov.l L_SCR3(%a6),%fpcr # set FPCR
10455 fmov.l &0x0,%fpsr # clear FPSR
10457 fsglmul.x FP_SCR0(%a6),%fp0 # execute sgl multiply
10459 fmov.l %fpsr,%d1 # save status
10460 fmov.l &0x0,%fpcr # clear FPCR
10462 or.l %d1,USER_FPSR(%a6) # save INEX2,N
10466 # save setting this until now because this is where fsglmul_may_ovfl may jump in
10467 or.l &ovfl_inx_mask, USER_FPSR(%a6) # set ovfl/aovfl/ainex
10469 mov.b FPCR_ENABLE(%a6),%d1
10470 andi.b &0x13,%d1 # is OVFL or INEX enabled?
10471 bne.b fsglmul_ovfl_ena # yes
# OVFL disabled: return the default overflow result from ovf_res()
10474 btst &neg_bit,FPSR_CC(%a6) # is result negative?
10475 sne %d1 # set sign param accordingly
10476 mov.l L_SCR3(%a6),%d0 # pass prec:rnd
10477 andi.b &0x30,%d0 # force prec = ext
10478 bsr.l ovf_res # calculate default result
10479 or.b %d0,FPSR_CC(%a6) # set INF,N if applicable
10480 fmovm.x (%a0),&0x80 # return default result in fp0
# OVFL/INEX enabled: build the EXOP (bias-adjusted exponent) in fp1
10484 fmovm.x &0x80,FP_SCR0(%a6) # move result to stack
10486 mov.l %d2,-(%sp) # save d2
10487 mov.w FP_SCR0_EX(%a6),%d1 # fetch {sgn,exp}
10488 mov.l %d1,%d2 # make a copy
10489 andi.l &0x7fff,%d1 # strip sign
10490 sub.l %d0,%d1 # subtract scale factor
10491 subi.l &0x6000,%d1 # subtract bias
10493 andi.w &0x8000,%d2 # keep old sign
10494 or.w %d2,%d1 # concat old sign,new exp
10495 mov.w %d1,FP_SCR0_EX(%a6) # insert new exponent
10496 mov.l (%sp)+,%d2 # restore d2
10497 fmovm.x FP_SCR0(%a6),&0x40 # return EXOP in fp1
10498 bra.b fsglmul_ovfl_dis
# may-overflow case: multiply, then test whether overflow really occurred
10501 fmovm.x FP_SCR1(%a6),&0x80 # load dst op
10503 fmov.l L_SCR3(%a6),%fpcr # set FPCR
10504 fmov.l &0x0,%fpsr # clear FPSR
10506 fsglmul.x FP_SCR0(%a6),%fp0 # execute sgl multiply
10508 fmov.l %fpsr,%d1 # save status
10509 fmov.l &0x0,%fpcr # clear FPCR
10511 or.l %d1,USER_FPSR(%a6) # save INEX2,N
10513 fabs.x %fp0,%fp1 # make a copy of result
10514 fcmp.b %fp1,&0x2 # is |result| >= 2.b?
10515 fbge.w fsglmul_ovfl_tst # yes; overflow has occurred
10517 # no, it didn't overflow; we have correct result
10518 bra.w fsglmul_normal_exit
# underflow case: multiply in RZ, then default result and/or EXOP
10521 bset &unfl_bit,FPSR_EXCEPT(%a6) # set unfl exc bit
10523 fmovm.x FP_SCR1(%a6),&0x80 # load dst op
10525 fmov.l &rz_mode*0x10,%fpcr # set FPCR
10526 fmov.l &0x0,%fpsr # clear FPSR
10528 fsglmul.x FP_SCR0(%a6),%fp0 # execute sgl multiply
10530 fmov.l %fpsr,%d1 # save status
10531 fmov.l &0x0,%fpcr # clear FPCR
10533 or.l %d1,USER_FPSR(%a6) # save INEX2,N
10535 mov.b FPCR_ENABLE(%a6),%d1
10536 andi.b &0x0b,%d1 # is UNFL or INEX enabled?
10537 bne.b fsglmul_unfl_ena # yes
10540 fmovm.x &0x80,FP_SCR0(%a6) # store out result
10542 lea FP_SCR0(%a6),%a0 # pass: result addr
10543 mov.l L_SCR3(%a6),%d1 # pass: rnd prec,mode
10544 bsr.l unf_res4 # calculate default result
10545 or.b %d0,FPSR_CC(%a6) # 'Z' bit may have been set
10546 fmovm.x FP_SCR0(%a6),&0x80 # return default result in fp0
# UNFL/INEX enabled: redo in fp1 and build the EXOP (bias-adjusted exponent)
10553 fmovm.x FP_SCR1(%a6),&0x40 # load dst op
10555 fmov.l L_SCR3(%a6),%fpcr # set FPCR
10556 fmov.l &0x0,%fpsr # clear FPSR
10558 fsglmul.x FP_SCR0(%a6),%fp1 # execute sgl multiply
10560 fmov.l &0x0,%fpcr # clear FPCR
10562 fmovm.x &0x40,FP_SCR0(%a6) # save result to stack
10563 mov.l %d2,-(%sp) # save d2
10564 mov.w FP_SCR0_EX(%a6),%d1 # fetch {sgn,exp}
10565 mov.l %d1,%d2 # make a copy
10566 andi.l &0x7fff,%d1 # strip sign
10567 andi.w &0x8000,%d2 # keep old sign
10568 sub.l %d0,%d1 # subtract scale factor
10569 addi.l &0x6000,%d1 # add bias
10571 or.w %d2,%d1 # concat old sign,new exp
10572 mov.w %d1,FP_SCR0_EX(%a6) # insert new exponent
10573 mov.l (%sp)+,%d2 # restore d2
10574 fmovm.x FP_SCR0(%a6),&0x40 # return EXOP in fp1
10575 bra.w fsglmul_unfl_dis
# may-underflow case: multiply, then test whether underflow really occurred
10578 fmovm.x FP_SCR1(%a6),&0x80 # load dst op
10580 fmov.l L_SCR3(%a6),%fpcr # set FPCR
10581 fmov.l &0x0,%fpsr # clear FPSR
10583 fsglmul.x FP_SCR0(%a6),%fp0 # execute sgl multiply
10585 fmov.l %fpsr,%d1 # save status
10586 fmov.l &0x0,%fpcr # clear FPCR
10588 or.l %d1,USER_FPSR(%a6) # save INEX2,N
10590 fabs.x %fp0,%fp1 # make a copy of result
10591 fcmp.b %fp1,&0x2 # is |result| > 2.b?
10592 fbgt.w fsglmul_normal_exit # no; no underflow occurred
10593 fblt.w fsglmul_unfl # yes; underflow occurred
10596 # we still don't know if underflow occurred. result is ~ equal to 2. but,
10597 # we don't know if the result was an underflow that rounded up to a 2 or
10598 # a normalized number that rounded down to a 2. so, redo the entire operation
10599 # using RZ as the rounding mode to see what the pre-rounded result is.
10600 # this case should be relatively rare.
10602 fmovm.x FP_SCR1(%a6),&0x40 # load dst op into fp1
10604 mov.l L_SCR3(%a6),%d1
10605 andi.b &0xc0,%d1 # keep rnd prec
10606 ori.b &rz_mode*0x10,%d1 # insert RZ
10608 fmov.l %d1,%fpcr # set FPCR
10609 fmov.l &0x0,%fpsr # clear FPSR
10611 fsglmul.x FP_SCR0(%a6),%fp1 # execute sgl multiply
10613 fmov.l &0x0,%fpcr # clear FPCR
10614 fabs.x %fp1 # make absolute value
10615 fcmp.b %fp1,&0x2 # is |result| < 2.b?
10616 fbge.w fsglmul_normal_exit # no; no underflow occurred
10617 bra.w fsglmul_unfl # yes, underflow occurred
10619 ##############################################################################
10622 # Single Precision Multiply: inputs are not both normalized; what are they?
10625 mov.w (tbl_fsglmul_op.b,%pc,%d1.w*2),%d1
10626 jmp (tbl_fsglmul_op.b,%pc,%d1.w*1)
# dispatch table: rows by dst tag, columns by src tag
10630 short fsglmul_norm - tbl_fsglmul_op # NORM x NORM
10631 short fsglmul_zero - tbl_fsglmul_op # NORM x ZERO
10632 short fsglmul_inf_src - tbl_fsglmul_op # NORM x INF
10633 short fsglmul_res_qnan - tbl_fsglmul_op # NORM x QNAN
10634 short fsglmul_norm - tbl_fsglmul_op # NORM x DENORM
10635 short fsglmul_res_snan - tbl_fsglmul_op # NORM x SNAN
10636 short tbl_fsglmul_op - tbl_fsglmul_op #
10637 short tbl_fsglmul_op - tbl_fsglmul_op #
10639 short fsglmul_zero - tbl_fsglmul_op # ZERO x NORM
10640 short fsglmul_zero - tbl_fsglmul_op # ZERO x ZERO
10641 short fsglmul_res_operr - tbl_fsglmul_op # ZERO x INF
10642 short fsglmul_res_qnan - tbl_fsglmul_op # ZERO x QNAN
10643 short fsglmul_zero - tbl_fsglmul_op # ZERO x DENORM
10644 short fsglmul_res_snan - tbl_fsglmul_op # ZERO x SNAN
10645 short tbl_fsglmul_op - tbl_fsglmul_op #
10646 short tbl_fsglmul_op - tbl_fsglmul_op #
10648 short fsglmul_inf_dst - tbl_fsglmul_op # INF x NORM
10649 short fsglmul_res_operr - tbl_fsglmul_op # INF x ZERO
10650 short fsglmul_inf_dst - tbl_fsglmul_op # INF x INF
10651 short fsglmul_res_qnan - tbl_fsglmul_op # INF x QNAN
10652 short fsglmul_inf_dst - tbl_fsglmul_op # INF x DENORM
10653 short fsglmul_res_snan - tbl_fsglmul_op # INF x SNAN
10654 short tbl_fsglmul_op - tbl_fsglmul_op #
10655 short tbl_fsglmul_op - tbl_fsglmul_op #
10657 short fsglmul_res_qnan - tbl_fsglmul_op # QNAN x NORM
10658 short fsglmul_res_qnan - tbl_fsglmul_op # QNAN x ZERO
10659 short fsglmul_res_qnan - tbl_fsglmul_op # QNAN x INF
10660 short fsglmul_res_qnan - tbl_fsglmul_op # QNAN x QNAN
10661 short fsglmul_res_qnan - tbl_fsglmul_op # QNAN x DENORM
10662 short fsglmul_res_snan - tbl_fsglmul_op # QNAN x SNAN
10663 short tbl_fsglmul_op - tbl_fsglmul_op #
10664 short tbl_fsglmul_op - tbl_fsglmul_op #
10666 short fsglmul_norm - tbl_fsglmul_op # DENORM x NORM
10667 short fsglmul_zero - tbl_fsglmul_op # DENORM x ZERO
10668 short fsglmul_inf_src - tbl_fsglmul_op # DENORM x INF
10669 short fsglmul_res_qnan - tbl_fsglmul_op # DENORM x QNAN
10670 short fsglmul_norm - tbl_fsglmul_op # DENORM x DENORM
10671 short fsglmul_res_snan - tbl_fsglmul_op # DENORM x SNAN
10672 short tbl_fsglmul_op - tbl_fsglmul_op #
10673 short tbl_fsglmul_op - tbl_fsglmul_op #
10675 short fsglmul_res_snan - tbl_fsglmul_op # SNAN x NORM
10676 short fsglmul_res_snan - tbl_fsglmul_op # SNAN x ZERO
10677 short fsglmul_res_snan - tbl_fsglmul_op # SNAN x INF
10678 short fsglmul_res_snan - tbl_fsglmul_op # SNAN x QNAN
10679 short fsglmul_res_snan - tbl_fsglmul_op # SNAN x DENORM
10680 short fsglmul_res_snan - tbl_fsglmul_op # SNAN x SNAN
10681 short tbl_fsglmul_op - tbl_fsglmul_op #
10682 short tbl_fsglmul_op - tbl_fsglmul_op #
10697 #########################################################################
10698 # XDEF **************************************************************** #
10699 # fsgldiv(): emulates the fsgldiv instruction #
10701 # XREF **************************************************************** #
10702 # scale_to_zero_src() - scale src exponent to zero #
10703 # scale_to_zero_dst() - scale dst exponent to zero #
10704 # unf_res4() - return default underflow result for sglop #
10705 # ovf_res() - return default overflow result #
10706 # res_qnan() - return QNAN result #
10707 # res_snan() - return SNAN result #
10709 # INPUT *************************************************************** #
10710 # a0 = pointer to extended precision source operand #
10711 # a1 = pointer to extended precision destination operand #
10712 # d0 rnd prec,mode #
10714 # OUTPUT ************************************************************** #
10716 # fp1 = EXOP (if exception occurred) #
10718 # ALGORITHM *********************************************************** #
10719 # Handle NANs, infinities, and zeroes as special cases. Divide #
10720 # norms/denorms into ext/sgl/dbl precision. #
10721 # For norms/denorms, scale the exponents such that a divide #
10722 # instruction won't cause an exception. Use the regular fsgldiv to #
10723 # compute a result. Check if the regular operands would have taken #
10724 # an exception. If so, return the default overflow/underflow result #
10725 # and return the EXOP if exceptions are enabled. Else, scale the #
10726 # result operand to the proper exponent. #
10728 #########################################################################
10732 mov.l %d0,L_SCR3(%a6) # store rnd info
10735 mov.b DTAG(%a6),%d1
10737 or.b STAG(%a6),%d1 # combine src tags
10739 bne.w fsgldiv_not_norm # optimize on non-norm input
10742 # DIVIDE: NORMs and DENORMs ONLY!
10745 mov.w DST_EX(%a1),FP_SCR1_EX(%a6)
10746 mov.l DST_HI(%a1),FP_SCR1_HI(%a6)
10747 mov.l DST_LO(%a1),FP_SCR1_LO(%a6)
10749 mov.w SRC_EX(%a0),FP_SCR0_EX(%a6)
10750 mov.l SRC_HI(%a0),FP_SCR0_HI(%a6)
10751 mov.l SRC_LO(%a0),FP_SCR0_LO(%a6)
10753 bsr.l scale_to_zero_src # calculate scale factor 1
10754 mov.l %d0,-(%sp) # save scale factor 1
10756 bsr.l scale_to_zero_dst # calculate scale factor 2
10758 neg.l (%sp) # S.F. = scale1 - scale2
10761 mov.w 2+L_SCR3(%a6),%d1 # fetch precision,mode
10764 cmpi.l %d0,&0x3fff-0x7ffe
10765 ble.w fsgldiv_may_ovfl
10767 cmpi.l %d0,&0x3fff-0x0000 # will result underflow?
10768 beq.w fsgldiv_may_unfl # maybe
10769 bgt.w fsgldiv_unfl # yes; go handle underflow
10772 fmovm.x FP_SCR1(%a6),&0x80 # load dst op
10774 fmov.l L_SCR3(%a6),%fpcr # save FPCR
10775 fmov.l &0x0,%fpsr # clear FPSR
10777 fsgldiv.x FP_SCR0(%a6),%fp0 # perform sgl divide
10779 fmov.l %fpsr,%d1 # save FPSR
10780 fmov.l &0x0,%fpcr # clear FPCR
10782 or.l %d1,USER_FPSR(%a6) # save INEX2,N
10784 fsgldiv_normal_exit:
10785 fmovm.x &0x80,FP_SCR0(%a6) # store result on stack
10786 mov.l %d2,-(%sp) # save d2
10787 mov.w FP_SCR0_EX(%a6),%d1 # load {sgn,exp}
10788 mov.l %d1,%d2 # make a copy
10789 andi.l &0x7fff,%d1 # strip sign
10790 andi.w &0x8000,%d2 # keep old sign
10791 sub.l %d0,%d1 # add scale factor
10792 or.w %d2,%d1 # concat old sign,new exp
10793 mov.w %d1,FP_SCR0_EX(%a6) # insert new exponent
10794 mov.l (%sp)+,%d2 # restore d2
10795 fmovm.x FP_SCR0(%a6),&0x80 # return result in fp0
10799 fmovm.x FP_SCR1(%a6),&0x80 # load dst op
10801 fmov.l L_SCR3(%a6),%fpcr # set FPCR
10802 fmov.l &0x0,%fpsr # set FPSR
10804 fsgldiv.x FP_SCR0(%a6),%fp0 # execute divide
10809 or.l %d1,USER_FPSR(%a6) # save INEX,N
10811 fmovm.x &0x01,-(%sp) # save result to stack
10812 mov.w (%sp),%d1 # fetch new exponent
10813 add.l &0xc,%sp # clear result
10814 andi.l &0x7fff,%d1 # strip sign
10815 sub.l %d0,%d1 # add scale factor
10816 cmp.l %d1,&0x7fff # did divide overflow?
10817 blt.b fsgldiv_normal_exit
10820 or.w &ovfl_inx_mask,2+USER_FPSR(%a6) # set ovfl/aovfl/ainex
10822 mov.b FPCR_ENABLE(%a6),%d1
10823 andi.b &0x13,%d1 # is OVFL or INEX enabled?
10824 bne.b fsgldiv_ovfl_ena # yes
10827 btst &neg_bit,FPSR_CC(%a6) # is result negative
10828 sne %d1 # set sign param accordingly
10829 mov.l L_SCR3(%a6),%d0 # pass prec:rnd
10830 andi.b &0x30,%d0 # kill precision
10831 bsr.l ovf_res # calculate default result
10832 or.b %d0,FPSR_CC(%a6) # set INF if applicable
10833 fmovm.x (%a0),&0x80 # return default result in fp0
10837 fmovm.x &0x80,FP_SCR0(%a6) # move result to stack
10839 mov.l %d2,-(%sp) # save d2
10840 mov.w FP_SCR0_EX(%a6),%d1 # fetch {sgn,exp}
10841 mov.l %d1,%d2 # make a copy
10842 andi.l &0x7fff,%d1 # strip sign
10843 andi.w &0x8000,%d2 # keep old sign
10844 sub.l %d0,%d1 # add scale factor
10845 subi.l &0x6000,%d1 # subtract new bias
10846 andi.w &0x7fff,%d1 # clear ms bit
10847 or.w %d2,%d1 # concat old sign,new exp
10848 mov.w %d1,FP_SCR0_EX(%a6) # insert new exponent
10849 mov.l (%sp)+,%d2 # restore d2
10850 fmovm.x FP_SCR0(%a6),&0x40 # return EXOP in fp1
10851 bra.b fsgldiv_ovfl_dis
10854 bset &unfl_bit,FPSR_EXCEPT(%a6) # set unfl exc bit
10856 fmovm.x FP_SCR1(%a6),&0x80 # load dst op
10858 fmov.l &rz_mode*0x10,%fpcr # set FPCR
10859 fmov.l &0x0,%fpsr # clear FPSR
10861 fsgldiv.x FP_SCR0(%a6),%fp0 # execute sgl divide
10863 fmov.l %fpsr,%d1 # save status
10864 fmov.l &0x0,%fpcr # clear FPCR
10866 or.l %d1,USER_FPSR(%a6) # save INEX2,N
10868 mov.b FPCR_ENABLE(%a6),%d1
10869 andi.b &0x0b,%d1 # is UNFL or INEX enabled?
10870 bne.b fsgldiv_unfl_ena # yes
10873 fmovm.x &0x80,FP_SCR0(%a6) # store out result
10875 lea FP_SCR0(%a6),%a0 # pass: result addr
10876 mov.l L_SCR3(%a6),%d1 # pass: rnd prec,mode
10877 bsr.l unf_res4 # calculate default result
10878 or.b %d0,FPSR_CC(%a6) # 'Z' bit may have been set
10879 fmovm.x FP_SCR0(%a6),&0x80 # return default result in fp0
10886 fmovm.x FP_SCR1(%a6),&0x40 # load dst op
10888 fmov.l L_SCR3(%a6),%fpcr # set FPCR
10889 fmov.l &0x0,%fpsr # clear FPSR
10891 fsgldiv.x FP_SCR0(%a6),%fp1 # execute sgl divide
10893 fmov.l &0x0,%fpcr # clear FPCR
10895 fmovm.x &0x40,FP_SCR0(%a6) # save result to stack
10896 mov.l %d2,-(%sp) # save d2
10897 mov.w FP_SCR0_EX(%a6),%d1 # fetch {sgn,exp}
10898 mov.l %d1,%d2 # make a copy
10899 andi.l &0x7fff,%d1 # strip sign
10900 andi.w &0x8000,%d2 # keep old sign
10901 sub.l %d0,%d1 # add scale factor
10902 addi.l &0x6000,%d1 # add bias
10903 andi.w &0x7fff,%d1 # clear top bit
10904 or.w %d2,%d1 # concat old sign, new exp
10905 mov.w %d1,FP_SCR0_EX(%a6) # insert new exponent
10906 mov.l (%sp)+,%d2 # restore d2
10907 fmovm.x FP_SCR0(%a6),&0x40 # return EXOP in fp1
10908 bra.b fsgldiv_unfl_dis
10911 # the divide operation MAY underflow:
10914 fmovm.x FP_SCR1(%a6),&0x80 # load dst op
10916 fmov.l L_SCR3(%a6),%fpcr # set FPCR
10917 fmov.l &0x0,%fpsr # clear FPSR
10919 fsgldiv.x FP_SCR0(%a6),%fp0 # execute sgl divide
10921 fmov.l %fpsr,%d1 # save status
10922 fmov.l &0x0,%fpcr # clear FPCR
10924 or.l %d1,USER_FPSR(%a6) # save INEX2,N
10926 fabs.x %fp0,%fp1 # make a copy of result
10927 fcmp.b %fp1,&0x1 # is |result| > 1.b?
10928 fbgt.w fsgldiv_normal_exit # no; no underflow occurred
10929 fblt.w fsgldiv_unfl # yes; underflow occurred
10932 # we still don't know if underflow occurred. result is ~ equal to 1. but,
10933 # we don't know if the result was an underflow that rounded up to a 1
10934 # or a normalized number that rounded down to a 1. so, redo the entire
10935 # operation using RZ as the rounding mode to see what the pre-rounded
10936 # result is. this case should be relatively rare.
10938 fmovm.x FP_SCR1(%a6),&0x40 # load dst op into %fp1
10940 clr.l %d1 # clear scratch register
10941 ori.b &rz_mode*0x10,%d1 # force RZ rnd mode
10943 fmov.l %d1,%fpcr # set FPCR
10944 fmov.l &0x0,%fpsr # clear FPSR
10946 fsgldiv.x FP_SCR0(%a6),%fp1 # execute sgl divide
10948 fmov.l &0x0,%fpcr # clear FPCR
10949 fabs.x %fp1 # make absolute value
10950 fcmp.b %fp1,&0x1 # is |result| < 1.b?
10951 fbge.w fsgldiv_normal_exit # no; no underflow occurred
10952 bra.w fsgldiv_unfl # yes; underflow occurred
10954 ############################################################################
10957 # Divide: inputs are not both normalized; what are they?
10960 mov.w (tbl_fsgldiv_op.b,%pc,%d1.w*2),%d1
10961 jmp (tbl_fsgldiv_op.b,%pc,%d1.w*1)
10965 short fsgldiv_norm - tbl_fsgldiv_op # NORM / NORM
10966 short fsgldiv_inf_load - tbl_fsgldiv_op # NORM / ZERO
10967 short fsgldiv_zero_load - tbl_fsgldiv_op # NORM / INF
10968 short fsgldiv_res_qnan - tbl_fsgldiv_op # NORM / QNAN
10969 short fsgldiv_norm - tbl_fsgldiv_op # NORM / DENORM
10970 short fsgldiv_res_snan - tbl_fsgldiv_op # NORM / SNAN
10971 short tbl_fsgldiv_op - tbl_fsgldiv_op #
10972 short tbl_fsgldiv_op - tbl_fsgldiv_op #
10974 short fsgldiv_zero_load - tbl_fsgldiv_op # ZERO / NORM
10975 short fsgldiv_res_operr - tbl_fsgldiv_op # ZERO / ZERO
10976 short fsgldiv_zero_load - tbl_fsgldiv_op # ZERO / INF
10977 short fsgldiv_res_qnan - tbl_fsgldiv_op # ZERO / QNAN
10978 short fsgldiv_zero_load - tbl_fsgldiv_op # ZERO / DENORM
10979 short fsgldiv_res_snan - tbl_fsgldiv_op # ZERO / SNAN
10980 short tbl_fsgldiv_op - tbl_fsgldiv_op #
10981 short tbl_fsgldiv_op - tbl_fsgldiv_op #
10983 short fsgldiv_inf_dst - tbl_fsgldiv_op # INF / NORM
10984 short fsgldiv_inf_dst - tbl_fsgldiv_op # INF / ZERO
10985 short fsgldiv_res_operr - tbl_fsgldiv_op # INF / INF
10986 short fsgldiv_res_qnan - tbl_fsgldiv_op # INF / QNAN
10987 short fsgldiv_inf_dst - tbl_fsgldiv_op # INF / DENORM
10988 short fsgldiv_res_snan - tbl_fsgldiv_op # INF / SNAN
10989 short tbl_fsgldiv_op - tbl_fsgldiv_op #
10990 short tbl_fsgldiv_op - tbl_fsgldiv_op #
10992 short fsgldiv_res_qnan - tbl_fsgldiv_op # QNAN / NORM
10993 short fsgldiv_res_qnan - tbl_fsgldiv_op # QNAN / ZERO
10994 short fsgldiv_res_qnan - tbl_fsgldiv_op # QNAN / INF
10995 short fsgldiv_res_qnan - tbl_fsgldiv_op # QNAN / QNAN
10996 short fsgldiv_res_qnan - tbl_fsgldiv_op # QNAN / DENORM
10997 short fsgldiv_res_snan - tbl_fsgldiv_op # QNAN / SNAN
10998 short tbl_fsgldiv_op - tbl_fsgldiv_op #
10999 short tbl_fsgldiv_op - tbl_fsgldiv_op #
11001 short fsgldiv_norm - tbl_fsgldiv_op # DENORM / NORM
11002 short fsgldiv_inf_load - tbl_fsgldiv_op # DENORM / ZERO
11003 short fsgldiv_zero_load - tbl_fsgldiv_op # DENORM / INF
11004 short fsgldiv_res_qnan - tbl_fsgldiv_op # DENORM / QNAN
11005 short fsgldiv_norm - tbl_fsgldiv_op # DENORM / DENORM
11006 short fsgldiv_res_snan - tbl_fsgldiv_op # DENORM / SNAN
11007 short tbl_fsgldiv_op - tbl_fsgldiv_op #
11008 short tbl_fsgldiv_op - tbl_fsgldiv_op #
11010 short fsgldiv_res_snan - tbl_fsgldiv_op # SNAN / NORM
11011 short fsgldiv_res_snan - tbl_fsgldiv_op # SNAN / ZERO
11012 short fsgldiv_res_snan - tbl_fsgldiv_op # SNAN / INF
11013 short fsgldiv_res_snan - tbl_fsgldiv_op # SNAN / QNAN
11014 short fsgldiv_res_snan - tbl_fsgldiv_op # SNAN / DENORM
11015 short fsgldiv_res_snan - tbl_fsgldiv_op # SNAN / SNAN
11016 short tbl_fsgldiv_op - tbl_fsgldiv_op #
11017 short tbl_fsgldiv_op - tbl_fsgldiv_op #
11026 bra.l fdiv_inf_load
11028 bra.l fdiv_zero_load
11032 #########################################################################
11033 # XDEF **************************************************************** #
11034 # fadd(): emulates the fadd instruction #
11035 # fsadd(): emulates the fadd instruction #
11036 # fdadd(): emulates the fdadd instruction #
11038 # XREF **************************************************************** #
11039 # addsub_scaler2() - scale the operands so they won't take exc #
11040 # ovf_res() - return default overflow result #
11041 # unf_res() - return default underflow result #
11042 # res_qnan() - set QNAN result #
11043 # res_snan() - set SNAN result #
11044 # res_operr() - set OPERR result #
11045 # scale_to_zero_src() - set src operand exponent equal to zero #
11046 # scale_to_zero_dst() - set dst operand exponent equal to zero #
11048 # INPUT *************************************************************** #
11049 # a0 = pointer to extended precision source operand #
11050 # a1 = pointer to extended precision destination operand #
11052 # OUTPUT ************************************************************** #
11054 # fp1 = EXOP (if exception occurred) #
11056 # ALGORITHM *********************************************************** #
11057 # Handle NANs, infinities, and zeroes as special cases. Divide #
11058 # norms into extended, single, and double precision. #
11059 # Do addition after scaling exponents such that exception won't #
11060 # occur. Then, check result exponent to see if exception would have #
11061 # occurred. If so, return default result and maybe EXOP. Else, insert #
11062 # the correct result exponent and return. Set FPSR bits as appropriate. #
11064 #########################################################################
11068 andi.b &0x30,%d0 # clear rnd prec
11069 ori.b &s_mode*0x10,%d0 # insert sgl prec
11074 andi.b &0x30,%d0 # clear rnd prec
11075 ori.b &d_mode*0x10,%d0 # insert dbl prec
11079 mov.l %d0,L_SCR3(%a6) # store rnd info
11082 mov.b DTAG(%a6),%d1
11084 or.b STAG(%a6),%d1 # combine src tags
11086 bne.w fadd_not_norm # optimize on non-norm input
11089 # ADD: norms and denorms
11092 bsr.l addsub_scaler2 # scale exponents
11095 fmovm.x FP_SCR1(%a6),&0x80 # load dst op
11097 fmov.l &0x0,%fpsr # clear FPSR
11098 fmov.l L_SCR3(%a6),%fpcr # set FPCR
11100 fadd.x FP_SCR0(%a6),%fp0 # execute add
11102 fmov.l &0x0,%fpcr # clear FPCR
11103 fmov.l %fpsr,%d1 # fetch INEX2,N,Z
11105 or.l %d1,USER_FPSR(%a6) # save exc and ccode bits
11107 fbeq.w fadd_zero_exit # if result is zero, end now
11109 mov.l %d2,-(%sp) # save d2
11111 fmovm.x &0x01,-(%sp) # save result to stack
11113 mov.w 2+L_SCR3(%a6),%d1
11116 mov.w (%sp),%d2 # fetch new sign, exp
11117 andi.l &0x7fff,%d2 # strip sign
11118 sub.l %d0,%d2 # add scale factor
11120 cmp.l %d2,(tbl_fadd_ovfl.b,%pc,%d1.w*4) # is it an overflow?
11121 bge.b fadd_ovfl # yes
11123 cmp.l %d2,(tbl_fadd_unfl.b,%pc,%d1.w*4) # is it an underflow?
11124 blt.w fadd_unfl # yes
11125 beq.w fadd_may_unfl # maybe; go find out
11129 andi.w &0x8000,%d1 # keep sign
11130 or.w %d2,%d1 # concat sign,new exp
11131 mov.w %d1,(%sp) # insert new exponent
11133 fmovm.x (%sp)+,&0x80 # return result in fp0
11135 mov.l (%sp)+,%d2 # restore d2
11139 # fmov.s &0x00000000,%fp0 # return zero in fp0
11143 long 0x7fff # ext ovfl
11144 long 0x407f # sgl ovfl
11145 long 0x43ff # dbl ovfl
11148 long 0x0000 # ext unfl
11149 long 0x3f81 # sgl unfl
11150 long 0x3c01 # dbl unfl
11153 or.l &ovfl_inx_mask,USER_FPSR(%a6) # set ovfl/aovfl/ainex
11155 mov.b FPCR_ENABLE(%a6),%d1
11156 andi.b &0x13,%d1 # is OVFL or INEX enabled?
11157 bne.b fadd_ovfl_ena # yes
11161 btst &neg_bit,FPSR_CC(%a6) # is result negative?
11162 sne %d1 # set sign param accordingly
11163 mov.l L_SCR3(%a6),%d0 # pass prec:rnd
11164 bsr.l ovf_res # calculate default result
11165 or.b %d0,FPSR_CC(%a6) # set INF,N if applicable
11166 fmovm.x (%a0),&0x80 # return default result in fp0
11167 mov.l (%sp)+,%d2 # restore d2
11171 mov.b L_SCR3(%a6),%d1
11172 andi.b &0xc0,%d1 # is precision extended?
11173 bne.b fadd_ovfl_ena_sd # no; prec = sgl or dbl
11175 fadd_ovfl_ena_cont:
11177 andi.w &0x8000,%d1 # keep sign
11178 subi.l &0x6000,%d2 # add extra bias
11180 or.w %d2,%d1 # concat sign,new exp
11181 mov.w %d1,(%sp) # insert new exponent
11183 fmovm.x (%sp)+,&0x40 # return EXOP in fp1
11184 bra.b fadd_ovfl_dis
11187 fmovm.x FP_SCR1(%a6),&0x80 # load dst op
11189 mov.l L_SCR3(%a6),%d1
11190 andi.b &0x30,%d1 # keep rnd mode
11191 fmov.l %d1,%fpcr # set FPCR
11193 fadd.x FP_SCR0(%a6),%fp0 # execute add
11195 fmov.l &0x0,%fpcr # clear FPCR
11198 fmovm.x &0x01,-(%sp)
11199 bra.b fadd_ovfl_ena_cont
11202 bset &unfl_bit,FPSR_EXCEPT(%a6) # set unfl exc bit
11206 fmovm.x FP_SCR1(%a6),&0x80 # load dst op
11208 fmov.l &rz_mode*0x10,%fpcr # set FPCR
11209 fmov.l &0x0,%fpsr # clear FPSR
11211 fadd.x FP_SCR0(%a6),%fp0 # execute add
11213 fmov.l &0x0,%fpcr # clear FPCR
11214 fmov.l %fpsr,%d1 # save status
11216 or.l %d1,USER_FPSR(%a6) # save INEX,N
11218 mov.b FPCR_ENABLE(%a6),%d1
11219 andi.b &0x0b,%d1 # is UNFL or INEX enabled?
11220 bne.b fadd_unfl_ena # yes
11223 fmovm.x &0x80,FP_SCR0(%a6) # store out result
11225 lea FP_SCR0(%a6),%a0 # pass: result addr
11226 mov.l L_SCR3(%a6),%d1 # pass: rnd prec,mode
11227 bsr.l unf_res # calculate default result
11228 or.b %d0,FPSR_CC(%a6) # 'Z' bit may have been set
11229 fmovm.x FP_SCR0(%a6),&0x80 # return default result in fp0
11230 mov.l (%sp)+,%d2 # restore d2
11234	fmovm.x	FP_SCR1(%a6),&0x40	# load dst op
11236	mov.l	L_SCR3(%a6),%d1
11237	andi.b	&0xc0,%d1	# is precision extended?
11238	bne.b	fadd_unfl_ena_sd	# no; sgl or dbl
11240	fmov.l	L_SCR3(%a6),%fpcr	# set FPCR
11242 fadd_unfl_ena_cont:
11243	fmov.l	&0x0,%fpsr	# clear FPSR
11245	fadd.x	FP_SCR0(%a6),%fp1	# execute add
11247	fmov.l	&0x0,%fpcr	# clear FPCR
11249	fmovm.x	&0x40,FP_SCR0(%a6)	# save result to stack
11250	mov.w	FP_SCR0_EX(%a6),%d1	# fetch {sgn,exp}
11251	mov.l	%d1,%d2	# make a copy
11252	andi.l	&0x7fff,%d1	# strip sign
11253	andi.w	&0x8000,%d2	# keep old sign
11254	sub.l	%d0,%d1	# add scale factor
11255	addi.l	&0x6000,%d1	# add new bias
11256	andi.w	&0x7fff,%d1	# clear top bit
11257	or.w	%d2,%d1	# concat sign,new exp
11258	mov.w	%d1,FP_SCR0_EX(%a6)	# insert new exponent
11259	fmovm.x	FP_SCR0(%a6),&0x40	# return EXOP in fp1
11260	bra.w	fadd_unfl_dis
11263 mov.l L_SCR3(%a6),%d1
11264 andi.b &0x30,%d1 # use only rnd mode
11265 fmov.l %d1,%fpcr # set FPCR
11267 bra.b fadd_unfl_ena_cont
11270 # result is equal to the smallest normalized number in the selected precision
11271 # if the precision is extended, this result could not have come from an
11272 # underflow that rounded up.
11275 mov.l L_SCR3(%a6),%d1
11277 beq.w fadd_normal # yes; no underflow occurred
11279 mov.l 0x4(%sp),%d1 # extract hi(man)
11280 cmpi.l %d1,&0x80000000 # is hi(man) = 0x80000000?
11281 bne.w fadd_normal # no; no underflow occurred
11283 tst.l 0x8(%sp) # is lo(man) = 0x0?
11284 bne.w fadd_normal # no; no underflow occurred
11286 btst &inex2_bit,FPSR_EXCEPT(%a6) # is INEX2 set?
11287 beq.w fadd_normal # no; no underflow occurred
11290 # ok, so now the result has an exponent equal to the smallest normalized
11291 # exponent for the selected precision. also, the mantissa is equal to
11292 # 0x8000000000000000 and this mantissa is the result of rounding non-zero
11294 # now, we must determine whether the pre-rounded result was an underflow
11295 # rounded "up" or a normalized number rounded "down".
11296 # so, we do this by re-executing the add using RZ as the rounding mode and
11297 # seeing if the new result is smaller or equal to the current result.
11299	fmovm.x	FP_SCR1(%a6),&0x40	# load dst op into fp1
11301	mov.l	L_SCR3(%a6),%d1
11302	andi.b	&0xc0,%d1	# keep rnd prec
11303	ori.b	&rz_mode*0x10,%d1	# insert rnd mode
11304	fmov.l	%d1,%fpcr	# set FPCR
11305	fmov.l	&0x0,%fpsr	# clear FPSR
11307	fadd.x	FP_SCR0(%a6),%fp1	# execute add
11309	fmov.l	&0x0,%fpcr	# clear FPCR
11311	fabs.x	%fp0	# compare absolute values
11313	fcmp.x	%fp0,%fp1	# is first result > second?
11315	fbgt.w	fadd_unfl	# yes; it's an underflow
11316	bra.w	fadd_normal	# no; it's not an underflow
11318 ##########################################################################
11321 # Add: inputs are not both normalized; what are they?
11324 mov.w (tbl_fadd_op.b,%pc,%d1.w*2),%d1
11325 jmp (tbl_fadd_op.b,%pc,%d1.w*1)
11329	short	fadd_norm	- tbl_fadd_op # NORM + NORM
11330	short	fadd_zero_src	- tbl_fadd_op # NORM + ZERO
11331	short	fadd_inf_src	- tbl_fadd_op # NORM + INF
11332	short	fadd_res_qnan	- tbl_fadd_op # NORM + QNAN
11333	short	fadd_norm	- tbl_fadd_op # NORM + DENORM
11334	short	fadd_res_snan	- tbl_fadd_op # NORM + SNAN
11335	short	tbl_fadd_op	- tbl_fadd_op #
11336	short	tbl_fadd_op	- tbl_fadd_op #
11338	short	fadd_zero_dst	- tbl_fadd_op # ZERO + NORM
11339	short	fadd_zero_2	- tbl_fadd_op # ZERO + ZERO
11340	short	fadd_inf_src	- tbl_fadd_op # ZERO + INF
11341	short	fadd_res_qnan	- tbl_fadd_op # ZERO + QNAN
11342	short	fadd_zero_dst	- tbl_fadd_op # ZERO + DENORM
11343	short	fadd_res_snan	- tbl_fadd_op # ZERO + SNAN
11344	short	tbl_fadd_op	- tbl_fadd_op #
11345	short	tbl_fadd_op	- tbl_fadd_op #
11347	short	fadd_inf_dst	- tbl_fadd_op # INF + NORM
11348	short	fadd_inf_dst	- tbl_fadd_op # INF + ZERO
11349	short	fadd_inf_2	- tbl_fadd_op # INF + INF
11350	short	fadd_res_qnan	- tbl_fadd_op # INF + QNAN
11351	short	fadd_inf_dst	- tbl_fadd_op # INF + DENORM
11352	short	fadd_res_snan	- tbl_fadd_op # INF + SNAN
11353	short	tbl_fadd_op	- tbl_fadd_op #
11354	short	tbl_fadd_op	- tbl_fadd_op #
11356	short	fadd_res_qnan	- tbl_fadd_op # QNAN + NORM
11357	short	fadd_res_qnan	- tbl_fadd_op # QNAN + ZERO
11358	short	fadd_res_qnan	- tbl_fadd_op # QNAN + INF
11359	short	fadd_res_qnan	- tbl_fadd_op # QNAN + QNAN
11360	short	fadd_res_qnan	- tbl_fadd_op # QNAN + DENORM
11361	short	fadd_res_snan	- tbl_fadd_op # QNAN + SNAN
11362	short	tbl_fadd_op	- tbl_fadd_op #
11363	short	tbl_fadd_op	- tbl_fadd_op #
11365	short	fadd_norm	- tbl_fadd_op # DENORM + NORM
11366	short	fadd_zero_src	- tbl_fadd_op # DENORM + ZERO
11367	short	fadd_inf_src	- tbl_fadd_op # DENORM + INF
11368	short	fadd_res_qnan	- tbl_fadd_op # DENORM + QNAN
11369	short	fadd_norm	- tbl_fadd_op # DENORM + DENORM
11370	short	fadd_res_snan	- tbl_fadd_op # DENORM + SNAN
11371	short	tbl_fadd_op	- tbl_fadd_op #
11372	short	tbl_fadd_op	- tbl_fadd_op #
11374	short	fadd_res_snan	- tbl_fadd_op # SNAN + NORM
11375	short	fadd_res_snan	- tbl_fadd_op # SNAN + ZERO
11376	short	fadd_res_snan	- tbl_fadd_op # SNAN + INF
11377	short	fadd_res_snan	- tbl_fadd_op # SNAN + QNAN
11378	short	fadd_res_snan	- tbl_fadd_op # SNAN + DENORM
11379	short	fadd_res_snan	- tbl_fadd_op # SNAN + SNAN
11380	short	tbl_fadd_op	- tbl_fadd_op #
11381	short	tbl_fadd_op	- tbl_fadd_op #
11389 # both operands are ZEROes
11392 mov.b SRC_EX(%a0),%d0 # are the signs opposite
11393 mov.b DST_EX(%a1),%d1
11395 bmi.w fadd_zero_2_chk_rm # weed out (-ZERO)+(+ZERO)
11397 # the signs are the same. so determine whether they are positive or negative
11398 # and return the appropriately signed zero.
11399 tst.b %d0 # are ZEROes positive or negative?
11400 bmi.b fadd_zero_rm # negative
11401 fmov.s &0x00000000,%fp0 # return +ZERO
11402 mov.b &z_bmask,FPSR_CC(%a6) # set Z
11406 # the ZEROes have opposite signs:
11407 # - Therefore, we return +ZERO if the rounding modes are RN,RZ, or RP.
11408 # - -ZERO is returned in the case of RM.
11410 fadd_zero_2_chk_rm:
11411 mov.b 3+L_SCR3(%a6),%d1
11412 andi.b &0x30,%d1 # extract rnd mode
11413 cmpi.b %d1,&rm_mode*0x10 # is rnd mode == RM?
11414 beq.b fadd_zero_rm # yes
11415 fmov.s &0x00000000,%fp0 # return +ZERO
11416 mov.b &z_bmask,FPSR_CC(%a6) # set Z
11420 fmov.s &0x80000000,%fp0 # return -ZERO
11421 mov.b &neg_bmask+z_bmask,FPSR_CC(%a6) # set NEG/Z
11425 # one operand is a ZERO and the other is a DENORM or NORM. scale
11426 # the DENORM or NORM and jump to the regular fadd routine.
11429 mov.w SRC_EX(%a0),FP_SCR0_EX(%a6)
11430 mov.l SRC_HI(%a0),FP_SCR0_HI(%a6)
11431 mov.l SRC_LO(%a0),FP_SCR0_LO(%a6)
11432 bsr.l scale_to_zero_src # scale the operand
11433 clr.w FP_SCR1_EX(%a6)
11434 clr.l FP_SCR1_HI(%a6)
11435 clr.l FP_SCR1_LO(%a6)
11436 bra.w fadd_zero_entry # go execute fadd
11439 mov.w DST_EX(%a1),FP_SCR1_EX(%a6)
11440 mov.l DST_HI(%a1),FP_SCR1_HI(%a6)
11441 mov.l DST_LO(%a1),FP_SCR1_LO(%a6)
11442 bsr.l scale_to_zero_dst # scale the operand
11443 clr.w FP_SCR0_EX(%a6)
11444 clr.l FP_SCR0_HI(%a6)
11445 clr.l FP_SCR0_LO(%a6)
11446 bra.w fadd_zero_entry # go execute fadd
11449 # both operands are INFs. an OPERR will result if the INFs have
11450 # different signs. else, an INF of the same sign is returned
11453 mov.b SRC_EX(%a0),%d0 # exclusive or the signs
11454 mov.b DST_EX(%a1),%d1
11456 bmi.l res_operr # weed out (-INF)+(+INF)
11458 # ok, so it's not an OPERR. but, we do have to remember to return the
11459 # src INF since that's where the 881/882 gets the j-bit from...
11462 # operands are INF and one of {ZERO, INF, DENORM, NORM}
11465 fmovm.x SRC(%a0),&0x80 # return src INF
11466 tst.b SRC_EX(%a0) # is INF positive?
11467 bpl.b fadd_inf_done # yes; we're done
11468 mov.b &neg_bmask+inf_bmask,FPSR_CC(%a6) # set INF/NEG
11472 # operands are INF and one of {ZERO, INF, DENORM, NORM}
11475 fmovm.x DST(%a1),&0x80 # return dst INF
11476 tst.b DST_EX(%a1) # is INF positive?
11477 bpl.b fadd_inf_done # yes; we're done
11478 mov.b &neg_bmask+inf_bmask,FPSR_CC(%a6) # set INF/NEG
11482 mov.b &inf_bmask,FPSR_CC(%a6) # set INF
11485 #########################################################################
11486 # XDEF **************************************************************** #
11487 # fsub(): emulates the fsub instruction #
11488 # fssub(): emulates the fssub instruction #
11489 # fdsub(): emulates the fdsub instruction #
11491 # XREF **************************************************************** #
11492 # addsub_scaler2() - scale the operands so they won't take exc #
11493 # ovf_res() - return default overflow result #
11494 # unf_res() - return default underflow result #
11495 # res_qnan() - set QNAN result #
11496 # res_snan() - set SNAN result #
11497 # res_operr() - set OPERR result #
11498 # scale_to_zero_src() - set src operand exponent equal to zero #
11499 # scale_to_zero_dst() - set dst operand exponent equal to zero #
11501 # INPUT *************************************************************** #
11502 # a0 = pointer to extended precision source operand #
11503 # a1 = pointer to extended precision destination operand #
11505 # OUTPUT ************************************************************** #
11507 # fp1 = EXOP (if exception occurred) #
11509 # ALGORITHM *********************************************************** #
11510 # Handle NANs, infinities, and zeroes as special cases. Divide #
11511 # norms into extended, single, and double precision. #
11512 # Do subtraction after scaling exponents such that exception won't#
11513 # occur. Then, check result exponent to see if exception would have #
11514 # occurred. If so, return default result and maybe EXOP. Else, insert #
11515 # the correct result exponent and return. Set FPSR bits as appropriate. #
11517 #########################################################################
11521 andi.b &0x30,%d0 # clear rnd prec
11522 ori.b &s_mode*0x10,%d0 # insert sgl prec
11527 andi.b &0x30,%d0 # clear rnd prec
11528 ori.b &d_mode*0x10,%d0 # insert dbl prec
11532 mov.l %d0,L_SCR3(%a6) # store rnd info
11535 mov.b DTAG(%a6),%d1
11537 or.b STAG(%a6),%d1 # combine src tags
11539 bne.w fsub_not_norm # optimize on non-norm input
11542 # SUB: norms and denorms
11545 bsr.l addsub_scaler2 # scale exponents
11548 fmovm.x FP_SCR1(%a6),&0x80 # load dst op
11550 fmov.l &0x0,%fpsr # clear FPSR
11551 fmov.l L_SCR3(%a6),%fpcr # set FPCR
11553 fsub.x FP_SCR0(%a6),%fp0 # execute subtract
11555 fmov.l &0x0,%fpcr # clear FPCR
11556 fmov.l %fpsr,%d1 # fetch INEX2, N, Z
11558 or.l %d1,USER_FPSR(%a6) # save exc and ccode bits
11560 fbeq.w fsub_zero_exit # if result zero, end now
11562 mov.l %d2,-(%sp) # save d2
11564 fmovm.x &0x01,-(%sp) # save result to stack
11566 mov.w 2+L_SCR3(%a6),%d1
11569 mov.w (%sp),%d2 # fetch new exponent
11570 andi.l &0x7fff,%d2 # strip sign
11571 sub.l %d0,%d2 # add scale factor
11573 cmp.l %d2,(tbl_fsub_ovfl.b,%pc,%d1.w*4) # is it an overflow?
11574 bge.b fsub_ovfl # yes
11576 cmp.l %d2,(tbl_fsub_unfl.b,%pc,%d1.w*4) # is it an underflow?
11577 blt.w fsub_unfl # yes
11578 beq.w fsub_may_unfl # maybe; go find out
11582	andi.w	&0x8000,%d1	# keep sign
11583	or.w	%d2,%d1	# concat sign,new exp
11584	mov.w	%d1,(%sp)	# insert new exponent
11586	fmovm.x	(%sp)+,&0x80	# return result in fp0
11588	mov.l	(%sp)+,%d2	# restore d2
11592 # fmov.s &0x00000000,%fp0 # return zero in fp0
11596 long 0x7fff # ext ovfl
11597 long 0x407f # sgl ovfl
11598 long 0x43ff # dbl ovfl
11601 long 0x0000 # ext unfl
11602 long 0x3f81 # sgl unfl
11603 long 0x3c01 # dbl unfl
11606 or.l &ovfl_inx_mask,USER_FPSR(%a6) # set ovfl/aovfl/ainex
11608 mov.b FPCR_ENABLE(%a6),%d1
11609 andi.b &0x13,%d1 # is OVFL or INEX enabled?
11610 bne.b fsub_ovfl_ena # yes
11614 btst &neg_bit,FPSR_CC(%a6) # is result negative?
11615 sne %d1 # set sign param accordingly
11616 mov.l L_SCR3(%a6),%d0 # pass prec:rnd
11617 bsr.l ovf_res # calculate default result
11618 or.b %d0,FPSR_CC(%a6) # set INF,N if applicable
11619 fmovm.x (%a0),&0x80 # return default result in fp0
11620 mov.l (%sp)+,%d2 # restore d2
11624 mov.b L_SCR3(%a6),%d1
11625 andi.b &0xc0,%d1 # is precision extended?
11626 bne.b fsub_ovfl_ena_sd # no
11628 fsub_ovfl_ena_cont:
11629 mov.w (%sp),%d1 # fetch {sgn,exp}
11630 andi.w &0x8000,%d1 # keep sign
11631 subi.l &0x6000,%d2 # subtract new bias
11632 andi.w &0x7fff,%d2 # clear top bit
11633 or.w %d2,%d1 # concat sign,exp
11634 mov.w %d1,(%sp) # insert new exponent
11636 fmovm.x (%sp)+,&0x40 # return EXOP in fp1
11637 bra.b fsub_ovfl_dis
11640 fmovm.x FP_SCR1(%a6),&0x80 # load dst op
11642 mov.l L_SCR3(%a6),%d1
11643 andi.b &0x30,%d1 # clear rnd prec
11644 fmov.l %d1,%fpcr # set FPCR
11646 fsub.x FP_SCR0(%a6),%fp0 # execute subtract
11648 fmov.l &0x0,%fpcr # clear FPCR
11651 fmovm.x &0x01,-(%sp)
11652 bra.b fsub_ovfl_ena_cont
11655 bset &unfl_bit,FPSR_EXCEPT(%a6) # set unfl exc bit
11659 fmovm.x FP_SCR1(%a6),&0x80 # load dst op
11661 fmov.l &rz_mode*0x10,%fpcr # set FPCR
11662 fmov.l &0x0,%fpsr # clear FPSR
11664 fsub.x FP_SCR0(%a6),%fp0 # execute subtract
11666 fmov.l &0x0,%fpcr # clear FPCR
11667 fmov.l %fpsr,%d1 # save status
11669 or.l %d1,USER_FPSR(%a6)
11671 mov.b FPCR_ENABLE(%a6),%d1
11672 andi.b &0x0b,%d1 # is UNFL or INEX enabled?
11673 bne.b fsub_unfl_ena # yes
11676 fmovm.x &0x80,FP_SCR0(%a6) # store out result
11678 lea FP_SCR0(%a6),%a0 # pass: result addr
11679 mov.l L_SCR3(%a6),%d1 # pass: rnd prec,mode
11680 bsr.l unf_res # calculate default result
11681 or.b %d0,FPSR_CC(%a6) # 'Z' may have been set
11682 fmovm.x FP_SCR0(%a6),&0x80 # return default result in fp0
11683 mov.l (%sp)+,%d2 # restore d2
11687	fmovm.x	FP_SCR1(%a6),&0x40	# load dst op
11689	mov.l	L_SCR3(%a6),%d1
11690	andi.b	&0xc0,%d1	# is precision extended?
11691	bne.b	fsub_unfl_ena_sd	# no
11693	fmov.l	L_SCR3(%a6),%fpcr	# set FPCR
11695 fsub_unfl_ena_cont:
11696	fmov.l	&0x0,%fpsr	# clear FPSR
11698	fsub.x	FP_SCR0(%a6),%fp1	# execute subtract
11700	fmov.l	&0x0,%fpcr	# clear FPCR
11702	fmovm.x	&0x40,FP_SCR0(%a6)	# store result to stack
11703	mov.w	FP_SCR0_EX(%a6),%d1	# fetch {sgn,exp}
11704	mov.l	%d1,%d2	# make a copy
11705	andi.l	&0x7fff,%d1	# strip sign
11706	andi.w	&0x8000,%d2	# keep old sign
11707	sub.l	%d0,%d1	# add scale factor
11708	addi.l	&0x6000,%d1	# add new bias
11709	andi.w	&0x7fff,%d1	# clear top bit
11710	or.w	%d2,%d1	# concat sgn,exp
11711	mov.w	%d1,FP_SCR0_EX(%a6)	# insert new exponent
11712	fmovm.x	FP_SCR0(%a6),&0x40	# return EXOP in fp1
11713	bra.w	fsub_unfl_dis
11716 mov.l L_SCR3(%a6),%d1
11717 andi.b &0x30,%d1 # clear rnd prec
11718 fmov.l %d1,%fpcr # set FPCR
11720 bra.b fsub_unfl_ena_cont
11723 # result is equal to the smallest normalized number in the selected precision
11724 # if the precision is extended, this result could not have come from an
11725 # underflow that rounded up.
11728 mov.l L_SCR3(%a6),%d1
11729 andi.b &0xc0,%d1 # fetch rnd prec
11730 beq.w fsub_normal # yes; no underflow occurred
11733 cmpi.l %d1,&0x80000000 # is hi(man) = 0x80000000?
11734 bne.w fsub_normal # no; no underflow occurred
11736 tst.l 0x8(%sp) # is lo(man) = 0x0?
11737 bne.w fsub_normal # no; no underflow occurred
11739 btst &inex2_bit,FPSR_EXCEPT(%a6) # is INEX2 set?
11740 beq.w fsub_normal # no; no underflow occurred
11743 # ok, so now the result has an exponent equal to the smallest normalized
11744 # exponent for the selected precision. also, the mantissa is equal to
11745 # 0x8000000000000000 and this mantissa is the result of rounding non-zero
11747 # now, we must determine whether the pre-rounded result was an underflow
11748 # rounded "up" or a normalized number rounded "down".
11749 # so, we do this by re-executing the subtract using RZ as the rounding mode and
11750 # seeing if the new result is smaller or equal to the current result.
11752	fmovm.x	FP_SCR1(%a6),&0x40	# load dst op into fp1
11754	mov.l	L_SCR3(%a6),%d1
11755	andi.b	&0xc0,%d1	# keep rnd prec
11756	ori.b	&rz_mode*0x10,%d1	# insert rnd mode
11757	fmov.l	%d1,%fpcr	# set FPCR
11758	fmov.l	&0x0,%fpsr	# clear FPSR
11760	fsub.x	FP_SCR0(%a6),%fp1	# execute subtract
11762	fmov.l	&0x0,%fpcr	# clear FPCR
11764	fabs.x	%fp0	# compare absolute values
11766	fcmp.x	%fp0,%fp1	# is first result > second?
11768	fbgt.w	fsub_unfl	# yes; it's an underflow
11769	bra.w	fsub_normal	# no; it's not an underflow
11771 ##########################################################################
#
# fsub operand-type dispatch: at least one input is not a NORM.
# d1 indexes (presumably {dst tag, src tag} -- the index computation
# precedes this excerpt) into tbl_fsub_op, a table of self-relative
# word offsets, 8 entries per dst-tag row.
#
11774 # Sub: inputs are not both normalized; what are they?
11777 mov.w (tbl_fsub_op.b,%pc,%d1.w*2),%d1
11778 jmp (tbl_fsub_op.b,%pc,%d1.w*1)
# dst = NORM
11782 short fsub_norm - tbl_fsub_op # NORM - NORM
11783 short fsub_zero_src - tbl_fsub_op # NORM - ZERO
11784 short fsub_inf_src - tbl_fsub_op # NORM - INF
11785 short fsub_res_qnan - tbl_fsub_op # NORM - QNAN
11786 short fsub_norm - tbl_fsub_op # NORM - DENORM
11787 short fsub_res_snan - tbl_fsub_op # NORM - SNAN
11788 short tbl_fsub_op - tbl_fsub_op #
11789 short tbl_fsub_op - tbl_fsub_op #
# dst = ZERO
11791 short fsub_zero_dst - tbl_fsub_op # ZERO - NORM
11792 short fsub_zero_2 - tbl_fsub_op # ZERO - ZERO
11793 short fsub_inf_src - tbl_fsub_op # ZERO - INF
11794 short fsub_res_qnan - tbl_fsub_op # ZERO - QNAN
11795 short fsub_zero_dst - tbl_fsub_op # ZERO - DENORM
11796 short fsub_res_snan - tbl_fsub_op # ZERO - SNAN
11797 short tbl_fsub_op - tbl_fsub_op #
11798 short tbl_fsub_op - tbl_fsub_op #
# dst = INF
11800 short fsub_inf_dst - tbl_fsub_op # INF - NORM
11801 short fsub_inf_dst - tbl_fsub_op # INF - ZERO
11802 short fsub_inf_2 - tbl_fsub_op # INF - INF
11803 short fsub_res_qnan - tbl_fsub_op # INF - QNAN
11804 short fsub_inf_dst - tbl_fsub_op # INF - DENORM
11805 short fsub_res_snan - tbl_fsub_op # INF - SNAN
11806 short tbl_fsub_op - tbl_fsub_op #
11807 short tbl_fsub_op - tbl_fsub_op #
# dst = QNAN
11809 short fsub_res_qnan - tbl_fsub_op # QNAN - NORM
11810 short fsub_res_qnan - tbl_fsub_op # QNAN - ZERO
11811 short fsub_res_qnan - tbl_fsub_op # QNAN - INF
11812 short fsub_res_qnan - tbl_fsub_op # QNAN - QNAN
11813 short fsub_res_qnan - tbl_fsub_op # QNAN - DENORM
11814 short fsub_res_snan - tbl_fsub_op # QNAN - SNAN
11815 short tbl_fsub_op - tbl_fsub_op #
11816 short tbl_fsub_op - tbl_fsub_op #
# dst = DENORM
11818 short fsub_norm - tbl_fsub_op # DENORM - NORM
11819 short fsub_zero_src - tbl_fsub_op # DENORM - ZERO
11820 short fsub_inf_src - tbl_fsub_op # DENORM - INF
11821 short fsub_res_qnan - tbl_fsub_op # DENORM - QNAN
11822 short fsub_norm - tbl_fsub_op # DENORM - DENORM
11823 short fsub_res_snan - tbl_fsub_op # DENORM - SNAN
11824 short tbl_fsub_op - tbl_fsub_op #
11825 short tbl_fsub_op - tbl_fsub_op #
# dst = SNAN (SNAN always dominates)
11827 short fsub_res_snan - tbl_fsub_op # SNAN - NORM
11828 short fsub_res_snan - tbl_fsub_op # SNAN - ZERO
11829 short fsub_res_snan - tbl_fsub_op # SNAN - INF
11830 short fsub_res_snan - tbl_fsub_op # SNAN - QNAN
11831 short fsub_res_snan - tbl_fsub_op # SNAN - DENORM
11832 short fsub_res_snan - tbl_fsub_op # SNAN - SNAN
11833 short tbl_fsub_op - tbl_fsub_op #
11834 short tbl_fsub_op - tbl_fsub_op #
11842 # both operands are ZEROes
#
# fsub_zero_2: ZERO - ZERO. The result is a ZERO; its sign follows the
# rules spelled out in the comments below (dst sign when the signs
# differ; +0 except under RM when the signs agree).
#
11845 mov.b SRC_EX(%a0),%d0
11846 mov.b DST_EX(%a1),%d1
# (the full source xors the sign bytes here; plus => signs alike)
11848 bpl.b fsub_zero_2_chk_rm
11850 # the signs are opposite, so, return a ZERO w/ the sign of the dst ZERO
# NOTE(review): %d0 holds the SRC sign byte (loaded above) although the
# comment below says "dst"; with opposite signs the dst sign is the
# inverse of the src sign -- verify the branch sense against the
# 881/882 zero-difference rules.
11851 tst.b %d0 # is dst negative?
11852 bmi.b fsub_zero_2_rm # yes
11853 fmov.s &0x00000000,%fp0 # no; return +ZERO
11854 mov.b &z_bmask,FPSR_CC(%a6) # set Z
11858 # the ZEROes have the same signs:
11859 # - Therefore, we return +ZERO if the rounding mode is RN,RZ, or RP
11860 # - -ZERO is returned in the case of RM.
11862 fsub_zero_2_chk_rm:
11863 mov.b 3+L_SCR3(%a6),%d1
11864 andi.b &0x30,%d1 # extract rnd mode
11865 cmpi.b %d1,&rm_mode*0x10 # is rnd mode = RM?
11866 beq.b fsub_zero_2_rm # yes
11867 fmov.s &0x00000000,%fp0 # no; return +ZERO
11868 mov.b &z_bmask,FPSR_CC(%a6) # set Z
# (fsub_zero_2_rm)
11872 fmov.s &0x80000000,%fp0 # return -ZERO
11873 mov.b &z_bmask+neg_bmask,FPSR_CC(%a6) # set Z/NEG
11877 # one operand is a ZERO and the other is a DENORM or a NORM.
11878 # scale the DENORM or NORM and jump to the regular fsub routine.
#
# first fragment: the src is the non-zero operand -- copy it to
# FP_SCR0, scale it, and set FP_SCR1 (dst) to +0.
#
11881 mov.w SRC_EX(%a0),FP_SCR0_EX(%a6)
11882 mov.l SRC_HI(%a0),FP_SCR0_HI(%a6)
11883 mov.l SRC_LO(%a0),FP_SCR0_LO(%a6)
11884 bsr.l scale_to_zero_src # scale the operand
11885 clr.w FP_SCR1_EX(%a6)
11886 clr.l FP_SCR1_HI(%a6)
11887 clr.l FP_SCR1_LO(%a6)
11888 bra.w fsub_zero_entry # go execute fsub
#
# second fragment: the dst is the non-zero operand -- copy it to
# FP_SCR1, scale it, and set FP_SCR0 (src) to +0.
#
11891 mov.w DST_EX(%a1),FP_SCR1_EX(%a6)
11892 mov.l DST_HI(%a1),FP_SCR1_HI(%a6)
11893 mov.l DST_LO(%a1),FP_SCR1_LO(%a6)
11894 bsr.l scale_to_zero_dst # scale the operand
11895 clr.w FP_SCR0_EX(%a6)
11896 clr.l FP_SCR0_HI(%a6)
11897 clr.l FP_SCR0_LO(%a6)
11898 bra.w fsub_zero_entry # go execute fsub
11901 # both operands are INFs. an OPERR will result if the INFs have the
11902 # same signs. else,
#
# fsub_inf_2: INF - INF. Like-signed infinities are an operand error;
# otherwise the src INF is returned sign-inverted (j-bit comes from
# the src operand, per the comment below).
#
11905 mov.b SRC_EX(%a0),%d0 # exclusive or the signs
11906 mov.b DST_EX(%a1),%d1
# (the eor of the sign bytes from the full source is not shown here)
11908 bpl.l res_operr # weed out INF - INF of like signs
11910 # ok, so it's not an OPERR. but we do have to remember to return
11911 # the src INF since that's where the 881/882 gets the j-bit.
11914 fmovm.x SRC(%a0),&0x80 # return src INF
11915 fneg.x %fp0 # invert sign
11916 fbge.w fsub_inf_done # sign is now positive
11917 mov.b &neg_bmask+inf_bmask,FPSR_CC(%a6) # set INF/NEG
#
# (fsub_inf_dst) the dst is the only INF: return it with its own sign.
#
11921 fmovm.x DST(%a1),&0x80 # return dst INF
11922 tst.b DST_EX(%a1) # is INF negative?
11923 bpl.b fsub_inf_done # no
11924 mov.b &neg_bmask+inf_bmask,FPSR_CC(%a6) # set INF/NEG
# (fsub_inf_done) common positive-INF exit
11928 mov.b &inf_bmask,FPSR_CC(%a6) # set INF
11931 #########################################################################
11932 # XDEF **************************************************************** #
11933 # fsqrt(): emulates the fsqrt instruction #
11934 # fssqrt(): emulates the fssqrt instruction #
11935 # fdsqrt(): emulates the fdsqrt instruction #
11937 # XREF **************************************************************** #
11938 # scale_sqrt() - scale the source operand #
11939 # unf_res() - return default underflow result #
11940 # ovf_res() - return default overflow result #
11941 # res_qnan_1op() - return QNAN result #
11942 # res_snan_1op() - return SNAN result #
11944 # INPUT *************************************************************** #
11945 # a0 = pointer to extended precision source operand #
11946 # d0 rnd prec,mode #
11948 # OUTPUT ************************************************************** #
11950 # fp1 = EXOP (if exception occurred) #
11952 # ALGORITHM *********************************************************** #
11953 # Handle NANs, infinities, and zeroes as special cases. Divide #
11954 # norms/denorms into ext/sgl/dbl precision. #
11955 # For norms/denorms, scale the exponents such that a sqrt #
11956 # instruction won't cause an exception. Use the regular fsqrt to #
11957 # compute a result. Check if the regular operands would have taken #
11958 # an exception. If so, return the default overflow/underflow result #
11959 # and return the EXOP if exceptions are enabled. Else, scale the #
11960 # result operand to the proper exponent. #
11962 #########################################################################
# (fssqrt entry) force single rounding precision, keep rounding mode
11966 andi.b &0x30,%d0 # clear rnd prec
11967 ori.b &s_mode*0x10,%d0 # insert sgl precision
# (fdsqrt entry) force double rounding precision, keep rounding mode
11972 andi.b &0x30,%d0 # clear rnd prec
11973 ori.b &d_mode*0x10,%d0 # insert dbl precision
# common entry: stash rnd info, dispatch on the source operand tag
11977 mov.l %d0,L_SCR3(%a6) # store rnd info
11979 mov.b STAG(%a6),%d1
11980 bne.w fsqrt_not_norm # optimize on non-norm input
11983 # SQUARE ROOT: norms and denorms ONLY!
11986 tst.b SRC_EX(%a0) # is operand negative?
11987 bmi.l res_operr # yes
11989 andi.b &0xc0,%d0 # is precision extended?
11990 bne.b fsqrt_not_ext # no; go handle sgl or dbl
# extended precision NORM: execute the sqrt directly
11992 fmov.l L_SCR3(%a6),%fpcr # set FPCR
11993 fmov.l &0x0,%fpsr # clear FPSR
11995 fsqrt.x (%a0),%fp0 # execute square root
# (the FPSR was captured into d1 here in the full source) -- TODO confirm
11998 or.l %d1,USER_FPSR(%a6) # set N,INEX
# (denorm path) same negative/precision checks, then scale the
# operand into FP_SCR0 before the shared normal path
12003 tst.b SRC_EX(%a0) # is operand negative?
12004 bmi.l res_operr # yes
12006 andi.b &0xc0,%d0 # is precision extended?
12007 bne.b fsqrt_not_ext # no; go handle sgl or dbl
12009 mov.w SRC_EX(%a0),FP_SCR0_EX(%a6)
12010 mov.l SRC_HI(%a0),FP_SCR0_HI(%a6)
12011 mov.l SRC_LO(%a0),FP_SCR0_LO(%a6)
12013 bsr.l scale_sqrt # calculate scale factor
12015 bra.w fsqrt_sd_normal
12018 # operand is either single or double
# (fsqrt_not_ext) split on the rounding precision held in d0
12021 cmpi.b %d0,&s_mode*0x10 # separate sgl/dbl prec
12025 # operand is to be rounded to single precision
12028 mov.w SRC_EX(%a0),FP_SCR0_EX(%a6)
12029 mov.l SRC_HI(%a0),FP_SCR0_HI(%a6)
12030 mov.l SRC_LO(%a0),FP_SCR0_LO(%a6)
12032 bsr.l scale_sqrt # calculate scale factor
# compare the scale factor against the sgl-precision exponent limits
12034 cmpi.l %d0,&0x3fff-0x3f81 # will move in underflow?
12035 beq.w fsqrt_sd_may_unfl
12036 bgt.w fsqrt_sd_unfl # yes; go handle underflow
12037 cmpi.l %d0,&0x3fff-0x407f # will move in overflow?
12038 beq.w fsqrt_sd_may_ovfl # maybe; go check
12039 blt.w fsqrt_sd_ovfl # yes; go handle overflow
12042 # operand will NOT overflow or underflow when moved in to the fp reg file
12045 fmov.l &0x0,%fpsr # clear FPSR
12046 fmov.l L_SCR3(%a6),%fpcr # set FPCR
12048 fsqrt.x FP_SCR0(%a6),%fp0 # perform square root
12050 fmov.l %fpsr,%d1 # save FPSR
12051 fmov.l &0x0,%fpcr # clear FPCR
12053 or.l %d1,USER_FPSR(%a6) # save INEX2,N
# common exit: undo the scaling by rebuilding the result exponent
12055 fsqrt_sd_normal_exit:
12056 mov.l %d2,-(%sp) # save d2
12057 fmovm.x &0x80,FP_SCR0(%a6) # store out result
12058 mov.w FP_SCR0_EX(%a6),%d1 # load sgn,exp
12059 mov.l %d1,%d2 # make a copy
12060 andi.l &0x7fff,%d1 # strip sign
12061 sub.l %d0,%d1 # subtract scale factor
12062 andi.w &0x8000,%d2 # keep old sign
12063 or.w %d1,%d2 # concat old sign,new exp
12064 mov.w %d2,FP_SCR0_EX(%a6) # insert new exponent
12065 mov.l (%sp)+,%d2 # restore d2
12066 fmovm.x FP_SCR0(%a6),&0x80 # return result in fp0
12070 # operand is to be rounded to double precision
12073 mov.w SRC_EX(%a0),FP_SCR0_EX(%a6)
12074 mov.l SRC_HI(%a0),FP_SCR0_HI(%a6)
12075 mov.l SRC_LO(%a0),FP_SCR0_LO(%a6)
12077 bsr.l scale_sqrt # calculate scale factor
# compare the scale factor against the dbl-precision exponent limits
12079 cmpi.l %d0,&0x3fff-0x3c01 # will move in underflow?
12080 beq.w fsqrt_sd_may_unfl
12081 bgt.b fsqrt_sd_unfl # yes; go handle underflow
12082 cmpi.l %d0,&0x3fff-0x43ff # will move in overflow?
12083 beq.w fsqrt_sd_may_ovfl # maybe; go check
12084 blt.w fsqrt_sd_ovfl # yes; go handle overflow
12085 bra.w fsqrt_sd_normal # no; go handle normalized op
12087 # we're on the line here and the distinguishing characteristic is whether
12088 # the exponent is 3fff or 3ffe. if it's 3ffe, then it's a safe number
12089 # elsewise fall through to underflow.
# (fsqrt_sd_may_unfl)
12091 btst &0x0,1+FP_SCR0_EX(%a6) # is exponent 0x3fff?
12092 bne.w fsqrt_sd_normal # yes, so no underflow
12095 # operand WILL underflow when moved in to the fp register file
#
# (fsqrt_sd_unfl) execute the sqrt in round-to-zero, record UNFL, then
# either build the default underflow result (unf_res) or, if UNFL or
# INEX is enabled, also return the biased EXOP in fp1.
#
12098 bset &unfl_bit,FPSR_EXCEPT(%a6) # set unfl exc bit
12100 fmov.l &rz_mode*0x10,%fpcr # set FPCR
12101 fmov.l &0x0,%fpsr # clear FPSR
12103 fsqrt.x FP_SCR0(%a6),%fp0 # execute square root
12105 fmov.l %fpsr,%d1 # save status
12106 fmov.l &0x0,%fpcr # clear FPCR
12108 or.l %d1,USER_FPSR(%a6) # save INEX2,N
12110 # if underflow or inexact is enabled, go calculate EXOP first.
12111 mov.b FPCR_ENABLE(%a6),%d1
12112 andi.b &0x0b,%d1 # is UNFL or INEX enabled?
12113 bne.b fsqrt_sd_unfl_ena # yes
# underflow disabled: deliver the default underflow result
12116 fmovm.x &0x80,FP_SCR0(%a6) # store out result
12118 lea FP_SCR0(%a6),%a0 # pass: result addr
12119 mov.l L_SCR3(%a6),%d1 # pass: rnd prec,mode
12120 bsr.l unf_res # calculate default result
12121 or.b %d0,FPSR_CC(%a6) # set possible 'Z' ccode
12122 fmovm.x FP_SCR0(%a6),&0x80 # return default result in fp0
12126 # operand will underflow AND underflow is enabled.
12127 # Therefore, we must return the result rounded to extended precision.
# (fsqrt_sd_unfl_ena) bias the exponent by +0x6000 to form the EXOP
12130 mov.l FP_SCR0_HI(%a6),FP_SCR1_HI(%a6)
12131 mov.l FP_SCR0_LO(%a6),FP_SCR1_LO(%a6)
12132 mov.w FP_SCR0_EX(%a6),%d1 # load current exponent
12134 mov.l %d2,-(%sp) # save d2
12135 mov.l %d1,%d2 # make a copy
12136 andi.l &0x7fff,%d1 # strip sign
12137 andi.w &0x8000,%d2 # keep old sign
12138 sub.l %d0,%d1 # subtract scale factor
12139 addi.l &0x6000,%d1 # add new bias
12141 or.w %d2,%d1 # concat new sign,new exp
12142 mov.w %d1,FP_SCR1_EX(%a6) # insert new exp
12143 fmovm.x FP_SCR1(%a6),&0x40 # return EXOP in fp1
12144 mov.l (%sp)+,%d2 # restore d2
12145 bra.b fsqrt_sd_unfl_dis
12148 # operand WILL overflow.
#
# (fsqrt_sd_ovfl) run the sqrt, force the OVFL status bits, then either
# deliver the default overflow result (ovf_res) or, if OVFL or INEX is
# enabled, also return the biased EXOP in fp1.
#
12151 fmov.l &0x0,%fpsr # clear FPSR
12152 fmov.l L_SCR3(%a6),%fpcr # set FPCR
12154 fsqrt.x FP_SCR0(%a6),%fp0 # perform square root
12156 fmov.l &0x0,%fpcr # clear FPCR
12157 fmov.l %fpsr,%d1 # save FPSR
12159 or.l %d1,USER_FPSR(%a6) # save INEX2,N
# (fsqrt_sd_ovfl_tst)
12162 or.l &ovfl_inx_mask,USER_FPSR(%a6) # set ovfl/aovfl/ainex
12164 mov.b FPCR_ENABLE(%a6),%d1
12165 andi.b &0x13,%d1 # is OVFL or INEX enabled?
12166 bne.b fsqrt_sd_ovfl_ena # yes
12169 # OVFL is not enabled; therefore, we must create the default result by
12170 # calling ovf_res().
12173 btst &neg_bit,FPSR_CC(%a6) # is result negative?
12174 sne %d1 # set sign param accordingly
12175 mov.l L_SCR3(%a6),%d0 # pass: prec,mode
12176 bsr.l ovf_res # calculate default result
12177 or.b %d0,FPSR_CC(%a6) # set INF,N if applicable
12178 fmovm.x (%a0),&0x80 # return default result in fp0
12183 # the INEX2 bit has already been updated by the round to the correct precision.
12184 # now, round to extended(and don't alter the FPSR).
# (fsqrt_sd_ovfl_ena) bias the exponent by -0x6000 to form the EXOP
12187 mov.l %d2,-(%sp) # save d2
12188 mov.w FP_SCR0_EX(%a6),%d1 # fetch {sgn,exp}
12189 mov.l %d1,%d2 # make a copy
12190 andi.l &0x7fff,%d1 # strip sign
12191 andi.w &0x8000,%d2 # keep old sign
12192 sub.l %d0,%d1 # subtract scale factor
12193 subi.l &0x6000,%d1 # subtract bias
12195 or.w %d2,%d1 # concat sign,exp
12196 mov.w %d1,FP_SCR0_EX(%a6) # insert new exponent
12197 fmovm.x FP_SCR0(%a6),&0x40 # return EXOP in fp1
12198 mov.l (%sp)+,%d2 # restore d2
12199 bra.b fsqrt_sd_ovfl_dis
12202 # the move in MAY overflow. so...
# (fsqrt_sd_may_ovfl) on the overflow boundary: exponent 0x3fff means
# a certain overflow; otherwise do the sqrt and test the magnitude.
12205 btst &0x0,1+FP_SCR0_EX(%a6) # is exponent 0x3fff?
12206 bne.w fsqrt_sd_ovfl # yes, so overflow
12208 fmov.l &0x0,%fpsr # clear FPSR
12209 fmov.l L_SCR3(%a6),%fpcr # set FPCR
12211 fsqrt.x FP_SCR0(%a6),%fp0 # perform square root
12213 fmov.l %fpsr,%d1 # save status
12214 fmov.l &0x0,%fpcr # clear FPCR
12216 or.l %d1,USER_FPSR(%a6) # save INEX2,N
# NOTE(review): the compare below implies an absolute value; the full
# source presumably takes fabs of the copy first -- verify.
12218 fmov.x %fp0,%fp1 # make a copy of result
12219 fcmp.b %fp1,&0x1 # is |result| >= 1.b?
12220 fbge.w fsqrt_sd_ovfl_tst # yes; overflow has occurred
12222 # no, it didn't overflow; we have correct result
12223 bra.w fsqrt_sd_normal_exit
12225 ##########################################################################
12228 # input is not normalized; what is it?
# (fsqrt_not_norm) dispatch on the source tag in d1; the branches that
# follow each compare are not visible in this excerpt.
12231 cmpi.b %d1,&DENORM # weed out DENORM
12233 cmpi.b %d1,&ZERO # weed out ZERO
12235 cmpi.b %d1,&INF # weed out INF
12237 cmpi.b %d1,&SNAN # weed out SNAN
12244 # fsqrt(+INF) = +INF
12245 # fsqrt(-INF) = OPERR
# (fsqrt_zero) sqrt(+0) = +0, sqrt(-0) = -0
12248 tst.b SRC_EX(%a0) # is ZERO positive or negative?
12249 bmi.b fsqrt_zero_m # negative
12251 fmov.s &0x00000000,%fp0 # return +ZERO
12252 mov.b &z_bmask,FPSR_CC(%a6) # set 'Z' ccode bit
# (fsqrt_zero_m)
12255 fmov.s &0x80000000,%fp0 # return -ZERO
12256 mov.b &z_bmask+neg_bmask,FPSR_CC(%a6) # set 'Z','N' ccode bits
# (fsqrt_inf)
12260 tst.b SRC_EX(%a0) # is INF positive or negative?
12261 bmi.l res_operr # negative
12263 fmovm.x SRC(%a0),&0x80 # return +INF in fp0
12264 mov.b &inf_bmask,FPSR_CC(%a6) # set 'I' ccode bit
12267 #########################################################################
12268 # XDEF **************************************************************** #
12269 # fetch_dreg(): fetch register according to index in d1 #
12271 # XREF **************************************************************** #
12274 # INPUT *************************************************************** #
12275 # d1 = index of register to fetch from #
12277 # OUTPUT ************************************************************** #
12278 # d0 = value of register fetched #
12280 # ALGORITHM *********************************************************** #
12281 # According to the index value in d1 which can range from zero #
12282 # to fifteen, load the corresponding register file value (where #
12283 # address register indexes start at 8). D0/D1/A0/A1/A6/A7 are on the #
12284 # stack. The rest should still be in their original places. #
12286 #########################################################################
12288 # this routine leaves d1 intact for subsequent store_dreg calls.
# jump through the 16-entry offset table; d0 is used as the scratch
# index so that d1 survives for a following store_dreg call.
12291 mov.w (tbl_fdreg.b,%pc,%d1.w*2),%d0
12292 jmp (tbl_fdreg.b,%pc,%d0.w*1)
# (tbl_fdreg) self-relative word offsets: entries 0-7 = d0-d7, 8-15 = a0-a7
12295 short fdreg0 - tbl_fdreg
12296 short fdreg1 - tbl_fdreg
12297 short fdreg2 - tbl_fdreg
12298 short fdreg3 - tbl_fdreg
12299 short fdreg4 - tbl_fdreg
12300 short fdreg5 - tbl_fdreg
12301 short fdreg6 - tbl_fdreg
12302 short fdreg7 - tbl_fdreg
12303 short fdreg8 - tbl_fdreg
12304 short fdreg9 - tbl_fdreg
12305 short fdrega - tbl_fdreg
12306 short fdregb - tbl_fdreg
12307 short fdregc - tbl_fdreg
12308 short fdregd - tbl_fdreg
12309 short fdrege - tbl_fdreg
12310 short fdregf - tbl_fdreg
# stacked registers are fetched from the exception frame; only a
# subset of the fdreg0..fdregf stubs is visible in this excerpt.
12313 mov.l EXC_DREGS+0x0(%a6),%d0
12316 mov.l EXC_DREGS+0x4(%a6),%d0
12337 mov.l EXC_DREGS+0x8(%a6),%d0
12340 mov.l EXC_DREGS+0xc(%a6),%d0
12358 mov.l EXC_A7(%a6),%d0
12361 #########################################################################
12362 # XDEF **************************************************************** #
12363 # store_dreg_l(): store longword to data register specified by d1 #
12365 # XREF **************************************************************** #
12368 # INPUT *************************************************************** #
12369 # d0 = longword value to store #
12370 # d1 = index of register to fetch from #
12372 # OUTPUT ************************************************************** #
12373 # (data register is updated) #
12375 # ALGORITHM *********************************************************** #
12376 # According to the index value in d1, store the longword value #
12377 # in d0 to the corresponding data register. D0/D1 are on the stack #
12378 # while the rest are in their initial places. #
12380 #########################################################################
12382 global store_dreg_l
# jump through the 8-entry offset table indexed by d1 (d0-d7)
12384 mov.w (tbl_sdregl.b,%pc,%d1.w*2),%d1
12385 jmp (tbl_sdregl.b,%pc,%d1.w*1)
12388 short sdregl0 - tbl_sdregl
12389 short sdregl1 - tbl_sdregl
12390 short sdregl2 - tbl_sdregl
12391 short sdregl3 - tbl_sdregl
12392 short sdregl4 - tbl_sdregl
12393 short sdregl5 - tbl_sdregl
12394 short sdregl6 - tbl_sdregl
12395 short sdregl7 - tbl_sdregl
# d0/d1 live in the exception frame, so they are stored there; only a
# subset of the sdregl0..sdregl7 stubs is visible in this excerpt.
12398 mov.l %d0,EXC_DREGS+0x0(%a6)
12401 mov.l %d0,EXC_DREGS+0x4(%a6)
12422 #########################################################################
12423 # XDEF **************************************************************** #
12424 # store_dreg_w(): store word to data register specified by d1 #
12426 # XREF **************************************************************** #
12429 # INPUT *************************************************************** #
12430 # d0 = word value to store #
12431 # d1 = index of register to fetch from #
12433 # OUTPUT ************************************************************** #
12434 # (data register is updated) #
12436 # ALGORITHM *********************************************************** #
12437 # According to the index value in d1, store the word value #
12438 # in d0 to the corresponding data register. D0/D1 are on the stack #
12439 # while the rest are in their initial places. #
12441 #########################################################################
12443 global store_dreg_w
# jump through the 8-entry offset table indexed by d1 (d0-d7)
12445 mov.w (tbl_sdregw.b,%pc,%d1.w*2),%d1
12446 jmp (tbl_sdregw.b,%pc,%d1.w*1)
12449 short sdregw0 - tbl_sdregw
12450 short sdregw1 - tbl_sdregw
12451 short sdregw2 - tbl_sdregw
12452 short sdregw3 - tbl_sdregw
12453 short sdregw4 - tbl_sdregw
12454 short sdregw5 - tbl_sdregw
12455 short sdregw6 - tbl_sdregw
12456 short sdregw7 - tbl_sdregw
# the +2 offset stores into the low word of the stacked longword;
# only a subset of the sdregw0..sdregw7 stubs is visible here.
12459 mov.w %d0,2+EXC_DREGS+0x0(%a6)
12462 mov.w %d0,2+EXC_DREGS+0x4(%a6)
12483 #########################################################################
12484 # XDEF **************************************************************** #
12485 # store_dreg_b(): store byte to data register specified by d1 #
12487 # XREF **************************************************************** #
12490 # INPUT *************************************************************** #
12491 # d0 = byte value to store #
12492 # d1 = index of register to fetch from #
12494 # OUTPUT ************************************************************** #
12495 # (data register is updated) #
12497 # ALGORITHM *********************************************************** #
12498 # According to the index value in d1, store the byte value #
12499 # in d0 to the corresponding data register. D0/D1 are on the stack #
12500 # while the rest are in their initial places. #
12502 #########################################################################
12504 global store_dreg_b
# jump through the 8-entry offset table indexed by d1 (d0-d7)
12506 mov.w (tbl_sdregb.b,%pc,%d1.w*2),%d1
12507 jmp (tbl_sdregb.b,%pc,%d1.w*1)
12510 short sdregb0 - tbl_sdregb
12511 short sdregb1 - tbl_sdregb
12512 short sdregb2 - tbl_sdregb
12513 short sdregb3 - tbl_sdregb
12514 short sdregb4 - tbl_sdregb
12515 short sdregb5 - tbl_sdregb
12516 short sdregb6 - tbl_sdregb
12517 short sdregb7 - tbl_sdregb
# the +3 offset stores into the low byte of the stacked longword;
# only a subset of the sdregb0..sdregb7 stubs is visible here.
12520 mov.b %d0,3+EXC_DREGS+0x0(%a6)
12523 mov.b %d0,3+EXC_DREGS+0x4(%a6)
12544 #########################################################################
12545 # XDEF **************************************************************** #
12546 # inc_areg(): increment an address register by the value in d0 #
12548 # XREF **************************************************************** #
12551 # INPUT *************************************************************** #
12552 # d0 = amount to increment by #
12553 # d1 = index of address register to increment #
12555 # OUTPUT ************************************************************** #
12556 # (address register is updated) #
12558 # ALGORITHM *********************************************************** #
12559 # Typically used for an instruction w/ a post-increment <ea>, #
12560 # this routine adds the increment value in d0 to the address register #
12561 # specified by d1. A0/A1/A6/A7 reside on the stack. The rest reside #
12562 # in their original places. #
12563 # For a7, if the increment amount is one, then we have to #
12564 # increment by two. For any a7 update, set the mia7_flag so that if #
12565 # an access error exception occurs later in emulation, this address #
12566 # register update can be undone. #
12568 #########################################################################
# jump through the 8-entry offset table indexed by d1 (a0-a7)
12572 mov.w (tbl_iareg.b,%pc,%d1.w*2),%d1
12573 jmp (tbl_iareg.b,%pc,%d1.w*1)
12576 short iareg0 - tbl_iareg
12577 short iareg1 - tbl_iareg
12578 short iareg2 - tbl_iareg
12579 short iareg3 - tbl_iareg
12580 short iareg4 - tbl_iareg
12581 short iareg5 - tbl_iareg
12582 short iareg6 - tbl_iareg
12583 short iareg7 - tbl_iareg
# a0/a1 are on the stack (frame slots); a2-a5 are updated in place
12585 iareg0: add.l %d0,EXC_DREGS+0x8(%a6)
12587 iareg1: add.l %d0,EXC_DREGS+0xc(%a6)
12589 iareg2: add.l %d0,%a2
12591 iareg3: add.l %d0,%a3
12593 iareg4: add.l %d0,%a4
12595 iareg5: add.l %d0,%a5
# NOTE(review): per the header a6 is on the stack; an EXC_A6 frame
# offset would be expected here rather than a bare (%a6) -- verify
# against the unabridged source.
12597 iareg6: add.l %d0,(%a6)
# a7: flag the update so an access error can undo it; byte-sized (An)+
# on a7 bumps by two to keep the stack pointer word-aligned
12599 iareg7: mov.b &mia7_flg,SPCOND_FLG(%a6)
12602 add.l %d0,EXC_A7(%a6)
12605 addq.l &0x2,EXC_A7(%a6)
12608 #########################################################################
12609 # XDEF **************************************************************** #
12610 # dec_areg(): decrement an address register by the value in d0 #
12612 # XREF **************************************************************** #
12615 # INPUT *************************************************************** #
12616 # d0 = amount to decrement by #
12617 # d1 = index of address register to decrement #
12619 # OUTPUT ************************************************************** #
12620 # (address register is updated) #
12622 # ALGORITHM *********************************************************** #
12623 # Typically used for an instruction w/ a pre-decrement <ea>, #
12624 # this routine subtracts the decrement value in d0 from the address #
12625 # register specified by d1. A0/A1/A6/A7 reside on the stack. The rest #
12626 # reside in their original places. #
12627 # For a7, if the decrement amount is one, then we have to #
12628 # decrement by two. For any a7 update, set the mda7_flag so that if #
12629 # an access error exception occurs later in emulation, this address #
12630 # register update can be undone. #
12632 #########################################################################
# jump through the 8-entry offset table indexed by d1 (a0-a7)
12636 mov.w (tbl_dareg.b,%pc,%d1.w*2),%d1
12637 jmp (tbl_dareg.b,%pc,%d1.w*1)
12640 short dareg0 - tbl_dareg
12641 short dareg1 - tbl_dareg
12642 short dareg2 - tbl_dareg
12643 short dareg3 - tbl_dareg
12644 short dareg4 - tbl_dareg
12645 short dareg5 - tbl_dareg
12646 short dareg6 - tbl_dareg
12647 short dareg7 - tbl_dareg
# a0/a1 are on the stack (frame slots); a2-a5 are updated in place
12649 dareg0: sub.l %d0,EXC_DREGS+0x8(%a6)
12651 dareg1: sub.l %d0,EXC_DREGS+0xc(%a6)
12653 dareg2: sub.l %d0,%a2
12655 dareg3: sub.l %d0,%a3
12657 dareg4: sub.l %d0,%a4
12659 dareg5: sub.l %d0,%a5
# NOTE(review): per the header a6 is on the stack; an EXC_A6 frame
# offset would be expected here rather than a bare (%a6) -- verify
# against the unabridged source.
12661 dareg6: sub.l %d0,(%a6)
# a7: flag the update so an access error can undo it; byte-sized -(a7)
# decrements by two to keep the stack pointer word-aligned
12663 dareg7: mov.b &mda7_flg,SPCOND_FLG(%a6)
12666 sub.l %d0,EXC_A7(%a6)
12669 subq.l &0x2,EXC_A7(%a6)
12672 ##############################################################################
12674 #########################################################################
12675 # XDEF **************************************************************** #
12676 # load_fpn1(): load FP register value into FP_SRC(a6). #
12678 # XREF **************************************************************** #
12681 # INPUT *************************************************************** #
12682 # d0 = index of FP register to load #
12684 # OUTPUT ************************************************************** #
12685 # FP_SRC(a6) = value loaded from FP register file #
12687 # ALGORITHM *********************************************************** #
12688 # Using the index in d0, load FP_SRC(a6) with a number from the #
12689 # FP register file. #
12691 #########################################################################
# jump through the 8-entry offset table indexed by d0 (fp0-fp7)
12695 mov.w (tbl_load_fpn1.b,%pc,%d0.w*2), %d0
12696 jmp (tbl_load_fpn1.b,%pc,%d0.w*1)
12699 short load_fpn1_0 - tbl_load_fpn1
12700 short load_fpn1_1 - tbl_load_fpn1
12701 short load_fpn1_2 - tbl_load_fpn1
12702 short load_fpn1_3 - tbl_load_fpn1
12703 short load_fpn1_4 - tbl_load_fpn1
12704 short load_fpn1_5 - tbl_load_fpn1
12705 short load_fpn1_6 - tbl_load_fpn1
12706 short load_fpn1_7 - tbl_load_fpn1
# fp0/fp1 were saved to the exception frame, so copy the 12-byte
# extended value from there; a0 returns a pointer to FP_SRC
12709 mov.l 0+EXC_FP0(%a6), 0+FP_SRC(%a6)
12710 mov.l 4+EXC_FP0(%a6), 4+FP_SRC(%a6)
12711 mov.l 8+EXC_FP0(%a6), 8+FP_SRC(%a6)
12712 lea FP_SRC(%a6), %a0
12715 mov.l 0+EXC_FP1(%a6), 0+FP_SRC(%a6)
12716 mov.l 4+EXC_FP1(%a6), 4+FP_SRC(%a6)
12717 mov.l 8+EXC_FP1(%a6), 8+FP_SRC(%a6)
12718 lea FP_SRC(%a6), %a0
# fp2-fp7 are still live in the register file; fmovm them out directly
12721 fmovm.x &0x20, FP_SRC(%a6)
12722 lea FP_SRC(%a6), %a0
12725 fmovm.x &0x10, FP_SRC(%a6)
12726 lea FP_SRC(%a6), %a0
12729 fmovm.x &0x08, FP_SRC(%a6)
12730 lea FP_SRC(%a6), %a0
12733 fmovm.x &0x04, FP_SRC(%a6)
12734 lea FP_SRC(%a6), %a0
12737 fmovm.x &0x02, FP_SRC(%a6)
12738 lea FP_SRC(%a6), %a0
12741 fmovm.x &0x01, FP_SRC(%a6)
12742 lea FP_SRC(%a6), %a0
12745 #############################################################################
12747 #########################################################################
12748 # XDEF **************************************************************** #
12749 # load_fpn2(): load FP register value into FP_DST(a6). #
12751 # XREF **************************************************************** #
12754 # INPUT *************************************************************** #
12755 # d0 = index of FP register to load #
12757 # OUTPUT ************************************************************** #
12758 # FP_DST(a6) = value loaded from FP register file #
12760 # ALGORITHM *********************************************************** #
12761 # Using the index in d0, load FP_DST(a6) with a number from the #
12762 # FP register file. #
12764 #########################################################################
# jump through the 8-entry offset table indexed by d0 (fp0-fp7);
# identical to load_fpn1 but targets FP_DST instead of FP_SRC
12768 mov.w (tbl_load_fpn2.b,%pc,%d0.w*2), %d0
12769 jmp (tbl_load_fpn2.b,%pc,%d0.w*1)
12772 short load_fpn2_0 - tbl_load_fpn2
12773 short load_fpn2_1 - tbl_load_fpn2
12774 short load_fpn2_2 - tbl_load_fpn2
12775 short load_fpn2_3 - tbl_load_fpn2
12776 short load_fpn2_4 - tbl_load_fpn2
12777 short load_fpn2_5 - tbl_load_fpn2
12778 short load_fpn2_6 - tbl_load_fpn2
12779 short load_fpn2_7 - tbl_load_fpn2
# fp0/fp1 were saved to the exception frame, so copy the 12-byte
# extended value from there; a0 returns a pointer to FP_DST
12782 mov.l 0+EXC_FP0(%a6), 0+FP_DST(%a6)
12783 mov.l 4+EXC_FP0(%a6), 4+FP_DST(%a6)
12784 mov.l 8+EXC_FP0(%a6), 8+FP_DST(%a6)
12785 lea FP_DST(%a6), %a0
12788 mov.l 0+EXC_FP1(%a6), 0+FP_DST(%a6)
12789 mov.l 4+EXC_FP1(%a6), 4+FP_DST(%a6)
12790 mov.l 8+EXC_FP1(%a6), 8+FP_DST(%a6)
12791 lea FP_DST(%a6), %a0
# fp2-fp7 are still live in the register file; fmovm them out directly
12794 fmovm.x &0x20, FP_DST(%a6)
12795 lea FP_DST(%a6), %a0
12798 fmovm.x &0x10, FP_DST(%a6)
12799 lea FP_DST(%a6), %a0
12802 fmovm.x &0x08, FP_DST(%a6)
12803 lea FP_DST(%a6), %a0
12806 fmovm.x &0x04, FP_DST(%a6)
12807 lea FP_DST(%a6), %a0
12810 fmovm.x &0x02, FP_DST(%a6)
12811 lea FP_DST(%a6), %a0
12814 fmovm.x &0x01, FP_DST(%a6)
12815 lea FP_DST(%a6), %a0
12818 #############################################################################
12820 #########################################################################
12821 # XDEF **************************************************************** #
12822 # store_fpreg(): store an fp value to the fpreg designated d0. #
12824 # XREF **************************************************************** #
12827 # INPUT *************************************************************** #
12828 # fp0 = extended precision value to store #
12829 # d0 = index of floating-point register #
12831 # OUTPUT ************************************************************** #
12834 # ALGORITHM *********************************************************** #
12835 # Store the value in fp0 to the FP register designated by the #
12836 # value in d0. The FP number can be DENORM or SNAN so we have to be #
12837 # careful that we don't take an exception here. #
12839 #########################################################################
# jump through the 8-entry offset table indexed by d0 (fp0-fp7)
12843 mov.w (tbl_store_fpreg.b,%pc,%d0.w*2), %d0
12844 jmp (tbl_store_fpreg.b,%pc,%d0.w*1)
12847 short store_fpreg_0 - tbl_store_fpreg
12848 short store_fpreg_1 - tbl_store_fpreg
12849 short store_fpreg_2 - tbl_store_fpreg
12850 short store_fpreg_3 - tbl_store_fpreg
12851 short store_fpreg_4 - tbl_store_fpreg
12852 short store_fpreg_5 - tbl_store_fpreg
12853 short store_fpreg_6 - tbl_store_fpreg
12854 short store_fpreg_7 - tbl_store_fpreg
# fp0/fp1 live in the exception frame: just store fp0 there
12857 fmovm.x &0x80, EXC_FP0(%a6)
12860 fmovm.x &0x80, EXC_FP1(%a6)
# fp2-fp7: bounce fp0 through the stack with fmovm (which does not
# take exceptions on DENORM/SNAN data, per the header above)
12863 fmovm.x &0x01, -(%sp)
12864 fmovm.x (%sp)+, &0x20
12867 fmovm.x &0x01, -(%sp)
12868 fmovm.x (%sp)+, &0x10
12871 fmovm.x &0x01, -(%sp)
12872 fmovm.x (%sp)+, &0x08
12875 fmovm.x &0x01, -(%sp)
12876 fmovm.x (%sp)+, &0x04
12879 fmovm.x &0x01, -(%sp)
12880 fmovm.x (%sp)+, &0x02
12883 fmovm.x &0x01, -(%sp)
12884 fmovm.x (%sp)+, &0x01
12887 #########################################################################
12888 # XDEF **************************************************************** #
12889 # get_packed(): fetch a packed operand from memory and then #
12890 # convert it to a floating-point binary number. #
12892 # XREF **************************************************************** #
12893 # _dcalc_ea() - calculate the correct <ea> #
12894 # _mem_read() - fetch the packed operand from memory #
12895 # facc_in_x() - the fetch failed so jump to special exit code #
12896 # decbin() - convert packed to binary extended precision #
12898 # INPUT *************************************************************** #
12901 # OUTPUT ************************************************************** #
12902 # If no failure on _mem_read(): #
12903 # FP_SRC(a6) = packed operand now as a binary FP number #
12905 # ALGORITHM *********************************************************** #
12906 # Get the correct <ea> which is the value on the exception stack #
12907 # frame w/ maybe a correction factor if the <ea> is -(an) or (an)+. #
12908 # Then, fetch the operand from memory. If the fetch fails, exit #
12909 # through facc_in_x(). #
12910 # If the packed operand is a ZERO,NAN, or INF, convert it to #
12911 # its binary representation here. Else, call decbin() which will #
12912 # convert the packed value to an extended precision binary value. #
12914 #########################################################################
12916 # the stacked <ea> for packed is correct except for -(An).
12917 # the base reg must be updated for both -(An) and (An)+.
12920 mov.l &0xc,%d0 # packed is 12 bytes
12921 bsr.l _dcalc_ea # fetch <ea>; correct An
12923 lea FP_SRC(%a6),%a1 # pass: ptr to super dst
12924 mov.l &0xc,%d0 # pass: 12 bytes
12925 bsr.l _dmem_read # read packed operand
12927 tst.l %d1 # did dfetch fail?
12928 bne.l facc_in_x # yes
12930 # The packed operand is an INF or a NAN if the exponent field is all ones.
12931 bfextu FP_SRC(%a6){&1:&15},%d0 # get exp
12932 cmpi.w %d0,&0x7fff # INF or NAN?
12933 bne.b gp_try_zero # no
12934 rts # operand is an INF or NAN
12936 # The packed operand is a zero if the mantissa is all zero, else it's
12937 # a normal packed op.
# (gp_try_zero) a packed zero needs no conversion either
12939 mov.b 3+FP_SRC(%a6),%d0 # get byte 4
12940 andi.b &0x0f,%d0 # clear all but last nybble
12941 bne.b gp_not_spec # not a zero
12942 tst.l FP_SRC_HI(%a6) # is lw 2 zero?
12943 bne.b gp_not_spec # not a zero
12944 tst.l FP_SRC_LO(%a6) # is lw 3 zero?
12945 bne.b gp_not_spec # not a zero
12946 rts # operand is a ZERO
# (gp_not_spec) normal packed number: convert via decbin()
12948 lea FP_SRC(%a6),%a0 # pass: ptr to packed op
12949 bsr.l decbin # convert to extended
12950 fmovm.x &0x80,FP_SRC(%a6) # make this the srcop
12953 #########################################################################
12954 # decbin(): Converts normalized packed bcd value pointed to by register #
12955 # a0 to extended-precision value in fp0. #
12957 # INPUT *************************************************************** #
12958 # a0 = pointer to normalized packed bcd value #
12960 # OUTPUT ************************************************************** #
12961 # fp0 = exact fp representation of the packed bcd value. #
12963 # ALGORITHM *********************************************************** #
12964 # Expected is a normal bcd (i.e. non-exceptional; all inf, zero, #
12965 # and NaN operands are dispatched without entering this routine) #
12966 # value in 68881/882 format at location (a0). #
12968 # A1. Convert the bcd exponent to binary by successive adds and #
12969 # muls. Set the sign according to SE. Subtract 16 to compensate #
12970 # for the mantissa which is to be interpreted as 17 integer #
12971 # digits, rather than 1 integer and 16 fraction digits. #
12972 # Note: this operation can never overflow. #
12974 # A2. Convert the bcd mantissa to binary by successive #
12975 # adds and muls in FP0. Set the sign according to SM. #
12976 # The mantissa digits will be converted with the decimal point #
12977 # assumed following the least-significant digit. #
12978 # Note: this operation can never overflow. #
12980 # A3. Count the number of leading/trailing zeros in the #
12981 # bcd string. If SE is positive, count the leading zeros; #
12982 # if negative, count the trailing zeros. Set the adjusted #
12983 # exponent equal to the exponent from A1 and the zero count #
12984 # added if SM = 1 and subtracted if SM = 0. Scale the #
12985 # mantissa the equivalent of forcing in the bcd value: #
12987 # SM = 0 a non-zero digit in the integer position #
12988 # SM = 1 a non-zero digit in Mant0, lsd of the fraction #
12990 # this will insure that any value, regardless of its #
12991 # representation (ex. 0.1E2, 1E1, 10E0, 100E-1), is converted #
12994 # A4. Calculate the factor 10^exp in FP1 using a table of #
12995 # 10^(2^n) values. To reduce the error in forming factors #
12996 # greater than 10^27, a directed rounding scheme is used with #
12997 # tables rounded to RN, RM, and RP, according to the table #
12998 # in the comments of the pwrten section. #
13000 # A5. Form the final binary number by scaling the mantissa by #
13001 # the exponent factor. This is done by multiplying the #
13002 # mantissa in FP0 by the factor in FP1 if the adjusted #
13003 # exponent sign is positive, and dividing FP0 by FP1 if #
13004 # it is negative. #
13006 # Clean up and return. Check if the final mul or div was inexact. #
13007 # If so, set INEX1 in USER_FPSR. #
13009 #########################################################################
13012 # PTENRN, PTENRM, and PTENRP are arrays of powers of 10 rounded
13013 # to nearest, minus, and plus, respectively. The tables include
13014 # 10**{1,2,4,8,16,32,64,128,256,512,1024,2048,4096}. No rounding
13015 # is required until the power is greater than 27, however, all
13016 # tables include the first 5 for ease of indexing.
# decbin entry: copy the caller's packed operand into scratch space so it
# is never modified, preserve d2-d5/fp1, then convert the 3-digit BCD
# exponent to binary in d1.
13032 mov.l 0x0(%a0),FP_SCR0_EX(%a6) # make a copy of input
13033 mov.l 0x4(%a0),FP_SCR0_HI(%a6) # so we don't alter it
13034 mov.l 0x8(%a0),FP_SCR0_LO(%a6)
13036 lea FP_SCR0(%a6),%a0
13038 movm.l &0x3c00,-(%sp) # save d2-d5
13039 fmovm.x &0x1,-(%sp) # save fp1
13041 # Calculate exponent:
13042 # 1. Copy bcd value in memory for use as a working copy.
13043 # 2. Calculate absolute value of exponent in d1 by mul and add.
13044 # 3. Correct for exponent sign.
13045 # 4. Subtract 16 to compensate for interpreting the mant as all integer digits.
13046 # (i.e., all digits assumed left of the decimal point.)
13051 # (*) d0: temp digit storage
13052 # (*) d1: accumulator for binary exponent
13053 # (*) d2: digit count
13054 # (*) d3: offset pointer
13055 # ( ) d4: first word of bcd
13056 # ( ) a0: pointer to working bcd value
13057 # ( ) a6: pointer to original bcd value
13058 # (*) FP_SCR1: working copy of original bcd value
13059 # (*) L_SCR1: copy of original exponent word
13062 mov.l &EDIGITS,%d2 # # of nibbles (digits) in fraction part
13063 mov.l &ESTRT,%d3 # counter to pick up digits
13064 mov.l (%a0),%d4 # get first word of bcd
13065 clr.l %d1 # zero d1 for accumulator
# (loop target e_gd presumably lands here -- label not in extract;
#  one BCD exponent digit is accumulated per iteration: d1 = d1*10 + digit)
13067 mulu.l &0xa,%d1 # mul partial product by one digit place
13068 bfextu %d4{%d3:&4},%d0 # get the digit and zero extend into d0
13069 add.l %d0,%d1 # d1 = d1 + d0
13070 addq.b &4,%d3 # advance d3 to the next digit
13071 dbf.w %d2,e_gd # if we have used all 3 digits, exit loop
13072 btst &30,%d4 # get SE
13073 beq.b e_pos # don't negate if pos
13074 neg.l %d1 # negate before subtracting
# (branch target e_pos presumably lands here -- label not in extract)
13076 sub.l &16,%d1 # sub to compensate for shift of mant
13077 bge.b e_save # if still pos, do not neg
13078 neg.l %d1 # now negative, make pos and set SE
13079 or.l &0x40000000,%d4 # set SE in d4,
13080 or.l &0x40000000,(%a0) # and in working bcd
# (branch target e_save presumably lands here -- label not in extract)
13082 mov.l %d1,-(%sp) # save exp on stack
13085 # Calculate mantissa:
13086 # 1. Calculate absolute value of mantissa in fp0 by mul and add.
13087 # 2. Correct for mantissa sign.
13088 # (i.e., all digits assumed left of the decimal point.)
13093 # (*) d0: temp digit storage
13094 # (*) d1: lword counter
13095 # (*) d2: digit count
13096 # (*) d3: offset pointer
13097 # ( ) d4: words 2 and 3 of bcd
13098 # ( ) a0: pointer to working bcd value
13099 # ( ) a6: pointer to original bcd value
13100 # (*) fp0: mantissa accumulator
13101 # ( ) FP_SCR1: working copy of original bcd value
13102 # ( ) L_SCR1: copy of original exponent word
13105 mov.l &1,%d1 # word counter, init to 1
13106 fmov.s &0x00000000,%fp0 # accumulator
13109 # Since the packed number has a long word between the first & second parts,
13110 # get the integer digit then skip down & get the rest of the
13111 # mantissa. We will unroll the loop once.
13113 bfextu (%a0){&28:&4},%d0 # integer part is ls digit in long word
13114 fadd.b %d0,%fp0 # add digit to sum in fp0
13117 # Get the rest of the mantissa.
# (loop target loadlw presumably lands here -- label not in extract)
13120 mov.l (%a0,%d1.L*4),%d4 # load mantissa longword into d4
13121 mov.l &FSTRT,%d3 # counter to pick up digits
13122 mov.l &FNIBS,%d2 # reset number of digits per a0 ptr
# (inner-loop target md2b presumably lands here -- label not in extract;
#  Horner accumulation: fp0 = fp0*10 + digit, one BCD nibble per pass)
13124 fmul.s &0x41200000,%fp0 # fp0 = fp0 * 10
13125 bfextu %d4{%d3:&4},%d0 # get the digit and zero extend
13126 fadd.b %d0,%fp0 # fp0 = fp0 + digit
13129 # If all the digits (8) in that long word have been converted (d2=0),
13130 # then inc d1 (=2) to point to the next long word and reset d3 to 0
13131 # to initialize the digit offset, and set d2 to 7 for the digit count;
13132 # else continue with this long word.
13134 addq.b &4,%d3 # advance d3 to the next digit
13135 dbf.w %d2,md2b # check for last digit in this lw
13137 addq.l &1,%d1 # inc lw pointer in mantissa
13138 cmp.l %d1,&2 # test for last lw
13139 ble.b loadlw # if not, get last one
13141 # Check the sign of the mant and make the value in fp0 the same sign.
13144 btst &31,(%a0) # test sign of the mantissa
13145 beq.b ap_st_z # if clear, go to append/strip zeros
13146 fneg.x %fp0 # if set, negate fp0
13148 # Append/strip zeros:
13150 # For adjusted exponents which have an absolute value greater than 27*,
13151 # this routine calculates the amount needed to normalize the mantissa
13152 # for the adjusted exponent. That number is subtracted from the exp
13153 # if the exp was positive, and added if it was negative. The purpose
13154 # of this is to reduce the value of the exponent and the possibility
13155 # of error in calculation of pwrten.
13157 # 1. Branch on the sign of the adjusted exponent.
13158 # 2p.(positive exp)
13159 # 2. Check M16 and the digits in lwords 2 and 3 in descending order.
13160 # 3. Add one for each zero encountered until a non-zero digit.
13161 # 4. Subtract the count from the exp.
13162 # 5. Check if the exp has crossed zero in #3 above; make the exp abs
13164 # 6. Multiply the mantissa by 10**count.
13165 # 2n.(negative exp)
13166 # 2. Check the digits in lwords 3 and 2 in descending order.
13167 # 3. Add one for each zero encountered until a non-zero digit.
13168 # 4. Add the count to the exp.
13169 # 5. Check if the exp has crossed zero in #3 above; clear SE.
13170 # 6. Divide the mantissa by 10**count.
13172 # *Why 27? If the adjusted exponent is within -28 < expA < 28, then
13173 # any adjustment due to append/strip zeros will drive the resultant
13174 # exponent towards zero. Since all pwrten constants with a power
13175 # of 27 or less are exact, there is no need to use this routine to
13176 # attempt to lessen the resultant exponent.
13181 # (*) d0: temp digit storage
13182 # (*) d1: zero count
13183 # (*) d2: digit count
13184 # (*) d3: offset pointer
13185 # ( ) d4: first word of bcd
13186 # (*) d5: lword counter
13187 # ( ) a0: pointer to working bcd value
13188 # ( ) FP_SCR1: working copy of original bcd value
13189 # ( ) L_SCR1: copy of original exponent word
13192 # First check the absolute value of the exponent to see if this
13193 # routine is necessary. If so, then check the sign of the exponent
13194 # and do append (+) or strip (-) zeros accordingly.
13195 # This section handles a positive adjusted exponent.
# (section entry ap_st_z / ap_st_p -- labels not in extract)
13198 mov.l (%sp),%d1 # load expA for range test
13199 cmp.l %d1,&27 # compare expA with 27
13200 ble.w pwrten # if abs(expA) <28, skip ap/st zeros
13201 btst &30,(%a0) # check sign of exp
13202 bne.b ap_st_n # if neg, go to neg side
# Count leading zero digits: M16 first, then lwords 2 and 3.
13203 clr.l %d1 # zero count reg
13204 mov.l (%a0),%d4 # load lword 1 to d4
13205 bfextu %d4{&28:&4},%d0 # get M16 in d0
13206 bne.b ap_p_fx # if M16 is non-zero, go fix exp
13207 addq.l &1,%d1 # inc zero count
13208 mov.l &1,%d5 # init lword counter
13209 mov.l (%a0,%d5.L*4),%d4 # get lword 2 to d4
13210 bne.b ap_p_cl # if lw 2 is non-zero, go check its digits
13211 addq.l &8,%d1 # and inc count by 8
13212 addq.l &1,%d5 # inc lword counter
13213 mov.l (%a0,%d5.L*4),%d4 # get lword 3 to d4
# (digit-scan entry ap_p_cl presumably lands here -- label not in extract)
13215 clr.l %d3 # init offset reg
13216 mov.l &7,%d2 # init digit counter
13218 bfextu %d4{%d3:&4},%d0 # get digit
13219 bne.b ap_p_fx # if non-zero, go to fix exp
13220 addq.l &4,%d3 # point to next digit
13221 addq.l &1,%d1 # inc digit counter
13222 dbf.w %d2,ap_p_gd # get next digit
# (branch target ap_p_fx presumably lands here -- label not in extract)
13224 mov.l %d1,%d0 # copy counter to d0
13225 mov.l (%sp),%d1 # get adjusted exp from memory
13226 sub.l %d0,%d1 # subtract count from exp
13227 bge.b ap_p_fm # if still pos, go to pwrten
13228 neg.l %d1 # now its neg; get abs
13229 mov.l (%a0),%d4 # load lword 1 to d4
13230 or.l &0x40000000,%d4 # and set SE in d4
13231 or.l &0x40000000,(%a0) # and in memory
13233 # Calculate the mantissa multiplier to compensate for the striping of
13234 # zeros from the mantissa.
# Scale fp0 by 10^count via binary decomposition of the count in d0,
# stepping through the 12-byte PTENRN table entries.
13237 lea.l PTENRN(%pc),%a1 # get address of power-of-ten table
13238 clr.l %d3 # init table index
13239 fmov.s &0x3f800000,%fp1 # init fp1 to 1
13240 mov.l &3,%d2 # init d2 to count bits in counter
13242 asr.l &1,%d0 # shift lsb into carry
13243 bcc.b ap_p_en # if bit is zero, skip the mul
13244 fmul.x (%a1,%d3),%fp1 # mul by 10**(d3_bit_no)
13246 add.l &12,%d3 # inc d3 to next rtable entry
13247 tst.l %d0 # check if d0 is zero
13248 bne.b ap_p_el # if not, get next bit
13249 fmul.x %fp1,%fp0 # mul mantissa by 10**(no_bits_shifted)
13250 bra.b pwrten # go calc pwrten
13252 # This section handles a negative adjusted exponent.
# (section entry ap_st_n -- label not in extract)
# Count trailing zero digits: lword 3 first, then lword 2.
13255 clr.l %d1 # clr counter
13256 mov.l &2,%d5 # set up d5 to point to lword 3
13257 mov.l (%a0,%d5.L*4),%d4 # get lword 3
13258 bne.b ap_n_cl # if not zero, check digits
13259 sub.l &1,%d5 # dec d5 to point to lword 2
13260 addq.l &8,%d1 # inc counter by 8
13261 mov.l (%a0,%d5.L*4),%d4 # get lword 2
# (digit-scan entry ap_n_cl presumably lands here -- label not in extract)
13263 mov.l &28,%d3 # point to last digit
13264 mov.l &7,%d2 # init digit counter
13266 bfextu %d4{%d3:&4},%d0 # get digit
13267 bne.b ap_n_fx # if non-zero, go to exp fix
13268 subq.l &4,%d3 # point to previous digit
13269 addq.l &1,%d1 # inc digit counter
13270 dbf.w %d2,ap_n_gd # get next digit
# (branch target ap_n_fx presumably lands here -- label not in extract)
13272 mov.l %d1,%d0 # copy counter to d0
13273 mov.l (%sp),%d1 # get adjusted exp from memory
13274 sub.l %d0,%d1 # subtract count from exp
13275 bgt.b ap_n_fm # if still pos, go fix mantissa
13276 neg.l %d1 # take abs of exp and clr SE
13277 mov.l (%a0),%d4 # load lword 1 to d4
13278 and.l &0xbfffffff,%d4 # and clr SE in d4
13279 and.l &0xbfffffff,(%a0) # and in memory
13281 # Calculate the mantissa multiplier to compensate for the appending of
13282 # zeros to the mantissa.
# Scale fp0 down by 10^count (binary decomposition of count, as in the
# positive-exponent path, but ending with a divide instead of a multiply).
13285 lea.l PTENRN(%pc),%a1 # get address of power-of-ten table
13286 clr.l %d3 # init table index
13287 fmov.s &0x3f800000,%fp1 # init fp1 to 1
13288 mov.l &3,%d2 # init d2 to count bits in counter
13290 asr.l &1,%d0 # shift lsb into carry
13291 bcc.b ap_n_en # if bit is zero, skip the mul
13292 fmul.x (%a1,%d3),%fp1 # mul by 10**(d3_bit_no)
13294 add.l &12,%d3 # inc d3 to next rtable entry
13295 tst.l %d0 # check if d0 is zero
13296 bne.b ap_n_el # if not, get next bit
13297 fdiv.x %fp1,%fp0 # div mantissa by 10**(no_bits_shifted)
13300 # Calculate power-of-ten factor from adjusted and shifted exponent.
13307 # (*) d2: {FPCR[6:5],SM,SE} as index in RTABLE; temp
13308 # (*) d3: FPCR work copy
13309 # ( ) d4: first word of bcd
13310 # (*) a1: RTABLE pointer
13314 # (*) d3: PWRTxx table index
13315 # ( ) a0: pointer to working copy of bcd
13316 # (*) a1: PWRTxx pointer
13317 # (*) fp1: power-of-ten accumulator
13319 # Pwrten calculates the exponent factor in the selected rounding mode
13320 # according to the following table:
13322 # Sign of Mant Sign of Exp Rounding Mode PWRTEN Rounding Mode
# (section entry pwrten -- label not in extract)
# Build a 4-bit index {FPCR[6:5],SM,SE} into RTABLE, install the directed
# rounding mode it selects, and pick the matching PTENRN/RM/RP table.
13343 mov.l USER_FPCR(%a6),%d3 # get user's FPCR
13344 bfextu %d3{&26:&2},%d2 # isolate rounding mode bits
13345 mov.l (%a0),%d4 # reload 1st bcd word to d4
13346 asl.l &2,%d2 # format d2 to be
13347 bfextu %d4{&0:&2},%d0 # {FPCR[6],FPCR[5],SM,SE}
13348 add.l %d0,%d2 # in d2 as index into RTABLE
13349 lea.l RTABLE(%pc),%a1 # load rtable base
13350 mov.b (%a1,%d2),%d0 # load new rounding bits from table
13351 clr.l %d3 # clear d3 to force no exc and extended
13352 bfins %d0,%d3{&26:&2} # stuff new rounding bits in FPCR
13353 fmov.l %d3,%fpcr # write new FPCR
13354 asr.l &1,%d0 # write correct PTENxx table
13355 bcc.b not_rp # to a1
13356 lea.l PTENRP(%pc),%a1 # it is RP
13357 bra.b calc_p # go to init section
# (branch target not_rp presumably lands here -- label not in extract)
13359 asr.l &1,%d0 # keep checking
# NOTE(review): a conditional branch (original line 13360, presumably
# "bcc.b not_rm") appears to be missing from this extract.
13361 lea.l PTENRM(%pc),%a1 # it is RM
13362 bra.b calc_p # go to init section
# (branch target not_rm presumably lands here -- label not in extract)
13364 lea.l PTENRN(%pc),%a1 # it is RN
# (section entry calc_p presumably lands here -- label not in extract)
# Accumulate fp1 = 10^|exp| by binary decomposition of the exponent in d0,
# one PTENxx table entry (12 bytes each) per bit.
13366 mov.l %d1,%d0 # copy exp to d0;use d0
13367 bpl.b no_neg # if exp is negative,
13368 neg.l %d0 # invert it
13369 or.l &0x40000000,(%a0) # and set SE bit
# (branch target no_neg presumably lands here -- label not in extract)
13371 clr.l %d3 # table index
13372 fmov.s &0x3f800000,%fp1 # init fp1 to 1
# (loop target e_loop presumably lands here -- label not in extract)
13374 asr.l &1,%d0 # shift next bit into carry
13375 bcc.b e_next # if zero, skip the mul
13376 fmul.x (%a1,%d3),%fp1 # mul by 10**(d3_bit_no)
# (branch target e_next presumably lands here -- label not in extract)
13378 add.l &12,%d3 # inc d3 to next rtable entry
13379 tst.l %d0 # check if d0 is zero
13380 bne.b e_loop # not zero, continue shifting
13383 # Check the sign of the adjusted exp and make the value in fp0 the
13384 # same sign. If the exp was pos then multiply fp1*fp0;
13385 # else divide fp0/fp1.
13389 # ( ) a0: pointer to working bcd value
13390 # (*) fp0: mantissa accumulator
13391 # ( ) fp1: scaling factor - 10**(abs(exp))
# (section entry norm -- label not in extract)
13394 btst &30,(%a0) # test the sign of the exponent
13395 beq.b mul # if clear, go to multiply
# (div: -- label not in extract)
13397 fdiv.x %fp1,%fp0 # exp is negative, so divide mant by 10^|exp|
# NOTE(review): a branch over the multiply (original line 13398,
# presumably "bra.b end_dec") appears to be missing from this extract.
# (mul: -- label not in extract)
13400 fmul.x %fp1,%fp0 # exp is positive, so multiply by 10^exp
13403 # Clean up and return with result in fp0.
13405 # If the final mul/div in decbin incurred an inex exception,
13406 # it will be inex2, but will be reported as inex1 by get_op.
# (section entry end_dec -- label not in extract)
13409 fmov.l %fpsr,%d0 # get status register
13410 bclr &inex2_bit+8,%d0 # test for inex2 and clear it
13411 beq.b no_exc # skip this if no exc
13412 ori.w &inx1a_mask,2+USER_FPSR(%a6) # set INEX1/AINEX
# (branch target no_exc presumably lands here -- label not in extract)
13414 add.l &0x4,%sp # clear 1 lw param
13415 fmovm.x (%sp)+,&0x40 # restore fp1
13416 movm.l (%sp)+,&0x3c # restore d2-d5
13421 #########################################################################
13422 # bindec(): Converts an input in extended precision format to bcd format#
13424 # INPUT *************************************************************** #
13425 # a0 = pointer to the input extended precision value in memory. #
13426 # the input may be either normalized, unnormalized, or #
13428 # d0 = contains the k-factor sign-extended to 32-bits. #
13430 # OUTPUT ************************************************************** #
13431 # FP_SCR0(a6) = bcd format result on the stack. #
13433 # ALGORITHM *********************************************************** #
13435 # A1. Set RM and size ext; Set SIGMA = sign of input. #
13436 # The k-factor is saved for use in d7. Clear the #
13437 # BINDEC_FLG for separating normalized/denormalized #
13438 # input. If input is unnormalized or denormalized, #
13441 # A2. Set X = abs(input). #
13443 # A3. Compute ILOG. #
13444 # ILOG is the log base 10 of the input value. It is #
13445 # approximated by adding e + 0.f when the original #
13446 # value is viewed as 2^^e * 1.f in extended precision. #
13447 # This value is stored in d6. #
13449 # A4. Clr INEX bit. #
13450 # The operation in A3 above may have set INEX2. #
13452 # A5. Set ICTR = 0; #
13453 # ICTR is a flag used in A13. It must be set before the #
13456 # A6. Calculate LEN. #
13457 # LEN is the number of digits to be displayed. The #
13458 # k-factor can dictate either the total number of digits, #
13459 # if it is a positive number, or the number of digits #
13460 # after the decimal point which are to be included as #
13461 # significant. See the 68882 manual for examples. #
13462 # If LEN is computed to be greater than 17, set OPERR in #
13463 # USER_FPSR. LEN is stored in d4. #
13465 # A7. Calculate SCALE. #
13466 # SCALE is equal to 10^ISCALE, where ISCALE is the number #
13467 # of decimal places needed to insure LEN integer digits #
13468 # in the output before conversion to bcd. LAMBDA is the #
13469 # sign of ISCALE, used in A9. Fp1 contains #
13470 # 10^^(abs(ISCALE)) using a rounding mode which is a #
13471 # function of the original rounding mode and the signs #
13472 # of ISCALE and X. A table is given in the code. #
13474 # A8. Clr INEX; Force RZ. #
13475 # The operation in A3 above may have set INEX2. #
13476 # RZ mode is forced for the scaling operation to insure #
13477 # only one rounding error. The grs bits are collected in #
13478 # the INEX flag for use in A10. #
13480 # A9. Scale X -> Y. #
13481 # The mantissa is scaled to the desired number of #
13482 # significant digits. The excess digits are collected #
13485 # A10. Or in INEX. #
13486 # If INEX is set, round error occurred. This is #
13487 # compensated for by 'or-ing' in the INEX2 flag to #
13490 # A11. Restore original FPCR; set size ext. #
13491 # Perform FINT operation in the user's rounding mode. #
13492 # Keep the size to extended. #
13494 # A12. Calculate YINT = FINT(Y) according to user's rounding #
13495 # mode. The FPSP routine sintd0 is used. The output #
13498 # A13. Check for LEN digits. #
13499 # If the int operation results in more than LEN digits, #
13500 # or less than LEN -1 digits, adjust ILOG and repeat from #
13501 # A6. This test occurs only on the first pass. If the #
13502 # result is exactly 10^LEN, decrement ILOG and divide #
13503 # the mantissa by 10. #
13505 # A14. Convert the mantissa to bcd. #
13506 # The binstr routine is used to convert the LEN digit #
13507 # mantissa to bcd in memory. The input to binstr is #
13508 # to be a fraction; i.e. (mantissa)/10^LEN and adjusted #
13509 # such that the decimal point is to the left of bit 63. #
13510 # The bcd digits are stored in the correct position in #
13511 # the final string area in memory. #
13513 # A15. Convert the exponent to bcd. #
13514 # As in A14 above, the exp is converted to bcd and the #
13515 # digits are stored in the final string. #
13516 # Test the length of the final exponent string. If the #
13517 # length is 4, set operr. #
13519 # A16. Write sign bits to final string. #
13521 #########################################################################
13523 set BINDEC_FLG, EXC_TEMP # DENORM flag
13525 # Constants in extended precision
13527 long 0x3FFD0000,0x9A209A84,0xFBCFF798,0x00000000
13529 long 0x3FFD0000,0x9A209A84,0xFBCFF799,0x00000000
13531 # Constants in single precision
13533 long 0x3F800000,0x00000000,0x00000000,0x00000000
13535 long 0x40000000,0x00000000,0x00000000,0x00000000
13537 long 0x41200000,0x00000000,0x00000000,0x00000000
13539 long 0x459A2800,0x00000000,0x00000000,0x00000000
13547 # Implementation Notes:
13549 # The registers are used as follows:
13551 # d0: scratch; LEN input to binstr
13553 # d2: upper 32-bits of mantissa for binstr
13554 # d3: scratch;lower 32-bits of mantissa for binstr
13559 # a0: ptr for original operand/final result
13560 # a1: scratch pointer
13561 # a2: pointer to FP_X; abs(original value) in ext
13572 movm.l &0x3f20,-(%sp) # {%d2-%d7/%a2}
13573 fmovm.x &0x7,-(%sp) # {%fp0-%fp2}
13575 # A1. Set RM and size ext. Set SIGMA = sign input;
13576 # The k-factor is saved for use in d7. Clear BINDEC_FLG for
13577 # separating normalized/denormalized input. If the input
13578 # is a denormalized number, set the BINDEC_FLG memory word
13579 # to signal denorm. If the input is unnormalized, normalize
13580 # the input and test for denormalized result.
13582 fmov.l &rm_mode*0x10,%fpcr # set RM and ext
13583 mov.l (%a0),L_SCR2(%a6) # save exponent for sign check
13584 mov.l %d0,%d7 # move k-factor to d7
13586 clr.b BINDEC_FLG(%a6) # clr norm/denorm flag
13587 cmpi.b STAG(%a6),&DENORM # is input a DENORM?
13588 bne.w A2_str # no; input is a NORM
13591 # Normalize the denorm
13595 and.w &0x7fff,%d0 # strip sign of normalized exp
13605 # Test if the normalized input is denormalized
13608 bgt.b pos_exp # if greater than zero, it is a norm
13609 st BINDEC_FLG(%a6) # set flag for denorm
13611 and.w &0x7fff,%d0 # strip sign of normalized exp
13616 # A2. Set X = abs(input).
13619 mov.l (%a0),FP_SCR1(%a6) # move input to work space
13620 mov.l 4(%a0),FP_SCR1+4(%a6) # move input to work space
13621 mov.l 8(%a0),FP_SCR1+8(%a6) # move input to work space
13622 and.l &0x7fffffff,FP_SCR1(%a6) # create abs(X)
13624 # A3. Compute ILOG.
13625 # ILOG is the log base 10 of the input value. It is approx-
13626 # imated by adding e + 0.f when the original value is viewed
13627 # as 2^^e * 1.f in extended precision. This value is stored
13632 # d0: k-factor/exponent
13638 # d7: k-factor/Unchanged
13639 # a0: ptr for original operand/final result
13642 # fp0: x/float(ILOG)
13646 # F_SCR2:Abs(X)/Abs(X) with $3fff exponent
13648 # L_SCR2:first word of X packed/Unchanged
13650 tst.b BINDEC_FLG(%a6) # check for denorm
13651 beq.b A3_cont # if clr, continue with norm
13652 mov.l &-4933,%d6 # force ILOG = -4933
13655 mov.w FP_SCR1(%a6),%d0 # move exp to d0
13656 mov.w &0x3fff,FP_SCR1(%a6) # replace exponent with 0x3fff
13657 fmov.x FP_SCR1(%a6),%fp0 # now fp0 has 1.f
13658 sub.w &0x3fff,%d0 # strip off bias
13659 fadd.w %d0,%fp0 # add in exp
13660 fsub.s FONE(%pc),%fp0 # subtract off 1.0
13661 fbge.w pos_res # if pos, branch
13662 fmul.x PLOG2UP1(%pc),%fp0 # if neg, mul by LOG2UP1
13663 fmov.l %fp0,%d6 # put ILOG in d6 as a lword
13664 bra.b A4_str # go move out ILOG
13666 fmul.x PLOG2(%pc),%fp0 # if pos, mul by LOG2
13667 fmov.l %fp0,%d6 # put ILOG in d6 as a lword
13670 # A4. Clr INEX bit.
13671 # The operation in A3 above may have set INEX2.
13674 fmov.l &0,%fpsr # zero all of fpsr - nothing needed
13677 # A5. Set ICTR = 0;
13678 # ICTR is a flag used in A13. It must be set before the
13679 # loop entry A6. The lower word of d5 is used for ICTR.
13681 clr.w %d5 # clear ICTR
13683 # A6. Calculate LEN.
13684 # LEN is the number of digits to be displayed. The k-factor
13685 # can dictate either the total number of digits, if it is
13686 # a positive number, or the number of digits after the
13687 # original decimal point which are to be included as
13688 # significant. See the 68882 manual for examples.
13689 # If LEN is computed to be greater than 17, set OPERR in
13690 # USER_FPSR. LEN is stored in d4.
13694 # d0: exponent/Unchanged
13697 # d4: exc picture/LEN
13698 # d5: ICTR/Unchanged
13699 # d6: ILOG/Unchanged
13700 # d7: k-factor/Unchanged
13701 # a0: ptr for original operand/final result
13704 # fp0: float(ILOG)/Unchanged
13708 # F_SCR2:Abs(X) with $3fff exponent/Unchanged
13710 # L_SCR2:first word of X packed/Unchanged
13713 tst.l %d7 # branch on sign of k
13714 ble.b k_neg # if k <= 0, LEN = ILOG + 1 - k
13715 mov.l %d7,%d4 # if k > 0, LEN = k
13716 bra.b len_ck # skip to LEN check
13718 mov.l %d6,%d4 # first load ILOG to d4
13719 sub.l %d7,%d4 # subtract off k
13720 addq.l &1,%d4 # add in the 1
13722 tst.l %d4 # LEN check: branch on sign of LEN
13723 ble.b LEN_ng # if neg, set LEN = 1
13724 cmp.l %d4,&17 # test if LEN > 17
13725 ble.b A7_str # if not, forget it
13726 mov.l &17,%d4 # set max LEN = 17
13727 tst.l %d7 # if negative, never set OPERR
13728 ble.b A7_str # if positive, continue
13729 or.l &opaop_mask,USER_FPSR(%a6) # set OPERR & AIOP in USER_FPSR
13730 bra.b A7_str # finished here
13732 mov.l &1,%d4 # min LEN is 1
13735 # A7. Calculate SCALE.
13736 # SCALE is equal to 10^ISCALE, where ISCALE is the number
13737 # of decimal places needed to insure LEN integer digits
13738 # in the output before conversion to bcd. LAMBDA is the sign
13739 # of ISCALE, used in A9. Fp1 contains 10^^(abs(ISCALE)) using
13740 # the rounding mode as given in the following table (see
13741 # Coonen, p. 7.23 as ref.; however, the SCALE variable is
13742 # of opposite sign in bindec.sa from Coonen).
13745 # FPCR[6:5] LAMBDA SIGN(X) FPCR[6:5]
13746 # ----------------------------------------------
13747 # RN 00 0 0 00/0 RN
13748 # RN 00 0 1 00/0 RN
13749 # RN 00 1 0 00/0 RN
13750 # RN 00 1 1 00/0 RN
13751 # RZ 01 0 0 11/3 RP
13752 # RZ 01 0 1 11/3 RP
13753 # RZ 01 1 0 10/2 RM
13754 # RZ 01 1 1 10/2 RM
13755 # RM 10 0 0 11/3 RP
13756 # RM 10 0 1 10/2 RM
13757 # RM 10 1 0 10/2 RM
13758 # RM 10 1 1 11/3 RP
13759 # RP 11 0 0 10/2 RM
13760 # RP 11 0 1 11/3 RP
13761 # RP 11 1 0 11/3 RP
13762 # RP 11 1 1 10/2 RM
13766 # d0: exponent/scratch - final is 0
13767 # d2: x/0 or 24 for A9
13768 # d3: x/scratch - offset ptr into PTENRM array
13769 # d4: LEN/Unchanged
13770 # d5: 0/ICTR:LAMBDA
13771 # d6: ILOG/ILOG or k if ((k<=0)&(ILOG<k))
13772 # d7: k-factor/Unchanged
13773 # a0: ptr for original operand/final result
13774 # a1: x/ptr to PTENRM array
13776 # fp0: float(ILOG)/Unchanged
13780 # F_SCR2:Abs(X) with $3fff exponent/Unchanged
13782 # L_SCR2:first word of X packed/Unchanged
13785 tst.l %d7 # test sign of k
13786 bgt.b k_pos # if pos and > 0, skip this
13787 cmp.l %d7,%d6 # test k - ILOG
13788 blt.b k_pos # if ILOG >= k, skip this
13789 mov.l %d7,%d6 # if ((k<0) & (ILOG < k)) ILOG = k
13791 mov.l %d6,%d0 # calc ILOG + 1 - LEN in d0
13792 addq.l &1,%d0 # add the 1
13793 sub.l %d4,%d0 # sub off LEN
13794 swap %d5 # use upper word of d5 for LAMBDA
13795 clr.w %d5 # set it zero initially
13796 clr.w %d2 # set up d2 for very small case
13797 tst.l %d0 # test sign of ISCALE
13798 bge.b iscale # if pos, skip next inst
13799 addq.w &1,%d5 # if neg, set LAMBDA true
13800 cmp.l %d0,&0xffffecd4 # test iscale <= -4908
13801 bgt.b no_inf # if false, skip rest
13802 add.l &24,%d0 # add in 24 to iscale
13803 mov.l &24,%d2 # put 24 in d2 for A9
13805 neg.l %d0 # and take abs of ISCALE
13807 fmov.s FONE(%pc),%fp1 # init fp1 to 1
13808 bfextu USER_FPCR(%a6){&26:&2},%d1 # get initial rmode bits
13809 lsl.w &1,%d1 # put them in bits 2:1
13810 add.w %d5,%d1 # add in LAMBDA
13811 lsl.w &1,%d1 # put them in bits 3:1
13812 tst.l L_SCR2(%a6) # test sign of original x
13813 bge.b x_pos # if pos, don't set bit 0
13814 addq.l &1,%d1 # if neg, set bit 0
13816 lea.l RBDTBL(%pc),%a2 # load rbdtbl base
13817 mov.b (%a2,%d1),%d3 # load d3 with new rmode
13818 lsl.l &4,%d3 # put bits in proper position
13819 fmov.l %d3,%fpcr # load bits into fpu
13820 lsr.l &4,%d3 # put bits in proper position
13821 tst.b %d3 # decode new rmode for pten table
13822 bne.b not_rn # if zero, it is RN
13823 lea.l PTENRN(%pc),%a1 # load a1 with RN table base
13824 bra.b rmode # exit decode
13826 lsr.b &1,%d3 # get lsb in carry
13827 bcc.b not_rp2 # if carry clear, it is RM
13828 lea.l PTENRP(%pc),%a1 # load a1 with RP table base
13829 bra.b rmode # exit decode
13831 lea.l PTENRM(%pc),%a1 # load a1 with RM table base
13833 clr.l %d3 # clr table index
13835 lsr.l &1,%d0 # shift next bit into carry
13836 bcc.b e_next2 # if zero, skip the mul
13837 fmul.x (%a1,%d3),%fp1 # mul by 10**(d3_bit_no)
13839 add.l &12,%d3 # inc d3 to next pwrten table entry
13840 tst.l %d0 # test if ISCALE is zero
13841 bne.b e_loop2 # if not, loop
13843 # A8. Clr INEX; Force RZ.
13844 # The operation in A3 above may have set INEX2.
13845 # RZ mode is forced for the scaling operation to insure
13846 # only one rounding error. The grs bits are collected in
13847 # the INEX flag for use in A10.
13852 fmov.l &0,%fpsr # clr INEX
13853 fmov.l &rz_mode*0x10,%fpcr # set RZ rounding mode
13855 # A9. Scale X -> Y.
13856 # The mantissa is scaled to the desired number of significant
13857 # digits. The excess digits are collected in INEX2. If mul,
13858 # Check d2 for excess 10 exponential value. If not zero,
13859 # the iscale value would have caused the pwrten calculation
13860 # to overflow. Only a negative iscale can cause this, so
13861 # multiply by 10^(d2), which is now only allowed to be 24,
13862 # with a multiply by 10^8 and 10^16, which is exact since
13863 # 10^24 is exact. If the input was denormalized, we must
13864 # create a busy stack frame with the mul command and the
13865 # two operands, and allow the fpu to complete the multiply.
13869 # d0: FPCR with RZ mode/Unchanged
13870 # d2: 0 or 24/unchanged
13872 # d4: LEN/Unchanged
13874 # d6: ILOG/Unchanged
13875 # d7: k-factor/Unchanged
13876 # a0: ptr for original operand/final result
13877 # a1: ptr to PTENRM array/Unchanged
13879 # fp0: float(ILOG)/X adjusted for SCALE (Y)
13880 # fp1: 10^ISCALE/Unchanged
13883 # F_SCR2:Abs(X) with $3fff exponent/Unchanged
13885 # L_SCR2:first word of X packed/Unchanged
13888 fmov.x (%a0),%fp0 # load X from memory
13889 fabs.x %fp0 # use abs(X)
13890 tst.w %d5 # LAMBDA is in lower word of d5
13891 bne.b sc_mul # if neg (LAMBDA = 1), scale by mul
13892 fdiv.x %fp1,%fp0 # calculate X / SCALE -> Y to fp0
13893 bra.w A10_st # branch to A10
13896 tst.b BINDEC_FLG(%a6) # check for denorm
13897 beq.w A9_norm # if norm, continue with mul
13899 # for DENORM, we must calculate:
13900 # fp0 = input_op * 10^ISCALE * 10^24
13901 # since the input operand is a DENORM, we can't multiply it directly.
13902 # so, we do the multiplication of the exponents and mantissas separately.
13903 # in this way, we avoid underflow on intermediate stages of the
13904 # multiplication and guarantee a result without exception.
13905 fmovm.x &0x2,-(%sp) # save 10^ISCALE to stack
13907 mov.w (%sp),%d3 # grab exponent
13908 andi.w &0x7fff,%d3 # clear sign
13909 ori.w &0x8000,(%a0) # make DENORM exp negative
13910 add.w (%a0),%d3 # add DENORM exp to 10^ISCALE exp
13911 subi.w &0x3fff,%d3 # subtract BIAS
13913 subi.w &0x3fff,%d3 # subtract BIAS
13915 subi.w &0x3fff,%d3 # subtract BIAS
13917 bmi.w sc_mul_err # if result is DENORM, punt!!!
13919 andi.w &0x8000,(%sp) # keep sign
13920 or.w %d3,(%sp) # insert new exponent
13921 andi.w &0x7fff,(%a0) # clear sign bit on DENORM again
13922 mov.l 0x8(%a0),-(%sp) # put input op mantissa on stk
13923 mov.l 0x4(%a0),-(%sp)
13924 mov.l &0x3fff0000,-(%sp) # force exp to zero
13925 fmovm.x (%sp)+,&0x80 # load normalized DENORM into fp0
13928 # fmul.x 36(%a1),%fp0 # multiply fp0 by 10^8
13929 # fmul.x 48(%a1),%fp0 # multiply fp0 by 10^16
13930 mov.l 36+8(%a1),-(%sp) # get 10^8 mantissa
13931 mov.l 36+4(%a1),-(%sp)
13932 mov.l &0x3fff0000,-(%sp) # force exp to zero
13933 mov.l 48+8(%a1),-(%sp) # get 10^16 mantissa
13934 mov.l 48+4(%a1),-(%sp)
13935 mov.l &0x3fff0000,-(%sp) # force exp to zero
13936 fmul.x (%sp)+,%fp0 # multiply fp0 by 10^8
13937 fmul.x (%sp)+,%fp0 # multiply fp0 by 10^16
13944 tst.w %d2 # test for small exp case
13945 beq.b A9_con # if zero, continue as normal
13946 fmul.x 36(%a1),%fp0 # multiply fp0 by 10^8
13947 fmul.x 48(%a1),%fp0 # multiply fp0 by 10^16
13949 fmul.x %fp1,%fp0 # calculate X * SCALE -> Y to fp0
# A10. Or in INEX. (section header line omitted from this excerpt)
13952 # If INEX is set, round error occurred. This is compensated
13953 # for by 'or-ing' in the INEX2 flag to the lsb of Y.
13957 # d0: FPCR with RZ mode/FPSR with INEX2 isolated
13960 # d4: LEN/Unchanged
13962 # d6: ILOG/Unchanged
13963 # d7: k-factor/Unchanged
13964 # a0: ptr for original operand/final result
13965 # a1: ptr to PTENxx array/Unchanged
13966 # a2: x/ptr to FP_SCR1(a6)
13967 # fp0: Y/Y with lsb adjusted
13968 # fp1: 10^ISCALE/Unchanged
13972 fmov.l %fpsr,%d0 # get FPSR
13973 fmov.x %fp0,FP_SCR1(%a6) # move Y to memory
13974 lea.l FP_SCR1(%a6),%a2 # load a2 with ptr to FP_SCR1
13975 btst &9,%d0 # check if INEX2 set
13976 beq.b A11_st # if clear, skip rest
13977 or.l &1,8(%a2) # or in 1 to lsb of mantissa
13978 fmov.x FP_SCR1(%a6),%fp0 # write adjusted Y back to fpu
13981 # A11. Restore original FPCR; set size ext.
13982 # Perform FINT operation in the user's rounding mode. Keep
13983 # the size to extended. The sintdo entry point in the sint
13984 # routine expects the FPCR value to be in USER_FPCR for
13985 # mode and precision. The original FPCR is saved in L_SCR1.
13988 mov.l USER_FPCR(%a6),L_SCR1(%a6) # save it for later
13989 and.l &0x00000030,USER_FPCR(%a6) # set size to ext,
13990 # ;block exceptions
13993 # A12. Calculate YINT = FINT(Y) according to user's rounding mode.
13994 # The FPSP routine sintdo is used. The output is in fp0.
13998 # d0: FPSR with AINEX cleared/FPCR with size set to ext
14001 # d4: LEN/Unchanged
14002 # d5: ICTR:LAMBDA/Unchanged
14003 # d6: ILOG/Unchanged
14004 # d7: k-factor/Unchanged
14005 # a0: ptr for original operand/src ptr for sintdo
14006 # a1: ptr to PTENxx array/Unchanged
14007 # a2: ptr to FP_SCR1(a6)/Unchanged
14008 # a6: temp pointer to FP_SCR1(a6) - orig value saved and restored
14010 # fp1: 10^ISCALE/Unchanged
14013 # F_SCR2:Y adjusted for inex/Y with original exponent
14014 # L_SCR1:x/original USER_FPCR
14015 # L_SCR2:first word of X packed/Unchanged
14018 movm.l &0xc0c0,-(%sp) # save regs used by sintdo {%d0-%d1/%a0-%a1}
14019 mov.l L_SCR1(%a6),-(%sp)
14020 mov.l L_SCR2(%a6),-(%sp)
14022 lea.l FP_SCR1(%a6),%a0 # a0 is ptr to FP_SCR1(a6)
14023 fmov.x %fp0,(%a0) # move Y to memory at FP_SCR1(a6)
14024 tst.l L_SCR2(%a6) # test sign of original operand
14025 bge.b do_fint12 # if pos, use Y
14026 or.l &0x80000000,(%a0) # if neg, use -Y
14028 mov.l USER_FPSR(%a6),-(%sp)
14029 # bsr sintdo # sint routine returns int in fp0
14031 fmov.l USER_FPCR(%a6),%fpcr
14032 fmov.l &0x0,%fpsr # clear the AEXC bits!!!
14033 ## mov.l USER_FPCR(%a6),%d0 # ext prec/keep rnd mode
14034 ## andi.l &0x00000030,%d0
14035 ## fmov.l %d0,%fpcr
14036 fint.x FP_SCR1(%a6),%fp0 # do fint()
14038 or.w %d0,FPSR_EXCEPT(%a6) # accumulate exception bits (d0 is set from %fpsr on a line omitted from this excerpt - TODO confirm)
14039 ## fmov.l &0x0,%fpcr
14040 ## fmov.l %fpsr,%d0 # don't keep ccodes
14041 ## or.w %d0,FPSR_EXCEPT(%a6)
14043 mov.b (%sp),USER_FPSR(%a6) # restore cc byte of USER_FPSR pushed above
14046 mov.l (%sp)+,L_SCR2(%a6)
14047 mov.l (%sp)+,L_SCR1(%a6)
14048 movm.l (%sp)+,&0x303 # restore regs used by sint {%d0-%d1/%a0-%a1}
14050 mov.l L_SCR2(%a6),FP_SCR1(%a6) # restore original exponent
14051 mov.l L_SCR1(%a6),USER_FPCR(%a6) # restore user's FPCR
14053 # A13. Check for LEN digits.
14054 # If the int operation results in more than LEN digits,
14055 # or less than LEN -1 digits, adjust ILOG and repeat from
14056 # A6. This test occurs only on the first pass. If the
14057 # result is exactly 10^LEN, decrement ILOG and divide
14058 # the mantissa by 10. The calculation of 10^LEN cannot
14059 # be inexact, since all powers of ten up to 10^27 are exact
14060 # in extended precision, so the use of a previous power-of-ten
14061 # table will introduce no error.
14066 # d0: FPCR with size set to ext/scratch final = 0
14068 # d3: x/scratch final = x
14069 # d4: LEN/LEN adjusted
14070 # d5: ICTR:LAMBDA/LAMBDA:ICTR
14071 # d6: ILOG/ILOG adjusted
14072 # d7: k-factor/Unchanged
14073 # a0: pointer into memory for packed bcd string formation
14074 # a1: ptr to PTENxx array/Unchanged
14075 # a2: ptr to FP_SCR1(a6)/Unchanged
14076 # fp0: int portion of Y/abs(YINT) adjusted
14077 # fp1: 10^ISCALE/Unchanged
14080 # F_SCR2:Y with original exponent/Unchanged
14081 # L_SCR1:original USER_FPCR/Unchanged
14082 # L_SCR2:first word of X packed/Unchanged
14085 swap %d5 # put ICTR in lower word of d5
14086 tst.w %d5 # check if ICTR = 0
14087 bne not_zr # if non-zero, go to second test
14089 # Compute 10^(LEN-1)
14091 fmov.s FONE(%pc),%fp2 # init fp2 to 1.0
14092 mov.l %d4,%d0 # put LEN in d0
14093 subq.l &1,%d0 # d0 = LEN -1
14094 clr.l %d3 # clr table index
14096 lsr.l &1,%d0 # shift next bit into carry
14097 bcc.b l_next # if zero, skip the mul
14098 fmul.x (%a1,%d3),%fp2 # mul by 10**(d3_bit_no)
14100 add.l &12,%d3 # inc d3 to next pwrten table entry
14101 tst.l %d0 # test if LEN is zero
14102 bne.b l_loop # if not, loop
14104 # 10^(LEN-1) is computed for this test and A14. If the input was
14105 # denormalized, check only the case in which YINT > 10^LEN.
14107 tst.b BINDEC_FLG(%a6) # check if input was norm
14108 beq.b A13_con # if norm, continue with checking
14109 fabs.x %fp0 # take abs of YINT
14112 # Compare abs(YINT) to 10^(LEN-1) and 10^LEN
14115 fabs.x %fp0 # take abs of YINT
14116 fcmp.x %fp0,%fp2 # compare abs(YINT) with 10^(LEN-1)
14117 fbge.w test_2 # if greater, do next test
14118 subq.l &1,%d6 # subtract 1 from ILOG
14119 mov.w &1,%d5 # set ICTR
14120 fmov.l &rm_mode*0x10,%fpcr # set rmode to RM
14121 fmul.s FTEN(%pc),%fp2 # compute 10^LEN
14122 bra.w A6_str # return to A6 and recompute YINT
14124 fmul.s FTEN(%pc),%fp2 # compute 10^LEN
14125 fcmp.x %fp0,%fp2 # compare abs(YINT) with 10^LEN
14126 fblt.w A14_st # if less, all is ok, go to A14
14127 fbgt.w fix_ex # if greater, fix and redo
14128 fdiv.s FTEN(%pc),%fp0 # if equal, divide by 10
14129 addq.l &1,%d6 # and inc ILOG
14130 bra.b A14_st # and continue elsewhere
14132 addq.l &1,%d6 # increment ILOG by 1
14133 mov.w &1,%d5 # set ICTR
14134 fmov.l &rm_mode*0x10,%fpcr # set rmode to RM
14135 bra.w A6_str # return to A6 and recompute YINT
14137 # Since ICTR <> 0, we have already been through one adjustment,
14138 # and shouldn't have another; this is to check if abs(YINT) = 10^LEN
14139 # 10^LEN is again computed using whatever table is in a1 since the
14140 # value calculated cannot be inexact.
14143 fmov.s FONE(%pc),%fp2 # init fp2 to 1.0
14144 mov.l %d4,%d0 # put LEN in d0
14145 clr.l %d3 # clr table index
14147 lsr.l &1,%d0 # shift next bit into carry
14148 bcc.b z_next # if zero, skip the mul
14149 fmul.x (%a1,%d3),%fp2 # mul by 10**(d3_bit_no)
14151 add.l &12,%d3 # inc d3 to next pwrten table entry
14152 tst.l %d0 # test if LEN is zero
14153 bne.b z_loop # if not, loop
14154 fabs.x %fp0 # get abs(YINT)
14155 fcmp.x %fp0,%fp2 # check if abs(YINT) = 10^LEN
14156 fbneq.w A14_st # if not, skip this
14157 fdiv.s FTEN(%pc),%fp0 # divide abs(YINT) by 10
14158 addq.l &1,%d6 # and inc ILOG by 1
14159 addq.l &1,%d4 # and inc LEN
14160 fmul.s FTEN(%pc),%fp2 # LEN was inc'd, so get new 10^LEN
14162 # A14. Convert the mantissa to bcd.
14163 # The binstr routine is used to convert the LEN digit
14164 # mantissa to bcd in memory. The input to binstr is
14165 # to be a fraction; i.e. (mantissa)/10^LEN and adjusted
14166 # such that the decimal point is to the left of bit 63.
14167 # The bcd digits are stored in the correct position in
14168 # the final string area in memory.
14173 # d0: x/LEN call to binstr - final is 0
14175 # d2: x/ms 32-bits of mant of abs(YINT)
14176 # d3: x/ls 32-bits of mant of abs(YINT)
14177 # d4: LEN/Unchanged
14178 # d5: ICTR:LAMBDA/LAMBDA:ICTR
14180 # d7: k-factor/Unchanged
14181 # a0: pointer into memory for packed bcd string formation
14182 # /ptr to first mantissa byte in result string
14183 # a1: ptr to PTENxx array/Unchanged
14184 # a2: ptr to FP_SCR1(a6)/Unchanged
14185 # fp0: int portion of Y/abs(YINT) adjusted
14186 # fp1: 10^ISCALE/Unchanged
14187 # fp2: 10^LEN/Unchanged
14188 # F_SCR1:x/Work area for final result
14189 # F_SCR2:Y with original exponent/Unchanged
14190 # L_SCR1:original USER_FPCR/Unchanged
14191 # L_SCR2:first word of X packed/Unchanged
14194 fmov.l &rz_mode*0x10,%fpcr # force rz for conversion
14195 fdiv.x %fp2,%fp0 # divide abs(YINT) by 10^LEN
14196 lea.l FP_SCR0(%a6),%a0
14197 fmov.x %fp0,(%a0) # move abs(YINT)/10^LEN to memory
14198 mov.l 4(%a0),%d2 # move 2nd word of FP_RES to d2
14199 mov.l 8(%a0),%d3 # move 3rd word of FP_RES to d3
14200 clr.l 4(%a0) # zero word 2 of FP_RES
14201 clr.l 8(%a0) # zero word 3 of FP_RES
14202 mov.l (%a0),%d0 # move exponent to d0
14203 swap %d0 # put exponent in lower word
14204 beq.b no_sft # if zero, don't shift
14205 sub.l &0x3ffd,%d0 # sub bias less 2 to make fract
14206 tst.l %d0 # check if > 1
14207 bgt.b no_sft # if so, don't shift
14208 neg.l %d0 # make exp positive
14210 lsr.l &1,%d2 # shift d2:d3 right, add 0s
14211 roxr.l &1,%d3 # the number of places
14212 dbf.w %d0,m_loop # given in d0
14214 tst.l %d2 # check for mantissa of zero
14215 bne.b no_zr # if not, go on
14216 tst.l %d3 # continue zero check
14217 beq.b zer_m # if zero, go directly to binstr
14219 clr.l %d1 # put zero in d1 for addx
14220 add.l &0x00000080,%d3 # inc at bit 7
14221 addx.l %d1,%d2 # continue inc
14222 and.l &0xffffff80,%d3 # strip off lsb not used by 882
14224 mov.l %d4,%d0 # put LEN in d0 for binstr call
14225 addq.l &3,%a0 # a0 points to M16 byte in result
14226 bsr binstr # call binstr to convert mant
14229 # A15. Convert the exponent to bcd.
14230 # As in A14 above, the exp is converted to bcd and the
14231 # digits are stored in the final string.
14233 # Digits are stored in L_SCR1(a6) on return from BINDEC as:
14236 # -----------------------------------------
14237 # | 0 | e3 | e2 | e1 | e4 | X | X | X |
14238 # -----------------------------------------
14240 # And are moved into their proper places in FP_SCR0. If digit e4
14241 # is non-zero, OPERR is signaled. In all cases, all 4 digits are
14242 # written as specified in the 881/882 manual for packed decimal.
14246 # d0: x/LEN call to binstr - final is 0
14247 # d1: x/scratch (0);shift count for final exponent packing
14248 # d2: x/ms 32-bits of exp fraction/scratch
14249 # d3: x/ls 32-bits of exp fraction
14250 # d4: LEN/Unchanged
14251 # d5: ICTR:LAMBDA/LAMBDA:ICTR
14253 # d7: k-factor/Unchanged
14254 # a0: ptr to result string/ptr to L_SCR1(a6)
14255 # a1: ptr to PTENxx array/Unchanged
14256 # a2: ptr to FP_SCR1(a6)/Unchanged
14257 # fp0: abs(YINT) adjusted/float(ILOG)
14258 # fp1: 10^ISCALE/Unchanged
14259 # fp2: 10^LEN/Unchanged
14260 # F_SCR1:Work area for final result/BCD result
14261 # F_SCR2:Y with original exponent/ILOG/10^4
14262 # L_SCR1:original USER_FPCR/Exponent digits on return from binstr
14263 # L_SCR2:first word of X packed/Unchanged
14266 tst.b BINDEC_FLG(%a6) # check for denorm (conditional branch on a line omitted from this excerpt)
14268 ftest.x %fp0 # test for zero
14269 fbeq.w den_zero # if zero, use k-factor or 4933
14270 fmov.l %d6,%fp0 # float ILOG
14271 fabs.x %fp0 # get abs of ILOG
14274 tst.l %d7 # check sign of the k-factor
14275 blt.b use_ilog # if negative, use ILOG
14276 fmov.s F4933(%pc),%fp0 # force exponent to 4933
14277 bra.b convrt # do it
14279 fmov.l %d6,%fp0 # float ILOG
14280 fabs.x %fp0 # get abs of ILOG
14283 ftest.x %fp0 # test for zero
14284 fbneq.w not_zero # if zero, force exponent
14285 fmov.s FONE(%pc),%fp0 # force exponent to 1
14286 bra.b convrt # do it
14288 fmov.l %d6,%fp0 # float ILOG
14289 fabs.x %fp0 # get abs of ILOG
14291 fdiv.x 24(%a1),%fp0 # compute ILOG/10^4
14292 fmov.x %fp0,FP_SCR1(%a6) # store fp0 in memory
14293 mov.l 4(%a2),%d2 # move word 2 to d2
14294 mov.l 8(%a2),%d3 # move word 3 to d3
14295 mov.w (%a2),%d0 # move exp to d0
14296 beq.b x_loop_fin # if zero, skip the shift
14297 sub.w &0x3ffd,%d0 # subtract off bias
14298 neg.w %d0 # make exp positive
14300 lsr.l &1,%d2 # shift d2:d3 right
14301 roxr.l &1,%d3 # the number of places
14302 dbf.w %d0,x_loop # given in d0
14304 clr.l %d1 # put zero in d1 for addx
14305 add.l &0x00000080,%d3 # inc at bit 7
14306 addx.l %d1,%d2 # continue inc
14307 and.l &0xffffff80,%d3 # strip off lsb not used by 882
14308 mov.l &4,%d0 # put 4 in d0 for binstr call
14309 lea.l L_SCR1(%a6),%a0 # a0 is ptr to L_SCR1 for exp digits
14310 bsr binstr # call binstr to convert exp
14311 mov.l L_SCR1(%a6),%d0 # load L_SCR1 lword to d0
14312 mov.l &12,%d1 # use d1 for shift count
14313 lsr.l %d1,%d0 # shift d0 right by 12
14314 bfins %d0,FP_SCR0(%a6){&4:&12} # put e3:e2:e1 in FP_SCR0
14315 lsr.l %d1,%d0 # shift d0 right by 12
14316 bfins %d0,FP_SCR0(%a6){&16:&4} # put e4 in FP_SCR0
14317 tst.b %d0 # check if e4 is zero
14318 beq.b A16_st # if zero, skip rest
14319 or.l &opaop_mask,USER_FPSR(%a6) # set OPERR & AIOP in USER_FPSR
14322 # A16. Write sign bits to final string.
14323 # Sigma is bit 31 of initial value; RHO is bit 31 of d6 (ILOG).
14327 # d0: x/scratch - final is x
14330 # d4: LEN/Unchanged
14331 # d5: ICTR:LAMBDA/LAMBDA:ICTR
14332 # d6: ILOG/ILOG adjusted
14333 # d7: k-factor/Unchanged
14334 # a0: ptr to L_SCR1(a6)/Unchanged
14335 # a1: ptr to PTENxx array/Unchanged
14336 # a2: ptr to FP_SCR1(a6)/Unchanged
14337 # fp0: float(ILOG)/Unchanged
14338 # fp1: 10^ISCALE/Unchanged
14339 # fp2: 10^LEN/Unchanged
14340 # F_SCR1:BCD result with correct signs
14342 # L_SCR1:Exponent digits on return from binstr
14343 # L_SCR2:first word of X packed/Unchanged
14346 clr.l %d0 # clr d0 for collection of signs
14347 and.b &0x0f,FP_SCR0(%a6) # clear first nibble of FP_SCR0
14348 tst.l L_SCR2(%a6) # check sign of original mantissa
14349 bge.b mant_p # if pos, don't set SM
14350 mov.l &2,%d0 # move 2 in to d0 for SM
14352 tst.l %d6 # check sign of ILOG
14353 bge.b wr_sgn # if pos, don't set SE
14354 addq.l &1,%d0 # set bit 0 in d0 for SE
14356 bfins %d0,FP_SCR0(%a6){&0:&2} # insert SM and SE into FP_SCR0
14358 # Clean up and restore all registers used.
14360 fmov.l &0,%fpsr # clear possible inex2/ainex bits
14361 fmovm.x (%sp)+,&0xe0 # {%fp0-%fp2}
14362 movm.l (%sp)+,&0x4fc # {%d2-%d7/%a2}
# Three 12-byte-per-entry power-of-ten tables (the table label lines are
# omitted from this excerpt). The tables hold 10^(2^k) for k = 0..12 in
# extended precision. Comparing entries: the second table's final mantissa
# lword is one ULP higher in some entries (e.g. 10^64, 10^1024) and the
# third table's is one ULP lower in several entries, relative to the first,
# i.e. the three tables differ only in final-bit rounding direction.
# Presumably these are the round-nearest/round-up/round-down variants
# selected per rounding mode - TODO confirm names against the full source.
14367 long 0x40020000,0xA0000000,0x00000000 # 10 ^ 1
14368 long 0x40050000,0xC8000000,0x00000000 # 10 ^ 2
14369 long 0x400C0000,0x9C400000,0x00000000 # 10 ^ 4
14370 long 0x40190000,0xBEBC2000,0x00000000 # 10 ^ 8
14371 long 0x40340000,0x8E1BC9BF,0x04000000 # 10 ^ 16
14372 long 0x40690000,0x9DC5ADA8,0x2B70B59E # 10 ^ 32
14373 long 0x40D30000,0xC2781F49,0xFFCFA6D5 # 10 ^ 64
14374 long 0x41A80000,0x93BA47C9,0x80E98CE0 # 10 ^ 128
14375 long 0x43510000,0xAA7EEBFB,0x9DF9DE8E # 10 ^ 256
14376 long 0x46A30000,0xE319A0AE,0xA60E91C7 # 10 ^ 512
14377 long 0x4D480000,0xC9767586,0x81750C17 # 10 ^ 1024
14378 long 0x5A920000,0x9E8B3B5D,0xC53D5DE5 # 10 ^ 2048
14379 long 0x75250000,0xC4605202,0x8A20979B # 10 ^ 4096
# Second table (label line omitted): entries rounded one ULP up where they
# differ from the first table (see 10^64, 10^1024).
14383 long 0x40020000,0xA0000000,0x00000000 # 10 ^ 1
14384 long 0x40050000,0xC8000000,0x00000000 # 10 ^ 2
14385 long 0x400C0000,0x9C400000,0x00000000 # 10 ^ 4
14386 long 0x40190000,0xBEBC2000,0x00000000 # 10 ^ 8
14387 long 0x40340000,0x8E1BC9BF,0x04000000 # 10 ^ 16
14388 long 0x40690000,0x9DC5ADA8,0x2B70B59E # 10 ^ 32
14389 long 0x40D30000,0xC2781F49,0xFFCFA6D6 # 10 ^ 64
14390 long 0x41A80000,0x93BA47C9,0x80E98CE0 # 10 ^ 128
14391 long 0x43510000,0xAA7EEBFB,0x9DF9DE8E # 10 ^ 256
14392 long 0x46A30000,0xE319A0AE,0xA60E91C7 # 10 ^ 512
14393 long 0x4D480000,0xC9767586,0x81750C18 # 10 ^ 1024
14394 long 0x5A920000,0x9E8B3B5D,0xC53D5DE5 # 10 ^ 2048
14395 long 0x75250000,0xC4605202,0x8A20979B # 10 ^ 4096
# Third table (label line omitted): entries rounded one ULP down where they
# differ from the first table (see 10^32, 10^128, 10^256, 10^512, 10^2048,
# 10^4096).
14399 long 0x40020000,0xA0000000,0x00000000 # 10 ^ 1
14400 long 0x40050000,0xC8000000,0x00000000 # 10 ^ 2
14401 long 0x400C0000,0x9C400000,0x00000000 # 10 ^ 4
14402 long 0x40190000,0xBEBC2000,0x00000000 # 10 ^ 8
14403 long 0x40340000,0x8E1BC9BF,0x04000000 # 10 ^ 16
14404 long 0x40690000,0x9DC5ADA8,0x2B70B59D # 10 ^ 32
14405 long 0x40D30000,0xC2781F49,0xFFCFA6D5 # 10 ^ 64
14406 long 0x41A80000,0x93BA47C9,0x80E98CDF # 10 ^ 128
14407 long 0x43510000,0xAA7EEBFB,0x9DF9DE8D # 10 ^ 256
14408 long 0x46A30000,0xE319A0AE,0xA60E91C6 # 10 ^ 512
14409 long 0x4D480000,0xC9767586,0x81750C17 # 10 ^ 1024
14410 long 0x5A920000,0x9E8B3B5D,0xC53D5DE4 # 10 ^ 2048
14411 long 0x75250000,0xC4605202,0x8A20979A # 10 ^ 4096
14413 #########################################################################
14414 # binstr(): Converts a 64-bit binary integer to bcd. #
14416 # INPUT *************************************************************** #
14417 # d2:d3 = 64-bit binary integer #
14418 # d0 = desired length (LEN) #
14419 # a0 = pointer to start in memory for bcd characters #
14420 # (This pointer must point to byte 4 of the first #
14421 # lword of the packed decimal memory string.) #
14423 # OUTPUT ************************************************************** #
14424 # a0 = pointer to LEN bcd digits representing the 64-bit integer. #
14426 # ALGORITHM *********************************************************** #
14427 # The 64-bit binary is assumed to have a decimal point before #
14428 # bit 63. The fraction is multiplied by 10 using a mul by 2 #
14429 # shift and a mul by 8 shift. The bits shifted out of the #
14430 # msb form a decimal digit. This process is iterated until #
14431 # LEN digits are formed. #
14433 # A1. Init d7 to 1. D7 is the byte digit counter, and if 1, the #
14434 # digit formed will be assumed the least significant. This is #
14435 # to force the first byte formed to have a 0 in the upper 4 bits. #
14437 # A2. Beginning of the loop: #
14438 # Copy the fraction in d2:d3 to d4:d5. #
14440 # A3. Multiply the fraction in d2:d3 by 8 using bit-field #
14441 # extracts and shifts. The three msbs from d2 will go into d1. #
14443 # A4. Multiply the fraction in d4:d5 by 2 using shifts. The msb #
14444 # will be collected by the carry. #
14446 # A5. Add using the carry the 64-bit quantities in d2:d3 and d4:d5 #
14447 # into d2:d3. D1 will contain the bcd digit formed. #
14449 # A6. Test d7. If zero, the digit formed is the ms digit. If non- #
14450 # zero, it is the ls digit. Put the digit in its place in the #
14451 # upper word of d0. If it is the ls digit, write the word #
14452 # from d0 to memory. #
14454 # A7. Decrement d0 (LEN counter) and repeat the loop until zero. #
14456 #########################################################################
14458 # Implementation Notes:
14460 # The registers are used as follows:
14463 # d1: temp used to form the digit
14464 # d2: upper 32-bits of fraction for mul by 8
14465 # d3: lower 32-bits of fraction for mul by 8
14466 # d4: upper 32-bits of fraction for mul by 2
14467 # d5: lower 32-bits of fraction for mul by 2
14468 # d6: temp for bit-field extracts
14469 # d7: byte digit formation word;digit count {0,1}
14470 # a0: pointer into memory for packed bcd string formation
14475 movm.l &0xff00,-(%sp) # {%d0-%d7}
14480 mov.l &1,%d7 # init d7 for second digit
14481 subq.l &1,%d0 # for dbf d0 would have LEN+1 passes
14483 # A2. Copy d2:d3 to d4:d5. Start loop.
14486 mov.l %d2,%d4 # copy the fraction before muls
14487 mov.l %d3,%d5 # to d4:d5
14489 # A3. Multiply d2:d3 by 8; extract msbs into d1.
14491 bfextu %d2{&0:&3},%d1 # copy 3 msbs of d2 into d1
14492 asl.l &3,%d2 # shift d2 left by 3 places
14493 bfextu %d3{&0:&3},%d6 # copy 3 msbs of d3 into d6
14494 asl.l &3,%d3 # shift d3 left by 3 places
14495 or.l %d6,%d2 # or in msbs from d3 into d2
14497 # A4. Multiply d4:d5 by 2; add carry out to d1.
14499 asl.l &1,%d5 # mul d5 by 2
14500 roxl.l &1,%d4 # mul d4 by 2
14501 swap %d6 # put 0 in d6 lower word
14502 addx.w %d6,%d1 # add in extend from mul by 2
14504 # A5. Add mul by 8 to mul by 2. D1 contains the digit formed.
14506 add.l %d5,%d3 # add lower 32 bits
14507 nop # ERRATA FIX #13 (Rev. 1.2 6/6/90)
14508 addx.l %d4,%d2 # add with extend upper 32 bits
14509 nop # ERRATA FIX #13 (Rev. 1.2 6/6/90)
14510 addx.w %d6,%d1 # add in extend from add to d1
14511 swap %d6 # with d6 = 0; put 0 in upper word
14513 # A6. Test d7 and branch.
14515 tst.w %d7 # if zero, store digit & to loop
14516 beq.b first_d # if non-zero, form byte & write
14518 swap %d7 # bring first digit to word d7b
14519 asl.w &4,%d7 # first digit in upper 4 bits d7b
14520 add.w %d1,%d7 # add in ls digit to d7b
14521 mov.b %d7,(%a0)+ # store d7b byte in memory
14522 swap %d7 # put LEN counter in word d7a
14523 clr.w %d7 # set d7a to signal no digits done
14524 dbf.w %d0,loop # do loop some more!
14525 bra.b end_bstr # finished, so exit
14527 swap %d7 # put digit word in d7b
14528 mov.w %d1,%d7 # put new digit in d7b
14529 swap %d7 # put LEN counter in word d7a
14530 addq.w &1,%d7 # set d7a to signal first digit done
14531 dbf.w %d0,loop # do loop some more!
14532 swap %d7 # put last digit in string
14533 lsl.w &4,%d7 # move it to upper 4 bits
14534 mov.b %d7,(%a0)+ # store it in memory string
14536 # Clean up and return; the bcd string has been written through a0.
14539 movm.l (%sp)+,&0xff # {%d0-%d7}
14542 #########################################################################
14543 # XDEF **************************************************************** #
14544 # facc_in_b(): dmem_read_byte failed #
14545 # facc_in_w(): dmem_read_word failed #
14546 # facc_in_l(): dmem_read_long failed #
14547 # facc_in_d(): dmem_read of dbl prec failed #
14548 # facc_in_x(): dmem_read of ext prec failed #
14550 # facc_out_b(): dmem_write_byte failed #
14551 # facc_out_w(): dmem_write_word failed #
14552 # facc_out_l(): dmem_write_long failed #
14553 # facc_out_d(): dmem_write of dbl prec failed #
14554 # facc_out_x(): dmem_write of ext prec failed #
14556 # XREF **************************************************************** #
14557 # _real_access() - exit through access error handler #
14559 # INPUT *************************************************************** #
14562 # OUTPUT ************************************************************** #
14565 # ALGORITHM *********************************************************** #
14566 # Flow jumps here when an FP data fetch call gets an error #
14567 # result. This means the operating system wants an access error frame #
14568 # made out of the current exception stack frame. #
14569 # So, we first call restore() which makes sure that any updated #
14570 # -(an)+ register gets returned to its pre-exception value and then #
14571 # we change the stack to an access error stack frame. #
14573 #########################################################################
14576 movq.l &0x1,%d0 # one byte
14577 bsr.w restore # fix An
14579 mov.w &0x0121,EXC_VOFF(%a6) # set FSLW
14583 movq.l &0x2,%d0 # two bytes
14584 bsr.w restore # fix An
14586 mov.w &0x0141,EXC_VOFF(%a6) # set FSLW
14590 movq.l &0x4,%d0 # four bytes
14591 bsr.w restore # fix An
14593 mov.w &0x0101,EXC_VOFF(%a6) # set FSLW
14597 movq.l &0x8,%d0 # eight bytes
14598 bsr.w restore # fix An
14600 mov.w &0x0161,EXC_VOFF(%a6) # set FSLW
14604 movq.l &0xc,%d0 # twelve bytes
14605 bsr.w restore # fix An
14607 mov.w &0x0161,EXC_VOFF(%a6) # set FSLW
14610 ################################################################
14613 movq.l &0x1,%d0 # one byte
14614 bsr.w restore # restore An
14616 mov.w &0x00a1,EXC_VOFF(%a6) # set FSLW
14620 movq.l &0x2,%d0 # two bytes
14621 bsr.w restore # restore An
14623 mov.w &0x00c1,EXC_VOFF(%a6) # set FSLW
14627 movq.l &0x4,%d0 # four bytes
14628 bsr.w restore # restore An
14630 mov.w &0x0081,EXC_VOFF(%a6) # set FSLW
14634 movq.l &0x8,%d0 # eight bytes
14635 bsr.w restore # restore An
14637 mov.w &0x00e1,EXC_VOFF(%a6) # set FSLW
14641 mov.l &0xc,%d0 # twelve bytes (NOTE(review): siblings use movq.l; same effect, longer encoding)
14642 bsr.w restore # restore An
14644 mov.w &0x00e1,EXC_VOFF(%a6) # set FSLW
14646 # here's where we actually create the access error frame from the
14647 # current exception stack frame.
14649 mov.l USER_FPIAR(%a6),EXC_PC(%a6) # store current PC
14651 fmovm.x EXC_FPREGS(%a6),&0xc0 # restore fp0-fp1
14652 fmovm.l USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs
14653 movm.l EXC_DREGS(%a6),&0x0303 # restore d0-d1/a0-a1
14657 mov.l (%sp),-(%sp) # store SR, hi(PC)
14658 mov.l 0x8(%sp),0x4(%sp) # store lo(PC)
14659 mov.l 0xc(%sp),0x8(%sp) # store EA
14660 mov.l &0x00000001,0xc(%sp) # store FSLW
14661 mov.w 0x6(%sp),0xc(%sp) # fix FSLW (size)
14662 mov.w &0x4008,0x6(%sp) # store voff
14664 btst &0x5,(%sp) # supervisor or user mode?
14665 beq.b facc_out2 # user
14666 bset &0x2,0xd(%sp) # set supervisor TM bit
14671 ##################################################################
14673 # if the effective addressing mode was predecrement or postincrement,
14674 # the emulation has already changed its value to the correct post-
14675 # instruction value. but since we're exiting to the access error
14676 # handler, then AN must be returned to its pre-instruction value.
14679 mov.b EXC_OPWORD+0x1(%a6),%d1
14680 andi.b &0x38,%d1 # extract opmode
14681 cmpi.b %d1,&0x18 # postinc?
14683 cmpi.b %d1,&0x20 # predec?
14688 mov.b EXC_OPWORD+0x1(%a6),%d1
14689 andi.w &0x0007,%d1 # fetch An
14691 mov.w (tbl_rest_inc.b,%pc,%d1.w*2),%d1
14692 jmp (tbl_rest_inc.b,%pc,%d1.w*1)
14695 short ri_a0 - tbl_rest_inc
14696 short ri_a1 - tbl_rest_inc
14697 short ri_a2 - tbl_rest_inc
14698 short ri_a3 - tbl_rest_inc
14699 short ri_a4 - tbl_rest_inc
14700 short ri_a5 - tbl_rest_inc
14701 short ri_a6 - tbl_rest_inc
14702 short ri_a7 - tbl_rest_inc
14705 sub.l %d0,EXC_DREGS+0x8(%a6) # fix stacked a0
14708 sub.l %d0,EXC_DREGS+0xc(%a6) # fix stacked a1
14711 sub.l %d0,%a2 # fix a2
14714 sub.l %d0,%a3 # fix a3
14717 sub.l %d0,%a4 # fix a4
14720 sub.l %d0,%a5 # fix a5
14723 sub.l %d0,(%a6) # fix stacked a6
14725 # if it's a fmove out instruction, we don't have to fix a7
14726 # because we hadn't changed it yet. if it's an opclass two
14727 # instruction (data moved in) and the exception was in supervisor
14728 # mode, then a7 also wasn't updated. if it was user mode, then
14729 # restore the correct a7 which is in the USP currently.
14731 cmpi.b EXC_VOFF(%a6),&0x30 # move in or out?
14732 bne.b ri_a7_done # out
14734 btst &0x5,EXC_SR(%a6) # user or supervisor?
14735 bne.b ri_a7_done # supervisor
14736 movc %usp,%a0 # restore USP
14742 # need to invert adjustment value if the <ea> was predec