# $NetBSD: fpsp.s,v 1.5 2005/12/11 12:17:52 christos Exp $

#~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
# MOTOROLA MICROPROCESSOR & MEMORY TECHNOLOGY GROUP
# M68000 Hi-Performance Microprocessor Division
# M68060 Software Package Production Release
#
# M68060 Software Package Copyright (C) 1993, 1994, 1995, 1996 Motorola Inc.
# All rights reserved.
#
# THE SOFTWARE is provided on an "AS IS" basis and without warranty.
# To the maximum extent permitted by applicable law,
# MOTOROLA DISCLAIMS ALL WARRANTIES WHETHER EXPRESS OR IMPLIED,
# INCLUDING IMPLIED WARRANTIES OF MERCHANTABILITY OR FITNESS
# FOR A PARTICULAR PURPOSE and any warranty against infringement with
# regard to the SOFTWARE (INCLUDING ANY MODIFIED VERSIONS THEREOF)
# and any accompanying written materials.
#
# To the maximum extent permitted by applicable law,
# IN NO EVENT SHALL MOTOROLA BE LIABLE FOR ANY DAMAGES WHATSOEVER
# (INCLUDING WITHOUT LIMITATION, DAMAGES FOR LOSS OF BUSINESS PROFITS,
# BUSINESS INTERRUPTION, LOSS OF BUSINESS INFORMATION, OR OTHER PECUNIARY LOSS)
# ARISING OF THE USE OR INABILITY TO USE THE SOFTWARE.
#
# Motorola assumes no responsibility for the maintenance and support
# of the SOFTWARE.
#
# You are hereby granted a copyright license to use, modify, and distribute the
# SOFTWARE so long as this entire notice is retained without alteration
# in any modified and/or redistributed versions, and that such modified
# versions are clearly identified as such.
# No licenses are granted by implication, estoppel or otherwise under any
# patents or trademarks of Motorola, Inc.
#~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
# This file is appended to the top of the 060FPSP package
# and contains the entry points into the package. The user, in
# effect, branches to one of the branch table entries located
# after _060FPSP_TABLE.
# Also, subroutine stubs exist in this file (_fpsp_done for
# example) that are referenced by the FPSP package itself in order
# to call a given routine. The stub routine actually performs the
# callout. The FPSP code does a "bsr" to the stub routine. This
# extra layer of hierarchy adds a slight performance penalty but
# it makes the FPSP code easier to read and more maintainable.
	set		_off_fpu_dis, 0x20
###############################################################
# Here's the table of ENTRY POINTS for those linking the package.
###############################################################
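#########################################################################
# Each stub below reaches its callout through the branch table: the
# "mov.l" fetches the longword stored at the corresponding table entry
# (a displacement relative to _060FPSP_TABLE-0x80) into %d0, and the
# "pea.l" pushes the address formed by adding that displacement back to
# the same base. That pushed address is effectively the entry point the
# host system supplied for the callout.
#########################################################################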
	mov.l		(_060FPSP_TABLE-0x80+_off_done,%pc),%d0
	pea.l		(_060FPSP_TABLE-0x80,%pc,%d0)

	mov.l		(_060FPSP_TABLE-0x80+_off_ovfl,%pc),%d0
	pea.l		(_060FPSP_TABLE-0x80,%pc,%d0)

	mov.l		(_060FPSP_TABLE-0x80+_off_unfl,%pc),%d0
	pea.l		(_060FPSP_TABLE-0x80,%pc,%d0)

	mov.l		(_060FPSP_TABLE-0x80+_off_inex,%pc),%d0
	pea.l		(_060FPSP_TABLE-0x80,%pc,%d0)

	mov.l		(_060FPSP_TABLE-0x80+_off_bsun,%pc),%d0
	pea.l		(_060FPSP_TABLE-0x80,%pc,%d0)

	mov.l		(_060FPSP_TABLE-0x80+_off_operr,%pc),%d0
	pea.l		(_060FPSP_TABLE-0x80,%pc,%d0)

	mov.l		(_060FPSP_TABLE-0x80+_off_snan,%pc),%d0
	pea.l		(_060FPSP_TABLE-0x80,%pc,%d0)

	mov.l		(_060FPSP_TABLE-0x80+_off_dz,%pc),%d0
	pea.l		(_060FPSP_TABLE-0x80,%pc,%d0)

	mov.l		(_060FPSP_TABLE-0x80+_off_fline,%pc),%d0
	pea.l		(_060FPSP_TABLE-0x80,%pc,%d0)

	global		_real_fpu_disabled
	mov.l		(_060FPSP_TABLE-0x80+_off_fpu_dis,%pc),%d0
	pea.l		(_060FPSP_TABLE-0x80,%pc,%d0)

	mov.l		(_060FPSP_TABLE-0x80+_off_trap,%pc),%d0
	pea.l		(_060FPSP_TABLE-0x80,%pc,%d0)

	mov.l		(_060FPSP_TABLE-0x80+_off_trace,%pc),%d0
	pea.l		(_060FPSP_TABLE-0x80,%pc,%d0)

	mov.l		(_060FPSP_TABLE-0x80+_off_access,%pc),%d0
	pea.l		(_060FPSP_TABLE-0x80,%pc,%d0)

#######################################

	mov.l		(_060FPSP_TABLE-0x80+_off_imr,%pc),%d0
	pea.l		(_060FPSP_TABLE-0x80,%pc,%d0)

	mov.l		(_060FPSP_TABLE-0x80+_off_dmr,%pc),%d0
	pea.l		(_060FPSP_TABLE-0x80,%pc,%d0)

	mov.l		(_060FPSP_TABLE-0x80+_off_dmw,%pc),%d0
	pea.l		(_060FPSP_TABLE-0x80,%pc,%d0)

	global		_imem_read_word
	mov.l		(_060FPSP_TABLE-0x80+_off_irw,%pc),%d0
	pea.l		(_060FPSP_TABLE-0x80,%pc,%d0)

	global		_imem_read_long
	mov.l		(_060FPSP_TABLE-0x80+_off_irl,%pc),%d0
	pea.l		(_060FPSP_TABLE-0x80,%pc,%d0)

	global		_dmem_read_byte
	mov.l		(_060FPSP_TABLE-0x80+_off_drb,%pc),%d0
	pea.l		(_060FPSP_TABLE-0x80,%pc,%d0)

	global		_dmem_read_word
	mov.l		(_060FPSP_TABLE-0x80+_off_drw,%pc),%d0
	pea.l		(_060FPSP_TABLE-0x80,%pc,%d0)

	global		_dmem_read_long
	mov.l		(_060FPSP_TABLE-0x80+_off_drl,%pc),%d0
	pea.l		(_060FPSP_TABLE-0x80,%pc,%d0)

	global		_dmem_write_byte
	mov.l		(_060FPSP_TABLE-0x80+_off_dwb,%pc),%d0
	pea.l		(_060FPSP_TABLE-0x80,%pc,%d0)

	global		_dmem_write_word
	mov.l		(_060FPSP_TABLE-0x80+_off_dww,%pc),%d0
	pea.l		(_060FPSP_TABLE-0x80,%pc,%d0)

	global		_dmem_write_long
	mov.l		(_060FPSP_TABLE-0x80+_off_dwl,%pc),%d0
	pea.l		(_060FPSP_TABLE-0x80,%pc,%d0)
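# Note: the stubs above are exported ("global") so the rest of the FPSP
# package can simply "bsr" to them; each one then reaches the routine the
# host operating system supplied via the branch table, as described in the
# header comment of this file.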
# This file contains a set of define statements for constants
# in order to promote readability within the core code itself.

	set		LOCAL_SIZE, 192		# stack frame size(bytes)
	set		LV, -LOCAL_SIZE		# stack offset

	set		EXC_SR, 0x4		# stack status register
	set		EXC_PC, 0x6		# stack pc
	set		EXC_VOFF, 0xa		# stacked vector offset
	set		EXC_EA, 0xc		# stacked <ea>

	set		EXC_FP, 0x0		# frame pointer

	set		EXC_AREGS, -68		# offset of all address regs
	set		EXC_DREGS, -100		# offset of all data regs
	set		EXC_FPREGS, -36		# offset of all fp regs

	set		EXC_A7, EXC_AREGS+(7*4)	# offset of saved a7
	set		OLD_A7, EXC_AREGS+(6*4)	# extra copy of saved a7
	set		EXC_A6, EXC_AREGS+(6*4)	# offset of saved a6
	set		EXC_A5, EXC_AREGS+(5*4)
	set		EXC_A4, EXC_AREGS+(4*4)
	set		EXC_A3, EXC_AREGS+(3*4)
	set		EXC_A2, EXC_AREGS+(2*4)
	set		EXC_A1, EXC_AREGS+(1*4)
	set		EXC_A0, EXC_AREGS+(0*4)
	set		EXC_D7, EXC_DREGS+(7*4)
	set		EXC_D6, EXC_DREGS+(6*4)
	set		EXC_D5, EXC_DREGS+(5*4)
	set		EXC_D4, EXC_DREGS+(4*4)
	set		EXC_D3, EXC_DREGS+(3*4)
	set		EXC_D2, EXC_DREGS+(2*4)
	set		EXC_D1, EXC_DREGS+(1*4)
	set		EXC_D0, EXC_DREGS+(0*4)

	set		EXC_FP0, EXC_FPREGS+(0*12) # offset of saved fp0
	set		EXC_FP1, EXC_FPREGS+(1*12) # offset of saved fp1
	set		EXC_FP2, EXC_FPREGS+(2*12) # offset of saved fp2 (not used)

	set		FP_SCR1, LV+80		# fp scratch 1
	set		FP_SCR1_EX, FP_SCR1+0
	set		FP_SCR1_SGN, FP_SCR1+2
	set		FP_SCR1_HI, FP_SCR1+4
	set		FP_SCR1_LO, FP_SCR1+8

	set		FP_SCR0, LV+68		# fp scratch 0
	set		FP_SCR0_EX, FP_SCR0+0
	set		FP_SCR0_SGN, FP_SCR0+2
	set		FP_SCR0_HI, FP_SCR0+4
	set		FP_SCR0_LO, FP_SCR0+8

	set		FP_DST, LV+56		# fp destination operand
	set		FP_DST_EX, FP_DST+0
	set		FP_DST_SGN, FP_DST+2
	set		FP_DST_HI, FP_DST+4
	set		FP_DST_LO, FP_DST+8

	set		FP_SRC, LV+44		# fp source operand
	set		FP_SRC_EX, FP_SRC+0
	set		FP_SRC_SGN, FP_SRC+2
	set		FP_SRC_HI, FP_SRC+4
	set		FP_SRC_LO, FP_SRC+8

	set		USER_FPIAR, LV+40	# FP instr address register

	set		USER_FPSR, LV+36	# FP status register
	set		FPSR_CC, USER_FPSR+0	# FPSR condition codes
	set		FPSR_QBYTE, USER_FPSR+1	# FPSR quotient byte
	set		FPSR_EXCEPT, USER_FPSR+2 # FPSR exception status byte
	set		FPSR_AEXCEPT, USER_FPSR+3 # FPSR accrued exception byte

	set		USER_FPCR, LV+32	# FP control register
	set		FPCR_ENABLE, USER_FPCR+2 # FPCR exception enable
	set		FPCR_MODE, USER_FPCR+3	# FPCR rounding mode control

	set		L_SCR3, LV+28		# integer scratch 3
	set		L_SCR2, LV+24		# integer scratch 2
	set		L_SCR1, LV+20		# integer scratch 1

	set		STORE_FLG, LV+19	# flag: operand store (ie. not fcmp/ftst)

	set		EXC_TEMP2, LV+24	# temporary space
	set		EXC_TEMP, LV+16		# temporary space

	set		DTAG, LV+15		# destination operand type
	set		STAG, LV+14		# source operand type

	set		SPCOND_FLG, LV+10	# flag: special case (see below)

	set		EXC_CC, LV+8		# saved condition codes
	set		EXC_EXTWPTR, LV+4	# saved current PC (active)
	set		EXC_EXTWORD, LV+2	# saved extension word
	set		EXC_CMDREG, LV+2	# saved extension word
	set		EXC_OPWORD, LV+0	# saved operation word
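#########################################################################
# Note on the layout above: "link.w %a6,&-LOCAL_SIZE" reserves a 192-byte
# local frame below the frame pointer. Within it, the saved fp0/fp1
# images (plus an unused fp2 slot) start at EXC_FPREGS (-36 from %a6),
# the address registers at EXC_AREGS (-68), the data registers at
# EXC_DREGS (-100), and the FP_SRC/FP_DST/FP_SCR0/FP_SCR1 operand areas
# plus the saved control registers and scratch words sit at the
# LV-relative offsets listed.
#########################################################################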
################################

	set		FTEMP, 0		# offsets within an
	set		FTEMP_EX, 0		# extended precision
	set		FTEMP_SGN, 2		# value saved in memory.

	set		LOCAL, 0		# offsets within an
	set		LOCAL_EX, 0		# extended precision
	set		LOCAL_SGN, 2		# value saved in memory.

	set		DST, 0			# offsets within an
	set		DST_EX, 0		# extended precision
	set		DST_HI, 4		# value saved in memory.

	set		SRC, 0			# offsets within an
	set		SRC_EX, 0		# extended precision
	set		SRC_HI, 4		# value saved in memory.

	set		SGL_LO, 0x3f81		# min sgl prec exponent
	set		SGL_HI, 0x407e		# max sgl prec exponent
	set		DBL_LO, 0x3c01		# min dbl prec exponent
	set		DBL_HI, 0x43fe		# max dbl prec exponent
	set		EXT_LO, 0x0		# min ext prec exponent
	set		EXT_HI, 0x7ffe		# max ext prec exponent

	set		EXT_BIAS, 0x3fff	# extended precision bias
	set		SGL_BIAS, 0x007f	# single precision bias
	set		DBL_BIAS, 0x03ff	# double precision bias

	set		NORM, 0x00		# operand type for STAG/DTAG
	set		ZERO, 0x01		# operand type for STAG/DTAG
	set		INF, 0x02		# operand type for STAG/DTAG
	set		QNAN, 0x03		# operand type for STAG/DTAG
	set		DENORM, 0x04		# operand type for STAG/DTAG
	set		SNAN, 0x05		# operand type for STAG/DTAG
	set		UNNORM, 0x06		# operand type for STAG/DTAG

	set		neg_bit, 0x3		# negative result
	set		z_bit, 0x2		# zero result
	set		inf_bit, 0x1		# infinite result
	set		nan_bit, 0x0		# NAN result

	set		q_sn_bit, 0x7		# sign bit of quotient byte

	set		bsun_bit, 7		# branch on unordered
	set		snan_bit, 6		# signalling NAN
	set		operr_bit, 5		# operand error
	set		ovfl_bit, 4		# overflow
	set		unfl_bit, 3		# underflow
	set		dz_bit, 2		# divide by zero
	set		inex2_bit, 1		# inexact result 2
	set		inex1_bit, 0		# inexact result 1

	set		aiop_bit, 7		# accrued inexact operation bit
	set		aovfl_bit, 6		# accrued overflow bit
	set		aunfl_bit, 5		# accrued underflow bit
	set		adz_bit, 4		# accrued dz bit
	set		ainex_bit, 3		# accrued inexact bit

#############################
# FPSR individual bit masks #
#############################
	set		neg_mask, 0x08000000	# negative bit mask (lw)
	set		inf_mask, 0x02000000	# infinity bit mask (lw)
	set		z_mask, 0x04000000	# zero bit mask (lw)
	set		nan_mask, 0x01000000	# nan bit mask (lw)

	set		neg_bmask, 0x08		# negative bit mask (byte)
	set		inf_bmask, 0x02		# infinity bit mask (byte)
	set		z_bmask, 0x04		# zero bit mask (byte)
	set		nan_bmask, 0x01		# nan bit mask (byte)

	set		bsun_mask, 0x00008000	# bsun exception mask
	set		snan_mask, 0x00004000	# snan exception mask
	set		operr_mask, 0x00002000	# operr exception mask
	set		ovfl_mask, 0x00001000	# overflow exception mask
	set		unfl_mask, 0x00000800	# underflow exception mask
	set		dz_mask, 0x00000400	# dz exception mask
	set		inex2_mask, 0x00000200	# inex2 exception mask
	set		inex1_mask, 0x00000100	# inex1 exception mask

	set		aiop_mask, 0x00000080	# accrued illegal operation
	set		aovfl_mask, 0x00000040	# accrued overflow
	set		aunfl_mask, 0x00000020	# accrued underflow
	set		adz_mask, 0x00000010	# accrued divide by zero
	set		ainex_mask, 0x00000008	# accrued inexact

######################################
# FPSR combinations used in the FPSP #
######################################
	set		dzinf_mask, inf_mask+dz_mask+adz_mask
	set		opnan_mask, nan_mask+operr_mask+aiop_mask
	set		nzi_mask, 0x01ffffff	# clears N, Z, and I
	set		unfinx_mask, unfl_mask+inex2_mask+aunfl_mask+ainex_mask
	set		unf2inx_mask, unfl_mask+inex2_mask+ainex_mask
	set		ovfinx_mask, ovfl_mask+inex2_mask+aovfl_mask+ainex_mask
	set		inx1a_mask, inex1_mask+ainex_mask
	set		inx2a_mask, inex2_mask+ainex_mask
	set		snaniop_mask, nan_mask+snan_mask+aiop_mask
	set		snaniop2_mask, snan_mask+aiop_mask
	set		naniop_mask, nan_mask+aiop_mask
	set		neginf_mask, neg_mask+inf_mask
	set		infaiop_mask, inf_mask+aiop_mask
	set		negz_mask, neg_mask+z_mask
	set		opaop_mask, operr_mask+aiop_mask
	set		unfl_inx_mask, unfl_mask+aunfl_mask+ainex_mask
	set		ovfl_inx_mask, ovfl_mask+aovfl_mask+ainex_mask

	set		rnd_stky_bit, 29	# stky bit pos in longword

	set		sign_bit, 0x7		# sign bit
	set		signan_bit, 0x6		# signalling nan bit

	set		sgl_thresh, 0x3f81	# minimum sgl exponent
	set		dbl_thresh, 0x3c01	# minimum dbl exponent

	set		x_mode, 0x0		# extended precision
	set		s_mode, 0x4		# single precision
	set		d_mode, 0x8		# double precision

	set		rn_mode, 0x0		# round-to-nearest
	set		rz_mode, 0x1		# round-to-zero
	set		rm_mode, 0x2		# round-to-minus-infinity
	set		rp_mode, 0x3		# round-to-plus-infinity

	set		mantissalen, 64		# length of mantissa in bits

	set		BYTE, 1			# len(byte) == 1 byte
	set		WORD, 2			# len(word) == 2 bytes
	set		LONG, 4			# len(longword) == 4 bytes
	set		BSUN_VEC, 0xc0		# bsun vector offset
	set		INEX_VEC, 0xc4		# inexact vector offset
	set		DZ_VEC, 0xc8		# dz vector offset
	set		UNFL_VEC, 0xcc		# unfl vector offset
	set		OPERR_VEC, 0xd0		# operr vector offset
	set		OVFL_VEC, 0xd4		# ovfl vector offset
	set		SNAN_VEC, 0xd8		# snan vector offset

###########################
# SPecial CONDition FLaGs #
###########################
	set		ftrapcc_flg, 0x01	# flag bit: ftrapcc exception
	set		fbsun_flg, 0x02		# flag bit: bsun exception
	set		mia7_flg, 0x04		# flag bit: (a7)+ <ea>
	set		mda7_flg, 0x08		# flag bit: -(a7) <ea>
	set		fmovm_flg, 0x40		# flag bit: fmovm instruction
	set		immed_flg, 0x80		# flag bit: &<data> <ea>

##################################
# TRANSCENDENTAL "LAST-OP" FLAGS #
##################################
	set		FMUL_OP, 0x0		# fmul instr performed last
	set		FDIV_OP, 0x1		# fdiv performed last
	set		FADD_OP, 0x2		# fadd performed last
	set		FMOV_OP, 0x3		# fmov performed last

T1:	long		0x40C62D38,0xD3D64634	# 16381 LOG2 LEAD
T2:	long		0x3D6F90AE,0xB1E75CC7	# 16381 LOG2 TRAIL

PI:	long		0x40000000,0xC90FDAA2,0x2168C235,0x00000000
PIBY2:	long		0x3FFF0000,0xC90FDAA2,0x2168C235,0x00000000

	long		0x3FE45F30,0x6DC9C883
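# (the unlabeled pair above is the IEEE double-precision encoding of 2/PI,
#  i.e. 0x3FE45F306DC9C883)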
#########################################################################
# XDEF ****************************************************************	#
#	_fpsp_ovfl(): 060FPSP entry point for FP Overflow exception.	#
#									#
#	This handler should be the first code executed upon taking the	#
#	FP Overflow exception in an operating system.			#
#									#
# XREF ****************************************************************	#
#	_imem_read_long() - read instruction longword			#
#	fix_skewed_ops() - adjust src operand in fsave frame		#
#	set_tag_x() - determine optype of src/dst operands		#
#	store_fpreg() - store opclass 0 or 2 result to FP regfile	#
#	unnorm_fix() - change UNNORM operands to NORM or ZERO		#
#	load_fpn2() - load dst operand from FP regfile			#
#	fout() - emulate an opclass 3 instruction			#
#	tbl_unsupp - addr of table of emulation routines for opclass 0,2 #
#	_fpsp_done() - "callout" for 060FPSP exit (all work done!)	#
#	_real_ovfl() - "callout" for Overflow exception enabled code	#
#	_real_inex() - "callout" for Inexact exception enabled code	#
#	_real_trace() - "callout" for Trace exception code		#
#									#
# INPUT ***************************************************************	#
#	- The system stack contains the FP Ovfl exception stack frame	#
#	- The fsave frame contains the source operand			#
#									#
# OUTPUT **************************************************************	#
#	Overflow Exception enabled:					#
#	- The system stack is unchanged					#
#	- The fsave frame contains the adjusted src op for opclass 0,2	#
#	Overflow Exception disabled:					#
#	- The system stack is unchanged					#
#	- The "exception present" flag in the fsave frame is cleared	#
#									#
# ALGORITHM ***********************************************************	#
#	On the 060, if an FP overflow is present as the result of any	#
# instruction, the 060 will take an overflow exception whether the	#
# exception is enabled or disabled in the FPCR. For the disabled case,	#
# this handler emulates the instruction to determine what the correct	#
# default result should be for the operation. This default result is	#
# then stored in either the FP regfile, data regfile, or memory.	#
# Finally, the handler exits through the "callout" _fpsp_done()	#
# denoting that no exceptional conditions exist within the machine.	#
#	If the exception is enabled, then this handler must create the	#
# exceptional operand and place it in the fsave state frame, and store	#
# the default result (only if the instruction is opclass 3). For	#
# exceptions enabled, this handler must exit through the "callout"	#
# _real_ovfl() so that the operating system enabled overflow handler	#
# can handle this case.							#
#	Two other conditions exist. First, if overflow was disabled	#
# but the inexact exception was enabled, this handler must exit	#
# through the "callout" _real_inex() regardless of whether the result	#
# was inexact or not.							#
#	Also, in the case of an opclass three instruction where	#
# overflow was disabled and the trace exception was enabled, this	#
# handler must exit through the "callout" _real_trace().		#
#									#
#########################################################################
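# In outline, the decision flow described above is roughly:
#
#	emulate the faulting instruction (default result to fp0, EXOP to fp1);
#	if (FPCR overflow enable is set)
#		stuff the EXOP and overflow status into the fsave frame,
#		exit through _real_ovfl();
#	else if (FPCR inexact enable is set)
#		stuff the EXOP and inexact status (vector 0xc4) into the
#		fsave frame, exit through _real_inex();
#	else if (opclass 3 && trace pending)
#		convert the stack frame and exit through _real_trace();
#	else
#		exit through _fpsp_done();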
#$#	sub.l		&24,%sp			# make room for src/dst

	link.w		%a6,&-LOCAL_SIZE	# init stack frame

	fsave		FP_SRC(%a6)		# grab the "busy" frame

	movm.l		&0x0303,EXC_DREGS(%a6)	# save d0-d1/a0-a1
	fmovm.l		%fpcr,%fpsr,%fpiar,USER_FPCR(%a6) # save ctrl regs
	fmovm.x		&0xc0,EXC_FPREGS(%a6)	# save fp0-fp1 on stack

# the FPIAR holds the "current PC" of the faulting instruction
	mov.l		USER_FPIAR(%a6),EXC_EXTWPTR(%a6)
	mov.l		EXC_EXTWPTR(%a6),%a0	# fetch instruction addr
	addq.l		&0x4,EXC_EXTWPTR(%a6)	# incr instruction ptr
	bsr.l		_imem_read_long		# fetch the instruction words
	mov.l		%d0,EXC_OPWORD(%a6)

##############################################################################

	btst		&0x5,EXC_CMDREG(%a6)	# is instr an fmove out?

	lea		FP_SRC(%a6),%a0		# pass: ptr to src op
	bsr.l		fix_skewed_ops		# fix src op

# since, I believe, only NORMs and DENORMs can come through here,
# maybe we can avoid the subroutine call.
	lea		FP_SRC(%a6),%a0		# pass: ptr to src op
	bsr.l		set_tag_x		# tag the operand type
	mov.b		%d0,STAG(%a6)		# maybe NORM,DENORM

# bit five of the fp extension word separates the monadic and dyadic operations
# that can pass through fpsp_ovfl(). remember that fcmp, ftst, and fsincos
# will never take this exception.
	btst		&0x5,1+EXC_CMDREG(%a6)	# is operation monadic or dyadic?
	beq.b		fovfl_extract		# monadic

	bfextu		EXC_CMDREG(%a6){&6:&3},%d0 # dyadic; load dst reg
	bsr.l		load_fpn2		# load dst into FP_DST

	lea		FP_DST(%a6),%a0		# pass: ptr to dst op
	bsr.l		set_tag_x		# tag the operand type
	cmpi.b		%d0,&UNNORM		# is operand an UNNORM?
	bne.b		fovfl_op2_done		# no
	bsr.l		unnorm_fix		# yes; convert to NORM,DENORM,or ZERO

	mov.b		%d0,DTAG(%a6)		# save dst optype tag

#$#	mov.l		FP_SRC_EX(%a6),TRAP_SRCOP_EX(%a6)
#$#	mov.l		FP_SRC_HI(%a6),TRAP_SRCOP_HI(%a6)
#$#	mov.l		FP_SRC_LO(%a6),TRAP_SRCOP_LO(%a6)
#$#	mov.l		FP_DST_EX(%a6),TRAP_DSTOP_EX(%a6)
#$#	mov.l		FP_DST_HI(%a6),TRAP_DSTOP_HI(%a6)
#$#	mov.l		FP_DST_LO(%a6),TRAP_DSTOP_LO(%a6)

	mov.b		FPCR_MODE(%a6),%d0	# pass rnd prec/mode

	mov.b		1+EXC_CMDREG(%a6),%d1
	andi.w		&0x007f,%d1		# extract extension

	andi.l		&0x00ff01ff,USER_FPSR(%a6) # zero all but accrued field

	fmov.l		&0x0,%fpcr		# zero current control regs

# maybe we can make these entry points ONLY the OVFL entry points of each routine.
	mov.l		(tbl_unsupp.l,%pc,%d1.w*4),%d1 # fetch routine addr
	jsr		(tbl_unsupp.l,%pc,%d1.l*1)
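# Note on the dispatch just above: %d1 holds the instruction extension,
# which indexes tbl_unsupp as a table of longword entries; the fetched
# entry is an offset that the scaled-index "jsr" adds back to the table
# base to reach the emulation routine for this opclass 0/2 operation.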
# the operation has been emulated. the result is in fp0.
# the EXOP, if an exception occurred, is in fp1.
# we must save the default result regardless of whether
# traps are enabled or disabled.
	bfextu		EXC_CMDREG(%a6){&6:&3},%d0

# the exceptional possibilities we have left ourselves with are ONLY overflow
# and inexact. and, the inexact is such that overflow occurred and was disabled
# but inexact was enabled.
	btst		&ovfl_bit,FPCR_ENABLE(%a6)

	btst		&inex2_bit,FPCR_ENABLE(%a6)

	fmovm.x		EXC_FPREGS(%a6),&0xc0	# restore fp0-fp1
	fmovm.l		USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs
	movm.l		EXC_DREGS(%a6),&0x0303	# restore d0-d1/a0-a1

# overflow is enabled AND overflow, of course, occurred. so, we have the EXOP
# in fp1. now, simply jump to _real_ovfl()!
	fmovm.x		&0x40,FP_SRC(%a6)	# save EXOP (fp1) to stack

	mov.w		&0xe005,2+FP_SRC(%a6)	# save exc status

	fmovm.x		EXC_FPREGS(%a6),&0xc0	# restore fp0-fp1
	fmovm.l		USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs
	movm.l		EXC_DREGS(%a6),&0x0303	# restore d0-d1/a0-a1

	frestore	FP_SRC(%a6)		# do this after fmovm,other f<op>s!

# overflow occurred but is disabled. meanwhile, inexact is enabled. therefore,
# we must jump to real_inex().
	fmovm.x		&0x40,FP_SRC(%a6)	# save EXOP (fp1) to stack

	mov.b		&0xc4,1+EXC_VOFF(%a6)	# vector offset = 0xc4
	mov.w		&0xe001,2+FP_SRC(%a6)	# save exc status

	fmovm.x		EXC_FPREGS(%a6),&0xc0	# restore fp0-fp1
	fmovm.l		USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs
	movm.l		EXC_DREGS(%a6),&0x0303	# restore d0-d1/a0-a1

	frestore	FP_SRC(%a6)		# do this after fmovm,other f<op>s!

########################################################################

#$#	mov.l		FP_SRC_EX(%a6),TRAP_SRCOP_EX(%a6)
#$#	mov.l		FP_SRC_HI(%a6),TRAP_SRCOP_HI(%a6)
#$#	mov.l		FP_SRC_LO(%a6),TRAP_SRCOP_LO(%a6)

# the src operand is definitely a NORM(!), so tag it as such
	mov.b		&NORM,STAG(%a6)		# set src optype tag

	mov.b		FPCR_MODE(%a6),%d0	# pass rnd prec/mode

	and.l		&0xffff00ff,USER_FPSR(%a6) # zero all but accrued field

	fmov.l		&0x0,%fpcr		# zero current control regs

	lea		FP_SRC(%a6),%a0		# pass ptr to src operand

	btst		&ovfl_bit,FPCR_ENABLE(%a6)

	btst		&inex2_bit,FPCR_ENABLE(%a6)

	fmovm.x		EXC_FPREGS(%a6),&0xc0	# restore fp0-fp1
	fmovm.l		USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs
	movm.l		EXC_DREGS(%a6),&0x0303	# restore d0-d1/a0-a1

	btst		&0x7,(%sp)		# is trace on?
	beq.l		_fpsp_done		# no

	fmov.l		%fpiar,0x8(%sp)		# "Current PC" is in FPIAR
	mov.w		&0x2024,0x6(%sp)	# stk fmt = 0x2; voff = 0x024
#########################################################################
# XDEF ****************************************************************	#
#	_fpsp_unfl(): 060FPSP entry point for FP Underflow exception.	#
#									#
#	This handler should be the first code executed upon taking the	#
#	FP Underflow exception in an operating system.			#
#									#
# XREF ****************************************************************	#
#	_imem_read_long() - read instruction longword			#
#	fix_skewed_ops() - adjust src operand in fsave frame		#
#	set_tag_x() - determine optype of src/dst operands		#
#	store_fpreg() - store opclass 0 or 2 result to FP regfile	#
#	unnorm_fix() - change UNNORM operands to NORM or ZERO		#
#	load_fpn2() - load dst operand from FP regfile			#
#	fout() - emulate an opclass 3 instruction			#
#	tbl_unsupp - addr of table of emulation routines for opclass 0,2 #
#	_fpsp_done() - "callout" for 060FPSP exit (all work done!)	#
#	_real_unfl() - "callout" for Underflow exception enabled code	#
#	_real_inex() - "callout" for Inexact exception enabled code	#
#	_real_trace() - "callout" for Trace exception code		#
#									#
# INPUT ***************************************************************	#
#	- The system stack contains the FP Unfl exception stack frame	#
#	- The fsave frame contains the source operand			#
#									#
# OUTPUT **************************************************************	#
#	Underflow Exception enabled:					#
#	- The system stack is unchanged					#
#	- The fsave frame contains the adjusted src op for opclass 0,2	#
#	Underflow Exception disabled:					#
#	- The system stack is unchanged					#
#	- The "exception present" flag in the fsave frame is cleared	#
#									#
# ALGORITHM ***********************************************************	#
#	On the 060, if an FP underflow is present as the result of any	#
# instruction, the 060 will take an underflow exception whether the	#
# exception is enabled or disabled in the FPCR. For the disabled case,	#
# this handler emulates the instruction to determine what the correct	#
# default result should be for the operation. This default result is	#
# then stored in either the FP regfile, data regfile, or memory.	#
# Finally, the handler exits through the "callout" _fpsp_done()	#
# denoting that no exceptional conditions exist within the machine.	#
#	If the exception is enabled, then this handler must create the	#
# exceptional operand and place it in the fsave state frame, and store	#
# the default result (only if the instruction is opclass 3). For	#
# exceptions enabled, this handler must exit through the "callout"	#
# _real_unfl() so that the operating system enabled underflow handler	#
# can handle this case.							#
#	Two other conditions exist. First, if underflow was disabled	#
# but the inexact exception was enabled and the result was inexact,	#
# this handler must exit through the "callout" _real_inex().		#
#	Also, in the case of an opclass three instruction where	#
# underflow was disabled and the trace exception was enabled, this	#
# handler must exit through the "callout" _real_trace().		#
#									#
#########################################################################
#$#	sub.l		&24,%sp			# make room for src/dst

	link.w		%a6,&-LOCAL_SIZE	# init stack frame

	fsave		FP_SRC(%a6)		# grab the "busy" frame

	movm.l		&0x0303,EXC_DREGS(%a6)	# save d0-d1/a0-a1
	fmovm.l		%fpcr,%fpsr,%fpiar,USER_FPCR(%a6) # save ctrl regs
	fmovm.x		&0xc0,EXC_FPREGS(%a6)	# save fp0-fp1 on stack

# the FPIAR holds the "current PC" of the faulting instruction
	mov.l		USER_FPIAR(%a6),EXC_EXTWPTR(%a6)
	mov.l		EXC_EXTWPTR(%a6),%a0	# fetch instruction addr
	addq.l		&0x4,EXC_EXTWPTR(%a6)	# incr instruction ptr
	bsr.l		_imem_read_long		# fetch the instruction words
	mov.l		%d0,EXC_OPWORD(%a6)

##############################################################################

	btst		&0x5,EXC_CMDREG(%a6)	# is instr an fmove out?

	lea		FP_SRC(%a6),%a0		# pass: ptr to src op
	bsr.l		fix_skewed_ops		# fix src op

	lea		FP_SRC(%a6),%a0		# pass: ptr to src op
	bsr.l		set_tag_x		# tag the operand type
	mov.b		%d0,STAG(%a6)		# maybe NORM,DENORM

# bit five of the fp ext word separates the monadic and dyadic operations
# that can pass through fpsp_unfl(). remember that fcmp, and ftst
# will never take this exception.
	btst		&0x5,1+EXC_CMDREG(%a6)	# is op monadic or dyadic?
	beq.b		funfl_extract		# monadic

# now, what's left that's not dyadic is fsincos. we can distinguish it
# from all dyadics by the '0110xxx pattern
	btst		&0x4,1+EXC_CMDREG(%a6)	# is op an fsincos?
	bne.b		funfl_extract		# yes

	bfextu		EXC_CMDREG(%a6){&6:&3},%d0 # dyadic; load dst reg
	bsr.l		load_fpn2		# load dst into FP_DST

	lea		FP_DST(%a6),%a0		# pass: ptr to dst op
	bsr.l		set_tag_x		# tag the operand type
	cmpi.b		%d0,&UNNORM		# is operand an UNNORM?
	bne.b		funfl_op2_done		# no
	bsr.l		unnorm_fix		# yes; convert to NORM,DENORM,or ZERO

	mov.b		%d0,DTAG(%a6)		# save dst optype tag

#$#	mov.l		FP_SRC_EX(%a6),TRAP_SRCOP_EX(%a6)
#$#	mov.l		FP_SRC_HI(%a6),TRAP_SRCOP_HI(%a6)
#$#	mov.l		FP_SRC_LO(%a6),TRAP_SRCOP_LO(%a6)
#$#	mov.l		FP_DST_EX(%a6),TRAP_DSTOP_EX(%a6)
#$#	mov.l		FP_DST_HI(%a6),TRAP_DSTOP_HI(%a6)
#$#	mov.l		FP_DST_LO(%a6),TRAP_DSTOP_LO(%a6)

	mov.b		FPCR_MODE(%a6),%d0	# pass rnd prec/mode

	mov.b		1+EXC_CMDREG(%a6),%d1
	andi.w		&0x007f,%d1		# extract extension

	andi.l		&0x00ff01ff,USER_FPSR(%a6)

	fmov.l		&0x0,%fpcr		# zero current control regs

# maybe we can make these entry points ONLY the OVFL entry points of each routine.
	mov.l		(tbl_unsupp.l,%pc,%d1.w*4),%d1 # fetch routine addr
	jsr		(tbl_unsupp.l,%pc,%d1.l*1)

	bfextu		EXC_CMDREG(%a6){&6:&3},%d0

# The `060 FPU multiplier hardware is such that if the result of a
# multiply operation is the smallest possible normalized number
# (0x00000000_80000000_00000000), then the machine will take an
# underflow exception. Since this is incorrect, we need to check
# if our emulation, after re-doing the operation, decided that
# no underflow was called for. We do these checks only in
# funfl_{unfl,inex}_on() because w/ both exceptions disabled, this
# special case will simply exit gracefully with the correct result.

# the exceptional possibilities we have left ourselves with are ONLY underflow
# and inexact. and, the inexact is such that underflow occurred and was disabled
# but inexact was enabled.
	btst		&unfl_bit,FPCR_ENABLE(%a6)

	btst		&inex2_bit,FPCR_ENABLE(%a6)

	fmovm.x		EXC_FPREGS(%a6),&0xc0	# restore fp0-fp1
	fmovm.l		USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs
	movm.l		EXC_DREGS(%a6),&0x0303	# restore d0-d1/a0-a1

# underflow is enabled AND underflow, of course, occurred. so, we have the EXOP
# in fp1 (don't forget to save fp0). what to do now?
# well, we simply have to go to _real_unfl()!

# The `060 FPU multiplier hardware is such that if the result of a
# multiply operation is the smallest possible normalized number
# (0x00000000_80000000_00000000), then the machine will take an
# underflow exception. Since this is incorrect, we check here to see
# if our emulation, after re-doing the operation, decided that
# no underflow was called for.
	btst		&unfl_bit,FPSR_EXCEPT(%a6)

	fmovm.x		&0x40,FP_SRC(%a6)	# save EXOP (fp1) to stack

	mov.w		&0xe003,2+FP_SRC(%a6)	# save exc status

	fmovm.x		EXC_FPREGS(%a6),&0xc0	# restore fp0-fp1
	fmovm.l		USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs
	movm.l		EXC_DREGS(%a6),&0x0303	# restore d0-d1/a0-a1

	frestore	FP_SRC(%a6)		# do this after fmovm,other f<op>s!

# underflow occurred but is disabled. meanwhile, inexact is enabled. therefore,
# we must jump to real_inex().

# The `060 FPU multiplier hardware is such that if the result of a
# multiply operation is the smallest possible normalized number
# (0x00000000_80000000_00000000), then the machine will take an
# underflow exception.
# But, whether bogus or not, if inexact is enabled AND it occurred,
# then we have to branch to real_inex.

	btst		&inex2_bit,FPSR_EXCEPT(%a6)

	fmovm.x		&0x40,FP_SRC(%a6)	# save EXOP to stack

	mov.b		&0xc4,1+EXC_VOFF(%a6)	# vector offset = 0xc4
	mov.w		&0xe001,2+FP_SRC(%a6)	# save exc status

	fmovm.x		EXC_FPREGS(%a6),&0xc0	# restore fp0-fp1
	fmovm.l		USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs
	movm.l		EXC_DREGS(%a6),&0x0303	# restore d0-d1/a0-a1

	frestore	FP_SRC(%a6)		# do this after fmovm,other f<op>s!

#######################################################################

#$#	mov.l		FP_SRC_EX(%a6),TRAP_SRCOP_EX(%a6)
#$#	mov.l		FP_SRC_HI(%a6),TRAP_SRCOP_HI(%a6)
#$#	mov.l		FP_SRC_LO(%a6),TRAP_SRCOP_LO(%a6)

# the src operand is definitely a NORM(!), so tag it as such
	mov.b		&NORM,STAG(%a6)		# set src optype tag

	mov.b		FPCR_MODE(%a6),%d0	# pass rnd prec/mode

	and.l		&0xffff00ff,USER_FPSR(%a6) # zero all but accrued field

	fmov.l		&0x0,%fpcr		# zero current control regs

	lea		FP_SRC(%a6),%a0		# pass ptr to src operand

	btst		&unfl_bit,FPCR_ENABLE(%a6)
	bne.w		funfl_unfl_on2

	btst		&inex2_bit,FPCR_ENABLE(%a6)
	bne.w		funfl_inex_on2

	fmovm.x		EXC_FPREGS(%a6),&0xc0	# restore fp0-fp1
	fmovm.l		USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs
	movm.l		EXC_DREGS(%a6),&0x0303	# restore d0-d1/a0-a1

	btst		&0x7,(%sp)		# is trace on?
	beq.l		_fpsp_done		# no

	fmov.l		%fpiar,0x8(%sp)		# "Current PC" is in FPIAR
	mov.w		&0x2024,0x6(%sp)	# stk fmt = 0x2; voff = 0x024
#########################################################################
# XDEF ****************************************************************	#
#	_fpsp_unsupp(): 060FPSP entry point for FP "Unimplemented	#
#			Data Type" exception.				#
#									#
#	This handler should be the first code executed upon taking the	#
#	FP Unimplemented Data Type exception in an operating system.	#
#									#
# XREF ****************************************************************	#
#	_imem_read_{word,long}() - read instruction word/longword	#
#	fix_skewed_ops() - adjust src operand in fsave frame		#
#	set_tag_x() - determine optype of src/dst operands		#
#	store_fpreg() - store opclass 0 or 2 result to FP regfile	#
#	unnorm_fix() - change UNNORM operands to NORM or ZERO		#
#	load_fpn2() - load dst operand from FP regfile			#
#	load_fpn1() - load src operand from FP regfile			#
#	fout() - emulate an opclass 3 instruction			#
#	tbl_unsupp - addr of table of emulation routines for opclass 0,2 #
#	_real_inex() - "callout" to operating system inexact handler	#
#	_fpsp_done() - "callout" for exit; work all done		#
#	_real_trace() - "callout" for Trace enabled exception		#
#	funimp_skew() - adjust fsave src ops to "incorrect" value	#
#	_real_snan() - "callout" for SNAN exception			#
#	_real_operr() - "callout" for OPERR exception			#
#	_real_ovfl() - "callout" for OVFL exception			#
#	_real_unfl() - "callout" for UNFL exception			#
#	get_packed() - fetch packed operand from memory		#
#									#
# INPUT ***************************************************************	#
#	- The system stack contains the "Unimp Data Type" stk frame	#
#	- The fsave frame contains the src op (for UNNORM/DENORM)	#
#									#
# OUTPUT **************************************************************	#
#	If Inexact exception (opclass 3):				#
#	- The system stack is changed to an Inexact exception stk frame #
#	If SNAN exception (opclass 3):					#
#	- The system stack is changed to an SNAN exception stk frame	#
#	If OPERR exception (opclass 3):					#
#	- The system stack is changed to an OPERR exception stk frame	#
#	If OVFL exception (opclass 3):					#
#	- The system stack is changed to an OVFL exception stk frame	#
#	If UNFL exception (opclass 3):					#
#	- The system stack is changed to an UNFL exception stack frame	#
#	If Trace exception enabled:					#
#	- The system stack is changed to a Trace exception stack frame	#
#	Else: (normal case)						#
#	- Correct result has been stored as appropriate			#
#									#
# ALGORITHM ***********************************************************	#
#	Two main instruction types can enter here: (1) DENORM or UNNORM #
# unimplemented data types. These can be either opclass 0,2 or 3	#
# instructions, and (2) PACKED unimplemented data format instructions	#
# also of opclasses 0,2, or 3.						#
#	For UNNORM/DENORM opclass 0 and 2, the handler fetches the src	#
# operand from the fsave state frame and the dst operand (if dyadic)	#
# from the FP register file. The instruction is then emulated by	#
# choosing an emulation routine from a table of routines indexed by	#
# instruction type. Once the instruction has been emulated and result	#
# saved, then we check to see if any enabled exceptions resulted from	#
# instruction emulation. If none, then we exit through the "callout"	#
# _fpsp_done(). If there is an enabled FP exception, then we insert	#
# this exception into the FPU in the fsave state frame and then exit	#
# through _fpsp_done().							#
#	PACKED opclass 0 and 2 is similar in how the instruction is	#
# emulated and exceptions handled. The differences occur in how the	#
# handler loads the packed op (by calling get_packed() routine) and	#
# by the fact that a Trace exception could be pending for PACKED ops.	#
# If a Trace exception is pending, then the current exception stack	#
# frame is changed to a Trace exception stack frame and an exit is	#
# made through _real_trace().						#
#	For UNNORM/DENORM opclass 3, the actual move out to memory is	#
# performed by calling the routine fout(). If no exception should occur #
# as the result of emulation, then an exit either occurs through	#
# _fpsp_done() or through _real_trace() if a Trace exception is pending #
# (a Trace stack frame must be created here, too). If an FP exception	#
# should occur, then we must create an exception stack frame of that	#
# type and jump to either _real_snan(), _real_operr(), _real_inex(),	#
# _real_unfl(), or _real_ovfl() as appropriate. PACKED opclass 3	#
# emulation is performed in a similar manner.				#
#									#
#########################################################################
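# In outline, the handling described above is roughly:
#
#	save regs, read the instruction word(s), clear SPCOND_FLG;
#	if (opclass 3)			/* fmove out */
#		emulate the move out (fout()) and exit, converting the
#		stack frame if an FP or Trace exception must be reported;
#	else if (source is PACKED)
#		fetch it with get_packed() and emulate through tbl_unsupp;
#	else				/* UNNORM/DENORM, opclass 0/2 */
#		fix the skewed fsave operand, tag src/dst, emulate through
#		tbl_unsupp, store the result, and either exit through
#		_fpsp_done() or first stuff the enabled exception back
#		into the fsave frame.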
# (1) DENORM and UNNORM (unimplemented) data types:
#
#	stack frames: a pre-instruction frame (format 0x0, voff 0x0dc)
#	and a format 0x3 frame (voff 0x0dc)
#
# (2) PACKED format (unsupported) opclasses two and three:
	link.w		%a6,&-LOCAL_SIZE	# init stack frame

	fsave		FP_SRC(%a6)		# save fp state

	movm.l		&0x0303,EXC_DREGS(%a6)	# save d0-d1/a0-a1
	fmovm.l		%fpcr,%fpsr,%fpiar,USER_FPCR(%a6) # save ctrl regs
	fmovm.x		&0xc0,EXC_FPREGS(%a6)	# save fp0-fp1 on stack

	btst		&0x5,EXC_SR(%a6)	# user or supervisor mode?

	mov.l		%usp,%a0		# fetch user stack pointer
	mov.l		%a0,EXC_A7(%a6)		# save on stack

# if the exception is an opclass zero or two unimplemented data type
# exception, then the a7' calculated here is wrong since it doesn't
# stack an ea. however, we don't need an a7' for this case anyways.
	lea		0x4+EXC_EA(%a6),%a0	# load old a7'
	mov.l		%a0,EXC_A7(%a6)		# save on stack

# the FPIAR holds the "current PC" of the faulting instruction
# the FPIAR should be set correctly for ALL exceptions passing through
# this point.
	mov.l		USER_FPIAR(%a6),EXC_EXTWPTR(%a6)
	mov.l		EXC_EXTWPTR(%a6),%a0	# fetch instruction addr
	addq.l		&0x4,EXC_EXTWPTR(%a6)	# incr instruction ptr
	bsr.l		_imem_read_long		# fetch the instruction words
	mov.l		%d0,EXC_OPWORD(%a6)	# store OPWORD and EXTWORD

############################

	clr.b		SPCOND_FLG(%a6)		# clear special condition flag

# Separate opclass three (fpn-to-mem) ops since they have a different
# stack frame and protocol.
	btst		&0x5,EXC_CMDREG(%a6)	# is it an fmove out?

# Separate packed opclass two instructions.
	bfextu		EXC_CMDREG(%a6){&0:&6},%d0

# I'm not sure at this point what FPSR bits are valid for this instruction.
# so, since the emulation routines re-create them anyways, zero exception field
	andi.l		&0x00ff00ff,USER_FPSR(%a6) # zero exception field

	fmov.l		&0x0,%fpcr		# zero current control regs

# Opclass two w/ memory-to-fpn operation will have an incorrect extended
# precision format if the src format was single or double and the
# source data type was an INF, NAN, DENORM, or UNNORM
	lea		FP_SRC(%a6),%a0		# pass ptr to input
	bsr.l		fix_skewed_ops

# we don't know whether the src operand or the dst operand (or both) is the
# UNNORM or DENORM. call the function that tags the operand type. if the
# input is an UNNORM, then convert it to a NORM, DENORM, or ZERO.
	lea		FP_SRC(%a6),%a0		# pass: ptr to src op
	bsr.l		set_tag_x		# tag the operand type
	cmpi.b		%d0,&UNNORM		# is operand an UNNORM?
	bsr.l		unnorm_fix		# yes; convert to NORM,DENORM,or ZERO

	mov.b		%d0,STAG(%a6)		# save src optype tag

	bfextu		EXC_CMDREG(%a6){&6:&3},%d0 # dyadic; load dst reg

# bit five of the fp extension word separates the monadic and dyadic operations
	btst		&0x5,1+EXC_CMDREG(%a6)	# is operation monadic or dyadic?
	beq.b		fu_extract		# monadic
	cmpi.b		1+EXC_CMDREG(%a6),&0x3a	# is operation an ftst?
	beq.b		fu_extract		# yes, so it's monadic, too

	bsr.l		load_fpn2		# load dst into FP_DST

	lea		FP_DST(%a6),%a0		# pass: ptr to dst op
	bsr.l		set_tag_x		# tag the operand type
	cmpi.b		%d0,&UNNORM		# is operand an UNNORM?
	bne.b		fu_op2_done		# no
	bsr.l		unnorm_fix		# yes; convert to NORM,DENORM,or ZERO

	mov.b		%d0,DTAG(%a6)		# save dst optype tag

	mov.b		FPCR_MODE(%a6),%d0	# fetch rnd mode/prec

	bfextu		1+EXC_CMDREG(%a6){&1:&7},%d1 # extract extension

	mov.l		(tbl_unsupp.l,%pc,%d1.l*4),%d1 # fetch routine addr
	jsr		(tbl_unsupp.l,%pc,%d1.l*1)

# Exceptions in order of precedence:
#	SNAN	: all dyadic ops
#	OPERR	: fsqrt(-NORM)
#	OVFL	: all except ftst,fcmp
#	UNFL	: all except ftst,fcmp
#	INEX2	: all except ftst,fcmp
#	INEX1	: none (packed doesn't go through here)

# we determine the highest priority exception(if any) set by the
# emulation routine that has also been enabled by the user.
	mov.b		FPCR_ENABLE(%a6),%d0	# fetch exceptions set
	bne.b		fu_in_ena		# some are enabled

# fcmp and ftst do not store any result.
	mov.b		1+EXC_CMDREG(%a6),%d0	# fetch extension
	andi.b		&0x38,%d0		# extract bits 3-5
	cmpi.b		%d0,&0x38		# is instr fcmp or ftst?
	beq.b		fu_in_exit		# yes

	bfextu		EXC_CMDREG(%a6){&6:&3},%d0 # dyadic; load dst reg
	bsr.l		store_fpreg		# store the result

	fmovm.x		EXC_FPREGS(%a6),&0xc0	# restore fp0/fp1
	fmovm.l		USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs
	movm.l		EXC_DREGS(%a6),&0x0303	# restore d0-d1/a0-a1

	and.b		FPSR_EXCEPT(%a6),%d0	# keep only ones enabled
	bfffo		%d0{&24:&8},%d0		# find highest priority exception
	bne.b		fu_in_exc		# there is at least one set

# No exceptions occurred that were also enabled. Now:
#
#   if (OVFL && ovfl_disabled && inexact_enabled) {
#	branch to _real_inex() (even if the result was exact!);
#   } else {
#	save the result in the proper fp reg (unless the op is fcmp or ftst);
#	return;
#   }
	btst		&ovfl_bit,FPSR_EXCEPT(%a6) # was overflow set?
	beq.b		fu_in_cont		# no

	btst		&inex2_bit,FPCR_ENABLE(%a6) # was inexact enabled?
	beq.b		fu_in_cont		# no
	bra.w		fu_in_exc_ovfl		# go insert overflow frame

# An exception occurred and that exception was enabled:
#
#   shift enabled exception field into lo byte of d0;
#   if (((INEX2 || INEX1) && inex_enabled && OVFL && ovfl_disabled) ||
#	((INEX2 || INEX1) && inex_enabled && UNFL && unfl_disabled)) {
#	/*
#	 * this is the case where we must call _real_inex() now or else
#	 * there will be no other way to pass it the exceptional operand
#	 */
#	call _real_inex();
#   } else {
#	restore exc state (SNAN||OPERR||OVFL||UNFL||DZ||INEX) into the FPU;
#   }
	subi.l		&24,%d0			# fix offset to be 0-8
	cmpi.b		%d0,&0x6		# is exception INEX? (6)
	bne.b		fu_in_exc_exit		# no

# the enabled exception was inexact
	btst		&unfl_bit,FPSR_EXCEPT(%a6) # did disabled underflow occur?
	bne.w		fu_in_exc_unfl		# yes
	btst		&ovfl_bit,FPSR_EXCEPT(%a6) # did disabled overflow occur?
	bne.w		fu_in_exc_ovfl		# yes

# here, we insert the correct fsave status value into the fsave frame for the
# corresponding exception. the operand in the fsave frame should be the original
# src operand.
	mov.l		%d0,-(%sp)		# save d0
	bsr.l		funimp_skew		# skew sgl or dbl inputs
	mov.l		(%sp)+,%d0		# restore d0

	mov.w		(tbl_except.b,%pc,%d0.w*2),2+FP_SRC(%a6) # create exc status

	fmovm.x		EXC_FPREGS(%a6),&0xc0	# restore fp0/fp1
	fmovm.l		USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs
	movm.l		EXC_DREGS(%a6),&0x0303	# restore d0-d1/a0-a1

	frestore	FP_SRC(%a6)		# restore src op

	short		0xe000,0xe006,0xe004,0xe005
	short		0xe003,0xe002,0xe001,0xe001
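# Note: the eight status words above (indexed by exception priority:
# bsun, snan, operr, ovfl, unfl, dz, inex2, inex1) are the fsave
# "exception pending" status values written into 2+FP_SRC(%a6) above,
# matching the 0xe00x constants stuffed explicitly elsewhere in this file.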
	bra.b		fu_in_exc_exit

	bra.b		fu_in_exc_exit

# If the input operand to this operation was opclass two and a single
# or double precision denorm, inf, or nan, the operand needs to be
# "corrected" in order to have the proper equivalent extended precision
# number.
	global		fix_skewed_ops
	bfextu		EXC_CMDREG(%a6){&0:&6},%d0 # extract opclass,src fmt
	cmpi.b		%d0,&0x11		# is class = 2 & fmt = sgl?
	cmpi.b		%d0,&0x15		# is class = 2 & fmt = dbl?

	mov.w		LOCAL_EX(%a0),%d0	# fetch src exponent
	andi.w		&0x7fff,%d0		# strip sign
	cmpi.w		%d0,&0x3f80		# is |exp| == $3f80?
	beq.b		fso_sgl_dnrm_zero	# yes
	cmpi.w		%d0,&0x407f		# no; is |exp| == $407f?
	beq.b		fso_infnan		# yes

	andi.l		&0x7fffffff,LOCAL_HI(%a0) # clear j-bit
	beq.b		fso_zero		# it's a skewed zero

# here, we count on norm not to alter a0...
	bsr.l		norm			# normalize mantissa
	neg.w		%d0			# -shft amt
	addi.w		&0x3f81,%d0		# adjust new exponent
	andi.w		&0x8000,LOCAL_EX(%a0)	# clear old exponent
	or.w		%d0,LOCAL_EX(%a0)	# insert new exponent

	andi.w		&0x8000,LOCAL_EX(%a0)	# clear bogus exponent

	andi.b		&0x7f,LOCAL_HI(%a0)	# clear j-bit
	ori.w		&0x7fff,LOCAL_EX(%a0)	# make exponent = $7fff

	mov.w		LOCAL_EX(%a0),%d0	# fetch src exponent
	andi.w		&0x7fff,%d0		# strip sign
	cmpi.w		%d0,&0x3c00		# is |exp| == $3c00?
	beq.b		fso_dbl_dnrm_zero	# yes
	cmpi.w		%d0,&0x43ff		# no; is |exp| == $43ff?
	beq.b		fso_infnan		# yes

	andi.l		&0x7fffffff,LOCAL_HI(%a0) # clear j-bit
	bne.b		fso_dbl_dnrm		# it's a skewed denorm
	tst.l		LOCAL_LO(%a0)		# is it a zero?
	beq.b		fso_zero		# yes

# here, we count on norm not to alter a0...
	bsr.l		norm			# normalize mantissa
	neg.w		%d0			# -shft amt
	addi.w		&0x3c01,%d0		# adjust new exponent
	andi.w		&0x8000,LOCAL_EX(%a0)	# clear old exponent
	or.w		%d0,LOCAL_EX(%a0)	# insert new exponent
#################################################################

# fmove out took an unimplemented data type exception.
# the src operand is in FP_SRC. Call fout() to write out the result and
# to determine which exceptions, if any, to take.

# Separate packed move outs from the UNNORM and DENORM move outs.
	bfextu		EXC_CMDREG(%a6){&3:&3},%d0

# I'm not sure at this point what FPSR bits are valid for this instruction.
# so, since the emulation routines re-create them anyways, zero exception field.
# fmove out doesn't affect ccodes.
	and.l		&0xffff00ff,USER_FPSR(%a6) # zero exception field

	fmov.l		&0x0,%fpcr		# zero current control regs

# the src can ONLY be a DENORM or an UNNORM! so, don't make any big subroutine
# call here. just figure out what it is...
	mov.w		FP_SRC_EX(%a6),%d0	# get exponent
	andi.w		&0x7fff,%d0		# strip sign
	beq.b		fu_out_denorm		# it's a DENORM

	bsr.l		unnorm_fix		# yes; fix it

	mov.b		&DENORM,STAG(%a6)

	mov.b		FPCR_MODE(%a6),%d0	# fetch rnd mode/prec

	lea		FP_SRC(%a6),%a0		# pass ptr to src operand

	mov.l		(%a6),EXC_A6(%a6)	# in case a6 changes
	bsr.l		fout			# call fmove out routine

# Exceptions in order of precedence:
#	OPERR	: fmove.{b,w,l} out of large UNNORM
#	OVFL	: fmove.{s,d}
#	UNFL	: fmove.{s,d,x}
#	INEX1	: none (packed doesn't travel through here)

# determine the highest priority exception(if any) set by the
# emulation routine that has also been enabled by the user.
	mov.b		FPCR_ENABLE(%a6),%d0	# fetch exceptions enabled
	bne.w		fu_out_ena		# some are enabled

	mov.l		EXC_A6(%a6),(%a6)	# in case a6 changed

# on extended precision opclass three instructions using pre-decrement or
# post-increment addressing mode, the address register is not updated. if the
# address register was the stack pointer used from user mode, then let's update
# it here. if it was used from supervisor mode, then we have to handle this
# as a special case.
	btst		&0x5,EXC_SR(%a6)

	mov.l		EXC_A7(%a6),%a0		# restore a7

	fmovm.x		EXC_FPREGS(%a6),&0xc0	# restore fp0/fp1
	fmovm.l		USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs
	movm.l		EXC_DREGS(%a6),&0x0303	# restore d0-d1/a0-a1

	btst		&0x7,(%sp)		# is trace on?
	bne.b		fu_out_trace		# yes

# is the ea mode pre-decrement of the stack pointer from supervisor mode?
# ("fmov.x fpm,-(a7)") if so,
	cmpi.b		SPCOND_FLG(%a6),&mda7_flg
	bne.b		fu_out_done_cont

# the extended precision result is still in fp0. but, we need to save it
# somewhere on the stack until we can copy it to its final resting place.
# here, we're counting on the top of the stack to be the old place-holders
# for fp0/fp1 which have already been restored. that way, we can write
# over those destinations with the shifted stack frame.
	fmovm.x		&0x80,FP_SRC(%a6)	# put answer on stack

	fmovm.x		EXC_FPREGS(%a6),&0xc0	# restore fp0/fp1
	fmovm.l		USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs
	movm.l		EXC_DREGS(%a6),&0x0303	# restore d0-d1/a0-a1

	mov.l		(%a6),%a6		# restore frame pointer

	mov.l		LOCAL_SIZE+EXC_SR(%sp),LOCAL_SIZE+EXC_SR-0xc(%sp)
	mov.l		LOCAL_SIZE+2+EXC_PC(%sp),LOCAL_SIZE+2+EXC_PC-0xc(%sp)

# now, copy the result to the proper place on the stack
	mov.l		LOCAL_SIZE+FP_SRC_EX(%sp),LOCAL_SIZE+EXC_SR+0x0(%sp)
	mov.l		LOCAL_SIZE+FP_SRC_HI(%sp),LOCAL_SIZE+EXC_SR+0x4(%sp)
	mov.l		LOCAL_SIZE+FP_SRC_LO(%sp),LOCAL_SIZE+EXC_SR+0x8(%sp)

	add.l		&LOCAL_SIZE-0x8,%sp

	and.b		FPSR_EXCEPT(%a6),%d0	# keep only ones enabled
	bfffo		%d0{&24:&8},%d0		# find highest priority exception
	bne.b		fu_out_exc		# there is at least one set

# no exceptions were set.
# if a disabled overflow occurred and inexact was enabled but the result
# was exact, then a branch to _real_inex() is made.
	btst		&ovfl_bit,FPSR_EXCEPT(%a6) # was overflow set?
	beq.w		fu_out_done		# no

	btst		&inex2_bit,FPCR_ENABLE(%a6) # was inexact enabled?
	beq.w		fu_out_done		# no

# The fp move out that took the "Unimplemented Data Type" exception was
# being traced. Since the stack frames are similar, get the "current" PC
# from FPIAR and put it in the trace stack frame then jump to _real_trace().
#	UNSUPP FRAME (format 0x3, voff 0x0dc, <EA> stacked)  becomes
#	TRACE FRAME (format 0x2, voff 0x024, "Current PC" stacked)

	mov.w		&0x2024,0x6(%sp)
	fmov.l		%fpiar,0x8(%sp)
# an exception occurred and that exception was enabled.
	subi.l		&24,%d0			# fix offset to be 0-8

# we don't mess with the existing fsave frame. just re-insert it and
# jump to the "_real_{}()" handler...
	mov.w		(tbl_fu_out.b,%pc,%d0.w*2),%d0
	jmp		(tbl_fu_out.b,%pc,%d0.w*1)

	short		tbl_fu_out - tbl_fu_out	# BSUN can't happen
	short		tbl_fu_out - tbl_fu_out	# SNAN can't happen
	short		fu_operr - tbl_fu_out	# OPERR
	short		fu_ovfl - tbl_fu_out	# OVFL
	short		fu_unfl - tbl_fu_out	# UNFL
	short		tbl_fu_out - tbl_fu_out	# DZ can't happen
	short		fu_inex - tbl_fu_out	# INEX2
	short		tbl_fu_out - tbl_fu_out	# INEX1 won't make it here

# for snan,operr,ovfl,unfl, src op is still in FP_SRC so just
# frestore it.
	fmovm.x		EXC_FPREGS(%a6),&0xc0	# restore fp0/fp1
	fmovm.l		USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs
	movm.l		EXC_DREGS(%a6),&0x0303	# restore d0-d1/a0-a1

	mov.w		&0x30d8,EXC_VOFF(%a6)	# vector offset = 0xd8
	mov.w		&0xe006,2+FP_SRC(%a6)

	frestore	FP_SRC(%a6)

	fmovm.x		EXC_FPREGS(%a6),&0xc0	# restore fp0/fp1
	fmovm.l		USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs
	movm.l		EXC_DREGS(%a6),&0x0303	# restore d0-d1/a0-a1

	mov.w		&0x30d0,EXC_VOFF(%a6)	# vector offset = 0xd0
	mov.w		&0xe004,2+FP_SRC(%a6)

	frestore	FP_SRC(%a6)

	fmovm.x		&0x40,FP_SRC(%a6)	# save EXOP to the stack

	fmovm.x		EXC_FPREGS(%a6),&0xc0	# restore fp0/fp1
	fmovm.l		USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs
	movm.l		EXC_DREGS(%a6),&0x0303	# restore d0-d1/a0-a1

	mov.w		&0x30d4,EXC_VOFF(%a6)	# vector offset = 0xd4
	mov.w		&0xe005,2+FP_SRC(%a6)

	frestore	FP_SRC(%a6)		# restore EXOP

# underflow can happen for extended precision. extended precision opclass
# three instruction exceptions don't update the stack pointer. so, if the
# exception occurred from user mode, then simply update a7 and exit normally.
# if the exception occurred from supervisor mode, check if
	mov.l		EXC_A6(%a6),(%a6)	# restore a6

	btst		&0x5,EXC_SR(%a6)

	mov.l		EXC_A7(%a6),%a0		# restore a7 whether we need
	mov.l		%a0,%usp		# to or not...

	fmovm.x		&0x40,FP_SRC(%a6)	# save EXOP to the stack

	fmovm.x		EXC_FPREGS(%a6),&0xc0	# restore fp0/fp1
	fmovm.l		USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs
	movm.l		EXC_DREGS(%a6),&0x0303	# restore d0-d1/a0-a1

	mov.w		&0x30cc,EXC_VOFF(%a6)	# vector offset = 0xcc
	mov.w		&0xe003,2+FP_SRC(%a6)

	frestore	FP_SRC(%a6)		# restore EXOP

	cmpi.b		SPCOND_FLG(%a6),&mda7_flg # was the <ea> mode -(sp)?

# the extended precision result is still in fp0. but, we need to save it
# somewhere on the stack until we can copy it to its final resting place
# (where the exc frame is currently). make sure it's not at the top of the
# frame or it will get overwritten when the exc stack frame is shifted "down".
	fmovm.x		&0x80,FP_SRC(%a6)	# put answer on stack
	fmovm.x		&0x40,FP_DST(%a6)	# put EXOP on stack

	fmovm.x		EXC_FPREGS(%a6),&0xc0	# restore fp0/fp1
	fmovm.l		USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs
	movm.l		EXC_DREGS(%a6),&0x0303	# restore d0-d1/a0-a1

	mov.w		&0x30cc,EXC_VOFF(%a6)	# vector offset = 0xcc
	mov.w		&0xe003,2+FP_DST(%a6)

	frestore	FP_DST(%a6)		# restore EXOP

	mov.l		(%a6),%a6		# restore frame pointer

	mov.l		LOCAL_SIZE+EXC_SR(%sp),LOCAL_SIZE+EXC_SR-0xc(%sp)
	mov.l		LOCAL_SIZE+2+EXC_PC(%sp),LOCAL_SIZE+2+EXC_PC-0xc(%sp)
	mov.l		LOCAL_SIZE+EXC_EA(%sp),LOCAL_SIZE+EXC_EA-0xc(%sp)

# now, copy the result to the proper place on the stack
	mov.l		LOCAL_SIZE+FP_SRC_EX(%sp),LOCAL_SIZE+EXC_SR+0x0(%sp)
	mov.l		LOCAL_SIZE+FP_SRC_HI(%sp),LOCAL_SIZE+EXC_SR+0x4(%sp)
	mov.l		LOCAL_SIZE+FP_SRC_LO(%sp),LOCAL_SIZE+EXC_SR+0x8(%sp)

	add.l		&LOCAL_SIZE-0x8,%sp
1795 # fmove in and out enter here.
1797 fmovm.x
&0x40,FP_SRC
(%a6
) # save EXOP to the stack
1799 fmovm.x EXC_FPREGS
(%a6
),&0xc0 # restore fp0/fp1
1800 fmovm.
l USER_FPCR
(%a6
),%fpcr
,%fpsr
,%fpiar
# restore ctrl regs
1801 movm.
l EXC_DREGS
(%a6
),&0x0303 # restore d0-d1/a0-a1
1803 mov.w
&0x30c4,EXC_VOFF
(%a6
) # vector offset = 0xc4
1804 mov.w
&0xe001,2+FP_SRC
(%a6
)
1806 frestore FP_SRC
(%a6
) # restore EXOP
#########################################################################
#########################################################################

# I'm not sure at this point what FPSR bits are valid for this instruction.
# so, since the emulation routines re-create them anyways, zero exception field
	andi.l	&0x0ff00ff,USER_FPSR(%a6) # zero exception field

	fmov.l	&0x0,%fpcr		# zero current control regs

	bsr.l	get_packed		# fetch packed src operand

	lea	FP_SRC(%a6),%a0		# pass ptr to src
	bsr.l	set_tag_x		# set src optype tag

	mov.b	%d0,STAG(%a6)		# save src optype tag

	bfextu	EXC_CMDREG(%a6){&6:&3},%d0 # dyadic; load dst reg
# bit five of the fp extension word separates the monadic and dyadic operations
	btst	&0x5,1+EXC_CMDREG(%a6)	# is operation monadic or dyadic?
	beq.b	fu_extract_p		# monadic
	cmpi.b	1+EXC_CMDREG(%a6),&0x3a	# is operation an ftst?
	beq.b	fu_extract_p		# yes, so it's monadic, too

	bsr.l	load_fpn2		# load dst into FP_DST

	lea	FP_DST(%a6),%a0		# pass: ptr to dst op
	bsr.l	set_tag_x		# tag the operand type
	cmpi.b	%d0,&UNNORM		# is operand an UNNORM?
	bne.b	fu_op2_done_p		# no
	bsr.l	unnorm_fix		# yes; convert to NORM,DENORM,or ZERO

	mov.b	%d0,DTAG(%a6)		# save dst optype tag

	mov.b	FPCR_MODE(%a6),%d0	# fetch rnd mode/prec

	bfextu	1+EXC_CMDREG(%a6){&1:&7},%d1 # extract extension

	mov.l	(tbl_unsupp.l,%pc,%d1.l*4),%d1 # fetch routine addr
	jsr	(tbl_unsupp.l,%pc,%d1.l*1)
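#
# dispatch sketch (an inference from the two instructions above, not a
# statement from the package documentation): the seven-bit extension field
# extracted into %d1 indexes tbl_unsupp as a table of longword
# displacements. The mov.l scales the index by four to fetch the
# displacement, and the jsr then adds that displacement back onto the
# table base, so every emulation routine is reached PC-relative with no
# absolute addresses stored in the table.
#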
# Exceptions in order of precedence:
#	SNAN	: all dyadic ops
#	OPERR	: fsqrt(-NORM)
#	OVFL	: all except ftst,fcmp
#	UNFL	: all except ftst,fcmp
#	INEX2	: all except ftst,fcmp

# we determine the highest priority exception(if any) set by the
# emulation routine that has also been enabled by the user.
	mov.b	FPCR_ENABLE(%a6),%d0	# fetch exceptions enabled
	bne.w	fu_in_ena_p		# some are enabled

# fcmp and ftst do not store any result.
	mov.b	1+EXC_CMDREG(%a6),%d0	# fetch extension
	andi.b	&0x38,%d0		# extract bits 3-5
	cmpi.b	%d0,&0x38		# is instr fcmp or ftst?
	beq.b	fu_in_exit_p		# yes

	bfextu	EXC_CMDREG(%a6){&6:&3},%d0 # dyadic; load dst reg
	bsr.l	store_fpreg		# store the result

	btst	&0x5,EXC_SR(%a6)	# user or supervisor?
	bne.w	fu_in_exit_s_p		# supervisor

	mov.l	EXC_A7(%a6),%a0		# update user a7

	fmovm.x	EXC_FPREGS(%a6),&0xc0	# restore fp0/fp1
	fmovm.l	USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs
	movm.l	EXC_DREGS(%a6),&0x0303	# restore d0-d1/a0-a1

	unlk	%a6			# unravel stack frame

	btst	&0x7,(%sp)		# is trace on?
	bne.w	fu_trace_p		# yes

	bra.l	_fpsp_done		# exit to os
# the exception occurred in supervisor mode. check to see if the
# addressing mode was (a7)+. if so, we'll need to shift the stack frame "up".
	btst	&mia7_bit,SPCOND_FLG(%a6) # was ea mode (a7)+
	beq.b	fu_in_exit_cont_p	# no

	fmovm.x	EXC_FPREGS(%a6),&0xc0	# restore fp0/fp1
	fmovm.l	USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs
	movm.l	EXC_DREGS(%a6),&0x0303	# restore d0-d1/a0-a1

	unlk	%a6			# unravel stack frame

# shift the stack frame "up". we don't really care about the <ea> field.
	mov.l	0x4(%sp),0x10(%sp)
	mov.l	0x0(%sp),0xc(%sp)

	btst	&0x7,(%sp)		# is trace on?
	bne.w	fu_trace_p		# yes

	bra.l	_fpsp_done		# exit to os
	and.b	FPSR_EXCEPT(%a6),%d0	# keep only ones enabled & set
	bfffo	%d0{&24:&8},%d0		# find highest priority exception
	bne.b	fu_in_exc_p		# at least one was set

# No exceptions occurred that were also enabled. Now:
#
#	if (OVFL && ovfl_disabled && inexact_enabled) {
#	    branch to _real_inex() (even if the result was exact!);
#	    save the result in the proper fp reg (unless the op is fcmp or ftst);
	btst	&ovfl_bit,FPSR_EXCEPT(%a6) # was overflow set?
	beq.w	fu_in_cont_p		# no

	btst	&inex2_bit,FPCR_ENABLE(%a6) # was inexact enabled?
	beq.w	fu_in_cont_p		# no
	bra.w	fu_in_exc_ovfl_p	# do _real_inex() now
# An exception occurred and that exception was enabled:
#
#	shift enabled exception field into lo byte of d0;
#	if (((INEX2 || INEX1) && inex_enabled && OVFL && ovfl_disabled) ||
#	    ((INEX2 || INEX1) && inex_enabled && UNFL && unfl_disabled)) {
#	    /*
#	     * this is the case where we must call _real_inex() now or else
#	     * there will be no other way to pass it the exceptional operand
#	     */
#	    call _real_inex();
#	restore exc state (SNAN||OPERR||OVFL||UNFL||DZ||INEX) into the FPU;
	subi.l	&24,%d0			# fix offset to be 0-8

	cmpi.b	%d0,&0x6		# is exception INEX? (6 or 7)
	blt.b	fu_in_exc_exit_p	# no

# the enabled exception was inexact
	btst	&unfl_bit,FPSR_EXCEPT(%a6) # did disabled underflow occur?
	bne.w	fu_in_exc_unfl_p	# yes
	btst	&ovfl_bit,FPSR_EXCEPT(%a6) # did disabled overflow occur?
	bne.w	fu_in_exc_ovfl_p	# yes

# here, we insert the correct fsave status value into the fsave frame for the
# corresponding exception. the operand in the fsave frame should be the original
# as a reminder for future predicted pain and agony, we are passing in fsave the
# "non-skewed" operand for cases of sgl and dbl src INFs,NANs, and DENORMs.
# this is INCORRECT for enabled SNAN which would give to the user the skewed SNAN!!!
1989 btst
&0x5,EXC_SR
(%a6
) # user or supervisor?
1990 bne.w fu_in_exc_exit_s_p
# supervisor
1992 mov.
l EXC_A7
(%a6
),%a0
# update user a7
1995 fu_in_exc_exit_cont_p
:
1996 mov.w
(tbl_except_p.
b,%pc
,%d0.w
*2),2+FP_SRC
(%a6
)
1998 fmovm.x EXC_FPREGS
(%a6
),&0xc0 # restore fp0/fp1
1999 fmovm.
l USER_FPCR
(%a6
),%fpcr
,%fpsr
,%fpiar
# restore ctrl regs
2000 movm.
l EXC_DREGS
(%a6
),&0x0303 # restore d0-d1/a0-a1
2002 frestore FP_SRC
(%a6
) # restore src op
2006 btst
&0x7,(%sp
) # is trace enabled?
2007 bne.w fu_trace_p
# yes
2012 short
0xe000,0xe006,0xe004,0xe005
2013 short
0xe003,0xe002,0xe001,0xe001
2017 bra.w fu_in_exc_exit_p
2021 bra.w fu_in_exc_exit_p
2024 btst
&mia7_bit
,SPCOND_FLG
(%a6
)
2025 beq.
b fu_in_exc_exit_cont_p
2027 mov.w
(tbl_except_p.
b,%pc
,%d0.w
*2),2+FP_SRC
(%a6
)
2029 fmovm.x EXC_FPREGS
(%a6
),&0xc0 # restore fp0/fp1
2030 fmovm.
l USER_FPCR
(%a6
),%fpcr
,%fpsr
,%fpiar
# restore ctrl regs
2031 movm.
l EXC_DREGS
(%a6
),&0x0303 # restore d0-d1/a0-a1
2033 frestore FP_SRC
(%a6
) # restore src op
2035 unlk
%a6
# unravel stack frame
2037 # shift stack frame "up". who cares about <ea> field.
2038 mov.
l 0x4(%sp
),0x10(%sp
)
2039 mov.
l 0x0(%sp
),0xc(%sp
)
2042 btst
&0x7,(%sp
) # is trace on?
2043 bne.
b fu_trace_p
# yes
2045 bra.
l _fpsp_done
# exit to os
# The opclass two PACKED instruction that took an "Unimplemented Data Type"
# exception was being traced. Make the "current" PC the FPIAR and put it in the
# trace stack frame then jump to _real_trace().
#
#	   UNSUPP FRAME		   TRACE FRAME
#	*****************	*****************
#	*	EA	*	*    Current	*
#	*****************	*****************
#	* 0x2 *  0x0dc	*	* 0x2 *  0x024	*
#	*****************	*****************
#	*****************	*****************
#	*****************	*****************
	mov.w	&0x2024,0x6(%sp)
	fmov.l	%fpiar,0x8(%sp)
2070 #########################################################
2071 #########################################################
2075 # I'm not sure at this point what FPSR bits are valid for this instruction.
2076 # so, since the emulation routines re-create them anyways, zero exception field.
2077 # fmove out doesn't affect ccodes.
2078 and.l &0xffff00ff,USER_FPSR
(%a6
) # zero exception field
2080 fmov.
l &0x0,%fpcr
# zero current control regs
2083 bfextu EXC_CMDREG
(%a6
){&6:&3},%d0
2086 # unlike other opclass 3, unimplemented data type exceptions, packed must be
2087 # able to detect all operand types.
2089 bsr.
l set_tag_x
# tag the operand type
2090 cmpi.
b %d0
,&UNNORM
# is operand an UNNORM?
2092 bsr.
l unnorm_fix
# yes; convert to NORM,DENORM,or ZERO
2095 mov.
b %d0
,STAG
(%a6
) # save src optype tag
2098 mov.
b FPCR_MODE
(%a6
),%d0
# fetch rnd mode/prec
2100 lea FP_SRC
(%a6
),%a0
# pass ptr to src operand
2102 mov.
l (%a6
),EXC_A6
(%a6
) # in case a6 changes
2103 bsr.
l fout
# call fmove out routine
2105 # Exceptions in order of precedence:
2108 # OPERR : if ((k_factor > +17) || (dec. exp exceeds 3 digits))
2115 # determine the highest priority exception(if any) set by the
2116 # emulation routine that has also been enabled by the user.
2117 mov.
b FPCR_ENABLE
(%a6
),%d0
# fetch exceptions enabled
2118 bne.w fu_out_ena_p
# some are enabled
2121 mov.
l EXC_A6
(%a6
),(%a6
) # restore a6
2123 btst
&0x5,EXC_SR
(%a6
) # user or supervisor?
2124 bne.
b fu_out_exit_s_p
# supervisor
2126 mov.
l EXC_A7
(%a6
),%a0
# update user a7
2130 fmovm.x EXC_FPREGS
(%a6
),&0xc0 # restore fp0/fp1
2131 fmovm.
l USER_FPCR
(%a6
),%fpcr
,%fpsr
,%fpiar
# restore ctrl regs
2132 movm.
l EXC_DREGS
(%a6
),&0x0303 # restore d0-d1/a0-a1
2134 unlk
%a6
# unravel stack frame
2136 btst
&0x7,(%sp
) # is trace on?
2137 bne.w fu_trace_p
# yes
2139 bra.
l _fpsp_done
# exit to os
2141 # the exception occurred in supervisor mode. check to see if the
2142 # addressing mode was -(a7). if so, we'll need to shift the
2143 # stack frame "down".
2145 btst
&mda7_bit
,SPCOND_FLG
(%a6
) # was ea mode -(a7)
2146 beq.
b fu_out_exit_cont_p
# no
2148 fmovm.x EXC_FPREGS
(%a6
),&0xc0 # restore fp0/fp1
2149 fmovm.
l USER_FPCR
(%a6
),%fpcr
,%fpsr
,%fpiar
# restore ctrl regs
2150 movm.
l EXC_DREGS
(%a6
),&0x0303 # restore d0-d1/a0-a1
2152 mov.
l (%a6
),%a6
# restore frame pointer
2154 mov.
l LOCAL_SIZE+EXC_SR
(%sp
),LOCAL_SIZE+EXC_SR-
0xc(%sp
)
2155 mov.
l LOCAL_SIZE+
2+EXC_PC
(%sp
),LOCAL_SIZE+
2+EXC_PC-
0xc(%sp
)
2157 # now, copy the result to the proper place on the stack
2158 mov.
l LOCAL_SIZE+FP_DST_EX
(%sp
),LOCAL_SIZE+EXC_SR+
0x0(%sp
)
2159 mov.
l LOCAL_SIZE+FP_DST_HI
(%sp
),LOCAL_SIZE+EXC_SR+
0x4(%sp
)
2160 mov.
l LOCAL_SIZE+FP_DST_LO
(%sp
),LOCAL_SIZE+EXC_SR+
0x8(%sp
)
2162 add.l &LOCAL_SIZE-
0x8,%sp
2170 and.b FPSR_EXCEPT
(%a6
),%d0
# keep only ones enabled
2171 bfffo
%d0
{&24:&8},%d0
# find highest priority exception
2174 mov.
l EXC_A6
(%a6
),(%a6
) # restore a6
2176 # an exception occurred and that exception was enabled.
2177 # the only exception possible on packed move out are INEX, OPERR, and SNAN.
2184 btst
&0x5,EXC_SR
(%a6
)
2187 mov.
l EXC_A7
(%a6
),%a0
2192 cmpi.
b SPCOND_FLG
(%a6
),&mda7_flg
# the instruction was "fmove.p fpn,-(a7)" from supervisor mode.
# the strategy is to move the exception frame "down" 12 bytes. then, we
# can store the default result where the exception frame was.
	fmovm.x	EXC_FPREGS(%a6),&0xc0	# restore fp0/fp1
	fmovm.l	USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs
	movm.l	EXC_DREGS(%a6),&0x0303	# restore d0-d1/a0-a1

	mov.w	&0x30d8,EXC_VOFF(%a6)	# vector offset = 0xd8
	mov.w	&0xe006,2+FP_SRC(%a6)	# set fsave status

	frestore FP_SRC(%a6)		# restore src operand

	mov.l	(%a6),%a6		# restore frame pointer

	mov.l	LOCAL_SIZE+EXC_SR(%sp),LOCAL_SIZE+EXC_SR-0xc(%sp)
	mov.l	LOCAL_SIZE+2+EXC_PC(%sp),LOCAL_SIZE+2+EXC_PC-0xc(%sp)
	mov.l	LOCAL_SIZE+EXC_EA(%sp),LOCAL_SIZE+EXC_EA-0xc(%sp)

# now, we copy the default result to its proper location
	mov.l	LOCAL_SIZE+FP_DST_EX(%sp),LOCAL_SIZE+0x4(%sp)
	mov.l	LOCAL_SIZE+FP_DST_HI(%sp),LOCAL_SIZE+0x8(%sp)
	mov.l	LOCAL_SIZE+FP_DST_LO(%sp),LOCAL_SIZE+0xc(%sp)

	add.l	&LOCAL_SIZE-0x8,%sp
2224 btst
&0x5,EXC_SR
(%a6
)
2227 mov.
l EXC_A7
(%a6
),%a0
2232 cmpi.
b SPCOND_FLG
(%a6
),&mda7_flg
2235 # the instruction was "fmove.p fpn,-(a7)" from supervisor mode.
2236 # the strategy is to move the exception frame "down" 12 bytes. then, we
2237 # can store the default result where the exception frame was.
2238 fmovm.x EXC_FPREGS
(%a6
),&0xc0 # restore fp0/fp1
2239 fmovm.
l USER_FPCR
(%a6
),%fpcr
,%fpsr
,%fpiar
# restore ctrl regs
2240 movm.
l EXC_DREGS
(%a6
),&0x0303 # restore d0-d1/a0-a1
2242 mov.w
&0x30d0,EXC_VOFF
(%a6
) # vector offset = 0xd0
2243 mov.w
&0xe004,2+FP_SRC
(%a6
) # set fsave status
2245 frestore FP_SRC
(%a6
) # restore src operand
2247 mov.
l (%a6
),%a6
# restore frame pointer
2249 mov.
l LOCAL_SIZE+EXC_SR
(%sp
),LOCAL_SIZE+EXC_SR-
0xc(%sp
)
2250 mov.
l LOCAL_SIZE+
2+EXC_PC
(%sp
),LOCAL_SIZE+
2+EXC_PC-
0xc(%sp
)
2251 mov.
l LOCAL_SIZE+EXC_EA
(%sp
),LOCAL_SIZE+EXC_EA-
0xc(%sp
)
# now, we copy the default result to its proper location
2254 mov.
l LOCAL_SIZE+FP_DST_EX
(%sp
),LOCAL_SIZE+
0x4(%sp
)
2255 mov.
l LOCAL_SIZE+FP_DST_HI
(%sp
),LOCAL_SIZE+
0x8(%sp
)
2256 mov.
l LOCAL_SIZE+FP_DST_LO
(%sp
),LOCAL_SIZE+
0xc(%sp
)
2258 add.l &LOCAL_SIZE-
0x8,%sp
2264 btst
&0x5,EXC_SR
(%a6
)
2267 mov.
l EXC_A7
(%a6
),%a0
2272 cmpi.
b SPCOND_FLG
(%a6
),&mda7_flg
2275 # the instruction was "fmove.p fpn,-(a7)" from supervisor mode.
2276 # the strategy is to move the exception frame "down" 12 bytes. then, we
2277 # can store the default result where the exception frame was.
2278 fmovm.x EXC_FPREGS
(%a6
),&0xc0 # restore fp0/fp1
2279 fmovm.
l USER_FPCR
(%a6
),%fpcr
,%fpsr
,%fpiar
# restore ctrl regs
2280 movm.
l EXC_DREGS
(%a6
),&0x0303 # restore d0-d1/a0-a1
2282 mov.w
&0x30c4,EXC_VOFF
(%a6
) # vector offset = 0xc4
2283 mov.w
&0xe001,2+FP_SRC
(%a6
) # set fsave status
2285 frestore FP_SRC
(%a6
) # restore src operand
2287 mov.
l (%a6
),%a6
# restore frame pointer
2289 mov.
l LOCAL_SIZE+EXC_SR
(%sp
),LOCAL_SIZE+EXC_SR-
0xc(%sp
)
2290 mov.
l LOCAL_SIZE+
2+EXC_PC
(%sp
),LOCAL_SIZE+
2+EXC_PC-
0xc(%sp
)
2291 mov.
l LOCAL_SIZE+EXC_EA
(%sp
),LOCAL_SIZE+EXC_EA-
0xc(%sp
)
# now, we copy the default result to its proper location
2294 mov.
l LOCAL_SIZE+FP_DST_EX
(%sp
),LOCAL_SIZE+
0x4(%sp
)
2295 mov.
l LOCAL_SIZE+FP_DST_HI
(%sp
),LOCAL_SIZE+
0x8(%sp
)
2296 mov.
l LOCAL_SIZE+FP_DST_LO
(%sp
),LOCAL_SIZE+
0xc(%sp
)
2298 add.l &LOCAL_SIZE-
0x8,%sp
#########################################################################

# if we're stuffing a source operand back into an fsave frame then we
# have to make sure that for single or double source operands that the
# format stuffed is as weird as the hardware usually makes it.
	bfextu	EXC_EXTWORD(%a6){&3:&3},%d0 # extract src specifier
	cmpi.b	%d0,&0x1		# was src sgl?
	beq.b	funimp_skew_sgl		# yes
	cmpi.b	%d0,&0x5		# was src dbl?
	beq.b	funimp_skew_dbl		# yes

	mov.w	FP_SRC_EX(%a6),%d0	# fetch DENORM exponent
	andi.w	&0x7fff,%d0		# strip sign
	beq.b	funimp_skew_sgl_not
	bgt.b	funimp_skew_sgl_not
	neg.w	%d0			# make exponent negative
	addi.w	&0x3f81,%d0		# find amt to shift
	mov.l	FP_SRC_HI(%a6),%d1	# fetch DENORM hi(man)
	lsr.l	%d0,%d1			# shift it
	bset	&31,%d1			# set j-bit
	mov.l	%d1,FP_SRC_HI(%a6)	# insert new hi(man)
	andi.w	&0x8000,FP_SRC_EX(%a6)	# clear old exponent
	ori.w	&0x3f80,FP_SRC_EX(%a6)	# insert new "skewed" exponent
funimp_skew_sgl_not:
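#
# a hedged reading of the single-precision sequence above: %d0 starts as
# the sign-stripped exponent of the denormalized single operand; negating
# it and adding 0x3f81 turns it into a right-shift count for the high
# mantissa word, after which the forced j-bit and the forced 0x3f80
# exponent rebuild the "skewed" image the hardware itself would have left
# in the fsave frame. The constant 0x3f81 is taken at face value from the
# code; it is presumably the single-precision bias re-expressed against
# the extended-precision bias, but that interpretation is an assumption,
# not something this file states.
#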
	mov.w	FP_SRC_EX(%a6),%d0	# fetch DENORM exponent
	andi.w	&0x7fff,%d0		# strip sign
	beq.b	funimp_skew_dbl_not
	bgt.b	funimp_skew_dbl_not

	tst.b	FP_SRC_EX(%a6)		# make "internal format"
	smi.b	0x2+FP_SRC(%a6)
	mov.w	%d0,FP_SRC_EX(%a6)	# insert exponent with cleared sign
	clr.l	%d0			# clear g,r,s
	lea	FP_SRC(%a6),%a0		# pass ptr to src op
	mov.w	&0x3c01,%d1		# pass denorm threshold
	bsr.l	dnrm_lp			# denorm it
	mov.w	&0x3c00,%d0		# new exponent
	tst.b	0x2+FP_SRC(%a6)		# is sign set?
	beq.b	fss_dbl_denorm_done	# no
	bset	&15,%d0			# set sign
fss_dbl_denorm_done:
	bset	&0x7,FP_SRC_HI(%a6)	# set j-bit
	mov.w	%d0,FP_SRC_EX(%a6)	# insert new exponent
funimp_skew_dbl_not:
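#
# companion note for the double case (same caveat as the single case: this
# is inferred from the constants, not documented here): 0x3c01 passed to
# dnrm_lp is presumably the smallest extended exponent that still maps
# onto a normalized double, so dnrm_lp denormalizes the mantissa down to
# that threshold; 0x3c00, with the sign bit merged back in, then becomes
# the "skewed" exponent written into the fsave image once the j-bit has
# been set.
#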
2360 #########################################################################
2363 btst
&0x5,EXC_SR
(%a6
)
2365 mov.
l 0x0(%a0
),FP_DST_EX
(%a6
)
2366 mov.
l 0x4(%a0
),FP_DST_HI
(%a6
)
2367 mov.
l 0x8(%a0
),FP_DST_LO
(%a6
)
#########################################################################
# XDEF **************************************************************** #
#	_fpsp_effadd(): 060FPSP entry point for FP "Unimplemented	#
#			effective address" exception.			#
#									#
#	This handler should be the first code executed upon taking the	#
#	FP Unimplemented Effective Address exception in an operating	#
#	system.								#
#									#
# XREF **************************************************************** #
#	_imem_read_long() - read instruction longword			#
#	fix_skewed_ops() - adjust src operand in fsave frame		#
#	set_tag_x() - determine optype of src/dst operands		#
#	store_fpreg() - store opclass 0 or 2 result to FP regfile	#
#	unnorm_fix() - change UNNORM operands to NORM or ZERO		#
#	load_fpn2() - load dst operand from FP regfile			#
#	tbl_unsupp - address of table of emulation routines for opclass 0,2 #
#	decbin() - convert packed data to FP binary data		#
#	_real_fpu_disabled() - "callout" for "FPU disabled" exception	#
#	_real_access() - "callout" for access error exception		#
#	_mem_read() - read extended immediate operand from memory	#
#	_fpsp_done() - "callout" for exit; work all done		#
#	_real_trace() - "callout" for Trace enabled exception		#
#	fmovm_dynamic() - emulate dynamic fmovm instruction		#
#	fmovm_ctrl() - emulate fmovm control instruction		#
#									#
# INPUT *************************************************************** #
#	- The system stack contains the "Unimplemented <ea>" stk frame	#
#									#
# OUTPUT ************************************************************** #
#	If access error:						#
#	- The system stack is changed to an access error stack frame	#
#	If FPU disabled:						#
#	- The system stack is changed to an FPU disabled stack frame	#
#	If Trace exception enabled:					#
#	- The system stack is changed to a Trace exception stack frame	#
#	Else: (normal case)						#
#	- None (correct result has been stored as appropriate)		#
#									#
# ALGORITHM *********************************************************** #
#	This exception handles 3 types of operations:			#
#	(1) FP Instructions using extended precision or packed immediate #
#	    addressing mode.						#
#	(2) The "fmovm.x" instruction w/ dynamic register specification. #
#	(3) The "fmovm.l" instruction w/ 2 or 3 control registers.	#
#									#
#	For immediate data operations, the data is read in w/ a	#
#	_mem_read() "callout", converted to FP binary (if packed), and used #
#	as the source operand to the instruction specified by the instruction #
#	word. If no FP exception should be reported as a result of the	#
#	emulation, then the result is stored to the destination register and #
#	the handler exits through _fpsp_done(). If an enabled exc has been #
#	signalled as a result of emulation, then an fsave state frame	#
#	corresponding to the FP exception type must be entered into the 060 #
#	FPU before exiting. In either the enabled or disabled cases, we #
#	must also check if a Trace exception is pending, in which case, we #
#	must create a Trace exception stack frame from the current exception #
#	stack frame. If no Trace is pending, we simply exit through	#
#	_fpsp_done().							#
#	For "fmovm.x", call the routine fmovm_dynamic() which will	#
#	decode and emulate the instruction. No FP exceptions can be pending #
#	as a result of this operation emulation. A Trace exception can be #
#	pending, though, which means the current stack frame must be changed #
#	to a Trace stack frame and an exit made through _real_trace().	#
#	For the case of "fmovm.x Dn,-(a7)", where the offending instruction #
#	was executed from supervisor mode, this handler must store the FP #
#	register file values to the system stack by itself since	#
#	fmovm_dynamic() can't handle this. A normal exit is made through #
#	_fpsp_done().							#
#	For "fmovm.l", fmovm_ctrl() is used to emulate the instruction. #
#	Again, a Trace exception may be pending and an exit made through #
#	_real_trace(). Else, a normal exit is made through _fpsp_done(). #
#									#
#	Before any of the above is attempted, it must be checked to	#
#	see if the FPU is disabled. Since the "Unimp <ea>" exception is taken #
#	before the "FPU disabled" exception, but the "FPU disabled" exception #
#	has higher priority, we check the disabled bit in the PCR. If set, #
#	then we must create an 8 word "FPU disabled" exception stack frame #
#	from the current 4 word exception stack frame. This includes	#
#	reproducing the effective address of the instruction to put on the #
#	new stack frame.						#
#									#
#	In the process of all emulation work, if a _mem_read()		#
#	"callout" returns a failing result indicating an access error, then #
#	we must create an access error stack frame from the current stack #
#	frame. This information includes a faulting address and a fault- #
#	status-longword. These are created within this handler.	#
#									#
#########################################################################
# This exception type takes priority over the "Line F Emulator"
# exception. Therefore, the FPU could be disabled when entering here.
# So, we must check to see if it's disabled and handle that case separately.
	mov.l	%d0,-(%sp)		# save d0
	movc	%pcr,%d0		# load proc cr
	btst	&0x1,%d0		# is FPU disabled?
	bne.w	iea_disabled		# yes
	mov.l	(%sp)+,%d0		# restore d0

	link	%a6,&-LOCAL_SIZE	# init stack frame

	movm.l	&0x0303,EXC_DREGS(%a6)	# save d0-d1/a0-a1
	fmovm.l	%fpcr,%fpsr,%fpiar,USER_FPCR(%a6) # save ctrl regs
	fmovm.x	&0xc0,EXC_FPREGS(%a6)	# save fp0-fp1 on stack

# PC of instruction that took the exception is the PC in the frame
	mov.l	EXC_PC(%a6),EXC_EXTWPTR(%a6)

	mov.l	EXC_EXTWPTR(%a6),%a0	# fetch instruction addr
	addq.l	&0x4,EXC_EXTWPTR(%a6)	# incr instruction ptr
	bsr.l	_imem_read_long		# fetch the instruction words
	mov.l	%d0,EXC_OPWORD(%a6)	# store OPWORD and EXTWORD
#########################################################################

	tst.w	%d0			# is operation fmovem?
	bmi.w	iea_fmovm		# yes

# here, we will have:
#	fabs	fdabs	fsabs		facos		fmod
#	fadd	fdadd	fsadd		fasin		frem
#	fdiv	fddiv	fsdiv		fatanh		fsin
#	fintrz				fcosh		fsinh
#	fmove	fdmove	fsmove		fetox		ftan
#	fmul	fdmul	fsmul		fetoxm1		ftanh
#	fneg	fdneg	fsneg		fgetexp		ftentox
#	fsgldiv				fgetman		ftwotox
#	fsub	fdsub	fssub		flogn
# which can all use f<op>.{x,p}
# so, now it's immediate data extended precision AND PACKED FORMAT!
	andi.l	&0x00ff00ff,USER_FPSR(%a6)

	btst	&0xa,%d0		# is src fmt x or p?
	bne.b	iea_op_pack		# packed

	mov.l	EXC_EXTWPTR(%a6),%a0	# pass: ptr to #<data>
	lea	FP_SRC(%a6),%a1		# pass: ptr to super addr
	mov.l	&0xc,%d0		# pass: 12 bytes
	bsr.l	_imem_read		# read extended immediate

	tst.l	%d1			# did ifetch fail?
	bne.w	iea_iacc		# yes

	mov.l	EXC_EXTWPTR(%a6),%a0	# pass: ptr to #<data>
	lea	FP_SRC(%a6),%a1		# pass: ptr to super dst
	mov.l	&0xc,%d0		# pass: 12 bytes
	bsr.l	_imem_read		# read packed operand

	tst.l	%d1			# did ifetch fail?
	bne.w	iea_iacc		# yes
# The packed operand is an INF or a NAN if the exponent field is all ones.
	bfextu	FP_SRC(%a6){&1:&15},%d0	# get exp
	cmpi.w	%d0,&0x7fff		# INF or NAN?
	beq.b	iea_op_setsrc		# operand is an INF or NAN

# The packed operand is a zero if the mantissa is all zero, else it's
# a normal packed op.
	mov.b	3+FP_SRC(%a6),%d0	# get byte 4
	andi.b	&0x0f,%d0		# clear all but last nybble
	bne.b	iea_op_gp_not_spec	# not a zero
	tst.l	FP_SRC_HI(%a6)		# is lw 2 zero?
	bne.b	iea_op_gp_not_spec	# not a zero
	tst.l	FP_SRC_LO(%a6)		# is lw 3 zero?
	beq.b	iea_op_setsrc		# operand is a ZERO

	lea	FP_SRC(%a6),%a0		# pass: ptr to packed op
	bsr.l	decbin			# convert to extended
	fmovm.x	&0x80,FP_SRC(%a6)	# make this the srcop

	addi.l	&0xc,EXC_EXTWPTR(%a6)	# update extension word pointer
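#
# a short sketch of the packed-decimal screening above, assuming the usual
# 96-bit packed format (sign and three-digit exponent in the first word,
# one integer digit plus sixteen fraction digits of BCD following): an
# exponent field of all ones marks an INF or NAN and is handed on
# untouched; otherwise the low nybble of byte 3 (the integer digit) and
# the two mantissa longwords are tested, and only if all of them are zero
# is the operand treated as a packed zero. Anything else is an ordinary
# packed number and goes through decbin for conversion to extended before
# becoming the source operand.
#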
2560 # FP_SRC now holds the src operand.
2561 lea FP_SRC
(%a6
),%a0
# pass: ptr to src op
2562 bsr.
l set_tag_x
# tag the operand type
2563 mov.
b %d0
,STAG
(%a6
) # could be ANYTHING!!!
2564 cmpi.
b %d0
,&UNNORM
# is operand an UNNORM?
2565 bne.
b iea_op_getdst
# no
2566 bsr.
l unnorm_fix
# yes; convert to NORM/DENORM/ZERO
2567 mov.
b %d0
,STAG
(%a6
) # set new optype tag
2569 clr.
b STORE_FLG
(%a6
) # clear "store result" boolean
2571 btst
&0x5,1+EXC_CMDREG
(%a6
) # is operation monadic or dyadic?
2572 beq.
b iea_op_extract
# monadic
2573 btst
&0x4,1+EXC_CMDREG
(%a6
) # is operation fsincos,ftst,fcmp?
2574 bne.
b iea_op_spec
# yes
2577 bfextu EXC_CMDREG
(%a6
){&6:&3},%d0
# fetch dst regno
2578 bsr.
l load_fpn2
# load dst operand
2580 lea FP_DST
(%a6
),%a0
# pass: ptr to dst op
2581 bsr.
l set_tag_x
# tag the operand type
2582 mov.
b %d0
,DTAG
(%a6
) # could be ANYTHING!!!
2583 cmpi.
b %d0
,&UNNORM
# is operand an UNNORM?
2584 bne.
b iea_op_extract
# no
2585 bsr.
l unnorm_fix
# yes; convert to NORM/DENORM/ZERO
2586 mov.
b %d0
,DTAG
(%a6
) # set new optype tag
2587 bra.
b iea_op_extract
2589 # the operation is fsincos, ftst, or fcmp. only fcmp is dyadic
2591 btst
&0x3,1+EXC_CMDREG
(%a6
) # is operation fsincos?
2592 beq.
b iea_op_extract
# yes
2593 # now, we're left with ftst and fcmp. so, first let's tag them so that they don't
2594 # store a result. then, only fcmp will branch back and pick up a dst operand.
2595 st STORE_FLG
(%a6
) # don't store a final result
2596 btst
&0x1,1+EXC_CMDREG
(%a6
) # is operation fcmp?
2597 beq.
b iea_op_loaddst
# yes
2601 mov.
b FPCR_MODE
(%a6
),%d0
# pass: rnd mode,prec
2603 mov.
b 1+EXC_CMDREG
(%a6
),%d1
2604 andi.w
&0x007f,%d1
# extract extension
2612 mov.
l (tbl_unsupp.
l,%pc
,%d1.w
*4),%d1
# fetch routine addr
2613 jsr
(tbl_unsupp.
l,%pc
,%d1.
l*1)
2616 # Exceptions in order of precedence:
2618 # SNAN : all operations
2619 # OPERR : all reg-reg or mem-reg operations that can normally operr
2620 # OVFL : same as OPERR
2621 # UNFL : same as OPERR
2622 # DZ : same as OPERR
2623 # INEX2 : same as OPERR
2624 # INEX1 : all packed immediate operations
2627 # we determine the highest priority exception(if any) set by the
2628 # emulation routine that has also been enabled by the user.
2629 mov.
b FPCR_ENABLE
(%a6
),%d0
# fetch exceptions enabled
2630 bne.
b iea_op_ena
# some are enabled
2632 # now, we save the result, unless, of course, the operation was ftst or fcmp.
2633 # these don't save results.
2635 tst.
b STORE_FLG
(%a6
) # does this op store a result?
2636 bne.
b iea_op_exit1
# exit with no frestore
2639 bfextu EXC_CMDREG
(%a6
){&6:&3},%d0
# fetch dst regno
2640 bsr.
l store_fpreg
# store the result
2643 mov.
l EXC_PC
(%a6
),USER_FPIAR
(%a6
) # set FPIAR to "Current PC"
2644 mov.
l EXC_EXTWPTR
(%a6
),EXC_PC
(%a6
) # set "Next PC" in exc frame
2646 fmovm.x EXC_FPREGS
(%a6
),&0xc0 # restore fp0-fp1
2647 fmovm.
l USER_FPCR
(%a6
),%fpcr
,%fpsr
,%fpiar
# restore ctrl regs
2648 movm.
l EXC_DREGS
(%a6
),&0x0303 # restore d0-d1/a0-a1
2650 unlk
%a6
# unravel the frame
2652 btst
&0x7,(%sp
) # is trace on?
2653 bne.w iea_op_trace
# yes
2655 bra.
l _fpsp_done
# exit to os
	and.b	FPSR_EXCEPT(%a6),%d0	# keep only ones enabled and set
	bfffo	%d0{&24:&8},%d0		# find highest priority exception
	bne.b	iea_op_exc		# at least one was set
2662 # no exception occurred. now, did a disabled, exact overflow occur with inexact
2663 # enabled? if so, then we have to stuff an overflow frame into the FPU.
2664 btst
&ovfl_bit
,FPSR_EXCEPT
(%a6
) # did overflow occur?
2668 btst
&inex2_bit
,FPCR_ENABLE
(%a6
) # is inexact enabled?
2669 beq.
b iea_op_store
# no
2670 bra.
b iea_op_exc_ovfl
# yes
2672 # an enabled exception occurred. we have to insert the exception type back into
2675 subi.
l &24,%d0
# fix offset to be 0-8
2676 cmpi.
b %d0
,&0x6 # is exception INEX?
2677 bne.
b iea_op_exc_force
# no
2679 # the enabled exception was inexact. so, if it occurs with an overflow
2680 # or underflow that was disabled, then we have to force an overflow or
2682 btst
&ovfl_bit
,FPSR_EXCEPT
(%a6
) # did overflow occur?
2683 bne.
b iea_op_exc_ovfl
# yes
2684 btst
&unfl_bit
,FPSR_EXCEPT
(%a6
) # did underflow occur?
2685 bne.
b iea_op_exc_unfl
# yes
2688 mov.w
(tbl_iea_except.
b,%pc
,%d0.w
*2),2+FP_SRC
(%a6
)
2689 bra.
b iea_op_exit2
# exit with frestore
2692 short
0xe002, 0xe006, 0xe004, 0xe005
2693 short
0xe003, 0xe002, 0xe001, 0xe001
2696 mov.w
&0xe005,2+FP_SRC
(%a6
)
2700 mov.w
&0xe003,2+FP_SRC
(%a6
)
2703 mov.
l EXC_PC
(%a6
),USER_FPIAR
(%a6
) # set FPIAR to "Current PC"
2704 mov.
l EXC_EXTWPTR
(%a6
),EXC_PC
(%a6
) # set "Next PC" in exc frame
2706 fmovm.x EXC_FPREGS
(%a6
),&0xc0 # restore fp0-fp1
2707 fmovm.
l USER_FPCR
(%a6
),%fpcr
,%fpsr
,%fpiar
# restore ctrl regs
2708 movm.
l EXC_DREGS
(%a6
),&0x0303 # restore d0-d1/a0-a1
2710 frestore FP_SRC
(%a6
) # restore exceptional state
2712 unlk
%a6
# unravel the frame
2714 btst
&0x7,(%sp
) # is trace on?
2715 bne.
b iea_op_trace
# yes
2717 bra.
l _fpsp_done
# exit to os
# The opclass two instruction that took an "Unimplemented Effective Address"
# exception was being traced. Make the "current" PC the FPIAR and put it in
# the trace stack frame then jump to _real_trace().
#
#	  UNIMP EA FRAME	   TRACE FRAME
#	*****************	*****************
#	* 0x0 *  0x0f0	*	*    Current	*
#	*****************	*	PC	*
#	*    Current	*	*****************
#	*	PC	*	* 0x2 *  0x024	*
#	*****************	*****************
#	*****************	*	PC	*
	mov.l	(%sp),-(%sp)		# shift stack frame "down"
	mov.w	0x8(%sp),0x4(%sp)
	mov.w	&0x2024,0x6(%sp)	# stk fmt = 0x2; voff = 0x024
	fmov.l	%fpiar,0x8(%sp)		# "Current PC" is in FPIAR
2744 #########################################################################
2746 btst
&14,%d0
# ctrl or data reg
2747 beq.w iea_fmovm_ctrl
2751 btst
&0x5,EXC_SR
(%a6
) # user or supervisor mode
2752 bne.
b iea_fmovm_data_s
2756 mov.
l %a0
,EXC_A7
(%a6
) # store current a7
2757 bsr.
l fmovm_dynamic
# do dynamic fmovm
2758 mov.
l EXC_A7
(%a6
),%a0
# load possibly new a7
2759 mov.
l %a0
,%usp
# update usp
2760 bra.w iea_fmovm_exit
2763 clr.
b SPCOND_FLG
(%a6
)
2764 lea
0x2+EXC_VOFF
(%a6
),%a0
2765 mov.
l %a0
,EXC_A7
(%a6
)
2766 bsr.
l fmovm_dynamic
# do dynamic fmovm
2768 cmpi.
b SPCOND_FLG
(%a6
),&mda7_flg
2769 beq.w iea_fmovm_data_predec
2770 cmpi.
b SPCOND_FLG
(%a6
),&mia7_flg
2771 bne.w iea_fmovm_exit
2773 # right now, d0 = the size.
2774 # the data has been fetched from the supervisor stack, but we have not
2775 # incremented the stack pointer by the appropriate number of bytes.
2777 iea_fmovm_data_postinc
:
2778 btst
&0x7,EXC_SR
(%a6
)
2779 bne.
b iea_fmovm_data_pi_trace
2781 mov.w EXC_SR
(%a6
),(EXC_SR
,%a6
,%d0
)
2782 mov.
l EXC_EXTWPTR
(%a6
),(EXC_PC
,%a6
,%d0
)
2783 mov.w
&0x00f0,(EXC_VOFF
,%a6
,%d0
)
2785 lea
(EXC_SR
,%a6
,%d0
),%a0
2786 mov.
l %a0
,EXC_SR
(%a6
)
2788 fmovm.x EXC_FP0
(%a6
),&0xc0 # restore fp0-fp1
2789 fmovm.
l USER_FPCR
(%a6
),%fpcr
,%fpsr
,%fpiar
# restore ctrl regs
2790 movm.
l EXC_DREGS
(%a6
),&0x0303 # restore d0-d1/a0-a1
2796 iea_fmovm_data_pi_trace
:
2797 mov.w EXC_SR
(%a6
),(EXC_SR-
0x4,%a6
,%d0
)
2798 mov.
l EXC_EXTWPTR
(%a6
),(EXC_PC-
0x4,%a6
,%d0
)
2799 mov.w
&0x2024,(EXC_VOFF-
0x4,%a6
,%d0
)
2800 mov.
l EXC_PC
(%a6
),(EXC_VOFF+
0x2-0x4,%a6
,%d0
)
2802 lea
(EXC_SR-
0x4,%a6
,%d0
),%a0
2803 mov.
l %a0
,EXC_SR
(%a6
)
2805 fmovm.x EXC_FP0
(%a6
),&0xc0 # restore fp0-fp1
2806 fmovm.
l USER_FPCR
(%a6
),%fpcr
,%fpsr
,%fpiar
# restore ctrl regs
2807 movm.
l EXC_DREGS
(%a6
),&0x0303 # restore d0-d1/a0-a1
2813 # right now, d1 = size and d0 = the strg.
2814 iea_fmovm_data_predec
:
2815 mov.
b %d1
,EXC_VOFF
(%a6
) # store strg
2816 mov.
b %d0
,0x1+EXC_VOFF
(%a6
) # store size
2818 fmovm.x EXC_FP0
(%a6
),&0xc0 # restore fp0-fp1
2819 fmovm.
l USER_FPCR
(%a6
),%fpcr
,%fpsr
,%fpiar
# restore ctrl regs
2820 movm.
l EXC_DREGS
(%a6
),&0x0303 # restore d0-d1/a0-a1
2822 mov.
l (%a6
),-(%sp
) # make a copy of a6
2823 mov.
l %d0
,-(%sp
) # save d0
2824 mov.
l %d1
,-(%sp
) # save d1
2825 mov.
l EXC_EXTWPTR
(%a6
),-(%sp
) # make a copy of Next PC
2828 mov.
b 0x1+EXC_VOFF
(%a6
),%d0
# fetch size
2829 neg.l %d0
# get negative of size
2831 btst
&0x7,EXC_SR
(%a6
) # is trace enabled?
2832 beq.
b iea_fmovm_data_p2
2834 mov.w EXC_SR
(%a6
),(EXC_SR-
0x4,%a6
,%d0
)
2835 mov.
l EXC_PC
(%a6
),(EXC_VOFF-
0x2,%a6
,%d0
)
2836 mov.
l (%sp
)+,(EXC_PC-
0x4,%a6
,%d0
)
2837 mov.w
&0x2024,(EXC_VOFF-
0x4,%a6
,%d0
)
2839 pea
(%a6
,%d0
) # create final sp
2840 bra.
b iea_fmovm_data_p3
2843 mov.w EXC_SR
(%a6
),(EXC_SR
,%a6
,%d0
)
2844 mov.
l (%sp
)+,(EXC_PC
,%a6
,%d0
)
2845 mov.w
&0x00f0,(EXC_VOFF
,%a6
,%d0
)
2847 pea
(0x4,%a6
,%d0
) # create final sp
2851 mov.
b EXC_VOFF
(%a6
),%d1
# fetch strg
2855 fmovm.x
&0x80,(0x4+0x8,%a6
,%d0
)
2860 fmovm.x
&0x40,(0x4+0x8,%a6
,%d0
)
2865 fmovm.x
&0x20,(0x4+0x8,%a6
,%d0
)
2870 fmovm.x
&0x10,(0x4+0x8,%a6
,%d0
)
2875 fmovm.x
&0x08,(0x4+0x8,%a6
,%d0
)
2880 fmovm.x
&0x04,(0x4+0x8,%a6
,%d0
)
2885 fmovm.x
&0x02,(0x4+0x8,%a6
,%d0
)
2890 fmovm.x
&0x01,(0x4+0x8,%a6
,%d0
)
2897 btst
&0x7,(%sp
) # is trace enabled?
2901 #########################################################################
2904 bsr.
l fmovm_ctrl
# load ctrl regs
2907 fmovm.x EXC_FPREGS
(%a6
),&0xc0 # restore fp0-fp1
2908 fmovm.
l USER_FPCR
(%a6
),%fpcr
,%fpsr
,%fpiar
# restore ctrl regs
2909 movm.
l EXC_DREGS
(%a6
),&0x0303 # restore d0-d1/a0-a1
2911 btst
&0x7,EXC_SR
(%a6
) # is trace on?
2912 bne.
b iea_fmovm_trace
# yes
2914 mov.
l EXC_EXTWPTR
(%a6
),EXC_PC
(%a6
) # set Next PC
2916 unlk
%a6
# unravel the frame
2918 bra.
l _fpsp_done
# exit to os
# The control reg instruction that took an "Unimplemented Effective Address"
# exception was being traced. The "Current PC" for the trace frame is the
# PC stacked for Unimp EA. The "Next PC" is in EXC_EXTWPTR.
# After fixing the stack frame, jump to _real_trace().
#
#	  UNIMP EA FRAME	   TRACE FRAME
#	*****************	*****************
#	* 0x0 *  0x0f0	*	*    Current	*
#	*****************	*	PC	*
#	*    Current	*	*****************
#	*	PC	*	* 0x2 *  0x024	*
#	*****************	*****************
#	*****************	*	PC	*
#
# this ain't a pretty solution, but it works:
# -restore a6 (not with unlk)
# -shift stack frame down over where old a6 used to be
# -add LOCAL_SIZE to stack pointer
	mov.l	(%a6),%a6		# restore frame pointer
	mov.w	EXC_SR+LOCAL_SIZE(%sp),0x0+LOCAL_SIZE(%sp)
	mov.l	EXC_PC+LOCAL_SIZE(%sp),0x8+LOCAL_SIZE(%sp)
	mov.l	EXC_EXTWPTR+LOCAL_SIZE(%sp),0x2+LOCAL_SIZE(%sp)
	mov.w	&0x2024,0x6+LOCAL_SIZE(%sp) # stk fmt = 0x2; voff = 0x024
	add.l	&LOCAL_SIZE,%sp		# clear stack frame

#########################################################################
# The FPU is disabled and so we should really have taken the "Line
# F Emulator" exception. So, here we create an 8-word stack frame
# from our 4-word stack frame. This means we must calculate the length
# of the faulting instruction to get the "next PC". This is trivial for
# immediate operands but requires some extra work for fmovm dynamic
# which can use most addressing modes.
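#
# sizing note for the "next PC" computed below (an inference from the
# three cases handled, not a statement from the manuals): an opclass 0/2
# instruction with an extended or packed immediate is always opword plus
# extension word plus twelve bytes of data, hence the fixed 0x10; the
# "fmovm.l" control-register form inspects the register-select bits of
# the extension word to decide how many longwords follow; and the dynamic
# "fmovm.x" length depends on the effective address, so the code leans on
# the <ea> calculation routine to report how far the PC must advance.
#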
2960 mov.
l (%sp
)+,%d0
# restore d0
2962 link
%a6
,&-LOCAL_SIZE
# init stack frame
2964 movm.
l &0x0303,EXC_DREGS
(%a6
) # save d0-d1/a0-a1
2966 # PC of instruction that took the exception is the PC in the frame
2967 mov.
l EXC_PC
(%a6
),EXC_EXTWPTR
(%a6
)
2968 mov.
l EXC_EXTWPTR
(%a6
),%a0
# fetch instruction addr
2969 addq.
l &0x4,EXC_EXTWPTR
(%a6
) # incr instruction ptr
2970 bsr.
l _imem_read_long
# fetch the instruction words
2971 mov.
l %d0
,EXC_OPWORD
(%a6
) # store OPWORD and EXTWORD
2973 tst.w
%d0
# is instr fmovm?
2974 bmi.
b iea_dis_fmovm
# yes
2975 # instruction is using an extended precision immediate operand. therefore,
2976 # the total instruction length is 16 bytes.
2978 mov.
l &0x10,%d0
# 16 bytes of instruction
2981 btst
&0xe,%d0
# is instr fmovm ctrl
2982 bne.
b iea_dis_fmovm_data
# no
2983 # the instruction is a fmovm.l with 2 or 3 registers.
2984 bfextu
%d0
{&19:&3},%d1
2986 cmpi.
b %d1
,&0x7 # move all regs?
2990 # the instruction is an fmovm.x dynamic which can use many addressing
2991 # modes and thus can have several different total instruction lengths.
2992 # call fmovm_calc_ea which will go through the ea calc process and,
2993 # as a by-product, will tell us how long the instruction is.
2997 mov.
l EXC_EXTWPTR
(%a6
),%d0
2998 sub.l EXC_PC
(%a6
),%d0
3000 mov.w
%d0
,EXC_VOFF
(%a6
) # store stack shift value
3002 movm.
l EXC_DREGS
(%a6
),&0x0303 # restore d0-d1/a0-a1
# here, we actually create the 8-word frame from the 4-word frame,
# with the "next PC" as additional info.
# the <ea> field is left as undefined.
	subq.l	&0x8,%sp		# make room for new stack
	mov.l	%d0,-(%sp)		# save d0
	mov.w	0xc(%sp),0x4(%sp)	# move SR
	mov.l	0xe(%sp),0x6(%sp)	# move Current PC
	mov.l	0x6(%sp),0x10(%sp)	# move Current PC
	add.l	%d0,0x6(%sp)		# make Next PC
	mov.w	&0x402c,0xa(%sp)	# insert offset,frame format
	mov.l	(%sp)+,%d0		# restore d0

	bra.l	_real_fpu_disabled
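#
# rough shape of the eight-word frame just assembled (read off the moves
# above; while %d0 is still parked on the stack every destination offset
# is four higher than its final position): the original SR and PC are
# copied down, the PC copy kept in the extension area records the
# faulting instruction, the working PC slot has the computed instruction
# length added to become the "next PC", and 0x402c supplies the
# format/vector word. Reading 0x402c as frame format 0x4 with vector
# offset 0x02c (the Line F emulator vector) is an inference from the
# constant and the comment above, not something this file spells out.
#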
3028 fmovm.
l USER_FPCR
(%a6
),%fpcr
,%fpsr
,%fpiar
# restore ctrl regs
3029 fmovm.x EXC_FPREGS
(%a6
),&0xc0 # restore fp0-fp1 on stack
3031 movm.
l EXC_DREGS
(%a6
),&0x0303 # restore d0-d1/a0-a1
3035 subq.w
&0x8,%sp
# make stack frame bigger
3036 mov.
l 0x8(%sp
),(%sp
) # store SR,hi(PC)
3037 mov.w
0xc(%sp
),0x4(%sp
) # store lo(PC)
3038 mov.w
&0x4008,0x6(%sp
) # store voff
3039 mov.
l 0x2(%sp
),0x8(%sp
) # store ea
3040 mov.
l &0x09428001,0xc(%sp
) # store fslw
3043 btst
&0x5,(%sp
) # user or supervisor mode?
3044 beq.
b iea_acc_done2
# user
3045 bset
&0x2,0xd(%sp
) # set supervisor TM bit
3051 lea
-LOCAL_SIZE
(%a6
),%sp
3056 fmovm.x EXC_FPREGS
(%a6
),&0xc0 # restore fp0-fp1 on stack
3057 fmovm.
l LOCAL_SIZE+USER_FPCR
(%sp
),%fpcr
,%fpsr
,%fpiar
# restore ctrl regs
3061 mov.
l 0x4+LOCAL_SIZE
(%sp
),-0x8+0x4+LOCAL_SIZE
(%sp
)
3062 mov.w
0x8+LOCAL_SIZE
(%sp
),-0x8+0x8+LOCAL_SIZE
(%sp
)
3063 mov.w
&0x4008,-0x8+0xa+LOCAL_SIZE
(%sp
)
3064 mov.
l %a0
,-0x8+0xc+LOCAL_SIZE
(%sp
)
3065 mov.w
%d0
,-0x8+0x10+LOCAL_SIZE
(%sp
)
3066 mov.w
&0x0001,-0x8+0x12+LOCAL_SIZE
(%sp
)
3068 movm.
l LOCAL_SIZE+EXC_DREGS
(%sp
),&0x0303 # restore d0-d1/a0-a1
3069 add.w
&LOCAL_SIZE-
0x4,%sp
#########################################################################
# XDEF **************************************************************** #
#	_fpsp_operr(): 060FPSP entry point for FP Operr exception.	#
#									#
#	This handler should be the first code executed upon taking the	#
#	FP Operand Error exception in an operating system.		#
#									#
# XREF **************************************************************** #
#	_imem_read_long() - read instruction longword			#
#	fix_skewed_ops() - adjust src operand in fsave frame		#
#	_real_operr() - "callout" to operating system operr handler	#
#	_dmem_write_{byte,word,long}() - store data to mem (opclass 3)	#
#	store_dreg_{b,w,l}() - store data to data regfile (opclass 3)	#
#	facc_out_{b,w,l}() - store to memory took access error (opcl 3) #
#									#
# INPUT *************************************************************** #
#	- The system stack contains the FP Operr exception frame	#
#	- The fsave frame contains the source operand			#
#									#
# OUTPUT ************************************************************** #
#	No access error:						#
#	- The system stack is unchanged					#
#	- The fsave frame contains the adjusted src op for opclass 0,2	#
#									#
# ALGORITHM *********************************************************** #
#	In a system where the FP Operr exception is enabled, the goal	#
#	is to get to the handler specified at _real_operr(). But, on the 060, #
#	for opclass zero and two instructions taking this exception, the #
#	input operand in the fsave frame may be incorrect for some cases #
#	and needs to be corrected. This handler calls fix_skewed_ops() to #
#	do just this and then exits through _real_operr().		#
#	For opclass 3 instructions, the 060 doesn't store the default	#
#	operr result out to memory or data register file as it should.	#
#	This code must emulate the move out before finally exiting	#
#	through _real_operr(). The move out, if to memory, is performed using #
#	_mem_write() "callout" routines that may return a failing result. #
#	In this special case, the handler must exit through facc_out()	#
#	which creates an access error stack frame from the current operr #
#	stack frame.							#
#									#
#########################################################################
3118 link.w
%a6
,&-LOCAL_SIZE
# init stack frame
3120 fsave FP_SRC
(%a6
) # grab the "busy" frame
3122 movm.
l &0x0303,EXC_DREGS
(%a6
) # save d0-d1/a0-a1
3123 fmovm.
l %fpcr
,%fpsr
,%fpiar
,USER_FPCR
(%a6
) # save ctrl regs
3124 fmovm.x
&0xc0,EXC_FPREGS
(%a6
) # save fp0-fp1 on stack
3126 # the FPIAR holds the "current PC" of the faulting instruction
3127 mov.
l USER_FPIAR
(%a6
),EXC_EXTWPTR
(%a6
)
3129 mov.
l EXC_EXTWPTR
(%a6
),%a0
# fetch instruction addr
3130 addq.
l &0x4,EXC_EXTWPTR
(%a6
) # incr instruction ptr
3131 bsr.
l _imem_read_long
# fetch the instruction words
3132 mov.
l %d0
,EXC_OPWORD
(%a6
)
3134 ##############################################################################
3136 btst
&13,%d0
# is instr an fmove out?
3137 bne.
b foperr_out
# fmove out
3140 # here, we simply see if the operand in the fsave frame needs to be "unskewed".
3141 # this would be the case for opclass two operations with a source infinity or
3142 # denorm operand in the sgl or dbl format. NANs also become skewed, but can't
3143 # cause an operr so we don't need to check for them here.
3144 lea FP_SRC
(%a6
),%a0
# pass: ptr to src op
3145 bsr.
l fix_skewed_ops
# fix src op
3148 fmovm.x EXC_FPREGS
(%a6
),&0xc0 # restore fp0-fp1
3149 fmovm.
l USER_FPCR
(%a6
),%fpcr
,%fpsr
,%fpiar
# restore ctrl regs
3150 movm.
l EXC_DREGS
(%a6
),&0x0303 # restore d0-d1/a0-a1
3152 frestore FP_SRC
(%a6
)
3157 ########################################################################
3160 # the hardware does not save the default result to memory on enabled
3161 # operand error exceptions. we do this here before passing control to
3162 # the user operand error handler.
3164 # byte, word, and long destination format operations can pass
3165 # through here. we simply need to test the sign of the src
3166 # operand and save the appropriate minimum or maximum integer value
3167 # to the effective address as pointed to by the stacked effective address.
3169 # although packed opclass three operations can take operand error
3170 # exceptions, they won't pass through here since they are caught
3171 # first by the unsupported data format exception handler. that handler
3172 # sends them directly to _real_operr() if necessary.
3176 mov.w FP_SRC_EX
(%a6
),%d1
# fetch exponent
3179 bne.
b foperr_out_not_qnan
3180 # the operand is either an infinity or a QNAN.
3181 tst.
l FP_SRC_LO
(%a6
)
3182 bne.
b foperr_out_qnan
3183 mov.
l FP_SRC_HI
(%a6
),%d1
3184 andi.l &0x7fffffff,%d1
3185 beq.
b foperr_out_not_qnan
3187 mov.
l FP_SRC_HI
(%a6
),L_SCR1
(%a6
)
3188 bra.
b foperr_out_jmp
3190 foperr_out_not_qnan
:
3191 mov.
l &0x7fffffff,%d1
3192 tst.
b FP_SRC_EX
(%a6
)
3193 bpl.
b foperr_out_not_qnan2
3195 foperr_out_not_qnan2
:
3196 mov.
l %d1
,L_SCR1
(%a6
)
3199 bfextu
%d0
{&19:&3},%d0
# extract dst format field
3200 mov.
b 1+EXC_OPWORD
(%a6
),%d1
# extract <ea> mode,reg
3201 mov.w
(tbl_operr.
b,%pc
,%d0.w
*2),%a0
3202 jmp
(tbl_operr.
b,%pc
,%a0
)
	short	foperr_out_l - tbl_operr # long word integer
	short	tbl_operr - tbl_operr	# sgl prec shouldn't happen
	short	tbl_operr - tbl_operr	# ext prec shouldn't happen
	short	foperr_exit - tbl_operr	# packed won't enter here
	short	foperr_out_w - tbl_operr # word integer
	short	tbl_operr - tbl_operr	# dbl prec shouldn't happen
	short	foperr_out_b - tbl_operr # byte integer
	short	tbl_operr - tbl_operr	# packed won't enter here
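#
# a hedged summary of the three store paths below: L_SCR1 holds a
# saturated longword default (0x7fffffff, or its negative counterpart when
# the earlier sign test takes the other path), and each destination size
# simply loads the high byte, the high word, or the whole longword of
# that value, so the byte/word/long extremes all fall out of one constant.
# The <ea> mode field in %d1 then decides between a _dmem_write_* callout
# for memory destinations and store_dreg_* for data-register destinations.
#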
3215 mov.
b L_SCR1
(%a6
),%d0
# load positive default result
3216 cmpi.
b %d1
,&0x7 # is <ea> mode a data reg?
3217 ble.
b foperr_out_b_save_dn
# yes
3218 mov.
l EXC_EA
(%a6
),%a0
# pass: <ea> of default result
3219 bsr.
l _dmem_write_byte
# write the default result
3221 tst.
l %d1
# did dstore fail?
3222 bne.
l facc_out_b
# yes
3225 foperr_out_b_save_dn
:
3227 bsr.
l store_dreg_b
# store result to regfile
3231 mov.w L_SCR1
(%a6
),%d0
# load positive default result
3232 cmpi.
b %d1
,&0x7 # is <ea> mode a data reg?
3233 ble.
b foperr_out_w_save_dn
# yes
3234 mov.
l EXC_EA
(%a6
),%a0
# pass: <ea> of default result
3235 bsr.
l _dmem_write_word
# write the default result
3237 tst.
l %d1
# did dstore fail?
3238 bne.
l facc_out_w
# yes
3241 foperr_out_w_save_dn
:
3243 bsr.
l store_dreg_w
# store result to regfile
3247 mov.
l L_SCR1
(%a6
),%d0
# load positive default result
3248 cmpi.
b %d1
,&0x7 # is <ea> mode a data reg?
3249 ble.
b foperr_out_l_save_dn
# yes
3250 mov.
l EXC_EA
(%a6
),%a0
# pass: <ea> of default result
3251 bsr.
l _dmem_write_long
# write the default result
3253 tst.
l %d1
# did dstore fail?
3254 bne.
l facc_out_l
# yes
3257 foperr_out_l_save_dn
:
3259 bsr.
l store_dreg_l
# store result to regfile
3262 #########################################################################
3263 # XDEF **************************************************************** #
3264 # _fpsp_snan(): 060FPSP entry point for FP SNAN exception. #
3266 # This handler should be the first code executed upon taking the #
3267 # FP Signalling NAN exception in an operating system. #
3269 # XREF **************************************************************** #
3270 # _imem_read_long() - read instruction longword #
3271 # fix_skewed_ops() - adjust src operand in fsave frame #
3272 # _real_snan() - "callout" to operating system SNAN handler #
3273 # _dmem_write_{byte,word,long}() - store data to mem (opclass 3) #
3274 # store_dreg_{b,w,l}() - store data to data regfile (opclass 3) #
3275 # facc_out_{b,w,l,d,x}() - store to mem took acc error (opcl 3) #
3276 # _calc_ea_fout() - fix An if <ea> is -() or ()+; also get <ea> #
3278 # INPUT *************************************************************** #
3279 # - The system stack contains the FP SNAN exception frame #
3280 # - The fsave frame contains the source operand #
3282 # OUTPUT ************************************************************** #
3283 # No access error: #
3284 # - The system stack is unchanged #
3285 # - The fsave frame contains the adjusted src op for opclass 0,2 #
3287 # ALGORITHM *********************************************************** #
3288 # In a system where the FP SNAN exception is enabled, the goal #
3289 # is to get to the handler specified at _real_snan(). But, on the 060, #
3290 # for opclass zero and two instructions taking this exception, the #
3291 # input operand in the fsave frame may be incorrect for some cases #
3292 # and needs to be corrected. This handler calls fix_skewed_ops() to #
3293 # do just this and then exits through _real_snan(). #
3294 # For opclass 3 instructions, the 060 doesn't store the default #
3295 # SNAN result out to memory or data register file as it should. #
3296 # This code must emulate the move out before finally exiting through #
3297 # _real_snan(). The move out, if to memory, is performed using #
3298 # _mem_write() "callout" routines that may return a failing result. #
3299 # In this special case, the handler must exit through facc_out() #
3300 # which creates an access error stack frame from the current SNAN #
3302 # For the case of an extended precision opclass 3 instruction, #
3303 # if the effective addressing mode was -() or ()+, then the address #
3304 # register must get updated by calling _calc_ea_fout(). If the <ea> #
3305 # was -(a7) from supervisor mode, then the exception frame currently #
3306 # on the system stack must be carefully moved "down" to make room #
3307 # for the operand being moved. #
3309 #########################################################################
3314 link.w
%a6
,&-LOCAL_SIZE
# init stack frame
3316 fsave FP_SRC
(%a6
) # grab the "busy" frame
3318 movm.
l &0x0303,EXC_DREGS
(%a6
) # save d0-d1/a0-a1
3319 fmovm.
l %fpcr
,%fpsr
,%fpiar
,USER_FPCR
(%a6
) # save ctrl regs
3320 fmovm.x
&0xc0,EXC_FPREGS
(%a6
) # save fp0-fp1 on stack
3322 # the FPIAR holds the "current PC" of the faulting instruction
3323 mov.
l USER_FPIAR
(%a6
),EXC_EXTWPTR
(%a6
)
3325 mov.
l EXC_EXTWPTR
(%a6
),%a0
# fetch instruction addr
3326 addq.
l &0x4,EXC_EXTWPTR
(%a6
) # incr instruction ptr
3327 bsr.
l _imem_read_long
# fetch the instruction words
3328 mov.
l %d0
,EXC_OPWORD
(%a6
)
3330 ##############################################################################
3332 btst
&13,%d0
# is instr an fmove out?
3333 bne.w fsnan_out
# fmove out
3336 # here, we simply see if the operand in the fsave frame needs to be "unskewed".
3337 # this would be the case for opclass two operations with a source infinity or
3338 # denorm operand in the sgl or dbl format. NANs also become skewed and must be
3340 lea FP_SRC
(%a6
),%a0
# pass: ptr to src op
3341 bsr.
l fix_skewed_ops
# fix src op
3344 fmovm.x EXC_FPREGS
(%a6
),&0xc0 # restore fp0-fp1
3345 fmovm.
l USER_FPCR
(%a6
),%fpcr
,%fpsr
,%fpiar
# restore ctrl regs
3346 movm.
l EXC_DREGS
(%a6
),&0x0303 # restore d0-d1/a0-a1
3348 frestore FP_SRC
(%a6
)
3353 ########################################################################
3356 # the hardware does not save the default result to memory on enabled
3357 # snan exceptions. we do this here before passing control to
3358 # the user snan handler.
3360 # byte, word, long, and packed destination format operations can pass
3361 # through here. since packed format operations already were handled by
3362 # fpsp_unsupp(), then we need to do nothing else for them here.
3363 # for byte, word, and long, we simply need to test the sign of the src
3364 # operand and save the appropriate minimum or maximum integer value
3365 # to the effective address as pointed to by the stacked effective address.
3369 bfextu
%d0
{&19:&3},%d0
# extract dst format field
3370 mov.
b 1+EXC_OPWORD
(%a6
),%d1
# extract <ea> mode,reg
3371 mov.w
(tbl_snan.
b,%pc
,%d0.w
*2),%a0
3372 jmp
(tbl_snan.
b,%pc
,%a0
)
3375 short fsnan_out_l
- tbl_snan
# long word integer
3376 short fsnan_out_s
- tbl_snan
# sgl prec shouldn't happen
3377 short fsnan_out_x
- tbl_snan
# ext prec shouldn't happen
3378 short tbl_snan
- tbl_snan
# packed needs no help
3379 short fsnan_out_w
- tbl_snan
# word integer
3380 short fsnan_out_d
- tbl_snan
# dbl prec shouldn't happen
3381 short fsnan_out_b
- tbl_snan
# byte integer
3382 short tbl_snan
- tbl_snan
# packed needs no help
3385 mov.
b FP_SRC_HI
(%a6
),%d0
# load upper byte of SNAN
3386 bset
&6,%d0
# set SNAN bit
3387 cmpi.
b %d1
,&0x7 # is <ea> mode a data reg?
3388 ble.
b fsnan_out_b_dn
# yes
3389 mov.
l EXC_EA
(%a6
),%a0
# pass: <ea> of default result
3390 bsr.
l _dmem_write_byte
# write the default result
3392 tst.
l %d1
# did dstore fail?
3393 bne.
l facc_out_b
# yes
3398 bsr.
l store_dreg_b
# store result to regfile
3402 mov.w FP_SRC_HI
(%a6
),%d0
# load upper word of SNAN
3403 bset
&14,%d0
# set SNAN bit
3404 cmpi.
b %d1
,&0x7 # is <ea> mode a data reg?
3405 ble.
b fsnan_out_w_dn
# yes
3406 mov.
l EXC_EA
(%a6
),%a0
# pass: <ea> of default result
3407 bsr.
l _dmem_write_word
# write the default result
3409 tst.
l %d1
# did dstore fail?
3410 bne.
l facc_out_w
# yes
3415 bsr.
l store_dreg_w
# store result to regfile
3419 mov.
l FP_SRC_HI
(%a6
),%d0
# load upper longword of SNAN
3420 bset
&30,%d0
# set SNAN bit
3421 cmpi.
b %d1
,&0x7 # is <ea> mode a data reg?
3422 ble.
b fsnan_out_l_dn
# yes
3423 mov.
l EXC_EA
(%a6
),%a0
# pass: <ea> of default result
3424 bsr.
l _dmem_write_long
# write the default result
3426 tst.
l %d1
# did dstore fail?
3427 bne.
l facc_out_l
# yes
3432 bsr.
l store_dreg_l
# store result to regfile
3436 cmpi.
b %d1
,&0x7 # is <ea> mode a data reg?
3437 ble.
b fsnan_out_d_dn
# yes
3438 mov.
l FP_SRC_EX
(%a6
),%d0
# fetch SNAN sign
3439 andi.l &0x80000000,%d0
# keep sign
3440 ori.
l &0x7fc00000,%d0
# insert new exponent,SNAN bit
3441 mov.
l FP_SRC_HI
(%a6
),%d1
# load mantissa
3442 lsr.
l &0x8,%d1
# shift mantissa for sgl
3443 or.l %d1
,%d0
# create sgl SNAN
3444 mov.
l EXC_EA
(%a6
),%a0
# pass: <ea> of default result
3445 bsr.
l _dmem_write_long
# write the default result
3447 tst.
l %d1
# did dstore fail?
3448 bne.
l facc_out_l
# yes
3452 mov.
l FP_SRC_EX
(%a6
),%d0
# fetch SNAN sign
3453 andi.l &0x80000000,%d0
# keep sign
3454 ori.
l &0x7fc00000,%d0
# insert new exponent,SNAN bit
3456 mov.
l FP_SRC_HI
(%a6
),%d1
# load mantissa
3457 lsr.
l &0x8,%d1
# shift mantissa for sgl
3458 or.l %d1
,%d0
# create sgl SNAN
3461 bsr.
l store_dreg_l
# store result to regfile
3465 mov.
l FP_SRC_EX
(%a6
),%d0
# fetch SNAN sign
3466 andi.l &0x80000000,%d0
# keep sign
3467 ori.
l &0x7ff80000,%d0
# insert new exponent,SNAN bit
3468 mov.
l FP_SRC_HI
(%a6
),%d1
# load hi mantissa
3469 mov.
l %d0
,FP_SCR0_EX
(%a6
) # store to temp space
3470 mov.
l &11,%d0
# load shift amt
3472 or.l %d1
,FP_SCR0_EX
(%a6
) # create dbl hi
3473 mov.
l FP_SRC_HI
(%a6
),%d1
# load hi mantissa
3474 andi.l &0x000007ff,%d1
3476 mov.
l %d1
,FP_SCR0_HI
(%a6
) # store to temp space
3477 mov.
l FP_SRC_LO
(%a6
),%d1
# load lo mantissa
3479 or.l %d1
,FP_SCR0_HI
(%a6
) # create dbl lo
3480 lea FP_SCR0
(%a6
),%a0
# pass: ptr to operand
3481 mov.
l EXC_EA
(%a6
),%a1
# pass: dst addr
3482 movq.
l &0x8,%d0
# pass: size of 8 bytes
3483 bsr.
l _dmem_write
# write the default result
3485 tst.
l %d1
# did dstore fail?
3486 bne.
l facc_out_d
# yes
3490 # for extended precision, if the addressing mode is pre-decrement or
3491 # post-increment, then the address register did not get updated.
3492 # in addition, for pre-decrement, the stacked <ea> is incorrect.
3494 clr.
b SPCOND_FLG
(%a6
) # clear special case flag
3496 mov.w FP_SRC_EX
(%a6
),FP_SCR0_EX
(%a6
)
3497 clr.w
2+FP_SCR0
(%a6
)
3498 mov.
l FP_SRC_HI
(%a6
),%d0
3500 mov.
l %d0
,FP_SCR0_HI
(%a6
)
3501 mov.
l FP_SRC_LO
(%a6
),FP_SCR0_LO
(%a6
)
3503 btst
&0x5,EXC_SR
(%a6
) # supervisor mode exception?
3504 bne.
b fsnan_out_x_s
# yes
3506 mov.
l %usp
,%a0
# fetch user stack pointer
3507 mov.
l %a0
,EXC_A7
(%a6
) # save on stack for calc_ea()
3508 mov.
l (%a6
),EXC_A6
(%a6
)
3510 bsr.
l _calc_ea_fout
# find the correct ea,update An
3512 mov.
l %a0
,EXC_EA
(%a6
) # stack correct <ea>
3514 mov.
l EXC_A7
(%a6
),%a0
3515 mov.
l %a0
,%usp
# restore user stack pointer
3516 mov.
l EXC_A6
(%a6
),(%a6
)
3519 lea FP_SCR0
(%a6
),%a0
# pass: ptr to operand
3520 movq.
l &0xc,%d0
# pass: size of extended
3521 bsr.
l _dmem_write
# write the default result
3523 tst.
l %d1
# did dstore fail?
3524 bne.
l facc_out_x
# yes
3529 mov.
l (%a6
),EXC_A6
(%a6
)
3531 bsr.
l _calc_ea_fout
# find the correct ea,update An
3533 mov.
l %a0
,EXC_EA
(%a6
) # stack correct <ea>
3535 mov.
l EXC_A6
(%a6
),(%a6
)
3537 cmpi.
b SPCOND_FLG
(%a6
),&mda7_flg
# is <ea> mode -(a7)?
3538 bne.
b fsnan_out_x_save
# no
3540 # the operation was "fmove.x SNAN,-(a7)" from supervisor mode.
3541 fmovm.x EXC_FPREGS
(%a6
),&0xc0 # restore fp0-fp1
3542 fmovm.
l USER_FPCR
(%a6
),%fpcr
,%fpsr
,%fpiar
# restore ctrl regs
3543 movm.
l EXC_DREGS
(%a6
),&0x0303 # restore d0-d1/a0-a1
3545 frestore FP_SRC
(%a6
)
3547 mov.
l EXC_A6
(%a6
),%a6
# restore frame pointer
3549 mov.
l LOCAL_SIZE+EXC_SR
(%sp
),LOCAL_SIZE+EXC_SR-
0xc(%sp
)
3550 mov.
l LOCAL_SIZE+EXC_PC+
0x2(%sp
),LOCAL_SIZE+EXC_PC+
0x2-0xc(%sp
)
3551 mov.
l LOCAL_SIZE+EXC_EA
(%sp
),LOCAL_SIZE+EXC_EA-
0xc(%sp
)
3553 mov.
l LOCAL_SIZE+FP_SCR0_EX
(%sp
),LOCAL_SIZE+EXC_SR
(%sp
)
3554 mov.
l LOCAL_SIZE+FP_SCR0_HI
(%sp
),LOCAL_SIZE+EXC_PC+
0x2(%sp
)
3555 mov.
l LOCAL_SIZE+FP_SCR0_LO
(%sp
),LOCAL_SIZE+EXC_EA
(%sp
)
3557 add.l &LOCAL_SIZE-
0x8,%sp
#########################################################################
# XDEF ****************************************************************	#
#	_fpsp_inex(): 060FPSP entry point for FP Inexact exception.	#
#	This handler should be the first code executed upon taking the	#
#	FP Inexact exception in an operating system.			#
# XREF ****************************************************************	#
#	_imem_read_long() - read instruction longword			#
#	fix_skewed_ops() - adjust src operand in fsave frame		#
#	set_tag_x() - determine optype of src/dst operands		#
#	store_fpreg() - store opclass 0 or 2 result to FP regfile	#
#	unnorm_fix() - change UNNORM operands to NORM or ZERO		#
#	load_fpn2() - load dst operand from FP regfile			#
#	smovcr() - emulate an "fmovcr" instruction			#
#	fout() - emulate an opclass 3 instruction			#
#	tbl_unsupp - addr of table of emulation routines for opclass 0,2	#
#	_real_inex() - "callout" to operating system inexact handler	#
# INPUT ***************************************************************	#
#	- The system stack contains the FP Inexact exception frame	#
#	- The fsave frame contains the source operand			#
# OUTPUT **************************************************************	#
#	- The system stack is unchanged					#
#	- The fsave frame contains the adjusted src op for opclass 0,2	#
# ALGORITHM ***********************************************************	#
#	In a system where the FP Inexact exception is enabled, the goal	#
# is to get to the handler specified at _real_inex(). But, on the 060,	#
# for opclass zero and two instructions taking this exception, the	#
# hardware doesn't store the correct result to the destination FP	#
# register as did the '040 and '881/2. This handler must emulate the	#
# instruction in order to get this value and then store it to the	#
# correct register before calling _real_inex().			#
#	For opclass 3 instructions, the 060 doesn't store the default	#
# inexact result out to memory or data register file as it should.	#
# This code must emulate the move out by calling fout() before finally	#
# exiting through _real_inex().						#
#########################################################################
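#
# As an informal illustration (not part of the package), the dispatch this
# handler performs can be sketched in C. The helper names stand in for the
# assembly routines named in the XREF list above; "emulate_and_store_fpreg"
# is a placeholder for the opclass 0/2 emulate-and-store path, not a symbol
# defined anywhere in this file:
#
#	#include <stdint.h>
#	extern void fout(void), fix_skewed_ops(void);
#	extern void emulate_and_store_fpreg(void), _real_inex(void);
#	void fpsp_inex_sketch(uint32_t instr)	/* opword:extension, as fetched below */
#	{
#		if (instr & (1u << 13)) {	/* bit 13 of the extension: "fmove out" */
#			fout();			/* emulate the move-out the 060 skipped */
#		} else {			/* opclass 0 or 2 arithmetic op */
#			fix_skewed_ops();	/* adjust src operand in the fsave frame */
#			emulate_and_store_fpreg(); /* recompute and write the dest FP reg */
#		}
#		_real_inex();			/* finally, enter the OS inexact handler */
#	}
#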
	global		_fpsp_inex
_fpsp_inex:
	link.w		%a6,&-LOCAL_SIZE	# init stack frame

	fsave		FP_SRC(%a6)		# grab the "busy" frame

	movm.l		&0x0303,EXC_DREGS(%a6)	# save d0-d1/a0-a1
	fmovm.l		%fpcr,%fpsr,%fpiar,USER_FPCR(%a6) # save ctrl regs
	fmovm.x		&0xc0,EXC_FPREGS(%a6)	# save fp0-fp1 on stack

# the FPIAR holds the "current PC" of the faulting instruction
	mov.l		USER_FPIAR(%a6),EXC_EXTWPTR(%a6)

	mov.l		EXC_EXTWPTR(%a6),%a0	# fetch instruction addr
	addq.l		&0x4,EXC_EXTWPTR(%a6)	# incr instruction ptr
	bsr.l		_imem_read_long		# fetch the instruction words
	mov.l		%d0,EXC_OPWORD(%a6)

##############################################################################

	btst		&13,%d0			# is instr an fmove out?
	bne.w		finex_out		# fmove out

# the hardware, for "fabs" and "fneg" w/ a long source format, puts the
# longword integer directly into the upper longword of the mantissa along
# w/ an exponent value of 0x401e. we convert this to extended precision here.
	bfextu		%d0{&19:&3},%d0		# fetch instr size
	bne.b		finex_cont		# instr size is not long
	cmpi.w		FP_SRC_EX(%a6),&0x401e	# is exponent 0x401e?
	bne.b		finex_cont		# no

	fmov.l		FP_SRC_HI(%a6),%fp0	# load integer src
	fmov.x		%fp0,FP_SRC(%a6)	# store integer as extended precision
	mov.w		&0xe001,0x2+FP_SRC(%a6)

finex_cont:
	lea		FP_SRC(%a6),%a0		# pass: ptr to src op
	bsr.l		fix_skewed_ops		# fix src op

# Here, we zero the ccode and exception byte field since we're going to
# emulate the whole instruction. Notice, though, that we don't kill the
# INEX1 bit. This is because a packed op has long since been converted
# to extended before arriving here. Therefore, we need to retain the
# INEX1 bit from when the operand was first converted.
	andi.l		&0x00ff01ff,USER_FPSR(%a6) # zero all but accrued field

	fmov.l		&0x0,%fpcr		# zero current control regs

	bfextu		EXC_EXTWORD(%a6){&0:&6},%d1 # extract upper 6 of cmdreg
	cmpi.b		%d1,&0x17		# is op an fmovecr?
	beq.w		finex_fmovcr		# yes

	lea		FP_SRC(%a6),%a0		# pass: ptr to src op
	bsr.l		set_tag_x		# tag the operand type
	mov.b		%d0,STAG(%a6)		# maybe NORM,DENORM

# bits four and five of the fp extension word separate the monadic and dyadic
# operations that can pass through fpsp_inex(). remember that fcmp and ftst
# will never take this exception, but fsincos will.
	btst		&0x5,1+EXC_CMDREG(%a6)	# is operation monadic or dyadic?
	beq.b		finex_extract		# monadic

	btst		&0x4,1+EXC_CMDREG(%a6)	# is operation an fsincos?
	bne.b		finex_extract		# yes

	bfextu		EXC_CMDREG(%a6){&6:&3},%d0 # dyadic; load dst reg
	bsr.l		load_fpn2		# load dst into FP_DST

	lea		FP_DST(%a6),%a0		# pass: ptr to dst op
	bsr.l		set_tag_x		# tag the operand type
	cmpi.b		%d0,&UNNORM		# is operand an UNNORM?
	bne.b		finex_op2_done		# no
	bsr.l		unnorm_fix		# yes; convert to NORM,DENORM,or ZERO

finex_op2_done:
	mov.b		%d0,DTAG(%a6)		# save dst optype tag

finex_extract:
	mov.b		FPCR_MODE(%a6),%d0	# pass rnd prec/mode

	mov.b		1+EXC_CMDREG(%a6),%d1
	andi.w		&0x007f,%d1		# extract extension

	mov.l		(tbl_unsupp.l,%pc,%d1.w*4),%d1 # fetch routine addr
	jsr		(tbl_unsupp.l,%pc,%d1.l*1)

# the operation has been emulated. the result is in fp0.

	bfextu		EXC_CMDREG(%a6){&6:&3},%d0
	bsr.l		store_fpreg

	fmovm.x		EXC_FPREGS(%a6),&0xc0	# restore fp0-fp1
	fmovm.l		USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs
	movm.l		EXC_DREGS(%a6),&0x0303	# restore d0-d1/a0-a1

	frestore	FP_SRC(%a6)

	unlk		%a6
	bra.l		_real_inex

finex_fmovcr:
	mov.b		FPCR_MODE(%a6),%d0	# pass rnd prec,mode
	mov.b		1+EXC_CMDREG(%a6),%d1
	andi.l		&0x0000007f,%d1		# pass rom offset

########################################################################

# the hardware does not save the default result to memory on enabled
# inexact exceptions. we do this here before passing control to
# the user inexact handler.
#
# byte, word, and long destination format operations can pass
# through here. so can double and single precision.
# although packed opclass three operations can take inexact
# exceptions, they won't pass through here since they are caught
# first by the unsupported data format exception handler. that handler
# sends them directly to _real_inex() if necessary.
finex_out:
	mov.b		&NORM,STAG(%a6)		# src is a NORM

	mov.b		FPCR_MODE(%a6),%d0	# pass rnd prec,mode

	andi.l		&0xffff00ff,USER_FPSR(%a6) # zero exception field

	lea		FP_SRC(%a6),%a0		# pass ptr to src operand

	bsr.l		fout			# store the default result
#########################################################################
# XDEF ****************************************************************	#
#	_fpsp_dz(): 060FPSP entry point for FP DZ exception.		#
#	This handler should be the first code executed upon taking	#
#	the FP DZ exception in an operating system.			#
# XREF ****************************************************************	#
#	_imem_read_long() - read instruction longword from memory	#
#	fix_skewed_ops() - adjust fsave operand				#
#	_real_dz() - "callout" exit point from FP DZ handler		#
# INPUT ***************************************************************	#
#	- The system stack contains the FP DZ exception stack.		#
#	- The fsave frame contains the source operand.			#
# OUTPUT **************************************************************	#
#	- The system stack contains the FP DZ exception stack.		#
#	- The fsave frame contains the adjusted source operand.	#
# ALGORITHM ***********************************************************	#
#	In a system where the DZ exception is enabled, the goal is to	#
# get to the handler specified at _real_dz(). But, on the 060, when the	#
# exception is taken, the input operand in the fsave state frame may	#
# be incorrect for some cases and need to be adjusted. So, this package	#
# adjusts the operand using fix_skewed_ops() and then branches to	#
# _real_dz().								#
#########################################################################
	global		_fpsp_dz
_fpsp_dz:
	link.w		%a6,&-LOCAL_SIZE	# init stack frame

	fsave		FP_SRC(%a6)		# grab the "busy" frame

	movm.l		&0x0303,EXC_DREGS(%a6)	# save d0-d1/a0-a1
	fmovm.l		%fpcr,%fpsr,%fpiar,USER_FPCR(%a6) # save ctrl regs
	fmovm.x		&0xc0,EXC_FPREGS(%a6)	# save fp0-fp1 on stack

# the FPIAR holds the "current PC" of the faulting instruction
	mov.l		USER_FPIAR(%a6),EXC_EXTWPTR(%a6)

	mov.l		EXC_EXTWPTR(%a6),%a0	# fetch instruction addr
	addq.l		&0x4,EXC_EXTWPTR(%a6)	# incr instruction ptr
	bsr.l		_imem_read_long		# fetch the instruction words
	mov.l		%d0,EXC_OPWORD(%a6)

##############################################################################

# here, we simply see if the operand in the fsave frame needs to be "unskewed".
# this would be the case for opclass two operations with a source zero
# in the sgl or dbl format.
	lea		FP_SRC(%a6),%a0		# pass: ptr to src op
	bsr.l		fix_skewed_ops		# fix src op

	fmovm.x		EXC_FPREGS(%a6),&0xc0	# restore fp0-fp1
	fmovm.l		USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs
	movm.l		EXC_DREGS(%a6),&0x0303	# restore d0-d1/a0-a1

	frestore	FP_SRC(%a6)

	unlk		%a6
	bra.l		_real_dz
#########################################################################
# XDEF ****************************************************************	#
#	_fpsp_fline(): 060FPSP entry point for "Line F emulator" exc.	#
#	This handler should be the first code executed upon taking the	#
#	"Line F Emulator" exception in an operating system.		#
# XREF ****************************************************************	#
#	_fpsp_unimp() - handle "FP Unimplemented" exceptions		#
#	_real_fpu_disabled() - handle "FPU disabled" exceptions	#
#	_real_fline() - handle "FLINE" exceptions			#
#	_imem_read_long() - read instruction longword			#
# INPUT ***************************************************************	#
#	- The system stack contains a "Line F Emulator" exception	#
#	  stack frame.							#
# OUTPUT **************************************************************	#
#	- The system stack is unchanged					#
# ALGORITHM ***********************************************************	#
#	When a "Line F Emulator" exception occurs, there are 3 possible	#
# exception types, denoted by the exception stack frame format number:	#
#	(1) FPU unimplemented instruction (6 word stack frame)		#
#	(2) FPU disabled (8 word stack frame)				#
#	(3) Line F (4 word stack frame)					#
#	This module determines which type occurred and forks the flow	#
# off to the appropriate "callout" (for "disabled" and "Line F") or to	#
# the correct emulation code (for "FPU unimplemented").			#
#	This code also must check for "fmovecr" instructions w/ a	#
# non-zero <ea> field. These may get flagged as "Line F" but should	#
# really be flagged as "FPU Unimplemented". (This is a "feature" on	#
# the '060.)								#
#########################################################################
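#
# Informally, the dispatch on the format/vector-offset word (located 6 bytes
# into the exception stack frame and tested by the cmpi.w instructions below)
# looks like this in C. "fline_illegal" is a placeholder for the fmovecr check
# and the eventual _real_fline() callout, not a symbol defined by the package:
#
#	#include <stdint.h>
#	extern void _fpsp_unimp(void), _real_fpu_disabled(void), fline_illegal(void);
#	void fline_dispatch(uint16_t fmt_voff)
#	{
#		if (fmt_voff == 0x202c)		/* FP unimplemented instruction frame */
#			_fpsp_unimp();
#		else if (fmt_voff == 0x402c)	/* FPU disabled frame */
#			_real_fpu_disabled();
#		else				/* genuine Line F frame */
#			fline_illegal();
#	}
#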
	global		_fpsp_fline
_fpsp_fline:
# check to see if this exception is a "FP Unimplemented Instruction"
# exception. if so, branch directly to that handler's entry point.
	cmpi.w		0x6(%sp),&0x202c
	beq.l		_fpsp_unimp

# check to see if the FPU is disabled. if so, jump to the OS entry
# point for that condition.
	cmpi.w		0x6(%sp),&0x402c
	beq.l		_real_fpu_disabled

# the exception was an "F-Line Illegal" exception. we check to see
# if the F-Line instruction is an "fmovecr" w/ a non-zero <ea>. if
# so, convert the F-Line exception stack frame to an FP Unimplemented
# Instruction exception stack frame else branch to the OS entry
# point for the F-Line exception handler.
	link.w		%a6,&-LOCAL_SIZE	# init stack frame

	movm.l		&0x0303,EXC_DREGS(%a6)	# save d0-d1/a0-a1

	mov.l		EXC_PC(%a6),EXC_EXTWPTR(%a6)
	mov.l		EXC_EXTWPTR(%a6),%a0	# fetch instruction addr
	addq.l		&0x4,EXC_EXTWPTR(%a6)	# incr instruction ptr
	bsr.l		_imem_read_long		# fetch instruction words

	bfextu		%d0{&0:&10},%d1		# is it an fmovecr?
	cmpi.w		%d1,&0x03c8
	bne.b		fline_fline		# no

	bfextu		%d0{&16:&6},%d1		# is it an fmovecr?
	cmpi.b		%d1,&0x17
	bne.b		fline_fline		# no

# it's an fmovecr w/ a non-zero <ea> that has entered through
# the F-Line Illegal exception.
# so, we need to convert the F-Line exception stack frame into an
# FP Unimplemented Instruction stack frame and jump to that entry
# point.
#
# but, if the FPU is disabled, then we need to jump to the FPU disabled
# entry point.

	movm.l		EXC_DREGS(%a6),&0x0303	# restore d0-d1/a0-a1

	sub.l		&0x8,%sp		# make room for "Next PC", <ea>
	mov.w		0x8(%sp),(%sp)
	mov.l		0xa(%sp),0x2(%sp)	# move "Current PC"
	mov.w		&0x402c,0x6(%sp)
	mov.l		0x2(%sp),0xc(%sp)
	addq.l		&0x4,0x2(%sp)		# set "Next PC"

	bra.l		_real_fpu_disabled

	movm.l		EXC_DREGS(%a6),&0x0303	# restore d0-d1/a0-a1

	fmov.l		0x2(%sp),%fpiar		# set current PC
	addq.l		&0x4,0x2(%sp)		# set Next PC

	mov.l		0x8(%sp),0x4(%sp)
	mov.b		&0x20,0x6(%sp)

fline_fline:
	movm.l		EXC_DREGS(%a6),&0x0303	# restore d0-d1/a0-a1
#########################################################################
# XDEF ****************************************************************	#
#	_fpsp_unimp(): 060FPSP entry point for FP "Unimplemented	#
#		       Instruction" exception.				#
#	This handler should be the first code executed upon taking the	#
#	FP Unimplemented Instruction exception in an operating system.	#
# XREF ****************************************************************	#
#	_imem_read_{word,long}() - read instruction word/longword	#
#	load_fop() - load src/dst ops from memory and/or FP regfile	#
#	store_fpreg() - store opclass 0 or 2 result to FP regfile	#
#	tbl_trans - addr of table of emulation routines for trnscndls	#
#	_real_access() - "callout" for access error exception		#
#	_fpsp_done() - "callout" for exit; work all done		#
#	_real_trace() - "callout" for Trace enabled exception		#
#	smovcr() - emulate "fmovecr" instruction			#
#	funimp_skew() - adjust fsave src ops to "incorrect" value	#
#	_ftrapcc() - emulate an "ftrapcc" instruction			#
#	_fdbcc() - emulate an "fdbcc" instruction			#
#	_fscc() - emulate an "fscc" instruction				#
#	_real_trap() - "callout" for Trap exception			#
#	_real_bsun() - "callout" for enabled Bsun exception		#
# INPUT ***************************************************************	#
#	- The system stack contains the "Unimplemented Instr" stk frame	#
# OUTPUT **************************************************************	#
#	If access error:						#
#	- The system stack is changed to an access error stack frame	#
#	If Trace exception enabled:					#
#	- The system stack is changed to a Trace exception stack frame	#
#	Else: (normal case)						#
#	- Correct result has been stored as appropriate			#
# ALGORITHM ***********************************************************	#
#	There are two main cases of instructions that may enter here to	#
# be emulated: (1) the FPgen instructions, most of which were also	#
# unimplemented on the 040, and (2) "ftrapcc", "fscc", and "fdbcc".	#
#	For the first set, this handler calls the routine load_fop()	#
# to load the source and destination (for dyadic) operands to be used	#
# for instruction emulation. The correct emulation routine is then	#
# chosen by decoding the instruction type and indexing into an		#
# emulation subroutine index table. After emulation returns, this	#
# handler checks to see if an exception should occur as a result of the	#
# FP instruction emulation. If so, then an FP exception of the correct	#
# type is inserted into the FPU state frame using the "frestore"	#
# instruction before exiting through _fpsp_done(). In either the	#
# exceptional or non-exceptional cases, we must check to see if the	#
# Trace exception is enabled. If so, then we must create a Trace	#
# exception frame from the current exception frame and exit through	#
# _real_trace().							#
#	For "fdbcc", "ftrapcc", and "fscc", the emulation subroutines	#
# _fdbcc(), _ftrapcc(), and _fscc() respectively are used. All three	#
# may flag that a BSUN exception should be taken. If so, then the	#
# current exception stack frame is converted into a BSUN exception	#
# stack frame and an exit is made through _real_bsun(). If the		#
# instruction was "ftrapcc" and a Trap exception should result, a Trap	#
# exception stack frame is created from the current frame and an exit	#
# is made through _real_trap(). If a Trace exception is pending, then	#
# a Trace exception frame is created from the current frame and a jump	#
# is made to _real_trace(). Finally, if none of these conditions exist,	#
# then the handler exits through the callout _fpsp_done().		#
#	In any of the above scenarios, if a _mem_read() or _mem_write()	#
# "callout" returns a failing value, then an access error stack frame	#
# is created from the current stack frame and an exit is made through	#
# _real_access().							#
#########################################################################

# FP UNIMPLEMENTED INSTRUCTION STACK FRAME:
#
#	*****************
#	*     <ea>      *	=> <ea> of fp unimp instr.
#	*****************
#	* 0x2 *  0x02c  *	=> frame format and vector offset (vector #11)
#	*****************
#	*    Next PC    *	=> PC of instr to execute after exc handling
#	*****************
#	*      SR       *	=> SR at the time the exception was taken
#	*****************
#
# Note: the !NULL bit does not get set in the fsave frame when the
# machine encounters an fp unimp exception. Therefore, it must be set
# before leaving this handler.
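#
# Viewed from the active stack pointer, the 6-word frame drawn above can be
# pictured as the following C struct (purely illustrative; these field names
# are not used anywhere in the package):
#
#	#include <stdint.h>
#	struct fp_unimp_frame {
#		uint16_t sr;			/* SR at the time the exception was taken */
#		uint16_t next_pc_hi, next_pc_lo; /* PC of instr to execute after handling */
#		uint16_t fmt_voff;		/* 0x202c = format 0x2, vector offset 0x02c */
#		uint16_t ea_hi, ea_lo;		/* <ea> of the fp unimp instruction */
#	};					/* 6 words total */
#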
	global		_fpsp_unimp
_fpsp_unimp:
	link.w		%a6,&-LOCAL_SIZE	# init stack frame

	movm.l		&0x0303,EXC_DREGS(%a6)	# save d0-d1/a0-a1
	fmovm.l		%fpcr,%fpsr,%fpiar,USER_FPCR(%a6) # save ctrl regs
	fmovm.x		&0xc0,EXC_FPREGS(%a6)	# save fp0-fp1

	btst		&0x5,EXC_SR(%a6)	# user mode exception?
	bne.b		funimp_s		# no; supervisor mode

# save the value of the user stack pointer onto the stack frame
	mov.l		%usp,%a0		# fetch user stack pointer
	mov.l		%a0,EXC_A7(%a6)		# store in stack frame
	bra.b		funimp_cont

# store the value of the supervisor stack pointer BEFORE the exc occurred.
# old_sp is address just above stacked effective address.
funimp_s:
	lea		4+EXC_EA(%a6),%a0	# load old a7'
	mov.l		%a0,EXC_A7(%a6)		# store a7'
	mov.l		%a0,OLD_A7(%a6)		# make a copy

funimp_cont:
# the FPIAR holds the "current PC" of the faulting instruction.
	mov.l		USER_FPIAR(%a6),EXC_EXTWPTR(%a6)

	mov.l		EXC_EXTWPTR(%a6),%a0	# fetch instruction addr
	addq.l		&0x4,EXC_EXTWPTR(%a6)	# incr instruction ptr
	bsr.l		_imem_read_long		# fetch the instruction words
	mov.l		%d0,EXC_OPWORD(%a6)

############################################################################

	fmov.l		&0x0,%fpcr		# clear FPCR
	fmov.l		&0x0,%fpsr		# clear FPSR

	clr.b		SPCOND_FLG(%a6)		# clear "special case" flag

# Divide the fp instructions into 8 types based on the TYPE field in
# bits 6-8 of the opword(classes 6,7 are undefined).
# (for the '060, only two types can take this exception)
#	bftst		%d0{&7:&3}		# test TYPE
	btst		&22,%d0			# type 0 or 1 ?
	bne.w		funimp_misc		# type 1

#########################################
# TYPE == 0: General instructions	#
#########################################

	clr.b		STORE_FLG(%a6)		# clear "store result" flag

# clear the ccode byte and exception status byte
	andi.l		&0x00ff00ff,USER_FPSR(%a6)

	bfextu		%d0{&16:&6},%d1		# extract upper 6 of cmdreg
	cmpi.b		%d1,&0x17		# is op an fmovecr?
	beq.w		funimp_fmovcr		# yes

	bsr.l		_load_fop		# load

	mov.b		FPCR_MODE(%a6),%d0	# fetch rnd mode

	mov.b		1+EXC_CMDREG(%a6),%d1
	andi.w		&0x003f,%d1		# extract extension bits
	lsl.w		&0x3,%d1		# shift left 3 bits
	or.b		STAG(%a6),%d1		# insert src optag bits

	lea		FP_DST(%a6),%a1		# pass dst ptr in a1
	lea		FP_SRC(%a6),%a0		# pass src ptr in a0

	mov.w		(tbl_trans.w,%pc,%d1.w*2),%d1
	jsr		(tbl_trans.w,%pc,%d1.w*1) # emulate
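#
# The instructions above form the index and jump through tbl_trans. As an
# illustration only (names and types are hypothetical, but the arithmetic
# mirrors the andi/lsl/or sequence):
#
#	#include <stdint.h>
#	extern int16_t tbl_trans[];	/* entries are 'short handler - tbl_trans' offsets */
#	void dispatch(uint8_t extension, uint8_t stag)
#	{
#		/* 6-bit opcode extension * 8 entries, plus the 3-bit src operand tag */
#		unsigned idx = ((extension & 0x3f) << 3) | (stag & 0x7);
#		void (*emul)(void) =
#		    (void (*)(void))((char *)tbl_trans + tbl_trans[idx]);
#		emul();			/* e.g. ssin, scos, satan, ssincos, ... */
#	}
#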
	mov.b		FPCR_ENABLE(%a6),%d0	# fetch exceptions enabled
	bne.w		funimp_ena		# some are enabled

funimp_store:
	bfextu		EXC_CMDREG(%a6){&6:&3},%d0 # fetch Dn
	bsr.l		store_fpreg		# store result to fp regfile

	fmovm.x		EXC_FP0(%a6),&0xc0	# restore fp0-fp1
	fmovm.l		USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs
	movm.l		EXC_DREGS(%a6),&0x0303	# restore d0-d1/a0-a1

funimp_gen_exit_cmp:
	cmpi.b		SPCOND_FLG(%a6),&mia7_flg # was the ea mode (sp)+ ?
	beq.b		funimp_gen_exit_a7	# yes

	cmpi.b		SPCOND_FLG(%a6),&mda7_flg # was the ea mode -(sp) ?
	beq.b		funimp_gen_exit_a7	# yes

funimp_gen_exit_cont:
	unlk		%a6

funimp_gen_exit_cont2:
	btst		&0x7,(%sp)		# is trace on?
	beq.l		_fpsp_done		# no

# this catches a problem with the case where an exception will be re-inserted
# into the machine. the frestore has already been executed...so, the fmov.l
# alone of the control register would trigger an unwanted exception.
# until I feel like fixing this, we'll sidestep the exception.
	fmov.l		%fpiar,0x14(%sp)	# "Current PC" is in FPIAR
	mov.w		&0x2024,0x6(%sp)	# stk fmt = 0x2; voff = 0x24
	bra.l		_real_trace

funimp_gen_exit_a7:
	btst		&0x5,EXC_SR(%a6)	# supervisor or user mode?
	bne.b		funimp_gen_exit_a7_s	# supervisor

	mov.l		EXC_A7(%a6),%a0
	mov.l		%a0,%usp
	bra.b		funimp_gen_exit_cont

# if the instruction was executed from supervisor mode and the addressing
# mode was (a7)+, then the stack frame for the rte must be shifted "up"
# "n" bytes where "n" is the size of the src operand type.
# f<op>.{b,w,l,s,d,x,p}
funimp_gen_exit_a7_s:
	mov.l		%d0,-(%sp)		# save d0
	mov.l		EXC_A7(%a6),%d0		# load new a7'
	sub.l		OLD_A7(%a6),%d0		# subtract old a7'
	mov.l		0x2+EXC_PC(%a6),(0x2+EXC_PC,%a6,%d0) # shift stack frame
	mov.l		EXC_SR(%a6),(EXC_SR,%a6,%d0) # shift stack frame
	mov.w		%d0,EXC_SR(%a6)		# store incr number
	mov.l		(%sp)+,%d0		# restore d0

	unlk		%a6
	add.w		(%sp),%sp		# stack frame shifted
	bra.b		funimp_gen_exit_cont2
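#
# In C terms, the supervisor (a7)+ fixup above amounts to sliding the four-word
# SR/PC/format frame up by the number of bytes the operand consumed and leaving
# that byte count where SR used to live, so the 'add.w (%sp),%sp' above can pop
# it. A rough sketch, with hypothetical names:
#
#	#include <stdint.h>
#	#include <string.h>
#	void shift_frame_up(uint8_t *sp, uint16_t n)	/* n = new_a7 - old_a7 */
#	{
#		memmove(sp + n, sp, 8);		/* move SR, PC and fmt/voff words up */
#		*(uint16_t *)sp = n;		/* recorded increment, popped afterwards */
#	}
#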
######################
# fmovecr.x #ccc,fpn #
######################
funimp_fmovcr:
	mov.b		FPCR_MODE(%a6),%d0
	mov.b		1+EXC_CMDREG(%a6),%d1
	andi.l		&0x0000007f,%d1		# pass rom offset in d1

#########################################################################

# the user has enabled some exceptions. we figure not to see this too
# often so that's why it gets lower priority.
funimp_ena:

# was an exception set that was also enabled?
	and.b		FPSR_EXCEPT(%a6),%d0	# keep only ones enabled and set
	bfffo		%d0{&24:&8},%d0		# find highest priority exception
	bne.b		funimp_exc		# at least one was set

# no exception that was enabled was set BUT if we got an exact overflow
# and overflow wasn't enabled but inexact was (yech!) then this is
# an inexact exception; otherwise, return to normal non-exception flow.
	btst		&ovfl_bit,FPSR_EXCEPT(%a6) # did overflow occur?
	beq.w		funimp_store		# no; return to normal flow

# the overflow w/ exact result happened but was inexact set in the FPCR?
	btst		&inex2_bit,FPCR_ENABLE(%a6) # is inexact enabled?
	beq.w		funimp_store		# no; return to normal flow
	bra.b		funimp_exc_ovfl		# yes

# some exception happened that was actually enabled.
# we'll insert this new exception into the FPU and then return.
funimp_exc:
	subi.l		&24,%d0			# fix offset to be 0-8
	cmpi.b		%d0,&0x6		# is exception INEX?
	bne.b		funimp_exc_force	# no

# the enabled exception was inexact. so, if it occurs with an overflow
# or underflow that was disabled, then we have to force an overflow or
# underflow frame. the eventual overflow or underflow handler will see that
# it's actually an inexact and act appropriately. this is the only easy
# way to have the EXOP available for the enabled inexact handler when
# a disabled overflow or underflow has also happened.
	btst		&ovfl_bit,FPSR_EXCEPT(%a6) # did overflow occur?
	bne.b		funimp_exc_ovfl		# yes
	btst		&unfl_bit,FPSR_EXCEPT(%a6) # did underflow occur?
	bne.b		funimp_exc_unfl		# yes

# force the fsave exception status bits to signal an exception of the
# appropriate type. don't forget to "skew" the source operand in case we
# "unskewed" the one the hardware initially gave us.
funimp_exc_force:
	mov.l		%d0,-(%sp)		# save d0
	bsr.l		funimp_skew		# check for special case
	mov.l		(%sp)+,%d0		# restore d0
	mov.w		(tbl_funimp_except.b,%pc,%d0.w*2),2+FP_SRC(%a6)
	bra.b		funimp_gen_exit2	# exit with frestore

tbl_funimp_except:
	short		0xe002, 0xe006, 0xe004, 0xe005
	short		0xe003, 0xe002, 0xe001, 0xe001

# insert an overflow frame
funimp_exc_ovfl:
	bsr.l		funimp_skew		# check for special case
	mov.w		&0xe005,2+FP_SRC(%a6)
	bra.b		funimp_gen_exit2

# insert an underflow frame
funimp_exc_unfl:
	bsr.l		funimp_skew		# check for special case
	mov.w		&0xe003,2+FP_SRC(%a6)

# this is the general exit point for an enabled exception that will be
# restored into the machine for the instruction just emulated.
funimp_gen_exit2:
	fmovm.x		EXC_FP0(%a6),&0xc0	# restore fp0-fp1
	fmovm.l		USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs
	movm.l		EXC_DREGS(%a6),&0x0303	# restore d0-d1/a0-a1

	frestore	FP_SRC(%a6)		# insert exceptional status

	bra.w		funimp_gen_exit_cmp
############################################################################

# TYPE == 1: FDB<cc>, FS<cc>, FTRAP<cc>
#
# These instructions were implemented on the '881/2 and '040 in hardware but
# are emulated in software on the '060.
funimp_misc:
	bfextu		%d0{&10:&3},%d1		# extract mode field
	cmpi.b		%d1,&0x1		# is it an fdb<cc>?
	beq.w		funimp_fdbcc		# yes
	cmpi.b		%d1,&0x7		# is it an fs<cc>?
	bne.w		funimp_fscc		# yes
	bfextu		%d0{&13:&3},%d1
	cmpi.b		%d1,&0x2		# is it an fs<cc>?
	blt.w		funimp_fscc		# yes

#########################
# ftrap<cc>.w #<data>	#
# ftrap<cc>.l #<data>	#
#########################

	bsr.l		_ftrapcc		# FTRAP<cc>()

	cmpi.b		SPCOND_FLG(%a6),&fbsun_flg # is enabled bsun occurring?
	beq.w		funimp_bsun		# yes

	cmpi.b		SPCOND_FLG(%a6),&ftrapcc_flg # should a trap occur?
	bne.w		funimp_done		# no

#	 FP UNIMP FRAME		   TRAP  FRAME
#	*****************	*****************
#	**     <EA>    **	** Current PC  **
#	*****************	*****************
#	* 0x2 *  0x02c  *	* 0x2 *  0x01c  *
#	*****************	*****************
#	**   Next PC   **	**   Next PC   **
#	*****************	*****************
#	*      SR       *	*      SR       *
#	*****************	*****************
#	    (6 words)		    (6 words)

# the ftrapcc instruction should take a trap. so, here we must create a
# trap stack frame from an unimplemented fp instruction stack frame and
# jump to the user supplied entry point for the trap exception
	mov.l		USER_FPIAR(%a6),EXC_EA(%a6) # Address = Current PC
	mov.w		&0x201c,EXC_VOFF(%a6)	# Vector Offset = 0x01c

	fmovm.x		EXC_FP0(%a6),&0xc0	# restore fp0-fp1
	fmovm.l		USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs
	movm.l		EXC_DREGS(%a6),&0x0303	# restore d0-d1/a0-a1

	unlk		%a6
	bra.l		_real_trap

#########################
# fdb<cc> Dn,<label>	#
#########################
funimp_fdbcc:
	mov.l		EXC_EXTWPTR(%a6),%a0	# fetch instruction addr
	addq.l		&0x2,EXC_EXTWPTR(%a6)	# incr instruction ptr
	bsr.l		_imem_read_word		# read displacement

	tst.l		%d1			# did ifetch fail?
	bne.w		funimp_iacc		# yes

	ext.l		%d0			# sign extend displacement

	bsr.l		_fdbcc			# FDB<cc>()

	cmpi.b		SPCOND_FLG(%a6),&fbsun_flg # is enabled bsun occurring?
	beq.w		funimp_bsun		# yes

	bra.w		funimp_done		# branch to finish

funimp_fscc:
	bsr.l		_fscc			# FS<cc>()

# I am assuming here that an "fs<cc>.b -(An)" or "fs<cc>.b (An)+" instruction
# does not need to update "An" before taking a bsun exception.
	cmpi.b		SPCOND_FLG(%a6),&fbsun_flg # is enabled bsun occurring?
	beq.w		funimp_bsun		# yes

	btst		&0x5,EXC_SR(%a6)	# yes; is it a user mode exception?
	bne.b		funimp_fscc_s		# no

	mov.l		EXC_A7(%a6),%a0		# yes; set new USP
	mov.l		%a0,%usp
	bra.w		funimp_done		# branch to finish

# remember, I'm assuming that post-increment is bogus...(it IS!!!)
# so, the least significant WORD of the stacked effective address got
# overwritten by the "fs<cc> -(An)". We must shift the stack frame "down"
# so that the rte will work correctly without destroying the result.
# even though the operation size is byte, the stack ptr is decr by 2.
#
# remember, also, this instruction may be traced.
funimp_fscc_s:
	cmpi.b		SPCOND_FLG(%a6),&mda7_flg # was a7 modified?
	bne.w		funimp_done		# no

	fmovm.x		EXC_FP0(%a6),&0xc0	# restore fp0-fp1
	fmovm.l		USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs
	movm.l		EXC_DREGS(%a6),&0x0303	# restore d0-d1/a0-a1

	unlk		%a6

	btst		&0x7,(%sp)		# is trace enabled?
	bne.b		funimp_fscc_s_trace	# yes

	mov.l		0x2(%sp),(%sp)		# shift SR,hi(PC) "down"
	mov.l		0x6(%sp),0x4(%sp)	# shift lo(PC),voff "down"

funimp_fscc_s_trace:
	mov.l		0x2(%sp),(%sp)		# shift SR,hi(PC) "down"
	mov.w		0x6(%sp),0x4(%sp)	# shift lo(PC)
	mov.w		&0x2024,0x6(%sp)	# fmt/voff = $2024
	fmov.l		%fpiar,0x8(%sp)		# insert "current PC"

	bra.l		_real_trace

# The ftrap<cc>, fs<cc>, or fdb<cc> is to take an enabled bsun. we must convert
# the fp unimplemented instruction exception stack frame into a bsun stack frame,
# restore a bsun exception into the machine, and branch to the user
# supplied bsun hook.
#
#	 FP UNIMP FRAME		   BSUN  FRAME
#	*****************	*****************
#	**     <EA>    **	* 0x0 *  0x0c0  *
#	*****************	*****************
#	* 0x2 *  0x02c  *	** Current PC  **
#	*****************	*****************
#	**   Next PC   **	*      SR       *
#	*****************	*****************

funimp_bsun:
	mov.w		&0x00c0,2+EXC_EA(%a6)	# Fmt = 0x0; Vector Offset = 0x0c0
	mov.l		USER_FPIAR(%a6),EXC_VOFF(%a6) # PC = Current PC
	mov.w		EXC_SR(%a6),2+EXC_PC(%a6) # shift SR "up"

	mov.w		&0xe000,2+FP_SRC(%a6)	# bsun exception enabled

	fmovm.x		EXC_FP0(%a6),&0xc0	# restore fp0-fp1
	fmovm.l		USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs
	movm.l		EXC_DREGS(%a6),&0x0303	# restore d0-d1/a0-a1

	frestore	FP_SRC(%a6)		# restore bsun exception

	unlk		%a6

	addq.l		&0x4,%sp		# erase sludge

	bra.l		_real_bsun		# branch to user bsun hook

# all ftrapcc/fscc/fdbcc processing has been completed. unwind the stack frame
# and exit.
#
# as usual, we have to check for trace mode being on here. since instructions
# modifying the supervisor stack frame don't pass through here, this is a
# relatively easy task.
funimp_done:
	fmovm.x		EXC_FP0(%a6),&0xc0	# restore fp0-fp1
	fmovm.l		USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs
	movm.l		EXC_DREGS(%a6),&0x0303	# restore d0-d1/a0-a1

	unlk		%a6

	btst		&0x7,(%sp)		# is trace enabled?
	bne.b		funimp_trace		# yes

	rte

#	 FP UNIMP FRAME		  TRACE  FRAME
#	*****************	*****************
#	**     <EA>    **	** Current PC  **
#	*****************	*****************
#	* 0x2 *  0x02c  *	* 0x2 *  0x024  *
#	*****************	*****************
#	**   Next PC   **	**   Next PC   **
#	*****************	*****************
#	*      SR       *	*      SR       *
#	*****************	*****************
#	    (6 words)		    (6 words)

# the fscc instruction should take a trace trap. so, here we must create a
# trace stack frame from an unimplemented fp instruction stack frame and
# jump to the user supplied entry point for the trace exception
funimp_trace:
	fmov.l		%fpiar,0x8(%sp)		# current PC is in fpiar
	mov.b		&0x24,0x7(%sp)		# vector offset = 0x024

	bra.l		_real_trace
################################################################

tbl_trans:
	short		tbl_trans - tbl_trans	# $00-0 fmovecr all
	short		tbl_trans - tbl_trans	# $00-1 fmovecr all
	short		tbl_trans - tbl_trans	# $00-2 fmovecr all
	short		tbl_trans - tbl_trans	# $00-3 fmovecr all
	short		tbl_trans - tbl_trans	# $00-4 fmovecr all
	short		tbl_trans - tbl_trans	# $00-5 fmovecr all
	short		tbl_trans - tbl_trans	# $00-6 fmovecr all
	short		tbl_trans - tbl_trans	# $00-7 fmovecr all

	short		tbl_trans - tbl_trans	# $01-0 fint norm
	short		tbl_trans - tbl_trans	# $01-1 fint zero
	short		tbl_trans - tbl_trans	# $01-2 fint inf
	short		tbl_trans - tbl_trans	# $01-3 fint qnan
	short		tbl_trans - tbl_trans	# $01-5 fint denorm
	short		tbl_trans - tbl_trans	# $01-4 fint snan
	short		tbl_trans - tbl_trans	# $01-6 fint unnorm
	short		tbl_trans - tbl_trans	# $01-7 ERROR

	short		ssinh - tbl_trans	# $02-0 fsinh norm
	short		src_zero - tbl_trans	# $02-1 fsinh zero
	short		src_inf - tbl_trans	# $02-2 fsinh inf
	short		src_qnan - tbl_trans	# $02-3 fsinh qnan
	short		ssinhd - tbl_trans	# $02-5 fsinh denorm
	short		src_snan - tbl_trans	# $02-4 fsinh snan
	short		tbl_trans - tbl_trans	# $02-6 fsinh unnorm
	short		tbl_trans - tbl_trans	# $02-7 ERROR

	short		tbl_trans - tbl_trans	# $03-0 fintrz norm
	short		tbl_trans - tbl_trans	# $03-1 fintrz zero
	short		tbl_trans - tbl_trans	# $03-2 fintrz inf
	short		tbl_trans - tbl_trans	# $03-3 fintrz qnan
	short		tbl_trans - tbl_trans	# $03-5 fintrz denorm
	short		tbl_trans - tbl_trans	# $03-4 fintrz snan
	short		tbl_trans - tbl_trans	# $03-6 fintrz unnorm
	short		tbl_trans - tbl_trans	# $03-7 ERROR

	short		tbl_trans - tbl_trans	# $04-0 fsqrt norm
	short		tbl_trans - tbl_trans	# $04-1 fsqrt zero
	short		tbl_trans - tbl_trans	# $04-2 fsqrt inf
	short		tbl_trans - tbl_trans	# $04-3 fsqrt qnan
	short		tbl_trans - tbl_trans	# $04-5 fsqrt denorm
	short		tbl_trans - tbl_trans	# $04-4 fsqrt snan
	short		tbl_trans - tbl_trans	# $04-6 fsqrt unnorm
	short		tbl_trans - tbl_trans	# $04-7 ERROR

	short		tbl_trans - tbl_trans	# $05-0 ERROR
	short		tbl_trans - tbl_trans	# $05-1 ERROR
	short		tbl_trans - tbl_trans	# $05-2 ERROR
	short		tbl_trans - tbl_trans	# $05-3 ERROR
	short		tbl_trans - tbl_trans	# $05-4 ERROR
	short		tbl_trans - tbl_trans	# $05-5 ERROR
	short		tbl_trans - tbl_trans	# $05-6 ERROR
	short		tbl_trans - tbl_trans	# $05-7 ERROR

	short		slognp1 - tbl_trans	# $06-0 flognp1 norm
	short		src_zero - tbl_trans	# $06-1 flognp1 zero
	short		sopr_inf - tbl_trans	# $06-2 flognp1 inf
	short		src_qnan - tbl_trans	# $06-3 flognp1 qnan
	short		slognp1d - tbl_trans	# $06-5 flognp1 denorm
	short		src_snan - tbl_trans	# $06-4 flognp1 snan
	short		tbl_trans - tbl_trans	# $06-6 flognp1 unnorm
	short		tbl_trans - tbl_trans	# $06-7 ERROR

	short		tbl_trans - tbl_trans	# $07-0 ERROR
	short		tbl_trans - tbl_trans	# $07-1 ERROR
	short		tbl_trans - tbl_trans	# $07-2 ERROR
	short		tbl_trans - tbl_trans	# $07-3 ERROR
	short		tbl_trans - tbl_trans	# $07-4 ERROR
	short		tbl_trans - tbl_trans	# $07-5 ERROR
	short		tbl_trans - tbl_trans	# $07-6 ERROR
	short		tbl_trans - tbl_trans	# $07-7 ERROR

	short		setoxm1 - tbl_trans	# $08-0 fetoxm1 norm
	short		src_zero - tbl_trans	# $08-1 fetoxm1 zero
	short		setoxm1i - tbl_trans	# $08-2 fetoxm1 inf
	short		src_qnan - tbl_trans	# $08-3 fetoxm1 qnan
	short		setoxm1d - tbl_trans	# $08-5 fetoxm1 denorm
	short		src_snan - tbl_trans	# $08-4 fetoxm1 snan
	short		tbl_trans - tbl_trans	# $08-6 fetoxm1 unnorm
	short		tbl_trans - tbl_trans	# $08-7 ERROR

	short		stanh - tbl_trans	# $09-0 ftanh norm
	short		src_zero - tbl_trans	# $09-1 ftanh zero
	short		src_one - tbl_trans	# $09-2 ftanh inf
	short		src_qnan - tbl_trans	# $09-3 ftanh qnan
	short		stanhd - tbl_trans	# $09-5 ftanh denorm
	short		src_snan - tbl_trans	# $09-4 ftanh snan
	short		tbl_trans - tbl_trans	# $09-6 ftanh unnorm
	short		tbl_trans - tbl_trans	# $09-7 ERROR

	short		satan - tbl_trans	# $0a-0 fatan norm
	short		src_zero - tbl_trans	# $0a-1 fatan zero
	short		spi_2 - tbl_trans	# $0a-2 fatan inf
	short		src_qnan - tbl_trans	# $0a-3 fatan qnan
	short		satand - tbl_trans	# $0a-5 fatan denorm
	short		src_snan - tbl_trans	# $0a-4 fatan snan
	short		tbl_trans - tbl_trans	# $0a-6 fatan unnorm
	short		tbl_trans - tbl_trans	# $0a-7 ERROR

	short		tbl_trans - tbl_trans	# $0b-0 ERROR
	short		tbl_trans - tbl_trans	# $0b-1 ERROR
	short		tbl_trans - tbl_trans	# $0b-2 ERROR
	short		tbl_trans - tbl_trans	# $0b-3 ERROR
	short		tbl_trans - tbl_trans	# $0b-4 ERROR
	short		tbl_trans - tbl_trans	# $0b-5 ERROR
	short		tbl_trans - tbl_trans	# $0b-6 ERROR
	short		tbl_trans - tbl_trans	# $0b-7 ERROR

	short		sasin - tbl_trans	# $0c-0 fasin norm
	short		src_zero - tbl_trans	# $0c-1 fasin zero
	short		t_operr - tbl_trans	# $0c-2 fasin inf
	short		src_qnan - tbl_trans	# $0c-3 fasin qnan
	short		sasind - tbl_trans	# $0c-5 fasin denorm
	short		src_snan - tbl_trans	# $0c-4 fasin snan
	short		tbl_trans - tbl_trans	# $0c-6 fasin unnorm
	short		tbl_trans - tbl_trans	# $0c-7 ERROR

	short		satanh - tbl_trans	# $0d-0 fatanh norm
	short		src_zero - tbl_trans	# $0d-1 fatanh zero
	short		t_operr - tbl_trans	# $0d-2 fatanh inf
	short		src_qnan - tbl_trans	# $0d-3 fatanh qnan
	short		satanhd - tbl_trans	# $0d-5 fatanh denorm
	short		src_snan - tbl_trans	# $0d-4 fatanh snan
	short		tbl_trans - tbl_trans	# $0d-6 fatanh unnorm
	short		tbl_trans - tbl_trans	# $0d-7 ERROR

	short		ssin - tbl_trans	# $0e-0 fsin norm
	short		src_zero - tbl_trans	# $0e-1 fsin zero
	short		t_operr - tbl_trans	# $0e-2 fsin inf
	short		src_qnan - tbl_trans	# $0e-3 fsin qnan
	short		ssind - tbl_trans	# $0e-5 fsin denorm
	short		src_snan - tbl_trans	# $0e-4 fsin snan
	short		tbl_trans - tbl_trans	# $0e-6 fsin unnorm
	short		tbl_trans - tbl_trans	# $0e-7 ERROR

	short		stan - tbl_trans	# $0f-0 ftan norm
	short		src_zero - tbl_trans	# $0f-1 ftan zero
	short		t_operr - tbl_trans	# $0f-2 ftan inf
	short		src_qnan - tbl_trans	# $0f-3 ftan qnan
	short		stand - tbl_trans	# $0f-5 ftan denorm
	short		src_snan - tbl_trans	# $0f-4 ftan snan
	short		tbl_trans - tbl_trans	# $0f-6 ftan unnorm
	short		tbl_trans - tbl_trans	# $0f-7 ERROR

	short		setox - tbl_trans	# $10-0 fetox norm
	short		ld_pone - tbl_trans	# $10-1 fetox zero
	short		szr_inf - tbl_trans	# $10-2 fetox inf
	short		src_qnan - tbl_trans	# $10-3 fetox qnan
	short		setoxd - tbl_trans	# $10-5 fetox denorm
	short		src_snan - tbl_trans	# $10-4 fetox snan
	short		tbl_trans - tbl_trans	# $10-6 fetox unnorm
	short		tbl_trans - tbl_trans	# $10-7 ERROR

	short		stwotox - tbl_trans	# $11-0 ftwotox norm
	short		ld_pone - tbl_trans	# $11-1 ftwotox zero
	short		szr_inf - tbl_trans	# $11-2 ftwotox inf
	short		src_qnan - tbl_trans	# $11-3 ftwotox qnan
	short		stwotoxd - tbl_trans	# $11-5 ftwotox denorm
	short		src_snan - tbl_trans	# $11-4 ftwotox snan
	short		tbl_trans - tbl_trans	# $11-6 ftwotox unnorm
	short		tbl_trans - tbl_trans	# $11-7 ERROR

	short		stentox - tbl_trans	# $12-0 ftentox norm
	short		ld_pone - tbl_trans	# $12-1 ftentox zero
	short		szr_inf - tbl_trans	# $12-2 ftentox inf
	short		src_qnan - tbl_trans	# $12-3 ftentox qnan
	short		stentoxd - tbl_trans	# $12-5 ftentox denorm
	short		src_snan - tbl_trans	# $12-4 ftentox snan
	short		tbl_trans - tbl_trans	# $12-6 ftentox unnorm
	short		tbl_trans - tbl_trans	# $12-7 ERROR

	short		tbl_trans - tbl_trans	# $13-0 ERROR
	short		tbl_trans - tbl_trans	# $13-1 ERROR
	short		tbl_trans - tbl_trans	# $13-2 ERROR
	short		tbl_trans - tbl_trans	# $13-3 ERROR
	short		tbl_trans - tbl_trans	# $13-4 ERROR
	short		tbl_trans - tbl_trans	# $13-5 ERROR
	short		tbl_trans - tbl_trans	# $13-6 ERROR
	short		tbl_trans - tbl_trans	# $13-7 ERROR

	short		slogn - tbl_trans	# $14-0 flogn norm
	short		t_dz2 - tbl_trans	# $14-1 flogn zero
	short		sopr_inf - tbl_trans	# $14-2 flogn inf
	short		src_qnan - tbl_trans	# $14-3 flogn qnan
	short		slognd - tbl_trans	# $14-5 flogn denorm
	short		src_snan - tbl_trans	# $14-4 flogn snan
	short		tbl_trans - tbl_trans	# $14-6 flogn unnorm
	short		tbl_trans - tbl_trans	# $14-7 ERROR

	short		slog10 - tbl_trans	# $15-0 flog10 norm
	short		t_dz2 - tbl_trans	# $15-1 flog10 zero
	short		sopr_inf - tbl_trans	# $15-2 flog10 inf
	short		src_qnan - tbl_trans	# $15-3 flog10 qnan
	short		slog10d - tbl_trans	# $15-5 flog10 denorm
	short		src_snan - tbl_trans	# $15-4 flog10 snan
	short		tbl_trans - tbl_trans	# $15-6 flog10 unnorm
	short		tbl_trans - tbl_trans	# $15-7 ERROR

	short		slog2 - tbl_trans	# $16-0 flog2 norm
	short		t_dz2 - tbl_trans	# $16-1 flog2 zero
	short		sopr_inf - tbl_trans	# $16-2 flog2 inf
	short		src_qnan - tbl_trans	# $16-3 flog2 qnan
	short		slog2d - tbl_trans	# $16-5 flog2 denorm
	short		src_snan - tbl_trans	# $16-4 flog2 snan
	short		tbl_trans - tbl_trans	# $16-6 flog2 unnorm
	short		tbl_trans - tbl_trans	# $16-7 ERROR

	short		tbl_trans - tbl_trans	# $17-0 ERROR
	short		tbl_trans - tbl_trans	# $17-1 ERROR
	short		tbl_trans - tbl_trans	# $17-2 ERROR
	short		tbl_trans - tbl_trans	# $17-3 ERROR
	short		tbl_trans - tbl_trans	# $17-4 ERROR
	short		tbl_trans - tbl_trans	# $17-5 ERROR
	short		tbl_trans - tbl_trans	# $17-6 ERROR
	short		tbl_trans - tbl_trans	# $17-7 ERROR

	short		tbl_trans - tbl_trans	# $18-0 fabs norm
	short		tbl_trans - tbl_trans	# $18-1 fabs zero
	short		tbl_trans - tbl_trans	# $18-2 fabs inf
	short		tbl_trans - tbl_trans	# $18-3 fabs qnan
	short		tbl_trans - tbl_trans	# $18-5 fabs denorm
	short		tbl_trans - tbl_trans	# $18-4 fabs snan
	short		tbl_trans - tbl_trans	# $18-6 fabs unnorm
	short		tbl_trans - tbl_trans	# $18-7 ERROR

	short		scosh - tbl_trans	# $19-0 fcosh norm
	short		ld_pone - tbl_trans	# $19-1 fcosh zero
	short		ld_pinf - tbl_trans	# $19-2 fcosh inf
	short		src_qnan - tbl_trans	# $19-3 fcosh qnan
	short		scoshd - tbl_trans	# $19-5 fcosh denorm
	short		src_snan - tbl_trans	# $19-4 fcosh snan
	short		tbl_trans - tbl_trans	# $19-6 fcosh unnorm
	short		tbl_trans - tbl_trans	# $19-7 ERROR

	short		tbl_trans - tbl_trans	# $1a-0 fneg norm
	short		tbl_trans - tbl_trans	# $1a-1 fneg zero
	short		tbl_trans - tbl_trans	# $1a-2 fneg inf
	short		tbl_trans - tbl_trans	# $1a-3 fneg qnan
	short		tbl_trans - tbl_trans	# $1a-5 fneg denorm
	short		tbl_trans - tbl_trans	# $1a-4 fneg snan
	short		tbl_trans - tbl_trans	# $1a-6 fneg unnorm
	short		tbl_trans - tbl_trans	# $1a-7 ERROR

	short		tbl_trans - tbl_trans	# $1b-0 ERROR
	short		tbl_trans - tbl_trans	# $1b-1 ERROR
	short		tbl_trans - tbl_trans	# $1b-2 ERROR
	short		tbl_trans - tbl_trans	# $1b-3 ERROR
	short		tbl_trans - tbl_trans	# $1b-4 ERROR
	short		tbl_trans - tbl_trans	# $1b-5 ERROR
	short		tbl_trans - tbl_trans	# $1b-6 ERROR
	short		tbl_trans - tbl_trans	# $1b-7 ERROR

	short		sacos - tbl_trans	# $1c-0 facos norm
	short		ld_ppi2 - tbl_trans	# $1c-1 facos zero
	short		t_operr - tbl_trans	# $1c-2 facos inf
	short		src_qnan - tbl_trans	# $1c-3 facos qnan
	short		sacosd - tbl_trans	# $1c-5 facos denorm
	short		src_snan - tbl_trans	# $1c-4 facos snan
	short		tbl_trans - tbl_trans	# $1c-6 facos unnorm
	short		tbl_trans - tbl_trans	# $1c-7 ERROR

	short		scos - tbl_trans	# $1d-0 fcos norm
	short		ld_pone - tbl_trans	# $1d-1 fcos zero
	short		t_operr - tbl_trans	# $1d-2 fcos inf
	short		src_qnan - tbl_trans	# $1d-3 fcos qnan
	short		scosd - tbl_trans	# $1d-5 fcos denorm
	short		src_snan - tbl_trans	# $1d-4 fcos snan
	short		tbl_trans - tbl_trans	# $1d-6 fcos unnorm
	short		tbl_trans - tbl_trans	# $1d-7 ERROR

	short		sgetexp - tbl_trans	# $1e-0 fgetexp norm
	short		src_zero - tbl_trans	# $1e-1 fgetexp zero
	short		t_operr - tbl_trans	# $1e-2 fgetexp inf
	short		src_qnan - tbl_trans	# $1e-3 fgetexp qnan
	short		sgetexpd - tbl_trans	# $1e-5 fgetexp denorm
	short		src_snan - tbl_trans	# $1e-4 fgetexp snan
	short		tbl_trans - tbl_trans	# $1e-6 fgetexp unnorm
	short		tbl_trans - tbl_trans	# $1e-7 ERROR

	short		sgetman - tbl_trans	# $1f-0 fgetman norm
	short		src_zero - tbl_trans	# $1f-1 fgetman zero
	short		t_operr - tbl_trans	# $1f-2 fgetman inf
	short		src_qnan - tbl_trans	# $1f-3 fgetman qnan
	short		sgetmand - tbl_trans	# $1f-5 fgetman denorm
	short		src_snan - tbl_trans	# $1f-4 fgetman snan
	short		tbl_trans - tbl_trans	# $1f-6 fgetman unnorm
	short		tbl_trans - tbl_trans	# $1f-7 ERROR

	short		tbl_trans - tbl_trans	# $20-0 fdiv norm
	short		tbl_trans - tbl_trans	# $20-1 fdiv zero
	short		tbl_trans - tbl_trans	# $20-2 fdiv inf
	short		tbl_trans - tbl_trans	# $20-3 fdiv qnan
	short		tbl_trans - tbl_trans	# $20-5 fdiv denorm
	short		tbl_trans - tbl_trans	# $20-4 fdiv snan
	short		tbl_trans - tbl_trans	# $20-6 fdiv unnorm
	short		tbl_trans - tbl_trans	# $20-7 ERROR

	short		smod_snorm - tbl_trans	# $21-0 fmod norm
	short		smod_szero - tbl_trans	# $21-1 fmod zero
	short		smod_sinf - tbl_trans	# $21-2 fmod inf
	short		sop_sqnan - tbl_trans	# $21-3 fmod qnan
	short		smod_sdnrm - tbl_trans	# $21-5 fmod denorm
	short		sop_ssnan - tbl_trans	# $21-4 fmod snan
	short		tbl_trans - tbl_trans	# $21-6 fmod unnorm
	short		tbl_trans - tbl_trans	# $21-7 ERROR

	short		tbl_trans - tbl_trans	# $22-0 fadd norm
	short		tbl_trans - tbl_trans	# $22-1 fadd zero
	short		tbl_trans - tbl_trans	# $22-2 fadd inf
	short		tbl_trans - tbl_trans	# $22-3 fadd qnan
	short		tbl_trans - tbl_trans	# $22-5 fadd denorm
	short		tbl_trans - tbl_trans	# $22-4 fadd snan
	short		tbl_trans - tbl_trans	# $22-6 fadd unnorm
	short		tbl_trans - tbl_trans	# $22-7 ERROR

	short		tbl_trans - tbl_trans	# $23-0 fmul norm
	short		tbl_trans - tbl_trans	# $23-1 fmul zero
	short		tbl_trans - tbl_trans	# $23-2 fmul inf
	short		tbl_trans - tbl_trans	# $23-3 fmul qnan
	short		tbl_trans - tbl_trans	# $23-5 fmul denorm
	short		tbl_trans - tbl_trans	# $23-4 fmul snan
	short		tbl_trans - tbl_trans	# $23-6 fmul unnorm
	short		tbl_trans - tbl_trans	# $23-7 ERROR

	short		tbl_trans - tbl_trans	# $24-0 fsgldiv norm
	short		tbl_trans - tbl_trans	# $24-1 fsgldiv zero
	short		tbl_trans - tbl_trans	# $24-2 fsgldiv inf
	short		tbl_trans - tbl_trans	# $24-3 fsgldiv qnan
	short		tbl_trans - tbl_trans	# $24-5 fsgldiv denorm
	short		tbl_trans - tbl_trans	# $24-4 fsgldiv snan
	short		tbl_trans - tbl_trans	# $24-6 fsgldiv unnorm
	short		tbl_trans - tbl_trans	# $24-7 ERROR

	short		srem_snorm - tbl_trans	# $25-0 frem norm
	short		srem_szero - tbl_trans	# $25-1 frem zero
	short		srem_sinf - tbl_trans	# $25-2 frem inf
	short		sop_sqnan - tbl_trans	# $25-3 frem qnan
	short		srem_sdnrm - tbl_trans	# $25-5 frem denorm
	short		sop_ssnan - tbl_trans	# $25-4 frem snan
	short		tbl_trans - tbl_trans	# $25-6 frem unnorm
	short		tbl_trans - tbl_trans	# $25-7 ERROR

	short		sscale_snorm - tbl_trans # $26-0 fscale norm
	short		sscale_szero - tbl_trans # $26-1 fscale zero
	short		sscale_sinf - tbl_trans	# $26-2 fscale inf
	short		sop_sqnan - tbl_trans	# $26-3 fscale qnan
	short		sscale_sdnrm - tbl_trans # $26-5 fscale denorm
	short		sop_ssnan - tbl_trans	# $26-4 fscale snan
	short		tbl_trans - tbl_trans	# $26-6 fscale unnorm
	short		tbl_trans - tbl_trans	# $26-7 ERROR

	short		tbl_trans - tbl_trans	# $27-0 fsglmul norm
	short		tbl_trans - tbl_trans	# $27-1 fsglmul zero
	short		tbl_trans - tbl_trans	# $27-2 fsglmul inf
	short		tbl_trans - tbl_trans	# $27-3 fsglmul qnan
	short		tbl_trans - tbl_trans	# $27-5 fsglmul denorm
	short		tbl_trans - tbl_trans	# $27-4 fsglmul snan
	short		tbl_trans - tbl_trans	# $27-6 fsglmul unnorm
	short		tbl_trans - tbl_trans	# $27-7 ERROR

	short		tbl_trans - tbl_trans	# $28-0 fsub norm
	short		tbl_trans - tbl_trans	# $28-1 fsub zero
	short		tbl_trans - tbl_trans	# $28-2 fsub inf
	short		tbl_trans - tbl_trans	# $28-3 fsub qnan
	short		tbl_trans - tbl_trans	# $28-5 fsub denorm
	short		tbl_trans - tbl_trans	# $28-4 fsub snan
	short		tbl_trans - tbl_trans	# $28-6 fsub unnorm
	short		tbl_trans - tbl_trans	# $28-7 ERROR

	short		tbl_trans - tbl_trans	# $29-0 ERROR
	short		tbl_trans - tbl_trans	# $29-1 ERROR
	short		tbl_trans - tbl_trans	# $29-2 ERROR
	short		tbl_trans - tbl_trans	# $29-3 ERROR
	short		tbl_trans - tbl_trans	# $29-4 ERROR
	short		tbl_trans - tbl_trans	# $29-5 ERROR
	short		tbl_trans - tbl_trans	# $29-6 ERROR
	short		tbl_trans - tbl_trans	# $29-7 ERROR

	short		tbl_trans - tbl_trans	# $2a-0 ERROR
	short		tbl_trans - tbl_trans	# $2a-1 ERROR
	short		tbl_trans - tbl_trans	# $2a-2 ERROR
	short		tbl_trans - tbl_trans	# $2a-3 ERROR
	short		tbl_trans - tbl_trans	# $2a-4 ERROR
	short		tbl_trans - tbl_trans	# $2a-5 ERROR
	short		tbl_trans - tbl_trans	# $2a-6 ERROR
	short		tbl_trans - tbl_trans	# $2a-7 ERROR

	short		tbl_trans - tbl_trans	# $2b-0 ERROR
	short		tbl_trans - tbl_trans	# $2b-1 ERROR
	short		tbl_trans - tbl_trans	# $2b-2 ERROR
	short		tbl_trans - tbl_trans	# $2b-3 ERROR
	short		tbl_trans - tbl_trans	# $2b-4 ERROR
	short		tbl_trans - tbl_trans	# $2b-5 ERROR
	short		tbl_trans - tbl_trans	# $2b-6 ERROR
	short		tbl_trans - tbl_trans	# $2b-7 ERROR

	short		tbl_trans - tbl_trans	# $2c-0 ERROR
	short		tbl_trans - tbl_trans	# $2c-1 ERROR
	short		tbl_trans - tbl_trans	# $2c-2 ERROR
	short		tbl_trans - tbl_trans	# $2c-3 ERROR
	short		tbl_trans - tbl_trans	# $2c-4 ERROR
	short		tbl_trans - tbl_trans	# $2c-5 ERROR
	short		tbl_trans - tbl_trans	# $2c-6 ERROR
	short		tbl_trans - tbl_trans	# $2c-7 ERROR

	short		tbl_trans - tbl_trans	# $2d-0 ERROR
	short		tbl_trans - tbl_trans	# $2d-1 ERROR
	short		tbl_trans - tbl_trans	# $2d-2 ERROR
	short		tbl_trans - tbl_trans	# $2d-3 ERROR
	short		tbl_trans - tbl_trans	# $2d-4 ERROR
	short		tbl_trans - tbl_trans	# $2d-5 ERROR
	short		tbl_trans - tbl_trans	# $2d-6 ERROR
	short		tbl_trans - tbl_trans	# $2d-7 ERROR

	short		tbl_trans - tbl_trans	# $2e-0 ERROR
	short		tbl_trans - tbl_trans	# $2e-1 ERROR
	short		tbl_trans - tbl_trans	# $2e-2 ERROR
	short		tbl_trans - tbl_trans	# $2e-3 ERROR
	short		tbl_trans - tbl_trans	# $2e-4 ERROR
	short		tbl_trans - tbl_trans	# $2e-5 ERROR
	short		tbl_trans - tbl_trans	# $2e-6 ERROR
	short		tbl_trans - tbl_trans	# $2e-7 ERROR

	short		tbl_trans - tbl_trans	# $2f-0 ERROR
	short		tbl_trans - tbl_trans	# $2f-1 ERROR
	short		tbl_trans - tbl_trans	# $2f-2 ERROR
	short		tbl_trans - tbl_trans	# $2f-3 ERROR
	short		tbl_trans - tbl_trans	# $2f-4 ERROR
	short		tbl_trans - tbl_trans	# $2f-5 ERROR
	short		tbl_trans - tbl_trans	# $2f-6 ERROR
	short		tbl_trans - tbl_trans	# $2f-7 ERROR

	short		ssincos - tbl_trans	# $30-0 fsincos norm
	short		ssincosz - tbl_trans	# $30-1 fsincos zero
	short		ssincosi - tbl_trans	# $30-2 fsincos inf
	short		ssincosqnan - tbl_trans	# $30-3 fsincos qnan
	short		ssincosd - tbl_trans	# $30-5 fsincos denorm
	short		ssincossnan - tbl_trans	# $30-4 fsincos snan
	short		tbl_trans - tbl_trans	# $30-6 fsincos unnorm
	short		tbl_trans - tbl_trans	# $30-7 ERROR

	short		ssincos - tbl_trans	# $31-0 fsincos norm
	short		ssincosz - tbl_trans	# $31-1 fsincos zero
	short		ssincosi - tbl_trans	# $31-2 fsincos inf
	short		ssincosqnan - tbl_trans	# $31-3 fsincos qnan
	short		ssincosd - tbl_trans	# $31-5 fsincos denorm
	short		ssincossnan - tbl_trans	# $31-4 fsincos snan
	short		tbl_trans - tbl_trans	# $31-6 fsincos unnorm
	short		tbl_trans - tbl_trans	# $31-7 ERROR

	short		ssincos - tbl_trans	# $32-0 fsincos norm
	short		ssincosz - tbl_trans	# $32-1 fsincos zero
	short		ssincosi - tbl_trans	# $32-2 fsincos inf
	short		ssincosqnan - tbl_trans	# $32-3 fsincos qnan
	short		ssincosd - tbl_trans	# $32-5 fsincos denorm
	short		ssincossnan - tbl_trans	# $32-4 fsincos snan
	short		tbl_trans - tbl_trans	# $32-6 fsincos unnorm
	short		tbl_trans - tbl_trans	# $32-7 ERROR

	short		ssincos - tbl_trans	# $33-0 fsincos norm
	short		ssincosz - tbl_trans	# $33-1 fsincos zero
	short		ssincosi - tbl_trans	# $33-2 fsincos inf
	short		ssincosqnan - tbl_trans	# $33-3 fsincos qnan
	short		ssincosd - tbl_trans	# $33-5 fsincos denorm
	short		ssincossnan - tbl_trans	# $33-4 fsincos snan
	short		tbl_trans - tbl_trans	# $33-6 fsincos unnorm
	short		tbl_trans - tbl_trans	# $33-7 ERROR

	short		ssincos - tbl_trans	# $34-0 fsincos norm
	short		ssincosz - tbl_trans	# $34-1 fsincos zero
	short		ssincosi - tbl_trans	# $34-2 fsincos inf
	short		ssincosqnan - tbl_trans	# $34-3 fsincos qnan
	short		ssincosd - tbl_trans	# $34-5 fsincos denorm
	short		ssincossnan - tbl_trans	# $34-4 fsincos snan
	short		tbl_trans - tbl_trans	# $34-6 fsincos unnorm
	short		tbl_trans - tbl_trans	# $34-7 ERROR

	short		ssincos - tbl_trans	# $35-0 fsincos norm
	short		ssincosz - tbl_trans	# $35-1 fsincos zero
	short		ssincosi - tbl_trans	# $35-2 fsincos inf
	short		ssincosqnan - tbl_trans	# $35-3 fsincos qnan
	short		ssincosd - tbl_trans	# $35-5 fsincos denorm
	short		ssincossnan - tbl_trans	# $35-4 fsincos snan
	short		tbl_trans - tbl_trans	# $35-6 fsincos unnorm
	short		tbl_trans - tbl_trans	# $35-7 ERROR

	short		ssincos - tbl_trans	# $36-0 fsincos norm
	short		ssincosz - tbl_trans	# $36-1 fsincos zero
	short		ssincosi - tbl_trans	# $36-2 fsincos inf
	short		ssincosqnan - tbl_trans	# $36-3 fsincos qnan
	short		ssincosd - tbl_trans	# $36-5 fsincos denorm
	short		ssincossnan - tbl_trans	# $36-4 fsincos snan
	short		tbl_trans - tbl_trans	# $36-6 fsincos unnorm
	short		tbl_trans - tbl_trans	# $36-7 ERROR

	short		ssincos - tbl_trans	# $37-0 fsincos norm
	short		ssincosz - tbl_trans	# $37-1 fsincos zero
	short		ssincosi - tbl_trans	# $37-2 fsincos inf
	short		ssincosqnan - tbl_trans	# $37-3 fsincos qnan
	short		ssincosd - tbl_trans	# $37-5 fsincos denorm
	short		ssincossnan - tbl_trans	# $37-4 fsincos snan
	short		tbl_trans - tbl_trans	# $37-6 fsincos unnorm
	short		tbl_trans - tbl_trans	# $37-7 ERROR
# the instruction fetch access for the displacement word for the
# fdbcc emulation failed. here, we create an access error frame
# from the current frame and branch to _real_access().
funimp_iacc:
	movm.l		EXC_DREGS(%a6),&0x0303	# restore d0-d1/a0-a1
	fmovm.l		USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs
	fmovm.x		EXC_FPREGS(%a6),&0xc0	# restore fp0-fp1

	mov.l		USER_FPIAR(%a6),EXC_PC(%a6) # store current PC

	unlk		%a6

	mov.l		(%sp),-(%sp)		# store SR,hi(PC)
	mov.w		0x8(%sp),0x4(%sp)	# store lo(PC)
	mov.w		&0x4008,0x6(%sp)	# store voff
	mov.l		0x2(%sp),0x8(%sp)	# store EA
	mov.l		&0x09428001,0xc(%sp)	# store FSLW

	btst		&0x5,(%sp)		# user or supervisor mode?
	beq.b		funimp_iacc_end		# user
	bset		&0x2,0xd(%sp)		# set supervisor TM bit

funimp_iacc_end:
	bra.l		_real_access
5008 #########################################################################
5009 # ssin(): computes the sine of a normalized input #
5010 # ssind(): computes the sine of a denormalized input #
5011 # scos(): computes the cosine of a normalized input #
5012 # scosd(): computes the cosine of a denormalized input #
5013 # ssincos(): computes the sine and cosine of a normalized input #
5014 # ssincosd(): computes the sine and cosine of a denormalized input #
5016 # INPUT *************************************************************** #
5017 # a0 = pointer to extended precision input #
5018 # d0 = round precision,mode #
5020 # OUTPUT ************************************************************** #
5021 # fp0 = sin(X) or cos(X) #
5027 # ACCURACY and MONOTONICITY ******************************************* #
5028 # The returned result is within 1 ulp in 64 significant bit, i.e. #
5029 # within 0.5001 ulp to 53 bits if the result is subsequently #
5030 # rounded to double precision. The result is provably monotonic #
5031 # in double precision. #
5033 # ALGORITHM *********************************************************** #
5036 # 1. If SIN is invoked, set AdjN := 0; otherwise, set AdjN := 1. #
5038 # 2. If |X| >= 15Pi or |X| < 2**(-40), go to 7. #
5040 # 3. Decompose X as X = N(Pi/2) + r where |r| <= Pi/4. Let #
5041 # k = N mod 4, so in particular, k = 0,1,2,or 3. #
5042 # Overwrite k by k := k + AdjN. #
5044 # 4. If k is even, go to 6. #
5046 # 5. (k is odd) Set j := (k-1)/2, sgn := (-1)**j. #
5047 # Return sgn*cos(r) where cos(r) is approximated by an #
5048 # even polynomial in r, 1 + r*r*(B1+s*(B2+ ... + s*B8)), #
5052 # 6. (k is even) Set j := k/2, sgn := (-1)**j. Return sgn*sin(r) #
5053 # where sin(r) is approximated by an odd polynomial in r #
5054 # r + r*s*(A1+s*(A2+ ... + s*A7)), s = r*r. #
5057 # 7. If |X| > 1, go to 9. #
5059 # 8. (|X|<2**(-40)) If SIN is invoked, return X; #
5060 # otherwise return 1. #
5062 # 9. Overwrite X by X := X rem 2Pi. Now that |X| <= Pi, #
5066 # 1. If |X| >= 15Pi or |X| < 2**(-40), go to 6. #
5068 # 2. Decompose X as X = N(Pi/2) + r where |r| <= Pi/4. Let #
5069 # k = N mod 4, so in particular, k = 0,1,2,or 3. #
5071 # 3. If k is even, go to 5. #
5073 # 4. (k is odd) Set j1 := (k-1)/2, j2 := j1 (EOR) (k mod 2), ie. #
5074 # j1 exclusive or with the l.s.b. of k. #
5075 # sgn1 := (-1)**j1, sgn2 := (-1)**j2. #
5076 # SIN(X) = sgn1 * cos(r) and COS(X) = sgn2*sin(r) where #
5077 # sin(r) and cos(r) are computed as odd and even #
5078 # polynomials in r, respectively. Exit #
5080 # 5. (k is even) Set j1 := k/2, sgn1 := (-1)**j1. #
5081 # SIN(X) = sgn1 * sin(r) and COS(X) = sgn1*cos(r) where #
5082 # sin(r) and cos(r) are computed as odd and even #
5083 # polynomials in r, respectively. Exit #
5085 # 6. If |X| > 1, go to 8. #
5087 # 7. (|X|<2**(-40)) SIN(X) = X and COS(X) = 1. Exit. #
5089 # 8. Overwrite X by X := X rem 2Pi. Now that |X| <= Pi, #
5092 #########################################################################
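#
# The flow above can be summarized by a short C sketch (double precision
# only; the helper names and the low-order Taylor coefficients are
# illustrative stand-ins for the extended-precision PITBL table and the
# minimax coefficients A1..A7/B1..B8 used below -- not part of the package):
#
#	#include <math.h>
#
#	static double sin_poly(double r) {          /* odd polynomial in r  */
#	    double s = r*r;
#	    return r + r*s*(-1.0/6 + s*(1.0/120 + s*(-1.0/5040)));
#	}
#	static double cos_poly(double r) {          /* even polynomial in r */
#	    double s = r*r;
#	    return 1.0 + s*(-0.5 + s*(1.0/24 + s*(-1.0/720)));
#	}
#	/* adjn = 0 computes sin(x), adjn = 1 computes cos(x). */
#	double sincos_sketch(double x, int adjn) {
#	    double n = nearbyint(x * (2.0/M_PI));   /* X = N*(pi/2) + r     */
#	    double r = x - n*(M_PI/2);              /* |r| <= pi/4          */
#	    int k = ((int)n + adjn) & 3;            /* k = (N + AdjN) mod 4 */
#	    double v = (k & 1) ? cos_poly(r) : sin_poly(r);
#	    return (k & 2) ? -v : v;                /* sgn = (-1)**j        */
#	}
#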
SINA7:	long		0xBD6AAA77,0xCCC994F5
SINA6:	long		0x3DE61209,0x7AAE8DA1
SINA5:	long		0xBE5AE645,0x2A118AE4
SINA4:	long		0x3EC71DE3,0xA5341531
SINA3:	long		0xBF2A01A0,0x1A018B59,0x00000000,0x00000000
SINA2:	long		0x3FF80000,0x88888888,0x888859AF,0x00000000
SINA1:	long		0xBFFC0000,0xAAAAAAAA,0xAAAAAA99,0x00000000

COSB8:	long		0x3D2AC4D0,0xD6011EE3
COSB7:	long		0xBDA9396F,0x9F45AC19
COSB6:	long		0x3E21EED9,0x0612C972
COSB5:	long		0xBE927E4F,0xB79D9FCF
COSB4:	long		0x3EFA01A0,0x1A01D423,0x00000000,0x00000000
COSB3:	long		0xBFF50000,0xB60B60B6,0x0B61D438,0x00000000
COSB2:	long		0x3FFA0000,0xAAAAAAAA,0xAAAAAB5E
COSB1:	long		0xBF000000
############################################

	mov.l		&0,ADJN(%a6)		# yes; SET ADJN TO 0

############################################

	mov.l		&1,ADJN(%a6)		# yes; SET ADJN TO 1

############################################

#--SAVE FPCR, FP1. CHECK IF |X| IS TOO SMALL OR LARGE

	fmov.x		(%a0),%fp0		# LOAD INPUT
	fmov.x		%fp0,X(%a6)		# save input at X

	mov.l		(%a0),%d1		# put exp in hi word
	mov.w		4(%a0),%d1		# fetch hi(man)
	and.l		&0x7FFFFFFF,%d1		# strip sign

	cmpi.l		%d1,&0x3FD78000		# is |X| >= 2**(-40)?
	bra.w		SINSM			# yes; input is very small

	cmp.l		%d1,&0x4004BC7E		# is |X| < 15 PI?
	bra.w		SREDUCEX		# yes; input is very large

#--THIS IS THE USUAL CASE, |X| <= 15 PI.
#--THE ARGUMENT REDUCTION IS DONE BY TABLE LOOK UP.
	fmul.d		TWOBYPI(%pc),%fp1	# X*2/PI

	lea		PITBL+0x200(%pc),%a1	# TABLE OF N*PI/2, N = -32,...,32

	fmov.l		%fp1,INT(%a6)		# CONVERT TO INTEGER

	mov.l		INT(%a6),%d1		# make a copy of N
	asl.l		&4,%d1			# N *= 16
	add.l		%d1,%a1			# tbl_addr = a1 + (N*16)

# A1 IS THE ADDRESS OF N*PIBY2
# ...WHICH IS IN TWO PIECES Y1 & Y2
	fsub.x		(%a1)+,%fp0		# X-Y1
	fsub.s		(%a1),%fp0		# fp0 = R = (X-Y1)-Y2

#--continuation from REDUCEX

#--GET N+ADJN AND SEE IF SIN(R) OR COS(R) IS NEEDED
	add.l		ADJN(%a6),%d1		# SEE IF D0 IS ODD OR EVEN
	ror.l		&1,%d1			# D0 WAS ODD IFF D0 IS NEGATIVE
#--LET J BE THE LEAST SIG. BIT OF D0, LET SGN := (-1)**J.
#--THEN WE RETURN SGN*SIN(R). SGN*SIN(R) IS COMPUTED BY
#--R' + R'*S*(A1 + S(A2 + S(A3 + S(A4 + ... + SA7)))), WHERE
#--R' = SGN*R, S=R*R. THIS CAN BE REWRITTEN AS
#--R' + R'*S*( [A1+T(A3+T(A5+TA7))] + [S(A2+T(A4+TA6))])
#--NOTE THAT A3 THROUGH A7 ARE STORED IN DOUBLE PRECISION
#--WHILE A1 AND A2 ARE IN DOUBLE-EXTENDED FORMAT.
	fmovm.x		&0x0c,-(%sp)		# save fp2/fp3

	fmov.x		%fp0,X(%a6)		# X IS R
	fmul.x		%fp0,%fp0		# FP0 IS S

	fmov.d		SINA7(%pc),%fp3
	fmov.d		SINA6(%pc),%fp2

	fmul.x		%fp1,%fp1		# FP1 IS T

	and.l		&0x80000000,%d1		# LEAST SIG. BIT OF D0 IN SIGN POSITION
	eor.l		%d1,X(%a6)		# X IS NOW R'= SGN*R

	fmul.x		%fp1,%fp3		# TA7
	fmul.x		%fp1,%fp2		# TA6

	fadd.d		SINA5(%pc),%fp3		# A5+TA7
	fadd.d		SINA4(%pc),%fp2		# A4+TA6

	fmul.x		%fp1,%fp3		# T(A5+TA7)
	fmul.x		%fp1,%fp2		# T(A4+TA6)

	fadd.d		SINA3(%pc),%fp3		# A3+T(A5+TA7)
	fadd.x		SINA2(%pc),%fp2		# A2+T(A4+TA6)

	fmul.x		%fp3,%fp1		# T(A3+T(A5+TA7))
	fmul.x		%fp0,%fp2		# S(A2+T(A4+TA6))

	fadd.x		SINA1(%pc),%fp1		# A1+T(A3+T(A5+TA7))
	fmul.x		X(%a6),%fp0		# R'*S

	fadd.x		%fp2,%fp1		# [A1+T(A3+T(A5+TA7))]+[S(A2+T(A4+TA6))]

	fmul.x		%fp1,%fp0		# SIN(R')-R'

	fmovm.x		(%sp)+,&0x30		# restore fp2/fp3

	fmov.l		%d0,%fpcr		# restore users round mode,prec
	fadd.x		X(%a6),%fp0		# last inst - possible exception set
#--LET J BE THE LEAST SIG. BIT OF D0, LET SGN := (-1)**J.
#--THEN WE RETURN SGN*COS(R). SGN*COS(R) IS COMPUTED BY
#--SGN + S'*(B1 + S(B2 + S(B3 + S(B4 + ... + SB8)))), WHERE
#--S=R*R AND S'=SGN*S. THIS CAN BE REWRITTEN AS
#--SGN + S'*([B1+T(B3+T(B5+TB7))] + [S(B2+T(B4+T(B6+TB8)))])
#--NOTE THAT B4 THROUGH B8 ARE STORED IN DOUBLE PRECISION
#--WHILE B2 AND B3 ARE IN DOUBLE-EXTENDED FORMAT, B1 IS -1/2
#--AND IS THEREFORE STORED AS SINGLE PRECISION.
	fmovm.x		&0x0c,-(%sp)		# save fp2/fp3

	fmul.x		%fp0,%fp0		# FP0 IS S

	fmov.d		COSB8(%pc),%fp2
	fmov.d		COSB7(%pc),%fp3

	fmul.x		%fp1,%fp1		# FP1 IS T

	fmov.x		%fp0,X(%a6)		# X IS S

	and.l		&0x80000000,%d1		# LEAST SIG. BIT OF D0 IN SIGN POSITION

	fmul.x		%fp1,%fp2		# TB8

	eor.l		%d1,X(%a6)		# X IS NOW S'= SGN*S
	and.l		&0x80000000,%d1

	fmul.x		%fp1,%fp3		# TB7

	or.l		&0x3F800000,%d1		# D0 IS SGN IN SINGLE
	mov.l		%d1,POSNEG1(%a6)

	fadd.d		COSB6(%pc),%fp2		# B6+TB8
	fadd.d		COSB5(%pc),%fp3		# B5+TB7

	fmul.x		%fp1,%fp2		# T(B6+TB8)
	fmul.x		%fp1,%fp3		# T(B5+TB7)

	fadd.d		COSB4(%pc),%fp2		# B4+T(B6+TB8)
	fadd.x		COSB3(%pc),%fp3		# B3+T(B5+TB7)

	fmul.x		%fp1,%fp2		# T(B4+T(B6+TB8))
	fmul.x		%fp3,%fp1		# T(B3+T(B5+TB7))

	fadd.x		COSB2(%pc),%fp2		# B2+T(B4+T(B6+TB8))
	fadd.s		COSB1(%pc),%fp1		# B1+T(B3+T(B5+TB7))

	fmul.x		%fp2,%fp0		# S(B2+T(B4+T(B6+TB8)))

	fmovm.x		(%sp)+,&0x30		# restore fp2/fp3

	fmov.l		%d0,%fpcr		# restore users round mode,prec
	fadd.s		POSNEG1(%a6),%fp0	# last inst - possible exception set
##############################################

# SINe: Big OR Small?
#--IF |X| > 15PI, WE USE THE GENERAL ARGUMENT REDUCTION.
#--IF |X| < 2**(-40), RETURN X OR 1.
	cmp.l		%d1,&0x3FFF8000

# here, the operation may underflow iff the precision is sgl or dbl.
# extended denorms are handled through another entry point.
#	mov.w		&0x0000,XDCARE(%a6)	# JUST IN CASE

	fmov.l		%d0,%fpcr		# restore users round mode,prec
	mov.b		&FMOV_OP,%d1		# last inst is MOVE
	fmov.x		X(%a6),%fp0		# last inst - possible exception set

	fmov.s		&0x3F800000,%fp0	# fp0 = 1.0
	fmov.l		%d0,%fpcr		# restore users round mode,prec
	fadd.s		&0x80800000,%fp0	# last inst - possible exception set

################################################

#--SIN(X) = X FOR DENORMALIZED X

############################################

#--COS(X) = 1 FOR DENORMALIZED X
	fmov.s		&0x3F800000,%fp0	# fp0 = 1.0
##################################################

	fmov.x		(%a0),%fp0		# LOAD INPUT

	and.l		&0x7FFFFFFF,%d1		# COMPACTIFY X

	cmp.l		%d1,&0x3FD78000		# |X| >= 2**(-40)?

	cmp.l		%d1,&0x4004BC7E		# |X| < 15 PI?

#--THIS IS THE USUAL CASE, |X| <= 15 PI.
#--THE ARGUMENT REDUCTION IS DONE BY TABLE LOOK UP.
	fmul.d		TWOBYPI(%pc),%fp1	# X*2/PI

	lea		PITBL+0x200(%pc),%a1	# TABLE OF N*PI/2, N = -32,...,32

	fmov.l		%fp1,INT(%a6)		# CONVERT TO INTEGER

	add.l		%d1,%a1			# ADDRESS OF N*PIBY2, IN Y1, Y2

	fsub.x		(%a1)+,%fp0		# X-Y1
	fsub.s		(%a1),%fp0		# FP0 IS R = (X-Y1)-Y2

#--continuation point from REDUCEX

	cmp.l		%d1,&0			# D0 < 0 IFF N IS ODD
5398 #--REGISTERS SAVED SO FAR: D0, A0, FP2.
5399 fmovm.x
&0x04,-(%sp
) # save fp2
5401 fmov.x
%fp0
,RPRIME
(%a6
)
5402 fmul.x
%fp0
,%fp0
# FP0 IS S = R*R
5403 fmov.d SINA7
(%pc
),%fp1
# A7
5404 fmov.d COSB8
(%pc
),%fp2
# B8
5405 fmul.x
%fp0
,%fp1
# SA7
5406 fmul.x
%fp0
,%fp2
# SB8
5411 and.l &0x80000000,%d2
5413 and.l &0x80000000,%d2
5415 fadd.d SINA6
(%pc
),%fp1
# A6+SA7
5416 fadd.d COSB7
(%pc
),%fp2
# B7+SB8
5418 fmul.x
%fp0
,%fp1
# S(A6+SA7)
5419 eor.
l %d2
,RPRIME
(%a6
)
5421 fmul.x
%fp0
,%fp2
# S(B7+SB8)
5423 and.l &0x80000000,%d1
5424 mov.
l &0x3F800000,POSNEG1
(%a6
)
5425 eor.
l %d1
,POSNEG1
(%a6
)
5427 fadd.d SINA5
(%pc
),%fp1
# A5+S(A6+SA7)
5428 fadd.d COSB6
(%pc
),%fp2
# B6+S(B7+SB8)
5430 fmul.x
%fp0
,%fp1
# S(A5+S(A6+SA7))
5431 fmul.x
%fp0
,%fp2
# S(B6+S(B7+SB8))
5432 fmov.x
%fp0
,SPRIME
(%a6
)
5434 fadd.d SINA4
(%pc
),%fp1
# A4+S(A5+S(A6+SA7))
5435 eor.
l %d1
,SPRIME
(%a6
)
5436 fadd.d COSB5
(%pc
),%fp2
# B5+S(B6+S(B7+SB8))
5438 fmul.x
%fp0
,%fp1
# S(A4+...)
5439 fmul.x
%fp0
,%fp2
# S(B5+...)
5441 fadd.d SINA3
(%pc
),%fp1
# A3+S(A4+...)
5442 fadd.d COSB4
(%pc
),%fp2
# B4+S(B5+...)
5444 fmul.x
%fp0
,%fp1
# S(A3+...)
5445 fmul.x
%fp0
,%fp2
# S(B4+...)
5447 fadd.x SINA2
(%pc
),%fp1
# A2+S(A3+...)
5448 fadd.x COSB3
(%pc
),%fp2
# B3+S(B4+...)
5450 fmul.x
%fp0
,%fp1
# S(A2+...)
5451 fmul.x
%fp0
,%fp2
# S(B3+...)
5453 fadd.x SINA1
(%pc
),%fp1
# A1+S(A2+...)
5454 fadd.x COSB2
(%pc
),%fp2
# B2+S(B3+...)
5456 fmul.x
%fp0
,%fp1
# S(A1+...)
5457 fmul.x
%fp2
,%fp0
# S(B2+...)
5459 fmul.x RPRIME
(%a6
),%fp1
# R'S(A1+...)
5460 fadd.s COSB1
(%pc
),%fp0
# B1+S(B2...)
5461 fmul.x SPRIME
(%a6
),%fp0
# S'(B1+S(B2+...))
5463 fmovm.x
(%sp
)+,&0x20 # restore fp2
5466 fadd.x RPRIME
(%a6
),%fp1
# COS(X)
5467 bsr sto_cos
# store cosine result
5468 fadd.s POSNEG1
(%a6
),%fp0
# SIN(X)
5472 #--REGISTERS SAVED SO FAR: FP2.
5473 fmovm.x
&0x04,-(%sp
) # save fp2
5475 fmov.x
%fp0
,RPRIME
(%a6
)
5476 fmul.x
%fp0
,%fp0
# FP0 IS S = R*R
5478 fmov.d COSB8
(%pc
),%fp1
# B8
5479 fmov.d SINA7
(%pc
),%fp2
# A7
5481 fmul.x
%fp0
,%fp1
# SB8
5482 fmov.x
%fp0
,SPRIME
(%a6
)
5483 fmul.x
%fp0
,%fp2
# SA7
5486 and.l &0x80000000,%d1
5488 fadd.d COSB7
(%pc
),%fp1
# B7+SB8
5489 fadd.d SINA6
(%pc
),%fp2
# A6+SA7
5491 eor.
l %d1
,RPRIME
(%a6
)
5492 eor.
l %d1
,SPRIME
(%a6
)
5494 fmul.x
%fp0
,%fp1
# S(B7+SB8)
5496 or.l &0x3F800000,%d1
5497 mov.
l %d1
,POSNEG1
(%a6
)
5499 fmul.x
%fp0
,%fp2
# S(A6+SA7)
5501 fadd.d COSB6
(%pc
),%fp1
# B6+S(B7+SB8)
5502 fadd.d SINA5
(%pc
),%fp2
# A5+S(A6+SA7)
5504 fmul.x
%fp0
,%fp1
# S(B6+S(B7+SB8))
5505 fmul.x
%fp0
,%fp2
# S(A5+S(A6+SA7))
5507 fadd.d COSB5
(%pc
),%fp1
# B5+S(B6+S(B7+SB8))
5508 fadd.d SINA4
(%pc
),%fp2
# A4+S(A5+S(A6+SA7))
5510 fmul.x
%fp0
,%fp1
# S(B5+...)
5511 fmul.x
%fp0
,%fp2
# S(A4+...)
5513 fadd.d COSB4
(%pc
),%fp1
# B4+S(B5+...)
5514 fadd.d SINA3
(%pc
),%fp2
# A3+S(A4+...)
5516 fmul.x
%fp0
,%fp1
# S(B4+...)
5517 fmul.x
%fp0
,%fp2
# S(A3+...)
5519 fadd.x COSB3
(%pc
),%fp1
# B3+S(B4+...)
5520 fadd.x SINA2
(%pc
),%fp2
# A2+S(A3+...)
5522 fmul.x
%fp0
,%fp1
# S(B3+...)
5523 fmul.x
%fp0
,%fp2
# S(A2+...)
5525 fadd.x COSB2
(%pc
),%fp1
# B2+S(B3+...)
5526 fadd.x SINA1
(%pc
),%fp2
# A1+S(A2+...)
5528 fmul.x
%fp0
,%fp1
# S(B2+...)
5529 fmul.x
%fp2
,%fp0
# s(a1+...)
5532 fadd.s COSB1
(%pc
),%fp1
# B1+S(B2...)
5533 fmul.x RPRIME
(%a6
),%fp0
# R'S(A1+...)
5534 fmul.x SPRIME
(%a6
),%fp1
# S'(B1+S(B2+...))
5536 fmovm.x
(%sp
)+,&0x20 # restore fp2
5539 fadd.s POSNEG1
(%a6
),%fp1
# COS(X)
5540 bsr sto_cos
# store cosine result
5541 fadd.x RPRIME
(%a6
),%fp0
# SIN(X)
################################################

	cmp.l		%d1,&0x3FFF8000

################################################

#	mov.w		&0x0000,XDCARE(%a6)
	fmov.s		&0x3F800000,%fp1

	fsub.s		&0x00800000,%fp1
	bsr		sto_cos			# store cosine result
	fmov.l		%fpcr,%d0		# d0 must have fpcr,too
	mov.b		&FMOV_OP,%d1		# last inst is MOVE

##############################################

#--SIN AND COS OF X FOR DENORMALIZED X
	mov.l		%d0,-(%sp)		# save d0
	fmov.s		&0x3F800000,%fp1
	bsr		sto_cos			# store cosine result
	mov.l		(%sp)+,%d0		# restore d0
5575 ############################################
5577 #--WHEN REDUCEX IS USED, THE CODE WILL INEVITABLY BE SLOW.
5578 #--THIS REDUCTION METHOD, HOWEVER, IS MUCH FASTER THAN USING
5579 #--THE REMAINDER INSTRUCTION WHICH IS NOW IN SOFTWARE.
5581 fmovm.x
&0x3c,-(%sp
) # save {fp2-fp5}
5582 mov.
l %d2
,-(%sp
) # save d2
5583 fmov.s
&0x00000000,%fp1
# fp1 = 0
5585 #--If compact form of abs(arg) in d0=$7ffeffff, argument is so large that
5586 #--there is a danger of unwanted overflow in first LOOP iteration. In this
5587 #--case, reduce argument by one remainder step to make subsequent reduction
5589 cmp.
l %d1
,&0x7ffeffff # is arg dangerously large?
5592 # yes; create 2**16383*PI/2
5593 mov.w
&0x7ffe,FP_SCR0_EX
(%a6
)
5594 mov.
l &0xc90fdaa2,FP_SCR0_HI
(%a6
)
5595 clr.
l FP_SCR0_LO
(%a6
)
5597 # create low half of 2**16383*PI/2 at FP_SCR1
5598 mov.w
&0x7fdc,FP_SCR1_EX
(%a6
)
5599 mov.
l &0x85a308d3,FP_SCR1_HI
(%a6
)
5600 clr.
l FP_SCR1_LO
(%a6
)
5602 ftest.x
%fp0
# test sign of argument
5605 or.b &0x80,FP_SCR0_EX
(%a6
) # positive arg
5606 or.b &0x80,FP_SCR1_EX
(%a6
)
5608 fadd.x FP_SCR0
(%a6
),%fp0
# high part of reduction is exact
5609 fmov.x
%fp0
,%fp1
# save high result in fp1
5610 fadd.x FP_SCR1
(%a6
),%fp0
# low part of reduction
5611 fsub.x
%fp0
,%fp1
# determine low component of result
5612 fadd.x FP_SCR1
(%a6
),%fp1
# fp0/fp1 are reduced argument.
5614 #--ON ENTRY, FP0 IS X, ON RETURN, FP0 IS X REM PI/2, |X| <= PI/4.
5615 #--integer quotient will be stored in N
#--Intermediate remainder is 66 bits long; (R,r) in (FP0,FP1)
5618 fmov.x
%fp0
,INARG
(%a6
) # +-2**K * F, 1 <= F < 2
5619 mov.w INARG
(%a6
),%d1
5620 mov.
l %d1
,%a1
# save a copy of D0
5621 and.l &0x00007FFF,%d1
5622 sub.l &0x00003FFF,%d1
# d0 = K
5626 sub.l &27,%d1
# d0 = L := K-27
5627 mov.
b &0,ENDFLAG
(%a6
)
5630 clr.
l %d1
# d0 = L := 0
5631 mov.
b &1,ENDFLAG
(%a6
)
5634 #--FIND THE REMAINDER OF (R,r) W.R.T. 2**L * (PI/2). L IS SO CHOSEN
5635 #--THAT INT( X * (2/PI) / 2**(L) ) < 2**29.
5637 #--CREATE 2**(-L) * (2/PI), SIGN(INARG)*2**(63),
5638 #--2**L * (PIby2_1), 2**L * (PIby2_2)
5640 mov.
l &0x00003FFE,%d2
# BIASED EXP OF 2/PI
5641 sub.l %d1
,%d2
# BIASED EXP OF 2**(-L)*(2/PI)
5643 mov.
l &0xA2F9836E,FP_SCR0_HI
(%a6
)
5644 mov.
l &0x4E44152A,FP_SCR0_LO
(%a6
)
5645 mov.w
%d2
,FP_SCR0_EX
(%a6
) # FP_SCR0 = 2**(-L)*(2/PI)
5648 fmul.x FP_SCR0
(%a6
),%fp2
# fp2 = X * 2**(-L)*(2/PI)
5650 #--WE MUST NOW FIND INT(FP2). SINCE WE NEED THIS VALUE IN
5651 #--FLOATING POINT FORMAT, THE TWO FMOVE'S FMOVE.L FP <--> N
5652 #--WILL BE TOO INEFFICIENT. THE WAY AROUND IT IS THAT
5653 #--(SIGN(INARG)*2**63 + FP2) - SIGN(INARG)*2**63 WILL GIVE
5654 #--US THE DESIRED VALUE IN FLOATING POINT.
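#
#--The same add/subtract-a-big-power-of-two trick, sketched in C for the
#--double format (so 2**52 plays the role of 2**63; "volatile" is only there
#--to keep a compiler from folding the two operations away -- an
#--illustration, not the package code):
#
#	/* Round x to an integer-valued double, |x| < 2**52, using the
#	   current (round-to-nearest) rounding mode. */
#	double round_to_int_sketch(double x) {
#	    volatile double big = (x < 0.0) ? -4503599627370496.0   /* -2**52 */
#	                                    :  4503599627370496.0;  /* +2**52 */
#	    volatile double t = x + big;    /* fraction bits are rounded away */
#	    return t - big;                 /* integer-valued result          */
#	}
#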
5657 and.l &0x80000000,%d2
5658 or.l &0x5F000000,%d2
# d2 = SIGN(INARG)*2**63 IN SGL
5659 mov.
l %d2
,TWOTO63
(%a6
)
5660 fadd.s TWOTO63
(%a6
),%fp2
# THE FRACTIONAL PART OF FP1 IS ROUNDED
5661 fsub.s TWOTO63
(%a6
),%fp2
# fp2 = N
5664 #--CREATING 2**(L)*Piby2_1 and 2**(L)*Piby2_2
5665 mov.
l %d1
,%d2
# d2 = L
5667 add.l &0x00003FFF,%d2
# BIASED EXP OF 2**L * (PI/2)
5668 mov.w
%d2
,FP_SCR0_EX
(%a6
)
5669 mov.
l &0xC90FDAA2,FP_SCR0_HI
(%a6
)
5670 clr.
l FP_SCR0_LO
(%a6
) # FP_SCR0 = 2**(L) * Piby2_1
5672 add.l &0x00003FDD,%d1
5673 mov.w
%d1
,FP_SCR1_EX
(%a6
)
5674 mov.
l &0x85A308D3,FP_SCR1_HI
(%a6
)
5675 clr.
l FP_SCR1_LO
(%a6
) # FP_SCR1 = 2**(L) * Piby2_2
5677 mov.
b ENDFLAG
(%a6
),%d1
5679 #--We are now ready to perform (R+r) - N*P1 - N*P2, P1 = 2**(L) * Piby2_1 and
5680 #--P2 = 2**(L) * Piby2_2
5681 fmov.x
%fp2
,%fp4
# fp4 = N
5682 fmul.x FP_SCR0
(%a6
),%fp4
# fp4 = W = N*P1
5683 fmov.x
%fp2
,%fp5
# fp5 = N
5684 fmul.x FP_SCR1
(%a6
),%fp5
# fp5 = w = N*P2
5685 fmov.x
%fp4
,%fp3
# fp3 = W = N*P1
5687 #--we want P+p = W+w but |p| <= half ulp of P
5688 #--Then, we need to compute A := R-P and a := r-p
5689 fadd.x
%fp5
,%fp3
# fp3 = P
5690 fsub.x
%fp3
,%fp4
# fp4 = W-P
5692 fsub.x
%fp3
,%fp0
# fp0 = A := R - P
5693 fadd.x
%fp5
,%fp4
# fp4 = p = (W-P)+w
5695 fmov.x
%fp0
,%fp3
# fp3 = A
5696 fsub.x
%fp4
,%fp1
# fp1 = a := r - p
5698 #--Now we need to normalize (A,a) to "new (R,r)" where R+r = A+a but
5699 #--|r| <= half ulp of R.
5700 fadd.x
%fp1
,%fp0
# fp0 = R := A+a
5701 #--No need to calculate r if this is the last loop
5705 #--Need to calculate r
5706 fsub.x
%fp0
,%fp3
# fp3 = A-R
5707 fadd.x
%fp3
,%fp1
# fp1 = r := (A-R)+a
5711 fmov.
l %fp2
,INT
(%a6
)
5712 mov.
l (%sp
)+,%d2
# restore d2
5713 fmovm.x
(%sp
)+,&0x3c # restore {fp2-fp5}
5721 #########################################################################
5722 # stan(): computes the tangent of a normalized input #
5723 # stand(): computes the tangent of a denormalized input #
5725 # INPUT *************************************************************** #
5726 # a0 = pointer to extended precision input #
5727 # d0 = round precision,mode #
5729 # OUTPUT ************************************************************** #
5732 # ACCURACY and MONOTONICITY ******************************************* #
5733 # The returned result is within 3 ulp in 64 significant bit, i.e. #
5734 # within 0.5001 ulp to 53 bits if the result is subsequently #
5735 # rounded to double precision. The result is provably monotonic #
5736 # in double precision. #
5738 # ALGORITHM *********************************************************** #
5740 # 1. If |X| >= 15Pi or |X| < 2**(-40), go to 6. #
5742 # 2. Decompose X as X = N(Pi/2) + r where |r| <= Pi/4. Let #
5743 # k = N mod 2, so in particular, k = 0 or 1. #
5745 # 3. If k is odd, go to 5. #
5747 # 4. (k is even) Tan(X) = tan(r) and tan(r) is approximated by a #
5748 # rational function U/V where #
5749 # U = r + r*s*(P1 + s*(P2 + s*P3)), and #
5750 # V = 1 + s*(Q1 + s*(Q2 + s*(Q3 + s*Q4))), s = r*r. #
5753 # 4. (k is odd) Tan(X) = -cot(r). Since tan(r) is approximated by #
5754 # a rational function U/V where #
5755 # U = r + r*s*(P1 + s*(P2 + s*P3)), and #
5756 # V = 1 + s*(Q1 + s*(Q2 + s*(Q3 + s*Q4))), s = r*r, #
5757 # -Cot(r) = -V/U. Exit. #
5759 # 6. If |X| > 1, go to 8. #
5761 # 7. (|X|<2**(-40)) Tan(X) = X. Exit. #
5763 # 8. Overwrite X by X := X rem 2Pi. Now that |X| <= Pi, go back #
5766 #########################################################################
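#
# A C sketch of the algorithm above (double precision only; the low-order
# Pade ratio U/V stands in for the P1..P3/Q1..Q4 coefficients used below
# and is only an illustration):
#
#	#include <math.h>
#
#	double tan_sketch(double x) {
#	    double n = nearbyint(x * (2.0/M_PI));   /* X = N*(pi/2) + r      */
#	    double r = x - n*(M_PI/2);              /* |r| <= pi/4           */
#	    double s = r*r;
#	    double u = r + r*s*(-1.0/15);           /* U, numerator ~ tan(r) */
#	    double v = 1.0 + s*(-2.0/5);            /* V, denominator        */
#	    if ((long)n & 1)
#	        return -v/u;        /* k odd:  tan(X) = -cot(r) = -V/U       */
#	    return u/v;             /* k even: tan(X) =  tan(r) =  U/V       */
#	}
#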
	long		0x3EA0B759,0xF50F8688
	long		0xBEF2BAA5,0xA8924F04
	long		0xBF346F59,0xB39BA65F,0x00000000,0x00000000
	long		0x3FF60000,0xE073D3FC,0x199C4A00,0x00000000
	long		0x3FF90000,0xD23CD684,0x15D95FA1,0x00000000
	long		0xBFFC0000,0x8895A6C5,0xFB423BCA,0x00000000
	long		0xBFFD0000,0xEEF57E0D,0xA84BC8CE,0x00000000
	long		0x3FFC0000,0xA2F9836E,0x4E44152A,0x00000000
	long		0x40010000,0xC90FDAA2,0x00000000,0x00000000
	long		0x3FDF0000,0x85A308D4,0x00000000,0x00000000
5796 #--N*PI/2, -32 <= N <= 32, IN A LEADING TERM IN EXT. AND TRAILING
5797 #--TERM IN SGL. NOTE THAT PI IS 64-BIT LONG, THUS N*PI/2 IS AT
5798 #--MOST 69 BITS LONG.
5801 long
0xC0040000,0xC90FDAA2,0x2168C235,0x21800000
5802 long
0xC0040000,0xC2C75BCD,0x105D7C23,0xA0D00000
5803 long
0xC0040000,0xBC7EDCF7,0xFF523611,0xA1E80000
5804 long
0xC0040000,0xB6365E22,0xEE46F000,0x21480000
5805 long
0xC0040000,0xAFEDDF4D,0xDD3BA9EE,0xA1200000
5806 long
0xC0040000,0xA9A56078,0xCC3063DD,0x21FC0000
5807 long
0xC0040000,0xA35CE1A3,0xBB251DCB,0x21100000
5808 long
0xC0040000,0x9D1462CE,0xAA19D7B9,0xA1580000
5809 long
0xC0040000,0x96CBE3F9,0x990E91A8,0x21E00000
5810 long
0xC0040000,0x90836524,0x88034B96,0x20B00000
5811 long
0xC0040000,0x8A3AE64F,0x76F80584,0xA1880000
5812 long
0xC0040000,0x83F2677A,0x65ECBF73,0x21C40000
5813 long
0xC0030000,0xFB53D14A,0xA9C2F2C2,0x20000000
5814 long
0xC0030000,0xEEC2D3A0,0x87AC669F,0x21380000
5815 long
0xC0030000,0xE231D5F6,0x6595DA7B,0xA1300000
5816 long
0xC0030000,0xD5A0D84C,0x437F4E58,0x9FC00000
5817 long
0xC0030000,0xC90FDAA2,0x2168C235,0x21000000
5818 long
0xC0030000,0xBC7EDCF7,0xFF523611,0xA1680000
5819 long
0xC0030000,0xAFEDDF4D,0xDD3BA9EE,0xA0A00000
5820 long
0xC0030000,0xA35CE1A3,0xBB251DCB,0x20900000
5821 long
0xC0030000,0x96CBE3F9,0x990E91A8,0x21600000
5822 long
0xC0030000,0x8A3AE64F,0x76F80584,0xA1080000
5823 long
0xC0020000,0xFB53D14A,0xA9C2F2C2,0x1F800000
5824 long
0xC0020000,0xE231D5F6,0x6595DA7B,0xA0B00000
5825 long
0xC0020000,0xC90FDAA2,0x2168C235,0x20800000
5826 long
0xC0020000,0xAFEDDF4D,0xDD3BA9EE,0xA0200000
5827 long
0xC0020000,0x96CBE3F9,0x990E91A8,0x20E00000
5828 long
0xC0010000,0xFB53D14A,0xA9C2F2C2,0x1F000000
5829 long
0xC0010000,0xC90FDAA2,0x2168C235,0x20000000
5830 long
0xC0010000,0x96CBE3F9,0x990E91A8,0x20600000
5831 long
0xC0000000,0xC90FDAA2,0x2168C235,0x1F800000
5832 long
0xBFFF0000,0xC90FDAA2,0x2168C235,0x1F000000
5833 long
0x00000000,0x00000000,0x00000000,0x00000000
5834 long
0x3FFF0000,0xC90FDAA2,0x2168C235,0x9F000000
5835 long
0x40000000,0xC90FDAA2,0x2168C235,0x9F800000
5836 long
0x40010000,0x96CBE3F9,0x990E91A8,0xA0600000
5837 long
0x40010000,0xC90FDAA2,0x2168C235,0xA0000000
5838 long
0x40010000,0xFB53D14A,0xA9C2F2C2,0x9F000000
5839 long
0x40020000,0x96CBE3F9,0x990E91A8,0xA0E00000
5840 long
0x40020000,0xAFEDDF4D,0xDD3BA9EE,0x20200000
5841 long
0x40020000,0xC90FDAA2,0x2168C235,0xA0800000
5842 long
0x40020000,0xE231D5F6,0x6595DA7B,0x20B00000
5843 long
0x40020000,0xFB53D14A,0xA9C2F2C2,0x9F800000
5844 long
0x40030000,0x8A3AE64F,0x76F80584,0x21080000
5845 long
0x40030000,0x96CBE3F9,0x990E91A8,0xA1600000
5846 long
0x40030000,0xA35CE1A3,0xBB251DCB,0xA0900000
5847 long
0x40030000,0xAFEDDF4D,0xDD3BA9EE,0x20A00000
5848 long
0x40030000,0xBC7EDCF7,0xFF523611,0x21680000
5849 long
0x40030000,0xC90FDAA2,0x2168C235,0xA1000000
5850 long
0x40030000,0xD5A0D84C,0x437F4E58,0x1FC00000
5851 long
0x40030000,0xE231D5F6,0x6595DA7B,0x21300000
5852 long
0x40030000,0xEEC2D3A0,0x87AC669F,0xA1380000
5853 long
0x40030000,0xFB53D14A,0xA9C2F2C2,0xA0000000
5854 long
0x40040000,0x83F2677A,0x65ECBF73,0xA1C40000
5855 long
0x40040000,0x8A3AE64F,0x76F80584,0x21880000
5856 long
0x40040000,0x90836524,0x88034B96,0xA0B00000
5857 long
0x40040000,0x96CBE3F9,0x990E91A8,0xA1E00000
5858 long
0x40040000,0x9D1462CE,0xAA19D7B9,0x21580000
5859 long
0x40040000,0xA35CE1A3,0xBB251DCB,0xA1100000
5860 long
0x40040000,0xA9A56078,0xCC3063DD,0xA1FC0000
5861 long
0x40040000,0xAFEDDF4D,0xDD3BA9EE,0x21200000
5862 long
0x40040000,0xB6365E22,0xEE46F000,0xA1480000
5863 long
0x40040000,0xBC7EDCF7,0xFF523611,0x21E80000
5864 long
0x40040000,0xC2C75BCD,0x105D7C23,0x20D00000
5865 long
0x40040000,0xC90FDAA2,0x2168C235,0xA1800000
5875 fmov.x
(%a0
),%fp0
# LOAD INPUT
5879 and.l &0x7FFFFFFF,%d1
5881 cmp.
l %d1
,&0x3FD78000 # |X| >= 2**(-40)?
5885 cmp.
l %d1
,&0x4004BC7E # |X| < 15 PI?
5890 #--THIS IS THE USUAL CASE, |X| <= 15 PI.
5891 #--THE ARGUMENT REDUCTION IS DONE BY TABLE LOOK UP.
5893 fmul.d TWOBYPI
(%pc
),%fp1
# X*2/PI
5895 lea.
l PITBL+
0x200(%pc
),%a1
# TABLE OF N*PI/2, N = -32,...,32
5897 fmov.
l %fp1
,%d1
# CONVERT TO INTEGER
5900 add.l %d1
,%a1
# ADDRESS N*PIBY2 IN Y1, Y2
5902 fsub.x
(%a1
)+,%fp0
# X-Y1
5904 fsub.s
(%a1
),%fp0
# FP0 IS R = (X-Y1)-Y2
5907 and.l &0x80000000,%d1
# D0 WAS ODD IFF D0 < 0
5910 fmovm.x
&0x0c,-(%sp
) # save fp2,fp3
5916 fmul.x
%fp1
,%fp1
# S = R*R
5918 fmov.d TANQ4
(%pc
),%fp3
5919 fmov.d TANP3
(%pc
),%fp2
5921 fmul.x
%fp1
,%fp3
# SQ4
5922 fmul.x
%fp1
,%fp2
# SP3
5924 fadd.d TANQ3
(%pc
),%fp3
# Q3+SQ4
5925 fadd.x TANP2
(%pc
),%fp2
# P2+SP3
5927 fmul.x
%fp1
,%fp3
# S(Q3+SQ4)
5928 fmul.x
%fp1
,%fp2
# S(P2+SP3)
5930 fadd.x TANQ2
(%pc
),%fp3
# Q2+S(Q3+SQ4)
5931 fadd.x TANP1
(%pc
),%fp2
# P1+S(P2+SP3)
5933 fmul.x
%fp1
,%fp3
# S(Q2+S(Q3+SQ4))
5934 fmul.x
%fp1
,%fp2
# S(P1+S(P2+SP3))
5936 fadd.x TANQ1
(%pc
),%fp3
# Q1+S(Q2+S(Q3+SQ4))
5937 fmul.x
%fp0
,%fp2
# RS(P1+S(P2+SP3))
5939 fmul.x
%fp3
,%fp1
# S(Q1+S(Q2+S(Q3+SQ4)))
5941 fadd.x
%fp2
,%fp0
# R+RS(P1+S(P2+SP3))
5943 fadd.s
&0x3F800000,%fp1
# 1+S(Q1+...)
5945 fmovm.x
(%sp
)+,&0x30 # restore fp2,fp3
5947 fmov.
l %d0
,%fpcr
# restore users round mode,prec
5948 fdiv.x
%fp1
,%fp0
# last inst - possible exception set
5953 fmul.x
%fp0
,%fp0
# S = R*R
5955 fmov.d TANQ4
(%pc
),%fp3
5956 fmov.d TANP3
(%pc
),%fp2
5958 fmul.x
%fp0
,%fp3
# SQ4
5959 fmul.x
%fp0
,%fp2
# SP3
5961 fadd.d TANQ3
(%pc
),%fp3
# Q3+SQ4
5962 fadd.x TANP2
(%pc
),%fp2
# P2+SP3
5964 fmul.x
%fp0
,%fp3
# S(Q3+SQ4)
5965 fmul.x
%fp0
,%fp2
# S(P2+SP3)
5967 fadd.x TANQ2
(%pc
),%fp3
# Q2+S(Q3+SQ4)
5968 fadd.x TANP1
(%pc
),%fp2
# P1+S(P2+SP3)
5970 fmul.x
%fp0
,%fp3
# S(Q2+S(Q3+SQ4))
5971 fmul.x
%fp0
,%fp2
# S(P1+S(P2+SP3))
5973 fadd.x TANQ1
(%pc
),%fp3
# Q1+S(Q2+S(Q3+SQ4))
5974 fmul.x
%fp1
,%fp2
# RS(P1+S(P2+SP3))
5976 fmul.x
%fp3
,%fp0
# S(Q1+S(Q2+S(Q3+SQ4)))
5978 fadd.x
%fp2
,%fp1
# R+RS(P1+S(P2+SP3))
5979 fadd.s
&0x3F800000,%fp0
# 1+S(Q1+...)
5981 fmovm.x
(%sp
)+,&0x30 # restore fp2,fp3
5984 eor.
l &0x80000000,(%sp
)
5986 fmov.
l %d0
,%fpcr
# restore users round mode,prec
5987 fdiv.x
(%sp
)+,%fp0
# last inst - possible exception set
5991 #--IF |X| > 15PI, WE USE THE GENERAL ARGUMENT REDUCTION.
5992 #--IF |X| < 2**(-40), RETURN X OR 1.
5993 cmp.
l %d1
,&0x3FFF8000
5998 fmov.
l %d0
,%fpcr
# restore users round mode,prec
5999 mov.
b &FMOV_OP
,%d1
# last inst is MOVE
	fmov.x		(%sp)+,%fp0		# last inst - possible exception set
6004 #--TAN(X) = X FOR DENORMALIZED X
6008 #--WHEN REDUCEX IS USED, THE CODE WILL INEVITABLY BE SLOW.
6009 #--THIS REDUCTION METHOD, HOWEVER, IS MUCH FASTER THAN USING
6010 #--THE REMAINDER INSTRUCTION WHICH IS NOW IN SOFTWARE.
6012 fmovm.x
&0x3c,-(%sp
) # save {fp2-fp5}
6013 mov.
l %d2
,-(%sp
) # save d2
6014 fmov.s
&0x00000000,%fp1
# fp1 = 0
6016 #--If compact form of abs(arg) in d0=$7ffeffff, argument is so large that
6017 #--there is a danger of unwanted overflow in first LOOP iteration. In this
6018 #--case, reduce argument by one remainder step to make subsequent reduction
6020 cmp.
l %d1
,&0x7ffeffff # is arg dangerously large?
6023 # yes; create 2**16383*PI/2
6024 mov.w
&0x7ffe,FP_SCR0_EX
(%a6
)
6025 mov.
l &0xc90fdaa2,FP_SCR0_HI
(%a6
)
6026 clr.
l FP_SCR0_LO
(%a6
)
6028 # create low half of 2**16383*PI/2 at FP_SCR1
6029 mov.w
&0x7fdc,FP_SCR1_EX
(%a6
)
6030 mov.
l &0x85a308d3,FP_SCR1_HI
(%a6
)
6031 clr.
l FP_SCR1_LO
(%a6
)
6033 ftest.x
%fp0
# test sign of argument
6036 or.b &0x80,FP_SCR0_EX
(%a6
) # positive arg
6037 or.b &0x80,FP_SCR1_EX
(%a6
)
6039 fadd.x FP_SCR0
(%a6
),%fp0
# high part of reduction is exact
6040 fmov.x
%fp0
,%fp1
# save high result in fp1
6041 fadd.x FP_SCR1
(%a6
),%fp0
# low part of reduction
6042 fsub.x
%fp0
,%fp1
# determine low component of result
6043 fadd.x FP_SCR1
(%a6
),%fp1
# fp0/fp1 are reduced argument.
6045 #--ON ENTRY, FP0 IS X, ON RETURN, FP0 IS X REM PI/2, |X| <= PI/4.
6046 #--integer quotient will be stored in N
#--Intermediate remainder is 66 bits long; (R,r) in (FP0,FP1)
6049 fmov.x
%fp0
,INARG
(%a6
) # +-2**K * F, 1 <= F < 2
6050 mov.w INARG
(%a6
),%d1
6051 mov.
l %d1
,%a1
# save a copy of D0
6052 and.l &0x00007FFF,%d1
6053 sub.l &0x00003FFF,%d1
# d0 = K
6057 sub.l &27,%d1
# d0 = L := K-27
6058 mov.
b &0,ENDFLAG
(%a6
)
6061 clr.
l %d1
# d0 = L := 0
6062 mov.
b &1,ENDFLAG
(%a6
)
6065 #--FIND THE REMAINDER OF (R,r) W.R.T. 2**L * (PI/2). L IS SO CHOSEN
6066 #--THAT INT( X * (2/PI) / 2**(L) ) < 2**29.
6068 #--CREATE 2**(-L) * (2/PI), SIGN(INARG)*2**(63),
6069 #--2**L * (PIby2_1), 2**L * (PIby2_2)
6071 mov.
l &0x00003FFE,%d2
# BIASED EXP OF 2/PI
6072 sub.l %d1
,%d2
# BIASED EXP OF 2**(-L)*(2/PI)
6074 mov.
l &0xA2F9836E,FP_SCR0_HI
(%a6
)
6075 mov.
l &0x4E44152A,FP_SCR0_LO
(%a6
)
6076 mov.w
%d2
,FP_SCR0_EX
(%a6
) # FP_SCR0 = 2**(-L)*(2/PI)
6079 fmul.x FP_SCR0
(%a6
),%fp2
# fp2 = X * 2**(-L)*(2/PI)
6081 #--WE MUST NOW FIND INT(FP2). SINCE WE NEED THIS VALUE IN
6082 #--FLOATING POINT FORMAT, THE TWO FMOVE'S FMOVE.L FP <--> N
6083 #--WILL BE TOO INEFFICIENT. THE WAY AROUND IT IS THAT
6084 #--(SIGN(INARG)*2**63 + FP2) - SIGN(INARG)*2**63 WILL GIVE
6085 #--US THE DESIRED VALUE IN FLOATING POINT.
6088 and.l &0x80000000,%d2
6089 or.l &0x5F000000,%d2
# d2 = SIGN(INARG)*2**63 IN SGL
6090 mov.
l %d2
,TWOTO63
(%a6
)
6091 fadd.s TWOTO63
(%a6
),%fp2
# THE FRACTIONAL PART OF FP1 IS ROUNDED
6092 fsub.s TWOTO63
(%a6
),%fp2
# fp2 = N
6093 # fintrz.x %fp2,%fp2
6095 #--CREATING 2**(L)*Piby2_1 and 2**(L)*Piby2_2
6096 mov.
l %d1
,%d2
# d2 = L
6098 add.l &0x00003FFF,%d2
# BIASED EXP OF 2**L * (PI/2)
6099 mov.w
%d2
,FP_SCR0_EX
(%a6
)
6100 mov.
l &0xC90FDAA2,FP_SCR0_HI
(%a6
)
6101 clr.
l FP_SCR0_LO
(%a6
) # FP_SCR0 = 2**(L) * Piby2_1
6103 add.l &0x00003FDD,%d1
6104 mov.w
%d1
,FP_SCR1_EX
(%a6
)
6105 mov.
l &0x85A308D3,FP_SCR1_HI
(%a6
)
6106 clr.
l FP_SCR1_LO
(%a6
) # FP_SCR1 = 2**(L) * Piby2_2
6108 mov.
b ENDFLAG
(%a6
),%d1
6110 #--We are now ready to perform (R+r) - N*P1 - N*P2, P1 = 2**(L) * Piby2_1 and
6111 #--P2 = 2**(L) * Piby2_2
6112 fmov.x
%fp2
,%fp4
# fp4 = N
6113 fmul.x FP_SCR0
(%a6
),%fp4
# fp4 = W = N*P1
6114 fmov.x
%fp2
,%fp5
# fp5 = N
6115 fmul.x FP_SCR1
(%a6
),%fp5
# fp5 = w = N*P2
6116 fmov.x
%fp4
,%fp3
# fp3 = W = N*P1
6118 #--we want P+p = W+w but |p| <= half ulp of P
6119 #--Then, we need to compute A := R-P and a := r-p
6120 fadd.x
%fp5
,%fp3
# fp3 = P
6121 fsub.x
%fp3
,%fp4
# fp4 = W-P
6123 fsub.x
%fp3
,%fp0
# fp0 = A := R - P
6124 fadd.x
%fp5
,%fp4
# fp4 = p = (W-P)+w
6126 fmov.x
%fp0
,%fp3
# fp3 = A
6127 fsub.x
%fp4
,%fp1
# fp1 = a := r - p
6129 #--Now we need to normalize (A,a) to "new (R,r)" where R+r = A+a but
6130 #--|r| <= half ulp of R.
6131 fadd.x
%fp1
,%fp0
# fp0 = R := A+a
6132 #--No need to calculate r if this is the last loop
6136 #--Need to calculate r
6137 fsub.x
%fp0
,%fp3
# fp3 = A-R
6138 fadd.x
%fp3
,%fp1
# fp1 = r := (A-R)+a
6142 fmov.
l %fp2
,INT
(%a6
)
6143 mov.
l (%sp
)+,%d2
# restore d2
6144 fmovm.x
(%sp
)+,&0x3c # restore {fp2-fp5}
6151 #########################################################################
6152 # satan(): computes the arctangent of a normalized number #
6153 # satand(): computes the arctangent of a denormalized number #
6155 # INPUT *************************************************************** #
6156 # a0 = pointer to extended precision input #
6157 # d0 = round precision,mode #
6159 # OUTPUT ************************************************************** #
6162 # ACCURACY and MONOTONICITY ******************************************* #
6163 # The returned result is within 2 ulps in 64 significant bit, #
6164 # i.e. within 0.5001 ulp to 53 bits if the result is subsequently #
6165 # rounded to double precision. The result is provably monotonic #
6166 # in double precision. #
6168 # ALGORITHM *********************************************************** #
6169 # Step 1. If |X| >= 16 or |X| < 1/16, go to Step 5. #
6171 # Step 2. Let X = sgn * 2**k * 1.xxxxxxxx...x. #
6172 # Note that k = -4, -3,..., or 3. #
6173 # Define F = sgn * 2**k * 1.xxxx1, i.e. the first 5 #
6174 # significant bits of X with a bit-1 attached at the 6-th #
6175 # bit position. Define u to be u = (X-F) / (1 + X*F). #
6177 # Step 3. Approximate arctan(u) by a polynomial poly. #
6179 # Step 4. Return arctan(F) + poly, arctan(F) is fetched from a #
6180 # table of values calculated beforehand. Exit. #
6182 # Step 5. If |X| >= 16, go to Step 7. #
6184 # Step 6. Approximate arctan(X) by an odd polynomial in X. Exit. #
6186 # Step 7. Define X' = -1/X. Approximate arctan(X') by an odd #
6187 # polynomial in X'. #
6188 # Arctan(X) = sign(X)*Pi/2 + arctan(X'). Exit. #
6190 #########################################################################
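#
# A C sketch of Steps 1-4 above (double precision; frexp/ldexp are used to
# build F, and libm's atan() stands in both for the ATANTBL lookup of
# atan(F) and for the short polynomial in u -- illustration only):
#
#	#include <math.h>
#
#	double atan_sketch(double x) {
#	    double ax = fabs(x);
#	    if (ax >= 0.0625 && ax < 16.0) {
#	        int e;
#	        double m = frexp(ax, &e);              /* ax = m*2^e, m in [.5,1) */
#	        double f = (floor(m*64.0) + 0.5)/64.0; /* keep 5 bits, set 6th    */
#	        double F = copysign(ldexp(f, e), x);
#	        double u = (x - F) / (1.0 + x*F);      /* u is small              */
#	        return atan(F) + atan(u);              /* atan(F) comes from a table */
#	    }
#	    return atan(x);        /* Steps 5-7: very small or very large |X| */
#	}
#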
ATANA3:	long		0xBFF6687E,0x314987D8
ATANA2:	long		0x4002AC69,0x34A26DB3
ATANA1:	long		0xBFC2476F,0x4E1DA28E

ATANB6:	long		0x3FB34444,0x7F876989
ATANB5:	long		0xBFB744EE,0x7FAF45DB
ATANB4:	long		0x3FBC71C6,0x46940220
ATANB3:	long		0xBFC24924,0x921872F9
ATANB2:	long		0x3FC99999,0x99998FA9
ATANB1:	long		0xBFD55555,0x55555555

ATANC5:	long		0xBFB70BF3,0x98539E6A
ATANC4:	long		0x3FBC7187,0x962D1D7D
ATANC3:	long		0xBFC24924,0x827107B8
ATANC2:	long		0x3FC99999,0x9996263E
ATANC1:	long		0xBFD55555,0x55555536

PPIBY2:	long		0x3FFF0000,0xC90FDAA2,0x2168C235,0x00000000
NPIBY2:	long		0xBFFF0000,0xC90FDAA2,0x2168C235,0x00000000

PTINY:	long		0x00010000,0x80000000,0x00000000,0x00000000
NTINY:	long		0x80010000,0x80000000,0x00000000,0x00000000
6216 long
0x3FFB0000,0x83D152C5,0x060B7A51,0x00000000
6217 long
0x3FFB0000,0x8BC85445,0x65498B8B,0x00000000
6218 long
0x3FFB0000,0x93BE4060,0x17626B0D,0x00000000
6219 long
0x3FFB0000,0x9BB3078D,0x35AEC202,0x00000000
6220 long
0x3FFB0000,0xA3A69A52,0x5DDCE7DE,0x00000000
6221 long
0x3FFB0000,0xAB98E943,0x62765619,0x00000000
6222 long
0x3FFB0000,0xB389E502,0xF9C59862,0x00000000
6223 long
0x3FFB0000,0xBB797E43,0x6B09E6FB,0x00000000
6224 long
0x3FFB0000,0xC367A5C7,0x39E5F446,0x00000000
6225 long
0x3FFB0000,0xCB544C61,0xCFF7D5C6,0x00000000
6226 long
0x3FFB0000,0xD33F62F8,0x2488533E,0x00000000
6227 long
0x3FFB0000,0xDB28DA81,0x62404C77,0x00000000
6228 long
0x3FFB0000,0xE310A407,0x8AD34F18,0x00000000
6229 long
0x3FFB0000,0xEAF6B0A8,0x188EE1EB,0x00000000
6230 long
0x3FFB0000,0xF2DAF194,0x9DBE79D5,0x00000000
6231 long
0x3FFB0000,0xFABD5813,0x61D47E3E,0x00000000
6232 long
0x3FFC0000,0x8346AC21,0x0959ECC4,0x00000000
6233 long
0x3FFC0000,0x8B232A08,0x304282D8,0x00000000
6234 long
0x3FFC0000,0x92FB70B8,0xD29AE2F9,0x00000000
6235 long
0x3FFC0000,0x9ACF476F,0x5CCD1CB4,0x00000000
6236 long
0x3FFC0000,0xA29E7630,0x4954F23F,0x00000000
6237 long
0x3FFC0000,0xAA68C5D0,0x8AB85230,0x00000000
6238 long
0x3FFC0000,0xB22DFFFD,0x9D539F83,0x00000000
6239 long
0x3FFC0000,0xB9EDEF45,0x3E900EA5,0x00000000
6240 long
0x3FFC0000,0xC1A85F1C,0xC75E3EA5,0x00000000
6241 long
0x3FFC0000,0xC95D1BE8,0x28138DE6,0x00000000
6242 long
0x3FFC0000,0xD10BF300,0x840D2DE4,0x00000000
6243 long
0x3FFC0000,0xD8B4B2BA,0x6BC05E7A,0x00000000
6244 long
0x3FFC0000,0xE0572A6B,0xB42335F6,0x00000000
6245 long
0x3FFC0000,0xE7F32A70,0xEA9CAA8F,0x00000000
6246 long
0x3FFC0000,0xEF888432,0x64ECEFAA,0x00000000
6247 long
0x3FFC0000,0xF7170A28,0xECC06666,0x00000000
6248 long
0x3FFD0000,0x812FD288,0x332DAD32,0x00000000
6249 long
0x3FFD0000,0x88A8D1B1,0x218E4D64,0x00000000
6250 long
0x3FFD0000,0x9012AB3F,0x23E4AEE8,0x00000000
6251 long
0x3FFD0000,0x976CC3D4,0x11E7F1B9,0x00000000
6252 long
0x3FFD0000,0x9EB68949,0x3889A227,0x00000000
6253 long
0x3FFD0000,0xA5EF72C3,0x4487361B,0x00000000
6254 long
0x3FFD0000,0xAD1700BA,0xF07A7227,0x00000000
6255 long
0x3FFD0000,0xB42CBCFA,0xFD37EFB7,0x00000000
6256 long
0x3FFD0000,0xBB303A94,0x0BA80F89,0x00000000
6257 long
0x3FFD0000,0xC22115C6,0xFCAEBBAF,0x00000000
6258 long
0x3FFD0000,0xC8FEF3E6,0x86331221,0x00000000
6259 long
0x3FFD0000,0xCFC98330,0xB4000C70,0x00000000
6260 long
0x3FFD0000,0xD6807AA1,0x102C5BF9,0x00000000
6261 long
0x3FFD0000,0xDD2399BC,0x31252AA3,0x00000000
6262 long
0x3FFD0000,0xE3B2A855,0x6B8FC517,0x00000000
6263 long
0x3FFD0000,0xEA2D764F,0x64315989,0x00000000
6264 long
0x3FFD0000,0xF3BF5BF8,0xBAD1A21D,0x00000000
6265 long
0x3FFE0000,0x801CE39E,0x0D205C9A,0x00000000
6266 long
0x3FFE0000,0x8630A2DA,0xDA1ED066,0x00000000
6267 long
0x3FFE0000,0x8C1AD445,0xF3E09B8C,0x00000000
6268 long
0x3FFE0000,0x91DB8F16,0x64F350E2,0x00000000
6269 long
0x3FFE0000,0x97731420,0x365E538C,0x00000000
6270 long
0x3FFE0000,0x9CE1C8E6,0xA0B8CDBA,0x00000000
6271 long
0x3FFE0000,0xA22832DB,0xCADAAE09,0x00000000
6272 long
0x3FFE0000,0xA746F2DD,0xB7602294,0x00000000
6273 long
0x3FFE0000,0xAC3EC0FB,0x997DD6A2,0x00000000
6274 long
0x3FFE0000,0xB110688A,0xEBDC6F6A,0x00000000
6275 long
0x3FFE0000,0xB5BCC490,0x59ECC4B0,0x00000000
6276 long
0x3FFE0000,0xBA44BC7D,0xD470782F,0x00000000
6277 long
0x3FFE0000,0xBEA94144,0xFD049AAC,0x00000000
6278 long
0x3FFE0000,0xC2EB4ABB,0x661628B6,0x00000000
6279 long
0x3FFE0000,0xC70BD54C,0xE602EE14,0x00000000
6280 long
0x3FFE0000,0xCD000549,0xADEC7159,0x00000000
6281 long
0x3FFE0000,0xD48457D2,0xD8EA4EA3,0x00000000
6282 long
0x3FFE0000,0xDB948DA7,0x12DECE3B,0x00000000
6283 long
0x3FFE0000,0xE23855F9,0x69E8096A,0x00000000
6284 long
0x3FFE0000,0xE8771129,0xC4353259,0x00000000
6285 long
0x3FFE0000,0xEE57C16E,0x0D379C0D,0x00000000
6286 long
0x3FFE0000,0xF3E10211,0xA87C3779,0x00000000
6287 long
0x3FFE0000,0xF919039D,0x758B8D41,0x00000000
6288 long
0x3FFE0000,0xFE058B8F,0x64935FB3,0x00000000
6289 long
0x3FFF0000,0x8155FB49,0x7B685D04,0x00000000
6290 long
0x3FFF0000,0x83889E35,0x49D108E1,0x00000000
6291 long
0x3FFF0000,0x859CFA76,0x511D724B,0x00000000
6292 long
0x3FFF0000,0x87952ECF,0xFF8131E7,0x00000000
6293 long
0x3FFF0000,0x89732FD1,0x9557641B,0x00000000
6294 long
0x3FFF0000,0x8B38CAD1,0x01932A35,0x00000000
6295 long
0x3FFF0000,0x8CE7A8D8,0x301EE6B5,0x00000000
6296 long
0x3FFF0000,0x8F46A39E,0x2EAE5281,0x00000000
6297 long
0x3FFF0000,0x922DA7D7,0x91888487,0x00000000
6298 long
0x3FFF0000,0x94D19FCB,0xDEDF5241,0x00000000
6299 long
0x3FFF0000,0x973AB944,0x19D2A08B,0x00000000
6300 long
0x3FFF0000,0x996FF00E,0x08E10B96,0x00000000
6301 long
0x3FFF0000,0x9B773F95,0x12321DA7,0x00000000
6302 long
0x3FFF0000,0x9D55CC32,0x0F935624,0x00000000
6303 long
0x3FFF0000,0x9F100575,0x006CC571,0x00000000
6304 long
0x3FFF0000,0xA0A9C290,0xD97CC06C,0x00000000
6305 long
0x3FFF0000,0xA22659EB,0xEBC0630A,0x00000000
6306 long
0x3FFF0000,0xA388B4AF,0xF6EF0EC9,0x00000000
6307 long
0x3FFF0000,0xA4D35F10,0x61D292C4,0x00000000
6308 long
0x3FFF0000,0xA60895DC,0xFBE3187E,0x00000000
6309 long
0x3FFF0000,0xA72A51DC,0x7367BEAC,0x00000000
6310 long
0x3FFF0000,0xA83A5153,0x0956168F,0x00000000
6311 long
0x3FFF0000,0xA93A2007,0x7539546E,0x00000000
6312 long
0x3FFF0000,0xAA9E7245,0x023B2605,0x00000000
6313 long
0x3FFF0000,0xAC4C84BA,0x6FE4D58F,0x00000000
6314 long
0x3FFF0000,0xADCE4A4A,0x606B9712,0x00000000
6315 long
0x3FFF0000,0xAF2A2DCD,0x8D263C9C,0x00000000
6316 long
0x3FFF0000,0xB0656F81,0xF22265C7,0x00000000
6317 long
0x3FFF0000,0xB1846515,0x0F71496A,0x00000000
6318 long
0x3FFF0000,0xB28AAA15,0x6F9ADA35,0x00000000
6319 long
0x3FFF0000,0xB37B44FF,0x3766B895,0x00000000
6320 long
0x3FFF0000,0xB458C3DC,0xE9630433,0x00000000
6321 long
0x3FFF0000,0xB525529D,0x562246BD,0x00000000
6322 long
0x3FFF0000,0xB5E2CCA9,0x5F9D88CC,0x00000000
6323 long
0x3FFF0000,0xB692CADA,0x7ACA1ADA,0x00000000
6324 long
0x3FFF0000,0xB736AEA7,0xA6925838,0x00000000
6325 long
0x3FFF0000,0xB7CFAB28,0x7E9F7B36,0x00000000
6326 long
0x3FFF0000,0xB85ECC66,0xCB219835,0x00000000
6327 long
0x3FFF0000,0xB8E4FD5A,0x20A593DA,0x00000000
6328 long
0x3FFF0000,0xB99F41F6,0x4AFF9BB5,0x00000000
6329 long
0x3FFF0000,0xBA7F1E17,0x842BBE7B,0x00000000
6330 long
0x3FFF0000,0xBB471285,0x7637E17D,0x00000000
6331 long
0x3FFF0000,0xBBFABE8A,0x4788DF6F,0x00000000
6332 long
0x3FFF0000,0xBC9D0FAD,0x2B689D79,0x00000000
6333 long
0x3FFF0000,0xBD306A39,0x471ECD86,0x00000000
6334 long
0x3FFF0000,0xBDB6C731,0x856AF18A,0x00000000
6335 long
0x3FFF0000,0xBE31CAC5,0x02E80D70,0x00000000
6336 long
0x3FFF0000,0xBEA2D55C,0xE33194E2,0x00000000
6337 long
0x3FFF0000,0xBF0B10B7,0xC03128F0,0x00000000
6338 long
0x3FFF0000,0xBF6B7A18,0xDACB778D,0x00000000
6339 long
0x3FFF0000,0xBFC4EA46,0x63FA18F6,0x00000000
6340 long
0x3FFF0000,0xC0181BDE,0x8B89A454,0x00000000
6341 long
0x3FFF0000,0xC065B066,0xCFBF6439,0x00000000
6342 long
0x3FFF0000,0xC0AE345F,0x56340AE6,0x00000000
6343 long
0x3FFF0000,0xC0F22291,0x9CB9E6A7,0x00000000
6355 #--ENTRY POINT FOR ATAN(X), HERE X IS FINITE, NON-ZERO, AND NOT NAN'S
6357 fmov.x
(%a0
),%fp0
# LOAD INPUT
6362 and.l &0x7FFFFFFF,%d1
6364 cmp.
l %d1
,&0x3FFB8000 # |X| >= 1/16?
6369 cmp.
l %d1
,&0x4002FFFF # |X| < 16 ?
6373 #--THE MOST LIKELY CASE, |X| IN [1/16, 16). WE USE TABLE TECHNIQUE
6374 #--THE IDEA IS ATAN(X) = ATAN(F) + ATAN( [X-F] / [1+XF] ).
6375 #--SO IF F IS CHOSEN TO BE CLOSE TO X AND ATAN(F) IS STORED IN
6376 #--A TABLE, ALL WE NEED IS TO APPROXIMATE ATAN(U) WHERE
6377 #--U = (X-F)/(1+XF) IS SMALL (REMEMBER F IS CLOSE TO X). IT IS
6378 #--TRUE THAT A DIVIDE IS NOW NEEDED, BUT THE APPROXIMATION FOR
6379 #--ATAN(U) IS A VERY SHORT POLYNOMIAL AND THE INDEXING TO
#--FETCH F AND SAVING OF REGISTERS CAN ALL BE HIDDEN UNDER THE
6381 #--DIVIDE. IN THE END THIS METHOD IS MUCH FASTER THAN A TRADITIONAL
6382 #--ONE. NOTE ALSO THAT THE TRADITIONAL SCHEME THAT APPROXIMATE
6383 #--ATAN(X) DIRECTLY WILL NEED TO USE A RATIONAL APPROXIMATION
6384 #--(DIVISION NEEDED) ANYWAY BECAUSE A POLYNOMIAL APPROXIMATION
6385 #--WILL INVOLVE A VERY LONG POLYNOMIAL.
6387 #--NOW WE SEE X AS +-2^K * 1.BBBBBBB....B <- 1. + 63 BITS
6388 #--WE CHOSE F TO BE +-2^K * 1.BBBB1
6389 #--THAT IS IT MATCHES THE EXPONENT AND FIRST 5 BITS OF X, THE
6390 #--SIXTH BITS IS SET TO BE 1. SINCE K = -4, -3, ..., 3, THERE
6391 #--ARE ONLY 8 TIMES 16 = 2^7 = 128 |F|'S. SINCE ATAN(-|F|) IS
6392 #-- -ATAN(|F|), WE NEED TO STORE ONLY ATAN(|F|).
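#
#--IN C TERMS, THE THREE INSTRUCTIONS BELOW DO (ON THE HIGH 32 FRACTION
#--BITS OF X; ILLUSTRATIVE SKETCH ONLY):
#
#	unsigned int xfrac;         /* high 32 fraction bits of X          */
#	xfrac &= 0xF8000000;        /* keep the first 5 fraction bits      */
#	xfrac |= 0x04000000;        /* force the 6th fraction bit to 1     */
#	/* the low 32 fraction bits are then cleared, so X now holds F     */
#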
6396 and.l &0xF8000000,XFRAC
(%a6
) # FIRST 5 BITS
6397 or.l &0x04000000,XFRAC
(%a6
) # SET 6-TH BIT TO 1
6398 mov.
l &0x00000000,XFRACLO
(%a6
) # LOCATION OF X IS NOW F
6400 fmov.x
%fp0
,%fp1
# FP1 IS X
6401 fmul.x X
(%a6
),%fp1
# FP1 IS X*F, NOTE THAT X*F > 0
6402 fsub.x X
(%a6
),%fp0
# FP0 IS X-F
6403 fadd.s
&0x3F800000,%fp1
# FP1 IS 1 + X*F
6404 fdiv.x
%fp1
,%fp0
# FP0 IS U = (X-F)/(1+X*F)
6406 #--WHILE THE DIVISION IS TAKING ITS TIME, WE FETCH ATAN(|F|)
6407 #--CREATE ATAN(F) AND STORE IT IN ATANF, AND
6408 #--SAVE REGISTERS FP2.
6410 mov.
l %d2
,-(%sp
) # SAVE d2 TEMPORARILY
6411 mov.
l %d1
,%d2
# THE EXP AND 16 BITS OF X
6412 and.l &0x00007800,%d1
# 4 VARYING BITS OF F'S FRACTION
6413 and.l &0x7FFF0000,%d2
# EXPONENT OF F
6414 sub.l &0x3FFB0000,%d2
# K+4
6416 add.l %d2
,%d1
# THE 7 BITS IDENTIFYING F
6417 asr.
l &7,%d1
# INDEX INTO TBL OF ATAN(|F|)
6418 lea ATANTBL
(%pc
),%a1
6419 add.l %d1
,%a1
# ADDRESS OF ATAN(|F|)
6420 mov.
l (%a1
)+,ATANF
(%a6
)
6421 mov.
l (%a1
)+,ATANFHI
(%a6
)
6422 mov.
l (%a1
)+,ATANFLO
(%a6
) # ATANF IS NOW ATAN(|F|)
6423 mov.
l X
(%a6
),%d1
# LOAD SIGN AND EXPO. AGAIN
6424 and.l &0x80000000,%d1
# SIGN(F)
6425 or.l %d1
,ATANF
(%a6
) # ATANF IS NOW SIGN(F)*ATAN(|F|)
6426 mov.
l (%sp
)+,%d2
# RESTORE d2
6428 #--THAT'S ALL I HAVE TO DO FOR NOW,
6429 #--BUT ALAS, THE DIVIDE IS STILL CRANKING!
6431 #--U IN FP0, WE ARE NOW READY TO COMPUTE ATAN(U) AS
6432 #--U + A1*U*V*(A2 + V*(A3 + V)), V = U*U
6433 #--THE POLYNOMIAL MAY LOOK STRANGE, BUT IS NEVERTHELESS CORRECT.
6434 #--THE NATURAL FORM IS U + U*V*(A1 + V*(A2 + V*A3))
6435 #--WHAT WE HAVE HERE IS MERELY A1 = A3, A2 = A1/A3, A3 = A2/A3.
6436 #--THE REASON FOR THIS REARRANGEMENT IS TO MAKE THE INDEPENDENT
6437 #--PARTS A1*U*V AND (A2 + ... STUFF) MORE LOAD-BALANCED
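#
#--EQUIVALENTLY, WITH NATURAL COEFFICIENTS a1, a2, a3 (ILLUSTRATIVE C,
#--DOUBLE PRECISION):
#
#	/* natural form:     u + u*v*(a1 + v*(a2 + v*a3))
#	   rearranged form:  u + A1*u*v*(A2 + v*(A3 + v))
#	   with A1 = a3, A2 = a1/a3, A3 = a2/a3, v = u*u                 */
#	double atan_poly_sketch(double u, double A1, double A2, double A3) {
#	    double v  = u*u;
#	    double t1 = A1*u*v;              /* this piece ...              */
#	    double t2 = A2 + v*(A3 + v);     /* ... and this one can overlap */
#	    return u + t1*t2;
#	}
#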
6439 fmovm.x
&0x04,-(%sp
) # save fp2
6443 fmov.d ATANA3
(%pc
),%fp2
6444 fadd.x
%fp1
,%fp2
# A3+V
6445 fmul.x
%fp1
,%fp2
# V*(A3+V)
6446 fmul.x
%fp0
,%fp1
# U*V
6447 fadd.d ATANA2
(%pc
),%fp2
# A2+V*(A3+V)
6448 fmul.d ATANA1
(%pc
),%fp1
# A1*U*V
6449 fmul.x
%fp2
,%fp1
# A1*U*V*(A2+V*(A3+V))
6450 fadd.x
%fp1
,%fp0
# ATAN(U), FP1 RELEASED
6452 fmovm.x
(%sp
)+,&0x20 # restore fp2
6454 fmov.
l %d0
,%fpcr
# restore users rnd mode,prec
6455 fadd.x ATANF
(%a6
),%fp0
# ATAN(X)
6459 #--|X| IS IN d0 IN COMPACT FORM. FP1, d0 SAVED.
6460 #--FP0 IS X AND |X| <= 1/16 OR |X| >= 16.
6461 cmp.
l %d1
,&0x3FFF8000
6462 bgt.w ATANBIG
# I.E. |X| >= 16
6466 #--IF |X| < 2^(-40), RETURN X AS ANSWER. OTHERWISE, APPROXIMATE
6467 #--ATAN(X) BY X + X*Y*(B1+Y*(B2+Y*(B3+Y*(B4+Y*(B5+Y*B6)))))
6468 #--WHICH IS X + X*Y*( [B1+Z*(B3+Z*B5)] + [Y*(B2+Z*(B4+Z*B6)] )
6469 #--WHERE Y = X*X, AND Z = Y*Y.
6471 cmp.
l %d1
,&0x3FD78000
6474 #--COMPUTE POLYNOMIAL
6475 fmovm.x
&0x0c,-(%sp
) # save fp2/fp3
	fmul.x		%fp0,%fp0		# FP0 IS Y = X*X
6480 fmul.x
%fp1
,%fp1
# FP1 IS Z = Y*Y
6482 fmov.d ATANB6
(%pc
),%fp2
6483 fmov.d ATANB5
(%pc
),%fp3
6485 fmul.x
%fp1
,%fp2
# Z*B6
6486 fmul.x
%fp1
,%fp3
# Z*B5
6488 fadd.d ATANB4
(%pc
),%fp2
# B4+Z*B6
6489 fadd.d ATANB3
(%pc
),%fp3
# B3+Z*B5
6491 fmul.x
%fp1
,%fp2
# Z*(B4+Z*B6)
6492 fmul.x
%fp3
,%fp1
# Z*(B3+Z*B5)
6494 fadd.d ATANB2
(%pc
),%fp2
# B2+Z*(B4+Z*B6)
6495 fadd.d ATANB1
(%pc
),%fp1
# B1+Z*(B3+Z*B5)
6497 fmul.x
%fp0
,%fp2
# Y*(B2+Z*(B4+Z*B6))
6498 fmul.x X
(%a6
),%fp0
# X*Y
6500 fadd.x
%fp2
,%fp1
# [B1+Z*(B3+Z*B5)]+[Y*(B2+Z*(B4+Z*B6))]
6502 fmul.x
%fp1
,%fp0
# X*Y*([B1+Z*(B3+Z*B5)]+[Y*(B2+Z*(B4+Z*B6))])
6504 fmovm.x
(%sp
)+,&0x30 # restore fp2/fp3
6506 fmov.
l %d0
,%fpcr
# restore users rnd mode,prec
6511 #--|X| < 2^(-40), ATAN(X) = X
6513 fmov.
l %d0
,%fpcr
# restore users rnd mode,prec
6514 mov.
b &FMOV_OP
,%d1
# last inst is MOVE
6515 fmov.x X
(%a6
),%fp0
# last inst - possible exception set
6520 #--IF |X| > 2^(100), RETURN SIGN(X)*(PI/2 - TINY). OTHERWISE,
6521 #--RETURN SIGN(X)*PI/2 + ATAN(-1/X).
6522 cmp.
l %d1
,&0x40638000
6525 #--APPROXIMATE ATAN(-1/X) BY
6526 #--X'+X'*Y*(C1+Y*(C2+Y*(C3+Y*(C4+Y*C5)))), X' = -1/X, Y = X'*X'
6527 #--THIS CAN BE RE-WRITTEN AS
6528 #--X'+X'*Y*( [C1+Z*(C3+Z*C5)] + [Y*(C2+Z*C4)] ), Z = Y*Y.
6530 fmovm.x
&0x0c,-(%sp
) # save fp2/fp3
6532 fmov.s
&0xBF800000,%fp1
# LOAD -1
6533 fdiv.x
%fp0
,%fp1
# FP1 IS -1/X
6535 #--DIVIDE IS STILL CRANKING
6537 fmov.x
%fp1
,%fp0
# FP0 IS X'
6538 fmul.x
%fp0
,%fp0
# FP0 IS Y = X'*X'
6539 fmov.x
%fp1
,X
(%a6
) # X IS REALLY X'
6542 fmul.x
%fp1
,%fp1
# FP1 IS Z = Y*Y
6544 fmov.d ATANC5
(%pc
),%fp3
6545 fmov.d ATANC4
(%pc
),%fp2
6547 fmul.x
%fp1
,%fp3
# Z*C5
	fmul.x		%fp1,%fp2		# Z*C4
6550 fadd.d ATANC3
(%pc
),%fp3
# C3+Z*C5
6551 fadd.d ATANC2
(%pc
),%fp2
# C2+Z*C4
6553 fmul.x
%fp3
,%fp1
# Z*(C3+Z*C5), FP3 RELEASED
6554 fmul.x
%fp0
,%fp2
# Y*(C2+Z*C4)
6556 fadd.d ATANC1
(%pc
),%fp1
# C1+Z*(C3+Z*C5)
6557 fmul.x X
(%a6
),%fp0
# X'*Y
6559 fadd.x
%fp2
,%fp1
# [Y*(C2+Z*C4)]+[C1+Z*(C3+Z*C5)]
	fmul.x		%fp1,%fp0		# X'*Y*([C1+Z*(C3+Z*C5)]
						# ... +[Y*(C2+Z*C4)])
6565 fmovm.x
(%sp
)+,&0x30 # restore fp2/fp3
6567 fmov.
l %d0
,%fpcr
# restore users rnd mode,prec
6572 fadd.x NPIBY2
(%pc
),%fp0
6576 fadd.x PPIBY2
(%pc
),%fp0
6580 #--RETURN SIGN(X)*(PIBY2 - TINY) = SIGN(X)*PIBY2 - SIGN(X)*TINY
6585 fmov.x NPIBY2
(%pc
),%fp0
6587 fadd.x PTINY
(%pc
),%fp0
6591 fmov.x PPIBY2
(%pc
),%fp0
6593 fadd.x NTINY
(%pc
),%fp0
6597 #--ENTRY POINT FOR ATAN(X) FOR DENORMALIZED ARGUMENT
6601 #########################################################################
6602 # sasin(): computes the inverse sine of a normalized input #
6603 # sasind(): computes the inverse sine of a denormalized input #
6605 # INPUT *************************************************************** #
6606 # a0 = pointer to extended precision input #
6607 # d0 = round precision,mode #
6609 # OUTPUT ************************************************************** #
6612 # ACCURACY and MONOTONICITY ******************************************* #
6613 # The returned result is within 3 ulps in 64 significant bit, #
6614 # i.e. within 0.5001 ulp to 53 bits if the result is subsequently #
6615 # rounded to double precision. The result is provably monotonic #
6616 # in double precision. #
6618 # ALGORITHM *********************************************************** #
6621 # 1. If |X| >= 1, go to 3. #
6623 # 2. (|X| < 1) Calculate asin(X) by #
6624 # z := sqrt( [1-X][1+X] ) #
6625 # asin(X) = atan( x / z ). #
6628 # 3. If |X| > 1, go to 5. #
6630 # 4. (|X| = 1) sgn := sign(X), return asin(X) := sgn * Pi/2. Exit.#
6632 # 5. (|X| > 1) Generate an invalid operation by 0 * infinity. #
6635 #########################################################################
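#
# A C sketch of the algorithm above (double precision; libm's atan() stands
# in for the call to satan() -- illustration only):
#
#	#include <math.h>
#
#	double asin_sketch(double x) {
#	    double ax = fabs(x);
#	    if (ax < 1.0) {
#	        double z = sqrt((1.0 - x) * (1.0 + x));
#	        return atan(x / z);             /* asin(X) = atan(X/z)        */
#	    }
#	    if (ax == 1.0)
#	        return copysign(M_PI/2, x);     /* asin(+-1) = +-pi/2         */
#	    return 0.0 * INFINITY;              /* |X| > 1: invalid operation */
#	}
#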
	fmov.x		(%a0),%fp0		# LOAD INPUT

	and.l		&0x7FFFFFFF,%d1
	cmp.l		%d1,&0x3FFF8000

# This catch is added here for the '060 QSP. Originally, the call to
# satan() would handle this case by causing the exception which would
# not be caught until gen_except(). Now, with the exceptions being
# detected inside of satan(), the exception would have been handled there
# instead of inside sasin() as expected.
	cmp.l		%d1,&0x3FD78000

#--THIS IS THE USUAL CASE, |X| < 1
#--ASIN(X) = ATAN( X / SQRT( (1-X)(1+X) ) )

	fmov.s		&0x3F800000,%fp1
	fsub.x		%fp0,%fp1		# 1-X
	fmovm.x		&0x4,-(%sp)		# {fp2}
	fmov.s		&0x3F800000,%fp2
	fadd.x		%fp0,%fp2		# 1+X
	fmul.x		%fp2,%fp1		# (1+X)(1-X)
	fmovm.x		(%sp)+,&0x20		# {fp2}
	fsqrt.x		%fp1			# SQRT([1-X][1+X])
	fdiv.x		%fp1,%fp0		# X/SQRT([1-X][1+X])
	fmovm.x		&0x01,-(%sp)		# save X/SQRT(...)
	lea		(%sp),%a0		# pass ptr to X/SQRT(...)
	add.l		&0xc,%sp		# clear X/SQRT(...) from stack

	fcmp.s		%fp0,&0x3F800000
	fbgt		t_operr			# cause an operr exception

#--|X| = 1, ASIN(X) = +- PI/2.
	fmov.x		PIBY2(%pc),%fp0

	and.l		&0x80000000,%d1		# SIGN BIT OF X
	or.l		&0x3F800000,%d1		# +-1 IN SGL FORMAT
	mov.l		%d1,-(%sp)		# push SIGN(X) IN SGL-FMT

#--|X| < 2^(-40), ATAN(X) = X
	fmov.l		%d0,%fpcr		# restore users rnd mode,prec
	mov.b		&FMOV_OP,%d1		# last inst is MOVE
	fmov.x		(%a0),%fp0		# last inst - possible exception

#--ASIN(X) = X FOR DENORMALIZED X
6702 #########################################################################
6703 # sacos(): computes the inverse cosine of a normalized input #
6704 # sacosd(): computes the inverse cosine of a denormalized input #
6706 # INPUT *************************************************************** #
6707 # a0 = pointer to extended precision input #
6708 # d0 = round precision,mode #
6710 # OUTPUT ************************************************************** #
6713 # ACCURACY and MONOTONICITY ******************************************* #
6714 # The returned result is within 3 ulps in 64 significant bit, #
6715 # i.e. within 0.5001 ulp to 53 bits if the result is subsequently #
6716 # rounded to double precision. The result is provably monotonic #
6717 # in double precision. #
6719 # ALGORITHM *********************************************************** #
6722 # 1. If |X| >= 1, go to 3. #
6724 # 2. (|X| < 1) Calculate acos(X) by #
6725 # z := (1-X) / (1+X) #
6726 # acos(X) = 2 * atan( sqrt(z) ). #
6729 # 3. If |X| > 1, go to 5. #
6731 # 4. (|X| = 1) If X > 0, return 0. Otherwise, return Pi. Exit. #
6733 # 5. (|X| > 1) Generate an invalid operation by 0 * infinity. #
6736 #########################################################################
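#
# A C sketch of the algorithm above (double precision; libm's atan() stands
# in for the call to satan() -- illustration only):
#
#	#include <math.h>
#
#	double acos_sketch(double x) {
#	    double ax = fabs(x);
#	    if (ax < 1.0) {
#	        double z = (1.0 - x) / (1.0 + x);
#	        return 2.0 * atan(sqrt(z));     /* acos(X) = 2*atan(sqrt(z))  */
#	    }
#	    if (ax == 1.0)
#	        return (x > 0.0) ? 0.0 : M_PI;  /* acos(1) = 0, acos(-1) = pi */
#	    return 0.0 * INFINITY;              /* |X| > 1: invalid operation */
#	}
#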
	fmov.x		(%a0),%fp0		# LOAD INPUT

	mov.l		(%a0),%d1		# pack exp w/ upper 16 fraction
	and.l		&0x7FFFFFFF,%d1
	cmp.l		%d1,&0x3FFF8000

#--THIS IS THE USUAL CASE, |X| < 1
#--ACOS(X) = 2 * ATAN( SQRT( (1-X)/(1+X) ) )

	fmov.s		&0x3F800000,%fp1
	fadd.x		%fp0,%fp1		# 1+X
	fadd.s		&0x3F800000,%fp0	# 1-X
	fdiv.x		%fp1,%fp0		# (1-X)/(1+X)
	fsqrt.x		%fp0			# SQRT((1-X)/(1+X))
	mov.l		%d0,-(%sp)		# save original users fpcr
	fmovm.x		&0x01,-(%sp)		# save SQRT(...) to stack
	lea		(%sp),%a0		# pass ptr to sqrt
	bsr		satan			# ATAN(SQRT([1-X]/[1+X]))
	add.l		&0xc,%sp		# clear SQRT(...) from stack

	fmov.l		(%sp)+,%fpcr		# restore users round prec,mode
	fadd.x		%fp0,%fp0		# 2 * ATAN( STUFF )

	fcmp.s		%fp0,&0x3F800000
	fbgt		t_operr			# cause an operr exception

#--|X| = 1, ACOS(X) = 0 OR PI
	tst.b		(%a0)			# is X positive or negative?

# Returns PI and inexact exception
	fmov.x		PI(%pc),%fp0		# load PI
	fmov.l		%d0,%fpcr		# load round mode,prec
	fadd.s		&0x00800000,%fp0	# add a small value

	bra		ld_pzero		# answer is positive zero

#--ACOS(X) = PI/2 FOR DENORMALIZED X
	fmov.l		%d0,%fpcr		# load user's rnd mode/prec
	fmov.x		PIBY2(%pc),%fp0
6796 #########################################################################
6797 # setox(): computes the exponential for a normalized input #
6798 # setoxd(): computes the exponential for a denormalized input #
6799 # setoxm1(): computes the exponential minus 1 for a normalized input #
6800 # setoxm1d(): computes the exponential minus 1 for a denormalized input #
6802 # INPUT *************************************************************** #
6803 # a0 = pointer to extended precision input #
6804 # d0 = round precision,mode #
6806 # OUTPUT ************************************************************** #
6807 # fp0 = exp(X) or exp(X)-1 #
6809 # ACCURACY and MONOTONICITY ******************************************* #
6810 # The returned result is within 0.85 ulps in 64 significant bit, #
6811 # i.e. within 0.5001 ulp to 53 bits if the result is subsequently #
6812 # rounded to double precision. The result is provably monotonic #
6813 # in double precision. #
6815 # ALGORITHM and IMPLEMENTATION **************************************** #
6819 # Step 1. Set ans := 1.0 #
6821 # Step 2. Return ans := ans + sign(X)*2^(-126). Exit. #
6822 # Notes: This will always generate one exception -- inexact. #
6828 # Step 1. Filter out extreme cases of input argument. #
6829 # 1.1 If |X| >= 2^(-65), go to Step 1.3. #
6830 # 1.2 Go to Step 7. #
6831 # 1.3 If |X| < 16380 log(2), go to Step 2. #
6832 # 1.4 Go to Step 8. #
6833 # Notes: The usual case should take the branches 1.1 -> 1.3 -> 2.#
6834 # To avoid the use of floating-point comparisons, a #
6835 # compact representation of |X| is used. This format is a #
6836 # 32-bit integer, the upper (more significant) 16 bits #
6837 # are the sign and biased exponent field of |X|; the #
6838 # lower 16 bits are the 16 most significant fraction #
6839 # (including the explicit bit) bits of |X|. Consequently, #
6840 # the comparisons in Steps 1.1 and 1.3 can be performed #
6841 # by integer comparison. Note also that the constant #
6842 # 16380 log(2) used in Step 1.3 is also in the compact #
6843 # form. Thus taking the branch to Step 2 guarantees #
6844 # |X| < 16380 log(2). There is no harm to have a small #
6845 # number of cases where |X| is less than, but close to, #
6846 # 16380 log(2) and the branch to Step 9 is taken. #
6848 # Step 2. Calculate N = round-to-nearest-int( X * 64/log2 ). #
6849 # 2.1 Set AdjFlag := 0 (indicates the branch 1.3 -> 2 #
6851 # 2.2 N := round-to-nearest-integer( X * 64/log2 ). #
6852 # 2.3 Calculate J = N mod 64; so J = 0,1,2,..., #
6854 # 2.4 Calculate M = (N - J)/64; so N = 64M + J. #
6855 # 2.5 Calculate the address of the stored value of #
6857 # 2.6 Create the value Scale = 2^M. #
6858 # Notes: The calculation in 2.2 is really performed by #
6859 # Z := X * constant #
6860 # N := round-to-nearest-integer(Z) #
6862 # constant := single-precision( 64/log 2 ). #
6864 # Using a single-precision constant avoids memory #
6865 # access. Another effect of using a single-precision #
6866 # "constant" is that the calculated value Z is #
6868 # Z = X*(64/log2)*(1+eps), |eps| <= 2^(-24). #
6870 # This error has to be considered later in Steps 3 and 4. #
6872 # Step 3. Calculate X - N*log2/64. #
6873 # 3.1 R := X + N*L1, #
6874 # where L1 := single-precision(-log2/64). #
6875 # 3.2 R := R + N*L2, #
6876 # L2 := extended-precision(-log2/64 - L1).#
6877 # Notes: a) The way L1 and L2 are chosen ensures L1+L2 #
6878 # approximate the value -log2/64 to 88 bits of accuracy. #
6879 # b) N*L1 is exact because N is no longer than 22 bits #
6880 # and L1 is no longer than 24 bits. #
6881 # c) The calculation X+N*L1 is also exact due to #
6882 # cancellation. Thus, R is practically X+N(L1+L2) to full #
6884 # d) It is important to estimate how large can |R| be #
6887 # N = rnd-to-int( X*64/log2 (1+eps) ), |eps|<=2^(-24) #
6888 # X*64/log2 (1+eps) = N + f, |f| <= 0.5 #
6889 # X*64/log2 - N = f - eps*X 64/log2 #
6890 # X - N*log2/64 = f*log2/64 - eps*X #
6893 # Now |X| <= 16446 log2, thus #
6895 # |X - N*log2/64| <= (0.5 + 16446/2^(18))*log2/64 #
6896 # <= 0.57 log2/64. #
6897 # This bound will be used in Step 4. #
6899 # Step 4. Approximate exp(R)-1 by a polynomial #
6900 # p = R + R*R*(A1 + R*(A2 + R*(A3 + R*(A4 + R*A5)))) #
6901 # Notes: a) In order to reduce memory access, the coefficients #
6902 # are made as "short" as possible: A1 (which is 1/2), A4 #
6903 # and A5 are single precision; A2 and A3 are double #
6905 # b) Even with the restrictions above, #
6906 # |p - (exp(R)-1)| < 2^(-68.8) for all |R| <= 0.0062. #
6907 # Note that 0.0062 is slightly bigger than 0.57 log2/64. #
6908 # c) To fully use the pipeline, p is separated into #
6909 # two independent pieces of roughly equal complexities #
6910 # p = [ R + R*S*(A2 + S*A4) ] + #
6911 # [ S*(A1 + S*(A3 + S*A5)) ] #
6914 # Step 5. Compute 2^(J/64)*exp(R) = 2^(J/64)*(1+p) by #
6915 # ans := T + ( T*p + t) #
6916 # where T and t are the stored values for 2^(J/64). #
6917 # Notes: 2^(J/64) is stored as T and t where T+t approximates #
6918 # 2^(J/64) to roughly 85 bits; T is in extended precision #
6919 # and t is in single precision. Note also that T is #
6920 # rounded to 62 bits so that the last two bits of T are #
6921 # zero. The reason for such a special form is that T-1, #
6922 # T-2, and T-8 will all be exact --- a property that will #
6923 # give much more accurate computation of the function #
6926 # Step 6. Reconstruction of exp(X) #
6927 # exp(X) = 2^M * 2^(J/64) * exp(R). #
6928 # 6.1 If AdjFlag = 0, go to 6.3 #
6929 # 6.2 ans := ans * AdjScale #
6930 # 6.3 Restore the user FPCR #
6931 # 6.4 Return ans := ans * Scale. Exit. #
6932 # Notes: If AdjFlag = 0, we have X = Mlog2 + Jlog2/64 + R, #
6933 # |M| <= 16380, and Scale = 2^M. Moreover, exp(X) will #
6934 # neither overflow nor underflow. If AdjFlag = 1, that #
6936 # X = (M1+M)log2 + Jlog2/64 + R, |M1+M| >= 16380. #
6937 # Hence, exp(X) may overflow or underflow or neither. #
6938 # When that is the case, AdjScale = 2^(M1) where M1 is #
6939 # approximately M. Thus 6.2 will never cause #
6940 # over/underflow. Possible exception in 6.4 is overflow #
6941 # or underflow. The inexact exception is not generated in #
6942 # 6.4. Although one can argue that the inexact flag #
#	   should always be raised, simulating that exception		#
#	   would cost more than the flag is worth in practical uses.	#
6946 # Step 7. Return 1 + X. #
6948 # 7.2 Restore user FPCR. #
6949 # 7.3 Return ans := 1 + ans. Exit #
6950 # Notes: For non-zero X, the inexact exception will always be #
6951 # raised by 7.3. That is the only exception raised by 7.3.#
6952 # Note also that we use the FMOVEM instruction to move X #
6953 # in Step 7.1 to avoid unnecessary trapping. (Although #
6954 # the FMOVEM may not seem relevant since X is normalized, #
6955 # the precaution will be useful in the library version of #
6956 # this code where the separate entry for denormalized #
6957 # inputs will be done away with.) #
6959 # Step 8. Handle exp(X) where |X| >= 16380log2. #
6960 # 8.1 If |X| > 16480 log2, go to Step 9. #
6961 # (mimic 2.2 - 2.6) #
6962 # 8.2 N := round-to-integer( X * 64/log2 ) #
6963 # 8.3 Calculate J = N mod 64, J = 0,1,...,63 #
#	8.4 	K := (N-J)/64, M1 := truncate(K/2), M = K-M1,		#
#		AdjFlag := 1.						#
#	8.5	Calculate the address of the stored value of		#
#		2^(J/64).						#
6968 # 8.6 Create the values Scale = 2^M, AdjScale = 2^M1. #
6969 # 8.7 Go to Step 3. #
6970 # Notes: Refer to notes for 2.2 - 2.6. #
6972 # Step 9. Handle exp(X), |X| > 16480 log2. #
6973 # 9.1 If X < 0, go to 9.3 #
6974 # 9.2 ans := Huge, go to 9.4 #
6975 # 9.3 ans := Tiny. #
6976 # 9.4 Restore user FPCR. #
6977 # 9.5 Return ans := ans * ans. Exit. #
6978 # Notes: Exp(X) will surely overflow or underflow, depending on #
6979 # X's sign. "Huge" and "Tiny" are respectively large/tiny #
6980 # extended-precision numbers whose square over/underflow #
6981 # with an inexact result. Thus, 9.5 always raises the #
6982 # inexact together with either overflow or underflow. #
6987 # Step 1. Set ans := 0 #
6989 # Step 2. Return ans := X + ans. Exit. #
6990 # Notes: This will return X with the appropriate rounding #
6991 # precision prescribed by the user FPCR. #
6996 # Step 1. Check |X| #
6997 # 1.1 If |X| >= 1/4, go to Step 1.3. #
6998 # 1.2 Go to Step 7. #
6999 # 1.3 If |X| < 70 log(2), go to Step 2. #
7000 # 1.4 Go to Step 10. #
7001 # Notes: The usual case should take the branches 1.1 -> 1.3 -> 2.#
7002 # However, it is conceivable |X| can be small very often #
7003 # because EXPM1 is intended to evaluate exp(X)-1 #
7004 # accurately when |X| is small. For further details on #
7005 # the comparisons, see the notes on Step 1 of setox. #
7007 # Step 2. Calculate N = round-to-nearest-int( X * 64/log2 ). #
7008 # 2.1 N := round-to-nearest-integer( X * 64/log2 ). #
#	2.2	Calculate J = N mod 64; so J = 0,1,2,...,63.		#
7011 # 2.3 Calculate M = (N - J)/64; so N = 64M + J. #
#	2.4	Calculate the address of the stored value of		#
#		2^(J/64).						#
7014 # 2.5 Create the values Sc = 2^M and #
7015 # OnebySc := -2^(-M). #
7016 # Notes: See the notes on Step 2 of setox. #
7018 # Step 3. Calculate X - N*log2/64. #
7019 # 3.1 R := X + N*L1, #
7020 # where L1 := single-precision(-log2/64). #
7021 # 3.2 R := R + N*L2, #
7022 # L2 := extended-precision(-log2/64 - L1).#
7023 # Notes: Applying the analysis of Step 3 of setox in this case #
#	   shows that |R| <= 0.0055 (note that |X| <= 70 log2 in	#
#	   this case).							#
7027 # Step 4. Approximate exp(R)-1 by a polynomial #
7028 # p = R+R*R*(A1+R*(A2+R*(A3+R*(A4+R*(A5+R*A6))))) #
7029 # Notes: a) In order to reduce memory access, the coefficients #
7030 # are made as "short" as possible: A1 (which is 1/2), A5 #
#	   and A6 are single precision; A2, A3 and A4 are double	#
#	   precision.							#
7033 # b) Even with the restriction above, #
7034 # |p - (exp(R)-1)| < |R| * 2^(-72.7) #
7035 # for all |R| <= 0.0055. #
7036 # c) To fully use the pipeline, p is separated into #
7037 # two independent pieces of roughly equal complexity #
7038 # p = [ R*S*(A2 + S*(A4 + S*A6)) ] + #
#		[ R + S*(A1 + S*(A3 + S*A5)) ]				#
#	   where S = R*R.						#
7042 # Step 5. Compute 2^(J/64)*p by #
7044 # where T and t are the stored values for 2^(J/64). #
7045 # Notes: 2^(J/64) is stored as T and t where T+t approximates #
7046 # 2^(J/64) to roughly 85 bits; T is in extended precision #
7047 # and t is in single precision. Note also that T is #
7048 # rounded to 62 bits so that the last two bits of T are #
7049 # zero. The reason for such a special form is that T-1, #
7050 # T-2, and T-8 will all be exact --- a property that will #
7051 # be exploited in Step 6 below. The total relative error #
7052 # in p is no bigger than 2^(-67.7) compared to the final #
7055 # Step 6. Reconstruction of exp(X)-1 #
7056 # exp(X)-1 = 2^M * ( 2^(J/64) + p - 2^(-M) ). #
7057 # 6.1 If M <= 63, go to Step 6.3. #
7058 # 6.2 ans := T + (p + (t + OnebySc)). Go to 6.6 #
7059 # 6.3 If M >= -3, go to 6.5. #
7060 # 6.4 ans := (T + (p + t)) + OnebySc. Go to 6.6 #
7061 # 6.5 ans := (T + OnebySc) + (p + t). #
7062 # 6.6 Restore user FPCR. #
7063 # 6.7 Return ans := Sc * ans. Exit. #
7064 # Notes: The various arrangements of the expressions give #
7065 # accurate evaluations. #
7067 # Step 7. exp(X)-1 for |X| < 1/4. #
7068 # 7.1 If |X| >= 2^(-65), go to Step 9. #
7069 # 7.2 Go to Step 8. #
7071 # Step 8. Calculate exp(X)-1, |X| < 2^(-65). #
7072 # 8.1 If |X| < 2^(-16312), goto 8.3 #
7073 # 8.2 Restore FPCR; return ans := X - 2^(-16382). #
7075 # 8.3 X := X * 2^(140). #
7076 # 8.4 Restore FPCR; ans := ans - 2^(-16382). #
7077 # Return ans := ans*2^(140). Exit #
7078 # Notes: The idea is to return "X - tiny" under the user #
7079 # precision and rounding modes. To avoid unnecessary #
7080 # inefficiency, we stay away from denormalized numbers #
7081 # the best we can. For |X| >= 2^(-16312), the #
7082 # straightforward 8.2 generates the inexact exception as #
7083 # the case warrants. #
7085 # Step 9. Calculate exp(X)-1, |X| < 1/4, by a polynomial #
7086 # p = X + X*X*(B1 + X*(B2 + ... + X*B12)) #
7087 # Notes: a) In order to reduce memory access, the coefficients #
7088 # are made as "short" as possible: B1 (which is 1/2), B9 #
7089 # to B12 are single precision; B3 to B8 are double #
7090 # precision; and B2 is double extended. #
7091 # b) Even with the restriction above, #
7092 # |p - (exp(X)-1)| < |X| 2^(-70.6) #
7093 # for all |X| <= 0.251. #
7094 # Note that 0.251 is slightly bigger than 1/4. #
7095 # c) To fully preserve accuracy, the polynomial is #
7097 # X + ( S*B1 + Q ) where S = X*X and #
7098 # Q = X*S*(B2 + X*(B3 + ... + X*B12)) #
7099 # d) To fully use the pipeline, Q is separated into #
7100 # two independent pieces of roughly equal complexity #
7101 # Q = [ X*S*(B2 + S*(B4 + ... + S*B12)) ] + #
7102 # [ S*S*(B3 + S*(B5 + ... + S*B11)) ] #
7104 # Step 10. Calculate exp(X)-1 for |X| >= 70 log 2. #
7105 # 10.1 If X >= 70log2 , exp(X) - 1 = exp(X) for all #
7106 # practical purposes. Therefore, go to Step 1 of setox. #
#	10.2	If X <= -70log2, exp(X) - 1 = -1 for all practical	#
#		purposes. ans := -1.					#
7110 # Restore user FPCR #
7111 # Return ans := ans + 2^(-126). Exit. #
7112 # Notes: 10.2 will always create an inexact and return -1 + tiny #
7113 # in the user rounding precision and mode. #
7115 #########################################################################
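#
# [Editor's note -- illustrative sketch only, not part of the 060FPSP code.]
# The range reduction above (N = 64M + J, R = X - N*log2/64, a short
# polynomial for exp(R)-1, a table of 2^(J/64)) can be mimicked in C99
# double precision; exp2()/ldexp() stand in for the 2^(J/64) table and the
# 2^M scale, and the L1/L2 split of log2/64 is rebuilt with a float cast:
#
#	#include <math.h>
#	static double exp_sketch(double x)
#	{
#	    int    n  = (int)nearbyint(x * 64.0 / M_LN2);
#	    int    j  = ((n % 64) + 64) % 64;             /* J in 0..63    */
#	    int    m  = (n - j) / 64;                     /* N = 64M + J   */
#	    float  l1 = (float)(M_LN2 / 64.0);            /* leading part  */
#	    double l2 = M_LN2 / 64.0 - (double)l1;        /* trailing part */
#	    double r  = (x - n * (double)l1) - n * l2;    /* reduced arg   */
#	    double p  = r + r*r*(0.5 + r*(1.0/6 + r*(1.0/24 + r/120.0)));
#	    double t  = exp2((double)j / 64.0);           /* plays T (+ t) */
#	    return ldexp(t + t*p, m);                     /* Steps 5 & 6   */
#	}
#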
L2:	long		0x3FDC0000,0x82E30865,0x4361C4C6,0x00000000

EEXPA3:	long		0x3FA55555,0x55554CC1
EEXPA2:	long		0x3FC55555,0x55554A54

EM1A4:	long		0x3F811111,0x11174385
EM1A3:	long		0x3FA55555,0x55554F5A

EM1A2:	long		0x3FC55555,0x55555555,0x00000000,0x00000000

EM1B8:	long		0x3EC71DE3,0xA5774682
EM1B7:	long		0x3EFA01A0,0x19D7CB68

EM1B6:	long		0x3F2A01A0,0x1A019DF3
EM1B5:	long		0x3F56C16C,0x16C170E2

EM1B4:	long		0x3F811111,0x11111111
EM1B3:	long		0x3FA55555,0x55555555

EM1B2:	long		0x3FFC0000,0xAAAAAAAA,0xAAAAAAAB
TWO140:	long		0x48B00000,0x00000000
TWON140:
	long		0x37300000,0x00000000
EEXPTBL:
	long		0x3FFF0000,0x80000000,0x00000000,0x00000000
7145 long
0x3FFF0000,0x8164D1F3,0xBC030774,0x9F841A9B
7146 long
0x3FFF0000,0x82CD8698,0xAC2BA1D8,0x9FC1D5B9
7147 long
0x3FFF0000,0x843A28C3,0xACDE4048,0xA0728369
7148 long
0x3FFF0000,0x85AAC367,0xCC487B14,0x1FC5C95C
7149 long
0x3FFF0000,0x871F6196,0x9E8D1010,0x1EE85C9F
7150 long
0x3FFF0000,0x88980E80,0x92DA8528,0x9FA20729
7151 long
0x3FFF0000,0x8A14D575,0x496EFD9C,0xA07BF9AF
7152 long
0x3FFF0000,0x8B95C1E3,0xEA8BD6E8,0xA0020DCF
7153 long
0x3FFF0000,0x8D1ADF5B,0x7E5BA9E4,0x205A63DA
7154 long
0x3FFF0000,0x8EA4398B,0x45CD53C0,0x1EB70051
7155 long
0x3FFF0000,0x9031DC43,0x1466B1DC,0x1F6EB029
7156 long
0x3FFF0000,0x91C3D373,0xAB11C338,0xA0781494
7157 long
0x3FFF0000,0x935A2B2F,0x13E6E92C,0x9EB319B0
7158 long
0x3FFF0000,0x94F4EFA8,0xFEF70960,0x2017457D
7159 long
0x3FFF0000,0x96942D37,0x20185A00,0x1F11D537
7160 long
0x3FFF0000,0x9837F051,0x8DB8A970,0x9FB952DD
7161 long
0x3FFF0000,0x99E04593,0x20B7FA64,0x1FE43087
7162 long
0x3FFF0000,0x9B8D39B9,0xD54E5538,0x1FA2A818
7163 long
0x3FFF0000,0x9D3ED9A7,0x2CFFB750,0x1FDE494D
7164 long
0x3FFF0000,0x9EF53260,0x91A111AC,0x20504890
7165 long
0x3FFF0000,0xA0B0510F,0xB9714FC4,0xA073691C
7166 long
0x3FFF0000,0xA2704303,0x0C496818,0x1F9B7A05
7167 long
0x3FFF0000,0xA43515AE,0x09E680A0,0xA0797126
7168 long
0x3FFF0000,0xA5FED6A9,0xB15138EC,0xA071A140
7169 long
0x3FFF0000,0xA7CD93B4,0xE9653568,0x204F62DA
7170 long
0x3FFF0000,0xA9A15AB4,0xEA7C0EF8,0x1F283C4A
7171 long
0x3FFF0000,0xAB7A39B5,0xA93ED338,0x9F9A7FDC
7172 long
0x3FFF0000,0xAD583EEA,0x42A14AC8,0xA05B3FAC
7173 long
0x3FFF0000,0xAF3B78AD,0x690A4374,0x1FDF2610
7174 long
0x3FFF0000,0xB123F581,0xD2AC2590,0x9F705F90
7175 long
0x3FFF0000,0xB311C412,0xA9112488,0x201F678A
7176 long
0x3FFF0000,0xB504F333,0xF9DE6484,0x1F32FB13
7177 long
0x3FFF0000,0xB6FD91E3,0x28D17790,0x20038B30
7178 long
0x3FFF0000,0xB8FBAF47,0x62FB9EE8,0x200DC3CC
7179 long
0x3FFF0000,0xBAFF5AB2,0x133E45FC,0x9F8B2AE6
7180 long
0x3FFF0000,0xBD08A39F,0x580C36C0,0xA02BBF70
7181 long
0x3FFF0000,0xBF1799B6,0x7A731084,0xA00BF518
7182 long
0x3FFF0000,0xC12C4CCA,0x66709458,0xA041DD41
7183 long
0x3FFF0000,0xC346CCDA,0x24976408,0x9FDF137B
7184 long
0x3FFF0000,0xC5672A11,0x5506DADC,0x201F1568
7185 long
0x3FFF0000,0xC78D74C8,0xABB9B15C,0x1FC13A2E
7186 long
0x3FFF0000,0xC9B9BD86,0x6E2F27A4,0xA03F8F03
7187 long
0x3FFF0000,0xCBEC14FE,0xF2727C5C,0x1FF4907D
7188 long
0x3FFF0000,0xCE248C15,0x1F8480E4,0x9E6E53E4
7189 long
0x3FFF0000,0xD06333DA,0xEF2B2594,0x1FD6D45C
7190 long
0x3FFF0000,0xD2A81D91,0xF12AE45C,0xA076EDB9
7191 long
0x3FFF0000,0xD4F35AAB,0xCFEDFA20,0x9FA6DE21
7192 long
0x3FFF0000,0xD744FCCA,0xD69D6AF4,0x1EE69A2F
7193 long
0x3FFF0000,0xD99D15C2,0x78AFD7B4,0x207F439F
7194 long
0x3FFF0000,0xDBFBB797,0xDAF23754,0x201EC207
7195 long
0x3FFF0000,0xDE60F482,0x5E0E9124,0x9E8BE175
7196 long
0x3FFF0000,0xE0CCDEEC,0x2A94E110,0x20032C4B
7197 long
0x3FFF0000,0xE33F8972,0xBE8A5A50,0x2004DFF5
7198 long
0x3FFF0000,0xE5B906E7,0x7C8348A8,0x1E72F47A
7199 long
0x3FFF0000,0xE8396A50,0x3C4BDC68,0x1F722F22
7200 long
0x3FFF0000,0xEAC0C6E7,0xDD243930,0xA017E945
7201 long
0x3FFF0000,0xED4F301E,0xD9942B84,0x1F401A5B
7202 long
0x3FFF0000,0xEFE4B99B,0xDCDAF5CC,0x9FB9A9E3
7203 long
0x3FFF0000,0xF281773C,0x59FFB138,0x20744C05
7204 long
0x3FFF0000,0xF5257D15,0x2486CC2C,0x1F773A19
7205 long
0x3FFF0000,0xF7D0DF73,0x0AD13BB8,0x1FFE90D5
7206 long
0x3FFF0000,0xFA83B2DB,0x722A033C,0xA041ED22
7207 long
0x3FFF0000,0xFD3E0C0C,0xF486C174,0x1F853F3A
	set		ADJSCALE,FP_SCR1
#--entry point for EXP(X), here X is finite, non-zero, and not NaN's
	mov.l		(%a0),%d1		# load part of input X
	and.l		&0x7FFF0000,%d1		# biased expo. of X
	cmp.l		%d1,&0x3FBE0000		# 2^(-65)
	bge.b		EXPC1			# normal case

#--The case |X| >= 2^(-65)
	mov.w		4(%a0),%d1		# expo. and partial sig. of |X|
	cmp.l		%d1,&0x400CB167		# 16380 log2 trunc. 16 bits
	blt.b		EXPMAIN			# normal case

#--This is the normal branch: 2^(-65) <= |X| < 16380 log2.
	fmov.x		(%a0),%fp0		# load input from (a0)

	fmul.s		&0x42B8AA3B,%fp0	# 64/log2 * X
	fmovm.x		&0xc,-(%sp)		# save fp2 {%fp2/%fp3}
	mov.l		&0,ADJFLAG(%a6)
	fmov.l		%fp0,%d1		# N = int( X * 64/log2 )
	lea		EEXPTBL(%pc),%a1
	fmov.l		%d1,%fp0		# convert to floating-format

	mov.l		%d1,L_SCR1(%a6)		# save N temporarily
	and.l		&0x3F,%d1		# D0 is J = N mod 64

	add.l		%d1,%a1			# address of 2^(J/64)
	mov.l		L_SCR1(%a6),%d1
	asr.l		&6,%d1			# D0 is M
	add.w		&0x3FFF,%d1		# biased expo. of 2^(M)
	mov.w		L2(%pc),L_SCR1(%a6)	# prefetch L2, no need in CB

#--fp1,fp2 saved on the stack. fp0 is N, fp1 is X,
#--a0 points to 2^(J/64), D0 is biased expo. of 2^(M)
	fmul.s		&0xBC317218,%fp0	# N * L1, L1 = lead(-log2/64)
	fmul.x		L2(%pc),%fp2		# N * L2, L1+L2 = -log2/64
	fadd.x		%fp1,%fp0		# X + N*L1
	fadd.x		%fp2,%fp0		# fp0 is R, reduced arg.

#--WE NOW COMPUTE EXP(R)-1 BY A POLYNOMIAL
#-- R + R*R*(A1 + R*(A2 + R*(A3 + R*(A4 + R*A5))))
#--TO FULLY USE THE PIPELINE, WE COMPUTE S = R*R
#--[R+R*S*(A2+S*A4)] + [S*(A1+S*(A3+S*A5))]
7272 fmul.x
%fp1
,%fp1
# fp1 IS S = R*R
7274 fmov.s
&0x3AB60B70,%fp2
# fp2 IS A5
7276 fmul.x
%fp1
,%fp2
# fp2 IS S*A5
7278 fmul.s
&0x3C088895,%fp3
# fp3 IS S*A4
7280 fadd.d EEXPA3
(%pc
),%fp2
# fp2 IS A3+S*A5
7281 fadd.d EEXPA2
(%pc
),%fp3
# fp3 IS A2+S*A4
7283 fmul.x
%fp1
,%fp2
# fp2 IS S*(A3+S*A5)
7284 mov.w
%d1
,SCALE
(%a6
) # SCALE is 2^(M) in extended
7285 mov.
l &0x80000000,SCALE+
4(%a6
)
7288 fmul.x
%fp1
,%fp3
# fp3 IS S*(A2+S*A4)
7290 fadd.s
&0x3F000000,%fp2
# fp2 IS A1+S*(A3+S*A5)
7291 fmul.x
%fp0
,%fp3
# fp3 IS R*S*(A2+S*A4)
7293 fmul.x
%fp1
,%fp2
# fp2 IS S*(A1+S*(A3+S*A5))
7294 fadd.x
%fp3
,%fp0
# fp0 IS R+R*S*(A2+S*A4),
7296 fmov.x
(%a1
)+,%fp1
# fp1 is lead. pt. of 2^(J/64)
7297 fadd.x
%fp2
,%fp0
# fp0 is EXP(R) - 1
7300 #--final reconstruction process
7301 #--EXP(X) = 2^M * ( 2^(J/64) + 2^(J/64)*(EXP(R)-1) )
7303 fmul.x
%fp1
,%fp0
# 2^(J/64)*(Exp(R)-1)
7304 fmovm.x
(%sp
)+,&0x30 # fp2 restored {%fp2/%fp3}
7305 fadd.s
(%a1
),%fp0
# accurate 2^(J/64)
7307 fadd.x
%fp1
,%fp0
# 2^(J/64) + 2^(J/64)*...
7308 mov.
l ADJFLAG
(%a6
),%d1
7314 fmul.x ADJSCALE
(%a6
),%fp0
7316 fmov.
l %d0
,%fpcr
# restore user FPCR
7317 mov.
b &FMUL_OP
,%d1
# last inst is MUL
7318 fmul.x SCALE
(%a6
),%fp0
# multiply 2^(M)
7323 fmovm.x
(%a0
),&0x80 # load X
7325 fadd.s
&0x3F800000,%fp0
# 1+X in user mode
7330 cmp.
l %d1
,&0x400CB27C # 16480 log2
7333 fmov.x
(%a0
),%fp0
# load input from (a0)
7336 fmul.s
&0x42B8AA3B,%fp0
# 64/log2 * X
7337 fmovm.x
&0xc,-(%sp
) # save fp2 {%fp2/%fp3}
7338 mov.
l &1,ADJFLAG
(%a6
)
7339 fmov.
l %fp0
,%d1
# N = int( X * 64/log2 )
7340 lea EEXPTBL
(%pc
),%a1
7341 fmov.
l %d1
,%fp0
# convert to floating-format
7342 mov.
l %d1
,L_SCR1
(%a6
) # save N temporarily
7343 and.l &0x3F,%d1
# D0 is J = N mod 64
7345 add.l %d1
,%a1
# address of 2^(J/64)
7346 mov.
l L_SCR1
(%a6
),%d1
7347 asr.
l &6,%d1
# D0 is K
7348 mov.
l %d1
,L_SCR1
(%a6
) # save K temporarily
7349 asr.
l &1,%d1
# D0 is M1
7350 sub.l %d1
,L_SCR1
(%a6
) # a1 is M
7351 add.w
&0x3FFF,%d1
# biased expo. of 2^(M1)
7352 mov.w
%d1
,ADJSCALE
(%a6
) # ADJSCALE := 2^(M1)
7353 mov.
l &0x80000000,ADJSCALE+
4(%a6
)
7354 clr.
l ADJSCALE+
8(%a6
)
7355 mov.
l L_SCR1
(%a6
),%d1
# D0 is M
7356 add.w
&0x3FFF,%d1
# biased expo. of 2^(M)
7357 bra.w EXPCONT1
# go back to Step 3
7361 tst.
b (%a0
) # is X positive or negative?
7367 #--entry point for EXP(X), X is denormalized
7369 andi.l &0x80000000,(%sp
)
7370 ori.
l &0x00800000,(%sp
) # sign(X)*2^(-126)
7372 fmov.s
&0x3F800000,%fp0
7380 #--entry point for EXPM1(X), here X is finite, non-zero, non-NaN
7384 mov.
l (%a0
),%d1
# load part of input X
7385 and.l &0x7FFF0000,%d1
# biased expo. of X
7386 cmp.
l %d1
,&0x3FFD0000 # 1/4
7387 bge.
b EM1CON1
# |X| >= 1/4
7392 #--The case |X| >= 1/4
7393 mov.w
4(%a0
),%d1
# expo. and partial sig. of |X|
7394 cmp.
l %d1
,&0x4004C215 # 70log2 rounded up to 16 bits
7395 ble.
b EM1MAIN
# 1/4 <= |X| <= 70log2
7400 #--This is the case: 1/4 <= |X| <= 70 log2.
7401 fmov.x
(%a0
),%fp0
# load input from (a0)
7404 fmul.s
&0x42B8AA3B,%fp0
# 64/log2 * X
7405 fmovm.x
&0xc,-(%sp
) # save fp2 {%fp2/%fp3}
7406 fmov.
l %fp0
,%d1
# N = int( X * 64/log2 )
7407 lea EEXPTBL
(%pc
),%a1
7408 fmov.
l %d1
,%fp0
# convert to floating-format
7410 mov.
l %d1
,L_SCR1
(%a6
) # save N temporarily
7411 and.l &0x3F,%d1
# D0 is J = N mod 64
7413 add.l %d1
,%a1
# address of 2^(J/64)
7414 mov.
l L_SCR1
(%a6
),%d1
7415 asr.
l &6,%d1
# D0 is M
7416 mov.
l %d1
,L_SCR1
(%a6
) # save a copy of M
7419 #--fp1,fp2 saved on the stack. fp0 is N, fp1 is X,
7420 #--a0 points to 2^(J/64), D0 and a1 both contain M
7422 fmul.s
&0xBC317218,%fp0
# N * L1, L1 = lead(-log2/64)
7423 fmul.x L2
(%pc
),%fp2
# N * L2, L1+L2 = -log2/64
7424 fadd.x
%fp1
,%fp0
# X + N*L1
7425 fadd.x
%fp2
,%fp0
# fp0 is R, reduced arg.
7426 add.w
&0x3FFF,%d1
# D0 is biased expo. of 2^M
7429 #--WE NOW COMPUTE EXP(R)-1 BY A POLYNOMIAL
7430 #-- R + R*R*(A1 + R*(A2 + R*(A3 + R*(A4 + R*(A5 + R*A6)))))
7431 #--TO FULLY USE THE PIPELINE, WE COMPUTE S = R*R
7432 #--[R*S*(A2+S*(A4+S*A6))] + [R+S*(A1+S*(A3+S*A5))]
7435 fmul.x
%fp1
,%fp1
# fp1 IS S = R*R
7437 fmov.s
&0x3950097B,%fp2
# fp2 IS a6
7439 fmul.x
%fp1
,%fp2
# fp2 IS S*A6
7441 fmul.s
&0x3AB60B6A,%fp3
# fp3 IS S*A5
7443 fadd.d EM1A4
(%pc
),%fp2
# fp2 IS A4+S*A6
7444 fadd.d EM1A3
(%pc
),%fp3
# fp3 IS A3+S*A5
7445 mov.w
%d1
,SC(%a6
) # SC is 2^(M) in extended
7446 mov.
l &0x80000000,SC+
4(%a6
)
7449 fmul.x
%fp1
,%fp2
# fp2 IS S*(A4+S*A6)
7450 mov.
l L_SCR1
(%a6
),%d1
# D0 is M
7451 neg.w
%d1
# D0 is -M
7452 fmul.x
%fp1
,%fp3
# fp3 IS S*(A3+S*A5)
7453 add.w
&0x3FFF,%d1
# biased expo. of 2^(-M)
7454 fadd.d EM1A2
(%pc
),%fp2
# fp2 IS A2+S*(A4+S*A6)
7455 fadd.s
&0x3F000000,%fp3
# fp3 IS A1+S*(A3+S*A5)
7457 fmul.x
%fp1
,%fp2
# fp2 IS S*(A2+S*(A4+S*A6))
7458 or.w
&0x8000,%d1
# signed/expo. of -2^(-M)
7459 mov.w
%d1
,ONEBYSC
(%a6
) # OnebySc is -2^(-M)
7460 mov.
l &0x80000000,ONEBYSC+
4(%a6
)
7461 clr.
l ONEBYSC+
8(%a6
)
7462 fmul.x
%fp3
,%fp1
# fp1 IS S*(A1+S*(A3+S*A5))
7464 fmul.x
%fp0
,%fp2
# fp2 IS R*S*(A2+S*(A4+S*A6))
7465 fadd.x
%fp1
,%fp0
# fp0 IS R+S*(A1+S*(A3+S*A5))
7467 fadd.x
%fp2
,%fp0
# fp0 IS EXP(R)-1
7469 fmovm.x
(%sp
)+,&0x30 # fp2 restored {%fp2/%fp3}
7472 #--Compute 2^(J/64)*p
7474 fmul.x
(%a1
),%fp0
# 2^(J/64)*(Exp(R)-1)
7478 mov.
l L_SCR1
(%a6
),%d1
# retrieve M
7482 fmov.s
12(%a1
),%fp1
# fp1 is t
7483 fadd.x ONEBYSC
(%a6
),%fp1
# fp1 is t+OnebySc
7484 fadd.x
%fp1
,%fp0
# p+(t+OnebySc), fp1 released
7485 fadd.x
(%a1
),%fp0
# T+(p+(t+OnebySc))
7493 fadd.s
12(%a1
),%fp0
# p+t
7494 fadd.x
(%a1
),%fp0
# T+(p+t)
7495 fadd.x ONEBYSC
(%a6
),%fp0
# OnebySc + (T+(p+t))
7498 #--Step 6.5 -3 <= M <= 63
7499 fmov.x
(%a1
)+,%fp1
# fp1 is T
7500 fadd.s
(%a1
),%fp0
# fp0 is p+t
7501 fadd.x ONEBYSC
(%a6
),%fp1
# fp1 is T+OnebySc
7502 fadd.x
%fp1
,%fp0
# (T+OnebySc)+(p+t)
7511 #--Step 7 |X| < 1/4.
7512 cmp.
l %d1
,&0x3FBE0000 # 2^(-65)
7516 #--Step 8 |X| < 2^(-65)
7517 cmp.
l %d1
,&0x00330000 # 2^(-16312)
7520 mov.
l &0x80010000,SC(%a6
) # SC is -2^(-16382)
7521 mov.
l &0x80000000,SC+
4(%a6
)
7525 mov.
b &FADD_OP
,%d1
# last inst is ADD
7532 fmul.d TWO140
(%pc
),%fp0
7533 mov.
l &0x80010000,SC(%a6
)
7534 mov.
l &0x80000000,SC+
4(%a6
)
7538 mov.
b &FMUL_OP
,%d1
# last inst is MUL
7539 fmul.d TWON140
(%pc
),%fp0
7543 #--Step 9 exp(X)-1 by a simple polynomial
7544 fmov.x
(%a0
),%fp0
# fp0 is X
7545 fmul.x
%fp0
,%fp0
# fp0 is S := X*X
7546 fmovm.x
&0xc,-(%sp
) # save fp2 {%fp2/%fp3}
7547 fmov.s
&0x2F30CAA8,%fp1
# fp1 is B12
7548 fmul.x
%fp0
,%fp1
# fp1 is S*B12
7549 fmov.s
&0x310F8290,%fp2
# fp2 is B11
7550 fadd.s
&0x32D73220,%fp1
# fp1 is B10+S*B12
7552 fmul.x
%fp0
,%fp2
# fp2 is S*B11
7553 fmul.x
%fp0
,%fp1
# fp1 is S*(B10 + ...
7555 fadd.s
&0x3493F281,%fp2
# fp2 is B9+S*...
7556 fadd.d EM1B8
(%pc
),%fp1
# fp1 is B8+S*...
7558 fmul.x
%fp0
,%fp2
# fp2 is S*(B9+...
7559 fmul.x
%fp0
,%fp1
# fp1 is S*(B8+...
7561 fadd.d EM1B7
(%pc
),%fp2
# fp2 is B7+S*...
7562 fadd.d EM1B6
(%pc
),%fp1
# fp1 is B6+S*...
7564 fmul.x
%fp0
,%fp2
# fp2 is S*(B7+...
7565 fmul.x
%fp0
,%fp1
# fp1 is S*(B6+...
7567 fadd.d EM1B5
(%pc
),%fp2
# fp2 is B5+S*...
7568 fadd.d EM1B4
(%pc
),%fp1
# fp1 is B4+S*...
7570 fmul.x
%fp0
,%fp2
# fp2 is S*(B5+...
7571 fmul.x
%fp0
,%fp1
# fp1 is S*(B4+...
7573 fadd.d EM1B3
(%pc
),%fp2
# fp2 is B3+S*...
7574 fadd.x EM1B2
(%pc
),%fp1
# fp1 is B2+S*...
7576 fmul.x
%fp0
,%fp2
# fp2 is S*(B3+...
7577 fmul.x
%fp0
,%fp1
# fp1 is S*(B2+...
7579 fmul.x
%fp0
,%fp2
# fp2 is S*S*(B3+...)
7580 fmul.x
(%a0
),%fp1
# fp1 is X*S*(B2...
7582 fmul.s
&0x3F000000,%fp0
# fp0 is S*B1
7583 fadd.x
%fp2
,%fp1
# fp1 is Q
7585 fmovm.x
(%sp
)+,&0x30 # fp2 restored {%fp2/%fp3}
7587 fadd.x
%fp1
,%fp0
# fp0 is S*B1+Q
7594 #--Step 10 |X| > 70 log2
7599 fmov.s
&0xBF800000,%fp0
# fp0 is -1
7601 fadd.s
&0x00800000,%fp0
# -1 + 2^(-126)
7606 #--entry point for EXPM1(X), here X is denormalized
7610 #########################################################################
7611 # sgetexp(): returns the exponent portion of the input argument. #
7612 # The exponent bias is removed and the exponent value is #
7613 # returned as an extended precision number in fp0. #
7614 # sgetexpd(): handles denormalized numbers. #
7616 # sgetman(): extracts the mantissa of the input argument. The #
7617 # mantissa is converted to an extended precision number w/ #
7618 # an exponent of $3fff and is returned in fp0. The range of #
#	the result is [1.0, 2.0).					#
7620 # sgetmand(): handles denormalized numbers. #
7622 # INPUT *************************************************************** #
7623 # a0 = pointer to extended precision input #
7625 # OUTPUT ************************************************************** #
7626 # fp0 = exponent(X) or mantissa(X) #
7628 #########################################################################
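#
# [Editor's note -- illustrative sketch only, not part of the 060FPSP code.]
# The same exponent/mantissa split, sketched with C99 frexp(), which returns
# a fraction in [0.5,1.0); rescaling by 2 gives the [1.0,2.0) mantissa and
# an unbiased exponent (zeros, denorms, infinities and NaNs ignored here):
#
#	#include <math.h>
#	static double getexp_sketch(double x)
#	{
#	    int e;
#	    frexp(x, &e);
#	    return (double)(e - 1);          /* unbiased exponent of x       */
#	}
#	static double getman_sketch(double x)
#	{
#	    int e;
#	    return 2.0 * frexp(x, &e);       /* signed mantissa in [1.0,2.0) */
#	}
#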
7632 mov.w SRC_EX
(%a0
),%d0
# get the exponent
7633 bclr &0xf,%d0
# clear the sign bit
7634 subi.w
&0x3fff,%d0
# subtract off the bias
7635 fmov.w
%d0
,%fp0
# return exp in fp0
7636 blt.
b sgetexpn
# it's negative
7640 mov.
b &neg_bmask
,FPSR_CC
(%a6
) # set 'N' ccode bit
7645 bsr.
l norm
# normalize
7646 neg.w
%d0
# new exp = -(shft amt)
7647 subi.w
&0x3fff,%d0
# subtract off the bias
7648 fmov.w
%d0
,%fp0
# return exp in fp0
7649 mov.
b &neg_bmask
,FPSR_CC
(%a6
) # set 'N' ccode bit
7654 mov.w SRC_EX
(%a0
),%d0
# get the exp
7655 ori.w
&0x7fff,%d0
# clear old exp
7656 bclr &0xe,%d0
# make it the new exp +-3fff
7658 # here, we build the result in a tmp location so as not to disturb the input
7659 mov.
l SRC_HI
(%a0
),FP_SCR0_HI
(%a6
) # copy to tmp loc
7660 mov.
l SRC_LO
(%a0
),FP_SCR0_LO
(%a6
) # copy to tmp loc
7661 mov.w
%d0
,FP_SCR0_EX
(%a6
) # insert new exponent
7662 fmov.x FP_SCR0
(%a6
),%fp0
# put new value back in fp0
7663 bmi.
b sgetmann
# it's negative
7667 mov.
b &neg_bmask
,FPSR_CC
(%a6
) # set 'N' ccode bit
7671 # For denormalized numbers, shift the mantissa until the j-bit = 1,
# then load the exponent with +/- $3fff.
7676 bsr.
l norm
# normalize exponent
7679 #########################################################################
7680 # scosh(): computes the hyperbolic cosine of a normalized input #
7681 # scoshd(): computes the hyperbolic cosine of a denormalized input #
7683 # INPUT *************************************************************** #
7684 # a0 = pointer to extended precision input #
7685 # d0 = round precision,mode #
7687 # OUTPUT ************************************************************** #
7690 # ACCURACY and MONOTONICITY ******************************************* #
#	The returned result is within 3 ulps in 64 significant bits,	#
7692 # i.e. within 0.5001 ulp to 53 bits if the result is subsequently #
7693 # rounded to double precision. The result is provably monotonic #
7694 # in double precision. #
7696 # ALGORITHM *********************************************************** #
7699 # 1. If |X| > 16380 log2, go to 3. #
7701 # 2. (|X| <= 16380 log2) Cosh(X) is obtained by the formulae #
7702 # y = |X|, z = exp(Y), and #
7703 # cosh(X) = (1/2)*( z + 1/z ). #
7706 # 3. (|X| > 16380 log2). If |X| > 16480 log2, go to 5. #
7708 # 4. (16380 log2 < |X| <= 16480 log2) #
#		cosh(X) = exp(|X|)/2.					#
#		However, invoking exp(|X|) may cause premature		#
#		overflow. Thus, we calculate cosh(X) as follows:	#
7713 # Fact := 2**(16380) #
7714 # Y' := Y - 16381 log2 #
7715 # cosh(X) := Fact * exp(Y'). #
#	5. (|X| > 16480 log2) cosh(X) must overflow. Return		#
7719 # Huge*Huge to generate overflow and an infinity with #
7720 # the appropriate sign. Huge is the largest finite number #
7721 # in extended format. Exit. #
7723 #########################################################################
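#
# [Editor's note -- illustrative sketch only, not part of the 060FPSP code.]
# Step 2 above in C99 double precision: z = exp(|X|) is computed once and
# cosh(X) = (1/2)*(z + 1/z) is formed the same way the code does, as
# (1/2)z + (1/4)/((1/2)z).  The 2^16380 rescale for huge |X| is omitted:
#
#	#include <math.h>
#	static double cosh_sketch(double x)
#	{
#	    double half_z = 0.5 * exp(fabs(x));   /* (1/2) EXP(|X|)        */
#	    return half_z + 0.25 / half_z;        /* + 1/(2 EXP(|X|))      */
#	}
#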
7726 long
0x7FFB0000,0x80000000,0x00000000,0x00000000
7730 fmov.x
(%a0
),%fp0
# LOAD INPUT
7734 and.l &0x7FFFFFFF,%d1
7735 cmp.
l %d1
,&0x400CB167
7738 #--THIS IS THE USUAL CASE, |X| < 16380 LOG2
7739 #--COSH(X) = (1/2) * ( EXP(X) + 1/EXP(X) )
7745 fmovm.x
&0x01,-(%sp
) # save |X| to stack
7746 lea
(%sp
),%a0
# pass ptr to |X|
7747 bsr setox
# FP0 IS EXP(|X|)
7748 add.l &0xc,%sp
# erase |X| from stack
7749 fmul.s
&0x3F000000,%fp0
# (1/2)EXP(|X|)
7752 fmov.s
&0x3E800000,%fp1
# (1/4)
7753 fdiv.x
%fp0
,%fp1
# 1/(2 EXP(|X|))
7756 mov.
b &FADD_OP
,%d1
# last inst is ADD
7761 cmp.
l %d1
,&0x400CB2B3
7765 fsub.d T1
(%pc
),%fp0
# (|X|-16381LOG2_LEAD)
7766 fsub.d T2
(%pc
),%fp0
# |X| - 16381 LOG2, ACCURATE
7770 fmovm.x
&0x01,-(%sp
) # save fp0 to stack
7771 lea
(%sp
),%a0
# pass ptr to fp0
7773 add.l &0xc,%sp
# clear fp0 from stack
7777 mov.
b &FMUL_OP
,%d1
# last inst is MUL
7778 fmul.x TWO16380
(%pc
),%fp0
7785 #--COSH(X) = 1 FOR DENORMALIZED X
7787 fmov.s
&0x3F800000,%fp0
7790 fadd.s
&0x00800000,%fp0
7793 #########################################################################
7794 # ssinh(): computes the hyperbolic sine of a normalized input #
7795 # ssinhd(): computes the hyperbolic sine of a denormalized input #
7797 # INPUT *************************************************************** #
7798 # a0 = pointer to extended precision input #
7799 # d0 = round precision,mode #
7801 # OUTPUT ************************************************************** #
7804 # ACCURACY and MONOTONICITY ******************************************* #
#	The returned result is within 3 ulps in 64 significant bits,	#
7806 # i.e. within 0.5001 ulp to 53 bits if the result is subsequently #
7807 # rounded to double precision. The result is provably monotonic #
7808 # in double precision. #
7810 # ALGORITHM *********************************************************** #
7813 # 1. If |X| > 16380 log2, go to 3. #
7815 # 2. (|X| <= 16380 log2) Sinh(X) is obtained by the formula #
7816 # y = |X|, sgn = sign(X), and z = expm1(Y), #
7817 # sinh(X) = sgn*(1/2)*( z + z/(1+z) ). #
7820 # 3. If |X| > 16480 log2, go to 5. #
7822 # 4. (16380 log2 < |X| <= 16480 log2) #
7823 # sinh(X) = sign(X) * exp(|X|)/2. #
7824 # However, invoking exp(|X|) may cause premature overflow. #
7825 # Thus, we calculate sinh(X) as follows: #
7828 # sgnFact := sgn * 2**(16380) #
7829 # Y' := Y - 16381 log2 #
7830 # sinh(X) := sgnFact * exp(Y'). #
7833 # 5. (|X| > 16480 log2) sinh(X) must overflow. Return #
7834 # sign(X)*Huge*Huge to generate overflow and an infinity with #
7835 # the appropriate sign. Huge is the largest finite number in #
7836 # extended format. Exit. #
7838 #########################################################################
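#
# [Editor's note -- illustrative sketch only, not part of the 060FPSP code.]
# Step 2 above in C99 double precision: z = expm1(|X|), and
# sinh(X) = sign(X)*(1/2)*(z + z/(1+z)), which avoids cancellation for
# small |X|.  The 2^16380 rescale for huge |X| is omitted:
#
#	#include <math.h>
#	static double sinh_sketch(double x)
#	{
#	    double z = expm1(fabs(x));
#	    double s = 0.5 * (z + z / (1.0 + z));
#	    return (x < 0.0) ? -s : s;
#	}
#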
7842 fmov.x
(%a0
),%fp0
# LOAD INPUT
7846 mov.
l %d1
,%a1
# save (compacted) operand
7847 and.l &0x7FFFFFFF,%d1
7848 cmp.
l %d1
,&0x400CB167
7851 #--THIS IS THE USUAL CASE, |X| < 16380 LOG2
7852 #--Y = |X|, Z = EXPM1(Y), SINH(X) = SIGN(X)*(1/2)*( Z + Z/(1+Z) )
7854 fabs.x
%fp0
# Y = |X|
7856 movm.
l &0x8040,-(%sp
) # {a1/d0}
7857 fmovm.x
&0x01,-(%sp
) # save Y on stack
7858 lea
(%sp
),%a0
# pass ptr to Y
7860 bsr setoxm1
# FP0 IS Z = EXPM1(Y)
7861 add.l &0xc,%sp
# clear Y from stack
7863 movm.
l (%sp
)+,&0x0201 # {a1/d0}
7866 fadd.s
&0x3F800000,%fp1
# 1+Z
7868 fdiv.x
%fp1
,%fp0
# Z/(1+Z)
7870 and.l &0x80000000,%d1
7871 or.l &0x3F000000,%d1
7876 mov.
b &FMUL_OP
,%d1
# last inst is MUL
7877 fmul.s
(%sp
)+,%fp0
# last fp inst - possible exceptions set
7881 cmp.
l %d1
,&0x400CB2B3
7884 fsub.d T1
(%pc
),%fp0
# (|X|-16381LOG2_LEAD)
7886 mov.
l &0x80000000,-(%sp
)
7888 and.l &0x80000000,%d1
7889 or.l &0x7FFB0000,%d1
7890 mov.
l %d1
,-(%sp
) # EXTENDED FMT
7891 fsub.d T2
(%pc
),%fp0
# |X| - 16381 LOG2, ACCURATE
7895 fmovm.x
&0x01,-(%sp
) # save fp0 on stack
7896 lea
(%sp
),%a0
# pass ptr to fp0
7898 add.l &0xc,%sp
# clear fp0 from stack
7902 mov.
b &FMUL_OP
,%d1
# last inst is MUL
7903 fmul.x
(%sp
)+,%fp0
# possible exception
7907 #--SINH(X) = X FOR DENORMALIZED X
7911 #########################################################################
7912 # stanh(): computes the hyperbolic tangent of a normalized input #
7913 # stanhd(): computes the hyperbolic tangent of a denormalized input #
7915 # INPUT *************************************************************** #
7916 # a0 = pointer to extended precision input #
7917 # d0 = round precision,mode #
7919 # OUTPUT ************************************************************** #
7922 # ACCURACY and MONOTONICITY ******************************************* #
#	The returned result is within 3 ulps in 64 significant bits,	#
7924 # i.e. within 0.5001 ulp to 53 bits if the result is subsequently #
7925 # rounded to double precision. The result is provably monotonic #
7926 # in double precision. #
7928 # ALGORITHM *********************************************************** #
7931 # 1. If |X| >= (5/2) log2 or |X| <= 2**(-40), go to 3. #
7933 # 2. (2**(-40) < |X| < (5/2) log2) Calculate tanh(X) by #
7934 # sgn := sign(X), y := 2|X|, z := expm1(Y), and #
7935 # tanh(X) = sgn*( z/(2+z) ). #
#	3. (|X| <= 2**(-40) or |X| >= (5/2) log2). If |X| < 1,		#
#	   go to 7; otherwise, go to 4.					#
7941 # 4. (|X| >= (5/2) log2) If |X| >= 50 log2, go to 6. #
7943 # 5. ((5/2) log2 <= |X| < 50 log2) Calculate tanh(X) by #
7944 # sgn := sign(X), y := 2|X|, z := exp(Y), #
7945 # tanh(X) = sgn - [ sgn*2/(1+z) ]. #
7948 # 6. (|X| >= 50 log2) Tanh(X) = +-1 (round to nearest). Thus, we #
7949 # calculate Tanh(X) by #
7950 # sgn := sign(X), Tiny := 2**(-126), #
7951 # tanh(X) := sgn - sgn*Tiny. #
7954 # 7. (|X| < 2**(-40)). Tanh(X) = X. Exit. #
7956 #########################################################################
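#
# [Editor's note -- illustrative sketch only, not part of the 060FPSP code.]
# Step 2 above in C99 double precision: with z = expm1(2|X|),
# tanh(X) = sign(X) * z/(2+z); the |X| >= (5/2)log2 and tiny-|X| branches
# are left out:
#
#	#include <math.h>
#	static double tanh_sketch(double x)
#	{
#	    double z = expm1(2.0 * fabs(x));
#	    double t = z / (2.0 + z);
#	    return (x < 0.0) ? -t : t;
#	}
#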
7967 fmov.x
(%a0
),%fp0
# LOAD INPUT
7973 and.l &0x7FFFFFFF,%d1
7974 cmp.
l %d1
, &0x3fd78000 # is |X| < 2^(-40)?
7975 blt.w TANHBORS
# yes
7976 cmp.
l %d1
, &0x3fffddce # is |X| > (5/2)LOG2?
7977 bgt.w TANHBORS
# yes
7979 #--THIS IS THE USUAL CASE
7980 #--Y = 2|X|, Z = EXPM1(Y), TANH(X) = SIGN(X) * Z / (Z+2).
7984 and.l &0x7FFF0000,%d1
7985 add.l &0x00010000,%d1
# EXPONENT OF 2|X|
7987 and.l &0x80000000,SGN
(%a6
)
7988 fmov.x X
(%a6
),%fp0
# FP0 IS Y = 2|X|
7992 fmovm.x
&0x1,-(%sp
) # save Y on stack
7993 lea
(%sp
),%a0
# pass ptr to Y
7994 bsr setoxm1
# FP0 IS Z = EXPM1(Y)
7995 add.l &0xc,%sp
# clear Y from stack
7999 fadd.s
&0x40000000,%fp1
# Z+2
8004 fmov.
l %d0
,%fpcr
# restore users round prec,mode
8009 cmp.
l %d1
,&0x3FFF8000
8012 cmp.
l %d1
,&0x40048AA1
8015 #-- (5/2) LOG2 < |X| < 50 LOG2,
8016 #--TANH(X) = 1 - (2/[EXP(2X)+1]). LET Y = 2|X|, SGN = SIGN(X),
8017 #--TANH(X) = SGN - SGN*2/[EXP(Y)+1].
8021 and.l &0x7FFF0000,%d1
8022 add.l &0x00010000,%d1
# EXPO OF 2|X|
8023 mov.
l %d1
,X
(%a6
) # Y = 2|X|
8024 and.l &0x80000000,SGN
(%a6
)
8026 fmov.x X
(%a6
),%fp0
# Y = 2|X|
8030 fmovm.x
&0x01,-(%sp
) # save Y on stack
8031 lea
(%sp
),%a0
# pass ptr to Y
8032 bsr setox
# FP0 IS EXP(Y)
8033 add.l &0xc,%sp
# clear Y from stack
8036 fadd.s
&0x3F800000,%fp0
# EXP(Y)+1
8038 eor.
l &0xC0000000,%d1
# -SIGN(X)*2
8039 fmov.s
%d1
,%fp1
# -SIGN(X)*2 IN SGL FMT
8040 fdiv.x
%fp0
,%fp1
# -SIGN(X)2 / [EXP(Y)+1 ]
8043 or.l &0x3F800000,%d1
# SGN
8044 fmov.s
%d1
,%fp0
# SGN IN SGL FMT
8046 fmov.
l %d0
,%fpcr
# restore users round prec,mode
8047 mov.
b &FADD_OP
,%d1
# last inst is ADD
8052 fmov.
l %d0
,%fpcr
# restore users round prec,mode
8053 mov.
b &FMOV_OP
,%d1
# last inst is MOVE
8054 fmov.x X
(%a6
),%fp0
# last inst - possible exception set
8057 #---RETURN SGN(X) - SGN(X)EPS
8060 and.l &0x80000000,%d1
8061 or.l &0x3F800000,%d1
8063 and.l &0x80000000,%d1
8064 eor.
l &0x80800000,%d1
# -SIGN(X)*EPS
8066 fmov.
l %d0
,%fpcr
# restore users round prec,mode
8071 #--TANH(X) = X FOR DENORMALIZED X
8075 #########################################################################
8076 # slogn(): computes the natural logarithm of a normalized input #
8077 # slognd(): computes the natural logarithm of a denormalized input #
8078 # slognp1(): computes the log(1+X) of a normalized input #
8079 # slognp1d(): computes the log(1+X) of a denormalized input #
8081 # INPUT *************************************************************** #
8082 # a0 = pointer to extended precision input #
8083 # d0 = round precision,mode #
8085 # OUTPUT ************************************************************** #
8086 # fp0 = log(X) or log(1+X) #
8088 # ACCURACY and MONOTONICITY ******************************************* #
#	The returned result is within 2 ulps in 64 significant bits,	#
8090 # i.e. within 0.5001 ulp to 53 bits if the result is subsequently #
8091 # rounded to double precision. The result is provably monotonic #
8092 # in double precision. #
8094 # ALGORITHM *********************************************************** #
8096 # Step 1. If |X-1| < 1/16, approximate log(X) by an odd #
8097 # polynomial in u, where u = 2(X-1)/(X+1). Otherwise, #
8098 # move on to Step 2. #
8100 # Step 2. X = 2**k * Y where 1 <= Y < 2. Define F to be the first #
8101 # seven significant bits of Y plus 2**(-7), i.e. #
8102 # F = 1.xxxxxx1 in base 2 where the six "x" match those #
8103 # of Y. Note that |Y-F| <= 2**(-7). #
8105 # Step 3. Define u = (Y-F)/F. Approximate log(1+u) by a #
8106 # polynomial in u, log(1+u) = poly. #
8108 # Step 4. Reconstruct #
8109 # log(X) = log( 2**k * Y ) = k*log(2) + log(F) + log(1+u) #
8110 # by k*log(2) + (log(F) + poly). The values of log(F) are #
8111 # calculated beforehand and stored in the program. #
8114 # Step 1: If |X| < 1/16, approximate log(1+X) by an odd #
8115 # polynomial in u where u = 2X/(2+X). Otherwise, move on #
8118 # Step 2: Let 1+X = 2**k * Y, where 1 <= Y < 2. Define F as done #
8119 # in Step 2 of the algorithm for LOGN and compute #
8120 # log(1+X) as k*log(2) + log(F) + poly where poly #
8121 # approximates log(1+u), u = (Y-F)/F. #
8123 # Implementation Notes: #
8124 # Note 1. There are 64 different possible values for F, thus 64 #
8125 # log(F)'s need to be tabulated. Moreover, the values of #
8126 # 1/F are also tabulated so that the division in (Y-F)/F #
8127 # can be performed by a multiplication. #
#	Note 2. In Step 2 of lognp1, in order to preserve accuracy,	#
8130 # the value Y-F has to be calculated carefully when #
8133 # Note 3. To fully exploit the pipeline, polynomials are usually #
#	   separated into two parts evaluated independently before	#
#	   being added together.					#
8137 #########################################################################
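#
# [Editor's note -- illustrative sketch only, not part of the 060FPSP code.]
# Steps 2-4 of the LOGN scheme above in C99 double precision: F keeps the
# first six fraction bits of Y plus 2^(-7), and log()/log1p() stand in for
# the tabulated log(F) and the polynomial (the table of 1/F is what removes
# the divide in the real code):
#
#	#include <math.h>
#	static double log_sketch(double x)
#	{
#	    int k;
#	    double y = 2.0 * frexp(x, &k);                  /* x = 2^(k-1)*y */
#	    k -= 1;                                         /* 1 <= y < 2    */
#	    double f = floor(y * 64.0) / 64.0 + 1.0/128.0;  /* F = 1.xxxxxx1 */
#	    double u = (y - f) / f;                         /* |u| <= 2^(-7) */
#	    return k * M_LN2 + log(f) + log1p(u);
#	}
#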
8139 long
0x3FFE0000,0xB17217F7,0xD1CF79AC,0x00000000
8151 long
0x3FC2499A,0xB5E4040B
8153 long
0xBFC555B5,0x848CB7DB
8156 long
0x3FC99999,0x987D8730
8158 long
0xBFCFFFFF,0xFF6F7E97
8161 long
0x3FD55555,0x555555A4
8163 long
0xBFE00000,0x00000008
8166 long
0x3F175496,0xADD7DAD6
8168 long
0x3F3C71C2,0xFE80C7E0
8171 long
0x3F624924,0x928BCCFF
8173 long
0x3F899999,0x999995EC
8176 long
0x3FB55555,0x55555555
8178 long
0x40000000,0x00000000
8181 long
0x3f990000,0x80000000,0x00000000,0x00000000
8184 long
0x3FFE0000,0xFE03F80F,0xE03F80FE,0x00000000
8185 long
0x3FF70000,0xFF015358,0x833C47E2,0x00000000
8186 long
0x3FFE0000,0xFA232CF2,0x52138AC0,0x00000000
8187 long
0x3FF90000,0xBDC8D83E,0xAD88D549,0x00000000
8188 long
0x3FFE0000,0xF6603D98,0x0F6603DA,0x00000000
8189 long
0x3FFA0000,0x9CF43DCF,0xF5EAFD48,0x00000000
8190 long
0x3FFE0000,0xF2B9D648,0x0F2B9D65,0x00000000
8191 long
0x3FFA0000,0xDA16EB88,0xCB8DF614,0x00000000
8192 long
0x3FFE0000,0xEF2EB71F,0xC4345238,0x00000000
8193 long
0x3FFB0000,0x8B29B775,0x1BD70743,0x00000000
8194 long
0x3FFE0000,0xEBBDB2A5,0xC1619C8C,0x00000000
8195 long
0x3FFB0000,0xA8D839F8,0x30C1FB49,0x00000000
8196 long
0x3FFE0000,0xE865AC7B,0x7603A197,0x00000000
8197 long
0x3FFB0000,0xC61A2EB1,0x8CD907AD,0x00000000
8198 long
0x3FFE0000,0xE525982A,0xF70C880E,0x00000000
8199 long
0x3FFB0000,0xE2F2A47A,0xDE3A18AF,0x00000000
8200 long
0x3FFE0000,0xE1FC780E,0x1FC780E2,0x00000000
8201 long
0x3FFB0000,0xFF64898E,0xDF55D551,0x00000000
8202 long
0x3FFE0000,0xDEE95C4C,0xA037BA57,0x00000000
8203 long
0x3FFC0000,0x8DB956A9,0x7B3D0148,0x00000000
8204 long
0x3FFE0000,0xDBEB61EE,0xD19C5958,0x00000000
8205 long
0x3FFC0000,0x9B8FE100,0xF47BA1DE,0x00000000
8206 long
0x3FFE0000,0xD901B203,0x6406C80E,0x00000000
8207 long
0x3FFC0000,0xA9372F1D,0x0DA1BD17,0x00000000
8208 long
0x3FFE0000,0xD62B80D6,0x2B80D62C,0x00000000
8209 long
0x3FFC0000,0xB6B07F38,0xCE90E46B,0x00000000
8210 long
0x3FFE0000,0xD3680D36,0x80D3680D,0x00000000
8211 long
0x3FFC0000,0xC3FD0329,0x06488481,0x00000000
8212 long
0x3FFE0000,0xD0B69FCB,0xD2580D0B,0x00000000
8213 long
0x3FFC0000,0xD11DE0FF,0x15AB18CA,0x00000000
8214 long
0x3FFE0000,0xCE168A77,0x25080CE1,0x00000000
8215 long
0x3FFC0000,0xDE1433A1,0x6C66B150,0x00000000
8216 long
0x3FFE0000,0xCB8727C0,0x65C393E0,0x00000000
8217 long
0x3FFC0000,0xEAE10B5A,0x7DDC8ADD,0x00000000
8218 long
0x3FFE0000,0xC907DA4E,0x871146AD,0x00000000
8219 long
0x3FFC0000,0xF7856E5E,0xE2C9B291,0x00000000
8220 long
0x3FFE0000,0xC6980C69,0x80C6980C,0x00000000
8221 long
0x3FFD0000,0x82012CA5,0xA68206D7,0x00000000
8222 long
0x3FFE0000,0xC4372F85,0x5D824CA6,0x00000000
8223 long
0x3FFD0000,0x882C5FCD,0x7256A8C5,0x00000000
8224 long
0x3FFE0000,0xC1E4BBD5,0x95F6E947,0x00000000
8225 long
0x3FFD0000,0x8E44C60B,0x4CCFD7DE,0x00000000
8226 long
0x3FFE0000,0xBFA02FE8,0x0BFA02FF,0x00000000
8227 long
0x3FFD0000,0x944AD09E,0xF4351AF6,0x00000000
8228 long
0x3FFE0000,0xBD691047,0x07661AA3,0x00000000
8229 long
0x3FFD0000,0x9A3EECD4,0xC3EAA6B2,0x00000000
8230 long
0x3FFE0000,0xBB3EE721,0xA54D880C,0x00000000
8231 long
0x3FFD0000,0xA0218434,0x353F1DE8,0x00000000
8232 long
0x3FFE0000,0xB92143FA,0x36F5E02E,0x00000000
8233 long
0x3FFD0000,0xA5F2FCAB,0xBBC506DA,0x00000000
8234 long
0x3FFE0000,0xB70FBB5A,0x19BE3659,0x00000000
8235 long
0x3FFD0000,0xABB3B8BA,0x2AD362A5,0x00000000
8236 long
0x3FFE0000,0xB509E68A,0x9B94821F,0x00000000
8237 long
0x3FFD0000,0xB1641795,0xCE3CA97B,0x00000000
8238 long
0x3FFE0000,0xB30F6352,0x8917C80B,0x00000000
8239 long
0x3FFD0000,0xB7047551,0x5D0F1C61,0x00000000
8240 long
0x3FFE0000,0xB11FD3B8,0x0B11FD3C,0x00000000
8241 long
0x3FFD0000,0xBC952AFE,0xEA3D13E1,0x00000000
8242 long
0x3FFE0000,0xAF3ADDC6,0x80AF3ADE,0x00000000
8243 long
0x3FFD0000,0xC2168ED0,0xF458BA4A,0x00000000
8244 long
0x3FFE0000,0xAD602B58,0x0AD602B6,0x00000000
8245 long
0x3FFD0000,0xC788F439,0xB3163BF1,0x00000000
8246 long
0x3FFE0000,0xAB8F69E2,0x8359CD11,0x00000000
8247 long
0x3FFD0000,0xCCECAC08,0xBF04565D,0x00000000
8248 long
0x3FFE0000,0xA9C84A47,0xA07F5638,0x00000000
8249 long
0x3FFD0000,0xD2420487,0x2DD85160,0x00000000
8250 long
0x3FFE0000,0xA80A80A8,0x0A80A80B,0x00000000
8251 long
0x3FFD0000,0xD7894992,0x3BC3588A,0x00000000
8252 long
0x3FFE0000,0xA655C439,0x2D7B73A8,0x00000000
8253 long
0x3FFD0000,0xDCC2C4B4,0x9887DACC,0x00000000
8254 long
0x3FFE0000,0xA4A9CF1D,0x96833751,0x00000000
8255 long
0x3FFD0000,0xE1EEBD3E,0x6D6A6B9E,0x00000000
8256 long
0x3FFE0000,0xA3065E3F,0xAE7CD0E0,0x00000000
8257 long
0x3FFD0000,0xE70D785C,0x2F9F5BDC,0x00000000
8258 long
0x3FFE0000,0xA16B312E,0xA8FC377D,0x00000000
8259 long
0x3FFD0000,0xEC1F392C,0x5179F283,0x00000000
8260 long
0x3FFE0000,0x9FD809FD,0x809FD80A,0x00000000
8261 long
0x3FFD0000,0xF12440D3,0xE36130E6,0x00000000
8262 long
0x3FFE0000,0x9E4CAD23,0xDD5F3A20,0x00000000
8263 long
0x3FFD0000,0xF61CCE92,0x346600BB,0x00000000
8264 long
0x3FFE0000,0x9CC8E160,0xC3FB19B9,0x00000000
8265 long
0x3FFD0000,0xFB091FD3,0x8145630A,0x00000000
8266 long
0x3FFE0000,0x9B4C6F9E,0xF03A3CAA,0x00000000
8267 long
0x3FFD0000,0xFFE97042,0xBFA4C2AD,0x00000000
8268 long
0x3FFE0000,0x99D722DA,0xBDE58F06,0x00000000
8269 long
0x3FFE0000,0x825EFCED,0x49369330,0x00000000
8270 long
0x3FFE0000,0x9868C809,0x868C8098,0x00000000
8271 long
0x3FFE0000,0x84C37A7A,0xB9A905C9,0x00000000
8272 long
0x3FFE0000,0x97012E02,0x5C04B809,0x00000000
8273 long
0x3FFE0000,0x87224C2E,0x8E645FB7,0x00000000
8274 long
0x3FFE0000,0x95A02568,0x095A0257,0x00000000
8275 long
0x3FFE0000,0x897B8CAC,0x9F7DE298,0x00000000
8276 long
0x3FFE0000,0x94458094,0x45809446,0x00000000
8277 long
0x3FFE0000,0x8BCF55DE,0xC4CD05FE,0x00000000
8278 long
0x3FFE0000,0x92F11384,0x0497889C,0x00000000
8279 long
0x3FFE0000,0x8E1DC0FB,0x89E125E5,0x00000000
8280 long
0x3FFE0000,0x91A2B3C4,0xD5E6F809,0x00000000
8281 long
0x3FFE0000,0x9066E68C,0x955B6C9B,0x00000000
8282 long
0x3FFE0000,0x905A3863,0x3E06C43B,0x00000000
8283 long
0x3FFE0000,0x92AADE74,0xC7BE59E0,0x00000000
8284 long
0x3FFE0000,0x8F1779D9,0xFDC3A219,0x00000000
8285 long
0x3FFE0000,0x94E9BFF6,0x15845643,0x00000000
8286 long
0x3FFE0000,0x8DDA5202,0x37694809,0x00000000
8287 long
0x3FFE0000,0x9723A1B7,0x20134203,0x00000000
8288 long
0x3FFE0000,0x8CA29C04,0x6514E023,0x00000000
8289 long
0x3FFE0000,0x995899C8,0x90EB8990,0x00000000
8290 long
0x3FFE0000,0x8B70344A,0x139BC75A,0x00000000
8291 long
0x3FFE0000,0x9B88BDAA,0x3A3DAE2F,0x00000000
8292 long
0x3FFE0000,0x8A42F870,0x5669DB46,0x00000000
8293 long
0x3FFE0000,0x9DB4224F,0xFFE1157C,0x00000000
8294 long
0x3FFE0000,0x891AC73A,0xE9819B50,0x00000000
8295 long
0x3FFE0000,0x9FDADC26,0x8B7A12DA,0x00000000
8296 long
0x3FFE0000,0x87F78087,0xF78087F8,0x00000000
8297 long
0x3FFE0000,0xA1FCFF17,0xCE733BD4,0x00000000
8298 long
0x3FFE0000,0x86D90544,0x7A34ACC6,0x00000000
8299 long
0x3FFE0000,0xA41A9E8F,0x5446FB9F,0x00000000
8300 long
0x3FFE0000,0x85BF3761,0x2CEE3C9B,0x00000000
8301 long
0x3FFE0000,0xA633CD7E,0x6771CD8B,0x00000000
8302 long
0x3FFE0000,0x84A9F9C8,0x084A9F9D,0x00000000
8303 long
0x3FFE0000,0xA8489E60,0x0B435A5E,0x00000000
8304 long
0x3FFE0000,0x83993052,0x3FBE3368,0x00000000
8305 long
0x3FFE0000,0xAA59233C,0xCCA4BD49,0x00000000
8306 long
0x3FFE0000,0x828CBFBE,0xB9A020A3,0x00000000
8307 long
0x3FFE0000,0xAC656DAE,0x6BCC4985,0x00000000
8308 long
0x3FFE0000,0x81848DA8,0xFAF0D277,0x00000000
8309 long
0x3FFE0000,0xAE6D8EE3,0x60BB2468,0x00000000
8310 long
0x3FFE0000,0x80808080,0x80808081,0x00000000
8311 long
0x3FFE0000,0xB07197A2,0x3C46C654,0x00000000
8327 #--ENTRY POINT FOR LOG(X) FOR X FINITE, NON-ZERO, NOT NAN'S
8329 fmov.x
(%a0
),%fp0
# LOAD INPUT
8330 mov.
l &0x00000000,ADJK
(%a6
)
8333 #--FPCR SAVED AND CLEARED, INPUT IS 2^(ADJK)*FP0, FP0 CONTAINS
8334 #--A FINITE, NON-ZERO, NORMALIZED NUMBER.
8340 mov.
l 4(%a0
),X+
4(%a6
)
8341 mov.
l 8(%a0
),X+
8(%a6
)
8343 cmp.
l %d1
,&0 # CHECK IF X IS NEGATIVE
8344 blt.w LOGNEG
# LOG OF NEGATIVE ARGUMENT IS INVALID
8345 # X IS POSITIVE, CHECK IF X IS NEAR 1
8346 cmp.
l %d1
,&0x3ffef07d # IS X < 15/16?
8348 cmp.
l %d1
,&0x3fff8841 # IS X > 17/16?
8352 #--THIS SHOULD BE THE USUAL CASE, X NOT VERY CLOSE TO 1
8354 #--X = 2^(K) * Y, 1 <= Y < 2. THUS, Y = 1.XXXXXXXX....XX IN BINARY.
8355 #--WE DEFINE F = 1.XXXXXX1, I.E. FIRST 7 BITS OF Y AND ATTACH A 1.
8356 #--THE IDEA IS THAT LOG(X) = K*LOG2 + LOG(Y)
8357 #-- = K*LOG2 + LOG(F) + LOG(1 + (Y-F)/F).
8358 #--NOTE THAT U = (Y-F)/F IS VERY SMALL AND THUS APPROXIMATING
8359 #--LOG(1+U) CAN BE VERY EFFICIENT.
8360 #--ALSO NOTE THAT THE VALUE 1/F IS STORED IN A TABLE SO THAT NO
8361 #--DIVISION IS NEEDED TO CALCULATE (Y-F)/F.
8363 #--GET K, Y, F, AND ADDRESS OF 1/F.
8365 asr.
l &8,%d1
# SHIFTED 16 BITS, BIASED EXPO. OF X
8366 sub.l &0x3FFF,%d1
# THIS IS K
8367 add.l ADJK
(%a6
),%d1
# ADJUST K, ORIGINAL INPUT MAY BE DENORM.
8368 lea LOGTBL
(%pc
),%a0
# BASE ADDRESS OF 1/F AND LOG(F)
8369 fmov.
l %d1
,%fp1
# CONVERT K TO FLOATING-POINT FORMAT
8371 #--WHILE THE CONVERSION IS GOING ON, WE GET F AND ADDRESS OF 1/F
8372 mov.
l &0x3FFF0000,X
(%a6
) # X IS NOW Y, I.E. 2^(-K)*X
8373 mov.
l XFRAC
(%a6
),FFRAC
(%a6
)
8374 and.l &0xFE000000,FFRAC
(%a6
) # FIRST 7 BITS OF Y
8375 or.l &0x01000000,FFRAC
(%a6
) # GET F: ATTACH A 1 AT THE EIGHTH BIT
8376 mov.
l FFRAC
(%a6
),%d1
# READY TO GET ADDRESS OF 1/F
8377 and.l &0x7E000000,%d1
8380 asr.
l &4,%d1
# SHIFTED 20, D0 IS THE DISPLACEMENT
8381 add.l %d1
,%a0
# A0 IS THE ADDRESS FOR 1/F
8384 mov.
l &0x3fff0000,F
(%a6
)
8386 fsub.x F
(%a6
),%fp0
# Y-F
8387 fmovm.x
&0xc,-(%sp
) # SAVE FP2-3 WHILE FP0 IS NOT READY
8388 #--SUMMARY: FP0 IS Y-F, A0 IS ADDRESS OF 1/F, FP1 IS K
8389 #--REGISTERS SAVED: FPCR, FP1, FP2
8392 #--AN RE-ENTRY POINT FOR LOGNP1
8393 fmul.x
(%a0
),%fp0
# FP0 IS U = (Y-F)/F
8394 fmul.x LOGOF2
(%pc
),%fp1
# GET K*LOG2 WHILE FP0 IS NOT READY
8396 fmul.x
%fp2
,%fp2
# FP2 IS V=U*U
	fmov.x		%fp1,KLOG2(%a6)		# PUT K*LOG2 IN MEMORY, FREE FP1
8399 #--LOG(1+U) IS APPROXIMATED BY
8400 #--U + V*(A1+U*(A2+U*(A3+U*(A4+U*(A5+U*A6))))) WHICH IS
8401 #--[U + V*(A1+V*(A3+V*A5))] + [U*V*(A2+V*(A4+V*A6))]
8406 fmul.d LOGA6
(%pc
),%fp1
# V*A6
8407 fmul.d LOGA5
(%pc
),%fp2
# V*A5
8409 fadd.d LOGA4
(%pc
),%fp1
# A4+V*A6
8410 fadd.d LOGA3
(%pc
),%fp2
# A3+V*A5
8412 fmul.x
%fp3
,%fp1
# V*(A4+V*A6)
8413 fmul.x
%fp3
,%fp2
# V*(A3+V*A5)
8415 fadd.d LOGA2
(%pc
),%fp1
# A2+V*(A4+V*A6)
8416 fadd.d LOGA1
(%pc
),%fp2
# A1+V*(A3+V*A5)
8418 fmul.x
%fp3
,%fp1
# V*(A2+V*(A4+V*A6))
8419 add.l &16,%a0
# ADDRESS OF LOG(F)
8420 fmul.x
%fp3
,%fp2
# V*(A1+V*(A3+V*A5))
8422 fmul.x
%fp0
,%fp1
# U*V*(A2+V*(A4+V*A6))
8423 fadd.x
%fp2
,%fp0
# U+V*(A1+V*(A3+V*A5))
8425 fadd.x
(%a0
),%fp1
# LOG(F)+U*V*(A2+V*(A4+V*A6))
8426 fmovm.x
(%sp
)+,&0x30 # RESTORE FP2-3
8427 fadd.x
%fp1
,%fp0
# FP0 IS LOG(F) + LOG(1+U)
8430 fadd.x KLOG2
(%a6
),%fp0
# FINAL ADD
8436 # if the input is exactly equal to one, then exit through ld_pzero.
8437 # if these 2 lines weren't here, the correct answer would be returned
8438 # but the INEX2 bit would be set.
8439 fcmp.
b %fp0
,&0x1 # is it equal to one?
8440 fbeq.
l ld_pzero
# yes
8442 #--REGISTERS SAVED: FPCR, FP1. FP0 CONTAINS THE INPUT.
8444 fsub.s one
(%pc
),%fp1
# FP1 IS X-1
8445 fadd.s one
(%pc
),%fp0
# FP0 IS X+1
8446 fadd.x
%fp1
,%fp1
# FP1 IS 2(X-1)
8447 #--LOG(X) = LOG(1+U/2)-LOG(1-U/2) WHICH IS AN ODD POLYNOMIAL
8448 #--IN U, U = 2(X-1)/(X+1) = FP1/FP0
8451 #--THIS IS AN RE-ENTRY POINT FOR LOGNP1
8452 fdiv.x
%fp0
,%fp1
# FP1 IS U
8453 fmovm.x
&0xc,-(%sp
) # SAVE FP2-3
8454 #--REGISTERS SAVED ARE NOW FPCR,FP1,FP2,FP3
8455 #--LET V=U*U, W=V*V, CALCULATE
8456 #--U + U*V*(B1 + V*(B2 + V*(B3 + V*(B4 + V*B5)))) BY
8457 #--U + U*V*( [B1 + W*(B3 + W*B5)] + [V*(B2 + W*B4)] )
8459 fmul.x
%fp0
,%fp0
# FP0 IS V
8460 fmov.x
%fp1
,SAVEU
(%a6
) # STORE U IN MEMORY, FREE FP1
8462 fmul.x
%fp1
,%fp1
# FP1 IS W
8464 fmov.d LOGB5
(%pc
),%fp3
8465 fmov.d LOGB4
(%pc
),%fp2
8467 fmul.x
%fp1
,%fp3
# W*B5
8468 fmul.x
%fp1
,%fp2
# W*B4
8470 fadd.d LOGB3
(%pc
),%fp3
# B3+W*B5
8471 fadd.d LOGB2
(%pc
),%fp2
# B2+W*B4
8473 fmul.x
%fp3
,%fp1
# W*(B3+W*B5), FP3 RELEASED
8475 fmul.x
%fp0
,%fp2
# V*(B2+W*B4)
8477 fadd.d LOGB1
(%pc
),%fp1
# B1+W*(B3+W*B5)
8478 fmul.x SAVEU
(%a6
),%fp0
# FP0 IS U*V
8480 fadd.x
%fp2
,%fp1
# B1+W*(B3+W*B5) + V*(B2+W*B4), FP2 RELEASED
8481 fmovm.x
(%sp
)+,&0x30 # FP2-3 RESTORED
8483 fmul.x
%fp1
,%fp0
# U*V*( [B1+W*(B3+W*B5)] + [V*(B2+W*B4)] )
8486 fadd.x SAVEU
(%a6
),%fp0
8489 #--REGISTERS SAVED FPCR. LOG(-VE) IS INVALID
8495 #--ENTRY POINT FOR LOG(X) FOR DENORMALIZED INPUT
8497 mov.
l &-100,ADJK
(%a6
) # INPUT = 2^(ADJK) * FP0
8499 #----normalize the input value by left shifting k bits (k to be determined
8500 #----below), adjusting exponent and storing -k to ADJK
8501 #----the value TWOTO100 is no longer needed.
8502 #----Note that this code assumes the denormalized input is NON-ZERO.
8504 movm.
l &0x3f00,-(%sp
) # save some registers {d2-d7}
8505 mov.
l (%a0
),%d3
# D3 is exponent of smallest norm. #
8507 mov.
l 8(%a0
),%d5
# (D4,D5) is (Hi_X,Lo_X)
8508 clr.
l %d2
# D2 used for holding K
8518 bfffo
%d4
{&0:&32},%d6
8520 add.l %d6
,%d2
# (D3,D4,D5) is normalized
8523 mov.
l %d4
,XFRAC
(%a6
)
8524 mov.
l %d5
,XFRAC+
4(%a6
)
8528 movm.
l (%sp
)+,&0xfc # restore registers {d2-d7}
8530 bra.w LOGBGN
# begin regular log(X)
8534 bfffo
%d4
{&0:&32},%d6
# find first 1
8535 mov.
l %d6
,%d2
# get k
8537 mov.
l %d5
,%d7
# a copy of D5
8542 or.l %d7
,%d4
# (D3,D4,D5) normalized
8545 mov.
l %d4
,XFRAC
(%a6
)
8546 mov.
l %d5
,XFRAC+
4(%a6
)
8550 movm.
l (%sp
)+,&0xfc # restore registers {d2-d7}
8552 bra.w LOGBGN
# begin regular log(X)
8555 #--ENTRY POINT FOR LOG(1+X) FOR X FINITE, NON-ZERO, NOT NAN'S
8557 fmov.x
(%a0
),%fp0
# LOAD INPUT
8558 fabs.x
%fp0
# test magnitude
8559 fcmp.x
%fp0
,LTHOLD
(%pc
) # compare with min threshold
8560 fbgt.w LP1REAL
# if greater, continue
8562 mov.
b &FMOV_OP
,%d1
# last inst is MOVE
8563 fmov.x
(%a0
),%fp0
# return signed argument
8567 fmov.x
(%a0
),%fp0
# LOAD INPUT
8568 mov.
l &0x00000000,ADJK
(%a6
)
8569 fmov.x
%fp0
,%fp1
# FP1 IS INPUT Z
8570 fadd.s one
(%pc
),%fp0
# X := ROUND(1+Z)
8572 mov.w XFRAC
(%a6
),XDCARE
(%a6
)
8575 ble.w LP1NEG0
# LOG OF ZERO OR -VE
8576 cmp.
l %d1
,&0x3ffe8000 # IS BOUNDS [1/2,3/2]?
8578 cmp.
l %d1
,&0x3fffc000
8580 #--IF 1+Z > 3/2 OR 1+Z < 1/2, THEN X, WHICH IS ROUNDING 1+Z,
8581 #--CONTAINS AT LEAST 63 BITS OF INFORMATION OF Z. IN THAT CASE,
8582 #--SIMPLY INVOKE LOG(X) FOR LOG(1+Z).
8585 #--NEXT SEE IF EXP(-1/16) < X < EXP(1/16)
8586 cmp.
l %d1
,&0x3ffef07d
8588 cmp.
l %d1
,&0x3fff8841
8592 #--EXP(-1/16) < X < EXP(1/16). LOG(1+Z) = LOG(1+U/2) - LOG(1-U/2)
8593 #--WHERE U = 2Z/(2+Z) = 2Z/(1+X).
8594 fadd.x
%fp1
,%fp1
# FP1 IS 2Z
8595 fadd.s one
(%pc
),%fp0
# FP0 IS 1+X
8600 #--HERE WE USE THE USUAL TABLE DRIVEN APPROACH. CARE HAS TO BE
8601 #--TAKEN BECAUSE 1+Z CAN HAVE 67 BITS OF INFORMATION AND WE MUST
8602 #--PRESERVE ALL THE INFORMATION. BECAUSE 1+Z IS IN [1/2,3/2],
8603 #--THERE ARE ONLY TWO CASES.
8604 #--CASE 1: 1+Z < 1, THEN K = -1 AND Y-F = (2-F) + 2Z
8605 #--CASE 2: 1+Z > 1, THEN K = 0 AND Y-F = (1-F) + Z
8606 #--ON RETURNING TO LP1CONT1, WE MUST HAVE K IN FP1, ADDRESS OF
8607 #--(1/F) IN A0, Y-F IN FP0, AND FP2 SAVED.
8609 mov.
l XFRAC
(%a6
),FFRAC
(%a6
)
8610 and.l &0xFE000000,FFRAC
(%a6
)
8611 or.l &0x01000000,FFRAC
(%a6
) # F OBTAINED
8612 cmp.
l %d1
,&0x3FFF8000 # SEE IF 1+Z > 1
8616 fmov.s TWO
(%pc
),%fp0
8617 mov.
l &0x3fff0000,F
(%a6
)
8619 fsub.x F
(%a6
),%fp0
# 2-F
8620 mov.
l FFRAC
(%a6
),%d1
8621 and.l &0x7E000000,%d1
8624 asr.
l &4,%d1
# D0 CONTAINS DISPLACEMENT FOR 1/F
8625 fadd.x
%fp1
,%fp1
# GET 2Z
8626 fmovm.x
&0xc,-(%sp
) # SAVE FP2 {%fp2/%fp3}
8627 fadd.x
%fp1
,%fp0
# FP0 IS Y-F = (2-F)+2Z
8628 lea LOGTBL
(%pc
),%a0
# A0 IS ADDRESS OF 1/F
8630 fmov.s negone
(%pc
),%fp1
# FP1 IS K = -1
8634 fmov.s one
(%pc
),%fp0
8635 mov.
l &0x3fff0000,F
(%a6
)
8637 fsub.x F
(%a6
),%fp0
# 1-F
8638 mov.
l FFRAC
(%a6
),%d1
8639 and.l &0x7E000000,%d1
8643 fadd.x
%fp1
,%fp0
# FP0 IS Y-F
8644 fmovm.x
&0xc,-(%sp
) # FP2 SAVED {%fp2/%fp3}
8646 add.l %d1
,%a0
# A0 IS ADDRESS OF 1/F
8647 fmov.s zero
(%pc
),%fp1
# FP1 IS K = 0
8651 #--FPCR SAVED. D0 IS X IN COMPACT FORM.
8655 fmov.s negone
(%pc
),%fp0
8661 fmov.s zero
(%pc
),%fp0
8667 #--ENTRY POINT FOR LOG(1+Z) FOR DENORMALIZED INPUT
8668 # Simply return the denorm
8672 #########################################################################
8673 # satanh(): computes the inverse hyperbolic tangent of a norm input #
8674 # satanhd(): computes the inverse hyperbolic tangent of a denorm input #
8676 # INPUT *************************************************************** #
8677 # a0 = pointer to extended precision input #
8678 # d0 = round precision,mode #
8680 # OUTPUT ************************************************************** #
8681 # fp0 = arctanh(X) #
8683 # ACCURACY and MONOTONICITY ******************************************* #
#	The returned result is within 3 ulps in 64 significant bits,	#
8685 # i.e. within 0.5001 ulp to 53 bits if the result is subsequently #
8686 # rounded to double precision. The result is provably monotonic #
8687 # in double precision. #
8689 # ALGORITHM *********************************************************** #
8692 # 1. If |X| >= 1, go to 3. #
8694 # 2. (|X| < 1) Calculate atanh(X) by #
#		sgn := sign(X), y := |X|, z := 2y/(1-y),		#
#		atanh(X) := sgn * (1/2) * logp1(z).			#
8701 # 3. If |X| > 1, go to 5. #
8703 # 4. (|X| = 1) Generate infinity with an appropriate sign and #
8704 # divide-by-zero by #
#		atanh(X) := sgn / (+0).					#
8709 # 5. (|X| > 1) Generate an invalid operation by 0 * infinity. #
8712 #########################################################################
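#
# [Editor's note -- illustrative sketch only, not part of the 060FPSP code.]
# Step 2 above in C99 double precision: z = 2|X|/(1-|X|) and
# atanh(X) = sign(X)*(1/2)*log1p(z); the |X| >= 1 exception cases are
# omitted:
#
#	#include <math.h>
#	static double atanh_sketch(double x)
#	{
#	    double y = fabs(x);
#	    double r = 0.5 * log1p(2.0 * y / (1.0 - y));
#	    return (x < 0.0) ? -r : r;
#	}
#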
8718 and.l &0x7FFFFFFF,%d1
8719 cmp.
l %d1
,&0x3FFF8000
8722 #--THIS IS THE USUAL CASE, |X| < 1
8723 #--Y = |X|, Z = 2Y/(1-Y), ATANH(X) = SIGN(X) * (1/2) * LOG1P(Z).
8725 fabs.x
(%a0
),%fp0
# Y = |X|
8728 fadd.x
%fp0
,%fp0
# 2Y
8729 fadd.s
&0x3F800000,%fp1
# 1-Y
8730 fdiv.x
%fp1
,%fp0
# 2Y/(1-Y)
8732 and.l &0x80000000,%d1
8733 or.l &0x3F000000,%d1
# SIGN(X)*HALF
8736 mov.
l %d0
,-(%sp
) # save rnd prec,mode
8737 clr.
l %d0
# pass ext prec,RN
8738 fmovm.x
&0x01,-(%sp
) # save Z on stack
8739 lea
(%sp
),%a0
# pass ptr to Z
8740 bsr slognp1
# LOG1P(Z)
8741 add.l &0xc,%sp
# clear Z from stack
8743 mov.
l (%sp
)+,%d0
# fetch old prec,mode
8744 fmov.
l %d0
,%fpcr
# load it
8745 mov.
b &FMUL_OP
,%d1
# last inst is MUL
8750 fabs.x
(%a0
),%fp0
# |X|
8751 fcmp.s
%fp0
,&0x3F800000
8756 #--ATANH(X) = X FOR DENORMALIZED X
8760 #########################################################################
8761 # slog10(): computes the base-10 logarithm of a normalized input #
8762 # slog10d(): computes the base-10 logarithm of a denormalized input #
8763 # slog2(): computes the base-2 logarithm of a normalized input #
8764 # slog2d(): computes the base-2 logarithm of a denormalized input #
8766 # INPUT *************************************************************** #
8767 # a0 = pointer to extended precision input #
8768 # d0 = round precision,mode #
8770 # OUTPUT ************************************************************** #
8771 # fp0 = log_10(X) or log_2(X) #
8773 # ACCURACY and MONOTONICITY ******************************************* #
#	The returned result is within 1.7 ulps in 64 significant bits,	#
8775 # i.e. within 0.5003 ulp to 53 bits if the result is subsequently #
8776 # rounded to double precision. The result is provably monotonic #
8777 # in double precision. #
8779 # ALGORITHM *********************************************************** #
8783 # Step 0. If X < 0, create a NaN and raise the invalid operation #
8784 # flag. Otherwise, save FPCR in D1; set FpCR to default. #
8785 # Notes: Default means round-to-nearest mode, no floating-point #
8786 # traps, and precision control = double extended. #
8788 # Step 1. Call slognd to obtain Y = log(X), the natural log of X. #
8789 # Notes: Even if X is denormalized, log(X) is always normalized. #
8791 # Step 2. Compute log_10(X) = log(X) * (1/log(10)). #
8792 # 2.1 Restore the user FPCR #
8793 # 2.2 Return ans := Y * INV_L10. #
8797 # Step 0. If X < 0, create a NaN and raise the invalid operation #
8798 # flag. Otherwise, save FPCR in D1; set FpCR to default. #
8799 # Notes: Default means round-to-nearest mode, no floating-point #
8800 # traps, and precision control = double extended. #
8802 # Step 1. Call sLogN to obtain Y = log(X), the natural log of X. #
8804 # Step 2. Compute log_10(X) = log(X) * (1/log(10)). #
8805 # 2.1 Restore the user FPCR #
8806 # 2.2 Return ans := Y * INV_L10. #
8810 # Step 0. If X < 0, create a NaN and raise the invalid operation #
8811 # flag. Otherwise, save FPCR in D1; set FpCR to default. #
8812 # Notes: Default means round-to-nearest mode, no floating-point #
8813 # traps, and precision control = double extended. #
8815 # Step 1. Call slognd to obtain Y = log(X), the natural log of X. #
8816 # Notes: Even if X is denormalized, log(X) is always normalized. #
#	Step 2.	Compute log_2(X) = log(X) * (1/log(2)).			#
8819 # 2.1 Restore the user FPCR #
8820 # 2.2 Return ans := Y * INV_L2. #
8824 # Step 0. If X < 0, create a NaN and raise the invalid operation #
8825 # flag. Otherwise, save FPCR in D1; set FpCR to default. #
8826 # Notes: Default means round-to-nearest mode, no floating-point #
8827 # traps, and precision control = double extended. #
#	Step 1.	If X is not an integer power of two, i.e., X != 2^k,	#
#		go to Step 3.						#
8832 # Step 2. Return k. #
8833 # 2.1 Get integer k, X = 2^k. #
8834 # 2.2 Restore the user FPCR. #
8835 # 2.3 Return ans := convert-to-double-extended(k). #
8837 # Step 3. Call sLogN to obtain Y = log(X), the natural log of X. #
8839 # Step 4. Compute log_2(X) = log(X) * (1/log(2)). #
8840 # 4.1 Restore the user FPCR #
8841 # 4.2 Return ans := Y * INV_L2. #
8843 #########################################################################
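#
# [Editor's note -- illustrative sketch only, not part of the 060FPSP code.]
# The constant-multiply reductions above in C99 double precision, including
# the exact-power-of-two short cut used by slog2:
#
#	#include <math.h>
#	static double log10_sketch(double x)
#	{
#	    return log(x) * (1.0 / M_LN10);          /* Y * INV_L10         */
#	}
#	static double log2_sketch(double x)
#	{
#	    int k;
#	    double y = 2.0 * frexp(x, &k);           /* x = 2^(k-1) * y     */
#	    if (y == 1.0)
#	        return (double)(k - 1);              /* X = 2^k: return k   */
#	    return log(x) * (1.0 / M_LN2);           /* Y * INV_L2          */
#	}
#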
8846 long 0x3FFD0000,0xDE5BD8A9,0x37287195,0x00000000
8849 long 0x3FFF0000,0xB8AA3B29,0x5C17F0BC,0x00000000
8852 #--entry point for Log10(X), X is normalized
8855 fcmp.x %fp0,(%a0) # if operand == 1,
8856 fbeq.l ld_pzero # return an EXACT zero
8862 bsr slogn # log(X), X normal.
8864 fmul.x INV_L10(%pc),%fp0
8868 #--entry point for Log10(X), X is denormalized
8874 bsr slognd # log(X), X denorm.
8876 fmul.x INV_L10(%pc),%fp0
8880 #--entry point for Log2(X), X is normalized
8886 bne.b continue # X is not 2^k
8889 and.l &0x7FFFFFFF,%d1
8894 and.l &0x00007FFF,%d1
8904 bsr slogn # log(X), X normal.
8906 fmul.x INV_L2(%pc),%fp0
8913 #--entry point for Log2(X), X is denormalized
8919 bsr slognd # log(X), X denorm.
8921 fmul.x INV_L2(%pc),%fp0
8924 #########################################################################
8925 # stwotox(): computes 2**X for a normalized input #
8926 # stwotoxd(): computes 2**X for a denormalized input #
8927 # stentox(): computes 10**X for a normalized input #
8928 # stentoxd(): computes 10**X for a denormalized input #
8930 # INPUT *************************************************************** #
8931 # a0 = pointer to extended precision input #
8932 # d0 = round precision,mode #
8934 # OUTPUT ************************************************************** #
8935 # fp0 = 2**X or 10**X #
8937 # ACCURACY and MONOTONICITY ******************************************* #
8938 # The returned result is within 2 ulps in 64 significant bit, #
8939 # i.e. within 0.5001 ulp to 53 bits if the result is subsequently #
8940 # rounded to double precision. The result is provably monotonic #
8941 # in double precision. #
8943 # ALGORITHM *********************************************************** #
8946 # 1. If |X| > 16480, go to ExpBig. #
8948 # 2. If |X| < 2**(-70), go to ExpSm. #
8950 # 3. Decompose X as X = N/64 + r where |r| <= 1/128. Furthermore #
8952 # N = 64(M + M') + j, j = 0,1,2,...,63. #
8954 # 4. Overwrite r := r * log2. Then #
8955 # 2**X = 2**(M') * 2**(M) * 2**(j/64) * exp(r). #
8956 # Go to expr to compute that expression. #
8959 # 1. If |X| > 16480*log_10(2) (base 10 log of 2), go to ExpBig. #
8961 # 2. If |X| < 2**(-70), go to ExpSm. #
8963 # 3. Set y := X*log_2(10)*64 (base 2 log of 10). Set #
8964 # N := round-to-int(y). Decompose N as #
8965 # N = 64(M + M') + j, j = 0,1,2,...,63. #
8968 # r := ((X - N*L1)-N*L2) * L10 #
8969 # where L1, L2 are the leading and trailing parts of #
8970 # log_10(2)/64 and L10 is the natural log of 10. Then #
8971 # 10**X = 2**(M') * 2**(M) * 2**(j/64) * exp(r). #
8972 # Go to expr to compute that expression. #
8975 # 1. Fetch 2**(j/64) from table as Fact1 and Fact2. #
8977 # 2. Overwrite Fact1 and Fact2 by #
8978 # Fact1 := 2**(M) * Fact1 #
8979 # Fact2 := 2**(M) * Fact2 #
8980 # Thus Fact1 + Fact2 = 2**(M) * 2**(j/64). #
8982 # 3. Calculate P where 1 + P approximates exp(r): #
8983 # P = r + r*r*(A1+r*(A2+...+r*A5)). #
8985 # 4. Let AdjFact := 2**(M'). Return #
8986 # AdjFact * ( Fact1 + ((Fact1*P) + Fact2) ). #
8990 # 1. Generate overflow by Huge * Huge if X > 0; otherwise, #
8991 # generate underflow by Tiny * Tiny. #
8994 # 1. Return 1 + X. #
8996 #########################################################################
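#
# The decomposition above writes X = N/64 + r with N = round(64*X) and
# N = 64*M + j, so that 2**X = 2**M * 2**(j/64) * exp(r*log2).  A minimal C
# sketch of that reconstruction follows (illustration only, not part of the
# FPSP; libm's exp() replaces the A1..A5 polynomial and the 2^(j/64) table is
# filled at startup instead of being assembled as constants):
#
#	#include <math.h>
#	#include <stdio.h>
#
#	static double texptbl_c[64];		/* 2^(j/64), j = 0..63 */
#
#	static double sketch_twotox(double x)
#	{
#		double n = nearbyint(64.0 * x);	/* N := round-to-int(64*X)   */
#		int    N = (int)n;
#		int    j = N & 63;		/* j = N mod 64              */
#		int    M = (N - j) / 64;	/* N = 64*M + j              */
#		double r = (x - n / 64.0) * M_LN2; /* r := (X - N/64)*log(2) */
#		return ldexp(texptbl_c[j] * exp(r), M);
#	}
#
#	int main(void)
#	{
#		for (int j = 0; j < 64; j++)
#			texptbl_c[j] = exp2(j / 64.0);
#		/* 2**0.5 should print the same value as sqrt(2) */
#		printf("%.17g %.17g\n", sketch_twotox(0.5), sqrt(2.0));
#		return 0;
#	}
#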
8999 long 0x406A934F,0x0979A371 # 64LOG10/LOG2
9001 long 0x3F734413,0x509F8000 # LOG2/64LOG10
9004 long 0xBFCD0000,0xC0219DC1,0xDA994FD2,0x00000000
9006 LOG10: long 0x40000000,0x935D8DDD,0xAAA8AC17,0x00000000
9008 LOG2: long 0x3FFE0000,0xB17217F7,0xD1CF79AC,0x00000000
9010 EXPA5: long 0x3F56C16D,0x6F7BD0B2
9011 EXPA4: long 0x3F811112,0x302C712C
9012 EXPA3: long 0x3FA55555,0x55554CC1
9013 EXPA2: long 0x3FC55555,0x55554A54
9014 EXPA1: long 0x3FE00000,0x00000000,0x00000000,0x00000000
9017 long
0x3FFF0000,0x80000000,0x00000000,0x3F738000
9018 long
0x3FFF0000,0x8164D1F3,0xBC030773,0x3FBEF7CA
9019 long
0x3FFF0000,0x82CD8698,0xAC2BA1D7,0x3FBDF8A9
9020 long
0x3FFF0000,0x843A28C3,0xACDE4046,0x3FBCD7C9
9021 long
0x3FFF0000,0x85AAC367,0xCC487B15,0xBFBDE8DA
9022 long
0x3FFF0000,0x871F6196,0x9E8D1010,0x3FBDE85C
9023 long
0x3FFF0000,0x88980E80,0x92DA8527,0x3FBEBBF1
9024 long
0x3FFF0000,0x8A14D575,0x496EFD9A,0x3FBB80CA
9025 long
0x3FFF0000,0x8B95C1E3,0xEA8BD6E7,0xBFBA8373
9026 long
0x3FFF0000,0x8D1ADF5B,0x7E5BA9E6,0xBFBE9670
9027 long
0x3FFF0000,0x8EA4398B,0x45CD53C0,0x3FBDB700
9028 long
0x3FFF0000,0x9031DC43,0x1466B1DC,0x3FBEEEB0
9029 long
0x3FFF0000,0x91C3D373,0xAB11C336,0x3FBBFD6D
9030 long
0x3FFF0000,0x935A2B2F,0x13E6E92C,0xBFBDB319
9031 long
0x3FFF0000,0x94F4EFA8,0xFEF70961,0x3FBDBA2B
9032 long
0x3FFF0000,0x96942D37,0x20185A00,0x3FBE91D5
9033 long
0x3FFF0000,0x9837F051,0x8DB8A96F,0x3FBE8D5A
9034 long
0x3FFF0000,0x99E04593,0x20B7FA65,0xBFBCDE7B
9035 long
0x3FFF0000,0x9B8D39B9,0xD54E5539,0xBFBEBAAF
9036 long
0x3FFF0000,0x9D3ED9A7,0x2CFFB751,0xBFBD86DA
9037 long
0x3FFF0000,0x9EF53260,0x91A111AE,0xBFBEBEDD
9038 long
0x3FFF0000,0xA0B0510F,0xB9714FC2,0x3FBCC96E
9039 long
0x3FFF0000,0xA2704303,0x0C496819,0xBFBEC90B
9040 long
0x3FFF0000,0xA43515AE,0x09E6809E,0x3FBBD1DB
9041 long
0x3FFF0000,0xA5FED6A9,0xB15138EA,0x3FBCE5EB
9042 long
0x3FFF0000,0xA7CD93B4,0xE965356A,0xBFBEC274
9043 long
0x3FFF0000,0xA9A15AB4,0xEA7C0EF8,0x3FBEA83C
9044 long
0x3FFF0000,0xAB7A39B5,0xA93ED337,0x3FBECB00
9045 long
0x3FFF0000,0xAD583EEA,0x42A14AC6,0x3FBE9301
9046 long
0x3FFF0000,0xAF3B78AD,0x690A4375,0xBFBD8367
9047 long
0x3FFF0000,0xB123F581,0xD2AC2590,0xBFBEF05F
9048 long
0x3FFF0000,0xB311C412,0xA9112489,0x3FBDFB3C
9049 long
0x3FFF0000,0xB504F333,0xF9DE6484,0x3FBEB2FB
9050 long
0x3FFF0000,0xB6FD91E3,0x28D17791,0x3FBAE2CB
9051 long
0x3FFF0000,0xB8FBAF47,0x62FB9EE9,0x3FBCDC3C
9052 long
0x3FFF0000,0xBAFF5AB2,0x133E45FB,0x3FBEE9AA
9053 long
0x3FFF0000,0xBD08A39F,0x580C36BF,0xBFBEAEFD
9054 long
0x3FFF0000,0xBF1799B6,0x7A731083,0xBFBCBF51
9055 long
0x3FFF0000,0xC12C4CCA,0x66709456,0x3FBEF88A
9056 long
0x3FFF0000,0xC346CCDA,0x24976407,0x3FBD83B2
9057 long
0x3FFF0000,0xC5672A11,0x5506DADD,0x3FBDF8AB
9058 long
0x3FFF0000,0xC78D74C8,0xABB9B15D,0xBFBDFB17
9059 long
0x3FFF0000,0xC9B9BD86,0x6E2F27A3,0xBFBEFE3C
9060 long
0x3FFF0000,0xCBEC14FE,0xF2727C5D,0xBFBBB6F8
9061 long
0x3FFF0000,0xCE248C15,0x1F8480E4,0xBFBCEE53
9062 long
0x3FFF0000,0xD06333DA,0xEF2B2595,0xBFBDA4AE
9063 long
0x3FFF0000,0xD2A81D91,0xF12AE45A,0x3FBC9124
9064 long
0x3FFF0000,0xD4F35AAB,0xCFEDFA1F,0x3FBEB243
9065 long
0x3FFF0000,0xD744FCCA,0xD69D6AF4,0x3FBDE69A
9066 long
0x3FFF0000,0xD99D15C2,0x78AFD7B6,0xBFB8BC61
9067 long
0x3FFF0000,0xDBFBB797,0xDAF23755,0x3FBDF610
9068 long
0x3FFF0000,0xDE60F482,0x5E0E9124,0xBFBD8BE1
9069 long
0x3FFF0000,0xE0CCDEEC,0x2A94E111,0x3FBACB12
9070 long
0x3FFF0000,0xE33F8972,0xBE8A5A51,0x3FBB9BFE
9071 long
0x3FFF0000,0xE5B906E7,0x7C8348A8,0x3FBCF2F4
9072 long
0x3FFF0000,0xE8396A50,0x3C4BDC68,0x3FBEF22F
9073 long
0x3FFF0000,0xEAC0C6E7,0xDD24392F,0xBFBDBF4A
9074 long
0x3FFF0000,0xED4F301E,0xD9942B84,0x3FBEC01A
9075 long
0x3FFF0000,0xEFE4B99B,0xDCDAF5CB,0x3FBE8CAC
9076 long
0x3FFF0000,0xF281773C,0x59FFB13A,0xBFBCBB3F
9077 long
0x3FFF0000,0xF5257D15,0x2486CC2C,0x3FBEF73A
9078 long
0x3FFF0000,0xF7D0DF73,0x0AD13BB9,0xBFB8B795
9079 long
0x3FFF0000,0xFA83B2DB,0x722A033A,0x3FBEF84B
9080 long
0x3FFF0000,0xFD3E0C0C,0xF486C175,0xBFBEF581
9092 set FACT1LOW,FACT1+8
9096 set FACT2LOW,FACT2+8
9099 #--ENTRY POINT FOR 2**(X), HERE X IS FINITE, NON-ZERO, AND NOT NAN'S
9101 fmovm.x (%a0),&0x80 # LOAD INPUT
9106 and.l &0x7FFFFFFF,%d1
9108 cmp.l %d1,&0x3FB98000 # |X| >= 2**(-70)?
9113 cmp.l %d1,&0x400D80C0 # |X| > 16480?
9118 #--USUAL CASE, 2^(-70) <= |X| <= 16480
9121 fmul.s &0x42800000,%fp1 # 64 * X
9122 fmov.l %fp1,INT(%a6) # N = ROUND-TO-INT(64 X)
9124 lea TEXPTBL(%pc),%a1 # LOAD ADDRESS OF TABLE OF 2^(J/64)
9125 fmov.l INT(%a6),%fp1 # N --> FLOATING FMT
9128 and.l &0x3F,%d1 # D0 IS J
9129 asl.l &4,%d1 # DISPLACEMENT FOR 2^(J/64)
9130 add.l %d1,%a1 # ADDRESS FOR 2^(J/64)
9131 asr.l &6,%d2 # d2 IS L, N = 64L + J
9133 asr.l &1,%d1 # D0 IS M
9134 sub.l %d1,%d2 # d2 IS M', N = 64(M+M') + J
9137 #--SUMMARY: a1 IS ADDRESS FOR THE LEADING PORTION OF 2^(J/64),
9138 #--D0 IS M WHERE N = 64(M+M') + J. NOTE THAT |M| <= 16140 BY DESIGN.
9139 #--ADJFACT = 2^(M').
9140 #--REGISTERS SAVED SO FAR ARE (IN ORDER) FPCR, D0, FP1, a1, AND FP2.
9142 fmovm.x &0x0c,-(%sp) # save fp2/fp3
9144 fmul.s &0x3C800000,%fp1 # (1/64)*N
9145 mov.l (%a1)+,FACT1(%a6)
9146 mov.l (%a1)+,FACT1HI(%a6)
9147 mov.l (%a1)+,FACT1LOW(%a6)
9148 mov.w (%a1)+,FACT2(%a6)
9150 fsub.x %fp1,%fp0 # X - (1/64)*INT(64 X)
9152 mov.w (%a1)+,FACT2HI(%a6)
9153 clr.w FACT2HI+2(%a6)
9155 add.w %d1,FACT1(%a6)
9156 fmul.x LOG2(%pc),%fp0 # FP0 IS R
9157 add.w %d1,FACT2(%a6)
9163 cmp.l %d1,&0x3FFF8000
9166 #--|X| IS SMALL, RETURN 1 + X
9168 fmov.l %d0,%fpcr # restore users round prec,mode
9169 fadd.s &0x3F800000,%fp0 # RETURN 1 + X
9173 #--|X| IS LARGE, GENERATE OVERFLOW IF X > 0; ELSE GENERATE UNDERFLOW
9174 #--REGISTERS SAVE SO FAR ARE FPCR AND D0
9179 bra t_ovfl2 # t_ovfl expects positive value
9182 bra t_unfl2 # t_unfl expects positive value
9186 #--ENTRY POINT FOR 2**(X) FOR DENORMALIZED ARGUMENT
9188 fmov.l %d0,%fpcr # set user's rounding mode/precision
9189 fmov.s &0x3F800000,%fp0 # RETURN 1 + X
9191 or.l &0x00800001,%d1
9196 #--ENTRY POINT FOR 10**(X), HERE X IS FINITE, NON-ZERO, AND NOT NAN'S
9198 fmovm.x (%a0),&0x80 # LOAD INPUT
9203 and.l &0x7FFFFFFF,%d1
9205 cmp.l %d1,&0x3FB98000 # |X| >= 2**(-70)?
9210 cmp.l %d1,&0x400B9B07 # |X| <= 16480*log2/log10 ?
9215 #--USUAL CASE, 2^(-70) <= |X| <= 16480 LOG 2 / LOG 10
9218 fmul.d L2TEN64(%pc),%fp1 # X*64*LOG10/LOG2
9219 fmov.l %fp1,INT(%a6) # N=INT(X*64*LOG10/LOG2)
9221 lea TEXPTBL(%pc),%a1 # LOAD ADDRESS OF TABLE OF 2^(J/64)
9222 fmov.l INT(%a6),%fp1 # N --> FLOATING FMT
9225 and.l &0x3F,%d1 # D0 IS J
9226 asl.l &4,%d1 # DISPLACEMENT FOR 2^(J/64)
9227 add.l %d1,%a1 # ADDRESS FOR 2^(J/64)
9228 asr.l &6,%d2 # d2 IS L, N = 64L + J
9230 asr.l &1,%d1 # D0 IS M
9231 sub.l %d1,%d2 # d2 IS M', N = 64(M+M') + J
9234 #--SUMMARY: a1 IS ADDRESS FOR THE LEADING PORTION OF 2^(J/64),
9235 #--D0 IS M WHERE N = 64(M+M') + J. NOTE THAT |M| <= 16140 BY DESIGN.
9236 #--ADJFACT = 2^(M').
9237 #--REGISTERS SAVED SO FAR ARE (IN ORDER) FPCR, D0, FP1, a1, AND FP2.
9238 fmovm.x &0x0c,-(%sp) # save fp2/fp3
9242 fmul.d L10TWO1(%pc),%fp1 # N*(LOG2/64LOG10)_LEAD
9243 mov.l (%a1)+,FACT1(%a6)
9245 fmul.x L10TWO2(%pc),%fp2 # N*(LOG2/64LOG10)_TRAIL
9247 mov.l (%a1)+,FACT1HI(%a6)
9248 mov.l (%a1)+,FACT1LOW(%a6)
9249 fsub.x %fp1,%fp0 # X - N L_LEAD
9250 mov.w (%a1)+,FACT2(%a6)
9252 fsub.x %fp2,%fp0 # X - N L_TRAIL
9254 mov.w (%a1)+,FACT2HI(%a6)
9255 clr.w FACT2HI+2(%a6)
9258 fmul.x LOG10(%pc),%fp0 # FP0 IS R
9259 add.w %d1,FACT1(%a6)
9260 add.w %d1,FACT2(%a6)
9263 #--FPCR, FP2, FP3 ARE SAVED IN ORDER AS SHOWN.
9264 #--ADJFACT CONTAINS 2**(M'), FACT1 + FACT2 = 2**(M) * 2**(J/64).
9265 #--FP0 IS R. THE FOLLOWING CODE COMPUTES
9266 #-- 2**(M'+M) * 2**(J/64) * EXP(R)
9269 fmul.x %fp1,%fp1 # FP1 IS S = R*R
9271 fmov.d EXPA5(%pc),%fp2 # FP2 IS A5
9272 fmov.d EXPA4(%pc),%fp3 # FP3 IS A4
9274 fmul.x %fp1,%fp2 # FP2 IS S*A5
9275 fmul.x %fp1,%fp3 # FP3 IS S*A4
9277 fadd.d EXPA3(%pc),%fp2 # FP2 IS A3+S*A5
9278 fadd.d EXPA2(%pc),%fp3 # FP3 IS A2+S*A4
9280 fmul.x %fp1,%fp2 # FP2 IS S*(A3+S*A5)
9281 fmul.x %fp1,%fp3 # FP3 IS S*(A2+S*A4)
9283 fadd.d EXPA1(%pc),%fp2 # FP2 IS A1+S*(A3+S*A5)
9284 fmul.x %fp0,%fp3 # FP3 IS R*S*(A2+S*A4)
9286 fmul.x %fp1,%fp2 # FP2 IS S*(A1+S*(A3+S*A5))
9287 fadd.x %fp3,%fp0 # FP0 IS R+R*S*(A2+S*A4)
9288 fadd.x %fp2,%fp0 # FP0 IS EXP(R) - 1
9290 fmovm.x (%sp)+,&0x30 # restore fp2/fp3
9292 #--FINAL RECONSTRUCTION PROCESS
9293 #--EXP(X) = 2^M*2^(J/64) + 2^M*2^(J/64)*(EXP(R)-1) - (1 OR 0)
9295 fmul.x FACT1(%a6),%fp0
9296 fadd.x FACT2(%a6),%fp0
9297 fadd.x FACT1(%a6),%fp0
9299 fmov.l %d0,%fpcr # restore users round prec,mode
9300 mov.w %d2,ADJFACT(%a6) # INSERT EXPONENT
9302 mov.l &0x80000000,ADJFACT+4(%a6)
9303 clr.l ADJFACT+8(%a6)
9304 mov.b &FMUL_OP,%d1 # last inst is MUL
9305 fmul.x ADJFACT(%a6),%fp0 # FINAL ADJUSTMENT
9310 #--ENTRY POINT FOR 10**(X) FOR DENORMALIZED ARGUMENT
9312 fmov.l %d0,%fpcr # set user's rounding mode/precision
9313 fmov.s &0x3F800000,%fp0 # RETURN 1 + X
9315 or.l &0x00800001,%d1
9319 #########################################################################
9320 # smovcr(): returns the ROM constant at the offset specified in d1 #
9321 # rounded to the mode and precision specified in d0. #
9323 # INPUT *************************************************************** #
9324 # d0 = rnd prec,mode #
9327 # OUTPUT ************************************************************** #
9328 # fp0 = the ROM constant rounded to the user's rounding mode,prec #
9330 #########################################################################
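#
# smovcr's range checks split the ROM offset into four cases: $00 is pi,
# $0b-$0e hit the small table, $30-$3f hit the big table, and everything
# else yields a zero.  A minimal C sketch of that decoding follows
# (illustration only, not part of the FPSP; the three rounding-variant
# tables are collapsed into one, and the 10^4096 entry simply overflows
# a double to +inf):
#
#	#include <math.h>
#	#include <stdio.h>
#
#	static const double small_tbl_c[4] = {
#		0.30102999566398119521,	/* 0x0b: log10(2) */
#		2.71828182845904523536,	/* 0x0c: e        */
#		1.44269504088896340736,	/* 0x0d: log2(e)  */
#		0.43429448190325182765,	/* 0x0e: log10(e) */
#	};
#
#	static double sketch_movcr(unsigned off)
#	{
#		if (off == 0x00)
#			return 3.14159265358979323846;		/* pi     */
#		if (off >= 0x0b && off <= 0x0e)
#			return small_tbl_c[off - 0x0b];
#		if (off == 0x30)
#			return 0.69314718055994530942;		/* ln(2)  */
#		if (off == 0x31)
#			return 2.30258509299404568402;		/* ln(10) */
#		if (off >= 0x32 && off <= 0x3f) {	/* 10^0,10^1,10^2,10^4,...,10^4096 */
#			int i = (int)off - 0x32;
#			return pow(10.0, i == 0 ? 0.0 : (double)(1 << (i - 1)));
#		}
#		return 0.0;		/* all other offsets return a zero */
#	}
#
#	int main(void)
#	{
#		printf("%g %g\n", sketch_movcr(0x00), sketch_movcr(0x35));
#		return 0;
#	}
#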
9334 mov.l %d1,-(%sp) # save rom offset for a sec
9336 lsr.b &0x4,%d0 # shift ctrl bits to lo
9337 mov.l %d0,%d1 # make a copy
9338 andi.w &0x3,%d1 # extract rnd mode
9339 andi.w &0xc,%d0 # extract rnd prec
9340 swap %d0 # put rnd prec in hi
9341 mov.w %d1,%d0 # put rnd mode in lo
9343 mov.l (%sp)+,%d1 # get rom offset
9346 # check range of offset
9348 tst.b %d1 # if zero, offset is to pi
9349 beq.b pi_tbl # it is pi
9350 cmpi.b %d1,&0x0a # check range $01 - $0a
9351 ble.b z_val # if in this range, return zero
9352 cmpi.b %d1,&0x0e # check range $0b - $0e
9353 ble.b sm_tbl # valid constants in this range
9354 cmpi.b %d1,&0x2f # check range $10 - $2f
9355 ble.b z_val # if in this range, return zero
9356 cmpi.b %d1,&0x3f # check range $30 - $3f
9357 ble.b bg_tbl # valid constants in this range
9360 bra.l ld_pzero # return a zero
9363 # the answer is PI rounded to the proper precision.
9365 # fetch a pointer to the answer table relating to the proper rounding
9369 tst.b %d0 # is rmode RN?
9370 bne.b pi_not_rn # no
9372 lea.l PIRN(%pc),%a0 # yes; load PI RN table addr
9375 cmpi.b %d0,&rp_mode # is rmode RP?
9378 lea.l PIRZRM(%pc),%a0 # no; load PI RZ,RM table addr
9381 lea.l PIRP(%pc),%a0 # load PI RP table addr
9385 # the answer is one of:
9386 # $0B log10(2) (inexact)
9388 # $0D log2(e) (inexact)
9389 # $0E log10(e) (exact)
9391 # fetch a pointer to the answer table relating to the proper rounding
9395 subi.b &0xb,%d1 # make offset in 0-4 range
9396 tst.b %d0 # is rmode RN?
9397 bne.b sm_not_rn # no
9399 lea.l SMALRN(%pc),%a0 # yes; load RN table addr
9401 cmpi.b %d1,&0x2 # is result log10(e)?
9402 ble.b set_finx # no; answer is inexact
9403 bra.b no_finx # yes; answer is exact
9405 cmpi.b %d0,&rp_mode # is rmode RP?
9408 lea.l SMALRZRM(%pc),%a0 # no; load RZ,RM table addr
9411 lea.l SMALRP(%pc),%a0 # load RP table addr
9415 # the answer is one of:
9416 # $30 ln(2) (inexact)
9417 # $31 ln(10) (inexact)
9424 # $38 10^32 (inexact)
9425 # $39 10^64 (inexact)
9426 # $3A 10^128 (inexact)
9427 # $3B 10^256 (inexact)
9428 # $3C 10^512 (inexact)
9429 # $3D 10^1024 (inexact)
9430 # $3E 10^2048 (inexact)
9431 # $3F 10^4096 (inexact)
9433 # fetch a pointer to the answer table relating to the proper rounding
9437 subi.b &0x30,%d1 # make offset in 0-f range
9438 tst.b %d0 # is rmode RN?
9439 bne.b bg_not_rn # no
9441 lea.l BIGRN(%pc),%a0 # yes; load RN table addr
9443 cmpi.b %d1,&0x1 # is offset <= $31?
9444 ble.b set_finx # yes; answer is inexact
9445 cmpi.b %d1,&0x7 # is $32 <= offset <= $37?
9446 ble.b no_finx # yes; answer is exact
9447 bra.b set_finx # no; answer is inexact
9449 cmpi.b %d0,&rp_mode # is rmode RP?
9452 lea.l BIGRZRM(%pc),%a0 # no; load RZ,RM table addr
9455 lea.l BIGRP(%pc),%a0 # load RP table addr
9458 # answer is inexact, so set INEX2 and AINEX in the user's FPSR.
9460 ori.l &inx2a_mask,USER_FPSR(%a6) # set INEX2/AINEX
9462 mulu.w &0xc,%d1 # offset points into tables
9463 swap %d0 # put rnd prec in lo word
9464 tst.b %d0 # is precision extended?
9466 bne.b not_ext # if xprec, do not call round
9468 # Precision is extended
9469 fmovm.x (%a0,%d1.w),&0x80 # return result in fp0
9472 # Precision is single or double
9474 swap %d0 # rnd prec in upper word
9476 # call round() to round the answer to the proper precision.
9477 # exponents out of range for single or double DO NOT cause underflow
9479 mov.w 0x0(%a0,%d1.w),FP_SCR1_EX(%a6) # load first word
9480 mov.l 0x4(%a0,%d1.w),FP_SCR1_HI(%a6) # load second word
9481 mov.l 0x8(%a0,%d1.w),FP_SCR1_LO(%a6) # load third word
9483 clr.l %d0 # clear g,r,s
9484 lea FP_SCR1(%a6),%a0 # pass ptr to answer
9485 clr.w LOCAL_SGN(%a0) # sign always positive
9486 bsr.l _round # round the mantissa
9488 fmovm.x (%a0),&0x80 # return rounded result in fp0
9493 PIRN: long 0x40000000,0xc90fdaa2,0x2168c235 # pi
9494 PIRZRM: long 0x40000000,0xc90fdaa2,0x2168c234 # pi
9495 PIRP: long 0x40000000,0xc90fdaa2,0x2168c235 # pi
9497 SMALRN: long 0x3ffd0000,0x9a209a84,0xfbcff798 # log10(2)
9498 long 0x40000000,0xadf85458,0xa2bb4a9a # e
9499 long 0x3fff0000,0xb8aa3b29,0x5c17f0bc # log2(e)
9500 long 0x3ffd0000,0xde5bd8a9,0x37287195 # log10(e)
9501 long 0x00000000,0x00000000,0x00000000 # 0.0
9504 long 0x3ffd0000,0x9a209a84,0xfbcff798 # log10(2)
9505 long 0x40000000,0xadf85458,0xa2bb4a9a # e
9506 long 0x3fff0000,0xb8aa3b29,0x5c17f0bb # log2(e)
9507 long 0x3ffd0000,0xde5bd8a9,0x37287195 # log10(e)
9508 long 0x00000000,0x00000000,0x00000000 # 0.0
9510 SMALRP: long 0x3ffd0000,0x9a209a84,0xfbcff799 # log10(2)
9511 long 0x40000000,0xadf85458,0xa2bb4a9b # e
9512 long 0x3fff0000,0xb8aa3b29,0x5c17f0bc # log2(e)
9513 long 0x3ffd0000,0xde5bd8a9,0x37287195 # log10(e)
9514 long 0x00000000,0x00000000,0x00000000 # 0.0
9516 BIGRN
: long
0x3ffe0000,0xb17217f7,0xd1cf79ac # ln(2)
9517 long
0x40000000,0x935d8ddd,0xaaa8ac17 # ln(10)
9519 long
0x3fff0000,0x80000000,0x00000000 # 10 ^ 0
9520 long
0x40020000,0xA0000000,0x00000000 # 10 ^ 1
9521 long
0x40050000,0xC8000000,0x00000000 # 10 ^ 2
9522 long
0x400C0000,0x9C400000,0x00000000 # 10 ^ 4
9523 long
0x40190000,0xBEBC2000,0x00000000 # 10 ^ 8
9524 long
0x40340000,0x8E1BC9BF,0x04000000 # 10 ^ 16
9525 long
0x40690000,0x9DC5ADA8,0x2B70B59E # 10 ^ 32
9526 long
0x40D30000,0xC2781F49,0xFFCFA6D5 # 10 ^ 64
9527 long
0x41A80000,0x93BA47C9,0x80E98CE0 # 10 ^ 128
9528 long
0x43510000,0xAA7EEBFB,0x9DF9DE8E # 10 ^ 256
9529 long
0x46A30000,0xE319A0AE,0xA60E91C7 # 10 ^ 512
9530 long
0x4D480000,0xC9767586,0x81750C17 # 10 ^ 1024
9531 long
0x5A920000,0x9E8B3B5D,0xC53D5DE5 # 10 ^ 2048
9532 long
0x75250000,0xC4605202,0x8A20979B # 10 ^ 4096
9535 long
0x3ffe0000,0xb17217f7,0xd1cf79ab # ln(2)
9536 long
0x40000000,0x935d8ddd,0xaaa8ac16 # ln(10)
9538 long
0x3fff0000,0x80000000,0x00000000 # 10 ^ 0
9539 long
0x40020000,0xA0000000,0x00000000 # 10 ^ 1
9540 long
0x40050000,0xC8000000,0x00000000 # 10 ^ 2
9541 long
0x400C0000,0x9C400000,0x00000000 # 10 ^ 4
9542 long
0x40190000,0xBEBC2000,0x00000000 # 10 ^ 8
9543 long
0x40340000,0x8E1BC9BF,0x04000000 # 10 ^ 16
9544 long
0x40690000,0x9DC5ADA8,0x2B70B59D # 10 ^ 32
9545 long
0x40D30000,0xC2781F49,0xFFCFA6D5 # 10 ^ 64
9546 long
0x41A80000,0x93BA47C9,0x80E98CDF # 10 ^ 128
9547 long
0x43510000,0xAA7EEBFB,0x9DF9DE8D # 10 ^ 256
9548 long
0x46A30000,0xE319A0AE,0xA60E91C6 # 10 ^ 512
9549 long
0x4D480000,0xC9767586,0x81750C17 # 10 ^ 1024
9550 long
0x5A920000,0x9E8B3B5D,0xC53D5DE4 # 10 ^ 2048
9551 long
0x75250000,0xC4605202,0x8A20979A # 10 ^ 4096
9554 long
0x3ffe0000,0xb17217f7,0xd1cf79ac # ln(2)
9555 long
0x40000000,0x935d8ddd,0xaaa8ac17 # ln(10)
9557 long
0x3fff0000,0x80000000,0x00000000 # 10 ^ 0
9558 long
0x40020000,0xA0000000,0x00000000 # 10 ^ 1
9559 long
0x40050000,0xC8000000,0x00000000 # 10 ^ 2
9560 long
0x400C0000,0x9C400000,0x00000000 # 10 ^ 4
9561 long
0x40190000,0xBEBC2000,0x00000000 # 10 ^ 8
9562 long
0x40340000,0x8E1BC9BF,0x04000000 # 10 ^ 16
9563 long
0x40690000,0x9DC5ADA8,0x2B70B59E # 10 ^ 32
9564 long
0x40D30000,0xC2781F49,0xFFCFA6D6 # 10 ^ 64
9565 long
0x41A80000,0x93BA47C9,0x80E98CE0 # 10 ^ 128
9566 long
0x43510000,0xAA7EEBFB,0x9DF9DE8E # 10 ^ 256
9567 long
0x46A30000,0xE319A0AE,0xA60E91C7 # 10 ^ 512
9568 long
0x4D480000,0xC9767586,0x81750C18 # 10 ^ 1024
9569 long
0x5A920000,0x9E8B3B5D,0xC53D5DE5 # 10 ^ 2048
9570 long
0x75250000,0xC4605202,0x8A20979B # 10 ^ 4096
9572 #########################################################################
9573 # sscale(): computes the destination operand scaled by the source #
9574 # operand. If the absolute value of the source operand is #
9575 # >= 2^14, an overflow or underflow is returned. #
9577 # INPUT *************************************************************** #
9578 # a0 = pointer to double-extended source operand X #
9579 # a1 = pointer to double-extended destination operand Y #
9581 # OUTPUT ************************************************************** #
9582 # fp0 = scale(X,Y) #
9584 #########################################################################
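#
# In effect fscale multiplies the destination by 2 raised to the integer
# part of the source, and a source magnitude of 2^14 or more is punted to
# the overflow/underflow handlers instead.  A minimal C sketch follows
# (illustration only, not part of the FPSP; ldexp() stands in for the
# exponent arithmetic done by hand below, and the out-of-range cases return
# round-to-nearest default results rather than taking an exception):
#
#	#include <math.h>
#	#include <stdio.h>
#
#	static double sketch_scale(double y, double x)
#	{
#		if (fabs(x) >= 16384.0)		/* |src| >= 2^14            */
#			return (x > 0.0) ? copysign(HUGE_VAL, y)  /* overflow  */
#					 : copysign(0.0, y);	  /* underflow */
#		return ldexp(y, (int)x);	/* y * 2^(trunc(x))         */
#	}
#
#	int main(void)
#	{
#		printf("%g\n", sketch_scale(3.0, 2.9));	/* prints 12 */
#		return 0;
#	}
#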
9590 mov.l %d0,-(%sp) # store off ctrl bits for now
9592 mov.w DST_EX(%a1),%d1 # get dst exponent
9593 smi.b SIGN(%a6) # use SIGN to hold dst sign
9594 andi.l &0x00007fff,%d1 # strip sign from dst exp
9596 mov.w SRC_EX(%a0),%d0 # check src bounds
9597 andi.w &0x7fff,%d0 # clr src sign bit
9598 cmpi.w %d0,&0x3fff # is src ~ ZERO?
9599 blt.w src_small # yes
9600 cmpi.w %d0,&0x400c # no; is src too big?
9604 # Source is within 2^14 range.
9607 fintrz.x SRC(%a0),%fp0 # calc int of src
9608 fmov.l %fp0,%d0 # int src to d0
9609 # don't want any accrued bits from the fintrz showing up later since
9610 # we may need to read the fpsr for the last fp op in t_catch2().
9613 tst.b DST_HI(%a1) # is dst denormalized?
9616 # the dst is a DENORM. normalize the DENORM and add the adjustment to
9617 # the src value. then, jump to the norm part of the routine.
9619 mov.l %d0,-(%sp) # save src for now
9621 mov.w DST_EX(%a1),FP_SCR0_EX(%a6) # make a copy
9622 mov.l DST_HI(%a1),FP_SCR0_HI(%a6)
9623 mov.l DST_LO(%a1),FP_SCR0_LO(%a6)
9625 lea FP_SCR0(%a6),%a0 # pass ptr to DENORM
9626 bsr.l norm # normalize the DENORM
9628 add.l (%sp)+,%d0 # add adjustment to src
9630 fmovm.x FP_SCR0(%a6),&0x80 # load normalized DENORM
9632 cmpi.w %d0,&-0x3fff # is the shft amt really low?
9633 bge.b sok_norm2 # thank goodness no
9635 # the multiply factor that we're trying to create should be a denorm
9636 # for the multiply to work. therefore, we're going to actually do a
9637 # multiply with a denorm which will cause an unimplemented data type
9638 # exception to be put into the machine which will be caught and corrected
9639 # later. we don't do this with the DENORMs above because this method
9640 # is slower. but, don't fret, I don't see it being used much either.
9641 fmov.l (%sp)+,%fpcr # restore user fpcr
9642 mov.l &0x80000000,%d1 # load normalized mantissa
9643 subi.l &-0x3fff,%d0 # how many should we shift?
9644 neg.l %d0 # make it positive
9645 cmpi.b %d0,&0x20 # is it > 32?
9646 bge.b sok_dnrm_32 # yes
9647 lsr.l %d0,%d1 # no; bit stays in upper lw
9648 clr.l -(%sp) # insert zero low mantissa
9649 mov.l %d1,-(%sp) # insert new high mantissa
9650 clr.l -(%sp) # make zero exponent
9653 subi.b &0x20,%d0 # get shift count
9654 lsr.l %d0,%d1 # make low mantissa longword
9655 mov.l %d1,-(%sp) # insert new low mantissa
9656 clr.l -(%sp) # insert zero high mantissa
9657 clr.l -(%sp) # make zero exponent
9660 # the src will force the dst to a DENORM value or worse. so, let's
9661 # create an fp multiply that will create the result.
9663 fmovm.x DST(%a1),&0x80 # load fp0 with normalized src
9665 fmov.l (%sp)+,%fpcr # restore user fpcr
9667 addi.w &0x3fff,%d0 # turn src amt into exp value
9668 swap %d0 # put exponent in high word
9669 clr.l -(%sp) # insert new exponent
9670 mov.l &0x80000000,-(%sp) # insert new high mantissa
9671 mov.l %d0,-(%sp) # insert new lo mantissa
9674 fmov.l %fpcr,%d0 # d0 needs fpcr for t_catch2
9675 mov.b &FMUL_OP,%d1 # last inst is MUL
9676 fmul.x (%sp)+,%fp0 # do the multiply
9677 bra t_catch2 # catch any exceptions
9680 # Source is outside of 2^14 range. Test the sign and branch
9681 # to the appropriate exception handler.
9684 mov.l (%sp)+,%d0 # restore ctrl bits
9685 exg %a0,%a1 # swap src,dst ptrs
9686 tst.b SRC_EX(%a1) # is src negative?
9687 bmi t_unfl # yes; underflow
9688 bra t_ovfl_sc # no; overflow
9691 # The source input is below 1, so we check for denormalized numbers
9695 tst.b DST_HI(%a1) # is dst denormalized?
9696 bpl.b ssmall_done # yes
9699 fmov.l %d0,%fpcr # no; load control bits
9700 mov.b &FMOV_OP,%d1 # last inst is MOVE
9701 fmov.x DST(%a1),%fp0 # simply return dest
9704 mov.l (%sp)+,%d0 # load control bits into d1
9705 mov.l %a1,%a0 # pass ptr to dst
9708 #########################################################################
9709 # smod(): computes the fp MOD of the input values X,Y. #
9710 # srem(): computes the fp (IEEE) REM of the input values X,Y. #
9712 # INPUT *************************************************************** #
9713 # a0 = pointer to extended precision input X #
9714 # a1 = pointer to extended precision input Y #
9715 # d0 = round precision,mode #
9717 # The input operands X and Y can be either normalized or #
9720 # OUTPUT ************************************************************** #
9721 # fp0 = FREM(X,Y) or FMOD(X,Y) #
9723 # ALGORITHM *********************************************************** #
9725 # Step 1. Save and strip signs of X and Y: signX := sign(X), #
9726 # signY := sign(Y), X := |X|, Y := |Y|, #
9727 # signQ := signX EOR signY. Record whether MOD or REM #
9730 # Step 2. Set L := expo(X)-expo(Y), k := 0, Q := 0. #
9732 # R := X, go to Step 4. #
9734 # R := 2^(-L)X, j := L. #
9737 # Step 3. Perform MOD(X,Y) #
9738 # 3.1 If R = Y, go to Step 9. #
9739 # 3.2 If R > Y, then { R := R - Y, Q := Q + 1} #
9740 # 3.3 If j = 0, go to Step 4. #
9741 # 3.4 k := k + 1, j := j - 1, Q := 2Q, R := 2R. Go to #
9744 # Step 4. At this point, R = X - QY = MOD(X,Y). Set #
9745 # Last_Subtract := false (used in Step 7 below). If #
9746 # MOD is requested, go to Step 6. #
9748 # Step 5. R = MOD(X,Y), but REM(X,Y) is requested. #
9749 # 5.1 If R < Y/2, then R = MOD(X,Y) = REM(X,Y). Go to #
9751 # 5.2 If R > Y/2, then { set Last_Subtract := true, #
9752 # Q := Q + 1, Y := signY*Y }. Go to Step 6. #
9753 # 5.3 This is the tricky case of R = Y/2. If Q is odd, #
9754 # then { Q := Q + 1, signX := -signX }. #
9756 # Step 6. R := signX*R. #
9758 # Step 7. If Last_Subtract = true, R := R - Y. #
9760 # Step 8. Return signQ, last 7 bits of Q, and R as required. #
9762 # Step 9. At this point, R = 2^(-j)*X - Q Y = Y. Thus, #
9763 # X = 2^(j)*(Q+1)Y. set Q := 2^(j)*(Q+1), #
9764 # R := 0. Return signQ, last 7 bits of Q, and R. #
9766 #########################################################################
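#
# A minimal C sketch of the Step 2-6 shift-and-subtract loop follows
# (illustration only, not part of the FPSP; doubles replace the 64-bit
# mantissa arithmetic kept in d1/d2, the full quotient is kept here even
# though the hardware only reports its low seven bits, and 'rem' selects
# the IEEE REM rounding of the quotient over the MOD truncation):
#
#	#include <math.h>
#	#include <stdio.h>
#
#	static double sketch_modrem(double x, double y, int rem, long *quo)
#	{
#		double ax = fabs(x), ay = fabs(y), r;
#		int ex, ey, j;
#		long q = 0;
#
#		frexp(ax, &ex);
#		frexp(ay, &ey);
#		j = ex - ey;			/* L := expo(X) - expo(Y)       */
#		r = (j < 0) ? ax : ldexp(ax, -j); /* R := 2^(-L) * X            */
#
#		while (j >= 0) {		/* Step 3: build Q bit by bit   */
#			if (r >= ay) { r -= ay; q++; }	/* 3.1/3.2              */
#			if (j-- == 0) break;	/* 3.3: j = 0 -> done           */
#			q += q; r += r;		/* 3.4: Q := 2Q, R := 2R        */
#		}
#		if (rem && (r + r > ay || (r + r == ay && (q & 1)))) {
#			r -= ay; q++;		/* Step 5: round Q to nearest/even */
#		}
#		if (x < 0.0) r = -r;		/* Step 6: R := signX * R       */
#		*quo = ((x < 0.0) != (y < 0.0)) ? -q : q; /* signQ = signX EOR signY */
#		return r;
#	}
#
#	int main(void)
#	{
#		long q;
#		printf("%g\n", sketch_modrem(7.0, 2.0, 1, &q)); /* REM(7,2) = -1 */
#		return 0;
#	}
#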
9769 set Sc_Flag,L_SCR3+1
9784 long 0x00010000,0x80000000,0x00000000,0x00000000
9788 clr.b FPSR_QBYTE(%a6)
9789 mov.l %d0,-(%sp) # save ctrl bits
9795 clr.b FPSR_QBYTE(%a6)
9796 mov.l %d0,-(%sp) # save ctrl bits
9797 mov.b &0x1,Mod_Flag(%a6)
9800 #..Save sign of X and Y
9801 movm.l &0x3f00,-(%sp) # save data registers
9802 mov.w SRC_EX(%a0),%d3
9803 mov.w %d3,SignY(%a6)
9804 and.l &0x00007FFF,%d3 # Y := |Y|
9807 mov.l SRC_HI(%a0),%d4
9808 mov.l SRC_LO(%a0),%d5 # (D3,D4,D5) is |Y|
9813 mov.l &0x00003FFE,%d3 # $3FFD + 1
9822 bfffo %d4{&0:&32},%d6
9824 sub.l %d6,%d3 # (D3,D4,D5) is normalized
9825 # ...with bias $7FFD
9830 bfffo %d4{&0:&32},%d6
9833 mov.l %d5,%d7 # a copy of D5
9838 or.l %d7,%d4 # (D3,D4,D5) normalized
9839 # ...with bias $7FFD
9843 add.l &0x00003FFE,%d3 # (D3,D4,D5) normalized
9844 # ...with bias $7FFD
9847 mov.w DST_EX(%a1),%d0
9848 mov.w %d0,SignX(%a6)
9849 mov.w SignY(%a6),%d1
9851 and.l &0x00008000,%d1
9852 mov.w %d1,SignQ(%a6) # sign(Q) obtained
9853 and.l &0x00007FFF,%d0
9854 mov.l DST_HI(%a1),%d1
9855 mov.l DST_LO(%a1),%d2 # (D0,D1,D2) is |X|
9858 mov.l &0x00003FFE,%d0
9867 bfffo %d1{&0:&32},%d6
9869 sub.l %d6,%d0 # (D0,D1,D2) is normalized
9870 # ...with bias $7FFD
9875 bfffo %d1{&0:&32},%d6
9878 mov.l %d2,%d7 # a copy of D2
9883 or.l %d7,%d1 # (D0,D1,D2) normalized
9884 # ...with bias $7FFD
9888 add.l &0x00003FFE,%d0 # (D0,D1,D2) normalized
9889 # ...with bias $7FFD
9893 mov.l %d3,L_SCR1(%a6) # save biased exp(Y)
9894 mov.l %d0,-(%sp) # save biased exp(X)
9895 sub.l %d3,%d0 # L := expo(X)-expo(Y)
9897 clr.l %d6 # D6 := carry <- 0
9899 mov.l &0,%a1 # A1 is k; j+k=L, Q=0
9901 #..(Carry,D1,D2) is R
9905 #..expo(X) < expo(Y). Thus X = mod(X,Y)
9907 mov.l (%sp)+,%d0 # restore d0
9911 addq.l &0x4,%sp # erase exp(X)
9912 #..At this point R = 2^(-L)X; Q = 0; k = 0; and k+j = L
9914 tst.l %d6 # test carry bit
9917 #..At this point carry = 0, R = (D1,D2), Y = (D4,D5)
9918 cmp.l %d1,%d4 # compare hi(R) and hi(Y)
9920 cmp.l %d2,%d5 # compare lo(R) and lo(Y)
9923 #..At this point, R = Y
9927 #..use the borrow of the previous compare
9928 bcs.b R_LT_Y # borrow is set iff R < Y
9931 #..If Carry is set, then Y < (Carry,D1,D2) < 2Y. Otherwise, Carry = 0
9932 #..and Y < (D1,D2) < 2Y. Either way, perform R - Y
9933 sub.l %d5,%d2 # lo(R) - lo(Y)
9934 subx.l %d4,%d1 # hi(R) - hi(Y)
9935 clr.l %d6 # clear carry
9936 addq.l &1,%d3 # Q := Q + 1
9939 #..At this point, Carry=0, R < Y. R = 2^(k-L)X - QY; k+j = L; j >= 0.
9940 tst.l %d0 # see if j = 0.
9943 add.l %d3,%d3 # Q := 2Q
9944 add.l %d2,%d2 # lo(R) = 2lo(R)
9945 roxl.l &1,%d1 # hi(R) = 2hi(R) + carry
9946 scs %d6 # set Carry if 2(R) overflows
9947 addq.l &1,%a1 # k := k+1
9948 subq.l &1,%d0 # j := j - 1
9949 #..At this point, R=(Carry,D1,D2) = 2^(k-L)X - QY, j+k=L, j >= 0, R < 2Y.
9954 #..k = L, j = 0, Carry = 0, R = (D1,D2) = X - QY, R < Y.
9957 mov.l L_SCR1(%a6),%d0 # new biased expo of R
9966 bfffo %d1{&0:&32},%d6
9968 sub.l %d6,%d0 # (D0,D1,D2) is normalized
9969 # ...with bias $7FFD
9974 bfffo %d1{&0:&32},%d6
9975 bmi.b Get_Mod # already normalized
9978 mov.l %d2,%d7 # a copy of D2
9983 or.l %d7,%d1 # (D0,D1,D2) normalized
9987 cmp.l %d0,&0x000041FE
9993 mov.l L_SCR1(%a6),%d6
9997 fmov.x R(%a6),%fp0 # no exception
9998 mov.b &1,Sc_Flag(%a6)
10001 mov.l %d1,R_Hi(%a6)
10002 mov.l %d2,R_Lo(%a6)
10005 mov.l L_SCR1(%a6),%d6
10007 mov.l %d6,L_SCR1(%a6)
10010 mov.l %d4,Y_Hi(%a6)
10011 mov.l %d5,Y_Lo(%a6)
10016 tst.b Mod_Flag(%a6)
10019 mov.l L_SCR1(%a6),%d6 # new biased expo(Y)
10020 subq.l &1,%d6 # biased expo(Y/2)
10036 fsub.x Y(%a6),%fp0 # no exceptions
10037 addq.l &1,%d3 # Q := Q + 1
10042 mov.w SignX(%a6),%d6
10050 mov.w SignQ(%a6),%d6 # D6 is sign(Q)
10053 and.l &0x0000007F,%d3 # 7 bits of Q
10054 or.l %d6,%d3 # sign and bits of Q
10057 # and.l &0xFF00FFFF,%d6
10059 # fmov.l %d6,%fpsr # put Q in fpsr
10060 mov.b %d3,FPSR_QBYTE(%a6) # put Q in fpsr
10064 movm.l (%sp)+,&0xfc # {%d2-%d7}
10069 mov.b &FMUL_OP,%d1 # last inst is MUL
10070 fmul.x Scale(%pc),%fp0 # may cause underflow
10072 # the '040 package did this apparently to see if the dst operand for the
10073 # preceding fmul was a denorm. but, it better not have been since the
10074 # algorithm just got done playing with fp0 and expected no exceptions
10075 # as a result. trust me...
10076 # bra t_avoid_unsupp # check for denorm as a
10077 # ;result of the scaling
10080 mov.b &FMOV_OP,%d1 # last inst is MOVE
10081 fmov.x %fp0,%fp0 # capture exceptions & round
10085 #..R = 2^(-j)X - Q Y = Y, thus R = 0 and quotient = 2^j (Q+1)
10087 cmp.l %d0,&8 # D0 is j
10097 fmov.s &0x00000000,%fp0
10102 #..Check parity of Q
10104 and.l &0x00000001,%d6
10106 beq.w Fix_Sign # Q is even
10108 #..Q is odd, Q := Q + 1, signX := -signX
10110 mov.w SignX(%a6),%d6
10111 eor.l &0x00008000,%d6
10112 mov.w %d6,SignX(%a6)
10115 qnan: long 0x7fff0000, 0xffffffff, 0xffffffff
10117 #########################################################################
10118 # XDEF **************************************************************** #
10119 # t_dz(): Handle DZ exception during transcendental emulation. #
10120 # Sets N bit according to sign of source operand. #
10121 # t_dz2(): Handle DZ exception during transcendental emulation. #
10122 # Sets N bit always. #
10124 # XREF **************************************************************** #
10127 # INPUT *************************************************************** #
10128 # a0 = pointer to source operand #
10130 # OUTPUT ************************************************************** #
10131 # fp0 = default result #
10133 # ALGORITHM *********************************************************** #
10134 # - Store properly signed INF into fp0. #
10135 # - Set FPSR exception status dz bit, ccode inf bit, and #
10136 # accrued dz bit. #
10138 #########################################################################
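#
# A minimal C sketch of the t_dz/t_dz2 default result follows (illustration
# only, not part of the FPSP; the FPSR bit positions below are my reading of
# the 68060 layout and should be checked against the real include file):
#
#	#include <math.h>
#	#include <stdint.h>
#
#	#define CC_N	(1u << 27)	/* 'N' condition code            */
#	#define CC_I	(1u << 25)	/* 'I' (infinity) condition code */
#	#define EXC_DZ	(1u << 10)	/* DZ exception status bit       */
#	#define ACC_DZ	(1u << 4)	/* accrued DZ bit                */
#
#	static double sketch_t_dz(double src, uint32_t *fpsr)
#	{
#		*fpsr |= EXC_DZ | ACC_DZ | CC_I;
#		if (signbit(src)) {		/* negative source: -INF, set N */
#			*fpsr |= CC_N;
#			return -INFINITY;
#		}
#		return INFINITY;		/* positive source: +INF        */
#	}
#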
10142 tst.b SRC_EX(%a0) # no; is src negative?
10146 fmov.s &0x7f800000,%fp0 # return +INF in fp0
10147 ori.l &dzinf_mask,USER_FPSR(%a6) # set I/DZ/ADZ
10152 fmov.s &0xff800000,%fp0 # return -INF in fp0
10153 ori.l &dzinf_mask+neg_mask,USER_FPSR(%a6) # set N/I/DZ/ADZ
10156 #################################################################
10157 # OPERR exception: #
10158 # - set FPSR exception status operr bit, condition code #
10159 # nan bit; Store default NAN into fp0 #
10160 #################################################################
10163 ori.l &opnan_mask,USER_FPSR(%a6) # set NaN/OPERR/AIOP
10164 fmovm.x qnan(%pc),&0x80 # return default NAN in fp0
10167 #################################################################
10168 # Extended DENORM: #
10169 # - For all functions that have a denormalized input and #
10170 # that f(x)=x, this is the entry point. #
10171 # - we only return the EXOP here if either underflow or #
10172 # inexact is enabled. #
10173 #################################################################
10175 # Entry point for scale w/ extended denorm. The function does
10176 # NOT set INEX2/AUNFL/AINEX.
10179 ori.
l &unfl_mask
,USER_FPSR
(%a6
) # set UNFL
10184 ori.
l &unfinx_mask
,USER_FPSR
(%a6
) # set UNFL/INEX2/AUNFL/AINEX
10187 mov.
l %a0
,%a1
# make copy of src ptr
10188 mov.
l %d0
,%d1
# make copy of rnd prec,mode
10189 andi.b &0xc0,%d1
# extended precision?
10190 bne.
b xdnrm_sd
# no
10192 # result precision is extended.
10193 tst.
b LOCAL_EX
(%a0
) # is denorm negative?
10194 bpl.
b xdnrm_exit
# no
10196 bset
&neg_bit
,FPSR_CC
(%a6
) # yes; set 'N' ccode bit
10199 # result precision is single or double
10202 tst.
b LOCAL_EX
(%a0
) # is denorm pos or neg?
10203 smi.
b %d1
# set d1 accordingly
10207 fmovm.x
(%a0
),&0x80 # return default result in fp0
10209 mov.
b FPCR_ENABLE
(%a6
),%d0
10210 andi.b &0x0a,%d0
# is UNFL or INEX enabled?
10211 bne.
b xdnrm_ena
# yes
10217 # we have a DENORM that needs to be converted into an EXOP.
10218 # so, normalize the mantissa, add 0x6000 to the new exponent,
10219 # and return the result in fp1.
10221 mov.w LOCAL_EX
(%a1
),FP_SCR0_EX
(%a6
)
10222 mov.
l LOCAL_HI
(%a1
),FP_SCR0_HI
(%a6
)
10223 mov.
l LOCAL_LO
(%a1
),FP_SCR0_LO
(%a6
)
10225 lea FP_SCR0
(%a6
),%a0
10226 bsr.
l norm
# normalize mantissa
10227 addi.
l &0x6000,%d0
# add extra bias
10228 andi.w
&0x8000,FP_SCR0_EX
(%a6
) # keep old sign
10229 or.w
%d0
,FP_SCR0_EX
(%a6
) # insert new exponent
10231 fmovm.x FP_SCR0
(%a6
),&0x40 # return EXOP in fp1
10234 #################################################################
10235 # UNFL exception: #
10236 # - This routine is for cases where even an EXOP isn't #
10237 # large enough to hold the range of this result. #
10238 # In such a case, the EXOP equals zero. #
10239 # - Return the default result to the proper precision #
10240 # with the sign of this result being the same as that #
10241 # of the src operand. #
10242 # - t_unfl2() is provided to force the result sign to #
10243 # positive which is the desired result for fetox(). #
10244 #################################################################
10247 ori.
l &unfinx_mask
,USER_FPSR
(%a6
) # set UNFL/INEX2/AUNFL/AINEX
10249 tst.
b (%a0
) # is result pos or neg?
10250 smi.
b %d1
# set d1 accordingly
10251 bsr.
l unf_sub
# calc default unfl result
10252 fmovm.x
(%a0
),&0x80 # return default result in fp0
10254 fmov.s
&0x00000000,%fp1
# return EXOP in fp1
10257 # t_unfl2 ALWAYS tells unf_sub to create a positive result
10260 ori.
l &unfinx_mask
,USER_FPSR
(%a6
) # set UNFL/INEX2/AUNFL/AINEX
10262 sf.b %d1
# set d0 to represent positive
10263 bsr.
l unf_sub
# calc default unfl result
10264 fmovm.x
(%a0
),&0x80 # return default result in fp0
10266 fmov.s
&0x0000000,%fp1
# return EXOP in fp1
10269 #################################################################
10270 # OVFL exception: #
10271 # - This routine is for cases where even an EXOP isn't #
10272 # large enough to hold the range of this result. #
10273 # - Return the default result to the proper precision #
10274 # with the sign of this result being the same as that #
10275 # of the src operand. #
10276 # - t_ovfl2() is provided to force the result sign to #
10277 # positive which is the desired result for fcosh(). #
10278 # - t_ovfl_sc() is provided for scale() which only sets #
10279 # the inexact bits if the number is inexact for the #
10280 # precision indicated. #
10281 #################################################################
10285 ori.
l &ovfl_inx_mask
,USER_FPSR
(%a6
) # set OVFL/AOVFL/AINEX
10287 mov.
b %d0
,%d1
# fetch rnd mode/prec
10288 andi.b &0xc0,%d1
# extract rnd prec
10289 beq.
b ovfl_work
# prec is extended
10291 tst.
b LOCAL_HI
(%a0
) # is dst a DENORM?
10292 bmi.
b ovfl_sc_norm
# no
10294 # dst op is a DENORM. we have to normalize the mantissa to see if the
10295 # result would be inexact for the given precision. make a copy of the
10296 # dst so we don't screw up the version passed to us.
10297 mov.w LOCAL_EX
(%a0
),FP_SCR0_EX
(%a6
)
10298 mov.
l LOCAL_HI
(%a0
),FP_SCR0_HI
(%a6
)
10299 mov.
l LOCAL_LO
(%a0
),FP_SCR0_LO
(%a6
)
10300 lea FP_SCR0
(%a6
),%a0
# pass ptr to FP_SCR0
10301 movm.
l &0xc080,-(%sp
) # save d0-d1/a0
10302 bsr.
l norm
# normalize mantissa
10303 movm.
l (%sp
)+,&0x0103 # restore d0-d1/a0
10306 cmpi.
b %d1
,&0x40 # is prec dbl?
10307 bne.
b ovfl_sc_dbl
# no; sgl
10309 tst.
l LOCAL_LO
(%a0
) # is lo lw of sgl set?
10310 bne.
b ovfl_sc_inx
# yes
10311 tst.
b 3+LOCAL_HI
(%a0
) # is lo byte of hi lw set?
10312 bne.
b ovfl_sc_inx
# yes
10313 bra.
b ovfl_work
# don't set INEX2
10315 mov.
l LOCAL_LO
(%a0
),%d1
# are any of lo 11 bits of
10316 andi.l &0x7ff,%d1
# dbl mantissa set?
10317 beq.
b ovfl_work
# no; don't set INEX2
10319 ori.
l &inex2_mask
,USER_FPSR
(%a6
) # set INEX2
10320 bra.
b ovfl_work
# continue
10324 ori.
l &ovfinx_mask
,USER_FPSR
(%a6
) # set OVFL/INEX2/AOVFL/AINEX
10327 tst.
b LOCAL_EX
(%a0
) # what is the sign?
10328 smi.
b %d1
# set d1 accordingly
10329 bsr.
l ovf_res
# calc default ovfl result
10330 mov.
b %d0
,FPSR_CC
(%a6
) # insert new ccodes
10331 fmovm.x
(%a0
),&0x80 # return default result in fp0
10333 fmov.s
&0x00000000,%fp1
# return EXOP in fp1
10336 # t_ovfl2 ALWAYS tells ovf_res to create a positive result
10339 ori.
l &ovfinx_mask
,USER_FPSR
(%a6
) # set OVFL/INEX2/AOVFL/AINEX
10341 sf.b %d1
# clear sign flag for positive
10342 bsr.
l ovf_res
# calc default ovfl result
10343 mov.
b %d0
,FPSR_CC
(%a6
) # insert new ccodes
10344 fmovm.x
(%a0
),&0x80 # return default result in fp0
10346 fmov.s
&0x00000000,%fp1
# return EXOP in fp1
10349 #################################################################
10351 # - the last operation of a transcendental emulation #
10352 # routine may have caused an underflow or overflow. #
10353 # we find out if this occurred by doing an fsave and #
10354 # checking the exception bit. if one did occur, then we #
10355 # jump to fgen_except() which creates the default #
10356 # result and EXOP for us. #
10357 #################################################################
10366 #################################################################
10367 # INEX2 exception: #
10368 # - The inex2 and ainex bits are set. #
10369 #################################################################
10377 ori.w
&inx2a_mask
,2+USER_FPSR
(%a6
) # set INEX2/AINEX
10382 ori.
l &inx2a_mask+neg_mask
,USER_FPSR
(%a6
) # set N/INEX2/AINEX
10386 mov.
b &z_bmask
,FPSR_CC
(%a6
)
10387 ori.w
&inx2a_mask
,2+USER_FPSR
(%a6
) # set INEX2/AINEX
10390 # an underflow or overflow exception occurred.
10391 # we must set INEX/AINEX since the fmul/fdiv/fmov emulation may not!
10393 ori.w
&inx2a_mask
,FPSR_EXCEPT
(%a6
)
10409 or.l %d0
,USER_FPSR
(%a6
)
10413 #########################################################################
10415 #########################################################################
10416 # unf_res(): underflow default result calculation for transcendentals #
10419 # d0 : rnd mode,precision #
10420 # d1.b : sign bit of result ('11111111 = (-) ; '00000000 = (+)) #
10422 # a0 : points to result (in instruction memory) #
10423 #########################################################################
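#
# The tables below encode the usual IEEE choice of default underflow result:
# a signed zero, except that rounding toward the result's own sign direction
# (RP for positive results, RM for negative ones) yields the smallest denorm
# of the destination precision.  A minimal C sketch of that selection follows
# (illustration only, not part of the FPSP; DBL_TRUE_MIN is the C11 name for
# the smallest double denorm and stands in for the per-precision MIN rows):
#
#	#include <float.h>
#
#	enum rnd { RN = 0, RZ = 1, RM = 2, RP = 3 };	/* FPCR mode encoding */
#
#	static double sketch_unf_default(enum rnd mode, int negative)
#	{
#		if (!negative && mode == RP)
#			return DBL_TRUE_MIN;	/* +MIN: rounding toward +inf */
#		if (negative && mode == RM)
#			return -DBL_TRUE_MIN;	/* -MIN: rounding toward -inf */
#		return negative ? -0.0 : 0.0;	/* otherwise a signed zero    */
#	}
#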
10425 ori.l &unfinx_mask,USER_FPSR(%a6)
10427 andi.w &0x10,%d1 # keep sign bit in 4th spot
10429 lsr.b &0x4,%d0 # shift rnd prec,mode to lo bits
10430 andi.b &0xf,%d0 # strip hi rnd mode bit
10431 or.b %d1,%d0 # concat {sgn,mode,prec}
10433 mov.l %d0,%d1 # make a copy
10434 lsl.b &0x1,%d1 # mult index 2 by 2
10436 mov.b (tbl_unf_cc.b,%pc,%d0.w*1),FPSR_CC(%a6) # insert ccode bits
10437 lea (tbl_unf_result.b,%pc,%d1.w*8),%a0 # grab result ptr
10441 byte
0x4, 0x4, 0x4, 0x0
10442 byte
0x4, 0x4, 0x4, 0x0
10443 byte
0x4, 0x4, 0x4, 0x0
10444 byte
0x0, 0x0, 0x0, 0x0
10445 byte
0x8+0x4, 0x8+0x4, 0x8, 0x8+0x4
10446 byte
0x8+0x4, 0x8+0x4, 0x8, 0x8+0x4
10447 byte
0x8+0x4, 0x8+0x4, 0x8, 0x8+0x4
10450 long
0x00000000, 0x00000000, 0x00000000, 0x0 # ZERO;ext
10451 long
0x00000000, 0x00000000, 0x00000000, 0x0 # ZERO;ext
10452 long
0x00000000, 0x00000000, 0x00000000, 0x0 # ZERO;ext
10453 long
0x00000000, 0x00000000, 0x00000001, 0x0 # MIN; ext
10455 long
0x3f810000, 0x00000000, 0x00000000, 0x0 # ZERO;sgl
10456 long
0x3f810000, 0x00000000, 0x00000000, 0x0 # ZERO;sgl
10457 long
0x3f810000, 0x00000000, 0x00000000, 0x0 # ZERO;sgl
10458 long
0x3f810000, 0x00000100, 0x00000000, 0x0 # MIN; sgl
10460 long
0x3c010000, 0x00000000, 0x00000000, 0x0 # ZERO;dbl
10461 long
0x3c010000, 0x00000000, 0x00000000, 0x0 # ZER0;dbl
10462 long
0x3c010000, 0x00000000, 0x00000000, 0x0 # ZERO;dbl
10463 long
0x3c010000, 0x00000000, 0x00000800, 0x0 # MIN; dbl
10465 long
0x0,0x0,0x0,0x0
10466 long
0x0,0x0,0x0,0x0
10467 long
0x0,0x0,0x0,0x0
10468 long
0x0,0x0,0x0,0x0
10470 long
0x80000000, 0x00000000, 0x00000000, 0x0 # ZERO;ext
10471 long
0x80000000, 0x00000000, 0x00000000, 0x0 # ZERO;ext
10472 long
0x80000000, 0x00000000, 0x00000001, 0x0 # MIN; ext
10473 long
0x80000000, 0x00000000, 0x00000000, 0x0 # ZERO;ext
10475 long
0xbf810000, 0x00000000, 0x00000000, 0x0 # ZERO;sgl
10476 long
0xbf810000, 0x00000000, 0x00000000, 0x0 # ZERO;sgl
10477 long
0xbf810000, 0x00000100, 0x00000000, 0x0 # MIN; sgl
10478 long
0xbf810000, 0x00000000, 0x00000000, 0x0 # ZERO;sgl
10480 long
0xbc010000, 0x00000000, 0x00000000, 0x0 # ZERO;dbl
10481 long
0xbc010000, 0x00000000, 0x00000000, 0x0 # ZERO;dbl
10482 long
0xbc010000, 0x00000000, 0x00000800, 0x0 # MIN; dbl
10483 long
0xbc010000, 0x00000000, 0x00000000, 0x0 # ZERO;dbl
10485 ############################################################
10487 #########################################################################
10488 # src_zero(): Return signed zero according to sign of src operand. #
10489 #########################################################################
10492 tst.
b SRC_EX
(%a0
) # get sign of src operand
10493 bmi.
b ld_mzero
# if neg, load neg zero
10496 # ld_pzero(): return a positive zero.
10500 fmov.s
&0x00000000,%fp0
# load +0
10501 mov.
b &z_bmask
,FPSR_CC
(%a6
) # set 'Z' ccode bit
10504 # ld_mzero(): return a negative zero.
10507 fmov.s
&0x80000000,%fp0
# load -0
10508 mov.
b &neg_bmask+z_bmask
,FPSR_CC
(%a6
) # set 'N','Z' ccode bits
10511 #########################################################################
10512 # dst_zero(): Return signed zero according to sign of dst operand. #
10513 #########################################################################
10516 tst.
b DST_EX
(%a1
) # get sign of dst operand
10517 bmi.
b ld_mzero
# if neg, load neg zero
10518 bra.
b ld_pzero
# load positive zero
10520 #########################################################################
10521 # src_inf(): Return signed inf according to sign of src operand. #
10522 #########################################################################
10525 tst.
b SRC_EX
(%a0
) # get sign of src operand
10526 bmi.
b ld_minf
# if negative branch
10529 # ld_pinf(): return a positive infinity.
10533 fmov.s
&0x7f800000,%fp0
# load +INF
10534 mov.
b &inf_bmask
,FPSR_CC
(%a6
) # set 'INF' ccode bit
10538 # ld_minf():return a negative infinity.
10542 fmov.s
&0xff800000,%fp0
# load -INF
10543 mov.
b &neg_bmask+inf_bmask
,FPSR_CC
(%a6
) # set 'N','I' ccode bits
10546 #########################################################################
10547 # dst_inf(): Return signed inf according to sign of dst operand. #
10548 #########################################################################
10551 tst.
b DST_EX
(%a1
) # get sign of dst operand
10552 bmi.
b ld_minf
# if negative branch
10556 #################################################################
10557 # szr_inf(): Return +ZERO for a negative src operand or #
10558 # +INF for a positive src operand. #
10559 # Routine used for fetox, ftwotox, and ftentox. #
10560 #################################################################
10562 tst.
b SRC_EX
(%a0
) # check sign of source
10566 #########################################################################
10567 # sopr_inf(): Return +INF for a positive src operand or #
10568 # jump to operand error routine for a negative src operand. #
10569 # Routine used for flogn, flognp1, flog10, and flog2. #
10570 #########################################################################
10573 tst.
b SRC_EX
(%a0
) # check sign of source
10577 #################################################################
10578 # setoxm1i(): Return minus one for a negative src operand or #
10579 # positive infinity for a positive src operand. #
10580 # Routine used for fetoxm1. #
10581 #################################################################
10584 tst.
b SRC_EX
(%a0
) # check sign of source
10588 #########################################################################
10589 # src_one(): Return signed one according to sign of src operand. #
10590 #########################################################################
10593 tst.
b SRC_EX
(%a0
) # check sign of source
10597 # ld_pone(): return positive one.
10601 fmov.s
&0x3f800000,%fp0
# load +1
10606 # ld_mone(): return negative one.
10610 fmov.s
&0xbf800000,%fp0
# load -1
10611 mov.
b &neg_bmask
,FPSR_CC
(%a6
) # set 'N' ccode bit
10614 ppiby2
: long
0x3fff0000, 0xc90fdaa2, 0x2168c235
10615 mpiby2
: long
0xbfff0000, 0xc90fdaa2, 0x2168c235
10617 #################################################################
10618 # spi_2(): Return signed PI/2 according to sign of src operand. #
10619 #################################################################
10622 tst.
b SRC_EX
(%a0
) # check sign of source
10626 # ld_ppi2(): return positive PI/2.
10631 fmov.x ppiby2
(%pc
),%fp0
# load +pi/2
10632 bra.w t_pinx2
# set INEX2
10635 # ld_mpi2(): return negative PI/2.
10640 fmov.x mpiby2
(%pc
),%fp0
# load -pi/2
10641 bra.w t_minx2
# set INEX2
10643 ####################################################
10644 # The following routines give support for fsincos. #
10645 ####################################################
10648 # ssincosz(): When the src operand is ZERO, store a one in the
10649 # cosine register and return a ZERO in fp0 w/ the same sign
10650 # as the src operand.
10654 fmov.s
&0x3f800000,%fp1
10655 tst.
b SRC_EX
(%a0
) # test sign
10657 fmov.s
&0x80000000,%fp0
# return sin result in fp0
10658 mov.
b &z_bmask+neg_bmask
,FPSR_CC
(%a6
)
10659 bra.
b sto_cos
# store cosine result
10661 fmov.s
&0x00000000,%fp0
# return sin result in fp0
10662 mov.
b &z_bmask
,FPSR_CC
(%a6
)
10663 bra.
b sto_cos
# store cosine result
10666 # ssincosi(): When the src operand is INF, store a QNAN in the cosine
10667 # register and jump to the operand error routine for negative
10672 fmov.x qnan
(%pc
),%fp1
# load NAN
10673 bsr.
l sto_cos
# store cosine result
10677 # ssincosqnan(): When the src operand is a QNAN, store the QNAN in the cosine
10678 # register and branch to the src QNAN routine.
10682 fmov.x LOCAL_EX
(%a0
),%fp1
10687 # ssincossnan(): When the src operand is an SNAN, store the SNAN w/ the SNAN bit set
10688 # in the cosine register and branch to the src SNAN routine.
10692 fmov.x LOCAL_EX
(%a0
),%fp1
10696 ########################################################################
10698 #########################################################################
10699 # sto_cos(): store fp1 to the fpreg designated by the CMDREG dst field. #
10700 # fp1 holds the result of the cosine portion of ssincos(). #
10701 # the value in fp1 will not take any exceptions when moved. #
10703 # fp1 : fp value to store #
10706 #########################################################################
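#
# A minimal C sketch of sto_cos follows (illustration only, not part of the
# FPSP): the destination register number comes from the low three bits of
# the command word's low byte, and the cosine value is written into the
# matching slot of the saved fp0-fp7 image, mirroring the sto_cos_0..7 jump
# table used below.
#
#	#include <stdint.h>
#
#	struct exc_regs {
#		long double fp[8];	/* saved fp0-fp7 image */
#	};
#
#	static void sketch_sto_cos(struct exc_regs *regs, uint16_t cmdreg,
#	    long double cosine)
#	{
#		unsigned dst = cmdreg & 0x7;	/* FPc field selects fp0-fp7       */
#		regs->fp[dst] = cosine;		/* store cannot raise an exception */
#	}
#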
10709 mov.b 1+EXC_CMDREG(%a6),%d0
10711 mov.w (tbl_sto_cos.b,%pc,%d0.w*2),%d0
10712 jmp (tbl_sto_cos.b,%pc,%d0.w*1)
10715 short sto_cos_0 - tbl_sto_cos
10716 short sto_cos_1 - tbl_sto_cos
10717 short sto_cos_2 - tbl_sto_cos
10718 short sto_cos_3 - tbl_sto_cos
10719 short sto_cos_4 - tbl_sto_cos
10720 short sto_cos_5 - tbl_sto_cos
10721 short sto_cos_6 - tbl_sto_cos
10722 short sto_cos_7 - tbl_sto_cos
10725 fmovm.x &0x40,EXC_FP0(%a6)
10728 fmovm.x &0x40,EXC_FP1(%a6)
10749 ##################################################################
10754 mov.
b DTAG
(%a6
),%d1
10768 mov.
b DTAG
(%a6
),%d1
10782 mov.
b DTAG
(%a6
),%d1
10796 mov.
b SRC_EX
(%a0
),%d1
# get src sign
10797 mov.
b DST_EX
(%a1
),%d0
# get dst sign
10798 eor.
b %d0
,%d1
# get qbyte sign
10800 mov.
b %d1
,FPSR_QBYTE
(%a6
)
10807 clr.
b FPSR_QBYTE
(%a6
)
10809 mov.
b SRC_EX
(%a0
),%d1
# get src sign
10810 mov.
b DST_EX
(%a1
),%d0
# get dst sign
10811 eor.
b %d0
,%d1
# get qbyte sign
10813 mov.
b %d1
,FPSR_QBYTE
(%a6
)
10814 cmpi.
b DTAG
(%a6
),&DENORM
10820 fmov.
l (%sp
)+,%fpcr
10821 fmov.x
DST(%a1
),%fp0
10827 mov.
b &neg_bmask
,FPSR_CC
(%a6
) # set 'N' ccode
10830 #########################################################################
10835 mov.
b DTAG
(%a6
),%d1
10849 mov.
b DTAG
(%a6
),%d1
10863 mov.
b DTAG
(%a6
),%d1
10875 #########################################################################
10876 global sscale_snorm
10877 global sscale_sdnrm
10880 mov.
b DTAG
(%a6
),%d1
10892 global sscale_szero
10894 mov.
b DTAG
(%a6
),%d1
10908 mov.
b DTAG
(%a6
),%d1
10916 ########################################################################
10919 # sop_sqnan(): The src op for frem/fmod/fscale was a QNAN.
10923 mov.
b DTAG
(%a6
),%d1
10931 # sop_ssnan(): The src op for frem/fmod/fscale was an SNAN.
10935 mov.
b DTAG
(%a6
),%d1
10937 beq.
b dst_qnan_src_snan
10943 ori.
l &snaniop_mask
,USER_FPSR
(%a6
) # set NAN/SNAN/AIOP
10947 # dst_snan(): Return the dst SNAN w/ the SNAN bit set.
10951 fmov.x
DST(%a1
),%fp0
# the fmove sets the SNAN bit
10952 fmov.
l %fpsr
,%d0
# catch resulting status
10953 or.l %d0
,USER_FPSR
(%a6
) # store status
10957 # dst_qnan(): Return the dst QNAN.
10961 fmov.x
DST(%a1
),%fp0
# return the non-signalling nan
10962 tst.
b DST_EX
(%a1
) # set ccodes according to QNAN sign
10965 mov.
b &nan_bmask
,FPSR_CC
(%a6
)
10968 mov.
b &neg_bmask+nan_bmask
,FPSR_CC
(%a6
)
10972 # src_snan(): Return the src SNAN w/ the SNAN bit set.
10976 fmov.x SRC
(%a0
),%fp0
# the fmove sets the SNAN bit
10977 fmov.
l %fpsr
,%d0
# catch resulting status
10978 or.l %d0
,USER_FPSR
(%a6
) # store status
10982 # src_qnan(): Return the src QNAN.
10986 fmov.x SRC
(%a0
),%fp0
# return the non-signalling nan
10987 tst.
b SRC_EX
(%a0
) # set ccodes according to QNAN sign
10990 mov.
b &nan_bmask
,FPSR_CC
(%a6
)
10993 mov.
b &neg_bmask+nan_bmask
,FPSR_CC
(%a6
)
10998 # These entry points are used by the exception handler
10999 # routines where an instruction is selected by an index into
11000 # a large jump table corresponding to a given instruction which
11001 # has been decoded. Flow continues here where we now decode
11002 # further according to the source operand type.
11007 mov.
b STAG
(%a6
),%d1
11021 mov.
b STAG
(%a6
),%d1
11035 mov.
b STAG
(%a6
),%d1
11049 mov.
b STAG
(%a6
),%d1
11063 mov.
b STAG
(%a6
),%d1
11077 mov.
b STAG
(%a6
),%d1
11091 mov.
b STAG
(%a6
),%d1
11105 mov.
b STAG
(%a6
),%d1
11119 mov.
b STAG
(%a6
),%d1
11133 mov.
b STAG
(%a6
),%d1
11147 mov.
b STAG
(%a6
),%d1
11161 mov.
b STAG
(%a6
),%d1
11175 mov.
b STAG
(%a6
),%d1
11189 mov.
b STAG
(%a6
),%d1
11203 mov.
b STAG
(%a6
),%d1
11217 mov.
b STAG
(%a6
),%d1
11231 mov.
b STAG
(%a6
),%d1
11245 mov.
b STAG
(%a6
),%d1
11259 mov.
b STAG
(%a6
),%d1
11273 mov.
b STAG
(%a6
),%d1
11287 mov.
b STAG
(%a6
),%d1
11301 mov.
b STAG
(%a6
),%d1
11315 mov.
b STAG
(%a6
),%d1
11329 mov.
b STAG
(%a6
),%d1
11341 #########################################################################
11342 # XDEF **************************************************************** #
11343 # fgen_except(): catch an exception during transcendental #
11346 # XREF **************************************************************** #
11347 # fmul() - emulate a multiply instruction #
11348 # fadd() - emulate an add instruction #
11349 # fin() - emulate an fmove instruction #
11351 # INPUT *************************************************************** #
11352 # fp0 = destination operand #
11353 # d0 = type of instruction that took exception #
11354 # fsave frame = source operand #
11356 # OUTPUT ************************************************************** #
11360 # ALGORITHM *********************************************************** #
11361 # An exception occurred on the last instruction of the #
11362 # transcendental emulation. hopefully, this won't be happening much #
11363 # because it will be VERY slow. #
11364 # The only exceptions capable of passing through here are #
11365 # Overflow, Underflow, and Unsupported Data Type. #
11367 #########################################################################
11371 cmpi.
b 0x3(%sp
),&0x7 # is exception UNSUPP?
11372 beq.
b fge_unsupp
# yes
11374 mov.
b &NORM
,STAG
(%a6
)
11377 mov.
b &NORM
,DTAG
(%a6
)
11379 # ok, I have a problem with putting the dst op at FP_DST. the emulation
11380 # routines aren't supposed to alter the operands but we've just squashed
11383 # 8/17/93 - this turns out to be more of a "cleanliness" standpoint
11384 # than a potential bug. to begin with, only the dyadic functions
11385 # frem,fmod, and fscale would get the dst trashed here. But, for
11386 # the 060SP, the FP_DST is never used again anyways.
11387 fmovm.x
&0x80,FP_DST
(%a6
) # dst op is in fp0
11389 lea
0x4(%sp
),%a0
# pass: ptr to src op
11390 lea FP_DST
(%a6
),%a1
# pass: ptr to dst op
11392 cmpi.
b %d1
,&FMOV_OP
11393 beq.
b fge_fin
# it was an "fmov"
11394 cmpi.
b %d1
,&FADD_OP
11395 beq.
b fge_fadd
# it was an "fadd"
11407 mov.
b &DENORM
,STAG
(%a6
)
11411 # This table holds the offsets of the emulation routines for each individual
11412 # math operation relative to the address of this table. Included are
11413 # routines like fadd/fmul/fabs as well as the transcendentals.
11414 # The location within the table is determined by the extension bits of the
11415 # operation longword.
	long fin - tbl_unsupp # 00: fmove
	long fint - tbl_unsupp # 01: fint
	long fsinh - tbl_unsupp # 02: fsinh
	long fintrz - tbl_unsupp # 03: fintrz
	long fsqrt - tbl_unsupp # 04: fsqrt
	long tbl_unsupp - tbl_unsupp
	long flognp1 - tbl_unsupp # 06: flognp1
	long tbl_unsupp - tbl_unsupp
	long fetoxm1 - tbl_unsupp # 08: fetoxm1
	long ftanh - tbl_unsupp # 09: ftanh
	long fatan - tbl_unsupp # 0a: fatan
	long tbl_unsupp - tbl_unsupp
	long fasin - tbl_unsupp # 0c: fasin
	long fatanh - tbl_unsupp # 0d: fatanh
	long fsine - tbl_unsupp # 0e: fsin
	long ftan - tbl_unsupp # 0f: ftan
	long fetox - tbl_unsupp # 10: fetox
	long ftwotox - tbl_unsupp # 11: ftwotox
	long ftentox - tbl_unsupp # 12: ftentox
	long tbl_unsupp - tbl_unsupp
	long flogn - tbl_unsupp # 14: flogn
	long flog10 - tbl_unsupp # 15: flog10
	long flog2 - tbl_unsupp # 16: flog2
	long tbl_unsupp - tbl_unsupp
	long fabs - tbl_unsupp # 18: fabs
	long fcosh - tbl_unsupp # 19: fcosh
	long fneg - tbl_unsupp # 1a: fneg
	long tbl_unsupp - tbl_unsupp
	long facos - tbl_unsupp # 1c: facos
	long fcos - tbl_unsupp # 1d: fcos
	long fgetexp - tbl_unsupp # 1e: fgetexp
	long fgetman - tbl_unsupp # 1f: fgetman
	long fdiv - tbl_unsupp # 20: fdiv
	long fmod - tbl_unsupp # 21: fmod
	long fadd - tbl_unsupp # 22: fadd
	long fmul - tbl_unsupp # 23: fmul
	long fsgldiv - tbl_unsupp # 24: fsgldiv
	long frem - tbl_unsupp # 25: frem
	long fscale - tbl_unsupp # 26: fscale
	long fsglmul - tbl_unsupp # 27: fsglmul
	long fsub - tbl_unsupp # 28: fsub
	long tbl_unsupp - tbl_unsupp
	long tbl_unsupp - tbl_unsupp
	long tbl_unsupp - tbl_unsupp
	long tbl_unsupp - tbl_unsupp
	long tbl_unsupp - tbl_unsupp
	long tbl_unsupp - tbl_unsupp
	long tbl_unsupp - tbl_unsupp
	long fsincos - tbl_unsupp # 30: fsincos
	long fsincos - tbl_unsupp # 31: fsincos
	long fsincos - tbl_unsupp # 32: fsincos
	long fsincos - tbl_unsupp # 33: fsincos
	long fsincos - tbl_unsupp # 34: fsincos
	long fsincos - tbl_unsupp # 35: fsincos
	long fsincos - tbl_unsupp # 36: fsincos
	long fsincos - tbl_unsupp # 37: fsincos
	long fcmp - tbl_unsupp # 38: fcmp
	long tbl_unsupp - tbl_unsupp
	long ftst - tbl_unsupp # 3a: ftst
	long tbl_unsupp - tbl_unsupp
	long tbl_unsupp - tbl_unsupp
	long tbl_unsupp - tbl_unsupp
	long tbl_unsupp - tbl_unsupp
	long tbl_unsupp - tbl_unsupp
	long fsin - tbl_unsupp # 40: fsmove
	long fssqrt - tbl_unsupp # 41: fssqrt
	long tbl_unsupp - tbl_unsupp
	long tbl_unsupp - tbl_unsupp
	long fdin - tbl_unsupp # 44: fdmove
	long fdsqrt - tbl_unsupp # 45: fdsqrt
	long tbl_unsupp - tbl_unsupp
	long tbl_unsupp - tbl_unsupp
	long tbl_unsupp - tbl_unsupp
	long tbl_unsupp - tbl_unsupp
	long tbl_unsupp - tbl_unsupp
	long tbl_unsupp - tbl_unsupp
	long tbl_unsupp - tbl_unsupp
	long tbl_unsupp - tbl_unsupp
	long tbl_unsupp - tbl_unsupp
	long tbl_unsupp - tbl_unsupp
	long tbl_unsupp - tbl_unsupp
	long tbl_unsupp - tbl_unsupp
	long tbl_unsupp - tbl_unsupp
	long tbl_unsupp - tbl_unsupp
	long tbl_unsupp - tbl_unsupp
	long tbl_unsupp - tbl_unsupp
	long tbl_unsupp - tbl_unsupp
	long tbl_unsupp - tbl_unsupp
	long fsabs - tbl_unsupp # 58: fsabs
	long tbl_unsupp - tbl_unsupp
	long fsneg - tbl_unsupp # 5a: fsneg
	long tbl_unsupp - tbl_unsupp
	long fdabs - tbl_unsupp # 5c: fdabs
	long tbl_unsupp - tbl_unsupp
	long fdneg - tbl_unsupp # 5e: fdneg
	long tbl_unsupp - tbl_unsupp
	long fsdiv - tbl_unsupp # 60: fsdiv
	long tbl_unsupp - tbl_unsupp
	long fsadd - tbl_unsupp # 62: fsadd
	long fsmul - tbl_unsupp # 63: fsmul
	long fddiv - tbl_unsupp # 64: fddiv
	long tbl_unsupp - tbl_unsupp
	long fdadd - tbl_unsupp # 66: fdadd
	long fdmul - tbl_unsupp # 67: fdmul
	long fssub - tbl_unsupp # 68: fssub
	long tbl_unsupp - tbl_unsupp
	long tbl_unsupp - tbl_unsupp
	long tbl_unsupp - tbl_unsupp
	long fdsub - tbl_unsupp # 6c: fdsub
#########################################################################
# XDEF **************************************************************** #
#	fmul(): emulates the fmul instruction				#
#	fsmul(): emulates the fsmul instruction				#
#	fdmul(): emulates the fdmul instruction				#
#
# XREF **************************************************************** #
#	scale_to_zero_src() - scale src exponent to zero		#
#	scale_to_zero_dst() - scale dst exponent to zero		#
#	unf_res() - return default underflow result			#
#	ovf_res() - return default overflow result			#
#	res_qnan() - return QNAN result					#
#	res_snan() - return SNAN result					#
#
# INPUT *************************************************************** #
#	a0 = pointer to extended precision source operand		#
#	a1 = pointer to extended precision destination operand		#
#	d0  rnd prec,mode						#
#
# OUTPUT ************************************************************** #
#	fp1 = EXOP (if exception occurred)				#
#
# ALGORITHM *********************************************************** #
#	Handle NANs, infinities, and zeroes as special cases. Divide	#
# norms/denorms into ext/sgl/dbl precision.				#
#	For norms/denorms, scale the exponents such that a multiply	#
# instruction won't cause an exception. Use the regular fmul to	#
# compute a result. Check if the regular operands would have taken	#
# an exception. If so, return the default overflow/underflow result	#
# and return the EXOP if exceptions are enabled. Else, scale the	#
# result operand to the proper exponent.				#
#
#########################################################################
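#
# A rough C model of the pre-check described above (editor's sketch under
# stated assumptions, not the package's code): both operands are rescaled
# so their biased exponents sit at 0x3fff, and the summed scale factor is
# then compared against the per-precision entries of tbl_fmul_ovfl /
# tbl_fmul_unfl to decide, before the multiply is even attempted, whether
# the true product must (or might) overflow or underflow.
#
#	struct limits { long ovfl; long unfl; };
#
#	/* thresholds mirroring tbl_fmul_ovfl / tbl_fmul_unfl below */
#	static const struct limits tbl[3] = {
#		{ 0x3fff - 0x7ffe, 0x3fff + 0x0001 },	/* ext */
#		{ 0x3fff - 0x407e, 0x3fff - 0x3f80 },	/* sgl */
#		{ 0x3fff - 0x43fe, 0x3fff - 0x3c00 },	/* dbl */
#	};
#
#	/* scale = (0x3fff - src exp) + (0x3fff - dst exp), prec = 0/1/2 */
#	static int classify(long scale, int prec)
#	{
#		if (scale <  tbl[prec].ovfl) return -2;	/* will overflow  */
#		if (scale == tbl[prec].ovfl) return -1;	/* may overflow   */
#		if (scale >  tbl[prec].unfl) return  2;	/* will underflow */
#		if (scale == tbl[prec].unfl) return  1;	/* may underflow  */
#		return 0;				/* normal case    */
#	}
#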
	long 0x3fff - 0x7ffe # ext_max
	long 0x3fff - 0x407e # sgl_max
	long 0x3fff - 0x43fe # dbl_max

	long 0x3fff + 0x0001 # ext_unfl
	long 0x3fff - 0x3f80 # sgl_unfl
	long 0x3fff - 0x3c00 # dbl_unfl
	andi.b &0x30,%d0 # clear rnd prec
	ori.b &s_mode*0x10,%d0 # insert sgl prec

	ori.b &d_mode*0x10,%d0 # insert dbl prec

	mov.l %d0,L_SCR3(%a6) # store rnd info

	mov.b DTAG(%a6),%d1
	or.b STAG(%a6),%d1 # combine src tags
	bne.w fmul_not_norm # optimize on non-norm input

	mov.w DST_EX(%a1),FP_SCR1_EX(%a6)
	mov.l DST_HI(%a1),FP_SCR1_HI(%a6)
	mov.l DST_LO(%a1),FP_SCR1_LO(%a6)

	mov.w SRC_EX(%a0),FP_SCR0_EX(%a6)
	mov.l SRC_HI(%a0),FP_SCR0_HI(%a6)
	mov.l SRC_LO(%a0),FP_SCR0_LO(%a6)

	bsr.l scale_to_zero_src # scale src exponent
	mov.l %d0,-(%sp) # save scale factor 1

	bsr.l scale_to_zero_dst # scale dst exponent

	add.l %d0,(%sp) # SCALE_FACTOR = scale1 + scale2

	mov.w 2+L_SCR3(%a6),%d1 # fetch precision
	lsr.b &0x6,%d1 # shift to lo bits
	mov.l (%sp)+,%d0 # load S.F.
	cmp.l %d0,(tbl_fmul_ovfl.w,%pc,%d1.w*4) # would result ovfl?
	beq.w fmul_may_ovfl # result may rnd to overflow
	blt.w fmul_ovfl # result will overflow

	cmp.l %d0,(tbl_fmul_unfl.w,%pc,%d1.w*4) # would result unfl?
	beq.w fmul_may_unfl # result may rnd to no unfl
	bgt.w fmul_unfl # result will underflow
# - the result of the multiply operation will neither overflow nor underflow.
# - do the multiply to the proper precision and rounding mode.
# - scale the result exponent using the scale factor. if both operands were
# normalized then we really don't need to go through this scaling. but for now,
# do it anyways.
	fmovm.x FP_SCR1(%a6),&0x80 # load dst operand

	fmov.l L_SCR3(%a6),%fpcr # set FPCR
	fmov.l &0x0,%fpsr # clear FPSR

	fmul.x FP_SCR0(%a6),%fp0 # execute multiply

	fmov.l %fpsr,%d1 # save status
	fmov.l &0x0,%fpcr # clear FPCR

	or.l %d1,USER_FPSR(%a6) # save INEX2,N

	fmovm.x &0x80,FP_SCR0(%a6) # store out result
	mov.l %d2,-(%sp) # save d2
	mov.w FP_SCR0_EX(%a6),%d1 # load {sgn,exp}
	mov.l %d1,%d2 # make a copy
	andi.l &0x7fff,%d1 # strip sign
	andi.w &0x8000,%d2 # keep old sign
	sub.l %d0,%d1 # add scale factor
	or.w %d2,%d1 # concat old sign,new exp
	mov.w %d1,FP_SCR0_EX(%a6) # insert new exponent
	mov.l (%sp)+,%d2 # restore d2
	fmovm.x FP_SCR0(%a6),&0x80 # return default result in fp0
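#
# The exponent fix-up just performed can be pictured in C roughly as
# follows (editor's sketch; the helper and its names are hypothetical):
#
#	#include <stdint.h>
#
#	/* se_word = sign/exponent word of the stored extended result,
#	   scale   = saved scale factor from the two scale_to_zero calls */
#	static uint16_t unscale(uint16_t se_word, int32_t scale)
#	{
#		uint16_t sign = se_word & 0x8000;	/* keep old sign */
#		int32_t  exp  = se_word & 0x7fff;	/* strip sign    */
#		exp -= scale;				/* undo scaling  */
#		return (uint16_t)(sign | (exp & 0x7fff));
#	}
#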
# - the result of the multiply operation is an overflow.
# - do the multiply to the proper precision and rounding mode in order to
# set the inexact bits.
# - calculate the default result and return it in fp0.
# - if overflow or inexact is enabled, we need a multiply result rounded to
# extended precision. if the original operation was extended, then we have this
# result. if the original operation was single or double, we have to do another
# multiply using extended precision and the correct rounding mode. the result
# of this operation then has its exponent scaled by -0x6000 to create the
# exceptional operand.
	fmovm.x FP_SCR1(%a6),&0x80 # load dst operand

	fmov.l L_SCR3(%a6),%fpcr # set FPCR
	fmov.l &0x0,%fpsr # clear FPSR

	fmul.x FP_SCR0(%a6),%fp0 # execute multiply

	fmov.l %fpsr,%d1 # save status
	fmov.l &0x0,%fpcr # clear FPCR

	or.l %d1,USER_FPSR(%a6) # save INEX2,N

# save setting this until now because this is where fmul_may_ovfl may jump in
	or.l &ovfl_inx_mask,USER_FPSR(%a6) # set ovfl/aovfl/ainex

	mov.b FPCR_ENABLE(%a6),%d1
	andi.b &0x13,%d1 # is OVFL or INEX enabled?
	bne.b fmul_ovfl_ena # yes

# calculate the default result
	btst &neg_bit,FPSR_CC(%a6) # is result negative?
	sne %d1 # set sign param accordingly
	mov.l L_SCR3(%a6),%d0 # pass rnd prec,mode
	bsr.l ovf_res # calculate default result
	or.b %d0,FPSR_CC(%a6) # set INF,N if applicable
	fmovm.x (%a0),&0x80 # return default result in fp0
# OVFL is enabled; Create EXOP:
# - if precision is extended, then we have the EXOP. simply bias the exponent
# with an extra -0x6000. if the precision is single or double, we need to
# calculate a result rounded to extended precision.
	mov.l L_SCR3(%a6),%d1
	andi.b &0xc0,%d1 # test the rnd prec
	bne.b fmul_ovfl_ena_sd # it's sgl or dbl

fmul_ovfl_ena_cont:
	fmovm.x &0x80,FP_SCR0(%a6) # move result to stack

	mov.l %d2,-(%sp) # save d2
	mov.w FP_SCR0_EX(%a6),%d1 # fetch {sgn,exp}
	mov.w %d1,%d2 # make a copy
	andi.l &0x7fff,%d1 # strip sign
	sub.l %d0,%d1 # add scale factor
	subi.l &0x6000,%d1 # subtract bias
	andi.w &0x7fff,%d1 # clear sign bit
	andi.w &0x8000,%d2 # keep old sign
	or.w %d2,%d1 # concat old sign,new exp
	mov.w %d1,FP_SCR0_EX(%a6) # insert new exponent
	mov.l (%sp)+,%d2 # restore d2
	fmovm.x FP_SCR0(%a6),&0x40 # return EXOP in fp1
	bra.b fmul_ovfl_dis

	fmovm.x FP_SCR1(%a6),&0x80 # load dst operand

	mov.l L_SCR3(%a6),%d1
	andi.b &0x30,%d1 # keep rnd mode only
	fmov.l %d1,%fpcr # set FPCR

	fmul.x FP_SCR0(%a6),%fp0 # execute multiply

	fmov.l &0x0,%fpcr # clear FPCR
	bra.b fmul_ovfl_ena_cont
# - the result of the multiply operation MAY overflow.
# - do the multiply to the proper precision and rounding mode in order to
# set the inexact bits.
# - calculate the default result and return it in fp0.
	fmovm.x FP_SCR1(%a6),&0x80 # load dst op

	fmov.l L_SCR3(%a6),%fpcr # set FPCR
	fmov.l &0x0,%fpsr # clear FPSR

	fmul.x FP_SCR0(%a6),%fp0 # execute multiply

	fmov.l %fpsr,%d1 # save status
	fmov.l &0x0,%fpcr # clear FPCR

	or.l %d1,USER_FPSR(%a6) # save INEX2,N

	fabs.x %fp0,%fp1 # make a copy of result
	fcmp.b %fp1,&0x2 # is |result| >= 2.b?
	fbge.w fmul_ovfl_tst # yes; overflow has occurred

# no, it didn't overflow; we have correct result
	bra.w fmul_normal_exit
# - the result of the multiply operation is an underflow.
# - do the multiply to the proper precision and rounding mode in order to
# set the inexact bits.
# - calculate the default result and return it in fp0.
# - if underflow or inexact is enabled, we need a multiply result rounded to
# extended precision. if the original operation was extended, then we have this
# result. if the original operation was single or double, we have to do another
# multiply using extended precision and the correct rounding mode. the result
# of this operation then has its exponent scaled by +0x6000 to create the
# exceptional operand.
	bset &unfl_bit,FPSR_EXCEPT(%a6) # set unfl exc bit

# for fun, let's use only extended precision, round to zero. then, let
# the unf_res() routine figure out all the rest.
# will we get the correct answer.
	fmovm.x FP_SCR1(%a6),&0x80 # load dst operand

	fmov.l &rz_mode*0x10,%fpcr # set FPCR
	fmov.l &0x0,%fpsr # clear FPSR

	fmul.x FP_SCR0(%a6),%fp0 # execute multiply

	fmov.l %fpsr,%d1 # save status
	fmov.l &0x0,%fpcr # clear FPCR

	or.l %d1,USER_FPSR(%a6) # save INEX2,N

	mov.b FPCR_ENABLE(%a6),%d1
	andi.b &0x0b,%d1 # is UNFL or INEX enabled?
	bne.b fmul_unfl_ena # yes

	fmovm.x &0x80,FP_SCR0(%a6) # store out result

	lea FP_SCR0(%a6),%a0 # pass: result addr
	mov.l L_SCR3(%a6),%d1 # pass: rnd prec,mode
	bsr.l unf_res # calculate default result
	or.b %d0,FPSR_CC(%a6) # unf_res2 may have set 'Z'
	fmovm.x FP_SCR0(%a6),&0x80 # return default result in fp0
	fmovm.x FP_SCR1(%a6),&0x40 # load dst op

	mov.l L_SCR3(%a6),%d1
	andi.b &0xc0,%d1 # is precision extended?
	bne.b fmul_unfl_ena_sd # no, sgl or dbl

# if the rnd mode is anything but RZ, then we have to re-do the above
# multiplication because we used RZ for all.
	fmov.l L_SCR3(%a6),%fpcr # set FPCR

fmul_unfl_ena_cont:
	fmov.l &0x0,%fpsr # clear FPSR

	fmul.x FP_SCR0(%a6),%fp1 # execute multiply

	fmov.l &0x0,%fpcr # clear FPCR

	fmovm.x &0x40,FP_SCR0(%a6) # save result to stack
	mov.l %d2,-(%sp) # save d2
	mov.w FP_SCR0_EX(%a6),%d1 # fetch {sgn,exp}
	mov.l %d1,%d2 # make a copy
	andi.l &0x7fff,%d1 # strip sign
	andi.w &0x8000,%d2 # keep old sign
	sub.l %d0,%d1 # add scale factor
	addi.l &0x6000,%d1 # add bias
	or.w %d2,%d1 # concat old sign,new exp
	mov.w %d1,FP_SCR0_EX(%a6) # insert new exponent
	mov.l (%sp)+,%d2 # restore d2
	fmovm.x FP_SCR0(%a6),&0x40 # return EXOP in fp1
	bra.w fmul_unfl_dis

	mov.l L_SCR3(%a6),%d1
	andi.b &0x30,%d1 # use only rnd mode
	fmov.l %d1,%fpcr # set FPCR

	bra.b fmul_unfl_ena_cont
# -use the correct rounding mode and precision. this code favors operations
# that do not underflow.

	fmovm.x FP_SCR1(%a6),&0x80 # load dst operand

	fmov.l L_SCR3(%a6),%fpcr # set FPCR
	fmov.l &0x0,%fpsr # clear FPSR

	fmul.x FP_SCR0(%a6),%fp0 # execute multiply

	fmov.l %fpsr,%d1 # save status
	fmov.l &0x0,%fpcr # clear FPCR

	or.l %d1,USER_FPSR(%a6) # save INEX2,N

	fabs.x %fp0,%fp1 # make a copy of result
	fcmp.b %fp1,&0x2 # is |result| > 2.b?
	fbgt.w fmul_normal_exit # no; no underflow occurred
	fblt.w fmul_unfl # yes; underflow occurred
# we still don't know if underflow occurred. result is ~ equal to 2. but,
# we don't know if the result was an underflow that rounded up to a 2 or
# a normalized number that rounded down to a 2. so, redo the entire operation
# using RZ as the rounding mode to see what the pre-rounded result is.
# this case should be relatively rare.
	fmovm.x FP_SCR1(%a6),&0x40 # load dst operand

	mov.l L_SCR3(%a6),%d1
	andi.b &0xc0,%d1 # keep rnd prec
	ori.b &rz_mode*0x10,%d1 # insert RZ

	fmov.l %d1,%fpcr # set FPCR
	fmov.l &0x0,%fpsr # clear FPSR

	fmul.x FP_SCR0(%a6),%fp1 # execute multiply

	fmov.l &0x0,%fpcr # clear FPCR
	fabs.x %fp1 # make absolute value
	fcmp.b %fp1,&0x2 # is |result| < 2.b?
	fbge.w fmul_normal_exit # no; no underflow occurred
	bra.w fmul_unfl # yes, underflow occurred
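#
# A small C analogy of the boundary test above (editor's sketch; the real
# code works on the scaled extended operands, and the 2.0 here stands for
# that scaled boundary):
#
#	#include <fenv.h>
#	#include <math.h>
#	#include <stdbool.h>
#
#	/* the rounded |a*b| landed exactly on the boundary: redo with RZ
#	   to see the pre-rounded magnitude and decide if it underflowed */
#	static bool underflowed_at_boundary(double a, double b)
#	{
#		int old = fegetround();
#		fesetround(FE_TOWARDZERO);
#		double rz = fabs(a * b);
#		fesetround(old);
#		return rz < 2.0;
#	}
#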
################################################################################

# Multiply: inputs are not both normalized; what are they?

	mov.w (tbl_fmul_op.b,%pc,%d1.w*2),%d1
	jmp (tbl_fmul_op.b,%pc,%d1.w)
	short fmul_norm - tbl_fmul_op # NORM x NORM
	short fmul_zero - tbl_fmul_op # NORM x ZERO
	short fmul_inf_src - tbl_fmul_op # NORM x INF
	short fmul_res_qnan - tbl_fmul_op # NORM x QNAN
	short fmul_norm - tbl_fmul_op # NORM x DENORM
	short fmul_res_snan - tbl_fmul_op # NORM x SNAN
	short tbl_fmul_op - tbl_fmul_op #
	short tbl_fmul_op - tbl_fmul_op #

	short fmul_zero - tbl_fmul_op # ZERO x NORM
	short fmul_zero - tbl_fmul_op # ZERO x ZERO
	short fmul_res_operr - tbl_fmul_op # ZERO x INF
	short fmul_res_qnan - tbl_fmul_op # ZERO x QNAN
	short fmul_zero - tbl_fmul_op # ZERO x DENORM
	short fmul_res_snan - tbl_fmul_op # ZERO x SNAN
	short tbl_fmul_op - tbl_fmul_op #
	short tbl_fmul_op - tbl_fmul_op #

	short fmul_inf_dst - tbl_fmul_op # INF x NORM
	short fmul_res_operr - tbl_fmul_op # INF x ZERO
	short fmul_inf_dst - tbl_fmul_op # INF x INF
	short fmul_res_qnan - tbl_fmul_op # INF x QNAN
	short fmul_inf_dst - tbl_fmul_op # INF x DENORM
	short fmul_res_snan - tbl_fmul_op # INF x SNAN
	short tbl_fmul_op - tbl_fmul_op #
	short tbl_fmul_op - tbl_fmul_op #

	short fmul_res_qnan - tbl_fmul_op # QNAN x NORM
	short fmul_res_qnan - tbl_fmul_op # QNAN x ZERO
	short fmul_res_qnan - tbl_fmul_op # QNAN x INF
	short fmul_res_qnan - tbl_fmul_op # QNAN x QNAN
	short fmul_res_qnan - tbl_fmul_op # QNAN x DENORM
	short fmul_res_snan - tbl_fmul_op # QNAN x SNAN
	short tbl_fmul_op - tbl_fmul_op #
	short tbl_fmul_op - tbl_fmul_op #

	short fmul_norm - tbl_fmul_op # DENORM x NORM
	short fmul_zero - tbl_fmul_op # DENORM x ZERO
	short fmul_inf_src - tbl_fmul_op # DENORM x INF
	short fmul_res_qnan - tbl_fmul_op # DENORM x QNAN
	short fmul_norm - tbl_fmul_op # DENORM x DENORM
	short fmul_res_snan - tbl_fmul_op # DENORM x SNAN
	short tbl_fmul_op - tbl_fmul_op #
	short tbl_fmul_op - tbl_fmul_op #

	short fmul_res_snan - tbl_fmul_op # SNAN x NORM
	short fmul_res_snan - tbl_fmul_op # SNAN x ZERO
	short fmul_res_snan - tbl_fmul_op # SNAN x INF
	short fmul_res_snan - tbl_fmul_op # SNAN x QNAN
	short fmul_res_snan - tbl_fmul_op # SNAN x DENORM
	short fmul_res_snan - tbl_fmul_op # SNAN x SNAN
	short tbl_fmul_op - tbl_fmul_op #
	short tbl_fmul_op - tbl_fmul_op #
# Multiply: (Zero x Zero) || (Zero x norm) || (Zero x denorm)

	global fmul_zero # global for fsglmul
	mov.b SRC_EX(%a0),%d0 # exclusive or the signs
	mov.b DST_EX(%a1),%d1

	bpl.b fmul_zero_p # result ZERO is pos.

	fmov.s &0x80000000,%fp0 # load -ZERO
	mov.b &z_bmask+neg_bmask,FPSR_CC(%a6) # set Z/N

	fmov.s &0x00000000,%fp0 # load +ZERO
	mov.b &z_bmask,FPSR_CC(%a6) # set Z
# Multiply: (inf x inf) || (inf x norm) || (inf x denorm)
#
# Note: The j-bit for an infinity is a don't-care. However, to be
# strictly compatible w/ the 68881/882, we make sure to return an
# INF w/ the j-bit set if the input INF j-bit was set. Destination
# INFs take priority.

	global fmul_inf_dst # global for fsglmul
	fmovm.x DST(%a1),&0x80 # return INF result in fp0
	mov.b SRC_EX(%a0),%d0 # exclusive or the signs
	mov.b DST_EX(%a1),%d1

	bpl.b fmul_inf_dst_p # result INF is pos.

	fabs.x %fp0 # clear result sign
	fneg.x %fp0 # set result sign
	mov.b &inf_bmask+neg_bmask,FPSR_CC(%a6) # set INF/N

	fabs.x %fp0 # clear result sign
	mov.b &inf_bmask,FPSR_CC(%a6) # set INF

	global fmul_inf_src # global for fsglmul
	fmovm.x SRC(%a0),&0x80 # return INF result in fp0
	mov.b SRC_EX(%a0),%d0 # exclusive or the signs
	mov.b DST_EX(%a1),%d1

	bpl.b fmul_inf_dst_p # result INF is pos.
	bra.b fmul_inf_dst_n
#########################################################################
# XDEF **************************************************************** #
#	fin(): emulates the fmove instruction				#
#	fsin(): emulates the fsmove instruction				#
#	fdin(): emulates the fdmove instruction				#
#
# XREF **************************************************************** #
#	norm() - normalize mantissa for EXOP on denorm			#
#	scale_to_zero_src() - scale src exponent to zero		#
#	ovf_res() - return default overflow result			#
#	unf_res() - return default underflow result			#
#	res_qnan_1op() - return QNAN result				#
#	res_snan_1op() - return SNAN result				#
#
# INPUT *************************************************************** #
#	a0 = pointer to extended precision source operand		#
#	d0 = round prec/mode						#
#
# OUTPUT ************************************************************** #
#	fp1 = EXOP (if exception occurred)				#
#
# ALGORITHM *********************************************************** #
#	Handle NANs, infinities, and zeroes as special cases. Divide	#
# norms into extended, single, and double precision.			#
#	Norms can be emulated w/ a regular fmove instruction. For	#
# sgl/dbl, must scale exponent and perform an "fmove". Check to see	#
# if the result would have overflowed/underflowed. If so, use unf_res()	#
# or ovf_res() to return the default result. Also return EXOP if	#
# exception is enabled. If no exception, return the default result.	#
#	Unnorms don't pass through here.				#
#
#########################################################################
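#
# For the single-precision case below, the underflow/overflow pre-check on
# the scale factor can be read as this C sketch (editor's illustration;
# scale = 0x3fff - source exponent, as produced by scale_to_zero_src()):
#
#	static int classify_sgl_move(long scale)
#	{
#		if (scale >= 0x3fff - 0x3f80) return  1; /* will underflow */
#		if (scale == 0x3fff - 0x407e) return -1; /* may overflow   */
#		if (scale <  0x3fff - 0x407e) return -2; /* will overflow  */
#		return 0;				 /* moves in fine  */
#	}
#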
	andi.b &0x30,%d0 # clear rnd prec
	ori.b &s_mode*0x10,%d0 # insert sgl precision

	andi.b &0x30,%d0 # clear rnd prec
	ori.b &d_mode*0x10,%d0 # insert dbl precision

	mov.l %d0,L_SCR3(%a6) # store rnd info

	mov.b STAG(%a6),%d1 # fetch src optype tag
	bne.w fin_not_norm # optimize on non-norm input
# FP MOVE IN: NORMs and DENORMs ONLY!

	andi.b &0xc0,%d0 # is precision extended?
	bne.w fin_not_ext # no, so go handle dbl or sgl

# precision selected is extended. so...we cannot get an underflow
# or overflow because of rounding to the correct precision. so...
# skip the scaling and unscaling...

	tst.b SRC_EX(%a0) # is the operand negative?
	bpl.b fin_norm_done # no
	bset &neg_bit,FPSR_CC(%a6) # yes, so set 'N' ccode bit

	fmovm.x SRC(%a0),&0x80 # return result in fp0

# for an extended precision DENORM, the UNFL exception bit is set
# the accrued bit is NOT set in this instance(no inexactness!)

	andi.b &0xc0,%d0 # is precision extended?
	bne.w fin_not_ext # no, so go handle dbl or sgl

	bset &unfl_bit,FPSR_EXCEPT(%a6) # set unfl exc bit

	tst.b SRC_EX(%a0) # is the operand negative?
	bpl.b fin_denorm_done # no
	bset &neg_bit,FPSR_CC(%a6) # yes, so set 'N' ccode bit

	fmovm.x SRC(%a0),&0x80 # return result in fp0
	btst &unfl_bit,FPCR_ENABLE(%a6) # is UNFL enabled?
	bne.b fin_denorm_unfl_ena # yes
# the input is an extended DENORM and underflow is enabled in the FPCR.
# normalize the mantissa and add the bias of 0x6000 to the resulting negative
# exponent and insert back into the operand.
fin_denorm_unfl_ena:
	mov.w SRC_EX(%a0),FP_SCR0_EX(%a6)
	mov.l SRC_HI(%a0),FP_SCR0_HI(%a6)
	mov.l SRC_LO(%a0),FP_SCR0_LO(%a6)
	lea FP_SCR0(%a6),%a0 # pass: ptr to operand
	bsr.l norm # normalize result
	neg.w %d0 # new exponent = -(shft val)
	addi.w &0x6000,%d0 # add new bias to exponent
	mov.w FP_SCR0_EX(%a6),%d1 # fetch old sign,exp
	andi.w &0x8000,%d1 # keep old sign
	andi.w &0x7fff,%d0 # clear sign position
	or.w %d1,%d0 # concat new exp,old sign
	mov.w %d0,FP_SCR0_EX(%a6) # insert new exponent
	fmovm.x FP_SCR0(%a6),&0x40 # return EXOP in fp1
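#
# What the normalize-and-rebias step above computes, as a C sketch
# (editor's assumption; the 64-bit value stands for the HI:LO mantissa):
#
#	#include <stdint.h>
#
#	static uint16_t denorm_exop_exp(uint64_t mantissa, uint16_t se_word)
#	{
#		int shift = 0;
#		while (mantissa && !(mantissa >> 63)) {	/* set the j-bit */
#			mantissa <<= 1;
#			shift++;
#		}
#		int32_t exp = -shift + 0x6000;	/* -(shft val) + new bias */
#		return (uint16_t)((se_word & 0x8000) | (exp & 0x7fff));
#	}
#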
# operand is to be rounded to single or double precision

	cmpi.b %d0,&s_mode*0x10 # separate sgl/dbl prec

# operand is to be rounded to single precision

	mov.w SRC_EX(%a0),FP_SCR0_EX(%a6)
	mov.l SRC_HI(%a0),FP_SCR0_HI(%a6)
	mov.l SRC_LO(%a0),FP_SCR0_LO(%a6)
	bsr.l scale_to_zero_src # calculate scale factor

	cmpi.l %d0,&0x3fff-0x3f80 # will move in underflow?
	bge.w fin_sd_unfl # yes; go handle underflow
	cmpi.l %d0,&0x3fff-0x407e # will move in overflow?
	beq.w fin_sd_may_ovfl # maybe; go check
	blt.w fin_sd_ovfl # yes; go handle overflow
# operand will NOT overflow or underflow when moved into the fp reg file

	fmov.l &0x0,%fpsr # clear FPSR
	fmov.l L_SCR3(%a6),%fpcr # set FPCR

	fmov.x FP_SCR0(%a6),%fp0 # perform move

	fmov.l %fpsr,%d1 # save FPSR
	fmov.l &0x0,%fpcr # clear FPCR

	or.l %d1,USER_FPSR(%a6) # save INEX2,N

fin_sd_normal_exit:
	mov.l %d2,-(%sp) # save d2
	fmovm.x &0x80,FP_SCR0(%a6) # store out result
	mov.w FP_SCR0_EX(%a6),%d1 # load {sgn,exp}
	mov.w %d1,%d2 # make a copy
	andi.l &0x7fff,%d1 # strip sign
	sub.l %d0,%d1 # add scale factor
	andi.w &0x8000,%d2 # keep old sign
	or.w %d1,%d2 # concat old sign,new exponent
	mov.w %d2,FP_SCR0_EX(%a6) # insert new exponent
	mov.l (%sp)+,%d2 # restore d2
	fmovm.x FP_SCR0(%a6),&0x80 # return result in fp0
# operand is to be rounded to double precision

	mov.w SRC_EX(%a0),FP_SCR0_EX(%a6)
	mov.l SRC_HI(%a0),FP_SCR0_HI(%a6)
	mov.l SRC_LO(%a0),FP_SCR0_LO(%a6)
	bsr.l scale_to_zero_src # calculate scale factor

	cmpi.l %d0,&0x3fff-0x3c00 # will move in underflow?
	bge.w fin_sd_unfl # yes; go handle underflow
	cmpi.l %d0,&0x3fff-0x43fe # will move in overflow?
	beq.w fin_sd_may_ovfl # maybe; go check
	blt.w fin_sd_ovfl # yes; go handle overflow
	bra.w fin_sd_normal # no; go handle normalized op
# operand WILL underflow when moved in to the fp register file

	bset &unfl_bit,FPSR_EXCEPT(%a6) # set unfl exc bit

	tst.b FP_SCR0_EX(%a6) # is operand negative?
	bpl.b fin_sd_unfl_tst
	bset &neg_bit,FPSR_CC(%a6) # set 'N' ccode bit

# if underflow or inexact is enabled, then go calculate the EXOP first.
	mov.b FPCR_ENABLE(%a6),%d1
	andi.b &0x0b,%d1 # is UNFL or INEX enabled?
	bne.b fin_sd_unfl_ena # yes

	lea FP_SCR0(%a6),%a0 # pass: result addr
	mov.l L_SCR3(%a6),%d1 # pass: rnd prec,mode
	bsr.l unf_res # calculate default result
	or.b %d0,FPSR_CC(%a6) # unf_res may have set 'Z'
	fmovm.x FP_SCR0(%a6),&0x80 # return default result in fp0
# operand will underflow AND underflow or inexact is enabled.
# therefore, we must return the result rounded to extended precision.

	mov.l FP_SCR0_HI(%a6),FP_SCR1_HI(%a6)
	mov.l FP_SCR0_LO(%a6),FP_SCR1_LO(%a6)
	mov.w FP_SCR0_EX(%a6),%d1 # load current exponent

	mov.l %d2,-(%sp) # save d2
	mov.w %d1,%d2 # make a copy
	andi.l &0x7fff,%d1 # strip sign
	sub.l %d0,%d1 # subtract scale factor
	andi.w &0x8000,%d2 # extract old sign
	addi.l &0x6000,%d1 # add new bias
	or.w %d1,%d2 # concat old sign,new exp
	mov.w %d2,FP_SCR1_EX(%a6) # insert new exponent
	fmovm.x FP_SCR1(%a6),&0x40 # return EXOP in fp1
	mov.l (%sp)+,%d2 # restore d2
	bra.b fin_sd_unfl_dis
# operand WILL overflow.

	fmov.l &0x0,%fpsr # clear FPSR
	fmov.l L_SCR3(%a6),%fpcr # set FPCR

	fmov.x FP_SCR0(%a6),%fp0 # perform move

	fmov.l &0x0,%fpcr # clear FPCR
	fmov.l %fpsr,%d1 # save FPSR

	or.l %d1,USER_FPSR(%a6) # save INEX2,N

	or.l &ovfl_inx_mask,USER_FPSR(%a6) # set ovfl/aovfl/ainex

	mov.b FPCR_ENABLE(%a6),%d1
	andi.b &0x13,%d1 # is OVFL or INEX enabled?
	bne.b fin_sd_ovfl_ena # yes

# OVFL is not enabled; therefore, we must create the default result by
# calling ovf_res().

	btst &neg_bit,FPSR_CC(%a6) # is result negative?
	sne %d1 # set sign param accordingly
	mov.l L_SCR3(%a6),%d0 # pass: prec,mode
	bsr.l ovf_res # calculate default result
	or.b %d0,FPSR_CC(%a6) # set INF,N if applicable
	fmovm.x (%a0),&0x80 # return default result in fp0
# the INEX2 bit has already been updated by the round to the correct precision.
# now, round to extended(and don't alter the FPSR).

	mov.l %d2,-(%sp) # save d2
	mov.w FP_SCR0_EX(%a6),%d1 # fetch {sgn,exp}
	mov.l %d1,%d2 # make a copy
	andi.l &0x7fff,%d1 # strip sign
	andi.w &0x8000,%d2 # keep old sign
	sub.l %d0,%d1 # add scale factor
	sub.l &0x6000,%d1 # subtract bias

	mov.w %d1,FP_SCR0_EX(%a6) # insert new exponent
	mov.l (%sp)+,%d2 # restore d2
	fmovm.x FP_SCR0(%a6),&0x40 # return EXOP in fp1
	bra.b fin_sd_ovfl_dis
# the move in MAY overflow. so...

	fmov.l &0x0,%fpsr # clear FPSR
	fmov.l L_SCR3(%a6),%fpcr # set FPCR

	fmov.x FP_SCR0(%a6),%fp0 # perform the move

	fmov.l %fpsr,%d1 # save status
	fmov.l &0x0,%fpcr # clear FPCR

	or.l %d1,USER_FPSR(%a6) # save INEX2,N

	fabs.x %fp0,%fp1 # make a copy of result
	fcmp.b %fp1,&0x2 # is |result| >= 2.b?
	fbge.w fin_sd_ovfl_tst # yes; overflow has occurred

# no, it didn't overflow; we have correct result
	bra.w fin_sd_normal_exit
##########################################################################

# operand is not a NORM: check its optype and branch accordingly

	cmpi.b %d1,&DENORM # weed out DENORM
	cmpi.b %d1,&SNAN # weed out SNANs
	cmpi.b %d1,&QNAN # weed out QNANs

# do the fmove in; at this point, only possible ops are ZERO and INF.
# use fmov to determine ccodes.
# prec:mode should be zero at this point but it won't affect answer anyways.

	fmov.x SRC(%a0),%fp0 # do fmove in
	fmov.l %fpsr,%d0 # no exceptions possible
	rol.l &0x8,%d0 # put ccodes in lo byte
	mov.b %d0,FPSR_CC(%a6) # insert correct ccodes
#########################################################################
# XDEF **************************************************************** #
#	fdiv(): emulates the fdiv instruction				#
#	fsdiv(): emulates the fsdiv instruction				#
#	fddiv(): emulates the fddiv instruction				#
#
# XREF **************************************************************** #
#	scale_to_zero_src() - scale src exponent to zero		#
#	scale_to_zero_dst() - scale dst exponent to zero		#
#	unf_res() - return default underflow result			#
#	ovf_res() - return default overflow result			#
#	res_qnan() - return QNAN result					#
#	res_snan() - return SNAN result					#
#
# INPUT *************************************************************** #
#	a0 = pointer to extended precision source operand		#
#	a1 = pointer to extended precision destination operand		#
#	d0  rnd prec,mode						#
#
# OUTPUT ************************************************************** #
#	fp1 = EXOP (if exception occurred)				#
#
# ALGORITHM *********************************************************** #
#	Handle NANs, infinities, and zeroes as special cases. Divide	#
# norms/denorms into ext/sgl/dbl precision.				#
#	For norms/denorms, scale the exponents such that a divide	#
# instruction won't cause an exception. Use the regular fdiv to	#
# compute a result. Check if the regular operands would have taken	#
# an exception. If so, return the default overflow/underflow result	#
# and return the EXOP if exceptions are enabled. Else, scale the	#
# result operand to the proper exponent.				#
#
#########################################################################
	long 0x3fff - 0x0000 # ext_unfl
	long 0x3fff - 0x3f81 # sgl_unfl
	long 0x3fff - 0x3c01 # dbl_unfl

	long 0x3fff - 0x7ffe # ext overflow exponent
	long 0x3fff - 0x407e # sgl overflow exponent
	long 0x3fff - 0x43fe # dbl overflow exponent
	andi.b &0x30,%d0 # clear rnd prec
	ori.b &s_mode*0x10,%d0 # insert sgl prec

	andi.b &0x30,%d0 # clear rnd prec
	ori.b &d_mode*0x10,%d0 # insert dbl prec

	mov.l %d0,L_SCR3(%a6) # store rnd info

	mov.b DTAG(%a6),%d1
	or.b STAG(%a6),%d1 # combine src tags

	bne.w fdiv_not_norm # optimize on non-norm input
# DIVIDE: NORMs and DENORMs ONLY!

	mov.w DST_EX(%a1),FP_SCR1_EX(%a6)
	mov.l DST_HI(%a1),FP_SCR1_HI(%a6)
	mov.l DST_LO(%a1),FP_SCR1_LO(%a6)

	mov.w SRC_EX(%a0),FP_SCR0_EX(%a6)
	mov.l SRC_HI(%a0),FP_SCR0_HI(%a6)
	mov.l SRC_LO(%a0),FP_SCR0_LO(%a6)

	bsr.l scale_to_zero_src # scale src exponent
	mov.l %d0,-(%sp) # save scale factor 1

	bsr.l scale_to_zero_dst # scale dst exponent

	neg.l (%sp) # SCALE FACTOR = scale1 - scale2

	mov.w 2+L_SCR3(%a6),%d1 # fetch precision
	lsr.b &0x6,%d1 # shift to lo bits
	mov.l (%sp)+,%d0 # load S.F.
	cmp.l %d0,(tbl_fdiv_ovfl.b,%pc,%d1.w*4) # will result overflow?
	ble.w fdiv_may_ovfl # result will overflow

	cmp.l %d0,(tbl_fdiv_unfl.w,%pc,%d1.w*4) # will result underflow?
	beq.w fdiv_may_unfl # maybe
	bgt.w fdiv_unfl # yes; go handle underflow
	fmovm.x FP_SCR1(%a6),&0x80 # load dst op

	fmov.l L_SCR3(%a6),%fpcr # save FPCR
	fmov.l &0x0,%fpsr # clear FPSR

	fdiv.x FP_SCR0(%a6),%fp0 # perform divide

	fmov.l %fpsr,%d1 # save FPSR
	fmov.l &0x0,%fpcr # clear FPCR

	or.l %d1,USER_FPSR(%a6) # save INEX2,N

	fmovm.x &0x80,FP_SCR0(%a6) # store result on stack
	mov.l %d2,-(%sp) # store d2
	mov.w FP_SCR0_EX(%a6),%d1 # load {sgn,exp}
	mov.l %d1,%d2 # make a copy
	andi.l &0x7fff,%d1 # strip sign
	andi.w &0x8000,%d2 # keep old sign
	sub.l %d0,%d1 # add scale factor
	or.w %d2,%d1 # concat old sign,new exp
	mov.w %d1,FP_SCR0_EX(%a6) # insert new exponent
	mov.l (%sp)+,%d2 # restore d2
	fmovm.x FP_SCR0(%a6),&0x80 # return result in fp0
	mov.l (%sp)+,%d0 # restore scale factor
	bra.b fdiv_normal_exit

	mov.l %d0,-(%sp) # save scale factor

	fmovm.x FP_SCR1(%a6),&0x80 # load dst op

	fmov.l L_SCR3(%a6),%fpcr # set FPCR
	fmov.l &0x0,%fpsr # set FPSR

	fdiv.x FP_SCR0(%a6),%fp0 # execute divide

	or.l %d0,USER_FPSR(%a6) # save INEX,N

	fmovm.x &0x01,-(%sp) # save result to stack
	mov.w (%sp),%d0 # fetch new exponent
	add.l &0xc,%sp # clear result from stack
	andi.l &0x7fff,%d0 # strip sign
	sub.l (%sp),%d0 # add scale factor
	cmp.l %d0,(tbl_fdiv_ovfl2.b,%pc,%d1.w*4)
	or.l &ovfl_inx_mask,USER_FPSR(%a6) # set ovfl/aovfl/ainex

	mov.b FPCR_ENABLE(%a6),%d1
	andi.b &0x13,%d1 # is OVFL or INEX enabled?
	bne.b fdiv_ovfl_ena # yes

	btst &neg_bit,FPSR_CC(%a6) # is result negative?
	sne %d1 # set sign param accordingly
	mov.l L_SCR3(%a6),%d0 # pass prec:rnd
	bsr.l ovf_res # calculate default result
	or.b %d0,FPSR_CC(%a6) # set INF if applicable
	fmovm.x (%a0),&0x80 # return default result in fp0
	mov.l L_SCR3(%a6),%d1
	andi.b &0xc0,%d1 # is precision extended?
	bne.b fdiv_ovfl_ena_sd # no, do sgl or dbl

fdiv_ovfl_ena_cont:
	fmovm.x &0x80,FP_SCR0(%a6) # move result to stack

	mov.l %d2,-(%sp) # save d2
	mov.w FP_SCR0_EX(%a6),%d1 # fetch {sgn,exp}
	mov.w %d1,%d2 # make a copy
	andi.l &0x7fff,%d1 # strip sign
	sub.l %d0,%d1 # add scale factor
	subi.l &0x6000,%d1 # subtract bias
	andi.w &0x7fff,%d1 # clear sign bit
	andi.w &0x8000,%d2 # keep old sign
	or.w %d2,%d1 # concat old sign,new exp
	mov.w %d1,FP_SCR0_EX(%a6) # insert new exponent
	mov.l (%sp)+,%d2 # restore d2
	fmovm.x FP_SCR0(%a6),&0x40 # return EXOP in fp1
	bra.b fdiv_ovfl_dis

	fmovm.x FP_SCR1(%a6),&0x80 # load dst operand

	mov.l L_SCR3(%a6),%d1
	andi.b &0x30,%d1 # keep rnd mode
	fmov.l %d1,%fpcr # set FPCR

	fdiv.x FP_SCR0(%a6),%fp0 # execute divide

	fmov.l &0x0,%fpcr # clear FPCR
	bra.b fdiv_ovfl_ena_cont
	bset &unfl_bit,FPSR_EXCEPT(%a6) # set unfl exc bit

	fmovm.x FP_SCR1(%a6),&0x80 # load dst op

	fmov.l &rz_mode*0x10,%fpcr # set FPCR
	fmov.l &0x0,%fpsr # clear FPSR

	fdiv.x FP_SCR0(%a6),%fp0 # execute divide

	fmov.l %fpsr,%d1 # save status
	fmov.l &0x0,%fpcr # clear FPCR

	or.l %d1,USER_FPSR(%a6) # save INEX2,N

	mov.b FPCR_ENABLE(%a6),%d1
	andi.b &0x0b,%d1 # is UNFL or INEX enabled?
	bne.b fdiv_unfl_ena # yes

	fmovm.x &0x80,FP_SCR0(%a6) # store out result

	lea FP_SCR0(%a6),%a0 # pass: result addr
	mov.l L_SCR3(%a6),%d1 # pass: rnd prec,mode
	bsr.l unf_res # calculate default result
	or.b %d0,FPSR_CC(%a6) # 'Z' may have been set
	fmovm.x FP_SCR0(%a6),&0x80 # return default result in fp0
	fmovm.x FP_SCR1(%a6),&0x40 # load dst op

	mov.l L_SCR3(%a6),%d1
	andi.b &0xc0,%d1 # is precision extended?
	bne.b fdiv_unfl_ena_sd # no, sgl or dbl

	fmov.l L_SCR3(%a6),%fpcr # set FPCR

fdiv_unfl_ena_cont:
	fmov.l &0x0,%fpsr # clear FPSR

	fdiv.x FP_SCR0(%a6),%fp1 # execute divide

	fmov.l &0x0,%fpcr # clear FPCR

	fmovm.x &0x40,FP_SCR0(%a6) # save result to stack
	mov.l %d2,-(%sp) # save d2
	mov.w FP_SCR0_EX(%a6),%d1 # fetch {sgn,exp}
	mov.l %d1,%d2 # make a copy
	andi.l &0x7fff,%d1 # strip sign
	andi.w &0x8000,%d2 # keep old sign
	sub.l %d0,%d1 # add scale factor
	addi.l &0x6000,%d1 # add bias
	or.w %d2,%d1 # concat old sign,new exp
	mov.w %d1,FP_SCR0_EX(%a6) # insert new exp
	mov.l (%sp)+,%d2 # restore d2
	fmovm.x FP_SCR0(%a6),&0x40 # return EXOP in fp1
	bra.w fdiv_unfl_dis

	mov.l L_SCR3(%a6),%d1
	andi.b &0x30,%d1 # use only rnd mode
	fmov.l %d1,%fpcr # set FPCR

	bra.b fdiv_unfl_ena_cont
# the divide operation MAY underflow:

	fmovm.x FP_SCR1(%a6),&0x80 # load dst op

	fmov.l L_SCR3(%a6),%fpcr # set FPCR
	fmov.l &0x0,%fpsr # clear FPSR

	fdiv.x FP_SCR0(%a6),%fp0 # execute divide

	fmov.l %fpsr,%d1 # save status
	fmov.l &0x0,%fpcr # clear FPCR

	or.l %d1,USER_FPSR(%a6) # save INEX2,N

	fabs.x %fp0,%fp1 # make a copy of result
	fcmp.b %fp1,&0x1 # is |result| > 1.b?
	fbgt.w fdiv_normal_exit # no; no underflow occurred
	fblt.w fdiv_unfl # yes; underflow occurred

# we still don't know if underflow occurred. result is ~ equal to 1. but,
# we don't know if the result was an underflow that rounded up to a 1
# or a normalized number that rounded down to a 1. so, redo the entire
# operation using RZ as the rounding mode to see what the pre-rounded
# result is. this case should be relatively rare.

	fmovm.x FP_SCR1(%a6),&0x40 # load dst op into fp1

	mov.l L_SCR3(%a6),%d1
	andi.b &0xc0,%d1 # keep rnd prec
	ori.b &rz_mode*0x10,%d1 # insert RZ

	fmov.l %d1,%fpcr # set FPCR
	fmov.l &0x0,%fpsr # clear FPSR

	fdiv.x FP_SCR0(%a6),%fp1 # execute divide

	fmov.l &0x0,%fpcr # clear FPCR
	fabs.x %fp1 # make absolute value
	fcmp.b %fp1,&0x1 # is |result| < 1.b?
	fbge.w fdiv_normal_exit # no; no underflow occurred
	bra.w fdiv_unfl # yes; underflow occurred
############################################################################

# Divide: inputs are not both normalized; what are they?

	mov.w (tbl_fdiv_op.b,%pc,%d1.w*2),%d1
	jmp (tbl_fdiv_op.b,%pc,%d1.w*1)
	short fdiv_norm - tbl_fdiv_op # NORM / NORM
	short fdiv_inf_load - tbl_fdiv_op # NORM / ZERO
	short fdiv_zero_load - tbl_fdiv_op # NORM / INF
	short fdiv_res_qnan - tbl_fdiv_op # NORM / QNAN
	short fdiv_norm - tbl_fdiv_op # NORM / DENORM
	short fdiv_res_snan - tbl_fdiv_op # NORM / SNAN
	short tbl_fdiv_op - tbl_fdiv_op #
	short tbl_fdiv_op - tbl_fdiv_op #

	short fdiv_zero_load - tbl_fdiv_op # ZERO / NORM
	short fdiv_res_operr - tbl_fdiv_op # ZERO / ZERO
	short fdiv_zero_load - tbl_fdiv_op # ZERO / INF
	short fdiv_res_qnan - tbl_fdiv_op # ZERO / QNAN
	short fdiv_zero_load - tbl_fdiv_op # ZERO / DENORM
	short fdiv_res_snan - tbl_fdiv_op # ZERO / SNAN
	short tbl_fdiv_op - tbl_fdiv_op #
	short tbl_fdiv_op - tbl_fdiv_op #

	short fdiv_inf_dst - tbl_fdiv_op # INF / NORM
	short fdiv_inf_dst - tbl_fdiv_op # INF / ZERO
	short fdiv_res_operr - tbl_fdiv_op # INF / INF
	short fdiv_res_qnan - tbl_fdiv_op # INF / QNAN
	short fdiv_inf_dst - tbl_fdiv_op # INF / DENORM
	short fdiv_res_snan - tbl_fdiv_op # INF / SNAN
	short tbl_fdiv_op - tbl_fdiv_op #
	short tbl_fdiv_op - tbl_fdiv_op #

	short fdiv_res_qnan - tbl_fdiv_op # QNAN / NORM
	short fdiv_res_qnan - tbl_fdiv_op # QNAN / ZERO
	short fdiv_res_qnan - tbl_fdiv_op # QNAN / INF
	short fdiv_res_qnan - tbl_fdiv_op # QNAN / QNAN
	short fdiv_res_qnan - tbl_fdiv_op # QNAN / DENORM
	short fdiv_res_snan - tbl_fdiv_op # QNAN / SNAN
	short tbl_fdiv_op - tbl_fdiv_op #
	short tbl_fdiv_op - tbl_fdiv_op #

	short fdiv_norm - tbl_fdiv_op # DENORM / NORM
	short fdiv_inf_load - tbl_fdiv_op # DENORM / ZERO
	short fdiv_zero_load - tbl_fdiv_op # DENORM / INF
	short fdiv_res_qnan - tbl_fdiv_op # DENORM / QNAN
	short fdiv_norm - tbl_fdiv_op # DENORM / DENORM
	short fdiv_res_snan - tbl_fdiv_op # DENORM / SNAN
	short tbl_fdiv_op - tbl_fdiv_op #
	short tbl_fdiv_op - tbl_fdiv_op #

	short fdiv_res_snan - tbl_fdiv_op # SNAN / NORM
	short fdiv_res_snan - tbl_fdiv_op # SNAN / ZERO
	short fdiv_res_snan - tbl_fdiv_op # SNAN / INF
	short fdiv_res_snan - tbl_fdiv_op # SNAN / QNAN
	short fdiv_res_snan - tbl_fdiv_op # SNAN / DENORM
	short fdiv_res_snan - tbl_fdiv_op # SNAN / SNAN
	short tbl_fdiv_op - tbl_fdiv_op #
	short tbl_fdiv_op - tbl_fdiv_op #
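#
# Read as C, the dispatch table above reduces to the following decision
# (editor's sketch; the enum names are hypothetical):
#
#	enum cls  { NORM, ZERO, INF, QNAN, DENORM, SNAN };
#	enum hndl { DO_DIVIDE, LOAD_INF, LOAD_ZERO, INF_DST,
#		    RES_QNAN, RES_SNAN, RES_OPERR };
#
#	static enum hndl fdiv_dispatch(enum cls dst, enum cls src)
#	{
#		if (src == SNAN || dst == SNAN) return RES_SNAN;
#		if (src == QNAN || dst == QNAN) return RES_QNAN;
#		if (dst == INF)  return (src == INF)  ? RES_OPERR : INF_DST;
#		if (dst == ZERO) return (src == ZERO) ? RES_OPERR : LOAD_ZERO;
#		/* dst is a NORM or DENORM */
#		if (src == ZERO) return LOAD_INF;   /* x/0: INF, DZ set */
#		if (src == INF)  return LOAD_ZERO;  /* x/inf: signed 0  */
#		return DO_DIVIDE;
#	}
#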
	global fdiv_zero_load # global for fsgldiv
	mov.b SRC_EX(%a0),%d0 # result sign is exclusive
	mov.b DST_EX(%a1),%d1 # or of input signs.

	bpl.b fdiv_zero_load_p # result is positive
	fmov.s &0x80000000,%fp0 # load a -ZERO
	mov.b &z_bmask+neg_bmask,FPSR_CC(%a6) # set Z/N

	fmov.s &0x00000000,%fp0 # load a +ZERO
	mov.b &z_bmask,FPSR_CC(%a6) # set Z
# The destination was In Range and the source was a ZERO. The result,
# therefore, is an INF w/ the proper sign.
# So, determine the sign and return a new INF (w/ the j-bit cleared).

	global fdiv_inf_load # global for fsgldiv

	ori.w &dz_mask+adz_mask,2+USER_FPSR(%a6) # no; set DZ/ADZ
	mov.b SRC_EX(%a0),%d0 # load both signs
	mov.b DST_EX(%a1),%d1

	bpl.b fdiv_inf_load_p # result is positive
	fmov.s &0xff800000,%fp0 # make result -INF
	mov.b &inf_bmask+neg_bmask,FPSR_CC(%a6) # set INF/N

	fmov.s &0x7f800000,%fp0 # make result +INF
	mov.b &inf_bmask,FPSR_CC(%a6) # set INF
# The destination was an INF w/ an In Range or ZERO source, the result is
# an INF w/ the proper sign.
# The 68881/882 returns the destination INF w/ the new sign (if the j-bit of
# the dst INF is set, then the j-bit of the result INF is also set).

	global fdiv_inf_dst # global for fsgldiv
	mov.b DST_EX(%a1),%d0 # load both signs
	mov.b SRC_EX(%a0),%d1

	bpl.b fdiv_inf_dst_p # result is positive

	fmovm.x DST(%a1),&0x80 # return result in fp0
	fabs.x %fp0 # clear sign bit
	fneg.x %fp0 # set sign bit
	mov.b &inf_bmask+neg_bmask,FPSR_CC(%a6) # set INF/NEG

	fmovm.x DST(%a1),&0x80 # return result in fp0
	fabs.x %fp0 # return positive INF
	mov.b &inf_bmask,FPSR_CC(%a6) # set INF
#########################################################################
# XDEF **************************************************************** #
#	fneg(): emulates the fneg instruction				#
#	fsneg(): emulates the fsneg instruction				#
#	fdneg(): emulates the fdneg instruction				#
#
# XREF **************************************************************** #
#	norm() - normalize a denorm to provide EXOP			#
#	scale_to_zero_src() - scale sgl/dbl source exponent		#
#	ovf_res() - return default overflow result			#
#	unf_res() - return default underflow result			#
#	res_qnan_1op() - return QNAN result				#
#	res_snan_1op() - return SNAN result				#
#
# INPUT *************************************************************** #
#	a0 = pointer to extended precision source operand		#
#	d0 = rnd prec,mode						#
#
# OUTPUT ************************************************************** #
#	fp1 = EXOP (if exception occurred)				#
#
# ALGORITHM *********************************************************** #
#	Handle NANs, zeroes, and infinities as special cases. Separate	#
# norms/denorms into ext/sgl/dbl precisions. Extended precision can be	#
# emulated by simply setting sign bit. Sgl/dbl operands must be scaled	#
# and an actual fneg performed to see if overflow/underflow would have	#
# occurred. If so, return default underflow/overflow result. Else,	#
# scale the result exponent and return result. FPSR gets set based on	#
# the result value.							#
#
#########################################################################
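#
# The extended-precision path below only has to flip the sign bit, which
# in C terms is just (editor's sketch; helper name is hypothetical):
#
#	#include <stdint.h>
#
#	static uint16_t fneg_ext_se(uint16_t se_word)
#	{
#		return se_word ^ 0x8000;	/* same as eori.w &0x8000 */
#	}
#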
	andi.b &0x30,%d0 # clear rnd prec
	ori.b &s_mode*0x10,%d0 # insert sgl precision

	andi.b &0x30,%d0 # clear rnd prec
	ori.b &d_mode*0x10,%d0 # insert dbl prec

	mov.l %d0,L_SCR3(%a6) # store rnd info
	mov.b STAG(%a6),%d1
	bne.w fneg_not_norm # optimize on non-norm input

# NEGATE SIGN : norms and denorms ONLY!

	andi.b &0xc0,%d0 # is precision extended?
	bne.w fneg_not_ext # no; go handle sgl or dbl

# precision selected is extended. so...we can not get an underflow
# or overflow because of rounding to the correct precision. so...
# skip the scaling and unscaling...

	mov.l SRC_HI(%a0),FP_SCR0_HI(%a6)
	mov.l SRC_LO(%a0),FP_SCR0_LO(%a6)
	mov.w SRC_EX(%a0),%d0
	eori.w &0x8000,%d0 # negate sign
	bpl.b fneg_norm_load # sign is positive
	mov.b &neg_bmask,FPSR_CC(%a6) # set 'N' ccode bit

	mov.w %d0,FP_SCR0_EX(%a6)
	fmovm.x FP_SCR0(%a6),&0x80 # return result in fp0
# for an extended precision DENORM, the UNFL exception bit is set
# the accrued bit is NOT set in this instance(no inexactness!)

	andi.b &0xc0,%d0 # is precision extended?
	bne.b fneg_not_ext # no; go handle sgl or dbl

	bset &unfl_bit,FPSR_EXCEPT(%a6) # set unfl exc bit

	mov.l SRC_HI(%a0),FP_SCR0_HI(%a6)
	mov.l SRC_LO(%a0),FP_SCR0_LO(%a6)
	mov.w SRC_EX(%a0),%d0
	eori.w &0x8000,%d0 # negate sign
	bpl.b fneg_denorm_done # no
	mov.b &neg_bmask,FPSR_CC(%a6) # yes, set 'N' ccode bit

	mov.w %d0,FP_SCR0_EX(%a6)
	fmovm.x FP_SCR0(%a6),&0x80 # return default result in fp0
	btst &unfl_bit,FPCR_ENABLE(%a6) # is UNFL enabled?
	bne.b fneg_ext_unfl_ena # yes

# the input is an extended DENORM and underflow is enabled in the FPCR.
# normalize the mantissa and add the bias of 0x6000 to the resulting negative
# exponent and insert back into the operand.

	lea FP_SCR0(%a6),%a0 # pass: ptr to operand
	bsr.l norm # normalize result
	neg.w %d0 # new exponent = -(shft val)
	addi.w &0x6000,%d0 # add new bias to exponent
	mov.w FP_SCR0_EX(%a6),%d1 # fetch old sign,exp
	andi.w &0x8000,%d1 # keep old sign
	andi.w &0x7fff,%d0 # clear sign position
	or.w %d1,%d0 # concat old sign, new exponent
	mov.w %d0,FP_SCR0_EX(%a6) # insert new exponent
	fmovm.x FP_SCR0(%a6),&0x40 # return EXOP in fp1
# operand is either single or double

	cmpi.b %d0,&s_mode*0x10 # separate sgl/dbl prec

# operand is to be rounded to single precision

	mov.w SRC_EX(%a0),FP_SCR0_EX(%a6)
	mov.l SRC_HI(%a0),FP_SCR0_HI(%a6)
	mov.l SRC_LO(%a0),FP_SCR0_LO(%a6)
	bsr.l scale_to_zero_src # calculate scale factor

	cmpi.l %d0,&0x3fff-0x3f80 # will move in underflow?
	bge.w fneg_sd_unfl # yes; go handle underflow
	cmpi.l %d0,&0x3fff-0x407e # will move in overflow?
	beq.w fneg_sd_may_ovfl # maybe; go check
	blt.w fneg_sd_ovfl # yes; go handle overflow
# operand will NOT overflow or underflow when moved in to the fp reg file

	fmov.l &0x0,%fpsr # clear FPSR
	fmov.l L_SCR3(%a6),%fpcr # set FPCR

	fneg.x FP_SCR0(%a6),%fp0 # perform negation

	fmov.l %fpsr,%d1 # save FPSR
	fmov.l &0x0,%fpcr # clear FPCR

	or.l %d1,USER_FPSR(%a6) # save INEX2,N

fneg_sd_normal_exit:
	mov.l %d2,-(%sp) # save d2
	fmovm.x &0x80,FP_SCR0(%a6) # store out result
	mov.w FP_SCR0_EX(%a6),%d1 # load sgn,exp
	mov.w %d1,%d2 # make a copy
	andi.l &0x7fff,%d1 # strip sign
	sub.l %d0,%d1 # add scale factor
	andi.w &0x8000,%d2 # keep old sign
	or.w %d1,%d2 # concat old sign,new exp
	mov.w %d2,FP_SCR0_EX(%a6) # insert new exponent
	mov.l (%sp)+,%d2 # restore d2
	fmovm.x FP_SCR0(%a6),&0x80 # return result in fp0
# operand is to be rounded to double precision

	mov.w SRC_EX(%a0),FP_SCR0_EX(%a6)
	mov.l SRC_HI(%a0),FP_SCR0_HI(%a6)
	mov.l SRC_LO(%a0),FP_SCR0_LO(%a6)
	bsr.l scale_to_zero_src # calculate scale factor

	cmpi.l %d0,&0x3fff-0x3c00 # will move in underflow?
	bge.b fneg_sd_unfl # yes; go handle underflow
	cmpi.l %d0,&0x3fff-0x43fe # will move in overflow?
	beq.w fneg_sd_may_ovfl # maybe; go check
	blt.w fneg_sd_ovfl # yes; go handle overflow
	bra.w fneg_sd_normal # no; go handle normalized op
# operand WILL underflow when moved in to the fp register file

	bset &unfl_bit,FPSR_EXCEPT(%a6) # set unfl exc bit

	eori.b &0x80,FP_SCR0_EX(%a6) # negate sign
	bpl.b fneg_sd_unfl_tst
	bset &neg_bit,FPSR_CC(%a6) # set 'N' ccode bit

# if underflow or inexact is enabled, go calculate EXOP first.
	mov.b FPCR_ENABLE(%a6),%d1
	andi.b &0x0b,%d1 # is UNFL or INEX enabled?
	bne.b fneg_sd_unfl_ena # yes

	lea FP_SCR0(%a6),%a0 # pass: result addr
	mov.l L_SCR3(%a6),%d1 # pass: rnd prec,mode
	bsr.l unf_res # calculate default result
	or.b %d0,FPSR_CC(%a6) # unf_res may have set 'Z'
	fmovm.x FP_SCR0(%a6),&0x80 # return default result in fp0
# operand will underflow AND underflow is enabled.
# therefore, we must return the result rounded to extended precision.

	mov.l FP_SCR0_HI(%a6),FP_SCR1_HI(%a6)
	mov.l FP_SCR0_LO(%a6),FP_SCR1_LO(%a6)
	mov.w FP_SCR0_EX(%a6),%d1 # load current exponent

	mov.l %d2,-(%sp) # save d2
	mov.l %d1,%d2 # make a copy
	andi.l &0x7fff,%d1 # strip sign
	andi.w &0x8000,%d2 # keep old sign
	sub.l %d0,%d1 # subtract scale factor
	addi.l &0x6000,%d1 # add new bias
	or.w %d2,%d1 # concat new sign,new exp
	mov.w %d1,FP_SCR1_EX(%a6) # insert new exp
	fmovm.x FP_SCR1(%a6),&0x40 # return EXOP in fp1
	mov.l (%sp)+,%d2 # restore d2
	bra.b fneg_sd_unfl_dis
# operand WILL overflow.

	fmov.l &0x0,%fpsr # clear FPSR
	fmov.l L_SCR3(%a6),%fpcr # set FPCR

	fneg.x FP_SCR0(%a6),%fp0 # perform negation

	fmov.l &0x0,%fpcr # clear FPCR
	fmov.l %fpsr,%d1 # save FPSR

	or.l %d1,USER_FPSR(%a6) # save INEX2,N

	or.l &ovfl_inx_mask,USER_FPSR(%a6) # set ovfl/aovfl/ainex

	mov.b FPCR_ENABLE(%a6),%d1
	andi.b &0x13,%d1 # is OVFL or INEX enabled?
	bne.b fneg_sd_ovfl_ena # yes

# OVFL is not enabled; therefore, we must create the default result by
# calling ovf_res().

	btst &neg_bit,FPSR_CC(%a6) # is result negative?
	sne %d1 # set sign param accordingly
	mov.l L_SCR3(%a6),%d0 # pass: prec,mode
	bsr.l ovf_res # calculate default result
	or.b %d0,FPSR_CC(%a6) # set INF,N if applicable
	fmovm.x (%a0),&0x80 # return default result in fp0
# the INEX2 bit has already been updated by the round to the correct precision.
# now, round to extended(and don't alter the FPSR).

	mov.l %d2,-(%sp) # save d2
	mov.w FP_SCR0_EX(%a6),%d1 # fetch {sgn,exp}
	mov.l %d1,%d2 # make a copy
	andi.l &0x7fff,%d1 # strip sign
	andi.w &0x8000,%d2 # keep old sign
	sub.l %d0,%d1 # add scale factor
	subi.l &0x6000,%d1 # subtract bias
	or.w %d2,%d1 # concat sign,exp
	mov.w %d1,FP_SCR0_EX(%a6) # insert new exponent
	fmovm.x FP_SCR0(%a6),&0x40 # return EXOP in fp1
	mov.l (%sp)+,%d2 # restore d2
	bra.b fneg_sd_ovfl_dis
# the move in MAY overflow. so...

	fmov.l &0x0,%fpsr # clear FPSR
	fmov.l L_SCR3(%a6),%fpcr # set FPCR

	fneg.x FP_SCR0(%a6),%fp0 # perform negation

	fmov.l %fpsr,%d1 # save status
	fmov.l &0x0,%fpcr # clear FPCR

	or.l %d1,USER_FPSR(%a6) # save INEX2,N

	fabs.x %fp0,%fp1 # make a copy of result
	fcmp.b %fp1,&0x2 # is |result| >= 2.b?
	fbge.w fneg_sd_ovfl_tst # yes; overflow has occurred

# no, it didn't overflow; we have correct result
	bra.w fneg_sd_normal_exit
##########################################################################

# input is not normalized; what is it?

	cmpi.b %d1,&DENORM # weed out DENORM
	cmpi.b %d1,&SNAN # weed out SNAN
	cmpi.b %d1,&QNAN # weed out QNAN

# do the fneg; at this point, only possible ops are ZERO and INF.
# use fneg to determine ccodes.
# prec:mode should be zero at this point but it won't affect answer anyways.

	fneg.x SRC_EX(%a0),%fp0 # do fneg
	rol.l &0x8,%d0 # put ccodes in lo byte
	mov.b %d0,FPSR_CC(%a6) # insert correct ccodes
#########################################################################
# XDEF **************************************************************** #
#	ftst(): emulates the ftest instruction				#
#
# XREF **************************************************************** #
#	res{s,q}nan_1op() - set NAN result for monadic instruction	#
#
# INPUT *************************************************************** #
#	a0 = pointer to extended precision source operand		#
#
# OUTPUT ************************************************************** #
#
# ALGORITHM *********************************************************** #
#	Check the source operand tag (STAG) and set the FPSR according	#
# to the operand type and sign.						#
#
#########################################################################
	mov.b STAG(%a6),%d1
	bne.b ftst_not_norm # optimize on non-norm input

	tst.b SRC_EX(%a0) # is operand negative?
	bmi.b ftst_norm_m # yes

	mov.b &neg_bmask,FPSR_CC(%a6) # set 'N' ccode bit
# input is not normalized; what is it?

	cmpi.b %d1,&ZERO # weed out ZERO
	cmpi.b %d1,&INF # weed out INF
	cmpi.b %d1,&SNAN # weed out SNAN
	cmpi.b %d1,&QNAN # weed out QNAN

	tst.b SRC_EX(%a0) # is operand negative?
	bmi.b ftst_denorm_m # yes

	mov.b &neg_bmask,FPSR_CC(%a6) # set 'N' ccode bit

	tst.b SRC_EX(%a0) # is operand negative?
	bmi.b ftst_inf_m # yes

	mov.b &inf_bmask,FPSR_CC(%a6) # set 'I' ccode bit

	mov.b &inf_bmask+neg_bmask,FPSR_CC(%a6) # set 'I','N' ccode bits

	tst.b SRC_EX(%a0) # is operand negative?
	bmi.b ftst_zero_m # yes

	mov.b &z_bmask,FPSR_CC(%a6) # set 'Z' ccode bit

	mov.b &z_bmask+neg_bmask,FPSR_CC(%a6) # set 'Z','N' ccode bits
#########################################################################
# XDEF **************************************************************** #
#	fint(): emulates the fint instruction				#
# XREF **************************************************************** #
#	res_{s,q}nan_1op() - set NAN result for monadic operation	#
# INPUT *************************************************************** #
#	a0 = pointer to extended precision source operand		#
#	d0 = round precision/mode					#
# OUTPUT ************************************************************** #
# ALGORITHM *********************************************************** #
#	Separate according to operand type. Unnorms don't pass through	#
# here. For norms, load the rounding mode/prec, execute a "fint", then	#
# store the resulting FPSR bits.					#
#	For denorms, force the j-bit to a one and do the same as for	#
# norms. Denorms are so low that the answer will either be a zero or a	#
# one.									#
#	For zeroes/infs/NANs, return the same while setting the FPSR	#
# as appropriate.							#
#########################################################################
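#
# The DENORM case below keeps the sign, leaves the exponent field at zero,
# and forces the top mantissa byte to 0x80 (the j-bit), turning the operand
# into a NORM on the order of 2^-16383.  Rounding that tiny value to an
# integer can only give 0.0 or 1.0 of the proper sign, with INEX2/AINEX
# set, which is exactly the required denorm result.
#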
	mov.b		STAG(%a6),%d1
	bne.b		fint_not_norm		# optimize on non-norm input

	andi.b		&0x30,%d0		# set prec = ext

	fmov.l		%d0,%fpcr		# set FPCR
	fmov.l		&0x0,%fpsr		# clear FPSR

	fint.x		SRC(%a0),%fp0		# execute fint

	fmov.l		&0x0,%fpcr		# clear FPCR
	fmov.l		%fpsr,%d0		# save FPSR
	or.l		%d0,USER_FPSR(%a6)	# set exception bits

# input is not normalized; what is it?
	cmpi.b		%d1,&ZERO		# weed out ZERO
	cmpi.b		%d1,&INF		# weed out INF
	cmpi.b		%d1,&DENORM		# weed out DENORM
	cmpi.b		%d1,&SNAN		# weed out SNAN
	bra.l		res_qnan_1op		# weed out QNAN

# for DENORMs, the result will be either (+/-)ZERO or (+/-)1.
# also, the INEX2 and AINEX exception bits will be set.
# so, we could either set these manually or force the DENORM
# to a very small NORM and ship it to the NORM routine.
	mov.w		SRC_EX(%a0),FP_SCR0_EX(%a6) # copy sign, zero exp
	mov.b		&0x80,FP_SCR0_HI(%a6)	# force DENORM ==> small NORM
	lea		FP_SCR0(%a6),%a0

	tst.b		SRC_EX(%a0)		# is ZERO negative?
	bmi.b		fint_zero_m		# yes

	fmov.s		&0x00000000,%fp0	# return +ZERO in fp0
	mov.b		&z_bmask,FPSR_CC(%a6)	# set 'Z' ccode bit

	fmov.s		&0x80000000,%fp0	# return -ZERO in fp0
	mov.b		&z_bmask+neg_bmask,FPSR_CC(%a6) # set 'Z','N' ccode bits

	fmovm.x		SRC(%a0),&0x80		# return result in fp0
	tst.b		SRC_EX(%a0)		# is INF negative?
	bmi.b		fint_inf_m		# yes

	mov.b		&inf_bmask,FPSR_CC(%a6)	# set 'I' ccode bit

	mov.b		&inf_bmask+neg_bmask,FPSR_CC(%a6) # set 'N','I' ccode bits
#########################################################################
# XDEF **************************************************************** #
#	fintrz(): emulates the fintrz instruction			#
# XREF **************************************************************** #
#	res_{s,q}nan_1op() - set NAN result for monadic operation	#
# INPUT *************************************************************** #
#	a0 = pointer to extended precision source operand		#
#	d0 = round precision/mode					#
# OUTPUT ************************************************************** #
# ALGORITHM *********************************************************** #
#	Separate according to operand type. Unnorms don't pass through	#
# here. For norms, load the rounding mode/prec, execute a "fintrz",	#
# then store the resulting FPSR bits.					#
#	For denorms, force the j-bit to a one and do the same as for	#
# norms. Denorms are so low that the answer will either be a zero or a	#
#	For zeroes/infs/NANs, return the same while setting the FPSR	#
# as appropriate.							#
#########################################################################
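#
# Unlike fint above, fintrz always chops toward zero, so the small NORM
# manufactured for the DENORM case below can only produce (+/-)ZERO (see
# the comment ahead of the denorm handling); a (+/-)1 result is not
# possible here.
#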
	mov.b		STAG(%a6),%d1
	bne.b		fintrz_not_norm		# optimize on non-norm input

	fmov.l		&0x0,%fpsr		# clear FPSR

	fintrz.x	SRC(%a0),%fp0		# execute fintrz

	fmov.l		%fpsr,%d0		# save FPSR
	or.l		%d0,USER_FPSR(%a6)	# set exception bits

# input is not normalized; what is it?
	cmpi.b		%d1,&ZERO		# weed out ZERO
	cmpi.b		%d1,&INF		# weed out INF
	cmpi.b		%d1,&DENORM		# weed out DENORM
	beq.b		fintrz_denorm
	cmpi.b		%d1,&SNAN		# weed out SNAN
	bra.l		res_qnan_1op		# weed out QNAN

# for DENORMs, the result will be (+/-)ZERO.
# also, the INEX2 and AINEX exception bits will be set.
# so, we could either set these manually or force the DENORM
# to a very small NORM and ship it to the NORM routine.
	mov.w		SRC_EX(%a0),FP_SCR0_EX(%a6) # copy sign, zero exp
	mov.b		&0x80,FP_SCR0_HI(%a6)	# force DENORM ==> small NORM
	lea		FP_SCR0(%a6),%a0

	tst.b		SRC_EX(%a0)		# is ZERO negative?
	bmi.b		fintrz_zero_m		# yes

	fmov.s		&0x00000000,%fp0	# return +ZERO in fp0
	mov.b		&z_bmask,FPSR_CC(%a6)	# set 'Z' ccode bit

	fmov.s		&0x80000000,%fp0	# return -ZERO in fp0
	mov.b		&z_bmask+neg_bmask,FPSR_CC(%a6) # set 'Z','N' ccode bits

	fmovm.x		SRC(%a0),&0x80		# return result in fp0
	tst.b		SRC_EX(%a0)		# is INF negative?
	bmi.b		fintrz_inf_m		# yes

	mov.b		&inf_bmask,FPSR_CC(%a6)	# set 'I' ccode bit

	mov.b		&inf_bmask+neg_bmask,FPSR_CC(%a6) # set 'N','I' ccode bits
#########################################################################
# XDEF **************************************************************** #
#	fabs():  emulates the fabs instruction				#
#	fsabs(): emulates the fsabs instruction				#
#	fdabs(): emulates the fdabs instruction				#
# XREF **************************************************************** #
#	norm() - normalize denorm mantissa to provide EXOP		#
#	scale_to_zero_src() - make exponent = 0; get scale factor	#
#	unf_res() - calculate underflow result				#
#	ovf_res() - calculate overflow result				#
#	res_{s,q}nan_1op() - set NAN result for monadic operation	#
# INPUT *************************************************************** #
#	a0 = pointer to extended precision source operand		#
#	d0 = rnd precision/mode						#
# OUTPUT ************************************************************** #
#	fp1 = EXOP (if exception occurred)				#
# ALGORITHM *********************************************************** #
#	Handle NANs, infinities, and zeroes as special cases. Divide	#
# norms into extended, single, and double precision.			#
#	Simply clear sign for extended precision norm. Ext prec denorm	#
# gets an EXOP created for it since it's an underflow.			#
#	Double and single precision can overflow and underflow. First,	#
# scale the operand such that the exponent is zero. Perform an "fabs"	#
# using the correct rnd mode/prec. Check to see if the original	#
# exponent would take an exception. If so, use unf_res() or ovf_res()	#
# to calculate the default result. Also, create the EXOP for the	#
# exceptional case. If no exception should occur, insert the correct	#
# result exponent and return.						#
#	Unnorms don't pass through here.				#
#########################################################################
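#
# Scaling illustration for the single-precision path below:
# scale_to_zero_src() rewrites the operand with exponent 0x3fff and returns
# the scale factor in d0.  The smallest single-precision normal has a
# biased extended exponent of 0x3f81 and the largest 0x407e, so a scale
# factor >= 0x3fff-0x3f80 means the true result would underflow, a factor
# < 0x3fff-0x407e means it would overflow (equality is the borderline
# "may overflow" case), and anything in between takes the normal exit,
# which simply re-inserts (exponent - scale factor).
#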
	andi.b		&0x30,%d0		# clear rnd prec
	ori.b		&s_mode*0x10,%d0	# insert sgl precision

	andi.b		&0x30,%d0		# clear rnd prec
	ori.b		&d_mode*0x10,%d0	# insert dbl precision

	mov.l		%d0,L_SCR3(%a6)		# store rnd info
	mov.b		STAG(%a6),%d1
	bne.w		fabs_not_norm		# optimize on non-norm input

# ABSOLUTE VALUE: norms and denorms ONLY!
	andi.b		&0xc0,%d0		# is precision extended?
	bne.b		fabs_not_ext		# no; go handle sgl or dbl

# precision selected is extended. so...we can not get an underflow
# or overflow because of rounding to the correct precision. so...
# skip the scaling and unscaling...
	mov.l		SRC_HI(%a0),FP_SCR0_HI(%a6)
	mov.l		SRC_LO(%a0),FP_SCR0_LO(%a6)
	mov.w		SRC_EX(%a0),%d1
	bclr		&15,%d1			# force absolute value
	mov.w		%d1,FP_SCR0_EX(%a6)	# insert exponent
	fmovm.x		FP_SCR0(%a6),&0x80	# return result in fp0

# for an extended precision DENORM, the UNFL exception bit is set
# the accrued bit is NOT set in this instance(no inexactness!)
	andi.b		&0xc0,%d0		# is precision extended?
	bne.b		fabs_not_ext		# no

	bset		&unfl_bit,FPSR_EXCEPT(%a6) # set unfl exc bit

	mov.l		SRC_HI(%a0),FP_SCR0_HI(%a6)
	mov.l		SRC_LO(%a0),FP_SCR0_LO(%a6)
	mov.w		SRC_EX(%a0),%d0
	bclr		&15,%d0			# clear sign
	mov.w		%d0,FP_SCR0_EX(%a6)	# insert exponent

	fmovm.x		FP_SCR0(%a6),&0x80	# return default result in fp0

	btst		&unfl_bit,FPCR_ENABLE(%a6) # is UNFL enabled?
	bne.b		fabs_ext_unfl_ena

# the input is an extended DENORM and underflow is enabled in the FPCR.
# normalize the mantissa and add the bias of 0x6000 to the resulting negative
# exponent and insert back into the operand.
	lea		FP_SCR0(%a6),%a0	# pass: ptr to operand
	bsr.l		norm			# normalize result
	neg.w		%d0			# new exponent = -(shft val)
	addi.w		&0x6000,%d0		# add new bias to exponent
	mov.w		FP_SCR0_EX(%a6),%d1	# fetch old sign,exp
	andi.w		&0x8000,%d1		# keep old sign
	andi.w		&0x7fff,%d0		# clear sign position
	or.w		%d1,%d0			# concat old sign, new exponent
	mov.w		%d0,FP_SCR0_EX(%a6)	# insert new exponent
	fmovm.x		FP_SCR0(%a6),&0x40	# return EXOP in fp1

# operand is either single or double
	cmpi.b		%d0,&s_mode*0x10	# separate sgl/dbl prec

# operand is to be rounded to single precision
	mov.w		SRC_EX(%a0),FP_SCR0_EX(%a6)
	mov.l		SRC_HI(%a0),FP_SCR0_HI(%a6)
	mov.l		SRC_LO(%a0),FP_SCR0_LO(%a6)
	bsr.l		scale_to_zero_src	# calculate scale factor

	cmpi.l		%d0,&0x3fff-0x3f80	# will move in underflow?
	bge.w		fabs_sd_unfl		# yes; go handle underflow
	cmpi.l		%d0,&0x3fff-0x407e	# will move in overflow?
	beq.w		fabs_sd_may_ovfl	# maybe; go check
	blt.w		fabs_sd_ovfl		# yes; go handle overflow

# operand will NOT overflow or underflow when moved in to the fp reg file
	fmov.l		&0x0,%fpsr		# clear FPSR
	fmov.l		L_SCR3(%a6),%fpcr	# set FPCR

	fabs.x		FP_SCR0(%a6),%fp0	# perform absolute

	fmov.l		%fpsr,%d1		# save FPSR
	fmov.l		&0x0,%fpcr		# clear FPCR

	or.l		%d1,USER_FPSR(%a6)	# save INEX2,N

fabs_sd_normal_exit:
	mov.l		%d2,-(%sp)		# save d2
	fmovm.x		&0x80,FP_SCR0(%a6)	# store out result
	mov.w		FP_SCR0_EX(%a6),%d1	# load sgn,exp
	mov.l		%d1,%d2			# make a copy
	andi.l		&0x7fff,%d1		# strip sign
	sub.l		%d0,%d1			# add scale factor
	andi.w		&0x8000,%d2		# keep old sign
	or.w		%d1,%d2			# concat old sign,new exp
	mov.w		%d2,FP_SCR0_EX(%a6)	# insert new exponent
	mov.l		(%sp)+,%d2		# restore d2
	fmovm.x		FP_SCR0(%a6),&0x80	# return result in fp0

# operand is to be rounded to double precision
	mov.w		SRC_EX(%a0),FP_SCR0_EX(%a6)
	mov.l		SRC_HI(%a0),FP_SCR0_HI(%a6)
	mov.l		SRC_LO(%a0),FP_SCR0_LO(%a6)
	bsr.l		scale_to_zero_src	# calculate scale factor

	cmpi.l		%d0,&0x3fff-0x3c00	# will move in underflow?
	bge.b		fabs_sd_unfl		# yes; go handle underflow
	cmpi.l		%d0,&0x3fff-0x43fe	# will move in overflow?
	beq.w		fabs_sd_may_ovfl	# maybe; go check
	blt.w		fabs_sd_ovfl		# yes; go handle overflow
	bra.w		fabs_sd_normal		# no; go handle normalized op

# operand WILL underflow when moved in to the fp register file
	bset		&unfl_bit,FPSR_EXCEPT(%a6) # set unfl exc bit

	bclr		&0x7,FP_SCR0_EX(%a6)	# force absolute value

# if underflow or inexact is enabled, go calculate EXOP first.
	mov.b		FPCR_ENABLE(%a6),%d1
	andi.b		&0x0b,%d1		# is UNFL or INEX enabled?
	bne.b		fabs_sd_unfl_ena	# yes

	lea		FP_SCR0(%a6),%a0	# pass: result addr
	mov.l		L_SCR3(%a6),%d1		# pass: rnd prec,mode
	bsr.l		unf_res			# calculate default result
	or.b		%d0,FPSR_CC(%a6)	# set possible 'Z' ccode
	fmovm.x		FP_SCR0(%a6),&0x80	# return default result in fp0

# operand will underflow AND underflow is enabled.
# therefore, we must return the result rounded to extended precision.
	mov.l		FP_SCR0_HI(%a6),FP_SCR1_HI(%a6)
	mov.l		FP_SCR0_LO(%a6),FP_SCR1_LO(%a6)
	mov.w		FP_SCR0_EX(%a6),%d1	# load current exponent

	mov.l		%d2,-(%sp)		# save d2
	mov.l		%d1,%d2			# make a copy
	andi.l		&0x7fff,%d1		# strip sign
	andi.w		&0x8000,%d2		# keep old sign
	sub.l		%d0,%d1			# subtract scale factor
	addi.l		&0x6000,%d1		# add new bias
	or.w		%d2,%d1			# concat new sign,new exp
	mov.w		%d1,FP_SCR1_EX(%a6)	# insert new exp
	fmovm.x		FP_SCR1(%a6),&0x40	# return EXOP in fp1
	mov.l		(%sp)+,%d2		# restore d2
	bra.b		fabs_sd_unfl_dis

# operand WILL overflow.
	fmov.l		&0x0,%fpsr		# clear FPSR
	fmov.l		L_SCR3(%a6),%fpcr	# set FPCR

	fabs.x		FP_SCR0(%a6),%fp0	# perform absolute

	fmov.l		&0x0,%fpcr		# clear FPCR
	fmov.l		%fpsr,%d1		# save FPSR

	or.l		%d1,USER_FPSR(%a6)	# save INEX2,N

	or.l		&ovfl_inx_mask,USER_FPSR(%a6) # set ovfl/aovfl/ainex

	mov.b		FPCR_ENABLE(%a6),%d1
	andi.b		&0x13,%d1		# is OVFL or INEX enabled?
	bne.b		fabs_sd_ovfl_ena	# yes

# OVFL is not enabled; therefore, we must create the default result by
# calling ovf_res().
	btst		&neg_bit,FPSR_CC(%a6)	# is result negative?
	sne		%d1			# set sign param accordingly
	mov.l		L_SCR3(%a6),%d0		# pass: prec,mode
	bsr.l		ovf_res			# calculate default result
	or.b		%d0,FPSR_CC(%a6)	# set INF,N if applicable
	fmovm.x		(%a0),&0x80		# return default result in fp0

# the INEX2 bit has already been updated by the round to the correct precision.
# now, round to extended(and don't alter the FPSR).
	mov.l		%d2,-(%sp)		# save d2
	mov.w		FP_SCR0_EX(%a6),%d1	# fetch {sgn,exp}
	mov.l		%d1,%d2			# make a copy
	andi.l		&0x7fff,%d1		# strip sign
	andi.w		&0x8000,%d2		# keep old sign
	sub.l		%d0,%d1			# add scale factor
	subi.l		&0x6000,%d1		# subtract bias
	or.w		%d2,%d1			# concat sign,exp
	mov.w		%d1,FP_SCR0_EX(%a6)	# insert new exponent
	fmovm.x		FP_SCR0(%a6),&0x40	# return EXOP in fp1
	mov.l		(%sp)+,%d2		# restore d2
	bra.b		fabs_sd_ovfl_dis

# the move in MAY overflow. so...
	fmov.l		&0x0,%fpsr		# clear FPSR
	fmov.l		L_SCR3(%a6),%fpcr	# set FPCR

	fabs.x		FP_SCR0(%a6),%fp0	# perform absolute

	fmov.l		%fpsr,%d1		# save status
	fmov.l		&0x0,%fpcr		# clear FPCR

	or.l		%d1,USER_FPSR(%a6)	# save INEX2,N

	fabs.x		%fp0,%fp1		# make a copy of result
	fcmp.b		%fp1,&0x2		# is |result| >= 2.b?
	fbge.w		fabs_sd_ovfl_tst	# yes; overflow has occurred

# no, it didn't overflow; we have correct result
	bra.w		fabs_sd_normal_exit

##########################################################################

# input is not normalized; what is it?
	cmpi.b		%d1,&DENORM		# weed out DENORM
	cmpi.b		%d1,&SNAN		# weed out SNAN
	cmpi.b		%d1,&QNAN		# weed out QNAN

	fabs.x		SRC(%a0),%fp0		# force absolute value

	cmpi.b		%d1,&INF		# weed out INF

	mov.b		&z_bmask,FPSR_CC(%a6)	# set 'Z' ccode bit

	mov.b		&inf_bmask,FPSR_CC(%a6)	# set 'I' ccode bit
#########################################################################
# XDEF **************************************************************** #
#	fcmp(): fp compare op routine					#
# XREF **************************************************************** #
#	res_qnan() - return QNAN result					#
#	res_snan() - return SNAN result					#
# INPUT *************************************************************** #
#	a0 = pointer to extended precision source operand		#
#	a1 = pointer to extended precision destination operand		#
#	d0 = round prec/mode						#
# OUTPUT ************************************************************** #
# ALGORITHM *********************************************************** #
#	Handle NANs and denorms as special cases. For everything else,	#
# just use the actual fcmp instruction to produce the correct condition #
# codes.								#
#########################################################################
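#
# fcmp writes no destination register, so only the condition codes in
# FPSR_CC are produced here.  NANs and DENORMs are picked off through the
# dispatch table below; every other combination is handed to a real
# "fcmp.x" and the resulting ccodes are copied out unchanged.
#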
	mov.b		DTAG(%a6),%d1
	bne.b		fcmp_not_norm		# optimize on non-norm input

# COMPARE FP OPs : NORMs, ZEROs, INFs, and "corrected" DENORMs
	fmovm.x		DST(%a1),&0x80		# load dst op

	fcmp.x		%fp0,SRC(%a0)		# do compare

	fmov.l		%fpsr,%d0		# save FPSR
	rol.l		&0x8,%d0		# extract ccode bits
	mov.b		%d0,FPSR_CC(%a6)	# set ccode bits(no exc bits are set)

# fcmp: inputs are not both normalized; what are they?
	mov.w		(tbl_fcmp_op.b,%pc,%d1.w*2),%d1
	jmp		(tbl_fcmp_op.b,%pc,%d1.w*1)

	short		fcmp_norm	- tbl_fcmp_op # NORM - NORM
	short		fcmp_norm	- tbl_fcmp_op # NORM - ZERO
	short		fcmp_norm	- tbl_fcmp_op # NORM - INF
	short		fcmp_res_qnan	- tbl_fcmp_op # NORM - QNAN
	short		fcmp_nrm_dnrm	- tbl_fcmp_op # NORM - DENORM
	short		fcmp_res_snan	- tbl_fcmp_op # NORM - SNAN
	short		tbl_fcmp_op	- tbl_fcmp_op #
	short		tbl_fcmp_op	- tbl_fcmp_op #

	short		fcmp_norm	- tbl_fcmp_op # ZERO - NORM
	short		fcmp_norm	- tbl_fcmp_op # ZERO - ZERO
	short		fcmp_norm	- tbl_fcmp_op # ZERO - INF
	short		fcmp_res_qnan	- tbl_fcmp_op # ZERO - QNAN
	short		fcmp_dnrm_s	- tbl_fcmp_op # ZERO - DENORM
	short		fcmp_res_snan	- tbl_fcmp_op # ZERO - SNAN
	short		tbl_fcmp_op	- tbl_fcmp_op #
	short		tbl_fcmp_op	- tbl_fcmp_op #

	short		fcmp_norm	- tbl_fcmp_op # INF - NORM
	short		fcmp_norm	- tbl_fcmp_op # INF - ZERO
	short		fcmp_norm	- tbl_fcmp_op # INF - INF
	short		fcmp_res_qnan	- tbl_fcmp_op # INF - QNAN
	short		fcmp_dnrm_s	- tbl_fcmp_op # INF - DENORM
	short		fcmp_res_snan	- tbl_fcmp_op # INF - SNAN
	short		tbl_fcmp_op	- tbl_fcmp_op #
	short		tbl_fcmp_op	- tbl_fcmp_op #

	short		fcmp_res_qnan	- tbl_fcmp_op # QNAN - NORM
	short		fcmp_res_qnan	- tbl_fcmp_op # QNAN - ZERO
	short		fcmp_res_qnan	- tbl_fcmp_op # QNAN - INF
	short		fcmp_res_qnan	- tbl_fcmp_op # QNAN - QNAN
	short		fcmp_res_qnan	- tbl_fcmp_op # QNAN - DENORM
	short		fcmp_res_snan	- tbl_fcmp_op # QNAN - SNAN
	short		tbl_fcmp_op	- tbl_fcmp_op #
	short		tbl_fcmp_op	- tbl_fcmp_op #

	short		fcmp_dnrm_nrm	- tbl_fcmp_op # DENORM - NORM
	short		fcmp_dnrm_d	- tbl_fcmp_op # DENORM - ZERO
	short		fcmp_dnrm_d	- tbl_fcmp_op # DENORM - INF
	short		fcmp_res_qnan	- tbl_fcmp_op # DENORM - QNAN
	short		fcmp_dnrm_sd	- tbl_fcmp_op # DENORM - DENORM
	short		fcmp_res_snan	- tbl_fcmp_op # DENORM - SNAN
	short		tbl_fcmp_op	- tbl_fcmp_op #
	short		tbl_fcmp_op	- tbl_fcmp_op #

	short		fcmp_res_snan	- tbl_fcmp_op # SNAN - NORM
	short		fcmp_res_snan	- tbl_fcmp_op # SNAN - ZERO
	short		fcmp_res_snan	- tbl_fcmp_op # SNAN - INF
	short		fcmp_res_snan	- tbl_fcmp_op # SNAN - QNAN
	short		fcmp_res_snan	- tbl_fcmp_op # SNAN - DENORM
	short		fcmp_res_snan	- tbl_fcmp_op # SNAN - SNAN
	short		tbl_fcmp_op	- tbl_fcmp_op #
	short		tbl_fcmp_op	- tbl_fcmp_op #

# unlike all other functions for QNAN and SNAN, fcmp does NOT set the
# 'N' bit for a negative QNAN or SNAN input so we must squelch it here.
	andi.b		&0xf7,FPSR_CC(%a6)

	andi.b		&0xf7,FPSR_CC(%a6)

# DENORMs are a little more difficult.
# If you have 2 DENORMs, then you can just force the j-bit to a one
# and use the fcmp_norm routine.
# If you have a DENORM and an INF or ZERO, just force the DENORM's j-bit to a one
# and use the fcmp_norm routine.
# If you have a DENORM and a NORM with opposite signs, then use fcmp_norm, also.
# But with a DENORM and a NORM of the same sign, the neg bit is set if the
# (1) signs are (+) and the DENORM is the dst or
# (2) signs are (-) and the DENORM is the src
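# For example, take rule (1): both operands positive with the DENORM as the
# dst.  The dst is then necessarily the smaller magnitude, so dst < src and
# the 'N' bit must be set; with both operands negative and the DENORM as the
# dst, the dst is the one closer to zero, so dst > src and 'N' stays clear.
# The sign tests in fcmp_nrm_dnrm/fcmp_dnrm_nrm below implement exactly this.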
	mov.w		SRC_EX(%a0),FP_SCR0_EX(%a6)
	mov.l		SRC_HI(%a0),%d0
	bset		&31,%d0			# DENORM src; make into small norm
	mov.l		%d0,FP_SCR0_HI(%a6)
	mov.l		SRC_LO(%a0),FP_SCR0_LO(%a6)
	lea		FP_SCR0(%a6),%a0

	mov.l		DST_EX(%a1),FP_SCR0_EX(%a6)
	mov.l		DST_HI(%a1),%d0
	bset		&31,%d0			# DENORM src; make into small norm
	mov.l		%d0,FP_SCR0_HI(%a6)
	mov.l		DST_LO(%a1),FP_SCR0_LO(%a6)
	lea		FP_SCR0(%a6),%a1

	mov.w		DST_EX(%a1),FP_SCR1_EX(%a6)
	mov.w		SRC_EX(%a0),FP_SCR0_EX(%a6)
	mov.l		DST_HI(%a1),%d0
	bset		&31,%d0			# DENORM dst; make into small norm
	mov.l		%d0,FP_SCR1_HI(%a6)
	mov.l		SRC_HI(%a0),%d0
	bset		&31,%d0			# DENORM dst; make into small norm
	mov.l		%d0,FP_SCR0_HI(%a6)
	mov.l		DST_LO(%a1),FP_SCR1_LO(%a6)
	mov.l		SRC_LO(%a0),FP_SCR0_LO(%a6)
	lea		FP_SCR1(%a6),%a1
	lea		FP_SCR0(%a6),%a0

	mov.b		SRC_EX(%a0),%d0		# determine if like signs
	mov.b		DST_EX(%a1),%d1

# signs are the same, so must determine the answer ourselves.
	tst.b		%d0			# is src op negative?
	bmi.b		fcmp_nrm_dnrm_m		# yes

	mov.b		&neg_bmask,FPSR_CC(%a6)	# set 'N' ccode bit

	mov.b		SRC_EX(%a0),%d0		# determine if like signs
	mov.b		DST_EX(%a1),%d1

# signs are the same, so must determine the answer ourselves.
	tst.b		%d0			# is src op negative?
	bpl.b		fcmp_dnrm_nrm_m		# no

	mov.b		&neg_bmask,FPSR_CC(%a6)	# set 'N' ccode bit
#########################################################################
# XDEF **************************************************************** #
#	fsglmul(): emulates the fsglmul instruction			#
# XREF **************************************************************** #
#	scale_to_zero_src() - scale src exponent to zero		#
#	scale_to_zero_dst() - scale dst exponent to zero		#
#	unf_res4() - return default underflow result for sglop		#
#	ovf_res() - return default overflow result			#
#	res_qnan() - return QNAN result					#
#	res_snan() - return SNAN result					#
# INPUT *************************************************************** #
#	a0 = pointer to extended precision source operand		#
#	a1 = pointer to extended precision destination operand		#
#	d0  rnd prec,mode						#
# OUTPUT ************************************************************** #
#	fp1 = EXOP (if exception occurred)				#
# ALGORITHM *********************************************************** #
#	Handle NANs, infinities, and zeroes as special cases. Divide	#
# norms/denorms into ext/sgl/dbl precision.				#
#	For norms/denorms, scale the exponents such that a multiply	#
# instruction won't cause an exception. Use the regular fsglmul to	#
# compute a result. Check if the regular operands would have taken	#
# an exception. If so, return the default overflow/underflow result	#
# and return the EXOP if exceptions are enabled. Else, scale the	#
# result operand to the proper exponent.				#
#########################################################################
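#
# Both operands are rescaled so their exponents sit at 0x3fff; the two
# scale factors returned by scale_to_zero_src/dst are summed into d0, so
# d0 records how far the true product's exponent lies from the scaled one.
# Illustration (hypothetical operands): multiplying 2^+20000 by 2^+10000
# gives d0 = -30000, which is below 0x3fff-0x7ffe = -16383, so the
# "result will overflow" branch is taken without the multiply itself ever
# trapping.
#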
	mov.l		%d0,L_SCR3(%a6)		# store rnd info

	mov.b		DTAG(%a6),%d1
	bne.w		fsglmul_not_norm	# optimize on non-norm input

	mov.w		DST_EX(%a1),FP_SCR1_EX(%a6)
	mov.l		DST_HI(%a1),FP_SCR1_HI(%a6)
	mov.l		DST_LO(%a1),FP_SCR1_LO(%a6)

	mov.w		SRC_EX(%a0),FP_SCR0_EX(%a6)
	mov.l		SRC_HI(%a0),FP_SCR0_HI(%a6)
	mov.l		SRC_LO(%a0),FP_SCR0_LO(%a6)

	bsr.l		scale_to_zero_src	# scale exponent
	mov.l		%d0,-(%sp)		# save scale factor 1

	bsr.l		scale_to_zero_dst	# scale dst exponent

	add.l		(%sp)+,%d0		# SCALE_FACTOR = scale1 + scale2

	cmpi.l		%d0,&0x3fff-0x7ffe	# would result ovfl?
	beq.w		fsglmul_may_ovfl	# result may rnd to overflow
	blt.w		fsglmul_ovfl		# result will overflow

	cmpi.l		%d0,&0x3fff+0x0001	# would result unfl?
	beq.w		fsglmul_may_unfl	# result may rnd to no unfl
	bgt.w		fsglmul_unfl		# result will underflow

	fmovm.x		FP_SCR1(%a6),&0x80	# load dst op

	fmov.l		L_SCR3(%a6),%fpcr	# set FPCR
	fmov.l		&0x0,%fpsr		# clear FPSR

	fsglmul.x	FP_SCR0(%a6),%fp0	# execute sgl multiply

	fmov.l		%fpsr,%d1		# save status
	fmov.l		&0x0,%fpcr		# clear FPCR

	or.l		%d1,USER_FPSR(%a6)	# save INEX2,N

fsglmul_normal_exit:
	fmovm.x		&0x80,FP_SCR0(%a6)	# store out result
	mov.l		%d2,-(%sp)		# save d2
	mov.w		FP_SCR0_EX(%a6),%d1	# load {sgn,exp}
	mov.l		%d1,%d2			# make a copy
	andi.l		&0x7fff,%d1		# strip sign
	andi.w		&0x8000,%d2		# keep old sign
	sub.l		%d0,%d1			# add scale factor
	or.w		%d2,%d1			# concat old sign,new exp
	mov.w		%d1,FP_SCR0_EX(%a6)	# insert new exponent
	mov.l		(%sp)+,%d2		# restore d2
	fmovm.x		FP_SCR0(%a6),&0x80	# return result in fp0

	fmovm.x		FP_SCR1(%a6),&0x80	# load dst op

	fmov.l		L_SCR3(%a6),%fpcr	# set FPCR
	fmov.l		&0x0,%fpsr		# clear FPSR

	fsglmul.x	FP_SCR0(%a6),%fp0	# execute sgl multiply

	fmov.l		%fpsr,%d1		# save status
	fmov.l		&0x0,%fpcr		# clear FPCR

	or.l		%d1,USER_FPSR(%a6)	# save INEX2,N

# save setting this until now because this is where fsglmul_may_ovfl may jump in
	or.l		&ovfl_inx_mask,USER_FPSR(%a6) # set ovfl/aovfl/ainex

	mov.b		FPCR_ENABLE(%a6),%d1
	andi.b		&0x13,%d1		# is OVFL or INEX enabled?
	bne.b		fsglmul_ovfl_ena	# yes

	btst		&neg_bit,FPSR_CC(%a6)	# is result negative?
	sne		%d1			# set sign param accordingly
	mov.l		L_SCR3(%a6),%d0		# pass prec:rnd
	andi.b		&0x30,%d0		# force prec = ext
	bsr.l		ovf_res			# calculate default result
	or.b		%d0,FPSR_CC(%a6)	# set INF,N if applicable
	fmovm.x		(%a0),&0x80		# return default result in fp0

	fmovm.x		&0x80,FP_SCR0(%a6)	# move result to stack

	mov.l		%d2,-(%sp)		# save d2
	mov.w		FP_SCR0_EX(%a6),%d1	# fetch {sgn,exp}
	mov.l		%d1,%d2			# make a copy
	andi.l		&0x7fff,%d1		# strip sign
	sub.l		%d0,%d1			# add scale factor
	subi.l		&0x6000,%d1		# subtract bias
	andi.w		&0x8000,%d2		# keep old sign
	or.w		%d2,%d1			# concat old sign,new exp
	mov.w		%d1,FP_SCR0_EX(%a6)	# insert new exponent
	mov.l		(%sp)+,%d2		# restore d2
	fmovm.x		FP_SCR0(%a6),&0x40	# return EXOP in fp1
	bra.b		fsglmul_ovfl_dis

	fmovm.x		FP_SCR1(%a6),&0x80	# load dst op

	fmov.l		L_SCR3(%a6),%fpcr	# set FPCR
	fmov.l		&0x0,%fpsr		# clear FPSR

	fsglmul.x	FP_SCR0(%a6),%fp0	# execute sgl multiply

	fmov.l		%fpsr,%d1		# save status
	fmov.l		&0x0,%fpcr		# clear FPCR

	or.l		%d1,USER_FPSR(%a6)	# save INEX2,N

	fabs.x		%fp0,%fp1		# make a copy of result
	fcmp.b		%fp1,&0x2		# is |result| >= 2.b?
	fbge.w		fsglmul_ovfl_tst	# yes; overflow has occurred

# no, it didn't overflow; we have correct result
	bra.w		fsglmul_normal_exit

	bset		&unfl_bit,FPSR_EXCEPT(%a6) # set unfl exc bit

	fmovm.x		FP_SCR1(%a6),&0x80	# load dst op

	fmov.l		&rz_mode*0x10,%fpcr	# set FPCR
	fmov.l		&0x0,%fpsr		# clear FPSR

	fsglmul.x	FP_SCR0(%a6),%fp0	# execute sgl multiply

	fmov.l		%fpsr,%d1		# save status
	fmov.l		&0x0,%fpcr		# clear FPCR

	or.l		%d1,USER_FPSR(%a6)	# save INEX2,N

	mov.b		FPCR_ENABLE(%a6),%d1
	andi.b		&0x0b,%d1		# is UNFL or INEX enabled?
	bne.b		fsglmul_unfl_ena	# yes

	fmovm.x		&0x80,FP_SCR0(%a6)	# store out result

	lea		FP_SCR0(%a6),%a0	# pass: result addr
	mov.l		L_SCR3(%a6),%d1		# pass: rnd prec,mode
	bsr.l		unf_res4		# calculate default result
	or.b		%d0,FPSR_CC(%a6)	# 'Z' bit may have been set
	fmovm.x		FP_SCR0(%a6),&0x80	# return default result in fp0

	fmovm.x		FP_SCR1(%a6),&0x40	# load dst op

	fmov.l		L_SCR3(%a6),%fpcr	# set FPCR
	fmov.l		&0x0,%fpsr		# clear FPSR

	fsglmul.x	FP_SCR0(%a6),%fp1	# execute sgl multiply

	fmov.l		&0x0,%fpcr		# clear FPCR

	fmovm.x		&0x40,FP_SCR0(%a6)	# save result to stack
	mov.l		%d2,-(%sp)		# save d2
	mov.w		FP_SCR0_EX(%a6),%d1	# fetch {sgn,exp}
	mov.l		%d1,%d2			# make a copy
	andi.l		&0x7fff,%d1		# strip sign
	andi.w		&0x8000,%d2		# keep old sign
	sub.l		%d0,%d1			# add scale factor
	addi.l		&0x6000,%d1		# add bias
	or.w		%d2,%d1			# concat old sign,new exp
	mov.w		%d1,FP_SCR0_EX(%a6)	# insert new exponent
	mov.l		(%sp)+,%d2		# restore d2
	fmovm.x		FP_SCR0(%a6),&0x40	# return EXOP in fp1
	bra.w		fsglmul_unfl_dis

	fmovm.x		FP_SCR1(%a6),&0x80	# load dst op

	fmov.l		L_SCR3(%a6),%fpcr	# set FPCR
	fmov.l		&0x0,%fpsr		# clear FPSR

	fsglmul.x	FP_SCR0(%a6),%fp0	# execute sgl multiply

	fmov.l		%fpsr,%d1		# save status
	fmov.l		&0x0,%fpcr		# clear FPCR

	or.l		%d1,USER_FPSR(%a6)	# save INEX2,N

	fabs.x		%fp0,%fp1		# make a copy of result
	fcmp.b		%fp1,&0x2		# is |result| > 2.b?
	fbgt.w		fsglmul_normal_exit	# no; no underflow occurred
	fblt.w		fsglmul_unfl		# yes; underflow occurred

# we still don't know if underflow occurred. result is ~ equal to 2. but,
# we don't know if the result was an underflow that rounded up to a 2 or
# a normalized number that rounded down to a 2. so, redo the entire operation
# using RZ as the rounding mode to see what the pre-rounded result is.
# this case should be relatively rare.
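# Round-to-zero never increases the magnitude, so if the re-executed
# product still has magnitude >= 2.0 the pre-rounded result was not an
# underflow; only a pre-rounded magnitude below 2.0 means the original
# result had been rounded up from an underflowed value.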
	fmovm.x		FP_SCR1(%a6),&0x40	# load dst op into fp1

	mov.l		L_SCR3(%a6),%d1
	andi.b		&0xc0,%d1		# keep rnd prec
	ori.b		&rz_mode*0x10,%d1	# insert RZ

	fmov.l		%d1,%fpcr		# set FPCR
	fmov.l		&0x0,%fpsr		# clear FPSR

	fsglmul.x	FP_SCR0(%a6),%fp1	# execute sgl multiply

	fmov.l		&0x0,%fpcr		# clear FPCR
	fabs.x		%fp1			# make absolute value
	fcmp.b		%fp1,&0x2		# is |result| < 2.b?
	fbge.w		fsglmul_normal_exit	# no; no underflow occurred
	bra.w		fsglmul_unfl		# yes, underflow occurred

##############################################################################

# Single Precision Multiply: inputs are not both normalized; what are they?
	mov.w		(tbl_fsglmul_op.b,%pc,%d1.w*2),%d1
	jmp		(tbl_fsglmul_op.b,%pc,%d1.w*1)

	short		fsglmul_norm		- tbl_fsglmul_op # NORM x NORM
	short		fsglmul_zero		- tbl_fsglmul_op # NORM x ZERO
	short		fsglmul_inf_src		- tbl_fsglmul_op # NORM x INF
	short		fsglmul_res_qnan	- tbl_fsglmul_op # NORM x QNAN
	short		fsglmul_norm		- tbl_fsglmul_op # NORM x DENORM
	short		fsglmul_res_snan	- tbl_fsglmul_op # NORM x SNAN
	short		tbl_fsglmul_op		- tbl_fsglmul_op #
	short		tbl_fsglmul_op		- tbl_fsglmul_op #

	short		fsglmul_zero		- tbl_fsglmul_op # ZERO x NORM
	short		fsglmul_zero		- tbl_fsglmul_op # ZERO x ZERO
	short		fsglmul_res_operr	- tbl_fsglmul_op # ZERO x INF
	short		fsglmul_res_qnan	- tbl_fsglmul_op # ZERO x QNAN
	short		fsglmul_zero		- tbl_fsglmul_op # ZERO x DENORM
	short		fsglmul_res_snan	- tbl_fsglmul_op # ZERO x SNAN
	short		tbl_fsglmul_op		- tbl_fsglmul_op #
	short		tbl_fsglmul_op		- tbl_fsglmul_op #

	short		fsglmul_inf_dst		- tbl_fsglmul_op # INF x NORM
	short		fsglmul_res_operr	- tbl_fsglmul_op # INF x ZERO
	short		fsglmul_inf_dst		- tbl_fsglmul_op # INF x INF
	short		fsglmul_res_qnan	- tbl_fsglmul_op # INF x QNAN
	short		fsglmul_inf_dst		- tbl_fsglmul_op # INF x DENORM
	short		fsglmul_res_snan	- tbl_fsglmul_op # INF x SNAN
	short		tbl_fsglmul_op		- tbl_fsglmul_op #
	short		tbl_fsglmul_op		- tbl_fsglmul_op #

	short		fsglmul_res_qnan	- tbl_fsglmul_op # QNAN x NORM
	short		fsglmul_res_qnan	- tbl_fsglmul_op # QNAN x ZERO
	short		fsglmul_res_qnan	- tbl_fsglmul_op # QNAN x INF
	short		fsglmul_res_qnan	- tbl_fsglmul_op # QNAN x QNAN
	short		fsglmul_res_qnan	- tbl_fsglmul_op # QNAN x DENORM
	short		fsglmul_res_snan	- tbl_fsglmul_op # QNAN x SNAN
	short		tbl_fsglmul_op		- tbl_fsglmul_op #
	short		tbl_fsglmul_op		- tbl_fsglmul_op #

	short		fsglmul_norm		- tbl_fsglmul_op # NORM x NORM
	short		fsglmul_zero		- tbl_fsglmul_op # NORM x ZERO
	short		fsglmul_inf_src		- tbl_fsglmul_op # NORM x INF
	short		fsglmul_res_qnan	- tbl_fsglmul_op # NORM x QNAN
	short		fsglmul_norm		- tbl_fsglmul_op # NORM x DENORM
	short		fsglmul_res_snan	- tbl_fsglmul_op # NORM x SNAN
	short		tbl_fsglmul_op		- tbl_fsglmul_op #
	short		tbl_fsglmul_op		- tbl_fsglmul_op #

	short		fsglmul_res_snan	- tbl_fsglmul_op # SNAN x NORM
	short		fsglmul_res_snan	- tbl_fsglmul_op # SNAN x ZERO
	short		fsglmul_res_snan	- tbl_fsglmul_op # SNAN x INF
	short		fsglmul_res_snan	- tbl_fsglmul_op # SNAN x QNAN
	short		fsglmul_res_snan	- tbl_fsglmul_op # SNAN x DENORM
	short		fsglmul_res_snan	- tbl_fsglmul_op # SNAN x SNAN
	short		tbl_fsglmul_op		- tbl_fsglmul_op #
	short		tbl_fsglmul_op		- tbl_fsglmul_op #
#########################################################################
# XDEF **************************************************************** #
#	fsgldiv(): emulates the fsgldiv instruction			#
# XREF **************************************************************** #
#	scale_to_zero_src() - scale src exponent to zero		#
#	scale_to_zero_dst() - scale dst exponent to zero		#
#	unf_res4() - return default underflow result for sglop		#
#	ovf_res() - return default overflow result			#
#	res_qnan() - return QNAN result					#
#	res_snan() - return SNAN result					#
# INPUT *************************************************************** #
#	a0 = pointer to extended precision source operand		#
#	a1 = pointer to extended precision destination operand		#
#	d0  rnd prec,mode						#
# OUTPUT ************************************************************** #
#	fp1 = EXOP (if exception occurred)				#
# ALGORITHM *********************************************************** #
#	Handle NANs, infinities, and zeroes as special cases. Divide	#
# norms/denorms into ext/sgl/dbl precision.				#
#	For norms/denorms, scale the exponents such that a divide	#
# instruction won't cause an exception. Use the regular fsgldiv to	#
# compute a result. Check if the regular operands would have taken	#
# an exception. If so, return the default overflow/underflow result	#
# and return the EXOP if exceptions are enabled. Else, scale the	#
# result operand to the proper exponent.				#
#########################################################################
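#
# Note the scale factor for a divide is the difference of the two
# scale_to_zero() results rather than their sum, since the quotient's
# exponent is the dst exponent minus the src exponent.  With both operands
# scaled to 0x3fff the quotient's magnitude lies in (0.5,2.0), which is
# why the borderline underflow case below compares the result against 1.0
# instead of the 2.0 used by fsglmul.
#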
	mov.l		%d0,L_SCR3(%a6)		# store rnd info

	mov.b		DTAG(%a6),%d1
	or.b		STAG(%a6),%d1		# combine src tags
	bne.w		fsgldiv_not_norm	# optimize on non-norm input

# DIVIDE: NORMs and DENORMs ONLY!
	mov.w		DST_EX(%a1),FP_SCR1_EX(%a6)
	mov.l		DST_HI(%a1),FP_SCR1_HI(%a6)
	mov.l		DST_LO(%a1),FP_SCR1_LO(%a6)

	mov.w		SRC_EX(%a0),FP_SCR0_EX(%a6)
	mov.l		SRC_HI(%a0),FP_SCR0_HI(%a6)
	mov.l		SRC_LO(%a0),FP_SCR0_LO(%a6)

	bsr.l		scale_to_zero_src	# calculate scale factor 1
	mov.l		%d0,-(%sp)		# save scale factor 1

	bsr.l		scale_to_zero_dst	# calculate scale factor 2

	neg.l		(%sp)			# S.F. = scale1 - scale2

	mov.w		2+L_SCR3(%a6),%d1	# fetch precision,mode

	cmpi.l		%d0,&0x3fff-0x7ffe
	ble.w		fsgldiv_may_ovfl

	cmpi.l		%d0,&0x3fff-0x0000	# will result underflow?
	beq.w		fsgldiv_may_unfl	# maybe
	bgt.w		fsgldiv_unfl		# yes; go handle underflow

	fmovm.x		FP_SCR1(%a6),&0x80	# load dst op

	fmov.l		L_SCR3(%a6),%fpcr	# save FPCR
	fmov.l		&0x0,%fpsr		# clear FPSR

	fsgldiv.x	FP_SCR0(%a6),%fp0	# perform sgl divide

	fmov.l		%fpsr,%d1		# save FPSR
	fmov.l		&0x0,%fpcr		# clear FPCR

	or.l		%d1,USER_FPSR(%a6)	# save INEX2,N

fsgldiv_normal_exit:
	fmovm.x		&0x80,FP_SCR0(%a6)	# store result on stack
	mov.l		%d2,-(%sp)		# save d2
	mov.w		FP_SCR0_EX(%a6),%d1	# load {sgn,exp}
	mov.l		%d1,%d2			# make a copy
	andi.l		&0x7fff,%d1		# strip sign
	andi.w		&0x8000,%d2		# keep old sign
	sub.l		%d0,%d1			# add scale factor
	or.w		%d2,%d1			# concat old sign,new exp
	mov.w		%d1,FP_SCR0_EX(%a6)	# insert new exponent
	mov.l		(%sp)+,%d2		# restore d2
	fmovm.x		FP_SCR0(%a6),&0x80	# return result in fp0

	fmovm.x		FP_SCR1(%a6),&0x80	# load dst op

	fmov.l		L_SCR3(%a6),%fpcr	# set FPCR
	fmov.l		&0x0,%fpsr		# set FPSR

	fsgldiv.x	FP_SCR0(%a6),%fp0	# execute divide

	or.l		%d1,USER_FPSR(%a6)	# save INEX,N

	fmovm.x		&0x01,-(%sp)		# save result to stack
	mov.w		(%sp),%d1		# fetch new exponent
	add.l		&0xc,%sp		# clear result
	andi.l		&0x7fff,%d1		# strip sign
	sub.l		%d0,%d1			# add scale factor
	cmp.l		%d1,&0x7fff		# did divide overflow?
	blt.b		fsgldiv_normal_exit

	or.w		&ovfl_inx_mask,2+USER_FPSR(%a6) # set ovfl/aovfl/ainex

	mov.b		FPCR_ENABLE(%a6),%d1
	andi.b		&0x13,%d1		# is OVFL or INEX enabled?
	bne.b		fsgldiv_ovfl_ena	# yes

	btst		&neg_bit,FPSR_CC(%a6)	# is result negative
	sne		%d1			# set sign param accordingly
	mov.l		L_SCR3(%a6),%d0		# pass prec:rnd
	andi.b		&0x30,%d0		# kill precision
	bsr.l		ovf_res			# calculate default result
	or.b		%d0,FPSR_CC(%a6)	# set INF if applicable
	fmovm.x		(%a0),&0x80		# return default result in fp0

	fmovm.x		&0x80,FP_SCR0(%a6)	# move result to stack

	mov.l		%d2,-(%sp)		# save d2
	mov.w		FP_SCR0_EX(%a6),%d1	# fetch {sgn,exp}
	mov.l		%d1,%d2			# make a copy
	andi.l		&0x7fff,%d1		# strip sign
	andi.w		&0x8000,%d2		# keep old sign
	sub.l		%d0,%d1			# add scale factor
	subi.l		&0x6000,%d1		# subtract new bias
	andi.w		&0x7fff,%d1		# clear ms bit
	or.w		%d2,%d1			# concat old sign,new exp
	mov.w		%d1,FP_SCR0_EX(%a6)	# insert new exponent
	mov.l		(%sp)+,%d2		# restore d2
	fmovm.x		FP_SCR0(%a6),&0x40	# return EXOP in fp1
	bra.b		fsgldiv_ovfl_dis

	bset		&unfl_bit,FPSR_EXCEPT(%a6) # set unfl exc bit

	fmovm.x		FP_SCR1(%a6),&0x80	# load dst op

	fmov.l		&rz_mode*0x10,%fpcr	# set FPCR
	fmov.l		&0x0,%fpsr		# clear FPSR

	fsgldiv.x	FP_SCR0(%a6),%fp0	# execute sgl divide

	fmov.l		%fpsr,%d1		# save status
	fmov.l		&0x0,%fpcr		# clear FPCR

	or.l		%d1,USER_FPSR(%a6)	# save INEX2,N

	mov.b		FPCR_ENABLE(%a6),%d1
	andi.b		&0x0b,%d1		# is UNFL or INEX enabled?
	bne.b		fsgldiv_unfl_ena	# yes

	fmovm.x		&0x80,FP_SCR0(%a6)	# store out result

	lea		FP_SCR0(%a6),%a0	# pass: result addr
	mov.l		L_SCR3(%a6),%d1		# pass: rnd prec,mode
	bsr.l		unf_res4		# calculate default result
	or.b		%d0,FPSR_CC(%a6)	# 'Z' bit may have been set
	fmovm.x		FP_SCR0(%a6),&0x80	# return default result in fp0

	fmovm.x		FP_SCR1(%a6),&0x40	# load dst op

	fmov.l		L_SCR3(%a6),%fpcr	# set FPCR
	fmov.l		&0x0,%fpsr		# clear FPSR

	fsgldiv.x	FP_SCR0(%a6),%fp1	# execute sgl divide

	fmov.l		&0x0,%fpcr		# clear FPCR

	fmovm.x		&0x40,FP_SCR0(%a6)	# save result to stack
	mov.l		%d2,-(%sp)		# save d2
	mov.w		FP_SCR0_EX(%a6),%d1	# fetch {sgn,exp}
	mov.l		%d1,%d2			# make a copy
	andi.l		&0x7fff,%d1		# strip sign
	andi.w		&0x8000,%d2		# keep old sign
	sub.l		%d0,%d1			# add scale factor
	addi.l		&0x6000,%d1		# add bias
	andi.w		&0x7fff,%d1		# clear top bit
	or.w		%d2,%d1			# concat old sign, new exp
	mov.w		%d1,FP_SCR0_EX(%a6)	# insert new exponent
	mov.l		(%sp)+,%d2		# restore d2
	fmovm.x		FP_SCR0(%a6),&0x40	# return EXOP in fp1
	bra.b		fsgldiv_unfl_dis

# the divide operation MAY underflow:
	fmovm.x		FP_SCR1(%a6),&0x80	# load dst op

	fmov.l		L_SCR3(%a6),%fpcr	# set FPCR
	fmov.l		&0x0,%fpsr		# clear FPSR

	fsgldiv.x	FP_SCR0(%a6),%fp0	# execute sgl divide

	fmov.l		%fpsr,%d1		# save status
	fmov.l		&0x0,%fpcr		# clear FPCR

	or.l		%d1,USER_FPSR(%a6)	# save INEX2,N

	fabs.x		%fp0,%fp1		# make a copy of result
	fcmp.b		%fp1,&0x1		# is |result| > 1.b?
	fbgt.w		fsgldiv_normal_exit	# no; no underflow occurred
	fblt.w		fsgldiv_unfl		# yes; underflow occurred

# we still don't know if underflow occurred. result is ~ equal to 1. but,
# we don't know if the result was an underflow that rounded up to a 1
# or a normalized number that rounded down to a 1. so, redo the entire
# operation using RZ as the rounding mode to see what the pre-rounded
# result is. this case should be relatively rare.
	fmovm.x		FP_SCR1(%a6),&0x40	# load dst op into %fp1

	clr.l		%d1			# clear scratch register
	ori.b		&rz_mode*0x10,%d1	# force RZ rnd mode

	fmov.l		%d1,%fpcr		# set FPCR
	fmov.l		&0x0,%fpsr		# clear FPSR

	fsgldiv.x	FP_SCR0(%a6),%fp1	# execute sgl divide

	fmov.l		&0x0,%fpcr		# clear FPCR
	fabs.x		%fp1			# make absolute value
	fcmp.b		%fp1,&0x1		# is |result| < 1.b?
	fbge.w		fsgldiv_normal_exit	# no; no underflow occurred
	bra.w		fsgldiv_unfl		# yes; underflow occurred

############################################################################

# Divide: inputs are not both normalized; what are they?
	mov.w		(tbl_fsgldiv_op.b,%pc,%d1.w*2),%d1
	jmp		(tbl_fsgldiv_op.b,%pc,%d1.w*1)

	short		fsgldiv_norm		- tbl_fsgldiv_op # NORM / NORM
	short		fsgldiv_inf_load	- tbl_fsgldiv_op # NORM / ZERO
	short		fsgldiv_zero_load	- tbl_fsgldiv_op # NORM / INF
	short		fsgldiv_res_qnan	- tbl_fsgldiv_op # NORM / QNAN
	short		fsgldiv_norm		- tbl_fsgldiv_op # NORM / DENORM
	short		fsgldiv_res_snan	- tbl_fsgldiv_op # NORM / SNAN
	short		tbl_fsgldiv_op		- tbl_fsgldiv_op #
	short		tbl_fsgldiv_op		- tbl_fsgldiv_op #

	short		fsgldiv_zero_load	- tbl_fsgldiv_op # ZERO / NORM
	short		fsgldiv_res_operr	- tbl_fsgldiv_op # ZERO / ZERO
	short		fsgldiv_zero_load	- tbl_fsgldiv_op # ZERO / INF
	short		fsgldiv_res_qnan	- tbl_fsgldiv_op # ZERO / QNAN
	short		fsgldiv_zero_load	- tbl_fsgldiv_op # ZERO / DENORM
	short		fsgldiv_res_snan	- tbl_fsgldiv_op # ZERO / SNAN
	short		tbl_fsgldiv_op		- tbl_fsgldiv_op #
	short		tbl_fsgldiv_op		- tbl_fsgldiv_op #

	short		fsgldiv_inf_dst		- tbl_fsgldiv_op # INF / NORM
	short		fsgldiv_inf_dst		- tbl_fsgldiv_op # INF / ZERO
	short		fsgldiv_res_operr	- tbl_fsgldiv_op # INF / INF
	short		fsgldiv_res_qnan	- tbl_fsgldiv_op # INF / QNAN
	short		fsgldiv_inf_dst		- tbl_fsgldiv_op # INF / DENORM
	short		fsgldiv_res_snan	- tbl_fsgldiv_op # INF / SNAN
	short		tbl_fsgldiv_op		- tbl_fsgldiv_op #
	short		tbl_fsgldiv_op		- tbl_fsgldiv_op #

	short		fsgldiv_res_qnan	- tbl_fsgldiv_op # QNAN / NORM
	short		fsgldiv_res_qnan	- tbl_fsgldiv_op # QNAN / ZERO
	short		fsgldiv_res_qnan	- tbl_fsgldiv_op # QNAN / INF
	short		fsgldiv_res_qnan	- tbl_fsgldiv_op # QNAN / QNAN
	short		fsgldiv_res_qnan	- tbl_fsgldiv_op # QNAN / DENORM
	short		fsgldiv_res_snan	- tbl_fsgldiv_op # QNAN / SNAN
	short		tbl_fsgldiv_op		- tbl_fsgldiv_op #
	short		tbl_fsgldiv_op		- tbl_fsgldiv_op #

	short		fsgldiv_norm		- tbl_fsgldiv_op # DENORM / NORM
	short		fsgldiv_inf_load	- tbl_fsgldiv_op # DENORM / ZERO
	short		fsgldiv_zero_load	- tbl_fsgldiv_op # DENORM / INF
	short		fsgldiv_res_qnan	- tbl_fsgldiv_op # DENORM / QNAN
	short		fsgldiv_norm		- tbl_fsgldiv_op # DENORM / DENORM
	short		fsgldiv_res_snan	- tbl_fsgldiv_op # DENORM / SNAN
	short		tbl_fsgldiv_op		- tbl_fsgldiv_op #
	short		tbl_fsgldiv_op		- tbl_fsgldiv_op #

	short		fsgldiv_res_snan	- tbl_fsgldiv_op # SNAN / NORM
	short		fsgldiv_res_snan	- tbl_fsgldiv_op # SNAN / ZERO
	short		fsgldiv_res_snan	- tbl_fsgldiv_op # SNAN / INF
	short		fsgldiv_res_snan	- tbl_fsgldiv_op # SNAN / QNAN
	short		fsgldiv_res_snan	- tbl_fsgldiv_op # SNAN / DENORM
	short		fsgldiv_res_snan	- tbl_fsgldiv_op # SNAN / SNAN
	short		tbl_fsgldiv_op		- tbl_fsgldiv_op #
	short		tbl_fsgldiv_op		- tbl_fsgldiv_op #

	bra.l		fdiv_inf_load
	bra.l		fdiv_zero_load
#########################################################################
# XDEF **************************************************************** #
#	fadd(): emulates the fadd instruction				#
#	fsadd(): emulates the fsadd instruction				#
#	fdadd(): emulates the fdadd instruction				#
# XREF **************************************************************** #
#	addsub_scaler2() - scale the operands so they won't take exc	#
#	ovf_res() - return default overflow result			#
#	unf_res() - return default underflow result			#
#	res_qnan() - set QNAN result					#
#	res_snan() - set SNAN result					#
#	res_operr() - set OPERR result					#
#	scale_to_zero_src() - set src operand exponent equal to zero	#
#	scale_to_zero_dst() - set dst operand exponent equal to zero	#
# INPUT *************************************************************** #
#	a0 = pointer to extended precision source operand		#
#	a1 = pointer to extended precision destination operand		#
# OUTPUT ************************************************************** #
#	fp1 = EXOP (if exception occurred)				#
# ALGORITHM *********************************************************** #
#	Handle NANs, infinities, and zeroes as special cases. Divide	#
# norms into extended, single, and double precision.			#
#	Do addition after scaling exponents such that exception won't	#
# occur. Then, check result exponent to see if exception would have	#
# occurred. If so, return default result and maybe EXOP. Else, insert	#
# the correct result exponent and return. Set FPSR bits as appropriate. #
#########################################################################
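#
# The exception checks below index tbl_fadd_ovfl/tbl_fadd_unfl with the
# rounding precision.  For example, with single-precision rounding the
# unscaled result exponent is compared against 0x407f (>= means overflow)
# and 0x3f81 (< means underflow, == is the borderline "may underflow"
# case); extended precision uses 0x7fff and 0x0000 instead.
#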
	andi.b		&0x30,%d0		# clear rnd prec
	ori.b		&s_mode*0x10,%d0	# insert sgl prec

	andi.b		&0x30,%d0		# clear rnd prec
	ori.b		&d_mode*0x10,%d0	# insert dbl prec

	mov.l		%d0,L_SCR3(%a6)		# store rnd info

	mov.b		DTAG(%a6),%d1
	or.b		STAG(%a6),%d1		# combine src tags
	bne.w		fadd_not_norm		# optimize on non-norm input

# ADD: norms and denorms
	bsr.l		addsub_scaler2		# scale exponents

	fmovm.x		FP_SCR1(%a6),&0x80	# load dst op

	fmov.l		&0x0,%fpsr		# clear FPSR
	fmov.l		L_SCR3(%a6),%fpcr	# set FPCR

	fadd.x		FP_SCR0(%a6),%fp0	# execute add

	fmov.l		&0x0,%fpcr		# clear FPCR
	fmov.l		%fpsr,%d1		# fetch INEX2,N,Z

	or.l		%d1,USER_FPSR(%a6)	# save exc and ccode bits

	fbeq.w		fadd_zero_exit		# if result is zero, end now

	mov.l		%d2,-(%sp)		# save d2

	fmovm.x		&0x01,-(%sp)		# save result to stack

	mov.w		2+L_SCR3(%a6),%d1

	mov.w		(%sp),%d2		# fetch new sign, exp
	andi.l		&0x7fff,%d2		# strip sign
	sub.l		%d0,%d2			# add scale factor

	cmp.l		%d2,(tbl_fadd_ovfl.b,%pc,%d1.w*4) # is it an overflow?
	bge.b		fadd_ovfl		# yes

	cmp.l		%d2,(tbl_fadd_unfl.b,%pc,%d1.w*4) # is it an underflow?
	blt.w		fadd_unfl		# yes
	beq.w		fadd_may_unfl		# maybe; go find out

	andi.w		&0x8000,%d1		# keep sign
	or.w		%d2,%d1			# concat sign,new exp
	mov.w		%d1,(%sp)		# insert new exponent

	fmovm.x		(%sp)+,&0x80		# return result in fp0

	mov.l		(%sp)+,%d2		# restore d2

#	fmov.s		&0x00000000,%fp0	# return zero in fp0

	long		0x7fff			# ext ovfl
	long		0x407f			# sgl ovfl
	long		0x43ff			# dbl ovfl

	long		0x0000			# ext unfl
	long		0x3f81			# sgl unfl
	long		0x3c01			# dbl unfl

	or.l		&ovfl_inx_mask,USER_FPSR(%a6) # set ovfl/aovfl/ainex

	mov.b		FPCR_ENABLE(%a6),%d1
	andi.b		&0x13,%d1		# is OVFL or INEX enabled?
	bne.b		fadd_ovfl_ena		# yes

	btst		&neg_bit,FPSR_CC(%a6)	# is result negative?
	sne		%d1			# set sign param accordingly
	mov.l		L_SCR3(%a6),%d0		# pass prec:rnd
	bsr.l		ovf_res			# calculate default result
	or.b		%d0,FPSR_CC(%a6)	# set INF,N if applicable
	fmovm.x		(%a0),&0x80		# return default result in fp0
	mov.l		(%sp)+,%d2		# restore d2

	mov.b		L_SCR3(%a6),%d1
	andi.b		&0xc0,%d1		# is precision extended?
	bne.b		fadd_ovfl_ena_sd	# no; prec = sgl or dbl

fadd_ovfl_ena_cont:
	andi.w		&0x8000,%d1		# keep sign
	subi.l		&0x6000,%d2		# add extra bias
	or.w		%d2,%d1			# concat sign,new exp
	mov.w		%d1,(%sp)		# insert new exponent

	fmovm.x		(%sp)+,&0x40		# return EXOP in fp1
	bra.b		fadd_ovfl_dis

	fmovm.x		FP_SCR1(%a6),&0x80	# load dst op

	mov.l		L_SCR3(%a6),%d1
	andi.b		&0x30,%d1		# keep rnd mode
	fmov.l		%d1,%fpcr		# set FPCR

	fadd.x		FP_SCR0(%a6),%fp0	# execute add

	fmov.l		&0x0,%fpcr		# clear FPCR

	fmovm.x		&0x01,-(%sp)
	bra.b		fadd_ovfl_ena_cont

	bset		&unfl_bit,FPSR_EXCEPT(%a6) # set unfl exc bit

	fmovm.x		FP_SCR1(%a6),&0x80	# load dst op

	fmov.l		&rz_mode*0x10,%fpcr	# set FPCR
	fmov.l		&0x0,%fpsr		# clear FPSR

	fadd.x		FP_SCR0(%a6),%fp0	# execute add

	fmov.l		&0x0,%fpcr		# clear FPCR
	fmov.l		%fpsr,%d1		# save status

	or.l		%d1,USER_FPSR(%a6)	# save INEX,N

	mov.b		FPCR_ENABLE(%a6),%d1
	andi.b		&0x0b,%d1		# is UNFL or INEX enabled?
	bne.b		fadd_unfl_ena		# yes

	fmovm.x		&0x80,FP_SCR0(%a6)	# store out result

	lea		FP_SCR0(%a6),%a0	# pass: result addr
	mov.l		L_SCR3(%a6),%d1		# pass: rnd prec,mode
	bsr.l		unf_res			# calculate default result
	or.b		%d0,FPSR_CC(%a6)	# 'Z' bit may have been set
	fmovm.x		FP_SCR0(%a6),&0x80	# return default result in fp0
	mov.l		(%sp)+,%d2		# restore d2

	fmovm.x		FP_SCR1(%a6),&0x40	# load dst op

	mov.l		L_SCR3(%a6),%d1
	andi.b		&0xc0,%d1		# is precision extended?
	bne.b		fadd_unfl_ena_sd	# no; sgl or dbl

	fmov.l		L_SCR3(%a6),%fpcr	# set FPCR

fadd_unfl_ena_cont:
	fmov.l		&0x0,%fpsr		# clear FPSR

	fadd.x		FP_SCR0(%a6),%fp1	# execute add

	fmov.l		&0x0,%fpcr		# clear FPCR

	fmovm.x		&0x40,FP_SCR0(%a6)	# save result to stack
	mov.w		FP_SCR0_EX(%a6),%d1	# fetch {sgn,exp}
	mov.l		%d1,%d2			# make a copy
	andi.l		&0x7fff,%d1		# strip sign
	andi.w		&0x8000,%d2		# keep old sign
	sub.l		%d0,%d1			# add scale factor
	addi.l		&0x6000,%d1		# add new bias
	andi.w		&0x7fff,%d1		# clear top bit
	or.w		%d2,%d1			# concat sign,new exp
	mov.w		%d1,FP_SCR0_EX(%a6)	# insert new exponent
	fmovm.x		FP_SCR0(%a6),&0x40	# return EXOP in fp1
	bra.w		fadd_unfl_dis

	mov.l		L_SCR3(%a6),%d1
	andi.b		&0x30,%d1		# use only rnd mode
	fmov.l		%d1,%fpcr		# set FPCR

	bra.b		fadd_unfl_ena_cont

# result is equal to the smallest normalized number in the selected precision
# if the precision is extended, this result could not have come from an
# underflow that rounded up.
	mov.l		L_SCR3(%a6),%d1
	beq.w		fadd_normal		# yes; no underflow occurred

	mov.l		0x4(%sp),%d1		# extract hi(man)
	cmpi.l		%d1,&0x80000000		# is hi(man) = 0x80000000?
	bne.w		fadd_normal		# no; no underflow occurred

	tst.l		0x8(%sp)		# is lo(man) = 0x0?
	bne.w		fadd_normal		# no; no underflow occurred

	btst		&inex2_bit,FPSR_EXCEPT(%a6) # is INEX2 set?
	beq.w		fadd_normal		# no; no underflow occurred

# ok, so now the result has an exponent equal to the smallest normalized
# exponent for the selected precision. also, the mantissa is equal to
# 0x8000000000000000 and this mantissa is the result of rounding non-zero
# now, we must determine whether the pre-rounded result was an underflow
# rounded "up" or a normalized number rounded "down".
# so, we do this by re-executing the add using RZ as the rounding mode and
# seeing if the new result is smaller or equal to the current result.
	fmovm.x		FP_SCR1(%a6),&0x40	# load dst op into fp1

	mov.l		L_SCR3(%a6),%d1
	andi.b		&0xc0,%d1		# keep rnd prec
	ori.b		&rz_mode*0x10,%d1	# insert rnd mode
	fmov.l		%d1,%fpcr		# set FPCR
	fmov.l		&0x0,%fpsr		# clear FPSR

	fadd.x		FP_SCR0(%a6),%fp1	# execute add

	fmov.l		&0x0,%fpcr		# clear FPCR

	fabs.x		%fp0			# compare absolute values
	fcmp.x		%fp0,%fp1		# is first result > second?

	fbgt.w		fadd_unfl		# yes; it's an underflow
	bra.w		fadd_normal		# no; it's not an underflow

##########################################################################

# Add: inputs are not both normalized; what are they?
	mov.w		(tbl_fadd_op.b,%pc,%d1.w*2),%d1
	jmp		(tbl_fadd_op.b,%pc,%d1.w*1)

	short		fadd_norm	- tbl_fadd_op # NORM + NORM
	short		fadd_zero_src	- tbl_fadd_op # NORM + ZERO
	short		fadd_inf_src	- tbl_fadd_op # NORM + INF
	short		fadd_res_qnan	- tbl_fadd_op # NORM + QNAN
	short		fadd_norm	- tbl_fadd_op # NORM + DENORM
	short		fadd_res_snan	- tbl_fadd_op # NORM + SNAN
	short		tbl_fadd_op	- tbl_fadd_op #
	short		tbl_fadd_op	- tbl_fadd_op #

	short		fadd_zero_dst	- tbl_fadd_op # ZERO + NORM
	short		fadd_zero_2	- tbl_fadd_op # ZERO + ZERO
	short		fadd_inf_src	- tbl_fadd_op # ZERO + INF
	short		fadd_res_qnan	- tbl_fadd_op # NORM + QNAN
	short		fadd_zero_dst	- tbl_fadd_op # ZERO + DENORM
	short		fadd_res_snan	- tbl_fadd_op # NORM + SNAN
	short		tbl_fadd_op	- tbl_fadd_op #
	short		tbl_fadd_op	- tbl_fadd_op #

	short		fadd_inf_dst	- tbl_fadd_op # INF + NORM
	short		fadd_inf_dst	- tbl_fadd_op # INF + ZERO
	short		fadd_inf_2	- tbl_fadd_op # INF + INF
	short		fadd_res_qnan	- tbl_fadd_op # NORM + QNAN
	short		fadd_inf_dst	- tbl_fadd_op # INF + DENORM
	short		fadd_res_snan	- tbl_fadd_op # NORM + SNAN
	short		tbl_fadd_op	- tbl_fadd_op #
	short		tbl_fadd_op	- tbl_fadd_op #

	short		fadd_res_qnan	- tbl_fadd_op # QNAN + NORM
	short		fadd_res_qnan	- tbl_fadd_op # QNAN + ZERO
	short		fadd_res_qnan	- tbl_fadd_op # QNAN + INF
	short		fadd_res_qnan	- tbl_fadd_op # QNAN + QNAN
	short		fadd_res_qnan	- tbl_fadd_op # QNAN + DENORM
	short		fadd_res_snan	- tbl_fadd_op # QNAN + SNAN
	short		tbl_fadd_op	- tbl_fadd_op #
	short		tbl_fadd_op	- tbl_fadd_op #

	short		fadd_norm	- tbl_fadd_op # DENORM + NORM
	short		fadd_zero_src	- tbl_fadd_op # DENORM + ZERO
	short		fadd_inf_src	- tbl_fadd_op # DENORM + INF
	short		fadd_res_qnan	- tbl_fadd_op # NORM + QNAN
	short		fadd_norm	- tbl_fadd_op # DENORM + DENORM
	short		fadd_res_snan	- tbl_fadd_op # NORM + SNAN
	short		tbl_fadd_op	- tbl_fadd_op #
	short		tbl_fadd_op	- tbl_fadd_op #

	short		fadd_res_snan	- tbl_fadd_op # SNAN + NORM
	short		fadd_res_snan	- tbl_fadd_op # SNAN + ZERO
	short		fadd_res_snan	- tbl_fadd_op # SNAN + INF
	short		fadd_res_snan	- tbl_fadd_op # SNAN + QNAN
	short		fadd_res_snan	- tbl_fadd_op # SNAN + DENORM
	short		fadd_res_snan	- tbl_fadd_op # SNAN + SNAN
	short		tbl_fadd_op	- tbl_fadd_op #
	short		tbl_fadd_op	- tbl_fadd_op #

# both operands are ZEROes
	mov.b		SRC_EX(%a0),%d0		# are the signs opposite
	mov.b		DST_EX(%a1),%d1
	bmi.w		fadd_zero_2_chk_rm	# weed out (-ZERO)+(+ZERO)

# the signs are the same. so determine whether they are positive or negative
# and return the appropriately signed zero.
	tst.b		%d0			# are ZEROes positive or negative?
	bmi.b		fadd_zero_rm		# negative
	fmov.s		&0x00000000,%fp0	# return +ZERO
	mov.b		&z_bmask,FPSR_CC(%a6)	# set Z

# the ZEROes have opposite signs:
# - therefore, we return +ZERO if the rounding modes are RN,RZ, or RP.
# - -ZERO is returned in the case of RM.
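# Concretely: (+0.0) + (-0.0) returns +0.0 under RN, RZ, and RP but -0.0
# under RM, which is the IEEE rule the rounding-mode check below applies;
# when both zeroes share a sign, that common sign is simply returned.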
fadd_zero_2_chk_rm:
	mov.b		3+L_SCR3(%a6),%d1
	andi.b		&0x30,%d1		# extract rnd mode
	cmpi.b		%d1,&rm_mode*0x10	# is rnd mode == RM?
	beq.b		fadd_zero_rm		# yes
	fmov.s		&0x00000000,%fp0	# return +ZERO
	mov.b		&z_bmask,FPSR_CC(%a6)	# set Z

	fmov.s		&0x80000000,%fp0	# return -ZERO
	mov.b		&neg_bmask+z_bmask,FPSR_CC(%a6) # set NEG/Z

# one operand is a ZERO and the other is a DENORM or NORM. scale
# the DENORM or NORM and jump to the regular fadd routine.
	mov.w		SRC_EX(%a0),FP_SCR0_EX(%a6)
	mov.l		SRC_HI(%a0),FP_SCR0_HI(%a6)
	mov.l		SRC_LO(%a0),FP_SCR0_LO(%a6)
	bsr.l		scale_to_zero_src	# scale the operand
	clr.w		FP_SCR1_EX(%a6)
	clr.l		FP_SCR1_HI(%a6)
	clr.l		FP_SCR1_LO(%a6)
	bra.w		fadd_zero_entry		# go execute fadd

	mov.w		DST_EX(%a1),FP_SCR1_EX(%a6)
	mov.l		DST_HI(%a1),FP_SCR1_HI(%a6)
	mov.l		DST_LO(%a1),FP_SCR1_LO(%a6)
	bsr.l		scale_to_zero_dst	# scale the operand
	clr.w		FP_SCR0_EX(%a6)
	clr.l		FP_SCR0_HI(%a6)
	clr.l		FP_SCR0_LO(%a6)
	bra.w		fadd_zero_entry		# go execute fadd

# both operands are INFs. an OPERR will result if the INFs have
# different signs. else, an INF of the same sign is returned
	mov.b		SRC_EX(%a0),%d0		# exclusive or the signs
	mov.b		DST_EX(%a1),%d1
	bmi.l		res_operr		# weed out (-INF)+(+INF)

# ok, so it's not an OPERR. but, we do have to remember to return the
# src INF since that's where the 881/882 gets the j-bit from...

# operands are INF and one of {ZERO, INF, DENORM, NORM}
	fmovm.x		SRC(%a0),&0x80		# return src INF
	tst.b		SRC_EX(%a0)		# is INF positive?
	bpl.b		fadd_inf_done		# yes; we're done
	mov.b		&neg_bmask+inf_bmask,FPSR_CC(%a6) # set INF/NEG

# operands are INF and one of {ZERO, INF, DENORM, NORM}
	fmovm.x		DST(%a1),&0x80		# return dst INF
	tst.b		DST_EX(%a1)		# is INF positive?
	bpl.b		fadd_inf_done		# yes; we're done
	mov.b		&neg_bmask+inf_bmask,FPSR_CC(%a6) # set INF/NEG

	mov.b		&inf_bmask,FPSR_CC(%a6)	# set INF
#########################################################################
# XDEF ****************************************************************	#
#	fsub(): emulates the fsub instruction				#
#	fssub(): emulates the fssub instruction				#
#	fdsub(): emulates the fdsub instruction				#
#									#
# XREF ****************************************************************	#
#	addsub_scaler2() - scale the operands so they won't take exc	#
#	ovf_res() - return default overflow result			#
#	unf_res() - return default underflow result			#
#	res_qnan() - set QNAN result					#
#	res_snan() - set SNAN result					#
#	res_operr() - set OPERR result					#
#	scale_to_zero_src() - set src operand exponent equal to zero	#
#	scale_to_zero_dst() - set dst operand exponent equal to zero	#
#									#
# INPUT ***************************************************************	#
#	a0 = pointer to extended precision source operand		#
#	a1 = pointer to extended precision destination operand		#
#									#
# OUTPUT **************************************************************	#
#	fp1 = EXOP (if exception occurred)				#
#									#
# ALGORITHM ***********************************************************	#
#	Handle NANs, infinities, and zeroes as special cases. Divide	#
#	norms into extended, single, and double precision.		#
#	Do subtraction after scaling exponents such that exception	#
#	won't occur. Then, check result exponent to see if exception	#
#	would have occurred. If so, return default result and maybe	#
#	EXOP. Else, insert the correct result exponent and return.	#
#	Set FPSR bits as appropriate.					#
#									#
#########################################################################
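#
# Note (added; illustrative summary, not part of the original Motorola
# text): addsub_scaler2() below rescales both operands so that the larger
# one sits at exponent 0x3fff and returns the scale amount in d0.  After
# the scaled fsub executes, the code forms
#	(exponent of scaled result) - d0
# and compares it against the per-precision bounds in tbl_fsub_ovfl and
# tbl_fsub_unfl (e.g. 0x407f / 0x3f81 for single precision): at or above
# the overflow bound the true result would have overflowed, below the
# underflow bound it would have underflowed, and exactly at the underflow
# bound fsub_may_unfl re-checks using round-to-zero.
#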
fssub:
	andi.b	&0x30,%d0		# clear rnd prec
	ori.b	&s_mode*0x10,%d0	# insert sgl prec
	bra.b	fsub

fdsub:
	andi.b	&0x30,%d0		# clear rnd prec
	ori.b	&d_mode*0x10,%d0	# insert dbl prec

fsub:
	mov.l	%d0,L_SCR3(%a6)		# store rnd info

	mov.b	DTAG(%a6),%d1
	or.b	STAG(%a6),%d1		# combine src tags
	bne.w	fsub_not_norm		# optimize on non-norm input
#
# SUB: norms and denorms
#
fsub_norm:
	bsr.l	addsub_scaler2		# scale exponents

fsub_zero_entry:
	fmovm.x	FP_SCR1(%a6),&0x80	# load dst op

	fmov.l	&0x0,%fpsr		# clear FPSR
	fmov.l	L_SCR3(%a6),%fpcr	# set FPCR

	fsub.x	FP_SCR0(%a6),%fp0	# execute subtract

	fmov.l	&0x0,%fpcr		# clear FPCR
	fmov.l	%fpsr,%d1		# fetch INEX2, N, Z

	or.l	%d1,USER_FPSR(%a6)	# save exc and ccode bits

	fbeq.w	fsub_zero_exit		# if result zero, end now

	mov.l	%d2,-(%sp)		# save d2

	fmovm.x	&0x01,-(%sp)		# save result to stack

	mov.w	2+L_SCR3(%a6),%d1

	mov.w	(%sp),%d2		# fetch new exponent
	andi.l	&0x7fff,%d2		# strip sign
	sub.l	%d0,%d2			# add scale factor

	cmp.l	%d2,(tbl_fsub_ovfl.b,%pc,%d1.w*4) # is it an overflow?
	bge.b	fsub_ovfl		# yes

	cmp.l	%d2,(tbl_fsub_unfl.b,%pc,%d1.w*4) # is it an underflow?
	blt.w	fsub_unfl		# yes
	beq.w	fsub_may_unfl		# maybe; go find out
fsub_normal:
	andi.w	&0x8000,%d1		# keep sign
	or.w	%d2,%d1			# insert new exponent
	mov.w	%d1,(%sp)		# insert new exponent

	fmovm.x	(%sp)+,&0x80		# return result in fp0

	mov.l	(%sp)+,%d2		# restore d2
	rts

fsub_zero_exit:
#	fmov.s	&0x00000000,%fp0	# return zero in fp0
	rts
tbl_fsub_ovfl:
	long	0x7fff			# ext ovfl
	long	0x407f			# sgl ovfl
	long	0x43ff			# dbl ovfl

tbl_fsub_unfl:
	long	0x0000			# ext unfl
	long	0x3f81			# sgl unfl
	long	0x3c01			# dbl unfl
fsub_ovfl:
	or.l	&ovfl_inx_mask,USER_FPSR(%a6) # set ovfl/aovfl/ainex

	mov.b	FPCR_ENABLE(%a6),%d1
	andi.b	&0x13,%d1		# is OVFL or INEX enabled?
	bne.b	fsub_ovfl_ena		# yes

fsub_ovfl_dis:
	btst	&neg_bit,FPSR_CC(%a6)	# is result negative?
	sne	%d1			# set sign param accordingly
	mov.l	L_SCR3(%a6),%d0		# pass prec:rnd
	bsr.l	ovf_res			# calculate default result
	or.b	%d0,FPSR_CC(%a6)	# set INF,N if applicable
	fmovm.x	(%a0),&0x80		# return default result in fp0

	mov.l	(%sp)+,%d2		# restore d2
	rts
fsub_ovfl_ena:
	mov.b	L_SCR3(%a6),%d1
	andi.b	&0xc0,%d1		# is precision extended?
	bne.b	fsub_ovfl_ena_sd	# no

fsub_ovfl_ena_cont:
	mov.w	(%sp),%d1		# fetch {sgn,exp}
	andi.w	&0x8000,%d1		# keep sign
	subi.l	&0x6000,%d2		# subtract new bias
	andi.w	&0x7fff,%d2		# clear top bit
	or.w	%d2,%d1			# concat sign,exp
	mov.w	%d1,(%sp)		# insert new exponent

	fmovm.x	(%sp)+,&0x40		# return EXOP in fp1
	bra.b	fsub_ovfl_dis
fsub_ovfl_ena_sd:
	fmovm.x	FP_SCR1(%a6),&0x80	# load dst op

	mov.l	L_SCR3(%a6),%d1
	andi.b	&0x30,%d1		# clear rnd prec
	fmov.l	%d1,%fpcr		# set FPCR

	fsub.x	FP_SCR0(%a6),%fp0	# execute subtract

	fmov.l	&0x0,%fpcr		# clear FPCR

	fmovm.x	&0x01,-(%sp)
	bra.b	fsub_ovfl_ena_cont
fsub_unfl:
	bset	&unfl_bit,FPSR_EXCEPT(%a6) # set unfl exc bit

	fmovm.x	FP_SCR1(%a6),&0x80	# load dst op

	fmov.l	&rz_mode*0x10,%fpcr	# set FPCR
	fmov.l	&0x0,%fpsr		# clear FPSR

	fsub.x	FP_SCR0(%a6),%fp0	# execute subtract

	fmov.l	&0x0,%fpcr		# clear FPCR
	fmov.l	%fpsr,%d1		# save status

	or.l	%d1,USER_FPSR(%a6)

	mov.b	FPCR_ENABLE(%a6),%d1
	andi.b	&0x0b,%d1		# is UNFL or INEX enabled?
	bne.b	fsub_unfl_ena		# yes

fsub_unfl_dis:
	fmovm.x	&0x80,FP_SCR0(%a6)	# store out result

	lea	FP_SCR0(%a6),%a0	# pass: result addr
	mov.l	L_SCR3(%a6),%d1		# pass: rnd prec,mode
	bsr.l	unf_res			# calculate default result
	or.b	%d0,FPSR_CC(%a6)	# 'Z' may have been set
	fmovm.x	FP_SCR0(%a6),&0x80	# return default result in fp0

	mov.l	(%sp)+,%d2		# restore d2
	rts
fsub_unfl_ena:
	fmovm.x	FP_SCR1(%a6),&0x40

	mov.l	L_SCR3(%a6),%d1
	andi.b	&0xc0,%d1		# is precision extended?
	bne.b	fsub_unfl_ena_sd	# no

	fmov.l	L_SCR3(%a6),%fpcr	# set FPCR

fsub_unfl_ena_cont:
	fmov.l	&0x0,%fpsr		# clear FPSR

	fsub.x	FP_SCR0(%a6),%fp1	# execute subtract

	fmov.l	&0x0,%fpcr		# clear FPCR

	fmovm.x	&0x40,FP_SCR0(%a6)	# store result to stack
	mov.w	FP_SCR0_EX(%a6),%d1	# fetch {sgn,exp}
	mov.l	%d1,%d2			# make a copy
	andi.l	&0x7fff,%d1		# strip sign
	andi.w	&0x8000,%d2		# keep old sign
	sub.l	%d0,%d1			# add scale factor
	addi.l	&0x6000,%d1		# subtract new bias
	andi.w	&0x7fff,%d1		# clear top bit
	or.w	%d2,%d1			# concat sgn,exp
	mov.w	%d1,FP_SCR0_EX(%a6)	# insert new exponent
	fmovm.x	FP_SCR0(%a6),&0x40	# return EXOP in fp1
	bra.w	fsub_unfl_dis
fsub_unfl_ena_sd:
	mov.l	L_SCR3(%a6),%d1
	andi.b	&0x30,%d1		# clear rnd prec
	fmov.l	%d1,%fpcr		# set FPCR

	bra.b	fsub_unfl_ena_cont
fsub_may_unfl:

# the result is equal to the smallest normalized number in the selected
# precision. if the precision is extended, this result could not have
# come from an underflow that rounded up.
	mov.l	L_SCR3(%a6),%d1
	andi.b	&0xc0,%d1		# fetch rnd prec
	beq.w	fsub_normal		# yes; no underflow occurred

	mov.l	0x4(%sp),%d1
	cmpi.l	%d1,&0x80000000		# is hi(man) = 0x80000000?
	bne.w	fsub_normal		# no; no underflow occurred

	tst.l	0x8(%sp)		# is lo(man) = 0x0?
	bne.w	fsub_normal		# no; no underflow occurred

	btst	&inex2_bit,FPSR_EXCEPT(%a6) # is INEX2 set?
	beq.w	fsub_normal		# no; no underflow occurred

#
# ok, so now the result has an exponent equal to the smallest normalized
# exponent for the selected precision. also, the mantissa is equal to
# 0x8000000000000000 and this mantissa is the result of rounding non-zero
# guard, round, and sticky bits.
# now, we must determine whether the pre-rounded result was an underflow
# rounded "up" or a normalized number rounded "down".
# so, we do this by re-executing the operation using RZ as the rounding
# mode and seeing if the new result is smaller or equal to the current
# result.
#
	fmovm.x	FP_SCR1(%a6),&0x40	# load dst op into fp1

	mov.l	L_SCR3(%a6),%d1
	andi.b	&0xc0,%d1		# keep rnd prec
	ori.b	&rz_mode*0x10,%d1	# insert rnd mode
	fmov.l	%d1,%fpcr		# set FPCR
	fmov.l	&0x0,%fpsr		# clear FPSR

	fsub.x	FP_SCR0(%a6),%fp1	# execute subtract

	fmov.l	&0x0,%fpcr		# clear FPCR

	fabs.x	%fp0			# compare absolute values
	fabs.x	%fp1
	fcmp.x	%fp0,%fp1		# is first result > second?

	fbgt.w	fsub_unfl		# yes; it's an underflow
	bra.w	fsub_normal		# no; it's not an underflow
##########################################################################

#
# Sub: inputs are not both normalized; what are they?
#
fsub_not_norm:
	mov.w	(tbl_fsub_op.b,%pc,%d1.w*2),%d1
	jmp	(tbl_fsub_op.b,%pc,%d1.w*1)
tbl_fsub_op:
	short	fsub_norm - tbl_fsub_op		# NORM - NORM
	short	fsub_zero_src - tbl_fsub_op	# NORM - ZERO
	short	fsub_inf_src - tbl_fsub_op	# NORM - INF
	short	fsub_res_qnan - tbl_fsub_op	# NORM - QNAN
	short	fsub_norm - tbl_fsub_op		# NORM - DENORM
	short	fsub_res_snan - tbl_fsub_op	# NORM - SNAN
	short	tbl_fsub_op - tbl_fsub_op	#
	short	tbl_fsub_op - tbl_fsub_op	#

	short	fsub_zero_dst - tbl_fsub_op	# ZERO - NORM
	short	fsub_zero_2 - tbl_fsub_op	# ZERO - ZERO
	short	fsub_inf_src - tbl_fsub_op	# ZERO - INF
	short	fsub_res_qnan - tbl_fsub_op	# NORM - QNAN
	short	fsub_zero_dst - tbl_fsub_op	# ZERO - DENORM
	short	fsub_res_snan - tbl_fsub_op	# NORM - SNAN
	short	tbl_fsub_op - tbl_fsub_op	#
	short	tbl_fsub_op - tbl_fsub_op	#

	short	fsub_inf_dst - tbl_fsub_op	# INF - NORM
	short	fsub_inf_dst - tbl_fsub_op	# INF - ZERO
	short	fsub_inf_2 - tbl_fsub_op	# INF - INF
	short	fsub_res_qnan - tbl_fsub_op	# NORM - QNAN
	short	fsub_inf_dst - tbl_fsub_op	# INF - DENORM
	short	fsub_res_snan - tbl_fsub_op	# NORM - SNAN
	short	tbl_fsub_op - tbl_fsub_op	#
	short	tbl_fsub_op - tbl_fsub_op	#

	short	fsub_res_qnan - tbl_fsub_op	# QNAN - NORM
	short	fsub_res_qnan - tbl_fsub_op	# QNAN - ZERO
	short	fsub_res_qnan - tbl_fsub_op	# QNAN - INF
	short	fsub_res_qnan - tbl_fsub_op	# QNAN - QNAN
	short	fsub_res_qnan - tbl_fsub_op	# QNAN - DENORM
	short	fsub_res_snan - tbl_fsub_op	# QNAN - SNAN
	short	tbl_fsub_op - tbl_fsub_op	#
	short	tbl_fsub_op - tbl_fsub_op	#

	short	fsub_norm - tbl_fsub_op		# DENORM - NORM
	short	fsub_zero_src - tbl_fsub_op	# DENORM - ZERO
	short	fsub_inf_src - tbl_fsub_op	# DENORM - INF
	short	fsub_res_qnan - tbl_fsub_op	# NORM - QNAN
	short	fsub_norm - tbl_fsub_op		# DENORM - DENORM
	short	fsub_res_snan - tbl_fsub_op	# NORM - SNAN
	short	tbl_fsub_op - tbl_fsub_op	#
	short	tbl_fsub_op - tbl_fsub_op	#

	short	fsub_res_snan - tbl_fsub_op	# SNAN - NORM
	short	fsub_res_snan - tbl_fsub_op	# SNAN - ZERO
	short	fsub_res_snan - tbl_fsub_op	# SNAN - INF
	short	fsub_res_snan - tbl_fsub_op	# SNAN - QNAN
	short	fsub_res_snan - tbl_fsub_op	# SNAN - DENORM
	short	fsub_res_snan - tbl_fsub_op	# SNAN - SNAN
	short	tbl_fsub_op - tbl_fsub_op	#
	short	tbl_fsub_op - tbl_fsub_op	#
#
# both operands are ZEROes
#
fsub_zero_2:
	mov.b	SRC_EX(%a0),%d0
	mov.b	DST_EX(%a1),%d1
	eor.b	%d0,%d1
	bpl.b	fsub_zero_2_chk_rm

# the signs are opposite, so, return a ZERO w/ the sign of the dst ZERO
	tst.b	%d0			# is dst negative?
	bmi.b	fsub_zero_2_rm		# yes
	fmov.s	&0x00000000,%fp0	# no; return +ZERO
	mov.b	&z_bmask,FPSR_CC(%a6)	# set Z
	rts

#
# the ZEROes have the same signs:
# - therefore, we return +ZERO if the rounding mode is RN,RZ, or RP
# - -ZERO is returned in the case of RM.
#
fsub_zero_2_chk_rm:
	mov.b	3+L_SCR3(%a6),%d1
	andi.b	&0x30,%d1		# extract rnd mode
	cmpi.b	%d1,&rm_mode*0x10	# is rnd mode = RM?
	beq.b	fsub_zero_2_rm		# yes
	fmov.s	&0x00000000,%fp0	# no; return +ZERO
	mov.b	&z_bmask,FPSR_CC(%a6)	# set Z
	rts

fsub_zero_2_rm:
	fmov.s	&0x80000000,%fp0	# return -ZERO
	mov.b	&z_bmask+neg_bmask,FPSR_CC(%a6)	# set Z/NEG
	rts
#
# one operand is a ZERO and the other is a DENORM or a NORM.
# scale the DENORM or NORM and jump to the regular fsub routine.
#
fsub_zero_dst:
	mov.w	SRC_EX(%a0),FP_SCR0_EX(%a6)
	mov.l	SRC_HI(%a0),FP_SCR0_HI(%a6)
	mov.l	SRC_LO(%a0),FP_SCR0_LO(%a6)
	bsr.l	scale_to_zero_src	# scale the operand
	clr.w	FP_SCR1_EX(%a6)
	clr.l	FP_SCR1_HI(%a6)
	clr.l	FP_SCR1_LO(%a6)
	bra.w	fsub_zero_entry		# go execute fsub

fsub_zero_src:
	mov.w	DST_EX(%a1),FP_SCR1_EX(%a6)
	mov.l	DST_HI(%a1),FP_SCR1_HI(%a6)
	mov.l	DST_LO(%a1),FP_SCR1_LO(%a6)
	bsr.l	scale_to_zero_dst	# scale the operand
	clr.w	FP_SCR0_EX(%a6)
	clr.l	FP_SCR0_HI(%a6)
	clr.l	FP_SCR0_LO(%a6)
	bra.w	fsub_zero_entry		# go execute fsub
#
# both operands are INFs. an OPERR will result if the INFs have the
# same signs. else, the result is an INF with the appropriate sign.
#
fsub_inf_2:
	mov.b	SRC_EX(%a0),%d0		# exclusive or the signs
	mov.b	DST_EX(%a1),%d1
	eor.b	%d1,%d0
	bpl.l	res_operr		# weed out (-INF)+(+INF)

# ok, so it's not an OPERR. but we do have to remember to return
# the src INF since that's where the 881/882 gets the j-bit.

fsub_inf_src:
	fmovm.x	SRC(%a0),&0x80		# return src INF
	fneg.x	%fp0			# invert sign
	fbge.w	fsub_inf_done		# sign is now positive
	mov.b	&neg_bmask+inf_bmask,FPSR_CC(%a6) # set INF/NEG
	rts

fsub_inf_dst:
	fmovm.x	DST(%a1),&0x80		# return dst INF
	tst.b	DST_EX(%a1)		# is INF negative?
	bpl.b	fsub_inf_done		# no
	mov.b	&neg_bmask+inf_bmask,FPSR_CC(%a6) # set INF/NEG
	rts

fsub_inf_done:
	mov.b	&inf_bmask,FPSR_CC(%a6)	# set INF
	rts
#########################################################################
# XDEF ****************************************************************	#
#	fsqrt(): emulates the fsqrt instruction				#
#	fssqrt(): emulates the fssqrt instruction			#
#	fdsqrt(): emulates the fdsqrt instruction			#
#									#
# XREF ****************************************************************	#
#	scale_sqrt() - scale the source operand				#
#	unf_res() - return default underflow result			#
#	ovf_res() - return default overflow result			#
#	res_qnan_1op() - return QNAN result				#
#	res_snan_1op() - return SNAN result				#
#									#
# INPUT ***************************************************************	#
#	a0 = pointer to extended precision source operand		#
#	d0 = rnd prec,mode						#
#									#
# OUTPUT **************************************************************	#
#	fp1 = EXOP (if exception occurred)				#
#									#
# ALGORITHM ***********************************************************	#
#	Handle NANs, infinities, and zeroes as special cases. Divide	#
#	norms/denorms into ext/sgl/dbl precision.			#
#	For norms/denorms, scale the exponents such that a sqrt	#
#	instruction won't cause an exception. Use the regular fsqrt to	#
#	compute a result. Check if the regular operands would have	#
#	taken an exception. If so, return the default overflow/	#
#	underflow result and return the EXOP if exceptions are enabled.#
#	Else, scale the result operand to the proper exponent.		#
#									#
#########################################################################
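#
# Note (added; illustrative, not part of the original Motorola text):
# for the sgl/dbl paths below, scale_sqrt() rescales the operand and
# returns (roughly) half of the exponent displacement in d0.  The
# comparisons against values such as 0x3fff-0x3f81 (sgl underflow) and
# 0x3fff-0x407f (sgl overflow) therefore ask whether moving the scaled
# fsqrt result back to its true exponent would land outside the
# representable single or double precision range.
#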
fssqrt:
	andi.b	&0x30,%d0		# clear rnd prec
	ori.b	&s_mode*0x10,%d0	# insert sgl precision
	bra.b	fsqrt

fdsqrt:
	andi.b	&0x30,%d0		# clear rnd prec
	ori.b	&d_mode*0x10,%d0	# insert dbl precision

fsqrt:
	mov.l	%d0,L_SCR3(%a6)		# store rnd info
	mov.b	STAG(%a6),%d1
	bne.w	fsqrt_not_norm		# optimize on non-norm input
#
# SQUARE ROOT: norms and denorms ONLY!
#
fsqrt_norm:
	tst.b	SRC_EX(%a0)		# is operand negative?
	bmi.l	res_operr		# yes

	andi.b	&0xc0,%d0		# is precision extended?
	bne.b	fsqrt_not_ext		# no; go handle sgl or dbl

	fmov.l	L_SCR3(%a6),%fpcr	# set FPCR
	fmov.l	&0x0,%fpsr		# clear FPSR

	fsqrt.x	(%a0),%fp0		# execute square root

	fmov.l	%fpsr,%d1
	or.l	%d1,USER_FPSR(%a6)	# set N,INEX

	rts
fsqrt_denorm:
	tst.b	SRC_EX(%a0)		# is operand negative?
	bmi.l	res_operr		# yes

	andi.b	&0xc0,%d0		# is precision extended?
	bne.b	fsqrt_not_ext		# no; go handle sgl or dbl

	mov.w	SRC_EX(%a0),FP_SCR0_EX(%a6)
	mov.l	SRC_HI(%a0),FP_SCR0_HI(%a6)
	mov.l	SRC_LO(%a0),FP_SCR0_LO(%a6)

	bsr.l	scale_sqrt		# calculate scale factor

	bra.w	fsqrt_sd_normal
#
# operand is either single or double
#
fsqrt_not_ext:
	cmpi.b	%d0,&s_mode*0x10	# separate sgl/dbl prec
	bne.w	fsqrt_dbl

#
# operand is to be rounded to single precision
#
fsqrt_sgl:
	mov.w	SRC_EX(%a0),FP_SCR0_EX(%a6)
	mov.l	SRC_HI(%a0),FP_SCR0_HI(%a6)
	mov.l	SRC_LO(%a0),FP_SCR0_LO(%a6)

	bsr.l	scale_sqrt		# calculate scale factor

	cmpi.l	%d0,&0x3fff-0x3f81	# will move in underflow?
	beq.w	fsqrt_sd_may_unfl
	bgt.w	fsqrt_sd_unfl		# yes; go handle underflow
	cmpi.l	%d0,&0x3fff-0x407f	# will move in overflow?
	beq.w	fsqrt_sd_may_ovfl	# maybe; go check
	blt.w	fsqrt_sd_ovfl		# yes; go handle overflow
#
# operand will NOT overflow or underflow when moved in to the fp reg file
#
fsqrt_sd_normal:
	fmov.l	&0x0,%fpsr		# clear FPSR
	fmov.l	L_SCR3(%a6),%fpcr	# set FPCR

	fsqrt.x	FP_SCR0(%a6),%fp0	# perform square root

	fmov.l	%fpsr,%d1		# save FPSR
	fmov.l	&0x0,%fpcr		# clear FPCR

	or.l	%d1,USER_FPSR(%a6)	# save INEX2,N

fsqrt_sd_normal_exit:
	mov.l	%d2,-(%sp)		# save d2
	fmovm.x	&0x80,FP_SCR0(%a6)	# store out result
	mov.w	FP_SCR0_EX(%a6),%d1	# load sgn,exp
	mov.l	%d1,%d2			# make a copy
	andi.l	&0x7fff,%d1		# strip sign
	sub.l	%d0,%d1			# add scale factor
	andi.w	&0x8000,%d2		# keep old sign
	or.w	%d1,%d2			# concat old sign,new exp
	mov.w	%d2,FP_SCR0_EX(%a6)	# insert new exponent
	mov.l	(%sp)+,%d2		# restore d2
	fmovm.x	FP_SCR0(%a6),&0x80	# return result in fp0
	rts
#
# operand is to be rounded to double precision
#
fsqrt_dbl:
	mov.w	SRC_EX(%a0),FP_SCR0_EX(%a6)
	mov.l	SRC_HI(%a0),FP_SCR0_HI(%a6)
	mov.l	SRC_LO(%a0),FP_SCR0_LO(%a6)

	bsr.l	scale_sqrt		# calculate scale factor

	cmpi.l	%d0,&0x3fff-0x3c01	# will move in underflow?
	beq.w	fsqrt_sd_may_unfl
	bgt.b	fsqrt_sd_unfl		# yes; go handle underflow
	cmpi.l	%d0,&0x3fff-0x43ff	# will move in overflow?
	beq.w	fsqrt_sd_may_ovfl	# maybe; go check
	blt.w	fsqrt_sd_ovfl		# yes; go handle overflow
	bra.w	fsqrt_sd_normal		# no; go handle normalized op
#
# we're on the line here and the distinguishing characteristic is whether
# the exponent is 3fff or 3ffe. if it's 3ffe, then it's a safe number;
# otherwise fall through to underflow.
#
fsqrt_sd_may_unfl:
	btst	&0x0,1+FP_SCR0_EX(%a6)	# is exponent 0x3fff?
	bne.w	fsqrt_sd_normal		# yes, so no underflow
#
# operand WILL underflow when moved in to the fp register file
#
fsqrt_sd_unfl:
	bset	&unfl_bit,FPSR_EXCEPT(%a6) # set unfl exc bit

	fmov.l	&rz_mode*0x10,%fpcr	# set FPCR
	fmov.l	&0x0,%fpsr		# clear FPSR

	fsqrt.x	FP_SCR0(%a6),%fp0	# execute square root

	fmov.l	%fpsr,%d1		# save status
	fmov.l	&0x0,%fpcr		# clear FPCR

	or.l	%d1,USER_FPSR(%a6)	# save INEX2,N

# if underflow or inexact is enabled, go calculate EXOP first.
	mov.b	FPCR_ENABLE(%a6),%d1
	andi.b	&0x0b,%d1		# is UNFL or INEX enabled?
	bne.b	fsqrt_sd_unfl_ena	# yes

fsqrt_sd_unfl_dis:
	fmovm.x	&0x80,FP_SCR0(%a6)	# store out result

	lea	FP_SCR0(%a6),%a0	# pass: result addr
	mov.l	L_SCR3(%a6),%d1		# pass: rnd prec,mode
	bsr.l	unf_res			# calculate default result
	or.b	%d0,FPSR_CC(%a6)	# set possible 'Z' ccode
	fmovm.x	FP_SCR0(%a6),&0x80	# return default result in fp0
	rts
#
# operand will underflow AND underflow is enabled.
# therefore, we must return the result rounded to extended precision.
#
fsqrt_sd_unfl_ena:
	mov.l	FP_SCR0_HI(%a6),FP_SCR1_HI(%a6)
	mov.l	FP_SCR0_LO(%a6),FP_SCR1_LO(%a6)
	mov.w	FP_SCR0_EX(%a6),%d1	# load current exponent

	mov.l	%d2,-(%sp)		# save d2
	mov.l	%d1,%d2			# make a copy
	andi.l	&0x7fff,%d1		# strip sign
	andi.w	&0x8000,%d2		# keep old sign
	sub.l	%d0,%d1			# subtract scale factor
	addi.l	&0x6000,%d1		# add new bias
	or.w	%d2,%d1			# concat new sign,new exp
	mov.w	%d1,FP_SCR1_EX(%a6)	# insert new exp
	fmovm.x	FP_SCR1(%a6),&0x40	# return EXOP in fp1
	mov.l	(%sp)+,%d2		# restore d2
	bra.b	fsqrt_sd_unfl_dis
#
# operand WILL overflow.
#
fsqrt_sd_ovfl:
	fmov.l	&0x0,%fpsr		# clear FPSR
	fmov.l	L_SCR3(%a6),%fpcr	# set FPCR

	fsqrt.x	FP_SCR0(%a6),%fp0	# perform square root

	fmov.l	&0x0,%fpcr		# clear FPCR
	fmov.l	%fpsr,%d1		# save FPSR

	or.l	%d1,USER_FPSR(%a6)	# save INEX2,N

fsqrt_sd_ovfl_tst:
	or.l	&ovfl_inx_mask,USER_FPSR(%a6) # set ovfl/aovfl/ainex

	mov.b	FPCR_ENABLE(%a6),%d1
	andi.b	&0x13,%d1		# is OVFL or INEX enabled?
	bne.b	fsqrt_sd_ovfl_ena	# yes

#
# OVFL is not enabled; therefore, we must create the default result by
# calling ovf_res().
#
fsqrt_sd_ovfl_dis:
	btst	&neg_bit,FPSR_CC(%a6)	# is result negative?
	sne	%d1			# set sign param accordingly
	mov.l	L_SCR3(%a6),%d0		# pass: prec,mode
	bsr.l	ovf_res			# calculate default result
	or.b	%d0,FPSR_CC(%a6)	# set INF,N if applicable
	fmovm.x	(%a0),&0x80		# return default result in fp0
	rts
#
# the INEX2 bit has already been updated by the round to the correct precision.
# now, round to extended (and don't alter the FPSR).
#
fsqrt_sd_ovfl_ena:
	mov.l	%d2,-(%sp)		# save d2
	mov.w	FP_SCR0_EX(%a6),%d1	# fetch {sgn,exp}
	mov.l	%d1,%d2			# make a copy
	andi.l	&0x7fff,%d1		# strip sign
	andi.w	&0x8000,%d2		# keep old sign
	sub.l	%d0,%d1			# add scale factor
	subi.l	&0x6000,%d1		# subtract bias
	or.w	%d2,%d1			# concat sign,exp
	mov.w	%d1,FP_SCR0_EX(%a6)	# insert new exponent
	fmovm.x	FP_SCR0(%a6),&0x40	# return EXOP in fp1
	mov.l	(%sp)+,%d2		# restore d2
	bra.b	fsqrt_sd_ovfl_dis
#
# the move in MAY overflow. so...
#
fsqrt_sd_may_ovfl:
	btst	&0x0,1+FP_SCR0_EX(%a6)	# is exponent 0x3fff?
	bne.w	fsqrt_sd_ovfl		# yes, so overflow

	fmov.l	&0x0,%fpsr		# clear FPSR
	fmov.l	L_SCR3(%a6),%fpcr	# set FPCR

	fsqrt.x	FP_SCR0(%a6),%fp0	# perform square root

	fmov.l	%fpsr,%d1		# save status
	fmov.l	&0x0,%fpcr		# clear FPCR

	or.l	%d1,USER_FPSR(%a6)	# save INEX2,N

	fmov.x	%fp0,%fp1		# make a copy of result
	fcmp.b	%fp1,&0x1		# is |result| >= 1.b?
	fbge.w	fsqrt_sd_ovfl_tst	# yes; overflow has occurred

# no, it didn't overflow; we have correct result
	bra.w	fsqrt_sd_normal_exit
##########################################################################

#
# input is not normalized; what is it?
#
fsqrt_not_norm:
	cmpi.b	%d1,&DENORM		# weed out DENORM
	beq.w	fsqrt_denorm
	cmpi.b	%d1,&ZERO		# weed out ZERO
	beq.b	fsqrt_zero
	cmpi.b	%d1,&INF		# weed out INF
	beq.b	fsqrt_inf
	cmpi.b	%d1,&SNAN		# weed out SNAN
	beq.l	res_snan_1op
	bra.l	res_qnan_1op
#
# fsqrt(+0) = +0
# fsqrt(-0) = -0
# fsqrt(+INF) = +INF
# fsqrt(-INF) = OPERR
#
fsqrt_zero:
	tst.b	SRC_EX(%a0)		# is ZERO positive or negative?
	bmi.b	fsqrt_zero_m		# negative

	fmov.s	&0x00000000,%fp0	# return +ZERO
	mov.b	&z_bmask,FPSR_CC(%a6)	# set 'Z' ccode bit
	rts

fsqrt_zero_m:
	fmov.s	&0x80000000,%fp0	# return -ZERO
	mov.b	&z_bmask+neg_bmask,FPSR_CC(%a6)	# set 'Z','N' ccode bits
	rts

fsqrt_inf:
	tst.b	SRC_EX(%a0)		# is INF positive or negative?
	bmi.l	res_operr		# negative

	fmovm.x	SRC(%a0),&0x80		# return +INF in fp0
	mov.b	&inf_bmask,FPSR_CC(%a6)	# set 'I' ccode bit
	rts
##########################################################################

#########################################################################
# XDEF ****************************************************************	#
#	addsub_scaler2(): scale inputs to fadd/fsub such that no	#
#			  OVFL/UNFL exceptions will result		#
#									#
# XREF ****************************************************************	#
#	norm() - normalize mantissa after adjusting exponent		#
#									#
# INPUT ***************************************************************	#
#	FP_SRC(a6) = fp op1(src)					#
#	FP_DST(a6) = fp op2(dst)					#
#									#
# OUTPUT **************************************************************	#
#	FP_SRC(a6) = fp op1 scaled(src)					#
#	FP_DST(a6) = fp op2 scaled(dst)					#
#	d0         = scale amount					#
#									#
# ALGORITHM ***********************************************************	#
#	If the DST exponent is > the SRC exponent, set the DST exponent#
#	equal to 0x3fff and scale the SRC exponent by the value that	#
#	the DST exponent was scaled by. If the SRC exponent is greater	#
#	or equal, do the opposite. Return this scale factor in d0.	#
#	If the two exponents differ by > the number of mantissa bits	#
#	plus two, then set the smallest exponent to a very small value	#
#	as a quick shortcut.						#
#									#
#########################################################################
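#
# Note (added; illustrative, not part of the original Motorola text):
# worked example of the shortcut described above, assuming mantissalen
# is the 64-bit extended precision mantissa length.  If the dst exponent
# is 0x4100 and the src exponent is 0x3000, their difference (0x1100) is
# far larger than mantissalen+2 = 66, so the smaller operand cannot
# influence the result at all; the quick_scale12/quick_scale22 paths
# below simply force its exponent to 1 instead of scaling it precisely.
#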
	global	addsub_scaler2
addsub_scaler2:
	mov.l	SRC_HI(%a0),FP_SCR0_HI(%a6)
	mov.l	DST_HI(%a1),FP_SCR1_HI(%a6)
	mov.l	SRC_LO(%a0),FP_SCR0_LO(%a6)
	mov.l	DST_LO(%a1),FP_SCR1_LO(%a6)
	mov.w	SRC_EX(%a0),%d0
	mov.w	DST_EX(%a1),%d1
	mov.w	%d0,FP_SCR0_EX(%a6)
	mov.w	%d1,FP_SCR1_EX(%a6)

	mov.w	%d0,L_SCR1(%a6)		# store src exponent
	mov.w	%d1,2+L_SCR1(%a6)	# store dst exponent

	cmp.w	%d0,%d1			# is src exp >= dst exp?
# dst exp is > src exp; scale dst to exp = 0x3fff
	bsr.l	scale_to_zero_dst
	mov.l	%d0,-(%sp)		# save scale factor

	cmpi.b	STAG(%a6),&DENORM	# is dst denormalized?

	lea	FP_SCR0(%a6),%a0
	bsr.l	norm			# normalize the denorm; result is new exp
	neg.w	%d0			# new exp = -(shft val)
	mov.w	%d0,L_SCR1(%a6)		# insert new exp

	mov.w	2+L_SCR1(%a6),%d0
	subi.w	&mantissalen+2,%d0	# subtract mantissalen+2 from larger exp

	cmp.w	%d0,L_SCR1(%a6)		# is difference >= len(mantissa)+2?
	bge.b	quick_scale12

	mov.w	L_SCR1(%a6),%d0
	add.w	0x2(%sp),%d0		# scale src exponent by scale factor
	mov.w	FP_SCR0_EX(%a6),%d1
	or.w	%d1,%d0			# concat {sgn,new exp}
	mov.w	%d0,FP_SCR0_EX(%a6)	# insert new dst exponent

	mov.l	(%sp)+,%d0		# return SCALE factor
	rts

quick_scale12:
	andi.w	&0x8000,FP_SCR0_EX(%a6)	# zero src exponent
	bset	&0x0,1+FP_SCR0_EX(%a6)	# set exp = 1

	mov.l	(%sp)+,%d0		# return SCALE factor
	rts
# src exp is >= dst exp; scale src to exp = 0x3fff
	bsr.l	scale_to_zero_src
	mov.l	%d0,-(%sp)		# save scale factor

	cmpi.b	DTAG(%a6),&DENORM	# is dst denormalized?

	lea	FP_SCR1(%a6),%a0
	bsr.l	norm			# normalize the denorm; result is new exp
	neg.w	%d0			# new exp = -(shft val)
	mov.w	%d0,2+L_SCR1(%a6)	# insert new exp

	mov.w	L_SCR1(%a6),%d0
	subi.w	&mantissalen+2,%d0	# subtract mantissalen+2 from larger exp

	cmp.w	%d0,2+L_SCR1(%a6)	# is difference >= len(mantissa)+2?
	bge.b	quick_scale22

	mov.w	2+L_SCR1(%a6),%d0
	add.w	0x2(%sp),%d0		# scale dst exponent by scale factor
	mov.w	FP_SCR1_EX(%a6),%d1
	or.w	%d1,%d0			# concat {sgn,new exp}
	mov.w	%d0,FP_SCR1_EX(%a6)	# insert new dst exponent

	mov.l	(%sp)+,%d0		# return SCALE factor
	rts

quick_scale22:
	andi.w	&0x8000,FP_SCR1_EX(%a6)	# zero dst exponent
	bset	&0x0,1+FP_SCR1_EX(%a6)	# set exp = 1

	mov.l	(%sp)+,%d0		# return SCALE factor
	rts
##########################################################################

#########################################################################
# XDEF ****************************************************************	#
#	scale_to_zero_src(): scale the exponent of extended precision	#
#			     value at FP_SCR0(a6).			#
#									#
# XREF ****************************************************************	#
#	norm() - normalize the mantissa if the operand was a DENORM	#
#									#
# INPUT ***************************************************************	#
#	FP_SCR0(a6) = extended precision operand to be scaled		#
#									#
# OUTPUT **************************************************************	#
#	FP_SCR0(a6) = scaled extended precision operand		#
#	d0	    = scale value					#
#									#
# ALGORITHM ***********************************************************	#
#	Set the exponent of the input operand to 0x3fff. Save the value#
#	of the difference between the original and new exponent. Then,	#
#	normalize the operand if it was a DENORM. Add this		#
#	normalization value to the previous value. Return the result.	#
#									#
#########################################################################
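#
# Note (added; illustrative, not part of the original Motorola text):
# e.g. an operand with biased exponent 0x4005 comes back with its
# exponent field forced to 0x3fff and d0 = 0x3fff - 0x4005 = -6;
# subtracting d0 from a later (scaled) result exponent then recovers the
# true exponent.  For a DENORM the operand is normalized first and the
# shift count is folded into the returned scale value.
#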
	global	scale_to_zero_src
scale_to_zero_src:
	mov.w	FP_SCR0_EX(%a6),%d1	# extract operand's {sgn,exp}
	mov.w	%d1,%d0			# make a copy

	andi.l	&0x7fff,%d1		# extract operand's exponent

	andi.w	&0x8000,%d0		# extract operand's sgn
	or.w	&0x3fff,%d0		# insert new operand's exponent(=0)

	mov.w	%d0,FP_SCR0_EX(%a6)	# insert biased exponent

	cmpi.b	STAG(%a6),&DENORM	# is operand normalized?
	beq.b	stzs_denorm		# normalize the DENORM

stzs_norm:
	mov.l	&0x3fff,%d0
	sub.l	%d1,%d0			# scale = BIAS + (-exp)

	rts

stzs_denorm:
	lea	FP_SCR0(%a6),%a0	# pass ptr to src op
	bsr.l	norm			# normalize denorm
	neg.l	%d0			# new exponent = -(shft val)
	mov.l	%d0,%d1			# prepare for op_norm call
	bra.b	stzs_norm		# finish scaling
#########################################################################
# XDEF ****************************************************************	#
#	scale_sqrt(): scale the input operand exponent so a subsequent	#
#		      fsqrt operation won't take an exception.		#
#									#
# XREF ****************************************************************	#
#	norm() - normalize the mantissa if the operand was a DENORM	#
#									#
# INPUT ***************************************************************	#
#	FP_SCR0(a6) = extended precision operand to be scaled		#
#									#
# OUTPUT **************************************************************	#
#	FP_SCR0(a6) = scaled extended precision operand		#
#	d0	    = scale value					#
#									#
# ALGORITHM ***********************************************************	#
#	If the input operand is a DENORM, normalize it.		#
#	If the exponent of the input operand is even, set the exponent	#
#	to 0x3ffe and return a scale factor of "(exp-0x3ffe)/2". If the#
#	exponent of the input operand is odd, set the exponent to	#
#	0x3fff and return a scale factor of "(exp-0x3fff)/2".		#
#									#
#########################################################################
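#
# Note (added; illustrative, not part of the original Motorola text):
# a square root halves the exponent, which is why the parity of the
# exponent matters here.  Roughly: an odd exponent is rescaled to 0x3fff
# and an even one to 0x3ffe, so the exponent displacement being halved
# (the asr.l below) is always even; d0 then holds half of that
# displacement and can be applied to the fsqrt result afterwards.
#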
scale_sqrt:
	cmpi.b	STAG(%a6),&DENORM	# is operand normalized?
	beq.b	ss_denorm		# normalize the DENORM

	mov.w	FP_SCR0_EX(%a6),%d1	# extract operand's {sgn,exp}
	andi.l	&0x7fff,%d1		# extract operand's exponent

	andi.w	&0x8000,FP_SCR0_EX(%a6)	# extract operand's sgn

	btst	&0x0,%d1		# is exp even or odd?

	ori.w	&0x3fff,FP_SCR0_EX(%a6)	# insert new operand's exponent(=0)
	sub.l	%d1,%d0			# scale = BIAS + (-exp)
	asr.l	&0x1,%d0		# divide scale factor by 2

	ori.w	&0x3ffe,FP_SCR0_EX(%a6)	# insert new operand's exponent(=0)
	sub.l	%d1,%d0			# scale = BIAS + (-exp)
	asr.l	&0x1,%d0		# divide scale factor by 2

ss_denorm:
	lea	FP_SCR0(%a6),%a0	# pass ptr to src op
	bsr.l	norm			# normalize denorm

	btst	&0x0,%d0		# is exp even or odd?
	beq.b	ss_denorm_even

	ori.w	&0x3fff,FP_SCR0_EX(%a6)	# insert new operand's exponent(=0)
	asr.l	&0x1,%d0		# divide scale factor by 2

ss_denorm_even:
	ori.w	&0x3ffe,FP_SCR0_EX(%a6)	# insert new operand's exponent(=0)
	asr.l	&0x1,%d0		# divide scale factor by 2
#########################################################################
# XDEF ****************************************************************	#
#	scale_to_zero_dst(): scale the exponent of extended precision	#
#			     value at FP_SCR1(a6).			#
#									#
# XREF ****************************************************************	#
#	norm() - normalize the mantissa if the operand was a DENORM	#
#									#
# INPUT ***************************************************************	#
#	FP_SCR1(a6) = extended precision operand to be scaled		#
#									#
# OUTPUT **************************************************************	#
#	FP_SCR1(a6) = scaled extended precision operand		#
#	d0	    = scale value					#
#									#
# ALGORITHM ***********************************************************	#
#	Set the exponent of the input operand to 0x3fff. Save the value#
#	of the difference between the original and new exponent. Then,	#
#	normalize the operand if it was a DENORM. Add this		#
#	normalization value to the previous value. Return the result.	#
#									#
#########################################################################
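#
# Note (added): this is the same operation as scale_to_zero_src() above,
# applied to the destination operand in FP_SCR1(a6) and keyed off
# DTAG(a6) instead of STAG(a6).
#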
	global	scale_to_zero_dst
scale_to_zero_dst:
	mov.w	FP_SCR1_EX(%a6),%d1	# extract operand's {sgn,exp}
	mov.w	%d1,%d0			# make a copy

	andi.l	&0x7fff,%d1		# extract operand's exponent

	andi.w	&0x8000,%d0		# extract operand's sgn
	or.w	&0x3fff,%d0		# insert new operand's exponent(=0)

	mov.w	%d0,FP_SCR1_EX(%a6)	# insert biased exponent

	cmpi.b	DTAG(%a6),&DENORM	# is operand normalized?
	beq.b	stzd_denorm		# normalize the DENORM

stzd_norm:
	mov.l	&0x3fff,%d0
	sub.l	%d1,%d0			# scale = BIAS + (-exp)

	rts

stzd_denorm:
	lea	FP_SCR1(%a6),%a0	# pass ptr to dst op
	bsr.l	norm			# normalize denorm
	neg.l	%d0			# new exponent = -(shft val)
	mov.l	%d0,%d1			# prepare for op_norm call
	bra.b	stzd_norm		# finish scaling
##########################################################################

#########################################################################
# XDEF ****************************************************************	#
#	res_qnan(): return default result w/ QNAN operand for dyadic	#
#	res_snan(): return default result w/ SNAN operand for dyadic	#
#	res_qnan_1op(): return dflt result w/ QNAN operand for monadic	#
#	res_snan_1op(): return dflt result w/ SNAN operand for monadic	#
#									#
# XREF ****************************************************************	#
#									#
# INPUT ***************************************************************	#
#	FP_SRC(a6) = pointer to extended precision src operand		#
#	FP_DST(a6) = pointer to extended precision dst operand		#
#									#
# OUTPUT **************************************************************	#
#	fp0 = default result						#
#									#
# ALGORITHM ***********************************************************	#
#	If either operand (but not both operands) of an operation is a	#
#	nonsignalling NAN, then that NAN is returned as the result. If	#
#	both operands are nonsignalling NANs, then the destination	#
#	operand nonsignalling NAN is returned as the result.		#
#	If either operand to an operation is a signalling NAN (SNAN),	#
#	then, the SNAN bit is set in the FPSR EXC byte. If the SNAN	#
#	trap enable bit is set in the FPCR, then the trap is taken and	#
#	the destination is not modified. If the SNAN trap enable bit	#
#	is not set, then the SNAN is converted to a nonsignalling NAN	#
#	(by setting the SNAN bit in the operand to one), and the	#
#	operation continues as described in the preceding paragraph,	#
#	for nonsignalling NANs.						#
#	Make sure the appropriate FPSR bits are set before exiting.	#
#									#
#########################################################################
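#
# Note (added; illustrative, not part of the original Motorola text):
# "setting the SNAN bit in the operand to one" above corresponds to the
# bset &0x6,FP_SRC_HI / FP_DST_HI operations below, which set the most
# significant fraction bit of the mantissa (the quiet-NAN bit) and so
# turn a signalling NAN into a nonsignalling one before it is returned.
#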
res_qnan:
res_snan:
	cmp.b	DTAG(%a6), &SNAN	# is the dst an SNAN?
	cmp.b	DTAG(%a6), &QNAN	# is the dst a QNAN?

	cmp.b	STAG(%a6), &QNAN

	global	res_snan_1op
res_snan_1op:
	bset	&0x6, FP_SRC_HI(%a6)	# set SNAN bit
	or.l	&nan_mask+aiop_mask+snan_mask, USER_FPSR(%a6)
	lea	FP_SRC(%a6), %a0

	global	res_qnan_1op
res_qnan_1op:
	or.l	&nan_mask, USER_FPSR(%a6)
	lea	FP_SRC(%a6), %a0

	or.l	&nan_mask+aiop_mask+snan_mask, USER_FPSR(%a6)
	bset	&0x6, FP_DST_HI(%a6)	# set SNAN bit
	lea	FP_DST(%a6), %a0

	lea	FP_DST(%a6), %a0
	cmp.b	STAG(%a6), &SNAN
	or.l	&aiop_mask+snan_mask, USER_FPSR(%a6)

	or.l	&nan_mask, USER_FPSR(%a6)

	btst	&0x7, FTEMP_EX(%a0)	# is NAN neg?
	or.l	&neg_mask, USER_FPSR(%a6)

	fmovm.x	(%a0), &0x80
	rts
#########################################################################
# XDEF ****************************************************************	#
#	res_operr(): return default result during operand error	#
#									#
# XREF ****************************************************************	#
#									#
# INPUT ***************************************************************	#
#									#
# OUTPUT **************************************************************	#
#	fp0 = default operand error result				#
#									#
# ALGORITHM ***********************************************************	#
#	A nonsignalling NAN is returned as the default result when	#
#	an operand error occurs for the following cases:		#
#									#
#	Multiply: (Infinity x Zero)					#
#	Divide  : (Zero / Zero) || (Infinity / Infinity)		#
#									#
#########################################################################
res_operr:
	or.l	&nan_mask+operr_mask+aiop_mask, USER_FPSR(%a6)
	fmovm.x	nan_return(%pc), &0x80
	rts

nan_return:
	long	0x7fff0000, 0xffffffff, 0xffffffff
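#
# Note (added): the constant above is the default nonsignalling NAN in
# extended precision -- sign/exponent word 0x7fff with an all-ones
# mantissa, so the quiet-NAN bit is already set.
#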
#########################################################################
# fdbcc(): routine to emulate the fdbcc instruction			#
#									#
# XDEF ****************************************************************	#
#									#
# XREF ****************************************************************	#
#	fetch_dreg() - fetch Dn value					#
#	store_dreg_l() - store updated Dn value				#
#									#
# INPUT ***************************************************************	#
#	d0 = displacement						#
#									#
# OUTPUT **************************************************************	#
#									#
# ALGORITHM ***********************************************************	#
#	This routine checks which conditional predicate is specified by#
#	the stacked fdbcc instruction opcode and then branches to a	#
#	routine for that predicate. The corresponding fbcc instruction	#
#	is then used to see whether the condition (specified by the	#
#	stacked FPSR) is true or false.					#
#	If a BSUN exception should be indicated, the BSUN and ABSUN	#
#	bits are set in the stacked FPSR. If the BSUN exception is	#
#	enabled, the fbsun_flg is set in the SPCOND_FLG location on the#
#	stack. If an enabled BSUN should not be flagged and the	#
#	predicate is true, then Dn is fetched and decremented by one.	#
#	If Dn is not equal to -1, add the displacement value to the	#
#	stacked PC so that when an "rte" is finally executed, the	#
#	branch occurs.							#
#									#
#########################################################################
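#
# Note (added; illustrative, not part of the original Motorola text):
# e.g. for an "fdbolt Dn,<label>" loop the predicate field selects entry
# 04 (fdbcc_olt) in tbl_fdbcc below.  If OLT is false, fdbcc_false
# fetches Dn, decrements it and, while it has not reached -1, adds the
# sign-extended 16-bit displacement to the address of the fdbcc
# instruction (plus its length) and stores that as the new PC, so the
# eventual "rte" re-enters the loop.
#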
fdbcc:
	mov.l	%d0,L_SCR1(%a6)		# save displacement

	mov.w	EXC_CMDREG(%a6),%d0	# fetch predicate

	clr.l	%d1			# clear scratch reg
	mov.b	FPSR_CC(%a6),%d1	# fetch fp ccodes
	ror.l	&0x8,%d1		# rotate to top byte
	fmov.l	%d1,%fpsr		# insert into FPSR

	mov.w	(tbl_fdbcc.b,%pc,%d0.w*2),%d1 # load table
	jmp	(tbl_fdbcc.b,%pc,%d1.w)	# jump to fdbcc routine
tbl_fdbcc:
	short	fdbcc_f - tbl_fdbcc	# 00
	short	fdbcc_eq - tbl_fdbcc	# 01
	short	fdbcc_ogt - tbl_fdbcc	# 02
	short	fdbcc_oge - tbl_fdbcc	# 03
	short	fdbcc_olt - tbl_fdbcc	# 04
	short	fdbcc_ole - tbl_fdbcc	# 05
	short	fdbcc_ogl - tbl_fdbcc	# 06
	short	fdbcc_or - tbl_fdbcc	# 07
	short	fdbcc_un - tbl_fdbcc	# 08
	short	fdbcc_ueq - tbl_fdbcc	# 09
	short	fdbcc_ugt - tbl_fdbcc	# 10
	short	fdbcc_uge - tbl_fdbcc	# 11
	short	fdbcc_ult - tbl_fdbcc	# 12
	short	fdbcc_ule - tbl_fdbcc	# 13
	short	fdbcc_neq - tbl_fdbcc	# 14
	short	fdbcc_t - tbl_fdbcc	# 15
	short	fdbcc_sf - tbl_fdbcc	# 16
	short	fdbcc_seq - tbl_fdbcc	# 17
	short	fdbcc_gt - tbl_fdbcc	# 18
	short	fdbcc_ge - tbl_fdbcc	# 19
	short	fdbcc_lt - tbl_fdbcc	# 20
	short	fdbcc_le - tbl_fdbcc	# 21
	short	fdbcc_gl - tbl_fdbcc	# 22
	short	fdbcc_gle - tbl_fdbcc	# 23
	short	fdbcc_ngle - tbl_fdbcc	# 24
	short	fdbcc_ngl - tbl_fdbcc	# 25
	short	fdbcc_nle - tbl_fdbcc	# 26
	short	fdbcc_nlt - tbl_fdbcc	# 27
	short	fdbcc_nge - tbl_fdbcc	# 28
	short	fdbcc_ngt - tbl_fdbcc	# 29
	short	fdbcc_sneq - tbl_fdbcc	# 30
	short	fdbcc_st - tbl_fdbcc	# 31
#########################################################################
#
# IEEE Nonaware tests							#
#
# For the IEEE nonaware tests, only the false branch changes the	#
# counter. However, the true branch may set bsun so we check to see	#
# if the NAN bit is set, in which case BSUN and AIOP will be set.	#
#
# The cases EQ and NE are shared by the Aware and Nonaware groups	#
# and are incapable of setting the BSUN exception bit.			#
#
# Typically, only one of the two possible branch directions could	#
# have the NAN bit set.						#
# (This is assuming the mutual exclusiveness of the FPSR cc bit	#
# groupings is preserved.)						#
#
#########################################################################
	fbeq.w	fdbcc_eq_yes		# equal?
	bra.w	fdbcc_false		# no; go handle counter

	fbneq.w	fdbcc_neq_yes		# not equal?
	bra.w	fdbcc_false		# no; go handle counter

	fbgt.w	fdbcc_gt_yes		# greater than?
	btst	&nan_bit, FPSR_CC(%a6)	# is NAN set in cc?
	beq.w	fdbcc_false		# no;go handle counter
	ori.l	&bsun_mask+aiop_mask, USER_FPSR(%a6) # set BSUN exc bit
	btst	&bsun_bit, FPCR_ENABLE(%a6) # is BSUN enabled?
	bne.w	fdbcc_bsun		# yes; we have an exception
	bra.w	fdbcc_false		# no; go handle counter

#
# not greater than:
#
	fbngt.w	fdbcc_ngt_yes		# not greater than?
	bra.w	fdbcc_false		# no; go handle counter

	btst	&nan_bit, FPSR_CC(%a6)	# is NAN set in cc?
	beq.b	fdbcc_ngt_done		# no;go finish
	ori.l	&bsun_mask+aiop_mask, USER_FPSR(%a6) # set BSUN exc bit
	btst	&bsun_bit, FPCR_ENABLE(%a6) # is BSUN enabled?
	bne.w	fdbcc_bsun		# yes; we have an exception

	rts				# no; do nothing

#
# greater than or equal:
#
	fbge.w	fdbcc_ge_yes		# greater than or equal?
	btst	&nan_bit, FPSR_CC(%a6)	# is NAN set in cc?
	beq.w	fdbcc_false		# no;go handle counter
	ori.l	&bsun_mask+aiop_mask, USER_FPSR(%a6) # set BSUN exc bit
	btst	&bsun_bit, FPCR_ENABLE(%a6) # is BSUN enabled?
	bne.w	fdbcc_bsun		# yes; we have an exception
	bra.w	fdbcc_false		# no; go handle counter

	btst	&nan_bit, FPSR_CC(%a6)	# is NAN set in cc?
	beq.b	fdbcc_ge_yes_done	# no;go do nothing
	ori.l	&bsun_mask+aiop_mask, USER_FPSR(%a6) # set BSUN exc bit
	btst	&bsun_bit, FPCR_ENABLE(%a6) # is BSUN enabled?
	bne.w	fdbcc_bsun		# yes; we have an exception
#
# not (greater than or equal):
#
	fbnge.w	fdbcc_nge_yes		# not (greater than or equal)?
	bra.w	fdbcc_false		# no; go handle counter

	btst	&nan_bit, FPSR_CC(%a6)	# is NAN set in cc?
	beq.b	fdbcc_nge_done		# no;go finish
	ori.l	&bsun_mask+aiop_mask, USER_FPSR(%a6) # set BSUN exc bit
	btst	&bsun_bit, FPCR_ENABLE(%a6) # is BSUN enabled?
	bne.w	fdbcc_bsun		# yes; we have an exception

	rts				# no; do nothing

	fblt.w	fdbcc_lt_yes		# less than?
	btst	&nan_bit, FPSR_CC(%a6)	# is NAN set in cc?
	beq.w	fdbcc_false		# no; go handle counter
	ori.l	&bsun_mask+aiop_mask, USER_FPSR(%a6) # set BSUN exc bit
	btst	&bsun_bit, FPCR_ENABLE(%a6) # is BSUN enabled?
	bne.w	fdbcc_bsun		# yes; we have an exception
	bra.w	fdbcc_false		# no; go handle counter

	fbnlt.w	fdbcc_nlt_yes		# not less than?
	bra.w	fdbcc_false		# no; go handle counter

	btst	&nan_bit, FPSR_CC(%a6)	# is NAN set in cc?
	beq.b	fdbcc_nlt_done		# no;go finish
	ori.l	&bsun_mask+aiop_mask, USER_FPSR(%a6) # set BSUN exc bit
	btst	&bsun_bit, FPCR_ENABLE(%a6) # is BSUN enabled?
	bne.w	fdbcc_bsun		# yes; we have an exception

	rts				# no; do nothing
#
# less than or equal:
#
	fble.w	fdbcc_le_yes		# less than or equal?
	btst	&nan_bit, FPSR_CC(%a6)	# is NAN set in cc?
	beq.w	fdbcc_false		# no; go handle counter
	ori.l	&bsun_mask+aiop_mask, USER_FPSR(%a6) # set BSUN exc bit
	btst	&bsun_bit, FPCR_ENABLE(%a6) # is BSUN enabled?
	bne.w	fdbcc_bsun		# yes; we have an exception
	bra.w	fdbcc_false		# no; go handle counter

	btst	&nan_bit, FPSR_CC(%a6)	# is NAN set in cc?
	beq.b	fdbcc_le_yes_done	# no; go do nothing
	ori.l	&bsun_mask+aiop_mask, USER_FPSR(%a6) # set BSUN exc bit
	btst	&bsun_bit, FPCR_ENABLE(%a6) # is BSUN enabled?
	bne.w	fdbcc_bsun		# yes; we have an exception

#
# not (less than or equal):
#
	fbnle.w	fdbcc_nle_yes		# not (less than or equal)?
	bra.w	fdbcc_false		# no; go handle counter

	btst	&nan_bit, FPSR_CC(%a6)	# is NAN set in cc?
	beq.w	fdbcc_nle_done		# no; go finish
	ori.l	&bsun_mask+aiop_mask, USER_FPSR(%a6) # set BSUN exc bit
	btst	&bsun_bit, FPCR_ENABLE(%a6) # is BSUN enabled?
	bne.w	fdbcc_bsun		# yes; we have an exception

	rts				# no; do nothing
#
# greater or less than:
#
	fbgl.w	fdbcc_gl_yes		# greater or less than?
	btst	&nan_bit, FPSR_CC(%a6)	# is NAN set in cc?
	beq.w	fdbcc_false		# no; handle counter
	ori.l	&bsun_mask+aiop_mask, USER_FPSR(%a6) # set BSUN exc bit
	btst	&bsun_bit, FPCR_ENABLE(%a6) # is BSUN enabled?
	bne.w	fdbcc_bsun		# yes; we have an exception
	bra.w	fdbcc_false		# no; go handle counter

#
# not (greater or less than):
#
	fbngl.w	fdbcc_ngl_yes		# not (greater or less than)?
	bra.w	fdbcc_false		# no; go handle counter

	btst	&nan_bit, FPSR_CC(%a6)	# is NAN set in cc?
	beq.b	fdbcc_ngl_done		# no; go finish
	ori.l	&bsun_mask+aiop_mask, USER_FPSR(%a6) # set BSUN exc bit
	btst	&bsun_bit, FPCR_ENABLE(%a6) # is BSUN enabled?
	bne.w	fdbcc_bsun		# yes; we have an exception

	rts				# no; do nothing

#
# greater, less, or equal:
#
	fbgle.w	fdbcc_gle_yes		# greater, less, or equal?
	ori.l	&bsun_mask+aiop_mask, USER_FPSR(%a6) # set BSUN exc bit
	btst	&bsun_bit, FPCR_ENABLE(%a6) # is BSUN enabled?
	bne.w	fdbcc_bsun		# yes; we have an exception
	bra.w	fdbcc_false		# no; go handle counter

#
# not (greater, less, or equal):
#
	fbngle.w fdbcc_ngle_yes		# not (greater, less, or equal)?
	bra.w	fdbcc_false		# no; go handle counter

	ori.l	&bsun_mask+aiop_mask, USER_FPSR(%a6) # set BSUN exc bit
	btst	&bsun_bit, FPCR_ENABLE(%a6) # is BSUN enabled?
	bne.w	fdbcc_bsun		# yes; we have an exception
	rts				# no; do nothing
#########################################################################
#
# Miscellaneous tests							#
#
# For the IEEE miscellaneous tests, all but fdbf and fdbt can set bsun.	#
#
#########################################################################

fdbcc_f:				# no bsun possible
	bra.w	fdbcc_false		# go handle counter

fdbcc_t:				# no bsun possible
	rts

#
# signalling false:
#
	btst	&nan_bit, FPSR_CC(%a6)	# is NAN set?
	beq.w	fdbcc_false		# no;go handle counter
	ori.l	&bsun_mask+aiop_mask, USER_FPSR(%a6) # set BSUN exc bit
	btst	&bsun_bit, FPCR_ENABLE(%a6) # is BSUN enabled?
	bne.w	fdbcc_bsun		# yes; we have an exception
	bra.w	fdbcc_false		# go handle counter

	btst	&nan_bit, FPSR_CC(%a6)	# is NAN set?
	beq.b	fdbcc_st_done		# no;go finish
	ori.l	&bsun_mask+aiop_mask, USER_FPSR(%a6) # set BSUN exc bit
	btst	&bsun_bit, FPCR_ENABLE(%a6) # is BSUN enabled?
	bne.w	fdbcc_bsun		# yes; we have an exception

#
# signalling equal:
#
	fbseq.w	fdbcc_seq_yes		# signalling equal?
	btst	&nan_bit, FPSR_CC(%a6)	# is NAN set?
	beq.w	fdbcc_false		# no;go handle counter
	ori.l	&bsun_mask+aiop_mask, USER_FPSR(%a6) # set BSUN exc bit
	btst	&bsun_bit, FPCR_ENABLE(%a6) # is BSUN enabled?
	bne.w	fdbcc_bsun		# yes; we have an exception
	bra.w	fdbcc_false		# go handle counter

	btst	&nan_bit, FPSR_CC(%a6)	# is NAN set?
	beq.b	fdbcc_seq_yes_done	# no;go do nothing
	ori.l	&bsun_mask+aiop_mask, USER_FPSR(%a6) # set BSUN exc bit
	btst	&bsun_bit, FPCR_ENABLE(%a6) # is BSUN enabled?
	bne.w	fdbcc_bsun		# yes; we have an exception
fdbcc_seq_yes_done:
	rts				# yes; do nothing

#
# signalling not equal:
#
	fbsneq.w fdbcc_sneq_yes		# signalling not equal?
	btst	&nan_bit, FPSR_CC(%a6)	# is NAN set?
	beq.w	fdbcc_false		# no;go handle counter
	ori.l	&bsun_mask+aiop_mask, USER_FPSR(%a6) # set BSUN exc bit
	btst	&bsun_bit, FPCR_ENABLE(%a6) # is BSUN enabled?
	bne.w	fdbcc_bsun		# yes; we have an exception
	bra.w	fdbcc_false		# go handle counter

	btst	&nan_bit, FPSR_CC(%a6)	# is NAN set?
	beq.w	fdbcc_sneq_done		# no;go finish
	ori.l	&bsun_mask+aiop_mask, USER_FPSR(%a6) # set BSUN exc bit
	btst	&bsun_bit, FPCR_ENABLE(%a6) # is BSUN enabled?
	bne.w	fdbcc_bsun		# yes; we have an exception
#########################################################################
#
# IEEE Aware tests							#
#
# For the IEEE aware tests, action is only taken if the result is false.#
# Therefore, the opposite branch type is used to jump to the decrement	#
# routine.								#
# The BSUN exception will not be set for any of these tests.		#
#
#########################################################################

#
# ordered greater than:
#
	fbogt.w	fdbcc_ogt_yes		# ordered greater than?
	bra.w	fdbcc_false		# no; go handle counter
fdbcc_ogt_yes:
	rts				# yes; do nothing

#
# unordered or less or equal:
#
	fbule.w	fdbcc_ule_yes		# unordered or less or equal?
	bra.w	fdbcc_false		# no; go handle counter
fdbcc_ule_yes:
	rts				# yes; do nothing

#
# ordered greater than or equal:
#
	fboge.w	fdbcc_oge_yes		# ordered greater than or equal?
	bra.w	fdbcc_false		# no; go handle counter
fdbcc_oge_yes:
	rts				# yes; do nothing

#
# unordered or less than:
#
	fbult.w	fdbcc_ult_yes		# unordered or less than?
	bra.w	fdbcc_false		# no; go handle counter
fdbcc_ult_yes:
	rts				# yes; do nothing

#
# ordered less than:
#
	fbolt.w	fdbcc_olt_yes		# ordered less than?
	bra.w	fdbcc_false		# no; go handle counter
fdbcc_olt_yes:
	rts				# yes; do nothing

#
# unordered or greater or equal:
#
	fbuge.w	fdbcc_uge_yes		# unordered or greater or equal?
	bra.w	fdbcc_false		# no; go handle counter
fdbcc_uge_yes:
	rts				# yes; do nothing

#
# ordered less than or equal:
#
	fbole.w	fdbcc_ole_yes		# ordered less than or equal?
	bra.w	fdbcc_false		# no; go handle counter
fdbcc_ole_yes:
	rts				# yes; do nothing

#
# unordered or greater than:
#
	fbugt.w	fdbcc_ugt_yes		# unordered or greater than?
	bra.w	fdbcc_false		# no; go handle counter
fdbcc_ugt_yes:
	rts				# yes; do nothing

#
# ordered greater or less than:
#
	fbogl.w	fdbcc_ogl_yes		# ordered greater or less than?
	bra.w	fdbcc_false		# no; go handle counter
fdbcc_ogl_yes:
	rts				# yes; do nothing

#
# unordered or equal:
#
	fbueq.w	fdbcc_ueq_yes		# unordered or equal?
	bra.w	fdbcc_false		# no; go handle counter
fdbcc_ueq_yes:
	rts				# yes; do nothing

	fbor.w	fdbcc_or_yes		# ordered?
	bra.w	fdbcc_false		# no; go handle counter
fdbcc_or_yes:
	rts				# yes; do nothing

	fbun.w	fdbcc_un_yes		# unordered?
	bra.w	fdbcc_false		# no; go handle counter
fdbcc_un_yes:
	rts				# yes; do nothing
#######################################################################

#
# the bsun exception bit was not set.
#
# (1) subtract 1 from the count register
# (2) if (cr == -1) then
#	  pc = pc of next instruction
#     else
#	  pc += sign_ext(16-bit displacement)
#
fdbcc_false:
	mov.b	1+EXC_OPWORD(%a6), %d1	# fetch lo opword
	andi.w	&0x7, %d1		# extract count register

	bsr.l	fetch_dreg		# fetch count value
# make sure that d0 isn't corrupted between calls...

	subq.w	&0x1, %d0		# Dn - 1 -> Dn

	bsr.l	store_dreg_l		# store new count value

	cmpi.w	%d0, &-0x1		# is (Dn == -1)?
	bne.b	fdbcc_false_cont	# no;
	rts

fdbcc_false_cont:
	mov.l	L_SCR1(%a6),%d0		# fetch displacement
	add.l	USER_FPIAR(%a6),%d0	# add instruction PC
	addq.l	&0x4,%d0		# add instruction length
	mov.l	%d0,EXC_PC(%a6)		# set new PC
	rts

# the emulation routine set bsun and BSUN was enabled. have to
# fix stack and jump to the bsun handler.
# let the caller of this routine shift the stack frame up to
# eliminate the effective address field.
fdbcc_bsun:
	mov.b	&fbsun_flg,SPCOND_FLG(%a6)
	rts
#########################################################################
# ftrapcc(): routine to emulate the ftrapcc instruction		#
#									#
# XDEF ****************************************************************	#
#									#
# XREF ****************************************************************	#
#									#
# INPUT ***************************************************************	#
#									#
# OUTPUT **************************************************************	#
#									#
# ALGORITHM ***********************************************************	#
#	This routine checks which conditional predicate is specified by#
#	the stacked ftrapcc instruction opcode and then branches to a	#
#	routine for that predicate. The corresponding fbcc instruction	#
#	is then used to see whether the condition (specified by the	#
#	stacked FPSR) is true or false.					#
#	If a BSUN exception should be indicated, the BSUN and ABSUN	#
#	bits are set in the stacked FPSR. If the BSUN exception is	#
#	enabled, the fbsun_flg is set in the SPCOND_FLG location on the#
#	stack. If an enabled BSUN should not be flagged and the	#
#	predicate is true, then the ftrapcc_flg is set in the		#
#	SPCOND_FLG location. These special flags indicate to the	#
#	calling routine to emulate the exceptional condition.		#
#									#
#########################################################################
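#
# Note (added): ftrapcc shares the predicate evaluation scheme of fdbcc
# above; the difference is the action taken on a true predicate, where
# ftrapcc_trap marks the special-condition flag so that the caller
# emulates the trap instead of adjusting a counter register.
#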
ftrapcc:
	mov.w	EXC_CMDREG(%a6),%d0	# fetch predicate

	clr.l	%d1			# clear scratch reg
	mov.b	FPSR_CC(%a6),%d1	# fetch fp ccodes
	ror.l	&0x8,%d1		# rotate to top byte
	fmov.l	%d1,%fpsr		# insert into FPSR

	mov.w	(tbl_ftrapcc.b,%pc,%d0.w*2), %d1 # load table
	jmp	(tbl_ftrapcc.b,%pc,%d1.w) # jump to ftrapcc routine
tbl_ftrapcc:
	short	ftrapcc_f - tbl_ftrapcc		# 00
	short	ftrapcc_eq - tbl_ftrapcc	# 01
	short	ftrapcc_ogt - tbl_ftrapcc	# 02
	short	ftrapcc_oge - tbl_ftrapcc	# 03
	short	ftrapcc_olt - tbl_ftrapcc	# 04
	short	ftrapcc_ole - tbl_ftrapcc	# 05
	short	ftrapcc_ogl - tbl_ftrapcc	# 06
	short	ftrapcc_or - tbl_ftrapcc	# 07
	short	ftrapcc_un - tbl_ftrapcc	# 08
	short	ftrapcc_ueq - tbl_ftrapcc	# 09
	short	ftrapcc_ugt - tbl_ftrapcc	# 10
	short	ftrapcc_uge - tbl_ftrapcc	# 11
	short	ftrapcc_ult - tbl_ftrapcc	# 12
	short	ftrapcc_ule - tbl_ftrapcc	# 13
	short	ftrapcc_neq - tbl_ftrapcc	# 14
	short	ftrapcc_t - tbl_ftrapcc		# 15
	short	ftrapcc_sf - tbl_ftrapcc	# 16
	short	ftrapcc_seq - tbl_ftrapcc	# 17
	short	ftrapcc_gt - tbl_ftrapcc	# 18
	short	ftrapcc_ge - tbl_ftrapcc	# 19
	short	ftrapcc_lt - tbl_ftrapcc	# 20
	short	ftrapcc_le - tbl_ftrapcc	# 21
	short	ftrapcc_gl - tbl_ftrapcc	# 22
	short	ftrapcc_gle - tbl_ftrapcc	# 23
	short	ftrapcc_ngle - tbl_ftrapcc	# 24
	short	ftrapcc_ngl - tbl_ftrapcc	# 25
	short	ftrapcc_nle - tbl_ftrapcc	# 26
	short	ftrapcc_nlt - tbl_ftrapcc	# 27
	short	ftrapcc_nge - tbl_ftrapcc	# 28
	short	ftrapcc_ngt - tbl_ftrapcc	# 29
	short	ftrapcc_sneq - tbl_ftrapcc	# 30
	short	ftrapcc_st - tbl_ftrapcc	# 31
#########################################################################
# IEEE Nonaware tests							#
#									#
#	For the IEEE nonaware tests, we set the result based on the	#
# floating point condition codes. In addition, we check to see		#
# if the NAN bit is set, in which case BSUN and AIOP will be set.	#
#									#
#	The cases EQ and NE are shared by the Aware and Nonaware groups	#
# and are incapable of setting the BSUN exception bit.			#
#									#
#	Typically, only one of the two possible branch directions could	#
# have the NAN bit set.							#
#########################################################################
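#
# The nonaware routines below all follow the same pattern; a sketch of the
# "greater than" case (the labels shown are those referenced by the branches):
#
#	fbgt.w	ftrapcc_trap			# predicate true -> take trap
#	btst	&nan_bit, FPSR_CC(%a6)		# predicate false: NAN set?
#	beq.b	ftrapcc_gt_done			# no -> just return
#	ori.l	&bsun_mask+aiop_mask, USER_FPSR(%a6) # yes -> post BSUN/AIOP
#	btst	&bsun_bit, FPCR_ENABLE(%a6)	# BSUN exception enabled?
#	bne.w	ftrapcc_bsun			# yes -> report BSUN instead
#	rts					# no -> done
#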
	fbeq.w		ftrapcc_trap		# equal?

	fbneq.w		ftrapcc_trap		# not equal?

	fbgt.w		ftrapcc_trap		# greater than?

	btst		&nan_bit, FPSR_CC(%a6)	# is NAN set in cc?
	beq.b		ftrapcc_gt_done		# no
	ori.l		&bsun_mask+aiop_mask, USER_FPSR(%a6) # set BSUN exc bit
	btst		&bsun_bit, FPCR_ENABLE(%a6) # was BSUN set?
	bne.w		ftrapcc_bsun		# yes

	rts					# no; do nothing

# not greater than:
	fbngt.w		ftrapcc_ngt_yes		# not greater than?

	btst		&nan_bit, FPSR_CC(%a6)	# is NAN set in cc?
	beq.w		ftrapcc_trap		# no; go take trap
	ori.l		&bsun_mask+aiop_mask, USER_FPSR(%a6) # set BSUN exc bit
	btst		&bsun_bit, FPCR_ENABLE(%a6) # was BSUN set?
	bne.w		ftrapcc_bsun		# yes
	bra.w		ftrapcc_trap		# no; go take trap

# greater than or equal:
	fbge.w		ftrapcc_ge_yes		# greater than or equal?

	btst		&nan_bit, FPSR_CC(%a6)	# is NAN set in cc?
	beq.b		ftrapcc_ge_done		# no; go finish
	ori.l		&bsun_mask+aiop_mask, USER_FPSR(%a6) # set BSUN exc bit
	btst		&bsun_bit, FPCR_ENABLE(%a6) # was BSUN set?
	bne.w		ftrapcc_bsun		# yes

	rts					# no; do nothing

	btst		&nan_bit, FPSR_CC(%a6)	# is NAN set in cc?
	beq.w		ftrapcc_trap		# no; go take trap
	ori.l		&bsun_mask+aiop_mask, USER_FPSR(%a6) # set BSUN exc bit
	btst		&bsun_bit, FPCR_ENABLE(%a6) # was BSUN set?
	bne.w		ftrapcc_bsun		# yes
	bra.w		ftrapcc_trap		# no; go take trap

# not (greater than or equal):
	fbnge.w		ftrapcc_nge_yes		# not (greater than or equal)?

	btst		&nan_bit, FPSR_CC(%a6)	# is NAN set in cc?
	beq.w		ftrapcc_trap		# no; go take trap
	ori.l		&bsun_mask+aiop_mask, USER_FPSR(%a6) # set BSUN exc bit
	btst		&bsun_bit, FPCR_ENABLE(%a6) # was BSUN set?
	bne.w		ftrapcc_bsun		# yes
	bra.w		ftrapcc_trap		# no; go take trap

	fblt.w		ftrapcc_trap		# less than?

	btst		&nan_bit, FPSR_CC(%a6)	# is NAN set in cc?
	beq.b		ftrapcc_lt_done		# no; go finish
	ori.l		&bsun_mask+aiop_mask, USER_FPSR(%a6) # set BSUN exc bit
	btst		&bsun_bit, FPCR_ENABLE(%a6) # was BSUN set?
	bne.w		ftrapcc_bsun		# yes

	rts					# no; do nothing

	fbnlt.w		ftrapcc_nlt_yes		# not less than?

	btst		&nan_bit, FPSR_CC(%a6)	# is NAN set in cc?
	beq.w		ftrapcc_trap		# no; go take trap
	ori.l		&bsun_mask+aiop_mask, USER_FPSR(%a6) # set BSUN exc bit
	btst		&bsun_bit, FPCR_ENABLE(%a6) # was BSUN set?
	bne.w		ftrapcc_bsun		# yes
	bra.w		ftrapcc_trap		# no; go take trap

# less than or equal:
	fble.w		ftrapcc_le_yes		# less than or equal?

	btst		&nan_bit, FPSR_CC(%a6)	# is NAN set in cc?
	beq.b		ftrapcc_le_done		# no; go finish
	ori.l		&bsun_mask+aiop_mask, USER_FPSR(%a6) # set BSUN exc bit
	btst		&bsun_bit, FPCR_ENABLE(%a6) # was BSUN set?
	bne.w		ftrapcc_bsun		# yes

	rts					# no; do nothing

	btst		&nan_bit, FPSR_CC(%a6)	# is NAN set in cc?
	beq.w		ftrapcc_trap		# no; go take trap
	ori.l		&bsun_mask+aiop_mask, USER_FPSR(%a6) # set BSUN exc bit
	btst		&bsun_bit, FPCR_ENABLE(%a6) # was BSUN set?
	bne.w		ftrapcc_bsun		# yes
	bra.w		ftrapcc_trap		# no; go take trap

# not (less than or equal):
	fbnle.w		ftrapcc_nle_yes		# not (less than or equal)?

	btst		&nan_bit, FPSR_CC(%a6)	# is NAN set in cc?
	beq.w		ftrapcc_trap		# no; go take trap
	ori.l		&bsun_mask+aiop_mask, USER_FPSR(%a6) # set BSUN exc bit
	btst		&bsun_bit, FPCR_ENABLE(%a6) # was BSUN set?
	bne.w		ftrapcc_bsun		# yes
	bra.w		ftrapcc_trap		# no; go take trap

# greater or less than:
	fbgl.w		ftrapcc_trap		# greater or less than?

	btst		&nan_bit, FPSR_CC(%a6)	# is NAN set in cc?
	beq.b		ftrapcc_gl_done		# no; go finish
	ori.l		&bsun_mask+aiop_mask, USER_FPSR(%a6) # set BSUN exc bit
	btst		&bsun_bit, FPCR_ENABLE(%a6) # was BSUN set?
	bne.w		ftrapcc_bsun		# yes

	rts					# no; do nothing

# not (greater or less than):
	fbngl.w		ftrapcc_ngl_yes		# not (greater or less than)?

	btst		&nan_bit, FPSR_CC(%a6)	# is NAN set in cc?
	beq.w		ftrapcc_trap		# no; go take trap
	ori.l		&bsun_mask+aiop_mask, USER_FPSR(%a6) # set BSUN exc bit
	btst		&bsun_bit, FPCR_ENABLE(%a6) # was BSUN set?
	bne.w		ftrapcc_bsun		# yes
	bra.w		ftrapcc_trap		# no; go take trap

# greater, less, or equal:
	fbgle.w		ftrapcc_trap		# greater, less, or equal?

	ori.l		&bsun_mask+aiop_mask, USER_FPSR(%a6) # set BSUN exc bit
	btst		&bsun_bit, FPCR_ENABLE(%a6) # was BSUN set?
	bne.w		ftrapcc_bsun		# yes
	rts					# no; do nothing

# not (greater, less, or equal):
	fbngle.w	ftrapcc_ngle_yes	# not (greater, less, or equal)?

	ori.l		&bsun_mask+aiop_mask, USER_FPSR(%a6) # set BSUN exc bit
	btst		&bsun_bit, FPCR_ENABLE(%a6) # was BSUN set?
	bne.w		ftrapcc_bsun		# yes
	bra.w		ftrapcc_trap		# no; go take trap
#########################################################################
# Miscellaneous tests							#
#									#
#	For the IEEE aware tests, we only have to set the result based on the #
# floating point condition codes. The BSUN exception will not be	#
# set for any of these tests.						#
#########################################################################
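#
# Note: the unconditional predicates need no condition-code test at all;
# "T" (true) simply branches to ftrapcc_trap as shown below, and "F"
# (false) simply returns without trapping. Only the signalling variants
# (SF, ST, SEQ, SNEQ) go on to check the NAN bit and possibly post BSUN.
#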
	bra.w		ftrapcc_trap		# go take trap

# signalling false:
	btst		&nan_bit, FPSR_CC(%a6)	# set BSUN exc bit
	beq.b		ftrapcc_sf_done		# no; go finish
	ori.l		&bsun_mask+aiop_mask, USER_FPSR(%a6) # set BSUN exc bit
	btst		&bsun_bit, FPCR_ENABLE(%a6) # was BSUN set?
	bne.w		ftrapcc_bsun		# yes

	rts					# no; do nothing

	btst		&nan_bit, FPSR_CC(%a6)	# set BSUN exc bit
	beq.w		ftrapcc_trap		# no; go take trap
	ori.l		&bsun_mask+aiop_mask, USER_FPSR(%a6) # set BSUN exc bit
	btst		&bsun_bit, FPCR_ENABLE(%a6) # was BSUN set?
	bne.w		ftrapcc_bsun		# yes
	bra.w		ftrapcc_trap		# no; go take trap

# signalling equal:
	fbseq.w		ftrapcc_seq_yes		# signalling equal?

	btst		&nan_bit, FPSR_CC(%a6)	# set BSUN exc bit
	beq.w		ftrapcc_seq_done	# no; go finish
	ori.l		&bsun_mask+aiop_mask, USER_FPSR(%a6) # set BSUN exc bit
	btst		&bsun_bit, FPCR_ENABLE(%a6) # was BSUN set?
	bne.w		ftrapcc_bsun		# yes

	rts					# no; do nothing

	btst		&nan_bit, FPSR_CC(%a6)	# set BSUN exc bit
	beq.w		ftrapcc_trap		# no; go take trap
	ori.l		&bsun_mask+aiop_mask, USER_FPSR(%a6) # set BSUN exc bit
	btst		&bsun_bit, FPCR_ENABLE(%a6) # was BSUN set?
	bne.w		ftrapcc_bsun		# yes
	bra.w		ftrapcc_trap		# no; go take trap

# signalling not equal:
	fbsneq.w	ftrapcc_sneq_yes	# signalling not equal?

	btst		&nan_bit, FPSR_CC(%a6)	# set BSUN exc bit
	beq.w		ftrapcc_sneq_no_done	# no; go finish
	ori.l		&bsun_mask+aiop_mask, USER_FPSR(%a6) # set BSUN exc bit
	btst		&bsun_bit, FPCR_ENABLE(%a6) # was BSUN set?
	bne.w		ftrapcc_bsun		# yes
ftrapcc_sneq_no_done:

	btst		&nan_bit, FPSR_CC(%a6)	# set BSUN exc bit
	beq.w		ftrapcc_trap		# no; go take trap
	ori.l		&bsun_mask+aiop_mask, USER_FPSR(%a6) # set BSUN exc bit
	btst		&bsun_bit, FPCR_ENABLE(%a6) # was BSUN set?
	bne.w		ftrapcc_bsun		# yes
	bra.w		ftrapcc_trap		# no; go take trap
#########################################################################
# IEEE Aware tests							#
#									#
#	For the IEEE aware tests, we only have to set the result based on the #
# floating point condition codes. The BSUN exception will not be	#
# set for any of these tests.						#
#########################################################################
# ordered greater than:
	fbogt.w		ftrapcc_trap		# ordered greater than?

# unordered or less or equal:
	fbule.w		ftrapcc_trap		# unordered or less or equal?

# ordered greater than or equal:
	fboge.w		ftrapcc_trap		# ordered greater than or equal?

# unordered or less than:
	fbult.w		ftrapcc_trap		# unordered or less than?

# ordered less than:
	fbolt.w		ftrapcc_trap		# ordered less than?

# unordered or greater or equal:
	fbuge.w		ftrapcc_trap		# unordered or greater or equal?

# ordered less than or equal:
	fbole.w		ftrapcc_trap		# ordered less than or equal?

# unordered or greater than:
	fbugt.w		ftrapcc_trap		# unordered or greater than?

# ordered greater or less than:
	fbogl.w		ftrapcc_trap		# ordered greater or less than?

# unordered or equal:
	fbueq.w		ftrapcc_trap		# unordered or equal?

	fbor.w		ftrapcc_trap		# ordered?

	fbun.w		ftrapcc_trap		# unordered?
#######################################################################

# the bsun exception bit was not set.
# we will need to jump to the ftrapcc vector. the stack frame
# is the same size as that of the fp unimp instruction. the
# only difference is that the <ea> field should hold the PC
# of the ftrapcc instruction and the vector offset field
# should denote the ftrapcc trap.
	mov.b		&ftrapcc_flg,SPCOND_FLG(%a6)

# the emulation routine set bsun and BSUN was enabled. have to
# fix stack and jump to the bsun handler.
# let the caller of this routine shift the stack frame up to
# eliminate the effective address field.
	mov.b		&fbsun_flg,SPCOND_FLG(%a6)
#########################################################################
# fscc(): routine to emulate the fscc instruction			#
#									#
# XDEF ****************************************************************	#
#									#
# XREF ****************************************************************	#
#	store_dreg_b() - store result to data register file		#
#	dec_areg() - decrement an areg for -(an) mode			#
#	inc_areg() - increment an areg for (an)+ mode			#
#	_dmem_write_byte() - store result to memory			#
#									#
# INPUT ***************************************************************	#
#									#
# OUTPUT **************************************************************	#
#									#
# ALGORITHM ***********************************************************	#
#	This routine checks which conditional predicate is specified by #
# the stacked fscc instruction opcode and then branches to a routine	#
# for that predicate. The corresponding fbcc instruction is then used	#
# to see whether the condition (specified by the stacked FPSR) is true. #
#	If a BSUN exception should be indicated, the BSUN and ABSUN	#
# bits are set in the stacked FPSR. If the BSUN exception is enabled,	#
# the fbsun_flg is set in the SPCOND_FLG location on the stack. If an	#
# enabled BSUN should not be flagged and the predicate is true, then	#
# the result is stored to the data register file or memory.		#
#########################################################################
	mov.w		EXC_CMDREG(%a6),%d0	# fetch predicate

	clr.l		%d1			# clear scratch reg
	mov.b		FPSR_CC(%a6),%d1	# fetch fp ccodes
	ror.l		&0x8,%d1		# rotate to top byte
	fmov.l		%d1,%fpsr		# insert into FPSR

	mov.w		(tbl_fscc.b,%pc,%d0.w*2),%d1	# load table
	jmp		(tbl_fscc.b,%pc,%d1.w)		# jump to fscc routine
tbl_fscc:
	short		fscc_f		-	tbl_fscc	# 00
	short		fscc_eq		-	tbl_fscc	# 01
	short		fscc_ogt	-	tbl_fscc	# 02
	short		fscc_oge	-	tbl_fscc	# 03
	short		fscc_olt	-	tbl_fscc	# 04
	short		fscc_ole	-	tbl_fscc	# 05
	short		fscc_ogl	-	tbl_fscc	# 06
	short		fscc_or		-	tbl_fscc	# 07
	short		fscc_un		-	tbl_fscc	# 08
	short		fscc_ueq	-	tbl_fscc	# 09
	short		fscc_ugt	-	tbl_fscc	# 10
	short		fscc_uge	-	tbl_fscc	# 11
	short		fscc_ult	-	tbl_fscc	# 12
	short		fscc_ule	-	tbl_fscc	# 13
	short		fscc_neq	-	tbl_fscc	# 14
	short		fscc_t		-	tbl_fscc	# 15
	short		fscc_sf		-	tbl_fscc	# 16
	short		fscc_seq	-	tbl_fscc	# 17
	short		fscc_gt		-	tbl_fscc	# 18
	short		fscc_ge		-	tbl_fscc	# 19
	short		fscc_lt		-	tbl_fscc	# 20
	short		fscc_le		-	tbl_fscc	# 21
	short		fscc_gl		-	tbl_fscc	# 22
	short		fscc_gle	-	tbl_fscc	# 23
	short		fscc_ngle	-	tbl_fscc	# 24
	short		fscc_ngl	-	tbl_fscc	# 25
	short		fscc_nle	-	tbl_fscc	# 26
	short		fscc_nlt	-	tbl_fscc	# 27
	short		fscc_nge	-	tbl_fscc	# 28
	short		fscc_ngt	-	tbl_fscc	# 29
	short		fscc_sneq	-	tbl_fscc	# 30
	short		fscc_st		-	tbl_fscc	# 31
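#
# In the fscc routines that follow, %d0 carries the byte result: the
# "false" paths clear it explicitly (clr.b %d0), and the "_yes" paths are
# expected to set it to all ones before reaching fscc_done, per the Scc
# definition (0xff if the predicate is true, 0x00 if it is false).
#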
#########################################################################
# IEEE Nonaware tests							#
#									#
#	For the IEEE nonaware tests, we set the result based on the	#
# floating point condition codes. In addition, we check to see		#
# if the NAN bit is set, in which case BSUN and AIOP will be set.	#
#									#
#	The cases EQ and NE are shared by the Aware and Nonaware groups	#
# and are incapable of setting the BSUN exception bit.			#
#									#
#	Typically, only one of the two possible branch directions could	#
# have the NAN bit set.							#
#########################################################################
	fbeq.w		fscc_eq_yes		# equal?

	clr.b		%d0			# set false
	bra.w		fscc_done		# go finish

	bra.w		fscc_done		# go finish

	fbneq.w		fscc_neq_yes		# not equal?

	clr.b		%d0			# set false
	bra.w		fscc_done		# go finish

	bra.w		fscc_done		# go finish

	fbgt.w		fscc_gt_yes		# greater than?

	clr.b		%d0			# set false
	btst		&nan_bit, FPSR_CC(%a6)	# is NAN set in cc?
	beq.w		fscc_done		# no;go finish
	ori.l		&bsun_mask+aiop_mask, USER_FPSR(%a6) # set BSUN exc bit
	bra.w		fscc_chk_bsun		# go finish

	bra.w		fscc_done		# go finish

# not greater than:
	fbngt.w		fscc_ngt_yes		# not greater than?

	clr.b		%d0			# set false
	bra.w		fscc_done		# go finish

	btst		&nan_bit, FPSR_CC(%a6)	# is NAN set in cc?
	beq.w		fscc_done		# no;go finish
	ori.l		&bsun_mask+aiop_mask, USER_FPSR(%a6) # set BSUN exc bit
	bra.w		fscc_chk_bsun		# go finish

# greater than or equal:
	fbge.w		fscc_ge_yes		# greater than or equal?

	clr.b		%d0			# set false
	btst		&nan_bit, FPSR_CC(%a6)	# is NAN set in cc?
	beq.w		fscc_done		# no;go finish
	ori.l		&bsun_mask+aiop_mask, USER_FPSR(%a6) # set BSUN exc bit
	bra.w		fscc_chk_bsun		# go finish

	btst		&nan_bit, FPSR_CC(%a6)	# is NAN set in cc?
	beq.w		fscc_done		# no;go finish
	ori.l		&bsun_mask+aiop_mask, USER_FPSR(%a6) # set BSUN exc bit
	bra.w		fscc_chk_bsun		# go finish

# not (greater than or equal):
	fbnge.w		fscc_nge_yes		# not (greater than or equal)?

	clr.b		%d0			# set false
	bra.w		fscc_done		# go finish

	btst		&nan_bit, FPSR_CC(%a6)	# is NAN set in cc?
	beq.w		fscc_done		# no;go finish
	ori.l		&bsun_mask+aiop_mask, USER_FPSR(%a6) # set BSUN exc bit
	bra.w		fscc_chk_bsun		# go finish

	fblt.w		fscc_lt_yes		# less than?

	clr.b		%d0			# set false
	btst		&nan_bit, FPSR_CC(%a6)	# is NAN set in cc?
	beq.w		fscc_done		# no;go finish
	ori.l		&bsun_mask+aiop_mask, USER_FPSR(%a6) # set BSUN exc bit
	bra.w		fscc_chk_bsun		# go finish

	bra.w		fscc_done		# go finish

	fbnlt.w		fscc_nlt_yes		# not less than?

	clr.b		%d0			# set false
	bra.w		fscc_done		# go finish

	btst		&nan_bit, FPSR_CC(%a6)	# is NAN set in cc?
	beq.w		fscc_done		# no;go finish
	ori.l		&bsun_mask+aiop_mask, USER_FPSR(%a6) # set BSUN exc bit
	bra.w		fscc_chk_bsun		# go finish

# less than or equal:
	fble.w		fscc_le_yes		# less than or equal?

	clr.b		%d0			# set false
	btst		&nan_bit, FPSR_CC(%a6)	# is NAN set in cc?
	beq.w		fscc_done		# no;go finish
	ori.l		&bsun_mask+aiop_mask, USER_FPSR(%a6) # set BSUN exc bit
	bra.w		fscc_chk_bsun		# go finish

	btst		&nan_bit, FPSR_CC(%a6)	# is NAN set in cc?
	beq.w		fscc_done		# no;go finish
	ori.l		&bsun_mask+aiop_mask, USER_FPSR(%a6) # set BSUN exc bit
	bra.w		fscc_chk_bsun		# go finish

# not (less than or equal):
	fbnle.w		fscc_nle_yes		# not (less than or equal)?

	clr.b		%d0			# set false
	bra.w		fscc_done		# go finish

	btst		&nan_bit, FPSR_CC(%a6)	# is NAN set in cc?
	beq.w		fscc_done		# no;go finish
	ori.l		&bsun_mask+aiop_mask, USER_FPSR(%a6) # set BSUN exc bit
	bra.w		fscc_chk_bsun		# go finish

# greater or less than:
	fbgl.w		fscc_gl_yes		# greater or less than?

	clr.b		%d0			# set false
	btst		&nan_bit, FPSR_CC(%a6)	# is NAN set in cc?
	beq.w		fscc_done		# no;go finish
	ori.l		&bsun_mask+aiop_mask, USER_FPSR(%a6) # set BSUN exc bit
	bra.w		fscc_chk_bsun		# go finish

	bra.w		fscc_done		# go finish

# not (greater or less than):
	fbngl.w		fscc_ngl_yes		# not (greater or less than)?

	clr.b		%d0			# set false
	bra.w		fscc_done		# go finish

	btst		&nan_bit, FPSR_CC(%a6)	# is NAN set in cc?
	beq.w		fscc_done		# no;go finish
	ori.l		&bsun_mask+aiop_mask, USER_FPSR(%a6) # set BSUN exc bit
	bra.w		fscc_chk_bsun		# go finish

# greater, less, or equal:
	fbgle.w		fscc_gle_yes		# greater, less, or equal?

	clr.b		%d0			# set false
	ori.l		&bsun_mask+aiop_mask, USER_FPSR(%a6) # set BSUN exc bit
	bra.w		fscc_chk_bsun		# go finish

	bra.w		fscc_done		# go finish

# not (greater, less, or equal):
	fbngle.w	fscc_ngle_yes		# not (greater, less, or equal)?

	clr.b		%d0			# set false
	bra.w		fscc_done		# go finish

	ori.l		&bsun_mask+aiop_mask, USER_FPSR(%a6) # set BSUN exc bit
	bra.w		fscc_chk_bsun		# go finish
#########################################################################
# Miscellaneous tests							#
#									#
#	For the IEEE aware tests, we only have to set the result based on the #
# floating point condition codes. The BSUN exception will not be	#
# set for any of these tests.						#
#########################################################################
	clr.b		%d0			# set false
	bra.w		fscc_done		# go finish

	bra.w		fscc_done		# go finish

# signalling false:
	clr.b		%d0			# set false
	btst		&nan_bit, FPSR_CC(%a6)	# set BSUN exc bit
	beq.w		fscc_done		# no;go finish
	ori.l		&bsun_mask+aiop_mask, USER_FPSR(%a6) # set BSUN exc bit
	bra.w		fscc_chk_bsun		# go finish

	btst		&nan_bit, FPSR_CC(%a6)	# set BSUN exc bit
	beq.w		fscc_done		# no;go finish
	ori.l		&bsun_mask+aiop_mask, USER_FPSR(%a6) # set BSUN exc bit
	bra.w		fscc_chk_bsun		# go finish

# signalling equal:
	fbseq.w		fscc_seq_yes		# signalling equal?

	clr.b		%d0			# set false
	btst		&nan_bit, FPSR_CC(%a6)	# set BSUN exc bit
	beq.w		fscc_done		# no;go finish
	ori.l		&bsun_mask+aiop_mask, USER_FPSR(%a6) # set BSUN exc bit
	bra.w		fscc_chk_bsun		# go finish

	btst		&nan_bit, FPSR_CC(%a6)	# set BSUN exc bit
	beq.w		fscc_done		# no;go finish
	ori.l		&bsun_mask+aiop_mask, USER_FPSR(%a6) # set BSUN exc bit
	bra.w		fscc_chk_bsun		# go finish

# signalling not equal:
	fbsneq.w	fscc_sneq_yes		# signalling not equal?

	clr.b		%d0			# set false
	btst		&nan_bit, FPSR_CC(%a6)	# set BSUN exc bit
	beq.w		fscc_done		# no;go finish
	ori.l		&bsun_mask+aiop_mask, USER_FPSR(%a6) # set BSUN exc bit
	bra.w		fscc_chk_bsun		# go finish

	btst		&nan_bit, FPSR_CC(%a6)	# set BSUN exc bit
	beq.w		fscc_done		# no;go finish
	ori.l		&bsun_mask+aiop_mask, USER_FPSR(%a6) # set BSUN exc bit
	bra.w		fscc_chk_bsun		# go finish
#########################################################################
# IEEE Aware tests							#
#									#
#	For the IEEE aware tests, we only have to set the result based on the #
# floating point condition codes. The BSUN exception will not be	#
# set for any of these tests.						#
#########################################################################
# ordered greater than:
	fbogt.w		fscc_ogt_yes		# ordered greater than?

	clr.b		%d0			# set false
	bra.w		fscc_done		# go finish

	bra.w		fscc_done		# go finish

# unordered or less or equal:
	fbule.w		fscc_ule_yes		# unordered or less or equal?

	clr.b		%d0			# set false
	bra.w		fscc_done		# go finish

	bra.w		fscc_done		# go finish

# ordered greater than or equal:
	fboge.w		fscc_oge_yes		# ordered greater than or equal?

	clr.b		%d0			# set false
	bra.w		fscc_done		# go finish

	bra.w		fscc_done		# go finish

# unordered or less than:
	fbult.w		fscc_ult_yes		# unordered or less than?

	clr.b		%d0			# set false
	bra.w		fscc_done		# go finish

	bra.w		fscc_done		# go finish

# ordered less than:
	fbolt.w		fscc_olt_yes		# ordered less than?

	clr.b		%d0			# set false
	bra.w		fscc_done		# go finish

	bra.w		fscc_done		# go finish

# unordered or greater or equal:
	fbuge.w		fscc_uge_yes		# unordered or greater or equal?

	clr.b		%d0			# set false
	bra.w		fscc_done		# go finish

	bra.w		fscc_done		# go finish

# ordered less than or equal:
	fbole.w		fscc_ole_yes		# ordered less than or equal?

	clr.b		%d0			# set false
	bra.w		fscc_done		# go finish

	bra.w		fscc_done		# go finish

# unordered or greater than:
	fbugt.w		fscc_ugt_yes		# unordered or greater than?

	clr.b		%d0			# set false
	bra.w		fscc_done		# go finish

	bra.w		fscc_done		# go finish

# ordered greater or less than:
	fbogl.w		fscc_ogl_yes		# ordered greater or less than?

	clr.b		%d0			# set false
	bra.w		fscc_done		# go finish

	bra.w		fscc_done		# go finish

# unordered or equal:
	fbueq.w		fscc_ueq_yes		# unordered or equal?

	clr.b		%d0			# set false
	bra.w		fscc_done		# go finish

	bra.w		fscc_done		# go finish

	fbor.w		fscc_or_yes		# ordered?

	clr.b		%d0			# set false
	bra.w		fscc_done		# go finish

	bra.w		fscc_done		# go finish

	fbun.w		fscc_un_yes		# unordered?

	clr.b		%d0			# set false
	bra.w		fscc_done		# go finish

	bra.w		fscc_done		# go finish
#######################################################################

# the bsun exception bit was set. now, check to see if BSUN
# is enabled. if so, don't store result and correct stack frame
# for a bsun exception.
	btst		&bsun_bit,FPCR_ENABLE(%a6) # was BSUN set?

# the bsun exception bit was not set.
# the result has been selected.
# now, check to see if the result is to be stored in the data register
# file or in memory.
	mov.l		%d0,%a0			# save result for a moment

	mov.b		1+EXC_OPWORD(%a6),%d1	# fetch lo opword
	mov.l		%d1,%d0			# make a copy
	andi.b		&0x38,%d1		# extract src mode

	bne.b		fscc_mem_op		# it's a memory operation

	andi.w		&0x7,%d1		# pass index in d1
	mov.l		%a0,%d0			# pass result in d0
	bsr.l		store_dreg_b		# save result in regfile
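#
# The mode test above relies on the standard 68k effective-address
# encoding in the opcode word: bits 5-3 are the mode, bits 2-0 the
# register. After "andi.b &0x38,%d1":
#
#	0x00 -> Dn	(result goes to the register file, above)
#	0x18 -> (An)+	(fscc_mem_inc below)
#	0x20 -> -(An)	(fscc_mem_dec below)
#	anything else	(plain memory store through EXC_EA)
#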
# the stacked <ea> is correct with the exception of:
#	-> Dn : <ea> is garbage
#
# if the addressing mode is post-increment or pre-decrement,
# then the address registers have not been updated.
	cmpi.b		%d1,&0x18		# is <ea> (An)+ ?
	beq.b		fscc_mem_inc		# yes
	cmpi.b		%d1,&0x20		# is <ea> -(An) ?
	beq.b		fscc_mem_dec		# yes

	mov.l		%a0,%d0			# pass result in d0
	mov.l		EXC_EA(%a6),%a0		# fetch <ea>
	bsr.l		_dmem_write_byte	# write result byte

	tst.l		%d1			# did dstore fail?
	bne.w		fscc_err		# yes

# addressing mode is post-increment. write the result byte. if the write
# fails then don't update the address register. if write passes then
# call inc_areg() to update the address register.
	mov.l		%a0,%d0			# pass result in d0
	mov.l		EXC_EA(%a6),%a0		# fetch <ea>
	bsr.l		_dmem_write_byte	# write result byte

	tst.l		%d1			# did dstore fail?
	bne.w		fscc_err		# yes

	mov.b		0x1+EXC_OPWORD(%a6),%d1	# fetch opword
	andi.w		&0x7,%d1		# pass index in d1
	movq.l		&0x1,%d0		# pass amt to inc by
	bsr.l		inc_areg		# increment address register

# addressing mode is pre-decrement. write the result byte. if the write
# fails then don't update the address register. if the write passes then
# call dec_areg() to update the address register.
	mov.l		%a0,%d0			# pass result in d0
	mov.l		EXC_EA(%a6),%a0		# fetch <ea>
	bsr.l		_dmem_write_byte	# write result byte

	tst.l		%d1			# did dstore fail?
	bne.w		fscc_err		# yes

	mov.b		0x1+EXC_OPWORD(%a6),%d1	# fetch opword
	andi.w		&0x7,%d1		# pass index in d1
	movq.l		&0x1,%d0		# pass amt to dec by
	bsr.l		dec_areg		# decrement address register

# the emulation routine set bsun and BSUN was enabled. have to
# fix stack and jump to the bsun handler.
# let the caller of this routine shift the stack frame up to
# eliminate the effective address field.
	mov.b		&fbsun_flg,SPCOND_FLG(%a6)

# the byte write to memory has failed. pass the failing effective address
# and a FSLW to funimp_dacc().
	mov.w		&0x00a1,EXC_VOFF(%a6)
#########################################################################
# XDEF ****************************************************************	#
#	fmovm_dynamic(): emulate "fmovm" dynamic instruction		#
#									#
# XREF ****************************************************************	#
#	fetch_dreg() - fetch data register				#
#	{i,d,}mem_read() - fetch data from memory			#
#	_mem_write() - write data to memory				#
#	iea_iacc() - instruction memory access error occurred		#
#	iea_dacc() - data memory access error occurred			#
#	restore() - restore An index regs if access error occurred	#
#									#
# INPUT ***************************************************************	#
#									#
# OUTPUT **************************************************************	#
#	If instr is "fmovm Dn,-(A7)" from supervisor mode,		#
#		d0 = size of dump					#
#	Else if instruction access error,				#
#	Else if data access error,					#
#		a0 = address of fault					#
#									#
# ALGORITHM ***********************************************************	#
#	The effective address must be calculated since this is entered	#
# from an "Unimplemented Effective Address" exception handler. So, we	#
# have our own fcalc_ea() routine here. If an access error is flagged	#
# by a _{i,d,}mem_read() call, we must exit through the special	#
# access error exit handler.						#
#	The data register is determined and its value loaded to get the #
# string of FP registers affected. This value is used as an index into	#
# a lookup table such that we can determine the number of bytes	#
# that will be moved.							#
#	If the instruction is "fmovm.x <ea>,Dn", a _mem_read() is used	#
# to read in all FP values. Again, _mem_read() may fail and require a	#
# special exit.								#
#	If the instruction is "fmovm.x DN,<ea>", a _mem_write() is used #
# to write all FP values. _mem_write() may also fail.			#
#	If the instruction is "fmovm.x DN,-(a7)" from supervisor mode,	#
# then we return the size of the dump and the string to the caller	#
# so that the move can occur outside of this routine. This special	#
# case is required so that moves to the system stack are handled	#
# correctly.								#
#									#
#	fmovm.x	dn, <ea>						#
#	fmovm.x	<ea>, dn						#
#									#
#	<WORD 1>		<WORD2>					#
#	1111 0010 00 |<ea>|	11@& 1000 0$$$ 0000			#
#									#
#	& = (0): predecrement addressing mode				#
#	    (1): postincrement or control addressing mode		#
#	@ = (0): move listed regs from memory to the FPU		#
#	    (1): move listed regs from the FPU to memory		#
#	$$$  : index of data register holding reg select mask		#
#									#
#	If the data register holds a zero, then the			#
#	instruction is a nop.						#
#########################################################################
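#
# Worked example (values taken from tbl_fmovm_size further down): for a
# postincrement or control mode fmovm, bit 7 of the register select mask
# names FP0 and bit 0 names FP7. A mask of 0xc0 therefore selects FP0 and
# FP1, and tbl_fmovm_size[0xc0] = 0x18 = 24 bytes = 2 regs * 12 bytes.
# A mask of 0x00 moves nothing and the instruction is treated as a nop.
#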
	global		fmovm_dynamic

# extract the data register in which the bit string resides...
	mov.b		1+EXC_EXTWORD(%a6),%d1	# fetch extword
	andi.w		&0x70,%d1		# extract reg bits
	lsr.b		&0x4,%d1		# shift into lo bits

# fetch the bit string into d0...
	bsr.l		fetch_dreg		# fetch reg string

	andi.l		&0x000000ff,%d0		# keep only lo byte

	mov.l		%d0,-(%sp)		# save strg
	mov.b		(tbl_fmovm_size.w,%pc,%d0),%d0
	mov.l		%d0,-(%sp)		# save size
	bsr.l		fmovm_calc_ea		# calculate <ea>
	mov.l		(%sp)+,%d0		# restore size
	mov.l		(%sp)+,%d1		# restore strg

# if the bit string is a zero, then the operation is a no-op
# but, make sure that we've calculated ea and advanced the opword pointer
	beq.w		fmovm_data_done

# separate move ins from move outs...
	btst		&0x5,EXC_EXTWORD(%a6)	# is it a move in or out?
	beq.w		fmovm_data_in		# it's a move out

	btst		&0x4,EXC_EXTWORD(%a6)	# control or predecrement?
	bne.w		fmovm_out_ctrl		# control
############################

# for predecrement mode, the bit string is the opposite of both control
# operations and postincrement mode. (bit7 = FP7 ... bit0 = FP0)
# here, we convert it to be just like the others...
	mov.b		(tbl_fmovm_convert.w,%pc,%d1.w*1),%d1

	btst		&0x5,EXC_SR(%a6)	# user or supervisor mode?
	beq.b		fmovm_out_ctrl		# user

fmovm_out_predec_s:
	cmpi.b		SPCOND_FLG(%a6),&mda7_flg # is <ea> mode -(a7)?
	bne.b		fmovm_out_ctrl

# the operation was unfortunately an: fmovm.x dn,-(sp)
# called from supervisor mode.
# we're also passing "size" and "strg" back to the calling routine

############################
	mov.l		%a0,%a1			# move <ea> to a1

	sub.l		%d0,%sp			# subtract size of dump

	tst.b		%d1			# should FP0 be moved?
	bpl.b		fmovm_out_ctrl_fp1	# no

	mov.l		0x0+EXC_FP0(%a6),(%a0)+	# yes
	mov.l		0x4+EXC_FP0(%a6),(%a0)+
	mov.l		0x8+EXC_FP0(%a6),(%a0)+

fmovm_out_ctrl_fp1:
	lsl.b		&0x1,%d1		# should FP1 be moved?
	bpl.b		fmovm_out_ctrl_fp2	# no

	mov.l		0x0+EXC_FP1(%a6),(%a0)+	# yes
	mov.l		0x4+EXC_FP1(%a6),(%a0)+
	mov.l		0x8+EXC_FP1(%a6),(%a0)+

fmovm_out_ctrl_fp2:
	lsl.b		&0x1,%d1		# should FP2 be moved?
	bpl.b		fmovm_out_ctrl_fp3	# no

	fmovm.x		&0x20,(%a0)		# yes

fmovm_out_ctrl_fp3:
	lsl.b		&0x1,%d1		# should FP3 be moved?
	bpl.b		fmovm_out_ctrl_fp4	# no

	fmovm.x		&0x10,(%a0)		# yes

fmovm_out_ctrl_fp4:
	lsl.b		&0x1,%d1		# should FP4 be moved?
	bpl.b		fmovm_out_ctrl_fp5	# no

	fmovm.x		&0x08,(%a0)		# yes

fmovm_out_ctrl_fp5:
	lsl.b		&0x1,%d1		# should FP5 be moved?
	bpl.b		fmovm_out_ctrl_fp6	# no

	fmovm.x		&0x04,(%a0)		# yes

fmovm_out_ctrl_fp6:
	lsl.b		&0x1,%d1		# should FP6 be moved?
	bpl.b		fmovm_out_ctrl_fp7	# no

	fmovm.x		&0x02,(%a0)		# yes

fmovm_out_ctrl_fp7:
	lsl.b		&0x1,%d1		# should FP7 be moved?
	bpl.b		fmovm_out_ctrl_done	# no

	fmovm.x		&0x01,(%a0)		# yes

fmovm_out_ctrl_done:
	mov.l		%a1,L_SCR1(%a6)

	lea		(%sp),%a0		# pass: supervisor src
	mov.l		%d0,-(%sp)		# save size
	bsr.l		_dmem_write		# copy data to user mem

	add.l		%d0,%sp			# clear fpreg data from stack

	tst.l		%d1			# did dstore err?
	bne.w		fmovm_out_err		# yes
	mov.l		%a0,L_SCR1(%a6)

	sub.l		%d0,%sp			# make room for fpregs

	mov.l		%d1,-(%sp)		# save bit string for later
	mov.l		%d0,-(%sp)		# save # of bytes

	bsr.l		_dmem_read		# copy data from user mem

	mov.l		(%sp)+,%d0		# retrieve # of bytes

	tst.l		%d1			# did dfetch fail?
	bne.w		fmovm_in_err		# yes

	mov.l		(%sp)+,%d1		# load bit string

	lea		(%sp),%a0		# addr of stack

	tst.b		%d1			# should FP0 be moved?
	bpl.b		fmovm_data_in_fp1	# no

	mov.l		(%a0)+,0x0+EXC_FP0(%a6)	# yes
	mov.l		(%a0)+,0x4+EXC_FP0(%a6)
	mov.l		(%a0)+,0x8+EXC_FP0(%a6)

	lsl.b		&0x1,%d1		# should FP1 be moved?
	bpl.b		fmovm_data_in_fp2	# no

	mov.l		(%a0)+,0x0+EXC_FP1(%a6)	# yes
	mov.l		(%a0)+,0x4+EXC_FP1(%a6)
	mov.l		(%a0)+,0x8+EXC_FP1(%a6)

	lsl.b		&0x1,%d1		# should FP2 be moved?
	bpl.b		fmovm_data_in_fp3	# no

	fmovm.x		(%a0)+,&0x20		# yes

	lsl.b		&0x1,%d1		# should FP3 be moved?
	bpl.b		fmovm_data_in_fp4	# no

	fmovm.x		(%a0)+,&0x10		# yes

	lsl.b		&0x1,%d1		# should FP4 be moved?
	bpl.b		fmovm_data_in_fp5	# no

	fmovm.x		(%a0)+,&0x08		# yes

	lsl.b		&0x1,%d1		# should FP5 be moved?
	bpl.b		fmovm_data_in_fp6	# no

	fmovm.x		(%a0)+,&0x04		# yes

	lsl.b		&0x1,%d1		# should FP6 be moved?
	bpl.b		fmovm_data_in_fp7	# no

	fmovm.x		(%a0)+,&0x02		# yes

	lsl.b		&0x1,%d1		# should FP7 be moved?
	bpl.b		fmovm_data_in_done	# no

	fmovm.x		(%a0)+,&0x01		# yes

fmovm_data_in_done:
	add.l		%d0,%sp			# remove fpregs from stack
#####################################

##############################################################################

# table indexed by the operation's bit string that gives the number
# of bytes that will be moved.
#
# number of bytes = (# of 1's in bit string) * 12(bytes/fpreg)
tbl_fmovm_size:
	byte	0x00,0x0c,0x0c,0x18,0x0c,0x18,0x18,0x24
	byte	0x0c,0x18,0x18,0x24,0x18,0x24,0x24,0x30
	byte	0x0c,0x18,0x18,0x24,0x18,0x24,0x24,0x30
	byte	0x18,0x24,0x24,0x30,0x24,0x30,0x30,0x3c
	byte	0x0c,0x18,0x18,0x24,0x18,0x24,0x24,0x30
	byte	0x18,0x24,0x24,0x30,0x24,0x30,0x30,0x3c
	byte	0x18,0x24,0x24,0x30,0x24,0x30,0x30,0x3c
	byte	0x24,0x30,0x30,0x3c,0x30,0x3c,0x3c,0x48
	byte	0x0c,0x18,0x18,0x24,0x18,0x24,0x24,0x30
	byte	0x18,0x24,0x24,0x30,0x24,0x30,0x30,0x3c
	byte	0x18,0x24,0x24,0x30,0x24,0x30,0x30,0x3c
	byte	0x24,0x30,0x30,0x3c,0x30,0x3c,0x3c,0x48
	byte	0x18,0x24,0x24,0x30,0x24,0x30,0x30,0x3c
	byte	0x24,0x30,0x30,0x3c,0x30,0x3c,0x3c,0x48
	byte	0x24,0x30,0x30,0x3c,0x30,0x3c,0x3c,0x48
	byte	0x30,0x3c,0x3c,0x48,0x3c,0x48,0x48,0x54
	byte	0x0c,0x18,0x18,0x24,0x18,0x24,0x24,0x30
	byte	0x18,0x24,0x24,0x30,0x24,0x30,0x30,0x3c
	byte	0x18,0x24,0x24,0x30,0x24,0x30,0x30,0x3c
	byte	0x24,0x30,0x30,0x3c,0x30,0x3c,0x3c,0x48
	byte	0x18,0x24,0x24,0x30,0x24,0x30,0x30,0x3c
	byte	0x24,0x30,0x30,0x3c,0x30,0x3c,0x3c,0x48
	byte	0x24,0x30,0x30,0x3c,0x30,0x3c,0x3c,0x48
	byte	0x30,0x3c,0x3c,0x48,0x3c,0x48,0x48,0x54
	byte	0x18,0x24,0x24,0x30,0x24,0x30,0x30,0x3c
	byte	0x24,0x30,0x30,0x3c,0x30,0x3c,0x3c,0x48
	byte	0x24,0x30,0x30,0x3c,0x30,0x3c,0x3c,0x48
	byte	0x30,0x3c,0x3c,0x48,0x3c,0x48,0x48,0x54
	byte	0x24,0x30,0x30,0x3c,0x30,0x3c,0x3c,0x48
	byte	0x30,0x3c,0x3c,0x48,0x3c,0x48,0x48,0x54
	byte	0x30,0x3c,0x3c,0x48,0x3c,0x48,0x48,0x54
	byte	0x3c,0x48,0x48,0x54,0x48,0x54,0x54,0x60
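#
# Sanity check on the table above: each entry is simply
# (number of set bits in the index) * 12. For example, index 0x07 has
# three bits set, and entry 0x07 of the first row is 0x24 = 36 = 3 * 12.
#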
# table to convert a pre-decrement bit string into a post-increment
# or control bit string.
# ex:	0x00	==>	0x00
tbl_fmovm_convert:
	byte	0x00,0x80,0x40,0xc0,0x20,0xa0,0x60,0xe0
	byte	0x10,0x90,0x50,0xd0,0x30,0xb0,0x70,0xf0
	byte	0x08,0x88,0x48,0xc8,0x28,0xa8,0x68,0xe8
	byte	0x18,0x98,0x58,0xd8,0x38,0xb8,0x78,0xf8
	byte	0x04,0x84,0x44,0xc4,0x24,0xa4,0x64,0xe4
	byte	0x14,0x94,0x54,0xd4,0x34,0xb4,0x74,0xf4
	byte	0x0c,0x8c,0x4c,0xcc,0x2c,0xac,0x6c,0xec
	byte	0x1c,0x9c,0x5c,0xdc,0x3c,0xbc,0x7c,0xfc
	byte	0x02,0x82,0x42,0xc2,0x22,0xa2,0x62,0xe2
	byte	0x12,0x92,0x52,0xd2,0x32,0xb2,0x72,0xf2
	byte	0x0a,0x8a,0x4a,0xca,0x2a,0xaa,0x6a,0xea
	byte	0x1a,0x9a,0x5a,0xda,0x3a,0xba,0x7a,0xfa
	byte	0x06,0x86,0x46,0xc6,0x26,0xa6,0x66,0xe6
	byte	0x16,0x96,0x56,0xd6,0x36,0xb6,0x76,0xf6
	byte	0x0e,0x8e,0x4e,0xce,0x2e,0xae,0x6e,0xee
	byte	0x1e,0x9e,0x5e,0xde,0x3e,0xbe,0x7e,0xfe
	byte	0x01,0x81,0x41,0xc1,0x21,0xa1,0x61,0xe1
	byte	0x11,0x91,0x51,0xd1,0x31,0xb1,0x71,0xf1
	byte	0x09,0x89,0x49,0xc9,0x29,0xa9,0x69,0xe9
	byte	0x19,0x99,0x59,0xd9,0x39,0xb9,0x79,0xf9
	byte	0x05,0x85,0x45,0xc5,0x25,0xa5,0x65,0xe5
	byte	0x15,0x95,0x55,0xd5,0x35,0xb5,0x75,0xf5
	byte	0x0d,0x8d,0x4d,0xcd,0x2d,0xad,0x6d,0xed
	byte	0x1d,0x9d,0x5d,0xdd,0x3d,0xbd,0x7d,0xfd
	byte	0x03,0x83,0x43,0xc3,0x23,0xa3,0x63,0xe3
	byte	0x13,0x93,0x53,0xd3,0x33,0xb3,0x73,0xf3
	byte	0x0b,0x8b,0x4b,0xcb,0x2b,0xab,0x6b,0xeb
	byte	0x1b,0x9b,0x5b,0xdb,0x3b,0xbb,0x7b,0xfb
	byte	0x07,0x87,0x47,0xc7,0x27,0xa7,0x67,0xe7
	byte	0x17,0x97,0x57,0xd7,0x37,0xb7,0x77,0xf7
	byte	0x0f,0x8f,0x4f,0xcf,0x2f,0xaf,0x6f,0xef
	byte	0x1f,0x9f,0x5f,0xdf,0x3f,0xbf,0x7f,0xff
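#
# The conversion above is a pure bit reversal of the 8-bit select mask,
# e.g. (matching the table rows): 0x01 ==> 0x80, 0x80 ==> 0x01, and
# 0x0f ==> 0xf0, so a predecrement-style mask reads like the others.
#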
	global		fmovm_calc_ea
###############################################
# _fmovm_calc_ea: calculate effective address #
###############################################
	mov.l		%d0,%a0			# move # bytes to a0

# currently, MODE and REG are taken from the EXC_OPWORD. this could be
# easily changed if they were inputs passed in registers.
	mov.w		EXC_OPWORD(%a6),%d0	# fetch opcode word
	mov.w		%d0,%d1			# make a copy

	andi.w		&0x3f,%d0		# extract mode field
	andi.l		&0x7,%d1		# extract reg  field

# jump to the corresponding function for each {MODE,REG} pair.
	mov.w		(tbl_fea_mode.b,%pc,%d0.w*2),%d0 # fetch jmp distance
	jmp		(tbl_fea_mode.b,%pc,%d0.w*1)	# jmp to correct ea mode
tbl_fea_mode:
	short		tbl_fea_mode	-	tbl_fea_mode
	short		tbl_fea_mode	-	tbl_fea_mode
	short		tbl_fea_mode	-	tbl_fea_mode
	short		tbl_fea_mode	-	tbl_fea_mode
	short		tbl_fea_mode	-	tbl_fea_mode
	short		tbl_fea_mode	-	tbl_fea_mode
	short		tbl_fea_mode	-	tbl_fea_mode
	short		tbl_fea_mode	-	tbl_fea_mode

	short		tbl_fea_mode	-	tbl_fea_mode
	short		tbl_fea_mode	-	tbl_fea_mode
	short		tbl_fea_mode	-	tbl_fea_mode
	short		tbl_fea_mode	-	tbl_fea_mode
	short		tbl_fea_mode	-	tbl_fea_mode
	short		tbl_fea_mode	-	tbl_fea_mode
	short		tbl_fea_mode	-	tbl_fea_mode
	short		tbl_fea_mode	-	tbl_fea_mode

	short		faddr_ind_a0	-	tbl_fea_mode
	short		faddr_ind_a1	-	tbl_fea_mode
	short		faddr_ind_a2	-	tbl_fea_mode
	short		faddr_ind_a3	-	tbl_fea_mode
	short		faddr_ind_a4	-	tbl_fea_mode
	short		faddr_ind_a5	-	tbl_fea_mode
	short		faddr_ind_a6	-	tbl_fea_mode
	short		faddr_ind_a7	-	tbl_fea_mode

	short		faddr_ind_p_a0	-	tbl_fea_mode
	short		faddr_ind_p_a1	-	tbl_fea_mode
	short		faddr_ind_p_a2	-	tbl_fea_mode
	short		faddr_ind_p_a3	-	tbl_fea_mode
	short		faddr_ind_p_a4	-	tbl_fea_mode
	short		faddr_ind_p_a5	-	tbl_fea_mode
	short		faddr_ind_p_a6	-	tbl_fea_mode
	short		faddr_ind_p_a7	-	tbl_fea_mode

	short		faddr_ind_m_a0	-	tbl_fea_mode
	short		faddr_ind_m_a1	-	tbl_fea_mode
	short		faddr_ind_m_a2	-	tbl_fea_mode
	short		faddr_ind_m_a3	-	tbl_fea_mode
	short		faddr_ind_m_a4	-	tbl_fea_mode
	short		faddr_ind_m_a5	-	tbl_fea_mode
	short		faddr_ind_m_a6	-	tbl_fea_mode
	short		faddr_ind_m_a7	-	tbl_fea_mode

	short		faddr_ind_disp_a0	-	tbl_fea_mode
	short		faddr_ind_disp_a1	-	tbl_fea_mode
	short		faddr_ind_disp_a2	-	tbl_fea_mode
	short		faddr_ind_disp_a3	-	tbl_fea_mode
	short		faddr_ind_disp_a4	-	tbl_fea_mode
	short		faddr_ind_disp_a5	-	tbl_fea_mode
	short		faddr_ind_disp_a6	-	tbl_fea_mode
	short		faddr_ind_disp_a7	-	tbl_fea_mode

	short		faddr_ind_ext	-	tbl_fea_mode
	short		faddr_ind_ext	-	tbl_fea_mode
	short		faddr_ind_ext	-	tbl_fea_mode
	short		faddr_ind_ext	-	tbl_fea_mode
	short		faddr_ind_ext	-	tbl_fea_mode
	short		faddr_ind_ext	-	tbl_fea_mode
	short		faddr_ind_ext	-	tbl_fea_mode
	short		faddr_ind_ext	-	tbl_fea_mode

	short		fabs_short	-	tbl_fea_mode
	short		fabs_long	-	tbl_fea_mode
	short		fpc_ind		-	tbl_fea_mode
	short		fpc_ind_ext	-	tbl_fea_mode
	short		tbl_fea_mode	-	tbl_fea_mode
	short		tbl_fea_mode	-	tbl_fea_mode
	short		tbl_fea_mode	-	tbl_fea_mode
	short		tbl_fea_mode	-	tbl_fea_mode
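#
# The table above has one word per {MODE,REG} pair: the low six bits of
# the opcode word (mode in bits 5-3, register in bits 2-0) are doubled and
# used as the byte offset. For example, "(a3)" is mode 2, register 3,
# giving index 19 and therefore the faddr_ind_a3 entry; the first sixteen
# entries (Dn and An direct) just point back at tbl_fea_mode itself.
#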
###################################
# Address register indirect: (An) #
###################################
	mov.l		EXC_DREGS+0x8(%a6),%a0	# Get current a0

	mov.l		EXC_DREGS+0xc(%a6),%a0	# Get current a1

	mov.l		%a2,%a0			# Get current a2

	mov.l		%a3,%a0			# Get current a3

	mov.l		%a4,%a0			# Get current a4

	mov.l		%a5,%a0			# Get current a5

	mov.l		(%a6),%a0		# Get current a6

	mov.l		EXC_A7(%a6),%a0		# Get current a7
#####################################################
# Address register indirect w/ postincrement: (An)+ #
#####################################################
	mov.l		EXC_DREGS+0x8(%a6),%d0	# Get current a0
	add.l		%a0,%d1			# Increment
	mov.l		%d1,EXC_DREGS+0x8(%a6)	# Save incr value

	mov.l		EXC_DREGS+0xc(%a6),%d0	# Get current a1
	add.l		%a0,%d1			# Increment
	mov.l		%d1,EXC_DREGS+0xc(%a6)	# Save incr value

	mov.l		%a2,%d0			# Get current a2
	add.l		%a0,%d1			# Increment
	mov.l		%d1,%a2			# Save incr value

	mov.l		%a3,%d0			# Get current a3
	add.l		%a0,%d1			# Increment
	mov.l		%d1,%a3			# Save incr value

	mov.l		%a4,%d0			# Get current a4
	add.l		%a0,%d1			# Increment
	mov.l		%d1,%a4			# Save incr value

	mov.l		%a5,%d0			# Get current a5
	add.l		%a0,%d1			# Increment
	mov.l		%d1,%a5			# Save incr value

	mov.l		(%a6),%d0		# Get current a6
	add.l		%a0,%d1			# Increment
	mov.l		%d1,(%a6)		# Save incr value

	mov.b		&mia7_flg,SPCOND_FLG(%a6) # set "special case" flag

	mov.l		EXC_A7(%a6),%d0		# Get current a7
	add.l		%a0,%d1			# Increment
	mov.l		%d1,EXC_A7(%a6)		# Save incr value
####################################################
# Address register indirect w/ predecrement: -(An) #
####################################################
	mov.l		EXC_DREGS+0x8(%a6),%d0	# Get current a0
	sub.l		%a0,%d0			# Decrement
	mov.l		%d0,EXC_DREGS+0x8(%a6)	# Save decr value

	mov.l		EXC_DREGS+0xc(%a6),%d0	# Get current a1
	sub.l		%a0,%d0			# Decrement
	mov.l		%d0,EXC_DREGS+0xc(%a6)	# Save decr value

	mov.l		%a2,%d0			# Get current a2
	sub.l		%a0,%d0			# Decrement
	mov.l		%d0,%a2			# Save decr value

	mov.l		%a3,%d0			# Get current a3
	sub.l		%a0,%d0			# Decrement
	mov.l		%d0,%a3			# Save decr value

	mov.l		%a4,%d0			# Get current a4
	sub.l		%a0,%d0			# Decrement
	mov.l		%d0,%a4			# Save decr value

	mov.l		%a5,%d0			# Get current a5
	sub.l		%a0,%d0			# Decrement
	mov.l		%d0,%a5			# Save decr value

	mov.l		(%a6),%d0		# Get current a6
	sub.l		%a0,%d0			# Decrement
	mov.l		%d0,(%a6)		# Save decr value

	mov.b		&mda7_flg,SPCOND_FLG(%a6) # set "special case" flag

	mov.l		EXC_A7(%a6),%d0		# Get current a7
	sub.l		%a0,%d0			# Decrement
	mov.l		%d0,EXC_A7(%a6)		# Save decr value
########################################################
# Address register indirect w/ displacement: (d16, An) #
########################################################
	mov.l		EXC_EXTWPTR(%a6),%a0	# fetch instruction addr
	addq.l		&0x2,EXC_EXTWPTR(%a6)	# incr instruction ptr
	bsr.l		_imem_read_word

	tst.l		%d1			# did ifetch fail?
	bne.l		iea_iacc		# yes

	mov.w		%d0,%a0			# sign extend displacement

	add.l		EXC_DREGS+0x8(%a6),%a0	# a0 + d16

	mov.l		EXC_EXTWPTR(%a6),%a0	# fetch instruction addr
	addq.l		&0x2,EXC_EXTWPTR(%a6)	# incr instruction ptr
	bsr.l		_imem_read_word

	tst.l		%d1			# did ifetch fail?
	bne.l		iea_iacc		# yes

	mov.w		%d0,%a0			# sign extend displacement

	add.l		EXC_DREGS+0xc(%a6),%a0	# a1 + d16

	mov.l		EXC_EXTWPTR(%a6),%a0	# fetch instruction addr
	addq.l		&0x2,EXC_EXTWPTR(%a6)	# incr instruction ptr
	bsr.l		_imem_read_word

	tst.l		%d1			# did ifetch fail?
	bne.l		iea_iacc		# yes

	mov.w		%d0,%a0			# sign extend displacement

	add.l		%a2,%a0			# a2 + d16

	mov.l		EXC_EXTWPTR(%a6),%a0	# fetch instruction addr
	addq.l		&0x2,EXC_EXTWPTR(%a6)	# incr instruction ptr
	bsr.l		_imem_read_word

	tst.l		%d1			# did ifetch fail?
	bne.l		iea_iacc		# yes

	mov.w		%d0,%a0			# sign extend displacement

	add.l		%a3,%a0			# a3 + d16

	mov.l		EXC_EXTWPTR(%a6),%a0	# fetch instruction addr
	addq.l		&0x2,EXC_EXTWPTR(%a6)	# incr instruction ptr
	bsr.l		_imem_read_word

	tst.l		%d1			# did ifetch fail?
	bne.l		iea_iacc		# yes

	mov.w		%d0,%a0			# sign extend displacement

	add.l		%a4,%a0			# a4 + d16

	mov.l		EXC_EXTWPTR(%a6),%a0	# fetch instruction addr
	addq.l		&0x2,EXC_EXTWPTR(%a6)	# incr instruction ptr
	bsr.l		_imem_read_word

	tst.l		%d1			# did ifetch fail?
	bne.l		iea_iacc		# yes

	mov.w		%d0,%a0			# sign extend displacement

	add.l		%a5,%a0			# a5 + d16

	mov.l		EXC_EXTWPTR(%a6),%a0	# fetch instruction addr
	addq.l		&0x2,EXC_EXTWPTR(%a6)	# incr instruction ptr
	bsr.l		_imem_read_word

	tst.l		%d1			# did ifetch fail?
	bne.l		iea_iacc		# yes

	mov.w		%d0,%a0			# sign extend displacement

	add.l		(%a6),%a0		# a6 + d16

	mov.l		EXC_EXTWPTR(%a6),%a0	# fetch instruction addr
	addq.l		&0x2,EXC_EXTWPTR(%a6)	# incr instruction ptr
	bsr.l		_imem_read_word

	tst.l		%d1			# did ifetch fail?
	bne.l		iea_iacc		# yes

	mov.w		%d0,%a0			# sign extend displacement

	add.l		EXC_A7(%a6),%a0		# a7 + d16
########################################################################
# Address register indirect w/ index(8-bit displacement): (d8, An, Xn) #
#    "       "        "    w/   "  (base displacement): (bd, An, Xn)   #
# Memory indirect postindexed: ([bd, An], Xn, od)                      #
# Memory indirect preindexed: ([bd, An, Xn], od)                       #
########################################################################
	bsr.l		fetch_dreg		# fetch base areg

	mov.l		EXC_EXTWPTR(%a6),%a0	# fetch instruction addr
	addq.l		&0x2,EXC_EXTWPTR(%a6)	# incr instruction ptr
	bsr.l		_imem_read_word		# fetch extword in d0

	tst.l		%d1			# did ifetch fail?
	bne.l		iea_iacc		# yes

	bne.w		fcalc_mem_ind

	mov.l		%d0,L_SCR1(%a6)		# hold opword

	andi.w		&0xf,%d1		# extract index regno

# count on fetch_dreg() not to alter a0...
	bsr.l		fetch_dreg		# fetch index

	mov.l		%d2,-(%sp)		# save d2
	mov.l		L_SCR1(%a6),%d2		# fetch opword

	btst		&0xb,%d2		# is it word or long?
	ext.l		%d0			# sign extend word index

	andi.l		&0x3,%d1		# extract scale value

	lsl.l		%d1,%d0			# shift index by scale

	extb.l		%d2			# sign extend displacement
	add.l		%d2,%d0			# index + disp
	add.l		%d0,%a0			# An + (index + disp)

	mov.l		(%sp)+,%d2		# restore old d2
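#
# Note on the scale handling above: the two-bit scale field from the
# extension word is used directly as a shift count, so values 0-3 scale
# the index by 1, 2, 4 or 8, matching the 68020+ scaled-index modes.
#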
###########################
# Absolute short: (XXX).W #
###########################
	mov.l		EXC_EXTWPTR(%a6),%a0	# fetch instruction addr
	addq.l		&0x2,EXC_EXTWPTR(%a6)	# incr instruction ptr
	bsr.l		_imem_read_word		# fetch short address

	tst.l		%d1			# did ifetch fail?
	bne.l		iea_iacc		# yes

	mov.w		%d0,%a0			# return <ea> in a0

##########################
# Absolute long: (XXX).L #
##########################
	mov.l		EXC_EXTWPTR(%a6),%a0	# fetch instruction addr
	addq.l		&0x4,EXC_EXTWPTR(%a6)	# incr instruction ptr
	bsr.l		_imem_read_long		# fetch long address

	tst.l		%d1			# did ifetch fail?
	bne.l		iea_iacc		# yes

	mov.l		%d0,%a0			# return <ea> in a0

#######################################################
# Program counter indirect w/ displacement: (d16, PC) #
#######################################################
	mov.l		EXC_EXTWPTR(%a6),%a0	# fetch instruction addr
	addq.l		&0x2,EXC_EXTWPTR(%a6)	# incr instruction ptr
	bsr.l		_imem_read_word		# fetch word displacement

	tst.l		%d1			# did ifetch fail?
	bne.l		iea_iacc		# yes

	mov.w		%d0,%a0			# sign extend displacement

	add.l		EXC_EXTWPTR(%a6),%a0	# pc + d16

# _imem_read_word() increased the extwptr by 2. need to adjust here.
	subq.l		&0x2,%a0		# adjust <ea>
##########################################################
# PC indirect w/ index(8-bit displacement): (d8, PC, An) #
# "     "     w/   "  (base displacement): (bd, PC, An)  #
# PC memory indirect postindexed: ([bd, PC], Xn, od)     #
# PC memory indirect preindexed: ([bd, PC, Xn], od)      #
##########################################################
	mov.l		EXC_EXTWPTR(%a6),%a0	# fetch instruction addr
	addq.l		&0x2,EXC_EXTWPTR(%a6)	# incr instruction ptr
	bsr.l		_imem_read_word		# fetch ext word

	tst.l		%d1			# did ifetch fail?
	bne.l		iea_iacc		# yes

	mov.l		EXC_EXTWPTR(%a6),%a0	# put base in a0
	subq.l		&0x2,%a0		# adjust base

	btst		&0x8,%d0		# is disp only 8 bits?
	bne.w		fcalc_mem_ind		# calc memory indirect

	mov.l		%d0,L_SCR1(%a6)		# store opword

	mov.l		%d0,%d1			# make extword copy
	rol.w		&0x4,%d1		# rotate reg num into place
	andi.w		&0xf,%d1		# extract register number

# count on fetch_dreg() not to alter a0...
	bsr.l		fetch_dreg		# fetch index

	mov.l		%d2,-(%sp)		# save d2
	mov.l		L_SCR1(%a6),%d2		# fetch opword

	btst		&0xb,%d2		# is index word or long?
	bne.b		fpii8_long		# long
	ext.l		%d0			# sign extend word index

	rol.w		&0x7,%d1		# rotate scale value into place
	andi.l		&0x3,%d1		# extract scale value

	lsl.l		%d1,%d0			# shift index by scale

	extb.l		%d2			# sign extend displacement
	add.l		%d2,%d0			# disp + index
	add.l		%d0,%a0			# An + (index + disp)

	mov.l		(%sp)+,%d2		# restore temp register
	btst		&0x6,%d0		# is the index suppressed?

	movm.l		&0x3c00,-(%sp)		# save d2-d5

	mov.l		%d0,%d5			# put extword in d5
	mov.l		%a0,%d3			# put base in d3

	clr.l		%d2			# yes, so index = 0
	bra.b		fbase_supp_ck

	mov.l		%d0,L_SCR1(%a6)		# save d0 (opword)
	bfextu		%d0{&16:&4},%d1		# fetch dreg index

	movm.l		&0x3c00,-(%sp)		# save d2-d5
	mov.l		%d0,%d2			# put index in d2
	mov.l		L_SCR1(%a6),%d5

	btst		&0xb,%d5		# is index word or long?

	bfextu		%d5{&21:&2},%d0

# base address (passed as parameter in d3):
# we clear the value here if it should actually be suppressed.
	btst		&0x7,%d5		# is the bd suppressed?

# base displacement:
	bfextu		%d5{&26:&2},%d0		# get bd size
#	beq.l		fmovm_error		# if (size == 0) it's reserved

	mov.l		EXC_EXTWPTR(%a6),%a0	# fetch instruction addr
	addq.l		&0x4,EXC_EXTWPTR(%a6)	# incr instruction ptr
	bsr.l		_imem_read_long

	tst.l		%d1			# did ifetch fail?
	bne.l		fcea_iacc		# yes

	mov.l		EXC_EXTWPTR(%a6),%a0	# fetch instruction addr
	addq.l		&0x2,EXC_EXTWPTR(%a6)	# incr instruction ptr
	bsr.l		_imem_read_word

	tst.l		%d1			# did ifetch fail?
	bne.l		fcea_iacc		# yes

	ext.l		%d0			# sign extend bd

	add.l		%d0,%d3			# base += bd

# outer displacement:
	bfextu		%d5{&30:&2},%d0		# is od suppressed?

	mov.l		EXC_EXTWPTR(%a6),%a0	# fetch instruction addr
	addq.l		&0x4,EXC_EXTWPTR(%a6)	# incr instruction ptr
	bsr.l		_imem_read_long

	tst.l		%d1			# did ifetch fail?
	bne.l		fcea_iacc		# yes

	mov.l		EXC_EXTWPTR(%a6),%a0	# fetch instruction addr
	addq.l		&0x2,EXC_EXTWPTR(%a6)	# incr instruction ptr
	bsr.l		_imem_read_word

	tst.l		%d1			# did ifetch fail?
	bne.l		fcea_iacc		# yes

	ext.l		%d0			# sign extend od

	btst		&0x2,%d5		# pre or post indexing?

	bsr.l		_dmem_read_long

	tst.l		%d1			# did dfetch fail?
	bne.w		fcea_err		# yes

	add.l		%d2,%d0			# <ea> += index
	add.l		%d4,%d0			# <ea> += od

	add.l		%d2,%d3			# preindexing

	bsr.l		_dmem_read_long

	tst.l		%d1			# did dfetch fail?
	bne.w		fcea_err		# yes

	add.l		%d4,%d0			# ea += od

	add.l		%d2,%d3			# ea = (base + bd) + index

	movm.l		(%sp)+,&0x003c		# restore d2-d5

#########################################################

	movm.l		(%sp)+,&0x003c		# restore d2-d5

	movm.l		(%sp)+,&0x003c		# restore d2-d5

	mov.l		L_SCR1(%a6),%a0
#########################################################################
# XDEF ****************************************************************	#
#	fmovm_ctrl(): emulate fmovm.l of control registers instr	#
#									#
# XREF ****************************************************************	#
#	_imem_read_long() - read longword from memory			#
#	iea_iacc() - _imem_read_long() failed; error recovery		#
#									#
# INPUT ***************************************************************	#
#									#
# OUTPUT **************************************************************	#
#	If _imem_read_long() doesn't fail:				#
#		USER_FPCR(a6)  = new FPCR value				#
#		USER_FPSR(a6)  = new FPSR value				#
#		USER_FPIAR(a6) = new FPIAR value			#
#									#
# ALGORITHM ***********************************************************	#
#	Decode the instruction type by looking at the extension word	#
# in order to see how many control registers to fetch from memory.	#
# Fetch them using _imem_read_long(). If this fetch fails, exit through #
# the special access error exit handler iea_iacc().			#
#									#
# Instruction word decoding:						#
#									#
#	fmovem.l #<data>, {FPIAR&|FPCR&|FPSR}				#
#									#
#	1111 0010 00 111100	100$ $$00 0000 0000			#
#									#
#	$$$ (100): FPCR							#
#########################################################################
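#
# The compares below look only at the upper byte of the extension word
# (the "100$ $$00" field shown above), so, for example:
#
#	0x9c = 100 111 00 -> fpcr, fpsr and fpiar	(fctrl_in_7)
#	0x98 = 100 110 00 -> fpcr and fpsr		(fctrl_in_6)
#	0x94 = 100 101 00 -> fpcr and fpiar		(fctrl_in_5)
#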
	mov.b		EXC_EXTWORD(%a6),%d0	# fetch reg select bits
	cmpi.b		%d0,&0x9c		# fpcr & fpsr & fpiar ?
	beq.w		fctrl_in_7		# yes
	cmpi.b		%d0,&0x98		# fpcr & fpsr ?
	beq.w		fctrl_in_6		# yes
	cmpi.b		%d0,&0x94		# fpcr & fpiar ?
	beq.b		fctrl_in_5		# yes

# fmovem.l #<data>, fpsr/fpiar
	mov.l		EXC_EXTWPTR(%a6),%a0	# fetch instruction addr
	addq.l		&0x4,EXC_EXTWPTR(%a6)	# incr instruction ptr
	bsr.l		_imem_read_long		# fetch FPSR from mem

	tst.l		%d1			# did ifetch fail?
	bne.l		iea_iacc		# yes

	mov.l		%d0,USER_FPSR(%a6)	# store new FPSR to stack
	mov.l		EXC_EXTWPTR(%a6),%a0	# fetch instruction addr
	addq.l		&0x4,EXC_EXTWPTR(%a6)	# incr instruction ptr
	bsr.l		_imem_read_long		# fetch FPIAR from mem

	tst.l		%d1			# did ifetch fail?
	bne.l		iea_iacc		# yes

	mov.l		%d0,USER_FPIAR(%a6)	# store new FPIAR to stack

# fmovem.l #<data>, fpcr/fpiar
	mov.l		EXC_EXTWPTR(%a6),%a0	# fetch instruction addr
	addq.l		&0x4,EXC_EXTWPTR(%a6)	# incr instruction ptr
	bsr.l		_imem_read_long		# fetch FPCR from mem

	tst.l		%d1			# did ifetch fail?
	bne.l		iea_iacc		# yes

	mov.l		%d0,USER_FPCR(%a6)	# store new FPCR to stack
	mov.l		EXC_EXTWPTR(%a6),%a0	# fetch instruction addr
	addq.l		&0x4,EXC_EXTWPTR(%a6)	# incr instruction ptr
	bsr.l		_imem_read_long		# fetch FPIAR from mem

	tst.l		%d1			# did ifetch fail?
	bne.l		iea_iacc		# yes

	mov.l		%d0,USER_FPIAR(%a6)	# store new FPIAR to stack

# fmovem.l #<data>, fpcr/fpsr
	mov.l		EXC_EXTWPTR(%a6),%a0	# fetch instruction addr
	addq.l		&0x4,EXC_EXTWPTR(%a6)	# incr instruction ptr
	bsr.l		_imem_read_long		# fetch FPCR from mem

	tst.l		%d1			# did ifetch fail?
	bne.l		iea_iacc		# yes

	mov.l		%d0,USER_FPCR(%a6)	# store new FPCR to mem
	mov.l		EXC_EXTWPTR(%a6),%a0	# fetch instruction addr
	addq.l		&0x4,EXC_EXTWPTR(%a6)	# incr instruction ptr
	bsr.l		_imem_read_long		# fetch FPSR from mem

	tst.l		%d1			# did ifetch fail?
	bne.l		iea_iacc		# yes

	mov.l		%d0,USER_FPSR(%a6)	# store new FPSR to mem

# fmovem.l #<data>, fpcr/fpsr/fpiar
	mov.l		EXC_EXTWPTR(%a6),%a0	# fetch instruction addr
	addq.l		&0x4,EXC_EXTWPTR(%a6)	# incr instruction ptr
	bsr.l		_imem_read_long		# fetch FPCR from mem

	tst.l		%d1			# did ifetch fail?
	bne.l		iea_iacc		# yes

	mov.l		%d0,USER_FPCR(%a6)	# store new FPCR to mem
	mov.l		EXC_EXTWPTR(%a6),%a0	# fetch instruction addr
	addq.l		&0x4,EXC_EXTWPTR(%a6)	# incr instruction ptr
	bsr.l		_imem_read_long		# fetch FPSR from mem

	tst.l		%d1			# did ifetch fail?
	bne.l		iea_iacc		# yes

	mov.l		%d0,USER_FPSR(%a6)	# store new FPSR to mem
	mov.l		EXC_EXTWPTR(%a6),%a0	# fetch instruction addr
	addq.l		&0x4,EXC_EXTWPTR(%a6)	# incr instruction ptr
	bsr.l		_imem_read_long		# fetch FPIAR from mem

	tst.l		%d1			# did ifetch fail?
	bne.l		iea_iacc		# yes

	mov.l		%d0,USER_FPIAR(%a6)	# store new FPIAR to mem
#########################################################################
# XDEF **************************************************************** #
#	_dcalc_ea(): calc correct <ea> from <ea> stacked on exception	#
#
# XREF **************************************************************** #
#	inc_areg() - increment an address register			#
#	dec_areg() - decrement an address register			#
#
# INPUT *************************************************************** #
#	d0 = number of bytes to adjust <ea> by				#
#
# OUTPUT ************************************************************** #
#
# ALGORITHM *********************************************************** #
# "Dummy" CALCulate Effective Address:					#
#	The stacked <ea> for FP unimplemented instructions and opclass	#
#	two packed instructions is correct with the exception of...	#
#
#	1) -(An)   : The register is not updated regardless of size.	#
#		     Also, for extended precision and packed, the	#
#		     stacked <ea> value is 8 bytes too big.		#
#	2) (An)+   : The register is not updated.			#
#	3) #<data> : The upper longword of the immediate operand is	#
#		     stacked. b,w,l and s sizes are completely stacked;	#
#		     d,x, and p are not.				#
#
#########################################################################
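#
# Commentary (not in the original source): a rough summary of the fixups
# _dcalc_ea applies, per the ALGORITHM notes above:
#	(An)+	: stacked <ea> is already correct; An is bumped up by d0
#	-(An)	: An is bumped down by d0; for 12-byte (ext/packed)
#		  operands the stacked <ea> is also 8 bytes too big and
#		  gets corrected
#	#<data>	: the operand is flagged as immediate and a pointer just
#		  past the instruction word (4 bytes beyond the address
#		  held in USER_FPIAR) is returned so it can be fetched
#		  with an instruction-memory read later
#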
	global		_dcalc_ea
_dcalc_ea:
	mov.l		%d0,%a0			# move # bytes to %a0

	mov.b		1+EXC_OPWORD(%a6),%d0	# fetch opcode word
	mov.l		%d0,%d1			# make a copy

	andi.w		&0x38,%d0		# extract mode field
	andi.l		&0x7,%d1		# extract reg field

	cmpi.b		%d0,&0x18		# is mode (An)+ ?
	beq.b		dcea_pi			# yes

	cmpi.b		%d0,&0x20		# is mode -(An) ?
	beq.b		dcea_pd			# yes

	or.w		%d1,%d0			# concat mode,reg
	cmpi.b		%d0,&0x3c		# is mode #<data>?
	beq.b		dcea_imm		# yes

	mov.l		EXC_EA(%a6),%a0		# return <ea>
	rts

# need to set immediate data flag here since we'll need to do
# an imem_read to fetch this later.
dcea_imm:
	mov.b		&immed_flg,SPCOND_FLG(%a6)
	lea		([USER_FPIAR,%a6],0x4),%a0 # no; return <ea>
	rts

# here, the <ea> is stacked correctly. however, we must update the
# address register...
dcea_pi:
	mov.l		%a0,%d0			# pass amt to inc by
	bsr.l		inc_areg		# inc addr register

	mov.l		EXC_EA(%a6),%a0		# stacked <ea> is correct
	rts

# the <ea> is stacked correctly for all but extended and packed, for
# which the <ea>s are 8 bytes too large.
# it would make no sense to have a pre-decrement to a7 in supervisor
# mode so we don't even worry about this tricky case here : )
dcea_pd:
	mov.l		%a0,%d0			# pass amt to dec by
	bsr.l		dec_areg		# dec addr register

	mov.l		EXC_EA(%a6),%a0		# stacked <ea> is correct

	cmpi.b		%d0,&0xc		# is opsize ext or packed?
	beq.b		dcea_pd2		# yes
	rts
dcea_pd2:
	sub.l		&0x8,%a0		# correct <ea>
	mov.l		%a0,EXC_EA(%a6)		# put correct <ea> on stack
	rts
#########################################################################
# XDEF **************************************************************** #
#	_calc_ea_fout(): calculate correct stacked <ea> for extended	#
#			 and packed data opclass 3 operations.		#
#
# XREF **************************************************************** #
#
# INPUT *************************************************************** #
#
# OUTPUT ************************************************************** #
#	a0 = return correct effective address				#
#
# ALGORITHM *********************************************************** #
#	For opclass 3 extended and packed data operations, the <ea>	#
# stacked for the exception is incorrect for -(an) and (an)+ addressing#
# modes. Also, while we're at it, the index register itself must get	#
# updated.								#
#	So, for -(an), we must subtract 8 off of the stacked <ea> value#
# and return that value as the correct <ea> and store that value in An.#
# For (an)+, the stacked <ea> is correct but we must adjust An by +12.	#
#
#########################################################################

# This calc_ea is currently used to retrieve the correct <ea>
# for fmove outs of type extended and packed.
	global		_calc_ea_fout
_calc_ea_fout:
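#
# Worked example (commentary only; the register value is made up): for
# "fmove.x %fp0,-(%a2)" with a2 = 0x00001010, the true destination is
# 0x00001004 (a2 - 12), but the stacked <ea> is that value + 8, i.e.
# 0x0000100c; per the ALGORITHM notes above, 8 is subtracted from the
# stacked <ea> and the corrected address is written into the saved a2.
# For (a2)+ the stacked <ea> is already correct and a2 is advanced by 12.
#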
	mov.b		1+EXC_OPWORD(%a6),%d0	# fetch opcode word
	mov.l		%d0,%d1			# make a copy

	andi.w		&0x38,%d0		# extract mode field
	andi.l		&0x7,%d1		# extract reg field

	cmpi.b		%d0,&0x18		# is mode (An)+ ?
	beq.b		ceaf_pi			# yes

	cmpi.b		%d0,&0x20		# is mode -(An) ?
	beq.w		ceaf_pd			# yes

	mov.l		EXC_EA(%a6),%a0		# stacked <ea> is correct
	rts
19473 # (An)+ : extended and packed fmove out
19474 # : stacked <ea> is correct
19475 # : "An" not updated
19477 mov.w
(tbl_ceaf_pi.
b,%pc
,%d1.w
*2),%d1
19478 mov.
l EXC_EA
(%a6
),%a0
19479 jmp
(tbl_ceaf_pi.
b,%pc
,%d1.w
*1)
19483 short ceaf_pi0
- tbl_ceaf_pi
19484 short ceaf_pi1
- tbl_ceaf_pi
19485 short ceaf_pi2
- tbl_ceaf_pi
19486 short ceaf_pi3
- tbl_ceaf_pi
19487 short ceaf_pi4
- tbl_ceaf_pi
19488 short ceaf_pi5
- tbl_ceaf_pi
19489 short ceaf_pi6
- tbl_ceaf_pi
19490 short ceaf_pi7
- tbl_ceaf_pi
19493 addi.
l &0xc,EXC_DREGS+
0x8(%a6
)
19496 addi.
l &0xc,EXC_DREGS+
0xc(%a6
)
19511 addi.
l &0xc,EXC_A6
(%a6
)
19514 mov.
b &mia7_flg
,SPCOND_FLG
(%a6
)
19515 addi.
l &0xc,EXC_A7
(%a6
)
19518 # -(An) : extended and packed fmove out
19519 # : stacked <ea> = actual <ea> + 8
19520 # : "An" not updated
19522 mov.w
(tbl_ceaf_pd.
b,%pc
,%d1.w
*2),%d1
19523 mov.
l EXC_EA
(%a6
),%a0
19525 sub.l &0x8,EXC_EA
(%a6
)
19526 jmp
(tbl_ceaf_pd.
b,%pc
,%d1.w
*1)
19530 short ceaf_pd0
- tbl_ceaf_pd
19531 short ceaf_pd1
- tbl_ceaf_pd
19532 short ceaf_pd2
- tbl_ceaf_pd
19533 short ceaf_pd3
- tbl_ceaf_pd
19534 short ceaf_pd4
- tbl_ceaf_pd
19535 short ceaf_pd5
- tbl_ceaf_pd
19536 short ceaf_pd6
- tbl_ceaf_pd
19537 short ceaf_pd7
- tbl_ceaf_pd
19540 mov.
l %a0
,EXC_DREGS+
0x8(%a6
)
19543 mov.
l %a0
,EXC_DREGS+
0xc(%a6
)
19558 mov.
l %a0
,EXC_A6
(%a6
)
19561 mov.
l %a0
,EXC_A7
(%a6
)
19562 mov.
b &mda7_flg
,SPCOND_FLG
(%a6
)
#########################################################################
# XDEF **************************************************************** #
#	_load_fop(): load operand for unimplemented FP exception	#
#
# XREF **************************************************************** #
#	set_tag_x() - determine ext prec optype tag			#
#	set_tag_s() - determine sgl prec optype tag			#
#	set_tag_d() - determine dbl prec optype tag			#
#	unnorm_fix() - convert an unnormalized number to a denorm or zero #
#	norm() - normalize a denormalized number			#
#	get_packed() - fetch a packed operand from memory		#
#	_dcalc_ea() - calculate <ea>, fixing An in process		#
#
#	_imem_read_{word,long}() - read from instruction memory	#
#	_dmem_read() - read from data memory				#
#	_dmem_read_{byte,word,long}() - read from data memory		#
#
#	facc_in_{b,w,l,d,x}() - mem read failed; special exit point	#
#
# INPUT *************************************************************** #
#
# OUTPUT ************************************************************** #
#	If memory access doesn't fail:					#
#	FP_SRC(a6) = source operand in extended precision		#
#	FP_DST(a6) = destination operand in extended precision		#
#
# ALGORITHM *********************************************************** #
#	This is called from the Unimplemented FP exception handler in	#
# order to load the source and maybe destination operand into		#
# FP_SRC(a6) and FP_DST(a6). If the instruction was opclass zero, load	#
# the source and destination from the FP register file. Set the optype	#
# tags for both if dyadic, one for monadic. If a number is an UNNORM,	#
# convert it to a DENORM or a ZERO.					#
#	If the instruction is opclass two (memory->reg), then fetch	#
# the destination from the register file and the source operand from	#
# memory. Tag and fix both as above w/ opclass zero instructions.	#
#	If the source operand is byte, word, long, or single, it may be#
# in the data register file. If it's actually out in memory, use one of#
# the mem_read() routines to fetch it. If the mem_read() access returns#
# a failing value, exit through the special facc_in() routine which	#
# will create an access error exception frame from the current		#
# exception frame.							#
#	Immediate data and regular data accesses are separated because	#
# if an immediate data access fails, the resulting fault status	#
# longword stacked for the access error exception must have the	#
# instruction bit set.							#
#
#########################################################################
#  15     13 12   10  9    7  6               0
#  ---------------------------------
#  | opclass | RX   | RY   |   EXTENSION      |  (2nd word of general FP instruction)
#  ---------------------------------
#
#	bfextu		EXC_CMDREG(%a6){&0:&3}, %d0	# extract opclass
#	cmpi.b		%d0, &0x2	# which class is it? ('000,'010,'011)
#	beq.w		op010		# handle <ea> -> fpn
#	bgt.w		op011		# handle fpn -> <ea>
#
# we're not using op011 for now...
	btst		&0x6,EXC_CMDREG(%a6)
19634 ############################
19635 # OPCLASS '000: reg -> reg #
19636 ############################
19638 mov.
b 1+EXC_CMDREG
(%a6
),%d0
# fetch extension word lo
19639 btst
&0x5,%d0
# testing extension bits
19640 beq.
b op000_src
# (bit 5 == 0) => monadic
19641 btst
&0x4,%d0
# (bit 5 == 1)
19642 beq.
b op000_dst
# (bit 4 == 0) => dyadic
19643 and.w
&0x007f,%d0
# extract extension bits {6:0}
19644 cmpi.w
%d0
,&0x0038 # is it an fcmp (dyadic) ?
19645 bne.
b op000_src
# it's an fcmp
19648 bfextu EXC_CMDREG
(%a6
){&6:&3}, %d0
# extract dst field
19649 bsr.
l load_fpn2
# fetch dst fpreg into FP_DST
19651 bsr.
l set_tag_x
# get dst optype tag
19653 cmpi.
b %d0
, &UNNORM
# is dst fpreg an UNNORM?
19654 beq.
b op000_dst_unnorm
# yes
19656 mov.
b %d0
, DTAG
(%a6
) # store the dst optype tag
19659 bfextu EXC_CMDREG
(%a6
){&3:&3}, %d0
# extract src field
19660 bsr.
l load_fpn1
# fetch src fpreg into FP_SRC
19662 bsr.
l set_tag_x
# get src optype tag
19664 cmpi.
b %d0
, &UNNORM
# is src fpreg an UNNORM?
19665 beq.
b op000_src_unnorm
# yes
19667 mov.
b %d0
, STAG
(%a6
) # store the src optype tag
19671 bsr.
l unnorm_fix
# fix the dst UNNORM
19672 bra.
b op000_dst_cont
19674 bsr.
l unnorm_fix
# fix the src UNNORM
19675 bra.
b op000_src_cont
19677 #############################
19678 # OPCLASS '010: <ea> -> reg #
19679 #############################
19681 mov.w EXC_CMDREG
(%a6
),%d0
# fetch extension word
19682 btst
&0x5,%d0
# testing extension bits
19683 beq.
b op010_src
# (bit 5 == 0) => monadic
19684 btst
&0x4,%d0
# (bit 5 == 1)
19685 beq.
b op010_dst
# (bit 4 == 0) => dyadic
19686 and.w
&0x007f,%d0
# extract extension bits {6:0}
19687 cmpi.w
%d0
,&0x0038 # is it an fcmp (dyadic) ?
19688 bne.
b op010_src
# it's an fcmp
19691 bfextu EXC_CMDREG
(%a6
){&6:&3}, %d0
# extract dst field
19692 bsr.
l load_fpn2
# fetch dst fpreg ptr
19694 bsr.
l set_tag_x
# get dst type tag
19696 cmpi.
b %d0
, &UNNORM
# is dst fpreg an UNNORM?
19697 beq.
b op010_dst_unnorm
# yes
19699 mov.
b %d0
, DTAG
(%a6
) # store the dst optype tag
19702 bfextu EXC_CMDREG
(%a6
){&3:&3}, %d0
# extract src type field
19704 bfextu EXC_OPWORD
(%a6
){&10:&3}, %d1
# extract <ea> mode field
19705 bne.w fetch_from_mem
# src op is in memory
19708 clr.
b STAG
(%a6
) # either NORM or ZERO
19709 bfextu EXC_OPWORD
(%a6
){&13:&3}, %d1
# extract src reg field
19711 mov.w
(tbl_op010_dreg.
b,%pc
,%d0.w
*2), %d0
# jmp based on optype
19712 jmp
(tbl_op010_dreg.
b,%pc
,%d0.w
*1) # fetch src from dreg
19715 bsr.
l unnorm_fix
# fix the dst UNNORM
19716 bra.
b op010_dst_cont
19720 short opd_long
- tbl_op010_dreg
19721 short opd_sgl
- tbl_op010_dreg
19722 short tbl_op010_dreg
- tbl_op010_dreg
19723 short tbl_op010_dreg
- tbl_op010_dreg
19724 short opd_word
- tbl_op010_dreg
19725 short tbl_op010_dreg
- tbl_op010_dreg
19726 short opd_byte
- tbl_op010_dreg
19727 short tbl_op010_dreg
- tbl_op010_dreg
19730 # LONG: can be either NORM or ZERO...
19733 bsr.
l fetch_dreg
# fetch long in d0
19734 fmov.
l %d0
, %fp0
# load a long
19735 fmovm.x
&0x80, FP_SRC
(%a6
) # return src op in FP_SRC
19736 fbeq.w opd_long_zero
# long is a ZERO
19739 mov.
b &ZERO
, STAG
(%a6
) # set ZERO optype flag
19743 # WORD: can be either NORM or ZERO...
19746 bsr.
l fetch_dreg
# fetch word in d0
19747 fmov.w
%d0
, %fp0
# load a word
19748 fmovm.x
&0x80, FP_SRC
(%a6
) # return src op in FP_SRC
19749 fbeq.w opd_word_zero
# WORD is a ZERO
19752 mov.
b &ZERO
, STAG
(%a6
) # set ZERO optype flag
19756 # BYTE: can be either NORM or ZERO...
19759 bsr.
l fetch_dreg
# fetch word in d0
19760 fmov.
b %d0
, %fp0
# load a byte
19761 fmovm.x
&0x80, FP_SRC
(%a6
) # return src op in FP_SRC
19762 fbeq.w opd_byte_zero
# byte is a ZERO
19765 mov.
b &ZERO
, STAG
(%a6
) # set ZERO optype flag
19769 # SGL: can be either NORM, DENORM, ZERO, INF, QNAN or SNAN but not UNNORM
19771 # separate SNANs and DENORMs so they can be loaded w/ special care.
19772 # all others can simply be moved "in" using fmove.
19775 bsr.
l fetch_dreg
# fetch sgl in d0
19776 mov.
l %d0
,L_SCR1
(%a6
)
19778 lea L_SCR1
(%a6
), %a0
# pass: ptr to the sgl
19779 bsr.
l set_tag_s
# determine sgl type
19780 mov.
b %d0
, STAG
(%a6
) # save the src tag
19782 cmpi.
b %d0
, &SNAN
# is it an SNAN?
19783 beq.w get_sgl_snan
# yes
19785 cmpi.
b %d0
, &DENORM
# is it a DENORM?
19786 beq.w get_sgl_denorm
# yes
19788 fmov.s
(%a0
), %fp0
# no, so can load it regular
19789 fmovm.x
&0x80, FP_SRC
(%a6
) # return src op in FP_SRC
##############################################################################

#########################################################################
# fetch_from_mem():							#
# - src is out in memory. must:						#
#	(1) calc ea - must read AFTER you know the src type since	#
#	    if the ea is -() or ()+, need to know # of bytes.		#
#	(2) read it in from either user or supervisor space		#
#	(3) if (b || w || l) then simply read in			#
#	    if (s || d || x) then check for SNAN,UNNORM,DENORM		#
#	    if (packed) then punt for now				#
#
# %d0 : src type field							#
#########################################################################
19807 clr.
b STAG
(%a6
) # either NORM or ZERO
19809 mov.w
(tbl_fp_type.
b,%pc
,%d0.w
*2), %d0
# index by src type field
19810 jmp
(tbl_fp_type.
b,%pc
,%d0.w
*1)
19814 short load_long
- tbl_fp_type
19815 short load_sgl
- tbl_fp_type
19816 short load_ext
- tbl_fp_type
19817 short load_packed
- tbl_fp_type
19818 short load_word
- tbl_fp_type
19819 short load_dbl
- tbl_fp_type
19820 short load_byte
- tbl_fp_type
19821 short tbl_fp_type
- tbl_fp_type
19823 #########################################
19824 # load a LONG into %fp0: #
19825 # -number can't fault #
19827 # (2) read 4 bytes into L_SCR1 #
19828 # (3) fmov.l into %fp0 #
19829 #########################################
19831 movq.
l &0x4, %d0
# pass: 4 (bytes)
19832 bsr.
l _dcalc_ea
# calc <ea>; <ea> in %a0
19834 cmpi.
b SPCOND_FLG
(%a6
),&immed_flg
19835 beq.
b load_long_immed
19837 bsr.
l _dmem_read_long
# fetch src operand from memory
19839 tst.
l %d1
# did dfetch fail?
19840 bne.
l facc_in_l
# yes
19843 fmov.
l %d0
, %fp0
# read into %fp0;convert to xprec
19844 fmovm.x
&0x80, FP_SRC
(%a6
) # return src op in FP_SRC
19846 fbeq.w load_long_zero
# src op is a ZERO
19849 mov.
b &ZERO
, STAG
(%a6
) # set optype tag to ZERO
19853 bsr.
l _imem_read_long
# fetch src operand immed data
19855 tst.
l %d1
# did ifetch fail?
19856 bne.
l funimp_iacc
# yes
19857 bra.
b load_long_cont
19859 #########################################
19860 # load a WORD into %fp0: #
19861 # -number can't fault #
19863 # (2) read 2 bytes into L_SCR1 #
19864 # (3) fmov.w into %fp0 #
19865 #########################################
19867 movq.
l &0x2, %d0
# pass: 2 (bytes)
19868 bsr.
l _dcalc_ea
# calc <ea>; <ea> in %a0
19870 cmpi.
b SPCOND_FLG
(%a6
),&immed_flg
19871 beq.
b load_word_immed
19873 bsr.
l _dmem_read_word
# fetch src operand from memory
19875 tst.
l %d1
# did dfetch fail?
19876 bne.
l facc_in_w
# yes
19879 fmov.w
%d0
, %fp0
# read into %fp0;convert to xprec
19880 fmovm.x
&0x80, FP_SRC
(%a6
) # return src op in FP_SRC
19882 fbeq.w load_word_zero
# src op is a ZERO
19885 mov.
b &ZERO
, STAG
(%a6
) # set optype tag to ZERO
19889 bsr.
l _imem_read_word
# fetch src operand immed data
19891 tst.
l %d1
# did ifetch fail?
19892 bne.
l funimp_iacc
# yes
19893 bra.
b load_word_cont
19895 #########################################
19896 # load a BYTE into %fp0: #
19897 # -number can't fault #
19899 # (2) read 1 byte into L_SCR1 #
19900 # (3) fmov.b into %fp0 #
19901 #########################################
19903 movq.
l &0x1, %d0
# pass: 1 (byte)
19904 bsr.
l _dcalc_ea
# calc <ea>; <ea> in %a0
19906 cmpi.
b SPCOND_FLG
(%a6
),&immed_flg
19907 beq.
b load_byte_immed
19909 bsr.
l _dmem_read_byte
# fetch src operand from memory
19911 tst.
l %d1
# did dfetch fail?
19912 bne.
l facc_in_b
# yes
19915 fmov.
b %d0
, %fp0
# read into %fp0;convert to xprec
19916 fmovm.x
&0x80, FP_SRC
(%a6
) # return src op in FP_SRC
19918 fbeq.w load_byte_zero
# src op is a ZERO
19921 mov.
b &ZERO
, STAG
(%a6
) # set optype tag to ZERO
19925 bsr.
l _imem_read_word
# fetch src operand immed data
19927 tst.
l %d1
# did ifetch fail?
19928 bne.
l funimp_iacc
# yes
19929 bra.
b load_byte_cont
19931 #########################################
19932 # load a SGL into %fp0: #
19933 # -number can't fault #
19935 # (2) read 4 bytes into L_SCR1 #
19936 # (3) fmov.s into %fp0 #
19937 #########################################
19939 movq.
l &0x4, %d0
# pass: 4 (bytes)
19940 bsr.
l _dcalc_ea
# calc <ea>; <ea> in %a0
19942 cmpi.
b SPCOND_FLG
(%a6
),&immed_flg
19943 beq.
b load_sgl_immed
19945 bsr.
l _dmem_read_long
# fetch src operand from memory
19946 mov.
l %d0
, L_SCR1
(%a6
) # store src op on stack
19948 tst.
l %d1
# did dfetch fail?
19949 bne.
l facc_in_l
# yes
19952 lea L_SCR1
(%a6
), %a0
# pass: ptr to sgl src op
19953 bsr.
l set_tag_s
# determine src type tag
19954 mov.
b %d0
, STAG
(%a6
) # save src optype tag on stack
19956 cmpi.
b %d0
, &DENORM
# is it a sgl DENORM?
19957 beq.w get_sgl_denorm
# yes
19959 cmpi.
b %d0
, &SNAN
# is it a sgl SNAN?
19960 beq.w get_sgl_snan
# yes
19962 fmov.s L_SCR1
(%a6
), %fp0
# read into %fp0;convert to xprec
19963 fmovm.x
&0x80, FP_SRC
(%a6
) # return src op in FP_SRC
19967 bsr.
l _imem_read_long
# fetch src operand immed data
19969 tst.
l %d1
# did ifetch fail?
19970 bne.
l funimp_iacc
# yes
19971 bra.
b load_sgl_cont
# must convert sgl denorm format to an Xprec denorm fmt suitable for
# normalization.
# %a0 : points to sgl denorm
get_sgl_denorm:
	clr.w		FP_SRC_EX(%a6)
	bfextu		(%a0){&9:&23},%d0	# fetch sgl hi(_mantissa)
	lsl.l		&0x8,%d0
	mov.l		%d0,FP_SRC_HI(%a6)	# set ext hi(_mantissa)
	clr.l		FP_SRC_LO(%a6)		# set ext lo(_mantissa)

	clr.w		FP_SRC_EX(%a6)
	btst		&0x7,(%a0)		# is sgn bit set?
	beq.b		sgl_dnrm_norm
	bset		&0x7,FP_SRC_EX(%a6)	# set sgn of xprec value

sgl_dnrm_norm:
	lea		FP_SRC(%a6),%a0
	bsr.l		norm			# normalize number
	mov.w		&0x3f81,%d1		# xprec exp = 0x3f81
	sub.w		%d0,%d1			# exp = 0x3f81 - shft amt.
	or.w		%d1,FP_SRC_EX(%a6)	# {sgn,exp}

	mov.b		&NORM,STAG(%a6)		# fix src type tag
	rts
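#
# Commentary (not in the original source): 0x3f81 is the extended-
# precision biased exponent of 2^-126, the smallest normal single. A
# single denorm is 0.frac * 2^-126; with the fraction left-aligned just
# below the integer bit as arranged above, norm() shifts it left by
# "shft amt" bits to set the integer bit, so the true exponent drops by
# the same amount, hence exp = 0x3f81 - shft amt.
#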
19998 # convert sgl to ext SNAN
19999 # %a0 : points to sgl SNAN
20001 mov.w
&0x7fff, FP_SRC_EX
(%a6
) # set exp of SNAN
20002 bfextu
(%a0
){&9:&23}, %d0
20003 lsl.
l &0x8, %d0
# extract and insert hi(man)
20004 mov.
l %d0
, FP_SRC_HI
(%a6
)
20005 clr.
l FP_SRC_LO
(%a6
)
20007 btst
&0x7, (%a0
) # see if sign of SNAN is set
20008 beq.
b no_sgl_snan_sgn
20009 bset
&0x7, FP_SRC_EX
(%a6
)
20013 #########################################
20014 # load a DBL into %fp0: #
20015 # -number can't fault #
20017 # (2) read 8 bytes into L_SCR(1,2)#
20018 # (3) fmov.d into %fp0 #
20019 #########################################
20021 movq.
l &0x8, %d0
# pass: 8 (bytes)
20022 bsr.
l _dcalc_ea
# calc <ea>; <ea> in %a0
20024 cmpi.
b SPCOND_FLG
(%a6
),&immed_flg
20025 beq.
b load_dbl_immed
20027 lea L_SCR1
(%a6
), %a1
# pass: ptr to input dbl tmp space
20028 movq.
l &0x8, %d0
# pass: # bytes to read
20029 bsr.
l _dmem_read
# fetch src operand from memory
20031 tst.
l %d1
# did dfetch fail?
20032 bne.
l facc_in_d
# yes
20035 lea L_SCR1
(%a6
), %a0
# pass: ptr to input dbl
20036 bsr.
l set_tag_d
# determine src type tag
20037 mov.
b %d0
, STAG
(%a6
) # set src optype tag
20039 cmpi.
b %d0
, &DENORM
# is it a dbl DENORM?
20040 beq.w get_dbl_denorm
# yes
20042 cmpi.
b %d0
, &SNAN
# is it a dbl SNAN?
20043 beq.w get_dbl_snan
# yes
20045 fmov.d L_SCR1
(%a6
), %fp0
# read into %fp0;convert to xprec
20046 fmovm.x
&0x80, FP_SRC
(%a6
) # return src op in FP_SRC
20050 lea L_SCR1
(%a6
), %a1
# pass: ptr to input dbl tmp space
20051 movq.
l &0x8, %d0
# pass: # bytes to read
20052 bsr.
l _imem_read
# fetch src operand from memory
20054 tst.
l %d1
# did ifetch fail?
20055 bne.
l funimp_iacc
# yes
20056 bra.
b load_dbl_cont
20058 # must convert dbl denorm format to an Xprec denorm fmt suitable for
20060 # %a0 : loc. of dbl denorm
20062 clr.w FP_SRC_EX
(%a6
)
20063 bfextu
(%a0
){&12:&31}, %d0
# fetch hi(_mantissa)
20064 mov.
l %d0
, FP_SRC_HI
(%a6
)
20065 bfextu
4(%a0
){&11:&21}, %d0
# fetch lo(_mantissa)
20068 mov.
l %d0
, FP_SRC_LO
(%a6
)
20070 btst
&0x7, (%a0
) # is sgn bit set?
20071 beq.
b dbl_dnrm_norm
20072 bset
&0x7, FP_SRC_EX
(%a6
) # set sgn of xprec value
20075 lea FP_SRC
(%a6
), %a0
20076 bsr.
l norm
# normalize number
20077 mov.w
&0x3c01, %d1
# xprec exp = 0x3c01
20078 sub.w
%d0
, %d1
# exp = 0x3c01 - shft amt.
20079 or.w
%d1
, FP_SRC_EX
(%a6
) # {sgn,exp}
20081 mov.
b &NORM
, STAG
(%a6
) # fix src type tag
20084 # convert dbl to ext SNAN
20085 # %a0 : points to dbl SNAN
20087 mov.w
&0x7fff, FP_SRC_EX
(%a6
) # set exp of SNAN
20089 bfextu
(%a0
){&12:&31}, %d0
# fetch hi(_mantissa)
20090 mov.
l %d0
, FP_SRC_HI
(%a6
)
20091 bfextu
4(%a0
){&11:&21}, %d0
# fetch lo(_mantissa)
20094 mov.
l %d0
, FP_SRC_LO
(%a6
)
20096 btst
&0x7, (%a0
) # see if sign of SNAN is set
20097 beq.
b no_dbl_snan_sgn
20098 bset
&0x7, FP_SRC_EX
(%a6
)
20102 #################################################
20103 # load a Xprec into %fp0: #
20104 # -number can't fault #
20106 # (2) read 12 bytes into L_SCR(1,2) #
20107 # (3) fmov.x into %fp0 #
20108 #################################################
20110 mov.
l &0xc, %d0
# pass: 12 (bytes)
20111 bsr.
l _dcalc_ea
# calc <ea>
20113 lea FP_SRC
(%a6
), %a1
# pass: ptr to input ext tmp space
20114 mov.
l &0xc, %d0
# pass: # of bytes to read
20115 bsr.
l _dmem_read
# fetch src operand from memory
20117 tst.
l %d1
# did dfetch fail?
20118 bne.
l facc_in_x
# yes
20120 lea FP_SRC
(%a6
), %a0
# pass: ptr to src op
20121 bsr.
l set_tag_x
# determine src type tag
20123 cmpi.
b %d0
, &UNNORM
# is the src op an UNNORM?
20124 beq.
b load_ext_unnorm
# yes
20126 mov.
b %d0
, STAG
(%a6
) # store the src optype tag
20130 bsr.
l unnorm_fix
# fix the src UNNORM
20131 mov.
b %d0
, STAG
(%a6
) # store the src optype tag
20134 #################################################
20135 # load a packed into %fp0: #
20136 # -number can't fault #
20138 # (2) read 12 bytes into L_SCR(1,2,3) #
20139 # (3) fmov.x into %fp0 #
20140 #################################################
20144 lea FP_SRC
(%a6
),%a0
# pass ptr to src op
20145 bsr.
l set_tag_x
# determine src type tag
20146 cmpi.
b %d0
,&UNNORM
# is the src op an UNNORM ZERO?
20147 beq.
b load_packed_unnorm
# yes
20149 mov.
b %d0
,STAG
(%a6
) # store the src optype tag
20152 load_packed_unnorm
:
20153 bsr.
l unnorm_fix
# fix the UNNORM ZERO
20154 mov.
b %d0
,STAG
(%a6
) # store the src optype tag
#########################################################################
# XDEF **************************************************************** #
#	fout(): move from fp register to memory or data register	#
#
# XREF **************************************************************** #
#	_round() - needed to create EXOP for sgl/dbl precision		#
#	norm() - needed to create EXOP for extended precision		#
#	ovf_res() - create default overflow result for sgl/dbl precision#
#	unf_res() - create default underflow result for sgl/dbl prec.	#
#	dst_dbl() - create rounded dbl precision result.		#
#	dst_sgl() - create rounded sgl precision result.		#
#	fetch_dreg() - fetch dynamic k-factor reg for packed.		#
#	bindec() - convert FP binary number to packed number.		#
#	_mem_write() - write data to memory.				#
#	_mem_write2() - write data to memory unless supv mode -(a7) exc.#
#	_dmem_write_{byte,word,long}() - write data to memory.		#
#	store_dreg_{b,w,l}() - store data to data register file.	#
#	facc_out_{b,w,l,d,x}() - data access error occurred.		#
#
# INPUT *************************************************************** #
#	a0 = pointer to extended precision source operand		#
#	d0 = round prec,mode						#
#
# OUTPUT ************************************************************** #
#	fp0 : intermediate underflow or overflow result if		#
#	      OVFL/UNFL occurred for a sgl or dbl operand		#
#
# ALGORITHM *********************************************************** #
#	This routine is accessed by many handlers that need to do an	#
# opclass three move of an operand out to memory.			#
#	Decode an fmove out (opclass 3) instruction to determine if	#
# it's b,w,l,s,d,x, or p in size. b,w,l can be stored to either a data	#
# register or memory. The algorithm uses a standard "fmove" to create	#
# the rounded result. Also, since exceptions are disabled, this also	#
# creates the correct OPERR default result if appropriate.		#
#	For sgl or dbl precision, overflow or underflow can occur. If	#
# either occurs and is enabled, the EXOP is returned in fp1.		#
#	For extended precision, the stacked <ea> must be fixed along	#
# w/ the address index register as appropriate w/ _calc_ea_fout(). If	#
# the source is a denorm and if underflow is enabled, an EXOP must be	#
# created.								#
#	For packed, the k-factor must be fetched from the instruction	#
# word or a data register. The <ea> must be fixed as w/ extended	#
# precision. Then, bindec() is called to create the appropriate	#
# packed result.							#
#	If at any time an access error is flagged by one of the move-	#
# to-memory routines, then a special exit must be made so that the	#
# access error can be handled properly.					#
#
#########################################################################

	global		fout
fout:
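#
# Commentary (not in the original source): the destination-format field
# extracted below selects one of the size-specific handlers; roughly,
#	long/word/byte	- integer conversions; may go to Dn or memory
#	sgl/dbl		- may overflow or underflow; EXOP built if enabled
#	ext/packed	- memory only; stacked <ea> fixed via _calc_ea_fout()
#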
20210 bfextu EXC_CMDREG
(%a6
){&3:&3},%d1
# extract dst fmt
20211 mov.w
(tbl_fout.
b,%pc
,%d1.w
*2),%a1
# use as index
20212 jmp
(tbl_fout.
b,%pc
,%a1
) # jump to routine
20216 short fout_long
- tbl_fout
20217 short fout_sgl
- tbl_fout
20218 short fout_ext
- tbl_fout
20219 short fout_pack
- tbl_fout
20220 short fout_word
- tbl_fout
20221 short fout_dbl
- tbl_fout
20222 short fout_byte
- tbl_fout
20223 short fout_pack
- tbl_fout
20225 #################################################################
20226 # fmove.b out ###################################################
20227 #################################################################
20229 # Only "Unimplemented Data Type" exceptions enter here. The operand
20230 # is either a DENORM or a NORM.
20232 tst.
b STAG
(%a6
) # is operand normalized?
20233 bne.
b fout_byte_denorm
# no
20235 fmovm.x SRC
(%a0
),&0x80 # load value
20238 fmov.
l %d0
,%fpcr
# insert rnd prec,mode
20240 fmov.
b %fp0
,%d0
# exec move out w/ correct rnd mode
20242 fmov.
l &0x0,%fpcr
# clear FPCR
20243 fmov.
l %fpsr
,%d1
# fetch FPSR
20244 or.w
%d1
,2+USER_FPSR
(%a6
) # save new exc,accrued bits
20246 mov.
b 1+EXC_OPWORD
(%a6
),%d1
# extract dst mode
20247 andi.b &0x38,%d1
# is mode == 0? (Dreg dst)
20248 beq.
b fout_byte_dn
# must save to integer regfile
20250 mov.
l EXC_EA
(%a6
),%a0
# stacked <ea> is correct
20251 bsr.
l _dmem_write_byte
# write byte
20253 tst.
l %d1
# did dstore fail?
20254 bne.
l facc_out_b
# yes
20259 mov.
b 1+EXC_OPWORD
(%a6
),%d1
# extract Dn
20265 mov.
l SRC_EX
(%a0
),%d1
20266 andi.l &0x80000000,%d1
# keep DENORM sign
20267 ori.
l &0x00800000,%d1
# make smallest sgl
20269 bra.
b fout_byte_norm
20271 #################################################################
20272 # fmove.w out ###################################################
20273 #################################################################
20275 # Only "Unimplemented Data Type" exceptions enter here. The operand
20276 # is either a DENORM or a NORM.
20278 tst.
b STAG
(%a6
) # is operand normalized?
20279 bne.
b fout_word_denorm
# no
20281 fmovm.x SRC
(%a0
),&0x80 # load value
20284 fmov.
l %d0
,%fpcr
# insert rnd prec:mode
20286 fmov.w
%fp0
,%d0
# exec move out w/ correct rnd mode
20288 fmov.
l &0x0,%fpcr
# clear FPCR
20289 fmov.
l %fpsr
,%d1
# fetch FPSR
20290 or.w
%d1
,2+USER_FPSR
(%a6
) # save new exc,accrued bits
20292 mov.
b 1+EXC_OPWORD
(%a6
),%d1
# extract dst mode
20293 andi.b &0x38,%d1
# is mode == 0? (Dreg dst)
20294 beq.
b fout_word_dn
# must save to integer regfile
20296 mov.
l EXC_EA
(%a6
),%a0
# stacked <ea> is correct
20297 bsr.
l _dmem_write_word
# write word
20299 tst.
l %d1
# did dstore fail?
20300 bne.
l facc_out_w
# yes
20305 mov.
b 1+EXC_OPWORD
(%a6
),%d1
# extract Dn
20311 mov.
l SRC_EX
(%a0
),%d1
20312 andi.l &0x80000000,%d1
# keep DENORM sign
20313 ori.
l &0x00800000,%d1
# make smallest sgl
20315 bra.
b fout_word_norm
20317 #################################################################
20318 # fmove.l out ###################################################
20319 #################################################################
20321 # Only "Unimplemented Data Type" exceptions enter here. The operand
20322 # is either a DENORM or a NORM.
20324 tst.
b STAG
(%a6
) # is operand normalized?
20325 bne.
b fout_long_denorm
# no
20327 fmovm.x SRC
(%a0
),&0x80 # load value
20330 fmov.
l %d0
,%fpcr
# insert rnd prec:mode
20332 fmov.
l %fp0
,%d0
# exec move out w/ correct rnd mode
20334 fmov.
l &0x0,%fpcr
# clear FPCR
20335 fmov.
l %fpsr
,%d1
# fetch FPSR
20336 or.w
%d1
,2+USER_FPSR
(%a6
) # save new exc,accrued bits
20339 mov.
b 1+EXC_OPWORD
(%a6
),%d1
# extract dst mode
20340 andi.b &0x38,%d1
# is mode == 0? (Dreg dst)
20341 beq.
b fout_long_dn
# must save to integer regfile
20343 mov.
l EXC_EA
(%a6
),%a0
# stacked <ea> is correct
20344 bsr.
l _dmem_write_long
# write long
20346 tst.
l %d1
# did dstore fail?
20347 bne.
l facc_out_l
# yes
20352 mov.
b 1+EXC_OPWORD
(%a6
),%d1
# extract Dn
20358 mov.
l SRC_EX
(%a0
),%d1
20359 andi.l &0x80000000,%d1
# keep DENORM sign
20360 ori.
l &0x00800000,%d1
# make smallest sgl
20362 bra.
b fout_long_norm
20364 #################################################################
20365 # fmove.x out ###################################################
20366 #################################################################
20368 # Only "Unimplemented Data Type" exceptions enter here. The operand
20369 # is either a DENORM or a NORM.
20370 # The DENORM causes an Underflow exception.
20373 # we copy the extended precision result to FP_SCR0 so that the reserved
20374 # 16-bit field gets zeroed. we do this since we promise not to disturb
20375 # what's at SRC(a0).
20376 mov.w SRC_EX
(%a0
),FP_SCR0_EX
(%a6
)
20377 clr.w
2+FP_SCR0_EX
(%a6
) # clear reserved field
20378 mov.
l SRC_HI
(%a0
),FP_SCR0_HI
(%a6
)
20379 mov.
l SRC_LO
(%a0
),FP_SCR0_LO
(%a6
)
20381 fmovm.x SRC
(%a0
),&0x80 # return result
20383 bsr.
l _calc_ea_fout
# fix stacked <ea>
20385 mov.
l %a0
,%a1
# pass: dst addr
20386 lea FP_SCR0
(%a6
),%a0
# pass: src addr
20387 mov.
l &0xc,%d0
# pass: opsize is 12 bytes
20389 # we must not yet write the extended precision data to the stack
20390 # in the pre-decrement case from supervisor mode or else we'll corrupt
20391 # the stack frame. so, leave it in FP_SRC for now and deal with it later...
20392 cmpi.
b SPCOND_FLG
(%a6
),&mda7_flg
20395 bsr.
l _dmem_write
# write ext prec number to memory
20397 tst.
l %d1
# did dstore fail?
20398 bne.w fout_ext_err
# yes
20400 tst.
b STAG
(%a6
) # is operand normalized?
20401 bne.
b fout_ext_denorm
# no
20404 # the number is a DENORM. must set the underflow exception bit
20406 bset
&unfl_bit
,FPSR_EXCEPT
(%a6
) # set underflow exc bit
20408 mov.
b FPCR_ENABLE
(%a6
),%d0
20409 andi.b &0x0a,%d0
# is UNFL or INEX enabled?
20410 bne.
b fout_ext_exc
# yes
20413 # we don't want to do the write if the exception occurred in supervisor mode
20414 # so _mem_write2() handles this for us.
20416 bsr.
l _mem_write2
# write ext prec number to memory
20418 tst.
l %d1
# did dstore fail?
20419 bne.w fout_ext_err
# yes
20421 tst.
b STAG
(%a6
) # is operand normalized?
20422 bne.
b fout_ext_denorm
# no
20426 lea FP_SCR0
(%a6
),%a0
20427 bsr.
l norm
# normalize the mantissa
20428 neg.w
%d0
# new exp = -(shft amt)
20430 andi.w
&0x8000,FP_SCR0_EX
(%a6
) # keep only old sign
20431 or.w
%d0
,FP_SCR0_EX
(%a6
) # insert new exponent
20432 fmovm.x FP_SCR0
(%a6
),&0x40 # return EXOP in fp1
20436 mov.
l EXC_A6
(%a6
),(%a6
) # fix stacked a6
20439 #########################################################################
20440 # fmove.s out ###########################################################
20441 #########################################################################
20443 andi.b &0x30,%d0
# clear rnd prec
20444 ori.
b &s_mode
*0x10,%d0
# insert sgl prec
20445 mov.
l %d0
,L_SCR3
(%a6
) # save rnd prec,mode on stack
20448 # operand is a normalized number. first, we check to see if the move out
20449 # would cause either an underflow or overflow. these cases are handled
20450 # separately. otherwise, set the FPCR to the proper rounding mode and
20451 # execute the move.
20453 mov.w SRC_EX
(%a0
),%d0
# extract exponent
20454 andi.w
&0x7fff,%d0
# strip sign
20456 cmpi.w
%d0
,&SGL_HI
# will operand overflow?
20457 bgt.w fout_sgl_ovfl
# yes; go handle OVFL
20458 beq.w fout_sgl_may_ovfl
# maybe; go handle possible OVFL
20459 cmpi.w
%d0
,&SGL_LO
# will operand underflow?
20460 blt.w fout_sgl_unfl
# yes; go handle underflow
20463 # NORMs(in range) can be stored out by a simple "fmov.s"
20464 # Unnormalized inputs can come through this point.
20467 fmovm.x SRC
(%a0
),&0x80 # fetch fop from stack
20469 fmov.
l L_SCR3
(%a6
),%fpcr
# set FPCR
20470 fmov.
l &0x0,%fpsr
# clear FPSR
20472 fmov.s
%fp0
,%d0
# store does convert and round
20474 fmov.
l &0x0,%fpcr
# clear FPCR
20475 fmov.
l %fpsr
,%d1
# save FPSR
20477 or.w
%d1
,2+USER_FPSR
(%a6
) # set possible inex2/ainex
20479 fout_sgl_exg_write
:
20480 mov.
b 1+EXC_OPWORD
(%a6
),%d1
# extract dst mode
20481 andi.b &0x38,%d1
# is mode == 0? (Dreg dst)
20482 beq.
b fout_sgl_exg_write_dn
# must save to integer regfile
20484 mov.
l EXC_EA
(%a6
),%a0
# stacked <ea> is correct
20485 bsr.
l _dmem_write_long
# write long
20487 tst.
l %d1
# did dstore fail?
20488 bne.
l facc_out_l
# yes
20492 fout_sgl_exg_write_dn
:
20493 mov.
b 1+EXC_OPWORD
(%a6
),%d1
# extract Dn
20499 # here, we know that the operand would UNFL if moved out to single prec,
20500 # so, denorm and round and then use generic store single routine to
20501 # write the value to memory.
20504 bset
&unfl_bit
,FPSR_EXCEPT
(%a6
) # set UNFL
20506 mov.w SRC_EX
(%a0
),FP_SCR0_EX
(%a6
)
20507 mov.
l SRC_HI
(%a0
),FP_SCR0_HI
(%a6
)
20508 mov.
l SRC_LO
(%a0
),FP_SCR0_LO
(%a6
)
20511 clr.
l %d0
# pass: S.F. = 0
20513 cmpi.
b STAG
(%a6
),&DENORM
# fetch src optype tag
20514 bne.
b fout_sgl_unfl_cont
# let DENORMs fall through
20516 lea FP_SCR0
(%a6
),%a0
20517 bsr.
l norm
# normalize the DENORM
20519 fout_sgl_unfl_cont
:
20520 lea FP_SCR0
(%a6
),%a0
# pass: ptr to operand
20521 mov.
l L_SCR3
(%a6
),%d1
# pass: rnd prec,mode
20522 bsr.
l unf_res
# calc default underflow result
20524 lea FP_SCR0
(%a6
),%a0
# pass: ptr to fop
20525 bsr.
l dst_sgl
# convert to single prec
20527 mov.
b 1+EXC_OPWORD
(%a6
),%d1
# extract dst mode
20528 andi.b &0x38,%d1
# is mode == 0? (Dreg dst)
20529 beq.
b fout_sgl_unfl_dn
# must save to integer regfile
20531 mov.
l EXC_EA
(%a6
),%a0
# stacked <ea> is correct
20532 bsr.
l _dmem_write_long
# write long
20534 tst.
l %d1
# did dstore fail?
20535 bne.
l facc_out_l
# yes
20537 bra.
b fout_sgl_unfl_chkexc
20540 mov.
b 1+EXC_OPWORD
(%a6
),%d1
# extract Dn
20544 fout_sgl_unfl_chkexc
:
20545 mov.
b FPCR_ENABLE
(%a6
),%d1
20546 andi.b &0x0a,%d1
# is UNFL or INEX enabled?
20547 bne.w fout_sd_exc_unfl
# yes
20552 # it's definitely an overflow so call ovf_res to get the correct answer
20555 tst.
b 3+SRC_HI
(%a0
) # is result inexact?
20556 bne.
b fout_sgl_ovfl_inex2
20557 tst.
l SRC_LO
(%a0
) # is result inexact?
20558 bne.
b fout_sgl_ovfl_inex2
20559 ori.w
&ovfl_inx_mask
,2+USER_FPSR
(%a6
) # set ovfl/aovfl/ainex
20560 bra.
b fout_sgl_ovfl_cont
20561 fout_sgl_ovfl_inex2
:
20562 ori.w
&ovfinx_mask
,2+USER_FPSR
(%a6
) # set ovfl/aovfl/ainex/inex2
20564 fout_sgl_ovfl_cont
:
20567 # call ovf_res() w/ sgl prec and the correct rnd mode to create the default
20568 # overflow result. DON'T save the returned ccodes from ovf_res() since
20569 # fmove out doesn't alter them.
20570 tst.
b SRC_EX
(%a0
) # is operand negative?
20571 smi
%d1
# set if so
20572 mov.
l L_SCR3
(%a6
),%d0
# pass: sgl prec,rnd mode
20573 bsr.
l ovf_res
# calc OVFL result
20574 fmovm.x
(%a0
),&0x80 # load default overflow result
20575 fmov.s
%fp0
,%d0
# store to single
20577 mov.
b 1+EXC_OPWORD
(%a6
),%d1
# extract dst mode
20578 andi.b &0x38,%d1
# is mode == 0? (Dreg dst)
20579 beq.
b fout_sgl_ovfl_dn
# must save to integer regfile
20581 mov.
l EXC_EA
(%a6
),%a0
# stacked <ea> is correct
20582 bsr.
l _dmem_write_long
# write long
20584 tst.
l %d1
# did dstore fail?
20585 bne.
l facc_out_l
# yes
20587 bra.
b fout_sgl_ovfl_chkexc
20590 mov.
b 1+EXC_OPWORD
(%a6
),%d1
# extract Dn
20594 fout_sgl_ovfl_chkexc
:
20595 mov.
b FPCR_ENABLE
(%a6
),%d1
20596 andi.b &0x0a,%d1
# is UNFL or INEX enabled?
20597 bne.w fout_sd_exc_ovfl
# yes
20602 # move out MAY overflow:
20603 # (1) force the exp to 0x3fff
20604 # (2) do a move w/ appropriate rnd mode
20605 # (3) if exp still equals zero, then insert original exponent
20606 # for the correct result.
20607 # if exp now equals one, then it overflowed so call ovf_res.
20610 mov.w SRC_EX
(%a0
),%d1
# fetch current sign
20611 andi.w
&0x8000,%d1
# keep it,clear exp
20612 ori.w
&0x3fff,%d1
# insert exp = 0
20613 mov.w
%d1
,FP_SCR0_EX
(%a6
) # insert scaled exp
20614 mov.
l SRC_HI
(%a0
),FP_SCR0_HI
(%a6
) # copy hi(man)
20615 mov.
l SRC_LO
(%a0
),FP_SCR0_LO
(%a6
) # copy lo(man)
20617 fmov.
l L_SCR3
(%a6
),%fpcr
# set FPCR
20619 fmov.x FP_SCR0
(%a6
),%fp0
# force fop to be rounded
20620 fmov.
l &0x0,%fpcr
# clear FPCR
20622 fabs.x
%fp0
# need absolute value
20623 fcmp.
b %fp0
,&0x2 # did exponent increase?
20624 fblt.w fout_sgl_exg
# no; go finish NORM
20625 bra.w fout_sgl_ovfl
# yes; go handle overflow
20632 mov.w SRC_EX
(%a0
),FP_SCR0_EX
(%a6
)
20633 mov.
l SRC_HI
(%a0
),FP_SCR0_HI
(%a6
)
20634 mov.
l SRC_LO
(%a0
),FP_SCR0_LO
(%a6
)
20636 cmpi.
b STAG
(%a6
),&DENORM
# was src a DENORM?
20637 bne.
b fout_sd_exc_cont
# no
20639 lea FP_SCR0
(%a6
),%a0
20643 bfins
%d0
,FP_SCR0_EX
(%a6
){&1:&15}
20644 bra.
b fout_sd_exc_cont
20648 mov.
l (%sp
)+,%a0
# restore a0
20650 mov.w SRC_EX
(%a0
),FP_SCR0_EX
(%a6
)
20651 mov.
l SRC_HI
(%a0
),FP_SCR0_HI
(%a6
)
20652 mov.
l SRC_LO
(%a0
),FP_SCR0_LO
(%a6
)
20655 bclr &0x7,FP_SCR0_EX
(%a6
) # clear sign bit
20656 sne.
b 2+FP_SCR0_EX
(%a6
) # set internal sign bit
20657 lea FP_SCR0
(%a6
),%a0
# pass: ptr to DENORM
20659 mov.
b 3+L_SCR3
(%a6
),%d1
20663 mov.
b 3+L_SCR3
(%a6
),%d1
20666 clr.
l %d0
# pass: zero g,r,s
20667 bsr.
l _round
# round the DENORM
20669 tst.
b 2+FP_SCR0_EX
(%a6
) # is EXOP negative?
20670 beq.
b fout_sd_exc_done
# no
20671 bset
&0x7,FP_SCR0_EX
(%a6
) # yes
20674 fmovm.x FP_SCR0
(%a6
),&0x40 # return EXOP in fp1
20677 #################################################################
20678 # fmove.d out ###################################################
20679 #################################################################
20681 andi.b &0x30,%d0
# clear rnd prec
20682 ori.
b &d_mode
*0x10,%d0
# insert dbl prec
20683 mov.
l %d0
,L_SCR3
(%a6
) # save rnd prec,mode on stack
20686 # operand is a normalized number. first, we check to see if the move out
20687 # would cause either an underflow or overflow. these cases are handled
20688 # separately. otherwise, set the FPCR to the proper rounding mode and
20689 # execute the move.
20691 mov.w SRC_EX
(%a0
),%d0
# extract exponent
20692 andi.w
&0x7fff,%d0
# strip sign
20694 cmpi.w
%d0
,&DBL_HI
# will operand overflow?
20695 bgt.w fout_dbl_ovfl
# yes; go handle OVFL
20696 beq.w fout_dbl_may_ovfl
# maybe; go handle possible OVFL
20697 cmpi.w
%d0
,&DBL_LO
# will operand underflow?
20698 blt.w fout_dbl_unfl
# yes; go handle underflow
20701 # NORMs(in range) can be stored out by a simple "fmov.d"
20702 # Unnormalized inputs can come through this point.
20705 fmovm.x SRC
(%a0
),&0x80 # fetch fop from stack
20707 fmov.
l L_SCR3
(%a6
),%fpcr
# set FPCR
20708 fmov.
l &0x0,%fpsr
# clear FPSR
20710 fmov.d
%fp0
,L_SCR1
(%a6
) # store does convert and round
20712 fmov.
l &0x0,%fpcr
# clear FPCR
20713 fmov.
l %fpsr
,%d0
# save FPSR
20715 or.w
%d0
,2+USER_FPSR
(%a6
) # set possible inex2/ainex
20717 mov.
l EXC_EA
(%a6
),%a1
# pass: dst addr
20718 lea L_SCR1
(%a6
),%a0
# pass: src addr
20719 movq.
l &0x8,%d0
# pass: opsize is 8 bytes
20720 bsr.
l _dmem_write
# store dbl fop to memory
20722 tst.
l %d1
# did dstore fail?
20723 bne.
l facc_out_d
# yes
20725 rts
# no; so we're finished
20728 # here, we know that the operand would UNFL if moved out to double prec,
20729 # so, denorm and round and then use generic store double routine to
20730 # write the value to memory.
20733 bset
&unfl_bit
,FPSR_EXCEPT
(%a6
) # set UNFL
20735 mov.w SRC_EX
(%a0
),FP_SCR0_EX
(%a6
)
20736 mov.
l SRC_HI
(%a0
),FP_SCR0_HI
(%a6
)
20737 mov.
l SRC_LO
(%a0
),FP_SCR0_LO
(%a6
)
20740 clr.
l %d0
# pass: S.F. = 0
20742 cmpi.
b STAG
(%a6
),&DENORM
# fetch src optype tag
20743 bne.
b fout_dbl_unfl_cont
# let DENORMs fall through
20745 lea FP_SCR0
(%a6
),%a0
20746 bsr.
l norm
# normalize the DENORM
20748 fout_dbl_unfl_cont
:
20749 lea FP_SCR0
(%a6
),%a0
# pass: ptr to operand
20750 mov.
l L_SCR3
(%a6
),%d1
# pass: rnd prec,mode
20751 bsr.
l unf_res
# calc default underflow result
20753 lea FP_SCR0
(%a6
),%a0
# pass: ptr to fop
20754 bsr.
l dst_dbl
# convert to double prec
20755 mov.
l %d0
,L_SCR1
(%a6
)
20756 mov.
l %d1
,L_SCR2
(%a6
)
20758 mov.
l EXC_EA
(%a6
),%a1
# pass: dst addr
20759 lea L_SCR1
(%a6
),%a0
# pass: src addr
20760 movq.
l &0x8,%d0
# pass: opsize is 8 bytes
20761 bsr.
l _dmem_write
# store dbl fop to memory
20763 tst.
l %d1
# did dstore fail?
20764 bne.
l facc_out_d
# yes
20766 mov.
b FPCR_ENABLE
(%a6
),%d1
20767 andi.b &0x0a,%d1
# is UNFL or INEX enabled?
20768 bne.w fout_sd_exc_unfl
# yes
20773 # it's definitely an overflow so call ovf_res to get the correct answer
20776 mov.w
2+SRC_LO
(%a0
),%d0
20778 bne.
b fout_dbl_ovfl_inex2
20780 ori.w
&ovfl_inx_mask
,2+USER_FPSR
(%a6
) # set ovfl/aovfl/ainex
20781 bra.
b fout_dbl_ovfl_cont
20782 fout_dbl_ovfl_inex2
:
20783 ori.w
&ovfinx_mask
,2+USER_FPSR
(%a6
) # set ovfl/aovfl/ainex/inex2
20785 fout_dbl_ovfl_cont
:
20788 # call ovf_res() w/ dbl prec and the correct rnd mode to create the default
20789 # overflow result. DON'T save the returned ccodes from ovf_res() since
20790 # fmove out doesn't alter them.
20791 tst.
b SRC_EX
(%a0
) # is operand negative?
20792 smi
%d1
# set if so
20793 mov.
l L_SCR3
(%a6
),%d0
# pass: dbl prec,rnd mode
20794 bsr.
l ovf_res
# calc OVFL result
20795 fmovm.x
(%a0
),&0x80 # load default overflow result
20796 fmov.d
%fp0
,L_SCR1
(%a6
) # store to double
20798 mov.
l EXC_EA
(%a6
),%a1
# pass: dst addr
20799 lea L_SCR1
(%a6
),%a0
# pass: src addr
20800 movq.
l &0x8,%d0
# pass: opsize is 8 bytes
20801 bsr.
l _dmem_write
# store dbl fop to memory
20803 tst.
l %d1
# did dstore fail?
20804 bne.
l facc_out_d
# yes
20806 mov.
b FPCR_ENABLE
(%a6
),%d1
20807 andi.b &0x0a,%d1
# is UNFL or INEX enabled?
20808 bne.w fout_sd_exc_ovfl
# yes
20813 # move out MAY overflow:
20814 # (1) force the exp to 0x3fff
20815 # (2) do a move w/ appropriate rnd mode
20816 # (3) if exp still equals zero, then insert original exponent
20817 # for the correct result.
20818 # if exp now equals one, then it overflowed so call ovf_res.
20821 mov.w SRC_EX
(%a0
),%d1
# fetch current sign
20822 andi.w
&0x8000,%d1
# keep it,clear exp
20823 ori.w
&0x3fff,%d1
# insert exp = 0
20824 mov.w
%d1
,FP_SCR0_EX
(%a6
) # insert scaled exp
20825 mov.
l SRC_HI
(%a0
),FP_SCR0_HI
(%a6
) # copy hi(man)
20826 mov.
l SRC_LO
(%a0
),FP_SCR0_LO
(%a6
) # copy lo(man)
20828 fmov.
l L_SCR3
(%a6
),%fpcr
# set FPCR
20830 fmov.x FP_SCR0
(%a6
),%fp0
# force fop to be rounded
20831 fmov.
l &0x0,%fpcr
# clear FPCR
20833 fabs.x
%fp0
# need absolute value
20834 fcmp.
b %fp0
,&0x2 # did exponent increase?
20835 fblt.w fout_dbl_exg
# no; go finish NORM
20836 bra.w fout_dbl_ovfl
# yes; go handle overflow
#########################################################################
# XDEF **************************************************************** #
#	dst_dbl(): create double precision value from extended prec.	#
#
# XREF **************************************************************** #
#
# INPUT *************************************************************** #
#	a0 = pointer to source operand in extended precision		#
#
# OUTPUT ************************************************************** #
#	d0 = hi(double precision result)				#
#	d1 = lo(double precision result)				#
#
# ALGORITHM *********************************************************** #
#									#
#  Changes extended precision to double precision.			#
#  Note: no attempt is made to round the extended value to double.	#
#	dbl_sign = ext_sign						#
#	dbl_exp = ext_exp - $3fff(ext bias) + $7ff(dbl bias)		#
#	get rid of ext integer bit					#
#	dbl_mant = ext_mant{62:12}					#
#									#
#		---------------  ---------------  ---------------	#
#  extended ->	|s|    exp    |  |1| ms mant   |  |   ls mant   |	#
#		---------------  ---------------  ---------------	#
#		95	    64	 63 62	      32  31	     11 0	#
#									#
#		---------------  ---------------			#
#  double   ->	|s|exp|  mant |  |     mant    |			#
#		---------------  ---------------			#
#									#
#########################################################################
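#
# Commentary (not in the original source): a rough C-style sketch of the
# repacking below, using illustrative names (sign, ext_exp, ext_man_hi,
# ext_man_lo); the denorm bias tweak is left out and, as noted above,
# no rounding is performed:
#
#	hi = (sign << 31)
#	   | ((ext_exp - 0x3fff + 0x7ff) << 20)
#	   | ((ext_man_hi >> 11) & 0xfffff);	/* drop the integer bit */
#	lo = (ext_man_hi << 21) | (ext_man_lo >> 11);
#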
	global		dst_dbl
dst_dbl:
	clr.l		%d0			# clear d0
	mov.w		FTEMP_EX(%a0),%d0	# get exponent
	subi.w		&EXT_BIAS,%d0		# subtract extended precision bias
	addi.w		&DBL_BIAS,%d0		# add double precision bias
	tst.b		FTEMP_HI(%a0)		# is number a denorm?
	bmi.b		dst_get_dupper		# no
	subq.w		&0x1,%d0		# yes; denorm bias = DBL_BIAS - 1
dst_get_dupper:
	swap		%d0			# d0 now in upper word
	lsl.l		&0x4,%d0		# d0 in proper place for dbl prec exp
	tst.b		FTEMP_EX(%a0)		# test sign
	bpl.b		dst_get_dman		# if positive, go process mantissa
	bset		&0x1f,%d0		# if negative, set sign
dst_get_dman:
	mov.l		FTEMP_HI(%a0),%d1	# get ms mantissa
	bfextu		%d1{&1:&20},%d1		# get upper 20 bits of ms
	or.l		%d1,%d0			# put these bits in ms word of double
	mov.l		%d0,L_SCR1(%a6)		# put the new exp back on the stack
	mov.l		FTEMP_HI(%a0),%d1	# get ms mantissa
	mov.l		&21,%d0			# load shift count
	lsl.l		%d0,%d1			# put lower 11 bits in upper bits
	mov.l		%d1,L_SCR2(%a6)		# build lower lword in memory
	mov.l		FTEMP_LO(%a0),%d1	# get ls mantissa
	bfextu		%d1{&0:&21},%d0		# get ls 21 bits of double
	mov.l		L_SCR2(%a6),%d1
	or.l		%d0,%d1			# put them in double result
	mov.l		L_SCR1(%a6),%d0
	rts
#########################################################################
# XDEF **************************************************************** #
#	dst_sgl(): create single precision value from extended prec	#
#
# XREF **************************************************************** #
#
# INPUT *************************************************************** #
#	a0 = pointer to source operand in extended precision		#
#
# OUTPUT ************************************************************** #
#	d0 = single precision result					#
#
# ALGORITHM *********************************************************** #
#									#
#  Changes extended precision to single precision.			#
#	sgl_sign = ext_sign						#
#	sgl_exp = ext_exp - $3fff(ext bias) + $7f(sgl bias)		#
#	get rid of ext integer bit					#
#	sgl_mant = ext_mant{62:40}					#
#									#
#		---------------  ---------------  ---------------	#
#  extended ->	|s|    exp    |  |1| ms mant   |  |   ls mant   |	#
#		---------------  ---------------  ---------------	#
#		95	    64	 63 62	   40 32  31	 12	0	#
#									#
#		---------------						#
#  single   ->	|s|exp|  mant |						#
#		---------------						#
#									#
#########################################################################
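#
# Commentary (not in the original source): e.g. an extended value with
# biased exponent 0x3fff (1.0 <= |x| < 2.0) maps to single biased
# exponent 0x3fff - 0x3fff + 0x7f = 0x7f, and only the top 23 fraction
# bits of the 64-bit mantissa (below the explicit integer bit) survive.
#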
	global		dst_sgl
dst_sgl:
	mov.w		FTEMP_EX(%a0),%d0	# get exponent
	subi.w		&EXT_BIAS,%d0		# subtract extended precision bias
	addi.w		&SGL_BIAS,%d0		# add single precision bias
	tst.b		FTEMP_HI(%a0)		# is number a denorm?
	bmi.b		dst_get_supper		# no
	subq.w		&0x1,%d0		# yes; denorm bias = SGL_BIAS - 1
dst_get_supper:
	swap		%d0			# put exp in upper word of d0
	lsl.l		&0x7,%d0		# shift it into single exp bits
	tst.b		FTEMP_EX(%a0)		# test sign
	bpl.b		dst_get_sman		# if positive, continue
	bset		&0x1f,%d0		# if negative, put in sign first
dst_get_sman:
	mov.l		FTEMP_HI(%a0),%d1	# get ms mantissa
	andi.l		&0x7fffff00,%d1		# get upper 23 bits of ms
	lsr.l		&0x8,%d1		# and put them flush right
	or.l		%d1,%d0			# put these bits in ms word of single
	rts
20962 ##############################################################################
20964 bsr.
l _calc_ea_fout
# fetch the <ea>
20967 mov.
b STAG
(%a6
),%d0
# fetch input type
20968 bne.w fout_pack_not_norm
# input is not NORM
20971 btst
&0x4,EXC_CMDREG
(%a6
) # static or dynamic?
20972 beq.
b fout_pack_s
# static
20975 mov.
b 1+EXC_CMDREG
(%a6
),%d1
# fetch dynamic reg
20979 bsr.
l fetch_dreg
# fetch Dn w/ k-factor
20981 bra.
b fout_pack_type
20983 mov.
b 1+EXC_CMDREG
(%a6
),%d0
# fetch static field
20986 bfexts
%d0
{&25:&7},%d0
# extract k-factor
20989 lea FP_SRC
(%a6
),%a0
# pass: ptr to input
20991 # bindec is currently scrambling FP_SRC for denorm inputs.
20992 # we'll have to change this, but for now, tough luck!!!
20993 bsr.
l bindec
# convert xprec to packed
20995 # andi.l &0xcfff000f,FP_SCR0(%a6) # clear unused fields
20996 andi.l &0xcffff00f,FP_SCR0
(%a6
) # clear unused fields
21000 tst.
b 3+FP_SCR0_EX
(%a6
)
21001 bne.
b fout_pack_set
21002 tst.
l FP_SCR0_HI
(%a6
)
21003 bne.
b fout_pack_set
21004 tst.
l FP_SCR0_LO
(%a6
)
21005 bne.
b fout_pack_set
21007 # add the extra condition that only if the k-factor was zero, too, should
21008 # we zero the exponent
21010 bne.
b fout_pack_set
21011 # "mantissa" is all zero which means that the answer is zero. but, the '040
21012 # algorithm allows the exponent to be non-zero. the 881/2 do not. therefore,
21013 # if the mantissa is zero, I will zero the exponent, too.
21014 # the question now is whether the exponents sign bit is allowed to be non-zero
21015 # for a zero, also...
21016 andi.w
&0xf000,FP_SCR0
(%a6
)
21020 lea FP_SCR0
(%a6
),%a0
# pass: src addr
21023 mov.
l (%sp
)+,%a1
# pass: dst addr
21024 mov.
l &0xc,%d0
# pass: opsize is 12 bytes
21026 cmpi.
b SPCOND_FLG
(%a6
),&mda7_flg
21029 bsr.
l _dmem_write
# write ext prec number to memory
21031 tst.
l %d1
# did dstore fail?
21032 bne.w fout_ext_err
# yes
21036 # we don't want to do the write if the exception occurred in supervisor mode
21037 # so _mem_write2() handles this for us.
21039 bsr.
l _mem_write2
# write ext prec number to memory
21041 tst.
l %d1
# did dstore fail?
21042 bne.w fout_ext_err
# yes
21046 fout_pack_not_norm
:
21047 cmpi.
b %d0
,&DENORM
# is it a DENORM?
21048 beq.w fout_pack_norm
# yes
21049 lea FP_SRC
(%a6
),%a0
21050 clr.w
2+FP_SRC_EX
(%a6
)
21051 cmpi.
b %d0
,&SNAN
# is it an SNAN?
21052 beq.
b fout_pack_snan
# yes
21053 bra.
b fout_pack_write
# no
21056 ori.w
&snaniop2_mask
,FPSR_EXCEPT
(%a6
) # set SNAN/AIOP
21057 bset
&0x6,FP_SRC_HI
(%a6
) # set snan bit
21058 bra.
b fout_pack_write
#########################################################################
# XDEF **************************************************************** #
#	fetch_dreg(): fetch register according to index in d1		#
#
# XREF **************************************************************** #
#
# INPUT *************************************************************** #
#	d1 = index of register to fetch from				#
#
# OUTPUT ************************************************************** #
#	d0 = value of register fetched					#
#
# ALGORITHM *********************************************************** #
#	According to the index value in d1 which can range from zero	#
# to fifteen, load the corresponding register file value (where	#
# address register indexes start at 8). D0/D1/A0/A1/A6/A7 are on the	#
# stack. The rest should still be in their original places.		#
#
#########################################################################

# this routine leaves d1 intact for subsequent store_dreg calls.
21084 mov.w
(tbl_fdreg.
b,%pc
,%d1.w
*2),%d0
21085 jmp
(tbl_fdreg.
b,%pc
,%d0.w
*1)
21088 short fdreg0
- tbl_fdreg
21089 short fdreg1
- tbl_fdreg
21090 short fdreg2
- tbl_fdreg
21091 short fdreg3
- tbl_fdreg
21092 short fdreg4
- tbl_fdreg
21093 short fdreg5
- tbl_fdreg
21094 short fdreg6
- tbl_fdreg
21095 short fdreg7
- tbl_fdreg
21096 short fdreg8
- tbl_fdreg
21097 short fdreg9
- tbl_fdreg
21098 short fdrega
- tbl_fdreg
21099 short fdregb
- tbl_fdreg
21100 short fdregc
- tbl_fdreg
21101 short fdregd
- tbl_fdreg
21102 short fdrege
- tbl_fdreg
21103 short fdregf
- tbl_fdreg
21106 mov.
l EXC_DREGS+
0x0(%a6
),%d0
21109 mov.
l EXC_DREGS+
0x4(%a6
),%d0
21130 mov.
l EXC_DREGS+
0x8(%a6
),%d0
21133 mov.
l EXC_DREGS+
0xc(%a6
),%d0
21151 mov.
l EXC_A7
(%a6
),%d0
#########################################################################
# XDEF **************************************************************** #
#	store_dreg_l(): store longword to data register specified by d1#
#
# XREF **************************************************************** #
#
# INPUT *************************************************************** #
#	d0 = longword value to store					#
#	d1 = index of register to fetch from				#
#
# OUTPUT ************************************************************** #
#	(data register is updated)					#
#
# ALGORITHM *********************************************************** #
#	According to the index value in d1, store the longword value	#
# in d0 to the corresponding data register. D0/D1 are on the stack	#
# while the rest are in their initial places.				#
#
#########################################################################

	global		store_dreg_l
21177 mov.w
(tbl_sdregl.
b,%pc
,%d1.w
*2),%d1
21178 jmp
(tbl_sdregl.
b,%pc
,%d1.w
*1)
21181 short sdregl0
- tbl_sdregl
21182 short sdregl1
- tbl_sdregl
21183 short sdregl2
- tbl_sdregl
21184 short sdregl3
- tbl_sdregl
21185 short sdregl4
- tbl_sdregl
21186 short sdregl5
- tbl_sdregl
21187 short sdregl6
- tbl_sdregl
21188 short sdregl7
- tbl_sdregl
21191 mov.
l %d0
,EXC_DREGS+
0x0(%a6
)
21194 mov.
l %d0
,EXC_DREGS+
0x4(%a6
)
#########################################################################
# XDEF **************************************************************** #
#	store_dreg_w(): store word to data register specified by d1	#
# XREF **************************************************************** #
# INPUT *************************************************************** #
#	d0 = word value to store					#
#	d1 = index of data register to store to				#
# OUTPUT ************************************************************** #
#	(data register is updated)					#
# ALGORITHM *********************************************************** #
#	According to the index value in d1, store the word value	#
#	in d0 to the corresponding data register. D0/D1 are on the	#
#	stack while the rest are in their initial places.		#
#########################################################################
	global	store_dreg_w
store_dreg_w:
	mov.w	(tbl_sdregw.b,%pc,%d1.w*2),%d1
	jmp	(tbl_sdregw.b,%pc,%d1.w*1)

tbl_sdregw:
	short	sdregw0 - tbl_sdregw
	short	sdregw1 - tbl_sdregw
	short	sdregw2 - tbl_sdregw
	short	sdregw3 - tbl_sdregw
	short	sdregw4 - tbl_sdregw
	short	sdregw5 - tbl_sdregw
	short	sdregw6 - tbl_sdregw
	short	sdregw7 - tbl_sdregw

sdregw0:	mov.w	%d0,2+EXC_DREGS+0x0(%a6)
sdregw1:	mov.w	%d0,2+EXC_DREGS+0x4(%a6)
#########################################################################
# XDEF **************************************************************** #
#	store_dreg_b(): store byte to data register specified by d1	#
# XREF **************************************************************** #
# INPUT *************************************************************** #
#	d0 = byte value to store					#
#	d1 = index of data register to store to				#
# OUTPUT ************************************************************** #
#	(data register is updated)					#
# ALGORITHM *********************************************************** #
#	According to the index value in d1, store the byte value	#
#	in d0 to the corresponding data register. D0/D1 are on the	#
#	stack while the rest are in their initial places.		#
#########################################################################
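#
# Why the byte store below uses an offset of 3: the saved data registers
# are big-endian longwords, so dN's low-order byte sits 3 bytes into its
# save slot (and its low-order word 2 bytes in, as store_dreg_w uses).
# A hedged C sketch of the same idea (names are illustrative only):
#
#	#include <stdint.h>
#	/* save_area holds d0 at offset 0 and d1 at offset 4, like EXC_DREGS */
#	void store_dreg_b_sketch(uint8_t *save_area, int idx, uint8_t val)
#	{
#		save_area[idx*4 + 3] = val;	/* 3 + EXC_DREGS + idx*4 */
#	}
#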
	global	store_dreg_b
store_dreg_b:
	mov.w	(tbl_sdregb.b,%pc,%d1.w*2),%d1
	jmp	(tbl_sdregb.b,%pc,%d1.w*1)

tbl_sdregb:
	short	sdregb0 - tbl_sdregb
	short	sdregb1 - tbl_sdregb
	short	sdregb2 - tbl_sdregb
	short	sdregb3 - tbl_sdregb
	short	sdregb4 - tbl_sdregb
	short	sdregb5 - tbl_sdregb
	short	sdregb6 - tbl_sdregb
	short	sdregb7 - tbl_sdregb

sdregb0:	mov.b	%d0,3+EXC_DREGS+0x0(%a6)
sdregb1:	mov.b	%d0,3+EXC_DREGS+0x4(%a6)
#########################################################################
# XDEF **************************************************************** #
#	inc_areg(): increment an address register by the value in d0	#
# XREF **************************************************************** #
# INPUT *************************************************************** #
#	d0 = amount to increment by					#
#	d1 = index of address register to increment			#
# OUTPUT ************************************************************** #
#	(address register is updated)					#
# ALGORITHM *********************************************************** #
#	Typically used for an instruction w/ a post-increment <ea>,	#
#	this routine adds the increment value in d0 to the address	#
#	register specified by d1. A0/A1/A6/A7 reside on the stack. The	#
#	rest reside in their original places.				#
#	For a7, if the increment amount is one, then we have to	#
#	increment by two. For any a7 update, set the mia7_flag so that	#
#	if an access error exception occurs later in emulation, this	#
#	address register update can be undone.				#
#########################################################################
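#
# A rough C sketch of the update rule described above (illustrative only;
# the struct and field names are assumptions, not the package's layout).
# The a7 case shows the byte-increment-by-two rule and the flag that lets
# a later access error back the update out.
#
#	#include <stdint.h>
#	struct frame { uint32_t a0, a1, a6, a7; int mia7_flag; };
#
#	void inc_areg_sketch(struct frame *f, uint32_t live_a[8],
#	                     int idx, uint32_t inc)
#	{
#		if (idx == 7) {
#			f->mia7_flag = 1;               /* allow later undo     */
#			f->a7 += (inc == 1) ? 2 : inc;  /* keep sp word-aligned */
#		} else if (idx == 0) f->a0 += inc;      /* stacked copies       */
#		else if (idx == 1) f->a1 += inc;
#		else if (idx == 6) f->a6 += inc;
#		else live_a[idx] += inc;                /* a2-a5 still live     */
#	}
#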
	mov.w	(tbl_iareg.b,%pc,%d1.w*2),%d1
	jmp	(tbl_iareg.b,%pc,%d1.w*1)

tbl_iareg:
	short	iareg0 - tbl_iareg
	short	iareg1 - tbl_iareg
	short	iareg2 - tbl_iareg
	short	iareg3 - tbl_iareg
	short	iareg4 - tbl_iareg
	short	iareg5 - tbl_iareg
	short	iareg6 - tbl_iareg
	short	iareg7 - tbl_iareg

iareg0:	add.l	%d0,EXC_DREGS+0x8(%a6)
iareg1:	add.l	%d0,EXC_DREGS+0xc(%a6)
iareg2:	add.l	%d0,%a2
iareg3:	add.l	%d0,%a3
iareg4:	add.l	%d0,%a4
iareg5:	add.l	%d0,%a5
iareg6:	add.l	%d0,(%a6)
iareg7:	mov.b	&mia7_flg,SPCOND_FLG(%a6)
	add.l	%d0,EXC_A7(%a6)
	addq.l	&0x2,EXC_A7(%a6)
#########################################################################
# XDEF **************************************************************** #
#	dec_areg(): decrement an address register by the value in d0	#
# XREF **************************************************************** #
# INPUT *************************************************************** #
#	d0 = amount to decrement by					#
#	d1 = index of address register to decrement			#
# OUTPUT ************************************************************** #
#	(address register is updated)					#
# ALGORITHM *********************************************************** #
#	Typically used for an instruction w/ a pre-decrement <ea>,	#
#	this routine subtracts the decrement value in d0 from the	#
#	address register specified by d1. A0/A1/A6/A7 reside on the	#
#	stack. The rest reside in their original places.		#
#	For a7, if the decrement amount is one, then we have to	#
#	decrement by two. For any a7 update, set the mda7_flag so that	#
#	if an access error exception occurs later in emulation, this	#
#	address register update can be undone.				#
#########################################################################
	mov.w	(tbl_dareg.b,%pc,%d1.w*2),%d1
	jmp	(tbl_dareg.b,%pc,%d1.w*1)

tbl_dareg:
	short	dareg0 - tbl_dareg
	short	dareg1 - tbl_dareg
	short	dareg2 - tbl_dareg
	short	dareg3 - tbl_dareg
	short	dareg4 - tbl_dareg
	short	dareg5 - tbl_dareg
	short	dareg6 - tbl_dareg
	short	dareg7 - tbl_dareg

dareg0:	sub.l	%d0,EXC_DREGS+0x8(%a6)
dareg1:	sub.l	%d0,EXC_DREGS+0xc(%a6)
dareg2:	sub.l	%d0,%a2
dareg3:	sub.l	%d0,%a3
dareg4:	sub.l	%d0,%a4
dareg5:	sub.l	%d0,%a5
dareg6:	sub.l	%d0,(%a6)
dareg7:	mov.b	&mda7_flg,SPCOND_FLG(%a6)
	sub.l	%d0,EXC_A7(%a6)
	subq.l	&0x2,EXC_A7(%a6)
##############################################################################

#########################################################################
# XDEF **************************************************************** #
#	load_fpn1(): load FP register value into FP_SRC(a6).		#
# XREF **************************************************************** #
# INPUT *************************************************************** #
#	d0 = index of FP register to load				#
# OUTPUT ************************************************************** #
#	FP_SRC(a6) = value loaded from FP register file			#
# ALGORITHM *********************************************************** #
#	Using the index in d0, load FP_SRC(a6) with a number from the	#
#	FP register file.						#
#########################################################################
	mov.w	(tbl_load_fpn1.b,%pc,%d0.w*2), %d0
	jmp	(tbl_load_fpn1.b,%pc,%d0.w*1)

tbl_load_fpn1:
	short	load_fpn1_0 - tbl_load_fpn1
	short	load_fpn1_1 - tbl_load_fpn1
	short	load_fpn1_2 - tbl_load_fpn1
	short	load_fpn1_3 - tbl_load_fpn1
	short	load_fpn1_4 - tbl_load_fpn1
	short	load_fpn1_5 - tbl_load_fpn1
	short	load_fpn1_6 - tbl_load_fpn1
	short	load_fpn1_7 - tbl_load_fpn1

load_fpn1_0:
	mov.l	0+EXC_FP0(%a6), 0+FP_SRC(%a6)
	mov.l	4+EXC_FP0(%a6), 4+FP_SRC(%a6)
	mov.l	8+EXC_FP0(%a6), 8+FP_SRC(%a6)
	lea	FP_SRC(%a6), %a0
load_fpn1_1:
	mov.l	0+EXC_FP1(%a6), 0+FP_SRC(%a6)
	mov.l	4+EXC_FP1(%a6), 4+FP_SRC(%a6)
	mov.l	8+EXC_FP1(%a6), 8+FP_SRC(%a6)
	lea	FP_SRC(%a6), %a0
load_fpn1_2:
	fmovm.x	&0x20, FP_SRC(%a6)
	lea	FP_SRC(%a6), %a0
load_fpn1_3:
	fmovm.x	&0x10, FP_SRC(%a6)
	lea	FP_SRC(%a6), %a0
load_fpn1_4:
	fmovm.x	&0x08, FP_SRC(%a6)
	lea	FP_SRC(%a6), %a0
load_fpn1_5:
	fmovm.x	&0x04, FP_SRC(%a6)
	lea	FP_SRC(%a6), %a0
load_fpn1_6:
	fmovm.x	&0x02, FP_SRC(%a6)
	lea	FP_SRC(%a6), %a0
load_fpn1_7:
	fmovm.x	&0x01, FP_SRC(%a6)
	lea	FP_SRC(%a6), %a0
#############################################################################

#########################################################################
# XDEF **************************************************************** #
#	load_fpn2(): load FP register value into FP_DST(a6).		#
# XREF **************************************************************** #
# INPUT *************************************************************** #
#	d0 = index of FP register to load				#
# OUTPUT ************************************************************** #
#	FP_DST(a6) = value loaded from FP register file			#
# ALGORITHM *********************************************************** #
#	Using the index in d0, load FP_DST(a6) with a number from the	#
#	FP register file.						#
#########################################################################
	mov.w	(tbl_load_fpn2.b,%pc,%d0.w*2), %d0
	jmp	(tbl_load_fpn2.b,%pc,%d0.w*1)

tbl_load_fpn2:
	short	load_fpn2_0 - tbl_load_fpn2
	short	load_fpn2_1 - tbl_load_fpn2
	short	load_fpn2_2 - tbl_load_fpn2
	short	load_fpn2_3 - tbl_load_fpn2
	short	load_fpn2_4 - tbl_load_fpn2
	short	load_fpn2_5 - tbl_load_fpn2
	short	load_fpn2_6 - tbl_load_fpn2
	short	load_fpn2_7 - tbl_load_fpn2

load_fpn2_0:
	mov.l	0+EXC_FP0(%a6), 0+FP_DST(%a6)
	mov.l	4+EXC_FP0(%a6), 4+FP_DST(%a6)
	mov.l	8+EXC_FP0(%a6), 8+FP_DST(%a6)
	lea	FP_DST(%a6), %a0
load_fpn2_1:
	mov.l	0+EXC_FP1(%a6), 0+FP_DST(%a6)
	mov.l	4+EXC_FP1(%a6), 4+FP_DST(%a6)
	mov.l	8+EXC_FP1(%a6), 8+FP_DST(%a6)
	lea	FP_DST(%a6), %a0
load_fpn2_2:
	fmovm.x	&0x20, FP_DST(%a6)
	lea	FP_DST(%a6), %a0
load_fpn2_3:
	fmovm.x	&0x10, FP_DST(%a6)
	lea	FP_DST(%a6), %a0
load_fpn2_4:
	fmovm.x	&0x08, FP_DST(%a6)
	lea	FP_DST(%a6), %a0
load_fpn2_5:
	fmovm.x	&0x04, FP_DST(%a6)
	lea	FP_DST(%a6), %a0
load_fpn2_6:
	fmovm.x	&0x02, FP_DST(%a6)
	lea	FP_DST(%a6), %a0
load_fpn2_7:
	fmovm.x	&0x01, FP_DST(%a6)
	lea	FP_DST(%a6), %a0
#############################################################################

#########################################################################
# XDEF **************************************************************** #
#	store_fpreg(): store an fp value to the fpreg designated by d0. #
# XREF **************************************************************** #
# INPUT *************************************************************** #
#	fp0 = extended precision value to store				#
#	d0  = index of floating-point register				#
# OUTPUT ************************************************************** #
# ALGORITHM *********************************************************** #
#	Store the value in fp0 to the FP register designated by the	#
#	value in d0. The FP number can be DENORM or SNAN so we have to	#
#	be careful that we don't take an exception here.		#
#########################################################################
	mov.w	(tbl_store_fpreg.b,%pc,%d0.w*2), %d0
	jmp	(tbl_store_fpreg.b,%pc,%d0.w*1)

tbl_store_fpreg:
	short	store_fpreg_0 - tbl_store_fpreg
	short	store_fpreg_1 - tbl_store_fpreg
	short	store_fpreg_2 - tbl_store_fpreg
	short	store_fpreg_3 - tbl_store_fpreg
	short	store_fpreg_4 - tbl_store_fpreg
	short	store_fpreg_5 - tbl_store_fpreg
	short	store_fpreg_6 - tbl_store_fpreg
	short	store_fpreg_7 - tbl_store_fpreg

store_fpreg_0:
	fmovm.x	&0x80, EXC_FP0(%a6)
store_fpreg_1:
	fmovm.x	&0x80, EXC_FP1(%a6)
store_fpreg_2:
	fmovm.x	&0x01, -(%sp)
	fmovm.x	(%sp)+, &0x20
store_fpreg_3:
	fmovm.x	&0x01, -(%sp)
	fmovm.x	(%sp)+, &0x10
store_fpreg_4:
	fmovm.x	&0x01, -(%sp)
	fmovm.x	(%sp)+, &0x08
store_fpreg_5:
	fmovm.x	&0x01, -(%sp)
	fmovm.x	(%sp)+, &0x04
store_fpreg_6:
	fmovm.x	&0x01, -(%sp)
	fmovm.x	(%sp)+, &0x02
store_fpreg_7:
	fmovm.x	&0x01, -(%sp)
	fmovm.x	(%sp)+, &0x01
#########################################################################
# XDEF **************************************************************** #
#	_denorm(): denormalize an intermediate result			#
# XREF **************************************************************** #
# INPUT *************************************************************** #
#	a0 = points to the operand to be denormalized			#
#		(in the internal extended format)			#
#	d0 = rounding precision						#
# OUTPUT ************************************************************** #
#	a0 = pointer to the denormalized result				#
#		(in the internal extended format)			#
#	d0 = guard,round,sticky						#
# ALGORITHM *********************************************************** #
#	According to the exponent underflow threshold for the given	#
#	precision, shift the mantissa bits to the right in order to	#
#	raise the exponent of the operand to the threshold value.	#
#	While shifting the mantissa bits right, maintain the value of	#
#	the guard, round, and sticky bits.				#
#	(1) _denorm() is called by the underflow routines.		#
#	(2) _denorm() does NOT affect the status register.		#
#########################################################################
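#
# A minimal C sketch of the idea (illustrative only; xop_t is an assumed
# type, not the FPSP's internal format): shift the mantissa right until
# the exponent reaches the underflow threshold, collecting guard, round,
# and sticky, packed into bits 31:29 exactly as the code below returns
# them in d0. The real code also shortcuts shifts of more than 65 places.
#
#	#include <stdint.h>
#	typedef struct { uint16_t exp; uint64_t man; } xop_t;
#
#	uint32_t denorm_sketch(xop_t *op, uint16_t threshold)
#	{
#		unsigned n = threshold - op->exp;   /* places to shift right */
#		uint32_t g = 0, r = 0, s = 0;
#		while (n--) {
#			s |= r;                     /* old round joins sticky */
#			r = g;
#			g = (uint32_t)(op->man & 1); /* bit being shifted off */
#			op->man >>= 1;
#		}
#		op->exp = threshold;
#		return (g << 31) | (r << 30) | (s << 29);
#	}
#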
# table of exponent threshold values for each precision

#
# Load the exponent threshold for the precision selected and check
# to see if (threshold - exponent) is > 65 in which case we can
# simply calculate the sticky bit and zero the mantissa. otherwise
# we have to call the denormalization routine.
#
	lsr.b	&0x2, %d0		# shift prec to lo bits
	mov.w	(tbl_thresh.b,%pc,%d0.w*2), %d1	# load prec threshold
	mov.w	%d1, %d0		# copy d1 into d0
	sub.w	FTEMP_EX(%a0), %d0	# diff = threshold - exp
	cmpi.w	%d0, &66		# is diff > 65? (mant + g,r bits)
	bpl.b	denorm_set_stky		# yes; just calc sticky

	clr.l	%d0			# clear g,r,s
	btst	&inex2_bit, FPSR_EXCEPT(%a6)	# was INEX2 set?
	beq.b	denorm_call		# no; don't change anything
	bset	&29, %d0		# yes; set sticky bit

denorm_call:
	bsr.l	dnrm_lp			# denormalize the number

# all bits would have been shifted off during the denorm so simply
# calculate if the sticky should be set and clear the entire mantissa.
denorm_set_stky:
	mov.l	&0x20000000, %d0	# set sticky bit in return value
	mov.w	%d1, FTEMP_EX(%a0)	# load exp with threshold
	clr.l	FTEMP_HI(%a0)		# set d1 = 0 (ms mantissa)
	clr.l	FTEMP_LO(%a0)		# set d2 = 0 (ls mantissa)
# dnrm_lp(): denormalize exponent/mantissa to the specified threshold	#
#									#
# %a0	     : points to the operand to be denormalized			#
# %d0{31:29} : initial guard,round,sticky				#
# %d1{15:0}  : denormalization threshold				#
#									#
# %a0	     : points to the denormalized operand			#
# %d0{31:29} : final guard,round,sticky					#

# *** Local Equates *** #
	set	GRS, L_SCR2		# g,r,s temp storage
	set	FTEMP_LO2, L_SCR1	# FTEMP_LO copy

dnrm_lp:

#
# make a copy of FTEMP_LO and place the g,r,s bits directly after it
# in memory so as to make the bitfield extraction for denormalization easier.
#
	mov.l	FTEMP_LO(%a0), FTEMP_LO2(%a6)	# make FTEMP_LO copy
	mov.l	%d0, GRS(%a6)			# place g,r,s after it
#
# check to see how much less than the underflow threshold the operand
# exponent is.
#
	mov.l	%d1, %d0		# copy the denorm threshold
	sub.w	FTEMP_EX(%a0), %d1	# d1 = threshold - uns exponent
	ble.b	dnrm_no_lp		# d1 <= 0
	cmpi.w	%d1, &0x20		# is (0 <= d1 < 32) ?
	cmpi.w	%d1, &0x40		# is (32 <= d1 < 64) ?
	bra.w	case_3			# (d1 >= 64)

#
# No denormalization necessary
#
dnrm_no_lp:
	mov.l	GRS(%a6), %d0		# restore original g,r,s
# %d0 = denorm threshold
# %d1 = "n" = amt to shift
#
#	---------------------------------------------------------
#	|     FTEMP_HI	  |     FTEMP_LO     |grs000.........000|
#	---------------------------------------------------------
#	<-(32 - n)-><-(n)-><-(32 - n)-><-(n)-><-(32 - n)-><-(n)->
#
#	<-(n)-><-(32 - n)-><------(32)-------><------(32)------->
#	---------------------------------------------------------
#	|0.....0| NEW_HI   |  NEW_FTEMP_LO    |grs               |
#	---------------------------------------------------------
#
	mov.l	%d2, -(%sp)		# create temp storage

	mov.w	%d0, FTEMP_EX(%a0)	# exponent = denorm threshold
	sub.w	%d1, %d0		# %d0 = 32 - %d1

	cmpi.w	%d1, &29		# is shft amt >= 29
	blt.b	case1_extract		# no; no fix needed
	mov.b	GRS(%a6), %d2
	or.b	%d2, 3+FTEMP_LO2(%a6)

case1_extract:
	bfextu	FTEMP_HI(%a0){&0:%d0}, %d2	# %d2 = new FTEMP_HI
	bfextu	FTEMP_HI(%a0){%d0:&32}, %d1	# %d1 = new FTEMP_LO
	bfextu	FTEMP_LO2(%a6){%d0:&32}, %d0	# %d0 = new G,R,S

	mov.l	%d2, FTEMP_HI(%a0)	# store new FTEMP_HI
	mov.l	%d1, FTEMP_LO(%a0)	# store new FTEMP_LO

	bftst	%d0{&2:&30}		# were bits shifted off?
	beq.b	case1_sticky_clear	# no; go finish
	bset	&rnd_stky_bit, %d0	# yes; set sticky bit

case1_sticky_clear:
	and.l	&0xe0000000, %d0	# clear all but G,R,S
	mov.l	(%sp)+, %d2		# restore temp register
# %d0 = denorm threshold
# %d1 = "n" = amt to shift
#
#	---------------------------------------------------------
#	|     FTEMP_HI	  |     FTEMP_LO     |grs000.........000|
#	---------------------------------------------------------
#	<-(32 - n)-><-(n)-><-(32 - n)-><-(n)-><-(32 - n)-><-(n)->
#	  \                \                  -------------------
#	   \                --------------------                \
#	    -------------------                \                 \
#
#	<-------(32)------><-(n)-><-(32 - n)-><------(32)------->
#	---------------------------------------------------------
#	|0...............0|0....0|   NEW_LO   |grs               |
#	---------------------------------------------------------
#
	mov.l	%d2, -(%sp)		# create temp storage

	mov.w	%d0, FTEMP_EX(%a0)	# exponent = denorm threshold
	subi.w	&0x20, %d1		# %d1 now between 0 and 32
	sub.w	%d1, %d0		# %d0 = 32 - %d1

# subtle step here; or in the g,r,s at the bottom of FTEMP_LO to minimize
# the number of bits to check for the sticky detect.
# it only plays a role in shift amounts of 61-63.
	mov.b	GRS(%a6), %d2
	or.b	%d2, 3+FTEMP_LO2(%a6)

	bfextu	FTEMP_HI(%a0){&0:%d0}, %d2	# %d2 = new FTEMP_LO
	bfextu	FTEMP_HI(%a0){%d0:&32}, %d1	# %d1 = new G,R,S

	bftst	%d1{&2:&30}		# were any bits shifted off?
	bne.b	case2_set_sticky	# yes; set sticky bit
	bftst	FTEMP_LO2(%a6){%d0:&31}	# were any bits shifted off?
	bne.b	case2_set_sticky	# yes; set sticky bit

	mov.l	%d1, %d0		# move new G,R,S to %d0

case2_set_sticky:
	mov.l	%d1, %d0		# move new G,R,S to %d0
	bset	&rnd_stky_bit, %d0	# set sticky bit

	clr.l	FTEMP_HI(%a0)		# store FTEMP_HI = 0
	mov.l	%d2, FTEMP_LO(%a0)	# store FTEMP_LO
	and.l	&0xe0000000, %d0	# clear all but G,R,S

	mov.l	(%sp)+,%d2		# restore temp register
case_3:
# %d0 = denorm threshold
# %d1 = amt to shift
#
	mov.w	%d0, FTEMP_EX(%a0)	# insert denorm threshold

	cmpi.w	%d1, &65		# is shift amt > 65?
	blt.b	case3_64		# no; it's == 64
	beq.b	case3_65		# no; it's == 65

#
# Shift value is > 65 and out of range. All bits are shifted off.
# Return a zero mantissa with the sticky bit set.
#
	clr.l	FTEMP_HI(%a0)		# clear hi(mantissa)
	clr.l	FTEMP_LO(%a0)		# clear lo(mantissa)
	mov.l	&0x20000000, %d0	# set sticky bit
#	---------------------------------------------------------
#	|     FTEMP_HI	  |     FTEMP_LO     |grs000.........000|
#	---------------------------------------------------------
#	<-------(32)------>
#	 \                 ------------------------------
#	  -------------------------------                 \
#
#	<-------(32)------>
#	---------------------------------------------------------
#	|0...............0|0................0|grs               |
#	---------------------------------------------------------
#
case3_64:
	mov.l	FTEMP_HI(%a0), %d0	# fetch hi(mantissa)
	mov.l	%d0, %d1		# make a copy
	and.l	&0xc0000000, %d0	# extract G,R
	and.l	&0x3fffffff, %d1	# extract other bits

	bra.b	case3_complete
#	---------------------------------------------------------
#	|     FTEMP_HI	  |     FTEMP_LO     |grs000.........000|
#	---------------------------------------------------------
#	<-------(32)------>
#	 \                 ------------------------------
#	  --------------------------------                \
#
#	<-------(31)----->
#	---------------------------------------------------------
#	|0...............0|0................0|0rs               |
#	---------------------------------------------------------
#
case3_65:
	mov.l	FTEMP_HI(%a0), %d0	# fetch hi(mantissa)
	and.l	&0x80000000, %d0	# extract R bit
	lsr.l	&0x1, %d0		# shift high bit into R bit
	and.l	&0x7fffffff, %d1	# extract other bits
case3_complete:
# last operation done was an "and" of the bits shifted off so the condition
# codes are already set so branch accordingly.
	bne.b	case3_set_sticky	# yes; go set new sticky
	tst.l	FTEMP_LO(%a0)		# were any bits shifted off?
	bne.b	case3_set_sticky	# yes; go set new sticky
	tst.b	GRS(%a6)		# were any bits shifted off?
	bne.b	case3_set_sticky	# yes; go set new sticky

#
# no bits were shifted off so don't set the sticky bit.
# the entire mantissa is zero.
#
	clr.l	FTEMP_HI(%a0)		# clear hi(mantissa)
	clr.l	FTEMP_LO(%a0)		# clear lo(mantissa)

#
# some bits were shifted off so set the sticky bit.
# the entire mantissa is zero.
#
case3_set_sticky:
	bset	&rnd_stky_bit,%d0	# set new sticky bit
	clr.l	FTEMP_HI(%a0)		# clear hi(mantissa)
	clr.l	FTEMP_LO(%a0)		# clear lo(mantissa)
#########################################################################
# XDEF **************************************************************** #
#	_round(): round result according to precision/mode		#
# XREF **************************************************************** #
# INPUT *************************************************************** #
#	a0	  = ptr to input operand in internal extended format	#
#	d1(hi)	  = contains rounding precision:			#
#			ext = $0000xxxx					#
#			sgl = $0004xxxx					#
#			dbl = $0008xxxx					#
#	d1(lo)	  = contains rounding mode				#
#	d0{31:29} = contains the g,r,s bits (extended)			#
# OUTPUT ************************************************************** #
#	a0 = pointer to rounded result					#
# ALGORITHM *********************************************************** #
#	On return the value pointed to by a0 is correctly rounded,	#
#	a0 is preserved and the g-r-s bits in d0 are cleared.		#
#	The result is not typed - the tag field is invalid. The	#
#	result is still in the internal extended format.		#
#	The INEX bit of USER_FPSR will be set if the rounded result was #
#	inexact (i.e. if any of the g-r-s bits were set).		#
#########################################################################
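#
# A hedged C sketch of the rounding decision made below (illustrative
# only; enum values and xop_t are assumptions). The g,r,s bits arrive
# packed in bits 31:29 as produced by _denorm()/ext_grs().
#
#	#include <stdint.h>
#	typedef struct { uint16_t exp; uint64_t man; } xop_t;
#	enum rmode { RN, RZ, RM, RP };
#
#	void round_sketch(xop_t *op, int neg, uint32_t grs, enum rmode mode)
#	{
#		int g = (grs >> 31) & 1, rs = (grs >> 29) & 3;
#		int up = 0;
#		if (!(g | rs)) return;          /* exact: nothing to do      */
#		switch (mode) {                 /* INEX would be set here    */
#		case RN: up = g && (rs || (op->man & 1)); break; /* ties-to-even */
#		case RZ: up = 0; break;         /* truncate                  */
#		case RP: up = !neg; break;      /* toward +inf               */
#		case RM: up = neg; break;       /* toward -inf               */
#		}
#		if (up && ++op->man == 0) {     /* carry out of the mantissa */
#			op->man = 1ULL << 63;   /* re-normalize              */
#			op->exp++;
#		}
#	}
#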
_round:
#
# ext_grs() looks at the rounding precision and sets the appropriate
# guard, round, and sticky bits.
# If (G,R,S == 0) then result is exact and round is done, else set
# the inex flag in status reg and continue.
#
	bsr.l	ext_grs			# extract G,R,S

	tst.l	%d0			# are G,R,S zero?
	beq.w	truncate		# yes; round is complete

	or.w	&inx2a_mask, 2+USER_FPSR(%a6)	# set inex2/ainex

#
# Use rounding mode as an index into a jump table for these modes.
# All of the following assumes grs != 0.
#
	mov.w	(tbl_mode.b,%pc,%d1.w*2), %a1	# load jump offset
	jmp	(tbl_mode.b,%pc,%a1)	# jmp to rnd mode handler

tbl_mode:
	short	rnd_near - tbl_mode
	short	truncate - tbl_mode	# RZ always truncates
	short	rnd_mnus - tbl_mode
	short	rnd_plus - tbl_mode
#################################################################
#	ROUND PLUS INFINITY					#
#								#
#	If sign of fp number = 0 (positive), then add 1 to l.	#
#################################################################
rnd_plus:
	tst.b	FTEMP_SGN(%a0)		# check for sign
	bmi.w	truncate		# if negative then truncate

	mov.l	&0xffffffff, %d0	# force g,r,s to be all f's
	swap	%d1			# set up d1 for round prec.

	cmpi.b	%d1, &s_mode		# is prec = sgl?
	beq.w	add_sgl			# yes
	bgt.w	add_dbl			# no; it's dbl
	bra.w	add_ext			# no; it's ext
#################################################################
#	ROUND MINUS INFINITY					#
#								#
#	If sign of fp number = 1 (negative), then add 1 to l.	#
#################################################################
rnd_mnus:
	tst.b	FTEMP_SGN(%a0)		# check for sign
	bpl.w	truncate		# if positive then truncate

	mov.l	&0xffffffff, %d0	# force g,r,s to be all f's
	swap	%d1			# set up d1 for round prec.

	cmpi.b	%d1, &s_mode		# is prec = sgl?
	beq.w	add_sgl			# yes
	bgt.w	add_dbl			# no; it's dbl
	bra.w	add_ext			# no; it's ext
#################################################################
#	ROUND NEAREST						#
#								#
#	If (g=1), then add 1 to l and if (r=s=0), then clear l	#
#	Note that this will round to even in case of a tie.	#
#################################################################
rnd_near:
	asl.l	&0x1, %d0		# shift g-bit to c-bit
	bcc.w	truncate		# if (g=0) then truncate

	swap	%d1			# set up d1 for round prec.

	cmpi.b	%d1, &s_mode		# is prec = sgl?
	beq.w	add_sgl			# yes
	bgt.w	add_dbl			# no; it's dbl
	bra.w	add_ext			# no; it's ext

# *** LOCAL EQUATES ***
	set	ad_1_sgl, 0x00000100	# constant to add 1 to l-bit in sgl prec
	set	ad_1_dbl, 0x00000800	# constant to add 1 to l-bit in dbl prec
#########################
#	ADD SINGLE	#
#########################
add_sgl:
	add.l	&ad_1_sgl, FTEMP_HI(%a0)
	bcc.b	scc_clr			# no mantissa overflow
	roxr.w	FTEMP_HI(%a0)		# shift v-bit back in
	roxr.w	FTEMP_HI+2(%a0)		# shift v-bit back in
	add.w	&0x1, FTEMP_EX(%a0)	# and incr exponent
scc_clr:
	tst.l	%d0			# test for rs = 0
	and.w	&0xfe00, FTEMP_HI+2(%a0)	# clear the l-bit
sgl_done:
	and.l	&0xffffff00, FTEMP_HI(%a0)	# truncate bits beyond sgl limit
	clr.l	FTEMP_LO(%a0)		# clear d2
#########################
#	ADD EXTENDED	#
#########################
add_ext:
	addq.l	&1,FTEMP_LO(%a0)	# add 1 to l-bit
	bcc.b	xcc_clr			# test for carry out
	addq.l	&1,FTEMP_HI(%a0)	# propagate carry

	roxr.w	FTEMP_HI(%a0)		# mant is 0 so restore v-bit
	roxr.w	FTEMP_HI+2(%a0)		# mant is 0 so restore v-bit
	roxr.w	FTEMP_LO(%a0)
	roxr.w	FTEMP_LO+2(%a0)
	add.w	&0x1,FTEMP_EX(%a0)	# and inc exp
xcc_clr:
	tst.l	%d0			# test rs = 0
	and.b	&0xfe,FTEMP_LO+3(%a0)	# clear the l bit
#########################
#	ADD DOUBLE	#
#########################
add_dbl:
	add.l	&ad_1_dbl, FTEMP_LO(%a0)	# add 1 to lsb
	bcc.b	dcc_clr			# no carry
	addq.l	&0x1, FTEMP_HI(%a0)	# propagate carry
	bcc.b	dcc_clr			# no carry

	roxr.w	FTEMP_HI(%a0)		# mant is 0 so restore v-bit
	roxr.w	FTEMP_HI+2(%a0)		# mant is 0 so restore v-bit
	roxr.w	FTEMP_LO(%a0)
	roxr.w	FTEMP_LO+2(%a0)
	addq.w	&0x1, FTEMP_EX(%a0)	# incr exponent
dcc_clr:
	tst.l	%d0			# test for rs = 0
	and.w	&0xf000, FTEMP_LO+2(%a0)	# clear the l-bit

dbl_done:
	and.l	&0xfffff800,FTEMP_LO(%a0)	# truncate bits beyond dbl limit
###########################
# Truncate all other bits #
###########################
truncate:
	swap	%d1			# select rnd prec

	cmpi.b	%d1, &s_mode		# is prec sgl?
	beq.w	sgl_done		# yes
	bgt.b	dbl_done		# no; it's dbl
#
# ext_grs(): extract guard, round and sticky bits according to
#	     rounding precision.
#
#	d0	  = extended precision g,r,s (in d0{31:29})
#	d1	  = {PREC,ROUND}
#	d0{31:29} = guard, round, sticky
#
# The ext_grs routine extracts the guard/round/sticky bits according to
# the selected rounding precision. It is called by the round subroutine
# only. All registers except d0 are kept intact. d0 becomes an
# updated guard,round,sticky in d0{31:29}.
#
# Notes: ext_grs uses the round PREC, and therefore has to swap d1
#	 prior to usage, and needs to restore d1 to its original value.
#	 this routine is tightly tied to the round routine and not meant
#	 to uphold standard subroutine calling practices.
#
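#
# What the extraction below computes, as a C sketch (illustrative only):
# for single precision 24 mantissa bits are kept, for double 53, so the
# guard and round bits are the two bits just below that limit and sticky
# is the OR of everything further right plus the incoming g,r,s.
#
#	#include <stdint.h>
#	uint32_t ext_grs_sketch(uint64_t man, uint32_t grs_ext, int sgl)
#	{
#		unsigned kept = sgl ? 24 : 53;   /* incl. integer bit  */
#		unsigned drop = 64 - kept;       /* bits below new lsb */
#		uint32_t g = (man >> (drop - 1)) & 1;
#		uint32_t r = (man >> (drop - 2)) & 1;
#		uint64_t below = man & ((1ULL << (drop - 2)) - 1);
#		uint32_t s = (below != 0) || (grs_ext != 0);
#		return (g << 31) | (r << 30) | (s << 29);
#	}
#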
ext_grs:
	swap	%d1			# have d1.w point to round precision
	tst.b	%d1			# is rnd prec = extended?
	bne.b	ext_grs_not_ext		# no; go handle sgl or dbl

#
# %d0 actually already holds g,r,s since _round() had it before calling
# this function. so, as long as we don't disturb it, we are "returning" it.
#
	swap	%d1			# yes; return to correct positions

ext_grs_not_ext:
	movm.l	&0x3000, -(%sp)		# make some temp registers {d2/d3}

	cmpi.b	%d1, &s_mode		# is rnd prec = sgl?
	bne.b	ext_grs_dbl		# no; go handle dbl
#	-----------------------------------------------------
#	| EXP |XXXXXXX|          |xx   |                |grs|
#	-----------------------------------------------------
#	                   ee ---------------------
#
	bfextu	FTEMP_HI(%a0){&24:&2}, %d3	# sgl prec. g-r are 2 bits right
	mov.l	&30, %d2		# of the sgl prec. limits
	lsl.l	%d2, %d3		# shift g-r bits to MSB of d3
	mov.l	FTEMP_HI(%a0), %d2	# get word 2 for s-bit test
	and.l	&0x0000003f, %d2	# s bit is the or of all other
	bne.b	ext_grs_st_stky		# bits to the right of g-r
	tst.l	FTEMP_LO(%a0)		# test lower mantissa
	bne.b	ext_grs_st_stky		# if any are set, set sticky
	tst.l	%d0			# test original g,r,s
	bne.b	ext_grs_st_stky		# if any are set, set sticky
	bra.b	ext_grs_end_sd		# if words 3 and 4 are clr, exit
#	-----------------------------------------------------
#	| EXP |XXXXXXX|          |     |xx              |grs|
#	-----------------------------------------------------
#
ext_grs_dbl:
	bfextu	FTEMP_LO(%a0){&21:&2}, %d3	# dbl-prec. g-r are 2 bits right
	mov.l	&30, %d2		# of the dbl prec. limits
	lsl.l	%d2, %d3		# shift g-r bits to the MSB of d3
	mov.l	FTEMP_LO(%a0), %d2	# get lower mantissa for s-bit test
	and.l	&0x000001ff, %d2	# s bit is the or-ing of all
	bne.b	ext_grs_st_stky		# other bits to the right of g-r
	tst.l	%d0			# test original g,r,s
	bne.b	ext_grs_st_stky		# if any are set, set sticky
	bra.b	ext_grs_end_sd		# if clear, exit

ext_grs_st_stky:
	bset	&rnd_stky_bit, %d3	# set sticky bit
ext_grs_end_sd:
	mov.l	%d3, %d0		# return grs to d0

	movm.l	(%sp)+, &0xc		# restore scratch registers {d2/d3}

	swap	%d1			# restore d1 to original
#########################################################################
# norm(): normalize the mantissa of an extended precision input. the	#
#	  input operand should not be normalized already.		#
# XDEF **************************************************************** #
# XREF **************************************************************** #
# INPUT *************************************************************** #
#	a0 = pointer to fp extended-precision operand to normalize	#
# OUTPUT ************************************************************** #
#	d0 = number of bit positions the mantissa was shifted		#
#	a0 = the input operand's mantissa is normalized; the exponent	#
#	     is unchanged						#
#########################################################################
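#
# Conceptually this is a count-leading-zeros and shift, which the code
# below does with bfffo on the hi and lo mantissa halves. A C sketch
# (illustrative only):
#
#	#include <stdint.h>
#	unsigned norm_sketch(uint64_t *man)    /* returns shift count */
#	{
#		unsigned shift = 0;
#		if (*man == 0) return 0;       /* callers never pass zero   */
#		while (!(*man >> 63)) {        /* msb becomes the j-bit     */
#			*man <<= 1;
#			shift++;
#		}
#		return shift;                  /* caller adjusts the exponent */
#	}
#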
norm:
	mov.l	%d2, -(%sp)		# create some temp regs

	mov.l	FTEMP_HI(%a0), %d0	# load hi(mantissa)
	mov.l	FTEMP_LO(%a0), %d1	# load lo(mantissa)

	bfffo	%d0{&0:&32}, %d2	# how many places to shift?
	beq.b	norm_lo			# hi(man) is all zeroes!

	lsl.l	%d2, %d0		# left shift hi(man)
	bfextu	%d1{&0:%d2}, %d3	# extract lo bits

	or.l	%d3, %d0		# create hi(man)
	lsl.l	%d2, %d1		# create lo(man)

	mov.l	%d0, FTEMP_HI(%a0)	# store new hi(man)
	mov.l	%d1, FTEMP_LO(%a0)	# store new lo(man)

	mov.l	%d2, %d0		# return shift amount

	mov.l	(%sp)+, %d3		# restore temp regs

norm_lo:
	bfffo	%d1{&0:&32}, %d2	# how many places to shift?
	lsl.l	%d2, %d1		# shift lo(man)
	add.l	&32, %d2		# add 32 to shft amount

	mov.l	%d1, FTEMP_HI(%a0)	# store hi(man)
	clr.l	FTEMP_LO(%a0)		# lo(man) is now zero

	mov.l	%d2, %d0		# return shift amount

	mov.l	(%sp)+, %d3		# restore temp regs
#########################################################################
# unnorm_fix(): - changes an UNNORM to one of NORM, DENORM, or ZERO	#
#		- returns corresponding optype tag			#
# XDEF **************************************************************** #
# XREF **************************************************************** #
#	norm() - normalize the mantissa				#
# INPUT *************************************************************** #
#	a0 = pointer to unnormalized extended precision number		#
# OUTPUT ************************************************************** #
#	d0 = optype tag - is corrected to one of NORM, DENORM, or ZERO	#
#	a0 = input operand has been converted to a norm, denorm, or	#
#	     zero; both the exponent and mantissa are changed.		#
#########################################################################
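#
# A C sketch of the classification below (illustrative only; the tag
# names and argument layout are assumptions). Fully normalizing would
# take 'shift' places; if that would drag the exponent below zero, only
# shift until the exponent hits zero and tag the result DENORM.
#
#	#include <stdint.h>
#	enum tag { TAG_NORM, TAG_DENORM, TAG_ZERO };
#
#	enum tag unnorm_fix_sketch(uint16_t *sign_exp, uint64_t *man)
#	{
#		uint16_t exp = *sign_exp & 0x7fff, sign = *sign_exp & 0x8000;
#		unsigned shift = 0;
#		if (*man == 0) { *sign_exp = sign; return TAG_ZERO; }
#		while (!(*man >> 63)) { *man <<= 1; shift++; }
#		if (shift <= exp) {                  /* stays normalized   */
#			*sign_exp = sign | (exp - shift);
#			return TAG_NORM;
#		}
#		*man >>= (shift - exp);              /* stop when exp == 0 */
#		*sign_exp = sign;
#		return TAG_DENORM;
#	}
#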
unnorm_fix:
	bfffo	FTEMP_HI(%a0){&0:&32}, %d0	# how many shifts are needed?
	bne.b	unnorm_shift		# hi(man) is not all zeroes

#
# hi(man) is all zeroes so see if any bits in lo(man) are set
#
	bfffo	FTEMP_LO(%a0){&0:&32}, %d0	# is operand really a zero?
	beq.w	unnorm_zero		# yes

	add.w	&32, %d0		# no; fix shift distance

#
# d0 = # shifts needed for complete normalization
#
unnorm_shift:
	clr.l	%d1			# clear top word
	mov.w	FTEMP_EX(%a0), %d1	# extract exponent
	and.w	&0x7fff, %d1		# strip off sgn

	cmp.w	%d0, %d1		# will denorm push exp < 0?
	bgt.b	unnorm_nrm_zero		# yes; denorm only until exp = 0

#
# exponent would not go < 0. therefore, number stays normalized
#
	sub.w	%d0, %d1		# shift exponent value
	mov.w	FTEMP_EX(%a0), %d0	# load old exponent
	and.w	&0x8000, %d0		# save old sign
	or.w	%d0, %d1		# {sgn,new exp}
	mov.w	%d1, FTEMP_EX(%a0)	# insert new exponent

	bsr.l	norm			# normalize UNNORM

	mov.b	&NORM, %d0		# return new optype tag

#
# exponent would go < 0, so only denormalize until exp = 0
#
unnorm_nrm_zero:
	cmp.b	%d1, &32		# is exp <= 32?
	bgt.b	unnorm_nrm_zero_lrg	# no; go handle large exponent

	bfextu	FTEMP_HI(%a0){%d1:&32}, %d0	# extract new hi(man)
	mov.l	%d0, FTEMP_HI(%a0)	# save new hi(man)

	mov.l	FTEMP_LO(%a0), %d0	# fetch old lo(man)
	lsl.l	%d1, %d0		# extract new lo(man)
	mov.l	%d0, FTEMP_LO(%a0)	# save new lo(man)

	and.w	&0x8000, FTEMP_EX(%a0)	# set exp = 0

	mov.b	&DENORM, %d0		# return new optype tag

#
# only mantissa bits set are in lo(man)
#
unnorm_nrm_zero_lrg:
	sub.w	&32, %d1		# adjust shft amt by 32

	mov.l	FTEMP_LO(%a0), %d0	# fetch old lo(man)
	lsl.l	%d1, %d0		# left shift lo(man)

	mov.l	%d0, FTEMP_HI(%a0)	# store new hi(man)
	clr.l	FTEMP_LO(%a0)		# lo(man) = 0

	and.w	&0x8000, FTEMP_EX(%a0)	# set exp = 0

	mov.b	&DENORM, %d0		# return new optype tag

#
# whole mantissa is zero so this UNNORM is actually a zero
#
unnorm_zero:
	and.w	&0x8000, FTEMP_EX(%a0)	# force exponent to zero

	mov.b	&ZERO, %d0		# fix optype tag
#########################################################################
# XDEF **************************************************************** #
#	set_tag_x(): return the optype of the input ext fp number	#
# XREF **************************************************************** #
# INPUT *************************************************************** #
#	a0 = pointer to extended precision operand			#
# OUTPUT ************************************************************** #
#	d0 = value of type tag						#
#		one of: NORM, INF, QNAN, SNAN, DENORM, UNNORM, ZERO	#
# ALGORITHM *********************************************************** #
#	Simply test the exponent, j-bit, and mantissa values to	#
#	determine the type of operand.					#
#	If it's an unnormalized zero, alter the operand and force it	#
#	to be a normal zero.						#
#########################################################################
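#
# The tests below amount to the following classification, shown as a C
# sketch (illustrative only; tag names are assumptions). exp is the
# 15-bit biased exponent, and bit 63 of man is the explicit j-bit.
#
#	#include <stdint.h>
#	enum xtag { XNORM, XINF, XQNAN, XSNAN, XDENORM, XUNNORM, XZERO };
#
#	enum xtag set_tag_x_sketch(uint16_t exp, uint64_t man)
#	{
#		if (exp == 0x7fff)              /* j-bit is a don't care */
#			return (man << 1) == 0 ? XINF
#			     : (man & (1ULL << 62)) ? XQNAN : XSNAN;
#		if (man >> 63) return XNORM;    /* j-bit set             */
#		if (exp == 0) return man ? XDENORM : XZERO;
#		return man ? XUNNORM : XZERO;   /* unnormalized zero -> zero */
#	}
#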
set_tag_x:
	mov.w	FTEMP_EX(%a0), %d0	# extract exponent
	andi.w	&0x7fff, %d0		# strip off sign
	cmpi.w	%d0, &0x7fff		# is (EXP == MAX)?

	btst	&0x7,FTEMP_HI(%a0)

	tst.w	%d0			# is exponent = 0?

	tst.l	FTEMP_HI(%a0)

	tst.l	FTEMP_LO(%a0)

# must distinguish now "Unnormalized zeroes" which we
# must convert to zero.
	tst.l	FTEMP_HI(%a0)
	bne.b	is_unnorm_reg_x
	tst.l	FTEMP_LO(%a0)
	bne.b	is_unnorm_reg_x
# it's an "unnormalized zero". let's convert it to an actual zero...
	andi.w	&0x8000,FTEMP_EX(%a0)	# clear exponent

	tst.l	FTEMP_LO(%a0)

	mov.l	FTEMP_HI(%a0), %d0
	and.l	&0x7fffffff, %d0	# msb is a don't care!

	btst	&0x6, FTEMP_HI(%a0)
#########################################################################
# XDEF **************************************************************** #
#	set_tag_d(): return the optype of the input dbl fp number	#
# XREF **************************************************************** #
# INPUT *************************************************************** #
#	a0 = points to double precision operand				#
# OUTPUT ************************************************************** #
#	d0 = value of type tag						#
#		one of: NORM, INF, QNAN, SNAN, DENORM, ZERO		#
# ALGORITHM *********************************************************** #
#	Simply test the exponent, j-bit, and mantissa values to	#
#	determine the type of operand.					#
#########################################################################
set_tag_d:
	mov.l	FTEMP(%a0), %d0

	andi.l	&0x7ff00000, %d0
	beq.b	zero_or_denorm_d

	cmpi.l	%d0, &0x7ff00000

	and.l	&0x000fffff, %d1

	and.l	&0x000fffff, %d1
#########################################################################
# XDEF **************************************************************** #
#	set_tag_s(): return the optype of the input sgl fp number	#
# XREF **************************************************************** #
# INPUT *************************************************************** #
#	a0 = pointer to single precision operand			#
# OUTPUT ************************************************************** #
#	d0 = value of type tag						#
#		one of: NORM, INF, QNAN, SNAN, DENORM, ZERO		#
# ALGORITHM *********************************************************** #
#	Simply test the exponent, j-bit, and mantissa values to	#
#	determine the type of operand.					#
#########################################################################
set_tag_s:
	mov.l	FTEMP(%a0), %d0

	andi.l	&0x7f800000, %d0
	beq.b	zero_or_denorm_s

	cmpi.l	%d0, &0x7f800000

	and.l	&0x007fffff, %d1

	and.l	&0x007fffff, %d1
#########################################################################
# XDEF **************************************************************** #
#	unf_res(): routine to produce default underflow result of a	#
#		   scaled extended precision number; this is used by	#
#		   fadd/fdiv/fmul/etc. emulation routines.		#
#	unf_res4(): same as above but for fsglmul/fsgldiv which use	#
#		    single round prec and extended prec mode.		#
# XREF **************************************************************** #
#	_denorm() - denormalize according to scale factor		#
#	_round() - round denormalized number according to rnd prec	#
# INPUT *************************************************************** #
#	a0 = pointer to extended precision operand			#
#	d0 = scale factor						#
#	d1 = rounding precision/mode					#
# OUTPUT ************************************************************** #
#	a0 = pointer to default underflow result in extended precision	#
#	d0.b = result FPSR_cc which caller may or may not want to save	#
# ALGORITHM *********************************************************** #
#	Convert the input operand to "internal format" which means the	#
# exponent is extended to 16 bits and the sign is stored in the unused	#
# portion of the extended precision operand. Denormalize the number	#
# according to the scale factor passed in d0. Then, round the		#
# denormalized result.							#
#	Set the FPSR_exc bits as appropriate but return the cc bits in	#
# d0 in case the caller doesn't want to save them (as is the case for	#
# fmove out).								#
#	unf_res4() for fsglmul/fsgldiv forces the denorm to extended	#
# precision and the rounding mode to single.				#
#########################################################################
unf_res:
	mov.l	%d1, -(%sp)		# save rnd prec,mode on stack

	btst	&0x7, FTEMP_EX(%a0)	# make "internal" format

	mov.w	FTEMP_EX(%a0), %d1	# extract exponent
	mov.w	%d1, FTEMP_EX(%a0)	# insert 16 bit exponent

	mov.l	%a0, -(%sp)		# save operand ptr during calls

	mov.l	0x4(%sp),%d0		# pass rnd prec.
	bsr.l	_denorm			# denorm result

	mov.w	0x6(%sp),%d1		# load prec:mode into %d1
	andi.w	&0xc0,%d1		# extract rnd prec
	bsr.l	_round			# round the denorm

# result is now rounded properly. convert back to normal format
	bclr	&0x7, FTEMP_EX(%a0)	# clear sgn first; may have residue
	tst.b	FTEMP_SGN(%a0)		# is "internal result" sign set?
	beq.b	unf_res_chkifzero	# no; result is positive
	bset	&0x7, FTEMP_EX(%a0)	# set result sgn
	clr.b	FTEMP_SGN(%a0)		# clear temp sign

# the number may have become zero after rounding. set ccodes accordingly.
unf_res_chkifzero:
	tst.l	FTEMP_HI(%a0)		# is value now a zero?
	bne.b	unf_res_cont		# no
	tst.l	FTEMP_LO(%a0)
	bne.b	unf_res_cont		# no
#	bset	&z_bit, FPSR_CC(%a6)	# yes; set zero ccode bit
	bset	&z_bit, %d0		# yes; set zero ccode bit

unf_res_cont:
#
# can inex1 also be set along with unfl and inex2???
#
# we know that underflow has occurred. aunfl should be set if INEX2 is also set.
#
	btst	&inex2_bit, FPSR_EXCEPT(%a6)	# is INEX2 set?
	beq.b	unf_res_end		# no
	bset	&aunfl_bit, FPSR_AEXCEPT(%a6)	# yes; set aunfl

unf_res_end:
	add.l	&0x4, %sp		# clear stack
# unf_res() for fsglmul() and fsgldiv().
unf_res4:
	mov.l	%d1,-(%sp)		# save rnd prec,mode on stack

	btst	&0x7,FTEMP_EX(%a0)	# make "internal" format

	mov.w	FTEMP_EX(%a0),%d1	# extract exponent
	mov.w	%d1,FTEMP_EX(%a0)	# insert 16 bit exponent

	mov.l	%a0,-(%sp)		# save operand ptr during calls

	clr.l	%d0			# force rnd prec = ext
	bsr.l	_denorm			# denorm result

	mov.w	&s_mode,%d1		# force rnd prec = sgl
	mov.w	0x6(%sp),%d1		# load rnd mode
	andi.w	&0x30,%d1		# extract rnd mode
	bsr.l	_round			# round the denorm

# result is now rounded properly. convert back to normal format
	bclr	&0x7,FTEMP_EX(%a0)	# clear sgn first; may have residue
	tst.b	FTEMP_SGN(%a0)		# is "internal result" sign set?
	beq.b	unf_res4_chkifzero	# no; result is positive
	bset	&0x7,FTEMP_EX(%a0)	# set result sgn
	clr.b	FTEMP_SGN(%a0)		# clear temp sign

# the number may have become zero after rounding. set ccodes accordingly.
unf_res4_chkifzero:
	tst.l	FTEMP_HI(%a0)		# is value now a zero?
	bne.b	unf_res4_cont		# no
	tst.l	FTEMP_LO(%a0)
	bne.b	unf_res4_cont		# no
#	bset	&z_bit,FPSR_CC(%a6)	# yes; set zero ccode bit
	bset	&z_bit,%d0		# yes; set zero ccode bit

unf_res4_cont:
#
# can inex1 also be set along with unfl and inex2???
#
# we know that underflow has occurred. aunfl should be set if INEX2 is also set.
#
	btst	&inex2_bit,FPSR_EXCEPT(%a6)	# is INEX2 set?
	beq.b	unf_res4_end		# no
	bset	&aunfl_bit,FPSR_AEXCEPT(%a6)	# yes; set aunfl

unf_res4_end:
	add.l	&0x4,%sp		# clear stack
#########################################################################
# XDEF **************************************************************** #
#	ovf_res(): routine to produce the default overflow result of	#
#		   an overflowing number.				#
#	ovf_res2(): same as above but the rnd mode/prec are passed	#
#		    differently.					#
# XREF **************************************************************** #
# INPUT *************************************************************** #
#	d1.b	= '-1' => (-); '0' => (+)				#
#	d0	= rnd mode/prec						#
#	hi(d0)	= rnd prec						#
#	lo(d0)	= rnd mode						#
# OUTPUT ************************************************************** #
#	a0	= points to extended precision result			#
#	d0.b	= condition code bits					#
# ALGORITHM *********************************************************** #
#	The default overflow result can be determined by the sign of	#
#	the result and the rounding mode/prec in effect. These bits are #
#	concatenated together to create an index into the default	#
#	result table. A pointer to the correct result is returned in	#
#	a0. The resulting condition codes are returned in d0 in case	#
#	the caller doesn't want FPSR_cc altered (as is the case for	#
#	fmove out).							#
#########################################################################
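#
# The default result picked below follows the usual IEEE overflow rule.
# A C sketch for a double-precision destination (illustrative only):
# rounding "toward zero" relative to the result sign yields the largest
# finite number, anything else yields infinity.
#
#	#include <float.h>
#	#include <math.h>
#	double ovf_default_sketch(int neg, int mode) /* 0=RN 1=RZ 2=RM 3=RP */
#	{
#		int toward_zero = (mode == 1) ||
#		                  (mode == 2 && !neg) || (mode == 3 && neg);
#		double mag = toward_zero ? DBL_MAX : INFINITY;
#		return neg ? -mag : mag;
#	}
#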
ovf_res:
	andi.w	&0x10,%d1		# keep result sign
	lsr.b	&0x4,%d0		# shift prec/mode
	or.b	%d0,%d1			# concat the two
	mov.w	%d1,%d0			# make a copy
	lsl.b	&0x1,%d1		# multiply d1 by 2

ovf_res2:
	and.w	&0x10, %d1		# keep result sign
	or.b	%d0, %d1		# insert rnd mode
	or.b	%d0, %d1		# insert rnd prec
	mov.w	%d1, %d0		# make a copy
	lsl.b	&0x1, %d1		# shift left by 1

#
# use the rounding mode, precision, and result sign as an index into the
# two tables below to fetch the default result and the result ccodes.
#
	mov.b	(tbl_ovfl_cc.b,%pc,%d0.w*1), %d0	# fetch result ccodes
	lea	(tbl_ovfl_result.b,%pc,%d1.w*8), %a0	# return result ptr

tbl_ovfl_cc:
	byte	0x2, 0x0, 0x0, 0x2
	byte	0x2, 0x0, 0x0, 0x2
	byte	0x2, 0x0, 0x0, 0x2
	byte	0x0, 0x0, 0x0, 0x0
	byte	0x2+0x8, 0x8, 0x2+0x8, 0x8
	byte	0x2+0x8, 0x8, 0x2+0x8, 0x8
	byte	0x2+0x8, 0x8, 0x2+0x8, 0x8
tbl_ovfl_result:
	long	0x7fff0000,0x00000000,0x00000000,0x00000000 # +INF; RN
	long	0x7ffe0000,0xffffffff,0xffffffff,0x00000000 # +EXT; RZ
	long	0x7ffe0000,0xffffffff,0xffffffff,0x00000000 # +EXT; RM
	long	0x7fff0000,0x00000000,0x00000000,0x00000000 # +INF; RP

	long	0x7fff0000,0x00000000,0x00000000,0x00000000 # +INF; RN
	long	0x407e0000,0xffffff00,0x00000000,0x00000000 # +SGL; RZ
	long	0x407e0000,0xffffff00,0x00000000,0x00000000 # +SGL; RM
	long	0x7fff0000,0x00000000,0x00000000,0x00000000 # +INF; RP

	long	0x7fff0000,0x00000000,0x00000000,0x00000000 # +INF; RN
	long	0x43fe0000,0xffffffff,0xfffff800,0x00000000 # +DBL; RZ
	long	0x43fe0000,0xffffffff,0xfffff800,0x00000000 # +DBL; RM
	long	0x7fff0000,0x00000000,0x00000000,0x00000000 # +INF; RP

	long	0x00000000,0x00000000,0x00000000,0x00000000
	long	0x00000000,0x00000000,0x00000000,0x00000000
	long	0x00000000,0x00000000,0x00000000,0x00000000
	long	0x00000000,0x00000000,0x00000000,0x00000000

	long	0xffff0000,0x00000000,0x00000000,0x00000000 # -INF; RN
	long	0xfffe0000,0xffffffff,0xffffffff,0x00000000 # -EXT; RZ
	long	0xffff0000,0x00000000,0x00000000,0x00000000 # -INF; RM
	long	0xfffe0000,0xffffffff,0xffffffff,0x00000000 # -EXT; RP

	long	0xffff0000,0x00000000,0x00000000,0x00000000 # -INF; RN
	long	0xc07e0000,0xffffff00,0x00000000,0x00000000 # -SGL; RZ
	long	0xffff0000,0x00000000,0x00000000,0x00000000 # -INF; RM
	long	0xc07e0000,0xffffff00,0x00000000,0x00000000 # -SGL; RP

	long	0xffff0000,0x00000000,0x00000000,0x00000000 # -INF; RN
	long	0xc3fe0000,0xffffffff,0xfffff800,0x00000000 # -DBL; RZ
	long	0xffff0000,0x00000000,0x00000000,0x00000000 # -INF; RM
	long	0xc3fe0000,0xffffffff,0xfffff800,0x00000000 # -DBL; RP
#########################################################################
# XDEF **************************************************************** #
#	get_packed(): fetch a packed operand from memory and then	#
#		      convert it to a floating-point binary number.	#
# XREF **************************************************************** #
#	_dcalc_ea() - calculate the correct <ea>			#
#	_mem_read() - fetch the packed operand from memory		#
#	facc_in_x() - the fetch failed so jump to special exit code	#
#	decbin()    - convert packed to binary extended precision	#
# INPUT *************************************************************** #
# OUTPUT ************************************************************** #
#	If no failure on _mem_read():					#
#	FP_SRC(a6) = packed operand now as a binary FP number		#
# ALGORITHM *********************************************************** #
#	Get the correct <ea> which is the value on the exception stack	#
#	frame w/ maybe a correction factor if the <ea> is -(an) or	#
#	(an)+. Then, fetch the operand from memory. If the fetch fails, #
#	exit through facc_in_x().					#
#	If the packed operand is a ZERO, NAN, or INF, convert it to	#
#	its binary representation here. Else, call decbin() which will	#
#	convert the packed value to an extended precision binary value. #
#########################################################################
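#
# The special-case tests done below, as a C sketch (illustrative only):
# a packed operand is INF or NaN when exponent bits 30:16 of the first
# longword are all ones, and zero when the integer digit (low nybble of
# the first longword) and both mantissa longwords are zero.
#
#	#include <stdint.h>
#	int packed_is_inf_or_nan(const uint32_t p[3])
#	{
#		return ((p[0] >> 16) & 0x7fff) == 0x7fff;
#	}
#	int packed_is_zero(const uint32_t p[3])
#	{
#		return (p[0] & 0xf) == 0 && p[1] == 0 && p[2] == 0;
#	}
#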
# the stacked <ea> for packed is correct except for -(An).
# the base reg must be updated for both -(An) and (An)+.
get_packed:
	mov.l	&0xc,%d0		# packed is 12 bytes
	bsr.l	_dcalc_ea		# fetch <ea>; correct An

	lea	FP_SRC(%a6),%a1		# pass: ptr to super dst
	mov.l	&0xc,%d0		# pass: 12 bytes
	bsr.l	_dmem_read		# read packed operand

	tst.l	%d1			# did dfetch fail?
	bne.l	facc_in_x		# yes

# The packed operand is an INF or a NAN if the exponent field is all ones.
	bfextu	FP_SRC(%a6){&1:&15},%d0	# get exp
	cmpi.w	%d0,&0x7fff		# INF or NAN?
	bne.b	gp_try_zero		# no
	rts				# operand is an INF or NAN

# The packed operand is a zero if the mantissa is all zero, else it's
# a normal packed op.
gp_try_zero:
	mov.b	3+FP_SRC(%a6),%d0	# get byte 4
	andi.b	&0x0f,%d0		# clear all but last nybble
	bne.b	gp_not_spec		# not a zero
	tst.l	FP_SRC_HI(%a6)		# is lw 2 zero?
	bne.b	gp_not_spec		# not a zero
	tst.l	FP_SRC_LO(%a6)		# is lw 3 zero?
	bne.b	gp_not_spec		# not a zero
	rts				# operand is a ZERO

gp_not_spec:
	lea	FP_SRC(%a6),%a0		# pass: ptr to packed op
	bsr.l	decbin			# convert to extended
	fmovm.x	&0x80,FP_SRC(%a6)	# make this the srcop
#########################################################################
# decbin(): Converts normalized packed bcd value pointed to by register #
#	    a0 to extended-precision value in fp0.			#
# INPUT *************************************************************** #
#	a0 = pointer to normalized packed bcd value			#
# OUTPUT ************************************************************** #
#	fp0 = exact fp representation of the packed bcd value.		#
# ALGORITHM *********************************************************** #
#	Expected is a normal bcd (i.e. non-exceptional; all inf, zero,	#
#	and NaN operands are dispatched without entering this routine)	#
#	value in 68881/882 format at location (a0).			#
#									#
#	A1. Convert the bcd exponent to binary by successive adds and	#
#	muls. Set the sign according to SE. Subtract 16 to compensate	#
#	for the mantissa which is to be interpreted as 17 integer	#
#	digits, rather than 1 integer and 16 fraction digits.		#
#	Note: this operation can never overflow.			#
#									#
#	A2. Convert the bcd mantissa to binary by successive adds and	#
#	muls in FP0. Set the sign according to SM. The mantissa digits	#
#	will be converted with the decimal point assumed following the	#
#	least-significant digit.					#
#	Note: this operation can never overflow.			#
#									#
#	A3. Count the number of leading/trailing zeros in the bcd	#
#	string. If SE is positive, count the leading zeros; if		#
#	negative, count the trailing zeros. Set the adjusted exponent	#
#	equal to the exponent from A1 and the zero count added if	#
#	SM = 1 and subtracted if SM = 0. Scale the mantissa the		#
#	equivalent of forcing in the bcd value:				#
#		SM = 0	a non-zero digit in the integer position	#
#		SM = 1	a non-zero digit in Mant0, lsd of the fraction	#
#	this will ensure that any value, regardless of its		#
#	representation (ex. 0.1E2, 1E1, 10E0, 100E-1), is converted	#
#	consistently.							#
#									#
#	A4. Calculate the factor 10^exp in FP1 using a table of	#
#	10^(2^n) values. To reduce the error in forming factors	#
#	greater than 10^27, a directed rounding scheme is used with	#
#	tables rounded to RN, RM, and RP, according to the table in	#
#	the comments of the pwrten section.				#
#									#
#	A5. Form the final binary number by scaling the mantissa by	#
#	the exponent factor. This is done by multiplying the mantissa	#
#	in FP0 by the factor in FP1 if the adjusted exponent sign is	#
#	positive, and dividing FP0 by FP1 if it is negative.		#
#									#
#	Clean up and return. Check if the final mul or div was inexact. #
#	If so, set INEX1 in USER_FPSR.					#
#########################################################################
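#
# Steps A1 and A2 are plain base-10 accumulation. A C sketch (illustrative
# only; digit arrays stand in for the packed nybbles):
#
#	#include <stdint.h>
#	long decbin_exp_sketch(const uint8_t edig[3], int se)
#	{
#		long e = 0;
#		for (int i = 0; i < 3; i++)
#			e = e * 10 + edig[i];   /* successive muls and adds */
#		if (se) e = -e;                 /* exponent sign SE */
#		return e - 16;    /* mantissa treated as 17 integer digits */
#	}
#
#	double decbin_man_sketch(const uint8_t mdig[17], int sm)
#	{
#		double m = 0.0;                 /* fp0 plays this role */
#		for (int i = 0; i < 17; i++)
#			m = m * 10.0 + mdig[i];
#		return sm ? -m : m;             /* mantissa sign SM */
#	}
#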
#
# PTENRN, PTENRM, and PTENRP are arrays of powers of 10 rounded
# to nearest, minus, and plus, respectively. The tables include
# 10**{1,2,4,8,16,32,64,128,256,512,1024,2048,4096}. No rounding
# is required until the power is greater than 27, however, all
# tables include the first 5 for ease of indexing.
#
decbin:
	mov.l	0x0(%a0),FP_SCR0_EX(%a6)	# make a copy of input
	mov.l	0x4(%a0),FP_SCR0_HI(%a6)	# so we don't alter it
	mov.l	0x8(%a0),FP_SCR0_LO(%a6)

	lea	FP_SCR0(%a6),%a0

	movm.l	&0x3c00,-(%sp)		# save d2-d5
	fmovm.x	&0x1,-(%sp)		# save fp1
# Calculate exponent:
#  1. Copy bcd value in memory for use as a working copy.
#  2. Calculate absolute value of exponent in d1 by mul and add.
#  3. Correct for exponent sign.
#  4. Subtract 16 to compensate for interpreting the mant as all integer digits.
#     (i.e., all digits assumed left of the decimal point.)
#
#	(*) d0: temp digit storage
#	(*) d1: accumulator for binary exponent
#	(*) d2: digit count
#	(*) d3: offset pointer
#	( ) d4: first word of bcd
#	( ) a0: pointer to working bcd value
#	( ) a6: pointer to original bcd value
#	(*) FP_SCR1: working copy of original bcd value
#	(*) L_SCR1: copy of original exponent word
#
	mov.l	&EDIGITS,%d2		# # of nibbles (digits) in fraction part
	mov.l	&ESTRT,%d3		# counter to pick up digits
	mov.l	(%a0),%d4		# get first word of bcd
	clr.l	%d1			# zero d1 for accumulator
e_gd:
	mulu.l	&0xa,%d1		# mul partial product by one digit place
	bfextu	%d4{%d3:&4},%d0		# get the digit and zero extend into d0
	add.l	%d0,%d1			# d1 = d1 + d0
	addq.b	&4,%d3			# advance d3 to the next digit
	dbf.w	%d2,e_gd		# if we have used all 3 digits, exit loop
	btst	&30,%d4			# get SE
	beq.b	e_pos			# don't negate if pos
	neg.l	%d1			# negate before subtracting
e_pos:
	sub.l	&16,%d1			# sub to compensate for shift of mant
	bge.b	e_save			# if still pos, do not neg
	neg.l	%d1			# now negative, make pos and set SE
	or.l	&0x40000000,%d4		# set SE in d4,
	or.l	&0x40000000,(%a0)	# and in working bcd
e_save:
	mov.l	%d1,-(%sp)		# save exp on stack
# Calculate mantissa:
#  1. Calculate absolute value of mantissa in fp0 by mul and add.
#  2. Correct for mantissa sign.
#     (i.e., all digits assumed left of the decimal point.)
#
#	(*) d0: temp digit storage
#	(*) d1: lword counter
#	(*) d2: digit count
#	(*) d3: offset pointer
#	( ) d4: words 2 and 3 of bcd
#	( ) a0: pointer to working bcd value
#	( ) a6: pointer to original bcd value
#	(*) fp0: mantissa accumulator
#	( ) FP_SCR1: working copy of original bcd value
#	( ) L_SCR1: copy of original exponent word
#
	mov.l	&1,%d1			# word counter, init to 1
	fmov.s	&0x00000000,%fp0	# accumulator

#
# Since the packed number has a long word between the first & second parts,
# get the integer digit then skip down & get the rest of the
# mantissa. We will unroll the loop once.
#
	bfextu	(%a0){&28:&4},%d0	# integer part is ls digit in long word
	fadd.b	%d0,%fp0		# add digit to sum in fp0

#
# Get the rest of the mantissa.
#
loadlw:
	mov.l	(%a0,%d1.L*4),%d4	# load mantissa longword into d4
	mov.l	&FSTRT,%d3		# counter to pick up digits
	mov.l	&FNIBS,%d2		# reset number of digits per a0 ptr
md2b:
	fmul.s	&0x41200000,%fp0	# fp0 = fp0 * 10
	bfextu	%d4{%d3:&4},%d0		# get the digit and zero extend
	fadd.b	%d0,%fp0		# fp0 = fp0 + digit
23178 # If all the digits (8) in that long word have been converted (d2=0),
23179 # then inc d1 (=2) to point to the next long word and reset d3 to 0
23180 # to initialize the digit offset, and set d2 to 7 for the digit count;
23181 # else continue with this long word.
23183 addq.
b &4,%d3
# advance d3 to the next digit
23184 dbf.w
%d2
,md2b
# check for last digit in this lw
23186 addq.
l &1,%d1
# inc lw pointer in mantissa
23187 cmp.
l %d1
,&2 # test for last lw
23188 ble.
b loadlw
# if not, get last one
23190 # Check the sign of the mant and make the value in fp0 the same sign.
23193 btst
&31,(%a0
) # test sign of the mantissa
23194 beq.
b ap_st_z
# if clear, go to append/strip zeros
23195 fneg.x
%fp0
# if set, negate fp0
# Append/strip zeros:
#
# For adjusted exponents which have an absolute value greater than 27*,
# this routine calculates the amount needed to normalize the mantissa
# for the adjusted exponent. That number is subtracted from the exp
# if the exp was positive, and added if it was negative. The purpose
# of this is to reduce the value of the exponent and the possibility
# of error in calculation of pwrten.
#
#  1. Branch on the sign of the adjusted exponent.
#  2p.(positive exp)
#   2. Check M16 and the digits in lwords 2 and 3 in descending order.
#   3. Add one for each zero encountered until a non-zero digit.
#   4. Subtract the count from the exp.
#   5. Check if the exp has crossed zero in #3 above; make the exp abs
#   6. Multiply the mantissa by 10**count.
#  2n.(negative exp)
#   2. Check the digits in lwords 3 and 2 in descending order.
#   3. Add one for each zero encountered until a non-zero digit.
#   4. Add the count to the exp.
#   5. Check if the exp has crossed zero in #3 above; clear SE.
#   6. Divide the mantissa by 10**count.
#
#  *Why 27? If the adjusted exponent is within -28 < expA < 28, then
#  any adjustment due to append/strip zeros will drive the resultant
#  exponent towards zero. Since all pwrten constants with a power
#  of 27 or less are exact, there is no need to use this routine to
#  attempt to lessen the resultant exponent.
#
#	(*)  d0: temp digit storage
#	(*)  d1: zero count
#	(*)  d2: digit count
#	(*)  d3: offset pointer
#	( )  d4: first word of bcd
#	(*)  d5: lword counter
#	( )  a0: pointer to working bcd value
#	( )  FP_SCR1: working copy of original bcd value
#	( )  L_SCR1: copy of original exponent word
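#
# Illustrative note (not part of the original source): the zero count and
# exponent adjustment performed by ap_st_z/ap_st_n below, sketched in C
# with the 17 bcd digits in an array, most significant first. Only the
# adjusted exponent is returned; the matching mantissa scaling by
# 10**count is done separately (the fmul/fdiv in the code).
#
#	int strip_adjust(const unsigned char digit[17], int expA)
#	{
#	    int count = 0;
#	    if (expA >= 0) {                        /* strip leading zeros */
#	        while (count < 17 && digit[count] == 0)
#	            count++;
#	        expA -= count;                      /* mantissa scaled up by 10^count */
#	    } else {                                /* append zeros */
#	        while (count < 17 && digit[16 - count] == 0)
#	            count++;
#	        expA += count;                      /* mantissa scaled down by 10^count */
#	    }
#	    return expA;
#	}
#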
# First check the absolute value of the exponent to see if this
# routine is necessary. If so, then check the sign of the exponent
# and do append (+) or strip (-) zeros accordingly.
# This section handles a positive adjusted exponent.
        mov.l           (%sp),%d1               # load expA for range test
        cmp.l           %d1,&27                 # test is with 27
        ble.w           pwrten                  # if abs(expA) <28, skip ap/st zeros
        btst            &30,(%a0)               # check sign of exp
        bne.b           ap_st_n                 # if neg, go to neg side
        clr.l           %d1                     # zero count reg
        mov.l           (%a0),%d4               # load lword 1 to d4
        bfextu          %d4{&28:&4},%d0         # get M16 in d0
        bne.b           ap_p_fx                 # if M16 is non-zero, go fix exp
        addq.l          &1,%d1                  # inc zero count
        mov.l           &1,%d5                  # init lword counter
        mov.l           (%a0,%d5.L*4),%d4       # get lword 2 to d4
        bne.b           ap_p_cl                 # if lw 2 is zero, skip it
        addq.l          &8,%d1                  # and inc count by 8
        addq.l          &1,%d5                  # inc lword counter
        mov.l           (%a0,%d5.L*4),%d4       # get lword 3 to d4
        clr.l           %d3                     # init offset reg
        mov.l           &7,%d2                  # init digit counter
        bfextu          %d4{%d3:&4},%d0         # get digit
        bne.b           ap_p_fx                 # if non-zero, go to fix exp
        addq.l          &4,%d3                  # point to next digit
        addq.l          &1,%d1                  # inc digit counter
        dbf.w           %d2,ap_p_gd             # get next digit
        mov.l           %d1,%d0                 # copy counter to d0
        mov.l           (%sp),%d1               # get adjusted exp from memory
        sub.l           %d0,%d1                 # subtract count from exp
        bge.b           ap_p_fm                 # if still pos, go to pwrten
        neg.l           %d1                     # now it's neg; get abs
        mov.l           (%a0),%d4               # load lword 1 to d4
        or.l            &0x40000000,%d4         # and set SE in d4
        or.l            &0x40000000,(%a0)       # and in memory

# Calculate the mantissa multiplier to compensate for the stripping of
# zeros from the mantissa.
        lea.l           PTENRN(%pc),%a1         # get address of power-of-ten table
        clr.l           %d3                     # init table index
        fmov.s          &0x3f800000,%fp1        # init fp1 to 1
        mov.l           &3,%d2                  # init d2 to count bits in counter
        asr.l           &1,%d0                  # shift lsb into carry
        bcc.b           ap_p_en                 # if 1, mul fp1 by pwrten factor
        fmul.x          (%a1,%d3),%fp1          # mul by 10**(d3_bit_no)
        add.l           &12,%d3                 # inc d3 to next rtable entry
        tst.l           %d0                     # check if d0 is zero
        bne.b           ap_p_el                 # if not, get next bit
        fmul.x          %fp1,%fp0               # mul mantissa by 10**(no_bits_shifted)
        bra.b           pwrten                  # go calc pwrten

# This section handles a negative adjusted exponent.
        clr.l           %d1                     # clr counter
        mov.l           &2,%d5                  # set up d5 to point to lword 3
        mov.l           (%a0,%d5.L*4),%d4       # get lword 3
        bne.b           ap_n_cl                 # if not zero, check digits
        sub.l           &1,%d5                  # dec d5 to point to lword 2
        addq.l          &8,%d1                  # inc counter by 8
        mov.l           (%a0,%d5.L*4),%d4       # get lword 2
        mov.l           &28,%d3                 # point to last digit
        mov.l           &7,%d2                  # init digit counter
        bfextu          %d4{%d3:&4},%d0         # get digit
        bne.b           ap_n_fx                 # if non-zero, go to exp fix
        subq.l          &4,%d3                  # point to previous digit
        addq.l          &1,%d1                  # inc digit counter
        dbf.w           %d2,ap_n_gd             # get next digit
        mov.l           %d1,%d0                 # copy counter to d0
        mov.l           (%sp),%d1               # get adjusted exp from memory
        sub.l           %d0,%d1                 # subtract count from exp
        bgt.b           ap_n_fm                 # if still pos, go fix mantissa
        neg.l           %d1                     # take abs of exp and clr SE
        mov.l           (%a0),%d4               # load lword 1 to d4
        and.l           &0xbfffffff,%d4         # and clr SE in d4
        and.l           &0xbfffffff,(%a0)       # and in memory

# Calculate the mantissa multiplier to compensate for the appending of
# zeros to the mantissa.
        lea.l           PTENRN(%pc),%a1         # get address of power-of-ten table
        clr.l           %d3                     # init table index
        fmov.s          &0x3f800000,%fp1        # init fp1 to 1
        mov.l           &3,%d2                  # init d2 to count bits in counter
        asr.l           &1,%d0                  # shift lsb into carry
        bcc.b           ap_n_en                 # if 1, mul fp1 by pwrten factor
        fmul.x          (%a1,%d3),%fp1          # mul by 10**(d3_bit_no)
        add.l           &12,%d3                 # inc d3 to next rtable entry
        tst.l           %d0                     # check if d0 is zero
        bne.b           ap_n_el                 # if not, get next bit
        fdiv.x          %fp1,%fp0               # div mantissa by 10**(no_bits_shifted)
# Calculate power-of-ten factor from adjusted and shifted exponent.
#	(*)  d2: {FPCR[6:5],SM,SE} as index in RTABLE; temp
#	(*)  d3: FPCR work copy
#	( )  d4: first word of bcd
#	(*)  a1: RTABLE pointer
#	(*)  d3: PWRTxx table index
#	( )  a0: pointer to working copy of bcd
#	(*)  a1: PWRTxx pointer
#	(*)  fp1: power-of-ten accumulator
#
# Pwrten calculates the exponent factor in the selected rounding mode
# according to the following table:
#
#	Sign of Mant  Sign of Exp  Rounding Mode  PWRTEN Rounding Mode
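#
# Illustrative note (not part of the original source): the RTABLE index is
# built from the user's rounding mode and the mantissa/exponent signs, as
# the bfextu/asl/add sequence below does. In C:
#
#	/* fpcr_rm = FPCR bits [6:5], sm = sign of mantissa, se = sign of exp */
#	unsigned rtable_index(unsigned fpcr_rm, unsigned sm, unsigned se)
#	{
#	    return (fpcr_rm << 2) | (sm << 1) | se;     /* 0..15 */
#	}
#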
        mov.l           USER_FPCR(%a6),%d3      # get user's FPCR
        bfextu          %d3{&26:&2},%d2         # isolate rounding mode bits
        mov.l           (%a0),%d4               # reload 1st bcd word to d4
        asl.l           &2,%d2                  # format d2 to be
        bfextu          %d4{&0:&2},%d0          # {FPCR[6],FPCR[5],SM,SE}
        add.l           %d0,%d2                 # in d2 as index into RTABLE
        lea.l           RTABLE(%pc),%a1         # load rtable base
        mov.b           (%a1,%d2),%d0           # load new rounding bits from table
        clr.l           %d3                     # clear d3 to force no exc and extended
        bfins           %d0,%d3{&26:&2}         # stuff new rounding bits in FPCR
        fmov.l          %d3,%fpcr               # write new FPCR
        asr.l           &1,%d0                  # write correct PTENxx table
        bcc.b           not_rp                  # to a1
        lea.l           PTENRP(%pc),%a1         # it is RP
        bra.b           calc_p                  # go to init section
        asr.l           &1,%d0                  # keep checking
        lea.l           PTENRM(%pc),%a1         # it is RM
        bra.b           calc_p                  # go to init section
        lea.l           PTENRN(%pc),%a1         # it is RN
        mov.l           %d1,%d0                 # copy exp to d0; use d0
        bpl.b           no_neg                  # if exp is negative,
        neg.l           %d0                     # invert it
        or.l            &0x40000000,(%a0)       # and set SE bit
        clr.l           %d3                     # table index
        fmov.s          &0x3f800000,%fp1        # init fp1 to 1
        asr.l           &1,%d0                  # shift next bit into carry
        bcc.b           e_next                  # if zero, skip the mul
        fmul.x          (%a1,%d3),%fp1          # mul by 10**(d3_bit_no)
        add.l           &12,%d3                 # inc d3 to next rtable entry
        tst.l           %d0                     # check if d0 is zero
        bne.b           e_loop                  # not zero, continue shifting
# Check the sign of the adjusted exp and make the value in fp0 the
# same sign. If the exp was pos then multiply fp1*fp0;
# else divide fp0/fp1.
#	( )  a0: pointer to working bcd value
#	(*)  fp0: mantissa accumulator
#	( )  fp1: scaling factor - 10**(abs(exp))
        btst            &30,(%a0)               # test the sign of the exponent
        beq.b           mul                     # if clear, go to multiply
        fdiv.x          %fp1,%fp0               # exp is negative, so divide mant by exp
        fmul.x          %fp1,%fp0               # exp is positive, so multiply by exp

# Clean up and return with result in fp0.
# If the final mul/div in decbin incurred an inex exception,
# it will be inex2, but will be reported as inex1 by get_op.
        fmov.l          %fpsr,%d0               # get status register
        bclr            &inex2_bit+8,%d0        # test for inex2 and clear it
        beq.b           no_exc                  # skip this if no exc
        ori.w           &inx1a_mask,2+USER_FPSR(%a6) # set INEX1/AINEX
        add.l           &0x4,%sp                # clear 1 lw param
        fmovm.x         (%sp)+,&0x40            # restore fp1
        movm.l          (%sp)+,&0x3c            # restore d2-d5
#########################################################################
# bindec(): Converts an input in extended precision format to bcd format #
#                                                                       #
# INPUT *************************************************************** #
#	a0 = pointer to the input extended precision value in memory.  #
#	     the input may be either normalized, unnormalized, or      #
#	d0 = contains the k-factor sign-extended to 32-bits.           #
#                                                                       #
# OUTPUT ************************************************************** #
#	FP_SCR0(a6) = bcd format result on the stack.                   #
#                                                                       #
# ALGORITHM *********************************************************** #
#	A1. Set RM and size ext; Set SIGMA = sign of input.             #
#	    The k-factor is saved for use in d7. Clear the              #
#	    BINDEC_FLG for separating normalized/denormalized           #
#	    input. If input is unnormalized or denormalized,            #
#	A2. Set X = abs(input).                                         #
#	A3. Compute ILOG.                                               #
#	    ILOG is the log base 10 of the input value. It is           #
#	    approximated by adding e + 0.f when the original            #
#	    value is viewed as 2^^e * 1.f in extended precision.        #
#	    This value is stored in d6.                                 #
#	A4. Clr INEX bit.                                               #
#	    The operation in A3 above may have set INEX2.               #
#	A5. Set ICTR = 0;                                               #
#	    ICTR is a flag used in A13. It must be set before the       #
#	A6. Calculate LEN.                                              #
#	    LEN is the number of digits to be displayed. The            #
#	    k-factor can dictate either the total number of digits,     #
#	    if it is a positive number, or the number of digits         #
#	    after the decimal point which are to be included as         #
#	    significant. See the 68882 manual for examples.             #
#	    If LEN is computed to be greater than 17, set OPERR in      #
#	    USER_FPSR. LEN is stored in d4.                             #
#	A7. Calculate SCALE.                                            #
#	    SCALE is equal to 10^ISCALE, where ISCALE is the number     #
#	    of decimal places needed to insure LEN integer digits       #
#	    in the output before conversion to bcd. LAMBDA is the       #
#	    sign of ISCALE, used in A9. Fp1 contains                    #
#	    10^^(abs(ISCALE)) using a rounding mode which is a          #
#	    function of the original rounding mode and the signs        #
#	    of ISCALE and X. A table is given in the code.              #
#	A8. Clr INEX; Force RZ.                                         #
#	    The operation in A3 above may have set INEX2.               #
#	    RZ mode is forced for the scaling operation to insure       #
#	    only one rounding error. The grs bits are collected in      #
#	    the INEX flag for use in A10.                               #
#	A9. Scale X -> Y.                                               #
#	    The mantissa is scaled to the desired number of             #
#	    significant digits. The excess digits are collected         #
#	A10. Or in INEX.                                                #
#	    If INEX is set, round error occurred. This is               #
#	    compensated for by 'or-ing' in the INEX2 flag to            #
#	A11. Restore original FPCR; set size ext.                       #
#	    Perform FINT operation in the user's rounding mode.         #
#	    Keep the size to extended.                                  #
#	A12. Calculate YINT = FINT(Y) according to user's rounding      #
#	    mode. The FPSP routine sintd0 is used. The output           #
#	A13. Check for LEN digits.                                      #
#	    If the int operation results in more than LEN digits,       #
#	    or less than LEN-1 digits, adjust ILOG and repeat from      #
#	    A6. This test occurs only on the first pass. If the         #
#	    result is exactly 10^LEN, decrement ILOG and divide         #
#	    the mantissa by 10.                                         #
#	A14. Convert the mantissa to bcd.                               #
#	    The binstr routine is used to convert the LEN digit         #
#	    mantissa to bcd in memory. The input to binstr is           #
#	    to be a fraction; i.e. (mantissa)/10^LEN and adjusted       #
#	    such that the decimal point is to the left of bit 63.       #
#	    The bcd digits are stored in the correct position in        #
#	    the final string area in memory.                            #
#	A15. Convert the exponent to bcd.                               #
#	    As in A14 above, the exp is converted to bcd and the        #
#	    digits are stored in the final string.                      #
#	    Test the length of the final exponent string. If the        #
#	    length is 4, set operr.                                     #
#	A16. Write sign bits to final string.                           #
#########################################################################
        set             BINDEC_FLG, EXC_TEMP    # DENORM flag

# Constants in extended precision
        long            0x3FFD0000,0x9A209A84,0xFBCFF798,0x00000000
        long            0x3FFD0000,0x9A209A84,0xFBCFF799,0x00000000

# Constants in single precision
        long            0x3F800000,0x00000000,0x00000000,0x00000000
        long            0x40000000,0x00000000,0x00000000,0x00000000
        long            0x41200000,0x00000000,0x00000000,0x00000000
        long            0x459A2800,0x00000000,0x00000000,0x00000000
# Implementation Notes:
#
# The registers are used as follows:
#	d0: scratch; LEN input to binstr
#	d2: upper 32-bits of mantissa for binstr
#	d3: scratch; lower 32-bits of mantissa for binstr
#	a0: ptr for original operand/final result
#	a1: scratch pointer
#	a2: pointer to FP_X; abs(original value) in ext
        movm.l          &0x3f20,-(%sp)          # {%d2-%d7/%a2}
        fmovm.x         &0x7,-(%sp)             # {%fp0-%fp2}
# A1. Set RM and size ext. Set SIGMA = sign input;
#     The k-factor is saved for use in d7. Clear BINDEC_FLG for
#     separating normalized/denormalized input. If the input
#     is a denormalized number, set the BINDEC_FLG memory word
#     to signal denorm. If the input is unnormalized, normalize
#     the input and test for denormalized result.
        fmov.l          &rm_mode*0x10,%fpcr     # set RM and ext
        mov.l           (%a0),L_SCR2(%a6)       # save exponent for sign check
        mov.l           %d0,%d7                 # move k-factor to d7

        clr.b           BINDEC_FLG(%a6)         # clr norm/denorm flag
        cmpi.b          STAG(%a6),&DENORM       # is input a DENORM?
        bne.w           A2_str                  # no; input is a NORM

# Normalize the denorm
        and.w           &0x7fff,%d0             # strip sign of normalized exp

# Test if the normalized input is denormalized
        bgt.b           pos_exp                 # if greater than zero, it is a norm
        st              BINDEC_FLG(%a6)         # set flag for denorm
        and.w           &0x7fff,%d0             # strip sign of normalized exp
# A2. Set X = abs(input).
        mov.l           (%a0),FP_SCR1(%a6)      # move input to work space
        mov.l           4(%a0),FP_SCR1+4(%a6)   # move input to work space
        mov.l           8(%a0),FP_SCR1+8(%a6)   # move input to work space
        and.l           &0x7fffffff,FP_SCR1(%a6) # create abs(X)

# A3. Compute ILOG.
#     ILOG is the log base 10 of the input value. It is approx-
#     imated by adding e + 0.f when the original value is viewed
#     as 2^^e * 1.f in extended precision. This value is stored
#	d0: k-factor/exponent
#	d7: k-factor/Unchanged
#	a0: ptr for original operand/final result
#	fp0: x/float(ILOG)
#	F_SCR2:Abs(X)/Abs(X) with $3fff exponent
#	L_SCR2:first word of X packed/Unchanged
        tst.b           BINDEC_FLG(%a6)         # check for denorm
        beq.b           A3_cont                 # if clr, continue with norm
        mov.l           &-4933,%d6              # force ILOG = -4933
        mov.w           FP_SCR1(%a6),%d0        # move exp to d0
        mov.w           &0x3fff,FP_SCR1(%a6)    # replace exponent with 0x3fff
        fmov.x          FP_SCR1(%a6),%fp0       # now fp0 has 1.f
        sub.w           &0x3fff,%d0             # strip off bias
        fadd.w          %d0,%fp0                # add in exp
        fsub.s          FONE(%pc),%fp0          # subtract off 1.0
        fbge.w          pos_res                 # if pos, branch
        fmul.x          PLOG2UP1(%pc),%fp0      # if neg, mul by LOG2UP1
        fmov.l          %fp0,%d6                # put ILOG in d6 as a lword
        bra.b           A4_str                  # go move out ILOG
        fmul.x          PLOG2(%pc),%fp0         # if pos, mul by LOG2
        fmov.l          %fp0,%d6                # put ILOG in d6 as a lword

# A4. Clr INEX bit.
#     The operation in A3 above may have set INEX2.
        fmov.l          &0,%fpsr                # zero all of fpsr - nothing needed

# A5. Set ICTR = 0;
#     ICTR is a flag used in A13. It must be set before the
#     loop entry A6. The lower word of d5 is used for ICTR.
        clr.w           %d5                     # clear ICTR
# A6. Calculate LEN.
#     LEN is the number of digits to be displayed. The k-factor
#     can dictate either the total number of digits, if it is
#     a positive number, or the number of digits after the
#     original decimal point which are to be included as
#     significant. See the 68882 manual for examples.
#     If LEN is computed to be greater than 17, set OPERR in
#     USER_FPSR. LEN is stored in d4.
#	d0: exponent/Unchanged
#	d4: exc picture/LEN
#	d5: ICTR/Unchanged
#	d6: ILOG/Unchanged
#	d7: k-factor/Unchanged
#	a0: ptr for original operand/final result
#	fp0: float(ILOG)/Unchanged
#	F_SCR2:Abs(X) with $3fff exponent/Unchanged
#	L_SCR2:first word of X packed/Unchanged
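#
# Illustrative note (not part of the original source): the LEN calculation
# below in C. A positive k-factor gives the digit count directly;
# otherwise LEN = ILOG + 1 - k. LEN is clamped to 1..17 and OPERR is
# raised only when a positive k asked for more than 17 digits.
#
#	int calc_len(int k, int ilog, int *operr)
#	{
#	    int len = (k > 0) ? k : ilog + 1 - k;
#	    if (len < 1)
#	        len = 1;
#	    else if (len > 17) {
#	        len = 17;
#	        if (k > 0)
#	            *operr = 1;         /* set OPERR/AIOP in USER_FPSR */
#	    }
#	    return len;
#	}
#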
        tst.l           %d7                     # branch on sign of k
        ble.b           k_neg                   # if k <= 0, LEN = ILOG + 1 - k
        mov.l           %d7,%d4                 # if k > 0, LEN = k
        bra.b           len_ck                  # skip to LEN check
        mov.l           %d6,%d4                 # first load ILOG to d4
        sub.l           %d7,%d4                 # subtract off k
        addq.l          &1,%d4                  # add in the 1
        tst.l           %d4                     # LEN check: branch on sign of LEN
        ble.b           LEN_ng                  # if neg, set LEN = 1
        cmp.l           %d4,&17                 # test if LEN > 17
        ble.b           A7_str                  # if not, forget it
        mov.l           &17,%d4                 # set max LEN = 17
        tst.l           %d7                     # if negative, never set OPERR
        ble.b           A7_str                  # if positive, continue
        or.l            &opaop_mask,USER_FPSR(%a6) # set OPERR & AIOP in USER_FPSR
        bra.b           A7_str                  # finished here
        mov.l           &1,%d4                  # min LEN is 1
# A7. Calculate SCALE.
#     SCALE is equal to 10^ISCALE, where ISCALE is the number
#     of decimal places needed to insure LEN integer digits
#     in the output before conversion to bcd. LAMBDA is the sign
#     of ISCALE, used in A9. Fp1 contains 10^^(abs(ISCALE)) using
#     the rounding mode as given in the following table (see
#     Coonen, p. 7.23 as ref.; however, the SCALE variable is
#     of opposite sign in bindec.sa from Coonen).
#
#	FPCR[6:5]	LAMBDA	SIGN(X)		FPCR[6:5]
#	----------------------------------------------
#	RN	00	   0	   0		00/0	RN
#	RN	00	   0	   1		00/0	RN
#	RN	00	   1	   0		00/0	RN
#	RN	00	   1	   1		00/0	RN
#	RZ	01	   0	   0		11/3	RP
#	RZ	01	   0	   1		11/3	RP
#	RZ	01	   1	   0		10/2	RM
#	RZ	01	   1	   1		10/2	RM
#	RM	10	   0	   0		11/3	RP
#	RM	10	   0	   1		10/2	RM
#	RM	10	   1	   0		10/2	RM
#	RM	10	   1	   1		11/3	RP
#	RP	11	   0	   0		10/2	RM
#	RP	11	   0	   1		11/3	RP
#	RP	11	   1	   0		11/3	RP
#	RP	11	   1	   1		10/2	RM
#
#	d0: exponent/scratch - final is 0
#	d2: x/0 or 24 for A9
#	d3: x/scratch - offset ptr into PTENRM array
#	d4: LEN/Unchanged
#	d5: 0/ICTR:LAMBDA
#	d6: ILOG/ILOG or k if ((k<=0)&(ILOG<k))
#	d7: k-factor/Unchanged
#	a0: ptr for original operand/final result
#	a1: x/ptr to PTENRM array
#	fp0: float(ILOG)/Unchanged
#	F_SCR2:Abs(X) with $3fff exponent/Unchanged
#	L_SCR2:first word of X packed/Unchanged
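#
# Illustrative note (not part of the original source): ISCALE and the
# rounding-mode selection below in C. LAMBDA records the sign of ISCALE;
# for very small exponents (ISCALE <= -4908) 24 is added back and kept in
# d2 so A9 can multiply by 10^24 (exactly 10^8 * 10^16) separately.
#
#	/* fpcr_rm = FPCR[6:5]; returns index into the 16-byte RBDTBL */
#	unsigned scale_setup(int ilog, int len, unsigned fpcr_rm, int x_neg,
#	                     int *lambda, int *extra24, int *iscale_abs)
#	{
#	    int iscale = ilog + 1 - len;
#	    *lambda = 0;
#	    *extra24 = 0;
#	    if (iscale < 0) {
#	        *lambda = 1;
#	        if (iscale <= -4908) {          /* avoid overflow in 10^|ISCALE| */
#	            iscale += 24;
#	            *extra24 = 24;
#	        }
#	        iscale = -iscale;
#	    }
#	    *iscale_abs = iscale;
#	    return (fpcr_rm << 2) | ((unsigned)*lambda << 1) | (x_neg ? 1 : 0);
#	}
#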
        tst.l           %d7                     # test sign of k
        bgt.b           k_pos                   # if pos and > 0, skip this
        cmp.l           %d7,%d6                 # test k - ILOG
        blt.b           k_pos                   # if ILOG >= k, skip this
        mov.l           %d7,%d6                 # if ((k<0) & (ILOG < k)) ILOG = k
        mov.l           %d6,%d0                 # calc ILOG + 1 - LEN in d0
        addq.l          &1,%d0                  # add the 1
        sub.l           %d4,%d0                 # sub off LEN
        swap            %d5                     # use upper word of d5 for LAMBDA
        clr.w           %d5                     # set it zero initially
        clr.w           %d2                     # set up d2 for very small case
        tst.l           %d0                     # test sign of ISCALE
        bge.b           iscale                  # if pos, skip next inst
        addq.w          &1,%d5                  # if neg, set LAMBDA true
        cmp.l           %d0,&0xffffecd4         # test iscale <= -4908
        bgt.b           no_inf                  # if false, skip rest
        add.l           &24,%d0                 # add in 24 to iscale
        mov.l           &24,%d2                 # put 24 in d2 for A9
        neg.l           %d0                     # and take abs of ISCALE
        fmov.s          FONE(%pc),%fp1          # init fp1 to 1
        bfextu          USER_FPCR(%a6){&26:&2},%d1 # get initial rmode bits
        lsl.w           &1,%d1                  # put them in bits 2:1
        add.w           %d5,%d1                 # add in LAMBDA
        lsl.w           &1,%d1                  # put them in bits 3:1
        tst.l           L_SCR2(%a6)             # test sign of original x
        bge.b           x_pos                   # if pos, don't set bit 0
        addq.l          &1,%d1                  # if neg, set bit 0
        lea.l           RBDTBL(%pc),%a2         # load rbdtbl base
        mov.b           (%a2,%d1),%d3           # load d3 with new rmode
        lsl.l           &4,%d3                  # put bits in proper position
        fmov.l          %d3,%fpcr               # load bits into fpu
        lsr.l           &4,%d3                  # put bits in proper position
        tst.b           %d3                     # decode new rmode for pten table
        bne.b           not_rn                  # if zero, it is RN
        lea.l           PTENRN(%pc),%a1         # load a1 with RN table base
        bra.b           rmode                   # exit decode
        lsr.b           &1,%d3                  # get lsb in carry
        bcc.b           not_rp2                 # if carry clear, it is RM
        lea.l           PTENRP(%pc),%a1         # load a1 with RP table base
        bra.b           rmode                   # exit decode
        lea.l           PTENRM(%pc),%a1         # load a1 with RM table base
        clr.l           %d3                     # clr table index
        lsr.l           &1,%d0                  # shift next bit into carry
        bcc.b           e_next2                 # if zero, skip the mul
        fmul.x          (%a1,%d3),%fp1          # mul by 10**(d3_bit_no)
        add.l           &12,%d3                 # inc d3 to next pwrten table entry
        tst.l           %d0                     # test if ISCALE is zero
        bne.b           e_loop2                 # if not, loop
# A8. Clr INEX; Force RZ.
#     The operation in A3 above may have set INEX2.
#     RZ mode is forced for the scaling operation to insure
#     only one rounding error. The grs bits are collected in
#     the INEX flag for use in A10.
        fmov.l          &0,%fpsr                # clr INEX
        fmov.l          &rz_mode*0x10,%fpcr     # set RZ rounding mode
# A9. Scale X -> Y.
#     The mantissa is scaled to the desired number of significant
#     digits. The excess digits are collected in INEX2. If mul,
#     check d2 for excess 10 exponential value. If not zero,
#     the iscale value would have caused the pwrten calculation
#     to overflow. Only a negative iscale can cause this, so
#     multiply by 10^(d2), which is now only allowed to be 24,
#     with a multiply by 10^8 and 10^16, which is exact since
#     10^24 is exact. If the input was denormalized, we must
#     create a busy stack frame with the mul command and the
#     two operands, and allow the fpu to complete the multiply.
#	d0: FPCR with RZ mode/Unchanged
#	d2: 0 or 24/unchanged
#	d4: LEN/Unchanged
#	d6: ILOG/Unchanged
#	d7: k-factor/Unchanged
#	a0: ptr for original operand/final result
#	a1: ptr to PTENRM array/Unchanged
#	fp0: float(ILOG)/X adjusted for SCALE (Y)
#	fp1: 10^ISCALE/Unchanged
#	F_SCR2:Abs(X) with $3fff exponent/Unchanged
#	L_SCR2:first word of X packed/Unchanged
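#
# Illustrative note (not part of the original source): the idea behind the
# denorm path of A9 below, sketched in C. Multiplying the denormalized
# input by 10^ISCALE (and by 10^8 * 10^16 for the extra 10^24) directly
# could underflow, so the biased exponents are combined in integer
# arithmetic (one bias subtracted per multiplication) while the mantissas
# are multiplied with their exponents forced to 0x3fff; the combined
# exponent is then patched back in. Rough sketch, ignoring the sign trick
# the code plays with the denorm's exponent word:
#
#	/* biased 15-bit exponents; returns -1 when the product would still
#	   be denormalized (the code punts to sc_mul_err in that case) */
#	int combine_exps(int e_op, int e_iscale, int e_p8, int e_p16)
#	{
#	    int e = e_op + e_iscale - 0x3fff;
#	    e += e_p8 - 0x3fff;
#	    e += e_p16 - 0x3fff;
#	    return (e < 0) ? -1 : e;
#	}
#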
        fmov.x          (%a0),%fp0              # load X from memory
        fabs.x          %fp0                    # use abs(X)
        tst.w           %d5                     # LAMBDA is in lower word of d5
        bne.b           sc_mul                  # if neg (LAMBDA = 1), scale by mul
        fdiv.x          %fp1,%fp0               # calculate X / SCALE -> Y to fp0
        bra.w           A10_st                  # branch to A10

        tst.b           BINDEC_FLG(%a6)         # check for denorm
        beq.w           A9_norm                 # if norm, continue with mul

# for DENORM, we must calculate:
#	fp0 = input_op * 10^ISCALE * 10^24
# since the input operand is a DENORM, we can't multiply it directly.
# so, we do the multiplication of the exponents and mantissas separately.
# in this way, we avoid underflow on intermediate stages of the
# multiplication and guarantee a result without exception.
        fmovm.x         &0x2,-(%sp)             # save 10^ISCALE to stack

        mov.w           (%sp),%d3               # grab exponent
        andi.w          &0x7fff,%d3             # clear sign
        ori.w           &0x8000,(%a0)           # make DENORM exp negative
        add.w           (%a0),%d3               # add DENORM exp to 10^ISCALE exp
        subi.w          &0x3fff,%d3             # subtract BIAS
        subi.w          &0x3fff,%d3             # subtract BIAS
        subi.w          &0x3fff,%d3             # subtract BIAS
        bmi.w           sc_mul_err              # if result is DENORM, punt!!!

        andi.w          &0x8000,(%sp)           # keep sign
        or.w            %d3,(%sp)               # insert new exponent
        andi.w          &0x7fff,(%a0)           # clear sign bit on DENORM again
        mov.l           0x8(%a0),-(%sp)         # put input op mantissa on stk
        mov.l           0x4(%a0),-(%sp)
        mov.l           &0x3fff0000,-(%sp)      # force exp to zero
        fmovm.x         (%sp)+,&0x80            # load normalized DENORM into fp0

#	fmul.x	36(%a1),%fp0	# multiply fp0 by 10^8
#	fmul.x	48(%a1),%fp0	# multiply fp0 by 10^16
        mov.l           36+8(%a1),-(%sp)        # get 10^8 mantissa
        mov.l           36+4(%a1),-(%sp)
        mov.l           &0x3fff0000,-(%sp)      # force exp to zero
        mov.l           48+8(%a1),-(%sp)        # get 10^16 mantissa
        mov.l           48+4(%a1),-(%sp)
        mov.l           &0x3fff0000,-(%sp)      # force exp to zero
        fmul.x          (%sp)+,%fp0             # multiply fp0 by 10^8
        fmul.x          (%sp)+,%fp0             # multiply fp0 by 10^16

        tst.w           %d2                     # test for small exp case
        beq.b           A9_con                  # if zero, continue as normal
        fmul.x          36(%a1),%fp0            # multiply fp0 by 10^8
        fmul.x          48(%a1),%fp0            # multiply fp0 by 10^16
        fmul.x          %fp1,%fp0               # calculate X * SCALE -> Y to fp0
# If INEX is set, round error occurred. This is compensated
# for by 'or-ing' in the INEX2 flag to the lsb of Y.
#	d0: FPCR with RZ mode/FPSR with INEX2 isolated
#	d4: LEN/Unchanged
#	d6: ILOG/Unchanged
#	d7: k-factor/Unchanged
#	a0: ptr for original operand/final result
#	a1: ptr to PTENxx array/Unchanged
#	a2: x/ptr to FP_SCR1(a6)
#	fp0: Y/Y with lsb adjusted
#	fp1: 10^ISCALE/Unchanged
        fmov.l          %fpsr,%d0               # get FPSR
        fmov.x          %fp0,FP_SCR1(%a6)       # move Y to memory
        lea.l           FP_SCR1(%a6),%a2        # load a2 with ptr to FP_SCR1
        btst            &9,%d0                  # check if INEX2 set
        beq.b           A11_st                  # if clear, skip rest
        or.l            &1,8(%a2)               # or in 1 to lsb of mantissa
        fmov.x          FP_SCR1(%a6),%fp0       # write adjusted Y back to fpu
# A11. Restore original FPCR; set size ext.
#      Perform FINT operation in the user's rounding mode. Keep
#      the size to extended. The sintdo entry point in the sint
#      routine expects the FPCR value to be in USER_FPCR for
#      mode and precision. The original FPCR is saved in L_SCR1.
        mov.l           USER_FPCR(%a6),L_SCR1(%a6) # save it for later
        and.l           &0x00000030,USER_FPCR(%a6) # set size to ext,
#                                               ;block exceptions

# A12. Calculate YINT = FINT(Y) according to user's rounding mode.
#      The FPSP routine sintd0 is used. The output is in fp0.
#	d0: FPSR with AINEX cleared/FPCR with size set to ext
#	d4: LEN/Unchanged
#	d5: ICTR:LAMBDA/Unchanged
#	d6: ILOG/Unchanged
#	d7: k-factor/Unchanged
#	a0: ptr for original operand/src ptr for sintdo
#	a1: ptr to PTENxx array/Unchanged
#	a2: ptr to FP_SCR1(a6)/Unchanged
#	a6: temp pointer to FP_SCR1(a6) - orig value saved and restored
#	fp1: 10^ISCALE/Unchanged
#	F_SCR2:Y adjusted for inex/Y with original exponent
#	L_SCR1:x/original USER_FPCR
#	L_SCR2:first word of X packed/Unchanged
        movm.l          &0xc0c0,-(%sp)          # save regs used by sintd0 {%d0-%d1/%a0-%a1}
        mov.l           L_SCR1(%a6),-(%sp)
        mov.l           L_SCR2(%a6),-(%sp)

        lea.l           FP_SCR1(%a6),%a0        # a0 is ptr to FP_SCR1(a6)
        fmov.x          %fp0,(%a0)              # move Y to memory at FP_SCR1(a6)
        tst.l           L_SCR2(%a6)             # test sign of original operand
        bge.b           do_fint12               # if pos, use Y
        or.l            &0x80000000,(%a0)       # if neg, use -Y
        mov.l           USER_FPSR(%a6),-(%sp)
#	bsr	sintdo			# sint routine returns int in fp0

        fmov.l          USER_FPCR(%a6),%fpcr
        fmov.l          &0x0,%fpsr              # clear the AEXC bits!!!
##	mov.l		USER_FPCR(%a6),%d0	# ext prec/keep rnd mode
##	andi.l		&0x00000030,%d0
##	fmov.l		%d0,%fpcr
        fint.x          FP_SCR1(%a6),%fp0       # do fint()
        or.w            %d0,FPSR_EXCEPT(%a6)
##	fmov.l		&0x0,%fpcr
##	fmov.l		%fpsr,%d0		# don't keep ccodes
##	or.w		%d0,FPSR_EXCEPT(%a6)

        mov.b           (%sp),USER_FPSR(%a6)

        mov.l           (%sp)+,L_SCR2(%a6)
        mov.l           (%sp)+,L_SCR1(%a6)
        movm.l          (%sp)+,&0x303           # restore regs used by sint {%d0-%d1/%a0-%a1}

        mov.l           L_SCR2(%a6),FP_SCR1(%a6) # restore original exponent
        mov.l           L_SCR1(%a6),USER_FPCR(%a6) # restore user's FPCR
# A13. Check for LEN digits.
#      If the int operation results in more than LEN digits,
#      or less than LEN-1 digits, adjust ILOG and repeat from
#      A6. This test occurs only on the first pass. If the
#      result is exactly 10^LEN, decrement ILOG and divide
#      the mantissa by 10. The calculation of 10^LEN cannot
#      be inexact, since all powers of ten up to 10^27 are exact
#      in extended precision, so the use of a previous power-of-ten
#      table will introduce no error.
#	d0: FPCR with size set to ext/scratch final = 0
#	d3: x/scratch final = x
#	d4: LEN/LEN adjusted
#	d5: ICTR:LAMBDA/LAMBDA:ICTR
#	d6: ILOG/ILOG adjusted
#	d7: k-factor/Unchanged
#	a0: pointer into memory for packed bcd string formation
#	a1: ptr to PTENxx array/Unchanged
#	a2: ptr to FP_SCR1(a6)/Unchanged
#	fp0: int portion of Y/abs(YINT) adjusted
#	fp1: 10^ISCALE/Unchanged
#	F_SCR2:Y with original exponent/Unchanged
#	L_SCR1:original USER_FPCR/Unchanged
#	L_SCR2:first word of X packed/Unchanged
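#
# Illustrative note (not part of the original source): the first-pass
# digit-count check of A13 below in C, ignoring the denormalized-input
# special case. YINT must land in [10^(LEN-1), 10^LEN); an exact hit on
# 10^LEN is folded back by dividing by 10 (the second pass also bumps LEN).
#
#	#include <math.h>
#
#	/* returns 1 when ILOG changed and A6 must be re-entered */
#	int check_len(double *yint, int *ilog, int len)
#	{
#	    double lo = pow(10.0, (double)(len - 1)), hi = lo * 10.0;
#	    if (*yint < lo)  { (*ilog)--; return 1; }   /* too few digits  */
#	    if (*yint > hi)  { (*ilog)++; return 1; }   /* too many digits */
#	    if (*yint == hi) { *yint /= 10.0; (*ilog)++; }
#	    return 0;
#	}
#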
        swap            %d5                     # put ICTR in lower word of d5
        tst.w           %d5                     # check if ICTR = 0
        bne             not_zr                  # if non-zero, go to second test

# Compute 10^(LEN-1)
        fmov.s          FONE(%pc),%fp2          # init fp2 to 1.0
        mov.l           %d4,%d0                 # put LEN in d0
        subq.l          &1,%d0                  # d0 = LEN - 1
        clr.l           %d3                     # clr table index
        lsr.l           &1,%d0                  # shift next bit into carry
        bcc.b           l_next                  # if zero, skip the mul
        fmul.x          (%a1,%d3),%fp2          # mul by 10**(d3_bit_no)
        add.l           &12,%d3                 # inc d3 to next pwrten table entry
        tst.l           %d0                     # test if LEN is zero
        bne.b           l_loop                  # if not, loop

# 10^LEN-1 is computed for this test and A14. If the input was
# denormalized, check only the case in which YINT > 10^LEN.
        tst.b           BINDEC_FLG(%a6)         # check if input was norm
        beq.b           A13_con                 # if norm, continue with checking
        fabs.x          %fp0                    # take abs of YINT

# Compare abs(YINT) to 10^(LEN-1) and 10^LEN
        fabs.x          %fp0                    # take abs of YINT
        fcmp.x          %fp0,%fp2               # compare abs(YINT) with 10^(LEN-1)
        fbge.w          test_2                  # if greater, do next test
        subq.l          &1,%d6                  # subtract 1 from ILOG
        mov.w           &1,%d5                  # set ICTR
        fmov.l          &rm_mode*0x10,%fpcr     # set rmode to RM
        fmul.s          FTEN(%pc),%fp2          # compute 10^LEN
        bra.w           A6_str                  # return to A6 and recompute YINT
        fmul.s          FTEN(%pc),%fp2          # compute 10^LEN
        fcmp.x          %fp0,%fp2               # compare abs(YINT) with 10^LEN
        fblt.w          A14_st                  # if less, all is ok, go to A14
        fbgt.w          fix_ex                  # if greater, fix and redo
        fdiv.s          FTEN(%pc),%fp0          # if equal, divide by 10
        addq.l          &1,%d6                  # and inc ILOG
        bra.b           A14_st                  # and continue elsewhere
        addq.l          &1,%d6                  # increment ILOG by 1
        mov.w           &1,%d5                  # set ICTR
        fmov.l          &rm_mode*0x10,%fpcr     # set rmode to RM
        bra.w           A6_str                  # return to A6 and recompute YINT

# Since ICTR <> 0, we have already been through one adjustment,
# and shouldn't have another; this is to check if abs(YINT) = 10^LEN
# 10^LEN is again computed using whatever table is in a1 since the
# value calculated cannot be inexact.
        fmov.s          FONE(%pc),%fp2          # init fp2 to 1.0
        mov.l           %d4,%d0                 # put LEN in d0
        clr.l           %d3                     # clr table index
        lsr.l           &1,%d0                  # shift next bit into carry
        bcc.b           z_next                  # if zero, skip the mul
        fmul.x          (%a1,%d3),%fp2          # mul by 10**(d3_bit_no)
        add.l           &12,%d3                 # inc d3 to next pwrten table entry
        tst.l           %d0                     # test if LEN is zero
        bne.b           z_loop                  # if not, loop
        fabs.x          %fp0                    # get abs(YINT)
        fcmp.x          %fp0,%fp2               # check if abs(YINT) = 10^LEN
        fbneq.w         A14_st                  # if not, skip this
        fdiv.s          FTEN(%pc),%fp0          # divide abs(YINT) by 10
        addq.l          &1,%d6                  # and inc ILOG by 1
        addq.l          &1,%d4                  # and inc LEN
        fmul.s          FTEN(%pc),%fp2          # if LEN++, then get 10^^LEN
# A14. Convert the mantissa to bcd.
#      The binstr routine is used to convert the LEN digit
#      mantissa to bcd in memory. The input to binstr is
#      to be a fraction; i.e. (mantissa)/10^LEN and adjusted
#      such that the decimal point is to the left of bit 63.
#      The bcd digits are stored in the correct position in
#      the final string area in memory.
#	d0: x/LEN call to binstr - final is 0
#	d2: x/ms 32-bits of mant of abs(YINT)
#	d3: x/ls 32-bits of mant of abs(YINT)
#	d4: LEN/Unchanged
#	d5: ICTR:LAMBDA/LAMBDA:ICTR
#	d7: k-factor/Unchanged
#	a0: pointer into memory for packed bcd string formation
#	    /ptr to first mantissa byte in result string
#	a1: ptr to PTENxx array/Unchanged
#	a2: ptr to FP_SCR1(a6)/Unchanged
#	fp0: int portion of Y/abs(YINT) adjusted
#	fp1: 10^ISCALE/Unchanged
#	fp2: 10^LEN/Unchanged
#	F_SCR1:x/Work area for final result
#	F_SCR2:Y with original exponent/Unchanged
#	L_SCR1:original USER_FPCR/Unchanged
#	L_SCR2:first word of X packed/Unchanged
        fmov.l          &rz_mode*0x10,%fpcr     # force rz for conversion
        fdiv.x          %fp2,%fp0               # divide abs(YINT) by 10^LEN
        lea.l           FP_SCR0(%a6),%a0
        fmov.x          %fp0,(%a0)              # move abs(YINT)/10^LEN to memory
        mov.l           4(%a0),%d2              # move 2nd word of FP_RES to d2
        mov.l           8(%a0),%d3              # move 3rd word of FP_RES to d3
        clr.l           4(%a0)                  # zero word 2 of FP_RES
        clr.l           8(%a0)                  # zero word 3 of FP_RES
        mov.l           (%a0),%d0               # move exponent to d0
        swap            %d0                     # put exponent in lower word
        beq.b           no_sft                  # if zero, don't shift
        sub.l           &0x3ffd,%d0             # sub bias less 2 to make fract
        tst.l           %d0                     # check if > 1
        bgt.b           no_sft                  # if so, don't shift
        neg.l           %d0                     # make exp positive
        lsr.l           &1,%d2                  # shift d2:d3 right, add 0s
        roxr.l          &1,%d3                  # the number of places
        dbf.w           %d0,m_loop              # given in d0
        tst.l           %d2                     # check for mantissa of zero
        bne.b           no_zr                   # if not, go on
        tst.l           %d3                     # continue zero check
        beq.b           zer_m                   # if zero, go directly to binstr
        clr.l           %d1                     # put zero in d1 for addx
        add.l           &0x00000080,%d3         # inc at bit 7
        addx.l          %d1,%d2                 # continue inc
        and.l           &0xffffff80,%d3         # strip off lsb not used by 882
        mov.l           %d4,%d0                 # put LEN in d0 for binstr call
        addq.l          &3,%a0                  # a0 points to M16 byte in result
        bsr             binstr                  # call binstr to convert mant
# A15. Convert the exponent to bcd.
#      As in A14 above, the exp is converted to bcd and the
#      digits are stored in the final string.
#
# Digits are stored in L_SCR1(a6) on return from BINDEC as:
#
#	 -----------------------------------------
#	|  0 | e3 | e2 | e1 | e4 |  X |  X |  X  |
#	 -----------------------------------------
#
# And are moved into their proper places in FP_SCR0. If digit e4
# is non-zero, OPERR is signaled. In all cases, all 4 digits are
# written as specified in the 881/882 manual for packed decimal.
#	d0: x/LEN call to binstr - final is 0
#	d1: x/scratch (0); shift count for final exponent packing
#	d2: x/ms 32-bits of exp fraction/scratch
#	d3: x/ls 32-bits of exp fraction
#	d4: LEN/Unchanged
#	d5: ICTR:LAMBDA/LAMBDA:ICTR
#	d7: k-factor/Unchanged
#	a0: ptr to result string/ptr to L_SCR1(a6)
#	a1: ptr to PTENxx array/Unchanged
#	a2: ptr to FP_SCR1(a6)/Unchanged
#	fp0: abs(YINT) adjusted/float(ILOG)
#	fp1: 10^ISCALE/Unchanged
#	fp2: 10^LEN/Unchanged
#	F_SCR1:Work area for final result/BCD result
#	F_SCR2:Y with original exponent/ILOG/10^4
#	L_SCR1:original USER_FPCR/Exponent digits on return from binstr
#	L_SCR2:first word of X packed/Unchanged
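#
# Illustrative note (not part of the original source): the two shift/insert
# steps below that move the binstr output from L_SCR1 into the packed
# result, sketched in C. Bit numbering follows bfins (offset counted from
# the msb of the first lword of FP_SCR0).
#
#	void pack_exp_digits(unsigned long lscr1, unsigned long *fpscr0_w0,
#	                     int *operr)
#	{
#	    unsigned long d0 = lscr1 >> 12;
#	    /* low 12 bits -> FP_SCR0 bits 27..16 (3-digit exponent field) */
#	    *fpscr0_w0 = (*fpscr0_w0 & ~0x0fff0000UL) | ((d0 & 0xfff) << 16);
#	    d0 >>= 12;
#	    /* next nibble -> FP_SCR0 bits 15..12 (4th exponent digit) */
#	    *fpscr0_w0 = (*fpscr0_w0 & ~0x0000f000UL) | ((d0 & 0xf) << 12);
#	    if (d0 & 0xf)
#	        *operr = 1;     /* 4-digit exponent: set OPERR/AIOP */
#	}
#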
        tst.b           BINDEC_FLG(%a6)         # check for denorm
        ftest.x         %fp0                    # test for zero
        fbeq.w          den_zero                # if zero, use k-factor or 4933
        fmov.l          %d6,%fp0                # float ILOG
        fabs.x          %fp0                    # get abs of ILOG
        tst.l           %d7                     # check sign of the k-factor
        blt.b           use_ilog                # if negative, use ILOG
        fmov.s          F4933(%pc),%fp0         # force exponent to 4933
        bra.b           convrt                  # do it
        fmov.l          %d6,%fp0                # float ILOG
        fabs.x          %fp0                    # get abs of ILOG
        ftest.x         %fp0                    # test for zero
        fbneq.w         not_zero                # if zero, force exponent
        fmov.s          FONE(%pc),%fp0          # force exponent to 1
        bra.b           convrt                  # do it
        fmov.l          %d6,%fp0                # float ILOG
        fabs.x          %fp0                    # get abs of ILOG
        fdiv.x          24(%a1),%fp0            # compute ILOG/10^4
        fmov.x          %fp0,FP_SCR1(%a6)       # store fp0 in memory
        mov.l           4(%a2),%d2              # move word 2 to d2
        mov.l           8(%a2),%d3              # move word 3 to d3
        mov.w           (%a2),%d0               # move exp to d0
        beq.b           x_loop_fin              # if zero, skip the shift
        sub.w           &0x3ffd,%d0             # subtract off bias
        neg.w           %d0                     # make exp positive
        lsr.l           &1,%d2                  # shift d2:d3 right
        roxr.l          &1,%d3                  # the number of places
        dbf.w           %d0,x_loop              # given in d0
        clr.l           %d1                     # put zero in d1 for addx
        add.l           &0x00000080,%d3         # inc at bit 6
        addx.l          %d1,%d2                 # continue inc
        and.l           &0xffffff80,%d3         # strip off lsb not used by 882
        mov.l           &4,%d0                  # put 4 in d0 for binstr call
        lea.l           L_SCR1(%a6),%a0         # a0 is ptr to L_SCR1 for exp digits
        bsr             binstr                  # call binstr to convert exp
        mov.l           L_SCR1(%a6),%d0         # load L_SCR1 lword to d0
        mov.l           &12,%d1                 # use d1 for shift count
        lsr.l           %d1,%d0                 # shift d0 right by 12
        bfins           %d0,FP_SCR0(%a6){&4:&12} # put e3:e2:e1 in FP_SCR0
        lsr.l           %d1,%d0                 # shift d0 right by 12
        bfins           %d0,FP_SCR0(%a6){&16:&4} # put e4 in FP_SCR0
        tst.b           %d0                     # check if e4 is zero
        beq.b           A16_st                  # if zero, skip rest
        or.l            &opaop_mask,USER_FPSR(%a6) # set OPERR & AIOP in USER_FPSR
# A16. Write sign bits to final string.
#      Sigma is bit 31 of initial value; RHO is bit 31 of d6 (ILOG).
#	d0: x/scratch - final is x
#	d4: LEN/Unchanged
#	d5: ICTR:LAMBDA/LAMBDA:ICTR
#	d6: ILOG/ILOG adjusted
#	d7: k-factor/Unchanged
#	a0: ptr to L_SCR1(a6)/Unchanged
#	a1: ptr to PTENxx array/Unchanged
#	a2: ptr to FP_SCR1(a6)/Unchanged
#	fp0: float(ILOG)/Unchanged
#	fp1: 10^ISCALE/Unchanged
#	fp2: 10^LEN/Unchanged
#	F_SCR1:BCD result with correct signs
#	L_SCR1:Exponent digits on return from binstr
#	L_SCR2:first word of X packed/Unchanged
        clr.l           %d0                     # clr d0 for collection of signs
        and.b           &0x0f,FP_SCR0(%a6)      # clear first nibble of FP_SCR0
        tst.l           L_SCR2(%a6)             # check sign of original mantissa
        bge.b           mant_p                  # if pos, don't set SM
        mov.l           &2,%d0                  # move 2 in to d0 for SM
        tst.l           %d6                     # check sign of ILOG
        bge.b           wr_sgn                  # if pos, don't set SE
        addq.l          &1,%d0                  # set bit 0 in d0 for SE
        bfins           %d0,FP_SCR0(%a6){&0:&2} # insert SM and SE into FP_SCR0

# Clean up and restore all registers used.
        fmov.l          &0,%fpsr                # clear possible inex2/ainex bits
        fmovm.x         (%sp)+,&0xe0            # {%fp0-%fp2}
        movm.l          (%sp)+,&0x4fc           # {%d2-%d7/%a2}
        long            0x40020000,0xA0000000,0x00000000        # 10 ^ 1
        long            0x40050000,0xC8000000,0x00000000        # 10 ^ 2
        long            0x400C0000,0x9C400000,0x00000000        # 10 ^ 4
        long            0x40190000,0xBEBC2000,0x00000000        # 10 ^ 8
        long            0x40340000,0x8E1BC9BF,0x04000000        # 10 ^ 16
        long            0x40690000,0x9DC5ADA8,0x2B70B59E        # 10 ^ 32
        long            0x40D30000,0xC2781F49,0xFFCFA6D5        # 10 ^ 64
        long            0x41A80000,0x93BA47C9,0x80E98CE0        # 10 ^ 128
        long            0x43510000,0xAA7EEBFB,0x9DF9DE8E        # 10 ^ 256
        long            0x46A30000,0xE319A0AE,0xA60E91C7        # 10 ^ 512
        long            0x4D480000,0xC9767586,0x81750C17        # 10 ^ 1024
        long            0x5A920000,0x9E8B3B5D,0xC53D5DE5        # 10 ^ 2048
        long            0x75250000,0xC4605202,0x8A20979B        # 10 ^ 4096

        long            0x40020000,0xA0000000,0x00000000        # 10 ^ 1
        long            0x40050000,0xC8000000,0x00000000        # 10 ^ 2
        long            0x400C0000,0x9C400000,0x00000000        # 10 ^ 4
        long            0x40190000,0xBEBC2000,0x00000000        # 10 ^ 8
        long            0x40340000,0x8E1BC9BF,0x04000000        # 10 ^ 16
        long            0x40690000,0x9DC5ADA8,0x2B70B59E        # 10 ^ 32
        long            0x40D30000,0xC2781F49,0xFFCFA6D6        # 10 ^ 64
        long            0x41A80000,0x93BA47C9,0x80E98CE0        # 10 ^ 128
        long            0x43510000,0xAA7EEBFB,0x9DF9DE8E        # 10 ^ 256
        long            0x46A30000,0xE319A0AE,0xA60E91C7        # 10 ^ 512
        long            0x4D480000,0xC9767586,0x81750C18        # 10 ^ 1024
        long            0x5A920000,0x9E8B3B5D,0xC53D5DE5        # 10 ^ 2048
        long            0x75250000,0xC4605202,0x8A20979B        # 10 ^ 4096

        long            0x40020000,0xA0000000,0x00000000        # 10 ^ 1
        long            0x40050000,0xC8000000,0x00000000        # 10 ^ 2
        long            0x400C0000,0x9C400000,0x00000000        # 10 ^ 4
        long            0x40190000,0xBEBC2000,0x00000000        # 10 ^ 8
        long            0x40340000,0x8E1BC9BF,0x04000000        # 10 ^ 16
        long            0x40690000,0x9DC5ADA8,0x2B70B59D        # 10 ^ 32
        long            0x40D30000,0xC2781F49,0xFFCFA6D5        # 10 ^ 64
        long            0x41A80000,0x93BA47C9,0x80E98CDF        # 10 ^ 128
        long            0x43510000,0xAA7EEBFB,0x9DF9DE8D        # 10 ^ 256
        long            0x46A30000,0xE319A0AE,0xA60E91C6        # 10 ^ 512
        long            0x4D480000,0xC9767586,0x81750C17        # 10 ^ 1024
        long            0x5A920000,0x9E8B3B5D,0xC53D5DE4        # 10 ^ 2048
        long            0x75250000,0xC4605202,0x8A20979A        # 10 ^ 4096
#########################################################################
# binstr(): Converts a 64-bit binary integer to bcd.                   #
# INPUT *************************************************************** #
#	d2:d3 = 64-bit binary integer                                   #
#	d0    = desired length (LEN)                                    #
#	a0    = pointer to start in memory for bcd characters           #
#	        (This pointer must point to byte 4 of the first         #
#	        lword of the packed decimal memory string.)             #
# OUTPUT ************************************************************** #
#	a0 = pointer to LEN bcd digits representing the 64-bit integer. #
# ALGORITHM *********************************************************** #
#	The 64-bit binary is assumed to have a decimal point before     #
#	bit 63. The fraction is multiplied by 10 using a mul by 2       #
#	shift and a mul by 8 shift. The bits shifted out of the         #
#	msb form a decimal digit. This process is iterated until        #
#	LEN digits are formed.                                          #
#	A1. Init d7 to 1. D7 is the byte digit counter, and if 1, the   #
#	    digit formed will be assumed the least significant. This is #
#	    to force the first byte formed to have a 0 in the upper 4 bits. #
#	A2. Beginning of the loop:                                      #
#	    Copy the fraction in d2:d3 to d4:d5.                        #
#	A3. Multiply the fraction in d2:d3 by 8 using bit-field         #
#	    extracts and shifts. The three msbs from d2 will go into d1. #
#	A4. Multiply the fraction in d4:d5 by 2 using shifts. The msb   #
#	    will be collected by the carry.                             #
#	A5. Add using the carry the 64-bit quantities in d2:d3 and d4:d5 #
#	    into d2:d3. D1 will contain the bcd digit formed.           #
#	A6. Test d7. If zero, the digit formed is the ms digit. If non- #
#	    zero, it is the ls digit. Put the digit in its place in the #
#	    upper word of d0. If it is the ls digit, write the word     #
#	    from d0 to memory.                                          #
#	A7. Decrement d6 (LEN counter) and repeat the loop until zero.  #
#########################################################################
# Implementation Notes:
# The registers are used as follows:
#	d1: temp used to form the digit
#	d2: upper 32-bits of fraction for mul by 8
#	d3: lower 32-bits of fraction for mul by 8
#	d4: upper 32-bits of fraction for mul by 2
#	d5: lower 32-bits of fraction for mul by 2
#	d6: temp for bit-field extracts
#	d7: byte digit formation word; digit count {0,1}
#	a0: pointer into memory for packed bcd string formation
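#
# Illustrative note (not part of the original source): one pass of the
# binstr loop in C. The 64-bit fraction is multiplied by 10 as
# (x << 3) + (x << 1); the bits that overflow bit 63 are exactly the next
# decimal digit, most significant digit first.
#
#	#include <stdint.h>
#
#	/* frac is a 0.64 fixed-point fraction (value = frac / 2^64) */
#	void binstr_sketch(uint64_t frac, int len, unsigned char *out)
#	{
#	    for (int i = 0; i < len; i++) {
#	        unsigned d = (unsigned)(frac >> 61)     /* carry out of x*8 */
#	                   + (unsigned)(frac >> 63);    /* carry out of x*2 */
#	        uint64_t x8 = frac << 3, x2 = frac << 1;
#	        frac = x8 + x2;
#	        if (frac < x8)                          /* carry out of the add */
#	            d++;
#	        out[i] = (unsigned char)d;              /* next digit, 0..9 */
#	    }
#	}
#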
        movm.l          &0xff00,-(%sp)          # {%d0-%d7}

        mov.l           &1,%d7                  # init d7 for second digit
        subq.l          &1,%d0                  # for dbf d0 would have LEN+1 passes

# A2. Copy d2:d3 to d4:d5. Start loop.
        mov.l           %d2,%d4                 # copy the fraction before muls
        mov.l           %d3,%d5                 # to d4:d5

# A3. Multiply d2:d3 by 8; extract msbs into d1.
        bfextu          %d2{&0:&3},%d1          # copy 3 msbs of d2 into d1
        asl.l           &3,%d2                  # shift d2 left by 3 places
        bfextu          %d3{&0:&3},%d6          # copy 3 msbs of d3 into d6
        asl.l           &3,%d3                  # shift d3 left by 3 places
        or.l            %d6,%d2                 # or in msbs from d3 into d2

# A4. Multiply d4:d5 by 2; add carry out to d1.
        asl.l           &1,%d5                  # mul d5 by 2
        roxl.l          &1,%d4                  # mul d4 by 2
        swap            %d6                     # put 0 in d6 lower word
        addx.w          %d6,%d1                 # add in extend from mul by 2

# A5. Add mul by 8 to mul by 2. D1 contains the digit formed.
        add.l           %d5,%d3                 # add lower 32 bits
        nop                                     # ERRATA FIX #13 (Rev. 1.2 6/6/90)
        addx.l          %d4,%d2                 # add with extend upper 32 bits
        nop                                     # ERRATA FIX #13 (Rev. 1.2 6/6/90)
        addx.w          %d6,%d1                 # add in extend from add to d1
        swap            %d6                     # with d6 = 0; put 0 in upper word

# A6. Test d7 and branch.
        tst.w           %d7                     # if zero, store digit & to loop
        beq.b           first_d                 # if non-zero, form byte & write
        swap            %d7                     # bring first digit to word d7b
        asl.w           &4,%d7                  # first digit in upper 4 bits d7b
        add.w           %d1,%d7                 # add in ls digit to d7b
        mov.b           %d7,(%a0)+              # store d7b byte in memory
        swap            %d7                     # put LEN counter in word d7a
        clr.w           %d7                     # set d7a to signal no digits done
        dbf.w           %d0,loop                # do loop some more!
        bra.b           end_bstr                # finished, so exit
        swap            %d7                     # put digit word in d7b
        mov.w           %d1,%d7                 # put new digit in d7b
        swap            %d7                     # put LEN counter in word d7a
        addq.w          &1,%d7                  # set d7a to signal first digit done
        dbf.w           %d0,loop                # do loop some more!
        swap            %d7                     # put last digit in string
        lsl.w           &4,%d7                  # move it to upper 4 bits
        mov.b           %d7,(%a0)+              # store it in memory string

# Clean up and return with result in fp0.
        movm.l          (%sp)+,&0xff            # {%d0-%d7}
#########################################################################
# XDEF **************************************************************** #
#	facc_in_b(): dmem_read_byte failed                              #
#	facc_in_w(): dmem_read_word failed                              #
#	facc_in_l(): dmem_read_long failed                              #
#	facc_in_d(): dmem_read of dbl prec failed                       #
#	facc_in_x(): dmem_read of ext prec failed                       #
#	facc_out_b(): dmem_write_byte failed                            #
#	facc_out_w(): dmem_write_word failed                            #
#	facc_out_l(): dmem_write_long failed                            #
#	facc_out_d(): dmem_write of dbl prec failed                     #
#	facc_out_x(): dmem_write of ext prec failed                     #
# XREF **************************************************************** #
#	_real_access() - exit through access error handler              #
# INPUT *************************************************************** #
# OUTPUT ************************************************************** #
# ALGORITHM *********************************************************** #
#	Flow jumps here when an FP data fetch call gets an error        #
# result. This means the operating system wants an access error frame   #
# made out of the current exception stack frame.                        #
#	So, we first call restore() which makes sure that any updated   #
# -(an)+ register gets returned to its pre-exception value and then     #
# we change the stack to an access error stack frame.                   #
#########################################################################
        movq.l          &0x1,%d0                # one byte
        bsr.w           restore                 # fix An
        mov.w           &0x0121,EXC_VOFF(%a6)   # set FSLW

        movq.l          &0x2,%d0                # two bytes
        bsr.w           restore                 # fix An
        mov.w           &0x0141,EXC_VOFF(%a6)   # set FSLW

        movq.l          &0x4,%d0                # four bytes
        bsr.w           restore                 # fix An
        mov.w           &0x0101,EXC_VOFF(%a6)   # set FSLW

        movq.l          &0x8,%d0                # eight bytes
        bsr.w           restore                 # fix An
        mov.w           &0x0161,EXC_VOFF(%a6)   # set FSLW

        movq.l          &0xc,%d0                # twelve bytes
        bsr.w           restore                 # fix An
        mov.w           &0x0161,EXC_VOFF(%a6)   # set FSLW

################################################################

        movq.l          &0x1,%d0                # one byte
        bsr.w           restore                 # restore An
        mov.w           &0x00a1,EXC_VOFF(%a6)   # set FSLW

        movq.l          &0x2,%d0                # two bytes
        bsr.w           restore                 # restore An
        mov.w           &0x00c1,EXC_VOFF(%a6)   # set FSLW

        movq.l          &0x4,%d0                # four bytes
        bsr.w           restore                 # restore An
        mov.w           &0x0081,EXC_VOFF(%a6)   # set FSLW

        movq.l          &0x8,%d0                # eight bytes
        bsr.w           restore                 # restore An
        mov.w           &0x00e1,EXC_VOFF(%a6)   # set FSLW

        mov.l           &0xc,%d0                # twelve bytes
        bsr.w           restore                 # restore An
        mov.w           &0x00e1,EXC_VOFF(%a6)   # set FSLW
# here's where we actually create the access error frame from the
# current exception stack frame.
        mov.l           USER_FPIAR(%a6),EXC_PC(%a6) # store current PC

        fmovm.x         EXC_FPREGS(%a6),&0xc0   # restore fp0-fp1
        fmovm.l         USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs
        movm.l          EXC_DREGS(%a6),&0x0303  # restore d0-d1/a0-a1

        mov.l           (%sp),-(%sp)            # store SR, hi(PC)
        mov.l           0x8(%sp),0x4(%sp)       # store lo(PC)
        mov.l           0xc(%sp),0x8(%sp)       # store EA
        mov.l           &0x00000001,0xc(%sp)    # store FSLW
        mov.w           0x6(%sp),0xc(%sp)       # fix FSLW (size)
        mov.w           &0x4008,0x6(%sp)        # store voff

        btst            &0x5,(%sp)              # supervisor or user mode?
        beq.b           facc_out2               # user
        bset            &0x2,0xd(%sp)           # set supervisor TM bit
##################################################################

# if the effective addressing mode was predecrement or postincrement,
# the emulation has already changed its value to the correct post-
# instruction value. but since we're exiting to the access error
# handler, then AN must be returned to its pre-instruction value.
        mov.b           EXC_OPWORD+0x1(%a6),%d1
        andi.b          &0x38,%d1               # extract opmode
        cmpi.b          %d1,&0x18               # postinc?
        cmpi.b          %d1,&0x20               # predec?

        mov.b           EXC_OPWORD+0x1(%a6),%d1
        andi.w          &0x0007,%d1             # fetch An

        mov.w           (tbl_rest_inc.b,%pc,%d1.w*2),%d1
        jmp             (tbl_rest_inc.b,%pc,%d1.w*1)

        short           ri_a0 - tbl_rest_inc
        short           ri_a1 - tbl_rest_inc
        short           ri_a2 - tbl_rest_inc
        short           ri_a3 - tbl_rest_inc
        short           ri_a4 - tbl_rest_inc
        short           ri_a5 - tbl_rest_inc
        short           ri_a6 - tbl_rest_inc
        short           ri_a7 - tbl_rest_inc

        sub.l           %d0,EXC_DREGS+0x8(%a6)  # fix stacked a0
        sub.l           %d0,EXC_DREGS+0xc(%a6)  # fix stacked a1
        sub.l           %d0,%a2                 # fix a2
        sub.l           %d0,%a3                 # fix a3
        sub.l           %d0,%a4                 # fix a4
        sub.l           %d0,%a5                 # fix a5
        sub.l           %d0,(%a6)               # fix stacked a6

# if it's a fmove out instruction, we don't have to fix a7
# because we hadn't changed it yet. if it's an opclass two
# instruction (data moved in) and the exception was in supervisor
# mode, then a7 also wasn't updated. if it was user mode, then
# restore the correct a7 which is in the USP currently.
        cmpi.b          EXC_VOFF(%a6),&0x30     # move in or out?
        bne.b           ri_a7_done              # out

        btst            &0x5,EXC_SR(%a6)        # user or supervisor?
        bne.b           ri_a7_done              # supervisor
        movc            %usp,%a0
# restore USP
24791 # need to invert adjustment value if the <ea> was predec