arch/m68k-all/m680x0/060sp/dist/pfpsp.s

   1 #
   2 # $NetBSD: pfpsp.s,v 1.4 2005/12/11 12:17:52 christos Exp $
   3 #
   4
   5 #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
   6 # MOTOROLA MICROPROCESSOR & MEMORY TECHNOLOGY GROUP
   7 # M68000 Hi-Performance Microprocessor Division
   8 # M68060 Software Package Production Release
   9 #
  10 # M68060 Software Package Copyright (C) 1993, 1994, 1995, 1996 Motorola Inc.
  11 # All rights reserved.
  12 #
  13 # THE SOFTWARE is provided on an "AS IS" basis and without warranty.
  14 # To the maximum extent permitted by applicable law,
  15 # MOTOROLA DISCLAIMS ALL WARRANTIES WHETHER EXPRESS OR IMPLIED,
  16 # INCLUDING IMPLIED WARRANTIES OF MERCHANTABILITY OR FITNESS
  17 # FOR A PARTICULAR PURPOSE and any warranty against infringement with
  18 # regard to the SOFTWARE (INCLUDING ANY MODIFIED VERSIONS THEREOF)
  19 # and any accompanying written materials.
  20 #
  21 # To the maximum extent permitted by applicable law,
  22 # IN NO EVENT SHALL MOTOROLA BE LIABLE FOR ANY DAMAGES WHATSOEVER
  23 # (INCLUDING WITHOUT LIMITATION, DAMAGES FOR LOSS OF BUSINESS PROFITS,
  24 # BUSINESS INTERRUPTION, LOSS OF BUSINESS INFORMATION, OR OTHER PECUNIARY LOSS)
  25 # ARISING OF THE USE OR INABILITY TO USE THE SOFTWARE.
  26 #
  27 # Motorola assumes no responsibility for the maintenance and support
  28 # of the SOFTWARE.
  29 #
  30 # You are hereby granted a copyright license to use, modify, and distribute the
  31 # SOFTWARE so long as this entire notice is retained without alteration
  32 # in any modified and/or redistributed versions, and that such modified
  33 # versions are clearly identified as such.
  34 # No licenses are granted by implication, estoppel or otherwise under any
  35 # patents or trademarks of Motorola, Inc.
  36 #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
  37
  38 #
  39 # freal.s:
  40 #       This file is appended to the top of the 060FPSP package
  41 # and contains the entry points into the package. The user, in
  42 # effect, branches to one of the branch table entries located
  43 # after _060FPSP_TABLE.
  44 #       Also, subroutine stubs exist in this file (_fpsp_done for
  45 # example) that are referenced by the FPSP package itself in order
  46 # to call a given routine. The stub routine actually performs the
  47 # callout. The FPSP code does a "bsr" to the stub routine. This
  48 # extra layer of hierarchy adds a slight performance penalty but
  49 # it makes the FPSP code easier to read and more mainatinable.
  50 #
  51
  52 set     _off_bsun,      0x00
  53 set     _off_snan,      0x04
  54 set     _off_operr,     0x08
  55 set     _off_ovfl,      0x0c
  56 set     _off_unfl,      0x10
  57 set     _off_dz,        0x14
  58 set     _off_inex,      0x18
  59 set     _off_fline,     0x1c
  60 set     _off_fpu_dis,   0x20
  61 set     _off_trap,      0x24
  62 set     _off_trace,     0x28
  63 set     _off_access,    0x2c
  64 set     _off_done,      0x30
  65
  66 set     _off_imr,       0x40
  67 set     _off_dmr,       0x44
  68 set     _off_dmw,       0x48
  69 set     _off_irw,       0x4c
  70 set     _off_irl,       0x50
  71 set     _off_drb,       0x54
  72 set     _off_drw,       0x58
  73 set     _off_drl,       0x5c
  74 set     _off_dwb,       0x60
  75 set     _off_dww,       0x64
  76 set     _off_dwl,       0x68
  77
  78 _060FPSP_TABLE:
  79
  80 ###############################################################
  81
  82 # Here's the table of ENTRY POINTS for those linking the package.
  83         bra.l           _fpsp_snan
  84         short           0x0000
  85         bra.l           _fpsp_operr
  86         short           0x0000
  87         bra.l           _fpsp_ovfl
  88         short           0x0000
  89         bra.l           _fpsp_unfl
  90         short           0x0000
  91         bra.l           _fpsp_dz
  92         short           0x0000
  93         bra.l           _fpsp_inex
  94         short           0x0000
  95         bra.l           _fpsp_fline
  96         short           0x0000
  97         bra.l           _fpsp_unsupp
  98         short           0x0000
  99         bra.l           _fpsp_effadd
 100         short           0x0000
 101
 102         space           56
 103
 104 ###############################################################
 105         global          _fpsp_done
 106 _fpsp_done:
 107         mov.l           %d0,-(%sp)
 108         mov.l           (_060FPSP_TABLE-0x80+_off_done,%pc),%d0
 109         pea.l           (_060FPSP_TABLE-0x80,%pc,%d0)
 110         mov.l           0x4(%sp),%d0
 111         rtd             &0x4
 112
 113         global          _real_ovfl
 114 _real_ovfl:
 115         mov.l           %d0,-(%sp)
 116         mov.l           (_060FPSP_TABLE-0x80+_off_ovfl,%pc),%d0
 117         pea.l           (_060FPSP_TABLE-0x80,%pc,%d0)
 118         mov.l           0x4(%sp),%d0
 119         rtd             &0x4
 120
 121         global          _real_unfl
 122 _real_unfl:
 123         mov.l           %d0,-(%sp)
 124         mov.l           (_060FPSP_TABLE-0x80+_off_unfl,%pc),%d0
 125         pea.l           (_060FPSP_TABLE-0x80,%pc,%d0)
 126         mov.l           0x4(%sp),%d0
 127         rtd             &0x4
 128
 129         global          _real_inex
 130 _real_inex:
 131         mov.l           %d0,-(%sp)
 132         mov.l           (_060FPSP_TABLE-0x80+_off_inex,%pc),%d0
 133         pea.l           (_060FPSP_TABLE-0x80,%pc,%d0)
 134         mov.l           0x4(%sp),%d0
 135         rtd             &0x4
 136
 137         global          _real_bsun
 138 _real_bsun:
 139         mov.l           %d0,-(%sp)
 140         mov.l           (_060FPSP_TABLE-0x80+_off_bsun,%pc),%d0
 141         pea.l           (_060FPSP_TABLE-0x80,%pc,%d0)
 142         mov.l           0x4(%sp),%d0
 143         rtd             &0x4
 144
 145         global          _real_operr
 146 _real_operr:
 147         mov.l           %d0,-(%sp)
 148         mov.l           (_060FPSP_TABLE-0x80+_off_operr,%pc),%d0
 149         pea.l           (_060FPSP_TABLE-0x80,%pc,%d0)
 150         mov.l           0x4(%sp),%d0
 151         rtd             &0x4
 152
 153         global          _real_snan
 154 _real_snan:
 155         mov.l           %d0,-(%sp)
 156         mov.l           (_060FPSP_TABLE-0x80+_off_snan,%pc),%d0
 157         pea.l           (_060FPSP_TABLE-0x80,%pc,%d0)
 158         mov.l           0x4(%sp),%d0
 159         rtd             &0x4
 160
 161         global          _real_dz
 162 _real_dz:
 163         mov.l           %d0,-(%sp)
 164         mov.l           (_060FPSP_TABLE-0x80+_off_dz,%pc),%d0
 165         pea.l           (_060FPSP_TABLE-0x80,%pc,%d0)
 166         mov.l           0x4(%sp),%d0
 167         rtd             &0x4
 168
 169         global          _real_fline
 170 _real_fline:
 171         mov.l           %d0,-(%sp)
 172         mov.l           (_060FPSP_TABLE-0x80+_off_fline,%pc),%d0
 173         pea.l           (_060FPSP_TABLE-0x80,%pc,%d0)
 174         mov.l           0x4(%sp),%d0
 175         rtd             &0x4
 176
 177         global          _real_fpu_disabled
 178 _real_fpu_disabled:
 179         mov.l           %d0,-(%sp)
 180         mov.l           (_060FPSP_TABLE-0x80+_off_fpu_dis,%pc),%d0
 181         pea.l           (_060FPSP_TABLE-0x80,%pc,%d0)
 182         mov.l           0x4(%sp),%d0
 183         rtd             &0x4
 184
 185         global          _real_trap
 186 _real_trap:
 187         mov.l           %d0,-(%sp)
 188         mov.l           (_060FPSP_TABLE-0x80+_off_trap,%pc),%d0
 189         pea.l           (_060FPSP_TABLE-0x80,%pc,%d0)
 190         mov.l           0x4(%sp),%d0
 191         rtd             &0x4
 192
 193         global          _real_trace
 194 _real_trace:
 195         mov.l           %d0,-(%sp)
 196         mov.l           (_060FPSP_TABLE-0x80+_off_trace,%pc),%d0
 197         pea.l           (_060FPSP_TABLE-0x80,%pc,%d0)
 198         mov.l           0x4(%sp),%d0
 199         rtd             &0x4
 200
 201         global          _real_access
 202 _real_access:
 203         mov.l           %d0,-(%sp)
 204         mov.l           (_060FPSP_TABLE-0x80+_off_access,%pc),%d0
 205         pea.l           (_060FPSP_TABLE-0x80,%pc,%d0)
 206         mov.l           0x4(%sp),%d0
 207         rtd             &0x4
 208
 209 #######################################
 210
 211         global          _imem_read
 212 _imem_read:
 213         mov.l           %d0,-(%sp)
 214         mov.l           (_060FPSP_TABLE-0x80+_off_imr,%pc),%d0
 215         pea.l           (_060FPSP_TABLE-0x80,%pc,%d0)
 216         mov.l           0x4(%sp),%d0
 217         rtd             &0x4
 218
 219         global          _dmem_read
 220 _dmem_read:
 221         mov.l           %d0,-(%sp)
 222         mov.l           (_060FPSP_TABLE-0x80+_off_dmr,%pc),%d0
 223         pea.l           (_060FPSP_TABLE-0x80,%pc,%d0)
 224         mov.l           0x4(%sp),%d0
 225         rtd             &0x4
 226
 227         global          _dmem_write
 228 _dmem_write:
 229         mov.l           %d0,-(%sp)
 230         mov.l           (_060FPSP_TABLE-0x80+_off_dmw,%pc),%d0
 231         pea.l           (_060FPSP_TABLE-0x80,%pc,%d0)
 232         mov.l           0x4(%sp),%d0
 233         rtd             &0x4
 234
 235         global          _imem_read_word
 236 _imem_read_word:
 237         mov.l           %d0,-(%sp)
 238         mov.l           (_060FPSP_TABLE-0x80+_off_irw,%pc),%d0
 239         pea.l           (_060FPSP_TABLE-0x80,%pc,%d0)
 240         mov.l           0x4(%sp),%d0
 241         rtd             &0x4
 242
 243         global          _imem_read_long
 244 _imem_read_long:
 245         mov.l           %d0,-(%sp)
 246         mov.l           (_060FPSP_TABLE-0x80+_off_irl,%pc),%d0
 247         pea.l           (_060FPSP_TABLE-0x80,%pc,%d0)
 248         mov.l           0x4(%sp),%d0
 249         rtd             &0x4
 250
 251         global          _dmem_read_byte
 252 _dmem_read_byte:
 253         mov.l           %d0,-(%sp)
 254         mov.l           (_060FPSP_TABLE-0x80+_off_drb,%pc),%d0
 255         pea.l           (_060FPSP_TABLE-0x80,%pc,%d0)
 256         mov.l           0x4(%sp),%d0
 257         rtd             &0x4
 258
 259         global          _dmem_read_word
 260 _dmem_read_word:
 261         mov.l           %d0,-(%sp)
 262         mov.l           (_060FPSP_TABLE-0x80+_off_drw,%pc),%d0
 263         pea.l           (_060FPSP_TABLE-0x80,%pc,%d0)
 264         mov.l           0x4(%sp),%d0
 265         rtd             &0x4
 266
 267         global          _dmem_read_long
 268 _dmem_read_long:
 269         mov.l           %d0,-(%sp)
 270         mov.l           (_060FPSP_TABLE-0x80+_off_drl,%pc),%d0
 271         pea.l           (_060FPSP_TABLE-0x80,%pc,%d0)
 272         mov.l           0x4(%sp),%d0
 273         rtd             &0x4
 274
 275         global          _dmem_write_byte
 276 _dmem_write_byte:
 277         mov.l           %d0,-(%sp)
 278         mov.l           (_060FPSP_TABLE-0x80+_off_dwb,%pc),%d0
 279         pea.l           (_060FPSP_TABLE-0x80,%pc,%d0)
 280         mov.l           0x4(%sp),%d0
 281         rtd             &0x4
 282
 283         global          _dmem_write_word
 284 _dmem_write_word:
 285         mov.l           %d0,-(%sp)
 286         mov.l           (_060FPSP_TABLE-0x80+_off_dww,%pc),%d0
 287         pea.l           (_060FPSP_TABLE-0x80,%pc,%d0)
 288         mov.l           0x4(%sp),%d0
 289         rtd             &0x4
 290
 291         global          _dmem_write_long
 292 _dmem_write_long:
 293         mov.l           %d0,-(%sp)
 294         mov.l           (_060FPSP_TABLE-0x80+_off_dwl,%pc),%d0
 295         pea.l           (_060FPSP_TABLE-0x80,%pc,%d0)
 296         mov.l           0x4(%sp),%d0
 297         rtd             &0x4
 298
 299 #
 300 # This file contains a set of define statements for constants
 301 # in order to promote readability within the corecode itself.
 302 #
 303
 304 set LOCAL_SIZE,         192                     # stack frame size(bytes)
 305 set LV,                 -LOCAL_SIZE             # stack offset
 306
 307 set EXC_SR,             0x4                     # stack status register
 308 set EXC_PC,             0x6                     # stack pc
 309 set EXC_VOFF,           0xa                     # stacked vector offset
 310 set EXC_EA,             0xc                     # stacked <ea>
 311
 312 set EXC_FP,             0x0                     # frame pointer
 313
 314 set EXC_AREGS,          -68                     # offset of all address regs
 315 set EXC_DREGS,          -100                    # offset of all data regs
 316 set EXC_FPREGS,         -36                     # offset of all fp regs
 317
 318 set EXC_A7,             EXC_AREGS+(7*4)         # offset of saved a7
 319 set OLD_A7,             EXC_AREGS+(6*4)         # extra copy of saved a7
 320 set EXC_A6,             EXC_AREGS+(6*4)         # offset of saved a6
 321 set EXC_A5,             EXC_AREGS+(5*4)
 322 set EXC_A4,             EXC_AREGS+(4*4)
 323 set EXC_A3,             EXC_AREGS+(3*4)
 324 set EXC_A2,             EXC_AREGS+(2*4)
 325 set EXC_A1,             EXC_AREGS+(1*4)
 326 set EXC_A0,             EXC_AREGS+(0*4)
 327 set EXC_D7,             EXC_DREGS+(7*4)
 328 set EXC_D6,             EXC_DREGS+(6*4)
 329 set EXC_D5,             EXC_DREGS+(5*4)
 330 set EXC_D4,             EXC_DREGS+(4*4)
 331 set EXC_D3,             EXC_DREGS+(3*4)
 332 set EXC_D2,             EXC_DREGS+(2*4)
 333 set EXC_D1,             EXC_DREGS+(1*4)
 334 set EXC_D0,             EXC_DREGS+(0*4)
 335
 336 set EXC_FP0,            EXC_FPREGS+(0*12)       # offset of saved fp0
 337 set EXC_FP1,            EXC_FPREGS+(1*12)       # offset of saved fp1
 338 set EXC_FP2,            EXC_FPREGS+(2*12)       # offset of saved fp2 (not used)
 339
 340 set FP_SCR1,            LV+80                   # fp scratch 1
 341 set FP_SCR1_EX,         FP_SCR1+0
 342 set FP_SCR1_SGN,        FP_SCR1+2
 343 set FP_SCR1_HI,         FP_SCR1+4
 344 set FP_SCR1_LO,         FP_SCR1+8
 345
 346 set FP_SCR0,            LV+68                   # fp scratch 0
 347 set FP_SCR0_EX,         FP_SCR0+0
 348 set FP_SCR0_SGN,        FP_SCR0+2
 349 set FP_SCR0_HI,         FP_SCR0+4
 350 set FP_SCR0_LO,         FP_SCR0+8
 351
 352 set FP_DST,             LV+56                   # fp destination operand
 353 set FP_DST_EX,          FP_DST+0
 354 set FP_DST_SGN,         FP_DST+2
 355 set FP_DST_HI,          FP_DST+4
 356 set FP_DST_LO,          FP_DST+8
 357
 358 set FP_SRC,             LV+44                   # fp source operand
 359 set FP_SRC_EX,          FP_SRC+0
 360 set FP_SRC_SGN,         FP_SRC+2
 361 set FP_SRC_HI,          FP_SRC+4
 362 set FP_SRC_LO,          FP_SRC+8
 363
 364 set USER_FPIAR,         LV+40                   # FP instr address register
 365
 366 set USER_FPSR,          LV+36                   # FP status register
 367 set FPSR_CC,            USER_FPSR+0             # FPSR condition codes
 368 set FPSR_QBYTE,         USER_FPSR+1             # FPSR qoutient byte
 369 set FPSR_EXCEPT,        USER_FPSR+2             # FPSR exception status byte
 370 set FPSR_AEXCEPT,       USER_FPSR+3             # FPSR accrued exception byte
 371
 372 set USER_FPCR,          LV+32                   # FP control register
 373 set FPCR_ENABLE,        USER_FPCR+2             # FPCR exception enable
 374 set FPCR_MODE,          USER_FPCR+3             # FPCR rounding mode control
 375
 376 set L_SCR3,             LV+28                   # integer scratch 3
 377 set L_SCR2,             LV+24                   # integer scratch 2
 378 set L_SCR1,             LV+20                   # integer scratch 1
 379
 380 set STORE_FLG,          LV+19                   # flag: operand store (ie. not fcmp/ftst)
 381
 382 set EXC_TEMP2,          LV+24                   # temporary space
 383 set EXC_TEMP,           LV+16                   # temporary space
 384
 385 set DTAG,               LV+15                   # destination operand type
 386 set STAG,               LV+14                   # source operand type
 387
 388 set SPCOND_FLG,         LV+10                   # flag: special case (see below)
 389
 390 set EXC_CC,             LV+8                    # saved condition codes
 391 set EXC_EXTWPTR,        LV+4                    # saved current PC (active)
 392 set EXC_EXTWORD,        LV+2                    # saved extension word
 393 set EXC_CMDREG,         LV+2                    # saved extension word
 394 set EXC_OPWORD,         LV+0                    # saved operation word
 395
 396 ################################
 397
 398 # Helpful macros
 399
 400 set FTEMP,              0                       # offsets within an
 401 set FTEMP_EX,           0                       # extended precision
 402 set FTEMP_SGN,          2                       # value saved in memory.
 403 set FTEMP_HI,           4
 404 set FTEMP_LO,           8
 405 set FTEMP_GRS,          12
 406
 407 set LOCAL,              0                       # offsets within an
 408 set LOCAL_EX,           0                       # extended precision
 409 set LOCAL_SGN,          2                       # value saved in memory.
 410 set LOCAL_HI,           4
 411 set LOCAL_LO,           8
 412 set LOCAL_GRS,          12
 413
 414 set DST,                0                       # offsets within an
 415 set DST_EX,             0                       # extended precision
 416 set DST_HI,             4                       # value saved in memory.
 417 set DST_LO,             8
 418
 419 set SRC,                0                       # offsets within an
 420 set SRC_EX,             0                       # extended precision
 421 set SRC_HI,             4                       # value saved in memory.
 422 set SRC_LO,             8
 423
 424 set SGL_LO,             0x3f81                  # min sgl prec exponent
 425 set SGL_HI,             0x407e                  # max sgl prec exponent
 426 set DBL_LO,             0x3c01                  # min dbl prec exponent
 427 set DBL_HI,             0x43fe                  # max dbl prec exponent
 428 set EXT_LO,             0x0                     # min ext prec exponent
 429 set EXT_HI,             0x7ffe                  # max ext prec exponent
 430
 431 set EXT_BIAS,           0x3fff                  # extended precision bias
 432 set SGL_BIAS,           0x007f                  # single precision bias
 433 set DBL_BIAS,           0x03ff                  # double precision bias
 434
 435 set NORM,               0x00                    # operand type for STAG/DTAG
 436 set ZERO,               0x01                    # operand type for STAG/DTAG
 437 set INF,                0x02                    # operand type for STAG/DTAG
 438 set QNAN,               0x03                    # operand type for STAG/DTAG
 439 set DENORM,             0x04                    # operand type for STAG/DTAG
 440 set SNAN,               0x05                    # operand type for STAG/DTAG
 441 set UNNORM,             0x06                    # operand type for STAG/DTAG
 442
 443 ##################
 444 # FPSR/FPCR bits #
 445 ##################
 446 set neg_bit,            0x3                     # negative result
 447 set z_bit,              0x2                     # zero result
 448 set inf_bit,            0x1                     # infinite result
 449 set nan_bit,            0x0                     # NAN result
 450
 451 set q_sn_bit,           0x7                     # sign bit of quotient byte
 452
 453 set bsun_bit,           7                       # branch on unordered
 454 set snan_bit,           6                       # signalling NAN
 455 set operr_bit,          5                       # operand error
 456 set ovfl_bit,           4                       # overflow
 457 set unfl_bit,           3                       # underflow
 458 set dz_bit,             2                       # divide by zero
 459 set inex2_bit,          1                       # inexact result 2
 460 set inex1_bit,          0                       # inexact result 1
 461
 462 set aiop_bit,           7                       # accrued inexact operation bit
 463 set aovfl_bit,          6                       # accrued overflow bit
 464 set aunfl_bit,          5                       # accrued underflow bit
 465 set adz_bit,            4                       # accrued dz bit
 466 set ainex_bit,          3                       # accrued inexact bit
 467
 468 #############################
 469 # FPSR individual bit masks #
 470 #############################
 471 set neg_mask,           0x08000000              # negative bit mask (lw)
 472 set inf_mask,           0x02000000              # infinity bit mask (lw)
 473 set z_mask,             0x04000000              # zero bit mask (lw)
 474 set nan_mask,           0x01000000              # nan bit mask (lw)
 475
 476 set neg_bmask,          0x08                    # negative bit mask (byte)
 477 set inf_bmask,          0x02                    # infinity bit mask (byte)
 478 set z_bmask,            0x04                    # zero bit mask (byte)
 479 set nan_bmask,          0x01                    # nan bit mask (byte)
 480
 481 set bsun_mask,          0x00008000              # bsun exception mask
 482 set snan_mask,          0x00004000              # snan exception mask
 483 set operr_mask,         0x00002000              # operr exception mask
 484 set ovfl_mask,          0x00001000              # overflow exception mask
 485 set unfl_mask,          0x00000800              # underflow exception mask
 486 set dz_mask,            0x00000400              # dz exception mask
 487 set inex2_mask,         0x00000200              # inex2 exception mask
 488 set inex1_mask,         0x00000100              # inex1 exception mask
 489
 490 set aiop_mask,          0x00000080              # accrued illegal operation
 491 set aovfl_mask,         0x00000040              # accrued overflow
 492 set aunfl_mask,         0x00000020              # accrued underflow
 493 set adz_mask,           0x00000010              # accrued divide by zero
 494 set ainex_mask,         0x00000008              # accrued inexact
 495
 496 ######################################
 497 # FPSR combinations used in the FPSP #
 498 ######################################
 499 set dzinf_mask,         inf_mask+dz_mask+adz_mask
 500 set opnan_mask,         nan_mask+operr_mask+aiop_mask
 501 set nzi_mask,           0x01ffffff              #clears N, Z, and I
 502 set unfinx_mask,        unfl_mask+inex2_mask+aunfl_mask+ainex_mask
 503 set unf2inx_mask,       unfl_mask+inex2_mask+ainex_mask
 504 set ovfinx_mask,        ovfl_mask+inex2_mask+aovfl_mask+ainex_mask
 505 set inx1a_mask,         inex1_mask+ainex_mask
 506 set inx2a_mask,         inex2_mask+ainex_mask
 507 set snaniop_mask,       nan_mask+snan_mask+aiop_mask
 508 set snaniop2_mask,      snan_mask+aiop_mask
 509 set naniop_mask,        nan_mask+aiop_mask
 510 set neginf_mask,        neg_mask+inf_mask
 511 set infaiop_mask,       inf_mask+aiop_mask
 512 set negz_mask,          neg_mask+z_mask
 513 set opaop_mask,         operr_mask+aiop_mask
 514 set unfl_inx_mask,      unfl_mask+aunfl_mask+ainex_mask
 515 set ovfl_inx_mask,      ovfl_mask+aovfl_mask+ainex_mask
 516
 517 #########
 518 # misc. #
 519 #########
 520 set rnd_stky_bit,       29                      # stky bit pos in longword
 521
 522 set sign_bit,           0x7                     # sign bit
 523 set signan_bit,         0x6                     # signalling nan bit
 524
 525 set sgl_thresh,         0x3f81                  # minimum sgl exponent
 526 set dbl_thresh,         0x3c01                  # minimum dbl exponent
 527
 528 set x_mode,             0x0                     # extended precision
 529 set s_mode,             0x4                     # single precision
 530 set d_mode,             0x8                     # double precision
 531
 532 set rn_mode,            0x0                     # round-to-nearest
 533 set rz_mode,            0x1                     # round-to-zero
 534 set rm_mode,            0x2                     # round-tp-minus-infinity
 535 set rp_mode,            0x3                     # round-to-plus-infinity
 536
 537 set mantissalen,        64                      # length of mantissa in bits
 538
 539 set BYTE,               1                       # len(byte) == 1 byte
 540 set WORD,               2                       # len(word) == 2 bytes
 541 set LONG,               4                       # len(longword) == 2 bytes
 542
 543 set BSUN_VEC,           0xc0                    # bsun    vector offset
 544 set INEX_VEC,           0xc4                    # inexact vector offset
 545 set DZ_VEC,             0xc8                    # dz      vector offset
 546 set UNFL_VEC,           0xcc                    # unfl    vector offset
 547 set OPERR_VEC,          0xd0                    # operr   vector offset
 548 set OVFL_VEC,           0xd4                    # ovfl    vector offset
 549 set SNAN_VEC,           0xd8                    # snan    vector offset
 550
 551 ###########################
 552 # SPecial CONDition FLaGs #
 553 ###########################
 554 set ftrapcc_flg,        0x01                    # flag bit: ftrapcc exception
 555 set fbsun_flg,          0x02                    # flag bit: bsun exception
 556 set mia7_flg,           0x04                    # flag bit: (a7)+ <ea>
 557 set mda7_flg,           0x08                    # flag bit: -(a7) <ea>
 558 set fmovm_flg,          0x40                    # flag bit: fmovm instruction
 559 set immed_flg,          0x80                    # flag bit: &<data> <ea>
 560
 561 set ftrapcc_bit,        0x0
 562 set fbsun_bit,          0x1
 563 set mia7_bit,           0x2
 564 set mda7_bit,           0x3
 565 set immed_bit,          0x7
 566
 567 ##################################
 568 # TRANSCENDENTAL "LAST-OP" FLAGS #
 569 ##################################
 570 set FMUL_OP,            0x0                     # fmul instr performed last
 571 set FDIV_OP,            0x1                     # fdiv performed last
 572 set FADD_OP,            0x2                     # fadd performed last
 573 set FMOV_OP,            0x3                     # fmov performed last
 574
 575 #############
 576 # CONSTANTS #
 577 #############
 578 T1:     long            0x40C62D38,0xD3D64634   # 16381 LOG2 LEAD
 579 T2:     long            0x3D6F90AE,0xB1E75CC7   # 16381 LOG2 TRAIL
 580
 581 PI:     long            0x40000000,0xC90FDAA2,0x2168C235,0x00000000
 582 PIBY2:  long            0x3FFF0000,0xC90FDAA2,0x2168C235,0x00000000
 583
 584 TWOBYPI:
 585         long            0x3FE45F30,0x6DC9C883
 586
 587 #########################################################################
 588 # XDEF **************************************************************** #
 589 #       _fpsp_ovfl(): 060FPSP entry point for FP Overflow exception.    #
 590 #                                                                       #
 591 #       This handler should be the first code executed upon taking the  #
 592 #       FP Overflow exception in an operating system.                   #
 593 #                                                                       #
 594 # XREF **************************************************************** #
 595 #       _imem_read_long() - read instruction longword                   #
 596 #       fix_skewed_ops() - adjust src operand in fsave frame            #
 597 #       set_tag_x() - determine optype of src/dst operands              #
 598 #       store_fpreg() - store opclass 0 or 2 result to FP regfile       #
 599 #       unnorm_fix() - change UNNORM operands to NORM or ZERO           #
 600 #       load_fpn2() - load dst operand from FP regfile                  #
 601 #       fout() - emulate an opclass 3 instruction                       #
 602 #       tbl_unsupp - add of table of emulation routines for opclass 0,2 #
 603 #       _fpsp_done() - "callout" for 060FPSP exit (all work done!)      #
 604 #       _real_ovfl() - "callout" for Overflow exception enabled code    #
 605 #       _real_inex() - "callout" for Inexact exception enabled code     #
 606 #       _real_trace() - "callout" for Trace exception code              #
 607 #                                                                       #
 608 # INPUT *************************************************************** #
 609 #       - The system stack contains the FP Ovfl exception stack frame   #
 610 #       - The fsave frame contains the source operand                   #
 611 #                                                                       #
 612 # OUTPUT ************************************************************** #
 613 #       Overflow Exception enabled:                                     #
 614 #       - The system stack is unchanged                                 #
 615 #       - The fsave frame contains the adjusted src op for opclass 0,2  #
 616 #       Overflow Exception disabled:                                    #
 617 #       - The system stack is unchanged                                 #
 618 #       - The "exception present" flag in the fsave frame is cleared    #
 619 #                                                                       #
 620 # ALGORITHM *********************************************************** #
 621 #       On the 060, if an FP overflow is present as the result of any   #
 622 # instruction, the 060 will take an overflow exception whether the      #
 623 # exception is enabled or disabled in the FPCR. For the disabled case,  #
 624 # This handler emulates the instruction to determine what the correct   #
 625 # default result should be for the operation. This default result is    #
 626 # then stored in either the FP regfile, data regfile, or memory.        #
 627 # Finally, the handler exits through the "callout" _fpsp_done()         #
 628 # denoting that no exceptional conditions exist within the machine.     #
 629 #       If the exception is enabled, then this handler must create the  #
 630 # exceptional operand and plave it in the fsave state frame, and store  #
 631 # the default result (only if the instruction is opclass 3). For        #
 632 # exceptions enabled, this handler must exit through the "callout"      #
 633 # _real_ovfl() so that the operating system enabled overflow handler    #
 634 # can handle this case.                                                 #
 635 #       Two other conditions exist. First, if overflow was disabled     #
 636 # but the inexact exception was enabled, this handler must exit         #
 637 # through the "callout" _real_inex() regardless of whether the result   #
 638 # was inexact.                                                          #
 639 #       Also, in the case of an opclass three instruction where         #
 640 # overflow was disabled and the trace exception was enabled, this       #
 641 # handler must exit through the "callout" _real_trace().                #
 642 #                                                                       #
 643 #########################################################################
 644
 645         global          _fpsp_ovfl
 646 _fpsp_ovfl:
 647
 648 #$#     sub.l           &24,%sp                 # make room for src/dst
 649
 650         link.w          %a6,&-LOCAL_SIZE        # init stack frame
 651
 652         fsave           FP_SRC(%a6)             # grab the "busy" frame
 653
 654         movm.l          &0x0303,EXC_DREGS(%a6)  # save d0-d1/a0-a1
 655         fmovm.l         %fpcr,%fpsr,%fpiar,USER_FPCR(%a6) # save ctrl regs
 656         fmovm.x         &0xc0,EXC_FPREGS(%a6)   # save fp0-fp1 on stack
 657
 658 # the FPIAR holds the "current PC" of the faulting instruction
 659         mov.l           USER_FPIAR(%a6),EXC_EXTWPTR(%a6)
 660         mov.l           EXC_EXTWPTR(%a6),%a0    # fetch instruction addr
 661         addq.l          &0x4,EXC_EXTWPTR(%a6)   # incr instruction ptr
 662         bsr.l           _imem_read_long         # fetch the instruction words
 663         mov.l           %d0,EXC_OPWORD(%a6)
 664
 665 ##############################################################################
 666
 667         btst            &0x5,EXC_CMDREG(%a6)    # is instr an fmove out?
 668         bne.w           fovfl_out
 669
 670
 671         lea             FP_SRC(%a6),%a0         # pass: ptr to src op
 672         bsr.l           fix_skewed_ops          # fix src op
 673
 674 # since, I believe, only NORMs and DENORMs can come through here,
 675 # maybe we can avoid the subroutine call.
 676         lea             FP_SRC(%a6),%a0         # pass: ptr to src op
 677         bsr.l           set_tag_x               # tag the operand type
 678         mov.b           %d0,STAG(%a6)           # maybe NORM,DENORM
 679
 680 # bit five of the fp extension word separates the monadic and dyadic operations
 681 # that can pass through fpsp_ovfl(). remember that fcmp, ftst, and fsincos
 682 # will never take this exception.
 683         btst            &0x5,1+EXC_CMDREG(%a6)  # is operation monadic or dyadic?
 684         beq.b           fovfl_extract           # monadic
 685
 686         bfextu          EXC_CMDREG(%a6){&6:&3},%d0 # dyadic; load dst reg
 687         bsr.l           load_fpn2               # load dst into FP_DST
 688
 689         lea             FP_DST(%a6),%a0         # pass: ptr to dst op
 690         bsr.l           set_tag_x               # tag the operand type
 691         cmpi.b          %d0,&UNNORM             # is operand an UNNORM?
 692         bne.b           fovfl_op2_done          # no
 693         bsr.l           unnorm_fix              # yes; convert to NORM,DENORM,or ZERO
 694 fovfl_op2_done:
 695         mov.b           %d0,DTAG(%a6)           # save dst optype tag
 696
 697 fovfl_extract:
 698
 699 #$#     mov.l           FP_SRC_EX(%a6),TRAP_SRCOP_EX(%a6)
 700 #$#     mov.l           FP_SRC_HI(%a6),TRAP_SRCOP_HI(%a6)
 701 #$#     mov.l           FP_SRC_LO(%a6),TRAP_SRCOP_LO(%a6)
 702 #$#     mov.l           FP_DST_EX(%a6),TRAP_DSTOP_EX(%a6)
 703 #$#     mov.l           FP_DST_HI(%a6),TRAP_DSTOP_HI(%a6)
 704 #$#     mov.l           FP_DST_LO(%a6),TRAP_DSTOP_LO(%a6)
 705
 706         clr.l           %d0
 707         mov.b           FPCR_MODE(%a6),%d0      # pass rnd prec/mode
 708
 709         mov.b           1+EXC_CMDREG(%a6),%d1
 710         andi.w          &0x007f,%d1             # extract extension
 711
 712         andi.l          &0x00ff01ff,USER_FPSR(%a6) # zero all but accured field
 713
 714         fmov.l          &0x0,%fpcr              # zero current control regs
 715         fmov.l          &0x0,%fpsr
 716
 717         lea             FP_SRC(%a6),%a0
 718         lea             FP_DST(%a6),%a1
 719
 720 # maybe we can make these entry points ONLY the OVFL entry points of each routine.
 721         mov.l           (tbl_unsupp.l,%pc,%d1.w*4),%d1 # fetch routine addr
 722         jsr             (tbl_unsupp.l,%pc,%d1.l*1)
 723
 724 # the operation has been emulated. the result is in fp0.
 725 # the EXOP, if an exception occurred, is in fp1.
 726 # we must save the default result regardless of whether
 727 # traps are enabled or disabled.
 728         bfextu          EXC_CMDREG(%a6){&6:&3},%d0
 729         bsr.l           store_fpreg
 730
 731 # the exceptional possibilities we have left ourselves with are ONLY overflow
 732 # and inexact. and, the inexact is such that overflow occurred and was disabled
 733 # but inexact was enabled.
 734         btst            &ovfl_bit,FPCR_ENABLE(%a6)
 735         bne.b           fovfl_ovfl_on
 736
 737         btst            &inex2_bit,FPCR_ENABLE(%a6)
 738         bne.b           fovfl_inex_on
 739
 740         fmovm.x         EXC_FPREGS(%a6),&0xc0   # restore fp0-fp1
 741         fmovm.l         USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs
 742         movm.l          EXC_DREGS(%a6),&0x0303  # restore d0-d1/a0-a1
 743
 744         unlk            %a6
 745 #$#     add.l           &24,%sp
 746         bra.l           _fpsp_done
 747
 748 # overflow is enabled AND overflow, of course, occurred. so, we have the EXOP
 749 # in fp1. now, simply jump to _real_ovfl()!
 750 fovfl_ovfl_on:
 751         fmovm.x         &0x40,FP_SRC(%a6)       # save EXOP (fp1) to stack
 752
 753         mov.w           &0xe005,2+FP_SRC(%a6)   # save exc status
 754
 755         fmovm.x         EXC_FPREGS(%a6),&0xc0   # restore fp0-fp1
 756         fmovm.l         USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs
 757         movm.l          EXC_DREGS(%a6),&0x0303  # restore d0-d1/a0-a1
 758
 759         frestore        FP_SRC(%a6)             # do this after fmovm,other f<op>s!
 760
 761         unlk            %a6
 762
 763         bra.l           _real_ovfl
 764
 765 # overflow occurred but is disabled. meanwhile, inexact is enabled. therefore,
 766 # we must jump to real_inex().
 767 fovfl_inex_on:
 768
 769         fmovm.x         &0x40,FP_SRC(%a6)       # save EXOP (fp1) to stack
 770
 771         mov.b           &0xc4,1+EXC_VOFF(%a6)   # vector offset = 0xc4
 772         mov.w           &0xe001,2+FP_SRC(%a6)   # save exc status
 773
 774         fmovm.x         EXC_FPREGS(%a6),&0xc0   # restore fp0-fp1
 775         fmovm.l         USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs
 776         movm.l          EXC_DREGS(%a6),&0x0303  # restore d0-d1/a0-a1
 777
 778         frestore        FP_SRC(%a6)             # do this after fmovm,other f<op>s!
 779
 780         unlk            %a6
 781
 782         bra.l           _real_inex
 783
 784 ########################################################################
 785 fovfl_out:
 786
 787
 788 #$#     mov.l           FP_SRC_EX(%a6),TRAP_SRCOP_EX(%a6)
 789 #$#     mov.l           FP_SRC_HI(%a6),TRAP_SRCOP_HI(%a6)
 790 #$#     mov.l           FP_SRC_LO(%a6),TRAP_SRCOP_LO(%a6)
 791
 792 # the src operand is definitely a NORM(!), so tag it as such
 793         mov.b           &NORM,STAG(%a6)         # set src optype tag
 794
 795         clr.l           %d0
 796         mov.b           FPCR_MODE(%a6),%d0      # pass rnd prec/mode
 797
 798         and.l           &0xffff00ff,USER_FPSR(%a6) # zero all but accured field
 799
 800         fmov.l          &0x0,%fpcr              # zero current control regs
 801         fmov.l          &0x0,%fpsr
 802
 803         lea             FP_SRC(%a6),%a0         # pass ptr to src operand
 804
 805         bsr.l           fout
 806
 807         btst            &ovfl_bit,FPCR_ENABLE(%a6)
 808         bne.w           fovfl_ovfl_on
 809
 810         btst            &inex2_bit,FPCR_ENABLE(%a6)
 811         bne.w           fovfl_inex_on
 812
 813         fmovm.x         EXC_FPREGS(%a6),&0xc0   # restore fp0-fp1
 814         fmovm.l         USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs
 815         movm.l          EXC_DREGS(%a6),&0x0303  # restore d0-d1/a0-a1
 816
 817         unlk            %a6
 818 #$#     add.l           &24,%sp
 819
 820         btst            &0x7,(%sp)              # is trace on?
 821         beq.l           _fpsp_done              # no
 822
 823         fmov.l          %fpiar,0x8(%sp)         # "Current PC" is in FPIAR
 824         mov.w           &0x2024,0x6(%sp)        # stk fmt = 0x2; voff = 0x024
 825         bra.l           _real_trace
 826
 827 #########################################################################
 828 # XDEF **************************************************************** #
 829 #       _fpsp_unfl(): 060FPSP entry point for FP Underflow exception.   #
 830 #                                                                       #
 831 #       This handler should be the first code executed upon taking the  #
 832 #       FP Underflow exception in an operating system.                  #
 833 #                                                                       #
 834 # XREF **************************************************************** #
 835 #       _imem_read_long() - read instruction longword                   #
 836 #       fix_skewed_ops() - adjust src operand in fsave frame            #
 837 #       set_tag_x() - determine optype of src/dst operands              #
 838 #       store_fpreg() - store opclass 0 or 2 result to FP regfile       #
 839 #       unnorm_fix() - change UNNORM operands to NORM or ZERO           #
 840 #       load_fpn2() - load dst operand from FP regfile                  #
 841 #       fout() - emulate an opclass 3 instruction                       #
 842 #       tbl_unsupp - add of table of emulation routines for opclass 0,2 #
 843 #       _fpsp_done() - "callout" for 060FPSP exit (all work done!)      #
 844 #       _real_ovfl() - "callout" for Overflow exception enabled code    #
 845 #       _real_inex() - "callout" for Inexact exception enabled code     #
 846 #       _real_trace() - "callout" for Trace exception code              #
 847 #                                                                       #
 848 # INPUT *************************************************************** #
 849 #       - The system stack contains the FP Unfl exception stack frame   #
 850 #       - The fsave frame contains the source operand                   #
 851 #                                                                       #
 852 # OUTPUT ************************************************************** #
 853 #       Underflow Exception enabled:                                    #
 854 #       - The system stack is unchanged                                 #
 855 #       - The fsave frame contains the adjusted src op for opclass 0,2  #
 856 #       Underflow Exception disabled:                                   #
 857 #       - The system stack is unchanged                                 #
 858 #       - The "exception present" flag in the fsave frame is cleared    #
 859 #                                                                       #
 860 # ALGORITHM *********************************************************** #
 861 #       On the 060, if an FP underflow is present as the result of any  #
 862 # instruction, the 060 will take an underflow exception whether the     #
 863 # exception is enabled or disabled in the FPCR. For the disabled case,  #
 864 # This handler emulates the instruction to determine what the correct   #
 865 # default result should be for the operation. This default result is    #
 866 # then stored in either the FP regfile, data regfile, or memory.        #
 867 # Finally, the handler exits through the "callout" _fpsp_done()         #
 868 # denoting that no exceptional conditions exist within the machine.     #
 869 #       If the exception is enabled, then this handler must create the  #
 870 # exceptional operand and plave it in the fsave state frame, and store  #
 871 # the default result (only if the instruction is opclass 3). For        #
 872 # exceptions enabled, this handler must exit through the "callout"      #
 873 # _real_unfl() so that the operating system enabled overflow handler    #
 874 # can handle this case.                                                 #
 875 #       Two other conditions exist. First, if underflow was disabled    #
 876 # but the inexact exception was enabled and the result was inexact,     #
 877 # this handler must exit through the "callout" _real_inex().            #
 878 # was inexact.                                                          #
 879 #       Also, in the case of an opclass three instruction where         #
 880 # underflow was disabled and the trace exception was enabled, this      #
 881 # handler must exit through the "callout" _real_trace().                #
 882 #                                                                       #
 883 #########################################################################
 884
 885         global          _fpsp_unfl
 886 _fpsp_unfl:
 887
 888 #$#     sub.l           &24,%sp                 # make room for src/dst
 889
 890         link.w          %a6,&-LOCAL_SIZE        # init stack frame
 891
 892         fsave           FP_SRC(%a6)             # grab the "busy" frame
 893
 894         movm.l          &0x0303,EXC_DREGS(%a6)  # save d0-d1/a0-a1
 895         fmovm.l         %fpcr,%fpsr,%fpiar,USER_FPCR(%a6) # save ctrl regs
 896         fmovm.x         &0xc0,EXC_FPREGS(%a6)   # save fp0-fp1 on stack
 897
 898 # the FPIAR holds the "current PC" of the faulting instruction
 899         mov.l           USER_FPIAR(%a6),EXC_EXTWPTR(%a6)
 900         mov.l           EXC_EXTWPTR(%a6),%a0    # fetch instruction addr
 901         addq.l          &0x4,EXC_EXTWPTR(%a6)   # incr instruction ptr
 902         bsr.l           _imem_read_long         # fetch the instruction words
 903         mov.l           %d0,EXC_OPWORD(%a6)
 904
 905 ##############################################################################
 906
 907         btst            &0x5,EXC_CMDREG(%a6)    # is instr an fmove out?
 908         bne.w           funfl_out
 909
 910
 911         lea             FP_SRC(%a6),%a0         # pass: ptr to src op
 912         bsr.l           fix_skewed_ops          # fix src op
 913
 914         lea             FP_SRC(%a6),%a0         # pass: ptr to src op
 915         bsr.l           set_tag_x               # tag the operand type
 916         mov.b           %d0,STAG(%a6)           # maybe NORM,DENORM
 917
 918 # bit five of the fp ext word separates the monadic and dyadic operations
 919 # that can pass through fpsp_unfl(). remember that fcmp, and ftst
 920 # will never take this exception.
 921         btst            &0x5,1+EXC_CMDREG(%a6)  # is op monadic or dyadic?
 922         beq.b           funfl_extract           # monadic
 923
 924 # now, what's left that's not dyadic is fsincos. we can distinguish it
 925 # from all dyadics by the '0110xxx pattern
 926         btst            &0x4,1+EXC_CMDREG(%a6)  # is op an fsincos?
 927         bne.b           funfl_extract           # yes
 928
 929         bfextu          EXC_CMDREG(%a6){&6:&3},%d0 # dyadic; load dst reg
 930         bsr.l           load_fpn2               # load dst into FP_DST
 931
 932         lea             FP_DST(%a6),%a0         # pass: ptr to dst op
 933         bsr.l           set_tag_x               # tag the operand type
 934         cmpi.b          %d0,&UNNORM             # is operand an UNNORM?
 935         bne.b           funfl_op2_done          # no
 936         bsr.l           unnorm_fix              # yes; convert to NORM,DENORM,or ZERO
 937 funfl_op2_done:
 938         mov.b           %d0,DTAG(%a6)           # save dst optype tag
 939
 940 funfl_extract:
 941
 942 #$#     mov.l           FP_SRC_EX(%a6),TRAP_SRCOP_EX(%a6)
 943 #$#     mov.l           FP_SRC_HI(%a6),TRAP_SRCOP_HI(%a6)
 944 #$#     mov.l           FP_SRC_LO(%a6),TRAP_SRCOP_LO(%a6)
 945 #$#     mov.l           FP_DST_EX(%a6),TRAP_DSTOP_EX(%a6)
 946 #$#     mov.l           FP_DST_HI(%a6),TRAP_DSTOP_HI(%a6)
 947 #$#     mov.l           FP_DST_LO(%a6),TRAP_DSTOP_LO(%a6)
 948
 949         clr.l           %d0
 950         mov.b           FPCR_MODE(%a6),%d0      # pass rnd prec/mode
 951
 952         mov.b           1+EXC_CMDREG(%a6),%d1
 953         andi.w          &0x007f,%d1             # extract extension
 954
 955         andi.l          &0x00ff01ff,USER_FPSR(%a6)
 956
 957         fmov.l          &0x0,%fpcr              # zero current control regs
 958         fmov.l          &0x0,%fpsr
 959
 960         lea             FP_SRC(%a6),%a0
 961         lea             FP_DST(%a6),%a1
 962
 963 # maybe we can make these entry points ONLY the OVFL entry points of each routine.
 964         mov.l           (tbl_unsupp.l,%pc,%d1.w*4),%d1 # fetch routine addr
 965         jsr             (tbl_unsupp.l,%pc,%d1.l*1)
 966
 967         bfextu          EXC_CMDREG(%a6){&6:&3},%d0
 968         bsr.l           store_fpreg
 969
 970 # The `060 FPU multiplier hardware is such that if the result of a
 971 # multiply operation is the smallest possible normalized number
 972 # (0x00000000_80000000_00000000), then the machine will take an
 973 # underflow exception. Since this is incorrect, we need to check
 974 # if our emulation, after re-doing the operation, decided that
 975 # no underflow was called for. We do these checks only in
 976 # funfl_{unfl,inex}_on() because w/ both exceptions disabled, this
 977 # special case will simply exit gracefully with the correct result.
 978
 979 # the exceptional possibilities we have left ourselves with are ONLY overflow
 980 # and inexact. and, the inexact is such that overflow occurred and was disabled
 981 # but inexact was enabled.
 982         btst            &unfl_bit,FPCR_ENABLE(%a6)
 983         bne.b           funfl_unfl_on
 984
 985 funfl_chkinex:
 986         btst            &inex2_bit,FPCR_ENABLE(%a6)
 987         bne.b           funfl_inex_on
 988
 989 funfl_exit:
 990         fmovm.x         EXC_FPREGS(%a6),&0xc0   # restore fp0-fp1
 991         fmovm.l         USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs
 992         movm.l          EXC_DREGS(%a6),&0x0303  # restore d0-d1/a0-a1
 993
 994         unlk            %a6
 995 #$#     add.l           &24,%sp
 996         bra.l           _fpsp_done
 997
 998 # overflow is enabled AND overflow, of course, occurred. so, we have the EXOP
 999 # in fp1 (don't forget to save fp0). what to do now?
1000 # well, we simply have to get to go to _real_unfl()!
1001 funfl_unfl_on:
1002
1003 # The `060 FPU multiplier hardware is such that if the result of a
1004 # multiply operation is the smallest possible normalized number
1005 # (0x00000000_80000000_00000000), then the machine will take an
1006 # underflow exception. Since this is incorrect, we check here to see
1007 # if our emulation, after re-doing the operation, decided that
1008 # no underflow was called for.
1009         btst            &unfl_bit,FPSR_EXCEPT(%a6)
1010         beq.w           funfl_chkinex
1011
1012 funfl_unfl_on2:
1013         fmovm.x         &0x40,FP_SRC(%a6)       # save EXOP (fp1) to stack
1014
1015         mov.w           &0xe003,2+FP_SRC(%a6)   # save exc status
1016
1017         fmovm.x         EXC_FPREGS(%a6),&0xc0   # restore fp0-fp1
1018         fmovm.l         USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs
1019         movm.l          EXC_DREGS(%a6),&0x0303  # restore d0-d1/a0-a1
1020
1021         frestore        FP_SRC(%a6)             # do this after fmovm,other f<op>s!
1022
1023         unlk            %a6
1024
1025         bra.l           _real_unfl
1026
1027 # undeflow occurred but is disabled. meanwhile, inexact is enabled. therefore,
1028 # we must jump to real_inex().
1029 funfl_inex_on:
1030
1031 # The `060 FPU multiplier hardware is such that if the result of a
1032 # multiply operation is the smallest possible normalized number
1033 # (0x00000000_80000000_00000000), then the machine will take an
1034 # underflow exception.
1035 # But, whether bogus or not, if inexact is enabled AND it occurred,
1036 # then we have to branch to real_inex.
1037
1038         btst            &inex2_bit,FPSR_EXCEPT(%a6)
1039         beq.w           funfl_exit
1040
1041 funfl_inex_on2:
1042
1043         fmovm.x         &0x40,FP_SRC(%a6)       # save EXOP to stack
1044
1045         mov.b           &0xc4,1+EXC_VOFF(%a6)   # vector offset = 0xc4
1046         mov.w           &0xe001,2+FP_SRC(%a6)   # save exc status
1047
1048         fmovm.x         EXC_FPREGS(%a6),&0xc0   # restore fp0-fp1
1049         fmovm.l         USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs
1050         movm.l          EXC_DREGS(%a6),&0x0303  # restore d0-d1/a0-a1
1051
1052         frestore        FP_SRC(%a6)             # do this after fmovm,other f<op>s!
1053
1054         unlk            %a6
1055
1056         bra.l           _real_inex
1057
1058 #######################################################################
1059 funfl_out:
1060
1061
1062 #$#     mov.l           FP_SRC_EX(%a6),TRAP_SRCOP_EX(%a6)
1063 #$#     mov.l           FP_SRC_HI(%a6),TRAP_SRCOP_HI(%a6)
1064 #$#     mov.l           FP_SRC_LO(%a6),TRAP_SRCOP_LO(%a6)
1065
1066 # the src operand is definitely a NORM(!), so tag it as such
1067         mov.b           &NORM,STAG(%a6)         # set src optype tag
1068
1069         clr.l           %d0
1070         mov.b           FPCR_MODE(%a6),%d0      # pass rnd prec/mode
1071
1072         and.l           &0xffff00ff,USER_FPSR(%a6) # zero all but accured field
1073
1074         fmov.l          &0x0,%fpcr              # zero current control regs
1075         fmov.l          &0x0,%fpsr
1076
1077         lea             FP_SRC(%a6),%a0         # pass ptr to src operand
1078
1079         bsr.l           fout
1080
1081         btst            &unfl_bit,FPCR_ENABLE(%a6)
1082         bne.w           funfl_unfl_on2
1083
1084         btst            &inex2_bit,FPCR_ENABLE(%a6)
1085         bne.w           funfl_inex_on2
1086
1087         fmovm.x         EXC_FPREGS(%a6),&0xc0   # restore fp0-fp1
1088         fmovm.l         USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs
1089         movm.l          EXC_DREGS(%a6),&0x0303  # restore d0-d1/a0-a1
1090
1091         unlk            %a6
1092 #$#     add.l           &24,%sp
1093
1094         btst            &0x7,(%sp)              # is trace on?
1095         beq.l           _fpsp_done              # no
1096
1097         fmov.l          %fpiar,0x8(%sp)         # "Current PC" is in FPIAR
1098         mov.w           &0x2024,0x6(%sp)        # stk fmt = 0x2; voff = 0x024
1099         bra.l           _real_trace
1100
1101 #########################################################################
1102 # XDEF **************************************************************** #
1103 #       _fpsp_unsupp(): 060FPSP entry point for FP "Unimplemented       #
1104 #                       Data Type" exception.                           #
1105 #                                                                       #
1106 #       This handler should be the first code executed upon taking the  #
1107 #       FP Unimplemented Data Type exception in an operating system.    #
1108 #                                                                       #
1109 # XREF **************************************************************** #
1110 #       _imem_read_{word,long}() - read instruction word/longword       #
1111 #       fix_skewed_ops() - adjust src operand in fsave frame            #
1112 #       set_tag_x() - determine optype of src/dst operands              #
1113 #       store_fpreg() - store opclass 0 or 2 result to FP regfile       #
1114 #       unnorm_fix() - change UNNORM operands to NORM or ZERO           #
1115 #       load_fpn2() - load dst operand from FP regfile                  #
1116 #       load_fpn1() - load src operand from FP regfile                  #
1117 #       fout() - emulate an opclass 3 instruction                       #
1118 #       tbl_unsupp - add of table of emulation routines for opclass 0,2 #
1119 #       _real_inex() - "callout" to operating system inexact handler    #
1120 #       _fpsp_done() - "callout" for exit; work all done                #
1121 #       _real_trace() - "callout" for Trace enabled exception           #
1122 #       funimp_skew() - adjust fsave src ops to "incorrect" value       #
1123 #       _real_snan() - "callout" for SNAN exception                     #
1124 #       _real_operr() - "callout" for OPERR exception                   #
1125 #       _real_ovfl() - "callout" for OVFL exception                     #
1126 #       _real_unfl() - "callout" for UNFL exception                     #
1127 #       get_packed() - fetch packed operand from memory                 #
1128 #                                                                       #
1129 # INPUT *************************************************************** #
1130 #       - The system stack contains the "Unimp Data Type" stk frame     #
1131 #       - The fsave frame contains the ssrc op (for UNNORM/DENORM)      #
1132 #                                                                       #
1133 # OUTPUT ************************************************************** #
1134 #       If Inexact exception (opclass 3):                               #
1135 #       - The system stack is changed to an Inexact exception stk frame #
1136 #       If SNAN exception (opclass 3):                                  #
1137 #       - The system stack is changed to an SNAN exception stk frame    #
1138 #       If OPERR exception (opclass 3):                                 #
1139 #       - The system stack is changed to an OPERR exception stk frame   #
1140 #       If OVFL exception (opclass 3):                                  #
1141 #       - The system stack is changed to an OVFL exception stk frame    #
1142 #       If UNFL exception (opclass 3):                                  #
1143 #       - The system stack is changed to an UNFL exception stack frame  #
1144 #       If Trace exception enabled:                                     #
1145 #       - The system stack is changed to a Trace exception stack frame  #
1146 #       Else: (normal case)                                             #
1147 #       - Correct result has been stored as appropriate                 #
1148 #                                                                       #
1149 # ALGORITHM *********************************************************** #
1150 #       Two main instruction types can enter here: (1) DENORM or UNNORM #
1151 # unimplemented data types. These can be either opclass 0,2 or 3        #
1152 # instructions, and (2) PACKED unimplemented data format instructions   #
1153 # also of opclasses 0,2, or 3.                                          #
1154 #       For UNNORM/DENORM opclass 0 and 2, the handler fetches the src  #
1155 # operand from the fsave state frame and the dst operand (if dyadic)    #
1156 # from the FP register file. The instruction is then emulated by        #
1157 # choosing an emulation routine from a table of routines indexed by     #
1158 # instruction type. Once the instruction has been emulated and result   #
1159 # saved, then we check to see if any enabled exceptions resulted from   #
1160 # instruction emulation. If none, then we exit through the "callout"    #
1161 # _fpsp_done(). If there is an enabled FP exception, then we insert     #
1162 # this exception into the FPU in the fsave state frame and then exit    #
1163 # through _fpsp_done().                                                 #
1164 #       PACKED opclass 0 and 2 is similar in how the instruction is     #
1165 # emulated and exceptions handled. The differences occur in how the     #
1166 # handler loads the packed op (by calling get_packed() routine) and     #
1167 # by the fact that a Trace exception could be pending for PACKED ops.   #
1168 # If a Trace exception is pending, then the current exception stack     #
1169 # frame is changed to a Trace exception stack frame and an exit is      #
1170 # made through _real_trace().                                           #
1171 #       For UNNORM/DENORM opclass 3, the actual move out to memory is   #
1172 # performed by calling the routine fout(). If no exception should occur #
1173 # as the result of emulation, then an exit either occurs through        #
1174 # _fpsp_done() or through _real_trace() if a Trace exception is pending #
1175 # (a Trace stack frame must be created here, too). If an FP exception   #
1176 # should occur, then we must create an exception stack frame of that    #
1177 # type and jump to either _real_snan(), _real_operr(), _real_inex(),    #
1178 # _real_unfl(), or _real_ovfl() as appropriate. PACKED opclass 3        #
1179 # emulation is performed in a similar manner.                           #
1180 #                                                                       #
1181 #########################################################################
1182
1183 #
1184 # (1) DENORM and UNNORM (unimplemented) data types:
1185 #
1186 #                               post-instruction
1187 #                               *****************
1188 #                               *      EA       *
1189 #        pre-instruction        *               *
1190 #       *****************       *****************
1191 #       * 0x0 *  0x0dc  *       * 0x3 *  0x0dc  *
1192 #       *****************       *****************
1193 #       *     Next      *       *     Next      *
1194 #       *      PC       *       *      PC       *
1195 #       *****************       *****************
1196 #       *      SR       *       *      SR       *
1197 #       *****************       *****************
1198 #
1199 # (2) PACKED format (unsupported) opclasses two and three:
1200 #       *****************
1201 #       *      EA       *
1202 #       *               *
1203 #       *****************
1204 #       * 0x2 *  0x0dc  *
1205 #       *****************
1206 #       *     Next      *
1207 #       *      PC       *
1208 #       *****************
1209 #       *      SR       *
1210 #       *****************
1211 #
1212         global          _fpsp_unsupp
1213 _fpsp_unsupp:
1214
1215         link.w          %a6,&-LOCAL_SIZE        # init stack frame
1216
1217         fsave           FP_SRC(%a6)             # save fp state
1218
1219         movm.l          &0x0303,EXC_DREGS(%a6)  # save d0-d1/a0-a1
1220         fmovm.l         %fpcr,%fpsr,%fpiar,USER_FPCR(%a6) # save ctrl regs
1221         fmovm.x         &0xc0,EXC_FPREGS(%a6)   # save fp0-fp1 on stack
1222
1223         btst            &0x5,EXC_SR(%a6)        # user or supervisor mode?
1224         bne.b           fu_s
1225 fu_u:
1226         mov.l           %usp,%a0                # fetch user stack pointer
1227         mov.l           %a0,EXC_A7(%a6)         # save on stack
1228         bra.b           fu_cont
1229 # if the exception is an opclass zero or two unimplemented data type
1230 # exception, then the a7' calculated here is wrong since it doesn't
1231 # stack an ea. however, we don't need an a7' for this case anyways.
1232 fu_s:
1233         lea             0x4+EXC_EA(%a6),%a0     # load old a7'
1234         mov.l           %a0,EXC_A7(%a6)         # save on stack
1235
1236 fu_cont:
1237
1238 # the FPIAR holds the "current PC" of the faulting instruction
1239 # the FPIAR should be set correctly for ALL exceptions passing through
1240 # this point.
1241         mov.l           USER_FPIAR(%a6),EXC_EXTWPTR(%a6)
1242         mov.l           EXC_EXTWPTR(%a6),%a0    # fetch instruction addr
1243         addq.l          &0x4,EXC_EXTWPTR(%a6)   # incr instruction ptr
1244         bsr.l           _imem_read_long         # fetch the instruction words
1245         mov.l           %d0,EXC_OPWORD(%a6)     # store OPWORD and EXTWORD
1246
1247 ############################
1248
1249         clr.b           SPCOND_FLG(%a6)         # clear special condition flag
1250
1251 # Separate opclass three (fpn-to-mem) ops since they have a different
1252 # stack frame and protocol.
1253         btst            &0x5,EXC_CMDREG(%a6)    # is it an fmove out?
1254         bne.w           fu_out                  # yes
1255
1256 # Separate packed opclass two instructions.
1257         bfextu          EXC_CMDREG(%a6){&0:&6},%d0
1258         cmpi.b          %d0,&0x13
1259         beq.w           fu_in_pack
1260
1261
1262 # I'm not sure at this point what FPSR bits are valid for this instruction.
1263 # so, since the emulation routines re-create them anyways, zero exception field
1264         andi.l          &0x00ff00ff,USER_FPSR(%a6) # zero exception field
1265
1266         fmov.l          &0x0,%fpcr              # zero current control regs
1267         fmov.l          &0x0,%fpsr
1268
1269 # Opclass two w/ memory-to-fpn operation will have an incorrect extended
1270 # precision format if the src format was single or double and the
1271 # source data type was an INF, NAN, DENORM, or UNNORM
1272         lea             FP_SRC(%a6),%a0         # pass ptr to input
1273         bsr.l           fix_skewed_ops
1274
1275 # we don't know whether the src operand or the dst operand (or both) is the
1276 # UNNORM or DENORM. call the function that tags the operand type. if the
1277 # input is an UNNORM, then convert it to a NORM, DENORM, or ZERO.
1278         lea             FP_SRC(%a6),%a0         # pass: ptr to src op
1279         bsr.l           set_tag_x               # tag the operand type
1280         cmpi.b          %d0,&UNNORM             # is operand an UNNORM?
1281         bne.b           fu_op2                  # no
1282         bsr.l           unnorm_fix              # yes; convert to NORM,DENORM,or ZERO
1283
1284 fu_op2:
1285         mov.b           %d0,STAG(%a6)           # save src optype tag
1286
1287         bfextu          EXC_CMDREG(%a6){&6:&3},%d0 # dyadic; load dst reg
1288
1289 # bit five of the fp extension word separates the monadic and dyadic operations
1290 # at this point
1291         btst            &0x5,1+EXC_CMDREG(%a6)  # is operation monadic or dyadic?
1292         beq.b           fu_extract              # monadic
1293         cmpi.b          1+EXC_CMDREG(%a6),&0x3a # is operation an ftst?
1294         beq.b           fu_extract              # yes, so it's monadic, too
1295
1296         bsr.l           load_fpn2               # load dst into FP_DST
1297
1298         lea             FP_DST(%a6),%a0         # pass: ptr to dst op
1299         bsr.l           set_tag_x               # tag the operand type
1300         cmpi.b          %d0,&UNNORM             # is operand an UNNORM?
1301         bne.b           fu_op2_done             # no
1302         bsr.l           unnorm_fix              # yes; convert to NORM,DENORM,or ZERO
1303 fu_op2_done:
1304         mov.b           %d0,DTAG(%a6)           # save dst optype tag
1305
1306 fu_extract:
1307         clr.l           %d0
1308         mov.b           FPCR_MODE(%a6),%d0      # fetch rnd mode/prec
1309
1310         bfextu          1+EXC_CMDREG(%a6){&1:&7},%d1 # extract extension
1311
1312         lea             FP_SRC(%a6),%a0
1313         lea             FP_DST(%a6),%a1
1314
1315         mov.l           (tbl_unsupp.l,%pc,%d1.l*4),%d1 # fetch routine addr
1316         jsr             (tbl_unsupp.l,%pc,%d1.l*1)
1317
1318 #
1319 # Exceptions in order of precedence:
1320 #       BSUN    : none
1321 #       SNAN    : all dyadic ops
1322 #       OPERR   : fsqrt(-NORM)
1323 #       OVFL    : all except ftst,fcmp
1324 #       UNFL    : all except ftst,fcmp
1325 #       DZ      : fdiv
1326 #       INEX2   : all except ftst,fcmp
1327 #       INEX1   : none (packed doesn't go through here)
1328 #
1329
1330 # we determine the highest priority exception(if any) set by the
1331 # emulation routine that has also been enabled by the user.
1332         mov.b           FPCR_ENABLE(%a6),%d0    # fetch exceptions set
1333         bne.b           fu_in_ena               # some are enabled
1334
1335 fu_in_cont:
1336 # fcmp and ftst do not store any result.
1337         mov.b           1+EXC_CMDREG(%a6),%d0   # fetch extension
1338         andi.b          &0x38,%d0               # extract bits 3-5
1339         cmpi.b          %d0,&0x38               # is instr fcmp or ftst?
1340         beq.b           fu_in_exit              # yes
1341
1342         bfextu          EXC_CMDREG(%a6){&6:&3},%d0 # dyadic; load dst reg
1343         bsr.l           store_fpreg             # store the result
1344
1345 fu_in_exit:
1346
1347         fmovm.x         EXC_FPREGS(%a6),&0xc0   # restore fp0/fp1
1348         fmovm.l         USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs
1349         movm.l          EXC_DREGS(%a6),&0x0303  # restore d0-d1/a0-a1
1350
1351         unlk            %a6
1352
1353         bra.l           _fpsp_done
1354
1355 fu_in_ena:
1356         and.b           FPSR_EXCEPT(%a6),%d0    # keep only ones enabled
1357         bfffo           %d0{&24:&8},%d0         # find highest priority exception
1358         bne.b           fu_in_exc               # there is at least one set
1359
1360 #
1361 # No exceptions occurred that were also enabled. Now:
1362 #
1363 #       if (OVFL && ovfl_disabled && inexact_enabled) {
1364 #           branch to _real_inex() (even if the result was exact!);
1365 #       } else {
1366 #           save the result in the proper fp reg (unless the op is fcmp or ftst);
1367 #           return;
1368 #       }
1369 #
1370         btst            &ovfl_bit,FPSR_EXCEPT(%a6) # was overflow set?
1371         beq.b           fu_in_cont              # no
1372
1373 fu_in_ovflchk:
1374         btst            &inex2_bit,FPCR_ENABLE(%a6) # was inexact enabled?
1375         beq.b           fu_in_cont              # no
1376         bra.w           fu_in_exc_ovfl          # go insert overflow frame
1377
1378 #
1379 # An exception occurred and that exception was enabled:
1380 #
1381 #       shift enabled exception field into lo byte of d0;
1382 #       if (((INEX2 || INEX1) && inex_enabled && OVFL && ovfl_disabled) ||
1383 #           ((INEX2 || INEX1) && inex_enabled && UNFL && unfl_disabled)) {
1384 #               /*
1385 #                * this is the case where we must call _real_inex() now or else
1386 #                * there will be no other way to pass it the exceptional operand
1387 #                */
1388 #               call _real_inex();
1389 #       } else {
1390 #               restore exc state (SNAN||OPERR||OVFL||UNFL||DZ||INEX) into the FPU;
1391 #       }
1392 #
1393 fu_in_exc:
1394         subi.l          &24,%d0                 # fix offset to be 0-8
1395         cmpi.b          %d0,&0x6                # is exception INEX? (6)
1396         bne.b           fu_in_exc_exit          # no
1397
1398 # the enabled exception was inexact
1399         btst            &unfl_bit,FPSR_EXCEPT(%a6) # did disabled underflow occur?
1400         bne.w           fu_in_exc_unfl          # yes
1401         btst            &ovfl_bit,FPSR_EXCEPT(%a6) # did disabled overflow occur?
1402         bne.w           fu_in_exc_ovfl          # yes
1403
1404 # here, we insert the correct fsave status value into the fsave frame for the
1405 # corresponding exception. the operand in the fsave frame should be the original
1406 # src operand.
1407 fu_in_exc_exit:
1408         mov.l           %d0,-(%sp)              # save d0
1409         bsr.l           funimp_skew             # skew sgl or dbl inputs
1410         mov.l           (%sp)+,%d0              # restore d0
1411
1412         mov.w           (tbl_except.b,%pc,%d0.w*2),2+FP_SRC(%a6) # create exc status
1413
1414         fmovm.x         EXC_FPREGS(%a6),&0xc0   # restore fp0/fp1
1415         fmovm.l         USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs
1416         movm.l          EXC_DREGS(%a6),&0x0303  # restore d0-d1/a0-a1
1417
1418         frestore        FP_SRC(%a6)             # restore src op
1419
1420         unlk            %a6
1421
1422         bra.l           _fpsp_done
1423
1424 tbl_except:
1425         short           0xe000,0xe006,0xe004,0xe005
1426         short           0xe003,0xe002,0xe001,0xe001
1427
1428 fu_in_exc_unfl:
1429         mov.w           &0x4,%d0
1430         bra.b           fu_in_exc_exit
1431 fu_in_exc_ovfl:
1432         mov.w           &0x03,%d0
1433         bra.b           fu_in_exc_exit
1434
1435 # If the input operand to this operation was opclass two and a single
1436 # or double precision denorm, inf, or nan, the operand needs to be
1437 # "corrected" in order to have the proper equivalent extended precision
1438 # number.
1439         global          fix_skewed_ops
1440 fix_skewed_ops:
1441         bfextu          EXC_CMDREG(%a6){&0:&6},%d0 # extract opclass,src fmt
1442         cmpi.b          %d0,&0x11               # is class = 2 & fmt = sgl?
1443         beq.b           fso_sgl                 # yes
1444         cmpi.b          %d0,&0x15               # is class = 2 & fmt = dbl?
1445         beq.b           fso_dbl                 # yes
1446         rts                                     # no
1447
1448 fso_sgl:
1449         mov.w           LOCAL_EX(%a0),%d0       # fetch src exponent
1450         andi.w          &0x7fff,%d0             # strip sign
1451         cmpi.w          %d0,&0x3f80             # is |exp| == $3f80?
1452         beq.b           fso_sgl_dnrm_zero       # yes
1453         cmpi.w          %d0,&0x407f             # no; is |exp| == $407f?
1454         beq.b           fso_infnan              # yes
1455         rts                                     # no
1456
1457 fso_sgl_dnrm_zero:
1458         andi.l          &0x7fffffff,LOCAL_HI(%a0) # clear j-bit
1459         beq.b           fso_zero                # it's a skewed zero
1460 fso_sgl_dnrm:
1461 # here, we count on norm not to alter a0...
1462         bsr.l           norm                    # normalize mantissa
1463         neg.w           %d0                     # -shft amt
1464         addi.w          &0x3f81,%d0             # adjust new exponent
1465         andi.w          &0x8000,LOCAL_EX(%a0)   # clear old exponent
1466         or.w            %d0,LOCAL_EX(%a0)       # insert new exponent
1467         rts
1468
1469 fso_zero:
1470         andi.w          &0x8000,LOCAL_EX(%a0)   # clear bogus exponent
1471         rts
1472
1473 fso_infnan:
1474         andi.b          &0x7f,LOCAL_HI(%a0)     # clear j-bit
1475         ori.w           &0x7fff,LOCAL_EX(%a0)   # make exponent = $7fff
1476         rts
1477
1478 fso_dbl:
1479         mov.w           LOCAL_EX(%a0),%d0       # fetch src exponent
1480         andi.w          &0x7fff,%d0             # strip sign
1481         cmpi.w          %d0,&0x3c00             # is |exp| == $3c00?
1482         beq.b           fso_dbl_dnrm_zero       # yes
1483         cmpi.w          %d0,&0x43ff             # no; is |exp| == $43ff?
1484         beq.b           fso_infnan              # yes
1485         rts                                     # no
1486
1487 fso_dbl_dnrm_zero:
1488         andi.l          &0x7fffffff,LOCAL_HI(%a0) # clear j-bit
1489         bne.b           fso_dbl_dnrm            # it's a skewed denorm
1490         tst.l           LOCAL_LO(%a0)           # is it a zero?
1491         beq.b           fso_zero                # yes
1492 fso_dbl_dnrm:
1493 # here, we count on norm not to alter a0...
1494         bsr.l           norm                    # normalize mantissa
1495         neg.w           %d0                     # -shft amt
1496         addi.w          &0x3c01,%d0             # adjust new exponent
1497         andi.w          &0x8000,LOCAL_EX(%a0)   # clear old exponent
1498         or.w            %d0,LOCAL_EX(%a0)       # insert new exponent
1499         rts
1500
1501 #################################################################
1502
1503 # fmove out took an unimplemented data type exception.
1504 # the src operand is in FP_SRC. Call _fout() to write out the result and
1505 # to determine which exceptions, if any, to take.
1506 fu_out:
1507
1508 # Separate packed move outs from the UNNORM and DENORM move outs.
1509         bfextu          EXC_CMDREG(%a6){&3:&3},%d0
1510         cmpi.b          %d0,&0x3
1511         beq.w           fu_out_pack
1512         cmpi.b          %d0,&0x7
1513         beq.w           fu_out_pack
1514
1515
1516 # I'm not sure at this point what FPSR bits are valid for this instruction.
1517 # so, since the emulation routines re-create them anyways, zero exception field.
1518 # fmove out doesn't affect ccodes.
1519         and.l           &0xffff00ff,USER_FPSR(%a6) # zero exception field
1520
1521         fmov.l          &0x0,%fpcr              # zero current control regs
1522         fmov.l          &0x0,%fpsr
1523
1524 # the src can ONLY be a DENORM or an UNNORM! so, don't make any big subroutine
1525 # call here. just figure out what it is...
1526         mov.w           FP_SRC_EX(%a6),%d0      # get exponent
1527         andi.w          &0x7fff,%d0             # strip sign
1528         beq.b           fu_out_denorm           # it's a DENORM
1529
1530         lea             FP_SRC(%a6),%a0
1531         bsr.l           unnorm_fix              # yes; fix it
1532
1533         mov.b           %d0,STAG(%a6)
1534
1535         bra.b           fu_out_cont
1536 fu_out_denorm:
1537         mov.b           &DENORM,STAG(%a6)
1538 fu_out_cont:
1539
1540         clr.l           %d0
1541         mov.b           FPCR_MODE(%a6),%d0      # fetch rnd mode/prec
1542
1543         lea             FP_SRC(%a6),%a0         # pass ptr to src operand
1544
1545         mov.l           (%a6),EXC_A6(%a6)       # in case a6 changes
1546         bsr.l           fout                    # call fmove out routine
1547
1548 # Exceptions in order of precedence:
1549 #       BSUN    : none
1550 #       SNAN    : none
1551 #       OPERR   : fmove.{b,w,l} out of large UNNORM
1552 #       OVFL    : fmove.{s,d}
1553 #       UNFL    : fmove.{s,d,x}
1554 #       DZ      : none
1555 #       INEX2   : all
1556 #       INEX1   : none (packed doesn't travel through here)
1557
1558 # determine the highest priority exception(if any) set by the
1559 # emulation routine that has also been enabled by the user.
1560         mov.b           FPCR_ENABLE(%a6),%d0    # fetch exceptions enabled
1561         bne.w           fu_out_ena              # some are enabled
1562
1563 fu_out_done:
1564
1565         mov.l           EXC_A6(%a6),(%a6)       # in case a6 changed
1566
1567 # on extended precision opclass three instructions using pre-decrement or
1568 # post-increment addressing mode, the address register is not updated. is the
1569 # address register was the stack pointer used from user mode, then let's update
1570 # it here. if it was used from supervisor mode, then we have to handle this
1571 # as a special case.
1572         btst            &0x5,EXC_SR(%a6)
1573         bne.b           fu_out_done_s
1574
1575         mov.l           EXC_A7(%a6),%a0         # restore a7
1576         mov.l           %a0,%usp
1577
1578 fu_out_done_cont:
1579         fmovm.x         EXC_FPREGS(%a6),&0xc0   # restore fp0/fp1
1580         fmovm.l         USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs
1581         movm.l          EXC_DREGS(%a6),&0x0303  # restore d0-d1/a0-a1
1582
1583         unlk            %a6
1584
1585         btst            &0x7,(%sp)              # is trace on?
1586         bne.b           fu_out_trace            # yes
1587
1588         bra.l           _fpsp_done
1589
1590 # is the ea mode pre-decrement of the stack pointer from supervisor mode?
1591 # ("fmov.x fpm,-(a7)") if so,
1592 fu_out_done_s:
1593         cmpi.b          SPCOND_FLG(%a6),&mda7_flg
1594         bne.b           fu_out_done_cont
1595
1596 # the extended precision result is still in fp0. but, we need to save it
1597 # somewhere on the stack until we can copy it to its final resting place.
1598 # here, we're counting on the top of the stack to be the old place-holders
1599 # for fp0/fp1 which have already been restored. that way, we can write
1600 # over those destinations with the shifted stack frame.
1601         fmovm.x         &0x80,FP_SRC(%a6)       # put answer on stack
1602
1603         fmovm.x         EXC_FPREGS(%a6),&0xc0   # restore fp0/fp1
1604         fmovm.l         USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs
1605         movm.l          EXC_DREGS(%a6),&0x0303  # restore d0-d1/a0-a1
1606
1607         mov.l           (%a6),%a6               # restore frame pointer
1608
1609         mov.l           LOCAL_SIZE+EXC_SR(%sp),LOCAL_SIZE+EXC_SR-0xc(%sp)
1610         mov.l           LOCAL_SIZE+2+EXC_PC(%sp),LOCAL_SIZE+2+EXC_PC-0xc(%sp)
1611
1612 # now, copy the result to the proper place on the stack
1613         mov.l           LOCAL_SIZE+FP_SRC_EX(%sp),LOCAL_SIZE+EXC_SR+0x0(%sp)
1614         mov.l           LOCAL_SIZE+FP_SRC_HI(%sp),LOCAL_SIZE+EXC_SR+0x4(%sp)
1615         mov.l           LOCAL_SIZE+FP_SRC_LO(%sp),LOCAL_SIZE+EXC_SR+0x8(%sp)
1616
1617         add.l           &LOCAL_SIZE-0x8,%sp
1618
1619         btst            &0x7,(%sp)
1620         bne.b           fu_out_trace
1621
1622         bra.l           _fpsp_done
1623
1624 fu_out_ena:
1625         and.b           FPSR_EXCEPT(%a6),%d0    # keep only ones enabled
1626         bfffo           %d0{&24:&8},%d0         # find highest priority exception
1627         bne.b           fu_out_exc              # there is at least one set
1628
1629 # no exceptions were set.
1630 # if a disabled overflow occurred and inexact was enabled but the result
1631 # was exact, then a branch to _real_inex() is made.
1632         btst            &ovfl_bit,FPSR_EXCEPT(%a6) # was overflow set?
1633         beq.w           fu_out_done             # no
1634
1635 fu_out_ovflchk:
1636         btst            &inex2_bit,FPCR_ENABLE(%a6) # was inexact enabled?
1637         beq.w           fu_out_done             # no
1638         bra.w           fu_inex                 # yes
1639
1640 #
1641 # The fp move out that took the "Unimplemented Data Type" exception was
1642 # being traced. Since the stack frames are similar, get the "current" PC
1643 # from FPIAR and put it in the trace stack frame then jump to _real_trace().
1644 #
1645 #                 UNSUPP FRAME             TRACE FRAME
1646 #               *****************       *****************
1647 #               *      EA       *       *    Current    *
1648 #               *               *       *      PC       *
1649 #               *****************       *****************
1650 #               * 0x3 *  0x0dc  *       * 0x2 *  0x024  *
1651 #               *****************       *****************
1652 #               *     Next      *       *     Next      *
1653 #               *      PC       *       *      PC       *
1654 #               *****************       *****************
1655 #               *      SR       *       *      SR       *
1656 #               *****************       *****************
1657 #
1658 fu_out_trace:
1659         mov.w           &0x2024,0x6(%sp)
1660         fmov.l          %fpiar,0x8(%sp)
1661         bra.l           _real_trace
1662
1663 # an exception occurred and that exception was enabled.
1664 fu_out_exc:
1665         subi.l          &24,%d0                 # fix offset to be 0-8
1666
1667 # we don't mess with the existing fsave frame. just re-insert it and
1668 # jump to the "_real_{}()" handler...
1669         mov.w           (tbl_fu_out.b,%pc,%d0.w*2),%d0
1670         jmp             (tbl_fu_out.b,%pc,%d0.w*1)
1671
1672         swbeg           &0x8
1673 tbl_fu_out:
1674         short           tbl_fu_out      - tbl_fu_out    # BSUN can't happen
1675         short           tbl_fu_out      - tbl_fu_out    # SNAN can't happen
1676         short           fu_operr        - tbl_fu_out    # OPERR
1677         short           fu_ovfl         - tbl_fu_out    # OVFL
1678         short           fu_unfl         - tbl_fu_out    # UNFL
1679         short           tbl_fu_out      - tbl_fu_out    # DZ can't happen
1680         short           fu_inex         - tbl_fu_out    # INEX2
1681         short           tbl_fu_out      - tbl_fu_out    # INEX1 won't make it here
1682
1683 # for snan,operr,ovfl,unfl, src op is still in FP_SRC so just
1684 # frestore it.
1685 fu_snan:
1686         fmovm.x         EXC_FPREGS(%a6),&0xc0   # restore fp0/fp1
1687         fmovm.l         USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs
1688         movm.l          EXC_DREGS(%a6),&0x0303  # restore d0-d1/a0-a1
1689
1690         mov.w           &0x30d8,EXC_VOFF(%a6)   # vector offset = 0xd8
1691         mov.w           &0xe006,2+FP_SRC(%a6)
1692
1693         frestore        FP_SRC(%a6)
1694
1695         unlk            %a6
1696
1697
1698         bra.l           _real_snan
1699
1700 fu_operr:
1701         fmovm.x         EXC_FPREGS(%a6),&0xc0   # restore fp0/fp1
1702         fmovm.l         USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs
1703         movm.l          EXC_DREGS(%a6),&0x0303  # restore d0-d1/a0-a1
1704
1705         mov.w           &0x30d0,EXC_VOFF(%a6)   # vector offset = 0xd0
1706         mov.w           &0xe004,2+FP_SRC(%a6)
1707
1708         frestore        FP_SRC(%a6)
1709
1710         unlk            %a6
1711
1712
1713         bra.l           _real_operr
1714
1715 fu_ovfl:
1716         fmovm.x         &0x40,FP_SRC(%a6)       # save EXOP to the stack
1717
1718         fmovm.x         EXC_FPREGS(%a6),&0xc0   # restore fp0/fp1
1719         fmovm.l         USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs
1720         movm.l          EXC_DREGS(%a6),&0x0303  # restore d0-d1/a0-a1
1721
1722         mov.w           &0x30d4,EXC_VOFF(%a6)   # vector offset = 0xd4
1723         mov.w           &0xe005,2+FP_SRC(%a6)
1724
1725         frestore        FP_SRC(%a6)             # restore EXOP
1726
1727         unlk            %a6
1728
1729         bra.l           _real_ovfl
1730
1731 # underflow can happen for extended precision. extended precision opclass
1732 # three instruction exceptions don't update the stack pointer. so, if the
1733 # exception occurred from user mode, then simply update a7 and exit normally.
1734 # if the exception occurred from supervisor mode, check if
1735 fu_unfl:
1736         mov.l           EXC_A6(%a6),(%a6)       # restore a6
1737
1738         btst            &0x5,EXC_SR(%a6)
1739         bne.w           fu_unfl_s
1740
1741         mov.l           EXC_A7(%a6),%a0         # restore a7 whether we need
1742         mov.l           %a0,%usp                # to or not...
1743
1744 fu_unfl_cont:
1745         fmovm.x         &0x40,FP_SRC(%a6)       # save EXOP to the stack
1746
1747         fmovm.x         EXC_FPREGS(%a6),&0xc0   # restore fp0/fp1
1748         fmovm.l         USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs
1749         movm.l          EXC_DREGS(%a6),&0x0303  # restore d0-d1/a0-a1
1750
1751         mov.w           &0x30cc,EXC_VOFF(%a6)   # vector offset = 0xcc
1752         mov.w           &0xe003,2+FP_SRC(%a6)
1753
1754         frestore        FP_SRC(%a6)             # restore EXOP
1755
1756         unlk            %a6
1757
1758         bra.l           _real_unfl
1759
1760 fu_unfl_s:
1761         cmpi.b          SPCOND_FLG(%a6),&mda7_flg # was the <ea> mode -(sp)?
1762         bne.b           fu_unfl_cont
1763
1764 # the extended precision result is still in fp0. but, we need to save it
1765 # somewhere on the stack until we can copy it to its final resting place
1766 # (where the exc frame is currently). make sure it's not at the top of the
1767 # frame or it will get overwritten when the exc stack frame is shifted "down".
1768         fmovm.x         &0x80,FP_SRC(%a6)       # put answer on stack
1769         fmovm.x         &0x40,FP_DST(%a6)       # put EXOP on stack
1770
1771         fmovm.x         EXC_FPREGS(%a6),&0xc0   # restore fp0/fp1
1772         fmovm.l         USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs
1773         movm.l          EXC_DREGS(%a6),&0x0303  # restore d0-d1/a0-a1
1774
1775         mov.w           &0x30cc,EXC_VOFF(%a6)   # vector offset = 0xcc
1776         mov.w           &0xe003,2+FP_DST(%a6)
1777
1778         frestore        FP_DST(%a6)             # restore EXOP
1779
1780         mov.l           (%a6),%a6               # restore frame pointer
1781
1782         mov.l           LOCAL_SIZE+EXC_SR(%sp),LOCAL_SIZE+EXC_SR-0xc(%sp)
1783         mov.l           LOCAL_SIZE+2+EXC_PC(%sp),LOCAL_SIZE+2+EXC_PC-0xc(%sp)
1784         mov.l           LOCAL_SIZE+EXC_EA(%sp),LOCAL_SIZE+EXC_EA-0xc(%sp)
1785
1786 # now, copy the result to the proper place on the stack
1787         mov.l           LOCAL_SIZE+FP_SRC_EX(%sp),LOCAL_SIZE+EXC_SR+0x0(%sp)
1788         mov.l           LOCAL_SIZE+FP_SRC_HI(%sp),LOCAL_SIZE+EXC_SR+0x4(%sp)
1789         mov.l           LOCAL_SIZE+FP_SRC_LO(%sp),LOCAL_SIZE+EXC_SR+0x8(%sp)
1790
1791         add.l           &LOCAL_SIZE-0x8,%sp
1792
1793         bra.l           _real_unfl
1794
1795 # fmove in and out enter here.
1796 fu_inex:
1797         fmovm.x         &0x40,FP_SRC(%a6)       # save EXOP to the stack
1798
1799         fmovm.x         EXC_FPREGS(%a6),&0xc0   # restore fp0/fp1
1800         fmovm.l         USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs
1801         movm.l          EXC_DREGS(%a6),&0x0303  # restore d0-d1/a0-a1
1802
1803         mov.w           &0x30c4,EXC_VOFF(%a6)   # vector offset = 0xc4
1804         mov.w           &0xe001,2+FP_SRC(%a6)
1805
1806         frestore        FP_SRC(%a6)             # restore EXOP
1807
1808         unlk            %a6
1809
1810
1811         bra.l           _real_inex
1812
1813 #########################################################################
1814 #########################################################################
1815 fu_in_pack:
1816
1817
1818 # I'm not sure at this point what FPSR bits are valid for this instruction.
1819 # so, since the emulation routines re-create them anyways, zero exception field
1820         andi.l          &0x0ff00ff,USER_FPSR(%a6) # zero exception field
1821
1822         fmov.l          &0x0,%fpcr              # zero current control regs
1823         fmov.l          &0x0,%fpsr
1824
1825         bsr.l           get_packed              # fetch packed src operand
1826
1827         lea             FP_SRC(%a6),%a0         # pass ptr to src
1828         bsr.l           set_tag_x               # set src optype tag
1829
1830         mov.b           %d0,STAG(%a6)           # save src optype tag
1831
1832         bfextu          EXC_CMDREG(%a6){&6:&3},%d0 # dyadic; load dst reg
1833
1834 # bit five of the fp extension word separates the monadic and dyadic operations
1835 # at this point
1836         btst            &0x5,1+EXC_CMDREG(%a6)  # is operation monadic or dyadic?
1837         beq.b           fu_extract_p            # monadic
1838         cmpi.b          1+EXC_CMDREG(%a6),&0x3a # is operation an ftst?
1839         beq.b           fu_extract_p            # yes, so it's monadic, too
1840
1841         bsr.l           load_fpn2               # load dst into FP_DST
1842
1843         lea             FP_DST(%a6),%a0         # pass: ptr to dst op
1844         bsr.l           set_tag_x               # tag the operand type
1845         cmpi.b          %d0,&UNNORM             # is operand an UNNORM?
1846         bne.b           fu_op2_done_p           # no
1847         bsr.l           unnorm_fix              # yes; convert to NORM,DENORM,or ZERO
1848 fu_op2_done_p:
1849         mov.b           %d0,DTAG(%a6)           # save dst optype tag
1850
1851 fu_extract_p:
1852         clr.l           %d0
1853         mov.b           FPCR_MODE(%a6),%d0      # fetch rnd mode/prec
1854
1855         bfextu          1+EXC_CMDREG(%a6){&1:&7},%d1 # extract extension
1856
1857         lea             FP_SRC(%a6),%a0
1858         lea             FP_DST(%a6),%a1
1859
1860         mov.l           (tbl_unsupp.l,%pc,%d1.l*4),%d1 # fetch routine addr
1861         jsr             (tbl_unsupp.l,%pc,%d1.l*1)
1862
1863 #
1864 # Exceptions in order of precedence:
1865 #       BSUN    : none
1866 #       SNAN    : all dyadic ops
1867 #       OPERR   : fsqrt(-NORM)
1868 #       OVFL    : all except ftst,fcmp
1869 #       UNFL    : all except ftst,fcmp
1870 #       DZ      : fdiv
1871 #       INEX2   : all except ftst,fcmp
1872 #       INEX1   : all
1873 #
1874
1875 # we determine the highest priority exception(if any) set by the
1876 # emulation routine that has also been enabled by the user.
1877         mov.b           FPCR_ENABLE(%a6),%d0    # fetch exceptions enabled
1878         bne.w           fu_in_ena_p             # some are enabled
1879
1880 fu_in_cont_p:
1881 # fcmp and ftst do not store any result.
1882         mov.b           1+EXC_CMDREG(%a6),%d0   # fetch extension
1883         andi.b          &0x38,%d0               # extract bits 3-5
1884         cmpi.b          %d0,&0x38               # is instr fcmp or ftst?
1885         beq.b           fu_in_exit_p            # yes
1886
1887         bfextu          EXC_CMDREG(%a6){&6:&3},%d0 # dyadic; load dst reg
1888         bsr.l           store_fpreg             # store the result
1889
1890 fu_in_exit_p:
1891
1892         btst            &0x5,EXC_SR(%a6)        # user or supervisor?
1893         bne.w           fu_in_exit_s_p          # supervisor
1894
1895         mov.l           EXC_A7(%a6),%a0         # update user a7
1896         mov.l           %a0,%usp
1897
1898 fu_in_exit_cont_p:
1899         fmovm.x         EXC_FPREGS(%a6),&0xc0   # restore fp0/fp1
1900         fmovm.l         USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs
1901         movm.l          EXC_DREGS(%a6),&0x0303  # restore d0-d1/a0-a1
1902
1903         unlk            %a6                     # unravel stack frame
1904
1905         btst            &0x7,(%sp)              # is trace on?
1906         bne.w           fu_trace_p              # yes
1907
1908         bra.l           _fpsp_done              # exit to os
1909
1910 # the exception occurred in supervisor mode. check to see if the
1911 # addressing mode was (a7)+. if so, we'll need to shift the
1912 # stack frame "up".
1913 fu_in_exit_s_p:
1914         btst            &mia7_bit,SPCOND_FLG(%a6) # was ea mode (a7)+
1915         beq.b           fu_in_exit_cont_p       # no
1916
1917         fmovm.x         EXC_FPREGS(%a6),&0xc0   # restore fp0/fp1
1918         fmovm.l         USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs
1919         movm.l          EXC_DREGS(%a6),&0x0303  # restore d0-d1/a0-a1
1920
1921         unlk            %a6                     # unravel stack frame
1922
1923 # shift the stack frame "up". we don't really care about the <ea> field.
1924         mov.l           0x4(%sp),0x10(%sp)
1925         mov.l           0x0(%sp),0xc(%sp)
1926         add.l           &0xc,%sp
1927
1928         btst            &0x7,(%sp)              # is trace on?
1929         bne.w           fu_trace_p              # yes
1930
1931         bra.l           _fpsp_done              # exit to os
1932
1933 fu_in_ena_p:
1934         and.b           FPSR_EXCEPT(%a6),%d0    # keep only ones enabled & set
1935         bfffo           %d0{&24:&8},%d0         # find highest priority exception
1936         bne.b           fu_in_exc_p             # at least one was set
1937
1938 #
1939 # No exceptions occurred that were also enabled. Now:
1940 #
1941 #       if (OVFL && ovfl_disabled && inexact_enabled) {
1942 #           branch to _real_inex() (even if the result was exact!);
1943 #       } else {
1944 #           save the result in the proper fp reg (unless the op is fcmp or ftst);
1945 #           return;
1946 #       }
1947 #
1948         btst            &ovfl_bit,FPSR_EXCEPT(%a6) # was overflow set?
1949         beq.w           fu_in_cont_p            # no
1950
1951 fu_in_ovflchk_p:
1952         btst            &inex2_bit,FPCR_ENABLE(%a6) # was inexact enabled?
1953         beq.w           fu_in_cont_p            # no
1954         bra.w           fu_in_exc_ovfl_p        # do _real_inex() now
1955
1956 #
1957 # An exception occurred and that exception was enabled:
1958 #
1959 #       shift enabled exception field into lo byte of d0;
1960 #       if (((INEX2 || INEX1) && inex_enabled && OVFL && ovfl_disabled) ||
1961 #           ((INEX2 || INEX1) && inex_enabled && UNFL && unfl_disabled)) {
1962 #               /*
1963 #                * this is the case where we must call _real_inex() now or else
1964 #                * there will be no other way to pass it the exceptional operand
1965 #                */
1966 #               call _real_inex();
1967 #       } else {
1968 #               restore exc state (SNAN||OPERR||OVFL||UNFL||DZ||INEX) into the FPU;
1969 #       }
1970 #
1971 fu_in_exc_p:
1972         subi.l          &24,%d0                 # fix offset to be 0-8
1973         cmpi.b          %d0,&0x6                # is exception INEX? (6 or 7)
1974         blt.b           fu_in_exc_exit_p        # no
1975
1976 # the enabled exception was inexact
1977         btst            &unfl_bit,FPSR_EXCEPT(%a6) # did disabled underflow occur?
1978         bne.w           fu_in_exc_unfl_p        # yes
1979         btst            &ovfl_bit,FPSR_EXCEPT(%a6) # did disabled overflow occur?
1980         bne.w           fu_in_exc_ovfl_p        # yes
1981
1982 # here, we insert the correct fsave status value into the fsave frame for the
1983 # corresponding exception. the operand in the fsave frame should be the original
1984 # src operand.
1985 # as a reminder for future predicted pain and agony, we are passing in fsave the
1986 # "non-skewed" operand for cases of sgl and dbl src INFs,NANs, and DENORMs.
1987 # this is INCORRECT for enabled SNAN which would give to the user the skewed SNAN!!!
1988 fu_in_exc_exit_p:
1989         btst            &0x5,EXC_SR(%a6)        # user or supervisor?
1990         bne.w           fu_in_exc_exit_s_p      # supervisor
1991
1992         mov.l           EXC_A7(%a6),%a0         # update user a7
1993         mov.l           %a0,%usp
1994
1995 fu_in_exc_exit_cont_p:
1996         mov.w           (tbl_except_p.b,%pc,%d0.w*2),2+FP_SRC(%a6)
1997
1998         fmovm.x         EXC_FPREGS(%a6),&0xc0   # restore fp0/fp1
1999         fmovm.l         USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs
2000         movm.l          EXC_DREGS(%a6),&0x0303  # restore d0-d1/a0-a1
2001
2002         frestore        FP_SRC(%a6)             # restore src op
2003
2004         unlk            %a6
2005
2006         btst            &0x7,(%sp)              # is trace enabled?
2007         bne.w           fu_trace_p              # yes
2008
2009         bra.l           _fpsp_done
2010
2011 tbl_except_p:
2012         short           0xe000,0xe006,0xe004,0xe005
2013         short           0xe003,0xe002,0xe001,0xe001
2014
2015 fu_in_exc_ovfl_p:
2016         mov.w           &0x3,%d0
2017         bra.w           fu_in_exc_exit_p
2018
2019 fu_in_exc_unfl_p:
2020         mov.w           &0x4,%d0
2021         bra.w           fu_in_exc_exit_p
2022
2023 fu_in_exc_exit_s_p:
2024         btst            &mia7_bit,SPCOND_FLG(%a6)
2025         beq.b           fu_in_exc_exit_cont_p
2026
2027         mov.w           (tbl_except_p.b,%pc,%d0.w*2),2+FP_SRC(%a6)
2028
2029         fmovm.x         EXC_FPREGS(%a6),&0xc0   # restore fp0/fp1
2030         fmovm.l         USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs
2031         movm.l          EXC_DREGS(%a6),&0x0303  # restore d0-d1/a0-a1
2032
2033         frestore        FP_SRC(%a6)             # restore src op
2034
2035         unlk            %a6                     # unravel stack frame
2036
2037 # shift stack frame "up". who cares about <ea> field.
2038         mov.l           0x4(%sp),0x10(%sp)
2039         mov.l           0x0(%sp),0xc(%sp)
2040         add.l           &0xc,%sp
2041
2042         btst            &0x7,(%sp)              # is trace on?
2043         bne.b           fu_trace_p              # yes
2044
2045         bra.l           _fpsp_done              # exit to os
2046
2047 #
2048 # The opclass two PACKED instruction that took an "Unimplemented Data Type"
2049 # exception was being traced. Make the "current" PC the FPIAR and put it in the
2050 # trace stack frame then jump to _real_trace().
2051 #
2052 #                 UNSUPP FRAME             TRACE FRAME
2053 #               *****************       *****************
2054 #               *      EA       *       *    Current    *
2055 #               *               *       *      PC       *
2056 #               *****************       *****************
2057 #               * 0x2 * 0x0dc   *       * 0x2 *  0x024  *
2058 #               *****************       *****************
2059 #               *     Next      *       *     Next      *
2060 #               *      PC       *       *      PC       *
2061 #               *****************       *****************
2062 #               *      SR       *       *      SR       *
2063 #               *****************       *****************
2064 fu_trace_p:
2065         mov.w           &0x2024,0x6(%sp)
2066         fmov.l          %fpiar,0x8(%sp)
2067
2068         bra.l           _real_trace
2069
2070 #########################################################
2071 #########################################################
2072 fu_out_pack:
2073
2074
2075 # I'm not sure at this point what FPSR bits are valid for this instruction.
2076 # so, since the emulation routines re-create them anyways, zero exception field.
2077 # fmove out doesn't affect ccodes.
2078         and.l           &0xffff00ff,USER_FPSR(%a6) # zero exception field
2079
2080         fmov.l          &0x0,%fpcr              # zero current control regs
2081         fmov.l          &0x0,%fpsr
2082
2083         bfextu          EXC_CMDREG(%a6){&6:&3},%d0
2084         bsr.l           load_fpn1
2085
2086 # unlike other opclass 3, unimplemented data type exceptions, packed must be
2087 # able to detect all operand types.
2088         lea             FP_SRC(%a6),%a0
2089         bsr.l           set_tag_x               # tag the operand type
2090         cmpi.b          %d0,&UNNORM             # is operand an UNNORM?
2091         bne.b           fu_op2_p                # no
2092         bsr.l           unnorm_fix              # yes; convert to NORM,DENORM,or ZERO
2093
2094 fu_op2_p:
2095         mov.b           %d0,STAG(%a6)           # save src optype tag
2096
2097         clr.l           %d0
2098         mov.b           FPCR_MODE(%a6),%d0      # fetch rnd mode/prec
2099
2100         lea             FP_SRC(%a6),%a0         # pass ptr to src operand
2101
2102         mov.l           (%a6),EXC_A6(%a6)       # in case a6 changes
2103         bsr.l           fout                    # call fmove out routine
2104
2105 # Exceptions in order of precedence:
2106 #       BSUN    : no
2107 #       SNAN    : yes
2108 #       OPERR   : if ((k_factor > +17) || (dec. exp exceeds 3 digits))
2109 #       OVFL    : no
2110 #       UNFL    : no
2111 #       DZ      : no
2112 #       INEX2   : yes
2113 #       INEX1   : no
2114
2115 # determine the highest priority exception(if any) set by the
2116 # emulation routine that has also been enabled by the user.
2117         mov.b           FPCR_ENABLE(%a6),%d0    # fetch exceptions enabled
2118         bne.w           fu_out_ena_p            # some are enabled
2119
2120 fu_out_exit_p:
2121         mov.l           EXC_A6(%a6),(%a6)       # restore a6
2122
2123         btst            &0x5,EXC_SR(%a6)        # user or supervisor?
2124         bne.b           fu_out_exit_s_p         # supervisor
2125
2126         mov.l           EXC_A7(%a6),%a0         # update user a7
2127         mov.l           %a0,%usp
2128
2129 fu_out_exit_cont_p:
2130         fmovm.x         EXC_FPREGS(%a6),&0xc0   # restore fp0/fp1
2131         fmovm.l         USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs
2132         movm.l          EXC_DREGS(%a6),&0x0303  # restore d0-d1/a0-a1
2133
2134         unlk            %a6                     # unravel stack frame
2135
2136         btst            &0x7,(%sp)              # is trace on?
2137         bne.w           fu_trace_p              # yes
2138
2139         bra.l           _fpsp_done              # exit to os
2140
2141 # the exception occurred in supervisor mode. check to see if the
2142 # addressing mode was -(a7). if so, we'll need to shift the
2143 # stack frame "down".
2144 fu_out_exit_s_p:
2145         btst            &mda7_bit,SPCOND_FLG(%a6) # was ea mode -(a7)
2146         beq.b           fu_out_exit_cont_p      # no
2147
2148         fmovm.x         EXC_FPREGS(%a6),&0xc0   # restore fp0/fp1
2149         fmovm.l         USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs
2150         movm.l          EXC_DREGS(%a6),&0x0303  # restore d0-d1/a0-a1
2151
2152         mov.l           (%a6),%a6               # restore frame pointer
2153
2154         mov.l           LOCAL_SIZE+EXC_SR(%sp),LOCAL_SIZE+EXC_SR-0xc(%sp)
2155         mov.l           LOCAL_SIZE+2+EXC_PC(%sp),LOCAL_SIZE+2+EXC_PC-0xc(%sp)
2156
2157 # now, copy the result to the proper place on the stack
2158         mov.l           LOCAL_SIZE+FP_DST_EX(%sp),LOCAL_SIZE+EXC_SR+0x0(%sp)
2159         mov.l           LOCAL_SIZE+FP_DST_HI(%sp),LOCAL_SIZE+EXC_SR+0x4(%sp)
2160         mov.l           LOCAL_SIZE+FP_DST_LO(%sp),LOCAL_SIZE+EXC_SR+0x8(%sp)
2161
2162         add.l           &LOCAL_SIZE-0x8,%sp
2163
2164         btst            &0x7,(%sp)
2165         bne.w           fu_trace_p
2166
2167         bra.l           _fpsp_done
2168
2169 fu_out_ena_p:
2170         and.b           FPSR_EXCEPT(%a6),%d0    # keep only ones enabled
2171         bfffo           %d0{&24:&8},%d0         # find highest priority exception
2172         beq.w           fu_out_exit_p
2173
2174         mov.l           EXC_A6(%a6),(%a6)       # restore a6
2175
2176 # an exception occurred and that exception was enabled.
2177 # the only exception possible on packed move out are INEX, OPERR, and SNAN.
2178 fu_out_exc_p:
2179         cmpi.b          %d0,&0x1a
2180         bgt.w           fu_inex_p2
2181         beq.w           fu_operr_p
2182
2183 fu_snan_p:
2184         btst            &0x5,EXC_SR(%a6)
2185         bne.b           fu_snan_s_p
2186
2187         mov.l           EXC_A7(%a6),%a0
2188         mov.l           %a0,%usp
2189         bra.w           fu_snan
2190
2191 fu_snan_s_p:
2192         cmpi.b          SPCOND_FLG(%a6),&mda7_flg
2193         bne.w           fu_snan
2194
2195 # the instruction was "fmove.p fpn,-(a7)" from supervisor mode.
2196 # the strategy is to move the exception frame "down" 12 bytes. then, we
2197 # can store the default result where the exception frame was.
2198         fmovm.x         EXC_FPREGS(%a6),&0xc0   # restore fp0/fp1
2199         fmovm.l         USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs
2200         movm.l          EXC_DREGS(%a6),&0x0303  # restore d0-d1/a0-a1
2201
2202         mov.w           &0x30d8,EXC_VOFF(%a6)   # vector offset = 0xd0
2203         mov.w           &0xe006,2+FP_SRC(%a6)   # set fsave status
2204
2205         frestore        FP_SRC(%a6)             # restore src operand
2206
2207         mov.l           (%a6),%a6               # restore frame pointer
2208
2209         mov.l           LOCAL_SIZE+EXC_SR(%sp),LOCAL_SIZE+EXC_SR-0xc(%sp)
2210         mov.l           LOCAL_SIZE+2+EXC_PC(%sp),LOCAL_SIZE+2+EXC_PC-0xc(%sp)
2211         mov.l           LOCAL_SIZE+EXC_EA(%sp),LOCAL_SIZE+EXC_EA-0xc(%sp)
2212
2213 # now, we copy the default result to it's proper location
2214         mov.l           LOCAL_SIZE+FP_DST_EX(%sp),LOCAL_SIZE+0x4(%sp)
2215         mov.l           LOCAL_SIZE+FP_DST_HI(%sp),LOCAL_SIZE+0x8(%sp)
2216         mov.l           LOCAL_SIZE+FP_DST_LO(%sp),LOCAL_SIZE+0xc(%sp)
2217
2218         add.l           &LOCAL_SIZE-0x8,%sp
2219
2220
2221         bra.l           _real_snan
2222
2223 fu_operr_p:
2224         btst            &0x5,EXC_SR(%a6)
2225         bne.w           fu_operr_p_s
2226
2227         mov.l           EXC_A7(%a6),%a0
2228         mov.l           %a0,%usp
2229         bra.w           fu_operr
2230
2231 fu_operr_p_s:
2232         cmpi.b          SPCOND_FLG(%a6),&mda7_flg
2233         bne.w           fu_operr
2234
2235 # the instruction was "fmove.p fpn,-(a7)" from supervisor mode.
2236 # the strategy is to move the exception frame "down" 12 bytes. then, we
2237 # can store the default result where the exception frame was.
2238         fmovm.x         EXC_FPREGS(%a6),&0xc0   # restore fp0/fp1
2239         fmovm.l         USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs
2240         movm.l          EXC_DREGS(%a6),&0x0303  # restore d0-d1/a0-a1
2241
2242         mov.w           &0x30d0,EXC_VOFF(%a6)   # vector offset = 0xd0
2243         mov.w           &0xe004,2+FP_SRC(%a6)   # set fsave status
2244
2245         frestore        FP_SRC(%a6)             # restore src operand
2246
2247         mov.l           (%a6),%a6               # restore frame pointer
2248
2249         mov.l           LOCAL_SIZE+EXC_SR(%sp),LOCAL_SIZE+EXC_SR-0xc(%sp)
2250         mov.l           LOCAL_SIZE+2+EXC_PC(%sp),LOCAL_SIZE+2+EXC_PC-0xc(%sp)
2251         mov.l           LOCAL_SIZE+EXC_EA(%sp),LOCAL_SIZE+EXC_EA-0xc(%sp)
2252
2253 # now, we copy the default result to it's proper location
2254         mov.l           LOCAL_SIZE+FP_DST_EX(%sp),LOCAL_SIZE+0x4(%sp)
2255         mov.l           LOCAL_SIZE+FP_DST_HI(%sp),LOCAL_SIZE+0x8(%sp)
2256         mov.l           LOCAL_SIZE+FP_DST_LO(%sp),LOCAL_SIZE+0xc(%sp)
2257
2258         add.l           &LOCAL_SIZE-0x8,%sp
2259
2260
2261         bra.l           _real_operr
2262
2263 fu_inex_p2:
2264         btst            &0x5,EXC_SR(%a6)
2265         bne.w           fu_inex_s_p2
2266
2267         mov.l           EXC_A7(%a6),%a0
2268         mov.l           %a0,%usp
2269         bra.w           fu_inex
2270
2271 fu_inex_s_p2:
2272         cmpi.b          SPCOND_FLG(%a6),&mda7_flg
2273         bne.w           fu_inex
2274
2275 # the instruction was "fmove.p fpn,-(a7)" from supervisor mode.
2276 # the strategy is to move the exception frame "down" 12 bytes. then, we
2277 # can store the default result where the exception frame was.
2278         fmovm.x         EXC_FPREGS(%a6),&0xc0   # restore fp0/fp1
2279         fmovm.l         USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs
2280         movm.l          EXC_DREGS(%a6),&0x0303  # restore d0-d1/a0-a1
2281
2282         mov.w           &0x30c4,EXC_VOFF(%a6)   # vector offset = 0xc4
2283         mov.w           &0xe001,2+FP_SRC(%a6)   # set fsave status
2284
2285         frestore        FP_SRC(%a6)             # restore src operand
2286
2287         mov.l           (%a6),%a6               # restore frame pointer
2288
2289         mov.l           LOCAL_SIZE+EXC_SR(%sp),LOCAL_SIZE+EXC_SR-0xc(%sp)
2290         mov.l           LOCAL_SIZE+2+EXC_PC(%sp),LOCAL_SIZE+2+EXC_PC-0xc(%sp)
2291         mov.l           LOCAL_SIZE+EXC_EA(%sp),LOCAL_SIZE+EXC_EA-0xc(%sp)
2292
2293 # now, we copy the default result to it's proper location
2294         mov.l           LOCAL_SIZE+FP_DST_EX(%sp),LOCAL_SIZE+0x4(%sp)
2295         mov.l           LOCAL_SIZE+FP_DST_HI(%sp),LOCAL_SIZE+0x8(%sp)
2296         mov.l           LOCAL_SIZE+FP_DST_LO(%sp),LOCAL_SIZE+0xc(%sp)
2297
2298         add.l           &LOCAL_SIZE-0x8,%sp
2299
2300
2301         bra.l           _real_inex
2302
2303 #########################################################################
2304
2305 #
2306 # if we're stuffing a source operand back into an fsave frame then we
2307 # have to make sure that for single or double source operands that the
2308 # format stuffed is as weird as the hardware usually makes it.
2309 #
2310         global          funimp_skew
2311 funimp_skew:
2312         bfextu          EXC_EXTWORD(%a6){&3:&3},%d0 # extract src specifier
2313         cmpi.b          %d0,&0x1                # was src sgl?
2314         beq.b           funimp_skew_sgl         # yes
2315         cmpi.b          %d0,&0x5                # was src dbl?
2316         beq.b           funimp_skew_dbl         # yes
2317         rts
2318
2319 funimp_skew_sgl:
2320         mov.w           FP_SRC_EX(%a6),%d0      # fetch DENORM exponent
2321         andi.w          &0x7fff,%d0             # strip sign
2322         beq.b           funimp_skew_sgl_not
2323         cmpi.w          %d0,&0x3f80
2324         bgt.b           funimp_skew_sgl_not
2325         neg.w           %d0                     # make exponent negative
2326         addi.w          &0x3f81,%d0             # find amt to shift
2327         mov.l           FP_SRC_HI(%a6),%d1      # fetch DENORM hi(man)
2328         lsr.l           %d0,%d1                 # shift it
2329         bset            &31,%d1                 # set j-bit
2330         mov.l           %d1,FP_SRC_HI(%a6)      # insert new hi(man)
2331         andi.w          &0x8000,FP_SRC_EX(%a6)  # clear old exponent
2332         ori.w           &0x3f80,FP_SRC_EX(%a6)  # insert new "skewed" exponent
2333 funimp_skew_sgl_not:
2334         rts
2335
2336 funimp_skew_dbl:
2337         mov.w           FP_SRC_EX(%a6),%d0      # fetch DENORM exponent
2338         andi.w          &0x7fff,%d0             # strip sign
2339         beq.b           funimp_skew_dbl_not
2340         cmpi.w          %d0,&0x3c00
2341         bgt.b           funimp_skew_dbl_not
2342
2343         tst.b           FP_SRC_EX(%a6)          # make "internal format"
2344         smi.b           0x2+FP_SRC(%a6)
2345         mov.w           %d0,FP_SRC_EX(%a6)      # insert exponent with cleared sign
2346         clr.l           %d0                     # clear g,r,s
2347         lea             FP_SRC(%a6),%a0         # pass ptr to src op
2348         mov.w           &0x3c01,%d1             # pass denorm threshold
2349         bsr.l           dnrm_lp                 # denorm it
2350         mov.w           &0x3c00,%d0             # new exponent
2351         tst.b           0x2+FP_SRC(%a6)         # is sign set?
2352         beq.b           fss_dbl_denorm_done     # no
2353         bset            &15,%d0                 # set sign
2354 fss_dbl_denorm_done:
2355         bset            &0x7,FP_SRC_HI(%a6)     # set j-bit
2356         mov.w           %d0,FP_SRC_EX(%a6)      # insert new exponent
2357 funimp_skew_dbl_not:
2358         rts
2359
2360 #########################################################################
2361         global          _mem_write2
2362 _mem_write2:
2363         btst            &0x5,EXC_SR(%a6)
2364         beq.l           _dmem_write
2365         mov.l           0x0(%a0),FP_DST_EX(%a6)
2366         mov.l           0x4(%a0),FP_DST_HI(%a6)
2367         mov.l           0x8(%a0),FP_DST_LO(%a6)
2368         clr.l           %d1
2369         rts
2370
2371 #########################################################################
2372 # XDEF **************************************************************** #
2373 #       _fpsp_effadd(): 060FPSP entry point for FP "Unimplemented       #
2374 #                       effective address" exception.                   #
2375 #                                                                       #
2376 #       This handler should be the first code executed upon taking the  #
2377 #       FP Unimplemented Effective Address exception in an operating    #
2378 #       system.                                                         #
2379 #                                                                       #
2380 # XREF **************************************************************** #
2381 #       _imem_read_long() - read instruction longword                   #
2382 #       fix_skewed_ops() - adjust src operand in fsave frame            #
2383 #       set_tag_x() - determine optype of src/dst operands              #
2384 #       store_fpreg() - store opclass 0 or 2 result to FP regfile       #
2385 #       unnorm_fix() - change UNNORM operands to NORM or ZERO           #
2386 #       load_fpn2() - load dst operand from FP regfile                  #
2387 #       tbl_unsupp - add of table of emulation routines for opclass 0,2 #
2388 #       decbin() - convert packed data to FP binary data                #
2389 #       _real_fpu_disabled() - "callout" for "FPU disabled" exception   #
2390 #       _real_access() - "callout" for access error exception           #
2391 #       _mem_read() - read extended immediate operand from memory       #
2392 #       _fpsp_done() - "callout" for exit; work all done                #
2393 #       _real_trace() - "callout" for Trace enabled exception           #
2394 #       fmovm_dynamic() - emulate dynamic fmovm instruction             #
2395 #       fmovm_ctrl() - emulate fmovm control instruction                #
2396 #                                                                       #
2397 # INPUT *************************************************************** #
2398 #       - The system stack contains the "Unimplemented <ea>" stk frame  #
2399 #                                                                       #
2400 # OUTPUT ************************************************************** #
2401 #       If access error:                                                #
2402 #       - The system stack is changed to an access error stack frame    #
2403 #       If FPU disabled:                                                #
2404 #       - The system stack is changed to an FPU disabled stack frame    #
2405 #       If Trace exception enabled:                                     #
2406 #       - The system stack is changed to a Trace exception stack frame  #
2407 #       Else: (normal case)                                             #
2408 #       - None (correct result has been stored as appropriate)          #
2409 #                                                                       #
2410 # ALGORITHM *********************************************************** #
2411 #       This exception handles 3 types of operations:                   #
2412 # (1) FP Instructions using extended precision or packed immediate      #
2413 #     addressing mode.                                                  #
2414 # (2) The "fmovm.x" instruction w/ dynamic register specification.      #
2415 # (3) The "fmovm.l" instruction w/ 2 or 3 control registers.            #
2416 #                                                                       #
2417 #       For immediate data operations, the data is read in w/ a         #
2418 # _mem_read() "callout", converted to FP binary (if packed), and used   #
2419 # as the source operand to the instruction specified by the instruction #
2420 # word. If no FP exception should be reported ads a result of the       #
2421 # emulation, then the result is stored to the destination register and  #
2422 # the handler exits through _fpsp_done(). If an enabled exc has been    #
2423 # signalled as a result of emulation, then an fsave state frame         #
2424 # corresponding to the FP exception type must be entered into the 060   #
2425 # FPU before exiting. In either the enabled or disabled cases, we       #
2426 # must also check if a Trace exception is pending, in which case, we    #
2427 # must create a Trace exception stack frame from the current exception  #
2428 # stack frame. If no Trace is pending, we simply exit through           #
2429 # _fpsp_done().                                                         #
2430 #       For "fmovm.x", call the routine fmovm_dynamic() which will      #
2431 # decode and emulate the instruction. No FP exceptions can be pending   #
2432 # as a result of this operation emulation. A Trace exception can be     #
2433 # pending, though, which means the current stack frame must be changed  #
2434 # to a Trace stack frame and an exit made through _real_trace().        #
2435 # For the case of "fmovm.x Dn,-(a7)", where the offending instruction   #
2436 # was executed from supervisor mode, this handler must store the FP     #
2437 # register file values to the system stack by itself since              #
2438 # fmovm_dynamic() can't handle this. A normal exit is made through      #
2439 # fpsp_done().                                                          #
2440 #       For "fmovm.l", fmovm_ctrl() is used to emulate the instruction. #
2441 # Again, a Trace exception may be pending and an exit made through      #
2442 # _real_trace(). Else, a normal exit is made through _fpsp_done().      #
2443 #                                                                       #
2444 #       Before any of the above is attempted, it must be checked to     #
2445 # see if the FPU is disabled. Since the "Unimp <ea>" exception is taken #
2446 # before the "FPU disabled" exception, but the "FPU disabled" exception #
2447 # has higher priority, we check the disabled bit in the PCR. If set,    #
2448 # then we must create an 8 word "FPU disabled" exception stack frame    #
2449 # from the current 4 word exception stack frame. This includes          #
2450 # reproducing the effective address of the instruction to put on the    #
2451 # new stack frame.                                                      #
2452 #                                                                       #
2453 #       In the process of all emulation work, if a _mem_read()          #
2454 # "callout" returns a failing result indicating an access error, then   #
2455 # we must create an access error stack frame from the current stack     #
2456 # frame. This information includes a faulting address and a fault-      #
2457 # status-longword. These are created within this handler.               #
2458 #                                                                       #
2459 #########################################################################
2460
2461         global          _fpsp_effadd
2462 _fpsp_effadd:
2463
2464 # This exception type takes priority over the "Line F Emulator"
2465 # exception. Therefore, the FPU could be disabled when entering here.
2466 # So, we must check to see if it's disabled and handle that case separately.
2467         mov.l           %d0,-(%sp)              # save d0
2468         movc            %pcr,%d0                # load proc cr
2469         btst            &0x1,%d0                # is FPU disabled?
2470         bne.w           iea_disabled            # yes
2471         mov.l           (%sp)+,%d0              # restore d0
2472
2473         link            %a6,&-LOCAL_SIZE        # init stack frame
2474
2475         movm.l          &0x0303,EXC_DREGS(%a6)  # save d0-d1/a0-a1
2476         fmovm.l         %fpcr,%fpsr,%fpiar,USER_FPCR(%a6) # save ctrl regs
2477         fmovm.x         &0xc0,EXC_FPREGS(%a6)   # save fp0-fp1 on stack
2478
2479 # PC of instruction that took the exception is the PC in the frame
2480         mov.l           EXC_PC(%a6),EXC_EXTWPTR(%a6)
2481
2482         mov.l           EXC_EXTWPTR(%a6),%a0    # fetch instruction addr
2483         addq.l          &0x4,EXC_EXTWPTR(%a6)   # incr instruction ptr
2484         bsr.l           _imem_read_long         # fetch the instruction words
2485         mov.l           %d0,EXC_OPWORD(%a6)     # store OPWORD and EXTWORD
2486
2487 #########################################################################
2488
2489         tst.w           %d0                     # is operation fmovem?
2490         bmi.w           iea_fmovm               # yes
2491
2492 #
2493 # here, we will have:
2494 #       fabs    fdabs   fsabs           facos           fmod
2495 #       fadd    fdadd   fsadd           fasin           frem
2496 #       fcmp                            fatan           fscale
2497 #       fdiv    fddiv   fsdiv           fatanh          fsin
2498 #       fint                            fcos            fsincos
2499 #       fintrz                          fcosh           fsinh
2500 #       fmove   fdmove  fsmove          fetox           ftan
2501 #       fmul    fdmul   fsmul           fetoxm1         ftanh
2502 #       fneg    fdneg   fsneg           fgetexp         ftentox
2503 #       fsgldiv                         fgetman         ftwotox
2504 #       fsglmul                         flog10
2505 #       fsqrt                           flog2
2506 #       fsub    fdsub   fssub           flogn
2507 #       ftst                            flognp1
2508 # which can all use f<op>.{x,p}
2509 # so, now it's immediate data extended precision AND PACKED FORMAT!
2510 #
2511 iea_op:
2512         andi.l          &0x00ff00ff,USER_FPSR(%a6)
2513
2514         btst            &0xa,%d0                # is src fmt x or p?
2515         bne.b           iea_op_pack             # packed
2516
2517
2518         mov.l           EXC_EXTWPTR(%a6),%a0    # pass: ptr to #<data>
2519         lea             FP_SRC(%a6),%a1         # pass: ptr to super addr
2520         mov.l           &0xc,%d0                # pass: 12 bytes
2521         bsr.l           _imem_read              # read extended immediate
2522
2523         tst.l           %d1                     # did ifetch fail?
2524         bne.w           iea_iacc                # yes
2525
2526         bra.b           iea_op_setsrc
2527
2528 iea_op_pack:
2529
2530         mov.l           EXC_EXTWPTR(%a6),%a0    # pass: ptr to #<data>
2531         lea             FP_SRC(%a6),%a1         # pass: ptr to super dst
2532         mov.l           &0xc,%d0                # pass: 12 bytes
2533         bsr.l           _imem_read              # read packed operand
2534
2535         tst.l           %d1                     # did ifetch fail?
2536         bne.w           iea_iacc                # yes
2537
2538 # The packed operand is an INF or a NAN if the exponent field is all ones.
2539         bfextu          FP_SRC(%a6){&1:&15},%d0 # get exp
2540         cmpi.w          %d0,&0x7fff             # INF or NAN?
2541         beq.b           iea_op_setsrc           # operand is an INF or NAN
2542
2543 # The packed operand is a zero if the mantissa is all zero, else it's
2544 # a normal packed op.
2545         mov.b           3+FP_SRC(%a6),%d0       # get byte 4
2546         andi.b          &0x0f,%d0               # clear all but last nybble
2547         bne.b           iea_op_gp_not_spec      # not a zero
2548         tst.l           FP_SRC_HI(%a6)          # is lw 2 zero?
2549         bne.b           iea_op_gp_not_spec      # not a zero
2550         tst.l           FP_SRC_LO(%a6)          # is lw 3 zero?
2551         beq.b           iea_op_setsrc           # operand is a ZERO
2552 iea_op_gp_not_spec:
2553         lea             FP_SRC(%a6),%a0         # pass: ptr to packed op
2554         bsr.l           decbin                  # convert to extended
2555         fmovm.x         &0x80,FP_SRC(%a6)       # make this the srcop
2556
2557 iea_op_setsrc:
2558         addi.l          &0xc,EXC_EXTWPTR(%a6)   # update extension word pointer
2559
2560 # FP_SRC now holds the src operand.
2561         lea             FP_SRC(%a6),%a0         # pass: ptr to src op
2562         bsr.l           set_tag_x               # tag the operand type
2563         mov.b           %d0,STAG(%a6)           # could be ANYTHING!!!
2564         cmpi.b          %d0,&UNNORM             # is operand an UNNORM?
2565         bne.b           iea_op_getdst           # no
2566         bsr.l           unnorm_fix              # yes; convert to NORM/DENORM/ZERO
2567         mov.b           %d0,STAG(%a6)           # set new optype tag
2568 iea_op_getdst:
2569         clr.b           STORE_FLG(%a6)          # clear "store result" boolean
2570
2571         btst            &0x5,1+EXC_CMDREG(%a6)  # is operation monadic or dyadic?
2572         beq.b           iea_op_extract          # monadic
2573         btst            &0x4,1+EXC_CMDREG(%a6)  # is operation fsincos,ftst,fcmp?
2574         bne.b           iea_op_spec             # yes
2575
2576 iea_op_loaddst:
2577         bfextu          EXC_CMDREG(%a6){&6:&3},%d0 # fetch dst regno
2578         bsr.l           load_fpn2               # load dst operand
2579
2580         lea             FP_DST(%a6),%a0         # pass: ptr to dst op
2581         bsr.l           set_tag_x               # tag the operand type
2582         mov.b           %d0,DTAG(%a6)           # could be ANYTHING!!!
2583         cmpi.b          %d0,&UNNORM             # is operand an UNNORM?
2584         bne.b           iea_op_extract          # no
2585         bsr.l           unnorm_fix              # yes; convert to NORM/DENORM/ZERO
2586         mov.b           %d0,DTAG(%a6)           # set new optype tag
2587         bra.b           iea_op_extract
2588
2589 # the operation is fsincos, ftst, or fcmp. only fcmp is dyadic
2590 iea_op_spec:
2591         btst            &0x3,1+EXC_CMDREG(%a6)  # is operation fsincos?
2592         beq.b           iea_op_extract          # yes
2593 # now, we're left with ftst and fcmp. so, first let's tag them so that they don't
2594 # store a result. then, only fcmp will branch back and pick up a dst operand.
2595         st              STORE_FLG(%a6)          # don't store a final result
2596         btst            &0x1,1+EXC_CMDREG(%a6)  # is operation fcmp?
2597         beq.b           iea_op_loaddst          # yes
2598
2599 iea_op_extract:
2600         clr.l           %d0
2601         mov.b           FPCR_MODE(%a6),%d0      # pass: rnd mode,prec
2602
2603         mov.b           1+EXC_CMDREG(%a6),%d1
2604         andi.w          &0x007f,%d1             # extract extension
2605
2606         fmov.l          &0x0,%fpcr
2607         fmov.l          &0x0,%fpsr
2608
2609         lea             FP_SRC(%a6),%a0
2610         lea             FP_DST(%a6),%a1
2611
2612         mov.l           (tbl_unsupp.l,%pc,%d1.w*4),%d1 # fetch routine addr
2613         jsr             (tbl_unsupp.l,%pc,%d1.l*1)
2614
2615 #
2616 # Exceptions in order of precedence:
2617 #       BSUN    : none
2618 #       SNAN    : all operations
2619 #       OPERR   : all reg-reg or mem-reg operations that can normally operr
2620 #       OVFL    : same as OPERR
2621 #       UNFL    : same as OPERR
2622 #       DZ      : same as OPERR
2623 #       INEX2   : same as OPERR
2624 #       INEX1   : all packed immediate operations
2625 #
2626
2627 # we determine the highest priority exception(if any) set by the
2628 # emulation routine that has also been enabled by the user.
2629         mov.b           FPCR_ENABLE(%a6),%d0    # fetch exceptions enabled
2630         bne.b           iea_op_ena              # some are enabled
2631
2632 # now, we save the result, unless, of course, the operation was ftst or fcmp.
2633 # these don't save results.
2634 iea_op_save:
2635         tst.b           STORE_FLG(%a6)          # does this op store a result?
2636         bne.b           iea_op_exit1            # exit with no frestore
2637
2638 iea_op_store:
2639         bfextu          EXC_CMDREG(%a6){&6:&3},%d0 # fetch dst regno
2640         bsr.l           store_fpreg             # store the result
2641
2642 iea_op_exit1:
2643         mov.l           EXC_PC(%a6),USER_FPIAR(%a6) # set FPIAR to "Current PC"
2644         mov.l           EXC_EXTWPTR(%a6),EXC_PC(%a6) # set "Next PC" in exc frame
2645
2646         fmovm.x         EXC_FPREGS(%a6),&0xc0   # restore fp0-fp1
2647         fmovm.l         USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs
2648         movm.l          EXC_DREGS(%a6),&0x0303  # restore d0-d1/a0-a1
2649
2650         unlk            %a6                     # unravel the frame
2651
2652         btst            &0x7,(%sp)              # is trace on?
2653         bne.w           iea_op_trace            # yes
2654
2655         bra.l           _fpsp_done              # exit to os
2656
2657 iea_op_ena:
2658         and.b           FPSR_EXCEPT(%a6),%d0    # keep only ones enable and set
2659         bfffo           %d0{&24:&8},%d0         # find highest priority exception
2660         bne.b           iea_op_exc              # at least one was set
2661
2662 # no exception occurred. now, did a disabled, exact overflow occur with inexact
2663 # enabled? if so, then we have to stuff an overflow frame into the FPU.
2664         btst            &ovfl_bit,FPSR_EXCEPT(%a6) # did overflow occur?
2665         beq.b           iea_op_save
2666
2667 iea_op_ovfl:
2668         btst            &inex2_bit,FPCR_ENABLE(%a6) # is inexact enabled?
2669         beq.b           iea_op_store            # no
2670         bra.b           iea_op_exc_ovfl         # yes
2671
2672 # an enabled exception occurred. we have to insert the exception type back into
2673 # the machine.
2674 iea_op_exc:
2675         subi.l          &24,%d0                 # fix offset to be 0-8
2676         cmpi.b          %d0,&0x6                # is exception INEX?
2677         bne.b           iea_op_exc_force        # no
2678
2679 # the enabled exception was inexact. so, if it occurs with an overflow
2680 # or underflow that was disabled, then we have to force an overflow or
2681 # underflow frame.
2682         btst            &ovfl_bit,FPSR_EXCEPT(%a6) # did overflow occur?
2683         bne.b           iea_op_exc_ovfl         # yes
2684         btst            &unfl_bit,FPSR_EXCEPT(%a6) # did underflow occur?
2685         bne.b           iea_op_exc_unfl         # yes
2686
2687 iea_op_exc_force:
2688         mov.w           (tbl_iea_except.b,%pc,%d0.w*2),2+FP_SRC(%a6)
2689         bra.b           iea_op_exit2            # exit with frestore
2690
2691 tbl_iea_except:
2692         short           0xe002, 0xe006, 0xe004, 0xe005
2693         short           0xe003, 0xe002, 0xe001, 0xe001
2694
2695 iea_op_exc_ovfl:
2696         mov.w           &0xe005,2+FP_SRC(%a6)
2697         bra.b           iea_op_exit2
2698
2699 iea_op_exc_unfl:
2700         mov.w           &0xe003,2+FP_SRC(%a6)
2701
2702 iea_op_exit2:
2703         mov.l           EXC_PC(%a6),USER_FPIAR(%a6) # set FPIAR to "Current PC"
2704         mov.l           EXC_EXTWPTR(%a6),EXC_PC(%a6) # set "Next PC" in exc frame
2705
2706         fmovm.x         EXC_FPREGS(%a6),&0xc0   # restore fp0-fp1
2707         fmovm.l         USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs
2708         movm.l          EXC_DREGS(%a6),&0x0303  # restore d0-d1/a0-a1
2709
2710         frestore        FP_SRC(%a6)             # restore exceptional state
2711
2712         unlk            %a6                     # unravel the frame
2713
2714         btst            &0x7,(%sp)              # is trace on?
2715         bne.b           iea_op_trace            # yes
2716
2717         bra.l           _fpsp_done              # exit to os
2718
2719 #
2720 # The opclass two instruction that took an "Unimplemented Effective Address"
2721 # exception was being traced. Make the "current" PC the FPIAR and put it in
2722 # the trace stack frame then jump to _real_trace().
2723 #
2724 #                UNIMP EA FRAME            TRACE FRAME
2725 #               *****************       *****************
2726 #               * 0x0 *  0x0f0  *       *    Current    *
2727 #               *****************       *      PC       *
2728 #               *    Current    *       *****************
2729 #               *      PC       *       * 0x2 *  0x024  *
2730 #               *****************       *****************
2731 #               *      SR       *       *     Next      *
2732 #               *****************       *      PC       *
2733 #                                       *****************
2734 #                                       *      SR       *
2735 #                                       *****************
2736 iea_op_trace:
2737         mov.l           (%sp),-(%sp)            # shift stack frame "down"
2738         mov.w           0x8(%sp),0x4(%sp)
2739         mov.w           &0x2024,0x6(%sp)        # stk fmt = 0x2; voff = 0x024
2740         fmov.l          %fpiar,0x8(%sp)         # "Current PC" is in FPIAR
2741
2742         bra.l           _real_trace
2743
2744 #########################################################################
2745 iea_fmovm:
2746         btst            &14,%d0                 # ctrl or data reg
2747         beq.w           iea_fmovm_ctrl
2748
2749 iea_fmovm_data:
2750
2751         btst            &0x5,EXC_SR(%a6)        # user or supervisor mode
2752         bne.b           iea_fmovm_data_s
2753
2754 iea_fmovm_data_u:
2755         mov.l           %usp,%a0
2756         mov.l           %a0,EXC_A7(%a6)         # store current a7
2757         bsr.l           fmovm_dynamic           # do dynamic fmovm
2758         mov.l           EXC_A7(%a6),%a0         # load possibly new a7
2759         mov.l           %a0,%usp                # update usp
2760         bra.w           iea_fmovm_exit
2761
2762 iea_fmovm_data_s:
2763         clr.b           SPCOND_FLG(%a6)
2764         lea             0x2+EXC_VOFF(%a6),%a0
2765         mov.l           %a0,EXC_A7(%a6)
2766         bsr.l           fmovm_dynamic           # do dynamic fmovm
2767
2768         cmpi.b          SPCOND_FLG(%a6),&mda7_flg
2769         beq.w           iea_fmovm_data_predec
2770         cmpi.b          SPCOND_FLG(%a6),&mia7_flg
2771         bne.w           iea_fmovm_exit
2772
2773 # right now, d0 = the size.
2774 # the data has been fetched from the supervisor stack, but we have not
2775 # incremented the stack pointer by the appropriate number of bytes.
2776 # do it here.
2777 iea_fmovm_data_postinc:
2778         btst            &0x7,EXC_SR(%a6)
2779         bne.b           iea_fmovm_data_pi_trace
2780
2781         mov.w           EXC_SR(%a6),(EXC_SR,%a6,%d0)
2782         mov.l           EXC_EXTWPTR(%a6),(EXC_PC,%a6,%d0)
2783         mov.w           &0x00f0,(EXC_VOFF,%a6,%d0)
2784
2785         lea             (EXC_SR,%a6,%d0),%a0
2786         mov.l           %a0,EXC_SR(%a6)
2787
2788         fmovm.x         EXC_FP0(%a6),&0xc0      # restore fp0-fp1
2789         fmovm.l         USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs
2790         movm.l          EXC_DREGS(%a6),&0x0303  # restore d0-d1/a0-a1
2791
2792         unlk            %a6
2793         mov.l           (%sp)+,%sp
2794         bra.l           _fpsp_done
2795
2796 iea_fmovm_data_pi_trace:
2797         mov.w           EXC_SR(%a6),(EXC_SR-0x4,%a6,%d0)
2798         mov.l           EXC_EXTWPTR(%a6),(EXC_PC-0x4,%a6,%d0)
2799         mov.w           &0x2024,(EXC_VOFF-0x4,%a6,%d0)
2800         mov.l           EXC_PC(%a6),(EXC_VOFF+0x2-0x4,%a6,%d0)
2801
2802         lea             (EXC_SR-0x4,%a6,%d0),%a0
2803         mov.l           %a0,EXC_SR(%a6)
2804
2805         fmovm.x         EXC_FP0(%a6),&0xc0      # restore fp0-fp1
2806         fmovm.l         USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs
2807         movm.l          EXC_DREGS(%a6),&0x0303  # restore d0-d1/a0-a1
2808
2809         unlk            %a6
2810         mov.l           (%sp)+,%sp
2811         bra.l           _real_trace
2812
2813 # right now, d1 = size and d0 = the strg.
2814 iea_fmovm_data_predec:
2815         mov.b           %d1,EXC_VOFF(%a6)       # store strg
2816         mov.b           %d0,0x1+EXC_VOFF(%a6)   # store size
2817
2818         fmovm.x         EXC_FP0(%a6),&0xc0      # restore fp0-fp1
2819         fmovm.l         USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs
2820         movm.l          EXC_DREGS(%a6),&0x0303  # restore d0-d1/a0-a1
2821
2822         mov.l           (%a6),-(%sp)            # make a copy of a6
2823         mov.l           %d0,-(%sp)              # save d0
2824         mov.l           %d1,-(%sp)              # save d1
2825         mov.l           EXC_EXTWPTR(%a6),-(%sp) # make a copy of Next PC
2826
2827         clr.l           %d0
2828         mov.b           0x1+EXC_VOFF(%a6),%d0   # fetch size
2829         neg.l           %d0                     # get negative of size
2830
2831         btst            &0x7,EXC_SR(%a6)        # is trace enabled?
2832         beq.b           iea_fmovm_data_p2
2833
2834         mov.w           EXC_SR(%a6),(EXC_SR-0x4,%a6,%d0)
2835         mov.l           EXC_PC(%a6),(EXC_VOFF-0x2,%a6,%d0)
2836         mov.l           (%sp)+,(EXC_PC-0x4,%a6,%d0)
2837         mov.w           &0x2024,(EXC_VOFF-0x4,%a6,%d0)
2838
2839         pea             (%a6,%d0)               # create final sp
2840         bra.b           iea_fmovm_data_p3
2841
2842 iea_fmovm_data_p2:
2843         mov.w           EXC_SR(%a6),(EXC_SR,%a6,%d0)
2844         mov.l           (%sp)+,(EXC_PC,%a6,%d0)
2845         mov.w           &0x00f0,(EXC_VOFF,%a6,%d0)
2846
2847         pea             (0x4,%a6,%d0)           # create final sp
2848
2849 iea_fmovm_data_p3:
2850         clr.l           %d1
2851         mov.b           EXC_VOFF(%a6),%d1       # fetch strg
2852
2853         tst.b           %d1
2854         bpl.b           fm_1
2855         fmovm.x         &0x80,(0x4+0x8,%a6,%d0)
2856         addi.l          &0xc,%d0
2857 fm_1:
2858         lsl.b           &0x1,%d1
2859         bpl.b           fm_2
2860         fmovm.x         &0x40,(0x4+0x8,%a6,%d0)
2861         addi.l          &0xc,%d0
2862 fm_2:
2863         lsl.b           &0x1,%d1
2864         bpl.b           fm_3
2865         fmovm.x         &0x20,(0x4+0x8,%a6,%d0)
2866         addi.l          &0xc,%d0
2867 fm_3:
2868         lsl.b           &0x1,%d1
2869         bpl.b           fm_4
2870         fmovm.x         &0x10,(0x4+0x8,%a6,%d0)
2871         addi.l          &0xc,%d0
2872 fm_4:
2873         lsl.b           &0x1,%d1
2874         bpl.b           fm_5
2875         fmovm.x         &0x08,(0x4+0x8,%a6,%d0)
2876         addi.l          &0xc,%d0
2877 fm_5:
2878         lsl.b           &0x1,%d1
2879         bpl.b           fm_6
2880         fmovm.x         &0x04,(0x4+0x8,%a6,%d0)
2881         addi.l          &0xc,%d0
2882 fm_6:
2883         lsl.b           &0x1,%d1
2884         bpl.b           fm_7
2885         fmovm.x         &0x02,(0x4+0x8,%a6,%d0)
2886         addi.l          &0xc,%d0
2887 fm_7:
2888         lsl.b           &0x1,%d1
2889         bpl.b           fm_end
2890         fmovm.x         &0x01,(0x4+0x8,%a6,%d0)
2891 fm_end:
2892         mov.l           0x4(%sp),%d1
2893         mov.l           0x8(%sp),%d0
2894         mov.l           0xc(%sp),%a6
2895         mov.l           (%sp)+,%sp
2896
2897         btst            &0x7,(%sp)              # is trace enabled?
2898         beq.l           _fpsp_done
2899         bra.l           _real_trace
2900
2901 #########################################################################
2902 iea_fmovm_ctrl:
2903
2904         bsr.l           fmovm_ctrl              # load ctrl regs
2905
2906 iea_fmovm_exit:
2907         fmovm.x         EXC_FPREGS(%a6),&0xc0   # restore fp0-fp1
2908         fmovm.l         USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs
2909         movm.l          EXC_DREGS(%a6),&0x0303  # restore d0-d1/a0-a1
2910
2911         btst            &0x7,EXC_SR(%a6)        # is trace on?
2912         bne.b           iea_fmovm_trace         # yes
2913
2914         mov.l           EXC_EXTWPTR(%a6),EXC_PC(%a6) # set Next PC
2915
2916         unlk            %a6                     # unravel the frame
2917
2918         bra.l           _fpsp_done              # exit to os
2919
2920 #
2921 # The control reg instruction that took an "Unimplemented Effective Address"
2922 # exception was being traced. The "Current PC" for the trace frame is the
2923 # PC stacked for Unimp EA. The "Next PC" is in EXC_EXTWPTR.
2924 # After fixing the stack frame, jump to _real_trace().
2925 #
2926 #                UNIMP EA FRAME            TRACE FRAME
2927 #               *****************       *****************
2928 #               * 0x0 *  0x0f0  *       *    Current    *
2929 #               *****************       *      PC       *
2930 #               *    Current    *       *****************
2931 #               *      PC       *       * 0x2 *  0x024  *
2932 #               *****************       *****************
2933 #               *      SR       *       *     Next      *
2934 #               *****************       *      PC       *
2935 #                                       *****************
2936 #                                       *      SR       *
2937 #                                       *****************
2938 # this ain't a pretty solution, but it works:
2939 # -restore a6 (not with unlk)
2940 # -shift stack frame down over where old a6 used to be
2941 # -add LOCAL_SIZE to stack pointer
2942 iea_fmovm_trace:
2943         mov.l           (%a6),%a6               # restore frame pointer
2944         mov.w           EXC_SR+LOCAL_SIZE(%sp),0x0+LOCAL_SIZE(%sp)
2945         mov.l           EXC_PC+LOCAL_SIZE(%sp),0x8+LOCAL_SIZE(%sp)
2946         mov.l           EXC_EXTWPTR+LOCAL_SIZE(%sp),0x2+LOCAL_SIZE(%sp)
2947         mov.w           &0x2024,0x6+LOCAL_SIZE(%sp) # stk fmt = 0x2; voff = 0x024
2948         add.l           &LOCAL_SIZE,%sp         # clear stack frame
2949
2950         bra.l           _real_trace
2951
2952 #########################################################################
2953 # The FPU is disabled and so we should really have taken the "Line
2954 # F Emulator" exception. So, here we create an 8-word stack frame
2955 # from our 4-word stack frame. This means we must calculate the length
2956 # of the faulting instruction to get the "next PC". This is trivial for
2957 # immediate operands but requires some extra work for fmovm dynamic
2958 # which can use most addressing modes.
2959 iea_disabled:
2960         mov.l           (%sp)+,%d0              # restore d0
2961
2962         link            %a6,&-LOCAL_SIZE        # init stack frame
2963
2964         movm.l          &0x0303,EXC_DREGS(%a6)  # save d0-d1/a0-a1
2965
2966 # PC of instruction that took the exception is the PC in the frame
2967         mov.l           EXC_PC(%a6),EXC_EXTWPTR(%a6)
2968         mov.l           EXC_EXTWPTR(%a6),%a0    # fetch instruction addr
2969         addq.l          &0x4,EXC_EXTWPTR(%a6)   # incr instruction ptr
2970         bsr.l           _imem_read_long         # fetch the instruction words
2971         mov.l           %d0,EXC_OPWORD(%a6)     # store OPWORD and EXTWORD
2972
2973         tst.w           %d0                     # is instr fmovm?
2974         bmi.b           iea_dis_fmovm           # yes
2975 # instruction is using an extended precision immediate operand. therefore,
2976 # the total instruction length is 16 bytes.
2977 iea_dis_immed:
2978         mov.l           &0x10,%d0               # 16 bytes of instruction
2979         bra.b           iea_dis_cont
2980 iea_dis_fmovm:
2981         btst            &0xe,%d0                # is instr fmovm ctrl
2982         bne.b           iea_dis_fmovm_data      # no
2983 # the instruction is a fmovm.l with 2 or 3 registers.
2984         bfextu          %d0{&19:&3},%d1
2985         mov.l           &0xc,%d0
2986         cmpi.b          %d1,&0x7                # move all regs?
2987         bne.b           iea_dis_cont
2988         addq.l          &0x4,%d0
2989         bra.b           iea_dis_cont
2990 # the instruction is an fmovm.x dynamic which can use many addressing
2991 # modes and thus can have several different total instruction lengths.
2992 # call fmovm_calc_ea which will go through the ea calc process and,
2993 # as a by-product, will tell us how long the instruction is.
2994 iea_dis_fmovm_data:
2995         clr.l           %d0
2996         bsr.l           fmovm_calc_ea
2997         mov.l           EXC_EXTWPTR(%a6),%d0
2998         sub.l           EXC_PC(%a6),%d0
2999 iea_dis_cont:
3000         mov.w           %d0,EXC_VOFF(%a6)       # store stack shift value
3001
3002         movm.l          EXC_DREGS(%a6),&0x0303  # restore d0-d1/a0-a1
3003
3004         unlk            %a6
3005
3006 # here, we actually create the 8-word frame from the 4-word frame,
3007 # with the "next PC" as additional info.
3008 # the <ea> field is let as undefined.
3009         subq.l          &0x8,%sp                # make room for new stack
3010         mov.l           %d0,-(%sp)              # save d0
3011         mov.w           0xc(%sp),0x4(%sp)       # move SR
3012         mov.l           0xe(%sp),0x6(%sp)       # move Current PC
3013         clr.l           %d0
3014         mov.w           0x12(%sp),%d0
3015         mov.l           0x6(%sp),0x10(%sp)      # move Current PC
3016         add.l           %d0,0x6(%sp)            # make Next PC
3017         mov.w           &0x402c,0xa(%sp)        # insert offset,frame format
3018         mov.l           (%sp)+,%d0              # restore d0
3019
3020         bra.l           _real_fpu_disabled
3021
3022 ##########
3023
3024 iea_iacc:
3025         movc            %pcr,%d0
3026         btst            &0x1,%d0
3027         bne.b           iea_iacc_cont
3028         fmovm.l         USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs
3029         fmovm.x         EXC_FPREGS(%a6),&0xc0   # restore fp0-fp1 on stack
3030 iea_iacc_cont:
3031         movm.l          EXC_DREGS(%a6),&0x0303  # restore d0-d1/a0-a1
3032
3033         unlk            %a6
3034
3035         subq.w          &0x8,%sp                # make stack frame bigger
3036         mov.l           0x8(%sp),(%sp)          # store SR,hi(PC)
3037         mov.w           0xc(%sp),0x4(%sp)       # store lo(PC)
3038         mov.w           &0x4008,0x6(%sp)        # store voff
3039         mov.l           0x2(%sp),0x8(%sp)       # store ea
3040         mov.l           &0x09428001,0xc(%sp)    # store fslw
3041
3042 iea_acc_done:
3043         btst            &0x5,(%sp)              # user or supervisor mode?
3044         beq.b           iea_acc_done2           # user
3045         bset            &0x2,0xd(%sp)           # set supervisor TM bit
3046
3047 iea_acc_done2:
3048         bra.l           _real_access
3049
3050 iea_dacc:
3051         lea             -LOCAL_SIZE(%a6),%sp
3052
3053         movc            %pcr,%d1
3054         btst            &0x1,%d1
3055         bne.b           iea_dacc_cont
3056         fmovm.x         EXC_FPREGS(%a6),&0xc0   # restore fp0-fp1 on stack
3057         fmovm.l         LOCAL_SIZE+USER_FPCR(%sp),%fpcr,%fpsr,%fpiar # restore ctrl regs
3058 iea_dacc_cont:
3059         mov.l           (%a6),%a6
3060
3061         mov.l           0x4+LOCAL_SIZE(%sp),-0x8+0x4+LOCAL_SIZE(%sp)
3062         mov.w           0x8+LOCAL_SIZE(%sp),-0x8+0x8+LOCAL_SIZE(%sp)
3063         mov.w           &0x4008,-0x8+0xa+LOCAL_SIZE(%sp)
3064         mov.l           %a0,-0x8+0xc+LOCAL_SIZE(%sp)
3065         mov.w           %d0,-0x8+0x10+LOCAL_SIZE(%sp)
3066         mov.w           &0x0001,-0x8+0x12+LOCAL_SIZE(%sp)
3067
3068         movm.l          LOCAL_SIZE+EXC_DREGS(%sp),&0x0303 # restore d0-d1/a0-a1
3069         add.w           &LOCAL_SIZE-0x4,%sp
3070
3071         bra.b           iea_acc_done
3072
3073 #########################################################################
3074 # XDEF **************************************************************** #
3075 #       _fpsp_operr(): 060FPSP entry point for FP Operr exception.      #
3076 #                                                                       #
3077 #       This handler should be the first code executed upon taking the  #
3078 #       FP Operand Error exception in an operating system.              #
3079 #                                                                       #
3080 # XREF **************************************************************** #
3081 #       _imem_read_long() - read instruction longword                   #
3082 #       fix_skewed_ops() - adjust src operand in fsave frame            #
3083 #       _real_operr() - "callout" to operating system operr handler     #
3084 #       _dmem_write_{byte,word,long}() - store data to mem (opclass 3)  #
3085 #       store_dreg_{b,w,l}() - store data to data regfile (opclass 3)   #
3086 #       facc_out_{b,w,l}() - store to memory took access error (opcl 3) #
3087 #                                                                       #
3088 # INPUT *************************************************************** #
3089 #       - The system stack contains the FP Operr exception frame        #
3090 #       - The fsave frame contains the source operand                   #
3091 #                                                                       #
3092 # OUTPUT ************************************************************** #
3093 #       No access error:                                                #
3094 #       - The system stack is unchanged                                 #
3095 #       - The fsave frame contains the adjusted src op for opclass 0,2  #
3096 #                                                                       #
3097 # ALGORITHM *********************************************************** #
3098 #       In a system where the FP Operr exception is enabled, the goal   #
3099 # is to get to the handler specified at _real_operr(). But, on the 060, #
3100 # for opclass zero and two instruction taking this exception, the       #
3101 # input operand in the fsave frame may be incorrect for some cases      #
3102 # and needs to be corrected. This handler calls fix_skewed_ops() to     #
3103 # do just this and then exits through _real_operr().                    #
3104 #       For opclass 3 instructions, the 060 doesn't store the default   #
3105 # operr result out to memory or data register file as it should.        #
3106 # This code must emulate the move out before finally exiting through    #
3107 # _real_inex(). The move out, if to memory, is performed using          #
3108 # _mem_write() "callout" routines that may return a failing result.     #
3109 # In this special case, the handler must exit through facc_out()        #
3110 # which creates an access error stack frame from the current operr      #
3111 # stack frame.                                                          #
3112 #                                                                       #
3113 #########################################################################
3114
3115         global          _fpsp_operr
3116 _fpsp_operr:
3117
3118         link.w          %a6,&-LOCAL_SIZE        # init stack frame
3119
3120         fsave           FP_SRC(%a6)             # grab the "busy" frame
3121
3122         movm.l          &0x0303,EXC_DREGS(%a6)  # save d0-d1/a0-a1
3123         fmovm.l         %fpcr,%fpsr,%fpiar,USER_FPCR(%a6) # save ctrl regs
3124         fmovm.x         &0xc0,EXC_FPREGS(%a6)   # save fp0-fp1 on stack
3125
3126 # the FPIAR holds the "current PC" of the faulting instruction
3127         mov.l           USER_FPIAR(%a6),EXC_EXTWPTR(%a6)
3128
3129         mov.l           EXC_EXTWPTR(%a6),%a0    # fetch instruction addr
3130         addq.l          &0x4,EXC_EXTWPTR(%a6)   # incr instruction ptr
3131         bsr.l           _imem_read_long         # fetch the instruction words
3132         mov.l           %d0,EXC_OPWORD(%a6)
3133
3134 ##############################################################################
3135
3136         btst            &13,%d0                 # is instr an fmove out?
3137         bne.b           foperr_out              # fmove out
3138
3139
3140 # here, we simply see if the operand in the fsave frame needs to be "unskewed".
3141 # this would be the case for opclass two operations with a source infinity or
3142 # denorm operand in the sgl or dbl format. NANs also become skewed, but can't
3143 # cause an operr so we don't need to check for them here.
3144         lea             FP_SRC(%a6),%a0         # pass: ptr to src op
3145         bsr.l           fix_skewed_ops          # fix src op
3146
3147 foperr_exit:
3148         fmovm.x         EXC_FPREGS(%a6),&0xc0   # restore fp0-fp1
3149         fmovm.l         USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs
3150         movm.l          EXC_DREGS(%a6),&0x0303  # restore d0-d1/a0-a1
3151
3152         frestore        FP_SRC(%a6)
3153
3154         unlk            %a6
3155         bra.l           _real_operr
3156
3157 ########################################################################
3158
3159 #
3160 # the hardware does not save the default result to memory on enabled
3161 # operand error exceptions. we do this here before passing control to
3162 # the user operand error handler.
3163 #
3164 # byte, word, and long destination format operations can pass
3165 # through here. we simply need to test the sign of the src
3166 # operand and save the appropriate minimum or maximum integer value
3167 # to the effective address as pointed to by the stacked effective address.
3168 #
3169 # although packed opclass three operations can take operand error
3170 # exceptions, they won't pass through here since they are caught
3171 # first by the unsupported data format exception handler. that handler
3172 # sends them directly to _real_operr() if necessary.
3173 #
3174 foperr_out:
3175
3176         mov.w           FP_SRC_EX(%a6),%d1      # fetch exponent
3177         andi.w          &0x7fff,%d1
3178         cmpi.w          %d1,&0x7fff
3179         bne.b           foperr_out_not_qnan
3180 # the operand is either an infinity or a QNAN.
3181         tst.l           FP_SRC_LO(%a6)
3182         bne.b           foperr_out_qnan
3183         mov.l           FP_SRC_HI(%a6),%d1
3184         andi.l          &0x7fffffff,%d1
3185         beq.b           foperr_out_not_qnan
3186 foperr_out_qnan:
3187         mov.l           FP_SRC_HI(%a6),L_SCR1(%a6)
3188         bra.b           foperr_out_jmp
3189
3190 foperr_out_not_qnan:
3191         mov.l           &0x7fffffff,%d1
3192         tst.b           FP_SRC_EX(%a6)
3193         bpl.b           foperr_out_not_qnan2
3194         addq.l          &0x1,%d1
3195 foperr_out_not_qnan2:
3196         mov.l           %d1,L_SCR1(%a6)
3197
3198 foperr_out_jmp:
3199         bfextu          %d0{&19:&3},%d0         # extract dst format field
3200         mov.b           1+EXC_OPWORD(%a6),%d1   # extract <ea> mode,reg
3201         mov.w           (tbl_operr.b,%pc,%d0.w*2),%a0
3202         jmp             (tbl_operr.b,%pc,%a0)
3203
3204 tbl_operr:
3205         short           foperr_out_l - tbl_operr # long word integer
3206         short           tbl_operr    - tbl_operr # sgl prec shouldn't happen
3207         short           tbl_operr    - tbl_operr # ext prec shouldn't happen
3208         short           foperr_exit  - tbl_operr # packed won't enter here
3209         short           foperr_out_w - tbl_operr # word integer
3210         short           tbl_operr    - tbl_operr # dbl prec shouldn't happen
3211         short           foperr_out_b - tbl_operr # byte integer
3212         short           tbl_operr    - tbl_operr # packed won't enter here
3213
3214 foperr_out_b:
3215         mov.b           L_SCR1(%a6),%d0         # load positive default result
3216         cmpi.b          %d1,&0x7                # is <ea> mode a data reg?
3217         ble.b           foperr_out_b_save_dn    # yes
3218         mov.l           EXC_EA(%a6),%a0         # pass: <ea> of default result
3219         bsr.l           _dmem_write_byte        # write the default result
3220
3221         tst.l           %d1                     # did dstore fail?
3222         bne.l           facc_out_b              # yes
3223
3224         bra.w           foperr_exit
3225 foperr_out_b_save_dn:
3226         andi.w          &0x0007,%d1
3227         bsr.l           store_dreg_b            # store result to regfile
3228         bra.w           foperr_exit
3229
3230 foperr_out_w:
3231         mov.w           L_SCR1(%a6),%d0         # load positive default result
3232         cmpi.b          %d1,&0x7                # is <ea> mode a data reg?
3233         ble.b           foperr_out_w_save_dn    # yes
3234         mov.l           EXC_EA(%a6),%a0         # pass: <ea> of default result
3235         bsr.l           _dmem_write_word        # write the default result
3236
3237         tst.l           %d1                     # did dstore fail?
3238         bne.l           facc_out_w              # yes
3239
3240         bra.w           foperr_exit
3241 foperr_out_w_save_dn:
3242         andi.w          &0x0007,%d1
3243         bsr.l           store_dreg_w            # store result to regfile
3244         bra.w           foperr_exit
3245
3246 foperr_out_l:
3247         mov.l           L_SCR1(%a6),%d0         # load positive default result
3248         cmpi.b          %d1,&0x7                # is <ea> mode a data reg?
3249         ble.b           foperr_out_l_save_dn    # yes
3250         mov.l           EXC_EA(%a6),%a0         # pass: <ea> of default result
3251         bsr.l           _dmem_write_long        # write the default result
3252
3253         tst.l           %d1                     # did dstore fail?
3254         bne.l           facc_out_l              # yes
3255
3256         bra.w           foperr_exit
3257 foperr_out_l_save_dn:
3258         andi.w          &0x0007,%d1
3259         bsr.l           store_dreg_l            # store result to regfile
3260         bra.w           foperr_exit
3261
3262 #########################################################################
3263 # XDEF **************************************************************** #
3264 #       _fpsp_snan(): 060FPSP entry point for FP SNAN exception.        #
3265 #                                                                       #
3266 #       This handler should be the first code executed upon taking the  #
3267 #       FP Signalling NAN exception in an operating system.             #
3268 #                                                                       #
3269 # XREF **************************************************************** #
3270 #       _imem_read_long() - read instruction longword                   #
3271 #       fix_skewed_ops() - adjust src operand in fsave frame            #
3272 #       _real_snan() - "callout" to operating system SNAN handler       #
3273 #       _dmem_write_{byte,word,long}() - store data to mem (opclass 3)  #
3274 #       store_dreg_{b,w,l}() - store data to data regfile (opclass 3)   #
3275 #       facc_out_{b,w,l,d,x}() - store to mem took acc error (opcl 3)   #
3276 #       _calc_ea_fout() - fix An if <ea> is -() or ()+; also get <ea>   #
3277 #                                                                       #
3278 # INPUT *************************************************************** #
3279 #       - The system stack contains the FP SNAN exception frame         #
3280 #       - The fsave frame contains the source operand                   #
3281 #                                                                       #
3282 # OUTPUT ************************************************************** #
3283 #       No access error:                                                #
3284 #       - The system stack is unchanged                                 #
3285 #       - The fsave frame contains the adjusted src op for opclass 0,2  #
3286 #                                                                       #
3287 # ALGORITHM *********************************************************** #
3288 #       In a system where the FP SNAN exception is enabled, the goal    #
3289 # is to get to the handler specified at _real_snan(). But, on the 060,  #
3290 # for opclass zero and two instructions taking this exception, the      #
3291 # input operand in the fsave frame may be incorrect for some cases      #
3292 # and needs to be corrected. This handler calls fix_skewed_ops() to     #
3293 # do just this and then exits through _real_snan().                     #
3294 #       For opclass 3 instructions, the 060 doesn't store the default   #
3295 # SNAN result out to memory or data register file as it should.         #
3296 # This code must emulate the move out before finally exiting through    #
3297 # _real_snan(). The move out, if to memory, is performed using          #
3298 # _mem_write() "callout" routines that may return a failing result.     #
3299 # In this special case, the handler must exit through facc_out()        #
3300 # which creates an access error stack frame from the current SNAN       #
3301 # stack frame.                                                          #
3302 #       For the case of an extended precision opclass 3 instruction,    #
3303 # if the effective addressing mode was -() or ()+, then the address     #
3304 # register must get updated by calling _calc_ea_fout(). If the <ea>     #
3305 # was -(a7) from supervisor mode, then the exception frame currently    #
3306 # on the system stack must be carefully moved "down" to make room       #
3307 # for the operand being moved.                                          #
3308 #                                                                       #
3309 #########################################################################
3310
3311         global          _fpsp_snan
3312 _fpsp_snan:
3313
3314         link.w          %a6,&-LOCAL_SIZE        # init stack frame
3315
3316         fsave           FP_SRC(%a6)             # grab the "busy" frame
3317
3318         movm.l          &0x0303,EXC_DREGS(%a6)  # save d0-d1/a0-a1
3319         fmovm.l         %fpcr,%fpsr,%fpiar,USER_FPCR(%a6) # save ctrl regs
3320         fmovm.x         &0xc0,EXC_FPREGS(%a6)   # save fp0-fp1 on stack
3321
3322 # the FPIAR holds the "current PC" of the faulting instruction
3323         mov.l           USER_FPIAR(%a6),EXC_EXTWPTR(%a6)
3324
3325         mov.l           EXC_EXTWPTR(%a6),%a0    # fetch instruction addr
3326         addq.l          &0x4,EXC_EXTWPTR(%a6)   # incr instruction ptr
3327         bsr.l           _imem_read_long         # fetch the instruction words
3328         mov.l           %d0,EXC_OPWORD(%a6)
3329
3330 ##############################################################################
3331
3332         btst            &13,%d0                 # is instr an fmove out?
3333         bne.w           fsnan_out               # fmove out
3334
3335
3336 # here, we simply see if the operand in the fsave frame needs to be "unskewed".
3337 # this would be the case for opclass two operations with a source infinity or
3338 # denorm operand in the sgl or dbl format. NANs also become skewed and must be
3339 # fixed here.
3340         lea             FP_SRC(%a6),%a0         # pass: ptr to src op
3341         bsr.l           fix_skewed_ops          # fix src op
3342
3343 fsnan_exit:
3344         fmovm.x         EXC_FPREGS(%a6),&0xc0   # restore fp0-fp1
3345         fmovm.l         USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs
3346         movm.l          EXC_DREGS(%a6),&0x0303  # restore d0-d1/a0-a1
3347
3348         frestore        FP_SRC(%a6)
3349
3350         unlk            %a6
3351         bra.l           _real_snan
3352
3353 ########################################################################
3354
3355 #
3356 # the hardware does not save the default result to memory on enabled
3357 # snan exceptions. we do this here before passing control to
3358 # the user snan handler.
3359 #
3360 # byte, word, long, and packed destination format operations can pass
3361 # through here. since packed format operations already were handled by
3362 # fpsp_unsupp(), then we need to do nothing else for them here.
3363 # for byte, word, and long, we simply need to test the sign of the src
3364 # operand and save the appropriate minimum or maximum integer value
3365 # to the effective address as pointed to by the stacked effective address.
3366 #
3367 fsnan_out:
3368
3369         bfextu          %d0{&19:&3},%d0         # extract dst format field
3370         mov.b           1+EXC_OPWORD(%a6),%d1   # extract <ea> mode,reg
3371         mov.w           (tbl_snan.b,%pc,%d0.w*2),%a0
3372         jmp             (tbl_snan.b,%pc,%a0)
3373
3374 tbl_snan:
3375         short           fsnan_out_l - tbl_snan # long word integer
3376         short           fsnan_out_s - tbl_snan # sgl prec shouldn't happen
3377         short           fsnan_out_x - tbl_snan # ext prec shouldn't happen
3378         short           tbl_snan    - tbl_snan # packed needs no help
3379         short           fsnan_out_w - tbl_snan # word integer
3380         short           fsnan_out_d - tbl_snan # dbl prec shouldn't happen
3381         short           fsnan_out_b - tbl_snan # byte integer
3382         short           tbl_snan    - tbl_snan # packed needs no help
3383
3384 fsnan_out_b:
3385         mov.b           FP_SRC_HI(%a6),%d0      # load upper byte of SNAN
3386         bset            &6,%d0                  # set SNAN bit
3387         cmpi.b          %d1,&0x7                # is <ea> mode a data reg?
3388         ble.b           fsnan_out_b_dn          # yes
3389         mov.l           EXC_EA(%a6),%a0         # pass: <ea> of default result
3390         bsr.l           _dmem_write_byte        # write the default result
3391
3392         tst.l           %d1                     # did dstore fail?
3393         bne.l           facc_out_b              # yes
3394
3395         bra.w           fsnan_exit
3396 fsnan_out_b_dn:
3397         andi.w          &0x0007,%d1
3398         bsr.l           store_dreg_b            # store result to regfile
3399         bra.w           fsnan_exit
3400
3401 fsnan_out_w:
3402         mov.w           FP_SRC_HI(%a6),%d0      # load upper word of SNAN
3403         bset            &14,%d0                 # set SNAN bit
3404         cmpi.b          %d1,&0x7                # is <ea> mode a data reg?
3405         ble.b           fsnan_out_w_dn          # yes
3406         mov.l           EXC_EA(%a6),%a0         # pass: <ea> of default result
3407         bsr.l           _dmem_write_word        # write the default result
3408
3409         tst.l           %d1                     # did dstore fail?
3410         bne.l           facc_out_w              # yes
3411
3412         bra.w           fsnan_exit
3413 fsnan_out_w_dn:
3414         andi.w          &0x0007,%d1
3415         bsr.l           store_dreg_w            # store result to regfile
3416         bra.w           fsnan_exit
3417
3418 fsnan_out_l:
3419         mov.l           FP_SRC_HI(%a6),%d0      # load upper longword of SNAN
3420         bset            &30,%d0                 # set SNAN bit
3421         cmpi.b          %d1,&0x7                # is <ea> mode a data reg?
3422         ble.b           fsnan_out_l_dn          # yes
3423         mov.l           EXC_EA(%a6),%a0         # pass: <ea> of default result
3424         bsr.l           _dmem_write_long        # write the default result
3425
3426         tst.l           %d1                     # did dstore fail?
3427         bne.l           facc_out_l              # yes
3428
3429         bra.w           fsnan_exit
3430 fsnan_out_l_dn:
3431         andi.w          &0x0007,%d1
3432         bsr.l           store_dreg_l            # store result to regfile
3433         bra.w           fsnan_exit
3434
3435 fsnan_out_s:
3436         cmpi.b          %d1,&0x7                # is <ea> mode a data reg?
3437         ble.b           fsnan_out_d_dn          # yes
3438         mov.l           FP_SRC_EX(%a6),%d0      # fetch SNAN sign
3439         andi.l          &0x80000000,%d0         # keep sign
3440         ori.l           &0x7fc00000,%d0         # insert new exponent,SNAN bit
3441         mov.l           FP_SRC_HI(%a6),%d1      # load mantissa
3442         lsr.l           &0x8,%d1                # shift mantissa for sgl
3443         or.l            %d1,%d0                 # create sgl SNAN
3444         mov.l           EXC_EA(%a6),%a0         # pass: <ea> of default result
3445         bsr.l           _dmem_write_long        # write the default result
3446
3447         tst.l           %d1                     # did dstore fail?
3448         bne.l           facc_out_l              # yes
3449
3450         bra.w           fsnan_exit
3451 fsnan_out_d_dn:
3452         mov.l           FP_SRC_EX(%a6),%d0      # fetch SNAN sign
3453         andi.l          &0x80000000,%d0         # keep sign
3454         ori.l           &0x7fc00000,%d0         # insert new exponent,SNAN bit
3455         mov.l           %d1,-(%sp)
3456         mov.l           FP_SRC_HI(%a6),%d1      # load mantissa
3457         lsr.l           &0x8,%d1                # shift mantissa for sgl
3458         or.l            %d1,%d0                 # create sgl SNAN
3459         mov.l           (%sp)+,%d1
3460         andi.w          &0x0007,%d1
3461         bsr.l           store_dreg_l            # store result to regfile
3462         bra.w           fsnan_exit
3463
3464 fsnan_out_d:
3465         mov.l           FP_SRC_EX(%a6),%d0      # fetch SNAN sign
3466         andi.l          &0x80000000,%d0         # keep sign
3467         ori.l           &0x7ff80000,%d0         # insert new exponent,SNAN bit
3468         mov.l           FP_SRC_HI(%a6),%d1      # load hi mantissa
3469         mov.l           %d0,FP_SCR0_EX(%a6)     # store to temp space
3470         mov.l           &11,%d0                 # load shift amt
3471         lsr.l           %d0,%d1
3472         or.l            %d1,FP_SCR0_EX(%a6)     # create dbl hi
3473         mov.l           FP_SRC_HI(%a6),%d1      # load hi mantissa
3474         andi.l          &0x000007ff,%d1
3475         ror.l           %d0,%d1
3476         mov.l           %d1,FP_SCR0_HI(%a6)     # store to temp space
3477         mov.l           FP_SRC_LO(%a6),%d1      # load lo mantissa
3478         lsr.l           %d0,%d1
3479         or.l            %d1,FP_SCR0_HI(%a6)     # create dbl lo
3480         lea             FP_SCR0(%a6),%a0        # pass: ptr to operand
3481         mov.l           EXC_EA(%a6),%a1         # pass: dst addr
3482         movq.l          &0x8,%d0                # pass: size of 8 bytes
3483         bsr.l           _dmem_write             # write the default result
3484
3485         tst.l           %d1                     # did dstore fail?
3486         bne.l           facc_out_d              # yes
3487
3488         bra.w           fsnan_exit
3489
3490 # for extended precision, if the addressing mode is pre-decrement or
3491 # post-increment, then the address register did not get updated.
3492 # in addition, for pre-decrement, the stacked <ea> is incorrect.
3493 fsnan_out_x:
3494         clr.b           SPCOND_FLG(%a6)         # clear special case flag
3495
3496         mov.w           FP_SRC_EX(%a6),FP_SCR0_EX(%a6)
3497         clr.w           2+FP_SCR0(%a6)
3498         mov.l           FP_SRC_HI(%a6),%d0
3499         bset            &30,%d0
3500         mov.l           %d0,FP_SCR0_HI(%a6)
3501         mov.l           FP_SRC_LO(%a6),FP_SCR0_LO(%a6)
3502
3503         btst            &0x5,EXC_SR(%a6)        # supervisor mode exception?
3504         bne.b           fsnan_out_x_s           # yes
3505
3506         mov.l           %usp,%a0                # fetch user stack pointer
3507         mov.l           %a0,EXC_A7(%a6)         # save on stack for calc_ea()
3508         mov.l           (%a6),EXC_A6(%a6)
3509
3510         bsr.l           _calc_ea_fout           # find the correct ea,update An
3511         mov.l           %a0,%a1
3512         mov.l           %a0,EXC_EA(%a6)         # stack correct <ea>
3513
3514         mov.l           EXC_A7(%a6),%a0
3515         mov.l           %a0,%usp                # restore user stack pointer
3516         mov.l           EXC_A6(%a6),(%a6)
3517
3518 fsnan_out_x_save:
3519         lea             FP_SCR0(%a6),%a0        # pass: ptr to operand
3520         movq.l          &0xc,%d0                # pass: size of extended
3521         bsr.l           _dmem_write             # write the default result
3522
3523         tst.l           %d1                     # did dstore fail?
3524         bne.l           facc_out_x              # yes
3525
3526         bra.w           fsnan_exit
3527
3528 fsnan_out_x_s:
3529         mov.l           (%a6),EXC_A6(%a6)
3530
3531         bsr.l           _calc_ea_fout           # find the correct ea,update An
3532         mov.l           %a0,%a1
3533         mov.l           %a0,EXC_EA(%a6)         # stack correct <ea>
3534
3535         mov.l           EXC_A6(%a6),(%a6)
3536
3537         cmpi.b          SPCOND_FLG(%a6),&mda7_flg # is <ea> mode -(a7)?
3538         bne.b           fsnan_out_x_save        # no
3539
3540 # the operation was "fmove.x SNAN,-(a7)" from supervisor mode.
3541         fmovm.x         EXC_FPREGS(%a6),&0xc0   # restore fp0-fp1
3542         fmovm.l         USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs
3543         movm.l          EXC_DREGS(%a6),&0x0303  # restore d0-d1/a0-a1
3544
3545         frestore        FP_SRC(%a6)
3546
3547         mov.l           EXC_A6(%a6),%a6         # restore frame pointer
3548
3549         mov.l           LOCAL_SIZE+EXC_SR(%sp),LOCAL_SIZE+EXC_SR-0xc(%sp)
3550         mov.l           LOCAL_SIZE+EXC_PC+0x2(%sp),LOCAL_SIZE+EXC_PC+0x2-0xc(%sp)
3551         mov.l           LOCAL_SIZE+EXC_EA(%sp),LOCAL_SIZE+EXC_EA-0xc(%sp)
3552
3553         mov.l           LOCAL_SIZE+FP_SCR0_EX(%sp),LOCAL_SIZE+EXC_SR(%sp)
3554         mov.l           LOCAL_SIZE+FP_SCR0_HI(%sp),LOCAL_SIZE+EXC_PC+0x2(%sp)
3555         mov.l           LOCAL_SIZE+FP_SCR0_LO(%sp),LOCAL_SIZE+EXC_EA(%sp)
3556
3557         add.l           &LOCAL_SIZE-0x8,%sp
3558
3559         bra.l           _real_snan
3560
3561 #########################################################################
3562 # XDEF **************************************************************** #
3563 #       _fpsp_inex(): 060FPSP entry point for FP Inexact exception.     #
3564 #                                                                       #
3565 #       This handler should be the first code executed upon taking the  #
3566 #       FP Inexact exception in an operating system.                    #
3567 #                                                                       #
3568 # XREF **************************************************************** #
3569 #       _imem_read_long() - read instruction longword                   #
3570 #       fix_skewed_ops() - adjust src operand in fsave frame            #
3571 #       set_tag_x() - determine optype of src/dst operands              #
3572 #       store_fpreg() - store opclass 0 or 2 result to FP regfile       #
3573 #       unnorm_fix() - change UNNORM operands to NORM or ZERO           #
3574 #       load_fpn2() - load dst operand from FP regfile                  #
3575 #       smovcr() - emulate an "fmovcr" instruction                      #
3576 #       fout() - emulate an opclass 3 instruction                       #
3577 #       tbl_unsupp - add of table of emulation routines for opclass 0,2 #
3578 #       _real_inex() - "callout" to operating system inexact handler    #
3579 #                                                                       #
3580 # INPUT *************************************************************** #
3581 #       - The system stack contains the FP Inexact exception frame      #
3582 #       - The fsave frame contains the source operand                   #
3583 #                                                                       #
3584 # OUTPUT ************************************************************** #
3585 #       - The system stack is unchanged                                 #
3586 #       - The fsave frame contains the adjusted src op for opclass 0,2  #
3587 #                                                                       #
3588 # ALGORITHM *********************************************************** #
3589 #       In a system where the FP Inexact exception is enabled, the goal #
3590 # is to get to the handler specified at _real_inex(). But, on the 060,  #
3591 # for opclass zero and two instruction taking this exception, the       #
3592 # hardware doesn't store the correct result to the destination FP       #
3593 # register as did the '040 and '881/2. This handler must emulate the    #
3594 # instruction in order to get this value and then store it to the       #
3595 # correct register before calling _real_inex().                         #
3596 #       For opclass 3 instructions, the 060 doesn't store the default   #
3597 # inexact result out to memory or data register file as it should.      #
3598 # This code must emulate the move out by calling fout() before finally  #
3599 # exiting through _real_inex().                                         #
3600 #                                                                       #
3601 #########################################################################
3602
3603         global          _fpsp_inex
3604 _fpsp_inex:
3605
3606         link.w          %a6,&-LOCAL_SIZE        # init stack frame
3607
3608         fsave           FP_SRC(%a6)             # grab the "busy" frame
3609
3610         movm.l          &0x0303,EXC_DREGS(%a6)  # save d0-d1/a0-a1
3611         fmovm.l         %fpcr,%fpsr,%fpiar,USER_FPCR(%a6) # save ctrl regs
3612         fmovm.x         &0xc0,EXC_FPREGS(%a6)   # save fp0-fp1 on stack
3613
3614 # the FPIAR holds the "current PC" of the faulting instruction
3615         mov.l           USER_FPIAR(%a6),EXC_EXTWPTR(%a6)
3616
3617         mov.l           EXC_EXTWPTR(%a6),%a0    # fetch instruction addr
3618         addq.l          &0x4,EXC_EXTWPTR(%a6)   # incr instruction ptr
3619         bsr.l           _imem_read_long         # fetch the instruction words
3620         mov.l           %d0,EXC_OPWORD(%a6)
3621
3622 ##############################################################################
3623
3624         btst            &13,%d0                 # is instr an fmove out?
3625         bne.w           finex_out               # fmove out
3626
3627
3628 # the hardware, for "fabs" and "fneg" w/ a long source format, puts the
3629 # longword integer directly into the upper longword of the mantissa along
3630 # w/ an exponent value of 0x401e. we convert this to extended precision here.
3631         bfextu          %d0{&19:&3},%d0         # fetch instr size
3632         bne.b           finex_cont              # instr size is not long
3633         cmpi.w          FP_SRC_EX(%a6),&0x401e  # is exponent 0x401e?
3634         bne.b           finex_cont              # no
3635         fmov.l          &0x0,%fpcr
3636         fmov.l          FP_SRC_HI(%a6),%fp0     # load integer src
3637         fmov.x          %fp0,FP_SRC(%a6)        # store integer as extended precision
3638         mov.w           &0xe001,0x2+FP_SRC(%a6)
3639
3640 finex_cont:
3641         lea             FP_SRC(%a6),%a0         # pass: ptr to src op
3642         bsr.l           fix_skewed_ops          # fix src op
3643
3644 # Here, we zero the ccode and exception byte field since we're going to
3645 # emulate the whole instruction. Notice, though, that we don't kill the
3646 # INEX1 bit. This is because a packed op has long since been converted
3647 # to extended before arriving here. Therefore, we need to retain the
3648 # INEX1 bit from when the operand was first converted.
3649         andi.l          &0x00ff01ff,USER_FPSR(%a6) # zero all but accured field
3650
3651         fmov.l          &0x0,%fpcr              # zero current control regs
3652         fmov.l          &0x0,%fpsr
3653
3654         bfextu          EXC_EXTWORD(%a6){&0:&6},%d1 # extract upper 6 of cmdreg
3655         cmpi.b          %d1,&0x17               # is op an fmovecr?
3656         beq.w           finex_fmovcr            # yes
3657
3658         lea             FP_SRC(%a6),%a0         # pass: ptr to src op
3659         bsr.l           set_tag_x               # tag the operand type
3660         mov.b           %d0,STAG(%a6)           # maybe NORM,DENORM
3661
3662 # bits four and five of the fp extension word separate the monadic and dyadic
3663 # operations that can pass through fpsp_inex(). remember that fcmp and ftst
3664 # will never take this exception, but fsincos will.
3665         btst            &0x5,1+EXC_CMDREG(%a6)  # is operation monadic or dyadic?
3666         beq.b           finex_extract           # monadic
3667
3668         btst            &0x4,1+EXC_CMDREG(%a6)  # is operation an fsincos?
3669         bne.b           finex_extract           # yes
3670
3671         bfextu          EXC_CMDREG(%a6){&6:&3},%d0 # dyadic; load dst reg
3672         bsr.l           load_fpn2               # load dst into FP_DST
3673
3674         lea             FP_DST(%a6),%a0         # pass: ptr to dst op
3675         bsr.l           set_tag_x               # tag the operand type
3676         cmpi.b          %d0,&UNNORM             # is operand an UNNORM?
3677         bne.b           finex_op2_done          # no
3678         bsr.l           unnorm_fix              # yes; convert to NORM,DENORM,or ZERO
3679 finex_op2_done:
3680         mov.b           %d0,DTAG(%a6)           # save dst optype tag
3681
3682 finex_extract:
3683         clr.l           %d0
3684         mov.b           FPCR_MODE(%a6),%d0      # pass rnd prec/mode
3685
3686         mov.b           1+EXC_CMDREG(%a6),%d1
3687         andi.w          &0x007f,%d1             # extract extension
3688
3689         lea             FP_SRC(%a6),%a0
3690         lea             FP_DST(%a6),%a1
3691
3692         mov.l           (tbl_unsupp.l,%pc,%d1.w*4),%d1 # fetch routine addr
3693         jsr             (tbl_unsupp.l,%pc,%d1.l*1)
3694
3695 # the operation has been emulated. the result is in fp0.
3696 finex_save:
3697         bfextu          EXC_CMDREG(%a6){&6:&3},%d0
3698         bsr.l           store_fpreg
3699
3700 finex_exit:
3701         fmovm.x         EXC_FPREGS(%a6),&0xc0   # restore fp0-fp1
3702         fmovm.l         USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs
3703         movm.l          EXC_DREGS(%a6),&0x0303  # restore d0-d1/a0-a1
3704
3705         frestore        FP_SRC(%a6)
3706
3707         unlk            %a6
3708         bra.l           _real_inex
3709
3710 finex_fmovcr:
3711         clr.l           %d0
3712         mov.b           FPCR_MODE(%a6),%d0      # pass rnd prec,mode
3713         mov.b           1+EXC_CMDREG(%a6),%d1
3714         andi.l          &0x0000007f,%d1         # pass rom offset
3715         bsr.l           smovcr
3716         bra.b           finex_save
3717
3718 ########################################################################
3719
3720 #
3721 # the hardware does not save the default result to memory on enabled
3722 # inexact exceptions. we do this here before passing control to
3723 # the user inexact handler.
3724 #
3725 # byte, word, and long destination format operations can pass
3726 # through here. so can double and single precision.
3727 # although packed opclass three operations can take inexact
3728 # exceptions, they won't pass through here since they are caught
3729 # first by the unsupported data format exception handler. that handler
3730 # sends them directly to _real_inex() if necessary.
3731 #
3732 finex_out:
3733
3734         mov.b           &NORM,STAG(%a6)         # src is a NORM
3735
3736         clr.l           %d0
3737         mov.b           FPCR_MODE(%a6),%d0      # pass rnd prec,mode
3738
3739         andi.l          &0xffff00ff,USER_FPSR(%a6) # zero exception field
3740
3741         lea             FP_SRC(%a6),%a0         # pass ptr to src operand
3742
3743         bsr.l           fout                    # store the default result
3744
3745         bra.b           finex_exit
3746
3747 #########################################################################
3748 # XDEF **************************************************************** #
3749 #       _fpsp_dz(): 060FPSP entry point for FP DZ exception.            #
3750 #                                                                       #
3751 #       This handler should be the first code executed upon taking      #
3752 #       the FP DZ exception in an operating system.                     #
3753 #                                                                       #
3754 # XREF **************************************************************** #
3755 #       _imem_read_long() - read instruction longword from memory       #
3756 #       fix_skewed_ops() - adjust fsave operand                         #
3757 #       _real_dz() - "callout" exit point from FP DZ handler            #
3758 #                                                                       #
3759 # INPUT *************************************************************** #
3760 #       - The system stack contains the FP DZ exception stack.          #
3761 #       - The fsave frame contains the source operand.                  #
3762 #                                                                       #
3763 # OUTPUT ************************************************************** #
3764 #       - The system stack contains the FP DZ exception stack.          #
3765 #       - The fsave frame contains the adjusted source operand.         #
3766 #                                                                       #
3767 # ALGORITHM *********************************************************** #
3768 #       In a system where the DZ exception is enabled, the goal is to   #
3769 # get to the handler specified at _real_dz(). But, on the 060, when the #
3770 # exception is taken, the input operand in the fsave state frame may    #
3771 # be incorrect for some cases and need to be adjusted. So, this package #
3772 # adjusts the operand using fix_skewed_ops() and then branches to       #
3773 # _real_dz().                                                           #
3774 #                                                                       #
3775 #########################################################################
3776
3777         global          _fpsp_dz
3778 _fpsp_dz:
3779
3780         link.w          %a6,&-LOCAL_SIZE        # init stack frame
3781
3782         fsave           FP_SRC(%a6)             # grab the "busy" frame
3783
3784         movm.l          &0x0303,EXC_DREGS(%a6)  # save d0-d1/a0-a1
3785         fmovm.l         %fpcr,%fpsr,%fpiar,USER_FPCR(%a6) # save ctrl regs
3786         fmovm.x         &0xc0,EXC_FPREGS(%a6)   # save fp0-fp1 on stack
3787
3788 # the FPIAR holds the "current PC" of the faulting instruction
3789         mov.l           USER_FPIAR(%a6),EXC_EXTWPTR(%a6)
3790
3791         mov.l           EXC_EXTWPTR(%a6),%a0    # fetch instruction addr
3792         addq.l          &0x4,EXC_EXTWPTR(%a6)   # incr instruction ptr
3793         bsr.l           _imem_read_long         # fetch the instruction words
3794         mov.l           %d0,EXC_OPWORD(%a6)
3795
3796 ##############################################################################
3797
3798
3799 # here, we simply see if the operand in the fsave frame needs to be "unskewed".
3800 # this would be the case for opclass two operations with a source zero
3801 # in the sgl or dbl format.
3802         lea             FP_SRC(%a6),%a0         # pass: ptr to src op
3803         bsr.l           fix_skewed_ops          # fix src op
3804
3805 fdz_exit:
3806         fmovm.x         EXC_FPREGS(%a6),&0xc0   # restore fp0-fp1
3807         fmovm.l         USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs
3808         movm.l          EXC_DREGS(%a6),&0x0303  # restore d0-d1/a0-a1
3809
3810         frestore        FP_SRC(%a6)
3811
3812         unlk            %a6
3813         bra.l           _real_dz
3814
3815 #########################################################################
3816 # XDEF **************************************************************** #
3817 #       _fpsp_fline(): 060FPSP entry point for "Line F emulator"        #
3818 #                      exception when the "reduced" version of the      #
3819 #                      FPSP is implemented that does not emulate        #
3820 #                      FP unimplemented instructions.                   #
3821 #                                                                       #
3822 #       This handler should be the first code executed upon taking a    #
3823 #       "Line F Emulator" exception in an operating system integrating  #
3824 #       the reduced version of 060FPSP.                                 #
3825 #                                                                       #
3826 # XREF **************************************************************** #
3827 #       _real_fpu_disabled() - Handle "FPU disabled" exceptions         #
3828 #       _real_fline() - Handle all other cases (treated equally)        #
3829 #                                                                       #
3830 # INPUT *************************************************************** #
3831 #       - The system stack contains a "Line F Emulator" exception       #
3832 #         stack frame.                                                  #
3833 #                                                                       #
3834 # OUTPUT ************************************************************** #
3835 #       - The system stack is unchanged.                                #
3836 #                                                                       #
3837 # ALGORITHM *********************************************************** #
3838 #       When a "Line F Emulator" exception occurs in a system where     #
3839 # "FPU Unimplemented" instructions will not be emulated, the exception  #
3840 # can occur because then FPU is disabled or the instruction is to be    #
3841 # classifed as "Line F". This module determines which case exists and   #
3842 # calls the appropriate "callout".                                      #
3843 #                                                                       #
3844 #########################################################################
3845
3846         global          _fpsp_fline
3847 _fpsp_fline:
3848
3849 # check to see if the FPU is disabled. if so, jump to the OS entry
3850 # point for that condition.
3851         cmpi.w          0x6(%sp),&0x402c
3852         beq.l           _real_fpu_disabled
3853
3854         bra.l           _real_fline
3855
3856 #########################################################################
3857 # XDEF **************************************************************** #
3858 #       _dcalc_ea(): calc correct <ea> from <ea> stacked on exception   #
3859 #                                                                       #
3860 # XREF **************************************************************** #
3861 #       inc_areg() - increment an address register                      #
3862 #       dec_areg() - decrement an address register                      #
3863 #                                                                       #
3864 # INPUT *************************************************************** #
3865 #       d0 = number of bytes to adjust <ea> by                          #
3866 #                                                                       #
3867 # OUTPUT ************************************************************** #
3868 #       None                                                            #
3869 #                                                                       #
3870 # ALGORITHM *********************************************************** #
3871 # "Dummy" CALCulate Effective Address:                                  #
3872 #       The stacked <ea> for FP unimplemented instructions and opclass  #
3873 #       two packed instructions is correct with the exception of...     #
3874 #                                                                       #
3875 #       1) -(An)   : The register is not updated regardless of size.    #
3876 #                    Also, for extended precision and packed, the       #
3877 #                    stacked <ea> value is 8 bytes too big              #
3878 #       2) (An)+   : The register is not updated.                       #
3879 #       3) #<data> : The upper longword of the immediate operand is     #
3880 #                    stacked b,w,l and s sizes are completely stacked.  #
3881 #                    d,x, and p are not.                                #
3882 #                                                                       #
3883 #########################################################################
3884
3885         global          _dcalc_ea
3886 _dcalc_ea:
3887         mov.l           %d0, %a0                # move # bytes to %a0
3888
3889         mov.b           1+EXC_OPWORD(%a6), %d0  # fetch opcode word
3890         mov.l           %d0, %d1                # make a copy
3891
3892         andi.w          &0x38, %d0              # extract mode field
3893         andi.l          &0x7, %d1               # extract reg  field
3894
3895         cmpi.b          %d0,&0x18               # is mode (An)+ ?
3896         beq.b           dcea_pi                 # yes
3897
3898         cmpi.b          %d0,&0x20               # is mode -(An) ?
3899         beq.b           dcea_pd                 # yes
3900
3901         or.w            %d1,%d0                 # concat mode,reg
3902         cmpi.b          %d0,&0x3c               # is mode #<data>?
3903
3904         beq.b           dcea_imm                # yes
3905
3906         mov.l           EXC_EA(%a6),%a0         # return <ea>
3907         rts
3908
3909 # need to set immediate data flag here since we'll need to do
3910 # an imem_read to fetch this later.
3911 dcea_imm:
3912         mov.b           &immed_flg,SPCOND_FLG(%a6)
3913         lea             ([USER_FPIAR,%a6],0x4),%a0 # no; return <ea>
3914         rts
3915
3916 # here, the <ea> is stacked correctly. however, we must update the
3917 # address register...
3918 dcea_pi:
3919         mov.l           %a0,%d0                 # pass amt to inc by
3920         bsr.l           inc_areg                # inc addr register
3921
3922         mov.l           EXC_EA(%a6),%a0         # stacked <ea> is correct
3923         rts
3924
3925 # the <ea> is stacked correctly for all but extended and packed which
3926 # the <ea>s are 8 bytes too large.
3927 # it would make no sense to have a pre-decrement to a7 in supervisor
3928 # mode so we don't even worry about this tricky case here : )
3929 dcea_pd:
3930         mov.l           %a0,%d0                 # pass amt to dec by
3931         bsr.l           dec_areg                # dec addr register
3932
3933         mov.l           EXC_EA(%a6),%a0         # stacked <ea> is correct
3934
3935         cmpi.b          %d0,&0xc                # is opsize ext or packed?
3936         beq.b           dcea_pd2                # yes
3937         rts
3938 dcea_pd2:
3939         sub.l           &0x8,%a0                # correct <ea>
3940         mov.l           %a0,EXC_EA(%a6)         # put correct <ea> on stack
3941         rts
3942
3943 #########################################################################
3944 # XDEF **************************************************************** #
3945 #       _calc_ea_fout(): calculate correct stacked <ea> for extended    #
3946 #                        and packed data opclass 3 operations.          #
3947 #                                                                       #
3948 # XREF **************************************************************** #
3949 #       None                                                            #
3950 #                                                                       #
3951 # INPUT *************************************************************** #
3952 #       None                                                            #
3953 #                                                                       #
3954 # OUTPUT ************************************************************** #
3955 #       a0 = return correct effective address                           #
3956 #                                                                       #
3957 # ALGORITHM *********************************************************** #
3958 #       For opclass 3 extended and packed data operations, the <ea>     #
3959 # stacked for the exception is incorrect for -(an) and (an)+ addressing #
3960 # modes. Also, while we're at it, the index register itself must get    #
3961 # updated.                                                              #
3962 #       So, for -(an), we must subtract 8 off of the stacked <ea> value #
3963 # and return that value as the correct <ea> and store that value in An. #
3964 # For (an)+, the stacked <ea> is correct but we must adjust An by +12.  #
3965 #                                                                       #
3966 #########################################################################
3967
3968 # This calc_ea is currently used to retrieve the correct <ea>
3969 # for fmove outs of type extended and packed.
3970         global          _calc_ea_fout
3971 _calc_ea_fout:
3972         mov.b           1+EXC_OPWORD(%a6),%d0   # fetch opcode word
3973         mov.l           %d0,%d1                 # make a copy
3974
3975         andi.w          &0x38,%d0               # extract mode field
3976         andi.l          &0x7,%d1                # extract reg  field
3977
3978         cmpi.b          %d0,&0x18               # is mode (An)+ ?
3979         beq.b           ceaf_pi                 # yes
3980
3981         cmpi.b          %d0,&0x20               # is mode -(An) ?
3982         beq.w           ceaf_pd                 # yes
3983
3984         mov.l           EXC_EA(%a6),%a0         # stacked <ea> is correct
3985         rts
3986
3987 # (An)+ : extended and packed fmove out
3988 #       : stacked <ea> is correct
3989 #       : "An" not updated
3990 ceaf_pi:
3991         mov.w           (tbl_ceaf_pi.b,%pc,%d1.w*2),%d1
3992         mov.l           EXC_EA(%a6),%a0
3993         jmp             (tbl_ceaf_pi.b,%pc,%d1.w*1)
3994
3995         swbeg           &0x8
3996 tbl_ceaf_pi:
3997         short           ceaf_pi0 - tbl_ceaf_pi
3998         short           ceaf_pi1 - tbl_ceaf_pi
3999         short           ceaf_pi2 - tbl_ceaf_pi
4000         short           ceaf_pi3 - tbl_ceaf_pi
4001         short           ceaf_pi4 - tbl_ceaf_pi
4002         short           ceaf_pi5 - tbl_ceaf_pi
4003         short           ceaf_pi6 - tbl_ceaf_pi
4004         short           ceaf_pi7 - tbl_ceaf_pi
4005
4006 ceaf_pi0:
4007         addi.l          &0xc,EXC_DREGS+0x8(%a6)
4008         rts
4009 ceaf_pi1:
4010         addi.l          &0xc,EXC_DREGS+0xc(%a6)
4011         rts
4012 ceaf_pi2:
4013         add.l           &0xc,%a2
4014         rts
4015 ceaf_pi3:
4016         add.l           &0xc,%a3
4017         rts
4018 ceaf_pi4:
4019         add.l           &0xc,%a4
4020         rts
4021 ceaf_pi5:
4022         add.l           &0xc,%a5
4023         rts
4024 ceaf_pi6:
4025         addi.l          &0xc,EXC_A6(%a6)
4026         rts
4027 ceaf_pi7:
4028         mov.b           &mia7_flg,SPCOND_FLG(%a6)
4029         addi.l          &0xc,EXC_A7(%a6)
4030         rts
4031
4032 # -(An) : extended and packed fmove out
4033 #       : stacked <ea> = actual <ea> + 8
4034 #       : "An" not updated
4035 ceaf_pd:
4036         mov.w           (tbl_ceaf_pd.b,%pc,%d1.w*2),%d1
4037         mov.l           EXC_EA(%a6),%a0
4038         sub.l           &0x8,%a0
4039         sub.l           &0x8,EXC_EA(%a6)
4040         jmp             (tbl_ceaf_pd.b,%pc,%d1.w*1)
4041
4042         swbeg           &0x8
4043 tbl_ceaf_pd:
4044         short           ceaf_pd0 - tbl_ceaf_pd
4045         short           ceaf_pd1 - tbl_ceaf_pd
4046         short           ceaf_pd2 - tbl_ceaf_pd
4047         short           ceaf_pd3 - tbl_ceaf_pd
4048         short           ceaf_pd4 - tbl_ceaf_pd
4049         short           ceaf_pd5 - tbl_ceaf_pd
4050         short           ceaf_pd6 - tbl_ceaf_pd
4051         short           ceaf_pd7 - tbl_ceaf_pd
4052
4053 ceaf_pd0:
4054         mov.l           %a0,EXC_DREGS+0x8(%a6)
4055         rts
4056 ceaf_pd1:
4057         mov.l           %a0,EXC_DREGS+0xc(%a6)
4058         rts
4059 ceaf_pd2:
4060         mov.l           %a0,%a2
4061         rts
4062 ceaf_pd3:
4063         mov.l           %a0,%a3
4064         rts
4065 ceaf_pd4:
4066         mov.l           %a0,%a4
4067         rts
4068 ceaf_pd5:
4069         mov.l           %a0,%a5
4070         rts
4071 ceaf_pd6:
4072         mov.l           %a0,EXC_A6(%a6)
4073         rts
4074 ceaf_pd7:
4075         mov.l           %a0,EXC_A7(%a6)
4076         mov.b           &mda7_flg,SPCOND_FLG(%a6)
4077         rts
4078
4079 #
4080 # This table holds the offsets of the emulation routines for each individual
4081 # math operation relative to the address of this table. Included are
4082 # routines like fadd/fmul/fabs. The transcendentals ARE NOT. This is because
4083 # this table is for the version if the 060FPSP without transcendentals.
4084 # The location within the table is determined by the extension bits of the
4085 # operation longword.
4086 #
4087
4088         swbeg           &109
4089 tbl_unsupp:
4090         long            fin             - tbl_unsupp    # 00: fmove
4091         long            fint            - tbl_unsupp    # 01: fint
4092         long            tbl_unsupp      - tbl_unsupp    # 02: fsinh
4093         long            fintrz          - tbl_unsupp    # 03: fintrz
4094         long            fsqrt           - tbl_unsupp    # 04: fsqrt
4095         long            tbl_unsupp      - tbl_unsupp
4096         long            tbl_unsupp      - tbl_unsupp    # 06: flognp1
4097         long            tbl_unsupp      - tbl_unsupp
4098         long            tbl_unsupp      - tbl_unsupp    # 08: fetoxm1
4099         long            tbl_unsupp      - tbl_unsupp    # 09: ftanh
4100         long            tbl_unsupp      - tbl_unsupp    # 0a: fatan
4101         long            tbl_unsupp      - tbl_unsupp
4102         long            tbl_unsupp      - tbl_unsupp    # 0c: fasin
4103         long            tbl_unsupp      - tbl_unsupp    # 0d: fatanh
4104         long            tbl_unsupp      - tbl_unsupp    # 0e: fsin
4105         long            tbl_unsupp      - tbl_unsupp    # 0f: ftan
4106         long            tbl_unsupp      - tbl_unsupp    # 10: fetox
4107         long            tbl_unsupp      - tbl_unsupp    # 11: ftwotox
4108         long            tbl_unsupp      - tbl_unsupp    # 12: ftentox
4109         long            tbl_unsupp      - tbl_unsupp
4110         long            tbl_unsupp      - tbl_unsupp    # 14: flogn
4111         long            tbl_unsupp      - tbl_unsupp    # 15: flog10
4112         long            tbl_unsupp      - tbl_unsupp    # 16: flog2
4113         long            tbl_unsupp      - tbl_unsupp
4114         long            fabs            - tbl_unsupp    # 18: fabs
4115         long            tbl_unsupp      - tbl_unsupp    # 19: fcosh
4116         long            fneg            - tbl_unsupp    # 1a: fneg
4117         long            tbl_unsupp      - tbl_unsupp
4118         long            tbl_unsupp      - tbl_unsupp    # 1c: facos
4119         long            tbl_unsupp      - tbl_unsupp    # 1d: fcos
4120         long            tbl_unsupp      - tbl_unsupp    # 1e: fgetexp
4121         long            tbl_unsupp      - tbl_unsupp    # 1f: fgetman
4122         long            fdiv            - tbl_unsupp    # 20: fdiv
4123         long            tbl_unsupp      - tbl_unsupp    # 21: fmod
4124         long            fadd            - tbl_unsupp    # 22: fadd
4125         long            fmul            - tbl_unsupp    # 23: fmul
4126         long            fsgldiv         - tbl_unsupp    # 24: fsgldiv
4127         long            tbl_unsupp      - tbl_unsupp    # 25: frem
4128         long            tbl_unsupp      - tbl_unsupp    # 26: fscale
4129         long            fsglmul         - tbl_unsupp    # 27: fsglmul
4130         long            fsub            - tbl_unsupp    # 28: fsub
4131         long            tbl_unsupp      - tbl_unsupp
4132         long            tbl_unsupp      - tbl_unsupp
4133         long            tbl_unsupp      - tbl_unsupp
4134         long            tbl_unsupp      - tbl_unsupp
4135         long            tbl_unsupp      - tbl_unsupp
4136         long            tbl_unsupp      - tbl_unsupp
4137         long            tbl_unsupp      - tbl_unsupp
4138         long            tbl_unsupp      - tbl_unsupp    # 30: fsincos
4139         long            tbl_unsupp      - tbl_unsupp    # 31: fsincos
4140         long            tbl_unsupp      - tbl_unsupp    # 32: fsincos
4141         long            tbl_unsupp      - tbl_unsupp    # 33: fsincos
4142         long            tbl_unsupp      - tbl_unsupp    # 34: fsincos
4143         long            tbl_unsupp      - tbl_unsupp    # 35: fsincos
4144         long            tbl_unsupp      - tbl_unsupp    # 36: fsincos
4145         long            tbl_unsupp      - tbl_unsupp    # 37: fsincos
4146         long            fcmp            - tbl_unsupp    # 38: fcmp
4147         long            tbl_unsupp      - tbl_unsupp
4148         long            ftst            - tbl_unsupp    # 3a: ftst
4149         long            tbl_unsupp      - tbl_unsupp
4150         long            tbl_unsupp      - tbl_unsupp
4151         long            tbl_unsupp      - tbl_unsupp
4152         long            tbl_unsupp      - tbl_unsupp
4153         long            tbl_unsupp      - tbl_unsupp
4154         long            fsin            - tbl_unsupp    # 40: fsmove
4155         long            fssqrt          - tbl_unsupp    # 41: fssqrt
4156         long            tbl_unsupp      - tbl_unsupp
4157         long            tbl_unsupp      - tbl_unsupp
4158         long            fdin            - tbl_unsupp    # 44: fdmove
4159         long            fdsqrt          - tbl_unsupp    # 45: fdsqrt
4160         long            tbl_unsupp      - tbl_unsupp
4161         long            tbl_unsupp      - tbl_unsupp
4162         long            tbl_unsupp      - tbl_unsupp
4163         long            tbl_unsupp      - tbl_unsupp
4164         long            tbl_unsupp      - tbl_unsupp
4165         long            tbl_unsupp      - tbl_unsupp
4166         long            tbl_unsupp      - tbl_unsupp
4167         long            tbl_unsupp      - tbl_unsupp
4168         long            tbl_unsupp      - tbl_unsupp
4169         long            tbl_unsupp      - tbl_unsupp
4170         long            tbl_unsupp      - tbl_unsupp
4171         long            tbl_unsupp      - tbl_unsupp
4172         long            tbl_unsupp      - tbl_unsupp
4173         long            tbl_unsupp      - tbl_unsupp
4174         long            tbl_unsupp      - tbl_unsupp
4175         long            tbl_unsupp      - tbl_unsupp
4176         long            tbl_unsupp      - tbl_unsupp
4177         long            tbl_unsupp      - tbl_unsupp
4178         long            fsabs           - tbl_unsupp    # 58: fsabs
4179         long            tbl_unsupp      - tbl_unsupp
4180         long            fsneg           - tbl_unsupp    # 5a: fsneg
4181         long            tbl_unsupp      - tbl_unsupp
4182         long            fdabs           - tbl_unsupp    # 5c: fdabs
4183         long            tbl_unsupp      - tbl_unsupp
4184         long            fdneg           - tbl_unsupp    # 5e: fdneg
4185         long            tbl_unsupp      - tbl_unsupp
4186         long            fsdiv           - tbl_unsupp    # 60: fsdiv
4187         long            tbl_unsupp      - tbl_unsupp
4188         long            fsadd           - tbl_unsupp    # 62: fsadd
4189         long            fsmul           - tbl_unsupp    # 63: fsmul
4190         long            fddiv           - tbl_unsupp    # 64: fddiv
4191         long            tbl_unsupp      - tbl_unsupp
4192         long            fdadd           - tbl_unsupp    # 66: fdadd
4193         long            fdmul           - tbl_unsupp    # 67: fdmul
4194         long            fssub           - tbl_unsupp    # 68: fssub
4195         long            tbl_unsupp      - tbl_unsupp
4196         long            tbl_unsupp      - tbl_unsupp
4197         long            tbl_unsupp      - tbl_unsupp
4198         long            fdsub           - tbl_unsupp    # 6c: fdsub
4199
4200 #################################################
4201 # Add this here so non-fp modules can compile.
4202 # (smovcr is called from fpsp_inex.)
4203         global          smovcr
4204 smovcr:
4205         bra.b           smovcr
4206
4207 #########################################################################
4208 # XDEF **************************************************************** #
4209 #       fmovm_dynamic(): emulate "fmovm" dynamic instruction            #
4210 #                                                                       #
4211 # XREF **************************************************************** #
4212 #       fetch_dreg() - fetch data register                              #
4213 #       {i,d,}mem_read() - fetch data from memory                       #
4214 #       _mem_write() - write data to memory                             #
4215 #       iea_iacc() - instruction memory access error occurred           #
4216 #       iea_dacc() - data memory access error occurred                  #
4217 #       restore() - restore An index regs if access error occurred      #
4218 #                                                                       #
4219 # INPUT *************************************************************** #
4220 #       None                                                            #
4221 #                                                                       #
4222 # OUTPUT ************************************************************** #
4223 #       If instr is "fmovm Dn,-(A7)" from supervisor mode,              #
4224 #               d0 = size of dump                                       #
4225 #               d1 = Dn                                                 #
4226 #       Else if instruction access error,                               #
4227 #               d0 = FSLW                                               #
4228 #       Else if data access error,                                      #
4229 #               d0 = FSLW                                               #
4230 #               a0 = address of fault                                   #
4231 #       Else                                                            #
4232 #               none.                                                   #
4233 #                                                                       #
4234 # ALGORITHM *********************************************************** #
4235 #       The effective address must be calculated since this is entered  #
4236 # from an "Unimplemented Effective Address" exception handler. So, we   #
4237 # have our own fcalc_ea() routine here. If an access error is flagged   #
4238 # by a _{i,d,}mem_read() call, we must exit through the special         #
4239 # handler.                                                              #
4240 #       The data register is determined and its value loaded to get the #
4241 # string of FP registers affected. This value is used as an index into  #
4242 # a lookup table such that we can determine the number of bytes         #
4243 # involved.                                                             #
4244 #       If the instruction is "fmovm.x <ea>,Dn", a _mem_read() is used  #
4245 # to read in all FP values. Again, _mem_read() may fail and require a   #
4246 # special exit.                                                         #
4247 #       If the instruction is "fmovm.x DN,<ea>", a _mem_write() is used #
4248 # to write all FP values. _mem_write() may also fail.                   #
4249 #       If the instruction is "fmovm.x DN,-(a7)" from supervisor mode,  #
4250 # then we return the size of the dump and the string to the caller      #
4251 # so that the move can occur outside of this routine. This special      #
4252 # case is required so that moves to the system stack are handled        #
4253 # correctly.                                                            #
4254 #                                                                       #
4255 # DYNAMIC:                                                              #
4256 #       fmovm.x dn, <ea>                                                #
4257 #       fmovm.x <ea>, dn                                                #
4258 #                                                                       #
4259 #             <WORD 1>                <WORD2>                           #
4260 #       1111 0010 00 |<ea>|     11@& 1000 0$$$ 0000                     #
4261 #                                                                       #
4262 #       & = (0): predecrement addressing mode                           #
4263 #           (1): postincrement or control addressing mode               #
4264 #       @ = (0): move listed regs from memory to the FPU                #
4265 #           (1): move listed regs from the FPU to memory                #
4266 #       $$$    : index of data register holding reg select mask         #
4267 #                                                                       #
4268 # NOTES:                                                                #
4269 #       If the data register holds a zero, then the                     #
4270 #       instruction is a nop.                                           #
4271 #                                                                       #
4272 #########################################################################
4273
4274         global          fmovm_dynamic
4275 fmovm_dynamic:
4276
4277 # extract the data register in which the bit string resides...
4278         mov.b           1+EXC_EXTWORD(%a6),%d1  # fetch extword
4279         andi.w          &0x70,%d1               # extract reg bits
4280         lsr.b           &0x4,%d1                # shift into lo bits
4281
4282 # fetch the bit string into d0...
4283         bsr.l           fetch_dreg              # fetch reg string
4284
4285         andi.l          &0x000000ff,%d0         # keep only lo byte
4286
4287         mov.l           %d0,-(%sp)              # save strg
4288         mov.b           (tbl_fmovm_size.w,%pc,%d0),%d0
4289         mov.l           %d0,-(%sp)              # save size
4290         bsr.l           fmovm_calc_ea           # calculate <ea>
4291         mov.l           (%sp)+,%d0              # restore size
4292         mov.l           (%sp)+,%d1              # restore strg
4293
4294 # if the bit string is a zero, then the operation is a no-op
4295 # but, make sure that we've calculated ea and advanced the opword pointer
4296         beq.w           fmovm_data_done
4297
4298 # separate move ins from move outs...
4299         btst            &0x5,EXC_EXTWORD(%a6)   # is it a move in or out?
4300         beq.w           fmovm_data_in           # it's a move out
4301
4302 #############
4303 # MOVE OUT: #
4304 #############
4305 fmovm_data_out:
4306         btst            &0x4,EXC_EXTWORD(%a6)   # control or predecrement?
4307         bne.w           fmovm_out_ctrl          # control
4308
4309 ############################
4310 fmovm_out_predec:
4311 # for predecrement mode, the bit string is the opposite of both control
4312 # operations and postincrement mode. (bit7 = FP7 ... bit0 = FP0)
4313 # here, we convert it to be just like the others...
4314         mov.b           (tbl_fmovm_convert.w,%pc,%d1.w*1),%d1
4315
4316         btst            &0x5,EXC_SR(%a6)        # user or supervisor mode?
4317         beq.b           fmovm_out_ctrl          # user
4318
4319 fmovm_out_predec_s:
4320         cmpi.b          SPCOND_FLG(%a6),&mda7_flg # is <ea> mode -(a7)?
4321         bne.b           fmovm_out_ctrl
4322
4323 # the operation was unfortunately an: fmovm.x dn,-(sp)
4324 # called from supervisor mode.
4325 # we're also passing "size" and "strg" back to the calling routine
4326         rts
4327
4328 ############################
4329 fmovm_out_ctrl:
4330         mov.l           %a0,%a1                 # move <ea> to a1
4331
4332         sub.l           %d0,%sp                 # subtract size of dump
4333         lea             (%sp),%a0
4334
4335         tst.b           %d1                     # should FP0 be moved?
4336         bpl.b           fmovm_out_ctrl_fp1      # no
4337
4338         mov.l           0x0+EXC_FP0(%a6),(%a0)+ # yes
4339         mov.l           0x4+EXC_FP0(%a6),(%a0)+
4340         mov.l           0x8+EXC_FP0(%a6),(%a0)+
4341
4342 fmovm_out_ctrl_fp1:
4343         lsl.b           &0x1,%d1                # should FP1 be moved?
4344         bpl.b           fmovm_out_ctrl_fp2      # no
4345
4346         mov.l           0x0+EXC_FP1(%a6),(%a0)+ # yes
4347         mov.l           0x4+EXC_FP1(%a6),(%a0)+
4348         mov.l           0x8+EXC_FP1(%a6),(%a0)+
4349
4350 fmovm_out_ctrl_fp2:
4351         lsl.b           &0x1,%d1                # should FP2 be moved?
4352         bpl.b           fmovm_out_ctrl_fp3      # no
4353
4354         fmovm.x         &0x20,(%a0)             # yes
4355         add.l           &0xc,%a0
4356
4357 fmovm_out_ctrl_fp3:
4358         lsl.b           &0x1,%d1                # should FP3 be moved?
4359         bpl.b           fmovm_out_ctrl_fp4      # no
4360
4361         fmovm.x         &0x10,(%a0)             # yes
4362         add.l           &0xc,%a0
4363
4364 fmovm_out_ctrl_fp4:
4365         lsl.b           &0x1,%d1                # should FP4 be moved?
4366         bpl.b           fmovm_out_ctrl_fp5      # no
4367
4368         fmovm.x         &0x08,(%a0)             # yes
4369         add.l           &0xc,%a0
4370
4371 fmovm_out_ctrl_fp5:
4372         lsl.b           &0x1,%d1                # should FP5 be moved?
4373         bpl.b           fmovm_out_ctrl_fp6      # no
4374
4375         fmovm.x         &0x04,(%a0)             # yes
4376         add.l           &0xc,%a0
4377
4378 fmovm_out_ctrl_fp6:
4379         lsl.b           &0x1,%d1                # should FP6 be moved?
4380         bpl.b           fmovm_out_ctrl_fp7      # no
4381
4382         fmovm.x         &0x02,(%a0)             # yes
4383         add.l           &0xc,%a0
4384
4385 fmovm_out_ctrl_fp7:
4386         lsl.b           &0x1,%d1                # should FP7 be moved?
4387         bpl.b           fmovm_out_ctrl_done     # no
4388
4389         fmovm.x         &0x01,(%a0)             # yes
4390         add.l           &0xc,%a0
4391
4392 fmovm_out_ctrl_done:
4393         mov.l           %a1,L_SCR1(%a6)
4394
4395         lea             (%sp),%a0               # pass: supervisor src
4396         mov.l           %d0,-(%sp)              # save size
4397         bsr.l           _dmem_write             # copy data to user mem
4398
4399         mov.l           (%sp)+,%d0
4400         add.l           %d0,%sp                 # clear fpreg data from stack
4401
4402         tst.l           %d1                     # did dstore err?
4403         bne.w           fmovm_out_err           # yes
4404
4405         rts
4406
4407 ############
4408 # MOVE IN: #
4409 ############
4410 fmovm_data_in:
4411         mov.l           %a0,L_SCR1(%a6)
4412
4413         sub.l           %d0,%sp                 # make room for fpregs
4414         lea             (%sp),%a1
4415
4416         mov.l           %d1,-(%sp)              # save bit string for later
4417         mov.l           %d0,-(%sp)              # save # of bytes
4418
4419         bsr.l           _dmem_read              # copy data from user mem
4420
4421         mov.l           (%sp)+,%d0              # retrieve # of bytes
4422
4423         tst.l           %d1                     # did dfetch fail?
4424         bne.w           fmovm_in_err            # yes
4425
4426         mov.l           (%sp)+,%d1              # load bit string
4427
4428         lea             (%sp),%a0               # addr of stack
4429
4430         tst.b           %d1                     # should FP0 be moved?
4431         bpl.b           fmovm_data_in_fp1       # no
4432
4433         mov.l           (%a0)+,0x0+EXC_FP0(%a6) # yes
4434         mov.l           (%a0)+,0x4+EXC_FP0(%a6)
4435         mov.l           (%a0)+,0x8+EXC_FP0(%a6)
4436
4437 fmovm_data_in_fp1:
4438         lsl.b           &0x1,%d1                # should FP1 be moved?
4439         bpl.b           fmovm_data_in_fp2       # no
4440
4441         mov.l           (%a0)+,0x0+EXC_FP1(%a6) # yes
4442         mov.l           (%a0)+,0x4+EXC_FP1(%a6)
4443         mov.l           (%a0)+,0x8+EXC_FP1(%a6)
4444
4445 fmovm_data_in_fp2:
4446         lsl.b           &0x1,%d1                # should FP2 be moved?
4447         bpl.b           fmovm_data_in_fp3       # no
4448
4449         fmovm.x         (%a0)+,&0x20            # yes
4450
4451 fmovm_data_in_fp3:
4452         lsl.b           &0x1,%d1                # should FP3 be moved?
4453         bpl.b           fmovm_data_in_fp4       # no
4454
4455         fmovm.x         (%a0)+,&0x10            # yes
4456
4457 fmovm_data_in_fp4:
4458         lsl.b           &0x1,%d1                # should FP4 be moved?
4459         bpl.b           fmovm_data_in_fp5       # no
4460
4461         fmovm.x         (%a0)+,&0x08            # yes
4462
4463 fmovm_data_in_fp5:
4464         lsl.b           &0x1,%d1                # should FP5 be moved?
4465         bpl.b           fmovm_data_in_fp6       # no
4466
4467         fmovm.x         (%a0)+,&0x04            # yes
4468
4469 fmovm_data_in_fp6:
4470         lsl.b           &0x1,%d1                # should FP6 be moved?
4471         bpl.b           fmovm_data_in_fp7       # no
4472
4473         fmovm.x         (%a0)+,&0x02            # yes
4474
4475 fmovm_data_in_fp7:
4476         lsl.b           &0x1,%d1                # should FP7 be moved?
4477         bpl.b           fmovm_data_in_done      # no
4478
4479         fmovm.x         (%a0)+,&0x01            # yes
4480
4481 fmovm_data_in_done:
4482         add.l           %d0,%sp                 # remove fpregs from stack
4483         rts
4484
4485 #####################################
4486
4487 fmovm_data_done:
4488         rts
4489
4490 ##############################################################################
4491
4492 #
4493 # table indexed by the operation's bit string that gives the number
4494 # of bytes that will be moved.
4495 #
4496 # number of bytes = (# of 1's in bit string) * 12(bytes/fpreg)
4497 #
4498 tbl_fmovm_size:
4499         byte    0x00,0x0c,0x0c,0x18,0x0c,0x18,0x18,0x24
4500         byte    0x0c,0x18,0x18,0x24,0x18,0x24,0x24,0x30
4501         byte    0x0c,0x18,0x18,0x24,0x18,0x24,0x24,0x30
4502         byte    0x18,0x24,0x24,0x30,0x24,0x30,0x30,0x3c
4503         byte    0x0c,0x18,0x18,0x24,0x18,0x24,0x24,0x30
4504         byte    0x18,0x24,0x24,0x30,0x24,0x30,0x30,0x3c
4505         byte    0x18,0x24,0x24,0x30,0x24,0x30,0x30,0x3c
4506         byte    0x24,0x30,0x30,0x3c,0x30,0x3c,0x3c,0x48
4507         byte    0x0c,0x18,0x18,0x24,0x18,0x24,0x24,0x30
4508         byte    0x18,0x24,0x24,0x30,0x24,0x30,0x30,0x3c
4509         byte    0x18,0x24,0x24,0x30,0x24,0x30,0x30,0x3c
4510         byte    0x24,0x30,0x30,0x3c,0x30,0x3c,0x3c,0x48
4511         byte    0x18,0x24,0x24,0x30,0x24,0x30,0x30,0x3c
4512         byte    0x24,0x30,0x30,0x3c,0x30,0x3c,0x3c,0x48
4513         byte    0x24,0x30,0x30,0x3c,0x30,0x3c,0x3c,0x48
4514         byte    0x30,0x3c,0x3c,0x48,0x3c,0x48,0x48,0x54
4515         byte    0x0c,0x18,0x18,0x24,0x18,0x24,0x24,0x30
4516         byte    0x18,0x24,0x24,0x30,0x24,0x30,0x30,0x3c
4517         byte    0x18,0x24,0x24,0x30,0x24,0x30,0x30,0x3c
4518         byte    0x24,0x30,0x30,0x3c,0x30,0x3c,0x3c,0x48
4519         byte    0x18,0x24,0x24,0x30,0x24,0x30,0x30,0x3c
4520         byte    0x24,0x30,0x30,0x3c,0x30,0x3c,0x3c,0x48
4521         byte    0x24,0x30,0x30,0x3c,0x30,0x3c,0x3c,0x48
4522         byte    0x30,0x3c,0x3c,0x48,0x3c,0x48,0x48,0x54
4523         byte    0x18,0x24,0x24,0x30,0x24,0x30,0x30,0x3c
4524         byte    0x24,0x30,0x30,0x3c,0x30,0x3c,0x3c,0x48
4525         byte    0x24,0x30,0x30,0x3c,0x30,0x3c,0x3c,0x48
4526         byte    0x30,0x3c,0x3c,0x48,0x3c,0x48,0x48,0x54
4527         byte    0x24,0x30,0x30,0x3c,0x30,0x3c,0x3c,0x48
4528         byte    0x30,0x3c,0x3c,0x48,0x3c,0x48,0x48,0x54
4529         byte    0x30,0x3c,0x3c,0x48,0x3c,0x48,0x48,0x54
4530         byte    0x3c,0x48,0x48,0x54,0x48,0x54,0x54,0x60
4531
4532 #
4533 # table to convert a pre-decrement bit string into a post-increment
4534 # or control bit string.
4535 # ex:   0x00    ==>     0x00
4536 #       0x01    ==>     0x80
4537 #       0x02    ==>     0x40
4538 #               .
4539 #               .
4540 #       0xfd    ==>     0xbf
4541 #       0xfe    ==>     0x7f
4542 #       0xff    ==>     0xff
4543 #
4544 tbl_fmovm_convert:
4545         byte    0x00,0x80,0x40,0xc0,0x20,0xa0,0x60,0xe0
4546         byte    0x10,0x90,0x50,0xd0,0x30,0xb0,0x70,0xf0
4547         byte    0x08,0x88,0x48,0xc8,0x28,0xa8,0x68,0xe8
4548         byte    0x18,0x98,0x58,0xd8,0x38,0xb8,0x78,0xf8
4549         byte    0x04,0x84,0x44,0xc4,0x24,0xa4,0x64,0xe4
4550         byte    0x14,0x94,0x54,0xd4,0x34,0xb4,0x74,0xf4
4551         byte    0x0c,0x8c,0x4c,0xcc,0x2c,0xac,0x6c,0xec
4552         byte    0x1c,0x9c,0x5c,0xdc,0x3c,0xbc,0x7c,0xfc
4553         byte    0x02,0x82,0x42,0xc2,0x22,0xa2,0x62,0xe2
4554         byte    0x12,0x92,0x52,0xd2,0x32,0xb2,0x72,0xf2
4555         byte    0x0a,0x8a,0x4a,0xca,0x2a,0xaa,0x6a,0xea
4556         byte    0x1a,0x9a,0x5a,0xda,0x3a,0xba,0x7a,0xfa
4557         byte    0x06,0x86,0x46,0xc6,0x26,0xa6,0x66,0xe6
4558         byte    0x16,0x96,0x56,0xd6,0x36,0xb6,0x76,0xf6
4559         byte    0x0e,0x8e,0x4e,0xce,0x2e,0xae,0x6e,0xee
4560         byte    0x1e,0x9e,0x5e,0xde,0x3e,0xbe,0x7e,0xfe
4561         byte    0x01,0x81,0x41,0xc1,0x21,0xa1,0x61,0xe1
4562         byte    0x11,0x91,0x51,0xd1,0x31,0xb1,0x71,0xf1
4563         byte    0x09,0x89,0x49,0xc9,0x29,0xa9,0x69,0xe9
4564         byte    0x19,0x99,0x59,0xd9,0x39,0xb9,0x79,0xf9
4565         byte    0x05,0x85,0x45,0xc5,0x25,0xa5,0x65,0xe5
4566         byte    0x15,0x95,0x55,0xd5,0x35,0xb5,0x75,0xf5
4567         byte    0x0d,0x8d,0x4d,0xcd,0x2d,0xad,0x6d,0xed
4568         byte    0x1d,0x9d,0x5d,0xdd,0x3d,0xbd,0x7d,0xfd
4569         byte    0x03,0x83,0x43,0xc3,0x23,0xa3,0x63,0xe3
4570         byte    0x13,0x93,0x53,0xd3,0x33,0xb3,0x73,0xf3
4571         byte    0x0b,0x8b,0x4b,0xcb,0x2b,0xab,0x6b,0xeb
4572         byte    0x1b,0x9b,0x5b,0xdb,0x3b,0xbb,0x7b,0xfb
4573         byte    0x07,0x87,0x47,0xc7,0x27,0xa7,0x67,0xe7
4574         byte    0x17,0x97,0x57,0xd7,0x37,0xb7,0x77,0xf7
4575         byte    0x0f,0x8f,0x4f,0xcf,0x2f,0xaf,0x6f,0xef
4576         byte    0x1f,0x9f,0x5f,0xdf,0x3f,0xbf,0x7f,0xff
4577
4578         global          fmovm_calc_ea
4579 ###############################################
4580 # _fmovm_calc_ea: calculate effective address #
4581 ###############################################
4582 fmovm_calc_ea:
4583         mov.l           %d0,%a0                 # move # bytes to a0
4584
4585 # currently, MODE and REG are taken from the EXC_OPWORD. this could be
4586 # easily changed if they were inputs passed in registers.
4587         mov.w           EXC_OPWORD(%a6),%d0     # fetch opcode word
4588         mov.w           %d0,%d1                 # make a copy
4589
4590         andi.w          &0x3f,%d0               # extract mode field
4591         andi.l          &0x7,%d1                # extract reg  field
4592
4593 # jump to the corresponding function for each {MODE,REG} pair.
4594         mov.w           (tbl_fea_mode.b,%pc,%d0.w*2),%d0 # fetch jmp distance
4595         jmp             (tbl_fea_mode.b,%pc,%d0.w*1) # jmp to correct ea mode
4596
4597         swbeg           &64
4598 tbl_fea_mode:
4599         short           tbl_fea_mode    -       tbl_fea_mode
4600         short           tbl_fea_mode    -       tbl_fea_mode
4601         short           tbl_fea_mode    -       tbl_fea_mode
4602         short           tbl_fea_mode    -       tbl_fea_mode
4603         short           tbl_fea_mode    -       tbl_fea_mode
4604         short           tbl_fea_mode    -       tbl_fea_mode
4605         short           tbl_fea_mode    -       tbl_fea_mode
4606         short           tbl_fea_mode    -       tbl_fea_mode
4607
4608         short           tbl_fea_mode    -       tbl_fea_mode
4609         short           tbl_fea_mode    -       tbl_fea_mode
4610         short           tbl_fea_mode    -       tbl_fea_mode
4611         short           tbl_fea_mode    -       tbl_fea_mode
4612         short           tbl_fea_mode    -       tbl_fea_mode
4613         short           tbl_fea_mode    -       tbl_fea_mode
4614         short           tbl_fea_mode    -       tbl_fea_mode
4615         short           tbl_fea_mode    -       tbl_fea_mode
4616
4617         short           faddr_ind_a0    -       tbl_fea_mode
4618         short           faddr_ind_a1    -       tbl_fea_mode
4619         short           faddr_ind_a2    -       tbl_fea_mode
4620         short           faddr_ind_a3    -       tbl_fea_mode
4621         short           faddr_ind_a4    -       tbl_fea_mode
4622         short           faddr_ind_a5    -       tbl_fea_mode
4623         short           faddr_ind_a6    -       tbl_fea_mode
4624         short           faddr_ind_a7    -       tbl_fea_mode
4625
4626         short           faddr_ind_p_a0  -       tbl_fea_mode
4627         short           faddr_ind_p_a1  -       tbl_fea_mode
4628         short           faddr_ind_p_a2  -       tbl_fea_mode
4629         short           faddr_ind_p_a3  -       tbl_fea_mode
4630         short           faddr_ind_p_a4  -       tbl_fea_mode
4631         short           faddr_ind_p_a5  -       tbl_fea_mode
4632         short           faddr_ind_p_a6  -       tbl_fea_mode
4633         short           faddr_ind_p_a7  -       tbl_fea_mode
4634
4635         short           faddr_ind_m_a0  -       tbl_fea_mode
4636         short           faddr_ind_m_a1  -       tbl_fea_mode
4637         short           faddr_ind_m_a2  -       tbl_fea_mode
4638         short           faddr_ind_m_a3  -       tbl_fea_mode
4639         short           faddr_ind_m_a4  -       tbl_fea_mode
4640         short           faddr_ind_m_a5  -       tbl_fea_mode
4641         short           faddr_ind_m_a6  -       tbl_fea_mode
4642         short           faddr_ind_m_a7  -       tbl_fea_mode
4643
4644         short           faddr_ind_disp_a0       -       tbl_fea_mode
4645         short           faddr_ind_disp_a1       -       tbl_fea_mode
4646         short           faddr_ind_disp_a2       -       tbl_fea_mode
4647         short           faddr_ind_disp_a3       -       tbl_fea_mode
4648         short           faddr_ind_disp_a4       -       tbl_fea_mode
4649         short           faddr_ind_disp_a5       -       tbl_fea_mode
4650         short           faddr_ind_disp_a6       -       tbl_fea_mode
4651         short           faddr_ind_disp_a7       -       tbl_fea_mode
4652
4653         short           faddr_ind_ext   -       tbl_fea_mode
4654         short           faddr_ind_ext   -       tbl_fea_mode
4655         short           faddr_ind_ext   -       tbl_fea_mode
4656         short           faddr_ind_ext   -       tbl_fea_mode
4657         short           faddr_ind_ext   -       tbl_fea_mode
4658         short           faddr_ind_ext   -       tbl_fea_mode
4659         short           faddr_ind_ext   -       tbl_fea_mode
4660         short           faddr_ind_ext   -       tbl_fea_mode
4661
4662         short           fabs_short      -       tbl_fea_mode
4663         short           fabs_long       -       tbl_fea_mode
4664         short           fpc_ind         -       tbl_fea_mode
4665         short           fpc_ind_ext     -       tbl_fea_mode
4666         short           tbl_fea_mode    -       tbl_fea_mode
4667         short           tbl_fea_mode    -       tbl_fea_mode
4668         short           tbl_fea_mode    -       tbl_fea_mode
4669         short           tbl_fea_mode    -       tbl_fea_mode
4670
4671 ###################################
4672 # Address register indirect: (An) #
4673 ###################################
4674 faddr_ind_a0:
4675         mov.l           EXC_DREGS+0x8(%a6),%a0  # Get current a0
4676         rts
4677
4678 faddr_ind_a1:
4679         mov.l           EXC_DREGS+0xc(%a6),%a0  # Get current a1
4680         rts
4681
4682 faddr_ind_a2:
4683         mov.l           %a2,%a0                 # Get current a2
4684         rts
4685
4686 faddr_ind_a3:
4687         mov.l           %a3,%a0                 # Get current a3
4688         rts
4689
4690 faddr_ind_a4:
4691         mov.l           %a4,%a0                 # Get current a4
4692         rts
4693
4694 faddr_ind_a5:
4695         mov.l           %a5,%a0                 # Get current a5
4696         rts
4697
4698 faddr_ind_a6:
4699         mov.l           (%a6),%a0               # Get current a6
4700         rts
4701
4702 faddr_ind_a7:
4703         mov.l           EXC_A7(%a6),%a0         # Get current a7
4704         rts
4705
4706 #####################################################
4707 # Address register indirect w/ postincrement: (An)+ #
4708 #####################################################
4709 faddr_ind_p_a0:
4710         mov.l           EXC_DREGS+0x8(%a6),%d0  # Get current a0
4711         mov.l           %d0,%d1
4712         add.l           %a0,%d1                 # Increment
4713         mov.l           %d1,EXC_DREGS+0x8(%a6)  # Save incr value
4714         mov.l           %d0,%a0
4715         rts
4716
4717 faddr_ind_p_a1:
4718         mov.l           EXC_DREGS+0xc(%a6),%d0  # Get current a1
4719         mov.l           %d0,%d1
4720         add.l           %a0,%d1                 # Increment
4721         mov.l           %d1,EXC_DREGS+0xc(%a6)  # Save incr value
4722         mov.l           %d0,%a0
4723         rts
4724
4725 faddr_ind_p_a2:
4726         mov.l           %a2,%d0                 # Get current a2
4727         mov.l           %d0,%d1
4728         add.l           %a0,%d1                 # Increment
4729         mov.l           %d1,%a2                 # Save incr value
4730         mov.l           %d0,%a0
4731         rts
4732
4733 faddr_ind_p_a3:
4734         mov.l           %a3,%d0                 # Get current a3
4735         mov.l           %d0,%d1
4736         add.l           %a0,%d1                 # Increment
4737         mov.l           %d1,%a3                 # Save incr value
4738         mov.l           %d0,%a0
4739         rts
4740
4741 faddr_ind_p_a4:
4742         mov.l           %a4,%d0                 # Get current a4
4743         mov.l           %d0,%d1
4744         add.l           %a0,%d1                 # Increment
4745         mov.l           %d1,%a4                 # Save incr value
4746         mov.l           %d0,%a0
4747         rts
4748
4749 faddr_ind_p_a5:
4750         mov.l           %a5,%d0                 # Get current a5
4751         mov.l           %d0,%d1
4752         add.l           %a0,%d1                 # Increment
4753         mov.l           %d1,%a5                 # Save incr value
4754         mov.l           %d0,%a0
4755         rts
4756
4757 faddr_ind_p_a6:
4758         mov.l           (%a6),%d0               # Get current a6
4759         mov.l           %d0,%d1
4760         add.l           %a0,%d1                 # Increment
4761         mov.l           %d1,(%a6)               # Save incr value
4762         mov.l           %d0,%a0
4763         rts
4764
4765 faddr_ind_p_a7:
4766         mov.b           &mia7_flg,SPCOND_FLG(%a6) # set "special case" flag
4767
4768         mov.l           EXC_A7(%a6),%d0         # Get current a7
4769         mov.l           %d0,%d1
4770         add.l           %a0,%d1                 # Increment
4771         mov.l           %d1,EXC_A7(%a6)         # Save incr value
4772         mov.l           %d0,%a0
4773         rts
4774
4775 ####################################################
4776 # Address register indirect w/ predecrement: -(An) #
4777 ####################################################
4778 faddr_ind_m_a0:
4779         mov.l           EXC_DREGS+0x8(%a6),%d0  # Get current a0
4780         sub.l           %a0,%d0                 # Decrement
4781         mov.l           %d0,EXC_DREGS+0x8(%a6)  # Save decr value
4782         mov.l           %d0,%a0
4783         rts
4784
4785 faddr_ind_m_a1:
4786         mov.l           EXC_DREGS+0xc(%a6),%d0  # Get current a1
4787         sub.l           %a0,%d0                 # Decrement
4788         mov.l           %d0,EXC_DREGS+0xc(%a6)  # Save decr value
4789         mov.l           %d0,%a0
4790         rts
4791
4792 faddr_ind_m_a2:
4793         mov.l           %a2,%d0                 # Get current a2
4794         sub.l           %a0,%d0                 # Decrement
4795         mov.l           %d0,%a2                 # Save decr value
4796         mov.l           %d0,%a0
4797         rts
4798
4799 faddr_ind_m_a3:
4800         mov.l           %a3,%d0                 # Get current a3
4801         sub.l           %a0,%d0                 # Decrement
4802         mov.l           %d0,%a3                 # Save decr value
4803         mov.l           %d0,%a0
4804         rts
4805
4806 faddr_ind_m_a4:
4807         mov.l           %a4,%d0                 # Get current a4
4808         sub.l           %a0,%d0                 # Decrement
4809         mov.l           %d0,%a4                 # Save decr value
4810         mov.l           %d0,%a0
4811         rts
4812
4813 faddr_ind_m_a5:
4814         mov.l           %a5,%d0                 # Get current a5
4815         sub.l           %a0,%d0                 # Decrement
4816         mov.l           %d0,%a5                 # Save decr value
4817         mov.l           %d0,%a0
4818         rts
4819
4820 faddr_ind_m_a6:
4821         mov.l           (%a6),%d0               # Get current a6
4822         sub.l           %a0,%d0                 # Decrement
4823         mov.l           %d0,(%a6)               # Save decr value
4824         mov.l           %d0,%a0
4825         rts
4826
4827 faddr_ind_m_a7:
4828         mov.b           &mda7_flg,SPCOND_FLG(%a6) # set "special case" flag
4829
4830         mov.l           EXC_A7(%a6),%d0         # Get current a7
4831         sub.l           %a0,%d0                 # Decrement
4832         mov.l           %d0,EXC_A7(%a6)         # Save decr value
4833         mov.l           %d0,%a0
4834         rts
4835
4836 ########################################################
4837 # Address register indirect w/ displacement: (d16, An) #
4838 ########################################################
4839 faddr_ind_disp_a0:
4840         mov.l           EXC_EXTWPTR(%a6),%a0    # fetch instruction addr
4841         addq.l          &0x2,EXC_EXTWPTR(%a6)   # incr instruction ptr
4842         bsr.l           _imem_read_word
4843
4844         tst.l           %d1                     # did ifetch fail?
4845         bne.l           iea_iacc                # yes
4846
4847         mov.w           %d0,%a0                 # sign extend displacement
4848
4849         add.l           EXC_DREGS+0x8(%a6),%a0  # a0 + d16
4850         rts
4851
4852 faddr_ind_disp_a1:
4853         mov.l           EXC_EXTWPTR(%a6),%a0    # fetch instruction addr
4854         addq.l          &0x2,EXC_EXTWPTR(%a6)   # incr instruction ptr
4855         bsr.l           _imem_read_word
4856
4857         tst.l           %d1                     # did ifetch fail?
4858         bne.l           iea_iacc                # yes
4859
4860         mov.w           %d0,%a0                 # sign extend displacement
4861
4862         add.l           EXC_DREGS+0xc(%a6),%a0  # a1 + d16
4863         rts
4864
4865 faddr_ind_disp_a2:
4866         mov.l           EXC_EXTWPTR(%a6),%a0    # fetch instruction addr
4867         addq.l          &0x2,EXC_EXTWPTR(%a6)   # incr instruction ptr
4868         bsr.l           _imem_read_word
4869
4870         tst.l           %d1                     # did ifetch fail?
4871         bne.l           iea_iacc                # yes
4872
4873         mov.w           %d0,%a0                 # sign extend displacement
4874
4875         add.l           %a2,%a0                 # a2 + d16
4876         rts
4877
4878 faddr_ind_disp_a3:
4879         mov.l           EXC_EXTWPTR(%a6),%a0    # fetch instruction addr
4880         addq.l          &0x2,EXC_EXTWPTR(%a6)   # incr instruction ptr
4881         bsr.l           _imem_read_word
4882
4883         tst.l           %d1                     # did ifetch fail?
4884         bne.l           iea_iacc                # yes
4885
4886         mov.w           %d0,%a0                 # sign extend displacement
4887
4888         add.l           %a3,%a0                 # a3 + d16
4889         rts
4890
4891 faddr_ind_disp_a4:
4892         mov.l           EXC_EXTWPTR(%a6),%a0    # fetch instruction addr
4893         addq.l          &0x2,EXC_EXTWPTR(%a6)   # incr instruction ptr
4894         bsr.l           _imem_read_word
4895
4896         tst.l           %d1                     # did ifetch fail?
4897         bne.l           iea_iacc                # yes
4898
4899         mov.w           %d0,%a0                 # sign extend displacement
4900
4901         add.l           %a4,%a0                 # a4 + d16
4902         rts
4903
4904 faddr_ind_disp_a5:
4905         mov.l           EXC_EXTWPTR(%a6),%a0    # fetch instruction addr
4906         addq.l          &0x2,EXC_EXTWPTR(%a6)   # incr instruction ptr
4907         bsr.l           _imem_read_word
4908
4909         tst.l           %d1                     # did ifetch fail?
4910         bne.l           iea_iacc                # yes
4911
4912         mov.w           %d0,%a0                 # sign extend displacement
4913
4914         add.l           %a5,%a0                 # a5 + d16
4915         rts
4916
4917 faddr_ind_disp_a6:
4918         mov.l           EXC_EXTWPTR(%a6),%a0    # fetch instruction addr
4919         addq.l          &0x2,EXC_EXTWPTR(%a6)   # incr instruction ptr
4920         bsr.l           _imem_read_word
4921
4922         tst.l           %d1                     # did ifetch fail?
4923         bne.l           iea_iacc                # yes
4924
4925         mov.w           %d0,%a0                 # sign extend displacement
4926
4927         add.l           (%a6),%a0               # a6 + d16
4928         rts
4929
4930 faddr_ind_disp_a7:
4931         mov.l           EXC_EXTWPTR(%a6),%a0    # fetch instruction addr
4932         addq.l          &0x2,EXC_EXTWPTR(%a6)   # incr instruction ptr
4933         bsr.l           _imem_read_word
4934
4935         tst.l           %d1                     # did ifetch fail?
4936         bne.l           iea_iacc                # yes
4937
4938         mov.w           %d0,%a0                 # sign extend displacement
4939
4940         add.l           EXC_A7(%a6),%a0         # a7 + d16
4941         rts
4942
4943 ########################################################################
4944 # Address register indirect w/ index(8-bit displacement): (d8, An, Xn) #
4945 #    "       "         "    w/   "  (base displacement): (bd, An, Xn)  #
4946 # Memory indirect postindexed: ([bd, An], Xn, od)                      #
4947 # Memory indirect preindexed: ([bd, An, Xn], od)                       #
4948 ########################################################################
4949 faddr_ind_ext:
4950         addq.l          &0x8,%d1
4951         bsr.l           fetch_dreg              # fetch base areg
4952         mov.l           %d0,-(%sp)
4953
4954         mov.l           EXC_EXTWPTR(%a6),%a0    # fetch instruction addr
4955         addq.l          &0x2,EXC_EXTWPTR(%a6)   # incr instruction ptr
4956         bsr.l           _imem_read_word         # fetch extword in d0
4957
4958         tst.l           %d1                     # did ifetch fail?
4959         bne.l           iea_iacc                # yes
4960
4961         mov.l           (%sp)+,%a0
4962
4963         btst            &0x8,%d0
4964         bne.w           fcalc_mem_ind
4965
4966         mov.l           %d0,L_SCR1(%a6)         # hold opword
4967
4968         mov.l           %d0,%d1
4969         rol.w           &0x4,%d1
4970         andi.w          &0xf,%d1                # extract index regno
4971
4972 # count on fetch_dreg() not to alter a0...
4973         bsr.l           fetch_dreg              # fetch index
4974
4975         mov.l           %d2,-(%sp)              # save d2
4976         mov.l           L_SCR1(%a6),%d2         # fetch opword
4977
4978         btst            &0xb,%d2                # is it word or long?
4979         bne.b           faii8_long
4980         ext.l           %d0                     # sign extend word index
4981 faii8_long:
4982         mov.l           %d2,%d1
4983         rol.w           &0x7,%d1
4984         andi.l          &0x3,%d1                # extract scale value
4985
4986         lsl.l           %d1,%d0                 # shift index by scale
4987
4988         extb.l          %d2                     # sign extend displacement
4989         add.l           %d2,%d0                 # index + disp
4990         add.l           %d0,%a0                 # An + (index + disp)
4991
4992         mov.l           (%sp)+,%d2              # restore old d2
4993         rts
4994
4995 ###########################
4996 # Absolute short: (XXX).W #
4997 ###########################
4998 fabs_short:
4999         mov.l           EXC_EXTWPTR(%a6),%a0    # fetch instruction addr
5000         addq.l          &0x2,EXC_EXTWPTR(%a6)   # incr instruction ptr
5001         bsr.l           _imem_read_word         # fetch short address
5002
5003         tst.l           %d1                     # did ifetch fail?
5004         bne.l           iea_iacc                # yes
5005
5006         mov.w           %d0,%a0                 # return <ea> in a0
5007         rts
5008
5009 ##########################
5010 # Absolute long: (XXX).L #
5011 ##########################
5012 fabs_long:
5013         mov.l           EXC_EXTWPTR(%a6),%a0    # fetch instruction addr
5014         addq.l          &0x4,EXC_EXTWPTR(%a6)   # incr instruction ptr
5015         bsr.l           _imem_read_long         # fetch long address
5016
5017         tst.l           %d1                     # did ifetch fail?
5018         bne.l           iea_iacc                # yes
5019
5020         mov.l           %d0,%a0                 # return <ea> in a0
5021         rts
5022
5023 #######################################################
5024 # Program counter indirect w/ displacement: (d16, PC) #
5025 #######################################################
5026 fpc_ind:
5027         mov.l           EXC_EXTWPTR(%a6),%a0    # fetch instruction addr
5028         addq.l          &0x2,EXC_EXTWPTR(%a6)   # incr instruction ptr
5029         bsr.l           _imem_read_word         # fetch word displacement
5030
5031         tst.l           %d1                     # did ifetch fail?
5032         bne.l           iea_iacc                # yes
5033
5034         mov.w           %d0,%a0                 # sign extend displacement
5035
5036         add.l           EXC_EXTWPTR(%a6),%a0    # pc + d16
5037
5038 # _imem_read_word() increased the extwptr by 2. need to adjust here.
5039         subq.l          &0x2,%a0                # adjust <ea>
5040         rts
5041
5042 ##########################################################
5043 # PC indirect w/ index(8-bit displacement): (d8, PC, An) #
5044 # "     "     w/   "  (base displacement): (bd, PC, An)  #
5045 # PC memory indirect postindexed: ([bd, PC], Xn, od)     #
5046 # PC memory indirect preindexed: ([bd, PC, Xn], od)      #
5047 ##########################################################
5048 fpc_ind_ext:
5049         mov.l           EXC_EXTWPTR(%a6),%a0    # fetch instruction addr
5050         addq.l          &0x2,EXC_EXTWPTR(%a6)   # incr instruction ptr
5051         bsr.l           _imem_read_word         # fetch ext word
5052
5053         tst.l           %d1                     # did ifetch fail?
5054         bne.l           iea_iacc                # yes
5055
5056         mov.l           EXC_EXTWPTR(%a6),%a0    # put base in a0
5057         subq.l          &0x2,%a0                # adjust base
5058
5059         btst            &0x8,%d0                # is disp only 8 bits?
5060         bne.w           fcalc_mem_ind           # calc memory indirect
5061
5062         mov.l           %d0,L_SCR1(%a6)         # store opword
5063
5064         mov.l           %d0,%d1                 # make extword copy
5065         rol.w           &0x4,%d1                # rotate reg num into place
5066         andi.w          &0xf,%d1                # extract register number
5067
5068 # count on fetch_dreg() not to alter a0...
5069         bsr.l           fetch_dreg              # fetch index
5070
5071         mov.l           %d2,-(%sp)              # save d2
5072         mov.l           L_SCR1(%a6),%d2         # fetch opword
5073
5074         btst            &0xb,%d2                # is index word or long?
5075         bne.b           fpii8_long              # long
5076         ext.l           %d0                     # sign extend word index
5077 fpii8_long:
5078         mov.l           %d2,%d1
5079         rol.w           &0x7,%d1                # rotate scale value into place
5080         andi.l          &0x3,%d1                # extract scale value
5081
5082         lsl.l           %d1,%d0                 # shift index by scale
5083
5084         extb.l          %d2                     # sign extend displacement
5085         add.l           %d2,%d0                 # disp + index
5086         add.l           %d0,%a0                 # An + (index + disp)
5087
5088         mov.l           (%sp)+,%d2              # restore temp register
5089         rts
5090
5091 # d2 = index
5092 # d3 = base
5093 # d4 = od
5094 # d5 = extword
5095 fcalc_mem_ind:
5096         btst            &0x6,%d0                # is the index suppressed?
5097         beq.b           fcalc_index
5098
5099         movm.l          &0x3c00,-(%sp)          # save d2-d5
5100
5101         mov.l           %d0,%d5                 # put extword in d5
5102         mov.l           %a0,%d3                 # put base in d3
5103
5104         clr.l           %d2                     # yes, so index = 0
5105         bra.b           fbase_supp_ck
5106
5107 # index:
5108 fcalc_index:
5109         mov.l           %d0,L_SCR1(%a6)         # save d0 (opword)
5110         bfextu          %d0{&16:&4},%d1         # fetch dreg index
5111         bsr.l           fetch_dreg
5112
5113         movm.l          &0x3c00,-(%sp)          # save d2-d5
5114         mov.l           %d0,%d2                 # put index in d2
5115         mov.l           L_SCR1(%a6),%d5
5116         mov.l           %a0,%d3
5117
5118         btst            &0xb,%d5                # is index word or long?
5119         bne.b           fno_ext
5120         ext.l           %d2
5121
5122 fno_ext:
5123         bfextu          %d5{&21:&2},%d0
5124         lsl.l           %d0,%d2
5125
5126 # base address (passed as parameter in d3):
5127 # we clear the value here if it should actually be suppressed.
5128 fbase_supp_ck:
5129         btst            &0x7,%d5                # is the bd suppressed?
5130         beq.b           fno_base_sup
5131         clr.l           %d3
5132
5133 # base displacement:
5134 fno_base_sup:
5135         bfextu          %d5{&26:&2},%d0         # get bd size
5136 #       beq.l           fmovm_error             # if (size == 0) it's reserved
5137
5138         cmpi.b          %d0,&0x2
5139         blt.b           fno_bd
5140         beq.b           fget_word_bd
5141
5142         mov.l           EXC_EXTWPTR(%a6),%a0    # fetch instruction addr
5143         addq.l          &0x4,EXC_EXTWPTR(%a6)   # incr instruction ptr
5144         bsr.l           _imem_read_long
5145
5146         tst.l           %d1                     # did ifetch fail?
5147         bne.l           fcea_iacc               # yes
5148
5149         bra.b           fchk_ind
5150
5151 fget_word_bd:
5152         mov.l           EXC_EXTWPTR(%a6),%a0    # fetch instruction addr
5153         addq.l          &0x2,EXC_EXTWPTR(%a6)   # incr instruction ptr
5154         bsr.l           _imem_read_word
5155
5156         tst.l           %d1                     # did ifetch fail?
5157         bne.l           fcea_iacc               # yes
5158
5159         ext.l           %d0                     # sign extend bd
5160
5161 fchk_ind:
5162         add.l           %d0,%d3                 # base += bd
5163
5164 # outer displacement:
5165 fno_bd:
5166         bfextu          %d5{&30:&2},%d0         # is od suppressed?
5167         beq.w           faii_bd
5168
5169         cmpi.b          %d0,&0x2
5170         blt.b           fnull_od
5171         beq.b           fword_od
5172
5173         mov.l           EXC_EXTWPTR(%a6),%a0    # fetch instruction addr
5174         addq.l          &0x4,EXC_EXTWPTR(%a6)   # incr instruction ptr
5175         bsr.l           _imem_read_long
5176
5177         tst.l           %d1                     # did ifetch fail?
5178         bne.l           fcea_iacc               # yes
5179
5180         bra.b           fadd_them
5181
5182 fword_od:
5183         mov.l           EXC_EXTWPTR(%a6),%a0    # fetch instruction addr
5184         addq.l          &0x2,EXC_EXTWPTR(%a6)   # incr instruction ptr
5185         bsr.l           _imem_read_word
5186
5187         tst.l           %d1                     # did ifetch fail?
5188         bne.l           fcea_iacc               # yes
5189
5190         ext.l           %d0                     # sign extend od
5191         bra.b           fadd_them
5192
5193 fnull_od:
5194         clr.l           %d0
5195
5196 fadd_them:
5197         mov.l           %d0,%d4
5198
5199         btst            &0x2,%d5                # pre or post indexing?
5200         beq.b           fpre_indexed
5201
5202         mov.l           %d3,%a0
5203         bsr.l           _dmem_read_long
5204
5205         tst.l           %d1                     # did dfetch fail?
5206         bne.w           fcea_err                # yes
5207
5208         add.l           %d2,%d0                 # <ea> += index
5209         add.l           %d4,%d0                 # <ea> += od
5210         bra.b           fdone_ea
5211
5212 fpre_indexed:
5213         add.l           %d2,%d3                 # preindexing
5214         mov.l           %d3,%a0
5215         bsr.l           _dmem_read_long
5216
5217         tst.l           %d1                     # did dfetch fail?
5218         bne.w           fcea_err                # yes
5219
5220         add.l           %d4,%d0                 # ea += od
5221         bra.b           fdone_ea
5222
5223 faii_bd:
5224         add.l           %d2,%d3                 # ea = (base + bd) + index
5225         mov.l           %d3,%d0
5226 fdone_ea:
5227         mov.l           %d0,%a0
5228
5229         movm.l          (%sp)+,&0x003c          # restore d2-d5
5230         rts
5231
5232 #########################################################
5233 fcea_err:
5234         mov.l           %d3,%a0
5235
5236         movm.l          (%sp)+,&0x003c          # restore d2-d5
5237         mov.w           &0x0101,%d0
5238         bra.l           iea_dacc
5239
5240 fcea_iacc:
5241         movm.l          (%sp)+,&0x003c          # restore d2-d5
5242         bra.l           iea_iacc
5243
5244 fmovm_out_err:
5245         bsr.l           restore
5246         mov.w           &0x00e1,%d0
5247         bra.b           fmovm_err
5248
5249 fmovm_in_err:
5250         bsr.l           restore
5251         mov.w           &0x0161,%d0
5252
5253 fmovm_err:
5254         mov.l           L_SCR1(%a6),%a0
5255         bra.l           iea_dacc
5256
5257 #########################################################################
5258 # XDEF **************************************************************** #
5259 #       fmovm_ctrl(): emulate fmovm.l of control registers instr        #
5260 #                                                                       #
5261 # XREF **************************************************************** #
5262 #       _imem_read_long() - read longword from memory                   #
5263 #       iea_iacc() - _imem_read_long() failed; error recovery           #
5264 #                                                                       #
5265 # INPUT *************************************************************** #
5266 #       None                                                            #
5267 #                                                                       #
5268 # OUTPUT ************************************************************** #
5269 #       If _imem_read_long() doesn't fail:                              #
5270 #               USER_FPCR(a6)  = new FPCR value                         #
5271 #               USER_FPSR(a6)  = new FPSR value                         #
5272 #               USER_FPIAR(a6) = new FPIAR value                        #
5273 #                                                                       #
5274 # ALGORITHM *********************************************************** #
5275 #       Decode the instruction type by looking at the extension word    #
5276 # in order to see how many control registers to fetch from memory.      #
5277 # Fetch them using _imem_read_long(). If this fetch fails, exit through #
5278 # the special access error exit handler iea_iacc().                     #
5279 #                                                                       #
5280 # Instruction word decoding:                                            #
5281 #                                                                       #
5282 #       fmovem.l #<data>, {FPIAR&|FPCR&|FPSR}                           #
5283 #                                                                       #
5284 #               WORD1                   WORD2                           #
5285 #       1111 0010 00 111100     100$ $$00 0000 0000                     #
5286 #                                                                       #
5287 #       $$$ (100): FPCR                                                 #
5288 #           (010): FPSR                                                 #
5289 #           (001): FPIAR                                                #
5290 #           (000): FPIAR                                                #
5291 #                                                                       #
5292 #########################################################################
5293
5294         global          fmovm_ctrl
5295 fmovm_ctrl:
5296         mov.b           EXC_EXTWORD(%a6),%d0    # fetch reg select bits
5297         cmpi.b          %d0,&0x9c               # fpcr & fpsr & fpiar ?
5298         beq.w           fctrl_in_7              # yes
5299         cmpi.b          %d0,&0x98               # fpcr & fpsr ?
5300         beq.w           fctrl_in_6              # yes
5301         cmpi.b          %d0,&0x94               # fpcr & fpiar ?
5302         beq.b           fctrl_in_5              # yes
5303
5304 # fmovem.l #<data>, fpsr/fpiar
5305 fctrl_in_3:
5306         mov.l           EXC_EXTWPTR(%a6),%a0    # fetch instruction addr
5307         addq.l          &0x4,EXC_EXTWPTR(%a6)   # incr instruction ptr
5308         bsr.l           _imem_read_long         # fetch FPSR from mem
5309
5310         tst.l           %d1                     # did ifetch fail?
5311         bne.l           iea_iacc                # yes
5312
5313         mov.l           %d0,USER_FPSR(%a6)      # store new FPSR to stack
5314         mov.l           EXC_EXTWPTR(%a6),%a0    # fetch instruction addr
5315         addq.l          &0x4,EXC_EXTWPTR(%a6)   # incr instruction ptr
5316         bsr.l           _imem_read_long         # fetch FPIAR from mem
5317
5318         tst.l           %d1                     # did ifetch fail?
5319         bne.l           iea_iacc                # yes
5320
5321         mov.l           %d0,USER_FPIAR(%a6)     # store new FPIAR to stack
5322         rts
5323
5324 # fmovem.l #<data>, fpcr/fpiar
5325 fctrl_in_5:
5326         mov.l           EXC_EXTWPTR(%a6),%a0    # fetch instruction addr
5327         addq.l          &0x4,EXC_EXTWPTR(%a6)   # incr instruction ptr
5328         bsr.l           _imem_read_long         # fetch FPCR from mem
5329
5330         tst.l           %d1                     # did ifetch fail?
5331         bne.l           iea_iacc                # yes
5332
5333         mov.l           %d0,USER_FPCR(%a6)      # store new FPCR to stack
5334         mov.l           EXC_EXTWPTR(%a6),%a0    # fetch instruction addr
5335         addq.l          &0x4,EXC_EXTWPTR(%a6)   # incr instruction ptr
5336         bsr.l           _imem_read_long         # fetch FPIAR from mem
5337
5338         tst.l           %d1                     # did ifetch fail?
5339         bne.l           iea_iacc                # yes
5340
5341         mov.l           %d0,USER_FPIAR(%a6)     # store new FPIAR to stack
5342         rts
5343
5344 # fmovem.l #<data>, fpcr/fpsr
5345 fctrl_in_6:
5346         mov.l           EXC_EXTWPTR(%a6),%a0    # fetch instruction addr
5347         addq.l          &0x4,EXC_EXTWPTR(%a6)   # incr instruction ptr
5348         bsr.l           _imem_read_long         # fetch FPCR from mem
5349
5350         tst.l           %d1                     # did ifetch fail?
5351         bne.l           iea_iacc                # yes
5352
5353         mov.l           %d0,USER_FPCR(%a6)      # store new FPCR to mem
5354         mov.l           EXC_EXTWPTR(%a6),%a0    # fetch instruction addr
5355         addq.l          &0x4,EXC_EXTWPTR(%a6)   # incr instruction ptr
5356         bsr.l           _imem_read_long         # fetch FPSR from mem
5357
5358         tst.l           %d1                     # did ifetch fail?
5359         bne.l           iea_iacc                # yes
5360
5361         mov.l           %d0,USER_FPSR(%a6)      # store new FPSR to mem
5362         rts
5363
5364 # fmovem.l #<data>, fpcr/fpsr/fpiar
5365 fctrl_in_7:
5366         mov.l           EXC_EXTWPTR(%a6),%a0    # fetch instruction addr
5367         addq.l          &0x4,EXC_EXTWPTR(%a6)   # incr instruction ptr
5368         bsr.l           _imem_read_long         # fetch FPCR from mem
5369
5370         tst.l           %d1                     # did ifetch fail?
5371         bne.l           iea_iacc                # yes
5372
5373         mov.l           %d0,USER_FPCR(%a6)      # store new FPCR to mem
5374         mov.l           EXC_EXTWPTR(%a6),%a0    # fetch instruction addr
5375         addq.l          &0x4,EXC_EXTWPTR(%a6)   # incr instruction ptr
5376         bsr.l           _imem_read_long         # fetch FPSR from mem
5377
5378         tst.l           %d1                     # did ifetch fail?
5379         bne.l           iea_iacc                # yes
5380
5381         mov.l           %d0,USER_FPSR(%a6)      # store new FPSR to mem
5382         mov.l           EXC_EXTWPTR(%a6),%a0    # fetch instruction addr
5383         addq.l          &0x4,EXC_EXTWPTR(%a6)   # incr instruction ptr
5384         bsr.l           _imem_read_long         # fetch FPIAR from mem
5385
5386         tst.l           %d1                     # did ifetch fail?
5387         bne.l           iea_iacc                # yes
5388
5389         mov.l           %d0,USER_FPIAR(%a6)     # store new FPIAR to mem
5390         rts
5391
5392 ##########################################################################
5393
5394 #########################################################################
5395 # XDEF **************************************************************** #
5396 #       addsub_scaler2(): scale inputs to fadd/fsub such that no        #
5397 #                         OVFL/UNFL exceptions will result              #
5398 #                                                                       #
5399 # XREF **************************************************************** #
5400 #       norm() - normalize mantissa after adjusting exponent            #
5401 #                                                                       #
5402 # INPUT *************************************************************** #
5403 #       FP_SRC(a6) = fp op1(src)                                        #
5404 #       FP_DST(a6) = fp op2(dst)                                        #
5405 #                                                                       #
5406 # OUTPUT ************************************************************** #
5407 #       FP_SRC(a6) = fp op1 scaled(src)                                 #
5408 #       FP_DST(a6) = fp op2 scaled(dst)                                 #
5409 #       d0         = scale amount                                       #
5410 #                                                                       #
5411 # ALGORITHM *********************************************************** #
5412 #       If the DST exponent is > the SRC exponent, set the DST exponent #
5413 # equal to 0x3fff and scale the SRC exponent by the value that the      #
5414 # DST exponent was scaled by. If the SRC exponent is greater or equal,  #
5415 # do the opposite. Return this scale factor in d0.                      #
5416 #       If the two exponents differ by > the number of mantissa bits    #
5417 # plus two, then set the smallest exponent to a very small value as a   #
5418 # quick shortcut.                                                       #
5419 #                                                                       #
5420 #########################################################################
5421
5422         global          addsub_scaler2
5423 addsub_scaler2:
5424         mov.l           SRC_HI(%a0),FP_SCR0_HI(%a6)
5425         mov.l           DST_HI(%a1),FP_SCR1_HI(%a6)
5426         mov.l           SRC_LO(%a0),FP_SCR0_LO(%a6)
5427         mov.l           DST_LO(%a1),FP_SCR1_LO(%a6)
5428         mov.w           SRC_EX(%a0),%d0
5429         mov.w           DST_EX(%a1),%d1
5430         mov.w           %d0,FP_SCR0_EX(%a6)
5431         mov.w           %d1,FP_SCR1_EX(%a6)
5432
5433         andi.w          &0x7fff,%d0
5434         andi.w          &0x7fff,%d1
5435         mov.w           %d0,L_SCR1(%a6)         # store src exponent
5436         mov.w           %d1,2+L_SCR1(%a6)       # store dst exponent
5437
5438         cmp.w           %d0, %d1                # is src exp >= dst exp?
5439         bge.l           src_exp_ge2
5440
5441 # dst exp is >  src exp; scale dst to exp = 0x3fff
5442 dst_exp_gt2:
5443         bsr.l           scale_to_zero_dst
5444         mov.l           %d0,-(%sp)              # save scale factor
5445
5446         cmpi.b          STAG(%a6),&DENORM       # is dst denormalized?
5447         bne.b           cmpexp12
5448
5449         lea             FP_SCR0(%a6),%a0
5450         bsr.l           norm                    # normalize the denorm; result is new exp
5451         neg.w           %d0                     # new exp = -(shft val)
5452         mov.w           %d0,L_SCR1(%a6)         # inset new exp
5453
5454 cmpexp12:
5455         mov.w           2+L_SCR1(%a6),%d0
5456         subi.w          &mantissalen+2,%d0      # subtract mantissalen+2 from larger exp
5457
5458         cmp.w           %d0,L_SCR1(%a6)         # is difference >= len(mantissa)+2?
5459         bge.b           quick_scale12
5460
5461         mov.w           L_SCR1(%a6),%d0
5462         add.w           0x2(%sp),%d0            # scale src exponent by scale factor
5463         mov.w           FP_SCR0_EX(%a6),%d1
5464         and.w           &0x8000,%d1
5465         or.w            %d1,%d0                 # concat {sgn,new exp}
5466         mov.w           %d0,FP_SCR0_EX(%a6)     # insert new dst exponent
5467
5468         mov.l           (%sp)+,%d0              # return SCALE factor
5469         rts
5470
5471 quick_scale12:
5472         andi.w          &0x8000,FP_SCR0_EX(%a6) # zero src exponent
5473         bset            &0x0,1+FP_SCR0_EX(%a6)  # set exp = 1
5474
5475         mov.l           (%sp)+,%d0              # return SCALE factor
5476         rts
5477
5478 # src exp is >= dst exp; scale src to exp = 0x3fff
5479 src_exp_ge2:
5480         bsr.l           scale_to_zero_src
5481         mov.l           %d0,-(%sp)              # save scale factor
5482
5483         cmpi.b          DTAG(%a6),&DENORM       # is dst denormalized?
5484         bne.b           cmpexp22
5485         lea             FP_SCR1(%a6),%a0
5486         bsr.l           norm                    # normalize the denorm; result is new exp
5487         neg.w           %d0                     # new exp = -(shft val)
5488         mov.w           %d0,2+L_SCR1(%a6)       # inset new exp
5489
5490 cmpexp22:
5491         mov.w           L_SCR1(%a6),%d0
5492         subi.w          &mantissalen+2,%d0      # subtract mantissalen+2 from larger exp
5493
5494         cmp.w           %d0,2+L_SCR1(%a6)       # is difference >= len(mantissa)+2?
5495         bge.b           quick_scale22
5496
5497         mov.w           2+L_SCR1(%a6),%d0
5498         add.w           0x2(%sp),%d0            # scale dst exponent by scale factor
5499         mov.w           FP_SCR1_EX(%a6),%d1
5500         andi.w          &0x8000,%d1
5501         or.w            %d1,%d0                 # concat {sgn,new exp}
5502         mov.w           %d0,FP_SCR1_EX(%a6)     # insert new dst exponent
5503
5504         mov.l           (%sp)+,%d0              # return SCALE factor
5505         rts
5506
5507 quick_scale22:
5508         andi.w          &0x8000,FP_SCR1_EX(%a6) # zero dst exponent
5509         bset            &0x0,1+FP_SCR1_EX(%a6)  # set exp = 1
5510
5511         mov.l           (%sp)+,%d0              # return SCALE factor
5512         rts
5513
5514 ##########################################################################
5515
5516 #########################################################################
5517 # XDEF **************************************************************** #
5518 #       scale_to_zero_src(): scale the exponent of extended precision   #
5519 #                            value at FP_SCR0(a6).                      #
5520 #                                                                       #
5521 # XREF **************************************************************** #
5522 #       norm() - normalize the mantissa if the operand was a DENORM     #
5523 #                                                                       #
5524 # INPUT *************************************************************** #
5525 #       FP_SCR0(a6) = extended precision operand to be scaled           #
5526 #                                                                       #
5527 # OUTPUT ************************************************************** #
5528 #       FP_SCR0(a6) = scaled extended precision operand                 #
5529 #       d0          = scale value                                       #
5530 #                                                                       #
5531 # ALGORITHM *********************************************************** #
5532 #       Set the exponent of the input operand to 0x3fff. Save the value #
5533 # of the difference between the original and new exponent. Then,        #
5534 # normalize the operand if it was a DENORM. Add this normalization      #
5535 # value to the previous value. Return the result.                       #
5536 #                                                                       #
5537 #########################################################################
5538
5539         global          scale_to_zero_src
5540 scale_to_zero_src:
5541         mov.w           FP_SCR0_EX(%a6),%d1     # extract operand's {sgn,exp}
5542         mov.w           %d1,%d0                 # make a copy
5543
5544         andi.l          &0x7fff,%d1             # extract operand's exponent
5545
5546         andi.w          &0x8000,%d0             # extract operand's sgn
5547         or.w            &0x3fff,%d0             # insert new operand's exponent(=0)
5548
5549         mov.w           %d0,FP_SCR0_EX(%a6)     # insert biased exponent
5550
5551         cmpi.b          STAG(%a6),&DENORM       # is operand normalized?
5552         beq.b           stzs_denorm             # normalize the DENORM
5553
5554 stzs_norm:
5555         mov.l           &0x3fff,%d0
5556         sub.l           %d1,%d0                 # scale = BIAS + (-exp)
5557
5558         rts
5559
5560 stzs_denorm:
5561         lea             FP_SCR0(%a6),%a0        # pass ptr to src op
5562         bsr.l           norm                    # normalize denorm
5563         neg.l           %d0                     # new exponent = -(shft val)
5564         mov.l           %d0,%d1                 # prepare for op_norm call
5565         bra.b           stzs_norm               # finish scaling
5566
5567 ###
5568
5569 #########################################################################
5570 # XDEF **************************************************************** #
5571 #       scale_sqrt(): scale the input operand exponent so a subsequent  #
5572 #                     fsqrt operation won't take an exception.          #
5573 #                                                                       #
5574 # XREF **************************************************************** #
5575 #       norm() - normalize the mantissa if the operand was a DENORM     #
5576 #                                                                       #
5577 # INPUT *************************************************************** #
5578 #       FP_SCR0(a6) = extended precision operand to be scaled           #
5579 #                                                                       #
5580 # OUTPUT ************************************************************** #
5581 #       FP_SCR0(a6) = scaled extended precision operand                 #
5582 #       d0          = scale value                                       #
5583 #                                                                       #
5584 # ALGORITHM *********************************************************** #
5585 #       If the input operand is a DENORM, normalize it.                 #
5586 #       If the exponent of the input operand is even, set the exponent  #
5587 # to 0x3ffe and return a scale factor of "(exp-0x3ffe)/2". If the       #
5588 # exponent of the input operand is off, set the exponent to ox3fff and  #
5589 # return a scale factor of "(exp-0x3fff)/2".                            #
5590 #                                                                       #
5591 #########################################################################
5592
5593         global          scale_sqrt
5594 scale_sqrt:
5595         cmpi.b          STAG(%a6),&DENORM       # is operand normalized?
5596         beq.b           ss_denorm               # normalize the DENORM
5597
5598         mov.w           FP_SCR0_EX(%a6),%d1     # extract operand's {sgn,exp}
5599         andi.l          &0x7fff,%d1             # extract operand's exponent
5600
5601         andi.w          &0x8000,FP_SCR0_EX(%a6) # extract operand's sgn
5602
5603         btst            &0x0,%d1                # is exp even or odd?
5604         beq.b           ss_norm_even
5605
5606         ori.w           &0x3fff,FP_SCR0_EX(%a6) # insert new operand's exponent(=0)
5607
5608         mov.l           &0x3fff,%d0
5609         sub.l           %d1,%d0                 # scale = BIAS + (-exp)
5610         asr.l           &0x1,%d0                # divide scale factor by 2
5611         rts
5612
5613 ss_norm_even:
5614         ori.w           &0x3ffe,FP_SCR0_EX(%a6) # insert new operand's exponent(=0)
5615
5616         mov.l           &0x3ffe,%d0
5617         sub.l           %d1,%d0                 # scale = BIAS + (-exp)
5618         asr.l           &0x1,%d0                # divide scale factor by 2
5619         rts
5620
5621 ss_denorm:
5622         lea             FP_SCR0(%a6),%a0        # pass ptr to src op
5623         bsr.l           norm                    # normalize denorm
5624
5625         btst            &0x0,%d0                # is exp even or odd?
5626         beq.b           ss_denorm_even
5627
5628         ori.w           &0x3fff,FP_SCR0_EX(%a6) # insert new operand's exponent(=0)
5629
5630         add.l           &0x3fff,%d0
5631         asr.l           &0x1,%d0                # divide scale factor by 2
5632         rts
5633
5634 ss_denorm_even:
5635         ori.w           &0x3ffe,FP_SCR0_EX(%a6) # insert new operand's exponent(=0)
5636
5637         add.l           &0x3ffe,%d0
5638         asr.l           &0x1,%d0                # divide scale factor by 2
5639         rts
5640
5641 ###
5642
5643 #########################################################################
5644 # XDEF **************************************************************** #
5645 #       scale_to_zero_dst(): scale the exponent of extended precision   #
5646 #                            value at FP_SCR1(a6).                      #
5647 #                                                                       #
5648 # XREF **************************************************************** #
5649 #       norm() - normalize the mantissa if the operand was a DENORM     #
5650 #                                                                       #
5651 # INPUT *************************************************************** #
5652 #       FP_SCR1(a6) = extended precision operand to be scaled           #
5653 #                                                                       #
5654 # OUTPUT ************************************************************** #
5655 #       FP_SCR1(a6) = scaled extended precision operand                 #
5656 #       d0          = scale value                                       #
5657 #                                                                       #
5658 # ALGORITHM *********************************************************** #
5659 #       Set the exponent of the input operand to 0x3fff. Save the value #
5660 # of the difference between the original and new exponent. Then,        #
5661 # normalize the operand if it was a DENORM. Add this normalization      #
5662 # value to the previous value. Return the result.                       #
5663 #                                                                       #
5664 #########################################################################
5665
5666         global          scale_to_zero_dst
5667 scale_to_zero_dst:
5668         mov.w           FP_SCR1_EX(%a6),%d1     # extract operand's {sgn,exp}
5669         mov.w           %d1,%d0                 # make a copy
5670
5671         andi.l          &0x7fff,%d1             # extract operand's exponent
5672
5673         andi.w          &0x8000,%d0             # extract operand's sgn
5674         or.w            &0x3fff,%d0             # insert new operand's exponent(=0)
5675
5676         mov.w           %d0,FP_SCR1_EX(%a6)     # insert biased exponent
5677
5678         cmpi.b          DTAG(%a6),&DENORM       # is operand normalized?
5679         beq.b           stzd_denorm             # normalize the DENORM
5680
5681 stzd_norm:
5682         mov.l           &0x3fff,%d0
5683         sub.l           %d1,%d0                 # scale = BIAS + (-exp)
5684         rts
5685
5686 stzd_denorm:
5687         lea             FP_SCR1(%a6),%a0        # pass ptr to dst op
5688         bsr.l           norm                    # normalize denorm
5689         neg.l           %d0                     # new exponent = -(shft val)
5690         mov.l           %d0,%d1                 # prepare for op_norm call
5691         bra.b           stzd_norm               # finish scaling
5692
5693 ##########################################################################
5694
5695 #########################################################################
5696 # XDEF **************************************************************** #
5697 #       res_qnan(): return default result w/ QNAN operand for dyadic    #
5698 #       res_snan(): return default result w/ SNAN operand for dyadic    #
5699 #       res_qnan_1op(): return dflt result w/ QNAN operand for monadic  #
5700 #       res_snan_1op(): return dflt result w/ SNAN operand for monadic  #
5701 #                                                                       #
5702 # XREF **************************************************************** #
5703 #       None                                                            #
5704 #                                                                       #
5705 # INPUT *************************************************************** #
5706 #       FP_SRC(a6) = pointer to extended precision src operand          #
5707 #       FP_DST(a6) = pointer to extended precision dst operand          #
5708 #                                                                       #
5709 # OUTPUT ************************************************************** #
5710 #       fp0 = default result                                            #
5711 #                                                                       #
5712 # ALGORITHM *********************************************************** #
5713 #       If either operand (but not both operands) of an operation is a  #
5714 # nonsignalling NAN, then that NAN is returned as the result. If both   #
5715 # operands are nonsignalling NANs, then the destination operand         #
5716 # nonsignalling NAN is returned as the result.                          #
5717 #       If either operand to an operation is a signalling NAN (SNAN),   #
5718 # then, the SNAN bit is set in the FPSR EXC byte. If the SNAN trap      #
5719 # enable bit is set in the FPCR, then the trap is taken and the         #
5720 # destination is not modified. If the SNAN trap enable bit is not set,  #
5721 # then the SNAN is converted to a nonsignalling NAN (by setting the     #
5722 # SNAN bit in the operand to one), and the operation continues as       #
5723 # described in the preceding paragraph, for nonsignalling NANs.         #
5724 #       Make sure the appropriate FPSR bits are set before exiting.     #
5725 #                                                                       #
5726 #########################################################################
5727
5728         global          res_qnan
5729         global          res_snan
5730 res_qnan:
5731 res_snan:
5732         cmp.b           DTAG(%a6), &SNAN        # is the dst an SNAN?
5733         beq.b           dst_snan2
5734         cmp.b           DTAG(%a6), &QNAN        # is the dst a  QNAN?
5735         beq.b           dst_qnan2
5736 src_nan:
5737         cmp.b           STAG(%a6), &QNAN
5738         beq.b           src_qnan2
5739         global          res_snan_1op
5740 res_snan_1op:
5741 src_snan2:
5742         bset            &0x6, FP_SRC_HI(%a6)    # set SNAN bit
5743         or.l            &nan_mask+aiop_mask+snan_mask, USER_FPSR(%a6)
5744         lea             FP_SRC(%a6), %a0
5745         bra.b           nan_comp
5746         global          res_qnan_1op
5747 res_qnan_1op:
5748 src_qnan2:
5749         or.l            &nan_mask, USER_FPSR(%a6)
5750         lea             FP_SRC(%a6), %a0
5751         bra.b           nan_comp
5752 dst_snan2:
5753         or.l            &nan_mask+aiop_mask+snan_mask, USER_FPSR(%a6)
5754         bset            &0x6, FP_DST_HI(%a6)    # set SNAN bit
5755         lea             FP_DST(%a6), %a0
5756         bra.b           nan_comp
5757 dst_qnan2:
5758         lea             FP_DST(%a6), %a0
5759         cmp.b           STAG(%a6), &SNAN
5760         bne             nan_done
5761         or.l            &aiop_mask+snan_mask, USER_FPSR(%a6)
5762 nan_done:
5763         or.l            &nan_mask, USER_FPSR(%a6)
5764 nan_comp:
5765         btst            &0x7, FTEMP_EX(%a0)     # is NAN neg?
5766         beq.b           nan_not_neg
5767         or.l            &neg_mask, USER_FPSR(%a6)
5768 nan_not_neg:
5769         fmovm.x         (%a0), &0x80
5770         rts
5771
5772 #########################################################################
5773 # XDEF **************************************************************** #
5774 #       res_operr(): return default result during operand error         #
5775 #                                                                       #
5776 # XREF **************************************************************** #
5777 #       None                                                            #
5778 #                                                                       #
5779 # INPUT *************************************************************** #
5780 #       None                                                            #
5781 #                                                                       #
5782 # OUTPUT ************************************************************** #
5783 #       fp0 = default operand error result                              #
5784 #                                                                       #
5785 # ALGORITHM *********************************************************** #
5786 #       An nonsignalling NAN is returned as the default result when     #
5787 # an operand error occurs for the following cases:                      #
5788 #                                                                       #
5789 #       Multiply: (Infinity x Zero)                                     #
5790 #       Divide  : (Zero / Zero) || (Infinity / Infinity)                #
5791 #                                                                       #
5792 #########################################################################
5793
5794         global          res_operr
5795 res_operr:
5796         or.l            &nan_mask+operr_mask+aiop_mask, USER_FPSR(%a6)
5797         fmovm.x         nan_return(%pc), &0x80
5798         rts
5799
5800 nan_return:
5801         long            0x7fff0000, 0xffffffff, 0xffffffff
5802
5803 #########################################################################
5804 # XDEF **************************************************************** #
5805 #       _denorm(): denormalize an intermediate result                   #
5806 #                                                                       #
5807 # XREF **************************************************************** #
5808 #       None                                                            #
5809 #                                                                       #
5810 # INPUT *************************************************************** #
5811 #       a0 = points to the operand to be denormalized                   #
5812 #               (in the internal extended format)                       #
5813 #                                                                       #
5814 #       d0 = rounding precision                                         #
5815 #                                                                       #
5816 # OUTPUT ************************************************************** #
5817 #       a0 = pointer to the denormalized result                         #
5818 #               (in the internal extended format)                       #
5819 #                                                                       #
5820 #       d0 = guard,round,sticky                                         #
5821 #                                                                       #
5822 # ALGORITHM *********************************************************** #
5823 #       According to the exponent underflow threshold for the given     #
5824 # precision, shift the mantissa bits to the right in order raise the    #
5825 # exponent of the operand to the threshold value. While shifting the    #
5826 # mantissa bits right, maintain the value of the guard, round, and      #
5827 # sticky bits.                                                          #
5828 # other notes:                                                          #
5829 #       (1) _denorm() is called by the underflow routines               #
5830 #       (2) _denorm() does NOT affect the status register               #
5831 #                                                                       #
5832 #########################################################################
5833
5834 #
5835 # table of exponent threshold values for each precision
5836 #
5837 tbl_thresh:
5838         short           0x0
5839         short           sgl_thresh
5840         short           dbl_thresh
5841
5842         global          _denorm
5843 _denorm:
5844 #
5845 # Load the exponent threshold for the precision selected and check
5846 # to see if (threshold - exponent) is > 65 in which case we can
5847 # simply calculate the sticky bit and zero the mantissa. otherwise
5848 # we have to call the denormalization routine.
5849 #
5850         lsr.b           &0x2, %d0               # shift prec to lo bits
5851         mov.w           (tbl_thresh.b,%pc,%d0.w*2), %d1 # load prec threshold
5852         mov.w           %d1, %d0                # copy d1 into d0
5853         sub.w           FTEMP_EX(%a0), %d0      # diff = threshold - exp
5854         cmpi.w          %d0, &66                # is diff > 65? (mant + g,r bits)
5855         bpl.b           denorm_set_stky         # yes; just calc sticky
5856
5857         clr.l           %d0                     # clear g,r,s
5858         btst            &inex2_bit, FPSR_EXCEPT(%a6) # yes; was INEX2 set?
5859         beq.b           denorm_call             # no; don't change anything
5860         bset            &29, %d0                # yes; set sticky bit
5861
5862 denorm_call:
5863         bsr.l           dnrm_lp                 # denormalize the number
5864         rts
5865
5866 #
5867 # all bit would have been shifted off during the denorm so simply
5868 # calculate if the sticky should be set and clear the entire mantissa.
5869 #
5870 denorm_set_stky:
5871         mov.l           &0x20000000, %d0        # set sticky bit in return value
5872         mov.w           %d1, FTEMP_EX(%a0)      # load exp with threshold
5873         clr.l           FTEMP_HI(%a0)           # set d1 = 0 (ms mantissa)
5874         clr.l           FTEMP_LO(%a0)           # set d2 = 0 (ms mantissa)
5875         rts
5876
5877 #                                                                       #
5878 # dnrm_lp(): normalize exponent/mantissa to specified threshhold        #
5879 #                                                                       #
5880 # INPUT:                                                                #
5881 #       %a0        : points to the operand to be denormalized           #
5882 #       %d0{31:29} : initial guard,round,sticky                         #
5883 #       %d1{15:0}  : denormalization threshold                          #
5884 # OUTPUT:                                                               #
5885 #       %a0        : points to the denormalized operand                 #
5886 #       %d0{31:29} : final guard,round,sticky                           #
5887 #                                                                       #
5888
5889 # *** Local Equates *** #
5890 set     GRS,            L_SCR2                  # g,r,s temp storage
5891 set     FTEMP_LO2,      L_SCR1                  # FTEMP_LO copy
5892
5893         global          dnrm_lp
5894 dnrm_lp:
5895
5896 #
5897 # make a copy of FTEMP_LO and place the g,r,s bits directly after it
5898 # in memory so as to make the bitfield extraction for denormalization easier.
5899 #
5900         mov.l           FTEMP_LO(%a0), FTEMP_LO2(%a6) # make FTEMP_LO copy
5901         mov.l           %d0, GRS(%a6)           # place g,r,s after it
5902
5903 #
5904 # check to see how much less than the underflow threshold the operand
5905 # exponent is.
5906 #
5907         mov.l           %d1, %d0                # copy the denorm threshold
5908         sub.w           FTEMP_EX(%a0), %d1      # d1 = threshold - uns exponent
5909         ble.b           dnrm_no_lp              # d1 <= 0
5910         cmpi.w          %d1, &0x20              # is ( 0 <= d1 < 32) ?
5911         blt.b           case_1                  # yes
5912         cmpi.w          %d1, &0x40              # is (32 <= d1 < 64) ?
5913         blt.b           case_2                  # yes
5914         bra.w           case_3                  # (d1 >= 64)
5915
5916 #
5917 # No normalization necessary
5918 #
5919 dnrm_no_lp:
5920         mov.l           GRS(%a6), %d0           # restore original g,r,s
5921         rts
5922
5923 #
5924 # case (0<d1<32)
5925 #
5926 # %d0 = denorm threshold
5927 # %d1 = "n" = amt to shift
5928 #
5929 #       ---------------------------------------------------------
5930 #       |     FTEMP_HI    |     FTEMP_LO     |grs000.........000|
5931 #       ---------------------------------------------------------
5932 #       <-(32 - n)-><-(n)-><-(32 - n)-><-(n)-><-(32 - n)-><-(n)->
5933 #       \          \                  \                  \
5934 #        \          \                  \                  \
5935 #         \          \                  \                  \
5936 #          \          \                  \                  \
5937 #           \          \                  \                  \
5938 #            \          \                  \                  \
5939 #             \          \                  \                  \
5940 #              \          \                  \                  \
5941 #       <-(n)-><-(32 - n)-><------(32)-------><------(32)------->
5942 #       ---------------------------------------------------------
5943 #       |0.....0| NEW_HI  |  NEW_FTEMP_LO     |grs              |
5944 #       ---------------------------------------------------------
5945 #
5946 case_1:
5947         mov.l           %d2, -(%sp)             # create temp storage
5948
5949         mov.w           %d0, FTEMP_EX(%a0)      # exponent = denorm threshold
5950         mov.l           &32, %d0
5951         sub.w           %d1, %d0                # %d0 = 32 - %d1
5952
5953         cmpi.w          %d1, &29                # is shft amt >= 29
5954         blt.b           case1_extract           # no; no fix needed
5955         mov.b           GRS(%a6), %d2
5956         or.b            %d2, 3+FTEMP_LO2(%a6)
5957
5958 case1_extract:
5959         bfextu          FTEMP_HI(%a0){&0:%d0}, %d2 # %d2 = new FTEMP_HI
5960         bfextu          FTEMP_HI(%a0){%d0:&32}, %d1 # %d1 = new FTEMP_LO
5961         bfextu          FTEMP_LO2(%a6){%d0:&32}, %d0 # %d0 = new G,R,S
5962
5963         mov.l           %d2, FTEMP_HI(%a0)      # store new FTEMP_HI
5964         mov.l           %d1, FTEMP_LO(%a0)      # store new FTEMP_LO
5965
5966         bftst           %d0{&2:&30}             # were bits shifted off?
5967         beq.b           case1_sticky_clear      # no; go finish
5968         bset            &rnd_stky_bit, %d0      # yes; set sticky bit
5969
5970 case1_sticky_clear:
5971         and.l           &0xe0000000, %d0        # clear all but G,R,S
5972         mov.l           (%sp)+, %d2             # restore temp register
5973         rts
5974
5975 #
5976 # case (32<=d1<64)
5977 #
5978 # %d0 = denorm threshold
5979 # %d1 = "n" = amt to shift
5980 #
5981 #       ---------------------------------------------------------
5982 #       |     FTEMP_HI    |     FTEMP_LO     |grs000.........000|
5983 #       ---------------------------------------------------------
5984 #       <-(32 - n)-><-(n)-><-(32 - n)-><-(n)-><-(32 - n)-><-(n)->
5985 #       \          \                  \
5986 #        \          \                  \
5987 #         \          \                  -------------------
5988 #          \          --------------------                 \
5989 #           -------------------           \                 \
5990 #                              \           \                 \
5991 #                               \           \                 \
5992 #                                \           \                 \
5993 #       <-------(32)------><-(n)-><-(32 - n)-><------(32)------->
5994 #       ---------------------------------------------------------
5995 #       |0...............0|0....0| NEW_LO     |grs              |
5996 #       ---------------------------------------------------------
5997 #
5998 case_2:
5999         mov.l           %d2, -(%sp)             # create temp storage
6000
6001         mov.w           %d0, FTEMP_EX(%a0)      # exponent = denorm threshold
6002         subi.w          &0x20, %d1              # %d1 now between 0 and 32
6003         mov.l           &0x20, %d0
6004         sub.w           %d1, %d0                # %d0 = 32 - %d1
6005
6006 # subtle step here; or in the g,r,s at the bottom of FTEMP_LO to minimize
6007 # the number of bits to check for the sticky detect.
6008 # it only plays a role in shift amounts of 61-63.
6009         mov.b           GRS(%a6), %d2
6010         or.b            %d2, 3+FTEMP_LO2(%a6)
6011
6012         bfextu          FTEMP_HI(%a0){&0:%d0}, %d2 # %d2 = new FTEMP_LO
6013         bfextu          FTEMP_HI(%a0){%d0:&32}, %d1 # %d1 = new G,R,S
6014
6015         bftst           %d1{&2:&30}             # were any bits shifted off?
6016         bne.b           case2_set_sticky        # yes; set sticky bit
6017         bftst           FTEMP_LO2(%a6){%d0:&31} # were any bits shifted off?
6018         bne.b           case2_set_sticky        # yes; set sticky bit
6019
6020         mov.l           %d1, %d0                # move new G,R,S to %d0
6021         bra.b           case2_end
6022
6023 case2_set_sticky:
6024         mov.l           %d1, %d0                # move new G,R,S to %d0
6025         bset            &rnd_stky_bit, %d0      # set sticky bit
6026
6027 case2_end:
6028         clr.l           FTEMP_HI(%a0)           # store FTEMP_HI = 0
6029         mov.l           %d2, FTEMP_LO(%a0)      # store FTEMP_LO
6030         and.l           &0xe0000000, %d0        # clear all but G,R,S
6031
6032         mov.l           (%sp)+,%d2              # restore temp register
6033         rts
6034
6035 #
6036 # case (d1>=64)
6037 #
6038 # %d0 = denorm threshold
6039 # %d1 = amt to shift
6040 #
6041 case_3:
6042         mov.w           %d0, FTEMP_EX(%a0)      # insert denorm threshold
6043
6044         cmpi.w          %d1, &65                # is shift amt > 65?
6045         blt.b           case3_64                # no; it's == 64
6046         beq.b           case3_65                # no; it's == 65
6047
6048 #
6049 # case (d1>65)
6050 #
6051 # Shift value is > 65 and out of range. All bits are shifted off.
6052 # Return a zero mantissa with the sticky bit set
6053 #
6054         clr.l           FTEMP_HI(%a0)           # clear hi(mantissa)
6055         clr.l           FTEMP_LO(%a0)           # clear lo(mantissa)
6056         mov.l           &0x20000000, %d0        # set sticky bit
6057         rts
6058
6059 #
6060 # case (d1 == 64)
6061 #
6062 #       ---------------------------------------------------------
6063 #       |     FTEMP_HI    |     FTEMP_LO     |grs000.........000|
6064 #       ---------------------------------------------------------
6065 #       <-------(32)------>
6066 #       \                  \
6067 #        \                  \
6068 #         \                  \
6069 #          \                  ------------------------------
6070 #           -------------------------------                 \
6071 #                                          \                 \
6072 #                                           \                 \
6073 #                                            \                 \
6074 #                                             <-------(32)------>
6075 #       ---------------------------------------------------------
6076 #       |0...............0|0................0|grs               |
6077 #       ---------------------------------------------------------
6078 #
6079 case3_64:
6080         mov.l           FTEMP_HI(%a0), %d0      # fetch hi(mantissa)
6081         mov.l           %d0, %d1                # make a copy
6082         and.l           &0xc0000000, %d0        # extract G,R
6083         and.l           &0x3fffffff, %d1        # extract other bits
6084
6085         bra.b           case3_complete
6086
6087 #
6088 # case (d1 == 65)
6089 #
6090 #       ---------------------------------------------------------
6091 #       |     FTEMP_HI    |     FTEMP_LO     |grs000.........000|
6092 #       ---------------------------------------------------------
6093 #       <-------(32)------>
6094 #       \                  \
6095 #        \                  \
6096 #         \                  \
6097 #          \                  ------------------------------
6098 #           --------------------------------                \
6099 #                                           \                \
6100 #                                            \                \
6101 #                                             \                \
6102 #                                              <-------(31)----->
6103 #       ---------------------------------------------------------
6104 #       |0...............0|0................0|0rs               |
6105 #       ---------------------------------------------------------
6106 #
6107 case3_65:
6108         mov.l           FTEMP_HI(%a0), %d0      # fetch hi(mantissa)
6109         and.l           &0x80000000, %d0        # extract R bit
6110         lsr.l           &0x1, %d0               # shift high bit into R bit
6111         and.l           &0x7fffffff, %d1        # extract other bits
6112
6113 case3_complete:
6114 # last operation done was an "and" of the bits shifted off so the condition
6115 # codes are already set so branch accordingly.
6116         bne.b           case3_set_sticky        # yes; go set new sticky
6117         tst.l           FTEMP_LO(%a0)           # were any bits shifted off?
6118         bne.b           case3_set_sticky        # yes; go set new sticky
6119         tst.b           GRS(%a6)                # were any bits shifted off?
6120         bne.b           case3_set_sticky        # yes; go set new sticky
6121
6122 #
6123 # no bits were shifted off so don't set the sticky bit.
6124 # the guard and
6125 # the entire mantissa is zero.
6126 #
6127         clr.l           FTEMP_HI(%a0)           # clear hi(mantissa)
6128         clr.l           FTEMP_LO(%a0)           # clear lo(mantissa)
6129         rts
6130
6131 #
6132 # some bits were shifted off so set the sticky bit.
6133 # the entire mantissa is zero.
6134 #
6135 case3_set_sticky:
6136         bset            &rnd_stky_bit,%d0       # set new sticky bit
6137         clr.l           FTEMP_HI(%a0)           # clear hi(mantissa)
6138         clr.l           FTEMP_LO(%a0)           # clear lo(mantissa)
6139         rts
6140
6141 #########################################################################
6142 # XDEF **************************************************************** #
6143 #       _round(): round result according to precision/mode              #
6144 #                                                                       #
6145 # XREF **************************************************************** #
6146 #       None                                                            #
6147 #                                                                       #
6148 # INPUT *************************************************************** #
6149 #       a0        = ptr to input operand in internal extended format    #
6150 #       d1(hi)    = contains rounding precision:                        #
6151 #                       ext = $0000xxxx                                 #
6152 #                       sgl = $0004xxxx                                 #
6153 #                       dbl = $0008xxxx                                 #
6154 #       d1(lo)    = contains rounding mode:                             #
6155 #                       RN  = $xxxx0000                                 #
6156 #                       RZ  = $xxxx0001                                 #
6157 #                       RM  = $xxxx0002                                 #
6158 #                       RP  = $xxxx0003                                 #
6159 #       d0{31:29} = contains the g,r,s bits (extended)                  #
6160 #                                                                       #
6161 # OUTPUT ************************************************************** #
6162 #       a0 = pointer to rounded result                                  #
6163 #                                                                       #
6164 # ALGORITHM *********************************************************** #
6165 #       On return the value pointed to by a0 is correctly rounded,      #
6166 #       a0 is preserved and the g-r-s bits in d0 are cleared.           #
6167 #       The result is not typed - the tag field is invalid.  The        #
6168 #       result is still in the internal extended format.                #
6169 #                                                                       #
6170 #       The INEX bit of USER_FPSR will be set if the rounded result was #
6171 #       inexact (i.e. if any of the g-r-s bits were set).               #
6172 #                                                                       #
6173 #########################################################################
6174
6175         global          _round
6176 _round:
6177 #
6178 # ext_grs() looks at the rounding precision and sets the appropriate
6179 # G,R,S bits.
6180 # If (G,R,S == 0) then result is exact and round is done, else set
6181 # the inex flag in status reg and continue.
6182 #
6183         bsr.l           ext_grs                 # extract G,R,S
6184
6185         tst.l           %d0                     # are G,R,S zero?
6186         beq.w           truncate                # yes; round is complete
6187
6188         or.w            &inx2a_mask, 2+USER_FPSR(%a6) # set inex2/ainex
6189
6190 #
6191 # Use rounding mode as an index into a jump table for these modes.
6192 # All of the following assumes grs != 0.
6193 #
6194         mov.w           (tbl_mode.b,%pc,%d1.w*2), %a1 # load jump offset
6195         jmp             (tbl_mode.b,%pc,%a1)    # jmp to rnd mode handler
6196
6197 tbl_mode:
6198         short           rnd_near - tbl_mode
6199         short           truncate - tbl_mode     # RZ always truncates
6200         short           rnd_mnus - tbl_mode
6201         short           rnd_plus - tbl_mode
6202
6203 #################################################################
6204 #       ROUND PLUS INFINITY                                     #
6205 #                                                               #
6206 #       If sign of fp number = 0 (positive), then add 1 to l.   #
6207 #################################################################
6208 rnd_plus:
6209         tst.b           FTEMP_SGN(%a0)          # check for sign
6210         bmi.w           truncate                # if positive then truncate
6211
6212         mov.l           &0xffffffff, %d0        # force g,r,s to be all f's
6213         swap            %d1                     # set up d1 for round prec.
6214
6215         cmpi.b          %d1, &s_mode            # is prec = sgl?
6216         beq.w           add_sgl                 # yes
6217         bgt.w           add_dbl                 # no; it's dbl
6218         bra.w           add_ext                 # no; it's ext
6219
6220 #################################################################
6221 #       ROUND MINUS INFINITY                                    #
6222 #                                                               #
6223 #       If sign of fp number = 1 (negative), then add 1 to l.   #
6224 #################################################################
6225 rnd_mnus:
6226         tst.b           FTEMP_SGN(%a0)          # check for sign
6227         bpl.w           truncate                # if negative then truncate
6228
6229         mov.l           &0xffffffff, %d0        # force g,r,s to be all f's
6230         swap            %d1                     # set up d1 for round prec.
6231
6232         cmpi.b          %d1, &s_mode            # is prec = sgl?
6233         beq.w           add_sgl                 # yes
6234         bgt.w           add_dbl                 # no; it's dbl
6235         bra.w           add_ext                 # no; it's ext
6236
6237 #################################################################
6238 #       ROUND NEAREST                                           #
6239 #                                                               #
6240 #       If (g=1), then add 1 to l and if (r=s=0), then clear l  #
6241 #       Note that this will round to even in case of a tie.     #
6242 #################################################################
6243 rnd_near:
6244         asl.l           &0x1, %d0               # shift g-bit to c-bit
6245         bcc.w           truncate                # if (g=1) then
6246
6247         swap            %d1                     # set up d1 for round prec.
6248
6249         cmpi.b          %d1, &s_mode            # is prec = sgl?
6250         beq.w           add_sgl                 # yes
6251         bgt.w           add_dbl                 # no; it's dbl
6252         bra.w           add_ext                 # no; it's ext
6253
6254 # *** LOCAL EQUATES ***
6255 set     ad_1_sgl,       0x00000100      # constant to add 1 to l-bit in sgl prec
6256 set     ad_1_dbl,       0x00000800      # constant to add 1 to l-bit in dbl prec
6257
6258 #########################
6259 #       ADD SINGLE      #
6260 #########################
6261 add_sgl:
6262         add.l           &ad_1_sgl, FTEMP_HI(%a0)
6263         bcc.b           scc_clr                 # no mantissa overflow
6264         roxr.w          FTEMP_HI(%a0)           # shift v-bit back in
6265         roxr.w          FTEMP_HI+2(%a0)         # shift v-bit back in
6266         add.w           &0x1, FTEMP_EX(%a0)     # and incr exponent
6267 scc_clr:
6268         tst.l           %d0                     # test for rs = 0
6269         bne.b           sgl_done
6270         and.w           &0xfe00, FTEMP_HI+2(%a0) # clear the l-bit
6271 sgl_done:
6272         and.l           &0xffffff00, FTEMP_HI(%a0) # truncate bits beyond sgl limit
6273         clr.l           FTEMP_LO(%a0)           # clear d2
6274         rts
6275
6276 #########################
6277 #       ADD EXTENDED    #
6278 #########################
6279 add_ext:
6280         addq.l          &1,FTEMP_LO(%a0)        # add 1 to l-bit
6281         bcc.b           xcc_clr                 # test for carry out
6282         addq.l          &1,FTEMP_HI(%a0)        # propogate carry
6283         bcc.b           xcc_clr
6284         roxr.w          FTEMP_HI(%a0)           # mant is 0 so restore v-bit
6285         roxr.w          FTEMP_HI+2(%a0)         # mant is 0 so restore v-bit
6286         roxr.w          FTEMP_LO(%a0)
6287         roxr.w          FTEMP_LO+2(%a0)
6288         add.w           &0x1,FTEMP_EX(%a0)      # and inc exp
6289 xcc_clr:
6290         tst.l           %d0                     # test rs = 0
6291         bne.b           add_ext_done
6292         and.b           &0xfe,FTEMP_LO+3(%a0)   # clear the l bit
6293 add_ext_done:
6294         rts
6295
6296 #########################
6297 #       ADD DOUBLE      #
6298 #########################
6299 add_dbl:
6300         add.l           &ad_1_dbl, FTEMP_LO(%a0) # add 1 to lsb
6301         bcc.b           dcc_clr                 # no carry
6302         addq.l          &0x1, FTEMP_HI(%a0)     # propogate carry
6303         bcc.b           dcc_clr                 # no carry
6304
6305         roxr.w          FTEMP_HI(%a0)           # mant is 0 so restore v-bit
6306         roxr.w          FTEMP_HI+2(%a0)         # mant is 0 so restore v-bit
6307         roxr.w          FTEMP_LO(%a0)
6308         roxr.w          FTEMP_LO+2(%a0)
6309         addq.w          &0x1, FTEMP_EX(%a0)     # incr exponent
6310 dcc_clr:
6311         tst.l           %d0                     # test for rs = 0
6312         bne.b           dbl_done
6313         and.w           &0xf000, FTEMP_LO+2(%a0) # clear the l-bit
6314
6315 dbl_done:
6316         and.l           &0xfffff800,FTEMP_LO(%a0) # truncate bits beyond dbl limit
6317         rts
6318
6319 ###########################
6320 # Truncate all other bits #
6321 ###########################
6322 truncate:
6323         swap            %d1                     # select rnd prec
6324
6325         cmpi.b          %d1, &s_mode            # is prec sgl?
6326         beq.w           sgl_done                # yes
6327         bgt.b           dbl_done                # no; it's dbl
6328         rts                                     # no; it's ext
6329
6330
6331 #
6332 # ext_grs(): extract guard, round and sticky bits according to
6333 #            rounding precision.
6334 #
6335 # INPUT
6336 #       d0         = extended precision g,r,s (in d0{31:29})
6337 #       d1         = {PREC,ROUND}
6338 # OUTPUT
6339 #       d0{31:29}  = guard, round, sticky
6340 #
6341 # The ext_grs extract the guard/round/sticky bits according to the
6342 # selected rounding precision. It is called by the round subroutine
6343 # only.  All registers except d0 are kept intact. d0 becomes an
6344 # updated guard,round,sticky in d0{31:29}
6345 #
6346 # Notes: the ext_grs uses the round PREC, and therefore has to swap d1
6347 #        prior to usage, and needs to restore d1 to original. this
6348 #        routine is tightly tied to the round routine and not meant to
6349 #        uphold standard subroutine calling practices.
6350 #
6351
6352 ext_grs:
6353         swap            %d1                     # have d1.w point to round precision
6354         tst.b           %d1                     # is rnd prec = extended?
6355         bne.b           ext_grs_not_ext         # no; go handle sgl or dbl
6356
6357 #
6358 # %d0 actually already hold g,r,s since _round() had it before calling
6359 # this function. so, as long as we don't disturb it, we are "returning" it.
6360 #
6361 ext_grs_ext:
6362         swap            %d1                     # yes; return to correct positions
6363         rts
6364
6365 ext_grs_not_ext:
6366         movm.l          &0x3000, -(%sp)         # make some temp registers {d2/d3}
6367
6368         cmpi.b          %d1, &s_mode            # is rnd prec = sgl?
6369         bne.b           ext_grs_dbl             # no; go handle dbl
6370
6371 #
6372 # sgl:
6373 #       96              64        40    32              0
6374 #       -----------------------------------------------------
6375 #       | EXP   |XXXXXXX|         |xx   |               |grs|
6376 #       -----------------------------------------------------
6377 #                       <--(24)--->nn\                     /
6378 #                                  ee ---------------------
6379 #                                  ww           |
6380 #                                               v
6381 #                                  gr      new sticky
6382 #
6383 ext_grs_sgl:
6384         bfextu          FTEMP_HI(%a0){&24:&2}, %d3 # sgl prec. g-r are 2 bits right
6385         mov.l           &30, %d2                # of the sgl prec. limits
6386         lsl.l           %d2, %d3                # shift g-r bits to MSB of d3
6387         mov.l           FTEMP_HI(%a0), %d2      # get word 2 for s-bit test
6388         and.l           &0x0000003f, %d2        # s bit is the or of all other
6389         bne.b           ext_grs_st_stky         # bits to the right of g-r
6390         tst.l           FTEMP_LO(%a0)           # test lower mantissa
6391         bne.b           ext_grs_st_stky         # if any are set, set sticky
6392         tst.l           %d0                     # test original g,r,s
6393         bne.b           ext_grs_st_stky         # if any are set, set sticky
6394         bra.b           ext_grs_end_sd          # if words 3 and 4 are clr, exit
6395
6396 #
6397 # dbl:
6398 #       96              64              32       11     0
6399 #       -----------------------------------------------------
6400 #       | EXP   |XXXXXXX|               |        |xx    |grs|
6401 #       -----------------------------------------------------
6402 #                                                 nn\       /
6403 #                                                 ee -------
6404 #                                                 ww    |
6405 #                                                       v
6406 #                                                 gr    new sticky
6407 #
6408 ext_grs_dbl:
6409         bfextu          FTEMP_LO(%a0){&21:&2}, %d3 # dbl-prec. g-r are 2 bits right
6410         mov.l           &30, %d2                # of the dbl prec. limits
6411         lsl.l           %d2, %d3                # shift g-r bits to the MSB of d3
6412         mov.l           FTEMP_LO(%a0), %d2      # get lower mantissa  for s-bit test
6413         and.l           &0x000001ff, %d2        # s bit is the or-ing of all
6414         bne.b           ext_grs_st_stky         # other bits to the right of g-r
6415         tst.l           %d0                     # test word original g,r,s
6416         bne.b           ext_grs_st_stky         # if any are set, set sticky
6417         bra.b           ext_grs_end_sd          # if clear, exit
6418
6419 ext_grs_st_stky:
6420         bset            &rnd_stky_bit, %d3      # set sticky bit
6421 ext_grs_end_sd:
6422         mov.l           %d3, %d0                # return grs to d0
6423
6424         movm.l          (%sp)+, &0xc            # restore scratch registers {d2/d3}
6425
6426         swap            %d1                     # restore d1 to original
6427         rts
6428
6429 #########################################################################
6430 # norm(): normalize the mantissa of an extended precision input. the    #
6431 #         input operand should not be normalized already.               #
6432 #                                                                       #
6433 # XDEF **************************************************************** #
6434 #       norm()                                                          #
6435 #                                                                       #
6436 # XREF **************************************************************** #
6437 #       none                                                            #
6438 #                                                                       #
6439 # INPUT *************************************************************** #
6440 #       a0 = pointer fp extended precision operand to normalize         #
6441 #                                                                       #
6442 # OUTPUT ************************************************************** #
6443 #       d0 = number of bit positions the mantissa was shifted           #
6444 #       a0 = the input operand's mantissa is normalized; the exponent   #
6445 #            is unchanged.                                              #
6446 #                                                                       #
6447 #########################################################################
6448         global          norm
6449 norm:
6450         mov.l           %d2, -(%sp)             # create some temp regs
6451         mov.l           %d3, -(%sp)
6452
6453         mov.l           FTEMP_HI(%a0), %d0      # load hi(mantissa)
6454         mov.l           FTEMP_LO(%a0), %d1      # load lo(mantissa)
6455
6456         bfffo           %d0{&0:&32}, %d2        # how many places to shift?
6457         beq.b           norm_lo                 # hi(man) is all zeroes!
6458
6459 norm_hi:
6460         lsl.l           %d2, %d0                # left shift hi(man)
6461         bfextu          %d1{&0:%d2}, %d3        # extract lo bits
6462
6463         or.l            %d3, %d0                # create hi(man)
6464         lsl.l           %d2, %d1                # create lo(man)
6465
6466         mov.l           %d0, FTEMP_HI(%a0)      # store new hi(man)
6467         mov.l           %d1, FTEMP_LO(%a0)      # store new lo(man)
6468
6469         mov.l           %d2, %d0                # return shift amount
6470
6471         mov.l           (%sp)+, %d3             # restore temp regs
6472         mov.l           (%sp)+, %d2
6473
6474         rts
6475
6476 norm_lo:
6477         bfffo           %d1{&0:&32}, %d2        # how many places to shift?
6478         lsl.l           %d2, %d1                # shift lo(man)
6479         add.l           &32, %d2                # add 32 to shft amount
6480
6481         mov.l           %d1, FTEMP_HI(%a0)      # store hi(man)
6482         clr.l           FTEMP_LO(%a0)           # lo(man) is now zero
6483
6484         mov.l           %d2, %d0                # return shift amount
6485
6486         mov.l           (%sp)+, %d3             # restore temp regs
6487         mov.l           (%sp)+, %d2
6488
6489         rts
6490
6491 #########################################################################
6492 # unnorm_fix(): - changes an UNNORM to one of NORM, DENORM, or ZERO     #
6493 #               - returns corresponding optype tag                      #
6494 #                                                                       #
6495 # XDEF **************************************************************** #
6496 #       unnorm_fix()                                                    #
6497 #                                                                       #
6498 # XREF **************************************************************** #
6499 #       norm() - normalize the mantissa                                 #
6500 #                                                                       #
6501 # INPUT *************************************************************** #
6502 #       a0 = pointer to unnormalized extended precision number          #
6503 #                                                                       #
6504 # OUTPUT ************************************************************** #
6505 #       d0 = optype tag - is corrected to one of NORM, DENORM, or ZERO  #
6506 #       a0 = input operand has been converted to a norm, denorm, or     #
6507 #            zero; both the exponent and mantissa are changed.          #
6508 #                                                                       #
6509 #########################################################################
6510
6511         global          unnorm_fix
6512 unnorm_fix:
6513         bfffo           FTEMP_HI(%a0){&0:&32}, %d0 # how many shifts are needed?
6514         bne.b           unnorm_shift            # hi(man) is not all zeroes
6515
6516 #
6517 # hi(man) is all zeroes so see if any bits in lo(man) are set
6518 #
6519 unnorm_chk_lo:
6520         bfffo           FTEMP_LO(%a0){&0:&32}, %d0 # is operand really a zero?
6521         beq.w           unnorm_zero             # yes
6522
6523         add.w           &32, %d0                # no; fix shift distance
6524
6525 #
6526 # d0 = # shifts needed for complete normalization
6527 #
6528 unnorm_shift:
6529         clr.l           %d1                     # clear top word
6530         mov.w           FTEMP_EX(%a0), %d1      # extract exponent
6531         and.w           &0x7fff, %d1            # strip off sgn
6532
6533         cmp.w           %d0, %d1                # will denorm push exp < 0?
6534         bgt.b           unnorm_nrm_zero         # yes; denorm only until exp = 0
6535
6536 #
6537 # exponent would not go < 0. therefore, number stays normalized
6538 #
6539         sub.w           %d0, %d1                # shift exponent value
6540         mov.w           FTEMP_EX(%a0), %d0      # load old exponent
6541         and.w           &0x8000, %d0            # save old sign
6542         or.w            %d0, %d1                # {sgn,new exp}
6543         mov.w           %d1, FTEMP_EX(%a0)      # insert new exponent
6544
6545         bsr.l           norm                    # normalize UNNORM
6546
6547         mov.b           &NORM, %d0              # return new optype tag
6548         rts
6549
6550 #
6551 # exponent would go < 0, so only denormalize until exp = 0
6552 #
6553 unnorm_nrm_zero:
6554         cmp.b           %d1, &32                # is exp <= 32?
6555         bgt.b           unnorm_nrm_zero_lrg     # no; go handle large exponent
6556
6557         bfextu          FTEMP_HI(%a0){%d1:&32}, %d0 # extract new hi(man)
6558         mov.l           %d0, FTEMP_HI(%a0)      # save new hi(man)
6559
6560         mov.l           FTEMP_LO(%a0), %d0      # fetch old lo(man)
6561         lsl.l           %d1, %d0                # extract new lo(man)
6562         mov.l           %d0, FTEMP_LO(%a0)      # save new lo(man)
6563
6564         and.w           &0x8000, FTEMP_EX(%a0)  # set exp = 0
6565
6566         mov.b           &DENORM, %d0            # return new optype tag
6567         rts
6568
6569 #
6570 # only mantissa bits set are in lo(man)
6571 #
6572 unnorm_nrm_zero_lrg:
6573         sub.w           &32, %d1                # adjust shft amt by 32
6574
6575         mov.l           FTEMP_LO(%a0), %d0      # fetch old lo(man)
6576         lsl.l           %d1, %d0                # left shift lo(man)
6577
6578         mov.l           %d0, FTEMP_HI(%a0)      # store new hi(man)
6579         clr.l           FTEMP_LO(%a0)           # lo(man) = 0
6580
6581         and.w           &0x8000, FTEMP_EX(%a0)  # set exp = 0
6582
6583         mov.b           &DENORM, %d0            # return new optype tag
6584         rts
6585
6586 #
6587 # whole mantissa is zero so this UNNORM is actually a zero
6588 #
6589 unnorm_zero:
6590         and.w           &0x8000, FTEMP_EX(%a0)  # force exponent to zero
6591
6592         mov.b           &ZERO, %d0              # fix optype tag
6593         rts
6594
6595 #########################################################################
6596 # XDEF **************************************************************** #
6597 #       set_tag_x(): return the optype of the input ext fp number       #
6598 #                                                                       #
6599 # XREF **************************************************************** #
6600 #       None                                                            #
6601 #                                                                       #
6602 # INPUT *************************************************************** #
6603 #       a0 = pointer to extended precision operand                      #
6604 #                                                                       #
6605 # OUTPUT ************************************************************** #
6606 #       d0 = value of type tag                                          #
6607 #               one of: NORM, INF, QNAN, SNAN, DENORM, UNNORM, ZERO     #
6608 #                                                                       #
6609 # ALGORITHM *********************************************************** #
6610 #       Simply test the exponent, j-bit, and mantissa values to         #
6611 # determine the type of operand.                                        #
6612 #       If it's an unnormalized zero, alter the operand and force it    #
6613 # to be a normal zero.                                                  #
6614 #                                                                       #
6615 #########################################################################
6616
6617         global          set_tag_x
6618 set_tag_x:
6619         mov.w           FTEMP_EX(%a0), %d0      # extract exponent
6620         andi.w          &0x7fff, %d0            # strip off sign
6621         cmpi.w          %d0, &0x7fff            # is (EXP == MAX)?
6622         beq.b           inf_or_nan_x
6623 not_inf_or_nan_x:
6624         btst            &0x7,FTEMP_HI(%a0)
6625         beq.b           not_norm_x
6626 is_norm_x:
6627         mov.b           &NORM, %d0
6628         rts
6629 not_norm_x:
6630         tst.w           %d0                     # is exponent = 0?
6631         bne.b           is_unnorm_x
6632 not_unnorm_x:
6633         tst.l           FTEMP_HI(%a0)
6634         bne.b           is_denorm_x
6635         tst.l           FTEMP_LO(%a0)
6636         bne.b           is_denorm_x
6637 is_zero_x:
6638         mov.b           &ZERO, %d0
6639         rts
6640 is_denorm_x:
6641         mov.b           &DENORM, %d0
6642         rts
6643 # must distinguish now "Unnormalized zeroes" which we
6644 # must convert to zero.
6645 is_unnorm_x:
6646         tst.l           FTEMP_HI(%a0)
6647         bne.b           is_unnorm_reg_x
6648         tst.l           FTEMP_LO(%a0)
6649         bne.b           is_unnorm_reg_x
6650 # it's an "unnormalized zero". let's convert it to an actual zero...
6651         andi.w          &0x8000,FTEMP_EX(%a0)   # clear exponent
6652         mov.b           &ZERO, %d0
6653         rts
6654 is_unnorm_reg_x:
6655         mov.b           &UNNORM, %d0
6656         rts
6657 inf_or_nan_x:
6658         tst.l           FTEMP_LO(%a0)
6659         bne.b           is_nan_x
6660         mov.l           FTEMP_HI(%a0), %d0
6661         and.l           &0x7fffffff, %d0        # msb is a don't care!
6662         bne.b           is_nan_x
6663 is_inf_x:
6664         mov.b           &INF, %d0
6665         rts
6666 is_nan_x:
6667         btst            &0x6, FTEMP_HI(%a0)
6668         beq.b           is_snan_x
6669         mov.b           &QNAN, %d0
6670         rts
6671 is_snan_x:
6672         mov.b           &SNAN, %d0
6673         rts
6674
6675 #########################################################################
6676 # XDEF **************************************************************** #
6677 #       set_tag_d(): return the optype of the input dbl fp number       #
6678 #                                                                       #
6679 # XREF **************************************************************** #
6680 #       None                                                            #
6681 #                                                                       #
6682 # INPUT *************************************************************** #
6683 #       a0 = points to double precision operand                         #
6684 #                                                                       #
6685 # OUTPUT ************************************************************** #
6686 #       d0 = value of type tag                                          #
6687 #               one of: NORM, INF, QNAN, SNAN, DENORM, ZERO             #
6688 #                                                                       #
6689 # ALGORITHM *********************************************************** #
6690 #       Simply test the exponent, j-bit, and mantissa values to         #
6691 # determine the type of operand.                                        #
6692 #                                                                       #
6693 #########################################################################
6694
6695         global          set_tag_d
6696 set_tag_d:
6697         mov.l           FTEMP(%a0), %d0
6698         mov.l           %d0, %d1
6699
6700         andi.l          &0x7ff00000, %d0
6701         beq.b           zero_or_denorm_d
6702
6703         cmpi.l          %d0, &0x7ff00000
6704         beq.b           inf_or_nan_d
6705
6706 is_norm_d:
6707         mov.b           &NORM, %d0
6708         rts
6709 zero_or_denorm_d:
6710         and.l           &0x000fffff, %d1
6711         bne             is_denorm_d
6712         tst.l           4+FTEMP(%a0)
6713         bne             is_denorm_d
6714 is_zero_d:
6715         mov.b           &ZERO, %d0
6716         rts
6717 is_denorm_d:
6718         mov.b           &DENORM, %d0
6719         rts
6720 inf_or_nan_d:
6721         and.l           &0x000fffff, %d1
6722         bne             is_nan_d
6723         tst.l           4+FTEMP(%a0)
6724         bne             is_nan_d
6725 is_inf_d:
6726         mov.b           &INF, %d0
6727         rts
6728 is_nan_d:
6729         btst            &19, %d1
6730         bne             is_qnan_d
6731 is_snan_d:
6732         mov.b           &SNAN, %d0
6733         rts
6734 is_qnan_d:
6735         mov.b           &QNAN, %d0
6736         rts
6737
6738 #########################################################################
6739 # XDEF **************************************************************** #
6740 #       set_tag_s(): return the optype of the input sgl fp number       #
6741 #                                                                       #
6742 # XREF **************************************************************** #
6743 #       None                                                            #
6744 #                                                                       #
6745 # INPUT *************************************************************** #
6746 #       a0 = pointer to single precision operand                        #
6747 #                                                                       #
6748 # OUTPUT ************************************************************** #
6749 #       d0 = value of type tag                                          #
6750 #               one of: NORM, INF, QNAN, SNAN, DENORM, ZERO             #
6751 #                                                                       #
6752 # ALGORITHM *********************************************************** #
6753 #       Simply test the exponent, j-bit, and mantissa values to         #
6754 # determine the type of operand.                                        #
6755 #                                                                       #
6756 #########################################################################
6757
6758         global          set_tag_s
6759 set_tag_s:
6760         mov.l           FTEMP(%a0), %d0
6761         mov.l           %d0, %d1
6762
6763         andi.l          &0x7f800000, %d0
6764         beq.b           zero_or_denorm_s
6765
6766         cmpi.l          %d0, &0x7f800000
6767         beq.b           inf_or_nan_s
6768
6769 is_norm_s:
6770         mov.b           &NORM, %d0
6771         rts
6772 zero_or_denorm_s:
6773         and.l           &0x007fffff, %d1
6774         bne             is_denorm_s
6775 is_zero_s:
6776         mov.b           &ZERO, %d0
6777         rts
6778 is_denorm_s:
6779         mov.b           &DENORM, %d0
6780         rts
6781 inf_or_nan_s:
6782         and.l           &0x007fffff, %d1
6783         bne             is_nan_s
6784 is_inf_s:
6785         mov.b           &INF, %d0
6786         rts
6787 is_nan_s:
6788         btst            &22, %d1
6789         bne             is_qnan_s
6790 is_snan_s:
6791         mov.b           &SNAN, %d0
6792         rts
6793 is_qnan_s:
6794         mov.b           &QNAN, %d0
6795         rts
6796
6797 #########################################################################
6798 # XDEF **************************************************************** #
6799 #       unf_res(): routine to produce default underflow result of a     #
6800 #                  scaled extended precision number; this is used by    #
6801 #                  fadd/fdiv/fmul/etc. emulation routines.              #
6802 #       unf_res4(): same as above but for fsglmul/fsgldiv which use     #
6803 #                   single round prec and extended prec mode.           #
6804 #                                                                       #
6805 # XREF **************************************************************** #
6806 #       _denorm() - denormalize according to scale factor               #
6807 #       _round() - round denormalized number according to rnd prec      #
6808 #                                                                       #
6809 # INPUT *************************************************************** #
6810 #       a0 = pointer to extended precison operand                       #
6811 #       d0 = scale factor                                               #
6812 #       d1 = rounding precision/mode                                    #
6813 #                                                                       #
6814 # OUTPUT ************************************************************** #
6815 #       a0 = pointer to default underflow result in extended precision  #
6816 #       d0.b = result FPSR_cc which caller may or may not want to save  #
6817 #                                                                       #
6818 # ALGORITHM *********************************************************** #
6819 #       Convert the input operand to "internal format" which means the  #
6820 # exponent is extended to 16 bits and the sign is stored in the unused  #
6821 # portion of the extended precison operand. Denormalize the number      #
6822 # according to the scale factor passed in d0. Then, round the           #
6823 # denormalized result.                                                  #
6824 #       Set the FPSR_exc bits as appropriate but return the cc bits in  #
6825 # d0 in case the caller doesn't want to save them (as is the case for   #
6826 # fmove out).                                                           #
6827 #       unf_res4() for fsglmul/fsgldiv forces the denorm to extended    #
6828 # precision and the rounding mode to single.                            #
6829 #                                                                       #
6830 #########################################################################
6831         global          unf_res
6832 unf_res:
6833         mov.l           %d1, -(%sp)             # save rnd prec,mode on stack
6834
6835         btst            &0x7, FTEMP_EX(%a0)     # make "internal" format
6836         sne             FTEMP_SGN(%a0)
6837
6838         mov.w           FTEMP_EX(%a0), %d1      # extract exponent
6839         and.w           &0x7fff, %d1
6840         sub.w           %d0, %d1
6841         mov.w           %d1, FTEMP_EX(%a0)      # insert 16 bit exponent
6842
6843         mov.l           %a0, -(%sp)             # save operand ptr during calls
6844
6845         mov.l           0x4(%sp),%d0            # pass rnd prec.
6846         andi.w          &0x00c0,%d0
6847         lsr.w           &0x4,%d0
6848         bsr.l           _denorm                 # denorm result
6849
6850         mov.l           (%sp),%a0
6851         mov.w           0x6(%sp),%d1            # load prec:mode into %d1
6852         andi.w          &0xc0,%d1               # extract rnd prec
6853         lsr.w           &0x4,%d1
6854         swap            %d1
6855         mov.w           0x6(%sp),%d1
6856         andi.w          &0x30,%d1
6857         lsr.w           &0x4,%d1
6858         bsr.l           _round                  # round the denorm
6859
6860         mov.l           (%sp)+, %a0
6861
6862 # result is now rounded properly. convert back to normal format
6863         bclr            &0x7, FTEMP_EX(%a0)     # clear sgn first; may have residue
6864         tst.b           FTEMP_SGN(%a0)          # is "internal result" sign set?
6865         beq.b           unf_res_chkifzero       # no; result is positive
6866         bset            &0x7, FTEMP_EX(%a0)     # set result sgn
6867         clr.b           FTEMP_SGN(%a0)          # clear temp sign
6868
6869 # the number may have become zero after rounding. set ccodes accordingly.
6870 unf_res_chkifzero:
6871         clr.l           %d0
6872         tst.l           FTEMP_HI(%a0)           # is value now a zero?
6873         bne.b           unf_res_cont            # no
6874         tst.l           FTEMP_LO(%a0)
6875         bne.b           unf_res_cont            # no
6876 #       bset            &z_bit, FPSR_CC(%a6)    # yes; set zero ccode bit
6877         bset            &z_bit, %d0             # yes; set zero ccode bit
6878
6879 unf_res_cont:
6880
6881 #
6882 # can inex1 also be set along with unfl and inex2???
6883 #
6884 # we know that underflow has occurred. aunfl should be set if INEX2 is also set.
6885 #
6886         btst            &inex2_bit, FPSR_EXCEPT(%a6) # is INEX2 set?
6887         beq.b           unf_res_end             # no
6888         bset            &aunfl_bit, FPSR_AEXCEPT(%a6) # yes; set aunfl
6889
6890 unf_res_end:
6891         add.l           &0x4, %sp               # clear stack
6892         rts
6893
6894 # unf_res() for fsglmul() and fsgldiv().
6895         global          unf_res4
6896 unf_res4:
6897         mov.l           %d1,-(%sp)              # save rnd prec,mode on stack
6898
6899         btst            &0x7,FTEMP_EX(%a0)      # make "internal" format
6900         sne             FTEMP_SGN(%a0)
6901
6902         mov.w           FTEMP_EX(%a0),%d1       # extract exponent
6903         and.w           &0x7fff,%d1
6904         sub.w           %d0,%d1
6905         mov.w           %d1,FTEMP_EX(%a0)       # insert 16 bit exponent
6906
6907         mov.l           %a0,-(%sp)              # save operand ptr during calls
6908
6909         clr.l           %d0                     # force rnd prec = ext
6910         bsr.l           _denorm                 # denorm result
6911
6912         mov.l           (%sp),%a0
6913         mov.w           &s_mode,%d1             # force rnd prec = sgl
6914         swap            %d1
6915         mov.w           0x6(%sp),%d1            # load rnd mode
6916         andi.w          &0x30,%d1               # extract rnd prec
6917         lsr.w           &0x4,%d1
6918         bsr.l           _round                  # round the denorm
6919
6920         mov.l           (%sp)+,%a0
6921
6922 # result is now rounded properly. convert back to normal format
6923         bclr            &0x7,FTEMP_EX(%a0)      # clear sgn first; may have residue
6924         tst.b           FTEMP_SGN(%a0)          # is "internal result" sign set?
6925         beq.b           unf_res4_chkifzero      # no; result is positive
6926         bset            &0x7,FTEMP_EX(%a0)      # set result sgn
6927         clr.b           FTEMP_SGN(%a0)          # clear temp sign
6928
6929 # the number may have become zero after rounding. set ccodes accordingly.
6930 unf_res4_chkifzero:
6931         clr.l           %d0
6932         tst.l           FTEMP_HI(%a0)           # is value now a zero?
6933         bne.b           unf_res4_cont           # no
6934         tst.l           FTEMP_LO(%a0)
6935         bne.b           unf_res4_cont           # no
6936 #       bset            &z_bit,FPSR_CC(%a6)     # yes; set zero ccode bit
6937         bset            &z_bit,%d0              # yes; set zero ccode bit
6938
6939 unf_res4_cont:
6940
6941 #
6942 # can inex1 also be set along with unfl and inex2???
6943 #
6944 # we know that underflow has occurred. aunfl should be set if INEX2 is also set.
6945 #
6946         btst            &inex2_bit,FPSR_EXCEPT(%a6) # is INEX2 set?
6947         beq.b           unf_res4_end            # no
6948         bset            &aunfl_bit,FPSR_AEXCEPT(%a6) # yes; set aunfl
6949
6950 unf_res4_end:
6951         add.l           &0x4,%sp                # clear stack
6952         rts
6953
6954 #########################################################################
6955 # XDEF **************************************************************** #
6956 #       ovf_res(): routine to produce the default overflow result of    #
6957 #                  an overflowing number.                               #
6958 #       ovf_res2(): same as above but the rnd mode/prec are passed      #
6959 #                   differently.                                        #
6960 #                                                                       #
6961 # XREF **************************************************************** #
6962 #       none                                                            #
6963 #                                                                       #
6964 # INPUT *************************************************************** #
6965 #       d1.b    = '-1' => (-); '0' => (+)                               #
6966 #   ovf_res():                                                          #
6967 #       d0      = rnd mode/prec                                         #
6968 #   ovf_res2():                                                         #
6969 #       hi(d0)  = rnd prec                                              #
6970 #       lo(d0)  = rnd mode                                              #
6971 #                                                                       #
6972 # OUTPUT ************************************************************** #
6973 #       a0      = points to extended precision result                   #
6974 #       d0.b    = condition code bits                                   #
6975 #                                                                       #
6976 # ALGORITHM *********************************************************** #
6977 #       The default overflow result can be determined by the sign of    #
6978 # the result and the rounding mode/prec in effect. These bits are       #
6979 # concatenated together to create an index into the default result      #
6980 # table. A pointer to the correct result is returned in a0. The         #
6981 # resulting condition codes are returned in d0 in case the caller       #
6982 # doesn't want FPSR_cc altered (as is the case for fmove out).          #
6983 #                                                                       #
6984 #########################################################################
6985
6986         global          ovf_res
6987 ovf_res:
6988         andi.w          &0x10,%d1               # keep result sign
6989         lsr.b           &0x4,%d0                # shift prec/mode
6990         or.b            %d0,%d1                 # concat the two
6991         mov.w           %d1,%d0                 # make a copy
6992         lsl.b           &0x1,%d1                # multiply d1 by 2
6993         bra.b           ovf_res_load
6994
6995         global          ovf_res2
6996 ovf_res2:
6997         and.w           &0x10, %d1              # keep result sign
6998         or.b            %d0, %d1                # insert rnd mode
6999         swap            %d0
7000         or.b            %d0, %d1                # insert rnd prec
7001         mov.w           %d1, %d0                # make a copy
7002         lsl.b           &0x1, %d1               # shift left by 1
7003
7004 #
7005 # use the rounding mode, precision, and result sign as in index into the
7006 # two tables below to fetch the default result and the result ccodes.
7007 #
7008 ovf_res_load:
7009         mov.b           (tbl_ovfl_cc.b,%pc,%d0.w*1), %d0 # fetch result ccodes
7010         lea             (tbl_ovfl_result.b,%pc,%d1.w*8), %a0 # return result ptr
7011
7012         rts
7013
7014 tbl_ovfl_cc:
7015         byte            0x2, 0x0, 0x0, 0x2
7016         byte            0x2, 0x0, 0x0, 0x2
7017         byte            0x2, 0x0, 0x0, 0x2
7018         byte            0x0, 0x0, 0x0, 0x0
7019         byte            0x2+0x8, 0x8, 0x2+0x8, 0x8
7020         byte            0x2+0x8, 0x8, 0x2+0x8, 0x8
7021         byte            0x2+0x8, 0x8, 0x2+0x8, 0x8
7022
7023 tbl_ovfl_result:
7024         long            0x7fff0000,0x00000000,0x00000000,0x00000000 # +INF; RN
7025         long            0x7ffe0000,0xffffffff,0xffffffff,0x00000000 # +EXT; RZ
7026         long            0x7ffe0000,0xffffffff,0xffffffff,0x00000000 # +EXT; RM
7027         long            0x7fff0000,0x00000000,0x00000000,0x00000000 # +INF; RP
7028
7029         long            0x7fff0000,0x00000000,0x00000000,0x00000000 # +INF; RN
7030         long            0x407e0000,0xffffff00,0x00000000,0x00000000 # +SGL; RZ
7031         long            0x407e0000,0xffffff00,0x00000000,0x00000000 # +SGL; RM
7032         long            0x7fff0000,0x00000000,0x00000000,0x00000000 # +INF; RP
7033
7034         long            0x7fff0000,0x00000000,0x00000000,0x00000000 # +INF; RN
7035         long            0x43fe0000,0xffffffff,0xfffff800,0x00000000 # +DBL; RZ
7036         long            0x43fe0000,0xffffffff,0xfffff800,0x00000000 # +DBL; RM
7037         long            0x7fff0000,0x00000000,0x00000000,0x00000000 # +INF; RP
7038
7039         long            0x00000000,0x00000000,0x00000000,0x00000000
7040         long            0x00000000,0x00000000,0x00000000,0x00000000
7041         long            0x00000000,0x00000000,0x00000000,0x00000000
7042         long            0x00000000,0x00000000,0x00000000,0x00000000
7043
7044         long            0xffff0000,0x00000000,0x00000000,0x00000000 # -INF; RN
7045         long            0xfffe0000,0xffffffff,0xffffffff,0x00000000 # -EXT; RZ
7046         long            0xffff0000,0x00000000,0x00000000,0x00000000 # -INF; RM
7047         long            0xfffe0000,0xffffffff,0xffffffff,0x00000000 # -EXT; RP
7048
7049         long            0xffff0000,0x00000000,0x00000000,0x00000000 # -INF; RN
7050         long            0xc07e0000,0xffffff00,0x00000000,0x00000000 # -SGL; RZ
7051         long            0xffff0000,0x00000000,0x00000000,0x00000000 # -INF; RM
7052         long            0xc07e0000,0xffffff00,0x00000000,0x00000000 # -SGL; RP
7053
7054         long            0xffff0000,0x00000000,0x00000000,0x00000000 # -INF; RN
7055         long            0xc3fe0000,0xffffffff,0xfffff800,0x00000000 # -DBL; RZ
7056         long            0xffff0000,0x00000000,0x00000000,0x00000000 # -INF; RM
7057         long            0xc3fe0000,0xffffffff,0xfffff800,0x00000000 # -DBL; RP
7058
7059 #########################################################################
7060 # XDEF **************************************************************** #
7061 #       fout(): move from fp register to memory or data register        #
7062 #                                                                       #
7063 # XREF **************************************************************** #
7064 #       _round() - needed to create EXOP for sgl/dbl precision          #
7065 #       norm() - needed to create EXOP for extended precision           #
7066 #       ovf_res() - create default overflow result for sgl/dbl precision#
7067 #       unf_res() - create default underflow result for sgl/dbl prec.   #
7068 #       dst_dbl() - create rounded dbl precision result.                #
7069 #       dst_sgl() - create rounded sgl precision result.                #
7070 #       fetch_dreg() - fetch dynamic k-factor reg for packed.           #
7071 #       bindec() - convert FP binary number to packed number.           #
7072 #       _mem_write() - write data to memory.                            #
7073 #       _mem_write2() - write data to memory unless supv mode -(a7) exc.#
7074 #       _dmem_write_{byte,word,long}() - write data to memory.          #
7075 #       store_dreg_{b,w,l}() - store data to data register file.        #
7076 #       facc_out_{b,w,l,d,x}() - data access error occurred.            #
7077 #                                                                       #
7078 # INPUT *************************************************************** #
7079 #       a0 = pointer to extended precision source operand               #
7080 #       d0 = round prec,mode                                            #
7081 #                                                                       #
7082 # OUTPUT ************************************************************** #
7083 #       fp0 : intermediate underflow or overflow result if              #
7084 #             OVFL/UNFL occurred for a sgl or dbl operand               #
7085 #                                                                       #
7086 # ALGORITHM *********************************************************** #
7087 #       This routine is accessed by many handlers that need to do an    #
7088 # opclass three move of an operand out to memory.                       #
7089 #       Decode an fmove out (opclass 3) instruction to determine if     #
7090 # it's b,w,l,s,d,x, or p in size. b,w,l can be stored to either a data  #
7091 # register or memory. The algorithm uses a standard "fmove" to create   #
7092 # the rounded result. Also, since exceptions are disabled, this also    #
7093 # create the correct OPERR default result if appropriate.               #
7094 #       For sgl or dbl precision, overflow or underflow can occur. If   #
7095 # either occurs and is enabled, the EXOP.                               #
7096 #       For extended precision, the stacked <ea> must be fixed along    #
7097 # w/ the address index register as appropriate w/ _calc_ea_fout(). If   #
7098 # the source is a denorm and if underflow is enabled, an EXOP must be   #
7099 # created.                                                              #
7100 #       For packed, the k-factor must be fetched from the instruction   #
7101 # word or a data register. The <ea> must be fixed as w/ extended        #
7102 # precision. Then, bindec() is called to create the appropriate         #
7103 # packed result.                                                        #
7104 #       If at any time an access error is flagged by one of the move-   #
7105 # to-memory routines, then a special exit must be made so that the      #
7106 # access error can be handled properly.                                 #
7107 #                                                                       #
7108 #########################################################################
7109
7110         global          fout
7111 fout:
7112         bfextu          EXC_CMDREG(%a6){&3:&3},%d1 # extract dst fmt
7113         mov.w           (tbl_fout.b,%pc,%d1.w*2),%a1 # use as index
7114         jmp             (tbl_fout.b,%pc,%a1)    # jump to routine
7115
7116         swbeg           &0x8
7117 tbl_fout:
7118         short           fout_long       -       tbl_fout
7119         short           fout_sgl        -       tbl_fout
7120         short           fout_ext        -       tbl_fout
7121         short           fout_pack       -       tbl_fout
7122         short           fout_word       -       tbl_fout
7123         short           fout_dbl        -       tbl_fout
7124         short           fout_byte       -       tbl_fout
7125         short           fout_pack       -       tbl_fout
7126
7127 #################################################################
7128 # fmove.b out ###################################################
7129 #################################################################
7130
7131 # Only "Unimplemented Data Type" exceptions enter here. The operand
7132 # is either a DENORM or a NORM.
7133 fout_byte:
7134         tst.b           STAG(%a6)               # is operand normalized?
7135         bne.b           fout_byte_denorm        # no
7136
7137         fmovm.x         SRC(%a0),&0x80          # load value
7138
7139 fout_byte_norm:
7140         fmov.l          %d0,%fpcr               # insert rnd prec,mode
7141
7142         fmov.b          %fp0,%d0                # exec move out w/ correct rnd mode
7143
7144         fmov.l          &0x0,%fpcr              # clear FPCR
7145         fmov.l          %fpsr,%d1               # fetch FPSR
7146         or.w            %d1,2+USER_FPSR(%a6)    # save new exc,accrued bits
7147
7148         mov.b           1+EXC_OPWORD(%a6),%d1   # extract dst mode
7149         andi.b          &0x38,%d1               # is mode == 0? (Dreg dst)
7150         beq.b           fout_byte_dn            # must save to integer regfile
7151
7152         mov.l           EXC_EA(%a6),%a0         # stacked <ea> is correct
7153         bsr.l           _dmem_write_byte        # write byte
7154
7155         tst.l           %d1                     # did dstore fail?
7156         bne.l           facc_out_b              # yes
7157
7158         rts
7159
7160 fout_byte_dn:
7161         mov.b           1+EXC_OPWORD(%a6),%d1   # extract Dn
7162         andi.w          &0x7,%d1
7163         bsr.l           store_dreg_b
7164         rts
7165
7166 fout_byte_denorm:
7167         mov.l           SRC_EX(%a0),%d1
7168         andi.l          &0x80000000,%d1         # keep DENORM sign
7169         ori.l           &0x00800000,%d1         # make smallest sgl
7170         fmov.s          %d1,%fp0
7171         bra.b           fout_byte_norm
7172
7173 #################################################################
7174 # fmove.w out ###################################################
7175 #################################################################
7176
7177 # Only "Unimplemented Data Type" exceptions enter here. The operand
7178 # is either a DENORM or a NORM.
7179 fout_word:
7180         tst.b           STAG(%a6)               # is operand normalized?
7181         bne.b           fout_word_denorm        # no
7182
7183         fmovm.x         SRC(%a0),&0x80          # load value
7184
7185 fout_word_norm:
7186         fmov.l          %d0,%fpcr               # insert rnd prec:mode
7187
7188         fmov.w          %fp0,%d0                # exec move out w/ correct rnd mode
7189
7190         fmov.l          &0x0,%fpcr              # clear FPCR
7191         fmov.l          %fpsr,%d1               # fetch FPSR
7192         or.w            %d1,2+USER_FPSR(%a6)    # save new exc,accrued bits
7193
7194         mov.b           1+EXC_OPWORD(%a6),%d1   # extract dst mode
7195         andi.b          &0x38,%d1               # is mode == 0? (Dreg dst)
7196         beq.b           fout_word_dn            # must save to integer regfile
7197
7198         mov.l           EXC_EA(%a6),%a0         # stacked <ea> is correct
7199         bsr.l           _dmem_write_word        # write word
7200
7201         tst.l           %d1                     # did dstore fail?
7202         bne.l           facc_out_w              # yes
7203
7204         rts
7205
7206 fout_word_dn:
7207         mov.b           1+EXC_OPWORD(%a6),%d1   # extract Dn
7208         andi.w          &0x7,%d1
7209         bsr.l           store_dreg_w
7210         rts
7211
7212 fout_word_denorm:
7213         mov.l           SRC_EX(%a0),%d1
7214         andi.l          &0x80000000,%d1         # keep DENORM sign
7215         ori.l           &0x00800000,%d1         # make smallest sgl
7216         fmov.s          %d1,%fp0
7217         bra.b           fout_word_norm
7218
7219 #################################################################
7220 # fmove.l out ###################################################
7221 #################################################################
7222
7223 # Only "Unimplemented Data Type" exceptions enter here. The operand
7224 # is either a DENORM or a NORM.
7225 fout_long:
7226         tst.b           STAG(%a6)               # is operand normalized?
7227         bne.b           fout_long_denorm        # no
7228
7229         fmovm.x         SRC(%a0),&0x80          # load value
7230
7231 fout_long_norm:
7232         fmov.l          %d0,%fpcr               # insert rnd prec:mode
7233
7234         fmov.l          %fp0,%d0                # exec move out w/ correct rnd mode
7235
7236         fmov.l          &0x0,%fpcr              # clear FPCR
7237         fmov.l          %fpsr,%d1               # fetch FPSR
7238         or.w            %d1,2+USER_FPSR(%a6)    # save new exc,accrued bits
7239
7240 fout_long_write:
7241         mov.b           1+EXC_OPWORD(%a6),%d1   # extract dst mode
7242         andi.b          &0x38,%d1               # is mode == 0? (Dreg dst)
7243         beq.b           fout_long_dn            # must save to integer regfile
7244
7245         mov.l           EXC_EA(%a6),%a0         # stacked <ea> is correct
7246         bsr.l           _dmem_write_long        # write long
7247
7248         tst.l           %d1                     # did dstore fail?
7249         bne.l           facc_out_l              # yes
7250
7251         rts
7252
7253 fout_long_dn:
7254         mov.b           1+EXC_OPWORD(%a6),%d1   # extract Dn
7255         andi.w          &0x7,%d1
7256         bsr.l           store_dreg_l
7257         rts
7258
7259 fout_long_denorm:
7260         mov.l           SRC_EX(%a0),%d1
7261         andi.l          &0x80000000,%d1         # keep DENORM sign
7262         ori.l           &0x00800000,%d1         # make smallest sgl
7263         fmov.s          %d1,%fp0
7264         bra.b           fout_long_norm
7265
7266 #################################################################
7267 # fmove.x out ###################################################
7268 #################################################################
7269
7270 # Only "Unimplemented Data Type" exceptions enter here. The operand
7271 # is either a DENORM or a NORM.
7272 # The DENORM causes an Underflow exception.
7273 fout_ext:
7274
7275 # we copy the extended precision result to FP_SCR0 so that the reserved
7276 # 16-bit field gets zeroed. we do this since we promise not to disturb
7277 # what's at SRC(a0).
7278         mov.w           SRC_EX(%a0),FP_SCR0_EX(%a6)
7279         clr.w           2+FP_SCR0_EX(%a6)       # clear reserved field
7280         mov.l           SRC_HI(%a0),FP_SCR0_HI(%a6)
7281         mov.l           SRC_LO(%a0),FP_SCR0_LO(%a6)
7282
7283         fmovm.x         SRC(%a0),&0x80          # return result
7284
7285         bsr.l           _calc_ea_fout           # fix stacked <ea>
7286
7287         mov.l           %a0,%a1                 # pass: dst addr
7288         lea             FP_SCR0(%a6),%a0        # pass: src addr
7289         mov.l           &0xc,%d0                # pass: opsize is 12 bytes
7290
7291 # we must not yet write the extended precision data to the stack
7292 # in the pre-decrement case from supervisor mode or else we'll corrupt
7293 # the stack frame. so, leave it in FP_SRC for now and deal with it later...
7294         cmpi.b          SPCOND_FLG(%a6),&mda7_flg
7295         beq.b           fout_ext_a7
7296
7297         bsr.l           _dmem_write             # write ext prec number to memory
7298
7299         tst.l           %d1                     # did dstore fail?
7300         bne.w           fout_ext_err            # yes
7301
7302         tst.b           STAG(%a6)               # is operand normalized?
7303         bne.b           fout_ext_denorm         # no
7304         rts
7305
7306 # the number is a DENORM. must set the underflow exception bit
7307 fout_ext_denorm:
7308         bset            &unfl_bit,FPSR_EXCEPT(%a6) # set underflow exc bit
7309
7310         mov.b           FPCR_ENABLE(%a6),%d0
7311         andi.b          &0x0a,%d0               # is UNFL or INEX enabled?
7312         bne.b           fout_ext_exc            # yes
7313         rts
7314
7315 # we don't want to do the write if the exception occurred in supervisor mode
7316 # so _mem_write2() handles this for us.
7317 fout_ext_a7:
7318         bsr.l           _mem_write2             # write ext prec number to memory
7319
7320         tst.l           %d1                     # did dstore fail?
7321         bne.w           fout_ext_err            # yes
7322
7323         tst.b           STAG(%a6)               # is operand normalized?
7324         bne.b           fout_ext_denorm         # no
7325         rts
7326
7327 fout_ext_exc:
7328         lea             FP_SCR0(%a6),%a0
7329         bsr.l           norm                    # normalize the mantissa
7330         neg.w           %d0                     # new exp = -(shft amt)
7331         andi.w          &0x7fff,%d0
7332         andi.w          &0x8000,FP_SCR0_EX(%a6) # keep only old sign
7333         or.w            %d0,FP_SCR0_EX(%a6)     # insert new exponent
7334         fmovm.x         FP_SCR0(%a6),&0x40      # return EXOP in fp1
7335         rts
7336
7337 fout_ext_err:
7338         mov.l           EXC_A6(%a6),(%a6)       # fix stacked a6
7339         bra.l           facc_out_x
7340
7341 #########################################################################
7342 # fmove.s out ###########################################################
7343 #########################################################################
7344 fout_sgl:
7345         andi.b          &0x30,%d0               # clear rnd prec
7346         ori.b           &s_mode*0x10,%d0        # insert sgl prec
7347         mov.l           %d0,L_SCR3(%a6)         # save rnd prec,mode on stack
7348
7349 #
7350 # operand is a normalized number. first, we check to see if the move out
7351 # would cause either an underflow or overflow. these cases are handled
7352 # separately. otherwise, set the FPCR to the proper rounding mode and
7353 # execute the move.
7354 #
7355         mov.w           SRC_EX(%a0),%d0         # extract exponent
7356         andi.w          &0x7fff,%d0             # strip sign
7357
7358         cmpi.w          %d0,&SGL_HI             # will operand overflow?
7359         bgt.w           fout_sgl_ovfl           # yes; go handle OVFL
7360         beq.w           fout_sgl_may_ovfl       # maybe; go handle possible OVFL
7361         cmpi.w          %d0,&SGL_LO             # will operand underflow?
7362         blt.w           fout_sgl_unfl           # yes; go handle underflow
7363
7364 #
7365 # NORMs(in range) can be stored out by a simple "fmov.s"
7366 # Unnormalized inputs can come through this point.
7367 #
7368 fout_sgl_exg:
7369         fmovm.x         SRC(%a0),&0x80          # fetch fop from stack
7370
7371         fmov.l          L_SCR3(%a6),%fpcr       # set FPCR
7372         fmov.l          &0x0,%fpsr              # clear FPSR
7373
7374         fmov.s          %fp0,%d0                # store does convert and round
7375
7376         fmov.l          &0x0,%fpcr              # clear FPCR
7377         fmov.l          %fpsr,%d1               # save FPSR
7378
7379         or.w            %d1,2+USER_FPSR(%a6)    # set possible inex2/ainex
7380
7381 fout_sgl_exg_write:
7382         mov.b           1+EXC_OPWORD(%a6),%d1   # extract dst mode
7383         andi.b          &0x38,%d1               # is mode == 0? (Dreg dst)
7384         beq.b           fout_sgl_exg_write_dn   # must save to integer regfile
7385
7386         mov.l           EXC_EA(%a6),%a0         # stacked <ea> is correct
7387         bsr.l           _dmem_write_long        # write long
7388
7389         tst.l           %d1                     # did dstore fail?
7390         bne.l           facc_out_l              # yes
7391
7392         rts
7393
7394 fout_sgl_exg_write_dn:
7395         mov.b           1+EXC_OPWORD(%a6),%d1   # extract Dn
7396         andi.w          &0x7,%d1
7397         bsr.l           store_dreg_l
7398         rts
7399
7400 #
7401 # here, we know that the operand would UNFL if moved out to single prec,
7402 # so, denorm and round and then use generic store single routine to
7403 # write the value to memory.
7404 #
7405 fout_sgl_unfl:
7406         bset            &unfl_bit,FPSR_EXCEPT(%a6) # set UNFL
7407
7408         mov.w           SRC_EX(%a0),FP_SCR0_EX(%a6)
7409         mov.l           SRC_HI(%a0),FP_SCR0_HI(%a6)
7410         mov.l           SRC_LO(%a0),FP_SCR0_LO(%a6)
7411         mov.l           %a0,-(%sp)
7412
7413         clr.l           %d0                     # pass: S.F. = 0
7414
7415         cmpi.b          STAG(%a6),&DENORM       # fetch src optype tag
7416         bne.b           fout_sgl_unfl_cont      # let DENORMs fall through
7417
7418         lea             FP_SCR0(%a6),%a0
7419         bsr.l           norm                    # normalize the DENORM
7420
7421 fout_sgl_unfl_cont:
7422         lea             FP_SCR0(%a6),%a0        # pass: ptr to operand
7423         mov.l           L_SCR3(%a6),%d1         # pass: rnd prec,mode
7424         bsr.l           unf_res                 # calc default underflow result
7425
7426         lea             FP_SCR0(%a6),%a0        # pass: ptr to fop
7427         bsr.l           dst_sgl                 # convert to single prec
7428
7429         mov.b           1+EXC_OPWORD(%a6),%d1   # extract dst mode
7430         andi.b          &0x38,%d1               # is mode == 0? (Dreg dst)
7431         beq.b           fout_sgl_unfl_dn        # must save to integer regfile
7432
7433         mov.l           EXC_EA(%a6),%a0         # stacked <ea> is correct
7434         bsr.l           _dmem_write_long        # write long
7435
7436         tst.l           %d1                     # did dstore fail?
7437         bne.l           facc_out_l              # yes
7438
7439         bra.b           fout_sgl_unfl_chkexc
7440
7441 fout_sgl_unfl_dn:
7442         mov.b           1+EXC_OPWORD(%a6),%d1   # extract Dn
7443         andi.w          &0x7,%d1
7444         bsr.l           store_dreg_l
7445
7446 fout_sgl_unfl_chkexc:
7447         mov.b           FPCR_ENABLE(%a6),%d1
7448         andi.b          &0x0a,%d1               # is UNFL or INEX enabled?
7449         bne.w           fout_sd_exc_unfl        # yes
7450         addq.l          &0x4,%sp
7451         rts
7452
7453 #
7454 # it's definitely an overflow so call ovf_res to get the correct answer
7455 #
7456 fout_sgl_ovfl:
7457         tst.b           3+SRC_HI(%a0)           # is result inexact?
7458         bne.b           fout_sgl_ovfl_inex2
7459         tst.l           SRC_LO(%a0)             # is result inexact?
7460         bne.b           fout_sgl_ovfl_inex2
7461         ori.w           &ovfl_inx_mask,2+USER_FPSR(%a6) # set ovfl/aovfl/ainex
7462         bra.b           fout_sgl_ovfl_cont
7463 fout_sgl_ovfl_inex2:
7464         ori.w           &ovfinx_mask,2+USER_FPSR(%a6) # set ovfl/aovfl/ainex/inex2
7465
7466 fout_sgl_ovfl_cont:
7467         mov.l           %a0,-(%sp)
7468
7469 # call ovf_res() w/ sgl prec and the correct rnd mode to create the default
7470 # overflow result. DON'T save the returned ccodes from ovf_res() since
7471 # fmove out doesn't alter them.
7472         tst.b           SRC_EX(%a0)             # is operand negative?
7473         smi             %d1                     # set if so
7474         mov.l           L_SCR3(%a6),%d0         # pass: sgl prec,rnd mode
7475         bsr.l           ovf_res                 # calc OVFL result
7476         fmovm.x         (%a0),&0x80             # load default overflow result
7477         fmov.s          %fp0,%d0                # store to single
7478
7479         mov.b           1+EXC_OPWORD(%a6),%d1   # extract dst mode
7480         andi.b          &0x38,%d1               # is mode == 0? (Dreg dst)
7481         beq.b           fout_sgl_ovfl_dn        # must save to integer regfile
7482
7483         mov.l           EXC_EA(%a6),%a0         # stacked <ea> is correct
7484         bsr.l           _dmem_write_long        # write long
7485
7486         tst.l           %d1                     # did dstore fail?
7487         bne.l           facc_out_l              # yes
7488
7489         bra.b           fout_sgl_ovfl_chkexc
7490
7491 fout_sgl_ovfl_dn:
7492         mov.b           1+EXC_OPWORD(%a6),%d1   # extract Dn
7493         andi.w          &0x7,%d1
7494         bsr.l           store_dreg_l
7495
7496 fout_sgl_ovfl_chkexc:
7497         mov.b           FPCR_ENABLE(%a6),%d1
7498         andi.b          &0x0a,%d1               # is UNFL or INEX enabled?
7499         bne.w           fout_sd_exc_ovfl        # yes
7500         addq.l          &0x4,%sp
7501         rts
7502
7503 #
7504 # move out MAY overflow:
7505 # (1) force the exp to 0x3fff
7506 # (2) do a move w/ appropriate rnd mode
7507 # (3) if exp still equals zero, then insert original exponent
7508 #       for the correct result.
7509 #     if exp now equals one, then it overflowed so call ovf_res.
7510 #
7511 fout_sgl_may_ovfl:
7512         mov.w           SRC_EX(%a0),%d1         # fetch current sign
7513         andi.w          &0x8000,%d1             # keep it,clear exp
7514         ori.w           &0x3fff,%d1             # insert exp = 0
7515         mov.w           %d1,FP_SCR0_EX(%a6)     # insert scaled exp
7516         mov.l           SRC_HI(%a0),FP_SCR0_HI(%a6) # copy hi(man)
7517         mov.l           SRC_LO(%a0),FP_SCR0_LO(%a6) # copy lo(man)
7518
7519         fmov.l          L_SCR3(%a6),%fpcr       # set FPCR
7520
7521         fmov.x          FP_SCR0(%a6),%fp0       # force fop to be rounded
7522         fmov.l          &0x0,%fpcr              # clear FPCR
7523
7524         fabs.x          %fp0                    # need absolute value
7525         fcmp.b          %fp0,&0x2               # did exponent increase?
7526         fblt.w          fout_sgl_exg            # no; go finish NORM
7527         bra.w           fout_sgl_ovfl           # yes; go handle overflow
7528
7529 ################
7530
7531 fout_sd_exc_unfl:
7532         mov.l           (%sp)+,%a0
7533
7534         mov.w           SRC_EX(%a0),FP_SCR0_EX(%a6)
7535         mov.l           SRC_HI(%a0),FP_SCR0_HI(%a6)
7536         mov.l           SRC_LO(%a0),FP_SCR0_LO(%a6)
7537
7538         cmpi.b          STAG(%a6),&DENORM       # was src a DENORM?
7539         bne.b           fout_sd_exc_cont        # no
7540
7541         lea             FP_SCR0(%a6),%a0
7542         bsr.l           norm
7543         neg.l           %d0
7544         andi.w          &0x7fff,%d0
7545         bfins           %d0,FP_SCR0_EX(%a6){&1:&15}
7546         bra.b           fout_sd_exc_cont
7547
7548 fout_sd_exc:
7549 fout_sd_exc_ovfl:
7550         mov.l           (%sp)+,%a0              # restore a0
7551
7552         mov.w           SRC_EX(%a0),FP_SCR0_EX(%a6)
7553         mov.l           SRC_HI(%a0),FP_SCR0_HI(%a6)
7554         mov.l           SRC_LO(%a0),FP_SCR0_LO(%a6)
7555
7556 fout_sd_exc_cont:
7557         bclr            &0x7,FP_SCR0_EX(%a6)    # clear sign bit
7558         sne.b           2+FP_SCR0_EX(%a6)       # set internal sign bit
7559         lea             FP_SCR0(%a6),%a0        # pass: ptr to DENORM
7560
7561         mov.b           3+L_SCR3(%a6),%d1
7562         lsr.b           &0x4,%d1
7563         andi.w          &0x0c,%d1
7564         swap            %d1
7565         mov.b           3+L_SCR3(%a6),%d1
7566         lsr.b           &0x4,%d1
7567         andi.w          &0x03,%d1
7568         clr.l           %d0                     # pass: zero g,r,s
7569         bsr.l           _round                  # round the DENORM
7570
7571         tst.b           2+FP_SCR0_EX(%a6)       # is EXOP negative?
7572         beq.b           fout_sd_exc_done        # no
7573         bset            &0x7,FP_SCR0_EX(%a6)    # yes
7574
7575 fout_sd_exc_done:
7576         fmovm.x         FP_SCR0(%a6),&0x40      # return EXOP in fp1
7577         rts
7578
7579 #################################################################
7580 # fmove.d out ###################################################
7581 #################################################################
7582 fout_dbl:
7583         andi.b          &0x30,%d0               # clear rnd prec
7584         ori.b           &d_mode*0x10,%d0        # insert dbl prec
7585         mov.l           %d0,L_SCR3(%a6)         # save rnd prec,mode on stack
7586
7587 #
7588 # operand is a normalized number. first, we check to see if the move out
7589 # would cause either an underflow or overflow. these cases are handled
7590 # separately. otherwise, set the FPCR to the proper rounding mode and
7591 # execute the move.
7592 #
7593         mov.w           SRC_EX(%a0),%d0         # extract exponent
7594         andi.w          &0x7fff,%d0             # strip sign
7595
7596         cmpi.w          %d0,&DBL_HI             # will operand overflow?
7597         bgt.w           fout_dbl_ovfl           # yes; go handle OVFL
7598         beq.w           fout_dbl_may_ovfl       # maybe; go handle possible OVFL
7599         cmpi.w          %d0,&DBL_LO             # will operand underflow?
7600         blt.w           fout_dbl_unfl           # yes; go handle underflow
7601
7602 #
7603 # NORMs(in range) can be stored out by a simple "fmov.d"
7604 # Unnormalized inputs can come through this point.
7605 #
7606 fout_dbl_exg:
7607         fmovm.x         SRC(%a0),&0x80          # fetch fop from stack
7608
7609         fmov.l          L_SCR3(%a6),%fpcr       # set FPCR
7610         fmov.l          &0x0,%fpsr              # clear FPSR
7611
7612         fmov.d          %fp0,L_SCR1(%a6)        # store does convert and round
7613
7614         fmov.l          &0x0,%fpcr              # clear FPCR
7615         fmov.l          %fpsr,%d0               # save FPSR
7616
7617         or.w            %d0,2+USER_FPSR(%a6)    # set possible inex2/ainex
7618
7619         mov.l           EXC_EA(%a6),%a1         # pass: dst addr
7620         lea             L_SCR1(%a6),%a0         # pass: src addr
7621         movq.l          &0x8,%d0                # pass: opsize is 8 bytes
7622         bsr.l           _dmem_write             # store dbl fop to memory
7623
7624         tst.l           %d1                     # did dstore fail?
7625         bne.l           facc_out_d              # yes
7626
7627         rts                                     # no; so we're finished
7628
7629 #
7630 # here, we know that the operand would UNFL if moved out to double prec,
7631 # so, denorm and round and then use generic store double routine to
7632 # write the value to memory.
7633 #
7634 fout_dbl_unfl:
7635         bset            &unfl_bit,FPSR_EXCEPT(%a6) # set UNFL
7636
7637         mov.w           SRC_EX(%a0),FP_SCR0_EX(%a6)
7638         mov.l           SRC_HI(%a0),FP_SCR0_HI(%a6)
7639         mov.l           SRC_LO(%a0),FP_SCR0_LO(%a6)
7640         mov.l           %a0,-(%sp)
7641
7642         clr.l           %d0                     # pass: S.F. = 0
7643
7644         cmpi.b          STAG(%a6),&DENORM       # fetch src optype tag
7645         bne.b           fout_dbl_unfl_cont      # let DENORMs fall through
7646
7647         lea             FP_SCR0(%a6),%a0
7648         bsr.l           norm                    # normalize the DENORM
7649
7650 fout_dbl_unfl_cont:
7651         lea             FP_SCR0(%a6),%a0        # pass: ptr to operand
7652         mov.l           L_SCR3(%a6),%d1         # pass: rnd prec,mode
7653         bsr.l           unf_res                 # calc default underflow result
7654
7655         lea             FP_SCR0(%a6),%a0        # pass: ptr to fop
7656         bsr.l           dst_dbl                 # convert to single prec
7657         mov.l           %d0,L_SCR1(%a6)
7658         mov.l           %d1,L_SCR2(%a6)
7659
7660         mov.l           EXC_EA(%a6),%a1         # pass: dst addr
7661         lea             L_SCR1(%a6),%a0         # pass: src addr
7662         movq.l          &0x8,%d0                # pass: opsize is 8 bytes
7663         bsr.l           _dmem_write             # store dbl fop to memory
7664
7665         tst.l           %d1                     # did dstore fail?
7666         bne.l           facc_out_d              # yes
7667
7668         mov.b           FPCR_ENABLE(%a6),%d1
7669         andi.b          &0x0a,%d1               # is UNFL or INEX enabled?
7670         bne.w           fout_sd_exc_unfl        # yes
7671         addq.l          &0x4,%sp
7672         rts
7673
7674 #
7675 # it's definitely an overflow so call ovf_res to get the correct answer
7676 #
7677 fout_dbl_ovfl:
7678         mov.w           2+SRC_LO(%a0),%d0
7679         andi.w          &0x7ff,%d0
7680         bne.b           fout_dbl_ovfl_inex2
7681
7682         ori.w           &ovfl_inx_mask,2+USER_FPSR(%a6) # set ovfl/aovfl/ainex
7683         bra.b           fout_dbl_ovfl_cont
7684 fout_dbl_ovfl_inex2:
7685         ori.w           &ovfinx_mask,2+USER_FPSR(%a6) # set ovfl/aovfl/ainex/inex2
7686
7687 fout_dbl_ovfl_cont:
7688         mov.l           %a0,-(%sp)
7689
7690 # call ovf_res() w/ dbl prec and the correct rnd mode to create the default
7691 # overflow result. DON'T save the returned ccodes from ovf_res() since
7692 # fmove out doesn't alter them.
7693         tst.b           SRC_EX(%a0)             # is operand negative?
7694         smi             %d1                     # set if so
7695         mov.l           L_SCR3(%a6),%d0         # pass: dbl prec,rnd mode
7696         bsr.l           ovf_res                 # calc OVFL result
7697         fmovm.x         (%a0),&0x80             # load default overflow result
7698         fmov.d          %fp0,L_SCR1(%a6)        # store to double
7699
7700         mov.l           EXC_EA(%a6),%a1         # pass: dst addr
7701         lea             L_SCR1(%a6),%a0         # pass: src addr
7702         movq.l          &0x8,%d0                # pass: opsize is 8 bytes
7703         bsr.l           _dmem_write             # store dbl fop to memory
7704
7705         tst.l           %d1                     # did dstore fail?
7706         bne.l           facc_out_d              # yes
7707
7708         mov.b           FPCR_ENABLE(%a6),%d1
7709         andi.b          &0x0a,%d1               # is UNFL or INEX enabled?
7710         bne.w           fout_sd_exc_ovfl        # yes
7711         addq.l          &0x4,%sp
7712         rts
7713
7714 #
7715 # move out MAY overflow:
7716 # (1) force the exp to 0x3fff
7717 # (2) do a move w/ appropriate rnd mode
7718 # (3) if exp still equals zero, then insert original exponent
7719 #       for the correct result.
7720 #     if exp now equals one, then it overflowed so call ovf_res.
7721 #
7722 fout_dbl_may_ovfl:
7723         mov.w           SRC_EX(%a0),%d1         # fetch current sign
7724         andi.w          &0x8000,%d1             # keep it,clear exp
7725         ori.w           &0x3fff,%d1             # insert exp = 0
7726         mov.w           %d1,FP_SCR0_EX(%a6)     # insert scaled exp
7727         mov.l           SRC_HI(%a0),FP_SCR0_HI(%a6) # copy hi(man)
7728         mov.l           SRC_LO(%a0),FP_SCR0_LO(%a6) # copy lo(man)
7729
7730         fmov.l          L_SCR3(%a6),%fpcr       # set FPCR
7731
7732         fmov.x          FP_SCR0(%a6),%fp0       # force fop to be rounded
7733         fmov.l          &0x0,%fpcr              # clear FPCR
7734
7735         fabs.x          %fp0                    # need absolute value
7736         fcmp.b          %fp0,&0x2               # did exponent increase?
7737         fblt.w          fout_dbl_exg            # no; go finish NORM
7738         bra.w           fout_dbl_ovfl           # yes; go handle overflow
7739
7740 #########################################################################
7741 # XDEF **************************************************************** #
7742 #       dst_dbl(): create double precision value from extended prec.    #
7743 #                                                                       #
7744 # XREF **************************************************************** #
7745 #       None                                                            #
7746 #                                                                       #
7747 # INPUT *************************************************************** #
7748 #       a0 = pointer to source operand in extended precision            #
7749 #                                                                       #
7750 # OUTPUT ************************************************************** #
7751 #       d0 = hi(double precision result)                                #
7752 #       d1 = lo(double precision result)                                #
7753 #                                                                       #
7754 # ALGORITHM *********************************************************** #
7755 #                                                                       #
7756 #  Changes extended precision to double precision.                      #
7757 #  Note: no attempt is made to round the extended value to double.      #
7758 #       dbl_sign = ext_sign                                             #
7759 #       dbl_exp = ext_exp - $3fff(ext bias) + $7ff(dbl bias)            #
7760 #       get rid of ext integer bit                                      #
7761 #       dbl_mant = ext_mant{62:12}                                      #
7762 #                                                                       #
7763 #               ---------------   ---------------    ---------------    #
7764 #  extended ->  |s|    exp    |   |1| ms mant   |    | ls mant     |    #
7765 #               ---------------   ---------------    ---------------    #
7766 #                95         64    63 62       32      31     11   0     #
7767 #                                    |                       |          #
7768 #                                    |                       |          #
7769 #                                    |                       |          #
7770 #                                    v                       v          #
7771 #                             ---------------   ---------------         #
7772 #  double   ->                |s|exp| mant  |   |  mant       |         #
7773 #                             ---------------   ---------------         #
7774 #                             63     51   32   31              0        #
7775 #                                                                       #
7776 #########################################################################
7777
7778 dst_dbl:
7779         clr.l           %d0                     # clear d0
7780         mov.w           FTEMP_EX(%a0),%d0       # get exponent
7781         subi.w          &EXT_BIAS,%d0           # subtract extended precision bias
7782         addi.w          &DBL_BIAS,%d0           # add double precision bias
7783         tst.b           FTEMP_HI(%a0)           # is number a denorm?
7784         bmi.b           dst_get_dupper          # no
7785         subq.w          &0x1,%d0                # yes; denorm bias = DBL_BIAS - 1
7786 dst_get_dupper:
7787         swap            %d0                     # d0 now in upper word
7788         lsl.l           &0x4,%d0                # d0 in proper place for dbl prec exp
7789         tst.b           FTEMP_EX(%a0)           # test sign
7790         bpl.b           dst_get_dman            # if postive, go process mantissa
7791         bset            &0x1f,%d0               # if negative, set sign
7792 dst_get_dman:
7793         mov.l           FTEMP_HI(%a0),%d1       # get ms mantissa
7794         bfextu          %d1{&1:&20},%d1         # get upper 20 bits of ms
7795         or.l            %d1,%d0                 # put these bits in ms word of double
7796         mov.l           %d0,L_SCR1(%a6)         # put the new exp back on the stack
7797         mov.l           FTEMP_HI(%a0),%d1       # get ms mantissa
7798         mov.l           &21,%d0                 # load shift count
7799         lsl.l           %d0,%d1                 # put lower 11 bits in upper bits
7800         mov.l           %d1,L_SCR2(%a6)         # build lower lword in memory
7801         mov.l           FTEMP_LO(%a0),%d1       # get ls mantissa
7802         bfextu          %d1{&0:&21},%d0         # get ls 21 bits of double
7803         mov.l           L_SCR2(%a6),%d1
7804         or.l            %d0,%d1                 # put them in double result
7805         mov.l           L_SCR1(%a6),%d0
7806         rts
7807
7808 #########################################################################
7809 # XDEF **************************************************************** #
7810 #       dst_sgl(): create single precision value from extended prec     #
7811 #                                                                       #
7812 # XREF **************************************************************** #
7813 #                                                                       #
7814 # INPUT *************************************************************** #
7815 #       a0 = pointer to source operand in extended precision            #
7816 #                                                                       #
7817 # OUTPUT ************************************************************** #
7818 #       d0 = single precision result                                    #
7819 #                                                                       #
7820 # ALGORITHM *********************************************************** #
7821 #                                                                       #
7822 # Changes extended precision to single precision.                       #
7823 #       sgl_sign = ext_sign                                             #
7824 #       sgl_exp = ext_exp - $3fff(ext bias) + $7f(sgl bias)             #
7825 #       get rid of ext integer bit                                      #
7826 #       sgl_mant = ext_mant{62:12}                                      #
7827 #                                                                       #
7828 #               ---------------   ---------------    ---------------    #
7829 #  extended ->  |s|    exp    |   |1| ms mant   |    | ls mant     |    #
7830 #               ---------------   ---------------    ---------------    #
7831 #                95         64    63 62    40 32      31     12   0     #
7832 #                                    |     |                            #
7833 #                                    |     |                            #
7834 #                                    |     |                            #
7835 #                                    v     v                            #
7836 #                             ---------------                           #
7837 #  single   ->                |s|exp| mant  |                           #
7838 #                             ---------------                           #
7839 #                             31     22     0                           #
7840 #                                                                       #
7841 #########################################################################
7842
7843 dst_sgl:
7844         clr.l           %d0
7845         mov.w           FTEMP_EX(%a0),%d0       # get exponent
7846         subi.w          &EXT_BIAS,%d0           # subtract extended precision bias
7847         addi.w          &SGL_BIAS,%d0           # add single precision bias
7848         tst.b           FTEMP_HI(%a0)           # is number a denorm?
7849         bmi.b           dst_get_supper          # no
7850         subq.w          &0x1,%d0                # yes; denorm bias = SGL_BIAS - 1
7851 dst_get_supper:
7852         swap            %d0                     # put exp in upper word of d0
7853         lsl.l           &0x7,%d0                # shift it into single exp bits
7854         tst.b           FTEMP_EX(%a0)           # test sign
7855         bpl.b           dst_get_sman            # if positive, continue
7856         bset            &0x1f,%d0               # if negative, put in sign first
7857 dst_get_sman:
7858         mov.l           FTEMP_HI(%a0),%d1       # get ms mantissa
7859         andi.l          &0x7fffff00,%d1         # get upper 23 bits of ms
7860         lsr.l           &0x8,%d1                # and put them flush right
7861         or.l            %d1,%d0                 # put these bits in ms word of single
7862         rts
7863
7864 ##############################################################################
7865 fout_pack:
7866         bsr.l           _calc_ea_fout           # fetch the <ea>
7867         mov.l           %a0,-(%sp)
7868
7869         mov.b           STAG(%a6),%d0           # fetch input type
7870         bne.w           fout_pack_not_norm      # input is not NORM
7871
7872 fout_pack_norm:
7873         btst            &0x4,EXC_CMDREG(%a6)    # static or dynamic?
7874         beq.b           fout_pack_s             # static
7875
7876 fout_pack_d:
7877         mov.b           1+EXC_CMDREG(%a6),%d1   # fetch dynamic reg
7878         lsr.b           &0x4,%d1
7879         andi.w          &0x7,%d1
7880
7881         bsr.l           fetch_dreg              # fetch Dn w/ k-factor
7882
7883         bra.b           fout_pack_type
7884 fout_pack_s:
7885         mov.b           1+EXC_CMDREG(%a6),%d0   # fetch static field
7886
7887 fout_pack_type:
7888         bfexts          %d0{&25:&7},%d0         # extract k-factor
7889         mov.l   %d0,-(%sp)
7890
7891         lea             FP_SRC(%a6),%a0         # pass: ptr to input
7892
7893 # bindec is currently scrambling FP_SRC for denorm inputs.
7894 # we'll have to change this, but for now, tough luck!!!
7895         bsr.l           bindec                  # convert xprec to packed
7896
7897 #       andi.l          &0xcfff000f,FP_SCR0(%a6) # clear unused fields
7898         andi.l          &0xcffff00f,FP_SCR0(%a6) # clear unused fields
7899
7900         mov.l   (%sp)+,%d0
7901
7902         tst.b           3+FP_SCR0_EX(%a6)
7903         bne.b           fout_pack_set
7904         tst.l           FP_SCR0_HI(%a6)
7905         bne.b           fout_pack_set
7906         tst.l           FP_SCR0_LO(%a6)
7907         bne.b           fout_pack_set
7908
7909 # add the extra condition that only if the k-factor was zero, too, should
7910 # we zero the exponent
7911         tst.l           %d0
7912         bne.b           fout_pack_set
7913 # "mantissa" is all zero which means that the answer is zero. but, the '040
7914 # algorithm allows the exponent to be non-zero. the 881/2 do not. therefore,
7915 # if the mantissa is zero, I will zero the exponent, too.
7916 # the question now is whether the exponents sign bit is allowed to be non-zero
7917 # for a zero, also...
7918         andi.w          &0xf000,FP_SCR0(%a6)
7919
7920 fout_pack_set:
7921
7922         lea             FP_SCR0(%a6),%a0        # pass: src addr
7923
7924 fout_pack_write:
7925         mov.l           (%sp)+,%a1              # pass: dst addr
7926         mov.l           &0xc,%d0                # pass: opsize is 12 bytes
7927
7928         cmpi.b          SPCOND_FLG(%a6),&mda7_flg
7929         beq.b           fout_pack_a7
7930
7931         bsr.l           _dmem_write             # write ext prec number to memory
7932
7933         tst.l           %d1                     # did dstore fail?
7934         bne.w           fout_ext_err            # yes
7935
7936         rts
7937
7938 # we don't want to do the write if the exception occurred in supervisor mode
7939 # so _mem_write2() handles this for us.
7940 fout_pack_a7:
7941         bsr.l           _mem_write2             # write ext prec number to memory
7942
7943         tst.l           %d1                     # did dstore fail?
7944         bne.w           fout_ext_err            # yes
7945
7946         rts
7947
7948 fout_pack_not_norm:
7949         cmpi.b          %d0,&DENORM             # is it a DENORM?
7950         beq.w           fout_pack_norm          # yes
7951         lea             FP_SRC(%a6),%a0
7952         clr.w           2+FP_SRC_EX(%a6)
7953         cmpi.b          %d0,&SNAN               # is it an SNAN?
7954         beq.b           fout_pack_snan          # yes
7955         bra.b           fout_pack_write         # no
7956
7957 fout_pack_snan:
7958         ori.w           &snaniop2_mask,FPSR_EXCEPT(%a6) # set SNAN/AIOP
7959         bset            &0x6,FP_SRC_HI(%a6)     # set snan bit
7960         bra.b           fout_pack_write
7961
7962 #########################################################################
7963 # XDEF **************************************************************** #
7964 #       fmul(): emulates the fmul instruction                           #
7965 #       fsmul(): emulates the fsmul instruction                         #
7966 #       fdmul(): emulates the fdmul instruction                         #
7967 #                                                                       #
7968 # XREF **************************************************************** #
7969 #       scale_to_zero_src() - scale src exponent to zero                #
7970 #       scale_to_zero_dst() - scale dst exponent to zero                #
7971 #       unf_res() - return default underflow result                     #
7972 #       ovf_res() - return default overflow result                      #
7973 #       res_qnan() - return QNAN result                                 #
7974 #       res_snan() - return SNAN result                                 #
7975 #                                                                       #
7976 # INPUT *************************************************************** #
7977 #       a0 = pointer to extended precision source operand               #
7978 #       a1 = pointer to extended precision destination operand          #
7979 #       d0  rnd prec,mode                                               #
7980 #                                                                       #
7981 # OUTPUT ************************************************************** #
7982 #       fp0 = result                                                    #
7983 #       fp1 = EXOP (if exception occurred)                              #
7984 #                                                                       #
7985 # ALGORITHM *********************************************************** #
7986 #       Handle NANs, infinities, and zeroes as special cases. Divide    #
7987 # norms/denorms into ext/sgl/dbl precision.                             #
7988 #       For norms/denorms, scale the exponents such that a multiply     #
7989 # instruction won't cause an exception. Use the regular fmul to         #
7990 # compute a result. Check if the regular operands would have taken      #
7991 # an exception. If so, return the default overflow/underflow result     #
7992 # and return the EXOP if exceptions are enabled. Else, scale the        #
7993 # result operand to the proper exponent.                                #
7994 #                                                                       #
7995 #########################################################################
7996
7997         align           0x10
7998 tbl_fmul_ovfl:
7999         long            0x3fff - 0x7ffe         # ext_max
8000         long            0x3fff - 0x407e         # sgl_max
8001         long            0x3fff - 0x43fe         # dbl_max
8002 tbl_fmul_unfl:
8003         long            0x3fff + 0x0001         # ext_unfl
8004         long            0x3fff - 0x3f80         # sgl_unfl
8005         long            0x3fff - 0x3c00         # dbl_unfl
8006
8007         global          fsmul
8008 fsmul:
8009         andi.b          &0x30,%d0               # clear rnd prec
8010         ori.b           &s_mode*0x10,%d0        # insert sgl prec
8011         bra.b           fmul
8012
8013         global          fdmul
8014 fdmul:
8015         andi.b          &0x30,%d0
8016         ori.b           &d_mode*0x10,%d0        # insert dbl prec
8017
8018         global          fmul
8019 fmul:
8020         mov.l           %d0,L_SCR3(%a6)         # store rnd info
8021
8022         clr.w           %d1
8023         mov.b           DTAG(%a6),%d1
8024         lsl.b           &0x3,%d1
8025         or.b            STAG(%a6),%d1           # combine src tags
8026         bne.w           fmul_not_norm           # optimize on non-norm input
8027
8028 fmul_norm:
8029         mov.w           DST_EX(%a1),FP_SCR1_EX(%a6)
8030         mov.l           DST_HI(%a1),FP_SCR1_HI(%a6)
8031         mov.l           DST_LO(%a1),FP_SCR1_LO(%a6)
8032
8033         mov.w           SRC_EX(%a0),FP_SCR0_EX(%a6)
8034         mov.l           SRC_HI(%a0),FP_SCR0_HI(%a6)
8035         mov.l           SRC_LO(%a0),FP_SCR0_LO(%a6)
8036
8037         bsr.l           scale_to_zero_src       # scale src exponent
8038         mov.l           %d0,-(%sp)              # save scale factor 1
8039
8040         bsr.l           scale_to_zero_dst       # scale dst exponent
8041
8042         add.l           %d0,(%sp)               # SCALE_FACTOR = scale1 + scale2
8043
8044         mov.w           2+L_SCR3(%a6),%d1       # fetch precision
8045         lsr.b           &0x6,%d1                # shift to lo bits
8046         mov.l           (%sp)+,%d0              # load S.F.
8047         cmp.l           %d0,(tbl_fmul_ovfl.w,%pc,%d1.w*4) # would result ovfl?
8048         beq.w           fmul_may_ovfl           # result may rnd to overflow
8049         blt.w           fmul_ovfl               # result will overflow
8050
8051         cmp.l           %d0,(tbl_fmul_unfl.w,%pc,%d1.w*4) # would result unfl?
8052         beq.w           fmul_may_unfl           # result may rnd to no unfl
8053         bgt.w           fmul_unfl               # result will underflow
8054
8055 #
8056 # NORMAL:
8057 # - the result of the multiply operation will neither overflow nor underflow.
8058 # - do the multiply to the proper precision and rounding mode.
8059 # - scale the result exponent using the scale factor. if both operands were
8060 # normalized then we really don't need to go through this scaling. but for now,
8061 # this will do.
8062 #
8063 fmul_normal:
8064         fmovm.x         FP_SCR1(%a6),&0x80      # load dst operand
8065
8066         fmov.l          L_SCR3(%a6),%fpcr       # set FPCR
8067         fmov.l          &0x0,%fpsr              # clear FPSR
8068
8069         fmul.x          FP_SCR0(%a6),%fp0       # execute multiply
8070
8071         fmov.l          %fpsr,%d1               # save status
8072         fmov.l          &0x0,%fpcr              # clear FPCR
8073
8074         or.l            %d1,USER_FPSR(%a6)      # save INEX2,N
8075
8076 fmul_normal_exit:
8077         fmovm.x         &0x80,FP_SCR0(%a6)      # store out result
8078         mov.l           %d2,-(%sp)              # save d2
8079         mov.w           FP_SCR0_EX(%a6),%d1     # load {sgn,exp}
8080         mov.l           %d1,%d2                 # make a copy
8081         andi.l          &0x7fff,%d1             # strip sign
8082         andi.w          &0x8000,%d2             # keep old sign
8083         sub.l           %d0,%d1                 # add scale factor
8084         or.w            %d2,%d1                 # concat old sign,new exp
8085         mov.w           %d1,FP_SCR0_EX(%a6)     # insert new exponent
8086         mov.l           (%sp)+,%d2              # restore d2
8087         fmovm.x         FP_SCR0(%a6),&0x80      # return default result in fp0
8088         rts
8089
8090 #
8091 # OVERFLOW:
8092 # - the result of the multiply operation is an overflow.
8093 # - do the multiply to the proper precision and rounding mode in order to
8094 # set the inexact bits.
8095 # - calculate the default result and return it in fp0.
8096 # - if overflow or inexact is enabled, we need a multiply result rounded to
8097 # extended precision. if the original operation was extended, then we have this
8098 # result. if the original operation was single or double, we have to do another
8099 # multiply using extended precision and the correct rounding mode. the result
8100 # of this operation then has its exponent scaled by -0x6000 to create the
8101 # exceptional operand.
8102 #
8103 fmul_ovfl:
8104         fmovm.x         FP_SCR1(%a6),&0x80      # load dst operand
8105
8106         fmov.l          L_SCR3(%a6),%fpcr       # set FPCR
8107         fmov.l          &0x0,%fpsr              # clear FPSR
8108
8109         fmul.x          FP_SCR0(%a6),%fp0       # execute multiply
8110
8111         fmov.l          %fpsr,%d1               # save status
8112         fmov.l          &0x0,%fpcr              # clear FPCR
8113
8114         or.l            %d1,USER_FPSR(%a6)      # save INEX2,N
8115
8116 # save setting this until now because this is where fmul_may_ovfl may jump in
8117 fmul_ovfl_tst:
8118         or.l            &ovfl_inx_mask,USER_FPSR(%a6) # set ovfl/aovfl/ainex
8119
8120         mov.b           FPCR_ENABLE(%a6),%d1
8121         andi.b          &0x13,%d1               # is OVFL or INEX enabled?
8122         bne.b           fmul_ovfl_ena           # yes
8123
8124 # calculate the default result
8125 fmul_ovfl_dis:
8126         btst            &neg_bit,FPSR_CC(%a6)   # is result negative?
8127         sne             %d1                     # set sign param accordingly
8128         mov.l           L_SCR3(%a6),%d0         # pass rnd prec,mode
8129         bsr.l           ovf_res                 # calculate default result
8130         or.b            %d0,FPSR_CC(%a6)        # set INF,N if applicable
8131         fmovm.x         (%a0),&0x80             # return default result in fp0
8132         rts
8133
8134 #
8135 # OVFL is enabled; Create EXOP:
8136 # - if precision is extended, then we have the EXOP. simply bias the exponent
8137 # with an extra -0x6000. if the precision is single or double, we need to
8138 # calculate a result rounded to extended precision.
8139 #
8140 fmul_ovfl_ena:
8141         mov.l           L_SCR3(%a6),%d1
8142         andi.b          &0xc0,%d1               # test the rnd prec
8143         bne.b           fmul_ovfl_ena_sd        # it's sgl or dbl
8144
8145 fmul_ovfl_ena_cont:
8146         fmovm.x         &0x80,FP_SCR0(%a6)      # move result to stack
8147
8148         mov.l           %d2,-(%sp)              # save d2
8149         mov.w           FP_SCR0_EX(%a6),%d1     # fetch {sgn,exp}
8150         mov.w           %d1,%d2                 # make a copy
8151         andi.l          &0x7fff,%d1             # strip sign
8152         sub.l           %d0,%d1                 # add scale factor
8153         subi.l          &0x6000,%d1             # subtract bias
8154         andi.w          &0x7fff,%d1             # clear sign bit
8155         andi.w          &0x8000,%d2             # keep old sign
8156         or.w            %d2,%d1                 # concat old sign,new exp
8157         mov.w           %d1,FP_SCR0_EX(%a6)     # insert new exponent
8158         mov.l           (%sp)+,%d2              # restore d2
8159         fmovm.x         FP_SCR0(%a6),&0x40      # return EXOP in fp1
8160         bra.b           fmul_ovfl_dis
8161
8162 fmul_ovfl_ena_sd:
8163         fmovm.x         FP_SCR1(%a6),&0x80      # load dst operand
8164
8165         mov.l           L_SCR3(%a6),%d1
8166         andi.b          &0x30,%d1               # keep rnd mode only
8167         fmov.l          %d1,%fpcr               # set FPCR
8168
8169         fmul.x          FP_SCR0(%a6),%fp0       # execute multiply
8170
8171         fmov.l          &0x0,%fpcr              # clear FPCR
8172         bra.b           fmul_ovfl_ena_cont
8173
8174 #
8175 # may OVERFLOW:
8176 # - the result of the multiply operation MAY overflow.
8177 # - do the multiply to the proper precision and rounding mode in order to
8178 # set the inexact bits.
8179 # - calculate the default result and return it in fp0.
8180 #
8181 fmul_may_ovfl:
8182         fmovm.x         FP_SCR1(%a6),&0x80      # load dst op
8183
8184         fmov.l          L_SCR3(%a6),%fpcr       # set FPCR
8185         fmov.l          &0x0,%fpsr              # clear FPSR
8186
8187         fmul.x          FP_SCR0(%a6),%fp0       # execute multiply
8188
8189         fmov.l          %fpsr,%d1               # save status
8190         fmov.l          &0x0,%fpcr              # clear FPCR
8191
8192         or.l            %d1,USER_FPSR(%a6)      # save INEX2,N
8193
8194         fabs.x          %fp0,%fp1               # make a copy of result
8195         fcmp.b          %fp1,&0x2               # is |result| >= 2.b?
8196         fbge.w          fmul_ovfl_tst           # yes; overflow has occurred
8197
8198 # no, it didn't overflow; we have correct result
8199         bra.w           fmul_normal_exit
8200
8201 #
8202 # UNDERFLOW:
8203 # - the result of the multiply operation is an underflow.
8204 # - do the multiply to the proper precision and rounding mode in order to
8205 # set the inexact bits.
8206 # - calculate the default result and return it in fp0.
8207 # - if overflow or inexact is enabled, we need a multiply result rounded to
8208 # extended precision. if the original operation was extended, then we have this
8209 # result. if the original operation was single or double, we have to do another
8210 # multiply using extended precision and the correct rounding mode. the result
8211 # of this operation then has its exponent scaled by -0x6000 to create the
8212 # exceptional operand.
8213 #
8214 fmul_unfl:
8215         bset            &unfl_bit,FPSR_EXCEPT(%a6) # set unfl exc bit
8216
8217 # for fun, let's use only extended precision, round to zero. then, let
8218 # the unf_res() routine figure out all the rest.
8219 # will we get the correct answer.
8220         fmovm.x         FP_SCR1(%a6),&0x80      # load dst operand
8221
8222         fmov.l          &rz_mode*0x10,%fpcr     # set FPCR
8223         fmov.l          &0x0,%fpsr              # clear FPSR
8224
8225         fmul.x          FP_SCR0(%a6),%fp0       # execute multiply
8226
8227         fmov.l          %fpsr,%d1               # save status
8228         fmov.l          &0x0,%fpcr              # clear FPCR
8229
8230         or.l            %d1,USER_FPSR(%a6)      # save INEX2,N
8231
8232         mov.b           FPCR_ENABLE(%a6),%d1
8233         andi.b          &0x0b,%d1               # is UNFL or INEX enabled?
8234         bne.b           fmul_unfl_ena           # yes
8235
8236 fmul_unfl_dis:
8237         fmovm.x         &0x80,FP_SCR0(%a6)      # store out result
8238
8239         lea             FP_SCR0(%a6),%a0        # pass: result addr
8240         mov.l           L_SCR3(%a6),%d1         # pass: rnd prec,mode
8241         bsr.l           unf_res                 # calculate default result
8242         or.b            %d0,FPSR_CC(%a6)        # unf_res2 may have set 'Z'
8243         fmovm.x         FP_SCR0(%a6),&0x80      # return default result in fp0
8244         rts
8245
8246 #
8247 # UNFL is enabled.
8248 #
8249 fmul_unfl_ena:
8250         fmovm.x         FP_SCR1(%a6),&0x40      # load dst op
8251
8252         mov.l           L_SCR3(%a6),%d1
8253         andi.b          &0xc0,%d1               # is precision extended?
8254         bne.b           fmul_unfl_ena_sd        # no, sgl or dbl
8255
8256 # if the rnd mode is anything but RZ, then we have to re-do the above
8257 # multiplication because we used RZ for all.
8258         fmov.l          L_SCR3(%a6),%fpcr       # set FPCR
8259
8260 fmul_unfl_ena_cont:
8261         fmov.l          &0x0,%fpsr              # clear FPSR
8262
8263         fmul.x          FP_SCR0(%a6),%fp1       # execute multiply
8264
8265         fmov.l          &0x0,%fpcr              # clear FPCR
8266
8267         fmovm.x         &0x40,FP_SCR0(%a6)      # save result to stack
8268         mov.l           %d2,-(%sp)              # save d2
8269         mov.w           FP_SCR0_EX(%a6),%d1     # fetch {sgn,exp}
8270         mov.l           %d1,%d2                 # make a copy
8271         andi.l          &0x7fff,%d1             # strip sign
8272         andi.w          &0x8000,%d2             # keep old sign
8273         sub.l           %d0,%d1                 # add scale factor
8274         addi.l          &0x6000,%d1             # add bias
8275         andi.w          &0x7fff,%d1
8276         or.w            %d2,%d1                 # concat old sign,new exp
8277         mov.w           %d1,FP_SCR0_EX(%a6)     # insert new exponent
8278         mov.l           (%sp)+,%d2              # restore d2
8279         fmovm.x         FP_SCR0(%a6),&0x40      # return EXOP in fp1
8280         bra.w           fmul_unfl_dis
8281
8282 fmul_unfl_ena_sd:
8283         mov.l           L_SCR3(%a6),%d1
8284         andi.b          &0x30,%d1               # use only rnd mode
8285         fmov.l          %d1,%fpcr               # set FPCR
8286
8287         bra.b           fmul_unfl_ena_cont
8288
8289 # MAY UNDERFLOW:
8290 # -use the correct rounding mode and precision. this code favors operations
8291 # that do not underflow.
8292 fmul_may_unfl:
8293         fmovm.x         FP_SCR1(%a6),&0x80      # load dst operand
8294
8295         fmov.l          L_SCR3(%a6),%fpcr       # set FPCR
8296         fmov.l          &0x0,%fpsr              # clear FPSR
8297
8298         fmul.x          FP_SCR0(%a6),%fp0       # execute multiply
8299
8300         fmov.l          %fpsr,%d1               # save status
8301         fmov.l          &0x0,%fpcr              # clear FPCR
8302
8303         or.l            %d1,USER_FPSR(%a6)      # save INEX2,N
8304
8305         fabs.x          %fp0,%fp1               # make a copy of result
8306         fcmp.b          %fp1,&0x2               # is |result| > 2.b?
8307         fbgt.w          fmul_normal_exit        # no; no underflow occurred
8308         fblt.w          fmul_unfl               # yes; underflow occurred
8309
8310 #
8311 # we still don't know if underflow occurred. result is ~ equal to 2. but,
8312 # we don't know if the result was an underflow that rounded up to a 2 or
8313 # a normalized number that rounded down to a 2. so, redo the entire operation
8314 # using RZ as the rounding mode to see what the pre-rounded result is.
8315 # this case should be relatively rare.
8316 #
8317         fmovm.x         FP_SCR1(%a6),&0x40      # load dst operand
8318
8319         mov.l           L_SCR3(%a6),%d1
8320         andi.b          &0xc0,%d1               # keep rnd prec
8321         ori.b           &rz_mode*0x10,%d1       # insert RZ
8322
8323         fmov.l          %d1,%fpcr               # set FPCR
8324         fmov.l          &0x0,%fpsr              # clear FPSR
8325
8326         fmul.x          FP_SCR0(%a6),%fp1       # execute multiply
8327
8328         fmov.l          &0x0,%fpcr              # clear FPCR
8329         fabs.x          %fp1                    # make absolute value
8330         fcmp.b          %fp1,&0x2               # is |result| < 2.b?
8331         fbge.w          fmul_normal_exit        # no; no underflow occurred
8332         bra.w           fmul_unfl               # yes, underflow occurred
8333
8334 ################################################################################
8335
8336 #
8337 # Multiply: inputs are not both normalized; what are they?
8338 #
8339 fmul_not_norm:
8340         mov.w           (tbl_fmul_op.b,%pc,%d1.w*2),%d1
8341         jmp             (tbl_fmul_op.b,%pc,%d1.w)
8342
8343         swbeg           &48
8344 tbl_fmul_op:
8345         short           fmul_norm       - tbl_fmul_op # NORM x NORM
8346         short           fmul_zero       - tbl_fmul_op # NORM x ZERO
8347         short           fmul_inf_src    - tbl_fmul_op # NORM x INF
8348         short           fmul_res_qnan   - tbl_fmul_op # NORM x QNAN
8349         short           fmul_norm       - tbl_fmul_op # NORM x DENORM
8350         short           fmul_res_snan   - tbl_fmul_op # NORM x SNAN
8351         short           tbl_fmul_op     - tbl_fmul_op #
8352         short           tbl_fmul_op     - tbl_fmul_op #
8353
8354         short           fmul_zero       - tbl_fmul_op # ZERO x NORM
8355         short           fmul_zero       - tbl_fmul_op # ZERO x ZERO
8356         short           fmul_res_operr  - tbl_fmul_op # ZERO x INF
8357         short           fmul_res_qnan   - tbl_fmul_op # ZERO x QNAN
8358         short           fmul_zero       - tbl_fmul_op # ZERO x DENORM
8359         short           fmul_res_snan   - tbl_fmul_op # ZERO x SNAN
8360         short           tbl_fmul_op     - tbl_fmul_op #
8361         short           tbl_fmul_op     - tbl_fmul_op #
8362
8363         short           fmul_inf_dst    - tbl_fmul_op # INF x NORM
8364         short           fmul_res_operr  - tbl_fmul_op # INF x ZERO
8365         short           fmul_inf_dst    - tbl_fmul_op # INF x INF
8366         short           fmul_res_qnan   - tbl_fmul_op # INF x QNAN
8367         short           fmul_inf_dst    - tbl_fmul_op # INF x DENORM
8368         short           fmul_res_snan   - tbl_fmul_op # INF x SNAN
8369         short           tbl_fmul_op     - tbl_fmul_op #
8370         short           tbl_fmul_op     - tbl_fmul_op #
8371
8372         short           fmul_res_qnan   - tbl_fmul_op # QNAN x NORM
8373         short           fmul_res_qnan   - tbl_fmul_op # QNAN x ZERO
8374         short           fmul_res_qnan   - tbl_fmul_op # QNAN x INF
8375         short           fmul_res_qnan   - tbl_fmul_op # QNAN x QNAN
8376         short           fmul_res_qnan   - tbl_fmul_op # QNAN x DENORM
8377         short           fmul_res_snan   - tbl_fmul_op # QNAN x SNAN
8378         short           tbl_fmul_op     - tbl_fmul_op #
8379         short           tbl_fmul_op     - tbl_fmul_op #
8380
8381         short           fmul_norm       - tbl_fmul_op # NORM x NORM
8382         short           fmul_zero       - tbl_fmul_op # NORM x ZERO
8383         short           fmul_inf_src    - tbl_fmul_op # NORM x INF
8384         short           fmul_res_qnan   - tbl_fmul_op # NORM x QNAN
8385         short           fmul_norm       - tbl_fmul_op # NORM x DENORM
8386         short           fmul_res_snan   - tbl_fmul_op # NORM x SNAN
8387         short           tbl_fmul_op     - tbl_fmul_op #
8388         short           tbl_fmul_op     - tbl_fmul_op #
8389
8390         short           fmul_res_snan   - tbl_fmul_op # SNAN x NORM
8391         short           fmul_res_snan   - tbl_fmul_op # SNAN x ZERO
8392         short           fmul_res_snan   - tbl_fmul_op # SNAN x INF
8393         short           fmul_res_snan   - tbl_fmul_op # SNAN x QNAN
8394         short           fmul_res_snan   - tbl_fmul_op # SNAN x DENORM
8395         short           fmul_res_snan   - tbl_fmul_op # SNAN x SNAN
8396         short           tbl_fmul_op     - tbl_fmul_op #
8397         short           tbl_fmul_op     - tbl_fmul_op #
8398
8399 fmul_res_operr:
8400         bra.l           res_operr
8401 fmul_res_snan:
8402         bra.l           res_snan
8403 fmul_res_qnan:
8404         bra.l           res_qnan
8405
8406 #
8407 # Multiply: (Zero x Zero) || (Zero x norm) || (Zero x denorm)
8408 #
8409         global          fmul_zero               # global for fsglmul
8410 fmul_zero:
8411         mov.b           SRC_EX(%a0),%d0         # exclusive or the signs
8412         mov.b           DST_EX(%a1),%d1
8413         eor.b           %d0,%d1
8414         bpl.b           fmul_zero_p             # result ZERO is pos.
8415 fmul_zero_n:
8416         fmov.s          &0x80000000,%fp0        # load -ZERO
8417         mov.b           &z_bmask+neg_bmask,FPSR_CC(%a6) # set Z/N
8418         rts
8419 fmul_zero_p:
8420         fmov.s          &0x00000000,%fp0        # load +ZERO
8421         mov.b           &z_bmask,FPSR_CC(%a6)   # set Z
8422         rts
8423
8424 #
8425 # Multiply: (inf x inf) || (inf x norm) || (inf x denorm)
8426 #
8427 # Note: The j-bit for an infinity is a don't-care. However, to be
8428 # strictly compatible w/ the 68881/882, we make sure to return an
8429 # INF w/ the j-bit set if the input INF j-bit was set. Destination
8430 # INFs take priority.
8431 #
8432         global          fmul_inf_dst            # global for fsglmul
8433 fmul_inf_dst:
8434         fmovm.x         DST(%a1),&0x80          # return INF result in fp0
8435         mov.b           SRC_EX(%a0),%d0         # exclusive or the signs
8436         mov.b           DST_EX(%a1),%d1
8437         eor.b           %d0,%d1
8438         bpl.b           fmul_inf_dst_p          # result INF is pos.
8439 fmul_inf_dst_n:
8440         fabs.x          %fp0                    # clear result sign
8441         fneg.x          %fp0                    # set result sign
8442         mov.b           &inf_bmask+neg_bmask,FPSR_CC(%a6) # set INF/N
8443         rts
8444 fmul_inf_dst_p:
8445         fabs.x          %fp0                    # clear result sign
8446         mov.b           &inf_bmask,FPSR_CC(%a6) # set INF
8447         rts
8448
8449         global          fmul_inf_src            # global for fsglmul
8450 fmul_inf_src:
8451         fmovm.x         SRC(%a0),&0x80          # return INF result in fp0
8452         mov.b           SRC_EX(%a0),%d0         # exclusive or the signs
8453         mov.b           DST_EX(%a1),%d1
8454         eor.b           %d0,%d1
8455         bpl.b           fmul_inf_dst_p          # result INF is pos.
8456         bra.b           fmul_inf_dst_n
8457
8458 #########################################################################
8459 # XDEF **************************************************************** #
8460 #       fin(): emulates the fmove instruction                           #
8461 #       fsin(): emulates the fsmove instruction                         #
8462 #       fdin(): emulates the fdmove instruction                         #
8463 #                                                                       #
8464 # XREF **************************************************************** #
8465 #       norm() - normalize mantissa for EXOP on denorm                  #
8466 #       scale_to_zero_src() - scale src exponent to zero                #
8467 #       ovf_res() - return default overflow result                      #
8468 #       unf_res() - return default underflow result                     #
8469 #       res_qnan_1op() - return QNAN result                             #
8470 #       res_snan_1op() - return SNAN result                             #
8471 #                                                                       #
8472 # INPUT *************************************************************** #
8473 #       a0 = pointer to extended precision source operand               #
8474 #       d0 = round prec/mode                                            #
8475 #                                                                       #
8476 # OUTPUT ************************************************************** #
8477 #       fp0 = result                                                    #
8478 #       fp1 = EXOP (if exception occurred)                              #
8479 #                                                                       #
8480 # ALGORITHM *********************************************************** #
8481 #       Handle NANs, infinities, and zeroes as special cases. Divide    #
8482 # norms into extended, single, and double precision.                    #
8483 #       Norms can be emulated w/ a regular fmove instruction. For       #
8484 # sgl/dbl, must scale exponent and perform an "fmove". Check to see     #
8485 # if the result would have overflowed/underflowed. If so, use unf_res() #
8486 # or ovf_res() to return the default result. Also return EXOP if        #
8487 # exception is enabled. If no exception, return the default result.     #
8488 #       Unnorms don't pass through here.                                #
8489 #                                                                       #
8490 #########################################################################
8491
8492         global          fsin
8493 fsin:
8494         andi.b          &0x30,%d0               # clear rnd prec
8495         ori.b           &s_mode*0x10,%d0        # insert sgl precision
8496         bra.b           fin
8497
8498         global          fdin
8499 fdin:
8500         andi.b          &0x30,%d0               # clear rnd prec
8501         ori.b           &d_mode*0x10,%d0        # insert dbl precision
8502
8503         global          fin
8504 fin:
8505         mov.l           %d0,L_SCR3(%a6)         # store rnd info
8506
8507         mov.b           STAG(%a6),%d1           # fetch src optype tag
8508         bne.w           fin_not_norm            # optimize on non-norm input
8509
8510 #
8511 # FP MOVE IN: NORMs and DENORMs ONLY!
8512 #
8513 fin_norm:
8514         andi.b          &0xc0,%d0               # is precision extended?
8515         bne.w           fin_not_ext             # no, so go handle dbl or sgl
8516
8517 #
8518 # precision selected is extended. so...we cannot get an underflow
8519 # or overflow because of rounding to the correct precision. so...
8520 # skip the scaling and unscaling...
8521 #
8522         tst.b           SRC_EX(%a0)             # is the operand negative?
8523         bpl.b           fin_norm_done           # no
8524         bset            &neg_bit,FPSR_CC(%a6)   # yes, so set 'N' ccode bit
8525 fin_norm_done:
8526         fmovm.x         SRC(%a0),&0x80          # return result in fp0
8527         rts
8528
8529 #
8530 # for an extended precision DENORM, the UNFL exception bit is set
8531 # the accrued bit is NOT set in this instance(no inexactness!)
8532 #
8533 fin_denorm:
8534         andi.b          &0xc0,%d0               # is precision extended?
8535         bne.w           fin_not_ext             # no, so go handle dbl or sgl
8536
8537         bset            &unfl_bit,FPSR_EXCEPT(%a6) # set unfl exc bit
8538         tst.b           SRC_EX(%a0)             # is the operand negative?
8539         bpl.b           fin_denorm_done         # no
8540         bset            &neg_bit,FPSR_CC(%a6)   # yes, so set 'N' ccode bit
8541 fin_denorm_done:
8542         fmovm.x         SRC(%a0),&0x80          # return result in fp0
8543         btst            &unfl_bit,FPCR_ENABLE(%a6) # is UNFL enabled?
8544         bne.b           fin_denorm_unfl_ena     # yes
8545         rts
8546
8547 #
8548 # the input is an extended DENORM and underflow is enabled in the FPCR.
8549 # normalize the mantissa and add the bias of 0x6000 to the resulting negative
8550 # exponent and insert back into the operand.
8551 #
8552 fin_denorm_unfl_ena:
8553         mov.w           SRC_EX(%a0),FP_SCR0_EX(%a6)
8554         mov.l           SRC_HI(%a0),FP_SCR0_HI(%a6)
8555         mov.l           SRC_LO(%a0),FP_SCR0_LO(%a6)
8556         lea             FP_SCR0(%a6),%a0        # pass: ptr to operand
8557         bsr.l           norm                    # normalize result
8558         neg.w           %d0                     # new exponent = -(shft val)
8559         addi.w          &0x6000,%d0             # add new bias to exponent
8560         mov.w           FP_SCR0_EX(%a6),%d1     # fetch old sign,exp
8561         andi.w          &0x8000,%d1             # keep old sign
8562         andi.w          &0x7fff,%d0             # clear sign position
8563         or.w            %d1,%d0                 # concat new exo,old sign
8564         mov.w           %d0,FP_SCR0_EX(%a6)     # insert new exponent
8565         fmovm.x         FP_SCR0(%a6),&0x40      # return EXOP in fp1
8566         rts
8567
8568 #
8569 # operand is to be rounded to single or double precision
8570 #
8571 fin_not_ext:
8572         cmpi.b          %d0,&s_mode*0x10        # separate sgl/dbl prec
8573         bne.b           fin_dbl
8574
8575 #
8576 # operand is to be rounded to single precision
8577 #
8578 fin_sgl:
8579         mov.w           SRC_EX(%a0),FP_SCR0_EX(%a6)
8580         mov.l           SRC_HI(%a0),FP_SCR0_HI(%a6)
8581         mov.l           SRC_LO(%a0),FP_SCR0_LO(%a6)
8582         bsr.l           scale_to_zero_src       # calculate scale factor
8583
8584         cmpi.l          %d0,&0x3fff-0x3f80      # will move in underflow?
8585         bge.w           fin_sd_unfl             # yes; go handle underflow
8586         cmpi.l          %d0,&0x3fff-0x407e      # will move in overflow?
8587         beq.w           fin_sd_may_ovfl         # maybe; go check
8588         blt.w           fin_sd_ovfl             # yes; go handle overflow
8589
8590 #
8591 # operand will NOT overflow or underflow when moved into the fp reg file
8592 #
8593 fin_sd_normal:
8594         fmov.l          &0x0,%fpsr              # clear FPSR
8595         fmov.l          L_SCR3(%a6),%fpcr       # set FPCR
8596
8597         fmov.x          FP_SCR0(%a6),%fp0       # perform move
8598
8599         fmov.l          %fpsr,%d1               # save FPSR
8600         fmov.l          &0x0,%fpcr              # clear FPCR
8601
8602         or.l            %d1,USER_FPSR(%a6)      # save INEX2,N
8603
8604 fin_sd_normal_exit:
8605         mov.l           %d2,-(%sp)              # save d2
8606         fmovm.x         &0x80,FP_SCR0(%a6)      # store out result
8607         mov.w           FP_SCR0_EX(%a6),%d1     # load {sgn,exp}
8608         mov.w           %d1,%d2                 # make a copy
8609         andi.l          &0x7fff,%d1             # strip sign
8610         sub.l           %d0,%d1                 # add scale factor
8611         andi.w          &0x8000,%d2             # keep old sign
8612         or.w            %d1,%d2                 # concat old sign,new exponent
8613         mov.w           %d2,FP_SCR0_EX(%a6)     # insert new exponent
8614         mov.l           (%sp)+,%d2              # restore d2
8615         fmovm.x         FP_SCR0(%a6),&0x80      # return result in fp0
8616         rts
8617
8618 #
8619 # operand is to be rounded to double precision
8620 #
8621 fin_dbl:
8622         mov.w           SRC_EX(%a0),FP_SCR0_EX(%a6)
8623         mov.l           SRC_HI(%a0),FP_SCR0_HI(%a6)
8624         mov.l           SRC_LO(%a0),FP_SCR0_LO(%a6)
8625         bsr.l           scale_to_zero_src       # calculate scale factor
8626
8627         cmpi.l          %d0,&0x3fff-0x3c00      # will move in underflow?
8628         bge.w           fin_sd_unfl             # yes; go handle underflow
8629         cmpi.l          %d0,&0x3fff-0x43fe      # will move in overflow?
8630         beq.w           fin_sd_may_ovfl         # maybe; go check
8631         blt.w           fin_sd_ovfl             # yes; go handle overflow
8632         bra.w           fin_sd_normal           # no; ho handle normalized op
8633
8634 #
8635 # operand WILL underflow when moved in to the fp register file
8636 #
8637 fin_sd_unfl:
8638         bset            &unfl_bit,FPSR_EXCEPT(%a6) # set unfl exc bit
8639
8640         tst.b           FP_SCR0_EX(%a6)         # is operand negative?
8641         bpl.b           fin_sd_unfl_tst
8642         bset            &neg_bit,FPSR_CC(%a6)   # set 'N' ccode bit
8643
8644 # if underflow or inexact is enabled, then go calculate the EXOP first.
8645 fin_sd_unfl_tst:
8646         mov.b           FPCR_ENABLE(%a6),%d1
8647         andi.b          &0x0b,%d1               # is UNFL or INEX enabled?
8648         bne.b           fin_sd_unfl_ena         # yes
8649
8650 fin_sd_unfl_dis:
8651         lea             FP_SCR0(%a6),%a0        # pass: result addr
8652         mov.l           L_SCR3(%a6),%d1         # pass: rnd prec,mode
8653         bsr.l           unf_res                 # calculate default result
8654         or.b            %d0,FPSR_CC(%a6)        # unf_res may have set 'Z'
8655         fmovm.x         FP_SCR0(%a6),&0x80      # return default result in fp0
8656         rts
8657
8658 #
8659 # operand will underflow AND underflow or inexact is enabled.
8660 # therefore, we must return the result rounded to extended precision.
8661 #
8662 fin_sd_unfl_ena:
8663         mov.l           FP_SCR0_HI(%a6),FP_SCR1_HI(%a6)
8664         mov.l           FP_SCR0_LO(%a6),FP_SCR1_LO(%a6)
8665         mov.w           FP_SCR0_EX(%a6),%d1     # load current exponent
8666
8667         mov.l           %d2,-(%sp)              # save d2
8668         mov.w           %d1,%d2                 # make a copy
8669         andi.l          &0x7fff,%d1             # strip sign
8670         sub.l           %d0,%d1                 # subtract scale factor
8671         andi.w          &0x8000,%d2             # extract old sign
8672         addi.l          &0x6000,%d1             # add new bias
8673         andi.w          &0x7fff,%d1
8674         or.w            %d1,%d2                 # concat old sign,new exp
8675         mov.w           %d2,FP_SCR1_EX(%a6)     # insert new exponent
8676         fmovm.x         FP_SCR1(%a6),&0x40      # return EXOP in fp1
8677         mov.l           (%sp)+,%d2              # restore d2
8678         bra.b           fin_sd_unfl_dis
8679
8680 #
8681 # operand WILL overflow.
8682 #
8683 fin_sd_ovfl:
8684         fmov.l          &0x0,%fpsr              # clear FPSR
8685         fmov.l          L_SCR3(%a6),%fpcr       # set FPCR
8686
8687         fmov.x          FP_SCR0(%a6),%fp0       # perform move
8688
8689         fmov.l          &0x0,%fpcr              # clear FPCR
8690         fmov.l          %fpsr,%d1               # save FPSR
8691
8692         or.l            %d1,USER_FPSR(%a6)      # save INEX2,N
8693
8694 fin_sd_ovfl_tst:
8695         or.l            &ovfl_inx_mask,USER_FPSR(%a6) # set ovfl/aovfl/ainex
8696
8697         mov.b           FPCR_ENABLE(%a6),%d1
8698         andi.b          &0x13,%d1               # is OVFL or INEX enabled?
8699         bne.b           fin_sd_ovfl_ena         # yes
8700
8701 #
8702 # OVFL is not enabled; therefore, we must create the default result by
8703 # calling ovf_res().
8704 #
8705 fin_sd_ovfl_dis:
8706         btst            &neg_bit,FPSR_CC(%a6)   # is result negative?
8707         sne             %d1                     # set sign param accordingly
8708         mov.l           L_SCR3(%a6),%d0         # pass: prec,mode
8709         bsr.l           ovf_res                 # calculate default result
8710         or.b            %d0,FPSR_CC(%a6)        # set INF,N if applicable
8711         fmovm.x         (%a0),&0x80             # return default result in fp0
8712         rts
8713
8714 #
8715 # OVFL is enabled.
8716 # the INEX2 bit has already been updated by the round to the correct precision.
8717 # now, round to extended(and don't alter the FPSR).
8718 #
8719 fin_sd_ovfl_ena:
8720         mov.l           %d2,-(%sp)              # save d2
8721         mov.w           FP_SCR0_EX(%a6),%d1     # fetch {sgn,exp}
8722         mov.l           %d1,%d2                 # make a copy
8723         andi.l          &0x7fff,%d1             # strip sign
8724         andi.w          &0x8000,%d2             # keep old sign
8725         sub.l           %d0,%d1                 # add scale factor
8726         sub.l           &0x6000,%d1             # subtract bias
8727         andi.w          &0x7fff,%d1
8728         or.w            %d2,%d1
8729         mov.w           %d1,FP_SCR0_EX(%a6)     # insert new exponent
8730         mov.l           (%sp)+,%d2              # restore d2
8731         fmovm.x         FP_SCR0(%a6),&0x40      # return EXOP in fp1
8732         bra.b           fin_sd_ovfl_dis
8733
8734 #
8735 # the move in MAY overflow. so...
8736 #
8737 fin_sd_may_ovfl:
8738         fmov.l          &0x0,%fpsr              # clear FPSR
8739         fmov.l          L_SCR3(%a6),%fpcr       # set FPCR
8740
8741         fmov.x          FP_SCR0(%a6),%fp0       # perform the move
8742
8743         fmov.l          %fpsr,%d1               # save status
8744         fmov.l          &0x0,%fpcr              # clear FPCR
8745
8746         or.l            %d1,USER_FPSR(%a6)      # save INEX2,N
8747
8748         fabs.x          %fp0,%fp1               # make a copy of result
8749         fcmp.b          %fp1,&0x2               # is |result| >= 2.b?
8750         fbge.w          fin_sd_ovfl_tst         # yes; overflow has occurred
8751
8752 # no, it didn't overflow; we have correct result
8753         bra.w           fin_sd_normal_exit
8754
8755 ##########################################################################
8756
8757 #
8758 # operand is not a NORM: check its optype and branch accordingly
8759 #
8760 fin_not_norm:
8761         cmpi.b          %d1,&DENORM             # weed out DENORM
8762         beq.w           fin_denorm
8763         cmpi.b          %d1,&SNAN               # weed out SNANs
8764         beq.l           res_snan_1op
8765         cmpi.b          %d1,&QNAN               # weed out QNANs
8766         beq.l           res_qnan_1op
8767
8768 #
8769 # do the fmove in; at this point, only possible ops are ZERO and INF.
8770 # use fmov to determine ccodes.
8771 # prec:mode should be zero at this point but it won't affect answer anyways.
8772 #
8773         fmov.x          SRC(%a0),%fp0           # do fmove in
8774         fmov.l          %fpsr,%d0               # no exceptions possible
8775         rol.l           &0x8,%d0                # put ccodes in lo byte
8776         mov.b           %d0,FPSR_CC(%a6)        # insert correct ccodes
8777         rts
8778
8779 #########################################################################
8780 # XDEF **************************************************************** #
8781 #       fdiv(): emulates the fdiv instruction                           #
8782 #       fsdiv(): emulates the fsdiv instruction                         #
8783 #       fddiv(): emulates the fddiv instruction                         #
8784 #                                                                       #
8785 # XREF **************************************************************** #
8786 #       scale_to_zero_src() - scale src exponent to zero                #
8787 #       scale_to_zero_dst() - scale dst exponent to zero                #
8788 #       unf_res() - return default underflow result                     #
8789 #       ovf_res() - return default overflow result                      #
8790 #       res_qnan() - return QNAN result                                 #
8791 #       res_snan() - return SNAN result                                 #
8792 #                                                                       #
8793 # INPUT *************************************************************** #
8794 #       a0 = pointer to extended precision source operand               #
8795 #       a1 = pointer to extended precision destination operand          #
8796 #       d0  rnd prec,mode                                               #
8797 #                                                                       #
8798 # OUTPUT ************************************************************** #
8799 #       fp0 = result                                                    #
8800 #       fp1 = EXOP (if exception occurred)                              #
8801 #                                                                       #
8802 # ALGORITHM *********************************************************** #
8803 #       Handle NANs, infinities, and zeroes as special cases. Divide    #
8804 # norms/denorms into ext/sgl/dbl precision.                             #
8805 #       For norms/denorms, scale the exponents such that a divide       #
8806 # instruction won't cause an exception. Use the regular fdiv to         #
8807 # compute a result. Check if the regular operands would have taken      #
8808 # an exception. If so, return the default overflow/underflow result     #
8809 # and return the EXOP if exceptions are enabled. Else, scale the        #
8810 # result operand to the proper exponent.                                #
8811 #                                                                       #
8812 #########################################################################
8813
8814         align           0x10
8815 tbl_fdiv_unfl:
8816         long            0x3fff - 0x0000         # ext_unfl
8817         long            0x3fff - 0x3f81         # sgl_unfl
8818         long            0x3fff - 0x3c01         # dbl_unfl
8819
8820 tbl_fdiv_ovfl:
8821         long            0x3fff - 0x7ffe         # ext overflow exponent
8822         long            0x3fff - 0x407e         # sgl overflow exponent
8823         long            0x3fff - 0x43fe         # dbl overflow exponent
8824
8825         global          fsdiv
8826 fsdiv:
8827         andi.b          &0x30,%d0               # clear rnd prec
8828         ori.b           &s_mode*0x10,%d0        # insert sgl prec
8829         bra.b           fdiv
8830
8831         global          fddiv
8832 fddiv:
8833         andi.b          &0x30,%d0               # clear rnd prec
8834         ori.b           &d_mode*0x10,%d0        # insert dbl prec
8835
8836         global          fdiv
8837 fdiv:
8838         mov.l           %d0,L_SCR3(%a6)         # store rnd info
8839
8840         clr.w           %d1
8841         mov.b           DTAG(%a6),%d1
8842         lsl.b           &0x3,%d1
8843         or.b            STAG(%a6),%d1           # combine src tags
8844
8845         bne.w           fdiv_not_norm           # optimize on non-norm input
8846
8847 #
8848 # DIVIDE: NORMs and DENORMs ONLY!
8849 #
8850 fdiv_norm:
8851         mov.w           DST_EX(%a1),FP_SCR1_EX(%a6)
8852         mov.l           DST_HI(%a1),FP_SCR1_HI(%a6)
8853         mov.l           DST_LO(%a1),FP_SCR1_LO(%a6)
8854
8855         mov.w           SRC_EX(%a0),FP_SCR0_EX(%a6)
8856         mov.l           SRC_HI(%a0),FP_SCR0_HI(%a6)
8857         mov.l           SRC_LO(%a0),FP_SCR0_LO(%a6)
8858
8859         bsr.l           scale_to_zero_src       # scale src exponent
8860         mov.l           %d0,-(%sp)              # save scale factor 1
8861
8862         bsr.l           scale_to_zero_dst       # scale dst exponent
8863
8864         neg.l           (%sp)                   # SCALE FACTOR = scale1 - scale2
8865         add.l           %d0,(%sp)
8866
8867         mov.w           2+L_SCR3(%a6),%d1       # fetch precision
8868         lsr.b           &0x6,%d1                # shift to lo bits
8869         mov.l           (%sp)+,%d0              # load S.F.
8870         cmp.l           %d0,(tbl_fdiv_ovfl.b,%pc,%d1.w*4) # will result overflow?
8871         ble.w           fdiv_may_ovfl           # result will overflow
8872
8873         cmp.l           %d0,(tbl_fdiv_unfl.w,%pc,%d1.w*4) # will result underflow?
8874         beq.w           fdiv_may_unfl           # maybe
8875         bgt.w           fdiv_unfl               # yes; go handle underflow
8876
8877 fdiv_normal:
8878         fmovm.x         FP_SCR1(%a6),&0x80      # load dst op
8879
8880         fmov.l          L_SCR3(%a6),%fpcr       # save FPCR
8881         fmov.l          &0x0,%fpsr              # clear FPSR
8882
8883         fdiv.x          FP_SCR0(%a6),%fp0       # perform divide
8884
8885         fmov.l          %fpsr,%d1               # save FPSR
8886         fmov.l          &0x0,%fpcr              # clear FPCR
8887
8888         or.l            %d1,USER_FPSR(%a6)      # save INEX2,N
8889
8890 fdiv_normal_exit:
8891         fmovm.x         &0x80,FP_SCR0(%a6)      # store result on stack
8892         mov.l           %d2,-(%sp)              # store d2
8893         mov.w           FP_SCR0_EX(%a6),%d1     # load {sgn,exp}
8894         mov.l           %d1,%d2                 # make a copy
8895         andi.l          &0x7fff,%d1             # strip sign
8896         andi.w          &0x8000,%d2             # keep old sign
8897         sub.l           %d0,%d1                 # add scale factor
8898         or.w            %d2,%d1                 # concat old sign,new exp
8899         mov.w           %d1,FP_SCR0_EX(%a6)     # insert new exponent
8900         mov.l           (%sp)+,%d2              # restore d2
8901         fmovm.x         FP_SCR0(%a6),&0x80      # return result in fp0
8902         rts
8903
8904 tbl_fdiv_ovfl2:
8905         long            0x7fff
8906         long            0x407f
8907         long            0x43ff
8908
8909 fdiv_no_ovfl:
8910         mov.l           (%sp)+,%d0              # restore scale factor
8911         bra.b           fdiv_normal_exit
8912
8913 fdiv_may_ovfl:
8914         mov.l           %d0,-(%sp)              # save scale factor
8915
8916         fmovm.x         FP_SCR1(%a6),&0x80      # load dst op
8917
8918         fmov.l          L_SCR3(%a6),%fpcr       # set FPCR
8919         fmov.l          &0x0,%fpsr              # set FPSR
8920
8921         fdiv.x          FP_SCR0(%a6),%fp0       # execute divide
8922
8923         fmov.l          %fpsr,%d0
8924         fmov.l          &0x0,%fpcr
8925
8926         or.l            %d0,USER_FPSR(%a6)      # save INEX,N
8927
8928         fmovm.x         &0x01,-(%sp)            # save result to stack
8929         mov.w           (%sp),%d0               # fetch new exponent
8930         add.l           &0xc,%sp                # clear result from stack
8931         andi.l          &0x7fff,%d0             # strip sign
8932         sub.l           (%sp),%d0               # add scale factor
8933         cmp.l           %d0,(tbl_fdiv_ovfl2.b,%pc,%d1.w*4)
8934         blt.b           fdiv_no_ovfl
8935         mov.l           (%sp)+,%d0
8936
8937 fdiv_ovfl_tst:
8938         or.l            &ovfl_inx_mask,USER_FPSR(%a6) # set ovfl/aovfl/ainex
8939
8940         mov.b           FPCR_ENABLE(%a6),%d1
8941         andi.b          &0x13,%d1               # is OVFL or INEX enabled?
8942         bne.b           fdiv_ovfl_ena           # yes
8943
8944 fdiv_ovfl_dis:
8945         btst            &neg_bit,FPSR_CC(%a6)   # is result negative?
8946         sne             %d1                     # set sign param accordingly
8947         mov.l           L_SCR3(%a6),%d0         # pass prec:rnd
8948         bsr.l           ovf_res                 # calculate default result
8949         or.b            %d0,FPSR_CC(%a6)        # set INF if applicable
8950         fmovm.x         (%a0),&0x80             # return default result in fp0
8951         rts
8952
8953 fdiv_ovfl_ena:
8954         mov.l           L_SCR3(%a6),%d1
8955         andi.b          &0xc0,%d1               # is precision extended?
8956         bne.b           fdiv_ovfl_ena_sd        # no, do sgl or dbl
8957
8958 fdiv_ovfl_ena_cont:
8959         fmovm.x         &0x80,FP_SCR0(%a6)      # move result to stack
8960
8961         mov.l           %d2,-(%sp)              # save d2
8962         mov.w           FP_SCR0_EX(%a6),%d1     # fetch {sgn,exp}
8963         mov.w           %d1,%d2                 # make a copy
8964         andi.l          &0x7fff,%d1             # strip sign
8965         sub.l           %d0,%d1                 # add scale factor
8966         subi.l          &0x6000,%d1             # subtract bias
8967         andi.w          &0x7fff,%d1             # clear sign bit
8968         andi.w          &0x8000,%d2             # keep old sign
8969         or.w            %d2,%d1                 # concat old sign,new exp
8970         mov.w           %d1,FP_SCR0_EX(%a6)     # insert new exponent
8971         mov.l           (%sp)+,%d2              # restore d2
8972         fmovm.x         FP_SCR0(%a6),&0x40      # return EXOP in fp1
8973         bra.b           fdiv_ovfl_dis
8974
8975 fdiv_ovfl_ena_sd:
8976         fmovm.x         FP_SCR1(%a6),&0x80      # load dst operand
8977
8978         mov.l           L_SCR3(%a6),%d1
8979         andi.b          &0x30,%d1               # keep rnd mode
8980         fmov.l          %d1,%fpcr               # set FPCR
8981
8982         fdiv.x          FP_SCR0(%a6),%fp0       # execute divide
8983
8984         fmov.l          &0x0,%fpcr              # clear FPCR
8985         bra.b           fdiv_ovfl_ena_cont
8986
8987 fdiv_unfl:
8988         bset            &unfl_bit,FPSR_EXCEPT(%a6) # set unfl exc bit
8989
8990         fmovm.x         FP_SCR1(%a6),&0x80      # load dst op
8991
8992         fmov.l          &rz_mode*0x10,%fpcr     # set FPCR
8993         fmov.l          &0x0,%fpsr              # clear FPSR
8994
8995         fdiv.x          FP_SCR0(%a6),%fp0       # execute divide
8996
8997         fmov.l          %fpsr,%d1               # save status
8998         fmov.l          &0x0,%fpcr              # clear FPCR
8999
9000         or.l            %d1,USER_FPSR(%a6)      # save INEX2,N
9001
9002         mov.b           FPCR_ENABLE(%a6),%d1
9003         andi.b          &0x0b,%d1               # is UNFL or INEX enabled?
9004         bne.b           fdiv_unfl_ena           # yes
9005
9006 fdiv_unfl_dis:
9007         fmovm.x         &0x80,FP_SCR0(%a6)      # store out result
9008
9009         lea             FP_SCR0(%a6),%a0        # pass: result addr
9010         mov.l           L_SCR3(%a6),%d1         # pass: rnd prec,mode
9011         bsr.l           unf_res                 # calculate default result
9012         or.b            %d0,FPSR_CC(%a6)        # 'Z' may have been set
9013         fmovm.x         FP_SCR0(%a6),&0x80      # return default result in fp0
9014         rts
9015
9016 #
9017 # UNFL is enabled.
9018 #
9019 fdiv_unfl_ena:
9020         fmovm.x         FP_SCR1(%a6),&0x40      # load dst op
9021
9022         mov.l           L_SCR3(%a6),%d1
9023         andi.b          &0xc0,%d1               # is precision extended?
9024         bne.b           fdiv_unfl_ena_sd        # no, sgl or dbl
9025
9026         fmov.l          L_SCR3(%a6),%fpcr       # set FPCR
9027
9028 fdiv_unfl_ena_cont:
9029         fmov.l          &0x0,%fpsr              # clear FPSR
9030
9031         fdiv.x          FP_SCR0(%a6),%fp1       # execute divide
9032
9033         fmov.l          &0x0,%fpcr              # clear FPCR
9034
9035         fmovm.x         &0x40,FP_SCR0(%a6)      # save result to stack
9036         mov.l           %d2,-(%sp)              # save d2
9037         mov.w           FP_SCR0_EX(%a6),%d1     # fetch {sgn,exp}
9038         mov.l           %d1,%d2                 # make a copy
9039         andi.l          &0x7fff,%d1             # strip sign
9040         andi.w          &0x8000,%d2             # keep old sign
9041         sub.l           %d0,%d1                 # add scale factoer
9042         addi.l          &0x6000,%d1             # add bias
9043         andi.w          &0x7fff,%d1
9044         or.w            %d2,%d1                 # concat old sign,new exp
9045         mov.w           %d1,FP_SCR0_EX(%a6)     # insert new exp
9046         mov.l           (%sp)+,%d2              # restore d2
9047         fmovm.x         FP_SCR0(%a6),&0x40      # return EXOP in fp1
9048         bra.w           fdiv_unfl_dis
9049
9050 fdiv_unfl_ena_sd:
9051         mov.l           L_SCR3(%a6),%d1
9052         andi.b          &0x30,%d1               # use only rnd mode
9053         fmov.l          %d1,%fpcr               # set FPCR
9054
9055         bra.b           fdiv_unfl_ena_cont
9056
9057 #
9058 # the divide operation MAY underflow:
9059 #
9060 fdiv_may_unfl:
9061         fmovm.x         FP_SCR1(%a6),&0x80      # load dst op
9062
9063         fmov.l          L_SCR3(%a6),%fpcr       # set FPCR
9064         fmov.l          &0x0,%fpsr              # clear FPSR
9065
9066         fdiv.x          FP_SCR0(%a6),%fp0       # execute divide
9067
9068         fmov.l          %fpsr,%d1               # save status
9069         fmov.l          &0x0,%fpcr              # clear FPCR
9070
9071         or.l            %d1,USER_FPSR(%a6)      # save INEX2,N
9072
9073         fabs.x          %fp0,%fp1               # make a copy of result
9074         fcmp.b          %fp1,&0x1               # is |result| > 1.b?
9075         fbgt.w          fdiv_normal_exit        # no; no underflow occurred
9076         fblt.w          fdiv_unfl               # yes; underflow occurred
9077
9078 #
9079 # we still don't know if underflow occurred. result is ~ equal to 1. but,
9080 # we don't know if the result was an underflow that rounded up to a 1
9081 # or a normalized number that rounded down to a 1. so, redo the entire
9082 # operation using RZ as the rounding mode to see what the pre-rounded
9083 # result is. this case should be relatively rare.
9084 #
9085         fmovm.x         FP_SCR1(%a6),&0x40      # load dst op into fp1
9086
9087         mov.l           L_SCR3(%a6),%d1
9088         andi.b          &0xc0,%d1               # keep rnd prec
9089         ori.b           &rz_mode*0x10,%d1       # insert RZ
9090
9091         fmov.l          %d1,%fpcr               # set FPCR
9092         fmov.l          &0x0,%fpsr              # clear FPSR
9093
9094         fdiv.x          FP_SCR0(%a6),%fp1       # execute divide
9095
9096         fmov.l          &0x0,%fpcr              # clear FPCR
9097         fabs.x          %fp1                    # make absolute value
9098         fcmp.b          %fp1,&0x1               # is |result| < 1.b?
9099         fbge.w          fdiv_normal_exit        # no; no underflow occurred
9100         bra.w           fdiv_unfl               # yes; underflow occurred
9101
9102 ############################################################################
9103
9104 #
9105 # Divide: inputs are not both normalized; what are they?
9106 #
9107 fdiv_not_norm:
9108         mov.w           (tbl_fdiv_op.b,%pc,%d1.w*2),%d1
9109         jmp             (tbl_fdiv_op.b,%pc,%d1.w*1)
9110
9111         swbeg           &48
9112 tbl_fdiv_op:
9113         short           fdiv_norm       - tbl_fdiv_op # NORM / NORM
9114         short           fdiv_inf_load   - tbl_fdiv_op # NORM / ZERO
9115         short           fdiv_zero_load  - tbl_fdiv_op # NORM / INF
9116         short           fdiv_res_qnan   - tbl_fdiv_op # NORM / QNAN
9117         short           fdiv_norm       - tbl_fdiv_op # NORM / DENORM
9118         short           fdiv_res_snan   - tbl_fdiv_op # NORM / SNAN
9119         short           tbl_fdiv_op     - tbl_fdiv_op #
9120         short           tbl_fdiv_op     - tbl_fdiv_op #
9121
9122         short           fdiv_zero_load  - tbl_fdiv_op # ZERO / NORM
9123         short           fdiv_res_operr  - tbl_fdiv_op # ZERO / ZERO
9124         short           fdiv_zero_load  - tbl_fdiv_op # ZERO / INF
9125         short           fdiv_res_qnan   - tbl_fdiv_op # ZERO / QNAN
9126         short           fdiv_zero_load  - tbl_fdiv_op # ZERO / DENORM
9127         short           fdiv_res_snan   - tbl_fdiv_op # ZERO / SNAN
9128         short           tbl_fdiv_op     - tbl_fdiv_op #
9129         short           tbl_fdiv_op     - tbl_fdiv_op #
9130
9131         short           fdiv_inf_dst    - tbl_fdiv_op # INF / NORM
9132         short           fdiv_inf_dst    - tbl_fdiv_op # INF / ZERO
9133         short           fdiv_res_operr  - tbl_fdiv_op # INF / INF
9134         short           fdiv_res_qnan   - tbl_fdiv_op # INF / QNAN
9135         short           fdiv_inf_dst    - tbl_fdiv_op # INF / DENORM
9136         short           fdiv_res_snan   - tbl_fdiv_op # INF / SNAN
9137         short           tbl_fdiv_op     - tbl_fdiv_op #
9138         short           tbl_fdiv_op     - tbl_fdiv_op #
9139
9140         short           fdiv_res_qnan   - tbl_fdiv_op # QNAN / NORM
9141         short           fdiv_res_qnan   - tbl_fdiv_op # QNAN / ZERO
9142         short           fdiv_res_qnan   - tbl_fdiv_op # QNAN / INF
9143         short           fdiv_res_qnan   - tbl_fdiv_op # QNAN / QNAN
9144         short           fdiv_res_qnan   - tbl_fdiv_op # QNAN / DENORM
9145         short           fdiv_res_snan   - tbl_fdiv_op # QNAN / SNAN
9146         short           tbl_fdiv_op     - tbl_fdiv_op #
9147         short           tbl_fdiv_op     - tbl_fdiv_op #
9148
9149         short           fdiv_norm       - tbl_fdiv_op # DENORM / NORM
9150         short           fdiv_inf_load   - tbl_fdiv_op # DENORM / ZERO
9151         short           fdiv_zero_load  - tbl_fdiv_op # DENORM / INF
9152         short           fdiv_res_qnan   - tbl_fdiv_op # DENORM / QNAN
9153         short           fdiv_norm       - tbl_fdiv_op # DENORM / DENORM
9154         short           fdiv_res_snan   - tbl_fdiv_op # DENORM / SNAN
9155         short           tbl_fdiv_op     - tbl_fdiv_op #
9156         short           tbl_fdiv_op     - tbl_fdiv_op #
9157
9158         short           fdiv_res_snan   - tbl_fdiv_op # SNAN / NORM
9159         short           fdiv_res_snan   - tbl_fdiv_op # SNAN / ZERO
9160         short           fdiv_res_snan   - tbl_fdiv_op # SNAN / INF
9161         short           fdiv_res_snan   - tbl_fdiv_op # SNAN / QNAN
9162         short           fdiv_res_snan   - tbl_fdiv_op # SNAN / DENORM
9163         short           fdiv_res_snan   - tbl_fdiv_op # SNAN / SNAN
9164         short           tbl_fdiv_op     - tbl_fdiv_op #
9165         short           tbl_fdiv_op     - tbl_fdiv_op #
9166
9167 fdiv_res_qnan:
9168         bra.l           res_qnan
9169 fdiv_res_snan:
9170         bra.l           res_snan
9171 fdiv_res_operr:
9172         bra.l           res_operr
9173
9174         global          fdiv_zero_load          # global for fsgldiv
9175 fdiv_zero_load:
9176         mov.b           SRC_EX(%a0),%d0         # result sign is exclusive
9177         mov.b           DST_EX(%a1),%d1         # or of input signs.
9178         eor.b           %d0,%d1
9179         bpl.b           fdiv_zero_load_p        # result is positive
9180         fmov.s          &0x80000000,%fp0        # load a -ZERO
9181         mov.b           &z_bmask+neg_bmask,FPSR_CC(%a6) # set Z/N
9182         rts
9183 fdiv_zero_load_p:
9184         fmov.s          &0x00000000,%fp0        # load a +ZERO
9185         mov.b           &z_bmask,FPSR_CC(%a6)   # set Z
9186         rts
9187
9188 #
9189 # The destination was In Range and the source was a ZERO. The result,
9190 # therefore, is an INF w/ the proper sign.
9191 # So, determine the sign and return a new INF (w/ the j-bit cleared).
9192 #
9193         global          fdiv_inf_load           # global for fsgldiv
9194 fdiv_inf_load:
9195         ori.w           &dz_mask+adz_mask,2+USER_FPSR(%a6) # no; set DZ/ADZ
9196         mov.b           SRC_EX(%a0),%d0         # load both signs
9197         mov.b           DST_EX(%a1),%d1
9198         eor.b           %d0,%d1
9199         bpl.b           fdiv_inf_load_p         # result is positive
9200         fmov.s          &0xff800000,%fp0        # make result -INF
9201         mov.b           &inf_bmask+neg_bmask,FPSR_CC(%a6) # set INF/N
9202         rts
9203 fdiv_inf_load_p:
9204         fmov.s          &0x7f800000,%fp0        # make result +INF
9205         mov.b           &inf_bmask,FPSR_CC(%a6) # set INF
9206         rts
9207
9208 #
9209 # The destination was an INF w/ an In Range or ZERO source, the result is
9210 # an INF w/ the proper sign.
9211 # The 68881/882 returns the destination INF w/ the new sign(if the j-bit of the
9212 # dst INF is set, then then j-bit of the result INF is also set).
9213 #
9214         global          fdiv_inf_dst            # global for fsgldiv
9215 fdiv_inf_dst:
9216         mov.b           DST_EX(%a1),%d0         # load both signs
9217         mov.b           SRC_EX(%a0),%d1
9218         eor.b           %d0,%d1
9219         bpl.b           fdiv_inf_dst_p          # result is positive
9220
9221         fmovm.x         DST(%a1),&0x80          # return result in fp0
9222         fabs.x          %fp0                    # clear sign bit
9223         fneg.x          %fp0                    # set sign bit
9224         mov.b           &inf_bmask+neg_bmask,FPSR_CC(%a6) # set INF/NEG
9225         rts
9226
9227 fdiv_inf_dst_p:
9228         fmovm.x         DST(%a1),&0x80          # return result in fp0
9229         fabs.x          %fp0                    # return positive INF
9230         mov.b           &inf_bmask,FPSR_CC(%a6) # set INF
9231         rts
9232
9233 #########################################################################
9234 # XDEF **************************************************************** #
9235 #       fneg(): emulates the fneg instruction                           #
9236 #       fsneg(): emulates the fsneg instruction                         #
9237 #       fdneg(): emulates the fdneg instruction                         #
9238 #                                                                       #
9239 # XREF **************************************************************** #
9240 #       norm() - normalize a denorm to provide EXOP                     #
9241 #       scale_to_zero_src() - scale sgl/dbl source exponent             #
9242 #       ovf_res() - return default overflow result                      #
9243 #       unf_res() - return default underflow result                     #
9244 #       res_qnan_1op() - return QNAN result                             #
9245 #       res_snan_1op() - return SNAN result                             #
9246 #                                                                       #
9247 # INPUT *************************************************************** #
9248 #       a0 = pointer to extended precision source operand               #
9249 #       d0 = rnd prec,mode                                              #
9250 #                                                                       #
9251 # OUTPUT ************************************************************** #
9252 #       fp0 = result                                                    #
9253 #       fp1 = EXOP (if exception occurred)                              #
9254 #                                                                       #
9255 # ALGORITHM *********************************************************** #
9256 #       Handle NANs, zeroes, and infinities as special cases. Separate  #
9257 # norms/denorms into ext/sgl/dbl precisions. Extended precision can be  #
9258 # emulated by simply setting sign bit. Sgl/dbl operands must be scaled  #
9259 # and an actual fneg performed to see if overflow/underflow would have  #
9260 # occurred. If so, return default underflow/overflow result. Else,      #
9261 # scale the result exponent and return result. FPSR gets set based on   #
9262 # the result value.                                                     #
9263 #                                                                       #
9264 #########################################################################
9265
9266         global          fsneg
9267 fsneg:
9268         andi.b          &0x30,%d0               # clear rnd prec
9269         ori.b           &s_mode*0x10,%d0        # insert sgl precision
9270         bra.b           fneg
9271
9272         global          fdneg
9273 fdneg:
9274         andi.b          &0x30,%d0               # clear rnd prec
9275         ori.b           &d_mode*0x10,%d0        # insert dbl prec
9276
9277         global          fneg
9278 fneg:
9279         mov.l           %d0,L_SCR3(%a6)         # store rnd info
9280         mov.b           STAG(%a6),%d1
9281         bne.w           fneg_not_norm           # optimize on non-norm input
9282
9283 #
9284 # NEGATE SIGN : norms and denorms ONLY!
9285 #
9286 fneg_norm:
9287         andi.b          &0xc0,%d0               # is precision extended?
9288         bne.w           fneg_not_ext            # no; go handle sgl or dbl
9289
9290 #
9291 # precision selected is extended. so...we can not get an underflow
9292 # or overflow because of rounding to the correct precision. so...
9293 # skip the scaling and unscaling...
9294 #
9295         mov.l           SRC_HI(%a0),FP_SCR0_HI(%a6)
9296         mov.l           SRC_LO(%a0),FP_SCR0_LO(%a6)
9297         mov.w           SRC_EX(%a0),%d0
9298         eori.w          &0x8000,%d0             # negate sign
9299         bpl.b           fneg_norm_load          # sign is positive
9300         mov.b           &neg_bmask,FPSR_CC(%a6) # set 'N' ccode bit
9301 fneg_norm_load:
9302         mov.w           %d0,FP_SCR0_EX(%a6)
9303         fmovm.x         FP_SCR0(%a6),&0x80      # return result in fp0
9304         rts
9305
9306 #
9307 # for an extended precision DENORM, the UNFL exception bit is set
9308 # the accrued bit is NOT set in this instance(no inexactness!)
9309 #
9310 fneg_denorm:
9311         andi.b          &0xc0,%d0               # is precision extended?
9312         bne.b           fneg_not_ext            # no; go handle sgl or dbl
9313
9314         bset            &unfl_bit,FPSR_EXCEPT(%a6) # set unfl exc bit
9315
9316         mov.l           SRC_HI(%a0),FP_SCR0_HI(%a6)
9317         mov.l           SRC_LO(%a0),FP_SCR0_LO(%a6)
9318         mov.w           SRC_EX(%a0),%d0
9319         eori.w          &0x8000,%d0             # negate sign
9320         bpl.b           fneg_denorm_done        # no
9321         mov.b           &neg_bmask,FPSR_CC(%a6) # yes, set 'N' ccode bit
9322 fneg_denorm_done:
9323         mov.w           %d0,FP_SCR0_EX(%a6)
9324         fmovm.x         FP_SCR0(%a6),&0x80      # return default result in fp0
9325
9326         btst            &unfl_bit,FPCR_ENABLE(%a6) # is UNFL enabled?
9327         bne.b           fneg_ext_unfl_ena       # yes
9328         rts
9329
9330 #
9331 # the input is an extended DENORM and underflow is enabled in the FPCR.
9332 # normalize the mantissa and add the bias of 0x6000 to the resulting negative
9333 # exponent and insert back into the operand.
9334 #
9335 fneg_ext_unfl_ena:
9336         lea             FP_SCR0(%a6),%a0        # pass: ptr to operand
9337         bsr.l           norm                    # normalize result
9338         neg.w           %d0                     # new exponent = -(shft val)
9339         addi.w          &0x6000,%d0             # add new bias to exponent
9340         mov.w           FP_SCR0_EX(%a6),%d1     # fetch old sign,exp
9341         andi.w          &0x8000,%d1             # keep old sign
9342         andi.w          &0x7fff,%d0             # clear sign position
9343         or.w            %d1,%d0                 # concat old sign, new exponent
9344         mov.w           %d0,FP_SCR0_EX(%a6)     # insert new exponent
9345         fmovm.x         FP_SCR0(%a6),&0x40      # return EXOP in fp1
9346         rts
9347
9348 #
9349 # operand is either single or double
9350 #
9351 fneg_not_ext:
9352         cmpi.b          %d0,&s_mode*0x10        # separate sgl/dbl prec
9353         bne.b           fneg_dbl
9354
9355 #
9356 # operand is to be rounded to single precision
9357 #
9358 fneg_sgl:
9359         mov.w           SRC_EX(%a0),FP_SCR0_EX(%a6)
9360         mov.l           SRC_HI(%a0),FP_SCR0_HI(%a6)
9361         mov.l           SRC_LO(%a0),FP_SCR0_LO(%a6)
9362         bsr.l           scale_to_zero_src       # calculate scale factor
9363
9364         cmpi.l          %d0,&0x3fff-0x3f80      # will move in underflow?
9365         bge.w           fneg_sd_unfl            # yes; go handle underflow
9366         cmpi.l          %d0,&0x3fff-0x407e      # will move in overflow?
9367         beq.w           fneg_sd_may_ovfl        # maybe; go check
9368         blt.w           fneg_sd_ovfl            # yes; go handle overflow
9369
9370 #
9371 # operand will NOT overflow or underflow when moved in to the fp reg file
9372 #
9373 fneg_sd_normal:
9374         fmov.l          &0x0,%fpsr              # clear FPSR
9375         fmov.l          L_SCR3(%a6),%fpcr       # set FPCR
9376
9377         fneg.x          FP_SCR0(%a6),%fp0       # perform negation
9378
9379         fmov.l          %fpsr,%d1               # save FPSR
9380         fmov.l          &0x0,%fpcr              # clear FPCR
9381
9382         or.l            %d1,USER_FPSR(%a6)      # save INEX2,N
9383
9384 fneg_sd_normal_exit:
9385         mov.l           %d2,-(%sp)              # save d2
9386         fmovm.x         &0x80,FP_SCR0(%a6)      # store out result
9387         mov.w           FP_SCR0_EX(%a6),%d1     # load sgn,exp
9388         mov.w           %d1,%d2                 # make a copy
9389         andi.l          &0x7fff,%d1             # strip sign
9390         sub.l           %d0,%d1                 # add scale factor
9391         andi.w          &0x8000,%d2             # keep old sign
9392         or.w            %d1,%d2                 # concat old sign,new exp
9393         mov.w           %d2,FP_SCR0_EX(%a6)     # insert new exponent
9394         mov.l           (%sp)+,%d2              # restore d2
9395         fmovm.x         FP_SCR0(%a6),&0x80      # return result in fp0
9396         rts
9397
9398 #
9399 # operand is to be rounded to double precision
9400 #
9401 fneg_dbl:
9402         mov.w           SRC_EX(%a0),FP_SCR0_EX(%a6)
9403         mov.l           SRC_HI(%a0),FP_SCR0_HI(%a6)
9404         mov.l           SRC_LO(%a0),FP_SCR0_LO(%a6)
9405         bsr.l           scale_to_zero_src       # calculate scale factor
9406
9407         cmpi.l          %d0,&0x3fff-0x3c00      # will move in underflow?
9408         bge.b           fneg_sd_unfl            # yes; go handle underflow
9409         cmpi.l          %d0,&0x3fff-0x43fe      # will move in overflow?
9410         beq.w           fneg_sd_may_ovfl        # maybe; go check
9411         blt.w           fneg_sd_ovfl            # yes; go handle overflow
9412         bra.w           fneg_sd_normal          # no; ho handle normalized op
9413
9414 #
9415 # operand WILL underflow when moved in to the fp register file
9416 #
9417 fneg_sd_unfl:
9418         bset            &unfl_bit,FPSR_EXCEPT(%a6) # set unfl exc bit
9419
9420         eori.b          &0x80,FP_SCR0_EX(%a6)   # negate sign
9421         bpl.b           fneg_sd_unfl_tst
9422         bset            &neg_bit,FPSR_CC(%a6)   # set 'N' ccode bit
9423
9424 # if underflow or inexact is enabled, go calculate EXOP first.
9425 fneg_sd_unfl_tst:
9426         mov.b           FPCR_ENABLE(%a6),%d1
9427         andi.b          &0x0b,%d1               # is UNFL or INEX enabled?
9428         bne.b           fneg_sd_unfl_ena        # yes
9429
9430 fneg_sd_unfl_dis:
9431         lea             FP_SCR0(%a6),%a0        # pass: result addr
9432         mov.l           L_SCR3(%a6),%d1         # pass: rnd prec,mode
9433         bsr.l           unf_res                 # calculate default result
9434         or.b            %d0,FPSR_CC(%a6)        # unf_res may have set 'Z'
9435         fmovm.x         FP_SCR0(%a6),&0x80      # return default result in fp0
9436         rts
9437
9438 #
9439 # operand will underflow AND underflow is enabled.
9440 # therefore, we must return the result rounded to extended precision.
9441 #
9442 fneg_sd_unfl_ena:
9443         mov.l           FP_SCR0_HI(%a6),FP_SCR1_HI(%a6)
9444         mov.l           FP_SCR0_LO(%a6),FP_SCR1_LO(%a6)
9445         mov.w           FP_SCR0_EX(%a6),%d1     # load current exponent
9446
9447         mov.l           %d2,-(%sp)              # save d2
9448         mov.l           %d1,%d2                 # make a copy
9449         andi.l          &0x7fff,%d1             # strip sign
9450         andi.w          &0x8000,%d2             # keep old sign
9451         sub.l           %d0,%d1                 # subtract scale factor
9452         addi.l          &0x6000,%d1             # add new bias
9453         andi.w          &0x7fff,%d1
9454         or.w            %d2,%d1                 # concat new sign,new exp
9455         mov.w           %d1,FP_SCR1_EX(%a6)     # insert new exp
9456         fmovm.x         FP_SCR1(%a6),&0x40      # return EXOP in fp1
9457         mov.l           (%sp)+,%d2              # restore d2
9458         bra.b           fneg_sd_unfl_dis
9459
9460 #
9461 # operand WILL overflow.
9462 #
9463 fneg_sd_ovfl:
9464         fmov.l          &0x0,%fpsr              # clear FPSR
9465         fmov.l          L_SCR3(%a6),%fpcr       # set FPCR
9466
9467         fneg.x          FP_SCR0(%a6),%fp0       # perform negation
9468
9469         fmov.l          &0x0,%fpcr              # clear FPCR
9470         fmov.l          %fpsr,%d1               # save FPSR
9471
9472         or.l            %d1,USER_FPSR(%a6)      # save INEX2,N
9473
9474 fneg_sd_ovfl_tst:
9475         or.l            &ovfl_inx_mask,USER_FPSR(%a6) # set ovfl/aovfl/ainex
9476
9477         mov.b           FPCR_ENABLE(%a6),%d1
9478         andi.b          &0x13,%d1               # is OVFL or INEX enabled?
9479         bne.b           fneg_sd_ovfl_ena        # yes
9480
9481 #
9482 # OVFL is not enabled; therefore, we must create the default result by
9483 # calling ovf_res().
9484 #
9485 fneg_sd_ovfl_dis:
9486         btst            &neg_bit,FPSR_CC(%a6)   # is result negative?
9487         sne             %d1                     # set sign param accordingly
9488         mov.l           L_SCR3(%a6),%d0         # pass: prec,mode
9489         bsr.l           ovf_res                 # calculate default result
9490         or.b            %d0,FPSR_CC(%a6)        # set INF,N if applicable
9491         fmovm.x         (%a0),&0x80             # return default result in fp0
9492         rts
9493
9494 #
9495 # OVFL is enabled.
9496 # the INEX2 bit has already been updated by the round to the correct precision.
9497 # now, round to extended(and don't alter the FPSR).
9498 #
9499 fneg_sd_ovfl_ena:
9500         mov.l           %d2,-(%sp)              # save d2
9501         mov.w           FP_SCR0_EX(%a6),%d1     # fetch {sgn,exp}
9502         mov.l           %d1,%d2                 # make a copy
9503         andi.l          &0x7fff,%d1             # strip sign
9504         andi.w          &0x8000,%d2             # keep old sign
9505         sub.l           %d0,%d1                 # add scale factor
9506         subi.l          &0x6000,%d1             # subtract bias
9507         andi.w          &0x7fff,%d1
9508         or.w            %d2,%d1                 # concat sign,exp
9509         mov.w           %d1,FP_SCR0_EX(%a6)     # insert new exponent
9510         fmovm.x         FP_SCR0(%a6),&0x40      # return EXOP in fp1
9511         mov.l           (%sp)+,%d2              # restore d2
9512         bra.b           fneg_sd_ovfl_dis
9513
9514 #
9515 # the move in MAY underflow. so...
9516 #
9517 fneg_sd_may_ovfl:
9518         fmov.l          &0x0,%fpsr              # clear FPSR
9519         fmov.l          L_SCR3(%a6),%fpcr       # set FPCR
9520
9521         fneg.x          FP_SCR0(%a6),%fp0       # perform negation
9522
9523         fmov.l          %fpsr,%d1               # save status
9524         fmov.l          &0x0,%fpcr              # clear FPCR
9525
9526         or.l            %d1,USER_FPSR(%a6)      # save INEX2,N
9527
9528         fabs.x          %fp0,%fp1               # make a copy of result
9529         fcmp.b          %fp1,&0x2               # is |result| >= 2.b?
9530         fbge.w          fneg_sd_ovfl_tst        # yes; overflow has occurred
9531
9532 # no, it didn't overflow; we have correct result
9533         bra.w           fneg_sd_normal_exit
9534
9535 ##########################################################################
9536
9537 #
9538 # input is not normalized; what is it?
9539 #
9540 fneg_not_norm:
9541         cmpi.b          %d1,&DENORM             # weed out DENORM
9542         beq.w           fneg_denorm
9543         cmpi.b          %d1,&SNAN               # weed out SNAN
9544         beq.l           res_snan_1op
9545         cmpi.b          %d1,&QNAN               # weed out QNAN
9546         beq.l           res_qnan_1op
9547
9548 #
9549 # do the fneg; at this point, only possible ops are ZERO and INF.
9550 # use fneg to determine ccodes.
9551 # prec:mode should be zero at this point but it won't affect answer anyways.
9552 #
9553         fneg.x          SRC_EX(%a0),%fp0        # do fneg
9554         fmov.l          %fpsr,%d0
9555         rol.l           &0x8,%d0                # put ccodes in lo byte
9556         mov.b           %d0,FPSR_CC(%a6)        # insert correct ccodes
9557         rts
9558
9559 #########################################################################
9560 # XDEF **************************************************************** #
9561 #       ftst(): emulates the ftest instruction                          #
9562 #                                                                       #
9563 # XREF **************************************************************** #
9564 #       res{s,q}nan_1op() - set NAN result for monadic instruction      #
9565 #                                                                       #
9566 # INPUT *************************************************************** #
9567 #       a0 = pointer to extended precision source operand               #
9568 #                                                                       #
9569 # OUTPUT ************************************************************** #
9570 #       none                                                            #
9571 #                                                                       #
9572 # ALGORITHM *********************************************************** #
9573 #       Check the source operand tag (STAG) and set the FPCR according  #
9574 # to the operand type and sign.                                         #
9575 #                                                                       #
9576 #########################################################################
9577
9578         global          ftst
9579 ftst:
9580         mov.b           STAG(%a6),%d1
9581         bne.b           ftst_not_norm           # optimize on non-norm input
9582
9583 #
9584 # Norm:
9585 #
9586 ftst_norm:
9587         tst.b           SRC_EX(%a0)             # is operand negative?
9588         bmi.b           ftst_norm_m             # yes
9589         rts
9590 ftst_norm_m:
9591         mov.b           &neg_bmask,FPSR_CC(%a6) # set 'N' ccode bit
9592         rts
9593
9594 #
9595 # input is not normalized; what is it?
9596 #
9597 ftst_not_norm:
9598         cmpi.b          %d1,&ZERO               # weed out ZERO
9599         beq.b           ftst_zero
9600         cmpi.b          %d1,&INF                # weed out INF
9601         beq.b           ftst_inf
9602         cmpi.b          %d1,&SNAN               # weed out SNAN
9603         beq.l           res_snan_1op
9604         cmpi.b          %d1,&QNAN               # weed out QNAN
9605         beq.l           res_qnan_1op
9606
9607 #
9608 # Denorm:
9609 #
9610 ftst_denorm:
9611         tst.b           SRC_EX(%a0)             # is operand negative?
9612         bmi.b           ftst_denorm_m           # yes
9613         rts
9614 ftst_denorm_m:
9615         mov.b           &neg_bmask,FPSR_CC(%a6) # set 'N' ccode bit
9616         rts
9617
9618 #
9619 # Infinity:
9620 #
9621 ftst_inf:
9622         tst.b           SRC_EX(%a0)             # is operand negative?
9623         bmi.b           ftst_inf_m              # yes
9624 ftst_inf_p:
9625         mov.b           &inf_bmask,FPSR_CC(%a6) # set 'I' ccode bit
9626         rts
9627 ftst_inf_m:
9628         mov.b           &inf_bmask+neg_bmask,FPSR_CC(%a6) # set 'I','N' ccode bits
9629         rts
9630
9631 #
9632 # Zero:
9633 #
9634 ftst_zero:
9635         tst.b           SRC_EX(%a0)             # is operand negative?
9636         bmi.b           ftst_zero_m             # yes
9637 ftst_zero_p:
9638         mov.b           &z_bmask,FPSR_CC(%a6)   # set 'N' ccode bit
9639         rts
9640 ftst_zero_m:
9641         mov.b           &z_bmask+neg_bmask,FPSR_CC(%a6) # set 'Z','N' ccode bits
9642         rts
9643
9644 #########################################################################
9645 # XDEF **************************************************************** #
9646 #       fint(): emulates the fint instruction                           #
9647 #                                                                       #
9648 # XREF **************************************************************** #
9649 #       res_{s,q}nan_1op() - set NAN result for monadic operation       #
9650 #                                                                       #
9651 # INPUT *************************************************************** #
9652 #       a0 = pointer to extended precision source operand               #
9653 #       d0 = round precision/mode                                       #
9654 #                                                                       #
9655 # OUTPUT ************************************************************** #
9656 #       fp0 = result                                                    #
9657 #                                                                       #
9658 # ALGORITHM *********************************************************** #
9659 #       Separate according to operand type. Unnorms don't pass through  #
9660 # here. For norms, load the rounding mode/prec, execute a "fint", then  #
9661 # store the resulting FPSR bits.                                        #
9662 #       For denorms, force the j-bit to a one and do the same as for    #
9663 # norms. Denorms are so low that the answer will either be a zero or a  #
9664 # one.                                                                  #
9665 #       For zeroes/infs/NANs, return the same while setting the FPSR    #
9666 # as appropriate.                                                       #
9667 #                                                                       #
9668 #########################################################################
9669
9670         global          fint
9671 fint:
9672         mov.b           STAG(%a6),%d1
9673         bne.b           fint_not_norm           # optimize on non-norm input
9674
9675 #
9676 # Norm:
9677 #
9678 fint_norm:
9679         andi.b          &0x30,%d0               # set prec = ext
9680
9681         fmov.l          %d0,%fpcr               # set FPCR
9682         fmov.l          &0x0,%fpsr              # clear FPSR
9683
9684         fint.x          SRC(%a0),%fp0           # execute fint
9685
9686         fmov.l          &0x0,%fpcr              # clear FPCR
9687         fmov.l          %fpsr,%d0               # save FPSR
9688         or.l            %d0,USER_FPSR(%a6)      # set exception bits
9689
9690         rts
9691
9692 #
9693 # input is not normalized; what is it?
9694 #
9695 fint_not_norm:
9696         cmpi.b          %d1,&ZERO               # weed out ZERO
9697         beq.b           fint_zero
9698         cmpi.b          %d1,&INF                # weed out INF
9699         beq.b           fint_inf
9700         cmpi.b          %d1,&DENORM             # weed out DENORM
9701         beq.b           fint_denorm
9702         cmpi.b          %d1,&SNAN               # weed out SNAN
9703         beq.l           res_snan_1op
9704         bra.l           res_qnan_1op            # weed out QNAN
9705
9706 #
9707 # Denorm:
9708 #
9709 # for DENORMs, the result will be either (+/-)ZERO or (+/-)1.
9710 # also, the INEX2 and AINEX exception bits will be set.
9711 # so, we could either set these manually or force the DENORM
9712 # to a very small NORM and ship it to the NORM routine.
9713 # I do the latter.
9714 #
9715 fint_denorm:
9716         mov.w           SRC_EX(%a0),FP_SCR0_EX(%a6) # copy sign, zero exp
9717         mov.b           &0x80,FP_SCR0_HI(%a6)   # force DENORM ==> small NORM
9718         lea             FP_SCR0(%a6),%a0
9719         bra.b           fint_norm
9720
9721 #
9722 # Zero:
9723 #
9724 fint_zero:
9725         tst.b           SRC_EX(%a0)             # is ZERO negative?
9726         bmi.b           fint_zero_m             # yes
9727 fint_zero_p:
9728         fmov.s          &0x00000000,%fp0        # return +ZERO in fp0
9729         mov.b           &z_bmask,FPSR_CC(%a6)   # set 'Z' ccode bit
9730         rts
9731 fint_zero_m:
9732         fmov.s          &0x80000000,%fp0        # return -ZERO in fp0
9733         mov.b           &z_bmask+neg_bmask,FPSR_CC(%a6) # set 'Z','N' ccode bits
9734         rts
9735
9736 #
9737 # Infinity:
9738 #
9739 fint_inf:
9740         fmovm.x         SRC(%a0),&0x80          # return result in fp0
9741         tst.b           SRC_EX(%a0)             # is INF negative?
9742         bmi.b           fint_inf_m              # yes
9743 fint_inf_p:
9744         mov.b           &inf_bmask,FPSR_CC(%a6) # set 'I' ccode bit
9745         rts
9746 fint_inf_m:
9747         mov.b           &inf_bmask+neg_bmask,FPSR_CC(%a6) # set 'N','I' ccode bits
9748         rts
9749
9750 #########################################################################
9751 # XDEF **************************************************************** #
9752 #       fintrz(): emulates the fintrz instruction                       #
9753 #                                                                       #
9754 # XREF **************************************************************** #
9755 #       res_{s,q}nan_1op() - set NAN result for monadic operation       #
9756 #                                                                       #
9757 # INPUT *************************************************************** #
9758 #       a0 = pointer to extended precision source operand               #
9759 #       d0 = round precision/mode                                       #
9760 #                                                                       #
9761 # OUTPUT ************************************************************** #
9762 #       fp0 = result                                                    #
9763 #                                                                       #
9764 # ALGORITHM *********************************************************** #
9765 #       Separate according to operand type. Unnorms don't pass through  #
9766 # here. For norms, load the rounding mode/prec, execute a "fintrz",     #
9767 # then store the resulting FPSR bits.                                   #
9768 #       For denorms, force the j-bit to a one and do the same as for    #
9769 # norms. Denorms are so low that the answer will either be a zero or a  #
9770 # one.                                                                  #
9771 #       For zeroes/infs/NANs, return the same while setting the FPSR    #
9772 # as appropriate.                                                       #
9773 #                                                                       #
9774 #########################################################################
9775
9776         global          fintrz
9777 fintrz:
9778         mov.b           STAG(%a6),%d1
9779         bne.b           fintrz_not_norm         # optimize on non-norm input
9780
9781 #
9782 # Norm:
9783 #
9784 fintrz_norm:
9785         fmov.l          &0x0,%fpsr              # clear FPSR
9786
9787         fintrz.x        SRC(%a0),%fp0           # execute fintrz
9788
9789         fmov.l          %fpsr,%d0               # save FPSR
9790         or.l            %d0,USER_FPSR(%a6)      # set exception bits
9791
9792         rts
9793
9794 #
9795 # input is not normalized; what is it?
9796 #
9797 fintrz_not_norm:
9798         cmpi.b          %d1,&ZERO               # weed out ZERO
9799         beq.b           fintrz_zero
9800         cmpi.b          %d1,&INF                # weed out INF
9801         beq.b           fintrz_inf
9802         cmpi.b          %d1,&DENORM             # weed out DENORM
9803         beq.b           fintrz_denorm
9804         cmpi.b          %d1,&SNAN               # weed out SNAN
9805         beq.l           res_snan_1op
9806         bra.l           res_qnan_1op            # weed out QNAN
9807
9808 #
9809 # Denorm:
9810 #
9811 # for DENORMs, the result will be (+/-)ZERO.
9812 # also, the INEX2 and AINEX exception bits will be set.
9813 # so, we could either set these manually or force the DENORM
9814 # to a very small NORM and ship it to the NORM routine.
9815 # I do the latter.
9816 #
9817 fintrz_denorm:
9818         mov.w           SRC_EX(%a0),FP_SCR0_EX(%a6) # copy sign, zero exp
9819         mov.b           &0x80,FP_SCR0_HI(%a6)   # force DENORM ==> small NORM
9820         lea             FP_SCR0(%a6),%a0
9821         bra.b           fintrz_norm
9822
9823 #
9824 # Zero:
9825 #
9826 fintrz_zero:
9827         tst.b           SRC_EX(%a0)             # is ZERO negative?
9828         bmi.b           fintrz_zero_m           # yes
9829 fintrz_zero_p:
9830         fmov.s          &0x00000000,%fp0        # return +ZERO in fp0
9831         mov.b           &z_bmask,FPSR_CC(%a6)   # set 'Z' ccode bit
9832         rts
9833 fintrz_zero_m:
9834         fmov.s          &0x80000000,%fp0        # return -ZERO in fp0
9835         mov.b           &z_bmask+neg_bmask,FPSR_CC(%a6) # set 'Z','N' ccode bits
9836         rts
9837
9838 #
9839 # Infinity:
9840 #
9841 fintrz_inf:
9842         fmovm.x         SRC(%a0),&0x80          # return result in fp0
9843         tst.b           SRC_EX(%a0)             # is INF negative?
9844         bmi.b           fintrz_inf_m            # yes
9845 fintrz_inf_p:
9846         mov.b           &inf_bmask,FPSR_CC(%a6) # set 'I' ccode bit
9847         rts
9848 fintrz_inf_m:
9849         mov.b           &inf_bmask+neg_bmask,FPSR_CC(%a6) # set 'N','I' ccode bits
9850         rts
9851
9852 #########################################################################
9853 # XDEF **************************************************************** #
9854 #       fabs():  emulates the fabs instruction                          #
9855 #       fsabs(): emulates the fsabs instruction                         #
9856 #       fdabs(): emulates the fdabs instruction                         #
9857 #                                                                       #
9858 # XREF **************************************************************** #
9859 #       norm() - normalize denorm mantissa to provide EXOP              #
9860 #       scale_to_zero_src() - make exponent. = 0; get scale factor      #
9861 #       unf_res() - calculate underflow result                          #
9862 #       ovf_res() - calculate overflow result                           #
9863 #       res_{s,q}nan_1op() - set NAN result for monadic operation       #
9864 #                                                                       #
9865 # INPUT *************************************************************** #
9866 #       a0 = pointer to extended precision source operand               #
9867 #       d0 = rnd precision/mode                                         #
9868 #                                                                       #
9869 # OUTPUT ************************************************************** #
9870 #       fp0 = result                                                    #
9871 #       fp1 = EXOP (if exception occurred)                              #
9872 #                                                                       #
9873 # ALGORITHM *********************************************************** #
9874 #       Handle NANs, infinities, and zeroes as special cases. Divide    #
9875 # norms into extended, single, and double precision.                    #
9876 #       Simply clear sign for extended precision norm. Ext prec denorm  #
9877 # gets an EXOP created for it since it's an underflow.                  #
9878 #       Double and single precision can overflow and underflow. First,  #
9879 # scale the operand such that the exponent is zero. Perform an "fabs"   #
9880 # using the correct rnd mode/prec. Check to see if the original         #
9881 # exponent would take an exception. If so, use unf_res() or ovf_res()   #
9882 # to calculate the default result. Also, create the EXOP for the        #
9883 # exceptional case. If no exception should occur, insert the correct    #
9884 # result exponent and return.                                           #
9885 #       Unnorms don't pass through here.                                #
9886 #                                                                       #
9887 #########################################################################
9888
9889         global          fsabs
9890 fsabs:
9891         andi.b          &0x30,%d0               # clear rnd prec
9892         ori.b           &s_mode*0x10,%d0        # insert sgl precision
9893         bra.b           fabs
9894
9895         global          fdabs
9896 fdabs:
9897         andi.b          &0x30,%d0               # clear rnd prec
9898         ori.b           &d_mode*0x10,%d0        # insert dbl precision
9899
9900         global          fabs
9901 fabs:
9902         mov.l           %d0,L_SCR3(%a6)         # store rnd info
9903         mov.b           STAG(%a6),%d1
9904         bne.w           fabs_not_norm           # optimize on non-norm input
9905
9906 #
9907 # ABSOLUTE VALUE: norms and denorms ONLY!
9908 #
9909 fabs_norm:
9910         andi.b          &0xc0,%d0               # is precision extended?
9911         bne.b           fabs_not_ext            # no; go handle sgl or dbl
9912
9913 #
9914 # precision selected is extended. so...we can not get an underflow
9915 # or overflow because of rounding to the correct precision. so...
9916 # skip the scaling and unscaling...
9917 #
9918         mov.l           SRC_HI(%a0),FP_SCR0_HI(%a6)
9919         mov.l           SRC_LO(%a0),FP_SCR0_LO(%a6)
9920         mov.w           SRC_EX(%a0),%d1
9921         bclr            &15,%d1                 # force absolute value
9922         mov.w           %d1,FP_SCR0_EX(%a6)     # insert exponent
9923         fmovm.x         FP_SCR0(%a6),&0x80      # return result in fp0
9924         rts
9925
9926 #
9927 # for an extended precision DENORM, the UNFL exception bit is set
9928 # the accrued bit is NOT set in this instance(no inexactness!)
9929 #
9930 fabs_denorm:
9931         andi.b          &0xc0,%d0               # is precision extended?
9932         bne.b           fabs_not_ext            # no
9933
9934         bset            &unfl_bit,FPSR_EXCEPT(%a6) # set unfl exc bit
9935
9936         mov.l           SRC_HI(%a0),FP_SCR0_HI(%a6)
9937         mov.l           SRC_LO(%a0),FP_SCR0_LO(%a6)
9938         mov.w           SRC_EX(%a0),%d0
9939         bclr            &15,%d0                 # clear sign
9940         mov.w           %d0,FP_SCR0_EX(%a6)     # insert exponent
9941
9942         fmovm.x         FP_SCR0(%a6),&0x80      # return default result in fp0
9943
9944         btst            &unfl_bit,FPCR_ENABLE(%a6) # is UNFL enabled?
9945         bne.b           fabs_ext_unfl_ena
9946         rts
9947
9948 #
9949 # the input is an extended DENORM and underflow is enabled in the FPCR.
9950 # normalize the mantissa and add the bias of 0x6000 to the resulting negative
9951 # exponent and insert back into the operand.
9952 #
9953 fabs_ext_unfl_ena:
9954         lea             FP_SCR0(%a6),%a0        # pass: ptr to operand
9955         bsr.l           norm                    # normalize result
9956         neg.w           %d0                     # new exponent = -(shft val)
9957         addi.w          &0x6000,%d0             # add new bias to exponent
9958         mov.w           FP_SCR0_EX(%a6),%d1     # fetch old sign,exp
9959         andi.w          &0x8000,%d1             # keep old sign
9960         andi.w          &0x7fff,%d0             # clear sign position
9961         or.w            %d1,%d0                 # concat old sign, new exponent
9962         mov.w           %d0,FP_SCR0_EX(%a6)     # insert new exponent
9963         fmovm.x         FP_SCR0(%a6),&0x40      # return EXOP in fp1
9964         rts
9965
9966 #
9967 # operand is either single or double
9968 #
9969 fabs_not_ext:
9970         cmpi.b          %d0,&s_mode*0x10        # separate sgl/dbl prec
9971         bne.b           fabs_dbl
9972
9973 #
9974 # operand is to be rounded to single precision
9975 #
9976 fabs_sgl:
9977         mov.w           SRC_EX(%a0),FP_SCR0_EX(%a6)
9978         mov.l           SRC_HI(%a0),FP_SCR0_HI(%a6)
9979         mov.l           SRC_LO(%a0),FP_SCR0_LO(%a6)
9980         bsr.l           scale_to_zero_src       # calculate scale factor
9981
9982         cmpi.l          %d0,&0x3fff-0x3f80      # will move in underflow?
9983         bge.w           fabs_sd_unfl            # yes; go handle underflow
9984         cmpi.l          %d0,&0x3fff-0x407e      # will move in overflow?
9985         beq.w           fabs_sd_may_ovfl        # maybe; go check
9986         blt.w           fabs_sd_ovfl            # yes; go handle overflow
9987
9988 #
9989 # operand will NOT overflow or underflow when moved in to the fp reg file
9990 #
9991 fabs_sd_normal:
9992         fmov.l          &0x0,%fpsr              # clear FPSR
9993         fmov.l          L_SCR3(%a6),%fpcr       # set FPCR
9994
9995         fabs.x          FP_SCR0(%a6),%fp0       # perform absolute
9996
9997         fmov.l          %fpsr,%d1               # save FPSR
9998         fmov.l          &0x0,%fpcr              # clear FPCR
9999
10000         or.l            %d1,USER_FPSR(%a6)      # save INEX2,N
10001
10002 fabs_sd_normal_exit:
10003         mov.l           %d2,-(%sp)              # save d2
10004         fmovm.x         &0x80,FP_SCR0(%a6)      # store out result
10005         mov.w           FP_SCR0_EX(%a6),%d1     # load sgn,exp
10006         mov.l           %d1,%d2                 # make a copy
10007         andi.l          &0x7fff,%d1             # strip sign
10008         sub.l           %d0,%d1                 # add scale factor
10009         andi.w          &0x8000,%d2             # keep old sign
10010         or.w            %d1,%d2                 # concat old sign,new exp
10011         mov.w           %d2,FP_SCR0_EX(%a6)     # insert new exponent
10012         mov.l           (%sp)+,%d2              # restore d2
10013         fmovm.x         FP_SCR0(%a6),&0x80      # return result in fp0
10014         rts
10015
10016 #
10017 # operand is to be rounded to double precision
10018 #
10019 fabs_dbl:
10020         mov.w           SRC_EX(%a0),FP_SCR0_EX(%a6)
10021         mov.l           SRC_HI(%a0),FP_SCR0_HI(%a6)
10022         mov.l           SRC_LO(%a0),FP_SCR0_LO(%a6)
10023         bsr.l           scale_to_zero_src       # calculate scale factor
10024
10025         cmpi.l          %d0,&0x3fff-0x3c00      # will move in underflow?
10026         bge.b           fabs_sd_unfl            # yes; go handle underflow
10027         cmpi.l          %d0,&0x3fff-0x43fe      # will move in overflow?
10028         beq.w           fabs_sd_may_ovfl        # maybe; go check
10029         blt.w           fabs_sd_ovfl            # yes; go handle overflow
10030         bra.w           fabs_sd_normal          # no; ho handle normalized op
10031
10032 #
10033 # operand WILL underflow when moved in to the fp register file
10034 #
10035 fabs_sd_unfl:
10036         bset            &unfl_bit,FPSR_EXCEPT(%a6) # set unfl exc bit
10037
10038         bclr            &0x7,FP_SCR0_EX(%a6)    # force absolute value
10039
10040 # if underflow or inexact is enabled, go calculate EXOP first.
10041         mov.b           FPCR_ENABLE(%a6),%d1
10042         andi.b          &0x0b,%d1               # is UNFL or INEX enabled?
10043         bne.b           fabs_sd_unfl_ena        # yes
10044
10045 fabs_sd_unfl_dis:
10046         lea             FP_SCR0(%a6),%a0        # pass: result addr
10047         mov.l           L_SCR3(%a6),%d1         # pass: rnd prec,mode
10048         bsr.l           unf_res                 # calculate default result
10049         or.b            %d0,FPSR_CC(%a6)        # set possible 'Z' ccode
10050         fmovm.x         FP_SCR0(%a6),&0x80      # return default result in fp0
10051         rts
10052
10053 #
10054 # operand will underflow AND underflow is enabled.
10055 # therefore, we must return the result rounded to extended precision.
10056 #
10057 fabs_sd_unfl_ena:
10058         mov.l           FP_SCR0_HI(%a6),FP_SCR1_HI(%a6)
10059         mov.l           FP_SCR0_LO(%a6),FP_SCR1_LO(%a6)
10060         mov.w           FP_SCR0_EX(%a6),%d1     # load current exponent
10061
10062         mov.l           %d2,-(%sp)              # save d2
10063         mov.l           %d1,%d2                 # make a copy
10064         andi.l          &0x7fff,%d1             # strip sign
10065         andi.w          &0x8000,%d2             # keep old sign
10066         sub.l           %d0,%d1                 # subtract scale factor
10067         addi.l          &0x6000,%d1             # add new bias
10068         andi.w          &0x7fff,%d1
10069         or.w            %d2,%d1                 # concat new sign,new exp
10070         mov.w           %d1,FP_SCR1_EX(%a6)     # insert new exp
10071         fmovm.x         FP_SCR1(%a6),&0x40      # return EXOP in fp1
10072         mov.l           (%sp)+,%d2              # restore d2
10073         bra.b           fabs_sd_unfl_dis
10074
10075 #
10076 # operand WILL overflow.
10077 #
10078 fabs_sd_ovfl:
10079         fmov.l          &0x0,%fpsr              # clear FPSR
10080         fmov.l          L_SCR3(%a6),%fpcr       # set FPCR
10081
10082         fabs.x          FP_SCR0(%a6),%fp0       # perform absolute
10083
10084         fmov.l          &0x0,%fpcr              # clear FPCR
10085         fmov.l          %fpsr,%d1               # save FPSR
10086
10087         or.l            %d1,USER_FPSR(%a6)      # save INEX2,N
10088
10089 fabs_sd_ovfl_tst:
10090         or.l            &ovfl_inx_mask,USER_FPSR(%a6) # set ovfl/aovfl/ainex
10091
10092         mov.b           FPCR_ENABLE(%a6),%d1
10093         andi.b          &0x13,%d1               # is OVFL or INEX enabled?
10094         bne.b           fabs_sd_ovfl_ena        # yes
10095
10096 #
10097 # OVFL is not enabled; therefore, we must create the default result by
10098 # calling ovf_res().
10099 #
10100 fabs_sd_ovfl_dis:
10101         btst            &neg_bit,FPSR_CC(%a6)   # is result negative?
10102         sne             %d1                     # set sign param accordingly
10103         mov.l           L_SCR3(%a6),%d0         # pass: prec,mode
10104         bsr.l           ovf_res                 # calculate default result
10105         or.b            %d0,FPSR_CC(%a6)        # set INF,N if applicable
10106         fmovm.x         (%a0),&0x80             # return default result in fp0
10107         rts
10108
10109 #
10110 # OVFL is enabled.
10111 # the INEX2 bit has already been updated by the round to the correct precision.
10112 # now, round to extended(and don't alter the FPSR).
10113 #
10114 fabs_sd_ovfl_ena:
10115         mov.l           %d2,-(%sp)              # save d2
10116         mov.w           FP_SCR0_EX(%a6),%d1     # fetch {sgn,exp}
10117         mov.l           %d1,%d2                 # make a copy
10118         andi.l          &0x7fff,%d1             # strip sign
10119         andi.w          &0x8000,%d2             # keep old sign
10120         sub.l           %d0,%d1                 # add scale factor
10121         subi.l          &0x6000,%d1             # subtract bias
10122         andi.w          &0x7fff,%d1
10123         or.w            %d2,%d1                 # concat sign,exp
10124         mov.w           %d1,FP_SCR0_EX(%a6)     # insert new exponent
10125         fmovm.x         FP_SCR0(%a6),&0x40      # return EXOP in fp1
10126         mov.l           (%sp)+,%d2              # restore d2
10127         bra.b           fabs_sd_ovfl_dis
10128
10129 #
10130 # the move in MAY underflow. so...
10131 #
10132 fabs_sd_may_ovfl:
10133         fmov.l          &0x0,%fpsr              # clear FPSR
10134         fmov.l          L_SCR3(%a6),%fpcr       # set FPCR
10135
10136         fabs.x          FP_SCR0(%a6),%fp0       # perform absolute
10137
10138         fmov.l          %fpsr,%d1               # save status
10139         fmov.l          &0x0,%fpcr              # clear FPCR
10140
10141         or.l            %d1,USER_FPSR(%a6)      # save INEX2,N
10142
10143         fabs.x          %fp0,%fp1               # make a copy of result
10144         fcmp.b          %fp1,&0x2               # is |result| >= 2.b?
10145         fbge.w          fabs_sd_ovfl_tst        # yes; overflow has occurred
10146
10147 # no, it didn't overflow; we have correct result
10148         bra.w           fabs_sd_normal_exit
10149
10150 ##########################################################################
10151
10152 #
10153 # input is not normalized; what is it?
10154 #
10155 fabs_not_norm:
10156         cmpi.b          %d1,&DENORM             # weed out DENORM
10157         beq.w           fabs_denorm
10158         cmpi.b          %d1,&SNAN               # weed out SNAN
10159         beq.l           res_snan_1op
10160         cmpi.b          %d1,&QNAN               # weed out QNAN
10161         beq.l           res_qnan_1op
10162
10163         fabs.x          SRC(%a0),%fp0           # force absolute value
10164
10165         cmpi.b          %d1,&INF                # weed out INF
10166         beq.b           fabs_inf
10167 fabs_zero:
10168         mov.b           &z_bmask,FPSR_CC(%a6)   # set 'Z' ccode bit
10169         rts
10170 fabs_inf:
10171         mov.b           &inf_bmask,FPSR_CC(%a6) # set 'I' ccode bit
10172         rts
10173
10174 #########################################################################
10175 # XDEF **************************************************************** #
10176 #       fcmp(): fp compare op routine                                   #
10177 #                                                                       #
10178 # XREF **************************************************************** #
10179 #       res_qnan() - return QNAN result                                 #
10180 #       res_snan() - return SNAN result                                 #
10181 #                                                                       #
10182 # INPUT *************************************************************** #
10183 #       a0 = pointer to extended precision source operand               #
10184 #       a1 = pointer to extended precision destination operand          #
10185 #       d0 = round prec/mode                                            #
10186 #                                                                       #
10187 # OUTPUT ************************************************************** #
10188 #       None                                                            #
10189 #                                                                       #
10190 # ALGORITHM *********************************************************** #
10191 #       Handle NANs and denorms as special cases. For everything else,  #
10192 # just use the actual fcmp instruction to produce the correct condition #
10193 # codes.                                                                #
10194 #                                                                       #
10195 #########################################################################
10196
10197         global          fcmp
10198 fcmp:
10199         clr.w           %d1
10200         mov.b           DTAG(%a6),%d1
10201         lsl.b           &0x3,%d1
10202         or.b            STAG(%a6),%d1
10203         bne.b           fcmp_not_norm           # optimize on non-norm input
10204
10205 #
10206 # COMPARE FP OPs : NORMs, ZEROs, INFs, and "corrected" DENORMs
10207 #
10208 fcmp_norm:
10209         fmovm.x         DST(%a1),&0x80          # load dst op
10210
10211         fcmp.x          %fp0,SRC(%a0)           # do compare
10212
10213         fmov.l          %fpsr,%d0               # save FPSR
10214         rol.l           &0x8,%d0                # extract ccode bits
10215         mov.b           %d0,FPSR_CC(%a6)        # set ccode bits(no exc bits are set)
10216
10217         rts
10218
10219 #
10220 # fcmp: inputs are not both normalized; what are they?
10221 #
10222 fcmp_not_norm:
10223         mov.w           (tbl_fcmp_op.b,%pc,%d1.w*2),%d1
10224         jmp             (tbl_fcmp_op.b,%pc,%d1.w*1)
10225
10226         swbeg           &48
10227 tbl_fcmp_op:
10228         short           fcmp_norm       - tbl_fcmp_op # NORM - NORM
10229         short           fcmp_norm       - tbl_fcmp_op # NORM - ZERO
10230         short           fcmp_norm       - tbl_fcmp_op # NORM - INF
10231         short           fcmp_res_qnan   - tbl_fcmp_op # NORM - QNAN
10232         short           fcmp_nrm_dnrm   - tbl_fcmp_op # NORM - DENORM
10233         short           fcmp_res_snan   - tbl_fcmp_op # NORM - SNAN
10234         short           tbl_fcmp_op     - tbl_fcmp_op #
10235         short           tbl_fcmp_op     - tbl_fcmp_op #
10236
10237         short           fcmp_norm       - tbl_fcmp_op # ZERO - NORM
10238         short           fcmp_norm       - tbl_fcmp_op # ZERO - ZERO
10239         short           fcmp_norm       - tbl_fcmp_op # ZERO - INF
10240         short           fcmp_res_qnan   - tbl_fcmp_op # ZERO - QNAN
10241         short           fcmp_dnrm_s     - tbl_fcmp_op # ZERO - DENORM
10242         short           fcmp_res_snan   - tbl_fcmp_op # ZERO - SNAN
10243         short           tbl_fcmp_op     - tbl_fcmp_op #
10244         short           tbl_fcmp_op     - tbl_fcmp_op #
10245
10246         short           fcmp_norm       - tbl_fcmp_op # INF - NORM
10247         short           fcmp_norm       - tbl_fcmp_op # INF - ZERO
10248         short           fcmp_norm       - tbl_fcmp_op # INF - INF
10249         short           fcmp_res_qnan   - tbl_fcmp_op # INF - QNAN
10250         short           fcmp_dnrm_s     - tbl_fcmp_op # INF - DENORM
10251         short           fcmp_res_snan   - tbl_fcmp_op # INF - SNAN
10252         short           tbl_fcmp_op     - tbl_fcmp_op #
10253         short           tbl_fcmp_op     - tbl_fcmp_op #
10254
10255         short           fcmp_res_qnan   - tbl_fcmp_op # QNAN - NORM
10256         short           fcmp_res_qnan   - tbl_fcmp_op # QNAN - ZERO
10257         short           fcmp_res_qnan   - tbl_fcmp_op # QNAN - INF
10258         short           fcmp_res_qnan   - tbl_fcmp_op # QNAN - QNAN
10259         short           fcmp_res_qnan   - tbl_fcmp_op # QNAN - DENORM
10260         short           fcmp_res_snan   - tbl_fcmp_op # QNAN - SNAN
10261         short           tbl_fcmp_op     - tbl_fcmp_op #
10262         short           tbl_fcmp_op     - tbl_fcmp_op #
10263
10264         short           fcmp_dnrm_nrm   - tbl_fcmp_op # DENORM - NORM
10265         short           fcmp_dnrm_d     - tbl_fcmp_op # DENORM - ZERO
10266         short           fcmp_dnrm_d     - tbl_fcmp_op # DENORM - INF
10267         short           fcmp_res_qnan   - tbl_fcmp_op # DENORM - QNAN
10268         short           fcmp_dnrm_sd    - tbl_fcmp_op # DENORM - DENORM
10269         short           fcmp_res_snan   - tbl_fcmp_op # DENORM - SNAN
10270         short           tbl_fcmp_op     - tbl_fcmp_op #
10271         short           tbl_fcmp_op     - tbl_fcmp_op #
10272
10273         short           fcmp_res_snan   - tbl_fcmp_op # SNAN - NORM
10274         short           fcmp_res_snan   - tbl_fcmp_op # SNAN - ZERO
10275         short           fcmp_res_snan   - tbl_fcmp_op # SNAN - INF
10276         short           fcmp_res_snan   - tbl_fcmp_op # SNAN - QNAN
10277         short           fcmp_res_snan   - tbl_fcmp_op # SNAN - DENORM
10278         short           fcmp_res_snan   - tbl_fcmp_op # SNAN - SNAN
10279         short           tbl_fcmp_op     - tbl_fcmp_op #
10280         short           tbl_fcmp_op     - tbl_fcmp_op #
10281
10282 # unlike all other functions for QNAN and SNAN, fcmp does NOT set the
10283 # 'N' bit for a negative QNAN or SNAN input so we must squelch it here.
10284 fcmp_res_qnan:
10285         bsr.l           res_qnan
10286         andi.b          &0xf7,FPSR_CC(%a6)
10287         rts
10288 fcmp_res_snan:
10289         bsr.l           res_snan
10290         andi.b          &0xf7,FPSR_CC(%a6)
10291         rts
10292
10293 #
10294 # DENORMs are a little more difficult.
10295 # If you have a 2 DENORMs, then you can just force the j-bit to a one
10296 # and use the fcmp_norm routine.
10297 # If you have a DENORM and an INF or ZERO, just force the DENORM's j-bit to a one
10298 # and use the fcmp_norm routine.
10299 # If you have a DENORM and a NORM with opposite signs, then use fcmp_norm, also.
10300 # But with a DENORM and a NORM of the same sign, the neg bit is set if the
10301 # (1) signs are (+) and the DENORM is the dst or
10302 # (2) signs are (-) and the DENORM is the src
10303 #
10304
10305 fcmp_dnrm_s:
10306         mov.w           SRC_EX(%a0),FP_SCR0_EX(%a6)
10307         mov.l           SRC_HI(%a0),%d0
10308         bset            &31,%d0                 # DENORM src; make into small norm
10309         mov.l           %d0,FP_SCR0_HI(%a6)
10310         mov.l           SRC_LO(%a0),FP_SCR0_LO(%a6)
10311         lea             FP_SCR0(%a6),%a0
10312         bra.w           fcmp_norm
10313
10314 fcmp_dnrm_d:
10315         mov.l           DST_EX(%a1),FP_SCR0_EX(%a6)
10316         mov.l           DST_HI(%a1),%d0
10317         bset            &31,%d0                 # DENORM src; make into small norm
10318         mov.l           %d0,FP_SCR0_HI(%a6)
10319         mov.l           DST_LO(%a1),FP_SCR0_LO(%a6)
10320         lea             FP_SCR0(%a6),%a1
10321         bra.w           fcmp_norm
10322
10323 fcmp_dnrm_sd:
10324         mov.w           DST_EX(%a1),FP_SCR1_EX(%a6)
10325         mov.w           SRC_EX(%a0),FP_SCR0_EX(%a6)
10326         mov.l           DST_HI(%a1),%d0
10327         bset            &31,%d0                 # DENORM dst; make into small norm
10328         mov.l           %d0,FP_SCR1_HI(%a6)
10329         mov.l           SRC_HI(%a0),%d0
10330         bset            &31,%d0                 # DENORM dst; make into small norm
10331         mov.l           %d0,FP_SCR0_HI(%a6)
10332         mov.l           DST_LO(%a1),FP_SCR1_LO(%a6)
10333         mov.l           SRC_LO(%a0),FP_SCR0_LO(%a6)
10334         lea             FP_SCR1(%a6),%a1
10335         lea             FP_SCR0(%a6),%a0
10336         bra.w           fcmp_norm
10337
10338 fcmp_nrm_dnrm:
10339         mov.b           SRC_EX(%a0),%d0         # determine if like signs
10340         mov.b           DST_EX(%a1),%d1
10341         eor.b           %d0,%d1
10342         bmi.w           fcmp_dnrm_s
10343
10344 # signs are the same, so must determine the answer ourselves.
10345         tst.b           %d0                     # is src op negative?
10346         bmi.b           fcmp_nrm_dnrm_m         # yes
10347         rts
10348 fcmp_nrm_dnrm_m:
10349         mov.b           &neg_bmask,FPSR_CC(%a6) # set 'Z' ccode bit
10350         rts
10351
10352 fcmp_dnrm_nrm:
10353         mov.b           SRC_EX(%a0),%d0         # determine if like signs
10354         mov.b           DST_EX(%a1),%d1
10355         eor.b           %d0,%d1
10356         bmi.w           fcmp_dnrm_d
10357
10358 # signs are the same, so must determine the answer ourselves.
10359         tst.b           %d0                     # is src op negative?
10360         bpl.b           fcmp_dnrm_nrm_m         # no
10361         rts
10362 fcmp_dnrm_nrm_m:
10363         mov.b           &neg_bmask,FPSR_CC(%a6) # set 'Z' ccode bit
10364         rts
10365
10366 #########################################################################
10367 # XDEF **************************************************************** #
10368 #       fsglmul(): emulates the fsglmul instruction                     #
10369 #                                                                       #
10370 # XREF **************************************************************** #
10371 #       scale_to_zero_src() - scale src exponent to zero                #
10372 #       scale_to_zero_dst() - scale dst exponent to zero                #
10373 #       unf_res4() - return default underflow result for sglop          #
10374 #       ovf_res() - return default overflow result                      #
10375 #       res_qnan() - return QNAN result                                 #
10376 #       res_snan() - return SNAN result                                 #
10377 #                                                                       #
10378 # INPUT *************************************************************** #
10379 #       a0 = pointer to extended precision source operand               #
10380 #       a1 = pointer to extended precision destination operand          #
10381 #       d0  rnd prec,mode                                               #
10382 #                                                                       #
10383 # OUTPUT ************************************************************** #
10384 #       fp0 = result                                                    #
10385 #       fp1 = EXOP (if exception occurred)                              #
10386 #                                                                       #
10387 # ALGORITHM *********************************************************** #
10388 #       Handle NANs, infinities, and zeroes as special cases. Divide    #
10389 # norms/denorms into ext/sgl/dbl precision.                             #
10390 #       For norms/denorms, scale the exponents such that a multiply     #
10391 # instruction won't cause an exception. Use the regular fsglmul to      #
10392 # compute a result. Check if the regular operands would have taken      #
10393 # an exception. If so, return the default overflow/underflow result     #
10394 # and return the EXOP if exceptions are enabled. Else, scale the        #
10395 # result operand to the proper exponent.                                #
10396 #                                                                       #
10397 #########################################################################
10398
10399         global          fsglmul
10400 fsglmul:
10401         mov.l           %d0,L_SCR3(%a6)         # store rnd info
10402
10403         clr.w           %d1
10404         mov.b           DTAG(%a6),%d1
10405         lsl.b           &0x3,%d1
10406         or.b            STAG(%a6),%d1
10407
10408         bne.w           fsglmul_not_norm        # optimize on non-norm input
10409
10410 fsglmul_norm:
10411         mov.w           DST_EX(%a1),FP_SCR1_EX(%a6)
10412         mov.l           DST_HI(%a1),FP_SCR1_HI(%a6)
10413         mov.l           DST_LO(%a1),FP_SCR1_LO(%a6)
10414
10415         mov.w           SRC_EX(%a0),FP_SCR0_EX(%a6)
10416         mov.l           SRC_HI(%a0),FP_SCR0_HI(%a6)
10417         mov.l           SRC_LO(%a0),FP_SCR0_LO(%a6)
10418
10419         bsr.l           scale_to_zero_src       # scale exponent
10420         mov.l           %d0,-(%sp)              # save scale factor 1
10421
10422         bsr.l           scale_to_zero_dst       # scale dst exponent
10423
10424         add.l           (%sp)+,%d0              # SCALE_FACTOR = scale1 + scale2
10425
10426         cmpi.l          %d0,&0x3fff-0x7ffe      # would result ovfl?
10427         beq.w           fsglmul_may_ovfl        # result may rnd to overflow
10428         blt.w           fsglmul_ovfl            # result will overflow
10429
10430         cmpi.l          %d0,&0x3fff+0x0001      # would result unfl?
10431         beq.w           fsglmul_may_unfl        # result may rnd to no unfl
10432         bgt.w           fsglmul_unfl            # result will underflow
10433
10434 fsglmul_normal:
10435         fmovm.x         FP_SCR1(%a6),&0x80      # load dst op
10436
10437         fmov.l          L_SCR3(%a6),%fpcr       # set FPCR
10438         fmov.l          &0x0,%fpsr              # clear FPSR
10439
10440         fsglmul.x       FP_SCR0(%a6),%fp0       # execute sgl multiply
10441
10442         fmov.l          %fpsr,%d1               # save status
10443         fmov.l          &0x0,%fpcr              # clear FPCR
10444
10445         or.l            %d1,USER_FPSR(%a6)      # save INEX2,N
10446
10447 fsglmul_normal_exit:
10448         fmovm.x         &0x80,FP_SCR0(%a6)      # store out result
10449         mov.l           %d2,-(%sp)              # save d2
10450         mov.w           FP_SCR0_EX(%a6),%d1     # load {sgn,exp}
10451         mov.l           %d1,%d2                 # make a copy
10452         andi.l          &0x7fff,%d1             # strip sign
10453         andi.w          &0x8000,%d2             # keep old sign
10454         sub.l           %d0,%d1                 # add scale factor
10455         or.w            %d2,%d1                 # concat old sign,new exp
10456         mov.w           %d1,FP_SCR0_EX(%a6)     # insert new exponent
10457         mov.l           (%sp)+,%d2              # restore d2
10458         fmovm.x         FP_SCR0(%a6),&0x80      # return result in fp0
10459         rts
10460
10461 fsglmul_ovfl:
10462         fmovm.x         FP_SCR1(%a6),&0x80      # load dst op
10463
10464         fmov.l          L_SCR3(%a6),%fpcr       # set FPCR
10465         fmov.l          &0x0,%fpsr              # clear FPSR
10466
10467         fsglmul.x       FP_SCR0(%a6),%fp0       # execute sgl multiply
10468
10469         fmov.l          %fpsr,%d1               # save status
10470         fmov.l          &0x0,%fpcr              # clear FPCR
10471
10472         or.l            %d1,USER_FPSR(%a6)      # save INEX2,N
10473
10474 fsglmul_ovfl_tst:
10475
10476 # save setting this until now because this is where fsglmul_may_ovfl may jump in
10477         or.l            &ovfl_inx_mask, USER_FPSR(%a6) # set ovfl/aovfl/ainex
10478
10479         mov.b           FPCR_ENABLE(%a6),%d1
10480         andi.b          &0x13,%d1               # is OVFL or INEX enabled?
10481         bne.b           fsglmul_ovfl_ena        # yes
10482
10483 fsglmul_ovfl_dis:
10484         btst            &neg_bit,FPSR_CC(%a6)   # is result negative?
10485         sne             %d1                     # set sign param accordingly
10486         mov.l           L_SCR3(%a6),%d0         # pass prec:rnd
10487         andi.b          &0x30,%d0               # force prec = ext
10488         bsr.l           ovf_res                 # calculate default result
10489         or.b            %d0,FPSR_CC(%a6)        # set INF,N if applicable
10490         fmovm.x         (%a0),&0x80             # return default result in fp0
10491         rts
10492
10493 fsglmul_ovfl_ena:
10494         fmovm.x         &0x80,FP_SCR0(%a6)      # move result to stack
10495
10496         mov.l           %d2,-(%sp)              # save d2
10497         mov.w           FP_SCR0_EX(%a6),%d1     # fetch {sgn,exp}
10498         mov.l           %d1,%d2                 # make a copy
10499         andi.l          &0x7fff,%d1             # strip sign
10500         sub.l           %d0,%d1                 # add scale factor
10501         subi.l          &0x6000,%d1             # subtract bias
10502         andi.w          &0x7fff,%d1
10503         andi.w          &0x8000,%d2             # keep old sign
10504         or.w            %d2,%d1                 # concat old sign,new exp
10505         mov.w           %d1,FP_SCR0_EX(%a6)     # insert new exponent
10506         mov.l           (%sp)+,%d2              # restore d2
10507         fmovm.x         FP_SCR0(%a6),&0x40      # return EXOP in fp1
10508         bra.b           fsglmul_ovfl_dis
10509
10510 fsglmul_may_ovfl:
10511         fmovm.x         FP_SCR1(%a6),&0x80      # load dst op
10512
10513         fmov.l          L_SCR3(%a6),%fpcr       # set FPCR
10514         fmov.l          &0x0,%fpsr              # clear FPSR
10515
10516         fsglmul.x       FP_SCR0(%a6),%fp0       # execute sgl multiply
10517
10518         fmov.l          %fpsr,%d1               # save status
10519         fmov.l          &0x0,%fpcr              # clear FPCR
10520
10521         or.l            %d1,USER_FPSR(%a6)      # save INEX2,N
10522
10523         fabs.x          %fp0,%fp1               # make a copy of result
10524         fcmp.b          %fp1,&0x2               # is |result| >= 2.b?
10525         fbge.w          fsglmul_ovfl_tst        # yes; overflow has occurred
10526
10527 # no, it didn't overflow; we have correct result
10528         bra.w           fsglmul_normal_exit
10529
10530 fsglmul_unfl:
10531         bset            &unfl_bit,FPSR_EXCEPT(%a6) # set unfl exc bit
10532
10533         fmovm.x         FP_SCR1(%a6),&0x80      # load dst op
10534
10535         fmov.l          &rz_mode*0x10,%fpcr     # set FPCR
10536         fmov.l          &0x0,%fpsr              # clear FPSR
10537
10538         fsglmul.x       FP_SCR0(%a6),%fp0       # execute sgl multiply
10539
10540         fmov.l          %fpsr,%d1               # save status
10541         fmov.l          &0x0,%fpcr              # clear FPCR
10542
10543         or.l            %d1,USER_FPSR(%a6)      # save INEX2,N
10544
10545         mov.b           FPCR_ENABLE(%a6),%d1
10546         andi.b          &0x0b,%d1               # is UNFL or INEX enabled?
10547         bne.b           fsglmul_unfl_ena        # yes
10548
10549 fsglmul_unfl_dis:
10550         fmovm.x         &0x80,FP_SCR0(%a6)      # store out result
10551
10552         lea             FP_SCR0(%a6),%a0        # pass: result addr
10553         mov.l           L_SCR3(%a6),%d1         # pass: rnd prec,mode
10554         bsr.l           unf_res4                # calculate default result
10555         or.b            %d0,FPSR_CC(%a6)        # 'Z' bit may have been set
10556         fmovm.x         FP_SCR0(%a6),&0x80      # return default result in fp0
10557         rts
10558
10559 #
10560 # UNFL is enabled.
10561 #
10562 fsglmul_unfl_ena:
10563         fmovm.x         FP_SCR1(%a6),&0x40      # load dst op
10564
10565         fmov.l          L_SCR3(%a6),%fpcr       # set FPCR
10566         fmov.l          &0x0,%fpsr              # clear FPSR
10567
10568         fsglmul.x       FP_SCR0(%a6),%fp1       # execute sgl multiply
10569
10570         fmov.l          &0x0,%fpcr              # clear FPCR
10571
10572         fmovm.x         &0x40,FP_SCR0(%a6)      # save result to stack
10573         mov.l           %d2,-(%sp)              # save d2
10574         mov.w           FP_SCR0_EX(%a6),%d1     # fetch {sgn,exp}
10575         mov.l           %d1,%d2                 # make a copy
10576         andi.l          &0x7fff,%d1             # strip sign
10577         andi.w          &0x8000,%d2             # keep old sign
10578         sub.l           %d0,%d1                 # add scale factor
10579         addi.l          &0x6000,%d1             # add bias
10580         andi.w          &0x7fff,%d1
10581         or.w            %d2,%d1                 # concat old sign,new exp
10582         mov.w           %d1,FP_SCR0_EX(%a6)     # insert new exponent
10583         mov.l           (%sp)+,%d2              # restore d2
10584         fmovm.x         FP_SCR0(%a6),&0x40      # return EXOP in fp1
10585         bra.w           fsglmul_unfl_dis
10586
10587 fsglmul_may_unfl:
10588         fmovm.x         FP_SCR1(%a6),&0x80      # load dst op
10589
10590         fmov.l          L_SCR3(%a6),%fpcr       # set FPCR
10591         fmov.l          &0x0,%fpsr              # clear FPSR
10592
10593         fsglmul.x       FP_SCR0(%a6),%fp0       # execute sgl multiply
10594
10595         fmov.l          %fpsr,%d1               # save status
10596         fmov.l          &0x0,%fpcr              # clear FPCR
10597
10598         or.l            %d1,USER_FPSR(%a6)      # save INEX2,N
10599
10600         fabs.x          %fp0,%fp1               # make a copy of result
10601         fcmp.b          %fp1,&0x2               # is |result| > 2.b?
10602         fbgt.w          fsglmul_normal_exit     # no; no underflow occurred
10603         fblt.w          fsglmul_unfl            # yes; underflow occurred
10604
10605 #
10606 # we still don't know if underflow occurred. result is ~ equal to 2. but,
10607 # we don't know if the result was an underflow that rounded up to a 2 or
10608 # a normalized number that rounded down to a 2. so, redo the entire operation
10609 # using RZ as the rounding mode to see what the pre-rounded result is.
10610 # this case should be relatively rare.
10611 #
10612         fmovm.x         FP_SCR1(%a6),&0x40      # load dst op into fp1
10613
10614         mov.l           L_SCR3(%a6),%d1
10615         andi.b          &0xc0,%d1               # keep rnd prec
10616         ori.b           &rz_mode*0x10,%d1       # insert RZ
10617
10618         fmov.l          %d1,%fpcr               # set FPCR
10619         fmov.l          &0x0,%fpsr              # clear FPSR
10620
10621         fsglmul.x       FP_SCR0(%a6),%fp1       # execute sgl multiply
10622
10623         fmov.l          &0x0,%fpcr              # clear FPCR
10624         fabs.x          %fp1                    # make absolute value
10625         fcmp.b          %fp1,&0x2               # is |result| < 2.b?
10626         fbge.w          fsglmul_normal_exit     # no; no underflow occurred
10627         bra.w           fsglmul_unfl            # yes, underflow occurred
10628
10629 ##############################################################################
10630
10631 #
10632 # Single Precision Multiply: inputs are not both normalized; what are they?
10633 #
10634 fsglmul_not_norm:
10635         mov.w           (tbl_fsglmul_op.b,%pc,%d1.w*2),%d1
10636         jmp             (tbl_fsglmul_op.b,%pc,%d1.w*1)
10637
10638         swbeg           &48
10639 tbl_fsglmul_op:
10640         short           fsglmul_norm            - tbl_fsglmul_op # NORM x NORM
10641         short           fsglmul_zero            - tbl_fsglmul_op # NORM x ZERO
10642         short           fsglmul_inf_src         - tbl_fsglmul_op # NORM x INF
10643         short           fsglmul_res_qnan        - tbl_fsglmul_op # NORM x QNAN
10644         short           fsglmul_norm            - tbl_fsglmul_op # NORM x DENORM
10645         short           fsglmul_res_snan        - tbl_fsglmul_op # NORM x SNAN
10646         short           tbl_fsglmul_op          - tbl_fsglmul_op #
10647         short           tbl_fsglmul_op          - tbl_fsglmul_op #
10648
10649         short           fsglmul_zero            - tbl_fsglmul_op # ZERO x NORM
10650         short           fsglmul_zero            - tbl_fsglmul_op # ZERO x ZERO
10651         short           fsglmul_res_operr       - tbl_fsglmul_op # ZERO x INF
10652         short           fsglmul_res_qnan        - tbl_fsglmul_op # ZERO x QNAN
10653         short           fsglmul_zero            - tbl_fsglmul_op # ZERO x DENORM
10654         short           fsglmul_res_snan        - tbl_fsglmul_op # ZERO x SNAN
10655         short           tbl_fsglmul_op          - tbl_fsglmul_op #
10656         short           tbl_fsglmul_op          - tbl_fsglmul_op #
10657
10658         short           fsglmul_inf_dst         - tbl_fsglmul_op # INF x NORM
10659         short           fsglmul_res_operr       - tbl_fsglmul_op # INF x ZERO
10660         short           fsglmul_inf_dst         - tbl_fsglmul_op # INF x INF
10661         short           fsglmul_res_qnan        - tbl_fsglmul_op # INF x QNAN
10662         short           fsglmul_inf_dst         - tbl_fsglmul_op # INF x DENORM
10663         short           fsglmul_res_snan        - tbl_fsglmul_op # INF x SNAN
10664         short           tbl_fsglmul_op          - tbl_fsglmul_op #
10665         short           tbl_fsglmul_op          - tbl_fsglmul_op #
10666
10667         short           fsglmul_res_qnan        - tbl_fsglmul_op # QNAN x NORM
10668         short           fsglmul_res_qnan        - tbl_fsglmul_op # QNAN x ZERO
10669         short           fsglmul_res_qnan        - tbl_fsglmul_op # QNAN x INF
10670         short           fsglmul_res_qnan        - tbl_fsglmul_op # QNAN x QNAN
10671         short           fsglmul_res_qnan        - tbl_fsglmul_op # QNAN x DENORM
10672         short           fsglmul_res_snan        - tbl_fsglmul_op # QNAN x SNAN
10673         short           tbl_fsglmul_op          - tbl_fsglmul_op #
10674         short           tbl_fsglmul_op          - tbl_fsglmul_op #
10675
10676         short           fsglmul_norm            - tbl_fsglmul_op # NORM x NORM
10677         short           fsglmul_zero            - tbl_fsglmul_op # NORM x ZERO
10678         short           fsglmul_inf_src         - tbl_fsglmul_op # NORM x INF
10679         short           fsglmul_res_qnan        - tbl_fsglmul_op # NORM x QNAN
10680         short           fsglmul_norm            - tbl_fsglmul_op # NORM x DENORM
10681         short           fsglmul_res_snan        - tbl_fsglmul_op # NORM x SNAN
10682         short           tbl_fsglmul_op          - tbl_fsglmul_op #
10683         short           tbl_fsglmul_op          - tbl_fsglmul_op #
10684
10685         short           fsglmul_res_snan        - tbl_fsglmul_op # SNAN x NORM
10686         short           fsglmul_res_snan        - tbl_fsglmul_op # SNAN x ZERO
10687         short           fsglmul_res_snan        - tbl_fsglmul_op # SNAN x INF
10688         short           fsglmul_res_snan        - tbl_fsglmul_op # SNAN x QNAN
10689         short           fsglmul_res_snan        - tbl_fsglmul_op # SNAN x DENORM
10690         short           fsglmul_res_snan        - tbl_fsglmul_op # SNAN x SNAN
10691         short           tbl_fsglmul_op          - tbl_fsglmul_op #
10692         short           tbl_fsglmul_op          - tbl_fsglmul_op #
10693
10694 fsglmul_res_operr:
10695         bra.l           res_operr
10696 fsglmul_res_snan:
10697         bra.l           res_snan
10698 fsglmul_res_qnan:
10699         bra.l           res_qnan
10700 fsglmul_zero:
10701         bra.l           fmul_zero
10702 fsglmul_inf_src:
10703         bra.l           fmul_inf_src
10704 fsglmul_inf_dst:
10705         bra.l           fmul_inf_dst
10706
10707 #########################################################################
10708 # XDEF **************************************************************** #
10709 #       fsgldiv(): emulates the fsgldiv instruction                     #
10710 #                                                                       #
10711 # XREF **************************************************************** #
10712 #       scale_to_zero_src() - scale src exponent to zero                #
10713 #       scale_to_zero_dst() - scale dst exponent to zero                #
10714 #       unf_res4() - return default underflow result for sglop          #
10715 #       ovf_res() - return default overflow result                      #
10716 #       res_qnan() - return QNAN result                                 #
10717 #       res_snan() - return SNAN result                                 #
10718 #                                                                       #
10719 # INPUT *************************************************************** #
10720 #       a0 = pointer to extended precision source operand               #
10721 #       a1 = pointer to extended precision destination operand          #
10722 #       d0  rnd prec,mode                                               #
10723 #                                                                       #
10724 # OUTPUT ************************************************************** #
10725 #       fp0 = result                                                    #
10726 #       fp1 = EXOP (if exception occurred)                              #
10727 #                                                                       #
10728 # ALGORITHM *********************************************************** #
10729 #       Handle NANs, infinities, and zeroes as special cases. Divide    #
10730 # norms/denorms into ext/sgl/dbl precision.                             #
10731 #       For norms/denorms, scale the exponents such that a divide       #
10732 # instruction won't cause an exception. Use the regular fsgldiv to      #
10733 # compute a result. Check if the regular operands would have taken      #
10734 # an exception. If so, return the default overflow/underflow result     #
10735 # and return the EXOP if exceptions are enabled. Else, scale the        #
10736 # result operand to the proper exponent.                                #
10737 #                                                                       #
10738 #########################################################################
10739
10740         global          fsgldiv
10741 fsgldiv:
10742         mov.l           %d0,L_SCR3(%a6)         # store rnd info
10743
10744         clr.w           %d1
10745         mov.b           DTAG(%a6),%d1
10746         lsl.b           &0x3,%d1
10747         or.b            STAG(%a6),%d1           # combine src tags
10748
10749         bne.w           fsgldiv_not_norm        # optimize on non-norm input
10750
10751 #
10752 # DIVIDE: NORMs and DENORMs ONLY!
10753 #
10754 fsgldiv_norm:
10755         mov.w           DST_EX(%a1),FP_SCR1_EX(%a6)
10756         mov.l           DST_HI(%a1),FP_SCR1_HI(%a6)
10757         mov.l           DST_LO(%a1),FP_SCR1_LO(%a6)
10758
10759         mov.w           SRC_EX(%a0),FP_SCR0_EX(%a6)
10760         mov.l           SRC_HI(%a0),FP_SCR0_HI(%a6)
10761         mov.l           SRC_LO(%a0),FP_SCR0_LO(%a6)
10762
10763         bsr.l           scale_to_zero_src       # calculate scale factor 1
10764         mov.l           %d0,-(%sp)              # save scale factor 1
10765
10766         bsr.l           scale_to_zero_dst       # calculate scale factor 2
10767
10768         neg.l           (%sp)                   # S.F. = scale1 - scale2
10769         add.l           %d0,(%sp)
10770
10771         mov.w           2+L_SCR3(%a6),%d1       # fetch precision,mode
10772         lsr.b           &0x6,%d1
10773         mov.l           (%sp)+,%d0
10774         cmpi.l          %d0,&0x3fff-0x7ffe
10775         ble.w           fsgldiv_may_ovfl
10776
10777         cmpi.l          %d0,&0x3fff-0x0000      # will result underflow?
10778         beq.w           fsgldiv_may_unfl        # maybe
10779         bgt.w           fsgldiv_unfl            # yes; go handle underflow
10780
10781 fsgldiv_normal:
10782         fmovm.x         FP_SCR1(%a6),&0x80      # load dst op
10783
10784         fmov.l          L_SCR3(%a6),%fpcr       # save FPCR
10785         fmov.l          &0x0,%fpsr              # clear FPSR
10786
10787         fsgldiv.x       FP_SCR0(%a6),%fp0       # perform sgl divide
10788
10789         fmov.l          %fpsr,%d1               # save FPSR
10790         fmov.l          &0x0,%fpcr              # clear FPCR
10791
10792         or.l            %d1,USER_FPSR(%a6)      # save INEX2,N
10793
10794 fsgldiv_normal_exit:
10795         fmovm.x         &0x80,FP_SCR0(%a6)      # store result on stack
10796         mov.l           %d2,-(%sp)              # save d2
10797         mov.w           FP_SCR0_EX(%a6),%d1     # load {sgn,exp}
10798         mov.l           %d1,%d2                 # make a copy
10799         andi.l          &0x7fff,%d1             # strip sign
10800         andi.w          &0x8000,%d2             # keep old sign
10801         sub.l           %d0,%d1                 # add scale factor
10802         or.w            %d2,%d1                 # concat old sign,new exp
10803         mov.w           %d1,FP_SCR0_EX(%a6)     # insert new exponent
10804         mov.l           (%sp)+,%d2              # restore d2
10805         fmovm.x         FP_SCR0(%a6),&0x80      # return result in fp0
10806         rts
10807
10808 fsgldiv_may_ovfl:
10809         fmovm.x         FP_SCR1(%a6),&0x80      # load dst op
10810
10811         fmov.l          L_SCR3(%a6),%fpcr       # set FPCR
10812         fmov.l          &0x0,%fpsr              # set FPSR
10813
10814         fsgldiv.x       FP_SCR0(%a6),%fp0       # execute divide
10815
10816         fmov.l          %fpsr,%d1
10817         fmov.l          &0x0,%fpcr
10818
10819         or.l            %d1,USER_FPSR(%a6)      # save INEX,N
10820
10821         fmovm.x         &0x01,-(%sp)            # save result to stack
10822         mov.w           (%sp),%d1               # fetch new exponent
10823         add.l           &0xc,%sp                # clear result
10824         andi.l          &0x7fff,%d1             # strip sign
10825         sub.l           %d0,%d1                 # add scale factor
10826         cmp.l           %d1,&0x7fff             # did divide overflow?
10827         blt.b           fsgldiv_normal_exit
10828
10829 fsgldiv_ovfl_tst:
10830         or.w            &ovfl_inx_mask,2+USER_FPSR(%a6) # set ovfl/aovfl/ainex
10831
10832         mov.b           FPCR_ENABLE(%a6),%d1
10833         andi.b          &0x13,%d1               # is OVFL or INEX enabled?
10834         bne.b           fsgldiv_ovfl_ena        # yes
10835
10836 fsgldiv_ovfl_dis:
10837         btst            &neg_bit,FPSR_CC(%a6)   # is result negative
10838         sne             %d1                     # set sign param accordingly
10839         mov.l           L_SCR3(%a6),%d0         # pass prec:rnd
10840         andi.b          &0x30,%d0               # kill precision
10841         bsr.l           ovf_res                 # calculate default result
10842         or.b            %d0,FPSR_CC(%a6)        # set INF if applicable
10843         fmovm.x         (%a0),&0x80             # return default result in fp0
10844         rts
10845
10846 fsgldiv_ovfl_ena:
10847         fmovm.x         &0x80,FP_SCR0(%a6)      # move result to stack
10848
10849         mov.l           %d2,-(%sp)              # save d2
10850         mov.w           FP_SCR0_EX(%a6),%d1     # fetch {sgn,exp}
10851         mov.l           %d1,%d2                 # make a copy
10852         andi.l          &0x7fff,%d1             # strip sign
10853         andi.w          &0x8000,%d2             # keep old sign
10854         sub.l           %d0,%d1                 # add scale factor
10855         subi.l          &0x6000,%d1             # subtract new bias
10856         andi.w          &0x7fff,%d1             # clear ms bit
10857         or.w            %d2,%d1                 # concat old sign,new exp
10858         mov.w           %d1,FP_SCR0_EX(%a6)     # insert new exponent
10859         mov.l           (%sp)+,%d2              # restore d2
10860         fmovm.x         FP_SCR0(%a6),&0x40      # return EXOP in fp1
10861         bra.b           fsgldiv_ovfl_dis
10862
10863 fsgldiv_unfl:
10864         bset            &unfl_bit,FPSR_EXCEPT(%a6) # set unfl exc bit
10865
10866         fmovm.x         FP_SCR1(%a6),&0x80      # load dst op
10867
10868         fmov.l          &rz_mode*0x10,%fpcr     # set FPCR
10869         fmov.l          &0x0,%fpsr              # clear FPSR
10870
10871         fsgldiv.x       FP_SCR0(%a6),%fp0       # execute sgl divide
10872
10873         fmov.l          %fpsr,%d1               # save status
10874         fmov.l          &0x0,%fpcr              # clear FPCR
10875
10876         or.l            %d1,USER_FPSR(%a6)      # save INEX2,N
10877
10878         mov.b           FPCR_ENABLE(%a6),%d1
10879         andi.b          &0x0b,%d1               # is UNFL or INEX enabled?
10880         bne.b           fsgldiv_unfl_ena        # yes
10881
10882 fsgldiv_unfl_dis:
10883         fmovm.x         &0x80,FP_SCR0(%a6)      # store out result
10884
10885         lea             FP_SCR0(%a6),%a0        # pass: result addr
10886         mov.l           L_SCR3(%a6),%d1         # pass: rnd prec,mode
10887         bsr.l           unf_res4                # calculate default result
10888         or.b            %d0,FPSR_CC(%a6)        # 'Z' bit may have been set
10889         fmovm.x         FP_SCR0(%a6),&0x80      # return default result in fp0
10890         rts
10891
10892 #
10893 # UNFL is enabled.
10894 #
10895 fsgldiv_unfl_ena:
10896         fmovm.x         FP_SCR1(%a6),&0x40      # load dst op
10897
10898         fmov.l          L_SCR3(%a6),%fpcr       # set FPCR
10899         fmov.l          &0x0,%fpsr              # clear FPSR
10900
10901         fsgldiv.x       FP_SCR0(%a6),%fp1       # execute sgl divide
10902
10903         fmov.l          &0x0,%fpcr              # clear FPCR
10904
10905         fmovm.x         &0x40,FP_SCR0(%a6)      # save result to stack
10906         mov.l           %d2,-(%sp)              # save d2
10907         mov.w           FP_SCR0_EX(%a6),%d1     # fetch {sgn,exp}
10908         mov.l           %d1,%d2                 # make a copy
10909         andi.l          &0x7fff,%d1             # strip sign
10910         andi.w          &0x8000,%d2             # keep old sign
10911         sub.l           %d0,%d1                 # add scale factor
10912         addi.l          &0x6000,%d1             # add bias
10913         andi.w          &0x7fff,%d1             # clear top bit
10914         or.w            %d2,%d1                 # concat old sign, new exp
10915         mov.w           %d1,FP_SCR0_EX(%a6)     # insert new exponent
10916         mov.l           (%sp)+,%d2              # restore d2
10917         fmovm.x         FP_SCR0(%a6),&0x40      # return EXOP in fp1
10918         bra.b           fsgldiv_unfl_dis
10919
10920 #
10921 # the divide operation MAY underflow:
10922 #
10923 fsgldiv_may_unfl:
10924         fmovm.x         FP_SCR1(%a6),&0x80      # load dst op
10925
10926         fmov.l          L_SCR3(%a6),%fpcr       # set FPCR
10927         fmov.l          &0x0,%fpsr              # clear FPSR
10928
10929         fsgldiv.x       FP_SCR0(%a6),%fp0       # execute sgl divide
10930
10931         fmov.l          %fpsr,%d1               # save status
10932         fmov.l          &0x0,%fpcr              # clear FPCR
10933
10934         or.l            %d1,USER_FPSR(%a6)      # save INEX2,N
10935
10936         fabs.x          %fp0,%fp1               # make a copy of result
10937         fcmp.b          %fp1,&0x1               # is |result| > 1.b?
10938         fbgt.w          fsgldiv_normal_exit     # no; no underflow occurred
10939         fblt.w          fsgldiv_unfl            # yes; underflow occurred
10940
10941 #
10942 # we still don't know if underflow occurred. result is ~ equal to 1. but,
10943 # we don't know if the result was an underflow that rounded up to a 1
10944 # or a normalized number that rounded down to a 1. so, redo the entire
10945 # operation using RZ as the rounding mode to see what the pre-rounded
10946 # result is. this case should be relatively rare.
10947 #
10948         fmovm.x         FP_SCR1(%a6),&0x40      # load dst op into %fp1
10949
10950         clr.l           %d1                     # clear scratch register
10951         ori.b           &rz_mode*0x10,%d1       # force RZ rnd mode
10952
10953         fmov.l          %d1,%fpcr               # set FPCR
10954         fmov.l          &0x0,%fpsr              # clear FPSR
10955
10956         fsgldiv.x       FP_SCR0(%a6),%fp1       # execute sgl divide
10957
10958         fmov.l          &0x0,%fpcr              # clear FPCR
10959         fabs.x          %fp1                    # make absolute value
10960         fcmp.b          %fp1,&0x1               # is |result| < 1.b?
10961         fbge.w          fsgldiv_normal_exit     # no; no underflow occurred
10962         bra.w           fsgldiv_unfl            # yes; underflow occurred
10963
10964 ############################################################################
10965
10966 #
10967 # Divide: inputs are not both normalized; what are they?
10968 #
10969 fsgldiv_not_norm:
10970         mov.w           (tbl_fsgldiv_op.b,%pc,%d1.w*2),%d1
10971         jmp             (tbl_fsgldiv_op.b,%pc,%d1.w*1)
10972
10973         swbeg           &48
10974 tbl_fsgldiv_op:
10975         short           fsgldiv_norm            - tbl_fsgldiv_op # NORM / NORM
10976         short           fsgldiv_inf_load        - tbl_fsgldiv_op # NORM / ZERO
10977         short           fsgldiv_zero_load       - tbl_fsgldiv_op # NORM / INF
10978         short           fsgldiv_res_qnan        - tbl_fsgldiv_op # NORM / QNAN
10979         short           fsgldiv_norm            - tbl_fsgldiv_op # NORM / DENORM
10980         short           fsgldiv_res_snan        - tbl_fsgldiv_op # NORM / SNAN
10981         short           tbl_fsgldiv_op          - tbl_fsgldiv_op #
10982         short           tbl_fsgldiv_op          - tbl_fsgldiv_op #
10983
10984         short           fsgldiv_zero_load       - tbl_fsgldiv_op # ZERO / NORM
10985         short           fsgldiv_res_operr       - tbl_fsgldiv_op # ZERO / ZERO
10986         short           fsgldiv_zero_load       - tbl_fsgldiv_op # ZERO / INF
10987         short           fsgldiv_res_qnan        - tbl_fsgldiv_op # ZERO / QNAN
10988         short           fsgldiv_zero_load       - tbl_fsgldiv_op # ZERO / DENORM
10989         short           fsgldiv_res_snan        - tbl_fsgldiv_op # ZERO / SNAN
10990         short           tbl_fsgldiv_op          - tbl_fsgldiv_op #
10991         short           tbl_fsgldiv_op          - tbl_fsgldiv_op #
10992
10993         short           fsgldiv_inf_dst         - tbl_fsgldiv_op # INF / NORM
10994         short           fsgldiv_inf_dst         - tbl_fsgldiv_op # INF / ZERO
10995         short           fsgldiv_res_operr       - tbl_fsgldiv_op # INF / INF
10996         short           fsgldiv_res_qnan        - tbl_fsgldiv_op # INF / QNAN
10997         short           fsgldiv_inf_dst         - tbl_fsgldiv_op # INF / DENORM
10998         short           fsgldiv_res_snan        - tbl_fsgldiv_op # INF / SNAN
10999         short           tbl_fsgldiv_op          - tbl_fsgldiv_op #
11000         short           tbl_fsgldiv_op          - tbl_fsgldiv_op #
11001
11002         short           fsgldiv_res_qnan        - tbl_fsgldiv_op # QNAN / NORM
11003         short           fsgldiv_res_qnan        - tbl_fsgldiv_op # QNAN / ZERO
11004         short           fsgldiv_res_qnan        - tbl_fsgldiv_op # QNAN / INF
11005         short           fsgldiv_res_qnan        - tbl_fsgldiv_op # QNAN / QNAN
11006         short           fsgldiv_res_qnan        - tbl_fsgldiv_op # QNAN / DENORM
11007         short           fsgldiv_res_snan        - tbl_fsgldiv_op # QNAN / SNAN
11008         short           tbl_fsgldiv_op          - tbl_fsgldiv_op #
11009         short           tbl_fsgldiv_op          - tbl_fsgldiv_op #
11010
11011         short           fsgldiv_norm            - tbl_fsgldiv_op # DENORM / NORM
11012         short           fsgldiv_inf_load        - tbl_fsgldiv_op # DENORM / ZERO
11013         short           fsgldiv_zero_load       - tbl_fsgldiv_op # DENORM / INF
11014         short           fsgldiv_res_qnan        - tbl_fsgldiv_op # DENORM / QNAN
11015         short           fsgldiv_norm            - tbl_fsgldiv_op # DENORM / DENORM
11016         short           fsgldiv_res_snan        - tbl_fsgldiv_op # DENORM / SNAN
11017         short           tbl_fsgldiv_op          - tbl_fsgldiv_op #
11018         short           tbl_fsgldiv_op          - tbl_fsgldiv_op #
11019
11020         short           fsgldiv_res_snan        - tbl_fsgldiv_op # SNAN / NORM
11021         short           fsgldiv_res_snan        - tbl_fsgldiv_op # SNAN / ZERO
11022         short           fsgldiv_res_snan        - tbl_fsgldiv_op # SNAN / INF
11023         short           fsgldiv_res_snan        - tbl_fsgldiv_op # SNAN / QNAN
11024         short           fsgldiv_res_snan        - tbl_fsgldiv_op # SNAN / DENORM
11025         short           fsgldiv_res_snan        - tbl_fsgldiv_op # SNAN / SNAN
11026         short           tbl_fsgldiv_op          - tbl_fsgldiv_op #
11027         short           tbl_fsgldiv_op          - tbl_fsgldiv_op #
11028
11029 fsgldiv_res_qnan:
11030         bra.l           res_qnan
11031 fsgldiv_res_snan:
11032         bra.l           res_snan
11033 fsgldiv_res_operr:
11034         bra.l           res_operr
11035 fsgldiv_inf_load:
11036         bra.l           fdiv_inf_load
11037 fsgldiv_zero_load:
11038         bra.l           fdiv_zero_load
11039 fsgldiv_inf_dst:
11040         bra.l           fdiv_inf_dst
11041
11042 #########################################################################
11043 # XDEF **************************************************************** #
11044 #       fadd(): emulates the fadd instruction                           #
11045 #       fsadd(): emulates the fadd instruction                          #
11046 #       fdadd(): emulates the fdadd instruction                         #
11047 #                                                                       #
11048 # XREF **************************************************************** #
11049 #       addsub_scaler2() - scale the operands so they won't take exc    #
11050 #       ovf_res() - return default overflow result                      #
11051 #       unf_res() - return default underflow result                     #
11052 #       res_qnan() - set QNAN result                                    #
11053 #       res_snan() - set SNAN result                                    #
11054 #       res_operr() - set OPERR result                                  #
11055 #       scale_to_zero_src() - set src operand exponent equal to zero    #
11056 #       scale_to_zero_dst() - set dst operand exponent equal to zero    #
11057 #                                                                       #
11058 # INPUT *************************************************************** #
11059 #       a0 = pointer to extended precision source operand               #
11060 #       a1 = pointer to extended precision destination operand          #
11061 #                                                                       #
11062 # OUTPUT ************************************************************** #
11063 #       fp0 = result                                                    #
11064 #       fp1 = EXOP (if exception occurred)                              #
11065 #                                                                       #
11066 # ALGORITHM *********************************************************** #
11067 #       Handle NANs, infinities, and zeroes as special cases. Divide    #
11068 # norms into extended, single, and double precision.                    #
11069 #       Do addition after scaling exponents such that exception won't   #
11070 # occur. Then, check result exponent to see if exception would have     #
11071 # occurred. If so, return default result and maybe EXOP. Else, insert   #
11072 # the correct result exponent and return. Set FPSR bits as appropriate. #
11073 #                                                                       #
11074 #########################################################################
11075
11076         global          fsadd
11077 fsadd:
11078         andi.b          &0x30,%d0               # clear rnd prec
11079         ori.b           &s_mode*0x10,%d0        # insert sgl prec
11080         bra.b           fadd
11081
11082         global          fdadd
11083 fdadd:
11084         andi.b          &0x30,%d0               # clear rnd prec
11085         ori.b           &d_mode*0x10,%d0        # insert dbl prec
11086
11087         global          fadd
11088 fadd:
11089         mov.l           %d0,L_SCR3(%a6)         # store rnd info
11090
11091         clr.w           %d1
11092         mov.b           DTAG(%a6),%d1
11093         lsl.b           &0x3,%d1
11094         or.b            STAG(%a6),%d1           # combine src tags
11095
11096         bne.w           fadd_not_norm           # optimize on non-norm input
11097
11098 #
11099 # ADD: norms and denorms
11100 #
11101 fadd_norm:
11102         bsr.l           addsub_scaler2          # scale exponents
11103
11104 fadd_zero_entry:
11105         fmovm.x         FP_SCR1(%a6),&0x80      # load dst op
11106
11107         fmov.l          &0x0,%fpsr              # clear FPSR
11108         fmov.l          L_SCR3(%a6),%fpcr       # set FPCR
11109
11110         fadd.x          FP_SCR0(%a6),%fp0       # execute add
11111
11112         fmov.l          &0x0,%fpcr              # clear FPCR
11113         fmov.l          %fpsr,%d1               # fetch INEX2,N,Z
11114
11115         or.l            %d1,USER_FPSR(%a6)      # save exc and ccode bits
11116
11117         fbeq.w          fadd_zero_exit          # if result is zero, end now
11118
11119         mov.l           %d2,-(%sp)              # save d2
11120
11121         fmovm.x         &0x01,-(%sp)            # save result to stack
11122
11123         mov.w           2+L_SCR3(%a6),%d1
11124         lsr.b           &0x6,%d1
11125
11126         mov.w           (%sp),%d2               # fetch new sign, exp
11127         andi.l          &0x7fff,%d2             # strip sign
11128         sub.l           %d0,%d2                 # add scale factor
11129
11130         cmp.l           %d2,(tbl_fadd_ovfl.b,%pc,%d1.w*4) # is it an overflow?
11131         bge.b           fadd_ovfl               # yes
11132
11133         cmp.l           %d2,(tbl_fadd_unfl.b,%pc,%d1.w*4) # is it an underflow?
11134         blt.w           fadd_unfl               # yes
11135         beq.w           fadd_may_unfl           # maybe; go find out
11136
11137 fadd_normal:
11138         mov.w           (%sp),%d1
11139         andi.w          &0x8000,%d1             # keep sign
11140         or.w            %d2,%d1                 # concat sign,new exp
11141         mov.w           %d1,(%sp)               # insert new exponent
11142
11143         fmovm.x         (%sp)+,&0x80            # return result in fp0
11144
11145         mov.l           (%sp)+,%d2              # restore d2
11146         rts
11147
11148 fadd_zero_exit:
11149 #       fmov.s          &0x00000000,%fp0        # return zero in fp0
11150         rts
11151
11152 tbl_fadd_ovfl:
11153         long            0x7fff                  # ext ovfl
11154         long            0x407f                  # sgl ovfl
11155         long            0x43ff                  # dbl ovfl
11156
11157 tbl_fadd_unfl:
11158         long            0x0000                  # ext unfl
11159         long            0x3f81                  # sgl unfl
11160         long            0x3c01                  # dbl unfl
11161
11162 fadd_ovfl:
11163         or.l            &ovfl_inx_mask,USER_FPSR(%a6) # set ovfl/aovfl/ainex
11164
11165         mov.b           FPCR_ENABLE(%a6),%d1
11166         andi.b          &0x13,%d1               # is OVFL or INEX enabled?
11167         bne.b           fadd_ovfl_ena           # yes
11168
11169         add.l           &0xc,%sp
11170 fadd_ovfl_dis:
11171         btst            &neg_bit,FPSR_CC(%a6)   # is result negative?
11172         sne             %d1                     # set sign param accordingly
11173         mov.l           L_SCR3(%a6),%d0         # pass prec:rnd
11174         bsr.l           ovf_res                 # calculate default result
11175         or.b            %d0,FPSR_CC(%a6)        # set INF,N if applicable
11176         fmovm.x         (%a0),&0x80             # return default result in fp0
11177         mov.l           (%sp)+,%d2              # restore d2
11178         rts
11179
11180 fadd_ovfl_ena:
11181         mov.b           L_SCR3(%a6),%d1
11182         andi.b          &0xc0,%d1               # is precision extended?
11183         bne.b           fadd_ovfl_ena_sd        # no; prec = sgl or dbl
11184
11185 fadd_ovfl_ena_cont:
11186         mov.w           (%sp),%d1
11187         andi.w          &0x8000,%d1             # keep sign
11188         subi.l          &0x6000,%d2             # add extra bias
11189         andi.w          &0x7fff,%d2
11190         or.w            %d2,%d1                 # concat sign,new exp
11191         mov.w           %d1,(%sp)               # insert new exponent
11192
11193         fmovm.x         (%sp)+,&0x40            # return EXOP in fp1
11194         bra.b           fadd_ovfl_dis
11195
11196 fadd_ovfl_ena_sd:
11197         fmovm.x         FP_SCR1(%a6),&0x80      # load dst op
11198
11199         mov.l           L_SCR3(%a6),%d1
11200         andi.b          &0x30,%d1               # keep rnd mode
11201         fmov.l          %d1,%fpcr               # set FPCR
11202
11203         fadd.x          FP_SCR0(%a6),%fp0       # execute add
11204
11205         fmov.l          &0x0,%fpcr              # clear FPCR
11206
11207         add.l           &0xc,%sp
11208         fmovm.x         &0x01,-(%sp)
11209         bra.b           fadd_ovfl_ena_cont
11210
11211 fadd_unfl:
11212         bset            &unfl_bit,FPSR_EXCEPT(%a6) # set unfl exc bit
11213
11214         add.l           &0xc,%sp
11215
11216         fmovm.x         FP_SCR1(%a6),&0x80      # load dst op
11217
11218         fmov.l          &rz_mode*0x10,%fpcr     # set FPCR
11219         fmov.l          &0x0,%fpsr              # clear FPSR
11220
11221         fadd.x          FP_SCR0(%a6),%fp0       # execute add
11222
11223         fmov.l          &0x0,%fpcr              # clear FPCR
11224         fmov.l          %fpsr,%d1               # save status
11225
11226         or.l            %d1,USER_FPSR(%a6)      # save INEX,N
11227
11228         mov.b           FPCR_ENABLE(%a6),%d1
11229         andi.b          &0x0b,%d1               # is UNFL or INEX enabled?
11230         bne.b           fadd_unfl_ena           # yes
11231
11232 fadd_unfl_dis:
11233         fmovm.x         &0x80,FP_SCR0(%a6)      # store out result
11234
11235         lea             FP_SCR0(%a6),%a0        # pass: result addr
11236         mov.l           L_SCR3(%a6),%d1         # pass: rnd prec,mode
11237         bsr.l           unf_res                 # calculate default result
11238         or.b            %d0,FPSR_CC(%a6)        # 'Z' bit may have been set
11239         fmovm.x         FP_SCR0(%a6),&0x80      # return default result in fp0
11240         mov.l           (%sp)+,%d2              # restore d2
11241         rts
11242
11243 fadd_unfl_ena:
11244         fmovm.x         FP_SCR1(%a6),&0x40      # load dst op
11245
11246         mov.l           L_SCR3(%a6),%d1
11247         andi.b          &0xc0,%d1               # is precision extended?
11248         bne.b           fadd_unfl_ena_sd        # no; sgl or dbl
11249
11250         fmov.l          L_SCR3(%a6),%fpcr       # set FPCR
11251
11252 fadd_unfl_ena_cont:
11253         fmov.l          &0x0,%fpsr              # clear FPSR
11254
11255         fadd.x          FP_SCR0(%a6),%fp1       # execute multiply
11256
11257         fmov.l          &0x0,%fpcr              # clear FPCR
11258
11259         fmovm.x         &0x40,FP_SCR0(%a6)      # save result to stack
11260         mov.w           FP_SCR0_EX(%a6),%d1     # fetch {sgn,exp}
11261         mov.l           %d1,%d2                 # make a copy
11262         andi.l          &0x7fff,%d1             # strip sign
11263         andi.w          &0x8000,%d2             # keep old sign
11264         sub.l           %d0,%d1                 # add scale factor
11265         addi.l          &0x6000,%d1             # add new bias
11266         andi.w          &0x7fff,%d1             # clear top bit
11267         or.w            %d2,%d1                 # concat sign,new exp
11268         mov.w           %d1,FP_SCR0_EX(%a6)     # insert new exponent
11269         fmovm.x         FP_SCR0(%a6),&0x40      # return EXOP in fp1
11270         bra.w           fadd_unfl_dis
11271
11272 fadd_unfl_ena_sd:
11273         mov.l           L_SCR3(%a6),%d1
11274         andi.b          &0x30,%d1               # use only rnd mode
11275         fmov.l          %d1,%fpcr               # set FPCR
11276
11277         bra.b           fadd_unfl_ena_cont
11278
11279 #
11280 # result is equal to the smallest normalized number in the selected precision
11281 # if the precision is extended, this result could not have come from an
11282 # underflow that rounded up.
11283 #
11284 fadd_may_unfl:
11285         mov.l           L_SCR3(%a6),%d1
11286         andi.b          &0xc0,%d1
11287         beq.w           fadd_normal             # yes; no underflow occurred
11288
11289         mov.l           0x4(%sp),%d1            # extract hi(man)
11290         cmpi.l          %d1,&0x80000000         # is hi(man) = 0x80000000?
11291         bne.w           fadd_normal             # no; no underflow occurred
11292
11293         tst.l           0x8(%sp)                # is lo(man) = 0x0?
11294         bne.w           fadd_normal             # no; no underflow occurred
11295
11296         btst            &inex2_bit,FPSR_EXCEPT(%a6) # is INEX2 set?
11297         beq.w           fadd_normal             # no; no underflow occurred
11298
11299 #
11300 # ok, so now the result has a exponent equal to the smallest normalized
11301 # exponent for the selected precision. also, the mantissa is equal to
11302 # 0x8000000000000000 and this mantissa is the result of rounding non-zero
11303 # g,r,s.
11304 # now, we must determine whether the pre-rounded result was an underflow
11305 # rounded "up" or a normalized number rounded "down".
11306 # so, we do this be re-executing the add using RZ as the rounding mode and
11307 # seeing if the new result is smaller or equal to the current result.
11308 #
11309         fmovm.x         FP_SCR1(%a6),&0x40      # load dst op into fp1
11310
11311         mov.l           L_SCR3(%a6),%d1
11312         andi.b          &0xc0,%d1               # keep rnd prec
11313         ori.b           &rz_mode*0x10,%d1       # insert rnd mode
11314         fmov.l          %d1,%fpcr               # set FPCR
11315         fmov.l          &0x0,%fpsr              # clear FPSR
11316
11317         fadd.x          FP_SCR0(%a6),%fp1       # execute add
11318
11319         fmov.l          &0x0,%fpcr              # clear FPCR
11320
11321         fabs.x          %fp0                    # compare absolute values
11322         fabs.x          %fp1
11323         fcmp.x          %fp0,%fp1               # is first result > second?
11324
11325         fbgt.w          fadd_unfl               # yes; it's an underflow
11326         bra.w           fadd_normal             # no; it's not an underflow
11327
11328 ##########################################################################
11329
11330 #
11331 # Add: inputs are not both normalized; what are they?
11332 #
11333 fadd_not_norm:
11334         mov.w           (tbl_fadd_op.b,%pc,%d1.w*2),%d1
11335         jmp             (tbl_fadd_op.b,%pc,%d1.w*1)
11336
11337         swbeg           &48
11338 tbl_fadd_op:
11339         short           fadd_norm       - tbl_fadd_op # NORM + NORM
11340         short           fadd_zero_src   - tbl_fadd_op # NORM + ZERO
11341         short           fadd_inf_src    - tbl_fadd_op # NORM + INF
11342         short           fadd_res_qnan   - tbl_fadd_op # NORM + QNAN
11343         short           fadd_norm       - tbl_fadd_op # NORM + DENORM
11344         short           fadd_res_snan   - tbl_fadd_op # NORM + SNAN
11345         short           tbl_fadd_op     - tbl_fadd_op #
11346         short           tbl_fadd_op     - tbl_fadd_op #
11347
11348         short           fadd_zero_dst   - tbl_fadd_op # ZERO + NORM
11349         short           fadd_zero_2     - tbl_fadd_op # ZERO + ZERO
11350         short           fadd_inf_src    - tbl_fadd_op # ZERO + INF
11351         short           fadd_res_qnan   - tbl_fadd_op # NORM + QNAN
11352         short           fadd_zero_dst   - tbl_fadd_op # ZERO + DENORM
11353         short           fadd_res_snan   - tbl_fadd_op # NORM + SNAN
11354         short           tbl_fadd_op     - tbl_fadd_op #
11355         short           tbl_fadd_op     - tbl_fadd_op #
11356
11357         short           fadd_inf_dst    - tbl_fadd_op # INF + NORM
11358         short           fadd_inf_dst    - tbl_fadd_op # INF + ZERO
11359         short           fadd_inf_2      - tbl_fadd_op # INF + INF
11360         short           fadd_res_qnan   - tbl_fadd_op # NORM + QNAN
11361         short           fadd_inf_dst    - tbl_fadd_op # INF + DENORM
11362         short           fadd_res_snan   - tbl_fadd_op # NORM + SNAN
11363         short           tbl_fadd_op     - tbl_fadd_op #
11364         short           tbl_fadd_op     - tbl_fadd_op #
11365
11366         short           fadd_res_qnan   - tbl_fadd_op # QNAN + NORM
11367         short           fadd_res_qnan   - tbl_fadd_op # QNAN + ZERO
11368         short           fadd_res_qnan   - tbl_fadd_op # QNAN + INF
11369         short           fadd_res_qnan   - tbl_fadd_op # QNAN + QNAN
11370         short           fadd_res_qnan   - tbl_fadd_op # QNAN + DENORM
11371         short           fadd_res_snan   - tbl_fadd_op # QNAN + SNAN
11372         short           tbl_fadd_op     - tbl_fadd_op #
11373         short           tbl_fadd_op     - tbl_fadd_op #
11374
11375         short           fadd_norm       - tbl_fadd_op # DENORM + NORM
11376         short           fadd_zero_src   - tbl_fadd_op # DENORM + ZERO
11377         short           fadd_inf_src    - tbl_fadd_op # DENORM + INF
11378         short           fadd_res_qnan   - tbl_fadd_op # NORM + QNAN
11379         short           fadd_norm       - tbl_fadd_op # DENORM + DENORM
11380         short           fadd_res_snan   - tbl_fadd_op # NORM + SNAN
11381         short           tbl_fadd_op     - tbl_fadd_op #
11382         short           tbl_fadd_op     - tbl_fadd_op #
11383
11384         short           fadd_res_snan   - tbl_fadd_op # SNAN + NORM
11385         short           fadd_res_snan   - tbl_fadd_op # SNAN + ZERO
11386         short           fadd_res_snan   - tbl_fadd_op # SNAN + INF
11387         short           fadd_res_snan   - tbl_fadd_op # SNAN + QNAN
11388         short           fadd_res_snan   - tbl_fadd_op # SNAN + DENORM
11389         short           fadd_res_snan   - tbl_fadd_op # SNAN + SNAN
11390         short           tbl_fadd_op     - tbl_fadd_op #
11391         short           tbl_fadd_op     - tbl_fadd_op #
11392
11393 fadd_res_qnan:
11394         bra.l           res_qnan
11395 fadd_res_snan:
11396         bra.l           res_snan
11397
11398 #
11399 # both operands are ZEROes
11400 #
11401 fadd_zero_2:
11402         mov.b           SRC_EX(%a0),%d0         # are the signs opposite
11403         mov.b           DST_EX(%a1),%d1
11404         eor.b           %d0,%d1
11405         bmi.w           fadd_zero_2_chk_rm      # weed out (-ZERO)+(+ZERO)
11406
11407 # the signs are the same. so determine whether they are positive or negative
11408 # and return the appropriately signed zero.
11409         tst.b           %d0                     # are ZEROes positive or negative?
11410         bmi.b           fadd_zero_rm            # negative
11411         fmov.s          &0x00000000,%fp0        # return +ZERO
11412         mov.b           &z_bmask,FPSR_CC(%a6)   # set Z
11413         rts
11414
11415 #
11416 # the ZEROes have opposite signs:
11417 # - therefore, we return +ZERO if the rounding modes are RN,RZ, or RP.
11418 # - -ZERO is returned in the case of RM.
11419 #
11420 fadd_zero_2_chk_rm:
11421         mov.b           3+L_SCR3(%a6),%d1
11422         andi.b          &0x30,%d1               # extract rnd mode
11423         cmpi.b          %d1,&rm_mode*0x10       # is rnd mode == RM?
11424         beq.b           fadd_zero_rm            # yes
11425         fmov.s          &0x00000000,%fp0        # return +ZERO
11426         mov.b           &z_bmask,FPSR_CC(%a6)   # set Z
11427         rts
11428
11429 fadd_zero_rm:
11430         fmov.s          &0x80000000,%fp0        # return -ZERO
11431         mov.b           &neg_bmask+z_bmask,FPSR_CC(%a6) # set NEG/Z
11432         rts
11433
11434 #
11435 # one operand is a ZERO and the other is a DENORM or NORM. scale
11436 # the DENORM or NORM and jump to the regular fadd routine.
11437 #
11438 fadd_zero_dst:
11439         mov.w           SRC_EX(%a0),FP_SCR0_EX(%a6)
11440         mov.l           SRC_HI(%a0),FP_SCR0_HI(%a6)
11441         mov.l           SRC_LO(%a0),FP_SCR0_LO(%a6)
11442         bsr.l           scale_to_zero_src       # scale the operand
11443         clr.w           FP_SCR1_EX(%a6)
11444         clr.l           FP_SCR1_HI(%a6)
11445         clr.l           FP_SCR1_LO(%a6)
11446         bra.w           fadd_zero_entry         # go execute fadd
11447
11448 fadd_zero_src:
11449         mov.w           DST_EX(%a1),FP_SCR1_EX(%a6)
11450         mov.l           DST_HI(%a1),FP_SCR1_HI(%a6)
11451         mov.l           DST_LO(%a1),FP_SCR1_LO(%a6)
11452         bsr.l           scale_to_zero_dst       # scale the operand
11453         clr.w           FP_SCR0_EX(%a6)
11454         clr.l           FP_SCR0_HI(%a6)
11455         clr.l           FP_SCR0_LO(%a6)
11456         bra.w           fadd_zero_entry         # go execute fadd
11457
11458 #
11459 # both operands are INFs. an OPERR will result if the INFs have
11460 # different signs. else, an INF of the same sign is returned
11461 #
11462 fadd_inf_2:
11463         mov.b           SRC_EX(%a0),%d0         # exclusive or the signs
11464         mov.b           DST_EX(%a1),%d1
11465         eor.b           %d1,%d0
11466         bmi.l           res_operr               # weed out (-INF)+(+INF)
11467
11468 # ok, so it's not an OPERR. but, we do have to remember to return the
11469 # src INF since that's where the 881/882 gets the j-bit from...
11470
11471 #
11472 # operands are INF and one of {ZERO, INF, DENORM, NORM}
11473 #
11474 fadd_inf_src:
11475         fmovm.x         SRC(%a0),&0x80          # return src INF
11476         tst.b           SRC_EX(%a0)             # is INF positive?
11477         bpl.b           fadd_inf_done           # yes; we're done
11478         mov.b           &neg_bmask+inf_bmask,FPSR_CC(%a6) # set INF/NEG
11479         rts
11480
11481 #
11482 # operands are INF and one of {ZERO, INF, DENORM, NORM}
11483 #
11484 fadd_inf_dst:
11485         fmovm.x         DST(%a1),&0x80          # return dst INF
11486         tst.b           DST_EX(%a1)             # is INF positive?
11487         bpl.b           fadd_inf_done           # yes; we're done
11488         mov.b           &neg_bmask+inf_bmask,FPSR_CC(%a6) # set INF/NEG
11489         rts
11490
11491 fadd_inf_done:
11492         mov.b           &inf_bmask,FPSR_CC(%a6) # set INF
11493         rts
11494
11495 #########################################################################
11496 # XDEF **************************************************************** #
11497 #       fsub(): emulates the fsub instruction                           #
11498 #       fssub(): emulates the fssub instruction                         #
11499 #       fdsub(): emulates the fdsub instruction                         #
11500 #                                                                       #
11501 # XREF **************************************************************** #
11502 #       addsub_scaler2() - scale the operands so they won't take exc    #
11503 #       ovf_res() - return default overflow result                      #
11504 #       unf_res() - return default underflow result                     #
11505 #       res_qnan() - set QNAN result                                    #
11506 #       res_snan() - set SNAN result                                    #
11507 #       res_operr() - set OPERR result                                  #
11508 #       scale_to_zero_src() - set src operand exponent equal to zero    #
11509 #       scale_to_zero_dst() - set dst operand exponent equal to zero    #
11510 #                                                                       #
11511 # INPUT *************************************************************** #
11512 #       a0 = pointer to extended precision source operand               #
11513 #       a1 = pointer to extended precision destination operand          #
11514 #                                                                       #
11515 # OUTPUT ************************************************************** #
11516 #       fp0 = result                                                    #
11517 #       fp1 = EXOP (if exception occurred)                              #
11518 #                                                                       #
11519 # ALGORITHM *********************************************************** #
11520 #       Handle NANs, infinities, and zeroes as special cases. Divide    #
11521 # norms into extended, single, and double precision.                    #
11522 #       Do subtraction after scaling exponents such that exception won't#
11523 # occur. Then, check result exponent to see if exception would have     #
11524 # occurred. If so, return default result and maybe EXOP. Else, insert   #
11525 # the correct result exponent and return. Set FPSR bits as appropriate. #
11526 #                                                                       #
11527 #########################################################################
11528
11529         global          fssub
11530 fssub:
11531         andi.b          &0x30,%d0               # clear rnd prec
11532         ori.b           &s_mode*0x10,%d0        # insert sgl prec
11533         bra.b           fsub
11534
11535         global          fdsub
11536 fdsub:
11537         andi.b          &0x30,%d0               # clear rnd prec
11538         ori.b           &d_mode*0x10,%d0        # insert dbl prec
11539
11540         global          fsub
11541 fsub:
11542         mov.l           %d0,L_SCR3(%a6)         # store rnd info
11543
11544         clr.w           %d1
11545         mov.b           DTAG(%a6),%d1
11546         lsl.b           &0x3,%d1
11547         or.b            STAG(%a6),%d1           # combine src tags
11548
11549         bne.w           fsub_not_norm           # optimize on non-norm input
11550
11551 #
11552 # SUB: norms and denorms
11553 #
11554 fsub_norm:
11555         bsr.l           addsub_scaler2          # scale exponents
11556
11557 fsub_zero_entry:
11558         fmovm.x         FP_SCR1(%a6),&0x80      # load dst op
11559
11560         fmov.l          &0x0,%fpsr              # clear FPSR
11561         fmov.l          L_SCR3(%a6),%fpcr       # set FPCR
11562
11563         fsub.x          FP_SCR0(%a6),%fp0       # execute subtract
11564
11565         fmov.l          &0x0,%fpcr              # clear FPCR
11566         fmov.l          %fpsr,%d1               # fetch INEX2, N, Z
11567
11568         or.l            %d1,USER_FPSR(%a6)      # save exc and ccode bits
11569
11570         fbeq.w          fsub_zero_exit          # if result zero, end now
11571
11572         mov.l           %d2,-(%sp)              # save d2
11573
11574         fmovm.x         &0x01,-(%sp)            # save result to stack
11575
11576         mov.w           2+L_SCR3(%a6),%d1
11577         lsr.b           &0x6,%d1
11578
11579         mov.w           (%sp),%d2               # fetch new exponent
11580         andi.l          &0x7fff,%d2             # strip sign
11581         sub.l           %d0,%d2                 # add scale factor
11582
11583         cmp.l           %d2,(tbl_fsub_ovfl.b,%pc,%d1.w*4) # is it an overflow?
11584         bge.b           fsub_ovfl               # yes
11585
11586         cmp.l           %d2,(tbl_fsub_unfl.b,%pc,%d1.w*4) # is it an underflow?
11587         blt.w           fsub_unfl               # yes
11588         beq.w           fsub_may_unfl           # maybe; go find out
11589
11590 fsub_normal:
11591         mov.w           (%sp),%d1
11592         andi.w          &0x8000,%d1             # keep sign
11593         or.w            %d2,%d1                 # insert new exponent
11594         mov.w           %d1,(%sp)               # insert new exponent
11595
11596         fmovm.x         (%sp)+,&0x80            # return result in fp0
11597
11598         mov.l           (%sp)+,%d2              # restore d2
11599         rts
11600
11601 fsub_zero_exit:
11602 #       fmov.s          &0x00000000,%fp0        # return zero in fp0
11603         rts
11604
11605 tbl_fsub_ovfl:
11606         long            0x7fff                  # ext ovfl
11607         long            0x407f                  # sgl ovfl
11608         long            0x43ff                  # dbl ovfl
11609
11610 tbl_fsub_unfl:
11611         long            0x0000                  # ext unfl
11612         long            0x3f81                  # sgl unfl
11613         long            0x3c01                  # dbl unfl
11614
11615 fsub_ovfl:
11616         or.l            &ovfl_inx_mask,USER_FPSR(%a6) # set ovfl/aovfl/ainex
11617
11618         mov.b           FPCR_ENABLE(%a6),%d1
11619         andi.b          &0x13,%d1               # is OVFL or INEX enabled?
11620         bne.b           fsub_ovfl_ena           # yes
11621
11622         add.l           &0xc,%sp
11623 fsub_ovfl_dis:
11624         btst            &neg_bit,FPSR_CC(%a6)   # is result negative?
11625         sne             %d1                     # set sign param accordingly
11626         mov.l           L_SCR3(%a6),%d0         # pass prec:rnd
11627         bsr.l           ovf_res                 # calculate default result
11628         or.b            %d0,FPSR_CC(%a6)        # set INF,N if applicable
11629         fmovm.x         (%a0),&0x80             # return default result in fp0
11630         mov.l           (%sp)+,%d2              # restore d2
11631         rts
11632
11633 fsub_ovfl_ena:
11634         mov.b           L_SCR3(%a6),%d1
11635         andi.b          &0xc0,%d1               # is precision extended?
11636         bne.b           fsub_ovfl_ena_sd        # no
11637
11638 fsub_ovfl_ena_cont:
11639         mov.w           (%sp),%d1               # fetch {sgn,exp}
11640         andi.w          &0x8000,%d1             # keep sign
11641         subi.l          &0x6000,%d2             # subtract new bias
11642         andi.w          &0x7fff,%d2             # clear top bit
11643         or.w            %d2,%d1                 # concat sign,exp
11644         mov.w           %d1,(%sp)               # insert new exponent
11645
11646         fmovm.x         (%sp)+,&0x40            # return EXOP in fp1
11647         bra.b           fsub_ovfl_dis
11648
11649 fsub_ovfl_ena_sd:
11650         fmovm.x         FP_SCR1(%a6),&0x80      # load dst op
11651
11652         mov.l           L_SCR3(%a6),%d1
11653         andi.b          &0x30,%d1               # clear rnd prec
11654         fmov.l          %d1,%fpcr               # set FPCR
11655
11656         fsub.x          FP_SCR0(%a6),%fp0       # execute subtract
11657
11658         fmov.l          &0x0,%fpcr              # clear FPCR
11659
11660         add.l           &0xc,%sp
11661         fmovm.x         &0x01,-(%sp)
11662         bra.b           fsub_ovfl_ena_cont
11663
11664 fsub_unfl:
11665         bset            &unfl_bit,FPSR_EXCEPT(%a6) # set unfl exc bit
11666
11667         add.l           &0xc,%sp
11668
11669         fmovm.x         FP_SCR1(%a6),&0x80      # load dst op
11670
11671         fmov.l          &rz_mode*0x10,%fpcr     # set FPCR
11672         fmov.l          &0x0,%fpsr              # clear FPSR
11673
11674         fsub.x          FP_SCR0(%a6),%fp0       # execute subtract
11675
11676         fmov.l          &0x0,%fpcr              # clear FPCR
11677         fmov.l          %fpsr,%d1               # save status
11678
11679         or.l            %d1,USER_FPSR(%a6)
11680
11681         mov.b           FPCR_ENABLE(%a6),%d1
11682         andi.b          &0x0b,%d1               # is UNFL or INEX enabled?
11683         bne.b           fsub_unfl_ena           # yes
11684
11685 fsub_unfl_dis:
11686         fmovm.x         &0x80,FP_SCR0(%a6)      # store out result
11687
11688         lea             FP_SCR0(%a6),%a0        # pass: result addr
11689         mov.l           L_SCR3(%a6),%d1         # pass: rnd prec,mode
11690         bsr.l           unf_res                 # calculate default result
11691         or.b            %d0,FPSR_CC(%a6)        # 'Z' may have been set
11692         fmovm.x         FP_SCR0(%a6),&0x80      # return default result in fp0
11693         mov.l           (%sp)+,%d2              # restore d2
11694         rts
11695
11696 fsub_unfl_ena:
11697         fmovm.x         FP_SCR1(%a6),&0x40
11698
11699         mov.l           L_SCR3(%a6),%d1
11700         andi.b          &0xc0,%d1               # is precision extended?
11701         bne.b           fsub_unfl_ena_sd        # no
11702
11703         fmov.l          L_SCR3(%a6),%fpcr       # set FPCR
11704
11705 fsub_unfl_ena_cont:
11706         fmov.l          &0x0,%fpsr              # clear FPSR
11707
11708         fsub.x          FP_SCR0(%a6),%fp1       # execute subtract
11709
11710         fmov.l          &0x0,%fpcr              # clear FPCR
11711
11712         fmovm.x         &0x40,FP_SCR0(%a6)      # store result to stack
11713         mov.w           FP_SCR0_EX(%a6),%d1     # fetch {sgn,exp}
11714         mov.l           %d1,%d2                 # make a copy
11715         andi.l          &0x7fff,%d1             # strip sign
11716         andi.w          &0x8000,%d2             # keep old sign
11717         sub.l           %d0,%d1                 # add scale factor
11718         addi.l          &0x6000,%d1             # subtract new bias
11719         andi.w          &0x7fff,%d1             # clear top bit
11720         or.w            %d2,%d1                 # concat sgn,exp
11721         mov.w           %d1,FP_SCR0_EX(%a6)     # insert new exponent
11722         fmovm.x         FP_SCR0(%a6),&0x40      # return EXOP in fp1
11723         bra.w           fsub_unfl_dis
11724
11725 fsub_unfl_ena_sd:
11726         mov.l           L_SCR3(%a6),%d1
11727         andi.b          &0x30,%d1               # clear rnd prec
11728         fmov.l          %d1,%fpcr               # set FPCR
11729
11730         bra.b           fsub_unfl_ena_cont
11731
11732 #
11733 # result is equal to the smallest normalized number in the selected precision
11734 # if the precision is extended, this result could not have come from an
11735 # underflow that rounded up.
11736 #
11737 fsub_may_unfl:
11738         mov.l           L_SCR3(%a6),%d1
11739         andi.b          &0xc0,%d1               # fetch rnd prec
11740         beq.w           fsub_normal             # yes; no underflow occurred
11741
11742         mov.l           0x4(%sp),%d1
11743         cmpi.l          %d1,&0x80000000         # is hi(man) = 0x80000000?
11744         bne.w           fsub_normal             # no; no underflow occurred
11745
11746         tst.l           0x8(%sp)                # is lo(man) = 0x0?
11747         bne.w           fsub_normal             # no; no underflow occurred
11748
11749         btst            &inex2_bit,FPSR_EXCEPT(%a6) # is INEX2 set?
11750         beq.w           fsub_normal             # no; no underflow occurred
11751
11752 #
11753 # ok, so now the result has a exponent equal to the smallest normalized
11754 # exponent for the selected precision. also, the mantissa is equal to
11755 # 0x8000000000000000 and this mantissa is the result of rounding non-zero
11756 # g,r,s.
11757 # now, we must determine whether the pre-rounded result was an underflow
11758 # rounded "up" or a normalized number rounded "down".
11759 # so, we do this be re-executing the add using RZ as the rounding mode and
11760 # seeing if the new result is smaller or equal to the current result.
11761 #
11762         fmovm.x         FP_SCR1(%a6),&0x40      # load dst op into fp1
11763
11764         mov.l           L_SCR3(%a6),%d1
11765         andi.b          &0xc0,%d1               # keep rnd prec
11766         ori.b           &rz_mode*0x10,%d1       # insert rnd mode
11767         fmov.l          %d1,%fpcr               # set FPCR
11768         fmov.l          &0x0,%fpsr              # clear FPSR
11769
11770         fsub.x          FP_SCR0(%a6),%fp1       # execute subtract
11771
11772         fmov.l          &0x0,%fpcr              # clear FPCR
11773
11774         fabs.x          %fp0                    # compare absolute values
11775         fabs.x          %fp1
11776         fcmp.x          %fp0,%fp1               # is first result > second?
11777
11778         fbgt.w          fsub_unfl               # yes; it's an underflow
11779         bra.w           fsub_normal             # no; it's not an underflow
11780
11781 ##########################################################################
11782
11783 #
11784 # Sub: inputs are not both normalized; what are they?
11785 #
11786 fsub_not_norm:
11787         mov.w           (tbl_fsub_op.b,%pc,%d1.w*2),%d1
11788         jmp             (tbl_fsub_op.b,%pc,%d1.w*1)
11789
11790         swbeg           &48
11791 tbl_fsub_op:
11792         short           fsub_norm       - tbl_fsub_op # NORM - NORM
11793         short           fsub_zero_src   - tbl_fsub_op # NORM - ZERO
11794         short           fsub_inf_src    - tbl_fsub_op # NORM - INF
11795         short           fsub_res_qnan   - tbl_fsub_op # NORM - QNAN
11796         short           fsub_norm       - tbl_fsub_op # NORM - DENORM
11797         short           fsub_res_snan   - tbl_fsub_op # NORM - SNAN
11798         short           tbl_fsub_op     - tbl_fsub_op #
11799         short           tbl_fsub_op     - tbl_fsub_op #
11800
11801         short           fsub_zero_dst   - tbl_fsub_op # ZERO - NORM
11802         short           fsub_zero_2     - tbl_fsub_op # ZERO - ZERO
11803         short           fsub_inf_src    - tbl_fsub_op # ZERO - INF
11804         short           fsub_res_qnan   - tbl_fsub_op # NORM - QNAN
11805         short           fsub_zero_dst   - tbl_fsub_op # ZERO - DENORM
11806         short           fsub_res_snan   - tbl_fsub_op # NORM - SNAN
11807         short           tbl_fsub_op     - tbl_fsub_op #
11808         short           tbl_fsub_op     - tbl_fsub_op #
11809
11810         short           fsub_inf_dst    - tbl_fsub_op # INF - NORM
11811         short           fsub_inf_dst    - tbl_fsub_op # INF - ZERO
11812         short           fsub_inf_2      - tbl_fsub_op # INF - INF
11813         short           fsub_res_qnan   - tbl_fsub_op # NORM - QNAN
11814         short           fsub_inf_dst    - tbl_fsub_op # INF - DENORM
11815         short           fsub_res_snan   - tbl_fsub_op # NORM - SNAN
11816         short           tbl_fsub_op     - tbl_fsub_op #
11817         short           tbl_fsub_op     - tbl_fsub_op #
11818
11819         short           fsub_res_qnan   - tbl_fsub_op # QNAN - NORM
11820         short           fsub_res_qnan   - tbl_fsub_op # QNAN - ZERO
11821         short           fsub_res_qnan   - tbl_fsub_op # QNAN - INF
11822         short           fsub_res_qnan   - tbl_fsub_op # QNAN - QNAN
11823         short           fsub_res_qnan   - tbl_fsub_op # QNAN - DENORM
11824         short           fsub_res_snan   - tbl_fsub_op # QNAN - SNAN
11825         short           tbl_fsub_op     - tbl_fsub_op #
11826         short           tbl_fsub_op     - tbl_fsub_op #
11827
11828         short           fsub_norm       - tbl_fsub_op # DENORM - NORM
11829         short           fsub_zero_src   - tbl_fsub_op # DENORM - ZERO
11830         short           fsub_inf_src    - tbl_fsub_op # DENORM - INF
11831         short           fsub_res_qnan   - tbl_fsub_op # NORM - QNAN
11832         short           fsub_norm       - tbl_fsub_op # DENORM - DENORM
11833         short           fsub_res_snan   - tbl_fsub_op # NORM - SNAN
11834         short           tbl_fsub_op     - tbl_fsub_op #
11835         short           tbl_fsub_op     - tbl_fsub_op #
11836
11837         short           fsub_res_snan   - tbl_fsub_op # SNAN - NORM
11838         short           fsub_res_snan   - tbl_fsub_op # SNAN - ZERO
11839         short           fsub_res_snan   - tbl_fsub_op # SNAN - INF
11840         short           fsub_res_snan   - tbl_fsub_op # SNAN - QNAN
11841         short           fsub_res_snan   - tbl_fsub_op # SNAN - DENORM
11842         short           fsub_res_snan   - tbl_fsub_op # SNAN - SNAN
11843         short           tbl_fsub_op     - tbl_fsub_op #
11844         short           tbl_fsub_op     - tbl_fsub_op #
11845
11846 fsub_res_qnan:
11847         bra.l           res_qnan
11848 fsub_res_snan:
11849         bra.l           res_snan
11850
11851 #
11852 # both operands are ZEROes
11853 #
11854 fsub_zero_2:
11855         mov.b           SRC_EX(%a0),%d0
11856         mov.b           DST_EX(%a1),%d1
11857         eor.b           %d1,%d0
11858         bpl.b           fsub_zero_2_chk_rm
11859
11860 # the signs are opposite, so, return a ZERO w/ the sign of the dst ZERO
11861         tst.b           %d0                     # is dst negative?
11862         bmi.b           fsub_zero_2_rm          # yes
11863         fmov.s          &0x00000000,%fp0        # no; return +ZERO
11864         mov.b           &z_bmask,FPSR_CC(%a6)   # set Z
11865         rts
11866
11867 #
11868 # the ZEROes have the same signs:
11869 # - therefore, we return +ZERO if the rounding mode is RN,RZ, or RP
11870 # - -ZERO is returned in the case of RM.
11871 #
11872 fsub_zero_2_chk_rm:
11873         mov.b           3+L_SCR3(%a6),%d1
11874         andi.b          &0x30,%d1               # extract rnd mode
11875         cmpi.b          %d1,&rm_mode*0x10       # is rnd mode = RM?
11876         beq.b           fsub_zero_2_rm          # yes
11877         fmov.s          &0x00000000,%fp0        # no; return +ZERO
11878         mov.b           &z_bmask,FPSR_CC(%a6)   # set Z
11879         rts
11880
11881 fsub_zero_2_rm:
11882         fmov.s          &0x80000000,%fp0        # return -ZERO
11883         mov.b           &z_bmask+neg_bmask,FPSR_CC(%a6) # set Z/NEG
11884         rts
11885
11886 #
11887 # one operand is a ZERO and the other is a DENORM or a NORM.
11888 # scale the DENORM or NORM and jump to the regular fsub routine.
11889 #
11890 fsub_zero_dst:
11891         mov.w           SRC_EX(%a0),FP_SCR0_EX(%a6)
11892         mov.l           SRC_HI(%a0),FP_SCR0_HI(%a6)
11893         mov.l           SRC_LO(%a0),FP_SCR0_LO(%a6)
11894         bsr.l           scale_to_zero_src       # scale the operand
11895         clr.w           FP_SCR1_EX(%a6)
11896         clr.l           FP_SCR1_HI(%a6)
11897         clr.l           FP_SCR1_LO(%a6)
11898         bra.w           fsub_zero_entry         # go execute fsub
11899
11900 fsub_zero_src:
11901         mov.w           DST_EX(%a1),FP_SCR1_EX(%a6)
11902         mov.l           DST_HI(%a1),FP_SCR1_HI(%a6)
11903         mov.l           DST_LO(%a1),FP_SCR1_LO(%a6)
11904         bsr.l           scale_to_zero_dst       # scale the operand
11905         clr.w           FP_SCR0_EX(%a6)
11906         clr.l           FP_SCR0_HI(%a6)
11907         clr.l           FP_SCR0_LO(%a6)
11908         bra.w           fsub_zero_entry         # go execute fsub
11909
11910 #
11911 # both operands are INFs. an OPERR will result if the INFs have the
11912 # same signs. else,
11913 #
11914 fsub_inf_2:
11915         mov.b           SRC_EX(%a0),%d0         # exclusive or the signs
11916         mov.b           DST_EX(%a1),%d1
11917         eor.b           %d1,%d0
11918         bpl.l           res_operr               # weed out (-INF)+(+INF)
11919
11920 # ok, so it's not an OPERR. but we do have to remember to return
11921 # the src INF since that's where the 881/882 gets the j-bit.
11922
11923 fsub_inf_src:
11924         fmovm.x         SRC(%a0),&0x80          # return src INF
11925         fneg.x          %fp0                    # invert sign
11926         fbge.w          fsub_inf_done           # sign is now positive
11927         mov.b           &neg_bmask+inf_bmask,FPSR_CC(%a6) # set INF/NEG
11928         rts
11929
11930 fsub_inf_dst:
11931         fmovm.x         DST(%a1),&0x80          # return dst INF
11932         tst.b           DST_EX(%a1)             # is INF negative?
11933         bpl.b           fsub_inf_done           # no
11934         mov.b           &neg_bmask+inf_bmask,FPSR_CC(%a6) # set INF/NEG
11935         rts
11936
11937 fsub_inf_done:
11938         mov.b           &inf_bmask,FPSR_CC(%a6) # set INF
11939         rts
11940
11941 #########################################################################
11942 # XDEF **************************************************************** #
11943 #       fsqrt(): emulates the fsqrt instruction                         #
11944 #       fssqrt(): emulates the fssqrt instruction                       #
11945 #       fdsqrt(): emulates the fdsqrt instruction                       #
11946 #                                                                       #
11947 # XREF **************************************************************** #
11948 #       scale_sqrt() - scale the source operand                         #
11949 #       unf_res() - return default underflow result                     #
11950 #       ovf_res() - return default overflow result                      #
11951 #       res_qnan_1op() - return QNAN result                             #
11952 #       res_snan_1op() - return SNAN result                             #
11953 #                                                                       #
11954 # INPUT *************************************************************** #
11955 #       a0 = pointer to extended precision source operand               #
11956 #       d0  rnd prec,mode                                               #
11957 #                                                                       #
11958 # OUTPUT ************************************************************** #
11959 #       fp0 = result                                                    #
11960 #       fp1 = EXOP (if exception occurred)                              #
11961 #                                                                       #
11962 # ALGORITHM *********************************************************** #
11963 #       Handle NANs, infinities, and zeroes as special cases. Divide    #
11964 # norms/denorms into ext/sgl/dbl precision.                             #
11965 #       For norms/denorms, scale the exponents such that a sqrt         #
11966 # instruction won't cause an exception. Use the regular fsqrt to        #
11967 # compute a result. Check if the regular operands would have taken      #
11968 # an exception. If so, return the default overflow/underflow result     #
11969 # and return the EXOP if exceptions are enabled. Else, scale the        #
11970 # result operand to the proper exponent.                                #
11971 #                                                                       #
11972 #########################################################################
11973
11974         global          fssqrt
11975 fssqrt:
11976         andi.b          &0x30,%d0               # clear rnd prec
11977         ori.b           &s_mode*0x10,%d0        # insert sgl precision
11978         bra.b           fsqrt
11979
11980         global          fdsqrt
11981 fdsqrt:
11982         andi.b          &0x30,%d0               # clear rnd prec
11983         ori.b           &d_mode*0x10,%d0        # insert dbl precision
11984
11985         global          fsqrt
11986 fsqrt:
11987         mov.l           %d0,L_SCR3(%a6)         # store rnd info
11988         clr.w           %d1
11989         mov.b           STAG(%a6),%d1
11990         bne.w           fsqrt_not_norm          # optimize on non-norm input
11991
11992 #
11993 # SQUARE ROOT: norms and denorms ONLY!
11994 #
11995 fsqrt_norm:
11996         tst.b           SRC_EX(%a0)             # is operand negative?
11997         bmi.l           res_operr               # yes
11998
11999         andi.b          &0xc0,%d0               # is precision extended?
12000         bne.b           fsqrt_not_ext           # no; go handle sgl or dbl
12001
12002         fmov.l          L_SCR3(%a6),%fpcr       # set FPCR
12003         fmov.l          &0x0,%fpsr              # clear FPSR
12004
12005         fsqrt.x         (%a0),%fp0              # execute square root
12006
12007         fmov.l          %fpsr,%d1
12008         or.l            %d1,USER_FPSR(%a6)      # set N,INEX
12009
12010         rts
12011
12012 fsqrt_denorm:
12013         tst.b           SRC_EX(%a0)             # is operand negative?
12014         bmi.l           res_operr               # yes
12015
12016         andi.b          &0xc0,%d0               # is precision extended?
12017         bne.b           fsqrt_not_ext           # no; go handle sgl or dbl
12018
12019         mov.w           SRC_EX(%a0),FP_SCR0_EX(%a6)
12020         mov.l           SRC_HI(%a0),FP_SCR0_HI(%a6)
12021         mov.l           SRC_LO(%a0),FP_SCR0_LO(%a6)
12022
12023         bsr.l           scale_sqrt              # calculate scale factor
12024
12025         bra.w           fsqrt_sd_normal
12026
12027 #
12028 # operand is either single or double
12029 #
12030 fsqrt_not_ext:
12031         cmpi.b          %d0,&s_mode*0x10        # separate sgl/dbl prec
12032         bne.w           fsqrt_dbl
12033
12034 #
12035 # operand is to be rounded to single precision
12036 #
12037 fsqrt_sgl:
12038         mov.w           SRC_EX(%a0),FP_SCR0_EX(%a6)
12039         mov.l           SRC_HI(%a0),FP_SCR0_HI(%a6)
12040         mov.l           SRC_LO(%a0),FP_SCR0_LO(%a6)
12041
12042         bsr.l           scale_sqrt              # calculate scale factor
12043
12044         cmpi.l          %d0,&0x3fff-0x3f81      # will move in underflow?
12045         beq.w           fsqrt_sd_may_unfl
12046         bgt.w           fsqrt_sd_unfl           # yes; go handle underflow
12047         cmpi.l          %d0,&0x3fff-0x407f      # will move in overflow?
12048         beq.w           fsqrt_sd_may_ovfl       # maybe; go check
12049         blt.w           fsqrt_sd_ovfl           # yes; go handle overflow
12050
12051 #
12052 # operand will NOT overflow or underflow when moved in to the fp reg file
12053 #
12054 fsqrt_sd_normal:
12055         fmov.l          &0x0,%fpsr              # clear FPSR
12056         fmov.l          L_SCR3(%a6),%fpcr       # set FPCR
12057
12058         fsqrt.x         FP_SCR0(%a6),%fp0       # perform absolute
12059
12060         fmov.l          %fpsr,%d1               # save FPSR
12061         fmov.l          &0x0,%fpcr              # clear FPCR
12062
12063         or.l            %d1,USER_FPSR(%a6)      # save INEX2,N
12064
12065 fsqrt_sd_normal_exit:
12066         mov.l           %d2,-(%sp)              # save d2
12067         fmovm.x         &0x80,FP_SCR0(%a6)      # store out result
12068         mov.w           FP_SCR0_EX(%a6),%d1     # load sgn,exp
12069         mov.l           %d1,%d2                 # make a copy
12070         andi.l          &0x7fff,%d1             # strip sign
12071         sub.l           %d0,%d1                 # add scale factor
12072         andi.w          &0x8000,%d2             # keep old sign
12073         or.w            %d1,%d2                 # concat old sign,new exp
12074         mov.w           %d2,FP_SCR0_EX(%a6)     # insert new exponent
12075         mov.l           (%sp)+,%d2              # restore d2
12076         fmovm.x         FP_SCR0(%a6),&0x80      # return result in fp0
12077         rts
12078
12079 #
12080 # operand is to be rounded to double precision
12081 #
12082 fsqrt_dbl:
12083         mov.w           SRC_EX(%a0),FP_SCR0_EX(%a6)
12084         mov.l           SRC_HI(%a0),FP_SCR0_HI(%a6)
12085         mov.l           SRC_LO(%a0),FP_SCR0_LO(%a6)
12086
12087         bsr.l           scale_sqrt              # calculate scale factor
12088
12089         cmpi.l          %d0,&0x3fff-0x3c01      # will move in underflow?
12090         beq.w           fsqrt_sd_may_unfl
12091         bgt.b           fsqrt_sd_unfl           # yes; go handle underflow
12092         cmpi.l          %d0,&0x3fff-0x43ff      # will move in overflow?
12093         beq.w           fsqrt_sd_may_ovfl       # maybe; go check
12094         blt.w           fsqrt_sd_ovfl           # yes; go handle overflow
12095         bra.w           fsqrt_sd_normal         # no; ho handle normalized op
12096
12097 # we're on the line here and the distinguising characteristic is whether
12098 # the exponent is 3fff or 3ffe. if it's 3ffe, then it's a safe number
12099 # elsewise fall through to underflow.
12100 fsqrt_sd_may_unfl:
12101         btst            &0x0,1+FP_SCR0_EX(%a6)  # is exponent 0x3fff?
12102         bne.w           fsqrt_sd_normal         # yes, so no underflow
12103
12104 #
12105 # operand WILL underflow when moved in to the fp register file
12106 #
12107 fsqrt_sd_unfl:
12108         bset            &unfl_bit,FPSR_EXCEPT(%a6) # set unfl exc bit
12109
12110         fmov.l          &rz_mode*0x10,%fpcr     # set FPCR
12111         fmov.l          &0x0,%fpsr              # clear FPSR
12112
12113         fsqrt.x         FP_SCR0(%a6),%fp0       # execute square root
12114
12115         fmov.l          %fpsr,%d1               # save status
12116         fmov.l          &0x0,%fpcr              # clear FPCR
12117
12118         or.l            %d1,USER_FPSR(%a6)      # save INEX2,N
12119
12120 # if underflow or inexact is enabled, go calculate EXOP first.
12121         mov.b           FPCR_ENABLE(%a6),%d1
12122         andi.b          &0x0b,%d1               # is UNFL or INEX enabled?
12123         bne.b           fsqrt_sd_unfl_ena       # yes
12124
12125 fsqrt_sd_unfl_dis:
12126         fmovm.x         &0x80,FP_SCR0(%a6)      # store out result
12127
12128         lea             FP_SCR0(%a6),%a0        # pass: result addr
12129         mov.l           L_SCR3(%a6),%d1         # pass: rnd prec,mode
12130         bsr.l           unf_res                 # calculate default result
12131         or.b            %d0,FPSR_CC(%a6)        # set possible 'Z' ccode
12132         fmovm.x         FP_SCR0(%a6),&0x80      # return default result in fp0
12133         rts
12134
12135 #
12136 # operand will underflow AND underflow is enabled.
12137 # therefore, we must return the result rounded to extended precision.
12138 #
12139 fsqrt_sd_unfl_ena:
12140         mov.l           FP_SCR0_HI(%a6),FP_SCR1_HI(%a6)
12141         mov.l           FP_SCR0_LO(%a6),FP_SCR1_LO(%a6)
12142         mov.w           FP_SCR0_EX(%a6),%d1     # load current exponent
12143
12144         mov.l           %d2,-(%sp)              # save d2
12145         mov.l           %d1,%d2                 # make a copy
12146         andi.l          &0x7fff,%d1             # strip sign
12147         andi.w          &0x8000,%d2             # keep old sign
12148         sub.l           %d0,%d1                 # subtract scale factor
12149         addi.l          &0x6000,%d1             # add new bias
12150         andi.w          &0x7fff,%d1
12151         or.w            %d2,%d1                 # concat new sign,new exp
12152         mov.w           %d1,FP_SCR1_EX(%a6)     # insert new exp
12153         fmovm.x         FP_SCR1(%a6),&0x40      # return EXOP in fp1
12154         mov.l           (%sp)+,%d2              # restore d2
12155         bra.b           fsqrt_sd_unfl_dis
12156
12157 #
12158 # operand WILL overflow.
12159 #
12160 fsqrt_sd_ovfl:
12161         fmov.l          &0x0,%fpsr              # clear FPSR
12162         fmov.l          L_SCR3(%a6),%fpcr       # set FPCR
12163
12164         fsqrt.x         FP_SCR0(%a6),%fp0       # perform square root
12165
12166         fmov.l          &0x0,%fpcr              # clear FPCR
12167         fmov.l          %fpsr,%d1               # save FPSR
12168
12169         or.l            %d1,USER_FPSR(%a6)      # save INEX2,N
12170
12171 fsqrt_sd_ovfl_tst:
12172         or.l            &ovfl_inx_mask,USER_FPSR(%a6) # set ovfl/aovfl/ainex
12173
12174         mov.b           FPCR_ENABLE(%a6),%d1
12175         andi.b          &0x13,%d1               # is OVFL or INEX enabled?
12176         bne.b           fsqrt_sd_ovfl_ena       # yes
12177
12178 #
12179 # OVFL is not enabled; therefore, we must create the default result by
12180 # calling ovf_res().
12181 #
12182 fsqrt_sd_ovfl_dis:
12183         btst            &neg_bit,FPSR_CC(%a6)   # is result negative?
12184         sne             %d1                     # set sign param accordingly
12185         mov.l           L_SCR3(%a6),%d0         # pass: prec,mode
12186         bsr.l           ovf_res                 # calculate default result
12187         or.b            %d0,FPSR_CC(%a6)        # set INF,N if applicable
12188         fmovm.x         (%a0),&0x80             # return default result in fp0
12189         rts
12190
12191 #
12192 # OVFL is enabled.
12193 # the INEX2 bit has already been updated by the round to the correct precision.
12194 # now, round to extended(and don't alter the FPSR).
12195 #
12196 fsqrt_sd_ovfl_ena:
12197         mov.l           %d2,-(%sp)              # save d2
12198         mov.w           FP_SCR0_EX(%a6),%d1     # fetch {sgn,exp}
12199         mov.l           %d1,%d2                 # make a copy
12200         andi.l          &0x7fff,%d1             # strip sign
12201         andi.w          &0x8000,%d2             # keep old sign
12202         sub.l           %d0,%d1                 # add scale factor
12203         subi.l          &0x6000,%d1             # subtract bias
12204         andi.w          &0x7fff,%d1
12205         or.w            %d2,%d1                 # concat sign,exp
12206         mov.w           %d1,FP_SCR0_EX(%a6)     # insert new exponent
12207         fmovm.x         FP_SCR0(%a6),&0x40      # return EXOP in fp1
12208         mov.l           (%sp)+,%d2              # restore d2
12209         bra.b           fsqrt_sd_ovfl_dis
12210
12211 #
12212 # the move in MAY underflow. so...
12213 #
12214 fsqrt_sd_may_ovfl:
12215         btst            &0x0,1+FP_SCR0_EX(%a6)  # is exponent 0x3fff?
12216         bne.w           fsqrt_sd_ovfl           # yes, so overflow
12217
12218         fmov.l          &0x0,%fpsr              # clear FPSR
12219         fmov.l          L_SCR3(%a6),%fpcr       # set FPCR
12220
12221         fsqrt.x         FP_SCR0(%a6),%fp0       # perform absolute
12222
12223         fmov.l          %fpsr,%d1               # save status
12224         fmov.l          &0x0,%fpcr              # clear FPCR
12225
12226         or.l            %d1,USER_FPSR(%a6)      # save INEX2,N
12227
12228         fmov.x          %fp0,%fp1               # make a copy of result
12229         fcmp.b          %fp1,&0x1               # is |result| >= 1.b?
12230         fbge.w          fsqrt_sd_ovfl_tst       # yes; overflow has occurred
12231
12232 # no, it didn't overflow; we have correct result
12233         bra.w           fsqrt_sd_normal_exit
12234
12235 ##########################################################################
12236
12237 #
12238 # input is not normalized; what is it?
12239 #
12240 fsqrt_not_norm:
12241         cmpi.b          %d1,&DENORM             # weed out DENORM
12242         beq.w           fsqrt_denorm
12243         cmpi.b          %d1,&ZERO               # weed out ZERO
12244         beq.b           fsqrt_zero
12245         cmpi.b          %d1,&INF                # weed out INF
12246         beq.b           fsqrt_inf
12247         cmpi.b          %d1,&SNAN               # weed out SNAN
12248         beq.l           res_snan_1op
12249         bra.l           res_qnan_1op
12250
12251 #
12252 #       fsqrt(+0) = +0
12253 #       fsqrt(-0) = -0
12254 #       fsqrt(+INF) = +INF
12255 #       fsqrt(-INF) = OPERR
12256 #
12257 fsqrt_zero:
12258         tst.b           SRC_EX(%a0)             # is ZERO positive or negative?
12259         bmi.b           fsqrt_zero_m            # negative
12260 fsqrt_zero_p:
12261         fmov.s          &0x00000000,%fp0        # return +ZERO
12262         mov.b           &z_bmask,FPSR_CC(%a6)   # set 'Z' ccode bit
12263         rts
12264 fsqrt_zero_m:
12265         fmov.s          &0x80000000,%fp0        # return -ZERO
12266         mov.b           &z_bmask+neg_bmask,FPSR_CC(%a6) # set 'Z','N' ccode bits
12267         rts
12268
12269 fsqrt_inf:
12270         tst.b           SRC_EX(%a0)             # is INF positive or negative?
12271         bmi.l           res_operr               # negative
12272 fsqrt_inf_p:
12273         fmovm.x         SRC(%a0),&0x80          # return +INF in fp0
12274         mov.b           &inf_bmask,FPSR_CC(%a6) # set 'I' ccode bit
12275         rts
12276
12277 #########################################################################
12278 # XDEF **************************************************************** #
12279 #       fetch_dreg(): fetch register according to index in d1           #
12280 #                                                                       #
12281 # XREF **************************************************************** #
12282 #       None                                                            #
12283 #                                                                       #
12284 # INPUT *************************************************************** #
12285 #       d1 = index of register to fetch from                            #
12286 #                                                                       #
12287 # OUTPUT ************************************************************** #
12288 #       d0 = value of register fetched                                  #
12289 #                                                                       #
12290 # ALGORITHM *********************************************************** #
12291 #       According to the index value in d1 which can range from zero    #
12292 # to fifteen, load the corresponding register file value (where         #
12293 # address register indexes start at 8). D0/D1/A0/A1/A6/A7 are on the    #
12294 # stack. The rest should still be in their original places.             #
12295 #                                                                       #
12296 #########################################################################
12297
12298 # this routine leaves d1 intact for subsequent store_dreg calls.
12299         global          fetch_dreg
12300 fetch_dreg:
12301         mov.w           (tbl_fdreg.b,%pc,%d1.w*2),%d0
12302         jmp             (tbl_fdreg.b,%pc,%d0.w*1)
12303
12304 tbl_fdreg:
12305         short           fdreg0 - tbl_fdreg
12306         short           fdreg1 - tbl_fdreg
12307         short           fdreg2 - tbl_fdreg
12308         short           fdreg3 - tbl_fdreg
12309         short           fdreg4 - tbl_fdreg
12310         short           fdreg5 - tbl_fdreg
12311         short           fdreg6 - tbl_fdreg
12312         short           fdreg7 - tbl_fdreg
12313         short           fdreg8 - tbl_fdreg
12314         short           fdreg9 - tbl_fdreg
12315         short           fdrega - tbl_fdreg
12316         short           fdregb - tbl_fdreg
12317         short           fdregc - tbl_fdreg
12318         short           fdregd - tbl_fdreg
12319         short           fdrege - tbl_fdreg
12320         short           fdregf - tbl_fdreg
12321
12322 fdreg0:
12323         mov.l           EXC_DREGS+0x0(%a6),%d0
12324         rts
12325 fdreg1:
12326         mov.l           EXC_DREGS+0x4(%a6),%d0
12327         rts
12328 fdreg2:
12329         mov.l           %d2,%d0
12330         rts
12331 fdreg3:
12332         mov.l           %d3,%d0
12333         rts
12334 fdreg4:
12335         mov.l           %d4,%d0
12336         rts
12337 fdreg5:
12338         mov.l           %d5,%d0
12339         rts
12340 fdreg6:
12341         mov.l           %d6,%d0
12342         rts
12343 fdreg7:
12344         mov.l           %d7,%d0
12345         rts
12346 fdreg8:
12347         mov.l           EXC_DREGS+0x8(%a6),%d0
12348         rts
12349 fdreg9:
12350         mov.l           EXC_DREGS+0xc(%a6),%d0
12351         rts
12352 fdrega:
12353         mov.l           %a2,%d0
12354         rts
12355 fdregb:
12356         mov.l           %a3,%d0
12357         rts
12358 fdregc:
12359         mov.l           %a4,%d0
12360         rts
12361 fdregd:
12362         mov.l           %a5,%d0
12363         rts
12364 fdrege:
12365         mov.l           (%a6),%d0
12366         rts
12367 fdregf:
12368         mov.l           EXC_A7(%a6),%d0
12369         rts
12370
12371 #########################################################################
12372 # XDEF **************************************************************** #
12373 #       store_dreg_l(): store longword to data register specified by d1 #
12374 #                                                                       #
12375 # XREF **************************************************************** #
12376 #       None                                                            #
12377 #                                                                       #
12378 # INPUT *************************************************************** #
12379 #       d0 = longowrd value to store                                    #
12380 #       d1 = index of register to fetch from                            #
12381 #                                                                       #
12382 # OUTPUT ************************************************************** #
12383 #       (data register is updated)                                      #
12384 #                                                                       #
12385 # ALGORITHM *********************************************************** #
12386 #       According to the index value in d1, store the longword value    #
12387 # in d0 to the corresponding data register. D0/D1 are on the stack      #
12388 # while the rest are in their initial places.                           #
12389 #                                                                       #
12390 #########################################################################
12391
12392         global          store_dreg_l
12393 store_dreg_l:
12394         mov.w           (tbl_sdregl.b,%pc,%d1.w*2),%d1
12395         jmp             (tbl_sdregl.b,%pc,%d1.w*1)
12396
12397 tbl_sdregl:
12398         short           sdregl0 - tbl_sdregl
12399         short           sdregl1 - tbl_sdregl
12400         short           sdregl2 - tbl_sdregl
12401         short           sdregl3 - tbl_sdregl
12402         short           sdregl4 - tbl_sdregl
12403         short           sdregl5 - tbl_sdregl
12404         short           sdregl6 - tbl_sdregl
12405         short           sdregl7 - tbl_sdregl
12406
12407 sdregl0:
12408         mov.l           %d0,EXC_DREGS+0x0(%a6)
12409         rts
12410 sdregl1:
12411         mov.l           %d0,EXC_DREGS+0x4(%a6)
12412         rts
12413 sdregl2:
12414         mov.l           %d0,%d2
12415         rts
12416 sdregl3:
12417         mov.l           %d0,%d3
12418         rts
12419 sdregl4:
12420         mov.l           %d0,%d4
12421         rts
12422 sdregl5:
12423         mov.l           %d0,%d5
12424         rts
12425 sdregl6:
12426         mov.l           %d0,%d6
12427         rts
12428 sdregl7:
12429         mov.l           %d0,%d7
12430         rts
12431
12432 #########################################################################
12433 # XDEF **************************************************************** #
12434 #       store_dreg_w(): store word to data register specified by d1     #
12435 #                                                                       #
12436 # XREF **************************************************************** #
12437 #       None                                                            #
12438 #                                                                       #
12439 # INPUT *************************************************************** #
12440 #       d0 = word value to store                                        #
12441 #       d1 = index of register to fetch from                            #
12442 #                                                                       #
12443 # OUTPUT ************************************************************** #
12444 #       (data register is updated)                                      #
12445 #                                                                       #
12446 # ALGORITHM *********************************************************** #
12447 #       According to the index value in d1, store the word value        #
12448 # in d0 to the corresponding data register. D0/D1 are on the stack      #
12449 # while the rest are in their initial places.                           #
12450 #                                                                       #
12451 #########################################################################
12452
12453         global          store_dreg_w
12454 store_dreg_w:
12455         mov.w           (tbl_sdregw.b,%pc,%d1.w*2),%d1
12456         jmp             (tbl_sdregw.b,%pc,%d1.w*1)
12457
12458 tbl_sdregw:
12459         short           sdregw0 - tbl_sdregw
12460         short           sdregw1 - tbl_sdregw
12461         short           sdregw2 - tbl_sdregw
12462         short           sdregw3 - tbl_sdregw
12463         short           sdregw4 - tbl_sdregw
12464         short           sdregw5 - tbl_sdregw
12465         short           sdregw6 - tbl_sdregw
12466         short           sdregw7 - tbl_sdregw
12467
12468 sdregw0:
12469         mov.w           %d0,2+EXC_DREGS+0x0(%a6)
12470         rts
12471 sdregw1:
12472         mov.w           %d0,2+EXC_DREGS+0x4(%a6)
12473         rts
12474 sdregw2:
12475         mov.w           %d0,%d2
12476         rts
12477 sdregw3:
12478         mov.w           %d0,%d3
12479         rts
12480 sdregw4:
12481         mov.w           %d0,%d4
12482         rts
12483 sdregw5:
12484         mov.w           %d0,%d5
12485         rts
12486 sdregw6:
12487         mov.w           %d0,%d6
12488         rts
12489 sdregw7:
12490         mov.w           %d0,%d7
12491         rts
12492
12493 #########################################################################
12494 # XDEF **************************************************************** #
12495 #       store_dreg_b(): store byte to data register specified by d1     #
12496 #                                                                       #
12497 # XREF **************************************************************** #
12498 #       None                                                            #
12499 #                                                                       #
12500 # INPUT *************************************************************** #
12501 #       d0 = byte value to store                                        #
12502 #       d1 = index of register to fetch from                            #
12503 #                                                                       #
12504 # OUTPUT ************************************************************** #
12505 #       (data register is updated)                                      #
12506 #                                                                       #
12507 # ALGORITHM *********************************************************** #
12508 #       According to the index value in d1, store the byte value        #
12509 # in d0 to the corresponding data register. D0/D1 are on the stack      #
12510 # while the rest are in their initial places.                           #
12511 #                                                                       #
12512 #########################################################################
12513
12514         global          store_dreg_b
12515 store_dreg_b:
12516         mov.w           (tbl_sdregb.b,%pc,%d1.w*2),%d1
12517         jmp             (tbl_sdregb.b,%pc,%d1.w*1)
12518
12519 tbl_sdregb:
12520         short           sdregb0 - tbl_sdregb
12521         short           sdregb1 - tbl_sdregb
12522         short           sdregb2 - tbl_sdregb
12523         short           sdregb3 - tbl_sdregb
12524         short           sdregb4 - tbl_sdregb
12525         short           sdregb5 - tbl_sdregb
12526         short           sdregb6 - tbl_sdregb
12527         short           sdregb7 - tbl_sdregb
12528
12529 sdregb0:
12530         mov.b           %d0,3+EXC_DREGS+0x0(%a6)
12531         rts
12532 sdregb1:
12533         mov.b           %d0,3+EXC_DREGS+0x4(%a6)
12534         rts
12535 sdregb2:
12536         mov.b           %d0,%d2
12537         rts
12538 sdregb3:
12539         mov.b           %d0,%d3
12540         rts
12541 sdregb4:
12542         mov.b           %d0,%d4
12543         rts
12544 sdregb5:
12545         mov.b           %d0,%d5
12546         rts
12547 sdregb6:
12548         mov.b           %d0,%d6
12549         rts
12550 sdregb7:
12551         mov.b           %d0,%d7
12552         rts
12553
12554 #########################################################################
12555 # XDEF **************************************************************** #
12556 #       inc_areg(): increment an address register by the value in d0    #
12557 #                                                                       #
12558 # XREF **************************************************************** #
12559 #       None                                                            #
12560 #                                                                       #
12561 # INPUT *************************************************************** #
12562 #       d0 = amount to increment by                                     #
12563 #       d1 = index of address register to increment                     #
12564 #                                                                       #
12565 # OUTPUT ************************************************************** #
12566 #       (address register is updated)                                   #
12567 #                                                                       #
12568 # ALGORITHM *********************************************************** #
12569 #       Typically used for an instruction w/ a post-increment <ea>,     #
12570 # this routine adds the increment value in d0 to the address register   #
12571 # specified by d1. A0/A1/A6/A7 reside on the stack. The rest reside     #
12572 # in their original places.                                             #
12573 #       For a7, if the increment amount is one, then we have to         #
12574 # increment by two. For any a7 update, set the mia7_flag so that if     #
12575 # an access error exception occurs later in emulation, this address     #
12576 # register update can be undone.                                        #
12577 #                                                                       #
12578 #########################################################################
12579
12580         global          inc_areg
12581 inc_areg:
12582         mov.w           (tbl_iareg.b,%pc,%d1.w*2),%d1
12583         jmp             (tbl_iareg.b,%pc,%d1.w*1)
12584
12585 tbl_iareg:
12586         short           iareg0 - tbl_iareg
12587         short           iareg1 - tbl_iareg
12588         short           iareg2 - tbl_iareg
12589         short           iareg3 - tbl_iareg
12590         short           iareg4 - tbl_iareg
12591         short           iareg5 - tbl_iareg
12592         short           iareg6 - tbl_iareg
12593         short           iareg7 - tbl_iareg
12594
12595 iareg0: add.l           %d0,EXC_DREGS+0x8(%a6)
12596         rts
12597 iareg1: add.l           %d0,EXC_DREGS+0xc(%a6)
12598         rts
12599 iareg2: add.l           %d0,%a2
12600         rts
12601 iareg3: add.l           %d0,%a3
12602         rts
12603 iareg4: add.l           %d0,%a4
12604         rts
12605 iareg5: add.l           %d0,%a5
12606         rts
12607 iareg6: add.l           %d0,(%a6)
12608         rts
12609 iareg7: mov.b           &mia7_flg,SPCOND_FLG(%a6)
12610         cmpi.b          %d0,&0x1
12611         beq.b           iareg7b
12612         add.l           %d0,EXC_A7(%a6)
12613         rts
12614 iareg7b:
12615         addq.l          &0x2,EXC_A7(%a6)
12616         rts
12617
12618 #########################################################################
12619 # XDEF **************************************************************** #
12620 #       dec_areg(): decrement an address register by the value in d0    #
12621 #                                                                       #
12622 # XREF **************************************************************** #
12623 #       None                                                            #
12624 #                                                                       #
12625 # INPUT *************************************************************** #
12626 #       d0 = amount to decrement by                                     #
12627 #       d1 = index of address register to decrement                     #
12628 #                                                                       #
12629 # OUTPUT ************************************************************** #
12630 #       (address register is updated)                                   #
12631 #                                                                       #
12632 # ALGORITHM *********************************************************** #
12633 #       Typically used for an instruction w/ a pre-decrement <ea>,      #
12634 # this routine adds the decrement value in d0 to the address register   #
12635 # specified by d1. A0/A1/A6/A7 reside on the stack. The rest reside     #
12636 # in their original places.                                             #
12637 #       For a7, if the decrement amount is one, then we have to         #
12638 # decrement by two. For any a7 update, set the mda7_flag so that if     #
12639 # an access error exception occurs later in emulation, this address     #
12640 # register update can be undone.                                        #
12641 #                                                                       #
12642 #########################################################################
12643
12644         global          dec_areg
12645 dec_areg:
12646         mov.w           (tbl_dareg.b,%pc,%d1.w*2),%d1
12647         jmp             (tbl_dareg.b,%pc,%d1.w*1)
12648
12649 tbl_dareg:
12650         short           dareg0 - tbl_dareg
12651         short           dareg1 - tbl_dareg
12652         short           dareg2 - tbl_dareg
12653         short           dareg3 - tbl_dareg
12654         short           dareg4 - tbl_dareg
12655         short           dareg5 - tbl_dareg
12656         short           dareg6 - tbl_dareg
12657         short           dareg7 - tbl_dareg
12658
12659 dareg0: sub.l           %d0,EXC_DREGS+0x8(%a6)
12660         rts
12661 dareg1: sub.l           %d0,EXC_DREGS+0xc(%a6)
12662         rts
12663 dareg2: sub.l           %d0,%a2
12664         rts
12665 dareg3: sub.l           %d0,%a3
12666         rts
12667 dareg4: sub.l           %d0,%a4
12668         rts
12669 dareg5: sub.l           %d0,%a5
12670         rts
12671 dareg6: sub.l           %d0,(%a6)
12672         rts
12673 dareg7: mov.b           &mda7_flg,SPCOND_FLG(%a6)
12674         cmpi.b          %d0,&0x1
12675         beq.b           dareg7b
12676         sub.l           %d0,EXC_A7(%a6)
12677         rts
12678 dareg7b:
12679         subq.l          &0x2,EXC_A7(%a6)
12680         rts
12681
12682 ##############################################################################
12683
12684 #########################################################################
12685 # XDEF **************************************************************** #
12686 #       load_fpn1(): load FP register value into FP_SRC(a6).            #
12687 #                                                                       #
12688 # XREF **************************************************************** #
12689 #       None                                                            #
12690 #                                                                       #
12691 # INPUT *************************************************************** #
12692 #       d0 = index of FP register to load                               #
12693 #                                                                       #
12694 # OUTPUT ************************************************************** #
12695 #       FP_SRC(a6) = value loaded from FP register file                 #
12696 #                                                                       #
12697 # ALGORITHM *********************************************************** #
12698 #       Using the index in d0, load FP_SRC(a6) with a number from the   #
12699 # FP register file.                                                     #
12700 #                                                                       #
12701 #########################################################################
12702
12703         global          load_fpn1
12704 load_fpn1:
12705         mov.w           (tbl_load_fpn1.b,%pc,%d0.w*2), %d0
12706         jmp             (tbl_load_fpn1.b,%pc,%d0.w*1)
12707
12708 tbl_load_fpn1:
12709         short           load_fpn1_0 - tbl_load_fpn1
12710         short           load_fpn1_1 - tbl_load_fpn1
12711         short           load_fpn1_2 - tbl_load_fpn1
12712         short           load_fpn1_3 - tbl_load_fpn1
12713         short           load_fpn1_4 - tbl_load_fpn1
12714         short           load_fpn1_5 - tbl_load_fpn1
12715         short           load_fpn1_6 - tbl_load_fpn1
12716         short           load_fpn1_7 - tbl_load_fpn1
12717
12718 load_fpn1_0:
12719         mov.l           0+EXC_FP0(%a6), 0+FP_SRC(%a6)
12720         mov.l           4+EXC_FP0(%a6), 4+FP_SRC(%a6)
12721         mov.l           8+EXC_FP0(%a6), 8+FP_SRC(%a6)
12722         lea             FP_SRC(%a6), %a0
12723         rts
12724 load_fpn1_1:
12725         mov.l           0+EXC_FP1(%a6), 0+FP_SRC(%a6)
12726         mov.l           4+EXC_FP1(%a6), 4+FP_SRC(%a6)
12727         mov.l           8+EXC_FP1(%a6), 8+FP_SRC(%a6)
12728         lea             FP_SRC(%a6), %a0
12729         rts
12730 load_fpn1_2:
12731         fmovm.x         &0x20, FP_SRC(%a6)
12732         lea             FP_SRC(%a6), %a0
12733         rts
12734 load_fpn1_3:
12735         fmovm.x         &0x10, FP_SRC(%a6)
12736         lea             FP_SRC(%a6), %a0
12737         rts
12738 load_fpn1_4:
12739         fmovm.x         &0x08, FP_SRC(%a6)
12740         lea             FP_SRC(%a6), %a0
12741         rts
12742 load_fpn1_5:
12743         fmovm.x         &0x04, FP_SRC(%a6)
12744         lea             FP_SRC(%a6), %a0
12745         rts
12746 load_fpn1_6:
12747         fmovm.x         &0x02, FP_SRC(%a6)
12748         lea             FP_SRC(%a6), %a0
12749         rts
12750 load_fpn1_7:
12751         fmovm.x         &0x01, FP_SRC(%a6)
12752         lea             FP_SRC(%a6), %a0
12753         rts
12754
12755 #############################################################################
12756
12757 #########################################################################
12758 # XDEF **************************************************************** #
12759 #       load_fpn2(): load FP register value into FP_DST(a6).            #
12760 #                                                                       #
12761 # XREF **************************************************************** #
12762 #       None                                                            #
12763 #                                                                       #
12764 # INPUT *************************************************************** #
12765 #       d0 = index of FP register to load                               #
12766 #                                                                       #
12767 # OUTPUT ************************************************************** #
12768 #       FP_DST(a6) = value loaded from FP register file                 #
12769 #                                                                       #
12770 # ALGORITHM *********************************************************** #
12771 #       Using the index in d0, load FP_DST(a6) with a number from the   #
12772 # FP register file.                                                     #
12773 #                                                                       #
12774 #########################################################################
12775
12776         global          load_fpn2
12777 load_fpn2:
12778         mov.w           (tbl_load_fpn2.b,%pc,%d0.w*2), %d0
12779         jmp             (tbl_load_fpn2.b,%pc,%d0.w*1)
12780
12781 tbl_load_fpn2:
12782         short           load_fpn2_0 - tbl_load_fpn2
12783         short           load_fpn2_1 - tbl_load_fpn2
12784         short           load_fpn2_2 - tbl_load_fpn2
12785         short           load_fpn2_3 - tbl_load_fpn2
12786         short           load_fpn2_4 - tbl_load_fpn2
12787         short           load_fpn2_5 - tbl_load_fpn2
12788         short           load_fpn2_6 - tbl_load_fpn2
12789         short           load_fpn2_7 - tbl_load_fpn2
12790
12791 load_fpn2_0:
12792         mov.l           0+EXC_FP0(%a6), 0+FP_DST(%a6)
12793         mov.l           4+EXC_FP0(%a6), 4+FP_DST(%a6)
12794         mov.l           8+EXC_FP0(%a6), 8+FP_DST(%a6)
12795         lea             FP_DST(%a6), %a0
12796         rts
12797 load_fpn2_1:
12798         mov.l           0+EXC_FP1(%a6), 0+FP_DST(%a6)
12799         mov.l           4+EXC_FP1(%a6), 4+FP_DST(%a6)
12800         mov.l           8+EXC_FP1(%a6), 8+FP_DST(%a6)
12801         lea             FP_DST(%a6), %a0
12802         rts
12803 load_fpn2_2:
12804         fmovm.x         &0x20, FP_DST(%a6)
12805         lea             FP_DST(%a6), %a0
12806         rts
12807 load_fpn2_3:
12808         fmovm.x         &0x10, FP_DST(%a6)
12809         lea             FP_DST(%a6), %a0
12810         rts
12811 load_fpn2_4:
12812         fmovm.x         &0x08, FP_DST(%a6)
12813         lea             FP_DST(%a6), %a0
12814         rts
12815 load_fpn2_5:
12816         fmovm.x         &0x04, FP_DST(%a6)
12817         lea             FP_DST(%a6), %a0
12818         rts
12819 load_fpn2_6:
12820         fmovm.x         &0x02, FP_DST(%a6)
12821         lea             FP_DST(%a6), %a0
12822         rts
12823 load_fpn2_7:
12824         fmovm.x         &0x01, FP_DST(%a6)
12825         lea             FP_DST(%a6), %a0
12826         rts
12827
12828 #############################################################################
12829
12830 #########################################################################
12831 # XDEF **************************************************************** #
12832 #       store_fpreg(): store an fp value to the fpreg designated d0.    #
12833 #                                                                       #
12834 # XREF **************************************************************** #
12835 #       None                                                            #
12836 #                                                                       #
12837 # INPUT *************************************************************** #
12838 #       fp0 = extended precision value to store                         #
12839 #       d0  = index of floating-point register                          #
12840 #                                                                       #
12841 # OUTPUT ************************************************************** #
12842 #       None                                                            #
12843 #                                                                       #
12844 # ALGORITHM *********************************************************** #
12845 #       Store the value in fp0 to the FP register designated by the     #
12846 # value in d0. The FP number can be DENORM or SNAN so we have to be     #
12847 # careful that we don't take an exception here.                         #
12848 #                                                                       #
12849 #########################################################################
12850
12851         global          store_fpreg
12852 store_fpreg:
12853         mov.w           (tbl_store_fpreg.b,%pc,%d0.w*2), %d0
12854         jmp             (tbl_store_fpreg.b,%pc,%d0.w*1)
12855
12856 tbl_store_fpreg:
12857         short           store_fpreg_0 - tbl_store_fpreg
12858         short           store_fpreg_1 - tbl_store_fpreg
12859         short           store_fpreg_2 - tbl_store_fpreg
12860         short           store_fpreg_3 - tbl_store_fpreg
12861         short           store_fpreg_4 - tbl_store_fpreg
12862         short           store_fpreg_5 - tbl_store_fpreg
12863         short           store_fpreg_6 - tbl_store_fpreg
12864         short           store_fpreg_7 - tbl_store_fpreg
12865
12866 store_fpreg_0:
12867         fmovm.x         &0x80, EXC_FP0(%a6)
12868         rts
12869 store_fpreg_1:
12870         fmovm.x         &0x80, EXC_FP1(%a6)
12871         rts
12872 store_fpreg_2:
12873         fmovm.x         &0x01, -(%sp)
12874         fmovm.x         (%sp)+, &0x20
12875         rts
12876 store_fpreg_3:
12877         fmovm.x         &0x01, -(%sp)
12878         fmovm.x         (%sp)+, &0x10
12879         rts
12880 store_fpreg_4:
12881         fmovm.x         &0x01, -(%sp)
12882         fmovm.x         (%sp)+, &0x08
12883         rts
12884 store_fpreg_5:
12885         fmovm.x         &0x01, -(%sp)
12886         fmovm.x         (%sp)+, &0x04
12887         rts
12888 store_fpreg_6:
12889         fmovm.x         &0x01, -(%sp)
12890         fmovm.x         (%sp)+, &0x02
12891         rts
12892 store_fpreg_7:
12893         fmovm.x         &0x01, -(%sp)
12894         fmovm.x         (%sp)+, &0x01
12895         rts
12896
12897 #########################################################################
12898 # XDEF **************************************************************** #
12899 #       get_packed(): fetch a packed operand from memory and then       #
12900 #                     convert it to a floating-point binary number.     #
12901 #                                                                       #
12902 # XREF **************************************************************** #
12903 #       _dcalc_ea() - calculate the correct <ea>                        #
12904 #       _mem_read() - fetch the packed operand from memory              #
12905 #       facc_in_x() - the fetch failed so jump to special exit code     #
12906 #       decbin()    - convert packed to binary extended precision       #
12907 #                                                                       #
12908 # INPUT *************************************************************** #
12909 #       None                                                            #
12910 #                                                                       #
12911 # OUTPUT ************************************************************** #
12912 #       If no failure on _mem_read():                                   #
12913 #       FP_SRC(a6) = packed operand now as a binary FP number           #
12914 #                                                                       #
12915 # ALGORITHM *********************************************************** #
12916 #       Get the correct <ea> whihc is the value on the exception stack  #
12917 # frame w/ maybe a correction factor if the <ea> is -(an) or (an)+.     #
12918 # Then, fetch the operand from memory. If the fetch fails, exit         #
12919 # through facc_in_x().                                                  #
12920 #       If the packed operand is a ZERO,NAN, or INF, convert it to      #
12921 # its binary representation here. Else, call decbin() which will        #
12922 # convert the packed value to an extended precision binary value.       #
12923 #                                                                       #
12924 #########################################################################
12925
12926 # the stacked <ea> for packed is correct except for -(An).
12927 # the base reg must be updated for both -(An) and (An)+.
12928         global          get_packed
12929 get_packed:
12930         mov.l           &0xc,%d0                # packed is 12 bytes
12931         bsr.l           _dcalc_ea               # fetch <ea>; correct An
12932
12933         lea             FP_SRC(%a6),%a1         # pass: ptr to super dst
12934         mov.l           &0xc,%d0                # pass: 12 bytes
12935         bsr.l           _dmem_read              # read packed operand
12936
12937         tst.l           %d1                     # did dfetch fail?
12938         bne.l           facc_in_x               # yes
12939
12940 # The packed operand is an INF or a NAN if the exponent field is all ones.
12941         bfextu          FP_SRC(%a6){&1:&15},%d0 # get exp
12942         cmpi.w          %d0,&0x7fff             # INF or NAN?
12943         bne.b           gp_try_zero             # no
12944         rts                                     # operand is an INF or NAN
12945
12946 # The packed operand is a zero if the mantissa is all zero, else it's
12947 # a normal packed op.
12948 gp_try_zero:
12949         mov.b           3+FP_SRC(%a6),%d0       # get byte 4
12950         andi.b          &0x0f,%d0               # clear all but last nybble
12951         bne.b           gp_not_spec             # not a zero
12952         tst.l           FP_SRC_HI(%a6)          # is lw 2 zero?
12953         bne.b           gp_not_spec             # not a zero
12954         tst.l           FP_SRC_LO(%a6)          # is lw 3 zero?
12955         bne.b           gp_not_spec             # not a zero
12956         rts                                     # operand is a ZERO
12957 gp_not_spec:
12958         lea             FP_SRC(%a6),%a0         # pass: ptr to packed op
12959         bsr.l           decbin                  # convert to extended
12960         fmovm.x         &0x80,FP_SRC(%a6)       # make this the srcop
12961         rts
12962
12963 #########################################################################
12964 # decbin(): Converts normalized packed bcd value pointed to by register #
12965 #           a0 to extended-precision value in fp0.                      #
12966 #                                                                       #
12967 # INPUT *************************************************************** #
12968 #       a0 = pointer to normalized packed bcd value                     #
12969 #                                                                       #
12970 # OUTPUT ************************************************************** #
12971 #       fp0 = exact fp representation of the packed bcd value.          #
12972 #                                                                       #
12973 # ALGORITHM *********************************************************** #
12974 #       Expected is a normal bcd (i.e. non-exceptional; all inf, zero,  #
12975 #       and NaN operands are dispatched without entering this routine)  #
12976 #       value in 68881/882 format at location (a0).                     #
12977 #                                                                       #
12978 #       A1. Convert the bcd exponent to binary by successive adds and   #
12979 #       muls. Set the sign according to SE. Subtract 16 to compensate   #
12980 #       for the mantissa which is to be interpreted as 17 integer       #
12981 #       digits, rather than 1 integer and 16 fraction digits.           #
12982 #       Note: this operation can never overflow.                        #
12983 #                                                                       #
12984 #       A2. Convert the bcd mantissa to binary by successive            #
12985 #       adds and muls in FP0. Set the sign according to SM.             #
12986 #       The mantissa digits will be converted with the decimal point    #
12987 #       assumed following the least-significant digit.                  #
12988 #       Note: this operation can never overflow.                        #
12989 #                                                                       #
12990 #       A3. Count the number of leading/trailing zeros in the           #
12991 #       bcd string.  If SE is positive, count the leading zeros;        #
12992 #       if negative, count the trailing zeros.  Set the adjusted        #
12993 #       exponent equal to the exponent from A1 and the zero count       #
12994 #       added if SM = 1 and subtracted if SM = 0.  Scale the            #
12995 #       mantissa the equivalent of forcing in the bcd value:            #
12996 #                                                                       #
12997 #       SM = 0  a non-zero digit in the integer position                #
12998 #       SM = 1  a non-zero digit in Mant0, lsd of the fraction          #
12999 #                                                                       #
13000 #       this will insure that any value, regardless of its              #
13001 #       representation (ex. 0.1E2, 1E1, 10E0, 100E-1), is converted     #
13002 #       consistently.                                                   #
13003 #                                                                       #
13004 #       A4. Calculate the factor 10^exp in FP1 using a table of         #
13005 #       10^(2^n) values.  To reduce the error in forming factors        #
13006 #       greater than 10^27, a directed rounding scheme is used with     #
13007 #       tables rounded to RN, RM, and RP, according to the table        #
13008 #       in the comments of the pwrten section.                          #
13009 #                                                                       #
13010 #       A5. Form the final binary number by scaling the mantissa by     #
13011 #       the exponent factor.  This is done by multiplying the           #
13012 #       mantissa in FP0 by the factor in FP1 if the adjusted            #
13013 #       exponent sign is positive, and dividing FP0 by FP1 if           #
13014 #       it is negative.                                                 #
13015 #                                                                       #
13016 #       Clean up and return. Check if the final mul or div was inexact. #
13017 #       If so, set INEX1 in USER_FPSR.                                  #
13018 #                                                                       #
13019 #########################################################################
13020
13021 #
13022 #       PTENRN, PTENRM, and PTENRP are arrays of powers of 10 rounded
13023 #       to nearest, minus, and plus, respectively.  The tables include
13024 #       10**{1,2,4,8,16,32,64,128,256,512,1024,2048,4096}.  No rounding
13025 #       is required until the power is greater than 27, however, all
13026 #       tables include the first 5 for ease of indexing.
13027 #
13028 RTABLE:
13029         byte            0,0,0,0
13030         byte            2,3,2,3
13031         byte            2,3,3,2
13032         byte            3,2,2,3
13033
13034         set             FNIBS,7
13035         set             FSTRT,0
13036
13037         set             ESTRT,4
13038         set             EDIGITS,2
13039
13040         global          decbin
13041 decbin:
13042         mov.l           0x0(%a0),FP_SCR0_EX(%a6) # make a copy of input
13043         mov.l           0x4(%a0),FP_SCR0_HI(%a6) # so we don't alter it
13044         mov.l           0x8(%a0),FP_SCR0_LO(%a6)
13045
13046         lea             FP_SCR0(%a6),%a0
13047
13048         movm.l          &0x3c00,-(%sp)          # save d2-d5
13049         fmovm.x         &0x1,-(%sp)             # save fp1
13050 #
13051 # Calculate exponent:
13052 #  1. Copy bcd value in memory for use as a working copy.
13053 #  2. Calculate absolute value of exponent in d1 by mul and add.
13054 #  3. Correct for exponent sign.
13055 #  4. Subtract 16 to compensate for interpreting the mant as all integer digits.
13056 #     (i.e., all digits assumed left of the decimal point.)
13057 #
13058 # Register usage:
13059 #
13060 #  calc_e:
13061 #       (*)  d0: temp digit storage
13062 #       (*)  d1: accumulator for binary exponent
13063 #       (*)  d2: digit count
13064 #       (*)  d3: offset pointer
13065 #       ( )  d4: first word of bcd
13066 #       ( )  a0: pointer to working bcd value
13067 #       ( )  a6: pointer to original bcd value
13068 #       (*)  FP_SCR1: working copy of original bcd value
13069 #       (*)  L_SCR1: copy of original exponent word
13070 #
13071 calc_e:
13072         mov.l           &EDIGITS,%d2            # # of nibbles (digits) in fraction part
13073         mov.l           &ESTRT,%d3              # counter to pick up digits
13074         mov.l           (%a0),%d4               # get first word of bcd
13075         clr.l           %d1                     # zero d1 for accumulator
13076 e_gd:
13077         mulu.l          &0xa,%d1                # mul partial product by one digit place
13078         bfextu          %d4{%d3:&4},%d0         # get the digit and zero extend into d0
13079         add.l           %d0,%d1                 # d1 = d1 + d0
13080         addq.b          &4,%d3                  # advance d3 to the next digit
13081         dbf.w           %d2,e_gd                # if we have used all 3 digits, exit loop
13082         btst            &30,%d4                 # get SE
13083         beq.b           e_pos                   # don't negate if pos
13084         neg.l           %d1                     # negate before subtracting
13085 e_pos:
13086         sub.l           &16,%d1                 # sub to compensate for shift of mant
13087         bge.b           e_save                  # if still pos, do not neg
13088         neg.l           %d1                     # now negative, make pos and set SE
13089         or.l            &0x40000000,%d4         # set SE in d4,
13090         or.l            &0x40000000,(%a0)       # and in working bcd
13091 e_save:
13092         mov.l           %d1,-(%sp)              # save exp on stack
13093 #
13094 #
13095 # Calculate mantissa:
13096 #  1. Calculate absolute value of mantissa in fp0 by mul and add.
13097 #  2. Correct for mantissa sign.
13098 #     (i.e., all digits assumed left of the decimal point.)
13099 #
13100 # Register usage:
13101 #
13102 #  calc_m:
13103 #       (*)  d0: temp digit storage
13104 #       (*)  d1: lword counter
13105 #       (*)  d2: digit count
13106 #       (*)  d3: offset pointer
13107 #       ( )  d4: words 2 and 3 of bcd
13108 #       ( )  a0: pointer to working bcd value
13109 #       ( )  a6: pointer to original bcd value
13110 #       (*) fp0: mantissa accumulator
13111 #       ( )  FP_SCR1: working copy of original bcd value
13112 #       ( )  L_SCR1: copy of original exponent word
13113 #
13114 calc_m:
13115         mov.l           &1,%d1                  # word counter, init to 1
13116         fmov.s          &0x00000000,%fp0        # accumulator
13117 #
13118 #
13119 #  Since the packed number has a long word between the first & second parts,
13120 #  get the integer digit then skip down & get the rest of the
13121 #  mantissa.  We will unroll the loop once.
13122 #
13123         bfextu          (%a0){&28:&4},%d0       # integer part is ls digit in long word
13124         fadd.b          %d0,%fp0                # add digit to sum in fp0
13125 #
13126 #
13127 #  Get the rest of the mantissa.
13128 #
13129 loadlw:
13130         mov.l           (%a0,%d1.L*4),%d4       # load mantissa lonqword into d4
13131         mov.l           &FSTRT,%d3              # counter to pick up digits
13132         mov.l           &FNIBS,%d2              # reset number of digits per a0 ptr
13133 md2b:
13134         fmul.s          &0x41200000,%fp0        # fp0 = fp0 * 10
13135         bfextu          %d4{%d3:&4},%d0         # get the digit and zero extend
13136         fadd.b          %d0,%fp0                # fp0 = fp0 + digit
13137 #
13138 #
13139 #  If all the digits (8) in that long word have been converted (d2=0),
13140 #  then inc d1 (=2) to point to the next long word and reset d3 to 0
13141 #  to initialize the digit offset, and set d2 to 7 for the digit count;
13142 #  else continue with this long word.
13143 #
13144         addq.b          &4,%d3                  # advance d3 to the next digit
13145         dbf.w           %d2,md2b                # check for last digit in this lw
13146 nextlw:
13147         addq.l          &1,%d1                  # inc lw pointer in mantissa
13148         cmp.l           %d1,&2                  # test for last lw
13149         ble.b           loadlw                  # if not, get last one
13150 #
13151 #  Check the sign of the mant and make the value in fp0 the same sign.
13152 #
13153 m_sign:
13154         btst            &31,(%a0)               # test sign of the mantissa
13155         beq.b           ap_st_z                 # if clear, go to append/strip zeros
13156         fneg.x          %fp0                    # if set, negate fp0
13157 #
13158 # Append/strip zeros:
13159 #
13160 #  For adjusted exponents which have an absolute value greater than 27*,
13161 #  this routine calculates the amount needed to normalize the mantissa
13162 #  for the adjusted exponent.  That number is subtracted from the exp
13163 #  if the exp was positive, and added if it was negative.  The purpose
13164 #  of this is to reduce the value of the exponent and the possibility
13165 #  of error in calculation of pwrten.
13166 #
13167 #  1. Branch on the sign of the adjusted exponent.
13168 #  2p.(positive exp)
13169 #   2. Check M16 and the digits in lwords 2 and 3 in decending order.
13170 #   3. Add one for each zero encountered until a non-zero digit.
13171 #   4. Subtract the count from the exp.
13172 #   5. Check if the exp has crossed zero in #3 above; make the exp abs
13173 #          and set SE.
13174 #       6. Multiply the mantissa by 10**count.
13175 #  2n.(negative exp)
13176 #   2. Check the digits in lwords 3 and 2 in decending order.
13177 #   3. Add one for each zero encountered until a non-zero digit.
13178 #   4. Add the count to the exp.
13179 #   5. Check if the exp has crossed zero in #3 above; clear SE.
13180 #   6. Divide the mantissa by 10**count.
13181 #
13182 #  *Why 27?  If the adjusted exponent is within -28 < expA < 28, than
13183 #   any adjustment due to append/strip zeros will drive the resultane
13184 #   exponent towards zero.  Since all pwrten constants with a power
13185 #   of 27 or less are exact, there is no need to use this routine to
13186 #   attempt to lessen the resultant exponent.
13187 #
13188 # Register usage:
13189 #
13190 #  ap_st_z:
13191 #       (*)  d0: temp digit storage
13192 #       (*)  d1: zero count
13193 #       (*)  d2: digit count
13194 #       (*)  d3: offset pointer
13195 #       ( )  d4: first word of bcd
13196 #       (*)  d5: lword counter
13197 #       ( )  a0: pointer to working bcd value
13198 #       ( )  FP_SCR1: working copy of original bcd value
13199 #       ( )  L_SCR1: copy of original exponent word
13200 #
13201 #
13202 # First check the absolute value of the exponent to see if this
13203 # routine is necessary.  If so, then check the sign of the exponent
13204 # and do append (+) or strip (-) zeros accordingly.
13205 # This section handles a positive adjusted exponent.
13206 #
13207 ap_st_z:
13208         mov.l           (%sp),%d1               # load expA for range test
13209         cmp.l           %d1,&27                 # test is with 27
13210         ble.w           pwrten                  # if abs(expA) <28, skip ap/st zeros
13211         btst            &30,(%a0)               # check sign of exp
13212         bne.b           ap_st_n                 # if neg, go to neg side
13213         clr.l           %d1                     # zero count reg
13214         mov.l           (%a0),%d4               # load lword 1 to d4
13215         bfextu          %d4{&28:&4},%d0         # get M16 in d0
13216         bne.b           ap_p_fx                 # if M16 is non-zero, go fix exp
13217         addq.l          &1,%d1                  # inc zero count
13218         mov.l           &1,%d5                  # init lword counter
13219         mov.l           (%a0,%d5.L*4),%d4       # get lword 2 to d4
13220         bne.b           ap_p_cl                 # if lw 2 is zero, skip it
13221         addq.l          &8,%d1                  # and inc count by 8
13222         addq.l          &1,%d5                  # inc lword counter
13223         mov.l           (%a0,%d5.L*4),%d4       # get lword 3 to d4
13224 ap_p_cl:
13225         clr.l           %d3                     # init offset reg
13226         mov.l           &7,%d2                  # init digit counter
13227 ap_p_gd:
13228         bfextu          %d4{%d3:&4},%d0         # get digit
13229         bne.b           ap_p_fx                 # if non-zero, go to fix exp
13230         addq.l          &4,%d3                  # point to next digit
13231         addq.l          &1,%d1                  # inc digit counter
13232         dbf.w           %d2,ap_p_gd             # get next digit
13233 ap_p_fx:
13234         mov.l           %d1,%d0                 # copy counter to d2
13235         mov.l           (%sp),%d1               # get adjusted exp from memory
13236         sub.l           %d0,%d1                 # subtract count from exp
13237         bge.b           ap_p_fm                 # if still pos, go to pwrten
13238         neg.l           %d1                     # now its neg; get abs
13239         mov.l           (%a0),%d4               # load lword 1 to d4
13240         or.l            &0x40000000,%d4         # and set SE in d4
13241         or.l            &0x40000000,(%a0)       # and in memory
13242 #
13243 # Calculate the mantissa multiplier to compensate for the striping of
13244 # zeros from the mantissa.
13245 #
13246 ap_p_fm:
13247         lea.l           PTENRN(%pc),%a1         # get address of power-of-ten table
13248         clr.l           %d3                     # init table index
13249         fmov.s          &0x3f800000,%fp1        # init fp1 to 1
13250         mov.l           &3,%d2                  # init d2 to count bits in counter
13251 ap_p_el:
13252         asr.l           &1,%d0                  # shift lsb into carry
13253         bcc.b           ap_p_en                 # if 1, mul fp1 by pwrten factor
13254         fmul.x          (%a1,%d3),%fp1          # mul by 10**(d3_bit_no)
13255 ap_p_en:
13256         add.l           &12,%d3                 # inc d3 to next rtable entry
13257         tst.l           %d0                     # check if d0 is zero
13258         bne.b           ap_p_el                 # if not, get next bit
13259         fmul.x          %fp1,%fp0               # mul mantissa by 10**(no_bits_shifted)
13260         bra.b           pwrten                  # go calc pwrten
13261 #
13262 # This section handles a negative adjusted exponent.
13263 #
13264 ap_st_n:
13265         clr.l           %d1                     # clr counter
13266         mov.l           &2,%d5                  # set up d5 to point to lword 3
13267         mov.l           (%a0,%d5.L*4),%d4       # get lword 3
13268         bne.b           ap_n_cl                 # if not zero, check digits
13269         sub.l           &1,%d5                  # dec d5 to point to lword 2
13270         addq.l          &8,%d1                  # inc counter by 8
13271         mov.l           (%a0,%d5.L*4),%d4       # get lword 2
13272 ap_n_cl:
13273         mov.l           &28,%d3                 # point to last digit
13274         mov.l           &7,%d2                  # init digit counter
13275 ap_n_gd:
13276         bfextu          %d4{%d3:&4},%d0         # get digit
13277         bne.b           ap_n_fx                 # if non-zero, go to exp fix
13278         subq.l          &4,%d3                  # point to previous digit
13279         addq.l          &1,%d1                  # inc digit counter
13280         dbf.w           %d2,ap_n_gd             # get next digit
13281 ap_n_fx:
13282         mov.l           %d1,%d0                 # copy counter to d0
13283         mov.l           (%sp),%d1               # get adjusted exp from memory
13284         sub.l           %d0,%d1                 # subtract count from exp
13285         bgt.b           ap_n_fm                 # if still pos, go fix mantissa
13286         neg.l           %d1                     # take abs of exp and clr SE
13287         mov.l           (%a0),%d4               # load lword 1 to d4
13288         and.l           &0xbfffffff,%d4         # and clr SE in d4
13289         and.l           &0xbfffffff,(%a0)       # and in memory
13290 #
13291 # Calculate the mantissa multiplier to compensate for the appending of
13292 # zeros to the mantissa.
13293 #
13294 ap_n_fm:
13295         lea.l           PTENRN(%pc),%a1         # get address of power-of-ten table
13296         clr.l           %d3                     # init table index
13297         fmov.s          &0x3f800000,%fp1        # init fp1 to 1
13298         mov.l           &3,%d2                  # init d2 to count bits in counter
13299 ap_n_el:
13300         asr.l           &1,%d0                  # shift lsb into carry
13301         bcc.b           ap_n_en                 # if 1, mul fp1 by pwrten factor
13302         fmul.x          (%a1,%d3),%fp1          # mul by 10**(d3_bit_no)
13303 ap_n_en:
13304         add.l           &12,%d3                 # inc d3 to next rtable entry
13305         tst.l           %d0                     # check if d0 is zero
13306         bne.b           ap_n_el                 # if not, get next bit
13307         fdiv.x          %fp1,%fp0               # div mantissa by 10**(no_bits_shifted)
13308 #
13309 #
13310 # Calculate power-of-ten factor from adjusted and shifted exponent.
13311 #
13312 # Register usage:
13313 #
13314 #  pwrten:
13315 #       (*)  d0: temp
13316 #       ( )  d1: exponent
13317 #       (*)  d2: {FPCR[6:5],SM,SE} as index in RTABLE; temp
13318 #       (*)  d3: FPCR work copy
13319 #       ( )  d4: first word of bcd
13320 #       (*)  a1: RTABLE pointer
13321 #  calc_p:
13322 #       (*)  d0: temp
13323 #       ( )  d1: exponent
13324 #       (*)  d3: PWRTxx table index
13325 #       ( )  a0: pointer to working copy of bcd
13326 #       (*)  a1: PWRTxx pointer
13327 #       (*) fp1: power-of-ten accumulator
13328 #
13329 # Pwrten calculates the exponent factor in the selected rounding mode
13330 # according to the following table:
13331 #
13332 #       Sign of Mant  Sign of Exp  Rounding Mode  PWRTEN Rounding Mode
13333 #
13334 #       ANY       ANY   RN      RN
13335 #
13336 #        +         +    RP      RP
13337 #        -         +    RP      RM
13338 #        +         -    RP      RM
13339 #        -         -    RP      RP
13340 #
13341 #        +         +    RM      RM
13342 #        -         +    RM      RP
13343 #        +         -    RM      RP
13344 #        -         -    RM      RM
13345 #
13346 #        +         +    RZ      RM
13347 #        -         +    RZ      RM
13348 #        +         -    RZ      RP
13349 #        -         -    RZ      RP
13350 #
13351 #
13352 pwrten:
13353         mov.l           USER_FPCR(%a6),%d3      # get user's FPCR
13354         bfextu          %d3{&26:&2},%d2         # isolate rounding mode bits
13355         mov.l           (%a0),%d4               # reload 1st bcd word to d4
13356         asl.l           &2,%d2                  # format d2 to be
13357         bfextu          %d4{&0:&2},%d0          # {FPCR[6],FPCR[5],SM,SE}
13358         add.l           %d0,%d2                 # in d2 as index into RTABLE
13359         lea.l           RTABLE(%pc),%a1         # load rtable base
13360         mov.b           (%a1,%d2),%d0           # load new rounding bits from table
13361         clr.l           %d3                     # clear d3 to force no exc and extended
13362         bfins           %d0,%d3{&26:&2}         # stuff new rounding bits in FPCR
13363         fmov.l          %d3,%fpcr               # write new FPCR
13364         asr.l           &1,%d0                  # write correct PTENxx table
13365         bcc.b           not_rp                  # to a1
13366         lea.l           PTENRP(%pc),%a1         # it is RP
13367         bra.b           calc_p                  # go to init section
13368 not_rp:
13369         asr.l           &1,%d0                  # keep checking
13370         bcc.b           not_rm
13371         lea.l           PTENRM(%pc),%a1         # it is RM
13372         bra.b           calc_p                  # go to init section
13373 not_rm:
13374         lea.l           PTENRN(%pc),%a1         # it is RN
13375 calc_p:
13376         mov.l           %d1,%d0                 # copy exp to d0;use d0
13377         bpl.b           no_neg                  # if exp is negative,
13378         neg.l           %d0                     # invert it
13379         or.l            &0x40000000,(%a0)       # and set SE bit
13380 no_neg:
13381         clr.l           %d3                     # table index
13382         fmov.s          &0x3f800000,%fp1        # init fp1 to 1
13383 e_loop:
13384         asr.l           &1,%d0                  # shift next bit into carry
13385         bcc.b           e_next                  # if zero, skip the mul
13386         fmul.x          (%a1,%d3),%fp1          # mul by 10**(d3_bit_no)
13387 e_next:
13388         add.l           &12,%d3                 # inc d3 to next rtable entry
13389         tst.l           %d0                     # check if d0 is zero
13390         bne.b           e_loop                  # not zero, continue shifting
13391 #
13392 #
13393 #  Check the sign of the adjusted exp and make the value in fp0 the
13394 #  same sign. If the exp was pos then multiply fp1*fp0;
13395 #  else divide fp0/fp1.
13396 #
13397 # Register Usage:
13398 #  norm:
13399 #       ( )  a0: pointer to working bcd value
13400 #       (*) fp0: mantissa accumulator
13401 #       ( ) fp1: scaling factor - 10**(abs(exp))
13402 #
13403 pnorm:
13404         btst            &30,(%a0)               # test the sign of the exponent
13405         beq.b           mul                     # if clear, go to multiply
13406 div:
13407         fdiv.x          %fp1,%fp0               # exp is negative, so divide mant by exp
13408         bra.b           end_dec
13409 mul:
13410         fmul.x          %fp1,%fp0               # exp is positive, so multiply by exp
13411 #
13412 #
13413 # Clean up and return with result in fp0.
13414 #
13415 # If the final mul/div in decbin incurred an inex exception,
13416 # it will be inex2, but will be reported as inex1 by get_op.
13417 #
13418 end_dec:
13419         fmov.l          %fpsr,%d0               # get status register
13420         bclr            &inex2_bit+8,%d0        # test for inex2 and clear it
13421         beq.b           no_exc                  # skip this if no exc
13422         ori.w           &inx1a_mask,2+USER_FPSR(%a6) # set INEX1/AINEX
13423 no_exc:
13424         add.l           &0x4,%sp                # clear 1 lw param
13425         fmovm.x         (%sp)+,&0x40            # restore fp1
13426         movm.l          (%sp)+,&0x3c            # restore d2-d5
13427         fmov.l          &0x0,%fpcr
13428         fmov.l          &0x0,%fpsr
13429         rts
13430
13431 #########################################################################
13432 # bindec(): Converts an input in extended precision format to bcd format#
13433 #                                                                       #
13434 # INPUT *************************************************************** #
13435 #       a0 = pointer to the input extended precision value in memory.   #
13436 #            the input may be either normalized, unnormalized, or       #
13437 #            denormalized.                                              #
13438 #       d0 = contains the k-factor sign-extended to 32-bits.            #
13439 #                                                                       #
13440 # OUTPUT ************************************************************** #
13441 #       FP_SCR0(a6) = bcd format result on the stack.                   #
13442 #                                                                       #
13443 # ALGORITHM *********************************************************** #
13444 #                                                                       #
13445 #       A1.     Set RM and size ext;  Set SIGMA = sign of input.        #
13446 #               The k-factor is saved for use in d7. Clear the          #
13447 #               BINDEC_FLG for separating normalized/denormalized       #
13448 #               input.  If input is unnormalized or denormalized,       #
13449 #               normalize it.                                           #
13450 #                                                                       #
13451 #       A2.     Set X = abs(input).                                     #
13452 #                                                                       #
13453 #       A3.     Compute ILOG.                                           #
13454 #               ILOG is the log base 10 of the input value.  It is      #
13455 #               approximated by adding e + 0.f when the original        #
13456 #               value is viewed as 2^^e * 1.f in extended precision.    #
13457 #               This value is stored in d6.                             #
13458 #                                                                       #
13459 #       A4.     Clr INEX bit.                                           #
13460 #               The operation in A3 above may have set INEX2.           #
13461 #                                                                       #
13462 #       A5.     Set ICTR = 0;                                           #
13463 #               ICTR is a flag used in A13.  It must be set before the  #
13464 #               loop entry A6.                                          #
13465 #                                                                       #
13466 #       A6.     Calculate LEN.                                          #
13467 #               LEN is the number of digits to be displayed.  The       #
13468 #               k-factor can dictate either the total number of digits, #
13469 #               if it is a positive number, or the number of digits     #
13470 #               after the decimal point which are to be included as     #
13471 #               significant.  See the 68882 manual for examples.        #
13472 #               If LEN is computed to be greater than 17, set OPERR in  #
13473 #               USER_FPSR.  LEN is stored in d4.                        #
13474 #                                                                       #
13475 #       A7.     Calculate SCALE.                                        #
13476 #               SCALE is equal to 10^ISCALE, where ISCALE is the number #
13477 #               of decimal places needed to insure LEN integer digits   #
13478 #               in the output before conversion to bcd. LAMBDA is the   #
13479 #               sign of ISCALE, used in A9. Fp1 contains                #
13480 #               10^^(abs(ISCALE)) using a rounding mode which is a      #
13481 #               function of the original rounding mode and the signs    #
13482 #               of ISCALE and X.  A table is given in the code.         #
13483 #                                                                       #
13484 #       A8.     Clr INEX; Force RZ.                                     #
13485 #               The operation in A3 above may have set INEX2.           #
13486 #               RZ mode is forced for the scaling operation to insure   #
13487 #               only one rounding error.  The grs bits are collected in #
13488 #               the INEX flag for use in A10.                           #
13489 #                                                                       #
13490 #       A9.     Scale X -> Y.                                           #
13491 #               The mantissa is scaled to the desired number of         #
13492 #               significant digits.  The excess digits are collected    #
13493 #               in INEX2.                                               #
13494 #                                                                       #
13495 #       A10.    Or in INEX.                                             #
13496 #               If INEX is set, round error occurred.  This is          #
13497 #               compensated for by 'or-ing' in the INEX2 flag to        #
13498 #               the lsb of Y.                                           #
13499 #                                                                       #
13500 #       A11.    Restore original FPCR; set size ext.                    #
13501 #               Perform FINT operation in the user's rounding mode.     #
13502 #               Keep the size to extended.                              #
13503 #                                                                       #
13504 #       A12.    Calculate YINT = FINT(Y) according to user's rounding   #
13505 #               mode.  The FPSP routine sintd0 is used.  The output     #
13506 #               is in fp0.                                              #
13507 #                                                                       #
13508 #       A13.    Check for LEN digits.                                   #
13509 #               If the int operation results in more than LEN digits,   #
13510 #               or less than LEN -1 digits, adjust ILOG and repeat from #
13511 #               A6.  This test occurs only on the first pass.  If the   #
13512 #               result is exactly 10^LEN, decrement ILOG and divide     #
13513 #               the mantissa by 10.                                     #
13514 #                                                                       #
13515 #       A14.    Convert the mantissa to bcd.                            #
13516 #               The binstr routine is used to convert the LEN digit     #
13517 #               mantissa to bcd in memory.  The input to binstr is      #
13518 #               to be a fraction; i.e. (mantissa)/10^LEN and adjusted   #
13519 #               such that the decimal point is to the left of bit 63.   #
13520 #               The bcd digits are stored in the correct position in    #
13521 #               the final string area in memory.                        #
13522 #                                                                       #
13523 #       A15.    Convert the exponent to bcd.                            #
13524 #               As in A14 above, the exp is converted to bcd and the    #
13525 #               digits are stored in the final string.                  #
13526 #               Test the length of the final exponent string.  If the   #
13527 #               length is 4, set operr.                                 #
13528 #                                                                       #
13529 #       A16.    Write sign bits to final string.                        #
13530 #                                                                       #
13531 #########################################################################
13532
13533 set     BINDEC_FLG,     EXC_TEMP        # DENORM flag
13534
13535 # Constants in extended precision
13536 PLOG2:
13537         long            0x3FFD0000,0x9A209A84,0xFBCFF798,0x00000000
13538 PLOG2UP1:
13539         long            0x3FFD0000,0x9A209A84,0xFBCFF799,0x00000000
13540
13541 # Constants in single precision
13542 FONE:
13543         long            0x3F800000,0x00000000,0x00000000,0x00000000
13544 FTWO:
13545         long            0x40000000,0x00000000,0x00000000,0x00000000
13546 FTEN:
13547         long            0x41200000,0x00000000,0x00000000,0x00000000
13548 F4933:
13549         long            0x459A2800,0x00000000,0x00000000,0x00000000
13550
13551 RBDTBL:
13552         byte            0,0,0,0
13553         byte            3,3,2,2
13554         byte            3,2,2,3
13555         byte            2,3,3,2
13556
13557 #       Implementation Notes:
13558 #
13559 #       The registers are used as follows:
13560 #
13561 #               d0: scratch; LEN input to binstr
13562 #               d1: scratch
13563 #               d2: upper 32-bits of mantissa for binstr
13564 #               d3: scratch;lower 32-bits of mantissa for binstr
13565 #               d4: LEN
13566 #               d5: LAMBDA/ICTR
13567 #               d6: ILOG
13568 #               d7: k-factor
13569 #               a0: ptr for original operand/final result
13570 #               a1: scratch pointer
13571 #               a2: pointer to FP_X; abs(original value) in ext
13572 #               fp0: scratch
13573 #               fp1: scratch
13574 #               fp2: scratch
13575 #               F_SCR1:
13576 #               F_SCR2:
13577 #               L_SCR1:
13578 #               L_SCR2:
13579
13580         global          bindec
13581 bindec:
13582         movm.l          &0x3f20,-(%sp)  #  {%d2-%d7/%a2}
13583         fmovm.x         &0x7,-(%sp)     #  {%fp0-%fp2}
13584
13585 # A1. Set RM and size ext. Set SIGMA = sign input;
13586 #     The k-factor is saved for use in d7.  Clear BINDEC_FLG for
13587 #     separating  normalized/denormalized input.  If the input
13588 #     is a denormalized number, set the BINDEC_FLG memory word
13589 #     to signal denorm.  If the input is unnormalized, normalize
13590 #     the input and test for denormalized result.
13591 #
13592         fmov.l          &rm_mode*0x10,%fpcr     # set RM and ext
13593         mov.l           (%a0),L_SCR2(%a6)       # save exponent for sign check
13594         mov.l           %d0,%d7         # move k-factor to d7
13595
13596         clr.b           BINDEC_FLG(%a6) # clr norm/denorm flag
13597         cmpi.b          STAG(%a6),&DENORM # is input a DENORM?
13598         bne.w           A2_str          # no; input is a NORM
13599
13600 #
13601 # Normalize the denorm
13602 #
13603 un_de_norm:
13604         mov.w           (%a0),%d0
13605         and.w           &0x7fff,%d0     # strip sign of normalized exp
13606         mov.l           4(%a0),%d1
13607         mov.l           8(%a0),%d2
13608 norm_loop:
13609         sub.w           &1,%d0
13610         lsl.l           &1,%d2
13611         roxl.l          &1,%d1
13612         tst.l           %d1
13613         bge.b           norm_loop
13614 #
13615 # Test if the normalized input is denormalized
13616 #
13617         tst.w           %d0
13618         bgt.b           pos_exp         # if greater than zero, it is a norm
13619         st              BINDEC_FLG(%a6) # set flag for denorm
13620 pos_exp:
13621         and.w           &0x7fff,%d0     # strip sign of normalized exp
13622         mov.w           %d0,(%a0)
13623         mov.l           %d1,4(%a0)
13624         mov.l           %d2,8(%a0)
13625
13626 # A2. Set X = abs(input).
13627 #
13628 A2_str:
13629         mov.l           (%a0),FP_SCR1(%a6)      # move input to work space
13630         mov.l           4(%a0),FP_SCR1+4(%a6)   # move input to work space
13631         mov.l           8(%a0),FP_SCR1+8(%a6)   # move input to work space
13632         and.l           &0x7fffffff,FP_SCR1(%a6)        # create abs(X)
13633
13634 # A3. Compute ILOG.
13635 #     ILOG is the log base 10 of the input value.  It is approx-
13636 #     imated by adding e + 0.f when the original value is viewed
13637 #     as 2^^e * 1.f in extended precision.  This value is stored
13638 #     in d6.
13639 #
13640 # Register usage:
13641 #       Input/Output
13642 #       d0: k-factor/exponent
13643 #       d2: x/x
13644 #       d3: x/x
13645 #       d4: x/x
13646 #       d5: x/x
13647 #       d6: x/ILOG
13648 #       d7: k-factor/Unchanged
13649 #       a0: ptr for original operand/final result
13650 #       a1: x/x
13651 #       a2: x/x
13652 #       fp0: x/float(ILOG)
13653 #       fp1: x/x
13654 #       fp2: x/x
13655 #       F_SCR1:x/x
13656 #       F_SCR2:Abs(X)/Abs(X) with $3fff exponent
13657 #       L_SCR1:x/x
13658 #       L_SCR2:first word of X packed/Unchanged
13659
13660         tst.b           BINDEC_FLG(%a6) # check for denorm
13661         beq.b           A3_cont         # if clr, continue with norm
13662         mov.l           &-4933,%d6      # force ILOG = -4933
13663         bra.b           A4_str
13664 A3_cont:
13665         mov.w           FP_SCR1(%a6),%d0        # move exp to d0
13666         mov.w           &0x3fff,FP_SCR1(%a6)    # replace exponent with 0x3fff
13667         fmov.x          FP_SCR1(%a6),%fp0       # now fp0 has 1.f
13668         sub.w           &0x3fff,%d0     # strip off bias
13669         fadd.w          %d0,%fp0        # add in exp
13670         fsub.s          FONE(%pc),%fp0  # subtract off 1.0
13671         fbge.w          pos_res         # if pos, branch
13672         fmul.x          PLOG2UP1(%pc),%fp0      # if neg, mul by LOG2UP1
13673         fmov.l          %fp0,%d6        # put ILOG in d6 as a lword
13674         bra.b           A4_str          # go move out ILOG
13675 pos_res:
13676         fmul.x          PLOG2(%pc),%fp0 # if pos, mul by LOG2
13677         fmov.l          %fp0,%d6        # put ILOG in d6 as a lword
13678
13679
13680 # A4. Clr INEX bit.
13681 #     The operation in A3 above may have set INEX2.
13682
13683 A4_str:
13684         fmov.l          &0,%fpsr        # zero all of fpsr - nothing needed
13685
13686
13687 # A5. Set ICTR = 0;
13688 #     ICTR is a flag used in A13.  It must be set before the
13689 #     loop entry A6. The lower word of d5 is used for ICTR.
13690
13691         clr.w           %d5             # clear ICTR
13692
13693 # A6. Calculate LEN.
13694 #     LEN is the number of digits to be displayed.  The k-factor
13695 #     can dictate either the total number of digits, if it is
13696 #     a positive number, or the number of digits after the
13697 #     original decimal point which are to be included as
13698 #     significant.  See the 68882 manual for examples.
13699 #     If LEN is computed to be greater than 17, set OPERR in
13700 #     USER_FPSR.  LEN is stored in d4.
13701 #
13702 # Register usage:
13703 #       Input/Output
13704 #       d0: exponent/Unchanged
13705 #       d2: x/x/scratch
13706 #       d3: x/x
13707 #       d4: exc picture/LEN
13708 #       d5: ICTR/Unchanged
13709 #       d6: ILOG/Unchanged
13710 #       d7: k-factor/Unchanged
13711 #       a0: ptr for original operand/final result
13712 #       a1: x/x
13713 #       a2: x/x
13714 #       fp0: float(ILOG)/Unchanged
13715 #       fp1: x/x
13716 #       fp2: x/x
13717 #       F_SCR1:x/x
13718 #       F_SCR2:Abs(X) with $3fff exponent/Unchanged
13719 #       L_SCR1:x/x
13720 #       L_SCR2:first word of X packed/Unchanged
13721
13722 A6_str:
13723         tst.l           %d7             # branch on sign of k
13724         ble.b           k_neg           # if k <= 0, LEN = ILOG + 1 - k
13725         mov.l           %d7,%d4         # if k > 0, LEN = k
13726         bra.b           len_ck          # skip to LEN check
13727 k_neg:
13728         mov.l           %d6,%d4         # first load ILOG to d4
13729         sub.l           %d7,%d4         # subtract off k
13730         addq.l          &1,%d4          # add in the 1
13731 len_ck:
13732         tst.l           %d4             # LEN check: branch on sign of LEN
13733         ble.b           LEN_ng          # if neg, set LEN = 1
13734         cmp.l           %d4,&17         # test if LEN > 17
13735         ble.b           A7_str          # if not, forget it
13736         mov.l           &17,%d4         # set max LEN = 17
13737         tst.l           %d7             # if negative, never set OPERR
13738         ble.b           A7_str          # if positive, continue
13739         or.l            &opaop_mask,USER_FPSR(%a6)      # set OPERR & AIOP in USER_FPSR
13740         bra.b           A7_str          # finished here
13741 LEN_ng:
13742         mov.l           &1,%d4          # min LEN is 1
13743
13744
13745 # A7. Calculate SCALE.
13746 #     SCALE is equal to 10^ISCALE, where ISCALE is the number
13747 #     of decimal places needed to insure LEN integer digits
13748 #     in the output before conversion to bcd. LAMBDA is the sign
13749 #     of ISCALE, used in A9.  Fp1 contains 10^^(abs(ISCALE)) using
13750 #     the rounding mode as given in the following table (see
13751 #     Coonen, p. 7.23 as ref.; however, the SCALE variable is
13752 #     of opposite sign in bindec.sa from Coonen).
13753 #
13754 #       Initial                                 USE
13755 #       FPCR[6:5]       LAMBDA  SIGN(X)         FPCR[6:5]
13756 #       ----------------------------------------------
13757 #        RN     00         0       0            00/0    RN
13758 #        RN     00         0       1            00/0    RN
13759 #        RN     00         1       0            00/0    RN
13760 #        RN     00         1       1            00/0    RN
13761 #        RZ     01         0       0            11/3    RP
13762 #        RZ     01         0       1            11/3    RP
13763 #        RZ     01         1       0            10/2    RM
13764 #        RZ     01         1       1            10/2    RM
13765 #        RM     10         0       0            11/3    RP
13766 #        RM     10         0       1            10/2    RM
13767 #        RM     10         1       0            10/2    RM
13768 #        RM     10         1       1            11/3    RP
13769 #        RP     11         0       0            10/2    RM
13770 #        RP     11         0       1            11/3    RP
13771 #        RP     11         1       0            11/3    RP
13772 #        RP     11         1       1            10/2    RM
13773 #
13774 # Register usage:
13775 #       Input/Output
13776 #       d0: exponent/scratch - final is 0
13777 #       d2: x/0 or 24 for A9
13778 #       d3: x/scratch - offset ptr into PTENRM array
13779 #       d4: LEN/Unchanged
13780 #       d5: 0/ICTR:LAMBDA
13781 #       d6: ILOG/ILOG or k if ((k<=0)&(ILOG<k))
13782 #       d7: k-factor/Unchanged
13783 #       a0: ptr for original operand/final result
13784 #       a1: x/ptr to PTENRM array
13785 #       a2: x/x
13786 #       fp0: float(ILOG)/Unchanged
13787 #       fp1: x/10^ISCALE
13788 #       fp2: x/x
13789 #       F_SCR1:x/x
13790 #       F_SCR2:Abs(X) with $3fff exponent/Unchanged
13791 #       L_SCR1:x/x
13792 #       L_SCR2:first word of X packed/Unchanged
13793
13794 A7_str:
13795         tst.l           %d7             # test sign of k
13796         bgt.b           k_pos           # if pos and > 0, skip this
13797         cmp.l           %d7,%d6         # test k - ILOG
13798         blt.b           k_pos           # if ILOG >= k, skip this
13799         mov.l           %d7,%d6         # if ((k<0) & (ILOG < k)) ILOG = k
13800 k_pos:
13801         mov.l           %d6,%d0         # calc ILOG + 1 - LEN in d0
13802         addq.l          &1,%d0          # add the 1
13803         sub.l           %d4,%d0         # sub off LEN
13804         swap            %d5             # use upper word of d5 for LAMBDA
13805         clr.w           %d5             # set it zero initially
13806         clr.w           %d2             # set up d2 for very small case
13807         tst.l           %d0             # test sign of ISCALE
13808         bge.b           iscale          # if pos, skip next inst
13809         addq.w          &1,%d5          # if neg, set LAMBDA true
13810         cmp.l           %d0,&0xffffecd4 # test iscale <= -4908
13811         bgt.b           no_inf          # if false, skip rest
13812         add.l           &24,%d0         # add in 24 to iscale
13813         mov.l           &24,%d2         # put 24 in d2 for A9
13814 no_inf:
13815         neg.l           %d0             # and take abs of ISCALE
13816 iscale:
13817         fmov.s          FONE(%pc),%fp1  # init fp1 to 1
13818         bfextu          USER_FPCR(%a6){&26:&2},%d1      # get initial rmode bits
13819         lsl.w           &1,%d1          # put them in bits 2:1
13820         add.w           %d5,%d1         # add in LAMBDA
13821         lsl.w           &1,%d1          # put them in bits 3:1
13822         tst.l           L_SCR2(%a6)     # test sign of original x
13823         bge.b           x_pos           # if pos, don't set bit 0
13824         addq.l          &1,%d1          # if neg, set bit 0
13825 x_pos:
13826         lea.l           RBDTBL(%pc),%a2 # load rbdtbl base
13827         mov.b           (%a2,%d1),%d3   # load d3 with new rmode
13828         lsl.l           &4,%d3          # put bits in proper position
13829         fmov.l          %d3,%fpcr       # load bits into fpu
13830         lsr.l           &4,%d3          # put bits in proper position
13831         tst.b           %d3             # decode new rmode for pten table
13832         bne.b           not_rn          # if zero, it is RN
13833         lea.l           PTENRN(%pc),%a1 # load a1 with RN table base
13834         bra.b           rmode           # exit decode
13835 not_rn:
13836         lsr.b           &1,%d3          # get lsb in carry
13837         bcc.b           not_rp2         # if carry clear, it is RM
13838         lea.l           PTENRP(%pc),%a1 # load a1 with RP table base
13839         bra.b           rmode           # exit decode
13840 not_rp2:
13841         lea.l           PTENRM(%pc),%a1 # load a1 with RM table base
13842 rmode:
13843         clr.l           %d3             # clr table index
13844 e_loop2:
13845         lsr.l           &1,%d0          # shift next bit into carry
13846         bcc.b           e_next2         # if zero, skip the mul
13847         fmul.x          (%a1,%d3),%fp1  # mul by 10**(d3_bit_no)
13848 e_next2:
13849         add.l           &12,%d3         # inc d3 to next pwrten table entry
13850         tst.l           %d0             # test if ISCALE is zero
13851         bne.b           e_loop2         # if not, loop
13852
13853 # A8. Clr INEX; Force RZ.
13854 #     The operation in A3 above may have set INEX2.
13855 #     RZ mode is forced for the scaling operation to insure
13856 #     only one rounding error.  The grs bits are collected in
13857 #     the INEX flag for use in A10.
13858 #
13859 # Register usage:
13860 #       Input/Output
13861
13862         fmov.l          &0,%fpsr        # clr INEX
13863         fmov.l          &rz_mode*0x10,%fpcr     # set RZ rounding mode
13864
13865 # A9. Scale X -> Y.
13866 #     The mantissa is scaled to the desired number of significant
13867 #     digits.  The excess digits are collected in INEX2. If mul,
13868 #     Check d2 for excess 10 exponential value.  If not zero,
13869 #     the iscale value would have caused the pwrten calculation
13870 #     to overflow.  Only a negative iscale can cause this, so
13871 #     multiply by 10^(d2), which is now only allowed to be 24,
13872 #     with a multiply by 10^8 and 10^16, which is exact since
13873 #     10^24 is exact.  If the input was denormalized, we must
13874 #     create a busy stack frame with the mul command and the
13875 #     two operands, and allow the fpu to complete the multiply.
13876 #
13877 # Register usage:
13878 #       Input/Output
13879 #       d0: FPCR with RZ mode/Unchanged
13880 #       d2: 0 or 24/unchanged
13881 #       d3: x/x
13882 #       d4: LEN/Unchanged
13883 #       d5: ICTR:LAMBDA
13884 #       d6: ILOG/Unchanged
13885 #       d7: k-factor/Unchanged
13886 #       a0: ptr for original operand/final result
13887 #       a1: ptr to PTENRM array/Unchanged
13888 #       a2: x/x
13889 #       fp0: float(ILOG)/X adjusted for SCALE (Y)
13890 #       fp1: 10^ISCALE/Unchanged
13891 #       fp2: x/x
13892 #       F_SCR1:x/x
13893 #       F_SCR2:Abs(X) with $3fff exponent/Unchanged
13894 #       L_SCR1:x/x
13895 #       L_SCR2:first word of X packed/Unchanged
13896
13897 A9_str:
13898         fmov.x          (%a0),%fp0      # load X from memory
13899         fabs.x          %fp0            # use abs(X)
13900         tst.w           %d5             # LAMBDA is in lower word of d5
13901         bne.b           sc_mul          # if neg (LAMBDA = 1), scale by mul
13902         fdiv.x          %fp1,%fp0       # calculate X / SCALE -> Y to fp0
13903         bra.w           A10_st          # branch to A10
13904
13905 sc_mul:
13906         tst.b           BINDEC_FLG(%a6) # check for denorm
13907         beq.w           A9_norm         # if norm, continue with mul
13908
13909 # for DENORM, we must calculate:
13910 #       fp0 = input_op * 10^ISCALE * 10^24
13911 # since the input operand is a DENORM, we can't multiply it directly.
13912 # so, we do the multiplication of the exponents and mantissas separately.
13913 # in this way, we avoid underflow on intermediate stages of the
13914 # multiplication and guarantee a result without exception.
13915         fmovm.x         &0x2,-(%sp)     # save 10^ISCALE to stack
13916
13917         mov.w           (%sp),%d3       # grab exponent
13918         andi.w          &0x7fff,%d3     # clear sign
13919         ori.w           &0x8000,(%a0)   # make DENORM exp negative
13920         add.w           (%a0),%d3       # add DENORM exp to 10^ISCALE exp
13921         subi.w          &0x3fff,%d3     # subtract BIAS
13922         add.w           36(%a1),%d3
13923         subi.w          &0x3fff,%d3     # subtract BIAS
13924         add.w           48(%a1),%d3
13925         subi.w          &0x3fff,%d3     # subtract BIAS
13926
13927         bmi.w           sc_mul_err      # is result is DENORM, punt!!!
13928
13929         andi.w          &0x8000,(%sp)   # keep sign
13930         or.w            %d3,(%sp)       # insert new exponent
13931         andi.w          &0x7fff,(%a0)   # clear sign bit on DENORM again
13932         mov.l           0x8(%a0),-(%sp) # put input op mantissa on stk
13933         mov.l           0x4(%a0),-(%sp)
13934         mov.l           &0x3fff0000,-(%sp) # force exp to zero
13935         fmovm.x         (%sp)+,&0x80    # load normalized DENORM into fp0
13936         fmul.x          (%sp)+,%fp0
13937
13938 #       fmul.x  36(%a1),%fp0    # multiply fp0 by 10^8
13939 #       fmul.x  48(%a1),%fp0    # multiply fp0 by 10^16
13940         mov.l           36+8(%a1),-(%sp) # get 10^8 mantissa
13941         mov.l           36+4(%a1),-(%sp)
13942         mov.l           &0x3fff0000,-(%sp) # force exp to zero
13943         mov.l           48+8(%a1),-(%sp) # get 10^16 mantissa
13944         mov.l           48+4(%a1),-(%sp)
13945         mov.l           &0x3fff0000,-(%sp)# force exp to zero
13946         fmul.x          (%sp)+,%fp0     # multiply fp0 by 10^8
13947         fmul.x          (%sp)+,%fp0     # multiply fp0 by 10^16
13948         bra.b           A10_st
13949
13950 sc_mul_err:
13951         bra.b           sc_mul_err
13952
13953 A9_norm:
13954         tst.w           %d2             # test for small exp case
13955         beq.b           A9_con          # if zero, continue as normal
13956         fmul.x          36(%a1),%fp0    # multiply fp0 by 10^8
13957         fmul.x          48(%a1),%fp0    # multiply fp0 by 10^16
13958 A9_con:
13959         fmul.x          %fp1,%fp0       # calculate X * SCALE -> Y to fp0
13960
13961 # A10. Or in INEX.
13962 #      If INEX is set, round error occurred.  This is compensated
13963 #      for by 'or-ing' in the INEX2 flag to the lsb of Y.
13964 #
13965 # Register usage:
13966 #       Input/Output
13967 #       d0: FPCR with RZ mode/FPSR with INEX2 isolated
13968 #       d2: x/x
13969 #       d3: x/x
13970 #       d4: LEN/Unchanged
13971 #       d5: ICTR:LAMBDA
13972 #       d6: ILOG/Unchanged
13973 #       d7: k-factor/Unchanged
13974 #       a0: ptr for original operand/final result
13975 #       a1: ptr to PTENxx array/Unchanged
13976 #       a2: x/ptr to FP_SCR1(a6)
13977 #       fp0: Y/Y with lsb adjusted
13978 #       fp1: 10^ISCALE/Unchanged
13979 #       fp2: x/x
13980
13981 A10_st:
13982         fmov.l          %fpsr,%d0       # get FPSR
13983         fmov.x          %fp0,FP_SCR1(%a6)       # move Y to memory
13984         lea.l           FP_SCR1(%a6),%a2        # load a2 with ptr to FP_SCR1
13985         btst            &9,%d0          # check if INEX2 set
13986         beq.b           A11_st          # if clear, skip rest
13987         or.l            &1,8(%a2)       # or in 1 to lsb of mantissa
13988         fmov.x          FP_SCR1(%a6),%fp0       # write adjusted Y back to fpu
13989
13990
13991 # A11. Restore original FPCR; set size ext.
13992 #      Perform FINT operation in the user's rounding mode.  Keep
13993 #      the size to extended.  The sintdo entry point in the sint
13994 #      routine expects the FPCR value to be in USER_FPCR for
13995 #      mode and precision.  The original FPCR is saved in L_SCR1.
13996
13997 A11_st:
13998         mov.l           USER_FPCR(%a6),L_SCR1(%a6)      # save it for later
13999         and.l           &0x00000030,USER_FPCR(%a6)      # set size to ext,
14000 #                                       ;block exceptions
14001
14002
14003 # A12. Calculate YINT = FINT(Y) according to user's rounding mode.
14004 #      The FPSP routine sintd0 is used.  The output is in fp0.
14005 #
14006 # Register usage:
14007 #       Input/Output
14008 #       d0: FPSR with AINEX cleared/FPCR with size set to ext
14009 #       d2: x/x/scratch
14010 #       d3: x/x
14011 #       d4: LEN/Unchanged
14012 #       d5: ICTR:LAMBDA/Unchanged
14013 #       d6: ILOG/Unchanged
14014 #       d7: k-factor/Unchanged
14015 #       a0: ptr for original operand/src ptr for sintdo
14016 #       a1: ptr to PTENxx array/Unchanged
14017 #       a2: ptr to FP_SCR1(a6)/Unchanged
14018 #       a6: temp pointer to FP_SCR1(a6) - orig value saved and restored
14019 #       fp0: Y/YINT
14020 #       fp1: 10^ISCALE/Unchanged
14021 #       fp2: x/x
14022 #       F_SCR1:x/x
14023 #       F_SCR2:Y adjusted for inex/Y with original exponent
14024 #       L_SCR1:x/original USER_FPCR
14025 #       L_SCR2:first word of X packed/Unchanged
14026
14027 A12_st:
14028         movm.l  &0xc0c0,-(%sp)  # save regs used by sintd0       {%d0-%d1/%a0-%a1}
14029         mov.l   L_SCR1(%a6),-(%sp)
14030         mov.l   L_SCR2(%a6),-(%sp)
14031
14032         lea.l           FP_SCR1(%a6),%a0        # a0 is ptr to FP_SCR1(a6)
14033         fmov.x          %fp0,(%a0)      # move Y to memory at FP_SCR1(a6)
14034         tst.l           L_SCR2(%a6)     # test sign of original operand
14035         bge.b           do_fint12               # if pos, use Y
14036         or.l            &0x80000000,(%a0)       # if neg, use -Y
14037 do_fint12:
14038         mov.l   USER_FPSR(%a6),-(%sp)
14039 #       bsr     sintdo          # sint routine returns int in fp0
14040
14041         fmov.l  USER_FPCR(%a6),%fpcr
14042         fmov.l  &0x0,%fpsr                      # clear the AEXC bits!!!
14043 ##      mov.l           USER_FPCR(%a6),%d0      # ext prec/keep rnd mode
14044 ##      andi.l          &0x00000030,%d0
14045 ##      fmov.l          %d0,%fpcr
14046         fint.x          FP_SCR1(%a6),%fp0       # do fint()
14047         fmov.l  %fpsr,%d0
14048         or.w    %d0,FPSR_EXCEPT(%a6)
14049 ##      fmov.l          &0x0,%fpcr
14050 ##      fmov.l          %fpsr,%d0               # don't keep ccodes
14051 ##      or.w            %d0,FPSR_EXCEPT(%a6)
14052
14053         mov.b   (%sp),USER_FPSR(%a6)
14054         add.l   &4,%sp
14055
14056         mov.l   (%sp)+,L_SCR2(%a6)
14057         mov.l   (%sp)+,L_SCR1(%a6)
14058         movm.l  (%sp)+,&0x303   # restore regs used by sint      {%d0-%d1/%a0-%a1}
14059
14060         mov.l   L_SCR2(%a6),FP_SCR1(%a6)        # restore original exponent
14061         mov.l   L_SCR1(%a6),USER_FPCR(%a6)      # restore user's FPCR
14062
14063 # A13. Check for LEN digits.
14064 #      If the int operation results in more than LEN digits,
14065 #      or less than LEN -1 digits, adjust ILOG and repeat from
14066 #      A6.  This test occurs only on the first pass.  If the
14067 #      result is exactly 10^LEN, decrement ILOG and divide
14068 #      the mantissa by 10.  The calculation of 10^LEN cannot
14069 #      be inexact, since all powers of ten upto 10^27 are exact
14070 #      in extended precision, so the use of a previous power-of-ten
14071 #      table will introduce no error.
14072 #
14073 #
14074 # Register usage:
14075 #       Input/Output
14076 #       d0: FPCR with size set to ext/scratch final = 0
14077 #       d2: x/x
14078 #       d3: x/scratch final = x
14079 #       d4: LEN/LEN adjusted
14080 #       d5: ICTR:LAMBDA/LAMBDA:ICTR
14081 #       d6: ILOG/ILOG adjusted
14082 #       d7: k-factor/Unchanged
14083 #       a0: pointer into memory for packed bcd string formation
14084 #       a1: ptr to PTENxx array/Unchanged
14085 #       a2: ptr to FP_SCR1(a6)/Unchanged
14086 #       fp0: int portion of Y/abs(YINT) adjusted
14087 #       fp1: 10^ISCALE/Unchanged
14088 #       fp2: x/10^LEN
14089 #       F_SCR1:x/x
14090 #       F_SCR2:Y with original exponent/Unchanged
14091 #       L_SCR1:original USER_FPCR/Unchanged
14092 #       L_SCR2:first word of X packed/Unchanged
14093
14094 A13_st:
14095         swap            %d5             # put ICTR in lower word of d5
14096         tst.w           %d5             # check if ICTR = 0
14097         bne             not_zr          # if non-zero, go to second test
14098 #
14099 # Compute 10^(LEN-1)
14100 #
14101         fmov.s          FONE(%pc),%fp2  # init fp2 to 1.0
14102         mov.l           %d4,%d0         # put LEN in d0
14103         subq.l          &1,%d0          # d0 = LEN -1
14104         clr.l           %d3             # clr table index
14105 l_loop:
14106         lsr.l           &1,%d0          # shift next bit into carry
14107         bcc.b           l_next          # if zero, skip the mul
14108         fmul.x          (%a1,%d3),%fp2  # mul by 10**(d3_bit_no)
14109 l_next:
14110         add.l           &12,%d3         # inc d3 to next pwrten table entry
14111         tst.l           %d0             # test if LEN is zero
14112         bne.b           l_loop          # if not, loop
14113 #
14114 # 10^LEN-1 is computed for this test and A14.  If the input was
14115 # denormalized, check only the case in which YINT > 10^LEN.
14116 #
14117         tst.b           BINDEC_FLG(%a6) # check if input was norm
14118         beq.b           A13_con         # if norm, continue with checking
14119         fabs.x          %fp0            # take abs of YINT
14120         bra             test_2
14121 #
14122 # Compare abs(YINT) to 10^(LEN-1) and 10^LEN
14123 #
14124 A13_con:
14125         fabs.x          %fp0            # take abs of YINT
14126         fcmp.x          %fp0,%fp2       # compare abs(YINT) with 10^(LEN-1)
14127         fbge.w          test_2          # if greater, do next test
14128         subq.l          &1,%d6          # subtract 1 from ILOG
14129         mov.w           &1,%d5          # set ICTR
14130         fmov.l          &rm_mode*0x10,%fpcr     # set rmode to RM
14131         fmul.s          FTEN(%pc),%fp2  # compute 10^LEN
14132         bra.w           A6_str          # return to A6 and recompute YINT
14133 test_2:
14134         fmul.s          FTEN(%pc),%fp2  # compute 10^LEN
14135         fcmp.x          %fp0,%fp2       # compare abs(YINT) with 10^LEN
14136         fblt.w          A14_st          # if less, all is ok, go to A14
14137         fbgt.w          fix_ex          # if greater, fix and redo
14138         fdiv.s          FTEN(%pc),%fp0  # if equal, divide by 10
14139         addq.l          &1,%d6          # and inc ILOG
14140         bra.b           A14_st          # and continue elsewhere
14141 fix_ex:
14142         addq.l          &1,%d6          # increment ILOG by 1
14143         mov.w           &1,%d5          # set ICTR
14144         fmov.l          &rm_mode*0x10,%fpcr     # set rmode to RM
14145         bra.w           A6_str          # return to A6 and recompute YINT
14146 #
14147 # Since ICTR <> 0, we have already been through one adjustment,
14148 # and shouldn't have another; this is to check if abs(YINT) = 10^LEN
14149 # 10^LEN is again computed using whatever table is in a1 since the
14150 # value calculated cannot be inexact.
14151 #
14152 not_zr:
14153         fmov.s          FONE(%pc),%fp2  # init fp2 to 1.0
14154         mov.l           %d4,%d0         # put LEN in d0
14155         clr.l           %d3             # clr table index
14156 z_loop:
14157         lsr.l           &1,%d0          # shift next bit into carry
14158         bcc.b           z_next          # if zero, skip the mul
14159         fmul.x          (%a1,%d3),%fp2  # mul by 10**(d3_bit_no)
14160 z_next:
14161         add.l           &12,%d3         # inc d3 to next pwrten table entry
14162         tst.l           %d0             # test if LEN is zero
14163         bne.b           z_loop          # if not, loop
14164         fabs.x          %fp0            # get abs(YINT)
14165         fcmp.x          %fp0,%fp2       # check if abs(YINT) = 10^LEN
14166         fbneq.w         A14_st          # if not, skip this
14167         fdiv.s          FTEN(%pc),%fp0  # divide abs(YINT) by 10
14168         addq.l          &1,%d6          # and inc ILOG by 1
14169         addq.l          &1,%d4          # and inc LEN
14170         fmul.s          FTEN(%pc),%fp2  # if LEN++, the get 10^^LEN
14171
14172 # A14. Convert the mantissa to bcd.
14173 #      The binstr routine is used to convert the LEN digit
14174 #      mantissa to bcd in memory.  The input to binstr is
14175 #      to be a fraction; i.e. (mantissa)/10^LEN and adjusted
14176 #      such that the decimal point is to the left of bit 63.
14177 #      The bcd digits are stored in the correct position in
14178 #      the final string area in memory.
14179 #
14180 #
14181 # Register usage:
14182 #       Input/Output
14183 #       d0: x/LEN call to binstr - final is 0
14184 #       d1: x/0
14185 #       d2: x/ms 32-bits of mant of abs(YINT)
14186 #       d3: x/ls 32-bits of mant of abs(YINT)
14187 #       d4: LEN/Unchanged
14188 #       d5: ICTR:LAMBDA/LAMBDA:ICTR
14189 #       d6: ILOG
14190 #       d7: k-factor/Unchanged
14191 #       a0: pointer into memory for packed bcd string formation
14192 #           /ptr to first mantissa byte in result string
14193 #       a1: ptr to PTENxx array/Unchanged
14194 #       a2: ptr to FP_SCR1(a6)/Unchanged
14195 #       fp0: int portion of Y/abs(YINT) adjusted
14196 #       fp1: 10^ISCALE/Unchanged
14197 #       fp2: 10^LEN/Unchanged
14198 #       F_SCR1:x/Work area for final result
14199 #       F_SCR2:Y with original exponent/Unchanged
14200 #       L_SCR1:original USER_FPCR/Unchanged
14201 #       L_SCR2:first word of X packed/Unchanged
14202
14203 A14_st:
14204         fmov.l          &rz_mode*0x10,%fpcr     # force rz for conversion
14205         fdiv.x          %fp2,%fp0       # divide abs(YINT) by 10^LEN
14206         lea.l           FP_SCR0(%a6),%a0
14207         fmov.x          %fp0,(%a0)      # move abs(YINT)/10^LEN to memory
14208         mov.l           4(%a0),%d2      # move 2nd word of FP_RES to d2
14209         mov.l           8(%a0),%d3      # move 3rd word of FP_RES to d3
14210         clr.l           4(%a0)          # zero word 2 of FP_RES
14211         clr.l           8(%a0)          # zero word 3 of FP_RES
14212         mov.l           (%a0),%d0       # move exponent to d0
14213         swap            %d0             # put exponent in lower word
14214         beq.b           no_sft          # if zero, don't shift
14215         sub.l           &0x3ffd,%d0     # sub bias less 2 to make fract
14216         tst.l           %d0             # check if > 1
14217         bgt.b           no_sft          # if so, don't shift
14218         neg.l           %d0             # make exp positive
14219 m_loop:
14220         lsr.l           &1,%d2          # shift d2:d3 right, add 0s
14221         roxr.l          &1,%d3          # the number of places
14222         dbf.w           %d0,m_loop      # given in d0
14223 no_sft:
14224         tst.l           %d2             # check for mantissa of zero
14225         bne.b           no_zr           # if not, go on
14226         tst.l           %d3             # continue zero check
14227         beq.b           zer_m           # if zero, go directly to binstr
14228 no_zr:
14229         clr.l           %d1             # put zero in d1 for addx
14230         add.l           &0x00000080,%d3 # inc at bit 7
14231         addx.l          %d1,%d2         # continue inc
14232         and.l           &0xffffff80,%d3 # strip off lsb not used by 882
14233 zer_m:
14234         mov.l           %d4,%d0         # put LEN in d0 for binstr call
14235         addq.l          &3,%a0          # a0 points to M16 byte in result
14236         bsr             binstr          # call binstr to convert mant
14237
14238
14239 # A15. Convert the exponent to bcd.
14240 #      As in A14 above, the exp is converted to bcd and the
14241 #      digits are stored in the final string.
14242 #
14243 #      Digits are stored in L_SCR1(a6) on return from BINDEC as:
14244 #
14245 #        32               16 15                0
14246 #       -----------------------------------------
14247 #       |  0 | e3 | e2 | e1 | e4 |  X |  X |  X |
14248 #       -----------------------------------------
14249 #
14250 # And are moved into their proper places in FP_SCR0.  If digit e4
14251 # is non-zero, OPERR is signaled.  In all cases, all 4 digits are
14252 # written as specified in the 881/882 manual for packed decimal.
14253 #
14254 # Register usage:
14255 #       Input/Output
14256 #       d0: x/LEN call to binstr - final is 0
14257 #       d1: x/scratch (0);shift count for final exponent packing
14258 #       d2: x/ms 32-bits of exp fraction/scratch
14259 #       d3: x/ls 32-bits of exp fraction
14260 #       d4: LEN/Unchanged
14261 #       d5: ICTR:LAMBDA/LAMBDA:ICTR
14262 #       d6: ILOG
14263 #       d7: k-factor/Unchanged
14264 #       a0: ptr to result string/ptr to L_SCR1(a6)
14265 #       a1: ptr to PTENxx array/Unchanged
14266 #       a2: ptr to FP_SCR1(a6)/Unchanged
14267 #       fp0: abs(YINT) adjusted/float(ILOG)
14268 #       fp1: 10^ISCALE/Unchanged
14269 #       fp2: 10^LEN/Unchanged
14270 #       F_SCR1:Work area for final result/BCD result
14271 #       F_SCR2:Y with original exponent/ILOG/10^4
14272 #       L_SCR1:original USER_FPCR/Exponent digits on return from binstr
14273 #       L_SCR2:first word of X packed/Unchanged
14274
14275 A15_st:
14276         tst.b           BINDEC_FLG(%a6) # check for denorm
14277         beq.b           not_denorm
14278         ftest.x         %fp0            # test for zero
14279         fbeq.w          den_zero        # if zero, use k-factor or 4933
14280         fmov.l          %d6,%fp0        # float ILOG
14281         fabs.x          %fp0            # get abs of ILOG
14282         bra.b           convrt
14283 den_zero:
14284         tst.l           %d7             # check sign of the k-factor
14285         blt.b           use_ilog        # if negative, use ILOG
14286         fmov.s          F4933(%pc),%fp0 # force exponent to 4933
14287         bra.b           convrt          # do it
14288 use_ilog:
14289         fmov.l          %d6,%fp0        # float ILOG
14290         fabs.x          %fp0            # get abs of ILOG
14291         bra.b           convrt
14292 not_denorm:
14293         ftest.x         %fp0            # test for zero
14294         fbneq.w         not_zero        # if zero, force exponent
14295         fmov.s          FONE(%pc),%fp0  # force exponent to 1
14296         bra.b           convrt          # do it
14297 not_zero:
14298         fmov.l          %d6,%fp0        # float ILOG
14299         fabs.x          %fp0            # get abs of ILOG
14300 convrt:
14301         fdiv.x          24(%a1),%fp0    # compute ILOG/10^4
14302         fmov.x          %fp0,FP_SCR1(%a6)       # store fp0 in memory
14303         mov.l           4(%a2),%d2      # move word 2 to d2
14304         mov.l           8(%a2),%d3      # move word 3 to d3
14305         mov.w           (%a2),%d0       # move exp to d0
14306         beq.b           x_loop_fin      # if zero, skip the shift
14307         sub.w           &0x3ffd,%d0     # subtract off bias
14308         neg.w           %d0             # make exp positive
14309 x_loop:
14310         lsr.l           &1,%d2          # shift d2:d3 right
14311         roxr.l          &1,%d3          # the number of places
14312         dbf.w           %d0,x_loop      # given in d0
14313 x_loop_fin:
14314         clr.l           %d1             # put zero in d1 for addx
14315         add.l           &0x00000080,%d3 # inc at bit 6
14316         addx.l          %d1,%d2         # continue inc
14317         and.l           &0xffffff80,%d3 # strip off lsb not used by 882
14318         mov.l           &4,%d0          # put 4 in d0 for binstr call
14319         lea.l           L_SCR1(%a6),%a0 # a0 is ptr to L_SCR1 for exp digits
14320         bsr             binstr          # call binstr to convert exp
14321         mov.l           L_SCR1(%a6),%d0 # load L_SCR1 lword to d0
14322         mov.l           &12,%d1         # use d1 for shift count
14323         lsr.l           %d1,%d0         # shift d0 right by 12
14324         bfins           %d0,FP_SCR0(%a6){&4:&12}        # put e3:e2:e1 in FP_SCR0
14325         lsr.l           %d1,%d0         # shift d0 right by 12
14326         bfins           %d0,FP_SCR0(%a6){&16:&4}        # put e4 in FP_SCR0
14327         tst.b           %d0             # check if e4 is zero
14328         beq.b           A16_st          # if zero, skip rest
14329         or.l            &opaop_mask,USER_FPSR(%a6)      # set OPERR & AIOP in USER_FPSR
14330
14331
14332 # A16. Write sign bits to final string.
14333 #          Sigma is bit 31 of initial value; RHO is bit 31 of d6 (ILOG).
14334 #
14335 # Register usage:
14336 #       Input/Output
14337 #       d0: x/scratch - final is x
14338 #       d2: x/x
14339 #       d3: x/x
14340 #       d4: LEN/Unchanged
14341 #       d5: ICTR:LAMBDA/LAMBDA:ICTR
14342 #       d6: ILOG/ILOG adjusted
14343 #       d7: k-factor/Unchanged
14344 #       a0: ptr to L_SCR1(a6)/Unchanged
14345 #       a1: ptr to PTENxx array/Unchanged
14346 #       a2: ptr to FP_SCR1(a6)/Unchanged
14347 #       fp0: float(ILOG)/Unchanged
14348 #       fp1: 10^ISCALE/Unchanged
14349 #       fp2: 10^LEN/Unchanged
14350 #       F_SCR1:BCD result with correct signs
14351 #       F_SCR2:ILOG/10^4
14352 #       L_SCR1:Exponent digits on return from binstr
14353 #       L_SCR2:first word of X packed/Unchanged
14354
14355 A16_st:
14356         clr.l           %d0             # clr d0 for collection of signs
14357         and.b           &0x0f,FP_SCR0(%a6)      # clear first nibble of FP_SCR0
14358         tst.l           L_SCR2(%a6)     # check sign of original mantissa
14359         bge.b           mant_p          # if pos, don't set SM
14360         mov.l           &2,%d0          # move 2 in to d0 for SM
14361 mant_p:
14362         tst.l           %d6             # check sign of ILOG
14363         bge.b           wr_sgn          # if pos, don't set SE
14364         addq.l          &1,%d0          # set bit 0 in d0 for SE
14365 wr_sgn:
14366         bfins           %d0,FP_SCR0(%a6){&0:&2} # insert SM and SE into FP_SCR0
14367
14368 # Clean up and restore all registers used.
14369
14370         fmov.l          &0,%fpsr        # clear possible inex2/ainex bits
14371         fmovm.x         (%sp)+,&0xe0    #  {%fp0-%fp2}
14372         movm.l          (%sp)+,&0x4fc   #  {%d2-%d7/%a2}
14373         rts
14374
14375         global          PTENRN
14376 PTENRN:
14377         long            0x40020000,0xA0000000,0x00000000        # 10 ^ 1
14378         long            0x40050000,0xC8000000,0x00000000        # 10 ^ 2
14379         long            0x400C0000,0x9C400000,0x00000000        # 10 ^ 4
14380         long            0x40190000,0xBEBC2000,0x00000000        # 10 ^ 8
14381         long            0x40340000,0x8E1BC9BF,0x04000000        # 10 ^ 16
14382         long            0x40690000,0x9DC5ADA8,0x2B70B59E        # 10 ^ 32
14383         long            0x40D30000,0xC2781F49,0xFFCFA6D5        # 10 ^ 64
14384         long            0x41A80000,0x93BA47C9,0x80E98CE0        # 10 ^ 128
14385         long            0x43510000,0xAA7EEBFB,0x9DF9DE8E        # 10 ^ 256
14386         long            0x46A30000,0xE319A0AE,0xA60E91C7        # 10 ^ 512
14387         long            0x4D480000,0xC9767586,0x81750C17        # 10 ^ 1024
14388         long            0x5A920000,0x9E8B3B5D,0xC53D5DE5        # 10 ^ 2048
14389         long            0x75250000,0xC4605202,0x8A20979B        # 10 ^ 4096
14390
14391         global          PTENRP
14392 PTENRP:
14393         long            0x40020000,0xA0000000,0x00000000        # 10 ^ 1
14394         long            0x40050000,0xC8000000,0x00000000        # 10 ^ 2
14395         long            0x400C0000,0x9C400000,0x00000000        # 10 ^ 4
14396         long            0x40190000,0xBEBC2000,0x00000000        # 10 ^ 8
14397         long            0x40340000,0x8E1BC9BF,0x04000000        # 10 ^ 16
14398         long            0x40690000,0x9DC5ADA8,0x2B70B59E        # 10 ^ 32
14399         long            0x40D30000,0xC2781F49,0xFFCFA6D6        # 10 ^ 64
14400         long            0x41A80000,0x93BA47C9,0x80E98CE0        # 10 ^ 128
14401         long            0x43510000,0xAA7EEBFB,0x9DF9DE8E        # 10 ^ 256
14402         long            0x46A30000,0xE319A0AE,0xA60E91C7        # 10 ^ 512
14403         long            0x4D480000,0xC9767586,0x81750C18        # 10 ^ 1024
14404         long            0x5A920000,0x9E8B3B5D,0xC53D5DE5        # 10 ^ 2048
14405         long            0x75250000,0xC4605202,0x8A20979B        # 10 ^ 4096
14406
14407         global          PTENRM
14408 PTENRM:
14409         long            0x40020000,0xA0000000,0x00000000        # 10 ^ 1
14410         long            0x40050000,0xC8000000,0x00000000        # 10 ^ 2
14411         long            0x400C0000,0x9C400000,0x00000000        # 10 ^ 4
14412         long            0x40190000,0xBEBC2000,0x00000000        # 10 ^ 8
14413         long            0x40340000,0x8E1BC9BF,0x04000000        # 10 ^ 16
14414         long            0x40690000,0x9DC5ADA8,0x2B70B59D        # 10 ^ 32
14415         long            0x40D30000,0xC2781F49,0xFFCFA6D5        # 10 ^ 64
14416         long            0x41A80000,0x93BA47C9,0x80E98CDF        # 10 ^ 128
14417         long            0x43510000,0xAA7EEBFB,0x9DF9DE8D        # 10 ^ 256
14418         long            0x46A30000,0xE319A0AE,0xA60E91C6        # 10 ^ 512
14419         long            0x4D480000,0xC9767586,0x81750C17        # 10 ^ 1024
14420         long            0x5A920000,0x9E8B3B5D,0xC53D5DE4        # 10 ^ 2048
14421         long            0x75250000,0xC4605202,0x8A20979A        # 10 ^ 4096
14422
14423 #########################################################################
14424 # binstr(): Converts a 64-bit binary integer to bcd.                    #
14425 #                                                                       #
14426 # INPUT *************************************************************** #
14427 #       d2:d3 = 64-bit binary integer                                   #
14428 #       d0    = desired length (LEN)                                    #
14429 #       a0    = pointer to start in memory for bcd characters           #
14430 #               (This pointer must point to byte 4 of the first         #
14431 #                lword of the packed decimal memory string.)            #
14432 #                                                                       #
14433 # OUTPUT ************************************************************** #
14434 #       a0 = pointer to LEN bcd digits representing the 64-bit integer. #
14435 #                                                                       #
14436 # ALGORITHM *********************************************************** #
14437 #       The 64-bit binary is assumed to have a decimal point before     #
14438 #       bit 63.  The fraction is multiplied by 10 using a mul by 2      #
14439 #       shift and a mul by 8 shift.  The bits shifted out of the        #
14440 #       msb form a decimal digit.  This process is iterated until       #
14441 #       LEN digits are formed.                                          #
14442 #                                                                       #
14443 # A1. Init d7 to 1.  D7 is the byte digit counter, and if 1, the        #
14444 #     digit formed will be assumed the least significant.  This is      #
14445 #     to force the first byte formed to have a 0 in the upper 4 bits.   #
14446 #                                                                       #
14447 # A2. Beginning of the loop:                                            #
14448 #     Copy the fraction in d2:d3 to d4:d5.                              #
14449 #                                                                       #
14450 # A3. Multiply the fraction in d2:d3 by 8 using bit-field               #
14451 #     extracts and shifts.  The three msbs from d2 will go into d1.     #
14452 #                                                                       #
14453 # A4. Multiply the fraction in d4:d5 by 2 using shifts.  The msb        #
14454 #     will be collected by the carry.                                   #
14455 #                                                                       #
14456 # A5. Add using the carry the 64-bit quantities in d2:d3 and d4:d5      #
14457 #     into d2:d3.  D1 will contain the bcd digit formed.                #
14458 #                                                                       #
14459 # A6. Test d7.  If zero, the digit formed is the ms digit.  If non-     #
14460 #     zero, it is the ls digit.  Put the digit in its place in the      #
14461 #     upper word of d0.  If it is the ls digit, write the word          #
14462 #     from d0 to memory.                                                #
14463 #                                                                       #
14464 # A7. Decrement d6 (LEN counter) and repeat the loop until zero.        #
14465 #                                                                       #
14466 #########################################################################
14467
14468 #       Implementation Notes:
14469 #
14470 #       The registers are used as follows:
14471 #
14472 #               d0: LEN counter
14473 #               d1: temp used to form the digit
14474 #               d2: upper 32-bits of fraction for mul by 8
14475 #               d3: lower 32-bits of fraction for mul by 8
14476 #               d4: upper 32-bits of fraction for mul by 2
14477 #               d5: lower 32-bits of fraction for mul by 2
14478 #               d6: temp for bit-field extracts
14479 #               d7: byte digit formation word;digit count {0,1}
14480 #               a0: pointer into memory for packed bcd string formation
14481 #
14482
14483         global          binstr
14484 binstr:
14485         movm.l          &0xff00,-(%sp)  #  {%d0-%d7}
14486
14487 #
14488 # A1: Init d7
14489 #
14490         mov.l           &1,%d7          # init d7 for second digit
14491         subq.l          &1,%d0          # for dbf d0 would have LEN+1 passes
14492 #
14493 # A2. Copy d2:d3 to d4:d5.  Start loop.
14494 #
14495 loop:
14496         mov.l           %d2,%d4         # copy the fraction before muls
14497         mov.l           %d3,%d5         # to d4:d5
14498 #
14499 # A3. Multiply d2:d3 by 8; extract msbs into d1.
14500 #
14501         bfextu          %d2{&0:&3},%d1  # copy 3 msbs of d2 into d1
14502         asl.l           &3,%d2          # shift d2 left by 3 places
14503         bfextu          %d3{&0:&3},%d6  # copy 3 msbs of d3 into d6
14504         asl.l           &3,%d3          # shift d3 left by 3 places
14505         or.l            %d6,%d2         # or in msbs from d3 into d2
14506 #
14507 # A4. Multiply d4:d5 by 2; add carry out to d1.
14508 #
14509         asl.l           &1,%d5          # mul d5 by 2
14510         roxl.l          &1,%d4          # mul d4 by 2
14511         swap            %d6             # put 0 in d6 lower word
14512         addx.w          %d6,%d1         # add in extend from mul by 2
14513 #
14514 # A5. Add mul by 8 to mul by 2.  D1 contains the digit formed.
14515 #
14516         add.l           %d5,%d3         # add lower 32 bits
14517         nop                             # ERRATA FIX #13 (Rev. 1.2 6/6/90)
14518         addx.l          %d4,%d2         # add with extend upper 32 bits
14519         nop                             # ERRATA FIX #13 (Rev. 1.2 6/6/90)
14520         addx.w          %d6,%d1         # add in extend from add to d1
14521         swap            %d6             # with d6 = 0; put 0 in upper word
14522 #
14523 # A6. Test d7 and branch.
14524 #
14525         tst.w           %d7             # if zero, store digit & to loop
14526         beq.b           first_d         # if non-zero, form byte & write
14527 sec_d:
14528         swap            %d7             # bring first digit to word d7b
14529         asl.w           &4,%d7          # first digit in upper 4 bits d7b
14530         add.w           %d1,%d7         # add in ls digit to d7b
14531         mov.b           %d7,(%a0)+      # store d7b byte in memory
14532         swap            %d7             # put LEN counter in word d7a
14533         clr.w           %d7             # set d7a to signal no digits done
14534         dbf.w           %d0,loop        # do loop some more!
14535         bra.b           end_bstr        # finished, so exit
14536 first_d:
14537         swap            %d7             # put digit word in d7b
14538         mov.w           %d1,%d7         # put new digit in d7b
14539         swap            %d7             # put LEN counter in word d7a
14540         addq.w          &1,%d7          # set d7a to signal first digit done
14541         dbf.w           %d0,loop        # do loop some more!
14542         swap            %d7             # put last digit in string
14543         lsl.w           &4,%d7          # move it to upper 4 bits
14544         mov.b           %d7,(%a0)+      # store it in memory string
14545 #
14546 # Clean up and return with result in fp0.
14547 #
14548 end_bstr:
14549         movm.l          (%sp)+,&0xff    #  {%d0-%d7}
14550         rts
14551
14552 #########################################################################
14553 # XDEF **************************************************************** #
14554 #       facc_in_b(): dmem_read_byte failed                              #
14555 #       facc_in_w(): dmem_read_word failed                              #
14556 #       facc_in_l(): dmem_read_long failed                              #
14557 #       facc_in_d(): dmem_read of dbl prec failed                       #
14558 #       facc_in_x(): dmem_read of ext prec failed                       #
14559 #                                                                       #
14560 #       facc_out_b(): dmem_write_byte failed                            #
14561 #       facc_out_w(): dmem_write_word failed                            #
14562 #       facc_out_l(): dmem_write_long failed                            #
14563 #       facc_out_d(): dmem_write of dbl prec failed                     #
14564 #       facc_out_x(): dmem_write of ext prec failed                     #
14565 #                                                                       #
14566 # XREF **************************************************************** #
14567 #       _real_access() - exit through access error handler              #
14568 #                                                                       #
14569 # INPUT *************************************************************** #
14570 #       None                                                            #
14571 #                                                                       #
14572 # OUTPUT ************************************************************** #
14573 #       None                                                            #
14574 #                                                                       #
14575 # ALGORITHM *********************************************************** #
14576 #       Flow jumps here when an FP data fetch call gets an error        #
14577 # result. This means the operating system wants an access error frame   #
14578 # made out of the current exception stack frame.                        #
14579 #       So, we first call restore() which makes sure that any updated   #
14580 # -(an)+ register gets returned to its pre-exception value and then     #
14581 # we change the stack to an acess error stack frame.                    #
14582 #                                                                       #
14583 #########################################################################
14584
14585 facc_in_b:
14586         movq.l          &0x1,%d0                        # one byte
14587         bsr.w           restore                         # fix An
14588
14589         mov.w           &0x0121,EXC_VOFF(%a6)           # set FSLW
14590         bra.w           facc_finish
14591
14592 facc_in_w:
14593         movq.l          &0x2,%d0                        # two bytes
14594         bsr.w           restore                         # fix An
14595
14596         mov.w           &0x0141,EXC_VOFF(%a6)           # set FSLW
14597         bra.b           facc_finish
14598
14599 facc_in_l:
14600         movq.l          &0x4,%d0                        # four bytes
14601         bsr.w           restore                         # fix An
14602
14603         mov.w           &0x0101,EXC_VOFF(%a6)           # set FSLW
14604         bra.b           facc_finish
14605
14606 facc_in_d:
14607         movq.l          &0x8,%d0                        # eight bytes
14608         bsr.w           restore                         # fix An
14609
14610         mov.w           &0x0161,EXC_VOFF(%a6)           # set FSLW
14611         bra.b           facc_finish
14612
14613 facc_in_x:
14614         movq.l          &0xc,%d0                        # twelve bytes
14615         bsr.w           restore                         # fix An
14616
14617         mov.w           &0x0161,EXC_VOFF(%a6)           # set FSLW
14618         bra.b           facc_finish
14619
14620 ################################################################
14621
14622 facc_out_b:
14623         movq.l          &0x1,%d0                        # one byte
14624         bsr.w           restore                         # restore An
14625
14626         mov.w           &0x00a1,EXC_VOFF(%a6)           # set FSLW
14627         bra.b           facc_finish
14628
14629 facc_out_w:
14630         movq.l          &0x2,%d0                        # two bytes
14631         bsr.w           restore                         # restore An
14632
14633         mov.w           &0x00c1,EXC_VOFF(%a6)           # set FSLW
14634         bra.b           facc_finish
14635
14636 facc_out_l:
14637         movq.l          &0x4,%d0                        # four bytes
14638         bsr.w           restore                         # restore An
14639
14640         mov.w           &0x0081,EXC_VOFF(%a6)           # set FSLW
14641         bra.b           facc_finish
14642
14643 facc_out_d:
14644         movq.l          &0x8,%d0                        # eight bytes
14645         bsr.w           restore                         # restore An
14646
14647         mov.w           &0x00e1,EXC_VOFF(%a6)           # set FSLW
14648         bra.b           facc_finish
14649
14650 facc_out_x:
14651         mov.l           &0xc,%d0                        # twelve bytes
14652         bsr.w           restore                         # restore An
14653
14654         mov.w           &0x00e1,EXC_VOFF(%a6)           # set FSLW
14655
14656 # here's where we actually create the access error frame from the
14657 # current exception stack frame.
14658 facc_finish:
14659         mov.l           USER_FPIAR(%a6),EXC_PC(%a6) # store current PC
14660
14661         fmovm.x         EXC_FPREGS(%a6),&0xc0   # restore fp0-fp1
14662         fmovm.l         USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs
14663         movm.l          EXC_DREGS(%a6),&0x0303  # restore d0-d1/a0-a1
14664
14665         unlk            %a6
14666
14667         mov.l           (%sp),-(%sp)            # store SR, hi(PC)
14668         mov.l           0x8(%sp),0x4(%sp)       # store lo(PC)
14669         mov.l           0xc(%sp),0x8(%sp)       # store EA
14670         mov.l           &0x00000001,0xc(%sp)    # store FSLW
14671         mov.w           0x6(%sp),0xc(%sp)       # fix FSLW (size)
14672         mov.w           &0x4008,0x6(%sp)        # store voff
14673
14674         btst            &0x5,(%sp)              # supervisor or user mode?
14675         beq.b           facc_out2               # user
14676         bset            &0x2,0xd(%sp)           # set supervisor TM bit
14677
14678 facc_out2:
14679         bra.l           _real_access
14680
14681 ##################################################################
14682
14683 # if the effective addressing mode was predecrement or postincrement,
14684 # the emulation has already changed its value to the correct post-
14685 # instruction value. but since we're exiting to the access error
14686 # handler, then AN must be returned to its pre-instruction value.
14687 # we do that here.
14688 restore:
14689         mov.b           EXC_OPWORD+0x1(%a6),%d1
14690         andi.b          &0x38,%d1               # extract opmode
14691         cmpi.b          %d1,&0x18               # postinc?
14692         beq.w           rest_inc
14693         cmpi.b          %d1,&0x20               # predec?
14694         beq.w           rest_dec
14695         rts
14696
14697 rest_inc:
14698         mov.b           EXC_OPWORD+0x1(%a6),%d1
14699         andi.w          &0x0007,%d1             # fetch An
14700
14701         mov.w           (tbl_rest_inc.b,%pc,%d1.w*2),%d1
14702         jmp             (tbl_rest_inc.b,%pc,%d1.w*1)
14703
14704 tbl_rest_inc:
14705         short           ri_a0 - tbl_rest_inc
14706         short           ri_a1 - tbl_rest_inc
14707         short           ri_a2 - tbl_rest_inc
14708         short           ri_a3 - tbl_rest_inc
14709         short           ri_a4 - tbl_rest_inc
14710         short           ri_a5 - tbl_rest_inc
14711         short           ri_a6 - tbl_rest_inc
14712         short           ri_a7 - tbl_rest_inc
14713
14714 ri_a0:
14715         sub.l           %d0,EXC_DREGS+0x8(%a6)  # fix stacked a0
14716         rts
14717 ri_a1:
14718         sub.l           %d0,EXC_DREGS+0xc(%a6)  # fix stacked a1
14719         rts
14720 ri_a2:
14721         sub.l           %d0,%a2                 # fix a2
14722         rts
14723 ri_a3:
14724         sub.l           %d0,%a3                 # fix a3
14725         rts
14726 ri_a4:
14727         sub.l           %d0,%a4                 # fix a4
14728         rts
14729 ri_a5:
14730         sub.l           %d0,%a5                 # fix a5
14731         rts
14732 ri_a6:
14733         sub.l           %d0,(%a6)               # fix stacked a6
14734         rts
14735 # if it's a fmove out instruction, we don't have to fix a7
14736 # because we hadn't changed it yet. if it's an opclass two
14737 # instruction (data moved in) and the exception was in supervisor
14738 # mode, then also also wasn't updated. if it was user mode, then
14739 # restore the correct a7 which is in the USP currently.
14740 ri_a7:
14741         cmpi.b          EXC_VOFF(%a6),&0x30     # move in or out?
14742         bne.b           ri_a7_done              # out
14743
14744         btst            &0x5,EXC_SR(%a6)        # user or supervisor?
14745         bne.b           ri_a7_done              # supervisor
14746         movc            %usp,%a0                # restore USP
14747         sub.l           %d0,%a0
14748         movc            %a0,%usp
14749 ri_a7_done:
14750         rts
14751
14752 # need to invert adjustment value if the <ea> was predec
14753 rest_dec:
14754         neg.l           %d0
14755         bra.b           rest_inc