# $NetBSD: fpsp.s,v 1.5 2005/12/11 12:17:52 christos Exp $
#~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
# MOTOROLA MICROPROCESSOR & MEMORY TECHNOLOGY GROUP
# M68000 Hi-Performance Microprocessor Division
# M68060 Software Package Production Release
#
# M68060 Software Package Copyright (C) 1993, 1994, 1995, 1996 Motorola Inc.
# All rights reserved.
# THE SOFTWARE is provided on an "AS IS" basis and without warranty.
# To the maximum extent permitted by applicable law,
# MOTOROLA DISCLAIMS ALL WARRANTIES WHETHER EXPRESS OR IMPLIED,
# INCLUDING IMPLIED WARRANTIES OF MERCHANTABILITY OR FITNESS
# FOR A PARTICULAR PURPOSE and any warranty against infringement with
# regard to the SOFTWARE (INCLUDING ANY MODIFIED VERSIONS THEREOF)
# and any accompanying written materials.
# To the maximum extent permitted by applicable law,
# IN NO EVENT SHALL MOTOROLA BE LIABLE FOR ANY DAMAGES WHATSOEVER
# (INCLUDING WITHOUT LIMITATION, DAMAGES FOR LOSS OF BUSINESS PROFITS,
# BUSINESS INTERRUPTION, LOSS OF BUSINESS INFORMATION, OR OTHER PECUNIARY LOSS)
# ARISING OF THE USE OR INABILITY TO USE THE SOFTWARE.
# Motorola assumes no responsibility for the maintenance and support
# of the SOFTWARE.
# You are hereby granted a copyright license to use, modify, and distribute the
# SOFTWARE so long as this entire notice is retained without alteration
# in any modified and/or redistributed versions, and that such modified
# versions are clearly identified as such.
# No licenses are granted by implication, estoppel or otherwise under any
# patents or trademarks of Motorola, Inc.
#~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
# freal.s:
# This file is appended to the top of the 060FPSP package
# and contains the entry points into the package. The user, in
# effect, branches to one of the branch table entries located
# after _060FPSP_TABLE.
# Also, subroutine stubs exist in this file (_fpsp_done for
# example) that are referenced by the FPSP package itself in order
# to call a given routine. The stub routine actually performs the
# callout. The FPSP code does a "bsr" to the stub routine. This
# extra layer of hierarchy adds a slight performance penalty but
# it makes the FPSP code easier to read and more maintainable.
set _off_bsun, 0x00
set _off_snan, 0x04
set _off_operr, 0x08
set _off_ovfl, 0x0c
set _off_unfl, 0x10
set _off_dz, 0x14
set _off_inex, 0x18
set _off_fline, 0x1c
set _off_fpu_dis, 0x20
set _off_trap, 0x24
set _off_trace, 0x28
set _off_access, 0x2c
set _off_done, 0x30
set _off_imr, 0x40
set _off_dmr, 0x44
set _off_dmw, 0x48
set _off_irw, 0x4c
set _off_irl, 0x50
set _off_drb, 0x54
set _off_drw, 0x58
set _off_drl, 0x5c
set _off_dwb, 0x60
set _off_dww, 0x64
set _off_dwl, 0x68
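# Note: the _off_* values above are byte offsets of the call-out slots in the
# 0x80-byte area assumed to sit immediately before _060FPSP_TABLE. As the stub
# routines below show, each slot is expected to hold the displacement of the
# corresponding operating-system routine from the start of that call-out area.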
_060FPSP_TABLE:
###############################################################
# Here's the table of ENTRY POINTS for those linking the package.
bra.l _fpsp_snan
short 0x0000
bra.l _fpsp_operr
short 0x0000
bra.l _fpsp_ovfl
short 0x0000
bra.l _fpsp_unfl
short 0x0000
bra.l _fpsp_dz
short 0x0000
bra.l _fpsp_inex
short 0x0000
bra.l _fpsp_fline
short 0x0000
bra.l _fpsp_unsupp
short 0x0000
bra.l _fpsp_effadd
short 0x0000
space 56
###############################################################
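# Each call-out stub below follows the same pattern: save d0, fetch this
# routine's displacement from the call-out area preceding _060FPSP_TABLE,
# push the resulting absolute address with pea, reload the caller's d0 from
# its stack copy, and let "rtd &0x4" pop the pushed address into the PC while
# discarding the saved d0. The net effect is a jump through the call-out
# table with the registers and stack left exactly as they were.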
global _fpsp_done
_fpsp_done:
mov.l %d0,-(%sp)
mov.l (_060FPSP_TABLE-0x80+_off_done,%pc),%d0
pea.l (_060FPSP_TABLE-0x80,%pc,%d0)
mov.l 0x4(%sp),%d0
rtd &0x4
global _real_ovfl
_real_ovfl:
mov.l %d0,-(%sp)
mov.l (_060FPSP_TABLE-0x80+_off_ovfl,%pc),%d0
pea.l (_060FPSP_TABLE-0x80,%pc,%d0)
mov.l 0x4(%sp),%d0
rtd &0x4
global _real_unfl
_real_unfl:
mov.l %d0,-(%sp)
mov.l (_060FPSP_TABLE-0x80+_off_unfl,%pc),%d0
pea.l (_060FPSP_TABLE-0x80,%pc,%d0)
mov.l 0x4(%sp),%d0
rtd &0x4
global _real_inex
_real_inex:
mov.l %d0,-(%sp)
mov.l (_060FPSP_TABLE-0x80+_off_inex,%pc),%d0
pea.l (_060FPSP_TABLE-0x80,%pc,%d0)
mov.l 0x4(%sp),%d0
rtd &0x4
global _real_bsun
_real_bsun:
mov.l %d0,-(%sp)
mov.l (_060FPSP_TABLE-0x80+_off_bsun,%pc),%d0
pea.l (_060FPSP_TABLE-0x80,%pc,%d0)
mov.l 0x4(%sp),%d0
rtd &0x4
global _real_operr
_real_operr:
mov.l %d0,-(%sp)
mov.l (_060FPSP_TABLE-0x80+_off_operr,%pc),%d0
pea.l (_060FPSP_TABLE-0x80,%pc,%d0)
mov.l 0x4(%sp),%d0
rtd &0x4
global _real_snan
_real_snan:
mov.l %d0,-(%sp)
mov.l (_060FPSP_TABLE-0x80+_off_snan,%pc),%d0
pea.l (_060FPSP_TABLE-0x80,%pc,%d0)
mov.l 0x4(%sp),%d0
rtd &0x4
global _real_dz
_real_dz:
mov.l %d0,-(%sp)
mov.l (_060FPSP_TABLE-0x80+_off_dz,%pc),%d0
pea.l (_060FPSP_TABLE-0x80,%pc,%d0)
mov.l 0x4(%sp),%d0
rtd &0x4
global _real_fline
_real_fline:
mov.l %d0,-(%sp)
mov.l (_060FPSP_TABLE-0x80+_off_fline,%pc),%d0
pea.l (_060FPSP_TABLE-0x80,%pc,%d0)
mov.l 0x4(%sp),%d0
rtd &0x4
global _real_fpu_disabled
_real_fpu_disabled:
mov.l %d0,-(%sp)
mov.l (_060FPSP_TABLE-0x80+_off_fpu_dis,%pc),%d0
pea.l (_060FPSP_TABLE-0x80,%pc,%d0)
mov.l 0x4(%sp),%d0
rtd &0x4
global _real_trap
_real_trap:
mov.l %d0,-(%sp)
mov.l (_060FPSP_TABLE-0x80+_off_trap,%pc),%d0
pea.l (_060FPSP_TABLE-0x80,%pc,%d0)
mov.l 0x4(%sp),%d0
rtd &0x4
global _real_trace
_real_trace:
mov.l %d0,-(%sp)
mov.l (_060FPSP_TABLE-0x80+_off_trace,%pc),%d0
pea.l (_060FPSP_TABLE-0x80,%pc,%d0)
mov.l 0x4(%sp),%d0
rtd &0x4
global _real_access
_real_access:
mov.l %d0,-(%sp)
mov.l (_060FPSP_TABLE-0x80+_off_access,%pc),%d0
pea.l (_060FPSP_TABLE-0x80,%pc,%d0)
mov.l 0x4(%sp),%d0
rtd &0x4
#######################################
global _imem_read
_imem_read:
mov.l %d0,-(%sp)
mov.l (_060FPSP_TABLE-0x80+_off_imr,%pc),%d0
pea.l (_060FPSP_TABLE-0x80,%pc,%d0)
mov.l 0x4(%sp),%d0
rtd &0x4
global _dmem_read
_dmem_read:
mov.l %d0,-(%sp)
mov.l (_060FPSP_TABLE-0x80+_off_dmr,%pc),%d0
pea.l (_060FPSP_TABLE-0x80,%pc,%d0)
mov.l 0x4(%sp),%d0
rtd &0x4
global _dmem_write
_dmem_write:
mov.l %d0,-(%sp)
mov.l (_060FPSP_TABLE-0x80+_off_dmw,%pc),%d0
pea.l (_060FPSP_TABLE-0x80,%pc,%d0)
mov.l 0x4(%sp),%d0
rtd &0x4
global _imem_read_word
_imem_read_word:
mov.l %d0,-(%sp)
mov.l (_060FPSP_TABLE-0x80+_off_irw,%pc),%d0
pea.l (_060FPSP_TABLE-0x80,%pc,%d0)
mov.l 0x4(%sp),%d0
rtd &0x4
global _imem_read_long
_imem_read_long:
mov.l %d0,-(%sp)
mov.l (_060FPSP_TABLE-0x80+_off_irl,%pc),%d0
pea.l (_060FPSP_TABLE-0x80,%pc,%d0)
mov.l 0x4(%sp),%d0
rtd &0x4
global _dmem_read_byte
_dmem_read_byte:
mov.l %d0,-(%sp)
mov.l (_060FPSP_TABLE-0x80+_off_drb,%pc),%d0
pea.l (_060FPSP_TABLE-0x80,%pc,%d0)
mov.l 0x4(%sp),%d0
rtd &0x4
global _dmem_read_word
_dmem_read_word:
mov.l %d0,-(%sp)
mov.l (_060FPSP_TABLE-0x80+_off_drw,%pc),%d0
pea.l (_060FPSP_TABLE-0x80,%pc,%d0)
mov.l 0x4(%sp),%d0
rtd &0x4
global _dmem_read_long
_dmem_read_long:
mov.l %d0,-(%sp)
mov.l (_060FPSP_TABLE-0x80+_off_drl,%pc),%d0
pea.l (_060FPSP_TABLE-0x80,%pc,%d0)
mov.l 0x4(%sp),%d0
rtd &0x4
global _dmem_write_byte
_dmem_write_byte:
mov.l %d0,-(%sp)
mov.l (_060FPSP_TABLE-0x80+_off_dwb,%pc),%d0
pea.l (_060FPSP_TABLE-0x80,%pc,%d0)
mov.l 0x4(%sp),%d0
rtd &0x4
global _dmem_write_word
_dmem_write_word:
mov.l %d0,-(%sp)
mov.l (_060FPSP_TABLE-0x80+_off_dww,%pc),%d0
pea.l (_060FPSP_TABLE-0x80,%pc,%d0)
mov.l 0x4(%sp),%d0
rtd &0x4
global _dmem_write_long
_dmem_write_long:
mov.l %d0,-(%sp)
mov.l (_060FPSP_TABLE-0x80+_off_dwl,%pc),%d0
pea.l (_060FPSP_TABLE-0x80,%pc,%d0)
mov.l 0x4(%sp),%d0
rtd &0x4
# This file contains a set of define statements for constants
# in order to promote readability within the corecode itself.
set LOCAL_SIZE, 192 # stack frame size(bytes)
set LV, -LOCAL_SIZE # stack offset
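# Note: each handler below opens its frame with "link.w %a6,&-LOCAL_SIZE",
# so LV is the offset of the bottom of the 192-byte local area from %a6; the
# LV+x definitions that follow address locals relative to the frame pointer,
# while EXC_SR/EXC_PC/EXC_VOFF/EXC_EA reach the exception stack frame sitting
# above the saved a6.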
set EXC_SR, 0x4 # stack status register
set EXC_PC, 0x6 # stack pc
set EXC_VOFF, 0xa # stacked vector offset
set EXC_EA, 0xc # stacked <ea>
set EXC_FP, 0x0 # frame pointer
set EXC_AREGS, -68 # offset of all address regs
set EXC_DREGS, -100 # offset of all data regs
set EXC_FPREGS, -36 # offset of all fp regs
set EXC_A7, EXC_AREGS+(7*4) # offset of saved a7
set OLD_A7, EXC_AREGS+(6*4) # extra copy of saved a7
set EXC_A6, EXC_AREGS+(6*4) # offset of saved a6
set EXC_A5, EXC_AREGS+(5*4)
set EXC_A4, EXC_AREGS+(4*4)
set EXC_A3, EXC_AREGS+(3*4)
set EXC_A2, EXC_AREGS+(2*4)
set EXC_A1, EXC_AREGS+(1*4)
set EXC_A0, EXC_AREGS+(0*4)
set EXC_D7, EXC_DREGS+(7*4)
set EXC_D6, EXC_DREGS+(6*4)
set EXC_D5, EXC_DREGS+(5*4)
set EXC_D4, EXC_DREGS+(4*4)
set EXC_D3, EXC_DREGS+(3*4)
set EXC_D2, EXC_DREGS+(2*4)
set EXC_D1, EXC_DREGS+(1*4)
set EXC_D0, EXC_DREGS+(0*4)
set EXC_FP0, EXC_FPREGS+(0*12) # offset of saved fp0
set EXC_FP1, EXC_FPREGS+(1*12) # offset of saved fp1
set EXC_FP2, EXC_FPREGS+(2*12) # offset of saved fp2 (not used)
set FP_SCR1, LV+80 # fp scratch 1
set FP_SCR1_EX, FP_SCR1+0
set FP_SCR1_SGN, FP_SCR1+2
set FP_SCR1_HI, FP_SCR1+4
set FP_SCR1_LO, FP_SCR1+8
set FP_SCR0, LV+68 # fp scratch 0
set FP_SCR0_EX, FP_SCR0+0
set FP_SCR0_SGN, FP_SCR0+2
set FP_SCR0_HI, FP_SCR0+4
set FP_SCR0_LO, FP_SCR0+8
set FP_DST, LV+56 # fp destination operand
set FP_DST_EX, FP_DST+0
set FP_DST_SGN, FP_DST+2
set FP_DST_HI, FP_DST+4
set FP_DST_LO, FP_DST+8
set FP_SRC, LV+44 # fp source operand
set FP_SRC_EX, FP_SRC+0
set FP_SRC_SGN, FP_SRC+2
set FP_SRC_HI, FP_SRC+4
set FP_SRC_LO, FP_SRC+8
set USER_FPIAR, LV+40 # FP instr address register
set USER_FPSR, LV+36 # FP status register
set FPSR_CC, USER_FPSR+0 # FPSR condition codes
set FPSR_QBYTE, USER_FPSR+1 # FPSR quotient byte
set FPSR_EXCEPT, USER_FPSR+2 # FPSR exception status byte
set FPSR_AEXCEPT, USER_FPSR+3 # FPSR accrued exception byte
set USER_FPCR, LV+32 # FP control register
set FPCR_ENABLE, USER_FPCR+2 # FPCR exception enable
set FPCR_MODE, USER_FPCR+3 # FPCR rounding mode control
set L_SCR3, LV+28 # integer scratch 3
set L_SCR2, LV+24 # integer scratch 2
set L_SCR1, LV+20 # integer scratch 1
set STORE_FLG, LV+19 # flag: operand store (ie. not fcmp/ftst)
set EXC_TEMP2, LV+24 # temporary space
set EXC_TEMP, LV+16 # temporary space
set DTAG, LV+15 # destination operand type
set STAG, LV+14 # source operand type
set SPCOND_FLG, LV+10 # flag: special case (see below)
set EXC_CC, LV+8 # saved condition codes
set EXC_EXTWPTR, LV+4 # saved current PC (active)
set EXC_EXTWORD, LV+2 # saved extension word
set EXC_CMDREG, LV+2 # saved extension word
set EXC_OPWORD, LV+0 # saved operation word
################################
# Helpful macros
set FTEMP, 0 # offsets within an
set FTEMP_EX, 0 # extended precision
set FTEMP_SGN, 2 # value saved in memory.
set FTEMP_HI, 4
set FTEMP_LO, 8
set FTEMP_GRS, 12
set LOCAL, 0 # offsets within an
set LOCAL_EX, 0 # extended precision
set LOCAL_SGN, 2 # value saved in memory.
set LOCAL_HI, 4
set LOCAL_LO, 8
set LOCAL_GRS, 12
set DST, 0 # offsets within an
set DST_EX, 0 # extended precision
set DST_HI, 4 # value saved in memory.
set DST_LO, 8
set SRC, 0 # offsets within an
set SRC_EX, 0 # extended precision
set SRC_HI, 4 # value saved in memory.
set SRC_LO, 8
set SGL_LO, 0x3f81 # min sgl prec exponent
set SGL_HI, 0x407e # max sgl prec exponent
set DBL_LO, 0x3c01 # min dbl prec exponent
set DBL_HI, 0x43fe # max dbl prec exponent
set EXT_LO, 0x0 # min ext prec exponent
set EXT_HI, 0x7ffe # max ext prec exponent
set EXT_BIAS, 0x3fff # extended precision bias
set SGL_BIAS, 0x007f # single precision bias
set DBL_BIAS, 0x03ff # double precision bias
set NORM, 0x00 # operand type for STAG/DTAG
set ZERO, 0x01 # operand type for STAG/DTAG
set INF, 0x02 # operand type for STAG/DTAG
set QNAN, 0x03 # operand type for STAG/DTAG
set DENORM, 0x04 # operand type for STAG/DTAG
set SNAN, 0x05 # operand type for STAG/DTAG
set UNNORM, 0x06 # operand type for STAG/DTAG
##################
# FPSR/FPCR bits #
##################
set neg_bit, 0x3 # negative result
set z_bit, 0x2 # zero result
set inf_bit, 0x1 # infinite result
set nan_bit, 0x0 # NAN result
set q_sn_bit, 0x7 # sign bit of quotient byte
set bsun_bit, 7 # branch on unordered
set snan_bit, 6 # signalling NAN
set operr_bit, 5 # operand error
set ovfl_bit, 4 # overflow
set unfl_bit, 3 # underflow
set dz_bit, 2 # divide by zero
set inex2_bit, 1 # inexact result 2
set inex1_bit, 0 # inexact result 1
set aiop_bit, 7 # accrued illegal operation bit
set aovfl_bit, 6 # accrued overflow bit
set aunfl_bit, 5 # accrued underflow bit
set adz_bit, 4 # accrued dz bit
set ainex_bit, 3 # accrued inexact bit
#############################
# FPSR individual bit masks #
#############################
set neg_mask, 0x08000000 # negative bit mask (lw)
set inf_mask, 0x02000000 # infinity bit mask (lw)
set z_mask, 0x04000000 # zero bit mask (lw)
set nan_mask, 0x01000000 # nan bit mask (lw)
set neg_bmask, 0x08 # negative bit mask (byte)
set inf_bmask, 0x02 # infinity bit mask (byte)
set z_bmask, 0x04 # zero bit mask (byte)
set nan_bmask, 0x01 # nan bit mask (byte)
set bsun_mask, 0x00008000 # bsun exception mask
set snan_mask, 0x00004000 # snan exception mask
set operr_mask, 0x00002000 # operr exception mask
set ovfl_mask, 0x00001000 # overflow exception mask
set unfl_mask, 0x00000800 # underflow exception mask
set dz_mask, 0x00000400 # dz exception mask
set inex2_mask, 0x00000200 # inex2 exception mask
set inex1_mask, 0x00000100 # inex1 exception mask
set aiop_mask, 0x00000080 # accrued illegal operation
set aovfl_mask, 0x00000040 # accrued overflow
set aunfl_mask, 0x00000020 # accrued underflow
set adz_mask, 0x00000010 # accrued divide by zero
set ainex_mask, 0x00000008 # accrued inexact
######################################
# FPSR combinations used in the FPSP #
######################################
set dzinf_mask, inf_mask+dz_mask+adz_mask
set opnan_mask, nan_mask+operr_mask+aiop_mask
set nzi_mask, 0x01ffffff # clears N, Z, and I
set unfinx_mask, unfl_mask+inex2_mask+aunfl_mask+ainex_mask
set unf2inx_mask, unfl_mask+inex2_mask+ainex_mask
set ovfinx_mask, ovfl_mask+inex2_mask+aovfl_mask+ainex_mask
set inx1a_mask, inex1_mask+ainex_mask
set inx2a_mask, inex2_mask+ainex_mask
set snaniop_mask, nan_mask+snan_mask+aiop_mask
set snaniop2_mask, snan_mask+aiop_mask
set naniop_mask, nan_mask+aiop_mask
set neginf_mask, neg_mask+inf_mask
set infaiop_mask, inf_mask+aiop_mask
set negz_mask, neg_mask+z_mask
set opaop_mask, operr_mask+aiop_mask
set unfl_inx_mask, unfl_mask+aunfl_mask+ainex_mask
set ovfl_inx_mask, ovfl_mask+aovfl_mask+ainex_mask
#########
# misc. #
#########
set rnd_stky_bit, 29 # stky bit pos in longword
set sign_bit, 0x7 # sign bit
set signan_bit, 0x6 # signalling nan bit
set sgl_thresh, 0x3f81 # minimum sgl exponent
set dbl_thresh, 0x3c01 # minimum dbl exponent
set x_mode, 0x0 # extended precision
set s_mode, 0x4 # single precision
set d_mode, 0x8 # double precision
set rn_mode, 0x0 # round-to-nearest
set rz_mode, 0x1 # round-to-zero
set rm_mode, 0x2 # round-to-minus-infinity
set rp_mode, 0x3 # round-to-plus-infinity
set mantissalen, 64 # length of mantissa in bits
set BYTE, 1 # len(byte) == 1 byte
set WORD, 2 # len(word) == 2 bytes
set LONG, 4 # len(longword) == 4 bytes
set BSUN_VEC, 0xc0 # bsun vector offset
set INEX_VEC, 0xc4 # inexact vector offset
set DZ_VEC, 0xc8 # dz vector offset
set UNFL_VEC, 0xcc # unfl vector offset
set OPERR_VEC, 0xd0 # operr vector offset
set OVFL_VEC, 0xd4 # ovfl vector offset
set SNAN_VEC, 0xd8 # snan vector offset
###########################
# SPecial CONDition FLaGs #
###########################
set ftrapcc_flg, 0x01 # flag bit: ftrapcc exception
set fbsun_flg, 0x02 # flag bit: bsun exception
set mia7_flg, 0x04 # flag bit: (a7)+ <ea>
set mda7_flg, 0x08 # flag bit: -(a7) <ea>
set fmovm_flg, 0x40 # flag bit: fmovm instruction
set immed_flg, 0x80 # flag bit: &<data> <ea>
set ftrapcc_bit, 0x0
set fbsun_bit, 0x1
set mia7_bit, 0x2
set mda7_bit, 0x3
set immed_bit, 0x7
##################################
# TRANSCENDENTAL "LAST-OP" FLAGS #
##################################
set FMUL_OP, 0x0 # fmul instr performed last
set FDIV_OP, 0x1 # fdiv performed last
set FADD_OP, 0x2 # fadd performed last
set FMOV_OP, 0x3 # fmov performed last
#############
# CONSTANTS #
#############
T1: long 0x40C62D38,0xD3D64634 # 16381 LOG2 LEAD
T2: long 0x3D6F90AE,0xB1E75CC7 # 16381 LOG2 TRAIL
PI: long 0x40000000,0xC90FDAA2,0x2168C235,0x00000000
PIBY2: long 0x3FFF0000,0xC90FDAA2,0x2168C235,0x00000000
TWOBYPI:
long 0x3FE45F30,0x6DC9C883
#########################################################################
# XDEF **************************************************************** #
# _fpsp_ovfl(): 060FPSP entry point for FP Overflow exception. #
# This handler should be the first code executed upon taking the #
# FP Overflow exception in an operating system. #
# XREF **************************************************************** #
# _imem_read_long() - read instruction longword #
# fix_skewed_ops() - adjust src operand in fsave frame #
# set_tag_x() - determine optype of src/dst operands #
# store_fpreg() - store opclass 0 or 2 result to FP regfile #
# unnorm_fix() - change UNNORM operands to NORM or ZERO #
# load_fpn2() - load dst operand from FP regfile #
# fout() - emulate an opclass 3 instruction #
# tbl_unsupp - addr of table of emulation routines for opclass 0,2 #
# _fpsp_done() - "callout" for 060FPSP exit (all work done!) #
# _real_ovfl() - "callout" for Overflow exception enabled code #
# _real_inex() - "callout" for Inexact exception enabled code #
# _real_trace() - "callout" for Trace exception code #
# INPUT *************************************************************** #
# - The system stack contains the FP Ovfl exception stack frame #
# - The fsave frame contains the source operand #
# OUTPUT ************************************************************** #
# Overflow Exception enabled: #
# - The system stack is unchanged #
# - The fsave frame contains the adjusted src op for opclass 0,2 #
# Overflow Exception disabled: #
# - The system stack is unchanged #
# - The "exception present" flag in the fsave frame is cleared #
# ALGORITHM *********************************************************** #
# On the 060, if an FP overflow is present as the result of any #
# instruction, the 060 will take an overflow exception whether the #
# exception is enabled or disabled in the FPCR. For the disabled case, #
# this handler emulates the instruction to determine what the correct #
# default result should be for the operation. This default result is #
# then stored in either the FP regfile, data regfile, or memory. #
# Finally, the handler exits through the "callout" _fpsp_done() #
# denoting that no exceptional conditions exist within the machine. #
# If the exception is enabled, then this handler must create the #
# exceptional operand and place it in the fsave state frame, and store #
# the default result (only if the instruction is opclass 3). For #
# exceptions enabled, this handler must exit through the "callout" #
# _real_ovfl() so that the operating system enabled overflow handler #
# can handle this case. #
# Two other conditions exist. First, if overflow was disabled #
# but the inexact exception was enabled, this handler must exit #
# through the "callout" _real_inex() regardless of whether the result #
# was inexact. #
# Also, in the case of an opclass three instruction where #
# overflow was disabled and the trace exception was enabled, this #
# handler must exit through the "callout" _real_trace(). #
#########################################################################
global _fpsp_ovfl
_fpsp_ovfl:
#$# sub.l &24,%sp # make room for src/dst
link.w %a6,&-LOCAL_SIZE # init stack frame
fsave FP_SRC(%a6) # grab the "busy" frame
movm.l &0x0303,EXC_DREGS(%a6) # save d0-d1/a0-a1
fmovm.l %fpcr,%fpsr,%fpiar,USER_FPCR(%a6) # save ctrl regs
fmovm.x &0xc0,EXC_FPREGS(%a6) # save fp0-fp1 on stack
# the FPIAR holds the "current PC" of the faulting instruction
mov.l USER_FPIAR(%a6),EXC_EXTWPTR(%a6)
mov.l EXC_EXTWPTR(%a6),%a0 # fetch instruction addr
addq.l &0x4,EXC_EXTWPTR(%a6) # incr instruction ptr
bsr.l _imem_read_long # fetch the instruction words
mov.l %d0,EXC_OPWORD(%a6)
##############################################################################
btst &0x5,EXC_CMDREG(%a6) # is instr an fmove out?
bne.w fovfl_out
lea FP_SRC(%a6),%a0 # pass: ptr to src op
bsr.l fix_skewed_ops # fix src op
# since, I believe, only NORMs and DENORMs can come through here,
# maybe we can avoid the subroutine call.
lea FP_SRC(%a6),%a0 # pass: ptr to src op
bsr.l set_tag_x # tag the operand type
mov.b %d0,STAG(%a6) # maybe NORM,DENORM
# bit five of the fp extension word separates the monadic and dyadic operations
# that can pass through fpsp_ovfl(). remember that fcmp, ftst, and fsincos
# will never take this exception.
btst &0x5,1+EXC_CMDREG(%a6) # is operation monadic or dyadic?
beq.b fovfl_extract # monadic
bfextu EXC_CMDREG(%a6){&6:&3},%d0 # dyadic; load dst reg
bsr.l load_fpn2 # load dst into FP_DST
lea FP_DST(%a6),%a0 # pass: ptr to dst op
bsr.l set_tag_x # tag the operand type
cmpi.b %d0,&UNNORM # is operand an UNNORM?
bne.b fovfl_op2_done # no
bsr.l unnorm_fix # yes; convert to NORM,DENORM,or ZERO
fovfl_op2_done:
mov.b %d0,DTAG(%a6) # save dst optype tag
fovfl_extract:
#$# mov.l FP_SRC_EX(%a6),TRAP_SRCOP_EX(%a6)
#$# mov.l FP_SRC_HI(%a6),TRAP_SRCOP_HI(%a6)
#$# mov.l FP_SRC_LO(%a6),TRAP_SRCOP_LO(%a6)
#$# mov.l FP_DST_EX(%a6),TRAP_DSTOP_EX(%a6)
#$# mov.l FP_DST_HI(%a6),TRAP_DSTOP_HI(%a6)
#$# mov.l FP_DST_LO(%a6),TRAP_DSTOP_LO(%a6)
clr.l %d0
mov.b FPCR_MODE(%a6),%d0 # pass rnd prec/mode
mov.b 1+EXC_CMDREG(%a6),%d1
andi.w &0x007f,%d1 # extract extension
andi.l &0x00ff01ff,USER_FPSR(%a6) # zero all but accrued field
fmov.l &0x0,%fpcr # zero current control regs
fmov.l &0x0,%fpsr
lea FP_SRC(%a6),%a0
lea FP_DST(%a6),%a1
# maybe we can make these entry points ONLY the OVFL entry points of each routine.
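# note on the dispatch below: tbl_unsupp holds longword offsets relative to
# its own base, so the mov.l fetches the offset selected by the extension in
# %d1 and the jsr adds that offset back to the table base to reach the
# emulation routine.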
mov.l (tbl_unsupp.l,%pc,%d1.w*4),%d1 # fetch routine addr
jsr (tbl_unsupp.l,%pc,%d1.l*1)
# the operation has been emulated. the result is in fp0.
# the EXOP, if an exception occurred, is in fp1.
# we must save the default result regardless of whether
# traps are enabled or disabled.
bfextu EXC_CMDREG(%a6){&6:&3},%d0
bsr.l store_fpreg
# the exceptional possibilities we have left ourselves with are ONLY overflow
# and inexact. and, the inexact is such that overflow occurred and was disabled
# but inexact was enabled.
btst &ovfl_bit,FPCR_ENABLE(%a6)
bne.b fovfl_ovfl_on
btst &inex2_bit,FPCR_ENABLE(%a6)
bne.b fovfl_inex_on
fmovm.x EXC_FPREGS(%a6),&0xc0 # restore fp0-fp1
fmovm.l USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs
movm.l EXC_DREGS(%a6),&0x0303 # restore d0-d1/a0-a1
unlk %a6
#$# add.l &24,%sp
bra.l _fpsp_done
# overflow is enabled AND overflow, of course, occurred. so, we have the EXOP
# in fp1. now, simply jump to _real_ovfl()!
fovfl_ovfl_on:
fmovm.x &0x40,FP_SRC(%a6) # save EXOP (fp1) to stack
mov.w &0xe005,2+FP_SRC(%a6) # save exc status
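# note: 0xe005 marks the saved frame as carrying a pending overflow
# exception; compare the 0xe003 (unfl), 0xe001 (inex), 0xe004 (operr), and
# 0xe006 (snan) status words used for the other cases in this package.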
fmovm.x EXC_FPREGS(%a6),&0xc0 # restore fp0-fp1
fmovm.l USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs
movm.l EXC_DREGS(%a6),&0x0303 # restore d0-d1/a0-a1
frestore FP_SRC(%a6) # do this after fmovm,other f<op>s!
unlk %a6
bra.l _real_ovfl
# overflow occurred but is disabled. meanwhile, inexact is enabled. therefore,
# we must jump to real_inex().
fovfl_inex_on:
fmovm.x &0x40,FP_SRC(%a6) # save EXOP (fp1) to stack
mov.b &0xc4,1+EXC_VOFF(%a6) # vector offset = 0xc4
mov.w &0xe001,2+FP_SRC(%a6) # save exc status
fmovm.x EXC_FPREGS(%a6),&0xc0 # restore fp0-fp1
fmovm.l USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs
movm.l EXC_DREGS(%a6),&0x0303 # restore d0-d1/a0-a1
frestore FP_SRC(%a6) # do this after fmovm,other f<op>s!
unlk %a6
bra.l _real_inex
########################################################################
fovfl_out:
#$# mov.l FP_SRC_EX(%a6),TRAP_SRCOP_EX(%a6)
#$# mov.l FP_SRC_HI(%a6),TRAP_SRCOP_HI(%a6)
#$# mov.l FP_SRC_LO(%a6),TRAP_SRCOP_LO(%a6)
# the src operand is definitely a NORM(!), so tag it as such
mov.b &NORM,STAG(%a6) # set src optype tag
clr.l %d0
mov.b FPCR_MODE(%a6),%d0 # pass rnd prec/mode
and.l &0xffff00ff,USER_FPSR(%a6) # zero all but accrued field
fmov.l &0x0,%fpcr # zero current control regs
fmov.l &0x0,%fpsr
lea FP_SRC(%a6),%a0 # pass ptr to src operand
bsr.l fout
btst &ovfl_bit,FPCR_ENABLE(%a6)
bne.w fovfl_ovfl_on
btst &inex2_bit,FPCR_ENABLE(%a6)
bne.w fovfl_inex_on
fmovm.x EXC_FPREGS(%a6),&0xc0 # restore fp0-fp1
fmovm.l USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs
movm.l EXC_DREGS(%a6),&0x0303 # restore d0-d1/a0-a1
unlk %a6
#$# add.l &24,%sp
btst &0x7,(%sp) # is trace on?
beq.l _fpsp_done # no
fmov.l %fpiar,0x8(%sp) # "Current PC" is in FPIAR
mov.w &0x2024,0x6(%sp) # stk fmt = 0x2; voff = 0x024
bra.l _real_trace
#########################################################################
# XDEF **************************************************************** #
# _fpsp_unfl(): 060FPSP entry point for FP Underflow exception. #
# This handler should be the first code executed upon taking the #
# FP Underflow exception in an operating system. #
# XREF **************************************************************** #
# _imem_read_long() - read instruction longword #
# fix_skewed_ops() - adjust src operand in fsave frame #
# set_tag_x() - determine optype of src/dst operands #
# store_fpreg() - store opclass 0 or 2 result to FP regfile #
# unnorm_fix() - change UNNORM operands to NORM or ZERO #
# load_fpn2() - load dst operand from FP regfile #
# fout() - emulate an opclass 3 instruction #
# tbl_unsupp - addr of table of emulation routines for opclass 0,2 #
# _fpsp_done() - "callout" for 060FPSP exit (all work done!) #
# _real_unfl() - "callout" for Underflow exception enabled code #
# _real_inex() - "callout" for Inexact exception enabled code #
# _real_trace() - "callout" for Trace exception code #
# INPUT *************************************************************** #
# - The system stack contains the FP Unfl exception stack frame #
# - The fsave frame contains the source operand #
# OUTPUT ************************************************************** #
# Underflow Exception enabled: #
# - The system stack is unchanged #
# - The fsave frame contains the adjusted src op for opclass 0,2 #
# Underflow Exception disabled: #
# - The system stack is unchanged #
# - The "exception present" flag in the fsave frame is cleared #
# ALGORITHM *********************************************************** #
# On the 060, if an FP underflow is present as the result of any #
# instruction, the 060 will take an underflow exception whether the #
# exception is enabled or disabled in the FPCR. For the disabled case, #
# this handler emulates the instruction to determine what the correct #
# default result should be for the operation. This default result is #
# then stored in either the FP regfile, data regfile, or memory. #
# Finally, the handler exits through the "callout" _fpsp_done() #
# denoting that no exceptional conditions exist within the machine. #
# If the exception is enabled, then this handler must create the #
# exceptional operand and place it in the fsave state frame, and store #
# the default result (only if the instruction is opclass 3). For #
# exceptions enabled, this handler must exit through the "callout" #
# _real_unfl() so that the operating system enabled underflow handler #
# can handle this case. #
# Two other conditions exist. First, if underflow was disabled #
# but the inexact exception was enabled and the result was inexact, #
# this handler must exit through the "callout" _real_inex(). #
# Also, in the case of an opclass three instruction where #
# underflow was disabled and the trace exception was enabled, this #
# handler must exit through the "callout" _real_trace(). #
#########################################################################
global _fpsp_unfl
_fpsp_unfl:
#$# sub.l &24,%sp # make room for src/dst
link.w %a6,&-LOCAL_SIZE # init stack frame
fsave FP_SRC(%a6) # grab the "busy" frame
movm.l &0x0303,EXC_DREGS(%a6) # save d0-d1/a0-a1
fmovm.l %fpcr,%fpsr,%fpiar,USER_FPCR(%a6) # save ctrl regs
fmovm.x &0xc0,EXC_FPREGS(%a6) # save fp0-fp1 on stack
# the FPIAR holds the "current PC" of the faulting instruction
mov.l USER_FPIAR(%a6),EXC_EXTWPTR(%a6)
mov.l EXC_EXTWPTR(%a6),%a0 # fetch instruction addr
addq.l &0x4,EXC_EXTWPTR(%a6) # incr instruction ptr
bsr.l _imem_read_long # fetch the instruction words
mov.l %d0,EXC_OPWORD(%a6)
##############################################################################
btst &0x5,EXC_CMDREG(%a6) # is instr an fmove out?
bne.w funfl_out
lea FP_SRC(%a6),%a0 # pass: ptr to src op
bsr.l fix_skewed_ops # fix src op
lea FP_SRC(%a6),%a0 # pass: ptr to src op
bsr.l set_tag_x # tag the operand type
mov.b %d0,STAG(%a6) # maybe NORM,DENORM
# bit five of the fp ext word separates the monadic and dyadic operations
# that can pass through fpsp_unfl(). remember that fcmp and ftst
# will never take this exception.
btst &0x5,1+EXC_CMDREG(%a6) # is op monadic or dyadic?
beq.b funfl_extract # monadic
# now, what's left that's not dyadic is fsincos. we can distinguish it
# from all dyadics by the '0110xxx pattern
btst &0x4,1+EXC_CMDREG(%a6) # is op an fsincos?
bne.b funfl_extract # yes
bfextu EXC_CMDREG(%a6){&6:&3},%d0 # dyadic; load dst reg
bsr.l load_fpn2 # load dst into FP_DST
lea FP_DST(%a6),%a0 # pass: ptr to dst op
bsr.l set_tag_x # tag the operand type
cmpi.b %d0,&UNNORM # is operand an UNNORM?
bne.b funfl_op2_done # no
bsr.l unnorm_fix # yes; convert to NORM,DENORM,or ZERO
funfl_op2_done:
mov.b %d0,DTAG(%a6) # save dst optype tag
funfl_extract:
#$# mov.l FP_SRC_EX(%a6),TRAP_SRCOP_EX(%a6)
#$# mov.l FP_SRC_HI(%a6),TRAP_SRCOP_HI(%a6)
#$# mov.l FP_SRC_LO(%a6),TRAP_SRCOP_LO(%a6)
#$# mov.l FP_DST_EX(%a6),TRAP_DSTOP_EX(%a6)
#$# mov.l FP_DST_HI(%a6),TRAP_DSTOP_HI(%a6)
#$# mov.l FP_DST_LO(%a6),TRAP_DSTOP_LO(%a6)
clr.l %d0
mov.b FPCR_MODE(%a6),%d0 # pass rnd prec/mode
mov.b 1+EXC_CMDREG(%a6),%d1
andi.w &0x007f,%d1 # extract extension
andi.l &0x00ff01ff,USER_FPSR(%a6)
fmov.l &0x0,%fpcr # zero current control regs
fmov.l &0x0,%fpsr
lea FP_SRC(%a6),%a0
lea FP_DST(%a6),%a1
# maybe we can make these entry points ONLY the OVFL entry points of each routine.
mov.l (tbl_unsupp.l,%pc,%d1.w*4),%d1 # fetch routine addr
jsr (tbl_unsupp.l,%pc,%d1.l*1)
bfextu EXC_CMDREG(%a6){&6:&3},%d0
bsr.l store_fpreg
# The `060 FPU multiplier hardware is such that if the result of a
# multiply operation is the smallest possible normalized number
# (0x00000000_80000000_00000000), then the machine will take an
# underflow exception. Since this is incorrect, we need to check
# if our emulation, after re-doing the operation, decided that
# no underflow was called for. We do these checks only in
# funfl_{unfl,inex}_on() because w/ both exceptions disabled, this
# special case will simply exit gracefully with the correct result.
# the exceptional possibilities we have left ourselves with are ONLY underflow
# and inexact. and, the inexact is such that underflow occurred and was disabled
# but inexact was enabled.
btst &unfl_bit,FPCR_ENABLE(%a6)
bne.b funfl_unfl_on
funfl_chkinex:
btst &inex2_bit,FPCR_ENABLE(%a6)
bne.b funfl_inex_on
funfl_exit:
fmovm.x EXC_FPREGS(%a6),&0xc0 # restore fp0-fp1
fmovm.l USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs
movm.l EXC_DREGS(%a6),&0x0303 # restore d0-d1/a0-a1
unlk %a6
#$# add.l &24,%sp
bra.l _fpsp_done
# underflow is enabled AND underflow, of course, occurred. so, we have the EXOP
# in fp1 (don't forget to save fp0). what to do now?
# well, we simply have to go to _real_unfl()!
funfl_unfl_on:
# The `060 FPU multiplier hardware is such that if the result of a
# multiply operation is the smallest possible normalized number
# (0x00000000_80000000_00000000), then the machine will take an
# underflow exception. Since this is incorrect, we check here to see
# if our emulation, after re-doing the operation, decided that
# no underflow was called for.
btst &unfl_bit,FPSR_EXCEPT(%a6)
beq.w funfl_chkinex
funfl_unfl_on2:
fmovm.x &0x40,FP_SRC(%a6) # save EXOP (fp1) to stack
mov.w &0xe003,2+FP_SRC(%a6) # save exc status
fmovm.x EXC_FPREGS(%a6),&0xc0 # restore fp0-fp1
fmovm.l USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs
movm.l EXC_DREGS(%a6),&0x0303 # restore d0-d1/a0-a1
frestore FP_SRC(%a6) # do this after fmovm,other f<op>s!
unlk %a6
bra.l _real_unfl
# underflow occurred but is disabled. meanwhile, inexact is enabled. therefore,
# we must jump to real_inex().
funfl_inex_on:
# The `060 FPU multiplier hardware is such that if the result of a
# multiply operation is the smallest possible normalized number
# (0x00000000_80000000_00000000), then the machine will take an
# underflow exception.
# But, whether bogus or not, if inexact is enabled AND it occurred,
# then we have to branch to real_inex.
btst &inex2_bit,FPSR_EXCEPT(%a6)
beq.w funfl_exit
funfl_inex_on2:
fmovm.x &0x40,FP_SRC(%a6) # save EXOP to stack
mov.b &0xc4,1+EXC_VOFF(%a6) # vector offset = 0xc4
mov.w &0xe001,2+FP_SRC(%a6) # save exc status
fmovm.x EXC_FPREGS(%a6),&0xc0 # restore fp0-fp1
fmovm.l USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs
movm.l EXC_DREGS(%a6),&0x0303 # restore d0-d1/a0-a1
frestore FP_SRC(%a6) # do this after fmovm,other f<op>s!
unlk %a6
bra.l _real_inex
#######################################################################
funfl_out:
#$# mov.l FP_SRC_EX(%a6),TRAP_SRCOP_EX(%a6)
#$# mov.l FP_SRC_HI(%a6),TRAP_SRCOP_HI(%a6)
#$# mov.l FP_SRC_LO(%a6),TRAP_SRCOP_LO(%a6)
# the src operand is definitely a NORM(!), so tag it as such
mov.b &NORM,STAG(%a6) # set src optype tag
clr.l %d0
mov.b FPCR_MODE(%a6),%d0 # pass rnd prec/mode
and.l &0xffff00ff,USER_FPSR(%a6) # zero all but accrued field
fmov.l &0x0,%fpcr # zero current control regs
fmov.l &0x0,%fpsr
lea FP_SRC(%a6),%a0 # pass ptr to src operand
bsr.l fout
btst &unfl_bit,FPCR_ENABLE(%a6)
bne.w funfl_unfl_on2
btst &inex2_bit,FPCR_ENABLE(%a6)
bne.w funfl_inex_on2
fmovm.x EXC_FPREGS(%a6),&0xc0 # restore fp0-fp1
fmovm.l USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs
movm.l EXC_DREGS(%a6),&0x0303 # restore d0-d1/a0-a1
unlk %a6
#$# add.l &24,%sp
btst &0x7,(%sp) # is trace on?
beq.l _fpsp_done # no
fmov.l %fpiar,0x8(%sp) # "Current PC" is in FPIAR
mov.w &0x2024,0x6(%sp) # stk fmt = 0x2; voff = 0x024
bra.l _real_trace
#########################################################################
# XDEF **************************************************************** #
# _fpsp_unsupp(): 060FPSP entry point for FP "Unimplemented #
# Data Type" exception. #
# This handler should be the first code executed upon taking the #
# FP Unimplemented Data Type exception in an operating system. #
# XREF **************************************************************** #
# _imem_read_{word,long}() - read instruction word/longword #
# fix_skewed_ops() - adjust src operand in fsave frame #
# set_tag_x() - determine optype of src/dst operands #
# store_fpreg() - store opclass 0 or 2 result to FP regfile #
# unnorm_fix() - change UNNORM operands to NORM or ZERO #
# load_fpn2() - load dst operand from FP regfile #
# load_fpn1() - load src operand from FP regfile #
# fout() - emulate an opclass 3 instruction #
# tbl_unsupp - addr of table of emulation routines for opclass 0,2 #
# _real_inex() - "callout" to operating system inexact handler #
# _fpsp_done() - "callout" for exit; work all done #
# _real_trace() - "callout" for Trace enabled exception #
# funimp_skew() - adjust fsave src ops to "incorrect" value #
# _real_snan() - "callout" for SNAN exception #
# _real_operr() - "callout" for OPERR exception #
# _real_ovfl() - "callout" for OVFL exception #
# _real_unfl() - "callout" for UNFL exception #
# get_packed() - fetch packed operand from memory #
# INPUT *************************************************************** #
# - The system stack contains the "Unimp Data Type" stk frame #
# - The fsave frame contains the ssrc op (for UNNORM/DENORM) #
# OUTPUT ************************************************************** #
# If Inexact exception (opclass 3): #
# - The system stack is changed to an Inexact exception stk frame #
# If SNAN exception (opclass 3): #
# - The system stack is changed to an SNAN exception stk frame #
# If OPERR exception (opclass 3): #
# - The system stack is changed to an OPERR exception stk frame #
# If OVFL exception (opclass 3): #
# - The system stack is changed to an OVFL exception stk frame #
# If UNFL exception (opclass 3): #
# - The system stack is changed to an UNFL exception stack frame #
# If Trace exception enabled: #
# - The system stack is changed to a Trace exception stack frame #
# Else: (normal case) #
# - Correct result has been stored as appropriate #
# ALGORITHM *********************************************************** #
# Two main instruction types can enter here: (1) DENORM or UNNORM #
# unimplemented data types. These can be either opclass 0,2 or 3 #
# instructions, and (2) PACKED unimplemented data format instructions #
# also of opclasses 0,2, or 3. #
# For UNNORM/DENORM opclass 0 and 2, the handler fetches the src #
# operand from the fsave state frame and the dst operand (if dyadic) #
# from the FP register file. The instruction is then emulated by #
# choosing an emulation routine from a table of routines indexed by #
# instruction type. Once the instruction has been emulated and result #
# saved, then we check to see if any enabled exceptions resulted from #
# instruction emulation. If none, then we exit through the "callout" #
# _fpsp_done(). If there is an enabled FP exception, then we insert #
# this exception into the FPU in the fsave state frame and then exit #
# through _fpsp_done(). #
# PACKED opclass 0 and 2 is similar in how the instruction is #
# emulated and exceptions handled. The differences occur in how the #
# handler loads the packed op (by calling get_packed() routine) and #
# by the fact that a Trace exception could be pending for PACKED ops. #
# If a Trace exception is pending, then the current exception stack #
# frame is changed to a Trace exception stack frame and an exit is #
# made through _real_trace(). #
# For UNNORM/DENORM opclass 3, the actual move out to memory is #
# performed by calling the routine fout(). If no exception should occur #
# as the result of emulation, then an exit either occurs through #
# _fpsp_done() or through _real_trace() if a Trace exception is pending #
# (a Trace stack frame must be created here, too). If an FP exception #
# should occur, then we must create an exception stack frame of that #
# type and jump to either _real_snan(), _real_operr(), _real_inex(), #
# _real_unfl(), or _real_ovfl() as appropriate. PACKED opclass 3 #
# emulation is performed in a similar manner. #
#########################################################################
# (1) DENORM and UNNORM (unimplemented) data types:
# post-instruction
# *****************
# * EA *
# pre-instruction * *
# ***************** *****************
# * 0x0 * 0x0dc * * 0x3 * 0x0dc *
# ***************** *****************
# * Next * * Next *
# * PC * * PC *
# ***************** *****************
# * SR * * SR *
# ***************** *****************
# (2) PACKED format (unsupported) opclasses two and three:
# *****************
# * EA *
# * *
# *****************
# * 0x2 * 0x0dc *
# *****************
# * Next *
# * PC *
# *****************
# * SR *
# *****************
global _fpsp_unsupp
_fpsp_unsupp:
link.w %a6,&-LOCAL_SIZE # init stack frame
fsave FP_SRC(%a6) # save fp state
movm.l &0x0303,EXC_DREGS(%a6) # save d0-d1/a0-a1
fmovm.l %fpcr,%fpsr,%fpiar,USER_FPCR(%a6) # save ctrl regs
fmovm.x &0xc0,EXC_FPREGS(%a6) # save fp0-fp1 on stack
btst &0x5,EXC_SR(%a6) # user or supervisor mode?
bne.b fu_s
fu_u:
mov.l %usp,%a0 # fetch user stack pointer
mov.l %a0,EXC_A7(%a6) # save on stack
bra.b fu_cont
# if the exception is an opclass zero or two unimplemented data type
# exception, then the a7' calculated here is wrong since it doesn't
# stack an ea. however, we don't need an a7' for this case anyways.
fu_s:
lea 0x4+EXC_EA(%a6),%a0 # load old a7'
mov.l %a0,EXC_A7(%a6) # save on stack
fu_cont:
# the FPIAR holds the "current PC" of the faulting instruction
# the FPIAR should be set correctly for ALL exceptions passing through
# this point.
mov.l USER_FPIAR(%a6),EXC_EXTWPTR(%a6)
mov.l EXC_EXTWPTR(%a6),%a0 # fetch instruction addr
addq.l &0x4,EXC_EXTWPTR(%a6) # incr instruction ptr
bsr.l _imem_read_long # fetch the instruction words
mov.l %d0,EXC_OPWORD(%a6) # store OPWORD and EXTWORD
############################
clr.b SPCOND_FLG(%a6) # clear special condition flag
# Separate opclass three (fpn-to-mem) ops since they have a different
# stack frame and protocol.
btst &0x5,EXC_CMDREG(%a6) # is it an fmove out?
bne.w fu_out # yes
# Separate packed opclass two instructions.
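# (the top six bits of the command word are the opclass and source format
# fields; 0x13 should be opclass 2 with the packed-decimal source format)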
bfextu EXC_CMDREG(%a6){&0:&6},%d0
cmpi.b %d0,&0x13
beq.w fu_in_pack
# I'm not sure at this point what FPSR bits are valid for this instruction.
# so, since the emulation routines re-create them anyways, zero exception field
andi.l &0x00ff00ff,USER_FPSR(%a6) # zero exception field
fmov.l &0x0,%fpcr # zero current control regs
fmov.l &0x0,%fpsr
# Opclass two w/ memory-to-fpn operation will have an incorrect extended
# precision format if the src format was single or double and the
# source data type was an INF, NAN, DENORM, or UNNORM
lea FP_SRC(%a6),%a0 # pass ptr to input
bsr.l fix_skewed_ops
# we don't know whether the src operand or the dst operand (or both) is the
# UNNORM or DENORM. call the function that tags the operand type. if the
# input is an UNNORM, then convert it to a NORM, DENORM, or ZERO.
lea FP_SRC(%a6),%a0 # pass: ptr to src op
bsr.l set_tag_x # tag the operand type
cmpi.b %d0,&UNNORM # is operand an UNNORM?
bne.b fu_op2 # no
bsr.l unnorm_fix # yes; convert to NORM,DENORM,or ZERO
fu_op2:
mov.b %d0,STAG(%a6) # save src optype tag
bfextu EXC_CMDREG(%a6){&6:&3},%d0 # dyadic; load dst reg
# bit five of the fp extension word separates the monadic and dyadic operations
# at this point
btst &0x5,1+EXC_CMDREG(%a6) # is operation monadic or dyadic?
beq.b fu_extract # monadic
cmpi.b 1+EXC_CMDREG(%a6),&0x3a # is operation an ftst?
beq.b fu_extract # yes, so it's monadic, too
bsr.l load_fpn2 # load dst into FP_DST
lea FP_DST(%a6),%a0 # pass: ptr to dst op
bsr.l set_tag_x # tag the operand type
cmpi.b %d0,&UNNORM # is operand an UNNORM?
bne.b fu_op2_done # no
bsr.l unnorm_fix # yes; convert to NORM,DENORM,or ZERO
fu_op2_done:
mov.b %d0,DTAG(%a6) # save dst optype tag
fu_extract:
clr.l %d0
mov.b FPCR_MODE(%a6),%d0 # fetch rnd mode/prec
bfextu 1+EXC_CMDREG(%a6){&1:&7},%d1 # extract extension
lea FP_SRC(%a6),%a0
lea FP_DST(%a6),%a1
mov.l (tbl_unsupp.l,%pc,%d1.l*4),%d1 # fetch routine addr
jsr (tbl_unsupp.l,%pc,%d1.l*1)
# Exceptions in order of precedence:
# BSUN : none
# SNAN : all dyadic ops
# OPERR : fsqrt(-NORM)
# OVFL : all except ftst,fcmp
# UNFL : all except ftst,fcmp
# DZ : fdiv
# INEX2 : all except ftst,fcmp
# INEX1 : none (packed doesn't go through here)
# we determine the highest priority exception(if any) set by the
# emulation routine that has also been enabled by the user.
mov.b FPCR_ENABLE(%a6),%d0 # fetch exceptions set
bne.b fu_in_ena # some are enabled
fu_in_cont:
# fcmp and ftst do not store any result.
mov.b 1+EXC_CMDREG(%a6),%d0 # fetch extension
andi.b &0x38,%d0 # extract bits 3-5
cmpi.b %d0,&0x38 # is instr fcmp or ftst?
beq.b fu_in_exit # yes
bfextu EXC_CMDREG(%a6){&6:&3},%d0 # dyadic; load dst reg
bsr.l store_fpreg # store the result
fu_in_exit:
fmovm.x EXC_FPREGS(%a6),&0xc0 # restore fp0/fp1
fmovm.l USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs
movm.l EXC_DREGS(%a6),&0x0303 # restore d0-d1/a0-a1
unlk %a6
bra.l _fpsp_done
fu_in_ena:
and.b FPSR_EXCEPT(%a6),%d0 # keep only ones enabled
bfffo %d0{&24:&8},%d0 # find highest priority exception
bne.b fu_in_exc # there is at least one set
# No exceptions occurred that were also enabled. Now:
# if (OVFL && ovfl_disabled && inexact_enabled) {
# branch to _real_inex() (even if the result was exact!);
# } else {
# save the result in the proper fp reg (unless the op is fcmp or ftst);
# return;
# }
btst &ovfl_bit,FPSR_EXCEPT(%a6) # was overflow set?
beq.b fu_in_cont # no
fu_in_ovflchk:
btst &inex2_bit,FPCR_ENABLE(%a6) # was inexact enabled?
beq.b fu_in_cont # no
bra.w fu_in_exc_ovfl # go insert overflow frame
# An exception occurred and that exception was enabled:
# shift enabled exception field into lo byte of d0;
# if (((INEX2 || INEX1) && inex_enabled && OVFL && ovfl_disabled) ||
# ((INEX2 || INEX1) && inex_enabled && UNFL && unfl_disabled)) {
# /*
# * this is the case where we must call _real_inex() now or else
# * there will be no other way to pass it the exceptional operand
# */
# call _real_inex();
# } else {
# restore exc state (SNAN||OPERR||OVFL||UNFL||DZ||INEX) into the FPU;
# }
fu_in_exc:
subi.l &24,%d0 # fix offset to be 0-7
cmpi.b %d0,&0x6 # is exception INEX? (6)
bne.b fu_in_exc_exit # no
# the enabled exception was inexact
btst &unfl_bit,FPSR_EXCEPT(%a6) # did disabled underflow occur?
bne.w fu_in_exc_unfl # yes
btst &ovfl_bit,FPSR_EXCEPT(%a6) # did disabled overflow occur?
bne.w fu_in_exc_ovfl # yes
# here, we insert the correct fsave status value into the fsave frame for the
# corresponding exception. the operand in the fsave frame should be the original
# src operand.
fu_in_exc_exit:
mov.l %d0,-(%sp) # save d0
bsr.l funimp_skew # skew sgl or dbl inputs
mov.l (%sp)+,%d0 # restore d0
mov.w (tbl_except.b,%pc,%d0.w*2),2+FP_SRC(%a6) # create exc status
fmovm.x EXC_FPREGS(%a6),&0xc0 # restore fp0/fp1
fmovm.l USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs
movm.l EXC_DREGS(%a6),&0x0303 # restore d0-d1/a0-a1
frestore FP_SRC(%a6) # restore src op
unlk %a6
bra.l _fpsp_done
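# fsave exception status words indexed by exception priority:
# BSUN, SNAN, OPERR, OVFL, UNFL, DZ, INEX2, INEX1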
tbl_except:
short 0xe000,0xe006,0xe004,0xe005
short 0xe003,0xe002,0xe001,0xe001
fu_in_exc_unfl:
mov.w &0x4,%d0
bra.b fu_in_exc_exit
fu_in_exc_ovfl:
mov.w &0x03,%d0
bra.b fu_in_exc_exit
# If the input operand to this operation was opclass two and a single
# or double precision denorm, inf, or nan, the operand needs to be
# "corrected" in order to have the proper equivalent extended precision
# number.
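# The "skewed" exponents tested below appear to be the single/double
# exponent fields re-biased into extended precision: a single zero/denorm
# shows up as $3f80 ($3fff-$7f) and inf/nan as $407f, while the double cases
# appear as $3c00 ($3fff-$3ff) and $43ff.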
global fix_skewed_ops
fix_skewed_ops:
bfextu EXC_CMDREG(%a6){&0:&6},%d0 # extract opclass,src fmt
cmpi.b %d0,&0x11 # is class = 2 & fmt = sgl?
beq.b fso_sgl # yes
cmpi.b %d0,&0x15 # is class = 2 & fmt = dbl?
beq.b fso_dbl # yes
rts # no
fso_sgl:
mov.w LOCAL_EX(%a0),%d0 # fetch src exponent
andi.w &0x7fff,%d0 # strip sign
cmpi.w %d0,&0x3f80 # is |exp| == $3f80?
beq.b fso_sgl_dnrm_zero # yes
cmpi.w %d0,&0x407f # no; is |exp| == $407f?
beq.b fso_infnan # yes
rts # no
fso_sgl_dnrm_zero:
andi.l &0x7fffffff,LOCAL_HI(%a0) # clear j-bit
beq.b fso_zero # it's a skewed zero
fso_sgl_dnrm:
# here, we count on norm not to alter a0...
bsr.l norm # normalize mantissa
neg.w %d0 # -shft amt
addi.w &0x3f81,%d0 # adjust new exponent
andi.w &0x8000,LOCAL_EX(%a0) # clear old exponent
or.w %d0,LOCAL_EX(%a0) # insert new exponent
rts
fso_zero:
andi.w &0x8000,LOCAL_EX(%a0) # clear bogus exponent
rts
fso_infnan:
andi.b &0x7f,LOCAL_HI(%a0) # clear j-bit
ori.w &0x7fff,LOCAL_EX(%a0) # make exponent = $7fff
rts
fso_dbl:
mov.w LOCAL_EX(%a0),%d0 # fetch src exponent
andi.w &0x7fff,%d0 # strip sign
cmpi.w %d0,&0x3c00 # is |exp| == $3c00?
beq.b fso_dbl_dnrm_zero # yes
cmpi.w %d0,&0x43ff # no; is |exp| == $43ff?
beq.b fso_infnan # yes
rts # no
fso_dbl_dnrm_zero:
andi.l &0x7fffffff,LOCAL_HI(%a0) # clear j-bit
bne.b fso_dbl_dnrm # it's a skewed denorm
tst.l LOCAL_LO(%a0) # is it a zero?
beq.b fso_zero # yes
fso_dbl_dnrm:
# here, we count on norm not to alter a0...
bsr.l norm # normalize mantissa
neg.w %d0 # -shft amt
addi.w &0x3c01,%d0 # adjust new exponent
andi.w &0x8000,LOCAL_EX(%a0) # clear old exponent
or.w %d0,LOCAL_EX(%a0) # insert new exponent
rts
#################################################################
# fmove out took an unimplemented data type exception.
# the src operand is in FP_SRC. Call _fout() to write out the result and
# to determine which exceptions, if any, to take.
fu_out:
# Separate packed move outs from the UNNORM and DENORM move outs.
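# (bits 3-5 of the command word hold the destination format; 0x3 should be
# packed with a static k-factor and 0x7 packed with a dynamic k-factor)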
bfextu EXC_CMDREG(%a6){&3:&3},%d0
cmpi.b %d0,&0x3
beq.w fu_out_pack
cmpi.b %d0,&0x7
beq.w fu_out_pack
# I'm not sure at this point what FPSR bits are valid for this instruction.
# so, since the emulation routines re-create them anyways, zero exception field.
# fmove out doesn't affect ccodes.
and.l &0xffff00ff,USER_FPSR(%a6) # zero exception field
fmov.l &0x0,%fpcr # zero current control regs
fmov.l &0x0,%fpsr
# the src can ONLY be a DENORM or an UNNORM! so, don't make any big subroutine
# call here. just figure out what it is...
mov.w FP_SRC_EX(%a6),%d0 # get exponent
andi.w &0x7fff,%d0 # strip sign
beq.b fu_out_denorm # it's a DENORM
lea FP_SRC(%a6),%a0
bsr.l unnorm_fix # yes; fix it
mov.b %d0,STAG(%a6)
bra.b fu_out_cont
fu_out_denorm:
mov.b &DENORM,STAG(%a6)
fu_out_cont:
clr.l %d0
mov.b FPCR_MODE(%a6),%d0 # fetch rnd mode/prec
lea FP_SRC(%a6),%a0 # pass ptr to src operand
mov.l (%a6),EXC_A6(%a6) # in case a6 changes
bsr.l fout # call fmove out routine
# Exceptions in order of precedence:
# BSUN : none
# SNAN : none
# OPERR : fmove.{b,w,l} out of large UNNORM
# OVFL : fmove.{s,d}
# UNFL : fmove.{s,d,x}
# DZ : none
# INEX2 : all
# INEX1 : none (packed doesn't travel through here)
# determine the highest priority exception(if any) set by the
# emulation routine that has also been enabled by the user.
mov.b FPCR_ENABLE(%a6),%d0 # fetch exceptions enabled
bne.w fu_out_ena # some are enabled
fu_out_done:
mov.l EXC_A6(%a6),(%a6) # in case a6 changed
# on extended precision opclass three instructions using pre-decrement or
# post-increment addressing mode, the address register is not updated. if the
# address register was the stack pointer used from user mode, then let's update
# it here. if it was used from supervisor mode, then we have to handle this
# as a special case.
btst &0x5,EXC_SR(%a6)
bne.b fu_out_done_s
mov.l EXC_A7(%a6),%a0 # restore a7
mov.l %a0,%usp
fu_out_done_cont:
fmovm.x EXC_FPREGS(%a6),&0xc0 # restore fp0/fp1
fmovm.l USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs
movm.l EXC_DREGS(%a6),&0x0303 # restore d0-d1/a0-a1
unlk %a6
btst &0x7,(%sp) # is trace on?
bne.b fu_out_trace # yes
bra.l _fpsp_done
# is the ea mode pre-decrement of the stack pointer from supervisor mode?
# ("fmov.x fpm,-(a7)") if so,
fu_out_done_s:
cmpi.b SPCOND_FLG(%a6),&mda7_flg
bne.b fu_out_done_cont
# the extended precision result is still in fp0. but, we need to save it
# somewhere on the stack until we can copy it to its final resting place.
# here, we're counting on the top of the stack to be the old place-holders
# for fp0/fp1 which have already been restored. that way, we can write
# over those destinations with the shifted stack frame.
fmovm.x &0x80,FP_SRC(%a6) # put answer on stack
fmovm.x EXC_FPREGS(%a6),&0xc0 # restore fp0/fp1
fmovm.l USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs
movm.l EXC_DREGS(%a6),&0x0303 # restore d0-d1/a0-a1
mov.l (%a6),%a6 # restore frame pointer
mov.l LOCAL_SIZE+EXC_SR(%sp),LOCAL_SIZE+EXC_SR-0xc(%sp)
mov.l LOCAL_SIZE+2+EXC_PC(%sp),LOCAL_SIZE+2+EXC_PC-0xc(%sp)
# now, copy the result to the proper place on the stack
mov.l LOCAL_SIZE+FP_SRC_EX(%sp),LOCAL_SIZE+EXC_SR+0x0(%sp)
mov.l LOCAL_SIZE+FP_SRC_HI(%sp),LOCAL_SIZE+EXC_SR+0x4(%sp)
mov.l LOCAL_SIZE+FP_SRC_LO(%sp),LOCAL_SIZE+EXC_SR+0x8(%sp)
add.l &LOCAL_SIZE-0x8,%sp
btst &0x7,(%sp)
bne.b fu_out_trace
bra.l _fpsp_done
fu_out_ena:
and.b FPSR_EXCEPT(%a6),%d0 # keep only ones enabled
bfffo %d0{&24:&8},%d0 # find highest priority exception
bne.b fu_out_exc # there is at least one set
# no exceptions were set.
# if a disabled overflow occurred and inexact was enabled but the result
# was exact, then a branch to _real_inex() is made.
btst &ovfl_bit,FPSR_EXCEPT(%a6) # was overflow set?
beq.w fu_out_done # no
fu_out_ovflchk:
btst &inex2_bit,FPCR_ENABLE(%a6) # was inexact enabled?
beq.w fu_out_done # no
bra.w fu_inex # yes
# The fp move out that took the "Unimplemented Data Type" exception was
# being traced. Since the stack frames are similar, get the "current" PC
# from FPIAR and put it in the trace stack frame then jump to _real_trace().
# UNSUPP FRAME TRACE FRAME
# ***************** *****************
# * EA * * Current *
# * * * PC *
# ***************** *****************
# * 0x3 * 0x0dc * * 0x2 * 0x024 *
# ***************** *****************
# * Next * * Next *
# * PC * * PC *
# ***************** *****************
# * SR * * SR *
# ***************** *****************
fu_out_trace:
mov.w &0x2024,0x6(%sp)
fmov.l %fpiar,0x8(%sp)
bra.l _real_trace
# an exception occurred and that exception was enabled.
fu_out_exc:
1665 subi.l &24,%d0 # fix offset to be 0-8
1667 # we don't mess with the existing fsave frame. just re-insert it and
1668 # jump to the "_real_{}()" handler...
1669 mov.w (tbl_fu_out.b,%pc,%d0.w*2),%d0
1670 jmp (tbl_fu_out.b,%pc,%d0.w*1)
1672 swbeg &0x8
1673 tbl_fu_out:
1674 short tbl_fu_out - tbl_fu_out # BSUN can't happen
1675 short tbl_fu_out - tbl_fu_out # SNAN can't happen
1676 short fu_operr - tbl_fu_out # OPERR
1677 short fu_ovfl - tbl_fu_out # OVFL
1678 short fu_unfl - tbl_fu_out # UNFL
1679 short tbl_fu_out - tbl_fu_out # DZ can't happen
1680 short fu_inex - tbl_fu_out # INEX2
1681 short tbl_fu_out - tbl_fu_out # INEX1 won't make it here
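#
# the table above holds 16-bit offsets relative to tbl_fu_out, so the
# "mov.w"/"jmp" pair before it amounts to (illustrative C, invented names):
#
#	void *target = (char *)tbl_fu_out + tbl_fu_out[exc]; /* exc = 0-7 */
#	/* a zero offset lands back on the table itself; those are the
#	 * "can't happen" slots */
#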
1683 # for snan,operr,ovfl,unfl, src op is still in FP_SRC so just
1684 # frestore it.
1685 fu_snan:
1686 fmovm.x EXC_FPREGS(%a6),&0xc0 # restore fp0/fp1
1687 fmovm.l USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs
1688 movm.l EXC_DREGS(%a6),&0x0303 # restore d0-d1/a0-a1
1690 mov.w &0x30d8,EXC_VOFF(%a6) # vector offset = 0xd8
1691 mov.w &0xe006,2+FP_SRC(%a6)
1693 frestore FP_SRC(%a6)
1695 unlk %a6
1698 bra.l _real_snan
1700 fu_operr:
1701 fmovm.x EXC_FPREGS(%a6),&0xc0 # restore fp0/fp1
1702 fmovm.l USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs
1703 movm.l EXC_DREGS(%a6),&0x0303 # restore d0-d1/a0-a1
1705 mov.w &0x30d0,EXC_VOFF(%a6) # vector offset = 0xd0
1706 mov.w &0xe004,2+FP_SRC(%a6)
1708 frestore FP_SRC(%a6)
1710 unlk %a6
1713 bra.l _real_operr
1715 fu_ovfl:
1716 fmovm.x &0x40,FP_SRC(%a6) # save EXOP to the stack
1718 fmovm.x EXC_FPREGS(%a6),&0xc0 # restore fp0/fp1
1719 fmovm.l USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs
1720 movm.l EXC_DREGS(%a6),&0x0303 # restore d0-d1/a0-a1
1722 mov.w &0x30d4,EXC_VOFF(%a6) # vector offset = 0xd4
1723 mov.w &0xe005,2+FP_SRC(%a6)
1725 frestore FP_SRC(%a6) # restore EXOP
1727 unlk %a6
1729 bra.l _real_ovfl
1731 # underflow can happen for extended precision. extended precision opclass
1732 # three instruction exceptions don't update the stack pointer. so, if the
1733 # exception occurred from user mode, then simply update a7 and exit normally.
1734 # if the exception occurred from supervisor mode, check if the <ea> mode was -(a7).
1735 fu_unfl:
1736 mov.l EXC_A6(%a6),(%a6) # restore a6
1738 btst &0x5,EXC_SR(%a6)
1739 bne.w fu_unfl_s
1741 mov.l EXC_A7(%a6),%a0 # restore a7 whether we need
1742 mov.l %a0,%usp # to or not...
1744 fu_unfl_cont:
1745 fmovm.x &0x40,FP_SRC(%a6) # save EXOP to the stack
1747 fmovm.x EXC_FPREGS(%a6),&0xc0 # restore fp0/fp1
1748 fmovm.l USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs
1749 movm.l EXC_DREGS(%a6),&0x0303 # restore d0-d1/a0-a1
1751 mov.w &0x30cc,EXC_VOFF(%a6) # vector offset = 0xcc
1752 mov.w &0xe003,2+FP_SRC(%a6)
1754 frestore FP_SRC(%a6) # restore EXOP
1756 unlk %a6
1758 bra.l _real_unfl
1760 fu_unfl_s:
1761 cmpi.b SPCOND_FLG(%a6),&mda7_flg # was the <ea> mode -(sp)?
1762 bne.b fu_unfl_cont
1764 # the extended precision result is still in fp0. but, we need to save it
1765 # somewhere on the stack until we can copy it to its final resting place
1766 # (where the exc frame is currently). make sure it's not at the top of the
1767 # frame or it will get overwritten when the exc stack frame is shifted "down".
1768 fmovm.x &0x80,FP_SRC(%a6) # put answer on stack
1769 fmovm.x &0x40,FP_DST(%a6) # put EXOP on stack
1771 fmovm.x EXC_FPREGS(%a6),&0xc0 # restore fp0/fp1
1772 fmovm.l USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs
1773 movm.l EXC_DREGS(%a6),&0x0303 # restore d0-d1/a0-a1
1775 mov.w &0x30cc,EXC_VOFF(%a6) # vector offset = 0xcc
1776 mov.w &0xe003,2+FP_DST(%a6)
1778 frestore FP_DST(%a6) # restore EXOP
1780 mov.l (%a6),%a6 # restore frame pointer
1782 mov.l LOCAL_SIZE+EXC_SR(%sp),LOCAL_SIZE+EXC_SR-0xc(%sp)
1783 mov.l LOCAL_SIZE+2+EXC_PC(%sp),LOCAL_SIZE+2+EXC_PC-0xc(%sp)
1784 mov.l LOCAL_SIZE+EXC_EA(%sp),LOCAL_SIZE+EXC_EA-0xc(%sp)
1786 # now, copy the result to the proper place on the stack
1787 mov.l LOCAL_SIZE+FP_SRC_EX(%sp),LOCAL_SIZE+EXC_SR+0x0(%sp)
1788 mov.l LOCAL_SIZE+FP_SRC_HI(%sp),LOCAL_SIZE+EXC_SR+0x4(%sp)
1789 mov.l LOCAL_SIZE+FP_SRC_LO(%sp),LOCAL_SIZE+EXC_SR+0x8(%sp)
1791 add.l &LOCAL_SIZE-0x8,%sp
1793 bra.l _real_unfl
1795 # fmove in and out enter here.
1796 fu_inex:
1797 fmovm.x &0x40,FP_SRC(%a6) # save EXOP to the stack
1799 fmovm.x EXC_FPREGS(%a6),&0xc0 # restore fp0/fp1
1800 fmovm.l USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs
1801 movm.l EXC_DREGS(%a6),&0x0303 # restore d0-d1/a0-a1
1803 mov.w &0x30c4,EXC_VOFF(%a6) # vector offset = 0xc4
1804 mov.w &0xe001,2+FP_SRC(%a6)
1806 frestore FP_SRC(%a6) # restore EXOP
1808 unlk %a6
1811 bra.l _real_inex
1813 #########################################################################
1814 #########################################################################
1815 fu_in_pack:
1818 # I'm not sure at this point what FPSR bits are valid for this instruction.
1819 # so, since the emulation routines re-create them anyway, zero the exception field
1820 andi.l &0x0ff00ff,USER_FPSR(%a6) # zero exception field
1822 fmov.l &0x0,%fpcr # zero current control regs
1823 fmov.l &0x0,%fpsr
1825 bsr.l get_packed # fetch packed src operand
1827 lea FP_SRC(%a6),%a0 # pass ptr to src
1828 bsr.l set_tag_x # set src optype tag
1830 mov.b %d0,STAG(%a6) # save src optype tag
1832 bfextu EXC_CMDREG(%a6){&6:&3},%d0 # dyadic; load dst reg
1834 # bit five of the fp extension word separates the monadic and dyadic operations
1835 # at this point
1836 btst &0x5,1+EXC_CMDREG(%a6) # is operation monadic or dyadic?
1837 beq.b fu_extract_p # monadic
1838 cmpi.b 1+EXC_CMDREG(%a6),&0x3a # is operation an ftst?
1839 beq.b fu_extract_p # yes, so it's monadic, too
1841 bsr.l load_fpn2 # load dst into FP_DST
1843 lea FP_DST(%a6),%a0 # pass: ptr to dst op
1844 bsr.l set_tag_x # tag the operand type
1845 cmpi.b %d0,&UNNORM # is operand an UNNORM?
1846 bne.b fu_op2_done_p # no
1847 bsr.l unnorm_fix # yes; convert to NORM,DENORM,or ZERO
1848 fu_op2_done_p:
1849 mov.b %d0,DTAG(%a6) # save dst optype tag
1851 fu_extract_p:
1852 clr.l %d0
1853 mov.b FPCR_MODE(%a6),%d0 # fetch rnd mode/prec
1855 bfextu 1+EXC_CMDREG(%a6){&1:&7},%d1 # extract extension
1857 lea FP_SRC(%a6),%a0
1858 lea FP_DST(%a6),%a1
1860 mov.l (tbl_unsupp.l,%pc,%d1.l*4),%d1 # fetch routine addr
1861 jsr (tbl_unsupp.l,%pc,%d1.l*1)
1864 # Exceptions in order of precedence:
1865 # BSUN : none
1866 # SNAN : all dyadic ops
1867 # OPERR : fsqrt(-NORM)
1868 # OVFL : all except ftst,fcmp
1869 # UNFL : all except ftst,fcmp
1870 # DZ : fdiv
1871 # INEX2 : all except ftst,fcmp
1872 # INEX1 : all
1875 # we determine the highest priority exception(if any) set by the
1876 # emulation routine that has also been enabled by the user.
1877 mov.b FPCR_ENABLE(%a6),%d0 # fetch exceptions enabled
1878 bne.w fu_in_ena_p # some are enabled
1880 fu_in_cont_p:
1881 # fcmp and ftst do not store any result.
1882 mov.b 1+EXC_CMDREG(%a6),%d0 # fetch extension
1883 andi.b &0x38,%d0 # extract bits 3-5
1884 cmpi.b %d0,&0x38 # is instr fcmp or ftst?
1885 beq.b fu_in_exit_p # yes
1887 bfextu EXC_CMDREG(%a6){&6:&3},%d0 # dyadic; load dst reg
1888 bsr.l store_fpreg # store the result
1890 fu_in_exit_p:
1892 btst &0x5,EXC_SR(%a6) # user or supervisor?
1893 bne.w fu_in_exit_s_p # supervisor
1895 mov.l EXC_A7(%a6),%a0 # update user a7
1896 mov.l %a0,%usp
1898 fu_in_exit_cont_p:
1899 fmovm.x EXC_FPREGS(%a6),&0xc0 # restore fp0/fp1
1900 fmovm.l USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs
1901 movm.l EXC_DREGS(%a6),&0x0303 # restore d0-d1/a0-a1
1903 unlk %a6 # unravel stack frame
1905 btst &0x7,(%sp) # is trace on?
1906 bne.w fu_trace_p # yes
1908 bra.l _fpsp_done # exit to os
1910 # the exception occurred in supervisor mode. check to see if the
1911 # addressing mode was (a7)+. if so, we'll need to shift the
1912 # stack frame "up".
1913 fu_in_exit_s_p:
1914 btst &mia7_bit,SPCOND_FLG(%a6) # was ea mode (a7)+
1915 beq.b fu_in_exit_cont_p # no
1917 fmovm.x EXC_FPREGS(%a6),&0xc0 # restore fp0/fp1
1918 fmovm.l USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs
1919 movm.l EXC_DREGS(%a6),&0x0303 # restore d0-d1/a0-a1
1921 unlk %a6 # unravel stack frame
1923 # shift the stack frame "up". we don't really care about the <ea> field.
1924 mov.l 0x4(%sp),0x10(%sp)
1925 mov.l 0x0(%sp),0xc(%sp)
1926 add.l &0xc,%sp
1928 btst &0x7,(%sp) # is trace on?
1929 bne.w fu_trace_p # yes
1931 bra.l _fpsp_done # exit to os
1933 fu_in_ena_p:
1934 and.b FPSR_EXCEPT(%a6),%d0 # keep only ones enabled & set
1935 bfffo %d0{&24:&8},%d0 # find highest priority exception
1936 bne.b fu_in_exc_p # at least one was set
1939 # No exceptions occurred that were also enabled. Now:
1941 # if (OVFL && ovfl_disabled && inexact_enabled) {
1942 # branch to _real_inex() (even if the result was exact!);
1943 # } else {
1944 # save the result in the proper fp reg (unless the op is fcmp or ftst);
1945 # return;
# }
1948 btst &ovfl_bit,FPSR_EXCEPT(%a6) # was overflow set?
1949 beq.w fu_in_cont_p # no
1951 fu_in_ovflchk_p:
1952 btst &inex2_bit,FPCR_ENABLE(%a6) # was inexact enabled?
1953 beq.w fu_in_cont_p # no
1954 bra.w fu_in_exc_ovfl_p # do _real_inex() now
1957 # An exception occurred and that exception was enabled:
1959 # shift enabled exception field into lo byte of d0;
1960 # if (((INEX2 || INEX1) && inex_enabled && OVFL && ovfl_disabled) ||
1961 # ((INEX2 || INEX1) && inex_enabled && UNFL && unfl_disabled)) {
1962 # /*
1963 # * this is the case where we must call _real_inex() now or else
1964 # * there will be no other way to pass it the exceptional operand
1965 # */
1966 # call _real_inex();
1967 # } else {
1968 # restore exc state (SNAN||OPERR||OVFL||UNFL||DZ||INEX) into the FPU;
# }
1971 fu_in_exc_p:
1972 subi.l &24,%d0 # fix offset to be 0-8
1973 cmpi.b %d0,&0x6 # is exception INEX? (6 or 7)
1974 blt.b fu_in_exc_exit_p # no
1976 # the enabled exception was inexact
1977 btst &unfl_bit,FPSR_EXCEPT(%a6) # did disabled underflow occur?
1978 bne.w fu_in_exc_unfl_p # yes
1979 btst &ovfl_bit,FPSR_EXCEPT(%a6) # did disabled overflow occur?
1980 bne.w fu_in_exc_ovfl_p # yes
1982 # here, we insert the correct fsave status value into the fsave frame for the
1983 # corresponding exception. the operand in the fsave frame should be the original
1984 # src operand.
1985 # as a reminder for future predicted pain and agony, we are passing in fsave the
1986 # "non-skewed" operand for cases of sgl and dbl src INFs, NANs, and DENORMs.
1987 # this is INCORRECT for an enabled SNAN, which would give the user the skewed SNAN!!!
1988 fu_in_exc_exit_p:
1989 btst &0x5,EXC_SR(%a6) # user or supervisor?
1990 bne.w fu_in_exc_exit_s_p # supervisor
1992 mov.l EXC_A7(%a6),%a0 # update user a7
1993 mov.l %a0,%usp
1995 fu_in_exc_exit_cont_p:
1996 mov.w (tbl_except_p.b,%pc,%d0.w*2),2+FP_SRC(%a6)
1998 fmovm.x EXC_FPREGS(%a6),&0xc0 # restore fp0/fp1
1999 fmovm.l USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs
2000 movm.l EXC_DREGS(%a6),&0x0303 # restore d0-d1/a0-a1
2002 frestore FP_SRC(%a6) # restore src op
2004 unlk %a6
2006 btst &0x7,(%sp) # is trace enabled?
2007 bne.w fu_trace_p # yes
2009 bra.l _fpsp_done
2011 tbl_except_p:
2012 short 0xe000,0xe006,0xe004,0xe005
2013 short 0xe003,0xe002,0xe001,0xe001
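# (each entry above appears to be the fsave status word for the exception at
# that priority slot -- BSUN, SNAN, OPERR, OVFL, UNFL, DZ, INEX2, INEX1 --
# the SNAN/OPERR/OVFL/UNFL/INEX values match the ones stuffed explicitly by
# the fu_* handlers earlier)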
2015 fu_in_exc_ovfl_p:
2016 mov.w &0x3,%d0
2017 bra.w fu_in_exc_exit_p
2019 fu_in_exc_unfl_p:
2020 mov.w &0x4,%d0
2021 bra.w fu_in_exc_exit_p
2023 fu_in_exc_exit_s_p:
2024 btst &mia7_bit,SPCOND_FLG(%a6)
2025 beq.b fu_in_exc_exit_cont_p
2027 mov.w (tbl_except_p.b,%pc,%d0.w*2),2+FP_SRC(%a6)
2029 fmovm.x EXC_FPREGS(%a6),&0xc0 # restore fp0/fp1
2030 fmovm.l USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs
2031 movm.l EXC_DREGS(%a6),&0x0303 # restore d0-d1/a0-a1
2033 frestore FP_SRC(%a6) # restore src op
2035 unlk %a6 # unravel stack frame
2037 # shift stack frame "up". who cares about <ea> field.
2038 mov.l 0x4(%sp),0x10(%sp)
2039 mov.l 0x0(%sp),0xc(%sp)
2040 add.l &0xc,%sp
2042 btst &0x7,(%sp) # is trace on?
2043 bne.b fu_trace_p # yes
2045 bra.l _fpsp_done # exit to os
2048 # The opclass two PACKED instruction that took an "Unimplemented Data Type"
2049 # exception was being traced. Make the "current" PC the FPIAR and put it in the
2050 # trace stack frame then jump to _real_trace().
2052 # UNSUPP FRAME TRACE FRAME
2053 # ***************** *****************
2054 # * EA * * Current *
2055 # * * * PC *
2056 # ***************** *****************
2057 # * 0x2 * 0x0dc * * 0x2 * 0x024 *
2058 # ***************** *****************
2059 # * Next * * Next *
2060 # * PC * * PC *
2061 # ***************** *****************
2062 # * SR * * SR *
2063 # ***************** *****************
2064 fu_trace_p:
2065 mov.w &0x2024,0x6(%sp)
2066 fmov.l %fpiar,0x8(%sp)
2068 bra.l _real_trace
2070 #########################################################
2071 #########################################################
2072 fu_out_pack:
2075 # I'm not sure at this point what FPSR bits are valid for this instruction.
2076 # so, since the emulation routines re-create them anyway, zero the exception field.
2077 # fmove out doesn't affect ccodes.
2078 and.l &0xffff00ff,USER_FPSR(%a6) # zero exception field
2080 fmov.l &0x0,%fpcr # zero current control regs
2081 fmov.l &0x0,%fpsr
2083 bfextu EXC_CMDREG(%a6){&6:&3},%d0
2084 bsr.l load_fpn1
2086 # unlike other opclass 3 unimplemented data type exceptions, packed must be
2087 # able to detect all operand types.
2088 lea FP_SRC(%a6),%a0
2089 bsr.l set_tag_x # tag the operand type
2090 cmpi.b %d0,&UNNORM # is operand an UNNORM?
2091 bne.b fu_op2_p # no
2092 bsr.l unnorm_fix # yes; convert to NORM,DENORM,or ZERO
2094 fu_op2_p:
2095 mov.b %d0,STAG(%a6) # save src optype tag
2097 clr.l %d0
2098 mov.b FPCR_MODE(%a6),%d0 # fetch rnd mode/prec
2100 lea FP_SRC(%a6),%a0 # pass ptr to src operand
2102 mov.l (%a6),EXC_A6(%a6) # in case a6 changes
2103 bsr.l fout # call fmove out routine
2105 # Exceptions in order of precedence:
2106 # BSUN : no
2107 # SNAN : yes
2108 # OPERR : if ((k_factor > +17) || (dec. exp exceeds 3 digits))
2109 # OVFL : no
2110 # UNFL : no
2111 # DZ : no
2112 # INEX2 : yes
2113 # INEX1 : no
2115 # determine the highest priority exception(if any) set by the
2116 # emulation routine that has also been enabled by the user.
2117 mov.b FPCR_ENABLE(%a6),%d0 # fetch exceptions enabled
2118 bne.w fu_out_ena_p # some are enabled
2120 fu_out_exit_p:
2121 mov.l EXC_A6(%a6),(%a6) # restore a6
2123 btst &0x5,EXC_SR(%a6) # user or supervisor?
2124 bne.b fu_out_exit_s_p # supervisor
2126 mov.l EXC_A7(%a6),%a0 # update user a7
2127 mov.l %a0,%usp
2129 fu_out_exit_cont_p:
2130 fmovm.x EXC_FPREGS(%a6),&0xc0 # restore fp0/fp1
2131 fmovm.l USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs
2132 movm.l EXC_DREGS(%a6),&0x0303 # restore d0-d1/a0-a1
2134 unlk %a6 # unravel stack frame
2136 btst &0x7,(%sp) # is trace on?
2137 bne.w fu_trace_p # yes
2139 bra.l _fpsp_done # exit to os
2141 # the exception occurred in supervisor mode. check to see if the
2142 # addressing mode was -(a7). if so, we'll need to shift the
2143 # stack frame "down".
2144 fu_out_exit_s_p:
2145 btst &mda7_bit,SPCOND_FLG(%a6) # was ea mode -(a7)
2146 beq.b fu_out_exit_cont_p # no
2148 fmovm.x EXC_FPREGS(%a6),&0xc0 # restore fp0/fp1
2149 fmovm.l USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs
2150 movm.l EXC_DREGS(%a6),&0x0303 # restore d0-d1/a0-a1
2152 mov.l (%a6),%a6 # restore frame pointer
2154 mov.l LOCAL_SIZE+EXC_SR(%sp),LOCAL_SIZE+EXC_SR-0xc(%sp)
2155 mov.l LOCAL_SIZE+2+EXC_PC(%sp),LOCAL_SIZE+2+EXC_PC-0xc(%sp)
2157 # now, copy the result to the proper place on the stack
2158 mov.l LOCAL_SIZE+FP_DST_EX(%sp),LOCAL_SIZE+EXC_SR+0x0(%sp)
2159 mov.l LOCAL_SIZE+FP_DST_HI(%sp),LOCAL_SIZE+EXC_SR+0x4(%sp)
2160 mov.l LOCAL_SIZE+FP_DST_LO(%sp),LOCAL_SIZE+EXC_SR+0x8(%sp)
2162 add.l &LOCAL_SIZE-0x8,%sp
2164 btst &0x7,(%sp)
2165 bne.w fu_trace_p
2167 bra.l _fpsp_done
2169 fu_out_ena_p:
2170 and.b FPSR_EXCEPT(%a6),%d0 # keep only ones enabled
2171 bfffo %d0{&24:&8},%d0 # find highest priority exception
2172 beq.w fu_out_exit_p
2174 mov.l EXC_A6(%a6),(%a6) # restore a6
2176 # an exception occurred and that exception was enabled.
2177 # the only exceptions possible on a packed move out are INEX, OPERR, and SNAN.
2178 fu_out_exc_p:
2179 cmpi.b %d0,&0x1a
2180 bgt.w fu_inex_p2
2181 beq.w fu_operr_p
2183 fu_snan_p:
2184 btst &0x5,EXC_SR(%a6)
2185 bne.b fu_snan_s_p
2187 mov.l EXC_A7(%a6),%a0
2188 mov.l %a0,%usp
2189 bra.w fu_snan
2191 fu_snan_s_p:
2192 cmpi.b SPCOND_FLG(%a6),&mda7_flg
2193 bne.w fu_snan
2195 # the instruction was "fmove.p fpn,-(a7)" from supervisor mode.
2196 # the strategy is to move the exception frame "down" 12 bytes. then, we
2197 # can store the default result where the exception frame was.
2198 fmovm.x EXC_FPREGS(%a6),&0xc0 # restore fp0/fp1
2199 fmovm.l USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs
2200 movm.l EXC_DREGS(%a6),&0x0303 # restore d0-d1/a0-a1
2202 mov.w &0x30d8,EXC_VOFF(%a6) # vector offset = 0xd8
2203 mov.w &0xe006,2+FP_SRC(%a6) # set fsave status
2205 frestore FP_SRC(%a6) # restore src operand
2207 mov.l (%a6),%a6 # restore frame pointer
2209 mov.l LOCAL_SIZE+EXC_SR(%sp),LOCAL_SIZE+EXC_SR-0xc(%sp)
2210 mov.l LOCAL_SIZE+2+EXC_PC(%sp),LOCAL_SIZE+2+EXC_PC-0xc(%sp)
2211 mov.l LOCAL_SIZE+EXC_EA(%sp),LOCAL_SIZE+EXC_EA-0xc(%sp)
2213 # now, we copy the default result to its proper location
2214 mov.l LOCAL_SIZE+FP_DST_EX(%sp),LOCAL_SIZE+0x4(%sp)
2215 mov.l LOCAL_SIZE+FP_DST_HI(%sp),LOCAL_SIZE+0x8(%sp)
2216 mov.l LOCAL_SIZE+FP_DST_LO(%sp),LOCAL_SIZE+0xc(%sp)
2218 add.l &LOCAL_SIZE-0x8,%sp
2221 bra.l _real_snan
2223 fu_operr_p:
2224 btst &0x5,EXC_SR(%a6)
2225 bne.w fu_operr_p_s
2227 mov.l EXC_A7(%a6),%a0
2228 mov.l %a0,%usp
2229 bra.w fu_operr
2231 fu_operr_p_s:
2232 cmpi.b SPCOND_FLG(%a6),&mda7_flg
2233 bne.w fu_operr
2235 # the instruction was "fmove.p fpn,-(a7)" from supervisor mode.
2236 # the strategy is to move the exception frame "down" 12 bytes. then, we
2237 # can store the default result where the exception frame was.
2238 fmovm.x EXC_FPREGS(%a6),&0xc0 # restore fp0/fp1
2239 fmovm.l USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs
2240 movm.l EXC_DREGS(%a6),&0x0303 # restore d0-d1/a0-a1
2242 mov.w &0x30d0,EXC_VOFF(%a6) # vector offset = 0xd0
2243 mov.w &0xe004,2+FP_SRC(%a6) # set fsave status
2245 frestore FP_SRC(%a6) # restore src operand
2247 mov.l (%a6),%a6 # restore frame pointer
2249 mov.l LOCAL_SIZE+EXC_SR(%sp),LOCAL_SIZE+EXC_SR-0xc(%sp)
2250 mov.l LOCAL_SIZE+2+EXC_PC(%sp),LOCAL_SIZE+2+EXC_PC-0xc(%sp)
2251 mov.l LOCAL_SIZE+EXC_EA(%sp),LOCAL_SIZE+EXC_EA-0xc(%sp)
2253 # now, we copy the default result to its proper location
2254 mov.l LOCAL_SIZE+FP_DST_EX(%sp),LOCAL_SIZE+0x4(%sp)
2255 mov.l LOCAL_SIZE+FP_DST_HI(%sp),LOCAL_SIZE+0x8(%sp)
2256 mov.l LOCAL_SIZE+FP_DST_LO(%sp),LOCAL_SIZE+0xc(%sp)
2258 add.l &LOCAL_SIZE-0x8,%sp
2261 bra.l _real_operr
2263 fu_inex_p2:
2264 btst &0x5,EXC_SR(%a6)
2265 bne.w fu_inex_s_p2
2267 mov.l EXC_A7(%a6),%a0
2268 mov.l %a0,%usp
2269 bra.w fu_inex
2271 fu_inex_s_p2:
2272 cmpi.b SPCOND_FLG(%a6),&mda7_flg
2273 bne.w fu_inex
2275 # the instruction was "fmove.p fpn,-(a7)" from supervisor mode.
2276 # the strategy is to move the exception frame "down" 12 bytes. then, we
2277 # can store the default result where the exception frame was.
2278 fmovm.x EXC_FPREGS(%a6),&0xc0 # restore fp0/fp1
2279 fmovm.l USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs
2280 movm.l EXC_DREGS(%a6),&0x0303 # restore d0-d1/a0-a1
2282 mov.w &0x30c4,EXC_VOFF(%a6) # vector offset = 0xc4
2283 mov.w &0xe001,2+FP_SRC(%a6) # set fsave status
2285 frestore FP_SRC(%a6) # restore src operand
2287 mov.l (%a6),%a6 # restore frame pointer
2289 mov.l LOCAL_SIZE+EXC_SR(%sp),LOCAL_SIZE+EXC_SR-0xc(%sp)
2290 mov.l LOCAL_SIZE+2+EXC_PC(%sp),LOCAL_SIZE+2+EXC_PC-0xc(%sp)
2291 mov.l LOCAL_SIZE+EXC_EA(%sp),LOCAL_SIZE+EXC_EA-0xc(%sp)
2293 # now, we copy the default result to its proper location
2294 mov.l LOCAL_SIZE+FP_DST_EX(%sp),LOCAL_SIZE+0x4(%sp)
2295 mov.l LOCAL_SIZE+FP_DST_HI(%sp),LOCAL_SIZE+0x8(%sp)
2296 mov.l LOCAL_SIZE+FP_DST_LO(%sp),LOCAL_SIZE+0xc(%sp)
2298 add.l &LOCAL_SIZE-0x8,%sp
2301 bra.l _real_inex
2303 #########################################################################
2306 # if we're stuffing a source operand back into an fsave frame then we
2307 # have to make sure that for single or double source operands that the
2308 # format stuffed is as weird as the hardware usually makes it.
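#
# the single precision case below, loosely in C (a sketch only; field names
# are invented -- the double case is analogous with threshold 0x3c00):
#
#	if (exp != 0 && exp <= 0x3f80) {                 /* would be a sgl DENORM  */
#		int shift = 0x3f81 - exp;                /* how far to denormalize */
#		hi_man = (hi_man >> shift) | 0x80000000; /* shift man, set j-bit   */
#		exp = 0x3f80;                            /* "skewed" sgl exponent  */
#	}
#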
2310 global funimp_skew
2311 funimp_skew:
2312 bfextu EXC_EXTWORD(%a6){&3:&3},%d0 # extract src specifier
2313 cmpi.b %d0,&0x1 # was src sgl?
2314 beq.b funimp_skew_sgl # yes
2315 cmpi.b %d0,&0x5 # was src dbl?
2316 beq.b funimp_skew_dbl # yes
2319 funimp_skew_sgl:
2320 mov.w FP_SRC_EX(%a6),%d0 # fetch DENORM exponent
2321 andi.w &0x7fff,%d0 # strip sign
2322 beq.b funimp_skew_sgl_not
2323 cmpi.w %d0,&0x3f80
2324 bgt.b funimp_skew_sgl_not
2325 neg.w %d0 # make exponent negative
2326 addi.w &0x3f81,%d0 # find amt to shift
2327 mov.l FP_SRC_HI(%a6),%d1 # fetch DENORM hi(man)
2328 lsr.l %d0,%d1 # shift it
2329 bset &31,%d1 # set j-bit
2330 mov.l %d1,FP_SRC_HI(%a6) # insert new hi(man)
2331 andi.w &0x8000,FP_SRC_EX(%a6) # clear old exponent
2332 ori.w &0x3f80,FP_SRC_EX(%a6) # insert new "skewed" exponent
2333 funimp_skew_sgl_not:
2336 funimp_skew_dbl:
2337 mov.w FP_SRC_EX(%a6),%d0 # fetch DENORM exponent
2338 andi.w &0x7fff,%d0 # strip sign
2339 beq.b funimp_skew_dbl_not
2340 cmpi.w %d0,&0x3c00
2341 bgt.b funimp_skew_dbl_not
2343 tst.b FP_SRC_EX(%a6) # make "internal format"
2344 smi.b 0x2+FP_SRC(%a6)
2345 mov.w %d0,FP_SRC_EX(%a6) # insert exponent with cleared sign
2346 clr.l %d0 # clear g,r,s
2347 lea FP_SRC(%a6),%a0 # pass ptr to src op
2348 mov.w &0x3c01,%d1 # pass denorm threshold
2349 bsr.l dnrm_lp # denorm it
2350 mov.w &0x3c00,%d0 # new exponent
2351 tst.b 0x2+FP_SRC(%a6) # is sign set?
2352 beq.b fss_dbl_denorm_done # no
2353 bset &15,%d0 # set sign
2354 fss_dbl_denorm_done:
2355 bset &0x7,FP_SRC_HI(%a6) # set j-bit
2356 mov.w %d0,FP_SRC_EX(%a6) # insert new exponent
2357 funimp_skew_dbl_not:
2360 #########################################################################
2361 global _mem_write2
2362 _mem_write2:
2363 btst &0x5,EXC_SR(%a6)
2364 beq.l _dmem_write
2365 mov.l 0x0(%a0),FP_DST_EX(%a6)
2366 mov.l 0x4(%a0),FP_DST_HI(%a6)
2367 mov.l 0x8(%a0),FP_DST_LO(%a6)
2368 clr.l %d1
2371 #########################################################################
2372 # XDEF **************************************************************** #
2373 # _fpsp_effadd(): 060FPSP entry point for FP "Unimplemented #
2374 # effective address" exception. #
2376 # This handler should be the first code executed upon taking the #
2377 # FP Unimplemented Effective Address exception in an operating #
2378 # system. #
2380 # XREF **************************************************************** #
2381 # _imem_read_long() - read instruction longword #
2382 # fix_skewed_ops() - adjust src operand in fsave frame #
2383 # set_tag_x() - determine optype of src/dst operands #
2384 # store_fpreg() - store opclass 0 or 2 result to FP regfile #
2385 # unnorm_fix() - change UNNORM operands to NORM or ZERO #
2386 # load_fpn2() - load dst operand from FP regfile #
2387 # tbl_unsupp - addr of table of emulation routines for opclass 0,2 #
2388 # decbin() - convert packed data to FP binary data #
2389 # _real_fpu_disabled() - "callout" for "FPU disabled" exception #
2390 # _real_access() - "callout" for access error exception #
2391 # _mem_read() - read extended immediate operand from memory #
2392 # _fpsp_done() - "callout" for exit; work all done #
2393 # _real_trace() - "callout" for Trace enabled exception #
2394 # fmovm_dynamic() - emulate dynamic fmovm instruction #
2395 # fmovm_ctrl() - emulate fmovm control instruction #
2397 # INPUT *************************************************************** #
2398 # - The system stack contains the "Unimplemented <ea>" stk frame #
2400 # OUTPUT ************************************************************** #
2401 # If access error: #
2402 # - The system stack is changed to an access error stack frame #
2403 # If FPU disabled: #
2404 # - The system stack is changed to an FPU disabled stack frame #
2405 # If Trace exception enabled: #
2406 # - The system stack is changed to a Trace exception stack frame #
2407 # Else: (normal case) #
2408 # - None (correct result has been stored as appropriate) #
2410 # ALGORITHM *********************************************************** #
2411 # This exception handles 3 types of operations: #
2412 # (1) FP Instructions using extended precision or packed immediate #
2413 # addressing mode. #
2414 # (2) The "fmovm.x" instruction w/ dynamic register specification. #
2415 # (3) The "fmovm.l" instruction w/ 2 or 3 control registers. #
2417 # For immediate data operations, the data is read in w/ a #
2418 # _mem_read() "callout", converted to FP binary (if packed), and used #
2419 # as the source operand to the instruction specified by the instruction #
2420 # word. If no FP exception should be reported as a result of the #
2421 # emulation, then the result is stored to the destination register and #
2422 # the handler exits through _fpsp_done(). If an enabled exc has been #
2423 # signalled as a result of emulation, then an fsave state frame #
2424 # corresponding to the FP exception type must be entered into the 060 #
2425 # FPU before exiting. In either the enabled or disabled cases, we #
2426 # must also check if a Trace exception is pending, in which case, we #
2427 # must create a Trace exception stack frame from the current exception #
2428 # stack frame. If no Trace is pending, we simply exit through #
2429 # _fpsp_done(). #
2430 # For "fmovm.x", call the routine fmovm_dynamic() which will #
2431 # decode and emulate the instruction. No FP exceptions can be pending #
2432 # as a result of this operation emulation. A Trace exception can be #
2433 # pending, though, which means the current stack frame must be changed #
2434 # to a Trace stack frame and an exit made through _real_trace(). #
2435 # For the case of "fmovm.x Dn,-(a7)", where the offending instruction #
2436 # was executed from supervisor mode, this handler must store the FP #
2437 # register file values to the system stack by itself since #
2438 # fmovm_dynamic() can't handle this. A normal exit is made through #
2439 # _fpsp_done(). #
2440 # For "fmovm.l", fmovm_ctrl() is used to emulate the instruction. #
2441 # Again, a Trace exception may be pending and an exit made through #
2442 # _real_trace(). Else, a normal exit is made through _fpsp_done(). #
2444 # Before any of the above is attempted, it must be checked to #
2445 # see if the FPU is disabled. Since the "Unimp <ea>" exception is taken #
2446 # before the "FPU disabled" exception, but the "FPU disabled" exception #
2447 # has higher priority, we check the disabled bit in the PCR. If set, #
2448 # then we must create an 8 word "FPU disabled" exception stack frame #
2449 # from the current 4 word exception stack frame. This includes #
2450 # reproducing the effective address of the instruction to put on the #
2451 # new stack frame. #
2453 # In the process of all emulation work, if a _mem_read() #
2454 # "callout" returns a failing result indicating an access error, then #
2455 # we must create an access error stack frame from the current stack #
2456 # frame. This information includes a faulting address and a fault- #
2457 # status-longword. These are created within this handler. #
2459 #########################################################################
2461 global _fpsp_effadd
2462 _fpsp_effadd:
2464 # This exception type takes priority over the "Line F Emulator"
2465 # exception. Therefore, the FPU could be disabled when entering here.
2466 # So, we must check to see if it's disabled and handle that case separately.
2467 mov.l %d0,-(%sp) # save d0
2468 movc %pcr,%d0 # load proc cr
2469 btst &0x1,%d0 # is FPU disabled?
2470 bne.w iea_disabled # yes
2471 mov.l (%sp)+,%d0 # restore d0
2473 link %a6,&-LOCAL_SIZE # init stack frame
2475 movm.l &0x0303,EXC_DREGS(%a6) # save d0-d1/a0-a1
2476 fmovm.l %fpcr,%fpsr,%fpiar,USER_FPCR(%a6) # save ctrl regs
2477 fmovm.x &0xc0,EXC_FPREGS(%a6) # save fp0-fp1 on stack
2479 # PC of instruction that took the exception is the PC in the frame
2480 mov.l EXC_PC(%a6),EXC_EXTWPTR(%a6)
2482 mov.l EXC_EXTWPTR(%a6),%a0 # fetch instruction addr
2483 addq.l &0x4,EXC_EXTWPTR(%a6) # incr instruction ptr
2484 bsr.l _imem_read_long # fetch the instruction words
2485 mov.l %d0,EXC_OPWORD(%a6) # store OPWORD and EXTWORD
2487 #########################################################################
2489 tst.w %d0 # is operation fmovem?
2490 bmi.w iea_fmovm # yes
2493 # here, we will have:
2494 # fabs fdabs fsabs facos fmod
2495 # fadd fdadd fsadd fasin frem
2496 # fcmp fatan fscale
2497 # fdiv fddiv fsdiv fatanh fsin
2498 # fint fcos fsincos
2499 # fintrz fcosh fsinh
2500 # fmove fdmove fsmove fetox ftan
2501 # fmul fdmul fsmul fetoxm1 ftanh
2502 # fneg fdneg fsneg fgetexp ftentox
2503 # fsgldiv fgetman ftwotox
2504 # fsglmul flog10
2505 # fsqrt flog2
2506 # fsub fdsub fssub flogn
2507 # ftst flognp1
2508 # which can all use f<op>.{x,p}
2509 # so, now it's immediate data extended precision AND PACKED FORMAT!
2511 iea_op:
2512 andi.l &0x00ff00ff,USER_FPSR(%a6)
2514 btst &0xa,%d0 # is src fmt x or p?
2515 bne.b iea_op_pack # packed
2518 mov.l EXC_EXTWPTR(%a6),%a0 # pass: ptr to #<data>
2519 lea FP_SRC(%a6),%a1 # pass: ptr to super addr
2520 mov.l &0xc,%d0 # pass: 12 bytes
2521 bsr.l _imem_read # read extended immediate
2523 tst.l %d1 # did ifetch fail?
2524 bne.w iea_iacc # yes
2526 bra.b iea_op_setsrc
2528 iea_op_pack:
2530 mov.l EXC_EXTWPTR(%a6),%a0 # pass: ptr to #<data>
2531 lea FP_SRC(%a6),%a1 # pass: ptr to super dst
2532 mov.l &0xc,%d0 # pass: 12 bytes
2533 bsr.l _imem_read # read packed operand
2535 tst.l %d1 # did ifetch fail?
2536 bne.w iea_iacc # yes
2538 # The packed operand is an INF or a NAN if the exponent field is all ones.
2539 bfextu FP_SRC(%a6){&1:&15},%d0 # get exp
2540 cmpi.w %d0,&0x7fff # INF or NAN?
2541 beq.b iea_op_setsrc # operand is an INF or NAN
2543 # The packed operand is a zero if the mantissa is all zero, else it's
2544 # a normal packed op.
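#
# roughly, in C (a sketch only, mirroring the checks below; names invented):
#
#	if (exp_field == 0x7fff) {
#		/* INF or NAN: use the operand as is */
#	} else if ((byte4 & 0x0f) == 0 && lw2 == 0 && lw3 == 0) {
#		/* all mantissa digits zero: it's a packed ZERO */
#	} else {
#		/* a normal packed op: convert it with decbin() */
#	}
#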
2545 mov.b 3+FP_SRC(%a6),%d0 # get byte 4
2546 andi.b &0x0f,%d0 # clear all but last nybble
2547 bne.b iea_op_gp_not_spec # not a zero
2548 tst.l FP_SRC_HI(%a6) # is lw 2 zero?
2549 bne.b iea_op_gp_not_spec # not a zero
2550 tst.l FP_SRC_LO(%a6) # is lw 3 zero?
2551 beq.b iea_op_setsrc # operand is a ZERO
2552 iea_op_gp_not_spec:
2553 lea FP_SRC(%a6),%a0 # pass: ptr to packed op
2554 bsr.l decbin # convert to extended
2555 fmovm.x &0x80,FP_SRC(%a6) # make this the srcop
2557 iea_op_setsrc:
2558 addi.l &0xc,EXC_EXTWPTR(%a6) # update extension word pointer
2560 # FP_SRC now holds the src operand.
2561 lea FP_SRC(%a6),%a0 # pass: ptr to src op
2562 bsr.l set_tag_x # tag the operand type
2563 mov.b %d0,STAG(%a6) # could be ANYTHING!!!
2564 cmpi.b %d0,&UNNORM # is operand an UNNORM?
2565 bne.b iea_op_getdst # no
2566 bsr.l unnorm_fix # yes; convert to NORM/DENORM/ZERO
2567 mov.b %d0,STAG(%a6) # set new optype tag
2568 iea_op_getdst:
2569 clr.b STORE_FLG(%a6) # clear "store result" boolean
2571 btst &0x5,1+EXC_CMDREG(%a6) # is operation monadic or dyadic?
2572 beq.b iea_op_extract # monadic
2573 btst &0x4,1+EXC_CMDREG(%a6) # is operation fsincos,ftst,fcmp?
2574 bne.b iea_op_spec # yes
2576 iea_op_loaddst:
2577 bfextu EXC_CMDREG(%a6){&6:&3},%d0 # fetch dst regno
2578 bsr.l load_fpn2 # load dst operand
2580 lea FP_DST(%a6),%a0 # pass: ptr to dst op
2581 bsr.l set_tag_x # tag the operand type
2582 mov.b %d0,DTAG(%a6) # could be ANYTHING!!!
2583 cmpi.b %d0,&UNNORM # is operand an UNNORM?
2584 bne.b iea_op_extract # no
2585 bsr.l unnorm_fix # yes; convert to NORM/DENORM/ZERO
2586 mov.b %d0,DTAG(%a6) # set new optype tag
2587 bra.b iea_op_extract
2589 # the operation is fsincos, ftst, or fcmp. only fcmp is dyadic
2590 iea_op_spec:
2591 btst &0x3,1+EXC_CMDREG(%a6) # is operation fsincos?
2592 beq.b iea_op_extract # yes
2593 # now, we're left with ftst and fcmp. so, first let's tag them so that they don't
2594 # store a result. then, only fcmp will branch back and pick up a dst operand.
2595 st STORE_FLG(%a6) # don't store a final result
2596 btst &0x1,1+EXC_CMDREG(%a6) # is operation fcmp?
2597 beq.b iea_op_loaddst # yes
2599 iea_op_extract:
2600 clr.l %d0
2601 mov.b FPCR_MODE(%a6),%d0 # pass: rnd mode,prec
2603 mov.b 1+EXC_CMDREG(%a6),%d1
2604 andi.w &0x007f,%d1 # extract extension
2606 fmov.l &0x0,%fpcr
2607 fmov.l &0x0,%fpsr
2609 lea FP_SRC(%a6),%a0
2610 lea FP_DST(%a6),%a1
2612 mov.l (tbl_unsupp.l,%pc,%d1.w*4),%d1 # fetch routine addr
2613 jsr (tbl_unsupp.l,%pc,%d1.l*1)
2616 # Exceptions in order of precedence:
2617 # BSUN : none
2618 # SNAN : all operations
2619 # OPERR : all reg-reg or mem-reg operations that can normally operr
2620 # OVFL : same as OPERR
2621 # UNFL : same as OPERR
2622 # DZ : same as OPERR
2623 # INEX2 : same as OPERR
2624 # INEX1 : all packed immediate operations
2627 # we determine the highest priority exception(if any) set by the
2628 # emulation routine that has also been enabled by the user.
2629 mov.b FPCR_ENABLE(%a6),%d0 # fetch exceptions enabled
2630 bne.b iea_op_ena # some are enabled
2632 # now, we save the result, unless, of course, the operation was ftst or fcmp.
2633 # these don't save results.
2634 iea_op_save:
2635 tst.b STORE_FLG(%a6) # does this op store a result?
2636 bne.b iea_op_exit1 # exit with no frestore
2638 iea_op_store:
2639 bfextu EXC_CMDREG(%a6){&6:&3},%d0 # fetch dst regno
2640 bsr.l store_fpreg # store the result
2642 iea_op_exit1:
2643 mov.l EXC_PC(%a6),USER_FPIAR(%a6) # set FPIAR to "Current PC"
2644 mov.l EXC_EXTWPTR(%a6),EXC_PC(%a6) # set "Next PC" in exc frame
2646 fmovm.x EXC_FPREGS(%a6),&0xc0 # restore fp0-fp1
2647 fmovm.l USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs
2648 movm.l EXC_DREGS(%a6),&0x0303 # restore d0-d1/a0-a1
2650 unlk %a6 # unravel the frame
2652 btst &0x7,(%sp) # is trace on?
2653 bne.w iea_op_trace # yes
2655 bra.l _fpsp_done # exit to os
2657 iea_op_ena:
2658 and.b FPSR_EXCEPT(%a6),%d0 # keep only ones enabled and set
2659 bfffo %d0{&24:&8},%d0 # find highest priority exception
2660 bne.b iea_op_exc # at least one was set
2662 # no exception occurred. now, did a disabled, exact overflow occur with inexact
2663 # enabled? if so, then we have to stuff an overflow frame into the FPU.
2664 btst &ovfl_bit,FPSR_EXCEPT(%a6) # did overflow occur?
2665 beq.b iea_op_save
2667 iea_op_ovfl:
2668 btst &inex2_bit,FPCR_ENABLE(%a6) # is inexact enabled?
2669 beq.b iea_op_store # no
2670 bra.b iea_op_exc_ovfl # yes
2672 # an enabled exception occurred. we have to insert the exception type back into
2673 # the machine.
2674 iea_op_exc:
2675 subi.l &24,%d0 # fix offset to be 0-8
2676 cmpi.b %d0,&0x6 # is exception INEX?
2677 bne.b iea_op_exc_force # no
2679 # the enabled exception was inexact. so, if it occurs with an overflow
2680 # or underflow that was disabled, then we have to force an overflow or
2681 # underflow frame.
2682 btst &ovfl_bit,FPSR_EXCEPT(%a6) # did overflow occur?
2683 bne.b iea_op_exc_ovfl # yes
2684 btst &unfl_bit,FPSR_EXCEPT(%a6) # did underflow occur?
2685 bne.b iea_op_exc_unfl # yes
2687 iea_op_exc_force:
2688 mov.w (tbl_iea_except.b,%pc,%d0.w*2),2+FP_SRC(%a6)
2689 bra.b iea_op_exit2 # exit with frestore
2691 tbl_iea_except:
2692 short 0xe002, 0xe006, 0xe004, 0xe005
2693 short 0xe003, 0xe002, 0xe001, 0xe001
2695 iea_op_exc_ovfl:
2696 mov.w &0xe005,2+FP_SRC(%a6)
2697 bra.b iea_op_exit2
2699 iea_op_exc_unfl:
2700 mov.w &0xe003,2+FP_SRC(%a6)
2702 iea_op_exit2:
2703 mov.l EXC_PC(%a6),USER_FPIAR(%a6) # set FPIAR to "Current PC"
2704 mov.l EXC_EXTWPTR(%a6),EXC_PC(%a6) # set "Next PC" in exc frame
2706 fmovm.x EXC_FPREGS(%a6),&0xc0 # restore fp0-fp1
2707 fmovm.l USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs
2708 movm.l EXC_DREGS(%a6),&0x0303 # restore d0-d1/a0-a1
2710 frestore FP_SRC(%a6) # restore exceptional state
2712 unlk %a6 # unravel the frame
2714 btst &0x7,(%sp) # is trace on?
2715 bne.b iea_op_trace # yes
2717 bra.l _fpsp_done # exit to os
2720 # The opclass two instruction that took an "Unimplemented Effective Address"
2721 # exception was being traced. Make the "current" PC the FPIAR and put it in
2722 # the trace stack frame then jump to _real_trace().
2724 # UNIMP EA FRAME TRACE FRAME
2725 # ***************** *****************
2726 # * 0x0 * 0x0f0 * * Current *
2727 # ***************** * PC *
2728 # * Current * *****************
2729 # * PC * * 0x2 * 0x024 *
2730 # ***************** *****************
2731 # * SR * * Next *
2732 # ***************** * PC *
2733 # *****************
2734 # * SR *
2735 # *****************
2736 iea_op_trace:
2737 mov.l (%sp),-(%sp) # shift stack frame "down"
2738 mov.w 0x8(%sp),0x4(%sp)
2739 mov.w &0x2024,0x6(%sp) # stk fmt = 0x2; voff = 0x024
2740 fmov.l %fpiar,0x8(%sp) # "Current PC" is in FPIAR
2742 bra.l _real_trace
2744 #########################################################################
2745 iea_fmovm:
2746 btst &14,%d0 # ctrl or data reg
2747 beq.w iea_fmovm_ctrl
2749 iea_fmovm_data:
2751 btst &0x5,EXC_SR(%a6) # user or supervisor mode
2752 bne.b iea_fmovm_data_s
2754 iea_fmovm_data_u:
2755 mov.l %usp,%a0
2756 mov.l %a0,EXC_A7(%a6) # store current a7
2757 bsr.l fmovm_dynamic # do dynamic fmovm
2758 mov.l EXC_A7(%a6),%a0 # load possibly new a7
2759 mov.l %a0,%usp # update usp
2760 bra.w iea_fmovm_exit
2762 iea_fmovm_data_s:
2763 clr.b SPCOND_FLG(%a6)
2764 lea 0x2+EXC_VOFF(%a6),%a0
2765 mov.l %a0,EXC_A7(%a6)
2766 bsr.l fmovm_dynamic # do dynamic fmovm
2768 cmpi.b SPCOND_FLG(%a6),&mda7_flg
2769 beq.w iea_fmovm_data_predec
2770 cmpi.b SPCOND_FLG(%a6),&mia7_flg
2771 bne.w iea_fmovm_exit
2773 # right now, d0 = the size.
2774 # the data has been fetched from the supervisor stack, but we have not
2775 # incremented the stack pointer by the appropriate number of bytes.
2776 # do it here.
2777 iea_fmovm_data_postinc:
2778 btst &0x7,EXC_SR(%a6)
2779 bne.b iea_fmovm_data_pi_trace
2781 mov.w EXC_SR(%a6),(EXC_SR,%a6,%d0)
2782 mov.l EXC_EXTWPTR(%a6),(EXC_PC,%a6,%d0)
2783 mov.w &0x00f0,(EXC_VOFF,%a6,%d0)
2785 lea (EXC_SR,%a6,%d0),%a0
2786 mov.l %a0,EXC_SR(%a6)
2788 fmovm.x EXC_FP0(%a6),&0xc0 # restore fp0-fp1
2789 fmovm.l USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs
2790 movm.l EXC_DREGS(%a6),&0x0303 # restore d0-d1/a0-a1
2792 unlk %a6
2793 mov.l (%sp)+,%sp
2794 bra.l _fpsp_done
2796 iea_fmovm_data_pi_trace:
2797 mov.w EXC_SR(%a6),(EXC_SR-0x4,%a6,%d0)
2798 mov.l EXC_EXTWPTR(%a6),(EXC_PC-0x4,%a6,%d0)
2799 mov.w &0x2024,(EXC_VOFF-0x4,%a6,%d0)
2800 mov.l EXC_PC(%a6),(EXC_VOFF+0x2-0x4,%a6,%d0)
2802 lea (EXC_SR-0x4,%a6,%d0),%a0
2803 mov.l %a0,EXC_SR(%a6)
2805 fmovm.x EXC_FP0(%a6),&0xc0 # restore fp0-fp1
2806 fmovm.l USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs
2807 movm.l EXC_DREGS(%a6),&0x0303 # restore d0-d1/a0-a1
2809 unlk %a6
2810 mov.l (%sp)+,%sp
2811 bra.l _real_trace
2813 # right now, d1 = size and d0 = the strg.
2814 iea_fmovm_data_predec:
2815 mov.b %d1,EXC_VOFF(%a6) # store strg
2816 mov.b %d0,0x1+EXC_VOFF(%a6) # store size
2818 fmovm.x EXC_FP0(%a6),&0xc0 # restore fp0-fp1
2819 fmovm.l USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs
2820 movm.l EXC_DREGS(%a6),&0x0303 # restore d0-d1/a0-a1
2822 mov.l (%a6),-(%sp) # make a copy of a6
2823 mov.l %d0,-(%sp) # save d0
2824 mov.l %d1,-(%sp) # save d1
2825 mov.l EXC_EXTWPTR(%a6),-(%sp) # make a copy of Next PC
2827 clr.l %d0
2828 mov.b 0x1+EXC_VOFF(%a6),%d0 # fetch size
2829 neg.l %d0 # get negative of size
2831 btst &0x7,EXC_SR(%a6) # is trace enabled?
2832 beq.b iea_fmovm_data_p2
2834 mov.w EXC_SR(%a6),(EXC_SR-0x4,%a6,%d0)
2835 mov.l EXC_PC(%a6),(EXC_VOFF-0x2,%a6,%d0)
2836 mov.l (%sp)+,(EXC_PC-0x4,%a6,%d0)
2837 mov.w &0x2024,(EXC_VOFF-0x4,%a6,%d0)
2839 pea (%a6,%d0) # create final sp
2840 bra.b iea_fmovm_data_p3
2842 iea_fmovm_data_p2:
2843 mov.w EXC_SR(%a6),(EXC_SR,%a6,%d0)
2844 mov.l (%sp)+,(EXC_PC,%a6,%d0)
2845 mov.w &0x00f0,(EXC_VOFF,%a6,%d0)
2847 pea (0x4,%a6,%d0) # create final sp
2849 iea_fmovm_data_p3:
2850 clr.l %d1
2851 mov.b EXC_VOFF(%a6),%d1 # fetch strg
2853 tst.b %d1
2854 bpl.b fm_1
2855 fmovm.x &0x80,(0x4+0x8,%a6,%d0)
2856 addi.l &0xc,%d0
2857 fm_1:
2858 lsl.b &0x1,%d1
2859 bpl.b fm_2
2860 fmovm.x &0x40,(0x4+0x8,%a6,%d0)
2861 addi.l &0xc,%d0
2862 fm_2:
2863 lsl.b &0x1,%d1
2864 bpl.b fm_3
2865 fmovm.x &0x20,(0x4+0x8,%a6,%d0)
2866 addi.l &0xc,%d0
2867 fm_3:
2868 lsl.b &0x1,%d1
2869 bpl.b fm_4
2870 fmovm.x &0x10,(0x4+0x8,%a6,%d0)
2871 addi.l &0xc,%d0
2872 fm_4:
2873 lsl.b &0x1,%d1
2874 bpl.b fm_5
2875 fmovm.x &0x08,(0x4+0x8,%a6,%d0)
2876 addi.l &0xc,%d0
2877 fm_5:
2878 lsl.b &0x1,%d1
2879 bpl.b fm_6
2880 fmovm.x &0x04,(0x4+0x8,%a6,%d0)
2881 addi.l &0xc,%d0
2882 fm_6:
2883 lsl.b &0x1,%d1
2884 bpl.b fm_7
2885 fmovm.x &0x02,(0x4+0x8,%a6,%d0)
2886 addi.l &0xc,%d0
2887 fm_7:
2888 lsl.b &0x1,%d1
2889 bpl.b fm_end
2890 fmovm.x &0x01,(0x4+0x8,%a6,%d0)
2891 fm_end:
2892 mov.l 0x4(%sp),%d1
2893 mov.l 0x8(%sp),%d0
2894 mov.l 0xc(%sp),%a6
2895 mov.l (%sp)+,%sp
2897 btst &0x7,(%sp) # is trace enabled?
2898 beq.l _fpsp_done
2899 bra.l _real_trace
2901 #########################################################################
2902 iea_fmovm_ctrl:
2904 bsr.l fmovm_ctrl # load ctrl regs
2906 iea_fmovm_exit:
2907 fmovm.x EXC_FPREGS(%a6),&0xc0 # restore fp0-fp1
2908 fmovm.l USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs
2909 movm.l EXC_DREGS(%a6),&0x0303 # restore d0-d1/a0-a1
2911 btst &0x7,EXC_SR(%a6) # is trace on?
2912 bne.b iea_fmovm_trace # yes
2914 mov.l EXC_EXTWPTR(%a6),EXC_PC(%a6) # set Next PC
2916 unlk %a6 # unravel the frame
2918 bra.l _fpsp_done # exit to os
2921 # The control reg instruction that took an "Unimplemented Effective Address"
2922 # exception was being traced. The "Current PC" for the trace frame is the
2923 # PC stacked for Unimp EA. The "Next PC" is in EXC_EXTWPTR.
2924 # After fixing the stack frame, jump to _real_trace().
2926 # UNIMP EA FRAME TRACE FRAME
2927 # ***************** *****************
2928 # * 0x0 * 0x0f0 * * Current *
2929 # ***************** * PC *
2930 # * Current * *****************
2931 # * PC * * 0x2 * 0x024 *
2932 # ***************** *****************
2933 # * SR * * Next *
2934 # ***************** * PC *
2935 # *****************
2936 # * SR *
2937 # *****************
2938 # this ain't a pretty solution, but it works:
2939 # -restore a6 (not with unlk)
2940 # -shift stack frame down over where old a6 used to be
2941 # -add LOCAL_SIZE to stack pointer
2942 iea_fmovm_trace:
2943 mov.l (%a6),%a6 # restore frame pointer
2944 mov.w EXC_SR+LOCAL_SIZE(%sp),0x0+LOCAL_SIZE(%sp)
2945 mov.l EXC_PC+LOCAL_SIZE(%sp),0x8+LOCAL_SIZE(%sp)
2946 mov.l EXC_EXTWPTR+LOCAL_SIZE(%sp),0x2+LOCAL_SIZE(%sp)
2947 mov.w &0x2024,0x6+LOCAL_SIZE(%sp) # stk fmt = 0x2; voff = 0x024
2948 add.l &LOCAL_SIZE,%sp # clear stack frame
2950 bra.l _real_trace
2952 #########################################################################
2953 # The FPU is disabled and so we should really have taken the "Line
2954 # F Emulator" exception. So, here we create an 8-word stack frame
2955 # from our 4-word stack frame. This means we must calculate the length
2956 # of the faulting instruction to get the "next PC". This is trivial for
2957 # immediate operands but requires some extra work for fmovm dynamic
2958 # which can use most addressing modes.
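#
# in outline (a sketch, not part of the original comments), the "next PC"
# is the stacked PC plus an instruction length chosen as follows:
#
#	if (!fmovm)
#		len = 0x10;		/* opword + extword + 12-byte immediate */
#	else if (fmovm_ctrl)
#		len = (reglist == 0x7) ? 0x10 : 0xc;	/* 3 ctrl regs vs. 2 */
#	else
#		len = extwptr - pc;	/* fmovm dynamic: via fmovm_calc_ea() */
#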
2959 iea_disabled:
2960 mov.l (%sp)+,%d0 # restore d0
2962 link %a6,&-LOCAL_SIZE # init stack frame
2964 movm.l &0x0303,EXC_DREGS(%a6) # save d0-d1/a0-a1
2966 # PC of instruction that took the exception is the PC in the frame
2967 mov.l EXC_PC(%a6),EXC_EXTWPTR(%a6)
2968 mov.l EXC_EXTWPTR(%a6),%a0 # fetch instruction addr
2969 addq.l &0x4,EXC_EXTWPTR(%a6) # incr instruction ptr
2970 bsr.l _imem_read_long # fetch the instruction words
2971 mov.l %d0,EXC_OPWORD(%a6) # store OPWORD and EXTWORD
2973 tst.w %d0 # is instr fmovm?
2974 bmi.b iea_dis_fmovm # yes
2975 # instruction is using an extended precision immediate operand. therefore,
2976 # the total instruction length is 16 bytes.
2977 iea_dis_immed:
2978 mov.l &0x10,%d0 # 16 bytes of instruction
2979 bra.b iea_dis_cont
2980 iea_dis_fmovm:
2981 btst &0xe,%d0 # is instr fmovm ctrl
2982 bne.b iea_dis_fmovm_data # no
2983 # the instruction is a fmovm.l with 2 or 3 registers.
2984 bfextu %d0{&19:&3},%d1
2985 mov.l &0xc,%d0
2986 cmpi.b %d1,&0x7 # move all regs?
2987 bne.b iea_dis_cont
2988 addq.l &0x4,%d0
2989 bra.b iea_dis_cont
2990 # the instruction is an fmovm.x dynamic which can use many addressing
2991 # modes and thus can have several different total instruction lengths.
2992 # call fmovm_calc_ea which will go through the ea calc process and,
2993 # as a by-product, will tell us how long the instruction is.
2994 iea_dis_fmovm_data:
2995 clr.l %d0
2996 bsr.l fmovm_calc_ea
2997 mov.l EXC_EXTWPTR(%a6),%d0
2998 sub.l EXC_PC(%a6),%d0
2999 iea_dis_cont:
3000 mov.w %d0,EXC_VOFF(%a6) # store stack shift value
3002 movm.l EXC_DREGS(%a6),&0x0303 # restore d0-d1/a0-a1
3004 unlk %a6
3006 # here, we actually create the 8-word frame from the 4-word frame,
3007 # with the "next PC" as additional info.
3008 # the <ea> field is left as undefined.
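#
# resulting 8-word frame, as built below (a sketch; offsets are from the
# final stack pointer):
#
#	+0x0	SR		(copied from the 4-word frame)
#	+0x2	"next PC"	(current PC + instruction length)
#	+0x6	0x402c		(frame format $4, vector offset 0x02c)
#	+0x8	<ea>		(left undefined, as noted above)
#	+0xc	"current PC"	(PC of the faulting instruction)
#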
3009 subq.l &0x8,%sp # make room for new stack
3010 mov.l %d0,-(%sp) # save d0
3011 mov.w 0xc(%sp),0x4(%sp) # move SR
3012 mov.l 0xe(%sp),0x6(%sp) # move Current PC
3013 clr.l %d0
3014 mov.w 0x12(%sp),%d0
3015 mov.l 0x6(%sp),0x10(%sp) # move Current PC
3016 add.l %d0,0x6(%sp) # make Next PC
3017 mov.w &0x402c,0xa(%sp) # insert offset,frame format
3018 mov.l (%sp)+,%d0 # restore d0
3020 bra.l _real_fpu_disabled
3022 ##########
3024 iea_iacc:
3025 movc %pcr,%d0
3026 btst &0x1,%d0
3027 bne.b iea_iacc_cont
3028 fmovm.l USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs
3029 fmovm.x EXC_FPREGS(%a6),&0xc0 # restore fp0-fp1 on stack
3030 iea_iacc_cont:
3031 movm.l EXC_DREGS(%a6),&0x0303 # restore d0-d1/a0-a1
3033 unlk %a6
3035 subq.w &0x8,%sp # make stack frame bigger
3036 mov.l 0x8(%sp),(%sp) # store SR,hi(PC)
3037 mov.w 0xc(%sp),0x4(%sp) # store lo(PC)
3038 mov.w &0x4008,0x6(%sp) # store voff
3039 mov.l 0x2(%sp),0x8(%sp) # store ea
3040 mov.l &0x09428001,0xc(%sp) # store fslw
3042 iea_acc_done:
3043 btst &0x5,(%sp) # user or supervisor mode?
3044 beq.b iea_acc_done2 # user
3045 bset &0x2,0xd(%sp) # set supervisor TM bit
3047 iea_acc_done2:
3048 bra.l _real_access
3050 iea_dacc:
3051 lea -LOCAL_SIZE(%a6),%sp
3053 movc %pcr,%d1
3054 btst &0x1,%d1
3055 bne.b iea_dacc_cont
3056 fmovm.x EXC_FPREGS(%a6),&0xc0 # restore fp0-fp1 on stack
3057 fmovm.l LOCAL_SIZE+USER_FPCR(%sp),%fpcr,%fpsr,%fpiar # restore ctrl regs
3058 iea_dacc_cont:
3059 mov.l (%a6),%a6
3061 mov.l 0x4+LOCAL_SIZE(%sp),-0x8+0x4+LOCAL_SIZE(%sp)
3062 mov.w 0x8+LOCAL_SIZE(%sp),-0x8+0x8+LOCAL_SIZE(%sp)
3063 mov.w &0x4008,-0x8+0xa+LOCAL_SIZE(%sp)
3064 mov.l %a0,-0x8+0xc+LOCAL_SIZE(%sp)
3065 mov.w %d0,-0x8+0x10+LOCAL_SIZE(%sp)
3066 mov.w &0x0001,-0x8+0x12+LOCAL_SIZE(%sp)
3068 movm.l LOCAL_SIZE+EXC_DREGS(%sp),&0x0303 # restore d0-d1/a0-a1
3069 add.w &LOCAL_SIZE-0x4,%sp
3071 bra.b iea_acc_done
3073 #########################################################################
3074 # XDEF **************************************************************** #
3075 # _fpsp_operr(): 060FPSP entry point for FP Operr exception. #
3077 # This handler should be the first code executed upon taking the #
3078 # FP Operand Error exception in an operating system. #
3080 # XREF **************************************************************** #
3081 # _imem_read_long() - read instruction longword #
3082 # fix_skewed_ops() - adjust src operand in fsave frame #
3083 # _real_operr() - "callout" to operating system operr handler #
3084 # _dmem_write_{byte,word,long}() - store data to mem (opclass 3) #
3085 # store_dreg_{b,w,l}() - store data to data regfile (opclass 3) #
3086 # facc_out_{b,w,l}() - store to memory took access error (opcl 3) #
3088 # INPUT *************************************************************** #
3089 # - The system stack contains the FP Operr exception frame #
3090 # - The fsave frame contains the source operand #
3092 # OUTPUT ************************************************************** #
3093 # No access error: #
3094 # - The system stack is unchanged #
3095 # - The fsave frame contains the adjusted src op for opclass 0,2 #
3097 # ALGORITHM *********************************************************** #
3098 # In a system where the FP Operr exception is enabled, the goal #
3099 # is to get to the handler specified at _real_operr(). But, on the 060, #
3100 # for opclass zero and two instruction taking this exception, the #
3101 # input operand in the fsave frame may be incorrect for some cases #
3102 # and needs to be corrected. This handler calls fix_skewed_ops() to #
3103 # do just this and then exits through _real_operr(). #
3104 # For opclass 3 instructions, the 060 doesn't store the default #
3105 # operr result out to memory or data register file as it should. #
3106 # This code must emulate the move out before finally exiting through #
3107 # _real_operr(). The move out, if to memory, is performed using #
3108 # _mem_write() "callout" routines that may return a failing result. #
3109 # In this special case, the handler must exit through facc_out() #
3110 # which creates an access error stack frame from the current operr #
3111 # stack frame. #
3113 #########################################################################
3115 global _fpsp_operr
3116 _fpsp_operr:
3118 link.w %a6,&-LOCAL_SIZE # init stack frame
3120 fsave FP_SRC(%a6) # grab the "busy" frame
3122 movm.l &0x0303,EXC_DREGS(%a6) # save d0-d1/a0-a1
3123 fmovm.l %fpcr,%fpsr,%fpiar,USER_FPCR(%a6) # save ctrl regs
3124 fmovm.x &0xc0,EXC_FPREGS(%a6) # save fp0-fp1 on stack
3126 # the FPIAR holds the "current PC" of the faulting instruction
3127 mov.l USER_FPIAR(%a6),EXC_EXTWPTR(%a6)
3129 mov.l EXC_EXTWPTR(%a6),%a0 # fetch instruction addr
3130 addq.l &0x4,EXC_EXTWPTR(%a6) # incr instruction ptr
3131 bsr.l _imem_read_long # fetch the instruction words
3132 mov.l %d0,EXC_OPWORD(%a6)
3134 ##############################################################################
3136 btst &13,%d0 # is instr an fmove out?
3137 bne.b foperr_out # fmove out
3140 # here, we simply see if the operand in the fsave frame needs to be "unskewed".
3141 # this would be the case for opclass two operations with a source infinity or
3142 # denorm operand in the sgl or dbl format. NANs also become skewed, but can't
3143 # cause an operr so we don't need to check for them here.
3144 lea FP_SRC(%a6),%a0 # pass: ptr to src op
3145 bsr.l fix_skewed_ops # fix src op
3147 foperr_exit:
3148 fmovm.x EXC_FPREGS(%a6),&0xc0 # restore fp0-fp1
3149 fmovm.l USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs
3150 movm.l EXC_DREGS(%a6),&0x0303 # restore d0-d1/a0-a1
3152 frestore FP_SRC(%a6)
3154 unlk %a6
3155 bra.l _real_operr
3157 ########################################################################
3160 # the hardware does not save the default result to memory on enabled
3161 # operand error exceptions. we do this here before passing control to
3162 # the user operand error handler.
3164 # byte, word, and long destination format operations can pass
3165 # through here. we simply need to test the sign of the src
3166 # operand and save the appropriate minimum or maximum integer value
3167 # to the effective address as pointed to by the stacked effective address.
3169 # although packed opclass three operations can take operand error
3170 # exceptions, they won't pass through here since they are caught
3171 # first by the unsupported data format exception handler. that handler
3172 # sends them directly to _real_operr() if necessary.
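#
# the default result computed below, in C terms (a sketch only; the
# boolean names are invented):
#
#	if (src_is_qnan)
#		result = src_hi_mantissa; /* pass the NAN bits through */
#	else if (src_is_negative)
#		result = 0x80000000;      /* most negative integer     */
#	else
#		result = 0x7fffffff;      /* most positive integer     */
#	/* then truncated to byte/word/long and written through the stacked
#	 * <ea>, or stored to Dn via store_dreg_{b,w,l}()                  */
#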
3174 foperr_out:
3176 mov.w FP_SRC_EX(%a6),%d1 # fetch exponent
3177 andi.w &0x7fff,%d1
3178 cmpi.w %d1,&0x7fff
3179 bne.b foperr_out_not_qnan
3180 # the operand is either an infinity or a QNAN.
3181 tst.l FP_SRC_LO(%a6)
3182 bne.b foperr_out_qnan
3183 mov.l FP_SRC_HI(%a6),%d1
3184 andi.l &0x7fffffff,%d1
3185 beq.b foperr_out_not_qnan
3186 foperr_out_qnan:
3187 mov.l FP_SRC_HI(%a6),L_SCR1(%a6)
3188 bra.b foperr_out_jmp
3190 foperr_out_not_qnan:
3191 mov.l &0x7fffffff,%d1
3192 tst.b FP_SRC_EX(%a6)
3193 bpl.b foperr_out_not_qnan2
3194 addq.l &0x1,%d1
3195 foperr_out_not_qnan2:
3196 mov.l %d1,L_SCR1(%a6)
3198 foperr_out_jmp:
3199 bfextu %d0{&19:&3},%d0 # extract dst format field
3200 mov.b 1+EXC_OPWORD(%a6),%d1 # extract <ea> mode,reg
3201 mov.w (tbl_operr.b,%pc,%d0.w*2),%a0
3202 jmp (tbl_operr.b,%pc,%a0)
3204 tbl_operr:
3205 short foperr_out_l - tbl_operr # long word integer
3206 short tbl_operr - tbl_operr # sgl prec shouldn't happen
3207 short tbl_operr - tbl_operr # ext prec shouldn't happen
3208 short foperr_exit - tbl_operr # packed won't enter here
3209 short foperr_out_w - tbl_operr # word integer
3210 short tbl_operr - tbl_operr # dbl prec shouldn't happen
3211 short foperr_out_b - tbl_operr # byte integer
3212 short tbl_operr - tbl_operr # packed won't enter here
3214 foperr_out_b:
3215 mov.b L_SCR1(%a6),%d0 # load positive default result
3216 cmpi.b %d1,&0x7 # is <ea> mode a data reg?
3217 ble.b foperr_out_b_save_dn # yes
3218 mov.l EXC_EA(%a6),%a0 # pass: <ea> of default result
3219 bsr.l _dmem_write_byte # write the default result
3221 tst.l %d1 # did dstore fail?
3222 bne.l facc_out_b # yes
3224 bra.w foperr_exit
3225 foperr_out_b_save_dn:
3226 andi.w &0x0007,%d1
3227 bsr.l store_dreg_b # store result to regfile
3228 bra.w foperr_exit
3230 foperr_out_w:
3231 mov.w L_SCR1(%a6),%d0 # load positive default result
3232 cmpi.b %d1,&0x7 # is <ea> mode a data reg?
3233 ble.b foperr_out_w_save_dn # yes
3234 mov.l EXC_EA(%a6),%a0 # pass: <ea> of default result
3235 bsr.l _dmem_write_word # write the default result
3237 tst.l %d1 # did dstore fail?
3238 bne.l facc_out_w # yes
3240 bra.w foperr_exit
3241 foperr_out_w_save_dn:
3242 andi.w &0x0007,%d1
3243 bsr.l store_dreg_w # store result to regfile
3244 bra.w foperr_exit
3246 foperr_out_l:
3247 mov.l L_SCR1(%a6),%d0 # load positive default result
3248 cmpi.b %d1,&0x7 # is <ea> mode a data reg?
3249 ble.b foperr_out_l_save_dn # yes
3250 mov.l EXC_EA(%a6),%a0 # pass: <ea> of default result
3251 bsr.l _dmem_write_long # write the default result
3253 tst.l %d1 # did dstore fail?
3254 bne.l facc_out_l # yes
3256 bra.w foperr_exit
3257 foperr_out_l_save_dn:
3258 andi.w &0x0007,%d1
3259 bsr.l store_dreg_l # store result to regfile
3260 bra.w foperr_exit
3262 #########################################################################
3263 # XDEF **************************************************************** #
3264 # _fpsp_snan(): 060FPSP entry point for FP SNAN exception. #
3266 # This handler should be the first code executed upon taking the #
3267 # FP Signalling NAN exception in an operating system. #
3269 # XREF **************************************************************** #
3270 # _imem_read_long() - read instruction longword #
3271 # fix_skewed_ops() - adjust src operand in fsave frame #
3272 # _real_snan() - "callout" to operating system SNAN handler #
3273 # _dmem_write_{byte,word,long}() - store data to mem (opclass 3) #
3274 # store_dreg_{b,w,l}() - store data to data regfile (opclass 3) #
3275 # facc_out_{b,w,l,d,x}() - store to mem took acc error (opcl 3) #
3276 # _calc_ea_fout() - fix An if <ea> is -() or ()+; also get <ea> #
3278 # INPUT *************************************************************** #
3279 # - The system stack contains the FP SNAN exception frame #
3280 # - The fsave frame contains the source operand #
3282 # OUTPUT ************************************************************** #
3283 # No access error: #
3284 # - The system stack is unchanged #
3285 # - The fsave frame contains the adjusted src op for opclass 0,2 #
3287 # ALGORITHM *********************************************************** #
3288 # In a system where the FP SNAN exception is enabled, the goal #
3289 # is to get to the handler specified at _real_snan(). But, on the 060, #
3290 # for opclass zero and two instructions taking this exception, the #
3291 # input operand in the fsave frame may be incorrect for some cases #
3292 # and needs to be corrected. This handler calls fix_skewed_ops() to #
3293 # do just this and then exits through _real_snan(). #
3294 # For opclass 3 instructions, the 060 doesn't store the default #
3295 # SNAN result out to memory or data register file as it should. #
3296 # This code must emulate the move out before finally exiting through #
3297 # _real_snan(). The move out, if to memory, is performed using #
3298 # _mem_write() "callout" routines that may return a failing result. #
3299 # In this special case, the handler must exit through facc_out() #
3300 # which creates an access error stack frame from the current SNAN #
3301 # stack frame. #
3302 # For the case of an extended precision opclass 3 instruction, #
3303 # if the effective addressing mode was -() or ()+, then the address #
3304 # register must get updated by calling _calc_ea_fout(). If the <ea> #
3305 # was -(a7) from supervisor mode, then the exception frame currently #
3306 # on the system stack must be carefully moved "down" to make room #
3307 # for the operand being moved. #
3309 #########################################################################
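#
# in outline (a C-style sketch only; "cmdword", "fsave_src", and
# "emulate_default_store" are illustrative names for the stacked command word,
# the fsave source operand, and the opclass 3 store emulation):
#
#	if (cmdword & 0x2000)			/* opclass 3: fmove out */
#		emulate_default_store();	/* see fsnan_out below */
#	else					/* opclass 0,2 */
#		fix_skewed_ops(&fsave_src);	/* un-skew the source operand */
#	_real_snan();				/* exit through the OS callout */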
3311 global _fpsp_snan
3312 _fpsp_snan:
3314 link.w %a6,&-LOCAL_SIZE # init stack frame
3316 fsave FP_SRC(%a6) # grab the "busy" frame
3318 movm.l &0x0303,EXC_DREGS(%a6) # save d0-d1/a0-a1
3319 fmovm.l %fpcr,%fpsr,%fpiar,USER_FPCR(%a6) # save ctrl regs
3320 fmovm.x &0xc0,EXC_FPREGS(%a6) # save fp0-fp1 on stack
3322 # the FPIAR holds the "current PC" of the faulting instruction
3323 mov.l USER_FPIAR(%a6),EXC_EXTWPTR(%a6)
3325 mov.l EXC_EXTWPTR(%a6),%a0 # fetch instruction addr
3326 addq.l &0x4,EXC_EXTWPTR(%a6) # incr instruction ptr
3327 bsr.l _imem_read_long # fetch the instruction words
3328 mov.l %d0,EXC_OPWORD(%a6)
3330 ##############################################################################
3332 btst &13,%d0 # is instr an fmove out?
3333 bne.w fsnan_out # fmove out
3336 # here, we simply see if the operand in the fsave frame needs to be "unskewed".
3337 # this would be the case for opclass two operations with a source infinity or
3338 # denorm operand in the sgl or dbl format. NANs also become skewed and must be
3339 # fixed here.
3340 lea FP_SRC(%a6),%a0 # pass: ptr to src op
3341 bsr.l fix_skewed_ops # fix src op
3343 fsnan_exit:
3344 fmovm.x EXC_FPREGS(%a6),&0xc0 # restore fp0-fp1
3345 fmovm.l USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs
3346 movm.l EXC_DREGS(%a6),&0x0303 # restore d0-d1/a0-a1
3348 frestore FP_SRC(%a6)
3350 unlk %a6
3351 bra.l _real_snan
3353 ########################################################################
3356 # the hardware does not save the default result to memory on enabled
3357 # snan exceptions. we do this here before passing control to
3358 # the user snan handler.
3360 # byte, word, long, single, double, extended, and packed destination format
3361 # operations can pass through here. since packed format operations were already
3362 # handled by fpsp_unsupp(), we need to do nothing else for them here.
3363 # for byte, word, and long, we simply set the SNAN bit in the upper portion
3364 # of the source mantissa and store that byte, word, or longword to the
3365 # effective address pointed to by the stacked effective address.
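#
# as a rough C sketch of the byte/word/long cases below (not part of the
# package; "man_hi", "size", and the store helpers are illustrative names),
# the default result is the top of the SNAN's mantissa with its most
# significant fraction bit forced on:
#
#	uint32_t dflt = man_hi | 0x40000000;	/* upper mantissa lword, SNAN bit set */
#	switch (size) {
#	case 1: store_byte(ea, dflt >> 24); break;
#	case 2: store_word(ea, dflt >> 16); break;
#	case 4: store_long(ea, dflt);       break;
#	}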
3367 fsnan_out:
3369 bfextu %d0{&19:&3},%d0 # extract dst format field
3370 mov.b 1+EXC_OPWORD(%a6),%d1 # extract <ea> mode,reg
3371 mov.w (tbl_snan.b,%pc,%d0.w*2),%a0
3372 jmp (tbl_snan.b,%pc,%a0)
3374 tbl_snan:
3375 short fsnan_out_l - tbl_snan # long word integer
3376 short fsnan_out_s - tbl_snan # sgl prec
3377 short fsnan_out_x - tbl_snan # ext prec
3378 short tbl_snan - tbl_snan # packed needs no help
3379 short fsnan_out_w - tbl_snan # word integer
3380 short fsnan_out_d - tbl_snan # dbl prec
3381 short fsnan_out_b - tbl_snan # byte integer
3382 short tbl_snan - tbl_snan # packed needs no help
3384 fsnan_out_b:
3385 mov.b FP_SRC_HI(%a6),%d0 # load upper byte of SNAN
3386 bset &6,%d0 # set SNAN bit
3387 cmpi.b %d1,&0x7 # is <ea> mode a data reg?
3388 ble.b fsnan_out_b_dn # yes
3389 mov.l EXC_EA(%a6),%a0 # pass: <ea> of default result
3390 bsr.l _dmem_write_byte # write the default result
3392 tst.l %d1 # did dstore fail?
3393 bne.l facc_out_b # yes
3395 bra.w fsnan_exit
3396 fsnan_out_b_dn:
3397 andi.w &0x0007,%d1
3398 bsr.l store_dreg_b # store result to regfile
3399 bra.w fsnan_exit
3401 fsnan_out_w:
3402 mov.w FP_SRC_HI(%a6),%d0 # load upper word of SNAN
3403 bset &14,%d0 # set SNAN bit
3404 cmpi.b %d1,&0x7 # is <ea> mode a data reg?
3405 ble.b fsnan_out_w_dn # yes
3406 mov.l EXC_EA(%a6),%a0 # pass: <ea> of default result
3407 bsr.l _dmem_write_word # write the default result
3409 tst.l %d1 # did dstore fail?
3410 bne.l facc_out_w # yes
3412 bra.w fsnan_exit
3413 fsnan_out_w_dn:
3414 andi.w &0x0007,%d1
3415 bsr.l store_dreg_w # store result to regfile
3416 bra.w fsnan_exit
3418 fsnan_out_l:
3419 mov.l FP_SRC_HI(%a6),%d0 # load upper longword of SNAN
3420 bset &30,%d0 # set SNAN bit
3421 cmpi.b %d1,&0x7 # is <ea> mode a data reg?
3422 ble.b fsnan_out_l_dn # yes
3423 mov.l EXC_EA(%a6),%a0 # pass: <ea> of default result
3424 bsr.l _dmem_write_long # write the default result
3426 tst.l %d1 # did dstore fail?
3427 bne.l facc_out_l # yes
3429 bra.w fsnan_exit
3430 fsnan_out_l_dn:
3431 andi.w &0x0007,%d1
3432 bsr.l store_dreg_l # store result to regfile
3433 bra.w fsnan_exit
3435 fsnan_out_s:
3436 cmpi.b %d1,&0x7 # is <ea> mode a data reg?
3437 ble.b fsnan_out_d_dn # yes
3438 mov.l FP_SRC_EX(%a6),%d0 # fetch SNAN sign
3439 andi.l &0x80000000,%d0 # keep sign
3440 ori.l &0x7fc00000,%d0 # insert new exponent,SNAN bit
3441 mov.l FP_SRC_HI(%a6),%d1 # load mantissa
3442 lsr.l &0x8,%d1 # shift mantissa for sgl
3443 or.l %d1,%d0 # create sgl SNAN
3444 mov.l EXC_EA(%a6),%a0 # pass: <ea> of default result
3445 bsr.l _dmem_write_long # write the default result
3447 tst.l %d1 # did dstore fail?
3448 bne.l facc_out_l # yes
3450 bra.w fsnan_exit
3451 fsnan_out_d_dn:
3452 mov.l FP_SRC_EX(%a6),%d0 # fetch SNAN sign
3453 andi.l &0x80000000,%d0 # keep sign
3454 ori.l &0x7fc00000,%d0 # insert new exponent,SNAN bit
3455 mov.l %d1,-(%sp)
3456 mov.l FP_SRC_HI(%a6),%d1 # load mantissa
3457 lsr.l &0x8,%d1 # shift mantissa for sgl
3458 or.l %d1,%d0 # create sgl SNAN
3459 mov.l (%sp)+,%d1
3460 andi.w &0x0007,%d1
3461 bsr.l store_dreg_l # store result to regfile
3462 bra.w fsnan_exit
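#
# the single-precision case above amounts to the following C sketch
# (illustrative only; "se" and "man_hi" stand for FP_SRC_EX and FP_SRC_HI):
#
#	uint32_t sgl  = se & 0x80000000;	/* keep the sign */
#	sgl |= 0x7fc00000;			/* max exponent + SNAN bit */
#	sgl |= man_hi >> 8;			/* top of the mantissa; the shifted-down
#						   integer bit lands in the already-set
#						   exponent field */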
3464 fsnan_out_d:
3465 mov.l FP_SRC_EX(%a6),%d0 # fetch SNAN sign
3466 andi.l &0x80000000,%d0 # keep sign
3467 ori.l &0x7ff80000,%d0 # insert new exponent,SNAN bit
3468 mov.l FP_SRC_HI(%a6),%d1 # load hi mantissa
3469 mov.l %d0,FP_SCR0_EX(%a6) # store to temp space
3470 mov.l &11,%d0 # load shift amt
3471 lsr.l %d0,%d1
3472 or.l %d1,FP_SCR0_EX(%a6) # create dbl hi
3473 mov.l FP_SRC_HI(%a6),%d1 # load hi mantissa
3474 andi.l &0x000007ff,%d1
3475 ror.l %d0,%d1
3476 mov.l %d1,FP_SCR0_HI(%a6) # store to temp space
3477 mov.l FP_SRC_LO(%a6),%d1 # load lo mantissa
3478 lsr.l %d0,%d1
3479 or.l %d1,FP_SCR0_HI(%a6) # create dbl lo
3480 lea FP_SCR0(%a6),%a0 # pass: ptr to operand
3481 mov.l EXC_EA(%a6),%a1 # pass: dst addr
3482 movq.l &0x8,%d0 # pass: size of 8 bytes
3483 bsr.l _dmem_write # write the default result
3485 tst.l %d1 # did dstore fail?
3486 bne.l facc_out_d # yes
3488 bra.w fsnan_exit
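#
# likewise, the double-precision build above is, as a C sketch (illustrative
# only; "se", "man_hi", "man_lo" stand for FP_SRC_EX/FP_SRC_HI/FP_SRC_LO):
#
#	uint64_t dbl  = (uint64_t)(se & 0x80000000) << 32;	/* keep the sign */
#	dbl |= 0x7ff8000000000000ULL;				/* max exponent + SNAN bit */
#	dbl |= (((uint64_t)man_hi << 32) | man_lo) >> 11;	/* top of the mantissa; the
#								   integer bit lands in the
#								   already-set exponent */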
3490 # for extended precision, if the addressing mode is pre-decrement or
3491 # post-increment, then the address register did not get updated.
3492 # in addition, for pre-decrement, the stacked <ea> is incorrect.
3493 fsnan_out_x:
3494 clr.b SPCOND_FLG(%a6) # clear special case flag
3496 mov.w FP_SRC_EX(%a6),FP_SCR0_EX(%a6)
3497 clr.w 2+FP_SCR0(%a6)
3498 mov.l FP_SRC_HI(%a6),%d0
3499 bset &30,%d0
3500 mov.l %d0,FP_SCR0_HI(%a6)
3501 mov.l FP_SRC_LO(%a6),FP_SCR0_LO(%a6)
3503 btst &0x5,EXC_SR(%a6) # supervisor mode exception?
3504 bne.b fsnan_out_x_s # yes
3506 mov.l %usp,%a0 # fetch user stack pointer
3507 mov.l %a0,EXC_A7(%a6) # save on stack for calc_ea()
3508 mov.l (%a6),EXC_A6(%a6)
3510 bsr.l _calc_ea_fout # find the correct ea,update An
3511 mov.l %a0,%a1
3512 mov.l %a0,EXC_EA(%a6) # stack correct <ea>
3514 mov.l EXC_A7(%a6),%a0
3515 mov.l %a0,%usp # restore user stack pointer
3516 mov.l EXC_A6(%a6),(%a6)
3518 fsnan_out_x_save:
3519 lea FP_SCR0(%a6),%a0 # pass: ptr to operand
3520 movq.l &0xc,%d0 # pass: size of extended
3521 bsr.l _dmem_write # write the default result
3523 tst.l %d1 # did dstore fail?
3524 bne.l facc_out_x # yes
3526 bra.w fsnan_exit
3528 fsnan_out_x_s:
3529 mov.l (%a6),EXC_A6(%a6)
3531 bsr.l _calc_ea_fout # find the correct ea,update An
3532 mov.l %a0,%a1
3533 mov.l %a0,EXC_EA(%a6) # stack correct <ea>
3535 mov.l EXC_A6(%a6),(%a6)
3537 cmpi.b SPCOND_FLG(%a6),&mda7_flg # is <ea> mode -(a7)?
3538 bne.b fsnan_out_x_save # no
3540 # the operation was "fmove.x SNAN,-(a7)" from supervisor mode.
3541 fmovm.x EXC_FPREGS(%a6),&0xc0 # restore fp0-fp1
3542 fmovm.l USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs
3543 movm.l EXC_DREGS(%a6),&0x0303 # restore d0-d1/a0-a1
3545 frestore FP_SRC(%a6)
3547 mov.l EXC_A6(%a6),%a6 # restore frame pointer
3549 mov.l LOCAL_SIZE+EXC_SR(%sp),LOCAL_SIZE+EXC_SR-0xc(%sp)
3550 mov.l LOCAL_SIZE+EXC_PC+0x2(%sp),LOCAL_SIZE+EXC_PC+0x2-0xc(%sp)
3551 mov.l LOCAL_SIZE+EXC_EA(%sp),LOCAL_SIZE+EXC_EA-0xc(%sp)
3553 mov.l LOCAL_SIZE+FP_SCR0_EX(%sp),LOCAL_SIZE+EXC_SR(%sp)
3554 mov.l LOCAL_SIZE+FP_SCR0_HI(%sp),LOCAL_SIZE+EXC_PC+0x2(%sp)
3555 mov.l LOCAL_SIZE+FP_SCR0_LO(%sp),LOCAL_SIZE+EXC_EA(%sp)
3557 add.l &LOCAL_SIZE-0x8,%sp
3559 bra.l _real_snan
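#
# note: the 12-byte operand pushed by "fmove.x SNAN,-(a7)" overlaps the
# 3-longword exception frame, so the frame (SR/PC hi, PC lo/voff, <ea>) is
# first copied 0xc bytes lower, the operand is then written where the old
# frame sat, and %sp is left pointing at the relocated frame for the exit
# through _real_snan().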
3561 #########################################################################
3562 # XDEF **************************************************************** #
3563 # _fpsp_inex(): 060FPSP entry point for FP Inexact exception. #
3565 # This handler should be the first code executed upon taking the #
3566 # FP Inexact exception in an operating system. #
3568 # XREF **************************************************************** #
3569 # _imem_read_long() - read instruction longword #
3570 # fix_skewed_ops() - adjust src operand in fsave frame #
3571 # set_tag_x() - determine optype of src/dst operands #
3572 # store_fpreg() - store opclass 0 or 2 result to FP regfile #
3573 # unnorm_fix() - change UNNORM operands to NORM or ZERO #
3574 # load_fpn2() - load dst operand from FP regfile #
3575 # smovcr() - emulate an "fmovcr" instruction #
3576 # fout() - emulate an opclass 3 instruction #
3577 # tbl_unsupp - addr of table of emulation routines for opclass 0,2 #
3578 # _real_inex() - "callout" to operating system inexact handler #
3580 # INPUT *************************************************************** #
3581 # - The system stack contains the FP Inexact exception frame #
3582 # - The fsave frame contains the source operand #
3584 # OUTPUT ************************************************************** #
3585 # - The system stack is unchanged #
3586 # - The fsave frame contains the adjusted src op for opclass 0,2 #
3588 # ALGORITHM *********************************************************** #
3589 # In a system where the FP Inexact exception is enabled, the goal #
3590 # is to get to the handler specified at _real_inex(). But, on the 060, #
3591 # for opclass zero and two instructions taking this exception, the #
3592 # hardware doesn't store the correct result to the destination FP #
3593 # register as did the '040 and '881/2. This handler must emulate the #
3594 # instruction in order to get this value and then store it to the #
3595 # correct register before calling _real_inex(). #
3596 # For opclass 3 instructions, the 060 doesn't store the default #
3597 # inexact result out to memory or data register file as it should. #
3598 # This code must emulate the move out by calling fout() before finally #
3599 # exiting through _real_inex(). #
3601 #########################################################################
3603 global _fpsp_inex
3604 _fpsp_inex:
3606 link.w %a6,&-LOCAL_SIZE # init stack frame
3608 fsave FP_SRC(%a6) # grab the "busy" frame
3610 movm.l &0x0303,EXC_DREGS(%a6) # save d0-d1/a0-a1
3611 fmovm.l %fpcr,%fpsr,%fpiar,USER_FPCR(%a6) # save ctrl regs
3612 fmovm.x &0xc0,EXC_FPREGS(%a6) # save fp0-fp1 on stack
3614 # the FPIAR holds the "current PC" of the faulting instruction
3615 mov.l USER_FPIAR(%a6),EXC_EXTWPTR(%a6)
3617 mov.l EXC_EXTWPTR(%a6),%a0 # fetch instruction addr
3618 addq.l &0x4,EXC_EXTWPTR(%a6) # incr instruction ptr
3619 bsr.l _imem_read_long # fetch the instruction words
3620 mov.l %d0,EXC_OPWORD(%a6)
3622 ##############################################################################
3624 btst &13,%d0 # is instr an fmove out?
3625 bne.w finex_out # fmove out
3628 # the hardware, for "fabs" and "fneg" w/ a long source format, puts the
3629 # longword integer directly into the upper longword of the mantissa along
3630 # w/ an exponent value of 0x401e. we convert this to extended precision here.
3631 bfextu %d0{&19:&3},%d0 # fetch instr size
3632 bne.b finex_cont # instr size is not long
3633 cmpi.w FP_SRC_EX(%a6),&0x401e # is exponent 0x401e?
3634 bne.b finex_cont # no
3635 fmov.l &0x0,%fpcr
3636 fmov.l FP_SRC_HI(%a6),%fp0 # load integer src
3637 fmov.x %fp0,FP_SRC(%a6) # store integer as extended precision
3638 mov.w &0xe001,0x2+FP_SRC(%a6)
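#
# (why this works: an extended-precision value is mantissa * 2^(exp - 0x3fff)
# with the integer bit at mantissa bit 63, so a positive 32-bit integer sitting
# in the upper mantissa longword under an exponent of 0x401e is worth
# int32 * 2^(0x401e - 0x3fff - 31) = int32 * 2^0, i.e. the integer itself.)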
3640 finex_cont:
3641 lea FP_SRC(%a6),%a0 # pass: ptr to src op
3642 bsr.l fix_skewed_ops # fix src op
3644 # Here, we zero the ccode and exception byte field since we're going to
3645 # emulate the whole instruction. Notice, though, that we don't kill the
3646 # INEX1 bit. This is because a packed op has long since been converted
3647 # to extended before arriving here. Therefore, we need to retain the
3648 # INEX1 bit from when the operand was first converted.
3649 andi.l &0x00ff01ff,USER_FPSR(%a6) # zero all but accrued field
3651 fmov.l &0x0,%fpcr # zero current control regs
3652 fmov.l &0x0,%fpsr
3654 bfextu EXC_EXTWORD(%a6){&0:&6},%d1 # extract upper 6 of cmdreg
3655 cmpi.b %d1,&0x17 # is op an fmovecr?
3656 beq.w finex_fmovcr # yes
3658 lea FP_SRC(%a6),%a0 # pass: ptr to src op
3659 bsr.l set_tag_x # tag the operand type
3660 mov.b %d0,STAG(%a6) # maybe NORM,DENORM
3662 # bits four and five of the fp extension word separate the monadic and dyadic
3663 # operations that can pass through fpsp_inex(). remember that fcmp and ftst
3664 # will never take this exception, but fsincos will.
3665 btst &0x5,1+EXC_CMDREG(%a6) # is operation monadic or dyadic?
3666 beq.b finex_extract # monadic
3668 btst &0x4,1+EXC_CMDREG(%a6) # is operation an fsincos?
3669 bne.b finex_extract # yes
3671 bfextu EXC_CMDREG(%a6){&6:&3},%d0 # dyadic; load dst reg
3672 bsr.l load_fpn2 # load dst into FP_DST
3674 lea FP_DST(%a6),%a0 # pass: ptr to dst op
3675 bsr.l set_tag_x # tag the operand type
3676 cmpi.b %d0,&UNNORM # is operand an UNNORM?
3677 bne.b finex_op2_done # no
3678 bsr.l unnorm_fix # yes; convert to NORM,DENORM,or ZERO
3679 finex_op2_done:
3680 mov.b %d0,DTAG(%a6) # save dst optype tag
3682 finex_extract:
3683 clr.l %d0
3684 mov.b FPCR_MODE(%a6),%d0 # pass rnd prec/mode
3686 mov.b 1+EXC_CMDREG(%a6),%d1
3687 andi.w &0x007f,%d1 # extract extension
3689 lea FP_SRC(%a6),%a0
3690 lea FP_DST(%a6),%a1
3692 mov.l (tbl_unsupp.l,%pc,%d1.w*4),%d1 # fetch routine addr
3693 jsr (tbl_unsupp.l,%pc,%d1.l*1)
3695 # the operation has been emulated. the result is in fp0.
3696 finex_save:
3697 bfextu EXC_CMDREG(%a6){&6:&3},%d0
3698 bsr.l store_fpreg
3700 finex_exit:
3701 fmovm.x EXC_FPREGS(%a6),&0xc0 # restore fp0-fp1
3702 fmovm.l USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs
3703 movm.l EXC_DREGS(%a6),&0x0303 # restore d0-d1/a0-a1
3705 frestore FP_SRC(%a6)
3707 unlk %a6
3708 bra.l _real_inex
3710 finex_fmovcr:
3711 clr.l %d0
3712 mov.b FPCR_MODE(%a6),%d0 # pass rnd prec,mode
3713 mov.b 1+EXC_CMDREG(%a6),%d1
3714 andi.l &0x0000007f,%d1 # pass rom offset
3715 bsr.l smovcr
3716 bra.b finex_save
3718 ########################################################################
3721 # the hardware does not save the default result to memory on enabled
3722 # inexact exceptions. we do this here before passing control to
3723 # the user inexact handler.
3725 # byte, word, and long destination format operations can pass
3726 # through here. so can double and single precision.
3727 # although packed opclass three operations can take inexact
3728 # exceptions, they won't pass through here since they are caught
3729 # first by the unsupported data format exception handler. that handler
3730 # sends them directly to _real_inex() if necessary.
3732 finex_out:
3734 mov.b &NORM,STAG(%a6) # src is a NORM
3736 clr.l %d0
3737 mov.b FPCR_MODE(%a6),%d0 # pass rnd prec,mode
3739 andi.l &0xffff00ff,USER_FPSR(%a6) # zero exception field
3741 lea FP_SRC(%a6),%a0 # pass ptr to src operand
3743 bsr.l fout # store the default result
3745 bra.b finex_exit
3747 #########################################################################
3748 # XDEF **************************************************************** #
3749 # _fpsp_dz(): 060FPSP entry point for FP DZ exception. #
3751 # This handler should be the first code executed upon taking #
3752 # the FP DZ exception in an operating system. #
3754 # XREF **************************************************************** #
3755 # _imem_read_long() - read instruction longword from memory #
3756 # fix_skewed_ops() - adjust fsave operand #
3757 # _real_dz() - "callout" exit point from FP DZ handler #
3759 # INPUT *************************************************************** #
3760 # - The system stack contains the FP DZ exception stack. #
3761 # - The fsave frame contains the source operand. #
3763 # OUTPUT ************************************************************** #
3764 # - The system stack contains the FP DZ exception stack. #
3765 # - The fsave frame contains the adjusted source operand. #
3767 # ALGORITHM *********************************************************** #
3768 # In a system where the DZ exception is enabled, the goal is to #
3769 # get to the handler specified at _real_dz(). But, on the 060, when the #
3770 # exception is taken, the input operand in the fsave state frame may #
3771 # be incorrect for some cases and need to be adjusted. So, this package #
3772 # adjusts the operand using fix_skewed_ops() and then branches to #
3773 # _real_dz(). #
3775 #########################################################################
3777 global _fpsp_dz
3778 _fpsp_dz:
3780 link.w %a6,&-LOCAL_SIZE # init stack frame
3782 fsave FP_SRC(%a6) # grab the "busy" frame
3784 movm.l &0x0303,EXC_DREGS(%a6) # save d0-d1/a0-a1
3785 fmovm.l %fpcr,%fpsr,%fpiar,USER_FPCR(%a6) # save ctrl regs
3786 fmovm.x &0xc0,EXC_FPREGS(%a6) # save fp0-fp1 on stack
3788 # the FPIAR holds the "current PC" of the faulting instruction
3789 mov.l USER_FPIAR(%a6),EXC_EXTWPTR(%a6)
3791 mov.l EXC_EXTWPTR(%a6),%a0 # fetch instruction addr
3792 addq.l &0x4,EXC_EXTWPTR(%a6) # incr instruction ptr
3793 bsr.l _imem_read_long # fetch the instruction words
3794 mov.l %d0,EXC_OPWORD(%a6)
3796 ##############################################################################
3799 # here, we simply see if the operand in the fsave frame needs to be "unskewed".
3800 # this would be the case for opclass two operations with a source zero
3801 # in the sgl or dbl format.
3802 lea FP_SRC(%a6),%a0 # pass: ptr to src op
3803 bsr.l fix_skewed_ops # fix src op
3805 fdz_exit:
3806 fmovm.x EXC_FPREGS(%a6),&0xc0 # restore fp0-fp1
3807 fmovm.l USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs
3808 movm.l EXC_DREGS(%a6),&0x0303 # restore d0-d1/a0-a1
3810 frestore FP_SRC(%a6)
3812 unlk %a6
3813 bra.l _real_dz
3815 #########################################################################
3816 # XDEF **************************************************************** #
3817 # _fpsp_fline(): 060FPSP entry point for "Line F emulator" exc. #
3819 # This handler should be the first code executed upon taking the #
3820 # "Line F Emulator" exception in an operating system. #
3822 # XREF **************************************************************** #
3823 # _fpsp_unimp() - handle "FP Unimplemented" exceptions #
3824 # _real_fpu_disabled() - handle "FPU disabled" exceptions #
3825 # _real_fline() - handle "FLINE" exceptions #
3826 # _imem_read_long() - read instruction longword #
3828 # INPUT *************************************************************** #
3829 # - The system stack contains a "Line F Emulator" exception #
3830 # stack frame. #
3832 # OUTPUT ************************************************************** #
3833 # - The system stack is unchanged #
3835 # ALGORITHM *********************************************************** #
3836 # When a "Line F Emulator" exception occurs, there are 3 possible #
3837 # exception types, denoted by the exception stack frame format number: #
3838 # (1) FPU unimplemented instruction (6 word stack frame) #
3839 # (2) FPU disabled (8 word stack frame) #
3840 # (3) Line F (4 word stack frame) #
3842 # This module determines which and forks the flow off to the #
3843 # appropriate "callout" (for "disabled" and "Line F") or to the #
3844 # correct emulation code (for "FPU unimplemented"). #
3845 # This code also must check for "fmovecr" instructions w/ a #
3846 # non-zero <ea> field. These may get flagged as "Line F" but should #
3847 # really be flagged as "FPU Unimplemented". (This is a "feature" on #
3848 # the '060.) #
3850 #########################################################################
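#
# the format/vector word at 0x6(%sp) distinguishes the three cases; roughly,
# as a C sketch (illustrative only; "sp" is the stacked exception frame):
#
#	uint16_t fmt_voff = *(uint16_t *)((uint8_t *)sp + 0x6);
#	if (fmt_voff == 0x202c)		/* format $2, vector 0x2c: FP unimplemented */
#		_fpsp_unimp();
#	else if (fmt_voff == 0x402c)	/* format $4, vector 0x2c: FPU disabled */
#		_real_fpu_disabled();
#	else				/* format $0, vector 0x2c: true F-line */
#		/* check for "fmovecr" w/ a non-zero <ea>, else _real_fline() */;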
3852 global _fpsp_fline
3853 _fpsp_fline:
3855 # check to see if this exception is a "FP Unimplemented Instruction"
3856 # exception. if so, branch directly to that handler's entry point.
3857 cmpi.w 0x6(%sp),&0x202c
3858 beq.l _fpsp_unimp
3860 # check to see if the FPU is disabled. if so, jump to the OS entry
3861 # point for that condition.
3862 cmpi.w 0x6(%sp),&0x402c
3863 beq.l _real_fpu_disabled
3865 # the exception was an "F-Line Illegal" exception. we check to see
3866 # if the F-Line instruction is an "fmovecr" w/ a non-zero <ea>. if
3867 # so, convert the F-Line exception stack frame to an FP Unimplemented
3868 # Instruction exception stack frame else branch to the OS entry
3869 # point for the F-Line exception handler.
3870 link.w %a6,&-LOCAL_SIZE # init stack frame
3872 movm.l &0x0303,EXC_DREGS(%a6) # save d0-d1/a0-a1
3874 mov.l EXC_PC(%a6),EXC_EXTWPTR(%a6)
3875 mov.l EXC_EXTWPTR(%a6),%a0 # fetch instruction addr
3876 addq.l &0x4,EXC_EXTWPTR(%a6) # incr instruction ptr
3877 bsr.l _imem_read_long # fetch instruction words
3879 bfextu %d0{&0:&10},%d1 # is it an fmovecr?
3880 cmpi.w %d1,&0x03c8
3881 bne.b fline_fline # no
3883 bfextu %d0{&16:&6},%d1 # is it an fmovecr?
3884 cmpi.b %d1,&0x17
3885 bne.b fline_fline # no
3887 # it's an fmovecr w/ a non-zero <ea> that has entered through
3888 # the F-Line Illegal exception.
3889 # so, we need to convert the F-Line exception stack frame into an
3890 # FP Unimplemented Instruction stack frame and jump to that entry
3891 # point.
3893 # but, if the FPU is disabled, then we need to jump to the FPU disabled
3894 # entry point.
3895 movc %pcr,%d0
3896 btst &0x1,%d0
3897 beq.b fline_fmovcr
3899 movm.l EXC_DREGS(%a6),&0x0303 # restore d0-d1/a0-a1
3901 unlk %a6
3903 sub.l &0x8,%sp # make room for "Next PC", <ea>
3904 mov.w 0x8(%sp),(%sp)
3905 mov.l 0xa(%sp),0x2(%sp) # move "Current PC"
3906 mov.w &0x402c,0x6(%sp)
3907 mov.l 0x2(%sp),0xc(%sp)
3908 addq.l &0x4,0x2(%sp) # set "Next PC"
3910 bra.l _real_fpu_disabled
3912 fline_fmovcr:
3913 movm.l EXC_DREGS(%a6),&0x0303 # restore d0-d1/a0-a1
3915 unlk %a6
3917 fmov.l 0x2(%sp),%fpiar # set current PC
3918 addq.l &0x4,0x2(%sp) # set Next PC
3920 mov.l (%sp),-(%sp)
3921 mov.l 0x8(%sp),0x4(%sp)
3922 mov.b &0x20,0x6(%sp)
3924 bra.l _fpsp_unimp
3926 fline_fline:
3927 movm.l EXC_DREGS(%a6),&0x0303 # restore d0-d1/a0-a1
3929 unlk %a6
3931 bra.l _real_fline
3933 #########################################################################
3934 # XDEF **************************************************************** #
3935 # _fpsp_unimp(): 060FPSP entry point for FP "Unimplemented #
3936 # Instruction" exception. #
3938 # This handler should be the first code executed upon taking the #
3939 # FP Unimplemented Instruction exception in an operating system. #
3941 # XREF **************************************************************** #
3942 # _imem_read_{word,long}() - read instruction word/longword #
3943 # load_fop() - load src/dst ops from memory and/or FP regfile #
3944 # store_fpreg() - store opclass 0 or 2 result to FP regfile #
3945 # tbl_trans - addr of table of emulation routines for trnscndls #
3946 # _real_access() - "callout" for access error exception #
3947 # _fpsp_done() - "callout" for exit; work all done #
3948 # _real_trace() - "callout" for Trace enabled exception #
3949 # smovcr() - emulate "fmovecr" instruction #
3950 # funimp_skew() - adjust fsave src ops to "incorrect" value #
3951 # _ftrapcc() - emulate an "ftrapcc" instruction #
3952 # _fdbcc() - emulate an "fdbcc" instruction #
3953 # _fscc() - emulate an "fscc" instruction #
3954 # _real_trap() - "callout" for Trap exception #
3955 # _real_bsun() - "callout" for enabled Bsun exception #
3957 # INPUT *************************************************************** #
3958 # - The system stack contains the "Unimplemented Instr" stk frame #
3960 # OUTPUT ************************************************************** #
3961 # If access error: #
3962 # - The system stack is changed to an access error stack frame #
3963 # If Trace exception enabled: #
3964 # - The system stack is changed to a Trace exception stack frame #
3965 # Else: (normal case) #
3966 # - Correct result has been stored as appropriate #
3968 # ALGORITHM *********************************************************** #
3969 # There are two main cases of instructions that may enter here to #
3970 # be emulated: (1) the FPgen instructions, most of which were also #
3971 # unimplemented on the 040, and (2) "ftrapcc", "fscc", and "fdbcc". #
3972 # For the first set, this handler calls the routine load_fop() #
3973 # to load the source and destination (for dyadic) operands to be used #
3974 # for instruction emulation. The correct emulation routine is then #
3975 # chosen by decoding the instruction type and indexing into an #
3976 # emulation subroutine index table. After emulation returns, this #
3977 # handler checks to see if an exception should occur as a result of the #
3978 # FP instruction emulation. If so, then an FP exception of the correct #
3979 # type is inserted into the FPU state frame using the "frestore" #
3980 # instruction before exiting through _fpsp_done(). In either the #
3981 # exceptional or non-exceptional cases, we must check to see if the #
3982 # Trace exception is enabled. If so, then we must create a Trace #
3983 # exception frame from the current exception frame and exit through #
3984 # _real_trace(). #
3985 # For "fdbcc", "ftrapcc", and "fscc", the emulation subroutines #
3986 # _fdbcc(), _ftrapcc(), and _fscc() respectively are used. All three #
3987 # may flag that a BSUN exception should be taken. If so, then the #
3988 # current exception stack frame is converted into a BSUN exception #
3989 # stack frame and an exit is made through _real_bsun(). If the #
3990 # instruction was "ftrapcc" and a Trap exception should result, a Trap #
3991 # exception stack frame is created from the current frame and an exit #
3992 # is made through _real_trap(). If a Trace exception is pending, then #
3993 # a Trace exception frame is created from the current frame and a jump #
3994 # is made to _real_trace(). Finally, if none of these conditions exist, #
3995 # then the handler exits through the callout _fpsp_done(). #
3997 # In any of the above scenarios, if a _mem_read() or _mem_write() #
3998 # "callout" returns a failing value, then an access error stack frame #
3999 # is created from the current stack frame and an exit is made through #
4000 # _real_access(). #
4002 #########################################################################
4005 # FP UNIMPLEMENTED INSTRUCTION STACK FRAME:
4007 # *****************
4008 # * * => <ea> of fp unimp instr.
4009 # - EA -
4010 # * *
4011 # *****************
4012 # * 0x2 * 0x02c * => frame format and vector offset(vector #11)
4013 # *****************
4014 # * *
4015 # - Next PC - => PC of instr to execute after exc handling
4016 # * *
4017 # *****************
4018 # * SR * => SR at the time the exception was taken
4019 # *****************
4021 # Note: the !NULL bit does not get set in the fsave frame when the
4022 # machine encounters an fp unimp exception. Therefore, it must be set
4023 # before leaving this handler.
4025 global _fpsp_unimp
4026 _fpsp_unimp:
4028 link.w %a6,&-LOCAL_SIZE # init stack frame
4030 movm.l &0x0303,EXC_DREGS(%a6) # save d0-d1/a0-a1
4031 fmovm.l %fpcr,%fpsr,%fpiar,USER_FPCR(%a6) # save ctrl regs
4032 fmovm.x &0xc0,EXC_FPREGS(%a6) # save fp0-fp1
4034 btst &0x5,EXC_SR(%a6) # user mode exception?
4035 bne.b funimp_s # no; supervisor mode
4037 # save the value of the user stack pointer onto the stack frame
4038 funimp_u:
4039 mov.l %usp,%a0 # fetch user stack pointer
4040 mov.l %a0,EXC_A7(%a6) # store in stack frame
4041 bra.b funimp_cont
4043 # store the value of the supervisor stack pointer BEFORE the exc occurred.
4044 # old_sp is address just above stacked effective address.
4045 funimp_s:
4046 lea 4+EXC_EA(%a6),%a0 # load old a7'
4047 mov.l %a0,EXC_A7(%a6) # store a7'
4048 mov.l %a0,OLD_A7(%a6) # make a copy
4050 funimp_cont:
4052 # the FPIAR holds the "current PC" of the faulting instruction.
4053 mov.l USER_FPIAR(%a6),EXC_EXTWPTR(%a6)
4055 mov.l EXC_EXTWPTR(%a6),%a0 # fetch instruction addr
4056 addq.l &0x4,EXC_EXTWPTR(%a6) # incr instruction ptr
4057 bsr.l _imem_read_long # fetch the instruction words
4058 mov.l %d0,EXC_OPWORD(%a6)
4060 ############################################################################
4062 fmov.l &0x0,%fpcr # clear FPCR
4063 fmov.l &0x0,%fpsr # clear FPSR
4065 clr.b SPCOND_FLG(%a6) # clear "special case" flag
4067 # Divide the fp instructions into 8 types based on the TYPE field in
4068 # bits 6-8 of the opword (types 6,7 are undefined).
4069 # (for the '060, only two types can take this exception)
4070 # bftst %d0{&7:&3} # test TYPE
4071 btst &22,%d0 # type 0 or 1 ?
4072 bne.w funimp_misc # type 1
4074 #########################################
4075 # TYPE == 0: General instructions #
4076 #########################################
4077 funimp_gen:
4079 clr.b STORE_FLG(%a6) # clear "store result" flag
4081 # clear the ccode byte and exception status byte
4082 andi.l &0x00ff00ff,USER_FPSR(%a6)
4084 bfextu %d0{&16:&6},%d1 # extract upper 6 of cmdreg
4085 cmpi.b %d1,&0x17 # is op an fmovecr?
4086 beq.w funimp_fmovcr # yes
4088 funimp_gen_op:
4089 bsr.l _load_fop # load
4091 clr.l %d0
4092 mov.b FPCR_MODE(%a6),%d0 # fetch rnd mode
4094 mov.b 1+EXC_CMDREG(%a6),%d1
4095 andi.w &0x003f,%d1 # extract extension bits
4096 lsl.w &0x3,%d1 # shift left 3 bits
4097 or.b STAG(%a6),%d1 # insert src optag bits
4099 lea FP_DST(%a6),%a1 # pass dst ptr in a1
4100 lea FP_SRC(%a6),%a0 # pass src ptr in a0
4102 mov.w (tbl_trans.w,%pc,%d1.w*2),%d1
4103 jsr (tbl_trans.w,%pc,%d1.w*1) # emulate
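#
# the emulation routine is picked by an index built from the instruction and
# the source tag; as a C sketch (illustrative only; "cmd" is the low byte of
# the command word and "stag" the tag written by set_tag_x()):
#
#	unsigned idx = ((cmd & 0x3f) << 3) | stag;	/* 8 entries per operation */
#	int16_t  off = tbl_trans[idx];			/* "short label - tbl_trans" */
#	((void (*)(void))((char *)tbl_trans + off))();	/* call the handler */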
4105 funimp_fsave:
4106 mov.b FPCR_ENABLE(%a6),%d0 # fetch exceptions enabled
4107 bne.w funimp_ena # some are enabled
4109 funimp_store:
4110 bfextu EXC_CMDREG(%a6){&6:&3},%d0 # fetch Dn
4111 bsr.l store_fpreg # store result to fp regfile
4113 funimp_gen_exit:
4114 fmovm.x EXC_FP0(%a6),&0xc0 # restore fp0-fp1
4115 fmovm.l USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs
4116 movm.l EXC_DREGS(%a6),&0x0303 # restore d0-d1/a0-a1
4118 funimp_gen_exit_cmp:
4119 cmpi.b SPCOND_FLG(%a6),&mia7_flg # was the ea mode (sp)+ ?
4120 beq.b funimp_gen_exit_a7 # yes
4122 cmpi.b SPCOND_FLG(%a6),&mda7_flg # was the ea mode -(sp) ?
4123 beq.b funimp_gen_exit_a7 # yes
4125 funimp_gen_exit_cont:
4126 unlk %a6
4128 funimp_gen_exit_cont2:
4129 btst &0x7,(%sp) # is trace on?
4130 beq.l _fpsp_done # no
4132 # this catches a problem with the case where an exception will be re-inserted
4133 # into the machine. the frestore has already been executed...so, an fmov.l
4134 # of the control register alone would trigger an unwanted exception.
4135 # until I feel like fixing this, we'll sidestep the exception.
4136 fsave -(%sp)
4137 fmov.l %fpiar,0x14(%sp) # "Current PC" is in FPIAR
4138 frestore (%sp)+
4139 mov.w &0x2024,0x6(%sp) # stk fmt = 0x2; voff = 0x24
4140 bra.l _real_trace
4142 funimp_gen_exit_a7:
4143 btst &0x5,EXC_SR(%a6) # supervisor or user mode?
4144 bne.b funimp_gen_exit_a7_s # supervisor
4146 mov.l %a0,-(%sp)
4147 mov.l EXC_A7(%a6),%a0
4148 mov.l %a0,%usp
4149 mov.l (%sp)+,%a0
4150 bra.b funimp_gen_exit_cont
4152 # if the instruction was executed from supervisor mode and the addressing
4153 # mode was (a7)+, then the stack frame for the rte must be shifted "up"
4154 # "n" bytes where "n" is the size of the src operand type.
4155 # f<op>.{b,w,l,s,d,x,p}
4156 funimp_gen_exit_a7_s:
4157 mov.l %d0,-(%sp) # save d0
4158 mov.l EXC_A7(%a6),%d0 # load new a7'
4159 sub.l OLD_A7(%a6),%d0 # subtract old a7'
4160 mov.l 0x2+EXC_PC(%a6),(0x2+EXC_PC,%a6,%d0) # shift stack frame
4161 mov.l EXC_SR(%a6),(EXC_SR,%a6,%d0) # shift stack frame
4162 mov.w %d0,EXC_SR(%a6) # store incr number
4163 mov.l (%sp)+,%d0 # restore d0
4165 unlk %a6
4167 add.w (%sp),%sp # stack frame shifted
4168 bra.b funimp_gen_exit_cont2
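#
# note: the increment "n" is parked in the vacated SR slot of the old frame,
# so after the unlk the "add.w (%sp),%sp" above advances %sp by exactly n
# bytes, landing it on the copy of the frame that was just shifted "up".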
4170 ######################
4171 # fmovecr.x #ccc,fpn #
4172 ######################
4173 funimp_fmovcr:
4174 clr.l %d0
4175 mov.b FPCR_MODE(%a6),%d0
4176 mov.b 1+EXC_CMDREG(%a6),%d1
4177 andi.l &0x0000007f,%d1 # pass rom offset in d1
4178 bsr.l smovcr
4179 bra.w funimp_fsave
4181 #########################################################################
4184 # the user has enabled some exceptions. we don't expect to see this very
4185 # often, so it gets lower priority.
4187 funimp_ena:
4189 # was an exception set that was also enabled?
4190 and.b FPSR_EXCEPT(%a6),%d0 # keep only ones enabled and set
4191 bfffo %d0{&24:&8},%d0 # find highest priority exception
4192 bne.b funimp_exc # at least one was set
4194 # no exception that was enabled was set BUT if we got an exact overflow
4195 # and overflow wasn't enabled but inexact was (yech!) then this is
4196 # an inexact exception; otherwise, return to normal non-exception flow.
4197 btst &ovfl_bit,FPSR_EXCEPT(%a6) # did overflow occur?
4198 beq.w funimp_store # no; return to normal flow
4200 # the overflow w/ an exact result happened, but is inexact enabled in the FPCR?
4201 funimp_ovfl:
4202 btst &inex2_bit,FPCR_ENABLE(%a6) # is inexact enabled?
4203 beq.w funimp_store # no; return to normal flow
4204 bra.b funimp_exc_ovfl # yes
4206 # some exception happened that was actually enabled.
4207 # we'll insert this new exception into the FPU and then return.
4208 funimp_exc:
4209 subi.l &24,%d0 # fix offset to be 0-7
4210 cmpi.b %d0,&0x6 # is exception INEX?
4211 bne.b funimp_exc_force # no
4213 # the enabled exception was inexact. so, if it occurs with an overflow
4214 # or underflow that was disabled, then we have to force an overflow or
4215 # underflow frame. the eventual overflow or underflow handler will see that
4216 # it's actually an inexact and act appropriately. this is the only easy
4217 # way to have the EXOP available for the enabled inexact handler when
4218 # a disabled overflow or underflow has also happened.
4219 btst &ovfl_bit,FPSR_EXCEPT(%a6) # did overflow occur?
4220 bne.b funimp_exc_ovfl # yes
4221 btst &unfl_bit,FPSR_EXCEPT(%a6) # did underflow occur?
4222 bne.b funimp_exc_unfl # yes
4224 # force the fsave exception status bits to signal an exception of the
4225 # appropriate type. don't forget to "skew" the source operand in case we
4226 # "unskewed" the one the hardware initially gave us.
4227 funimp_exc_force:
4228 mov.l %d0,-(%sp) # save d0
4229 bsr.l funimp_skew # check for special case
4230 mov.l (%sp)+,%d0 # restore d0
4231 mov.w (tbl_funimp_except.b,%pc,%d0.w*2),2+FP_SRC(%a6)
4232 bra.b funimp_gen_exit2 # exit with frestore
4234 tbl_funimp_except:
4235 short 0xe002, 0xe006, 0xe004, 0xe005
4236 short 0xe003, 0xe002, 0xe001, 0xe001
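#
# each entry above is the fsave status word that the later frestore uses to
# mark the pending exception; the index is the bfffo result minus 24, i.e.
# 0=bsun, 1=snan, 2=operr, 3=ovfl, 4=unfl, 5=dz, 6=inex2, 7=inex1, matching
# the bit order of the FPSR/FPCR exception and enable bytes.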
4238 # insert an overflow frame
4239 funimp_exc_ovfl:
4240 bsr.l funimp_skew # check for special case
4241 mov.w &0xe005,2+FP_SRC(%a6)
4242 bra.b funimp_gen_exit2
4244 # insert an underflow frame
4245 funimp_exc_unfl:
4246 bsr.l funimp_skew # check for special case
4247 mov.w &0xe003,2+FP_SRC(%a6)
4249 # this is the general exit point for an enabled exception that will be
4250 # restored into the machine for the instruction just emulated.
4251 funimp_gen_exit2:
4252 fmovm.x EXC_FP0(%a6),&0xc0 # restore fp0-fp1
4253 fmovm.l USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs
4254 movm.l EXC_DREGS(%a6),&0x0303 # restore d0-d1/a0-a1
4256 frestore FP_SRC(%a6) # insert exceptional status
4258 bra.w funimp_gen_exit_cmp
4260 ############################################################################
4263 # TYPE == 1: FDB<cc>, FS<cc>, FTRAP<cc>
4265 # These instructions were implemented on the '881/2 and '040 in hardware but
4266 # are emulated in software on the '060.
4268 funimp_misc:
4269 bfextu %d0{&10:&3},%d1 # extract mode field
4270 cmpi.b %d1,&0x1 # is it an fdb<cc>?
4271 beq.w funimp_fdbcc # yes
4272 cmpi.b %d1,&0x7 # is <ea> mode field 0x7?
4273 bne.w funimp_fscc # no; must be an fs<cc>
4274 bfextu %d0{&13:&3},%d1
4275 cmpi.b %d1,&0x2 # is reg field < 2 (abs.w/abs.l)?
4276 blt.w funimp_fscc # yes; it's an fs<cc>
4278 #########################
4279 # ftrap<cc> #
4280 # ftrap<cc>.w #<data> #
4281 # ftrap<cc>.l #<data> #
4282 #########################
4283 funimp_ftrapcc:
4285 bsr.l _ftrapcc # FTRAP<cc>()
4287 cmpi.b SPCOND_FLG(%a6),&fbsun_flg # is enabled bsun occurring?
4288 beq.w funimp_bsun # yes
4290 cmpi.b SPCOND_FLG(%a6),&ftrapcc_flg # should a trap occur?
4291 bne.w funimp_done # no
4293 # FP UNIMP FRAME TRAP FRAME
4294 # ***************** *****************
4295 # ** <EA> ** ** Current PC **
4296 # ***************** *****************
4297 # * 0x2 * 0x02c * * 0x2 * 0x01c *
4298 # ***************** *****************
4299 # ** Next PC ** ** Next PC **
4300 # ***************** *****************
4301 # * SR * * SR *
4302 # ***************** *****************
4303 # (6 words) (6 words)
4305 # the ftrapcc instruction should take a trap. so, here we must create a
4306 # trap stack frame from an unimplemented fp instruction stack frame and
4307 # jump to the user supplied entry point for the trap exception
4308 funimp_ftrapcc_tp:
4309 mov.l USER_FPIAR(%a6),EXC_EA(%a6) # Address = Current PC
4310 mov.w &0x201c,EXC_VOFF(%a6) # Vector Offset = 0x01c
4312 fmovm.x EXC_FP0(%a6),&0xc0 # restore fp0-fp1
4313 fmovm.l USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs
4314 movm.l EXC_DREGS(%a6),&0x0303 # restore d0-d1/a0-a1
4316 unlk %a6
4317 bra.l _real_trap
4319 #########################
4320 # fdb<cc> Dn,<label> #
4321 #########################
4322 funimp_fdbcc:
4324 mov.l EXC_EXTWPTR(%a6),%a0 # fetch instruction addr
4325 addq.l &0x2,EXC_EXTWPTR(%a6) # incr instruction ptr
4326 bsr.l _imem_read_word # read displacement
4328 tst.l %d1 # did ifetch fail?
4329 bne.w funimp_iacc # yes
4331 ext.l %d0 # sign extend displacement
4333 bsr.l _fdbcc # FDB<cc>()
4335 cmpi.b SPCOND_FLG(%a6),&fbsun_flg # is enabled bsun occurring?
4336 beq.w funimp_bsun
4338 bra.w funimp_done # branch to finish
4340 #################
4341 # fs<cc>.b <ea> #
4342 #################
4343 funimp_fscc:
4345 bsr.l _fscc # FS<cc>()
4347 # I am assuming here that an "fs<cc>.b -(An)" or "fs<cc>.b (An)+" instruction
4348 # does not need to update "An" before taking a bsun exception.
4349 cmpi.b SPCOND_FLG(%a6),&fbsun_flg # is enabled bsun occurring?
4350 beq.w funimp_bsun
4352 btst &0x5,EXC_SR(%a6) # yes; is it a user mode exception?
4353 bne.b funimp_fscc_s # no
4355 funimp_fscc_u:
4356 mov.l EXC_A7(%a6),%a0 # yes; set new USP
4357 mov.l %a0,%usp
4358 bra.w funimp_done # branch to finish
4360 # remember, I'm assuming that post-increment is bogus...(it IS!!!)
4361 # so, the least significant WORD of the stacked effective address got
4362 # overwritten by the "fs<cc> -(An)". We must shift the stack frame "down"
4363 # so that the rte will work correctly without destroying the result.
4364 # even though the operation size is byte, the stack ptr is decr by 2.
4366 # remember, also, this instruction may be traced.
4367 funimp_fscc_s:
4368 cmpi.b SPCOND_FLG(%a6),&mda7_flg # was a7 modified?
4369 bne.w funimp_done # no
4371 fmovm.x EXC_FP0(%a6),&0xc0 # restore fp0-fp1
4372 fmovm.l USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs
4373 movm.l EXC_DREGS(%a6),&0x0303 # restore d0-d1/a0-a1
4375 unlk %a6
4377 btst &0x7,(%sp) # is trace enabled?
4378 bne.b funimp_fscc_s_trace # yes
4380 subq.l &0x2,%sp
4381 mov.l 0x2(%sp),(%sp) # shift SR,hi(PC) "down"
4382 mov.l 0x6(%sp),0x4(%sp) # shift lo(PC),voff "down"
4383 bra.l _fpsp_done
4385 funimp_fscc_s_trace:
4386 subq.l &0x2,%sp
4387 mov.l 0x2(%sp),(%sp) # shift SR,hi(PC) "down"
4388 mov.w 0x6(%sp),0x4(%sp) # shift lo(PC)
4389 mov.w &0x2024,0x6(%sp) # fmt/voff = $2024
4390 fmov.l %fpiar,0x8(%sp) # insert "current PC"
4392 bra.l _real_trace
4395 # The ftrap<cc>, fs<cc>, or fdb<cc> is to take an enabled bsun. we must convert
4396 # the fp unimplemented instruction exception stack frame into a bsun stack frame,
4397 # restore a bsun exception into the machine, and branch to the user
4398 # supplied bsun hook.
4400 # FP UNIMP FRAME BSUN FRAME
4401 # ***************** *****************
4402 # ** <EA> ** * 0x0 * 0x0c0 *
4403 # ***************** *****************
4404 # * 0x2 * 0x02c * ** Current PC **
4405 # ***************** *****************
4406 # ** Next PC ** * SR *
4407 # ***************** *****************
4408 # * SR * (4 words)
4409 # *****************
4410 # (6 words)
4412 funimp_bsun:
4413 mov.w &0x00c0,2+EXC_EA(%a6) # Fmt = 0x0; Vector Offset = 0x0c0
4414 mov.l USER_FPIAR(%a6),EXC_VOFF(%a6) # PC = Current PC
4415 mov.w EXC_SR(%a6),2+EXC_PC(%a6) # shift SR "up"
4417 mov.w &0xe000,2+FP_SRC(%a6) # bsun exception enabled
4419 fmovm.x EXC_FP0(%a6),&0xc0 # restore fp0-fp1
4420 fmovm.l USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs
4421 movm.l EXC_DREGS(%a6),&0x0303 # restore d0-d1/a0-a1
4423 frestore FP_SRC(%a6) # restore bsun exception
4425 unlk %a6
4427 addq.l &0x4,%sp # erase sludge
4429 bra.l _real_bsun # branch to user bsun hook
4432 # all ftrapcc/fscc/fdbcc processing has been completed. unwind the stack frame
4433 # and return.
4435 # as usual, we have to check for trace mode being on here. since instructions
4436 # modifying the supervisor stack frame don't pass through here, this is a
4437 # relatively easy task.
4439 funimp_done:
4440 fmovm.x EXC_FP0(%a6),&0xc0 # restore fp0-fp1
4441 fmovm.l USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs
4442 movm.l EXC_DREGS(%a6),&0x0303 # restore d0-d1/a0-a1
4444 unlk %a6
4446 btst &0x7,(%sp) # is trace enabled?
4447 bne.b funimp_trace # yes
4449 bra.l _fpsp_done
4451 # FP UNIMP FRAME TRACE FRAME
4452 # ***************** *****************
4453 # ** <EA> ** ** Current PC **
4454 # ***************** *****************
4455 # * 0x2 * 0x02c * * 0x2 * 0x024 *
4456 # ***************** *****************
4457 # ** Next PC ** ** Next PC **
4458 # ***************** *****************
4459 # * SR * * SR *
4460 # ***************** *****************
4461 # (6 words) (6 words)
4463 # the emulated instruction should take a trace trap. so, here we must create a
4464 # trace stack frame from an unimplemented fp instruction stack frame and
4465 # jump to the user supplied entry point for the trace exception
4466 funimp_trace:
4467 fmov.l %fpiar,0x8(%sp) # current PC is in fpiar
4468 mov.b &0x24,0x7(%sp) # vector offset = 0x024
4470 bra.l _real_trace
4472 ################################################################
4474 global tbl_trans
4475 swbeg &0x1c0
4476 tbl_trans:
4477 short tbl_trans - tbl_trans # $00-0 fmovecr all
4478 short tbl_trans - tbl_trans # $00-1 fmovecr all
4479 short tbl_trans - tbl_trans # $00-2 fmovecr all
4480 short tbl_trans - tbl_trans # $00-3 fmovecr all
4481 short tbl_trans - tbl_trans # $00-4 fmovecr all
4482 short tbl_trans - tbl_trans # $00-5 fmovecr all
4483 short tbl_trans - tbl_trans # $00-6 fmovecr all
4484 short tbl_trans - tbl_trans # $00-7 fmovecr all
4486 short tbl_trans - tbl_trans # $01-0 fint norm
4487 short tbl_trans - tbl_trans # $01-1 fint zero
4488 short tbl_trans - tbl_trans # $01-2 fint inf
4489 short tbl_trans - tbl_trans # $01-3 fint qnan
4490 short tbl_trans - tbl_trans # $01-5 fint denorm
4491 short tbl_trans - tbl_trans # $01-4 fint snan
4492 short tbl_trans - tbl_trans # $01-6 fint unnorm
4493 short tbl_trans - tbl_trans # $01-7 ERROR
4495 short ssinh - tbl_trans # $02-0 fsinh norm
4496 short src_zero - tbl_trans # $02-1 fsinh zero
4497 short src_inf - tbl_trans # $02-2 fsinh inf
4498 short src_qnan - tbl_trans # $02-3 fsinh qnan
4499 short ssinhd - tbl_trans # $02-5 fsinh denorm
4500 short src_snan - tbl_trans # $02-4 fsinh snan
4501 short tbl_trans - tbl_trans # $02-6 fsinh unnorm
4502 short tbl_trans - tbl_trans # $02-7 ERROR
4504 short tbl_trans - tbl_trans # $03-0 fintrz norm
4505 short tbl_trans - tbl_trans # $03-1 fintrz zero
4506 short tbl_trans - tbl_trans # $03-2 fintrz inf
4507 short tbl_trans - tbl_trans # $03-3 fintrz qnan
4508 short tbl_trans - tbl_trans # $03-5 fintrz denorm
4509 short tbl_trans - tbl_trans # $03-4 fintrz snan
4510 short tbl_trans - tbl_trans # $03-6 fintrz unnorm
4511 short tbl_trans - tbl_trans # $03-7 ERROR
4513 short tbl_trans - tbl_trans # $04-0 fsqrt norm
4514 short tbl_trans - tbl_trans # $04-1 fsqrt zero
4515 short tbl_trans - tbl_trans # $04-2 fsqrt inf
4516 short tbl_trans - tbl_trans # $04-3 fsqrt qnan
4517 short tbl_trans - tbl_trans # $04-5 fsqrt denorm
4518 short tbl_trans - tbl_trans # $04-4 fsqrt snan
4519 short tbl_trans - tbl_trans # $04-6 fsqrt unnorm
4520 short tbl_trans - tbl_trans # $04-7 ERROR
4522 short tbl_trans - tbl_trans # $05-0 ERROR
4523 short tbl_trans - tbl_trans # $05-1 ERROR
4524 short tbl_trans - tbl_trans # $05-2 ERROR
4525 short tbl_trans - tbl_trans # $05-3 ERROR
4526 short tbl_trans - tbl_trans # $05-4 ERROR
4527 short tbl_trans - tbl_trans # $05-5 ERROR
4528 short tbl_trans - tbl_trans # $05-6 ERROR
4529 short tbl_trans - tbl_trans # $05-7 ERROR
4531 short slognp1 - tbl_trans # $06-0 flognp1 norm
4532 short src_zero - tbl_trans # $06-1 flognp1 zero
4533 short sopr_inf - tbl_trans # $06-2 flognp1 inf
4534 short src_qnan - tbl_trans # $06-3 flognp1 qnan
4535 short slognp1d - tbl_trans # $06-5 flognp1 denorm
4536 short src_snan - tbl_trans # $06-4 flognp1 snan
4537 short tbl_trans - tbl_trans # $06-6 flognp1 unnorm
4538 short tbl_trans - tbl_trans # $06-7 ERROR
4540 short tbl_trans - tbl_trans # $07-0 ERROR
4541 short tbl_trans - tbl_trans # $07-1 ERROR
4542 short tbl_trans - tbl_trans # $07-2 ERROR
4543 short tbl_trans - tbl_trans # $07-3 ERROR
4544 short tbl_trans - tbl_trans # $07-4 ERROR
4545 short tbl_trans - tbl_trans # $07-5 ERROR
4546 short tbl_trans - tbl_trans # $07-6 ERROR
4547 short tbl_trans - tbl_trans # $07-7 ERROR
4549 short setoxm1 - tbl_trans # $08-0 fetoxm1 norm
4550 short src_zero - tbl_trans # $08-1 fetoxm1 zero
4551 short setoxm1i - tbl_trans # $08-2 fetoxm1 inf
4552 short src_qnan - tbl_trans # $08-3 fetoxm1 qnan
4553 short setoxm1d - tbl_trans # $08-5 fetoxm1 denorm
4554 short src_snan - tbl_trans # $08-4 fetoxm1 snan
4555 short tbl_trans - tbl_trans # $08-6 fetoxm1 unnorm
4556 short tbl_trans - tbl_trans # $08-7 ERROR
4558 short stanh - tbl_trans # $09-0 ftanh norm
4559 short src_zero - tbl_trans # $09-1 ftanh zero
4560 short src_one - tbl_trans # $09-2 ftanh inf
4561 short src_qnan - tbl_trans # $09-3 ftanh qnan
4562 short stanhd - tbl_trans # $09-5 ftanh denorm
4563 short src_snan - tbl_trans # $09-4 ftanh snan
4564 short tbl_trans - tbl_trans # $09-6 ftanh unnorm
4565 short tbl_trans - tbl_trans # $09-7 ERROR
4567 short satan - tbl_trans # $0a-0 fatan norm
4568 short src_zero - tbl_trans # $0a-1 fatan zero
4569 short spi_2 - tbl_trans # $0a-2 fatan inf
4570 short src_qnan - tbl_trans # $0a-3 fatan qnan
4571 short satand - tbl_trans # $0a-5 fatan denorm
4572 short src_snan - tbl_trans # $0a-4 fatan snan
4573 short tbl_trans - tbl_trans # $0a-6 fatan unnorm
4574 short tbl_trans - tbl_trans # $0a-7 ERROR
4576 short tbl_trans - tbl_trans # $0b-0 ERROR
4577 short tbl_trans - tbl_trans # $0b-1 ERROR
4578 short tbl_trans - tbl_trans # $0b-2 ERROR
4579 short tbl_trans - tbl_trans # $0b-3 ERROR
4580 short tbl_trans - tbl_trans # $0b-4 ERROR
4581 short tbl_trans - tbl_trans # $0b-5 ERROR
4582 short tbl_trans - tbl_trans # $0b-6 ERROR
4583 short tbl_trans - tbl_trans # $0b-7 ERROR
4585 short sasin - tbl_trans # $0c-0 fasin norm
4586 short src_zero - tbl_trans # $0c-1 fasin zero
4587 short t_operr - tbl_trans # $0c-2 fasin inf
4588 short src_qnan - tbl_trans # $0c-3 fasin qnan
4589 short sasind - tbl_trans # $0c-5 fasin denorm
4590 short src_snan - tbl_trans # $0c-4 fasin snan
4591 short tbl_trans - tbl_trans # $0c-6 fasin unnorm
4592 short tbl_trans - tbl_trans # $0c-7 ERROR
4594 short satanh - tbl_trans # $0d-0 fatanh norm
4595 short src_zero - tbl_trans # $0d-1 fatanh zero
4596 short t_operr - tbl_trans # $0d-2 fatanh inf
4597 short src_qnan - tbl_trans # $0d-3 fatanh qnan
4598 short satanhd - tbl_trans # $0d-5 fatanh denorm
4599 short src_snan - tbl_trans # $0d-4 fatanh snan
4600 short tbl_trans - tbl_trans # $0d-6 fatanh unnorm
4601 short tbl_trans - tbl_trans # $0d-7 ERROR
4603 short ssin - tbl_trans # $0e-0 fsin norm
4604 short src_zero - tbl_trans # $0e-1 fsin zero
4605 short t_operr - tbl_trans # $0e-2 fsin inf
4606 short src_qnan - tbl_trans # $0e-3 fsin qnan
4607 short ssind - tbl_trans # $0e-5 fsin denorm
4608 short src_snan - tbl_trans # $0e-4 fsin snan
4609 short tbl_trans - tbl_trans # $0e-6 fsin unnorm
4610 short tbl_trans - tbl_trans # $0e-7 ERROR
4612 short stan - tbl_trans # $0f-0 ftan norm
4613 short src_zero - tbl_trans # $0f-1 ftan zero
4614 short t_operr - tbl_trans # $0f-2 ftan inf
4615 short src_qnan - tbl_trans # $0f-3 ftan qnan
4616 short stand - tbl_trans # $0f-5 ftan denorm
4617 short src_snan - tbl_trans # $0f-4 ftan snan
4618 short tbl_trans - tbl_trans # $0f-6 ftan unnorm
4619 short tbl_trans - tbl_trans # $0f-7 ERROR
4621 short setox - tbl_trans # $10-0 fetox norm
4622 short ld_pone - tbl_trans # $10-1 fetox zero
4623 short szr_inf - tbl_trans # $10-2 fetox inf
4624 short src_qnan - tbl_trans # $10-3 fetox qnan
4625 short setoxd - tbl_trans # $10-5 fetox denorm
4626 short src_snan - tbl_trans # $10-4 fetox snan
4627 short tbl_trans - tbl_trans # $10-6 fetox unnorm
4628 short tbl_trans - tbl_trans # $10-7 ERROR
4630 short stwotox - tbl_trans # $11-0 ftwotox norm
4631 short ld_pone - tbl_trans # $11-1 ftwotox zero
4632 short szr_inf - tbl_trans # $11-2 ftwotox inf
4633 short src_qnan - tbl_trans # $11-3 ftwotox qnan
4634 short stwotoxd - tbl_trans # $11-5 ftwotox denorm
4635 short src_snan - tbl_trans # $11-4 ftwotox snan
4636 short tbl_trans - tbl_trans # $11-6 ftwotox unnorm
4637 short tbl_trans - tbl_trans # $11-7 ERROR
4639 short stentox - tbl_trans # $12-0 ftentox norm
4640 short ld_pone - tbl_trans # $12-1 ftentox zero
4641 short szr_inf - tbl_trans # $12-2 ftentox inf
4642 short src_qnan - tbl_trans # $12-3 ftentox qnan
4643 short stentoxd - tbl_trans # $12-5 ftentox denorm
4644 short src_snan - tbl_trans # $12-4 ftentox snan
4645 short tbl_trans - tbl_trans # $12-6 ftentox unnorm
4646 short tbl_trans - tbl_trans # $12-7 ERROR
4648 short tbl_trans - tbl_trans # $13-0 ERROR
4649 short tbl_trans - tbl_trans # $13-1 ERROR
4650 short tbl_trans - tbl_trans # $13-2 ERROR
4651 short tbl_trans - tbl_trans # $13-3 ERROR
4652 short tbl_trans - tbl_trans # $13-4 ERROR
4653 short tbl_trans - tbl_trans # $13-5 ERROR
4654 short tbl_trans - tbl_trans # $13-6 ERROR
4655 short tbl_trans - tbl_trans # $13-7 ERROR
4657 short slogn - tbl_trans # $14-0 flogn norm
4658 short t_dz2 - tbl_trans # $14-1 flogn zero
4659 short sopr_inf - tbl_trans # $14-2 flogn inf
4660 short src_qnan - tbl_trans # $14-3 flogn qnan
4661 short slognd - tbl_trans # $14-5 flogn denorm
4662 short src_snan - tbl_trans # $14-4 flogn snan
4663 short tbl_trans - tbl_trans # $14-6 flogn unnorm
4664 short tbl_trans - tbl_trans # $14-7 ERROR
4666 short slog10 - tbl_trans # $15-0 flog10 norm
4667 short t_dz2 - tbl_trans # $15-1 flog10 zero
4668 short sopr_inf - tbl_trans # $15-2 flog10 inf
4669 short src_qnan - tbl_trans # $15-3 flog10 qnan
4670 short slog10d - tbl_trans # $15-5 flog10 denorm
4671 short src_snan - tbl_trans # $15-4 flog10 snan
4672 short tbl_trans - tbl_trans # $15-6 flog10 unnorm
4673 short tbl_trans - tbl_trans # $15-7 ERROR
4675 short slog2 - tbl_trans # $16-0 flog2 norm
4676 short t_dz2 - tbl_trans # $16-1 flog2 zero
4677 short sopr_inf - tbl_trans # $16-2 flog2 inf
4678 short src_qnan - tbl_trans # $16-3 flog2 qnan
4679 short slog2d - tbl_trans # $16-5 flog2 denorm
4680 short src_snan - tbl_trans # $16-4 flog2 snan
4681 short tbl_trans - tbl_trans # $16-6 flog2 unnorm
4682 short tbl_trans - tbl_trans # $16-7 ERROR
4684 short tbl_trans - tbl_trans # $17-0 ERROR
4685 short tbl_trans - tbl_trans # $17-1 ERROR
4686 short tbl_trans - tbl_trans # $17-2 ERROR
4687 short tbl_trans - tbl_trans # $17-3 ERROR
4688 short tbl_trans - tbl_trans # $17-4 ERROR
4689 short tbl_trans - tbl_trans # $17-5 ERROR
4690 short tbl_trans - tbl_trans # $17-6 ERROR
4691 short tbl_trans - tbl_trans # $17-7 ERROR
4693 short tbl_trans - tbl_trans # $18-0 fabs norm
4694 short tbl_trans - tbl_trans # $18-1 fabs zero
4695 short tbl_trans - tbl_trans # $18-2 fabs inf
4696 short tbl_trans - tbl_trans # $18-3 fabs qnan
4697 short tbl_trans - tbl_trans # $18-5 fabs denorm
4698 short tbl_trans - tbl_trans # $18-4 fabs snan
4699 short tbl_trans - tbl_trans # $18-6 fabs unnorm
4700 short tbl_trans - tbl_trans # $18-7 ERROR
4702 short scosh - tbl_trans # $19-0 fcosh norm
4703 short ld_pone - tbl_trans # $19-1 fcosh zero
4704 short ld_pinf - tbl_trans # $19-2 fcosh inf
4705 short src_qnan - tbl_trans # $19-3 fcosh qnan
4706 short scoshd - tbl_trans # $19-5 fcosh denorm
4707 short src_snan - tbl_trans # $19-4 fcosh snan
4708 short tbl_trans - tbl_trans # $19-6 fcosh unnorm
4709 short tbl_trans - tbl_trans # $19-7 ERROR
4711 short tbl_trans - tbl_trans # $1a-0 fneg norm
4712 short tbl_trans - tbl_trans # $1a-1 fneg zero
4713 short tbl_trans - tbl_trans # $1a-2 fneg inf
4714 short tbl_trans - tbl_trans # $1a-3 fneg qnan
4715 short tbl_trans - tbl_trans # $1a-5 fneg denorm
4716 short tbl_trans - tbl_trans # $1a-4 fneg snan
4717 short tbl_trans - tbl_trans # $1a-6 fneg unnorm
4718 short tbl_trans - tbl_trans # $1a-7 ERROR
4720 short tbl_trans - tbl_trans # $1b-0 ERROR
4721 short tbl_trans - tbl_trans # $1b-1 ERROR
4722 short tbl_trans - tbl_trans # $1b-2 ERROR
4723 short tbl_trans - tbl_trans # $1b-3 ERROR
4724 short tbl_trans - tbl_trans # $1b-4 ERROR
4725 short tbl_trans - tbl_trans # $1b-5 ERROR
4726 short tbl_trans - tbl_trans # $1b-6 ERROR
4727 short tbl_trans - tbl_trans # $1b-7 ERROR
4729 short sacos - tbl_trans # $1c-0 facos norm
4730 short ld_ppi2 - tbl_trans # $1c-1 facos zero
4731 short t_operr - tbl_trans # $1c-2 facos inf
4732 short src_qnan - tbl_trans # $1c-3 facos qnan
4733 short sacosd - tbl_trans # $1c-5 facos denorm
4734 short src_snan - tbl_trans # $1c-4 facos snan
4735 short tbl_trans - tbl_trans # $1c-6 facos unnorm
4736 short tbl_trans - tbl_trans # $1c-7 ERROR
4738 short scos - tbl_trans # $1d-0 fcos norm
4739 short ld_pone - tbl_trans # $1d-1 fcos zero
4740 short t_operr - tbl_trans # $1d-2 fcos inf
4741 short src_qnan - tbl_trans # $1d-3 fcos qnan
4742 short scosd - tbl_trans # $1d-5 fcos denorm
4743 short src_snan - tbl_trans # $1d-4 fcos snan
4744 short tbl_trans - tbl_trans # $1d-6 fcos unnorm
4745 short tbl_trans - tbl_trans # $1d-7 ERROR
4747 short sgetexp - tbl_trans # $1e-0 fgetexp norm
4748 short src_zero - tbl_trans # $1e-1 fgetexp zero
4749 short t_operr - tbl_trans # $1e-2 fgetexp inf
4750 short src_qnan - tbl_trans # $1e-3 fgetexp qnan
4751 short sgetexpd - tbl_trans # $1e-5 fgetexp denorm
4752 short src_snan - tbl_trans # $1e-4 fgetexp snan
4753 short tbl_trans - tbl_trans # $1e-6 fgetexp unnorm
4754 short tbl_trans - tbl_trans # $1e-7 ERROR
4756 short sgetman - tbl_trans # $1f-0 fgetman norm
4757 short src_zero - tbl_trans # $1f-1 fgetman zero
4758 short t_operr - tbl_trans # $1f-2 fgetman inf
4759 short src_qnan - tbl_trans # $1f-3 fgetman qnan
4760 short sgetmand - tbl_trans # $1f-5 fgetman denorm
4761 short src_snan - tbl_trans # $1f-4 fgetman snan
4762 short tbl_trans - tbl_trans # $1f-6 fgetman unnorm
4763 short tbl_trans - tbl_trans # $1f-7 ERROR
4765 short tbl_trans - tbl_trans # $20-0 fdiv norm
4766 short tbl_trans - tbl_trans # $20-1 fdiv zero
4767 short tbl_trans - tbl_trans # $20-2 fdiv inf
4768 short tbl_trans - tbl_trans # $20-3 fdiv qnan
4769 short tbl_trans - tbl_trans # $20-5 fdiv denorm
4770 short tbl_trans - tbl_trans # $20-4 fdiv snan
4771 short tbl_trans - tbl_trans # $20-6 fdiv unnorm
4772 short tbl_trans - tbl_trans # $20-7 ERROR
4774 short smod_snorm - tbl_trans # $21-0 fmod norm
4775 short smod_szero - tbl_trans # $21-1 fmod zero
4776 short smod_sinf - tbl_trans # $21-2 fmod inf
4777 short sop_sqnan - tbl_trans # $21-3 fmod qnan
4778 short smod_sdnrm - tbl_trans # $21-5 fmod denorm
4779 short sop_ssnan - tbl_trans # $21-4 fmod snan
4780 short tbl_trans - tbl_trans # $21-6 fmod unnorm
4781 short tbl_trans - tbl_trans # $21-7 ERROR
4783 short tbl_trans - tbl_trans # $22-0 fadd norm
4784 short tbl_trans - tbl_trans # $22-1 fadd zero
4785 short tbl_trans - tbl_trans # $22-2 fadd inf
4786 short tbl_trans - tbl_trans # $22-3 fadd qnan
4787 short tbl_trans - tbl_trans # $22-5 fadd denorm
4788 short tbl_trans - tbl_trans # $22-4 fadd snan
4789 short tbl_trans - tbl_trans # $22-6 fadd unnorm
4790 short tbl_trans - tbl_trans # $22-7 ERROR
4792 short tbl_trans - tbl_trans # $23-0 fmul norm
4793 short tbl_trans - tbl_trans # $23-1 fmul zero
4794 short tbl_trans - tbl_trans # $23-2 fmul inf
4795 short tbl_trans - tbl_trans # $23-3 fmul qnan
4796 short tbl_trans - tbl_trans # $23-5 fmul denorm
4797 short tbl_trans - tbl_trans # $23-4 fmul snan
4798 short tbl_trans - tbl_trans # $23-6 fmul unnorm
4799 short tbl_trans - tbl_trans # $23-7 ERROR
4801 short tbl_trans - tbl_trans # $24-0 fsgldiv norm
4802 short tbl_trans - tbl_trans # $24-1 fsgldiv zero
4803 short tbl_trans - tbl_trans # $24-2 fsgldiv inf
4804 short tbl_trans - tbl_trans # $24-3 fsgldiv qnan
4805 short tbl_trans - tbl_trans # $24-5 fsgldiv denorm
4806 short tbl_trans - tbl_trans # $24-4 fsgldiv snan
4807 short tbl_trans - tbl_trans # $24-6 fsgldiv unnorm
4808 short tbl_trans - tbl_trans # $24-7 ERROR
4810 short srem_snorm - tbl_trans # $25-0 frem norm
4811 short srem_szero - tbl_trans # $25-1 frem zero
4812 short srem_sinf - tbl_trans # $25-2 frem inf
4813 short sop_sqnan - tbl_trans # $25-3 frem qnan
4814 short srem_sdnrm - tbl_trans # $25-5 frem denorm
4815 short sop_ssnan - tbl_trans # $25-4 frem snan
4816 short tbl_trans - tbl_trans # $25-6 frem unnorm
4817 short tbl_trans - tbl_trans # $25-7 ERROR
4819 short sscale_snorm - tbl_trans # $26-0 fscale norm
4820 short sscale_szero - tbl_trans # $26-1 fscale zero
4821 short sscale_sinf - tbl_trans # $26-2 fscale inf
4822 short sop_sqnan - tbl_trans # $26-3 fscale qnan
4823 short sscale_sdnrm - tbl_trans # $26-5 fscale denorm
4824 short sop_ssnan - tbl_trans # $26-4 fscale snan
4825 short tbl_trans - tbl_trans # $26-6 fscale unnorm
4826 short tbl_trans - tbl_trans # $26-7 ERROR
4828 short tbl_trans - tbl_trans # $27-0 fsglmul norm
4829 short tbl_trans - tbl_trans # $27-1 fsglmul zero
4830 short tbl_trans - tbl_trans # $27-2 fsglmul inf
4831 short tbl_trans - tbl_trans # $27-3 fsglmul qnan
4832 short tbl_trans - tbl_trans # $27-5 fsglmul denorm
4833 short tbl_trans - tbl_trans # $27-4 fsglmul snan
4834 short tbl_trans - tbl_trans # $27-6 fsglmul unnorm
4835 short tbl_trans - tbl_trans # $27-7 ERROR
4837 short tbl_trans - tbl_trans # $28-0 fsub norm
4838 short tbl_trans - tbl_trans # $28-1 fsub zero
4839 short tbl_trans - tbl_trans # $28-2 fsub inf
4840 short tbl_trans - tbl_trans # $28-3 fsub qnan
4841 short tbl_trans - tbl_trans # $28-5 fsub denorm
4842 short tbl_trans - tbl_trans # $28-4 fsub snan
4843 short tbl_trans - tbl_trans # $28-6 fsub unnorm
4844 short tbl_trans - tbl_trans # $28-7 ERROR
4846 short tbl_trans - tbl_trans # $29-0 ERROR
4847 short tbl_trans - tbl_trans # $29-1 ERROR
4848 short tbl_trans - tbl_trans # $29-2 ERROR
4849 short tbl_trans - tbl_trans # $29-3 ERROR
4850 short tbl_trans - tbl_trans # $29-4 ERROR
4851 short tbl_trans - tbl_trans # $29-5 ERROR
4852 short tbl_trans - tbl_trans # $29-6 ERROR
4853 short tbl_trans - tbl_trans # $29-7 ERROR
4855 short tbl_trans - tbl_trans # $2a-0 ERROR
4856 short tbl_trans - tbl_trans # $2a-1 ERROR
4857 short tbl_trans - tbl_trans # $2a-2 ERROR
4858 short tbl_trans - tbl_trans # $2a-3 ERROR
4859 short tbl_trans - tbl_trans # $2a-4 ERROR
4860 short tbl_trans - tbl_trans # $2a-5 ERROR
4861 short tbl_trans - tbl_trans # $2a-6 ERROR
4862 short tbl_trans - tbl_trans # $2a-7 ERROR
4864 short tbl_trans - tbl_trans # $2b-0 ERROR
4865 short tbl_trans - tbl_trans # $2b-1 ERROR
4866 short tbl_trans - tbl_trans # $2b-2 ERROR
4867 short tbl_trans - tbl_trans # $2b-3 ERROR
4868 short tbl_trans - tbl_trans # $2b-4 ERROR
4869 short tbl_trans - tbl_trans # $2b-5 ERROR
4870 short tbl_trans - tbl_trans # $2b-6 ERROR
4871 short tbl_trans - tbl_trans # $2b-7 ERROR
4873 short tbl_trans - tbl_trans # $2c-0 ERROR
4874 short tbl_trans - tbl_trans # $2c-1 ERROR
4875 short tbl_trans - tbl_trans # $2c-2 ERROR
4876 short tbl_trans - tbl_trans # $2c-3 ERROR
4877 short tbl_trans - tbl_trans # $2c-4 ERROR
4878 short tbl_trans - tbl_trans # $2c-5 ERROR
4879 short tbl_trans - tbl_trans # $2c-6 ERROR
4880 short tbl_trans - tbl_trans # $2c-7 ERROR
4882 short tbl_trans - tbl_trans # $2d-0 ERROR
4883 short tbl_trans - tbl_trans # $2d-1 ERROR
4884 short tbl_trans - tbl_trans # $2d-2 ERROR
4885 short tbl_trans - tbl_trans # $2d-3 ERROR
4886 short tbl_trans - tbl_trans # $2d-4 ERROR
4887 short tbl_trans - tbl_trans # $2d-5 ERROR
4888 short tbl_trans - tbl_trans # $2d-6 ERROR
4889 short tbl_trans - tbl_trans # $2d-7 ERROR
4891 short tbl_trans - tbl_trans # $2e-0 ERROR
4892 short tbl_trans - tbl_trans # $2e-1 ERROR
4893 short tbl_trans - tbl_trans # $2e-2 ERROR
4894 short tbl_trans - tbl_trans # $2e-3 ERROR
4895 short tbl_trans - tbl_trans # $2e-4 ERROR
4896 short tbl_trans - tbl_trans # $2e-5 ERROR
4897 short tbl_trans - tbl_trans # $2e-6 ERROR
4898 short tbl_trans - tbl_trans # $2e-7 ERROR
4900 short tbl_trans - tbl_trans # $2f-0 ERROR
4901 short tbl_trans - tbl_trans # $2f-1 ERROR
4902 short tbl_trans - tbl_trans # $2f-2 ERROR
4903 short tbl_trans - tbl_trans # $2f-3 ERROR
4904 short tbl_trans - tbl_trans # $2f-4 ERROR
4905 short tbl_trans - tbl_trans # $2f-5 ERROR
4906 short tbl_trans - tbl_trans # $2f-6 ERROR
4907 short tbl_trans - tbl_trans # $2f-7 ERROR
4909 short ssincos - tbl_trans # $30-0 fsincos norm
4910 short ssincosz - tbl_trans # $30-1 fsincos zero
4911 short ssincosi - tbl_trans # $30-2 fsincos inf
4912 short ssincosqnan - tbl_trans # $30-3 fsincos qnan
4913 short ssincosd - tbl_trans # $30-5 fsincos denorm
4914 short ssincossnan - tbl_trans # $30-4 fsincos snan
4915 short tbl_trans - tbl_trans # $30-6 fsincos unnorm
4916 short tbl_trans - tbl_trans # $30-7 ERROR
4918 short ssincos - tbl_trans # $31-0 fsincos norm
4919 short ssincosz - tbl_trans # $31-1 fsincos zero
4920 short ssincosi - tbl_trans # $31-2 fsincos inf
4921 short ssincosqnan - tbl_trans # $31-3 fsincos qnan
4922 short ssincosd - tbl_trans # $31-5 fsincos denorm
4923 short ssincossnan - tbl_trans # $31-4 fsincos snan
4924 short tbl_trans - tbl_trans # $31-6 fsincos unnorm
4925 short tbl_trans - tbl_trans # $31-7 ERROR
4927 short ssincos - tbl_trans # $32-0 fsincos norm
4928 short ssincosz - tbl_trans # $32-1 fsincos zero
4929 short ssincosi - tbl_trans # $32-2 fsincos inf
4930 short ssincosqnan - tbl_trans # $32-3 fsincos qnan
4931 short ssincosd - tbl_trans # $32-5 fsincos denorm
4932 short ssincossnan - tbl_trans # $32-4 fsincos snan
4933 short tbl_trans - tbl_trans # $32-6 fsincos unnorm
4934 short tbl_trans - tbl_trans # $32-7 ERROR
4936 short ssincos - tbl_trans # $33-0 fsincos norm
4937 short ssincosz - tbl_trans # $33-1 fsincos zero
4938 short ssincosi - tbl_trans # $33-2 fsincos inf
4939 short ssincosqnan - tbl_trans # $33-3 fsincos qnan
4940 short ssincosd - tbl_trans # $33-5 fsincos denorm
4941 short ssincossnan - tbl_trans # $33-4 fsincos snan
4942 short tbl_trans - tbl_trans # $33-6 fsincos unnorm
4943 short tbl_trans - tbl_trans # $33-7 ERROR
4945 short ssincos - tbl_trans # $34-0 fsincos norm
4946 short ssincosz - tbl_trans # $34-1 fsincos zero
4947 short ssincosi - tbl_trans # $34-2 fsincos inf
4948 short ssincosqnan - tbl_trans # $34-3 fsincos qnan
4949 short ssincosd - tbl_trans # $34-5 fsincos denorm
4950 short ssincossnan - tbl_trans # $34-4 fsincos snan
4951 short tbl_trans - tbl_trans # $34-6 fsincos unnorm
4952 short tbl_trans - tbl_trans # $34-7 ERROR
4954 short ssincos - tbl_trans # $35-0 fsincos norm
4955 short ssincosz - tbl_trans # $35-1 fsincos zero
4956 short ssincosi - tbl_trans # $35-2 fsincos inf
4957 short ssincosqnan - tbl_trans # $35-3 fsincos qnan
4958 short ssincosd - tbl_trans # $35-5 fsincos denorm
4959 short ssincossnan - tbl_trans # $35-4 fsincos snan
4960 short tbl_trans - tbl_trans # $35-6 fsincos unnorm
4961 short tbl_trans - tbl_trans # $35-7 ERROR
4963 short ssincos - tbl_trans # $36-0 fsincos norm
4964 short ssincosz - tbl_trans # $36-1 fsincos zero
4965 short ssincosi - tbl_trans # $36-2 fsincos inf
4966 short ssincosqnan - tbl_trans # $36-3 fsincos qnan
4967 short ssincosd - tbl_trans # $36-5 fsincos denorm
4968 short ssincossnan - tbl_trans # $36-4 fsincos snan
4969 short tbl_trans - tbl_trans # $36-6 fsincos unnorm
4970 short tbl_trans - tbl_trans # $36-7 ERROR
4972 short ssincos - tbl_trans # $37-0 fsincos norm
4973 short ssincosz - tbl_trans # $37-1 fsincos zero
4974 short ssincosi - tbl_trans # $37-2 fsincos inf
4975 short ssincosqnan - tbl_trans # $37-3 fsincos qnan
4976 short ssincosd - tbl_trans # $37-5 fsincos denorm
4977 short ssincossnan - tbl_trans # $37-4 fsincos snan
4978 short tbl_trans - tbl_trans # $37-6 fsincos unnorm
4979 short tbl_trans - tbl_trans # $37-7 ERROR
4981 ##########
4983 # the instruction fetch access for the displacement word for the
4984 # fdbcc emulation failed. here, we create an access error frame
4985 # from the current frame and branch to _real_access().
4986 funimp_iacc:
4987 movm.l EXC_DREGS(%a6),&0x0303 # restore d0-d1/a0-a1
4988 fmovm.l USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs
4989 fmovm.x EXC_FPREGS(%a6),&0xc0 # restore fp0-fp1
4991 mov.l USER_FPIAR(%a6),EXC_PC(%a6) # store current PC
4993 unlk %a6
4995 mov.l (%sp),-(%sp) # store SR,hi(PC)
4996 mov.w 0x8(%sp),0x4(%sp) # store lo(PC)
4997 mov.w &0x4008,0x6(%sp) # store voff
4998 mov.l 0x2(%sp),0x8(%sp) # store EA
4999 mov.l &0x09428001,0xc(%sp) # store FSLW
5001 btst &0x5,(%sp) # user or supervisor mode?
5002 beq.b funimp_iacc_end # user
5003 bset &0x2,0xd(%sp) # set supervisor TM bit
5005 funimp_iacc_end:
5006 bra.l _real_access
5008 #########################################################################
5009 # ssin(): computes the sine of a normalized input #
5010 # ssind(): computes the sine of a denormalized input #
5011 # scos(): computes the cosine of a normalized input #
5012 # scosd(): computes the cosine of a denormalized input #
5013 # ssincos(): computes the sine and cosine of a normalized input #
5014 # ssincosd(): computes the sine and cosine of a denormalized input #
5016 # INPUT *************************************************************** #
5017 # a0 = pointer to extended precision input #
5018 # d0 = round precision,mode #
5020 # OUTPUT ************************************************************** #
5021 # fp0 = sin(X) or cos(X) #
5023 # For ssincos(X): #
5024 # fp0 = sin(X) #
5025 # fp1 = cos(X) #
5027 # ACCURACY and MONOTONICITY ******************************************* #
5028 # The returned result is within 1 ulp in 64 significant bits, i.e. #
5029 # within 0.5001 ulp to 53 bits if the result is subsequently #
5030 # rounded to double precision. The result is provably monotonic #
5031 # in double precision. #
5033 # ALGORITHM *********************************************************** #
5035 # SIN and COS: #
5036 # 1. If SIN is invoked, set AdjN := 0; otherwise, set AdjN := 1. #
5038 # 2. If |X| >= 15Pi or |X| < 2**(-40), go to 7. #
5040 # 3. Decompose X as X = N(Pi/2) + r where |r| <= Pi/4. Let #
5041 # k = N mod 4, so in particular, k = 0,1,2,or 3. #
5042 # Overwrite k by k := k + AdjN. #
5044 # 4. If k is even, go to 6. #
5046 # 5. (k is odd) Set j := (k-1)/2, sgn := (-1)**j. #
5047 # Return sgn*cos(r) where cos(r) is approximated by an #
5048 # even polynomial in r, 1 + r*r*(B1+s*(B2+ ... + s*B8)), #
5049 # s = r*r. #
5050 # Exit. #
5052 # 6. (k is even) Set j := k/2, sgn := (-1)**j. Return sgn*sin(r) #
5053 # where sin(r) is approximated by an odd polynomial in r #
5054 # r + r*s*(A1+s*(A2+ ... + s*A7)), s = r*r. #
5055 # Exit. #
5057 # 7. If |X| > 1, go to 9. #
5059 # 8. (|X|<2**(-40)) If SIN is invoked, return X; #
5060 # otherwise return 1. #
5062 # 9. Overwrite X by X := X rem 2Pi. Now that |X| <= Pi, #
5063 # go back to 3. #
5065 # SINCOS: #
5066 # 1. If |X| >= 15Pi or |X| < 2**(-40), go to 6. #
5068 # 2. Decompose X as X = N(Pi/2) + r where |r| <= Pi/4. Let #
5069 # k = N mod 4, so in particular, k = 0,1,2,or 3. #
5071 # 3. If k is even, go to 5. #
5073 # 4. (k is odd) Set j1 := (k-1)/2, j2 := j1 (EOR) (k mod 2), i.e. #
5074 # j1 exclusive or with the l.s.b. of k. #
5075 # sgn1 := (-1)**j1, sgn2 := (-1)**j2. #
5076 # SIN(X) = sgn1 * cos(r) and COS(X) = sgn2*sin(r) where #
5077 # sin(r) and cos(r) are computed as odd and even #
5078 # polynomials in r, respectively. Exit #
5080 # 5. (k is even) Set j1 := k/2, sgn1 := (-1)**j1. #
5081 # SIN(X) = sgn1 * sin(r) and COS(X) = sgn1*cos(r) where #
5082 # sin(r) and cos(r) are computed as odd and even #
5083 # polynomials in r, respectively. Exit #
5085 # 6. If |X| > 1, go to 8. #
5087 # 7. (|X|<2**(-40)) SIN(X) = X and COS(X) = 1. Exit. #
5089 # 8. Overwrite X by X := X rem 2Pi. Now that |X| <= Pi, #
5090 # go back to 2. #
5092 #########################################################################
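#
# For illustration only (kept in comments, not assembled): a hedged C
# sketch of the same reduce-then-polynomial structure. remquo() stands
# in for the PITBL/REDUCEX argument reduction and truncated Taylor
# series stand in for the minimax SINA*/COSB* coefficients, so the
# accuracy statement above does not apply to this sketch.
#
#	#include <math.h>
#
#	/* adjn = 0 computes sin(x), adjn = 1 computes cos(x) */
#	static double sincos_sketch(double x, int adjn)
#	{
#	    int n;
#	    double r = remquo(x, M_PI_2, &n);   /* x = n*(pi/2)+r, |r| <= pi/4 */
#	    int k = (((n % 4) + 4) + adjn) % 4; /* k = (N + AdjN) mod 4        */
#	    double s = r * r;
#	    if (k & 1) {                        /* k odd: sgn*cos(r), sgn = (-1)**((k-1)/2) */
#	        double c = 1.0 + s*(-1.0/2 + s*(1.0/24 - s/720));
#	        return (k & 2) ? -c : c;
#	    } else {                            /* k even: sgn*sin(r), sgn = (-1)**(k/2) */
#	        double p = r + r*s*(-1.0/6 + s*(1.0/120 - s/5040));
#	        return (k & 2) ? -p : p;
#	    }
#	}
#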
5094 SINA7: long 0xBD6AAA77,0xCCC994F5
5095 SINA6: long 0x3DE61209,0x7AAE8DA1
5096 SINA5: long 0xBE5AE645,0x2A118AE4
5097 SINA4: long 0x3EC71DE3,0xA5341531
5098 SINA3: long 0xBF2A01A0,0x1A018B59,0x00000000,0x00000000
5099 SINA2: long 0x3FF80000,0x88888888,0x888859AF,0x00000000
5100 SINA1: long 0xBFFC0000,0xAAAAAAAA,0xAAAAAA99,0x00000000
5102 COSB8: long 0x3D2AC4D0,0xD6011EE3
5103 COSB7: long 0xBDA9396F,0x9F45AC19
5104 COSB6: long 0x3E21EED9,0x0612C972
5105 COSB5: long 0xBE927E4F,0xB79D9FCF
5106 COSB4: long 0x3EFA01A0,0x1A01D423,0x00000000,0x00000000
5107 COSB3: long 0xBFF50000,0xB60B60B6,0x0B61D438,0x00000000
5108 COSB2: long 0x3FFA0000,0xAAAAAAAA,0xAAAAAB5E
5109 COSB1: long 0xBF000000
5111 set INARG,FP_SCR0
5113 set X,FP_SCR0
5114 # set XDCARE,X+2
5115 set XFRAC,X+4
5117 set RPRIME,FP_SCR0
5118 set SPRIME,FP_SCR1
5120 set POSNEG1,L_SCR1
5121 set TWOTO63,L_SCR1
5123 set ENDFLAG,L_SCR2
5124 set INT,L_SCR2
5126 set ADJN,L_SCR3
5128 ############################################
5129 global ssin
5130 ssin:
5131 mov.l &0,ADJN(%a6) # yes; SET ADJN TO 0
5132 bra.b SINBGN
5134 ############################################
5135 global scos
5136 scos:
5137 mov.l &1,ADJN(%a6) # yes; SET ADJN TO 1
5139 ############################################
5140 SINBGN:
5141 #--SAVE FPCR, FP1. CHECK IF |X| IS TOO SMALL OR LARGE
5143 fmov.x (%a0),%fp0 # LOAD INPUT
5144 fmov.x %fp0,X(%a6) # save input at X
5146 # "COMPACTIFY" X
5147 mov.l (%a0),%d1 # put exp in hi word
5148 mov.w 4(%a0),%d1 # fetch hi(man)
5149 and.l &0x7FFFFFFF,%d1 # strip sign
5151 cmpi.l %d1,&0x3FD78000 # is |X| >= 2**(-40)?
5152 bge.b SOK1 # yes
5153 bra.w SINSM # no; input is very small
5155 SOK1:
5156 cmp.l %d1,&0x4004BC7E # is |X| < 15 PI?
5157 blt.b SINMAIN # yes
5158 bra.w SREDUCEX # no; input is very large
5160 #--THIS IS THE USUAL CASE, |X| <= 15 PI.
5161 #--THE ARGUMENT REDUCTION IS DONE BY TABLE LOOK UP.
5162 SINMAIN:
5163 fmov.x %fp0,%fp1
5164 fmul.d TWOBYPI(%pc),%fp1 # X*2/PI
5166 lea PITBL+0x200(%pc),%a1 # TABLE OF N*PI/2, N = -32,...,32
5168 fmov.l %fp1,INT(%a6) # CONVERT TO INTEGER
5170 mov.l INT(%a6),%d1 # make a copy of N
5171 asl.l &4,%d1 # N *= 16
5172 add.l %d1,%a1 # tbl_addr = a1 + (N*16)
5174 # A1 IS THE ADDRESS OF N*PIBY2
5175 # ...WHICH IS IN TWO PIECES Y1 & Y2
5176 fsub.x (%a1)+,%fp0 # X-Y1
5177 fsub.s (%a1),%fp0 # fp0 = R = (X-Y1)-Y2
5179 SINCONT:
5180 #--continuation from REDUCEX
5182 #--GET N+ADJN AND SEE IF SIN(R) OR COS(R) IS NEEDED
5183 mov.l INT(%a6),%d1
5184 add.l ADJN(%a6),%d1 # SEE IF D0 IS ODD OR EVEN
5185 ror.l &1,%d1 # D0 WAS ODD IFF D0 IS NEGATIVE
5186 cmp.l %d1,&0
5187 blt.w COSPOLY
5189 #--LET J BE THE LEAST SIG. BIT OF D0, LET SGN := (-1)**J.
5190 #--THEN WE RETURN SGN*SIN(R). SGN*SIN(R) IS COMPUTED BY
5191 #--R' + R'*S*(A1 + S(A2 + S(A3 + S(A4 + ... + SA7)))), WHERE
5192 #--R' = SGN*R, S=R*R. THIS CAN BE REWRITTEN AS
5193 #--R' + R'*S*( [A1+T(A3+T(A5+TA7))] + [S(A2+T(A4+TA6))])
5194 #--WHERE T=S*S.
5195 #--NOTE THAT A3 THROUGH A7 ARE STORED IN DOUBLE PRECISION
5196 #--WHILE A1 AND A2 ARE IN DOUBLE-EXTENDED FORMAT.
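#
# For reference, a hedged C rendering of the Horner split described
# above (A1..A7 mirror SINA1..SINA7, sgn_r is R' = SGN*R; the code
# below interleaves the two halves across fp1-fp3 for pipelining):
#
#	double s = r*r, t = s*s;
#	double odd  = A1 + t*(A3 + t*(A5 + t*A7));    /* ends up in fp1 */
#	double even = s*(A2 + t*(A4 + t*A6));         /* ends up in fp2 */
#	double res  = sgn_r + (sgn_r*s)*(odd + even); /* = SGN*SIN(R)   */
#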
5197 SINPOLY:
5198 fmovm.x &0x0c,-(%sp) # save fp2/fp3
5200 fmov.x %fp0,X(%a6) # X IS R
5201 fmul.x %fp0,%fp0 # FP0 IS S
5203 fmov.d SINA7(%pc),%fp3
5204 fmov.d SINA6(%pc),%fp2
5206 fmov.x %fp0,%fp1
5207 fmul.x %fp1,%fp1 # FP1 IS T
5209 ror.l &1,%d1
5210 and.l &0x80000000,%d1
5211 # ...LEAST SIG. BIT OF D0 IN SIGN POSITION
5212 eor.l %d1,X(%a6) # X IS NOW R'= SGN*R
5214 fmul.x %fp1,%fp3 # TA7
5215 fmul.x %fp1,%fp2 # TA6
5217 fadd.d SINA5(%pc),%fp3 # A5+TA7
5218 fadd.d SINA4(%pc),%fp2 # A4+TA6
5220 fmul.x %fp1,%fp3 # T(A5+TA7)
5221 fmul.x %fp1,%fp2 # T(A4+TA6)
5223 fadd.d SINA3(%pc),%fp3 # A3+T(A5+TA7)
5224 fadd.x SINA2(%pc),%fp2 # A2+T(A4+TA6)
5226 fmul.x %fp3,%fp1 # T(A3+T(A5+TA7))
5228 fmul.x %fp0,%fp2 # S(A2+T(A4+TA6))
5229 fadd.x SINA1(%pc),%fp1 # A1+T(A3+T(A5+TA7))
5230 fmul.x X(%a6),%fp0 # R'*S
5232 fadd.x %fp2,%fp1 # [A1+T(A3+T(A5+TA7))]+[S(A2+T(A4+TA6))]
5234 fmul.x %fp1,%fp0 # SIN(R')-R'
5236 fmovm.x (%sp)+,&0x30 # restore fp2/fp3
5238 fmov.l %d0,%fpcr # restore users round mode,prec
5239 fadd.x X(%a6),%fp0 # last inst - possible exception set
5240 bra t_inx2
5242 #--LET J BE THE LEAST SIG. BIT OF D0, LET SGN := (-1)**J.
5243 #--THEN WE RETURN SGN*COS(R). SGN*COS(R) IS COMPUTED BY
5244 #--SGN + S'*(B1 + S(B2 + S(B3 + S(B4 + ... + SB8)))), WHERE
5245 #--S=R*R AND S'=SGN*S. THIS CAN BE REWRITTEN AS
5246 #--SGN + S'*([B1+T(B3+T(B5+TB7))] + [S(B2+T(B4+T(B6+TB8)))])
5247 #--WHERE T=S*S.
5248 #--NOTE THAT B4 THROUGH B8 ARE STORED IN DOUBLE PRECISION
5249 #--WHILE B2 AND B3 ARE IN DOUBLE-EXTENDED FORMAT, B1 IS -1/2
5250 #--AND IS THEREFORE STORED AS SINGLE PRECISION.
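#
# The corresponding hedged C rendering for the cosine half (B1..B8
# mirror COSB1..COSB8, sgn_s is S' = SGN*S, and sgn is the +-1.0 that
# is built in POSNEG1 below):
#
#	double s = r*r, t = s*s;
#	double odd  = B1 + t*(B3 + t*(B5 + t*B7));     /* ends up in fp1 */
#	double even = s*(B2 + t*(B4 + t*(B6 + t*B8))); /* ends up in fp0 */
#	double res  = sgn + sgn_s*(odd + even);        /* = SGN*COS(R)   */
#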
5251 COSPOLY:
5252 fmovm.x &0x0c,-(%sp) # save fp2/fp3
5254 fmul.x %fp0,%fp0 # FP0 IS S
5256 fmov.d COSB8(%pc),%fp2
5257 fmov.d COSB7(%pc),%fp3
5259 fmov.x %fp0,%fp1
5260 fmul.x %fp1,%fp1 # FP1 IS T
5262 fmov.x %fp0,X(%a6) # X IS S
5263 ror.l &1,%d1
5264 and.l &0x80000000,%d1
5265 # ...LEAST SIG. BIT OF D0 IN SIGN POSITION
5267 fmul.x %fp1,%fp2 # TB8
5269 eor.l %d1,X(%a6) # X IS NOW S'= SGN*S
5270 and.l &0x80000000,%d1
5272 fmul.x %fp1,%fp3 # TB7
5274 or.l &0x3F800000,%d1 # D0 IS SGN IN SINGLE
5275 mov.l %d1,POSNEG1(%a6)
5277 fadd.d COSB6(%pc),%fp2 # B6+TB8
5278 fadd.d COSB5(%pc),%fp3 # B5+TB7
5280 fmul.x %fp1,%fp2 # T(B6+TB8)
5281 fmul.x %fp1,%fp3 # T(B5+TB7)
5283 fadd.d COSB4(%pc),%fp2 # B4+T(B6+TB8)
5284 fadd.x COSB3(%pc),%fp3 # B3+T(B5+TB7)
5286 fmul.x %fp1,%fp2 # T(B4+T(B6+TB8))
5287 fmul.x %fp3,%fp1 # T(B3+T(B5+TB7))
5289 fadd.x COSB2(%pc),%fp2 # B2+T(B4+T(B6+TB8))
5290 fadd.s COSB1(%pc),%fp1 # B1+T(B3+T(B5+TB7))
5292 fmul.x %fp2,%fp0 # S(B2+T(B4+T(B6+TB8)))
5294 fadd.x %fp1,%fp0
5296 fmul.x X(%a6),%fp0
5298 fmovm.x (%sp)+,&0x30 # restore fp2/fp3
5300 fmov.l %d0,%fpcr # restore users round mode,prec
5301 fadd.s POSNEG1(%a6),%fp0 # last inst - possible exception set
5302 bra t_inx2
5304 ##############################################
5306 # SINe: Big OR Small?
5307 #--IF |X| > 15PI, WE USE THE GENERAL ARGUMENT REDUCTION.
5308 #--IF |X| < 2**(-40), RETURN X OR 1.
5309 SINBORS:
5310 cmp.l %d1,&0x3FFF8000
5311 bgt.l SREDUCEX
5313 SINSM:
5314 mov.l ADJN(%a6),%d1
5315 cmp.l %d1,&0
5316 bgt.b COSTINY
5318 # here, the operation may underflow iff the precision is sgl or dbl.
5319 # extended denorms are handled through another entry point.
5320 SINTINY:
5321 # mov.w &0x0000,XDCARE(%a6) # JUST IN CASE
5323 fmov.l %d0,%fpcr # restore users round mode,prec
5324 mov.b &FMOV_OP,%d1 # last inst is MOVE
5325 fmov.x X(%a6),%fp0 # last inst - possible exception set
5326 bra t_catch
5328 COSTINY:
5329 fmov.s &0x3F800000,%fp0 # fp0 = 1.0
5330 fmov.l %d0,%fpcr # restore users round mode,prec
5331 fadd.s &0x80800000,%fp0 # last inst - possible exception set
5332 bra t_pinx2
5334 ################################################
5335 global ssind
5336 #--SIN(X) = X FOR DENORMALIZED X
5337 ssind:
5338 bra t_extdnrm
5340 ############################################
5341 global scosd
5342 #--COS(X) = 1 FOR DENORMALIZED X
5343 scosd:
5344 fmov.s &0x3F800000,%fp0 # fp0 = 1.0
5345 bra t_pinx2
5347 ##################################################
5349 global ssincos
5350 ssincos:
5351 #--SET ADJN TO 4
5352 mov.l &4,ADJN(%a6)
5354 fmov.x (%a0),%fp0 # LOAD INPUT
5355 fmov.x %fp0,X(%a6)
5357 mov.l (%a0),%d1
5358 mov.w 4(%a0),%d1
5359 and.l &0x7FFFFFFF,%d1 # COMPACTIFY X
5361 cmp.l %d1,&0x3FD78000 # |X| >= 2**(-40)?
5362 bge.b SCOK1
5363 bra.w SCSM
5365 SCOK1:
5366 cmp.l %d1,&0x4004BC7E # |X| < 15 PI?
5367 blt.b SCMAIN
5368 bra.w SREDUCEX
5371 #--THIS IS THE USUAL CASE, |X| <= 15 PI.
5372 #--THE ARGUMENT REDUCTION IS DONE BY TABLE LOOK UP.
5373 SCMAIN:
5374 fmov.x %fp0,%fp1
5376 fmul.d TWOBYPI(%pc),%fp1 # X*2/PI
5378 lea PITBL+0x200(%pc),%a1 # TABLE OF N*PI/2, N = -32,...,32
5380 fmov.l %fp1,INT(%a6) # CONVERT TO INTEGER
5382 mov.l INT(%a6),%d1
5383 asl.l &4,%d1
5384 add.l %d1,%a1 # ADDRESS OF N*PIBY2, IN Y1, Y2
5386 fsub.x (%a1)+,%fp0 # X-Y1
5387 fsub.s (%a1),%fp0 # FP0 IS R = (X-Y1)-Y2
5389 SCCONT:
5390 #--continuation point from REDUCEX
5392 mov.l INT(%a6),%d1
5393 ror.l &1,%d1
5394 cmp.l %d1,&0 # D0 < 0 IFF N IS ODD
5395 bge.w NEVEN
5397 SNODD:
5398 #--REGISTERS SAVED SO FAR: D0, A0, FP2.
5399 fmovm.x &0x04,-(%sp) # save fp2
5401 fmov.x %fp0,RPRIME(%a6)
5402 fmul.x %fp0,%fp0 # FP0 IS S = R*R
5403 fmov.d SINA7(%pc),%fp1 # A7
5404 fmov.d COSB8(%pc),%fp2 # B8
5405 fmul.x %fp0,%fp1 # SA7
5406 fmul.x %fp0,%fp2 # SB8
5408 mov.l %d2,-(%sp)
5409 mov.l %d1,%d2
5410 ror.l &1,%d2
5411 and.l &0x80000000,%d2
5412 eor.l %d1,%d2
5413 and.l &0x80000000,%d2
5415 fadd.d SINA6(%pc),%fp1 # A6+SA7
5416 fadd.d COSB7(%pc),%fp2 # B7+SB8
5418 fmul.x %fp0,%fp1 # S(A6+SA7)
5419 eor.l %d2,RPRIME(%a6)
5420 mov.l (%sp)+,%d2
5421 fmul.x %fp0,%fp2 # S(B7+SB8)
5422 ror.l &1,%d1
5423 and.l &0x80000000,%d1
5424 mov.l &0x3F800000,POSNEG1(%a6)
5425 eor.l %d1,POSNEG1(%a6)
5427 fadd.d SINA5(%pc),%fp1 # A5+S(A6+SA7)
5428 fadd.d COSB6(%pc),%fp2 # B6+S(B7+SB8)
5430 fmul.x %fp0,%fp1 # S(A5+S(A6+SA7))
5431 fmul.x %fp0,%fp2 # S(B6+S(B7+SB8))
5432 fmov.x %fp0,SPRIME(%a6)
5434 fadd.d SINA4(%pc),%fp1 # A4+S(A5+S(A6+SA7))
5435 eor.l %d1,SPRIME(%a6)
5436 fadd.d COSB5(%pc),%fp2 # B5+S(B6+S(B7+SB8))
5438 fmul.x %fp0,%fp1 # S(A4+...)
5439 fmul.x %fp0,%fp2 # S(B5+...)
5441 fadd.d SINA3(%pc),%fp1 # A3+S(A4+...)
5442 fadd.d COSB4(%pc),%fp2 # B4+S(B5+...)
5444 fmul.x %fp0,%fp1 # S(A3+...)
5445 fmul.x %fp0,%fp2 # S(B4+...)
5447 fadd.x SINA2(%pc),%fp1 # A2+S(A3+...)
5448 fadd.x COSB3(%pc),%fp2 # B3+S(B4+...)
5450 fmul.x %fp0,%fp1 # S(A2+...)
5451 fmul.x %fp0,%fp2 # S(B3+...)
5453 fadd.x SINA1(%pc),%fp1 # A1+S(A2+...)
5454 fadd.x COSB2(%pc),%fp2 # B2+S(B3+...)
5456 fmul.x %fp0,%fp1 # S(A1+...)
5457 fmul.x %fp2,%fp0 # S(B2+...)
5459 fmul.x RPRIME(%a6),%fp1 # R'S(A1+...)
5460 fadd.s COSB1(%pc),%fp0 # B1+S(B2...)
5461 fmul.x SPRIME(%a6),%fp0 # S'(B1+S(B2+...))
5463 fmovm.x (%sp)+,&0x20 # restore fp2
5465 fmov.l %d0,%fpcr
5466 fadd.x RPRIME(%a6),%fp1 # COS(X)
5467 bsr sto_cos # store cosine result
5468 fadd.s POSNEG1(%a6),%fp0 # SIN(X)
5469 bra t_inx2
5471 NEVEN:
5472 #--REGISTERS SAVED SO FAR: FP2.
5473 fmovm.x &0x04,-(%sp) # save fp2
5475 fmov.x %fp0,RPRIME(%a6)
5476 fmul.x %fp0,%fp0 # FP0 IS S = R*R
5478 fmov.d COSB8(%pc),%fp1 # B8
5479 fmov.d SINA7(%pc),%fp2 # A7
5481 fmul.x %fp0,%fp1 # SB8
5482 fmov.x %fp0,SPRIME(%a6)
5483 fmul.x %fp0,%fp2 # SA7
5485 ror.l &1,%d1
5486 and.l &0x80000000,%d1
5488 fadd.d COSB7(%pc),%fp1 # B7+SB8
5489 fadd.d SINA6(%pc),%fp2 # A6+SA7
5491 eor.l %d1,RPRIME(%a6)
5492 eor.l %d1,SPRIME(%a6)
5494 fmul.x %fp0,%fp1 # S(B7+SB8)
5496 or.l &0x3F800000,%d1
5497 mov.l %d1,POSNEG1(%a6)
5499 fmul.x %fp0,%fp2 # S(A6+SA7)
5501 fadd.d COSB6(%pc),%fp1 # B6+S(B7+SB8)
5502 fadd.d SINA5(%pc),%fp2 # A5+S(A6+SA7)
5504 fmul.x %fp0,%fp1 # S(B6+S(B7+SB8))
5505 fmul.x %fp0,%fp2 # S(A5+S(A6+SA7))
5507 fadd.d COSB5(%pc),%fp1 # B5+S(B6+S(B7+SB8))
5508 fadd.d SINA4(%pc),%fp2 # A4+S(A5+S(A6+SA7))
5510 fmul.x %fp0,%fp1 # S(B5+...)
5511 fmul.x %fp0,%fp2 # S(A4+...)
5513 fadd.d COSB4(%pc),%fp1 # B4+S(B5+...)
5514 fadd.d SINA3(%pc),%fp2 # A3+S(A4+...)
5516 fmul.x %fp0,%fp1 # S(B4+...)
5517 fmul.x %fp0,%fp2 # S(A3+...)
5519 fadd.x COSB3(%pc),%fp1 # B3+S(B4+...)
5520 fadd.x SINA2(%pc),%fp2 # A2+S(A3+...)
5522 fmul.x %fp0,%fp1 # S(B3+...)
5523 fmul.x %fp0,%fp2 # S(A2+...)
5525 fadd.x COSB2(%pc),%fp1 # B2+S(B3+...)
5526 fadd.x SINA1(%pc),%fp2 # A1+S(A2+...)
5528 fmul.x %fp0,%fp1 # S(B2+...)
5529 fmul.x %fp2,%fp0 # S(A1+...)
5532 fadd.s COSB1(%pc),%fp1 # B1+S(B2...)
5533 fmul.x RPRIME(%a6),%fp0 # R'S(A1+...)
5534 fmul.x SPRIME(%a6),%fp1 # S'(B1+S(B2+...))
5536 fmovm.x (%sp)+,&0x20 # restore fp2
5538 fmov.l %d0,%fpcr
5539 fadd.s POSNEG1(%a6),%fp1 # COS(X)
5540 bsr sto_cos # store cosine result
5541 fadd.x RPRIME(%a6),%fp0 # SIN(X)
5542 bra t_inx2
5544 ################################################
5546 SCBORS:
5547 cmp.l %d1,&0x3FFF8000
5548 bgt.w SREDUCEX
5550 ################################################
5552 SCSM:
5553 # mov.w &0x0000,XDCARE(%a6)
5554 fmov.s &0x3F800000,%fp1
5556 fmov.l %d0,%fpcr
5557 fsub.s &0x00800000,%fp1
5558 bsr sto_cos # store cosine result
5559 fmov.l %fpcr,%d0 # d0 must have fpcr,too
5560 mov.b &FMOV_OP,%d1 # last inst is MOVE
5561 fmov.x X(%a6),%fp0
5562 bra t_catch
5564 ##############################################
5566 global ssincosd
5567 #--SIN AND COS OF X FOR DENORMALIZED X
5568 ssincosd:
5569 mov.l %d0,-(%sp) # save d0
5570 fmov.s &0x3F800000,%fp1
5571 bsr sto_cos # store cosine result
5572 mov.l (%sp)+,%d0 # restore d0
5573 bra t_extdnrm
5575 ############################################
5577 #--WHEN REDUCEX IS USED, THE CODE WILL INEVITABLY BE SLOW.
5578 #--THIS REDUCTION METHOD, HOWEVER, IS MUCH FASTER THAN USING
5579 #--THE REMAINDER INSTRUCTION WHICH IS NOW IN SOFTWARE.
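#
# For illustration only (kept in comments, not assembled): a hedged C
# sketch of one pass of this style of reduction. Double precision
# stands in for the (R,r) extended pair kept in fp0/fp1, the head/tail
# split of pi/2 only recovers bits of the double-precision constant
# (the FPSP's Piby2_2 carries further true bits of pi), and the
# multi-pass loop with its 2**29 quotient limit is omitted, so this
# assumes a moderately sized input.
#
#	#include <math.h>
#	#include <stdint.h>
#	#include <string.h>
#
#	static double reduce_sketch(double x, int *n_out)
#	{
#	    double head = M_PI_2, tail;
#	    uint64_t b; memcpy(&b, &head, sizeof b);
#	    b &= ~(uint64_t)0x1FFFFF;               /* zero low 21 mantissa bits    */
#	    memcpy(&head, &b, sizeof b);            /* n*head is exact, |n| < 2**21 */
#	    tail = M_PI_2 - head;
#
#	    double n = nearbyint(x * (2.0 / M_PI)); /* quotient N                   */
#	    x = (x - n * head) - n * tail;          /* two-step subtraction         */
#	    *n_out = (int)n;
#	    return x;                               /* ~ X rem (pi/2)               */
#	}
#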
5580 SREDUCEX:
5581 fmovm.x &0x3c,-(%sp) # save {fp2-fp5}
5582 mov.l %d2,-(%sp) # save d2
5583 fmov.s &0x00000000,%fp1 # fp1 = 0
5585 #--If compact form of abs(arg) in d0=$7ffeffff, argument is so large that
5586 #--there is a danger of unwanted overflow in first LOOP iteration. In this
5587 #--case, reduce argument by one remainder step to make subsequent reduction
5588 #--safe.
5589 cmp.l %d1,&0x7ffeffff # is arg dangerously large?
5590 bne.b SLOOP # no
5592 # yes; create 2**16383*PI/2
5593 mov.w &0x7ffe,FP_SCR0_EX(%a6)
5594 mov.l &0xc90fdaa2,FP_SCR0_HI(%a6)
5595 clr.l FP_SCR0_LO(%a6)
5597 # create low half of 2**16383*PI/2 at FP_SCR1
5598 mov.w &0x7fdc,FP_SCR1_EX(%a6)
5599 mov.l &0x85a308d3,FP_SCR1_HI(%a6)
5600 clr.l FP_SCR1_LO(%a6)
5602 ftest.x %fp0 # test sign of argument
5603 fblt.w sred_neg
5605 or.b &0x80,FP_SCR0_EX(%a6) # positive arg
5606 or.b &0x80,FP_SCR1_EX(%a6)
5607 sred_neg:
5608 fadd.x FP_SCR0(%a6),%fp0 # high part of reduction is exact
5609 fmov.x %fp0,%fp1 # save high result in fp1
5610 fadd.x FP_SCR1(%a6),%fp0 # low part of reduction
5611 fsub.x %fp0,%fp1 # determine low component of result
5612 fadd.x FP_SCR1(%a6),%fp1 # fp0/fp1 are reduced argument.
5614 #--ON ENTRY, FP0 IS X, ON RETURN, FP0 IS X REM PI/2, |X| <= PI/4.
5615 #--integer quotient will be stored in N
5616 #--Intermediate remainder is 66 bits long; (R,r) in (FP0,FP1)
5617 SLOOP:
5618 fmov.x %fp0,INARG(%a6) # +-2**K * F, 1 <= F < 2
5619 mov.w INARG(%a6),%d1
5620 mov.l %d1,%a1 # save a copy of D0
5621 and.l &0x00007FFF,%d1
5622 sub.l &0x00003FFF,%d1 # d0 = K
5623 cmp.l %d1,&28
5624 ble.b SLASTLOOP
5625 SCONTLOOP:
5626 sub.l &27,%d1 # d0 = L := K-27
5627 mov.b &0,ENDFLAG(%a6)
5628 bra.b SWORK
5629 SLASTLOOP:
5630 clr.l %d1 # d0 = L := 0
5631 mov.b &1,ENDFLAG(%a6)
5633 SWORK:
5634 #--FIND THE REMAINDER OF (R,r) W.R.T. 2**L * (PI/2). L IS SO CHOSEN
5635 #--THAT INT( X * (2/PI) / 2**(L) ) < 2**29.
5637 #--CREATE 2**(-L) * (2/PI), SIGN(INARG)*2**(63),
5638 #--2**L * (PIby2_1), 2**L * (PIby2_2)
5640 mov.l &0x00003FFE,%d2 # BIASED EXP OF 2/PI
5641 sub.l %d1,%d2 # BIASED EXP OF 2**(-L)*(2/PI)
5643 mov.l &0xA2F9836E,FP_SCR0_HI(%a6)
5644 mov.l &0x4E44152A,FP_SCR0_LO(%a6)
5645 mov.w %d2,FP_SCR0_EX(%a6) # FP_SCR0 = 2**(-L)*(2/PI)
5647 fmov.x %fp0,%fp2
5648 fmul.x FP_SCR0(%a6),%fp2 # fp2 = X * 2**(-L)*(2/PI)
5650 #--WE MUST NOW FIND INT(FP2). SINCE WE NEED THIS VALUE IN
5651 #--FLOATING POINT FORMAT, THE TWO FMOVE'S FMOVE.L FP <--> N
5652 #--WILL BE TOO INEFFICIENT. THE WAY AROUND IT IS THAT
5653 #--(SIGN(INARG)*2**63 + FP2) - SIGN(INARG)*2**63 WILL GIVE
5654 #--US THE DESIRED VALUE IN FLOATING POINT.
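#
# The same rounding trick in hedged C, using 2**52 for doubles where
# the code below uses SIGN(INARG)*2**63 for the 64-bit extended
# mantissa; it relies on round-to-nearest and on the compiler not
# reassociating the two additions, and needs |x| well below 2**52:
#
#	double big = copysign(0x1.0p52, x);
#	double n   = (x + big) - big;     /* n = x rounded to an integer */
#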
5655 mov.l %a1,%d2
5656 swap %d2
5657 and.l &0x80000000,%d2
5658 or.l &0x5F000000,%d2 # d2 = SIGN(INARG)*2**63 IN SGL
5659 mov.l %d2,TWOTO63(%a6)
5660 fadd.s TWOTO63(%a6),%fp2 # THE FRACTIONAL PART OF FP1 IS ROUNDED
5661 fsub.s TWOTO63(%a6),%fp2 # fp2 = N
5662 # fint.x %fp2
5664 #--CREATING 2**(L)*Piby2_1 and 2**(L)*Piby2_2
5665 mov.l %d1,%d2 # d2 = L
5667 add.l &0x00003FFF,%d2 # BIASED EXP OF 2**L * (PI/2)
5668 mov.w %d2,FP_SCR0_EX(%a6)
5669 mov.l &0xC90FDAA2,FP_SCR0_HI(%a6)
5670 clr.l FP_SCR0_LO(%a6) # FP_SCR0 = 2**(L) * Piby2_1
5672 add.l &0x00003FDD,%d1
5673 mov.w %d1,FP_SCR1_EX(%a6)
5674 mov.l &0x85A308D3,FP_SCR1_HI(%a6)
5675 clr.l FP_SCR1_LO(%a6) # FP_SCR1 = 2**(L) * Piby2_2
5677 mov.b ENDFLAG(%a6),%d1
5679 #--We are now ready to perform (R+r) - N*P1 - N*P2, P1 = 2**(L) * Piby2_1 and
5680 #--P2 = 2**(L) * Piby2_2
5681 fmov.x %fp2,%fp4 # fp4 = N
5682 fmul.x FP_SCR0(%a6),%fp4 # fp4 = W = N*P1
5683 fmov.x %fp2,%fp5 # fp5 = N
5684 fmul.x FP_SCR1(%a6),%fp5 # fp5 = w = N*P2
5685 fmov.x %fp4,%fp3 # fp3 = W = N*P1
5687 #--we want P+p = W+w but |p| <= half ulp of P
5688 #--Then, we need to compute A := R-P and a := r-p
5689 fadd.x %fp5,%fp3 # fp3 = P
5690 fsub.x %fp3,%fp4 # fp4 = W-P
5692 fsub.x %fp3,%fp0 # fp0 = A := R - P
5693 fadd.x %fp5,%fp4 # fp4 = p = (W-P)+w
5695 fmov.x %fp0,%fp3 # fp3 = A
5696 fsub.x %fp4,%fp1 # fp1 = a := r - p
5698 #--Now we need to normalize (A,a) to "new (R,r)" where R+r = A+a but
5699 #--|r| <= half ulp of R.
5700 fadd.x %fp1,%fp0 # fp0 = R := A+a
5701 #--No need to calculate r if this is the last loop
5702 cmp.b %d1,&0
5703 bgt.w SRESTORE
5705 #--Need to calculate r
5706 fsub.x %fp0,%fp3 # fp3 = A-R
5707 fadd.x %fp3,%fp1 # fp1 = r := (A-R)+a
5708 bra.w SLOOP
5710 SRESTORE:
5711 fmov.l %fp2,INT(%a6)
5712 mov.l (%sp)+,%d2 # restore d2
5713 fmovm.x (%sp)+,&0x3c # restore {fp2-fp5}
5715 mov.l ADJN(%a6),%d1
5716 cmp.l %d1,&4
5718 blt.w SINCONT
5719 bra.w SCCONT
5721 #########################################################################
5722 # stan(): computes the tangent of a normalized input #
5723 # stand(): computes the tangent of a denormalized input #
5725 # INPUT *************************************************************** #
5726 # a0 = pointer to extended precision input #
5727 # d0 = round precision,mode #
5729 # OUTPUT ************************************************************** #
5730 # fp0 = tan(X) #
5732 # ACCURACY and MONOTONICITY ******************************************* #
5733 # The returned result is within 3 ulps in 64 significant bits, i.e. #
5734 # within 0.5001 ulp to 53 bits if the result is subsequently #
5735 # rounded to double precision. The result is provably monotonic #
5736 # in double precision. #
5738 # ALGORITHM *********************************************************** #
5740 # 1. If |X| >= 15Pi or |X| < 2**(-40), go to 6. #
5742 # 2. Decompose X as X = N(Pi/2) + r where |r| <= Pi/4. Let #
5743 # k = N mod 2, so in particular, k = 0 or 1. #
5745 # 3. If k is odd, go to 5. #
5747 # 4. (k is even) Tan(X) = tan(r) and tan(r) is approximated by a #
5748 # rational function U/V where #
5749 # U = r + r*s*(P1 + s*(P2 + s*P3)), and #
5750 # V = 1 + s*(Q1 + s*(Q2 + s*(Q3 + s*Q4))), s = r*r. #
5751 # Exit. #
5753 # 5. (k is odd) Tan(X) = -cot(r). Since tan(r) is approximated by #
5754 # a rational function U/V where #
5755 # U = r + r*s*(P1 + s*(P2 + s*P3)), and #
5756 # V = 1 + s*(Q1 + s*(Q2 + s*(Q3 + s*Q4))), s = r*r, #
5757 # -Cot(r) = -V/U. Exit. #
5759 # 6. If |X| > 1, go to 8. #
5761 # 7. (|X|<2**(-40)) Tan(X) = X. Exit. #
5763 # 8. Overwrite X by X := X rem 2Pi. Now that |X| <= Pi, go back #
5764 # to 2. #
5766 #########################################################################
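#
# For illustration only (kept in comments, not assembled): the control
# flow above in hedged C. remquo() stands in for the PITBL/REDUCEX
# reduction and libm tan() stands in for the U/V rational
# approximation, so only the reduction and the -cot(r) case are shown.
#
#	#include <math.h>
#
#	static double tan_sketch(double x)
#	{
#	    int n;
#	    double r = remquo(x, M_PI_2, &n);  /* x = n*(pi/2)+r, |r| <= pi/4 */
#	    double t = tan(r);                 /* ~ U/V above                 */
#	    return (n % 2 != 0) ? -1.0/t : t;  /* odd N: tan(x) = -cot(r)     */
#	}
#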
5768 TANQ4:
5769 long 0x3EA0B759,0xF50F8688
5770 TANP3:
5771 long 0xBEF2BAA5,0xA8924F04
5773 TANQ3:
5774 long 0xBF346F59,0xB39BA65F,0x00000000,0x00000000
5776 TANP2:
5777 long 0x3FF60000,0xE073D3FC,0x199C4A00,0x00000000
5779 TANQ2:
5780 long 0x3FF90000,0xD23CD684,0x15D95FA1,0x00000000
5782 TANP1:
5783 long 0xBFFC0000,0x8895A6C5,0xFB423BCA,0x00000000
5785 TANQ1:
5786 long 0xBFFD0000,0xEEF57E0D,0xA84BC8CE,0x00000000
5788 INVTWOPI:
5789 long 0x3FFC0000,0xA2F9836E,0x4E44152A,0x00000000
5791 TWOPI1:
5792 long 0x40010000,0xC90FDAA2,0x00000000,0x00000000
5793 TWOPI2:
5794 long 0x3FDF0000,0x85A308D4,0x00000000,0x00000000
5796 #--N*PI/2, -32 <= N <= 32, IN A LEADING TERM IN EXT. AND TRAILING
5797 #--TERM IN SGL. NOTE THAT PI IS 64 BITS LONG, THUS N*PI/2 IS AT
5798 #--MOST 69 BITS LONG.
5799 # global PITBL
5800 PITBL:
5801 long 0xC0040000,0xC90FDAA2,0x2168C235,0x21800000
5802 long 0xC0040000,0xC2C75BCD,0x105D7C23,0xA0D00000
5803 long 0xC0040000,0xBC7EDCF7,0xFF523611,0xA1E80000
5804 long 0xC0040000,0xB6365E22,0xEE46F000,0x21480000
5805 long 0xC0040000,0xAFEDDF4D,0xDD3BA9EE,0xA1200000
5806 long 0xC0040000,0xA9A56078,0xCC3063DD,0x21FC0000
5807 long 0xC0040000,0xA35CE1A3,0xBB251DCB,0x21100000
5808 long 0xC0040000,0x9D1462CE,0xAA19D7B9,0xA1580000
5809 long 0xC0040000,0x96CBE3F9,0x990E91A8,0x21E00000
5810 long 0xC0040000,0x90836524,0x88034B96,0x20B00000
5811 long 0xC0040000,0x8A3AE64F,0x76F80584,0xA1880000
5812 long 0xC0040000,0x83F2677A,0x65ECBF73,0x21C40000
5813 long 0xC0030000,0xFB53D14A,0xA9C2F2C2,0x20000000
5814 long 0xC0030000,0xEEC2D3A0,0x87AC669F,0x21380000
5815 long 0xC0030000,0xE231D5F6,0x6595DA7B,0xA1300000
5816 long 0xC0030000,0xD5A0D84C,0x437F4E58,0x9FC00000
5817 long 0xC0030000,0xC90FDAA2,0x2168C235,0x21000000
5818 long 0xC0030000,0xBC7EDCF7,0xFF523611,0xA1680000
5819 long 0xC0030000,0xAFEDDF4D,0xDD3BA9EE,0xA0A00000
5820 long 0xC0030000,0xA35CE1A3,0xBB251DCB,0x20900000
5821 long 0xC0030000,0x96CBE3F9,0x990E91A8,0x21600000
5822 long 0xC0030000,0x8A3AE64F,0x76F80584,0xA1080000
5823 long 0xC0020000,0xFB53D14A,0xA9C2F2C2,0x1F800000
5824 long 0xC0020000,0xE231D5F6,0x6595DA7B,0xA0B00000
5825 long 0xC0020000,0xC90FDAA2,0x2168C235,0x20800000
5826 long 0xC0020000,0xAFEDDF4D,0xDD3BA9EE,0xA0200000
5827 long 0xC0020000,0x96CBE3F9,0x990E91A8,0x20E00000
5828 long 0xC0010000,0xFB53D14A,0xA9C2F2C2,0x1F000000
5829 long 0xC0010000,0xC90FDAA2,0x2168C235,0x20000000
5830 long 0xC0010000,0x96CBE3F9,0x990E91A8,0x20600000
5831 long 0xC0000000,0xC90FDAA2,0x2168C235,0x1F800000
5832 long 0xBFFF0000,0xC90FDAA2,0x2168C235,0x1F000000
5833 long 0x00000000,0x00000000,0x00000000,0x00000000
5834 long 0x3FFF0000,0xC90FDAA2,0x2168C235,0x9F000000
5835 long 0x40000000,0xC90FDAA2,0x2168C235,0x9F800000
5836 long 0x40010000,0x96CBE3F9,0x990E91A8,0xA0600000
5837 long 0x40010000,0xC90FDAA2,0x2168C235,0xA0000000
5838 long 0x40010000,0xFB53D14A,0xA9C2F2C2,0x9F000000
5839 long 0x40020000,0x96CBE3F9,0x990E91A8,0xA0E00000
5840 long 0x40020000,0xAFEDDF4D,0xDD3BA9EE,0x20200000
5841 long 0x40020000,0xC90FDAA2,0x2168C235,0xA0800000
5842 long 0x40020000,0xE231D5F6,0x6595DA7B,0x20B00000
5843 long 0x40020000,0xFB53D14A,0xA9C2F2C2,0x9F800000
5844 long 0x40030000,0x8A3AE64F,0x76F80584,0x21080000
5845 long 0x40030000,0x96CBE3F9,0x990E91A8,0xA1600000
5846 long 0x40030000,0xA35CE1A3,0xBB251DCB,0xA0900000
5847 long 0x40030000,0xAFEDDF4D,0xDD3BA9EE,0x20A00000
5848 long 0x40030000,0xBC7EDCF7,0xFF523611,0x21680000
5849 long 0x40030000,0xC90FDAA2,0x2168C235,0xA1000000
5850 long 0x40030000,0xD5A0D84C,0x437F4E58,0x1FC00000
5851 long 0x40030000,0xE231D5F6,0x6595DA7B,0x21300000
5852 long 0x40030000,0xEEC2D3A0,0x87AC669F,0xA1380000
5853 long 0x40030000,0xFB53D14A,0xA9C2F2C2,0xA0000000
5854 long 0x40040000,0x83F2677A,0x65ECBF73,0xA1C40000
5855 long 0x40040000,0x8A3AE64F,0x76F80584,0x21880000
5856 long 0x40040000,0x90836524,0x88034B96,0xA0B00000
5857 long 0x40040000,0x96CBE3F9,0x990E91A8,0xA1E00000
5858 long 0x40040000,0x9D1462CE,0xAA19D7B9,0x21580000
5859 long 0x40040000,0xA35CE1A3,0xBB251DCB,0xA1100000
5860 long 0x40040000,0xA9A56078,0xCC3063DD,0xA1FC0000
5861 long 0x40040000,0xAFEDDF4D,0xDD3BA9EE,0x21200000
5862 long 0x40040000,0xB6365E22,0xEE46F000,0xA1480000
5863 long 0x40040000,0xBC7EDCF7,0xFF523611,0x21E80000
5864 long 0x40040000,0xC2C75BCD,0x105D7C23,0x20D00000
5865 long 0x40040000,0xC90FDAA2,0x2168C235,0xA1800000
5867 set INARG,FP_SCR0
5869 set TWOTO63,L_SCR1
5870 set INT,L_SCR1
5871 set ENDFLAG,L_SCR2
5873 global stan
5874 stan:
5875 fmov.x (%a0),%fp0 # LOAD INPUT
5877 mov.l (%a0),%d1
5878 mov.w 4(%a0),%d1
5879 and.l &0x7FFFFFFF,%d1
5881 cmp.l %d1,&0x3FD78000 # |X| >= 2**(-40)?
5882 bge.b TANOK1
5883 bra.w TANSM
5884 TANOK1:
5885 cmp.l %d1,&0x4004BC7E # |X| < 15 PI?
5886 blt.b TANMAIN
5887 bra.w REDUCEX
5889 TANMAIN:
5890 #--THIS IS THE USUAL CASE, |X| <= 15 PI.
5891 #--THE ARGUMENT REDUCTION IS DONE BY TABLE LOOK UP.
5892 fmov.x %fp0,%fp1
5893 fmul.d TWOBYPI(%pc),%fp1 # X*2/PI
5895 lea.l PITBL+0x200(%pc),%a1 # TABLE OF N*PI/2, N = -32,...,32
5897 fmov.l %fp1,%d1 # CONVERT TO INTEGER
5899 asl.l &4,%d1
5900 add.l %d1,%a1 # ADDRESS N*PIBY2 IN Y1, Y2
5902 fsub.x (%a1)+,%fp0 # X-Y1
5904 fsub.s (%a1),%fp0 # FP0 IS R = (X-Y1)-Y2
5906 ror.l &5,%d1
5907 and.l &0x80000000,%d1 # D0 WAS ODD IFF D0 < 0
5909 TANCONT:
5910 fmovm.x &0x0c,-(%sp) # save fp2,fp3
5912 cmp.l %d1,&0
5913 blt.w NODD
5915 fmov.x %fp0,%fp1
5916 fmul.x %fp1,%fp1 # S = R*R
5918 fmov.d TANQ4(%pc),%fp3
5919 fmov.d TANP3(%pc),%fp2
5921 fmul.x %fp1,%fp3 # SQ4
5922 fmul.x %fp1,%fp2 # SP3
5924 fadd.d TANQ3(%pc),%fp3 # Q3+SQ4
5925 fadd.x TANP2(%pc),%fp2 # P2+SP3
5927 fmul.x %fp1,%fp3 # S(Q3+SQ4)
5928 fmul.x %fp1,%fp2 # S(P2+SP3)
5930 fadd.x TANQ2(%pc),%fp3 # Q2+S(Q3+SQ4)
5931 fadd.x TANP1(%pc),%fp2 # P1+S(P2+SP3)
5933 fmul.x %fp1,%fp3 # S(Q2+S(Q3+SQ4))
5934 fmul.x %fp1,%fp2 # S(P1+S(P2+SP3))
5936 fadd.x TANQ1(%pc),%fp3 # Q1+S(Q2+S(Q3+SQ4))
5937 fmul.x %fp0,%fp2 # RS(P1+S(P2+SP3))
5939 fmul.x %fp3,%fp1 # S(Q1+S(Q2+S(Q3+SQ4)))
5941 fadd.x %fp2,%fp0 # R+RS(P1+S(P2+SP3))
5943 fadd.s &0x3F800000,%fp1 # 1+S(Q1+...)
5945 fmovm.x (%sp)+,&0x30 # restore fp2,fp3
5947 fmov.l %d0,%fpcr # restore users round mode,prec
5948 fdiv.x %fp1,%fp0 # last inst - possible exception set
5949 bra t_inx2
5951 NODD:
5952 fmov.x %fp0,%fp1
5953 fmul.x %fp0,%fp0 # S = R*R
5955 fmov.d TANQ4(%pc),%fp3
5956 fmov.d TANP3(%pc),%fp2
5958 fmul.x %fp0,%fp3 # SQ4
5959 fmul.x %fp0,%fp2 # SP3
5961 fadd.d TANQ3(%pc),%fp3 # Q3+SQ4
5962 fadd.x TANP2(%pc),%fp2 # P2+SP3
5964 fmul.x %fp0,%fp3 # S(Q3+SQ4)
5965 fmul.x %fp0,%fp2 # S(P2+SP3)
5967 fadd.x TANQ2(%pc),%fp3 # Q2+S(Q3+SQ4)
5968 fadd.x TANP1(%pc),%fp2 # P1+S(P2+SP3)
5970 fmul.x %fp0,%fp3 # S(Q2+S(Q3+SQ4))
5971 fmul.x %fp0,%fp2 # S(P1+S(P2+SP3))
5973 fadd.x TANQ1(%pc),%fp3 # Q1+S(Q2+S(Q3+SQ4))
5974 fmul.x %fp1,%fp2 # RS(P1+S(P2+SP3))
5976 fmul.x %fp3,%fp0 # S(Q1+S(Q2+S(Q3+SQ4)))
5978 fadd.x %fp2,%fp1 # R+RS(P1+S(P2+SP3))
5979 fadd.s &0x3F800000,%fp0 # 1+S(Q1+...)
5981 fmovm.x (%sp)+,&0x30 # restore fp2,fp3
5983 fmov.x %fp1,-(%sp)
5984 eor.l &0x80000000,(%sp)
5986 fmov.l %d0,%fpcr # restore users round mode,prec
5987 fdiv.x (%sp)+,%fp0 # last inst - possible exception set
5988 bra t_inx2
5990 TANBORS:
5991 #--IF |X| > 15PI, WE USE THE GENERAL ARGUMENT REDUCTION.
5992 #--IF |X| < 2**(-40), RETURN X OR 1.
5993 cmp.l %d1,&0x3FFF8000
5994 bgt.b REDUCEX
5996 TANSM:
5997 fmov.x %fp0,-(%sp)
5998 fmov.l %d0,%fpcr # restore users round mode,prec
5999 mov.b &FMOV_OP,%d1 # last inst is MOVE
6000 fmov.x (%sp)+,%fp0 # last inst - possible exception set
6001 bra t_catch
6003 global stand
6004 #--TAN(X) = X FOR DENORMALIZED X
6005 stand:
6006 bra t_extdnrm
6008 #--WHEN REDUCEX IS USED, THE CODE WILL INEVITABLY BE SLOW.
6009 #--THIS REDUCTION METHOD, HOWEVER, IS MUCH FASTER THAN USING
6010 #--THE REMAINDER INSTRUCTION WHICH IS NOW IN SOFTWARE.
6011 REDUCEX:
6012 fmovm.x &0x3c,-(%sp) # save {fp2-fp5}
6013 mov.l %d2,-(%sp) # save d2
6014 fmov.s &0x00000000,%fp1 # fp1 = 0
6016 #--If compact form of abs(arg) in d0=$7ffeffff, argument is so large that
6017 #--there is a danger of unwanted overflow in first LOOP iteration. In this
6018 #--case, reduce argument by one remainder step to make subsequent reduction
6019 #--safe.
6020 cmp.l %d1,&0x7ffeffff # is arg dangerously large?
6021 bne.b LOOP # no
6023 # yes; create 2**16383*PI/2
6024 mov.w &0x7ffe,FP_SCR0_EX(%a6)
6025 mov.l &0xc90fdaa2,FP_SCR0_HI(%a6)
6026 clr.l FP_SCR0_LO(%a6)
6028 # create low half of 2**16383*PI/2 at FP_SCR1
6029 mov.w &0x7fdc,FP_SCR1_EX(%a6)
6030 mov.l &0x85a308d3,FP_SCR1_HI(%a6)
6031 clr.l FP_SCR1_LO(%a6)
6033 ftest.x %fp0 # test sign of argument
6034 fblt.w red_neg
6036 or.b &0x80,FP_SCR0_EX(%a6) # positive arg
6037 or.b &0x80,FP_SCR1_EX(%a6)
6038 red_neg:
6039 fadd.x FP_SCR0(%a6),%fp0 # high part of reduction is exact
6040 fmov.x %fp0,%fp1 # save high result in fp1
6041 fadd.x FP_SCR1(%a6),%fp0 # low part of reduction
6042 fsub.x %fp0,%fp1 # determine low component of result
6043 fadd.x FP_SCR1(%a6),%fp1 # fp0/fp1 are reduced argument.
6045 #--ON ENTRY, FP0 IS X, ON RETURN, FP0 IS X REM PI/2, |X| <= PI/4.
6046 #--integer quotient will be stored in N
6047 #--Intermediate remainder is 66 bits long; (R,r) in (FP0,FP1)
6048 LOOP:
6049 fmov.x %fp0,INARG(%a6) # +-2**K * F, 1 <= F < 2
6050 mov.w INARG(%a6),%d1
6051 mov.l %d1,%a1 # save a copy of D0
6052 and.l &0x00007FFF,%d1
6053 sub.l &0x00003FFF,%d1 # d0 = K
6054 cmp.l %d1,&28
6055 ble.b LASTLOOP
6056 CONTLOOP:
6057 sub.l &27,%d1 # d0 = L := K-27
6058 mov.b &0,ENDFLAG(%a6)
6059 bra.b WORK
6060 LASTLOOP:
6061 clr.l %d1 # d0 = L := 0
6062 mov.b &1,ENDFLAG(%a6)
6064 WORK:
6065 #--FIND THE REMAINDER OF (R,r) W.R.T. 2**L * (PI/2). L IS SO CHOSEN
6066 #--THAT INT( X * (2/PI) / 2**(L) ) < 2**29.
6068 #--CREATE 2**(-L) * (2/PI), SIGN(INARG)*2**(63),
6069 #--2**L * (PIby2_1), 2**L * (PIby2_2)
6071 mov.l &0x00003FFE,%d2 # BIASED EXP OF 2/PI
6072 sub.l %d1,%d2 # BIASED EXP OF 2**(-L)*(2/PI)
6074 mov.l &0xA2F9836E,FP_SCR0_HI(%a6)
6075 mov.l &0x4E44152A,FP_SCR0_LO(%a6)
6076 mov.w %d2,FP_SCR0_EX(%a6) # FP_SCR0 = 2**(-L)*(2/PI)
6078 fmov.x %fp0,%fp2
6079 fmul.x FP_SCR0(%a6),%fp2 # fp2 = X * 2**(-L)*(2/PI)
6081 #--WE MUST NOW FIND INT(FP2). SINCE WE NEED THIS VALUE IN
6082 #--FLOATING POINT FORMAT, THE TWO FMOVE'S FMOVE.L FP <--> N
6083 #--WILL BE TOO INEFFICIENT. THE WAY AROUND IT IS THAT
6084 #--(SIGN(INARG)*2**63 + FP2) - SIGN(INARG)*2**63 WILL GIVE
6085 #--US THE DESIRED VALUE IN FLOATING POINT.
6086 mov.l %a1,%d2
6087 swap %d2
6088 and.l &0x80000000,%d2
6089 or.l &0x5F000000,%d2 # d2 = SIGN(INARG)*2**63 IN SGL
6090 mov.l %d2,TWOTO63(%a6)
6091 fadd.s TWOTO63(%a6),%fp2 # THE FRACTIONAL PART OF FP1 IS ROUNDED
6092 fsub.s TWOTO63(%a6),%fp2 # fp2 = N
6093 # fintrz.x %fp2,%fp2
6095 #--CREATING 2**(L)*Piby2_1 and 2**(L)*Piby2_2
6096 mov.l %d1,%d2 # d2 = L
6098 add.l &0x00003FFF,%d2 # BIASED EXP OF 2**L * (PI/2)
6099 mov.w %d2,FP_SCR0_EX(%a6)
6100 mov.l &0xC90FDAA2,FP_SCR0_HI(%a6)
6101 clr.l FP_SCR0_LO(%a6) # FP_SCR0 = 2**(L) * Piby2_1
6103 add.l &0x00003FDD,%d1
6104 mov.w %d1,FP_SCR1_EX(%a6)
6105 mov.l &0x85A308D3,FP_SCR1_HI(%a6)
6106 clr.l FP_SCR1_LO(%a6) # FP_SCR1 = 2**(L) * Piby2_2
6108 mov.b ENDFLAG(%a6),%d1
6110 #--We are now ready to perform (R+r) - N*P1 - N*P2, P1 = 2**(L) * Piby2_1 and
6111 #--P2 = 2**(L) * Piby2_2
6112 fmov.x %fp2,%fp4 # fp4 = N
6113 fmul.x FP_SCR0(%a6),%fp4 # fp4 = W = N*P1
6114 fmov.x %fp2,%fp5 # fp5 = N
6115 fmul.x FP_SCR1(%a6),%fp5 # fp5 = w = N*P2
6116 fmov.x %fp4,%fp3 # fp3 = W = N*P1
6118 #--we want P+p = W+w but |p| <= half ulp of P
6119 #--Then, we need to compute A := R-P and a := r-p
6120 fadd.x %fp5,%fp3 # fp3 = P
6121 fsub.x %fp3,%fp4 # fp4 = W-P
6123 fsub.x %fp3,%fp0 # fp0 = A := R - P
6124 fadd.x %fp5,%fp4 # fp4 = p = (W-P)+w
6126 fmov.x %fp0,%fp3 # fp3 = A
6127 fsub.x %fp4,%fp1 # fp1 = a := r - p
6129 #--Now we need to normalize (A,a) to "new (R,r)" where R+r = A+a but
6130 #--|r| <= half ulp of R.
6131 fadd.x %fp1,%fp0 # fp0 = R := A+a
6132 #--No need to calculate r if this is the last loop
6133 cmp.b %d1,&0
6134 bgt.w RESTORE
6136 #--Need to calculate r
6137 fsub.x %fp0,%fp3 # fp3 = A-R
6138 fadd.x %fp3,%fp1 # fp1 = r := (A-R)+a
6139 bra.w LOOP
6141 RESTORE:
6142 fmov.l %fp2,INT(%a6)
6143 mov.l (%sp)+,%d2 # restore d2
6144 fmovm.x (%sp)+,&0x3c # restore {fp2-fp5}
6146 mov.l INT(%a6),%d1
6147 ror.l &1,%d1
6149 bra.w TANCONT
6151 #########################################################################
6152 # satan(): computes the arctangent of a normalized number #
6153 # satand(): computes the arctangent of a denormalized number #
6155 # INPUT *************************************************************** #
6156 # a0 = pointer to extended precision input #
6157 # d0 = round precision,mode #
6159 # OUTPUT ************************************************************** #
6160 # fp0 = arctan(X) #
6162 # ACCURACY and MONOTONICITY ******************************************* #
6163 # The returned result is within 2 ulps in 64 significant bits, #
6164 # i.e. within 0.5001 ulp to 53 bits if the result is subsequently #
6165 # rounded to double precision. The result is provably monotonic #
6166 # in double precision. #
6168 # ALGORITHM *********************************************************** #
6169 # Step 1. If |X| >= 16 or |X| < 1/16, go to Step 5. #
6171 # Step 2. Let X = sgn * 2**k * 1.xxxxxxxx...x. #
6172 # Note that k = -4, -3,..., or 3. #
6173 # Define F = sgn * 2**k * 1.xxxx1, i.e. the first 5 #
6174 # significant bits of X with a bit-1 attached at the 6-th #
6175 # bit position. Define u to be u = (X-F) / (1 + X*F). #
6177 # Step 3. Approximate arctan(u) by a polynomial poly. #
6179 # Step 4. Return arctan(F) + poly, arctan(F) is fetched from a #
6180 # table of values calculated beforehand. Exit. #
6182 # Step 5. If |X| >= 16, go to Step 7. #
6184 # Step 6. Approximate arctan(X) by an odd polynomial in X. Exit. #
6186 # Step 7. Define X' = -1/X. Approximate arctan(X') by an odd #
6187 # polynomial in X'. #
6188 # Arctan(X) = sign(X)*Pi/2 + arctan(X'). Exit. #
6190 #########################################################################
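#
# For illustration only (kept in comments, not assembled): the three
# cases above in hedged C for doubles. F keeps x's sign, exponent and
# first 4 fraction bits with a 1 forced into the 5th fraction bit (the
# 1.xxxx1 form above), libm atan() stands in for the ATANTBL lookup,
# and short Taylor polynomials stand in for the ATANA*/ATANB*/ATANC*
# coefficients.
#
#	#include <math.h>
#	#include <stdint.h>
#	#include <string.h>
#
#	static double atan_sketch(double x)
#	{
#	    double ax = fabs(x);
#	    if (ax < 0x1.0p-4)                    /* |x| < 1/16: odd poly in x */
#	        return x - x*x*x/3 + x*x*x*x*x/5;
#	    if (ax >= 16.0)                       /* |x| >= 16                 */
#	        return copysign(M_PI_2, x) + atan_sketch(-1.0 / x);
#
#	    uint64_t b; memcpy(&b, &x, sizeof b); /* 1/16 <= |x| < 16          */
#	    b &= 0xFFFF000000000000ULL;           /* keep sign/exp/4 frac bits */
#	    b |= 0x0000800000000000ULL;           /* force 5th fraction bit    */
#	    double f; memcpy(&f, &b, sizeof f);
#
#	    double u = (x - f) / (1.0 + x * f);   /* small, since f ~ x        */
#	    return atan(f) + (u - u*u*u/3);       /* atan(F) + poly(u)         */
#	}
#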
6192 ATANA3: long 0xBFF6687E,0x314987D8
6193 ATANA2: long 0x4002AC69,0x34A26DB3
6194 ATANA1: long 0xBFC2476F,0x4E1DA28E
6196 ATANB6: long 0x3FB34444,0x7F876989
6197 ATANB5: long 0xBFB744EE,0x7FAF45DB
6198 ATANB4: long 0x3FBC71C6,0x46940220
6199 ATANB3: long 0xBFC24924,0x921872F9
6200 ATANB2: long 0x3FC99999,0x99998FA9
6201 ATANB1: long 0xBFD55555,0x55555555
6203 ATANC5: long 0xBFB70BF3,0x98539E6A
6204 ATANC4: long 0x3FBC7187,0x962D1D7D
6205 ATANC3: long 0xBFC24924,0x827107B8
6206 ATANC2: long 0x3FC99999,0x9996263E
6207 ATANC1: long 0xBFD55555,0x55555536
6209 PPIBY2: long 0x3FFF0000,0xC90FDAA2,0x2168C235,0x00000000
6210 NPIBY2: long 0xBFFF0000,0xC90FDAA2,0x2168C235,0x00000000
6212 PTINY: long 0x00010000,0x80000000,0x00000000,0x00000000
6213 NTINY: long 0x80010000,0x80000000,0x00000000,0x00000000
6215 ATANTBL:
6216 long 0x3FFB0000,0x83D152C5,0x060B7A51,0x00000000
6217 long 0x3FFB0000,0x8BC85445,0x65498B8B,0x00000000
6218 long 0x3FFB0000,0x93BE4060,0x17626B0D,0x00000000
6219 long 0x3FFB0000,0x9BB3078D,0x35AEC202,0x00000000
6220 long 0x3FFB0000,0xA3A69A52,0x5DDCE7DE,0x00000000
6221 long 0x3FFB0000,0xAB98E943,0x62765619,0x00000000
6222 long 0x3FFB0000,0xB389E502,0xF9C59862,0x00000000
6223 long 0x3FFB0000,0xBB797E43,0x6B09E6FB,0x00000000
6224 long 0x3FFB0000,0xC367A5C7,0x39E5F446,0x00000000
6225 long 0x3FFB0000,0xCB544C61,0xCFF7D5C6,0x00000000
6226 long 0x3FFB0000,0xD33F62F8,0x2488533E,0x00000000
6227 long 0x3FFB0000,0xDB28DA81,0x62404C77,0x00000000
6228 long 0x3FFB0000,0xE310A407,0x8AD34F18,0x00000000
6229 long 0x3FFB0000,0xEAF6B0A8,0x188EE1EB,0x00000000
6230 long 0x3FFB0000,0xF2DAF194,0x9DBE79D5,0x00000000
6231 long 0x3FFB0000,0xFABD5813,0x61D47E3E,0x00000000
6232 long 0x3FFC0000,0x8346AC21,0x0959ECC4,0x00000000
6233 long 0x3FFC0000,0x8B232A08,0x304282D8,0x00000000
6234 long 0x3FFC0000,0x92FB70B8,0xD29AE2F9,0x00000000
6235 long 0x3FFC0000,0x9ACF476F,0x5CCD1CB4,0x00000000
6236 long 0x3FFC0000,0xA29E7630,0x4954F23F,0x00000000
6237 long 0x3FFC0000,0xAA68C5D0,0x8AB85230,0x00000000
6238 long 0x3FFC0000,0xB22DFFFD,0x9D539F83,0x00000000
6239 long 0x3FFC0000,0xB9EDEF45,0x3E900EA5,0x00000000
6240 long 0x3FFC0000,0xC1A85F1C,0xC75E3EA5,0x00000000
6241 long 0x3FFC0000,0xC95D1BE8,0x28138DE6,0x00000000
6242 long 0x3FFC0000,0xD10BF300,0x840D2DE4,0x00000000
6243 long 0x3FFC0000,0xD8B4B2BA,0x6BC05E7A,0x00000000
6244 long 0x3FFC0000,0xE0572A6B,0xB42335F6,0x00000000
6245 long 0x3FFC0000,0xE7F32A70,0xEA9CAA8F,0x00000000
6246 long 0x3FFC0000,0xEF888432,0x64ECEFAA,0x00000000
6247 long 0x3FFC0000,0xF7170A28,0xECC06666,0x00000000
6248 long 0x3FFD0000,0x812FD288,0x332DAD32,0x00000000
6249 long 0x3FFD0000,0x88A8D1B1,0x218E4D64,0x00000000
6250 long 0x3FFD0000,0x9012AB3F,0x23E4AEE8,0x00000000
6251 long 0x3FFD0000,0x976CC3D4,0x11E7F1B9,0x00000000
6252 long 0x3FFD0000,0x9EB68949,0x3889A227,0x00000000
6253 long 0x3FFD0000,0xA5EF72C3,0x4487361B,0x00000000
6254 long 0x3FFD0000,0xAD1700BA,0xF07A7227,0x00000000
6255 long 0x3FFD0000,0xB42CBCFA,0xFD37EFB7,0x00000000
6256 long 0x3FFD0000,0xBB303A94,0x0BA80F89,0x00000000
6257 long 0x3FFD0000,0xC22115C6,0xFCAEBBAF,0x00000000
6258 long 0x3FFD0000,0xC8FEF3E6,0x86331221,0x00000000
6259 long 0x3FFD0000,0xCFC98330,0xB4000C70,0x00000000
6260 long 0x3FFD0000,0xD6807AA1,0x102C5BF9,0x00000000
6261 long 0x3FFD0000,0xDD2399BC,0x31252AA3,0x00000000
6262 long 0x3FFD0000,0xE3B2A855,0x6B8FC517,0x00000000
6263 long 0x3FFD0000,0xEA2D764F,0x64315989,0x00000000
6264 long 0x3FFD0000,0xF3BF5BF8,0xBAD1A21D,0x00000000
6265 long 0x3FFE0000,0x801CE39E,0x0D205C9A,0x00000000
6266 long 0x3FFE0000,0x8630A2DA,0xDA1ED066,0x00000000
6267 long 0x3FFE0000,0x8C1AD445,0xF3E09B8C,0x00000000
6268 long 0x3FFE0000,0x91DB8F16,0x64F350E2,0x00000000
6269 long 0x3FFE0000,0x97731420,0x365E538C,0x00000000
6270 long 0x3FFE0000,0x9CE1C8E6,0xA0B8CDBA,0x00000000
6271 long 0x3FFE0000,0xA22832DB,0xCADAAE09,0x00000000
6272 long 0x3FFE0000,0xA746F2DD,0xB7602294,0x00000000
6273 long 0x3FFE0000,0xAC3EC0FB,0x997DD6A2,0x00000000
6274 long 0x3FFE0000,0xB110688A,0xEBDC6F6A,0x00000000
6275 long 0x3FFE0000,0xB5BCC490,0x59ECC4B0,0x00000000
6276 long 0x3FFE0000,0xBA44BC7D,0xD470782F,0x00000000
6277 long 0x3FFE0000,0xBEA94144,0xFD049AAC,0x00000000
6278 long 0x3FFE0000,0xC2EB4ABB,0x661628B6,0x00000000
6279 long 0x3FFE0000,0xC70BD54C,0xE602EE14,0x00000000
6280 long 0x3FFE0000,0xCD000549,0xADEC7159,0x00000000
6281 long 0x3FFE0000,0xD48457D2,0xD8EA4EA3,0x00000000
6282 long 0x3FFE0000,0xDB948DA7,0x12DECE3B,0x00000000
6283 long 0x3FFE0000,0xE23855F9,0x69E8096A,0x00000000
6284 long 0x3FFE0000,0xE8771129,0xC4353259,0x00000000
6285 long 0x3FFE0000,0xEE57C16E,0x0D379C0D,0x00000000
6286 long 0x3FFE0000,0xF3E10211,0xA87C3779,0x00000000
6287 long 0x3FFE0000,0xF919039D,0x758B8D41,0x00000000
6288 long 0x3FFE0000,0xFE058B8F,0x64935FB3,0x00000000
6289 long 0x3FFF0000,0x8155FB49,0x7B685D04,0x00000000
6290 long 0x3FFF0000,0x83889E35,0x49D108E1,0x00000000
6291 long 0x3FFF0000,0x859CFA76,0x511D724B,0x00000000
6292 long 0x3FFF0000,0x87952ECF,0xFF8131E7,0x00000000
6293 long 0x3FFF0000,0x89732FD1,0x9557641B,0x00000000
6294 long 0x3FFF0000,0x8B38CAD1,0x01932A35,0x00000000
6295 long 0x3FFF0000,0x8CE7A8D8,0x301EE6B5,0x00000000
6296 long 0x3FFF0000,0x8F46A39E,0x2EAE5281,0x00000000
6297 long 0x3FFF0000,0x922DA7D7,0x91888487,0x00000000
6298 long 0x3FFF0000,0x94D19FCB,0xDEDF5241,0x00000000
6299 long 0x3FFF0000,0x973AB944,0x19D2A08B,0x00000000
6300 long 0x3FFF0000,0x996FF00E,0x08E10B96,0x00000000
6301 long 0x3FFF0000,0x9B773F95,0x12321DA7,0x00000000
6302 long 0x3FFF0000,0x9D55CC32,0x0F935624,0x00000000
6303 long 0x3FFF0000,0x9F100575,0x006CC571,0x00000000
6304 long 0x3FFF0000,0xA0A9C290,0xD97CC06C,0x00000000
6305 long 0x3FFF0000,0xA22659EB,0xEBC0630A,0x00000000
6306 long 0x3FFF0000,0xA388B4AF,0xF6EF0EC9,0x00000000
6307 long 0x3FFF0000,0xA4D35F10,0x61D292C4,0x00000000
6308 long 0x3FFF0000,0xA60895DC,0xFBE3187E,0x00000000
6309 long 0x3FFF0000,0xA72A51DC,0x7367BEAC,0x00000000
6310 long 0x3FFF0000,0xA83A5153,0x0956168F,0x00000000
6311 long 0x3FFF0000,0xA93A2007,0x7539546E,0x00000000
6312 long 0x3FFF0000,0xAA9E7245,0x023B2605,0x00000000
6313 long 0x3FFF0000,0xAC4C84BA,0x6FE4D58F,0x00000000
6314 long 0x3FFF0000,0xADCE4A4A,0x606B9712,0x00000000
6315 long 0x3FFF0000,0xAF2A2DCD,0x8D263C9C,0x00000000
6316 long 0x3FFF0000,0xB0656F81,0xF22265C7,0x00000000
6317 long 0x3FFF0000,0xB1846515,0x0F71496A,0x00000000
6318 long 0x3FFF0000,0xB28AAA15,0x6F9ADA35,0x00000000
6319 long 0x3FFF0000,0xB37B44FF,0x3766B895,0x00000000
6320 long 0x3FFF0000,0xB458C3DC,0xE9630433,0x00000000
6321 long 0x3FFF0000,0xB525529D,0x562246BD,0x00000000
6322 long 0x3FFF0000,0xB5E2CCA9,0x5F9D88CC,0x00000000
6323 long 0x3FFF0000,0xB692CADA,0x7ACA1ADA,0x00000000
6324 long 0x3FFF0000,0xB736AEA7,0xA6925838,0x00000000
6325 long 0x3FFF0000,0xB7CFAB28,0x7E9F7B36,0x00000000
6326 long 0x3FFF0000,0xB85ECC66,0xCB219835,0x00000000
6327 long 0x3FFF0000,0xB8E4FD5A,0x20A593DA,0x00000000
6328 long 0x3FFF0000,0xB99F41F6,0x4AFF9BB5,0x00000000
6329 long 0x3FFF0000,0xBA7F1E17,0x842BBE7B,0x00000000
6330 long 0x3FFF0000,0xBB471285,0x7637E17D,0x00000000
6331 long 0x3FFF0000,0xBBFABE8A,0x4788DF6F,0x00000000
6332 long 0x3FFF0000,0xBC9D0FAD,0x2B689D79,0x00000000
6333 long 0x3FFF0000,0xBD306A39,0x471ECD86,0x00000000
6334 long 0x3FFF0000,0xBDB6C731,0x856AF18A,0x00000000
6335 long 0x3FFF0000,0xBE31CAC5,0x02E80D70,0x00000000
6336 long 0x3FFF0000,0xBEA2D55C,0xE33194E2,0x00000000
6337 long 0x3FFF0000,0xBF0B10B7,0xC03128F0,0x00000000
6338 long 0x3FFF0000,0xBF6B7A18,0xDACB778D,0x00000000
6339 long 0x3FFF0000,0xBFC4EA46,0x63FA18F6,0x00000000
6340 long 0x3FFF0000,0xC0181BDE,0x8B89A454,0x00000000
6341 long 0x3FFF0000,0xC065B066,0xCFBF6439,0x00000000
6342 long 0x3FFF0000,0xC0AE345F,0x56340AE6,0x00000000
6343 long 0x3FFF0000,0xC0F22291,0x9CB9E6A7,0x00000000
6345 set X,FP_SCR0
6346 set XDCARE,X+2
6347 set XFRAC,X+4
6348 set XFRACLO,X+8
6350 set ATANF,FP_SCR1
6351 set ATANFHI,ATANF+4
6352 set ATANFLO,ATANF+8
6354 global satan
6355 #--ENTRY POINT FOR ATAN(X), HERE X IS FINITE, NON-ZERO, AND NOT NAN'S
6356 satan:
6357 fmov.x (%a0),%fp0 # LOAD INPUT
6359 mov.l (%a0),%d1
6360 mov.w 4(%a0),%d1
6361 fmov.x %fp0,X(%a6)
6362 and.l &0x7FFFFFFF,%d1
6364 cmp.l %d1,&0x3FFB8000 # |X| >= 1/16?
6365 bge.b ATANOK1
6366 bra.w ATANSM
6368 ATANOK1:
6369 cmp.l %d1,&0x4002FFFF # |X| < 16 ?
6370 ble.b ATANMAIN
6371 bra.w ATANBIG
6373 #--THE MOST LIKELY CASE, |X| IN [1/16, 16). WE USE TABLE TECHNIQUE
6374 #--THE IDEA IS ATAN(X) = ATAN(F) + ATAN( [X-F] / [1+XF] ).
6375 #--SO IF F IS CHOSEN TO BE CLOSE TO X AND ATAN(F) IS STORED IN
6376 #--A TABLE, ALL WE NEED IS TO APPROXIMATE ATAN(U) WHERE
6377 #--U = (X-F)/(1+XF) IS SMALL (REMEMBER F IS CLOSE TO X). IT IS
6378 #--TRUE THAT A DIVIDE IS NOW NEEDED, BUT THE APPROXIMATION FOR
6379 #--ATAN(U) IS A VERY SHORT POLYNOMIAL AND THE INDEXING TO
6380 #--FETCH F AND THE SAVING OF REGISTERS CAN ALL BE HIDDEN UNDER THE
6381 #--DIVIDE. IN THE END THIS METHOD IS MUCH FASTER THAN A TRADITIONAL
6382 #--ONE. NOTE ALSO THAT THE TRADITIONAL SCHEME THAT APPROXIMATES
6383 #--ATAN(X) DIRECTLY WILL NEED TO USE A RATIONAL APPROXIMATION
6384 #--(DIVISION NEEDED) ANYWAY BECAUSE A POLYNOMIAL APPROXIMATION
6385 #--WILL INVOLVE A VERY LONG POLYNOMIAL.
6387 #--NOW WE SEE X AS +-2^K * 1.BBBBBBB....B <- 1. + 63 BITS
6388 #--WE CHOOSE F TO BE +-2^K * 1.BBBB1
6389 #--THAT IS, IT MATCHES THE EXPONENT AND FIRST 5 BITS OF X; THE
6390 #--SIXTH BIT IS SET TO 1. SINCE K = -4, -3, ..., 3, THERE
6391 #--ARE ONLY 8 TIMES 16 = 2^7 = 128 |F|'S. SINCE ATAN(-|F|) IS
6392 #-- -ATAN(|F|), WE NEED TO STORE ONLY ATAN(|F|).
6394 ATANMAIN:
6396 and.l &0xF8000000,XFRAC(%a6) # FIRST 5 BITS
6397 or.l &0x04000000,XFRAC(%a6) # SET 6-TH BIT TO 1
6398 mov.l &0x00000000,XFRACLO(%a6) # LOCATION OF X IS NOW F
6400 fmov.x %fp0,%fp1 # FP1 IS X
6401 fmul.x X(%a6),%fp1 # FP1 IS X*F, NOTE THAT X*F > 0
6402 fsub.x X(%a6),%fp0 # FP0 IS X-F
6403 fadd.s &0x3F800000,%fp1 # FP1 IS 1 + X*F
6404 fdiv.x %fp1,%fp0 # FP0 IS U = (X-F)/(1+X*F)
6406 #--WHILE THE DIVISION IS TAKING ITS TIME, WE FETCH ATAN(|F|)
6407 #--CREATE ATAN(F) AND STORE IT IN ATANF, AND
6408 #--SAVE REGISTER FP2.
6410 mov.l %d2,-(%sp) # SAVE d2 TEMPORARILY
6411 mov.l %d1,%d2 # THE EXP AND 16 BITS OF X
6412 and.l &0x00007800,%d1 # 4 VARYING BITS OF F'S FRACTION
6413 and.l &0x7FFF0000,%d2 # EXPONENT OF F
6414 sub.l &0x3FFB0000,%d2 # K+4
6415 asr.l &1,%d2
6416 add.l %d2,%d1 # THE 7 BITS IDENTIFYING F
6417 asr.l &7,%d1 # INDEX INTO TBL OF ATAN(|F|)
6418 lea ATANTBL(%pc),%a1
6419 add.l %d1,%a1 # ADDRESS OF ATAN(|F|)
6420 mov.l (%a1)+,ATANF(%a6)
6421 mov.l (%a1)+,ATANFHI(%a6)
6422 mov.l (%a1)+,ATANFLO(%a6) # ATANF IS NOW ATAN(|F|)
6423 mov.l X(%a6),%d1 # LOAD SIGN AND EXPO. AGAIN
6424 and.l &0x80000000,%d1 # SIGN(F)
6425 or.l %d1,ATANF(%a6) # ATANF IS NOW SIGN(F)*ATAN(|F|)
6426 mov.l (%sp)+,%d2 # RESTORE d2
6428 #--THAT'S ALL I HAVE TO DO FOR NOW,
6429 #--BUT ALAS, THE DIVIDE IS STILL CRANKING!
6431 #--U IN FP0, WE ARE NOW READY TO COMPUTE ATAN(U) AS
6432 #--U + A1*U*V*(A2 + V*(A3 + V)), V = U*U
6433 #--THE POLYNOMIAL MAY LOOK STRANGE, BUT IS NEVERTHELESS CORRECT.
6434 #--THE NATURAL FORM IS U + U*V*(A1 + V*(A2 + V*A3))
6435 #--WHAT WE HAVE HERE IS MERELY A1 = A3, A2 = A1/A3, A3 = A2/A3.
6436 #--THE REASON FOR THIS REARRANGEMENT IS TO MAKE THE INDEPENDENT
6437 #--PARTS A1*U*V AND (A2 + ... STUFF) MORE LOAD-BALANCED
6439 fmovm.x &0x04,-(%sp) # save fp2
6441 fmov.x %fp0,%fp1
6442 fmul.x %fp1,%fp1
6443 fmov.d ATANA3(%pc),%fp2
6444 fadd.x %fp1,%fp2 # A3+V
6445 fmul.x %fp1,%fp2 # V*(A3+V)
6446 fmul.x %fp0,%fp1 # U*V
6447 fadd.d ATANA2(%pc),%fp2 # A2+V*(A3+V)
6448 fmul.d ATANA1(%pc),%fp1 # A1*U*V
6449 fmul.x %fp2,%fp1 # A1*U*V*(A2+V*(A3+V))
6450 fadd.x %fp1,%fp0 # ATAN(U), FP1 RELEASED
6452 fmovm.x (%sp)+,&0x20 # restore fp2
6454 fmov.l %d0,%fpcr # restore users rnd mode,prec
6455 fadd.x ATANF(%a6),%fp0 # ATAN(X)
6456 bra t_inx2
6458 ATANBORS:
6459 #--|X| IS IN d0 IN COMPACT FORM. FP1, d0 SAVED.
6460 #--FP0 IS X AND |X| <= 1/16 OR |X| >= 16.
6461 cmp.l %d1,&0x3FFF8000
6462 bgt.w ATANBIG # I.E. |X| >= 16
6464 ATANSM:
6465 #--|X| <= 1/16
6466 #--IF |X| < 2^(-40), RETURN X AS ANSWER. OTHERWISE, APPROXIMATE
6467 #--ATAN(X) BY X + X*Y*(B1+Y*(B2+Y*(B3+Y*(B4+Y*(B5+Y*B6)))))
6468 #--WHICH IS X + X*Y*( [B1+Z*(B3+Z*B5)] + [Y*(B2+Z*(B4+Z*B6))] )
6469 #--WHERE Y = X*X, AND Z = Y*Y.
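#
#--In C, the two-piece evaluation described above looks like this
#--(atan_poly_small() is a made-up name; B[1]..B[6] stand for the
#--ATANB1..ATANB6 constants defined elsewhere in this file):
#
#    /* x + x*y*(B1+y*(...+y*B6)) split into two independent halves */
#    double atan_poly_small(double x, const double B[7])
#    {
#        double y = x * x;
#        double z = y * y;
#        double odd  = B[1] + z * (B[3] + z * B[5]);
#        double even = y * (B[2] + z * (B[4] + z * B[6]));
#        return x + x * y * (odd + even);
#    }
#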
6471 cmp.l %d1,&0x3FD78000
6472 blt.w ATANTINY
6474 #--COMPUTE POLYNOMIAL
6475 fmovm.x &0x0c,-(%sp) # save fp2/fp3
6477 fmul.x %fp0,%fp0 # FPO IS Y = X*X
6479 fmov.x %fp0,%fp1
6480 fmul.x %fp1,%fp1 # FP1 IS Z = Y*Y
6482 fmov.d ATANB6(%pc),%fp2
6483 fmov.d ATANB5(%pc),%fp3
6485 fmul.x %fp1,%fp2 # Z*B6
6486 fmul.x %fp1,%fp3 # Z*B5
6488 fadd.d ATANB4(%pc),%fp2 # B4+Z*B6
6489 fadd.d ATANB3(%pc),%fp3 # B3+Z*B5
6491 fmul.x %fp1,%fp2 # Z*(B4+Z*B6)
6492 fmul.x %fp3,%fp1 # Z*(B3+Z*B5)
6494 fadd.d ATANB2(%pc),%fp2 # B2+Z*(B4+Z*B6)
6495 fadd.d ATANB1(%pc),%fp1 # B1+Z*(B3+Z*B5)
6497 fmul.x %fp0,%fp2 # Y*(B2+Z*(B4+Z*B6))
6498 fmul.x X(%a6),%fp0 # X*Y
6500 fadd.x %fp2,%fp1 # [B1+Z*(B3+Z*B5)]+[Y*(B2+Z*(B4+Z*B6))]
6502 fmul.x %fp1,%fp0 # X*Y*([B1+Z*(B3+Z*B5)]+[Y*(B2+Z*(B4+Z*B6))])
6504 fmovm.x (%sp)+,&0x30 # restore fp2/fp3
6506 fmov.l %d0,%fpcr # restore users rnd mode,prec
6507 fadd.x X(%a6),%fp0
6508 bra t_inx2
6510 ATANTINY:
6511 #--|X| < 2^(-40), ATAN(X) = X
6513 fmov.l %d0,%fpcr # restore users rnd mode,prec
6514 mov.b &FMOV_OP,%d1 # last inst is MOVE
6515 fmov.x X(%a6),%fp0 # last inst - possible exception set
6517 bra t_catch
6519 ATANBIG:
6520 #--IF |X| > 2^(100), RETURN SIGN(X)*(PI/2 - TINY). OTHERWISE,
6521 #--RETURN SIGN(X)*PI/2 + ATAN(-1/X).
6522 cmp.l %d1,&0x40638000
6523 bgt.w ATANHUGE
6525 #--APPROXIMATE ATAN(-1/X) BY
6526 #--X'+X'*Y*(C1+Y*(C2+Y*(C3+Y*(C4+Y*C5)))), X' = -1/X, Y = X'*X'
6527 #--THIS CAN BE RE-WRITTEN AS
6528 #--X'+X'*Y*( [C1+Z*(C3+Z*C5)] + [Y*(C2+Z*C4)] ), Z = Y*Y.
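#
#--In C terms the reduction below is simply the following; atan_big()
#--is a made-up name, and the polynomial in the code replaces atan():
#
#    #include <math.h>
#
#    double atan_big(double x)               /* 16 <= |x| <= 2^(100) */
#    {
#        const double piby2 = 1.57079632679489661923;
#        return copysign(piby2, x) + atan(-1.0 / x);
#    }
#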
6530 fmovm.x &0x0c,-(%sp) # save fp2/fp3
6532 fmov.s &0xBF800000,%fp1 # LOAD -1
6533 fdiv.x %fp0,%fp1 # FP1 IS -1/X
6535 #--DIVIDE IS STILL CRANKING
6537 fmov.x %fp1,%fp0 # FP0 IS X'
6538 fmul.x %fp0,%fp0 # FP0 IS Y = X'*X'
6539 fmov.x %fp1,X(%a6) # X IS REALLY X'
6541 fmov.x %fp0,%fp1
6542 fmul.x %fp1,%fp1 # FP1 IS Z = Y*Y
6544 fmov.d ATANC5(%pc),%fp3
6545 fmov.d ATANC4(%pc),%fp2
6547 fmul.x %fp1,%fp3 # Z*C5
6548 fmul.x %fp1,%fp2 # Z*C4
6550 fadd.d ATANC3(%pc),%fp3 # C3+Z*C5
6551 fadd.d ATANC2(%pc),%fp2 # C2+Z*C4
6553 fmul.x %fp3,%fp1 # Z*(C3+Z*C5), FP3 RELEASED
6554 fmul.x %fp0,%fp2 # Y*(C2+Z*C4)
6556 fadd.d ATANC1(%pc),%fp1 # C1+Z*(C3+Z*C5)
6557 fmul.x X(%a6),%fp0 # X'*Y
6559 fadd.x %fp2,%fp1 # [Y*(C2+Z*C4)]+[C1+Z*(C3+Z*C5)]
6561 fmul.x %fp1,%fp0 # X'*Y*([C1+Z*(C3+Z*C5)]
6562 # ... +[Y*(C2+Z*C4)])
6563 fadd.x X(%a6),%fp0
6565 fmovm.x (%sp)+,&0x30 # restore fp2/fp3
6567 fmov.l %d0,%fpcr # restore users rnd mode,prec
6568 tst.b (%a0)
6569 bpl.b pos_big
6571 neg_big:
6572 fadd.x NPIBY2(%pc),%fp0
6573 bra t_minx2
6575 pos_big:
6576 fadd.x PPIBY2(%pc),%fp0
6577 bra t_pinx2
6579 ATANHUGE:
6580 #--RETURN SIGN(X)*(PIBY2 - TINY) = SIGN(X)*PIBY2 - SIGN(X)*TINY
6581 tst.b (%a0)
6582 bpl.b pos_huge
6584 neg_huge:
6585 fmov.x NPIBY2(%pc),%fp0
6586 fmov.l %d0,%fpcr
6587 fadd.x PTINY(%pc),%fp0
6588 bra t_minx2
6590 pos_huge:
6591 fmov.x PPIBY2(%pc),%fp0
6592 fmov.l %d0,%fpcr
6593 fadd.x NTINY(%pc),%fp0
6594 bra t_pinx2
6596 global satand
6597 #--ENTRY POINT FOR ATAN(X) FOR DENORMALIZED ARGUMENT
6598 satand:
6599 bra t_extdnrm
6601 #########################################################################
6602 # sasin(): computes the inverse sine of a normalized input #
6603 # sasind(): computes the inverse sine of a denormalized input #
6605 # INPUT *************************************************************** #
6606 # a0 = pointer to extended precision input #
6607 # d0 = round precision,mode #
6609 # OUTPUT ************************************************************** #
6610 # fp0 = arcsin(X) #
6612 # ACCURACY and MONOTONICITY ******************************************* #
6613 # The returned result is within 3 ulps in 64 significant bits, #
6614 # i.e. within 0.5001 ulp to 53 bits if the result is subsequently #
6615 # rounded to double precision. The result is provably monotonic #
6616 # in double precision. #
6618 # ALGORITHM *********************************************************** #
6620 # ASIN #
6621 # 1. If |X| >= 1, go to 3. #
6623 # 2. (|X| < 1) Calculate asin(X) by #
6624 # z := sqrt( [1-X][1+X] ) #
6625 # asin(X) = atan( x / z ). #
6626 # Exit. #
6628 # 3. If |X| > 1, go to 5. #
6630 # 4. (|X| = 1) sgn := sign(X), return asin(X) := sgn * Pi/2. Exit.#
6632 # 5. (|X| > 1) Generate an invalid operation by 0 * infinity. #
6633 # Exit. #
6635 #########################################################################
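#
# A double-precision C sketch of the ASIN algorithm above; asin_via_atan()
# is a made-up name, and in the package the quotient is handed to satan():
#
#    #include <math.h>
#
#    double asin_via_atan(double x)
#    {
#        double ax = fabs(x);
#        if (ax < 1.0)                            /* step 2 */
#            return atan(x / sqrt((1.0 - x) * (1.0 + x)));
#        if (ax == 1.0)                           /* step 4 */
#            return copysign(1.57079632679489661923, x);
#        return 0.0 * INFINITY;                   /* step 5: invalid op */
#    }
#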
6637 global sasin
6638 sasin:
6639 fmov.x (%a0),%fp0 # LOAD INPUT
6641 mov.l (%a0),%d1
6642 mov.w 4(%a0),%d1
6643 and.l &0x7FFFFFFF,%d1
6644 cmp.l %d1,&0x3FFF8000
6645 bge.b ASINBIG
6647 # This catch is added here for the '060 QSP. Originally, the call to
6648 # satan() would handle this case by causing the exception which would
6649 # not be caught until gen_except(). Now, with the exceptions being
6650 # detected inside of satan(), the exception would have been handled there
6651 # instead of inside sasin() as expected.
6652 cmp.l %d1,&0x3FD78000
6653 blt.w ASINTINY
6655 #--THIS IS THE USUAL CASE, |X| < 1
6656 #--ASIN(X) = ATAN( X / SQRT( (1-X)(1+X) ) )
6658 ASINMAIN:
6659 fmov.s &0x3F800000,%fp1
6660 fsub.x %fp0,%fp1 # 1-X
6661 fmovm.x &0x4,-(%sp) # {fp2}
6662 fmov.s &0x3F800000,%fp2
6663 fadd.x %fp0,%fp2 # 1+X
6664 fmul.x %fp2,%fp1 # (1+X)(1-X)
6665 fmovm.x (%sp)+,&0x20 # {fp2}
6666 fsqrt.x %fp1 # SQRT([1-X][1+X])
6667 fdiv.x %fp1,%fp0 # X/SQRT([1-X][1+X])
6668 fmovm.x &0x01,-(%sp) # save X/SQRT(...)
6669 lea (%sp),%a0 # pass ptr to X/SQRT(...)
6670 bsr satan
6671 add.l &0xc,%sp # clear X/SQRT(...) from stack
6672 bra t_inx2
6674 ASINBIG:
6675 fabs.x %fp0 # |X|
6676 fcmp.s %fp0,&0x3F800000
6677 fbgt t_operr # cause an operr exception
6679 #--|X| = 1, ASIN(X) = +- PI/2.
6680 ASINONE:
6681 fmov.x PIBY2(%pc),%fp0
6682 mov.l (%a0),%d1
6683 and.l &0x80000000,%d1 # SIGN BIT OF X
6684 or.l &0x3F800000,%d1 # +-1 IN SGL FORMAT
6685 mov.l %d1,-(%sp) # push SIGN(X) IN SGL-FMT
6686 fmov.l %d0,%fpcr
6687 fmul.s (%sp)+,%fp0
6688 bra t_inx2
6690 #--|X| < 2^(-40), ASIN(X) = X
6691 ASINTINY:
6692 fmov.l %d0,%fpcr # restore users rnd mode,prec
6693 mov.b &FMOV_OP,%d1 # last inst is MOVE
6694 fmov.x (%a0),%fp0 # last inst - possible exception
6695 bra t_catch
6697 global sasind
6698 #--ASIN(X) = X FOR DENORMALIZED X
6699 sasind:
6700 bra t_extdnrm
6702 #########################################################################
6703 # sacos(): computes the inverse cosine of a normalized input #
6704 # sacosd(): computes the inverse cosine of a denormalized input #
6706 # INPUT *************************************************************** #
6707 # a0 = pointer to extended precision input #
6708 # d0 = round precision,mode #
6710 # OUTPUT ************************************************************** #
6711 # fp0 = arccos(X) #
6713 # ACCURACY and MONOTONICITY ******************************************* #
6714 # The returned result is within 3 ulps in 64 significant bits, #
6715 # i.e. within 0.5001 ulp to 53 bits if the result is subsequently #
6716 # rounded to double precision. The result is provably monotonic #
6717 # in double precision. #
6719 # ALGORITHM *********************************************************** #
6721 # ACOS #
6722 # 1. If |X| >= 1, go to 3. #
6724 # 2. (|X| < 1) Calculate acos(X) by #
6725 # z := (1-X) / (1+X) #
6726 # acos(X) = 2 * atan( sqrt(z) ). #
6727 # Exit. #
6729 # 3. If |X| > 1, go to 5. #
6731 # 4. (|X| = 1) If X > 0, return 0. Otherwise, return Pi. Exit. #
6733 # 5. (|X| > 1) Generate an invalid operation by 0 * infinity. #
6734 # Exit. #
6736 #########################################################################
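#
# A double-precision C sketch of the ACOS algorithm above; acos_via_atan()
# is a made-up name, and in the package the square root is handed to satan():
#
#    #include <math.h>
#
#    double acos_via_atan(double x)
#    {
#        double ax = fabs(x);
#        if (ax < 1.0)                            /* step 2 */
#            return 2.0 * atan(sqrt((1.0 - x) / (1.0 + x)));
#        if (ax == 1.0)                           /* step 4 */
#            return (x > 0.0) ? 0.0 : 3.14159265358979323846;
#        return 0.0 * INFINITY;                   /* step 5: invalid op */
#    }
#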
6738 global sacos
6739 sacos:
6740 fmov.x (%a0),%fp0 # LOAD INPUT
6742 mov.l (%a0),%d1 # pack exp w/ upper 16 fraction
6743 mov.w 4(%a0),%d1
6744 and.l &0x7FFFFFFF,%d1
6745 cmp.l %d1,&0x3FFF8000
6746 bge.b ACOSBIG
6748 #--THIS IS THE USUAL CASE, |X| < 1
6749 #--ACOS(X) = 2 * ATAN( SQRT( (1-X)/(1+X) ) )
6751 ACOSMAIN:
6752 fmov.s &0x3F800000,%fp1
6753 fadd.x %fp0,%fp1 # 1+X
6754 fneg.x %fp0 # -X
6755 fadd.s &0x3F800000,%fp0 # 1-X
6756 fdiv.x %fp1,%fp0 # (1-X)/(1+X)
6757 fsqrt.x %fp0 # SQRT((1-X)/(1+X))
6758 mov.l %d0,-(%sp) # save original users fpcr
6759 clr.l %d0
6760 fmovm.x &0x01,-(%sp) # save SQRT(...) to stack
6761 lea (%sp),%a0 # pass ptr to sqrt
6762 bsr satan # ATAN(SQRT([1-X]/[1+X]))
6763 add.l &0xc,%sp # clear SQRT(...) from stack
6765 fmov.l (%sp)+,%fpcr # restore users round prec,mode
6766 fadd.x %fp0,%fp0 # 2 * ATAN( STUFF )
6767 bra t_pinx2
6769 ACOSBIG:
6770 fabs.x %fp0
6771 fcmp.s %fp0,&0x3F800000
6772 fbgt t_operr # cause an operr exception
6774 #--|X| = 1, ACOS(X) = 0 OR PI
6775 tst.b (%a0) # is X positive or negative?
6776 bpl.b ACOSP1
6778 #--X = -1
6779 #Returns PI and inexact exception
6780 ACOSM1:
6781 fmov.x PI(%pc),%fp0 # load PI
6782 fmov.l %d0,%fpcr # load round mode,prec
6783 fadd.s &0x00800000,%fp0 # add a small value
6784 bra t_pinx2
6786 ACOSP1:
6787 bra ld_pzero # answer is positive zero
6789 global sacosd
6790 #--ACOS(X) = PI/2 FOR DENORMALIZED X
6791 sacosd:
6792 fmov.l %d0,%fpcr # load user's rnd mode/prec
6793 fmov.x PIBY2(%pc),%fp0
6794 bra t_pinx2
6796 #########################################################################
6797 # setox(): computes the exponential for a normalized input #
6798 # setoxd(): computes the exponential for a denormalized input #
6799 # setoxm1(): computes the exponential minus 1 for a normalized input #
6800 # setoxm1d(): computes the exponential minus 1 for a denormalized input #
6802 # INPUT *************************************************************** #
6803 # a0 = pointer to extended precision input #
6804 # d0 = round precision,mode #
6806 # OUTPUT ************************************************************** #
6807 # fp0 = exp(X) or exp(X)-1 #
6809 # ACCURACY and MONOTONICITY ******************************************* #
6810 # The returned result is within 0.85 ulps in 64 significant bits, #
6811 # i.e. within 0.5001 ulp to 53 bits if the result is subsequently #
6812 # rounded to double precision. The result is provably monotonic #
6813 # in double precision. #
6815 # ALGORITHM and IMPLEMENTATION **************************************** #
6817 # setoxd #
6818 # ------ #
6819 # Step 1. Set ans := 1.0 #
6821 # Step 2. Return ans := ans + sign(X)*2^(-126). Exit. #
6822 # Notes: This will always generate one exception -- inexact. #
6825 # setox #
6826 # ----- #
6828 # Step 1. Filter out extreme cases of input argument. #
6829 # 1.1 If |X| >= 2^(-65), go to Step 1.3. #
6830 # 1.2 Go to Step 7. #
6831 # 1.3 If |X| < 16380 log(2), go to Step 2. #
6832 # 1.4 Go to Step 8. #
6833 # Notes: The usual case should take the branches 1.1 -> 1.3 -> 2.#
6834 # To avoid the use of floating-point comparisons, a #
6835 # compact representation of |X| is used. This format is a #
6836 # 32-bit integer, the upper (more significant) 16 bits #
6837 # are the sign and biased exponent field of |X|; the #
6838 # lower 16 bits are the 16 most significant fraction #
6839 # (including the explicit bit) bits of |X|. Consequently, #
6840 # the comparisons in Steps 1.1 and 1.3 can be performed #
6841 # by integer comparison. Note also that the constant #
6842 # 16380 log(2) used in Step 1.3 is also in the compact #
6843 # form. Thus taking the branch to Step 2 guarantees #
6844 # |X| < 16380 log(2). There is no harm in having a small #
6845 # number of cases where |X| is less than, but close to, #
6846 # 16380 log(2) and the branch to Step 8 is taken. #
6848 # Step 2. Calculate N = round-to-nearest-int( X * 64/log2 ). #
6849 # 2.1 Set AdjFlag := 0 (indicates the branch 1.3 -> 2 #
6850 # was taken) #
6851 # 2.2 N := round-to-nearest-integer( X * 64/log2 ). #
6852 # 2.3 Calculate J = N mod 64; so J = 0,1,2,..., #
6853 # or 63. #
6854 # 2.4 Calculate M = (N - J)/64; so N = 64M + J. #
6855 # 2.5 Calculate the address of the stored value of #
6856 # 2^(J/64). #
6857 # 2.6 Create the value Scale = 2^M. #
6858 # Notes: The calculation in 2.2 is really performed by #
6859 # Z := X * constant #
6860 # N := round-to-nearest-integer(Z) #
6861 # where #
6862 # constant := single-precision( 64/log 2 ). #
6864 # Using a single-precision constant avoids memory #
6865 # access. Another effect of using a single-precision #
6866 # "constant" is that the calculated value Z is #
6868 # Z = X*(64/log2)*(1+eps), |eps| <= 2^(-24). #
6870 # This error has to be considered later in Steps 3 and 4. #
6872 # Step 3. Calculate X - N*log2/64. #
6873 # 3.1 R := X + N*L1, #
6874 # where L1 := single-precision(-log2/64). #
6875 # 3.2 R := R + N*L2, #
6876 # L2 := extended-precision(-log2/64 - L1).#
6877 # Notes: a) The way L1 and L2 are chosen ensures L1+L2 #
6878 # approximate the value -log2/64 to 88 bits of accuracy. #
6879 # b) N*L1 is exact because N is no longer than 22 bits #
6880 # and L1 is no longer than 24 bits. #
6881 # c) The calculation X+N*L1 is also exact due to #
6882 # cancellation. Thus, R is practically X+N(L1+L2) to full #
6883 # 64 bits. #
6884 # d) It is important to estimate how large can |R| be #
6885 # after Step 3.2. #
6887 # N = rnd-to-int( X*64/log2 (1+eps) ), |eps|<=2^(-24) #
6888 # X*64/log2 (1+eps) = N + f, |f| <= 0.5 #
6889 # X*64/log2 - N = f - eps*X*64/log2 #
6890 # X - N*log2/64 = f*log2/64 - eps*X #
6893 # Now |X| <= 16446 log2, thus #
6895 # |X - N*log2/64| <= (0.5 + 16446/2^(18))*log2/64 #
6896 # <= 0.57 log2/64. #
6897 # This bound will be used in Step 4. #
6899 # Step 4. Approximate exp(R)-1 by a polynomial #
6900 # p = R + R*R*(A1 + R*(A2 + R*(A3 + R*(A4 + R*A5)))) #
6901 # Notes: a) In order to reduce memory access, the coefficients #
6902 # are made as "short" as possible: A1 (which is 1/2), A4 #
6903 # and A5 are single precision; A2 and A3 are double #
6904 # precision. #
6905 # b) Even with the restrictions above, #
6906 # |p - (exp(R)-1)| < 2^(-68.8) for all |R| <= 0.0062. #
6907 # Note that 0.0062 is slightly bigger than 0.57 log2/64. #
6908 # c) To fully use the pipeline, p is separated into #
6909 # two independent pieces of roughly equal complexities #
6910 # p = [ R + R*S*(A2 + S*A4) ] + #
6911 # [ S*(A1 + S*(A3 + S*A5)) ] #
6912 # where S = R*R. #
6914 # Step 5. Compute 2^(J/64)*exp(R) = 2^(J/64)*(1+p) by #
6915 # ans := T + ( T*p + t) #
6916 # where T and t are the stored values for 2^(J/64). #
6917 # Notes: 2^(J/64) is stored as T and t where T+t approximates #
6918 # 2^(J/64) to roughly 85 bits; T is in extended precision #
6919 # and t is in single precision. Note also that T is #
6920 # rounded to 62 bits so that the last two bits of T are #
6921 # zero. The reason for such a special form is that T-1, #
6922 # T-2, and T-8 will all be exact --- a property that will #
6923 # give much more accurate computation of the function #
6924 # EXPM1. #
6926 # Step 6. Reconstruction of exp(X) #
6927 # exp(X) = 2^M * 2^(J/64) * exp(R). #
6928 # 6.1 If AdjFlag = 0, go to 6.3 #
6929 # 6.2 ans := ans * AdjScale #
6930 # 6.3 Restore the user FPCR #
6931 # 6.4 Return ans := ans * Scale. Exit. #
6932 # Notes: If AdjFlag = 0, we have X = Mlog2 + Jlog2/64 + R, #
6933 # |M| <= 16380, and Scale = 2^M. Moreover, exp(X) will #
6934 # neither overflow nor underflow. If AdjFlag = 1, that #
6935 # means that #
6936 # X = (M1+M)log2 + Jlog2/64 + R, |M1+M| >= 16380. #
6937 # Hence, exp(X) may overflow or underflow or neither. #
6938 # When that is the case, AdjScale = 2^(M1) where M1 is #
6939 # approximately M. Thus 6.2 will never cause #
6940 # over/underflow. Possible exception in 6.4 is overflow #
6941 # or underflow. The inexact exception is not generated in #
6942 # 6.4. Although one can argue that the inexact flag #
6943 # should always be raised, simulating that exception #
6944 # costs more than the flag is worth in practical use. #
6946 # Step 7. Return 1 + X. #
6947 # 7.1 ans := X #
6948 # 7.2 Restore user FPCR. #
6949 # 7.3 Return ans := 1 + ans. Exit #
6950 # Notes: For non-zero X, the inexact exception will always be #
6951 # raised by 7.3. That is the only exception raised by 7.3.#
6952 # Note also that we use the FMOVEM instruction to move X #
6953 # in Step 7.1 to avoid unnecessary trapping. (Although #
6954 # the FMOVEM may not seem relevant since X is normalized, #
6955 # the precaution will be useful in the library version of #
6956 # this code where the separate entry for denormalized #
6957 # inputs will be done away with.) #
6959 # Step 8. Handle exp(X) where |X| >= 16380log2. #
6960 # 8.1 If |X| > 16480 log2, go to Step 9. #
6961 # (mimic 2.2 - 2.6) #
6962 # 8.2 N := round-to-integer( X * 64/log2 ) #
6963 # 8.3 Calculate J = N mod 64, J = 0,1,...,63 #
6964 # 8.4 K := (N-J)/64, M1 := truncate(K/2), M = K-M1, #
6965 # AdjFlag := 1. #
6966 # 8.5 Calculate the address of the stored value #
6967 # 2^(J/64). #
6968 # 8.6 Create the values Scale = 2^M, AdjScale = 2^M1. #
6969 # 8.7 Go to Step 3. #
6970 # Notes: Refer to notes for 2.2 - 2.6. #
6972 # Step 9. Handle exp(X), |X| > 16480 log2. #
6973 # 9.1 If X < 0, go to 9.3 #
6974 # 9.2 ans := Huge, go to 9.4 #
6975 # 9.3 ans := Tiny. #
6976 # 9.4 Restore user FPCR. #
6977 # 9.5 Return ans := ans * ans. Exit. #
6978 # Notes: Exp(X) will surely overflow or underflow, depending on #
6979 # X's sign. "Huge" and "Tiny" are respectively large/tiny #
6980 # extended-precision numbers whose square over/underflow #
6981 # with an inexact result. Thus, 9.5 always raises the #
6982 # inexact together with either overflow or underflow. #
6984 # setoxm1d #
6985 # -------- #
6987 # Step 1. Set ans := 0 #
6989 # Step 2. Return ans := X + ans. Exit. #
6990 # Notes: This will return X with the appropriate rounding #
6991 # precision prescribed by the user FPCR. #
6993 # setoxm1 #
6994 # ------- #
6996 # Step 1. Check |X| #
6997 # 1.1 If |X| >= 1/4, go to Step 1.3. #
6998 # 1.2 Go to Step 7. #
6999 # 1.3 If |X| < 70 log(2), go to Step 2. #
7000 # 1.4 Go to Step 10. #
7001 # Notes: The usual case should take the branches 1.1 -> 1.3 -> 2.#
7002 # However, it is conceivable |X| can be small very often #
7003 # because EXPM1 is intended to evaluate exp(X)-1 #
7004 # accurately when |X| is small. For further details on #
7005 # the comparisons, see the notes on Step 1 of setox. #
7007 # Step 2. Calculate N = round-to-nearest-int( X * 64/log2 ). #
7008 # 2.1 N := round-to-nearest-integer( X * 64/log2 ). #
7009 # 2.2 Calculate J = N mod 64; so J = 0,1,2,..., #
7010 # or 63. #
7011 # 2.3 Calculate M = (N - J)/64; so N = 64M + J. #
7012 # 2.4 Calculate the address of the stored value of #
7013 # 2^(J/64). #
7014 # 2.5 Create the values Sc = 2^M and #
7015 # OnebySc := -2^(-M). #
7016 # Notes: See the notes on Step 2 of setox. #
7018 # Step 3. Calculate X - N*log2/64. #
7019 # 3.1 R := X + N*L1, #
7020 # where L1 := single-precision(-log2/64). #
7021 # 3.2 R := R + N*L2, #
7022 # L2 := extended-precision(-log2/64 - L1).#
7023 # Notes: Applying the analysis of Step 3 of setox in this case #
7024 # shows that |R| <= 0.0055 (note that |X| <= 70 log2 in #
7025 # this case). #
7027 # Step 4. Approximate exp(R)-1 by a polynomial #
7028 # p = R+R*R*(A1+R*(A2+R*(A3+R*(A4+R*(A5+R*A6))))) #
7029 # Notes: a) In order to reduce memory access, the coefficients #
7030 # are made as "short" as possible: A1 (which is 1/2), A5 #
7031 # and A6 are single precision; A2, A3 and A4 are double #
7032 # precision. #
7033 # b) Even with the restriction above, #
7034 # |p - (exp(R)-1)| < |R| * 2^(-72.7) #
7035 # for all |R| <= 0.0055. #
7036 # c) To fully use the pipeline, p is separated into #
7037 # two independent pieces of roughly equal complexity #
7038 # p = [ R*S*(A2 + S*(A4 + S*A6)) ] + #
7039 # [ R + S*(A1 + S*(A3 + S*A5)) ] #
7040 # where S = R*R. #
7042 # Step 5. Compute 2^(J/64)*p by #
7043 # p := T*p #
7044 # where T and t are the stored values for 2^(J/64). #
7045 # Notes: 2^(J/64) is stored as T and t where T+t approximates #
7046 # 2^(J/64) to roughly 85 bits; T is in extended precision #
7047 # and t is in single precision. Note also that T is #
7048 # rounded to 62 bits so that the last two bits of T are #
7049 # zero. The reason for such a special form is that T-1, #
7050 # T-2, and T-8 will all be exact --- a property that will #
7051 # be exploited in Step 6 below. The total relative error #
7052 # in p is no bigger than 2^(-67.7) compared to the final #
7053 # result. #
7055 # Step 6. Reconstruction of exp(X)-1 #
7056 # exp(X)-1 = 2^M * ( 2^(J/64) + p - 2^(-M) ). #
7057 # 6.1 If M <= 63, go to Step 6.3. #
7058 # 6.2 ans := T + (p + (t + OnebySc)). Go to 6.6 #
7059 # 6.3 If M >= -3, go to 6.5. #
7060 # 6.4 ans := (T + (p + t)) + OnebySc. Go to 6.6 #
7061 # 6.5 ans := (T + OnebySc) + (p + t). #
7062 # 6.6 Restore user FPCR. #
7063 # 6.7 Return ans := Sc * ans. Exit. #
7064 # Notes: The various arrangements of the expressions give #
7065 # accurate evaluations. #
7067 # Step 7. exp(X)-1 for |X| < 1/4. #
7068 # 7.1 If |X| >= 2^(-65), go to Step 9. #
7069 # 7.2 Go to Step 8. #
7071 # Step 8. Calculate exp(X)-1, |X| < 2^(-65). #
7072 # 8.1 If |X| < 2^(-16312), goto 8.3 #
7073 # 8.2 Restore FPCR; return ans := X - 2^(-16382). #
7074 # Exit. #
7075 # 8.3 X := X * 2^(140). #
7076 # 8.4 Restore FPCR; ans := ans - 2^(-16382). #
7077 # Return ans := ans*2^(140). Exit #
7078 # Notes: The idea is to return "X - tiny" under the user #
7079 # precision and rounding modes. To avoid unnecessary #
7080 # inefficiency, we stay away from denormalized numbers #
7081 # as best we can. For |X| >= 2^(-16312), the #
7082 # straightforward 8.2 generates the inexact exception as #
7083 # the case warrants. #
7085 # Step 9. Calculate exp(X)-1, |X| < 1/4, by a polynomial #
7086 # p = X + X*X*(B1 + X*(B2 + ... + X*B12)) #
7087 # Notes: a) In order to reduce memory access, the coefficients #
7088 # are made as "short" as possible: B1 (which is 1/2), B9 #
7089 # to B12 are single precision; B3 to B8 are double #
7090 # precision; and B2 is double extended. #
7091 # b) Even with the restriction above, #
7092 # |p - (exp(X)-1)| < |X| 2^(-70.6) #
7093 # for all |X| <= 0.251. #
7094 # Note that 0.251 is slightly bigger than 1/4. #
7095 # c) To fully preserve accuracy, the polynomial is #
7096 # computed as #
7097 # X + ( S*B1 + Q ) where S = X*X and #
7098 # Q = X*S*(B2 + X*(B3 + ... + X*B12)) #
7099 # d) To fully use the pipeline, Q is separated into #
7100 # two independent pieces of roughly equal complexity #
7101 # Q = [ X*S*(B2 + S*(B4 + ... + S*B12)) ] + #
7102 # [ S*S*(B3 + S*(B5 + ... + S*B11)) ] #
7104 # Step 10. Calculate exp(X)-1 for |X| >= 70 log 2. #
7105 # 10.1 If X >= 70log2 , exp(X) - 1 = exp(X) for all #
7106 # practical purposes. Therefore, go to Step 1 of setox. #
7107 # 10.2 If X <= -70log2, exp(X) - 1 = -1 for all practical #
7108 # purposes. #
7109 # ans := -1 #
7110 # Restore user FPCR #
7111 # Return ans := ans + 2^(-126). Exit. #
7112 # Notes: 10.2 will always create an inexact and return -1 + tiny #
7113 # in the user rounding precision and mode. #
7115 #########################################################################
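#
# A double-precision C sketch of the setox reduction (Steps 2-6 above).
# exp_by_table() is a made-up name; exp2() stands in for the 64-entry
# 2^(J/64) table with its T+t splitting, expm1() stands in for the
# degree-5 polynomial, and the L1/L2 splitting of log2/64 is collapsed
# into a single constant, so only the structure is shown:
#
#    #include <math.h>
#
#    double exp_by_table(double x)
#    {
#        const double ln2 = 6.9314718055994530942e-01;
#        int n = (int)lrint(x * (64.0 / ln2));    /* N = rnd(X*64/log2)  */
#        int j = ((n % 64) + 64) % 64;            /* J = N mod 64        */
#        int m = (n - j) / 64;                    /* M, so N = 64M + J   */
#        double r = x - (double)n * (ln2 / 64.0); /* reduced argument R  */
#        double p = expm1(r);                     /* exp(R) - 1          */
#        double t = exp2((double)j / 64.0);       /* 2^(J/64), tabulated */
#        return ldexp(t + t * p, m);              /* 2^M * 2^(J/64) * exp(R) */
#    }
#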
7117 L2: long 0x3FDC0000,0x82E30865,0x4361C4C6,0x00000000
7119 EEXPA3: long 0x3FA55555,0x55554CC1
7120 EEXPA2: long 0x3FC55555,0x55554A54
7122 EM1A4: long 0x3F811111,0x11174385
7123 EM1A3: long 0x3FA55555,0x55554F5A
7125 EM1A2: long 0x3FC55555,0x55555555,0x00000000,0x00000000
7127 EM1B8: long 0x3EC71DE3,0xA5774682
7128 EM1B7: long 0x3EFA01A0,0x19D7CB68
7130 EM1B6: long 0x3F2A01A0,0x1A019DF3
7131 EM1B5: long 0x3F56C16C,0x16C170E2
7133 EM1B4: long 0x3F811111,0x11111111
7134 EM1B3: long 0x3FA55555,0x55555555
7136 EM1B2: long 0x3FFC0000,0xAAAAAAAA,0xAAAAAAAB
7137 long 0x00000000
7139 TWO140: long 0x48B00000,0x00000000
7140 TWON140:
7141 long 0x37300000,0x00000000
7143 EEXPTBL:
7144 long 0x3FFF0000,0x80000000,0x00000000,0x00000000
7145 long 0x3FFF0000,0x8164D1F3,0xBC030774,0x9F841A9B
7146 long 0x3FFF0000,0x82CD8698,0xAC2BA1D8,0x9FC1D5B9
7147 long 0x3FFF0000,0x843A28C3,0xACDE4048,0xA0728369
7148 long 0x3FFF0000,0x85AAC367,0xCC487B14,0x1FC5C95C
7149 long 0x3FFF0000,0x871F6196,0x9E8D1010,0x1EE85C9F
7150 long 0x3FFF0000,0x88980E80,0x92DA8528,0x9FA20729
7151 long 0x3FFF0000,0x8A14D575,0x496EFD9C,0xA07BF9AF
7152 long 0x3FFF0000,0x8B95C1E3,0xEA8BD6E8,0xA0020DCF
7153 long 0x3FFF0000,0x8D1ADF5B,0x7E5BA9E4,0x205A63DA
7154 long 0x3FFF0000,0x8EA4398B,0x45CD53C0,0x1EB70051
7155 long 0x3FFF0000,0x9031DC43,0x1466B1DC,0x1F6EB029
7156 long 0x3FFF0000,0x91C3D373,0xAB11C338,0xA0781494
7157 long 0x3FFF0000,0x935A2B2F,0x13E6E92C,0x9EB319B0
7158 long 0x3FFF0000,0x94F4EFA8,0xFEF70960,0x2017457D
7159 long 0x3FFF0000,0x96942D37,0x20185A00,0x1F11D537
7160 long 0x3FFF0000,0x9837F051,0x8DB8A970,0x9FB952DD
7161 long 0x3FFF0000,0x99E04593,0x20B7FA64,0x1FE43087
7162 long 0x3FFF0000,0x9B8D39B9,0xD54E5538,0x1FA2A818
7163 long 0x3FFF0000,0x9D3ED9A7,0x2CFFB750,0x1FDE494D
7164 long 0x3FFF0000,0x9EF53260,0x91A111AC,0x20504890
7165 long 0x3FFF0000,0xA0B0510F,0xB9714FC4,0xA073691C
7166 long 0x3FFF0000,0xA2704303,0x0C496818,0x1F9B7A05
7167 long 0x3FFF0000,0xA43515AE,0x09E680A0,0xA0797126
7168 long 0x3FFF0000,0xA5FED6A9,0xB15138EC,0xA071A140
7169 long 0x3FFF0000,0xA7CD93B4,0xE9653568,0x204F62DA
7170 long 0x3FFF0000,0xA9A15AB4,0xEA7C0EF8,0x1F283C4A
7171 long 0x3FFF0000,0xAB7A39B5,0xA93ED338,0x9F9A7FDC
7172 long 0x3FFF0000,0xAD583EEA,0x42A14AC8,0xA05B3FAC
7173 long 0x3FFF0000,0xAF3B78AD,0x690A4374,0x1FDF2610
7174 long 0x3FFF0000,0xB123F581,0xD2AC2590,0x9F705F90
7175 long 0x3FFF0000,0xB311C412,0xA9112488,0x201F678A
7176 long 0x3FFF0000,0xB504F333,0xF9DE6484,0x1F32FB13
7177 long 0x3FFF0000,0xB6FD91E3,0x28D17790,0x20038B30
7178 long 0x3FFF0000,0xB8FBAF47,0x62FB9EE8,0x200DC3CC
7179 long 0x3FFF0000,0xBAFF5AB2,0x133E45FC,0x9F8B2AE6
7180 long 0x3FFF0000,0xBD08A39F,0x580C36C0,0xA02BBF70
7181 long 0x3FFF0000,0xBF1799B6,0x7A731084,0xA00BF518
7182 long 0x3FFF0000,0xC12C4CCA,0x66709458,0xA041DD41
7183 long 0x3FFF0000,0xC346CCDA,0x24976408,0x9FDF137B
7184 long 0x3FFF0000,0xC5672A11,0x5506DADC,0x201F1568
7185 long 0x3FFF0000,0xC78D74C8,0xABB9B15C,0x1FC13A2E
7186 long 0x3FFF0000,0xC9B9BD86,0x6E2F27A4,0xA03F8F03
7187 long 0x3FFF0000,0xCBEC14FE,0xF2727C5C,0x1FF4907D
7188 long 0x3FFF0000,0xCE248C15,0x1F8480E4,0x9E6E53E4
7189 long 0x3FFF0000,0xD06333DA,0xEF2B2594,0x1FD6D45C
7190 long 0x3FFF0000,0xD2A81D91,0xF12AE45C,0xA076EDB9
7191 long 0x3FFF0000,0xD4F35AAB,0xCFEDFA20,0x9FA6DE21
7192 long 0x3FFF0000,0xD744FCCA,0xD69D6AF4,0x1EE69A2F
7193 long 0x3FFF0000,0xD99D15C2,0x78AFD7B4,0x207F439F
7194 long 0x3FFF0000,0xDBFBB797,0xDAF23754,0x201EC207
7195 long 0x3FFF0000,0xDE60F482,0x5E0E9124,0x9E8BE175
7196 long 0x3FFF0000,0xE0CCDEEC,0x2A94E110,0x20032C4B
7197 long 0x3FFF0000,0xE33F8972,0xBE8A5A50,0x2004DFF5
7198 long 0x3FFF0000,0xE5B906E7,0x7C8348A8,0x1E72F47A
7199 long 0x3FFF0000,0xE8396A50,0x3C4BDC68,0x1F722F22
7200 long 0x3FFF0000,0xEAC0C6E7,0xDD243930,0xA017E945
7201 long 0x3FFF0000,0xED4F301E,0xD9942B84,0x1F401A5B
7202 long 0x3FFF0000,0xEFE4B99B,0xDCDAF5CC,0x9FB9A9E3
7203 long 0x3FFF0000,0xF281773C,0x59FFB138,0x20744C05
7204 long 0x3FFF0000,0xF5257D15,0x2486CC2C,0x1F773A19
7205 long 0x3FFF0000,0xF7D0DF73,0x0AD13BB8,0x1FFE90D5
7206 long 0x3FFF0000,0xFA83B2DB,0x722A033C,0xA041ED22
7207 long 0x3FFF0000,0xFD3E0C0C,0xF486C174,0x1F853F3A
7209 set ADJFLAG,L_SCR2
7210 set SCALE,FP_SCR0
7211 set ADJSCALE,FP_SCR1
7212 set SC,FP_SCR0
7213 set ONEBYSC,FP_SCR1
7215 global setox
7216 setox:
7217 #--entry point for EXP(X), here X is finite, non-zero, and not NaN's
7219 #--Step 1.
7220 mov.l (%a0),%d1 # load part of input X
7221 and.l &0x7FFF0000,%d1 # biased expo. of X
7222 cmp.l %d1,&0x3FBE0000 # 2^(-65)
7223 bge.b EXPC1 # normal case
7224 bra EXPSM
7226 EXPC1:
7227 #--The case |X| >= 2^(-65)
7228 mov.w 4(%a0),%d1 # expo. and partial sig. of |X|
7229 cmp.l %d1,&0x400CB167 # 16380 log2 trunc. 16 bits
7230 blt.b EXPMAIN # normal case
7231 bra EEXPBIG
7233 EXPMAIN:
7234 #--Step 2.
7235 #--This is the normal branch: 2^(-65) <= |X| < 16380 log2.
7236 fmov.x (%a0),%fp0 # load input from (a0)
7238 fmov.x %fp0,%fp1
7239 fmul.s &0x42B8AA3B,%fp0 # 64/log2 * X
7240 fmovm.x &0xc,-(%sp) # save fp2 {%fp2/%fp3}
7241 mov.l &0,ADJFLAG(%a6)
7242 fmov.l %fp0,%d1 # N = int( X * 64/log2 )
7243 lea EEXPTBL(%pc),%a1
7244 fmov.l %d1,%fp0 # convert to floating-format
7246 mov.l %d1,L_SCR1(%a6) # save N temporarily
7247 and.l &0x3F,%d1 # D0 is J = N mod 64
7248 lsl.l &4,%d1
7249 add.l %d1,%a1 # address of 2^(J/64)
7250 mov.l L_SCR1(%a6),%d1
7251 asr.l &6,%d1 # D0 is M
7252 add.w &0x3FFF,%d1 # biased expo. of 2^(M)
7253 mov.w L2(%pc),L_SCR1(%a6) # prefetch L2, no need in CB
7255 EXPCONT1:
7256 #--Step 3.
7257 #--fp1,fp2 saved on the stack. fp0 is N, fp1 is X,
7258 #--a0 points to 2^(J/64), D0 is biased expo. of 2^(M)
7259 fmov.x %fp0,%fp2
7260 fmul.s &0xBC317218,%fp0 # N * L1, L1 = lead(-log2/64)
7261 fmul.x L2(%pc),%fp2 # N * L2, L1+L2 = -log2/64
7262 fadd.x %fp1,%fp0 # X + N*L1
7263 fadd.x %fp2,%fp0 # fp0 is R, reduced arg.
7265 #--Step 4.
7266 #--WE NOW COMPUTE EXP(R)-1 BY A POLYNOMIAL
7267 #-- R + R*R*(A1 + R*(A2 + R*(A3 + R*(A4 + R*A5))))
7268 #--TO FULLY USE THE PIPELINE, WE COMPUTE S = R*R
7269 #--[R+R*S*(A2+S*A4)] + [S*(A1+S*(A3+S*A5))]
7271 fmov.x %fp0,%fp1
7272 fmul.x %fp1,%fp1 # fp1 IS S = R*R
7274 fmov.s &0x3AB60B70,%fp2 # fp2 IS A5
7276 fmul.x %fp1,%fp2 # fp2 IS S*A5
7277 fmov.x %fp1,%fp3
7278 fmul.s &0x3C088895,%fp3 # fp3 IS S*A4
7280 fadd.d EEXPA3(%pc),%fp2 # fp2 IS A3+S*A5
7281 fadd.d EEXPA2(%pc),%fp3 # fp3 IS A2+S*A4
7283 fmul.x %fp1,%fp2 # fp2 IS S*(A3+S*A5)
7284 mov.w %d1,SCALE(%a6) # SCALE is 2^(M) in extended
7285 mov.l &0x80000000,SCALE+4(%a6)
7286 clr.l SCALE+8(%a6)
7288 fmul.x %fp1,%fp3 # fp3 IS S*(A2+S*A4)
7290 fadd.s &0x3F000000,%fp2 # fp2 IS A1+S*(A3+S*A5)
7291 fmul.x %fp0,%fp3 # fp3 IS R*S*(A2+S*A4)
7293 fmul.x %fp1,%fp2 # fp2 IS S*(A1+S*(A3+S*A5))
7294 fadd.x %fp3,%fp0 # fp0 IS R+R*S*(A2+S*A4),
7296 fmov.x (%a1)+,%fp1 # fp1 is lead. pt. of 2^(J/64)
7297 fadd.x %fp2,%fp0 # fp0 is EXP(R) - 1
7299 #--Step 5
7300 #--final reconstruction process
7301 #--EXP(X) = 2^M * ( 2^(J/64) + 2^(J/64)*(EXP(R)-1) )
7303 fmul.x %fp1,%fp0 # 2^(J/64)*(Exp(R)-1)
7304 fmovm.x (%sp)+,&0x30 # fp2 restored {%fp2/%fp3}
7305 fadd.s (%a1),%fp0 # accurate 2^(J/64)
7307 fadd.x %fp1,%fp0 # 2^(J/64) + 2^(J/64)*...
7308 mov.l ADJFLAG(%a6),%d1
7310 #--Step 6
7311 tst.l %d1
7312 beq.b NORMAL
7313 ADJUST:
7314 fmul.x ADJSCALE(%a6),%fp0
7315 NORMAL:
7316 fmov.l %d0,%fpcr # restore user FPCR
7317 mov.b &FMUL_OP,%d1 # last inst is MUL
7318 fmul.x SCALE(%a6),%fp0 # multiply 2^(M)
7319 bra t_catch
7321 EXPSM:
7322 #--Step 7
7323 fmovm.x (%a0),&0x80 # load X
7324 fmov.l %d0,%fpcr
7325 fadd.s &0x3F800000,%fp0 # 1+X in user mode
7326 bra t_pinx2
7328 EEXPBIG:
7329 #--Step 8
7330 cmp.l %d1,&0x400CB27C # 16480 log2
7331 bgt.b EXP2BIG
7332 #--Steps 8.2 -- 8.6
7333 fmov.x (%a0),%fp0 # load input from (a0)
7335 fmov.x %fp0,%fp1
7336 fmul.s &0x42B8AA3B,%fp0 # 64/log2 * X
7337 fmovm.x &0xc,-(%sp) # save fp2 {%fp2/%fp3}
7338 mov.l &1,ADJFLAG(%a6)
7339 fmov.l %fp0,%d1 # N = int( X * 64/log2 )
7340 lea EEXPTBL(%pc),%a1
7341 fmov.l %d1,%fp0 # convert to floating-format
7342 mov.l %d1,L_SCR1(%a6) # save N temporarily
7343 and.l &0x3F,%d1 # D0 is J = N mod 64
7344 lsl.l &4,%d1
7345 add.l %d1,%a1 # address of 2^(J/64)
7346 mov.l L_SCR1(%a6),%d1
7347 asr.l &6,%d1 # D0 is K
7348 mov.l %d1,L_SCR1(%a6) # save K temporarily
7349 asr.l &1,%d1 # D0 is M1
7350 sub.l %d1,L_SCR1(%a6) # a1 is M
7351 add.w &0x3FFF,%d1 # biased expo. of 2^(M1)
7352 mov.w %d1,ADJSCALE(%a6) # ADJSCALE := 2^(M1)
7353 mov.l &0x80000000,ADJSCALE+4(%a6)
7354 clr.l ADJSCALE+8(%a6)
7355 mov.l L_SCR1(%a6),%d1 # D0 is M
7356 add.w &0x3FFF,%d1 # biased expo. of 2^(M)
7357 bra.w EXPCONT1 # go back to Step 3
7359 EXP2BIG:
7360 #--Step 9
7361 tst.b (%a0) # is X positive or negative?
7362 bmi t_unfl2
7363 bra t_ovfl2
7365 global setoxd
7366 setoxd:
7367 #--entry point for EXP(X), X is denormalized
7368 mov.l (%a0),-(%sp)
7369 andi.l &0x80000000,(%sp)
7370 ori.l &0x00800000,(%sp) # sign(X)*2^(-126)
7372 fmov.s &0x3F800000,%fp0
7374 fmov.l %d0,%fpcr
7375 fadd.s (%sp)+,%fp0
7376 bra t_pinx2
7378 global setoxm1
7379 setoxm1:
7380 #--entry point for EXPM1(X), here X is finite, non-zero, non-NaN
7382 #--Step 1.
7383 #--Step 1.1
7384 mov.l (%a0),%d1 # load part of input X
7385 and.l &0x7FFF0000,%d1 # biased expo. of X
7386 cmp.l %d1,&0x3FFD0000 # 1/4
7387 bge.b EM1CON1 # |X| >= 1/4
7388 bra EM1SM
7390 EM1CON1:
7391 #--Step 1.3
7392 #--The case |X| >= 1/4
7393 mov.w 4(%a0),%d1 # expo. and partial sig. of |X|
7394 cmp.l %d1,&0x4004C215 # 70log2 rounded up to 16 bits
7395 ble.b EM1MAIN # 1/4 <= |X| <= 70log2
7396 bra EM1BIG
7398 EM1MAIN:
7399 #--Step 2.
7400 #--This is the case: 1/4 <= |X| <= 70 log2.
7401 fmov.x (%a0),%fp0 # load input from (a0)
7403 fmov.x %fp0,%fp1
7404 fmul.s &0x42B8AA3B,%fp0 # 64/log2 * X
7405 fmovm.x &0xc,-(%sp) # save fp2 {%fp2/%fp3}
7406 fmov.l %fp0,%d1 # N = int( X * 64/log2 )
7407 lea EEXPTBL(%pc),%a1
7408 fmov.l %d1,%fp0 # convert to floating-format
7410 mov.l %d1,L_SCR1(%a6) # save N temporarily
7411 and.l &0x3F,%d1 # D0 is J = N mod 64
7412 lsl.l &4,%d1
7413 add.l %d1,%a1 # address of 2^(J/64)
7414 mov.l L_SCR1(%a6),%d1
7415 asr.l &6,%d1 # D0 is M
7416 mov.l %d1,L_SCR1(%a6) # save a copy of M
7418 #--Step 3.
7419 #--fp1,fp2 saved on the stack. fp0 is N, fp1 is X,
7420 #--a0 points to 2^(J/64), D0 and a1 both contain M
7421 fmov.x %fp0,%fp2
7422 fmul.s &0xBC317218,%fp0 # N * L1, L1 = lead(-log2/64)
7423 fmul.x L2(%pc),%fp2 # N * L2, L1+L2 = -log2/64
7424 fadd.x %fp1,%fp0 # X + N*L1
7425 fadd.x %fp2,%fp0 # fp0 is R, reduced arg.
7426 add.w &0x3FFF,%d1 # D0 is biased expo. of 2^M
7428 #--Step 4.
7429 #--WE NOW COMPUTE EXP(R)-1 BY A POLYNOMIAL
7430 #-- R + R*R*(A1 + R*(A2 + R*(A3 + R*(A4 + R*(A5 + R*A6)))))
7431 #--TO FULLY USE THE PIPELINE, WE COMPUTE S = R*R
7432 #--[R*S*(A2+S*(A4+S*A6))] + [R+S*(A1+S*(A3+S*A5))]
7434 fmov.x %fp0,%fp1
7435 fmul.x %fp1,%fp1 # fp1 IS S = R*R
7437 fmov.s &0x3950097B,%fp2 # fp2 IS a6
7439 fmul.x %fp1,%fp2 # fp2 IS S*A6
7440 fmov.x %fp1,%fp3
7441 fmul.s &0x3AB60B6A,%fp3 # fp3 IS S*A5
7443 fadd.d EM1A4(%pc),%fp2 # fp2 IS A4+S*A6
7444 fadd.d EM1A3(%pc),%fp3 # fp3 IS A3+S*A5
7445 mov.w %d1,SC(%a6) # SC is 2^(M) in extended
7446 mov.l &0x80000000,SC+4(%a6)
7447 clr.l SC+8(%a6)
7449 fmul.x %fp1,%fp2 # fp2 IS S*(A4+S*A6)
7450 mov.l L_SCR1(%a6),%d1 # D0 is M
7451 neg.w %d1 # D0 is -M
7452 fmul.x %fp1,%fp3 # fp3 IS S*(A3+S*A5)
7453 add.w &0x3FFF,%d1 # biased expo. of 2^(-M)
7454 fadd.d EM1A2(%pc),%fp2 # fp2 IS A2+S*(A4+S*A6)
7455 fadd.s &0x3F000000,%fp3 # fp3 IS A1+S*(A3+S*A5)
7457 fmul.x %fp1,%fp2 # fp2 IS S*(A2+S*(A4+S*A6))
7458 or.w &0x8000,%d1 # signed/expo. of -2^(-M)
7459 mov.w %d1,ONEBYSC(%a6) # OnebySc is -2^(-M)
7460 mov.l &0x80000000,ONEBYSC+4(%a6)
7461 clr.l ONEBYSC+8(%a6)
7462 fmul.x %fp3,%fp1 # fp1 IS S*(A1+S*(A3+S*A5))
7464 fmul.x %fp0,%fp2 # fp2 IS R*S*(A2+S*(A4+S*A6))
7465 fadd.x %fp1,%fp0 # fp0 IS R+S*(A1+S*(A3+S*A5))
7467 fadd.x %fp2,%fp0 # fp0 IS EXP(R)-1
7469 fmovm.x (%sp)+,&0x30 # fp2 restored {%fp2/%fp3}
7471 #--Step 5
7472 #--Compute 2^(J/64)*p
7474 fmul.x (%a1),%fp0 # 2^(J/64)*(Exp(R)-1)
7476 #--Step 6
7477 #--Step 6.1
7478 mov.l L_SCR1(%a6),%d1 # retrieve M
7479 cmp.l %d1,&63
7480 ble.b MLE63
7481 #--Step 6.2 M >= 64
7482 fmov.s 12(%a1),%fp1 # fp1 is t
7483 fadd.x ONEBYSC(%a6),%fp1 # fp1 is t+OnebySc
7484 fadd.x %fp1,%fp0 # p+(t+OnebySc), fp1 released
7485 fadd.x (%a1),%fp0 # T+(p+(t+OnebySc))
7486 bra EM1SCALE
7487 MLE63:
7488 #--Step 6.3 M <= 63
7489 cmp.l %d1,&-3
7490 bge.b MGEN3
7491 MLTN3:
7492 #--Step 6.4 M <= -4
7493 fadd.s 12(%a1),%fp0 # p+t
7494 fadd.x (%a1),%fp0 # T+(p+t)
7495 fadd.x ONEBYSC(%a6),%fp0 # OnebySc + (T+(p+t))
7496 bra EM1SCALE
7497 MGEN3:
7498 #--Step 6.5 -3 <= M <= 63
7499 fmov.x (%a1)+,%fp1 # fp1 is T
7500 fadd.s (%a1),%fp0 # fp0 is p+t
7501 fadd.x ONEBYSC(%a6),%fp1 # fp1 is T+OnebySc
7502 fadd.x %fp1,%fp0 # (T+OnebySc)+(p+t)
7504 EM1SCALE:
7505 #--Step 6.6
7506 fmov.l %d0,%fpcr
7507 fmul.x SC(%a6),%fp0
7508 bra t_inx2
7510 EM1SM:
7511 #--Step 7 |X| < 1/4.
7512 cmp.l %d1,&0x3FBE0000 # 2^(-65)
7513 bge.b EM1POLY
7515 EM1TINY:
7516 #--Step 8 |X| < 2^(-65)
7517 cmp.l %d1,&0x00330000 # 2^(-16312)
7518 blt.b EM12TINY
7519 #--Step 8.2
7520 mov.l &0x80010000,SC(%a6) # SC is -2^(-16382)
7521 mov.l &0x80000000,SC+4(%a6)
7522 clr.l SC+8(%a6)
7523 fmov.x (%a0),%fp0
7524 fmov.l %d0,%fpcr
7525 mov.b &FADD_OP,%d1 # last inst is ADD
7526 fadd.x SC(%a6),%fp0
7527 bra t_catch
7529 EM12TINY:
7530 #--Step 8.3
7531 fmov.x (%a0),%fp0
7532 fmul.d TWO140(%pc),%fp0
7533 mov.l &0x80010000,SC(%a6)
7534 mov.l &0x80000000,SC+4(%a6)
7535 clr.l SC+8(%a6)
7536 fadd.x SC(%a6),%fp0
7537 fmov.l %d0,%fpcr
7538 mov.b &FMUL_OP,%d1 # last inst is MUL
7539 fmul.d TWON140(%pc),%fp0
7540 bra t_catch
7542 EM1POLY:
7543 #--Step 9 exp(X)-1 by a simple polynomial
7544 fmov.x (%a0),%fp0 # fp0 is X
7545 fmul.x %fp0,%fp0 # fp0 is S := X*X
7546 fmovm.x &0xc,-(%sp) # save fp2 {%fp2/%fp3}
7547 fmov.s &0x2F30CAA8,%fp1 # fp1 is B12
7548 fmul.x %fp0,%fp1 # fp1 is S*B12
7549 fmov.s &0x310F8290,%fp2 # fp2 is B11
7550 fadd.s &0x32D73220,%fp1 # fp1 is B10+S*B12
7552 fmul.x %fp0,%fp2 # fp2 is S*B11
7553 fmul.x %fp0,%fp1 # fp1 is S*(B10 + ...
7555 fadd.s &0x3493F281,%fp2 # fp2 is B9+S*...
7556 fadd.d EM1B8(%pc),%fp1 # fp1 is B8+S*...
7558 fmul.x %fp0,%fp2 # fp2 is S*(B9+...
7559 fmul.x %fp0,%fp1 # fp1 is S*(B8+...
7561 fadd.d EM1B7(%pc),%fp2 # fp2 is B7+S*...
7562 fadd.d EM1B6(%pc),%fp1 # fp1 is B6+S*...
7564 fmul.x %fp0,%fp2 # fp2 is S*(B7+...
7565 fmul.x %fp0,%fp1 # fp1 is S*(B6+...
7567 fadd.d EM1B5(%pc),%fp2 # fp2 is B5+S*...
7568 fadd.d EM1B4(%pc),%fp1 # fp1 is B4+S*...
7570 fmul.x %fp0,%fp2 # fp2 is S*(B5+...
7571 fmul.x %fp0,%fp1 # fp1 is S*(B4+...
7573 fadd.d EM1B3(%pc),%fp2 # fp2 is B3+S*...
7574 fadd.x EM1B2(%pc),%fp1 # fp1 is B2+S*...
7576 fmul.x %fp0,%fp2 # fp2 is S*(B3+...
7577 fmul.x %fp0,%fp1 # fp1 is S*(B2+...
7579 fmul.x %fp0,%fp2 # fp2 is S*S*(B3+...)
7580 fmul.x (%a0),%fp1 # fp1 is X*S*(B2...
7582 fmul.s &0x3F000000,%fp0 # fp0 is S*B1
7583 fadd.x %fp2,%fp1 # fp1 is Q
7585 fmovm.x (%sp)+,&0x30 # fp2 restored {%fp2/%fp3}
7587 fadd.x %fp1,%fp0 # fp0 is S*B1+Q
7589 fmov.l %d0,%fpcr
7590 fadd.x (%a0),%fp0
7591 bra t_inx2
7593 EM1BIG:
7594 #--Step 10 |X| > 70 log2
7595 mov.l (%a0),%d1
7596 cmp.l %d1,&0
7597 bgt.w EXPC1
7598 #--Step 10.2
7599 fmov.s &0xBF800000,%fp0 # fp0 is -1
7600 fmov.l %d0,%fpcr
7601 fadd.s &0x00800000,%fp0 # -1 + 2^(-126)
7602 bra t_minx2
7604 global setoxm1d
7605 setoxm1d:
7606 #--entry point for EXPM1(X), here X is denormalized
7607 #--Step 0.
7608 bra t_extdnrm
7610 #########################################################################
7611 # sgetexp(): returns the exponent portion of the input argument. #
7612 # The exponent bias is removed and the exponent value is #
7613 # returned as an extended precision number in fp0. #
7614 # sgetexpd(): handles denormalized numbers. #
7616 # sgetman(): extracts the mantissa of the input argument. The #
7617 # mantissa is converted to an extended precision number w/ #
7618 # an exponent of $3fff and is returned in fp0. The range of #
7619 # the result is [1.0, 2.0). #
7620 # sgetmand(): handles denormalized numbers. #
7622 # INPUT *************************************************************** #
7623 # a0 = pointer to extended precision input #
7625 # OUTPUT ************************************************************** #
7626 # fp0 = exponent(X) or mantissa(X) #
7628 #########################################################################
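#
# In C terms (with double standing in for the extended format), sgetexp()
# and sgetman() return the two halves of a frexp() decomposition, with the
# significand rescaled into [1.0, 2.0); get_exp_man() is a made-up name and
# ignores the zero/denormalized special cases handled by sgetexpd/sgetmand:
#
#    #include <math.h>
#
#    void get_exp_man(double x, double *exp_part, double *man_part)
#    {
#        int e;
#        double m = frexp(x, &e);       /* x = m * 2^e, 0.5 <= |m| < 1    */
#        *exp_part = (double)(e - 1);   /* unbiased exponent of 1.f * 2^k */
#        *man_part = 2.0 * m;           /* signed mantissa, magnitude in [1,2) */
#    }
#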
7630 global sgetexp
7631 sgetexp:
7632 mov.w SRC_EX(%a0),%d0 # get the exponent
7633 bclr &0xf,%d0 # clear the sign bit
7634 subi.w &0x3fff,%d0 # subtract off the bias
7635 fmov.w %d0,%fp0 # return exp in fp0
7636 blt.b sgetexpn # it's negative
7637 rts
7639 sgetexpn:
7640 mov.b &neg_bmask,FPSR_CC(%a6) # set 'N' ccode bit
7641 rts
7643 global sgetexpd
7644 sgetexpd:
7645 bsr.l norm # normalize
7646 neg.w %d0 # new exp = -(shft amt)
7647 subi.w &0x3fff,%d0 # subtract off the bias
7648 fmov.w %d0,%fp0 # return exp in fp0
7649 mov.b &neg_bmask,FPSR_CC(%a6) # set 'N' ccode bit
7650 rts
7652 global sgetman
7653 sgetman:
7654 mov.w SRC_EX(%a0),%d0 # get the exp
7655 ori.w &0x7fff,%d0 # clear old exp
7656 bclr &0xe,%d0 # make it the new exp +-3fff
7658 # here, we build the result in a tmp location so as not to disturb the input
7659 mov.l SRC_HI(%a0),FP_SCR0_HI(%a6) # copy to tmp loc
7660 mov.l SRC_LO(%a0),FP_SCR0_LO(%a6) # copy to tmp loc
7661 mov.w %d0,FP_SCR0_EX(%a6) # insert new exponent
7662 fmov.x FP_SCR0(%a6),%fp0 # put new value back in fp0
7663 bmi.b sgetmann # it's negative
7664 rts
7666 sgetmann:
7667 mov.b &neg_bmask,FPSR_CC(%a6) # set 'N' ccode bit
7668 rts
7671 # For denormalized numbers, shift the mantissa until the j-bit = 1,
7672 # then load the exponent with +/- $3fff.
7674 global sgetmand
7675 sgetmand:
7676 bsr.l norm # normalize exponent
7677 bra.b sgetman
7679 #########################################################################
7680 # scosh(): computes the hyperbolic cosine of a normalized input #
7681 # scoshd(): computes the hyperbolic cosine of a denormalized input #
7683 # INPUT *************************************************************** #
7684 # a0 = pointer to extended precision input #
7685 # d0 = round precision,mode #
7687 # OUTPUT ************************************************************** #
7688 # fp0 = cosh(X) #
7690 # ACCURACY and MONOTONICITY ******************************************* #
7691 # The returned result is within 3 ulps in 64 significant bits, #
7692 # i.e. within 0.5001 ulp to 53 bits if the result is subsequently #
7693 # rounded to double precision. The result is provably monotonic #
7694 # in double precision. #
7696 # ALGORITHM *********************************************************** #
7698 # COSH #
7699 # 1. If |X| > 16380 log2, go to 3. #
7701 # 2. (|X| <= 16380 log2) Cosh(X) is obtained by the formulae #
7702 # y = |X|, z = exp(Y), and #
7703 # cosh(X) = (1/2)*( z + 1/z ). #
7704 # Exit. #
7706 # 3. (|X| > 16380 log2). If |X| > 16480 log2, go to 5. #
7708 # 4. (16380 log2 < |X| <= 16480 log2) #
7709 # cosh(X) = exp(|X|)/2. #
7710 # However, invoking exp(|X|) may cause premature #
7711 # overflow. Thus, we calculate cosh(X) as follows: #
7712 # Y := |X| #
7713 # Fact := 2**(16380) #
7714 # Y' := Y - 16381 log2 #
7715 # cosh(X) := Fact * exp(Y'). #
7716 # Exit. #
7718 # 5. (|X| > 16480 log2) cosh(X) must overflow. Return #
7719 # Huge*Huge to generate overflow and an infinity with #
7720 # the appropriate sign. Huge is the largest finite number #
7721 # in extended format. Exit. #
7723 #########################################################################
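#
# A double-precision C sketch of the main COSH path above; cosh_via_exp()
# is a made-up name, and the 2^(16380) rescaling used for
# 16380 log2 < |X| <= 16480 log2 is omitted:
#
#    #include <math.h>
#
#    double cosh_via_exp(double x)              /* |x| <= 16380 log2 */
#    {
#        double half_z = 0.5 * exp(fabs(x));    /* (1/2) exp(|x|)    */
#        return half_z + 0.25 / half_z;         /* + 1/(2 exp(|x|))  */
#    }
#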
7725 TWO16380:
7726 long 0x7FFB0000,0x80000000,0x00000000,0x00000000
7728 global scosh
7729 scosh:
7730 fmov.x (%a0),%fp0 # LOAD INPUT
7732 mov.l (%a0),%d1
7733 mov.w 4(%a0),%d1
7734 and.l &0x7FFFFFFF,%d1
7735 cmp.l %d1,&0x400CB167
7736 bgt.b COSHBIG
7738 #--THIS IS THE USUAL CASE, |X| < 16380 LOG2
7739 #--COSH(X) = (1/2) * ( EXP(X) + 1/EXP(X) )
7741 fabs.x %fp0 # |X|
7743 mov.l %d0,-(%sp)
7744 clr.l %d0
7745 fmovm.x &0x01,-(%sp) # save |X| to stack
7746 lea (%sp),%a0 # pass ptr to |X|
7747 bsr setox # FP0 IS EXP(|X|)
7748 add.l &0xc,%sp # erase |X| from stack
7749 fmul.s &0x3F000000,%fp0 # (1/2)EXP(|X|)
7750 mov.l (%sp)+,%d0
7752 fmov.s &0x3E800000,%fp1 # (1/4)
7753 fdiv.x %fp0,%fp1 # 1/(2 EXP(|X|))
7755 fmov.l %d0,%fpcr
7756 mov.b &FADD_OP,%d1 # last inst is ADD
7757 fadd.x %fp1,%fp0
7758 bra t_catch
7760 COSHBIG:
7761 cmp.l %d1,&0x400CB2B3
7762 bgt.b COSHHUGE
7764 fabs.x %fp0
7765 fsub.d T1(%pc),%fp0 # (|X|-16381LOG2_LEAD)
7766 fsub.d T2(%pc),%fp0 # |X| - 16381 LOG2, ACCURATE
7768 mov.l %d0,-(%sp)
7769 clr.l %d0
7770 fmovm.x &0x01,-(%sp) # save fp0 to stack
7771 lea (%sp),%a0 # pass ptr to fp0
7772 bsr setox
7773 add.l &0xc,%sp # clear fp0 from stack
7774 mov.l (%sp)+,%d0
7776 fmov.l %d0,%fpcr
7777 mov.b &FMUL_OP,%d1 # last inst is MUL
7778 fmul.x TWO16380(%pc),%fp0
7779 bra t_catch
7781 COSHHUGE:
7782 bra t_ovfl2
7784 global scoshd
7785 #--COSH(X) = 1 FOR DENORMALIZED X
7786 scoshd:
7787 fmov.s &0x3F800000,%fp0
7789 fmov.l %d0,%fpcr
7790 fadd.s &0x00800000,%fp0
7791 bra t_pinx2
7793 #########################################################################
7794 # ssinh(): computes the hyperbolic sine of a normalized input #
7795 # ssinhd(): computes the hyperbolic sine of a denormalized input #
7797 # INPUT *************************************************************** #
7798 # a0 = pointer to extended precision input #
7799 # d0 = round precision,mode #
7801 # OUTPUT ************************************************************** #
7802 # fp0 = sinh(X) #
7804 # ACCURACY and MONOTONICITY ******************************************* #
7805 # The returned result is within 3 ulps in 64 significant bits, #
7806 # i.e. within 0.5001 ulp to 53 bits if the result is subsequently #
7807 # rounded to double precision. The result is provably monotonic #
7808 # in double precision. #
7810 # ALGORITHM *********************************************************** #
7812 # SINH #
7813 # 1. If |X| > 16380 log2, go to 3. #
7815 # 2. (|X| <= 16380 log2) Sinh(X) is obtained by the formula #
7816 # y = |X|, sgn = sign(X), and z = expm1(Y), #
7817 # sinh(X) = sgn*(1/2)*( z + z/(1+z) ). #
7818 # Exit. #
7820 # 3. If |X| > 16480 log2, go to 5. #
7822 # 4. (16380 log2 < |X| <= 16480 log2) #
7823 # sinh(X) = sign(X) * exp(|X|)/2. #
7824 # However, invoking exp(|X|) may cause premature overflow. #
7825 # Thus, we calculate sinh(X) as follows: #
7826 # Y := |X| #
7827 # sgn := sign(X) #
7828 # sgnFact := sgn * 2**(16380) #
7829 # Y' := Y - 16381 log2 #
7830 # sinh(X) := sgnFact * exp(Y'). #
7831 # Exit. #
7833 # 5. (|X| > 16480 log2) sinh(X) must overflow. Return #
7834 # sign(X)*Huge*Huge to generate overflow and an infinity with #
7835 # the appropriate sign. Huge is the largest finite number in #
7836 # extended format. Exit. #
7838 #########################################################################
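#
# A double-precision C sketch of the main SINH path above; sinh_via_expm1()
# is a made-up name, and the 2^(16380) rescaling for large |X| is omitted:
#
#    #include <math.h>
#
#    double sinh_via_expm1(double x)            /* |x| <= 16380 log2 */
#    {
#        double z = expm1(fabs(x));             /* z = exp(|x|) - 1  */
#        return copysign(0.5 * (z + z / (1.0 + z)), x);
#    }
#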
7840 global ssinh
7841 ssinh:
7842 fmov.x (%a0),%fp0 # LOAD INPUT
7844 mov.l (%a0),%d1
7845 mov.w 4(%a0),%d1
7846 mov.l %d1,%a1 # save (compacted) operand
7847 and.l &0x7FFFFFFF,%d1
7848 cmp.l %d1,&0x400CB167
7849 bgt.b SINHBIG
7851 #--THIS IS THE USUAL CASE, |X| < 16380 LOG2
7852 #--Y = |X|, Z = EXPM1(Y), SINH(X) = SIGN(X)*(1/2)*( Z + Z/(1+Z) )
7854 fabs.x %fp0 # Y = |X|
7856 movm.l &0x8040,-(%sp) # {a1/d0}
7857 fmovm.x &0x01,-(%sp) # save Y on stack
7858 lea (%sp),%a0 # pass ptr to Y
7859 clr.l %d0
7860 bsr setoxm1 # FP0 IS Z = EXPM1(Y)
7861 add.l &0xc,%sp # clear Y from stack
7862 fmov.l &0,%fpcr
7863 movm.l (%sp)+,&0x0201 # {a1/d0}
7865 fmov.x %fp0,%fp1
7866 fadd.s &0x3F800000,%fp1 # 1+Z
7867 fmov.x %fp0,-(%sp)
7868 fdiv.x %fp1,%fp0 # Z/(1+Z)
7869 mov.l %a1,%d1
7870 and.l &0x80000000,%d1
7871 or.l &0x3F000000,%d1
7872 fadd.x (%sp)+,%fp0
7873 mov.l %d1,-(%sp)
7875 fmov.l %d0,%fpcr
7876 mov.b &FMUL_OP,%d1 # last inst is MUL
7877 fmul.s (%sp)+,%fp0 # last fp inst - possible exceptions set
7878 bra t_catch
7880 SINHBIG:
7881 cmp.l %d1,&0x400CB2B3
7882 bgt t_ovfl
7883 fabs.x %fp0
7884 fsub.d T1(%pc),%fp0 # (|X|-16381LOG2_LEAD)
7885 mov.l &0,-(%sp)
7886 mov.l &0x80000000,-(%sp)
7887 mov.l %a1,%d1
7888 and.l &0x80000000,%d1
7889 or.l &0x7FFB0000,%d1
7890 mov.l %d1,-(%sp) # EXTENDED FMT
7891 fsub.d T2(%pc),%fp0 # |X| - 16381 LOG2, ACCURATE
7893 mov.l %d0,-(%sp)
7894 clr.l %d0
7895 fmovm.x &0x01,-(%sp) # save fp0 on stack
7896 lea (%sp),%a0 # pass ptr to fp0
7897 bsr setox
7898 add.l &0xc,%sp # clear fp0 from stack
7900 mov.l (%sp)+,%d0
7901 fmov.l %d0,%fpcr
7902 mov.b &FMUL_OP,%d1 # last inst is MUL
7903 fmul.x (%sp)+,%fp0 # possible exception
7904 bra t_catch
7906 global ssinhd
7907 #--SINH(X) = X FOR DENORMALIZED X
7908 ssinhd:
7909 bra t_extdnrm
7911 #########################################################################
7912 # stanh(): computes the hyperbolic tangent of a normalized input #
7913 # stanhd(): computes the hyperbolic tangent of a denormalized input #
7915 # INPUT *************************************************************** #
7916 # a0 = pointer to extended precision input #
7917 # d0 = round precision,mode #
7919 # OUTPUT ************************************************************** #
7920 # fp0 = tanh(X) #
7922 # ACCURACY and MONOTONICITY ******************************************* #
7923 # The returned result is within 3 ulps in 64 significant bits, #
7924 # i.e. within 0.5001 ulp to 53 bits if the result is subsequently #
7925 # rounded to double precision. The result is provably monotonic #
7926 # in double precision. #
7928 # ALGORITHM *********************************************************** #
7930 # TANH #
7931 # 1. If |X| >= (5/2) log2 or |X| <= 2**(-40), go to 3. #
7933 # 2. (2**(-40) < |X| < (5/2) log2) Calculate tanh(X) by #
7934 # sgn := sign(X), y := 2|X|, z := expm1(Y), and #
7935 # tanh(X) = sgn*( z/(2+z) ). #
7936 # Exit. #
7938 # 3. (|X| <= 2**(-40) or |X| >= (5/2) log2). If |X| < 1, #
7939 # go to 7. #
7941 # 4. (|X| >= (5/2) log2) If |X| >= 50 log2, go to 6. #
7943 # 5. ((5/2) log2 <= |X| < 50 log2) Calculate tanh(X) by #
7944 # sgn := sign(X), y := 2|X|, z := exp(Y), #
7945 # tanh(X) = sgn - [ sgn*2/(1+z) ]. #
7946 # Exit. #
7948 # 6. (|X| >= 50 log2) Tanh(X) = +-1 (round to nearest). Thus, we #
7949 # calculate Tanh(X) by #
7950 # sgn := sign(X), Tiny := 2**(-126), #
7951 # tanh(X) := sgn - sgn*Tiny. #
7952 # Exit. #
7954 # 7. (|X| < 2**(-40)). Tanh(X) = X. Exit. #
7956 #########################################################################
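#
# A double-precision C sketch of the TANH cases above; tanh_via_expm1()
# is a made-up name, and case 6 returns +-1 directly instead of the
# sgn - sgn*Tiny form used to raise inexact:
#
#    #include <math.h>
#
#    double tanh_via_expm1(double x)
#    {
#        const double ln2 = 6.9314718055994530942e-01;
#        double ax = fabs(x);
#        if (ax <= 0x1p-40)                     /* case 7: tanh(x) = x */
#            return x;
#        if (ax < 2.5 * ln2) {                  /* case 2              */
#            double z = expm1(2.0 * ax);
#            return copysign(z / (2.0 + z), x);
#        }
#        if (ax < 50.0 * ln2) {                 /* case 5              */
#            double z = exp(2.0 * ax);
#            return copysign(1.0 - 2.0 / (1.0 + z), x);
#        }
#        return copysign(1.0, x);               /* case 6              */
#    }
#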
7958 set X,FP_SCR0
7959 set XFRAC,X+4
7961 set SGN,L_SCR3
7963 set V,FP_SCR0
7965 global stanh
7966 stanh:
7967 fmov.x (%a0),%fp0 # LOAD INPUT
7969 fmov.x %fp0,X(%a6)
7970 mov.l (%a0),%d1
7971 mov.w 4(%a0),%d1
7972 mov.l %d1,X(%a6)
7973 and.l &0x7FFFFFFF,%d1
7974 cmp.l %d1, &0x3fd78000 # is |X| < 2^(-40)?
7975 blt.w TANHBORS # yes
7976 cmp.l %d1, &0x3fffddce # is |X| > (5/2)LOG2?
7977 bgt.w TANHBORS # yes
7979 #--THIS IS THE USUAL CASE
7980 #--Y = 2|X|, Z = EXPM1(Y), TANH(X) = SIGN(X) * Z / (Z+2).
7982 mov.l X(%a6),%d1
7983 mov.l %d1,SGN(%a6)
7984 and.l &0x7FFF0000,%d1
7985 add.l &0x00010000,%d1 # EXPONENT OF 2|X|
7986 mov.l %d1,X(%a6)
7987 and.l &0x80000000,SGN(%a6)
7988 fmov.x X(%a6),%fp0 # FP0 IS Y = 2|X|
7990 mov.l %d0,-(%sp)
7991 clr.l %d0
7992 fmovm.x &0x1,-(%sp) # save Y on stack
7993 lea (%sp),%a0 # pass ptr to Y
7994 bsr setoxm1 # FP0 IS Z = EXPM1(Y)
7995 add.l &0xc,%sp # clear Y from stack
7996 mov.l (%sp)+,%d0
7998 fmov.x %fp0,%fp1
7999 fadd.s &0x40000000,%fp1 # Z+2
8000 mov.l SGN(%a6),%d1
8001 fmov.x %fp1,V(%a6)
8002 eor.l %d1,V(%a6)
8004 fmov.l %d0,%fpcr # restore users round prec,mode
8005 fdiv.x V(%a6),%fp0
8006 bra t_inx2
8008 TANHBORS:
8009 cmp.l %d1,&0x3FFF8000
8010 blt.w TANHSM
8012 cmp.l %d1,&0x40048AA1
8013 bgt.w TANHHUGE
8015 #-- (5/2) LOG2 < |X| < 50 LOG2,
8016 #--TANH(X) = 1 - (2/[EXP(2X)+1]). LET Y = 2|X|, SGN = SIGN(X),
8017 #--TANH(X) = SGN - SGN*2/[EXP(Y)+1].
8019 mov.l X(%a6),%d1
8020 mov.l %d1,SGN(%a6)
8021 and.l &0x7FFF0000,%d1
8022 add.l &0x00010000,%d1 # EXPO OF 2|X|
8023 mov.l %d1,X(%a6) # Y = 2|X|
8024 and.l &0x80000000,SGN(%a6)
8025 mov.l SGN(%a6),%d1
8026 fmov.x X(%a6),%fp0 # Y = 2|X|
8028 mov.l %d0,-(%sp)
8029 clr.l %d0
8030 fmovm.x &0x01,-(%sp) # save Y on stack
8031 lea (%sp),%a0 # pass ptr to Y
8032 bsr setox # FP0 IS EXP(Y)
8033 add.l &0xc,%sp # clear Y from stack
8034 mov.l (%sp)+,%d0
8035 mov.l SGN(%a6),%d1
8036 fadd.s &0x3F800000,%fp0 # EXP(Y)+1
8038 eor.l &0xC0000000,%d1 # -SIGN(X)*2
8039 fmov.s %d1,%fp1 # -SIGN(X)*2 IN SGL FMT
8040 fdiv.x %fp0,%fp1 # -SIGN(X)2 / [EXP(Y)+1 ]
8042 mov.l SGN(%a6),%d1
8043 or.l &0x3F800000,%d1 # SGN
8044 fmov.s %d1,%fp0 # SGN IN SGL FMT
8046 fmov.l %d0,%fpcr # restore users round prec,mode
8047 mov.b &FADD_OP,%d1 # last inst is ADD
8048 fadd.x %fp1,%fp0
8049 bra t_inx2
8051 TANHSM:
8052 fmov.l %d0,%fpcr # restore users round prec,mode
8053 mov.b &FMOV_OP,%d1 # last inst is MOVE
8054 fmov.x X(%a6),%fp0 # last inst - possible exception set
8055 bra t_catch
8057 #---RETURN SGN(X) - SGN(X)EPS
8058 TANHHUGE:
8059 mov.l X(%a6),%d1
8060 and.l &0x80000000,%d1
8061 or.l &0x3F800000,%d1
8062 fmov.s %d1,%fp0
8063 and.l &0x80000000,%d1
8064 eor.l &0x80800000,%d1 # -SIGN(X)*EPS
8066 fmov.l %d0,%fpcr # restore users round prec,mode
8067 fadd.s %d1,%fp0
8068 bra t_inx2
8070 global stanhd
8071 #--TANH(X) = X FOR DENORMALIZED X
8072 stanhd:
8073 bra t_extdnrm
8075 #########################################################################
8076 # slogn(): computes the natural logarithm of a normalized input #
8077 # slognd(): computes the natural logarithm of a denormalized input #
8078 # slognp1(): computes the log(1+X) of a normalized input #
8079 # slognp1d(): computes the log(1+X) of a denormalized input #
8081 # INPUT *************************************************************** #
8082 # a0 = pointer to extended precision input #
8083 # d0 = round precision,mode #
8085 # OUTPUT ************************************************************** #
8086 # fp0 = log(X) or log(1+X) #
8088 # ACCURACY and MONOTONICITY ******************************************* #
8089 # The returned result is within 2 ulps in 64 significant bits, #
8090 # i.e. within 0.5001 ulp to 53 bits if the result is subsequently #
8091 # rounded to double precision. The result is provably monotonic #
8092 # in double precision. #
8094 # ALGORITHM *********************************************************** #
8095 # LOGN: #
8096 # Step 1. If |X-1| < 1/16, approximate log(X) by an odd #
8097 # polynomial in u, where u = 2(X-1)/(X+1). Otherwise, #
8098 # move on to Step 2. #
8100 # Step 2. X = 2**k * Y where 1 <= Y < 2. Define F to be the first #
8101 # seven significant bits of Y plus 2**(-7), i.e. #
8102 # F = 1.xxxxxx1 in base 2 where the six "x" match those #
8103 # of Y. Note that |Y-F| <= 2**(-7). #
8105 # Step 3. Define u = (Y-F)/F. Approximate log(1+u) by a #
8106 # polynomial in u, log(1+u) = poly. #
8108 # Step 4. Reconstruct #
8109 # log(X) = log( 2**k * Y ) = k*log(2) + log(F) + log(1+u) #
8110 # by k*log(2) + (log(F) + poly). The values of log(F) are #
8111 # calculated beforehand and stored in the program. #
8113 # lognp1: #
8114 # Step 1: If |X| < 1/16, approximate log(1+X) by an odd #
8115 # polynomial in u where u = 2X/(2+X). Otherwise, move on #
8116 # to Step 2. #
8118 # Step 2: Let 1+X = 2**k * Y, where 1 <= Y < 2. Define F as done #
8119 # in Step 2 of the algorithm for LOGN and compute #
8120 # log(1+X) as k*log(2) + log(F) + poly where poly #
8121 # approximates log(1+u), u = (Y-F)/F. #
8123 # Implementation Notes: #
8124 # Note 1. There are 64 different possible values for F, thus 64 #
8125 # log(F)'s need to be tabulated. Moreover, the values of #
8126 # 1/F are also tabulated so that the division in (Y-F)/F #
8127 # can be performed by a multiplication. #
8129 # Note 2. In Step 2 of lognp1, in order to preserve accuracy, #
8130 # the value Y-F has to be calculated carefully when #
8131 # 1/2 <= X < 3/2. #
8133 # Note 3. To fully exploit the pipeline, polynomials are usually #
8134 # separated into two parts evaluated independently before #
8135 # being added up. #
8137 #########################################################################
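#
# Illustrative C sketch of the Step 2-4 reduction (not part of the package;
# the helper name is hypothetical, and libm's log()/log1p() stand in for the
# stored log(F) table and the polynomial):
#
#     #include <math.h>
#     double logn_sketch(double x)          /* assumes x finite and > 0      */
#     {
#         int k;
#         double y = frexp(x, &k);          /* x = y * 2^k, 0.5 <= y < 1     */
#         y *= 2.0; k -= 1;                 /* renormalize so 1 <= y < 2     */
#         double f = floor(y*64.0)/64.0 + 1.0/128.0;  /* F = 1.xxxxxx1       */
#         double u = (y - f)/f;             /* |y - f| <= 2^(-7)             */
#         return k*log(2.0) + log(f) + log1p(u);
#     }
#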
8138 LOGOF2:
8139 long 0x3FFE0000,0xB17217F7,0xD1CF79AC,0x00000000
8141 one:
8142 long 0x3F800000
8143 zero:
8144 long 0x00000000
8145 infty:
8146 long 0x7F800000
8147 negone:
8148 long 0xBF800000
8150 LOGA6:
8151 long 0x3FC2499A,0xB5E4040B
8152 LOGA5:
8153 long 0xBFC555B5,0x848CB7DB
8155 LOGA4:
8156 long 0x3FC99999,0x987D8730
8157 LOGA3:
8158 long 0xBFCFFFFF,0xFF6F7E97
8160 LOGA2:
8161 long 0x3FD55555,0x555555A4
8162 LOGA1:
8163 long 0xBFE00000,0x00000008
8165 LOGB5:
8166 long 0x3F175496,0xADD7DAD6
8167 LOGB4:
8168 long 0x3F3C71C2,0xFE80C7E0
8170 LOGB3:
8171 long 0x3F624924,0x928BCCFF
8172 LOGB2:
8173 long 0x3F899999,0x999995EC
8175 LOGB1:
8176 long 0x3FB55555,0x55555555
8177 TWO:
8178 long 0x40000000,0x00000000
8180 LTHOLD:
8181 long 0x3f990000,0x80000000,0x00000000,0x00000000
8183 LOGTBL:
8184 long 0x3FFE0000,0xFE03F80F,0xE03F80FE,0x00000000
8185 long 0x3FF70000,0xFF015358,0x833C47E2,0x00000000
8186 long 0x3FFE0000,0xFA232CF2,0x52138AC0,0x00000000
8187 long 0x3FF90000,0xBDC8D83E,0xAD88D549,0x00000000
8188 long 0x3FFE0000,0xF6603D98,0x0F6603DA,0x00000000
8189 long 0x3FFA0000,0x9CF43DCF,0xF5EAFD48,0x00000000
8190 long 0x3FFE0000,0xF2B9D648,0x0F2B9D65,0x00000000
8191 long 0x3FFA0000,0xDA16EB88,0xCB8DF614,0x00000000
8192 long 0x3FFE0000,0xEF2EB71F,0xC4345238,0x00000000
8193 long 0x3FFB0000,0x8B29B775,0x1BD70743,0x00000000
8194 long 0x3FFE0000,0xEBBDB2A5,0xC1619C8C,0x00000000
8195 long 0x3FFB0000,0xA8D839F8,0x30C1FB49,0x00000000
8196 long 0x3FFE0000,0xE865AC7B,0x7603A197,0x00000000
8197 long 0x3FFB0000,0xC61A2EB1,0x8CD907AD,0x00000000
8198 long 0x3FFE0000,0xE525982A,0xF70C880E,0x00000000
8199 long 0x3FFB0000,0xE2F2A47A,0xDE3A18AF,0x00000000
8200 long 0x3FFE0000,0xE1FC780E,0x1FC780E2,0x00000000
8201 long 0x3FFB0000,0xFF64898E,0xDF55D551,0x00000000
8202 long 0x3FFE0000,0xDEE95C4C,0xA037BA57,0x00000000
8203 long 0x3FFC0000,0x8DB956A9,0x7B3D0148,0x00000000
8204 long 0x3FFE0000,0xDBEB61EE,0xD19C5958,0x00000000
8205 long 0x3FFC0000,0x9B8FE100,0xF47BA1DE,0x00000000
8206 long 0x3FFE0000,0xD901B203,0x6406C80E,0x00000000
8207 long 0x3FFC0000,0xA9372F1D,0x0DA1BD17,0x00000000
8208 long 0x3FFE0000,0xD62B80D6,0x2B80D62C,0x00000000
8209 long 0x3FFC0000,0xB6B07F38,0xCE90E46B,0x00000000
8210 long 0x3FFE0000,0xD3680D36,0x80D3680D,0x00000000
8211 long 0x3FFC0000,0xC3FD0329,0x06488481,0x00000000
8212 long 0x3FFE0000,0xD0B69FCB,0xD2580D0B,0x00000000
8213 long 0x3FFC0000,0xD11DE0FF,0x15AB18CA,0x00000000
8214 long 0x3FFE0000,0xCE168A77,0x25080CE1,0x00000000
8215 long 0x3FFC0000,0xDE1433A1,0x6C66B150,0x00000000
8216 long 0x3FFE0000,0xCB8727C0,0x65C393E0,0x00000000
8217 long 0x3FFC0000,0xEAE10B5A,0x7DDC8ADD,0x00000000
8218 long 0x3FFE0000,0xC907DA4E,0x871146AD,0x00000000
8219 long 0x3FFC0000,0xF7856E5E,0xE2C9B291,0x00000000
8220 long 0x3FFE0000,0xC6980C69,0x80C6980C,0x00000000
8221 long 0x3FFD0000,0x82012CA5,0xA68206D7,0x00000000
8222 long 0x3FFE0000,0xC4372F85,0x5D824CA6,0x00000000
8223 long 0x3FFD0000,0x882C5FCD,0x7256A8C5,0x00000000
8224 long 0x3FFE0000,0xC1E4BBD5,0x95F6E947,0x00000000
8225 long 0x3FFD0000,0x8E44C60B,0x4CCFD7DE,0x00000000
8226 long 0x3FFE0000,0xBFA02FE8,0x0BFA02FF,0x00000000
8227 long 0x3FFD0000,0x944AD09E,0xF4351AF6,0x00000000
8228 long 0x3FFE0000,0xBD691047,0x07661AA3,0x00000000
8229 long 0x3FFD0000,0x9A3EECD4,0xC3EAA6B2,0x00000000
8230 long 0x3FFE0000,0xBB3EE721,0xA54D880C,0x00000000
8231 long 0x3FFD0000,0xA0218434,0x353F1DE8,0x00000000
8232 long 0x3FFE0000,0xB92143FA,0x36F5E02E,0x00000000
8233 long 0x3FFD0000,0xA5F2FCAB,0xBBC506DA,0x00000000
8234 long 0x3FFE0000,0xB70FBB5A,0x19BE3659,0x00000000
8235 long 0x3FFD0000,0xABB3B8BA,0x2AD362A5,0x00000000
8236 long 0x3FFE0000,0xB509E68A,0x9B94821F,0x00000000
8237 long 0x3FFD0000,0xB1641795,0xCE3CA97B,0x00000000
8238 long 0x3FFE0000,0xB30F6352,0x8917C80B,0x00000000
8239 long 0x3FFD0000,0xB7047551,0x5D0F1C61,0x00000000
8240 long 0x3FFE0000,0xB11FD3B8,0x0B11FD3C,0x00000000
8241 long 0x3FFD0000,0xBC952AFE,0xEA3D13E1,0x00000000
8242 long 0x3FFE0000,0xAF3ADDC6,0x80AF3ADE,0x00000000
8243 long 0x3FFD0000,0xC2168ED0,0xF458BA4A,0x00000000
8244 long 0x3FFE0000,0xAD602B58,0x0AD602B6,0x00000000
8245 long 0x3FFD0000,0xC788F439,0xB3163BF1,0x00000000
8246 long 0x3FFE0000,0xAB8F69E2,0x8359CD11,0x00000000
8247 long 0x3FFD0000,0xCCECAC08,0xBF04565D,0x00000000
8248 long 0x3FFE0000,0xA9C84A47,0xA07F5638,0x00000000
8249 long 0x3FFD0000,0xD2420487,0x2DD85160,0x00000000
8250 long 0x3FFE0000,0xA80A80A8,0x0A80A80B,0x00000000
8251 long 0x3FFD0000,0xD7894992,0x3BC3588A,0x00000000
8252 long 0x3FFE0000,0xA655C439,0x2D7B73A8,0x00000000
8253 long 0x3FFD0000,0xDCC2C4B4,0x9887DACC,0x00000000
8254 long 0x3FFE0000,0xA4A9CF1D,0x96833751,0x00000000
8255 long 0x3FFD0000,0xE1EEBD3E,0x6D6A6B9E,0x00000000
8256 long 0x3FFE0000,0xA3065E3F,0xAE7CD0E0,0x00000000
8257 long 0x3FFD0000,0xE70D785C,0x2F9F5BDC,0x00000000
8258 long 0x3FFE0000,0xA16B312E,0xA8FC377D,0x00000000
8259 long 0x3FFD0000,0xEC1F392C,0x5179F283,0x00000000
8260 long 0x3FFE0000,0x9FD809FD,0x809FD80A,0x00000000
8261 long 0x3FFD0000,0xF12440D3,0xE36130E6,0x00000000
8262 long 0x3FFE0000,0x9E4CAD23,0xDD5F3A20,0x00000000
8263 long 0x3FFD0000,0xF61CCE92,0x346600BB,0x00000000
8264 long 0x3FFE0000,0x9CC8E160,0xC3FB19B9,0x00000000
8265 long 0x3FFD0000,0xFB091FD3,0x8145630A,0x00000000
8266 long 0x3FFE0000,0x9B4C6F9E,0xF03A3CAA,0x00000000
8267 long 0x3FFD0000,0xFFE97042,0xBFA4C2AD,0x00000000
8268 long 0x3FFE0000,0x99D722DA,0xBDE58F06,0x00000000
8269 long 0x3FFE0000,0x825EFCED,0x49369330,0x00000000
8270 long 0x3FFE0000,0x9868C809,0x868C8098,0x00000000
8271 long 0x3FFE0000,0x84C37A7A,0xB9A905C9,0x00000000
8272 long 0x3FFE0000,0x97012E02,0x5C04B809,0x00000000
8273 long 0x3FFE0000,0x87224C2E,0x8E645FB7,0x00000000
8274 long 0x3FFE0000,0x95A02568,0x095A0257,0x00000000
8275 long 0x3FFE0000,0x897B8CAC,0x9F7DE298,0x00000000
8276 long 0x3FFE0000,0x94458094,0x45809446,0x00000000
8277 long 0x3FFE0000,0x8BCF55DE,0xC4CD05FE,0x00000000
8278 long 0x3FFE0000,0x92F11384,0x0497889C,0x00000000
8279 long 0x3FFE0000,0x8E1DC0FB,0x89E125E5,0x00000000
8280 long 0x3FFE0000,0x91A2B3C4,0xD5E6F809,0x00000000
8281 long 0x3FFE0000,0x9066E68C,0x955B6C9B,0x00000000
8282 long 0x3FFE0000,0x905A3863,0x3E06C43B,0x00000000
8283 long 0x3FFE0000,0x92AADE74,0xC7BE59E0,0x00000000
8284 long 0x3FFE0000,0x8F1779D9,0xFDC3A219,0x00000000
8285 long 0x3FFE0000,0x94E9BFF6,0x15845643,0x00000000
8286 long 0x3FFE0000,0x8DDA5202,0x37694809,0x00000000
8287 long 0x3FFE0000,0x9723A1B7,0x20134203,0x00000000
8288 long 0x3FFE0000,0x8CA29C04,0x6514E023,0x00000000
8289 long 0x3FFE0000,0x995899C8,0x90EB8990,0x00000000
8290 long 0x3FFE0000,0x8B70344A,0x139BC75A,0x00000000
8291 long 0x3FFE0000,0x9B88BDAA,0x3A3DAE2F,0x00000000
8292 long 0x3FFE0000,0x8A42F870,0x5669DB46,0x00000000
8293 long 0x3FFE0000,0x9DB4224F,0xFFE1157C,0x00000000
8294 long 0x3FFE0000,0x891AC73A,0xE9819B50,0x00000000
8295 long 0x3FFE0000,0x9FDADC26,0x8B7A12DA,0x00000000
8296 long 0x3FFE0000,0x87F78087,0xF78087F8,0x00000000
8297 long 0x3FFE0000,0xA1FCFF17,0xCE733BD4,0x00000000
8298 long 0x3FFE0000,0x86D90544,0x7A34ACC6,0x00000000
8299 long 0x3FFE0000,0xA41A9E8F,0x5446FB9F,0x00000000
8300 long 0x3FFE0000,0x85BF3761,0x2CEE3C9B,0x00000000
8301 long 0x3FFE0000,0xA633CD7E,0x6771CD8B,0x00000000
8302 long 0x3FFE0000,0x84A9F9C8,0x084A9F9D,0x00000000
8303 long 0x3FFE0000,0xA8489E60,0x0B435A5E,0x00000000
8304 long 0x3FFE0000,0x83993052,0x3FBE3368,0x00000000
8305 long 0x3FFE0000,0xAA59233C,0xCCA4BD49,0x00000000
8306 long 0x3FFE0000,0x828CBFBE,0xB9A020A3,0x00000000
8307 long 0x3FFE0000,0xAC656DAE,0x6BCC4985,0x00000000
8308 long 0x3FFE0000,0x81848DA8,0xFAF0D277,0x00000000
8309 long 0x3FFE0000,0xAE6D8EE3,0x60BB2468,0x00000000
8310 long 0x3FFE0000,0x80808080,0x80808081,0x00000000
8311 long 0x3FFE0000,0xB07197A2,0x3C46C654,0x00000000
8313 set ADJK,L_SCR1
8315 set X,FP_SCR0
8316 set XDCARE,X+2
8317 set XFRAC,X+4
8319 set F,FP_SCR1
8320 set FFRAC,F+4
8322 set KLOG2,FP_SCR0
8324 set SAVEU,FP_SCR0
8326 global slogn
8327 #--ENTRY POINT FOR LOG(X) FOR X FINITE, NON-ZERO, NOT NAN'S
8328 slogn:
8329 fmov.x (%a0),%fp0 # LOAD INPUT
8330 mov.l &0x00000000,ADJK(%a6)
8332 LOGBGN:
8333 #--FPCR SAVED AND CLEARED, INPUT IS 2^(ADJK)*FP0, FP0 CONTAINS
8334 #--A FINITE, NON-ZERO, NORMALIZED NUMBER.
8336 mov.l (%a0),%d1
8337 mov.w 4(%a0),%d1
8339 mov.l (%a0),X(%a6)
8340 mov.l 4(%a0),X+4(%a6)
8341 mov.l 8(%a0),X+8(%a6)
8343 cmp.l %d1,&0 # CHECK IF X IS NEGATIVE
8344 blt.w LOGNEG # LOG OF NEGATIVE ARGUMENT IS INVALID
8345 # X IS POSITIVE, CHECK IF X IS NEAR 1
8346 cmp.l %d1,&0x3ffef07d # IS X < 15/16?
8347 blt.b LOGMAIN # YES
8348 cmp.l %d1,&0x3fff8841 # IS X > 17/16?
8349 ble.w LOGNEAR1 # NO
8351 LOGMAIN:
8352 #--THIS SHOULD BE THE USUAL CASE, X NOT VERY CLOSE TO 1
8354 #--X = 2^(K) * Y, 1 <= Y < 2. THUS, Y = 1.XXXXXXXX....XX IN BINARY.
8355 #--WE DEFINE F = 1.XXXXXX1, I.E. FIRST 7 BITS OF Y AND ATTACH A 1.
8356 #--THE IDEA IS THAT LOG(X) = K*LOG2 + LOG(Y)
8357 #-- = K*LOG2 + LOG(F) + LOG(1 + (Y-F)/F).
8358 #--NOTE THAT U = (Y-F)/F IS VERY SMALL AND THUS APPROXIMATING
8359 #--LOG(1+U) CAN BE VERY EFFICIENT.
8360 #--ALSO NOTE THAT THE VALUE 1/F IS STORED IN A TABLE SO THAT NO
8361 #--DIVISION IS NEEDED TO CALCULATE (Y-F)/F.
8363 #--GET K, Y, F, AND ADDRESS OF 1/F.
8364 asr.l &8,%d1
8365 asr.l &8,%d1 # SHIFTED 16 BITS, BIASED EXPO. OF X
8366 sub.l &0x3FFF,%d1 # THIS IS K
8367 add.l ADJK(%a6),%d1 # ADJUST K, ORIGINAL INPUT MAY BE DENORM.
8368 lea LOGTBL(%pc),%a0 # BASE ADDRESS OF 1/F AND LOG(F)
8369 fmov.l %d1,%fp1 # CONVERT K TO FLOATING-POINT FORMAT
8371 #--WHILE THE CONVERSION IS GOING ON, WE GET F AND ADDRESS OF 1/F
8372 mov.l &0x3FFF0000,X(%a6) # X IS NOW Y, I.E. 2^(-K)*X
8373 mov.l XFRAC(%a6),FFRAC(%a6)
8374 and.l &0xFE000000,FFRAC(%a6) # FIRST 7 BITS OF Y
8375 or.l &0x01000000,FFRAC(%a6) # GET F: ATTACH A 1 AT THE EIGHTH BIT
8376 mov.l FFRAC(%a6),%d1 # READY TO GET ADDRESS OF 1/F
8377 and.l &0x7E000000,%d1
8378 asr.l &8,%d1
8379 asr.l &8,%d1
8380 asr.l &4,%d1 # SHIFTED 20, D1 IS THE DISPLACEMENT
8381 add.l %d1,%a0 # A0 IS THE ADDRESS FOR 1/F
8383 fmov.x X(%a6),%fp0
8384 mov.l &0x3fff0000,F(%a6)
8385 clr.l F+8(%a6)
8386 fsub.x F(%a6),%fp0 # Y-F
8387 fmovm.x &0xc,-(%sp) # SAVE FP2-3 WHILE FP0 IS NOT READY
8388 #--SUMMARY: FP0 IS Y-F, A0 IS ADDRESS OF 1/F, FP1 IS K
8389 #--REGISTERS SAVED: FPCR, FP1, FP2/FP3
8391 LP1CONT1:
8392 #--A RE-ENTRY POINT FOR LOGNP1
8393 fmul.x (%a0),%fp0 # FP0 IS U = (Y-F)/F
8394 fmul.x LOGOF2(%pc),%fp1 # GET K*LOG2 WHILE FP0 IS NOT READY
8395 fmov.x %fp0,%fp2
8396 fmul.x %fp2,%fp2 # FP2 IS V=U*U
8397 fmov.x %fp1,KLOG2(%a6) # PUT K*LOG2 IN MEMORY, FREE FP1
8399 #--LOG(1+U) IS APPROXIMATED BY
8400 #--U + V*(A1+U*(A2+U*(A3+U*(A4+U*(A5+U*A6))))) WHICH IS
8401 #--[U + V*(A1+V*(A3+V*A5))] + [U*V*(A2+V*(A4+V*A6))]
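#
# In C terms (a sketch only; A[1..6] stand for the LOGA1..LOGA6 coefficients
# above), the two bracketed halves carry no data dependence on each other
# until the final add:
#
#     static double log1p_poly(double u, const double A[7])
#     {
#         double v = u*u;
#         double odd  = u + v*(A[1] + v*(A[3] + v*A[5]));   /* first half  */
#         double even = u*v*(A[2] + v*(A[4] + v*A[6]));     /* second half */
#         return odd + even;
#     }
#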
8403 fmov.x %fp2,%fp3
8404 fmov.x %fp2,%fp1
8406 fmul.d LOGA6(%pc),%fp1 # V*A6
8407 fmul.d LOGA5(%pc),%fp2 # V*A5
8409 fadd.d LOGA4(%pc),%fp1 # A4+V*A6
8410 fadd.d LOGA3(%pc),%fp2 # A3+V*A5
8412 fmul.x %fp3,%fp1 # V*(A4+V*A6)
8413 fmul.x %fp3,%fp2 # V*(A3+V*A5)
8415 fadd.d LOGA2(%pc),%fp1 # A2+V*(A4+V*A6)
8416 fadd.d LOGA1(%pc),%fp2 # A1+V*(A3+V*A5)
8418 fmul.x %fp3,%fp1 # V*(A2+V*(A4+V*A6))
8419 add.l &16,%a0 # ADDRESS OF LOG(F)
8420 fmul.x %fp3,%fp2 # V*(A1+V*(A3+V*A5))
8422 fmul.x %fp0,%fp1 # U*V*(A2+V*(A4+V*A6))
8423 fadd.x %fp2,%fp0 # U+V*(A1+V*(A3+V*A5))
8425 fadd.x (%a0),%fp1 # LOG(F)+U*V*(A2+V*(A4+V*A6))
8426 fmovm.x (%sp)+,&0x30 # RESTORE FP2-3
8427 fadd.x %fp1,%fp0 # FP0 IS LOG(F) + LOG(1+U)
8429 fmov.l %d0,%fpcr
8430 fadd.x KLOG2(%a6),%fp0 # FINAL ADD
8431 bra t_inx2
8434 LOGNEAR1:
8436 # if the input is exactly equal to one, then exit through ld_pzero.
8437 # if these 2 lines weren't here, the correct answer would be returned
8438 # but the INEX2 bit would be set.
8439 fcmp.b %fp0,&0x1 # is it equal to one?
8440 fbeq.l ld_pzero # yes
8442 #--REGISTERS SAVED: FPCR, FP1. FP0 CONTAINS THE INPUT.
8443 fmov.x %fp0,%fp1
8444 fsub.s one(%pc),%fp1 # FP1 IS X-1
8445 fadd.s one(%pc),%fp0 # FP0 IS X+1
8446 fadd.x %fp1,%fp1 # FP1 IS 2(X-1)
8447 #--LOG(X) = LOG(1+U/2)-LOG(1-U/2) WHICH IS AN ODD POLYNOMIAL
8448 #--IN U, U = 2(X-1)/(X+1) = FP1/FP0
8450 LP1CONT2:
8451 #--THIS IS A RE-ENTRY POINT FOR LOGNP1
8452 fdiv.x %fp0,%fp1 # FP1 IS U
8453 fmovm.x &0xc,-(%sp) # SAVE FP2-3
8454 #--REGISTERS SAVED ARE NOW FPCR,FP1,FP2,FP3
8455 #--LET V=U*U, W=V*V, CALCULATE
8456 #--U + U*V*(B1 + V*(B2 + V*(B3 + V*(B4 + V*B5)))) BY
8457 #--U + U*V*( [B1 + W*(B3 + W*B5)] + [V*(B2 + W*B4)] )
8458 fmov.x %fp1,%fp0
8459 fmul.x %fp0,%fp0 # FP0 IS V
8460 fmov.x %fp1,SAVEU(%a6) # STORE U IN MEMORY, FREE FP1
8461 fmov.x %fp0,%fp1
8462 fmul.x %fp1,%fp1 # FP1 IS W
8464 fmov.d LOGB5(%pc),%fp3
8465 fmov.d LOGB4(%pc),%fp2
8467 fmul.x %fp1,%fp3 # W*B5
8468 fmul.x %fp1,%fp2 # W*B4
8470 fadd.d LOGB3(%pc),%fp3 # B3+W*B5
8471 fadd.d LOGB2(%pc),%fp2 # B2+W*B4
8473 fmul.x %fp3,%fp1 # W*(B3+W*B5), FP3 RELEASED
8475 fmul.x %fp0,%fp2 # V*(B2+W*B4)
8477 fadd.d LOGB1(%pc),%fp1 # B1+W*(B3+W*B5)
8478 fmul.x SAVEU(%a6),%fp0 # FP0 IS U*V
8480 fadd.x %fp2,%fp1 # B1+W*(B3+W*B5) + V*(B2+W*B4), FP2 RELEASED
8481 fmovm.x (%sp)+,&0x30 # FP2-3 RESTORED
8483 fmul.x %fp1,%fp0 # U*V*( [B1+W*(B3+W*B5)] + [V*(B2+W*B4)] )
8485 fmov.l %d0,%fpcr
8486 fadd.x SAVEU(%a6),%fp0
8487 bra t_inx2
8489 #--REGISTERS SAVED FPCR. LOG(-VE) IS INVALID
8490 LOGNEG:
8491 bra t_operr
8493 global slognd
8494 slognd:
8495 #--ENTRY POINT FOR LOG(X) FOR DENORMALIZED INPUT
8497 mov.l &-100,ADJK(%a6) # INPUT = 2^(ADJK) * FP0
8499 #----normalize the input value by left shifting k bits (k to be determined
8500 #----below), adjusting exponent and storing -k to ADJK
8501 #----the value TWOTO100 is no longer needed.
8502 #----Note that this code assumes the denormalized input is NON-ZERO.
8504 movm.l &0x3f00,-(%sp) # save some registers {d2-d7}
8505 mov.l (%a0),%d3 # D3 is exponent of smallest norm. #
8506 mov.l 4(%a0),%d4
8507 mov.l 8(%a0),%d5 # (D4,D5) is (Hi_X,Lo_X)
8508 clr.l %d2 # D2 used for holding K
8510 tst.l %d4
8511 bne.b Hi_not0
8513 Hi_0:
8514 mov.l %d5,%d4
8515 clr.l %d5
8516 mov.l &32,%d2
8517 clr.l %d6
8518 bfffo %d4{&0:&32},%d6
8519 lsl.l %d6,%d4
8520 add.l %d6,%d2 # (D3,D4,D5) is normalized
8522 mov.l %d3,X(%a6)
8523 mov.l %d4,XFRAC(%a6)
8524 mov.l %d5,XFRAC+4(%a6)
8525 neg.l %d2
8526 mov.l %d2,ADJK(%a6)
8527 fmov.x X(%a6),%fp0
8528 movm.l (%sp)+,&0xfc # restore registers {d2-d7}
8529 lea X(%a6),%a0
8530 bra.w LOGBGN # begin regular log(X)
8532 Hi_not0:
8533 clr.l %d6
8534 bfffo %d4{&0:&32},%d6 # find first 1
8535 mov.l %d6,%d2 # get k
8536 lsl.l %d6,%d4
8537 mov.l %d5,%d7 # a copy of D5
8538 lsl.l %d6,%d5
8539 neg.l %d6
8540 add.l &32,%d6
8541 lsr.l %d6,%d7
8542 or.l %d7,%d4 # (D3,D4,D5) normalized
8544 mov.l %d3,X(%a6)
8545 mov.l %d4,XFRAC(%a6)
8546 mov.l %d5,XFRAC+4(%a6)
8547 neg.l %d2
8548 mov.l %d2,ADJK(%a6)
8549 fmov.x X(%a6),%fp0
8550 movm.l (%sp)+,&0xfc # restore registers {d2-d7}
8551 lea X(%a6),%a0
8552 bra.w LOGBGN # begin regular log(X)
8554 global slognp1
8555 #--ENTRY POINT FOR LOG(1+X) FOR X FINITE, NON-ZERO, NOT NAN'S
8556 slognp1:
8557 fmov.x (%a0),%fp0 # LOAD INPUT
8558 fabs.x %fp0 # test magnitude
8559 fcmp.x %fp0,LTHOLD(%pc) # compare with min threshold
8560 fbgt.w LP1REAL # if greater, continue
8561 fmov.l %d0,%fpcr
8562 mov.b &FMOV_OP,%d1 # last inst is MOVE
8563 fmov.x (%a0),%fp0 # return signed argument
8564 bra t_catch
8566 LP1REAL:
8567 fmov.x (%a0),%fp0 # LOAD INPUT
8568 mov.l &0x00000000,ADJK(%a6)
8569 fmov.x %fp0,%fp1 # FP1 IS INPUT Z
8570 fadd.s one(%pc),%fp0 # X := ROUND(1+Z)
8571 fmov.x %fp0,X(%a6)
8572 mov.w XFRAC(%a6),XDCARE(%a6)
8573 mov.l X(%a6),%d1
8574 cmp.l %d1,&0
8575 ble.w LP1NEG0 # LOG OF ZERO OR -VE
8576 cmp.l %d1,&0x3ffe8000 # IS X IN BOUNDS [1/2,3/2]?
8577 blt.w LOGMAIN
8578 cmp.l %d1,&0x3fffc000
8579 bgt.w LOGMAIN
8580 #--IF 1+Z > 3/2 OR 1+Z < 1/2, THEN X, WHICH IS THE ROUNDED VALUE OF 1+Z,
8581 #--CONTAINS AT LEAST 63 BITS OF INFORMATION OF Z. IN THAT CASE,
8582 #--SIMPLY INVOKE LOG(X) FOR LOG(1+Z).
8584 LP1NEAR1:
8585 #--NEXT SEE IF EXP(-1/16) < X < EXP(1/16)
8586 cmp.l %d1,&0x3ffef07d
8587 blt.w LP1CARE
8588 cmp.l %d1,&0x3fff8841
8589 bgt.w LP1CARE
8591 LP1ONE16:
8592 #--EXP(-1/16) < X < EXP(1/16). LOG(1+Z) = LOG(1+U/2) - LOG(1-U/2)
8593 #--WHERE U = 2Z/(2+Z) = 2Z/(1+X).
8594 fadd.x %fp1,%fp1 # FP1 IS 2Z
8595 fadd.s one(%pc),%fp0 # FP0 IS 1+X
8596 #--U = FP1/FP0
8597 bra.w LP1CONT2
8599 LP1CARE:
8600 #--HERE WE USE THE USUAL TABLE DRIVEN APPROACH. CARE HAS TO BE
8601 #--TAKEN BECAUSE 1+Z CAN HAVE 67 BITS OF INFORMATION AND WE MUST
8602 #--PRESERVE ALL THE INFORMATION. BECAUSE 1+Z IS IN [1/2,3/2],
8603 #--THERE ARE ONLY TWO CASES.
8604 #--CASE 1: 1+Z < 1, THEN K = -1 AND Y-F = (2-F) + 2Z
8605 #--CASE 2: 1+Z > 1, THEN K = 0 AND Y-F = (1-F) + Z
8606 #--ON RETURNING TO LP1CONT1, WE MUST HAVE K IN FP1, ADDRESS OF
8607 #--(1/F) IN A0, Y-F IN FP0, AND FP2 SAVED.
8609 mov.l XFRAC(%a6),FFRAC(%a6)
8610 and.l &0xFE000000,FFRAC(%a6)
8611 or.l &0x01000000,FFRAC(%a6) # F OBTAINED
8612 cmp.l %d1,&0x3FFF8000 # SEE IF 1+Z > 1
8613 bge.b KISZERO
8615 KISNEG1:
8616 fmov.s TWO(%pc),%fp0
8617 mov.l &0x3fff0000,F(%a6)
8618 clr.l F+8(%a6)
8619 fsub.x F(%a6),%fp0 # 2-F
8620 mov.l FFRAC(%a6),%d1
8621 and.l &0x7E000000,%d1
8622 asr.l &8,%d1
8623 asr.l &8,%d1
8624 asr.l &4,%d1 # D1 CONTAINS DISPLACEMENT FOR 1/F
8625 fadd.x %fp1,%fp1 # GET 2Z
8626 fmovm.x &0xc,-(%sp) # SAVE FP2 {%fp2/%fp3}
8627 fadd.x %fp1,%fp0 # FP0 IS Y-F = (2-F)+2Z
8628 lea LOGTBL(%pc),%a0 # A0 IS ADDRESS OF 1/F
8629 add.l %d1,%a0
8630 fmov.s negone(%pc),%fp1 # FP1 IS K = -1
8631 bra.w LP1CONT1
8633 KISZERO:
8634 fmov.s one(%pc),%fp0
8635 mov.l &0x3fff0000,F(%a6)
8636 clr.l F+8(%a6)
8637 fsub.x F(%a6),%fp0 # 1-F
8638 mov.l FFRAC(%a6),%d1
8639 and.l &0x7E000000,%d1
8640 asr.l &8,%d1
8641 asr.l &8,%d1
8642 asr.l &4,%d1
8643 fadd.x %fp1,%fp0 # FP0 IS Y-F
8644 fmovm.x &0xc,-(%sp) # FP2 SAVED {%fp2/%fp3}
8645 lea LOGTBL(%pc),%a0
8646 add.l %d1,%a0 # A0 IS ADDRESS OF 1/F
8647 fmov.s zero(%pc),%fp1 # FP1 IS K = 0
8648 bra.w LP1CONT1
8650 LP1NEG0:
8651 #--FPCR SAVED. D1 IS X IN COMPACT FORM.
8652 cmp.l %d1,&0
8653 blt.b LP1NEG
8654 LP1ZERO:
8655 fmov.s negone(%pc),%fp0
8657 fmov.l %d0,%fpcr
8658 bra t_dz
8660 LP1NEG:
8661 fmov.s zero(%pc),%fp0
8663 fmov.l %d0,%fpcr
8664 bra t_operr
8666 global slognp1d
8667 #--ENTRY POINT FOR LOG(1+Z) FOR DENORMALIZED INPUT
8668 # Simply return the denorm
8669 slognp1d:
8670 bra t_extdnrm
8672 #########################################################################
8673 # satanh(): computes the inverse hyperbolic tangent of a norm input #
8674 # satanhd(): computes the inverse hyperbolic tangent of a denorm input #
8676 # INPUT *************************************************************** #
8677 # a0 = pointer to extended precision input #
8678 # d0 = round precision,mode #
8680 # OUTPUT ************************************************************** #
8681 # fp0 = arctanh(X) #
8683 # ACCURACY and MONOTONICITY ******************************************* #
8684 # The returned result is within 3 ulps in 64 significant bits, #
8685 # i.e. within 0.5001 ulp to 53 bits if the result is subsequently #
8686 # rounded to double precision. The result is provably monotonic #
8687 # in double precision. #
8689 # ALGORITHM *********************************************************** #
8691 # ATANH #
8692 # 1. If |X| >= 1, go to 3. #
8694 # 2. (|X| < 1) Calculate atanh(X) by #
8695 # sgn := sign(X) #
8696 # y := |X| #
8697 # z := 2y/(1-y) #
8698 # atanh(X) := sgn * (1/2) * logp1(z) #
8699 # Exit. #
8701 # 3. If |X| > 1, go to 5. #
8703 # 4. (|X| = 1) Generate infinity with an appropriate sign and #
8704 # divide-by-zero by #
8705 # sgn := sign(X) #
8706 # atanh(X) := sgn / (+0). #
8707 # Exit. #
8709 # 5. (|X| > 1) Generate an invalid operation by 0 * infinity. #
8710 # Exit. #
8712 #########################################################################
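#
# Hedged C sketch of step 2 (the |X| < 1 path); the helper name is
# hypothetical and libm's log1p() stands in for the call to slognp1:
#
#     #include <math.h>
#     double atanh_sketch(double x)          /* assumes |x| < 1             */
#     {
#         double y = fabs(x);                /* y := |X|                    */
#         double z = 2.0*y/(1.0 - y);        /* z := 2y/(1-y)               */
#         return copysign(0.5*log1p(z), x);  /* sgn * (1/2) * logp1(z)      */
#     }
#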
8714 global satanh
8715 satanh:
8716 mov.l (%a0),%d1
8717 mov.w 4(%a0),%d1
8718 and.l &0x7FFFFFFF,%d1
8719 cmp.l %d1,&0x3FFF8000
8720 bge.b ATANHBIG
8722 #--THIS IS THE USUAL CASE, |X| < 1
8723 #--Y = |X|, Z = 2Y/(1-Y), ATANH(X) = SIGN(X) * (1/2) * LOG1P(Z).
8725 fabs.x (%a0),%fp0 # Y = |X|
8726 fmov.x %fp0,%fp1
8727 fneg.x %fp1 # -Y
8728 fadd.x %fp0,%fp0 # 2Y
8729 fadd.s &0x3F800000,%fp1 # 1-Y
8730 fdiv.x %fp1,%fp0 # 2Y/(1-Y)
8731 mov.l (%a0),%d1
8732 and.l &0x80000000,%d1
8733 or.l &0x3F000000,%d1 # SIGN(X)*HALF
8734 mov.l %d1,-(%sp)
8736 mov.l %d0,-(%sp) # save rnd prec,mode
8737 clr.l %d0 # pass ext prec,RN
8738 fmovm.x &0x01,-(%sp) # save Z on stack
8739 lea (%sp),%a0 # pass ptr to Z
8740 bsr slognp1 # LOG1P(Z)
8741 add.l &0xc,%sp # clear Z from stack
8743 mov.l (%sp)+,%d0 # fetch old prec,mode
8744 fmov.l %d0,%fpcr # load it
8745 mov.b &FMUL_OP,%d1 # last inst is MUL
8746 fmul.s (%sp)+,%fp0
8747 bra t_catch
8749 ATANHBIG:
8750 fabs.x (%a0),%fp0 # |X|
8751 fcmp.s %fp0,&0x3F800000
8752 fbgt t_operr
8753 bra t_dz
8755 global satanhd
8756 #--ATANH(X) = X FOR DENORMALIZED X
8757 satanhd:
8758 bra t_extdnrm
8760 #########################################################################
8761 # slog10(): computes the base-10 logarithm of a normalized input #
8762 # slog10d(): computes the base-10 logarithm of a denormalized input #
8763 # slog2(): computes the base-2 logarithm of a normalized input #
8764 # slog2d(): computes the base-2 logarithm of a denormalized input #
8766 # INPUT *************************************************************** #
8767 # a0 = pointer to extended precision input #
8768 # d0 = round precision,mode #
8770 # OUTPUT ************************************************************** #
8771 # fp0 = log_10(X) or log_2(X) #
8773 # ACCURACY and MONOTONICITY ******************************************* #
8774 # The returned result is within 1.7 ulps in 64 significant bits, #
8775 # i.e. within 0.5003 ulp to 53 bits if the result is subsequently #
8776 # rounded to double precision. The result is provably monotonic #
8777 # in double precision. #
8779 # ALGORITHM *********************************************************** #
8781 # slog10d: #
8783 # Step 0. If X < 0, create a NaN and raise the invalid operation #
8784 # flag. Otherwise, save FPCR in D1; set FPCR to default. #
8785 # Notes: Default means round-to-nearest mode, no floating-point #
8786 # traps, and precision control = double extended. #
8788 # Step 1. Call slognd to obtain Y = log(X), the natural log of X. #
8789 # Notes: Even if X is denormalized, log(X) is always normalized. #
8791 # Step 2. Compute log_10(X) = log(X) * (1/log(10)). #
8792 # 2.1 Restore the user FPCR #
8793 # 2.2 Return ans := Y * INV_L10. #
8795 # slog10: #
8797 # Step 0. If X < 0, create a NaN and raise the invalid operation #
8798 # flag. Otherwise, save FPCR in D1; set FPCR to default. #
8799 # Notes: Default means round-to-nearest mode, no floating-point #
8800 # traps, and precision control = double extended. #
8802 # Step 1. Call sLogN to obtain Y = log(X), the natural log of X. #
8804 # Step 2. Compute log_10(X) = log(X) * (1/log(10)). #
8805 # 2.1 Restore the user FPCR #
8806 # 2.2 Return ans := Y * INV_L10. #
8808 # sLog2d: #
8810 # Step 0. If X < 0, create a NaN and raise the invalid operation #
8811 # flag. Otherwise, save FPCR in D1; set FPCR to default. #
8812 # Notes: Default means round-to-nearest mode, no floating-point #
8813 # traps, and precision control = double extended. #
8815 # Step 1. Call slognd to obtain Y = log(X), the natural log of X. #
8816 # Notes: Even if X is denormalized, log(X) is always normalized. #
8818 # Step 2. Compute log_2(X) = log(X) * (1/log(2)). #
8819 # 2.1 Restore the user FPCR #
8820 # 2.2 Return ans := Y * INV_L2. #
8822 # sLog2: #
8824 # Step 0. If X < 0, create a NaN and raise the invalid operation #
8825 # flag. Otherwise, save FPCR in D1; set FPCR to default. #
8826 # Notes: Default means round-to-nearest mode, no floating-point #
8827 # traps, and precision control = double extended. #
8829 # Step 1. If X is not an integer power of two, i.e., X != 2^k, #
8830 # go to Step 3. #
8832 # Step 2. Return k. #
8833 # 2.1 Get integer k, X = 2^k. #
8834 # 2.2 Restore the user FPCR. #
8835 # 2.3 Return ans := convert-to-double-extended(k). #
8837 # Step 3. Call sLogN to obtain Y = log(X), the natural log of X. #
8839 # Step 4. Compute log_2(X) = log(X) * (1/log(2)). #
8840 # 4.1 Restore the user FPCR #
8841 # 4.2 Return ans := Y * INV_L2. #
8843 #########################################################################
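#
# Illustrative C sketch of slog2 (exact power-of-two shortcut in Steps 1-2,
# general path in Steps 3-4); the helper name is hypothetical and libm's
# log() stands in for slogn:
#
#     #include <math.h>
#     double log2_sketch(double x)           /* assumes x finite and > 0    */
#     {
#         int k;
#         double y = frexp(x, &k);           /* x = y * 2^k, 0.5 <= y < 1   */
#         if (y == 0.5)                      /* x is exactly 2^(k-1)        */
#             return (double)(k - 1);        /* return the exponent, exact  */
#         return log(x) * (1.0/log(2.0));    /* Y * INV_L2                  */
#     }
#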
8845 INV_L10:
8846 long 0x3FFD0000,0xDE5BD8A9,0x37287195,0x00000000
8848 INV_L2:
8849 long 0x3FFF0000,0xB8AA3B29,0x5C17F0BC,0x00000000
8851 global slog10
8852 #--entry point for Log10(X), X is normalized
8853 slog10:
8854 fmov.b &0x1,%fp0
8855 fcmp.x %fp0,(%a0) # if operand == 1,
8856 fbeq.l ld_pzero # return an EXACT zero
8858 mov.l (%a0),%d1
8859 blt.w invalid
8860 mov.l %d0,-(%sp)
8861 clr.l %d0
8862 bsr slogn # log(X), X normal.
8863 fmov.l (%sp)+,%fpcr
8864 fmul.x INV_L10(%pc),%fp0
8865 bra t_inx2
8867 global slog10d
8868 #--entry point for Log10(X), X is denormalized
8869 slog10d:
8870 mov.l (%a0),%d1
8871 blt.w invalid
8872 mov.l %d0,-(%sp)
8873 clr.l %d0
8874 bsr slognd # log(X), X denorm.
8875 fmov.l (%sp)+,%fpcr
8876 fmul.x INV_L10(%pc),%fp0
8877 bra t_minx2
8879 global slog2
8880 #--entry point for Log2(X), X is normalized
8881 slog2:
8882 mov.l (%a0),%d1
8883 blt.w invalid
8885 mov.l 8(%a0),%d1
8886 bne.b continue # X is not 2^k
8888 mov.l 4(%a0),%d1
8889 and.l &0x7FFFFFFF,%d1
8890 bne.b continue
8892 #--X = 2^k.
8893 mov.w (%a0),%d1
8894 and.l &0x00007FFF,%d1
8895 sub.l &0x3FFF,%d1
8896 beq.l ld_pzero
8897 fmov.l %d0,%fpcr
8898 fmov.l %d1,%fp0
8899 bra t_inx2
8901 continue:
8902 mov.l %d0,-(%sp)
8903 clr.l %d0
8904 bsr slogn # log(X), X normal.
8905 fmov.l (%sp)+,%fpcr
8906 fmul.x INV_L2(%pc),%fp0
8907 bra t_inx2
8909 invalid:
8910 bra t_operr
8912 global slog2d
8913 #--entry point for Log2(X), X is denormalized
8914 slog2d:
8915 mov.l (%a0),%d1
8916 blt.w invalid
8917 mov.l %d0,-(%sp)
8918 clr.l %d0
8919 bsr slognd # log(X), X denorm.
8920 fmov.l (%sp)+,%fpcr
8921 fmul.x INV_L2(%pc),%fp0
8922 bra t_minx2
8924 #########################################################################
8925 # stwotox(): computes 2**X for a normalized input #
8926 # stwotoxd(): computes 2**X for a denormalized input #
8927 # stentox(): computes 10**X for a normalized input #
8928 # stentoxd(): computes 10**X for a denormalized input #
8930 # INPUT *************************************************************** #
8931 # a0 = pointer to extended precision input #
8932 # d0 = round precision,mode #
8934 # OUTPUT ************************************************************** #
8935 # fp0 = 2**X or 10**X #
8937 # ACCURACY and MONOTONICITY ******************************************* #
8938 # The returned result is within 2 ulps in 64 significant bits, #
8939 # i.e. within 0.5001 ulp to 53 bits if the result is subsequently #
8940 # rounded to double precision. The result is provably monotonic #
8941 # in double precision. #
8943 # ALGORITHM *********************************************************** #
8945 # twotox #
8946 # 1. If |X| > 16480, go to ExpBig. #
8948 # 2. If |X| < 2**(-70), go to ExpSm. #
8950 # 3. Decompose X as X = N/64 + r where |r| <= 1/128. Furthermore #
8951 # decompose N as #
8952 # N = 64(M + M') + j, j = 0,1,2,...,63. #
8954 # 4. Overwrite r := r * log2. Then #
8955 # 2**X = 2**(M') * 2**(M) * 2**(j/64) * exp(r). #
8956 # Go to expr to compute that expression. #
8958 # tentox #
8959 # 1. If |X| > 16480*log_10(2) (base 10 log of 2), go to ExpBig. #
8961 # 2. If |X| < 2**(-70), go to ExpSm. #
8963 # 3. Set y := X*log_2(10)*64 (base 2 log of 10). Set #
8964 # N := round-to-int(y). Decompose N as #
8965 # N = 64(M + M') + j, j = 0,1,2,...,63. #
8967 # 4. Define r as #
8968 # r := ((X - N*L1)-N*L2) * L10 #
8969 # where L1, L2 are the leading and trailing parts of #
8970 # log_10(2)/64 and L10 is the natural log of 10. Then #
8971 # 10**X = 2**(M') * 2**(M) * 2**(j/64) * exp(r). #
8972 # Go to expr to compute that expression. #
8974 # expr #
8975 # 1. Fetch 2**(j/64) from table as Fact1 and Fact2. #
8977 # 2. Overwrite Fact1 and Fact2 by #
8978 # Fact1 := 2**(M) * Fact1 #
8979 # Fact2 := 2**(M) * Fact2 #
8980 # Thus Fact1 + Fact2 = 2**(M) * 2**(j/64). #
8982 # 3. Calculate P where 1 + P approximates exp(r): #
8983 # P = r + r*r*(A1+r*(A2+...+r*A5)). #
8985 # 4. Let AdjFact := 2**(M'). Return #
8986 # AdjFact * ( Fact1 + ((Fact1*P) + Fact2) ). #
8987 # Exit. #
8989 # ExpBig #
8990 # 1. Generate overflow by Huge * Huge if X > 0; otherwise, #
8991 # generate underflow by Tiny * Tiny. #
8993 # ExpSm #
8994 # 1. Return 1 + X. #
8996 #########################################################################
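#
# C sketch of the twotox decomposition (Steps 1-4); a hypothetical helper,
# assuming |x| already lies in the 2^(-70)..16480 window, with libm's
# exp()/exp2() standing in for the polynomial and the 2^(j/64) table, and a
# single ldexp() in place of the M/M' split:
#
#     #include <math.h>
#     double twotox_sketch(double x)
#     {
#         long   N = lrint(64.0*x);                /* N = round-to-int(64 X)    */
#         long   j = N & 63;                       /* N = 64*M + j, 0 <= j < 64 */
#         long   M = (N - j)/64;
#         double r = (x - (double)N/64.0)*log(2.0);/* r := r * log2             */
#         return ldexp(exp2((double)j/64.0)*exp(r), (int)M);
#     }
#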
8998 L2TEN64:
8999 long 0x406A934F,0x0979A371 # 64LOG10/LOG2
9000 L10TWO1:
9001 long 0x3F734413,0x509F8000 # LOG2/64LOG10
9003 L10TWO2:
9004 long 0xBFCD0000,0xC0219DC1,0xDA994FD2,0x00000000
9006 LOG10: long 0x40000000,0x935D8DDD,0xAAA8AC17,0x00000000
9008 LOG2: long 0x3FFE0000,0xB17217F7,0xD1CF79AC,0x00000000
9010 EXPA5: long 0x3F56C16D,0x6F7BD0B2
9011 EXPA4: long 0x3F811112,0x302C712C
9012 EXPA3: long 0x3FA55555,0x55554CC1
9013 EXPA2: long 0x3FC55555,0x55554A54
9014 EXPA1: long 0x3FE00000,0x00000000,0x00000000,0x00000000
9016 TEXPTBL:
9017 long 0x3FFF0000,0x80000000,0x00000000,0x3F738000
9018 long 0x3FFF0000,0x8164D1F3,0xBC030773,0x3FBEF7CA
9019 long 0x3FFF0000,0x82CD8698,0xAC2BA1D7,0x3FBDF8A9
9020 long 0x3FFF0000,0x843A28C3,0xACDE4046,0x3FBCD7C9
9021 long 0x3FFF0000,0x85AAC367,0xCC487B15,0xBFBDE8DA
9022 long 0x3FFF0000,0x871F6196,0x9E8D1010,0x3FBDE85C
9023 long 0x3FFF0000,0x88980E80,0x92DA8527,0x3FBEBBF1
9024 long 0x3FFF0000,0x8A14D575,0x496EFD9A,0x3FBB80CA
9025 long 0x3FFF0000,0x8B95C1E3,0xEA8BD6E7,0xBFBA8373
9026 long 0x3FFF0000,0x8D1ADF5B,0x7E5BA9E6,0xBFBE9670
9027 long 0x3FFF0000,0x8EA4398B,0x45CD53C0,0x3FBDB700
9028 long 0x3FFF0000,0x9031DC43,0x1466B1DC,0x3FBEEEB0
9029 long 0x3FFF0000,0x91C3D373,0xAB11C336,0x3FBBFD6D
9030 long 0x3FFF0000,0x935A2B2F,0x13E6E92C,0xBFBDB319
9031 long 0x3FFF0000,0x94F4EFA8,0xFEF70961,0x3FBDBA2B
9032 long 0x3FFF0000,0x96942D37,0x20185A00,0x3FBE91D5
9033 long 0x3FFF0000,0x9837F051,0x8DB8A96F,0x3FBE8D5A
9034 long 0x3FFF0000,0x99E04593,0x20B7FA65,0xBFBCDE7B
9035 long 0x3FFF0000,0x9B8D39B9,0xD54E5539,0xBFBEBAAF
9036 long 0x3FFF0000,0x9D3ED9A7,0x2CFFB751,0xBFBD86DA
9037 long 0x3FFF0000,0x9EF53260,0x91A111AE,0xBFBEBEDD
9038 long 0x3FFF0000,0xA0B0510F,0xB9714FC2,0x3FBCC96E
9039 long 0x3FFF0000,0xA2704303,0x0C496819,0xBFBEC90B
9040 long 0x3FFF0000,0xA43515AE,0x09E6809E,0x3FBBD1DB
9041 long 0x3FFF0000,0xA5FED6A9,0xB15138EA,0x3FBCE5EB
9042 long 0x3FFF0000,0xA7CD93B4,0xE965356A,0xBFBEC274
9043 long 0x3FFF0000,0xA9A15AB4,0xEA7C0EF8,0x3FBEA83C
9044 long 0x3FFF0000,0xAB7A39B5,0xA93ED337,0x3FBECB00
9045 long 0x3FFF0000,0xAD583EEA,0x42A14AC6,0x3FBE9301
9046 long 0x3FFF0000,0xAF3B78AD,0x690A4375,0xBFBD8367
9047 long 0x3FFF0000,0xB123F581,0xD2AC2590,0xBFBEF05F
9048 long 0x3FFF0000,0xB311C412,0xA9112489,0x3FBDFB3C
9049 long 0x3FFF0000,0xB504F333,0xF9DE6484,0x3FBEB2FB
9050 long 0x3FFF0000,0xB6FD91E3,0x28D17791,0x3FBAE2CB
9051 long 0x3FFF0000,0xB8FBAF47,0x62FB9EE9,0x3FBCDC3C
9052 long 0x3FFF0000,0xBAFF5AB2,0x133E45FB,0x3FBEE9AA
9053 long 0x3FFF0000,0xBD08A39F,0x580C36BF,0xBFBEAEFD
9054 long 0x3FFF0000,0xBF1799B6,0x7A731083,0xBFBCBF51
9055 long 0x3FFF0000,0xC12C4CCA,0x66709456,0x3FBEF88A
9056 long 0x3FFF0000,0xC346CCDA,0x24976407,0x3FBD83B2
9057 long 0x3FFF0000,0xC5672A11,0x5506DADD,0x3FBDF8AB
9058 long 0x3FFF0000,0xC78D74C8,0xABB9B15D,0xBFBDFB17
9059 long 0x3FFF0000,0xC9B9BD86,0x6E2F27A3,0xBFBEFE3C
9060 long 0x3FFF0000,0xCBEC14FE,0xF2727C5D,0xBFBBB6F8
9061 long 0x3FFF0000,0xCE248C15,0x1F8480E4,0xBFBCEE53
9062 long 0x3FFF0000,0xD06333DA,0xEF2B2595,0xBFBDA4AE
9063 long 0x3FFF0000,0xD2A81D91,0xF12AE45A,0x3FBC9124
9064 long 0x3FFF0000,0xD4F35AAB,0xCFEDFA1F,0x3FBEB243
9065 long 0x3FFF0000,0xD744FCCA,0xD69D6AF4,0x3FBDE69A
9066 long 0x3FFF0000,0xD99D15C2,0x78AFD7B6,0xBFB8BC61
9067 long 0x3FFF0000,0xDBFBB797,0xDAF23755,0x3FBDF610
9068 long 0x3FFF0000,0xDE60F482,0x5E0E9124,0xBFBD8BE1
9069 long 0x3FFF0000,0xE0CCDEEC,0x2A94E111,0x3FBACB12
9070 long 0x3FFF0000,0xE33F8972,0xBE8A5A51,0x3FBB9BFE
9071 long 0x3FFF0000,0xE5B906E7,0x7C8348A8,0x3FBCF2F4
9072 long 0x3FFF0000,0xE8396A50,0x3C4BDC68,0x3FBEF22F
9073 long 0x3FFF0000,0xEAC0C6E7,0xDD24392F,0xBFBDBF4A
9074 long 0x3FFF0000,0xED4F301E,0xD9942B84,0x3FBEC01A
9075 long 0x3FFF0000,0xEFE4B99B,0xDCDAF5CB,0x3FBE8CAC
9076 long 0x3FFF0000,0xF281773C,0x59FFB13A,0xBFBCBB3F
9077 long 0x3FFF0000,0xF5257D15,0x2486CC2C,0x3FBEF73A
9078 long 0x3FFF0000,0xF7D0DF73,0x0AD13BB9,0xBFB8B795
9079 long 0x3FFF0000,0xFA83B2DB,0x722A033A,0x3FBEF84B
9080 long 0x3FFF0000,0xFD3E0C0C,0xF486C175,0xBFBEF581
9082 set INT,L_SCR1
9084 set X,FP_SCR0
9085 set XDCARE,X+2
9086 set XFRAC,X+4
9088 set ADJFACT,FP_SCR0
9090 set FACT1,FP_SCR0
9091 set FACT1HI,FACT1+4
9092 set FACT1LOW,FACT1+8
9094 set FACT2,FP_SCR1
9095 set FACT2HI,FACT2+4
9096 set FACT2LOW,FACT2+8
9098 global stwotox
9099 #--ENTRY POINT FOR 2**(X), HERE X IS FINITE, NON-ZERO, AND NOT NAN'S
9100 stwotox:
9101 fmovm.x (%a0),&0x80 # LOAD INPUT
9103 mov.l (%a0),%d1
9104 mov.w 4(%a0),%d1
9105 fmov.x %fp0,X(%a6)
9106 and.l &0x7FFFFFFF,%d1
9108 cmp.l %d1,&0x3FB98000 # |X| >= 2**(-70)?
9109 bge.b TWOOK1
9110 bra.w EXPBORS
9112 TWOOK1:
9113 cmp.l %d1,&0x400D80C0 # |X| > 16480?
9114 ble.b TWOMAIN
9115 bra.w EXPBORS
9117 TWOMAIN:
9118 #--USUAL CASE, 2^(-70) <= |X| <= 16480
9120 fmov.x %fp0,%fp1
9121 fmul.s &0x42800000,%fp1 # 64 * X
9122 fmov.l %fp1,INT(%a6) # N = ROUND-TO-INT(64 X)
9123 mov.l %d2,-(%sp)
9124 lea TEXPTBL(%pc),%a1 # LOAD ADDRESS OF TABLE OF 2^(J/64)
9125 fmov.l INT(%a6),%fp1 # N --> FLOATING FMT
9126 mov.l INT(%a6),%d1
9127 mov.l %d1,%d2
9128 and.l &0x3F,%d1 # D1 IS J
9129 asl.l &4,%d1 # DISPLACEMENT FOR 2^(J/64)
9130 add.l %d1,%a1 # ADDRESS FOR 2^(J/64)
9131 asr.l &6,%d2 # d2 IS L, N = 64L + J
9132 mov.l %d2,%d1
9133 asr.l &1,%d1 # D1 IS M
9134 sub.l %d1,%d2 # d2 IS M', N = 64(M+M') + J
9135 add.l &0x3FFF,%d2
9137 #--SUMMARY: a1 IS ADDRESS FOR THE LEADING PORTION OF 2^(J/64),
9138 #--D1 IS M WHERE N = 64(M+M') + J. NOTE THAT |M| <= 16140 BY DESIGN.
9139 #--ADJFACT = 2^(M').
9140 #--REGISTERS SAVED SO FAR ARE (IN ORDER) FPCR, D0, FP1, a1, AND FP2.
9142 fmovm.x &0x0c,-(%sp) # save fp2/fp3
9144 fmul.s &0x3C800000,%fp1 # (1/64)*N
9145 mov.l (%a1)+,FACT1(%a6)
9146 mov.l (%a1)+,FACT1HI(%a6)
9147 mov.l (%a1)+,FACT1LOW(%a6)
9148 mov.w (%a1)+,FACT2(%a6)
9150 fsub.x %fp1,%fp0 # X - (1/64)*INT(64 X)
9152 mov.w (%a1)+,FACT2HI(%a6)
9153 clr.w FACT2HI+2(%a6)
9154 clr.l FACT2LOW(%a6)
9155 add.w %d1,FACT1(%a6)
9156 fmul.x LOG2(%pc),%fp0 # FP0 IS R
9157 add.w %d1,FACT2(%a6)
9159 bra.w expr
9161 EXPBORS:
9162 #--FPCR, D0 SAVED
9163 cmp.l %d1,&0x3FFF8000
9164 bgt.b TEXPBIG
9166 #--|X| IS SMALL, RETURN 1 + X
9168 fmov.l %d0,%fpcr # restore users round prec,mode
9169 fadd.s &0x3F800000,%fp0 # RETURN 1 + X
9170 bra t_pinx2
9172 TEXPBIG:
9173 #--|X| IS LARGE, GENERATE OVERFLOW IF X > 0; ELSE GENERATE UNDERFLOW
9174 #--REGISTERS SAVED SO FAR ARE FPCR AND D0
9175 mov.l X(%a6),%d1
9176 cmp.l %d1,&0
9177 blt.b EXPNEG
9179 bra t_ovfl2 # t_ovfl expects positive value
9181 EXPNEG:
9182 bra t_unfl2 # t_unfl expects positive value
9184 global stwotoxd
9185 stwotoxd:
9186 #--ENTRY POINT FOR 2**(X) FOR DENORMALIZED ARGUMENT
9188 fmov.l %d0,%fpcr # set user's rounding mode/precision
9189 fmov.s &0x3F800000,%fp0 # RETURN 1 + X
9190 mov.l (%a0),%d1
9191 or.l &0x00800001,%d1
9192 fadd.s %d1,%fp0
9193 bra t_pinx2
9195 global stentox
9196 #--ENTRY POINT FOR 10**(X), HERE X IS FINITE, NON-ZERO, AND NOT NAN'S
9197 stentox:
9198 fmovm.x (%a0),&0x80 # LOAD INPUT
9200 mov.l (%a0),%d1
9201 mov.w 4(%a0),%d1
9202 fmov.x %fp0,X(%a6)
9203 and.l &0x7FFFFFFF,%d1
9205 cmp.l %d1,&0x3FB98000 # |X| >= 2**(-70)?
9206 bge.b TENOK1
9207 bra.w EXPBORS
9209 TENOK1:
9210 cmp.l %d1,&0x400B9B07 # |X| <= 16480*log2/log10 ?
9211 ble.b TENMAIN
9212 bra.w EXPBORS
9214 TENMAIN:
9215 #--USUAL CASE, 2^(-70) <= |X| <= 16480 LOG 2 / LOG 10
9217 fmov.x %fp0,%fp1
9218 fmul.d L2TEN64(%pc),%fp1 # X*64*LOG10/LOG2
9219 fmov.l %fp1,INT(%a6) # N=INT(X*64*LOG10/LOG2)
9220 mov.l %d2,-(%sp)
9221 lea TEXPTBL(%pc),%a1 # LOAD ADDRESS OF TABLE OF 2^(J/64)
9222 fmov.l INT(%a6),%fp1 # N --> FLOATING FMT
9223 mov.l INT(%a6),%d1
9224 mov.l %d1,%d2
9225 and.l &0x3F,%d1 # D1 IS J
9226 asl.l &4,%d1 # DISPLACEMENT FOR 2^(J/64)
9227 add.l %d1,%a1 # ADDRESS FOR 2^(J/64)
9228 asr.l &6,%d2 # d2 IS L, N = 64L + J
9229 mov.l %d2,%d1
9230 asr.l &1,%d1 # D1 IS M
9231 sub.l %d1,%d2 # d2 IS M', N = 64(M+M') + J
9232 add.l &0x3FFF,%d2
9234 #--SUMMARY: a1 IS ADDRESS FOR THE LEADING PORTION OF 2^(J/64),
9235 #--D1 IS M WHERE N = 64(M+M') + J. NOTE THAT |M| <= 16140 BY DESIGN.
9236 #--ADJFACT = 2^(M').
9237 #--REGISTERS SAVED SO FAR ARE (IN ORDER) FPCR, D0, FP1, a1, AND FP2.
9238 fmovm.x &0x0c,-(%sp) # save fp2/fp3
9240 fmov.x %fp1,%fp2
9242 fmul.d L10TWO1(%pc),%fp1 # N*(LOG2/64LOG10)_LEAD
9243 mov.l (%a1)+,FACT1(%a6)
9245 fmul.x L10TWO2(%pc),%fp2 # N*(LOG2/64LOG10)_TRAIL
9247 mov.l (%a1)+,FACT1HI(%a6)
9248 mov.l (%a1)+,FACT1LOW(%a6)
9249 fsub.x %fp1,%fp0 # X - N L_LEAD
9250 mov.w (%a1)+,FACT2(%a6)
9252 fsub.x %fp2,%fp0 # X - N L_TRAIL
9254 mov.w (%a1)+,FACT2HI(%a6)
9255 clr.w FACT2HI+2(%a6)
9256 clr.l FACT2LOW(%a6)
9258 fmul.x LOG10(%pc),%fp0 # FP0 IS R
9259 add.w %d1,FACT1(%a6)
9260 add.w %d1,FACT2(%a6)
9262 expr:
9263 #--FPCR, FP2, FP3 ARE SAVED IN ORDER AS SHOWN.
9264 #--ADJFACT CONTAINS 2**(M'), FACT1 + FACT2 = 2**(M) * 2**(J/64).
9265 #--FP0 IS R. THE FOLLOWING CODE COMPUTES
9266 #-- 2**(M'+M) * 2**(J/64) * EXP(R)
9268 fmov.x %fp0,%fp1
9269 fmul.x %fp1,%fp1 # FP1 IS S = R*R
9271 fmov.d EXPA5(%pc),%fp2 # FP2 IS A5
9272 fmov.d EXPA4(%pc),%fp3 # FP3 IS A4
9274 fmul.x %fp1,%fp2 # FP2 IS S*A5
9275 fmul.x %fp1,%fp3 # FP3 IS S*A4
9277 fadd.d EXPA3(%pc),%fp2 # FP2 IS A3+S*A5
9278 fadd.d EXPA2(%pc),%fp3 # FP3 IS A2+S*A4
9280 fmul.x %fp1,%fp2 # FP2 IS S*(A3+S*A5)
9281 fmul.x %fp1,%fp3 # FP3 IS S*(A2+S*A4)
9283 fadd.d EXPA1(%pc),%fp2 # FP2 IS A1+S*(A3+S*A5)
9284 fmul.x %fp0,%fp3 # FP3 IS R*S*(A2+S*A4)
9286 fmul.x %fp1,%fp2 # FP2 IS S*(A1+S*(A3+S*A5))
9287 fadd.x %fp3,%fp0 # FP0 IS R+R*S*(A2+S*A4)
9288 fadd.x %fp2,%fp0 # FP0 IS EXP(R) - 1
9290 fmovm.x (%sp)+,&0x30 # restore fp2/fp3
9292 #--FINAL RECONSTRUCTION PROCESS
9293 #--EXP(X) = 2^M*2^(J/64) + 2^M*2^(J/64)*(EXP(R)-1) - (1 OR 0)
9295 fmul.x FACT1(%a6),%fp0
9296 fadd.x FACT2(%a6),%fp0
9297 fadd.x FACT1(%a6),%fp0
9299 fmov.l %d0,%fpcr # restore users round prec,mode
9300 mov.w %d2,ADJFACT(%a6) # INSERT EXPONENT
9301 mov.l (%sp)+,%d2
9302 mov.l &0x80000000,ADJFACT+4(%a6)
9303 clr.l ADJFACT+8(%a6)
9304 mov.b &FMUL_OP,%d1 # last inst is MUL
9305 fmul.x ADJFACT(%a6),%fp0 # FINAL ADJUSTMENT
9306 bra t_catch
9308 global stentoxd
9309 stentoxd:
9310 #--ENTRY POINT FOR 10**(X) FOR DENORMALIZED ARGUMENT
9312 fmov.l %d0,%fpcr # set user's rounding mode/precision
9313 fmov.s &0x3F800000,%fp0 # RETURN 1 + X
9314 mov.l (%a0),%d1
9315 or.l &0x00800001,%d1
9316 fadd.s %d1,%fp0
9317 bra t_pinx2
9319 #########################################################################
9320 # smovcr(): returns the ROM constant at the offset specified in d1 #
9321 # rounded to the mode and precision specified in d0. #
9323 # INPUT *************************************************************** #
9324 # d0 = rnd prec,mode #
9325 # d1 = ROM offset #
9327 # OUTPUT ************************************************************** #
9328 # fp0 = the ROM constant rounded to the user's rounding mode,prec #
9330 #########################################################################
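#
# The offset decoding below can be summarized by this C sketch (hypothetical
# helper, not package code): anything outside pi, the $0b-$0e constants and
# the $30-$3f constants yields +0.0.
#
#     typedef enum { C_PI, C_SMALL, C_BIG, C_ZERO } cr_class;
#     cr_class movcr_class(unsigned char off)             /* off = ROM offset */
#     {
#         if (off == 0x00)                return C_PI;    /* pi               */
#         if (off >= 0x0b && off <= 0x0e) return C_SMALL; /* log10(2),e,...   */
#         if (off >= 0x30 && off <= 0x3f) return C_BIG;   /* ln(2),10^n,...   */
#         return C_ZERO;                                  /* return +0.0      */
#     }
#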
9332 global smovcr
9333 smovcr:
9334 mov.l %d1,-(%sp) # save rom offset for a sec
9336 lsr.b &0x4,%d0 # shift ctrl bits to lo
9337 mov.l %d0,%d1 # make a copy
9338 andi.w &0x3,%d1 # extract rnd mode
9339 andi.w &0xc,%d0 # extract rnd prec
9340 swap %d0 # put rnd prec in hi
9341 mov.w %d1,%d0 # put rnd mode in lo
9343 mov.l (%sp)+,%d1 # get rom offset
9346 # check range of offset
9348 tst.b %d1 # if zero, offset is to pi
9349 beq.b pi_tbl # it is pi
9350 cmpi.b %d1,&0x0a # check range $01 - $0a
9351 ble.b z_val # if in this range, return zero
9352 cmpi.b %d1,&0x0e # check range $0b - $0e
9353 ble.b sm_tbl # valid constants in this range
9354 cmpi.b %d1,&0x2f # check range $10 - $2f
9355 ble.b z_val # if in this range, return zero
9356 cmpi.b %d1,&0x3f # check range $30 - $3f
9357 ble.b bg_tbl # valid constants in this range
9359 z_val:
9360 bra.l ld_pzero # return a zero
9363 # the answer is PI rounded to the proper precision.
9365 # fetch a pointer to the answer table relating to the proper rounding
9366 # precision.
9368 pi_tbl:
9369 tst.b %d0 # is rmode RN?
9370 bne.b pi_not_rn # no
9371 pi_rn:
9372 lea.l PIRN(%pc),%a0 # yes; load PI RN table addr
9373 bra.w set_finx
9374 pi_not_rn:
9375 cmpi.b %d0,&rp_mode # is rmode RP?
9376 beq.b pi_rp # yes
9377 pi_rzrm:
9378 lea.l PIRZRM(%pc),%a0 # no; load PI RZ,RM table addr
9379 bra.b set_finx
9380 pi_rp:
9381 lea.l PIRP(%pc),%a0 # load PI RP table addr
9382 bra.b set_finx
9385 # the answer is one of:
9386 # $0B log10(2) (inexact)
9387 # $0C e (inexact)
9388 # $0D log2(e) (inexact)
9389 # $0E log10(e) (exact)
9391 # fetch a pointer to the answer table relating to the proper rounding
9392 # precision.
9394 sm_tbl:
9395 subi.b &0xb,%d1 # make offset in 0-4 range
9396 tst.b %d0 # is rmode RN?
9397 bne.b sm_not_rn # no
9398 sm_rn:
9399 lea.l SMALRN(%pc),%a0 # yes; load RN table addr
9400 sm_tbl_cont:
9401 cmpi.b %d1,&0x2 # is result log10(e)?
9402 ble.b set_finx # no; answer is inexact
9403 bra.b no_finx # yes; answer is exact
9404 sm_not_rn:
9405 cmpi.b %d0,&rp_mode # is rmode RP?
9406 beq.b sm_rp # yes
9407 sm_rzrm:
9408 lea.l SMALRZRM(%pc),%a0 # no; load RZ,RM table addr
9409 bra.b sm_tbl_cont
9410 sm_rp:
9411 lea.l SMALRP(%pc),%a0 # load RP table addr
9412 bra.b sm_tbl_cont
9415 # the answer is one of:
9416 # $30 ln(2) (inexact)
9417 # $31 ln(10) (inexact)
9418 # $32 10^0 (exact)
9419 # $33 10^1 (exact)
9420 # $34 10^2 (exact)
9421 # $35 10^4 (exact)
9422 # $36 10^8 (exact)
9423 # $37 10^16 (exact)
9424 # $38 10^32 (inexact)
9425 # $39 10^64 (inexact)
9426 # $3A 10^128 (inexact)
9427 # $3B 10^256 (inexact)
9428 # $3C 10^512 (inexact)
9429 # $3D 10^1024 (inexact)
9430 # $3E 10^2048 (inexact)
9431 # $3F 10^4096 (inexact)
9433 # fetch a pointer to the answer table relating to the proper rounding
9434 # precision.
9436 bg_tbl:
9437 subi.b &0x30,%d1 # make offset in 0-f range
9438 tst.b %d0 # is rmode RN?
9439 bne.b bg_not_rn # no
9440 bg_rn:
9441 lea.l BIGRN(%pc),%a0 # yes; load RN table addr
9442 bg_tbl_cont:
9443 cmpi.b %d1,&0x1 # is offset <= $31?
9444 ble.b set_finx # yes; answer is inexact
9445 cmpi.b %d1,&0x7 # is $32 <= offset <= $37?
9446 ble.b no_finx # yes; answer is exact
9447 bra.b set_finx # no; answer is inexact
9448 bg_not_rn:
9449 cmpi.b %d0,&rp_mode # is rmode RP?
9450 beq.b bg_rp # yes
9451 bg_rzrm:
9452 lea.l BIGRZRM(%pc),%a0 # no; load RZ,RM table addr
9453 bra.b bg_tbl_cont
9454 bg_rp:
9455 lea.l BIGRP(%pc),%a0 # load RP table addr
9456 bra.b bg_tbl_cont
9458 # answer is inexact, so set INEX2 and AINEX in the user's FPSR.
9459 set_finx:
9460 ori.l &inx2a_mask,USER_FPSR(%a6) # set INEX2/AINEX
9461 no_finx:
9462 mulu.w &0xc,%d1 # offset points into tables
9463 swap %d0 # put rnd prec in lo word
9464 tst.b %d0 # is precision extended?
9466 bne.b not_ext # if xprec, do not call round
9468 # Precision is extended
9469 fmovm.x (%a0,%d1.w),&0x80 # return result in fp0
9472 # Precision is single or double
9473 not_ext:
9474 swap %d0 # rnd prec in upper word
9476 # call round() to round the answer to the proper precision.
9477 # exponents out of range for single or double DO NOT cause underflow
9478 # or overflow.
9479 mov.w 0x0(%a0,%d1.w),FP_SCR1_EX(%a6) # load first word
9480 mov.l 0x4(%a0,%d1.w),FP_SCR1_HI(%a6) # load second word
9481 mov.l 0x8(%a0,%d1.w),FP_SCR1_LO(%a6) # load third word
9482 mov.l %d0,%d1
9483 clr.l %d0 # clear g,r,s
9484 lea FP_SCR1(%a6),%a0 # pass ptr to answer
9485 clr.w LOCAL_SGN(%a0) # sign always positive
9486 bsr.l _round # round the mantissa
9488 fmovm.x (%a0),&0x80 # return rounded result in fp0
9491 align 0x4
9493 PIRN: long 0x40000000,0xc90fdaa2,0x2168c235 # pi
9494 PIRZRM: long 0x40000000,0xc90fdaa2,0x2168c234 # pi
9495 PIRP: long 0x40000000,0xc90fdaa2,0x2168c235 # pi
9497 SMALRN: long 0x3ffd0000,0x9a209a84,0xfbcff798 # log10(2)
9498 long 0x40000000,0xadf85458,0xa2bb4a9a # e
9499 long 0x3fff0000,0xb8aa3b29,0x5c17f0bc # log2(e)
9500 long 0x3ffd0000,0xde5bd8a9,0x37287195 # log10(e)
9501 long 0x00000000,0x00000000,0x00000000 # 0.0
9503 SMALRZRM:
9504 long 0x3ffd0000,0x9a209a84,0xfbcff798 # log10(2)
9505 long 0x40000000,0xadf85458,0xa2bb4a9a # e
9506 long 0x3fff0000,0xb8aa3b29,0x5c17f0bb # log2(e)
9507 long 0x3ffd0000,0xde5bd8a9,0x37287195 # log10(e)
9508 long 0x00000000,0x00000000,0x00000000 # 0.0
9510 SMALRP: long 0x3ffd0000,0x9a209a84,0xfbcff799 # log10(2)
9511 long 0x40000000,0xadf85458,0xa2bb4a9b # e
9512 long 0x3fff0000,0xb8aa3b29,0x5c17f0bc # log2(e)
9513 long 0x3ffd0000,0xde5bd8a9,0x37287195 # log10(e)
9514 long 0x00000000,0x00000000,0x00000000 # 0.0
9516 BIGRN: long 0x3ffe0000,0xb17217f7,0xd1cf79ac # ln(2)
9517 long 0x40000000,0x935d8ddd,0xaaa8ac17 # ln(10)
9519 long 0x3fff0000,0x80000000,0x00000000 # 10 ^ 0
9520 long 0x40020000,0xA0000000,0x00000000 # 10 ^ 1
9521 long 0x40050000,0xC8000000,0x00000000 # 10 ^ 2
9522 long 0x400C0000,0x9C400000,0x00000000 # 10 ^ 4
9523 long 0x40190000,0xBEBC2000,0x00000000 # 10 ^ 8
9524 long 0x40340000,0x8E1BC9BF,0x04000000 # 10 ^ 16
9525 long 0x40690000,0x9DC5ADA8,0x2B70B59E # 10 ^ 32
9526 long 0x40D30000,0xC2781F49,0xFFCFA6D5 # 10 ^ 64
9527 long 0x41A80000,0x93BA47C9,0x80E98CE0 # 10 ^ 128
9528 long 0x43510000,0xAA7EEBFB,0x9DF9DE8E # 10 ^ 256
9529 long 0x46A30000,0xE319A0AE,0xA60E91C7 # 10 ^ 512
9530 long 0x4D480000,0xC9767586,0x81750C17 # 10 ^ 1024
9531 long 0x5A920000,0x9E8B3B5D,0xC53D5DE5 # 10 ^ 2048
9532 long 0x75250000,0xC4605202,0x8A20979B # 10 ^ 4096
9534 BIGRZRM:
9535 long 0x3ffe0000,0xb17217f7,0xd1cf79ab # ln(2)
9536 long 0x40000000,0x935d8ddd,0xaaa8ac16 # ln(10)
9538 long 0x3fff0000,0x80000000,0x00000000 # 10 ^ 0
9539 long 0x40020000,0xA0000000,0x00000000 # 10 ^ 1
9540 long 0x40050000,0xC8000000,0x00000000 # 10 ^ 2
9541 long 0x400C0000,0x9C400000,0x00000000 # 10 ^ 4
9542 long 0x40190000,0xBEBC2000,0x00000000 # 10 ^ 8
9543 long 0x40340000,0x8E1BC9BF,0x04000000 # 10 ^ 16
9544 long 0x40690000,0x9DC5ADA8,0x2B70B59D # 10 ^ 32
9545 long 0x40D30000,0xC2781F49,0xFFCFA6D5 # 10 ^ 64
9546 long 0x41A80000,0x93BA47C9,0x80E98CDF # 10 ^ 128
9547 long 0x43510000,0xAA7EEBFB,0x9DF9DE8D # 10 ^ 256
9548 long 0x46A30000,0xE319A0AE,0xA60E91C6 # 10 ^ 512
9549 long 0x4D480000,0xC9767586,0x81750C17 # 10 ^ 1024
9550 long 0x5A920000,0x9E8B3B5D,0xC53D5DE4 # 10 ^ 2048
9551 long 0x75250000,0xC4605202,0x8A20979A # 10 ^ 4096
9553 BIGRP:
9554 long 0x3ffe0000,0xb17217f7,0xd1cf79ac # ln(2)
9555 long 0x40000000,0x935d8ddd,0xaaa8ac17 # ln(10)
9557 long 0x3fff0000,0x80000000,0x00000000 # 10 ^ 0
9558 long 0x40020000,0xA0000000,0x00000000 # 10 ^ 1
9559 long 0x40050000,0xC8000000,0x00000000 # 10 ^ 2
9560 long 0x400C0000,0x9C400000,0x00000000 # 10 ^ 4
9561 long 0x40190000,0xBEBC2000,0x00000000 # 10 ^ 8
9562 long 0x40340000,0x8E1BC9BF,0x04000000 # 10 ^ 16
9563 long 0x40690000,0x9DC5ADA8,0x2B70B59E # 10 ^ 32
9564 long 0x40D30000,0xC2781F49,0xFFCFA6D6 # 10 ^ 64
9565 long 0x41A80000,0x93BA47C9,0x80E98CE0 # 10 ^ 128
9566 long 0x43510000,0xAA7EEBFB,0x9DF9DE8E # 10 ^ 256
9567 long 0x46A30000,0xE319A0AE,0xA60E91C7 # 10 ^ 512
9568 long 0x4D480000,0xC9767586,0x81750C18 # 10 ^ 1024
9569 long 0x5A920000,0x9E8B3B5D,0xC53D5DE5 # 10 ^ 2048
9570 long 0x75250000,0xC4605202,0x8A20979B # 10 ^ 4096
9572 #########################################################################
9573 # sscale(): computes the destination operand scaled by the source #
9574 # operand. If the absolute value of the source operand is #
9575 # >= 2^14, an overflow or underflow is returned. #
9577 # INPUT *************************************************************** #
9578 # a0 = pointer to double-extended source operand X #
9579 # a1 = pointer to double-extended destination operand Y #
9581 # OUTPUT ************************************************************** #
9582 # fp0 = scale(X,Y) #
9584 #########################################################################
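#
# Hedged C sketch of the in-range behavior (hypothetical helper): the
# destination is scaled by 2^int(src); the clamp mirrors the 2^14 bound,
# whereas the real routine raises OVFL/UNFL through the trap handlers.
#
#     #include <math.h>
#     double scale_sketch(double dst, double src)
#     {
#         double n = trunc(src);              /* fintrz of the source       */
#         if (fabs(n) > 16384.0)              /* outside the 2^14 range     */
#             n = copysign(16384.0, n);       /* forces over/underflow      */
#         return ldexp(dst, (int)n);          /* dst * 2^int(src)           */
#     }
#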
9586 set SIGN, L_SCR1
9588 global sscale
9589 sscale:
9590 mov.l %d0,-(%sp) # store off ctrl bits for now
9592 mov.w DST_EX(%a1),%d1 # get dst exponent
9593 smi.b SIGN(%a6) # use SIGN to hold dst sign
9594 andi.l &0x00007fff,%d1 # strip sign from dst exp
9596 mov.w SRC_EX(%a0),%d0 # check src bounds
9597 andi.w &0x7fff,%d0 # clr src sign bit
9598 cmpi.w %d0,&0x3fff # is src ~ ZERO?
9599 blt.w src_small # yes
9600 cmpi.w %d0,&0x400c # no; is src too big?
9601 bgt.w src_out # yes
9604 # Source is within 2^14 range.
9606 src_ok:
9607 fintrz.x SRC(%a0),%fp0 # calc int of src
9608 fmov.l %fp0,%d0 # int src to d0
9609 # don't want any accrued bits from the fintrz showing up later since
9610 # we may need to read the fpsr for the last fp op in t_catch2().
9611 fmov.l &0x0,%fpsr
9613 tst.b DST_HI(%a1) # is dst denormalized?
9614 bmi.b sok_norm
9616 # the dst is a DENORM. normalize the DENORM and add the adjustment to
9617 # the src value. then, jump to the norm part of the routine.
9618 sok_dnrm:
9619 mov.l %d0,-(%sp) # save src for now
9621 mov.w DST_EX(%a1),FP_SCR0_EX(%a6) # make a copy
9622 mov.l DST_HI(%a1),FP_SCR0_HI(%a6)
9623 mov.l DST_LO(%a1),FP_SCR0_LO(%a6)
9625 lea FP_SCR0(%a6),%a0 # pass ptr to DENORM
9626 bsr.l norm # normalize the DENORM
9627 neg.l %d0
9628 add.l (%sp)+,%d0 # add adjustment to src
9630 fmovm.x FP_SCR0(%a6),&0x80 # load normalized DENORM
9632 cmpi.w %d0,&-0x3fff # is the shft amt really low?
9633 bge.b sok_norm2 # thank goodness no
9635 # the multiply factor that we're trying to create should be a denorm
9636 # for the multiply to work. therefore, we're going to actually do a
9637 # multiply with a denorm which will cause an unimplemented data type
9638 # exception to be put into the machine which will be caught and corrected
9639 # later. we don't do this with the DENORMs above because this method
9640 # is slower. but, don't fret, I don't see it being used much either.
9641 fmov.l (%sp)+,%fpcr # restore user fpcr
9642 mov.l &0x80000000,%d1 # load normalized mantissa
9643 subi.l &-0x3fff,%d0 # how many should we shift?
9644 neg.l %d0 # make it positive
9645 cmpi.b %d0,&0x20 # is it > 32?
9646 bge.b sok_dnrm_32 # yes
9647 lsr.l %d0,%d1 # no; bit stays in upper lw
9648 clr.l -(%sp) # insert zero low mantissa
9649 mov.l %d1,-(%sp) # insert new high mantissa
9650 clr.l -(%sp) # make zero exponent
9651 bra.b sok_norm_cont
9652 sok_dnrm_32:
9653 subi.b &0x20,%d0 # get shift count
9654 lsr.l %d0,%d1 # make low mantissa longword
9655 mov.l %d1,-(%sp) # insert new low mantissa
9656 clr.l -(%sp) # insert zero high mantissa
9657 clr.l -(%sp) # make zero exponent
9658 bra.b sok_norm_cont
9660 # the src will force the dst to a DENORM value or worse. so, let's
9661 # create an fp multiply that will create the result.
9662 sok_norm:
9663 fmovm.x DST(%a1),&0x80 # load fp0 with normalized src
9664 sok_norm2:
9665 fmov.l (%sp)+,%fpcr # restore user fpcr
9667 addi.w &0x3fff,%d0 # turn src amt into exp value
9668 swap %d0 # put exponent in high word
9669 clr.l -(%sp) # insert new exponent
9670 mov.l &0x80000000,-(%sp) # insert new high mantissa
9671 mov.l %d0,-(%sp) # insert new lo mantissa
9673 sok_norm_cont:
9674 fmov.l %fpcr,%d0 # d0 needs fpcr for t_catch2
9675 mov.b &FMUL_OP,%d1 # last inst is MUL
9676 fmul.x (%sp)+,%fp0 # do the multiply
9677 bra t_catch2 # catch any exceptions
9680 # Source is outside of 2^14 range. Test the sign and branch
9681 # to the appropriate exception handler.
9683 src_out:
9684 mov.l (%sp)+,%d0 # restore ctrl bits
9685 exg %a0,%a1 # swap src,dst ptrs
9686 tst.b SRC_EX(%a1) # is src negative?
9687 bmi t_unfl # yes; underflow
9688 bra t_ovfl_sc # no; overflow
9691 # The source input is below 1, so we check for denormalized numbers
9692 # and set unfl.
9694 src_small:
9695 tst.b DST_HI(%a1) # is dst denormalized?
9696 bpl.b ssmall_done # yes
9698 mov.l (%sp)+,%d0
9699 fmov.l %d0,%fpcr # no; load control bits
9700 mov.b &FMOV_OP,%d1 # last inst is MOVE
9701 fmov.x DST(%a1),%fp0 # simply return dest
9702 bra t_catch2
9703 ssmall_done:
9704 mov.l (%sp)+,%d0 # load control bits into d0
9705 mov.l %a1,%a0 # pass ptr to dst
9706 bra t_resdnrm
9708 #########################################################################
9709 # smod(): computes the fp MOD of the input values X,Y. #
9710 # srem(): computes the fp (IEEE) REM of the input values X,Y. #
9712 # INPUT *************************************************************** #
9713 # a0 = pointer to extended precision input X #
9714 # a1 = pointer to extended precision input Y #
9715 # d0 = round precision,mode #
9717 # The input operands X and Y can be either normalized or #
9718 # denormalized. #
9720 # OUTPUT ************************************************************** #
9721 # fp0 = FREM(X,Y) or FMOD(X,Y) #
9723 # ALGORITHM *********************************************************** #
9725 # Step 1. Save and strip signs of X and Y: signX := sign(X), #
9726 # signY := sign(Y), X := |X|, Y := |Y|, #
9727 # signQ := signX EOR signY. Record whether MOD or REM #
9728 # is requested. #
9730 # Step 2. Set L := expo(X)-expo(Y), k := 0, Q := 0. #
9731 # If (L < 0) then #
9732 # R := X, go to Step 4. #
9733 # else #
9734 # R := 2^(-L)X, j := L. #
9735 # endif #
9737 # Step 3. Perform MOD(X,Y) #
9738 # 3.1 If R = Y, go to Step 9. #
9739 # 3.2 If R > Y, then { R := R - Y, Q := Q + 1} #
9740 # 3.3 If j = 0, go to Step 4. #
9741 # 3.4 k := k + 1, j := j - 1, Q := 2Q, R := 2R. Go to #
9742 # Step 3.1. #
9744 # Step 4. At this point, R = X - QY = MOD(X,Y). Set #
9745 # Last_Subtract := false (used in Step 7 below). If #
9746 # MOD is requested, go to Step 6. #
9748 # Step 5. R = MOD(X,Y), but REM(X,Y) is requested. #
9749 # 5.1 If R < Y/2, then R = MOD(X,Y) = REM(X,Y). Go to #
9750 # Step 6. #
9751 # 5.2 If R > Y/2, then { set Last_Subtract := true, #
9752 # Q := Q + 1, Y := signY*Y }. Go to Step 6. #
9753 # 5.3 This is the tricky case of R = Y/2. If Q is odd, #
9754 # then { Q := Q + 1, signX := -signX }. #
9756 # Step 6. R := signX*R. #
9758 # Step 7. If Last_Subtract = true, R := R - Y. #
9760 # Step 8. Return signQ, last 7 bits of Q, and R as required. #
9762 # Step 9. At this point, R = 2^(-j)*X - Q Y = Y. Thus, #
9763 # X = 2^(j)*(Q+1)Y. set Q := 2^(j)*(Q+1), #
9764 # R := 0. Return signQ, last 7 bits of Q, and R. #
9766 #########################################################################
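#
# C sketch of Steps 2-4 and 6 (MOD path only; the REM adjustment of Step 5
# and the quotient-sign handling are omitted).  The helper name is
# hypothetical; it assumes finite operands with y != 0:
#
#     #include <math.h>
#     double mod_sketch(double x, double y, unsigned *q7)
#     {
#         int ex, ey;
#         frexp(x, &ex); frexp(y, &ey);
#         double r = fabs(x), Y = fabs(y);
#         unsigned long q = 0;
#         long L = ex - ey;                    /* L := expo(X) - expo(Y)    */
#         if (L >= 0) {
#             r = ldexp(r, (int)-L);           /* R := 2^(-L) X             */
#             for (long j = L; ; j--) {
#                 if (r >= Y) { r -= Y; q++; } /* Steps 3.1/3.2             */
#                 if (j == 0) break;           /* Step 3.3                  */
#                 q += q; r += r;              /* Step 3.4: Q:=2Q, R:=2R    */
#             }
#         }
#         *q7 = (unsigned)(q & 0x7f);          /* last 7 bits of Q (Step 8) */
#         return copysign(r, x);               /* R := signX * R (Step 6)   */
#     }
#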
9768 set Mod_Flag,L_SCR3
9769 set Sc_Flag,L_SCR3+1
9771 set SignY,L_SCR2
9772 set SignX,L_SCR2+2
9773 set SignQ,L_SCR3+2
9775 set Y,FP_SCR0
9776 set Y_Hi,Y+4
9777 set Y_Lo,Y+8
9779 set R,FP_SCR1
9780 set R_Hi,R+4
9781 set R_Lo,R+8
9783 Scale:
9784 long 0x00010000,0x80000000,0x00000000,0x00000000
9786 global smod
9787 smod:
9788 clr.b FPSR_QBYTE(%a6)
9789 mov.l %d0,-(%sp) # save ctrl bits
9790 clr.b Mod_Flag(%a6)
9791 bra.b Mod_Rem
9793 global srem
9794 srem:
9795 clr.b FPSR_QBYTE(%a6)
9796 mov.l %d0,-(%sp) # save ctrl bits
9797 mov.b &0x1,Mod_Flag(%a6)
9799 Mod_Rem:
9800 #..Save sign of X and Y
9801 movm.l &0x3f00,-(%sp) # save data registers
9802 mov.w SRC_EX(%a0),%d3
9803 mov.w %d3,SignY(%a6)
9804 and.l &0x00007FFF,%d3 # Y := |Y|
9807 mov.l SRC_HI(%a0),%d4
9808 mov.l SRC_LO(%a0),%d5 # (D3,D4,D5) is |Y|
9810 tst.l %d3
9811 bne.b Y_Normal
9813 mov.l &0x00003FFE,%d3 # $3FFD + 1
9814 tst.l %d4
9815 bne.b HiY_not0
9817 HiY_0:
9818 mov.l %d5,%d4
9819 clr.l %d5
9820 sub.l &32,%d3
9821 clr.l %d6
9822 bfffo %d4{&0:&32},%d6
9823 lsl.l %d6,%d4
9824 sub.l %d6,%d3 # (D3,D4,D5) is normalized
9825 # ...with bias $7FFD
9826 bra.b Chk_X
9828 HiY_not0:
9829 clr.l %d6
9830 bfffo %d4{&0:&32},%d6
9831 sub.l %d6,%d3
9832 lsl.l %d6,%d4
9833 mov.l %d5,%d7 # a copy of D5
9834 lsl.l %d6,%d5
9835 neg.l %d6
9836 add.l &32,%d6
9837 lsr.l %d6,%d7
9838 or.l %d7,%d4 # (D3,D4,D5) normalized
9839 # ...with bias $7FFD
9840 bra.b Chk_X
9842 Y_Normal:
9843 add.l &0x00003FFE,%d3 # (D3,D4,D5) normalized
9844 # ...with bias $7FFD
9846 Chk_X:
9847 mov.w DST_EX(%a1),%d0
9848 mov.w %d0,SignX(%a6)
9849 mov.w SignY(%a6),%d1
9850 eor.l %d0,%d1
9851 and.l &0x00008000,%d1
9852 mov.w %d1,SignQ(%a6) # sign(Q) obtained
9853 and.l &0x00007FFF,%d0
9854 mov.l DST_HI(%a1),%d1
9855 mov.l DST_LO(%a1),%d2 # (D0,D1,D2) is |X|
9856 tst.l %d0
9857 bne.b X_Normal
9858 mov.l &0x00003FFE,%d0
9859 tst.l %d1
9860 bne.b HiX_not0
9862 HiX_0:
9863 mov.l %d2,%d1
9864 clr.l %d2
9865 sub.l &32,%d0
9866 clr.l %d6
9867 bfffo %d1{&0:&32},%d6
9868 lsl.l %d6,%d1
9869 sub.l %d6,%d0 # (D0,D1,D2) is normalized
9870 # ...with bias $7FFD
9871 bra.b Init
9873 HiX_not0:
9874 clr.l %d6
9875 bfffo %d1{&0:&32},%d6
9876 sub.l %d6,%d0
9877 lsl.l %d6,%d1
9878 mov.l %d2,%d7 # a copy of D2
9879 lsl.l %d6,%d2
9880 neg.l %d6
9881 add.l &32,%d6
9882 lsr.l %d6,%d7
9883 or.l %d7,%d1 # (D0,D1,D2) normalized
9884 # ...with bias $7FFD
9885 bra.b Init
9887 X_Normal:
9888 add.l &0x00003FFE,%d0 # (D0,D1,D2) normalized
9889 # ...with bias $7FFD
9891 Init:
9893 mov.l %d3,L_SCR1(%a6) # save biased exp(Y)
9894 mov.l %d0,-(%sp) # save biased exp(X)
9895 sub.l %d3,%d0 # L := expo(X)-expo(Y)
9897 clr.l %d6 # D6 := carry <- 0
9898 clr.l %d3 # D3 is Q
9899 mov.l &0,%a1 # A1 is k; j+k=L, Q=0
9901 #..(Carry,D1,D2) is R
9902 tst.l %d0
9903 bge.b Mod_Loop_pre
9905 #..expo(X) < expo(Y). Thus X = mod(X,Y)
9907 mov.l (%sp)+,%d0 # restore d0
9908 bra.w Get_Mod
9910 Mod_Loop_pre:
9911 addq.l &0x4,%sp # erase exp(X)
9912 #..At this point R = 2^(-L)X; Q = 0; k = 0; and k+j = L
9913 Mod_Loop:
9914 tst.l %d6 # test carry bit
9915 bgt.b R_GT_Y
9917 #..At this point carry = 0, R = (D1,D2), Y = (D4,D5)
9918 cmp.l %d1,%d4 # compare hi(R) and hi(Y)
9919 bne.b R_NE_Y
9920 cmp.l %d2,%d5 # compare lo(R) and lo(Y)
9921 bne.b R_NE_Y
9923 #..At this point, R = Y
9924 bra.w Rem_is_0
9926 R_NE_Y:
9927 #..use the borrow of the previous compare
9928 bcs.b R_LT_Y # borrow is set iff R < Y
9930 R_GT_Y:
9931 #..If Carry is set, then Y < (Carry,D1,D2) < 2Y. Otherwise, Carry = 0
9932 #..and Y < (D1,D2) < 2Y. Either way, perform R - Y
9933 sub.l %d5,%d2 # lo(R) - lo(Y)
9934 subx.l %d4,%d1 # hi(R) - hi(Y)
9935 clr.l %d6 # clear carry
9936 addq.l &1,%d3 # Q := Q + 1
9938 R_LT_Y:
9939 #..At this point, Carry=0, R < Y. R = 2^(k-L)X - QY; k+j = L; j >= 0.
9940 tst.l %d0 # see if j = 0.
9941 beq.b PostLoop
9943 add.l %d3,%d3 # Q := 2Q
9944 add.l %d2,%d2 # lo(R) = 2lo(R)
9945 roxl.l &1,%d1 # hi(R) = 2hi(R) + carry
9946 scs %d6 # set Carry if 2(R) overflows
9947 addq.l &1,%a1 # k := k+1
9948 subq.l &1,%d0 # j := j - 1
9949 #..At this point, R=(Carry,D1,D2) = 2^(k-L)X - QY, j+k=L, j >= 0, R < 2Y.
9951 bra.b Mod_Loop
9953 PostLoop:
9954 #..k = L, j = 0, Carry = 0, R = (D1,D2) = X - QY, R < Y.
9956 #..normalize R.
9957 mov.l L_SCR1(%a6),%d0 # new biased expo of R
9958 tst.l %d1
9959 bne.b HiR_not0
9961 HiR_0:
9962 mov.l %d2,%d1
9963 clr.l %d2
9964 sub.l &32,%d0
9965 clr.l %d6
9966 bfffo %d1{&0:&32},%d6
9967 lsl.l %d6,%d1
9968 sub.l %d6,%d0 # (D0,D1,D2) is normalized
9969 # ...with bias $7FFD
9970 bra.b Get_Mod
9972 HiR_not0:
9973 clr.l %d6
9974 bfffo %d1{&0:&32},%d6
9975 bmi.b Get_Mod # already normalized
9976 sub.l %d6,%d0
9977 lsl.l %d6,%d1
9978 mov.l %d2,%d7 # a copy of D2
9979 lsl.l %d6,%d2
9980 neg.l %d6
9981 add.l &32,%d6
9982 lsr.l %d6,%d7
9983 or.l %d7,%d1 # (D0,D1,D2) normalized
9986 Get_Mod:
9987 cmp.l %d0,&0x000041FE
9988 bge.b No_Scale
9989 Do_Scale:
9990 mov.w %d0,R(%a6)
9991 mov.l %d1,R_Hi(%a6)
9992 mov.l %d2,R_Lo(%a6)
9993 mov.l L_SCR1(%a6),%d6
9994 mov.w %d6,Y(%a6)
9995 mov.l %d4,Y_Hi(%a6)
9996 mov.l %d5,Y_Lo(%a6)
9997 fmov.x R(%a6),%fp0 # no exception
9998 mov.b &1,Sc_Flag(%a6)
9999 bra.b ModOrRem
10000 No_Scale:
10001 mov.l %d1,R_Hi(%a6)
10002 mov.l %d2,R_Lo(%a6)
10003 sub.l &0x3FFE,%d0
10004 mov.w %d0,R(%a6)
10005 mov.l L_SCR1(%a6),%d6
10006 sub.l &0x3FFE,%d6
10007 mov.l %d6,L_SCR1(%a6)
10008 fmov.x R(%a6),%fp0
10009 mov.w %d6,Y(%a6)
10010 mov.l %d4,Y_Hi(%a6)
10011 mov.l %d5,Y_Lo(%a6)
10012 clr.b Sc_Flag(%a6)
10015 ModOrRem:
10016 tst.b Mod_Flag(%a6)
10017 beq.b Fix_Sign
10019 mov.l L_SCR1(%a6),%d6 # new biased expo(Y)
10020 subq.l &1,%d6 # biased expo(Y/2)
10021 cmp.l %d0,%d6
10022 blt.b Fix_Sign
10023 bgt.b Last_Sub
10025 cmp.l %d1,%d4
10026 bne.b Not_EQ
10027 cmp.l %d2,%d5
10028 bne.b Not_EQ
10029 bra.w Tie_Case
10031 Not_EQ:
10032 bcs.b Fix_Sign
10034 Last_Sub:
10036 fsub.x Y(%a6),%fp0 # no exceptions
10037 addq.l &1,%d3 # Q := Q + 1
10040 Fix_Sign:
10041 #..Get sign of X
10042 mov.w SignX(%a6),%d6
10043 bge.b Get_Q
10044 fneg.x %fp0
10046 #..Get Q
10048 Get_Q:
10049 clr.l %d6
10050 mov.w SignQ(%a6),%d6 # D6 is sign(Q)
10051 mov.l &8,%d7
10052 lsr.l %d7,%d6
10053 and.l &0x0000007F,%d3 # 7 bits of Q
10054 or.l %d6,%d3 # sign and bits of Q
10055 # swap %d3
10056 # fmov.l %fpsr,%d6
10057 # and.l &0xFF00FFFF,%d6
10058 # or.l %d3,%d6
10059 # fmov.l %d6,%fpsr # put Q in fpsr
10060 mov.b %d3,FPSR_QBYTE(%a6) # put Q in fpsr
10063 Restore:
10064 movm.l (%sp)+,&0xfc # {%d2-%d7}
10065 mov.l (%sp)+,%d0
10066 fmov.l %d0,%fpcr
10067 tst.b Sc_Flag(%a6)
10068 beq.b Finish
10069 mov.b &FMUL_OP,%d1 # last inst is MUL
10070 fmul.x Scale(%pc),%fp0 # may cause underflow
10071 bra t_catch2
10072 # the '040 package did this apparently to see if the dst operand for the
10073 # preceding fmul was a denorm. but, it better not have been since the
10074 # algorithm just got done playing with fp0 and expected no exceptions
10075 # as a result. trust me...
10076 # bra t_avoid_unsupp # check for denorm as a
10077 # ;result of the scaling
10079 Finish:
10080 mov.b &FMOV_OP,%d1 # last inst is MOVE
10081 fmov.x %fp0,%fp0 # capture exceptions & round
10082 bra t_catch2
10084 Rem_is_0:
10085 #..R = 2^(-j)X - Q Y = Y, thus R = 0 and quotient = 2^j (Q+1)
10086 addq.l &1,%d3
10087 cmp.l %d0,&8 # D0 is j
10088 bge.b Q_Big
10090 lsl.l %d0,%d3
10091 bra.b Set_R_0
10093 Q_Big:
10094 clr.l %d3
10096 Set_R_0:
10097 fmov.s &0x00000000,%fp0
10098 clr.b Sc_Flag(%a6)
10099 bra.w Fix_Sign
10101 Tie_Case:
10102 #..Check parity of Q
10103 mov.l %d3,%d6
10104 and.l &0x00000001,%d6
10105 tst.l %d6
10106 beq.w Fix_Sign # Q is even
10108 #..Q is odd, Q := Q + 1, signX := -signX
10109 addq.l &1,%d3
10110 mov.w SignX(%a6),%d6
10111 eor.l &0x00008000,%d6
10112 mov.w %d6,SignX(%a6)
10113 bra.w Fix_Sign
10115 qnan: long 0x7fff0000, 0xffffffff, 0xffffffff
10117 #########################################################################
10118 # XDEF **************************************************************** #
10119 # t_dz(): Handle DZ exception during transcendental emulation. #
10120 # Sets N bit according to sign of source operand. #
10121 # t_dz2(): Handle DZ exception during transcendental emulation. #
10122 # Sets N bit always. #
10124 # XREF **************************************************************** #
10125 # None #
10127 # INPUT *************************************************************** #
10128 # a0 = pointer to source operand #
10130 # OUTPUT ************************************************************** #
10131 # fp0 = default result #
10133 # ALGORITHM *********************************************************** #
10134 # - Store properly signed INF into fp0. #
10135 # - Set FPSR exception status dz bit, ccode inf bit, and #
10136 # accrued dz bit. #
10138 #########################################################################
10140 global t_dz
10141 t_dz:
10142 tst.b SRC_EX(%a0) # is src negative?
10143 bmi.b t_dz2 # yes
10145 dz_pinf:
10146 fmov.s &0x7f800000,%fp0 # return +INF in fp0
10147 ori.l &dzinf_mask,USER_FPSR(%a6) # set I/DZ/ADZ
10148 rts
10150 global t_dz2
10151 t_dz2:
10152 fmov.s &0xff800000,%fp0 # return -INF in fp0
10153 ori.l &dzinf_mask+neg_mask,USER_FPSR(%a6) # set N/I/DZ/ADZ
10154 rts
10156 #################################################################
10157 # OPERR exception: #
10158 # - set FPSR exception status operr bit, condition code #
10159 # nan bit; Store default NAN into fp0 #
10160 #################################################################
10161 global t_operr
10162 t_operr:
10163 ori.l &opnan_mask,USER_FPSR(%a6) # set NaN/OPERR/AIOP
10164 fmovm.x qnan(%pc),&0x80 # return default NAN in fp0
10165 rts
10167 #################################################################
10168 # Extended DENORM: #
10169 # - For all functions that have a denormalized input and #
10170 # that f(x)=x, this is the entry point. #
10171 # - we only return the EXOP here if either underflow or #
10172 # inexact is enabled. #
10173 #################################################################
10175 # Entry point for scale w/ extended denorm. The function does
10176 # NOT set INEX2/AUNFL/AINEX.
10177 global t_resdnrm
10178 t_resdnrm:
10179 ori.l &unfl_mask,USER_FPSR(%a6) # set UNFL
10180 bra.b xdnrm_con
10182 global t_extdnrm
10183 t_extdnrm:
10184 ori.l &unfinx_mask,USER_FPSR(%a6) # set UNFL/INEX2/AUNFL/AINEX
10186 xdnrm_con:
10187 mov.l %a0,%a1 # make copy of src ptr
10188 mov.l %d0,%d1 # make copy of rnd prec,mode
10189 andi.b &0xc0,%d1 # extended precision?
10190 bne.b xdnrm_sd # no
10192 # result precision is extended.
10193 tst.b LOCAL_EX(%a0) # is denorm negative?
10194 bpl.b xdnrm_exit # no
10196 bset &neg_bit,FPSR_CC(%a6) # yes; set 'N' ccode bit
10197 bra.b xdnrm_exit
10199 # result precision is single or double
10200 xdnrm_sd:
10201 mov.l %a1,-(%sp)
10202 tst.b LOCAL_EX(%a0) # is denorm pos or neg?
10203 smi.b %d1 # set d1 accordingly
10204 bsr.l unf_sub
10205 mov.l (%sp)+,%a1
10206 xdnrm_exit:
10207 fmovm.x (%a0),&0x80 # return default result in fp0
10209 mov.b FPCR_ENABLE(%a6),%d0
10210 andi.b &0x0a,%d0 # is UNFL or INEX enabled?
10211 bne.b xdnrm_ena # yes
10212 rts
10214 ################
10215 # unfl enabled #
10216 ################
10217 # we have a DENORM that needs to be converted into an EXOP.
10218 # so, normalize the mantissa, add 0x6000 to the new exponent,
10219 # and return the result in fp1.
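#
# Editorial sketch (comment only; the helper name is invented): the three
# instructions that follow the call to norm below amount to this C-style
# bit splice, where new_exp is whatever adjusted exponent value norm()
# leaves in d0 and old_ex is the stored sign/exponent word; the sum is
# assumed to fit in the 15-bit exponent field.
#
#   unsigned short splice_exop_ex(unsigned short old_ex, unsigned short new_exp)
#   {
#       return (unsigned short)((old_ex & 0x8000)     /* keep old sign      */
#                             | (new_exp + 0x6000));  /* insert biased exp  */
#   }
#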
10220 xdnrm_ena:
10221 mov.w LOCAL_EX(%a1),FP_SCR0_EX(%a6)
10222 mov.l LOCAL_HI(%a1),FP_SCR0_HI(%a6)
10223 mov.l LOCAL_LO(%a1),FP_SCR0_LO(%a6)
10225 lea FP_SCR0(%a6),%a0
10226 bsr.l norm # normalize mantissa
10227 addi.l &0x6000,%d0 # add extra bias
10228 andi.w &0x8000,FP_SCR0_EX(%a6) # keep old sign
10229 or.w %d0,FP_SCR0_EX(%a6) # insert new exponent
10231 fmovm.x FP_SCR0(%a6),&0x40 # return EXOP in fp1
10232 rts
10234 #################################################################
10235 # UNFL exception: #
10236 # - This routine is for cases where even an EXOP isn't #
10237 # large enough to hold the range of this result. #
10238 # In such a case, the EXOP equals zero. #
10239 # - Return the default result to the proper precision #
10240 # with the sign of this result being the same as that #
10241 # of the src operand. #
10242 # - t_unfl2() is provided to force the result sign to #
10243 # positive which is the desired result for fetox(). #
10244 #################################################################
10245 global t_unfl
10246 t_unfl:
10247 ori.l &unfinx_mask,USER_FPSR(%a6) # set UNFL/INEX2/AUNFL/AINEX
10249 tst.b (%a0) # is result pos or neg?
10250 smi.b %d1 # set d1 accordingly
10251 bsr.l unf_sub # calc default unfl result
10252 fmovm.x (%a0),&0x80 # return default result in fp0
10254 fmov.s &0x00000000,%fp1 # return EXOP in fp1
10255 rts
10257 # t_unfl2 ALWAYS tells unf_sub to create a positive result
10258 global t_unfl2
10259 t_unfl2:
10260 ori.l &unfinx_mask,USER_FPSR(%a6) # set UNFL/INEX2/AUNFL/AINEX
10262 sf.b %d1 # set d1 to represent positive
10263 bsr.l unf_sub # calc default unfl result
10264 fmovm.x (%a0),&0x80 # return default result in fp0
10266 fmov.s &0x00000000,%fp1 # return EXOP in fp1
10267 rts
10269 #################################################################
10270 # OVFL exception: #
10271 # - This routine is for cases where even an EXOP isn't #
10272 # large enough to hold the range of this result. #
10273 # - Return the default result to the proper precision #
10274 # with the sign of this result being the same as that #
10275 # of the src operand. #
10276 # - t_ovfl2() is provided to force the result sign to #
10277 # positive which is the desired result for fcosh(). #
10278 # - t_ovfl_sc() is provided for scale() which only sets #
10279 # the inexact bits if the number is inexact for the #
10280 # precision indicated. #
10281 #################################################################
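#
# Editorial sketch (comment only; the helper name is invented): for
# t_ovfl_sc, the "inexact for the precision indicated" test below boils
# down to asking whether the 64-bit extended mantissa has any bits set
# below the 24-bit single or 53-bit double mantissa. (The real code
# first normalizes a DENORM dst before making this test.) hi and lo are
# assumed to be the two 32-bit mantissa longwords.
#
#   int mantissa_inexact(unsigned int hi, unsigned int lo, int dbl)
#   {
#       if (dbl)
#           return (lo & 0x7ff) != 0;          /* bits below the dbl mantissa */
#       return lo != 0 || (hi & 0xff) != 0;    /* bits below the sgl mantissa */
#   }
#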
10283 global t_ovfl_sc
10284 t_ovfl_sc:
10285 ori.l &ovfl_inx_mask,USER_FPSR(%a6) # set OVFL/AOVFL/AINEX
10287 mov.b %d0,%d1 # fetch rnd mode/prec
10288 andi.b &0xc0,%d1 # extract rnd prec
10289 beq.b ovfl_work # prec is extended
10291 tst.b LOCAL_HI(%a0) # is dst a DENORM?
10292 bmi.b ovfl_sc_norm # no
10294 # dst op is a DENORM. we have to normalize the mantissa to see if the
10295 # result would be inexact for the given precision. make a copy of the
10296 # dst so we don't screw up the version passed to us.
10297 mov.w LOCAL_EX(%a0),FP_SCR0_EX(%a6)
10298 mov.l LOCAL_HI(%a0),FP_SCR0_HI(%a6)
10299 mov.l LOCAL_LO(%a0),FP_SCR0_LO(%a6)
10300 lea FP_SCR0(%a6),%a0 # pass ptr to FP_SCR0
10301 movm.l &0xc080,-(%sp) # save d0-d1/a0
10302 bsr.l norm # normalize mantissa
10303 movm.l (%sp)+,&0x0103 # restore d0-d1/a0
10305 ovfl_sc_norm:
10306 cmpi.b %d1,&0x40 # is prec dbl?
10307 bne.b ovfl_sc_dbl # no; sgl
10308 ovfl_sc_sgl:
10309 tst.l LOCAL_LO(%a0) # is lo lw of sgl set?
10310 bne.b ovfl_sc_inx # yes
10311 tst.b 3+LOCAL_HI(%a0) # is lo byte of hi lw set?
10312 bne.b ovfl_sc_inx # yes
10313 bra.b ovfl_work # don't set INEX2
10314 ovfl_sc_dbl:
10315 mov.l LOCAL_LO(%a0),%d1 # are any of lo 11 bits of
10316 andi.l &0x7ff,%d1 # dbl mantissa set?
10317 beq.b ovfl_work # no; don't set INEX2
10318 ovfl_sc_inx:
10319 ori.l &inex2_mask,USER_FPSR(%a6) # set INEX2
10320 bra.b ovfl_work # continue
10322 global t_ovfl
10323 t_ovfl:
10324 ori.l &ovfinx_mask,USER_FPSR(%a6) # set OVFL/INEX2/AOVFL/AINEX
10326 ovfl_work:
10327 tst.b LOCAL_EX(%a0) # what is the sign?
10328 smi.b %d1 # set d1 accordingly
10329 bsr.l ovf_res # calc default ovfl result
10330 mov.b %d0,FPSR_CC(%a6) # insert new ccodes
10331 fmovm.x (%a0),&0x80 # return default result in fp0
10333 fmov.s &0x00000000,%fp1 # return EXOP in fp1
10334 rts
10336 # t_ovfl2 ALWAYS tells ovf_res to create a positive result
10337 global t_ovfl2
10338 t_ovfl2:
10339 ori.l &ovfinx_mask,USER_FPSR(%a6) # set OVFL/INEX2/AOVFL/AINEX
10341 sf.b %d1 # clear sign flag for positive
10342 bsr.l ovf_res # calc default ovfl result
10343 mov.b %d0,FPSR_CC(%a6) # insert new ccodes
10344 fmovm.x (%a0),&0x80 # return default result in fp0
10346 fmov.s &0x00000000,%fp1 # return EXOP in fp1
10347 rts
10349 #################################################################
10350 # t_catch(): #
10351 # - the last operation of a transcendental emulation #
10352 # routine may have caused an underflow or overflow. #
10353 # we find out if this occurred by doing an fsave and #
10354 # checking the exception bit. if one did occur, then we #
10355 # jump to fgen_except() which creates the default #
10356 # result and EXOP for us. #
10357 #################################################################
10358 global t_catch
10359 t_catch:
10361 fsave -(%sp)
10362 tst.b 0x2(%sp)
10363 bmi.b catch
10364 add.l &0xc,%sp
10366 #################################################################
10367 # INEX2 exception: #
10368 # - The inex2 and ainex bits are set. #
10369 #################################################################
10370 global t_inx2
10371 t_inx2:
10372 fblt.w t_minx2
10373 fbeq.w inx2_zero
10375 global t_pinx2
10376 t_pinx2:
10377 ori.w &inx2a_mask,2+USER_FPSR(%a6) # set INEX2/AINEX
10378 rts
10380 global t_minx2
10381 t_minx2:
10382 ori.l &inx2a_mask+neg_mask,USER_FPSR(%a6) # set N/INEX2/AINEX
10383 rts
10385 inx2_zero:
10386 mov.b &z_bmask,FPSR_CC(%a6)
10387 ori.w &inx2a_mask,2+USER_FPSR(%a6) # set INEX2/AINEX
10388 rts
10390 # an underflow or overflow exception occurred.
10391 # we must set INEX/AINEX since the fmul/fdiv/fmov emulation may not!
10392 catch:
10393 ori.w &inx2a_mask,FPSR_EXCEPT(%a6)
10394 catch2:
10395 bsr.l fgen_except
10396 add.l &0xc,%sp
10397 rts
10399 global t_catch2
10400 t_catch2:
10402 fsave -(%sp)
10404 tst.b 0x2(%sp)
10405 bmi.b catch2
10406 add.l &0xc,%sp
10408 fmov.l %fpsr,%d0
10409 or.l %d0,USER_FPSR(%a6)
10410 rts
10413 #########################################################################
10415 #########################################################################
10416 # unf_sub(): underflow default result calculation for transcendentals #
10418 # INPUT: #
10419 # d0 : rnd mode,precision #
10420 # d1.b : sign bit of result (0xff = (-) ; 0x00 = (+)) #
10421 # OUTPUT: #
10422 # a0 : points to result (in instruction memory) #
10423 #########################################################################
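#
# Editorial sketch (comment only; the helper name is invented): the index
# built below from the rounding control byte and the sign byte selects
# one entry in each of the two tables that follow -- a condition-code
# byte from tbl_unf_cc and a 16-byte extended-precision value from
# tbl_unf_result.
#
#   unsigned int unf_index(unsigned char rnd_byte,   /* {prec,mode} in bits 7-4 */
#                          unsigned char sign_byte)  /* 0x00 = (+), 0xff = (-)  */
#   {
#       unsigned int idx = (rnd_byte >> 4) & 0x0f;   /* prec,mode -> bits 3-0   */
#       idx |= sign_byte & 0x10;                     /* sign -> bit 4           */
#       return idx;  /* cc = tbl_unf_cc[idx]; result = tbl_unf_result + 16*idx  */
#   }
#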
10424 unf_sub:
10425 ori.l &unfinx_mask,USER_FPSR(%a6)
10427 andi.w &0x10,%d1 # keep sign bit in 4th spot
10429 lsr.b &0x4,%d0 # shift rnd prec,mode to lo bits
10430 andi.b &0xf,%d0 # strip hi rnd mode bit
10431 or.b %d1,%d0 # concat {sgn,mode,prec}
10433 mov.l %d0,%d1 # make a copy
10434 lsl.b &0x1,%d1 # mult index by 2
10436 mov.b (tbl_unf_cc.b,%pc,%d0.w*1),FPSR_CC(%a6) # insert ccode bits
10437 lea (tbl_unf_result.b,%pc,%d1.w*8),%a0 # grab result ptr
10438 rts
10440 tbl_unf_cc:
10441 byte 0x4, 0x4, 0x4, 0x0
10442 byte 0x4, 0x4, 0x4, 0x0
10443 byte 0x4, 0x4, 0x4, 0x0
10444 byte 0x0, 0x0, 0x0, 0x0
10445 byte 0x8+0x4, 0x8+0x4, 0x8, 0x8+0x4
10446 byte 0x8+0x4, 0x8+0x4, 0x8, 0x8+0x4
10447 byte 0x8+0x4, 0x8+0x4, 0x8, 0x8+0x4
10449 tbl_unf_result:
10450 long 0x00000000, 0x00000000, 0x00000000, 0x0 # ZERO;ext
10451 long 0x00000000, 0x00000000, 0x00000000, 0x0 # ZERO;ext
10452 long 0x00000000, 0x00000000, 0x00000000, 0x0 # ZERO;ext
10453 long 0x00000000, 0x00000000, 0x00000001, 0x0 # MIN; ext
10455 long 0x3f810000, 0x00000000, 0x00000000, 0x0 # ZERO;sgl
10456 long 0x3f810000, 0x00000000, 0x00000000, 0x0 # ZERO;sgl
10457 long 0x3f810000, 0x00000000, 0x00000000, 0x0 # ZERO;sgl
10458 long 0x3f810000, 0x00000100, 0x00000000, 0x0 # MIN; sgl
10460 long 0x3c010000, 0x00000000, 0x00000000, 0x0 # ZERO;dbl
10461 long 0x3c010000, 0x00000000, 0x00000000, 0x0 # ZERO;dbl
10462 long 0x3c010000, 0x00000000, 0x00000000, 0x0 # ZERO;dbl
10463 long 0x3c010000, 0x00000000, 0x00000800, 0x0 # MIN; dbl
10465 long 0x0,0x0,0x0,0x0
10466 long 0x0,0x0,0x0,0x0
10467 long 0x0,0x0,0x0,0x0
10468 long 0x0,0x0,0x0,0x0
10470 long 0x80000000, 0x00000000, 0x00000000, 0x0 # ZERO;ext
10471 long 0x80000000, 0x00000000, 0x00000000, 0x0 # ZERO;ext
10472 long 0x80000000, 0x00000000, 0x00000001, 0x0 # MIN; ext
10473 long 0x80000000, 0x00000000, 0x00000000, 0x0 # ZERO;ext
10475 long 0xbf810000, 0x00000000, 0x00000000, 0x0 # ZERO;sgl
10476 long 0xbf810000, 0x00000000, 0x00000000, 0x0 # ZERO;sgl
10477 long 0xbf810000, 0x00000100, 0x00000000, 0x0 # MIN; sgl
10478 long 0xbf810000, 0x00000000, 0x00000000, 0x0 # ZERO;sgl
10480 long 0xbc010000, 0x00000000, 0x00000000, 0x0 # ZERO;dbl
10481 long 0xbc010000, 0x00000000, 0x00000000, 0x0 # ZERO;dbl
10482 long 0xbc010000, 0x00000000, 0x00000800, 0x0 # MIN; dbl
10483 long 0xbc010000, 0x00000000, 0x00000000, 0x0 # ZERO;dbl
10485 ############################################################
10487 #########################################################################
10488 # src_zero(): Return signed zero according to sign of src operand. #
10489 #########################################################################
10490 global src_zero
10491 src_zero:
10492 tst.b SRC_EX(%a0) # get sign of src operand
10493 bmi.b ld_mzero # if neg, load neg zero
10496 # ld_pzero(): return a positive zero.
10498 global ld_pzero
10499 ld_pzero:
10500 fmov.s &0x00000000,%fp0 # load +0
10501 mov.b &z_bmask,FPSR_CC(%a6) # set 'Z' ccode bit
10502 rts
10504 # ld_mzero(): return a negative zero.
10505 global ld_mzero
10506 ld_mzero:
10507 fmov.s &0x80000000,%fp0 # load -0
10508 mov.b &neg_bmask+z_bmask,FPSR_CC(%a6) # set 'N','Z' ccode bits
10509 rts
10511 #########################################################################
10512 # dst_zero(): Return signed zero according to sign of dst operand. #
10513 #########################################################################
10514 global dst_zero
10515 dst_zero:
10516 tst.b DST_EX(%a1) # get sign of dst operand
10517 bmi.b ld_mzero # if neg, load neg zero
10518 bra.b ld_pzero # load positive zero
10520 #########################################################################
10521 # src_inf(): Return signed inf according to sign of src operand. #
10522 #########################################################################
10523 global src_inf
10524 src_inf:
10525 tst.b SRC_EX(%a0) # get sign of src operand
10526 bmi.b ld_minf # if negative branch
10529 # ld_pinf(): return a positive infinity.
10531 global ld_pinf
10532 ld_pinf:
10533 fmov.s &0x7f800000,%fp0 # load +INF
10534 mov.b &inf_bmask,FPSR_CC(%a6) # set 'INF' ccode bit
10535 rts
10538 # ld_minf(): return a negative infinity.
10540 global ld_minf
10541 ld_minf:
10542 fmov.s &0xff800000,%fp0 # load -INF
10543 mov.b &neg_bmask+inf_bmask,FPSR_CC(%a6) # set 'N','I' ccode bits
10544 rts
10546 #########################################################################
10547 # dst_inf(): Return signed inf according to sign of dst operand. #
10548 #########################################################################
10549 global dst_inf
10550 dst_inf:
10551 tst.b DST_EX(%a1) # get sign of dst operand
10552 bmi.b ld_minf # if negative branch
10553 bra.b ld_pinf
10555 global szr_inf
10556 #################################################################
10557 # szr_inf(): Return +ZERO for a negative src operand or #
10558 # +INF for a positive src operand. #
10559 # Routine used for fetox, ftwotox, and ftentox. #
10560 #################################################################
10561 szr_inf:
10562 tst.b SRC_EX(%a0) # check sign of source
10563 bmi.b ld_pzero
10564 bra.b ld_pinf
10566 #########################################################################
10567 # sopr_inf(): Return +INF for a positive src operand or #
10568 # jump to operand error routine for a negative src operand. #
10569 # Routine used for flogn, flognp1, flog10, and flog2. #
10570 #########################################################################
10571 global sopr_inf
10572 sopr_inf:
10573 tst.b SRC_EX(%a0) # check sign of source
10574 bmi.w t_operr
10575 bra.b ld_pinf
10577 #################################################################
10578 # setoxm1i(): Return minus one for a negative src operand or #
10579 # positive infinity for a positive src operand. #
10580 # Routine used for fetoxm1. #
10581 #################################################################
10582 global setoxm1i
10583 setoxm1i:
10584 tst.b SRC_EX(%a0) # check sign of source
10585 bmi.b ld_mone
10586 bra.b ld_pinf
10588 #########################################################################
10589 # src_one(): Return signed one according to sign of src operand. #
10590 #########################################################################
10591 global src_one
10592 src_one:
10593 tst.b SRC_EX(%a0) # check sign of source
10594 bmi.b ld_mone
10597 # ld_pone(): return positive one.
10599 global ld_pone
10600 ld_pone:
10601 fmov.s &0x3f800000,%fp0 # load +1
10602 clr.b FPSR_CC(%a6)
10603 rts
10606 # ld_mone(): return negative one.
10608 global ld_mone
10609 ld_mone:
10610 fmov.s &0xbf800000,%fp0 # load -1
10611 mov.b &neg_bmask,FPSR_CC(%a6) # set 'N' ccode bit
10612 rts
10614 ppiby2: long 0x3fff0000, 0xc90fdaa2, 0x2168c235
10615 mpiby2: long 0xbfff0000, 0xc90fdaa2, 0x2168c235
10617 #################################################################
10618 # spi_2(): Return signed PI/2 according to sign of src operand. #
10619 #################################################################
10620 global spi_2
10621 spi_2:
10622 tst.b SRC_EX(%a0) # check sign of source
10623 bmi.b ld_mpi2
10626 # ld_ppi2(): return positive PI/2.
10628 global ld_ppi2
10629 ld_ppi2:
10630 fmov.l %d0,%fpcr
10631 fmov.x ppiby2(%pc),%fp0 # load +pi/2
10632 bra.w t_pinx2 # set INEX2
10635 # ld_mpi2(): return negative PI/2.
10637 global ld_mpi2
10638 ld_mpi2:
10639 fmov.l %d0,%fpcr
10640 fmov.x mpiby2(%pc),%fp0 # load -pi/2
10641 bra.w t_minx2 # set INEX2
10643 ####################################################
10644 # The following routines give support for fsincos. #
10645 ####################################################
10648 # ssincosz(): When the src operand is ZERO, store a one in the
10649 # cosine register and return a ZERO in fp0 w/ the same sign
10650 # as the src operand.
10652 global ssincosz
10653 ssincosz:
10654 fmov.s &0x3f800000,%fp1
10655 tst.b SRC_EX(%a0) # test sign
10656 bpl.b sincoszp
10657 fmov.s &0x80000000,%fp0 # return sin result in fp0
10658 mov.b &z_bmask+neg_bmask,FPSR_CC(%a6)
10659 bra.b sto_cos # store cosine result
10660 sincoszp:
10661 fmov.s &0x00000000,%fp0 # return sin result in fp0
10662 mov.b &z_bmask,FPSR_CC(%a6)
10663 bra.b sto_cos # store cosine result
10666 # ssincosi(): When the src operand is INF, store a QNAN in the cosine
10667 # register and jump to the operand error routine.
10670 global ssincosi
10671 ssincosi:
10672 fmov.x qnan(%pc),%fp1 # load NAN
10673 bsr.l sto_cos # store cosine result
10674 bra.w t_operr
10677 # ssincosqnan(): When the src operand is a QNAN, store the QNAN in the cosine
10678 # register and branch to the src QNAN routine.
10680 global ssincosqnan
10681 ssincosqnan:
10682 fmov.x LOCAL_EX(%a0),%fp1
10683 bsr.l sto_cos
10684 bra.w src_qnan
10687 # ssincossnan(): When the src operand is an SNAN, store the SNAN w/ the SNAN bit set
10688 # in the cosine register and branch to the src SNAN routine.
10690 global ssincossnan
10691 ssincossnan:
10692 fmov.x LOCAL_EX(%a0),%fp1
10693 bsr.l sto_cos
10694 bra.w src_snan
10696 ########################################################################
10698 #########################################################################
10699 # sto_cos(): store fp1 to the fpreg designated by the CMDREG dst field. #
10700 # fp1 holds the result of the cosine portion of ssincos(). #
10701 # the value in fp1 will not take any exceptions when moved. #
10702 # INPUT: #
10703 # fp1 : fp value to store #
10704 # MODIFIED: #
10705 # d0 #
10706 #########################################################################
10707 global sto_cos
10708 sto_cos:
10709 mov.b 1+EXC_CMDREG(%a6),%d0
10710 andi.w &0x7,%d0
10711 mov.w (tbl_sto_cos.b,%pc,%d0.w*2),%d0
10712 jmp (tbl_sto_cos.b,%pc,%d0.w*1)
10714 tbl_sto_cos:
10715 short sto_cos_0 - tbl_sto_cos
10716 short sto_cos_1 - tbl_sto_cos
10717 short sto_cos_2 - tbl_sto_cos
10718 short sto_cos_3 - tbl_sto_cos
10719 short sto_cos_4 - tbl_sto_cos
10720 short sto_cos_5 - tbl_sto_cos
10721 short sto_cos_6 - tbl_sto_cos
10722 short sto_cos_7 - tbl_sto_cos
10724 sto_cos_0:
10725 fmovm.x &0x40,EXC_FP0(%a6)
10726 rts
10727 sto_cos_1:
10728 fmovm.x &0x40,EXC_FP1(%a6)
10729 rts
10730 sto_cos_2:
10731 fmov.x %fp1,%fp2
10732 rts
10733 sto_cos_3:
10734 fmov.x %fp1,%fp3
10735 rts
10736 sto_cos_4:
10737 fmov.x %fp1,%fp4
10738 rts
10739 sto_cos_5:
10740 fmov.x %fp1,%fp5
10741 rts
10742 sto_cos_6:
10743 fmov.x %fp1,%fp6
10744 rts
10745 sto_cos_7:
10746 fmov.x %fp1,%fp7
10747 rts
10749 ##################################################################
10750 global smod_sdnrm
10751 global smod_snorm
10752 smod_sdnrm:
10753 smod_snorm:
10754 mov.b DTAG(%a6),%d1
10755 beq.l smod
10756 cmpi.b %d1,&ZERO
10757 beq.w smod_zro
10758 cmpi.b %d1,&INF
10759 beq.l t_operr
10760 cmpi.b %d1,&DENORM
10761 beq.l smod
10762 cmpi.b %d1,&SNAN
10763 beq.l dst_snan
10764 bra.l dst_qnan
10766 global smod_szero
10767 smod_szero:
10768 mov.b DTAG(%a6),%d1
10769 beq.l t_operr
10770 cmpi.b %d1,&ZERO
10771 beq.l t_operr
10772 cmpi.b %d1,&INF
10773 beq.l t_operr
10774 cmpi.b %d1,&DENORM
10775 beq.l t_operr
10776 cmpi.b %d1,&QNAN
10777 beq.l dst_qnan
10778 bra.l dst_snan
10780 global smod_sinf
10781 smod_sinf:
10782 mov.b DTAG(%a6),%d1
10783 beq.l smod_fpn
10784 cmpi.b %d1,&ZERO
10785 beq.l smod_zro
10786 cmpi.b %d1,&INF
10787 beq.l t_operr
10788 cmpi.b %d1,&DENORM
10789 beq.l smod_fpn
10790 cmpi.b %d1,&QNAN
10791 beq.l dst_qnan
10792 bra.l dst_snan
10794 smod_zro:
10795 srem_zro:
10796 mov.b SRC_EX(%a0),%d1 # get src sign
10797 mov.b DST_EX(%a1),%d0 # get dst sign
10798 eor.b %d0,%d1 # get qbyte sign
10799 andi.b &0x80,%d1
10800 mov.b %d1,FPSR_QBYTE(%a6)
10801 tst.b %d0
10802 bpl.w ld_pzero
10803 bra.w ld_mzero
10805 smod_fpn:
10806 srem_fpn:
10807 clr.b FPSR_QBYTE(%a6)
10808 mov.l %d0,-(%sp)
10809 mov.b SRC_EX(%a0),%d1 # get src sign
10810 mov.b DST_EX(%a1),%d0 # get dst sign
10811 eor.b %d0,%d1 # get qbyte sign
10812 andi.b &0x80,%d1
10813 mov.b %d1,FPSR_QBYTE(%a6)
10814 cmpi.b DTAG(%a6),&DENORM
10815 bne.b smod_nrm
10816 lea DST(%a1),%a0
10817 mov.l (%sp)+,%d0
10818 bra t_resdnrm
10819 smod_nrm:
10820 fmov.l (%sp)+,%fpcr
10821 fmov.x DST(%a1),%fp0
10822 tst.b DST_EX(%a1)
10823 bmi.b smod_nrm_neg
10824 rts
10826 smod_nrm_neg:
10827 mov.b &neg_bmask,FPSR_CC(%a6) # set 'N' ccode
10828 rts
10830 #########################################################################
10831 global srem_snorm
10832 global srem_sdnrm
10833 srem_sdnrm:
10834 srem_snorm:
10835 mov.b DTAG(%a6),%d1
10836 beq.l srem
10837 cmpi.b %d1,&ZERO
10838 beq.w srem_zro
10839 cmpi.b %d1,&INF
10840 beq.l t_operr
10841 cmpi.b %d1,&DENORM
10842 beq.l srem
10843 cmpi.b %d1,&QNAN
10844 beq.l dst_qnan
10845 bra.l dst_snan
10847 global srem_szero
10848 srem_szero:
10849 mov.b DTAG(%a6),%d1
10850 beq.l t_operr
10851 cmpi.b %d1,&ZERO
10852 beq.l t_operr
10853 cmpi.b %d1,&INF
10854 beq.l t_operr
10855 cmpi.b %d1,&DENORM
10856 beq.l t_operr
10857 cmpi.b %d1,&QNAN
10858 beq.l dst_qnan
10859 bra.l dst_snan
10861 global srem_sinf
10862 srem_sinf:
10863 mov.b DTAG(%a6),%d1
10864 beq.w srem_fpn
10865 cmpi.b %d1,&ZERO
10866 beq.w srem_zro
10867 cmpi.b %d1,&INF
10868 beq.l t_operr
10869 cmpi.b %d1,&DENORM
10870 beq.l srem_fpn
10871 cmpi.b %d1,&QNAN
10872 beq.l dst_qnan
10873 bra.l dst_snan
10875 #########################################################################
10876 global sscale_snorm
10877 global sscale_sdnrm
10878 sscale_snorm:
10879 sscale_sdnrm:
10880 mov.b DTAG(%a6),%d1
10881 beq.l sscale
10882 cmpi.b %d1,&ZERO
10883 beq.l dst_zero
10884 cmpi.b %d1,&INF
10885 beq.l dst_inf
10886 cmpi.b %d1,&DENORM
10887 beq.l sscale
10888 cmpi.b %d1,&QNAN
10889 beq.l dst_qnan
10890 bra.l dst_snan
10892 global sscale_szero
10893 sscale_szero:
10894 mov.b DTAG(%a6),%d1
10895 beq.l sscale
10896 cmpi.b %d1,&ZERO
10897 beq.l dst_zero
10898 cmpi.b %d1,&INF
10899 beq.l dst_inf
10900 cmpi.b %d1,&DENORM
10901 beq.l sscale
10902 cmpi.b %d1,&QNAN
10903 beq.l dst_qnan
10904 bra.l dst_snan
10906 global sscale_sinf
10907 sscale_sinf:
10908 mov.b DTAG(%a6),%d1
10909 beq.l t_operr
10910 cmpi.b %d1,&QNAN
10911 beq.l dst_qnan
10912 cmpi.b %d1,&SNAN
10913 beq.l dst_snan
10914 bra.l t_operr
10916 ########################################################################
10919 # sop_sqnan(): The src op for frem/fmod/fscale was a QNAN.
10921 global sop_sqnan
10922 sop_sqnan:
10923 mov.b DTAG(%a6),%d1
10924 cmpi.b %d1,&QNAN
10925 beq.b dst_qnan
10926 cmpi.b %d1,&SNAN
10927 beq.b dst_snan
10928 bra.b src_qnan
10931 # sop_ssnan(): The src op for frem/fmod/fscale was an SNAN.
10933 global sop_ssnan
10934 sop_ssnan:
10935 mov.b DTAG(%a6),%d1
10936 cmpi.b %d1,&QNAN
10937 beq.b dst_qnan_src_snan
10938 cmpi.b %d1,&SNAN
10939 beq.b dst_snan
10940 bra.b src_snan
10942 dst_qnan_src_snan:
10943 ori.l &snaniop_mask,USER_FPSR(%a6) # set NAN/SNAN/AIOP
10944 bra.b dst_qnan
10947 # dst_snan(): Return the dst SNAN w/ the SNAN bit set.
10949 global dst_snan
10950 dst_snan:
10951 fmov.x DST(%a1),%fp0 # the fmove sets the SNAN bit
10952 fmov.l %fpsr,%d0 # catch resulting status
10953 or.l %d0,USER_FPSR(%a6) # store status
10954 rts
10957 # dst_qnan(): Return the dst QNAN.
10959 global dst_qnan
10960 dst_qnan:
10961 fmov.x DST(%a1),%fp0 # return the non-signalling nan
10962 tst.b DST_EX(%a1) # set ccodes according to QNAN sign
10963 bmi.b dst_qnan_m
10964 dst_qnan_p:
10965 mov.b &nan_bmask,FPSR_CC(%a6)
10966 rts
10967 dst_qnan_m:
10968 mov.b &neg_bmask+nan_bmask,FPSR_CC(%a6)
10969 rts
10972 # src_snan(): Return the src SNAN w/ the SNAN bit set.
10974 global src_snan
10975 src_snan:
10976 fmov.x SRC(%a0),%fp0 # the fmove sets the SNAN bit
10977 fmov.l %fpsr,%d0 # catch resulting status
10978 or.l %d0,USER_FPSR(%a6) # store status
10979 rts
10982 # src_qnan(): Return the src QNAN.
10984 global src_qnan
10985 src_qnan:
10986 fmov.x SRC(%a0),%fp0 # return the non-signalling nan
10987 tst.b SRC_EX(%a0) # set ccodes according to QNAN sign
10988 bmi.b dst_qnan_m
10989 src_qnan_p:
10990 mov.b &nan_bmask,FPSR_CC(%a6)
10991 rts
10992 src_qnan_m:
10993 mov.b &neg_bmask+nan_bmask,FPSR_CC(%a6)
10994 rts
10997 # fkern2.s:
10998 # These entry points are used by the exception handler
10999 # routines where an instruction is selected by an index into
11000 # a large jump table corresponding to a given instruction which
11001 # has been decoded. Flow continues here where we now decode
11002 # further according to the source operand type.
11005 global fsinh
11006 fsinh:
11007 mov.b STAG(%a6),%d1
11008 beq.l ssinh
11009 cmpi.b %d1,&ZERO
11010 beq.l src_zero
11011 cmpi.b %d1,&INF
11012 beq.l src_inf
11013 cmpi.b %d1,&DENORM
11014 beq.l ssinhd
11015 cmpi.b %d1,&QNAN
11016 beq.l src_qnan
11017 bra.l src_snan
11019 global flognp1
11020 flognp1:
11021 mov.b STAG(%a6),%d1
11022 beq.l slognp1
11023 cmpi.b %d1,&ZERO
11024 beq.l src_zero
11025 cmpi.b %d1,&INF
11026 beq.l sopr_inf
11027 cmpi.b %d1,&DENORM
11028 beq.l slognp1d
11029 cmpi.b %d1,&QNAN
11030 beq.l src_qnan
11031 bra.l src_snan
11033 global fetoxm1
11034 fetoxm1:
11035 mov.b STAG(%a6),%d1
11036 beq.l setoxm1
11037 cmpi.b %d1,&ZERO
11038 beq.l src_zero
11039 cmpi.b %d1,&INF
11040 beq.l setoxm1i
11041 cmpi.b %d1,&DENORM
11042 beq.l setoxm1d
11043 cmpi.b %d1,&QNAN
11044 beq.l src_qnan
11045 bra.l src_snan
11047 global ftanh
11048 ftanh:
11049 mov.b STAG(%a6),%d1
11050 beq.l stanh
11051 cmpi.b %d1,&ZERO
11052 beq.l src_zero
11053 cmpi.b %d1,&INF
11054 beq.l src_one
11055 cmpi.b %d1,&DENORM
11056 beq.l stanhd
11057 cmpi.b %d1,&QNAN
11058 beq.l src_qnan
11059 bra.l src_snan
11061 global fatan
11062 fatan:
11063 mov.b STAG(%a6),%d1
11064 beq.l satan
11065 cmpi.b %d1,&ZERO
11066 beq.l src_zero
11067 cmpi.b %d1,&INF
11068 beq.l spi_2
11069 cmpi.b %d1,&DENORM
11070 beq.l satand
11071 cmpi.b %d1,&QNAN
11072 beq.l src_qnan
11073 bra.l src_snan
11075 global fasin
11076 fasin:
11077 mov.b STAG(%a6),%d1
11078 beq.l sasin
11079 cmpi.b %d1,&ZERO
11080 beq.l src_zero
11081 cmpi.b %d1,&INF
11082 beq.l t_operr
11083 cmpi.b %d1,&DENORM
11084 beq.l sasind
11085 cmpi.b %d1,&QNAN
11086 beq.l src_qnan
11087 bra.l src_snan
11089 global fatanh
11090 fatanh:
11091 mov.b STAG(%a6),%d1
11092 beq.l satanh
11093 cmpi.b %d1,&ZERO
11094 beq.l src_zero
11095 cmpi.b %d1,&INF
11096 beq.l t_operr
11097 cmpi.b %d1,&DENORM
11098 beq.l satanhd
11099 cmpi.b %d1,&QNAN
11100 beq.l src_qnan
11101 bra.l src_snan
11103 global fsine
11104 fsine:
11105 mov.b STAG(%a6),%d1
11106 beq.l ssin
11107 cmpi.b %d1,&ZERO
11108 beq.l src_zero
11109 cmpi.b %d1,&INF
11110 beq.l t_operr
11111 cmpi.b %d1,&DENORM
11112 beq.l ssind
11113 cmpi.b %d1,&QNAN
11114 beq.l src_qnan
11115 bra.l src_snan
11117 global ftan
11118 ftan:
11119 mov.b STAG(%a6),%d1
11120 beq.l stan
11121 cmpi.b %d1,&ZERO
11122 beq.l src_zero
11123 cmpi.b %d1,&INF
11124 beq.l t_operr
11125 cmpi.b %d1,&DENORM
11126 beq.l stand
11127 cmpi.b %d1,&QNAN
11128 beq.l src_qnan
11129 bra.l src_snan
11131 global fetox
11132 fetox:
11133 mov.b STAG(%a6),%d1
11134 beq.l setox
11135 cmpi.b %d1,&ZERO
11136 beq.l ld_pone
11137 cmpi.b %d1,&INF
11138 beq.l szr_inf
11139 cmpi.b %d1,&DENORM
11140 beq.l setoxd
11141 cmpi.b %d1,&QNAN
11142 beq.l src_qnan
11143 bra.l src_snan
11145 global ftwotox
11146 ftwotox:
11147 mov.b STAG(%a6),%d1
11148 beq.l stwotox
11149 cmpi.b %d1,&ZERO
11150 beq.l ld_pone
11151 cmpi.b %d1,&INF
11152 beq.l szr_inf
11153 cmpi.b %d1,&DENORM
11154 beq.l stwotoxd
11155 cmpi.b %d1,&QNAN
11156 beq.l src_qnan
11157 bra.l src_snan
11159 global ftentox
11160 ftentox:
11161 mov.b STAG(%a6),%d1
11162 beq.l stentox
11163 cmpi.b %d1,&ZERO
11164 beq.l ld_pone
11165 cmpi.b %d1,&INF
11166 beq.l szr_inf
11167 cmpi.b %d1,&DENORM
11168 beq.l stentoxd
11169 cmpi.b %d1,&QNAN
11170 beq.l src_qnan
11171 bra.l src_snan
11173 global flogn
11174 flogn:
11175 mov.b STAG(%a6),%d1
11176 beq.l slogn
11177 cmpi.b %d1,&ZERO
11178 beq.l t_dz2
11179 cmpi.b %d1,&INF
11180 beq.l sopr_inf
11181 cmpi.b %d1,&DENORM
11182 beq.l slognd
11183 cmpi.b %d1,&QNAN
11184 beq.l src_qnan
11185 bra.l src_snan
11187 global flog10
11188 flog10:
11189 mov.b STAG(%a6),%d1
11190 beq.l slog10
11191 cmpi.b %d1,&ZERO
11192 beq.l t_dz2
11193 cmpi.b %d1,&INF
11194 beq.l sopr_inf
11195 cmpi.b %d1,&DENORM
11196 beq.l slog10d
11197 cmpi.b %d1,&QNAN
11198 beq.l src_qnan
11199 bra.l src_snan
11201 global flog2
11202 flog2:
11203 mov.b STAG(%a6),%d1
11204 beq.l slog2
11205 cmpi.b %d1,&ZERO
11206 beq.l t_dz2
11207 cmpi.b %d1,&INF
11208 beq.l sopr_inf
11209 cmpi.b %d1,&DENORM
11210 beq.l slog2d
11211 cmpi.b %d1,&QNAN
11212 beq.l src_qnan
11213 bra.l src_snan
11215 global fcosh
11216 fcosh:
11217 mov.b STAG(%a6),%d1
11218 beq.l scosh
11219 cmpi.b %d1,&ZERO
11220 beq.l ld_pone
11221 cmpi.b %d1,&INF
11222 beq.l ld_pinf
11223 cmpi.b %d1,&DENORM
11224 beq.l scoshd
11225 cmpi.b %d1,&QNAN
11226 beq.l src_qnan
11227 bra.l src_snan
11229 global facos
11230 facos:
11231 mov.b STAG(%a6),%d1
11232 beq.l sacos
11233 cmpi.b %d1,&ZERO
11234 beq.l ld_ppi2
11235 cmpi.b %d1,&INF
11236 beq.l t_operr
11237 cmpi.b %d1,&DENORM
11238 beq.l sacosd
11239 cmpi.b %d1,&QNAN
11240 beq.l src_qnan
11241 bra.l src_snan
11243 global fcos
11244 fcos:
11245 mov.b STAG(%a6),%d1
11246 beq.l scos
11247 cmpi.b %d1,&ZERO
11248 beq.l ld_pone
11249 cmpi.b %d1,&INF
11250 beq.l t_operr
11251 cmpi.b %d1,&DENORM
11252 beq.l scosd
11253 cmpi.b %d1,&QNAN
11254 beq.l src_qnan
11255 bra.l src_snan
11257 global fgetexp
11258 fgetexp:
11259 mov.b STAG(%a6),%d1
11260 beq.l sgetexp
11261 cmpi.b %d1,&ZERO
11262 beq.l src_zero
11263 cmpi.b %d1,&INF
11264 beq.l t_operr
11265 cmpi.b %d1,&DENORM
11266 beq.l sgetexpd
11267 cmpi.b %d1,&QNAN
11268 beq.l src_qnan
11269 bra.l src_snan
11271 global fgetman
11272 fgetman:
11273 mov.b STAG(%a6),%d1
11274 beq.l sgetman
11275 cmpi.b %d1,&ZERO
11276 beq.l src_zero
11277 cmpi.b %d1,&INF
11278 beq.l t_operr
11279 cmpi.b %d1,&DENORM
11280 beq.l sgetmand
11281 cmpi.b %d1,&QNAN
11282 beq.l src_qnan
11283 bra.l src_snan
11285 global fsincos
11286 fsincos:
11287 mov.b STAG(%a6),%d1
11288 beq.l ssincos
11289 cmpi.b %d1,&ZERO
11290 beq.l ssincosz
11291 cmpi.b %d1,&INF
11292 beq.l ssincosi
11293 cmpi.b %d1,&DENORM
11294 beq.l ssincosd
11295 cmpi.b %d1,&QNAN
11296 beq.l ssincosqnan
11297 bra.l ssincossnan
11299 global fmod
11300 fmod:
11301 mov.b STAG(%a6),%d1
11302 beq.l smod_snorm
11303 cmpi.b %d1,&ZERO
11304 beq.l smod_szero
11305 cmpi.b %d1,&INF
11306 beq.l smod_sinf
11307 cmpi.b %d1,&DENORM
11308 beq.l smod_sdnrm
11309 cmpi.b %d1,&QNAN
11310 beq.l sop_sqnan
11311 bra.l sop_ssnan
11313 global frem
11314 frem:
11315 mov.b STAG(%a6),%d1
11316 beq.l srem_snorm
11317 cmpi.b %d1,&ZERO
11318 beq.l srem_szero
11319 cmpi.b %d1,&INF
11320 beq.l srem_sinf
11321 cmpi.b %d1,&DENORM
11322 beq.l srem_sdnrm
11323 cmpi.b %d1,&QNAN
11324 beq.l sop_sqnan
11325 bra.l sop_ssnan
11327 global fscale
11328 fscale:
11329 mov.b STAG(%a6),%d1
11330 beq.l sscale_snorm
11331 cmpi.b %d1,&ZERO
11332 beq.l sscale_szero
11333 cmpi.b %d1,&INF
11334 beq.l sscale_sinf
11335 cmpi.b %d1,&DENORM
11336 beq.l sscale_sdnrm
11337 cmpi.b %d1,&QNAN
11338 beq.l sop_sqnan
11339 bra.l sop_ssnan
11341 #########################################################################
11342 # XDEF **************************************************************** #
11343 # fgen_except(): catch an exception during transcendental #
11344 # emulation #
11346 # XREF **************************************************************** #
11347 # fmul() - emulate a multiply instruction #
11348 # fadd() - emulate an add instruction #
11349 # fin() - emulate an fmove instruction #
11351 # INPUT *************************************************************** #
11352 # fp0 = destination operand #
11353 # d0 = type of instruction that took exception #
11354 # fsave frame = source operand #
11356 # OUTPUT ************************************************************** #
11357 # fp0 = result #
11358 # fp1 = EXOP #
11360 # ALGORITHM *********************************************************** #
11361 # An exception occurred on the last instruction of the #
11362 # transcendental emulation. hopefully, this won't be happening much #
11363 # because it will be VERY slow. #
11364 # The only exceptions capable of passing through here are #
11365 # Overflow, Underflow, and Unsupported Data Type. #
11367 #########################################################################
11369 global fgen_except
11370 fgen_except:
11371 cmpi.b 0x3(%sp),&0x7 # is exception UNSUPP?
11372 beq.b fge_unsupp # yes
11374 mov.b &NORM,STAG(%a6)
11376 fge_cont:
11377 mov.b &NORM,DTAG(%a6)
11379 # ok, I have a problem with putting the dst op at FP_DST. the emulation
11380 # routines aren't supposed to alter the operands but we've just squashed
11381 # FP_DST here...
11383 # 8/17/93 - this turns out to be more of a "cleanliness" issue
11384 # than a potential bug. to begin with, only the dyadic functions
11385 # frem, fmod, and fscale would get the dst trashed here. But, for
11386 # the 060SP, the FP_DST is never used again anyway.
11387 fmovm.x &0x80,FP_DST(%a6) # dst op is in fp0
11389 lea 0x4(%sp),%a0 # pass: ptr to src op
11390 lea FP_DST(%a6),%a1 # pass: ptr to dst op
11392 cmpi.b %d1,&FMOV_OP
11393 beq.b fge_fin # it was an "fmov"
11394 cmpi.b %d1,&FADD_OP
11395 beq.b fge_fadd # it was an "fadd"
11396 fge_fmul:
11397 bsr.l fmul
11398 rts
11399 fge_fadd:
11400 bsr.l fadd
11401 rts
11402 fge_fin:
11403 bsr.l fin
11404 rts
11406 fge_unsupp:
11407 mov.b &DENORM,STAG(%a6)
11408 bra.b fge_cont
11411 # This table holds the offsets of the emulation routines for each individual
11412 # math operation relative to the address of this table. Included are
11413 # routines like fadd/fmul/fabs as well as the transcendentals.
11414 # The location within the table is determined by the extension bits of the
11415 # operation longword.
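#
# Editorial sketch (comment only; names invented): each entry below is
# stored as (routine - tbl_unsupp), so the table itself is position
# independent. A caller indexes it with the 7-bit extension field and
# adds the entry back to the table's own address to recover the routine
# address, roughly as follows; the handlers that use this table achieve
# the same thing with pc-relative addressing.
#
#   static const void *tbl_lookup(const long *table, unsigned int ext_bits)
#   {
#       long offset = table[ext_bits & 0x7f];        /* self-relative entry */
#       return (const char *)table + offset;         /* absolute address    */
#   }
#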
11418 swbeg &109
11419 tbl_unsupp:
11420 long fin - tbl_unsupp # 00: fmove
11421 long fint - tbl_unsupp # 01: fint
11422 long fsinh - tbl_unsupp # 02: fsinh
11423 long fintrz - tbl_unsupp # 03: fintrz
11424 long fsqrt - tbl_unsupp # 04: fsqrt
11425 long tbl_unsupp - tbl_unsupp
11426 long flognp1 - tbl_unsupp # 06: flognp1
11427 long tbl_unsupp - tbl_unsupp
11428 long fetoxm1 - tbl_unsupp # 08: fetoxm1
11429 long ftanh - tbl_unsupp # 09: ftanh
11430 long fatan - tbl_unsupp # 0a: fatan
11431 long tbl_unsupp - tbl_unsupp
11432 long fasin - tbl_unsupp # 0c: fasin
11433 long fatanh - tbl_unsupp # 0d: fatanh
11434 long fsine - tbl_unsupp # 0e: fsin
11435 long ftan - tbl_unsupp # 0f: ftan
11436 long fetox - tbl_unsupp # 10: fetox
11437 long ftwotox - tbl_unsupp # 11: ftwotox
11438 long ftentox - tbl_unsupp # 12: ftentox
11439 long tbl_unsupp - tbl_unsupp
11440 long flogn - tbl_unsupp # 14: flogn
11441 long flog10 - tbl_unsupp # 15: flog10
11442 long flog2 - tbl_unsupp # 16: flog2
11443 long tbl_unsupp - tbl_unsupp
11444 long fabs - tbl_unsupp # 18: fabs
11445 long fcosh - tbl_unsupp # 19: fcosh
11446 long fneg - tbl_unsupp # 1a: fneg
11447 long tbl_unsupp - tbl_unsupp
11448 long facos - tbl_unsupp # 1c: facos
11449 long fcos - tbl_unsupp # 1d: fcos
11450 long fgetexp - tbl_unsupp # 1e: fgetexp
11451 long fgetman - tbl_unsupp # 1f: fgetman
11452 long fdiv - tbl_unsupp # 20: fdiv
11453 long fmod - tbl_unsupp # 21: fmod
11454 long fadd - tbl_unsupp # 22: fadd
11455 long fmul - tbl_unsupp # 23: fmul
11456 long fsgldiv - tbl_unsupp # 24: fsgldiv
11457 long frem - tbl_unsupp # 25: frem
11458 long fscale - tbl_unsupp # 26: fscale
11459 long fsglmul - tbl_unsupp # 27: fsglmul
11460 long fsub - tbl_unsupp # 28: fsub
11461 long tbl_unsupp - tbl_unsupp
11462 long tbl_unsupp - tbl_unsupp
11463 long tbl_unsupp - tbl_unsupp
11464 long tbl_unsupp - tbl_unsupp
11465 long tbl_unsupp - tbl_unsupp
11466 long tbl_unsupp - tbl_unsupp
11467 long tbl_unsupp - tbl_unsupp
11468 long fsincos - tbl_unsupp # 30: fsincos
11469 long fsincos - tbl_unsupp # 31: fsincos
11470 long fsincos - tbl_unsupp # 32: fsincos
11471 long fsincos - tbl_unsupp # 33: fsincos
11472 long fsincos - tbl_unsupp # 34: fsincos
11473 long fsincos - tbl_unsupp # 35: fsincos
11474 long fsincos - tbl_unsupp # 36: fsincos
11475 long fsincos - tbl_unsupp # 37: fsincos
11476 long fcmp - tbl_unsupp # 38: fcmp
11477 long tbl_unsupp - tbl_unsupp
11478 long ftst - tbl_unsupp # 3a: ftst
11479 long tbl_unsupp - tbl_unsupp
11480 long tbl_unsupp - tbl_unsupp
11481 long tbl_unsupp - tbl_unsupp
11482 long tbl_unsupp - tbl_unsupp
11483 long tbl_unsupp - tbl_unsupp
11484 long fsin - tbl_unsupp # 40: fsmove
11485 long fssqrt - tbl_unsupp # 41: fssqrt
11486 long tbl_unsupp - tbl_unsupp
11487 long tbl_unsupp - tbl_unsupp
11488 long fdin - tbl_unsupp # 44: fdmove
11489 long fdsqrt - tbl_unsupp # 45: fdsqrt
11490 long tbl_unsupp - tbl_unsupp
11491 long tbl_unsupp - tbl_unsupp
11492 long tbl_unsupp - tbl_unsupp
11493 long tbl_unsupp - tbl_unsupp
11494 long tbl_unsupp - tbl_unsupp
11495 long tbl_unsupp - tbl_unsupp
11496 long tbl_unsupp - tbl_unsupp
11497 long tbl_unsupp - tbl_unsupp
11498 long tbl_unsupp - tbl_unsupp
11499 long tbl_unsupp - tbl_unsupp
11500 long tbl_unsupp - tbl_unsupp
11501 long tbl_unsupp - tbl_unsupp
11502 long tbl_unsupp - tbl_unsupp
11503 long tbl_unsupp - tbl_unsupp
11504 long tbl_unsupp - tbl_unsupp
11505 long tbl_unsupp - tbl_unsupp
11506 long tbl_unsupp - tbl_unsupp
11507 long tbl_unsupp - tbl_unsupp
11508 long fsabs - tbl_unsupp # 58: fsabs
11509 long tbl_unsupp - tbl_unsupp
11510 long fsneg - tbl_unsupp # 5a: fsneg
11511 long tbl_unsupp - tbl_unsupp
11512 long fdabs - tbl_unsupp # 5c: fdabs
11513 long tbl_unsupp - tbl_unsupp
11514 long fdneg - tbl_unsupp # 5e: fdneg
11515 long tbl_unsupp - tbl_unsupp
11516 long fsdiv - tbl_unsupp # 60: fsdiv
11517 long tbl_unsupp - tbl_unsupp
11518 long fsadd - tbl_unsupp # 62: fsadd
11519 long fsmul - tbl_unsupp # 63: fsmul
11520 long fddiv - tbl_unsupp # 64: fddiv
11521 long tbl_unsupp - tbl_unsupp
11522 long fdadd - tbl_unsupp # 66: fdadd
11523 long fdmul - tbl_unsupp # 67: fdmul
11524 long fssub - tbl_unsupp # 68: fssub
11525 long tbl_unsupp - tbl_unsupp
11526 long tbl_unsupp - tbl_unsupp
11527 long tbl_unsupp - tbl_unsupp
11528 long fdsub - tbl_unsupp # 6c: fdsub
11530 #########################################################################
11531 # XDEF **************************************************************** #
11532 # fmul(): emulates the fmul instruction #
11533 # fsmul(): emulates the fsmul instruction #
11534 # fdmul(): emulates the fdmul instruction #
11536 # XREF **************************************************************** #
11537 # scale_to_zero_src() - scale src exponent to zero #
11538 # scale_to_zero_dst() - scale dst exponent to zero #
11539 # unf_res() - return default underflow result #
11540 # ovf_res() - return default overflow result #
11541 # res_qnan() - return QNAN result #
11542 # res_snan() - return SNAN result #
11544 # INPUT *************************************************************** #
11545 # a0 = pointer to extended precision source operand #
11546 # a1 = pointer to extended precision destination operand #
11547 # d0 rnd prec,mode #
11549 # OUTPUT ************************************************************** #
11550 # fp0 = result #
11551 # fp1 = EXOP (if exception occurred) #
11553 # ALGORITHM *********************************************************** #
11554 # Handle NANs, infinities, and zeroes as special cases. Divide #
11555 # norms/denorms into ext/sgl/dbl precision. #
11556 # For norms/denorms, scale the exponents such that a multiply #
11557 # instruction won't cause an exception. Use the regular fmul to #
11558 # compute a result. Check if the regular operands would have taken #
11559 # an exception. If so, return the default overflow/underflow result #
11560 # and return the EXOP if exceptions are enabled. Else, scale the #
11561 # result operand to the proper exponent. #
11563 #########################################################################
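#
# Editorial sketch (comment only; names invented) of the threshold test
# performed after both exponents are scaled to zero: 'scale' stands for
# the combined value produced by scale_to_zero_src() + scale_to_zero_dst()
# (those routines are defined elsewhere in the package), and the limits
# shown are the extended-precision rows of tbl_fmul_ovfl/tbl_fmul_unfl
# below; single and double use their own rows.
#
#   typedef enum { WILL_OVFL, MAY_OVFL, NORMAL, MAY_UNFL, WILL_UNFL } fmul_class;
#
#   static fmul_class classify_ext(long scale)
#   {
#       const long ovfl_limit = 0x3fff - 0x7ffe;     /* ext_max  */
#       const long unfl_limit = 0x3fff + 0x0001;     /* ext_unfl */
#
#       if (scale <  ovfl_limit) return WILL_OVFL;   /* blt.w fmul_ovfl     */
#       if (scale == ovfl_limit) return MAY_OVFL;    /* beq.w fmul_may_ovfl */
#       if (scale >  unfl_limit) return WILL_UNFL;   /* bgt.w fmul_unfl     */
#       if (scale == unfl_limit) return MAY_UNFL;    /* beq.w fmul_may_unfl */
#       return NORMAL;
#   }
#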
11565 align 0x10
11566 tbl_fmul_ovfl:
11567 long 0x3fff - 0x7ffe # ext_max
11568 long 0x3fff - 0x407e # sgl_max
11569 long 0x3fff - 0x43fe # dbl_max
11570 tbl_fmul_unfl:
11571 long 0x3fff + 0x0001 # ext_unfl
11572 long 0x3fff - 0x3f80 # sgl_unfl
11573 long 0x3fff - 0x3c00 # dbl_unfl
11575 global fsmul
11576 fsmul:
11577 andi.b &0x30,%d0 # clear rnd prec
11578 ori.b &s_mode*0x10,%d0 # insert sgl prec
11579 bra.b fmul
11581 global fdmul
11582 fdmul:
11583 andi.b &0x30,%d0
11584 ori.b &d_mode*0x10,%d0 # insert dbl prec
11586 global fmul
11587 fmul:
11588 mov.l %d0,L_SCR3(%a6) # store rnd info
11590 clr.w %d1
11591 mov.b DTAG(%a6),%d1
11592 lsl.b &0x3,%d1
11593 or.b STAG(%a6),%d1 # combine src tags
11594 bne.w fmul_not_norm # optimize on non-norm input
11596 fmul_norm:
11597 mov.w DST_EX(%a1),FP_SCR1_EX(%a6)
11598 mov.l DST_HI(%a1),FP_SCR1_HI(%a6)
11599 mov.l DST_LO(%a1),FP_SCR1_LO(%a6)
11601 mov.w SRC_EX(%a0),FP_SCR0_EX(%a6)
11602 mov.l SRC_HI(%a0),FP_SCR0_HI(%a6)
11603 mov.l SRC_LO(%a0),FP_SCR0_LO(%a6)
11605 bsr.l scale_to_zero_src # scale src exponent
11606 mov.l %d0,-(%sp) # save scale factor 1
11608 bsr.l scale_to_zero_dst # scale dst exponent
11610 add.l %d0,(%sp) # SCALE_FACTOR = scale1 + scale2
11612 mov.w 2+L_SCR3(%a6),%d1 # fetch precision
11613 lsr.b &0x6,%d1 # shift to lo bits
11614 mov.l (%sp)+,%d0 # load S.F.
11615 cmp.l %d0,(tbl_fmul_ovfl.w,%pc,%d1.w*4) # would result ovfl?
11616 beq.w fmul_may_ovfl # result may rnd to overflow
11617 blt.w fmul_ovfl # result will overflow
11619 cmp.l %d0,(tbl_fmul_unfl.w,%pc,%d1.w*4) # would result unfl?
11620 beq.w fmul_may_unfl # result may rnd to no unfl
11621 bgt.w fmul_unfl # result will underflow
11624 # NORMAL:
11625 # - the result of the multiply operation will neither overflow nor underflow.
11626 # - do the multiply to the proper precision and rounding mode.
11627 # - scale the result exponent using the scale factor. if both operands were
11628 # normalized then we really don't need to go through this scaling. but for now,
11629 # this will do.
11631 fmul_normal:
11632 fmovm.x FP_SCR1(%a6),&0x80 # load dst operand
11634 fmov.l L_SCR3(%a6),%fpcr # set FPCR
11635 fmov.l &0x0,%fpsr # clear FPSR
11637 fmul.x FP_SCR0(%a6),%fp0 # execute multiply
11639 fmov.l %fpsr,%d1 # save status
11640 fmov.l &0x0,%fpcr # clear FPCR
11642 or.l %d1,USER_FPSR(%a6) # save INEX2,N
11644 fmul_normal_exit:
11645 fmovm.x &0x80,FP_SCR0(%a6) # store out result
11646 mov.l %d2,-(%sp) # save d2
11647 mov.w FP_SCR0_EX(%a6),%d1 # load {sgn,exp}
11648 mov.l %d1,%d2 # make a copy
11649 andi.l &0x7fff,%d1 # strip sign
11650 andi.w &0x8000,%d2 # keep old sign
11651 sub.l %d0,%d1 # add scale factor
11652 or.w %d2,%d1 # concat old sign,new exp
11653 mov.w %d1,FP_SCR0_EX(%a6) # insert new exponent
11654 mov.l (%sp)+,%d2 # restore d2
11655 fmovm.x FP_SCR0(%a6),&0x80 # return default result in fp0
11656 rts
11659 # OVERFLOW:
11660 # - the result of the multiply operation is an overflow.
11661 # - do the multiply to the proper precision and rounding mode in order to
11662 # set the inexact bits.
11663 # - calculate the default result and return it in fp0.
11664 # - if overflow or inexact is enabled, we need a multiply result rounded to
11665 # extended precision. if the original operation was extended, then we have this
11666 # result. if the original operation was single or double, we have to do another
11667 # multiply using extended precision and the correct rounding mode. the result
11668 # of this operation then has its exponent scaled by -0x6000 to create the
11669 # exceptional operand.
11671 fmul_ovfl:
11672 fmovm.x FP_SCR1(%a6),&0x80 # load dst operand
11674 fmov.l L_SCR3(%a6),%fpcr # set FPCR
11675 fmov.l &0x0,%fpsr # clear FPSR
11677 fmul.x FP_SCR0(%a6),%fp0 # execute multiply
11679 fmov.l %fpsr,%d1 # save status
11680 fmov.l &0x0,%fpcr # clear FPCR
11682 or.l %d1,USER_FPSR(%a6) # save INEX2,N
11684 # save setting this until now because this is where fmul_may_ovfl may jump in
11685 fmul_ovfl_tst:
11686 or.l &ovfl_inx_mask,USER_FPSR(%a6) # set ovfl/aovfl/ainex
11688 mov.b FPCR_ENABLE(%a6),%d1
11689 andi.b &0x13,%d1 # is OVFL or INEX enabled?
11690 bne.b fmul_ovfl_ena # yes
11692 # calculate the default result
11693 fmul_ovfl_dis:
11694 btst &neg_bit,FPSR_CC(%a6) # is result negative?
11695 sne %d1 # set sign param accordingly
11696 mov.l L_SCR3(%a6),%d0 # pass rnd prec,mode
11697 bsr.l ovf_res # calculate default result
11698 or.b %d0,FPSR_CC(%a6) # set INF,N if applicable
11699 fmovm.x (%a0),&0x80 # return default result in fp0
11700 rts
11703 # OVFL is enabled; Create EXOP:
11704 # - if precision is extended, then we have the EXOP. simply bias the exponent
11705 # with an extra -0x6000. if the precision is single or double, we need to
11706 # calculate a result rounded to extended precision.
11708 fmul_ovfl_ena:
11709 mov.l L_SCR3(%a6),%d1
11710 andi.b &0xc0,%d1 # test the rnd prec
11711 bne.b fmul_ovfl_ena_sd # it's sgl or dbl
11713 fmul_ovfl_ena_cont:
11714 fmovm.x &0x80,FP_SCR0(%a6) # move result to stack
11716 mov.l %d2,-(%sp) # save d2
11717 mov.w FP_SCR0_EX(%a6),%d1 # fetch {sgn,exp}
11718 mov.w %d1,%d2 # make a copy
11719 andi.l &0x7fff,%d1 # strip sign
11720 sub.l %d0,%d1 # add scale factor
11721 subi.l &0x6000,%d1 # subtract bias
11722 andi.w &0x7fff,%d1 # clear sign bit
11723 andi.w &0x8000,%d2 # keep old sign
11724 or.w %d2,%d1 # concat old sign,new exp
11725 mov.w %d1,FP_SCR0_EX(%a6) # insert new exponent
11726 mov.l (%sp)+,%d2 # restore d2
11727 fmovm.x FP_SCR0(%a6),&0x40 # return EXOP in fp1
11728 bra.b fmul_ovfl_dis
11730 fmul_ovfl_ena_sd:
11731 fmovm.x FP_SCR1(%a6),&0x80 # load dst operand
11733 mov.l L_SCR3(%a6),%d1
11734 andi.b &0x30,%d1 # keep rnd mode only
11735 fmov.l %d1,%fpcr # set FPCR
11737 fmul.x FP_SCR0(%a6),%fp0 # execute multiply
11739 fmov.l &0x0,%fpcr # clear FPCR
11740 bra.b fmul_ovfl_ena_cont
11743 # may OVERFLOW:
11744 # - the result of the multiply operation MAY overflow.
11745 # - do the multiply to the proper precision and rounding mode in order to
11746 # set the inexact bits.
11747 # - calculate the default result and return it in fp0.
11749 fmul_may_ovfl:
11750 fmovm.x FP_SCR1(%a6),&0x80 # load dst op
11752 fmov.l L_SCR3(%a6),%fpcr # set FPCR
11753 fmov.l &0x0,%fpsr # clear FPSR
11755 fmul.x FP_SCR0(%a6),%fp0 # execute multiply
11757 fmov.l %fpsr,%d1 # save status
11758 fmov.l &0x0,%fpcr # clear FPCR
11760 or.l %d1,USER_FPSR(%a6) # save INEX2,N
11762 fabs.x %fp0,%fp1 # make a copy of result
11763 fcmp.b %fp1,&0x2 # is |result| >= 2.b?
11764 fbge.w fmul_ovfl_tst # yes; overflow has occurred
11766 # no, it didn't overflow; we have correct result
11767 bra.w fmul_normal_exit
11770 # UNDERFLOW:
11771 # - the result of the multiply operation is an underflow.
11772 # - do the multiply to the proper precision and rounding mode in order to
11773 # set the inexact bits.
11774 # - calculate the default result and return it in fp0.
11775 # - if overflow or inexact is enabled, we need a multiply result rounded to
11776 # extended precision. if the original operation was extended, then we have this
11777 # result. if the original operation was single or double, we have to do another
11778 # multiply using extended precision and the correct rounding mode. the result
11779 # of this operation then has its exponent scaled by -0x6000 to create the
11780 # exceptional operand.
11782 fmul_unfl:
11783 bset &unfl_bit,FPSR_EXCEPT(%a6) # set unfl exc bit
11785 # for fun, let's use only extended precision, round to zero. then, let
11786 # the unf_res() routine figure out all the rest; this way we still
11787 # arrive at the correct answer.
11788 fmovm.x FP_SCR1(%a6),&0x80 # load dst operand
11790 fmov.l &rz_mode*0x10,%fpcr # set FPCR
11791 fmov.l &0x0,%fpsr # clear FPSR
11793 fmul.x FP_SCR0(%a6),%fp0 # execute multiply
11795 fmov.l %fpsr,%d1 # save status
11796 fmov.l &0x0,%fpcr # clear FPCR
11798 or.l %d1,USER_FPSR(%a6) # save INEX2,N
11800 mov.b FPCR_ENABLE(%a6),%d1
11801 andi.b &0x0b,%d1 # is UNFL or INEX enabled?
11802 bne.b fmul_unfl_ena # yes
11804 fmul_unfl_dis:
11805 fmovm.x &0x80,FP_SCR0(%a6) # store out result
11807 lea FP_SCR0(%a6),%a0 # pass: result addr
11808 mov.l L_SCR3(%a6),%d1 # pass: rnd prec,mode
11809 bsr.l unf_res # calculate default result
11810 or.b %d0,FPSR_CC(%a6) # unf_res2 may have set 'Z'
11811 fmovm.x FP_SCR0(%a6),&0x80 # return default result in fp0
11812 rts
11815 # UNFL is enabled.
11817 fmul_unfl_ena:
11818 fmovm.x FP_SCR1(%a6),&0x40 # load dst op
11820 mov.l L_SCR3(%a6),%d1
11821 andi.b &0xc0,%d1 # is precision extended?
11822 bne.b fmul_unfl_ena_sd # no, sgl or dbl
11824 # if the rnd mode is anything but RZ, then we have to re-do the above
11825 # multiplication because we used RZ for all.
11826 fmov.l L_SCR3(%a6),%fpcr # set FPCR
11828 fmul_unfl_ena_cont:
11829 fmov.l &0x0,%fpsr # clear FPSR
11831 fmul.x FP_SCR0(%a6),%fp1 # execute multiply
11833 fmov.l &0x0,%fpcr # clear FPCR
11835 fmovm.x &0x40,FP_SCR0(%a6) # save result to stack
11836 mov.l %d2,-(%sp) # save d2
11837 mov.w FP_SCR0_EX(%a6),%d1 # fetch {sgn,exp}
11838 mov.l %d1,%d2 # make a copy
11839 andi.l &0x7fff,%d1 # strip sign
11840 andi.w &0x8000,%d2 # keep old sign
11841 sub.l %d0,%d1 # subtract scale factor
11842 addi.l &0x6000,%d1 # add bias
11843 andi.w &0x7fff,%d1
11844 or.w %d2,%d1 # concat old sign,new exp
11845 mov.w %d1,FP_SCR0_EX(%a6) # insert new exponent
11846 mov.l (%sp)+,%d2 # restore d2
11847 fmovm.x FP_SCR0(%a6),&0x40 # return EXOP in fp1
11848 bra.w fmul_unfl_dis
11850 fmul_unfl_ena_sd:
11851 mov.l L_SCR3(%a6),%d1
11852 andi.b &0x30,%d1 # use only rnd mode
11853 fmov.l %d1,%fpcr # set FPCR
11855 bra.b fmul_unfl_ena_cont
11857 # MAY UNDERFLOW:
11858 # - use the correct rounding mode and precision. this code favors operations
11859 # that do not underflow.
11860 fmul_may_unfl:
11861 fmovm.x FP_SCR1(%a6),&0x80 # load dst operand
11863 fmov.l L_SCR3(%a6),%fpcr # set FPCR
11864 fmov.l &0x0,%fpsr # clear FPSR
11866 fmul.x FP_SCR0(%a6),%fp0 # execute multiply
11868 fmov.l %fpsr,%d1 # save status
11869 fmov.l &0x0,%fpcr # clear FPCR
11871 or.l %d1,USER_FPSR(%a6) # save INEX2,N
11873 fabs.x %fp0,%fp1 # make a copy of result
11874 fcmp.b %fp1,&0x2 # is |result| < 2.b?
11875 fbgt.w fmul_normal_exit # no; no underflow occurred
11876 fblt.w fmul_unfl # yes; underflow occurred
11879 # we still don't know if underflow occurred. result is ~ equal to 2. but,
11880 # we don't know if the result was an underflow that rounded up to a 2 or
11881 # a normalized number that rounded down to a 2. so, redo the entire operation
11882 # using RZ as the rounding mode to see what the pre-rounded result is.
11883 # this case should be relatively rare.
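#
# (Round-to-zero can never increase the magnitude of the pre-rounded result,
# so if the RZ product below is still >= 2.0 the infinitely precise result was
# >= 2.0 and no underflow happened; if it drops below 2.0, the earlier 2.0 was
# produced by rounding up and the operation really did underflow.)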
11885 fmovm.x FP_SCR1(%a6),&0x40 # load dst operand
11887 mov.l L_SCR3(%a6),%d1
11888 andi.b &0xc0,%d1 # keep rnd prec
11889 ori.b &rz_mode*0x10,%d1 # insert RZ
11891 fmov.l %d1,%fpcr # set FPCR
11892 fmov.l &0x0,%fpsr # clear FPSR
11894 fmul.x FP_SCR0(%a6),%fp1 # execute multiply
11896 fmov.l &0x0,%fpcr # clear FPCR
11897 fabs.x %fp1 # make absolute value
11898 fcmp.b %fp1,&0x2 # is |result| < 2.b?
11899 fbge.w fmul_normal_exit # no; no underflow occurred
11900 bra.w fmul_unfl # yes, underflow occurred
11902 ################################################################################
11905 # Multiply: inputs are not both normalized; what are they?
11907 fmul_not_norm:
11908 mov.w (tbl_fmul_op.b,%pc,%d1.w*2),%d1
11909 jmp (tbl_fmul_op.b,%pc,%d1.w)
11911 swbeg &48
11912 tbl_fmul_op:
11913 short fmul_norm - tbl_fmul_op # NORM x NORM
11914 short fmul_zero - tbl_fmul_op # NORM x ZERO
11915 short fmul_inf_src - tbl_fmul_op # NORM x INF
11916 short fmul_res_qnan - tbl_fmul_op # NORM x QNAN
11917 short fmul_norm - tbl_fmul_op # NORM x DENORM
11918 short fmul_res_snan - tbl_fmul_op # NORM x SNAN
11919 short tbl_fmul_op - tbl_fmul_op #
11920 short tbl_fmul_op - tbl_fmul_op #
11922 short fmul_zero - tbl_fmul_op # ZERO x NORM
11923 short fmul_zero - tbl_fmul_op # ZERO x ZERO
11924 short fmul_res_operr - tbl_fmul_op # ZERO x INF
11925 short fmul_res_qnan - tbl_fmul_op # ZERO x QNAN
11926 short fmul_zero - tbl_fmul_op # ZERO x DENORM
11927 short fmul_res_snan - tbl_fmul_op # ZERO x SNAN
11928 short tbl_fmul_op - tbl_fmul_op #
11929 short tbl_fmul_op - tbl_fmul_op #
11931 short fmul_inf_dst - tbl_fmul_op # INF x NORM
11932 short fmul_res_operr - tbl_fmul_op # INF x ZERO
11933 short fmul_inf_dst - tbl_fmul_op # INF x INF
11934 short fmul_res_qnan - tbl_fmul_op # INF x QNAN
11935 short fmul_inf_dst - tbl_fmul_op # INF x DENORM
11936 short fmul_res_snan - tbl_fmul_op # INF x SNAN
11937 short tbl_fmul_op - tbl_fmul_op #
11938 short tbl_fmul_op - tbl_fmul_op #
11940 short fmul_res_qnan - tbl_fmul_op # QNAN x NORM
11941 short fmul_res_qnan - tbl_fmul_op # QNAN x ZERO
11942 short fmul_res_qnan - tbl_fmul_op # QNAN x INF
11943 short fmul_res_qnan - tbl_fmul_op # QNAN x QNAN
11944 short fmul_res_qnan - tbl_fmul_op # QNAN x DENORM
11945 short fmul_res_snan - tbl_fmul_op # QNAN x SNAN
11946 short tbl_fmul_op - tbl_fmul_op #
11947 short tbl_fmul_op - tbl_fmul_op #
11949 short fmul_norm - tbl_fmul_op # DENORM x NORM
11950 short fmul_zero - tbl_fmul_op # DENORM x ZERO
11951 short fmul_inf_src - tbl_fmul_op # DENORM x INF
11952 short fmul_res_qnan - tbl_fmul_op # DENORM x QNAN
11953 short fmul_norm - tbl_fmul_op # DENORM x DENORM
11954 short fmul_res_snan - tbl_fmul_op # DENORM x SNAN
11955 short tbl_fmul_op - tbl_fmul_op #
11956 short tbl_fmul_op - tbl_fmul_op #
11958 short fmul_res_snan - tbl_fmul_op # SNAN x NORM
11959 short fmul_res_snan - tbl_fmul_op # SNAN x ZERO
11960 short fmul_res_snan - tbl_fmul_op # SNAN x INF
11961 short fmul_res_snan - tbl_fmul_op # SNAN x QNAN
11962 short fmul_res_snan - tbl_fmul_op # SNAN x DENORM
11963 short fmul_res_snan - tbl_fmul_op # SNAN x SNAN
11964 short tbl_fmul_op - tbl_fmul_op #
11965 short tbl_fmul_op - tbl_fmul_op #
11967 fmul_res_operr:
11968 bra.l res_operr
11969 fmul_res_snan:
11970 bra.l res_snan
11971 fmul_res_qnan:
11972 bra.l res_qnan
11975 # Multiply: (Zero x Zero) || (Zero x norm) || (Zero x denorm)
11977 global fmul_zero # global for fsglmul
11978 fmul_zero:
11979 mov.b SRC_EX(%a0),%d0 # exclusive or the signs
11980 mov.b DST_EX(%a1),%d1
11981 eor.b %d0,%d1
11982 bpl.b fmul_zero_p # result ZERO is pos.
11983 fmul_zero_n:
11984 fmov.s &0x80000000,%fp0 # load -ZERO
11985 mov.b &z_bmask+neg_bmask,FPSR_CC(%a6) # set Z/N
11987 fmul_zero_p:
11988 fmov.s &0x00000000,%fp0 # load +ZERO
11989 mov.b &z_bmask,FPSR_CC(%a6) # set Z
11993 # Multiply: (inf x inf) || (inf x norm) || (inf x denorm)
11995 # Note: The j-bit for an infinity is a don't-care. However, to be
11996 # strictly compatible w/ the 68881/882, we make sure to return an
11997 # INF w/ the j-bit set if the input INF j-bit was set. Destination
11998 # INFs take priority.
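#
# (The j-bit is the explicit integer bit of the extended-precision mantissa;
# for an INF it has no numerical meaning, which is why it is a don't-care,
# but the 68881/882 simply pass the destination's bit through.)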
12000 global fmul_inf_dst # global for fsglmul
12001 fmul_inf_dst:
12002 fmovm.x DST(%a1),&0x80 # return INF result in fp0
12003 mov.b SRC_EX(%a0),%d0 # exclusive or the signs
12004 mov.b DST_EX(%a1),%d1
12005 eor.b %d0,%d1
12006 bpl.b fmul_inf_dst_p # result INF is pos.
12007 fmul_inf_dst_n:
12008 fabs.x %fp0 # clear result sign
12009 fneg.x %fp0 # set result sign
12010 mov.b &inf_bmask+neg_bmask,FPSR_CC(%a6) # set INF/N
12012 fmul_inf_dst_p:
12013 fabs.x %fp0 # clear result sign
12014 mov.b &inf_bmask,FPSR_CC(%a6) # set INF
12017 global fmul_inf_src # global for fsglmul
12018 fmul_inf_src:
12019 fmovm.x SRC(%a0),&0x80 # return INF result in fp0
12020 mov.b SRC_EX(%a0),%d0 # exclusive or the signs
12021 mov.b DST_EX(%a1),%d1
12022 eor.b %d0,%d1
12023 bpl.b fmul_inf_dst_p # result INF is pos.
12024 bra.b fmul_inf_dst_n
12026 #########################################################################
12027 # XDEF **************************************************************** #
12028 # fin(): emulates the fmove instruction #
12029 # fsin(): emulates the fsmove instruction #
12030 # fdin(): emulates the fdmove instruction #
12032 # XREF **************************************************************** #
12033 # norm() - normalize mantissa for EXOP on denorm #
12034 # scale_to_zero_src() - scale src exponent to zero #
12035 # ovf_res() - return default overflow result #
12036 # unf_res() - return default underflow result #
12037 # res_qnan_1op() - return QNAN result #
12038 # res_snan_1op() - return SNAN result #
12040 # INPUT *************************************************************** #
12041 # a0 = pointer to extended precision source operand #
12042 # d0 = round prec/mode #
12044 # OUTPUT ************************************************************** #
12045 # fp0 = result #
12046 # fp1 = EXOP (if exception occurred) #
12048 # ALGORITHM *********************************************************** #
12049 # Handle NANs, infinities, and zeroes as special cases. Divide #
12050 # norms into extended, single, and double precision. #
12051 # Norms can be emulated w/ a regular fmove instruction. For #
12052 # sgl/dbl, must scale exponent and perform an "fmove". Check to see #
12053 # if the result would have overflowed/underflowed. If so, use unf_res() #
12054 # or ovf_res() to return the default result. Also return EXOP if #
12055 # exception is enabled. If no exception, return the default result. #
12056 # Unnorms don't pass through here. #
12058 #########################################################################
12060 global fsin
12061 fsin:
12062 andi.b &0x30,%d0 # clear rnd prec
12063 ori.b &s_mode*0x10,%d0 # insert sgl precision
12064 bra.b fin
12066 global fdin
12067 fdin:
12068 andi.b &0x30,%d0 # clear rnd prec
12069 ori.b &d_mode*0x10,%d0 # insert dbl precision
12071 global fin
12072 fin:
12073 mov.l %d0,L_SCR3(%a6) # store rnd info
12075 mov.b STAG(%a6),%d1 # fetch src optype tag
12076 bne.w fin_not_norm # optimize on non-norm input
12079 # FP MOVE IN: NORMs and DENORMs ONLY!
12081 fin_norm:
12082 andi.b &0xc0,%d0 # is precision extended?
12083 bne.w fin_not_ext # no, so go handle dbl or sgl
12086 # precision selected is extended. so...we cannot get an underflow
12087 # or overflow because of rounding to the correct precision. so...
12088 # skip the scaling and unscaling...
12090 tst.b SRC_EX(%a0) # is the operand negative?
12091 bpl.b fin_norm_done # no
12092 bset &neg_bit,FPSR_CC(%a6) # yes, so set 'N' ccode bit
12093 fin_norm_done:
12094 fmovm.x SRC(%a0),&0x80 # return result in fp0
12098 # for an extended precision DENORM, the UNFL exception bit is set
12099 # the accrued bit is NOT set in this instance(no inexactness!)
12101 fin_denorm:
12102 andi.b &0xc0,%d0 # is precision extended?
12103 bne.w fin_not_ext # no, so go handle dbl or sgl
12105 bset &unfl_bit,FPSR_EXCEPT(%a6) # set unfl exc bit
12106 tst.b SRC_EX(%a0) # is the operand negative?
12107 bpl.b fin_denorm_done # no
12108 bset &neg_bit,FPSR_CC(%a6) # yes, so set 'N' ccode bit
12109 fin_denorm_done:
12110 fmovm.x SRC(%a0),&0x80 # return result in fp0
12111 btst &unfl_bit,FPCR_ENABLE(%a6) # is UNFL enabled?
12112 bne.b fin_denorm_unfl_ena # yes
12116 # the input is an extended DENORM and underflow is enabled in the FPCR.
12117 # normalize the mantissa and add the bias of 0x6000 to the resulting negative
12118 # exponent and insert back into the operand.
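#
# (For example, if norm() has to shift the mantissa left by 12 bits it hands
# back d0 = 12, so the unbiased exponent becomes -12 and the EXOP exponent
# field below ends up as 0x6000 - 0xc = 0x5ff4, with the original sign OR'ed
# back in.)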
12120 fin_denorm_unfl_ena:
12121 mov.w SRC_EX(%a0),FP_SCR0_EX(%a6)
12122 mov.l SRC_HI(%a0),FP_SCR0_HI(%a6)
12123 mov.l SRC_LO(%a0),FP_SCR0_LO(%a6)
12124 lea FP_SCR0(%a6),%a0 # pass: ptr to operand
12125 bsr.l norm # normalize result
12126 neg.w %d0 # new exponent = -(shft val)
12127 addi.w &0x6000,%d0 # add new bias to exponent
12128 mov.w FP_SCR0_EX(%a6),%d1 # fetch old sign,exp
12129 andi.w &0x8000,%d1 # keep old sign
12130 andi.w &0x7fff,%d0 # clear sign position
12131 or.w %d1,%d0 # concat new exp,old sign
12132 mov.w %d0,FP_SCR0_EX(%a6) # insert new exponent
12133 fmovm.x FP_SCR0(%a6),&0x40 # return EXOP in fp1
12137 # operand is to be rounded to single or double precision
12139 fin_not_ext:
12140 cmpi.b %d0,&s_mode*0x10 # separate sgl/dbl prec
12141 bne.b fin_dbl
12144 # operand is to be rounded to single precision
12146 fin_sgl:
12147 mov.w SRC_EX(%a0),FP_SCR0_EX(%a6)
12148 mov.l SRC_HI(%a0),FP_SCR0_HI(%a6)
12149 mov.l SRC_LO(%a0),FP_SCR0_LO(%a6)
12150 bsr.l scale_to_zero_src # calculate scale factor
12152 cmpi.l %d0,&0x3fff-0x3f80 # will move in underflow?
12153 bge.w fin_sd_unfl # yes; go handle underflow
12154 cmpi.l %d0,&0x3fff-0x407e # will move in overflow?
12155 beq.w fin_sd_may_ovfl # maybe; go check
12156 blt.w fin_sd_ovfl # yes; go handle overflow
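#
# (In effect, scale_to_zero_src() leaves d0 = 0x3fff - (biased src exponent),
# so the compares above read: d0 >= 0x3fff-0x3f80 means the operand is below
# the smallest single-precision normal (2^-126) and must underflow; d0 equal
# to 0x3fff-0x407e means the exponent sits right at the single-precision
# maximum (2^+127) and may overflow depending on rounding; anything smaller is
# a sure overflow. The double-precision checks further down use 0x3c00/0x43fe
# the same way.)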
12159 # operand will NOT overflow or underflow when moved into the fp reg file
12161 fin_sd_normal:
12162 fmov.l &0x0,%fpsr # clear FPSR
12163 fmov.l L_SCR3(%a6),%fpcr # set FPCR
12165 fmov.x FP_SCR0(%a6),%fp0 # perform move
12167 fmov.l %fpsr,%d1 # save FPSR
12168 fmov.l &0x0,%fpcr # clear FPCR
12170 or.l %d1,USER_FPSR(%a6) # save INEX2,N
12172 fin_sd_normal_exit:
12173 mov.l %d2,-(%sp) # save d2
12174 fmovm.x &0x80,FP_SCR0(%a6) # store out result
12175 mov.w FP_SCR0_EX(%a6),%d1 # load {sgn,exp}
12176 mov.w %d1,%d2 # make a copy
12177 andi.l &0x7fff,%d1 # strip sign
12178 sub.l %d0,%d1 # subtract scale factor
12179 andi.w &0x8000,%d2 # keep old sign
12180 or.w %d1,%d2 # concat old sign,new exponent
12181 mov.w %d2,FP_SCR0_EX(%a6) # insert new exponent
12182 mov.l (%sp)+,%d2 # restore d2
12183 fmovm.x FP_SCR0(%a6),&0x80 # return result in fp0
12187 # operand is to be rounded to double precision
12189 fin_dbl:
12190 mov.w SRC_EX(%a0),FP_SCR0_EX(%a6)
12191 mov.l SRC_HI(%a0),FP_SCR0_HI(%a6)
12192 mov.l SRC_LO(%a0),FP_SCR0_LO(%a6)
12193 bsr.l scale_to_zero_src # calculate scale factor
12195 cmpi.l %d0,&0x3fff-0x3c00 # will move in underflow?
12196 bge.w fin_sd_unfl # yes; go handle underflow
12197 cmpi.l %d0,&0x3fff-0x43fe # will move in overflow?
12198 beq.w fin_sd_may_ovfl # maybe; go check
12199 blt.w fin_sd_ovfl # yes; go handle overflow
12200 bra.w fin_sd_normal # no; go handle normalized op
12203 # operand WILL underflow when moved in to the fp register file
12205 fin_sd_unfl:
12206 bset &unfl_bit,FPSR_EXCEPT(%a6) # set unfl exc bit
12208 tst.b FP_SCR0_EX(%a6) # is operand negative?
12209 bpl.b fin_sd_unfl_tst
12210 bset &neg_bit,FPSR_CC(%a6) # set 'N' ccode bit
12212 # if underflow or inexact is enabled, then go calculate the EXOP first.
12213 fin_sd_unfl_tst:
12214 mov.b FPCR_ENABLE(%a6),%d1
12215 andi.b &0x0b,%d1 # is UNFL or INEX enabled?
12216 bne.b fin_sd_unfl_ena # yes
12218 fin_sd_unfl_dis:
12219 lea FP_SCR0(%a6),%a0 # pass: result addr
12220 mov.l L_SCR3(%a6),%d1 # pass: rnd prec,mode
12221 bsr.l unf_res # calculate default result
12222 or.b %d0,FPSR_CC(%a6) # unf_res may have set 'Z'
12223 fmovm.x FP_SCR0(%a6),&0x80 # return default result in fp0
12224 rts
12227 # operand will underflow AND underflow or inexact is enabled.
12228 # therefore, we must return the result rounded to extended precision.
12230 fin_sd_unfl_ena:
12231 mov.l FP_SCR0_HI(%a6),FP_SCR1_HI(%a6)
12232 mov.l FP_SCR0_LO(%a6),FP_SCR1_LO(%a6)
12233 mov.w FP_SCR0_EX(%a6),%d1 # load current exponent
12235 mov.l %d2,-(%sp) # save d2
12236 mov.w %d1,%d2 # make a copy
12237 andi.l &0x7fff,%d1 # strip sign
12238 sub.l %d0,%d1 # subtract scale factor
12239 andi.w &0x8000,%d2 # extract old sign
12240 addi.l &0x6000,%d1 # add new bias
12241 andi.w &0x7fff,%d1
12242 or.w %d1,%d2 # concat old sign,new exp
12243 mov.w %d2,FP_SCR1_EX(%a6) # insert new exponent
12244 fmovm.x FP_SCR1(%a6),&0x40 # return EXOP in fp1
12245 mov.l (%sp)+,%d2 # restore d2
12246 bra.b fin_sd_unfl_dis
12249 # operand WILL overflow.
12251 fin_sd_ovfl:
12252 fmov.l &0x0,%fpsr # clear FPSR
12253 fmov.l L_SCR3(%a6),%fpcr # set FPCR
12255 fmov.x FP_SCR0(%a6),%fp0 # perform move
12257 fmov.l &0x0,%fpcr # clear FPCR
12258 fmov.l %fpsr,%d1 # save FPSR
12260 or.l %d1,USER_FPSR(%a6) # save INEX2,N
12262 fin_sd_ovfl_tst:
12263 or.l &ovfl_inx_mask,USER_FPSR(%a6) # set ovfl/aovfl/ainex
12265 mov.b FPCR_ENABLE(%a6),%d1
12266 andi.b &0x13,%d1 # is OVFL or INEX enabled?
12267 bne.b fin_sd_ovfl_ena # yes
12270 # OVFL is not enabled; therefore, we must create the default result by
12271 # calling ovf_res().
12273 fin_sd_ovfl_dis:
12274 btst &neg_bit,FPSR_CC(%a6) # is result negative?
12275 sne %d1 # set sign param accordingly
12276 mov.l L_SCR3(%a6),%d0 # pass: prec,mode
12277 bsr.l ovf_res # calculate default result
12278 or.b %d0,FPSR_CC(%a6) # set INF,N if applicable
12279 fmovm.x (%a0),&0x80 # return default result in fp0
12283 # OVFL is enabled.
12284 # the INEX2 bit has already been updated by the round to the correct precision.
12285 # now, round to extended(and don't alter the FPSR).
12287 fin_sd_ovfl_ena:
12288 mov.l %d2,-(%sp) # save d2
12289 mov.w FP_SCR0_EX(%a6),%d1 # fetch {sgn,exp}
12290 mov.l %d1,%d2 # make a copy
12291 andi.l &0x7fff,%d1 # strip sign
12292 andi.w &0x8000,%d2 # keep old sign
12293 sub.l %d0,%d1 # subtract scale factor
12294 sub.l &0x6000,%d1 # subtract bias
12295 andi.w &0x7fff,%d1
12296 or.w %d2,%d1
12297 mov.w %d1,FP_SCR0_EX(%a6) # insert new exponent
12298 mov.l (%sp)+,%d2 # restore d2
12299 fmovm.x FP_SCR0(%a6),&0x40 # return EXOP in fp1
12300 bra.b fin_sd_ovfl_dis
12303 # the move in MAY overflow. so...
12305 fin_sd_may_ovfl:
12306 fmov.l &0x0,%fpsr # clear FPSR
12307 fmov.l L_SCR3(%a6),%fpcr # set FPCR
12309 fmov.x FP_SCR0(%a6),%fp0 # perform the move
12311 fmov.l %fpsr,%d1 # save status
12312 fmov.l &0x0,%fpcr # clear FPCR
12314 or.l %d1,USER_FPSR(%a6) # save INEX2,N
12316 fabs.x %fp0,%fp1 # make a copy of result
12317 fcmp.b %fp1,&0x2 # is |result| >= 2.b?
12318 fbge.w fin_sd_ovfl_tst # yes; overflow has occurred
12320 # no, it didn't overflow; we have correct result
12321 bra.w fin_sd_normal_exit
12323 ##########################################################################
12326 # operand is not a NORM: check its optype and branch accordingly
12328 fin_not_norm:
12329 cmpi.b %d1,&DENORM # weed out DENORM
12330 beq.w fin_denorm
12331 cmpi.b %d1,&SNAN # weed out SNANs
12332 beq.l res_snan_1op
12333 cmpi.b %d1,&QNAN # weed out QNANs
12334 beq.l res_qnan_1op
12337 # do the fmove in; at this point, only possible ops are ZERO and INF.
12338 # use fmov to determine ccodes.
12339 # prec:mode should be zero at this point, but it won't affect the answer anyway.
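#
# (A ZERO or INF moves exactly in any rounding precision, so the fmove below
# cannot raise an exception; it is executed purely so the FPU computes the
# condition codes for us.)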
12341 fmov.x SRC(%a0),%fp0 # do fmove in
12342 fmov.l %fpsr,%d0 # no exceptions possible
12343 rol.l &0x8,%d0 # put ccodes in lo byte
12344 mov.b %d0,FPSR_CC(%a6) # insert correct ccodes
12347 #########################################################################
12348 # XDEF **************************************************************** #
12349 # fdiv(): emulates the fdiv instruction #
12350 # fsdiv(): emulates the fsdiv instruction #
12351 # fddiv(): emulates the fddiv instruction #
12353 # XREF **************************************************************** #
12354 # scale_to_zero_src() - scale src exponent to zero #
12355 # scale_to_zero_dst() - scale dst exponent to zero #
12356 # unf_res() - return default underflow result #
12357 # ovf_res() - return default overflow result #
12358 # res_qnan() - return QNAN result #
12359 # res_snan() - return SNAN result #
12361 # INPUT *************************************************************** #
12362 # a0 = pointer to extended precision source operand #
12363 # a1 = pointer to extended precision destination operand #
12364 # d0 = rnd prec,mode #
12366 # OUTPUT ************************************************************** #
12367 # fp0 = result #
12368 # fp1 = EXOP (if exception occurred) #
12370 # ALGORITHM *********************************************************** #
12371 # Handle NANs, infinities, and zeroes as special cases. Divide #
12372 # norms/denorms into ext/sgl/dbl precision. #
12373 # For norms/denorms, scale the exponents such that a divide #
12374 # instruction won't cause an exception. Use the regular fdiv to #
12375 # compute a result. Check if the regular operands would have taken #
12376 # an exception. If so, return the default overflow/underflow result #
12377 # and return the EXOP if exceptions are enabled. Else, scale the #
12378 # result operand to the proper exponent. #
12380 #########################################################################
12382 align 0x10
12383 tbl_fdiv_unfl:
12384 long 0x3fff - 0x0000 # ext_unfl
12385 long 0x3fff - 0x3f81 # sgl_unfl
12386 long 0x3fff - 0x3c01 # dbl_unfl
12388 tbl_fdiv_ovfl:
12389 long 0x3fff - 0x7ffe # ext overflow exponent
12390 long 0x3fff - 0x407e # sgl overflow exponent
12391 long 0x3fff - 0x43fe # dbl overflow exponent
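#
# (Each entry is, in effect, 0x3fff minus the limiting biased exponent for the
# destination precision: 0x3f81/0x407e bracket single (2^-126..2^+127) and
# 0x3c01/0x43fe bracket double (2^-1022..2^+1023). The combined scale factor
# computed in fdiv below is compared against these to pick the sure-underflow,
# may-underflow, or may-overflow paths before the divide is attempted.)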
12393 global fsdiv
12394 fsdiv:
12395 andi.b &0x30,%d0 # clear rnd prec
12396 ori.b &s_mode*0x10,%d0 # insert sgl prec
12397 bra.b fdiv
12399 global fddiv
12400 fddiv:
12401 andi.b &0x30,%d0 # clear rnd prec
12402 ori.b &d_mode*0x10,%d0 # insert dbl prec
12404 global fdiv
12405 fdiv:
12406 mov.l %d0,L_SCR3(%a6) # store rnd info
12408 clr.w %d1
12409 mov.b DTAG(%a6),%d1
12410 lsl.b &0x3,%d1
12411 or.b STAG(%a6),%d1 # combine src tags
12413 bne.w fdiv_not_norm # optimize on non-norm input
12416 # DIVIDE: NORMs and DENORMs ONLY!
12418 fdiv_norm:
12419 mov.w DST_EX(%a1),FP_SCR1_EX(%a6)
12420 mov.l DST_HI(%a1),FP_SCR1_HI(%a6)
12421 mov.l DST_LO(%a1),FP_SCR1_LO(%a6)
12423 mov.w SRC_EX(%a0),FP_SCR0_EX(%a6)
12424 mov.l SRC_HI(%a0),FP_SCR0_HI(%a6)
12425 mov.l SRC_LO(%a0),FP_SCR0_LO(%a6)
12427 bsr.l scale_to_zero_src # scale src exponent
12428 mov.l %d0,-(%sp) # save scale factor 1
12430 bsr.l scale_to_zero_dst # scale dst exponent
12432 neg.l (%sp) # SCALE FACTOR = scale2 - scale1
12433 add.l %d0,(%sp)
12435 mov.w 2+L_SCR3(%a6),%d1 # fetch precision
12436 lsr.b &0x6,%d1 # shift to lo bits
12437 mov.l (%sp)+,%d0 # load S.F.
12438 cmp.l %d0,(tbl_fdiv_ovfl.b,%pc,%d1.w*4) # will result overflow?
12439 ble.w fdiv_may_ovfl # result will overflow
12441 cmp.l %d0,(tbl_fdiv_unfl.w,%pc,%d1.w*4) # will result underflow?
12442 beq.w fdiv_may_unfl # maybe
12443 bgt.w fdiv_unfl # yes; go handle underflow
12445 fdiv_normal:
12446 fmovm.x FP_SCR1(%a6),&0x80 # load dst op
12448 fmov.l L_SCR3(%a6),%fpcr # set FPCR
12449 fmov.l &0x0,%fpsr # clear FPSR
12451 fdiv.x FP_SCR0(%a6),%fp0 # perform divide
12453 fmov.l %fpsr,%d1 # save FPSR
12454 fmov.l &0x0,%fpcr # clear FPCR
12456 or.l %d1,USER_FPSR(%a6) # save INEX2,N
12458 fdiv_normal_exit:
12459 fmovm.x &0x80,FP_SCR0(%a6) # store result on stack
12460 mov.l %d2,-(%sp) # store d2
12461 mov.w FP_SCR0_EX(%a6),%d1 # load {sgn,exp}
12462 mov.l %d1,%d2 # make a copy
12463 andi.l &0x7fff,%d1 # strip sign
12464 andi.w &0x8000,%d2 # keep old sign
12465 sub.l %d0,%d1 # subtract scale factor
12466 or.w %d2,%d1 # concat old sign,new exp
12467 mov.w %d1,FP_SCR0_EX(%a6) # insert new exponent
12468 mov.l (%sp)+,%d2 # restore d2
12469 fmovm.x FP_SCR0(%a6),&0x80 # return result in fp0
12472 tbl_fdiv_ovfl2:
12473 long 0x7fff
12474 long 0x407f
12475 long 0x43ff
12477 fdiv_no_ovfl:
12478 mov.l (%sp)+,%d0 # restore scale factor
12479 bra.b fdiv_normal_exit
12481 fdiv_may_ovfl:
12482 mov.l %d0,-(%sp) # save scale factor
12484 fmovm.x FP_SCR1(%a6),&0x80 # load dst op
12486 fmov.l L_SCR3(%a6),%fpcr # set FPCR
12487 fmov.l &0x0,%fpsr # clear FPSR
12489 fdiv.x FP_SCR0(%a6),%fp0 # execute divide
12491 fmov.l %fpsr,%d0
12492 fmov.l &0x0,%fpcr
12494 or.l %d0,USER_FPSR(%a6) # save INEX,N
12496 fmovm.x &0x01,-(%sp) # save result to stack
12497 mov.w (%sp),%d0 # fetch new exponent
12498 add.l &0xc,%sp # clear result from stack
12499 andi.l &0x7fff,%d0 # strip sign
12500 sub.l (%sp),%d0 # subtract scale factor
12501 cmp.l %d0,(tbl_fdiv_ovfl2.b,%pc,%d1.w*4)
12502 blt.b fdiv_no_ovfl
12503 mov.l (%sp)+,%d0
12505 fdiv_ovfl_tst:
12506 or.l &ovfl_inx_mask,USER_FPSR(%a6) # set ovfl/aovfl/ainex
12508 mov.b FPCR_ENABLE(%a6),%d1
12509 andi.b &0x13,%d1 # is OVFL or INEX enabled?
12510 bne.b fdiv_ovfl_ena # yes
12512 fdiv_ovfl_dis:
12513 btst &neg_bit,FPSR_CC(%a6) # is result negative?
12514 sne %d1 # set sign param accordingly
12515 mov.l L_SCR3(%a6),%d0 # pass prec:rnd
12516 bsr.l ovf_res # calculate default result
12517 or.b %d0,FPSR_CC(%a6) # set INF if applicable
12518 fmovm.x (%a0),&0x80 # return default result in fp0
12521 fdiv_ovfl_ena:
12522 mov.l L_SCR3(%a6),%d1
12523 andi.b &0xc0,%d1 # is precision extended?
12524 bne.b fdiv_ovfl_ena_sd # no, do sgl or dbl
12526 fdiv_ovfl_ena_cont:
12527 fmovm.x &0x80,FP_SCR0(%a6) # move result to stack
12529 mov.l %d2,-(%sp) # save d2
12530 mov.w FP_SCR0_EX(%a6),%d1 # fetch {sgn,exp}
12531 mov.w %d1,%d2 # make a copy
12532 andi.l &0x7fff,%d1 # strip sign
12533 sub.l %d0,%d1 # subtract scale factor
12534 subi.l &0x6000,%d1 # subtract bias
12535 andi.w &0x7fff,%d1 # clear sign bit
12536 andi.w &0x8000,%d2 # keep old sign
12537 or.w %d2,%d1 # concat old sign,new exp
12538 mov.w %d1,FP_SCR0_EX(%a6) # insert new exponent
12539 mov.l (%sp)+,%d2 # restore d2
12540 fmovm.x FP_SCR0(%a6),&0x40 # return EXOP in fp1
12541 bra.b fdiv_ovfl_dis
12543 fdiv_ovfl_ena_sd:
12544 fmovm.x FP_SCR1(%a6),&0x80 # load dst operand
12546 mov.l L_SCR3(%a6),%d1
12547 andi.b &0x30,%d1 # keep rnd mode
12548 fmov.l %d1,%fpcr # set FPCR
12550 fdiv.x FP_SCR0(%a6),%fp0 # execute divide
12552 fmov.l &0x0,%fpcr # clear FPCR
12553 bra.b fdiv_ovfl_ena_cont
12555 fdiv_unfl:
12556 bset &unfl_bit,FPSR_EXCEPT(%a6) # set unfl exc bit
12558 fmovm.x FP_SCR1(%a6),&0x80 # load dst op
12560 fmov.l &rz_mode*0x10,%fpcr # set FPCR
12561 fmov.l &0x0,%fpsr # clear FPSR
12563 fdiv.x FP_SCR0(%a6),%fp0 # execute divide
12565 fmov.l %fpsr,%d1 # save status
12566 fmov.l &0x0,%fpcr # clear FPCR
12568 or.l %d1,USER_FPSR(%a6) # save INEX2,N
12570 mov.b FPCR_ENABLE(%a6),%d1
12571 andi.b &0x0b,%d1 # is UNFL or INEX enabled?
12572 bne.b fdiv_unfl_ena # yes
12574 fdiv_unfl_dis:
12575 fmovm.x &0x80,FP_SCR0(%a6) # store out result
12577 lea FP_SCR0(%a6),%a0 # pass: result addr
12578 mov.l L_SCR3(%a6),%d1 # pass: rnd prec,mode
12579 bsr.l unf_res # calculate default result
12580 or.b %d0,FPSR_CC(%a6) # 'Z' may have been set
12581 fmovm.x FP_SCR0(%a6),&0x80 # return default result in fp0
12585 # UNFL is enabled.
12587 fdiv_unfl_ena:
12588 fmovm.x FP_SCR1(%a6),&0x40 # load dst op
12590 mov.l L_SCR3(%a6),%d1
12591 andi.b &0xc0,%d1 # is precision extended?
12592 bne.b fdiv_unfl_ena_sd # no, sgl or dbl
12594 fmov.l L_SCR3(%a6),%fpcr # set FPCR
12596 fdiv_unfl_ena_cont:
12597 fmov.l &0x0,%fpsr # clear FPSR
12599 fdiv.x FP_SCR0(%a6),%fp1 # execute divide
12601 fmov.l &0x0,%fpcr # clear FPCR
12603 fmovm.x &0x40,FP_SCR0(%a6) # save result to stack
12604 mov.l %d2,-(%sp) # save d2
12605 mov.w FP_SCR0_EX(%a6),%d1 # fetch {sgn,exp}
12606 mov.l %d1,%d2 # make a copy
12607 andi.l &0x7fff,%d1 # strip sign
12608 andi.w &0x8000,%d2 # keep old sign
12609 sub.l %d0,%d1 # subtract scale factor
12610 addi.l &0x6000,%d1 # add bias
12611 andi.w &0x7fff,%d1
12612 or.w %d2,%d1 # concat old sign,new exp
12613 mov.w %d1,FP_SCR0_EX(%a6) # insert new exp
12614 mov.l (%sp)+,%d2 # restore d2
12615 fmovm.x FP_SCR0(%a6),&0x40 # return EXOP in fp1
12616 bra.w fdiv_unfl_dis
12618 fdiv_unfl_ena_sd:
12619 mov.l L_SCR3(%a6),%d1
12620 andi.b &0x30,%d1 # use only rnd mode
12621 fmov.l %d1,%fpcr # set FPCR
12623 bra.b fdiv_unfl_ena_cont
12626 # the divide operation MAY underflow:
12628 fdiv_may_unfl:
12629 fmovm.x FP_SCR1(%a6),&0x80 # load dst op
12631 fmov.l L_SCR3(%a6),%fpcr # set FPCR
12632 fmov.l &0x0,%fpsr # clear FPSR
12634 fdiv.x FP_SCR0(%a6),%fp0 # execute divide
12636 fmov.l %fpsr,%d1 # save status
12637 fmov.l &0x0,%fpcr # clear FPCR
12639 or.l %d1,USER_FPSR(%a6) # save INEX2,N
12641 fabs.x %fp0,%fp1 # make a copy of result
12642 fcmp.b %fp1,&0x1 # is |result| < 1.b?
12643 fbgt.w fdiv_normal_exit # no; no underflow occurred
12644 fblt.w fdiv_unfl # yes; underflow occurred
12647 # we still don't know if underflow occurred. result is ~ equal to 1. but,
12648 # we don't know if the result was an underflow that rounded up to a 1
12649 # or a normalized number that rounded down to a 1. so, redo the entire
12650 # operation using RZ as the rounding mode to see what the pre-rounded
12651 # result is. this case should be relatively rare.
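#
# (Same trick as in fmul above: round-to-zero never rounds the magnitude up,
# so an RZ quotient that is still >= 1.0 proves the exact result did not
# underflow, while an RZ quotient below 1.0 means the earlier 1.0 came from
# rounding up.)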
12653 fmovm.x FP_SCR1(%a6),&0x40 # load dst op into fp1
12655 mov.l L_SCR3(%a6),%d1
12656 andi.b &0xc0,%d1 # keep rnd prec
12657 ori.b &rz_mode*0x10,%d1 # insert RZ
12659 fmov.l %d1,%fpcr # set FPCR
12660 fmov.l &0x0,%fpsr # clear FPSR
12662 fdiv.x FP_SCR0(%a6),%fp1 # execute divide
12664 fmov.l &0x0,%fpcr # clear FPCR
12665 fabs.x %fp1 # make absolute value
12666 fcmp.b %fp1,&0x1 # is |result| < 1.b?
12667 fbge.w fdiv_normal_exit # no; no underflow occurred
12668 bra.w fdiv_unfl # yes; underflow occurred
12670 ############################################################################
12673 # Divide: inputs are not both normalized; what are they?
12675 fdiv_not_norm:
12676 mov.w (tbl_fdiv_op.b,%pc,%d1.w*2),%d1
12677 jmp (tbl_fdiv_op.b,%pc,%d1.w*1)
12679 swbeg &48
12680 tbl_fdiv_op:
12681 short fdiv_norm - tbl_fdiv_op # NORM / NORM
12682 short fdiv_inf_load - tbl_fdiv_op # NORM / ZERO
12683 short fdiv_zero_load - tbl_fdiv_op # NORM / INF
12684 short fdiv_res_qnan - tbl_fdiv_op # NORM / QNAN
12685 short fdiv_norm - tbl_fdiv_op # NORM / DENORM
12686 short fdiv_res_snan - tbl_fdiv_op # NORM / SNAN
12687 short tbl_fdiv_op - tbl_fdiv_op #
12688 short tbl_fdiv_op - tbl_fdiv_op #
12690 short fdiv_zero_load - tbl_fdiv_op # ZERO / NORM
12691 short fdiv_res_operr - tbl_fdiv_op # ZERO / ZERO
12692 short fdiv_zero_load - tbl_fdiv_op # ZERO / INF
12693 short fdiv_res_qnan - tbl_fdiv_op # ZERO / QNAN
12694 short fdiv_zero_load - tbl_fdiv_op # ZERO / DENORM
12695 short fdiv_res_snan - tbl_fdiv_op # ZERO / SNAN
12696 short tbl_fdiv_op - tbl_fdiv_op #
12697 short tbl_fdiv_op - tbl_fdiv_op #
12699 short fdiv_inf_dst - tbl_fdiv_op # INF / NORM
12700 short fdiv_inf_dst - tbl_fdiv_op # INF / ZERO
12701 short fdiv_res_operr - tbl_fdiv_op # INF / INF
12702 short fdiv_res_qnan - tbl_fdiv_op # INF / QNAN
12703 short fdiv_inf_dst - tbl_fdiv_op # INF / DENORM
12704 short fdiv_res_snan - tbl_fdiv_op # INF / SNAN
12705 short tbl_fdiv_op - tbl_fdiv_op #
12706 short tbl_fdiv_op - tbl_fdiv_op #
12708 short fdiv_res_qnan - tbl_fdiv_op # QNAN / NORM
12709 short fdiv_res_qnan - tbl_fdiv_op # QNAN / ZERO
12710 short fdiv_res_qnan - tbl_fdiv_op # QNAN / INF
12711 short fdiv_res_qnan - tbl_fdiv_op # QNAN / QNAN
12712 short fdiv_res_qnan - tbl_fdiv_op # QNAN / DENORM
12713 short fdiv_res_snan - tbl_fdiv_op # QNAN / SNAN
12714 short tbl_fdiv_op - tbl_fdiv_op #
12715 short tbl_fdiv_op - tbl_fdiv_op #
12717 short fdiv_norm - tbl_fdiv_op # DENORM / NORM
12718 short fdiv_inf_load - tbl_fdiv_op # DENORM / ZERO
12719 short fdiv_zero_load - tbl_fdiv_op # DENORM / INF
12720 short fdiv_res_qnan - tbl_fdiv_op # DENORM / QNAN
12721 short fdiv_norm - tbl_fdiv_op # DENORM / DENORM
12722 short fdiv_res_snan - tbl_fdiv_op # DENORM / SNAN
12723 short tbl_fdiv_op - tbl_fdiv_op #
12724 short tbl_fdiv_op - tbl_fdiv_op #
12726 short fdiv_res_snan - tbl_fdiv_op # SNAN / NORM
12727 short fdiv_res_snan - tbl_fdiv_op # SNAN / ZERO
12728 short fdiv_res_snan - tbl_fdiv_op # SNAN / INF
12729 short fdiv_res_snan - tbl_fdiv_op # SNAN / QNAN
12730 short fdiv_res_snan - tbl_fdiv_op # SNAN / DENORM
12731 short fdiv_res_snan - tbl_fdiv_op # SNAN / SNAN
12732 short tbl_fdiv_op - tbl_fdiv_op #
12733 short tbl_fdiv_op - tbl_fdiv_op #
12735 fdiv_res_qnan:
12736 bra.l res_qnan
12737 fdiv_res_snan:
12738 bra.l res_snan
12739 fdiv_res_operr:
12740 bra.l res_operr
12742 global fdiv_zero_load # global for fsgldiv
12743 fdiv_zero_load:
12744 mov.b SRC_EX(%a0),%d0 # result sign is exclusive
12745 mov.b DST_EX(%a1),%d1 # or of input signs.
12746 eor.b %d0,%d1
12747 bpl.b fdiv_zero_load_p # result is positive
12748 fmov.s &0x80000000,%fp0 # load a -ZERO
12749 mov.b &z_bmask+neg_bmask,FPSR_CC(%a6) # set Z/N
12751 fdiv_zero_load_p:
12752 fmov.s &0x00000000,%fp0 # load a +ZERO
12753 mov.b &z_bmask,FPSR_CC(%a6) # set Z
12757 # The destination was In Range and the source was a ZERO. The result,
12758 # therefore, is an INF w/ the proper sign.
12759 # So, determine the sign and return a new INF (w/ the j-bit cleared).
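#
# (This is the IEEE divide-by-zero case for a finite, nonzero dividend: DZ and
# the accrued ADZ bits are set below and the default result is a correctly
# signed infinity. 0/0 is different; the dispatch table above sends it to the
# operand-error handler instead.)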
12761 global fdiv_inf_load # global for fsgldiv
12762 fdiv_inf_load:
12763 ori.w &dz_mask+adz_mask,2+USER_FPSR(%a6) # set DZ/ADZ
12764 mov.b SRC_EX(%a0),%d0 # load both signs
12765 mov.b DST_EX(%a1),%d1
12766 eor.b %d0,%d1
12767 bpl.b fdiv_inf_load_p # result is positive
12768 fmov.s &0xff800000,%fp0 # make result -INF
12769 mov.b &inf_bmask+neg_bmask,FPSR_CC(%a6) # set INF/N
12771 fdiv_inf_load_p:
12772 fmov.s &0x7f800000,%fp0 # make result +INF
12773 mov.b &inf_bmask,FPSR_CC(%a6) # set INF
12777 # The destination was an INF w/ an In Range or ZERO source; the result is
12778 # an INF w/ the proper sign.
12779 # The 68881/882 returns the destination INF w/ the new sign (if the j-bit of the
12780 # dst INF is set, then the j-bit of the result INF is also set).
12782 global fdiv_inf_dst # global for fsgldiv
12783 fdiv_inf_dst:
12784 mov.b DST_EX(%a1),%d0 # load both signs
12785 mov.b SRC_EX(%a0),%d1
12786 eor.b %d0,%d1
12787 bpl.b fdiv_inf_dst_p # result is positive
12789 fmovm.x DST(%a1),&0x80 # return result in fp0
12790 fabs.x %fp0 # clear sign bit
12791 fneg.x %fp0 # set sign bit
12792 mov.b &inf_bmask+neg_bmask,FPSR_CC(%a6) # set INF/NEG
12795 fdiv_inf_dst_p:
12796 fmovm.x DST(%a1),&0x80 # return result in fp0
12797 fabs.x %fp0 # return positive INF
12798 mov.b &inf_bmask,FPSR_CC(%a6) # set INF
12801 #########################################################################
12802 # XDEF **************************************************************** #
12803 # fneg(): emulates the fneg instruction #
12804 # fsneg(): emulates the fsneg instruction #
12805 # fdneg(): emulates the fdneg instruction #
12807 # XREF **************************************************************** #
12808 # norm() - normalize a denorm to provide EXOP #
12809 # scale_to_zero_src() - scale sgl/dbl source exponent #
12810 # ovf_res() - return default overflow result #
12811 # unf_res() - return default underflow result #
12812 # res_qnan_1op() - return QNAN result #
12813 # res_snan_1op() - return SNAN result #
12815 # INPUT *************************************************************** #
12816 # a0 = pointer to extended precision source operand #
12817 # d0 = rnd prec,mode #
12819 # OUTPUT ************************************************************** #
12820 # fp0 = result #
12821 # fp1 = EXOP (if exception occurred) #
12823 # ALGORITHM *********************************************************** #
12824 # Handle NANs, zeroes, and infinities as special cases. Separate #
12825 # norms/denorms into ext/sgl/dbl precisions. Extended precision can be #
12826 # emulated by simply setting sign bit. Sgl/dbl operands must be scaled #
12827 # and an actual fneg performed to see if overflow/underflow would have #
12828 # occurred. If so, return default underflow/overflow result. Else, #
12829 # scale the result exponent and return result. FPSR gets set based on #
12830 # the result value. #
12832 #########################################################################
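#
# (For the extended-precision path below, the negation never actually touches
# the FPU: the sign bit of the exponent word is flipped with an eori, so no
# rounding occurs and no exception is possible, which is why the scale and
# unscale steps are skipped.)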
12834 global fsneg
12835 fsneg:
12836 andi.b &0x30,%d0 # clear rnd prec
12837 ori.b &s_mode*0x10,%d0 # insert sgl precision
12838 bra.b fneg
12840 global fdneg
12841 fdneg:
12842 andi.b &0x30,%d0 # clear rnd prec
12843 ori.b &d_mode*0x10,%d0 # insert dbl prec
12845 global fneg
12846 fneg:
12847 mov.l %d0,L_SCR3(%a6) # store rnd info
12848 mov.b STAG(%a6),%d1
12849 bne.w fneg_not_norm # optimize on non-norm input
12852 # NEGATE SIGN : norms and denorms ONLY!
12854 fneg_norm:
12855 andi.b &0xc0,%d0 # is precision extended?
12856 bne.w fneg_not_ext # no; go handle sgl or dbl
12859 # precision selected is extended. so...we can not get an underflow
12860 # or overflow because of rounding to the correct precision. so...
12861 # skip the scaling and unscaling...
12863 mov.l SRC_HI(%a0),FP_SCR0_HI(%a6)
12864 mov.l SRC_LO(%a0),FP_SCR0_LO(%a6)
12865 mov.w SRC_EX(%a0),%d0
12866 eori.w &0x8000,%d0 # negate sign
12867 bpl.b fneg_norm_load # sign is positive
12868 mov.b &neg_bmask,FPSR_CC(%a6) # set 'N' ccode bit
12869 fneg_norm_load:
12870 mov.w %d0,FP_SCR0_EX(%a6)
12871 fmovm.x FP_SCR0(%a6),&0x80 # return result in fp0
12875 # for an extended precision DENORM, the UNFL exception bit is set
12876 # the accrued bit is NOT set in this instance(no inexactness!)
12878 fneg_denorm:
12879 andi.b &0xc0,%d0 # is precision extended?
12880 bne.b fneg_not_ext # no; go handle sgl or dbl
12882 bset &unfl_bit,FPSR_EXCEPT(%a6) # set unfl exc bit
12884 mov.l SRC_HI(%a0),FP_SCR0_HI(%a6)
12885 mov.l SRC_LO(%a0),FP_SCR0_LO(%a6)
12886 mov.w SRC_EX(%a0),%d0
12887 eori.w &0x8000,%d0 # negate sign
12888 bpl.b fneg_denorm_done # no
12889 mov.b &neg_bmask,FPSR_CC(%a6) # yes, set 'N' ccode bit
12890 fneg_denorm_done:
12891 mov.w %d0,FP_SCR0_EX(%a6)
12892 fmovm.x FP_SCR0(%a6),&0x80 # return default result in fp0
12894 btst &unfl_bit,FPCR_ENABLE(%a6) # is UNFL enabled?
12895 bne.b fneg_ext_unfl_ena # yes
12899 # the input is an extended DENORM and underflow is enabled in the FPCR.
12900 # normalize the mantissa and add the bias of 0x6000 to the resulting negative
12901 # exponent and insert back into the operand.
12903 fneg_ext_unfl_ena:
12904 lea FP_SCR0(%a6),%a0 # pass: ptr to operand
12905 bsr.l norm # normalize result
12906 neg.w %d0 # new exponent = -(shft val)
12907 addi.w &0x6000,%d0 # add new bias to exponent
12908 mov.w FP_SCR0_EX(%a6),%d1 # fetch old sign,exp
12909 andi.w &0x8000,%d1 # keep old sign
12910 andi.w &0x7fff,%d0 # clear sign position
12911 or.w %d1,%d0 # concat old sign, new exponent
12912 mov.w %d0,FP_SCR0_EX(%a6) # insert new exponent
12913 fmovm.x FP_SCR0(%a6),&0x40 # return EXOP in fp1
12917 # operand is either single or double
12919 fneg_not_ext:
12920 cmpi.b %d0,&s_mode*0x10 # separate sgl/dbl prec
12921 bne.b fneg_dbl
12924 # operand is to be rounded to single precision
12926 fneg_sgl:
12927 mov.w SRC_EX(%a0),FP_SCR0_EX(%a6)
12928 mov.l SRC_HI(%a0),FP_SCR0_HI(%a6)
12929 mov.l SRC_LO(%a0),FP_SCR0_LO(%a6)
12930 bsr.l scale_to_zero_src # calculate scale factor
12932 cmpi.l %d0,&0x3fff-0x3f80 # will move in underflow?
12933 bge.w fneg_sd_unfl # yes; go handle underflow
12934 cmpi.l %d0,&0x3fff-0x407e # will move in overflow?
12935 beq.w fneg_sd_may_ovfl # maybe; go check
12936 blt.w fneg_sd_ovfl # yes; go handle overflow
12939 # operand will NOT overflow or underflow when moved in to the fp reg file
12941 fneg_sd_normal:
12942 fmov.l &0x0,%fpsr # clear FPSR
12943 fmov.l L_SCR3(%a6),%fpcr # set FPCR
12945 fneg.x FP_SCR0(%a6),%fp0 # perform negation
12947 fmov.l %fpsr,%d1 # save FPSR
12948 fmov.l &0x0,%fpcr # clear FPCR
12950 or.l %d1,USER_FPSR(%a6) # save INEX2,N
12952 fneg_sd_normal_exit:
12953 mov.l %d2,-(%sp) # save d2
12954 fmovm.x &0x80,FP_SCR0(%a6) # store out result
12955 mov.w FP_SCR0_EX(%a6),%d1 # load sgn,exp
12956 mov.w %d1,%d2 # make a copy
12957 andi.l &0x7fff,%d1 # strip sign
12958 sub.l %d0,%d1 # subtract scale factor
12959 andi.w &0x8000,%d2 # keep old sign
12960 or.w %d1,%d2 # concat old sign,new exp
12961 mov.w %d2,FP_SCR0_EX(%a6) # insert new exponent
12962 mov.l (%sp)+,%d2 # restore d2
12963 fmovm.x FP_SCR0(%a6),&0x80 # return result in fp0
12967 # operand is to be rounded to double precision
12969 fneg_dbl:
12970 mov.w SRC_EX(%a0),FP_SCR0_EX(%a6)
12971 mov.l SRC_HI(%a0),FP_SCR0_HI(%a6)
12972 mov.l SRC_LO(%a0),FP_SCR0_LO(%a6)
12973 bsr.l scale_to_zero_src # calculate scale factor
12975 cmpi.l %d0,&0x3fff-0x3c00 # will move in underflow?
12976 bge.b fneg_sd_unfl # yes; go handle underflow
12977 cmpi.l %d0,&0x3fff-0x43fe # will move in overflow?
12978 beq.w fneg_sd_may_ovfl # maybe; go check
12979 blt.w fneg_sd_ovfl # yes; go handle overflow
12980 bra.w fneg_sd_normal # no; go handle normalized op
12983 # operand WILL underflow when moved in to the fp register file
12985 fneg_sd_unfl:
12986 bset &unfl_bit,FPSR_EXCEPT(%a6) # set unfl exc bit
12988 eori.b &0x80,FP_SCR0_EX(%a6) # negate sign
12989 bpl.b fneg_sd_unfl_tst
12990 bset &neg_bit,FPSR_CC(%a6) # set 'N' ccode bit
12992 # if underflow or inexact is enabled, go calculate EXOP first.
12993 fneg_sd_unfl_tst:
12994 mov.b FPCR_ENABLE(%a6),%d1
12995 andi.b &0x0b,%d1 # is UNFL or INEX enabled?
12996 bne.b fneg_sd_unfl_ena # yes
12998 fneg_sd_unfl_dis:
12999 lea FP_SCR0(%a6),%a0 # pass: result addr
13000 mov.l L_SCR3(%a6),%d1 # pass: rnd prec,mode
13001 bsr.l unf_res # calculate default result
13002 or.b %d0,FPSR_CC(%a6) # unf_res may have set 'Z'
13003 fmovm.x FP_SCR0(%a6),&0x80 # return default result in fp0
13004 rts
13007 # operand will underflow AND underflow or inexact is enabled.
13008 # therefore, we must return the result rounded to extended precision.
13010 fneg_sd_unfl_ena:
13011 mov.l FP_SCR0_HI(%a6),FP_SCR1_HI(%a6)
13012 mov.l FP_SCR0_LO(%a6),FP_SCR1_LO(%a6)
13013 mov.w FP_SCR0_EX(%a6),%d1 # load current exponent
13015 mov.l %d2,-(%sp) # save d2
13016 mov.l %d1,%d2 # make a copy
13017 andi.l &0x7fff,%d1 # strip sign
13018 andi.w &0x8000,%d2 # keep old sign
13019 sub.l %d0,%d1 # subtract scale factor
13020 addi.l &0x6000,%d1 # add new bias
13021 andi.w &0x7fff,%d1
13022 or.w %d2,%d1 # concat old sign,new exp
13023 mov.w %d1,FP_SCR1_EX(%a6) # insert new exp
13024 fmovm.x FP_SCR1(%a6),&0x40 # return EXOP in fp1
13025 mov.l (%sp)+,%d2 # restore d2
13026 bra.b fneg_sd_unfl_dis
13029 # operand WILL overflow.
13031 fneg_sd_ovfl:
13032 fmov.l &0x0,%fpsr # clear FPSR
13033 fmov.l L_SCR3(%a6),%fpcr # set FPCR
13035 fneg.x FP_SCR0(%a6),%fp0 # perform negation
13037 fmov.l &0x0,%fpcr # clear FPCR
13038 fmov.l %fpsr,%d1 # save FPSR
13040 or.l %d1,USER_FPSR(%a6) # save INEX2,N
13042 fneg_sd_ovfl_tst:
13043 or.l &ovfl_inx_mask,USER_FPSR(%a6) # set ovfl/aovfl/ainex
13045 mov.b FPCR_ENABLE(%a6),%d1
13046 andi.b &0x13,%d1 # is OVFL or INEX enabled?
13047 bne.b fneg_sd_ovfl_ena # yes
13050 # OVFL is not enabled; therefore, we must create the default result by
13051 # calling ovf_res().
13053 fneg_sd_ovfl_dis:
13054 btst &neg_bit,FPSR_CC(%a6) # is result negative?
13055 sne %d1 # set sign param accordingly
13056 mov.l L_SCR3(%a6),%d0 # pass: prec,mode
13057 bsr.l ovf_res # calculate default result
13058 or.b %d0,FPSR_CC(%a6) # set INF,N if applicable
13059 fmovm.x (%a0),&0x80 # return default result in fp0
13063 # OVFL is enabled.
13064 # the INEX2 bit has already been updated by the round to the correct precision.
13065 # now, round to extended(and don't alter the FPSR).
13067 fneg_sd_ovfl_ena:
13068 mov.l %d2,-(%sp) # save d2
13069 mov.w FP_SCR0_EX(%a6),%d1 # fetch {sgn,exp}
13070 mov.l %d1,%d2 # make a copy
13071 andi.l &0x7fff,%d1 # strip sign
13072 andi.w &0x8000,%d2 # keep old sign
13073 sub.l %d0,%d1 # subtract scale factor
13074 subi.l &0x6000,%d1 # subtract bias
13075 andi.w &0x7fff,%d1
13076 or.w %d2,%d1 # concat sign,exp
13077 mov.w %d1,FP_SCR0_EX(%a6) # insert new exponent
13078 fmovm.x FP_SCR0(%a6),&0x40 # return EXOP in fp1
13079 mov.l (%sp)+,%d2 # restore d2
13080 bra.b fneg_sd_ovfl_dis
13083 # the fneg MAY overflow. so...
13085 fneg_sd_may_ovfl:
13086 fmov.l &0x0,%fpsr # clear FPSR
13087 fmov.l L_SCR3(%a6),%fpcr # set FPCR
13089 fneg.x FP_SCR0(%a6),%fp0 # perform negation
13091 fmov.l %fpsr,%d1 # save status
13092 fmov.l &0x0,%fpcr # clear FPCR
13094 or.l %d1,USER_FPSR(%a6) # save INEX2,N
13096 fabs.x %fp0,%fp1 # make a copy of result
13097 fcmp.b %fp1,&0x2 # is |result| >= 2.b?
13098 fbge.w fneg_sd_ovfl_tst # yes; overflow has occurred
13100 # no, it didn't overflow; we have correct result
13101 bra.w fneg_sd_normal_exit
13103 ##########################################################################
13106 # input is not normalized; what is it?
13108 fneg_not_norm:
13109 cmpi.b %d1,&DENORM # weed out DENORM
13110 beq.w fneg_denorm
13111 cmpi.b %d1,&SNAN # weed out SNAN
13112 beq.l res_snan_1op
13113 cmpi.b %d1,&QNAN # weed out QNAN
13114 beq.l res_qnan_1op
13117 # do the fneg; at this point, only possible ops are ZERO and INF.
13118 # use fneg to determine ccodes.
13119 # prec:mode should be zero at this point, but it won't affect the answer anyway.
13121 fneg.x SRC_EX(%a0),%fp0 # do fneg
13122 fmov.l %fpsr,%d0
13123 rol.l &0x8,%d0 # put ccodes in lo byte
13124 mov.b %d0,FPSR_CC(%a6) # insert correct ccodes
13127 #########################################################################
13128 # XDEF **************************************************************** #
13129 # ftst(): emulates the ftst instruction #
13131 # XREF **************************************************************** #
13132 # res{s,q}nan_1op() - set NAN result for monadic instruction #
13134 # INPUT *************************************************************** #
13135 # a0 = pointer to extended precision source operand #
13137 # OUTPUT ************************************************************** #
13138 # none #
13140 # ALGORITHM *********************************************************** #
13141 # Check the source operand tag (STAG) and set the FPSR according #
13142 # to the operand type and sign. #
13144 #########################################################################
13146 global ftst
13147 ftst:
13148 mov.b STAG(%a6),%d1
13149 bne.b ftst_not_norm # optimize on non-norm input
13152 # Norm:
13154 ftst_norm:
13155 tst.b SRC_EX(%a0) # is operand negative?
13156 bmi.b ftst_norm_m # yes
13158 ftst_norm_m:
13159 mov.b &neg_bmask,FPSR_CC(%a6) # set 'N' ccode bit
13163 # input is not normalized; what is it?
13165 ftst_not_norm:
13166 cmpi.b %d1,&ZERO # weed out ZERO
13167 beq.b ftst_zero
13168 cmpi.b %d1,&INF # weed out INF
13169 beq.b ftst_inf
13170 cmpi.b %d1,&SNAN # weed out SNAN
13171 beq.l res_snan_1op
13172 cmpi.b %d1,&QNAN # weed out QNAN
13173 beq.l res_qnan_1op
13176 # Denorm:
13178 ftst_denorm:
13179 tst.b SRC_EX(%a0) # is operand negative?
13180 bmi.b ftst_denorm_m # yes
13182 ftst_denorm_m:
13183 mov.b &neg_bmask,FPSR_CC(%a6) # set 'N' ccode bit
13187 # Infinity:
13189 ftst_inf:
13190 tst.b SRC_EX(%a0) # is operand negative?
13191 bmi.b ftst_inf_m # yes
13192 ftst_inf_p:
13193 mov.b &inf_bmask,FPSR_CC(%a6) # set 'I' ccode bit
13195 ftst_inf_m:
13196 mov.b &inf_bmask+neg_bmask,FPSR_CC(%a6) # set 'I','N' ccode bits
13200 # Zero:
13202 ftst_zero:
13203 tst.b SRC_EX(%a0) # is operand negative?
13204 bmi.b ftst_zero_m # yes
13205 ftst_zero_p:
13206 mov.b &z_bmask,FPSR_CC(%a6) # set 'Z' ccode bit
13208 ftst_zero_m:
13209 mov.b &z_bmask+neg_bmask,FPSR_CC(%a6) # set 'Z','N' ccode bits
13212 #########################################################################
13213 # XDEF **************************************************************** #
13214 # fint(): emulates the fint instruction #
13216 # XREF **************************************************************** #
13217 # res_{s,q}nan_1op() - set NAN result for monadic operation #
13219 # INPUT *************************************************************** #
13220 # a0 = pointer to extended precision source operand #
13221 # d0 = round precision/mode #
13223 # OUTPUT ************************************************************** #
13224 # fp0 = result #
13226 # ALGORITHM *********************************************************** #
13227 # Separate according to operand type. Unnorms don't pass through #
13228 # here. For norms, load the rounding mode/prec, execute a "fint", then #
13229 # store the resulting FPSR bits. #
13230 # For denorms, force the j-bit to a one and do the same as for #
13231 # norms. Denorms are so low that the answer will either be a zero or a #
13232 # one. #
13233 # For zeroes/infs/NANs, return the same while setting the FPSR #
13234 # as appropriate. #
13236 #########################################################################
13238 global fint
13239 fint:
13240 mov.b STAG(%a6),%d1
13241 bne.b fint_not_norm # optimize on non-norm input
13244 # Norm:
13246 fint_norm:
13247 andi.b &0x30,%d0 # set prec = ext
13249 fmov.l %d0,%fpcr # set FPCR
13250 fmov.l &0x0,%fpsr # clear FPSR
13252 fint.x SRC(%a0),%fp0 # execute fint
13254 fmov.l &0x0,%fpcr # clear FPCR
13255 fmov.l %fpsr,%d0 # save FPSR
13256 or.l %d0,USER_FPSR(%a6) # set exception bits
13261 # input is not normalized; what is it?
13263 fint_not_norm:
13264 cmpi.b %d1,&ZERO # weed out ZERO
13265 beq.b fint_zero
13266 cmpi.b %d1,&INF # weed out INF
13267 beq.b fint_inf
13268 cmpi.b %d1,&DENORM # weed out DENORM
13269 beq.b fint_denorm
13270 cmpi.b %d1,&SNAN # weed out SNAN
13271 beq.l res_snan_1op
13272 bra.l res_qnan_1op # weed out QNAN
13275 # Denorm:
13277 # for DENORMs, the result will be either (+/-)ZERO or (+/-)1.
13278 # also, the INEX2 and AINEX exception bits will be set.
13279 # so, we could either set these manually or force the DENORM
13280 # to a very small NORM and ship it to the NORM routine.
13281 # I do the latter.
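#
# (The 0x80 written into the high mantissa byte below forces the j-bit; with
# the zeroed exponent kept, the DENORM becomes a tiny, effectively normalized
# operand, so the ordinary norm path rounds it to 0 or +/-1 and sets
# INEX2/AINEX for us.)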
13283 fint_denorm:
13284 mov.w SRC_EX(%a0),FP_SCR0_EX(%a6) # copy sign, zero exp
13285 mov.b &0x80,FP_SCR0_HI(%a6) # force DENORM ==> small NORM
13286 lea FP_SCR0(%a6),%a0
13287 bra.b fint_norm
13290 # Zero:
13292 fint_zero:
13293 tst.b SRC_EX(%a0) # is ZERO negative?
13294 bmi.b fint_zero_m # yes
13295 fint_zero_p:
13296 fmov.s &0x00000000,%fp0 # return +ZERO in fp0
13297 mov.b &z_bmask,FPSR_CC(%a6) # set 'Z' ccode bit
13299 fint_zero_m:
13300 fmov.s &0x80000000,%fp0 # return -ZERO in fp0
13301 mov.b &z_bmask+neg_bmask,FPSR_CC(%a6) # set 'Z','N' ccode bits
13305 # Infinity:
13307 fint_inf:
13308 fmovm.x SRC(%a0),&0x80 # return result in fp0
13309 tst.b SRC_EX(%a0) # is INF negative?
13310 bmi.b fint_inf_m # yes
13311 fint_inf_p:
13312 mov.b &inf_bmask,FPSR_CC(%a6) # set 'I' ccode bit
13314 fint_inf_m:
13315 mov.b &inf_bmask+neg_bmask,FPSR_CC(%a6) # set 'N','I' ccode bits
13318 #########################################################################
13319 # XDEF **************************************************************** #
13320 # fintrz(): emulates the fintrz instruction #
13322 # XREF **************************************************************** #
13323 # res_{s,q}nan_1op() - set NAN result for monadic operation #
13325 # INPUT *************************************************************** #
13326 # a0 = pointer to extended precision source operand #
13327 # d0 = round precision/mode #
13329 # OUTPUT ************************************************************** #
13330 # fp0 = result #
13332 # ALGORITHM *********************************************************** #
13333 # Separate according to operand type. Unnorms don't pass through #
13334 # here. For norms, load the rounding mode/prec, execute a "fintrz", #
13335 # then store the resulting FPSR bits. #
13336 # For denorms, force the j-bit to a one and do the same as for #
13337 # norms. Denorms are so low that the answer will either be a zero or a #
13338 # one. #
13339 # For zeroes/infs/NANs, return the same while setting the FPSR #
13340 # as appropriate. #
13342 #########################################################################
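#
# (Unlike fint above, fintrz always chops toward zero regardless of the FPCR
# rounding mode, which is why fintrz_norm below never bothers loading the
# user's rounding control before executing the fintrz.)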
13344 global fintrz
13345 fintrz:
13346 mov.b STAG(%a6),%d1
13347 bne.b fintrz_not_norm # optimize on non-norm input
13350 # Norm:
13352 fintrz_norm:
13353 fmov.l &0x0,%fpsr # clear FPSR
13355 fintrz.x SRC(%a0),%fp0 # execute fintrz
13357 fmov.l %fpsr,%d0 # save FPSR
13358 or.l %d0,USER_FPSR(%a6) # set exception bits
13363 # input is not normalized; what is it?
13365 fintrz_not_norm:
13366 cmpi.b %d1,&ZERO # weed out ZERO
13367 beq.b fintrz_zero
13368 cmpi.b %d1,&INF # weed out INF
13369 beq.b fintrz_inf
13370 cmpi.b %d1,&DENORM # weed out DENORM
13371 beq.b fintrz_denorm
13372 cmpi.b %d1,&SNAN # weed out SNAN
13373 beq.l res_snan_1op
13374 bra.l res_qnan_1op # weed out QNAN
13377 # Denorm:
13379 # for DENORMs, the result will be (+/-)ZERO.
13380 # also, the INEX2 and AINEX exception bits will be set.
13381 # so, we could either set these manually or force the DENORM
13382 # to a very small NORM and ship it to the NORM routine.
13383 # I do the latter.
13385 fintrz_denorm:
13386 mov.w SRC_EX(%a0),FP_SCR0_EX(%a6) # copy sign, zero exp
13387 mov.b &0x80,FP_SCR0_HI(%a6) # force DENORM ==> small NORM
13388 lea FP_SCR0(%a6),%a0
13389 bra.b fintrz_norm
13392 # Zero:
13394 fintrz_zero:
13395 tst.b SRC_EX(%a0) # is ZERO negative?
13396 bmi.b fintrz_zero_m # yes
13397 fintrz_zero_p:
13398 fmov.s &0x00000000,%fp0 # return +ZERO in fp0
13399 mov.b &z_bmask,FPSR_CC(%a6) # set 'Z' ccode bit
13401 fintrz_zero_m:
13402 fmov.s &0x80000000,%fp0 # return -ZERO in fp0
13403 mov.b &z_bmask+neg_bmask,FPSR_CC(%a6) # set 'Z','N' ccode bits
13407 # Infinity:
13409 fintrz_inf:
13410 fmovm.x SRC(%a0),&0x80 # return result in fp0
13411 tst.b SRC_EX(%a0) # is INF negative?
13412 bmi.b fintrz_inf_m # yes
13413 fintrz_inf_p:
13414 mov.b &inf_bmask,FPSR_CC(%a6) # set 'I' ccode bit
13416 fintrz_inf_m:
13417 mov.b &inf_bmask+neg_bmask,FPSR_CC(%a6) # set 'N','I' ccode bits
13420 #########################################################################
13421 # XDEF **************************************************************** #
13422 # fabs(): emulates the fabs instruction #
13423 # fsabs(): emulates the fsabs instruction #
13424 # fdabs(): emulates the fdabs instruction #
13426 # XREF **************************************************************** #
13427 # norm() - normalize denorm mantissa to provide EXOP #
13428 # scale_to_zero_src() - make exponent = 0; get scale factor #
13429 # unf_res() - calculate underflow result #
13430 # ovf_res() - calculate overflow result #
13431 # res_{s,q}nan_1op() - set NAN result for monadic operation #
13433 # INPUT *************************************************************** #
13434 # a0 = pointer to extended precision source operand #
13435 # d0 = rnd precision/mode #
13437 # OUTPUT ************************************************************** #
13438 # fp0 = result #
13439 # fp1 = EXOP (if exception occurred) #
13441 # ALGORITHM *********************************************************** #
13442 # Handle NANs, infinities, and zeroes as special cases. Divide #
13443 # norms into extended, single, and double precision. #
13444 # Simply clear sign for extended precision norm. Ext prec denorm #
13445 # gets an EXOP created for it since it's an underflow. #
13446 # Double and single precision can overflow and underflow. First, #
13447 # scale the operand such that the exponent is zero. Perform an "fabs" #
13448 # using the correct rnd mode/prec. Check to see if the original #
13449 # exponent would take an exception. If so, use unf_res() or ovf_res() #
13450 # to calculate the default result. Also, create the EXOP for the #
13451 # exceptional case. If no exception should occur, insert the correct #
13452 # result exponent and return. #
13453 # Unnorms don't pass through here. #
13455 #########################################################################
13457 global fsabs
13458 fsabs:
13459 andi.b &0x30,%d0 # clear rnd prec
13460 ori.b &s_mode*0x10,%d0 # insert sgl precision
13461 bra.b fabs
13463 global fdabs
13464 fdabs:
13465 andi.b &0x30,%d0 # clear rnd prec
13466 ori.b &d_mode*0x10,%d0 # insert dbl precision
13468 global fabs
13469 fabs:
13470 mov.l %d0,L_SCR3(%a6) # store rnd info
13471 mov.b STAG(%a6),%d1
13472 bne.w fabs_not_norm # optimize on non-norm input
13475 # ABSOLUTE VALUE: norms and denorms ONLY!
13477 fabs_norm:
13478 andi.b &0xc0,%d0 # is precision extended?
13479 bne.b fabs_not_ext # no; go handle sgl or dbl
13482 # precision selected is extended. so...we can not get an underflow
13483 # or overflow because of rounding to the correct precision. so...
13484 # skip the scaling and unscaling...
13486 mov.l SRC_HI(%a0),FP_SCR0_HI(%a6)
13487 mov.l SRC_LO(%a0),FP_SCR0_LO(%a6)
13488 mov.w SRC_EX(%a0),%d1
13489 bclr &15,%d1 # force absolute value
13490 mov.w %d1,FP_SCR0_EX(%a6) # insert exponent
13491 fmovm.x FP_SCR0(%a6),&0x80 # return result in fp0
13495 # for an extended precision DENORM, the UNFL exception bit is set
13496 # the accrued bit is NOT set in this instance(no inexactness!)
13498 fabs_denorm:
13499 andi.b &0xc0,%d0 # is precision extended?
13500 bne.b fabs_not_ext # no
13502 bset &unfl_bit,FPSR_EXCEPT(%a6) # set unfl exc bit
13504 mov.l SRC_HI(%a0),FP_SCR0_HI(%a6)
13505 mov.l SRC_LO(%a0),FP_SCR0_LO(%a6)
13506 mov.w SRC_EX(%a0),%d0
13507 bclr &15,%d0 # clear sign
13508 mov.w %d0,FP_SCR0_EX(%a6) # insert exponent
13510 fmovm.x FP_SCR0(%a6),&0x80 # return default result in fp0
13512 btst &unfl_bit,FPCR_ENABLE(%a6) # is UNFL enabled?
13513 bne.b fabs_ext_unfl_ena
13517 # the input is an extended DENORM and underflow is enabled in the FPCR.
13518 # normalize the mantissa and add the bias of 0x6000 to the resulting negative
13519 # exponent and insert back into the operand.
13521 fabs_ext_unfl_ena:
13522 lea FP_SCR0(%a6),%a0 # pass: ptr to operand
13523 bsr.l norm # normalize result
13524 neg.w %d0 # new exponent = -(shft val)
13525 addi.w &0x6000,%d0 # add new bias to exponent
13526 mov.w FP_SCR0_EX(%a6),%d1 # fetch old sign,exp
13527 andi.w &0x8000,%d1 # keep old sign
13528 andi.w &0x7fff,%d0 # clear sign position
13529 or.w %d1,%d0 # concat old sign, new exponent
13530 mov.w %d0,FP_SCR0_EX(%a6) # insert new exponent
13531 fmovm.x FP_SCR0(%a6),&0x40 # return EXOP in fp1
13535 # operand is either single or double
13537 fabs_not_ext:
13538 cmpi.b %d0,&s_mode*0x10 # separate sgl/dbl prec
13539 bne.b fabs_dbl
13542 # operand is to be rounded to single precision
13544 fabs_sgl:
13545 mov.w SRC_EX(%a0),FP_SCR0_EX(%a6)
13546 mov.l SRC_HI(%a0),FP_SCR0_HI(%a6)
13547 mov.l SRC_LO(%a0),FP_SCR0_LO(%a6)
13548 bsr.l scale_to_zero_src # calculate scale factor
13550 cmpi.l %d0,&0x3fff-0x3f80 # will move in underflow?
13551 bge.w fabs_sd_unfl # yes; go handle underflow
13552 cmpi.l %d0,&0x3fff-0x407e # will move in overflow?
13553 beq.w fabs_sd_may_ovfl # maybe; go check
13554 blt.w fabs_sd_ovfl # yes; go handle overflow
13557 # operand will NOT overflow or underflow when moved in to the fp reg file
13559 fabs_sd_normal:
13560 fmov.l &0x0,%fpsr # clear FPSR
13561 fmov.l L_SCR3(%a6),%fpcr # set FPCR
13563 fabs.x FP_SCR0(%a6),%fp0 # perform absolute
13565 fmov.l %fpsr,%d1 # save FPSR
13566 fmov.l &0x0,%fpcr # clear FPCR
13568 or.l %d1,USER_FPSR(%a6) # save INEX2,N
13570 fabs_sd_normal_exit:
13571 mov.l %d2,-(%sp) # save d2
13572 fmovm.x &0x80,FP_SCR0(%a6) # store out result
13573 mov.w FP_SCR0_EX(%a6),%d1 # load sgn,exp
13574 mov.l %d1,%d2 # make a copy
13575 andi.l &0x7fff,%d1 # strip sign
13576 sub.l %d0,%d1 # add scale factor
13577 andi.w &0x8000,%d2 # keep old sign
13578 or.w %d1,%d2 # concat old sign,new exp
13579 mov.w %d2,FP_SCR0_EX(%a6) # insert new exponent
13580 mov.l (%sp)+,%d2 # restore d2
13581 fmovm.x FP_SCR0(%a6),&0x80 # return result in fp0
13585 # operand is to be rounded to double precision
13587 fabs_dbl:
13588 mov.w SRC_EX(%a0),FP_SCR0_EX(%a6)
13589 mov.l SRC_HI(%a0),FP_SCR0_HI(%a6)
13590 mov.l SRC_LO(%a0),FP_SCR0_LO(%a6)
13591 bsr.l scale_to_zero_src # calculate scale factor
13593 cmpi.l %d0,&0x3fff-0x3c00 # will move in underflow?
13594 bge.b fabs_sd_unfl # yes; go handle underflow
13595 cmpi.l %d0,&0x3fff-0x43fe # will move in overflow?
13596 beq.w fabs_sd_may_ovfl # maybe; go check
13597 blt.w fabs_sd_ovfl # yes; go handle overflow
13598 bra.w fabs_sd_normal # no; go handle normalized op
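#
# same idea as the single-precision thresholds, with double limits: the
# smallest double normal sits at extended exponent 0x3fff - 1022 = 0x3c01,
# so d0 >= 0x3fff - 0x3c00 = 0x03ff means underflow; the largest double
# normal sits at 0x3fff + 1023 = 0x43fe, so d0 == 0x3fff - 0x43fe is the
# "may overflow after rounding" boundary and anything below it overflows.
#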
13601 # operand WILL underflow when moved in to the fp register file
13603 fabs_sd_unfl:
13604 bset &unfl_bit,FPSR_EXCEPT(%a6) # set unfl exc bit
13606 bclr &0x7,FP_SCR0_EX(%a6) # force absolute value
13608 # if underflow or inexact is enabled, go calculate EXOP first.
13609 mov.b FPCR_ENABLE(%a6),%d1
13610 andi.b &0x0b,%d1 # is UNFL or INEX enabled?
13611 bne.b fabs_sd_unfl_ena # yes
13613 fabs_sd_unfl_dis:
13614 lea FP_SCR0(%a6),%a0 # pass: result addr
13615 mov.l L_SCR3(%a6),%d1 # pass: rnd prec,mode
13616 bsr.l unf_res # calculate default result
13617 or.b %d0,FPSR_CC(%a6) # set possible 'Z' ccode
13618 fmovm.x FP_SCR0(%a6),&0x80 # return default result in fp0
13619 rts
13622 # operand will underflow AND underflow is enabled.
13623 # therefore, we must return the result rounded to extended precision.
13625 fabs_sd_unfl_ena:
13626 mov.l FP_SCR0_HI(%a6),FP_SCR1_HI(%a6)
13627 mov.l FP_SCR0_LO(%a6),FP_SCR1_LO(%a6)
13628 mov.w FP_SCR0_EX(%a6),%d1 # load current exponent
13630 mov.l %d2,-(%sp) # save d2
13631 mov.l %d1,%d2 # make a copy
13632 andi.l &0x7fff,%d1 # strip sign
13633 andi.w &0x8000,%d2 # keep old sign
13634 sub.l %d0,%d1 # subtract scale factor
13635 addi.l &0x6000,%d1 # add new bias
13636 andi.w &0x7fff,%d1
13637 or.w %d2,%d1 # concat new sign,new exp
13638 mov.w %d1,FP_SCR1_EX(%a6) # insert new exp
13639 fmovm.x FP_SCR1(%a6),&0x40 # return EXOP in fp1
13640 mov.l (%sp)+,%d2 # restore d2
13641 bra.b fabs_sd_unfl_dis
13644 # operand WILL overflow.
13646 fabs_sd_ovfl:
13647 fmov.l &0x0,%fpsr # clear FPSR
13648 fmov.l L_SCR3(%a6),%fpcr # set FPCR
13650 fabs.x FP_SCR0(%a6),%fp0 # perform absolute
13652 fmov.l &0x0,%fpcr # clear FPCR
13653 fmov.l %fpsr,%d1 # save FPSR
13655 or.l %d1,USER_FPSR(%a6) # save INEX2,N
13657 fabs_sd_ovfl_tst:
13658 or.l &ovfl_inx_mask,USER_FPSR(%a6) # set ovfl/aovfl/ainex
13660 mov.b FPCR_ENABLE(%a6),%d1
13661 andi.b &0x13,%d1 # is OVFL or INEX enabled?
13662 bne.b fabs_sd_ovfl_ena # yes
13665 # OVFL is not enabled; therefore, we must create the default result by
13666 # calling ovf_res().
13668 fabs_sd_ovfl_dis:
13669 btst &neg_bit,FPSR_CC(%a6) # is result negative?
13670 sne %d1 # set sign param accordingly
13671 mov.l L_SCR3(%a6),%d0 # pass: prec,mode
13672 bsr.l ovf_res # calculate default result
13673 or.b %d0,FPSR_CC(%a6) # set INF,N if applicable
13674 fmovm.x (%a0),&0x80 # return default result in fp0
13678 # OVFL is enabled.
13679 # the INEX2 bit has already been updated by the round to the correct precision.
13680 # now, round to extended(and don't alter the FPSR).
13682 fabs_sd_ovfl_ena:
13683 mov.l %d2,-(%sp) # save d2
13684 mov.w FP_SCR0_EX(%a6),%d1 # fetch {sgn,exp}
13685 mov.l %d1,%d2 # make a copy
13686 andi.l &0x7fff,%d1 # strip sign
13687 andi.w &0x8000,%d2 # keep old sign
13688 sub.l %d0,%d1 # add scale factor
13689 subi.l &0x6000,%d1 # subtract bias
13690 andi.w &0x7fff,%d1
13691 or.w %d2,%d1 # concat sign,exp
13692 mov.w %d1,FP_SCR0_EX(%a6) # insert new exponent
13693 fmovm.x FP_SCR0(%a6),&0x40 # return EXOP in fp1
13694 mov.l (%sp)+,%d2 # restore d2
13695 bra.b fabs_sd_ovfl_dis
13698 # the move in MAY overflow. so...
13700 fabs_sd_may_ovfl:
13701 fmov.l &0x0,%fpsr # clear FPSR
13702 fmov.l L_SCR3(%a6),%fpcr # set FPCR
13704 fabs.x FP_SCR0(%a6),%fp0 # perform absolute
13706 fmov.l %fpsr,%d1 # save status
13707 fmov.l &0x0,%fpcr # clear FPCR
13709 or.l %d1,USER_FPSR(%a6) # save INEX2,N
13711 fabs.x %fp0,%fp1 # make a copy of result
13712 fcmp.b %fp1,&0x2 # is |result| >= 2.b?
13713 fbge.w fabs_sd_ovfl_tst # yes; overflow has occurred
13715 # no, it didn't overflow; we have correct result
13716 bra.w fabs_sd_normal_exit
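#
# informally, why the compare against 2.0 works: the operand was scaled so
# its exponent is 0x3fff, i.e. its magnitude lies in [1.0, 2.0). the
# rounded, scaled result can only reach 2.0 if rounding to the selected
# precision bumped the mantissa past its maximum -- exactly the case in
# which the unscaled result would have overflowed.
#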
13718 ##########################################################################
13721 # input is not normalized; what is it?
13723 fabs_not_norm:
13724 cmpi.b %d1,&DENORM # weed out DENORM
13725 beq.w fabs_denorm
13726 cmpi.b %d1,&SNAN # weed out SNAN
13727 beq.l res_snan_1op
13728 cmpi.b %d1,&QNAN # weed out QNAN
13729 beq.l res_qnan_1op
13731 fabs.x SRC(%a0),%fp0 # force absolute value
13733 cmpi.b %d1,&INF # weed out INF
13734 beq.b fabs_inf
13735 fabs_zero:
13736 mov.b &z_bmask,FPSR_CC(%a6) # set 'Z' ccode bit
13738 fabs_inf:
13739 mov.b &inf_bmask,FPSR_CC(%a6) # set 'I' ccode bit
13742 #########################################################################
13743 # XDEF **************************************************************** #
13744 # fcmp(): fp compare op routine #
13746 # XREF **************************************************************** #
13747 # res_qnan() - return QNAN result #
13748 # res_snan() - return SNAN result #
13750 # INPUT *************************************************************** #
13751 # a0 = pointer to extended precision source operand #
13752 # a1 = pointer to extended precision destination operand #
13753 # d0 = round prec/mode #
13755 # OUTPUT ************************************************************** #
13756 # None #
13758 # ALGORITHM *********************************************************** #
13759 # Handle NANs and denorms as special cases. For everything else, #
13760 # just use the actual fcmp instruction to produce the correct condition #
13761 # codes. #
13763 #########################################################################
13765 global fcmp
13766 fcmp:
13767 clr.w %d1
13768 mov.b DTAG(%a6),%d1
13769 lsl.b &0x3,%d1
13770 or.b STAG(%a6),%d1
13771 bne.b fcmp_not_norm # optimize on non-norm input
13774 # COMPARE FP OPs : NORMs, ZEROs, INFs, and "corrected" DENORMs
13776 fcmp_norm:
13777 fmovm.x DST(%a1),&0x80 # load dst op
13779 fcmp.x %fp0,SRC(%a0) # do compare
13781 fmov.l %fpsr,%d0 # save FPSR
13782 rol.l &0x8,%d0 # extract ccode bits
13783 mov.b %d0,FPSR_CC(%a6) # set ccode bits (no exc bits are set)
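# (the FPSR condition-code byte -- N,Z,I,NAN in bits 27-24 -- is the top
# byte of the FPSR, so rotating left by 8 brings it down to bits 7-0
# where it can be stored with a byte move.)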
13788 # fcmp: inputs are not both normalized; what are they?
13790 fcmp_not_norm:
13791 mov.w (tbl_fcmp_op.b,%pc,%d1.w*2),%d1
13792 jmp (tbl_fcmp_op.b,%pc,%d1.w*1)
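#
# a sketch of this dispatch, which the other two-operand routines below
# reuse: d1 was built as DTAG*8 + STAG, with the tag values evidently
# ordered NORM,ZERO,INF,QNAN,DENORM,SNAN (as the row/column comments
# suggest). each 8-entry row of the table holds word-sized offsets
# "handler - tbl_fcmp_op" (two per row unused), so scaling d1 by 2 picks
# the right offset and the jmp adds it back onto the table base.
#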
13794 swbeg &48
13795 tbl_fcmp_op:
13796 short fcmp_norm - tbl_fcmp_op # NORM - NORM
13797 short fcmp_norm - tbl_fcmp_op # NORM - ZERO
13798 short fcmp_norm - tbl_fcmp_op # NORM - INF
13799 short fcmp_res_qnan - tbl_fcmp_op # NORM - QNAN
13800 short fcmp_nrm_dnrm - tbl_fcmp_op # NORM - DENORM
13801 short fcmp_res_snan - tbl_fcmp_op # NORM - SNAN
13802 short tbl_fcmp_op - tbl_fcmp_op #
13803 short tbl_fcmp_op - tbl_fcmp_op #
13805 short fcmp_norm - tbl_fcmp_op # ZERO - NORM
13806 short fcmp_norm - tbl_fcmp_op # ZERO - ZERO
13807 short fcmp_norm - tbl_fcmp_op # ZERO - INF
13808 short fcmp_res_qnan - tbl_fcmp_op # ZERO - QNAN
13809 short fcmp_dnrm_s - tbl_fcmp_op # ZERO - DENORM
13810 short fcmp_res_snan - tbl_fcmp_op # ZERO - SNAN
13811 short tbl_fcmp_op - tbl_fcmp_op #
13812 short tbl_fcmp_op - tbl_fcmp_op #
13814 short fcmp_norm - tbl_fcmp_op # INF - NORM
13815 short fcmp_norm - tbl_fcmp_op # INF - ZERO
13816 short fcmp_norm - tbl_fcmp_op # INF - INF
13817 short fcmp_res_qnan - tbl_fcmp_op # INF - QNAN
13818 short fcmp_dnrm_s - tbl_fcmp_op # INF - DENORM
13819 short fcmp_res_snan - tbl_fcmp_op # INF - SNAN
13820 short tbl_fcmp_op - tbl_fcmp_op #
13821 short tbl_fcmp_op - tbl_fcmp_op #
13823 short fcmp_res_qnan - tbl_fcmp_op # QNAN - NORM
13824 short fcmp_res_qnan - tbl_fcmp_op # QNAN - ZERO
13825 short fcmp_res_qnan - tbl_fcmp_op # QNAN - INF
13826 short fcmp_res_qnan - tbl_fcmp_op # QNAN - QNAN
13827 short fcmp_res_qnan - tbl_fcmp_op # QNAN - DENORM
13828 short fcmp_res_snan - tbl_fcmp_op # QNAN - SNAN
13829 short tbl_fcmp_op - tbl_fcmp_op #
13830 short tbl_fcmp_op - tbl_fcmp_op #
13832 short fcmp_dnrm_nrm - tbl_fcmp_op # DENORM - NORM
13833 short fcmp_dnrm_d - tbl_fcmp_op # DENORM - ZERO
13834 short fcmp_dnrm_d - tbl_fcmp_op # DENORM - INF
13835 short fcmp_res_qnan - tbl_fcmp_op # DENORM - QNAN
13836 short fcmp_dnrm_sd - tbl_fcmp_op # DENORM - DENORM
13837 short fcmp_res_snan - tbl_fcmp_op # DENORM - SNAN
13838 short tbl_fcmp_op - tbl_fcmp_op #
13839 short tbl_fcmp_op - tbl_fcmp_op #
13841 short fcmp_res_snan - tbl_fcmp_op # SNAN - NORM
13842 short fcmp_res_snan - tbl_fcmp_op # SNAN - ZERO
13843 short fcmp_res_snan - tbl_fcmp_op # SNAN - INF
13844 short fcmp_res_snan - tbl_fcmp_op # SNAN - QNAN
13845 short fcmp_res_snan - tbl_fcmp_op # SNAN - DENORM
13846 short fcmp_res_snan - tbl_fcmp_op # SNAN - SNAN
13847 short tbl_fcmp_op - tbl_fcmp_op #
13848 short tbl_fcmp_op - tbl_fcmp_op #
13850 # unlike all other functions for QNAN and SNAN, fcmp does NOT set the
13851 # 'N' bit for a negative QNAN or SNAN input so we must squelch it here.
13852 fcmp_res_qnan:
13853 bsr.l res_qnan
13854 andi.b &0xf7,FPSR_CC(%a6)
13856 fcmp_res_snan:
13857 bsr.l res_snan
13858 andi.b &0xf7,FPSR_CC(%a6)
13862 # DENORMs are a little more difficult.
13863 # If you have 2 DENORMs, then you can just force the j-bit to a one
13864 # and use the fcmp_norm routine.
13865 # If you have a DENORM and an INF or ZERO, just force the DENORM's j-bit to a one
13866 # and use the fcmp_norm routine.
13867 # If you have a DENORM and a NORM with opposite signs, then use fcmp_norm, also.
13868 # But with a DENORM and a NORM of the same sign, the neg bit is set if the
13869 # (1) signs are (+) and the DENORM is the dst or
13870 # (2) signs are (-) and the DENORM is the src
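#
# a concrete instance of rule (2): dst = -3.0 (NORM), src = a tiny
# negative DENORM. the compare effectively evaluates dst - src, a
# negative number minus a nearly-zero negative number, which is still
# negative, so 'N' must be set even though no real fcmp is executed.
# rule (1) is the mirror image with both operands positive and the
# DENORM as the dst.
#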
13873 fcmp_dnrm_s:
13874 mov.w SRC_EX(%a0),FP_SCR0_EX(%a6)
13875 mov.l SRC_HI(%a0),%d0
13876 bset &31,%d0 # DENORM src; make into small norm
13877 mov.l %d0,FP_SCR0_HI(%a6)
13878 mov.l SRC_LO(%a0),FP_SCR0_LO(%a6)
13879 lea FP_SCR0(%a6),%a0
13880 bra.w fcmp_norm
13882 fcmp_dnrm_d:
13883 mov.l DST_EX(%a1),FP_SCR0_EX(%a6)
13884 mov.l DST_HI(%a1),%d0
13885 bset &31,%d0 # DENORM dst; make into small norm
13886 mov.l %d0,FP_SCR0_HI(%a6)
13887 mov.l DST_LO(%a1),FP_SCR0_LO(%a6)
13888 lea FP_SCR0(%a6),%a1
13889 bra.w fcmp_norm
13891 fcmp_dnrm_sd:
13892 mov.w DST_EX(%a1),FP_SCR1_EX(%a6)
13893 mov.w SRC_EX(%a0),FP_SCR0_EX(%a6)
13894 mov.l DST_HI(%a1),%d0
13895 bset &31,%d0 # DENORM dst; make into small norm
13896 mov.l %d0,FP_SCR1_HI(%a6)
13897 mov.l SRC_HI(%a0),%d0
13898 bset &31,%d0 # DENORM src; make into small norm
13899 mov.l %d0,FP_SCR0_HI(%a6)
13900 mov.l DST_LO(%a1),FP_SCR1_LO(%a6)
13901 mov.l SRC_LO(%a0),FP_SCR0_LO(%a6)
13902 lea FP_SCR1(%a6),%a1
13903 lea FP_SCR0(%a6),%a0
13904 bra.w fcmp_norm
13906 fcmp_nrm_dnrm:
13907 mov.b SRC_EX(%a0),%d0 # determine if like signs
13908 mov.b DST_EX(%a1),%d1
13909 eor.b %d0,%d1
13910 bmi.w fcmp_dnrm_s
13912 # signs are the same, so must determine the answer ourselves.
13913 tst.b %d0 # is src op negative?
13914 bmi.b fcmp_nrm_dnrm_m # yes
13916 fcmp_nrm_dnrm_m:
13917 mov.b &neg_bmask,FPSR_CC(%a6) # set 'N' ccode bit
13920 fcmp_dnrm_nrm:
13921 mov.b SRC_EX(%a0),%d0 # determine if like signs
13922 mov.b DST_EX(%a1),%d1
13923 eor.b %d0,%d1
13924 bmi.w fcmp_dnrm_d
13926 # signs are the same, so must determine the answer ourselves.
13927 tst.b %d0 # is src op negative?
13928 bpl.b fcmp_dnrm_nrm_m # no
13930 fcmp_dnrm_nrm_m:
13931 mov.b &neg_bmask,FPSR_CC(%a6) # set 'N' ccode bit
13934 #########################################################################
13935 # XDEF **************************************************************** #
13936 # fsglmul(): emulates the fsglmul instruction #
13938 # XREF **************************************************************** #
13939 # scale_to_zero_src() - scale src exponent to zero #
13940 # scale_to_zero_dst() - scale dst exponent to zero #
13941 # unf_res4() - return default underflow result for sglop #
13942 # ovf_res() - return default overflow result #
13943 # res_qnan() - return QNAN result #
13944 # res_snan() - return SNAN result #
13946 # INPUT *************************************************************** #
13947 # a0 = pointer to extended precision source operand #
13948 # a1 = pointer to extended precision destination operand #
13949 # d0 = rnd prec,mode #
13951 # OUTPUT ************************************************************** #
13952 # fp0 = result #
13953 # fp1 = EXOP (if exception occurred) #
13955 # ALGORITHM *********************************************************** #
13956 # Handle NANs, infinities, and zeroes as special cases. Divide #
13957 # norms/denorms into ext/sgl/dbl precision. #
13958 # For norms/denorms, scale the exponents such that a multiply #
13959 # instruction won't cause an exception. Use the regular fsglmul to #
13960 # compute a result. Check if the regular operands would have taken #
13961 # an exception. If so, return the default overflow/underflow result #
13962 # and return the EXOP if exceptions are enabled. Else, scale the #
13963 # result operand to the proper exponent. #
13965 #########################################################################
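#
# a small worked example of the scaling described above: with src
# exponent 0x4001 and dst exponent 0x3ffd, the scale factors are
# 0x3fff - 0x4001 = -2 and 0x3fff - 0x3ffd = +2, so SCALE_FACTOR = 0.
# both operands go into the multiply with exponents forced to 0x3fff
# (unbiased zero), and the true exponent is recovered at the exit points
# by subtracting SCALE_FACTOR from the scaled product's exponent.
#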
13967 global fsglmul
13968 fsglmul:
13969 mov.l %d0,L_SCR3(%a6) # store rnd info
13971 clr.w %d1
13972 mov.b DTAG(%a6),%d1
13973 lsl.b &0x3,%d1
13974 or.b STAG(%a6),%d1
13976 bne.w fsglmul_not_norm # optimize on non-norm input
13978 fsglmul_norm:
13979 mov.w DST_EX(%a1),FP_SCR1_EX(%a6)
13980 mov.l DST_HI(%a1),FP_SCR1_HI(%a6)
13981 mov.l DST_LO(%a1),FP_SCR1_LO(%a6)
13983 mov.w SRC_EX(%a0),FP_SCR0_EX(%a6)
13984 mov.l SRC_HI(%a0),FP_SCR0_HI(%a6)
13985 mov.l SRC_LO(%a0),FP_SCR0_LO(%a6)
13987 bsr.l scale_to_zero_src # scale exponent
13988 mov.l %d0,-(%sp) # save scale factor 1
13990 bsr.l scale_to_zero_dst # scale dst exponent
13992 add.l (%sp)+,%d0 # SCALE_FACTOR = scale1 + scale2
13994 cmpi.l %d0,&0x3fff-0x7ffe # would result ovfl?
13995 beq.w fsglmul_may_ovfl # result may rnd to overflow
13996 blt.w fsglmul_ovfl # result will overflow
13998 cmpi.l %d0,&0x3fff+0x0001 # would result unfl?
13999 beq.w fsglmul_may_unfl # result may rnd to no unfl
14000 bgt.w fsglmul_unfl # result will underflow
14002 fsglmul_normal:
14003 fmovm.x FP_SCR1(%a6),&0x80 # load dst op
14005 fmov.l L_SCR3(%a6),%fpcr # set FPCR
14006 fmov.l &0x0,%fpsr # clear FPSR
14008 fsglmul.x FP_SCR0(%a6),%fp0 # execute sgl multiply
14010 fmov.l %fpsr,%d1 # save status
14011 fmov.l &0x0,%fpcr # clear FPCR
14013 or.l %d1,USER_FPSR(%a6) # save INEX2,N
14015 fsglmul_normal_exit:
14016 fmovm.x &0x80,FP_SCR0(%a6) # store out result
14017 mov.l %d2,-(%sp) # save d2
14018 mov.w FP_SCR0_EX(%a6),%d1 # load {sgn,exp}
14019 mov.l %d1,%d2 # make a copy
14020 andi.l &0x7fff,%d1 # strip sign
14021 andi.w &0x8000,%d2 # keep old sign
14022 sub.l %d0,%d1 # add scale factor
14023 or.w %d2,%d1 # concat old sign,new exp
14024 mov.w %d1,FP_SCR0_EX(%a6) # insert new exponent
14025 mov.l (%sp)+,%d2 # restore d2
14026 fmovm.x FP_SCR0(%a6),&0x80 # return result in fp0
14029 fsglmul_ovfl:
14030 fmovm.x FP_SCR1(%a6),&0x80 # load dst op
14032 fmov.l L_SCR3(%a6),%fpcr # set FPCR
14033 fmov.l &0x0,%fpsr # clear FPSR
14035 fsglmul.x FP_SCR0(%a6),%fp0 # execute sgl multiply
14037 fmov.l %fpsr,%d1 # save status
14038 fmov.l &0x0,%fpcr # clear FPCR
14040 or.l %d1,USER_FPSR(%a6) # save INEX2,N
14042 fsglmul_ovfl_tst:
14044 # save setting this until now because this is where fsglmul_may_ovfl may jump in
14045 or.l &ovfl_inx_mask, USER_FPSR(%a6) # set ovfl/aovfl/ainex
14047 mov.b FPCR_ENABLE(%a6),%d1
14048 andi.b &0x13,%d1 # is OVFL or INEX enabled?
14049 bne.b fsglmul_ovfl_ena # yes
14051 fsglmul_ovfl_dis:
14052 btst &neg_bit,FPSR_CC(%a6) # is result negative?
14053 sne %d1 # set sign param accordingly
14054 mov.l L_SCR3(%a6),%d0 # pass prec:rnd
14055 andi.b &0x30,%d0 # force prec = ext
14056 bsr.l ovf_res # calculate default result
14057 or.b %d0,FPSR_CC(%a6) # set INF,N if applicable
14058 fmovm.x (%a0),&0x80 # return default result in fp0
14061 fsglmul_ovfl_ena:
14062 fmovm.x &0x80,FP_SCR0(%a6) # move result to stack
14064 mov.l %d2,-(%sp) # save d2
14065 mov.w FP_SCR0_EX(%a6),%d1 # fetch {sgn,exp}
14066 mov.l %d1,%d2 # make a copy
14067 andi.l &0x7fff,%d1 # strip sign
14068 sub.l %d0,%d1 # add scale factor
14069 subi.l &0x6000,%d1 # subtract bias
14070 andi.w &0x7fff,%d1
14071 andi.w &0x8000,%d2 # keep old sign
14072 or.w %d2,%d1 # concat old sign,new exp
14073 mov.w %d1,FP_SCR0_EX(%a6) # insert new exponent
14074 mov.l (%sp)+,%d2 # restore d2
14075 fmovm.x FP_SCR0(%a6),&0x40 # return EXOP in fp1
14076 bra.b fsglmul_ovfl_dis
14078 fsglmul_may_ovfl:
14079 fmovm.x FP_SCR1(%a6),&0x80 # load dst op
14081 fmov.l L_SCR3(%a6),%fpcr # set FPCR
14082 fmov.l &0x0,%fpsr # clear FPSR
14084 fsglmul.x FP_SCR0(%a6),%fp0 # execute sgl multiply
14086 fmov.l %fpsr,%d1 # save status
14087 fmov.l &0x0,%fpcr # clear FPCR
14089 or.l %d1,USER_FPSR(%a6) # save INEX2,N
14091 fabs.x %fp0,%fp1 # make a copy of result
14092 fcmp.b %fp1,&0x2 # is |result| >= 2.b?
14093 fbge.w fsglmul_ovfl_tst # yes; overflow has occurred
14095 # no, it didn't overflow; we have correct result
14096 bra.w fsglmul_normal_exit
14098 fsglmul_unfl:
14099 bset &unfl_bit,FPSR_EXCEPT(%a6) # set unfl exc bit
14101 fmovm.x FP_SCR1(%a6),&0x80 # load dst op
14103 fmov.l &rz_mode*0x10,%fpcr # set FPCR
14104 fmov.l &0x0,%fpsr # clear FPSR
14106 fsglmul.x FP_SCR0(%a6),%fp0 # execute sgl multiply
14108 fmov.l %fpsr,%d1 # save status
14109 fmov.l &0x0,%fpcr # clear FPCR
14111 or.l %d1,USER_FPSR(%a6) # save INEX2,N
14113 mov.b FPCR_ENABLE(%a6),%d1
14114 andi.b &0x0b,%d1 # is UNFL or INEX enabled?
14115 bne.b fsglmul_unfl_ena # yes
14117 fsglmul_unfl_dis:
14118 fmovm.x &0x80,FP_SCR0(%a6) # store out result
14120 lea FP_SCR0(%a6),%a0 # pass: result addr
14121 mov.l L_SCR3(%a6),%d1 # pass: rnd prec,mode
14122 bsr.l unf_res4 # calculate default result
14123 or.b %d0,FPSR_CC(%a6) # 'Z' bit may have been set
14124 fmovm.x FP_SCR0(%a6),&0x80 # return default result in fp0
14128 # UNFL is enabled.
14130 fsglmul_unfl_ena:
14131 fmovm.x FP_SCR1(%a6),&0x40 # load dst op
14133 fmov.l L_SCR3(%a6),%fpcr # set FPCR
14134 fmov.l &0x0,%fpsr # clear FPSR
14136 fsglmul.x FP_SCR0(%a6),%fp1 # execute sgl multiply
14138 fmov.l &0x0,%fpcr # clear FPCR
14140 fmovm.x &0x40,FP_SCR0(%a6) # save result to stack
14141 mov.l %d2,-(%sp) # save d2
14142 mov.w FP_SCR0_EX(%a6),%d1 # fetch {sgn,exp}
14143 mov.l %d1,%d2 # make a copy
14144 andi.l &0x7fff,%d1 # strip sign
14145 andi.w &0x8000,%d2 # keep old sign
14146 sub.l %d0,%d1 # add scale factor
14147 addi.l &0x6000,%d1 # add bias
14148 andi.w &0x7fff,%d1
14149 or.w %d2,%d1 # concat old sign,new exp
14150 mov.w %d1,FP_SCR0_EX(%a6) # insert new exponent
14151 mov.l (%sp)+,%d2 # restore d2
14152 fmovm.x FP_SCR0(%a6),&0x40 # return EXOP in fp1
14153 bra.w fsglmul_unfl_dis
14155 fsglmul_may_unfl:
14156 fmovm.x FP_SCR1(%a6),&0x80 # load dst op
14158 fmov.l L_SCR3(%a6),%fpcr # set FPCR
14159 fmov.l &0x0,%fpsr # clear FPSR
14161 fsglmul.x FP_SCR0(%a6),%fp0 # execute sgl multiply
14163 fmov.l %fpsr,%d1 # save status
14164 fmov.l &0x0,%fpcr # clear FPCR
14166 or.l %d1,USER_FPSR(%a6) # save INEX2,N
14168 fabs.x %fp0,%fp1 # make a copy of result
14169 fcmp.b %fp1,&0x2 # is |result| > 2.b?
14170 fbgt.w fsglmul_normal_exit # no; no underflow occurred
14171 fblt.w fsglmul_unfl # yes; underflow occurred
14174 # we still don't know if underflow occurred. result is ~ equal to 2. but,
14175 # we don't know if the result was an underflow that rounded up to a 2 or
14176 # a normalized number that rounded down to a 2. so, redo the entire operation
14177 # using RZ as the rounding mode to see what the pre-rounded result is.
14178 # this case should be relatively rare.
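#
# for instance: a scaled product just below 2.0 that RN rounds up to 2.0
# really did underflow, while one just above 2.0 that RN rounds down to
# 2.0 did not. rounding toward zero never increases the magnitude, so
# after the RZ pass a result still >= 2.0 must have started >= 2.0 (no
# underflow), and anything below 2.0 confirms the underflow.
#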
14180 fmovm.x FP_SCR1(%a6),&0x40 # load dst op into fp1
14182 mov.l L_SCR3(%a6),%d1
14183 andi.b &0xc0,%d1 # keep rnd prec
14184 ori.b &rz_mode*0x10,%d1 # insert RZ
14186 fmov.l %d1,%fpcr # set FPCR
14187 fmov.l &0x0,%fpsr # clear FPSR
14189 fsglmul.x FP_SCR0(%a6),%fp1 # execute sgl multiply
14191 fmov.l &0x0,%fpcr # clear FPCR
14192 fabs.x %fp1 # make absolute value
14193 fcmp.b %fp1,&0x2 # is |result| < 2.b?
14194 fbge.w fsglmul_normal_exit # no; no underflow occurred
14195 bra.w fsglmul_unfl # yes, underflow occurred
14197 ##############################################################################
14200 # Single Precision Multiply: inputs are not both normalized; what are they?
14202 fsglmul_not_norm:
14203 mov.w (tbl_fsglmul_op.b,%pc,%d1.w*2),%d1
14204 jmp (tbl_fsglmul_op.b,%pc,%d1.w*1)
14206 swbeg &48
14207 tbl_fsglmul_op:
14208 short fsglmul_norm - tbl_fsglmul_op # NORM x NORM
14209 short fsglmul_zero - tbl_fsglmul_op # NORM x ZERO
14210 short fsglmul_inf_src - tbl_fsglmul_op # NORM x INF
14211 short fsglmul_res_qnan - tbl_fsglmul_op # NORM x QNAN
14212 short fsglmul_norm - tbl_fsglmul_op # NORM x DENORM
14213 short fsglmul_res_snan - tbl_fsglmul_op # NORM x SNAN
14214 short tbl_fsglmul_op - tbl_fsglmul_op #
14215 short tbl_fsglmul_op - tbl_fsglmul_op #
14217 short fsglmul_zero - tbl_fsglmul_op # ZERO x NORM
14218 short fsglmul_zero - tbl_fsglmul_op # ZERO x ZERO
14219 short fsglmul_res_operr - tbl_fsglmul_op # ZERO x INF
14220 short fsglmul_res_qnan - tbl_fsglmul_op # ZERO x QNAN
14221 short fsglmul_zero - tbl_fsglmul_op # ZERO x DENORM
14222 short fsglmul_res_snan - tbl_fsglmul_op # ZERO x SNAN
14223 short tbl_fsglmul_op - tbl_fsglmul_op #
14224 short tbl_fsglmul_op - tbl_fsglmul_op #
14226 short fsglmul_inf_dst - tbl_fsglmul_op # INF x NORM
14227 short fsglmul_res_operr - tbl_fsglmul_op # INF x ZERO
14228 short fsglmul_inf_dst - tbl_fsglmul_op # INF x INF
14229 short fsglmul_res_qnan - tbl_fsglmul_op # INF x QNAN
14230 short fsglmul_inf_dst - tbl_fsglmul_op # INF x DENORM
14231 short fsglmul_res_snan - tbl_fsglmul_op # INF x SNAN
14232 short tbl_fsglmul_op - tbl_fsglmul_op #
14233 short tbl_fsglmul_op - tbl_fsglmul_op #
14235 short fsglmul_res_qnan - tbl_fsglmul_op # QNAN x NORM
14236 short fsglmul_res_qnan - tbl_fsglmul_op # QNAN x ZERO
14237 short fsglmul_res_qnan - tbl_fsglmul_op # QNAN x INF
14238 short fsglmul_res_qnan - tbl_fsglmul_op # QNAN x QNAN
14239 short fsglmul_res_qnan - tbl_fsglmul_op # QNAN x DENORM
14240 short fsglmul_res_snan - tbl_fsglmul_op # QNAN x SNAN
14241 short tbl_fsglmul_op - tbl_fsglmul_op #
14242 short tbl_fsglmul_op - tbl_fsglmul_op #
14244 short fsglmul_norm - tbl_fsglmul_op # DENORM x NORM
14245 short fsglmul_zero - tbl_fsglmul_op # DENORM x ZERO
14246 short fsglmul_inf_src - tbl_fsglmul_op # DENORM x INF
14247 short fsglmul_res_qnan - tbl_fsglmul_op # DENORM x QNAN
14248 short fsglmul_norm - tbl_fsglmul_op # DENORM x DENORM
14249 short fsglmul_res_snan - tbl_fsglmul_op # DENORM x SNAN
14250 short tbl_fsglmul_op - tbl_fsglmul_op #
14251 short tbl_fsglmul_op - tbl_fsglmul_op #
14253 short fsglmul_res_snan - tbl_fsglmul_op # SNAN x NORM
14254 short fsglmul_res_snan - tbl_fsglmul_op # SNAN x ZERO
14255 short fsglmul_res_snan - tbl_fsglmul_op # SNAN x INF
14256 short fsglmul_res_snan - tbl_fsglmul_op # SNAN x QNAN
14257 short fsglmul_res_snan - tbl_fsglmul_op # SNAN x DENORM
14258 short fsglmul_res_snan - tbl_fsglmul_op # SNAN x SNAN
14259 short tbl_fsglmul_op - tbl_fsglmul_op #
14260 short tbl_fsglmul_op - tbl_fsglmul_op #
14262 fsglmul_res_operr:
14263 bra.l res_operr
14264 fsglmul_res_snan:
14265 bra.l res_snan
14266 fsglmul_res_qnan:
14267 bra.l res_qnan
14268 fsglmul_zero:
14269 bra.l fmul_zero
14270 fsglmul_inf_src:
14271 bra.l fmul_inf_src
14272 fsglmul_inf_dst:
14273 bra.l fmul_inf_dst
14275 #########################################################################
14276 # XDEF **************************************************************** #
14277 # fsgldiv(): emulates the fsgldiv instruction #
14279 # XREF **************************************************************** #
14280 # scale_to_zero_src() - scale src exponent to zero #
14281 # scale_to_zero_dst() - scale dst exponent to zero #
14282 # unf_res4() - return default underflow result for sglop #
14283 # ovf_res() - return default overflow result #
14284 # res_qnan() - return QNAN result #
14285 # res_snan() - return SNAN result #
14287 # INPUT *************************************************************** #
14288 # a0 = pointer to extended precision source operand #
14289 # a1 = pointer to extended precision destination operand #
14290 # d0 = rnd prec,mode #
14292 # OUTPUT ************************************************************** #
14293 # fp0 = result #
14294 # fp1 = EXOP (if exception occurred) #
14296 # ALGORITHM *********************************************************** #
14297 # Handle NANs, infinities, and zeroes as special cases. Divide #
14298 # norms/denorms into ext/sgl/dbl precision. #
14299 # For norms/denorms, scale the exponents such that a divide #
14300 # instruction won't cause an exception. Use the regular fsgldiv to #
14301 # compute a result. Check if the regular operands would have taken #
14302 # an exception. If so, return the default overflow/underflow result #
14303 # and return the EXOP if exceptions are enabled. Else, scale the #
14304 # result operand to the proper exponent. #
14306 #########################################################################
14308 global fsgldiv
14309 fsgldiv:
14310 mov.l %d0,L_SCR3(%a6) # store rnd info
14312 clr.w %d1
14313 mov.b DTAG(%a6),%d1
14314 lsl.b &0x3,%d1
14315 or.b STAG(%a6),%d1 # combine src tags
14317 bne.w fsgldiv_not_norm # optimize on non-norm input
14320 # DIVIDE: NORMs and DENORMs ONLY!
14322 fsgldiv_norm:
14323 mov.w DST_EX(%a1),FP_SCR1_EX(%a6)
14324 mov.l DST_HI(%a1),FP_SCR1_HI(%a6)
14325 mov.l DST_LO(%a1),FP_SCR1_LO(%a6)
14327 mov.w SRC_EX(%a0),FP_SCR0_EX(%a6)
14328 mov.l SRC_HI(%a0),FP_SCR0_HI(%a6)
14329 mov.l SRC_LO(%a0),FP_SCR0_LO(%a6)
14331 bsr.l scale_to_zero_src # calculate scale factor 1
14332 mov.l %d0,-(%sp) # save scale factor 1
14334 bsr.l scale_to_zero_dst # calculate scale factor 2
14336 neg.l (%sp) # S.F. = scale2 - scale1
14337 add.l %d0,(%sp)
14339 mov.w 2+L_SCR3(%a6),%d1 # fetch precision,mode
14340 lsr.b &0x6,%d1
14341 mov.l (%sp)+,%d0
14342 cmpi.l %d0,&0x3fff-0x7ffe
14343 ble.w fsgldiv_may_ovfl
14345 cmpi.l %d0,&0x3fff-0x0000 # will result underflow?
14346 beq.w fsgldiv_may_unfl # maybe
14347 bgt.w fsgldiv_unfl # yes; go handle underflow
14349 fsgldiv_normal:
14350 fmovm.x FP_SCR1(%a6),&0x80 # load dst op
14352 fmov.l L_SCR3(%a6),%fpcr # set FPCR
14353 fmov.l &0x0,%fpsr # clear FPSR
14355 fsgldiv.x FP_SCR0(%a6),%fp0 # perform sgl divide
14357 fmov.l %fpsr,%d1 # save FPSR
14358 fmov.l &0x0,%fpcr # clear FPCR
14360 or.l %d1,USER_FPSR(%a6) # save INEX2,N
14362 fsgldiv_normal_exit:
14363 fmovm.x &0x80,FP_SCR0(%a6) # store result on stack
14364 mov.l %d2,-(%sp) # save d2
14365 mov.w FP_SCR0_EX(%a6),%d1 # load {sgn,exp}
14366 mov.l %d1,%d2 # make a copy
14367 andi.l &0x7fff,%d1 # strip sign
14368 andi.w &0x8000,%d2 # keep old sign
14369 sub.l %d0,%d1 # add scale factor
14370 or.w %d2,%d1 # concat old sign,new exp
14371 mov.w %d1,FP_SCR0_EX(%a6) # insert new exponent
14372 mov.l (%sp)+,%d2 # restore d2
14373 fmovm.x FP_SCR0(%a6),&0x80 # return result in fp0
14376 fsgldiv_may_ovfl:
14377 fmovm.x FP_SCR1(%a6),&0x80 # load dst op
14379 fmov.l L_SCR3(%a6),%fpcr # set FPCR
14380 fmov.l &0x0,%fpsr # clear FPSR
14382 fsgldiv.x FP_SCR0(%a6),%fp0 # execute divide
14384 fmov.l %fpsr,%d1
14385 fmov.l &0x0,%fpcr
14387 or.l %d1,USER_FPSR(%a6) # save INEX,N
14389 fmovm.x &0x01,-(%sp) # save result to stack
14390 mov.w (%sp),%d1 # fetch new exponent
14391 add.l &0xc,%sp # pop result off the stack
14392 andi.l &0x7fff,%d1 # strip sign
14393 sub.l %d0,%d1 # add scale factor
14394 cmp.l %d1,&0x7fff # did divide overflow?
14395 blt.b fsgldiv_normal_exit
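# (the biased extended exponent 0x7fff is reserved for INFs and NaNs, so
# a rescaled exponent reaching or exceeding it means the unscaled divide
# would have overflowed.)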
14397 fsgldiv_ovfl_tst:
14398 or.w &ovfl_inx_mask,2+USER_FPSR(%a6) # set ovfl/aovfl/ainex
14400 mov.b FPCR_ENABLE(%a6),%d1
14401 andi.b &0x13,%d1 # is OVFL or INEX enabled?
14402 bne.b fsgldiv_ovfl_ena # yes
14404 fsgldiv_ovfl_dis:
14405 btst &neg_bit,FPSR_CC(%a6) # is result negative
14406 sne %d1 # set sign param accordingly
14407 mov.l L_SCR3(%a6),%d0 # pass prec:rnd
14408 andi.b &0x30,%d0 # kill precision
14409 bsr.l ovf_res # calculate default result
14410 or.b %d0,FPSR_CC(%a6) # set INF if applicable
14411 fmovm.x (%a0),&0x80 # return default result in fp0
14414 fsgldiv_ovfl_ena:
14415 fmovm.x &0x80,FP_SCR0(%a6) # move result to stack
14417 mov.l %d2,-(%sp) # save d2
14418 mov.w FP_SCR0_EX(%a6),%d1 # fetch {sgn,exp}
14419 mov.l %d1,%d2 # make a copy
14420 andi.l &0x7fff,%d1 # strip sign
14421 andi.w &0x8000,%d2 # keep old sign
14422 sub.l %d0,%d1 # add scale factor
14423 subi.l &0x6000,%d1 # subtract new bias
14424 andi.w &0x7fff,%d1 # clear ms bit
14425 or.w %d2,%d1 # concat old sign,new exp
14426 mov.w %d1,FP_SCR0_EX(%a6) # insert new exponent
14427 mov.l (%sp)+,%d2 # restore d2
14428 fmovm.x FP_SCR0(%a6),&0x40 # return EXOP in fp1
14429 bra.b fsgldiv_ovfl_dis
14431 fsgldiv_unfl:
14432 bset &unfl_bit,FPSR_EXCEPT(%a6) # set unfl exc bit
14434 fmovm.x FP_SCR1(%a6),&0x80 # load dst op
14436 fmov.l &rz_mode*0x10,%fpcr # set FPCR
14437 fmov.l &0x0,%fpsr # clear FPSR
14439 fsgldiv.x FP_SCR0(%a6),%fp0 # execute sgl divide
14441 fmov.l %fpsr,%d1 # save status
14442 fmov.l &0x0,%fpcr # clear FPCR
14444 or.l %d1,USER_FPSR(%a6) # save INEX2,N
14446 mov.b FPCR_ENABLE(%a6),%d1
14447 andi.b &0x0b,%d1 # is UNFL or INEX enabled?
14448 bne.b fsgldiv_unfl_ena # yes
14450 fsgldiv_unfl_dis:
14451 fmovm.x &0x80,FP_SCR0(%a6) # store out result
14453 lea FP_SCR0(%a6),%a0 # pass: result addr
14454 mov.l L_SCR3(%a6),%d1 # pass: rnd prec,mode
14455 bsr.l unf_res4 # calculate default result
14456 or.b %d0,FPSR_CC(%a6) # 'Z' bit may have been set
14457 fmovm.x FP_SCR0(%a6),&0x80 # return default result in fp0
14461 # UNFL is enabled.
14463 fsgldiv_unfl_ena:
14464 fmovm.x FP_SCR1(%a6),&0x40 # load dst op
14466 fmov.l L_SCR3(%a6),%fpcr # set FPCR
14467 fmov.l &0x0,%fpsr # clear FPSR
14469 fsgldiv.x FP_SCR0(%a6),%fp1 # execute sgl divide
14471 fmov.l &0x0,%fpcr # clear FPCR
14473 fmovm.x &0x40,FP_SCR0(%a6) # save result to stack
14474 mov.l %d2,-(%sp) # save d2
14475 mov.w FP_SCR0_EX(%a6),%d1 # fetch {sgn,exp}
14476 mov.l %d1,%d2 # make a copy
14477 andi.l &0x7fff,%d1 # strip sign
14478 andi.w &0x8000,%d2 # keep old sign
14479 sub.l %d0,%d1 # add scale factor
14480 addi.l &0x6000,%d1 # add bias
14481 andi.w &0x7fff,%d1 # clear top bit
14482 or.w %d2,%d1 # concat old sign, new exp
14483 mov.w %d1,FP_SCR0_EX(%a6) # insert new exponent
14484 mov.l (%sp)+,%d2 # restore d2
14485 fmovm.x FP_SCR0(%a6),&0x40 # return EXOP in fp1
14486 bra.b fsgldiv_unfl_dis
14489 # the divide operation MAY underflow:
14491 fsgldiv_may_unfl:
14492 fmovm.x FP_SCR1(%a6),&0x80 # load dst op
14494 fmov.l L_SCR3(%a6),%fpcr # set FPCR
14495 fmov.l &0x0,%fpsr # clear FPSR
14497 fsgldiv.x FP_SCR0(%a6),%fp0 # execute sgl divide
14499 fmov.l %fpsr,%d1 # save status
14500 fmov.l &0x0,%fpcr # clear FPCR
14502 or.l %d1,USER_FPSR(%a6) # save INEX2,N
14504 fabs.x %fp0,%fp1 # make a copy of result
14505 fcmp.b %fp1,&0x1 # is |result| > 1.b?
14506 fbgt.w fsgldiv_normal_exit # no; no underflow occurred
14507 fblt.w fsgldiv_unfl # yes; underflow occurred
14510 # we still don't know if underflow occurred. result is ~ equal to 1. but,
14511 # we don't know if the result was an underflow that rounded up to a 1
14512 # or a normalized number that rounded down to a 1. so, redo the entire
14513 # operation using RZ as the rounding mode to see what the pre-rounded
14514 # result is. this case should be relatively rare.
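#
# (why 1.0 here rather than the 2.0 used on the multiply side: both scaled
# operands carry exponent 0x3fff, so their quotient lies in (0.5, 2.0).
# at the boundary scale factor handled here the unscaled result underflows
# exactly when the infinitely precise scaled quotient falls below 1.0,
# and the RZ rerun settles the ambiguous "rounded to exactly 1.0" case.)
#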
14516 fmovm.x FP_SCR1(%a6),&0x40 # load dst op into %fp1
14518 clr.l %d1 # clear scratch register
14519 ori.b &rz_mode*0x10,%d1 # force RZ rnd mode
14521 fmov.l %d1,%fpcr # set FPCR
14522 fmov.l &0x0,%fpsr # clear FPSR
14524 fsgldiv.x FP_SCR0(%a6),%fp1 # execute sgl divide
14526 fmov.l &0x0,%fpcr # clear FPCR
14527 fabs.x %fp1 # make absolute value
14528 fcmp.b %fp1,&0x1 # is |result| < 1.b?
14529 fbge.w fsgldiv_normal_exit # no; no underflow occurred
14530 bra.w fsgldiv_unfl # yes; underflow occurred
14532 ############################################################################
14535 # Divide: inputs are not both normalized; what are they?
14537 fsgldiv_not_norm:
14538 mov.w (tbl_fsgldiv_op.b,%pc,%d1.w*2),%d1
14539 jmp (tbl_fsgldiv_op.b,%pc,%d1.w*1)
14541 swbeg &48
14542 tbl_fsgldiv_op:
14543 short fsgldiv_norm - tbl_fsgldiv_op # NORM / NORM
14544 short fsgldiv_inf_load - tbl_fsgldiv_op # NORM / ZERO
14545 short fsgldiv_zero_load - tbl_fsgldiv_op # NORM / INF
14546 short fsgldiv_res_qnan - tbl_fsgldiv_op # NORM / QNAN
14547 short fsgldiv_norm - tbl_fsgldiv_op # NORM / DENORM
14548 short fsgldiv_res_snan - tbl_fsgldiv_op # NORM / SNAN
14549 short tbl_fsgldiv_op - tbl_fsgldiv_op #
14550 short tbl_fsgldiv_op - tbl_fsgldiv_op #
14552 short fsgldiv_zero_load - tbl_fsgldiv_op # ZERO / NORM
14553 short fsgldiv_res_operr - tbl_fsgldiv_op # ZERO / ZERO
14554 short fsgldiv_zero_load - tbl_fsgldiv_op # ZERO / INF
14555 short fsgldiv_res_qnan - tbl_fsgldiv_op # ZERO / QNAN
14556 short fsgldiv_zero_load - tbl_fsgldiv_op # ZERO / DENORM
14557 short fsgldiv_res_snan - tbl_fsgldiv_op # ZERO / SNAN
14558 short tbl_fsgldiv_op - tbl_fsgldiv_op #
14559 short tbl_fsgldiv_op - tbl_fsgldiv_op #
14561 short fsgldiv_inf_dst - tbl_fsgldiv_op # INF / NORM
14562 short fsgldiv_inf_dst - tbl_fsgldiv_op # INF / ZERO
14563 short fsgldiv_res_operr - tbl_fsgldiv_op # INF / INF
14564 short fsgldiv_res_qnan - tbl_fsgldiv_op # INF / QNAN
14565 short fsgldiv_inf_dst - tbl_fsgldiv_op # INF / DENORM
14566 short fsgldiv_res_snan - tbl_fsgldiv_op # INF / SNAN
14567 short tbl_fsgldiv_op - tbl_fsgldiv_op #
14568 short tbl_fsgldiv_op - tbl_fsgldiv_op #
14570 short fsgldiv_res_qnan - tbl_fsgldiv_op # QNAN / NORM
14571 short fsgldiv_res_qnan - tbl_fsgldiv_op # QNAN / ZERO
14572 short fsgldiv_res_qnan - tbl_fsgldiv_op # QNAN / INF
14573 short fsgldiv_res_qnan - tbl_fsgldiv_op # QNAN / QNAN
14574 short fsgldiv_res_qnan - tbl_fsgldiv_op # QNAN / DENORM
14575 short fsgldiv_res_snan - tbl_fsgldiv_op # QNAN / SNAN
14576 short tbl_fsgldiv_op - tbl_fsgldiv_op #
14577 short tbl_fsgldiv_op - tbl_fsgldiv_op #
14579 short fsgldiv_norm - tbl_fsgldiv_op # DENORM / NORM
14580 short fsgldiv_inf_load - tbl_fsgldiv_op # DENORM / ZERO
14581 short fsgldiv_zero_load - tbl_fsgldiv_op # DENORM / INF
14582 short fsgldiv_res_qnan - tbl_fsgldiv_op # DENORM / QNAN
14583 short fsgldiv_norm - tbl_fsgldiv_op # DENORM / DENORM
14584 short fsgldiv_res_snan - tbl_fsgldiv_op # DENORM / SNAN
14585 short tbl_fsgldiv_op - tbl_fsgldiv_op #
14586 short tbl_fsgldiv_op - tbl_fsgldiv_op #
14588 short fsgldiv_res_snan - tbl_fsgldiv_op # SNAN / NORM
14589 short fsgldiv_res_snan - tbl_fsgldiv_op # SNAN / ZERO
14590 short fsgldiv_res_snan - tbl_fsgldiv_op # SNAN / INF
14591 short fsgldiv_res_snan - tbl_fsgldiv_op # SNAN / QNAN
14592 short fsgldiv_res_snan - tbl_fsgldiv_op # SNAN / DENORM
14593 short fsgldiv_res_snan - tbl_fsgldiv_op # SNAN / SNAN
14594 short tbl_fsgldiv_op - tbl_fsgldiv_op #
14595 short tbl_fsgldiv_op - tbl_fsgldiv_op #
14597 fsgldiv_res_qnan:
14598 bra.l res_qnan
14599 fsgldiv_res_snan:
14600 bra.l res_snan
14601 fsgldiv_res_operr:
14602 bra.l res_operr
14603 fsgldiv_inf_load:
14604 bra.l fdiv_inf_load
14605 fsgldiv_zero_load:
14606 bra.l fdiv_zero_load
14607 fsgldiv_inf_dst:
14608 bra.l fdiv_inf_dst
14610 #########################################################################
14611 # XDEF **************************************************************** #
14612 # fadd(): emulates the fadd instruction #
14613 # fsadd(): emulates the fadd instruction #
14614 # fdadd(): emulates the fdadd instruction #
14616 # XREF **************************************************************** #
14617 # addsub_scaler2() - scale the operands so they won't take exc #
14618 # ovf_res() - return default overflow result #
14619 # unf_res() - return default underflow result #
14620 # res_qnan() - set QNAN result #
14621 # res_snan() - set SNAN result #
14622 # res_operr() - set OPERR result #
14623 # scale_to_zero_src() - set src operand exponent equal to zero #
14624 # scale_to_zero_dst() - set dst operand exponent equal to zero #
14626 # INPUT *************************************************************** #
14627 # a0 = pointer to extended precision source operand #
14628 # a1 = pointer to extended precision destination operand #
14630 # OUTPUT ************************************************************** #
14631 # fp0 = result #
14632 # fp1 = EXOP (if exception occurred) #
14634 # ALGORITHM *********************************************************** #
14635 # Handle NANs, infinities, and zeroes as special cases. Divide #
14636 # norms into extended, single, and double precision. #
14637 # Do addition after scaling exponents such that exception won't #
14638 # occur. Then, check result exponent to see if exception would have #
14639 # occurred. If so, return default result and maybe EXOP. Else, insert #
14640 # the correct result exponent and return. Set FPSR bits as appropriate. #
14642 #########################################################################
14644 global fsadd
14645 fsadd:
14646 andi.b &0x30,%d0 # clear rnd prec
14647 ori.b &s_mode*0x10,%d0 # insert sgl prec
14648 bra.b fadd
14650 global fdadd
14651 fdadd:
14652 andi.b &0x30,%d0 # clear rnd prec
14653 ori.b &d_mode*0x10,%d0 # insert dbl prec
14655 global fadd
14656 fadd:
14657 mov.l %d0,L_SCR3(%a6) # store rnd info
14659 clr.w %d1
14660 mov.b DTAG(%a6),%d1
14661 lsl.b &0x3,%d1
14662 or.b STAG(%a6),%d1 # combine src tags
14664 bne.w fadd_not_norm # optimize on non-norm input
14667 # ADD: norms and denorms
14669 fadd_norm:
14670 bsr.l addsub_scaler2 # scale exponents
14672 fadd_zero_entry:
14673 fmovm.x FP_SCR1(%a6),&0x80 # load dst op
14675 fmov.l &0x0,%fpsr # clear FPSR
14676 fmov.l L_SCR3(%a6),%fpcr # set FPCR
14678 fadd.x FP_SCR0(%a6),%fp0 # execute add
14680 fmov.l &0x0,%fpcr # clear FPCR
14681 fmov.l %fpsr,%d1 # fetch INEX2,N,Z
14683 or.l %d1,USER_FPSR(%a6) # save exc and ccode bits
14685 fbeq.w fadd_zero_exit # if result is zero, end now
14687 mov.l %d2,-(%sp) # save d2
14689 fmovm.x &0x01,-(%sp) # save result to stack
14691 mov.w 2+L_SCR3(%a6),%d1
14692 lsr.b &0x6,%d1
14694 mov.w (%sp),%d2 # fetch new sign, exp
14695 andi.l &0x7fff,%d2 # strip sign
14696 sub.l %d0,%d2 # add scale factor
14698 cmp.l %d2,(tbl_fadd_ovfl.b,%pc,%d1.w*4) # is it an overflow?
14699 bge.b fadd_ovfl # yes
14701 cmp.l %d2,(tbl_fadd_unfl.b,%pc,%d1.w*4) # is it an underflow?
14702 blt.w fadd_unfl # yes
14703 beq.w fadd_may_unfl # maybe; go find out
14705 fadd_normal:
14706 mov.w (%sp),%d1
14707 andi.w &0x8000,%d1 # keep sign
14708 or.w %d2,%d1 # concat sign,new exp
14709 mov.w %d1,(%sp) # insert new exponent
14711 fmovm.x (%sp)+,&0x80 # return result in fp0
14713 mov.l (%sp)+,%d2 # restore d2
14716 fadd_zero_exit:
14717 # fmov.s &0x00000000,%fp0 # return zero in fp0
14720 tbl_fadd_ovfl:
14721 long 0x7fff # ext ovfl
14722 long 0x407f # sgl ovfl
14723 long 0x43ff # dbl ovfl
14725 tbl_fadd_unfl:
14726 long 0x0000 # ext unfl
14727 long 0x3f81 # sgl unfl
14728 long 0x3c01 # dbl unfl
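#
# the table values are just the boundary biased exponents for each
# destination precision: 0x7fff is the extended INF/NaN exponent, 0x407f
# is one above the largest single normal (0x3fff + 127 = 0x407e), and
# 0x43ff is one above the largest double normal (0x3fff + 1023 = 0x43fe).
# likewise 0x3f81 and 0x3c01 are the smallest single/double normals in
# extended-biased terms, and 0x0000 for extended means only a negative
# rescaled exponent is treated here as a definite underflow.
#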
14730 fadd_ovfl:
14731 or.l &ovfl_inx_mask,USER_FPSR(%a6) # set ovfl/aovfl/ainex
14733 mov.b FPCR_ENABLE(%a6),%d1
14734 andi.b &0x13,%d1 # is OVFL or INEX enabled?
14735 bne.b fadd_ovfl_ena # yes
14737 add.l &0xc,%sp
14738 fadd_ovfl_dis:
14739 btst &neg_bit,FPSR_CC(%a6) # is result negative?
14740 sne %d1 # set sign param accordingly
14741 mov.l L_SCR3(%a6),%d0 # pass prec:rnd
14742 bsr.l ovf_res # calculate default result
14743 or.b %d0,FPSR_CC(%a6) # set INF,N if applicable
14744 fmovm.x (%a0),&0x80 # return default result in fp0
14745 mov.l (%sp)+,%d2 # restore d2
14748 fadd_ovfl_ena:
14749 mov.b L_SCR3(%a6),%d1
14750 andi.b &0xc0,%d1 # is precision extended?
14751 bne.b fadd_ovfl_ena_sd # no; prec = sgl or dbl
14753 fadd_ovfl_ena_cont:
14754 mov.w (%sp),%d1
14755 andi.w &0x8000,%d1 # keep sign
14756 subi.l &0x6000,%d2 # subtract bias adjustment
14757 andi.w &0x7fff,%d2
14758 or.w %d2,%d1 # concat sign,new exp
14759 mov.w %d1,(%sp) # insert new exponent
14761 fmovm.x (%sp)+,&0x40 # return EXOP in fp1
14762 bra.b fadd_ovfl_dis
14764 fadd_ovfl_ena_sd:
14765 fmovm.x FP_SCR1(%a6),&0x80 # load dst op
14767 mov.l L_SCR3(%a6),%d1
14768 andi.b &0x30,%d1 # keep rnd mode
14769 fmov.l %d1,%fpcr # set FPCR
14771 fadd.x FP_SCR0(%a6),%fp0 # execute add
14773 fmov.l &0x0,%fpcr # clear FPCR
14775 add.l &0xc,%sp
14776 fmovm.x &0x01,-(%sp)
14777 bra.b fadd_ovfl_ena_cont
14779 fadd_unfl:
14780 bset &unfl_bit,FPSR_EXCEPT(%a6) # set unfl exc bit
14782 add.l &0xc,%sp
14784 fmovm.x FP_SCR1(%a6),&0x80 # load dst op
14786 fmov.l &rz_mode*0x10,%fpcr # set FPCR
14787 fmov.l &0x0,%fpsr # clear FPSR
14789 fadd.x FP_SCR0(%a6),%fp0 # execute add
14791 fmov.l &0x0,%fpcr # clear FPCR
14792 fmov.l %fpsr,%d1 # save status
14794 or.l %d1,USER_FPSR(%a6) # save INEX,N
14796 mov.b FPCR_ENABLE(%a6),%d1
14797 andi.b &0x0b,%d1 # is UNFL or INEX enabled?
14798 bne.b fadd_unfl_ena # yes
14800 fadd_unfl_dis:
14801 fmovm.x &0x80,FP_SCR0(%a6) # store out result
14803 lea FP_SCR0(%a6),%a0 # pass: result addr
14804 mov.l L_SCR3(%a6),%d1 # pass: rnd prec,mode
14805 bsr.l unf_res # calculate default result
14806 or.b %d0,FPSR_CC(%a6) # 'Z' bit may have been set
14807 fmovm.x FP_SCR0(%a6),&0x80 # return default result in fp0
14808 mov.l (%sp)+,%d2 # restore d2
14811 fadd_unfl_ena:
14812 fmovm.x FP_SCR1(%a6),&0x40 # load dst op
14814 mov.l L_SCR3(%a6),%d1
14815 andi.b &0xc0,%d1 # is precision extended?
14816 bne.b fadd_unfl_ena_sd # no; sgl or dbl
14818 fmov.l L_SCR3(%a6),%fpcr # set FPCR
14820 fadd_unfl_ena_cont:
14821 fmov.l &0x0,%fpsr # clear FPSR
14823 fadd.x FP_SCR0(%a6),%fp1 # execute add
14825 fmov.l &0x0,%fpcr # clear FPCR
14827 fmovm.x &0x40,FP_SCR0(%a6) # save result to stack
14828 mov.w FP_SCR0_EX(%a6),%d1 # fetch {sgn,exp}
14829 mov.l %d1,%d2 # make a copy
14830 andi.l &0x7fff,%d1 # strip sign
14831 andi.w &0x8000,%d2 # keep old sign
14832 sub.l %d0,%d1 # add scale factor
14833 addi.l &0x6000,%d1 # add new bias
14834 andi.w &0x7fff,%d1 # clear top bit
14835 or.w %d2,%d1 # concat sign,new exp
14836 mov.w %d1,FP_SCR0_EX(%a6) # insert new exponent
14837 fmovm.x FP_SCR0(%a6),&0x40 # return EXOP in fp1
14838 bra.w fadd_unfl_dis
14840 fadd_unfl_ena_sd:
14841 mov.l L_SCR3(%a6),%d1
14842 andi.b &0x30,%d1 # use only rnd mode
14843 fmov.l %d1,%fpcr # set FPCR
14845 bra.b fadd_unfl_ena_cont
14848 # result is equal to the smallest normalized number in the selected precision
14849 # if the precision is extended, this result could not have come from an
14850 # underflow that rounded up.
14852 fadd_may_unfl:
14853 mov.l L_SCR3(%a6),%d1
14854 andi.b &0xc0,%d1
14855 beq.w fadd_normal # yes; no underflow occurred
14857 mov.l 0x4(%sp),%d1 # extract hi(man)
14858 cmpi.l %d1,&0x80000000 # is hi(man) = 0x80000000?
14859 bne.w fadd_normal # no; no underflow occurred
14861 tst.l 0x8(%sp) # is lo(man) = 0x0?
14862 bne.w fadd_normal # no; no underflow occurred
14864 btst &inex2_bit,FPSR_EXCEPT(%a6) # is INEX2 set?
14865 beq.w fadd_normal # no; no underflow occurred
14868 # ok, so now the result has an exponent equal to the smallest normalized
14869 # exponent for the selected precision. also, the mantissa is equal to
14870 # 0x8000000000000000 and this mantissa is the result of rounding non-zero
14871 # g,r,s.
14872 # now, we must determine whether the pre-rounded result was an underflow
14873 # rounded "up" or a normalized number rounded "down".
14874 # so, we do this by re-executing the add using RZ as the rounding mode and
14875 # seeing if the new result is smaller or equal to the current result.
14877 fmovm.x FP_SCR1(%a6),&0x40 # load dst op into fp1
14879 mov.l L_SCR3(%a6),%d1
14880 andi.b &0xc0,%d1 # keep rnd prec
14881 ori.b &rz_mode*0x10,%d1 # insert rnd mode
14882 fmov.l %d1,%fpcr # set FPCR
14883 fmov.l &0x0,%fpsr # clear FPSR
14885 fadd.x FP_SCR0(%a6),%fp1 # execute add
14887 fmov.l &0x0,%fpcr # clear FPCR
14889 fabs.x %fp0 # compare absolute values
14890 fabs.x %fp1
14891 fcmp.x %fp0,%fp1 # is first result > second?
14893 fbgt.w fadd_unfl # yes; it's an underflow
14894 bra.w fadd_normal # no; it's not an underflow
14896 ##########################################################################
14899 # Add: inputs are not both normalized; what are they?
14901 fadd_not_norm:
14902 mov.w (tbl_fadd_op.b,%pc,%d1.w*2),%d1
14903 jmp (tbl_fadd_op.b,%pc,%d1.w*1)
14905 swbeg &48
14906 tbl_fadd_op:
14907 short fadd_norm - tbl_fadd_op # NORM + NORM
14908 short fadd_zero_src - tbl_fadd_op # NORM + ZERO
14909 short fadd_inf_src - tbl_fadd_op # NORM + INF
14910 short fadd_res_qnan - tbl_fadd_op # NORM + QNAN
14911 short fadd_norm - tbl_fadd_op # NORM + DENORM
14912 short fadd_res_snan - tbl_fadd_op # NORM + SNAN
14913 short tbl_fadd_op - tbl_fadd_op #
14914 short tbl_fadd_op - tbl_fadd_op #
14916 short fadd_zero_dst - tbl_fadd_op # ZERO + NORM
14917 short fadd_zero_2 - tbl_fadd_op # ZERO + ZERO
14918 short fadd_inf_src - tbl_fadd_op # ZERO + INF
14919 short fadd_res_qnan - tbl_fadd_op # ZERO + QNAN
14920 short fadd_zero_dst - tbl_fadd_op # ZERO + DENORM
14921 short fadd_res_snan - tbl_fadd_op # ZERO + SNAN
14922 short tbl_fadd_op - tbl_fadd_op #
14923 short tbl_fadd_op - tbl_fadd_op #
14925 short fadd_inf_dst - tbl_fadd_op # INF + NORM
14926 short fadd_inf_dst - tbl_fadd_op # INF + ZERO
14927 short fadd_inf_2 - tbl_fadd_op # INF + INF
14928 short fadd_res_qnan - tbl_fadd_op # INF + QNAN
14929 short fadd_inf_dst - tbl_fadd_op # INF + DENORM
14930 short fadd_res_snan - tbl_fadd_op # INF + SNAN
14931 short tbl_fadd_op - tbl_fadd_op #
14932 short tbl_fadd_op - tbl_fadd_op #
14934 short fadd_res_qnan - tbl_fadd_op # QNAN + NORM
14935 short fadd_res_qnan - tbl_fadd_op # QNAN + ZERO
14936 short fadd_res_qnan - tbl_fadd_op # QNAN + INF
14937 short fadd_res_qnan - tbl_fadd_op # QNAN + QNAN
14938 short fadd_res_qnan - tbl_fadd_op # QNAN + DENORM
14939 short fadd_res_snan - tbl_fadd_op # QNAN + SNAN
14940 short tbl_fadd_op - tbl_fadd_op #
14941 short tbl_fadd_op - tbl_fadd_op #
14943 short fadd_norm - tbl_fadd_op # DENORM + NORM
14944 short fadd_zero_src - tbl_fadd_op # DENORM + ZERO
14945 short fadd_inf_src - tbl_fadd_op # DENORM + INF
14946 short fadd_res_qnan - tbl_fadd_op # DENORM + QNAN
14947 short fadd_norm - tbl_fadd_op # DENORM + DENORM
14948 short fadd_res_snan - tbl_fadd_op # DENORM + SNAN
14949 short tbl_fadd_op - tbl_fadd_op #
14950 short tbl_fadd_op - tbl_fadd_op #
14952 short fadd_res_snan - tbl_fadd_op # SNAN + NORM
14953 short fadd_res_snan - tbl_fadd_op # SNAN + ZERO
14954 short fadd_res_snan - tbl_fadd_op # SNAN + INF
14955 short fadd_res_snan - tbl_fadd_op # SNAN + QNAN
14956 short fadd_res_snan - tbl_fadd_op # SNAN + DENORM
14957 short fadd_res_snan - tbl_fadd_op # SNAN + SNAN
14958 short tbl_fadd_op - tbl_fadd_op #
14959 short tbl_fadd_op - tbl_fadd_op #
14961 fadd_res_qnan:
14962 bra.l res_qnan
14963 fadd_res_snan:
14964 bra.l res_snan
14967 # both operands are ZEROes
14969 fadd_zero_2:
14970 mov.b SRC_EX(%a0),%d0 # are the signs opposite
14971 mov.b DST_EX(%a1),%d1
14972 eor.b %d0,%d1
14973 bmi.w fadd_zero_2_chk_rm # weed out (-ZERO)+(+ZERO)
14975 # the signs are the same. so determine whether they are positive or negative
14976 # and return the appropriately signed zero.
14977 tst.b %d0 # are ZEROes positive or negative?
14978 bmi.b fadd_zero_rm # negative
14979 fmov.s &0x00000000,%fp0 # return +ZERO
14980 mov.b &z_bmask,FPSR_CC(%a6) # set Z
14984 # the ZEROes have opposite signs:
14985 # - therefore, we return +ZERO if the rounding modes are RN,RZ, or RP.
14986 # - -ZERO is returned in the case of RM.
14988 fadd_zero_2_chk_rm:
14989 mov.b 3+L_SCR3(%a6),%d1
14990 andi.b &0x30,%d1 # extract rnd mode
14991 cmpi.b %d1,&rm_mode*0x10 # is rnd mode == RM?
14992 beq.b fadd_zero_rm # yes
14993 fmov.s &0x00000000,%fp0 # return +ZERO
14994 mov.b &z_bmask,FPSR_CC(%a6) # set Z
14997 fadd_zero_rm:
14998 fmov.s &0x80000000,%fp0 # return -ZERO
14999 mov.b &neg_bmask+z_bmask,FPSR_CC(%a6) # set NEG/Z
15003 # one operand is a ZERO and the other is a DENORM or NORM. scale
15004 # the DENORM or NORM and jump to the regular fadd routine.
15006 fadd_zero_dst:
15007 mov.w SRC_EX(%a0),FP_SCR0_EX(%a6)
15008 mov.l SRC_HI(%a0),FP_SCR0_HI(%a6)
15009 mov.l SRC_LO(%a0),FP_SCR0_LO(%a6)
15010 bsr.l scale_to_zero_src # scale the operand
15011 clr.w FP_SCR1_EX(%a6)
15012 clr.l FP_SCR1_HI(%a6)
15013 clr.l FP_SCR1_LO(%a6)
15014 bra.w fadd_zero_entry # go execute fadd
15016 fadd_zero_src:
15017 mov.w DST_EX(%a1),FP_SCR1_EX(%a6)
15018 mov.l DST_HI(%a1),FP_SCR1_HI(%a6)
15019 mov.l DST_LO(%a1),FP_SCR1_LO(%a6)
15020 bsr.l scale_to_zero_dst # scale the operand
15021 clr.w FP_SCR0_EX(%a6)
15022 clr.l FP_SCR0_HI(%a6)
15023 clr.l FP_SCR0_LO(%a6)
15024 bra.w fadd_zero_entry # go execute fadd
15027 # both operands are INFs. an OPERR will result if the INFs have
15028 # different signs. else, an INF of the same sign is returned
15030 fadd_inf_2:
15031 mov.b SRC_EX(%a0),%d0 # exclusive or the signs
15032 mov.b DST_EX(%a1),%d1
15033 eor.b %d1,%d0
15034 bmi.l res_operr # weed out (-INF)+(+INF)
15036 # ok, so it's not an OPERR. but, we do have to remember to return the
15037 # src INF since that's where the 881/882 gets the j-bit from...
15040 # operands are INF and one of {ZERO, INF, DENORM, NORM}
15042 fadd_inf_src:
15043 fmovm.x SRC(%a0),&0x80 # return src INF
15044 tst.b SRC_EX(%a0) # is INF positive?
15045 bpl.b fadd_inf_done # yes; we're done
15046 mov.b &neg_bmask+inf_bmask,FPSR_CC(%a6) # set INF/NEG
15050 # operands are INF and one of {ZERO, INF, DENORM, NORM}
15052 fadd_inf_dst:
15053 fmovm.x DST(%a1),&0x80 # return dst INF
15054 tst.b DST_EX(%a1) # is INF positive?
15055 bpl.b fadd_inf_done # yes; we're done
15056 mov.b &neg_bmask+inf_bmask,FPSR_CC(%a6) # set INF/NEG
15059 fadd_inf_done:
15060 mov.b &inf_bmask,FPSR_CC(%a6) # set INF
15063 #########################################################################
15064 # XDEF **************************************************************** #
15065 # fsub(): emulates the fsub instruction #
15066 # fssub(): emulates the fssub instruction #
15067 # fdsub(): emulates the fdsub instruction #
15069 # XREF **************************************************************** #
15070 # addsub_scaler2() - scale the operands so they won't take exc #
15071 # ovf_res() - return default overflow result #
15072 # unf_res() - return default underflow result #
15073 # res_qnan() - set QNAN result #
15074 # res_snan() - set SNAN result #
15075 # res_operr() - set OPERR result #
15076 # scale_to_zero_src() - set src operand exponent equal to zero #
15077 # scale_to_zero_dst() - set dst operand exponent equal to zero #
15079 # INPUT *************************************************************** #
15080 # a0 = pointer to extended precision source operand #
15081 # a1 = pointer to extended precision destination operand #
15083 # OUTPUT ************************************************************** #
15084 # fp0 = result #
15085 # fp1 = EXOP (if exception occurred) #
15087 # ALGORITHM *********************************************************** #
15088 # Handle NANs, infinities, and zeroes as special cases. Divide #
15089 # norms into extended, single, and double precision. #
15090 # Do subtraction after scaling exponents such that exception won't#
15091 # occur. Then, check result exponent to see if exception would have #
15092 # occurred. If so, return default result and maybe EXOP. Else, insert #
15093 # the correct result exponent and return. Set FPSR bits as appropriate. #
15095 #########################################################################
15097 global fssub
15098 fssub:
15099 andi.b &0x30,%d0 # clear rnd prec
15100 ori.b &s_mode*0x10,%d0 # insert sgl prec
15101 bra.b fsub
15103 global fdsub
15104 fdsub:
15105 andi.b &0x30,%d0 # clear rnd prec
15106 ori.b &d_mode*0x10,%d0 # insert dbl prec
15108 global fsub
15109 fsub:
15110 mov.l %d0,L_SCR3(%a6) # store rnd info
15112 clr.w %d1
15113 mov.b DTAG(%a6),%d1
15114 lsl.b &0x3,%d1
15115 or.b STAG(%a6),%d1 # combine src tags
15117 bne.w fsub_not_norm # optimize on non-norm input
15120 # SUB: norms and denorms
15122 fsub_norm:
15123 bsr.l addsub_scaler2 # scale exponents
15125 fsub_zero_entry:
15126 fmovm.x FP_SCR1(%a6),&0x80 # load dst op
15128 fmov.l &0x0,%fpsr # clear FPSR
15129 fmov.l L_SCR3(%a6),%fpcr # set FPCR
15131 fsub.x FP_SCR0(%a6),%fp0 # execute subtract
15133 fmov.l &0x0,%fpcr # clear FPCR
15134 fmov.l %fpsr,%d1 # fetch INEX2, N, Z
15136 or.l %d1,USER_FPSR(%a6) # save exc and ccode bits
15138 fbeq.w fsub_zero_exit # if result zero, end now
15140 mov.l %d2,-(%sp) # save d2
15142 fmovm.x &0x01,-(%sp) # save result to stack
15144 mov.w 2+L_SCR3(%a6),%d1
15145 lsr.b &0x6,%d1
15147 mov.w (%sp),%d2 # fetch new exponent
15148 andi.l &0x7fff,%d2 # strip sign
15149 sub.l %d0,%d2 # add scale factor
15151 cmp.l %d2,(tbl_fsub_ovfl.b,%pc,%d1.w*4) # is it an overflow?
15152 bge.b fsub_ovfl # yes
15154 cmp.l %d2,(tbl_fsub_unfl.b,%pc,%d1.w*4) # is it an underflow?
15155 blt.w fsub_unfl # yes
15156 beq.w fsub_may_unfl # maybe; go find out
15158 fsub_normal:
15159 mov.w (%sp),%d1
15160 andi.w &0x8000,%d1 # keep sign
15161 or.w %d2,%d1 # concat sign,new exp
15162 mov.w %d1,(%sp) # insert new exponent
15164 fmovm.x (%sp)+,&0x80 # return result in fp0
15166 mov.l (%sp)+,%d2 # restore d2
15169 fsub_zero_exit:
15170 # fmov.s &0x00000000,%fp0 # return zero in fp0
15173 tbl_fsub_ovfl:
15174 long 0x7fff # ext ovfl
15175 long 0x407f # sgl ovfl
15176 long 0x43ff # dbl ovfl
15178 tbl_fsub_unfl:
15179 long 0x0000 # ext unfl
15180 long 0x3f81 # sgl unfl
15181 long 0x3c01 # dbl unfl
15183 fsub_ovfl:
15184 or.l &ovfl_inx_mask,USER_FPSR(%a6) # set ovfl/aovfl/ainex
15186 mov.b FPCR_ENABLE(%a6),%d1
15187 andi.b &0x13,%d1 # is OVFL or INEX enabled?
15188 bne.b fsub_ovfl_ena # yes
15190 add.l &0xc,%sp
15191 fsub_ovfl_dis:
15192 btst &neg_bit,FPSR_CC(%a6) # is result negative?
15193 sne %d1 # set sign param accordingly
15194 mov.l L_SCR3(%a6),%d0 # pass prec:rnd
15195 bsr.l ovf_res # calculate default result
15196 or.b %d0,FPSR_CC(%a6) # set INF,N if applicable
15197 fmovm.x (%a0),&0x80 # return default result in fp0
15198 mov.l (%sp)+,%d2 # restore d2
15201 fsub_ovfl_ena:
15202 mov.b L_SCR3(%a6),%d1
15203 andi.b &0xc0,%d1 # is precision extended?
15204 bne.b fsub_ovfl_ena_sd # no
15206 fsub_ovfl_ena_cont:
15207 mov.w (%sp),%d1 # fetch {sgn,exp}
15208 andi.w &0x8000,%d1 # keep sign
15209 subi.l &0x6000,%d2 # subtract new bias
15210 andi.w &0x7fff,%d2 # clear top bit
15211 or.w %d2,%d1 # concat sign,exp
15212 mov.w %d1,(%sp) # insert new exponent
15214 fmovm.x (%sp)+,&0x40 # return EXOP in fp1
15215 bra.b fsub_ovfl_dis
15217 fsub_ovfl_ena_sd:
15218 fmovm.x FP_SCR1(%a6),&0x80 # load dst op
15220 mov.l L_SCR3(%a6),%d1
15221 andi.b &0x30,%d1 # clear rnd prec
15222 fmov.l %d1,%fpcr # set FPCR
15224 fsub.x FP_SCR0(%a6),%fp0 # execute subtract
15226 fmov.l &0x0,%fpcr # clear FPCR
15228 add.l &0xc,%sp
15229 fmovm.x &0x01,-(%sp)
15230 bra.b fsub_ovfl_ena_cont
15232 fsub_unfl:
15233 bset &unfl_bit,FPSR_EXCEPT(%a6) # set unfl exc bit
15235 add.l &0xc,%sp
15237 fmovm.x FP_SCR1(%a6),&0x80 # load dst op
15239 fmov.l &rz_mode*0x10,%fpcr # set FPCR
15240 fmov.l &0x0,%fpsr # clear FPSR
15242 fsub.x FP_SCR0(%a6),%fp0 # execute subtract
15244 fmov.l &0x0,%fpcr # clear FPCR
15245 fmov.l %fpsr,%d1 # save status
15247 or.l %d1,USER_FPSR(%a6)
15249 mov.b FPCR_ENABLE(%a6),%d1
15250 andi.b &0x0b,%d1 # is UNFL or INEX enabled?
15251 bne.b fsub_unfl_ena # yes
15253 fsub_unfl_dis:
15254 fmovm.x &0x80,FP_SCR0(%a6) # store out result
15256 lea FP_SCR0(%a6),%a0 # pass: result addr
15257 mov.l L_SCR3(%a6),%d1 # pass: rnd prec,mode
15258 bsr.l unf_res # calculate default result
15259 or.b %d0,FPSR_CC(%a6) # 'Z' may have been set
15260 fmovm.x FP_SCR0(%a6),&0x80 # return default result in fp0
15261 mov.l (%sp)+,%d2 # restore d2
15264 fsub_unfl_ena:
15265 fmovm.x FP_SCR1(%a6),&0x40
15267 mov.l L_SCR3(%a6),%d1
15268 andi.b &0xc0,%d1 # is precision extended?
15269 bne.b fsub_unfl_ena_sd # no
15271 fmov.l L_SCR3(%a6),%fpcr # set FPCR
15273 fsub_unfl_ena_cont:
15274 fmov.l &0x0,%fpsr # clear FPSR
15276 fsub.x FP_SCR0(%a6),%fp1 # execute subtract
15278 fmov.l &0x0,%fpcr # clear FPCR
15280 fmovm.x &0x40,FP_SCR0(%a6) # store result to stack
15281 mov.w FP_SCR0_EX(%a6),%d1 # fetch {sgn,exp}
15282 mov.l %d1,%d2 # make a copy
15283 andi.l &0x7fff,%d1 # strip sign
15284 andi.w &0x8000,%d2 # keep old sign
15285 sub.l %d0,%d1 # add scale factor
15286 addi.l &0x6000,%d1 # subtract new bias
15287 andi.w &0x7fff,%d1 # clear top bit
15288 or.w %d2,%d1 # concat sgn,exp
15289 mov.w %d1,FP_SCR0_EX(%a6) # insert new exponent
15290 fmovm.x FP_SCR0(%a6),&0x40 # return EXOP in fp1
15291 bra.w fsub_unfl_dis
15293 fsub_unfl_ena_sd:
15294 mov.l L_SCR3(%a6),%d1
15295 andi.b &0x30,%d1 # clear rnd prec
15296 fmov.l %d1,%fpcr # set FPCR
15298 bra.b fsub_unfl_ena_cont
15301 # the result is equal to the smallest normalized number in the selected precision.
15302 # if the precision is extended, this result could not have come from an
15303 # underflow that rounded up.
15305 fsub_may_unfl:
15306 mov.l L_SCR3(%a6),%d1
15307 andi.b &0xc0,%d1 # fetch rnd prec
15308 beq.w fsub_normal # yes; no underflow occurred
15310 mov.l 0x4(%sp),%d1
15311 cmpi.l %d1,&0x80000000 # is hi(man) = 0x80000000?
15312 bne.w fsub_normal # no; no underflow occurred
15314 tst.l 0x8(%sp) # is lo(man) = 0x0?
15315 bne.w fsub_normal # no; no underflow occurred
15317 btst &inex2_bit,FPSR_EXCEPT(%a6) # is INEX2 set?
15318 beq.w fsub_normal # no; no underflow occurred
15321 # ok, so now the result has an exponent equal to the smallest normalized
15322 # exponent for the selected precision. also, the mantissa is equal to
15323 # 0x8000000000000000 and this mantissa is the result of rounding non-zero
15324 # g,r,s.
15325 # now, we must determine whether the pre-rounded result was an underflow
15326 # rounded "up" or a normalized number rounded "down".
15327 # so, we do this by re-executing the subtract using RZ as the rounding mode and
15328 # seeing if the new result is smaller or equal to the current result.
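#
# A rough C sketch of the check performed below (illustrative only: every
# name here is an ad-hoc stand-in, not an FPSP symbol; "sub_rz" means the
# subtract re-executed with round-to-zero at the selected precision):
#
# long double rz = sub_rz(dst, src); /* RZ redo of the subtract */
# if (mag(rz) < mag(result)) /* RZ result strictly smaller? */
# underflow(); /* rounded up from below the minimum */
# else
# normal(); /* genuinely normalized result */
#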
15330 fmovm.x FP_SCR1(%a6),&0x40 # load dst op into fp1
15332 mov.l L_SCR3(%a6),%d1
15333 andi.b &0xc0,%d1 # keep rnd prec
15334 ori.b &rz_mode*0x10,%d1 # insert rnd mode
15335 fmov.l %d1,%fpcr # set FPCR
15336 fmov.l &0x0,%fpsr # clear FPSR
15338 fsub.x FP_SCR0(%a6),%fp1 # execute subtract
15340 fmov.l &0x0,%fpcr # clear FPCR
15342 fabs.x %fp0 # compare absolute values
15343 fabs.x %fp1
15344 fcmp.x %fp0,%fp1 # is first result > second?
15346 fbgt.w fsub_unfl # yes; it's an underflow
15347 bra.w fsub_normal # no; it's not an underflow
15349 ##########################################################################
15352 # Sub: inputs are not both normalized; what are they?
15354 fsub_not_norm:
15355 mov.w (tbl_fsub_op.b,%pc,%d1.w*2),%d1
15356 jmp (tbl_fsub_op.b,%pc,%d1.w*1)
15358 swbeg &48
15359 tbl_fsub_op:
15360 short fsub_norm - tbl_fsub_op # NORM - NORM
15361 short fsub_zero_src - tbl_fsub_op # NORM - ZERO
15362 short fsub_inf_src - tbl_fsub_op # NORM - INF
15363 short fsub_res_qnan - tbl_fsub_op # NORM - QNAN
15364 short fsub_norm - tbl_fsub_op # NORM - DENORM
15365 short fsub_res_snan - tbl_fsub_op # NORM - SNAN
15366 short tbl_fsub_op - tbl_fsub_op #
15367 short tbl_fsub_op - tbl_fsub_op #
15369 short fsub_zero_dst - tbl_fsub_op # ZERO - NORM
15370 short fsub_zero_2 - tbl_fsub_op # ZERO - ZERO
15371 short fsub_inf_src - tbl_fsub_op # ZERO - INF
15372 short fsub_res_qnan - tbl_fsub_op # ZERO - QNAN
15373 short fsub_zero_dst - tbl_fsub_op # ZERO - DENORM
15374 short fsub_res_snan - tbl_fsub_op # ZERO - SNAN
15375 short tbl_fsub_op - tbl_fsub_op #
15376 short tbl_fsub_op - tbl_fsub_op #
15378 short fsub_inf_dst - tbl_fsub_op # INF - NORM
15379 short fsub_inf_dst - tbl_fsub_op # INF - ZERO
15380 short fsub_inf_2 - tbl_fsub_op # INF - INF
15381 short fsub_res_qnan - tbl_fsub_op # INF - QNAN
15382 short fsub_inf_dst - tbl_fsub_op # INF - DENORM
15383 short fsub_res_snan - tbl_fsub_op # INF - SNAN
15384 short tbl_fsub_op - tbl_fsub_op #
15385 short tbl_fsub_op - tbl_fsub_op #
15387 short fsub_res_qnan - tbl_fsub_op # QNAN - NORM
15388 short fsub_res_qnan - tbl_fsub_op # QNAN - ZERO
15389 short fsub_res_qnan - tbl_fsub_op # QNAN - INF
15390 short fsub_res_qnan - tbl_fsub_op # QNAN - QNAN
15391 short fsub_res_qnan - tbl_fsub_op # QNAN - DENORM
15392 short fsub_res_snan - tbl_fsub_op # QNAN - SNAN
15393 short tbl_fsub_op - tbl_fsub_op #
15394 short tbl_fsub_op - tbl_fsub_op #
15396 short fsub_norm - tbl_fsub_op # DENORM - NORM
15397 short fsub_zero_src - tbl_fsub_op # DENORM - ZERO
15398 short fsub_inf_src - tbl_fsub_op # DENORM - INF
15399 short fsub_res_qnan - tbl_fsub_op # DENORM - QNAN
15400 short fsub_norm - tbl_fsub_op # DENORM - DENORM
15401 short fsub_res_snan - tbl_fsub_op # DENORM - SNAN
15402 short tbl_fsub_op - tbl_fsub_op #
15403 short tbl_fsub_op - tbl_fsub_op #
15405 short fsub_res_snan - tbl_fsub_op # SNAN - NORM
15406 short fsub_res_snan - tbl_fsub_op # SNAN - ZERO
15407 short fsub_res_snan - tbl_fsub_op # SNAN - INF
15408 short fsub_res_snan - tbl_fsub_op # SNAN - QNAN
15409 short fsub_res_snan - tbl_fsub_op # SNAN - DENORM
15410 short fsub_res_snan - tbl_fsub_op # SNAN - SNAN
15411 short tbl_fsub_op - tbl_fsub_op #
15412 short tbl_fsub_op - tbl_fsub_op #
15414 fsub_res_qnan:
15415 bra.l res_qnan
15416 fsub_res_snan:
15417 bra.l res_snan
15420 # both operands are ZEROes
15422 fsub_zero_2:
15423 mov.b SRC_EX(%a0),%d0
15424 mov.b DST_EX(%a1),%d1
15425 eor.b %d1,%d0
15426 bpl.b fsub_zero_2_chk_rm
15428 # the signs are opposite, so, return a ZERO w/ the sign of the dst ZERO
15429 tst.b %d0 # is dst negative?
15430 bmi.b fsub_zero_2_rm # yes
15431 fmov.s &0x00000000,%fp0 # no; return +ZERO
15432 mov.b &z_bmask,FPSR_CC(%a6) # set Z
rts
15436 # the ZEROes have the same signs:
15437 # - therefore, we return +ZERO if the rounding mode is RN,RZ, or RP
15438 # - -ZERO is returned in the case of RM.
15440 fsub_zero_2_chk_rm:
15441 mov.b 3+L_SCR3(%a6),%d1
15442 andi.b &0x30,%d1 # extract rnd mode
15443 cmpi.b %d1,&rm_mode*0x10 # is rnd mode = RM?
15444 beq.b fsub_zero_2_rm # yes
15445 fmov.s &0x00000000,%fp0 # no; return +ZERO
15446 mov.b &z_bmask,FPSR_CC(%a6) # set Z
rts
15449 fsub_zero_2_rm:
15450 fmov.s &0x80000000,%fp0 # return -ZERO
15451 mov.b &z_bmask+neg_bmask,FPSR_CC(%a6) # set Z/NEG
rts
15455 # one operand is a ZERO and the other is a DENORM or a NORM.
15456 # scale the DENORM or NORM and jump to the regular fsub routine.
15458 fsub_zero_dst:
15459 mov.w SRC_EX(%a0),FP_SCR0_EX(%a6)
15460 mov.l SRC_HI(%a0),FP_SCR0_HI(%a6)
15461 mov.l SRC_LO(%a0),FP_SCR0_LO(%a6)
15462 bsr.l scale_to_zero_src # scale the operand
15463 clr.w FP_SCR1_EX(%a6)
15464 clr.l FP_SCR1_HI(%a6)
15465 clr.l FP_SCR1_LO(%a6)
15466 bra.w fsub_zero_entry # go execute fsub
15468 fsub_zero_src:
15469 mov.w DST_EX(%a1),FP_SCR1_EX(%a6)
15470 mov.l DST_HI(%a1),FP_SCR1_HI(%a6)
15471 mov.l DST_LO(%a1),FP_SCR1_LO(%a6)
15472 bsr.l scale_to_zero_dst # scale the operand
15473 clr.w FP_SCR0_EX(%a6)
15474 clr.l FP_SCR0_HI(%a6)
15475 clr.l FP_SCR0_LO(%a6)
15476 bra.w fsub_zero_entry # go execute fsub
15479 # both operands are INFs. an OPERR will result if the INFs have the
15480 # same signs. else, return an INF w/ the sign of the dst INF.
15482 fsub_inf_2:
15483 mov.b SRC_EX(%a0),%d0 # exclusive or the signs
15484 mov.b DST_EX(%a1),%d1
15485 eor.b %d1,%d0
15486 bpl.l res_operr # weed out (-INF)+(+INF)
15488 # ok, so it's not an OPERR. but we do have to remember to return
15489 # the src INF since that's where the 881/882 gets the j-bit.
15491 fsub_inf_src:
15492 fmovm.x SRC(%a0),&0x80 # return src INF
15493 fneg.x %fp0 # invert sign
15494 fbge.w fsub_inf_done # sign is now positive
15495 mov.b &neg_bmask+inf_bmask,FPSR_CC(%a6) # set INF/NEG
rts
15498 fsub_inf_dst:
15499 fmovm.x DST(%a1),&0x80 # return dst INF
15500 tst.b DST_EX(%a1) # is INF negative?
15501 bpl.b fsub_inf_done # no
15502 mov.b &neg_bmask+inf_bmask,FPSR_CC(%a6) # set INF/NEG
rts
15505 fsub_inf_done:
15506 mov.b &inf_bmask,FPSR_CC(%a6) # set INF
rts
15509 #########################################################################
15510 # XDEF **************************************************************** #
15511 # fsqrt(): emulates the fsqrt instruction #
15512 # fssqrt(): emulates the fssqrt instruction #
15513 # fdsqrt(): emulates the fdsqrt instruction #
15515 # XREF **************************************************************** #
15516 # scale_sqrt() - scale the source operand #
15517 # unf_res() - return default underflow result #
15518 # ovf_res() - return default overflow result #
15519 # res_qnan_1op() - return QNAN result #
15520 # res_snan_1op() - return SNAN result #
15522 # INPUT *************************************************************** #
15523 # a0 = pointer to extended precision source operand #
15524 # d0 rnd prec,mode #
15526 # OUTPUT ************************************************************** #
15527 # fp0 = result #
15528 # fp1 = EXOP (if exception occurred) #
15530 # ALGORITHM *********************************************************** #
15531 # Handle NANs, infinities, and zeroes as special cases. Divide #
15532 # norms/denorms into ext/sgl/dbl precision. #
15533 # For norms/denorms, scale the exponents such that a sqrt #
15534 # instruction won't cause an exception. Use the regular fsqrt to #
15535 # compute a result. Check if the regular operands would have taken #
15536 # an exception. If so, return the default overflow/underflow result #
15537 # and return the EXOP if exceptions are enabled. Else, scale the #
15538 # result operand to the proper exponent. #
15540 #########################################################################
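#
# A rough C outline of the flow described above (illustrative only; the
# helpers are ad-hoc stand-ins, not FPSP routine names):
#
# int scale = scale_sqrt_exp(&op); /* bring op into a safe range */
# long double res = do_fsqrt(op); /* fsqrt can no longer trap */
# if (would_ovfl(scale)) /* unscaled op would have overflowed? */
# return dflt_ovfl(res); /* default result (+ EXOP if OVFL enabled) */
# if (would_unfl(scale))
# return dflt_unfl(res); /* default result (+ EXOP if UNFL enabled) */
# return rescale(res, scale); /* put the true exponent back */
#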
15542 global fssqrt
15543 fssqrt:
15544 andi.b &0x30,%d0 # clear rnd prec
15545 ori.b &s_mode*0x10,%d0 # insert sgl precision
15546 bra.b fsqrt
15548 global fdsqrt
15549 fdsqrt:
15550 andi.b &0x30,%d0 # clear rnd prec
15551 ori.b &d_mode*0x10,%d0 # insert dbl precision
15553 global fsqrt
15554 fsqrt:
15555 mov.l %d0,L_SCR3(%a6) # store rnd info
15556 clr.w %d1
15557 mov.b STAG(%a6),%d1
15558 bne.w fsqrt_not_norm # optimize on non-norm input
15561 # SQUARE ROOT: norms and denorms ONLY!
15563 fsqrt_norm:
15564 tst.b SRC_EX(%a0) # is operand negative?
15565 bmi.l res_operr # yes
15567 andi.b &0xc0,%d0 # is precision extended?
15568 bne.b fsqrt_not_ext # no; go handle sgl or dbl
15570 fmov.l L_SCR3(%a6),%fpcr # set FPCR
15571 fmov.l &0x0,%fpsr # clear FPSR
15573 fsqrt.x (%a0),%fp0 # execute square root
15575 fmov.l %fpsr,%d1
15576 or.l %d1,USER_FPSR(%a6) # set N,INEX
rts
15580 fsqrt_denorm:
15581 tst.b SRC_EX(%a0) # is operand negative?
15582 bmi.l res_operr # yes
15584 andi.b &0xc0,%d0 # is precision extended?
15585 bne.b fsqrt_not_ext # no; go handle sgl or dbl
15587 mov.w SRC_EX(%a0),FP_SCR0_EX(%a6)
15588 mov.l SRC_HI(%a0),FP_SCR0_HI(%a6)
15589 mov.l SRC_LO(%a0),FP_SCR0_LO(%a6)
15591 bsr.l scale_sqrt # calculate scale factor
15593 bra.w fsqrt_sd_normal
15596 # operand is either single or double
15598 fsqrt_not_ext:
15599 cmpi.b %d0,&s_mode*0x10 # separate sgl/dbl prec
15600 bne.w fsqrt_dbl
15603 # operand is to be rounded to single precision
15605 fsqrt_sgl:
15606 mov.w SRC_EX(%a0),FP_SCR0_EX(%a6)
15607 mov.l SRC_HI(%a0),FP_SCR0_HI(%a6)
15608 mov.l SRC_LO(%a0),FP_SCR0_LO(%a6)
15610 bsr.l scale_sqrt # calculate scale factor
15612 cmpi.l %d0,&0x3fff-0x3f81 # will move in underflow?
15613 beq.w fsqrt_sd_may_unfl
15614 bgt.w fsqrt_sd_unfl # yes; go handle underflow
15615 cmpi.l %d0,&0x3fff-0x407f # will move in overflow?
15616 beq.w fsqrt_sd_may_ovfl # maybe; go check
15617 blt.w fsqrt_sd_ovfl # yes; go handle overflow
15620 # operand will NOT overflow or underflow when moved in to the fp reg file
15622 fsqrt_sd_normal:
15623 fmov.l &0x0,%fpsr # clear FPSR
15624 fmov.l L_SCR3(%a6),%fpcr # set FPCR
15626 fsqrt.x FP_SCR0(%a6),%fp0 # perform square root
15628 fmov.l %fpsr,%d1 # save FPSR
15629 fmov.l &0x0,%fpcr # clear FPCR
15631 or.l %d1,USER_FPSR(%a6) # save INEX2,N
15633 fsqrt_sd_normal_exit:
15634 mov.l %d2,-(%sp) # save d2
15635 fmovm.x &0x80,FP_SCR0(%a6) # store out result
15636 mov.w FP_SCR0_EX(%a6),%d1 # load sgn,exp
15637 mov.l %d1,%d2 # make a copy
15638 andi.l &0x7fff,%d1 # strip sign
15639 sub.l %d0,%d1 # add scale factor
15640 andi.w &0x8000,%d2 # keep old sign
15641 or.w %d1,%d2 # concat old sign,new exp
15642 mov.w %d2,FP_SCR0_EX(%a6) # insert new exponent
15643 mov.l (%sp)+,%d2 # restore d2
15644 fmovm.x FP_SCR0(%a6),&0x80 # return result in fp0
rts
15648 # operand is to be rounded to double precision
15650 fsqrt_dbl:
15651 mov.w SRC_EX(%a0),FP_SCR0_EX(%a6)
15652 mov.l SRC_HI(%a0),FP_SCR0_HI(%a6)
15653 mov.l SRC_LO(%a0),FP_SCR0_LO(%a6)
15655 bsr.l scale_sqrt # calculate scale factor
15657 cmpi.l %d0,&0x3fff-0x3c01 # will move in underflow?
15658 beq.w fsqrt_sd_may_unfl
15659 bgt.b fsqrt_sd_unfl # yes; go handle underflow
15660 cmpi.l %d0,&0x3fff-0x43ff # will move in overflow?
15661 beq.w fsqrt_sd_may_ovfl # maybe; go check
15662 blt.w fsqrt_sd_ovfl # yes; go handle overflow
15663 bra.w fsqrt_sd_normal # no; go handle normalized op
15665 # we're on the line here and the distinguishing characteristic is whether
15666 # the exponent is 3fff or 3ffe. if it's 3ffe, then it's a safe number;
15667 # otherwise, fall through to underflow.
15668 fsqrt_sd_may_unfl:
15669 btst &0x0,1+FP_SCR0_EX(%a6) # is exponent 0x3fff?
15670 bne.w fsqrt_sd_normal # yes, so no underflow
15673 # operand WILL underflow when moved in to the fp register file
15675 fsqrt_sd_unfl:
15676 bset &unfl_bit,FPSR_EXCEPT(%a6) # set unfl exc bit
15678 fmov.l &rz_mode*0x10,%fpcr # set FPCR
15679 fmov.l &0x0,%fpsr # clear FPSR
15681 fsqrt.x FP_SCR0(%a6),%fp0 # execute square root
15683 fmov.l %fpsr,%d1 # save status
15684 fmov.l &0x0,%fpcr # clear FPCR
15686 or.l %d1,USER_FPSR(%a6) # save INEX2,N
15688 # if underflow or inexact is enabled, go calculate EXOP first.
15689 mov.b FPCR_ENABLE(%a6),%d1
15690 andi.b &0x0b,%d1 # is UNFL or INEX enabled?
15691 bne.b fsqrt_sd_unfl_ena # yes
15693 fsqrt_sd_unfl_dis:
15694 fmovm.x &0x80,FP_SCR0(%a6) # store out result
15696 lea FP_SCR0(%a6),%a0 # pass: result addr
15697 mov.l L_SCR3(%a6),%d1 # pass: rnd prec,mode
15698 bsr.l unf_res # calculate default result
15699 or.b %d0,FPSR_CC(%a6) # set possible 'Z' ccode
15700 fmovm.x FP_SCR0(%a6),&0x80 # return default result in fp0
15701 rts
15704 # operand will underflow AND underflow is enabled.
15705 # therefore, we must return the result rounded to extended precision.
15707 fsqrt_sd_unfl_ena:
15708 mov.l FP_SCR0_HI(%a6),FP_SCR1_HI(%a6)
15709 mov.l FP_SCR0_LO(%a6),FP_SCR1_LO(%a6)
15710 mov.w FP_SCR0_EX(%a6),%d1 # load current exponent
15712 mov.l %d2,-(%sp) # save d2
15713 mov.l %d1,%d2 # make a copy
15714 andi.l &0x7fff,%d1 # strip sign
15715 andi.w &0x8000,%d2 # keep old sign
15716 sub.l %d0,%d1 # subtract scale factor
15717 addi.l &0x6000,%d1 # add new bias
15718 andi.w &0x7fff,%d1
15719 or.w %d2,%d1 # concat new sign,new exp
15720 mov.w %d1,FP_SCR1_EX(%a6) # insert new exp
15721 fmovm.x FP_SCR1(%a6),&0x40 # return EXOP in fp1
15722 mov.l (%sp)+,%d2 # restore d2
15723 bra.b fsqrt_sd_unfl_dis
15726 # operand WILL overflow.
15728 fsqrt_sd_ovfl:
15729 fmov.l &0x0,%fpsr # clear FPSR
15730 fmov.l L_SCR3(%a6),%fpcr # set FPCR
15732 fsqrt.x FP_SCR0(%a6),%fp0 # perform square root
15734 fmov.l &0x0,%fpcr # clear FPCR
15735 fmov.l %fpsr,%d1 # save FPSR
15737 or.l %d1,USER_FPSR(%a6) # save INEX2,N
15739 fsqrt_sd_ovfl_tst:
15740 or.l &ovfl_inx_mask,USER_FPSR(%a6) # set ovfl/aovfl/ainex
15742 mov.b FPCR_ENABLE(%a6),%d1
15743 andi.b &0x13,%d1 # is OVFL or INEX enabled?
15744 bne.b fsqrt_sd_ovfl_ena # yes
15747 # OVFL is not enabled; therefore, we must create the default result by
15748 # calling ovf_res().
15750 fsqrt_sd_ovfl_dis:
15751 btst &neg_bit,FPSR_CC(%a6) # is result negative?
15752 sne %d1 # set sign param accordingly
15753 mov.l L_SCR3(%a6),%d0 # pass: prec,mode
15754 bsr.l ovf_res # calculate default result
15755 or.b %d0,FPSR_CC(%a6) # set INF,N if applicable
15756 fmovm.x (%a0),&0x80 # return default result in fp0
rts
15760 # OVFL is enabled.
15761 # the INEX2 bit has already been updated by the round to the correct precision.
15762 # now, round to extended(and don't alter the FPSR).
15764 fsqrt_sd_ovfl_ena:
15765 mov.l %d2,-(%sp) # save d2
15766 mov.w FP_SCR0_EX(%a6),%d1 # fetch {sgn,exp}
15767 mov.l %d1,%d2 # make a copy
15768 andi.l &0x7fff,%d1 # strip sign
15769 andi.w &0x8000,%d2 # keep old sign
15770 sub.l %d0,%d1 # add scale factor
15771 subi.l &0x6000,%d1 # subtract bias
15772 andi.w &0x7fff,%d1
15773 or.w %d2,%d1 # concat sign,exp
15774 mov.w %d1,FP_SCR0_EX(%a6) # insert new exponent
15775 fmovm.x FP_SCR0(%a6),&0x40 # return EXOP in fp1
15776 mov.l (%sp)+,%d2 # restore d2
15777 bra.b fsqrt_sd_ovfl_dis
15780 # the move in MAY overflow. so...
15782 fsqrt_sd_may_ovfl:
15783 btst &0x0,1+FP_SCR0_EX(%a6) # is exponent 0x3fff?
15784 bne.w fsqrt_sd_ovfl # yes, so overflow
15786 fmov.l &0x0,%fpsr # clear FPSR
15787 fmov.l L_SCR3(%a6),%fpcr # set FPCR
15789 fsqrt.x FP_SCR0(%a6),%fp0 # perform square root
15791 fmov.l %fpsr,%d1 # save status
15792 fmov.l &0x0,%fpcr # clear FPCR
15794 or.l %d1,USER_FPSR(%a6) # save INEX2,N
15796 fmov.x %fp0,%fp1 # make a copy of result
15797 fcmp.b %fp1,&0x1 # is |result| >= 1.b?
15798 fbge.w fsqrt_sd_ovfl_tst # yes; overflow has occurred
15800 # no, it didn't overflow; we have correct result
15801 bra.w fsqrt_sd_normal_exit
15803 ##########################################################################
15806 # input is not normalized; what is it?
15808 fsqrt_not_norm:
15809 cmpi.b %d1,&DENORM # weed out DENORM
15810 beq.w fsqrt_denorm
15811 cmpi.b %d1,&ZERO # weed out ZERO
15812 beq.b fsqrt_zero
15813 cmpi.b %d1,&INF # weed out INF
15814 beq.b fsqrt_inf
15815 cmpi.b %d1,&SNAN # weed out SNAN
15816 beq.l res_snan_1op
15817 bra.l res_qnan_1op
15820 # fsqrt(+0) = +0
15821 # fsqrt(-0) = -0
15822 # fsqrt(+INF) = +INF
15823 # fsqrt(-INF) = OPERR
15825 fsqrt_zero:
15826 tst.b SRC_EX(%a0) # is ZERO positive or negative?
15827 bmi.b fsqrt_zero_m # negative
15828 fsqrt_zero_p:
15829 fmov.s &0x00000000,%fp0 # return +ZERO
15830 mov.b &z_bmask,FPSR_CC(%a6) # set 'Z' ccode bit
rts
15832 fsqrt_zero_m:
15833 fmov.s &0x80000000,%fp0 # return -ZERO
15834 mov.b &z_bmask+neg_bmask,FPSR_CC(%a6) # set 'Z','N' ccode bits
rts
15837 fsqrt_inf:
15838 tst.b SRC_EX(%a0) # is INF positive or negative?
15839 bmi.l res_operr # negative
15840 fsqrt_inf_p:
15841 fmovm.x SRC(%a0),&0x80 # return +INF in fp0
15842 mov.b &inf_bmask,FPSR_CC(%a6) # set 'I' ccode bit
rts
15845 ##########################################################################
15847 #########################################################################
15848 # XDEF **************************************************************** #
15849 # addsub_scaler2(): scale inputs to fadd/fsub such that no #
15850 # OVFL/UNFL exceptions will result #
15852 # XREF **************************************************************** #
15853 # norm() - normalize mantissa after adjusting exponent #
15855 # INPUT *************************************************************** #
15856 # FP_SRC(a6) = fp op1(src) #
15857 # FP_DST(a6) = fp op2(dst) #
15859 # OUTPUT ************************************************************** #
15860 # FP_SRC(a6) = fp op1 scaled(src) #
15861 # FP_DST(a6) = fp op2 scaled(dst) #
15862 # d0 = scale amount #
15864 # ALGORITHM *********************************************************** #
15865 # If the DST exponent is > the SRC exponent, set the DST exponent #
15866 # equal to 0x3fff and scale the SRC exponent by the value that the #
15867 # DST exponent was scaled by. If the SRC exponent is greater or equal, #
15868 # do the opposite. Return this scale factor in d0. #
15869 # If the two exponents differ by > the number of mantissa bits #
15870 # plus two, then set the smallest exponent to a very small value as a #
15871 # quick shortcut. #
15873 #########################################################################
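#
# A rough C sketch of the rule described above (illustrative only; "struct
# xfp" and the field names are ad-hoc, not FPSP symbols):
#
# /* pin the operand with the larger exponent to 0x3fff and drag the */
# /* other one along so that the exponent difference is preserved */
# static int scale2(struct xfp *big, struct xfp *small)
# {
# int scale = 0x3fff - big->exp; /* how far the larger exp moves */
# if (big->exp - small->exp >= mantissalen + 2)
# small->exp = 1; /* quick shortcut: operand is negligible */
# else
# small->exp += scale;
# big->exp = 0x3fff;
# return scale;
# }
# /* dst.exp > src.exp ? scale2(&dst, &src) : scale2(&src, &dst) */
#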
15875 global addsub_scaler2
15876 addsub_scaler2:
15877 mov.l SRC_HI(%a0),FP_SCR0_HI(%a6)
15878 mov.l DST_HI(%a1),FP_SCR1_HI(%a6)
15879 mov.l SRC_LO(%a0),FP_SCR0_LO(%a6)
15880 mov.l DST_LO(%a1),FP_SCR1_LO(%a6)
15881 mov.w SRC_EX(%a0),%d0
15882 mov.w DST_EX(%a1),%d1
15883 mov.w %d0,FP_SCR0_EX(%a6)
15884 mov.w %d1,FP_SCR1_EX(%a6)
15886 andi.w &0x7fff,%d0
15887 andi.w &0x7fff,%d1
15888 mov.w %d0,L_SCR1(%a6) # store src exponent
15889 mov.w %d1,2+L_SCR1(%a6) # store dst exponent
15891 cmp.w %d0, %d1 # is src exp >= dst exp?
15892 bge.l src_exp_ge2
15894 # dst exp is > src exp; scale dst to exp = 0x3fff
15895 dst_exp_gt2:
15896 bsr.l scale_to_zero_dst
15897 mov.l %d0,-(%sp) # save scale factor
15899 cmpi.b STAG(%a6),&DENORM # is src denormalized?
15900 bne.b cmpexp12
15902 lea FP_SCR0(%a6),%a0
15903 bsr.l norm # normalize the denorm; result is new exp
15904 neg.w %d0 # new exp = -(shft val)
15905 mov.w %d0,L_SCR1(%a6) # insert new exp
15907 cmpexp12:
15908 mov.w 2+L_SCR1(%a6),%d0
15909 subi.w &mantissalen+2,%d0 # subtract mantissalen+2 from larger exp
15911 cmp.w %d0,L_SCR1(%a6) # is difference >= len(mantissa)+2?
15912 bge.b quick_scale12
15914 mov.w L_SCR1(%a6),%d0
15915 add.w 0x2(%sp),%d0 # scale src exponent by scale factor
15916 mov.w FP_SCR0_EX(%a6),%d1
15917 and.w &0x8000,%d1
15918 or.w %d1,%d0 # concat {sgn,new exp}
15919 mov.w %d0,FP_SCR0_EX(%a6) # insert new src exponent
15921 mov.l (%sp)+,%d0 # return SCALE factor
rts
15924 quick_scale12:
15925 andi.w &0x8000,FP_SCR0_EX(%a6) # zero src exponent
15926 bset &0x0,1+FP_SCR0_EX(%a6) # set exp = 1
15928 mov.l (%sp)+,%d0 # return SCALE factor
rts
15931 # src exp is >= dst exp; scale src to exp = 0x3fff
15932 src_exp_ge2:
15933 bsr.l scale_to_zero_src
15934 mov.l %d0,-(%sp) # save scale factor
15936 cmpi.b DTAG(%a6),&DENORM # is dst denormalized?
15937 bne.b cmpexp22
15938 lea FP_SCR1(%a6),%a0
15939 bsr.l norm # normalize the denorm; result is new exp
15940 neg.w %d0 # new exp = -(shft val)
15941 mov.w %d0,2+L_SCR1(%a6) # insert new exp
15943 cmpexp22:
15944 mov.w L_SCR1(%a6),%d0
15945 subi.w &mantissalen+2,%d0 # subtract mantissalen+2 from larger exp
15947 cmp.w %d0,2+L_SCR1(%a6) # is difference >= len(mantissa)+2?
15948 bge.b quick_scale22
15950 mov.w 2+L_SCR1(%a6),%d0
15951 add.w 0x2(%sp),%d0 # scale dst exponent by scale factor
15952 mov.w FP_SCR1_EX(%a6),%d1
15953 andi.w &0x8000,%d1
15954 or.w %d1,%d0 # concat {sgn,new exp}
15955 mov.w %d0,FP_SCR1_EX(%a6) # insert new dst exponent
15957 mov.l (%sp)+,%d0 # return SCALE factor
rts
15960 quick_scale22:
15961 andi.w &0x8000,FP_SCR1_EX(%a6) # zero dst exponent
15962 bset &0x0,1+FP_SCR1_EX(%a6) # set exp = 1
15964 mov.l (%sp)+,%d0 # return SCALE factor
rts
15967 ##########################################################################
15969 #########################################################################
15970 # XDEF **************************************************************** #
15971 # scale_to_zero_src(): scale the exponent of extended precision #
15972 # value at FP_SCR0(a6). #
15974 # XREF **************************************************************** #
15975 # norm() - normalize the mantissa if the operand was a DENORM #
15977 # INPUT *************************************************************** #
15978 # FP_SCR0(a6) = extended precision operand to be scaled #
15980 # OUTPUT ************************************************************** #
15981 # FP_SCR0(a6) = scaled extended precision operand #
15982 # d0 = scale value #
15984 # ALGORITHM *********************************************************** #
15985 # Set the exponent of the input operand to 0x3fff. Save the value #
15986 # of the difference between the original and new exponent. Then, #
15987 # normalize the operand if it was a DENORM. Add this normalization #
15988 # value to the previous value. Return the result. #
15990 #########################################################################
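#
# A rough C sketch of the computation described above (illustrative only;
# names are ad-hoc stand-ins):
#
# int old = op.exp; /* biased exponent, sign stripped */
# op.exp = 0x3fff; /* pin the operand to the 2^0 binade */
# int scale = 0x3fff - old; /* how far the exponent was moved */
# if (op_was_denorm)
# scale += norm_shift(&op); /* normalizing moves the exponent further */
# return scale;
#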
15992 global scale_to_zero_src
15993 scale_to_zero_src:
15994 mov.w FP_SCR0_EX(%a6),%d1 # extract operand's {sgn,exp}
15995 mov.w %d1,%d0 # make a copy
15997 andi.l &0x7fff,%d1 # extract operand's exponent
15999 andi.w &0x8000,%d0 # extract operand's sgn
16000 or.w &0x3fff,%d0 # insert new operand's exponent(=0)
16002 mov.w %d0,FP_SCR0_EX(%a6) # insert biased exponent
16004 cmpi.b STAG(%a6),&DENORM # is operand normalized?
16005 beq.b stzs_denorm # normalize the DENORM
16007 stzs_norm:
16008 mov.l &0x3fff,%d0
16009 sub.l %d1,%d0 # scale = BIAS + (-exp)
rts
16013 stzs_denorm:
16014 lea FP_SCR0(%a6),%a0 # pass ptr to src op
16015 bsr.l norm # normalize denorm
16016 neg.l %d0 # new exponent = -(shft val)
16017 mov.l %d0,%d1 # prepare for op_norm call
16018 bra.b stzs_norm # finish scaling
16022 #########################################################################
16023 # XDEF **************************************************************** #
16024 # scale_sqrt(): scale the input operand exponent so a subsequent #
16025 # fsqrt operation won't take an exception. #
16027 # XREF **************************************************************** #
16028 # norm() - normalize the mantissa if the operand was a DENORM #
16030 # INPUT *************************************************************** #
16031 # FP_SCR0(a6) = extended precision operand to be scaled #
16033 # OUTPUT ************************************************************** #
16034 # FP_SCR0(a6) = scaled extended precision operand #
16035 # d0 = scale value #
16037 # ALGORITHM *********************************************************** #
16038 # If the input operand is a DENORM, normalize it. #
16039 # If the exponent of the input operand is even, set the exponent #
16040 # to 0x3ffe and return a scale factor of "(exp-0x3ffe)/2". If the #
16041 # exponent of the input operand is odd, set the exponent to 0x3fff and #
16042 # return a scale factor of "(exp-0x3fff)/2". #
16044 #########################################################################
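#
# A rough C sketch of the even/odd rule described above (illustrative only;
# names are ad-hoc and the sign convention of "scale" follows the code):
#
# int e = op.exp; /* biased exponent (DENORMs get normalized first) */
# if (e & 1) { /* odd exponent */
# op.exp = 0x3fff;
# scale = (0x3fff - e) >> 1; /* sqrt halves the exponent offset */
# } else { /* even exponent */
# op.exp = 0x3ffe;
# scale = (0x3ffe - e) >> 1;
# }
# return scale;
#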
16046 global scale_sqrt
16047 scale_sqrt:
16048 cmpi.b STAG(%a6),&DENORM # is operand normalized?
16049 beq.b ss_denorm # normalize the DENORM
16051 mov.w FP_SCR0_EX(%a6),%d1 # extract operand's {sgn,exp}
16052 andi.l &0x7fff,%d1 # extract operand's exponent
16054 andi.w &0x8000,FP_SCR0_EX(%a6) # extract operand's sgn
16056 btst &0x0,%d1 # is exp even or odd?
16057 beq.b ss_norm_even
16059 ori.w &0x3fff,FP_SCR0_EX(%a6) # insert new operand's exponent(=0)
16061 mov.l &0x3fff,%d0
16062 sub.l %d1,%d0 # scale = BIAS + (-exp)
16063 asr.l &0x1,%d0 # divide scale factor by 2
rts
16066 ss_norm_even:
16067 ori.w &0x3ffe,FP_SCR0_EX(%a6) # insert new operand's exponent(=0)
16069 mov.l &0x3ffe,%d0
16070 sub.l %d1,%d0 # scale = BIAS + (-exp)
16071 asr.l &0x1,%d0 # divide scale factor by 2
rts
16074 ss_denorm:
16075 lea FP_SCR0(%a6),%a0 # pass ptr to src op
16076 bsr.l norm # normalize denorm
16078 btst &0x0,%d0 # is exp even or odd?
16079 beq.b ss_denorm_even
16081 ori.w &0x3fff,FP_SCR0_EX(%a6) # insert new operand's exponent(=0)
16083 add.l &0x3fff,%d0
16084 asr.l &0x1,%d0 # divide scale factor by 2
rts
16087 ss_denorm_even:
16088 ori.w &0x3ffe,FP_SCR0_EX(%a6) # insert new operand's exponent(=0)
16090 add.l &0x3ffe,%d0
16091 asr.l &0x1,%d0 # divide scale factor by 2
rts
16096 #########################################################################
16097 # XDEF **************************************************************** #
16098 # scale_to_zero_dst(): scale the exponent of extended precision #
16099 # value at FP_SCR1(a6). #
16101 # XREF **************************************************************** #
16102 # norm() - normalize the mantissa if the operand was a DENORM #
16104 # INPUT *************************************************************** #
16105 # FP_SCR1(a6) = extended precision operand to be scaled #
16107 # OUTPUT ************************************************************** #
16108 # FP_SCR1(a6) = scaled extended precision operand #
16109 # d0 = scale value #
16111 # ALGORITHM *********************************************************** #
16112 # Set the exponent of the input operand to 0x3fff. Save the value #
16113 # of the difference between the original and new exponent. Then, #
16114 # normalize the operand if it was a DENORM. Add this normalization #
16115 # value to the previous value. Return the result. #
16117 #########################################################################
16119 global scale_to_zero_dst
16120 scale_to_zero_dst:
16121 mov.w FP_SCR1_EX(%a6),%d1 # extract operand's {sgn,exp}
16122 mov.w %d1,%d0 # make a copy
16124 andi.l &0x7fff,%d1 # extract operand's exponent
16126 andi.w &0x8000,%d0 # extract operand's sgn
16127 or.w &0x3fff,%d0 # insert new operand's exponent(=0)
16129 mov.w %d0,FP_SCR1_EX(%a6) # insert biased exponent
16131 cmpi.b DTAG(%a6),&DENORM # is operand normalized?
16132 beq.b stzd_denorm # normalize the DENORM
16134 stzd_norm:
16135 mov.l &0x3fff,%d0
16136 sub.l %d1,%d0 # scale = BIAS + (-exp)
rts
16139 stzd_denorm:
16140 lea FP_SCR1(%a6),%a0 # pass ptr to dst op
16141 bsr.l norm # normalize denorm
16142 neg.l %d0 # new exponent = -(shft val)
16143 mov.l %d0,%d1 # prepare for op_norm call
16144 bra.b stzd_norm # finish scaling
16146 ##########################################################################
16148 #########################################################################
16149 # XDEF **************************************************************** #
16150 # res_qnan(): return default result w/ QNAN operand for dyadic #
16151 # res_snan(): return default result w/ SNAN operand for dyadic #
16152 # res_qnan_1op(): return dflt result w/ QNAN operand for monadic #
16153 # res_snan_1op(): return dflt result w/ SNAN operand for monadic #
16155 # XREF **************************************************************** #
16156 # None #
16158 # INPUT *************************************************************** #
16159 # FP_SRC(a6) = pointer to extended precision src operand #
16160 # FP_DST(a6) = pointer to extended precision dst operand #
16162 # OUTPUT ************************************************************** #
16163 # fp0 = default result #
16165 # ALGORITHM *********************************************************** #
16166 # If either operand (but not both operands) of an operation is a #
16167 # nonsignalling NAN, then that NAN is returned as the result. If both #
16168 # operands are nonsignalling NANs, then the destination operand #
16169 # nonsignalling NAN is returned as the result. #
16170 # If either operand to an operation is a signalling NAN (SNAN), #
16171 # then, the SNAN bit is set in the FPSR EXC byte. If the SNAN trap #
16172 # enable bit is set in the FPCR, then the trap is taken and the #
16173 # destination is not modified. If the SNAN trap enable bit is not set, #
16174 # then the SNAN is converted to a nonsignalling NAN (by setting the #
16175 # SNAN bit in the operand to one), and the operation continues as #
16176 # described in the preceding paragraph, for nonsignalling NANs. #
16177 # Make sure the appropriate FPSR bits are set before exiting. #
16179 #########################################################################
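#
# A rough C sketch of the selection rule described above (illustrative
# only; quiet() stands for setting the SNAN bit in the mantissa and the
# other names are ad-hoc):
#
# if (dst_is_snan) { set_snan_aiop(); quiet(&dst); return dst; }
# if (dst_is_qnan) { if (src_is_snan) set_snan_aiop(); return dst; }
# if (src_is_snan) { set_snan_aiop(); quiet(&src); return src; }
# return src; /* src is a QNAN */
# /* in every case NAN (and N, for a negative NAN) ends up set in the FPSR */
#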
16181 global res_qnan
16182 global res_snan
16183 res_qnan:
16184 res_snan:
16185 cmp.b DTAG(%a6), &SNAN # is the dst an SNAN?
16186 beq.b dst_snan2
16187 cmp.b DTAG(%a6), &QNAN # is the dst a QNAN?
16188 beq.b dst_qnan2
16189 src_nan:
16190 cmp.b STAG(%a6), &QNAN
16191 beq.b src_qnan2
16192 global res_snan_1op
16193 res_snan_1op:
16194 src_snan2:
16195 bset &0x6, FP_SRC_HI(%a6) # set SNAN bit
16196 or.l &nan_mask+aiop_mask+snan_mask, USER_FPSR(%a6)
16197 lea FP_SRC(%a6), %a0
16198 bra.b nan_comp
16199 global res_qnan_1op
16200 res_qnan_1op:
16201 src_qnan2:
16202 or.l &nan_mask, USER_FPSR(%a6)
16203 lea FP_SRC(%a6), %a0
16204 bra.b nan_comp
16205 dst_snan2:
16206 or.l &nan_mask+aiop_mask+snan_mask, USER_FPSR(%a6)
16207 bset &0x6, FP_DST_HI(%a6) # set SNAN bit
16208 lea FP_DST(%a6), %a0
16209 bra.b nan_comp
16210 dst_qnan2:
16211 lea FP_DST(%a6), %a0
16212 cmp.b STAG(%a6), &SNAN
16213 bne nan_done
16214 or.l &aiop_mask+snan_mask, USER_FPSR(%a6)
16215 nan_done:
16216 or.l &nan_mask, USER_FPSR(%a6)
16217 nan_comp:
16218 btst &0x7, FTEMP_EX(%a0) # is NAN neg?
16219 beq.b nan_not_neg
16220 or.l &neg_mask, USER_FPSR(%a6)
16221 nan_not_neg:
16222 fmovm.x (%a0), &0x80
rts
16225 #########################################################################
16226 # XDEF **************************************************************** #
16227 # res_operr(): return default result during operand error #
16229 # XREF **************************************************************** #
16230 # None #
16232 # INPUT *************************************************************** #
16233 # None #
16235 # OUTPUT ************************************************************** #
16236 # fp0 = default operand error result #
16238 # ALGORITHM *********************************************************** #
16239 # A nonsignalling NAN is returned as the default result when #
16240 # an operand error occurs for the following cases: #
16242 # Multiply: (Infinity x Zero) #
16243 # Divide : (Zero / Zero) || (Infinity / Infinity) #
16245 #########################################################################
16247 global res_operr
16248 res_operr:
16249 or.l &nan_mask+operr_mask+aiop_mask, USER_FPSR(%a6)
16250 fmovm.x nan_return(%pc), &0x80
rts
16253 nan_return:
16254 long 0x7fff0000, 0xffffffff, 0xffffffff
16256 #########################################################################
16257 # fdbcc(): routine to emulate the fdbcc instruction #
16259 # XDEF **************************************************************** #
16260 # _fdbcc() #
16262 # XREF **************************************************************** #
16263 # fetch_dreg() - fetch Dn value #
16264 # store_dreg_l() - store updated Dn value #
16266 # INPUT *************************************************************** #
16267 # d0 = displacement #
16269 # OUTPUT ************************************************************** #
16270 # none #
16272 # ALGORITHM *********************************************************** #
16273 # This routine checks which conditional predicate is specified by #
16274 # the stacked fdbcc instruction opcode and then branches to a routine #
16275 # for that predicate. The corresponding fbcc instruction is then used #
16276 # to see whether the condition (specified by the stacked FPSR) is true #
16277 # or false. #
16278 # If a BSUN exception should be indicated, the BSUN and ABSUN #
16279 # bits are set in the stacked FPSR. If the BSUN exception is enabled, #
16280 # the fbsun_flg is set in the SPCOND_FLG location on the stack. If an #
16281 # enabled BSUN should not be flagged and the predicate is true, then #
16282 # Dn is fetched and decremented by one. If Dn is not equal to -1, add #
16283 # the displacement value to the stacked PC so that when an "rte" is #
16284 # finally executed, the branch occurs. #
16286 #########################################################################
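#
# A rough C model of the behavior described above (illustrative only;
# fetch_dreg()/store_dreg_l() are the package routines named in the XREF,
# shown here with ad-hoc C signatures; everything else is a stand-in):
#
# uint32_t dn = fetch_dreg(reg); /* loop counter Dn */
# if (!cc_true(pred, fpsr_cc)) { /* predicate false? */
# dn = (dn & 0xffff0000) | ((dn - 1) & 0xffff); /* dec low word only */
# store_dreg_l(reg, dn);
# if ((int16_t)dn != -1)
# pc = fdbcc_pc + 4 + disp; /* take the branch */
# } /* true: fall through (possibly flagging BSUN first) */
#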
16287 global _fdbcc
16288 _fdbcc:
16289 mov.l %d0,L_SCR1(%a6) # save displacement
16291 mov.w EXC_CMDREG(%a6),%d0 # fetch predicate
16293 clr.l %d1 # clear scratch reg
16294 mov.b FPSR_CC(%a6),%d1 # fetch fp ccodes
16295 ror.l &0x8,%d1 # rotate to top byte
16296 fmov.l %d1,%fpsr # insert into FPSR
16298 mov.w (tbl_fdbcc.b,%pc,%d0.w*2),%d1 # load table
16299 jmp (tbl_fdbcc.b,%pc,%d1.w) # jump to fdbcc routine
16301 tbl_fdbcc:
16302 short fdbcc_f - tbl_fdbcc # 00
16303 short fdbcc_eq - tbl_fdbcc # 01
16304 short fdbcc_ogt - tbl_fdbcc # 02
16305 short fdbcc_oge - tbl_fdbcc # 03
16306 short fdbcc_olt - tbl_fdbcc # 04
16307 short fdbcc_ole - tbl_fdbcc # 05
16308 short fdbcc_ogl - tbl_fdbcc # 06
16309 short fdbcc_or - tbl_fdbcc # 07
16310 short fdbcc_un - tbl_fdbcc # 08
16311 short fdbcc_ueq - tbl_fdbcc # 09
16312 short fdbcc_ugt - tbl_fdbcc # 10
16313 short fdbcc_uge - tbl_fdbcc # 11
16314 short fdbcc_ult - tbl_fdbcc # 12
16315 short fdbcc_ule - tbl_fdbcc # 13
16316 short fdbcc_neq - tbl_fdbcc # 14
16317 short fdbcc_t - tbl_fdbcc # 15
16318 short fdbcc_sf - tbl_fdbcc # 16
16319 short fdbcc_seq - tbl_fdbcc # 17
16320 short fdbcc_gt - tbl_fdbcc # 18
16321 short fdbcc_ge - tbl_fdbcc # 19
16322 short fdbcc_lt - tbl_fdbcc # 20
16323 short fdbcc_le - tbl_fdbcc # 21
16324 short fdbcc_gl - tbl_fdbcc # 22
16325 short fdbcc_gle - tbl_fdbcc # 23
16326 short fdbcc_ngle - tbl_fdbcc # 24
16327 short fdbcc_ngl - tbl_fdbcc # 25
16328 short fdbcc_nle - tbl_fdbcc # 26
16329 short fdbcc_nlt - tbl_fdbcc # 27
16330 short fdbcc_nge - tbl_fdbcc # 28
16331 short fdbcc_ngt - tbl_fdbcc # 29
16332 short fdbcc_sneq - tbl_fdbcc # 30
16333 short fdbcc_st - tbl_fdbcc # 31
16335 #########################################################################
16337 # IEEE Nonaware tests #
16339 # For the IEEE nonaware tests, only the false branch changes the #
16340 # counter. However, the true branch may set bsun so we check to see #
16341 # if the NAN bit is set, in which case BSUN and AIOP will be set. #
16343 # The cases EQ and NE are shared by the Aware and Nonaware groups #
16344 # and are incapable of setting the BSUN exception bit. #
16346 # Typically, only one of the two possible branch directions could #
16347 # have the NAN bit set. #
16348 # (This is assuming the mutual exclusiveness of FPSR cc bit groupings #
16349 # is preserved.) #
16351 #########################################################################
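#
# A rough C sketch of the pattern most of these tests follow (illustrative
# only; a few of them repeat the NAN check on the true branch as well):
#
# if (cond_true) /* e.g. fbgt taken */
# return; /* counter untouched */
# if (fpsr_cc & NAN_CC) { /* unordered operands */
# fpsr |= BSUN | AIOP; /* always flag BSUN/AIOP */
# if (fpcr_enable & BSUN_ENABLE) {
# fdbcc_bsun(); /* hand off to the BSUN handler */
# return;
# }
# }
# fdbcc_false(); /* false: go decrement the counter */
#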
16354 # equal:
16358 fdbcc_eq:
16359 fbeq.w fdbcc_eq_yes # equal?
16360 fdbcc_eq_no:
16361 bra.w fdbcc_false # no; go handle counter
16362 fdbcc_eq_yes:
rts # do nothing
16366 # not equal:
16370 fdbcc_neq:
16371 fbneq.w fdbcc_neq_yes # not equal?
16372 fdbcc_neq_no:
16373 bra.w fdbcc_false # no; go handle counter
16374 fdbcc_neq_yes:
rts # do nothing
16378 # greater than:
16379 # _______
16380 # NANvZvN
16382 fdbcc_gt:
16383 fbgt.w fdbcc_gt_yes # greater than?
16384 btst &nan_bit, FPSR_CC(%a6) # is NAN set in cc?
16385 beq.w fdbcc_false # no;go handle counter
16386 ori.l &bsun_mask+aiop_mask, USER_FPSR(%a6) # set BSUN exc bit
16387 btst &bsun_bit, FPCR_ENABLE(%a6) # is BSUN enabled?
16388 bne.w fdbcc_bsun # yes; we have an exception
16389 bra.w fdbcc_false # no; go handle counter
16390 fdbcc_gt_yes:
16391 rts # do nothing
16394 # not greater than:
16396 # NANvZvN
16398 fdbcc_ngt:
16399 fbngt.w fdbcc_ngt_yes # not greater than?
16400 fdbcc_ngt_no:
16401 bra.w fdbcc_false # no; go handle counter
16402 fdbcc_ngt_yes:
16403 btst &nan_bit, FPSR_CC(%a6) # is NAN set in cc?
16404 beq.b fdbcc_ngt_done # no;go finish
16405 ori.l &bsun_mask+aiop_mask, USER_FPSR(%a6) # set BSUN exc bit
16406 btst &bsun_bit, FPCR_ENABLE(%a6) # is BSUN enabled?
16407 bne.w fdbcc_bsun # yes; we have an exception
16408 fdbcc_ngt_done:
16409 rts # no; do nothing
16412 # greater than or equal:
16413 # _____
16414 # Zv(NANvN)
16416 fdbcc_ge:
16417 fbge.w fdbcc_ge_yes # greater than or equal?
16418 fdbcc_ge_no:
16419 btst &nan_bit, FPSR_CC(%a6) # is NAN set in cc?
16420 beq.w fdbcc_false # no;go handle counter
16421 ori.l &bsun_mask+aiop_mask, USER_FPSR(%a6) # set BSUN exc bit
16422 btst &bsun_bit, FPCR_ENABLE(%a6) # is BSUN enabled?
16423 bne.w fdbcc_bsun # yes; we have an exception
16424 bra.w fdbcc_false # no; go handle counter
16425 fdbcc_ge_yes:
16426 btst &nan_bit, FPSR_CC(%a6) # is NAN set in cc?
16427 beq.b fdbcc_ge_yes_done # no;go do nothing
16428 ori.l &bsun_mask+aiop_mask, USER_FPSR(%a6) # set BSUN exc bit
16429 btst &bsun_bit, FPCR_ENABLE(%a6) # is BSUN enabled?
16430 bne.w fdbcc_bsun # yes; we have an exception
16431 fdbcc_ge_yes_done:
16432 rts # do nothing
16435 # not (greater than or equal):
16437 # NANv(N^Z)
16439 fdbcc_nge:
16440 fbnge.w fdbcc_nge_yes # not (greater than or equal)?
16441 fdbcc_nge_no:
16442 bra.w fdbcc_false # no; go handle counter
16443 fdbcc_nge_yes:
16444 btst &nan_bit, FPSR_CC(%a6) # is NAN set in cc?
16445 beq.b fdbcc_nge_done # no;go finish
16446 ori.l &bsun_mask+aiop_mask, USER_FPSR(%a6) # set BSUN exc bit
16447 btst &bsun_bit, FPCR_ENABLE(%a6) # is BSUN enabled?
16448 bne.w fdbcc_bsun # yes; we have an exception
16449 fdbcc_nge_done:
16450 rts # no; do nothing
16453 # less than:
16454 # _____
16455 # N^(NANvZ)
16457 fdbcc_lt:
16458 fblt.w fdbcc_lt_yes # less than?
16459 fdbcc_lt_no:
16460 btst &nan_bit, FPSR_CC(%a6) # is NAN set in cc?
16461 beq.w fdbcc_false # no; go handle counter
16462 ori.l &bsun_mask+aiop_mask, USER_FPSR(%a6) # set BSUN exc bit
16463 btst &bsun_bit, FPCR_ENABLE(%a6) # is BSUN enabled?
16464 bne.w fdbcc_bsun # yes; we have an exception
16465 bra.w fdbcc_false # no; go handle counter
16466 fdbcc_lt_yes:
16467 rts # do nothing
16470 # not less than:
16472 # NANv(ZvN)
16474 fdbcc_nlt:
16475 fbnlt.w fdbcc_nlt_yes # not less than?
16476 fdbcc_nlt_no:
16477 bra.w fdbcc_false # no; go handle counter
16478 fdbcc_nlt_yes:
16479 btst &nan_bit, FPSR_CC(%a6) # is NAN set in cc?
16480 beq.b fdbcc_nlt_done # no;go finish
16481 ori.l &bsun_mask+aiop_mask, USER_FPSR(%a6) # set BSUN exc bit
16482 btst &bsun_bit, FPCR_ENABLE(%a6) # is BSUN enabled?
16483 bne.w fdbcc_bsun # yes; we have an exception
16484 fdbcc_nlt_done:
16485 rts # no; do nothing
16488 # less than or equal:
16489 # ___
16490 # Zv(N^NAN)
16492 fdbcc_le:
16493 fble.w fdbcc_le_yes # less than or equal?
16494 fdbcc_le_no:
16495 btst &nan_bit, FPSR_CC(%a6) # is NAN set in cc?
16496 beq.w fdbcc_false # no; go handle counter
16497 ori.l &bsun_mask+aiop_mask, USER_FPSR(%a6) # set BSUN exc bit
16498 btst &bsun_bit, FPCR_ENABLE(%a6) # is BSUN enabled?
16499 bne.w fdbcc_bsun # yes; we have an exception
16500 bra.w fdbcc_false # no; go handle counter
16501 fdbcc_le_yes:
16502 btst &nan_bit, FPSR_CC(%a6) # is NAN set in cc?
16503 beq.b fdbcc_le_yes_done # no; go do nothing
16504 ori.l &bsun_mask+aiop_mask, USER_FPSR(%a6) # set BSUN exc bit
16505 btst &bsun_bit, FPCR_ENABLE(%a6) # is BSUN enabled?
16506 bne.w fdbcc_bsun # yes; we have an exception
16507 fdbcc_le_yes_done:
16508 rts # do nothing
16511 # not (less than or equal):
16512 # ___
16513 # NANv(NvZ)
16515 fdbcc_nle:
16516 fbnle.w fdbcc_nle_yes # not (less than or equal)?
16517 fdbcc_nle_no:
16518 bra.w fdbcc_false # no; go handle counter
16519 fdbcc_nle_yes:
16520 btst &nan_bit, FPSR_CC(%a6) # is NAN set in cc?
16521 beq.w fdbcc_nle_done # no; go finish
16522 ori.l &bsun_mask+aiop_mask, USER_FPSR(%a6) # set BSUN exc bit
16523 btst &bsun_bit, FPCR_ENABLE(%a6) # is BSUN enabled?
16524 bne.w fdbcc_bsun # yes; we have an exception
16525 fdbcc_nle_done:
16526 rts # no; do nothing
16529 # greater or less than:
16530 # _____
16531 # NANvZ
16533 fdbcc_gl:
16534 fbgl.w fdbcc_gl_yes # greater or less than?
16535 fdbcc_gl_no:
16536 btst &nan_bit, FPSR_CC(%a6) # is NAN set in cc?
16537 beq.w fdbcc_false # no; handle counter
16538 ori.l &bsun_mask+aiop_mask, USER_FPSR(%a6) # set BSUN exc bit
16539 btst &bsun_bit, FPCR_ENABLE(%a6) # is BSUN enabled?
16540 bne.w fdbcc_bsun # yes; we have an exception
16541 bra.w fdbcc_false # no; go handle counter
16542 fdbcc_gl_yes:
16543 rts # do nothing
16546 # not (greater or less than):
16548 # NANvZ
16550 fdbcc_ngl:
16551 fbngl.w fdbcc_ngl_yes # not (greater or less than)?
16552 fdbcc_ngl_no:
16553 bra.w fdbcc_false # no; go handle counter
16554 fdbcc_ngl_yes:
16555 btst &nan_bit, FPSR_CC(%a6) # is NAN set in cc?
16556 beq.b fdbcc_ngl_done # no; go finish
16557 ori.l &bsun_mask+aiop_mask, USER_FPSR(%a6) # set BSUN exc bit
16558 btst &bsun_bit, FPCR_ENABLE(%a6) # is BSUN enabled?
16559 bne.w fdbcc_bsun # yes; we have an exception
16560 fdbcc_ngl_done:
16561 rts # no; do nothing
16564 # greater, less, or equal:
16565 # ___
16566 # NAN
16568 fdbcc_gle:
16569 fbgle.w fdbcc_gle_yes # greater, less, or equal?
16570 fdbcc_gle_no:
16571 ori.l &bsun_mask+aiop_mask, USER_FPSR(%a6) # set BSUN exc bit
16572 btst &bsun_bit, FPCR_ENABLE(%a6) # is BSUN enabled?
16573 bne.w fdbcc_bsun # yes; we have an exception
16574 bra.w fdbcc_false # no; go handle counter
16575 fdbcc_gle_yes:
16576 rts # do nothing
16579 # not (greater, less, or equal):
16581 # NAN
16583 fdbcc_ngle:
16584 fbngle.w fdbcc_ngle_yes # not (greater, less, or equal)?
16585 fdbcc_ngle_no:
16586 bra.w fdbcc_false # no; go handle counter
16587 fdbcc_ngle_yes:
16588 ori.l &bsun_mask+aiop_mask, USER_FPSR(%a6) # set BSUN exc bit
16589 btst &bsun_bit, FPCR_ENABLE(%a6) # is BSUN enabled?
16590 bne.w fdbcc_bsun # yes; we have an exception
16591 rts # no; do nothing
16593 #########################################################################
16595 # Miscellaneous tests #
16597 # For the IEEE miscellaneous tests, all but fdbf and fdbt can set bsun. #
16599 #########################################################################
16602 # false:
16604 # False
16606 fdbcc_f: # no bsun possible
16607 bra.w fdbcc_false # go handle counter
16610 # true:
16612 # True
16614 fdbcc_t: # no bsun possible
16615 rts # do nothing
16618 # signalling false:
16620 # False
16622 fdbcc_sf:
16623 btst &nan_bit, FPSR_CC(%a6) # is NAN set?
16624 beq.w fdbcc_false # no;go handle counter
16625 ori.l &bsun_mask+aiop_mask, USER_FPSR(%a6) # set BSUN exc bit
16626 btst &bsun_bit, FPCR_ENABLE(%a6) # is BSUN enabled?
16627 bne.w fdbcc_bsun # yes; we have an exception
16628 bra.w fdbcc_false # go handle counter
16631 # signalling true:
16633 # True
16635 fdbcc_st:
16636 btst &nan_bit, FPSR_CC(%a6) # is NAN set?
16637 beq.b fdbcc_st_done # no;go finish
16638 ori.l &bsun_mask+aiop_mask, USER_FPSR(%a6) # set BSUN exc bit
16639 btst &bsun_bit, FPCR_ENABLE(%a6) # is BSUN enabled?
16640 bne.w fdbcc_bsun # yes; we have an exception
16641 fdbcc_st_done:
rts # do nothing
16645 # signalling equal:
16649 fdbcc_seq:
16650 fbseq.w fdbcc_seq_yes # signalling equal?
16651 fdbcc_seq_no:
16652 btst &nan_bit, FPSR_CC(%a6) # is NAN set?
16653 beq.w fdbcc_false # no;go handle counter
16654 ori.l &bsun_mask+aiop_mask, USER_FPSR(%a6) # set BSUN exc bit
16655 btst &bsun_bit, FPCR_ENABLE(%a6) # is BSUN enabled?
16656 bne.w fdbcc_bsun # yes; we have an exception
16657 bra.w fdbcc_false # go handle counter
16658 fdbcc_seq_yes:
16659 btst &nan_bit, FPSR_CC(%a6) # is NAN set?
16660 beq.b fdbcc_seq_yes_done # no;go do nothing
16661 ori.l &bsun_mask+aiop_mask, USER_FPSR(%a6) # set BSUN exc bit
16662 btst &bsun_bit, FPCR_ENABLE(%a6) # is BSUN enabled?
16663 bne.w fdbcc_bsun # yes; we have an exception
16664 fdbcc_seq_yes_done:
16665 rts # yes; do nothing
16668 # signalling not equal:
16672 fdbcc_sneq:
16673 fbsneq.w fdbcc_sneq_yes # signalling not equal?
16674 fdbcc_sneq_no:
16675 btst &nan_bit, FPSR_CC(%a6) # is NAN set?
16676 beq.w fdbcc_false # no;go handle counter
16677 ori.l &bsun_mask+aiop_mask, USER_FPSR(%a6) # set BSUN exc bit
16678 btst &bsun_bit, FPCR_ENABLE(%a6) # is BSUN enabled?
16679 bne.w fdbcc_bsun # yes; we have an exception
16680 bra.w fdbcc_false # go handle counter
16681 fdbcc_sneq_yes:
16682 btst &nan_bit, FPSR_CC(%a6) # is NAN set?
16683 beq.w fdbcc_sneq_done # no;go finish
16684 ori.l &bsun_mask+aiop_mask, USER_FPSR(%a6) # set BSUN exc bit
16685 btst &bsun_bit, FPCR_ENABLE(%a6) # is BSUN enabled?
16686 bne.w fdbcc_bsun # yes; we have an exception
16687 fdbcc_sneq_done:
rts # do nothing
16690 #########################################################################
16692 # IEEE Aware tests #
16694 # For the IEEE aware tests, action is only taken if the result is false.#
16695 # Therefore, the opposite branch type is used to jump to the decrement #
16696 # routine. #
16697 # The BSUN exception will not be set for any of these tests. #
16699 #########################################################################
16702 # ordered greater than:
16703 # _______
16704 # NANvZvN
16706 fdbcc_ogt:
16707 fbogt.w fdbcc_ogt_yes # ordered greater than?
16708 fdbcc_ogt_no:
16709 bra.w fdbcc_false # no; go handle counter
16710 fdbcc_ogt_yes:
16711 rts # yes; do nothing
16714 # unordered or less or equal:
16715 # _______
16716 # NANvZvN
16718 fdbcc_ule:
16719 fbule.w fdbcc_ule_yes # unordered or less or equal?
16720 fdbcc_ule_no:
16721 bra.w fdbcc_false # no; go handle counter
16722 fdbcc_ule_yes:
16723 rts # yes; do nothing
16726 # ordered greater than or equal:
16727 # _____
16728 # Zv(NANvN)
16730 fdbcc_oge:
16731 fboge.w fdbcc_oge_yes # ordered greater than or equal?
16732 fdbcc_oge_no:
16733 bra.w fdbcc_false # no; go handle counter
16734 fdbcc_oge_yes:
16735 rts # yes; do nothing
16738 # unordered or less than:
16740 # NANv(N^Z)
16742 fdbcc_ult:
16743 fbult.w fdbcc_ult_yes # unordered or less than?
16744 fdbcc_ult_no:
16745 bra.w fdbcc_false # no; go handle counter
16746 fdbcc_ult_yes:
16747 rts # yes; do nothing
16750 # ordered less than:
16751 # _____
16752 # N^(NANvZ)
16754 fdbcc_olt:
16755 fbolt.w fdbcc_olt_yes # ordered less than?
16756 fdbcc_olt_no:
16757 bra.w fdbcc_false # no; go handle counter
16758 fdbcc_olt_yes:
16759 rts # yes; do nothing
16762 # unordered or greater or equal:
16764 # NANvZvN
16766 fdbcc_uge:
16767 fbuge.w fdbcc_uge_yes # unordered or greater or equal?
16768 fdbcc_uge_no:
16769 bra.w fdbcc_false # no; go handle counter
16770 fdbcc_uge_yes:
16771 rts # yes; do nothing
16774 # ordered less than or equal:
16775 # ___
16776 # Zv(N^NAN)
16778 fdbcc_ole:
16779 fbole.w fdbcc_ole_yes # ordered less than or equal?
16780 fdbcc_ole_no:
16781 bra.w fdbcc_false # no; go handle counter
16782 fdbcc_ole_yes:
16783 rts # yes; do nothing
16786 # unordered or greater than:
16787 # ___
16788 # NANv(NvZ)
16790 fdbcc_ugt:
16791 fbugt.w fdbcc_ugt_yes # unordered or greater than?
16792 fdbcc_ugt_no:
16793 bra.w fdbcc_false # no; go handle counter
16794 fdbcc_ugt_yes:
16795 rts # yes; do nothing
16798 # ordered greater or less than:
16799 # _____
16800 # NANvZ
16802 fdbcc_ogl:
16803 fbogl.w fdbcc_ogl_yes # ordered greater or less than?
16804 fdbcc_ogl_no:
16805 bra.w fdbcc_false # no; go handle counter
16806 fdbcc_ogl_yes:
16807 rts # yes; do nothing
16810 # unordered or equal:
16812 # NANvZ
16814 fdbcc_ueq:
16815 fbueq.w fdbcc_ueq_yes # unordered or equal?
16816 fdbcc_ueq_no:
16817 bra.w fdbcc_false # no; go handle counter
16818 fdbcc_ueq_yes:
16819 rts # yes; do nothing
16822 # ordered:
16823 # ___
16824 # NAN
16826 fdbcc_or:
16827 fbor.w fdbcc_or_yes # ordered?
16828 fdbcc_or_no:
16829 bra.w fdbcc_false # no; go handle counter
16830 fdbcc_or_yes:
16831 rts # yes; do nothing
16834 # unordered:
16836 # NAN
16838 fdbcc_un:
16839 fbun.w fdbcc_un_yes # unordered?
16840 fdbcc_un_no:
16841 bra.w fdbcc_false # no; go handle counter
16842 fdbcc_un_yes:
16843 rts # yes; do nothing
16845 #######################################################################
16848 # the bsun exception bit was not set.
16850 # (1) subtract 1 from the count register
16851 # (2) if (cr == -1) then
16852 # pc = pc of next instruction
16853 # else
16854 # pc += sign_ext(16-bit displacement)
16856 fdbcc_false:
16857 mov.b 1+EXC_OPWORD(%a6), %d1 # fetch lo opword
16858 andi.w &0x7, %d1 # extract count register
16860 bsr.l fetch_dreg # fetch count value
16861 # make sure that d0 isn't corrupted between calls...
16863 subq.w &0x1, %d0 # Dn - 1 -> Dn
16865 bsr.l store_dreg_l # store new count value
16867 cmpi.w %d0, &-0x1 # is (Dn == -1)?
16868 bne.b fdbcc_false_cont # no;
rts # yes; (Dn == -1), don't branch
16871 fdbcc_false_cont:
16872 mov.l L_SCR1(%a6),%d0 # fetch displacement
16873 add.l USER_FPIAR(%a6),%d0 # add instruction PC
16874 addq.l &0x4,%d0 # add instruction length
16875 mov.l %d0,EXC_PC(%a6) # set new PC
rts
16878 # the emulation routine set bsun and BSUN was enabled. have to
16879 # fix stack and jump to the bsun handler.
16880 # let the caller of this routine shift the stack frame up to
16881 # eliminate the effective address field.
16882 fdbcc_bsun:
16883 mov.b &fbsun_flg,SPCOND_FLG(%a6)
rts
16886 #########################################################################
16887 # ftrapcc(): routine to emulate the ftrapcc instruction #
16889 # XDEF **************************************************************** #
16890 # _ftrapcc() #
16892 # XREF **************************************************************** #
16893 # none #
16895 # INPUT *************************************************************** #
16896 # none #
16898 # OUTPUT ************************************************************** #
16899 # none #
16901 # ALGORITHM *********************************************************** #
16902 # This routine checks which conditional predicate is specified by #
16903 # the stacked ftrapcc instruction opcode and then branches to a routine #
16904 # for that predicate. The corresponding fbcc instruction is then used #
16905 # to see whether the condition (specified by the stacked FPSR) is true #
16906 # or false. #
16907 # If a BSUN exception should be indicated, the BSUN and ABSUN #
16908 # bits are set in the stacked FPSR. If the BSUN exception is enabled, #
16909 # the fbsun_flg is set in the SPCOND_FLG location on the stack. If an #
16910 # enabled BSUN should not be flagged and the predicate is true, then #
16911 # the ftrapcc_flg is set in the SPCOND_FLG location. These special #
16912 # flags indicate to the calling routine to emulate the exceptional #
16913 # condition. #
16915 #########################################################################
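#
# A rough C model of the decision described above (illustrative only;
# fbsun_flg/ftrapcc_flg are the package's SPCOND_FLG values, the rest are
# ad-hoc stand-ins; EQ/NE and the IEEE aware predicates never raise BSUN):
#
# if ((fpsr_cc & NAN_CC) && (fpcr_enable & BSUN_ENABLE))
# SPCOND_FLG = fbsun_flg; /* caller raises BSUN */
# else if (cc_true(pred, fpsr_cc))
# SPCOND_FLG = ftrapcc_flg; /* caller emulates the taken trap */
# /* else: predicate false, do nothing */
# /* whenever NAN is set, BSUN and AIOP are also set in the stacked FPSR */
#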
16917 global _ftrapcc
16918 _ftrapcc:
16919 mov.w EXC_CMDREG(%a6),%d0 # fetch predicate
16921 clr.l %d1 # clear scratch reg
16922 mov.b FPSR_CC(%a6),%d1 # fetch fp ccodes
16923 ror.l &0x8,%d1 # rotate to top byte
16924 fmov.l %d1,%fpsr # insert into FPSR
16926 mov.w (tbl_ftrapcc.b,%pc,%d0.w*2), %d1 # load table
16927 jmp (tbl_ftrapcc.b,%pc,%d1.w) # jump to ftrapcc routine
16929 tbl_ftrapcc:
16930 short ftrapcc_f - tbl_ftrapcc # 00
16931 short ftrapcc_eq - tbl_ftrapcc # 01
16932 short ftrapcc_ogt - tbl_ftrapcc # 02
16933 short ftrapcc_oge - tbl_ftrapcc # 03
16934 short ftrapcc_olt - tbl_ftrapcc # 04
16935 short ftrapcc_ole - tbl_ftrapcc # 05
16936 short ftrapcc_ogl - tbl_ftrapcc # 06
16937 short ftrapcc_or - tbl_ftrapcc # 07
16938 short ftrapcc_un - tbl_ftrapcc # 08
16939 short ftrapcc_ueq - tbl_ftrapcc # 09
16940 short ftrapcc_ugt - tbl_ftrapcc # 10
16941 short ftrapcc_uge - tbl_ftrapcc # 11
16942 short ftrapcc_ult - tbl_ftrapcc # 12
16943 short ftrapcc_ule - tbl_ftrapcc # 13
16944 short ftrapcc_neq - tbl_ftrapcc # 14
16945 short ftrapcc_t - tbl_ftrapcc # 15
16946 short ftrapcc_sf - tbl_ftrapcc # 16
16947 short ftrapcc_seq - tbl_ftrapcc # 17
16948 short ftrapcc_gt - tbl_ftrapcc # 18
16949 short ftrapcc_ge - tbl_ftrapcc # 19
16950 short ftrapcc_lt - tbl_ftrapcc # 20
16951 short ftrapcc_le - tbl_ftrapcc # 21
16952 short ftrapcc_gl - tbl_ftrapcc # 22
16953 short ftrapcc_gle - tbl_ftrapcc # 23
16954 short ftrapcc_ngle - tbl_ftrapcc # 24
16955 short ftrapcc_ngl - tbl_ftrapcc # 25
16956 short ftrapcc_nle - tbl_ftrapcc # 26
16957 short ftrapcc_nlt - tbl_ftrapcc # 27
16958 short ftrapcc_nge - tbl_ftrapcc # 28
16959 short ftrapcc_ngt - tbl_ftrapcc # 29
16960 short ftrapcc_sneq - tbl_ftrapcc # 30
16961 short ftrapcc_st - tbl_ftrapcc # 31
16963 #########################################################################
16965 # IEEE Nonaware tests #
16967 # For the IEEE nonaware tests, we set the result based on the #
16968 # floating point condition codes. In addition, we check to see #
16969 # if the NAN bit is set, in which case BSUN and AIOP will be set. #
16971 # The cases EQ and NE are shared by the Aware and Nonaware groups #
16972 # and are incapable of setting the BSUN exception bit. #
16974 # Typically, only one of the two possible branch directions could #
16975 # have the NAN bit set. #
16977 #########################################################################
16980 # equal:
16984 ftrapcc_eq:
16985 fbeq.w ftrapcc_trap # equal?
16986 ftrapcc_eq_no:
16987 rts # do nothing
16990 # not equal:
16994 ftrapcc_neq:
16995 fbneq.w ftrapcc_trap # not equal?
16996 ftrapcc_neq_no:
16997 rts # do nothing
17000 # greater than:
17001 # _______
17002 # NANvZvN
17004 ftrapcc_gt:
17005 fbgt.w ftrapcc_trap # greater than?
17006 ftrapcc_gt_no:
17007 btst &nan_bit, FPSR_CC(%a6) # is NAN set in cc?
17008 beq.b ftrapcc_gt_done # no
17009 ori.l &bsun_mask+aiop_mask, USER_FPSR(%a6) # set BSUN exc bit
17010 btst &bsun_bit, FPCR_ENABLE(%a6) # was BSUN set?
17011 bne.w ftrapcc_bsun # yes
17012 ftrapcc_gt_done:
17013 rts # no; do nothing
17016 # not greater than:
17018 # NANvZvN
17020 ftrapcc_ngt:
17021 fbngt.w ftrapcc_ngt_yes # not greater than?
17022 ftrapcc_ngt_no:
17023 rts # do nothing
17024 ftrapcc_ngt_yes:
17025 btst &nan_bit, FPSR_CC(%a6) # is NAN set in cc?
17026 beq.w ftrapcc_trap # no; go take trap
17027 ori.l &bsun_mask+aiop_mask, USER_FPSR(%a6) # set BSUN exc bit
17028 btst &bsun_bit, FPCR_ENABLE(%a6) # was BSUN set?
17029 bne.w ftrapcc_bsun # yes
17030 bra.w ftrapcc_trap # no; go take trap
17033 # greater than or equal:
17034 # _____
17035 # Zv(NANvN)
17037 ftrapcc_ge:
17038 fbge.w ftrapcc_ge_yes # greater than or equal?
17039 ftrapcc_ge_no:
17040 btst &nan_bit, FPSR_CC(%a6) # is NAN set in cc?
17041 beq.b ftrapcc_ge_done # no; go finish
17042 ori.l &bsun_mask+aiop_mask, USER_FPSR(%a6) # set BSUN exc bit
17043 btst &bsun_bit, FPCR_ENABLE(%a6) # was BSUN set?
17044 bne.w ftrapcc_bsun # yes
17045 ftrapcc_ge_done:
17046 rts # no; do nothing
17047 ftrapcc_ge_yes:
17048 btst &nan_bit, FPSR_CC(%a6) # is NAN set in cc?
17049 beq.w ftrapcc_trap # no; go take trap
17050 ori.l &bsun_mask+aiop_mask, USER_FPSR(%a6) # set BSUN exc bit
17051 btst &bsun_bit, FPCR_ENABLE(%a6) # was BSUN set?
17052 bne.w ftrapcc_bsun # yes
17053 bra.w ftrapcc_trap # no; go take trap
17056 # not (greater than or equal):
17058 # NANv(N^Z)
17060 ftrapcc_nge:
17061 fbnge.w ftrapcc_nge_yes # not (greater than or equal)?
17062 ftrapcc_nge_no:
17063 rts # do nothing
17064 ftrapcc_nge_yes:
17065 btst &nan_bit, FPSR_CC(%a6) # is NAN set in cc?
17066 beq.w ftrapcc_trap # no; go take trap
17067 ori.l &bsun_mask+aiop_mask, USER_FPSR(%a6) # set BSUN exc bit
17068 btst &bsun_bit, FPCR_ENABLE(%a6) # was BSUN set?
17069 bne.w ftrapcc_bsun # yes
17070 bra.w ftrapcc_trap # no; go take trap
17073 # less than:
17074 # _____
17075 # N^(NANvZ)
17077 ftrapcc_lt:
17078 fblt.w ftrapcc_trap # less than?
17079 ftrapcc_lt_no:
17080 btst &nan_bit, FPSR_CC(%a6) # is NAN set in cc?
17081 beq.b ftrapcc_lt_done # no; go finish
17082 ori.l &bsun_mask+aiop_mask, USER_FPSR(%a6) # set BSUN exc bit
17083 btst &bsun_bit, FPCR_ENABLE(%a6) # was BSUN set?
17084 bne.w ftrapcc_bsun # yes
17085 ftrapcc_lt_done:
17086 rts # no; do nothing
17089 # not less than:
17091 # NANv(ZvN)
17093 ftrapcc_nlt:
17094 fbnlt.w ftrapcc_nlt_yes # not less than?
17095 ftrapcc_nlt_no:
17096 rts # do nothing
17097 ftrapcc_nlt_yes:
17098 btst &nan_bit, FPSR_CC(%a6) # is NAN set in cc?
17099 beq.w ftrapcc_trap # no; go take trap
17100 ori.l &bsun_mask+aiop_mask, USER_FPSR(%a6) # set BSUN exc bit
17101 btst &bsun_bit, FPCR_ENABLE(%a6) # was BSUN set?
17102 bne.w ftrapcc_bsun # yes
17103 bra.w ftrapcc_trap # no; go take trap
17106 # less than or equal:
17107 # ___
17108 # Zv(N^NAN)
17110 ftrapcc_le:
17111 fble.w ftrapcc_le_yes # less than or equal?
17112 ftrapcc_le_no:
17113 btst &nan_bit, FPSR_CC(%a6) # is NAN set in cc?
17114 beq.b ftrapcc_le_done # no; go finish
17115 ori.l &bsun_mask+aiop_mask, USER_FPSR(%a6) # set BSUN exc bit
17116 btst &bsun_bit, FPCR_ENABLE(%a6) # was BSUN set?
17117 bne.w ftrapcc_bsun # yes
17118 ftrapcc_le_done:
17119 rts # no; do nothing
17120 ftrapcc_le_yes:
17121 btst &nan_bit, FPSR_CC(%a6) # is NAN set in cc?
17122 beq.w ftrapcc_trap # no; go take trap
17123 ori.l &bsun_mask+aiop_mask, USER_FPSR(%a6) # set BSUN exc bit
17124 btst &bsun_bit, FPCR_ENABLE(%a6) # was BSUN set?
17125 bne.w ftrapcc_bsun # yes
17126 bra.w ftrapcc_trap # no; go take trap
17129 # not (less than or equal):
17130 # ___
17131 # NANv(NvZ)
17133 ftrapcc_nle:
17134 fbnle.w ftrapcc_nle_yes # not (less than or equal)?
17135 ftrapcc_nle_no:
17136 rts # do nothing
17137 ftrapcc_nle_yes:
17138 btst &nan_bit, FPSR_CC(%a6) # is NAN set in cc?
17139 beq.w ftrapcc_trap # no; go take trap
17140 ori.l &bsun_mask+aiop_mask, USER_FPSR(%a6) # set BSUN exc bit
17141 btst &bsun_bit, FPCR_ENABLE(%a6) # was BSUN set?
17142 bne.w ftrapcc_bsun # yes
17143 bra.w ftrapcc_trap # no; go take trap
17146 # greater or less than:
17147 # _____
17148 # NANvZ
17150 ftrapcc_gl:
17151 fbgl.w ftrapcc_trap # greater or less than?
17152 ftrapcc_gl_no:
17153 btst &nan_bit, FPSR_CC(%a6) # is NAN set in cc?
17154 beq.b ftrapcc_gl_done # no; go finish
17155 ori.l &bsun_mask+aiop_mask, USER_FPSR(%a6) # set BSUN exc bit
17156 btst &bsun_bit, FPCR_ENABLE(%a6) # was BSUN set?
17157 bne.w ftrapcc_bsun # yes
17158 ftrapcc_gl_done:
17159 rts # no; do nothing
17162 # not (greater or less than):
17164 # NANvZ
17166 ftrapcc_ngl:
17167 fbngl.w ftrapcc_ngl_yes # not (greater or less than)?
17168 ftrapcc_ngl_no:
17169 rts # do nothing
17170 ftrapcc_ngl_yes:
17171 btst &nan_bit, FPSR_CC(%a6) # is NAN set in cc?
17172 beq.w ftrapcc_trap # no; go take trap
17173 ori.l &bsun_mask+aiop_mask, USER_FPSR(%a6) # set BSUN exc bit
17174 btst &bsun_bit, FPCR_ENABLE(%a6) # was BSUN set?
17175 bne.w ftrapcc_bsun # yes
17176 bra.w ftrapcc_trap # no; go take trap
17179 # greater, less, or equal:
17180 # ___
17181 # NAN
17183 ftrapcc_gle:
17184 fbgle.w ftrapcc_trap # greater, less, or equal?
17185 ftrapcc_gle_no:
17186 ori.l &bsun_mask+aiop_mask, USER_FPSR(%a6) # set BSUN exc bit
17187 btst &bsun_bit, FPCR_ENABLE(%a6) # was BSUN set?
17188 bne.w ftrapcc_bsun # yes
17189 rts # no; do nothing
17192 # not (greater, less, or equal):
17194 # NAN
17196 ftrapcc_ngle:
17197 fbngle.w ftrapcc_ngle_yes # not (greater, less, or equal)?
17198 ftrapcc_ngle_no:
17199 rts # do nothing
17200 ftrapcc_ngle_yes:
17201 ori.l &bsun_mask+aiop_mask, USER_FPSR(%a6) # set BSUN exc bit
17202 btst &bsun_bit, FPCR_ENABLE(%a6) # was BSUN set?
17203 bne.w ftrapcc_bsun # yes
17204 bra.w ftrapcc_trap # no; go take trap
17206 #########################################################################
17208 # Miscellaneous tests #
17210 # For the miscellaneous tests, the result is set from the floating #
17211 # point condition codes. F and T never set BSUN; the signalling #
17212 # variants (SF, ST, SEQ, SNEQ) set BSUN and AIOP if the NAN bit is set. #
17214 #########################################################################
17217 # false:
17219 # False
17221 ftrapcc_f:
17222 rts # do nothing
17225 # true:
17227 # True
17229 ftrapcc_t:
17230 bra.w ftrapcc_trap # go take trap
17233 # signalling false:
17235 # False
17237 ftrapcc_sf:
17238 btst &nan_bit, FPSR_CC(%a6) # is NAN set in cc?
17239 beq.b ftrapcc_sf_done # no; go finish
17240 ori.l &bsun_mask+aiop_mask, USER_FPSR(%a6) # set BSUN exc bit
17241 btst &bsun_bit, FPCR_ENABLE(%a6) # was BSUN set?
17242 bne.w ftrapcc_bsun # yes
17243 ftrapcc_sf_done:
17244 rts # no; do nothing
17247 # signalling true:
17249 # True
17251 ftrapcc_st:
17252 btst &nan_bit, FPSR_CC(%a6) # is NAN set in cc?
17253 beq.w ftrapcc_trap # no; go take trap
17254 ori.l &bsun_mask+aiop_mask, USER_FPSR(%a6) # set BSUN exc bit
17255 btst &bsun_bit, FPCR_ENABLE(%a6) # was BSUN set?
17256 bne.w ftrapcc_bsun # yes
17257 bra.w ftrapcc_trap # no; go take trap
17260 # signalling equal:
17264 ftrapcc_seq:
17265 fbseq.w ftrapcc_seq_yes # signalling equal?
17266 ftrapcc_seq_no:
17267 btst &nan_bit, FPSR_CC(%a6) # is NAN set in cc?
17268 beq.w ftrapcc_seq_done # no; go finish
17269 ori.l &bsun_mask+aiop_mask, USER_FPSR(%a6) # set BSUN exc bit
17270 btst &bsun_bit, FPCR_ENABLE(%a6) # was BSUN set?
17271 bne.w ftrapcc_bsun # yes
17272 ftrapcc_seq_done:
17273 rts # no; do nothing
17274 ftrapcc_seq_yes:
17275 btst &nan_bit, FPSR_CC(%a6) # is NAN set in cc?
17276 beq.w ftrapcc_trap # no; go take trap
17277 ori.l &bsun_mask+aiop_mask, USER_FPSR(%a6) # set BSUN exc bit
17278 btst &bsun_bit, FPCR_ENABLE(%a6) # was BSUN set?
17279 bne.w ftrapcc_bsun # yes
17280 bra.w ftrapcc_trap # no; go take trap
17283 # signalling not equal:
17287 ftrapcc_sneq:
17288 fbsneq.w ftrapcc_sneq_yes # signalling not equal?
17289 ftrapcc_sneq_no:
17290 btst &nan_bit, FPSR_CC(%a6) # is NAN set in cc?
17291 beq.w ftrapcc_sneq_no_done # no; go finish
17292 ori.l &bsun_mask+aiop_mask, USER_FPSR(%a6) # set BSUN exc bit
17293 btst &bsun_bit, FPCR_ENABLE(%a6) # was BSUN set?
17294 bne.w ftrapcc_bsun # yes
17295 ftrapcc_sneq_no_done:
17296 rts # do nothing
17297 ftrapcc_sneq_yes:
17298 btst &nan_bit, FPSR_CC(%a6) # is NAN set in cc?
17299 beq.w ftrapcc_trap # no; go take trap
17300 ori.l &bsun_mask+aiop_mask, USER_FPSR(%a6) # set BSUN exc bit
17301 btst &bsun_bit, FPCR_ENABLE(%a6) # was BSUN set?
17302 bne.w ftrapcc_bsun # yes
17303 bra.w ftrapcc_trap # no; go take trap
17305 #########################################################################
17307 # IEEE Aware tests #
17309 # For the IEEE aware tests, we only have to set the result based on the #
17310 # floating point condition codes. The BSUN exception will not be #
17311 # set for any of these tests. #
17313 #########################################################################
17316 # ordered greater than:
17317 # _______
17318 # NANvZvN
17320 ftrapcc_ogt:
17321 fbogt.w ftrapcc_trap # ordered greater than?
17322 ftrapcc_ogt_no:
17323 rts # do nothing
17326 # unordered or less or equal:
17327 # _______
17328 # NANvZvN
17330 ftrapcc_ule:
17331 fbule.w ftrapcc_trap # unordered or less or equal?
17332 ftrapcc_ule_no:
17333 rts # do nothing
17336 # ordered greater than or equal:
17337 # _____
17338 # Zv(NANvN)
17340 ftrapcc_oge:
17341 fboge.w ftrapcc_trap # ordered greater than or equal?
17342 ftrapcc_oge_no:
17343 rts # do nothing
17346 # unordered or less than:
17348 # NANv(N^Z)
17350 ftrapcc_ult:
17351 fbult.w ftrapcc_trap # unordered or less than?
17352 ftrapcc_ult_no:
17353 rts # do nothing
17356 # ordered less than:
17357 # _____
17358 # N^(NANvZ)
17360 ftrapcc_olt:
17361 fbolt.w ftrapcc_trap # ordered less than?
17362 ftrapcc_olt_no:
17363 rts # do nothing
17366 # unordered or greater or equal:
17368 # NANvZvN
17370 ftrapcc_uge:
17371 fbuge.w ftrapcc_trap # unordered or greater or equal?
17372 ftrapcc_uge_no:
17373 rts # do nothing
17376 # ordered less than or equal:
17377 # ___
17378 # Zv(N^NAN)
17380 ftrapcc_ole:
17381 fbole.w ftrapcc_trap # ordered less than or equal?
17382 ftrapcc_ole_no:
17383 rts # do nothing
17386 # unordered or greater than:
17387 # ___
17388 # NANv(NvZ)
17390 ftrapcc_ugt:
17391 fbugt.w ftrapcc_trap # unordered or greater than?
17392 ftrapcc_ugt_no:
17393 rts # do nothing
17396 # ordered greater or less than:
17397 # _____
17398 # NANvZ
17400 ftrapcc_ogl:
17401 fbogl.w ftrapcc_trap # ordered greater or less than?
17402 ftrapcc_ogl_no:
17403 rts # do nothing
17406 # unordered or equal:
17408 # NANvZ
17410 ftrapcc_ueq:
17411 fbueq.w ftrapcc_trap # unordered or equal?
17412 ftrapcc_ueq_no:
17413 rts # do nothing
17416 # ordered:
17417 # ___
17418 # NAN
17420 ftrapcc_or:
17421 fbor.w ftrapcc_trap # ordered?
17422 ftrapcc_or_no:
17423 rts # do nothing
17426 # unordered:
17428 # NAN
17430 ftrapcc_un:
17431 fbun.w ftrapcc_trap # unordered?
17432 ftrapcc_un_no:
17433 rts # do nothing
17435 #######################################################################
17437 # the bsun exception bit was not set.
17438 # we will need to jump to the ftrapcc vector. the stack frame
17439 # is the same size as that of the fp unimp instruction. the
17440 # only difference is that the <ea> field should hold the PC
17441 # of the ftrapcc instruction and the vector offset field
17442 # should denote the ftrapcc trap.
17443 ftrapcc_trap:
17444 mov.b &ftrapcc_flg,SPCOND_FLG(%a6)
17447 # the emulation routine set the BSUN bit and the BSUN exception was
17448 # enabled. we have to fix the stack and jump to the bsun handler.
17449 # let the caller of this routine shift the stack frame up to
17450 # eliminate the effective address field.
17451 ftrapcc_bsun:
17452 mov.b &fbsun_flg,SPCOND_FLG(%a6)
17455 #########################################################################
17456 # fscc(): routine to emulate the fscc instruction #
17458 # XDEF **************************************************************** #
17459 # _fscc() #
17461 # XREF **************************************************************** #
17462 # store_dreg_b() - store result to data register file #
17463 # dec_areg() - decrement an areg for -(an) mode #
17464 # inc_areg() - increment an areg for (an)+ mode #
17465 # _dmem_write_byte() - store result to memory #
17467 # INPUT *************************************************************** #
17468 # none #
17470 # OUTPUT ************************************************************** #
17471 # none #
17473 # ALGORITHM *********************************************************** #
17474 # This routine checks which conditional predicate is specified by #
17475 # the stacked fscc instruction opcode and then branches to a routine #
17476 # for that predicate. The corresponding fbcc instruction is then used #
17477 # to see whether the condition (specified by the stacked FPSR) is true #
17478 # or false. #
17479 # If a BSUN exception should be indicated, the BSUN and AIOP #
17480 # bits are set in the stacked FPSR. If the BSUN exception is enabled, #
17481 # the fbsun_flg is set in the SPCOND_FLG location on the stack. If an #
17482 # enabled BSUN is not flagged, then the result (true or false) is #
17483 # stored to the data register file or memory. #
17485 #########################################################################
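#
# note: in outline, the emulation below behaves like this C sketch (names
# are illustrative; store_byte() stands in for the Dn/memory cases):
#
#	uint8_t res = cond_true ? 0xff : 0x00;	/* Scc result: all ones or all zeros */
#	if (nonaware_pred && (fpsr_cc & NAN)) {
#		user_fpsr |= BSUN | AIOP;
#		if (fpcr_enable & BSUN)
#			return fscc_bsun();	/* enabled BSUN: result is not stored */
#	}
#	store_byte(res);			/* data register or memory, per <ea> */
#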
17487 global _fscc
17488 _fscc:
17489 mov.w EXC_CMDREG(%a6),%d0 # fetch predicate
17491 clr.l %d1 # clear scratch reg
17492 mov.b FPSR_CC(%a6),%d1 # fetch fp ccodes
17493 ror.l &0x8,%d1 # rotate to top byte
17494 fmov.l %d1,%fpsr # insert into FPSR
17496 mov.w (tbl_fscc.b,%pc,%d0.w*2),%d1 # load table
17497 jmp (tbl_fscc.b,%pc,%d1.w) # jump to fscc routine
17499 tbl_fscc:
17500 short fscc_f - tbl_fscc # 00
17501 short fscc_eq - tbl_fscc # 01
17502 short fscc_ogt - tbl_fscc # 02
17503 short fscc_oge - tbl_fscc # 03
17504 short fscc_olt - tbl_fscc # 04
17505 short fscc_ole - tbl_fscc # 05
17506 short fscc_ogl - tbl_fscc # 06
17507 short fscc_or - tbl_fscc # 07
17508 short fscc_un - tbl_fscc # 08
17509 short fscc_ueq - tbl_fscc # 09
17510 short fscc_ugt - tbl_fscc # 10
17511 short fscc_uge - tbl_fscc # 11
17512 short fscc_ult - tbl_fscc # 12
17513 short fscc_ule - tbl_fscc # 13
17514 short fscc_neq - tbl_fscc # 14
17515 short fscc_t - tbl_fscc # 15
17516 short fscc_sf - tbl_fscc # 16
17517 short fscc_seq - tbl_fscc # 17
17518 short fscc_gt - tbl_fscc # 18
17519 short fscc_ge - tbl_fscc # 19
17520 short fscc_lt - tbl_fscc # 20
17521 short fscc_le - tbl_fscc # 21
17522 short fscc_gl - tbl_fscc # 22
17523 short fscc_gle - tbl_fscc # 23
17524 short fscc_ngle - tbl_fscc # 24
17525 short fscc_ngl - tbl_fscc # 25
17526 short fscc_nle - tbl_fscc # 26
17527 short fscc_nlt - tbl_fscc # 27
17528 short fscc_nge - tbl_fscc # 28
17529 short fscc_ngt - tbl_fscc # 29
17530 short fscc_sneq - tbl_fscc # 30
17531 short fscc_st - tbl_fscc # 31
17533 #########################################################################
17535 # IEEE Nonaware tests #
17537 # For the IEEE nonaware tests, we set the result based on the #
17538 # floating point condition codes. In addition, we check to see #
17539 # if the NAN bit is set, in which case BSUN and AIOP will be set. #
17541 # The cases EQ and NE are shared by the Aware and Nonaware groups #
17542 # and are incapable of setting the BSUN exception bit. #
17544 # Typically, only one of the two possible branch directions could #
17545 # have the NAN bit set. #
17547 #########################################################################
17550 # equal:
17554 fscc_eq:
17555 fbeq.w fscc_eq_yes # equal?
17556 fscc_eq_no:
17557 clr.b %d0 # set false
17558 bra.w fscc_done # go finish
17559 fscc_eq_yes:
17560 st %d0 # set true
17561 bra.w fscc_done # go finish
17564 # not equal:
17568 fscc_neq:
17569 fbneq.w fscc_neq_yes # not equal?
17570 fscc_neq_no:
17571 clr.b %d0 # set false
17572 bra.w fscc_done # go finish
17573 fscc_neq_yes:
17574 st %d0 # set true
17575 bra.w fscc_done # go finish
17578 # greater than:
17579 # _______
17580 # NANvZvN
17582 fscc_gt:
17583 fbgt.w fscc_gt_yes # greater than?
17584 fscc_gt_no:
17585 clr.b %d0 # set false
17586 btst &nan_bit, FPSR_CC(%a6) # is NAN set in cc?
17587 beq.w fscc_done # no;go finish
17588 ori.l &bsun_mask+aiop_mask, USER_FPSR(%a6) # set BSUN exc bit
17589 bra.w fscc_chk_bsun # go finish
17590 fscc_gt_yes:
17591 st %d0 # set true
17592 bra.w fscc_done # go finish
17595 # not greater than:
17597 # NANvZvN
17599 fscc_ngt:
17600 fbngt.w fscc_ngt_yes # not greater than?
17601 fscc_ngt_no:
17602 clr.b %d0 # set false
17603 bra.w fscc_done # go finish
17604 fscc_ngt_yes:
17605 st %d0 # set true
17606 btst &nan_bit, FPSR_CC(%a6) # is NAN set in cc?
17607 beq.w fscc_done # no;go finish
17608 ori.l &bsun_mask+aiop_mask, USER_FPSR(%a6) # set BSUN exc bit
17609 bra.w fscc_chk_bsun # go finish
17612 # greater than or equal:
17613 # _____
17614 # Zv(NANvN)
17616 fscc_ge:
17617 fbge.w fscc_ge_yes # greater than or equal?
17618 fscc_ge_no:
17619 clr.b %d0 # set false
17620 btst &nan_bit, FPSR_CC(%a6) # is NAN set in cc?
17621 beq.w fscc_done # no;go finish
17622 ori.l &bsun_mask+aiop_mask, USER_FPSR(%a6) # set BSUN exc bit
17623 bra.w fscc_chk_bsun # go finish
17624 fscc_ge_yes:
17625 st %d0 # set true
17626 btst &nan_bit, FPSR_CC(%a6) # is NAN set in cc?
17627 beq.w fscc_done # no;go finish
17628 ori.l &bsun_mask+aiop_mask, USER_FPSR(%a6) # set BSUN exc bit
17629 bra.w fscc_chk_bsun # go finish
17632 # not (greater than or equal):
17634 # NANv(N^Z)
17636 fscc_nge:
17637 fbnge.w fscc_nge_yes # not (greater than or equal)?
17638 fscc_nge_no:
17639 clr.b %d0 # set false
17640 bra.w fscc_done # go finish
17641 fscc_nge_yes:
17642 st %d0 # set true
17643 btst &nan_bit, FPSR_CC(%a6) # is NAN set in cc?
17644 beq.w fscc_done # no;go finish
17645 ori.l &bsun_mask+aiop_mask, USER_FPSR(%a6) # set BSUN exc bit
17646 bra.w fscc_chk_bsun # go finish
17649 # less than:
17650 # _____
17651 # N^(NANvZ)
17653 fscc_lt:
17654 fblt.w fscc_lt_yes # less than?
17655 fscc_lt_no:
17656 clr.b %d0 # set false
17657 btst &nan_bit, FPSR_CC(%a6) # is NAN set in cc?
17658 beq.w fscc_done # no;go finish
17659 ori.l &bsun_mask+aiop_mask, USER_FPSR(%a6) # set BSUN exc bit
17660 bra.w fscc_chk_bsun # go finish
17661 fscc_lt_yes:
17662 st %d0 # set true
17663 bra.w fscc_done # go finish
17666 # not less than:
17668 # NANv(ZvN)
17670 fscc_nlt:
17671 fbnlt.w fscc_nlt_yes # not less than?
17672 fscc_nlt_no:
17673 clr.b %d0 # set false
17674 bra.w fscc_done # go finish
17675 fscc_nlt_yes:
17676 st %d0 # set true
17677 btst &nan_bit, FPSR_CC(%a6) # is NAN set in cc?
17678 beq.w fscc_done # no;go finish
17679 ori.l &bsun_mask+aiop_mask, USER_FPSR(%a6) # set BSUN exc bit
17680 bra.w fscc_chk_bsun # go finish
17683 # less than or equal:
17684 # ___
17685 # Zv(N^NAN)
17687 fscc_le:
17688 fble.w fscc_le_yes # less than or equal?
17689 fscc_le_no:
17690 clr.b %d0 # set false
17691 btst &nan_bit, FPSR_CC(%a6) # is NAN set in cc?
17692 beq.w fscc_done # no;go finish
17693 ori.l &bsun_mask+aiop_mask, USER_FPSR(%a6) # set BSUN exc bit
17694 bra.w fscc_chk_bsun # go finish
17695 fscc_le_yes:
17696 st %d0 # set true
17697 btst &nan_bit, FPSR_CC(%a6) # is NAN set in cc?
17698 beq.w fscc_done # no;go finish
17699 ori.l &bsun_mask+aiop_mask, USER_FPSR(%a6) # set BSUN exc bit
17700 bra.w fscc_chk_bsun # go finish
17703 # not (less than or equal):
17704 # ___
17705 # NANv(NvZ)
17707 fscc_nle:
17708 fbnle.w fscc_nle_yes # not (less than or equal)?
17709 fscc_nle_no:
17710 clr.b %d0 # set false
17711 bra.w fscc_done # go finish
17712 fscc_nle_yes:
17713 st %d0 # set true
17714 btst &nan_bit, FPSR_CC(%a6) # is NAN set in cc?
17715 beq.w fscc_done # no;go finish
17716 ori.l &bsun_mask+aiop_mask, USER_FPSR(%a6) # set BSUN exc bit
17717 bra.w fscc_chk_bsun # go finish
17720 # greater or less than:
17721 # _____
17722 # NANvZ
17724 fscc_gl:
17725 fbgl.w fscc_gl_yes # greater or less than?
17726 fscc_gl_no:
17727 clr.b %d0 # set false
17728 btst &nan_bit, FPSR_CC(%a6) # is NAN set in cc?
17729 beq.w fscc_done # no;go finish
17730 ori.l &bsun_mask+aiop_mask, USER_FPSR(%a6) # set BSUN exc bit
17731 bra.w fscc_chk_bsun # go finish
17732 fscc_gl_yes:
17733 st %d0 # set true
17734 bra.w fscc_done # go finish
17737 # not (greater or less than):
17739 # NANvZ
17741 fscc_ngl:
17742 fbngl.w fscc_ngl_yes # not (greater or less than)?
17743 fscc_ngl_no:
17744 clr.b %d0 # set false
17745 bra.w fscc_done # go finish
17746 fscc_ngl_yes:
17747 st %d0 # set true
17748 btst &nan_bit, FPSR_CC(%a6) # is NAN set in cc?
17749 beq.w fscc_done # no;go finish
17750 ori.l &bsun_mask+aiop_mask, USER_FPSR(%a6) # set BSUN exc bit
17751 bra.w fscc_chk_bsun # go finish
17754 # greater, less, or equal:
17755 # ___
17756 # NAN
17758 fscc_gle:
17759 fbgle.w fscc_gle_yes # greater, less, or equal?
17760 fscc_gle_no:
17761 clr.b %d0 # set false
17762 ori.l &bsun_mask+aiop_mask, USER_FPSR(%a6) # set BSUN exc bit
17763 bra.w fscc_chk_bsun # go finish
17764 fscc_gle_yes:
17765 st %d0 # set true
17766 bra.w fscc_done # go finish
17769 # not (greater, less, or equal):
17771 # NAN
17773 fscc_ngle:
17774 fbngle.w fscc_ngle_yes # not (greater, less, or equal)?
17775 fscc_ngle_no:
17776 clr.b %d0 # set false
17777 bra.w fscc_done # go finish
17778 fscc_ngle_yes:
17779 st %d0 # set true
17780 ori.l &bsun_mask+aiop_mask, USER_FPSR(%a6) # set BSUN exc bit
17781 bra.w fscc_chk_bsun # go finish
17783 #########################################################################
17785 # Miscellaneous tests #
17787 # For the miscellaneous tests, the result is set from the floating #
17788 # point condition codes. F and T never set BSUN; the signalling #
17789 # variants (SF, ST, SEQ, SNEQ) set BSUN and AIOP if the NAN bit is set. #
17791 #########################################################################
17794 # false:
17796 # False
17798 fscc_f:
17799 clr.b %d0 # set false
17800 bra.w fscc_done # go finish
17803 # true:
17805 # True
17807 fscc_t:
17808 st %d0 # set true
17809 bra.w fscc_done # go finish
17812 # signalling false:
17814 # False
17816 fscc_sf:
17817 clr.b %d0 # set false
17818 btst &nan_bit, FPSR_CC(%a6) # is NAN set in cc?
17819 beq.w fscc_done # no;go finish
17820 ori.l &bsun_mask+aiop_mask, USER_FPSR(%a6) # set BSUN exc bit
17821 bra.w fscc_chk_bsun # go finish
17824 # signalling true:
17826 # True
17828 fscc_st:
17829 st %d0 # set true
17830 btst &nan_bit, FPSR_CC(%a6) # is NAN set in cc?
17831 beq.w fscc_done # no;go finish
17832 ori.l &bsun_mask+aiop_mask, USER_FPSR(%a6) # set BSUN exc bit
17833 bra.w fscc_chk_bsun # go finish
17836 # signalling equal:
17840 fscc_seq:
17841 fbseq.w fscc_seq_yes # signalling equal?
17842 fscc_seq_no:
17843 clr.b %d0 # set false
17844 btst &nan_bit, FPSR_CC(%a6) # is NAN set in cc?
17845 beq.w fscc_done # no;go finish
17846 ori.l &bsun_mask+aiop_mask, USER_FPSR(%a6) # set BSUN exc bit
17847 bra.w fscc_chk_bsun # go finish
17848 fscc_seq_yes:
17849 st %d0 # set true
17850 btst &nan_bit, FPSR_CC(%a6) # is NAN set in cc?
17851 beq.w fscc_done # no;go finish
17852 ori.l &bsun_mask+aiop_mask, USER_FPSR(%a6) # set BSUN exc bit
17853 bra.w fscc_chk_bsun # go finish
17856 # signalling not equal:
17860 fscc_sneq:
17861 fbsneq.w fscc_sneq_yes # signalling not equal?
17862 fscc_sneq_no:
17863 clr.b %d0 # set false
17864 btst &nan_bit, FPSR_CC(%a6) # is NAN set in cc?
17865 beq.w fscc_done # no;go finish
17866 ori.l &bsun_mask+aiop_mask, USER_FPSR(%a6) # set BSUN exc bit
17867 bra.w fscc_chk_bsun # go finish
17868 fscc_sneq_yes:
17869 st %d0 # set true
17870 btst &nan_bit, FPSR_CC(%a6) # is NAN set in cc?
17871 beq.w fscc_done # no;go finish
17872 ori.l &bsun_mask+aiop_mask, USER_FPSR(%a6) # set BSUN exc bit
17873 bra.w fscc_chk_bsun # go finish
17875 #########################################################################
17877 # IEEE Aware tests #
17879 # For the IEEE aware tests, we only have to set the result based on the #
17880 # floating point condition codes. The BSUN exception will not be #
17881 # set for any of these tests. #
17883 #########################################################################
17886 # ordered greater than:
17887 # _______
17888 # NANvZvN
17890 fscc_ogt:
17891 fbogt.w fscc_ogt_yes # ordered greater than?
17892 fscc_ogt_no:
17893 clr.b %d0 # set false
17894 bra.w fscc_done # go finish
17895 fscc_ogt_yes:
17896 st %d0 # set true
17897 bra.w fscc_done # go finish
17900 # unordered or less or equal:
17901 # _______
17902 # NANvZvN
17904 fscc_ule:
17905 fbule.w fscc_ule_yes # unordered or less or equal?
17906 fscc_ule_no:
17907 clr.b %d0 # set false
17908 bra.w fscc_done # go finish
17909 fscc_ule_yes:
17910 st %d0 # set true
17911 bra.w fscc_done # go finish
17914 # ordered greater than or equal:
17915 # _____
17916 # Zv(NANvN)
17918 fscc_oge:
17919 fboge.w fscc_oge_yes # ordered greater than or equal?
17920 fscc_oge_no:
17921 clr.b %d0 # set false
17922 bra.w fscc_done # go finish
17923 fscc_oge_yes:
17924 st %d0 # set true
17925 bra.w fscc_done # go finish
17928 # unordered or less than:
17930 # NANv(N^Z)
17932 fscc_ult:
17933 fbult.w fscc_ult_yes # unordered or less than?
17934 fscc_ult_no:
17935 clr.b %d0 # set false
17936 bra.w fscc_done # go finish
17937 fscc_ult_yes:
17938 st %d0 # set true
17939 bra.w fscc_done # go finish
17942 # ordered less than:
17943 # _____
17944 # N^(NANvZ)
17946 fscc_olt:
17947 fbolt.w fscc_olt_yes # ordered less than?
17948 fscc_olt_no:
17949 clr.b %d0 # set false
17950 bra.w fscc_done # go finish
17951 fscc_olt_yes:
17952 st %d0 # set true
17953 bra.w fscc_done # go finish
17956 # unordered or greater or equal:
17958 # NANvZvN
17960 fscc_uge:
17961 fbuge.w fscc_uge_yes # unordered or greater or equal?
17962 fscc_uge_no:
17963 clr.b %d0 # set false
17964 bra.w fscc_done # go finish
17965 fscc_uge_yes:
17966 st %d0 # set true
17967 bra.w fscc_done # go finish
17970 # ordered less than or equal:
17971 # ___
17972 # Zv(N^NAN)
17974 fscc_ole:
17975 fbole.w fscc_ole_yes # ordered less than or equal?
17976 fscc_ole_no:
17977 clr.b %d0 # set false
17978 bra.w fscc_done # go finish
17979 fscc_ole_yes:
17980 st %d0 # set true
17981 bra.w fscc_done # go finish
17984 # unordered or greater than:
17985 # ___
17986 # NANv(NvZ)
17988 fscc_ugt:
17989 fbugt.w fscc_ugt_yes # unordered or greater than?
17990 fscc_ugt_no:
17991 clr.b %d0 # set false
17992 bra.w fscc_done # go finish
17993 fscc_ugt_yes:
17994 st %d0 # set true
17995 bra.w fscc_done # go finish
17998 # ordered greater or less than:
17999 # _____
18000 # NANvZ
18002 fscc_ogl:
18003 fbogl.w fscc_ogl_yes # ordered greater or less than?
18004 fscc_ogl_no:
18005 clr.b %d0 # set false
18006 bra.w fscc_done # go finish
18007 fscc_ogl_yes:
18008 st %d0 # set true
18009 bra.w fscc_done # go finish
18012 # unordered or equal:
18014 # NANvZ
18016 fscc_ueq:
18017 fbueq.w fscc_ueq_yes # unordered or equal?
18018 fscc_ueq_no:
18019 clr.b %d0 # set false
18020 bra.w fscc_done # go finish
18021 fscc_ueq_yes:
18022 st %d0 # set true
18023 bra.w fscc_done # go finish
18026 # ordered:
18027 # ___
18028 # NAN
18030 fscc_or:
18031 fbor.w fscc_or_yes # ordered?
18032 fscc_or_no:
18033 clr.b %d0 # set false
18034 bra.w fscc_done # go finish
18035 fscc_or_yes:
18036 st %d0 # set true
18037 bra.w fscc_done # go finish
18040 # unordered:
18042 # NAN
18044 fscc_un:
18045 fbun.w fscc_un_yes # unordered?
18046 fscc_un_no:
18047 clr.b %d0 # set false
18048 bra.w fscc_done # go finish
18049 fscc_un_yes:
18050 st %d0 # set true
18051 bra.w fscc_done # go finish
18053 #######################################################################
18056 # the bsun exception bit was set. now, check to see if BSUN
18057 # is enabled. if so, don't store the result; instead, correct the
18058 # stack frame for a bsun exception.
18060 fscc_chk_bsun:
18061 btst &bsun_bit,FPCR_ENABLE(%a6) # was BSUN set?
18062 bne.w fscc_bsun
18065 # the bsun exception bit was not set.
18066 # the result has been selected.
18067 # now, check to see if the result is to be stored in the data register
18068 # file or in memory.
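#
# note: the mode decode below, as a rough C sketch (helper names mirror the
# XREFs in the header above; the error path is omitted):
#
#	unsigned mode = opword & 0x38, reg = opword & 0x7;
#	if (mode == 0x00)			/* Dn: result goes to the regfile */
#		store_dreg_b(reg, res);
#	else {
#		dmem_write_byte(ea, res);	/* stacked <ea> */
#		if (mode == 0x18) inc_areg(reg, 1);	/* (An)+ : update An on success */
#		else if (mode == 0x20) dec_areg(reg, 1);	/* -(An) */
#	}
#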
18070 fscc_done:
18071 mov.l %d0,%a0 # save result for a moment
18073 mov.b 1+EXC_OPWORD(%a6),%d1 # fetch lo opword
18074 mov.l %d1,%d0 # make a copy
18075 andi.b &0x38,%d1 # extract src mode
18077 bne.b fscc_mem_op # it's a memory operation
18079 mov.l %d0,%d1
18080 andi.w &0x7,%d1 # pass index in d1
18081 mov.l %a0,%d0 # pass result in d0
18082 bsr.l store_dreg_b # save result in regfile
18086 # the stacked <ea> is correct with the exception of:
18087 # -> Dn : <ea> is garbage
18089 # if the addressing mode is post-increment or pre-decrement,
18090 # then the address registers have not been updated.
18092 fscc_mem_op:
18093 cmpi.b %d1,&0x18 # is <ea> (An)+ ?
18094 beq.b fscc_mem_inc # yes
18095 cmpi.b %d1,&0x20 # is <ea> -(An) ?
18096 beq.b fscc_mem_dec # yes
18098 mov.l %a0,%d0 # pass result in d0
18099 mov.l EXC_EA(%a6),%a0 # fetch <ea>
18100 bsr.l _dmem_write_byte # write result byte
18102 tst.l %d1 # did dstore fail?
18103 bne.w fscc_err # yes
18107 # addressing mode is post-increment. write the result byte. if the write
18108 # fails then don't update the address register. if the write passes then
18109 # call inc_areg() to update the address register.
18110 fscc_mem_inc:
18111 mov.l %a0,%d0 # pass result in d0
18112 mov.l EXC_EA(%a6),%a0 # fetch <ea>
18113 bsr.l _dmem_write_byte # write result byte
18115 tst.l %d1 # did dstore fail?
18116 bne.w fscc_err # yes
18118 mov.b 0x1+EXC_OPWORD(%a6),%d1 # fetch opword
18119 andi.w &0x7,%d1 # pass index in d1
18120 movq.l &0x1,%d0 # pass amt to inc by
18121 bsr.l inc_areg # increment address register
18125 # addressing mode is pre-decrement. write the result byte. if the write
18126 # fails then don't update the address register. if the write passes then
18127 # call dec_areg() to update the address register.
18128 fscc_mem_dec:
18129 mov.l %a0,%d0 # pass result in d0
18130 mov.l EXC_EA(%a6),%a0 # fetch <ea>
18131 bsr.l _dmem_write_byte # write result byte
18133 tst.l %d1 # did dstore fail?
18134 bne.w fscc_err # yes
18136 mov.b 0x1+EXC_OPWORD(%a6),%d1 # fetch opword
18137 andi.w &0x7,%d1 # pass index in d1
18138 movq.l &0x1,%d0 # pass amt to dec by
18139 bsr.l dec_areg # decrement address register
18143 # the emulation routine set the BSUN bit and the BSUN exception was
18144 # enabled. we have to fix the stack and jump to the bsun handler.
18145 # let the caller of this routine shift the stack frame up to
18146 # eliminate the effective address field.
18147 fscc_bsun:
18148 mov.b &fbsun_flg,SPCOND_FLG(%a6)
18151 # the byte write to memory has failed. pass the failing effective address
18152 # and an FSLW to funimp_dacc().
18153 fscc_err:
18154 mov.w &0x00a1,EXC_VOFF(%a6)
18155 bra.l facc_finish
18157 #########################################################################
18158 # XDEF **************************************************************** #
18159 # fmovm_dynamic(): emulate "fmovm" dynamic instruction #
18161 # XREF **************************************************************** #
18162 # fetch_dreg() - fetch data register #
18163 # {i,d,}mem_read() - fetch data from memory #
18164 # _mem_write() - write data to memory #
18165 # iea_iacc() - instruction memory access error occurred #
18166 # iea_dacc() - data memory access error occurred #
18167 # restore() - restore An index regs if access error occurred #
18169 # INPUT *************************************************************** #
18170 # None #
18172 # OUTPUT ************************************************************** #
18173 # If instr is "fmovm Dn,-(A7)" from supervisor mode, #
18174 # d0 = size of dump #
18175 # d1 = Dn #
18176 # Else if instruction access error, #
18177 # d0 = FSLW #
18178 # Else if data access error, #
18179 # d0 = FSLW #
18180 # a0 = address of fault #
18181 # Else #
18182 # none. #
18184 # ALGORITHM *********************************************************** #
18185 # The effective address must be calculated since this is entered #
18186 # from an "Unimplemented Effective Address" exception handler. So, we #
18187 # have our own fcalc_ea() routine here. If an access error is flagged #
18188 # by a _{i,d,}mem_read() call, we must exit through the special #
18189 # handler. #
18190 # The data register is determined and its value loaded to get the #
18191 # string of FP registers affected. This value is used as an index into #
18192 # a lookup table such that we can determine the number of bytes #
18193 # involved. #
18194 # If the instruction is "fmovm.x <ea>,Dn", a _mem_read() is used #
18195 # to read in all FP values. Again, _mem_read() may fail and require a #
18196 # special exit. #
18197 # If the instruction is "fmovm.x DN,<ea>", a _mem_write() is used #
18198 # to write all FP values. _mem_write() may also fail. #
18199 # If the instruction is "fmovm.x DN,-(a7)" from supervisor mode, #
18200 # then we return the size of the dump and the string to the caller #
18201 # so that the move can occur outside of this routine. This special #
18202 # case is required so that moves to the system stack are handled #
18203 # correctly. #
18205 # DYNAMIC: #
18206 # fmovm.x dn, <ea> #
18207 # fmovm.x <ea>, dn #
18209 # <WORD 1> <WORD2> #
18210 # 1111 0010 00 |<ea>| 11@& 1000 0$$$ 0000 #
18212 # & = (0): predecrement addressing mode #
18213 # (1): postincrement or control addressing mode #
18214 # @ = (0): move listed regs from memory to the FPU #
18215 # (1): move listed regs from the FPU to memory #
18216 # $$$ : index of data register holding reg select mask #
18218 # NOTES: #
18219 # If the data register holds a zero, then the #
18220 # instruction is a nop. #
18222 #########################################################################
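#
# note: the flow below can be summarized by this C sketch (popcount and
# reverse8 are illustrative stand-ins for the two tables further down):
#
#	uint8_t strg = dreg[r] & 0xff;		/* FP register select string */
#	unsigned size = 12u * popcount(strg);	/* see tbl_fmovm_size */
#	if (strg == 0) return;			/* nop; <ea> is still calculated */
#	if (predecrement) strg = reverse8(strg); /* see tbl_fmovm_convert */
#	for (int i = 0; i < 8; i++)		/* bit 7 = FP0 ... bit 0 = FP7 */
#		if (strg & (0x80u >> i))
#			move_12_bytes(i);	/* one extended-precision register */
#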
18224 global fmovm_dynamic
18225 fmovm_dynamic:
18227 # extract the data register in which the bit string resides...
18228 mov.b 1+EXC_EXTWORD(%a6),%d1 # fetch extword
18229 andi.w &0x70,%d1 # extract reg bits
18230 lsr.b &0x4,%d1 # shift into lo bits
18232 # fetch the bit string into d0...
18233 bsr.l fetch_dreg # fetch reg string
18235 andi.l &0x000000ff,%d0 # keep only lo byte
18237 mov.l %d0,-(%sp) # save strg
18238 mov.b (tbl_fmovm_size.w,%pc,%d0),%d0
18239 mov.l %d0,-(%sp) # save size
18240 bsr.l fmovm_calc_ea # calculate <ea>
18241 mov.l (%sp)+,%d0 # restore size
18242 mov.l (%sp)+,%d1 # restore strg
18244 # if the bit string is a zero, then the operation is a no-op
18245 # but, make sure that we've calculated ea and advanced the opword pointer
18246 beq.w fmovm_data_done
18248 # separate move ins from move outs...
18249 btst &0x5,EXC_EXTWORD(%a6) # is it a move in or out?
18250 beq.w fmovm_data_in # it's a move in
18252 #############
18253 # MOVE OUT: #
18254 #############
18255 fmovm_data_out:
18256 btst &0x4,EXC_EXTWORD(%a6) # control or predecrement?
18257 bne.w fmovm_out_ctrl # control
18259 ############################
18260 fmovm_out_predec:
18261 # for predecrement mode, the bit string is the opposite of both control
18262 # operations and postincrement mode. (bit7 = FP7 ... bit0 = FP0)
18263 # here, we convert it to be just like the others...
18264 mov.b (tbl_fmovm_convert.w,%pc,%d1.w*1),%d1
18266 btst &0x5,EXC_SR(%a6) # user or supervisor mode?
18267 beq.b fmovm_out_ctrl # user
18269 fmovm_out_predec_s:
18270 cmpi.b SPCOND_FLG(%a6),&mda7_flg # is <ea> mode -(a7)?
18271 bne.b fmovm_out_ctrl
18273 # the operation was unfortunately an: fmovm.x dn,-(sp)
18274 # called from supervisor mode.
18275 # we're also passing "size" and "strg" back to the calling routine
18278 ############################
18279 fmovm_out_ctrl:
18280 mov.l %a0,%a1 # move <ea> to a1
18282 sub.l %d0,%sp # subtract size of dump
18283 lea (%sp),%a0
18285 tst.b %d1 # should FP0 be moved?
18286 bpl.b fmovm_out_ctrl_fp1 # no
18288 mov.l 0x0+EXC_FP0(%a6),(%a0)+ # yes
18289 mov.l 0x4+EXC_FP0(%a6),(%a0)+
18290 mov.l 0x8+EXC_FP0(%a6),(%a0)+
18292 fmovm_out_ctrl_fp1:
18293 lsl.b &0x1,%d1 # should FP1 be moved?
18294 bpl.b fmovm_out_ctrl_fp2 # no
18296 mov.l 0x0+EXC_FP1(%a6),(%a0)+ # yes
18297 mov.l 0x4+EXC_FP1(%a6),(%a0)+
18298 mov.l 0x8+EXC_FP1(%a6),(%a0)+
18300 fmovm_out_ctrl_fp2:
18301 lsl.b &0x1,%d1 # should FP2 be moved?
18302 bpl.b fmovm_out_ctrl_fp3 # no
18304 fmovm.x &0x20,(%a0) # yes
18305 add.l &0xc,%a0
18307 fmovm_out_ctrl_fp3:
18308 lsl.b &0x1,%d1 # should FP3 be moved?
18309 bpl.b fmovm_out_ctrl_fp4 # no
18311 fmovm.x &0x10,(%a0) # yes
18312 add.l &0xc,%a0
18314 fmovm_out_ctrl_fp4:
18315 lsl.b &0x1,%d1 # should FP4 be moved?
18316 bpl.b fmovm_out_ctrl_fp5 # no
18318 fmovm.x &0x08,(%a0) # yes
18319 add.l &0xc,%a0
18321 fmovm_out_ctrl_fp5:
18322 lsl.b &0x1,%d1 # should FP5 be moved?
18323 bpl.b fmovm_out_ctrl_fp6 # no
18325 fmovm.x &0x04,(%a0) # yes
18326 add.l &0xc,%a0
18328 fmovm_out_ctrl_fp6:
18329 lsl.b &0x1,%d1 # should FP6 be moved?
18330 bpl.b fmovm_out_ctrl_fp7 # no
18332 fmovm.x &0x02,(%a0) # yes
18333 add.l &0xc,%a0
18335 fmovm_out_ctrl_fp7:
18336 lsl.b &0x1,%d1 # should FP7 be moved?
18337 bpl.b fmovm_out_ctrl_done # no
18339 fmovm.x &0x01,(%a0) # yes
18340 add.l &0xc,%a0
18342 fmovm_out_ctrl_done:
18343 mov.l %a1,L_SCR1(%a6)
18345 lea (%sp),%a0 # pass: supervisor src
18346 mov.l %d0,-(%sp) # save size
18347 bsr.l _dmem_write # copy data to user mem
18349 mov.l (%sp)+,%d0
18350 add.l %d0,%sp # clear fpreg data from stack
18352 tst.l %d1 # did dstore err?
18353 bne.w fmovm_out_err # yes
18357 ############
18358 # MOVE IN: #
18359 ############
18360 fmovm_data_in:
18361 mov.l %a0,L_SCR1(%a6)
18363 sub.l %d0,%sp # make room for fpregs
18364 lea (%sp),%a1
18366 mov.l %d1,-(%sp) # save bit string for later
18367 mov.l %d0,-(%sp) # save # of bytes
18369 bsr.l _dmem_read # copy data from user mem
18371 mov.l (%sp)+,%d0 # retrieve # of bytes
18373 tst.l %d1 # did dfetch fail?
18374 bne.w fmovm_in_err # yes
18376 mov.l (%sp)+,%d1 # load bit string
18378 lea (%sp),%a0 # addr of stack
18380 tst.b %d1 # should FP0 be moved?
18381 bpl.b fmovm_data_in_fp1 # no
18383 mov.l (%a0)+,0x0+EXC_FP0(%a6) # yes
18384 mov.l (%a0)+,0x4+EXC_FP0(%a6)
18385 mov.l (%a0)+,0x8+EXC_FP0(%a6)
18387 fmovm_data_in_fp1:
18388 lsl.b &0x1,%d1 # should FP1 be moved?
18389 bpl.b fmovm_data_in_fp2 # no
18391 mov.l (%a0)+,0x0+EXC_FP1(%a6) # yes
18392 mov.l (%a0)+,0x4+EXC_FP1(%a6)
18393 mov.l (%a0)+,0x8+EXC_FP1(%a6)
18395 fmovm_data_in_fp2:
18396 lsl.b &0x1,%d1 # should FP2 be moved?
18397 bpl.b fmovm_data_in_fp3 # no
18399 fmovm.x (%a0)+,&0x20 # yes
18401 fmovm_data_in_fp3:
18402 lsl.b &0x1,%d1 # should FP3 be moved?
18403 bpl.b fmovm_data_in_fp4 # no
18405 fmovm.x (%a0)+,&0x10 # yes
18407 fmovm_data_in_fp4:
18408 lsl.b &0x1,%d1 # should FP4 be moved?
18409 bpl.b fmovm_data_in_fp5 # no
18411 fmovm.x (%a0)+,&0x08 # yes
18413 fmovm_data_in_fp5:
18414 lsl.b &0x1,%d1 # should FP5 be moved?
18415 bpl.b fmovm_data_in_fp6 # no
18417 fmovm.x (%a0)+,&0x04 # yes
18419 fmovm_data_in_fp6:
18420 lsl.b &0x1,%d1 # should FP6 be moved?
18421 bpl.b fmovm_data_in_fp7 # no
18423 fmovm.x (%a0)+,&0x02 # yes
18425 fmovm_data_in_fp7:
18426 lsl.b &0x1,%d1 # should FP7 be moved?
18427 bpl.b fmovm_data_in_done # no
18429 fmovm.x (%a0)+,&0x01 # yes
18431 fmovm_data_in_done:
18432 add.l %d0,%sp # remove fpregs from stack
18435 #####################################
18437 fmovm_data_done:
18440 ##############################################################################
18443 # table indexed by the operation's bit string that gives the number
18444 # of bytes that will be moved.
18446 # number of bytes = (# of 1's in bit string) * 12(bytes/fpreg)
18448 tbl_fmovm_size:
18449 byte 0x00,0x0c,0x0c,0x18,0x0c,0x18,0x18,0x24
18450 byte 0x0c,0x18,0x18,0x24,0x18,0x24,0x24,0x30
18451 byte 0x0c,0x18,0x18,0x24,0x18,0x24,0x24,0x30
18452 byte 0x18,0x24,0x24,0x30,0x24,0x30,0x30,0x3c
18453 byte 0x0c,0x18,0x18,0x24,0x18,0x24,0x24,0x30
18454 byte 0x18,0x24,0x24,0x30,0x24,0x30,0x30,0x3c
18455 byte 0x18,0x24,0x24,0x30,0x24,0x30,0x30,0x3c
18456 byte 0x24,0x30,0x30,0x3c,0x30,0x3c,0x3c,0x48
18457 byte 0x0c,0x18,0x18,0x24,0x18,0x24,0x24,0x30
18458 byte 0x18,0x24,0x24,0x30,0x24,0x30,0x30,0x3c
18459 byte 0x18,0x24,0x24,0x30,0x24,0x30,0x30,0x3c
18460 byte 0x24,0x30,0x30,0x3c,0x30,0x3c,0x3c,0x48
18461 byte 0x18,0x24,0x24,0x30,0x24,0x30,0x30,0x3c
18462 byte 0x24,0x30,0x30,0x3c,0x30,0x3c,0x3c,0x48
18463 byte 0x24,0x30,0x30,0x3c,0x30,0x3c,0x3c,0x48
18464 byte 0x30,0x3c,0x3c,0x48,0x3c,0x48,0x48,0x54
18465 byte 0x0c,0x18,0x18,0x24,0x18,0x24,0x24,0x30
18466 byte 0x18,0x24,0x24,0x30,0x24,0x30,0x30,0x3c
18467 byte 0x18,0x24,0x24,0x30,0x24,0x30,0x30,0x3c
18468 byte 0x24,0x30,0x30,0x3c,0x30,0x3c,0x3c,0x48
18469 byte 0x18,0x24,0x24,0x30,0x24,0x30,0x30,0x3c
18470 byte 0x24,0x30,0x30,0x3c,0x30,0x3c,0x3c,0x48
18471 byte 0x24,0x30,0x30,0x3c,0x30,0x3c,0x3c,0x48
18472 byte 0x30,0x3c,0x3c,0x48,0x3c,0x48,0x48,0x54
18473 byte 0x18,0x24,0x24,0x30,0x24,0x30,0x30,0x3c
18474 byte 0x24,0x30,0x30,0x3c,0x30,0x3c,0x3c,0x48
18475 byte 0x24,0x30,0x30,0x3c,0x30,0x3c,0x3c,0x48
18476 byte 0x30,0x3c,0x3c,0x48,0x3c,0x48,0x48,0x54
18477 byte 0x24,0x30,0x30,0x3c,0x30,0x3c,0x3c,0x48
18478 byte 0x30,0x3c,0x3c,0x48,0x3c,0x48,0x48,0x54
18479 byte 0x30,0x3c,0x3c,0x48,0x3c,0x48,0x48,0x54
18480 byte 0x3c,0x48,0x48,0x54,0x48,0x54,0x54,0x60
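#
# note: the table is just a precomputed population count scaled by 12;
# equivalently, in C (assumes a builtin popcount is available):
#
#	unsigned fmovm_size(uint8_t strg)
#	{ return 12u * (unsigned)__builtin_popcount(strg); }
#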
18483 # table to convert a pre-decrement bit string into a post-increment
18484 # or control bit string.
18485 # ex: 0x00 ==> 0x00
18486 # 0x01 ==> 0x80
18487 # 0x02 ==> 0x40
18490 # 0xfd ==> 0xbf
18491 # 0xfe ==> 0x7f
18492 # 0xff ==> 0xff
18494 tbl_fmovm_convert:
18495 byte 0x00,0x80,0x40,0xc0,0x20,0xa0,0x60,0xe0
18496 byte 0x10,0x90,0x50,0xd0,0x30,0xb0,0x70,0xf0
18497 byte 0x08,0x88,0x48,0xc8,0x28,0xa8,0x68,0xe8
18498 byte 0x18,0x98,0x58,0xd8,0x38,0xb8,0x78,0xf8
18499 byte 0x04,0x84,0x44,0xc4,0x24,0xa4,0x64,0xe4
18500 byte 0x14,0x94,0x54,0xd4,0x34,0xb4,0x74,0xf4
18501 byte 0x0c,0x8c,0x4c,0xcc,0x2c,0xac,0x6c,0xec
18502 byte 0x1c,0x9c,0x5c,0xdc,0x3c,0xbc,0x7c,0xfc
18503 byte 0x02,0x82,0x42,0xc2,0x22,0xa2,0x62,0xe2
18504 byte 0x12,0x92,0x52,0xd2,0x32,0xb2,0x72,0xf2
18505 byte 0x0a,0x8a,0x4a,0xca,0x2a,0xaa,0x6a,0xea
18506 byte 0x1a,0x9a,0x5a,0xda,0x3a,0xba,0x7a,0xfa
18507 byte 0x06,0x86,0x46,0xc6,0x26,0xa6,0x66,0xe6
18508 byte 0x16,0x96,0x56,0xd6,0x36,0xb6,0x76,0xf6
18509 byte 0x0e,0x8e,0x4e,0xce,0x2e,0xae,0x6e,0xee
18510 byte 0x1e,0x9e,0x5e,0xde,0x3e,0xbe,0x7e,0xfe
18511 byte 0x01,0x81,0x41,0xc1,0x21,0xa1,0x61,0xe1
18512 byte 0x11,0x91,0x51,0xd1,0x31,0xb1,0x71,0xf1
18513 byte 0x09,0x89,0x49,0xc9,0x29,0xa9,0x69,0xe9
18514 byte 0x19,0x99,0x59,0xd9,0x39,0xb9,0x79,0xf9
18515 byte 0x05,0x85,0x45,0xc5,0x25,0xa5,0x65,0xe5
18516 byte 0x15,0x95,0x55,0xd5,0x35,0xb5,0x75,0xf5
18517 byte 0x0d,0x8d,0x4d,0xcd,0x2d,0xad,0x6d,0xed
18518 byte 0x1d,0x9d,0x5d,0xdd,0x3d,0xbd,0x7d,0xfd
18519 byte 0x03,0x83,0x43,0xc3,0x23,0xa3,0x63,0xe3
18520 byte 0x13,0x93,0x53,0xd3,0x33,0xb3,0x73,0xf3
18521 byte 0x0b,0x8b,0x4b,0xcb,0x2b,0xab,0x6b,0xeb
18522 byte 0x1b,0x9b,0x5b,0xdb,0x3b,0xbb,0x7b,0xfb
18523 byte 0x07,0x87,0x47,0xc7,0x27,0xa7,0x67,0xe7
18524 byte 0x17,0x97,0x57,0xd7,0x37,0xb7,0x77,0xf7
18525 byte 0x0f,0x8f,0x4f,0xcf,0x2f,0xaf,0x6f,0xef
18526 byte 0x1f,0x9f,0x5f,0xdf,0x3f,0xbf,0x7f,0xff
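#
# note: the conversion is an 8-bit mirror of the mask; equivalently, in C:
#
#	uint8_t fmovm_convert(uint8_t m)
#	{
#		uint8_t r = 0;
#		for (int i = 0; i < 8; i++)	/* bit i -> bit (7 - i) */
#			if (m & (1u << i))
#				r |= (uint8_t)(0x80u >> i);
#		return r;
#	}
#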
18528 global fmovm_calc_ea
18529 ###############################################
18530 # _fmovm_calc_ea: calculate effective address #
18531 ###############################################
18532 fmovm_calc_ea:
18533 mov.l %d0,%a0 # move # bytes to a0
18535 # currently, MODE and REG are taken from the EXC_OPWORD. this could be
18536 # easily changed if they were inputs passed in registers.
18537 mov.w EXC_OPWORD(%a6),%d0 # fetch opcode word
18538 mov.w %d0,%d1 # make a copy
18540 andi.w &0x3f,%d0 # extract mode field
18541 andi.l &0x7,%d1 # extract reg field
18543 # jump to the corresponding function for each {MODE,REG} pair.
18544 mov.w (tbl_fea_mode.b,%pc,%d0.w*2),%d0 # fetch jmp distance
18545 jmp (tbl_fea_mode.b,%pc,%d0.w*1) # jmp to correct ea mode
18547 swbeg &64
18548 tbl_fea_mode:
18549 short tbl_fea_mode - tbl_fea_mode
18550 short tbl_fea_mode - tbl_fea_mode
18551 short tbl_fea_mode - tbl_fea_mode
18552 short tbl_fea_mode - tbl_fea_mode
18553 short tbl_fea_mode - tbl_fea_mode
18554 short tbl_fea_mode - tbl_fea_mode
18555 short tbl_fea_mode - tbl_fea_mode
18556 short tbl_fea_mode - tbl_fea_mode
18558 short tbl_fea_mode - tbl_fea_mode
18559 short tbl_fea_mode - tbl_fea_mode
18560 short tbl_fea_mode - tbl_fea_mode
18561 short tbl_fea_mode - tbl_fea_mode
18562 short tbl_fea_mode - tbl_fea_mode
18563 short tbl_fea_mode - tbl_fea_mode
18564 short tbl_fea_mode - tbl_fea_mode
18565 short tbl_fea_mode - tbl_fea_mode
18567 short faddr_ind_a0 - tbl_fea_mode
18568 short faddr_ind_a1 - tbl_fea_mode
18569 short faddr_ind_a2 - tbl_fea_mode
18570 short faddr_ind_a3 - tbl_fea_mode
18571 short faddr_ind_a4 - tbl_fea_mode
18572 short faddr_ind_a5 - tbl_fea_mode
18573 short faddr_ind_a6 - tbl_fea_mode
18574 short faddr_ind_a7 - tbl_fea_mode
18576 short faddr_ind_p_a0 - tbl_fea_mode
18577 short faddr_ind_p_a1 - tbl_fea_mode
18578 short faddr_ind_p_a2 - tbl_fea_mode
18579 short faddr_ind_p_a3 - tbl_fea_mode
18580 short faddr_ind_p_a4 - tbl_fea_mode
18581 short faddr_ind_p_a5 - tbl_fea_mode
18582 short faddr_ind_p_a6 - tbl_fea_mode
18583 short faddr_ind_p_a7 - tbl_fea_mode
18585 short faddr_ind_m_a0 - tbl_fea_mode
18586 short faddr_ind_m_a1 - tbl_fea_mode
18587 short faddr_ind_m_a2 - tbl_fea_mode
18588 short faddr_ind_m_a3 - tbl_fea_mode
18589 short faddr_ind_m_a4 - tbl_fea_mode
18590 short faddr_ind_m_a5 - tbl_fea_mode
18591 short faddr_ind_m_a6 - tbl_fea_mode
18592 short faddr_ind_m_a7 - tbl_fea_mode
18594 short faddr_ind_disp_a0 - tbl_fea_mode
18595 short faddr_ind_disp_a1 - tbl_fea_mode
18596 short faddr_ind_disp_a2 - tbl_fea_mode
18597 short faddr_ind_disp_a3 - tbl_fea_mode
18598 short faddr_ind_disp_a4 - tbl_fea_mode
18599 short faddr_ind_disp_a5 - tbl_fea_mode
18600 short faddr_ind_disp_a6 - tbl_fea_mode
18601 short faddr_ind_disp_a7 - tbl_fea_mode
18603 short faddr_ind_ext - tbl_fea_mode
18604 short faddr_ind_ext - tbl_fea_mode
18605 short faddr_ind_ext - tbl_fea_mode
18606 short faddr_ind_ext - tbl_fea_mode
18607 short faddr_ind_ext - tbl_fea_mode
18608 short faddr_ind_ext - tbl_fea_mode
18609 short faddr_ind_ext - tbl_fea_mode
18610 short faddr_ind_ext - tbl_fea_mode
18612 short fabs_short - tbl_fea_mode
18613 short fabs_long - tbl_fea_mode
18614 short fpc_ind - tbl_fea_mode
18615 short fpc_ind_ext - tbl_fea_mode
18616 short tbl_fea_mode - tbl_fea_mode
18617 short tbl_fea_mode - tbl_fea_mode
18618 short tbl_fea_mode - tbl_fea_mode
18619 short tbl_fea_mode - tbl_fea_mode
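#
# note: the 64 entries above cover the 8 mode values x 8 register values of
# the opword's low 6 bits; encodings that should never reach this routine
# point back at the table base. roughly, in C (names are illustrative):
#
#	unsigned idx = opword & 0x3f;		/* bits 5-3 = mode, bits 2-0 = reg */
#	ea_handler[idx]();			/* e.g. (An), (An)+, -(An), ... */
#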
18621 ###################################
18622 # Address register indirect: (An) #
18623 ###################################
18624 faddr_ind_a0:
18625 mov.l EXC_DREGS+0x8(%a6),%a0 # Get current a0
18628 faddr_ind_a1:
18629 mov.l EXC_DREGS+0xc(%a6),%a0 # Get current a1
18632 faddr_ind_a2:
18633 mov.l %a2,%a0 # Get current a2
18636 faddr_ind_a3:
18637 mov.l %a3,%a0 # Get current a3
18640 faddr_ind_a4:
18641 mov.l %a4,%a0 # Get current a4
18644 faddr_ind_a5:
18645 mov.l %a5,%a0 # Get current a5
18648 faddr_ind_a6:
18649 mov.l (%a6),%a0 # Get current a6
18652 faddr_ind_a7:
18653 mov.l EXC_A7(%a6),%a0 # Get current a7
18656 #####################################################
18657 # Address register indirect w/ postincrement: (An)+ #
18658 #####################################################
18659 faddr_ind_p_a0:
18660 mov.l EXC_DREGS+0x8(%a6),%d0 # Get current a0
18661 mov.l %d0,%d1
18662 add.l %a0,%d1 # Increment
18663 mov.l %d1,EXC_DREGS+0x8(%a6) # Save incr value
18664 mov.l %d0,%a0
18667 faddr_ind_p_a1:
18668 mov.l EXC_DREGS+0xc(%a6),%d0 # Get current a1
18669 mov.l %d0,%d1
18670 add.l %a0,%d1 # Increment
18671 mov.l %d1,EXC_DREGS+0xc(%a6) # Save incr value
18672 mov.l %d0,%a0
18675 faddr_ind_p_a2:
18676 mov.l %a2,%d0 # Get current a2
18677 mov.l %d0,%d1
18678 add.l %a0,%d1 # Increment
18679 mov.l %d1,%a2 # Save incr value
18680 mov.l %d0,%a0
18683 faddr_ind_p_a3:
18684 mov.l %a3,%d0 # Get current a3
18685 mov.l %d0,%d1
18686 add.l %a0,%d1 # Increment
18687 mov.l %d1,%a3 # Save incr value
18688 mov.l %d0,%a0
18691 faddr_ind_p_a4:
18692 mov.l %a4,%d0 # Get current a4
18693 mov.l %d0,%d1
18694 add.l %a0,%d1 # Increment
18695 mov.l %d1,%a4 # Save incr value
18696 mov.l %d0,%a0
18699 faddr_ind_p_a5:
18700 mov.l %a5,%d0 # Get current a5
18701 mov.l %d0,%d1
18702 add.l %a0,%d1 # Increment
18703 mov.l %d1,%a5 # Save incr value
18704 mov.l %d0,%a0
18707 faddr_ind_p_a6:
18708 mov.l (%a6),%d0 # Get current a6
18709 mov.l %d0,%d1
18710 add.l %a0,%d1 # Increment
18711 mov.l %d1,(%a6) # Save incr value
18712 mov.l %d0,%a0
18715 faddr_ind_p_a7:
18716 mov.b &mia7_flg,SPCOND_FLG(%a6) # set "special case" flag
18718 mov.l EXC_A7(%a6),%d0 # Get current a7
18719 mov.l %d0,%d1
18720 add.l %a0,%d1 # Increment
18721 mov.l %d1,EXC_A7(%a6) # Save incr value
18722 mov.l %d0,%a0
18725 ####################################################
18726 # Address register indirect w/ predecrement: -(An) #
18727 ####################################################
18728 faddr_ind_m_a0:
18729 mov.l EXC_DREGS+0x8(%a6),%d0 # Get current a0
18730 sub.l %a0,%d0 # Decrement
18731 mov.l %d0,EXC_DREGS+0x8(%a6) # Save decr value
18732 mov.l %d0,%a0
18735 faddr_ind_m_a1:
18736 mov.l EXC_DREGS+0xc(%a6),%d0 # Get current a1
18737 sub.l %a0,%d0 # Decrement
18738 mov.l %d0,EXC_DREGS+0xc(%a6) # Save decr value
18739 mov.l %d0,%a0
18742 faddr_ind_m_a2:
18743 mov.l %a2,%d0 # Get current a2
18744 sub.l %a0,%d0 # Decrement
18745 mov.l %d0,%a2 # Save decr value
18746 mov.l %d0,%a0
18749 faddr_ind_m_a3:
18750 mov.l %a3,%d0 # Get current a3
18751 sub.l %a0,%d0 # Decrement
18752 mov.l %d0,%a3 # Save decr value
18753 mov.l %d0,%a0
18756 faddr_ind_m_a4:
18757 mov.l %a4,%d0 # Get current a4
18758 sub.l %a0,%d0 # Decrement
18759 mov.l %d0,%a4 # Save decr value
18760 mov.l %d0,%a0
18763 faddr_ind_m_a5:
18764 mov.l %a5,%d0 # Get current a5
18765 sub.l %a0,%d0 # Decrement
18766 mov.l %d0,%a5 # Save decr value
18767 mov.l %d0,%a0
18770 faddr_ind_m_a6:
18771 mov.l (%a6),%d0 # Get current a6
18772 sub.l %a0,%d0 # Decrement
18773 mov.l %d0,(%a6) # Save decr value
18774 mov.l %d0,%a0
18777 faddr_ind_m_a7:
18778 mov.b &mda7_flg,SPCOND_FLG(%a6) # set "special case" flag
18780 mov.l EXC_A7(%a6),%d0 # Get current a7
18781 sub.l %a0,%d0 # Decrement
18782 mov.l %d0,EXC_A7(%a6) # Save decr value
18783 mov.l %d0,%a0
18786 ########################################################
18787 # Address register indirect w/ displacement: (d16, An) #
18788 ########################################################
18789 faddr_ind_disp_a0:
18790 mov.l EXC_EXTWPTR(%a6),%a0 # fetch instruction addr
18791 addq.l &0x2,EXC_EXTWPTR(%a6) # incr instruction ptr
18792 bsr.l _imem_read_word
18794 tst.l %d1 # did ifetch fail?
18795 bne.l iea_iacc # yes
18797 mov.w %d0,%a0 # sign extend displacement
18799 add.l EXC_DREGS+0x8(%a6),%a0 # a0 + d16
18802 faddr_ind_disp_a1:
18803 mov.l EXC_EXTWPTR(%a6),%a0 # fetch instruction addr
18804 addq.l &0x2,EXC_EXTWPTR(%a6) # incr instruction ptr
18805 bsr.l _imem_read_word
18807 tst.l %d1 # did ifetch fail?
18808 bne.l iea_iacc # yes
18810 mov.w %d0,%a0 # sign extend displacement
18812 add.l EXC_DREGS+0xc(%a6),%a0 # a1 + d16
18815 faddr_ind_disp_a2:
18816 mov.l EXC_EXTWPTR(%a6),%a0 # fetch instruction addr
18817 addq.l &0x2,EXC_EXTWPTR(%a6) # incr instruction ptr
18818 bsr.l _imem_read_word
18820 tst.l %d1 # did ifetch fail?
18821 bne.l iea_iacc # yes
18823 mov.w %d0,%a0 # sign extend displacement
18825 add.l %a2,%a0 # a2 + d16
18828 faddr_ind_disp_a3:
18829 mov.l EXC_EXTWPTR(%a6),%a0 # fetch instruction addr
18830 addq.l &0x2,EXC_EXTWPTR(%a6) # incr instruction ptr
18831 bsr.l _imem_read_word
18833 tst.l %d1 # did ifetch fail?
18834 bne.l iea_iacc # yes
18836 mov.w %d0,%a0 # sign extend displacement
18838 add.l %a3,%a0 # a3 + d16
18841 faddr_ind_disp_a4:
18842 mov.l EXC_EXTWPTR(%a6),%a0 # fetch instruction addr
18843 addq.l &0x2,EXC_EXTWPTR(%a6) # incr instruction ptr
18844 bsr.l _imem_read_word
18846 tst.l %d1 # did ifetch fail?
18847 bne.l iea_iacc # yes
18849 mov.w %d0,%a0 # sign extend displacement
18851 add.l %a4,%a0 # a4 + d16
18854 faddr_ind_disp_a5:
18855 mov.l EXC_EXTWPTR(%a6),%a0 # fetch instruction addr
18856 addq.l &0x2,EXC_EXTWPTR(%a6) # incr instruction ptr
18857 bsr.l _imem_read_word
18859 tst.l %d1 # did ifetch fail?
18860 bne.l iea_iacc # yes
18862 mov.w %d0,%a0 # sign extend displacement
18864 add.l %a5,%a0 # a5 + d16
18867 faddr_ind_disp_a6:
18868 mov.l EXC_EXTWPTR(%a6),%a0 # fetch instruction addr
18869 addq.l &0x2,EXC_EXTWPTR(%a6) # incr instruction ptr
18870 bsr.l _imem_read_word
18872 tst.l %d1 # did ifetch fail?
18873 bne.l iea_iacc # yes
18875 mov.w %d0,%a0 # sign extend displacement
18877 add.l (%a6),%a0 # a6 + d16
18880 faddr_ind_disp_a7:
18881 mov.l EXC_EXTWPTR(%a6),%a0 # fetch instruction addr
18882 addq.l &0x2,EXC_EXTWPTR(%a6) # incr instruction ptr
18883 bsr.l _imem_read_word
18885 tst.l %d1 # did ifetch fail?
18886 bne.l iea_iacc # yes
18888 mov.w %d0,%a0 # sign extend displacement
18890 add.l EXC_A7(%a6),%a0 # a7 + d16
18893 ########################################################################
18894 # Address register indirect w/ index(8-bit displacement): (d8, An, Xn) #
18895 # " " " w/ " (base displacement): (bd, An, Xn) #
18896 # Memory indirect postindexed: ([bd, An], Xn, od) #
18897 # Memory indirect preindexed: ([bd, An, Xn], od) #
18898 ########################################################################
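#
# note: the brief-format (d8,An,Xn) case handled inline below is, roughly,
# in C (extword field names are illustrative):
#
#	int32_t xn = wl_bit ? Xn : (int16_t)Xn;	/* W/L: word index is sign extended */
#	ea = An + (xn << scale) + (int8_t)(extword & 0xff);	/* scale = 0..3 */
#
# the full-format (memory indirect) case branches to fcalc_mem_ind.
#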
18899 faddr_ind_ext:
18900 addq.l &0x8,%d1
18901 bsr.l fetch_dreg # fetch base areg
18902 mov.l %d0,-(%sp)
18904 mov.l EXC_EXTWPTR(%a6),%a0 # fetch instruction addr
18905 addq.l &0x2,EXC_EXTWPTR(%a6) # incr instruction ptr
18906 bsr.l _imem_read_word # fetch extword in d0
18908 tst.l %d1 # did ifetch fail?
18909 bne.l iea_iacc # yes
18911 mov.l (%sp)+,%a0
18913 btst &0x8,%d0
18914 bne.w fcalc_mem_ind
18916 mov.l %d0,L_SCR1(%a6) # hold opword
18918 mov.l %d0,%d1
18919 rol.w &0x4,%d1
18920 andi.w &0xf,%d1 # extract index regno
18922 # count on fetch_dreg() not to alter a0...
18923 bsr.l fetch_dreg # fetch index
18925 mov.l %d2,-(%sp) # save d2
18926 mov.l L_SCR1(%a6),%d2 # fetch opword
18928 btst &0xb,%d2 # is it word or long?
18929 bne.b faii8_long
18930 ext.l %d0 # sign extend word index
18931 faii8_long:
18932 mov.l %d2,%d1
18933 rol.w &0x7,%d1
18934 andi.l &0x3,%d1 # extract scale value
18936 lsl.l %d1,%d0 # shift index by scale
18938 extb.l %d2 # sign extend displacement
18939 add.l %d2,%d0 # index + disp
18940 add.l %d0,%a0 # An + (index + disp)
18942 mov.l (%sp)+,%d2 # restore old d2
18945 ###########################
18946 # Absolute short: (XXX).W #
18947 ###########################
18948 fabs_short:
18949 mov.l EXC_EXTWPTR(%a6),%a0 # fetch instruction addr
18950 addq.l &0x2,EXC_EXTWPTR(%a6) # incr instruction ptr
18951 bsr.l _imem_read_word # fetch short address
18953 tst.l %d1 # did ifetch fail?
18954 bne.l iea_iacc # yes
18956 mov.w %d0,%a0 # return <ea> in a0
18959 ##########################
18960 # Absolute long: (XXX).L #
18961 ##########################
18962 fabs_long:
18963 mov.l EXC_EXTWPTR(%a6),%a0 # fetch instruction addr
18964 addq.l &0x4,EXC_EXTWPTR(%a6) # incr instruction ptr
18965 bsr.l _imem_read_long # fetch long address
18967 tst.l %d1 # did ifetch fail?
18968 bne.l iea_iacc # yes
18970 mov.l %d0,%a0 # return <ea> in a0
18973 #######################################################
18974 # Program counter indirect w/ displacement: (d16, PC) #
18975 #######################################################
18976 fpc_ind:
18977 mov.l EXC_EXTWPTR(%a6),%a0 # fetch instruction addr
18978 addq.l &0x2,EXC_EXTWPTR(%a6) # incr instruction ptr
18979 bsr.l _imem_read_word # fetch word displacement
18981 tst.l %d1 # did ifetch fail?
18982 bne.l iea_iacc # yes
18984 mov.w %d0,%a0 # sign extend displacement
18986 add.l EXC_EXTWPTR(%a6),%a0 # pc + d16
18988 # _imem_read_word() increased the extwptr by 2. need to adjust here.
18989 subq.l &0x2,%a0 # adjust <ea>
18992 ##########################################################
18993 # PC indirect w/ index(8-bit displacement): (d8, PC, An) #
18994 # " " w/ " (base displacement): (bd, PC, An) #
18995 # PC memory indirect postindexed: ([bd, PC], Xn, od) #
18996 # PC memory indirect preindexed: ([bd, PC, Xn], od) #
18997 ##########################################################
18998 fpc_ind_ext:
18999 mov.l EXC_EXTWPTR(%a6),%a0 # fetch instruction addr
19000 addq.l &0x2,EXC_EXTWPTR(%a6) # incr instruction ptr
19001 bsr.l _imem_read_word # fetch ext word
19003 tst.l %d1 # did ifetch fail?
19004 bne.l iea_iacc # yes
19006 mov.l EXC_EXTWPTR(%a6),%a0 # put base in a0
19007 subq.l &0x2,%a0 # adjust base
19009 btst &0x8,%d0 # is disp only 8 bits?
19010 bne.w fcalc_mem_ind # calc memory indirect
19012 mov.l %d0,L_SCR1(%a6) # store opword
19014 mov.l %d0,%d1 # make extword copy
19015 rol.w &0x4,%d1 # rotate reg num into place
19016 andi.w &0xf,%d1 # extract register number
19018 # count on fetch_dreg() not to alter a0...
19019 bsr.l fetch_dreg # fetch index
19021 mov.l %d2,-(%sp) # save d2
19022 mov.l L_SCR1(%a6),%d2 # fetch opword
19024 btst &0xb,%d2 # is index word or long?
19025 bne.b fpii8_long # long
19026 ext.l %d0 # sign extend word index
19027 fpii8_long:
19028 mov.l %d2,%d1
19029 rol.w &0x7,%d1 # rotate scale value into place
19030 andi.l &0x3,%d1 # extract scale value
19032 lsl.l %d1,%d0 # shift index by scale
19034 extb.l %d2 # sign extend displacement
19035 add.l %d2,%d0 # disp + index
19036 add.l %d0,%a0 # An + (index + disp)
19038 mov.l (%sp)+,%d2 # restore temp register
19041 # d2 = index
19042 # d3 = base
19043 # d4 = od
19044 # d5 = extword
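#
# note: in rough C terms, the full-format calculation below is (field and
# helper names are illustrative):
#
#	uint32_t idx  = index_suppressed ? 0 : (uint32_t)Xn << scale;
#	uint32_t base = (base_suppressed ? 0 : An) + bd;	/* bd: null/word/long */
#	uint32_t ea;
#	if (no_od_field)	ea = base + idx;		/* no memory indirection */
#	else if (postindexed)	ea = read32(base) + idx + od;	/* ([bd,An],Xn,od) */
#	else			ea = read32(base + idx) + od;	/* ([bd,An,Xn],od) */
#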
19045 fcalc_mem_ind:
19046 btst &0x6,%d0 # is the index suppressed?
19047 beq.b fcalc_index
19049 movm.l &0x3c00,-(%sp) # save d2-d5
19051 mov.l %d0,%d5 # put extword in d5
19052 mov.l %a0,%d3 # put base in d3
19054 clr.l %d2 # yes, so index = 0
19055 bra.b fbase_supp_ck
19057 # index:
19058 fcalc_index:
19059 mov.l %d0,L_SCR1(%a6) # save d0 (opword)
19060 bfextu %d0{&16:&4},%d1 # fetch dreg index
19061 bsr.l fetch_dreg
19063 movm.l &0x3c00,-(%sp) # save d2-d5
19064 mov.l %d0,%d2 # put index in d2
19065 mov.l L_SCR1(%a6),%d5
19066 mov.l %a0,%d3
19068 btst &0xb,%d5 # is index word or long?
19069 bne.b fno_ext
19070 ext.l %d2
19072 fno_ext:
19073 bfextu %d5{&21:&2},%d0
19074 lsl.l %d0,%d2
19076 # base address (passed as parameter in d3):
19077 # we clear the value here if it should actually be suppressed.
19078 fbase_supp_ck:
19079 btst &0x7,%d5 # is the bd suppressed?
19080 beq.b fno_base_sup
19081 clr.l %d3
19083 # base displacement:
19084 fno_base_sup:
19085 bfextu %d5{&26:&2},%d0 # get bd size
19086 # beq.l fmovm_error # if (size == 0) it's reserved
19088 cmpi.b %d0,&0x2
19089 blt.b fno_bd
19090 beq.b fget_word_bd
19092 mov.l EXC_EXTWPTR(%a6),%a0 # fetch instruction addr
19093 addq.l &0x4,EXC_EXTWPTR(%a6) # incr instruction ptr
19094 bsr.l _imem_read_long
19096 tst.l %d1 # did ifetch fail?
19097 bne.l fcea_iacc # yes
19099 bra.b fchk_ind
19101 fget_word_bd:
19102 mov.l EXC_EXTWPTR(%a6),%a0 # fetch instruction addr
19103 addq.l &0x2,EXC_EXTWPTR(%a6) # incr instruction ptr
19104 bsr.l _imem_read_word
19106 tst.l %d1 # did ifetch fail?
19107 bne.l fcea_iacc # yes
19109 ext.l %d0 # sign extend bd
19111 fchk_ind:
19112 add.l %d0,%d3 # base += bd
19114 # outer displacement:
19115 fno_bd:
19116 bfextu %d5{&30:&2},%d0 # is od suppressed?
19117 beq.w faii_bd
19119 cmpi.b %d0,&0x2
19120 blt.b fnull_od
19121 beq.b fword_od
19123 mov.l EXC_EXTWPTR(%a6),%a0 # fetch instruction addr
19124 addq.l &0x4,EXC_EXTWPTR(%a6) # incr instruction ptr
19125 bsr.l _imem_read_long
19127 tst.l %d1 # did ifetch fail?
19128 bne.l fcea_iacc # yes
19130 bra.b fadd_them
19132 fword_od:
19133 mov.l EXC_EXTWPTR(%a6),%a0 # fetch instruction addr
19134 addq.l &0x2,EXC_EXTWPTR(%a6) # incr instruction ptr
19135 bsr.l _imem_read_word
19137 tst.l %d1 # did ifetch fail?
19138 bne.l fcea_iacc # yes
19140 ext.l %d0 # sign extend od
19141 bra.b fadd_them
19143 fnull_od:
19144 clr.l %d0
19146 fadd_them:
19147 mov.l %d0,%d4
19149 btst &0x2,%d5 # pre or post indexing?
19150 beq.b fpre_indexed
19152 mov.l %d3,%a0
19153 bsr.l _dmem_read_long
19155 tst.l %d1 # did dfetch fail?
19156 bne.w fcea_err # yes
19158 add.l %d2,%d0 # <ea> += index
19159 add.l %d4,%d0 # <ea> += od
19160 bra.b fdone_ea
19162 fpre_indexed:
19163 add.l %d2,%d3 # preindexing
19164 mov.l %d3,%a0
19165 bsr.l _dmem_read_long
19167 tst.l %d1 # did dfetch fail?
19168 bne.w fcea_err # yes
19170 add.l %d4,%d0 # ea += od
19171 bra.b fdone_ea
19173 faii_bd:
19174 add.l %d2,%d3 # ea = (base + bd) + index
19175 mov.l %d3,%d0
19176 fdone_ea:
19177 mov.l %d0,%a0
19179 movm.l (%sp)+,&0x003c # restore d2-d5
	rts
19182 #########################################################
19183 fcea_err:
19184 mov.l %d3,%a0
19186 movm.l (%sp)+,&0x003c # restore d2-d5
19187 mov.w &0x0101,%d0
19188 bra.l iea_dacc
19190 fcea_iacc:
19191 movm.l (%sp)+,&0x003c # restore d2-d5
19192 bra.l iea_iacc
19194 fmovm_out_err:
19195 bsr.l restore
19196 mov.w &0x00e1,%d0
19197 bra.b fmovm_err
19199 fmovm_in_err:
19200 bsr.l restore
19201 mov.w &0x0161,%d0
19203 fmovm_err:
19204 mov.l L_SCR1(%a6),%a0
19205 bra.l iea_dacc
19207 #########################################################################
19208 # XDEF **************************************************************** #
19209 # fmovm_ctrl(): emulate fmovm.l of control registers instr #
19211 # XREF **************************************************************** #
19212 # _imem_read_long() - read longword from memory #
19213 # iea_iacc() - _imem_read_long() failed; error recovery #
19215 # INPUT *************************************************************** #
19216 # None #
19218 # OUTPUT ************************************************************** #
19219 # If _imem_read_long() doesn't fail: #
19220 # USER_FPCR(a6) = new FPCR value #
19221 # USER_FPSR(a6) = new FPSR value #
19222 # USER_FPIAR(a6) = new FPIAR value #
19224 # ALGORITHM *********************************************************** #
19225 # Decode the instruction type by looking at the extension word #
19226 # in order to see how many control registers to fetch from memory. #
19227 # Fetch them using _imem_read_long(). If this fetch fails, exit through #
19228 # the special access error exit handler iea_iacc(). #
19230 # Instruction word decoding: #
19232 # fmovem.l #<data>, {FPIAR&|FPCR&|FPSR} #
19234 # WORD1 WORD2 #
19235 # 1111 0010 00 111100 100$ $$00 0000 0000 #
19237 # $$$ (100): FPCR #
19238 # (010): FPSR #
19239 # (001): FPIAR #
19240 # (000): FPIAR #
19242 #########################################################################
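# As a hedged illustration of the decode above (not part of the package;
# plain C, with "extword" standing for the full second instruction word):
#
#	#include <stdint.h>
#
#	/* bits 12-10 of the extension word are the "$$$" select field */
#	static int fmovm_ctrl_reg_count(uint16_t extword)
#	{
#		int sel = (extword >> 10) & 0x7;
#		return ((sel >> 2) & 1)		/* FPCR selected?  */
#		     + ((sel >> 1) & 1)		/* FPSR selected?  */
#		     + (sel & 1);		/* FPIAR selected? */
#	}
#
# fmovm_ctrl below reads one longword of immediate data for each register
# counted this way, in the order FPCR, FPSR, FPIAR.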
19244 global fmovm_ctrl
19245 fmovm_ctrl:
19246 mov.b EXC_EXTWORD(%a6),%d0 # fetch reg select bits
19247 cmpi.b %d0,&0x9c # fpcr & fpsr & fpiar ?
19248 beq.w fctrl_in_7 # yes
19249 cmpi.b %d0,&0x98 # fpcr & fpsr ?
19250 beq.w fctrl_in_6 # yes
19251 cmpi.b %d0,&0x94 # fpcr & fpiar ?
19252 beq.b fctrl_in_5 # yes
19254 # fmovem.l #<data>, fpsr/fpiar
19255 fctrl_in_3:
19256 mov.l EXC_EXTWPTR(%a6),%a0 # fetch instruction addr
19257 addq.l &0x4,EXC_EXTWPTR(%a6) # incr instruction ptr
19258 bsr.l _imem_read_long # fetch FPSR from mem
19260 tst.l %d1 # did ifetch fail?
19261 bne.l iea_iacc # yes
19263 mov.l %d0,USER_FPSR(%a6) # store new FPSR to stack
19264 mov.l EXC_EXTWPTR(%a6),%a0 # fetch instruction addr
19265 addq.l &0x4,EXC_EXTWPTR(%a6) # incr instruction ptr
19266 bsr.l _imem_read_long # fetch FPIAR from mem
19268 tst.l %d1 # did ifetch fail?
19269 bne.l iea_iacc # yes
19271 mov.l %d0,USER_FPIAR(%a6) # store new FPIAR to stack
	rts
19274 # fmovem.l #<data>, fpcr/fpiar
19275 fctrl_in_5:
19276 mov.l EXC_EXTWPTR(%a6),%a0 # fetch instruction addr
19277 addq.l &0x4,EXC_EXTWPTR(%a6) # incr instruction ptr
19278 bsr.l _imem_read_long # fetch FPCR from mem
19280 tst.l %d1 # did ifetch fail?
19281 bne.l iea_iacc # yes
19283 mov.l %d0,USER_FPCR(%a6) # store new FPCR to stack
19284 mov.l EXC_EXTWPTR(%a6),%a0 # fetch instruction addr
19285 addq.l &0x4,EXC_EXTWPTR(%a6) # incr instruction ptr
19286 bsr.l _imem_read_long # fetch FPIAR from mem
19288 tst.l %d1 # did ifetch fail?
19289 bne.l iea_iacc # yes
19291 mov.l %d0,USER_FPIAR(%a6) # store new FPIAR to stack
	rts
19294 # fmovem.l #<data>, fpcr/fpsr
19295 fctrl_in_6:
19296 mov.l EXC_EXTWPTR(%a6),%a0 # fetch instruction addr
19297 addq.l &0x4,EXC_EXTWPTR(%a6) # incr instruction ptr
19298 bsr.l _imem_read_long # fetch FPCR from mem
19300 tst.l %d1 # did ifetch fail?
19301 bne.l iea_iacc # yes
19303 mov.l %d0,USER_FPCR(%a6) # store new FPCR to mem
19304 mov.l EXC_EXTWPTR(%a6),%a0 # fetch instruction addr
19305 addq.l &0x4,EXC_EXTWPTR(%a6) # incr instruction ptr
19306 bsr.l _imem_read_long # fetch FPSR from mem
19308 tst.l %d1 # did ifetch fail?
19309 bne.l iea_iacc # yes
19311 mov.l %d0,USER_FPSR(%a6) # store new FPSR to mem
	rts
19314 # fmovem.l #<data>, fpcr/fpsr/fpiar
19315 fctrl_in_7:
19316 mov.l EXC_EXTWPTR(%a6),%a0 # fetch instruction addr
19317 addq.l &0x4,EXC_EXTWPTR(%a6) # incr instruction ptr
19318 bsr.l _imem_read_long # fetch FPCR from mem
19320 tst.l %d1 # did ifetch fail?
19321 bne.l iea_iacc # yes
19323 mov.l %d0,USER_FPCR(%a6) # store new FPCR to mem
19324 mov.l EXC_EXTWPTR(%a6),%a0 # fetch instruction addr
19325 addq.l &0x4,EXC_EXTWPTR(%a6) # incr instruction ptr
19326 bsr.l _imem_read_long # fetch FPSR from mem
19328 tst.l %d1 # did ifetch fail?
19329 bne.l iea_iacc # yes
19331 mov.l %d0,USER_FPSR(%a6) # store new FPSR to mem
19332 mov.l EXC_EXTWPTR(%a6),%a0 # fetch instruction addr
19333 addq.l &0x4,EXC_EXTWPTR(%a6) # incr instruction ptr
19334 bsr.l _imem_read_long # fetch FPIAR from mem
19336 tst.l %d1 # did ifetch fail?
19337 bne.l iea_iacc # yes
19339 mov.l %d0,USER_FPIAR(%a6) # store new FPIAR to mem
	rts
19342 #########################################################################
19343 # XDEF **************************************************************** #
19344 # _dcalc_ea(): calc correct <ea> from <ea> stacked on exception #
19346 # XREF **************************************************************** #
19347 # inc_areg() - increment an address register #
19348 # dec_areg() - decrement an address register #
19350 # INPUT *************************************************************** #
19351 # d0 = number of bytes to adjust <ea> by #
19353 # OUTPUT ************************************************************** #
19354 # None #
19356 # ALGORITHM *********************************************************** #
19357 # "Dummy" CALCulate Effective Address: #
19358 # The stacked <ea> for FP unimplemented instructions and opclass #
19359 # two packed instructions is correct with the exception of... #
19361 # 1) -(An) : The register is not updated regardless of size. #
19362 # Also, for extended precision and packed, the #
19363 # stacked <ea> value is 8 bytes too big #
19364 # 2) (An)+ : The register is not updated. #
19365 # 3) #<data> : The upper longword of the immediate operand is #
19366 # stacked. b,w,l and s sizes are completely stacked; #
19367 # d,x, and p are not. #
19369 #########################################################################
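# A hedged C sketch of the corrections listed above (illustrative only;
# "an" is the image of the addressed register, "nbytes" is the size in d0):
#
#	#include <stdint.h>
#
#	static uint32_t dcalc_ea_sketch(uint32_t stacked_ea, uint32_t *an,
#					int is_postinc, int is_predec,
#					uint32_t nbytes)
#	{
#		if (is_postinc) {		/* (An)+ : update An here       */
#			*an += nbytes;
#			return stacked_ea;	/* stacked <ea> already correct */
#		}
#		if (is_predec) {		/* -(An) : update An here       */
#			*an -= nbytes;
#			if (nbytes == 12)	/* ext/packed: stacked <ea> is  */
#				stacked_ea -= 8;	/* 8 bytes too big      */
#			return stacked_ea;
#		}
#		return stacked_ea;		/* all other modes are correct  */
#	}
#
# The #<data> case is handled separately below: the immediate flag is set and
# the returned <ea> points at the immediate data in the instruction stream
# (FPIAR + 4).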
19371 global _dcalc_ea
19372 _dcalc_ea:
19373 mov.l %d0, %a0 # move # bytes to %a0
19375 mov.b 1+EXC_OPWORD(%a6), %d0 # fetch opcode word
19376 mov.l %d0, %d1 # make a copy
19378 andi.w &0x38, %d0 # extract mode field
19379 andi.l &0x7, %d1 # extract reg field
19381 cmpi.b %d0,&0x18 # is mode (An)+ ?
19382 beq.b dcea_pi # yes
19384 cmpi.b %d0,&0x20 # is mode -(An) ?
19385 beq.b dcea_pd # yes
19387 or.w %d1,%d0 # concat mode,reg
19388 cmpi.b %d0,&0x3c # is mode #<data>?
19390 beq.b dcea_imm # yes
19392 mov.l EXC_EA(%a6),%a0 # return <ea>
	rts
19395 # need to set immediate data flag here since we'll need to do
19396 # an imem_read to fetch this later.
19397 dcea_imm:
19398 mov.b &immed_flg,SPCOND_FLG(%a6)
19399 lea ([USER_FPIAR,%a6],0x4),%a0 # no; return <ea>
	rts
19402 # here, the <ea> is stacked correctly. however, we must update the
19403 # address register...
19404 dcea_pi:
19405 mov.l %a0,%d0 # pass amt to inc by
19406 bsr.l inc_areg # inc addr register
19408 mov.l EXC_EA(%a6),%a0 # stacked <ea> is correct
	rts
19411 # the <ea> is stacked correctly for all but extended and packed which
19412 # the <ea>s are 8 bytes too large.
19413 # it would make no sense to have a pre-decrement to a7 in supervisor
19414 # mode so we don't even worry about this tricky case here : )
19415 dcea_pd:
19416 mov.l %a0,%d0 # pass amt to dec by
19417 bsr.l dec_areg # dec addr register
19419 mov.l EXC_EA(%a6),%a0 # stacked <ea> is correct
19421 cmpi.b %d0,&0xc # is opsize ext or packed?
19422 beq.b dcea_pd2 # yes
	rts
19424 dcea_pd2:
19425 sub.l &0x8,%a0 # correct <ea>
19426 mov.l %a0,EXC_EA(%a6) # put correct <ea> on stack
	rts
19429 #########################################################################
19430 # XDEF **************************************************************** #
19431 # _calc_ea_fout(): calculate correct stacked <ea> for extended #
19432 # and packed data opclass 3 operations. #
19434 # XREF **************************************************************** #
19435 # None #
19437 # INPUT *************************************************************** #
19438 # None #
19440 # OUTPUT ************************************************************** #
19441 # a0 = return correct effective address #
19443 # ALGORITHM *********************************************************** #
19444 # For opclass 3 extended and packed data operations, the <ea> #
19445 # stacked for the exception is incorrect for -(an) and (an)+ addressing #
19446 # modes. Also, while we're at it, the address register itself must get #
19447 # updated. #
19448 # So, for -(an), we must subtract 8 off of the stacked <ea> value #
19449 # and return that value as the correct <ea> and store that value in An. #
19450 # For (an)+, the stacked <ea> is correct but we must adjust An by +12. #
19452 #########################################################################
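# Hedged C sketch of the fix-up described above (illustrative only; "an"
# is the image of the addressed register for a 12-byte fmove out):
#
#	#include <stdint.h>
#
#	static uint32_t calc_ea_fout_sketch(uint32_t stacked_ea, uint32_t *an,
#					    int is_postinc, int is_predec)
#	{
#		if (is_postinc) {		/* (An)+ : <ea> correct,     */
#			*an += 12;		/* but An must grow by 12    */
#			return stacked_ea;
#		}
#		if (is_predec) {		/* -(An) : <ea> is 8 too big */
#			*an = stacked_ea - 8;	/* corrected value also goes */
#			return stacked_ea - 8;	/* back into An              */
#		}
#		return stacked_ea;		/* other modes are correct   */
#	}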
19454 # This calc_ea is currently used to retrieve the correct <ea>
19455 # for fmove outs of type extended and packed.
19456 global _calc_ea_fout
19457 _calc_ea_fout:
19458 mov.b 1+EXC_OPWORD(%a6),%d0 # fetch opcode word
19459 mov.l %d0,%d1 # make a copy
19461 andi.w &0x38,%d0 # extract mode field
19462 andi.l &0x7,%d1 # extract reg field
19464 cmpi.b %d0,&0x18 # is mode (An)+ ?
19465 beq.b ceaf_pi # yes
19467 cmpi.b %d0,&0x20 # is mode -(An) ?
19468 beq.w ceaf_pd # yes
19470 mov.l EXC_EA(%a6),%a0 # stacked <ea> is correct
	rts
19473 # (An)+ : extended and packed fmove out
19474 # : stacked <ea> is correct
19475 # : "An" not updated
19476 ceaf_pi:
19477 mov.w (tbl_ceaf_pi.b,%pc,%d1.w*2),%d1
19478 mov.l EXC_EA(%a6),%a0
19479 jmp (tbl_ceaf_pi.b,%pc,%d1.w*1)
19481 swbeg &0x8
19482 tbl_ceaf_pi:
19483 short ceaf_pi0 - tbl_ceaf_pi
19484 short ceaf_pi1 - tbl_ceaf_pi
19485 short ceaf_pi2 - tbl_ceaf_pi
19486 short ceaf_pi3 - tbl_ceaf_pi
19487 short ceaf_pi4 - tbl_ceaf_pi
19488 short ceaf_pi5 - tbl_ceaf_pi
19489 short ceaf_pi6 - tbl_ceaf_pi
19490 short ceaf_pi7 - tbl_ceaf_pi
19492 ceaf_pi0:
19493 addi.l &0xc,EXC_DREGS+0x8(%a6)
	rts
19495 ceaf_pi1:
19496 addi.l &0xc,EXC_DREGS+0xc(%a6)
	rts
19498 ceaf_pi2:
19499 add.l &0xc,%a2
	rts
19501 ceaf_pi3:
19502 add.l &0xc,%a3
	rts
19504 ceaf_pi4:
19505 add.l &0xc,%a4
	rts
19507 ceaf_pi5:
19508 add.l &0xc,%a5
	rts
19510 ceaf_pi6:
19511 addi.l &0xc,EXC_A6(%a6)
	rts
19513 ceaf_pi7:
19514 mov.b &mia7_flg,SPCOND_FLG(%a6)
19515 addi.l &0xc,EXC_A7(%a6)
	rts
19518 # -(An) : extended and packed fmove out
19519 # : stacked <ea> = actual <ea> + 8
19520 # : "An" not updated
19521 ceaf_pd:
19522 mov.w (tbl_ceaf_pd.b,%pc,%d1.w*2),%d1
19523 mov.l EXC_EA(%a6),%a0
19524 sub.l &0x8,%a0
19525 sub.l &0x8,EXC_EA(%a6)
19526 jmp (tbl_ceaf_pd.b,%pc,%d1.w*1)
19528 swbeg &0x8
19529 tbl_ceaf_pd:
19530 short ceaf_pd0 - tbl_ceaf_pd
19531 short ceaf_pd1 - tbl_ceaf_pd
19532 short ceaf_pd2 - tbl_ceaf_pd
19533 short ceaf_pd3 - tbl_ceaf_pd
19534 short ceaf_pd4 - tbl_ceaf_pd
19535 short ceaf_pd5 - tbl_ceaf_pd
19536 short ceaf_pd6 - tbl_ceaf_pd
19537 short ceaf_pd7 - tbl_ceaf_pd
19539 ceaf_pd0:
19540 mov.l %a0,EXC_DREGS+0x8(%a6)
	rts
19542 ceaf_pd1:
19543 mov.l %a0,EXC_DREGS+0xc(%a6)
	rts
19545 ceaf_pd2:
19546 mov.l %a0,%a2
	rts
19548 ceaf_pd3:
19549 mov.l %a0,%a3
	rts
19551 ceaf_pd4:
19552 mov.l %a0,%a4
	rts
19554 ceaf_pd5:
19555 mov.l %a0,%a5
	rts
19557 ceaf_pd6:
19558 mov.l %a0,EXC_A6(%a6)
	rts
19560 ceaf_pd7:
19561 mov.l %a0,EXC_A7(%a6)
19562 mov.b &mda7_flg,SPCOND_FLG(%a6)
	rts
19565 #########################################################################
19566 # XDEF **************************************************************** #
19567 # _load_fop(): load operand for unimplemented FP exception #
19569 # XREF **************************************************************** #
19570 # set_tag_x() - determine ext prec optype tag #
19571 # set_tag_s() - determine sgl prec optype tag #
19572 # set_tag_d() - determine dbl prec optype tag #
19573 # unnorm_fix() - convert normalized number to denorm or zero #
19574 # norm() - normalize a denormalized number #
19575 # get_packed() - fetch a packed operand from memory #
19576 # _dcalc_ea() - calculate <ea>, fixing An in process #
19578 # _imem_read_{word,long}() - read from instruction memory #
19579 # _dmem_read() - read from data memory #
19580 # _dmem_read_{byte,word,long}() - read from data memory #
19582 # facc_in_{b,w,l,d,x}() - mem read failed; special exit point #
19584 # INPUT *************************************************************** #
19585 # None #
19587 # OUTPUT ************************************************************** #
19588 # If memory access doesn't fail: #
19589 # FP_SRC(a6) = source operand in extended precision #
19590 # FP_DST(a6) = destination operand in extended precision #
19592 # ALGORITHM *********************************************************** #
19593 # This is called from the Unimplemented FP exception handler in #
19594 # order to load the source and maybe destination operand into #
19595 # FP_SRC(a6) and FP_DST(a6). If the instruction was opclass zero, load #
19596 # the source and destination from the FP register file. Set the optype #
19597 # tags for both if dyadic, one for monadic. If a number is an UNNORM, #
19598 # convert it to a DENORM or a ZERO. #
19599 # If the instruction is opclass two (memory->reg), then fetch #
19600 # the destination from the register file and the source operand from #
19601 # memory. Tag and fix both as above w/ opclass zero instructions. #
19602 # If the source operand is byte,word,long, or single, it may be #
19603 # in the data register file. If it's actually out in memory, use one of #
19604 # the mem_read() routines to fetch it. If the mem_read() access returns #
19605 # a failing value, exit through the special facc_in() routine which #
19606 # will create an access error exception frame from the current exception #
19607 # frame. #
19608 # Immediate data and regular data accesses are separated because #
19609 # if an immediate data access fails, the resulting fault status #
19610 # longword stacked for the access error exception must have the #
19611 # instruction bit set. #
19613 #########################################################################
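# Hedged C outline of the tagging rule used throughout this routine
# (illustrative only; the helper names mirror set_tag_x()/unnorm_fix()):
#
#	/* classify an operand and never let an UNNORM escape */
#	static int load_fop_tag_sketch(void *op,
#				       int (*set_tag)(void *),
#				       int (*unnorm_fix)(void *),
#				       int unnorm_tag)
#	{
#		int tag = set_tag(op);		/* NORM/ZERO/INF/QNAN/SNAN/...  */
#		if (tag == unnorm_tag)		/* UNNORMs get converted to a   */
#			tag = unnorm_fix(op);	/* DENORM or a ZERO             */
#		return tag;			/* value stored in STAG or DTAG */
#	}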
19615 global _load_fop
19616 _load_fop:
19618 # 15 13 12 10 9 7 6 0
19619 # / \ / \ / \ / \
19620 # ---------------------------------
19621 # | opclass | RX | RY | EXTENSION | (2nd word of general FP instruction)
19622 # ---------------------------------
19625 # bfextu EXC_CMDREG(%a6){&0:&3}, %d0 # extract opclass
19626 # cmpi.b %d0, &0x2 # which class is it? ('000,'010,'011)
19627 # beq.w op010 # handle <ea> -> fpn
19628 # bgt.w op011 # handle fpn -> <ea>
19630 # we're not using op011 for now...
19631 btst &0x6,EXC_CMDREG(%a6)
19632 bne.b op010
19634 ############################
19635 # OPCLASS '000: reg -> reg #
19636 ############################
19637 op000:
19638 mov.b 1+EXC_CMDREG(%a6),%d0 # fetch extension word lo
19639 btst &0x5,%d0 # testing extension bits
19640 beq.b op000_src # (bit 5 == 0) => monadic
19641 btst &0x4,%d0 # (bit 5 == 1)
19642 beq.b op000_dst # (bit 4 == 0) => dyadic
19643 and.w &0x007f,%d0 # extract extension bits {6:0}
19644 cmpi.w %d0,&0x0038 # is it an fcmp (dyadic) ?
19645 bne.b op000_src # no; not an fcmp (monadic)
19647 op000_dst:
19648 bfextu EXC_CMDREG(%a6){&6:&3}, %d0 # extract dst field
19649 bsr.l load_fpn2 # fetch dst fpreg into FP_DST
19651 bsr.l set_tag_x # get dst optype tag
19653 cmpi.b %d0, &UNNORM # is dst fpreg an UNNORM?
19654 beq.b op000_dst_unnorm # yes
19655 op000_dst_cont:
19656 mov.b %d0, DTAG(%a6) # store the dst optype tag
19658 op000_src:
19659 bfextu EXC_CMDREG(%a6){&3:&3}, %d0 # extract src field
19660 bsr.l load_fpn1 # fetch src fpreg into FP_SRC
19662 bsr.l set_tag_x # get src optype tag
19664 cmpi.b %d0, &UNNORM # is src fpreg an UNNORM?
19665 beq.b op000_src_unnorm # yes
19666 op000_src_cont:
19667 mov.b %d0, STAG(%a6) # store the src optype tag
	rts
19670 op000_dst_unnorm:
19671 bsr.l unnorm_fix # fix the dst UNNORM
19672 bra.b op000_dst_cont
19673 op000_src_unnorm:
19674 bsr.l unnorm_fix # fix the src UNNORM
19675 bra.b op000_src_cont
19677 #############################
19678 # OPCLASS '010: <ea> -> reg #
19679 #############################
19680 op010:
19681 mov.w EXC_CMDREG(%a6),%d0 # fetch extension word
19682 btst &0x5,%d0 # testing extension bits
19683 beq.b op010_src # (bit 5 == 0) => monadic
19684 btst &0x4,%d0 # (bit 5 == 1)
19685 beq.b op010_dst # (bit 4 == 0) => dyadic
19686 and.w &0x007f,%d0 # extract extension bits {6:0}
19687 cmpi.w %d0,&0x0038 # is it an fcmp (dyadic) ?
19688 bne.b op010_src # no; not an fcmp (monadic)
19690 op010_dst:
19691 bfextu EXC_CMDREG(%a6){&6:&3}, %d0 # extract dst field
19692 bsr.l load_fpn2 # fetch dst fpreg ptr
19694 bsr.l set_tag_x # get dst type tag
19696 cmpi.b %d0, &UNNORM # is dst fpreg an UNNORM?
19697 beq.b op010_dst_unnorm # yes
19698 op010_dst_cont:
19699 mov.b %d0, DTAG(%a6) # store the dst optype tag
19701 op010_src:
19702 bfextu EXC_CMDREG(%a6){&3:&3}, %d0 # extract src type field
19704 bfextu EXC_OPWORD(%a6){&10:&3}, %d1 # extract <ea> mode field
19705 bne.w fetch_from_mem # src op is in memory
19707 op010_dreg:
19708 clr.b STAG(%a6) # either NORM or ZERO
19709 bfextu EXC_OPWORD(%a6){&13:&3}, %d1 # extract src reg field
19711 mov.w (tbl_op010_dreg.b,%pc,%d0.w*2), %d0 # jmp based on optype
19712 jmp (tbl_op010_dreg.b,%pc,%d0.w*1) # fetch src from dreg
19714 op010_dst_unnorm:
19715 bsr.l unnorm_fix # fix the dst UNNORM
19716 bra.b op010_dst_cont
19718 swbeg &0x8
19719 tbl_op010_dreg:
19720 short opd_long - tbl_op010_dreg
19721 short opd_sgl - tbl_op010_dreg
19722 short tbl_op010_dreg - tbl_op010_dreg
19723 short tbl_op010_dreg - tbl_op010_dreg
19724 short opd_word - tbl_op010_dreg
19725 short tbl_op010_dreg - tbl_op010_dreg
19726 short opd_byte - tbl_op010_dreg
19727 short tbl_op010_dreg - tbl_op010_dreg
19730 # LONG: can be either NORM or ZERO...
19732 opd_long:
19733 bsr.l fetch_dreg # fetch long in d0
19734 fmov.l %d0, %fp0 # load a long
19735 fmovm.x &0x80, FP_SRC(%a6) # return src op in FP_SRC
19736 fbeq.w opd_long_zero # long is a ZERO
	rts
19738 opd_long_zero:
19739 mov.b &ZERO, STAG(%a6) # set ZERO optype flag
	rts
19743 # WORD: can be either NORM or ZERO...
19745 opd_word:
19746 bsr.l fetch_dreg # fetch word in d0
19747 fmov.w %d0, %fp0 # load a word
19748 fmovm.x &0x80, FP_SRC(%a6) # return src op in FP_SRC
19749 fbeq.w opd_word_zero # WORD is a ZERO
	rts
19751 opd_word_zero:
19752 mov.b &ZERO, STAG(%a6) # set ZERO optype flag
	rts
19756 # BYTE: can be either NORM or ZERO...
19758 opd_byte:
19759 bsr.l fetch_dreg # fetch word in d0
19760 fmov.b %d0, %fp0 # load a byte
19761 fmovm.x &0x80, FP_SRC(%a6) # return src op in FP_SRC
19762 fbeq.w opd_byte_zero # byte is a ZERO
	rts
19764 opd_byte_zero:
19765 mov.b &ZERO, STAG(%a6) # set ZERO optype flag
	rts
19769 # SGL: can be either NORM, DENORM, ZERO, INF, QNAN or SNAN but not UNNORM
19771 # separate SNANs and DENORMs so they can be loaded w/ special care.
19772 # all others can simply be moved "in" using fmove.
19774 opd_sgl:
19775 bsr.l fetch_dreg # fetch sgl in d0
19776 mov.l %d0,L_SCR1(%a6)
19778 lea L_SCR1(%a6), %a0 # pass: ptr to the sgl
19779 bsr.l set_tag_s # determine sgl type
19780 mov.b %d0, STAG(%a6) # save the src tag
19782 cmpi.b %d0, &SNAN # is it an SNAN?
19783 beq.w get_sgl_snan # yes
19785 cmpi.b %d0, &DENORM # is it a DENORM?
19786 beq.w get_sgl_denorm # yes
19788 fmov.s (%a0), %fp0 # no, so can load it regular
19789 fmovm.x &0x80, FP_SRC(%a6) # return src op in FP_SRC
	rts
19792 ##############################################################################
19794 #########################################################################
19795 # fetch_from_mem(): #
19796 # - src is out in memory. must: #
19797 # (1) calc ea - must read AFTER you know the src type since #
19798 # if the ea is -() or ()+, need to know # of bytes. #
19799 # (2) read it in from either user or supervisor space #
19800 # (3) if (b || w || l) then simply read in #
19801 # if (s || d || x) then check for SNAN,UNNORM,DENORM #
19802 # if (packed) then punt for now #
19803 # INPUT: #
19804 # %d0 : src type field #
19805 #########################################################################
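# The size of the memory read below depends only on the source-format field;
# a hedged C rendition of the dispatch (illustrative only):
#
#	static int fetch_size_sketch(int src_fmt)
#	{
#		switch (src_fmt) {
#		case 0: return 4;	/* long     */
#		case 1: return 4;	/* single   */
#		case 2: return 12;	/* extended */
#		case 3: return 12;	/* packed   */
#		case 4: return 2;	/* word     */
#		case 5: return 8;	/* double   */
#		case 6: return 1;	/* byte     */
#		default: return 0;	/* reserved */
#		}
#	}
#
# b/w/l are loaded with a plain fmove; s and d are screened for SNAN and
# DENORM, x for UNNORM; packed goes through get_packed().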
19806 fetch_from_mem:
19807 clr.b STAG(%a6) # either NORM or ZERO
19809 mov.w (tbl_fp_type.b,%pc,%d0.w*2), %d0 # index by src type field
19810 jmp (tbl_fp_type.b,%pc,%d0.w*1)
19812 swbeg &0x8
19813 tbl_fp_type:
19814 short load_long - tbl_fp_type
19815 short load_sgl - tbl_fp_type
19816 short load_ext - tbl_fp_type
19817 short load_packed - tbl_fp_type
19818 short load_word - tbl_fp_type
19819 short load_dbl - tbl_fp_type
19820 short load_byte - tbl_fp_type
19821 short tbl_fp_type - tbl_fp_type
19823 #########################################
19824 # load a LONG into %fp0: #
19825 # -number can't fault #
19826 # (1) calc ea #
19827 # (2) read 4 bytes into L_SCR1 #
19828 # (3) fmov.l into %fp0 #
19829 #########################################
19830 load_long:
19831 movq.l &0x4, %d0 # pass: 4 (bytes)
19832 bsr.l _dcalc_ea # calc <ea>; <ea> in %a0
19834 cmpi.b SPCOND_FLG(%a6),&immed_flg
19835 beq.b load_long_immed
19837 bsr.l _dmem_read_long # fetch src operand from memory
19839 tst.l %d1 # did dfetch fail?
19840 bne.l facc_in_l # yes
19842 load_long_cont:
19843 fmov.l %d0, %fp0 # read into %fp0;convert to xprec
19844 fmovm.x &0x80, FP_SRC(%a6) # return src op in FP_SRC
19846 fbeq.w load_long_zero # src op is a ZERO
	rts
19848 load_long_zero:
19849 mov.b &ZERO, STAG(%a6) # set optype tag to ZERO
	rts
19852 load_long_immed:
19853 bsr.l _imem_read_long # fetch src operand immed data
19855 tst.l %d1 # did ifetch fail?
19856 bne.l funimp_iacc # yes
19857 bra.b load_long_cont
19859 #########################################
19860 # load a WORD into %fp0: #
19861 # -number can't fault #
19862 # (1) calc ea #
19863 # (2) read 2 bytes into L_SCR1 #
19864 # (3) fmov.w into %fp0 #
19865 #########################################
19866 load_word:
19867 movq.l &0x2, %d0 # pass: 2 (bytes)
19868 bsr.l _dcalc_ea # calc <ea>; <ea> in %a0
19870 cmpi.b SPCOND_FLG(%a6),&immed_flg
19871 beq.b load_word_immed
19873 bsr.l _dmem_read_word # fetch src operand from memory
19875 tst.l %d1 # did dfetch fail?
19876 bne.l facc_in_w # yes
19878 load_word_cont:
19879 fmov.w %d0, %fp0 # read into %fp0;convert to xprec
19880 fmovm.x &0x80, FP_SRC(%a6) # return src op in FP_SRC
19882 fbeq.w load_word_zero # src op is a ZERO
	rts
19884 load_word_zero:
19885 mov.b &ZERO, STAG(%a6) # set optype tag to ZERO
	rts
19888 load_word_immed:
19889 bsr.l _imem_read_word # fetch src operand immed data
19891 tst.l %d1 # did ifetch fail?
19892 bne.l funimp_iacc # yes
19893 bra.b load_word_cont
19895 #########################################
19896 # load a BYTE into %fp0: #
19897 # -number can't fault #
19898 # (1) calc ea #
19899 # (2) read 1 byte into L_SCR1 #
19900 # (3) fmov.b into %fp0 #
19901 #########################################
19902 load_byte:
19903 movq.l &0x1, %d0 # pass: 1 (byte)
19904 bsr.l _dcalc_ea # calc <ea>; <ea> in %a0
19906 cmpi.b SPCOND_FLG(%a6),&immed_flg
19907 beq.b load_byte_immed
19909 bsr.l _dmem_read_byte # fetch src operand from memory
19911 tst.l %d1 # did dfetch fail?
19912 bne.l facc_in_b # yes
19914 load_byte_cont:
19915 fmov.b %d0, %fp0 # read into %fp0;convert to xprec
19916 fmovm.x &0x80, FP_SRC(%a6) # return src op in FP_SRC
19918 fbeq.w load_byte_zero # src op is a ZERO
	rts
19920 load_byte_zero:
19921 mov.b &ZERO, STAG(%a6) # set optype tag to ZERO
	rts
19924 load_byte_immed:
19925 bsr.l _imem_read_word # fetch src operand immed data
19927 tst.l %d1 # did ifetch fail?
19928 bne.l funimp_iacc # yes
19929 bra.b load_byte_cont
19931 #########################################
19932 # load a SGL into %fp0: #
19933 # -number can't fault #
19934 # (1) calc ea #
19935 # (2) read 4 bytes into L_SCR1 #
19936 # (3) fmov.s into %fp0 #
19937 #########################################
19938 load_sgl:
19939 movq.l &0x4, %d0 # pass: 4 (bytes)
19940 bsr.l _dcalc_ea # calc <ea>; <ea> in %a0
19942 cmpi.b SPCOND_FLG(%a6),&immed_flg
19943 beq.b load_sgl_immed
19945 bsr.l _dmem_read_long # fetch src operand from memory
19946 mov.l %d0, L_SCR1(%a6) # store src op on stack
19948 tst.l %d1 # did dfetch fail?
19949 bne.l facc_in_l # yes
19951 load_sgl_cont:
19952 lea L_SCR1(%a6), %a0 # pass: ptr to sgl src op
19953 bsr.l set_tag_s # determine src type tag
19954 mov.b %d0, STAG(%a6) # save src optype tag on stack
19956 cmpi.b %d0, &DENORM # is it a sgl DENORM?
19957 beq.w get_sgl_denorm # yes
19959 cmpi.b %d0, &SNAN # is it a sgl SNAN?
19960 beq.w get_sgl_snan # yes
19962 fmov.s L_SCR1(%a6), %fp0 # read into %fp0;convert to xprec
19963 fmovm.x &0x80, FP_SRC(%a6) # return src op in FP_SRC
	rts
19966 load_sgl_immed:
19967 bsr.l _imem_read_long # fetch src operand immed data
19969 tst.l %d1 # did ifetch fail?
19970 bne.l funimp_iacc # yes
19971 bra.b load_sgl_cont
19973 # must convert sgl denorm format to an Xprec denorm fmt suitable for
19974 # normalization...
19975 # %a0 : points to sgl denorm
19976 get_sgl_denorm:
19977 clr.w FP_SRC_EX(%a6)
19978 bfextu (%a0){&9:&23}, %d0 # fetch sgl hi(_mantissa)
19979 lsl.l &0x8, %d0
19980 mov.l %d0, FP_SRC_HI(%a6) # set ext hi(_mantissa)
19981 clr.l FP_SRC_LO(%a6) # set ext lo(_mantissa)
19983 clr.w FP_SRC_EX(%a6)
19984 btst &0x7, (%a0) # is sgn bit set?
19985 beq.b sgl_dnrm_norm
19986 bset &0x7, FP_SRC_EX(%a6) # set sgn of xprec value
19988 sgl_dnrm_norm:
19989 lea FP_SRC(%a6), %a0
19990 bsr.l norm # normalize number
19991 mov.w &0x3f81, %d1 # xprec exp = 0x3f81
19992 sub.w %d0, %d1 # exp = 0x3f81 - shft amt.
19993 or.w %d1, FP_SRC_EX(%a6) # {sgn,exp}
19995 mov.b &NORM, STAG(%a6) # fix src type tag
	rts
19998 # convert sgl to ext SNAN
19999 # %a0 : points to sgl SNAN
20000 get_sgl_snan:
20001 mov.w &0x7fff, FP_SRC_EX(%a6) # set exp of SNAN
20002 bfextu (%a0){&9:&23}, %d0
20003 lsl.l &0x8, %d0 # extract and insert hi(man)
20004 mov.l %d0, FP_SRC_HI(%a6)
20005 clr.l FP_SRC_LO(%a6)
20007 btst &0x7, (%a0) # see if sign of SNAN is set
20008 beq.b no_sgl_snan_sgn
20009 bset &0x7, FP_SRC_EX(%a6)
20010 no_sgl_snan_sgn:
	rts
20013 #########################################
20014 # load a DBL into %fp0: #
20015 # -number can't fault #
20016 # (1) calc ea #
20017 # (2) read 8 bytes into L_SCR(1,2)#
20018 # (3) fmov.d into %fp0 #
20019 #########################################
20020 load_dbl:
20021 movq.l &0x8, %d0 # pass: 8 (bytes)
20022 bsr.l _dcalc_ea # calc <ea>; <ea> in %a0
20024 cmpi.b SPCOND_FLG(%a6),&immed_flg
20025 beq.b load_dbl_immed
20027 lea L_SCR1(%a6), %a1 # pass: ptr to input dbl tmp space
20028 movq.l &0x8, %d0 # pass: # bytes to read
20029 bsr.l _dmem_read # fetch src operand from memory
20031 tst.l %d1 # did dfetch fail?
20032 bne.l facc_in_d # yes
20034 load_dbl_cont:
20035 lea L_SCR1(%a6), %a0 # pass: ptr to input dbl
20036 bsr.l set_tag_d # determine src type tag
20037 mov.b %d0, STAG(%a6) # set src optype tag
20039 cmpi.b %d0, &DENORM # is it a dbl DENORM?
20040 beq.w get_dbl_denorm # yes
20042 cmpi.b %d0, &SNAN # is it a dbl SNAN?
20043 beq.w get_dbl_snan # yes
20045 fmov.d L_SCR1(%a6), %fp0 # read into %fp0;convert to xprec
20046 fmovm.x &0x80, FP_SRC(%a6) # return src op in FP_SRC
	rts
20049 load_dbl_immed:
20050 lea L_SCR1(%a6), %a1 # pass: ptr to input dbl tmp space
20051 movq.l &0x8, %d0 # pass: # bytes to read
20052 bsr.l _imem_read # fetch src operand from memory
20054 tst.l %d1 # did ifetch fail?
20055 bne.l funimp_iacc # yes
20056 bra.b load_dbl_cont
20058 # must convert dbl denorm format to an Xprec denorm fmt suitable for
20059 # normalization...
20060 # %a0 : loc. of dbl denorm
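# (Hedged note: the double case is the same idea as the single-precision
# sketch above -- the 52-bit fraction is split across hi/lo exactly as the
# bfextu/lsl sequence below does, and after norm() the biased extended
# exponent is 0x3c01 - shift_count, 0x3c01 being the exponent of 2^-1022,
# with the sign carried over unchanged.)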
20061 get_dbl_denorm:
20062 clr.w FP_SRC_EX(%a6)
20063 bfextu (%a0){&12:&31}, %d0 # fetch hi(_mantissa)
20064 mov.l %d0, FP_SRC_HI(%a6)
20065 bfextu 4(%a0){&11:&21}, %d0 # fetch lo(_mantissa)
20066 mov.l &0xb, %d1
20067 lsl.l %d1, %d0
20068 mov.l %d0, FP_SRC_LO(%a6)
20070 btst &0x7, (%a0) # is sgn bit set?
20071 beq.b dbl_dnrm_norm
20072 bset &0x7, FP_SRC_EX(%a6) # set sgn of xprec value
20074 dbl_dnrm_norm:
20075 lea FP_SRC(%a6), %a0
20076 bsr.l norm # normalize number
20077 mov.w &0x3c01, %d1 # xprec exp = 0x3c01
20078 sub.w %d0, %d1 # exp = 0x3c01 - shft amt.
20079 or.w %d1, FP_SRC_EX(%a6) # {sgn,exp}
20081 mov.b &NORM, STAG(%a6) # fix src type tag
	rts
20084 # convert dbl to ext SNAN
20085 # %a0 : points to dbl SNAN
20086 get_dbl_snan:
20087 mov.w &0x7fff, FP_SRC_EX(%a6) # set exp of SNAN
20089 bfextu (%a0){&12:&31}, %d0 # fetch hi(_mantissa)
20090 mov.l %d0, FP_SRC_HI(%a6)
20091 bfextu 4(%a0){&11:&21}, %d0 # fetch lo(_mantissa)
20092 mov.l &0xb, %d1
20093 lsl.l %d1, %d0
20094 mov.l %d0, FP_SRC_LO(%a6)
20096 btst &0x7, (%a0) # see if sign of SNAN is set
20097 beq.b no_dbl_snan_sgn
20098 bset &0x7, FP_SRC_EX(%a6)
20099 no_dbl_snan_sgn:
	rts
20102 #################################################
20103 # load a Xprec into %fp0: #
20104 # -number can't fault #
20105 # (1) calc ea #
20106 # (2) read 12 bytes into L_SCR(1,2) #
20107 # (3) fmov.x into %fp0 #
20108 #################################################
20109 load_ext:
20110 mov.l &0xc, %d0 # pass: 12 (bytes)
20111 bsr.l _dcalc_ea # calc <ea>
20113 lea FP_SRC(%a6), %a1 # pass: ptr to input ext tmp space
20114 mov.l &0xc, %d0 # pass: # of bytes to read
20115 bsr.l _dmem_read # fetch src operand from memory
20117 tst.l %d1 # did dfetch fail?
20118 bne.l facc_in_x # yes
20120 lea FP_SRC(%a6), %a0 # pass: ptr to src op
20121 bsr.l set_tag_x # determine src type tag
20123 cmpi.b %d0, &UNNORM # is the src op an UNNORM?
20124 beq.b load_ext_unnorm # yes
20126 mov.b %d0, STAG(%a6) # store the src optype tag
	rts
20129 load_ext_unnorm:
20130 bsr.l unnorm_fix # fix the src UNNORM
20131 mov.b %d0, STAG(%a6) # store the src optype tag
	rts
20134 #################################################
20135 # load a packed into %fp0: #
20136 # -number can't fault #
20137 # (1) calc ea #
20138 # (2) read 12 bytes into L_SCR(1,2,3) #
20139 # (3) fmov.x into %fp0 #
20140 #################################################
20141 load_packed:
20142 bsr.l get_packed
20144 lea FP_SRC(%a6),%a0 # pass ptr to src op
20145 bsr.l set_tag_x # determine src type tag
20146 cmpi.b %d0,&UNNORM # is the src op an UNNORM ZERO?
20147 beq.b load_packed_unnorm # yes
20149 mov.b %d0,STAG(%a6) # store the src optype tag
	rts
20152 load_packed_unnorm:
20153 bsr.l unnorm_fix # fix the UNNORM ZERO
20154 mov.b %d0,STAG(%a6) # store the src optype tag
20155 rts
20157 #########################################################################
20158 # XDEF **************************************************************** #
20159 # fout(): move from fp register to memory or data register #
20161 # XREF **************************************************************** #
20162 # _round() - needed to create EXOP for sgl/dbl precision #
20163 # norm() - needed to create EXOP for extended precision #
20164 # ovf_res() - create default overflow result for sgl/dbl precision#
20165 # unf_res() - create default underflow result for sgl/dbl prec. #
20166 # dst_dbl() - create rounded dbl precision result. #
20167 # dst_sgl() - create rounded sgl precision result. #
20168 # fetch_dreg() - fetch dynamic k-factor reg for packed. #
20169 # bindec() - convert FP binary number to packed number. #
20170 # _mem_write() - write data to memory. #
20171 # _mem_write2() - write data to memory unless supv mode -(a7) exc.#
20172 # _dmem_write_{byte,word,long}() - write data to memory. #
20173 # store_dreg_{b,w,l}() - store data to data register file. #
20174 # facc_out_{b,w,l,d,x}() - data access error occurred. #
20176 # INPUT *************************************************************** #
20177 # a0 = pointer to extended precision source operand #
20178 # d0 = round prec,mode #
20180 # OUTPUT ************************************************************** #
20181 # fp0 : intermediate underflow or overflow result if #
20182 # OVFL/UNFL occurred for a sgl or dbl operand #
20184 # ALGORITHM *********************************************************** #
20185 # This routine is accessed by many handlers that need to do an #
20186 # opclass three move of an operand out to memory. #
20187 # Decode an fmove out (opclass 3) instruction to determine if #
20188 # it's b,w,l,s,d,x, or p in size. b,w,l can be stored to either a data #
20189 # register or memory. The algorithm uses a standard "fmove" to create #
20190 # the rounded result. Also, since exceptions are disabled, this also #
20191 # creates the correct OPERR default result if appropriate. #
20192 # For sgl or dbl precision, overflow or underflow can occur. If #
20193 # either occurs and is enabled, the EXOP must be created. #
20194 # For extended precision, the stacked <ea> must be fixed along #
20195 # w/ the address index register as appropriate w/ _calc_ea_fout(). If #
20196 # the source is a denorm and if underflow is enabled, an EXOP must be #
20197 # created. #
20198 # For packed, the k-factor must be fetched from the instruction #
20199 # word or a data register. The <ea> must be fixed as w/ extended #
20200 # precision. Then, bindec() is called to create the appropriate #
20201 # packed result. #
20202 # If at any time an access error is flagged by one of the move- #
20203 # to-memory routines, then a special exit must be made so that the #
20204 # access error can be handled properly. #
20206 #########################################################################
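# Hedged C outline of the range check used for the single and double paths
# below (illustrative only; hi_lim/lo_lim stand for the package's
# SGL_HI/SGL_LO or DBL_HI/DBL_LO limits):
#
#	/* exp is the biased extended exponent with the sign already stripped */
#	static int fout_range_sketch(int exp, int hi_lim, int lo_lim)
#	{
#		if (exp > hi_lim)	return  1;	/* overflow: ovf_res()     */
#		if (exp == hi_lim)	return  2;	/* maybe: round, then look */
#		if (exp < lo_lim)	return -1;	/* underflow: unf_res()    */
#		return 0;				/* in range: plain fmove   */
#	}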
20208 global fout
20209 fout:
20210 bfextu EXC_CMDREG(%a6){&3:&3},%d1 # extract dst fmt
20211 mov.w (tbl_fout.b,%pc,%d1.w*2),%a1 # use as index
20212 jmp (tbl_fout.b,%pc,%a1) # jump to routine
20214 swbeg &0x8
20215 tbl_fout:
20216 short fout_long - tbl_fout
20217 short fout_sgl - tbl_fout
20218 short fout_ext - tbl_fout
20219 short fout_pack - tbl_fout
20220 short fout_word - tbl_fout
20221 short fout_dbl - tbl_fout
20222 short fout_byte - tbl_fout
20223 short fout_pack - tbl_fout
20225 #################################################################
20226 # fmove.b out ###################################################
20227 #################################################################
20229 # Only "Unimplemented Data Type" exceptions enter here. The operand
20230 # is either a DENORM or a NORM.
20231 fout_byte:
20232 tst.b STAG(%a6) # is operand normalized?
20233 bne.b fout_byte_denorm # no
20235 fmovm.x SRC(%a0),&0x80 # load value
20237 fout_byte_norm:
20238 fmov.l %d0,%fpcr # insert rnd prec,mode
20240 fmov.b %fp0,%d0 # exec move out w/ correct rnd mode
20242 fmov.l &0x0,%fpcr # clear FPCR
20243 fmov.l %fpsr,%d1 # fetch FPSR
20244 or.w %d1,2+USER_FPSR(%a6) # save new exc,accrued bits
20246 mov.b 1+EXC_OPWORD(%a6),%d1 # extract dst mode
20247 andi.b &0x38,%d1 # is mode == 0? (Dreg dst)
20248 beq.b fout_byte_dn # must save to integer regfile
20250 mov.l EXC_EA(%a6),%a0 # stacked <ea> is correct
20251 bsr.l _dmem_write_byte # write byte
20253 tst.l %d1 # did dstore fail?
20254 bne.l facc_out_b # yes
	rts
20258 fout_byte_dn:
20259 mov.b 1+EXC_OPWORD(%a6),%d1 # extract Dn
20260 andi.w &0x7,%d1
20261 bsr.l store_dreg_b
	rts
20264 fout_byte_denorm:
20265 mov.l SRC_EX(%a0),%d1
20266 andi.l &0x80000000,%d1 # keep DENORM sign
20267 ori.l &0x00800000,%d1 # make smallest sgl
20268 fmov.s %d1,%fp0
20269 bra.b fout_byte_norm
20271 #################################################################
20272 # fmove.w out ###################################################
20273 #################################################################
20275 # Only "Unimplemented Data Type" exceptions enter here. The operand
20276 # is either a DENORM or a NORM.
20277 fout_word:
20278 tst.b STAG(%a6) # is operand normalized?
20279 bne.b fout_word_denorm # no
20281 fmovm.x SRC(%a0),&0x80 # load value
20283 fout_word_norm:
20284 fmov.l %d0,%fpcr # insert rnd prec:mode
20286 fmov.w %fp0,%d0 # exec move out w/ correct rnd mode
20288 fmov.l &0x0,%fpcr # clear FPCR
20289 fmov.l %fpsr,%d1 # fetch FPSR
20290 or.w %d1,2+USER_FPSR(%a6) # save new exc,accrued bits
20292 mov.b 1+EXC_OPWORD(%a6),%d1 # extract dst mode
20293 andi.b &0x38,%d1 # is mode == 0? (Dreg dst)
20294 beq.b fout_word_dn # must save to integer regfile
20296 mov.l EXC_EA(%a6),%a0 # stacked <ea> is correct
20297 bsr.l _dmem_write_word # write word
20299 tst.l %d1 # did dstore fail?
20300 bne.l facc_out_w # yes
	rts
20304 fout_word_dn:
20305 mov.b 1+EXC_OPWORD(%a6),%d1 # extract Dn
20306 andi.w &0x7,%d1
20307 bsr.l store_dreg_w
	rts
20310 fout_word_denorm:
20311 mov.l SRC_EX(%a0),%d1
20312 andi.l &0x80000000,%d1 # keep DENORM sign
20313 ori.l &0x00800000,%d1 # make smallest sgl
20314 fmov.s %d1,%fp0
20315 bra.b fout_word_norm
20317 #################################################################
20318 # fmove.l out ###################################################
20319 #################################################################
20321 # Only "Unimplemented Data Type" exceptions enter here. The operand
20322 # is either a DENORM or a NORM.
20323 fout_long:
20324 tst.b STAG(%a6) # is operand normalized?
20325 bne.b fout_long_denorm # no
20327 fmovm.x SRC(%a0),&0x80 # load value
20329 fout_long_norm:
20330 fmov.l %d0,%fpcr # insert rnd prec:mode
20332 fmov.l %fp0,%d0 # exec move out w/ correct rnd mode
20334 fmov.l &0x0,%fpcr # clear FPCR
20335 fmov.l %fpsr,%d1 # fetch FPSR
20336 or.w %d1,2+USER_FPSR(%a6) # save new exc,accrued bits
20338 fout_long_write:
20339 mov.b 1+EXC_OPWORD(%a6),%d1 # extract dst mode
20340 andi.b &0x38,%d1 # is mode == 0? (Dreg dst)
20341 beq.b fout_long_dn # must save to integer regfile
20343 mov.l EXC_EA(%a6),%a0 # stacked <ea> is correct
20344 bsr.l _dmem_write_long # write long
20346 tst.l %d1 # did dstore fail?
20347 bne.l facc_out_l # yes
	rts
20351 fout_long_dn:
20352 mov.b 1+EXC_OPWORD(%a6),%d1 # extract Dn
20353 andi.w &0x7,%d1
20354 bsr.l store_dreg_l
	rts
20357 fout_long_denorm:
20358 mov.l SRC_EX(%a0),%d1
20359 andi.l &0x80000000,%d1 # keep DENORM sign
20360 ori.l &0x00800000,%d1 # make smallest sgl
20361 fmov.s %d1,%fp0
20362 bra.b fout_long_norm
20364 #################################################################
20365 # fmove.x out ###################################################
20366 #################################################################
20368 # Only "Unimplemented Data Type" exceptions enter here. The operand
20369 # is either a DENORM or a NORM.
20370 # The DENORM causes an Underflow exception.
20371 fout_ext:
20373 # we copy the extended precision result to FP_SCR0 so that the reserved
20374 # 16-bit field gets zeroed. we do this since we promise not to disturb
20375 # what's at SRC(a0).
20376 mov.w SRC_EX(%a0),FP_SCR0_EX(%a6)
20377 clr.w 2+FP_SCR0_EX(%a6) # clear reserved field
20378 mov.l SRC_HI(%a0),FP_SCR0_HI(%a6)
20379 mov.l SRC_LO(%a0),FP_SCR0_LO(%a6)
20381 fmovm.x SRC(%a0),&0x80 # return result
20383 bsr.l _calc_ea_fout # fix stacked <ea>
20385 mov.l %a0,%a1 # pass: dst addr
20386 lea FP_SCR0(%a6),%a0 # pass: src addr
20387 mov.l &0xc,%d0 # pass: opsize is 12 bytes
20389 # we must not yet write the extended precision data to the stack
20390 # in the pre-decrement case from supervisor mode or else we'll corrupt
20391 # the stack frame. so, leave it in FP_SRC for now and deal with it later...
20392 cmpi.b SPCOND_FLG(%a6),&mda7_flg
20393 beq.b fout_ext_a7
20395 bsr.l _dmem_write # write ext prec number to memory
20397 tst.l %d1 # did dstore fail?
20398 bne.w fout_ext_err # yes
20400 tst.b STAG(%a6) # is operand normalized?
20401 bne.b fout_ext_denorm # no
	rts
20404 # the number is a DENORM. must set the underflow exception bit
20405 fout_ext_denorm:
20406 bset &unfl_bit,FPSR_EXCEPT(%a6) # set underflow exc bit
20408 mov.b FPCR_ENABLE(%a6),%d0
20409 andi.b &0x0a,%d0 # is UNFL or INEX enabled?
20410 bne.b fout_ext_exc # yes
	rts
20413 # we don't want to do the write if the exception occurred in supervisor mode
20414 # so _mem_write2() handles this for us.
20415 fout_ext_a7:
20416 bsr.l _mem_write2 # write ext prec number to memory
20418 tst.l %d1 # did dstore fail?
20419 bne.w fout_ext_err # yes
20421 tst.b STAG(%a6) # is operand normalized?
20422 bne.b fout_ext_denorm # no
	rts
20425 fout_ext_exc:
20426 lea FP_SCR0(%a6),%a0
20427 bsr.l norm # normalize the mantissa
20428 neg.w %d0 # new exp = -(shft amt)
20429 andi.w &0x7fff,%d0
20430 andi.w &0x8000,FP_SCR0_EX(%a6) # keep only old sign
20431 or.w %d0,FP_SCR0_EX(%a6) # insert new exponent
20432 fmovm.x FP_SCR0(%a6),&0x40 # return EXOP in fp1
	rts
20435 fout_ext_err:
20436 mov.l EXC_A6(%a6),(%a6) # fix stacked a6
20437 bra.l facc_out_x
20439 #########################################################################
20440 # fmove.s out ###########################################################
20441 #########################################################################
20442 fout_sgl:
20443 andi.b &0x30,%d0 # clear rnd prec
20444 ori.b &s_mode*0x10,%d0 # insert sgl prec
20445 mov.l %d0,L_SCR3(%a6) # save rnd prec,mode on stack
20448 # operand is a normalized number. first, we check to see if the move out
20449 # would cause either an underflow or overflow. these cases are handled
20450 # separately. otherwise, set the FPCR to the proper rounding mode and
20451 # execute the move.
20453 mov.w SRC_EX(%a0),%d0 # extract exponent
20454 andi.w &0x7fff,%d0 # strip sign
20456 cmpi.w %d0,&SGL_HI # will operand overflow?
20457 bgt.w fout_sgl_ovfl # yes; go handle OVFL
20458 beq.w fout_sgl_may_ovfl # maybe; go handle possible OVFL
20459 cmpi.w %d0,&SGL_LO # will operand underflow?
20460 blt.w fout_sgl_unfl # yes; go handle underflow
20463 # NORMs(in range) can be stored out by a simple "fmov.s"
20464 # Unnormalized inputs can come through this point.
20466 fout_sgl_exg:
20467 fmovm.x SRC(%a0),&0x80 # fetch fop from stack
20469 fmov.l L_SCR3(%a6),%fpcr # set FPCR
20470 fmov.l &0x0,%fpsr # clear FPSR
20472 fmov.s %fp0,%d0 # store does convert and round
20474 fmov.l &0x0,%fpcr # clear FPCR
20475 fmov.l %fpsr,%d1 # save FPSR
20477 or.w %d1,2+USER_FPSR(%a6) # set possible inex2/ainex
20479 fout_sgl_exg_write:
20480 mov.b 1+EXC_OPWORD(%a6),%d1 # extract dst mode
20481 andi.b &0x38,%d1 # is mode == 0? (Dreg dst)
20482 beq.b fout_sgl_exg_write_dn # must save to integer regfile
20484 mov.l EXC_EA(%a6),%a0 # stacked <ea> is correct
20485 bsr.l _dmem_write_long # write long
20487 tst.l %d1 # did dstore fail?
20488 bne.l facc_out_l # yes
	rts
20492 fout_sgl_exg_write_dn:
20493 mov.b 1+EXC_OPWORD(%a6),%d1 # extract Dn
20494 andi.w &0x7,%d1
20495 bsr.l store_dreg_l
	rts
20499 # here, we know that the operand would UNFL if moved out to single prec,
20500 # so, denorm and round and then use generic store single routine to
20501 # write the value to memory.
20503 fout_sgl_unfl:
20504 bset &unfl_bit,FPSR_EXCEPT(%a6) # set UNFL
20506 mov.w SRC_EX(%a0),FP_SCR0_EX(%a6)
20507 mov.l SRC_HI(%a0),FP_SCR0_HI(%a6)
20508 mov.l SRC_LO(%a0),FP_SCR0_LO(%a6)
20509 mov.l %a0,-(%sp)
20511 clr.l %d0 # pass: S.F. = 0
20513 cmpi.b STAG(%a6),&DENORM # fetch src optype tag
20514 bne.b fout_sgl_unfl_cont # let DENORMs fall through
20516 lea FP_SCR0(%a6),%a0
20517 bsr.l norm # normalize the DENORM
20519 fout_sgl_unfl_cont:
20520 lea FP_SCR0(%a6),%a0 # pass: ptr to operand
20521 mov.l L_SCR3(%a6),%d1 # pass: rnd prec,mode
20522 bsr.l unf_res # calc default underflow result
20524 lea FP_SCR0(%a6),%a0 # pass: ptr to fop
20525 bsr.l dst_sgl # convert to single prec
20527 mov.b 1+EXC_OPWORD(%a6),%d1 # extract dst mode
20528 andi.b &0x38,%d1 # is mode == 0? (Dreg dst)
20529 beq.b fout_sgl_unfl_dn # must save to integer regfile
20531 mov.l EXC_EA(%a6),%a0 # stacked <ea> is correct
20532 bsr.l _dmem_write_long # write long
20534 tst.l %d1 # did dstore fail?
20535 bne.l facc_out_l # yes
20537 bra.b fout_sgl_unfl_chkexc
20539 fout_sgl_unfl_dn:
20540 mov.b 1+EXC_OPWORD(%a6),%d1 # extract Dn
20541 andi.w &0x7,%d1
20542 bsr.l store_dreg_l
20544 fout_sgl_unfl_chkexc:
20545 mov.b FPCR_ENABLE(%a6),%d1
20546 andi.b &0x0a,%d1 # is UNFL or INEX enabled?
20547 bne.w fout_sd_exc_unfl # yes
20548 addq.l &0x4,%sp
	rts
20552 # it's definitely an overflow so call ovf_res to get the correct answer
20554 fout_sgl_ovfl:
20555 tst.b 3+SRC_HI(%a0) # is result inexact?
20556 bne.b fout_sgl_ovfl_inex2
20557 tst.l SRC_LO(%a0) # is result inexact?
20558 bne.b fout_sgl_ovfl_inex2
20559 ori.w &ovfl_inx_mask,2+USER_FPSR(%a6) # set ovfl/aovfl/ainex
20560 bra.b fout_sgl_ovfl_cont
20561 fout_sgl_ovfl_inex2:
20562 ori.w &ovfinx_mask,2+USER_FPSR(%a6) # set ovfl/aovfl/ainex/inex2
20564 fout_sgl_ovfl_cont:
20565 mov.l %a0,-(%sp)
20567 # call ovf_res() w/ sgl prec and the correct rnd mode to create the default
20568 # overflow result. DON'T save the returned ccodes from ovf_res() since
20569 # fmove out doesn't alter them.
20570 tst.b SRC_EX(%a0) # is operand negative?
20571 smi %d1 # set if so
20572 mov.l L_SCR3(%a6),%d0 # pass: sgl prec,rnd mode
20573 bsr.l ovf_res # calc OVFL result
20574 fmovm.x (%a0),&0x80 # load default overflow result
20575 fmov.s %fp0,%d0 # store to single
20577 mov.b 1+EXC_OPWORD(%a6),%d1 # extract dst mode
20578 andi.b &0x38,%d1 # is mode == 0? (Dreg dst)
20579 beq.b fout_sgl_ovfl_dn # must save to integer regfile
20581 mov.l EXC_EA(%a6),%a0 # stacked <ea> is correct
20582 bsr.l _dmem_write_long # write long
20584 tst.l %d1 # did dstore fail?
20585 bne.l facc_out_l # yes
20587 bra.b fout_sgl_ovfl_chkexc
20589 fout_sgl_ovfl_dn:
20590 mov.b 1+EXC_OPWORD(%a6),%d1 # extract Dn
20591 andi.w &0x7,%d1
20592 bsr.l store_dreg_l
20594 fout_sgl_ovfl_chkexc:
20595 mov.b FPCR_ENABLE(%a6),%d1
20596 andi.b &0x0a,%d1 # is UNFL or INEX enabled?
20597 bne.w fout_sd_exc_ovfl # yes
20598 addq.l &0x4,%sp
	rts
20602 # move out MAY overflow:
20603 # (1) force the exp to 0x3fff
20604 # (2) do a move w/ appropriate rnd mode
20605 # (3) if exp still equals zero, then insert original exponent
20606 # for the correct result.
20607 # if exp now equals one, then it overflowed so call ovf_res.
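# (Hedged note: with the exponent forced to 0x3fff the magnitude lies in
# [1.0, 2.0), so after rounding at the destination precision
#	|rounded| >= 2.0  <=>  the exponent would have been bumped,
# which is exactly what the fcmp against 2 below detects.)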
20609 fout_sgl_may_ovfl:
20610 mov.w SRC_EX(%a0),%d1 # fetch current sign
20611 andi.w &0x8000,%d1 # keep it,clear exp
20612 ori.w &0x3fff,%d1 # insert exp = 0
20613 mov.w %d1,FP_SCR0_EX(%a6) # insert scaled exp
20614 mov.l SRC_HI(%a0),FP_SCR0_HI(%a6) # copy hi(man)
20615 mov.l SRC_LO(%a0),FP_SCR0_LO(%a6) # copy lo(man)
20617 fmov.l L_SCR3(%a6),%fpcr # set FPCR
20619 fmov.x FP_SCR0(%a6),%fp0 # force fop to be rounded
20620 fmov.l &0x0,%fpcr # clear FPCR
20622 fabs.x %fp0 # need absolute value
20623 fcmp.b %fp0,&0x2 # did exponent increase?
20624 fblt.w fout_sgl_exg # no; go finish NORM
20625 bra.w fout_sgl_ovfl # yes; go handle overflow
20627 ################
20629 fout_sd_exc_unfl:
20630 mov.l (%sp)+,%a0
20632 mov.w SRC_EX(%a0),FP_SCR0_EX(%a6)
20633 mov.l SRC_HI(%a0),FP_SCR0_HI(%a6)
20634 mov.l SRC_LO(%a0),FP_SCR0_LO(%a6)
20636 cmpi.b STAG(%a6),&DENORM # was src a DENORM?
20637 bne.b fout_sd_exc_cont # no
20639 lea FP_SCR0(%a6),%a0
20640 bsr.l norm
20641 neg.l %d0
20642 andi.w &0x7fff,%d0
20643 bfins %d0,FP_SCR0_EX(%a6){&1:&15}
20644 bra.b fout_sd_exc_cont
20646 fout_sd_exc:
20647 fout_sd_exc_ovfl:
20648 mov.l (%sp)+,%a0 # restore a0
20650 mov.w SRC_EX(%a0),FP_SCR0_EX(%a6)
20651 mov.l SRC_HI(%a0),FP_SCR0_HI(%a6)
20652 mov.l SRC_LO(%a0),FP_SCR0_LO(%a6)
20654 fout_sd_exc_cont:
20655 bclr &0x7,FP_SCR0_EX(%a6) # clear sign bit
20656 sne.b 2+FP_SCR0_EX(%a6) # set internal sign bit
20657 lea FP_SCR0(%a6),%a0 # pass: ptr to DENORM
20659 mov.b 3+L_SCR3(%a6),%d1
20660 lsr.b &0x4,%d1
20661 andi.w &0x0c,%d1
20662 swap %d1
20663 mov.b 3+L_SCR3(%a6),%d1
20664 lsr.b &0x4,%d1
20665 andi.w &0x03,%d1
20666 clr.l %d0 # pass: zero g,r,s
20667 bsr.l _round # round the DENORM
20669 tst.b 2+FP_SCR0_EX(%a6) # is EXOP negative?
20670 beq.b fout_sd_exc_done # no
20671 bset &0x7,FP_SCR0_EX(%a6) # yes
20673 fout_sd_exc_done:
20674 fmovm.x FP_SCR0(%a6),&0x40 # return EXOP in fp1
	rts
20677 #################################################################
20678 # fmove.d out ###################################################
20679 #################################################################
20680 fout_dbl:
20681 andi.b &0x30,%d0 # clear rnd prec
20682 ori.b &d_mode*0x10,%d0 # insert dbl prec
20683 mov.l %d0,L_SCR3(%a6) # save rnd prec,mode on stack
20686 # operand is a normalized number. first, we check to see if the move out
20687 # would cause either an underflow or overflow. these cases are handled
20688 # separately. otherwise, set the FPCR to the proper rounding mode and
20689 # execute the move.
20691 mov.w SRC_EX(%a0),%d0 # extract exponent
20692 andi.w &0x7fff,%d0 # strip sign
20694 cmpi.w %d0,&DBL_HI # will operand overflow?
20695 bgt.w fout_dbl_ovfl # yes; go handle OVFL
20696 beq.w fout_dbl_may_ovfl # maybe; go handle possible OVFL
20697 cmpi.w %d0,&DBL_LO # will operand underflow?
20698 blt.w fout_dbl_unfl # yes; go handle underflow
20701 # NORMs(in range) can be stored out by a simple "fmov.d"
20702 # Unnormalized inputs can come through this point.
20704 fout_dbl_exg:
20705 fmovm.x SRC(%a0),&0x80 # fetch fop from stack
20707 fmov.l L_SCR3(%a6),%fpcr # set FPCR
20708 fmov.l &0x0,%fpsr # clear FPSR
20710 fmov.d %fp0,L_SCR1(%a6) # store does convert and round
20712 fmov.l &0x0,%fpcr # clear FPCR
20713 fmov.l %fpsr,%d0 # save FPSR
20715 or.w %d0,2+USER_FPSR(%a6) # set possible inex2/ainex
20717 mov.l EXC_EA(%a6),%a1 # pass: dst addr
20718 lea L_SCR1(%a6),%a0 # pass: src addr
20719 movq.l &0x8,%d0 # pass: opsize is 8 bytes
20720 bsr.l _dmem_write # store dbl fop to memory
20722 tst.l %d1 # did dstore fail?
20723 bne.l facc_out_d # yes
20725 rts # no; so we're finished
20728 # here, we know that the operand would UNFL if moved out to double prec,
20729 # so, denorm and round and then use generic store double routine to
20730 # write the value to memory.
20732 fout_dbl_unfl:
20733 bset &unfl_bit,FPSR_EXCEPT(%a6) # set UNFL
20735 mov.w SRC_EX(%a0),FP_SCR0_EX(%a6)
20736 mov.l SRC_HI(%a0),FP_SCR0_HI(%a6)
20737 mov.l SRC_LO(%a0),FP_SCR0_LO(%a6)
20738 mov.l %a0,-(%sp)
20740 clr.l %d0 # pass: S.F. = 0
20742 cmpi.b STAG(%a6),&DENORM # fetch src optype tag
20743 bne.b fout_dbl_unfl_cont # let DENORMs fall through
20745 lea FP_SCR0(%a6),%a0
20746 bsr.l norm # normalize the DENORM
20748 fout_dbl_unfl_cont:
20749 lea FP_SCR0(%a6),%a0 # pass: ptr to operand
20750 mov.l L_SCR3(%a6),%d1 # pass: rnd prec,mode
20751 bsr.l unf_res # calc default underflow result
20753 lea FP_SCR0(%a6),%a0 # pass: ptr to fop
20754 bsr.l dst_dbl # convert to double prec
20755 mov.l %d0,L_SCR1(%a6)
20756 mov.l %d1,L_SCR2(%a6)
20758 mov.l EXC_EA(%a6),%a1 # pass: dst addr
20759 lea L_SCR1(%a6),%a0 # pass: src addr
20760 movq.l &0x8,%d0 # pass: opsize is 8 bytes
20761 bsr.l _dmem_write # store dbl fop to memory
20763 tst.l %d1 # did dstore fail?
20764 bne.l facc_out_d # yes
20766 mov.b FPCR_ENABLE(%a6),%d1
20767 andi.b &0x0a,%d1 # is UNFL or INEX enabled?
20768 bne.w fout_sd_exc_unfl # yes
20769 addq.l &0x4,%sp
	rts
20773 # it's definitely an overflow so call ovf_res to get the correct answer
20775 fout_dbl_ovfl:
20776 mov.w 2+SRC_LO(%a0),%d0
20777 andi.w &0x7ff,%d0
20778 bne.b fout_dbl_ovfl_inex2
20780 ori.w &ovfl_inx_mask,2+USER_FPSR(%a6) # set ovfl/aovfl/ainex
20781 bra.b fout_dbl_ovfl_cont
20782 fout_dbl_ovfl_inex2:
20783 ori.w &ovfinx_mask,2+USER_FPSR(%a6) # set ovfl/aovfl/ainex/inex2
20785 fout_dbl_ovfl_cont:
20786 mov.l %a0,-(%sp)
20788 # call ovf_res() w/ dbl prec and the correct rnd mode to create the default
20789 # overflow result. DON'T save the returned ccodes from ovf_res() since
20790 # fmove out doesn't alter them.
20791 tst.b SRC_EX(%a0) # is operand negative?
20792 smi %d1 # set if so
20793 mov.l L_SCR3(%a6),%d0 # pass: dbl prec,rnd mode
20794 bsr.l ovf_res # calc OVFL result
20795 fmovm.x (%a0),&0x80 # load default overflow result
20796 fmov.d %fp0,L_SCR1(%a6) # store to double
20798 mov.l EXC_EA(%a6),%a1 # pass: dst addr
20799 lea L_SCR1(%a6),%a0 # pass: src addr
20800 movq.l &0x8,%d0 # pass: opsize is 8 bytes
20801 bsr.l _dmem_write # store dbl fop to memory
20803 tst.l %d1 # did dstore fail?
20804 bne.l facc_out_d # yes
20806 mov.b FPCR_ENABLE(%a6),%d1
20807 andi.b &0x0a,%d1 # is UNFL or INEX enabled?
20808 bne.w fout_sd_exc_ovfl # yes
20809 addq.l &0x4,%sp
	rts
20813 # move out MAY overflow:
20814 # (1) force the exp to 0x3fff
20815 # (2) do a move w/ appropriate rnd mode
20816 # (3) if exp still equals zero, then insert original exponent
20817 # for the correct result.
20818 # if exp now equals one, then it overflowed so call ovf_res.
20820 fout_dbl_may_ovfl:
20821 mov.w SRC_EX(%a0),%d1 # fetch current sign
20822 andi.w &0x8000,%d1 # keep it,clear exp
20823 ori.w &0x3fff,%d1 # insert exp = 0
20824 mov.w %d1,FP_SCR0_EX(%a6) # insert scaled exp
20825 mov.l SRC_HI(%a0),FP_SCR0_HI(%a6) # copy hi(man)
20826 mov.l SRC_LO(%a0),FP_SCR0_LO(%a6) # copy lo(man)
20828 fmov.l L_SCR3(%a6),%fpcr # set FPCR
20830 fmov.x FP_SCR0(%a6),%fp0 # force fop to be rounded
20831 fmov.l &0x0,%fpcr # clear FPCR
20833 fabs.x %fp0 # need absolute value
20834 fcmp.b %fp0,&0x2 # did exponent increase?
20835 fblt.w fout_dbl_exg # no; go finish NORM
20836 bra.w fout_dbl_ovfl # yes; go handle overflow
20838 #########################################################################
20839 # XDEF **************************************************************** #
20840 # dst_dbl(): create double precision value from extended prec. #
20842 # XREF **************************************************************** #
20843 # None #
20845 # INPUT *************************************************************** #
20846 # a0 = pointer to source operand in extended precision #
20848 # OUTPUT ************************************************************** #
20849 # d0 = hi(double precision result) #
20850 # d1 = lo(double precision result) #
20852 # ALGORITHM *********************************************************** #
20854 # Changes extended precision to double precision. #
20855 # Note: no attempt is made to round the extended value to double. #
20856 # dbl_sign = ext_sign #
20857 # dbl_exp = ext_exp - $3fff(ext bias) + $3ff(dbl bias) #
20858 # get rid of ext integer bit #
20859 # dbl_mant = ext_mant{62:11} #
20861 # --------------- --------------- --------------- #
20862 # extended -> |s| exp | |1| ms mant | | ls mant | #
20863 # --------------- --------------- --------------- #
20864 # 95 64 63 62 32 31 11 0 #
20865 # | | #
20866 # | | #
20867 # | | #
20868 # v v #
20869 # --------------- --------------- #
20870 # double -> |s|exp| mant | | mant | #
20871 # --------------- --------------- #
20872 # 63 51 32 31 0 #
20874 #########################################################################
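#
# worked example of the re-bias below, assuming the usual IEEE biases
# (EXT_BIAS = $3fff, DBL_BIAS = $3ff): a biased extended exponent of $4001
# (true exponent +2) becomes
# $4001 - $3fff + $3ff = $0401
# which again encodes a true exponent of +2 in double precision; a denorm
# input uses an effective bias one smaller (the "subq.w &0x1,%d0" below).
#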
20876 dst_dbl:
20877 clr.l %d0 # clear d0
20878 mov.w FTEMP_EX(%a0),%d0 # get exponent
20879 subi.w &EXT_BIAS,%d0 # subtract extended precision bias
20880 addi.w &DBL_BIAS,%d0 # add double precision bias
20881 tst.b FTEMP_HI(%a0) # is number a denorm?
20882 bmi.b dst_get_dupper # no
20883 subq.w &0x1,%d0 # yes; denorm bias = DBL_BIAS - 1
20884 dst_get_dupper:
20885 swap %d0 # d0 now in upper word
20886 lsl.l &0x4,%d0 # d0 in proper place for dbl prec exp
20887 tst.b FTEMP_EX(%a0) # test sign
20888 bpl.b dst_get_dman # if positive, go process mantissa
20889 bset &0x1f,%d0 # if negative, set sign
20890 dst_get_dman:
20891 mov.l FTEMP_HI(%a0),%d1 # get ms mantissa
20892 bfextu %d1{&1:&20},%d1 # get upper 20 bits of ms
20893 or.l %d1,%d0 # put these bits in ms word of double
20894 mov.l %d0,L_SCR1(%a6) # put the new exp back on the stack
20895 mov.l FTEMP_HI(%a0),%d1 # get ms mantissa
20896 mov.l &21,%d0 # load shift count
20897 lsl.l %d0,%d1 # put lower 11 bits in upper bits
20898 mov.l %d1,L_SCR2(%a6) # build lower lword in memory
20899 mov.l FTEMP_LO(%a0),%d1 # get ls mantissa
20900 bfextu %d1{&0:&21},%d0 # get ls 21 bits of double
20901 mov.l L_SCR2(%a6),%d1
20902 or.l %d0,%d1 # put them in double result
20903 mov.l L_SCR1(%a6),%d0
20906 #########################################################################
20907 # XDEF **************************************************************** #
20908 # dst_sgl(): create single precision value from extended prec #
20910 # XREF **************************************************************** #
20912 # INPUT *************************************************************** #
20913 # a0 = pointer to source operand in extended precision #
20915 # OUTPUT ************************************************************** #
20916 # d0 = single precision result #
20918 # ALGORITHM *********************************************************** #
20920 # Changes extended precision to single precision. #
20921 # sgl_sign = ext_sign #
20922 # sgl_exp = ext_exp - $3fff(ext bias) + $7f(sgl bias) #
20923 # get rid of ext integer bit #
20924 # sgl_mant = ext_mant{62:40} #
20926 # --------------- --------------- --------------- #
20927 # extended -> |s| exp | |1| ms mant | | ls mant | #
20928 # --------------- --------------- --------------- #
20929 # 95 64 63 62 40 32 31 12 0 #
20930 # | | #
20931 # | | #
20932 # | | #
20933 # v v #
20934 # --------------- #
20935 # single -> |s|exp| mant | #
20936 # --------------- #
20937 # 31 22 0 #
20939 #########################################################################
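#
# analogous worked example for single precision, assuming EXT_BIAS = $3fff
# and SGL_BIAS = $7f: a biased extended exponent of $4001 (true exponent +2)
# becomes $4001 - $3fff + $7f = $81, again a true exponent of +2; denorm
# inputs use a bias one smaller, as above.
#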
20941 dst_sgl:
20942 clr.l %d0
20943 mov.w FTEMP_EX(%a0),%d0 # get exponent
20944 subi.w &EXT_BIAS,%d0 # subtract extended precision bias
20945 addi.w &SGL_BIAS,%d0 # add single precision bias
20946 tst.b FTEMP_HI(%a0) # is number a denorm?
20947 bmi.b dst_get_supper # no
20948 subq.w &0x1,%d0 # yes; denorm bias = SGL_BIAS - 1
20949 dst_get_supper:
20950 swap %d0 # put exp in upper word of d0
20951 lsl.l &0x7,%d0 # shift it into single exp bits
20952 tst.b FTEMP_EX(%a0) # test sign
20953 bpl.b dst_get_sman # if positive, continue
20954 bset &0x1f,%d0 # if negative, put in sign first
20955 dst_get_sman:
20956 mov.l FTEMP_HI(%a0),%d1 # get ms mantissa
20957 andi.l &0x7fffff00,%d1 # get upper 23 bits of ms
20958 lsr.l &0x8,%d1 # and put them flush right
20959 or.l %d1,%d0 # put these bits in ms word of single
20962 ##############################################################################
20963 fout_pack:
20964 bsr.l _calc_ea_fout # fetch the <ea>
20965 mov.l %a0,-(%sp)
20967 mov.b STAG(%a6),%d0 # fetch input type
20968 bne.w fout_pack_not_norm # input is not NORM
20970 fout_pack_norm:
20971 btst &0x4,EXC_CMDREG(%a6) # static or dynamic?
20972 beq.b fout_pack_s # static
20974 fout_pack_d:
20975 mov.b 1+EXC_CMDREG(%a6),%d1 # fetch dynamic reg
20976 lsr.b &0x4,%d1
20977 andi.w &0x7,%d1
20979 bsr.l fetch_dreg # fetch Dn w/ k-factor
20981 bra.b fout_pack_type
20982 fout_pack_s:
20983 mov.b 1+EXC_CMDREG(%a6),%d0 # fetch static field
20985 fout_pack_type:
20986 bfexts %d0{&25:&7},%d0 # extract k-factor
20987 mov.l %d0,-(%sp)
20989 lea FP_SRC(%a6),%a0 # pass: ptr to input
20991 # bindec is currently scrambling FP_SRC for denorm inputs.
20992 # we'll have to change this, but for now, tough luck!!!
20993 bsr.l bindec # convert xprec to packed
20995 # andi.l &0xcfff000f,FP_SCR0(%a6) # clear unused fields
20996 andi.l &0xcffff00f,FP_SCR0(%a6) # clear unused fields
20998 mov.l (%sp)+,%d0
21000 tst.b 3+FP_SCR0_EX(%a6)
21001 bne.b fout_pack_set
21002 tst.l FP_SCR0_HI(%a6)
21003 bne.b fout_pack_set
21004 tst.l FP_SCR0_LO(%a6)
21005 bne.b fout_pack_set
21007 # add the extra condition that only if the k-factor was zero, too, should
21008 # we zero the exponent
21009 tst.l %d0
21010 bne.b fout_pack_set
21011 # "mantissa" is all zero which means that the answer is zero. but, the '040
21012 # algorithm allows the exponent to be non-zero. the 881/2 do not. therefore,
21013 # if the mantissa is zero, I will zero the exponent, too.
21014 # the question now is whether the exponent's sign bit is allowed to be non-zero
21015 # for a zero, also...
21016 andi.w &0xf000,FP_SCR0(%a6)
21018 fout_pack_set:
21020 lea FP_SCR0(%a6),%a0 # pass: src addr
21022 fout_pack_write:
21023 mov.l (%sp)+,%a1 # pass: dst addr
21024 mov.l &0xc,%d0 # pass: opsize is 12 bytes
21026 cmpi.b SPCOND_FLG(%a6),&mda7_flg
21027 beq.b fout_pack_a7
21029 bsr.l _dmem_write # write ext prec number to memory
21031 tst.l %d1 # did dstore fail?
21032 bne.w fout_ext_err # yes
21036 # we don't want to do the write if the exception occurred in supervisor mode
21037 # so _mem_write2() handles this for us.
21038 fout_pack_a7:
21039 bsr.l _mem_write2 # write ext prec number to memory
21041 tst.l %d1 # did dstore fail?
21042 bne.w fout_ext_err # yes
21046 fout_pack_not_norm:
21047 cmpi.b %d0,&DENORM # is it a DENORM?
21048 beq.w fout_pack_norm # yes
21049 lea FP_SRC(%a6),%a0
21050 clr.w 2+FP_SRC_EX(%a6)
21051 cmpi.b %d0,&SNAN # is it an SNAN?
21052 beq.b fout_pack_snan # yes
21053 bra.b fout_pack_write # no
21055 fout_pack_snan:
21056 ori.w &snaniop2_mask,FPSR_EXCEPT(%a6) # set SNAN/AIOP
21057 bset &0x6,FP_SRC_HI(%a6) # set snan bit
21058 bra.b fout_pack_write
21060 #########################################################################
21061 # XDEF **************************************************************** #
21062 # fetch_dreg(): fetch register according to index in d1 #
21064 # XREF **************************************************************** #
21065 # None #
21067 # INPUT *************************************************************** #
21068 # d1 = index of register to fetch from #
21070 # OUTPUT ************************************************************** #
21071 # d0 = value of register fetched #
21073 # ALGORITHM *********************************************************** #
21074 # According to the index value in d1 which can range from zero #
21075 # to fifteen, load the corresponding register file value (where #
21076 # address register indexes start at 8). D0/D1/A0/A1/A6/A7 are on the #
21077 # stack. The rest should still be in their original places. #
21079 #########################################################################
21081 # this routine leaves d1 intact for subsequent store_dreg calls.
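#
# illustrative (hypothetical) call sequence; indexes 0-7 select d0-d7 and
# 8-15 select a0-a7:
#
# mov.w &0x3,%d1 # select d3
# bsr.l fetch_dreg # d0 = current value of d3
#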
21082 global fetch_dreg
21083 fetch_dreg:
21084 mov.w (tbl_fdreg.b,%pc,%d1.w*2),%d0
21085 jmp (tbl_fdreg.b,%pc,%d0.w*1)
21087 tbl_fdreg:
21088 short fdreg0 - tbl_fdreg
21089 short fdreg1 - tbl_fdreg
21090 short fdreg2 - tbl_fdreg
21091 short fdreg3 - tbl_fdreg
21092 short fdreg4 - tbl_fdreg
21093 short fdreg5 - tbl_fdreg
21094 short fdreg6 - tbl_fdreg
21095 short fdreg7 - tbl_fdreg
21096 short fdreg8 - tbl_fdreg
21097 short fdreg9 - tbl_fdreg
21098 short fdrega - tbl_fdreg
21099 short fdregb - tbl_fdreg
21100 short fdregc - tbl_fdreg
21101 short fdregd - tbl_fdreg
21102 short fdrege - tbl_fdreg
21103 short fdregf - tbl_fdreg
21105 fdreg0:
21106 mov.l EXC_DREGS+0x0(%a6),%d0
21108 fdreg1:
21109 mov.l EXC_DREGS+0x4(%a6),%d0
21111 fdreg2:
21112 mov.l %d2,%d0
21114 fdreg3:
21115 mov.l %d3,%d0
21117 fdreg4:
21118 mov.l %d4,%d0
21120 fdreg5:
21121 mov.l %d5,%d0
21123 fdreg6:
21124 mov.l %d6,%d0
21126 fdreg7:
21127 mov.l %d7,%d0
21129 fdreg8:
21130 mov.l EXC_DREGS+0x8(%a6),%d0
21132 fdreg9:
21133 mov.l EXC_DREGS+0xc(%a6),%d0
21135 fdrega:
21136 mov.l %a2,%d0
21138 fdregb:
21139 mov.l %a3,%d0
21141 fdregc:
21142 mov.l %a4,%d0
21144 fdregd:
21145 mov.l %a5,%d0
21147 fdrege:
21148 mov.l (%a6),%d0
21150 fdregf:
21151 mov.l EXC_A7(%a6),%d0
21154 #########################################################################
21155 # XDEF **************************************************************** #
21156 # store_dreg_l(): store longword to data register specified by d1 #
21158 # XREF **************************************************************** #
21159 # None #
21161 # INPUT *************************************************************** #
21162 # d0 = longword value to store #
21163 # d1 = index of register to store to #
21165 # OUTPUT ************************************************************** #
21166 # (data register is updated) #
21168 # ALGORITHM *********************************************************** #
21169 # According to the index value in d1, store the longword value #
21170 # in d0 to the corresponding data register. D0/D1 are on the stack #
21171 # while the rest are in their initial places. #
21173 #########################################################################
21175 global store_dreg_l
21176 store_dreg_l:
21177 mov.w (tbl_sdregl.b,%pc,%d1.w*2),%d1
21178 jmp (tbl_sdregl.b,%pc,%d1.w*1)
21180 tbl_sdregl:
21181 short sdregl0 - tbl_sdregl
21182 short sdregl1 - tbl_sdregl
21183 short sdregl2 - tbl_sdregl
21184 short sdregl3 - tbl_sdregl
21185 short sdregl4 - tbl_sdregl
21186 short sdregl5 - tbl_sdregl
21187 short sdregl6 - tbl_sdregl
21188 short sdregl7 - tbl_sdregl
21190 sdregl0:
21191 mov.l %d0,EXC_DREGS+0x0(%a6)
21193 sdregl1:
21194 mov.l %d0,EXC_DREGS+0x4(%a6)
21196 sdregl2:
21197 mov.l %d0,%d2
21199 sdregl3:
21200 mov.l %d0,%d3
21202 sdregl4:
21203 mov.l %d0,%d4
21205 sdregl5:
21206 mov.l %d0,%d5
21208 sdregl6:
21209 mov.l %d0,%d6
21211 sdregl7:
21212 mov.l %d0,%d7
21215 #########################################################################
21216 # XDEF **************************************************************** #
21217 # store_dreg_w(): store word to data register specified by d1 #
21219 # XREF **************************************************************** #
21220 # None #
21222 # INPUT *************************************************************** #
21223 # d0 = word value to store #
21224 # d1 = index of register to store to #
21226 # OUTPUT ************************************************************** #
21227 # (data register is updated) #
21229 # ALGORITHM *********************************************************** #
21230 # According to the index value in d1, store the word value #
21231 # in d0 to the corresponding data register. D0/D1 are on the stack #
21232 # while the rest are in their initial places. #
21234 #########################################################################
21236 global store_dreg_w
21237 store_dreg_w:
21238 mov.w (tbl_sdregw.b,%pc,%d1.w*2),%d1
21239 jmp (tbl_sdregw.b,%pc,%d1.w*1)
21241 tbl_sdregw:
21242 short sdregw0 - tbl_sdregw
21243 short sdregw1 - tbl_sdregw
21244 short sdregw2 - tbl_sdregw
21245 short sdregw3 - tbl_sdregw
21246 short sdregw4 - tbl_sdregw
21247 short sdregw5 - tbl_sdregw
21248 short sdregw6 - tbl_sdregw
21249 short sdregw7 - tbl_sdregw
21251 sdregw0:
21252 mov.w %d0,2+EXC_DREGS+0x0(%a6)
21254 sdregw1:
21255 mov.w %d0,2+EXC_DREGS+0x4(%a6)
21257 sdregw2:
21258 mov.w %d0,%d2
21260 sdregw3:
21261 mov.w %d0,%d3
21263 sdregw4:
21264 mov.w %d0,%d4
21266 sdregw5:
21267 mov.w %d0,%d5
21269 sdregw6:
21270 mov.w %d0,%d6
21272 sdregw7:
21273 mov.w %d0,%d7
21276 #########################################################################
21277 # XDEF **************************************************************** #
21278 # store_dreg_b(): store byte to data register specified by d1 #
21280 # XREF **************************************************************** #
21281 # None #
21283 # INPUT *************************************************************** #
21284 # d0 = byte value to store #
21285 # d1 = index of register to store to #
21287 # OUTPUT ************************************************************** #
21288 # (data register is updated) #
21290 # ALGORITHM *********************************************************** #
21291 # According to the index value in d1, store the byte value #
21292 # in d0 to the corresponding data register. D0/D1 are on the stack #
21293 # while the rest are in their initial places. #
21295 #########################################################################
21297 global store_dreg_b
21298 store_dreg_b:
21299 mov.w (tbl_sdregb.b,%pc,%d1.w*2),%d1
21300 jmp (tbl_sdregb.b,%pc,%d1.w*1)
21302 tbl_sdregb:
21303 short sdregb0 - tbl_sdregb
21304 short sdregb1 - tbl_sdregb
21305 short sdregb2 - tbl_sdregb
21306 short sdregb3 - tbl_sdregb
21307 short sdregb4 - tbl_sdregb
21308 short sdregb5 - tbl_sdregb
21309 short sdregb6 - tbl_sdregb
21310 short sdregb7 - tbl_sdregb
21312 sdregb0:
21313 mov.b %d0,3+EXC_DREGS+0x0(%a6)
21315 sdregb1:
21316 mov.b %d0,3+EXC_DREGS+0x4(%a6)
21318 sdregb2:
21319 mov.b %d0,%d2
21321 sdregb3:
21322 mov.b %d0,%d3
21324 sdregb4:
21325 mov.b %d0,%d4
21327 sdregb5:
21328 mov.b %d0,%d5
21330 sdregb6:
21331 mov.b %d0,%d6
21333 sdregb7:
21334 mov.b %d0,%d7
21337 #########################################################################
21338 # XDEF **************************************************************** #
21339 # inc_areg(): increment an address register by the value in d0 #
21341 # XREF **************************************************************** #
21342 # None #
21344 # INPUT *************************************************************** #
21345 # d0 = amount to increment by #
21346 # d1 = index of address register to increment #
21348 # OUTPUT ************************************************************** #
21349 # (address register is updated) #
21351 # ALGORITHM *********************************************************** #
21352 # Typically used for an instruction w/ a post-increment <ea>, #
21353 # this routine adds the increment value in d0 to the address register #
21354 # specified by d1. A0/A1/A6/A7 reside on the stack. The rest reside #
21355 # in their original places. #
21356 # For a7, if the increment amount is one, then we have to #
21357 # increment by two. For any a7 update, set the mia7_flag so that if #
21358 # an access error exception occurs later in emulation, this address #
21359 # register update can be undone. #
21361 #########################################################################
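#
# illustrative example of the a7 special case handled below: a byte-sized
# post-increment through a7 (d0 = 1, d1 = 7) bumps EXC_A7 by 2 rather than 1
# so the stack pointer stays word-aligned, and mia7_flg is recorded in
# SPCOND_FLG so the update can be backed out if an access error occurs.
#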
21363 global inc_areg
21364 inc_areg:
21365 mov.w (tbl_iareg.b,%pc,%d1.w*2),%d1
21366 jmp (tbl_iareg.b,%pc,%d1.w*1)
21368 tbl_iareg:
21369 short iareg0 - tbl_iareg
21370 short iareg1 - tbl_iareg
21371 short iareg2 - tbl_iareg
21372 short iareg3 - tbl_iareg
21373 short iareg4 - tbl_iareg
21374 short iareg5 - tbl_iareg
21375 short iareg6 - tbl_iareg
21376 short iareg7 - tbl_iareg
21378 iareg0: add.l %d0,EXC_DREGS+0x8(%a6)
21380 iareg1: add.l %d0,EXC_DREGS+0xc(%a6)
21382 iareg2: add.l %d0,%a2
21384 iareg3: add.l %d0,%a3
21386 iareg4: add.l %d0,%a4
21388 iareg5: add.l %d0,%a5
21390 iareg6: add.l %d0,(%a6)
21392 iareg7: mov.b &mia7_flg,SPCOND_FLG(%a6)
21393 cmpi.b %d0,&0x1
21394 beq.b iareg7b
21395 add.l %d0,EXC_A7(%a6)
21397 iareg7b:
21398 addq.l &0x2,EXC_A7(%a6)
21401 #########################################################################
21402 # XDEF **************************************************************** #
21403 # dec_areg(): decrement an address register by the value in d0 #
21405 # XREF **************************************************************** #
21406 # None #
21408 # INPUT *************************************************************** #
21409 # d0 = amount to decrement by #
21410 # d1 = index of address register to decrement #
21412 # OUTPUT ************************************************************** #
21413 # (address register is updated) #
21415 # ALGORITHM *********************************************************** #
21416 # Typically used for an instruction w/ a pre-decrement <ea>, #
21417 # this routine subtracts the decrement value in d0 from the address register #
21418 # specified by d1. A0/A1/A6/A7 reside on the stack. The rest reside #
21419 # in their original places. #
21420 # For a7, if the decrement amount is one, then we have to #
21421 # decrement by two. For any a7 update, set the mda7_flag so that if #
21422 # an access error exception occurs later in emulation, this address #
21423 # register update can be undone. #
21425 #########################################################################
21427 global dec_areg
21428 dec_areg:
21429 mov.w (tbl_dareg.b,%pc,%d1.w*2),%d1
21430 jmp (tbl_dareg.b,%pc,%d1.w*1)
21432 tbl_dareg:
21433 short dareg0 - tbl_dareg
21434 short dareg1 - tbl_dareg
21435 short dareg2 - tbl_dareg
21436 short dareg3 - tbl_dareg
21437 short dareg4 - tbl_dareg
21438 short dareg5 - tbl_dareg
21439 short dareg6 - tbl_dareg
21440 short dareg7 - tbl_dareg
21442 dareg0: sub.l %d0,EXC_DREGS+0x8(%a6)
21444 dareg1: sub.l %d0,EXC_DREGS+0xc(%a6)
21446 dareg2: sub.l %d0,%a2
21448 dareg3: sub.l %d0,%a3
21450 dareg4: sub.l %d0,%a4
21452 dareg5: sub.l %d0,%a5
21454 dareg6: sub.l %d0,(%a6)
21456 dareg7: mov.b &mda7_flg,SPCOND_FLG(%a6)
21457 cmpi.b %d0,&0x1
21458 beq.b dareg7b
21459 sub.l %d0,EXC_A7(%a6)
21461 dareg7b:
21462 subq.l &0x2,EXC_A7(%a6)
21465 ##############################################################################
21467 #########################################################################
21468 # XDEF **************************************************************** #
21469 # load_fpn1(): load FP register value into FP_SRC(a6). #
21471 # XREF **************************************************************** #
21472 # None #
21474 # INPUT *************************************************************** #
21475 # d0 = index of FP register to load #
21477 # OUTPUT ************************************************************** #
21478 # FP_SRC(a6) = value loaded from FP register file #
21480 # ALGORITHM *********************************************************** #
21481 # Using the index in d0, load FP_SRC(a6) with a number from the #
21482 # FP register file. #
21484 #########################################################################
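#
# note on the fmovm.x masks used below: with a control addressing mode the
# static register list runs fp0..fp7 from bit 7 down to bit 0, so &0x20
# selects fp2, &0x10 selects fp3, ... and &0x01 selects fp7. fp0 and fp1
# are instead copied from the EXC_FP0/EXC_FP1 images saved at entry.
#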
21486 global load_fpn1
21487 load_fpn1:
21488 mov.w (tbl_load_fpn1.b,%pc,%d0.w*2), %d0
21489 jmp (tbl_load_fpn1.b,%pc,%d0.w*1)
21491 tbl_load_fpn1:
21492 short load_fpn1_0 - tbl_load_fpn1
21493 short load_fpn1_1 - tbl_load_fpn1
21494 short load_fpn1_2 - tbl_load_fpn1
21495 short load_fpn1_3 - tbl_load_fpn1
21496 short load_fpn1_4 - tbl_load_fpn1
21497 short load_fpn1_5 - tbl_load_fpn1
21498 short load_fpn1_6 - tbl_load_fpn1
21499 short load_fpn1_7 - tbl_load_fpn1
21501 load_fpn1_0:
21502 mov.l 0+EXC_FP0(%a6), 0+FP_SRC(%a6)
21503 mov.l 4+EXC_FP0(%a6), 4+FP_SRC(%a6)
21504 mov.l 8+EXC_FP0(%a6), 8+FP_SRC(%a6)
21505 lea FP_SRC(%a6), %a0
21507 load_fpn1_1:
21508 mov.l 0+EXC_FP1(%a6), 0+FP_SRC(%a6)
21509 mov.l 4+EXC_FP1(%a6), 4+FP_SRC(%a6)
21510 mov.l 8+EXC_FP1(%a6), 8+FP_SRC(%a6)
21511 lea FP_SRC(%a6), %a0
21513 load_fpn1_2:
21514 fmovm.x &0x20, FP_SRC(%a6)
21515 lea FP_SRC(%a6), %a0
21517 load_fpn1_3:
21518 fmovm.x &0x10, FP_SRC(%a6)
21519 lea FP_SRC(%a6), %a0
21521 load_fpn1_4:
21522 fmovm.x &0x08, FP_SRC(%a6)
21523 lea FP_SRC(%a6), %a0
21525 load_fpn1_5:
21526 fmovm.x &0x04, FP_SRC(%a6)
21527 lea FP_SRC(%a6), %a0
21529 load_fpn1_6:
21530 fmovm.x &0x02, FP_SRC(%a6)
21531 lea FP_SRC(%a6), %a0
21533 load_fpn1_7:
21534 fmovm.x &0x01, FP_SRC(%a6)
21535 lea FP_SRC(%a6), %a0
21538 #############################################################################
21540 #########################################################################
21541 # XDEF **************************************************************** #
21542 # load_fpn2(): load FP register value into FP_DST(a6). #
21544 # XREF **************************************************************** #
21545 # None #
21547 # INPUT *************************************************************** #
21548 # d0 = index of FP register to load #
21550 # OUTPUT ************************************************************** #
21551 # FP_DST(a6) = value loaded from FP register file #
21553 # ALGORITHM *********************************************************** #
21554 # Using the index in d0, load FP_DST(a6) with a number from the #
21555 # FP register file. #
21557 #########################################################################
21559 global load_fpn2
21560 load_fpn2:
21561 mov.w (tbl_load_fpn2.b,%pc,%d0.w*2), %d0
21562 jmp (tbl_load_fpn2.b,%pc,%d0.w*1)
21564 tbl_load_fpn2:
21565 short load_fpn2_0 - tbl_load_fpn2
21566 short load_fpn2_1 - tbl_load_fpn2
21567 short load_fpn2_2 - tbl_load_fpn2
21568 short load_fpn2_3 - tbl_load_fpn2
21569 short load_fpn2_4 - tbl_load_fpn2
21570 short load_fpn2_5 - tbl_load_fpn2
21571 short load_fpn2_6 - tbl_load_fpn2
21572 short load_fpn2_7 - tbl_load_fpn2
21574 load_fpn2_0:
21575 mov.l 0+EXC_FP0(%a6), 0+FP_DST(%a6)
21576 mov.l 4+EXC_FP0(%a6), 4+FP_DST(%a6)
21577 mov.l 8+EXC_FP0(%a6), 8+FP_DST(%a6)
21578 lea FP_DST(%a6), %a0
21580 load_fpn2_1:
21581 mov.l 0+EXC_FP1(%a6), 0+FP_DST(%a6)
21582 mov.l 4+EXC_FP1(%a6), 4+FP_DST(%a6)
21583 mov.l 8+EXC_FP1(%a6), 8+FP_DST(%a6)
21584 lea FP_DST(%a6), %a0
21586 load_fpn2_2:
21587 fmovm.x &0x20, FP_DST(%a6)
21588 lea FP_DST(%a6), %a0
21590 load_fpn2_3:
21591 fmovm.x &0x10, FP_DST(%a6)
21592 lea FP_DST(%a6), %a0
21594 load_fpn2_4:
21595 fmovm.x &0x08, FP_DST(%a6)
21596 lea FP_DST(%a6), %a0
21598 load_fpn2_5:
21599 fmovm.x &0x04, FP_DST(%a6)
21600 lea FP_DST(%a6), %a0
21602 load_fpn2_6:
21603 fmovm.x &0x02, FP_DST(%a6)
21604 lea FP_DST(%a6), %a0
21606 load_fpn2_7:
21607 fmovm.x &0x01, FP_DST(%a6)
21608 lea FP_DST(%a6), %a0
21611 #############################################################################
21613 #########################################################################
21614 # XDEF **************************************************************** #
21615 # store_fpreg(): store an fp value to the fpreg designated d0. #
21617 # XREF **************************************************************** #
21618 # None #
21620 # INPUT *************************************************************** #
21621 # fp0 = extended precision value to store #
21622 # d0 = index of floating-point register #
21624 # OUTPUT ************************************************************** #
21625 # None #
21627 # ALGORITHM *********************************************************** #
21628 # Store the value in fp0 to the FP register designated by the #
21629 # value in d0. The FP number can be DENORM or SNAN so we have to be #
21630 # careful that we don't take an exception here. #
21632 #########################################################################
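#
# note on the push/pop idiom used for fp2-fp7 below: fmovm moves the raw
# extended-precision bits and never raises an FP exception, so a DENORM or
# SNAN in fp0 can be pushed onto the stack (the reversed -(%sp) mask &0x01
# selects fp0) and popped straight into the destination register, which a
# plain fmove could not do safely.
#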
21634 global store_fpreg
21635 store_fpreg:
21636 mov.w (tbl_store_fpreg.b,%pc,%d0.w*2), %d0
21637 jmp (tbl_store_fpreg.b,%pc,%d0.w*1)
21639 tbl_store_fpreg:
21640 short store_fpreg_0 - tbl_store_fpreg
21641 short store_fpreg_1 - tbl_store_fpreg
21642 short store_fpreg_2 - tbl_store_fpreg
21643 short store_fpreg_3 - tbl_store_fpreg
21644 short store_fpreg_4 - tbl_store_fpreg
21645 short store_fpreg_5 - tbl_store_fpreg
21646 short store_fpreg_6 - tbl_store_fpreg
21647 short store_fpreg_7 - tbl_store_fpreg
21649 store_fpreg_0:
21650 fmovm.x &0x80, EXC_FP0(%a6)
21652 store_fpreg_1:
21653 fmovm.x &0x80, EXC_FP1(%a6)
21655 store_fpreg_2:
21656 fmovm.x &0x01, -(%sp)
21657 fmovm.x (%sp)+, &0x20
21659 store_fpreg_3:
21660 fmovm.x &0x01, -(%sp)
21661 fmovm.x (%sp)+, &0x10
21663 store_fpreg_4:
21664 fmovm.x &0x01, -(%sp)
21665 fmovm.x (%sp)+, &0x08
21667 store_fpreg_5:
21668 fmovm.x &0x01, -(%sp)
21669 fmovm.x (%sp)+, &0x04
21671 store_fpreg_6:
21672 fmovm.x &0x01, -(%sp)
21673 fmovm.x (%sp)+, &0x02
21675 store_fpreg_7:
21676 fmovm.x &0x01, -(%sp)
21677 fmovm.x (%sp)+, &0x01
21680 #########################################################################
21681 # XDEF **************************************************************** #
21682 # _denorm(): denormalize an intermediate result #
21684 # XREF **************************************************************** #
21685 # None #
21687 # INPUT *************************************************************** #
21688 # a0 = points to the operand to be denormalized #
21689 # (in the internal extended format) #
21691 # d0 = rounding precision #
21693 # OUTPUT ************************************************************** #
21694 # a0 = pointer to the denormalized result #
21695 # (in the internal extended format) #
21697 # d0 = guard,round,sticky #
21699 # ALGORITHM *********************************************************** #
21700 # According to the exponent underflow threshold for the given #
21701 # precision, shift the mantissa bits to the right in order to raise the #
21702 # exponent of the operand to the threshold value. While shifting the #
21703 # mantissa bits right, maintain the value of the guard, round, and #
21704 # sticky bits. #
21705 # other notes: #
21706 # (1) _denorm() is called by the underflow routines #
21707 # (2) _denorm() does NOT affect the status register #
21709 #########################################################################
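#
# rough illustration of the test below: if the selected precision's
# underflow threshold exceeds the operand's exponent by, say, 40, the
# mantissa must move right 40 bits (the 32 <= n < 64 case of dnrm_lp) and
# the bits shifted out feed guard/round/sticky; once the difference exceeds
# 65, the entire mantissa plus g,r is shifted away, so only the sticky bit
# can survive; that is the denorm_set_stky shortcut.
#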
21712 # table of exponent threshold values for each precision
21714 tbl_thresh:
21715 short 0x0
21716 short sgl_thresh
21717 short dbl_thresh
21719 global _denorm
21720 _denorm:
21722 # Load the exponent threshold for the precision selected and check
21723 # to see if (threshold - exponent) is > 65 in which case we can
21724 # simply calculate the sticky bit and zero the mantissa. otherwise
21725 # we have to call the denormalization routine.
21727 lsr.b &0x2, %d0 # shift prec to lo bits
21728 mov.w (tbl_thresh.b,%pc,%d0.w*2), %d1 # load prec threshold
21729 mov.w %d1, %d0 # copy d1 into d0
21730 sub.w FTEMP_EX(%a0), %d0 # diff = threshold - exp
21731 cmpi.w %d0, &66 # is diff > 65? (mant + g,r bits)
21732 bpl.b denorm_set_stky # yes; just calc sticky
21734 clr.l %d0 # clear g,r,s
21735 btst &inex2_bit, FPSR_EXCEPT(%a6) # yes; was INEX2 set?
21736 beq.b denorm_call # no; don't change anything
21737 bset &29, %d0 # yes; set sticky bit
21739 denorm_call:
21740 bsr.l dnrm_lp # denormalize the number
21744 # all bits would have been shifted off during the denorm so simply
21745 # calculate if the sticky should be set and clear the entire mantissa.
21747 denorm_set_stky:
21748 mov.l &0x20000000, %d0 # set sticky bit in return value
21749 mov.w %d1, FTEMP_EX(%a0) # load exp with threshold
21750 clr.l FTEMP_HI(%a0) # zero hi(mantissa)
21751 clr.l FTEMP_LO(%a0) # zero lo(mantissa)
21755 # dnrm_lp(): denormalize exponent/mantissa to the specified threshold #
21757 # INPUT: #
21758 # %a0 : points to the operand to be denormalized #
21759 # %d0{31:29} : initial guard,round,sticky #
21760 # %d1{15:0} : denormalization threshold #
21761 # OUTPUT: #
21762 # %a0 : points to the denormalized operand #
21763 # %d0{31:29} : final guard,round,sticky #
21766 # *** Local Equates *** #
21767 set GRS, L_SCR2 # g,r,s temp storage
21768 set FTEMP_LO2, L_SCR1 # FTEMP_LO copy
21770 global dnrm_lp
21771 dnrm_lp:
21774 # make a copy of FTEMP_LO and place the g,r,s bits directly after it
21775 # in memory so as to make the bitfield extraction for denormalization easier.
21777 mov.l FTEMP_LO(%a0), FTEMP_LO2(%a6) # make FTEMP_LO copy
21778 mov.l %d0, GRS(%a6) # place g,r,s after it
21781 # check to see how much less than the underflow threshold the operand
21782 # exponent is.
21784 mov.l %d1, %d0 # copy the denorm threshold
21785 sub.w FTEMP_EX(%a0), %d1 # d1 = threshold - uns exponent
21786 ble.b dnrm_no_lp # d1 <= 0
21787 cmpi.w %d1, &0x20 # is ( 0 <= d1 < 32) ?
21788 blt.b case_1 # yes
21789 cmpi.w %d1, &0x40 # is (32 <= d1 < 64) ?
21790 blt.b case_2 # yes
21791 bra.w case_3 # (d1 >= 64)
21794 # No denormalization necessary
21796 dnrm_no_lp:
21797 mov.l GRS(%a6), %d0 # restore original g,r,s
21801 # case (0<d1<32)
21803 # %d0 = denorm threshold
21804 # %d1 = "n" = amt to shift
21806 # ---------------------------------------------------------
21807 # | FTEMP_HI | FTEMP_LO |grs000.........000|
21808 # ---------------------------------------------------------
21809 # <-(32 - n)-><-(n)-><-(32 - n)-><-(n)-><-(32 - n)-><-(n)->
21810 # \ \ \ \
21811 # \ \ \ \
21812 # \ \ \ \
21813 # \ \ \ \
21814 # \ \ \ \
21815 # \ \ \ \
21816 # \ \ \ \
21817 # \ \ \ \
21818 # <-(n)-><-(32 - n)-><------(32)-------><------(32)------->
21819 # ---------------------------------------------------------
21820 # |0.....0| NEW_HI | NEW_FTEMP_LO |grs |
21821 # ---------------------------------------------------------
21823 case_1:
21824 mov.l %d2, -(%sp) # create temp storage
21826 mov.w %d0, FTEMP_EX(%a0) # exponent = denorm threshold
21827 mov.l &32, %d0
21828 sub.w %d1, %d0 # %d0 = 32 - %d1
21830 cmpi.w %d1, &29 # is shft amt >= 29
21831 blt.b case1_extract # no; no fix needed
21832 mov.b GRS(%a6), %d2
21833 or.b %d2, 3+FTEMP_LO2(%a6)
21835 case1_extract:
21836 bfextu FTEMP_HI(%a0){&0:%d0}, %d2 # %d2 = new FTEMP_HI
21837 bfextu FTEMP_HI(%a0){%d0:&32}, %d1 # %d1 = new FTEMP_LO
21838 bfextu FTEMP_LO2(%a6){%d0:&32}, %d0 # %d0 = new G,R,S
21840 mov.l %d2, FTEMP_HI(%a0) # store new FTEMP_HI
21841 mov.l %d1, FTEMP_LO(%a0) # store new FTEMP_LO
21843 bftst %d0{&2:&30} # were bits shifted off?
21844 beq.b case1_sticky_clear # no; go finish
21845 bset &rnd_stky_bit, %d0 # yes; set sticky bit
21847 case1_sticky_clear:
21848 and.l &0xe0000000, %d0 # clear all but G,R,S
21849 mov.l (%sp)+, %d2 # restore temp register
21853 # case (32<=d1<64)
21855 # %d0 = denorm threshold
21856 # %d1 = "n" = amt to shift
21858 # ---------------------------------------------------------
21859 # | FTEMP_HI | FTEMP_LO |grs000.........000|
21860 # ---------------------------------------------------------
21861 # <-(32 - n)-><-(n)-><-(32 - n)-><-(n)-><-(32 - n)-><-(n)->
21862 # \ \ \
21863 # \ \ \
21864 # \ \ -------------------
21865 # \ -------------------- \
21866 # ------------------- \ \
21867 # \ \ \
21868 # \ \ \
21869 # \ \ \
21870 # <-------(32)------><-(n)-><-(32 - n)-><------(32)------->
21871 # ---------------------------------------------------------
21872 # |0...............0|0....0| NEW_LO |grs |
21873 # ---------------------------------------------------------
21875 case_2:
21876 mov.l %d2, -(%sp) # create temp storage
21878 mov.w %d0, FTEMP_EX(%a0) # exponent = denorm threshold
21879 subi.w &0x20, %d1 # %d1 now between 0 and 32
21880 mov.l &0x20, %d0
21881 sub.w %d1, %d0 # %d0 = 32 - %d1
21883 # subtle step here; or in the g,r,s at the bottom of FTEMP_LO to minimize
21884 # the number of bits to check for the sticky detect.
21885 # it only plays a role in shift amounts of 61-63.
21886 mov.b GRS(%a6), %d2
21887 or.b %d2, 3+FTEMP_LO2(%a6)
21889 bfextu FTEMP_HI(%a0){&0:%d0}, %d2 # %d2 = new FTEMP_LO
21890 bfextu FTEMP_HI(%a0){%d0:&32}, %d1 # %d1 = new G,R,S
21892 bftst %d1{&2:&30} # were any bits shifted off?
21893 bne.b case2_set_sticky # yes; set sticky bit
21894 bftst FTEMP_LO2(%a6){%d0:&31} # were any bits shifted off?
21895 bne.b case2_set_sticky # yes; set sticky bit
21897 mov.l %d1, %d0 # move new G,R,S to %d0
21898 bra.b case2_end
21900 case2_set_sticky:
21901 mov.l %d1, %d0 # move new G,R,S to %d0
21902 bset &rnd_stky_bit, %d0 # set sticky bit
21904 case2_end:
21905 clr.l FTEMP_HI(%a0) # store FTEMP_HI = 0
21906 mov.l %d2, FTEMP_LO(%a0) # store FTEMP_LO
21907 and.l &0xe0000000, %d0 # clear all but G,R,S
21909 mov.l (%sp)+,%d2 # restore temp register
21913 # case (d1>=64)
21915 # %d0 = denorm threshold
21916 # %d1 = amt to shift
21918 case_3:
21919 mov.w %d0, FTEMP_EX(%a0) # insert denorm threshold
21921 cmpi.w %d1, &65 # is shift amt > 65?
21922 blt.b case3_64 # no; it's == 64
21923 beq.b case3_65 # no; it's == 65
21926 # case (d1>65)
21928 # Shift value is > 65 and out of range. All bits are shifted off.
21929 # Return a zero mantissa with the sticky bit set
21931 clr.l FTEMP_HI(%a0) # clear hi(mantissa)
21932 clr.l FTEMP_LO(%a0) # clear lo(mantissa)
21933 mov.l &0x20000000, %d0 # set sticky bit
21937 # case (d1 == 64)
21939 # ---------------------------------------------------------
21940 # | FTEMP_HI | FTEMP_LO |grs000.........000|
21941 # ---------------------------------------------------------
21942 # <-------(32)------>
21943 # \ \
21944 # \ \
21945 # \ \
21946 # \ ------------------------------
21947 # ------------------------------- \
21948 # \ \
21949 # \ \
21950 # \ \
21951 # <-------(32)------>
21952 # ---------------------------------------------------------
21953 # |0...............0|0................0|grs |
21954 # ---------------------------------------------------------
21956 case3_64:
21957 mov.l FTEMP_HI(%a0), %d0 # fetch hi(mantissa)
21958 mov.l %d0, %d1 # make a copy
21959 and.l &0xc0000000, %d0 # extract G,R
21960 and.l &0x3fffffff, %d1 # extract other bits
21962 bra.b case3_complete
21965 # case (d1 == 65)
21967 # ---------------------------------------------------------
21968 # | FTEMP_HI | FTEMP_LO |grs000.........000|
21969 # ---------------------------------------------------------
21970 # <-------(32)------>
21971 # \ \
21972 # \ \
21973 # \ \
21974 # \ ------------------------------
21975 # -------------------------------- \
21976 # \ \
21977 # \ \
21978 # \ \
21979 # <-------(31)----->
21980 # ---------------------------------------------------------
21981 # |0...............0|0................0|0rs |
21982 # ---------------------------------------------------------
21984 case3_65:
21985 mov.l FTEMP_HI(%a0), %d0 # fetch hi(mantissa)
21986 and.l &0x80000000, %d0 # extract R bit
21987 lsr.l &0x1, %d0 # shift high bit into R bit
21988 and.l &0x7fffffff, %d1 # extract other bits
21990 case3_complete:
21991 # the last operation was an "and" of the bits shifted off, so the condition
21992 # codes are already set; branch accordingly.
21993 bne.b case3_set_sticky # yes; go set new sticky
21994 tst.l FTEMP_LO(%a0) # were any bits shifted off?
21995 bne.b case3_set_sticky # yes; go set new sticky
21996 tst.b GRS(%a6) # were any bits shifted off?
21997 bne.b case3_set_sticky # yes; go set new sticky
22000 # no bits were shifted off so don't set the sticky bit.
22001 # the guard and round bits remain in %d0.
22002 # the entire mantissa is zero.
22004 clr.l FTEMP_HI(%a0) # clear hi(mantissa)
22005 clr.l FTEMP_LO(%a0) # clear lo(mantissa)
22009 # some bits were shifted off so set the sticky bit.
22010 # the entire mantissa is zero.
22012 case3_set_sticky:
22013 bset &rnd_stky_bit,%d0 # set new sticky bit
22014 clr.l FTEMP_HI(%a0) # clear hi(mantissa)
22015 clr.l FTEMP_LO(%a0) # clear lo(mantissa)
22018 #########################################################################
22019 # XDEF **************************************************************** #
22020 # _round(): round result according to precision/mode #
22022 # XREF **************************************************************** #
22023 # None #
22025 # INPUT *************************************************************** #
22026 # a0 = ptr to input operand in internal extended format #
22027 # d1(hi) = contains rounding precision: #
22028 # ext = $0000xxxx #
22029 # sgl = $0004xxxx #
22030 # dbl = $0008xxxx #
22031 # d1(lo) = contains rounding mode: #
22032 # RN = $xxxx0000 #
22033 # RZ = $xxxx0001 #
22034 # RM = $xxxx0002 #
22035 # RP = $xxxx0003 #
22036 # d0{31:29} = contains the g,r,s bits (extended) #
22038 # OUTPUT ************************************************************** #
22039 # a0 = pointer to rounded result #
22041 # ALGORITHM *********************************************************** #
22042 # On return the value pointed to by a0 is correctly rounded, #
22043 # a0 is preserved and the g-r-s bits in d0 are cleared. #
22044 # The result is not typed - the tag field is invalid. The #
22045 # result is still in the internal extended format. #
22047 # The INEX bit of USER_FPSR will be set if the rounded result was #
22048 # inexact (i.e. if any of the g-r-s bits were set). #
22050 #########################################################################
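#
# worked round-to-nearest example (illustrative values): with lsb = 0 and
# g,r,s = 1,0,0 the value sits exactly halfway, so rnd_near adds 1 to the
# lsb and then, because r = s = 0, clears the lsb again, breaking the tie
# toward the even mantissa. with g,r,s = 1,0,1 the incremented lsb is kept
# and the result rounds up. any nonzero g,r,s also sets inex2/ainex first.
#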
22052 global _round
22053 _round:
22055 # ext_grs() looks at the rounding precision and sets the appropriate
22056 # G,R,S bits.
22057 # If (G,R,S == 0) then result is exact and round is done, else set
22058 # the inex flag in status reg and continue.
22060 bsr.l ext_grs # extract G,R,S
22062 tst.l %d0 # are G,R,S zero?
22063 beq.w truncate # yes; round is complete
22065 or.w &inx2a_mask, 2+USER_FPSR(%a6) # set inex2/ainex
22068 # Use rounding mode as an index into a jump table for these modes.
22069 # All of the following assumes grs != 0.
22071 mov.w (tbl_mode.b,%pc,%d1.w*2), %a1 # load jump offset
22072 jmp (tbl_mode.b,%pc,%a1) # jmp to rnd mode handler
22074 tbl_mode:
22075 short rnd_near - tbl_mode
22076 short truncate - tbl_mode # RZ always truncates
22077 short rnd_mnus - tbl_mode
22078 short rnd_plus - tbl_mode
22080 #################################################################
22081 # ROUND PLUS INFINITY #
22083 # If sign of fp number = 0 (positive), then add 1 to l. #
22084 #################################################################
22085 rnd_plus:
22086 tst.b FTEMP_SGN(%a0) # check for sign
22087 bmi.w truncate # if negative then truncate
22089 mov.l &0xffffffff, %d0 # force g,r,s to be all f's
22090 swap %d1 # set up d1 for round prec.
22092 cmpi.b %d1, &s_mode # is prec = sgl?
22093 beq.w add_sgl # yes
22094 bgt.w add_dbl # no; it's dbl
22095 bra.w add_ext # no; it's ext
22097 #################################################################
22098 # ROUND MINUS INFINITY #
22100 # If sign of fp number = 1 (negative), then add 1 to l. #
22101 #################################################################
22102 rnd_mnus:
22103 tst.b FTEMP_SGN(%a0) # check for sign
22104 bpl.w truncate # if positive then truncate
22106 mov.l &0xffffffff, %d0 # force g,r,s to be all f's
22107 swap %d1 # set up d1 for round prec.
22109 cmpi.b %d1, &s_mode # is prec = sgl?
22110 beq.w add_sgl # yes
22111 bgt.w add_dbl # no; it's dbl
22112 bra.w add_ext # no; it's ext
22114 #################################################################
22115 # ROUND NEAREST #
22117 # If (g=1), then add 1 to l and if (r=s=0), then clear l #
22118 # Note that this will round to even in case of a tie. #
22119 #################################################################
22120 rnd_near:
22121 asl.l &0x1, %d0 # shift g-bit to c-bit
22122 bcc.w truncate # if (g=0), truncate
22124 swap %d1 # set up d1 for round prec.
22126 cmpi.b %d1, &s_mode # is prec = sgl?
22127 beq.w add_sgl # yes
22128 bgt.w add_dbl # no; it's dbl
22129 bra.w add_ext # no; it's ext
22131 # *** LOCAL EQUATES ***
22132 set ad_1_sgl, 0x00000100 # constant to add 1 to l-bit in sgl prec
22133 set ad_1_dbl, 0x00000800 # constant to add 1 to l-bit in dbl prec
22135 #########################
22136 # ADD SINGLE #
22137 #########################
22138 add_sgl:
22139 add.l &ad_1_sgl, FTEMP_HI(%a0)
22140 bcc.b scc_clr # no mantissa overflow
22141 roxr.w FTEMP_HI(%a0) # shift v-bit back in
22142 roxr.w FTEMP_HI+2(%a0) # shift v-bit back in
22143 add.w &0x1, FTEMP_EX(%a0) # and incr exponent
22144 scc_clr:
22145 tst.l %d0 # test for rs = 0
22146 bne.b sgl_done
22147 and.w &0xfe00, FTEMP_HI+2(%a0) # clear the l-bit
22148 sgl_done:
22149 and.l &0xffffff00, FTEMP_HI(%a0) # truncate bits beyond sgl limit
22150 clr.l FTEMP_LO(%a0) # clear lo(mantissa)
22153 #########################
22154 # ADD EXTENDED #
22155 #########################
22156 add_ext:
22157 addq.l &1,FTEMP_LO(%a0) # add 1 to l-bit
22158 bcc.b xcc_clr # test for carry out
22159 addq.l &1,FTEMP_HI(%a0) # propagate carry
22160 bcc.b xcc_clr
22161 roxr.w FTEMP_HI(%a0) # mant is 0 so restore v-bit
22162 roxr.w FTEMP_HI+2(%a0) # mant is 0 so restore v-bit
22163 roxr.w FTEMP_LO(%a0)
22164 roxr.w FTEMP_LO+2(%a0)
22165 add.w &0x1,FTEMP_EX(%a0) # and inc exp
22166 xcc_clr:
22167 tst.l %d0 # test rs = 0
22168 bne.b add_ext_done
22169 and.b &0xfe,FTEMP_LO+3(%a0) # clear the l bit
22170 add_ext_done:
22173 #########################
22174 # ADD DOUBLE #
22175 #########################
22176 add_dbl:
22177 add.l &ad_1_dbl, FTEMP_LO(%a0) # add 1 to lsb
22178 bcc.b dcc_clr # no carry
22179 addq.l &0x1, FTEMP_HI(%a0) # propagate carry
22180 bcc.b dcc_clr # no carry
22182 roxr.w FTEMP_HI(%a0) # mant is 0 so restore v-bit
22183 roxr.w FTEMP_HI+2(%a0) # mant is 0 so restore v-bit
22184 roxr.w FTEMP_LO(%a0)
22185 roxr.w FTEMP_LO+2(%a0)
22186 addq.w &0x1, FTEMP_EX(%a0) # incr exponent
22187 dcc_clr:
22188 tst.l %d0 # test for rs = 0
22189 bne.b dbl_done
22190 and.w &0xf000, FTEMP_LO+2(%a0) # clear the l-bit
22192 dbl_done:
22193 and.l &0xfffff800,FTEMP_LO(%a0) # truncate bits beyond dbl limit
22196 ###########################
22197 # Truncate all other bits #
22198 ###########################
22199 truncate:
22200 swap %d1 # select rnd prec
22202 cmpi.b %d1, &s_mode # is prec sgl?
22203 beq.w sgl_done # yes
22204 bgt.b dbl_done # no; it's dbl
22205 rts # no; it's ext
22209 # ext_grs(): extract guard, round and sticky bits according to
22210 # rounding precision.
22212 # INPUT
22213 # d0 = extended precision g,r,s (in d0{31:29})
22214 # d1 = {PREC,ROUND}
22215 # OUTPUT
22216 # d0{31:29} = guard, round, sticky
22218 # ext_grs() extracts the guard/round/sticky bits according to the
22219 # selected rounding precision. It is called by the round subroutine
22220 # only. All registers except d0 are kept intact. d0 becomes an
22221 # updated guard,round,sticky in d0{31:29}
22223 # Notes: ext_grs() uses the rounding PREC and therefore has to swap d1
22224 # prior to use and restore d1 to its original value afterwards. this
22225 # routine is tightly tied to the round routine and is not meant to
22226 # uphold standard subroutine calling practices.
22229 ext_grs:
22230 swap %d1 # have d1.w point to round precision
22231 tst.b %d1 # is rnd prec = extended?
22232 bne.b ext_grs_not_ext # no; go handle sgl or dbl
22235 # %d0 actually already holds g,r,s since _round() had it before calling
22236 # this function. so, as long as we don't disturb it, we are "returning" it.
22238 ext_grs_ext:
22239 swap %d1 # yes; return to correct positions
22242 ext_grs_not_ext:
22243 movm.l &0x3000, -(%sp) # make some temp registers {d2/d3}
22245 cmpi.b %d1, &s_mode # is rnd prec = sgl?
22246 bne.b ext_grs_dbl # no; go handle dbl
22249 # sgl:
22250 # 96 64 40 32 0
22251 # -----------------------------------------------------
22252 # | EXP |XXXXXXX| |xx | |grs|
22253 # -----------------------------------------------------
22254 # <--(24)--->nn\ /
22255 # ee ---------------------
22256 # ww |
22258 # gr new sticky
22260 ext_grs_sgl:
22261 bfextu FTEMP_HI(%a0){&24:&2}, %d3 # sgl prec. g-r are 2 bits right
22262 mov.l &30, %d2 # of the sgl prec. limits
22263 lsl.l %d2, %d3 # shift g-r bits to MSB of d3
22264 mov.l FTEMP_HI(%a0), %d2 # get word 2 for s-bit test
22265 and.l &0x0000003f, %d2 # s bit is the or of all other
22266 bne.b ext_grs_st_stky # bits to the right of g-r
22267 tst.l FTEMP_LO(%a0) # test lower mantissa
22268 bne.b ext_grs_st_stky # if any are set, set sticky
22269 tst.l %d0 # test original g,r,s
22270 bne.b ext_grs_st_stky # if any are set, set sticky
22271 bra.b ext_grs_end_sd # if words 3 and 4 are clr, exit
22274 # dbl:
22275 # 96 64 32 11 0
22276 # -----------------------------------------------------
22277 # | EXP |XXXXXXX| | |xx |grs|
22278 # -----------------------------------------------------
22279 # nn\ /
22280 # ee -------
22281 # ww |
22283 # gr new sticky
22285 ext_grs_dbl:
22286 bfextu FTEMP_LO(%a0){&21:&2}, %d3 # dbl-prec. g-r are 2 bits right
22287 mov.l &30, %d2 # of the dbl prec. limits
22288 lsl.l %d2, %d3 # shift g-r bits to the MSB of d3
22289 mov.l FTEMP_LO(%a0), %d2 # get lower mantissa for s-bit test
22290 and.l &0x000001ff, %d2 # s bit is the or-ing of all
22291 bne.b ext_grs_st_stky # other bits to the right of g-r
22292 tst.l %d0 # test word original g,r,s
22293 bne.b ext_grs_st_stky # if any are set, set sticky
22294 bra.b ext_grs_end_sd # if clear, exit
22296 ext_grs_st_stky:
22297 bset &rnd_stky_bit, %d3 # set sticky bit
22298 ext_grs_end_sd:
22299 mov.l %d3, %d0 # return grs to d0
22301 movm.l (%sp)+, &0xc # restore scratch registers {d2/d3}
22303 swap %d1 # restore d1 to original
22306 #########################################################################
22307 # norm(): normalize the mantissa of an extended precision input. the #
22308 # input operand should not be normalized already. #
22310 # XDEF **************************************************************** #
22311 # norm() #
22313 # XREF **************************************************************** #
22314 # none #
22316 # INPUT *************************************************************** #
22317 # a0 = pointer fp extended precision operand to normalize #
22319 # OUTPUT ************************************************************** #
22320 # d0 = number of bit positions the mantissa was shifted #
22321 # a0 = the input operand's mantissa is normalized; the exponent #
22322 # is unchanged. #
22324 #########################################################################
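#
# worked example of the bfffo-based shift count (arbitrary value): if
# hi(man) = $00001234, bfffo returns 19 (offset of the first set bit,
# counting from the msb), so the mantissa pair is shifted left 19 places,
# the top 19 bits of lo(man) are pulled up into hi(man), and d0 returns 19.
#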
22325 global norm
22326 norm:
22327 mov.l %d2, -(%sp) # create some temp regs
22328 mov.l %d3, -(%sp)
22330 mov.l FTEMP_HI(%a0), %d0 # load hi(mantissa)
22331 mov.l FTEMP_LO(%a0), %d1 # load lo(mantissa)
22333 bfffo %d0{&0:&32}, %d2 # how many places to shift?
22334 beq.b norm_lo # hi(man) is all zeroes!
22336 norm_hi:
22337 lsl.l %d2, %d0 # left shift hi(man)
22338 bfextu %d1{&0:%d2}, %d3 # extract lo bits
22340 or.l %d3, %d0 # create hi(man)
22341 lsl.l %d2, %d1 # create lo(man)
22343 mov.l %d0, FTEMP_HI(%a0) # store new hi(man)
22344 mov.l %d1, FTEMP_LO(%a0) # store new lo(man)
22346 mov.l %d2, %d0 # return shift amount
22348 mov.l (%sp)+, %d3 # restore temp regs
22349 mov.l (%sp)+, %d2
22353 norm_lo:
22354 bfffo %d1{&0:&32}, %d2 # how many places to shift?
22355 lsl.l %d2, %d1 # shift lo(man)
22356 add.l &32, %d2 # add 32 to shft amount
22358 mov.l %d1, FTEMP_HI(%a0) # store hi(man)
22359 clr.l FTEMP_LO(%a0) # lo(man) is now zero
22361 mov.l %d2, %d0 # return shift amount
22363 mov.l (%sp)+, %d3 # restore temp regs
22364 mov.l (%sp)+, %d2
22368 #########################################################################
22369 # unnorm_fix(): - changes an UNNORM to one of NORM, DENORM, or ZERO #
22370 # - returns corresponding optype tag #
22372 # XDEF **************************************************************** #
22373 # unnorm_fix() #
22375 # XREF **************************************************************** #
22376 # norm() - normalize the mantissa #
22378 # INPUT *************************************************************** #
22379 # a0 = pointer to unnormalized extended precision number #
22381 # OUTPUT ************************************************************** #
22382 # d0 = optype tag - is corrected to one of NORM, DENORM, or ZERO #
22383 # a0 = input operand has been converted to a norm, denorm, or #
22384 # zero; both the exponent and mantissa are changed. #
22386 #########################################################################
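#
# illustrative cases for the decision below: if the mantissa needs a 3 bit
# shift and the (unsigned) exponent is 5, the operand can be fully
# normalized (new exponent 2, tag NORM); if it needs a 9 bit shift with the
# same exponent, it is shifted only 5 places so the exponent lands on zero
# and the tag becomes DENORM; an all-zero mantissa is simply retagged ZERO.
#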
22388 global unnorm_fix
22389 unnorm_fix:
22390 bfffo FTEMP_HI(%a0){&0:&32}, %d0 # how many shifts are needed?
22391 bne.b unnorm_shift # hi(man) is not all zeroes
22394 # hi(man) is all zeroes so see if any bits in lo(man) are set
22396 unnorm_chk_lo:
22397 bfffo FTEMP_LO(%a0){&0:&32}, %d0 # is operand really a zero?
22398 beq.w unnorm_zero # yes
22400 add.w &32, %d0 # no; fix shift distance
22403 # d0 = # shifts needed for complete normalization
22405 unnorm_shift:
22406 clr.l %d1 # clear top word
22407 mov.w FTEMP_EX(%a0), %d1 # extract exponent
22408 and.w &0x7fff, %d1 # strip off sgn
22410 cmp.w %d0, %d1 # will denorm push exp < 0?
22411 bgt.b unnorm_nrm_zero # yes; denorm only until exp = 0
22414 # exponent would not go < 0. therefore, number stays normalized
22416 sub.w %d0, %d1 # shift exponent value
22417 mov.w FTEMP_EX(%a0), %d0 # load old exponent
22418 and.w &0x8000, %d0 # save old sign
22419 or.w %d0, %d1 # {sgn,new exp}
22420 mov.w %d1, FTEMP_EX(%a0) # insert new exponent
22422 bsr.l norm # normalize UNNORM
22424 mov.b &NORM, %d0 # return new optype tag
22428 # exponent would go < 0, so only denormalize until exp = 0
22430 unnorm_nrm_zero:
22431 cmp.b %d1, &32 # is exp <= 32?
22432 bgt.b unnorm_nrm_zero_lrg # no; go handle large exponent
22434 bfextu FTEMP_HI(%a0){%d1:&32}, %d0 # extract new hi(man)
22435 mov.l %d0, FTEMP_HI(%a0) # save new hi(man)
22437 mov.l FTEMP_LO(%a0), %d0 # fetch old lo(man)
22438 lsl.l %d1, %d0 # extract new lo(man)
22439 mov.l %d0, FTEMP_LO(%a0) # save new lo(man)
22441 and.w &0x8000, FTEMP_EX(%a0) # set exp = 0
22443 mov.b &DENORM, %d0 # return new optype tag
22447 # only mantissa bits set are in lo(man)
22449 unnorm_nrm_zero_lrg:
22450 sub.w &32, %d1 # adjust shft amt by 32
22452 mov.l FTEMP_LO(%a0), %d0 # fetch old lo(man)
22453 lsl.l %d1, %d0 # left shift lo(man)
22455 mov.l %d0, FTEMP_HI(%a0) # store new hi(man)
22456 clr.l FTEMP_LO(%a0) # lo(man) = 0
22458 and.w &0x8000, FTEMP_EX(%a0) # set exp = 0
22460 mov.b &DENORM, %d0 # return new optype tag
22464 # whole mantissa is zero so this UNNORM is actually a zero
22466 unnorm_zero:
22467 and.w &0x8000, FTEMP_EX(%a0) # force exponent to zero
22469 mov.b &ZERO, %d0 # fix optype tag
22472 #########################################################################
22473 # XDEF **************************************************************** #
22474 # set_tag_x(): return the optype of the input ext fp number #
22476 # XREF **************************************************************** #
22477 # None #
22479 # INPUT *************************************************************** #
22480 # a0 = pointer to extended precision operand #
22482 # OUTPUT ************************************************************** #
22483 # d0 = value of type tag #
22484 # one of: NORM, INF, QNAN, SNAN, DENORM, UNNORM, ZERO #
22486 # ALGORITHM *********************************************************** #
22487 # Simply test the exponent, j-bit, and mantissa values to #
22488 # determine the type of operand. #
22489 # If it's an unnormalized zero, alter the operand and force it #
22490 # to be a normal zero. #
22492 #########################################################################
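#
# summary of the classification implemented below, checked in order, top to
# bottom (exp = biased exponent with sign stripped, j = explicit integer
# bit, man = mantissa bits 62:0):
#
# exp == $7fff, man == 0 -> INF
# exp == $7fff, man != 0 -> QNAN if bit 62 set, else SNAN
# j == 1 -> NORM
# j == 0, exp != 0 -> UNNORM (forced to ZERO if man == 0)
# j == 0, exp == 0, man != 0 -> DENORM
# j == 0, exp == 0, man == 0 -> ZERO
#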
22494 global set_tag_x
22495 set_tag_x:
22496 mov.w FTEMP_EX(%a0), %d0 # extract exponent
22497 andi.w &0x7fff, %d0 # strip off sign
22498 cmpi.w %d0, &0x7fff # is (EXP == MAX)?
22499 beq.b inf_or_nan_x
22500 not_inf_or_nan_x:
22501 btst &0x7,FTEMP_HI(%a0)
22502 beq.b not_norm_x
22503 is_norm_x:
22504 mov.b &NORM, %d0
22506 not_norm_x:
22507 tst.w %d0 # is exponent = 0?
22508 bne.b is_unnorm_x
22509 not_unnorm_x:
22510 tst.l FTEMP_HI(%a0)
22511 bne.b is_denorm_x
22512 tst.l FTEMP_LO(%a0)
22513 bne.b is_denorm_x
22514 is_zero_x:
22515 mov.b &ZERO, %d0
22517 is_denorm_x:
22518 mov.b &DENORM, %d0
22520 # we must now distinguish "unnormalized zeroes", which we
22521 # must convert to zero.
22522 is_unnorm_x:
22523 tst.l FTEMP_HI(%a0)
22524 bne.b is_unnorm_reg_x
22525 tst.l FTEMP_LO(%a0)
22526 bne.b is_unnorm_reg_x
22527 # it's an "unnormalized zero". let's convert it to an actual zero...
22528 andi.w &0x8000,FTEMP_EX(%a0) # clear exponent
22529 mov.b &ZERO, %d0
22531 is_unnorm_reg_x:
22532 mov.b &UNNORM, %d0
22534 inf_or_nan_x:
22535 tst.l FTEMP_LO(%a0)
22536 bne.b is_nan_x
22537 mov.l FTEMP_HI(%a0), %d0
22538 and.l &0x7fffffff, %d0 # msb is a don't care!
22539 bne.b is_nan_x
22540 is_inf_x:
22541 mov.b &INF, %d0
22543 is_nan_x:
22544 btst &0x6, FTEMP_HI(%a0)
22545 beq.b is_snan_x
22546 mov.b &QNAN, %d0
22548 is_snan_x:
22549 mov.b &SNAN, %d0
22552 #########################################################################
22553 # XDEF **************************************************************** #
22554 # set_tag_d(): return the optype of the input dbl fp number #
22556 # XREF **************************************************************** #
22557 # None #
22559 # INPUT *************************************************************** #
22560 # a0 = points to double precision operand #
22562 # OUTPUT ************************************************************** #
22563 # d0 = value of type tag #
22564 # one of: NORM, INF, QNAN, SNAN, DENORM, ZERO #
22566 # ALGORITHM *********************************************************** #
22567 # Simply test the exponent, j-bit, and mantissa values to #
22568 # determine the type of operand. #
22570 #########################################################################
22572 global set_tag_d
22573 set_tag_d:
22574 mov.l FTEMP(%a0), %d0
22575 mov.l %d0, %d1
22577 andi.l &0x7ff00000, %d0
22578 beq.b zero_or_denorm_d
22580 cmpi.l %d0, &0x7ff00000
22581 beq.b inf_or_nan_d
22583 is_norm_d:
22584 mov.b &NORM, %d0
22586 zero_or_denorm_d:
22587 and.l &0x000fffff, %d1
22588 bne is_denorm_d
22589 tst.l 4+FTEMP(%a0)
22590 bne is_denorm_d
22591 is_zero_d:
22592 mov.b &ZERO, %d0
22594 is_denorm_d:
22595 mov.b &DENORM, %d0
22597 inf_or_nan_d:
22598 and.l &0x000fffff, %d1
22599 bne is_nan_d
22600 tst.l 4+FTEMP(%a0)
22601 bne is_nan_d
22602 is_inf_d:
22603 mov.b &INF, %d0
22605 is_nan_d:
22606 btst &19, %d1
22607 bne is_qnan_d
22608 is_snan_d:
22609 mov.b &SNAN, %d0
22611 is_qnan_d:
22612 mov.b &QNAN, %d0
22615 #########################################################################
22616 # XDEF **************************************************************** #
22617 # set_tag_s(): return the optype of the input sgl fp number #
22619 # XREF **************************************************************** #
22620 # None #
22622 # INPUT *************************************************************** #
22623 # a0 = pointer to single precision operand #
22625 # OUTPUT ************************************************************** #
22626 # d0 = value of type tag #
22627 # one of: NORM, INF, QNAN, SNAN, DENORM, ZERO #
22629 # ALGORITHM *********************************************************** #
22630 # Simply test the exponent, j-bit, and mantissa values to #
22631 # determine the type of operand. #
22633 #########################################################################
22635 global set_tag_s
22636 set_tag_s:
22637 mov.l FTEMP(%a0), %d0
22638 mov.l %d0, %d1
22640 andi.l &0x7f800000, %d0
22641 beq.b zero_or_denorm_s
22643 cmpi.l %d0, &0x7f800000
22644 beq.b inf_or_nan_s
22646 is_norm_s:
22647 mov.b &NORM, %d0
22649 zero_or_denorm_s:
22650 and.l &0x007fffff, %d1
22651 bne is_denorm_s
22652 is_zero_s:
22653 mov.b &ZERO, %d0
22655 is_denorm_s:
22656 mov.b &DENORM, %d0
22658 inf_or_nan_s:
22659 and.l &0x007fffff, %d1
22660 bne is_nan_s
22661 is_inf_s:
22662 mov.b &INF, %d0
22664 is_nan_s:
22665 btst &22, %d1
22666 bne is_qnan_s
22667 is_snan_s:
22668 mov.b &SNAN, %d0
22670 is_qnan_s:
22671 mov.b &QNAN, %d0
22674 #########################################################################
22675 # XDEF **************************************************************** #
22676 # unf_res(): routine to produce default underflow result of a #
22677 # scaled extended precision number; this is used by #
22678 # fadd/fdiv/fmul/etc. emulation routines. #
22679 # unf_res4(): same as above but for fsglmul/fsgldiv which use #
22680 # single round prec and extended prec mode. #
22682 # XREF **************************************************************** #
22683 # _denorm() - denormalize according to scale factor #
22684 # _round() - round denormalized number according to rnd prec #
22686 # INPUT *************************************************************** #
22687 # a0 = pointer to extended precision operand #
22688 # d0 = scale factor #
22689 # d1 = rounding precision/mode #
22691 # OUTPUT ************************************************************** #
22692 # a0 = pointer to default underflow result in extended precision #
22693 # d0.b = result FPSR_cc which caller may or may not want to save #
22695 # ALGORITHM *********************************************************** #
22696 # Convert the input operand to "internal format" which means the #
22697 # exponent is extended to 16 bits and the sign is stored in the unused #
22698 # portion of the extended precision operand. Denormalize the number #
22699 # according to the scale factor passed in d0. Then, round the #
22700 # denormalized result. #
22701 # Set the FPSR_exc bits as appropriate but return the cc bits in #
22702 # d0 in case the caller doesn't want to save them (as is the case for #
22703 # fmove out). #
22704 # unf_res4() for fsglmul/fsgldiv forces the denorm to extended #
22705 # precision and the rounding mode to single. #
22707 #########################################################################
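#
# In outline (a sketch only, not compilable C: _denorm and _round are the
# FPSP helpers named above, and "op" stands for the operand at (a0)):
#
#	sgn    = op.ex >> 15;                    /* sne  FTEMP_SGN              */
#	op.ex  = (op.ex & 0x7fff) - scale;       /* 16-bit exp, may go negative */
#	_denorm(&op);                            /* shift the mantissa right    */
#	_round(&op, prec, mode);                 /* caller's precision/mode     */
#	op.ex  = (op.ex & 0x7fff) | (sgn << 15); /* back to normal format       */
#	ccodes = (op.hi | op.lo) ? 0 : Z_BIT;    /* zero after rounding?        */
#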
22708 global unf_res
22709 unf_res:
22710 mov.l %d1, -(%sp) # save rnd prec,mode on stack
22712 btst &0x7, FTEMP_EX(%a0) # make "internal" format
22713 sne FTEMP_SGN(%a0)
22715 mov.w FTEMP_EX(%a0), %d1 # extract exponent
22716 and.w &0x7fff, %d1
22717 sub.w %d0, %d1
22718 mov.w %d1, FTEMP_EX(%a0) # insert 16 bit exponent
22720 mov.l %a0, -(%sp) # save operand ptr during calls
22722 mov.l 0x4(%sp),%d0 # pass rnd prec.
22723 andi.w &0x00c0,%d0
22724 lsr.w &0x4,%d0
22725 bsr.l _denorm # denorm result
22727 mov.l (%sp),%a0
22728 mov.w 0x6(%sp),%d1 # load prec:mode into %d1
22729 andi.w &0xc0,%d1 # extract rnd prec
22730 lsr.w &0x4,%d1
22731 swap %d1
22732 mov.w 0x6(%sp),%d1
22733 andi.w &0x30,%d1
22734 lsr.w &0x4,%d1
22735 bsr.l _round # round the denorm
22737 mov.l (%sp)+, %a0
22739 # result is now rounded properly. convert back to normal format
22740 bclr &0x7, FTEMP_EX(%a0) # clear sgn first; may have residue
22741 tst.b FTEMP_SGN(%a0) # is "internal result" sign set?
22742 beq.b unf_res_chkifzero # no; result is positive
22743 bset &0x7, FTEMP_EX(%a0) # set result sgn
22744 clr.b FTEMP_SGN(%a0) # clear temp sign
22746 # the number may have become zero after rounding. set ccodes accordingly.
22747 unf_res_chkifzero:
22748 clr.l %d0
22749 tst.l FTEMP_HI(%a0) # is value now a zero?
22750 bne.b unf_res_cont # no
22751 tst.l FTEMP_LO(%a0)
22752 bne.b unf_res_cont # no
22753 # bset &z_bit, FPSR_CC(%a6) # yes; set zero ccode bit
22754 bset &z_bit, %d0 # yes; set zero ccode bit
22756 unf_res_cont:
22759 # can inex1 also be set along with unfl and inex2???
22761 # we know that underflow has occurred. aunfl should be set if INEX2 is also set.
22763 btst &inex2_bit, FPSR_EXCEPT(%a6) # is INEX2 set?
22764 beq.b unf_res_end # no
22765 bset &aunfl_bit, FPSR_AEXCEPT(%a6) # yes; set aunfl
22767 unf_res_end:
22768 add.l &0x4, %sp # clear stack
22769 rts
22771 # unf_res() for fsglmul() and fsgldiv().
22772 global unf_res4
22773 unf_res4:
22774 mov.l %d1,-(%sp) # save rnd prec,mode on stack
22776 btst &0x7,FTEMP_EX(%a0) # make "internal" format
22777 sne FTEMP_SGN(%a0)
22779 mov.w FTEMP_EX(%a0),%d1 # extract exponent
22780 and.w &0x7fff,%d1
22781 sub.w %d0,%d1
22782 mov.w %d1,FTEMP_EX(%a0) # insert 16 bit exponent
22784 mov.l %a0,-(%sp) # save operand ptr during calls
22786 clr.l %d0 # force rnd prec = ext
22787 bsr.l _denorm # denorm result
22789 mov.l (%sp),%a0
22790 mov.w &s_mode,%d1 # force rnd prec = sgl
22791 swap %d1
22792 mov.w 0x6(%sp),%d1 # load rnd mode
22793 andi.w &0x30,%d1 # extract rnd mode
22794 lsr.w &0x4,%d1
22795 bsr.l _round # round the denorm
22797 mov.l (%sp)+,%a0
22799 # result is now rounded properly. convert back to normal format
22800 bclr &0x7,FTEMP_EX(%a0) # clear sgn first; may have residue
22801 tst.b FTEMP_SGN(%a0) # is "internal result" sign set?
22802 beq.b unf_res4_chkifzero # no; result is positive
22803 bset &0x7,FTEMP_EX(%a0) # set result sgn
22804 clr.b FTEMP_SGN(%a0) # clear temp sign
22806 # the number may have become zero after rounding. set ccodes accordingly.
22807 unf_res4_chkifzero:
22808 clr.l %d0
22809 tst.l FTEMP_HI(%a0) # is value now a zero?
22810 bne.b unf_res4_cont # no
22811 tst.l FTEMP_LO(%a0)
22812 bne.b unf_res4_cont # no
22813 # bset &z_bit,FPSR_CC(%a6) # yes; set zero ccode bit
22814 bset &z_bit,%d0 # yes; set zero ccode bit
22816 unf_res4_cont:
22819 # can inex1 also be set along with unfl and inex2???
22821 # we know that underflow has occurred. aunfl should be set if INEX2 is also set.
22823 btst &inex2_bit,FPSR_EXCEPT(%a6) # is INEX2 set?
22824 beq.b unf_res4_end # no
22825 bset &aunfl_bit,FPSR_AEXCEPT(%a6) # yes; set aunfl
22827 unf_res4_end:
22828 add.l &0x4,%sp # clear stack
22829 rts
22831 #########################################################################
22832 # XDEF **************************************************************** #
22833 # ovf_res(): routine to produce the default overflow result of #
22834 # an overflowing number. #
22835 # ovf_res2(): same as above but the rnd mode/prec are passed #
22836 # differently. #
22838 # XREF **************************************************************** #
22839 # none #
22841 # INPUT *************************************************************** #
22842 # d1.b = '-1' => (-); '0' => (+) #
22843 # ovf_res(): #
22844 # d0 = rnd mode/prec #
22845 # ovf_res2(): #
22846 # hi(d0) = rnd prec #
22847 # lo(d0) = rnd mode #
22849 # OUTPUT ************************************************************** #
22850 # a0 = points to extended precision result #
22851 # d0.b = condition code bits #
22853 # ALGORITHM *********************************************************** #
22854 # The default overflow result can be determined by the sign of #
22855 # the result and the rounding mode/prec in effect. These bits are #
22856 # concatenated together to create an index into the default result #
22857 # table. A pointer to the correct result is returned in a0. The #
22858 # resulting condition codes are returned in d0 in case the caller #
22859 # doesn't want FPSR_cc altered (as is the case for fmove out). #
22861 #########################################################################
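#
# As a C sketch of the indexing used below (illustrative; "prec" is
# 0=ext/1=sgl/2=dbl, "mode" is 0=RN/1=RZ/2=RM/3=RP as in the FPCR, and
# "sign" is 1 for a negative result):
#
#	#include <stdint.h>
#	/* idx = sign*16 + prec*4 + mode */
#	uint8_t ovf_cc(const uint8_t tbl_cc[28], int sign, int prec, int mode) {
#		return tbl_cc[sign*16 + prec*4 + mode];
#	}
#	/* each default result below is 4 longwords (16 bytes) */
#	const uint32_t *ovf_result(const uint32_t tbl[], int sign, int prec,
#	    int mode) {
#		return &tbl[(sign*16 + prec*4 + mode) * 4];
#	}
#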
22863 global ovf_res
22864 ovf_res:
22865 andi.w &0x10,%d1 # keep result sign
22866 lsr.b &0x4,%d0 # shift prec/mode
22867 or.b %d0,%d1 # concat the two
22868 mov.w %d1,%d0 # make a copy
22869 lsl.b &0x1,%d1 # multiply d1 by 2
22870 bra.b ovf_res_load
22872 global ovf_res2
22873 ovf_res2:
22874 and.w &0x10, %d1 # keep result sign
22875 or.b %d0, %d1 # insert rnd mode
22876 swap %d0
22877 or.b %d0, %d1 # insert rnd prec
22878 mov.w %d1, %d0 # make a copy
22879 lsl.b &0x1, %d1 # shift left by 1
22882 # use the rounding mode, precision, and result sign as an index into the
22883 # two tables below to fetch the default result and the result ccodes.
22885 ovf_res_load:
22886 mov.b (tbl_ovfl_cc.b,%pc,%d0.w*1), %d0 # fetch result ccodes
22887 lea (tbl_ovfl_result.b,%pc,%d1.w*8), %a0 # return result ptr
22888 rts
22891 tbl_ovfl_cc:
22892 byte 0x2, 0x0, 0x0, 0x2
22893 byte 0x2, 0x0, 0x0, 0x2
22894 byte 0x2, 0x0, 0x0, 0x2
22895 byte 0x0, 0x0, 0x0, 0x0
22896 byte 0x2+0x8, 0x8, 0x2+0x8, 0x8
22897 byte 0x2+0x8, 0x8, 0x2+0x8, 0x8
22898 byte 0x2+0x8, 0x8, 0x2+0x8, 0x8
22900 tbl_ovfl_result:
22901 long 0x7fff0000,0x00000000,0x00000000,0x00000000 # +INF; RN
22902 long 0x7ffe0000,0xffffffff,0xffffffff,0x00000000 # +EXT; RZ
22903 long 0x7ffe0000,0xffffffff,0xffffffff,0x00000000 # +EXT; RM
22904 long 0x7fff0000,0x00000000,0x00000000,0x00000000 # +INF; RP
22906 long 0x7fff0000,0x00000000,0x00000000,0x00000000 # +INF; RN
22907 long 0x407e0000,0xffffff00,0x00000000,0x00000000 # +SGL; RZ
22908 long 0x407e0000,0xffffff00,0x00000000,0x00000000 # +SGL; RM
22909 long 0x7fff0000,0x00000000,0x00000000,0x00000000 # +INF; RP
22911 long 0x7fff0000,0x00000000,0x00000000,0x00000000 # +INF; RN
22912 long 0x43fe0000,0xffffffff,0xfffff800,0x00000000 # +DBL; RZ
22913 long 0x43fe0000,0xffffffff,0xfffff800,0x00000000 # +DBL; RM
22914 long 0x7fff0000,0x00000000,0x00000000,0x00000000 # +INF; RP
22916 long 0x00000000,0x00000000,0x00000000,0x00000000
22917 long 0x00000000,0x00000000,0x00000000,0x00000000
22918 long 0x00000000,0x00000000,0x00000000,0x00000000
22919 long 0x00000000,0x00000000,0x00000000,0x00000000
22921 long 0xffff0000,0x00000000,0x00000000,0x00000000 # -INF; RN
22922 long 0xfffe0000,0xffffffff,0xffffffff,0x00000000 # -EXT; RZ
22923 long 0xffff0000,0x00000000,0x00000000,0x00000000 # -INF; RM
22924 long 0xfffe0000,0xffffffff,0xffffffff,0x00000000 # -EXT; RP
22926 long 0xffff0000,0x00000000,0x00000000,0x00000000 # -INF; RN
22927 long 0xc07e0000,0xffffff00,0x00000000,0x00000000 # -SGL; RZ
22928 long 0xffff0000,0x00000000,0x00000000,0x00000000 # -INF; RM
22929 long 0xc07e0000,0xffffff00,0x00000000,0x00000000 # -SGL; RP
22931 long 0xffff0000,0x00000000,0x00000000,0x00000000 # -INF; RN
22932 long 0xc3fe0000,0xffffffff,0xfffff800,0x00000000 # -DBL; RZ
22933 long 0xffff0000,0x00000000,0x00000000,0x00000000 # -INF; RM
22934 long 0xc3fe0000,0xffffffff,0xfffff800,0x00000000 # -DBL; RP
22936 #########################################################################
22937 # XDEF **************************************************************** #
22938 # get_packed(): fetch a packed operand from memory and then #
22939 # convert it to a floating-point binary number. #
22941 # XREF **************************************************************** #
22942 # _dcalc_ea() - calculate the correct <ea> #
22943 # _mem_read() - fetch the packed operand from memory #
22944 # facc_in_x() - the fetch failed so jump to special exit code #
22945 # decbin() - convert packed to binary extended precision #
22947 # INPUT *************************************************************** #
22948 # None #
22950 # OUTPUT ************************************************************** #
22951 # If no failure on _mem_read(): #
22952 # FP_SRC(a6) = packed operand now as a binary FP number #
22954 # ALGORITHM *********************************************************** #
22955 # Get the correct <ea> which is the value on the exception stack #
22956 # frame w/ maybe a correction factor if the <ea> is -(an) or (an)+. #
22957 # Then, fetch the operand from memory. If the fetch fails, exit #
22958 # through facc_in_x(). #
22959 # If the packed operand is a ZERO, NAN, or INF, convert it to #
22960 # its binary representation here. Else, call decbin() which will #
22961 # convert the packed value to an extended precision binary value. #
22963 #########################################################################
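#
# As a C sketch of the special-case tests below (illustrative; p[0..2] are
# the three big-endian longwords of the packed operand at FP_SRC):
#
#	#include <stdint.h>
#	/* SE, the two Y bits and the three exponent digits all ones => INF/NaN */
#	int packed_inf_or_nan(const uint32_t p[3]) {
#		return ((p[0] >> 16) & 0x7fff) == 0x7fff;
#	}
#	/* explicit integer digit and both mantissa longwords zero => ZERO */
#	int packed_zero(const uint32_t p[3]) {
#		return (p[0] & 0xf) == 0 && p[1] == 0 && p[2] == 0;
#	}
#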
22965 # the stacked <ea> for packed is correct except for -(An).
22966 # the base reg must be updated for both -(An) and (An)+.
22967 global get_packed
22968 get_packed:
22969 mov.l &0xc,%d0 # packed is 12 bytes
22970 bsr.l _dcalc_ea # fetch <ea>; correct An
22972 lea FP_SRC(%a6),%a1 # pass: ptr to super dst
22973 mov.l &0xc,%d0 # pass: 12 bytes
22974 bsr.l _dmem_read # read packed operand
22976 tst.l %d1 # did dfetch fail?
22977 bne.l facc_in_x # yes
22979 # The packed operand is an INF or a NAN if the exponent field is all ones.
22980 bfextu FP_SRC(%a6){&1:&15},%d0 # get exp
22981 cmpi.w %d0,&0x7fff # INF or NAN?
22982 bne.b gp_try_zero # no
22983 rts # operand is an INF or NAN
22985 # The packed operand is a zero if the mantissa is all zero, else it's
22986 # a normal packed op.
22987 gp_try_zero:
22988 mov.b 3+FP_SRC(%a6),%d0 # get byte 4
22989 andi.b &0x0f,%d0 # clear all but last nybble
22990 bne.b gp_not_spec # not a zero
22991 tst.l FP_SRC_HI(%a6) # is lw 2 zero?
22992 bne.b gp_not_spec # not a zero
22993 tst.l FP_SRC_LO(%a6) # is lw 3 zero?
22994 bne.b gp_not_spec # not a zero
22995 rts # operand is a ZERO
22996 gp_not_spec:
22997 lea FP_SRC(%a6),%a0 # pass: ptr to packed op
22998 bsr.l decbin # convert to extended
22999 fmovm.x &0x80,FP_SRC(%a6) # make this the srcop
23000 rts
23002 #########################################################################
23003 # decbin(): Converts normalized packed bcd value pointed to by register #
23004 # a0 to extended-precision value in fp0. #
23006 # INPUT *************************************************************** #
23007 # a0 = pointer to normalized packed bcd value #
23009 # OUTPUT ************************************************************** #
23010 # fp0 = exact fp representation of the packed bcd value. #
23012 # ALGORITHM *********************************************************** #
23013 # Expected is a normal bcd (i.e. non-exceptional; all inf, zero, #
23014 # and NaN operands are dispatched without entering this routine) #
23015 # value in 68881/882 format at location (a0). #
23017 # A1. Convert the bcd exponent to binary by successive adds and #
23018 # muls. Set the sign according to SE. Subtract 16 to compensate #
23019 # for the mantissa which is to be interpreted as 17 integer #
23020 # digits, rather than 1 integer and 16 fraction digits. #
23021 # Note: this operation can never overflow. #
23023 # A2. Convert the bcd mantissa to binary by successive #
23024 # adds and muls in FP0. Set the sign according to SM. #
23025 # The mantissa digits will be converted with the decimal point #
23026 # assumed following the least-significant digit. #
23027 # Note: this operation can never overflow. #
23029 # A3. Count the number of leading/trailing zeros in the #
23030 # bcd string. If SE is positive, count the leading zeros; #
23031 # if negative, count the trailing zeros. Set the adjusted #
23032 # exponent equal to the exponent from A1 and the zero count #
23033 # added if SM = 1 and subtracted if SM = 0. Scale the #
23034 # mantissa the equivalent of forcing in the bcd value: #
23036 # SM = 0 a non-zero digit in the integer position #
23037 # SM = 1 a non-zero digit in Mant0, lsd of the fraction #
23039 # this will insure that any value, regardless of its #
23040 # representation (ex. 0.1E2, 1E1, 10E0, 100E-1), is converted #
23041 # consistently. #
23043 # A4. Calculate the factor 10^exp in FP1 using a table of #
23044 # 10^(2^n) values. To reduce the error in forming factors #
23045 # greater than 10^27, a directed rounding scheme is used with #
23046 # tables rounded to RN, RM, and RP, according to the table #
23047 # in the comments of the pwrten section. #
23049 # A5. Form the final binary number by scaling the mantissa by #
23050 # the exponent factor. This is done by multiplying the #
23051 # mantissa in FP0 by the factor in FP1 if the adjusted #
23052 # exponent sign is positive, and dividing FP0 by FP1 if #
23053 # it is negative. #
23055 # Clean up and return. Check if the final mul or div was inexact. #
23056 # If so, set INEX1 in USER_FPSR. #
23058 #########################################################################
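#
# For orientation, the same conversion as a rough C model (illustrative
# only: it works in double with pow(), so unlike the code below it neither
# keeps extended precision nor controls the rounding of the 10^exp factor;
# the function name is ours, not part of the package):
#
#	#include <math.h>
#	#include <stdint.h>
#	double packed_to_double(const uint32_t p[3]) {
#		int sm = (p[0] >> 31) & 1, se = (p[0] >> 30) & 1;
#		long e = 0;
#		for (int i = 0; i < 3; i++)        /* A1: three exponent digits */
#			e = e * 10 + ((p[0] >> (24 - 4*i)) & 0xf);
#		if (se) e = -e;
#		e -= 16;             /* mantissa is read as 17 integer digits */
#		double m = p[0] & 0xf;             /* A2: explicit integer digit */
#		for (int w = 1; w <= 2; w++)
#			for (int i = 0; i < 8; i++)
#				m = m * 10.0 + ((p[w] >> (28 - 4*i)) & 0xf);
#		if (sm) m = -m;
#		return m * pow(10.0, (double)e);   /* A4/A5: scale by 10^exp */
#	}
#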
23061 # PTENRN, PTENRM, and PTENRP are arrays of powers of 10 rounded
23062 # to nearest, minus, and plus, respectively. The tables include
23063 # 10**{1,2,4,8,16,32,64,128,256,512,1024,2048,4096}. No rounding
23064 # is required until the power is greater than 27, however, all
23065 # tables include the first 5 for ease of indexing.
23067 RTABLE:
23068 byte 0,0,0,0
23069 byte 2,3,2,3
23070 byte 2,3,3,2
23071 byte 3,2,2,3
23073 set FNIBS,7
23074 set FSTRT,0
23076 set ESTRT,4
23077 set EDIGITS,2
23079 global decbin
23080 decbin:
23081 mov.l 0x0(%a0),FP_SCR0_EX(%a6) # make a copy of input
23082 mov.l 0x4(%a0),FP_SCR0_HI(%a6) # so we don't alter it
23083 mov.l 0x8(%a0),FP_SCR0_LO(%a6)
23085 lea FP_SCR0(%a6),%a0
23087 movm.l &0x3c00,-(%sp) # save d2-d5
23088 fmovm.x &0x1,-(%sp) # save fp1
23090 # Calculate exponent:
23091 # 1. Copy bcd value in memory for use as a working copy.
23092 # 2. Calculate absolute value of exponent in d1 by mul and add.
23093 # 3. Correct for exponent sign.
23094 # 4. Subtract 16 to compensate for interpreting the mant as all integer digits.
23095 # (i.e., all digits assumed left of the decimal point.)
23097 # Register usage:
23099 # calc_e:
23100 # (*) d0: temp digit storage
23101 # (*) d1: accumulator for binary exponent
23102 # (*) d2: digit count
23103 # (*) d3: offset pointer
23104 # ( ) d4: first word of bcd
23105 # ( ) a0: pointer to working bcd value
23106 # ( ) a6: pointer to original bcd value
23107 # (*) FP_SCR1: working copy of original bcd value
23108 # (*) L_SCR1: copy of original exponent word
23110 calc_e:
23111 mov.l &EDIGITS,%d2 # # of nibbles (digits) in fraction part
23112 mov.l &ESTRT,%d3 # counter to pick up digits
23113 mov.l (%a0),%d4 # get first word of bcd
23114 clr.l %d1 # zero d1 for accumulator
23115 e_gd:
23116 mulu.l &0xa,%d1 # mul partial product by one digit place
23117 bfextu %d4{%d3:&4},%d0 # get the digit and zero extend into d0
23118 add.l %d0,%d1 # d1 = d1 + d0
23119 addq.b &4,%d3 # advance d3 to the next digit
23120 dbf.w %d2,e_gd # if we have used all 3 digits, exit loop
23121 btst &30,%d4 # get SE
23122 beq.b e_pos # don't negate if pos
23123 neg.l %d1 # negate before subtracting
23124 e_pos:
23125 sub.l &16,%d1 # sub to compensate for shift of mant
23126 bge.b e_save # if still pos, do not neg
23127 neg.l %d1 # now negative, make pos and set SE
23128 or.l &0x40000000,%d4 # set SE in d4,
23129 or.l &0x40000000,(%a0) # and in working bcd
23130 e_save:
23131 mov.l %d1,-(%sp) # save exp on stack
23134 # Calculate mantissa:
23135 # 1. Calculate absolute value of mantissa in fp0 by mul and add.
23136 # 2. Correct for mantissa sign.
23137 # (i.e., all digits assumed left of the decimal point.)
23139 # Register usage:
23141 # calc_m:
23142 # (*) d0: temp digit storage
23143 # (*) d1: lword counter
23144 # (*) d2: digit count
23145 # (*) d3: offset pointer
23146 # ( ) d4: words 2 and 3 of bcd
23147 # ( ) a0: pointer to working bcd value
23148 # ( ) a6: pointer to original bcd value
23149 # (*) fp0: mantissa accumulator
23150 # ( ) FP_SCR1: working copy of original bcd value
23151 # ( ) L_SCR1: copy of original exponent word
23153 calc_m:
23154 mov.l &1,%d1 # word counter, init to 1
23155 fmov.s &0x00000000,%fp0 # accumulator
23158 # Since the packed number has a long word between the first & second parts,
23159 # get the integer digit then skip down & get the rest of the
23160 # mantissa. We will unroll the loop once.
23162 bfextu (%a0){&28:&4},%d0 # integer part is ls digit in long word
23163 fadd.b %d0,%fp0 # add digit to sum in fp0
23166 # Get the rest of the mantissa.
23168 loadlw:
23169 mov.l (%a0,%d1.L*4),%d4 # load mantissa longword into d4
23170 mov.l &FSTRT,%d3 # counter to pick up digits
23171 mov.l &FNIBS,%d2 # reset number of digits per a0 ptr
23172 md2b:
23173 fmul.s &0x41200000,%fp0 # fp0 = fp0 * 10
23174 bfextu %d4{%d3:&4},%d0 # get the digit and zero extend
23175 fadd.b %d0,%fp0 # fp0 = fp0 + digit
23178 # If all the digits (8) in that long word have been converted (d2=0),
23179 # then inc d1 (=2) to point to the next long word and reset d3 to 0
23180 # to initialize the digit offset, and set d2 to 7 for the digit count;
23181 # else continue with this long word.
23183 addq.b &4,%d3 # advance d3 to the next digit
23184 dbf.w %d2,md2b # check for last digit in this lw
23185 nextlw:
23186 addq.l &1,%d1 # inc lw pointer in mantissa
23187 cmp.l %d1,&2 # test for last lw
23188 ble.b loadlw # if not, get last one
23190 # Check the sign of the mant and make the value in fp0 the same sign.
23192 m_sign:
23193 btst &31,(%a0) # test sign of the mantissa
23194 beq.b ap_st_z # if clear, go to append/strip zeros
23195 fneg.x %fp0 # if set, negate fp0
23197 # Append/strip zeros:
23199 # For adjusted exponents which have an absolute value greater than 27*,
23200 # this routine calculates the amount needed to normalize the mantissa
23201 # for the adjusted exponent. That number is subtracted from the exp
23202 # if the exp was positive, and added if it was negative. The purpose
23203 # of this is to reduce the value of the exponent and the possibility
23204 # of error in calculation of pwrten.
23206 # 1. Branch on the sign of the adjusted exponent.
23207 # 2p.(positive exp)
23208 # 2. Check M16 and the digits in lwords 2 and 3 in descending order.
23209 # 3. Add one for each zero encountered until a non-zero digit.
23210 # 4. Subtract the count from the exp.
23211 # 5. Check if the exp has crossed zero in #3 above; make the exp abs
23212 # and set SE.
23213 # 6. Multiply the mantissa by 10**count.
23214 # 2n.(negative exp)
23215 # 2. Check the digits in lwords 3 and 2 in descending order.
23216 # 3. Add one for each zero encountered until a non-zero digit.
23217 # 4. Add the count to the exp.
23218 # 5. Check if the exp has crossed zero in #3 above; clear SE.
23219 # 6. Divide the mantissa by 10**count.
23221 # *Why 27? If the adjusted exponent is within -28 < expA < 28, then
23222 # any adjustment due to append/strip zeros will drive the resultant
23223 # exponent towards zero. Since all pwrten constants with a power
23224 # of 27 or less are exact, there is no need to use this routine to
23225 # attempt to lessen the resultant exponent.
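#
# Numerically, both branches fold the same power of ten into the exponent
# and the mantissa, so the represented value is unchanged (a sketch; z is
# the zero count and exp the signed adjusted exponent):
#
#	if (exp >  27) { exp -= z; m *= pow(10.0, z); } /* strip leading zeros  */
#	if (exp < -27) { exp += z; m /= pow(10.0, z); } /* strip trailing zeros */
#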
23227 # Register usage:
23229 # ap_st_z:
23230 # (*) d0: temp digit storage
23231 # (*) d1: zero count
23232 # (*) d2: digit count
23233 # (*) d3: offset pointer
23234 # ( ) d4: first word of bcd
23235 # (*) d5: lword counter
23236 # ( ) a0: pointer to working bcd value
23237 # ( ) FP_SCR1: working copy of original bcd value
23238 # ( ) L_SCR1: copy of original exponent word
23241 # First check the absolute value of the exponent to see if this
23242 # routine is necessary. If so, then check the sign of the exponent
23243 # and do append (+) or strip (-) zeros accordingly.
23244 # This section handles a positive adjusted exponent.
23246 ap_st_z:
23247 mov.l (%sp),%d1 # load expA for range test
23248 cmp.l %d1,&27 # test is with 27
23249 ble.w pwrten # if abs(expA) <28, skip ap/st zeros
23250 btst &30,(%a0) # check sign of exp
23251 bne.b ap_st_n # if neg, go to neg side
23252 clr.l %d1 # zero count reg
23253 mov.l (%a0),%d4 # load lword 1 to d4
23254 bfextu %d4{&28:&4},%d0 # get M16 in d0
23255 bne.b ap_p_fx # if M16 is non-zero, go fix exp
23256 addq.l &1,%d1 # inc zero count
23257 mov.l &1,%d5 # init lword counter
23258 mov.l (%a0,%d5.L*4),%d4 # get lword 2 to d4
23259 bne.b ap_p_cl # if lw 2 is zero, skip it
23260 addq.l &8,%d1 # and inc count by 8
23261 addq.l &1,%d5 # inc lword counter
23262 mov.l (%a0,%d5.L*4),%d4 # get lword 3 to d4
23263 ap_p_cl:
23264 clr.l %d3 # init offset reg
23265 mov.l &7,%d2 # init digit counter
23266 ap_p_gd:
23267 bfextu %d4{%d3:&4},%d0 # get digit
23268 bne.b ap_p_fx # if non-zero, go to fix exp
23269 addq.l &4,%d3 # point to next digit
23270 addq.l &1,%d1 # inc digit counter
23271 dbf.w %d2,ap_p_gd # get next digit
23272 ap_p_fx:
23273 mov.l %d1,%d0 # copy counter to d0
23274 mov.l (%sp),%d1 # get adjusted exp from memory
23275 sub.l %d0,%d1 # subtract count from exp
23276 bge.b ap_p_fm # if still pos, go to pwrten
23277 neg.l %d1 # now its neg; get abs
23278 mov.l (%a0),%d4 # load lword 1 to d4
23279 or.l &0x40000000,%d4 # and set SE in d4
23280 or.l &0x40000000,(%a0) # and in memory
23282 # Calculate the mantissa multiplier to compensate for the stripping of
23283 # zeros from the mantissa.
23285 ap_p_fm:
23286 lea.l PTENRN(%pc),%a1 # get address of power-of-ten table
23287 clr.l %d3 # init table index
23288 fmov.s &0x3f800000,%fp1 # init fp1 to 1
23289 mov.l &3,%d2 # init d2 to count bits in counter
23290 ap_p_el:
23291 asr.l &1,%d0 # shift lsb into carry
23292 bcc.b ap_p_en # if 1, mul fp1 by pwrten factor
23293 fmul.x (%a1,%d3),%fp1 # mul by 10**(d3_bit_no)
23294 ap_p_en:
23295 add.l &12,%d3 # inc d3 to next rtable entry
23296 tst.l %d0 # check if d0 is zero
23297 bne.b ap_p_el # if not, get next bit
23298 fmul.x %fp1,%fp0 # mul mantissa by 10**(no_bits_shifted)
23299 bra.b pwrten # go calc pwrten
23301 # This section handles a negative adjusted exponent.
23303 ap_st_n:
23304 clr.l %d1 # clr counter
23305 mov.l &2,%d5 # set up d5 to point to lword 3
23306 mov.l (%a0,%d5.L*4),%d4 # get lword 3
23307 bne.b ap_n_cl # if not zero, check digits
23308 sub.l &1,%d5 # dec d5 to point to lword 2
23309 addq.l &8,%d1 # inc counter by 8
23310 mov.l (%a0,%d5.L*4),%d4 # get lword 2
23311 ap_n_cl:
23312 mov.l &28,%d3 # point to last digit
23313 mov.l &7,%d2 # init digit counter
23314 ap_n_gd:
23315 bfextu %d4{%d3:&4},%d0 # get digit
23316 bne.b ap_n_fx # if non-zero, go to exp fix
23317 subq.l &4,%d3 # point to previous digit
23318 addq.l &1,%d1 # inc digit counter
23319 dbf.w %d2,ap_n_gd # get next digit
23320 ap_n_fx:
23321 mov.l %d1,%d0 # copy counter to d0
23322 mov.l (%sp),%d1 # get adjusted exp from memory
23323 sub.l %d0,%d1 # subtract count from exp
23324 bgt.b ap_n_fm # if still pos, go fix mantissa
23325 neg.l %d1 # take abs of exp and clr SE
23326 mov.l (%a0),%d4 # load lword 1 to d4
23327 and.l &0xbfffffff,%d4 # and clr SE in d4
23328 and.l &0xbfffffff,(%a0) # and in memory
23330 # Calculate the mantissa multiplier to compensate for the appending of
23331 # zeros to the mantissa.
23333 ap_n_fm:
23334 lea.l PTENRN(%pc),%a1 # get address of power-of-ten table
23335 clr.l %d3 # init table index
23336 fmov.s &0x3f800000,%fp1 # init fp1 to 1
23337 mov.l &3,%d2 # init d2 to count bits in counter
23338 ap_n_el:
23339 asr.l &1,%d0 # shift lsb into carry
23340 bcc.b ap_n_en # if 1, mul fp1 by pwrten factor
23341 fmul.x (%a1,%d3),%fp1 # mul by 10**(d3_bit_no)
23342 ap_n_en:
23343 add.l &12,%d3 # inc d3 to next rtable entry
23344 tst.l %d0 # check if d0 is zero
23345 bne.b ap_n_el # if not, get next bit
23346 fdiv.x %fp1,%fp0 # div mantissa by 10**(no_bits_shifted)
23349 # Calculate power-of-ten factor from adjusted and shifted exponent.
23351 # Register usage:
23353 # pwrten:
23354 # (*) d0: temp
23355 # ( ) d1: exponent
23356 # (*) d2: {FPCR[6:5],SM,SE} as index in RTABLE; temp
23357 # (*) d3: FPCR work copy
23358 # ( ) d4: first word of bcd
23359 # (*) a1: RTABLE pointer
23360 # calc_p:
23361 # (*) d0: temp
23362 # ( ) d1: exponent
23363 # (*) d3: PWRTxx table index
23364 # ( ) a0: pointer to working copy of bcd
23365 # (*) a1: PWRTxx pointer
23366 # (*) fp1: power-of-ten accumulator
23368 # Pwrten calculates the exponent factor in the selected rounding mode
23369 # according to the following table:
23371 # Sign of Mant Sign of Exp Rounding Mode PWRTEN Rounding Mode
23373 # ANY ANY RN RN
23375 # + + RP RP
23376 # - + RP RM
23377 # + - RP RM
23378 # - - RP RP
23380 # + + RM RM
23381 # - + RM RP
23382 # + - RM RP
23383 # - - RM RM
23385 # + + RZ RM
23386 # - + RZ RM
23387 # + - RZ RP
23388 # - - RZ RP
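#
# The same selection as a C lookup (illustrative; entries use the FPCR
# mode encoding 0=RN, 1=RZ, 2=RM, 3=RP, and SM/SE are the packed sign bits):
#
#	static const unsigned char rtable[16] = {
#		0, 0, 0, 0,	/* RN: always RN               */
#		2, 3, 2, 3,	/* RZ: RM if SE = 0, else RP   */
#		2, 3, 3, 2,	/* RM: RM if SM == SE, else RP */
#		3, 2, 2, 3 };	/* RP: RP if SM == SE, else RM */
#	int pwrten_mode(int mode, int sm, int se) {
#		return rtable[mode*4 + sm*2 + se];
#	}
#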
23391 pwrten:
23392 mov.l USER_FPCR(%a6),%d3 # get user's FPCR
23393 bfextu %d3{&26:&2},%d2 # isolate rounding mode bits
23394 mov.l (%a0),%d4 # reload 1st bcd word to d4
23395 asl.l &2,%d2 # format d2 to be
23396 bfextu %d4{&0:&2},%d0 # {FPCR[6],FPCR[5],SM,SE}
23397 add.l %d0,%d2 # in d2 as index into RTABLE
23398 lea.l RTABLE(%pc),%a1 # load rtable base
23399 mov.b (%a1,%d2),%d0 # load new rounding bits from table
23400 clr.l %d3 # clear d3 to force no exc and extended
23401 bfins %d0,%d3{&26:&2} # stuff new rounding bits in FPCR
23402 fmov.l %d3,%fpcr # write new FPCR
23403 asr.l &1,%d0 # write correct PTENxx table
23404 bcc.b not_rp # to a1
23405 lea.l PTENRP(%pc),%a1 # it is RP
23406 bra.b calc_p # go to init section
23407 not_rp:
23408 asr.l &1,%d0 # keep checking
23409 bcc.b not_rm
23410 lea.l PTENRM(%pc),%a1 # it is RM
23411 bra.b calc_p # go to init section
23412 not_rm:
23413 lea.l PTENRN(%pc),%a1 # it is RN
23414 calc_p:
23415 mov.l %d1,%d0 # copy exp to d0;use d0
23416 bpl.b no_neg # if exp is negative,
23417 neg.l %d0 # invert it
23418 or.l &0x40000000,(%a0) # and set SE bit
23419 no_neg:
23420 clr.l %d3 # table index
23421 fmov.s &0x3f800000,%fp1 # init fp1 to 1
23422 e_loop:
23423 asr.l &1,%d0 # shift next bit into carry
23424 bcc.b e_next # if zero, skip the mul
23425 fmul.x (%a1,%d3),%fp1 # mul by 10**(d3_bit_no)
23426 e_next:
23427 add.l &12,%d3 # inc d3 to next rtable entry
23428 tst.l %d0 # check if d0 is zero
23429 bne.b e_loop # not zero, continue shifting
23432 # Check the sign of the adjusted exp and make the value in fp0 the
23433 # same sign. If the exp was pos then multiply fp1*fp0;
23434 # else divide fp0/fp1.
23436 # Register Usage:
23437 # norm:
23438 # ( ) a0: pointer to working bcd value
23439 # (*) fp0: mantissa accumulator
23440 # ( ) fp1: scaling factor - 10**(abs(exp))
23442 pnorm:
23443 btst &30,(%a0) # test the sign of the exponent
23444 beq.b mul # if clear, go to multiply
23445 div:
23446 fdiv.x %fp1,%fp0 # exp is negative, so divide mant by exp
23447 bra.b end_dec
23448 mul:
23449 fmul.x %fp1,%fp0 # exp is positive, so multiply by exp
23452 # Clean up and return with result in fp0.
23454 # If the final mul/div in decbin incurred an inex exception,
23455 # it will be inex2, but will be reported as inex1 by get_op.
23457 end_dec:
23458 fmov.l %fpsr,%d0 # get status register
23459 bclr &inex2_bit+8,%d0 # test for inex2 and clear it
23460 beq.b no_exc # skip this if no exc
23461 ori.w &inx1a_mask,2+USER_FPSR(%a6) # set INEX1/AINEX
23462 no_exc:
23463 add.l &0x4,%sp # clear 1 lw param
23464 fmovm.x (%sp)+,&0x40 # restore fp1
23465 movm.l (%sp)+,&0x3c # restore d2-d5
23466 fmov.l &0x0,%fpcr
23467 fmov.l &0x0,%fpsr
23468 rts
23470 #########################################################################
23471 # bindec(): Converts an input in extended precision format to bcd format#
23473 # INPUT *************************************************************** #
23474 # a0 = pointer to the input extended precision value in memory. #
23475 # the input may be either normalized, unnormalized, or #
23476 # denormalized. #
23477 # d0 = contains the k-factor sign-extended to 32-bits. #
23479 # OUTPUT ************************************************************** #
23480 # FP_SCR0(a6) = bcd format result on the stack. #
23482 # ALGORITHM *********************************************************** #
23484 # A1. Set RM and size ext; Set SIGMA = sign of input. #
23485 # The k-factor is saved for use in d7. Clear the #
23486 # BINDEC_FLG for separating normalized/denormalized #
23487 # input. If input is unnormalized or denormalized, #
23488 # normalize it. #
23490 # A2. Set X = abs(input). #
23492 # A3. Compute ILOG. #
23493 # ILOG is the log base 10 of the input value. It is #
23494 # approximated by adding e + 0.f when the original #
23495 # value is viewed as 2^^e * 1.f in extended precision. #
23496 # This value is stored in d6. #
23498 # A4. Clr INEX bit. #
23499 # The operation in A3 above may have set INEX2. #
23501 # A5. Set ICTR = 0; #
23502 # ICTR is a flag used in A13. It must be set before the #
23503 # loop entry A6. #
23505 # A6. Calculate LEN. #
23506 # LEN is the number of digits to be displayed. The #
23507 # k-factor can dictate either the total number of digits, #
23508 # if it is a positive number, or the number of digits #
23509 # after the decimal point which are to be included as #
23510 # significant. See the 68882 manual for examples. #
23511 # If LEN is computed to be greater than 17, set OPERR in #
23512 # USER_FPSR. LEN is stored in d4. #
23514 # A7. Calculate SCALE. #
23515 # SCALE is equal to 10^ISCALE, where ISCALE is the number #
23516 # of decimal places needed to insure LEN integer digits #
23517 # in the output before conversion to bcd. LAMBDA is the #
23518 # sign of ISCALE, used in A9. Fp1 contains #
23519 # 10^^(abs(ISCALE)) using a rounding mode which is a #
23520 # function of the original rounding mode and the signs #
23521 # of ISCALE and X. A table is given in the code. #
23523 # A8. Clr INEX; Force RZ. #
23524 # The operation in A3 above may have set INEX2. #
23525 # RZ mode is forced for the scaling operation to insure #
23526 # only one rounding error. The grs bits are collected in #
23527 # the INEX flag for use in A10. #
23529 # A9. Scale X -> Y. #
23530 # The mantissa is scaled to the desired number of #
23531 # significant digits. The excess digits are collected #
23532 # in INEX2. #
23534 # A10. Or in INEX. #
23535 # If INEX is set, round error occurred. This is #
23536 # compensated for by 'or-ing' in the INEX2 flag to #
23537 # the lsb of Y. #
23539 # A11. Restore original FPCR; set size ext. #
23540 # Perform FINT operation in the user's rounding mode. #
23541 # Keep the size to extended. #
23543 # A12. Calculate YINT = FINT(Y) according to user's rounding #
23544 # mode. The FPSP routine sintd0 is used. The output #
23545 # is in fp0. #
23547 # A13. Check for LEN digits. #
23548 # If the int operation results in more than LEN digits, #
23549 # or less than LEN -1 digits, adjust ILOG and repeat from #
23550 # A6. This test occurs only on the first pass. If the #
23551 # result is exactly 10^LEN, decrement ILOG and divide #
23552 # the mantissa by 10. #
23554 # A14. Convert the mantissa to bcd. #
23555 # The binstr routine is used to convert the LEN digit #
23556 # mantissa to bcd in memory. The input to binstr is #
23557 # to be a fraction; i.e. (mantissa)/10^LEN and adjusted #
23558 # such that the decimal point is to the left of bit 63. #
23559 # The bcd digits are stored in the correct position in #
23560 # the final string area in memory. #
23562 # A15. Convert the exponent to bcd. #
23563 # As in A14 above, the exp is converted to bcd and the #
23564 # digits are stored in the final string. #
23565 # Test the length of the final exponent string. If the #
23566 # length is 4, set operr. #
23568 # A16. Write sign bits to final string. #
23570 #########################################################################
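#
# As a rough C model of the main path (A3-A12) for a finite, nonzero input
# (illustrative only: it works in double with llround(), clamps where the
# code signals OPERR, and ignores the A13 re-adjustment and the denorm
# paths; the function name is ours, not part of the package):
#
#	#include <math.h>
#	/* returns the LEN-digit integer whose digits become the bcd mantissa;
#	   *dec_exp receives the decimal exponent (ILOG) used in A15 */
#	long long bindec_model(double x, int k, int *dec_exp, int *len_out) {
#		int ilog = (int)floor(log10(fabs(x)));        /* A3 */
#		int len  = (k > 0) ? k : ilog + 1 - k;        /* A6 */
#		if (len > 17) len = 17;
#		if (len < 1)  len = 1;
#		int iscale = ilog + 1 - len;                  /* A7 */
#		*dec_exp = ilog;
#		*len_out = len;
#		return llround(fabs(x) * pow(10.0, -iscale)); /* A9 + A12 */
#	}
#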
23572 set BINDEC_FLG, EXC_TEMP # DENORM flag
23574 # Constants in extended precision
23575 PLOG2:
23576 long 0x3FFD0000,0x9A209A84,0xFBCFF798,0x00000000
23577 PLOG2UP1:
23578 long 0x3FFD0000,0x9A209A84,0xFBCFF799,0x00000000
23580 # Constants in single precision
23581 FONE:
23582 long 0x3F800000,0x00000000,0x00000000,0x00000000
23583 FTWO:
23584 long 0x40000000,0x00000000,0x00000000,0x00000000
23585 FTEN:
23586 long 0x41200000,0x00000000,0x00000000,0x00000000
23587 F4933:
23588 long 0x459A2800,0x00000000,0x00000000,0x00000000
23590 RBDTBL:
23591 byte 0,0,0,0
23592 byte 3,3,2,2
23593 byte 3,2,2,3
23594 byte 2,3,3,2
23596 # Implementation Notes:
23598 # The registers are used as follows:
23600 # d0: scratch; LEN input to binstr
23601 # d1: scratch
23602 # d2: upper 32-bits of mantissa for binstr
23603 # d3: scratch;lower 32-bits of mantissa for binstr
23604 # d4: LEN
23605 # d5: LAMBDA/ICTR
23606 # d6: ILOG
23607 # d7: k-factor
23608 # a0: ptr for original operand/final result
23609 # a1: scratch pointer
23610 # a2: pointer to FP_X; abs(original value) in ext
23611 # fp0: scratch
23612 # fp1: scratch
23613 # fp2: scratch
23614 # F_SCR1:
23615 # F_SCR2:
23616 # L_SCR1:
23617 # L_SCR2:
23619 global bindec
23620 bindec:
23621 movm.l &0x3f20,-(%sp) # {%d2-%d7/%a2}
23622 fmovm.x &0x7,-(%sp) # {%fp0-%fp2}
23624 # A1. Set RM and size ext. Set SIGMA = sign input;
23625 # The k-factor is saved for use in d7. Clear BINDEC_FLG for
23626 # separating normalized/denormalized input. If the input
23627 # is a denormalized number, set the BINDEC_FLG memory word
23628 # to signal denorm. If the input is unnormalized, normalize
23629 # the input and test for denormalized result.
23631 fmov.l &rm_mode*0x10,%fpcr # set RM and ext
23632 mov.l (%a0),L_SCR2(%a6) # save exponent for sign check
23633 mov.l %d0,%d7 # move k-factor to d7
23635 clr.b BINDEC_FLG(%a6) # clr norm/denorm flag
23636 cmpi.b STAG(%a6),&DENORM # is input a DENORM?
23637 bne.w A2_str # no; input is a NORM
23640 # Normalize the denorm
23642 un_de_norm:
23643 mov.w (%a0),%d0
23644 and.w &0x7fff,%d0 # strip sign of normalized exp
23645 mov.l 4(%a0),%d1
23646 mov.l 8(%a0),%d2
23647 norm_loop:
23648 sub.w &1,%d0
23649 lsl.l &1,%d2
23650 roxl.l &1,%d1
23651 tst.l %d1
23652 bge.b norm_loop
23654 # Test if the normalized input is denormalized
23656 tst.w %d0
23657 bgt.b pos_exp # if greater than zero, it is a norm
23658 st BINDEC_FLG(%a6) # set flag for denorm
23659 pos_exp:
23660 and.w &0x7fff,%d0 # strip sign of normalized exp
23661 mov.w %d0,(%a0)
23662 mov.l %d1,4(%a0)
23663 mov.l %d2,8(%a0)
23665 # A2. Set X = abs(input).
23667 A2_str:
23668 mov.l (%a0),FP_SCR1(%a6) # move input to work space
23669 mov.l 4(%a0),FP_SCR1+4(%a6) # move input to work space
23670 mov.l 8(%a0),FP_SCR1+8(%a6) # move input to work space
23671 and.l &0x7fffffff,FP_SCR1(%a6) # create abs(X)
23673 # A3. Compute ILOG.
23674 # ILOG is the log base 10 of the input value. It is approx-
23675 # imated by adding e + 0.f when the original value is viewed
23676 # as 2^^e * 1.f in extended precision. This value is stored
23677 # in d6.
23679 # Register usage:
23680 # Input/Output
23681 # d0: k-factor/exponent
23682 # d2: x/x
23683 # d3: x/x
23684 # d4: x/x
23685 # d5: x/x
23686 # d6: x/ILOG
23687 # d7: k-factor/Unchanged
23688 # a0: ptr for original operand/final result
23689 # a1: x/x
23690 # a2: x/x
23691 # fp0: x/float(ILOG)
23692 # fp1: x/x
23693 # fp2: x/x
23694 # F_SCR1:x/x
23695 # F_SCR2:Abs(X)/Abs(X) with $3fff exponent
23696 # L_SCR1:x/x
23697 # L_SCR2:first word of X packed/Unchanged
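#
# A quick numeric check (illustrative): x = 200.0 = 2^7 * 1.5625, so
# (7 + 0.5625) * log10(2) = 2.276..., giving ILOG = 2 = floor(log10(200)).
#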
23699 tst.b BINDEC_FLG(%a6) # check for denorm
23700 beq.b A3_cont # if clr, continue with norm
23701 mov.l &-4933,%d6 # force ILOG = -4933
23702 bra.b A4_str
23703 A3_cont:
23704 mov.w FP_SCR1(%a6),%d0 # move exp to d0
23705 mov.w &0x3fff,FP_SCR1(%a6) # replace exponent with 0x3fff
23706 fmov.x FP_SCR1(%a6),%fp0 # now fp0 has 1.f
23707 sub.w &0x3fff,%d0 # strip off bias
23708 fadd.w %d0,%fp0 # add in exp
23709 fsub.s FONE(%pc),%fp0 # subtract off 1.0
23710 fbge.w pos_res # if pos, branch
23711 fmul.x PLOG2UP1(%pc),%fp0 # if neg, mul by LOG2UP1
23712 fmov.l %fp0,%d6 # put ILOG in d6 as a lword
23713 bra.b A4_str # go move out ILOG
23714 pos_res:
23715 fmul.x PLOG2(%pc),%fp0 # if pos, mul by LOG2
23716 fmov.l %fp0,%d6 # put ILOG in d6 as a lword
23719 # A4. Clr INEX bit.
23720 # The operation in A3 above may have set INEX2.
23722 A4_str:
23723 fmov.l &0,%fpsr # zero all of fpsr - nothing needed
23726 # A5. Set ICTR = 0;
23727 # ICTR is a flag used in A13. It must be set before the
23728 # loop entry A6. The lower word of d5 is used for ICTR.
23730 clr.w %d5 # clear ICTR
23732 # A6. Calculate LEN.
23733 # LEN is the number of digits to be displayed. The k-factor
23734 # can dictate either the total number of digits, if it is
23735 # a positive number, or the number of digits after the
23736 # original decimal point which are to be included as
23737 # significant. See the 68882 manual for examples.
23738 # If LEN is computed to be greater than 17, set OPERR in
23739 # USER_FPSR. LEN is stored in d4.
23741 # Register usage:
23742 # Input/Output
23743 # d0: exponent/Unchanged
23744 # d2: x/x/scratch
23745 # d3: x/x
23746 # d4: exc picture/LEN
23747 # d5: ICTR/Unchanged
23748 # d6: ILOG/Unchanged
23749 # d7: k-factor/Unchanged
23750 # a0: ptr for original operand/final result
23751 # a1: x/x
23752 # a2: x/x
23753 # fp0: float(ILOG)/Unchanged
23754 # fp1: x/x
23755 # fp2: x/x
23756 # F_SCR1:x/x
23757 # F_SCR2:Abs(X) with $3fff exponent/Unchanged
23758 # L_SCR1:x/x
23759 # L_SCR2:first word of X packed/Unchanged
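#
# For example (illustrative): with ILOG = 2 and k = -3 (three digits after
# the decimal point), LEN = ILOG + 1 - k = 2 + 1 + 3 = 6; with k = +3,
# LEN is simply 3.
#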
23761 A6_str:
23762 tst.l %d7 # branch on sign of k
23763 ble.b k_neg # if k <= 0, LEN = ILOG + 1 - k
23764 mov.l %d7,%d4 # if k > 0, LEN = k
23765 bra.b len_ck # skip to LEN check
23766 k_neg:
23767 mov.l %d6,%d4 # first load ILOG to d4
23768 sub.l %d7,%d4 # subtract off k
23769 addq.l &1,%d4 # add in the 1
23770 len_ck:
23771 tst.l %d4 # LEN check: branch on sign of LEN
23772 ble.b LEN_ng # if neg, set LEN = 1
23773 cmp.l %d4,&17 # test if LEN > 17
23774 ble.b A7_str # if not, forget it
23775 mov.l &17,%d4 # set max LEN = 17
23776 tst.l %d7 # if negative, never set OPERR
23777 ble.b A7_str # if positive, continue
23778 or.l &opaop_mask,USER_FPSR(%a6) # set OPERR & AIOP in USER_FPSR
23779 bra.b A7_str # finished here
23780 LEN_ng:
23781 mov.l &1,%d4 # min LEN is 1
23784 # A7. Calculate SCALE.
23785 # SCALE is equal to 10^ISCALE, where ISCALE is the number
23786 # of decimal places needed to insure LEN integer digits
23787 # in the output before conversion to bcd. LAMBDA is the sign
23788 # of ISCALE, used in A9. Fp1 contains 10^^(abs(ISCALE)) using
23789 # the rounding mode as given in the following table (see
23790 # Coonen, p. 7.23 as ref.; however, the SCALE variable is
23791 # of opposite sign in bindec.sa from Coonen).
23793 # Initial USE
23794 # FPCR[6:5] LAMBDA SIGN(X) FPCR[6:5]
23795 # ----------------------------------------------
23796 # RN 00 0 0 00/0 RN
23797 # RN 00 0 1 00/0 RN
23798 # RN 00 1 0 00/0 RN
23799 # RN 00 1 1 00/0 RN
23800 # RZ 01 0 0 11/3 RP
23801 # RZ 01 0 1 11/3 RP
23802 # RZ 01 1 0 10/2 RM
23803 # RZ 01 1 1 10/2 RM
23804 # RM 10 0 0 11/3 RP
23805 # RM 10 0 1 10/2 RM
23806 # RM 10 1 0 10/2 RM
23807 # RM 10 1 1 11/3 RP
23808 # RP 11 0 0 10/2 RM
23809 # RP 11 0 1 11/3 RP
23810 # RP 11 1 0 11/3 RP
23811 # RP 11 1 1 10/2 RM
23813 # Register usage:
23814 # Input/Output
23815 # d0: exponent/scratch - final is 0
23816 # d2: x/0 or 24 for A9
23817 # d3: x/scratch - offset ptr into PTENRM array
23818 # d4: LEN/Unchanged
23819 # d5: 0/ICTR:LAMBDA
23820 # d6: ILOG/ILOG or k if ((k<=0)&(ILOG<k))
23821 # d7: k-factor/Unchanged
23822 # a0: ptr for original operand/final result
23823 # a1: x/ptr to PTENRM array
23824 # a2: x/x
23825 # fp0: float(ILOG)/Unchanged
23826 # fp1: x/10^ISCALE
23827 # fp2: x/x
23828 # F_SCR1:x/x
23829 # F_SCR2:Abs(X) with $3fff exponent/Unchanged
23830 # L_SCR1:x/x
23831 # L_SCR2:first word of X packed/Unchanged
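#
# The table above as a C lookup (illustrative; LAMBDA is 1 when ISCALE is
# negative, sign(X) is 1 for a negative input, and the entries use the
# FPCR mode encoding 0=RN, 1=RZ, 2=RM, 3=RP):
#
#	static const unsigned char rbdtbl[16] = {
#		0, 0, 0, 0,   3, 3, 2, 2,   3, 2, 2, 3,   2, 3, 3, 2 };
#	int scale_mode(int mode, int lambda, int sign_x) {
#		return rbdtbl[mode*4 + lambda*2 + sign_x];
#	}
#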
23833 A7_str:
23834 tst.l %d7 # test sign of k
23835 bgt.b k_pos # if pos and > 0, skip this
23836 cmp.l %d7,%d6 # test k - ILOG
23837 blt.b k_pos # if ILOG >= k, skip this
23838 mov.l %d7,%d6 # if ((k<0) & (ILOG < k)) ILOG = k
23839 k_pos:
23840 mov.l %d6,%d0 # calc ILOG + 1 - LEN in d0
23841 addq.l &1,%d0 # add the 1
23842 sub.l %d4,%d0 # sub off LEN
23843 swap %d5 # use upper word of d5 for LAMBDA
23844 clr.w %d5 # set it zero initially
23845 clr.w %d2 # set up d2 for very small case
23846 tst.l %d0 # test sign of ISCALE
23847 bge.b iscale # if pos, skip next inst
23848 addq.w &1,%d5 # if neg, set LAMBDA true
23849 cmp.l %d0,&0xffffecd4 # test iscale <= -4908
23850 bgt.b no_inf # if false, skip rest
23851 add.l &24,%d0 # add in 24 to iscale
23852 mov.l &24,%d2 # put 24 in d2 for A9
23853 no_inf:
23854 neg.l %d0 # and take abs of ISCALE
23855 iscale:
23856 fmov.s FONE(%pc),%fp1 # init fp1 to 1
23857 bfextu USER_FPCR(%a6){&26:&2},%d1 # get initial rmode bits
23858 lsl.w &1,%d1 # put them in bits 2:1
23859 add.w %d5,%d1 # add in LAMBDA
23860 lsl.w &1,%d1 # put them in bits 3:1
23861 tst.l L_SCR2(%a6) # test sign of original x
23862 bge.b x_pos # if pos, don't set bit 0
23863 addq.l &1,%d1 # if neg, set bit 0
23864 x_pos:
23865 lea.l RBDTBL(%pc),%a2 # load rbdtbl base
23866 mov.b (%a2,%d1),%d3 # load d3 with new rmode
23867 lsl.l &4,%d3 # put bits in proper position
23868 fmov.l %d3,%fpcr # load bits into fpu
23869 lsr.l &4,%d3 # put bits in proper position
23870 tst.b %d3 # decode new rmode for pten table
23871 bne.b not_rn # if zero, it is RN
23872 lea.l PTENRN(%pc),%a1 # load a1 with RN table base
23873 bra.b rmode # exit decode
23874 not_rn:
23875 lsr.b &1,%d3 # get lsb in carry
23876 bcc.b not_rp2 # if carry clear, it is RM
23877 lea.l PTENRP(%pc),%a1 # load a1 with RP table base
23878 bra.b rmode # exit decode
23879 not_rp2:
23880 lea.l PTENRM(%pc),%a1 # load a1 with RM table base
23881 rmode:
23882 clr.l %d3 # clr table index
23883 e_loop2:
23884 lsr.l &1,%d0 # shift next bit into carry
23885 bcc.b e_next2 # if zero, skip the mul
23886 fmul.x (%a1,%d3),%fp1 # mul by 10**(d3_bit_no)
23887 e_next2:
23888 add.l &12,%d3 # inc d3 to next pwrten table entry
23889 tst.l %d0 # test if ISCALE is zero
23890 bne.b e_loop2 # if not, loop
23892 # A8. Clr INEX; Force RZ.
23893 # The operation in A3 above may have set INEX2.
23894 # RZ mode is forced for the scaling operation to insure
23895 # only one rounding error. The grs bits are collected in
23896 # the INEX flag for use in A10.
23898 # Register usage:
23899 # Input/Output
23901 fmov.l &0,%fpsr # clr INEX
23902 fmov.l &rz_mode*0x10,%fpcr # set RZ rounding mode
23904 # A9. Scale X -> Y.
23905 # The mantissa is scaled to the desired number of significant
23906 # digits. The excess digits are collected in INEX2. If mul,
23907 # Check d2 for excess 10 exponential value. If not zero,
23908 # the iscale value would have caused the pwrten calculation
23909 # to overflow. Only a negative iscale can cause this, so
23910 # multiply by 10^(d2), which is now only allowed to be 24,
23911 # with a multiply by 10^8 and 10^16, which is exact since
23912 # 10^24 is exact. If the input was denormalized, we must
23913 # create a busy stack frame with the mul command and the
23914 # two operands, and allow the fpu to complete the multiply.
23916 # Register usage:
23917 # Input/Output
23918 # d0: FPCR with RZ mode/Unchanged
23919 # d2: 0 or 24/unchanged
23920 # d3: x/x
23921 # d4: LEN/Unchanged
23922 # d5: ICTR:LAMBDA
23923 # d6: ILOG/Unchanged
23924 # d7: k-factor/Unchanged
23925 # a0: ptr for original operand/final result
23926 # a1: ptr to PTENRM array/Unchanged
23927 # a2: x/x
23928 # fp0: float(ILOG)/X adjusted for SCALE (Y)
23929 # fp1: 10^ISCALE/Unchanged
23930 # fp2: x/x
23931 # F_SCR1:x/x
23932 # F_SCR2:Abs(X) with $3fff exponent/Unchanged
23933 # L_SCR1:x/x
23934 # L_SCR2:first word of X packed/Unchanged
23936 A9_str:
23937 fmov.x (%a0),%fp0 # load X from memory
23938 fabs.x %fp0 # use abs(X)
23939 tst.w %d5 # LAMBDA is in lower word of d5
23940 bne.b sc_mul # if neg (LAMBDA = 1), scale by mul
23941 fdiv.x %fp1,%fp0 # calculate X / SCALE -> Y to fp0
23942 bra.w A10_st # branch to A10
23944 sc_mul:
23945 tst.b BINDEC_FLG(%a6) # check for denorm
23946 beq.w A9_norm # if norm, continue with mul
23948 # for DENORM, we must calculate:
23949 # fp0 = input_op * 10^ISCALE * 10^24
23950 # since the input operand is a DENORM, we can't multiply it directly.
23951 # so, we do the multiplication of the exponents and mantissas separately.
23952 # in this way, we avoid underflow on intermediate stages of the
23953 # multiplication and guarantee a result without exception.
23954 fmovm.x &0x2,-(%sp) # save 10^ISCALE to stack
23956 mov.w (%sp),%d3 # grab exponent
23957 andi.w &0x7fff,%d3 # clear sign
23958 ori.w &0x8000,(%a0) # make DENORM exp negative
23959 add.w (%a0),%d3 # add DENORM exp to 10^ISCALE exp
23960 subi.w &0x3fff,%d3 # subtract BIAS
23961 add.w 36(%a1),%d3
23962 subi.w &0x3fff,%d3 # subtract BIAS
23963 add.w 48(%a1),%d3
23964 subi.w &0x3fff,%d3 # subtract BIAS
23966 bmi.w sc_mul_err # if result is DENORM, punt!!!
23968 andi.w &0x8000,(%sp) # keep sign
23969 or.w %d3,(%sp) # insert new exponent
23970 andi.w &0x7fff,(%a0) # clear sign bit on DENORM again
23971 mov.l 0x8(%a0),-(%sp) # put input op mantissa on stk
23972 mov.l 0x4(%a0),-(%sp)
23973 mov.l &0x3fff0000,-(%sp) # force exp to zero
23974 fmovm.x (%sp)+,&0x80 # load normalized DENORM into fp0
23975 fmul.x (%sp)+,%fp0
23977 # fmul.x 36(%a1),%fp0 # multiply fp0 by 10^8
23978 # fmul.x 48(%a1),%fp0 # multiply fp0 by 10^16
23979 mov.l 36+8(%a1),-(%sp) # get 10^8 mantissa
23980 mov.l 36+4(%a1),-(%sp)
23981 mov.l &0x3fff0000,-(%sp) # force exp to zero
23982 mov.l 48+8(%a1),-(%sp) # get 10^16 mantissa
23983 mov.l 48+4(%a1),-(%sp)
23984 mov.l &0x3fff0000,-(%sp) # force exp to zero
23985 fmul.x (%sp)+,%fp0 # multiply fp0 by 10^8
23986 fmul.x (%sp)+,%fp0 # multiply fp0 by 10^16
23987 bra.b A10_st
23989 sc_mul_err:
23990 bra.b sc_mul_err
23992 A9_norm:
23993 tst.w %d2 # test for small exp case
23994 beq.b A9_con # if zero, continue as normal
23995 fmul.x 36(%a1),%fp0 # multiply fp0 by 10^8
23996 fmul.x 48(%a1),%fp0 # multiply fp0 by 10^16
23997 A9_con:
23998 fmul.x %fp1,%fp0 # calculate X * SCALE -> Y to fp0
24000 # A10. Or in INEX.
24001 # If INEX is set, round error occurred. This is compensated
24002 # for by 'or-ing' in the INEX2 flag to the lsb of Y.
24004 # Register usage:
24005 # Input/Output
24006 # d0: FPCR with RZ mode/FPSR with INEX2 isolated
24007 # d2: x/x
24008 # d3: x/x
24009 # d4: LEN/Unchanged
24010 # d5: ICTR:LAMBDA
24011 # d6: ILOG/Unchanged
24012 # d7: k-factor/Unchanged
24013 # a0: ptr for original operand/final result
24014 # a1: ptr to PTENxx array/Unchanged
24015 # a2: x/ptr to FP_SCR1(a6)
24016 # fp0: Y/Y with lsb adjusted
24017 # fp1: 10^ISCALE/Unchanged
24018 # fp2: x/x
24020 A10_st:
24021 fmov.l %fpsr,%d0 # get FPSR
24022 fmov.x %fp0,FP_SCR1(%a6) # move Y to memory
24023 lea.l FP_SCR1(%a6),%a2 # load a2 with ptr to FP_SCR1
24024 btst &9,%d0 # check if INEX2 set
24025 beq.b A11_st # if clear, skip rest
24026 or.l &1,8(%a2) # or in 1 to lsb of mantissa
24027 fmov.x FP_SCR1(%a6),%fp0 # write adjusted Y back to fpu
24030 # A11. Restore original FPCR; set size ext.
24031 # Perform FINT operation in the user's rounding mode. Keep
24032 # the size to extended. The sintdo entry point in the sint
24033 # routine expects the FPCR value to be in USER_FPCR for
24034 # mode and precision. The original FPCR is saved in L_SCR1.
24036 A11_st:
24037 mov.l USER_FPCR(%a6),L_SCR1(%a6) # save it for later
24038 and.l &0x00000030,USER_FPCR(%a6) # set size to ext,
24039 # ;block exceptions
24042 # A12. Calculate YINT = FINT(Y) according to user's rounding mode.
24043 # The FPSP routine sintd0 is used. The output is in fp0.
24045 # Register usage:
24046 # Input/Output
24047 # d0: FPSR with AINEX cleared/FPCR with size set to ext
24048 # d2: x/x/scratch
24049 # d3: x/x
24050 # d4: LEN/Unchanged
24051 # d5: ICTR:LAMBDA/Unchanged
24052 # d6: ILOG/Unchanged
24053 # d7: k-factor/Unchanged
24054 # a0: ptr for original operand/src ptr for sintdo
24055 # a1: ptr to PTENxx array/Unchanged
24056 # a2: ptr to FP_SCR1(a6)/Unchanged
24057 # a6: temp pointer to FP_SCR1(a6) - orig value saved and restored
24058 # fp0: Y/YINT
24059 # fp1: 10^ISCALE/Unchanged
24060 # fp2: x/x
24061 # F_SCR1:x/x
24062 # F_SCR2:Y adjusted for inex/Y with original exponent
24063 # L_SCR1:x/original USER_FPCR
24064 # L_SCR2:first word of X packed/Unchanged
24066 A12_st:
24067 movm.l &0xc0c0,-(%sp) # save regs used by sintd0 {%d0-%d1/%a0-%a1}
24068 mov.l L_SCR1(%a6),-(%sp)
24069 mov.l L_SCR2(%a6),-(%sp)
24071 lea.l FP_SCR1(%a6),%a0 # a0 is ptr to FP_SCR1(a6)
24072 fmov.x %fp0,(%a0) # move Y to memory at FP_SCR1(a6)
24073 tst.l L_SCR2(%a6) # test sign of original operand
24074 bge.b do_fint12 # if pos, use Y
24075 or.l &0x80000000,(%a0) # if neg, use -Y
24076 do_fint12:
24077 mov.l USER_FPSR(%a6),-(%sp)
24078 # bsr sintdo # sint routine returns int in fp0
24080 fmov.l USER_FPCR(%a6),%fpcr
24081 fmov.l &0x0,%fpsr # clear the AEXC bits!!!
24082 ## mov.l USER_FPCR(%a6),%d0 # ext prec/keep rnd mode
24083 ## andi.l &0x00000030,%d0
24084 ## fmov.l %d0,%fpcr
24085 fint.x FP_SCR1(%a6),%fp0 # do fint()
24086 fmov.l %fpsr,%d0
24087 or.w %d0,FPSR_EXCEPT(%a6)
24088 ## fmov.l &0x0,%fpcr
24089 ## fmov.l %fpsr,%d0 # don't keep ccodes
24090 ## or.w %d0,FPSR_EXCEPT(%a6)
24092 mov.b (%sp),USER_FPSR(%a6)
24093 add.l &4,%sp
24095 mov.l (%sp)+,L_SCR2(%a6)
24096 mov.l (%sp)+,L_SCR1(%a6)
24097 movm.l (%sp)+,&0x303 # restore regs used by sint {%d0-%d1/%a0-%a1}
24099 mov.l L_SCR2(%a6),FP_SCR1(%a6) # restore original exponent
24100 mov.l L_SCR1(%a6),USER_FPCR(%a6) # restore user's FPCR
24102 # A13. Check for LEN digits.
24103 # If the int operation results in more than LEN digits,
24104 # or less than LEN -1 digits, adjust ILOG and repeat from
24105 # A6. This test occurs only on the first pass. If the
24106 # result is exactly 10^LEN, decrement ILOG and divide
24107 # the mantissa by 10. The calculation of 10^LEN cannot
24108 # be inexact, since all powers of ten up to 10^27 are exact
24109 # in extended precision, so the use of a previous power-of-ten
24110 # table will introduce no error.
24113 # Register usage:
24114 # Input/Output
24115 # d0: FPCR with size set to ext/scratch final = 0
24116 # d2: x/x
24117 # d3: x/scratch final = x
24118 # d4: LEN/LEN adjusted
24119 # d5: ICTR:LAMBDA/LAMBDA:ICTR
24120 # d6: ILOG/ILOG adjusted
24121 # d7: k-factor/Unchanged
24122 # a0: pointer into memory for packed bcd string formation
24123 # a1: ptr to PTENxx array/Unchanged
24124 # a2: ptr to FP_SCR1(a6)/Unchanged
24125 # fp0: int portion of Y/abs(YINT) adjusted
24126 # fp1: 10^ISCALE/Unchanged
24127 # fp2: x/10^LEN
24128 # F_SCR1:x/x
24129 # F_SCR2:Y with original exponent/Unchanged
24130 # L_SCR1:original USER_FPCR/Unchanged
24131 # L_SCR2:first word of X packed/Unchanged
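#
# For example (illustrative): with LEN = 3, an input near 999.96 scales and
# rounds to YINT = 1000 = 10^LEN; the equal case below divides the mantissa
# by 10 (giving 100) and increments ILOG, so the digit count stays at LEN.
#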
24133 A13_st:
24134 swap %d5 # put ICTR in lower word of d5
24135 tst.w %d5 # check if ICTR = 0
24136 bne not_zr # if non-zero, go to second test
24138 # Compute 10^(LEN-1)
24140 fmov.s FONE(%pc),%fp2 # init fp2 to 1.0
24141 mov.l %d4,%d0 # put LEN in d0
24142 subq.l &1,%d0 # d0 = LEN -1
24143 clr.l %d3 # clr table index
24144 l_loop:
24145 lsr.l &1,%d0 # shift next bit into carry
24146 bcc.b l_next # if zero, skip the mul
24147 fmul.x (%a1,%d3),%fp2 # mul by 10**(d3_bit_no)
24148 l_next:
24149 add.l &12,%d3 # inc d3 to next pwrten table entry
24150 tst.l %d0 # test if LEN is zero
24151 bne.b l_loop # if not, loop
24153 # 10^(LEN-1) is computed for this test and A14. If the input was
24154 # denormalized, check only the case in which YINT > 10^LEN.
24156 tst.b BINDEC_FLG(%a6) # check if input was norm
24157 beq.b A13_con # if norm, continue with checking
24158 fabs.x %fp0 # take abs of YINT
24159 bra test_2
24161 # Compare abs(YINT) to 10^(LEN-1) and 10^LEN
24163 A13_con:
24164 fabs.x %fp0 # take abs of YINT
24165 fcmp.x %fp0,%fp2 # compare abs(YINT) with 10^(LEN-1)
24166 fbge.w test_2 # if greater, do next test
24167 subq.l &1,%d6 # subtract 1 from ILOG
24168 mov.w &1,%d5 # set ICTR
24169 fmov.l &rm_mode*0x10,%fpcr # set rmode to RM
24170 fmul.s FTEN(%pc),%fp2 # compute 10^LEN
24171 bra.w A6_str # return to A6 and recompute YINT
24172 test_2:
24173 fmul.s FTEN(%pc),%fp2 # compute 10^LEN
24174 fcmp.x %fp0,%fp2 # compare abs(YINT) with 10^LEN
24175 fblt.w A14_st # if less, all is ok, go to A14
24176 fbgt.w fix_ex # if greater, fix and redo
24177 fdiv.s FTEN(%pc),%fp0 # if equal, divide by 10
24178 addq.l &1,%d6 # and inc ILOG
24179 bra.b A14_st # and continue elsewhere
24180 fix_ex:
24181 addq.l &1,%d6 # increment ILOG by 1
24182 mov.w &1,%d5 # set ICTR
24183 fmov.l &rm_mode*0x10,%fpcr # set rmode to RM
24184 bra.w A6_str # return to A6 and recompute YINT
24186 # Since ICTR <> 0, we have already been through one adjustment,
24187 # and shouldn't have another; this is to check if abs(YINT) = 10^LEN
24188 # 10^LEN is again computed using whatever table is in a1 since the
24189 # value calculated cannot be inexact.
24191 not_zr:
24192 fmov.s FONE(%pc),%fp2 # init fp2 to 1.0
24193 mov.l %d4,%d0 # put LEN in d0
24194 clr.l %d3 # clr table index
24195 z_loop:
24196 lsr.l &1,%d0 # shift next bit into carry
24197 bcc.b z_next # if zero, skip the mul
24198 fmul.x (%a1,%d3),%fp2 # mul by 10**(d3_bit_no)
24199 z_next:
24200 add.l &12,%d3 # inc d3 to next pwrten table entry
24201 tst.l %d0 # test if LEN is zero
24202 bne.b z_loop # if not, loop
24203 fabs.x %fp0 # get abs(YINT)
24204 fcmp.x %fp0,%fp2 # check if abs(YINT) = 10^LEN
24205 fbneq.w A14_st # if not, skip this
24206 fdiv.s FTEN(%pc),%fp0 # divide abs(YINT) by 10
24207 addq.l &1,%d6 # and inc ILOG by 1
24208 addq.l &1,%d4 # and inc LEN
24209 fmul.s FTEN(%pc),%fp2 # if LEN++, then get 10^^LEN
24211 # A14. Convert the mantissa to bcd.
24212 # The binstr routine is used to convert the LEN digit
24213 # mantissa to bcd in memory. The input to binstr is
24214 # to be a fraction; i.e. (mantissa)/10^LEN and adjusted
24215 # such that the decimal point is to the left of bit 63.
24216 # The bcd digits are stored in the correct position in
24217 # the final string area in memory.
24220 # Register usage:
24221 # Input/Output
24222 # d0: x/LEN call to binstr - final is 0
24223 # d1: x/0
24224 # d2: x/ms 32-bits of mant of abs(YINT)
24225 # d3: x/ls 32-bits of mant of abs(YINT)
24226 # d4: LEN/Unchanged
24227 # d5: ICTR:LAMBDA/LAMBDA:ICTR
24228 # d6: ILOG
24229 # d7: k-factor/Unchanged
24230 # a0: pointer into memory for packed bcd string formation
24231 # /ptr to first mantissa byte in result string
24232 # a1: ptr to PTENxx array/Unchanged
24233 # a2: ptr to FP_SCR1(a6)/Unchanged
24234 # fp0: int portion of Y/abs(YINT) adjusted
24235 # fp1: 10^ISCALE/Unchanged
24236 # fp2: 10^LEN/Unchanged
24237 # F_SCR1:x/Work area for final result
24238 # F_SCR2:Y with original exponent/Unchanged
24239 # L_SCR1:original USER_FPCR/Unchanged
24240 # L_SCR2:first word of X packed/Unchanged
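#
# For example (illustrative): with LEN = 5 and abs(YINT) = 12346, the divide
# below gives 0.12346; binstr (elsewhere in this file) then peels off one
# bcd digit per step by multiplying the 64-bit fraction by ten and taking
# the integer part: .12346 -> 1, .2346 -> 2, .346 -> 3, and so on.
#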
24242 A14_st:
24243 fmov.l &rz_mode*0x10,%fpcr # force rz for conversion
24244 fdiv.x %fp2,%fp0 # divide abs(YINT) by 10^LEN
24245 lea.l FP_SCR0(%a6),%a0
24246 fmov.x %fp0,(%a0) # move abs(YINT)/10^LEN to memory
24247 mov.l 4(%a0),%d2 # move 2nd word of FP_RES to d2
24248 mov.l 8(%a0),%d3 # move 3rd word of FP_RES to d3
24249 clr.l 4(%a0) # zero word 2 of FP_RES
24250 clr.l 8(%a0) # zero word 3 of FP_RES
24251 mov.l (%a0),%d0 # move exponent to d0
24252 swap %d0 # put exponent in lower word
24253 beq.b no_sft # if zero, don't shift
24254 sub.l &0x3ffd,%d0 # sub bias less 2 to make fract
24255 tst.l %d0 # check if > 1
24256 bgt.b no_sft # if so, don't shift
24257 neg.l %d0 # make exp positive
24258 m_loop:
24259 lsr.l &1,%d2 # shift d2:d3 right, add 0s
24260 roxr.l &1,%d3 # the number of places
24261 dbf.w %d0,m_loop # given in d0
24262 no_sft:
24263 tst.l %d2 # check for mantissa of zero
24264 bne.b no_zr # if not, go on
24265 tst.l %d3 # continue zero check
24266 beq.b zer_m # if zero, go directly to binstr
24267 no_zr:
24268 clr.l %d1 # put zero in d1 for addx
24269 add.l &0x00000080,%d3 # inc at bit 7
24270 addx.l %d1,%d2 # continue inc
24271 and.l &0xffffff80,%d3 # strip off lsb not used by 882
24272 zer_m:
24273 mov.l %d4,%d0 # put LEN in d0 for binstr call
24274 addq.l &3,%a0 # a0 points to M16 byte in result
24275 bsr binstr # call binstr to convert mant
24278 # A15. Convert the exponent to bcd.
24279 # As in A14 above, the exp is converted to bcd and the
24280 # digits are stored in the final string.
24282 # Digits are stored in L_SCR1(a6) on return from binstr as:
24284 # 31 16 15 0
24285 # -----------------------------------------
24286 # | 0 | e3 | e2 | e1 | e4 | X | X | X |
24287 # -----------------------------------------
24289 # And are moved into their proper places in FP_SCR0. If digit e4
24290 # is non-zero, OPERR is signaled. In all cases, all 4 digits are
24291 # written as specified in the 881/882 manual for packed decimal.
24293 # Register usage:
24294 # Input/Output
24295 # d0: x/LEN call to binstr - final is 0
24296 # d1: x/scratch (0);shift count for final exponent packing
24297 # d2: x/ms 32-bits of exp fraction/scratch
24298 # d3: x/ls 32-bits of exp fraction
24299 # d4: LEN/Unchanged
24300 # d5: ICTR:LAMBDA/LAMBDA:ICTR
24301 # d6: ILOG
24302 # d7: k-factor/Unchanged
24303 # a0: ptr to result string/ptr to L_SCR1(a6)
24304 # a1: ptr to PTENxx array/Unchanged
24305 # a2: ptr to FP_SCR1(a6)/Unchanged
24306 # fp0: abs(YINT) adjusted/float(ILOG)
24307 # fp1: 10^ISCALE/Unchanged
24308 # fp2: 10^LEN/Unchanged
24309 # F_SCR1:Work area for final result/BCD result
24310 # F_SCR2:Y with original exponent/ILOG/10^4
24311 # L_SCR1:original USER_FPCR/Exponent digits on return from binstr
24312 # L_SCR2:first word of X packed/Unchanged
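#
# The digit packing after the binstr call below can be modeled in C as
# follows (a sketch only; bfins offsets count from the msb of the lword,
# and the lower-case names are ours):
#
#	#include <stdint.h>
#	static uint32_t bfins32(uint32_t dst, uint32_t val, int off, int w)
#	{	/* insert the low w bits of val at bit offset off from the msb */
#	    uint32_t m = ((1u << w) - 1) << (32 - off - w);
#	    return (dst & ~m) | ((val << (32 - off - w)) & m);
#	}
#	/* with d0 holding the lword written to L_SCR1 by binstr: */
#	d0 >>= 12;
#	fp_scr0 = bfins32(fp_scr0, d0, 4, 12);  /* e3:e2:e1               */
#	d0 >>= 12;
#	fp_scr0 = bfins32(fp_scr0, d0, 16, 4);  /* e4                     */
#	if (d0 & 0xff)                          /* e4 non-zero            */
#	    user_fpsr |= opaop_mask;            /* signal OPERR and AIOP  */
#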
24314 A15_st:
24315 tst.b BINDEC_FLG(%a6) # check for denorm
24316 beq.b not_denorm
24317 ftest.x %fp0 # test for zero
24318 fbeq.w den_zero # if zero, use k-factor or 4933
24319 fmov.l %d6,%fp0 # float ILOG
24320 fabs.x %fp0 # get abs of ILOG
24321 bra.b convrt
24322 den_zero:
24323 tst.l %d7 # check sign of the k-factor
24324 blt.b use_ilog # if negative, use ILOG
24325 fmov.s F4933(%pc),%fp0 # force exponent to 4933
24326 bra.b convrt # do it
24327 use_ilog:
24328 fmov.l %d6,%fp0 # float ILOG
24329 fabs.x %fp0 # get abs of ILOG
24330 bra.b convrt
24331 not_denorm:
24332 ftest.x %fp0 # test for zero
24333 fbneq.w not_zero # if zero, force exponent
24334 fmov.s FONE(%pc),%fp0 # force exponent to 1
24335 bra.b convrt # do it
24336 not_zero:
24337 fmov.l %d6,%fp0 # float ILOG
24338 fabs.x %fp0 # get abs of ILOG
24339 convrt:
24340 fdiv.x 24(%a1),%fp0 # compute ILOG/10^4
24341 fmov.x %fp0,FP_SCR1(%a6) # store fp0 in memory
24342 mov.l 4(%a2),%d2 # move word 2 to d2
24343 mov.l 8(%a2),%d3 # move word 3 to d3
24344 mov.w (%a2),%d0 # move exp to d0
24345 beq.b x_loop_fin # if zero, skip the shift
24346 sub.w &0x3ffd,%d0 # subtract off bias
24347 neg.w %d0 # make exp positive
24348 x_loop:
24349 lsr.l &1,%d2 # shift d2:d3 right
24350 roxr.l &1,%d3 # the number of places
24351 dbf.w %d0,x_loop # given in d0
24352 x_loop_fin:
24353 clr.l %d1 # put zero in d1 for addx
24354 add.l &0x00000080,%d3 # inc at bit 7
24355 addx.l %d1,%d2 # continue inc
24356 and.l &0xffffff80,%d3 # strip off lsb not used by 882
24357 mov.l &4,%d0 # put 4 in d0 for binstr call
24358 lea.l L_SCR1(%a6),%a0 # a0 is ptr to L_SCR1 for exp digits
24359 bsr binstr # call binstr to convert exp
24360 mov.l L_SCR1(%a6),%d0 # load L_SCR1 lword to d0
24361 mov.l &12,%d1 # use d1 for shift count
24362 lsr.l %d1,%d0 # shift d0 right by 12
24363 bfins %d0,FP_SCR0(%a6){&4:&12} # put e3:e2:e1 in FP_SCR0
24364 lsr.l %d1,%d0 # shift d0 right by 12
24365 bfins %d0,FP_SCR0(%a6){&16:&4} # put e4 in FP_SCR0
24366 tst.b %d0 # check if e4 is zero
24367 beq.b A16_st # if zero, skip rest
24368 or.l &opaop_mask,USER_FPSR(%a6) # set OPERR & AIOP in USER_FPSR
24371 # A16. Write sign bits to final string.
24372 # Sigma is bit 31 of initial value; RHO is bit 31 of d6 (ILOG).
24374 # Register usage:
24375 # Input/Output
24376 # d0: x/scratch - final is x
24377 # d2: x/x
24378 # d3: x/x
24379 # d4: LEN/Unchanged
24380 # d5: ICTR:LAMBDA/LAMBDA:ICTR
24381 # d6: ILOG/ILOG adjusted
24382 # d7: k-factor/Unchanged
24383 # a0: ptr to L_SCR1(a6)/Unchanged
24384 # a1: ptr to PTENxx array/Unchanged
24385 # a2: ptr to FP_SCR1(a6)/Unchanged
24386 # fp0: float(ILOG)/Unchanged
24387 # fp1: 10^ISCALE/Unchanged
24388 # fp2: 10^LEN/Unchanged
24389 # F_SCR1:BCD result with correct signs
24390 # F_SCR2:ILOG/10^4
24391 # L_SCR1:Exponent digits on return from binstr
24392 # L_SCR2:first word of X packed/Unchanged
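#
# In C terms the sign insertion below amounts to the following (sketch
# only; the lower-case names are ours):
#
#	unsigned sm = (l_scr2 & 0x80000000u) ? 2 : 0; /* original mantissa neg. */
#	unsigned se = (ilog < 0) ? 1 : 0;             /* ILOG negative          */
#	fp_scr0 = (fp_scr0 & 0x0fffffffu)             /* clear first nibble     */
#	        | ((sm | se) << 30);                  /* SM:SE into bits 31:30  */
#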
24394 A16_st:
24395 clr.l %d0 # clr d0 for collection of signs
24396 and.b &0x0f,FP_SCR0(%a6) # clear first nibble of FP_SCR0
24397 tst.l L_SCR2(%a6) # check sign of original mantissa
24398 bge.b mant_p # if pos, don't set SM
24399 mov.l &2,%d0 # move 2 in to d0 for SM
24400 mant_p:
24401 tst.l %d6 # check sign of ILOG
24402 bge.b wr_sgn # if pos, don't set SE
24403 addq.l &1,%d0 # set bit 0 in d0 for SE
24404 wr_sgn:
24405 bfins %d0,FP_SCR0(%a6){&0:&2} # insert SM and SE into FP_SCR0
24407 # Clean up and restore all registers used.
24409 fmov.l &0,%fpsr # clear possible inex2/ainex bits
24410 fmovm.x (%sp)+,&0xe0 # {%fp0-%fp2}
24411 movm.l (%sp)+,&0x4fc # {%d2-%d7/%a2}
	rts
24414 global PTENRN
24415 PTENRN:
24416 long 0x40020000,0xA0000000,0x00000000 # 10 ^ 1
24417 long 0x40050000,0xC8000000,0x00000000 # 10 ^ 2
24418 long 0x400C0000,0x9C400000,0x00000000 # 10 ^ 4
24419 long 0x40190000,0xBEBC2000,0x00000000 # 10 ^ 8
24420 long 0x40340000,0x8E1BC9BF,0x04000000 # 10 ^ 16
24421 long 0x40690000,0x9DC5ADA8,0x2B70B59E # 10 ^ 32
24422 long 0x40D30000,0xC2781F49,0xFFCFA6D5 # 10 ^ 64
24423 long 0x41A80000,0x93BA47C9,0x80E98CE0 # 10 ^ 128
24424 long 0x43510000,0xAA7EEBFB,0x9DF9DE8E # 10 ^ 256
24425 long 0x46A30000,0xE319A0AE,0xA60E91C7 # 10 ^ 512
24426 long 0x4D480000,0xC9767586,0x81750C17 # 10 ^ 1024
24427 long 0x5A920000,0x9E8B3B5D,0xC53D5DE5 # 10 ^ 2048
24428 long 0x75250000,0xC4605202,0x8A20979B # 10 ^ 4096
24430 global PTENRP
24431 PTENRP:
24432 long 0x40020000,0xA0000000,0x00000000 # 10 ^ 1
24433 long 0x40050000,0xC8000000,0x00000000 # 10 ^ 2
24434 long 0x400C0000,0x9C400000,0x00000000 # 10 ^ 4
24435 long 0x40190000,0xBEBC2000,0x00000000 # 10 ^ 8
24436 long 0x40340000,0x8E1BC9BF,0x04000000 # 10 ^ 16
24437 long 0x40690000,0x9DC5ADA8,0x2B70B59E # 10 ^ 32
24438 long 0x40D30000,0xC2781F49,0xFFCFA6D6 # 10 ^ 64
24439 long 0x41A80000,0x93BA47C9,0x80E98CE0 # 10 ^ 128
24440 long 0x43510000,0xAA7EEBFB,0x9DF9DE8E # 10 ^ 256
24441 long 0x46A30000,0xE319A0AE,0xA60E91C7 # 10 ^ 512
24442 long 0x4D480000,0xC9767586,0x81750C18 # 10 ^ 1024
24443 long 0x5A920000,0x9E8B3B5D,0xC53D5DE5 # 10 ^ 2048
24444 long 0x75250000,0xC4605202,0x8A20979B # 10 ^ 4096
24446 global PTENRM
24447 PTENRM:
24448 long 0x40020000,0xA0000000,0x00000000 # 10 ^ 1
24449 long 0x40050000,0xC8000000,0x00000000 # 10 ^ 2
24450 long 0x400C0000,0x9C400000,0x00000000 # 10 ^ 4
24451 long 0x40190000,0xBEBC2000,0x00000000 # 10 ^ 8
24452 long 0x40340000,0x8E1BC9BF,0x04000000 # 10 ^ 16
24453 long 0x40690000,0x9DC5ADA8,0x2B70B59D # 10 ^ 32
24454 long 0x40D30000,0xC2781F49,0xFFCFA6D5 # 10 ^ 64
24455 long 0x41A80000,0x93BA47C9,0x80E98CDF # 10 ^ 128
24456 long 0x43510000,0xAA7EEBFB,0x9DF9DE8D # 10 ^ 256
24457 long 0x46A30000,0xE319A0AE,0xA60E91C6 # 10 ^ 512
24458 long 0x4D480000,0xC9767586,0x81750C17 # 10 ^ 1024
24459 long 0x5A920000,0x9E8B3B5D,0xC53D5DE4 # 10 ^ 2048
24460 long 0x75250000,0xC4605202,0x8A20979A # 10 ^ 4096
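#
# Each entry above is an extended-precision value: a sign/exponent word
# (bias 0x3FFF), a pad word, and a 64-bit mantissa with an explicit
# integer bit.  As a worked example, the first entry
# 0x40020000,0xA0000000,0x00000000 decodes to
# 2^(0x4002-0x3FFF) * (0xA000000000000000/2^63) = 8 * 1.25 = 10.0.
# The three tables differ only in the last mantissa bits of the powers
# that are not exactly representable, giving the correctly rounded value
# for round-to-nearest, round-to-plus and round-to-minus respectively.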
24462 #########################################################################
24463 # binstr(): Converts a 64-bit binary integer to bcd. #
24465 # INPUT *************************************************************** #
24466 # d2:d3 = 64-bit binary integer #
24467 # d0 = desired length (LEN) #
24468 # a0 = pointer to start in memory for bcd characters #
24469 # (This pointer must point to byte 4 of the first #
24470 # lword of the packed decimal memory string.) #
24472 # OUTPUT ************************************************************** #
24473 # a0 = pointer to LEN bcd digits representing the 64-bit integer. #
24475 # ALGORITHM *********************************************************** #
24476 # The 64-bit binary is assumed to have a decimal point before #
24477 # bit 63. The fraction is multiplied by 10 using a mul by 2 #
24478 # shift and a mul by 8 shift. The bits shifted out of the #
24479 # msb form a decimal digit. This process is iterated until #
24480 # LEN digits are formed. #
24482 # A1. Init d7 to 1. D7 is the byte digit counter, and if 1, the #
24483 # digit formed will be assumed the least significant. This is #
24484 # to force the first byte formed to have a 0 in the upper 4 bits. #
24486 # A2. Beginning of the loop: #
24487 # Copy the fraction in d2:d3 to d4:d5. #
24489 # A3. Multiply the fraction in d2:d3 by 8 using bit-field #
24490 # extracts and shifts. The three msbs from d2 will go into d1. #
24492 # A4. Multiply the fraction in d4:d5 by 2 using shifts. The msb #
24493 # will be collected by the carry. #
24495 # A5. Add using the carry the 64-bit quantities in d2:d3 and d4:d5 #
24496 # into d2:d3. D1 will contain the bcd digit formed. #
24498 # A6. Test d7. If zero, the digit formed is the ms digit. If non- #
24499 # zero, it is the ls digit. Put the digit in its place in the #
24500 # upper word of d7. If it is the ls digit, write the byte #
24501 # from d7 to memory. #
24503 # A7. Decrement d0 (LEN counter) and repeat the loop until zero. #
24505 #########################################################################
24507 # Implementation Notes:
24509 # The registers are used as follows:
24511 # d0: LEN counter
24512 # d1: temp used to form the digit
24513 # d2: upper 32-bits of fraction for mul by 8
24514 # d3: lower 32-bits of fraction for mul by 8
24515 # d4: upper 32-bits of fraction for mul by 2
24516 # d5: lower 32-bits of fraction for mul by 2
24517 # d6: temp for bit-field extracts
24518 # d7: byte digit formation word;digit count {0,1}
24519 # a0: pointer into memory for packed bcd string formation
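#
# As an illustrative C model of one pass of the loop below (names ours):
# the fraction is multiplied by 10 as (frac*8 + frac*2), and the bits that
# overflow past the binary point form the next bcd digit.
#
#	#include <stdint.h>
#	static unsigned next_digit(uint64_t *frac)
#	{
#	    uint64_t f   = *frac;
#	    uint64_t by8 = f << 3;              /* mul by 8 (asl by 3)     */
#	    uint64_t by2 = f << 1;              /* mul by 2 (asl/roxl)     */
#	    unsigned dig = (unsigned)(f >> 61)  /* 3 msbs lost by the *8   */
#	                 + (unsigned)(f >> 63); /* msb lost by the *2      */
#	    *frac = by8 + by2;                  /* low 64 bits of frac*10  */
#	    dig  += (*frac < by8);              /* carry out of the add    */
#	    return dig;                         /* always 0..9             */
#	}
#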
24522 global binstr
24523 binstr:
24524 movm.l &0xff00,-(%sp) # {%d0-%d7}
24527 # A1: Init d7
24529 mov.l &1,%d7 # init d7 for second digit
24530 subq.l &1,%d0 # for dbf d0 would have LEN+1 passes
24532 # A2. Copy d2:d3 to d4:d5. Start loop.
24534 loop:
24535 mov.l %d2,%d4 # copy the fraction before muls
24536 mov.l %d3,%d5 # to d4:d5
24538 # A3. Multiply d2:d3 by 8; extract msbs into d1.
24540 bfextu %d2{&0:&3},%d1 # copy 3 msbs of d2 into d1
24541 asl.l &3,%d2 # shift d2 left by 3 places
24542 bfextu %d3{&0:&3},%d6 # copy 3 msbs of d3 into d6
24543 asl.l &3,%d3 # shift d3 left by 3 places
24544 or.l %d6,%d2 # or in msbs from d3 into d2
24546 # A4. Multiply d4:d5 by 2; add carry out to d1.
24548 asl.l &1,%d5 # mul d5 by 2
24549 roxl.l &1,%d4 # mul d4 by 2
24550 swap %d6 # put 0 in d6 lower word
24551 addx.w %d6,%d1 # add in extend from mul by 2
24553 # A5. Add mul by 8 to mul by 2. D1 contains the digit formed.
24555 add.l %d5,%d3 # add lower 32 bits
24556 nop # ERRATA FIX #13 (Rev. 1.2 6/6/90)
24557 addx.l %d4,%d2 # add with extend upper 32 bits
24558 nop # ERRATA FIX #13 (Rev. 1.2 6/6/90)
24559 addx.w %d6,%d1 # add in extend from add to d1
24560 swap %d6 # with d6 = 0; put 0 in upper word
24562 # A6. Test d7 and branch.
24564 tst.w %d7 # if zero, store digit & to loop
24565 beq.b first_d # if non-zero, form byte & write
24566 sec_d:
24567 swap %d7 # bring first digit to word d7b
24568 asl.w &4,%d7 # first digit in upper 4 bits d7b
24569 add.w %d1,%d7 # add in ls digit to d7b
24570 mov.b %d7,(%a0)+ # store d7b byte in memory
24571 swap %d7 # put LEN counter in word d7a
24572 clr.w %d7 # set d7a to signal no digits done
24573 dbf.w %d0,loop # do loop some more!
24574 bra.b end_bstr # finished, so exit
24575 first_d:
24576 swap %d7 # put digit word in d7b
24577 mov.w %d1,%d7 # put new digit in d7b
24578 swap %d7 # put LEN counter in word d7a
24579 addq.w &1,%d7 # set d7a to signal first digit done
24580 dbf.w %d0,loop # do loop some more!
24581 swap %d7 # put last digit in string
24582 lsl.w &4,%d7 # move it to upper 4 bits
24583 mov.b %d7,(%a0)+ # store it in memory string
24585 # Clean up and return; the bcd digits have been written via a0.
24587 end_bstr:
24588 movm.l (%sp)+,&0xff # {%d0-%d7}
	rts
24591 #########################################################################
24592 # XDEF **************************************************************** #
24593 # facc_in_b(): dmem_read_byte failed #
24594 # facc_in_w(): dmem_read_word failed #
24595 # facc_in_l(): dmem_read_long failed #
24596 # facc_in_d(): dmem_read of dbl prec failed #
24597 # facc_in_x(): dmem_read of ext prec failed #
24599 # facc_out_b(): dmem_write_byte failed #
24600 # facc_out_w(): dmem_write_word failed #
24601 # facc_out_l(): dmem_write_long failed #
24602 # facc_out_d(): dmem_write of dbl prec failed #
24603 # facc_out_x(): dmem_write of ext prec failed #
24605 # XREF **************************************************************** #
24606 # _real_access() - exit through access error handler #
24608 # INPUT *************************************************************** #
24609 # None #
24611 # OUTPUT ************************************************************** #
24612 # None #
24614 # ALGORITHM *********************************************************** #
24615 # Flow jumps here when an FP data fetch call gets an error #
24616 # result. This means the operating system wants an access error frame #
24617 # made out of the current exception stack frame. #
24618 # So, we first call restore() which makes sure that any updated #
24619 # -(an)+ register gets returned to its pre-exception value and then #
24620 # we change the stack to an access error stack frame. #
24622 #########################################################################
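#
# Each stub below records the transfer size in d0, calls restore() to back
# out any (An)+ or -(An) update, stashes the upper word of the FSLW that
# describes the faulted access in EXC_VOFF, and then joins facc_finish.
# For reference, the FSLW words used by the stubs are:
#
#	reads:  byte 0x0121, word 0x0141, long 0x0101, dbl/ext 0x0161
#	writes: byte 0x00a1, word 0x00c1, long 0x0081, dbl/ext 0x00e1
#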
24624 facc_in_b:
24625 movq.l &0x1,%d0 # one byte
24626 bsr.w restore # fix An
24628 mov.w &0x0121,EXC_VOFF(%a6) # set FSLW
24629 bra.w facc_finish
24631 facc_in_w:
24632 movq.l &0x2,%d0 # two bytes
24633 bsr.w restore # fix An
24635 mov.w &0x0141,EXC_VOFF(%a6) # set FSLW
24636 bra.b facc_finish
24638 facc_in_l:
24639 movq.l &0x4,%d0 # four bytes
24640 bsr.w restore # fix An
24642 mov.w &0x0101,EXC_VOFF(%a6) # set FSLW
24643 bra.b facc_finish
24645 facc_in_d:
24646 movq.l &0x8,%d0 # eight bytes
24647 bsr.w restore # fix An
24649 mov.w &0x0161,EXC_VOFF(%a6) # set FSLW
24650 bra.b facc_finish
24652 facc_in_x:
24653 movq.l &0xc,%d0 # twelve bytes
24654 bsr.w restore # fix An
24656 mov.w &0x0161,EXC_VOFF(%a6) # set FSLW
24657 bra.b facc_finish
24659 ################################################################
24661 facc_out_b:
24662 movq.l &0x1,%d0 # one byte
24663 bsr.w restore # restore An
24665 mov.w &0x00a1,EXC_VOFF(%a6) # set FSLW
24666 bra.b facc_finish
24668 facc_out_w:
24669 movq.l &0x2,%d0 # two bytes
24670 bsr.w restore # restore An
24672 mov.w &0x00c1,EXC_VOFF(%a6) # set FSLW
24673 bra.b facc_finish
24675 facc_out_l:
24676 movq.l &0x4,%d0 # four bytes
24677 bsr.w restore # restore An
24679 mov.w &0x0081,EXC_VOFF(%a6) # set FSLW
24680 bra.b facc_finish
24682 facc_out_d:
24683 movq.l &0x8,%d0 # eight bytes
24684 bsr.w restore # restore An
24686 mov.w &0x00e1,EXC_VOFF(%a6) # set FSLW
24687 bra.b facc_finish
24689 facc_out_x:
24690 mov.l &0xc,%d0 # twelve bytes
24691 bsr.w restore # restore An
24693 mov.w &0x00e1,EXC_VOFF(%a6) # set FSLW
24695 # here's where we actually create the access error frame from the
24696 # current exception stack frame.
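# The moves below rebuild it as a format-$4 access error frame: from the
# top of the stack down it reads SR.w, PC.l (taken from the FPIAR), the
# format/vector word 0x4008, the fault address.l, and the FSLW.l, whose
# upper word is the value stashed in EXC_VOFF above and whose lower word
# is 0x0001 (the supervisor TM bit may be set into it further below).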
24697 facc_finish:
24698 mov.l USER_FPIAR(%a6),EXC_PC(%a6) # store current PC
24700 fmovm.x EXC_FPREGS(%a6),&0xc0 # restore fp0-fp1
24701 fmovm.l USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs
24702 movm.l EXC_DREGS(%a6),&0x0303 # restore d0-d1/a0-a1
24704 unlk %a6
24706 mov.l (%sp),-(%sp) # store SR, hi(PC)
24707 mov.l 0x8(%sp),0x4(%sp) # store lo(PC)
24708 mov.l 0xc(%sp),0x8(%sp) # store EA
24709 mov.l &0x00000001,0xc(%sp) # store FSLW
24710 mov.w 0x6(%sp),0xc(%sp) # fix FSLW (size)
24711 mov.w &0x4008,0x6(%sp) # store voff
24713 btst &0x5,(%sp) # supervisor or user mode?
24714 beq.b facc_out2 # user
24715 bset &0x2,0xd(%sp) # set supervisor TM bit
24717 facc_out2:
24718 bra.l _real_access
24720 ##################################################################
24722 # if the effective addressing mode was predecrement or postincrement,
24723 # the emulation has already changed its value to the correct post-
24724 # instruction value. but since we're exiting to the access error
24725 # handler, then AN must be returned to its pre-instruction value.
24726 # we do that here.
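#
# In C terms the fix-up is simply (a sketch; the names are ours):
#
#	#include <stdint.h>
#	void restore_an(uint32_t *an, unsigned mode, uint32_t size)
#	{
#	    if (mode == 0x18)       /* (An)+ : take the increment back  */
#	        *an -= size;
#	    else if (mode == 0x20)  /* -(An) : give the decrement back, */
#	        *an += size;        /* done below by negating d0 first  */
#	}
#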
24727 restore:
24728 mov.b EXC_OPWORD+0x1(%a6),%d1
24729 andi.b &0x38,%d1 # extract opmode
24730 cmpi.b %d1,&0x18 # postinc?
24731 beq.w rest_inc
24732 cmpi.b %d1,&0x20 # predec?
24733 beq.w rest_dec
	rts
24736 rest_inc:
24737 mov.b EXC_OPWORD+0x1(%a6),%d1
24738 andi.w &0x0007,%d1 # fetch An
24740 mov.w (tbl_rest_inc.b,%pc,%d1.w*2),%d1
24741 jmp (tbl_rest_inc.b,%pc,%d1.w*1)
24743 tbl_rest_inc:
24744 short ri_a0 - tbl_rest_inc
24745 short ri_a1 - tbl_rest_inc
24746 short ri_a2 - tbl_rest_inc
24747 short ri_a3 - tbl_rest_inc
24748 short ri_a4 - tbl_rest_inc
24749 short ri_a5 - tbl_rest_inc
24750 short ri_a6 - tbl_rest_inc
24751 short ri_a7 - tbl_rest_inc
24753 ri_a0:
24754 sub.l %d0,EXC_DREGS+0x8(%a6) # fix stacked a0
	rts
24756 ri_a1:
24757 sub.l %d0,EXC_DREGS+0xc(%a6) # fix stacked a1
	rts
24759 ri_a2:
24760 sub.l %d0,%a2 # fix a2
	rts
24762 ri_a3:
24763 sub.l %d0,%a3 # fix a3
	rts
24765 ri_a4:
24766 sub.l %d0,%a4 # fix a4
	rts
24768 ri_a5:
24769 sub.l %d0,%a5 # fix a5
	rts
24771 ri_a6:
24772 sub.l %d0,(%a6) # fix stacked a6
	rts
24774 # if it's a fmove out instruction, we don't have to fix a7
24775 # because we hadn't changed it yet. if it's an opclass two
24776 # instruction (data moved in) and the exception was in supervisor
24777 # mode, then a7 also wasn't updated. if it was user mode, then
24778 # restore the correct a7 which is in the USP currently.
24779 ri_a7:
24780 cmpi.b EXC_VOFF(%a6),&0x30 # move in or out?
24781 bne.b ri_a7_done # out
24783 btst &0x5,EXC_SR(%a6) # user or supervisor?
24784 bne.b ri_a7_done # supervisor
24785 movc %usp,%a0 # restore USP
24786 sub.l %d0,%a0
24787 movc %a0,%usp
24788 ri_a7_done:
	rts
24791 # need to invert adjustment value if the <ea> was predec
24792 rest_dec:
24793 neg.l %d0
24794 bra.b rest_inc