arch/m68k-all/m680x0/fpsp/bindec.sa

   1 *       $NetBSD: bindec.sa,v 1.5 2001/12/09 01:43:13 briggs Exp $
   2
   3 *       MOTOROLA MICROPROCESSOR & MEMORY TECHNOLOGY GROUP
   4 *       M68000 Hi-Performance Microprocessor Division
   5 *       M68040 Software Package
   6 *
   7 *       M68040 Software Package Copyright (c) 1993, 1994 Motorola Inc.
   8 *       All rights reserved.
   9 *
  10 *       THE SOFTWARE is provided on an "AS IS" basis and without warranty.
  11 *       To the maximum extent permitted by applicable law,
  12 *       MOTOROLA DISCLAIMS ALL WARRANTIES WHETHER EXPRESS OR IMPLIED,
  13 *       INCLUDING IMPLIED WARRANTIES OF MERCHANTABILITY OR FITNESS FOR A
  14 *       PARTICULAR PURPOSE and any warranty against infringement with
  15 *       regard to the SOFTWARE (INCLUDING ANY MODIFIED VERSIONS THEREOF)
  16 *       and any accompanying written materials.
  17 *
  18 *       To the maximum extent permitted by applicable law,
  19 *       IN NO EVENT SHALL MOTOROLA BE LIABLE FOR ANY DAMAGES WHATSOEVER
  20 *       (INCLUDING WITHOUT LIMITATION, DAMAGES FOR LOSS OF BUSINESS
  21 *       PROFITS, BUSINESS INTERRUPTION, LOSS OF BUSINESS INFORMATION, OR
  22 *       OTHER PECUNIARY LOSS) ARISING OF THE USE OR INABILITY TO USE THE
  23 *       SOFTWARE.  Motorola assumes no responsibility for the maintenance
  24 *       and support of the SOFTWARE.
  25 *
  26 *       You are hereby granted a copyright license to use, modify, and
  27 *       distribute the SOFTWARE so long as this entire notice is retained
  28 *       without alteration in any modified and/or redistributed versions,
  29 *       and that such modified versions are clearly identified as such.
  30 *       No licenses are granted by implication, estoppel or otherwise
  31 *       under any patents or trademarks of Motorola, Inc.
  32
  33 *
  34 *       bindec.sa 3.4 1/3/91
  35 *
  36 *       bindec
  37 *
  38 *       Description:
  39 *               Converts an input in extended precision format
  40 *               to bcd format.
  41 *
  42 *       Input:
  43 *               a0 points to the input extended precision value
  44 *               value in memory; d0 contains the k-factor sign-extended
  45 *               to 32-bits.  The input may be either normalized,
  46 *               unnormalized, or denormalized.
  47 *
  48 *       Output: result in the FP_SCR1 space on the stack.
  49 *
  50 *       Saves and Modifies: D2-D7,A2,FP2
  51 *
  52 *       Algorithm:
  53 *
  54 *       A1.     Set RM and size ext;  Set SIGMA = sign of input.
  55 *               The k-factor is saved for use in d7. Clear the
  56 *               BINDEC_FLG for separating normalized/denormalized
  57 *               input.  If input is unnormalized or denormalized,
  58 *               normalize it.
  59 *
  60 *       A2.     Set X = abs(input).
  61 *
  62 *       A3.     Compute ILOG.
  63 *               ILOG is the log base 10 of the input value.  It is
  64 *               approximated by adding e + 0.f when the original
  65 *               value is viewed as 2^^e * 1.f in extended precision.
  66 *               This value is stored in d6.
  67 *
  68 *       A4.     Clr INEX bit.
  69 *               The operation in A3 above may have set INEX2.
  70 *
  71 *       A5.     Set ICTR = 0;
  72 *               ICTR is a flag used in A13.  It must be set before the
  73 *               loop entry A6.
  74 *
  75 *       A6.     Calculate LEN.
  76 *               LEN is the number of digits to be displayed.  The
  77 *               k-factor can dictate either the total number of digits,
  78 *               if it is a positive number, or the number of digits
  79 *               after the decimal point which are to be included as
  80 *               significant.  See the 68882 manual for examples.
  81 *               If LEN is computed to be greater than 17, set OPERR in
  82 *               USER_FPSR.  LEN is stored in d4.
  83 *
  84 *       A7.     Calculate SCALE.
  85 *               SCALE is equal to 10^ISCALE, where ISCALE is the number
  86 *               of decimal places needed to insure LEN integer digits
  87 *               in the output before conversion to bcd. LAMBDA is the
  88 *               sign of ISCALE, used in A9. Fp1 contains
  89 *               10^^(abs(ISCALE)) using a rounding mode which is a
  90 *               function of the original rounding mode and the signs
  91 *               of ISCALE and X.  A table is given in the code.
  92 *
  93 *       A8.     Clr INEX; Force RZ.
  94 *               The operation in A3 above may have set INEX2.
  95 *               RZ mode is forced for the scaling operation to insure
  96 *               only one rounding error.  The grs bits are collected in
  97 *               the INEX flag for use in A10.
  98 *
  99 *       A9.     Scale X -> Y.
 100 *               The mantissa is scaled to the desired number of
 101 *               significant digits.  The excess digits are collected
 102 *               in INEX2.
 103 *
 104 *       A10.    Or in INEX.
 105 *               If INEX is set, round error occurred.  This is
 106 *               compensated for by 'or-ing' in the INEX2 flag to
 107 *               the lsb of Y.
 108 *
 109 *       A11.    Restore original FPCR; set size ext.
 110 *               Perform FINT operation in the user's rounding mode.
 111 *               Keep the size to extended.
 112 *
 113 *       A12.    Calculate YINT = FINT(Y) according to user's rounding
 114 *               mode.  The FPSP routine sintd0 is used.  The output
 115 *               is in fp0.
 116 *
 117 *       A13.    Check for LEN digits.
 118 *               If the int operation results in more than LEN digits,
 119 *               or less than LEN -1 digits, adjust ILOG and repeat from
 120 *               A6.  This test occurs only on the first pass.  If the
 121 *               result is exactly 10^LEN, decrement ILOG and divide
 122 *               the mantissa by 10.
 123 *
 124 *       A14.    Convert the mantissa to bcd.
 125 *               The binstr routine is used to convert the LEN digit
 126 *               mantissa to bcd in memory.  The input to binstr is
 127 *               to be a fraction; i.e. (mantissa)/10^LEN and adjusted
 128 *               such that the decimal point is to the left of bit 63.
 129 *               The bcd digits are stored in the correct position in
 130 *               the final string area in memory.
 131 *
 132 *       A15.    Convert the exponent to bcd.
 133 *               As in A14 above, the exp is converted to bcd and the
 134 *               digits are stored in the final string.
 135 *               Test the length of the final exponent string.  If the
 136 *               length is 4, set operr.
 137 *
 138 *       A16.    Write sign bits to final string.
 139 *
 140 *       Implementation Notes:
 141 *
 142 *       The registers are used as follows:
 143 *
 144 *               d0: scratch; LEN input to binstr
 145 *               d1: scratch
 146 *               d2: upper 32-bits of mantissa for binstr
 147 *               d3: scratch;lower 32-bits of mantissa for binstr
 148 *               d4: LEN
 149 *               d5: LAMBDA/ICTR
 150 *               d6: ILOG
 151 *               d7: k-factor
 152 *               a0: ptr for original operand/final result
 153 *               a1: scratch pointer
 154 *               a2: pointer to FP_X; abs(original value) in ext
 155 *               fp0: scratch
 156 *               fp1: scratch
 157 *               fp2: scratch
 158 *               F_SCR1:
 159 *               F_SCR2:
 160 *               L_SCR1:
 161 *               L_SCR2:
 162 *
 163
 164 BINDEC    IDNT    2,1 Motorola 040 Floating Point Software Package
 165
 166         include fpsp.h
 167
 168         section 8
 169
 170 * Constants in extended precision
 171 LOG2    dc.l    $3FFD0000,$9A209A84,$FBCFF798,$00000000
 172 LOG2UP1 dc.l    $3FFD0000,$9A209A84,$FBCFF799,$00000000
 173
 174 * Constants in single precision
 175 FONE    dc.l    $3F800000,$00000000,$00000000,$00000000
 176 FTWO    dc.l    $40000000,$00000000,$00000000,$00000000
 177 FTEN    dc.l    $41200000,$00000000,$00000000,$00000000
 178 F4933   dc.l    $459A2800,$00000000,$00000000,$00000000
 179
 180 RBDTBL  dc.b    0,0,0,0
 181         dc.b    3,3,2,2
 182         dc.b    3,2,2,3
 183         dc.b    2,3,3,2
 184
 185         xref    binstr
 186         xref    sintdo
 187         xref    ptenrn,ptenrm,ptenrp
 188
 189         xdef    bindec
 190         xdef    sc_mul
 191 bindec:
 192         movem.l d2-d7/a2,-(a7)
 193         fmovem.x fp0-fp2,-(a7)
 194
 195 * A1. Set RM and size ext. Set SIGMA = sign input;
 196 *     The k-factor is saved for use in d7.  Clear BINDEC_FLG for
 197 *     separating  normalized/denormalized input.  If the input
 198 *     is a denormalized number, set the BINDEC_FLG memory word
 199 *     to signal denorm.  If the input is unnormalized, normalize
 200 *     the input and test for denormalized result.
 201 *
 202         fmove.l #rm_mode,FPCR   ;set RM and ext
 203         move.l  (a0),L_SCR2(a6) ;save exponent for sign check
 204         move.l  d0,d7           ;move k-factor to d7
 205         clr.b   BINDEC_FLG(a6)  ;clr norm/denorm flag
 206         move.w  STAG(a6),d0     ;get stag
 207         andi.w  #$e000,d0       ;isolate stag bits
 208         beq     A2_str          ;if zero, input is norm
 209 *
 210 * Normalize the denorm
 211 *
 212 un_de_norm:
 213         move.w  (a0),d0
 214         andi.w  #$7fff,d0       ;strip sign of normalized exp
 215         move.l  4(a0),d1
 216         move.l  8(a0),d2
 217 norm_loop:
 218         sub.w   #1,d0
 219         add.l   d2,d2
 220         addx.l  d1,d1
 221         tst.l   d1
 222         bge.b   norm_loop
 223 *
 224 * Test if the normalized input is denormalized
 225 *
 226         tst.w   d0
 227         bgt.b   pos_exp         ;if greater than zero, it is a norm
 228         st      BINDEC_FLG(a6)  ;set flag for denorm
 229 pos_exp:
 230         andi.w  #$7fff,d0       ;strip sign of normalized exp
 231         move.w  d0,(a0)
 232         move.l  d1,4(a0)
 233         move.l  d2,8(a0)
 234
 235 * A2. Set X = abs(input).
 236 *
 237 A2_str:
 238         move.l  (a0),FP_SCR2(a6) ; move input to work space
 239         move.l  4(a0),FP_SCR2+4(a6) ; move input to work space
 240         move.l  8(a0),FP_SCR2+8(a6) ; move input to work space
 241         andi.l  #$7fffffff,FP_SCR2(a6) ;create abs(X)
 242
 243 * A3. Compute ILOG.
 244 *     ILOG is the log base 10 of the input value.  It is approx-
 245 *     imated by adding e + 0.f when the original value is viewed
 246 *     as 2^^e * 1.f in extended precision.  This value is stored
 247 *     in d6.
 248 *
 249 * Register usage:
 250 *       Input/Output
 251 *       d0: k-factor/exponent
 252 *       d2: x/x
 253 *       d3: x/x
 254 *       d4: x/x
 255 *       d5: x/x
 256 *       d6: x/ILOG
 257 *       d7: k-factor/Unchanged
 258 *       a0: ptr for original operand/final result
 259 *       a1: x/x
 260 *       a2: x/x
 261 *       fp0: x/float(ILOG)
 262 *       fp1: x/x
 263 *       fp2: x/x
 264 *       F_SCR1:x/x
 265 *       F_SCR2:Abs(X)/Abs(X) with $3fff exponent
 266 *       L_SCR1:x/x
 267 *       L_SCR2:first word of X packed/Unchanged
 268
 269         tst.b   BINDEC_FLG(a6)  ;check for denorm
 270         beq.b   A3_cont         ;if clr, continue with norm
 271         move.l  #-4933,d6       ;force ILOG = -4933
 272         bra.b   A4_str
 273 A3_cont:
 274         move.w  FP_SCR2(a6),d0  ;move exp to d0
 275         move.w  #$3fff,FP_SCR2(a6) ;replace exponent with 0x3fff
 276         fmove.x FP_SCR2(a6),fp0 ;now fp0 has 1.f
 277         sub.w   #$3fff,d0       ;strip off bias
 278         fadd.w  d0,fp0          ;add in exp
 279         fsub.s  FONE,fp0        ;subtract off 1.0
 280         fbge.w  pos_res         ;if pos, branch
 281         fmul.x  LOG2UP1,fp0     ;if neg, mul by LOG2UP1
 282         fmove.l fp0,d6          ;put ILOG in d6 as a lword
 283         bra.b   A4_str          ;go move out ILOG
 284 pos_res:
 285         fmul.x  LOG2,fp0        ;if pos, mul by LOG2
 286         fmove.l fp0,d6          ;put ILOG in d6 as a lword
 287
 288
 289 * A4. Clr INEX bit.
 290 *     The operation in A3 above may have set INEX2.
 291
 292 A4_str:
 293         fmove.l #0,FPSR         ;zero all of fpsr - nothing needed
 294
 295
 296 * A5. Set ICTR = 0;
 297 *     ICTR is a flag used in A13.  It must be set before the
 298 *     loop entry A6. The lower word of d5 is used for ICTR.
 299
 300         clr.w   d5              ;clear ICTR
 301
 302
 303 * A6. Calculate LEN.
 304 *     LEN is the number of digits to be displayed.  The k-factor
 305 *     can dictate either the total number of digits, if it is
 306 *     a positive number, or the number of digits after the
 307 *     original decimal point which are to be included as
 308 *     significant.  See the 68882 manual for examples.
 309 *     If LEN is computed to be greater than 17, set OPERR in
 310 *     USER_FPSR.  LEN is stored in d4.
 311 *
 312 * Register usage:
 313 *       Input/Output
 314 *       d0: exponent/Unchanged
 315 *       d2: x/x/scratch
 316 *       d3: x/x
 317 *       d4: exc picture/LEN
 318 *       d5: ICTR/Unchanged
 319 *       d6: ILOG/Unchanged
 320 *       d7: k-factor/Unchanged
 321 *       a0: ptr for original operand/final result
 322 *       a1: x/x
 323 *       a2: x/x
 324 *       fp0: float(ILOG)/Unchanged
 325 *       fp1: x/x
 326 *       fp2: x/x
 327 *       F_SCR1:x/x
 328 *       F_SCR2:Abs(X) with $3fff exponent/Unchanged
 329 *       L_SCR1:x/x
 330 *       L_SCR2:first word of X packed/Unchanged
 331
 332 A6_str:
 333         tst.l   d7              ;branch on sign of k
 334         ble.b   k_neg           ;if k <= 0, LEN = ILOG + 1 - k
 335         move.l  d7,d4           ;if k > 0, LEN = k
 336         bra.b   len_ck          ;skip to LEN check
 337 k_neg:
 338         move.l  d6,d4           ;first load ILOG to d4
 339         sub.l   d7,d4           ;subtract off k
 340         addq.l  #1,d4           ;add in the 1
 341 len_ck:
 342         tst.l   d4              ;LEN check: branch on sign of LEN
 343         ble.b   LEN_ng          ;if neg, set LEN = 1
 344         cmp.l   #17,d4          ;test if LEN > 17
 345         ble.b   A7_str          ;if not, forget it
 346         move.l  #17,d4          ;set max LEN = 17
 347         tst.l   d7              ;if negative, never set OPERR
 348         ble.b   A7_str          ;if positive, continue
 349         or.l    #opaop_mask,USER_FPSR(a6) ;set OPERR & AIOP in USER_FPSR
 350         bra.b   A7_str          ;finished here
 351 LEN_ng:
 352         moveq.l #1,d4           ;min LEN is 1
 353
 354
 355 * A7. Calculate SCALE.
 356 *     SCALE is equal to 10^ISCALE, where ISCALE is the number
 357 *     of decimal places needed to insure LEN integer digits
 358 *     in the output before conversion to bcd. LAMBDA is the sign
 359 *     of ISCALE, used in A9.  Fp1 contains 10^^(abs(ISCALE)) using
 360 *     the rounding mode as given in the following table (see
 361 *     Coonen, p. 7.23 as ref.; however, the SCALE variable is
 362 *     of opposite sign in bindec.sa from Coonen).
 363 *
 364 *       Initial                                 USE
 365 *       FPCR[6:5]       LAMBDA  SIGN(X)         FPCR[6:5]
 366 *       ----------------------------------------------
 367 *        RN     00         0       0            00/0    RN
 368 *        RN     00         0       1            00/0    RN
 369 *        RN     00         1       0            00/0    RN
 370 *        RN     00         1       1            00/0    RN
 371 *        RZ     01         0       0            11/3    RP
 372 *        RZ     01         0       1            11/3    RP
 373 *        RZ     01         1       0            10/2    RM
 374 *        RZ     01         1       1            10/2    RM
 375 *        RM     10         0       0            11/3    RP
 376 *        RM     10         0       1            10/2    RM
 377 *        RM     10         1       0            10/2    RM
 378 *        RM     10         1       1            11/3    RP
 379 *        RP     11         0       0            10/2    RM
 380 *        RP     11         0       1            11/3    RP
 381 *        RP     11         1       0            11/3    RP
 382 *        RP     11         1       1            10/2    RM
 383 *
 384 * Register usage:
 385 *       Input/Output
 386 *       d0: exponent/scratch - final is 0
 387 *       d2: x/0 or 24 for A9
 388 *       d3: x/scratch - offset ptr into PTENRM array
 389 *       d4: LEN/Unchanged
 390 *       d5: 0/ICTR:LAMBDA
 391 *       d6: ILOG/ILOG or k if ((k<=0)&(ILOG<k))
 392 *       d7: k-factor/Unchanged
 393 *       a0: ptr for original operand/final result
 394 *       a1: x/ptr to PTENRM array
 395 *       a2: x/x
 396 *       fp0: float(ILOG)/Unchanged
 397 *       fp1: x/10^ISCALE
 398 *       fp2: x/x
 399 *       F_SCR1:x/x
 400 *       F_SCR2:Abs(X) with $3fff exponent/Unchanged
 401 *       L_SCR1:x/x
 402 *       L_SCR2:first word of X packed/Unchanged
 403
 404 A7_str:
 405         tst.l   d7              ;test sign of k
 406         bgt.b   k_pos           ;if pos and > 0, skip this
 407         cmp.l   d6,d7           ;test k - ILOG
 408         blt.b   k_pos           ;if ILOG >= k, skip this
 409         move.l  d7,d6           ;if ((k<0) & (ILOG < k)) ILOG = k
 410 k_pos:
 411         move.l  d6,d0           ;calc ILOG + 1 - LEN in d0
 412         addq.l  #1,d0           ;add the 1
 413         sub.l   d4,d0           ;sub off LEN
 414         swap    d5              ;use upper word of d5 for LAMBDA
 415         clr.w   d5              ;set it zero initially
 416         clr.w   d2              ;set up d2 for very small case
 417         tst.l   d0              ;test sign of ISCALE
 418         bge.b   iscale          ;if pos, skip next inst
 419         addq.w  #1,d5           ;if neg, set LAMBDA true
 420         cmp.l   #$ffffecd4,d0   ;test iscale <= -4908
 421         bgt.b   no_inf          ;if false, skip rest
 422         addi.l  #24,d0          ;add in 24 to iscale
 423         move.l  #24,d2          ;put 24 in d2 for A9
 424 no_inf:
 425         neg.l   d0              ;and take abs of ISCALE
 426 iscale:
 427         fmove.s FONE,fp1        ;init fp1 to 1
 428         bfextu  USER_FPCR(a6){26:2},d1 ;get initial rmode bits
 429         add.w   d1,d1           ;put them in bits 2:1
 430         add.w   d5,d1           ;add in LAMBDA
 431         add.w   d1,d1           ;put them in bits 3:1
 432         tst.l   L_SCR2(a6)      ;test sign of original x
 433         bge.b   x_pos           ;if pos, don't set bit 0
 434         addq.l  #1,d1           ;if neg, set bit 0
 435 x_pos:
 436         lea.l   RBDTBL,a2       ;load rbdtbl base
 437         move.b  (a2,d1),d3      ;load d3 with new rmode
 438         lsl.l   #4,d3           ;put bits in proper position
 439         fmove.l d3,fpcr         ;load bits into fpu
 440         lsr.l   #4,d3           ;put bits in proper position
 441         tst.b   d3              ;decode new rmode for pten table
 442         bne.b   not_rn          ;if zero, it is RN
 443         lea.l   PTENRN,a1       ;load a1 with RN table base
 444         bra.b   rmode           ;exit decode
 445 not_rn:
 446         lsr.b   #1,d3           ;get lsb in carry
 447         bcc.b   not_rp          ;if carry clear, it is RM
 448         lea.l   PTENRP,a1       ;load a1 with RP table base
 449         bra.b   rmode           ;exit decode
 450 not_rp:
 451         lea.l   PTENRM,a1       ;load a1 with RM table base
 452 rmode:
 453         clr.l   d3              ;clr table index
 454 e_loop:
 455         lsr.l   #1,d0           ;shift next bit into carry
 456         bcc.b   e_next          ;if zero, skip the mul
 457         fmul.x  (a1,d3),fp1     ;mul by 10**(d3_bit_no)
 458 e_next:
 459         add.l   #12,d3          ;inc d3 to next pwrten table entry
 460         tst.l   d0              ;test if ISCALE is zero
 461         bne.b   e_loop          ;if not, loop
 462
 463
 464 * A8. Clr INEX; Force RZ.
 465 *     The operation in A3 above may have set INEX2.
 466 *     RZ mode is forced for the scaling operation to insure
 467 *     only one rounding error.  The grs bits are collected in
 468 *     the INEX flag for use in A10.
 469 *
 470 * Register usage:
 471 *       Input/Output
 472
 473         fmove.l #0,FPSR         ;clr INEX
 474         fmove.l #rz_mode,FPCR   ;set RZ rounding mode
 475
 476
 477 * A9. Scale X -> Y.
 478 *     The mantissa is scaled to the desired number of significant
 479 *     digits.  The excess digits are collected in INEX2. If mul,
 480 *     Check d2 for excess 10 exponential value.  If not zero,
 481 *     the iscale value would have caused the pwrten calculation
 482 *     to overflow.  Only a negative iscale can cause this, so
 483 *     multiply by 10^(d2), which is now only allowed to be 24,
 484 *     with a multiply by 10^8 and 10^16, which is exact since
 485 *     10^24 is exact.  If the input was denormalized, we must
 486 *     create a busy stack frame with the mul command and the
 487 *     two operands, and allow the fpu to complete the multiply.
 488 *
 489 * Register usage:
 490 *       Input/Output
 491 *       d0: FPCR with RZ mode/Unchanged
 492 *       d2: 0 or 24/unchanged
 493 *       d3: x/x
 494 *       d4: LEN/Unchanged
 495 *       d5: ICTR:LAMBDA
 496 *       d6: ILOG/Unchanged
 497 *       d7: k-factor/Unchanged
 498 *       a0: ptr for original operand/final result
 499 *       a1: ptr to PTENRM array/Unchanged
 500 *       a2: x/x
 501 *       fp0: float(ILOG)/X adjusted for SCALE (Y)
 502 *       fp1: 10^ISCALE/Unchanged
 503 *       fp2: x/x
 504 *       F_SCR1:x/x
 505 *       F_SCR2:Abs(X) with $3fff exponent/Unchanged
 506 *       L_SCR1:x/x
 507 *       L_SCR2:first word of X packed/Unchanged
 508
 509 A9_str:
 510         fmove.x (a0),fp0        ;load X from memory
 511         fabs.x  fp0             ;use abs(X)
 512         tst.w   d5              ;LAMBDA is in lower word of d5
 513         bne.b   short_sc_mul    ;if neg (LAMBDA = 1), scale by mul
 514         fdiv.x  fp1,fp0         ;calculate X / SCALE -> Y to fp0
 515         bra.b   A10_st          ;branch to A10
 516
 517 sc_mul:
 518 short_sc_mul:
 519         tst.b   BINDEC_FLG(a6)  ;check for denorm
 520         beq.b   A9_norm         ;if norm, continue with mul
 521         fmovem.x fp1,-(a7)      ;load ETEMP with 10^ISCALE
 522         move.l  8(a0),-(a7)     ;load FPTEMP with input arg
 523         move.l  4(a0),-(a7)
 524         move.l  (a0),-(a7)
 525         move.l  #18,d3          ;load count for busy stack
 526 A9_loop:
 527         clr.l   -(a7)           ;clear lword on stack
 528         dbf.w   d3,A9_loop
 529         move.b  VER_TMP(a6),(a7) ;write current version number
 530         move.b  #BUSY_SIZE-4,1(a7) ;write current busy size
 531         move.b  #$10,$44(a7)    ;set fcefpte[15] bit
 532         move.w  #$0023,$40(a7)  ;load cmdreg1b with mul command
 533         move.b  #$fe,$8(a7)     ;load all 1s to cu savepc
 534         frestore (a7)+          ;restore frame to fpu for completion
 535         fmul.x  36(a1),fp0      ;multiply fp0 by 10^8
 536         fmul.x  48(a1),fp0      ;multiply fp0 by 10^16
 537         bra.b   A10_st
 538 A9_norm:
 539         tst.w   d2              ;test for small exp case
 540         beq.b   A9_con          ;if zero, continue as normal
 541         fmul.x  36(a1),fp0      ;multiply fp0 by 10^8
 542         fmul.x  48(a1),fp0      ;multiply fp0 by 10^16
 543 A9_con:
 544         fmul.x  fp1,fp0         ;calculate X * SCALE -> Y to fp0
 545
 546
 547 * A10. Or in INEX.
 548 *      If INEX is set, round error occurred.  This is compensated
 549 *      for by 'or-ing' in the INEX2 flag to the lsb of Y.
 550 *
 551 * Register usage:
 552 *       Input/Output
 553 *       d0: FPCR with RZ mode/FPSR with INEX2 isolated
 554 *       d2: x/x
 555 *       d3: x/x
 556 *       d4: LEN/Unchanged
 557 *       d5: ICTR:LAMBDA
 558 *       d6: ILOG/Unchanged
 559 *       d7: k-factor/Unchanged
 560 *       a0: ptr for original operand/final result
 561 *       a1: ptr to PTENxx array/Unchanged
 562 *       a2: x/ptr to FP_SCR2(a6)
 563 *       fp0: Y/Y with lsb adjusted
 564 *       fp1: 10^ISCALE/Unchanged
 565 *       fp2: x/x
 566
 567 A10_st:
 568         fmove.l FPSR,d0         ;get FPSR
 569         fmove.x fp0,FP_SCR2(a6) ;move Y to memory
 570         lea.l   FP_SCR2(a6),a2  ;load a2 with ptr to FP_SCR2
 571         btst.l  #9,d0           ;check if INEX2 set
 572         beq.b   A11_st          ;if clear, skip rest
 573         ori.l   #1,8(a2)        ;or in 1 to lsb of mantissa
 574         fmove.x FP_SCR2(a6),fp0 ;write adjusted Y back to fpu
 575
 576
 577 * A11. Restore original FPCR; set size ext.
 578 *      Perform FINT operation in the user's rounding mode.  Keep
 579 *      the size to extended.  The sintdo entry point in the sint
 580 *      routine expects the FPCR value to be in USER_FPCR for
 581 *      mode and precision.  The original FPCR is saved in L_SCR1.
 582
 583 A11_st:
 584         move.l  USER_FPCR(a6),L_SCR1(a6) ;save it for later
 585         andi.l  #$00000030,USER_FPCR(a6) ;set size to ext,
 586 *                                       ;block exceptions
 587
 588
 589 * A12. Calculate YINT = FINT(Y) according to user's rounding mode.
 590 *      The FPSP routine sintd0 is used.  The output is in fp0.
 591 *
 592 * Register usage:
 593 *       Input/Output
 594 *       d0: FPSR with AINEX cleared/FPCR with size set to ext
 595 *       d2: x/x/scratch
 596 *       d3: x/x
 597 *       d4: LEN/Unchanged
 598 *       d5: ICTR:LAMBDA/Unchanged
 599 *       d6: ILOG/Unchanged
 600 *       d7: k-factor/Unchanged
 601 *       a0: ptr for original operand/src ptr for sintdo
 602 *       a1: ptr to PTENxx array/Unchanged
 603 *       a2: ptr to FP_SCR2(a6)/Unchanged
 604 *       a6: temp pointer to FP_SCR2(a6) - orig value saved and restored
 605 *       fp0: Y/YINT
 606 *       fp1: 10^ISCALE/Unchanged
 607 *       fp2: x/x
 608 *       F_SCR1:x/x
 609 *       F_SCR2:Y adjusted for inex/Y with original exponent
 610 *       L_SCR1:x/original USER_FPCR
 611 *       L_SCR2:first word of X packed/Unchanged
 612
 613 A12_st:
 614         movem.l d0-d1/a0-a1,-(a7)       ;save regs used by sintd0
 615         move.l  L_SCR1(a6),-(a7)
 616         move.l  L_SCR2(a6),-(a7)
 617         lea.l   FP_SCR2(a6),a0          ;a0 is ptr to F_SCR2(a6)
 618         fmove.x fp0,(a0)                ;move Y to memory at FP_SCR2(a6)
 619         tst.l   L_SCR2(a6)              ;test sign of original operand
 620         bge.b   do_fint                 ;if pos, use Y
 621         or.l    #$80000000,(a0)         ;if neg, use -Y
 622 do_fint:
 623         move.l  USER_FPSR(a6),-(a7)
 624         bsr     sintdo                  ;sint routine returns int in fp0
 625         move.b  (a7),USER_FPSR(a6)
 626         add.l   #4,a7
 627         move.l  (a7)+,L_SCR2(a6)
 628         move.l  (a7)+,L_SCR1(a6)
 629         movem.l (a7)+,d0-d1/a0-a1       ;restore regs used by sint
 630         move.l  L_SCR2(a6),FP_SCR2(a6)  ;restore original exponent
 631         move.l  L_SCR1(a6),USER_FPCR(a6) ;restore user's FPCR
 632
 633
 634 * A13. Check for LEN digits.
 635 *      If the int operation results in more than LEN digits,
 636 *      or less than LEN -1 digits, adjust ILOG and repeat from
 637 *      A6.  This test occurs only on the first pass.  If the
 638 *      result is exactly 10^LEN, decrement ILOG and divide
 639 *      the mantissa by 10.  The calculation of 10^LEN cannot
 640 *      be inexact, since all powers of ten upto 10^27 are exact
 641 *      in extended precision, so the use of a previous power-of-ten
 642 *      table will introduce no error.
 643 *
 644 *
 645 * Register usage:
 646 *       Input/Output
 647 *       d0: FPCR with size set to ext/scratch final = 0
 648 *       d2: x/x
 649 *       d3: x/scratch final = x
 650 *       d4: LEN/LEN adjusted
 651 *       d5: ICTR:LAMBDA/LAMBDA:ICTR
 652 *       d6: ILOG/ILOG adjusted
 653 *       d7: k-factor/Unchanged
 654 *       a0: pointer into memory for packed bcd string formation
 655 *       a1: ptr to PTENxx array/Unchanged
 656 *       a2: ptr to FP_SCR2(a6)/Unchanged
 657 *       fp0: int portion of Y/abs(YINT) adjusted
 658 *       fp1: 10^ISCALE/Unchanged
 659 *       fp2: x/10^LEN
 660 *       F_SCR1:x/x
 661 *       F_SCR2:Y with original exponent/Unchanged
 662 *       L_SCR1:original USER_FPCR/Unchanged
 663 *       L_SCR2:first word of X packed/Unchanged
 664
 665 A13_st:
 666         swap    d5              ;put ICTR in lower word of d5
 667         tst.w   d5              ;check if ICTR = 0
 668         bne     not_zr          ;if non-zero, go to second test
 669 *
 670 * Compute 10^(LEN-1)
 671 *
 672         fmove.s FONE,fp2        ;init fp2 to 1.0
 673         move.l  d4,d0           ;put LEN in d0
 674         subq.l  #1,d0           ;d0 = LEN -1
 675         clr.l   d3              ;clr table index
 676 l_loop:
 677         lsr.l   #1,d0           ;shift next bit into carry
 678         bcc.b   l_next          ;if zero, skip the mul
 679         fmul.x  (a1,d3),fp2     ;mul by 10**(d3_bit_no)
 680 l_next:
 681         add.l   #12,d3          ;inc d3 to next pwrten table entry
 682         tst.l   d0              ;test if LEN is zero
 683         bne.b   l_loop          ;if not, loop
 684 *
 685 * 10^LEN-1 is computed for this test and A14.  If the input was
 686 * denormalized, check only the case in which YINT > 10^LEN.
 687 *
 688         tst.b   BINDEC_FLG(a6)  ;check if input was norm
 689         beq.b   A13_con         ;if norm, continue with checking
 690         fabs.x  fp0             ;take abs of YINT
 691         bra     test_2
 692 *
 693 * Compare abs(YINT) to 10^(LEN-1) and 10^LEN
 694 *
 695 A13_con:
 696         fabs.x  fp0             ;take abs of YINT
 697         fcmp.x  fp2,fp0         ;compare abs(YINT) with 10^(LEN-1)
 698         fbge.w  test_2          ;if greater, do next test
 699         subq.l  #1,d6           ;subtract 1 from ILOG
 700         move.w  #1,d5           ;set ICTR
 701         fmove.l #rm_mode,FPCR   ;set rmode to RM
 702         fmul.s  FTEN,fp2        ;compute 10^LEN
 703         bra.w   A6_str          ;return to A6 and recompute YINT
 704 test_2:
 705         fmul.s  FTEN,fp2        ;compute 10^LEN
 706         fcmp.x  fp2,fp0         ;compare abs(YINT) with 10^LEN
 707         fblt.w  A14_st          ;if less, all is ok, go to A14
 708         fbgt.w  fix_ex          ;if greater, fix and redo
 709         fdiv.s  FTEN,fp0        ;if equal, divide by 10
 710         addq.l  #1,d6           ; and inc ILOG
 711         bra.b   A14_st          ; and continue elsewhere
 712 fix_ex:
 713         addq.l  #1,d6           ;increment ILOG by 1
 714         move.w  #1,d5           ;set ICTR
 715         fmove.l #rm_mode,FPCR   ;set rmode to RM
 716         bra.w   A6_str          ;return to A6 and recompute YINT
 717 *
 718 * Since ICTR <> 0, we have already been through one adjustment,
 719 * and shouldn't have another; this is to check if abs(YINT) = 10^LEN
 720 * 10^LEN is again computed using whatever table is in a1 since the
 721 * value calculated cannot be inexact.
 722 *
 723 not_zr:
 724         fmove.s FONE,fp2        ;init fp2 to 1.0
 725         move.l  d4,d0           ;put LEN in d0
 726         clr.l   d3              ;clr table index
 727 z_loop:
 728         lsr.l   #1,d0           ;shift next bit into carry
 729         bcc.b   z_next          ;if zero, skip the mul
 730         fmul.x  (a1,d3),fp2     ;mul by 10**(d3_bit_no)
 731 z_next:
 732         add.l   #12,d3          ;inc d3 to next pwrten table entry
 733         tst.l   d0              ;test if LEN is zero
 734         bne.b   z_loop          ;if not, loop
 735         fabs.x  fp0             ;get abs(YINT)
 736         fcmp.x  fp2,fp0         ;check if abs(YINT) = 10^LEN
 737         fbne.w  A14_st          ;if not, skip this
 738         fdiv.s  FTEN,fp0        ;divide abs(YINT) by 10
 739         addq.l  #1,d6           ;and inc ILOG by 1
 740         addq.l  #1,d4           ; and inc LEN
 741         fmul.s  FTEN,fp2        ; if LEN++, the get 10^^LEN
 742
 743
 744 * A14. Convert the mantissa to bcd.
 745 *      The binstr routine is used to convert the LEN digit
 746 *      mantissa to bcd in memory.  The input to binstr is
 747 *      to be a fraction; i.e. (mantissa)/10^LEN and adjusted
 748 *      such that the decimal point is to the left of bit 63.
 749 *      The bcd digits are stored in the correct position in
 750 *      the final string area in memory.
 751 *
 752 *
 753 * Register usage:
 754 *       Input/Output
 755 *       d0: x/LEN call to binstr - final is 0
 756 *       d1: x/0
 757 *       d2: x/ms 32-bits of mant of abs(YINT)
 758 *       d3: x/ls 32-bits of mant of abs(YINT)
 759 *       d4: LEN/Unchanged
 760 *       d5: ICTR:LAMBDA/LAMBDA:ICTR
 761 *       d6: ILOG
 762 *       d7: k-factor/Unchanged
 763 *       a0: pointer into memory for packed bcd string formation
 764 *           /ptr to first mantissa byte in result string
 765 *       a1: ptr to PTENxx array/Unchanged
 766 *       a2: ptr to FP_SCR2(a6)/Unchanged
 767 *       fp0: int portion of Y/abs(YINT) adjusted
 768 *       fp1: 10^ISCALE/Unchanged
 769 *       fp2: 10^LEN/Unchanged
 770 *       F_SCR1:x/Work area for final result
 771 *       F_SCR2:Y with original exponent/Unchanged
 772 *       L_SCR1:original USER_FPCR/Unchanged
 773 *       L_SCR2:first word of X packed/Unchanged
 774
 775 A14_st:
 776         fmove.l #rz_mode,FPCR   ;force rz for conversion
 777         fdiv.x  fp2,fp0         ;divide abs(YINT) by 10^LEN
 778         lea.l   FP_SCR1(a6),a0
 779         fmove.x fp0,(a0)        ;move abs(YINT)/10^LEN to memory
 780         move.l  4(a0),d2        ;move 2nd word of FP_RES to d2
 781         move.l  8(a0),d3        ;move 3rd word of FP_RES to d3
 782         clr.l   4(a0)           ;zero word 2 of FP_RES
 783         clr.l   8(a0)           ;zero word 3 of FP_RES
 784         move.l  (a0),d0         ;move exponent to d0
 785         swap    d0              ;put exponent in lower word
 786         beq.b   no_sft          ;if zero, don't shift
 787         subi.l  #$3ffd,d0       ;sub bias less 2 to make fract
 788         tst.l   d0              ;check if > 1
 789         bgt.b   no_sft          ;if so, don't shift
 790         neg.l   d0              ;make exp positive
 791 m_loop:
 792         lsr.l   #1,d2           ;shift d2:d3 right, add 0s
 793         roxr.l  #1,d3           ;the number of places
 794         dbf.w   d0,m_loop       ;given in d0
 795 no_sft:
 796         tst.l   d2              ;check for mantissa of zero
 797         bne.b   no_zr           ;if not, go on
 798         tst.l   d3              ;continue zero check
 799         beq.b   zer_m           ;if zero, go directly to binstr
 800 no_zr:
 801         clr.l   d1              ;put zero in d1 for addx
 802         addi.l  #$00000080,d3   ;inc at bit 7
 803         addx.l  d1,d2           ;continue inc
 804         andi.l  #$ffffff80,d3   ;strip off lsb not used by 882
 805 zer_m:
 806         move.l  d4,d0           ;put LEN in d0 for binstr call
 807         addq.l  #3,a0           ;a0 points to M16 byte in result
 808         bsr     binstr          ;call binstr to convert mant
 809
 810
 811 * A15. Convert the exponent to bcd.
 812 *      As in A14 above, the exp is converted to bcd and the
 813 *      digits are stored in the final string.
 814 *
 815 *      Digits are stored in L_SCR1(a6) on return from BINDEC as:
 816 *
 817 *        32               16 15                0
 818 *       -----------------------------------------
 819 *       |  0 | e3 | e2 | e1 | e4 |  X |  X |  X |
 820 *       -----------------------------------------
 821 *
 822 * And are moved into their proper places in FP_SCR1.  If digit e4
 823 * is non-zero, OPERR is signaled.  In all cases, all 4 digits are
 824 * written as specified in the 881/882 manual for packed decimal.
 825 *
 826 * Register usage:
 827 *       Input/Output
 828 *       d0: x/LEN call to binstr - final is 0
 829 *       d1: x/scratch (0);shift count for final exponent packing
 830 *       d2: x/ms 32-bits of exp fraction/scratch
 831 *       d3: x/ls 32-bits of exp fraction
 832 *       d4: LEN/Unchanged
 833 *       d5: ICTR:LAMBDA/LAMBDA:ICTR
 834 *       d6: ILOG
 835 *       d7: k-factor/Unchanged
 836 *       a0: ptr to result string/ptr to L_SCR1(a6)
 837 *       a1: ptr to PTENxx array/Unchanged
 838 *       a2: ptr to FP_SCR2(a6)/Unchanged
 839 *       fp0: abs(YINT) adjusted/float(ILOG)
 840 *       fp1: 10^ISCALE/Unchanged
 841 *       fp2: 10^LEN/Unchanged
 842 *       F_SCR1:Work area for final result/BCD result
 843 *       F_SCR2:Y with original exponent/ILOG/10^4
 844 *       L_SCR1:original USER_FPCR/Exponent digits on return from binstr
 845 *       L_SCR2:first word of X packed/Unchanged
 846
 847 A15_st:
 848         tst.b   BINDEC_FLG(a6)  ;check for denorm
 849         beq.b   not_denorm
 850         ftst.x  fp0             ;test for zero
 851         fbeq.w  den_zero        ;if zero, use k-factor or 4933
 852         fmove.l d6,fp0          ;float ILOG
 853         fabs.x  fp0             ;get abs of ILOG
 854         bra.b   convrt
 855 den_zero:
 856         tst.l   d7              ;check sign of the k-factor
 857         blt.b   use_ilog        ;if negative, use ILOG
 858         fmove.s F4933,fp0       ;force exponent to 4933
 859         bra.b   convrt          ;do it
 860 use_ilog:
 861         fmove.l d6,fp0          ;float ILOG
 862         fabs.x  fp0             ;get abs of ILOG
 863         bra.b   convrt
 864 not_denorm:
 865         ftst.x  fp0             ;test for zero
 866         fbne.w  not_zero        ;if zero, force exponent
 867         fmove.s FONE,fp0        ;force exponent to 1
 868         bra.b   convrt          ;do it
 869 not_zero:
 870         fmove.l d6,fp0          ;float ILOG
 871         fabs.x  fp0             ;get abs of ILOG
 872 convrt:
 873         fdiv.x  24(a1),fp0      ;compute ILOG/10^4
 874         fmove.x fp0,FP_SCR2(a6) ;store fp0 in memory
 875         move.l  4(a2),d2        ;move word 2 to d2
 876         move.l  8(a2),d3        ;move word 3 to d3
 877         move.w  (a2),d0         ;move exp to d0
 878         beq.b   x_loop_fin      ;if zero, skip the shift
 879         subi.w  #$3ffd,d0       ;subtract off bias
 880         neg.w   d0              ;make exp positive
 881 x_loop:
 882         lsr.l   #1,d2           ;shift d2:d3 right
 883         roxr.l  #1,d3           ;the number of places
 884         dbf.w   d0,x_loop       ;given in d0
 885 x_loop_fin:
 886         clr.l   d1              ;put zero in d1 for addx
 887         addi.l  #$00000080,d3   ;inc at bit 6
 888         addx.l  d1,d2           ;continue inc
 889         andi.l  #$ffffff80,d3   ;strip off lsb not used by 882
 890         move.l  #4,d0           ;put 4 in d0 for binstr call
 891         lea.l   L_SCR1(a6),a0   ;a0 is ptr to L_SCR1 for exp digits
 892         bsr     binstr          ;call binstr to convert exp
 893         move.l  L_SCR1(a6),d0   ;load L_SCR1 lword to d0
 894         move.l  #12,d1          ;use d1 for shift count
 895         lsr.l   d1,d0           ;shift d0 right by 12
 896         bfins   d0,FP_SCR1(a6){4:12} ;put e3:e2:e1 in FP_SCR1
 897         lsr.l   d1,d0           ;shift d0 right by 12
 898         bfins   d0,FP_SCR1(a6){16:4} ;put e4 in FP_SCR1
 899         tst.b   d0              ;check if e4 is zero
 900         beq.b   A16_st          ;if zero, skip rest
 901         or.l    #opaop_mask,USER_FPSR(a6) ;set OPERR & AIOP in USER_FPSR
 902
 903
 904 * A16. Write sign bits to final string.
 905 *          Sigma is bit 31 of initial value; RHO is bit 31 of d6 (ILOG).
 906 *
 907 * Register usage:
 908 *       Input/Output
 909 *       d0: x/scratch - final is x
 910 *       d2: x/x
 911 *       d3: x/x
 912 *       d4: LEN/Unchanged
 913 *       d5: ICTR:LAMBDA/LAMBDA:ICTR
 914 *       d6: ILOG/ILOG adjusted
 915 *       d7: k-factor/Unchanged
 916 *       a0: ptr to L_SCR1(a6)/Unchanged
 917 *       a1: ptr to PTENxx array/Unchanged
 918 *       a2: ptr to FP_SCR2(a6)/Unchanged
 919 *       fp0: float(ILOG)/Unchanged
 920 *       fp1: 10^ISCALE/Unchanged
 921 *       fp2: 10^LEN/Unchanged
 922 *       F_SCR1:BCD result with correct signs
 923 *       F_SCR2:ILOG/10^4
 924 *       L_SCR1:Exponent digits on return from binstr
 925 *       L_SCR2:first word of X packed/Unchanged
 926
 927 A16_st:
 928         clr.l   d0              ;clr d0 for collection of signs
 929         andi.b  #$0f,FP_SCR1(a6) ;clear first nibble of FP_SCR1
 930         tst.l   L_SCR2(a6)      ;check sign of original mantissa
 931         bge.b   mant_p          ;if pos, don't set SM
 932         moveq.l #2,d0           ;move 2 in to d0 for SM
 933 mant_p:
 934         tst.l   d6              ;check sign of ILOG
 935         bge.b   wr_sgn          ;if pos, don't set SE
 936         addq.l  #1,d0           ;set bit 0 in d0 for SE
 937 wr_sgn:
 938         bfins   d0,FP_SCR1(a6){0:2} ;insert SM and SE into FP_SCR1
 939
 940 * Clean up and restore all registers used.
 941
 942         fmove.l #0,FPSR         ;clear possible inex2/ainex bits
 943         fmovem.x (a7)+,fp0-fp2
 944         movem.l (a7)+,d2-d7/a2
 945         rts
 946
 947         end