i386/linux/linux-0.99/drivers/FPU-emu/reg_round.S

   1         .file "reg_round.S"
   2 /*---------------------------------------------------------------------------+
   3  |  reg_round.S                                                              |
   4  |                                                                           |
   5  | Rounding/truncation/etc for FPU basic arithmetic functions.               |
   6  |                                                                           |
   7  | Copyright (C) 1993                                                        |
   8  |                       W. Metzenthen, 22 Parker St, Ormond, Vic 3163,      |
   9  |                       Australia.  E-mail   billm@vaxc.cc.monash.edu.au    |
  10  |                                                                           |
  11  | This code has four possible entry points.                                 |
   12  | The following must be entered by a jmp instruction:                       |
  13  |   fpu_reg_round, fpu_reg_round_sqrt, and fpu_Arith_exit.                  |
  14  |                                                                           |
  15  | The _round_reg entry point is intended to be used by C code.              |
  16  | From C, call as:                                                          |
  17  | void round_reg(FPU_REG *arg, unsigned int extent, unsigned int control_w) |
  18  |                                                                           |
  19  | For correct "up" and "down" rounding, the argument must have the correct  |
  20  | sign.                                                                     |
  21  |                                                                           |
  22  +---------------------------------------------------------------------------*/
  23
  24 /*---------------------------------------------------------------------------+
  25  | Four entry points.                                                        |
  26  |                                                                           |
  27  | Needed by both the fpu_reg_round and fpu_reg_round_sqrt entry points:     |
  28  |  %eax:%ebx  64 bit significand                                            |
  29  |  %edx       32 bit extension of the significand                           |
  30  |  %edi       pointer to an FPU_REG for the result to be stored             |
  31  |  stack      calling function must have set up a C stack frame and         |
  32  |             pushed %esi, %edi, and %ebx                                   |
  33  |                                                                           |
  34  | Needed just for the fpu_reg_round_sqrt entry point:                       |
  35  |  %cx  A control word in the same format as the FPU control word.          |
  36  | Otherwise, PARAM4 must give such a value.                                 |
  37  |                                                                           |
  38  |                                                                           |
  39  | The significand and its extension are assumed to be exact in the          |
  40  | following sense:                                                          |
  41  |   If the significand by itself is the exact result then the significand   |
  42  |   extension (%edx) must contain 0, otherwise the significand extension    |
  43  |   must be non-zero.                                                       |
  44  |   If the significand extension is non-zero then the significand is        |
  45  |   smaller than the magnitude of the correct exact result by an amount     |
  46  |   greater than zero and less than one ls bit of the significand.          |
  47  |   The significand extension is only required to have three possible       |
  48  |   non-zero values:                                                        |
  49  |       less than 0x80000000  <=> the significand is less than 1/2 an ls    |
  50  |                                 bit smaller than the magnitude of the     |
  51  |                                 true exact result.                        |
  52  |         exactly 0x80000000  <=> the significand is exactly 1/2 an ls bit  |
  53  |                                 smaller than the magnitude of the true    |
  54  |                                 exact result.                             |
  55  |    greater than 0x80000000  <=> the significand is more than 1/2 an ls    |
  56  |                                 bit smaller than the magnitude of the     |
  57  |                                 true exact result.                        |
  58  |                                                                           |
  59  +---------------------------------------------------------------------------*/
  60
  61 /*---------------------------------------------------------------------------+
  62  |  The code in this module has become quite complex, but it should handle   |
  63  |  all of the FPU flags which are set at this stage of the basic arithmetic |
  64  |  computations.                                                            |
  65  |  There are a few rare cases where the results are not set identically to  |
  66  |  a real FPU. These require a bit more thought because at this stage the   |
  67  |  results of the code here appear to be more consistent...                 |
  68  |  This may be changed in a future version.                                 |
  69  +---------------------------------------------------------------------------*/
  70
  71
  72 #include "fpu_asm.h"
  73 #include "exception.h"
  74 #include "control_w.h"
  75
   76 /* Flags for FPU_bits_lost: 0 = nothing lost, LOST_DOWN = result was truncated, LOST_UP = result was incremented */
   77 #define LOST_DOWN       $1
   78 #define LOST_UP         $2
   79
   80 /* Flags for FPU_denormal: 0 = not a de-normal, DENORMAL = de-normalising shift applied, UNMASKED_UNDERFLOW = underflow unmasked so no shift applied */
   81 #define DENORMAL        $1
   82 #define UNMASKED_UNDERFLOW $2
   83
   84 /* FPU_bits_lost and FPU_denormal are two one-byte locals; where they live depends on REENTRANT_FPU. */
   85 #ifdef REENTRANT_FPU
   86 /*      Make the code re-entrant by putting
   87         local storage on the stack: */
   88 #define FPU_bits_lost   (%esp)
   89 #define FPU_denormal    1(%esp)
   90 /* Both are %esp-relative, so they are only valid while no extra words are pushed on the stack. */
   91 #else
   92 /*      Not re-entrant, so we can gain speed by putting
   93         local storage in a static area: */
   94 .data
   95         .align 2,0
   96 FPU_bits_lost:
   97         .byte   0
   98 FPU_denormal:
   99         .byte   0
  100 #endif REENTRANT_FPU
 101
 102
 103 .text
 104         .align 2,144
 105 .globl fpu_reg_round
 106 .globl fpu_reg_round_sqrt
 107 .globl fpu_Arith_exit
 108 .globl _round_reg
 109
  110 /* Entry point when called from C */
  111 _round_reg:
  112         pushl   %ebp                    /* Build the frame and save %esi, %edi, %ebx, as the jmp entry points require of their callers */
  113         movl    %esp,%ebp
  114         pushl   %esi
  115         pushl   %edi
  116         pushl   %ebx
  117         /* Load the registers the way fpu_reg_round_sqrt expects to find them */
  118         movl    PARAM1,%edi             /* arg: the FPU_REG to round; the result is stored back into it */
  119         movl    SIGH(%edi),%eax         /* %eax:%ebx = 64 bit significand */
  120         movl    SIGL(%edi),%ebx
  121         movl    PARAM2,%edx             /* extent: extension of the significand */
  122         movl    PARAM3,%ecx             /* control_w */
  123         jmp     fpu_reg_round_sqrt
  124
  125 fpu_reg_round:                  /* Normal entry point */
  126         movl    PARAM4,%ecx             /* control word taken from the caller's frame */
  127         /* Falls through: from here on %ecx holds the control word */
  128 fpu_reg_round_sqrt:             /* Entry point from wm_sqrt.S */
  129
  130 #ifdef REENTRANT_FPU
  131         pushl   %ebx            /* adjust the stack pointer: reserves the dword that holds FPU_bits_lost and FPU_denormal */
  132 #endif REENTRANT_FPU
  133
  134 #ifdef PARANOID
  135 /* Cannot use this here yet */
  136 /*      orl     %eax,%eax */
  137 /*      jns     L_entry_bugged */
  138 #endif PARANOID
  139
  140         cmpl    EXP_UNDER,EXP(%edi)
  141         jle     xMake_denorm                    /* The number is a de-normal */
  142
  143         movb    $0,FPU_denormal                 /* 0 -> not a de-normal */
  144         /* xMake_denorm and xUnmasked_underflow rejoin here with FPU_denormal already set */
  145 xDenorm_done:
  146         movb    $0,FPU_bits_lost                /* No bits yet lost in rounding */
  147         /* Dispatch on the precision control field of the control word */
  148         movl    %ecx,%esi               /* keep the whole control word; the rounding-mode tests reload it from %esi */
  149         andl    CW_PC,%ecx
  150         cmpl    PR_64_BITS,%ecx
  151         je      LRound_To_64
  152
  153         cmpl    PR_53_BITS,%ecx
  154         je      LRound_To_53
  155
  156         cmpl    PR_24_BITS,%ecx
  157         je      LRound_To_24
  158
  159 #ifdef PARANOID
  160         jmp     L_bugged        /* There is no bug, just a bad control word */
  161 #endif PARANOID
  162         /* Without PARANOID an unrecognised precision field falls through to the 24 bit code */
 163
  164 /* Round etc to 24 bit precision */
  165 LRound_To_24:
  166         movl    %esi,%ecx               /* control word saved in %esi at xDenorm_done */
  167         andl    CW_RC,%ecx              /* select the rounding control field */
  168         cmpl    RC_RND,%ecx
  169         je      LRound_nearest_24
  170
  171         cmpl    RC_CHOP,%ecx
  172         je      LCheck_truncate_24
  173
  174         cmpl    RC_UP,%ecx              /* Towards +infinity */
  175         je      LUp_24
  176
  177         cmpl    RC_DOWN,%ecx            /* Towards -infinity */
  178         je      LDown_24
  179
  180 #ifdef PARANOID
  181         jmp     L_bugged
  182 #endif PARANOID
  183         /* Without PARANOID an unmatched rounding field falls through to LUp_24 */
  184 LUp_24:
  185         cmpb    SIGN_POS,SIGN(%edi)
  186         jne     LCheck_truncate_24      /* If negative then  up==truncate */
  187
  188         jmp     LCheck_24_round_up
  189
  190 LDown_24:
  191         cmpb    SIGN_POS,SIGN(%edi)
  192         je      LCheck_truncate_24      /* If positive then  down==truncate */
  193         /* Directed rounding away from zero: increment if any bit below the 24th is set */
  194 LCheck_24_round_up:
  195         movl    %eax,%ecx
  196         andl    $0x000000ff,%ecx        /* low 8 bits of %eax lie below the 24th bit */
  197         orl     %ebx,%ecx               /* ... as do all of %ebx */
  198         orl     %edx,%ecx               /* ... and the extension */
  199         jnz     LDo_24_round_up
  200         jmp     LRe_normalise           /* Already exact to 24 bits */
  201
  202 LRound_nearest_24:
  203         /* Do rounding of the 24th bit if needed (nearest or even) */
  204         movl    %eax,%ecx
  205         andl    $0x000000ff,%ecx
  206         cmpl    $0x00000080,%ecx        /* 0x80 in the low byte is half of the 24th bit */
  207         jc      LCheck_truncate_24      /* less than half, no increment needed */
  208
  209         jne     LGreater_Half_24        /* greater than half, increment needed */
  210
  211         /* Possibly half, we need to check the ls bits */
  212         orl     %ebx,%ebx
  213         jnz     LGreater_Half_24        /* greater than half, increment needed */
  214
  215         orl     %edx,%edx
  216         jnz     LGreater_Half_24        /* greater than half, increment needed */
  217
  218         /* Exactly half, increment only if 24th bit is 1 (round to even) */
  219         testl   $0x00000100,%eax
  220         jz      LDo_truncate_24
  221
  222 LGreater_Half_24:                       /* Rounding: increment at the 24th bit */
  223 LDo_24_round_up:
  224         andl    $0xffffff00,%eax        /* Truncate to 24 bits */
  225         xorl    %ebx,%ebx
  226         movb    LOST_UP,FPU_bits_lost
  227         addl    $0x00000100,%eax        /* the carry out of this add is what LCheck_Round_Overflow tests */
  228         jmp     LCheck_Round_Overflow
  229
  230 LCheck_truncate_24:
  231         movl    %eax,%ecx
  232         andl    $0x000000ff,%ecx
  233         orl     %ebx,%ecx
  234         orl     %edx,%ecx
  235         jz      LRe_normalise           /* No truncation needed */
  236
  237 LDo_truncate_24:
  238         andl    $0xffffff00,%eax        /* Truncate to 24 bits */
  239         xorl    %ebx,%ebx
  240         movb    LOST_DOWN,FPU_bits_lost
  241         jmp     LRe_normalise
 242
 243
  244 /* Round etc to 53 bit precision */
  245 LRound_To_53:
  246         movl    %esi,%ecx               /* control word saved in %esi at xDenorm_done */
  247         andl    CW_RC,%ecx              /* select the rounding control field */
  248         cmpl    RC_RND,%ecx
  249         je      LRound_nearest_53
  250
  251         cmpl    RC_CHOP,%ecx
  252         je      LCheck_truncate_53
  253
  254         cmpl    RC_UP,%ecx              /* Towards +infinity */
  255         je      LUp_53
  256
  257         cmpl    RC_DOWN,%ecx            /* Towards -infinity */
  258         je      LDown_53
  259
  260 #ifdef PARANOID
  261         jmp     L_bugged
  262 #endif PARANOID
  263         /* Without PARANOID an unmatched rounding field falls through to LUp_53 */
  264 LUp_53:
  265         cmpb    SIGN_POS,SIGN(%edi)
  266         jne     LCheck_truncate_53      /* If negative then  up==truncate */
  267
  268         jmp     LCheck_53_round_up
  269
  270 LDown_53:
  271         cmpb    SIGN_POS,SIGN(%edi)
  272         je      LCheck_truncate_53      /* If positive then  down==truncate */
  273         /* Directed rounding away from zero: increment if any bit below the 53rd is set */
  274 LCheck_53_round_up:
  275         movl    %ebx,%ecx
  276         andl    $0x000007ff,%ecx        /* low 11 bits of %ebx lie below the 53rd bit */
  277         orl     %edx,%ecx               /* ... as does the extension */
  278         jnz     LDo_53_round_up
  279         jmp     LRe_normalise           /* Already exact to 53 bits */
  280
  281 LRound_nearest_53:
  282         /* Do rounding of the 53rd bit if needed (nearest or even) */
  283         movl    %ebx,%ecx
  284         andl    $0x000007ff,%ecx
  285         cmpl    $0x00000400,%ecx        /* 0x400 in the low 11 bits is half of the 53rd bit */
  286         jc      LCheck_truncate_53      /* less than half, no increment needed */
  287
  288         jnz     LGreater_Half_53        /* greater than half, increment needed */
  289
  290         /* Possibly half, we need to check the ls bits */
  291         orl     %edx,%edx
  292         jnz     LGreater_Half_53        /* greater than half, increment needed */
  293
  294         /* Exactly half, increment only if 53rd bit is 1 (round to even) */
  295         testl   $0x00000800,%ebx
  296         jz      LTruncate_53
  297
  298 LGreater_Half_53:                       /* Rounding: increment at the 53rd bit */
  299 LDo_53_round_up:
  300         movb    LOST_UP,FPU_bits_lost
  301         andl    $0xfffff800,%ebx        /* Truncate to 53 bits */
  302         addl    $0x00000800,%ebx
  303         adcl    $0,%eax                 /* the carry out of %eax is what LCheck_Round_Overflow tests */
  304         jmp     LCheck_Round_Overflow
  305
  306 LCheck_truncate_53:
  307         movl    %ebx,%ecx
  308         andl    $0x000007ff,%ecx
  309         orl     %edx,%ecx
  310         jz      LRe_normalise           /* No truncation needed */
  311
  312 LTruncate_53:
  313         movb    LOST_DOWN,FPU_bits_lost
  314         andl    $0xfffff800,%ebx        /* Truncate to 53 bits */
  315         jmp     LRe_normalise
 316
 317
 318 /* Round etc to 64 bit precision */
 319 LRound_To_64:
 320         movl    %esi,%ecx
 321         andl    CW_RC,%ecx
 322         cmpl    RC_RND,%ecx
 323         je      LRound_nearest_64
 324
 325         cmpl    RC_CHOP,%ecx
 326         je      LCheck_truncate_64
 327
 328         cmpl    RC_UP,%ecx              /* Towards +infinity */
 329         je      LUp_64
 330
 331         cmpl    RC_DOWN,%ecx            /* Towards -infinity */
 332         je      LDown_64
 333
 334 #ifdef PARANOID
 335         jmp     L_bugged
 336 #endif PARANOID
 337
 338 LUp_64:
 339         cmpb    SIGN_POS,SIGN(%edi)
 340         jne     LCheck_truncate_64      /* If negative then  up==truncate */
 341
 342         orl     %edx,%edx
 343         jnz     LDo_64_round_up
 344         jmp     LRe_normalise
 345
 346 LDown_64:
 347         cmpb    SIGN_POS,SIGN(%edi)
 348         je      LCheck_truncate_64      /* If positive then  down==truncate */
 349
 350         orl     %edx,%edx
 351         jnz     LDo_64_round_up
 352         jmp     LRe_normalise
 353
 354 LRound_nearest_64:
 355         cmpl    $0x80000000,%edx
 356         jc      LCheck_truncate_64
 357
 358         jne     LDo_64_round_up
 359
 360         /* Now test for round-to-even */
 361         testb   $1,%ebx
 362         jz      LCheck_truncate_64
 363
 364 LDo_64_round_up:
 365         movb    LOST_UP,FPU_bits_lost
 366         addl    $1,%ebx
 367         adcl    $0,%eax
 368
 369 LCheck_Round_Overflow:
 370         jnc     LRe_normalise
 371
 372         /* Overflow, adjust the result (significand to 1.0) */
 373         rcrl    $1,%eax
 374         rcrl    $1,%ebx
 375         incl    EXP(%edi)
 376         jmp     LRe_normalise
 377
 378 LCheck_truncate_64:
 379         orl     %edx,%edx
 380         jz      LRe_normalise
 381
 382 LTruncate_64:
 383         movb    LOST_DOWN,FPU_bits_lost
 384
  385 LRe_normalise:
  386         testb   $0xff,FPU_denormal      /* non-zero: DENORMAL or UNMASKED_UNDERFLOW was recorded before rounding */
  387         jnz     xNormalise_result
  388         /* Report any precision loss recorded during rounding */
  389 xL_Normalised:
  390         cmpb    LOST_UP,FPU_bits_lost
  391         je      xL_precision_lost_up
  392
  393         cmpb    LOST_DOWN,FPU_bits_lost
  394         je      xL_precision_lost_down
  395         /* The two precision-loss handlers jump back to this label */
  396 xL_no_precision_loss:
  397         /* store the result */
  398         movb    TW_Valid,TAG(%edi)
  399         /* L_underflow_to_zero enters here after setting the tag to TW_Zero itself */
  400 xL_Store_significand:
  401         movl    %eax,SIGH(%edi)
  402         movl    %ebx,SIGL(%edi)
  403
  404         xorl    %eax,%eax       /* No errors detected. */
  405
  406         cmpl    EXP_OVER,EXP(%edi)
  407         jge     L_overflow
  408
  409 fpu_reg_round_exit:
  410 #ifdef REENTRANT_FPU
  411         popl    %ebx            /* adjust the stack pointer: drops the dword pushed at fpu_reg_round_sqrt */
  412 #endif REENTRANT_FPU
  413         /* Common epilogue: undo the frame and the %esi/%edi/%ebx pushes made by the calling function */
  414 fpu_Arith_exit:
  415         popl    %ebx
  416         popl    %edi
  417         popl    %esi
  418         leave
  419         ret
 420
 421
  422 /*
  423  * Set the FPU status flags to represent precision loss due to
  424  * round-up.
  425  */
  426 xL_precision_lost_up:
  427         push    %eax                    /* keep the ms half of the significand across the call */
  428         call    _set_precision_flag_up
  429         popl    %eax
  430         jmp     xL_no_precision_loss    /* go on to store the result */
  431
  432 /*
  433  * Set the FPU status flags to represent precision loss due to
  434  * truncation.
  435  */
  436 xL_precision_lost_down:
  437         push    %eax                    /* keep the ms half of the significand across the call */
  438         call    _set_precision_flag_down
  439         popl    %eax
  440         jmp     xL_no_precision_loss    /* go on to store the result */
 441
 442
  443 /*
  444  * The number is a denormal (which might get rounded up to a normal)
  445  * Shift the number right the required number of bits, which will
  446  * have to be undone later...
  447  */
  448 xMake_denorm:
  449         /* The action to be taken depends upon whether the underflow
  450            exception is masked */
  451         testb   CW_Underflow,%cl                /* Underflow mask. */
  452         jz      xUnmasked_underflow             /* Do not make a denormal. */
  453         /* Set before the push below: FPU_denormal is %esp-relative when REENTRANT_FPU is defined */
  454         movb    DENORMAL,FPU_denormal
  455
  456         pushl   %ecx            /* Save the control word; %ecx becomes the shift count */
  457         movl    EXP_UNDER+1,%ecx
  458         subl    EXP(%edi),%ecx  /* %ecx = (EXP_UNDER+1) - exponent = right shift required */
  459
  460         cmpl    $64,%ecx        /* shrd only works for 0..31 bits */
  461         jnc     xDenorm_shift_more_than_63
  462
  463         cmpl    $32,%ecx        /* shrd only works for 0..31 bits */
  464         jnc     xDenorm_shift_more_than_32
  465
  466 /*
  467  * We got here without jumps by assuming that the most common requirement
  468  *   is for a small de-normalising shift.
  469  * Shift by [1..31] bits
  470  */
  471         addl    %ecx,EXP(%edi)  /* exponent becomes EXP_UNDER+1 */
  472         orl     %edx,%edx       /* extension */
  473         setne   %ch             /* Save whether %edx is non-zero */
  474         xorl    %edx,%edx       /* the old extension survives only as the flag in %ch */
  475         shrd    %cl,%ebx,%edx   /* new extension = bits shifted out of %ebx */
  476         shrd    %cl,%eax,%ebx
  477         shr     %cl,%eax
  478         orb     %ch,%dl         /* keep the new extension non-zero if the old one was */
  479         popl    %ecx            /* Restore the control word */
  480         jmp     xDenorm_done
  481
  482 /* Shift by [32..63] bits */
  483 xDenorm_shift_more_than_32:
  484         addl    %ecx,EXP(%edi)  /* exponent becomes EXP_UNDER+1 */
  485         subb    $32,%cl         /* shift by (count - 32) here; the other 32 bits are the register moves below */
  486         orl     %edx,%edx
  487         setne   %ch             /* %ch = 1 if the old extension was non-zero */
  488         orb     %ch,%bl
  489         xorl    %edx,%edx
  490         shrd    %cl,%ebx,%edx
  491         shrd    %cl,%eax,%ebx
  492         shr     %cl,%eax
  493         orl     %edx,%edx               /* test these 32 bits */
  494         setne   %cl             /* %cl = 1 if any bit was shifted out of %ebx */
  495         orb     %ch,%bl
  496         orb     %cl,%bl         /* record the lost bits in the ls bit of what becomes the extension */
  497         movl    %ebx,%edx       /* move everything down one 32 bit word */
  498         movl    %eax,%ebx
  499         xorl    %eax,%eax
  500         popl    %ecx            /* Restore the control word */
  501         jmp     xDenorm_done
  502
  503 /* Shift by [64..) bits */
  504 xDenorm_shift_more_than_63:
  505         cmpl    $64,%ecx
  506         jne     xDenorm_shift_more_than_64
  507
  508 /* Exactly 64 bit shift */
  509         addl    %ecx,EXP(%edi)  /* exponent becomes EXP_UNDER+1 */
  510         xorl    %ecx,%ecx
  511         orl     %edx,%edx
  512         setne   %cl             /* %cl = 1 if the old extension was non-zero */
  513         orl     %ebx,%ebx
  514         setne   %ch             /* %ch = 1 if the ls half of the significand was non-zero */
  515         orb     %ch,%cl
  516         orb     %cl,%al         /* record the lost bits in the ls bit of what becomes the extension */
  517         movl    %eax,%edx       /* the ms half of the significand becomes the extension */
  518         xorl    %eax,%eax
  519         xorl    %ebx,%ebx
  520         popl    %ecx            /* Restore the control word */
  521         jmp     xDenorm_done
  522
  523 xDenorm_shift_more_than_64:
  524         movl    EXP_UNDER+1,EXP(%edi)
  525 /* This is easy, %eax must be non-zero, so.. */
  526         movl    $1,%edx         /* a non-zero extension below 0x80000000, i.e. less than half an ls bit */
  527         xorl    %eax,%eax
  528         xorl    %ebx,%ebx
  529         popl    %ecx            /* Restore the control word */
  530         jmp     xDenorm_done
  531
  532         /* Underflow is unmasked: leave the significand and exponent alone and only record the fact */
  533 xUnmasked_underflow:
  534         movb    UNMASKED_UNDERFLOW,FPU_denormal
  535         jmp     xDenorm_done
 536
 537
  538 /* Undo the de-normalisation. */
  539 xNormalise_result:
  540         cmpb    UNMASKED_UNDERFLOW,FPU_denormal
  541         je      xSignal_underflow       /* nothing was shifted in that case, so there is nothing to undo */
  542
  543 /* The number must be a denormal if we got here. */
  544 #ifdef PARANOID
  545         /* But check it... just in case. */
  546         cmpl    EXP_UNDER+1,EXP(%edi)
  547         jne     L_norm_bugged
  548 #endif PARANOID
  549
  550 #ifdef PECULIAR_486
  551         /*
  552          * This implements a special feature of 80486 behaviour.
  553          * Underflow will be signalled even if the number is
  554          * not a denormal after rounding.
  555          * This difference occurs only for masked underflow, and not
  556          * in the unmasked case.
  557          * Actual 80486 behaviour differs from this in some circumstances.
  558          */
  559         orl     %eax,%eax               /* ms bits */
  560         js      LNormalise_shift_done   /* Will be masked underflow */
  561 #endif PECULIAR_486
  562
  563         orl     %eax,%eax               /* ms bits */
  564         js      xL_Normalised           /* No longer a denormal */
  565
  566         jnz     LNormalise_shift_up_to_31       /* Shift left 0 - 31 bits */
  567
  568         orl     %ebx,%ebx
  569         jz      L_underflow_to_zero     /* The contents are zero */
  570
  571 /* Shift left 32 - 63 bits */
  572         movl    %ebx,%eax               /* move up one whole 32 bit word first */
  573         xorl    %ebx,%ebx
  574         subl    $32,EXP(%edi)
  575         /* %eax is non-zero here on both paths, so bsrl yields a defined result */
  576 LNormalise_shift_up_to_31:
  577         bsrl    %eax,%ecx       /* get the required shift in %ecx */
  578         subl    $31,%ecx
  579         negl    %ecx            /* %ecx = 31 - (index of the highest set bit) */
  580         shld    %cl,%ebx,%eax
  581         shl     %cl,%ebx
  582         subl    %ecx,EXP(%edi)  /* the exponent drops by the amount shifted left */
  583
  584 LNormalise_shift_done:
  585         testb   $0xff,FPU_bits_lost     /* bits lost == underflow */
  586         jz      xL_Normalised
  587
  588         /* There must be a masked underflow */
  589         push    %eax                    /* keep the ms half of the significand across the call */
  590         pushl   EX_Underflow
  591         call    _exception
  592         popl    %eax                    /* discard the EX_Underflow argument */
  593         popl    %eax                    /* restore the significand */
  594         jmp     xL_Normalised
 595
 596
  597 /*
  598  * The operations resulted in a number too small to represent.
  599  * Masked response.
  600  */
  601 L_underflow_to_zero:
  602         push    %eax                    /* %eax is preserved around each call */
  603         call    _set_precision_flag_down
  604         popl    %eax
  605
  606         push    %eax
  607         pushl   EX_Underflow
  608         call    _exception
  609         popl    %eax                    /* discard the EX_Underflow argument */
  610         popl    %eax                    /* restore %eax */
  611
  612 /* Reduce the exponent to EXP_UNDER */
  613         movl    EXP_UNDER,EXP(%edi)
  614         movb    TW_Zero,TAG(%edi)
  615         jmp     xL_Store_significand    /* skips the TW_Valid tag write; %eax and %ebx are both zero on this path */
  616
  617
  618 /* The operations resulted in a number too large to represent. */
  619 L_overflow:
  620         push    %edi                    /* the result register is passed as the argument */
  621         call    _arith_overflow
  622         pop     %edi                    /* NOTE(review): %eax is left as _arith_overflow set it - presumably the intended return value; confirm */
  623         jmp     fpu_reg_round_exit
 624
 625
  626 xSignal_underflow:
  627         /* The number may have been changed to a non-denormal */
  628         /* by the rounding operations. */
  629         cmpl    EXP_UNDER,EXP(%edi)
  630         jle     xDo_unmasked_underflow
  631
  632         jmp     xL_Normalised           /* rounding raised the exponent above EXP_UNDER: no underflow to report */
  633
  634 xDo_unmasked_underflow:
  635         /* Increase the exponent by the magic number */
  636         addl    $(3*(1<<13)),EXP(%edi)  /* 3*(1<<13) = 24576; NOTE(review): presumably the x87 exponent adjustment for unmasked underflow - confirm */
  637         push    %eax                    /* keep the ms half of the significand across the call */
  638         pushl   EX_Underflow
  639         call    EXCEPTION
  640         popl    %eax                    /* discard the EX_Underflow argument */
  641         popl    %eax                    /* restore the significand */
  642         jmp     xL_Normalised
 643
 644
  645 #ifdef PARANOID
  646 /* If we ever get here then we have problems! */
  647 L_bugged:                               /* reached on an unrecognised precision or rounding control field */
  648         pushl   EX_INTERNAL|0x201
  649         call    EXCEPTION
  650         popl    %ebx                    /* discard the argument; the saved %ebx is popped again at fpu_Arith_exit */
  651         jmp     L_exception_exit
  652
  653 L_norm_bugged:                          /* reached when the exponent is not EXP_UNDER+1 at xNormalise_result */
  654         pushl   EX_INTERNAL|0x216
  655         call    EXCEPTION
  656         popl    %ebx                    /* discard the argument */
  657         jmp     L_exception_exit
  658
  659 L_entry_bugged:                         /* only referenced by the commented-out entry check */
  660         pushl   EX_INTERNAL|0x217
  661         call    EXCEPTION
  662         popl    %ebx                    /* discard the argument */
  663 L_exception_exit:
  664         mov     $1,%eax                 /* non-zero return, in contrast to the zero returned when no errors are detected */
  665         jmp     fpu_reg_round_exit
  666 #endif PARANOID