compilers/linux86-0.16.17/libc/i386fp/fadd.x

   1 ! bcc 386 floating point routines (version 2)
   2 ! -- Fadd, Faddd, Faddf, Fsub, Fsubd, Fsubf, normalize2
   3 ! author: Bruce Evans
   4
   5 #include "fplib.h"
   6
   7 #define FRAME_SIZE      (3 * GENREG_SIZE + PC_SIZE)
   8
   9         .extern Fpushf
  10         .extern fpdenormal
  11         .extern fpoverflow
  12         .extern fpunderflow
  13
  14         .globl  Fadd
  15         .align  ALIGNMENT
  16 Fadd:
  17         push    ebp
  18         push    edi
  19         push    esi
  20         mov     eax,FRAME_SIZE+D_LOW[esp]
  21         mov     edx,FRAME_SIZE+D_HIGH[esp]
  22         mov     ebx,FRAME_SIZE+D_SIZE+D_LOW[esp]
  23         mov     ecx,FRAME_SIZE+D_SIZE+D_HIGH[esp]
  24         call    addition
  25         mov     FRAME_SIZE+D_SIZE+D_LOW[esp],eax
  26         mov     FRAME_SIZE+D_SIZE+D_HIGH[esp],edx
  27         pop     esi
  28         pop     edi
  29         pop     ebp
  30         ret     #D_SIZE
  31
  32         .globl  Faddd
  33         .align  ALIGNMENT
  34 Faddd:
  35         push    ebp
  36         push    edi
  37         push    esi
  38         mov     eax,FRAME_SIZE+D_LOW[esp]
  39         mov     edx,FRAME_SIZE+D_HIGH[esp]
  40         mov     ecx,D_HIGH[ebx]
  41         mov     ebx,D_LOW[ebx]
  42         call    addition
  43         mov     FRAME_SIZE+D_LOW[esp],eax
  44         mov     FRAME_SIZE+D_HIGH[esp],edx
  45         pop     esi
  46         pop     edi
  47         pop     ebp
  48         ret
  49
  50         .globl  Faddf
  51         .align  ALIGNMENT
  52 Faddf:
  53         push    ebp
  54         push    edi
  55         push    esi
  56         call    Fpushf
  57         pop     ebx             ! yl
  58         pop     ecx             ! yu
  59         mov     eax,FRAME_SIZE+D_LOW[esp]       ! xl
  60         mov     edx,FRAME_SIZE+D_HIGH[esp]      ! xu
  61         call    addition
  62         mov     FRAME_SIZE+D_LOW[esp],eax
  63         mov     FRAME_SIZE+D_HIGH[esp],edx
  64         pop     esi
  65         pop     edi
  66         pop     ebp
  67         ret
  68
  69         .globl  Fsub
  70         .align  ALIGNMENT
  71 Fsub:
  72         push    ebp
  73         push    edi
  74         push    esi
  75         mov     eax,FRAME_SIZE+D_LOW[esp]
  76         mov     edx,FRAME_SIZE+D_HIGH[esp]
  77         mov     ebx,FRAME_SIZE+D_SIZE+D_LOW[esp]
  78         mov     ecx,FRAME_SIZE+D_SIZE+D_HIGH[esp]
  79         xor     ecx,#D_SIGN_MASK        ! complement sign
  80         call    addition
  81         mov     FRAME_SIZE+D_SIZE+D_LOW[esp],eax
  82         mov     FRAME_SIZE+D_SIZE+D_HIGH[esp],edx
  83         pop     esi
  84         pop     edi
  85         pop     ebp
  86         ret     #D_SIZE
  87
  88         .globl  Fsubd
  89         .align  ALIGNMENT
  90 Fsubd:
  91         push    ebp
  92         push    edi
  93         push    esi
  94         mov     eax,FRAME_SIZE+D_LOW[esp]
  95         mov     edx,FRAME_SIZE+D_HIGH[esp]
  96         mov     ecx,D_HIGH[ebx]
  97         mov     ebx,D_LOW[ebx]
  98         xor     ecx,#D_SIGN_MASK        ! complement sign
  99         call    addition
 100         mov     FRAME_SIZE+D_LOW[esp],eax
 101         mov     FRAME_SIZE+D_HIGH[esp],edx
 102         pop     esi
 103         pop     edi
 104         pop     ebp
 105         ret
 106
 107         .globl  Fsubf
 108         .align  ALIGNMENT
 109 Fsubf:
 110         push    ebp
 111         push    edi
 112         push    esi
 113         call    Fpushf
 114         pop     ebx             ! yl
 115         pop     ecx             ! yu
 116         mov     eax,FRAME_SIZE+D_LOW[esp]       ! xl
 117         mov     edx,FRAME_SIZE+D_HIGH[esp]      ! xu
 118         xor     ecx,#D_SIGN_MASK        ! complement sign
 119         call    addition
 120         mov     FRAME_SIZE+D_LOW[esp],eax
 121         mov     FRAME_SIZE+D_HIGH[esp],edx
 122         pop     esi
 123         pop     edi
 124         pop     ebp
 125         ret
 126
 127         .align  ALIGNMENT
 128 exp_y_0:
 129
 130 ! Check for x denormal, to split off special case where both are denormal,
 131 ! so the norm bit (or 1 higher) is known to be set for addition, so addition
 132 ! can be done faster
 133
 134         test    esi,#D_EXP_MASK
 135         jnz     x_normal_exp_y_0
 136         test    esi,esi         ! test top bits of x fraction
 137         jnz     both_denorm     ! denormal iff nonzero fraction with zero exp
 138         test    eax,eax         ! test rest of fraction
 139         jz      return_edx_eax  ! everything 0 (XXX - do signs matter?)
 140 both_denorm:
 141         call    fpdenormal
 142         test    ebp,#D_SIGN_MASK
 143         jnz     denorm_subtract
 144
 145 ! Add denormal x to denormal or zero y
 146
 147 #if D_NORM_BIT != D_EXP_SHIFT
 148 #include "error, carry into norm bit does not go into exponent"
 149 #endif
 150
 151         add     eax,ebx
 152         adc     esi,edi
 153         or      edx,esi
 154         ret
 155
 156 denorm_subtract:
 157         sub     eax,ebx
 158         sbb     esi,edi
 159         or      edx,esi
 160         ret
 161
 162         .align  ALIGNMENT
 163 x_normal_exp_y_0:
 164         test    edi,edi         ! this is like the check for x denormal
 165         jnz     y_denorm
 166         test    ebx,ebx
 167         jz      return_edx_eax  ! y = 0
 168 y_denorm:
 169         call    fpdenormal
 170         or      ecx,#1 << D_EXP_SHIFT   ! normalize y by setting exponent to 1
 171         jmp     got_y
 172
 173         .align  ALIGNMENT
 174 return_edx_eax:
 175         ret
 176
 177         .align  ALIGNMENT
 178 add_bigshift:
 179         cmp     ecx,#D_FRAC_BIT+2
 180         jae     return_edx_eax  ! x dominates y
 181         sub     ecx,#REG_BIT
 182         shrd    ebp,ebx,cl
 183         shrd    ebx,edi,cl
 184         shr     edi,cl
 185         add     eax,edi
 186         adc     esi,#0
 187         xchg    ebp,ebx
 188         br      normalize
 189
 190         .align  ALIGNMENT
 191 addition:
 192         mov     esi,edx         ! this mainly for consistent naming
 193         and     esi,#D_EXP_MASK | D_FRAC_MASK   ! discard sign so comparison is simple
 194         mov     edi,ecx         ! free cl for shifts
 195         and     edi,#D_EXP_MASK | D_FRAC_MASK
 196         cmp     esi,edi
 197         ja      xbigger
 198         jb      swap
 199         cmp     eax,ebx
 200         jae     xbigger
 201 swap:
 202         xchg    edx,ecx
 203         xchg    eax,ebx
 204         xchg    esi,edi
 205 xbigger:
 206
 207 ! edx holds sign of result from here on
 208 ! and exponent of result before the normalization step
 209
 210         mov     ebp,edx         ! prepare difference of signs
 211         xor     ebp,ecx
 212
 213         and     ecx,#D_EXP_MASK ! extract exp_y and check for y 0 or denormal
 214         beq     exp_y_0         ! otherwise x is not 0 or denormal either
 215         and     edi,#D_FRAC_MASK        ! extract fraction
 216         or      edi,#D_NORM_MASK        ! normalize
 217 got_y:
 218         and     esi,#D_FRAC_MASK        ! extract fraction
 219         or      esi,#D_NORM_MASK        ! normalize
 220
 221         sub     ecx,edx         ! carries from non-exp bits in edx killed later
 222         neg     ecx
 223         and     ecx,#D_EXP_MASK
 224         shr     ecx,#D_EXP_SHIFT        ! difference of exponents
 225
 226 got_x_and_y:
 227         and     ebp,#D_SIGN_MASK        ! see if signs are same
 228         bne     subtract        ! else roundoff reg ebp has been cleared
 229
 230         cmp     cl,#REG_BIT
 231         bhis    add_bigshift
 232         shrd    ebp,ebx,cl
 233         shrd    ebx,edi,cl
 234         shr     edi,cl
 235         add     eax,ebx
 236         adc     esi,edi
 237
 238 ! result edx(D_SIGN_MASK | D_EXP_MASK bits):esi:eax:ebp but needs normalization
 239
 240         mov     edi,edx
 241         and     edi,#D_EXP_MASK
 242         test    esi,#D_NORM_MASK << 1
 243         jnz     add_loverflow
 244
 245 add_round:
 246         cmp     ebp,#1 << (REG_BIT-1)   ! test roundoff register
 247         jb      add_done        ! no rounding
 248         jz      tie
 249 add_roundup:
 250         add     eax,#1
 251         adc     esi,#0
 252         test    esi,#D_NORM_MASK << 1
 253         jnz     pre_add_loverflow       ! rounding may cause overflow!
 254 add_done:
 255         mov     ecx,edx         ! duplicated code from 'done'
 256         and     edx,#D_SIGN_MASK
 257         or      edx,edi
 258         and     esi,#D_FRAC_MASK
 259         or      edx,esi
 260         ret
 261
 262         .align  ALIGNMENT
 263 tie:
 264         test    al,#1           ! tie case, round to even
 265         jz      add_done        ! even, no rounding
 266         jmp     add_roundup
 267
 268         .align  ALIGNMENT
 269 pre_add_loverflow:
 270         sub     ebp,ebp         ! clear rounding register
 271                                 ! probably avoiding tests for more rounding
 272 add_loverflow:
 273         shrd    ebp,eax,#1
 274         jnc     over_set_sticky_bit
 275         or      ebp,#1
 276 over_set_sticky_bit:
 277         shrd    eax,esi,#1
 278         shr     esi,#1
 279         add     edi,1 << D_EXP_SHIFT
 280         cmp     edi,#D_EXP_INFINITE << D_EXP_SHIFT
 281         jl      add_round
 282 overflow:
 283         call    fpoverflow
 284         mov     eax,ecx         ! XXX - wrong reg
 285         ret
 286
 287 ! result edx(D_SIGN_MASK | D_EXP_MASK bits):
 288 !        esi((D_NORM_MASK << 1) | D_NORM_MASK | D_FRAC_MASK bits):eax:ebp:ebx
 289 ! but needs normalization
 290
 291         .align  ALIGNMENT
 292 normalize:
 293         mov     edi,edx
 294         and     edi,#D_EXP_MASK
 295         test    esi,#D_NORM_MASK << 1
 296         bne     loverflow
 297
 298 ! result edx(D_SIGN_MASK bit):edi(D_EXP_MASK bits):
 299 !        esi(D_NORM_MASK | D_FRAC_MASK bits):eax:ebp:ebx
 300 ! but needs normalization
 301
 302         .globl  normalize2
 303 normalize2:
 304         test    esi,#D_NORM_MASK        ! already-normalized is very common
 305         jz      normalize3
 306 round:
 307         cmp     ebp,#1 << (REG_BIT-1)   ! test roundoff register
 308         jb      done            ! no rounding
 309         jz      near_tie
 310 roundup:
 311         add     eax,#1
 312         adc     esi,#0
 313         test    esi,#D_NORM_MASK << 1
 314         bne     pre_loverflow   ! rounding may cause overflow!
 315 done:
 316 cmp     edi,#D_EXP_INFINITE << D_EXP_SHIFT
 317 jae     overflow
 318         and     edx,#D_SIGN_MASK        ! extract sign of largest and result
 319         or      edx,edi         ! include exponent with sign
 320         and     esi,#D_FRAC_MASK        ! discard norm bit
 321         or      edx,esi         ! include fraction with sign and exponent
 322         ret
 323
 324         .align  ALIGNMENT
 325 near_tie:
 326         test    ebx,ebx
 327         jnz     roundup
 328         test    al,#1           ! tie case, round to even
 329         jz      done            ! even, no rounding
 330         jmp     roundup
 331
 332         .align  ALIGNMENT
 333 not_in_8_below:
 334         shld    ecx,esi,#REG_BIT-D_NORM_BIT+16  ! in 9 to 16 below?
 335         jz      not_in_16_below ! must be way below (17-20 for usual D_NORM_BIT)
 336         mov     cl,bsr_table[ecx]       ! bsr(esi) - (D_NORM_BIT-16)
 337         neg     ecx             ! (D_NORM_BIT-16) - bsr(esi)
 338         add     ecx,#16
 339         jmp     got_shift
 340
 341         .align  ALIGNMENT
 342 not_in_16_below:
 343         mov     cl,bsr_table[esi]       ! bsr(esi) directly
 344         neg     ecx                     ! -bsr(esi)
 345         add     ecx,#D_NORM_BIT         ! D_NORM_BIT - bsr(esi)
 346         jmp     got_shift
 347
 348         .align  ALIGNMENT
 349 normalize3:
 350         test    esi,esi
 351         jz      shift32
 352
 353 ! Find first nonzero bit in esi
 354 ! Don't use bsr, it is very slow (const + 3 * bit_found)
 355 ! We know that there is some nonzero bit, and the norm bit and above are clear
 356
 357         sub     ecx,ecx         ! prepare unsigned extension of cl
 358         shld    ecx,esi,#REG_BIT-D_NORM_BIT+8   ! any bits in 8 below norm bit?
 359         jz      not_in_8_below
 360         mov     cl,bsr_table[ecx]       ! bsr(esi) - (D_NORM_BIT-8)
 361         neg     ecx             ! (D_NORM_BIT-8) - bsr(esi)
 362         add     ecx,#8          ! D_NORM_BIT - bsr(esi)
 363 got_shift:
 364         shld    esi,eax,cl
 365         shld    eax,ebp,cl
 366         shld    ebp,ebx,cl
 367         shl     ebx,cl
 368         shl     ecx,D_EXP_SHIFT
 369         sub     edi,ecx
 370         bhi     round           ! XXX - can rounding change the exponent to > 0?
 371                                 ! not bgt since edi may be 0x80000000
 372         neg     edi
 373         shr     edi,#D_EXP_SHIFT
 374         inc     edi
 375         br      fpunderflow
 376
 377         .align  ALIGNMENT
 378 pre_loverflow:
 379         sub     ebp,ebp         ! clear rounding registers
 380         sub     ebx,ebx         ! probably avoiding tests for more rounding
 381
 382 loverflow:
 383         shr     esi,#1          ! carry bit stayed in the reg
 384         rcr     eax,#1
 385         rcr     ebp,#1
 386         rcr     ebx,#1
 387         add     edi,1 << D_EXP_SHIFT
 388         cmp     edi,#D_EXP_INFINITE << D_EXP_SHIFT
 389         blt     round
 390         call    fpoverflow
 391         mov     eax,ecx         ! XXX - wrong reg
 392         ret
 393
 394         .align  ALIGNMENT
 395 shift32:
 396         test    eax,eax
 397         jz      shift64
 398         mov     esi,eax
 399         mov     eax,ebp
 400         mov     ebp,ebx
 401         sub     ebx,ebx
 402         sub     edi,#REG_BIT << D_EXP_SHIFT
 403 shiftxx:
 404         test    esi,#~(D_NORM_MASK | D_FRAC_MASK)
 405         jz      over_adjust     ! else too big already
 406         shrd    ebx,ebp,#D_BIT-D_FRAC_BIT
 407         shrd    ebp,eax,#D_BIT-D_FRAC_BIT
 408         shrd    eax,esi,#D_BIT-D_FRAC_BIT
 409         shr     esi,#D_BIT-D_FRAC_BIT
 410         add     edi,#(D_BIT-D_FRAC_BIT) << D_EXP_SHIFT
 411 over_adjust:
 412         test    edi,edi
 413         bgt     normalize2
 414         neg     edi
 415         shr     edi,#D_EXP_SHIFT
 416         inc     edi
 417         br      fpunderflow
 418
 419         .align  ALIGNMENT
 420 shift64:
 421         test    ebp,ebp
 422         jz      shift96
 423         mov     esi,ebp
 424         mov     eax,ebx
 425         sub     ebp,ebp
 426         mov     ebx,ebp
 427         sub     edi,#(2*REG_BIT) << D_EXP_SHIFT
 428         jmp     shiftxx
 429
 430         .align  ALIGNMENT
 431 shift96:
 432         test    ebx,ebx         ! XXX - this test is probably unnecessary
 433                                 ! since the shift must be small unless we
 434                                 ! are subtracting 2 almost-equal numbers,
 435                                 ! and then the bits beyond 64 will mostly
 436                                 ! be 0
 437         jz      return_esi_eax  ! all zero
 438         mov     esi,ebx
 439         sub     ebx,ebx
 440         sub     edi,#(3*REG_BIT) << D_EXP_SHIFT
 441         jmp     shiftxx
 442
 443         .align  ALIGNMENT
 444 return_esi_eax:
 445         mov     edx,esi
 446         ret
 447
 448         .align  ALIGNMENT
 449 subtract:
 450         sub     ebp,ebp         ! set up roundoff register
 451         cmp     ecx,#REG_BIT
 452         jae     subtract_bigshift
 453         shrd    ebp,ebx,cl
 454         shrd    ebx,edi,cl
 455         shr     edi,cl
 456         neg     ebp             ! begin subtraction esi:eax:0 - edi:ebx:ebp
 457         sbb     eax,ebx
 458         sbb     esi,edi
 459         sub     ebx,ebx
 460         mov     edi,edx
 461         and     edi,#D_EXP_MASK
 462         br      normalize2
 463
 464         .align  ALIGNMENT
 465 subtract_bigshift:
 466         cmp     ecx,#D_FRAC_BIT+2
 467         bhis    return_edx_eax  ! x dominates y
 468         sub     ecx,#REG_BIT
 469         shrd    ebp,ebx,cl
 470         shrd    ebx,edi,cl
 471         shr     edi,cl
 472         not     ebp             ! begin subtraction esi:eax:0:0 - 0:edi:ebx:ebp
 473         not     ebx
 474         add     ebp,#1
 475         adc     ebx,#0
 476         cmc
 477         sbb     eax,edi
 478         sbb     esi,#0
 479         xchg    ebp,ebx
 480         mov     edi,edx
 481         and     edi,#D_EXP_MASK
 482         br      normalize2
 483
 484         .data
 485         .extern bsr_table