arch/x86/math-emu/wm_sqrt.S

   1         .file   "wm_sqrt.S"
   2 /*---------------------------------------------------------------------------+
   3  |  wm_sqrt.S                                                                |
   4  |                                                                           |
   5  | Fixed point arithmetic square root evaluation.                            |
   6  |                                                                           |
   7  | Copyright (C) 1992,1993,1995,1997                                         |
   8  |                       W. Metzenthen, 22 Parker St, Ormond, Vic 3163,      |
   9  |                       Australia.  E-mail billm@suburbia.net               |
  10  |                                                                           |
  11  | Call from C as:                                                           |
  12  |    int wm_sqrt(FPU_REG *n, unsigned int control_word)                     |
  13  |                                                                           |
  14  +---------------------------------------------------------------------------*/
  15
  16 /*---------------------------------------------------------------------------+
  17  |  wm_sqrt(FPU_REG *n, unsigned int control_word)                           |
  18  |    returns the square root of n in n.                                     |
  19  |                                                                           |
  20  |  Use Newton's method to compute the square root of a number, which must   |
  21  |  be in the range  [1.0 .. 4.0),  to 64 bits accuracy.                     |
  22  |  Does not check the sign or tag of the argument.                          |
  23  |  Sets the exponent, but not the sign or tag of the result.                |
  24  |                                                                           |
  25  |  The guess is kept in %esi:%edi                                           |
  26  +---------------------------------------------------------------------------*/
  27
  28 #include "exception.h"
  29 #include "fpu_emu.h"
  30
  31
  32 #ifndef NON_REENTRANT_FPU
  33 /*      Local storage on the stack: */
  34 #define FPU_accum_3     -4(%ebp)        /* ms word */
  35 #define FPU_accum_2     -8(%ebp)
  36 #define FPU_accum_1     -12(%ebp)
  37 #define FPU_accum_0     -16(%ebp)
  38
  39 /*
  40  * The de-normalised argument:
  41  *                  sq_2                  sq_1              sq_0
  42  *        b b b b b b b ... b b b   b b b .... b b b   b 0 0 0 ... 0
  43  *           ^ binary point here
  44  */
  45 #define FPU_fsqrt_arg_2 -20(%ebp)       /* ms word */
  46 #define FPU_fsqrt_arg_1 -24(%ebp)
  47 #define FPU_fsqrt_arg_0 -28(%ebp)       /* ls word, at most the ms bit is set */
  48
  49 #else
  50 /*      Local storage in a static area: */
  51 .data
  52         .align 4,0
  53 FPU_accum_3:
  54         .long   0               /* ms word */
  55 FPU_accum_2:
  56         .long   0
  57 FPU_accum_1:
  58         .long   0
  59 FPU_accum_0:
  60         .long   0
  61
  62 /* The de-normalised argument:
  63                     sq_2                  sq_1              sq_0
  64           b b b b b b b ... b b b   b b b .... b b b   b 0 0 0 ... 0
  65              ^ binary point here
  66  */
  67 FPU_fsqrt_arg_2:
  68         .long   0               /* ms word */
  69 FPU_fsqrt_arg_1:
  70         .long   0
  71 FPU_fsqrt_arg_0:
  72         .long   0               /* ls word, at most the ms bit is set */
  73 #endif /* NON_REENTRANT_FPU */
  74
  75
  76 .text
  77 ENTRY(wm_sqrt)
  78         pushl   %ebp
  79         movl    %esp,%ebp
  80 #ifndef NON_REENTRANT_FPU
  81         subl    $28,%esp
  82 #endif /* NON_REENTRANT_FPU */
  83         pushl   %esi
  84         pushl   %edi
  85         pushl   %ebx
  86
  87         movl    PARAM1,%esi
  88
  89         movl    SIGH(%esi),%eax
  90         movl    SIGL(%esi),%ecx
  91         xorl    %edx,%edx
  92
  93 /* We use a rough linear estimate for the first guess.. */
  94
  95         cmpw    EXP_BIAS,EXP(%esi)
  96         jnz     sqrt_arg_ge_2
  97
  98         shrl    $1,%eax                 /* arg is in the range  [1.0 .. 2.0) */
  99         rcrl    $1,%ecx
 100         rcrl    $1,%edx
 101
 102 sqrt_arg_ge_2:
 103 /* From here on, n is never accessed directly again until it is
 104    replaced by the answer. */
 105
 106         movl    %eax,FPU_fsqrt_arg_2            /* ms word of n */
 107         movl    %ecx,FPU_fsqrt_arg_1
 108         movl    %edx,FPU_fsqrt_arg_0
 109
 110 /* Make a linear first estimate */
 111         shrl    $1,%eax
 112         addl    $0x40000000,%eax
 113         movl    $0xaaaaaaaa,%ecx
 114         mull    %ecx
 115         shll    %edx                    /* max result was 7fff... */
 116         testl   $0x80000000,%edx        /* but min was 3fff... */
 117         jnz     sqrt_prelim_no_adjust
 118
 119         movl    $0x80000000,%edx        /* round up */
 120
 121 sqrt_prelim_no_adjust:
 122         movl    %edx,%esi       /* Our first guess */
 123
 124 /* We have now computed (approx)   (2 + x) / 3, which forms the basis
 125    for a few iterations of Newton's method */
 126
 127         movl    FPU_fsqrt_arg_2,%ecx    /* ms word */
 128
 129 /*
 130  * From our initial estimate, three iterations are enough to get us
 131  * to 30 bits or so. This will then allow two iterations at better
 132  * precision to complete the process.
 133  */
 134
 135 /* Compute  (g + n/g)/2  at each iteration (g is the guess). */
 136         shrl    %ecx            /* Doing this first will prevent a divide */
 137                                 /* overflow later. */
 138
 139         movl    %ecx,%edx       /* msw of the arg / 2 */
 140         divl    %esi            /* current estimate */
 141         shrl    %esi            /* divide by 2 */
 142         addl    %eax,%esi       /* the new estimate */
 143
 144         movl    %ecx,%edx
 145         divl    %esi
 146         shrl    %esi
 147         addl    %eax,%esi
 148
 149         movl    %ecx,%edx
 150         divl    %esi
 151         shrl    %esi
 152         addl    %eax,%esi
 153
 154 /*
 155  * Now that an estimate accurate to about 30 bits has been obtained (in %esi),
 156  * we improve it to 60 bits or so.
 157  *
 158  * The strategy from now on is to compute new estimates from
 159  *      guess := guess + (n - guess^2) / (2 * guess)
 160  */
 161
 162 /* First, find the square of the guess */
 163         movl    %esi,%eax
 164         mull    %esi
 165 /* guess^2 now in %edx:%eax */
 166
 167         movl    FPU_fsqrt_arg_1,%ecx
 168         subl    %ecx,%eax
 169         movl    FPU_fsqrt_arg_2,%ecx    /* ms word of normalized n */
 170         sbbl    %ecx,%edx
 171         jnc     sqrt_stage_2_positive
 172
 173 /* Subtraction gives a negative result,
 174    negate the result before division. */
 175         notl    %edx
 176         notl    %eax
 177         addl    $1,%eax
 178         adcl    $0,%edx
 179
 180         divl    %esi
 181         movl    %eax,%ecx
 182
 183         movl    %edx,%eax
 184         divl    %esi
 185         jmp     sqrt_stage_2_finish
 186
 187 sqrt_stage_2_positive:
 188         divl    %esi
 189         movl    %eax,%ecx
 190
 191         movl    %edx,%eax
 192         divl    %esi
 193
 194         notl    %ecx
 195         notl    %eax
 196         addl    $1,%eax
 197         adcl    $0,%ecx
 198
 199 sqrt_stage_2_finish:
 200         sarl    $1,%ecx         /* divide by 2 */
 201         rcrl    $1,%eax
 202
 203         /* Form the new estimate in %esi:%edi */
 204         movl    %eax,%edi
 205         addl    %ecx,%esi
 206
 207         jnz     sqrt_stage_2_done       /* result should be [1..2) */
 208
 209 #ifdef PARANOID
 210 /* It should be possible to get here only if the arg is ffff....ffff */
 211         cmpl    $0xffffffff,FPU_fsqrt_arg_1
 212         jnz     sqrt_stage_2_error
 213 #endif /* PARANOID */
 214
 215 /* The best rounded result. */
 216         xorl    %eax,%eax
 217         decl    %eax
 218         movl    %eax,%edi
 219         movl    %eax,%esi
 220         movl    $0x7fffffff,%eax
 221         jmp     sqrt_round_result
 222
 223 #ifdef PARANOID
 224 sqrt_stage_2_error:
 225         pushl   EX_INTERNAL|0x213
 226         call    EXCEPTION
 227 #endif /* PARANOID */
 228
 229 sqrt_stage_2_done:
 230
 231 /* Now the square root has been computed to better than 60 bits. */
 232
 233 /* Find the square of the guess. */
 234         movl    %edi,%eax               /* ls word of guess */
 235         mull    %edi
 236         movl    %edx,FPU_accum_1
 237
 238         movl    %esi,%eax
 239         mull    %esi
 240         movl    %edx,FPU_accum_3
 241         movl    %eax,FPU_accum_2
 242
 243         movl    %edi,%eax
 244         mull    %esi
 245         addl    %eax,FPU_accum_1
 246         adcl    %edx,FPU_accum_2
 247         adcl    $0,FPU_accum_3
 248
 249 /*      movl    %esi,%eax */
 250 /*      mull    %edi */
 251         addl    %eax,FPU_accum_1
 252         adcl    %edx,FPU_accum_2
 253         adcl    $0,FPU_accum_3
 254
 255 /* guess^2 now in FPU_accum_3:FPU_accum_2:FPU_accum_1 */
 256
 257         movl    FPU_fsqrt_arg_0,%eax            /* get normalized n */
 258         subl    %eax,FPU_accum_1
 259         movl    FPU_fsqrt_arg_1,%eax
 260         sbbl    %eax,FPU_accum_2
 261         movl    FPU_fsqrt_arg_2,%eax            /* ms word of normalized n */
 262         sbbl    %eax,FPU_accum_3
 263         jnc     sqrt_stage_3_positive
 264
 265 /* Subtraction gives a negative result,
 266    negate the result before division */
 267         notl    FPU_accum_1
 268         notl    FPU_accum_2
 269         notl    FPU_accum_3
 270         addl    $1,FPU_accum_1
 271         adcl    $0,FPU_accum_2
 272
 273 #ifdef PARANOID
 274         adcl    $0,FPU_accum_3  /* This must be zero */
 275         jz      sqrt_stage_3_no_error
 276
 277 sqrt_stage_3_error:
 278         pushl   EX_INTERNAL|0x207
 279         call    EXCEPTION
 280
 281 sqrt_stage_3_no_error:
 282 #endif /* PARANOID */
 283
 284         movl    FPU_accum_2,%edx
 285         movl    FPU_accum_1,%eax
 286         divl    %esi
 287         movl    %eax,%ecx
 288
 289         movl    %edx,%eax
 290         divl    %esi
 291
 292         sarl    $1,%ecx         /* divide by 2 */
 293         rcrl    $1,%eax
 294
 295         /* prepare to round the result */
 296
 297         addl    %ecx,%edi
 298         adcl    $0,%esi
 299
 300         jmp     sqrt_stage_3_finished
 301
 302 sqrt_stage_3_positive:
 303         movl    FPU_accum_2,%edx
 304         movl    FPU_accum_1,%eax
 305         divl    %esi
 306         movl    %eax,%ecx
 307
 308         movl    %edx,%eax
 309         divl    %esi
 310
 311         sarl    $1,%ecx         /* divide by 2 */
 312         rcrl    $1,%eax
 313
 314         /* prepare to round the result */
 315
 316         notl    %eax            /* Negate the correction term */
 317         notl    %ecx
 318         addl    $1,%eax
 319         adcl    $0,%ecx         /* carry here ==> correction == 0 */
 320         adcl    $0xffffffff,%esi
 321
 322         addl    %ecx,%edi
 323         adcl    $0,%esi
 324
 325 sqrt_stage_3_finished:
 326
 327 /*
 328  * The result in %esi:%edi:%esi should be good to about 90 bits here,
 329  * and the rounding information here does not have sufficient accuracy
 330  * in a few rare cases.
 331  */
 332         cmpl    $0xffffffe0,%eax
 333         ja      sqrt_near_exact_x
 334
 335         cmpl    $0x00000020,%eax
 336         jb      sqrt_near_exact
 337
 338         cmpl    $0x7fffffe0,%eax
 339         jb      sqrt_round_result
 340
 341         cmpl    $0x80000020,%eax
 342         jb      sqrt_get_more_precision
 343
 344 sqrt_round_result:
 345 /* Set up for rounding operations */
 346         movl    %eax,%edx
 347         movl    %esi,%eax
 348         movl    %edi,%ebx
 349         movl    PARAM1,%edi
 350         movw    EXP_BIAS,EXP(%edi)      /* Result is in  [1.0 .. 2.0) */
 351         jmp     fpu_reg_round
 352
 353
 354 sqrt_near_exact_x:
 355 /* First, the estimate must be rounded up. */
 356         addl    $1,%edi
 357         adcl    $0,%esi
 358
 359 sqrt_near_exact:
 360 /*
 361  * This is an easy case because x^1/2 is monotonic.
 362  * We need just find the square of our estimate, compare it
 363  * with the argument, and deduce whether our estimate is
 364  * above, below, or exact. We use the fact that the estimate
 365  * is known to be accurate to about 90 bits.
 366  */
 367         movl    %edi,%eax               /* ls word of guess */
 368         mull    %edi
 369         movl    %edx,%ebx               /* 2nd ls word of square */
 370         movl    %eax,%ecx               /* ls word of square */
 371
 372         movl    %edi,%eax
 373         mull    %esi
 374         addl    %eax,%ebx
 375         addl    %eax,%ebx
 376
 377 #ifdef PARANOID
 378         cmp     $0xffffffb0,%ebx
 379         jb      sqrt_near_exact_ok
 380
 381         cmp     $0x00000050,%ebx
 382         ja      sqrt_near_exact_ok
 383
 384         pushl   EX_INTERNAL|0x214
 385         call    EXCEPTION
 386
 387 sqrt_near_exact_ok:
 388 #endif /* PARANOID */
 389
 390         or      %ebx,%ebx
 391         js      sqrt_near_exact_small
 392
 393         jnz     sqrt_near_exact_large
 394
 395         or      %ebx,%edx
 396         jnz     sqrt_near_exact_large
 397
 398 /* Our estimate is exactly the right answer */
 399         xorl    %eax,%eax
 400         jmp     sqrt_round_result
 401
 402 sqrt_near_exact_small:
 403 /* Our estimate is too small */
 404         movl    $0x000000ff,%eax
 405         jmp     sqrt_round_result
 406
 407 sqrt_near_exact_large:
 408 /* Our estimate is too large, we need to decrement it */
 409         subl    $1,%edi
 410         sbbl    $0,%esi
 411         movl    $0xffffff00,%eax
 412         jmp     sqrt_round_result
 413
 414
 415 sqrt_get_more_precision:
 416 /* This case is almost the same as the above, except we start
 417    with an extra bit of precision in the estimate. */
 418         stc                     /* The extra bit. */
 419         rcll    $1,%edi         /* Shift the estimate left one bit */
 420         rcll    $1,%esi
 421
 422         movl    %edi,%eax               /* ls word of guess */
 423         mull    %edi
 424         movl    %edx,%ebx               /* 2nd ls word of square */
 425         movl    %eax,%ecx               /* ls word of square */
 426
 427         movl    %edi,%eax
 428         mull    %esi
 429         addl    %eax,%ebx
 430         addl    %eax,%ebx
 431
 432 /* Put our estimate back to its original value */
 433         stc                     /* The ms bit. */
 434         rcrl    $1,%esi         /* Shift the estimate left one bit */
 435         rcrl    $1,%edi
 436
 437 #ifdef PARANOID
 438         cmp     $0xffffff60,%ebx
 439         jb      sqrt_more_prec_ok
 440
 441         cmp     $0x000000a0,%ebx
 442         ja      sqrt_more_prec_ok
 443
 444         pushl   EX_INTERNAL|0x215
 445         call    EXCEPTION
 446
 447 sqrt_more_prec_ok:
 448 #endif /* PARANOID */
 449
 450         or      %ebx,%ebx
 451         js      sqrt_more_prec_small
 452
 453         jnz     sqrt_more_prec_large
 454
 455         or      %ebx,%ecx
 456         jnz     sqrt_more_prec_large
 457
 458 /* Our estimate is exactly the right answer */
 459         movl    $0x80000000,%eax
 460         jmp     sqrt_round_result
 461
 462 sqrt_more_prec_small:
 463 /* Our estimate is too small */
 464         movl    $0x800000ff,%eax
 465         jmp     sqrt_round_result
 466
 467 sqrt_more_prec_large:
 468 /* Our estimate is too large */
 469         movl    $0x7fffff00,%eax
 470         jmp     sqrt_round_result