libgcc/config/sparc/lb1spc.S

   1 /* This is an assembly language implementation of mulsi3, divsi3, and modsi3
   2    for the sparc processor.
   3
   4    These routines are derived from the SPARC Architecture Manual, version 8,
   5    slightly edited to match the desired calling convention, and also to
   6    optimize them for our purposes.  */
   7
   8 /* An executable stack is *not* required for these functions.  */
   9 #if defined(__ELF__) && defined(__linux__)
  10 .section .note.GNU-stack,"",%progbits
  11 .previous
  12 #endif
  13
  14 #ifdef L_mulsi3
   15 .text                            /* .umul: multiply %o0 by %o1 with mulscc steps, result returned in %o0 */
   16         .align 4
   17         .global .umul
   18         .proc 4
   19 .umul:                           /* writes %o4, %o5, %y and the integer condition codes */
   20         or      %o0, %o1, %o4   ! logical or of multiplier and multiplicand
   21         mov     %o0, %y         ! multiplier to Y register
   22         andncc  %o4, 0xfff, %o5 ! mask out lower 12 bits
   23         be      mul_shortway    ! can do it the short way
   24         andcc   %g0, %g0, %o4   ! zero the partial product and clear NV cc
   25         !
   26         ! long multiply
   27         !
   28         mulscc  %o4, %o1, %o4   ! first iteration of 33
   29         mulscc  %o4, %o1, %o4
   30         mulscc  %o4, %o1, %o4
   31         mulscc  %o4, %o1, %o4
   32         mulscc  %o4, %o1, %o4
   33         mulscc  %o4, %o1, %o4
   34         mulscc  %o4, %o1, %o4
   35         mulscc  %o4, %o1, %o4
   36         mulscc  %o4, %o1, %o4
   37         mulscc  %o4, %o1, %o4
   38         mulscc  %o4, %o1, %o4
   39         mulscc  %o4, %o1, %o4
   40         mulscc  %o4, %o1, %o4
   41         mulscc  %o4, %o1, %o4
   42         mulscc  %o4, %o1, %o4
   43         mulscc  %o4, %o1, %o4
   44         mulscc  %o4, %o1, %o4
   45         mulscc  %o4, %o1, %o4
   46         mulscc  %o4, %o1, %o4
   47         mulscc  %o4, %o1, %o4
   48         mulscc  %o4, %o1, %o4
   49         mulscc  %o4, %o1, %o4
   50         mulscc  %o4, %o1, %o4
   51         mulscc  %o4, %o1, %o4
   52         mulscc  %o4, %o1, %o4
   53         mulscc  %o4, %o1, %o4
   54         mulscc  %o4, %o1, %o4
   55         mulscc  %o4, %o1, %o4
   56         mulscc  %o4, %o1, %o4
   57         mulscc  %o4, %o1, %o4
   58         mulscc  %o4, %o1, %o4
   59         mulscc  %o4, %o1, %o4   ! 32nd iteration
   60         mulscc  %o4, %g0, %o4   ! last iteration only shifts
   61         ! the upper 32 bits of product are wrong, but we do not care
   62         retl                    /* leaf return, the rd below executes in its delay slot */
   63         rd      %y, %o0         /* the value returned on this path is read from Y */
   64         !
   65         ! short multiply
   66         !
   67 mul_shortway:                    /* taken when neither operand has a bit set above bit 11 */
   68         mulscc  %o4, %o1, %o4   ! first iteration of 13
   69         mulscc  %o4, %o1, %o4
   70         mulscc  %o4, %o1, %o4
   71         mulscc  %o4, %o1, %o4
   72         mulscc  %o4, %o1, %o4
   73         mulscc  %o4, %o1, %o4
   74         mulscc  %o4, %o1, %o4
   75         mulscc  %o4, %o1, %o4
   76         mulscc  %o4, %o1, %o4
   77         mulscc  %o4, %o1, %o4
   78         mulscc  %o4, %o1, %o4
   79         mulscc  %o4, %o1, %o4   ! 12th iteration
   80         mulscc  %o4, %g0, %o4   ! last iteration only shifts
   81         rd      %y, %o5         /* the top 12 bits of Y become the low 12 bits of the result */
   82         sll     %o4, 12, %o4    ! left shift partial product by 12 bits
   83         srl     %o5, 20, %o5    ! right shift partial product by 20 bits
   84         retl                    /* leaf return, the or below executes in its delay slot */
   85         or      %o5, %o4, %o0   ! merge for true product
  86 #endif
  87
  88 #ifdef L_divsi3
  89 /*
  90  * Division and remainder, from Appendix E of the SPARC Version 8
  91  * Architecture Manual, with fixes from Gordon Irlam.
  92  */
  93
  94 /*
  95  * Input: dividend and divisor in %o0 and %o1 respectively.
  96  *
   97  * m4 parameters (listed with the values used for this expansion):
   98  *  NAME = .div  name of function to generate
   99  *  OP = div     OP=div => %o0 / %o1; OP=rem => %o0 % %o1
  100  *  S = true     S=true => signed; S=false => unsigned
 101  *
 102  * Algorithm parameters:
 103  *  N           how many bits per iteration we try to get (4)
 104  *  WORDSIZE    total number of bits (32)
 105  *
 106  * Derived constants:
 107  *  TOPBITS     number of bits in the top decade of a number
 108  *
 109  * Important variables:
 110  *  Q           the partial quotient under development (initially 0)
 111  *  R           the remainder so far, initially the dividend
 112  *  ITER        number of main division loop iterations required;
 113  *              equal to ceil(log2(quotient) / N).  Note that this
 114  *              is the log base (2^N) of the quotient.
 115  *  V           the current comparand, initially divisor*2^(ITER*N-1)
 116  *
 117  * Cost:
 118  *  Current estimate for non-large dividend is
 119  *      ceil(log2(quotient) / N) * (10 + 7N/2) + C
 120  *  A large dividend is one greater than 2^(31-TOPBITS) and takes a
 121  *  different path, as the upper bits of the quotient must be developed
 122  *  one bit at a time.
 123  */
  124         .global .udiv           /* .udiv: unsigned entry point for %o0 / %o1 */
  125         .align 4
  126         .proc 4
  127         .text
  128 .udiv:                           /* %g3 is the sign word tested at got_result, zero here */
  129          b ready_to_divide      /* join the shared divide code, the mov below is the delay slot */
  130          mov 0, %g3             ! result is always positive
 131
  132         .global .div            /* .div: signed entry point for %o0 / %o1 */
  133         .align 4
  134         .proc 4
  135         .text
  136 .div:                            /* negates any negative operand, then falls into ready_to_divide */
  137         ! compute sign of result; if neither is negative, no problem
  138         orcc    %o1, %o0, %g0   ! either negative?
  139         bge     ready_to_divide ! no, go do the divide
  140         xor     %o1, %o0, %g3   ! compute sign in any case
  141         tst     %o1             /* is the divisor the negative one */
  142         bge     1f              /* divisor is not negative, so the dividend must be */
  143         tst     %o0             /* delay slot: sets the codes for the bge two lines below */
  144         ! %o1 is definitely negative; %o0 might also be negative
  145         bge     ready_to_divide ! if %o0 not negative...
  146         sub     %g0, %o1, %o1   ! in any case, make %o1 nonneg
  147 1:      ! %o0 is negative, %o1 is nonnegative
  148         sub     %g0, %o0, %o0   ! make %o0 nonnegative
 149
 150
  151 ready_to_divide:                 /* shared core: %o0 = dividend, %o1 = divisor, %g3 = sign word, quotient returned in %o0 */
  152
  153         ! Ready to divide.  Compute size of quotient; scale comparand.
  154         orcc    %o1, %g0, %o5   /* %o5 = working comparand V, also tests the divisor for zero */
  155         bne     1f              /* nonzero divisor, go divide */
  156         mov     %o0, %o3        /* delay slot: %o3 = running remainder R */
  157
  158         ! Divide by zero trap.  If it returns, return 0 (about as
  159         ! wrong as possible, but that is what SunOS does...).
  160         ta      0x2             ! ST_DIV0
  161         retl
  162         clr     %o0             /* delay slot: return value 0 */
  163
  164 1:
  165         cmp     %o3, %o5                ! if %o1 exceeds %o0, done
  166         blu     got_result              ! (and algorithm fails otherwise)
  167         clr     %o2                     /* delay slot: quotient Q starts at 0 */
  168         sethi   %hi(1 << (32 - 4 - 1)), %g1     /* %g1 = threshold separating the two scaling paths */
  169         cmp     %o3, %g1
  170         blu     not_really_big          /* dividend below the threshold, use the 4-bit path only */
  171         clr     %o4                     /* delay slot: %o4 counts 4-bit passes, starts at 0 */
  172
  173         ! Here the dividend is >= 2**(31-N) or so.  We must be careful here,
  174         ! as our usual N-at-a-shot divide step will cause overflow and havoc.
  175         ! The number of bits in the result here is N*ITER+SC, where SC <= N.
  176         ! Compute ITER in an unorthodox manner: know we need to shift V into
  177         ! the top decade: so do not even bother to compare to R.
  178         1:
  179                 cmp     %o5, %g1
  180                 bgeu    3f              /* V has reached the threshold, stop shifting by 4 */
  181                 mov     1, %g2          /* delay slot: %g2 counts single-bit steps, starts at 1 */
  182                 sll     %o5, 4, %o5
  183                 b       1b
  184                 add     %o4, 1, %o4     /* delay slot: one more 4-bit pass will be needed */
  185
  186         ! Now compute %g2.
  187         2:      addcc   %o5, %o5, %o5   /* double V, a carry out means it no longer fits in 32 bits */
  188                 bcc     not_too_big
  189                 add     %g2, 1, %g2     /* delay slot: count one more single-bit step */
  190
  191                 ! We get here if the %o1 overflowed while shifting.
  192                 ! This means that %o3 has the high-order bit set.
  193                 ! Restore %o5 and subtract from %o3.
  194                 sll     %g1, 4, %g1     ! high order bit
  195                 srl     %o5, 1, %o5     ! rest of %o5
  196                 add     %o5, %g1, %o5
  197                 b       do_single_div
  198                 sub     %g2, 1, %g2     /* delay slot: take back the count added for the overflowing step */
  199
  200         not_too_big:
  201         3:      cmp     %o5, %o3
  202                 blu     2b              /* V still below R, double it again */
  203                 nop
  204                 be      do_single_div
  205                 nop
  206         /* NB: these are commented out in the V8-SPARC manual as well */
  207         /* (I do not understand this) */
  208         ! %o5 > %o3: went too far: back up 1 step
  209         !       srl     %o5, 1, %o5
  210         !       dec     %g2
  211         ! do single-bit divide steps
  212         !
  213         ! We have to be careful here.  We know that %o3 >= %o5, so we can do the
  214         ! first divide step without thinking.  BUT, the others are conditional,
  215         ! and are only done if %o3 >= 0.  Because both %o3 and %o5 may have the high-
  216         ! order bit set in the first step, just falling into the regular
  217         ! division loop will mess up the first time around.
  218         ! So we unroll slightly...
  219         do_single_div:
  220                 subcc   %g2, 1, %g2
  221                 bl      end_regular_divide      /* no single-bit steps to do */
  222                 nop
  223                 sub     %o3, %o5, %o3           /* first step is unconditional: R = R - V */
  224                 mov     1, %o2                  /* and it contributes a quotient bit of 1 */
  225                 b       end_single_divloop
  226                 nop
  227         single_divloop:
  228                 sll     %o2, 1, %o2             /* make room for the next quotient bit */
  229                 bl      1f                      /* uses the codes from the tst %o3 in the loop branch delay slot */
  230                 srl     %o5, 1, %o5             /* delay slot: halve V for this step */
  231                 ! %o3 >= 0
  232                 sub     %o3, %o5, %o3
  233                 b       2f
  234                 add     %o2, 1, %o2             /* delay slot: Q = Q + 1 */
  235         1:      ! %o3 < 0
  236                 add     %o3, %o5, %o3
  237                 sub     %o2, 1, %o2
  238         2:
  239         end_single_divloop:
  240                 subcc   %g2, 1, %g2
  241                 bge     single_divloop
  242                 tst     %o3                     /* delay slot: sign of R selects add or subtract next */
  243                 b,a     end_regular_divide      /* annulled branch, so no delay slot instruction follows */
  244
  245 not_really_big:                  /* scale V up by 16 per pass until it exceeds R, counting passes in %o4 */
  246 1:
  247         sll     %o5, 4, %o5
  248         cmp     %o5, %o3
  249         bleu    1b
  250         addcc   %o4, 1, %o4     /* delay slot: runs on every pass, including the one that exits */
  251         be      got_result      /* taken only when the addcc result above is zero */
  252         sub     %o4, 1, %o4     /* delay slot: take back the increment from the exiting pass */
  253
  254         tst     %o3     ! set up for initial iteration
  255 divloop:                         /* each pass does four add or subtract steps and adds one signed odd digit to Q */
  256         sll     %o2, 4, %o2     /* make room for four more quotient bits */
  257         ! depth 1, accumulated bits 0
  258         bl      L1.16           /* R negative on entry, codes come from the preceding tst %o3 */
  259         srl     %o5,1,%o5       /* delay slot: halve V before the step */
  260         ! remainder is positive
  261         subcc   %o3,%o5,%o3
  262         ! depth 2, accumulated bits 1
  263         bl      L2.17
  264         srl     %o5,1,%o5
  265         ! remainder is positive
  266         subcc   %o3,%o5,%o3
  267         ! depth 3, accumulated bits 3
  268         bl      L3.19
  269         srl     %o5,1,%o5
  270         ! remainder is positive
  271         subcc   %o3,%o5,%o3
  272         ! depth 4, accumulated bits 7
  273         bl      L4.23
  274         srl     %o5,1,%o5
  275         ! remainder is positive
  276         subcc   %o3,%o5,%o3
  277         b       9f
  278         add     %o2, (7*2+1), %o2       /* delay slot: leaf digit, here +15, added to Q */
  279
  280 L4.23:
  281         ! remainder is negative
  282         addcc   %o3,%o5,%o3
  283         b       9f
  284         add     %o2, (7*2-1), %o2
  285
  286
  287 L3.19:
  288         ! remainder is negative
  289         addcc   %o3,%o5,%o3
  290         ! depth 4, accumulated bits 5
  291         bl      L4.21
  292         srl     %o5,1,%o5
  293         ! remainder is positive
  294         subcc   %o3,%o5,%o3
  295         b       9f
  296         add     %o2, (5*2+1), %o2
  297
  298 L4.21:
  299         ! remainder is negative
  300         addcc   %o3,%o5,%o3
  301         b       9f
  302         add     %o2, (5*2-1), %o2
  303
  304 L2.17:
  305         ! remainder is negative
  306         addcc   %o3,%o5,%o3
  307         ! depth 3, accumulated bits 1
  308         bl      L3.17
  309         srl     %o5,1,%o5
  310         ! remainder is positive
  311         subcc   %o3,%o5,%o3
  312         ! depth 4, accumulated bits 3
  313         bl      L4.19
  314         srl     %o5,1,%o5
  315         ! remainder is positive
  316         subcc   %o3,%o5,%o3
  317         b       9f
  318         add     %o2, (3*2+1), %o2
  319
  320 L4.19:
  321         ! remainder is negative
  322         addcc   %o3,%o5,%o3
  323         b       9f
  324         add     %o2, (3*2-1), %o2
  325
  326 L3.17:
  327         ! remainder is negative
  328         addcc   %o3,%o5,%o3
  329         ! depth 4, accumulated bits 1
  330         bl      L4.17
  331         srl     %o5,1,%o5
  332         ! remainder is positive
  333         subcc   %o3,%o5,%o3
  334         b       9f
  335         add     %o2, (1*2+1), %o2
  336
  337 L4.17:
  338         ! remainder is negative
  339         addcc   %o3,%o5,%o3
  340         b       9f
  341         add     %o2, (1*2-1), %o2
  342
  343 L1.16:
  344         ! remainder is negative
  345         addcc   %o3,%o5,%o3
  346         ! depth 2, accumulated bits -1
  347         bl      L2.15
  348         srl     %o5,1,%o5
  349         ! remainder is positive
  350         subcc   %o3,%o5,%o3
  351         ! depth 3, accumulated bits -1
  352         bl      L3.15
  353         srl     %o5,1,%o5
  354         ! remainder is positive
  355         subcc   %o3,%o5,%o3
  356         ! depth 4, accumulated bits -1
  357         bl      L4.15
  358         srl     %o5,1,%o5
  359         ! remainder is positive
  360         subcc   %o3,%o5,%o3
  361         b       9f
  362         add     %o2, (-1*2+1), %o2
  363
  364 L4.15:
  365         ! remainder is negative
  366         addcc   %o3,%o5,%o3
  367         b       9f
  368         add     %o2, (-1*2-1), %o2
  369
  370 L3.15:
  371         ! remainder is negative
  372         addcc   %o3,%o5,%o3
  373         ! depth 4, accumulated bits -3
  374         bl      L4.13
  375         srl     %o5,1,%o5
  376         ! remainder is positive
  377         subcc   %o3,%o5,%o3
  378         b       9f
  379         add     %o2, (-3*2+1), %o2
  380
  381 L4.13:
  382         ! remainder is negative
  383         addcc   %o3,%o5,%o3
  384         b       9f
  385         add     %o2, (-3*2-1), %o2
  386
  387 L2.15:
  388         ! remainder is negative
  389         addcc   %o3,%o5,%o3
  390         ! depth 3, accumulated bits -3
  391         bl      L3.13
  392         srl     %o5,1,%o5
  393         ! remainder is positive
  394         subcc   %o3,%o5,%o3
  395         ! depth 4, accumulated bits -5
  396         bl      L4.11
  397         srl     %o5,1,%o5
  398         ! remainder is positive
  399         subcc   %o3,%o5,%o3
  400         b       9f
  401         add     %o2, (-5*2+1), %o2
  402
  403 L4.11:
  404         ! remainder is negative
  405         addcc   %o3,%o5,%o3
  406         b       9f
  407         add     %o2, (-5*2-1), %o2
  408
  409 L3.13:
  410         ! remainder is negative
  411         addcc   %o3,%o5,%o3
  412         ! depth 4, accumulated bits -7
  413         bl      L4.9
  414         srl     %o5,1,%o5
  415         ! remainder is positive
  416         subcc   %o3,%o5,%o3
  417         b       9f
  418         add     %o2, (-7*2+1), %o2
  419
  420 L4.9:
  421         ! remainder is negative
  422         addcc   %o3,%o5,%o3
  423         b       9f
  424         add     %o2, (-7*2-1), %o2
  425
  426         9:
  427 end_regular_divide:              /* common exit of every leaf and of the single-bit path */
  428         subcc   %o4, 1, %o4     /* one 4-bit pass consumed */
  429         bge     divloop
  430         tst     %o3             /* delay slot: sign of R for the next pass or for the fixup below */
  431         bl,a    got_result      /* annul bit: the sub below runs only when R is negative */
  432         ! non-restoring fixup here (one instruction only!)
  433         sub     %o2, 1, %o2
  434
  435
  436 got_result:
  437         ! check to see if answer should be < 0
  438         tst     %g3
  439         bl,a    1f              /* annul bit: the negate below runs only when %g3 is negative */
  440         sub %g0, %o2, %o2
  441 1:
  442         retl
  443         mov %o2, %o0            /* delay slot: return Q in %o0 */
 444 #endif
 445
 446 #ifdef L_modsi3
 447 /* This implementation was taken from glibc:
 448  *
 449  * Input: dividend and divisor in %o0 and %o1 respectively.
 450  *
 451  * Algorithm parameters:
 452  *  N           how many bits per iteration we try to get (4)
 453  *  WORDSIZE    total number of bits (32)
 454  *
 455  * Derived constants:
 456  *  TOPBITS     number of bits in the top decade of a number
 457  *
 458  * Important variables:
 459  *  Q           the partial quotient under development (initially 0)
 460  *  R           the remainder so far, initially the dividend
 461  *  ITER        number of main division loop iterations required;
 462  *              equal to ceil(log2(quotient) / N).  Note that this
 463  *              is the log base (2^N) of the quotient.
 464  *  V           the current comparand, initially divisor*2^(ITER*N-1)
 465  *
 466  * Cost:
 467  *  Current estimate for non-large dividend is
 468  *      ceil(log2(quotient) / N) * (10 + 7N/2) + C
 469  *  A large dividend is one greater than 2^(31-TOPBITS) and takes a
 470  *  different path, as the upper bits of the quotient must be developed
 471  *  one bit at a time.
 472  */
  473 .text                            /* .urem: unsigned entry point for %o0 % %o1 */
  474         .align 4
  475         .global .urem
  476         .proc 4
  477 .urem:                           /* %g3 is the sign word tested at got_result, zero here */
  478         b       divide
  479         mov     0, %g3          ! result always positive
 480
  481         .align 4                /* .rem: signed entry point for %o0 % %o1 */
  482         .global .rem
  483         .proc 4
  484 .rem:                            /* negates any negative operand, then falls into divide */
  485         ! compute sign of result; if neither is negative, no problem
  486         orcc    %o1, %o0, %g0   ! either negative?
  487         bge     2f                      ! no, go do the divide
  488         mov     %o0, %g3                ! sign of remainder matches %o0
  489         tst     %o1             /* is the divisor the negative one */
  490         bge     1f              /* divisor is not negative, so the dividend must be */
  491         tst     %o0             /* delay slot: sets the codes for the bge two lines below */
  492         ! %o1 is definitely negative; %o0 might also be negative
  493         bge     2f                      ! if %o0 not negative...
  494         sub     %g0, %o1, %o1   ! in any case, make %o1 nonneg
  495 1:      ! %o0 is negative, %o1 is nonnegative
  496         sub     %g0, %o0, %o0   ! make %o0 nonnegative
  497 2:
 498
 499         ! Ready to divide.  Compute size of quotient; scale comparand.
  500 divide:                          /* shared core: %o0 = dividend, %o1 = divisor, %g3 = sign word, remainder returned in %o0 */
  501         orcc    %o1, %g0, %o5   /* %o5 = working comparand V, also tests the divisor for zero */
  502         bne     1f              /* nonzero divisor, go divide */
  503         mov     %o0, %o3        /* delay slot: %o3 = running remainder R */
  504
  505                 ! Divide by zero trap.  If it returns, return 0 (about as
  506                 ! wrong as possible, but that is what SunOS does...).
  507                 ta      0x2   !ST_DIV0
  508                 retl
  509                 clr     %o0     /* delay slot: return value 0 */
  510
  511 1:
  512         cmp     %o3, %o5                ! if %o1 exceeds %o0, done
  513         blu     got_result              ! (and algorithm fails otherwise)
  514         clr     %o2                     /* delay slot: quotient Q starts at 0 */
  515         sethi   %hi(1 << (32 - 4 - 1)), %g1     /* %g1 = threshold separating the two scaling paths */
  516         cmp     %o3, %g1
  517         blu     not_really_big          /* dividend below the threshold, use the 4-bit path only */
  518         clr     %o4                     /* delay slot: %o4 counts 4-bit passes, starts at 0 */
  519
  520         ! Here the dividend is >= 2**(31-N) or so.  We must be careful here,
  521         ! as our usual N-at-a-shot divide step will cause overflow and havoc.
  522         ! The number of bits in the result here is N*ITER+SC, where SC <= N.
  523         ! Compute ITER in an unorthodox manner: know we need to shift V into
  524         ! the top decade: so do not even bother to compare to R.
  525         1:
  526                 cmp     %o5, %g1
  527                 bgeu    3f              /* V has reached the threshold, stop shifting by 4 */
  528                 mov     1, %g2          /* delay slot: %g2 counts single-bit steps, starts at 1 */
  529                 sll     %o5, 4, %o5
  530                 b       1b
  531                 add     %o4, 1, %o4     /* delay slot: one more 4-bit pass will be needed */
  532
  533         ! Now compute %g2.
  534         2:      addcc   %o5, %o5, %o5   /* double V, a carry out means it no longer fits in 32 bits */
  535                 bcc     not_too_big
  536                 add     %g2, 1, %g2     /* delay slot: count one more single-bit step */
  537
  538                 ! We get here if the %o1 overflowed while shifting.
  539                 ! This means that %o3 has the high-order bit set.
  540                 ! Restore %o5 and subtract from %o3.
  541                 sll     %g1, 4, %g1     ! high order bit
  542                 srl     %o5, 1, %o5             ! rest of %o5
  543                 add     %o5, %g1, %o5
  544                 b       do_single_div
  545                 sub     %g2, 1, %g2     /* delay slot: take back the count added for the overflowing step */
  546
  547         not_too_big:
  548         3:      cmp     %o5, %o3
  549                 blu     2b              /* V still below R, double it again */
  550                 nop
  551                 be      do_single_div
  552                 nop
  553         /* NB: these are commented out in the V8-SPARC manual as well */
  554         /* (I do not understand this) */
  555         ! %o5 > %o3: went too far: back up 1 step
  556         !       srl     %o5, 1, %o5
  557         !       dec     %g2
  558         ! do single-bit divide steps
  559         !
  560         ! We have to be careful here.  We know that %o3 >= %o5, so we can do the
  561         ! first divide step without thinking.  BUT, the others are conditional,
  562         ! and are only done if %o3 >= 0.  Because both %o3 and %o5 may have the high-
  563         ! order bit set in the first step, just falling into the regular
  564         ! division loop will mess up the first time around.
  565         ! So we unroll slightly...
  566         do_single_div:
  567                 subcc   %g2, 1, %g2
  568                 bl      end_regular_divide      /* no single-bit steps to do */
  569                 nop
  570                 sub     %o3, %o5, %o3           /* first step is unconditional: R = R - V */
  571                 mov     1, %o2                  /* and it contributes a quotient bit of 1 */
  572                 b       end_single_divloop
  573                 nop
  574         single_divloop:
  575                 sll     %o2, 1, %o2             /* make room for the next quotient bit */
  576                 bl      1f                      /* uses the codes from the tst %o3 in the loop branch delay slot */
  577                 srl     %o5, 1, %o5             /* delay slot: halve V for this step */
  578                 ! %o3 >= 0
  579                 sub     %o3, %o5, %o3
  580                 b       2f
  581                 add     %o2, 1, %o2             /* delay slot: Q = Q + 1 */
  582         1:      ! %o3 < 0
  583                 add     %o3, %o5, %o3
  584                 sub     %o2, 1, %o2
  585         2:
  586         end_single_divloop:
  587                 subcc   %g2, 1, %g2
  588                 bge     single_divloop
  589                 tst     %o3                     /* delay slot: sign of R selects add or subtract next */
  590                 b,a     end_regular_divide      /* annulled branch, so no delay slot instruction follows */
  591
  592 not_really_big:                  /* scale V up by 16 per pass until it exceeds R, counting passes in %o4 */
  593 1:
  594         sll     %o5, 4, %o5
  595         cmp     %o5, %o3
  596         bleu    1b
  597         addcc   %o4, 1, %o4     /* delay slot: runs on every pass, including the one that exits */
  598         be      got_result      /* taken only when the addcc result above is zero */
  599         sub     %o4, 1, %o4     /* delay slot: take back the increment from the exiting pass */
  600
  601         tst     %o3     ! set up for initial iteration
  602 divloop:                         /* each pass does four add or subtract steps on R with a halved V each time */
  603         sll     %o2, 4, %o2     /* make room for four more quotient bits */
  604                 ! depth 1, accumulated bits 0
  605         bl      L1.16           /* R negative on entry, codes come from the preceding tst %o3 */
  606         srl     %o5,1,%o5       /* delay slot: halve V before the step */
  607         ! remainder is positive
  608         subcc   %o3,%o5,%o3
  609         ! depth 2, accumulated bits 1
  610         bl      L2.17
  611         srl     %o5,1,%o5
  612         ! remainder is positive
  613         subcc   %o3,%o5,%o3
  614         ! depth 3, accumulated bits 3
  615         bl      L3.19
  616         srl     %o5,1,%o5
  617         ! remainder is positive
  618         subcc   %o3,%o5,%o3
  619         ! depth 4, accumulated bits 7
  620         bl      L4.23
  621         srl     %o5,1,%o5
  622         ! remainder is positive
  623         subcc   %o3,%o5,%o3
  624         b       9f
  625         add     %o2, (7*2+1), %o2       /* delay slot: leaf digit, here +15, added to Q */
  626 L4.23:
  627         ! remainder is negative
  628         addcc   %o3,%o5,%o3
  629         b       9f
  630         add     %o2, (7*2-1), %o2
  631
  632 L3.19:
  633         ! remainder is negative
  634         addcc   %o3,%o5,%o3
  635         ! depth 4, accumulated bits 5
  636         bl      L4.21
  637         srl     %o5,1,%o5
  638         ! remainder is positive
  639         subcc   %o3,%o5,%o3
  640         b       9f
  641         add     %o2, (5*2+1), %o2
  642
  643 L4.21:
  644         ! remainder is negative
  645         addcc   %o3,%o5,%o3
  646         b       9f
  647         add     %o2, (5*2-1), %o2
  648
  649 L2.17:
  650         ! remainder is negative
  651         addcc   %o3,%o5,%o3
  652         ! depth 3, accumulated bits 1
  653         bl      L3.17
  654         srl     %o5,1,%o5
  655         ! remainder is positive
  656         subcc   %o3,%o5,%o3
  657         ! depth 4, accumulated bits 3
  658         bl      L4.19
  659         srl     %o5,1,%o5
  660         ! remainder is positive
  661         subcc   %o3,%o5,%o3
  662         b       9f
  663         add     %o2, (3*2+1), %o2
  664
  665 L4.19:
  666         ! remainder is negative
  667         addcc   %o3,%o5,%o3
  668         b       9f
  669         add     %o2, (3*2-1), %o2
  670
  671 L3.17:
  672         ! remainder is negative
  673         addcc   %o3,%o5,%o3
  674         ! depth 4, accumulated bits 1
  675         bl      L4.17
  676         srl     %o5,1,%o5
  677         ! remainder is positive
  678         subcc   %o3,%o5,%o3
  679         b       9f
  680         add     %o2, (1*2+1), %o2
  681
  682 L4.17:
  683         ! remainder is negative
  684         addcc   %o3,%o5,%o3
  685         b       9f
  686         add     %o2, (1*2-1), %o2
  687
  688 L1.16:
  689         ! remainder is negative
  690         addcc   %o3,%o5,%o3
  691         ! depth 2, accumulated bits -1
  692         bl      L2.15
  693         srl     %o5,1,%o5
  694         ! remainder is positive
  695         subcc   %o3,%o5,%o3
  696         ! depth 3, accumulated bits -1
  697         bl      L3.15
  698         srl     %o5,1,%o5
  699         ! remainder is positive
  700         subcc   %o3,%o5,%o3
  701         ! depth 4, accumulated bits -1
  702         bl      L4.15
  703         srl     %o5,1,%o5
  704         ! remainder is positive
  705         subcc   %o3,%o5,%o3
  706         b       9f
  707         add     %o2, (-1*2+1), %o2
  708
  709 L4.15:
  710         ! remainder is negative
  711         addcc   %o3,%o5,%o3
  712         b       9f
  713         add     %o2, (-1*2-1), %o2
  714
  715 L3.15:
  716         ! remainder is negative
  717         addcc   %o3,%o5,%o3
  718         ! depth 4, accumulated bits -3
  719         bl      L4.13
  720         srl     %o5,1,%o5
  721         ! remainder is positive
  722         subcc   %o3,%o5,%o3
  723         b       9f
  724         add     %o2, (-3*2+1), %o2
  725
  726 L4.13:
  727         ! remainder is negative
  728         addcc   %o3,%o5,%o3
  729         b       9f
  730         add     %o2, (-3*2-1), %o2
  731
  732 L2.15:
  733         ! remainder is negative
  734         addcc   %o3,%o5,%o3
  735         ! depth 3, accumulated bits -3
  736         bl      L3.13
  737         srl     %o5,1,%o5
  738         ! remainder is positive
  739         subcc   %o3,%o5,%o3
  740         ! depth 4, accumulated bits -5
  741         bl      L4.11
  742         srl     %o5,1,%o5
  743         ! remainder is positive
  744         subcc   %o3,%o5,%o3
  745         b       9f
  746         add     %o2, (-5*2+1), %o2
  747
  748 L4.11:
  749         ! remainder is negative
  750         addcc   %o3,%o5,%o3
  751         b       9f
  752         add     %o2, (-5*2-1), %o2
  753
  754 L3.13:
  755         ! remainder is negative
  756         addcc   %o3,%o5,%o3
  757         ! depth 4, accumulated bits -7
  758         bl      L4.9
  759         srl     %o5,1,%o5
  760         ! remainder is positive
  761         subcc   %o3,%o5,%o3
  762         b       9f
  763         add     %o2, (-7*2+1), %o2
  764
  765 L4.9:
  766         ! remainder is negative
  767         addcc   %o3,%o5,%o3
  768         b       9f
  769         add     %o2, (-7*2-1), %o2
  770
  771         9:
  772 end_regular_divide:              /* common exit of every leaf and of the single-bit path */
  773         subcc   %o4, 1, %o4     /* one 4-bit pass consumed */
  774         bge     divloop
  775         tst     %o3             /* delay slot: sign of R for the next pass or for the fixup below */
  776         bl,a    got_result      /* annul bit: the add below runs only when R is negative */
  777         ! non-restoring fixup here (one instruction only!)
  778         add     %o3, %o1, %o3
  779
  780 got_result:
  781         ! check to see if answer should be < 0
  782         tst     %g3
  783         bl,a    1f              /* annul bit: the negate below runs only when %g3 is negative */
  784         sub %g0, %o3, %o3
  785 1:
  786         retl
  787         mov %o3, %o0            /* delay slot: return R in %o0 */
 788
 789 #endif
 790