usr/src/cmd/sgs/rtld.4.x/div.s

   1 /*
   2  * CDDL HEADER START
   3  *
   4  * The contents of this file are subject to the terms of the
   5  * Common Development and Distribution License, Version 1.0 only
   6  * (the "License").  You may not use this file except in compliance
   7  * with the License.
   8  *
   9  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
  10  * or http://www.opensolaris.org/os/licensing.
  11  * See the License for the specific language governing permissions
  12  * and limitations under the License.
  13  *
  14  * When distributing Covered Code, include this CDDL HEADER in each
  15  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
  16  * If applicable, add the following below this CDDL HEADER, with the
  17  * fields enclosed by brackets "[]" replaced with your own identifying
  18  * information: Portions Copyright [yyyy] [name of copyright owner]
  19  *
  20  * CDDL HEADER END
  21  */
  22
  23 !       .seg    "data"
  24 !       .asciz  "Copyr 1986 Sun Micro"
  25         .seg    "text"
  26
  27 #ident  "%Z%%M% %I%     %E% SMI"
  28
  29 /*
  30  * Copyright 1986 Sun Microsystems, Inc.  All rights reserved.
  31  * Use is subject to license terms.
  32  */
  33
  34 /*
  35  * divison/remainder
  36  *
  37  * Input is:
  38  *      dividend -- the thing being divided
  39  * divisor  -- how many ways to divide
  40  * Important parameters:
  41  *      N -- how many bits per iteration we try to get
  42  *              as our current guess:
  43  *      WORDSIZE -- how many bits altogether we're talking about:
  44  *              obviously:
  45  * A derived constant:
  46  *      TOPBITS -- how many bits are in the top "decade" of a number:
  47  *
  48  * Important variables are:
  49  *      Q -- the partial quotient under development -- initally 0
  50  *      R -- the remainder so far -- initially == the dividend
  51  *      ITER -- number of iterations of the main division loop will
  52  *              be required. Equal to CEIL( lg2(quotient)/4 )
  53  *              Note that this is log_base_(2^4) of the quotient.
  54  *      V -- the current comparand -- initially divisor*2^(ITER*4-1)
  55  * Cost:
  56  *      current estimate for non-large dividend is
  57  *              CEIL( lg2(quotient) / 4 ) x ( 10 + 74/2 ) + C
  58  *      a large dividend is one greater than 2^(31-4 ) and takes a
  59  *      different path, as the upper bits of the quotient must be developed
  60  *      one bit at a time.
  61  */
  62
  63 #include <sys/trap.h>
  64 #include <sys/asm_linkage.h>
  65
  66
  67
  68
  69
  70
  71
  72
  73         ! working variable
  74
  75
  76 /*
  77  * this is the recursive definition of how we develop quotient digits.
  78  * it takes three important parameters:
  79  *      $1 -- the current depth, 1<=$1<=4
  80  *      $2 -- the current accumulation of quotient bits
  81  *      4  -- max depth
  82  * We add a new bit to $2 and either recurse or
  83  * insert the bits in the quotient.
  84  * Dynamic input:
  85  *      %o3 -- current remainder
  86  *      %o2 -- current quotient
  87  *      %o5 -- current comparand
  88  *      cc -- set on current value of %o3
  89  * Dynamic output:
  90  * %o3', %o2', %o5', cc'
  91  */
  92
  93
  94
  95
  96
  97 !       RTENTRY(.udiv)          ! unsigned divide
  98         .global .udiv
  99 .udiv:
 100         b       divide
 101         mov     0,%g1           ! result always positive
 102
 103 !       RTENTRY(.div)           ! SIGNED DIVIDE
 104         .global .div
 105 .div:
 106         orcc    %o1,%o0,%g0 ! are either %o0 or %o1 negative
 107         bge     divide          ! if not, skip this junk
 108         xor     %o1,%o0,%g1 ! record sign of result in sign of %g1
 109                 tst     %o1
 110                 bge     2f
 111                 tst     %o0
 112         !       %o1 < 0
 113                 bge     divide
 114                 neg     %o1
 115         2:
 116         !       %o0 < 0
 117                 neg     %o0
 118         !       FALL THROUGH
 119
 120
 121 divide:
 122 !       compute size of quotient, scale comparand
 123         orcc    %o1,%g0,%o5     ! movcc %o1,%o5
 124         bnz     0f              ! if %o1 != 0
 125         mov     %o0,%o3
 126         ba      zero_divide
 127         nop
 128 0:
 129         cmp     %o3,%o5
 130         blu     got_result ! if %o3<%o5 already, there's no point in continuing
 131         mov     0,%o2
 132         sethi   %hi(1<<(32-4 -1)),%g2
 133         cmp     %o3,%g2
 134         blu     not_really_big
 135         mov     0,%o4
 136         !
 137         ! here, the %o0 is >= 2^(31-4) or so. We must be careful here, as
 138         ! our usual 4-at-a-shot divide step will cause overflow and havoc. The
 139         ! total number of bits in the result here is 4*%o4+%g3, where %g3 <= 4.
 140         ! compute %o4, in an unorthodox manner: know we need to Shift %o5 into
 141         !       the top decade: so don't even bother to compare to %o3.
 142         1:
 143                 cmp     %o5,%g2
 144                 bgeu    3f
 145                 mov     1,%g3
 146                 sll     %o5,4,%o5
 147                 b       1b
 148                 inc     %o4
 149         ! now compute %g3
 150         2:      addcc   %o5,%o5,%o5
 151                 bcc     not_too_big ! bcc       not_too_big
 152                 add     %g3,1,%g3
 153                         !
 154                         ! here if the %o1 overflowed when Shifting
 155                         ! this means that %o3 has the high-order bit set
 156                         ! restore %o5 and subtract from %o3
 157                         sll     %g2,4 ,%g2 ! high order bit
 158                         srl     %o5,1,%o5 ! rest of %o5
 159                         add     %o5,%g2,%o5
 160                         b       do_single_div
 161                         sub     %g3,1,%g3
 162         not_too_big:
 163         3:      cmp     %o5,%o3
 164                 blu     2b
 165                 nop
 166                 be      do_single_div
 167                 nop
 168         ! %o5 > %o3: went too far: back up 1 step
 169         !       srl     %o5,1,%o5
 170         !       dec     %g3
 171         ! do single-bit divide steps
 172         !
 173         ! we have to be careful here. We know that %o3 >= %o5, so we can do the
 174         ! first divide step without thinking. BUT, the others are conditional,
 175         ! and are only done if %o3 >= 0. Because both %o3 and %o5 may have the high-
 176         ! order bit set in the first step, just falling into the regular
 177         ! division loop will mess up the first time around.
 178         ! So we unroll slightly...
 179         do_single_div:
 180                 deccc   %g3
 181                 bl      end_regular_divide
 182                 nop
 183                 sub     %o3,%o5,%o3
 184                 mov     1,%o2
 185                 b,a     end_single_divloop
 186         single_divloop:
 187                 sll     %o2,1,%o2
 188                 bl      1f
 189                 srl     %o5,1,%o5
 190                 ! %o3 >= 0
 191                 sub     %o3,%o5,%o3
 192                 b       2f
 193                 inc     %o2
 194         1:      ! %o3 < 0
 195                 add     %o3,%o5,%o3
 196                 dec     %o2
 197         2:
 198         end_single_divloop:
 199                 deccc   %g3
 200                 bge     single_divloop
 201                 tst     %o3
 202                 b,a     end_regular_divide
 203
 204 not_really_big:
 205 1:
 206         sll     %o5,4,%o5
 207         cmp     %o5,%o3
 208         bleu    1b
 209         inccc   %o4
 210         be      got_result
 211         dec     %o4
 212 do_regular_divide:
 213
 214 !       do the main division iteration
 215         tst     %o3
 216 !       fall through into divide loop
 217 divloop:
 218         sll     %o2,4,%o2
 219                 !depth 1, accumulated bits 0
 220         bl      L.1.16
 221         srl     %o5,1,%o5
 222         ! remainder is positive
 223         subcc   %o3,%o5,%o3
 224                         !depth 2, accumulated bits 1
 225         bl      L.2.17
 226         srl     %o5,1,%o5
 227         ! remainder is positive
 228         subcc   %o3,%o5,%o3
 229                         !depth 3, accumulated bits 3
 230         bl      L.3.19
 231         srl     %o5,1,%o5
 232         ! remainder is positive
 233         subcc   %o3,%o5,%o3
 234                         !depth 4, accumulated bits 7
 235         bl      L.4.23
 236         srl     %o5,1,%o5
 237         ! remainder is positive
 238         subcc   %o3,%o5,%o3
 239                 b       9f
 240                 add     %o2, (7*2+1), %o2
 241
 242 L.4.23: ! remainder is negative
 243         addcc   %o3,%o5,%o3
 244                 b       9f
 245                 add     %o2, (7*2-1), %o2
 246
 247
 248
 249
 250 L.3.19: ! remainder is negative
 251         addcc   %o3,%o5,%o3
 252                         !depth 4, accumulated bits 5
 253         bl      L.4.21
 254         srl     %o5,1,%o5
 255         ! remainder is positive
 256         subcc   %o3,%o5,%o3
 257                 b       9f
 258                 add     %o2, (5*2+1), %o2
 259
 260 L.4.21: ! remainder is negative
 261         addcc   %o3,%o5,%o3
 262                 b       9f
 263                 add     %o2, (5*2-1), %o2
 264
 265
 266
 267
 268
 269
 270
 271 L.2.17: ! remainder is negative
 272         addcc   %o3,%o5,%o3
 273                         !depth 3, accumulated bits 1
 274         bl      L.3.17
 275         srl     %o5,1,%o5
 276         ! remainder is positive
 277         subcc   %o3,%o5,%o3
 278                         !depth 4, accumulated bits 3
 279         bl      L.4.19
 280         srl     %o5,1,%o5
 281         ! remainder is positive
 282         subcc   %o3,%o5,%o3
 283                 b       9f
 284                 add     %o2, (3*2+1), %o2
 285
 286 L.4.19: ! remainder is negative
 287         addcc   %o3,%o5,%o3
 288                 b       9f
 289                 add     %o2, (3*2-1), %o2
 290
 291
 292
 293
 294 L.3.17: ! remainder is negative
 295         addcc   %o3,%o5,%o3
 296                         !depth 4, accumulated bits 1
 297         bl      L.4.17
 298         srl     %o5,1,%o5
 299         ! remainder is positive
 300         subcc   %o3,%o5,%o3
 301                 b       9f
 302                 add     %o2, (1*2+1), %o2
 303
 304 L.4.17: ! remainder is negative
 305         addcc   %o3,%o5,%o3
 306                 b       9f
 307                 add     %o2, (1*2-1), %o2
 308
 309
 310
 311
 312
 313
 314
 315
 316
 317
 318 L.1.16: ! remainder is negative
 319         addcc   %o3,%o5,%o3
 320                         !depth 2, accumulated bits -1
 321         bl      L.2.15
 322         srl     %o5,1,%o5
 323         ! remainder is positive
 324         subcc   %o3,%o5,%o3
 325                         !depth 3, accumulated bits -1
 326         bl      L.3.15
 327         srl     %o5,1,%o5
 328         ! remainder is positive
 329         subcc   %o3,%o5,%o3
 330                         !depth 4, accumulated bits -1
 331         bl      L.4.15
 332         srl     %o5,1,%o5
 333         ! remainder is positive
 334         subcc   %o3,%o5,%o3
 335                 b       9f
 336                 add     %o2, (-1*2+1), %o2
 337
 338 L.4.15: ! remainder is negative
 339         addcc   %o3,%o5,%o3
 340                 b       9f
 341                 add     %o2, (-1*2-1), %o2
 342
 343
 344
 345
 346 L.3.15: ! remainder is negative
 347         addcc   %o3,%o5,%o3
 348                         !depth 4, accumulated bits -3
 349         bl      L.4.13
 350         srl     %o5,1,%o5
 351         ! remainder is positive
 352         subcc   %o3,%o5,%o3
 353                 b       9f
 354                 add     %o2, (-3*2+1), %o2
 355
 356 L.4.13: ! remainder is negative
 357         addcc   %o3,%o5,%o3
 358                 b       9f
 359                 add     %o2, (-3*2-1), %o2
 360
 361
 362
 363
 364
 365
 366
 367 L.2.15: ! remainder is negative
 368         addcc   %o3,%o5,%o3
 369                         !depth 3, accumulated bits -3
 370         bl      L.3.13
 371         srl     %o5,1,%o5
 372         ! remainder is positive
 373         subcc   %o3,%o5,%o3
 374                         !depth 4, accumulated bits -5
 375         bl      L.4.11
 376         srl     %o5,1,%o5
 377         ! remainder is positive
 378         subcc   %o3,%o5,%o3
 379                 b       9f
 380                 add     %o2, (-5*2+1), %o2
 381
 382 L.4.11: ! remainder is negative
 383         addcc   %o3,%o5,%o3
 384                 b       9f
 385                 add     %o2, (-5*2-1), %o2
 386
 387
 388
 389
 390 L.3.13: ! remainder is negative
 391         addcc   %o3,%o5,%o3
 392                         !depth 4, accumulated bits -7
 393         bl      L.4.9
 394         srl     %o5,1,%o5
 395         ! remainder is positive
 396         subcc   %o3,%o5,%o3
 397                 b       9f
 398                 add     %o2, (-7*2+1), %o2
 399
 400 L.4.9:  ! remainder is negative
 401         addcc   %o3,%o5,%o3
 402                 b       9f
 403                 add     %o2, (-7*2-1), %o2
 404
 405
 406
 407
 408
 409
 410
 411
 412
 413
 414         9:
 415
 416 end_regular_divide:
 417         deccc   %o4
 418         bge     divloop
 419         tst     %o3
 420         bl,a    got_result
 421         dec     %o2
 422
 423
 424 got_result:
 425         tst     %g1
 426         bl,a    1f
 427         neg     %o2     ! quotient  <- -%o2
 428
 429 1:
 430         retl
 431         mov     %o2,%o0 ! quotient  <-  %o2
 432
 433
 434 zero_divide:
 435         ta      ST_DIV0         ! divide by zero trap
 436         retl                    ! if handled, ignored, return
 437         mov     0, %o0