usr/src/cmd/sgs/rtld.4.x/rem.s

   1 /*
   2  * CDDL HEADER START
   3  *
   4  * The contents of this file are subject to the terms of the
   5  * Common Development and Distribution License, Version 1.0 only
   6  * (the "License").  You may not use this file except in compliance
   7  * with the License.
   8  *
   9  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
  10  * or http://www.opensolaris.org/os/licensing.
  11  * See the License for the specific language governing permissions
  12  * and limitations under the License.
  13  *
  14  * When distributing Covered Code, include this CDDL HEADER in each
  15  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
  16  * If applicable, add the following below this CDDL HEADER, with the
  17  * fields enclosed by brackets "[]" replaced with your own identifying
  18  * information: Portions Copyright [yyyy] [name of copyright owner]
  19  *
  20  * CDDL HEADER END
  21  */
  22
  23 !       .seg    "data"
  24 !       .asciz  "Copyr 1986 Sun Micro"
  25         .seg    "text"
  26
  27 #ident  "%Z%%M% %I%     %E% SMI"
  28
  29 /*
  30  * Copyright 1986 Sun Microsystems, Inc.  All rights reserved.
  31  * Use is subject to license terms.
  32  */
  33
  34 /*
  35  * divison/remainder
  36  *
  37  * Input is:
  38  *      dividend -- the thing being divided
  39  * divisor  -- how many ways to divide
  40  * Important parameters:
  41  *      N -- how many bits per iteration we try to get
  42  *              as our current guess:
  43  *      WORDSIZE -- how many bits altogether we're talking about:
  44  *              obviously:
  45  * A derived constant:
  46  *      TOPBITS -- how many bits are in the top "decade" of a number:
  47  *
  48  * Important variables are:
  49  *      Q -- the partial quotient under development -- initally 0
  50  *      R -- the remainder so far -- initially == the dividend
  51  *      ITER -- number of iterations of the main division loop will
  52  *              be required. Equal to CEIL( lg2(quotient)/4 )
  53  *              Note that this is log_base_(2^4) of the quotient.
  54  *      V -- the current comparand -- initially divisor*2^(ITER*4-1)
  55  * Cost:
  56  *      current estimate for non-large dividend is
  57  *              CEIL( lg2(quotient) / 4 ) x ( 10 + 74/2 ) + C
  58  *      a large dividend is one greater than 2^(31-4 ) and takes a
  59  *      different path, as the upper bits of the quotient must be developed
  60  *      one bit at a time.
  61  */
  62
  63 #include <sys/trap.h>
  64 #include <sys/asm_linkage.h>
  65
  66
  67
  68
  69
  70
  71
  72
  73         ! working variable
  74
  75
  76 /*
  77  * this is the recursive definition of how we develop quotient digits.
  78  * it takes three important parameters:
  79  *      $1 -- the current depth, 1<=$1<=4
  80  *      $2 -- the current accumulation of quotient bits
  81  *      4  -- max depth
  82  * We add a new bit to $2 and either recurse or
  83  * insert the bits in the quotient.
  84  * Dynamic input:
  85  *      %o3 -- current remainder
  86  *      %o2 -- current quotient
  87  *      %o5 -- current comparand
  88  *      cc -- set on current value of %o3
  89  * Dynamic output:
  90  * %o3', %o2', %o5', cc'
  91  */
  92
  93
  94
  95
  96 !       RTENTRY(.urem)          ! UNSIGNED REMAINDER
  97         .global .urem
  98 .urem:
  99         b       divide
 100         mov     0,%g1           ! result always positive
 101
 102 !       RTENTRY(.rem)           ! SIGNED REMAINDER
 103         .global .rem
 104 .rem:
 105         orcc    %o1,%o0,%g0 ! are either %o0 or %o1 negative
 106         bge     divide          ! if not, skip this junk
 107         mov     %o0,%g1 ! record sign of result in sign of %g1
 108                 tst     %o1
 109                 bge     2f
 110                 tst     %o0
 111         !       %o1 < 0
 112                 bge     divide
 113                 neg     %o1
 114         2:
 115         !       %o0 < 0
 116                 neg     %o0
 117         !       FALL THROUGH
 118
 119
 120 divide:
 121 !       compute size of quotient, scale comparand
 122         orcc    %o1,%g0,%o5     ! movcc %o1,%o5
 123         bnz     0f              ! if %o1 != 0
 124         mov     %o0,%o3
 125         ba      zero_divide
 126         nop
 127 0:
 128         cmp     %o3,%o5
 129         blu     got_result ! if %o3<%o5 already, there's no point in continuing
 130         mov     0,%o2
 131         sethi   %hi(1<<(32-4 -1)),%g2
 132         cmp     %o3,%g2
 133         blu     not_really_big
 134         mov     0,%o4
 135         !
 136         ! here, the %o0 is >= 2^(31-4) or so. We must be careful here, as
 137         ! our usual 4-at-a-shot divide step will cause overflow and havoc. The
 138         ! total number of bits in the result here is 4*%o4+%g3, where %g3 <= 4.
 139         ! compute %o4, in an unorthodox manner: know we need to Shift %o5 into
 140         !       the top decade: so don't even bother to compare to %o3.
 141         1:
 142                 cmp     %o5,%g2
 143                 bgeu    3f
 144                 mov     1,%g3
 145                 sll     %o5,4,%o5
 146                 b       1b
 147                 inc     %o4
 148         ! now compute %g3
 149         2:      addcc   %o5,%o5,%o5
 150                 bcc     not_too_big ! bcc       not_too_big
 151                 add     %g3,1,%g3
 152                         !
 153                         ! here if the %o1 overflowed when Shifting
 154                         ! this means that %o3 has the high-order bit set
 155                         ! restore %o5 and subtract from %o3
 156                         sll     %g2,4 ,%g2 ! high order bit
 157                         srl     %o5,1,%o5 ! rest of %o5
 158                         add     %o5,%g2,%o5
 159                         b       do_single_div
 160                         sub     %g3,1,%g3
 161         not_too_big:
 162         3:      cmp     %o5,%o3
 163                 blu     2b
 164                 nop
 165                 be      do_single_div
 166                 nop
 167         ! %o5 > %o3: went too far: back up 1 step
 168         !       srl     %o5,1,%o5
 169         !       dec     %g3
 170         ! do single-bit divide steps
 171         !
 172         ! we have to be careful here. We know that %o3 >= %o5, so we can do the
 173         ! first divide step without thinking. BUT, the others are conditional,
 174         ! and are only done if %o3 >= 0. Because both %o3 and %o5 may have the high-
 175         ! order bit set in the first step, just falling into the regular
 176         ! division loop will mess up the first time around.
 177         ! So we unroll slightly...
 178         do_single_div:
 179                 deccc   %g3
 180                 bl      end_regular_divide
 181                 nop
 182                 sub     %o3,%o5,%o3
 183                 mov     1,%o2
 184                 b,a     end_single_divloop
 185         single_divloop:
 186                 sll     %o2,1,%o2
 187                 bl      1f
 188                 srl     %o5,1,%o5
 189                 ! %o3 >= 0
 190                 sub     %o3,%o5,%o3
 191                 b       2f
 192                 inc     %o2
 193         1:      ! %o3 < 0
 194                 add     %o3,%o5,%o3
 195                 dec     %o2
 196         2:
 197         end_single_divloop:
 198                 deccc   %g3
 199                 bge     single_divloop
 200                 tst     %o3
 201                 b,a     end_regular_divide
 202
 203 not_really_big:
 204 1:
 205         sll     %o5,4,%o5
 206         cmp     %o5,%o3
 207         bleu    1b
 208         inccc   %o4
 209         be      got_result
 210         dec     %o4
 211 do_regular_divide:
 212
 213 !       do the main division iteration
 214         tst     %o3
 215 !       fall through into divide loop
 216 divloop:
 217         sll     %o2,4,%o2
 218                 !depth 1, accumulated bits 0
 219         bl      L.1.16
 220         srl     %o5,1,%o5
 221         ! remainder is positive
 222         subcc   %o3,%o5,%o3
 223                         !depth 2, accumulated bits 1
 224         bl      L.2.17
 225         srl     %o5,1,%o5
 226         ! remainder is positive
 227         subcc   %o3,%o5,%o3
 228                         !depth 3, accumulated bits 3
 229         bl      L.3.19
 230         srl     %o5,1,%o5
 231         ! remainder is positive
 232         subcc   %o3,%o5,%o3
 233                         !depth 4, accumulated bits 7
 234         bl      L.4.23
 235         srl     %o5,1,%o5
 236         ! remainder is positive
 237         subcc   %o3,%o5,%o3
 238                 b       9f
 239                 add     %o2, (7*2+1), %o2
 240
 241 L.4.23: ! remainder is negative
 242         addcc   %o3,%o5,%o3
 243                 b       9f
 244                 add     %o2, (7*2-1), %o2
 245
 246
 247
 248
 249 L.3.19: ! remainder is negative
 250         addcc   %o3,%o5,%o3
 251                         !depth 4, accumulated bits 5
 252         bl      L.4.21
 253         srl     %o5,1,%o5
 254         ! remainder is positive
 255         subcc   %o3,%o5,%o3
 256                 b       9f
 257                 add     %o2, (5*2+1), %o2
 258
 259 L.4.21: ! remainder is negative
 260         addcc   %o3,%o5,%o3
 261                 b       9f
 262                 add     %o2, (5*2-1), %o2
 263
 264
 265
 266
 267
 268
 269
 270 L.2.17: ! remainder is negative
 271         addcc   %o3,%o5,%o3
 272                         !depth 3, accumulated bits 1
 273         bl      L.3.17
 274         srl     %o5,1,%o5
 275         ! remainder is positive
 276         subcc   %o3,%o5,%o3
 277                         !depth 4, accumulated bits 3
 278         bl      L.4.19
 279         srl     %o5,1,%o5
 280         ! remainder is positive
 281         subcc   %o3,%o5,%o3
 282                 b       9f
 283                 add     %o2, (3*2+1), %o2
 284
 285 L.4.19: ! remainder is negative
 286         addcc   %o3,%o5,%o3
 287                 b       9f
 288                 add     %o2, (3*2-1), %o2
 289
 290
 291
 292
 293 L.3.17: ! remainder is negative
 294         addcc   %o3,%o5,%o3
 295                         !depth 4, accumulated bits 1
 296         bl      L.4.17
 297         srl     %o5,1,%o5
 298         ! remainder is positive
 299         subcc   %o3,%o5,%o3
 300                 b       9f
 301                 add     %o2, (1*2+1), %o2
 302
 303 L.4.17: ! remainder is negative
 304         addcc   %o3,%o5,%o3
 305                 b       9f
 306                 add     %o2, (1*2-1), %o2
 307
 308
 309
 310
 311
 312
 313
 314
 315
 316
 317 L.1.16: ! remainder is negative
 318         addcc   %o3,%o5,%o3
 319                         !depth 2, accumulated bits -1
 320         bl      L.2.15
 321         srl     %o5,1,%o5
 322         ! remainder is positive
 323         subcc   %o3,%o5,%o3
 324                         !depth 3, accumulated bits -1
 325         bl      L.3.15
 326         srl     %o5,1,%o5
 327         ! remainder is positive
 328         subcc   %o3,%o5,%o3
 329                         !depth 4, accumulated bits -1
 330         bl      L.4.15
 331         srl     %o5,1,%o5
 332         ! remainder is positive
 333         subcc   %o3,%o5,%o3
 334                 b       9f
 335                 add     %o2, (-1*2+1), %o2
 336
 337 L.4.15: ! remainder is negative
 338         addcc   %o3,%o5,%o3
 339                 b       9f
 340                 add     %o2, (-1*2-1), %o2
 341
 342
 343
 344
 345 L.3.15: ! remainder is negative
 346         addcc   %o3,%o5,%o3
 347                         !depth 4, accumulated bits -3
 348         bl      L.4.13
 349         srl     %o5,1,%o5
 350         ! remainder is positive
 351         subcc   %o3,%o5,%o3
 352                 b       9f
 353                 add     %o2, (-3*2+1), %o2
 354
 355 L.4.13: ! remainder is negative
 356         addcc   %o3,%o5,%o3
 357                 b       9f
 358                 add     %o2, (-3*2-1), %o2
 359
 360
 361
 362
 363
 364
 365
 366 L.2.15: ! remainder is negative
 367         addcc   %o3,%o5,%o3
 368                         !depth 3, accumulated bits -3
 369         bl      L.3.13
 370         srl     %o5,1,%o5
 371         ! remainder is positive
 372         subcc   %o3,%o5,%o3
 373                         !depth 4, accumulated bits -5
 374         bl      L.4.11
 375         srl     %o5,1,%o5
 376         ! remainder is positive
 377         subcc   %o3,%o5,%o3
 378                 b       9f
 379                 add     %o2, (-5*2+1), %o2
 380
 381 L.4.11: ! remainder is negative
 382         addcc   %o3,%o5,%o3
 383                 b       9f
 384                 add     %o2, (-5*2-1), %o2
 385
 386
 387
 388
 389 L.3.13: ! remainder is negative
 390         addcc   %o3,%o5,%o3
 391                         !depth 4, accumulated bits -7
 392         bl      L.4.9
 393         srl     %o5,1,%o5
 394         ! remainder is positive
 395         subcc   %o3,%o5,%o3
 396                 b       9f
 397                 add     %o2, (-7*2+1), %o2
 398
 399 L.4.9:  ! remainder is negative
 400         addcc   %o3,%o5,%o3
 401                 b       9f
 402                 add     %o2, (-7*2-1), %o2
 403
 404
 405
 406
 407
 408
 409
 410
 411
 412
 413         9:
 414
 415 end_regular_divide:
 416         deccc   %o4
 417         bge     divloop
 418         tst     %o3
 419         bl,a    got_result
 420         add     %o3,%o1,%o3
 421
 422
 423 got_result:
 424         tst     %g1
 425         bl,a    1f
 426         neg     %o3     ! remainder <- -%o3
 427
 428 1:
 429         retl
 430         mov     %o3,%o0 ! remainder <-  %o3
 431
 432
 433 zero_divide:
 434         ta      ST_DIV0         ! divide by zero trap
 435         retl                    ! if handled, ignored, return
 436         mov     0, %o0