/ Source provenance (gitweb scrape residue, commented out so it cannot
/ be mistaken for code):
/   commit subject: "dmake: do not set MAKEFLAGS=k"
/   repo/path: [unleashed/tickless.git] usr/src/common/util/i386/muldiv.s
/   blob: 8fade8c0ca8f348b3a4264383eefca58cc189a9e
1 /*
2 * CDDL HEADER START
4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License, Version 1.0 only
6 * (the "License"). You may not use this file except in compliance
7 * with the License.
9 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
10 * or http://www.opensolaris.org/os/licensing.
11 * See the License for the specific language governing permissions
12 * and limitations under the License.
14 * When distributing Covered Code, include this CDDL HEADER in each
15 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
16 * If applicable, add the following below this CDDL HEADER, with the
17 * fields enclosed by brackets "[]" replaced with your own identifying
18 * information: Portions Copyright [yyyy] [name of copyright owner]
20 * CDDL HEADER END
23 * Copyright 2005 Sun Microsystems, Inc. All rights reserved.
24 * Use is subject to license terms.
27 .ident "%Z%%M% %I% %E% SMI"
29 .file "muldiv.s"
31 #if defined(__i386) && !defined(__amd64)
34 * Helper routines for 32-bit compilers to perform 64-bit math.
35 * These are used both by the Sun and GCC compilers.
38 #include <sys/asm_linkage.h>
39 #include <sys/asm_misc.h>
44 / function __mul64(A,B:Longint):Longint;
45 / {Overflow is not checked}
47 / We essentially do multiply by longhand, using base 2**32 digits.
48 / a b parameter A
49 / x c d parameter B
50 / ---------
51 / ad bd
52 / ac bc
53 / -----------------
54 / ac ad+bc bd
56 / We can ignore ac and top 32 bits of ad+bc: if <> 0, overflow happened.
/ __mul64(A, B) -- 64-bit multiply helper for 32-bit compilers
/ (used by both the Sun and GCC compilers; see header above).
/ Long multiplication in base 2^32 per the diagram above:
/   %edx:%eax = (lo(A)*lo(B)) + ((hi(A)*lo(B) + lo(A)*hi(B)) << 32)
/ i.e. the low 64 bits of A*B; overflow is not checked.
/ In:   8(%ebp)=A.lo  12(%ebp)=A.hi  16(%ebp)=B.lo  20(%ebp)=B.hi
/ Out:  %edx:%eax = product; callee pops its 16 bytes of args (ret $16).
/ Clobbers: %ecx, flags; %esi is saved/restored.
58 ENTRY(__mul64)
59 push %ebp
60 mov %esp,%ebp
61 pushl %esi
62 mov 12(%ebp),%eax / A.hi (a)
63 mull 16(%ebp) / Multiply A.hi by B.lo (produces ad)
64 xchg %ecx,%eax / ecx = bottom half of ad.
65 movl 8(%ebp),%eax / A.Lo (b)
66 movl %eax,%esi / Save A.lo for later
67 mull 16(%ebp) / Multiply A.Lo by B.LO (dx:ax = bd.)
68 addl %edx,%ecx / cx is ad
69 xchg %eax,%esi / esi is bd, eax = A.lo (d)
70 mull 20(%ebp) / Multiply A.lo * B.hi (producing bc)
71 addl %ecx,%eax / Produce ad+bc
/ assemble the result: %esi holds lo(bd); swap so %edx:%eax = hi:lo
72 movl %esi,%edx
73 xchg %eax,%edx
74 popl %esi
75 movl %ebp,%esp
76 popl %ebp
77 ret $16
78 SET_SIZE(__mul64)
82 * C support for 64-bit modulo and division.
83 * Hand-customized compiler output - see comments for details.
86 / /*
87 / * Unsigned division with remainder.
88 / * Divide two uint64_ts, and calculate remainder.
89 / */
90 / uint64_t
91 / UDivRem(uint64_t x, uint64_t y, uint64_t * pmod)
92 / {
93 / /* simple cases: y is a single uint32_t */
94 / if (HI(y) == 0) {
95 / uint32_t div_hi, div_rem;
96 / uint32_t q0, q1;
98 / /* calculate q1 */
99 / if (HI(x) < LO(y)) {
100 / /* result is a single uint32_t, use one division */
101 / q1 = 0;
102 / div_hi = HI(x);
103 / } else {
104 / /* result is a double uint32_t, use two divisions */
105 / A_DIV32(HI(x), 0, LO(y), q1, div_hi);
108 / /* calculate q0 and remainder */
109 / A_DIV32(LO(x), div_hi, LO(y), q0, div_rem);
111 / /* return remainder */
112 / *pmod = div_rem;
114 / /* return result */
115 / return (HILO(q1, q0));
117 / } else if (HI(x) < HI(y)) {
118 / /* HI(x) < HI(y) => x < y => result is 0 */
120 / /* return remainder */
121 / *pmod = x;
123 / /* return result */
124 / return (0);
126 / } else {
127 / /*
128 / * uint64_t by uint64_t division, resulting in a one-uint32_t
129 / * result
130 / */
131 / uint32_t y0, y1;
132 / uint32_t x1, x0;
133 / uint32_t q0;
134 / uint32_t normshift;
136 / /* normalize by shifting x and y so MSB(y) == 1 */
137 / HIBIT(HI(y), normshift); /* index of highest 1 bit */
138 / normshift = 31 - normshift;
140 / if (normshift == 0) {
141 / /* no shifting needed, and x < 2*y so q <= 1 */
142 / y1 = HI(y);
143 / y0 = LO(y);
144 / x1 = HI(x);
145 / x0 = LO(x);
147 / /* if x >= y then q = 1 (note x1 >= y1) */
148 / if (x1 > y1 || x0 >= y0) {
149 / q0 = 1;
150 / /* subtract y from x to get remainder */
151 / A_SUB2(y0, y1, x0, x1);
152 / } else {
153 / q0 = 0;
156 / /* return remainder */
157 / *pmod = HILO(x1, x0);
159 / /* return result */
160 / return (q0);
162 / } else {
163 / /*
164 / * the last case: result is one uint32_t, but we need to
165 / * normalize
166 / */
167 / uint64_t dt;
168 / uint32_t t0, t1, x2;
170 / /* normalize y */
171 / dt = (y << normshift);
172 / y1 = HI(dt);
173 / y0 = LO(dt);
175 / /* normalize x (we need 3 uint32_ts!!!) */
176 / x2 = (HI(x) >> (32 - normshift));
177 / dt = (x << normshift);
178 / x1 = HI(dt);
179 / x0 = LO(dt);
181 / /* estimate q0, and reduce x to a two uint32_t value */
182 / A_DIV32(x1, x2, y1, q0, x1);
184 / /* adjust q0 down if too high */
185 / /*
186 / * because of the limited range of x2 we can only be
187 / * one off
188 / */
189 / A_MUL32(y0, q0, t0, t1);
190 / if (t1 > x1 || (t1 == x1 && t0 > x0)) {
191 / q0--;
192 / A_SUB2(y0, y1, t0, t1);
194 / /* return remainder */
195 / /* subtract product from x to get remainder */
196 / A_SUB2(t0, t1, x0, x1);
197 / *pmod = (HILO(x1, x0) >> normshift);
199 / /* return result */
200 / return (q0);
/ UDivRem(uint64_t x, uint64_t y, uint64_t *pmod)
/ Unsigned 64/64 divide returning the quotient and storing the
/ remainder through pmod.  Hand-customized compiler output; the
/ algorithm is described in the C pseudocode comment above.
/ Internal entry point: x arrives in %edx:%eax (compiler-generated
/ convention); y and pmod are on the stack (see callers __urem64,
/ __umoddi3, __udivrem64).
/ Out: %edx:%eax = x / y; *pmod = x % y.
/ FIX (review): the two "ret" instructions after the epilogues were
/ missing from this copy of the file (internal lines 236 and 259
/ were dropped); without them execution falls through the popped
/ stack frame into the next aligned block.  Restored below.
	ENTRY(UDivRem)
	pushl	%ebp
	pushl	%edi
	pushl	%esi
	subl	$48, %esp
	movl	68(%esp), %edi		/ y,
	testl	%edi, %edi		/ tmp63
	movl	%eax, 40(%esp)		/ x, x
	movl	%edx, 44(%esp)		/ x, x
	movl	%edi, %esi		/, tmp62
	movl	%edi, %ecx		/ tmp62, tmp63
	jne	.LL2			/ HI(y) != 0: full 64/64 path
	movl	%edx, %eax		/, tmp68
	cmpl	64(%esp), %eax		/ y, tmp68
	jae	.LL21			/ HI(x) >= LO(y): need two divisions
.LL4:
	/ q1 (if any) is in %ecx; divide LO(x) by LO(y) for q0/remainder
	movl	72(%esp), %ebp		/ pmod,
	xorl	%esi, %esi		/ <result>
	movl	40(%esp), %eax		/ x, q0
	movl	%ecx, %edi		/ <result>, <result>
	divl	64(%esp)		/ y
	movl	%edx, (%ebp)		/ div_rem,
	xorl	%edx, %edx		/ q0
	addl	%eax, %esi		/ q0, <result>
	movl	$0, 4(%ebp)		/ remainder fits in 32 bits
	adcl	%edx, %edi		/ q0, <result>
	addl	$48, %esp
	movl	%esi, %eax		/ <result>, <result>
	popl	%esi
	movl	%edi, %edx		/ <result>, <result>
	popl	%edi
	popl	%ebp
	ret				/ restored (was dropped in this copy)
	.align	16
.LL2:
	movl	44(%esp), %eax		/ x,
	xorl	%edx, %edx
	cmpl	%esi, %eax		/ tmp62, tmp5
	movl	%eax, 32(%esp)		/ tmp5,
	movl	%edx, 36(%esp)
	jae	.LL6
	/ HI(x) < HI(y) => x < y: quotient 0, remainder is x itself
	movl	72(%esp), %esi		/ pmod,
	movl	40(%esp), %ebp		/ x,
	movl	44(%esp), %ecx		/ x,
	movl	%ebp, (%esi)
	movl	%ecx, 4(%esi)
	xorl	%edi, %edi		/ <result>
	xorl	%esi, %esi		/ <result>
.LL22:
	/ common exit: result in %edi:%esi
	addl	$48, %esp
	movl	%esi, %eax		/ <result>, <result>
	popl	%esi
	movl	%edi, %edx		/ <result>, <result>
	popl	%edi
	popl	%ebp
	ret				/ restored (was dropped in this copy)
	.align	16
.LL21:
	movl	%edi, %edx		/ tmp63, div_hi
	divl	64(%esp)		/ y
	movl	%eax, %ecx		/, q1
	jmp	.LL4
	.align	16
.LL6:
	/ normalize: shift x and y left so MSB(y) == 1
	movl	$31, %edi		/, tmp87
	bsrl	%esi,%edx		/ tmp62, normshift
	subl	%edx, %edi		/ normshift, tmp87
	movl	%edi, 28(%esp)		/ tmp87,
	jne	.LL8
	/ normshift == 0: x < 2*y so the quotient is 0 or 1
	movl	32(%esp), %edx		/, x1
	cmpl	%ecx, %edx		/ y1, x1
	movl	64(%esp), %edi		/ y, y0
	movl	40(%esp), %esi		/ x, x0
	ja	.LL10
	xorl	%ebp, %ebp		/ q0
	cmpl	%edi, %esi		/ y0, x0
	jb	.LL11
.LL10:
	movl	$1, %ebp		/, q0
	subl	%edi,%esi		/ y0, x0
	sbbl	%ecx,%edx		/ tmp63, x1
.LL11:
	movl	%edx, %ecx		/ x1, x1
	xorl	%edx, %edx		/ x1
	xorl	%edi, %edi		/ x0
	addl	%esi, %edx		/ x0, x1
	adcl	%edi, %ecx		/ x0, x1
	movl	72(%esp), %esi		/ pmod,
	movl	%edx, (%esi)		/ x1,
	movl	%ecx, 4(%esi)		/ x1,
	xorl	%edi, %edi		/ <result>
	movl	%ebp, %esi		/ q0, <result>
	jmp	.LL22
	.align	16
.LL8:
	/ general case: shift y left by normshift (64-bit shift; the
	/ andl $32 test handles the >=32 shift wrap of shld/sal)
	movb	28(%esp), %cl
	movl	64(%esp), %esi		/ y, dt
	movl	68(%esp), %edi		/ y, dt
	shldl	%esi, %edi		/, dt, dt
	sall	%cl, %esi		/, dt
	andl	$32, %ecx
	jne	.LL23
.LL17:
	/ normalize x into three words: x2:x1:x0
	movl	$32, %ecx		/, tmp102
	subl	28(%esp), %ecx		/, tmp102
	movl	%esi, %ebp		/ dt, y0
	movl	32(%esp), %esi
	shrl	%cl, %esi		/ tmp102,
	movl	%edi, 24(%esp)		/ tmp99,
	movb	28(%esp), %cl
	movl	%esi, 12(%esp)		/, x2
	movl	44(%esp), %edi		/ x, dt
	movl	40(%esp), %esi		/ x, dt
	shldl	%esi, %edi		/, dt, dt
	sall	%cl, %esi		/, dt
	andl	$32, %ecx
	je	.LL18
	movl	%esi, %edi		/ dt, dt
	xorl	%esi, %esi		/ dt
.LL18:
	/ estimate q0 = x2:x1 / y1, then check it against y0
	movl	%edi, %ecx		/ dt,
	movl	%edi, %eax		/ tmp2,
	movl	%ecx, (%esp)
	movl	12(%esp), %edx		/ x2,
	divl	24(%esp)
	movl	%edx, %ecx		/, x1
	xorl	%edi, %edi
	movl	%eax, 20(%esp)
	movl	%ebp, %eax		/ y0, t0
	mull	20(%esp)
	cmpl	%ecx, %edx		/ x1, t1
	movl	%edi, 4(%esp)
	ja	.LL14
	je	.LL24
.LL15:
	/ subtract q0*y from x, then shift the remainder back right
	movl	%ecx, %edi		/ x1,
	subl	%eax,%esi		/ t0, x0
	sbbl	%edx,%edi		/ t1,
	movl	%edi, %eax		/, x1
	movl	%eax, %edx		/ x1, x1
	xorl	%eax, %eax		/ x1
	xorl	%ebp, %ebp		/ x0
	addl	%esi, %eax		/ x0, x1
	adcl	%ebp, %edx		/ x0, x1
	movb	28(%esp), %cl
	shrdl	%edx, %eax		/, x1, x1
	shrl	%cl, %edx		/, x1
	andl	$32, %ecx
	je	.LL16
	movl	%edx, %eax		/ x1, x1
	xorl	%edx, %edx		/ x1
.LL16:
	movl	72(%esp), %ecx		/ pmod,
	movl	20(%esp), %esi		/, <result>
	xorl	%edi, %edi		/ <result>
	movl	%eax, (%ecx)		/ x1,
	movl	%edx, 4(%ecx)		/ x1,
	jmp	.LL22
	.align	16
.LL24:
	cmpl	%esi, %eax		/ x0, t0
	jbe	.LL15
.LL14:
	/ estimate was one too high (limited range of x2): adjust
	decl	20(%esp)
	subl	%ebp,%eax		/ y0, t0
	sbbl	24(%esp),%edx		/, t1
	jmp	.LL15
.LL23:
	/ shift count had bit 5 set: low word shifted entirely into high
	movl	%esi, %edi		/ dt, dt
	xorl	%esi, %esi		/ dt
	jmp	.LL17
	SET_SIZE(UDivRem)
378 * Unsigned division without remainder.
380 / uint64_t
381 / UDiv(uint64_t x, uint64_t y)
383 / if (HI(y) == 0) {
384 / /* simple cases: y is a single uint32_t */
385 / uint32_t div_hi, div_rem;
386 / uint32_t q0, q1;
388 / /* calculate q1 */
389 / if (HI(x) < LO(y)) {
390 / /* result is a single uint32_t, use one division */
391 / q1 = 0;
392 / div_hi = HI(x);
393 / } else {
394 / /* result is a double uint32_t, use two divisions */
395 / A_DIV32(HI(x), 0, LO(y), q1, div_hi);
398 / /* calculate q0 and remainder */
399 / A_DIV32(LO(x), div_hi, LO(y), q0, div_rem);
401 / /* return result */
402 / return (HILO(q1, q0));
404 / } else if (HI(x) < HI(y)) {
405 / /* HI(x) < HI(y) => x < y => result is 0 */
407 / /* return result */
408 / return (0);
410 / } else {
411 / /*
412 / * uint64_t by uint64_t division, resulting in a one-uint32_t
413 / * result
414 / */
415 / uint32_t y0, y1;
416 / uint32_t x1, x0;
417 / uint32_t q0;
418 / unsigned normshift;
420 / /* normalize by shifting x and y so MSB(y) == 1 */
421 / HIBIT(HI(y), normshift); /* index of highest 1 bit */
422 / normshift = 31 - normshift;
424 / if (normshift == 0) {
425 / /* no shifting needed, and x < 2*y so q <= 1 */
426 / y1 = HI(y);
427 / y0 = LO(y);
428 / x1 = HI(x);
429 / x0 = LO(x);
431 / /* if x >= y then q = 1 (note x1 >= y1) */
432 / if (x1 > y1 || x0 >= y0) {
433 / q0 = 1;
434 / /* subtract y from x to get remainder */
435 / /* A_SUB2(y0, y1, x0, x1); */
436 / } else {
437 / q0 = 0;
440 / /* return result */
441 / return (q0);
443 / } else {
444 / /*
445 / * the last case: result is one uint32_t, but we need to
446 / * normalize
447 / */
448 / uint64_t dt;
449 / uint32_t t0, t1, x2;
451 / /* normalize y */
452 / dt = (y << normshift);
453 / y1 = HI(dt);
454 / y0 = LO(dt);
456 / /* normalize x (we need 3 uint32_ts!!!) */
457 / x2 = (HI(x) >> (32 - normshift));
458 / dt = (x << normshift);
459 / x1 = HI(dt);
460 / x0 = LO(dt);
462 / /* estimate q0, and reduce x to a two uint32_t value */
463 / A_DIV32(x1, x2, y1, q0, x1);
465 / /* adjust q0 down if too high */
466 / /*
467 / * because of the limited range of x2 we can only be
468 / * one off
469 / */
470 / A_MUL32(y0, q0, t0, t1);
471 / if (t1 > x1 || (t1 == x1 && t0 > x0)) {
472 / q0--;
474 / /* return result */
475 / return (q0);
/ UDiv(uint64_t x, uint64_t y)
/ Unsigned 64/64 divide returning only the quotient (the remainder
/ computation of UDivRem is elided).  Hand-customized compiler
/ output; algorithm described in the C pseudocode comment above.
/ Internal entry point: x arrives in %edx:%eax; y is on the stack
/ (see callers __udiv64, __div64, __udivdi3, __divdi3).
/ Out: %edx:%eax = x / y.
/ FIX (review): the two "ret" instructions after the epilogues were
/ missing from this copy of the file (internal lines 514 and 549
/ were dropped); restored below.
	ENTRY(UDiv)
	pushl	%ebp
	pushl	%edi
	pushl	%esi
	subl	$40, %esp
	movl	%edx, 36(%esp)		/ x, x
	movl	60(%esp), %edx		/ y,
	testl	%edx, %edx		/ tmp62
	movl	%eax, 32(%esp)		/ x, x
	movl	%edx, %ecx		/ tmp61, tmp62
	movl	%edx, %eax		/, tmp61
	jne	.LL26			/ HI(y) != 0: full 64/64 path
	movl	36(%esp), %esi		/ x,
	cmpl	56(%esp), %esi		/ y, tmp67
	movl	%esi, %eax		/, tmp67
	movl	%esi, %edx		/ tmp67, div_hi
	jb	.LL28			/ HI(x) < LO(y): single division
	movl	%ecx, %edx		/ tmp62, div_hi
	divl	56(%esp)		/ y
	movl	%eax, %ecx		/, q1
.LL28:
	xorl	%esi, %esi		/ <result>
	movl	%ecx, %edi		/ <result>, <result>
	movl	32(%esp), %eax		/ x, q0
	xorl	%ecx, %ecx		/ q0
	divl	56(%esp)		/ y
	addl	%eax, %esi		/ q0, <result>
	adcl	%ecx, %edi		/ q0, <result>
.LL25:
	/ exit: quotient in %edi:%esi
	addl	$40, %esp
	movl	%esi, %eax		/ <result>, <result>
	popl	%esi
	movl	%edi, %edx		/ <result>, <result>
	popl	%edi
	popl	%ebp
	ret				/ restored (was dropped in this copy)
	.align	16
.LL26:
	movl	36(%esp), %esi		/ x,
	xorl	%edi, %edi
	movl	%esi, 24(%esp)		/ tmp1,
	movl	%edi, 28(%esp)
	xorl	%esi, %esi		/ <result>
	xorl	%edi, %edi		/ <result>
	cmpl	%eax, 24(%esp)		/ tmp61,
	jb	.LL25			/ HI(x) < HI(y) => quotient 0
	bsrl	%eax,%ebp		/ tmp61, normshift
	movl	$31, %eax		/, tmp85
	subl	%ebp, %eax		/ normshift, normshift
	jne	.LL32
	/ normshift == 0: x < 2*y so the quotient is 0 or 1
	movl	24(%esp), %eax		/, x1
	cmpl	%ecx, %eax		/ tmp62, x1
	movl	56(%esp), %esi		/ y, y0
	movl	32(%esp), %edx		/ x, x0
	ja	.LL34
	xorl	%eax, %eax		/ q0
	cmpl	%esi, %edx		/ y0, x0
	jb	.LL35
.LL34:
	movl	$1, %eax		/, q0
.LL35:
	movl	%eax, %esi		/ q0, <result>
	xorl	%edi, %edi		/ <result>
.LL45:
	/ second exit: quotient in %edi:%esi
	addl	$40, %esp
	movl	%esi, %eax		/ <result>, <result>
	popl	%esi
	movl	%edi, %edx		/ <result>, <result>
	popl	%edi
	popl	%ebp
	ret				/ restored (was dropped in this copy)
	.align	16
.LL32:
	/ normalize y by normshift (64-bit shift; andl $32 handles
	/ the >=32 wrap of shld/sal)
	movb	%al, %cl
	movl	56(%esp), %esi		/ y,
	movl	60(%esp), %edi		/ y,
	shldl	%esi, %edi
	sall	%cl, %esi
	andl	$32, %ecx
	jne	.LL43
.LL40:
	/ normalize x into three words x2:x1:x0, estimate q0
	movl	$32, %ecx		/, tmp96
	subl	%eax, %ecx		/ normshift, tmp96
	movl	%edi, %edx
	movl	%edi, 20(%esp)		/, dt
	movl	24(%esp), %ebp		/, x2
	xorl	%edi, %edi
	shrl	%cl, %ebp		/ tmp96, x2
	movl	%esi, 16(%esp)		/, dt
	movb	%al, %cl
	movl	32(%esp), %esi		/ x, dt
	movl	%edi, 12(%esp)
	movl	36(%esp), %edi		/ x, dt
	shldl	%esi, %edi		/, dt, dt
	sall	%cl, %esi		/, dt
	andl	$32, %ecx
	movl	%edx, 8(%esp)
	je	.LL41
	movl	%esi, %edi		/ dt, dt
	xorl	%esi, %esi		/ dt
.LL41:
	xorl	%ecx, %ecx
	movl	%edi, %eax		/ tmp1,
	movl	%ebp, %edx		/ x2,
	divl	8(%esp)
	movl	%edx, %ebp		/, x1
	movl	%ecx, 4(%esp)
	movl	%eax, %ecx		/, q0
	movl	16(%esp), %eax		/ dt,
	mull	%ecx			/ q0
	cmpl	%ebp, %edx		/ x1, t1
	movl	%edi, (%esp)
	movl	%esi, %edi		/ dt, x0
	ja	.LL38
	je	.LL44
.LL39:
	movl	%ecx, %esi		/ q0, <result>
.LL46:
	xorl	%edi, %edi		/ <result>
	jmp	.LL45
.LL44:
	cmpl	%edi, %eax		/ x0, t0
	jbe	.LL39
.LL38:
	/ estimate was one too high (limited range of x2): adjust
	decl	%ecx			/ q0
	movl	%ecx, %esi		/ q0, <result>
	jmp	.LL46
.LL43:
	/ shift count had bit 5 set: low word shifted entirely into high
	movl	%esi, %edi
	xorl	%esi, %esi
	jmp	.LL40
	SET_SIZE(UDiv)
613 * __udiv64
615 * Perform division of two unsigned 64-bit quantities, returning the
616 * quotient in %edx:%eax. __udiv64 pops the arguments on return,
/ __udiv64(uint64_t x, uint64_t y): unsigned 64-bit divide.
/ Sun cc helper convention: callee pops its 16 bytes of stack args
/ (ret $16).  Bridges to the internal UDiv entry point, which takes
/ x in %edx:%eax and y on the stack.
/ Out: quotient in %edx:%eax.
618 ENTRY(__udiv64)
619 movl 4(%esp), %eax / x, x
620 movl 8(%esp), %edx / x, x
/ both pushes use the same 16(%esp) displacement: the first selects
/ y.hi, and once %esp has moved the second selects y.lo
621 pushl 16(%esp) / y
622 pushl 16(%esp)
623 call UDiv
624 addl $8, %esp
625 ret $16
626 SET_SIZE(__udiv64)
629 * __urem64
631 * Perform division of two unsigned 64-bit quantities, returning the
632 * remainder in %edx:%eax. __urem64 pops the arguments on return
/ __urem64(uint64_t x, uint64_t y): unsigned 64-bit remainder.
/ Sun cc helper convention: callee pops its 16 bytes of stack args
/ (ret $16).  Carves a scratch slot on the stack for UDivRem's
/ *pmod out-parameter, then reloads it as the return value.
/ Out: remainder in %edx:%eax.
634 ENTRY(__urem64)
635 subl $12, %esp
636 movl %esp, %ecx /, tmp65
637 movl 16(%esp), %eax / x, x
638 movl 20(%esp), %edx / x, x
639 pushl %ecx / tmp65
/ both pushes use the same 32(%esp) displacement: the first selects
/ y.hi, and once %esp has moved the second selects y.lo
640 pushl 32(%esp) / y
641 pushl 32(%esp)
642 call UDivRem
643 movl 12(%esp), %eax / rem, rem
644 movl 16(%esp), %edx / rem, rem
645 addl $24, %esp
646 ret $16
647 SET_SIZE(__urem64)
650 * __div64
652 * Perform division of two signed 64-bit quantities, returning the
653 * quotient in %edx:%eax. __div64 pops the arguments on return.
655 / int64_t
656 / __div64(int64_t x, int64_t y)
658 / int negative;
659 / uint64_t xt, yt, r;
661 / if (x < 0) {
662 / xt = -(uint64_t) x;
663 / negative = 1;
664 / } else {
665 / xt = x;
666 / negative = 0;
668 / if (y < 0) {
669 / yt = -(uint64_t) y;
670 / negative ^= 1;
671 / } else {
672 / yt = y;
674 / r = UDiv(xt, yt);
675 / return (negative ? (int64_t) - r : r);
/ __div64(int64_t x, int64_t y): signed 64-bit divide.
/ Sun cc helper convention: callee pops its 16 bytes of stack args
/ (ret $16); quotient returned in %edx:%eax.
/ Strategy (see C pseudocode above): take absolute values of x and
/ y, track the result sign in %ebp (1 iff exactly one operand was
/ negative), call the unsigned UDiv, then negate the 64-bit result
/ if needed.
677 ENTRY(__div64)
678 pushl %ebp
679 pushl %edi
680 pushl %esi
681 subl $8, %esp
682 movl 28(%esp), %edx / x, x
683 testl %edx, %edx / x
684 movl 24(%esp), %eax / x, x
685 movl 32(%esp), %esi / y, y
686 movl 36(%esp), %edi / y, y
687 js .LL84
688 xorl %ebp, %ebp / negative
689 testl %edi, %edi / y
690 movl %eax, (%esp) / x, xt
691 movl %edx, 4(%esp) / x, xt
692 movl %esi, %eax / y, yt
693 movl %edi, %edx / y, yt
694 js .LL85
695 .LL82:
696 pushl %edx / yt
697 pushl %eax / yt
698 movl 8(%esp), %eax / xt, xt
699 movl 12(%esp), %edx / xt, xt
700 call UDiv
701 popl %ecx
702 testl %ebp, %ebp / negative
703 popl %esi
704 je .LL83
/ 64-bit negate: negl lo / adcl $0 hi / negl hi
705 negl %eax / r
706 adcl $0, %edx /, r
707 negl %edx / r
708 .LL83:
709 addl $8, %esp
710 popl %esi
711 popl %edi
712 popl %ebp
713 ret $16
714 .align 16
715 .LL84:
/ x < 0: xt = -x, negative = 1
716 negl %eax / x
717 adcl $0, %edx /, x
718 negl %edx / x
719 testl %edi, %edi / y
720 movl %eax, (%esp) / x, xt
721 movl %edx, 4(%esp) / x, xt
722 movl $1, %ebp /, negative
723 movl %esi, %eax / y, yt
724 movl %edi, %edx / y, yt
725 jns .LL82
726 .align 16
727 .LL85:
/ y < 0: yt = -y, flip the result sign
728 negl %eax / yt
729 adcl $0, %edx /, yt
730 negl %edx / yt
731 xorl $1, %ebp /, negative
732 jmp .LL82
733 SET_SIZE(__div64)
736 * __rem64
738 * Perform division of two signed 64-bit quantities, returning the
739 * remainder in %edx:%eax. __rem64 pops the arguments on return.
741 / int64_t
742 / __rem64(int64_t x, int64_t y)
744 / uint64_t xt, yt, rem;
746 / if (x < 0) {
747 / xt = -(uint64_t) x;
748 / } else {
749 / xt = x;
751 / if (y < 0) {
752 / yt = -(uint64_t) y;
753 / } else {
754 / yt = y;
756 / (void) UDivRem(xt, yt, &rem);
757 / return (x < 0 ? (int64_t) - rem : rem);
/ __rem64(int64_t x, int64_t y): signed 64-bit remainder.
/ Sun cc helper convention: callee pops its 16 bytes of stack args
/ (ret $16); remainder returned in %edx:%eax.
/ Computes UDivRem(|x|, |y|, &rem); the result takes the sign of x
/ (the dividend), matching C truncating-division semantics -- the
/ sign of y only affects |y|, not the result sign.
759 ENTRY(__rem64)
760 pushl %edi
761 pushl %esi
762 subl $20, %esp
763 movl 36(%esp), %ecx / x,
764 movl 32(%esp), %esi / x,
765 movl 36(%esp), %edi / x,
766 testl %ecx, %ecx
767 movl 40(%esp), %eax / y, y
768 movl 44(%esp), %edx / y, y
769 movl %esi, (%esp) /, xt
770 movl %edi, 4(%esp) /, xt
771 js .LL92
772 testl %edx, %edx / y
773 movl %eax, %esi / y, yt
774 movl %edx, %edi / y, yt
775 js .LL93
776 .LL90:
/ push &rem, yt.hi, yt.lo; xt goes in %edx:%eax per UDivRem's
/ internal convention
777 leal 8(%esp), %eax /, tmp66
778 pushl %eax / tmp66
779 pushl %edi / yt
780 pushl %esi / yt
781 movl 12(%esp), %eax / xt, xt
782 movl 16(%esp), %edx / xt, xt
783 call UDivRem
784 addl $12, %esp
785 movl 36(%esp), %edi / x,
786 testl %edi, %edi
787 movl 8(%esp), %eax / rem, rem
788 movl 12(%esp), %edx / rem, rem
789 js .LL94
790 addl $20, %esp
791 popl %esi
792 popl %edi
793 ret $16
794 .align 16
795 .LL92:
/ x < 0: xt = -x (64-bit negate)
796 negl %esi
797 adcl $0, %edi
798 negl %edi
799 testl %edx, %edx / y
800 movl %esi, (%esp) /, xt
801 movl %edi, 4(%esp) /, xt
802 movl %eax, %esi / y, yt
803 movl %edx, %edi / y, yt
804 jns .LL90
805 .align 16
806 .LL93:
/ y < 0: yt = -y
807 negl %esi / yt
808 adcl $0, %edi /, yt
809 negl %edi / yt
810 jmp .LL90
811 .align 16
812 .LL94:
/ x was negative: negate the remainder before returning
813 negl %eax / rem
814 adcl $0, %edx /, rem
815 addl $20, %esp
816 popl %esi
817 negl %edx / rem
818 popl %edi
819 ret $16
820 SET_SIZE(__rem64)
823 * C support for 64-bit modulo and division.
824 * GNU routines callable from C (though generated by the compiler).
825 * Hand-customized compiler output - see comments for details.
829 * int32_t/int64_t division/manipulation
831 * Hand-customized compiler output: the non-GCC entry points depart from
832 * the SYS V ABI by requiring their arguments to be popped, and in the
833 * [u]divrem64 cases returning the remainder in %ecx:%esi. Note the
834 * compiler-generated use of %edx:%eax for the first argument of
835 * internal entry points.
837 * Inlines for speed:
838 * - counting the number of leading zeros in a word
839 * - multiplying two 32-bit numbers giving a 64-bit result
840 * - dividing a 64-bit number by a 32-bit number, giving both quotient
841 * and remainder
842 * - subtracting two 64-bit results
844 / #define LO(X) ((uint32_t)(X) & 0xffffffff)
845 / #define HI(X) ((uint32_t)((X) >> 32) & 0xffffffff)
846 / #define HILO(H, L) (((uint64_t)(H) << 32) + (L))
848 / /* give index of highest bit */
849 / #define HIBIT(a, r) \
850 / asm("bsrl %1,%0": "=r"((uint32_t)(r)) : "g" (a))
852 / /* multiply two uint32_ts resulting in a uint64_t */
853 / #define A_MUL32(a, b, lo, hi) \
854 / asm("mull %2" \
855 / : "=a"((uint32_t)(lo)), "=d"((uint32_t)(hi)) : "g" (b), "0"(a))
857 / /* divide a uint64_t by a uint32_t */
858 / #define A_DIV32(lo, hi, b, q, r) \
859 / asm("divl %2" \
860 / : "=a"((uint32_t)(q)), "=d"((uint32_t)(r)) \
861 / : "g" (b), "0"((uint32_t)(lo)), "1"((uint32_t)hi))
863 / /* subtract two uint64_ts (with borrow) */
864 / #define A_SUB2(bl, bh, al, ah) \
865 / asm("subl %4,%0\n\tsbbl %5,%1" \
866 / : "=&r"((uint32_t)(al)), "=r"((uint32_t)(ah)) \
867 / : "0"((uint32_t)(al)), "1"((uint32_t)(ah)), "g"((uint32_t)(bl)), \
868 / "g"((uint32_t)(bh)))
871 * __udivdi3
873 * Perform division of two unsigned 64-bit quantities, returning the
874 * quotient in %edx:%eax.
/ __udivdi3(uint64_t x, uint64_t y): GCC millicode for unsigned
/ 64-bit division; quotient in %edx:%eax.  Standard C calling
/ convention -- the CALLER pops the arguments -- so this ends in a
/ plain ret, unlike the Sun-convention __udiv64 (ret $16).
/ FIX (review): the trailing "ret" was missing from this copy
/ (internal line 883 was dropped); restored below.
	ENTRY(__udivdi3)
	movl	4(%esp), %eax		/ x, x
	movl	8(%esp), %edx		/ x, x
	pushl	16(%esp)		/ y.hi (same disp twice: %esp moves)
	pushl	16(%esp)		/ y.lo
	call	UDiv
	addl	$8, %esp
	ret				/ restored (was dropped in this copy)
	SET_SIZE(__udivdi3)
887 * __umoddi3
889 * Perform division of two unsigned 64-bit quantities, returning the
890 * remainder in %edx:%eax.
/ __umoddi3(uint64_t x, uint64_t y): GCC millicode for unsigned
/ 64-bit modulo; remainder in %edx:%eax.  Standard C calling
/ convention -- the caller pops the arguments (plain ret).
/ A 12-byte scratch slot holds UDivRem's *pmod out-parameter.
/ FIX (review): the trailing "ret" was missing from this copy
/ (internal line 904 was dropped); restored below.
	ENTRY(__umoddi3)
	subl	$12, %esp		/ scratch slot for *pmod
	movl	%esp, %ecx		/, tmp65 (= &rem)
	movl	16(%esp), %eax		/ x, x
	movl	20(%esp), %edx		/ x, x
	pushl	%ecx			/ tmp65
	pushl	32(%esp)		/ y.hi (same disp twice: %esp moves)
	pushl	32(%esp)		/ y.lo
	call	UDivRem
	movl	12(%esp), %eax		/ rem, rem
	movl	16(%esp), %edx		/ rem, rem
	addl	$24, %esp
	ret				/ restored (was dropped in this copy)
	SET_SIZE(__umoddi3)
908 * __divdi3
910 * Perform division of two signed 64-bit quantities, returning the
911 * quotient in %edx:%eax.
913 / int64_t
914 / __divdi3(int64_t x, int64_t y)
916 / int negative;
917 / uint64_t xt, yt, r;
919 / if (x < 0) {
920 / xt = -(uint64_t) x;
921 / negative = 1;
922 / } else {
923 / xt = x;
924 / negative = 0;
926 / if (y < 0) {
927 / yt = -(uint64_t) y;
928 / negative ^= 1;
929 / } else {
930 / yt = y;
932 / r = UDiv(xt, yt);
933 / return (negative ? (int64_t) - r : r);
/ __divdi3(int64_t x, int64_t y): GCC millicode for signed 64-bit
/ division; quotient in %edx:%eax.  Standard C calling convention
/ (caller pops args; plain ret).
/ Strategy (see C pseudocode above): take absolute values, track
/ the result sign in %ebp, call UDiv, negate the result if needed.
/ FIX (review): the "ret" after the .LL54 epilogue was missing from
/ this copy (internal line 971 was dropped); restored below.
	ENTRY(__divdi3)
	pushl	%ebp
	pushl	%edi
	pushl	%esi
	subl	$8, %esp
	movl	28(%esp), %edx		/ x, x
	testl	%edx, %edx		/ x
	movl	24(%esp), %eax		/ x, x
	movl	32(%esp), %esi		/ y, y
	movl	36(%esp), %edi		/ y, y
	js	.LL55
	xorl	%ebp, %ebp		/ negative
	testl	%edi, %edi		/ y
	movl	%eax, (%esp)		/ x, xt
	movl	%edx, 4(%esp)		/ x, xt
	movl	%esi, %eax		/ y, yt
	movl	%edi, %edx		/ y, yt
	js	.LL56
.LL53:
	pushl	%edx			/ yt
	pushl	%eax			/ yt
	movl	8(%esp), %eax		/ xt, xt
	movl	12(%esp), %edx		/ xt, xt
	call	UDiv
	popl	%ecx
	testl	%ebp, %ebp		/ negative
	popl	%esi
	je	.LL54
	/ 64-bit negate: negl lo / adcl $0 hi / negl hi
	negl	%eax			/ r
	adcl	$0, %edx		/, r
	negl	%edx			/ r
.LL54:
	addl	$8, %esp
	popl	%esi
	popl	%edi
	popl	%ebp
	ret				/ restored (was dropped in this copy)
	.align	16
.LL55:
	/ x < 0: xt = -x, negative = 1
	negl	%eax			/ x
	adcl	$0, %edx		/, x
	negl	%edx			/ x
	testl	%edi, %edi		/ y
	movl	%eax, (%esp)		/ x, xt
	movl	%edx, 4(%esp)		/ x, xt
	movl	$1, %ebp		/, negative
	movl	%esi, %eax		/ y, yt
	movl	%edi, %edx		/ y, yt
	jns	.LL53
	.align	16
.LL56:
	/ y < 0: yt = -y, flip the result sign
	negl	%eax			/ yt
	adcl	$0, %edx		/, yt
	negl	%edx			/ yt
	xorl	$1, %ebp		/, negative
	jmp	.LL53
	SET_SIZE(__divdi3)
994 * __moddi3
996 * Perform division of two signed 64-bit quantities, returning the
997 * quotient in %edx:%eax.
999 / int64_t
1000 / __moddi3(int64_t x, int64_t y)
1002 / uint64_t xt, yt, rem;
1004 / if (x < 0) {
1005 / xt = -(uint64_t) x;
1006 / } else {
1007 / xt = x;
1009 / if (y < 0) {
1010 / yt = -(uint64_t) y;
1011 / } else {
1012 / yt = y;
1014 / (void) UDivRem(xt, yt, &rem);
1015 / return (x < 0 ? (int64_t) - rem : rem);
/ __moddi3(int64_t x, int64_t y): GCC millicode for signed 64-bit
/ modulo; remainder in %edx:%eax.  Standard C calling convention
/ (caller pops args; plain ret).
/ Computes UDivRem(|x|, |y|, &rem); the result takes the sign of x
/ (the dividend), matching C truncating-division semantics.
/ FIX (review): both trailing "ret" instructions were missing from
/ this copy (internal lines 1051 and 1077 were dropped); restored.
	ENTRY(__moddi3)
	pushl	%edi
	pushl	%esi
	subl	$20, %esp
	movl	36(%esp), %ecx		/ x,
	movl	32(%esp), %esi		/ x,
	movl	36(%esp), %edi		/ x,
	testl	%ecx, %ecx
	movl	40(%esp), %eax		/ y, y
	movl	44(%esp), %edx		/ y, y
	movl	%esi, (%esp)		/, xt
	movl	%edi, 4(%esp)		/, xt
	js	.LL63
	testl	%edx, %edx		/ y
	movl	%eax, %esi		/ y, yt
	movl	%edx, %edi		/ y, yt
	js	.LL64
.LL61:
	/ push &rem, yt.hi, yt.lo; xt goes in %edx:%eax for UDivRem
	leal	8(%esp), %eax		/, tmp66
	pushl	%eax			/ tmp66
	pushl	%edi			/ yt
	pushl	%esi			/ yt
	movl	12(%esp), %eax		/ xt, xt
	movl	16(%esp), %edx		/ xt, xt
	call	UDivRem
	addl	$12, %esp
	movl	36(%esp), %edi		/ x,
	testl	%edi, %edi
	movl	8(%esp), %eax		/ rem, rem
	movl	12(%esp), %edx		/ rem, rem
	js	.LL65
	addl	$20, %esp
	popl	%esi
	popl	%edi
	ret				/ restored (was dropped in this copy)
	.align	16
.LL63:
	/ x < 0: xt = -x (64-bit negate)
	negl	%esi
	adcl	$0, %edi
	negl	%edi
	testl	%edx, %edx		/ y
	movl	%esi, (%esp)		/, xt
	movl	%edi, 4(%esp)		/, xt
	movl	%eax, %esi		/ y, yt
	movl	%edx, %edi		/ y, yt
	jns	.LL61
	.align	16
.LL64:
	/ y < 0: yt = -y
	negl	%esi			/ yt
	adcl	$0, %edi		/, yt
	negl	%edi			/ yt
	jmp	.LL61
	.align	16
.LL65:
	/ x was negative: negate the remainder before returning
	negl	%eax			/ rem
	adcl	$0, %edx		/, rem
	addl	$20, %esp
	popl	%esi
	negl	%edx			/ rem
	popl	%edi
	ret				/ restored (was dropped in this copy)
	SET_SIZE(__moddi3)
1081 * __udivrem64
1083 * Perform division of two unsigned 64-bit quantities, returning the
1084 * quotient in %edx:%eax, and the remainder in %ecx:%esi. __udivrem64
1085 * pops the arguments on return.
/ __udivrem64(uint64_t x, uint64_t y): unsigned divide returning
/ BOTH quotient and remainder.  Departs from the SYSV ABI (see the
/ comment block above): callee pops its 16 bytes of args (ret $16);
/ quotient in %edx:%eax, remainder in %ecx:%esi.
/ A 12-byte scratch slot holds UDivRem's *pmod out-parameter.
1087 ENTRY(__udivrem64)
1088 subl $12, %esp
1089 movl %esp, %ecx /, tmp64
1090 movl 16(%esp), %eax / x, x
1091 movl 20(%esp), %edx / x, x
1092 pushl %ecx / tmp64
/ both pushes use the same 32(%esp) displacement: the first selects
/ y.hi, and once %esp has moved the second selects y.lo
1093 pushl 32(%esp) / y
1094 pushl 32(%esp)
1095 call UDivRem
1096 movl 16(%esp), %ecx / rem, tmp63
1097 movl 12(%esp), %esi / rem
1098 addl $24, %esp
1099 ret $16
1100 SET_SIZE(__udivrem64)
1103 * Signed division with remainder.
1105 / int64_t
1106 / SDivRem(int64_t x, int64_t y, int64_t * pmod)
1108 / int negative;
1109 / uint64_t xt, yt, r, rem;
1111 / if (x < 0) {
1112 / xt = -(uint64_t) x;
1113 / negative = 1;
1114 / } else {
1115 / xt = x;
1116 / negative = 0;
1118 / if (y < 0) {
1119 / yt = -(uint64_t) y;
1120 / negative ^= 1;
1121 / } else {
1122 / yt = y;
1124 / r = UDivRem(xt, yt, &rem);
1125 / *pmod = (x < 0 ? (int64_t) - rem : rem);
1126 / return (negative ? (int64_t) - r : r);
/ SDivRem(int64_t x, int64_t y, int64_t *pmod)
/ Signed 64/64 divide with remainder; hand-customized compiler
/ output (see the C pseudocode above).  Internal entry point:
/ x arrives in %edx:%eax; y and pmod are on the stack (see caller
/ __divrem64).  Takes absolute values, tracks the quotient sign in
/ %ebp, calls UDivRem, then fixes up signs: the remainder takes the
/ sign of x, the quotient is negated iff exactly one operand was
/ negative.
/ Out: %edx:%eax = quotient; *pmod = remainder.
/ FIX (review): the "ret" after the .LL72 epilogue was missing from
/ this copy (internal line 1175 was dropped); restored below.
	ENTRY(SDivRem)
	pushl	%ebp
	pushl	%edi
	pushl	%esi
	subl	$24, %esp
	testl	%edx, %edx		/ x
	movl	%edx, %edi		/ x, x
	js	.LL73
	movl	44(%esp), %esi		/ y,
	xorl	%ebp, %ebp		/ negative
	testl	%esi, %esi
	movl	%edx, 12(%esp)		/ x, xt
	movl	%eax, 8(%esp)		/ x, xt
	movl	40(%esp), %edx		/ y, yt
	movl	44(%esp), %ecx		/ y, yt
	js	.LL74
.LL70:
	/ push &rem, yt.hi, yt.lo; xt goes in %edx:%eax for UDivRem
	leal	16(%esp), %eax		/, tmp70
	pushl	%eax			/ tmp70
	pushl	%ecx			/ yt
	pushl	%edx			/ yt
	movl	20(%esp), %eax		/ xt, xt
	movl	24(%esp), %edx		/ xt, xt
	call	UDivRem
	movl	%edx, 16(%esp)		/, r
	movl	%eax, 12(%esp)		/, r
	addl	$12, %esp
	testl	%edi, %edi		/ x
	movl	16(%esp), %edx		/ rem, rem
	movl	20(%esp), %ecx		/ rem, rem
	js	.LL75
.LL71:
	movl	48(%esp), %edi		/ pmod, pmod
	testl	%ebp, %ebp		/ negative
	movl	%edx, (%edi)		/ rem,* pmod
	movl	%ecx, 4(%edi)		/ rem,
	movl	(%esp), %eax		/ r, r
	movl	4(%esp), %edx		/ r, r
	je	.LL72
	/ 64-bit negate of the quotient
	negl	%eax			/ r
	adcl	$0, %edx		/, r
	negl	%edx			/ r
.LL72:
	addl	$24, %esp
	popl	%esi
	popl	%edi
	popl	%ebp
	ret				/ restored (was dropped in this copy)
	.align	16
.LL73:
	/ x < 0: xt = -x, negative = 1
	negl	%eax
	adcl	$0, %edx
	movl	44(%esp), %esi		/ y,
	negl	%edx
	testl	%esi, %esi
	movl	%edx, 12(%esp)		/, xt
	movl	%eax, 8(%esp)		/, xt
	movl	$1, %ebp		/, negative
	movl	40(%esp), %edx		/ y, yt
	movl	44(%esp), %ecx		/ y, yt
	jns	.LL70
	.align	16
.LL74:
	/ y < 0: yt = -y, flip the quotient sign
	negl	%edx			/ yt
	adcl	$0, %ecx		/, yt
	negl	%ecx			/ yt
	xorl	$1, %ebp		/, negative
	jmp	.LL70
	.align	16
.LL75:
	/ x was negative: negate the remainder
	negl	%edx			/ rem
	adcl	$0, %ecx		/, rem
	negl	%ecx			/ rem
	jmp	.LL71
	SET_SIZE(SDivRem)
1205 * __divrem64
1207 * Perform division of two signed 64-bit quantities, returning the
1208 * quotient in %edx:%eax, and the remainder in %ecx:%esi. __divrem64
1209 * pops the arguments on return.
/ __divrem64(int64_t x, int64_t y): signed divide returning BOTH
/ quotient and remainder.  Departs from the SYSV ABI (see the
/ comment block above): callee pops its 16 bytes of args (ret $16);
/ quotient in %edx:%eax, remainder in %ecx:%esi.
/ A 20-byte scratch area holds SDivRem's *pmod out-parameter.
1211 ENTRY(__divrem64)
1212 subl $20, %esp
1213 movl %esp, %ecx /, tmp64
1214 movl 24(%esp), %eax / x, x
1215 movl 28(%esp), %edx / x, x
1216 pushl %ecx / tmp64
/ both pushes use the same 40(%esp) displacement: the first selects
/ y.hi, and once %esp has moved the second selects y.lo
1217 pushl 40(%esp) / y
1218 pushl 40(%esp)
1219 call SDivRem
1220 movl 16(%esp), %ecx
1221 movl 12(%esp),%esi / rem
1222 addl $32, %esp
1223 ret $16
1224 SET_SIZE(__divrem64)
1228 #endif /* defined(__i386) && !defined(__amd64) */