usr/src/lib/libc/i386/gen/_div64.s

   1 /*
   2  * CDDL HEADER START
   3  *
   4  * The contents of this file are subject to the terms of the
   5  * Common Development and Distribution License (the "License").
   6  * You may not use this file except in compliance with the License.
   7  *
   8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
   9  * or http://www.opensolaris.org/os/licensing.
  10  * See the License for the specific language governing permissions
  11  * and limitations under the License.
  12  *
  13  * When distributing Covered Code, include this CDDL HEADER in each
  14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
  15  * If applicable, add the following below this CDDL HEADER, with the
  16  * fields enclosed by brackets "[]" replaced with your own identifying
  17  * information: Portions Copyright [yyyy] [name of copyright owner]
  18  *
  19  * CDDL HEADER END
  20  */
  21 /*
  22  * Copyright 2005 Sun Microsystems, Inc.  All rights reserved.
  23  * Use is subject to license terms.
  24  */
  25
  26         .file   "_div64.s"
  27
  28 #include "SYS.h"
  29
  30 /*
  31  * C support for 64-bit modulo and division.
  32  * Hand-customized compiler output - see comments for details.
  33  */
  34
  35 /*
  36  * int32_t/int64_t division/manipulation
  37  *
  38  * Hand-customized compiler output: the non-GCC entry points depart from
  39  * the SYS V ABI by requiring their arguments to be popped, and in the
  40  * [u]divrem64 cases returning the remainder in %ecx:%esi. Note the
  41  * compiler-generated use of %edx:%eax for the first argument of
  42  * internal entry points.
  43  *
  44  * Inlines for speed:
  45  * - counting the number of leading zeros in a word
  46  * - multiplying two 32-bit numbers giving a 64-bit result
  47  * - dividing a 64-bit number by a 32-bit number, giving both quotient
  48  *      and remainder
  49  * - subtracting two 64-bit results
  50  */
  51 / #define       LO(X)           ((uint32_t)(X) & 0xffffffff)
  52 / #define       HI(X)           ((uint32_t)((X) >> 32) & 0xffffffff)
  53 / #define       HILO(H, L)      (((uint64_t)(H) << 32) + (L))
  54 /
  55 / /* give index of highest bit */
  56 / #define       HIBIT(a, r) \
  57 /     asm("bsrl %1,%0": "=r"((uint32_t)(r)) : "g" (a))
  58 /
  59 / /* multiply two uint32_ts resulting in a uint64_t */
  60 / #define       A_MUL32(a, b, lo, hi) \
  61 /     asm("mull %2" \
  62 /       : "=a"((uint32_t)(lo)), "=d"((uint32_t)(hi)) : "g" (b), "0"(a))
  63 /
  64 / /* divide a uint64_t by a uint32_t */
  65 / #define       A_DIV32(lo, hi, b, q, r) \
  66 /     asm("divl %2" \
  67 /       : "=a"((uint32_t)(q)), "=d"((uint32_t)(r)) \
  68 /       : "g" (b), "0"((uint32_t)(lo)), "1"((uint32_t)hi))
  69 /
  70 / /* subtract two uint64_ts (with borrow) */
  71 / #define       A_SUB2(bl, bh, al, ah) \
  72 /     asm("subl %4,%0\n\tsbbl %5,%1" \
  73 /       : "=&r"((uint32_t)(al)), "=r"((uint32_t)(ah)) \
  74 /       : "0"((uint32_t)(al)), "1"((uint32_t)(ah)), "g"((uint32_t)(bl)), \
  75 /       "g"((uint32_t)(bh)))
  76 /
  77 / /*
  78 /  * Unsigned division with remainder.
  79 /  * Divide two uint64_ts, and calculate remainder.
  80 /  */
  81 / uint64_t
  82 / UDivRem(uint64_t x, uint64_t y, uint64_t * pmod)
  83 / {
  84 /       /* simple cases: y is a single uint32_t */
  85 /       if (HI(y) == 0) {
  86 /               uint32_t        div_hi, div_rem;
  87 /               uint32_t        q0, q1;
  88 /
  89 /               /* calculate q1 */
  90 /               if (HI(x) < LO(y)) {
  91 /                       /* result is a single uint32_t, use one division */
  92 /                       q1 = 0;
  93 /                       div_hi = HI(x);
  94 /               } else {
  95 /                       /* result is a double uint32_t, use two divisions */
  96 /                       A_DIV32(HI(x), 0, LO(y), q1, div_hi);
  97 /               }
  98 /
  99 /               /* calculate q0 and remainder */
 100 /               A_DIV32(LO(x), div_hi, LO(y), q0, div_rem);
 101 /
 102 /               /* return remainder */
 103 /               *pmod = div_rem;
 104 /
 105 /               /* return result */
 106 /               return (HILO(q1, q0));
 107 /
 108 /       } else if (HI(x) < HI(y)) {
 109 /               /* HI(x) < HI(y) => x < y => result is 0 */
 110 /
 111 /               /* return remainder */
 112 /               *pmod = x;
 113 /
 114 /               /* return result */
 115 /               return (0);
 116 /
 117 /       } else {
 118 /               /*
 119 /                * uint64_t by uint64_t division, resulting in a one-uint32_t
 120 /                * result
 121 /                */
 122 /               uint32_t                y0, y1;
 123 /               uint32_t                x1, x0;
 124 /               uint32_t                q0;
 125 /               uint32_t                normshift;
 126 /
 127 /               /* normalize by shifting x and y so MSB(y) == 1 */
 128 /               HIBIT(HI(y), normshift);        /* index of highest 1 bit */
 129 /               normshift = 31 - normshift;
 130 /
 131 /               if (normshift == 0) {
 132 /                       /* no shifting needed, and x < 2*y so q <= 1 */
 133 /                       y1 = HI(y);
 134 /                       y0 = LO(y);
 135 /                       x1 = HI(x);
 136 /                       x0 = LO(x);
 137 /
 138 /                       /* if x >= y then q = 1 (note x1 >= y1) */
 139 /                       if (x1 > y1 || x0 >= y0) {
 140 /                               q0 = 1;
 141 /                               /* subtract y from x to get remainder */
 142 /                               A_SUB2(y0, y1, x0, x1);
 143 /                       } else {
 144 /                               q0 = 0;
 145 /                       }
 146 /
 147 /                       /* return remainder */
 148 /                       *pmod = HILO(x1, x0);
 149 /
 150 /                       /* return result */
 151 /                       return (q0);
 152 /
 153 /               } else {
 154 /                       /*
 155 /                        * the last case: result is one uint32_t, but we need to
 156 /                        * normalize
 157 /                        */
 158 /                       uint64_t        dt;
 159 /                       uint32_t                t0, t1, x2;
 160 /
 161 /                       /* normalize y */
 162 /                       dt = (y << normshift);
 163 /                       y1 = HI(dt);
 164 /                       y0 = LO(dt);
 165 /
 166 /                       /* normalize x (we need 3 uint32_ts!!!) */
 167 /                       x2 = (HI(x) >> (32 - normshift));
 168 /                       dt = (x << normshift);
 169 /                       x1 = HI(dt);
 170 /                       x0 = LO(dt);
 171 /
 172 /                       /* estimate q0, and reduce x to a two uint32_t value */
 173 /                       A_DIV32(x1, x2, y1, q0, x1);
 174 /
 175 /                       /* adjust q0 down if too high */
 176 /                       /*
 177 /                        * because of the limited range of x2 we can only be
 178 /                        * one off
 179 /                        */
 180 /                       A_MUL32(y0, q0, t0, t1);
 181 /                       if (t1 > x1 || (t1 == x1 && t0 > x0)) {
 182 /                               q0--;
 183 /                               A_SUB2(y0, y1, t0, t1);
 184 /                       }
 185 /                       /* return remainder */
 186 /                       /* subtract product from x to get remainder */
 187 /                       A_SUB2(t0, t1, x0, x1);
 188 /                       *pmod = (HILO(x1, x0) >> normshift);
 189 /
 190 /                       /* return result */
 191 /                       return (q0);
 192 /               }
 193 /       }
 194 / }
 195         ENTRY(UDivRem)
             /*
              * UDivRem: unsigned 64-bit division with remainder; this is the
              * assembly for the C listing in the comment above.
              *
              * In:   %edx:%eax = x.  After the prologue below, y is at
              *       64(%esp) (low) / 68(%esp) (high) and pmod at 72(%esp).
              * Out:  %edx:%eax = quotient; the remainder is stored through
              *       pmod.
              * %ebp, %edi and %esi are saved and restored; %ecx is used as
              * scratch.  The routine returns with a plain "ret", so the
              * stack arguments are left for the caller to remove.
              *
              * Frame slots used: 40/44(%esp) x, 32(%esp) HI(x),
              * 28(%esp) normshift, 24(%esp) normalized y1, 20(%esp) q0,
              * 12(%esp) x2.
              */
 196         pushl   %ebp
 197         pushl   %edi
 198         pushl   %esi
 199         subl    $48, %esp
             /*
              * %edi = %esi = %ecx = HI(y).  The movl instructions between
              * the testl and the jne do not modify the flags, so the jne
              * still tests HI(y).
              */
 200         movl    68(%esp), %edi  / y,
 201         testl   %edi, %edi      / tmp63
 202         movl    %eax, 40(%esp)  / x, x
 203         movl    %edx, 44(%esp)  / x, x
 204         movl    %edi, %esi      /, tmp62
 205         movl    %edi, %ecx      / tmp62, tmp63
 206         jne     .LL2
             /* HI(y) == 0: two divisions are needed when HI(x) >= LO(y) */
 207         movl    %edx, %eax      /, tmp68
 208         cmpl    64(%esp), %eax  / y, tmp68
 209         jae     .LL21
 210 .LL4:
             /*
              * Here %ecx = q1 and %edx = div_hi.  Divide div_hi:LO(x) by
              * LO(y), giving q0 in %eax and the remainder in %edx; store
              * the remainder (high word zero) through pmod and return
              * q1:q0 in %edx:%eax.
              */
 211         movl    72(%esp), %ebp  / pmod,
 212         xorl    %esi, %esi      / <result>
 213         movl    40(%esp), %eax  / x, q0
 214         movl    %ecx, %edi      / <result>, <result>
 215         divl    64(%esp)        / y
 216         movl    %edx, (%ebp)    / div_rem,
 217         xorl    %edx, %edx      / q0
 218         addl    %eax, %esi      / q0, <result>
 219         movl    $0, 4(%ebp)
 220         adcl    %edx, %edi      / q0, <result>
 221         addl    $48, %esp
 222         movl    %esi, %eax      / <result>, <result>
 223         popl    %esi
 224         movl    %edi, %edx      / <result>, <result>
 225         popl    %edi
 226         popl    %ebp
 227         ret
 228         .align  16
 229 .LL2:
             /*
              * HI(y) != 0.  Save HI(x) in 32(%esp); if HI(x) < HI(y) then
              * x < y, so the remainder is x and the quotient is 0.
              */
 230         movl    44(%esp), %eax  / x,
 231         xorl    %edx, %edx
 232         cmpl    %esi, %eax      / tmp62, tmp5
 233         movl    %eax, 32(%esp)  / tmp5,
 234         movl    %edx, 36(%esp)
 235         jae     .LL6
 236         movl    72(%esp), %esi  / pmod,
 237         movl    40(%esp), %ebp  / x,
 238         movl    44(%esp), %ecx  / x,
 239         movl    %ebp, (%esi)
 240         movl    %ecx, 4(%esi)
 241         xorl    %edi, %edi      / <result>
 242         xorl    %esi, %esi      / <result>
 243 .LL22:
             /* common exit: the quotient in %edi:%esi is returned in %edx:%eax */
 244         addl    $48, %esp
 245         movl    %esi, %eax      / <result>, <result>
 246         popl    %esi
 247         movl    %edi, %edx      / <result>, <result>
 248         popl    %edi
 249         popl    %ebp
 250         ret
 251         .align  16
 252 .LL21:
             /*
              * HI(y) == 0 and HI(x) >= LO(y): q1 = HI(x) / LO(y), and the
              * remainder of that division becomes div_hi.  %edi holds
              * HI(y), which is zero on this path, so %edx starts at 0.
              */
 253         movl    %edi, %edx      / tmp63, div_hi
 254         divl    64(%esp)        / y
 255         movl    %eax, %ecx      /, q1
 256         jmp     .LL4
 257         .align  16
 258 .LL6:
             /*
              * normshift = 31 - (index of the highest set bit of HI(y)),
              * kept in 28(%esp).  The jne tests the result of the subl;
              * the intervening movl leaves the flags alone.
              */
 259         movl    $31, %edi       /, tmp87
 260         bsrl    %esi,%edx       / tmp62, normshift
 261         subl    %edx, %edi      / normshift, tmp87
 262         movl    %edi, 28(%esp)  / tmp87,
 263         jne     .LL8
             /*
              * normshift == 0: the quotient is 0 or 1.
              * %edx = x1, %ecx = y1, %edi = y0, %esi = x0, %ebp = q0.
              */
 264         movl    32(%esp), %edx  /, x1
 265         cmpl    %ecx, %edx      / y1, x1
 266         movl    64(%esp), %edi  / y, y0
 267         movl    40(%esp), %esi  / x, x0
 268         ja      .LL10
 269         xorl    %ebp, %ebp      / q0
 270         cmpl    %edi, %esi      / y0, x0
 271         jb      .LL11
 272 .LL10:
             /* q0 = 1; x -= y leaves the remainder in %edx:%esi */
 273         movl    $1, %ebp        /, q0
 274         subl    %edi,%esi       / y0, x0
 275         sbbl    %ecx,%edx       / tmp63, x1
 276 .LL11:
             /* store the remainder x1:x0 through pmod and return q0 */
 277         movl    %edx, %ecx      / x1, x1
 278         xorl    %edx, %edx      / x1
 279         xorl    %edi, %edi      / x0
 280         addl    %esi, %edx      / x0, x1
 281         adcl    %edi, %ecx      / x0, x1
 282         movl    72(%esp), %esi  / pmod,
 283         movl    %edx, (%esi)    / x1,
 284         movl    %ecx, 4(%esi)   / x1,
 285         xorl    %edi, %edi      / <result>
 286         movl    %ebp, %esi      / q0, <result>
 287         jmp     .LL22
 288         .align  16
 289 .LL8:
             /*
              * normshift != 0: dt = y << normshift in %edi:%esi.  The
              * "andl $32" test selects the fix-up at .LL23 for shift
              * counts with bit 5 set.
              * NOTE(review): normshift looks limited to 1..31 on this
              * path, which would make the fix-up unreachable - confirm.
              */
 290         movb    28(%esp), %cl
 291         movl    64(%esp), %esi  / y, dt
 292         movl    68(%esp), %edi  / y, dt
 293         shldl   %esi, %edi      /, dt, dt
 294         sall    %cl, %esi       /, dt
 295         andl    $32, %ecx
 296         jne     .LL23
 297 .LL17:
             /*
              * %ebp = y0 and 24(%esp) = y1 (normalized y);
              * 12(%esp) = x2 = HI(x) >> (32 - normshift);
              * then dt = x << normshift in %edi:%esi.
              */
 298         movl    $32, %ecx       /, tmp102
 299         subl    28(%esp), %ecx  /, tmp102
 300         movl    %esi, %ebp      / dt, y0
 301         movl    32(%esp), %esi
 302         shrl    %cl, %esi       / tmp102,
 303         movl    %edi, 24(%esp)  / tmp99,
 304         movb    28(%esp), %cl
 305         movl    %esi, 12(%esp)  /, x2
 306         movl    44(%esp), %edi  / x, dt
 307         movl    40(%esp), %esi  / x, dt
 308         shldl   %esi, %edi      /, dt, dt
 309         sall    %cl, %esi       /, dt
 310         andl    $32, %ecx
 311         je      .LL18
 312         movl    %esi, %edi      / dt, dt
 313         xorl    %esi, %esi      / dt
 314 .LL18:
             /*
              * %edi:%esi = normalized x1:x0.  Estimate q0 = x2:x1 / y1
              * (saved in 20(%esp)) with the remainder x1 in %ecx, then
              * form t1:t0 = y0 * q0 in %edx:%eax and compare t1 with x1.
              * The values stored to (%esp) and 4(%esp) are not read
              * again in this routine.
              */
 315         movl    %edi, %ecx      / dt,
 316         movl    %edi, %eax      / tmp2,
 317         movl    %ecx, (%esp)
 318         movl    12(%esp), %edx  / x2,
 319         divl    24(%esp)
 320         movl    %edx, %ecx      /, x1
 321         xorl    %edi, %edi
 322         movl    %eax, 20(%esp)
 323         movl    %ebp, %eax      / y0, t0
 324         mull    20(%esp)
 325         cmpl    %ecx, %edx      / x1, t1
 326         movl    %edi, 4(%esp)
 327         ja      .LL14
 328         je      .LL24
 329 .LL15:
             /*
              * remainder = (x1:x0 - t1:t0) >> normshift, computed in
              * %edx:%eax and stored through pmod at .LL16; the quotient
              * is q0 with a zero high word.
              */
 330         movl    %ecx, %edi      / x1,
 331         subl    %eax,%esi       / t0, x0
 332         sbbl    %edx,%edi       / t1,
 333         movl    %edi, %eax      /, x1
 334         movl    %eax, %edx      / x1, x1
 335         xorl    %eax, %eax      / x1
 336         xorl    %ebp, %ebp      / x0
 337         addl    %esi, %eax      / x0, x1
 338         adcl    %ebp, %edx      / x0, x1
 339         movb    28(%esp), %cl
 340         shrdl   %edx, %eax      /, x1, x1
 341         shrl    %cl, %edx       /, x1
 342         andl    $32, %ecx
 343         je      .LL16
 344         movl    %edx, %eax      / x1, x1
 345         xorl    %edx, %edx      / x1
 346 .LL16:
 347         movl    72(%esp), %ecx  / pmod,
 348         movl    20(%esp), %esi  /, <result>
 349         xorl    %edi, %edi      / <result>
 350         movl    %eax, (%ecx)    / x1,
 351         movl    %edx, 4(%ecx)   / x1,
 352         jmp     .LL22
 353         .align  16
 354 .LL24:
             /* t1 == x1: q0 is too high only if t0 > x0 */
 355         cmpl    %esi, %eax      / x0, t0
 356         jbe     .LL15
 357 .LL14:
             /* q0 was one too high: q0--, and t -= normalized y (y1:y0) */
 358         decl    20(%esp)
 359         subl    %ebp,%eax       / y0, t0
 360         sbbl    24(%esp),%edx   /, t1
 361         jmp     .LL15
 362 .LL23:
             /* fix-up for the y shift: low word moves to high, low is cleared */
 363         movl    %esi, %edi      / dt, dt
 364         xorl    %esi, %esi      / dt
 365         jmp     .LL17
 366         SET_SIZE(UDivRem)
 367
 368 /*
 369  * Unsigned division without remainder.
 370  */
 371 / uint64_t
 372 / UDiv(uint64_t x, uint64_t y)
 373 / {
 374 /       if (HI(y) == 0) {
 375 /               /* simple cases: y is a single uint32_t */
 376 /               uint32_t        div_hi, div_rem;
 377 /               uint32_t        q0, q1;
 378 /
 379 /               /* calculate q1 */
 380 /               if (HI(x) < LO(y)) {
 381 /                       /* result is a single uint32_t, use one division */
 382 /                       q1 = 0;
 383 /                       div_hi = HI(x);
 384 /               } else {
 385 /                       /* result is a double uint32_t, use two divisions */
 386 /                       A_DIV32(HI(x), 0, LO(y), q1, div_hi);
 387 /               }
 388 /
 389 /               /* calculate q0 and remainder */
 390 /               A_DIV32(LO(x), div_hi, LO(y), q0, div_rem);
 391 /
 392 /               /* return result */
 393 /               return (HILO(q1, q0));
 394 /
 395 /       } else if (HI(x) < HI(y)) {
 396 /               /* HI(x) < HI(y) => x < y => result is 0 */
 397 /
 398 /               /* return result */
 399 /               return (0);
 400 /
 401 /       } else {
 402 /               /*
 403 /                * uint64_t by uint64_t division, resulting in a one-uint32_t
 404 /                * result
 405 /                */
 406 /               uint32_t                y0, y1;
 407 /               uint32_t                x1, x0;
 408 /               uint32_t                q0;
 409 /               unsigned                normshift;
 410 /
 411 /               /* normalize by shifting x and y so MSB(y) == 1 */
 412 /               HIBIT(HI(y), normshift);        /* index of highest 1 bit */
 413 /               normshift = 31 - normshift;
 414 /
 415 /               if (normshift == 0) {
 416 /                       /* no shifting needed, and x < 2*y so q <= 1 */
 417 /                       y1 = HI(y);
 418 /                       y0 = LO(y);
 419 /                       x1 = HI(x);
 420 /                       x0 = LO(x);
 421 /
 422 /                       /* if x >= y then q = 1 (note x1 >= y1) */
 423 /                       if (x1 > y1 || x0 >= y0) {
 424 /                               q0 = 1;
 425 /                               /* subtract y from x to get remainder */
 426 /                               /* A_SUB2(y0, y1, x0, x1); */
 427 /                       } else {
 428 /                               q0 = 0;
 429 /                       }
 430 /
 431 /                       /* return result */
 432 /                       return (q0);
 433 /
 434 /               } else {
 435 /                       /*
 436 /                        * the last case: result is one uint32_t, but we need to
 437 /                        * normalize
 438 /                        */
 439 /                       uint64_t        dt;
 440 /                       uint32_t                t0, t1, x2;
 441 /
 442 /                       /* normalize y */
 443 /                       dt = (y << normshift);
 444 /                       y1 = HI(dt);
 445 /                       y0 = LO(dt);
 446 /
 447 /                       /* normalize x (we need 3 uint32_ts!!!) */
 448 /                       x2 = (HI(x) >> (32 - normshift));
 449 /                       dt = (x << normshift);
 450 /                       x1 = HI(dt);
 451 /                       x0 = LO(dt);
 452 /
 453 /                       /* estimate q0, and reduce x to a two uint32_t value */
 454 /                       A_DIV32(x1, x2, y1, q0, x1);
 455 /
 456 /                       /* adjust q0 down if too high */
 457 /                       /*
 458 /                        * because of the limited range of x2 we can only be
 459 /                        * one off
 460 /                        */
 461 /                       A_MUL32(y0, q0, t0, t1);
 462 /                       if (t1 > x1 || (t1 == x1 && t0 > x0)) {
 463 /                               q0--;
 464 /                       }
 465 /                       /* return result */
 466 /                       return (q0);
 467 /               }
 468 /       }
 469 / }
 470         ENTRY(UDiv)
             /*
              * UDiv: unsigned 64-bit division, quotient only; this is the
              * assembly for the C listing in the comment above.
              *
              * In:   %edx:%eax = x.  After the prologue below, y is at
              *       56(%esp) (low) / 60(%esp) (high).
              * Out:  %edx:%eax = quotient.
              * %ebp, %edi and %esi are saved and restored; %ecx is used as
              * scratch.  The routine returns with a plain "ret", so the
              * stack arguments are left for the caller to remove.
              *
              * Frame slots used: 32/36(%esp) x, 24(%esp) HI(x),
              * 16(%esp) normalized y0, 8(%esp) and 20(%esp) normalized y1.
              */
 471         pushl   %ebp
 472         pushl   %edi
 473         pushl   %esi
 474         subl    $40, %esp
             /*
              * %edx = %ecx = %eax = HI(y).  The movl instructions after
              * the testl do not modify the flags, so the jne tests HI(y).
              */
 475         movl    %edx, 36(%esp)  / x, x
 476         movl    60(%esp), %edx  / y,
 477         testl   %edx, %edx      / tmp62
 478         movl    %eax, 32(%esp)  / x, x
 479         movl    %edx, %ecx      / tmp61, tmp62
 480         movl    %edx, %eax      /, tmp61
 481         jne     .LL26
             /*
              * HI(y) == 0.  If HI(x) < LO(y) one division suffices:
              * q1 stays 0 (in %ecx) and div_hi = HI(x) (in %edx).
              * Otherwise q1 = HI(x) / LO(y) and div_hi is its remainder.
              */
 482         movl    36(%esp), %esi  / x,
 483         cmpl    56(%esp), %esi  / y, tmp67
 484         movl    %esi, %eax      /, tmp67
 485         movl    %esi, %edx      / tmp67, div_hi
 486         jb      .LL28
 487         movl    %ecx, %edx      / tmp62, div_hi
 488         divl    56(%esp)        / y
 489         movl    %eax, %ecx      /, q1
 490 .LL28:
             /* q0 = div_hi:LO(x) / LO(y); the result q1:q0 goes to %edi:%esi */
 491         xorl    %esi, %esi      / <result>
 492         movl    %ecx, %edi      / <result>, <result>
 493         movl    32(%esp), %eax  / x, q0
 494         xorl    %ecx, %ecx      / q0
 495         divl    56(%esp)        / y
 496         addl    %eax, %esi      / q0, <result>
 497         adcl    %ecx, %edi      / q0, <result>
 498 .LL25:
             /* exit: the quotient in %edi:%esi is returned in %edx:%eax */
 499         addl    $40, %esp
 500         movl    %esi, %eax      / <result>, <result>
 501         popl    %esi
 502         movl    %edi, %edx      / <result>, <result>
 503         popl    %edi
 504         popl    %ebp
 505         ret
 506         .align  16
 507 .LL26:
             /*
              * HI(y) != 0.  Save HI(x) in 24(%esp) and preset the result
              * to 0; if HI(x) < HI(y) then x < y and 0 is returned.
              * Otherwise %eax = normshift = 31 - (index of the highest set
              * bit of HI(y)), and the jne tests the result of the subl.
              */
 508         movl    36(%esp), %esi  / x,
 509         xorl    %edi, %edi
 510         movl    %esi, 24(%esp)  / tmp1,
 511         movl    %edi, 28(%esp)
 512         xorl    %esi, %esi      / <result>
 513         xorl    %edi, %edi      / <result>
 514         cmpl    %eax, 24(%esp)  / tmp61,
 515         jb      .LL25
 516         bsrl    %eax,%ebp       / tmp61, normshift
 517         movl    $31, %eax       /, tmp85
 518         subl    %ebp, %eax      / normshift, normshift
 519         jne     .LL32
             /*
              * normshift == 0: the quotient is 0 or 1.
              * %eax = x1, %ecx = y1, %esi = y0, %edx = x0.
              * q0 = 1 if x1 > y1 or x0 >= y0, else 0.
              */
 520         movl    24(%esp), %eax  /, x1
 521         cmpl    %ecx, %eax      / tmp62, x1
 522         movl    56(%esp), %esi  / y, y0
 523         movl    32(%esp), %edx  / x, x0
 524         ja      .LL34
 525         xorl    %eax, %eax      / q0
 526         cmpl    %esi, %edx      / y0, x0
 527         jb      .LL35
 528 .LL34:
 529         movl    $1, %eax        /, q0
 530 .LL35:
 531         movl    %eax, %esi      / q0, <result>
 532         xorl    %edi, %edi      / <result>
 533 .LL45:
             /* exit: the quotient in %edi:%esi is returned in %edx:%eax */
 534         addl    $40, %esp
 535         movl    %esi, %eax      / <result>, <result>
 536         popl    %esi
 537         movl    %edi, %edx      / <result>, <result>
 538         popl    %edi
 539         popl    %ebp
 540         ret
 541         .align  16
 542 .LL32:
             /*
              * normshift != 0: dt = y << normshift in %edi:%esi.  The
              * "andl $32" test selects the fix-up at .LL43 for shift
              * counts with bit 5 set.
              * NOTE(review): normshift looks limited to 1..31 on this
              * path, which would make the fix-up unreachable - confirm.
              */
 543         movb    %al, %cl
 544         movl    56(%esp), %esi  / y,
 545         movl    60(%esp), %edi  / y,
 546         shldl   %esi, %edi
 547         sall    %cl, %esi
 548         andl    $32, %ecx
 549         jne     .LL43
 550 .LL40:
             /*
              * Save normalized y: y1 in 20(%esp) and 8(%esp), y0 in
              * 16(%esp).  %ebp = x2 = HI(x) >> (32 - normshift).  Then
              * dt = x << normshift in %edi:%esi.  The je uses the flags
              * from the second "andl $32"; the movl before it leaves
              * them intact.
              */
 551         movl    $32, %ecx       /, tmp96
 552         subl    %eax, %ecx      / normshift, tmp96
 553         movl    %edi, %edx
 554         movl    %edi, 20(%esp)  /, dt
 555         movl    24(%esp), %ebp  /, x2
 556         xorl    %edi, %edi
 557         shrl    %cl, %ebp       / tmp96, x2
 558         movl    %esi, 16(%esp)  /, dt
 559         movb    %al, %cl
 560         movl    32(%esp), %esi  / x, dt
 561         movl    %edi, 12(%esp)
 562         movl    36(%esp), %edi  / x, dt
 563         shldl   %esi, %edi      /, dt, dt
 564         sall    %cl, %esi       /, dt
 565         andl    $32, %ecx
 566         movl    %edx, 8(%esp)
 567         je      .LL41
 568         movl    %esi, %edi      / dt, dt
 569         xorl    %esi, %esi      / dt
 570 .LL41:
             /*
              * Estimate q0 = x2:x1 / y1 into %ecx, with the remainder x1
              * in %ebp; then t1:t0 = y0 * q0 in %edx:%eax and %edi = x0.
              * q0 is decremented when t1:t0 > x1:x0.
              */
 571         xorl    %ecx, %ecx
 572         movl    %edi, %eax      / tmp1,
 573         movl    %ebp, %edx      / x2,
 574         divl    8(%esp)
 575         movl    %edx, %ebp      /, x1
 576         movl    %ecx, 4(%esp)
 577         movl    %eax, %ecx      /, q0
 578         movl    16(%esp), %eax  / dt,
 579         mull    %ecx    / q0
 580         cmpl    %ebp, %edx      / x1, t1
 581         movl    %edi, (%esp)
 582         movl    %esi, %edi      / dt, x0
 583         ja      .LL38
 584         je      .LL44
 585 .LL39:
             /* the estimate was exact: return q0 with a zero high word */
 586         movl    %ecx, %esi      / q0, <result>
 587 .LL46:
 588         xorl    %edi, %edi      / <result>
 589         jmp     .LL45
 590 .LL44:
             /* t1 == x1: q0 is too high only if t0 > x0 */
 591         cmpl    %edi, %eax      / x0, t0
 592         jbe     .LL39
 593 .LL38:
             /* q0 was one too high */
 594         decl    %ecx            / q0
 595         movl    %ecx, %esi      / q0, <result>
 596         jmp     .LL46
 597 .LL43:
             /* fix-up for the y shift: low word moves to high, low is cleared */
 598         movl    %esi, %edi
 599         xorl    %esi, %esi
 600         jmp     .LL40
 601         SET_SIZE(UDiv)
 602
 603 /*
 604  * __udiv64
 605  *
 606  * Perform division of two unsigned 64-bit quantities, returning the
  607  * quotient in %edx:%eax.  __udiv64 pops the arguments on return.
 608  */
 609         ENTRY(__udiv64)
             /*
              * At entry x is at 4(%esp)/8(%esp) and y at 12(%esp)/16(%esp).
              * x is loaded into %edx:%eax and a copy of y is pushed for
              * UDiv.  Each pushl lowers %esp by 4, so the same 16(%esp)
              * operand picks up the high word of y first and then the
              * low word.
              */
 610         movl    4(%esp), %eax   / x, x
 611         movl    8(%esp), %edx   / x, x
 612         pushl   16(%esp)        / y
 613         pushl   16(%esp)
 614         call    UDiv
             /*
              * Drop the 8-byte copy of y, then return and pop the caller's
              * 16 bytes of arguments.  %edx:%eax is left as UDiv set it.
              */
 615         addl    $8, %esp
 616         ret     $16
 617         SET_SIZE(__udiv64)
 618
 619 /*
 620  * __urem64
 621  *
 622  * Perform division of two unsigned 64-bit quantities, returning the
  623  * remainder in %edx:%eax.  __urem64 pops the arguments on return.
 624  */
 625         ENTRY(__urem64)
             /*
              * Reserve 12 bytes of stack; the address of the lowest byte
              * (%esp at that point) is passed to UDivRem as pmod.  After
              * the subl, x is at 16(%esp)/20(%esp) and y at
              * 24(%esp)/28(%esp).
              */
 626         subl    $12, %esp
 627         movl    %esp, %ecx      /, tmp65
 628         movl    16(%esp), %eax  / x, x
 629         movl    20(%esp), %edx  / x, x
             /* push pmod, then the high and low words of y (%esp moves 4 per push) */
 630         pushl   %ecx            / tmp65
 631         pushl   32(%esp)        / y
 632         pushl   32(%esp)
 633         call    UDivRem
             /*
              * The remainder buffer now sits above the 12 bytes of pushed
              * arguments.  Load it into %edx:%eax (replacing the quotient),
              * release the 12 + 12 bytes, and pop the caller's 16 bytes of
              * arguments on return.
              */
 634         movl    12(%esp), %eax  / rem, rem
 635         movl    16(%esp), %edx  / rem, rem
 636         addl    $24, %esp
 637         ret     $16
 638         SET_SIZE(__urem64)
 639
 640 /*
 641  * __div64
 642  *
 643  * Perform division of two signed 64-bit quantities, returning the
 644  * quotient in %edx:%eax.  __div64 pops the arguments on return.
 645  */
 646 / int64_t
 647 / __div64(int64_t x, int64_t y)
 648 / {
 649 /       int             negative;
 650 /       uint64_t        xt, yt, r;
 651 /
 652 /       if (x < 0) {
 653 /               xt = -(uint64_t) x;
 654 /               negative = 1;
 655 /       } else {
 656 /               xt = x;
 657 /               negative = 0;
 658 /       }
 659 /       if (y < 0) {
 660 /               yt = -(uint64_t) y;
 661 /               negative ^= 1;
 662 /       } else {
 663 /               yt = y;
 664 /       }
 665 /       r = UDiv(xt, yt);
 666 /       return (negative ? (int64_t) - r : r);
 667 / }
 668         ENTRY(__div64)
             /*
              * Signed 64-bit division; assembly for the C listing above.
              * After the prologue x is at 24(%esp)/28(%esp) and y at
              * 32(%esp)/36(%esp).  xt is kept in (%esp)/4(%esp), yt in
              * %edx:%eax until it is pushed, and "negative" in %ebp.
              * The quotient is returned in %edx:%eax and the caller's
              * 16 bytes of arguments are popped by "ret $16".
              */
 669         pushl   %ebp
 670         pushl   %edi
 671         pushl   %esi
 672         subl    $8, %esp
 673         movl    28(%esp), %edx  / x, x
 674         testl   %edx, %edx      / x
 675         movl    24(%esp), %eax  / x, x
 676         movl    32(%esp), %esi  / y, y
 677         movl    36(%esp), %edi  / y, y
 678         js      .LL84
             /* x >= 0: xt = x, negative = 0; then test the sign of y */
 679         xorl    %ebp, %ebp      / negative
 680         testl   %edi, %edi      / y
 681         movl    %eax, (%esp)    / x, xt
 682         movl    %edx, 4(%esp)   / x, xt
 683         movl    %esi, %eax      / y, yt
 684         movl    %edi, %edx      / y, yt
 685         js      .LL85
 686 .LL82:
             /*
              * Push yt, reload xt (now 8 bytes higher on the stack) into
              * %edx:%eax and call UDiv.  The two popl instructions discard
              * the pushed yt; popl does not change the flags, so the je
              * tests "negative".
              */
 687         pushl   %edx            / yt
 688         pushl   %eax            / yt
 689         movl    8(%esp), %eax   / xt, xt
 690         movl    12(%esp), %edx  / xt, xt
 691         call    UDiv
 692         popl    %ecx
 693         testl   %ebp, %ebp      / negative
 694         popl    %esi
 695         je      .LL83
             /* 64-bit negate of r: negl low, add the carry to high, negl high */
 696         negl    %eax            / r
 697         adcl    $0, %edx        /, r
 698         negl    %edx            / r
 699 .LL83:
 700         addl    $8, %esp
 701         popl    %esi
 702         popl    %edi
 703         popl    %ebp
 704         ret     $16
 705         .align  16
 706 .LL84:
             /*
              * x < 0: xt = -x, negative = 1.  If y >= 0 go straight to the
              * call; otherwise execution continues into .LL85 below.
              */
 707         negl    %eax            / x
 708         adcl    $0, %edx        /, x
 709         negl    %edx            / x
 710         testl   %edi, %edi      / y
 711         movl    %eax, (%esp)    / x, xt
 712         movl    %edx, 4(%esp)   / x, xt
 713         movl    $1, %ebp        /, negative
 714         movl    %esi, %eax      / y, yt
 715         movl    %edi, %edx      / y, yt
 716         jns     .LL82
 717         .align  16
 718 .LL85:
             /* y < 0: yt = -y and the result sign flips */
 719         negl    %eax            / yt
 720         adcl    $0, %edx        /, yt
 721         negl    %edx            / yt
 722         xorl    $1, %ebp        /, negative
 723         jmp     .LL82
 724         SET_SIZE(__div64)
 725
 726 /*
 727  * __rem64
 728  *
 729  * Perform division of two signed 64-bit quantities, returning the
 730  * remainder in %edx:%eax.  __rem64 pops the arguments on return.
 731  */
 732 / int64_t
 733 / __rem64(int64_t x, int64_t y)
 734 / {
 735 /       uint64_t        xt, yt, rem;
 736 /
 737 /       if (x < 0) {
 738 /               xt = -(uint64_t) x;
 739 /       } else {
 740 /               xt = x;
 741 /       }
 742 /       if (y < 0) {
 743 /               yt = -(uint64_t) y;
 744 /       } else {
 745 /               yt = y;
 746 /       }
 747 /       (void) UDivRem(xt, yt, &rem);
 748 /       return (x < 0 ? (int64_t) - rem : rem);
 749 / }
 750         ENTRY(__rem64)
             /*
              * Signed 64-bit remainder; assembly for the C listing above.
              * After the prologue x is at 32(%esp)/36(%esp) and y at
              * 40(%esp)/44(%esp).  Locals: xt at (%esp)/4(%esp) and rem at
              * 8(%esp)/12(%esp); yt is built in %edi:%esi.  The remainder
              * is returned in %edx:%eax, negated when x is negative, and
              * the caller's 16 bytes of arguments are popped by "ret $16".
              */
 751         pushl   %edi
 752         pushl   %esi
 753         subl    $20, %esp
 754         movl    36(%esp), %ecx  / x,
 755         movl    32(%esp), %esi  / x,
 756         movl    36(%esp), %edi  / x,
 757         testl   %ecx, %ecx
 758         movl    40(%esp), %eax  / y, y
 759         movl    44(%esp), %edx  / y, y
 760         movl    %esi, (%esp)    /, xt
 761         movl    %edi, 4(%esp)   /, xt
 762         js      .LL92
             /* x >= 0: xt = x was stored above; now test the sign of y */
 763         testl   %edx, %edx      / y
 764         movl    %eax, %esi      / y, yt
 765         movl    %edx, %edi      / y, yt
 766         js      .LL93
 767 .LL90:
             /*
              * Push &rem and yt, reload xt (now 12 bytes higher on the
              * stack) into %edx:%eax and call UDivRem; the quotient it
              * returns is overwritten below.
              */
 768         leal    8(%esp), %eax   /, tmp66
 769         pushl   %eax            / tmp66
 770         pushl   %edi            / yt
 771         pushl   %esi            / yt
 772         movl    12(%esp), %eax  / xt, xt
 773         movl    16(%esp), %edx  / xt, xt
 774         call    UDivRem
 775         addl    $12, %esp
             /* re-read HI(x) from the argument area to test the sign of x */
 776         movl    36(%esp), %edi  / x,
 777         testl   %edi, %edi
 778         movl    8(%esp), %eax   / rem, rem
 779         movl    12(%esp), %edx  / rem, rem
 780         js      .LL94
 781         addl    $20, %esp
 782         popl    %esi
 783         popl    %edi
 784         ret     $16
 785         .align  16
 786 .LL92:
             /*
              * x < 0: xt = -x.  If y >= 0 go straight to the call;
              * otherwise execution continues into .LL93 below.
              */
 787         negl    %esi
 788         adcl    $0, %edi
 789         negl    %edi
 790         testl   %edx, %edx      / y
 791         movl    %esi, (%esp)    /, xt
 792         movl    %edi, 4(%esp)   /, xt
 793         movl    %eax, %esi      / y, yt
 794         movl    %edx, %edi      / y, yt
 795         jns     .LL90
 796         .align  16
 797 .LL93:
             /* y < 0: yt = -y */
 798         negl    %esi            / yt
 799         adcl    $0, %edi        /, yt
 800         negl    %edi            / yt
 801         jmp     .LL90
 802         .align  16
 803 .LL94:
             /*
              * x < 0: return -rem.  The adcl consumes the carry from the
              * first negl before the addl overwrites the flags; the final
              * negl of the high word does not depend on them.
              */
 804         negl    %eax            / rem
 805         adcl    $0, %edx        /, rem
 806         addl    $20, %esp
 807         popl    %esi
 808         negl    %edx            / rem
 809         popl    %edi
 810         ret     $16
 811         SET_SIZE(__rem64)
 812
 813 /*
 814  * __udivrem64
 815  *
 816  * Perform division of two unsigned 64-bit quantities, returning the
 817  * quotient in %edx:%eax, and the remainder in %ecx:%esi.  __udivrem64
 818  * pops the arguments on return.
 819  */
 820         ENTRY(__udivrem64)
             /*
              * Reserve 12 bytes of stack; the address of the lowest byte
              * (%esp at that point) is passed to UDivRem as pmod.  After
              * the subl, x is at 16(%esp)/20(%esp) and y at
              * 24(%esp)/28(%esp).
              */
 821         subl    $12, %esp
 822         movl    %esp, %ecx      /, tmp64
 823         movl    16(%esp), %eax  / x, x
 824         movl    20(%esp), %edx  / x, x
             /* push pmod, then the high and low words of y (%esp moves 4 per push) */
 825         pushl   %ecx            / tmp64
 826         pushl   32(%esp)        / y
 827         pushl   32(%esp)
 828         call    UDivRem
             /*
              * The quotient stays in %edx:%eax as UDivRem returned it.
              * The remainder buffer sits above the 12 bytes of pushed
              * arguments: its high word goes to %ecx and its low word to
              * %esi.  %esi is overwritten here without being saved.
              * Release the 12 + 12 bytes and pop the caller's 16 bytes of
              * arguments on return.
              */
 829         movl    16(%esp), %ecx  / rem, tmp63
 830         movl    12(%esp), %esi  / rem
 831         addl    $24, %esp
 832         ret     $16
 833         SET_SIZE(__udivrem64)
 834
 835 /*
 836  * Signed division with remainder.
 837  */
 838 / int64_t
 839 / SDivRem(int64_t x, int64_t y, int64_t * pmod)
 840 / {
 841 /       int             negative;
 842 /       uint64_t        xt, yt, r, rem;
 843 /
 844 /       if (x < 0) {
 845 /               xt = -(uint64_t) x;
 846 /               negative = 1;
 847 /       } else {
 848 /               xt = x;
 849 /               negative = 0;
 850 /       }
 851 /       if (y < 0) {
 852 /               yt = -(uint64_t) y;
 853 /               negative ^= 1;
 854 /       } else {
 855 /               yt = y;
 856 /       }
 857 /       r = UDivRem(xt, yt, &rem);
 858 /       *pmod = (x < 0 ? (int64_t) - rem : rem);
 859 /       return (negative ? (int64_t) - r : r);
 860 / }
 861         ENTRY(SDivRem)
             /*
              * Signed 64-bit division with remainder; assembly for the C
              * listing above.
              *
              * In:   %edx:%eax = x.  After the prologue below, y is at
              *       40(%esp)/44(%esp) and pmod at 48(%esp).
              * Out:  %edx:%eax = quotient; the remainder is stored through
              *       pmod.
              * Locals: r at (%esp)/4(%esp), xt at 8(%esp)/12(%esp), rem at
              * 16(%esp)/20(%esp).  %edi keeps HI(x) for the sign tests and
              * %ebp holds "negative".  The routine returns with a plain
              * "ret", so the stack arguments are left for the caller.
              */
 862         pushl   %ebp
 863         pushl   %edi
 864         pushl   %esi
 865         subl    $24, %esp
 866         testl   %edx, %edx      / x
 867         movl    %edx, %edi      / x, x
 868         js      .LL73
             /* x >= 0: xt = x, negative = 0; yt = y in %ecx:%edx; test sign of y */
 869         movl    44(%esp), %esi  / y,
 870         xorl    %ebp, %ebp      / negative
 871         testl   %esi, %esi
 872         movl    %edx, 12(%esp)  / x, xt
 873         movl    %eax, 8(%esp)   / x, xt
 874         movl    40(%esp), %edx  / y, yt
 875         movl    44(%esp), %ecx  / y, yt
 876         js      .LL74
 877 .LL70:
             /*
              * Push &rem and yt, reload xt (now 12 bytes higher on the
              * stack) into %edx:%eax and call UDivRem.  The quotient is
              * saved at 12(%esp)/16(%esp), which become the r slots
              * (%esp)/4(%esp) once the 12 bytes of arguments are released.
              */
 878         leal    16(%esp), %eax  /, tmp70
 879         pushl   %eax            / tmp70
 880         pushl   %ecx            / yt
 881         pushl   %edx            / yt
 882         movl    20(%esp), %eax  / xt, xt
 883         movl    24(%esp), %edx  / xt, xt
 884         call    UDivRem
 885         movl    %edx, 16(%esp)  /, r
 886         movl    %eax, 12(%esp)  /, r
 887         addl    $12, %esp
             /* load rem into %ecx:%edx; it is negated at .LL75 when x < 0 */
 888         testl   %edi, %edi      / x
 889         movl    16(%esp), %edx  / rem, rem
 890         movl    20(%esp), %ecx  / rem, rem
 891         js      .LL75
 892 .LL71:
             /*
              * Store the remainder through pmod, reload r into %edx:%eax
              * and negate it when "negative" is set.  The je uses the
              * flags from the testl; the movl instructions in between do
              * not change them.
              */
 893         movl    48(%esp), %edi  / pmod, pmod
 894         testl   %ebp, %ebp      / negative
 895         movl    %edx, (%edi)    / rem,* pmod
 896         movl    %ecx, 4(%edi)   / rem,
 897         movl    (%esp), %eax    / r, r
 898         movl    4(%esp), %edx   / r, r
 899         je      .LL72
 900         negl    %eax            / r
 901         adcl    $0, %edx        /, r
 902         negl    %edx            / r
 903 .LL72:
 904         addl    $24, %esp
 905         popl    %esi
 906         popl    %edi
 907         popl    %ebp
 908         ret
 909         .align  16
 910 .LL73:
             /*
              * x < 0: xt = -x, negative = 1.  The movl between the adcl
              * and the second negl does not touch the flags.  If y >= 0 go
              * straight to the call; otherwise execution continues into
              * .LL74 below.
              */
 911         negl    %eax
 912         adcl    $0, %edx
 913         movl    44(%esp), %esi  / y,
 914         negl    %edx
 915         testl   %esi, %esi
 916         movl    %edx, 12(%esp)  /, xt
 917         movl    %eax, 8(%esp)   /, xt
 918         movl    $1, %ebp        /, negative
 919         movl    40(%esp), %edx  / y, yt
 920         movl    44(%esp), %ecx  / y, yt
 921         jns     .LL70
 922         .align  16
 923 .LL74:
             /* y < 0: yt = -y and the quotient sign flips */
 924         negl    %edx            / yt
 925         adcl    $0, %ecx        /, yt
 926         negl    %ecx            / yt
 927         xorl    $1, %ebp        /, negative
 928         jmp     .LL70
 929         .align  16
 930 .LL75:
             /* x < 0: the remainder takes the sign of x */
 931         negl    %edx            / rem
 932         adcl    $0, %ecx        /, rem
 933         negl    %ecx            / rem
 934         jmp     .LL71
 935         SET_SIZE(SDivRem)
 936
 937 /*
 938  * __divrem64
 939  *
 940  * Perform division of two signed 64-bit quantities, returning the
 941  * quotient in %edx:%eax, and the remainder in %ecx:%esi.  __divrem64
 942  * pops the arguments on return.
 943  */
 944         ENTRY(__divrem64)
             /*
              * Reserve 20 bytes of stack; the address of the lowest byte
              * (%esp at that point) is passed to SDivRem as pmod.  After
              * the subl, x is at 24(%esp)/28(%esp) and y at
              * 32(%esp)/36(%esp).
              */
 945         subl    $20, %esp
 946         movl    %esp, %ecx      /, tmp64
 947         movl    24(%esp), %eax  / x, x
 948         movl    28(%esp), %edx  / x, x
             /* push pmod, then the high and low words of y (%esp moves 4 per push) */
 949         pushl   %ecx            / tmp64
 950         pushl   40(%esp)        / y
 951         pushl   40(%esp)
 952         call    SDivRem
             /*
              * The quotient stays in %edx:%eax as SDivRem returned it.
              * The remainder buffer sits above the 12 bytes of pushed
              * arguments: its high word goes to %ecx and its low word to
              * %esi.  %esi is overwritten here without being saved.
              * Release the 12 + 20 bytes and pop the caller's 16 bytes of
              * arguments on return.
              */
 953         movl    16(%esp), %ecx
 954         movl    12(%esp),%esi   / rem
 955         addl    $32, %esp
 956         ret     $16
 957         SET_SIZE(__divrem64)