4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License (the "License").
6 * You may not use this file except in compliance with the License.
8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 * or http://www.opensolaris.org/os/licensing.
10 * See the License for the specific language governing permissions
11 * and limitations under the License.
13 * When distributing Covered Code, include this CDDL HEADER in each
14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 * If applicable, add the following below this CDDL HEADER, with the
16 * fields enclosed by brackets "[]" replaced with your own identifying
17 * information: Portions Copyright [yyyy] [name of copyright owner]
22 * Copyright 2005 Sun Microsystems, Inc. All rights reserved.
23 * Use is subject to license terms.
31 * C support for 64-bit modulo and division.
32 * Hand-customized compiler output - see comments for details.
36 * int32_t/int64_t division/manipulation
38 * Hand-customized compiler output: the non-GCC entry points depart from
39 * the SYS V ABI by requiring their arguments to be popped, and in the
40 * [u]divrem64 cases returning the remainder in %ecx:%esi. Note the
41 * compiler-generated use of %edx:%eax for the first argument of
42 * internal entry points.
45 * - counting the number of leading zeros in a word
46 * - multiplying two 32-bit numbers giving a 64-bit result
47 * - dividing a 64-bit number by a 32-bit number, giving both quotient
49 * - subtracting two 64-bit results
51 / #define LO(X) ((uint32_t)(X) & 0xffffffff)
52 / #define HI(X) ((uint32_t)((X) >> 32) & 0xffffffff)
53 / #define HILO(H, L) (((uint64_t)(H) << 32) + (L))
55 / /* give index of highest bit */
56 / #define HIBIT(a, r) \
57 / asm
("bsrl %1,%0": "=r"((uint32_t
)(r
)) : "g" (a))
59 / /* multiply two uint32_ts resulting in a uint64_t */
60 / #define A_MUL32(a, b, lo, hi) \
62 / : "=a"((uint32_t
)(lo
)), "=d"((uint32_t
)(hi
)) : "g" (b), "0"(a))
64 / /* divide a uint64_t by a uint32_t */
65 / #define A_DIV32(lo, hi, b, q, r) \
67 / : "=a"((uint32_t
)(q
)), "=d"((uint32_t
)(r
)) \
68 / : "g" (b), "0"((uint32_t
)(lo
)), "1"((uint32_t
)hi
))
70 / /* subtract two uint64_ts (with borrow) */
71 / #define A_SUB2(bl, bh, al, ah) \
72 / asm
("subl %4,%0\n\tsbbl %5,%1" \
73 / : "=&r"((uint32_t
)(al
)), "=r"((uint32_t
)(ah
)) \
74 / : "0"((uint32_t
)(al
)), "1"((uint32_t
)(ah
)), "g"((uint32_t
)(bl)), \
75 / "g"((uint32_t
)(bh
)))
78 / * Unsigned division with remainder.
79 / * Divide two uint64_ts, and calculate remainder.
82 / UDivRem
(uint64_t x
, uint64_t y
, uint64_t
* pmod
)
84 / /* simple cases: y is a single uint32_t */
86 / uint32_t div_hi
, div_rem;
90 / if
(HI
(x
) < LO
(y
)) {
91 / /* result is a single uint32_t, use one division */
95 / /* result is a double uint32_t, use two divisions */
96 / A_DIV32
(HI
(x
), 0, LO
(y
), q1
, div_hi
);
99 / /* calculate q0 and remainder */
100 / A_DIV32
(LO
(x
), div_hi
, LO
(y
), q0
, div_rem
);
102 / /* return remainder */
105 / /* return result */
106 / return
(HILO
(q1
, q0
));
108 / } else if
(HI
(x
) < HI
(y
)) {
109 / /* HI(x) < HI(y) => x < y => result is 0 */
111 / /* return remainder */
114 / /* return result */
119 / * uint64_t by uint64_t division, resulting in a one-uint32_t
125 / uint32_t normshift;
127 / /* normalize by shifting x and y so MSB(y) == 1 */
128 / HIBIT
(HI
(y
), normshift
);
/* index of highest 1 bit */
129 / normshift
= 31 - normshift;
131 / if
(normshift
== 0) {
132 / /* no shifting needed, and x < 2*y so q <= 1 */
138 / /* if x >= y then q = 1 (note x1 >= y1) */
139 / if
(x1
> y1 || x0
>= y0
) {
141 / /* subtract y from x to get remainder */
142 / A_SUB2
(y0
, y1
, x0
, x1
);
147 / /* return remainder */
148 / *pmod
= HILO
(x1
, x0
);
150 / /* return result */
155 / * the last case: result is one uint32_t, but we need to
159 / uint32_t t0
, t1
, x2;
162 / dt
= (y
<< normshift
);
166 / /* normalize x (we need 3 uint32_ts!!!) */
167 / x2
= (HI
(x
) >> (32 - normshift
));
168 / dt
= (x
<< normshift
);
172 / /* estimate q0, and reduce x to a two uint32_t value */
173 / A_DIV32
(x1
, x2
, y1
, q0
, x1
);
175 / /* adjust q0 down if too high */
177 / * because of the limited range of x2 we can only be
180 / A_MUL32
(y0
, q0
, t0
, t1
);
181 / if
(t1
> x1 ||
(t1
== x1
&& t0
> x0
)) {
183 / A_SUB2
(y0
, y1
, t0
, t1
);
185 / /* return remainder */
186 / /* subtract product from x to get remainder */
187 / A_SUB2
(t0
, t1
, x0
, x1
);
188 / *pmod
= (HILO
(x1
, x0
) >> normshift
);
190 / /* return result */
200 movl
68(%esp
), %edi
/ y
,
201 testl
%edi
, %edi
/ tmp63
202 movl
%eax
, 40(%esp
) / x
, x
203 movl
%edx
, 44(%esp
) / x
, x
204 movl
%edi
, %esi
/, tmp62
205 movl
%edi
, %ecx
/ tmp62
, tmp63
207 movl
%edx
, %eax
/, tmp68
208 cmpl 64(%esp
), %eax
/ y
, tmp68
211 movl
72(%esp
), %ebp
/ pmod
,
212 xorl
%esi
, %esi
/ <result
>
213 movl
40(%esp
), %eax
/ x
, q0
214 movl
%ecx
, %edi
/ <result
>, <result
>
216 movl
%edx
, (%ebp
) / div_rem
,
218 addl
%eax
, %esi
/ q0
, <result
>
220 adcl
%edx
, %edi
/ q0
, <result
>
222 movl
%esi
, %eax
/ <result
>, <result
>
224 movl
%edi
, %edx
/ <result
>, <result
>
230 movl
44(%esp
), %eax
/ x
,
232 cmpl %esi
, %eax
/ tmp62
, tmp5
233 movl
%eax
, 32(%esp
) / tmp5
,
236 movl
72(%esp
), %esi
/ pmod
,
237 movl
40(%esp
), %ebp
/ x
,
238 movl
44(%esp
), %ecx
/ x
,
241 xorl
%edi
, %edi
/ <result
>
242 xorl
%esi
, %esi
/ <result
>
245 movl
%esi
, %eax
/ <result
>, <result
>
247 movl
%edi
, %edx
/ <result
>, <result
>
253 movl
%edi
, %edx
/ tmp63
, div_hi
255 movl
%eax
, %ecx
/, q1
259 movl $
31, %edi
/, tmp87
260 bsrl
%esi
,%edx
/ tmp62
, normshift
261 subl
%edx
, %edi
/ normshift
, tmp87
262 movl
%edi
, 28(%esp
) / tmp87
,
264 movl
32(%esp
), %edx
/, x1
265 cmpl %ecx
, %edx
/ y1
, x1
266 movl
64(%esp
), %edi
/ y
, y0
267 movl
40(%esp
), %esi
/ x
, x0
270 cmpl %edi
, %esi
/ y0
, x0
274 subl
%edi
,%esi
/ y0
, x0
275 sbbl
%ecx
,%edx
/ tmp63
, x1
277 movl
%edx
, %ecx
/ x1
, x1
280 addl
%esi
, %edx
/ x0
, x1
281 adcl
%edi
, %ecx
/ x0
, x1
282 movl
72(%esp
), %esi
/ pmod
,
283 movl
%edx
, (%esi
) / x1
,
284 movl
%ecx
, 4(%esi
) / x1
,
285 xorl
%edi
, %edi
/ <result
>
286 movl
%ebp
, %esi
/ q0
, <result
>
291 movl
64(%esp
), %esi
/ y
, dt
292 movl
68(%esp
), %edi
/ y
, dt
293 shldl
%esi
, %edi
/, dt
, dt
298 movl $
32, %ecx
/, tmp102
299 subl
28(%esp
), %ecx
/, tmp102
300 movl
%esi
, %ebp
/ dt
, y0
302 shrl
%cl
, %esi
/ tmp102
,
303 movl
%edi
, 24(%esp
) / tmp99
,
305 movl
%esi
, 12(%esp
) /, x2
306 movl
44(%esp
), %edi
/ x
, dt
307 movl
40(%esp
), %esi
/ x
, dt
308 shldl
%esi
, %edi
/, dt
, dt
312 movl
%esi
, %edi
/ dt
, dt
315 movl
%edi
, %ecx
/ dt
,
316 movl
%edi
, %eax
/ tmp2
,
318 movl
12(%esp
), %edx
/ x2
,
320 movl
%edx
, %ecx
/, x1
323 movl
%ebp
, %eax
/ y0
, t0
325 cmpl %ecx
, %edx
/ x1
, t1
330 movl
%ecx
, %edi
/ x1
,
331 subl
%eax
,%esi
/ t0
, x0
333 movl
%edi
, %eax
/, x1
334 movl
%eax
, %edx
/ x1
, x1
337 addl
%esi
, %eax
/ x0
, x1
338 adcl
%ebp
, %edx
/ x0
, x1
340 shrdl
%edx
, %eax
/, x1
, x1
344 movl
%edx
, %eax
/ x1
, x1
347 movl
72(%esp
), %ecx
/ pmod
,
348 movl
20(%esp
), %esi
/, <result
>
349 xorl
%edi
, %edi
/ <result
>
350 movl
%eax
, (%ecx
) / x1
,
351 movl
%edx
, 4(%ecx
) / x1
,
355 cmpl %esi
, %eax
/ x0
, t0
359 subl
%ebp
,%eax
/ y0
, t0
360 sbbl
24(%esp
),%edx
/, t1
363 movl
%esi
, %edi
/ dt
, dt
369 * Unsigned division without remainder.
372 / UDiv
(uint64_t x
, uint64_t y
)
375 / /* simple cases: y is a single uint32_t */
376 / uint32_t div_hi
, div_rem;
380 / if
(HI
(x
) < LO
(y
)) {
381 / /* result is a single uint32_t, use one division */
385 / /* result is a double uint32_t, use two divisions */
386 / A_DIV32
(HI
(x
), 0, LO
(y
), q1
, div_hi
);
389 / /* calculate q0 and remainder */
390 / A_DIV32
(LO
(x
), div_hi
, LO
(y
), q0
, div_rem
);
392 / /* return result */
393 / return
(HILO
(q1
, q0
));
395 / } else if
(HI
(x
) < HI
(y
)) {
396 / /* HI(x) < HI(y) => x < y => result is 0 */
398 / /* return result */
403 / * uint64_t by uint64_t division, resulting in a one-uint32_t
409 / unsigned normshift;
411 / /* normalize by shifting x and y so MSB(y) == 1 */
412 / HIBIT
(HI
(y
), normshift
);
/* index of highest 1 bit */
413 / normshift
= 31 - normshift;
415 / if
(normshift
== 0) {
416 / /* no shifting needed, and x < 2*y so q <= 1 */
422 / /* if x >= y then q = 1 (note x1 >= y1) */
423 / if
(x1
> y1 || x0
>= y0
) {
425 / /* subtract y from x to get remainder */
426 / /* A_SUB2(y0, y1, x0, x1); */
431 / /* return result */
436 / * the last case: result is one uint32_t, but we need to
440 / uint32_t t0
, t1
, x2;
443 / dt
= (y
<< normshift
);
447 / /* normalize x (we need 3 uint32_ts!!!) */
448 / x2
= (HI
(x
) >> (32 - normshift
));
449 / dt
= (x
<< normshift
);
453 / /* estimate q0, and reduce x to a two uint32_t value */
454 / A_DIV32
(x1
, x2
, y1
, q0
, x1
);
456 / /* adjust q0 down if too high */
458 / * because of the limited range of x2 we can only be
461 / A_MUL32
(y0
, q0
, t0
, t1
);
462 / if
(t1
> x1 ||
(t1
== x1
&& t0
> x0
)) {
465 / /* return result */
475 movl
%edx
, 36(%esp
) / x
, x
476 movl
60(%esp
), %edx
/ y
,
477 testl
%edx
, %edx
/ tmp62
478 movl
%eax
, 32(%esp
) / x
, x
479 movl
%edx
, %ecx
/ tmp61
, tmp62
480 movl
%edx
, %eax
/, tmp61
482 movl
36(%esp
), %esi
/ x
,
483 cmpl 56(%esp
), %esi
/ y
, tmp67
484 movl
%esi
, %eax
/, tmp67
485 movl
%esi
, %edx
/ tmp67
, div_hi
487 movl
%ecx
, %edx
/ tmp62
, div_hi
489 movl
%eax
, %ecx
/, q1
491 xorl
%esi
, %esi
/ <result
>
492 movl
%ecx
, %edi
/ <result
>, <result
>
493 movl
32(%esp
), %eax
/ x
, q0
496 addl
%eax
, %esi
/ q0
, <result
>
497 adcl
%ecx
, %edi
/ q0
, <result
>
500 movl
%esi
, %eax
/ <result
>, <result
>
502 movl
%edi
, %edx
/ <result
>, <result
>
508 movl
36(%esp
), %esi
/ x
,
510 movl
%esi
, 24(%esp
) / tmp1
,
512 xorl
%esi
, %esi
/ <result
>
513 xorl
%edi
, %edi
/ <result
>
514 cmpl %eax
, 24(%esp
) / tmp61
,
516 bsrl
%eax
,%ebp
/ tmp61
, normshift
517 movl $
31, %eax
/, tmp85
518 subl
%ebp
, %eax
/ normshift
, normshift
520 movl
24(%esp
), %eax
/, x1
521 cmpl %ecx
, %eax
/ tmp62
, x1
522 movl
56(%esp
), %esi
/ y
, y0
523 movl
32(%esp
), %edx
/ x
, x0
526 cmpl %esi
, %edx
/ y0
, x0
531 movl
%eax
, %esi
/ q0
, <result
>
532 xorl
%edi
, %edi
/ <result
>
535 movl
%esi
, %eax
/ <result
>, <result
>
537 movl
%edi
, %edx
/ <result
>, <result
>
544 movl
56(%esp
), %esi
/ y
,
545 movl
60(%esp
), %edi
/ y
,
551 movl $
32, %ecx
/, tmp96
552 subl
%eax
, %ecx
/ normshift
, tmp96
554 movl
%edi
, 20(%esp
) /, dt
555 movl
24(%esp
), %ebp
/, x2
557 shrl
%cl
, %ebp
/ tmp96
, x2
558 movl
%esi
, 16(%esp
) /, dt
560 movl
32(%esp
), %esi
/ x
, dt
562 movl
36(%esp
), %edi
/ x
, dt
563 shldl
%esi
, %edi
/, dt
, dt
568 movl
%esi
, %edi
/ dt
, dt
572 movl
%edi
, %eax
/ tmp1
,
573 movl
%ebp
, %edx
/ x2
,
575 movl
%edx
, %ebp
/, x1
577 movl
%eax
, %ecx
/, q0
578 movl
16(%esp
), %eax
/ dt
,
580 cmpl %ebp
, %edx
/ x1
, t1
582 movl
%esi
, %edi
/ dt
, x0
586 movl
%ecx
, %esi
/ q0
, <result
>
588 xorl
%edi
, %edi
/ <result
>
591 cmpl %edi
, %eax
/ x0
, t0
595 movl
%ecx
, %esi
/ q0
, <result
>
606 * Perform division of two unsigned 64-bit quantities, returning the
607 * quotient in %edx:%eax. __udiv64 pops the arguments on return,
610 movl
4(%esp
), %eax
/ x
, x
611 movl
8(%esp
), %edx
/ x
, x
622 * Perform division of two unsigned 64-bit quantities, returning the
623 * remainder in %edx:%eax. __urem64 pops the arguments on return
627 movl
%esp
, %ecx
/, tmp65
628 movl
16(%esp
), %eax
/ x
, x
629 movl
20(%esp
), %edx
/ x
, x
634 movl
12(%esp
), %eax
/ rem
, rem
635 movl
16(%esp
), %edx
/ rem
, rem
643 * Perform division of two signed 64-bit quantities, returning the
644 * quotient in %edx:%eax. __div64 pops the arguments on return.
647 / __div64
(int64_t x
, int64_t y
)
650 / uint64_t xt
, yt
, r;
653 / xt
= -(uint64_t
) x;
660 / yt
= -(uint64_t
) y;
666 / return
(negative ?
(int64_t
) - r
: r
);
673 movl
28(%esp
), %edx
/ x
, x
675 movl
24(%esp
), %eax
/ x
, x
676 movl
32(%esp
), %esi
/ y
, y
677 movl
36(%esp
), %edi
/ y
, y
679 xorl
%ebp
, %ebp
/ negative
681 movl
%eax
, (%esp
) / x
, xt
682 movl
%edx
, 4(%esp
) / x
, xt
683 movl
%esi
, %eax
/ y
, yt
684 movl
%edi
, %edx
/ y
, yt
689 movl
8(%esp
), %eax
/ xt
, xt
690 movl
12(%esp
), %edx
/ xt
, xt
693 testl
%ebp
, %ebp
/ negative
711 movl
%eax
, (%esp
) / x
, xt
712 movl
%edx
, 4(%esp
) / x
, xt
713 movl $
1, %ebp
/, negative
714 movl
%esi
, %eax
/ y
, yt
715 movl
%edi
, %edx
/ y
, yt
722 xorl $
1, %ebp
/, negative
729 * Perform division of two signed 64-bit quantities, returning the
730 * remainder in %edx:%eax. __rem64 pops the arguments on return.
733 / __rem64
(int64_t x
, int64_t y
)
735 / uint64_t xt
, yt
, rem;
738 / xt
= -(uint64_t
) x;
743 / yt
= -(uint64_t
) y;
747 / (void
) UDivRem
(xt
, yt
, &rem
);
748 / return
(x
< 0 ?
(int64_t
) - rem
: rem
);
754 movl
36(%esp
), %ecx
/ x
,
755 movl
32(%esp
), %esi
/ x
,
756 movl
36(%esp
), %edi
/ x
,
758 movl
40(%esp
), %eax
/ y
, y
759 movl
44(%esp
), %edx
/ y
, y
760 movl
%esi
, (%esp
) /, xt
761 movl
%edi
, 4(%esp
) /, xt
764 movl
%eax
, %esi
/ y
, yt
765 movl
%edx
, %edi
/ y
, yt
768 leal
8(%esp
), %eax
/, tmp66
772 movl
12(%esp
), %eax
/ xt
, xt
773 movl
16(%esp
), %edx
/ xt
, xt
776 movl
36(%esp
), %edi
/ x
,
778 movl
8(%esp
), %eax
/ rem
, rem
779 movl
12(%esp
), %edx
/ rem
, rem
791 movl
%esi
, (%esp
) /, xt
792 movl
%edi
, 4(%esp
) /, xt
793 movl
%eax
, %esi
/ y
, yt
794 movl
%edx
, %edi
/ y
, yt
816 * Perform division of two unsigned 64-bit quantities, returning the
817 * quotient in %edx:%eax, and the remainder in %ecx:%esi. __udivrem64
818 * pops the arguments on return.
822 movl
%esp
, %ecx
/, tmp64
823 movl
16(%esp
), %eax
/ x
, x
824 movl
20(%esp
), %edx
/ x
, x
829 movl
16(%esp
), %ecx
/ rem
, tmp63
830 movl
12(%esp
), %esi
/ rem
833 SET_SIZE
(__udivrem64
)
836 * Signed division with remainder.
839 / SDivRem
(int64_t x
, int64_t y
, int64_t
* pmod
)
842 / uint64_t xt
, yt
, r
, rem;
845 / xt
= -(uint64_t
) x;
852 / yt
= -(uint64_t
) y;
857 / r
= UDivRem
(xt
, yt
, &rem
);
858 / *pmod
= (x
< 0 ?
(int64_t
) - rem
: rem
);
859 / return
(negative ?
(int64_t
) - r
: r
);
867 movl
%edx
, %edi
/ x
, x
869 movl
44(%esp
), %esi
/ y
,
870 xorl
%ebp
, %ebp
/ negative
872 movl
%edx
, 12(%esp
) / x
, xt
873 movl
%eax
, 8(%esp
) / x
, xt
874 movl
40(%esp
), %edx
/ y
, yt
875 movl
44(%esp
), %ecx
/ y
, yt
878 leal
16(%esp
), %eax
/, tmp70
882 movl
20(%esp
), %eax
/ xt
, xt
883 movl
24(%esp
), %edx
/ xt
, xt
885 movl
%edx
, 16(%esp
) /, r
886 movl
%eax
, 12(%esp
) /, r
889 movl
16(%esp
), %edx
/ rem
, rem
890 movl
20(%esp
), %ecx
/ rem
, rem
893 movl
48(%esp
), %edi
/ pmod
, pmod
894 testl
%ebp
, %ebp
/ negative
895 movl
%edx
, (%edi
) / rem
,* pmod
896 movl
%ecx
, 4(%edi
) / rem
,
897 movl
(%esp
), %eax
/ r
, r
898 movl
4(%esp
), %edx
/ r
, r
913 movl
44(%esp
), %esi
/ y
,
916 movl
%edx
, 12(%esp
) /, xt
917 movl
%eax
, 8(%esp
) /, xt
918 movl $
1, %ebp
/, negative
919 movl
40(%esp
), %edx
/ y
, yt
920 movl
44(%esp
), %ecx
/ y
, yt
927 xorl $
1, %ebp
/, negative
940 * Perform division of two signed 64-bit quantities, returning the
941 * quotient in %edx:%eax, and the remainder in %ecx:%esi. __divrem64
942 * pops the arguments on return.
946 movl
%esp
, %ecx
/, tmp64
947 movl
24(%esp
), %eax
/ x
, x
948 movl
28(%esp
), %edx
/ x
, x
954 movl
12(%esp
),%esi
/ rem