/ Source provenance (gitweb scrape residue, commented out so it cannot
/ be mistaken for code):
/   commit subject: "dmake: do not set MAKEFLAGS=k"
/   repo/path: [unleashed/tickless.git] usr/src/common/util/i386/muldiv.s
/   blob: 8fade8c0ca8f348b3a4264383eefca58cc189a9e
1 /*
2 * CDDL HEADER START
4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License, Version 1.0 only
6 * (the "License"). You may not use this file except in compliance
7 * with the License.
9 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
10 * or http://www.opensolaris.org/os/licensing.
11 * See the License for the specific language governing permissions
12 * and limitations under the License.
14 * When distributing Covered Code, include this CDDL HEADER in each
15 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
16 * If applicable, add the following below this CDDL HEADER, with the
17 * fields enclosed by brackets "[]" replaced with your own identifying
18 * information: Portions Copyright [yyyy] [name of copyright owner]
20 * CDDL HEADER END
23 * Copyright 2005 Sun Microsystems, Inc. All rights reserved.
24 * Use is subject to license terms.
27 .ident "%Z%%M% %I% %E% SMI"
29 .file "muldiv.s"
31 #if defined(__i386) && !defined(__amd64)
34 * Helper routines for 32-bit compilers to perform 64-bit math.
35 * These are used both by the Sun and GCC compilers.
38 #include <sys/asm_linkage.h>
39 #include <sys/asm_misc.h>
44 / function __mul64(A,B:Longint):Longint;
45 / {Overflow is not checked}
47 / We essentially do multiply by longhand, using base 2**32 digits.
48 / a b parameter A
49 / x c d parameter B
50 / ---------
51 / ad bd
52 / ac bc
53 / -----------------
54 / ac ad+bc bd
56 / We can ignore ac and top 32 bits of ad+bc: if <> 0, overflow happened.
/ __mul64(A, B) -- 64-bit multiply helper for 32-bit compilers
/ (used by both the Sun and GCC compilers; see header above).
/ Long multiplication in base 2^32 per the diagram above:
/   %edx:%eax = (lo(A)*lo(B)) + ((hi(A)*lo(B) + lo(A)*hi(B)) << 32)
/ i.e. the low 64 bits of A*B; overflow is not checked.
/ In:   8(%ebp)=A.lo  12(%ebp)=A.hi  16(%ebp)=B.lo  20(%ebp)=B.hi
/ Out:  %edx:%eax = product; callee pops its 16 bytes of args (ret $16).
/ Clobbers: %ecx, flags; %esi is saved/restored.
58 ENTRY(__mul64)
59 push %ebp
60 mov %esp,%ebp
61 pushl %esi
62 mov 12(%ebp),%eax / A.hi (a)
63 mull 16(%ebp) / Multiply A.hi by B.lo (produces ad)
64 xchg %ecx,%eax / ecx = bottom half of ad.
65 movl 8(%ebp),%eax / A.Lo (b)
66 movl %eax,%esi / Save A.lo for later
67 mull 16(%ebp) / Multiply A.Lo by B.LO (dx:ax = bd.)
68 addl %edx,%ecx / cx is ad
69 xchg %eax,%esi / esi is bd, eax = A.lo (d)
70 mull 20(%ebp) / Multiply A.lo * B.hi (producing bc)
71 addl %ecx,%eax / Produce ad+bc
/ assemble the result: %esi holds lo(bd); swap so %edx:%eax = hi:lo
72 movl %esi,%edx
73 xchg %eax,%edx
74 popl %esi
75 movl %ebp,%esp
76 popl %ebp
77 ret $16
78 SET_SIZE(__mul64)
82 * C support for 64-bit modulo and division.
83 * Hand-customized compiler output - see comments for details.
86 / /*
87 / * Unsigned division with remainder.
88 / * Divide two uint64_ts, and calculate remainder.
89 / */
90 / uint64_t
91 / UDivRem(uint64_t x, uint64_t y, uint64_t * pmod)
92 / {
93 / /* simple cases: y is a single uint32_t */
94 / if (HI(y) == 0) {
95 / uint32_t div_hi, div_rem;
96 / uint32_t q0, q1;
98 / /* calculate q1 */
99 / if (HI(x) < LO(y)) {
100 / /* result is a single uint32_t, use one division */
101 / q1 = 0;
102 / div_hi = HI(x);
103 / } else {
104 / /* result is a double uint32_t, use two divisions */
105 / A_DIV32(HI(x), 0, LO(y), q1, div_hi);
108 / /* calculate q0 and remainder */
109 / A_DIV32(LO(x), div_hi, LO(y), q0, div_rem);
111 / /* return remainder */
112 / *pmod = div_rem;
114 / /* return result */
115 / return (HILO(q1, q0));
117 / } else if (HI(x) < HI(y)) {
118 / /* HI(x) < HI(y) => x < y => result is 0 */
120 / /* return remainder */
121 / *pmod = x;
123 / /* return result */
124 / return (0);
126 / } else {
127 / /*
128 / * uint64_t by uint64_t division, resulting in a one-uint32_t
129 / * result
130 / */
131 / uint32_t y0, y1;
132 / uint32_t x1, x0;
133 / uint32_t q0;
134 / uint32_t normshift;
136 / /* normalize by shifting x and y so MSB(y) == 1 */
137 / HIBIT(HI(y), normshift); /* index of highest 1 bit */
138 / normshift = 31 - normshift;
140 / if (normshift == 0) {
141 / /* no shifting needed, and x < 2*y so q <= 1 */
142 / y1 = HI(y);
143 / y0 = LO(y);
144 / x1 = HI(x);
145 / x0 = LO(x);
147 / /* if x >= y then q = 1 (note x1 >= y1) */
148 / if (x1 > y1 || x0 >= y0) {
149 / q0 = 1;
150 / /* subtract y from x to get remainder */
151 / A_SUB2(y0, y1, x0, x1);
152 / } else {
153 / q0 = 0;
156 / /* return remainder */
157 / *pmod = HILO(x1, x0);
159 / /* return result */
160 / return (q0);
162 / } else {
163 / /*
164 / * the last case: result is one uint32_t, but we need to
165 / * normalize
166 / */
167 / uint64_t dt;
168 / uint32_t t0, t1, x2;
170 / /* normalize y */
171 / dt = (y << normshift);
172 / y1 = HI(dt);
173 / y0 = LO(dt);
175 / /* normalize x (we need 3 uint32_ts!!!) */
176 / x2 = (HI(x) >> (32 - normshift));
177 / dt = (x << normshift);
178 / x1 = HI(dt);
179 / x0 = LO(dt);
181 / /* estimate q0, and reduce x to a two uint32_t value */
182 / A_DIV32(x1, x2, y1, q0, x1);
184 / /* adjust q0 down if too high */
185 / /*
186 / * because of the limited range of x2 we can only be
187 / * one off
188 / */
189 / A_MUL32(y0, q0, t0, t1);
190 / if (t1 > x1 || (t1 == x1 && t0 > x0)) {
191 / q0--;
192 / A_SUB2(y0, y1, t0, t1);
194 / /* return remainder */
195 / /* subtract product from x to get remainder */
196 / A_SUB2(t0, t1, x0, x1);
197 / *pmod = (HILO(x1, x0) >> normshift);
199 / /* return result */
200 / return (q0);
/ UDivRem(uint64_t x, uint64_t y, uint64_t *pmod)
/ Unsigned 64/64 divide returning the quotient and storing the
/ remainder through pmod.  Hand-customized compiler output; the
/ algorithm is described in the C pseudocode comment above.
/ Internal entry point: x arrives in %edx:%eax (compiler-generated
/ convention); y and pmod are on the stack (see callers __urem64,
/ __umoddi3, __udivrem64).
/ Out: %edx:%eax = x / y; *pmod = x % y.
/ FIX (review): the two "ret" instructions after the epilogues were
/ missing from this copy of the file (internal lines 236 and 259
/ were dropped); without them execution falls through the popped
/ stack frame into the next aligned block.  Restored below.
	ENTRY(UDivRem)
	pushl	%ebp
	pushl	%edi
	pushl	%esi
	subl	$48, %esp
	movl	68(%esp), %edi		/ y,
	testl	%edi, %edi		/ tmp63
	movl	%eax, 40(%esp)		/ x, x
	movl	%edx, 44(%esp)		/ x, x
	movl	%edi, %esi		/, tmp62
	movl	%edi, %ecx		/ tmp62, tmp63
	jne	.LL2			/ HI(y) != 0: full 64/64 path
	movl	%edx, %eax		/, tmp68
	cmpl	64(%esp), %eax		/ y, tmp68
	jae	.LL21			/ HI(x) >= LO(y): need two divisions
.LL4:
	/ q1 (if any) is in %ecx; divide LO(x) by LO(y) for q0/remainder
	movl	72(%esp), %ebp		/ pmod,
	xorl	%esi, %esi		/ <result>
	movl	40(%esp), %eax		/ x, q0
	movl	%ecx, %edi		/ <result>, <result>
	divl	64(%esp)		/ y
	movl	%edx, (%ebp)		/ div_rem,
	xorl	%edx, %edx		/ q0
	addl	%eax, %esi		/ q0, <result>
	movl	$0, 4(%ebp)		/ remainder fits in 32 bits
	adcl	%edx, %edi		/ q0, <result>
	addl	$48, %esp
	movl	%esi, %eax		/ <result>, <result>
	popl	%esi
	movl	%edi, %edx		/ <result>, <result>
	popl	%edi
	popl	%ebp
	ret				/ restored (was dropped in this copy)
	.align	16
.LL2:
	movl	44(%esp), %eax		/ x,
	xorl	%edx, %edx
	cmpl	%esi, %eax		/ tmp62, tmp5
	movl	%eax, 32(%esp)		/ tmp5,
	movl	%edx, 36(%esp)
	jae	.LL6
	/ HI(x) < HI(y) => x < y: quotient 0, remainder is x itself
	movl	72(%esp), %esi		/ pmod,
	movl	40(%esp), %ebp		/ x,
	movl	44(%esp), %ecx		/ x,
	movl	%ebp, (%esi)
	movl	%ecx, 4(%esi)
	xorl	%edi, %edi		/ <result>
	xorl	%esi, %esi		/ <result>
.LL22:
	/ common exit: result in %edi:%esi
	addl	$48, %esp
	movl	%esi, %eax		/ <result>, <result>
	popl	%esi
	movl	%edi, %edx		/ <result>, <result>
	popl	%edi
	popl	%ebp
	ret				/ restored (was dropped in this copy)
	.align	16
.LL21:
	movl	%edi, %edx		/ tmp63, div_hi
	divl	64(%esp)		/ y
	movl	%eax, %ecx		/, q1
	jmp	.LL4
	.align	16
.LL6:
	/ normalize: shift x and y left so MSB(y) == 1
	movl	$31, %edi		/, tmp87
	bsrl	%esi,%edx		/ tmp62, normshift
	subl	%edx, %edi		/ normshift, tmp87
	movl	%edi, 28(%esp)		/ tmp87,
	jne	.LL8
	/ normshift == 0: x < 2*y so the quotient is 0 or 1
	movl	32(%esp), %edx		/, x1
	cmpl	%ecx, %edx		/ y1, x1
	movl	64(%esp), %edi		/ y, y0
	movl	40(%esp), %esi		/ x, x0
	ja	.LL10
	xorl	%ebp, %ebp		/ q0
	cmpl	%edi, %esi		/ y0, x0
	jb	.LL11
.LL10:
	movl	$1, %ebp		/, q0
	subl	%edi,%esi		/ y0, x0
	sbbl	%ecx,%edx		/ tmp63, x1
.LL11:
	movl	%edx, %ecx		/ x1, x1
	xorl	%edx, %edx		/ x1
	xorl	%edi, %edi		/ x0
	addl	%esi, %edx		/ x0, x1
	adcl	%edi, %ecx		/ x0, x1
	movl	72(%esp), %esi		/ pmod,
	movl	%edx, (%esi)		/ x1,
	movl	%ecx, 4(%esi)		/ x1,
	xorl	%edi, %edi		/ <result>
	movl	%ebp, %esi		/ q0, <result>
	jmp	.LL22
	.align	16
.LL8:
	/ general case: shift y left by normshift (64-bit shift; the
	/ andl $32 test handles the >=32 shift wrap of shld/sal)
	movb	28(%esp), %cl
	movl	64(%esp), %esi		/ y, dt
	movl	68(%esp), %edi		/ y, dt
	shldl	%esi, %edi		/, dt, dt
	sall	%cl, %esi		/, dt
	andl	$32, %ecx
	jne	.LL23
.LL17:
	/ normalize x into three words: x2:x1:x0
	movl	$32, %ecx		/, tmp102
	subl	28(%esp), %ecx		/, tmp102
	movl	%esi, %ebp		/ dt, y0
	movl	32(%esp), %esi
	shrl	%cl, %esi		/ tmp102,
	movl	%edi, 24(%esp)		/ tmp99,
	movb	28(%esp), %cl
	movl	%esi, 12(%esp)		/, x2
	movl	44(%esp), %edi		/ x, dt
	movl	40(%esp), %esi		/ x, dt
	shldl	%esi, %edi		/, dt, dt
	sall	%cl, %esi		/, dt
	andl	$32, %ecx
	je	.LL18
	movl	%esi, %edi		/ dt, dt
	xorl	%esi, %esi		/ dt
.LL18:
	/ estimate q0 = x2:x1 / y1, then check it against y0
	movl	%edi, %ecx		/ dt,
	movl	%edi, %eax		/ tmp2,
	movl	%ecx, (%esp)
	movl	12(%esp), %edx		/ x2,
	divl	24(%esp)
	movl	%edx, %ecx		/, x1
	xorl	%edi, %edi
	movl	%eax, 20(%esp)
	movl	%ebp, %eax		/ y0, t0
	mull	20(%esp)
	cmpl	%ecx, %edx		/ x1, t1
	movl	%edi, 4(%esp)
	ja	.LL14
	je	.LL24
.LL15:
	/ subtract q0*y from x, then shift the remainder back right
	movl	%ecx, %edi		/ x1,
	subl	%eax,%esi		/ t0, x0
	sbbl	%edx,%edi		/ t1,
	movl	%edi, %eax		/, x1
	movl	%eax, %edx		/ x1, x1
	xorl	%eax, %eax		/ x1
	xorl	%ebp, %ebp		/ x0
	addl	%esi, %eax		/ x0, x1
	adcl	%ebp, %edx		/ x0, x1
	movb	28(%esp), %cl
	shrdl	%edx, %eax		/, x1, x1
	shrl	%cl, %edx		/, x1
	andl	$32, %ecx
	je	.LL16
	movl	%edx, %eax		/ x1, x1
	xorl	%edx, %edx		/ x1
.LL16:
	movl	72(%esp), %ecx		/ pmod,
	movl	20(%esp), %esi		/, <result>
	xorl	%edi, %edi		/ <result>
	movl	%eax, (%ecx)		/ x1,
	movl	%edx, 4(%ecx)		/ x1,
	jmp	.LL22
	.align	16
.LL24:
	cmpl	%esi, %eax		/ x0, t0
	jbe	.LL15
.LL14:
	/ estimate was one too high (limited range of x2): adjust
	decl	20(%esp)
	subl	%ebp,%eax		/ y0, t0
	sbbl	24(%esp),%edx		/, t1
	jmp	.LL15
.LL23:
	/ shift count had bit 5 set: low word shifted entirely into high
	movl	%esi, %edi		/ dt, dt
	xorl	%esi, %esi		/ dt
	jmp	.LL17
	SET_SIZE(UDivRem)
378 * Unsigned division without remainder.
380 / uint64_t
381 / UDiv(uint64_t x, uint64_t y)
383 / if (HI(y) == 0) {
384 / /* simple cases: y is a single uint32_t */
385 / uint32_t div_hi, div_rem;
386 / uint32_t q0, q1;
388 / /* calculate q1 */
389 / if (HI(x) < LO(y)) {
390 / /* result is a single uint32_t, use one division */
391 / q1 = 0;
392 / div_hi = HI(x);
393 / } else {
394 / /* result is a double uint32_t, use two divisions */
395 / A_DIV32(HI(x), 0, LO(y), q1, div_hi);
398 / /* calculate q0 and remainder */
399 / A_DIV32(LO(x), div_hi, LO(y), q0, div_rem);
401 / /* return result */
402 / return (HILO(q1, q0));
404 / } else if (HI(x) < HI(y)) {
405 / /* HI(x) < HI(y) => x < y => result is 0 */
407 / /* return result */
408 / return (0);
410 / } else {
411 / /*
412 / * uint64_t by uint64_t division, resulting in a one-uint32_t
413 / * result
414 / */
415 / uint32_t y0, y1;
416 / uint32_t x1, x0;
417 / uint32_t q0;
418 / unsigned normshift;
420 / /* normalize by shifting x and y so MSB(y) == 1 */
421 / HIBIT(HI(y), normshift); /* index of highest 1 bit */
422 / normshift = 31 - normshift;
424 / if (normshift == 0) {
425 / /* no shifting needed, and x < 2*y so q <= 1 */
426 / y1 = HI(y);
427 / y0 = LO(y);
428 / x1 = HI(x);
429 / x0 = LO(x);
431 / /* if x >= y then q = 1 (note x1 >= y1) */
432 / if (x1 > y1 || x0 >= y0) {
433 / q0 = 1;
434 / /* subtract y from x to get remainder */
435 / /* A_SUB2(y0, y1, x0, x1); */
436 / } else {
437 / q0 = 0;
440 / /* return result */
441 / return (q0);
443 / } else {
444 / /*
445 / * the last case: result is one uint32_t, but we need to
446 / * normalize
447 / */
448 / uint64_t dt;
449 / uint32_t t0, t1, x2;
451 / /* normalize y */
452 / dt = (y << normshift);
453 / y1 = HI(dt);
454 / y0 = LO(dt);
456 / /* normalize x (we need 3 uint32_ts!!!) */
457 / x2 = (HI(x) >> (32 - normshift));
458 / dt = (x << normshift);
459 / x1 = HI(dt);
460 / x0 = LO(dt);
462 / /* estimate q0, and reduce x to a two uint32_t value */
463 / A_DIV32(x1, x2, y1, q0, x1);
465 / /* adjust q0 down if too high */
466 / /*
467 / * because of the limited range of x2 we can only be
468 / * one off
469 / */
470 / A_MUL32(y0, q0, t0, t1);
471 / if (t1 > x1 || (t1 == x1 && t0 > x0)) {
472 / q0--;
474 / /* return result */
475 / return (q0);
/ UDiv(uint64_t x, uint64_t y)
/ Unsigned 64/64 divide returning only the quotient (the remainder
/ computation of UDivRem is elided).  Hand-customized compiler
/ output; algorithm described in the C pseudocode comment above.
/ Internal entry point: x arrives in %edx:%eax; y is on the stack
/ (see callers __udiv64, __div64, __udivdi3, __divdi3).
/ Out: %edx:%eax = x / y.
/ FIX (review): the two "ret" instructions after the epilogues were
/ missing from this copy of the file (internal lines 514 and 549
/ were dropped); restored below.
	ENTRY(UDiv)
	pushl	%ebp
	pushl	%edi
	pushl	%esi
	subl	$40, %esp
	movl	%edx, 36(%esp)		/ x, x
	movl	60(%esp), %edx		/ y,
	testl	%edx, %edx		/ tmp62
	movl	%eax, 32(%esp)		/ x, x
	movl	%edx, %ecx		/ tmp61, tmp62
	movl	%edx, %eax		/, tmp61
	jne	.LL26			/ HI(y) != 0: full 64/64 path
	movl	36(%esp), %esi		/ x,
	cmpl	56(%esp), %esi		/ y, tmp67
	movl	%esi, %eax		/, tmp67
	movl	%esi, %edx		/ tmp67, div_hi
	jb	.LL28			/ HI(x) < LO(y): single division
	movl	%ecx, %edx		/ tmp62, div_hi
	divl	56(%esp)		/ y
	movl	%eax, %ecx		/, q1
.LL28:
	xorl	%esi, %esi		/ <result>
	movl	%ecx, %edi		/ <result>, <result>
	movl	32(%esp), %eax		/ x, q0
	xorl	%ecx, %ecx		/ q0
	divl	56(%esp)		/ y
	addl	%eax, %esi		/ q0, <result>
	adcl	%ecx, %edi		/ q0, <result>
.LL25:
	/ exit: quotient in %edi:%esi
	addl	$40, %esp
	movl	%esi, %eax		/ <result>, <result>
	popl	%esi
	movl	%edi, %edx		/ <result>, <result>
	popl	%edi
	popl	%ebp
	ret				/ restored (was dropped in this copy)
	.align	16
.LL26:
	movl	36(%esp), %esi		/ x,
	xorl	%edi, %edi
	movl	%esi, 24(%esp)		/ tmp1,
	movl	%edi, 28(%esp)
	xorl	%esi, %esi		/ <result>
	xorl	%edi, %edi		/ <result>
	cmpl	%eax, 24(%esp)		/ tmp61,
	jb	.LL25			/ HI(x) < HI(y) => quotient 0
	bsrl	%eax,%ebp		/ tmp61, normshift
	movl	$31, %eax		/, tmp85
	subl	%ebp, %eax		/ normshift, normshift
	jne	.LL32
	/ normshift == 0: x < 2*y so the quotient is 0 or 1
	movl	24(%esp), %eax		/, x1
	cmpl	%ecx, %eax		/ tmp62, x1
	movl	56(%esp), %esi		/ y, y0
	movl	32(%esp), %edx		/ x, x0
	ja	.LL34
	xorl	%eax, %eax		/ q0
	cmpl	%esi, %edx		/ y0, x0
	jb	.LL35
.LL34:
	movl	$1, %eax		/, q0
.LL35:
	movl	%eax, %esi		/ q0, <result>
	xorl	%edi, %edi		/ <result>
.LL45:
	/ second exit: quotient in %edi:%esi
	addl	$40, %esp
	movl	%esi, %eax		/ <result>, <result>
	popl	%esi
	movl	%edi, %edx		/ <result>, <result>
	popl	%edi
	popl	%ebp
	ret				/ restored (was dropped in this copy)
	.align	16
.LL32:
	/ normalize y by normshift (64-bit shift; andl $32 handles
	/ the >=32 wrap of shld/sal)
	movb	%al, %cl
	movl	56(%esp), %esi		/ y,
	movl	60(%esp), %edi		/ y,
	shldl	%esi, %edi
	sall	%cl, %esi
	andl	$32, %ecx
	jne	.LL43
.LL40:
	/ normalize x into three words x2:x1:x0, estimate q0
	movl	$32, %ecx		/, tmp96
	subl	%eax, %ecx		/ normshift, tmp96
	movl	%edi, %edx
	movl	%edi, 20(%esp)		/, dt
	movl	24(%esp), %ebp		/, x2
	xorl	%edi, %edi
	shrl	%cl, %ebp		/ tmp96, x2
	movl	%esi, 16(%esp)		/, dt
	movb	%al, %cl
	movl	32(%esp), %esi		/ x, dt
	movl	%edi, 12(%esp)
	movl	36(%esp), %edi		/ x, dt
	shldl	%esi, %edi		/, dt, dt
	sall	%cl, %esi		/, dt
	andl	$32, %ecx
	movl	%edx, 8(%esp)
	je	.LL41
	movl	%esi, %edi		/ dt, dt
	xorl	%esi, %esi		/ dt
.LL41:
	xorl	%ecx, %ecx
	movl	%edi, %eax		/ tmp1,
	movl	%ebp, %edx		/ x2,
	divl	8(%esp)
	movl	%edx, %ebp		/, x1
	movl	%ecx, 4(%esp)
	movl	%eax, %ecx		/, q0
	movl	16(%esp), %eax		/ dt,
	mull	%ecx			/ q0
	cmpl	%ebp, %edx		/ x1, t1
	movl	%edi, (%esp)
	movl	%esi, %edi		/ dt, x0
	ja	.LL38
	je	.LL44
.LL39:
	movl	%ecx, %esi		/ q0, <result>
.LL46:
	xorl	%edi, %edi		/ <result>
	jmp	.LL45
.LL44:
	cmpl	%edi, %eax		/ x0, t0
	jbe	.LL39
.LL38:
	/ estimate was one too high (limited range of x2): adjust
	decl	%ecx			/ q0
	movl	%ecx, %esi		/ q0, <result>
	jmp	.LL46
.LL43:
	/ shift count had bit 5 set: low word shifted entirely into high
	movl	%esi, %edi
	xorl	%esi, %esi
	jmp	.LL40
	SET_SIZE(UDiv)
613 * __udiv64
615 * Perform division of two unsigned 64-bit quantities, returning the
616 * quotient in %edx:%eax. __udiv64 pops the arguments on return,
/ __udiv64(uint64_t x, uint64_t y): unsigned 64-bit divide.
/ Sun cc helper convention: callee pops its 16 bytes of stack args
/ (ret $16).  Bridges to the internal UDiv entry point, which takes
/ x in %edx:%eax and y on the stack.
/ Out: quotient in %edx:%eax.
618 ENTRY(__udiv64)
619 movl 4(%esp), %eax / x, x
620 movl 8(%esp), %edx / x, x
/ both pushes use the same 16(%esp) displacement: the first selects
/ y.hi, and once %esp has moved the second selects y.lo
621 pushl 16(%esp) / y
622 pushl 16(%esp)
623 call UDiv
624 addl $8, %esp
625 ret $16
626 SET_SIZE(__udiv64)
629 * __urem64
631 * Perform division of two unsigned 64-bit quantities, returning the
632 * remainder in %edx:%eax. __urem64 pops the arguments on return
/ __urem64(uint64_t x, uint64_t y): unsigned 64-bit remainder.
/ Sun cc helper convention: callee pops its 16 bytes of stack args
/ (ret $16).  Carves a scratch slot on the stack for UDivRem's
/ *pmod out-parameter, then reloads it as the return value.
/ Out: remainder in %edx:%eax.
634 ENTRY(__urem64)
635 subl $12, %esp
636 movl %esp, %ecx /, tmp65
637 movl 16(%esp), %eax / x, x
638 movl 20(%esp), %edx / x, x
639 pushl %ecx / tmp65
/ both pushes use the same 32(%esp) displacement: the first selects
/ y.hi, and once %esp has moved the second selects y.lo
640 pushl 32(%esp) / y
641 pushl 32(%esp)
642 call UDivRem
643 movl 12(%esp), %eax / rem, rem
644 movl 16(%esp), %edx / rem, rem
645 addl $24, %esp
646 ret $16
647 SET_SIZE(__urem64)
650 * __div64
652 * Perform division of two signed 64-bit quantities, returning the
653 * quotient in %edx:%eax. __div64 pops the arguments on return.
655 / int64_t
656 / __div64(int64_t x, int64_t y)
658 / int negative;
659 / uint64_t xt, yt, r;
661 / if (x < 0) {
662 / xt = -(uint64_t) x;
663 / negative = 1;
664 / } else {
665 / xt = x;
666 / negative = 0;
668 / if (y < 0) {
669 / yt = -(uint64_t) y;
670 / negative ^= 1;
671 / } else {
672 / yt = y;
674 / r = UDiv(xt, yt);
675 / return (negative ? (int64_t) - r : r);
/ __div64(int64_t x, int64_t y): signed 64-bit divide.
/ Sun cc helper convention: callee pops its 16 bytes of stack args
/ (ret $16); quotient returned in %edx:%eax.
/ Strategy (see C pseudocode above): take absolute values of x and
/ y, track the result sign in %ebp (1 iff exactly one operand was
/ negative), call the unsigned UDiv, then negate the 64-bit result
/ if needed.
677 ENTRY(__div64)
678 pushl %ebp
679 pushl %edi
680 pushl %esi
681 subl $8, %esp
682 movl 28(%esp), %edx / x, x
683 testl %edx, %edx / x
684 movl 24(%esp), %eax / x, x
685 movl 32(%esp), %esi / y, y
686 movl 36(%esp), %edi / y, y
687 js .LL84
688 xorl %ebp, %ebp / negative
689 testl %edi, %edi / y
690 movl %eax, (%esp) / x, xt
691 movl %edx, 4(%esp) / x, xt
692 movl %esi, %eax / y, yt
693 movl %edi, %edx / y, yt
694 js .LL85
695 .LL82:
696 pushl %edx / yt
697 pushl %eax / yt
698 movl 8(%esp), %eax / xt, xt
699 movl 12(%esp), %edx / xt, xt
700 call UDiv
701 popl %ecx
702 testl %ebp, %ebp / negative
703 popl %esi
704 je .LL83
/ 64-bit negate: negl lo / adcl $0 hi / negl hi
705 negl %eax / r
706 adcl $0, %edx /, r
707 negl %edx / r
708 .LL83:
709 addl $8, %esp
710 popl %esi
711 popl %edi
712 popl %ebp
713 ret $16
714 .align 16
715 .LL84:
/ x < 0: xt = -x, negative = 1
716 negl %eax / x
717 adcl $0, %edx /, x
718 negl %edx / x
719 testl %edi, %edi / y
720 movl %eax, (%esp) / x, xt
721 movl %edx, 4(%esp) / x, xt
722 movl $1, %ebp /, negative
723 movl %esi, %eax / y, yt
724 movl %edi, %edx / y, yt
725 jns .LL82
726 .align 16
727 .LL85:
/ y < 0: yt = -y, flip the result sign
728 negl %eax / yt
729 adcl $0, %edx /, yt
730 negl %edx / yt
731 xorl $1, %ebp /, negative
732 jmp .LL82
733 SET_SIZE(__div64)
736 * __rem64
738 * Perform division of two signed 64-bit quantities, returning the
739 * remainder in %edx:%eax. __rem64 pops the arguments on return.
741 / int64_t
742 / __rem64(int64_t x, int64_t y)
744 / uint64_t xt, yt, rem;
746 / if (x < 0) {
747 / xt = -(uint64_t) x;
748 / } else {
749 / xt = x;
751 / if (y < 0) {
752 / yt = -(uint64_t) y;
753 / } else {
754 / yt = y;
756 / (void) UDivRem(xt, yt, &rem);
757 / return (x < 0 ? (int64_t) - rem : rem);
/ __rem64(int64_t x, int64_t y): signed 64-bit remainder.
/ Sun cc helper convention: callee pops its 16 bytes of stack args
/ (ret $16); remainder returned in %edx:%eax.
/ Computes UDivRem(|x|, |y|, &rem); the result takes the sign of x
/ (the dividend), matching C truncating-division semantics -- the
/ sign of y only affects |y|, not the result sign.
759 ENTRY(__rem64)
760 pushl %edi
761 pushl %esi
762 subl $20, %esp
763 movl 36(%esp), %ecx / x,
764 movl 32(%esp), %esi / x,
765 movl 36(%esp), %edi / x,
766 testl %ecx, %ecx
767 movl 40(%esp), %eax / y, y
768 movl 44(%esp), %edx / y, y
769 movl %esi, (%esp) /, xt
770 movl %edi, 4(%esp) /, xt
771 js .LL92
772 testl %edx, %edx / y
773 movl %eax, %esi / y, yt
774 movl %edx, %edi / y, yt
775 js .LL93
776 .LL90:
/ push &rem, yt.hi, yt.lo; xt goes in %edx:%eax per UDivRem's
/ internal convention
777 leal 8(%esp), %eax /, tmp66
778 pushl %eax / tmp66
779 pushl %edi / yt
780 pushl %esi / yt
781 movl 12(%esp), %eax / xt, xt
782 movl 16(%esp), %edx / xt, xt
783 call UDivRem
784 addl $12, %esp
785 movl 36(%esp), %edi / x,
786 testl %edi, %edi
787 movl 8(%esp), %eax / rem, rem
788 movl 12(%esp), %edx / rem, rem
789 js .LL94
790 addl $20, %esp
791 popl %esi
792 popl %edi
793 ret $16
794 .align 16
795 .LL92:
/ x < 0: xt = -x (64-bit negate)
796 negl %esi
797 adcl $0, %edi
798 negl %edi
799 testl %edx, %edx / y
800 movl %esi, (%esp) /, xt
801 movl %edi, 4(%esp) /, xt
802 movl %eax, %esi / y, yt
803 movl %edx, %edi / y, yt
804 jns .LL90
805 .align 16
806 .LL93:
/ y < 0: yt = -y
807 negl %esi / yt
808 adcl $0, %edi /, yt
809 negl %edi / yt
810 jmp .LL90
811 .align 16
812 .LL94:
/ x was negative: negate the remainder before returning
813 negl %eax / rem
814 adcl $0, %edx /, rem
815 addl $20, %esp
816 popl %esi
817 negl %edx / rem
818 popl %edi
819 ret $16
820 SET_SIZE(__rem64)
823 * C support for 64-bit modulo and division.
824 * GNU routines callable from C (though generated by the compiler).
825 * Hand-customized compiler output - see comments for details.
829 * int32_t/int64_t division/manipulation
831 * Hand-customized compiler output: the non-GCC entry points depart from
832 * the SYS V ABI by requiring their arguments to be popped, and in the
833 * [u]divrem64 cases returning the remainder in %ecx:%esi. Note the
834 * compiler-generated use of %edx:%eax for the first argument of
835 * internal entry points.
837 * Inlines for speed:
838 * - counting the number of leading zeros in a word
839 * - multiplying two 32-bit numbers giving a 64-bit result
840 * - dividing a 64-bit number by a 32-bit number, giving both quotient
841 * and remainder
842 * - subtracting two 64-bit results
844 / #define LO(X) ((uint32_t)(X) & 0xffffffff)
845 / #define HI(X) ((uint32_t)((X) >> 32) & 0xffffffff)
846 / #define HILO(H, L) (((uint64_t)(H) << 32) + (L))
848 / /* give index of highest bit */
849 / #define HIBIT(a, r) \
850 / asm("bsrl %1,%0": "=r"((uint32_t)(r)) : "g" (a))
852 / /* multiply two uint32_ts resulting in a uint64_t */
853 / #define A_MUL32(a, b, lo, hi) \
854 / asm("mull %2" \
855 / : "=a"((uint32_t)(lo)), "=d"((uint32_t)(hi)) : "g" (b), "0"(a))
857 / /* divide a uint64_t by a uint32_t */
858 / #define A_DIV32(lo, hi, b, q, r) \
859 / asm("divl %2" \
860 / : "=a"((uint32_t)(q)), "=d"((uint32_t)(r)) \
861 / : "g" (b), "0"((uint32_t)(lo)), "1"((uint32_t)hi))
863 / /* subtract two uint64_ts (with borrow) */
864 / #define A_SUB2(bl, bh, al, ah) \
865 / asm("subl %4,%0\n\tsbbl %5,%1" \
866 / : "=&r"((uint32_t)(al)), "=r"((uint32_t)(ah)) \
867 / : "0"((uint32_t)(al)), "1"((uint32_t)(ah)), "g"((uint32_t)(bl)), \
868 / "g"((uint32_t)(bh)))
871 * __udivdi3
873 * Perform division of two unsigned 64-bit quantities, returning the
874 * quotient in %edx:%eax.
/ __udivdi3(uint64_t x, uint64_t y): GCC millicode for unsigned
/ 64-bit division; quotient in %edx:%eax.  Standard C calling
/ convention -- the CALLER pops the arguments -- so this ends in a
/ plain ret, unlike the Sun-convention __udiv64 (ret $16).
/ FIX (review): the trailing "ret" was missing from this copy
/ (internal line 883 was dropped); restored below.
	ENTRY(__udivdi3)
	movl	4(%esp), %eax		/ x, x
	movl	8(%esp), %edx		/ x, x
	pushl	16(%esp)		/ y.hi (same disp twice: %esp moves)
	pushl	16(%esp)		/ y.lo
	call	UDiv
	addl	$8, %esp
	ret				/ restored (was dropped in this copy)
	SET_SIZE(__udivdi3)
887 * __umoddi3
889 * Perform division of two unsigned 64-bit quantities, returning the
890 * remainder in %edx:%eax.
/ __umoddi3(uint64_t x, uint64_t y): GCC millicode for unsigned
/ 64-bit modulo; remainder in %edx:%eax.  Standard C calling
/ convention -- the caller pops the arguments (plain ret).
/ A 12-byte scratch slot holds UDivRem's *pmod out-parameter.
/ FIX (review): the trailing "ret" was missing from this copy
/ (internal line 904 was dropped); restored below.
	ENTRY(__umoddi3)
	subl	$12, %esp		/ scratch slot for *pmod
	movl	%esp, %ecx		/, tmp65 (= &rem)
	movl	16(%esp), %eax		/ x, x
	movl	20(%esp), %edx		/ x, x
	pushl	%ecx			/ tmp65
	pushl	32(%esp)		/ y.hi (same disp twice: %esp moves)
	pushl	32(%esp)		/ y.lo
	call	UDivRem
	movl	12(%esp), %eax		/ rem, rem
	movl	16(%esp), %edx		/ rem, rem
	addl	$24, %esp
	ret				/ restored (was dropped in this copy)
	SET_SIZE(__umoddi3)
908 * __divdi3
910 * Perform division of two signed 64-bit quantities, returning the
911 * quotient in %edx:%eax.
913 / int64_t
914 / __divdi3(int64_t x, int64_t y)
916 / int negative;
917 / uint64_t xt, yt, r;
919 / if (x < 0) {
920 / xt = -(uint64_t) x;
921 / negative = 1;
922 / } else {
923 / xt = x;
924 / negative = 0;
926 / if (y < 0) {
927 / yt = -(uint64_t) y;
928 / negative ^= 1;
929 / } else {
930 / yt = y;
932 / r = UDiv(xt, yt);
933 / return (negative ? (int64_t) - r : r);
/ __divdi3(int64_t x, int64_t y): GCC millicode for signed 64-bit
/ division; quotient in %edx:%eax.  Standard C calling convention
/ (caller pops args; plain ret).
/ Strategy (see C pseudocode above): take absolute values, track
/ the result sign in %ebp, call UDiv, negate the result if needed.
/ FIX (review): the "ret" after the .LL54 epilogue was missing from
/ this copy (internal line 971 was dropped); restored below.
	ENTRY(__divdi3)
	pushl	%ebp
	pushl	%edi
	pushl	%esi
	subl	$8, %esp
	movl	28(%esp), %edx		/ x, x
	testl	%edx, %edx		/ x
	movl	24(%esp), %eax		/ x, x
	movl	32(%esp), %esi		/ y, y
	movl	36(%esp), %edi		/ y, y
	js	.LL55
	xorl	%ebp, %ebp		/ negative
	testl	%edi, %edi		/ y
	movl	%eax, (%esp)		/ x, xt
	movl	%edx, 4(%esp)		/ x, xt
	movl	%esi, %eax		/ y, yt
	movl	%edi, %edx		/ y, yt
	js	.LL56
.LL53:
	pushl	%edx			/ yt
	pushl	%eax			/ yt
	movl	8(%esp), %eax		/ xt, xt
	movl	12(%esp), %edx		/ xt, xt
	call	UDiv
	popl	%ecx
	testl	%ebp, %ebp		/ negative
	popl	%esi
	je	.LL54
	/ 64-bit negate: negl lo / adcl $0 hi / negl hi
	negl	%eax			/ r
	adcl	$0, %edx		/, r
	negl	%edx			/ r
.LL54:
	addl	$8, %esp
	popl	%esi
	popl	%edi
	popl	%ebp
	ret				/ restored (was dropped in this copy)
	.align	16
.LL55:
	/ x < 0: xt = -x, negative = 1
	negl	%eax			/ x
	adcl	$0, %edx		/, x
	negl	%edx			/ x
	testl	%edi, %edi		/ y
	movl	%eax, (%esp)		/ x, xt
	movl	%edx, 4(%esp)		/ x, xt
	movl	$1, %ebp		/, negative
	movl	%esi, %eax		/ y, yt
	movl	%edi, %edx		/ y, yt
	jns	.LL53
	.align	16
.LL56:
	/ y < 0: yt = -y, flip the result sign
	negl	%eax			/ yt
	adcl	$0, %edx		/, yt
	negl	%edx			/ yt
	xorl	$1, %ebp		/, negative
	jmp	.LL53
	SET_SIZE(__divdi3)
994 * __moddi3
996 * Perform division of two signed 64-bit quantities, returning the
997 * quotient in %edx:%eax.
999 / int64_t
1000 / __moddi3(int64_t x, int64_t y)
1002 / uint64_t xt, yt, rem;
1004 / if (x < 0) {
1005 / xt = -(uint64_t) x;
1006 / } else {
1007 / xt = x;
1009 / if (y < 0) {
1010 / yt = -(uint64_t) y;
1011 / } else {
1012 / yt = y;
1014 / (void) UDivRem(xt, yt, &rem);
1015 / return (x < 0 ? (int64_t) - rem : rem);
/ __moddi3(int64_t x, int64_t y): GCC millicode for signed 64-bit
/ modulo; remainder in %edx:%eax.  Standard C calling convention
/ (caller pops args; plain ret).
/ Computes UDivRem(|x|, |y|, &rem); the result takes the sign of x
/ (the dividend), matching C truncating-division semantics.
/ FIX (review): both trailing "ret" instructions were missing from
/ this copy (internal lines 1051 and 1077 were dropped); restored.
	ENTRY(__moddi3)
	pushl	%edi
	pushl	%esi
	subl	$20, %esp
	movl	36(%esp), %ecx		/ x,
	movl	32(%esp), %esi		/ x,
	movl	36(%esp), %edi		/ x,
	testl	%ecx, %ecx
	movl	40(%esp), %eax		/ y, y
	movl	44(%esp), %edx		/ y, y
	movl	%esi, (%esp)		/, xt
	movl	%edi, 4(%esp)		/, xt
	js	.LL63
	testl	%edx, %edx		/ y
	movl	%eax, %esi		/ y, yt
	movl	%edx, %edi		/ y, yt
	js	.LL64
.LL61:
	/ push &rem, yt.hi, yt.lo; xt goes in %edx:%eax for UDivRem
	leal	8(%esp), %eax		/, tmp66
	pushl	%eax			/ tmp66
	pushl	%edi			/ yt
	pushl	%esi			/ yt
	movl	12(%esp), %eax		/ xt, xt
	movl	16(%esp), %edx		/ xt, xt
	call	UDivRem
	addl	$12, %esp
	movl	36(%esp), %edi		/ x,
	testl	%edi, %edi
	movl	8(%esp), %eax		/ rem, rem
	movl	12(%esp), %edx		/ rem, rem
	js	.LL65
	addl	$20, %esp
	popl	%esi
	popl	%edi
	ret				/ restored (was dropped in this copy)
	.align	16
.LL63:
	/ x < 0: xt = -x (64-bit negate)
	negl	%esi
	adcl	$0, %edi
	negl	%edi
	testl	%edx, %edx		/ y
	movl	%esi, (%esp)		/, xt
	movl	%edi, 4(%esp)		/, xt
	movl	%eax, %esi		/ y, yt
	movl	%edx, %edi		/ y, yt
	jns	.LL61
	.align	16
.LL64:
	/ y < 0: yt = -y
	negl	%esi			/ yt
	adcl	$0, %edi		/, yt
	negl	%edi			/ yt
	jmp	.LL61
	.align	16
.LL65:
	/ x was negative: negate the remainder before returning
	negl	%eax			/ rem
	adcl	$0, %edx		/, rem
	addl	$20, %esp
	popl	%esi
	negl	%edx			/ rem
	popl	%edi
	ret				/ restored (was dropped in this copy)
	SET_SIZE(__moddi3)
1081 * __udivrem64
1083 * Perform division of two unsigned 64-bit quantities, returning the
1084 * quotient in %edx:%eax, and the remainder in %ecx:%esi. __udivrem64
1085 * pops the arguments on return.
/ __udivrem64(uint64_t x, uint64_t y): unsigned divide returning
/ BOTH quotient and remainder.  Departs from the SYSV ABI (see the
/ comment block above): callee pops its 16 bytes of args (ret $16);
/ quotient in %edx:%eax, remainder in %ecx:%esi.
/ A 12-byte scratch slot holds UDivRem's *pmod out-parameter.
1087 ENTRY(__udivrem64)
1088 subl $12, %esp
1089 movl %esp, %ecx /, tmp64
1090 movl 16(%esp), %eax / x, x
1091 movl 20(%esp), %edx / x, x
1092 pushl %ecx / tmp64
/ both pushes use the same 32(%esp) displacement: the first selects
/ y.hi, and once %esp has moved the second selects y.lo
1093 pushl 32(%esp) / y
1094 pushl 32(%esp)
1095 call UDivRem
1096 movl 16(%esp), %ecx / rem, tmp63
1097 movl 12(%esp), %esi / rem
1098 addl $24, %esp
1099 ret $16
1100 SET_SIZE(__udivrem64)
1103 * Signed division with remainder.
1105 / int64_t
1106 / SDivRem(int64_t x, int64_t y, int64_t * pmod)
1108 / int negative;
1109 / uint64_t xt, yt, r, rem;
1111 / if (x < 0) {
1112 / xt = -(uint64_t) x;
1113 / negative = 1;
1114 / } else {
1115 / xt = x;
1116 / negative = 0;
1118 / if (y < 0) {
1119 / yt = -(uint64_t) y;
1120 / negative ^= 1;
1121 / } else {
1122 / yt = y;
1124 / r = UDivRem(xt, yt, &rem);
1125 / *pmod = (x < 0 ? (int64_t) - rem : rem);
1126 / return (negative ? (int64_t) - r : r);
/ SDivRem(int64_t x, int64_t y, int64_t *pmod)
/ Signed 64/64 divide with remainder; hand-customized compiler
/ output (see the C pseudocode above).  Internal entry point:
/ x arrives in %edx:%eax; y and pmod are on the stack (see caller
/ __divrem64).  Takes absolute values, tracks the quotient sign in
/ %ebp, calls UDivRem, then fixes up signs: the remainder takes the
/ sign of x, the quotient is negated iff exactly one operand was
/ negative.
/ Out: %edx:%eax = quotient; *pmod = remainder.
/ FIX (review): the "ret" after the .LL72 epilogue was missing from
/ this copy (internal line 1175 was dropped); restored below.
	ENTRY(SDivRem)
	pushl	%ebp
	pushl	%edi
	pushl	%esi
	subl	$24, %esp
	testl	%edx, %edx		/ x
	movl	%edx, %edi		/ x, x
	js	.LL73
	movl	44(%esp), %esi		/ y,
	xorl	%ebp, %ebp		/ negative
	testl	%esi, %esi
	movl	%edx, 12(%esp)		/ x, xt
	movl	%eax, 8(%esp)		/ x, xt
	movl	40(%esp), %edx		/ y, yt
	movl	44(%esp), %ecx		/ y, yt
	js	.LL74
.LL70:
	/ push &rem, yt.hi, yt.lo; xt goes in %edx:%eax for UDivRem
	leal	16(%esp), %eax		/, tmp70
	pushl	%eax			/ tmp70
	pushl	%ecx			/ yt
	pushl	%edx			/ yt
	movl	20(%esp), %eax		/ xt, xt
	movl	24(%esp), %edx		/ xt, xt
	call	UDivRem
	movl	%edx, 16(%esp)		/, r
	movl	%eax, 12(%esp)		/, r
	addl	$12, %esp
	testl	%edi, %edi		/ x
	movl	16(%esp), %edx		/ rem, rem
	movl	20(%esp), %ecx		/ rem, rem
	js	.LL75
.LL71:
	movl	48(%esp), %edi		/ pmod, pmod
	testl	%ebp, %ebp		/ negative
	movl	%edx, (%edi)		/ rem,* pmod
	movl	%ecx, 4(%edi)		/ rem,
	movl	(%esp), %eax		/ r, r
	movl	4(%esp), %edx		/ r, r
	je	.LL72
	/ 64-bit negate of the quotient
	negl	%eax			/ r
	adcl	$0, %edx		/, r
	negl	%edx			/ r
.LL72:
	addl	$24, %esp
	popl	%esi
	popl	%edi
	popl	%ebp
	ret				/ restored (was dropped in this copy)
	.align	16
.LL73:
	/ x < 0: xt = -x, negative = 1
	negl	%eax
	adcl	$0, %edx
	movl	44(%esp), %esi		/ y,
	negl	%edx
	testl	%esi, %esi
	movl	%edx, 12(%esp)		/, xt
	movl	%eax, 8(%esp)		/, xt
	movl	$1, %ebp		/, negative
	movl	40(%esp), %edx		/ y, yt
	movl	44(%esp), %ecx		/ y, yt
	jns	.LL70
	.align	16
.LL74:
	/ y < 0: yt = -y, flip the quotient sign
	negl	%edx			/ yt
	adcl	$0, %ecx		/, yt
	negl	%ecx			/ yt
	xorl	$1, %ebp		/, negative
	jmp	.LL70
	.align	16
.LL75:
	/ x was negative: negate the remainder
	negl	%edx			/ rem
	adcl	$0, %ecx		/, rem
	negl	%ecx			/ rem
	jmp	.LL71
	SET_SIZE(SDivRem)
1205 * __divrem64
1207 * Perform division of two signed 64-bit quantities, returning the
1208 * quotient in %edx:%eax, and the remainder in %ecx:%esi. __divrem64
1209 * pops the arguments on return.
/ __divrem64(int64_t x, int64_t y): signed divide returning BOTH
/ quotient and remainder.  Departs from the SYSV ABI (see the
/ comment block above): callee pops its 16 bytes of args (ret $16);
/ quotient in %edx:%eax, remainder in %ecx:%esi.
/ A 20-byte scratch area holds SDivRem's *pmod out-parameter.
1211 ENTRY(__divrem64)
1212 subl $20, %esp
1213 movl %esp, %ecx /, tmp64
1214 movl 24(%esp), %eax / x, x
1215 movl 28(%esp), %edx / x, x
1216 pushl %ecx / tmp64
/ both pushes use the same 40(%esp) displacement: the first selects
/ y.hi, and once %esp has moved the second selects y.lo
1217 pushl 40(%esp) / y
1218 pushl 40(%esp)
1219 call SDivRem
1220 movl 16(%esp), %ecx
1221 movl 12(%esp),%esi / rem
1222 addl $32, %esp
1223 ret $16
1224 SET_SIZE(__divrem64)
1228 #endif /* defined(__i386) && !defined(__amd64) */