/* mini-gmp, a minimalistic implementation of a GNU GMP subset.

   Contributed to the GNU project by Niels Möller

   Copyright 1991-1997, 1999-2019 Free Software Foundation, Inc.

   This file is part of the GNU MP Library.

   The GNU MP Library is free software; you can redistribute it and/or modify
   it under the terms of either:

     * the GNU Lesser General Public License as published by the Free
       Software Foundation; either version 3 of the License, or (at your
       option) any later version.

   or

     * the GNU General Public License as published by the Free Software
       Foundation; either version 2 of the License, or (at your option) any
       later version.

   or both in parallel, as here.

   The GNU MP Library is distributed in the hope that it will be useful, but
   WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
   or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
   for more details.

   You should have received copies of the GNU General Public License and the
   GNU Lesser General Public License along with the GNU MP Library. If not,
   see https://www.gnu.org/licenses/. */

/* Modified by Mikulas Patocka to fit in Ajla */

/* NOTE: All functions in this file which are not declared in
   mini-gmp.h are internal, and are not intended to be compatible
   with GMP or with future versions of mini-gmp. */

/* Much of the material copied from GMP files, including: gmp-impl.h,
   longlong.h, mpn/generic/add_n.c, mpn/generic/addmul_1.c,
   mpn/generic/lshift.c, mpn/generic/mul_1.c,
   mpn/generic/mul_basecase.c, mpn/generic/rshift.c,
   mpn/generic/sbpi1_div_qr.c, mpn/generic/sub_n.c,
   mpn/generic/submul_1.c. */

#include "ajla.h"

#ifndef MPINT_GMP

#include <limits.h>

#ifdef __BORLANDC__
#define const
#endif

#include "mini-gmp.h"

#define assert(x) ajla_assert(x, (file_line, "gmp assertion failed"))

/* Macros */
#define GMP_LIMB_BITS ((int)sizeof(mp_limb_t) * CHAR_BIT)

#define GMP_LIMB_MAX ((mp_limb_t) ~ (mp_limb_t) 0)
#define GMP_LIMB_HIGHBIT ((mp_limb_t) 1 << (GMP_LIMB_BITS - 1))

#define GMP_HLIMB_BIT ((mp_limb_t) 1 << (GMP_LIMB_BITS / 2))
#define GMP_LLIMB_MASK (GMP_HLIMB_BIT - 1)

#define GMP_ULONG_BITS (sizeof(unsigned long) * CHAR_BIT)
#define GMP_ULONG_HIGHBIT ((unsigned long) 1 << (GMP_ULONG_BITS - 1))

#define GMP_ABS(x) ((x) >= 0 ? (x) : -(x))
#define GMP_NEG_CAST(T,x) (-((T)((x) + 1) - 1))

#define GMP_MIN(a, b) ((a) < (b) ? (a) : (b))
#define GMP_MAX(a, b) ((a) > (b) ? (a) : (b))

#define GMP_CMP(a,b) (((a) > (b)) - ((a) < (b)))

#if defined(DBL_MANT_DIG) && FLT_RADIX == 2
#define GMP_DBL_MANT_BITS DBL_MANT_DIG
#else
#define GMP_DBL_MANT_BITS (53)
#endif

/* Return non-zero if xp,xsize and yp,ysize overlap.
   If xp+xsize<=yp there's no overlap, or if yp+ysize<=xp there's no
   overlap. If both these are false, there's an overlap. */
#define GMP_MPN_OVERLAP_P(xp, xsize, yp, ysize) \
  ((xp) + (xsize) > (yp) && (yp) + (ysize) > (xp))

#define gmp_assert_nocarry(x) do { \
    mp_limb_t attr_unused __cy = (x); \
    assert (__cy == 0); \
  } while (0)

#define gmp_clz(count, x) do { \
    mp_limb_t __clz_x = (x); \
    unsigned __clz_c = 0; \
    int LOCAL_SHIFT_BITS = 8; \
    if (GMP_LIMB_BITS > LOCAL_SHIFT_BITS) \
      for (; \
           (__clz_x & ((mp_limb_t) 0xff << (GMP_LIMB_BITS - 8))) == 0; \
           __clz_c += 8) \
        { __clz_x <<= LOCAL_SHIFT_BITS; } \
    for (; (__clz_x & GMP_LIMB_HIGHBIT) == 0; __clz_c++) \
      __clz_x <<= 1; \
    (count) = __clz_c; \
  } while (0)

#define gmp_ctz(count, x) do { \
    mp_limb_t __ctz_x = (x); \
    unsigned __ctz_c = 0; \
    gmp_clz (__ctz_c, __ctz_x & - __ctz_x); \
    (count) = GMP_LIMB_BITS - 1 - __ctz_c; \
  } while (0)
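
/* Illustrative note (added comment, not from the GMP sources): x & -x
   isolates the lowest set bit, so counting the leading zeros of that
   single-bit value gives the trailing-zero count. E.g. for x = 40
   (binary 101000), x & -x = 0b1000; with 32-bit limbs gmp_clz yields 28,
   and 32 - 1 - 28 = 3, the number of trailing zero bits of 40. */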

#define gmp_add_ssaaaa(sh, sl, ah, al, bh, bl) \
  do { \
    mp_limb_t __x; \
    __x = (al) + (bl); \
    (sh) = (ah) + (bh) + (__x < (al)); \
    (sl) = __x; \
  } while (0)
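
/* Illustrative note (added comment): the carry out of the low limbs is
   detected with the unsigned wraparound test (__x < (al)). With
   hypothetical 8-bit limbs, 0xF0 + 0x20 wraps to 0x10, and 0x10 < 0xF0,
   so 1 is added into the high sum. */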

#define gmp_sub_ddmmss(sh, sl, ah, al, bh, bl) \
  do { \
    mp_limb_t __x; \
    __x = (al) - (bl); \
    (sh) = (ah) - (bh) - ((al) < (bl)); \
    (sl) = __x; \
  } while (0)

#define gmp_umul_ppmm(w1, w0, u, v) \
  do { \
    int LOCAL_GMP_LIMB_BITS = GMP_LIMB_BITS; \
    if (sizeof(unsigned int) * CHAR_BIT >= 2 * GMP_LIMB_BITS) \
      { \
        unsigned int __ww = (unsigned int) (u) * (v); \
        w0 = (mp_limb_t) __ww; \
        w1 = (mp_limb_t) (__ww >> LOCAL_GMP_LIMB_BITS); \
      } \
    else if (GMP_ULONG_BITS >= 2 * GMP_LIMB_BITS) \
      { \
        unsigned long int __ww = (unsigned long int) (u) * (v); \
        w0 = (mp_limb_t) __ww; \
        w1 = (mp_limb_t) (__ww >> LOCAL_GMP_LIMB_BITS); \
      } \
    else { \
      mp_limb_t __x0, __x1, __x2, __x3; \
      unsigned __ul, __vl, __uh, __vh; \
      mp_limb_t __u = (u), __v = (v); \
      __ul = __u & GMP_LLIMB_MASK; \
      __uh = __u >> (GMP_LIMB_BITS / 2); \
      __vl = __v & GMP_LLIMB_MASK; \
      __vh = __v >> (GMP_LIMB_BITS / 2); \
      __x0 = (mp_limb_t) __ul * __vl; \
      __x1 = (mp_limb_t) __ul * __vh; \
      __x2 = (mp_limb_t) __uh * __vl; \
      __x3 = (mp_limb_t) __uh * __vh; \
      __x1 += __x0 >> (GMP_LIMB_BITS / 2); /* this can't give carry */ \
      __x1 += __x2; /* but this indeed can */ \
      if (__x1 < __x2) /* did we get it? */ \
        __x3 += GMP_HLIMB_BIT; /* yes, add it in the proper pos. */ \
      (w1) = __x3 + (__x1 >> (GMP_LIMB_BITS / 2)); \
      (w0) = (__x1 << (GMP_LIMB_BITS / 2)) + (__x0 & GMP_LLIMB_MASK); \
    } \
  } while (0)
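
/* Worked example (added comment, not from the GMP sources): with
   hypothetical 8-bit limbs, the half-limb base is b = 16. For u = 0xAB,
   v = 0xCD the halves are uh = 0xA, ul = 0xB, vh = 0xC, vl = 0xD, giving
   partial products x0 = 0x8F, x1 = 0x84, x2 = 0x82, x3 = 0x78.
   Accumulating as above: x1 += 0x8 -> 0x8C; x1 += x2 wraps to 0x0E with a
   carry folded into x3 (0x88); the results are w1 = 0x88, w0 = 0xEF,
   i.e. 0xAB * 0xCD = 0x88EF. */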

#define gmp_udiv_qrnnd_preinv(q, r, nh, nl, d, di) \
  do { \
    mp_limb_t _qh, _ql, _r, _mask; \
    gmp_umul_ppmm (_qh, _ql, (nh), (di)); \
    gmp_add_ssaaaa (_qh, _ql, _qh, _ql, (nh) + 1, (nl)); \
    _r = (nl) - _qh * (d); \
    _mask = -(mp_limb_t) (_r > _ql); /* both > and >= are OK */ \
    _qh += _mask; \
    _r += _mask & (d); \
    if (_r >= (d)) \
      { \
        _r -= (d); \
        _qh++; \
      } \
    (r) = _r; \
    (q) = _qh; \
  } while (0)
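
/* Note (added comment): this follows the division-by-invariant-integer
   technique of Granlund and Möller. di is a precomputed approximate
   reciprocal of the normalized divisor d (roughly floor((B^2-1)/d) - B,
   with B = 2^GMP_LIMB_BITS), the two-limb candidate quotient is formed
   from nh * di plus <nh + 1, nl> using only multiplies and adds, and a
   small constant number of correction steps finishes the job. The 3/2
   variant below extends the same idea to two-limb divisors. */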

#define gmp_udiv_qr_3by2(q, r1, r0, n2, n1, n0, d1, d0, dinv) \
  do { \
    mp_limb_t _q0, _t1, _t0, _mask; \
    gmp_umul_ppmm ((q), _q0, (n2), (dinv)); \
    gmp_add_ssaaaa ((q), _q0, (q), _q0, (n2), (n1)); \
    /* Compute the two most significant limbs of n - q'd */ \
    (r1) = (n1) - (d1) * (q); \
    gmp_sub_ddmmss ((r1), (r0), (r1), (n0), (d1), (d0)); \
    gmp_umul_ppmm (_t1, _t0, (d0), (q)); \
    gmp_sub_ddmmss ((r1), (r0), (r1), (r0), _t1, _t0); \
    (q)++; \
    /* Conditionally adjust q and the remainders */ \
    _mask = - (mp_limb_t) ((r1) >= _q0); \
    (q) += _mask; \
    gmp_add_ssaaaa ((r1), (r0), (r1), (r0), _mask & (d1), _mask & (d0)); \
    if ((r1) >= (d1)) \
      { \
        if ((r1) > (d1) || (r0) >= (d0)) \
          { \
            (q)++; \
            gmp_sub_ddmmss ((r1), (r0), (r1), (r0), (d1), (d0)); \
          } \
      } \
  } while (0)

/* Swap macros. */
#define MP_LIMB_T_SWAP(x, y) \
  do { \
    mp_limb_t __mp_limb_t_swap__tmp = (x); \
    (x) = (y); \
    (y) = __mp_limb_t_swap__tmp; \
  } while (0)
#define MP_SIZE_T_SWAP(x, y) \
  do { \
    mp_size_t __mp_size_t_swap__tmp = (x); \
    (x) = (y); \
    (y) = __mp_size_t_swap__tmp; \
  } while (0)
#define MP_BITCNT_T_SWAP(x,y) \
  do { \
    mp_bitcnt_t __mp_bitcnt_t_swap__tmp = (x); \
    (x) = (y); \
    (y) = __mp_bitcnt_t_swap__tmp; \
  } while (0)
#define MP_PTR_SWAP(x, y) \
  do { \
    mp_ptr __mp_ptr_swap__tmp = (x); \
    (x) = (y); \
    (y) = __mp_ptr_swap__tmp; \
  } while (0)
#define MP_SRCPTR_SWAP(x, y) \
  do { \
    mp_srcptr __mp_srcptr_swap__tmp = (x); \
    (x) = (y); \
    (y) = __mp_srcptr_swap__tmp; \
  } while (0)

#define MPN_PTR_SWAP(xp,xs, yp,ys) \
  do { \
    MP_PTR_SWAP (xp, yp); \
    MP_SIZE_T_SWAP (xs, ys); \
  } while(0)
#define MPN_SRCPTR_SWAP(xp,xs, yp,ys) \
  do { \
    MP_SRCPTR_SWAP (xp, yp); \
    MP_SIZE_T_SWAP (xs, ys); \
  } while(0)

#define MPZ_PTR_SWAP(x, y) \
  do { \
    mpz_ptr __mpz_ptr_swap__tmp = (x); \
    (x) = (y); \
    (y) = __mpz_ptr_swap__tmp; \
  } while (0)
#define MPZ_SRCPTR_SWAP(x, y) \
  do { \
    mpz_srcptr __mpz_srcptr_swap__tmp = (x); \
    (x) = (y); \
    (y) = __mpz_srcptr_swap__tmp; \
  } while (0)

const int mp_bits_per_limb = GMP_LIMB_BITS;


/* Memory allocation and other helper functions. */
static attr_noreturn
gmp_die (const char *msg)
{
  internal(file_line, "%s", msg);
}

static void *
gmp_default_alloc (size_t size)
{
  void *p;

  assert (size > 0);

  p = malloc (size);
  if (!p)
    gmp_die("gmp_default_alloc: Virtual memory exhausted.");

  return p;
}

static void *
gmp_default_realloc (void *old, size_t attr_unused unused_old_size, size_t new_size)
{
  void * p;

  p = realloc (old, new_size);

  if (!p)
    gmp_die("gmp_default_realloc: Virtual memory exhausted.");

  return p;
}

static void
gmp_default_free (void *p, size_t attr_unused unused_size)
{
  free (p);
}

static void * (*gmp_allocate_func) (size_t) = gmp_default_alloc;
static void * (*gmp_reallocate_func) (void *, size_t, size_t) = gmp_default_realloc;
static void (*gmp_free_func) (void *, size_t) = gmp_default_free;

void
mp_set_memory_functions (void *(*alloc_func) (size_t),
                         void *(*realloc_func) (void *, size_t, size_t),
                         void (*free_func) (void *, size_t))
{
  if (!alloc_func)
    alloc_func = gmp_default_alloc;
  if (!realloc_func)
    realloc_func = gmp_default_realloc;
  if (!free_func)
    free_func = gmp_default_free;

  gmp_allocate_func = alloc_func;
  gmp_reallocate_func = realloc_func;
  gmp_free_func = free_func;
}

#define gmp_xalloc(size) ((*gmp_allocate_func)((size)))
#define gmp_free(p) ((*gmp_free_func) ((p), 0))

static mp_ptr
gmp_xalloc_limbs (mp_size_t size)
{
  return (mp_ptr) gmp_xalloc (size * sizeof (mp_limb_t));
}

static mp_ptr
gmp_xrealloc_limbs (mp_ptr old, mp_size_t size)
{
  assert (size > 0);
  return (mp_ptr) (*gmp_reallocate_func) (old, 0, size * sizeof (mp_limb_t));
}


/* MPN interface */

static void
mpn_copyi (mp_ptr d, mp_srcptr s, mp_size_t n)
{
  mp_size_t i;
  for (i = 0; i < n; i++)
    d[i] = s[i];
}

static void
mpn_copyd (mp_ptr d, mp_srcptr s, mp_size_t n)
{
  while (--n >= 0)
    d[n] = s[n];
}

static int
mpn_cmp (mp_srcptr ap, mp_srcptr bp, mp_size_t n)
{
  while (--n >= 0)
    {
      if (ap[n] != bp[n])
        return ap[n] > bp[n] ? 1 : -1;
    }
  return 0;
}

static int
mpn_cmp4 (mp_srcptr ap, mp_size_t an, mp_srcptr bp, mp_size_t bn)
{
  if (an != bn)
    return an < bn ? -1 : 1;
  else
    return mpn_cmp (ap, bp, an);
}

static mp_size_t
mpn_normalized_size (mp_srcptr xp, mp_size_t n)
{
  while (n > 0 && xp[n-1] == 0)
    --n;
  return n;
}

static int
mpn_zero_p(mp_srcptr rp, mp_size_t n)
{
  return mpn_normalized_size (rp, n) == 0;
}

static void
mpn_zero (mp_ptr rp, mp_size_t n)
{
  while (--n >= 0)
    rp[n] = 0;
}

static mp_limb_t
mpn_add_1 (mp_ptr rp, mp_srcptr ap, mp_size_t n, mp_limb_t b)
{
  mp_size_t i;

  assert (n > 0);
  i = 0;
  do
    {
      mp_limb_t r = ap[i] + b;
      /* Carry out */
      b = (r < b);
      rp[i] = r;
    }
  while (++i < n);

  return b;
}

static mp_limb_t
mpn_add_n (mp_ptr rp, mp_srcptr ap, mp_srcptr bp, mp_size_t n)
{
  mp_size_t i;
  mp_limb_t cy;

  for (i = 0, cy = 0; i < n; i++)
    {
      mp_limb_t a, b, r;
      a = ap[i]; b = bp[i];
      r = a + cy;
      cy = (r < cy);
      r += b;
      cy += (r < b);
      rp[i] = r;
    }
  return cy;
}

static mp_limb_t
mpn_add (mp_ptr rp, mp_srcptr ap, mp_size_t an, mp_srcptr bp, mp_size_t bn)
{
  mp_limb_t cy;

  assert (an >= bn);

  cy = mpn_add_n (rp, ap, bp, bn);
  if (an > bn)
    cy = mpn_add_1 (rp + bn, ap + bn, an - bn, cy);
  return cy;
}

static mp_limb_t
mpn_sub_1 (mp_ptr rp, mp_srcptr ap, mp_size_t n, mp_limb_t b)
{
  mp_size_t i;

  assert (n > 0);

  i = 0;
  do
    {
      mp_limb_t a = ap[i];
      /* Carry out */
      mp_limb_t cy = a < b;
      rp[i] = a - b;
      b = cy;
    }
  while (++i < n);

  return b;
}

static mp_limb_t
mpn_sub_n (mp_ptr rp, mp_srcptr ap, mp_srcptr bp, mp_size_t n)
{
  mp_size_t i;
  mp_limb_t cy;

  for (i = 0, cy = 0; i < n; i++)
    {
      mp_limb_t a, b;
      a = ap[i]; b = bp[i];
      b += cy;
      cy = (b < cy);
      cy += (a < b);
      rp[i] = a - b;
    }
  return cy;
}

static mp_limb_t
mpn_sub (mp_ptr rp, mp_srcptr ap, mp_size_t an, mp_srcptr bp, mp_size_t bn)
{
  mp_limb_t cy;

  assert (an >= bn);

  cy = mpn_sub_n (rp, ap, bp, bn);
  if (an > bn)
    cy = mpn_sub_1 (rp + bn, ap + bn, an - bn, cy);
  return cy;
}

static mp_limb_t
mpn_mul_1 (mp_ptr rp, mp_srcptr up, mp_size_t n, mp_limb_t vl)
{
  mp_limb_t ul, cl, hpl, lpl;

  assert (n >= 1);

  cl = 0;
  do
    {
      ul = *up++;
      gmp_umul_ppmm (hpl, lpl, ul, vl);

      lpl += cl;
      cl = (lpl < cl) + hpl;

      *rp++ = lpl;
    }
  while (--n != 0);

  return cl;
}

static mp_limb_t
mpn_addmul_1 (mp_ptr rp, mp_srcptr up, mp_size_t n, mp_limb_t vl)
{
  mp_limb_t ul, cl, hpl, lpl, rl;

  assert (n >= 1);

  cl = 0;
  do
    {
      ul = *up++;
      gmp_umul_ppmm (hpl, lpl, ul, vl);

      lpl += cl;
      cl = (lpl < cl) + hpl;

      rl = *rp;
      lpl = rl + lpl;
      cl += lpl < rl;
      *rp++ = lpl;
    }
  while (--n != 0);

  return cl;
}

static mp_limb_t
mpn_submul_1 (mp_ptr rp, mp_srcptr up, mp_size_t n, mp_limb_t vl)
{
  mp_limb_t ul, cl, hpl, lpl, rl;

  assert (n >= 1);

  cl = 0;
  do
    {
      ul = *up++;
      gmp_umul_ppmm (hpl, lpl, ul, vl);

      lpl += cl;
      cl = (lpl < cl) + hpl;

      rl = *rp;
      lpl = rl - lpl;
      cl += lpl > rl;
      *rp++ = lpl;
    }
  while (--n != 0);

  return cl;
}

static mp_limb_t
mpn_mul (mp_ptr rp, mp_srcptr up, mp_size_t un, mp_srcptr vp, mp_size_t vn)
{
  assert (un >= vn);
  assert (vn >= 1);
  assert (!GMP_MPN_OVERLAP_P(rp, un + vn, up, un));
  assert (!GMP_MPN_OVERLAP_P(rp, un + vn, vp, vn));

  /* We first multiply by the low order limb. This result can be
     stored, not added, to rp. We also avoid a loop for zeroing this
     way. */

  rp[un] = mpn_mul_1 (rp, up, un, vp[0]);

  /* Now accumulate the product of up[] and the next higher limb from
     vp[]. */

  while (--vn >= 1)
    {
      rp += 1, vp += 1;
      rp[un] = mpn_addmul_1 (rp, up, un, vp[0]);
    }
  return rp[un];
}
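
/* Note (added comment): this is plain schoolbook multiplication, O(un*vn)
   limb products. mini-gmp deliberately omits the subquadratic algorithms
   (Karatsuba, Toom, FFT) of full GMP, so very large operands pay the
   quadratic cost. */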


static mp_limb_t
mpn_lshift (mp_ptr rp, mp_srcptr up, mp_size_t n, unsigned int cnt)
{
  mp_limb_t high_limb, low_limb;
  unsigned int tnc;
  mp_limb_t retval;

  assert (n >= 1);
  assert (cnt >= 1);
  assert (cnt < GMP_LIMB_BITS);

  up += n;
  rp += n;

  tnc = GMP_LIMB_BITS - cnt;
  low_limb = *--up;
  retval = low_limb >> tnc;
  high_limb = (low_limb << cnt);

  while (--n != 0)
    {
      low_limb = *--up;
      *--rp = high_limb | (low_limb >> tnc);
      high_limb = (low_limb << cnt);
    }
  *--rp = high_limb;

  return retval;
}

static mp_limb_t
mpn_rshift (mp_ptr rp, mp_srcptr up, mp_size_t n, unsigned int cnt)
{
  mp_limb_t high_limb, low_limb;
  unsigned int tnc;
  mp_limb_t retval;

  assert (n >= 1);
  assert (cnt >= 1);
  assert (cnt < GMP_LIMB_BITS);

  tnc = GMP_LIMB_BITS - cnt;
  high_limb = *up++;
  retval = (high_limb << tnc);
  low_limb = high_limb >> cnt;

  while (--n != 0)
    {
      high_limb = *up++;
      *rp++ = low_limb | (high_limb << tnc);
      low_limb = high_limb >> cnt;
    }
  *rp = low_limb;

  return retval;
}

static mp_bitcnt_t
mpn_common_scan (mp_limb_t limb, mp_size_t i, mp_srcptr up, mp_size_t un,
                 mp_limb_t ux)
{
  unsigned cnt;

  assert (ux == 0 || ux == GMP_LIMB_MAX);
  assert (0 <= i && i <= un );

  while (limb == 0)
    {
      i++;
      if (i == un)
        return (ux == 0 ? ~(mp_bitcnt_t) 0 : un * (mp_bitcnt_t)GMP_LIMB_BITS);
      limb = ux ^ up[i];
    }
  gmp_ctz (cnt, limb);
  return (mp_bitcnt_t) i * GMP_LIMB_BITS + cnt;
}

mp_bitcnt_t
mpn_scan1 (mp_srcptr ptr, mp_bitcnt_t bit)
{
  mp_size_t i;
  i = bit / GMP_LIMB_BITS;

  return mpn_common_scan ( ptr[i] & (GMP_LIMB_MAX << (bit % GMP_LIMB_BITS)),
                          i, ptr, i, 0);
}


/* MPN division interface. */

/* The 3/2 inverse is defined as

     m = floor( (B^3-1) / (B u1 + u0)) - B
*/
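
/* Worked example (added comment, not from the GMP sources): with
   hypothetical 8-bit limbs, B = 256. For the normalized divisor
   u1 = 0x80, u0 = 0 this gives
   m = floor((256^3 - 1) / (256 * 128)) - 256 = 511 - 256 = 255 = 0xFF,
   i.e. the largest limb value, as expected for the smallest normalized
   divisor. */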

static mp_limb_t
mpn_invert_3by2 (mp_limb_t u1, mp_limb_t u0)
{
  mp_limb_t r, m;

  {
    mp_limb_t p, ql;
    unsigned ul, uh, qh;

    /* For notation, let b denote the half-limb base, so that B = b^2.
       Split u1 = b uh + ul. */
    ul = u1 & GMP_LLIMB_MASK;
    uh = u1 >> (GMP_LIMB_BITS / 2);

    /* Approximation of the high half of quotient. Differs from the 2/1
       inverse of the half limb uh, since we have already subtracted
       u0. */
    qh = (u1 ^ GMP_LIMB_MAX) / uh;

    /* Adjust to get a half-limb 3/2 inverse, i.e., we want

       qh' = floor( (b^3 - 1) / u) - b = floor ((b^3 - b u - 1) / u
           = floor( (b (~u) + b-1) / u),

       and the remainder

       r = b (~u) + b-1 - qh (b uh + ul)
         = b (~u - qh uh) + b-1 - qh ul

       Subtraction of qh ul may underflow, which implies adjustments.
       But by normalization, 2 u >= B > qh ul, so we need to adjust by
       at most 2.
    */
    r = ((~u1 - (mp_limb_t) qh * uh) << (GMP_LIMB_BITS / 2)) | GMP_LLIMB_MASK;

    p = (mp_limb_t) qh * ul;
    /* Adjustment steps taken from udiv_qrnnd_c */
    if (r < p)
      {
        qh--;
        r += u1;
        if (r >= u1) /* i.e. we didn't get carry when adding to r */
          if (r < p)
            {
              qh--;
              r += u1;
            }
      }
    r -= p;

    /* Low half of the quotient is

       ql = floor ( (b r + b-1) / u1).

       This is a 3/2 division (on half-limbs), for which qh is a
       suitable inverse. */

    p = (r >> (GMP_LIMB_BITS / 2)) * qh + r;
    /* Unlike full-limb 3/2, we can add 1 without overflow. For this to
       work, it is essential that ql is a full mp_limb_t. */
    ql = (p >> (GMP_LIMB_BITS / 2)) + 1;

    /* By the 3/2 trick, we don't need the high half limb. */
    r = (r << (GMP_LIMB_BITS / 2)) + GMP_LLIMB_MASK - ql * u1;

    if (r >= (GMP_LIMB_MAX & (p << (GMP_LIMB_BITS / 2))))
      {
        ql--;
        r += u1;
      }
    m = ((mp_limb_t) qh << (GMP_LIMB_BITS / 2)) + ql;
    if (r >= u1)
      {
        m++;
        r -= u1;
      }
  }

  /* Now m is the 2/1 inverse of u1. If u0 > 0, adjust it to become a
     3/2 inverse. */
  if (u0 > 0)
    {
      mp_limb_t th, tl;
      r = ~r;
      r += u0;
      if (r < u0)
        {
          m--;
          if (r >= u1)
            {
              m--;
              r -= u1;
            }
          r -= u1;
        }
      gmp_umul_ppmm (th, tl, u0, m);
      r += th;
      if (r < th)
        {
          m--;
          m -= ((r > u1) | ((r == u1) & (tl > u0)));
        }
    }

  return m;
}

struct gmp_div_inverse
{
  /* Normalization shift count. */
  unsigned shift;
  /* Normalized divisor (d0 unused for mpn_div_qr_1) */
  mp_limb_t d1, d0;
  /* Inverse, for 2/1 or 3/2. */
  mp_limb_t di;
};

static void
mpn_div_qr_1_invert (struct gmp_div_inverse *inv, mp_limb_t d)
{
  unsigned shift;

  assert (d > 0);
  gmp_clz (shift, d);
  inv->shift = shift;
  inv->d1 = d << shift;
  inv->di = mpn_invert_limb (inv->d1);
}

static void
mpn_div_qr_2_invert (struct gmp_div_inverse *inv,
                     mp_limb_t d1, mp_limb_t d0)
{
  unsigned shift;

  assert (d1 > 0);
  gmp_clz (shift, d1);
  inv->shift = shift;
  if (shift > 0)
    {
      d1 = (d1 << shift) | (d0 >> (GMP_LIMB_BITS - shift));
      d0 <<= shift;
    }
  inv->d1 = d1;
  inv->d0 = d0;
  inv->di = mpn_invert_3by2 (d1, d0);
}

static void
mpn_div_qr_invert (struct gmp_div_inverse *inv,
                   mp_srcptr dp, mp_size_t dn)
{
  assert (dn > 0);

  if (dn == 1)
    mpn_div_qr_1_invert (inv, dp[0]);
  else if (dn == 2)
    mpn_div_qr_2_invert (inv, dp[1], dp[0]);
  else
    {
      unsigned shift;
      mp_limb_t d1, d0;

      d1 = dp[dn-1];
      d0 = dp[dn-2];
      assert (d1 > 0);
      gmp_clz (shift, d1);
      inv->shift = shift;
      if (shift > 0)
        {
          d1 = (d1 << shift) | (d0 >> (GMP_LIMB_BITS - shift));
          d0 = (d0 << shift) | (dp[dn-3] >> (GMP_LIMB_BITS - shift));
        }
      inv->d1 = d1;
      inv->d0 = d0;
      inv->di = mpn_invert_3by2 (d1, d0);
    }
}

/* Not matching current public gmp interface, rather corresponding to
   the sbpi1_div_* functions. */
static mp_limb_t
mpn_div_qr_1_preinv (mp_ptr qp, mp_srcptr np, mp_size_t nn,
                     const struct gmp_div_inverse *inv)
{
  mp_limb_t d, di;
  mp_limb_t r;
  mp_ptr tp = NULL;

  if (inv->shift > 0)
    {
      /* Shift, reusing qp area if possible. In-place shift if qp == np. */
      tp = qp ? qp : gmp_xalloc_limbs (nn);
      r = mpn_lshift (tp, np, nn, inv->shift);
      np = tp;
    }
  else
    r = 0;

  d = inv->d1;
  di = inv->di;
  while (--nn >= 0)
    {
      mp_limb_t q;

      gmp_udiv_qrnnd_preinv (q, r, r, np[nn], d, di);
      if (qp)
        qp[nn] = q;
    }
  if ((inv->shift > 0) && (tp != qp))
    gmp_free (tp);

  return r >> inv->shift;
}

static void
mpn_div_qr_2_preinv (mp_ptr qp, mp_ptr np, mp_size_t nn,
                     const struct gmp_div_inverse *inv)
{
  unsigned shift;
  mp_size_t i;
  mp_limb_t d1, d0, di, r1, r0;

  assert (nn >= 2);
  shift = inv->shift;
  d1 = inv->d1;
  d0 = inv->d0;
  di = inv->di;

  if (shift > 0)
    r1 = mpn_lshift (np, np, nn, shift);
  else
    r1 = 0;

  r0 = np[nn - 1];

  i = nn - 2;
  do
    {
      mp_limb_t n0, q;
      n0 = np[i];
      gmp_udiv_qr_3by2 (q, r1, r0, r1, r0, n0, d1, d0, di);

      if (qp)
        qp[i] = q;
    }
  while (--i >= 0);

  if (shift > 0)
    {
      assert ((r0 & (GMP_LIMB_MAX >> (GMP_LIMB_BITS - shift))) == 0);
      r0 = (r0 >> shift) | (r1 << (GMP_LIMB_BITS - shift));
      r1 >>= shift;
    }

  np[1] = r1;
  np[0] = r0;
}

static void
mpn_div_qr_pi1 (mp_ptr qp,
                mp_ptr np, mp_size_t nn, mp_limb_t n1,
                mp_srcptr dp, mp_size_t dn,
                mp_limb_t dinv)
{
  mp_size_t i;

  mp_limb_t d1, d0;
  mp_limb_t cy, cy1;
  mp_limb_t q;

  assert (dn > 2);
  assert (nn >= dn);

  d1 = dp[dn - 1];
  d0 = dp[dn - 2];

  assert ((d1 & GMP_LIMB_HIGHBIT) != 0);
  /* Iteration variable is the index of the q limb.
   *
   * We divide <n1, np[dn-1+i], np[dn-2+i], np[dn-3+i],..., np[i]>
   * by        <d1, d0,         dp[dn-3],  ...,             dp[0] >
   */
  i = nn - dn;
  do
    {
      mp_limb_t n0 = np[dn-1+i];

      if (n1 == d1 && n0 == d0)
        {
          q = GMP_LIMB_MAX;
          mpn_submul_1 (np+i, dp, dn, q);
          n1 = np[dn-1+i]; /* update n1, last loop's value will now be invalid */
        }
      else
        {
          gmp_udiv_qr_3by2 (q, n1, n0, n1, n0, np[dn-2+i], d1, d0, dinv);

          cy = mpn_submul_1 (np + i, dp, dn-2, q);

          cy1 = n0 < cy;
          n0 = n0 - cy;
          cy = n1 < cy1;
          n1 = n1 - cy1;
          np[dn-2+i] = n0;

          if (cy != 0)
            {
              n1 += d1 + mpn_add_n (np + i, np + i, dp, dn - 1);
              q--;
            }
        }

      if (qp)
        qp[i] = q;
    }
  while (--i >= 0);

  np[dn - 1] = n1;
}

static void
mpn_div_qr_preinv (mp_ptr qp, mp_ptr np, mp_size_t nn,
                   mp_srcptr dp, mp_size_t dn,
                   const struct gmp_div_inverse *inv)
{
  assert (dn > 0);
  assert (nn >= dn);

  if (dn == 1)
    np[0] = mpn_div_qr_1_preinv (qp, np, nn, inv);
  else if (dn == 2)
    mpn_div_qr_2_preinv (qp, np, nn, inv);
  else
    {
      mp_limb_t nh;
      unsigned shift;

      assert (inv->d1 == dp[dn-1]);
      assert (inv->d0 == dp[dn-2]);
      assert ((inv->d1 & GMP_LIMB_HIGHBIT) != 0);

      shift = inv->shift;
      if (shift > 0)
        nh = mpn_lshift (np, np, nn, shift);
      else
        nh = 0;

      mpn_div_qr_pi1 (qp, np, nn, nh, dp, dn, inv->di);

      if (shift > 0)
        gmp_assert_nocarry (mpn_rshift (np, np, dn, shift));
    }
}

static void
mpn_div_qr (mp_ptr qp, mp_ptr np, mp_size_t nn, mp_srcptr dp, mp_size_t dn)
{
  struct gmp_div_inverse inv;
  mp_ptr tp = NULL;

  assert (dn > 0);
  assert (nn >= dn);

  mpn_div_qr_invert (&inv, dp, dn);
  if (dn > 2 && inv.shift > 0)
    {
      tp = gmp_xalloc_limbs (dn);
      gmp_assert_nocarry (mpn_lshift (tp, dp, dn, inv.shift));
      dp = tp;
    }
  mpn_div_qr_preinv (qp, np, nn, dp, dn, &inv);
  if (tp)
    gmp_free (tp);
}


static mp_bitcnt_t
mpn_limb_size_in_base_2 (mp_limb_t u)
{
  unsigned shift;

  assert (u > 0);
  gmp_clz (shift, u);
  return GMP_LIMB_BITS - shift;
}


/* MPZ interface */
void
mpz_init (mpz_t r)
{
  static const mp_limb_t dummy_limb = GMP_LIMB_MAX & 0xc1a0;

  r->_mp_alloc = 0;
  r->_mp_size = 0;
  r->_mp_d = (mp_ptr) &dummy_limb;
}

/* The utility of this function is a bit limited, since many functions
   assign the result variable using mpz_swap. */
void
mpz_init2 (mpz_t r, mp_bitcnt_t bits)
{
  mp_size_t rn;

  bits -= (bits != 0); /* Round down, except if 0 */
  rn = 1 + bits / GMP_LIMB_BITS;

  r->_mp_alloc = rn;
  r->_mp_size = 0;
  r->_mp_d = gmp_xalloc_limbs (rn);
}

void
mpz_clear (mpz_t r)
{
  if (r->_mp_alloc)
    gmp_free (r->_mp_d);
}

static mp_ptr
mpz_realloc (mpz_t r, mp_size_t size)
{
  size = GMP_MAX (size, 1);

  if (r->_mp_alloc)
    r->_mp_d = gmp_xrealloc_limbs (r->_mp_d, size);
  else
    r->_mp_d = gmp_xalloc_limbs (size);
  r->_mp_alloc = size;

  if (GMP_ABS (r->_mp_size) > size)
    r->_mp_size = 0;

  return r->_mp_d;
}

/* Realloc for an mpz_t WHAT if it has less than NEEDED limbs. */
#define MPZ_REALLOC(z,n) ((n) > (z)->_mp_alloc \
                          ? mpz_realloc(z,n) \
                          : (z)->_mp_d)

/* MPZ assignment and basic conversions. */
void
mpz_set_si (mpz_t r, signed long int x)
{
  if (x >= 0)
    mpz_set_ui (r, x);
  else /* (x < 0) */
    if (GMP_LIMB_BITS < GMP_ULONG_BITS)
      {
        mpz_set_ui (r, GMP_NEG_CAST (unsigned long int, x));
        mpz_neg (r, r);
      }
    else
      {
        r->_mp_size = -1;
        MPZ_REALLOC (r, 1)[0] = GMP_NEG_CAST (unsigned long int, x);
      }
}

void
mpz_set_ui (mpz_t r, unsigned long int x)
{
  if (x > 0)
    {
      r->_mp_size = 1;
      MPZ_REALLOC (r, 1)[0] = x;
      if (GMP_LIMB_BITS < GMP_ULONG_BITS)
        {
          int LOCAL_GMP_LIMB_BITS = GMP_LIMB_BITS;
          while (x >>= LOCAL_GMP_LIMB_BITS)
            {
              ++ r->_mp_size;
              MPZ_REALLOC (r, r->_mp_size)[r->_mp_size - 1] = x;
            }
        }
    }
  else
    r->_mp_size = 0;
}

void
mpz_set (mpz_t r, const mpz_t x)
{
  /* Allow the NOP r == x */
  if (r != x)
    {
      mp_size_t n;
      mp_ptr rp;

      n = GMP_ABS (x->_mp_size);
      rp = MPZ_REALLOC (r, n);

      mpn_copyi (rp, x->_mp_d, n);
      r->_mp_size = x->_mp_size;
    }
}

void
mpz_init_set_si (mpz_t r, signed long int x)
{
  mpz_init (r);
  mpz_set_si (r, x);
}

static void
mpz_init_set_ui (mpz_t r, unsigned long int x)
{
  mpz_init (r);
  mpz_set_ui (r, x);
}

void
mpz_init_set (mpz_t r, const mpz_t x)
{
  mpz_init (r);
  mpz_set (r, x);
}

static int
mpz_cmp_ui (const mpz_t u, unsigned long v);
static int
mpz_cmpabs_ui (const mpz_t u, unsigned long v);

int
mpz_fits_slong_p (const mpz_t u)
{
  return (LONG_MAX + LONG_MIN == 0 || mpz_cmp_ui (u, LONG_MAX) <= 0) &&
    mpz_cmpabs_ui (u, GMP_NEG_CAST (unsigned long int, LONG_MIN)) <= 0;
}

static int
mpn_absfits_ulong_p (mp_srcptr up, mp_size_t un)
{
  int ulongsize = GMP_ULONG_BITS / GMP_LIMB_BITS;
  mp_limb_t ulongrem = 0;

  if (GMP_ULONG_BITS % GMP_LIMB_BITS != 0)
    ulongrem = (mp_limb_t) (ULONG_MAX >> GMP_LIMB_BITS * ulongsize) + 1;

  return un <= ulongsize || (up[ulongsize] < ulongrem && un == ulongsize + 1);
}

int
mpz_fits_ulong_p (const mpz_t u)
{
  mp_size_t us = u->_mp_size;

  return us >= 0 && mpn_absfits_ulong_p (u->_mp_d, us);
}

long int
mpz_get_si (const mpz_t u)
{
  unsigned long r = mpz_get_ui (u);
  unsigned long c = -LONG_MAX - LONG_MIN;

  if (u->_mp_size < 0)
    /* This expression is necessary to properly handle -LONG_MIN */
    return -(long) c - (long) ((r - c) & LONG_MAX);
  else
    return (long) (r & LONG_MAX);
}

unsigned long int
mpz_get_ui (const mpz_t u)
{
  if (GMP_LIMB_BITS < GMP_ULONG_BITS)
    {
      int LOCAL_GMP_LIMB_BITS = GMP_LIMB_BITS;
      unsigned long r = 0;
      mp_size_t n = GMP_ABS (u->_mp_size);
      n = GMP_MIN (n, 1 + (mp_size_t) (GMP_ULONG_BITS - 1) / GMP_LIMB_BITS);
      while (--n >= 0)
        r = (r << LOCAL_GMP_LIMB_BITS) + u->_mp_d[n];
      return r;
    }

  return u->_mp_size == 0 ? 0 : u->_mp_d[0];
}

size_t
mpz_size (const mpz_t u)
{
  return GMP_ABS (u->_mp_size);
}


/* MPZ comparisons and the like. */
int
mpz_sgn (const mpz_t u)
{
  return GMP_CMP (u->_mp_size, 0);
}

static int
mpz_cmp_ui (const mpz_t u, unsigned long v)
{
  mp_size_t usize = u->_mp_size;

  if (usize < 0)
    return -1;
  else
    return mpz_cmpabs_ui (u, v);
}

int
mpz_cmp (const mpz_t a, const mpz_t b)
{
  mp_size_t asize = a->_mp_size;
  mp_size_t bsize = b->_mp_size;

  if (asize != bsize)
    return (asize < bsize) ? -1 : 1;
  else if (asize >= 0)
    return mpn_cmp (a->_mp_d, b->_mp_d, asize);
  else
    return mpn_cmp (b->_mp_d, a->_mp_d, -asize);
}

static int
mpz_cmpabs_ui (const mpz_t u, unsigned long v)
{
  mp_size_t un = GMP_ABS (u->_mp_size);

  if (! mpn_absfits_ulong_p (u->_mp_d, un))
    return 1;
  else
    {
      unsigned long uu = mpz_get_ui (u);
      return GMP_CMP(uu, v);
    }
}

void
mpz_neg (mpz_t r, const mpz_t u)
{
  mpz_set (r, u);
  r->_mp_size = -r->_mp_size;
}

static void
mpz_swap (mpz_t u, mpz_t v)
{
  MP_SIZE_T_SWAP (u->_mp_size, v->_mp_size);
  MP_SIZE_T_SWAP (u->_mp_alloc, v->_mp_alloc);
  MP_PTR_SWAP (u->_mp_d, v->_mp_d);
}


/* MPZ addition and subtraction */


void
mpz_add_ui (mpz_t r, const mpz_t a, unsigned long b)
{
  mpz_t bb;
  mpz_init_set_ui (bb, b);
  mpz_add (r, a, bb);
  mpz_clear (bb);
}

static void
mpz_ui_sub (mpz_t r, unsigned long a, const mpz_t b);

void
mpz_sub_ui (mpz_t r, const mpz_t a, unsigned long b)
{
  mpz_ui_sub (r, b, a);
  mpz_neg (r, r);
}

static void
mpz_ui_sub (mpz_t r, unsigned long a, const mpz_t b)
{
  mpz_neg (r, b);
  mpz_add_ui (r, r, a);
}

static mp_size_t
mpz_abs_add (mpz_t r, const mpz_t a, const mpz_t b)
{
  mp_size_t an = GMP_ABS (a->_mp_size);
  mp_size_t bn = GMP_ABS (b->_mp_size);
  mp_ptr rp;
  mp_limb_t cy;

  if (an < bn)
    {
      MPZ_SRCPTR_SWAP (a, b);
      MP_SIZE_T_SWAP (an, bn);
    }

  rp = MPZ_REALLOC (r, an + 1);
  cy = mpn_add (rp, a->_mp_d, an, b->_mp_d, bn);

  rp[an] = cy;

  return an + cy;
}

static mp_size_t
mpz_abs_sub (mpz_t r, const mpz_t a, const mpz_t b)
{
  mp_size_t an = GMP_ABS (a->_mp_size);
  mp_size_t bn = GMP_ABS (b->_mp_size);
  int cmp;
  mp_ptr rp;

  cmp = mpn_cmp4 (a->_mp_d, an, b->_mp_d, bn);
  if (cmp > 0)
    {
      rp = MPZ_REALLOC (r, an);
      gmp_assert_nocarry (mpn_sub (rp, a->_mp_d, an, b->_mp_d, bn));
      return mpn_normalized_size (rp, an);
    }
  else if (cmp < 0)
    {
      rp = MPZ_REALLOC (r, bn);
      gmp_assert_nocarry (mpn_sub (rp, b->_mp_d, bn, a->_mp_d, an));
      return -mpn_normalized_size (rp, bn);
    }
  else
    return 0;
}

void
mpz_add (mpz_t r, const mpz_t a, const mpz_t b)
{
  mp_size_t rn;

  if ( (a->_mp_size ^ b->_mp_size) >= 0)
    rn = mpz_abs_add (r, a, b);
  else
    rn = mpz_abs_sub (r, a, b);

  r->_mp_size = a->_mp_size >= 0 ? rn : - rn;
}

void
mpz_sub (mpz_t r, const mpz_t a, const mpz_t b)
{
  mp_size_t rn;

  if ( (a->_mp_size ^ b->_mp_size) >= 0)
    rn = mpz_abs_sub (r, a, b);
  else
    rn = mpz_abs_add (r, a, b);

  r->_mp_size = a->_mp_size >= 0 ? rn : - rn;
}


void
mpz_mul (mpz_t r, const mpz_t u, const mpz_t v)
{
  int sign;
  mp_size_t un, vn, rn;
  mpz_t t;
  mp_ptr tp;

  un = u->_mp_size;
  vn = v->_mp_size;

  if (un == 0 || vn == 0)
    {
      r->_mp_size = 0;
      return;
    }

  sign = (un ^ vn) < 0;

  un = GMP_ABS (un);
  vn = GMP_ABS (vn);

  mpz_init2 (t, (un + vn) * GMP_LIMB_BITS);

  tp = t->_mp_d;
  if (un >= vn)
    mpn_mul (tp, u->_mp_d, un, v->_mp_d, vn);
  else
    mpn_mul (tp, v->_mp_d, vn, u->_mp_d, un);

  rn = un + vn;
  rn -= tp[rn-1] == 0;

  t->_mp_size = sign ? - rn : rn;
  mpz_swap (r, t);
  mpz_clear (t);
}

void
mpz_mul_2exp (mpz_t r, const mpz_t u, mp_bitcnt_t bits)
{
  mp_size_t un, rn;
  mp_size_t limbs;
  unsigned shift;
  mp_ptr rp;

  un = GMP_ABS (u->_mp_size);
  if (un == 0)
    {
      r->_mp_size = 0;
      return;
    }

  limbs = bits / GMP_LIMB_BITS;
  shift = bits % GMP_LIMB_BITS;

  rn = un + limbs + (shift > 0);
  rp = MPZ_REALLOC (r, rn);
  if (shift > 0)
    {
      mp_limb_t cy = mpn_lshift (rp + limbs, u->_mp_d, un, shift);
      rp[rn-1] = cy;
      rn -= (cy == 0);
    }
  else
    mpn_copyd (rp + limbs, u->_mp_d, un);

  mpn_zero (rp, limbs);

  r->_mp_size = (u->_mp_size < 0) ? - rn : rn;
}


/* MPZ division */
enum mpz_div_round_mode { GMP_DIV_FLOOR, GMP_DIV_CEIL, GMP_DIV_TRUNC };
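
/* Illustrative note (added comment): for n = 7, d = 2 the quotients under
   the three modes are FLOOR 3, CEIL 4, TRUNC 3; for n = -7, d = 2 they
   are FLOOR -4, CEIL -3, TRUNC -3. In every mode n = q*d + r holds with
   |r| < |d|. */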

/* Allows q or r to be zero. Returns 1 iff remainder is non-zero. */
static int
mpz_div_qr (mpz_t q, mpz_t r,
            const mpz_t n, const mpz_t d, enum mpz_div_round_mode mode)
{
  mp_size_t ns, ds, nn, dn, qs;
  ns = n->_mp_size;
  ds = d->_mp_size;

  if (ds == 0)
    gmp_die("mpz_div_qr: Divide by zero.");

  if (ns == 0)
    {
      if (q)
        q->_mp_size = 0;
      if (r)
        r->_mp_size = 0;
      return 0;
    }

  nn = GMP_ABS (ns);
  dn = GMP_ABS (ds);

  qs = ds ^ ns;

  if (nn < dn)
    {
      if (mode == GMP_DIV_CEIL && qs >= 0)
        {
          /* q = 1, r = n - d */
          if (r)
            mpz_sub (r, n, d);
          if (q)
            mpz_set_ui (q, 1);
        }
      else if (mode == GMP_DIV_FLOOR && qs < 0)
        {
          /* q = -1, r = n + d */
          if (r)
            mpz_add (r, n, d);
          if (q)
            mpz_set_si (q, -1);
        }
      else
        {
          /* q = 0, r = n */
          if (r)
            mpz_set (r, n);
          if (q)
            q->_mp_size = 0;
        }
      return 1;
    }
  else
    {
      mp_ptr np, qp;
      mp_size_t qn, rn;
      mpz_t tq, tr;

      mpz_init_set (tr, n);
      np = tr->_mp_d;

      qn = nn - dn + 1;

      if (q)
        {
          mpz_init2 (tq, qn * GMP_LIMB_BITS);
          qp = tq->_mp_d;
        }
      else
        qp = NULL;

      mpn_div_qr (qp, np, nn, d->_mp_d, dn);

      if (qp)
        {
          qn -= (qp[qn-1] == 0);

          tq->_mp_size = qs < 0 ? -qn : qn;
        }
      rn = mpn_normalized_size (np, dn);
      tr->_mp_size = ns < 0 ? - rn : rn;

      if (mode == GMP_DIV_FLOOR && qs < 0 && rn != 0)
        {
          if (q)
            mpz_sub_ui (tq, tq, 1);
          if (r)
            mpz_add (tr, tr, d);
        }
      else if (mode == GMP_DIV_CEIL && qs >= 0 && rn != 0)
        {
          if (q)
            mpz_add_ui (tq, tq, 1);
          if (r)
            mpz_sub (tr, tr, d);
        }

      if (q)
        {
          mpz_swap (tq, q);
          mpz_clear (tq);
        }
      if (r)
        mpz_swap (tr, r);

      mpz_clear (tr);

      return rn != 0;
    }
}


void
mpz_tdiv_q (mpz_t q, const mpz_t n, const mpz_t d)
{
  mpz_div_qr (q, NULL, n, d, GMP_DIV_TRUNC);
}

void
mpz_tdiv_r (mpz_t r, const mpz_t n, const mpz_t d)
{
  mpz_div_qr (NULL, r, n, d, GMP_DIV_TRUNC);
}

static void
mpz_div_q_2exp (mpz_t q, const mpz_t u, mp_bitcnt_t bit_index,
                enum mpz_div_round_mode mode)
{
  mp_size_t un, qn;
  mp_size_t limb_cnt;
  mp_ptr qp;
  int adjust;

  un = u->_mp_size;
  if (un == 0)
    {
      q->_mp_size = 0;
      return;
    }
  limb_cnt = bit_index / GMP_LIMB_BITS;
  qn = GMP_ABS (un) - limb_cnt;
  bit_index %= GMP_LIMB_BITS;

  if (mode == ((un > 0) ? GMP_DIV_CEIL : GMP_DIV_FLOOR)) /* un != 0 here. */
    /* Note: Below, the final indexing at limb_cnt is valid because at
       that point we have qn > 0. */
    adjust = (qn <= 0
              || !mpn_zero_p (u->_mp_d, limb_cnt)
              || (u->_mp_d[limb_cnt]
                  & (((mp_limb_t) 1 << bit_index) - 1)));
  else
    adjust = 0;

  if (qn <= 0)
    qn = 0;
  else
    {
      qp = MPZ_REALLOC (q, qn);

      if (bit_index != 0)
        {
          mpn_rshift (qp, u->_mp_d + limb_cnt, qn, bit_index);
          qn -= qp[qn - 1] == 0;
        }
      else
        {
          mpn_copyi (qp, u->_mp_d + limb_cnt, qn);
        }
    }

  q->_mp_size = qn;

  if (adjust)
    mpz_add_ui (q, q, 1);
  if (un < 0)
    mpz_neg (q, q);
}

void
mpz_fdiv_q_2exp (mpz_t r, const mpz_t u, mp_bitcnt_t cnt)
{
  mpz_div_q_2exp (r, u, cnt, GMP_DIV_FLOOR);
}

void
mpz_tdiv_q_2exp (mpz_t r, const mpz_t u, mp_bitcnt_t cnt)
{
  mpz_div_q_2exp (r, u, cnt, GMP_DIV_TRUNC);
}


/* Logical operations and bit manipulation. */

/* Numbers are treated as if represented in two's complement (and
   infinitely sign extended). For negative values we get the two's
   complement from -x = ~x + 1, where ~ is bitwise complement.
   Negation transforms

     xxxx10...0

   into

     yyyy10...0

   where yyyy is the bitwise complement of xxxx. So least significant
   bits, up to and including the first one bit, are unchanged, and
   the more significant bits are all complemented.

   To change a bit from zero to one in a negative number, subtract the
   corresponding power of two from the absolute value. This can never
   underflow. To change a bit from one to zero, add the corresponding
   power of two, and this might overflow. E.g., if x = -001111, the
   two's complement is 110001. Clearing the least significant bit, we
   get two's complement 110000, and -010000. */
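
/* Illustrative note (added comment): -6 in infinite two's complement is
   ...11111010, so mpz_tstbit on -6 returns 0 for bit 0, 1 for bit 1,
   0 for bit 2, and 1 for every bit index >= 3. */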

int
mpz_tstbit (const mpz_t d, mp_bitcnt_t bit_index)
{
  mp_size_t limb_index;
  unsigned shift;
  mp_size_t ds;
  mp_size_t dn;
  mp_limb_t w;
  int bit;

  ds = d->_mp_size;
  dn = GMP_ABS (ds);
  limb_index = bit_index / GMP_LIMB_BITS;
  if (limb_index >= dn)
    return ds < 0;

  shift = bit_index % GMP_LIMB_BITS;
  w = d->_mp_d[limb_index];
  bit = (w >> shift) & 1;

  if (ds < 0)
    {
      /* d < 0. Check if any of the bits below is set: If so, our bit
         must be complemented. */
      if (shift > 0 && (mp_limb_t) (w << (GMP_LIMB_BITS - shift)) > 0)
        return bit ^ 1;
      while (--limb_index >= 0)
        if (d->_mp_d[limb_index] > 0)
          return bit ^ 1;
    }
  return bit;
}

static void
mpz_abs_add_bit (mpz_t d, mp_bitcnt_t bit_index)
{
  mp_size_t dn, limb_index;
  mp_limb_t bit;
  mp_ptr dp;

  dn = GMP_ABS (d->_mp_size);

  limb_index = bit_index / GMP_LIMB_BITS;
  bit = (mp_limb_t) 1 << (bit_index % GMP_LIMB_BITS);

  if (limb_index >= dn)
    {
      mp_size_t i;
      /* The bit should be set outside of the end of the number.
         We have to increase the size of the number. */
      dp = MPZ_REALLOC (d, limb_index + 1);

      dp[limb_index] = bit;
      for (i = dn; i < limb_index; i++)
        dp[i] = 0;
      dn = limb_index + 1;
    }
  else
    {
      mp_limb_t cy;

      dp = d->_mp_d;

      cy = mpn_add_1 (dp + limb_index, dp + limb_index, dn - limb_index, bit);
      if (cy > 0)
        {
          dp = MPZ_REALLOC (d, dn + 1);
          dp[dn++] = cy;
        }
    }

  d->_mp_size = (d->_mp_size < 0) ? - dn : dn;
}

static void
mpz_abs_sub_bit (mpz_t d, mp_bitcnt_t bit_index)
{
  mp_size_t dn, limb_index;
  mp_ptr dp;
  mp_limb_t bit;

  dn = GMP_ABS (d->_mp_size);
  dp = d->_mp_d;

  limb_index = bit_index / GMP_LIMB_BITS;
  bit = (mp_limb_t) 1 << (bit_index % GMP_LIMB_BITS);

  assert (limb_index < dn);

  gmp_assert_nocarry (mpn_sub_1 (dp + limb_index, dp + limb_index,
                                 dn - limb_index, bit));
  dn = mpn_normalized_size (dp, dn);
  d->_mp_size = (d->_mp_size < 0) ? - dn : dn;
}

void
mpz_setbit (mpz_t d, mp_bitcnt_t bit_index)
{
  if (!mpz_tstbit (d, bit_index))
    {
      if (d->_mp_size >= 0)
        mpz_abs_add_bit (d, bit_index);
      else
        mpz_abs_sub_bit (d, bit_index);
    }
}

void
mpz_clrbit (mpz_t d, mp_bitcnt_t bit_index)
{
  if (mpz_tstbit (d, bit_index))
    {
      if (d->_mp_size >= 0)
        mpz_abs_sub_bit (d, bit_index);
      else
        mpz_abs_add_bit (d, bit_index);
    }
}

void
mpz_com (mpz_t r, const mpz_t u)
{
  mpz_add_ui (r, u, 1);
  mpz_neg (r, r);
}

void
mpz_and (mpz_t r, const mpz_t u, const mpz_t v)
{
  mp_size_t un, vn, rn, i;
  mp_ptr up, vp, rp;

  mp_limb_t ux, vx, rx;
  mp_limb_t uc, vc, rc;
  mp_limb_t ul, vl, rl;

  un = GMP_ABS (u->_mp_size);
  vn = GMP_ABS (v->_mp_size);
  if (un < vn)
    {
      MPZ_SRCPTR_SWAP (u, v);
      MP_SIZE_T_SWAP (un, vn);
    }
  if (vn == 0)
    {
      r->_mp_size = 0;
      return;
    }

  uc = u->_mp_size < 0;
  vc = v->_mp_size < 0;
  rc = uc & vc;

  ux = -uc;
  vx = -vc;
  rx = -rc;

  /* If the smaller input is positive, higher limbs don't matter. */
  rn = vx ? un : vn;

  rp = MPZ_REALLOC (r, rn + (mp_size_t) rc);

  up = u->_mp_d;
  vp = v->_mp_d;

  i = 0;
  do
    {
      ul = (up[i] ^ ux) + uc;
      uc = ul < uc;

      vl = (vp[i] ^ vx) + vc;
      vc = vl < vc;

      rl = ( (ul & vl) ^ rx) + rc;
      rc = rl < rc;
      rp[i] = rl;
    }
  while (++i < vn);
  assert (vc == 0);

  for (; i < rn; i++)
    {
      ul = (up[i] ^ ux) + uc;
      uc = ul < uc;

      rl = ( (ul & vx) ^ rx) + rc;
      rc = rl < rc;
      rp[i] = rl;
    }
  if (rc)
    rp[rn++] = rc;
  else
    rn = mpn_normalized_size (rp, rn);

  r->_mp_size = rx ? -rn : rn;
}

void
mpz_ior (mpz_t r, const mpz_t u, const mpz_t v)
{
  mp_size_t un, vn, rn, i;
  mp_ptr up, vp, rp;

  mp_limb_t ux, vx, rx;
  mp_limb_t uc, vc, rc;
  mp_limb_t ul, vl, rl;

  un = GMP_ABS (u->_mp_size);
  vn = GMP_ABS (v->_mp_size);
  if (un < vn)
    {
      MPZ_SRCPTR_SWAP (u, v);
      MP_SIZE_T_SWAP (un, vn);
    }
  if (vn == 0)
    {
      mpz_set (r, u);
      return;
    }

  uc = u->_mp_size < 0;
  vc = v->_mp_size < 0;
  rc = uc | vc;

  ux = -uc;
  vx = -vc;
  rx = -rc;

  /* If the smaller input is negative, by sign extension higher limbs
     don't matter. */
  rn = vx ? vn : un;

  rp = MPZ_REALLOC (r, rn + (mp_size_t) rc);

  up = u->_mp_d;
  vp = v->_mp_d;

  i = 0;
  do
    {
      ul = (up[i] ^ ux) + uc;
      uc = ul < uc;

      vl = (vp[i] ^ vx) + vc;
      vc = vl < vc;

      rl = ( (ul | vl) ^ rx) + rc;
      rc = rl < rc;
      rp[i] = rl;
    }
  while (++i < vn);
  assert (vc == 0);

  for (; i < rn; i++)
    {
      ul = (up[i] ^ ux) + uc;
      uc = ul < uc;

      rl = ( (ul | vx) ^ rx) + rc;
      rc = rl < rc;
      rp[i] = rl;
    }
  if (rc)
    rp[rn++] = rc;
  else
    rn = mpn_normalized_size (rp, rn);

  r->_mp_size = rx ? -rn : rn;
}

void
mpz_xor (mpz_t r, const mpz_t u, const mpz_t v)
{
  mp_size_t un, vn, i;
  mp_ptr up, vp, rp;

  mp_limb_t ux, vx, rx;
  mp_limb_t uc, vc, rc;
  mp_limb_t ul, vl, rl;

  un = GMP_ABS (u->_mp_size);
  vn = GMP_ABS (v->_mp_size);
  if (un < vn)
    {
      MPZ_SRCPTR_SWAP (u, v);
      MP_SIZE_T_SWAP (un, vn);
    }
  if (vn == 0)
    {
      mpz_set (r, u);
      return;
    }

  uc = u->_mp_size < 0;
  vc = v->_mp_size < 0;
  rc = uc ^ vc;

  ux = -uc;
  vx = -vc;
  rx = -rc;

  rp = MPZ_REALLOC (r, un + (mp_size_t) rc);

  up = u->_mp_d;
  vp = v->_mp_d;

  i = 0;
  do
    {
      ul = (up[i] ^ ux) + uc;
      uc = ul < uc;

      vl = (vp[i] ^ vx) + vc;
      vc = vl < vc;

      rl = (ul ^ vl ^ rx) + rc;
      rc = rl < rc;
      rp[i] = rl;
    }
  while (++i < vn);
  assert (vc == 0);

  for (; i < un; i++)
    {
      ul = (up[i] ^ ux) + uc;
      uc = ul < uc;

      rl = (ul ^ ux) + rc;
      rc = rl < rc;
      rp[i] = rl;
    }
  if (rc)
    rp[un++] = rc;
  else
    un = mpn_normalized_size (rp, un);

  r->_mp_size = rx ? -un : un;
}

static unsigned
gmp_popcount_limb (mp_limb_t x)
{
  unsigned c;

  /* Do 16 bits at a time, to avoid limb-sized constants. */
  int LOCAL_SHIFT_BITS = 16;
  for (c = 0; x > 0;)
    {
      unsigned w = x - ((x >> 1) & 0x5555);
      w = ((w >> 2) & 0x3333) + (w & 0x3333);
      w = (w >> 4) + w;
      w = ((w >> 8) & 0x000f) + (w & 0x000f);
      c += w;
      if (GMP_LIMB_BITS > LOCAL_SHIFT_BITS)
        x >>= LOCAL_SHIFT_BITS;
      else
        x = 0;
    }
  return c;
}
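
/* Worked example (added comment, not from the GMP sources): for
   x = 0xFFFF the SWAR steps give w = 0xFFFF - 0x5555 = 0xAAAA (each
   2-bit field holds 2), then 0x4444 (each nibble holds 4), then 0x4888,
   and finally 0x8 + 0x8 = 16, the number of set bits in the low 16-bit
   chunk. */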

static mp_bitcnt_t
mpn_popcount (mp_srcptr p, mp_size_t n)
{
  mp_size_t i;
  mp_bitcnt_t c;

  for (c = 0, i = 0; i < n; i++)
    c += gmp_popcount_limb (p[i]);

  return c;
}

mp_bitcnt_t
mpz_popcount (const mpz_t u)
{
  mp_size_t un;

  un = u->_mp_size;

  if (un < 0)
    return ~(mp_bitcnt_t) 0;

  return mpn_popcount (u->_mp_d, un);
}

mp_bitcnt_t
mpz_scan1 (const mpz_t u, mp_bitcnt_t starting_bit)
{
  mp_ptr up;
  mp_size_t us, un, i;
  mp_limb_t limb, ux;

  us = u->_mp_size;
  un = GMP_ABS (us);
  i = starting_bit / GMP_LIMB_BITS;

  /* Past the end there's no 1 bits for u>=0, or an immediate 1 bit
     for u<0. Notice this test picks up any u==0 too. */
  if (i >= un)
    return (us >= 0 ? ~(mp_bitcnt_t) 0 : starting_bit);

  up = u->_mp_d;
  ux = 0;
  limb = up[i];

  if (starting_bit != 0)
    {
      if (us < 0)
        {
          ux = mpn_zero_p (up, i);
          limb = ~ limb + ux;
          ux = - (mp_limb_t) (limb >= ux);
        }

      /* Mask to 0 all bits before starting_bit, thus ignoring them. */
      limb &= GMP_LIMB_MAX << (starting_bit % GMP_LIMB_BITS);
    }

  return mpn_common_scan (limb, i, up, un, ux);
}


/* MPZ base conversion. */

size_t
mpz_sizeinbase (const mpz_t u, int base)
{
  mp_size_t un;
  mp_srcptr up;
  mp_ptr tp;
  mp_bitcnt_t bits;
  struct gmp_div_inverse bi;
  size_t ndigits;

  assert (base >= 2);
  assert (base <= 62);

  un = GMP_ABS (u->_mp_size);
  if (un == 0)
    return 1;

  up = u->_mp_d;

  bits = (un - 1) * GMP_LIMB_BITS + mpn_limb_size_in_base_2 (up[un-1]);
  switch (base)
    {
    case 2:
      return bits;
    case 4:
      return (bits + 1) / 2;
    case 8:
      return (bits + 2) / 3;
    case 16:
      return (bits + 3) / 4;
    case 32:
      return (bits + 4) / 5;
      /* FIXME: Do something more clever for the common case of base
         10. */
    }

  tp = gmp_xalloc_limbs (un);
  mpn_copyi (tp, up, un);
  mpn_div_qr_1_invert (&bi, base);

  ndigits = 0;
  do
    {
      ndigits++;
      mpn_div_qr_1_preinv (tp, tp, un, &bi);
      un -= (tp[un-1] == 0);
    }
  while (un > 0);

  gmp_free (tp);
  return ndigits;
}
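
/* Illustrative note (added comment): for power-of-two bases the digit
   count comes straight from the bit length; otherwise digits are counted
   exactly by repeated division, e.g. u = 255 in base 10 takes three
   divisions (255 -> 25 -> 2 -> 0), so mpz_sizeinbase returns 3. */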

static int
gmp_detect_endian (void)
{
  static const int i = 2;
  const unsigned char *p = (const unsigned char *) &i;
  return 1 - *p;
}
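
/* Illustrative note (added comment): on a little-endian machine the first
   byte of i is 2, so this returns 1 - 2 = -1; on a big-endian machine the
   first byte is 0 and it returns 1. The result matches the -1/+1 endian
   convention taken by mpz_import and mpz_export below. */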

/* Import and export. Does not support nails. */
void
mpz_import (mpz_t r, size_t count, int order, size_t size, int endian,
            size_t nails, const void *src)
{
  const unsigned char *p;
  ptrdiff_t word_step;
  mp_ptr rp;
  mp_size_t rn;

  /* The current (partial) limb. */
  mp_limb_t limb;
  /* The number of bytes already copied to this limb (starting from
     the low end). */
  size_t bytes;
  /* The index where the limb should be stored, when completed. */
  mp_size_t i;

  if (nails != 0)
    gmp_die ("mpz_import: Nails not supported.");

  assert (order == 1 || order == -1);
  assert (endian >= -1 && endian <= 1);

  if (endian == 0)
    endian = gmp_detect_endian ();

  p = (unsigned char *) src;

  word_step = (order != endian) ? 2 * size : 0;

  /* Process bytes from the least significant end, so point p at the
     least significant word. */
  if (order == 1)
    {
      p += size * (count - 1);
      word_step = - word_step;
    }

  /* And at the least significant byte of that word. */
  if (endian == 1)
    p += (size - 1);

  rn = (size * count + sizeof(mp_limb_t) - 1) / sizeof(mp_limb_t);
  rp = MPZ_REALLOC (r, rn);

  for (limb = 0, bytes = 0, i = 0; count > 0; count--, p += word_step)
    {
      size_t j;
      for (j = 0; j < size; j++, p -= (ptrdiff_t) endian)
        {
          limb |= (mp_limb_t) *p << (bytes++ * CHAR_BIT);
          if (bytes == sizeof(mp_limb_t))
            {
              rp[i++] = limb;
              bytes = 0;
              limb = 0;
            }
        }
    }
  assert (i + (bytes > 0) == rn);
  if (limb != 0)
    rp[i++] = limb;
  else
    i = mpn_normalized_size (rp, i);

  r->_mp_size = i;
}
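
/* Usage sketch (added comment; a hypothetical caller, not part of this
   file): importing the byte string {0x01, 0x02, 0x03} as size-1 words
   with the most significant word first yields the value 0x010203:

     mpz_t x;
     unsigned char buf[3] = { 0x01, 0x02, 0x03 };
     mpz_init (x);
     mpz_import (x, 3, 1, 1, 1, 0, buf);   // x = 0x010203 = 66051

   With size = 1 the per-word endianness is irrelevant; order = 1 puts
   the most significant word first. */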

void *
mpz_export (void *r, size_t *countp, int order, size_t size, int endian,
            size_t nails, const mpz_t u)
{
  size_t count;
  mp_size_t un;

  if (nails != 0)
    gmp_die ("mpz_export: Nails not supported.");

  assert (order == 1 || order == -1);
  assert (endian >= -1 && endian <= 1);
  assert (size > 0 || u->_mp_size == 0);

  un = u->_mp_size;
  count = 0;
  if (un != 0)
    {
      size_t k;
      unsigned char *p;
      ptrdiff_t word_step;
      /* The current (partial) limb. */
      mp_limb_t limb;
      /* The number of bytes left to do in this limb. */
      size_t bytes;
      /* The index where the limb was read. */
      mp_size_t i;

      un = GMP_ABS (un);

      /* Count bytes in top limb. */
      limb = u->_mp_d[un-1];
      assert (limb != 0);

      k = (GMP_LIMB_BITS <= CHAR_BIT);
      if (!k)
        {
          do {
            int LOCAL_CHAR_BIT = CHAR_BIT;
            k++; limb >>= LOCAL_CHAR_BIT;
          } while (limb != 0);
        }
      /* else limb = 0; */

      count = (k + (un-1) * sizeof (mp_limb_t) + size - 1) / size;

      if (!r)
        r = gmp_xalloc (count * size);

      if (endian == 0)
        endian = gmp_detect_endian ();

      p = (unsigned char *) r;

      word_step = (order != endian) ? 2 * size : 0;

      /* Process bytes from the least significant end, so point p at the
         least significant word. */
      if (order == 1)
        {
          p += size * (count - 1);
          word_step = - word_step;
        }

      /* And at the least significant byte of that word. */
      if (endian == 1)
        p += (size - 1);

      for (bytes = 0, i = 0, k = 0; k < count; k++, p += word_step)
        {
          size_t j;
          for (j = 0; j < size; ++j, p -= (ptrdiff_t) endian)
            {
              if (sizeof (mp_limb_t) == 1)
                {
                  if (i < un)
                    *p = u->_mp_d[i++];
                  else
                    *p = 0;
                }
              else
                {
                  int LOCAL_CHAR_BIT = CHAR_BIT;
                  if (bytes == 0)
                    {
                      if (i < un)
                        limb = u->_mp_d[i++];
                      bytes = sizeof (mp_limb_t);
                    }
                  *p = limb;
                  limb >>= LOCAL_CHAR_BIT;
                  bytes--;
                }
            }
        }
      assert (i == un);
      assert (k == count);
    }

  if (countp)
    *countp = count;

  return r;
}
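
/* Usage sketch (added comment; hypothetical caller): the inverse of the
   mpz_import example above, writing x back out as big-endian bytes:

     size_t n;
     unsigned char out[8];
     mpz_export (out, &n, 1, 1, 1, 0, x);  // n == 3, out = {0x01,0x02,0x03}

   When r is NULL, a buffer of count*size bytes is allocated with the
   configured allocation function and returned. */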

#endif