maint: clarify integer operations in recent commit
[coreutils.git] / src / factor.c
blob60cf898aaa33810875c726e73643af8c09a2f295
1 /* factor -- print prime factors of n.
2 Copyright (C) 1986-2015 Free Software Foundation, Inc.
4 This program is free software: you can redistribute it and/or modify
5 it under the terms of the GNU General Public License as published by
6 the Free Software Foundation, either version 3 of the License, or
7 (at your option) any later version.
9 This program is distributed in the hope that it will be useful,
10 but WITHOUT ANY WARRANTY; without even the implied warranty of
11 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 GNU General Public License for more details.
14 You should have received a copy of the GNU General Public License
15 along with this program. If not, see <http://www.gnu.org/licenses/>. */
17 /* Originally written by Paul Rubin <phr@ocf.berkeley.edu>.
18 Adapted for GNU, fixed to factor UINT_MAX by Jim Meyering.
19 Arbitrary-precision code adapted by James Youngman from Torbjörn
20 Granlund's factorize.c, from GNU MP version 4.2.2.
21 In 2012, the core was rewritten by Torbjörn Granlund and Niels Möller.
22 Contains code from GNU MP. */
24 /* Efficiently factor numbers that fit in one or two words (word = uintmax_t),
25 or, with GMP, numbers of any size.
27 Code organisation:
29 There are several variants of many functions, for handling one word, two
30 words, and GMP's mpz_t type. If the one-word variant is called foo, the
31 two-word variant will be foo2, and the one for mpz_t will be mp_foo. In
32 some cases, the plain function variants will handle both one-word and
33 two-word numbers, evidenced by function arguments.
35 The factoring code for two words will fall into the code for one word when
36 progress allows that.
38 Using GMP is optional. Define HAVE_GMP to make this code include GMP
39 factoring code. The GMP factoring code is based on GMP's demos/factorize.c
40 (last synched 2012-09-07). The GMP-based factoring code will stay in GMP
41 factoring code even if numbers get small enough for using the two-word
42 code.
44 Algorithm:
46 (1) Perform trial division using a small primes table, but without hardware
47 division since the primes table store inverses modulo the word base.
48 (The GMP variant of this code doesn't make use of the precomputed
49 inverses, but instead relies on GMP for fast divisibility testing.)
50 (2) Check the nature of any non-factored part using Miller-Rabin for
51 detecting composites, and Lucas for detecting primes.
52 (3) Factor any remaining composite part using the Pollard-Brent rho
53 algorithm or the SQUFOF algorithm, checking status of found factors
54 again using Miller-Rabin and Lucas.
56 We prefer using Hensel norm in the divisions, not the more familiar
57 Euclidian norm, since the former leads to much faster code. In the
58 Pollard-Brent rho code and the prime testing code, we use Montgomery's
59 trick of multiplying all n-residues by the word base, allowing cheap Hensel
60 reductions mod n.
62 Improvements:
64 * Use modular inverses also for exact division in the Lucas code, and
65 elsewhere. A problem is to locate the inverses not from an index, but
66 from a prime. We might instead compute the inverse on-the-fly.
68 * Tune trial division table size (not forgetting that this is a standalone
69 program where the table will be read from disk for each invocation).
71 * Implement less naive powm, using k-ary exponentiation for k = 3 or
72 perhaps k = 4.
74 * Try to speed trial division code for single uintmax_t numbers, i.e., the
75 code using DIVBLOCK. It currently runs at 2 cycles per prime (Intel SBR,
76 IBR), 3 cycles per prime (AMD Stars) and 5 cycles per prime (AMD BD) when
77 using gcc 4.6 and 4.7. Some software pipelining should help; 1, 2, and 4
78 respectively cycles ought to be possible.
80 * The redcify function could be vastly improved by using (plain Euclidian)
81 pre-inversion (such as GMP's invert_limb) and udiv_qrnnd_preinv (from
82 GMP's gmp-impl.h). The redcify2 function could be vastly improved using
83 similar methoods. These functions currently dominate run time when using
84 the -w option.
87 #include <config.h>
88 #include <getopt.h>
89 #include <stdio.h>
90 #if HAVE_GMP
91 # include <gmp.h>
92 # if !HAVE_DECL_MPZ_INITS
93 # include <stdarg.h>
94 # endif
95 #endif
97 #include <assert.h>
99 #include "system.h"
100 #include "error.h"
101 #include "quote.h"
102 #include "readtokens.h"
103 #include "xstrtol.h"
105 /* The official name of this program (e.g., no 'g' prefix). */
106 #define PROGRAM_NAME "factor"
108 #define AUTHORS \
109 proper_name ("Paul Rubin"), \
110 proper_name_utf8 ("Torbjorn Granlund", "Torbj\303\266rn Granlund"), \
111 proper_name_utf8 ("Niels Moller", "Niels M\303\266ller")
113 /* Token delimiters when reading from a file. */
114 #define DELIM "\n\t "
116 #ifndef STAT_SQUFOF
117 # define STAT_SQUFOF 0
118 #endif
120 #ifndef USE_LONGLONG_H
121 /* With the way we use longlong.h, it's only safe to use
122 when UWtype = UHWtype, as there were various cases
123 (as can be seen in the history for longlong.h) where
124 for example, _LP64 was required to enable W_TYPE_SIZE==64 code,
125 to avoid compile time or run time issues. */
126 # if LONG_MAX == INTMAX_MAX
127 # define USE_LONGLONG_H 1
128 # endif
129 #endif
131 #if USE_LONGLONG_H
133 /* Make definitions for longlong.h to make it do what it can do for us */
135 /* bitcount for uintmax_t */
136 # if UINTMAX_MAX == UINT32_MAX
137 # define W_TYPE_SIZE 32
138 # elif UINTMAX_MAX == UINT64_MAX
139 # define W_TYPE_SIZE 64
140 # elif UINTMAX_MAX == UINT128_MAX
141 # define W_TYPE_SIZE 128
142 # endif
144 # define UWtype uintmax_t
145 # define UHWtype unsigned long int
146 # undef UDWtype
147 # if HAVE_ATTRIBUTE_MODE
148 typedef unsigned int UQItype __attribute__ ((mode (QI)));
149 typedef int SItype __attribute__ ((mode (SI)));
150 typedef unsigned int USItype __attribute__ ((mode (SI)));
151 typedef int DItype __attribute__ ((mode (DI)));
152 typedef unsigned int UDItype __attribute__ ((mode (DI)));
153 # else
154 typedef unsigned char UQItype;
155 typedef long SItype;
156 typedef unsigned long int USItype;
157 # if HAVE_LONG_LONG_INT
158 typedef long long int DItype;
159 typedef unsigned long long int UDItype;
160 # else /* Assume `long' gives us a wide enough type. Needed for hppa2.0w. */
161 typedef long int DItype;
162 typedef unsigned long int UDItype;
163 # endif
164 # endif
165 # define LONGLONG_STANDALONE /* Don't require GMP's longlong.h mdep files */
166 # define ASSERT(x) /* FIXME make longlong.h really standalone */
167 # define __GMP_DECLSPEC /* FIXME make longlong.h really standalone */
168 # define __clz_tab factor_clz_tab /* Rename to avoid glibc collision */
169 # ifndef __GMP_GNUC_PREREQ
170 # define __GMP_GNUC_PREREQ(a,b) 1
171 # endif
173 /* These stub macros are only used in longlong.h in certain system compiler
174 combinations, so ensure usage to avoid -Wunused-macros warnings. */
175 # if __GMP_GNUC_PREREQ (1,1) && defined __clz_tab
176 ASSERT (1)
177 __GMP_DECLSPEC
178 # endif
180 # if _ARCH_PPC
181 # define HAVE_HOST_CPU_FAMILY_powerpc 1
182 # endif
183 # include "longlong.h"
184 # ifdef COUNT_LEADING_ZEROS_NEED_CLZ_TAB
185 const unsigned char factor_clz_tab[129] =
187 1,2,3,3,4,4,4,4,5,5,5,5,5,5,5,5,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,
188 7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,
189 8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,
190 8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,
193 # endif
195 #else /* not USE_LONGLONG_H */
197 # define W_TYPE_SIZE (8 * sizeof (uintmax_t))
198 # define __ll_B ((uintmax_t) 1 << (W_TYPE_SIZE / 2))
199 # define __ll_lowpart(t) ((uintmax_t) (t) & (__ll_B - 1))
200 # define __ll_highpart(t) ((uintmax_t) (t) >> (W_TYPE_SIZE / 2))
202 #endif
204 #if !defined __clz_tab && !defined UHWtype
205 /* Without this seemingly useless conditional, gcc -Wunused-macros
206 warns that each of the two tested macros is unused on Fedora 18.
207 FIXME: this is just an ugly band-aid. Fix it properly. */
208 #endif
210 enum alg_type { ALG_POLLARD_RHO = 1, ALG_SQUFOF = 2 };
212 static enum alg_type alg;
214 /* 2*3*5*7*11...*101 is 128 bits, and has 26 prime factors */
215 #define MAX_NFACTS 26
217 enum
219 DEV_DEBUG_OPTION = CHAR_MAX + 1
222 static struct option const long_options[] =
224 {"-debug", no_argument, NULL, DEV_DEBUG_OPTION},
225 {GETOPT_HELP_OPTION_DECL},
226 {GETOPT_VERSION_OPTION_DECL},
227 {NULL, 0, NULL, 0}
230 struct factors
232 uintmax_t plarge[2]; /* Can have a single large factor */
233 uintmax_t p[MAX_NFACTS];
234 unsigned char e[MAX_NFACTS];
235 unsigned char nfactors;
238 #if HAVE_GMP
239 struct mp_factors
241 mpz_t *p;
242 unsigned long int *e;
243 unsigned long int nfactors;
245 #endif
247 static void factor (uintmax_t, uintmax_t, struct factors *);
249 #ifndef umul_ppmm
250 # define umul_ppmm(w1, w0, u, v) \
251 do { \
252 uintmax_t __x0, __x1, __x2, __x3; \
253 unsigned long int __ul, __vl, __uh, __vh; \
254 uintmax_t __u = (u), __v = (v); \
256 __ul = __ll_lowpart (__u); \
257 __uh = __ll_highpart (__u); \
258 __vl = __ll_lowpart (__v); \
259 __vh = __ll_highpart (__v); \
261 __x0 = (uintmax_t) __ul * __vl; \
262 __x1 = (uintmax_t) __ul * __vh; \
263 __x2 = (uintmax_t) __uh * __vl; \
264 __x3 = (uintmax_t) __uh * __vh; \
266 __x1 += __ll_highpart (__x0);/* this can't give carry */ \
267 __x1 += __x2; /* but this indeed can */ \
268 if (__x1 < __x2) /* did we get it? */ \
269 __x3 += __ll_B; /* yes, add it in the proper pos. */ \
271 (w1) = __x3 + __ll_highpart (__x1); \
272 (w0) = (__x1 << W_TYPE_SIZE / 2) + __ll_lowpart (__x0); \
273 } while (0)
274 #endif
276 #if !defined udiv_qrnnd || defined UDIV_NEEDS_NORMALIZATION
277 /* Define our own, not needing normalization. This function is
278 currently not performance critical, so keep it simple. Similar to
279 the mod macro below. */
280 # undef udiv_qrnnd
281 # define udiv_qrnnd(q, r, n1, n0, d) \
282 do { \
283 uintmax_t __d1, __d0, __q, __r1, __r0; \
285 assert ((n1) < (d)); \
286 __d1 = (d); __d0 = 0; \
287 __r1 = (n1); __r0 = (n0); \
288 __q = 0; \
289 for (unsigned int __i = W_TYPE_SIZE; __i > 0; __i--) \
291 rsh2 (__d1, __d0, __d1, __d0, 1); \
292 __q <<= 1; \
293 if (ge2 (__r1, __r0, __d1, __d0)) \
295 __q++; \
296 sub_ddmmss (__r1, __r0, __r1, __r0, __d1, __d0); \
299 (r) = __r0; \
300 (q) = __q; \
301 } while (0)
302 #endif
304 #if !defined add_ssaaaa
305 # define add_ssaaaa(sh, sl, ah, al, bh, bl) \
306 do { \
307 uintmax_t _add_x; \
308 _add_x = (al) + (bl); \
309 (sh) = (ah) + (bh) + (_add_x < (al)); \
310 (sl) = _add_x; \
311 } while (0)
312 #endif
314 #define rsh2(rh, rl, ah, al, cnt) \
315 do { \
316 (rl) = ((ah) << (W_TYPE_SIZE - (cnt))) | ((al) >> (cnt)); \
317 (rh) = (ah) >> (cnt); \
318 } while (0)
320 #define lsh2(rh, rl, ah, al, cnt) \
321 do { \
322 (rh) = ((ah) << cnt) | ((al) >> (W_TYPE_SIZE - (cnt))); \
323 (rl) = (al) << (cnt); \
324 } while (0)
326 #define ge2(ah, al, bh, bl) \
327 ((ah) > (bh) || ((ah) == (bh) && (al) >= (bl)))
329 #define gt2(ah, al, bh, bl) \
330 ((ah) > (bh) || ((ah) == (bh) && (al) > (bl)))
332 #ifndef sub_ddmmss
333 # define sub_ddmmss(rh, rl, ah, al, bh, bl) \
334 do { \
335 uintmax_t _cy; \
336 _cy = (al) < (bl); \
337 (rl) = (al) - (bl); \
338 (rh) = (ah) - (bh) - _cy; \
339 } while (0)
340 #endif
342 #ifndef count_leading_zeros
343 # define count_leading_zeros(count, x) do { \
344 uintmax_t __clz_x = (x); \
345 unsigned int __clz_c; \
346 for (__clz_c = 0; \
347 (__clz_x & ((uintmax_t) 0xff << (W_TYPE_SIZE - 8))) == 0; \
348 __clz_c += 8) \
349 __clz_x <<= 8; \
350 for (; (intmax_t)__clz_x >= 0; __clz_c++) \
351 __clz_x <<= 1; \
352 (count) = __clz_c; \
353 } while (0)
354 #endif
356 #ifndef count_trailing_zeros
357 # define count_trailing_zeros(count, x) do { \
358 uintmax_t __ctz_x = (x); \
359 unsigned int __ctz_c = 0; \
360 while ((__ctz_x & 1) == 0) \
362 __ctz_x >>= 1; \
363 __ctz_c++; \
365 (count) = __ctz_c; \
366 } while (0)
367 #endif
369 /* Requires that a < n and b <= n */
370 #define submod(r,a,b,n) \
371 do { \
372 uintmax_t _t = - (uintmax_t) (a < b); \
373 (r) = ((n) & _t) + (a) - (b); \
374 } while (0)
376 #define addmod(r,a,b,n) \
377 submod ((r), (a), ((n) - (b)), (n))
379 /* Modular two-word addition and subtraction. For performance reasons, the
380 most significant bit of n1 must be clear. The destination variables must be
381 distinct from the mod operand. */
382 #define addmod2(r1, r0, a1, a0, b1, b0, n1, n0) \
383 do { \
384 add_ssaaaa ((r1), (r0), (a1), (a0), (b1), (b0)); \
385 if (ge2 ((r1), (r0), (n1), (n0))) \
386 sub_ddmmss ((r1), (r0), (r1), (r0), (n1), (n0)); \
387 } while (0)
388 #define submod2(r1, r0, a1, a0, b1, b0, n1, n0) \
389 do { \
390 sub_ddmmss ((r1), (r0), (a1), (a0), (b1), (b0)); \
391 if ((intmax_t) (r1) < 0) \
392 add_ssaaaa ((r1), (r0), (r1), (r0), (n1), (n0)); \
393 } while (0)
395 #define HIGHBIT_TO_MASK(x) \
396 (((intmax_t)-1 >> 1) < 0 \
397 ? (uintmax_t)((intmax_t)(x) >> (W_TYPE_SIZE - 1)) \
398 : ((x) & ((uintmax_t) 1 << (W_TYPE_SIZE - 1)) \
399 ? UINTMAX_MAX : (uintmax_t) 0))
401 /* Compute r = a mod d, where r = <*t1,retval>, a = <a1,a0>, d = <d1,d0>.
402 Requires that d1 != 0. */
403 static uintmax_t
404 mod2 (uintmax_t *r1, uintmax_t a1, uintmax_t a0, uintmax_t d1, uintmax_t d0)
406 int cntd, cnta;
408 assert (d1 != 0);
410 if (a1 == 0)
412 *r1 = 0;
413 return a0;
416 count_leading_zeros (cntd, d1);
417 count_leading_zeros (cnta, a1);
418 int cnt = cntd - cnta;
419 lsh2 (d1, d0, d1, d0, cnt);
420 for (int i = 0; i < cnt; i++)
422 if (ge2 (a1, a0, d1, d0))
423 sub_ddmmss (a1, a0, a1, a0, d1, d0);
424 rsh2 (d1, d0, d1, d0, 1);
427 *r1 = a1;
428 return a0;
431 static uintmax_t _GL_ATTRIBUTE_CONST
432 gcd_odd (uintmax_t a, uintmax_t b)
434 if ( (b & 1) == 0)
436 uintmax_t t = b;
437 b = a;
438 a = t;
440 if (a == 0)
441 return b;
443 /* Take out least significant one bit, to make room for sign */
444 b >>= 1;
446 for (;;)
448 uintmax_t t;
449 uintmax_t bgta;
451 while ((a & 1) == 0)
452 a >>= 1;
453 a >>= 1;
455 t = a - b;
456 if (t == 0)
457 return (a << 1) + 1;
459 bgta = HIGHBIT_TO_MASK (t);
461 /* b <-- min (a, b) */
462 b += (bgta & t);
464 /* a <-- |a - b| */
465 a = (t ^ bgta) - bgta;
469 static uintmax_t
470 gcd2_odd (uintmax_t *r1, uintmax_t a1, uintmax_t a0, uintmax_t b1, uintmax_t b0)
472 while ((a0 & 1) == 0)
473 rsh2 (a1, a0, a1, a0, 1);
474 while ((b0 & 1) == 0)
475 rsh2 (b1, b0, b1, b0, 1);
477 for (;;)
479 if ((b1 | a1) == 0)
481 *r1 = 0;
482 return gcd_odd (b0, a0);
485 if (gt2 (a1, a0, b1, b0))
487 sub_ddmmss (a1, a0, a1, a0, b1, b0);
489 rsh2 (a1, a0, a1, a0, 1);
490 while ((a0 & 1) == 0);
492 else if (gt2 (b1, b0, a1, a0))
494 sub_ddmmss (b1, b0, b1, b0, a1, a0);
496 rsh2 (b1, b0, b1, b0, 1);
497 while ((b0 & 1) == 0);
499 else
500 break;
503 *r1 = a1;
504 return a0;
507 static void
508 factor_insert_multiplicity (struct factors *factors,
509 uintmax_t prime, unsigned int m)
511 unsigned int nfactors = factors->nfactors;
512 uintmax_t *p = factors->p;
513 unsigned char *e = factors->e;
515 /* Locate position for insert new or increment e. */
516 int i;
517 for (i = nfactors - 1; i >= 0; i--)
519 if (p[i] <= prime)
520 break;
523 if (i < 0 || p[i] != prime)
525 for (int j = nfactors - 1; j > i; j--)
527 p[j + 1] = p[j];
528 e[j + 1] = e[j];
530 p[i + 1] = prime;
531 e[i + 1] = m;
532 factors->nfactors = nfactors + 1;
534 else
536 e[i] += m;
540 #define factor_insert(f, p) factor_insert_multiplicity (f, p, 1)
542 static void
543 factor_insert_large (struct factors *factors,
544 uintmax_t p1, uintmax_t p0)
546 if (p1 > 0)
548 assert (factors->plarge[1] == 0);
549 factors->plarge[0] = p0;
550 factors->plarge[1] = p1;
552 else
553 factor_insert (factors, p0);
556 #if HAVE_GMP
558 # if !HAVE_DECL_MPZ_INITS
560 # define mpz_inits(...) mpz_va_init (mpz_init, __VA_ARGS__)
561 # define mpz_clears(...) mpz_va_init (mpz_clear, __VA_ARGS__)
563 static void
564 mpz_va_init (void (*mpz_single_init)(mpz_t), ...)
566 va_list ap;
568 va_start (ap, mpz_single_init);
570 mpz_t *mpz;
571 while ((mpz = va_arg (ap, mpz_t *)))
572 mpz_single_init (*mpz);
574 va_end (ap);
576 # endif
578 static void mp_factor (mpz_t, struct mp_factors *);
580 static void
581 mp_factor_init (struct mp_factors *factors)
583 factors->p = NULL;
584 factors->e = NULL;
585 factors->nfactors = 0;
588 static void
589 mp_factor_clear (struct mp_factors *factors)
591 for (unsigned int i = 0; i < factors->nfactors; i++)
592 mpz_clear (factors->p[i]);
594 free (factors->p);
595 free (factors->e);
598 static void
599 mp_factor_insert (struct mp_factors *factors, mpz_t prime)
601 unsigned long int nfactors = factors->nfactors;
602 mpz_t *p = factors->p;
603 unsigned long int *e = factors->e;
604 long i;
606 /* Locate position for insert new or increment e. */
607 for (i = nfactors - 1; i >= 0; i--)
609 if (mpz_cmp (p[i], prime) <= 0)
610 break;
613 if (i < 0 || mpz_cmp (p[i], prime) != 0)
615 p = xrealloc (p, (nfactors + 1) * sizeof p[0]);
616 e = xrealloc (e, (nfactors + 1) * sizeof e[0]);
618 mpz_init (p[nfactors]);
619 for (long j = nfactors - 1; j > i; j--)
621 mpz_set (p[j + 1], p[j]);
622 e[j + 1] = e[j];
624 mpz_set (p[i + 1], prime);
625 e[i + 1] = 1;
627 factors->p = p;
628 factors->e = e;
629 factors->nfactors = nfactors + 1;
631 else
633 e[i] += 1;
637 static void
638 mp_factor_insert_ui (struct mp_factors *factors, unsigned long int prime)
640 mpz_t pz;
642 mpz_init_set_ui (pz, prime);
643 mp_factor_insert (factors, pz);
644 mpz_clear (pz);
646 #endif /* HAVE_GMP */
649 /* Number of bits in an uintmax_t. */
650 enum { W = sizeof (uintmax_t) * CHAR_BIT };
652 /* Verify that uintmax_t does not have holes in its representation. */
653 verify (UINTMAX_MAX >> (W - 1) == 1);
655 #define P(a,b,c,d) a,
656 static const unsigned char primes_diff[] = {
657 #include "primes.h"
658 0,0,0,0,0,0,0 /* 7 sentinels for 8-way loop */
660 #undef P
662 #define PRIMES_PTAB_ENTRIES \
663 (sizeof (primes_diff) / sizeof (primes_diff[0]) - 8 + 1)
665 #define P(a,b,c,d) b,
666 static const unsigned char primes_diff8[] = {
667 #include "primes.h"
668 0,0,0,0,0,0,0 /* 7 sentinels for 8-way loop */
670 #undef P
672 struct primes_dtab
674 uintmax_t binv, lim;
677 #define P(a,b,c,d) {c,d},
678 static const struct primes_dtab primes_dtab[] = {
679 #include "primes.h"
680 {1,0},{1,0},{1,0},{1,0},{1,0},{1,0},{1,0} /* 7 sentinels for 8-way loop */
682 #undef P
684 /* Verify that uintmax_t is not wider than
685 the integers used to generate primes.h. */
686 verify (W <= WIDE_UINT_BITS);
688 /* debugging for developers. Enables devmsg().
689 This flag is used only in the GMP code. */
690 static bool dev_debug = false;
692 /* Prove primality or run probabilistic tests. */
693 static bool flag_prove_primality = true;
695 /* Number of Miller-Rabin tests to run when not proving primality. */
696 #define MR_REPS 25
698 #ifdef __GNUC__
699 # define LIKELY(cond) __builtin_expect ((cond), 1)
700 # define UNLIKELY(cond) __builtin_expect ((cond), 0)
701 #else
702 # define LIKELY(cond) (cond)
703 # define UNLIKELY(cond) (cond)
704 #endif
706 static void
707 factor_insert_refind (struct factors *factors, uintmax_t p, unsigned int i,
708 unsigned int off)
710 for (unsigned int j = 0; j < off; j++)
711 p += primes_diff[i + j];
712 factor_insert (factors, p);
715 /* Trial division with odd primes uses the following trick.
717 Let p be an odd prime, and B = 2^{W_TYPE_SIZE}. For simplicity,
718 consider the case t < B (this is the second loop below).
720 From our tables we get
722 binv = p^{-1} (mod B)
723 lim = floor ( (B-1) / p ).
725 First assume that t is a multiple of p, t = q * p. Then 0 <= q <= lim
726 (and all quotients in this range occur for some t).
728 Then t = q * p is true also (mod B), and p is invertible we get
730 q = t * binv (mod B).
732 Next, assume that t is *not* divisible by p. Since multiplication
733 by binv (mod B) is a one-to-one mapping,
735 t * binv (mod B) > lim,
737 because all the smaller values are already taken.
739 This can be summed up by saying that the function
741 q(t) = binv * t (mod B)
743 is a permutation of the range 0 <= t < B, with the curious property
744 that it maps the multiples of p onto the range 0 <= q <= lim, in
745 order, and the non-multiples of p onto the range lim < q < B.
748 static uintmax_t
749 factor_using_division (uintmax_t *t1p, uintmax_t t1, uintmax_t t0,
750 struct factors *factors)
752 if (t0 % 2 == 0)
754 unsigned int cnt;
756 if (t0 == 0)
758 count_trailing_zeros (cnt, t1);
759 t0 = t1 >> cnt;
760 t1 = 0;
761 cnt += W_TYPE_SIZE;
763 else
765 count_trailing_zeros (cnt, t0);
766 rsh2 (t1, t0, t1, t0, cnt);
769 factor_insert_multiplicity (factors, 2, cnt);
772 uintmax_t p = 3;
773 unsigned int i;
774 for (i = 0; t1 > 0 && i < PRIMES_PTAB_ENTRIES; i++)
776 for (;;)
778 uintmax_t q1, q0, hi, lo _GL_UNUSED;
780 q0 = t0 * primes_dtab[i].binv;
781 umul_ppmm (hi, lo, q0, p);
782 if (hi > t1)
783 break;
784 hi = t1 - hi;
785 q1 = hi * primes_dtab[i].binv;
786 if (LIKELY (q1 > primes_dtab[i].lim))
787 break;
788 t1 = q1; t0 = q0;
789 factor_insert (factors, p);
791 p += primes_diff[i + 1];
793 if (t1p)
794 *t1p = t1;
796 #define DIVBLOCK(I) \
797 do { \
798 for (;;) \
800 q = t0 * pd[I].binv; \
801 if (LIKELY (q > pd[I].lim)) \
802 break; \
803 t0 = q; \
804 factor_insert_refind (factors, p, i + 1, I); \
806 } while (0)
808 for (; i < PRIMES_PTAB_ENTRIES; i += 8)
810 uintmax_t q;
811 const struct primes_dtab *pd = &primes_dtab[i];
812 DIVBLOCK (0);
813 DIVBLOCK (1);
814 DIVBLOCK (2);
815 DIVBLOCK (3);
816 DIVBLOCK (4);
817 DIVBLOCK (5);
818 DIVBLOCK (6);
819 DIVBLOCK (7);
821 p += primes_diff8[i];
822 if (p * p > t0)
823 break;
826 return t0;
829 #if HAVE_GMP
830 static void
831 mp_factor_using_division (mpz_t t, struct mp_factors *factors)
833 mpz_t q;
834 unsigned long int p;
836 devmsg ("[trial division] ");
838 mpz_init (q);
840 p = mpz_scan1 (t, 0);
841 mpz_div_2exp (t, t, p);
842 while (p)
844 mp_factor_insert_ui (factors, 2);
845 --p;
848 p = 3;
849 for (unsigned int i = 1; i <= PRIMES_PTAB_ENTRIES;)
851 if (! mpz_divisible_ui_p (t, p))
853 p += primes_diff[i++];
854 if (mpz_cmp_ui (t, p * p) < 0)
855 break;
857 else
859 mpz_tdiv_q_ui (t, t, p);
860 mp_factor_insert_ui (factors, p);
864 mpz_clear (q);
866 #endif
868 /* Entry i contains (2i+1)^(-1) mod 2^8. */
869 static const unsigned char binvert_table[128] =
871 0x01, 0xAB, 0xCD, 0xB7, 0x39, 0xA3, 0xC5, 0xEF,
872 0xF1, 0x1B, 0x3D, 0xA7, 0x29, 0x13, 0x35, 0xDF,
873 0xE1, 0x8B, 0xAD, 0x97, 0x19, 0x83, 0xA5, 0xCF,
874 0xD1, 0xFB, 0x1D, 0x87, 0x09, 0xF3, 0x15, 0xBF,
875 0xC1, 0x6B, 0x8D, 0x77, 0xF9, 0x63, 0x85, 0xAF,
876 0xB1, 0xDB, 0xFD, 0x67, 0xE9, 0xD3, 0xF5, 0x9F,
877 0xA1, 0x4B, 0x6D, 0x57, 0xD9, 0x43, 0x65, 0x8F,
878 0x91, 0xBB, 0xDD, 0x47, 0xC9, 0xB3, 0xD5, 0x7F,
879 0x81, 0x2B, 0x4D, 0x37, 0xB9, 0x23, 0x45, 0x6F,
880 0x71, 0x9B, 0xBD, 0x27, 0xA9, 0x93, 0xB5, 0x5F,
881 0x61, 0x0B, 0x2D, 0x17, 0x99, 0x03, 0x25, 0x4F,
882 0x51, 0x7B, 0x9D, 0x07, 0x89, 0x73, 0x95, 0x3F,
883 0x41, 0xEB, 0x0D, 0xF7, 0x79, 0xE3, 0x05, 0x2F,
884 0x31, 0x5B, 0x7D, 0xE7, 0x69, 0x53, 0x75, 0x1F,
885 0x21, 0xCB, 0xED, 0xD7, 0x59, 0xC3, 0xE5, 0x0F,
886 0x11, 0x3B, 0x5D, 0xC7, 0x49, 0x33, 0x55, 0xFF
889 /* Compute n^(-1) mod B, using a Newton iteration. */
890 #define binv(inv,n) \
891 do { \
892 uintmax_t __n = (n); \
893 uintmax_t __inv; \
895 __inv = binvert_table[(__n / 2) & 0x7F]; /* 8 */ \
896 if (W_TYPE_SIZE > 8) __inv = 2 * __inv - __inv * __inv * __n; \
897 if (W_TYPE_SIZE > 16) __inv = 2 * __inv - __inv * __inv * __n; \
898 if (W_TYPE_SIZE > 32) __inv = 2 * __inv - __inv * __inv * __n; \
900 if (W_TYPE_SIZE > 64) \
902 int __invbits = 64; \
903 do { \
904 __inv = 2 * __inv - __inv * __inv * __n; \
905 __invbits *= 2; \
906 } while (__invbits < W_TYPE_SIZE); \
909 (inv) = __inv; \
910 } while (0)
912 /* q = u / d, assuming d|u. */
913 #define divexact_21(q1, q0, u1, u0, d) \
914 do { \
915 uintmax_t _di, _q0; \
916 binv (_di, (d)); \
917 _q0 = (u0) * _di; \
918 if ((u1) >= (d)) \
920 uintmax_t _p1, _p0 _GL_UNUSED; \
921 umul_ppmm (_p1, _p0, _q0, d); \
922 (q1) = ((u1) - _p1) * _di; \
923 (q0) = _q0; \
925 else \
927 (q0) = _q0; \
928 (q1) = 0; \
930 } while (0)
932 /* x B (mod n). */
933 #define redcify(r_prim, r, n) \
934 do { \
935 uintmax_t _redcify_q _GL_UNUSED; \
936 udiv_qrnnd (_redcify_q, r_prim, r, 0, n); \
937 } while (0)
939 /* x B^2 (mod n). Requires x > 0, n1 < B/2 */
940 #define redcify2(r1, r0, x, n1, n0) \
941 do { \
942 uintmax_t _r1, _r0, _i; \
943 if ((x) < (n1)) \
945 _r1 = (x); _r0 = 0; \
946 _i = W_TYPE_SIZE; \
948 else \
950 _r1 = 0; _r0 = (x); \
951 _i = 2*W_TYPE_SIZE; \
953 while (_i-- > 0) \
955 lsh2 (_r1, _r0, _r1, _r0, 1); \
956 if (ge2 (_r1, _r0, (n1), (n0))) \
957 sub_ddmmss (_r1, _r0, _r1, _r0, (n1), (n0)); \
959 (r1) = _r1; \
960 (r0) = _r0; \
961 } while (0)
963 /* Modular two-word multiplication, r = a * b mod m, with mi = m^(-1) mod B.
964 Both a and b must be in redc form, the result will be in redc form too. */
965 static inline uintmax_t
966 mulredc (uintmax_t a, uintmax_t b, uintmax_t m, uintmax_t mi)
968 uintmax_t rh, rl, q, th, tl _GL_UNUSED, xh;
970 umul_ppmm (rh, rl, a, b);
971 q = rl * mi;
972 umul_ppmm (th, tl, q, m);
973 xh = rh - th;
974 if (rh < th)
975 xh += m;
977 return xh;
980 /* Modular two-word multiplication, r = a * b mod m, with mi = m^(-1) mod B.
981 Both a and b must be in redc form, the result will be in redc form too.
982 For performance reasons, the most significant bit of m must be clear. */
983 static uintmax_t
984 mulredc2 (uintmax_t *r1p,
985 uintmax_t a1, uintmax_t a0, uintmax_t b1, uintmax_t b0,
986 uintmax_t m1, uintmax_t m0, uintmax_t mi)
988 uintmax_t r1, r0, q, p1, p0 _GL_UNUSED, t1, t0, s1, s0;
989 mi = -mi;
990 assert ( (a1 >> (W_TYPE_SIZE - 1)) == 0);
991 assert ( (b1 >> (W_TYPE_SIZE - 1)) == 0);
992 assert ( (m1 >> (W_TYPE_SIZE - 1)) == 0);
994 /* First compute a0 * <b1, b0> B^{-1}
995 +-----+
996 |a0 b0|
997 +--+--+--+
998 |a0 b1|
999 +--+--+--+
1000 |q0 m0|
1001 +--+--+--+
1002 |q0 m1|
1003 -+--+--+--+
1004 |r1|r0| 0|
1005 +--+--+--+
1007 umul_ppmm (t1, t0, a0, b0);
1008 umul_ppmm (r1, r0, a0, b1);
1009 q = mi * t0;
1010 umul_ppmm (p1, p0, q, m0);
1011 umul_ppmm (s1, s0, q, m1);
1012 r0 += (t0 != 0); /* Carry */
1013 add_ssaaaa (r1, r0, r1, r0, 0, p1);
1014 add_ssaaaa (r1, r0, r1, r0, 0, t1);
1015 add_ssaaaa (r1, r0, r1, r0, s1, s0);
1017 /* Next, (a1 * <b1, b0> + <r1, r0> B^{-1}
1018 +-----+
1019 |a1 b0|
1020 +--+--+
1021 |r1|r0|
1022 +--+--+--+
1023 |a1 b1|
1024 +--+--+--+
1025 |q1 m0|
1026 +--+--+--+
1027 |q1 m1|
1028 -+--+--+--+
1029 |r1|r0| 0|
1030 +--+--+--+
1032 umul_ppmm (t1, t0, a1, b0);
1033 umul_ppmm (s1, s0, a1, b1);
1034 add_ssaaaa (t1, t0, t1, t0, 0, r0);
1035 q = mi * t0;
1036 add_ssaaaa (r1, r0, s1, s0, 0, r1);
1037 umul_ppmm (p1, p0, q, m0);
1038 umul_ppmm (s1, s0, q, m1);
1039 r0 += (t0 != 0); /* Carry */
1040 add_ssaaaa (r1, r0, r1, r0, 0, p1);
1041 add_ssaaaa (r1, r0, r1, r0, 0, t1);
1042 add_ssaaaa (r1, r0, r1, r0, s1, s0);
1044 if (ge2 (r1, r0, m1, m0))
1045 sub_ddmmss (r1, r0, r1, r0, m1, m0);
1047 *r1p = r1;
1048 return r0;
1051 static uintmax_t _GL_ATTRIBUTE_CONST
1052 powm (uintmax_t b, uintmax_t e, uintmax_t n, uintmax_t ni, uintmax_t one)
1054 uintmax_t y = one;
1056 if (e & 1)
1057 y = b;
1059 while (e != 0)
1061 b = mulredc (b, b, n, ni);
1062 e >>= 1;
1064 if (e & 1)
1065 y = mulredc (y, b, n, ni);
1068 return y;
1071 static uintmax_t
1072 powm2 (uintmax_t *r1m,
1073 const uintmax_t *bp, const uintmax_t *ep, const uintmax_t *np,
1074 uintmax_t ni, const uintmax_t *one)
1076 uintmax_t r1, r0, b1, b0, n1, n0;
1077 unsigned int i;
1078 uintmax_t e;
1080 b0 = bp[0];
1081 b1 = bp[1];
1082 n0 = np[0];
1083 n1 = np[1];
1085 r0 = one[0];
1086 r1 = one[1];
1088 for (e = ep[0], i = W_TYPE_SIZE; i > 0; i--, e >>= 1)
1090 if (e & 1)
1092 r0 = mulredc2 (r1m, r1, r0, b1, b0, n1, n0, ni);
1093 r1 = *r1m;
1095 b0 = mulredc2 (r1m, b1, b0, b1, b0, n1, n0, ni);
1096 b1 = *r1m;
1098 for (e = ep[1]; e > 0; e >>= 1)
1100 if (e & 1)
1102 r0 = mulredc2 (r1m, r1, r0, b1, b0, n1, n0, ni);
1103 r1 = *r1m;
1105 b0 = mulredc2 (r1m, b1, b0, b1, b0, n1, n0, ni);
1106 b1 = *r1m;
1108 *r1m = r1;
1109 return r0;
1112 static bool _GL_ATTRIBUTE_CONST
1113 millerrabin (uintmax_t n, uintmax_t ni, uintmax_t b, uintmax_t q,
1114 unsigned int k, uintmax_t one)
1116 uintmax_t y = powm (b, q, n, ni, one);
1118 uintmax_t nm1 = n - one; /* -1, but in redc representation. */
1120 if (y == one || y == nm1)
1121 return true;
1123 for (unsigned int i = 1; i < k; i++)
1125 y = mulredc (y, y, n, ni);
1127 if (y == nm1)
1128 return true;
1129 if (y == one)
1130 return false;
1132 return false;
1135 static bool
1136 millerrabin2 (const uintmax_t *np, uintmax_t ni, const uintmax_t *bp,
1137 const uintmax_t *qp, unsigned int k, const uintmax_t *one)
1139 uintmax_t y1, y0, nm1_1, nm1_0, r1m;
1141 y0 = powm2 (&r1m, bp, qp, np, ni, one);
1142 y1 = r1m;
1144 if (y0 == one[0] && y1 == one[1])
1145 return true;
1147 sub_ddmmss (nm1_1, nm1_0, np[1], np[0], one[1], one[0]);
1149 if (y0 == nm1_0 && y1 == nm1_1)
1150 return true;
1152 for (unsigned int i = 1; i < k; i++)
1154 y0 = mulredc2 (&r1m, y1, y0, y1, y0, np[1], np[0], ni);
1155 y1 = r1m;
1157 if (y0 == nm1_0 && y1 == nm1_1)
1158 return true;
1159 if (y0 == one[0] && y1 == one[1])
1160 return false;
1162 return false;
1165 #if HAVE_GMP
1166 static bool
1167 mp_millerrabin (mpz_srcptr n, mpz_srcptr nm1, mpz_ptr x, mpz_ptr y,
1168 mpz_srcptr q, unsigned long int k)
1170 mpz_powm (y, x, q, n);
1172 if (mpz_cmp_ui (y, 1) == 0 || mpz_cmp (y, nm1) == 0)
1173 return true;
1175 for (unsigned long int i = 1; i < k; i++)
1177 mpz_powm_ui (y, y, 2, n);
1178 if (mpz_cmp (y, nm1) == 0)
1179 return true;
1180 if (mpz_cmp_ui (y, 1) == 0)
1181 return false;
1183 return false;
1185 #endif
1187 /* Lucas' prime test. The number of iterations vary greatly, up to a few dozen
1188 have been observed. The average seem to be about 2. */
1189 static bool
1190 prime_p (uintmax_t n)
1192 int k;
1193 bool is_prime;
1194 uintmax_t a_prim, one, ni;
1195 struct factors factors;
1197 if (n <= 1)
1198 return false;
1200 /* We have already casted out small primes. */
1201 if (n < (uintmax_t) FIRST_OMITTED_PRIME * FIRST_OMITTED_PRIME)
1202 return true;
1204 /* Precomputation for Miller-Rabin. */
1205 uintmax_t q = n - 1;
1206 for (k = 0; (q & 1) == 0; k++)
1207 q >>= 1;
1209 uintmax_t a = 2;
1210 binv (ni, n); /* ni <- 1/n mod B */
1211 redcify (one, 1, n);
1212 addmod (a_prim, one, one, n); /* i.e., redcify a = 2 */
1214 /* Perform a Miller-Rabin test, finds most composites quickly. */
1215 if (!millerrabin (n, ni, a_prim, q, k, one))
1216 return false;
1218 if (flag_prove_primality)
1220 /* Factor n-1 for Lucas. */
1221 factor (0, n - 1, &factors);
1224 /* Loop until Lucas proves our number prime, or Miller-Rabin proves our
1225 number composite. */
1226 for (unsigned int r = 0; r < PRIMES_PTAB_ENTRIES; r++)
1228 if (flag_prove_primality)
1230 is_prime = true;
1231 for (unsigned int i = 0; i < factors.nfactors && is_prime; i++)
1233 is_prime
1234 = powm (a_prim, (n - 1) / factors.p[i], n, ni, one) != one;
1237 else
1239 /* After enough Miller-Rabin runs, be content. */
1240 is_prime = (r == MR_REPS - 1);
1243 if (is_prime)
1244 return true;
1246 a += primes_diff[r]; /* Establish new base. */
1248 /* The following is equivalent to redcify (a_prim, a, n). It runs faster
1249 on most processors, since it avoids udiv_qrnnd. If we go down the
1250 udiv_qrnnd_preinv path, this code should be replaced. */
1252 uintmax_t s1, s0;
1253 umul_ppmm (s1, s0, one, a);
1254 if (LIKELY (s1 == 0))
1255 a_prim = s0 % n;
1256 else
1258 uintmax_t dummy _GL_UNUSED;
1259 udiv_qrnnd (dummy, a_prim, s1, s0, n);
1263 if (!millerrabin (n, ni, a_prim, q, k, one))
1264 return false;
1267 error (0, 0, _("Lucas prime test failure. This should not happen"));
1268 abort ();
1271 static bool
1272 prime2_p (uintmax_t n1, uintmax_t n0)
1274 uintmax_t q[2], nm1[2];
1275 uintmax_t a_prim[2];
1276 uintmax_t one[2];
1277 uintmax_t na[2];
1278 uintmax_t ni;
1279 unsigned int k;
1280 struct factors factors;
1282 if (n1 == 0)
1283 return prime_p (n0);
1285 nm1[1] = n1 - (n0 == 0);
1286 nm1[0] = n0 - 1;
1287 if (nm1[0] == 0)
1289 count_trailing_zeros (k, nm1[1]);
1291 q[0] = nm1[1] >> k;
1292 q[1] = 0;
1293 k += W_TYPE_SIZE;
1295 else
1297 count_trailing_zeros (k, nm1[0]);
1298 rsh2 (q[1], q[0], nm1[1], nm1[0], k);
1301 uintmax_t a = 2;
1302 binv (ni, n0);
1303 redcify2 (one[1], one[0], 1, n1, n0);
1304 addmod2 (a_prim[1], a_prim[0], one[1], one[0], one[1], one[0], n1, n0);
1306 /* FIXME: Use scalars or pointers in arguments? Some consistency needed. */
1307 na[0] = n0;
1308 na[1] = n1;
1310 if (!millerrabin2 (na, ni, a_prim, q, k, one))
1311 return false;
1313 if (flag_prove_primality)
1315 /* Factor n-1 for Lucas. */
1316 factor (nm1[1], nm1[0], &factors);
1319 /* Loop until Lucas proves our number prime, or Miller-Rabin proves our
1320 number composite. */
1321 for (unsigned int r = 0; r < PRIMES_PTAB_ENTRIES; r++)
1323 bool is_prime;
1324 uintmax_t e[2], y[2];
1326 if (flag_prove_primality)
1328 is_prime = true;
1329 if (factors.plarge[1])
1331 uintmax_t pi;
1332 binv (pi, factors.plarge[0]);
1333 e[0] = pi * nm1[0];
1334 e[1] = 0;
1335 y[0] = powm2 (&y[1], a_prim, e, na, ni, one);
1336 is_prime = (y[0] != one[0] || y[1] != one[1]);
1338 for (unsigned int i = 0; i < factors.nfactors && is_prime; i++)
1340 /* FIXME: We always have the factor 2. Do we really need to
1341 handle it here? We have done the same powering as part
1342 of millerrabin. */
1343 if (factors.p[i] == 2)
1344 rsh2 (e[1], e[0], nm1[1], nm1[0], 1);
1345 else
1346 divexact_21 (e[1], e[0], nm1[1], nm1[0], factors.p[i]);
1347 y[0] = powm2 (&y[1], a_prim, e, na, ni, one);
1348 is_prime = (y[0] != one[0] || y[1] != one[1]);
1351 else
1353 /* After enough Miller-Rabin runs, be content. */
1354 is_prime = (r == MR_REPS - 1);
1357 if (is_prime)
1358 return true;
1360 a += primes_diff[r]; /* Establish new base. */
1361 redcify2 (a_prim[1], a_prim[0], a, n1, n0);
1363 if (!millerrabin2 (na, ni, a_prim, q, k, one))
1364 return false;
1367 error (0, 0, _("Lucas prime test failure. This should not happen"));
1368 abort ();
1371 #if HAVE_GMP
1372 static bool
1373 mp_prime_p (mpz_t n)
1375 bool is_prime;
1376 mpz_t q, a, nm1, tmp;
1377 struct mp_factors factors;
1379 if (mpz_cmp_ui (n, 1) <= 0)
1380 return false;
1382 /* We have already casted out small primes. */
1383 if (mpz_cmp_ui (n, (long) FIRST_OMITTED_PRIME * FIRST_OMITTED_PRIME) < 0)
1384 return true;
1386 mpz_inits (q, a, nm1, tmp, NULL);
1388 /* Precomputation for Miller-Rabin. */
1389 mpz_sub_ui (nm1, n, 1);
1391 /* Find q and k, where q is odd and n = 1 + 2**k * q. */
1392 unsigned long int k = mpz_scan1 (nm1, 0);
1393 mpz_tdiv_q_2exp (q, nm1, k);
1395 mpz_set_ui (a, 2);
1397 /* Perform a Miller-Rabin test, finds most composites quickly. */
1398 if (!mp_millerrabin (n, nm1, a, tmp, q, k))
1400 is_prime = false;
1401 goto ret2;
1404 if (flag_prove_primality)
1406 /* Factor n-1 for Lucas. */
1407 mpz_set (tmp, nm1);
1408 mp_factor (tmp, &factors);
1411 /* Loop until Lucas proves our number prime, or Miller-Rabin proves our
1412 number composite. */
1413 for (unsigned int r = 0; r < PRIMES_PTAB_ENTRIES; r++)
1415 if (flag_prove_primality)
1417 is_prime = true;
1418 for (unsigned long int i = 0; i < factors.nfactors && is_prime; i++)
1420 mpz_divexact (tmp, nm1, factors.p[i]);
1421 mpz_powm (tmp, a, tmp, n);
1422 is_prime = mpz_cmp_ui (tmp, 1) != 0;
1425 else
1427 /* After enough Miller-Rabin runs, be content. */
1428 is_prime = (r == MR_REPS - 1);
1431 if (is_prime)
1432 goto ret1;
1434 mpz_add_ui (a, a, primes_diff[r]); /* Establish new base. */
1436 if (!mp_millerrabin (n, nm1, a, tmp, q, k))
1438 is_prime = false;
1439 goto ret1;
1443 error (0, 0, _("Lucas prime test failure. This should not happen"));
1444 abort ();
1446 ret1:
1447 if (flag_prove_primality)
1448 mp_factor_clear (&factors);
1449 ret2:
1450 mpz_clears (q, a, nm1, tmp, NULL);
1452 return is_prime;
1454 #endif
1456 static void
1457 factor_using_pollard_rho (uintmax_t n, unsigned long int a,
1458 struct factors *factors)
1460 uintmax_t x, z, y, P, t, ni, g;
1462 unsigned long int k = 1;
1463 unsigned long int l = 1;
1465 redcify (P, 1, n);
1466 addmod (x, P, P, n); /* i.e., redcify(2) */
1467 y = z = x;
1469 while (n != 1)
1471 assert (a < n);
1473 binv (ni, n); /* FIXME: when could we use old 'ni' value? */
1475 for (;;)
1479 x = mulredc (x, x, n, ni);
1480 addmod (x, x, a, n);
1482 submod (t, z, x, n);
1483 P = mulredc (P, t, n, ni);
1485 if (k % 32 == 1)
1487 if (gcd_odd (P, n) != 1)
1488 goto factor_found;
1489 y = x;
1492 while (--k != 0);
1494 z = x;
1495 k = l;
1496 l = 2 * l;
1497 for (unsigned long int i = 0; i < k; i++)
1499 x = mulredc (x, x, n, ni);
1500 addmod (x, x, a, n);
1502 y = x;
1505 factor_found:
1508 y = mulredc (y, y, n, ni);
1509 addmod (y, y, a, n);
1511 submod (t, z, y, n);
1512 g = gcd_odd (t, n);
1514 while (g == 1);
1516 n = n / g;
1518 if (!prime_p (g))
1519 factor_using_pollard_rho (g, a + 1, factors);
1520 else
1521 factor_insert (factors, g);
1523 if (prime_p (n))
1525 factor_insert (factors, n);
1526 break;
1529 x = x % n;
1530 z = z % n;
1531 y = y % n;
1535 static void
1536 factor_using_pollard_rho2 (uintmax_t n1, uintmax_t n0, unsigned long int a,
1537 struct factors *factors)
1539 uintmax_t x1, x0, z1, z0, y1, y0, P1, P0, t1, t0, ni, g1, g0, r1m;
1541 unsigned long int k = 1;
1542 unsigned long int l = 1;
1544 redcify2 (P1, P0, 1, n1, n0);
1545 addmod2 (x1, x0, P1, P0, P1, P0, n1, n0); /* i.e., redcify(2) */
1546 y1 = z1 = x1;
1547 y0 = z0 = x0;
1549 while (n1 != 0 || n0 != 1)
1551 binv (ni, n0);
1553 for (;;)
1557 x0 = mulredc2 (&r1m, x1, x0, x1, x0, n1, n0, ni);
1558 x1 = r1m;
1559 addmod2 (x1, x0, x1, x0, 0, (uintmax_t) a, n1, n0);
1561 submod2 (t1, t0, z1, z0, x1, x0, n1, n0);
1562 P0 = mulredc2 (&r1m, P1, P0, t1, t0, n1, n0, ni);
1563 P1 = r1m;
1565 if (k % 32 == 1)
1567 g0 = gcd2_odd (&g1, P1, P0, n1, n0);
1568 if (g1 != 0 || g0 != 1)
1569 goto factor_found;
1570 y1 = x1; y0 = x0;
1573 while (--k != 0);
1575 z1 = x1; z0 = x0;
1576 k = l;
1577 l = 2 * l;
1578 for (unsigned long int i = 0; i < k; i++)
1580 x0 = mulredc2 (&r1m, x1, x0, x1, x0, n1, n0, ni);
1581 x1 = r1m;
1582 addmod2 (x1, x0, x1, x0, 0, (uintmax_t) a, n1, n0);
1584 y1 = x1; y0 = x0;
1587 factor_found:
1590 y0 = mulredc2 (&r1m, y1, y0, y1, y0, n1, n0, ni);
1591 y1 = r1m;
1592 addmod2 (y1, y0, y1, y0, 0, (uintmax_t) a, n1, n0);
1594 submod2 (t1, t0, z1, z0, y1, y0, n1, n0);
1595 g0 = gcd2_odd (&g1, t1, t0, n1, n0);
1597 while (g1 == 0 && g0 == 1);
1599 if (g1 == 0)
1601 /* The found factor is one word. */
1602 divexact_21 (n1, n0, n1, n0, g0); /* n = n / g */
1604 if (!prime_p (g0))
1605 factor_using_pollard_rho (g0, a + 1, factors);
1606 else
1607 factor_insert (factors, g0);
1609 else
1611 /* The found factor is two words. This is highly unlikely, thus hard
1612 to trigger. Please be careful before you change this code! */
1613 uintmax_t ginv;
1615 binv (ginv, g0); /* Compute n = n / g. Since the result will */
1616 n0 = ginv * n0; /* fit one word, we can compute the quotient */
1617 n1 = 0; /* modulo B, ignoring the high divisor word. */
1619 if (!prime2_p (g1, g0))
1620 factor_using_pollard_rho2 (g1, g0, a + 1, factors);
1621 else
1622 factor_insert_large (factors, g1, g0);
1625 if (n1 == 0)
1627 if (prime_p (n0))
1629 factor_insert (factors, n0);
1630 break;
1633 factor_using_pollard_rho (n0, a, factors);
1634 return;
1637 if (prime2_p (n1, n0))
1639 factor_insert_large (factors, n1, n0);
1640 break;
1643 x0 = mod2 (&x1, x1, x0, n1, n0);
1644 z0 = mod2 (&z1, z1, z0, n1, n0);
1645 y0 = mod2 (&y1, y1, y0, n1, n0);
1649 #if HAVE_GMP
1650 static void
1651 mp_factor_using_pollard_rho (mpz_t n, unsigned long int a,
1652 struct mp_factors *factors)
1654 mpz_t x, z, y, P;
1655 mpz_t t, t2;
1657 devmsg ("[pollard-rho (%lu)] ", a);
1659 mpz_inits (t, t2, NULL);
1660 mpz_init_set_si (y, 2);
1661 mpz_init_set_si (x, 2);
1662 mpz_init_set_si (z, 2);
1663 mpz_init_set_ui (P, 1);
1665 unsigned long long int k = 1;
1666 unsigned long long int l = 1;
1668 while (mpz_cmp_ui (n, 1) != 0)
1670 for (;;)
1674 mpz_mul (t, x, x);
1675 mpz_mod (x, t, n);
1676 mpz_add_ui (x, x, a);
1678 mpz_sub (t, z, x);
1679 mpz_mul (t2, P, t);
1680 mpz_mod (P, t2, n);
1682 if (k % 32 == 1)
1684 mpz_gcd (t, P, n);
1685 if (mpz_cmp_ui (t, 1) != 0)
1686 goto factor_found;
1687 mpz_set (y, x);
1690 while (--k != 0);
1692 mpz_set (z, x);
1693 k = l;
1694 l = 2 * l;
1695 for (unsigned long long int i = 0; i < k; i++)
1697 mpz_mul (t, x, x);
1698 mpz_mod (x, t, n);
1699 mpz_add_ui (x, x, a);
1701 mpz_set (y, x);
1704 factor_found:
1707 mpz_mul (t, y, y);
1708 mpz_mod (y, t, n);
1709 mpz_add_ui (y, y, a);
1711 mpz_sub (t, z, y);
1712 mpz_gcd (t, t, n);
1714 while (mpz_cmp_ui (t, 1) == 0);
1716 mpz_divexact (n, n, t); /* divide by t, before t is overwritten */
1718 if (!mp_prime_p (t))
1720 devmsg ("[composite factor--restarting pollard-rho] ");
1721 mp_factor_using_pollard_rho (t, a + 1, factors);
1723 else
1725 mp_factor_insert (factors, t);
1728 if (mp_prime_p (n))
1730 mp_factor_insert (factors, n);
1731 break;
1734 mpz_mod (x, x, n);
1735 mpz_mod (z, z, n);
1736 mpz_mod (y, y, n);
1739 mpz_clears (P, t2, t, z, x, y, NULL);
1741 #endif
1743 /* FIXME: Maybe better to use an iteration converging to 1/sqrt(n)? If
1744 algorithm is replaced, consider also returning the remainder. */
1745 static uintmax_t _GL_ATTRIBUTE_CONST
1746 isqrt (uintmax_t n)
1748 uintmax_t x;
1749 unsigned c;
1750 if (n == 0)
1751 return 0;
1753 count_leading_zeros (c, n);
1755 /* Make x > sqrt(n). This will be invariant through the loop. */
1756 x = (uintmax_t) 1 << ((W_TYPE_SIZE + 1 - c) / 2);
1758 for (;;)
1760 uintmax_t y = (x + n/x) / 2;
1761 if (y >= x)
1762 return x;
1764 x = y;
1768 static uintmax_t _GL_ATTRIBUTE_CONST
1769 isqrt2 (uintmax_t nh, uintmax_t nl)
1771 unsigned int shift;
1772 uintmax_t x;
1774 /* Ensures the remainder fits in an uintmax_t. */
1775 assert (nh < ((uintmax_t) 1 << (W_TYPE_SIZE - 2)));
1777 if (nh == 0)
1778 return isqrt (nl);
1780 count_leading_zeros (shift, nh);
1781 shift &= ~1;
1783 /* Make x > sqrt(n) */
1784 x = isqrt ( (nh << shift) + (nl >> (W_TYPE_SIZE - shift))) + 1;
1785 x <<= (W_TYPE_SIZE - shift) / 2;
1787 /* Do we need more than one iteration? */
1788 for (;;)
1790 uintmax_t r _GL_UNUSED;
1791 uintmax_t q, y;
1792 udiv_qrnnd (q, r, nh, nl, x);
1793 y = (x + q) / 2;
1795 if (y >= x)
1797 uintmax_t hi, lo;
1798 umul_ppmm (hi, lo, x + 1, x + 1);
1799 assert (gt2 (hi, lo, nh, nl));
1801 umul_ppmm (hi, lo, x, x);
1802 assert (ge2 (nh, nl, hi, lo));
1803 sub_ddmmss (hi, lo, nh, nl, hi, lo);
1804 assert (hi == 0);
1806 return x;
1809 x = y;
1813 /* MAGIC[N] has a bit i set iff i is a quadratic residue mod N. */
1814 #define MAGIC64 0x0202021202030213ULL
1815 #define MAGIC63 0x0402483012450293ULL
1816 #define MAGIC65 0x218a019866014613ULL
1817 #define MAGIC11 0x23b
1819 /* Return the square root if the input is a square, otherwise 0. */
1820 static uintmax_t _GL_ATTRIBUTE_CONST
1821 is_square (uintmax_t x)
1823 /* Uses the tests suggested by Cohen. Excludes 99% of the non-squares before
1824 computing the square root. */
1825 if (((MAGIC64 >> (x & 63)) & 1)
1826 && ((MAGIC63 >> (x % 63)) & 1)
1827 /* Both 0 and 64 are squares mod (65) */
1828 && ((MAGIC65 >> ((x % 65) & 63)) & 1)
1829 && ((MAGIC11 >> (x % 11) & 1)))
1831 uintmax_t r = isqrt (x);
1832 if (r*r == x)
1833 return r;
1835 return 0;
1838 /* invtab[i] = floor(0x10000 / (0x100 + i) */
1839 static const unsigned short invtab[0x81] =
1841 0x200,
1842 0x1fc, 0x1f8, 0x1f4, 0x1f0, 0x1ec, 0x1e9, 0x1e5, 0x1e1,
1843 0x1de, 0x1da, 0x1d7, 0x1d4, 0x1d0, 0x1cd, 0x1ca, 0x1c7,
1844 0x1c3, 0x1c0, 0x1bd, 0x1ba, 0x1b7, 0x1b4, 0x1b2, 0x1af,
1845 0x1ac, 0x1a9, 0x1a6, 0x1a4, 0x1a1, 0x19e, 0x19c, 0x199,
1846 0x197, 0x194, 0x192, 0x18f, 0x18d, 0x18a, 0x188, 0x186,
1847 0x183, 0x181, 0x17f, 0x17d, 0x17a, 0x178, 0x176, 0x174,
1848 0x172, 0x170, 0x16e, 0x16c, 0x16a, 0x168, 0x166, 0x164,
1849 0x162, 0x160, 0x15e, 0x15c, 0x15a, 0x158, 0x157, 0x155,
1850 0x153, 0x151, 0x150, 0x14e, 0x14c, 0x14a, 0x149, 0x147,
1851 0x146, 0x144, 0x142, 0x141, 0x13f, 0x13e, 0x13c, 0x13b,
1852 0x139, 0x138, 0x136, 0x135, 0x133, 0x132, 0x130, 0x12f,
1853 0x12e, 0x12c, 0x12b, 0x129, 0x128, 0x127, 0x125, 0x124,
1854 0x123, 0x121, 0x120, 0x11f, 0x11e, 0x11c, 0x11b, 0x11a,
1855 0x119, 0x118, 0x116, 0x115, 0x114, 0x113, 0x112, 0x111,
1856 0x10f, 0x10e, 0x10d, 0x10c, 0x10b, 0x10a, 0x109, 0x108,
1857 0x107, 0x106, 0x105, 0x104, 0x103, 0x102, 0x101, 0x100,
1860 /* Compute q = [u/d], r = u mod d. Avoids slow hardware division for the case
1861 that q < 0x40; here it instead uses a table of (Euclidian) inverses. */
1862 #define div_smallq(q, r, u, d) \
1863 do { \
1864 if ((u) / 0x40 < (d)) \
1866 int _cnt; \
1867 uintmax_t _dinv, _mask, _q, _r; \
1868 count_leading_zeros (_cnt, (d)); \
1869 _r = (u); \
1870 if (UNLIKELY (_cnt > (W_TYPE_SIZE - 8))) \
1872 _dinv = invtab[((d) << (_cnt + 8 - W_TYPE_SIZE)) - 0x80]; \
1873 _q = _dinv * _r >> (8 + W_TYPE_SIZE - _cnt); \
1875 else \
1877 _dinv = invtab[((d) >> (W_TYPE_SIZE - 8 - _cnt)) - 0x7f]; \
1878 _q = _dinv * (_r >> (W_TYPE_SIZE - 3 - _cnt)) >> 11; \
1880 _r -= _q*(d); \
1882 _mask = -(uintmax_t) (_r >= (d)); \
1883 (r) = _r - (_mask & (d)); \
1884 (q) = _q - _mask; \
1885 assert ( (q) * (d) + (r) == u); \
1887 else \
1889 uintmax_t _q = (u) / (d); \
1890 (r) = (u) - _q * (d); \
1891 (q) = _q; \
1893 } while (0)
1895 /* Notes: Example N = 22117019. After first phase we find Q1 = 6314, Q
1896 = 3025, P = 1737, representing F_{18} = (-6314, 2* 1737, 3025),
1897 with 3025 = 55^2.
1899 Constructing the square root, we get Q1 = 55, Q = 8653, P = 4652,
1900 representing G_0 = (-55, 2*4652, 8653).
1902 In the notation of the paper:
1904 S_{-1} = 55, S_0 = 8653, R_0 = 4652
1908 t_0 = floor([q_0 + R_0] / S0) = 1
1909 R_1 = t_0 * S_0 - R_0 = 4001
1910 S_1 = S_{-1} +t_0 (R_0 - R_1) = 706
1913 /* Multipliers, in order of efficiency:
1914 0.7268 3*5*7*11 = 1155 = 3 (mod 4)
1915 0.7317 3*5*7 = 105 = 1
1916 0.7820 3*5*11 = 165 = 1
1917 0.7872 3*5 = 15 = 3
1918 0.8101 3*7*11 = 231 = 3
1919 0.8155 3*7 = 21 = 1
1920 0.8284 5*7*11 = 385 = 1
1921 0.8339 5*7 = 35 = 3
1922 0.8716 3*11 = 33 = 1
1923 0.8774 3 = 3 = 3
1924 0.8913 5*11 = 55 = 3
1925 0.8972 5 = 5 = 1
1926 0.9233 7*11 = 77 = 1
1927 0.9295 7 = 7 = 3
1928 0.9934 11 = 11 = 3
1930 #define QUEUE_SIZE 50
1932 #if STAT_SQUFOF
1933 # define Q_FREQ_SIZE 50
1934 /* Element 0 keeps the total */
1935 static unsigned int q_freq[Q_FREQ_SIZE + 1];
1936 # define MIN(a,b) ((a) < (b) ? (a) : (b))
1937 #endif
1939 /* Return true on success. Expected to fail only for numbers
1940 >= 2^{2*W_TYPE_SIZE - 2}, or close to that limit. */
1941 static bool
1942 factor_using_squfof (uintmax_t n1, uintmax_t n0, struct factors *factors)
1944 /* Uses algorithm and notation from
1946 SQUARE FORM FACTORIZATION
1947 JASON E. GOWER AND SAMUEL S. WAGSTAFF, JR.
1949 http://homes.cerias.purdue.edu/~ssw/squfof.pdf
1952 static const unsigned int multipliers_1[] =
1953 { /* = 1 (mod 4) */
1954 105, 165, 21, 385, 33, 5, 77, 1, 0
1956 static const unsigned int multipliers_3[] =
1957 { /* = 3 (mod 4) */
1958 1155, 15, 231, 35, 3, 55, 7, 11, 0
1961 const unsigned int *m;
1963 struct { uintmax_t Q; uintmax_t P; } queue[QUEUE_SIZE];
1965 if (n1 >= ((uintmax_t) 1 << (W_TYPE_SIZE - 2)))
1966 return false;
1968 uintmax_t sqrt_n = isqrt2 (n1, n0);
1970 if (n0 == sqrt_n * sqrt_n)
1972 uintmax_t p1, p0;
1974 umul_ppmm (p1, p0, sqrt_n, sqrt_n);
1975 assert (p0 == n0);
1977 if (n1 == p1)
1979 if (prime_p (sqrt_n))
1980 factor_insert_multiplicity (factors, sqrt_n, 2);
1981 else
1983 struct factors f;
1985 f.nfactors = 0;
1986 if (!factor_using_squfof (0, sqrt_n, &f))
1988 /* Try pollard rho instead */
1989 factor_using_pollard_rho (sqrt_n, 1, &f);
1991 /* Duplicate the new factors */
1992 for (unsigned int i = 0; i < f.nfactors; i++)
1993 factor_insert_multiplicity (factors, f.p[i], 2*f.e[i]);
1995 return true;
1999 /* Select multipliers so we always get n * mu = 3 (mod 4) */
2000 for (m = (n0 % 4 == 1) ? multipliers_3 : multipliers_1;
2001 *m; m++)
2003 uintmax_t S, Dh, Dl, Q1, Q, P, L, L1, B;
2004 unsigned int i;
2005 unsigned int mu = *m;
2006 unsigned int qpos = 0;
2008 assert (mu * n0 % 4 == 3);
2010 /* In the notation of the paper, with mu * n == 3 (mod 4), we
2011 get \Delta = 4 mu * n, and the paper's \mu is 2 mu. As far as
2012 I understand it, the necessary bound is 4 \mu^3 < n, or 32
2013 mu^3 < n.
2015 However, this seems insufficient: With n = 37243139 and mu =
2016 105, we get a trivial factor, from the square 38809 = 197^2,
2017 without any corresponding Q earlier in the iteration.
2019 Requiring 64 mu^3 < n seems sufficient. */
2020 if (n1 == 0)
2022 if ((uintmax_t) mu*mu*mu >= n0 / 64)
2023 continue;
2025 else
2027 if (n1 > ((uintmax_t) 1 << (W_TYPE_SIZE - 2)) / mu)
2028 continue;
2030 umul_ppmm (Dh, Dl, n0, mu);
2031 Dh += n1 * mu;
2033 assert (Dl % 4 != 1);
2034 assert (Dh < (uintmax_t) 1 << (W_TYPE_SIZE - 2));
2036 S = isqrt2 (Dh, Dl);
2038 Q1 = 1;
2039 P = S;
2041 /* Square root remainder fits in one word, so ignore high part. */
2042 Q = Dl - P*P;
2043 /* FIXME: When can this differ from floor(sqrt(2 sqrt(D)))? */
2044 L = isqrt (2*S);
2045 B = 2*L;
2046 L1 = mu * 2 * L;
2048 /* The form is (+/- Q1, 2P, -/+ Q), of discriminant 4 (P^2 + Q Q1) =
2049 4 D. */
2051 for (i = 0; i <= B; i++)
2053 uintmax_t q, P1, t, rem;
2055 div_smallq (q, rem, S+P, Q);
2056 P1 = S - rem; /* P1 = q*Q - P */
2058 IF_LINT (assert (q > 0 && Q > 0));
2060 #if STAT_SQUFOF
2061 q_freq[0]++;
2062 q_freq[MIN (q, Q_FREQ_SIZE)]++;
2063 #endif
2065 if (Q <= L1)
2067 uintmax_t g = Q;
2069 if ( (Q & 1) == 0)
2070 g /= 2;
2072 g /= gcd_odd (g, mu);
2074 if (g <= L)
2076 if (qpos >= QUEUE_SIZE)
2077 error (EXIT_FAILURE, 0, _("squfof queue overflow"));
2078 queue[qpos].Q = g;
2079 queue[qpos].P = P % g;
2080 qpos++;
2084 /* I think the difference can be either sign, but mod
2085 2^W_TYPE_SIZE arithmetic should be fine. */
2086 t = Q1 + q * (P - P1);
2087 Q1 = Q;
2088 Q = t;
2089 P = P1;
2091 if ( (i & 1) == 0)
2093 uintmax_t r = is_square (Q);
2094 if (r)
2096 for (unsigned int j = 0; j < qpos; j++)
2098 if (queue[j].Q == r)
2100 if (r == 1)
2101 /* Traversed entire cycle. */
2102 goto next_multiplier;
2104 /* Need the absolute value for divisibility test. */
2105 if (P >= queue[j].P)
2106 t = P - queue[j].P;
2107 else
2108 t = queue[j].P - P;
2109 if (t % r == 0)
2111 /* Delete entries up to and including entry
2112 j, which matched. */
2113 memmove (queue, queue + j + 1,
2114 (qpos - j - 1) * sizeof (queue[0]));
2115 qpos -= (j + 1);
2117 goto next_i;
2121 /* We have found a square form, which should give a
2122 factor. */
2123 Q1 = r;
2124 assert (S >= P); /* What signs are possible? */
2125 P += r * ((S - P) / r);
2127 /* Note: Paper says (N - P*P) / Q1, that seems incorrect
2128 for the case D = 2N. */
2129 /* Compute Q = (D - P*P) / Q1, but we need double
2130 precision. */
2131 uintmax_t hi, lo;
2132 umul_ppmm (hi, lo, P, P);
2133 sub_ddmmss (hi, lo, Dh, Dl, hi, lo);
2134 udiv_qrnnd (Q, rem, hi, lo, Q1);
2135 assert (rem == 0);
2137 for (;;)
2139 /* Note: There appears to by a typo in the paper,
2140 Step 4a in the algorithm description says q <--
2141 floor([S+P]/\hat Q), but looking at the equations
2142 in Sec. 3.1, it should be q <-- floor([S+P] / Q).
2143 (In this code, \hat Q is Q1). */
2144 div_smallq (q, rem, S+P, Q);
2145 P1 = S - rem; /* P1 = q*Q - P */
2147 #if STAT_SQUFOF
2148 q_freq[0]++;
2149 q_freq[MIN (q, Q_FREQ_SIZE)]++;
2150 #endif
2151 if (P == P1)
2152 break;
2153 t = Q1 + q * (P - P1);
2154 Q1 = Q;
2155 Q = t;
2156 P = P1;
2159 if ( (Q & 1) == 0)
2160 Q /= 2;
2161 Q /= gcd_odd (Q, mu);
2163 assert (Q > 1 && (n1 || Q < n0));
2165 if (prime_p (Q))
2166 factor_insert (factors, Q);
2167 else if (!factor_using_squfof (0, Q, factors))
2168 factor_using_pollard_rho (Q, 2, factors);
2170 divexact_21 (n1, n0, n1, n0, Q);
2172 if (prime2_p (n1, n0))
2173 factor_insert_large (factors, n1, n0);
2174 else
2176 if (!factor_using_squfof (n1, n0, factors))
2178 if (n1 == 0)
2179 factor_using_pollard_rho (n0, 1, factors);
2180 else
2181 factor_using_pollard_rho2 (n1, n0, 1, factors);
2185 return true;
2188 next_i:;
2190 next_multiplier:;
2192 return false;
2195 /* Compute the prime factors of the 128-bit number (T1,T0), and put the
2196 results in FACTORS. Use the algorithm selected by the global ALG. */
2197 static void
2198 factor (uintmax_t t1, uintmax_t t0, struct factors *factors)
2200 factors->nfactors = 0;
2201 factors->plarge[1] = 0;
2203 if (t1 == 0 && t0 < 2)
2204 return;
2206 t0 = factor_using_division (&t1, t1, t0, factors);
2208 if (t1 == 0 && t0 < 2)
2209 return;
2211 if (prime2_p (t1, t0))
2212 factor_insert_large (factors, t1, t0);
2213 else
2215 if (alg == ALG_SQUFOF)
2216 if (factor_using_squfof (t1, t0, factors))
2217 return;
2219 if (t1 == 0)
2220 factor_using_pollard_rho (t0, 1, factors);
2221 else
2222 factor_using_pollard_rho2 (t1, t0, 1, factors);
2226 #if HAVE_GMP
2227 /* Use Pollard-rho to compute the prime factors of
2228 arbitrary-precision T, and put the results in FACTORS. */
2229 static void
2230 mp_factor (mpz_t t, struct mp_factors *factors)
2232 mp_factor_init (factors);
2234 if (mpz_sgn (t) != 0)
2236 mp_factor_using_division (t, factors);
2238 if (mpz_cmp_ui (t, 1) != 0)
2240 devmsg ("[is number prime?] ");
2241 if (mp_prime_p (t))
2242 mp_factor_insert (factors, t);
2243 else
2244 mp_factor_using_pollard_rho (t, 1, factors);
2248 #endif
2250 static strtol_error
2251 strto2uintmax (uintmax_t *hip, uintmax_t *lop, const char *s)
2253 unsigned int lo_carry;
2254 uintmax_t hi = 0, lo = 0;
2256 strtol_error err = LONGINT_INVALID;
2258 /* Skip initial spaces and '+'. */
2259 for (;;)
2261 char c = *s;
2262 if (c == ' ')
2263 s++;
2264 else if (c == '+')
2266 s++;
2267 break;
2269 else
2270 break;
2273 /* Initial scan for invalid digits. */
2274 const char *p = s;
2275 for (;;)
2277 unsigned int c = *p++;
2278 if (c == 0)
2279 break;
2281 if (UNLIKELY (!ISDIGIT (c)))
2283 err = LONGINT_INVALID;
2284 break;
2287 err = LONGINT_OK; /* we've seen at least one valid digit */
2290 for (;err == LONGINT_OK;)
2292 unsigned int c = *s++;
2293 if (c == 0)
2294 break;
2296 c -= '0';
2298 if (UNLIKELY (hi > ~(uintmax_t)0 / 10))
2300 err = LONGINT_OVERFLOW;
2301 break;
2303 hi = 10 * hi;
2305 lo_carry = (lo >> (W_TYPE_SIZE - 3)) + (lo >> (W_TYPE_SIZE - 1));
2306 lo_carry += 10 * lo < 2 * lo;
2308 lo = 10 * lo;
2309 lo += c;
2311 lo_carry += lo < c;
2312 hi += lo_carry;
2313 if (UNLIKELY (hi < lo_carry))
2315 err = LONGINT_OVERFLOW;
2316 break;
2320 *hip = hi;
2321 *lop = lo;
2323 return err;
2326 static size_t n_out; /* How much data we've written to stdout. */
2328 static void
2329 print_uintmaxes (uintmax_t t1, uintmax_t t0)
2331 uintmax_t q, r;
2333 if (t1 == 0)
2335 /* n_out's value is inconsequential on error. */
2336 n_out += (size_t) printf ("%"PRIuMAX, t0);
2338 else
2340 /* Use very plain code here since it seems hard to write fast code
2341 without assuming a specific word size. */
2342 q = t1 / 1000000000;
2343 r = t1 % 1000000000;
2344 udiv_qrnnd (t0, r, r, t0, 1000000000);
2345 print_uintmaxes (q, t0);
2346 n_out += (size_t) printf ("%09u", (unsigned int) r);
2350 /* Single-precision factoring */
2351 static void
2352 print_factors_single (uintmax_t t1, uintmax_t t0)
2354 struct factors factors;
2356 print_uintmaxes (t1, t0);
2357 putchar (':');
2358 n_out++;
2360 factor (t1, t0, &factors);
2362 for (unsigned int j = 0; j < factors.nfactors; j++)
2363 for (unsigned int k = 0; k < factors.e[j]; k++)
2365 char buf[INT_BUFSIZE_BOUND (uintmax_t)];
2366 putchar (' ');
2367 char *umaxstr = umaxtostr (factors.p[j], buf);
2368 fputs (umaxstr, stdout);
2369 n_out += strlen (umaxstr) + 1;
2372 if (factors.plarge[1])
2374 putchar (' ');
2375 n_out++;
2376 print_uintmaxes (factors.plarge[1], factors.plarge[0]);
2378 putchar ('\n');
2379 n_out++;
2381 /* Assume the stdout buffer is > this value.
2382 Flush here to avoid partial lines being output.
2383 Flushing every line has too much overhead.
2384 TODO: Buffer internally and avoid stdio. */
2385 if (n_out >= 512)
2387 fflush (stdout);
2388 n_out = 0;
2392 /* Emit the factors of the indicated number. If we have the option of using
2393 either algorithm, we select on the basis of the length of the number.
2394 For longer numbers, we prefer the MP algorithm even if the native algorithm
2395 has enough digits, because the algorithm is better. The turnover point
2396 depends on the value. */
2397 static bool
2398 print_factors (const char *input)
2400 uintmax_t t1, t0;
2402 /* Try converting the number to one or two words. If it fails, use GMP or
2403 print an error message. The 2nd condition checks that the most
2404 significant bit of the two-word number is clear, in a typesize neutral
2405 way. */
2406 strtol_error err = strto2uintmax (&t1, &t0, input);
2408 switch (err)
2410 case LONGINT_OK:
2411 if (((t1 << 1) >> 1) == t1)
2413 devmsg ("[using single-precision arithmetic] ");
2414 print_factors_single (t1, t0);
2415 return true;
2417 break;
2419 case LONGINT_OVERFLOW:
2420 /* Try GMP. */
2421 break;
2423 default:
2424 error (0, 0, _("%s is not a valid positive integer"), quote (input));
2425 return false;
2428 #if HAVE_GMP
2429 devmsg ("[using arbitrary-precision arithmetic] ");
2430 mpz_t t;
2431 struct mp_factors factors;
2433 mpz_init_set_str (t, input, 10);
2435 gmp_printf ("%Zd:", t);
2436 mp_factor (t, &factors);
2438 for (unsigned int j = 0; j < factors.nfactors; j++)
2439 for (unsigned int k = 0; k < factors.e[j]; k++)
2440 gmp_printf (" %Zd", factors.p[j]);
2442 mp_factor_clear (&factors);
2443 mpz_clear (t);
2444 putchar ('\n');
2445 fflush (stdout);
2446 return true;
2447 #else
2448 error (0, 0, _("%s is too large"), quote (input));
2449 return false;
2450 #endif
2453 void
2454 usage (int status)
2456 if (status != EXIT_SUCCESS)
2457 emit_try_help ();
2458 else
2460 printf (_("\
2461 Usage: %s [NUMBER]...\n\
2462 or: %s OPTION\n\
2464 program_name, program_name);
2465 fputs (_("\
2466 Print the prime factors of each specified integer NUMBER. If none\n\
2467 are specified on the command line, read them from standard input.\n\
2469 "), stdout);
2470 fputs (HELP_OPTION_DESCRIPTION, stdout);
2471 fputs (VERSION_OPTION_DESCRIPTION, stdout);
2472 emit_ancillary_info (PROGRAM_NAME);
2474 exit (status);
2477 static bool
2478 do_stdin (void)
2480 bool ok = true;
2481 token_buffer tokenbuffer;
2483 init_tokenbuffer (&tokenbuffer);
2485 while (true)
2487 size_t token_length = readtoken (stdin, DELIM, sizeof (DELIM) - 1,
2488 &tokenbuffer);
2489 if (token_length == (size_t) -1)
2490 break;
2491 ok &= print_factors (tokenbuffer.buffer);
2493 free (tokenbuffer.buffer);
2495 return ok;
2499 main (int argc, char **argv)
2501 initialize_main (&argc, &argv);
2502 set_program_name (argv[0]);
2503 setlocale (LC_ALL, "");
2504 bindtextdomain (PACKAGE, LOCALEDIR);
2505 textdomain (PACKAGE);
2507 atexit (close_stdout);
2509 alg = ALG_POLLARD_RHO; /* Default to Pollard rho */
2511 int c;
2512 while ((c = getopt_long (argc, argv, "", long_options, NULL)) != -1)
2514 switch (c)
2516 case DEV_DEBUG_OPTION:
2517 dev_debug = true;
2518 break;
2520 case 's':
2521 alg = ALG_SQUFOF;
2522 break;
2524 case 'w':
2525 flag_prove_primality = false;
2526 break;
2528 case_GETOPT_HELP_CHAR;
2530 case_GETOPT_VERSION_CHAR (PROGRAM_NAME, AUTHORS);
2532 default:
2533 usage (EXIT_FAILURE);
2537 #if STAT_SQUFOF
2538 if (alg == ALG_SQUFOF)
2539 memset (q_freq, 0, sizeof (q_freq));
2540 #endif
2542 bool ok;
2543 if (argc <= optind)
2544 ok = do_stdin ();
2545 else
2547 ok = true;
2548 for (int i = optind; i < argc; i++)
2549 if (! print_factors (argv[i]))
2550 ok = false;
2553 #if STAT_SQUFOF
2554 if (alg == ALG_SQUFOF && q_freq[0] > 0)
2556 double acc_f;
2557 printf ("q freq. cum. freq.(total: %d)\n", q_freq[0]);
2558 for (unsigned int i = 1, acc_f = 0.0; i <= Q_FREQ_SIZE; i++)
2560 double f = (double) q_freq[i] / q_freq[0];
2561 acc_f += f;
2562 printf ("%s%d %.2f%% %.2f%%\n", i == Q_FREQ_SIZE ? ">=" : "", i,
2563 100.0 * f, 100.0 * acc_f);
2566 #endif
2568 return ok ? EXIT_SUCCESS : EXIT_FAILURE;