build: update gnulib submodule to latest
[coreutils.git] / src / factor.c
blobee578bfe5de84777b2c3d3eaa7e6c110a4333331
1 /* factor -- print prime factors of n.
2 Copyright (C) 1986-2015 Free Software Foundation, Inc.
4 This program is free software: you can redistribute it and/or modify
5 it under the terms of the GNU General Public License as published by
6 the Free Software Foundation, either version 3 of the License, or
7 (at your option) any later version.
9 This program is distributed in the hope that it will be useful,
10 but WITHOUT ANY WARRANTY; without even the implied warranty of
11 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 GNU General Public License for more details.
14 You should have received a copy of the GNU General Public License
15 along with this program. If not, see <http://www.gnu.org/licenses/>. */
17 /* Originally written by Paul Rubin <phr@ocf.berkeley.edu>.
18 Adapted for GNU, fixed to factor UINT_MAX by Jim Meyering.
19 Arbitrary-precision code adapted by James Youngman from Torbjörn
20 Granlund's factorize.c, from GNU MP version 4.2.2.
21 In 2012, the core was rewritten by Torbjörn Granlund and Niels Möller.
22 Contains code from GNU MP. */
24 /* Efficiently factor numbers that fit in one or two words (word = uintmax_t),
25 or, with GMP, numbers of any size.
27 Code organisation:
29 There are several variants of many functions, for handling one word, two
30 words, and GMP's mpz_t type. If the one-word variant is called foo, the
31 two-word variant will be foo2, and the one for mpz_t will be mp_foo. In
32 some cases, the plain function variants will handle both one-word and
33 two-word numbers, evidenced by function arguments.
35 The factoring code for two words will fall into the code for one word when
36 progress allows that.
38 Using GMP is optional. Define HAVE_GMP to make this code include GMP
39 factoring code. The GMP factoring code is based on GMP's demos/factorize.c
40 (last synched 2012-09-07). The GMP-based factoring code will stay in GMP
41 factoring code even if numbers get small enough for using the two-word
42 code.
44 Algorithm:
46 (1) Perform trial division using a small primes table, but without hardware
47 division since the primes table store inverses modulo the word base.
48 (The GMP variant of this code doesn't make use of the precomputed
49 inverses, but instead relies on GMP for fast divisibility testing.)
50 (2) Check the nature of any non-factored part using Miller-Rabin for
51 detecting composites, and Lucas for detecting primes.
52 (3) Factor any remaining composite part using the Pollard-Brent rho
53 algorithm or if USE_SQUFOF is defined to 1, try that first.
54 Status of found factors are checked again using Miller-Rabin and Lucas.
56 We prefer using Hensel norm in the divisions, not the more familiar
57 Euclidian norm, since the former leads to much faster code. In the
58 Pollard-Brent rho code and the prime testing code, we use Montgomery's
59 trick of multiplying all n-residues by the word base, allowing cheap Hensel
60 reductions mod n.
62 Improvements:
64 * Use modular inverses also for exact division in the Lucas code, and
65 elsewhere. A problem is to locate the inverses not from an index, but
66 from a prime. We might instead compute the inverse on-the-fly.
68 * Tune trial division table size (not forgetting that this is a standalone
69 program where the table will be read from disk for each invocation).
71 * Implement less naive powm, using k-ary exponentiation for k = 3 or
72 perhaps k = 4.
74 * Try to speed trial division code for single uintmax_t numbers, i.e., the
75 code using DIVBLOCK. It currently runs at 2 cycles per prime (Intel SBR,
76 IBR), 3 cycles per prime (AMD Stars) and 5 cycles per prime (AMD BD) when
77 using gcc 4.6 and 4.7. Some software pipelining should help; 1, 2, and 4
78 respectively cycles ought to be possible.
80 * The redcify function could be vastly improved by using (plain Euclidian)
81 pre-inversion (such as GMP's invert_limb) and udiv_qrnnd_preinv (from
82 GMP's gmp-impl.h). The redcify2 function could be vastly improved using
83 similar methoods. These functions currently dominate run time when using
84 the -w option.
87 /* Whether to recursively factor to prove primality,
88 or run faster probabilistic tests. */
89 #ifndef PROVE_PRIMALITY
90 # define PROVE_PRIMALITY 1
91 #endif
93 /* Faster for certain ranges but less general. */
94 #ifndef USE_SQUFOF
95 # define USE_SQUFOF 0
96 #endif
98 /* Output SQUFOF statistics. */
99 #ifndef STAT_SQUFOF
100 # define STAT_SQUFOF 0
101 #endif
104 #include <config.h>
105 #include <getopt.h>
106 #include <stdio.h>
107 #if HAVE_GMP
108 # include <gmp.h>
109 # if !HAVE_DECL_MPZ_INITS
110 # include <stdarg.h>
111 # endif
112 #endif
114 #include <assert.h>
116 #include "system.h"
117 #include "error.h"
118 #include "full-write.h"
119 #include "quote.h"
120 #include "readtokens.h"
121 #include "xstrtol.h"
123 /* The official name of this program (e.g., no 'g' prefix). */
124 #define PROGRAM_NAME "factor"
126 #define AUTHORS \
127 proper_name ("Paul Rubin"), \
128 proper_name_utf8 ("Torbjorn Granlund", "Torbj\303\266rn Granlund"), \
129 proper_name_utf8 ("Niels Moller", "Niels M\303\266ller")
131 /* Token delimiters when reading from a file. */
132 #define DELIM "\n\t "
134 #ifndef USE_LONGLONG_H
135 /* With the way we use longlong.h, it's only safe to use
136 when UWtype = UHWtype, as there were various cases
137 (as can be seen in the history for longlong.h) where
138 for example, _LP64 was required to enable W_TYPE_SIZE==64 code,
139 to avoid compile time or run time issues. */
140 # if LONG_MAX == INTMAX_MAX
141 # define USE_LONGLONG_H 1
142 # endif
143 #endif
145 #if USE_LONGLONG_H
147 /* Make definitions for longlong.h to make it do what it can do for us */
149 /* bitcount for uintmax_t */
150 # if UINTMAX_MAX == UINT32_MAX
151 # define W_TYPE_SIZE 32
152 # elif UINTMAX_MAX == UINT64_MAX
153 # define W_TYPE_SIZE 64
154 # elif UINTMAX_MAX == UINT128_MAX
155 # define W_TYPE_SIZE 128
156 # endif
158 # define UWtype uintmax_t
159 # define UHWtype unsigned long int
160 # undef UDWtype
161 # if HAVE_ATTRIBUTE_MODE
162 typedef unsigned int UQItype __attribute__ ((mode (QI)));
163 typedef int SItype __attribute__ ((mode (SI)));
164 typedef unsigned int USItype __attribute__ ((mode (SI)));
165 typedef int DItype __attribute__ ((mode (DI)));
166 typedef unsigned int UDItype __attribute__ ((mode (DI)));
167 # else
168 typedef unsigned char UQItype;
169 typedef long SItype;
170 typedef unsigned long int USItype;
171 # if HAVE_LONG_LONG_INT
172 typedef long long int DItype;
173 typedef unsigned long long int UDItype;
174 # else /* Assume `long' gives us a wide enough type. Needed for hppa2.0w. */
175 typedef long int DItype;
176 typedef unsigned long int UDItype;
177 # endif
178 # endif
179 # define LONGLONG_STANDALONE /* Don't require GMP's longlong.h mdep files */
180 # define ASSERT(x) /* FIXME make longlong.h really standalone */
181 # define __GMP_DECLSPEC /* FIXME make longlong.h really standalone */
182 # define __clz_tab factor_clz_tab /* Rename to avoid glibc collision */
183 # ifndef __GMP_GNUC_PREREQ
184 # define __GMP_GNUC_PREREQ(a,b) 1
185 # endif
187 /* These stub macros are only used in longlong.h in certain system compiler
188 combinations, so ensure usage to avoid -Wunused-macros warnings. */
189 # if __GMP_GNUC_PREREQ (1,1) && defined __clz_tab
190 ASSERT (1)
191 __GMP_DECLSPEC
192 # endif
194 # if _ARCH_PPC
195 # define HAVE_HOST_CPU_FAMILY_powerpc 1
196 # endif
197 # include "longlong.h"
198 # ifdef COUNT_LEADING_ZEROS_NEED_CLZ_TAB
199 const unsigned char factor_clz_tab[129] =
201 1,2,3,3,4,4,4,4,5,5,5,5,5,5,5,5,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,
202 7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,
203 8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,
204 8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,
207 # endif
209 #else /* not USE_LONGLONG_H */
211 # define W_TYPE_SIZE (8 * sizeof (uintmax_t))
212 # define __ll_B ((uintmax_t) 1 << (W_TYPE_SIZE / 2))
213 # define __ll_lowpart(t) ((uintmax_t) (t) & (__ll_B - 1))
214 # define __ll_highpart(t) ((uintmax_t) (t) >> (W_TYPE_SIZE / 2))
216 #endif
218 #if !defined __clz_tab && !defined UHWtype
219 /* Without this seemingly useless conditional, gcc -Wunused-macros
220 warns that each of the two tested macros is unused on Fedora 18.
221 FIXME: this is just an ugly band-aid. Fix it properly. */
222 #endif
224 /* 2*3*5*7*11...*101 is 128 bits, and has 26 prime factors */
225 #define MAX_NFACTS 26
227 enum
229 DEV_DEBUG_OPTION = CHAR_MAX + 1
232 static struct option const long_options[] =
234 {"-debug", no_argument, NULL, DEV_DEBUG_OPTION},
235 {GETOPT_HELP_OPTION_DECL},
236 {GETOPT_VERSION_OPTION_DECL},
237 {NULL, 0, NULL, 0}
240 struct factors
242 uintmax_t plarge[2]; /* Can have a single large factor */
243 uintmax_t p[MAX_NFACTS];
244 unsigned char e[MAX_NFACTS];
245 unsigned char nfactors;
248 #if HAVE_GMP
249 struct mp_factors
251 mpz_t *p;
252 unsigned long int *e;
253 unsigned long int nfactors;
255 #endif
257 static void factor (uintmax_t, uintmax_t, struct factors *);
259 #ifndef umul_ppmm
260 # define umul_ppmm(w1, w0, u, v) \
261 do { \
262 uintmax_t __x0, __x1, __x2, __x3; \
263 unsigned long int __ul, __vl, __uh, __vh; \
264 uintmax_t __u = (u), __v = (v); \
266 __ul = __ll_lowpart (__u); \
267 __uh = __ll_highpart (__u); \
268 __vl = __ll_lowpart (__v); \
269 __vh = __ll_highpart (__v); \
271 __x0 = (uintmax_t) __ul * __vl; \
272 __x1 = (uintmax_t) __ul * __vh; \
273 __x2 = (uintmax_t) __uh * __vl; \
274 __x3 = (uintmax_t) __uh * __vh; \
276 __x1 += __ll_highpart (__x0);/* this can't give carry */ \
277 __x1 += __x2; /* but this indeed can */ \
278 if (__x1 < __x2) /* did we get it? */ \
279 __x3 += __ll_B; /* yes, add it in the proper pos. */ \
281 (w1) = __x3 + __ll_highpart (__x1); \
282 (w0) = (__x1 << W_TYPE_SIZE / 2) + __ll_lowpart (__x0); \
283 } while (0)
284 #endif
286 #if !defined udiv_qrnnd || defined UDIV_NEEDS_NORMALIZATION
287 /* Define our own, not needing normalization. This function is
288 currently not performance critical, so keep it simple. Similar to
289 the mod macro below. */
290 # undef udiv_qrnnd
291 # define udiv_qrnnd(q, r, n1, n0, d) \
292 do { \
293 uintmax_t __d1, __d0, __q, __r1, __r0; \
295 assert ((n1) < (d)); \
296 __d1 = (d); __d0 = 0; \
297 __r1 = (n1); __r0 = (n0); \
298 __q = 0; \
299 for (unsigned int __i = W_TYPE_SIZE; __i > 0; __i--) \
301 rsh2 (__d1, __d0, __d1, __d0, 1); \
302 __q <<= 1; \
303 if (ge2 (__r1, __r0, __d1, __d0)) \
305 __q++; \
306 sub_ddmmss (__r1, __r0, __r1, __r0, __d1, __d0); \
309 (r) = __r0; \
310 (q) = __q; \
311 } while (0)
312 #endif
314 #if !defined add_ssaaaa
315 # define add_ssaaaa(sh, sl, ah, al, bh, bl) \
316 do { \
317 uintmax_t _add_x; \
318 _add_x = (al) + (bl); \
319 (sh) = (ah) + (bh) + (_add_x < (al)); \
320 (sl) = _add_x; \
321 } while (0)
322 #endif
324 #define rsh2(rh, rl, ah, al, cnt) \
325 do { \
326 (rl) = ((ah) << (W_TYPE_SIZE - (cnt))) | ((al) >> (cnt)); \
327 (rh) = (ah) >> (cnt); \
328 } while (0)
330 #define lsh2(rh, rl, ah, al, cnt) \
331 do { \
332 (rh) = ((ah) << cnt) | ((al) >> (W_TYPE_SIZE - (cnt))); \
333 (rl) = (al) << (cnt); \
334 } while (0)
336 #define ge2(ah, al, bh, bl) \
337 ((ah) > (bh) || ((ah) == (bh) && (al) >= (bl)))
339 #define gt2(ah, al, bh, bl) \
340 ((ah) > (bh) || ((ah) == (bh) && (al) > (bl)))
342 #ifndef sub_ddmmss
343 # define sub_ddmmss(rh, rl, ah, al, bh, bl) \
344 do { \
345 uintmax_t _cy; \
346 _cy = (al) < (bl); \
347 (rl) = (al) - (bl); \
348 (rh) = (ah) - (bh) - _cy; \
349 } while (0)
350 #endif
352 #ifndef count_leading_zeros
353 # define count_leading_zeros(count, x) do { \
354 uintmax_t __clz_x = (x); \
355 unsigned int __clz_c; \
356 for (__clz_c = 0; \
357 (__clz_x & ((uintmax_t) 0xff << (W_TYPE_SIZE - 8))) == 0; \
358 __clz_c += 8) \
359 __clz_x <<= 8; \
360 for (; (intmax_t)__clz_x >= 0; __clz_c++) \
361 __clz_x <<= 1; \
362 (count) = __clz_c; \
363 } while (0)
364 #endif
366 #ifndef count_trailing_zeros
367 # define count_trailing_zeros(count, x) do { \
368 uintmax_t __ctz_x = (x); \
369 unsigned int __ctz_c = 0; \
370 while ((__ctz_x & 1) == 0) \
372 __ctz_x >>= 1; \
373 __ctz_c++; \
375 (count) = __ctz_c; \
376 } while (0)
377 #endif
379 /* Requires that a < n and b <= n */
380 #define submod(r,a,b,n) \
381 do { \
382 uintmax_t _t = - (uintmax_t) (a < b); \
383 (r) = ((n) & _t) + (a) - (b); \
384 } while (0)
386 #define addmod(r,a,b,n) \
387 submod ((r), (a), ((n) - (b)), (n))
389 /* Modular two-word addition and subtraction. For performance reasons, the
390 most significant bit of n1 must be clear. The destination variables must be
391 distinct from the mod operand. */
392 #define addmod2(r1, r0, a1, a0, b1, b0, n1, n0) \
393 do { \
394 add_ssaaaa ((r1), (r0), (a1), (a0), (b1), (b0)); \
395 if (ge2 ((r1), (r0), (n1), (n0))) \
396 sub_ddmmss ((r1), (r0), (r1), (r0), (n1), (n0)); \
397 } while (0)
398 #define submod2(r1, r0, a1, a0, b1, b0, n1, n0) \
399 do { \
400 sub_ddmmss ((r1), (r0), (a1), (a0), (b1), (b0)); \
401 if ((intmax_t) (r1) < 0) \
402 add_ssaaaa ((r1), (r0), (r1), (r0), (n1), (n0)); \
403 } while (0)
405 #define HIGHBIT_TO_MASK(x) \
406 (((intmax_t)-1 >> 1) < 0 \
407 ? (uintmax_t)((intmax_t)(x) >> (W_TYPE_SIZE - 1)) \
408 : ((x) & ((uintmax_t) 1 << (W_TYPE_SIZE - 1)) \
409 ? UINTMAX_MAX : (uintmax_t) 0))
411 /* Compute r = a mod d, where r = <*t1,retval>, a = <a1,a0>, d = <d1,d0>.
412 Requires that d1 != 0. */
413 static uintmax_t
414 mod2 (uintmax_t *r1, uintmax_t a1, uintmax_t a0, uintmax_t d1, uintmax_t d0)
416 int cntd, cnta;
418 assert (d1 != 0);
420 if (a1 == 0)
422 *r1 = 0;
423 return a0;
426 count_leading_zeros (cntd, d1);
427 count_leading_zeros (cnta, a1);
428 int cnt = cntd - cnta;
429 lsh2 (d1, d0, d1, d0, cnt);
430 for (int i = 0; i < cnt; i++)
432 if (ge2 (a1, a0, d1, d0))
433 sub_ddmmss (a1, a0, a1, a0, d1, d0);
434 rsh2 (d1, d0, d1, d0, 1);
437 *r1 = a1;
438 return a0;
441 static uintmax_t _GL_ATTRIBUTE_CONST
442 gcd_odd (uintmax_t a, uintmax_t b)
444 if ( (b & 1) == 0)
446 uintmax_t t = b;
447 b = a;
448 a = t;
450 if (a == 0)
451 return b;
453 /* Take out least significant one bit, to make room for sign */
454 b >>= 1;
456 for (;;)
458 uintmax_t t;
459 uintmax_t bgta;
461 while ((a & 1) == 0)
462 a >>= 1;
463 a >>= 1;
465 t = a - b;
466 if (t == 0)
467 return (a << 1) + 1;
469 bgta = HIGHBIT_TO_MASK (t);
471 /* b <-- min (a, b) */
472 b += (bgta & t);
474 /* a <-- |a - b| */
475 a = (t ^ bgta) - bgta;
479 static uintmax_t
480 gcd2_odd (uintmax_t *r1, uintmax_t a1, uintmax_t a0, uintmax_t b1, uintmax_t b0)
482 while ((a0 & 1) == 0)
483 rsh2 (a1, a0, a1, a0, 1);
484 while ((b0 & 1) == 0)
485 rsh2 (b1, b0, b1, b0, 1);
487 for (;;)
489 if ((b1 | a1) == 0)
491 *r1 = 0;
492 return gcd_odd (b0, a0);
495 if (gt2 (a1, a0, b1, b0))
497 sub_ddmmss (a1, a0, a1, a0, b1, b0);
499 rsh2 (a1, a0, a1, a0, 1);
500 while ((a0 & 1) == 0);
502 else if (gt2 (b1, b0, a1, a0))
504 sub_ddmmss (b1, b0, b1, b0, a1, a0);
506 rsh2 (b1, b0, b1, b0, 1);
507 while ((b0 & 1) == 0);
509 else
510 break;
513 *r1 = a1;
514 return a0;
517 static void
518 factor_insert_multiplicity (struct factors *factors,
519 uintmax_t prime, unsigned int m)
521 unsigned int nfactors = factors->nfactors;
522 uintmax_t *p = factors->p;
523 unsigned char *e = factors->e;
525 /* Locate position for insert new or increment e. */
526 int i;
527 for (i = nfactors - 1; i >= 0; i--)
529 if (p[i] <= prime)
530 break;
533 if (i < 0 || p[i] != prime)
535 for (int j = nfactors - 1; j > i; j--)
537 p[j + 1] = p[j];
538 e[j + 1] = e[j];
540 p[i + 1] = prime;
541 e[i + 1] = m;
542 factors->nfactors = nfactors + 1;
544 else
546 e[i] += m;
550 #define factor_insert(f, p) factor_insert_multiplicity (f, p, 1)
552 static void
553 factor_insert_large (struct factors *factors,
554 uintmax_t p1, uintmax_t p0)
556 if (p1 > 0)
558 assert (factors->plarge[1] == 0);
559 factors->plarge[0] = p0;
560 factors->plarge[1] = p1;
562 else
563 factor_insert (factors, p0);
566 #if HAVE_GMP
568 # if !HAVE_DECL_MPZ_INITS
570 # define mpz_inits(...) mpz_va_init (mpz_init, __VA_ARGS__)
571 # define mpz_clears(...) mpz_va_init (mpz_clear, __VA_ARGS__)
573 static void
574 mpz_va_init (void (*mpz_single_init)(mpz_t), ...)
576 va_list ap;
578 va_start (ap, mpz_single_init);
580 mpz_t *mpz;
581 while ((mpz = va_arg (ap, mpz_t *)))
582 mpz_single_init (*mpz);
584 va_end (ap);
586 # endif
588 static void mp_factor (mpz_t, struct mp_factors *);
590 static void
591 mp_factor_init (struct mp_factors *factors)
593 factors->p = NULL;
594 factors->e = NULL;
595 factors->nfactors = 0;
598 static void
599 mp_factor_clear (struct mp_factors *factors)
601 for (unsigned int i = 0; i < factors->nfactors; i++)
602 mpz_clear (factors->p[i]);
604 free (factors->p);
605 free (factors->e);
608 static void
609 mp_factor_insert (struct mp_factors *factors, mpz_t prime)
611 unsigned long int nfactors = factors->nfactors;
612 mpz_t *p = factors->p;
613 unsigned long int *e = factors->e;
614 long i;
616 /* Locate position for insert new or increment e. */
617 for (i = nfactors - 1; i >= 0; i--)
619 if (mpz_cmp (p[i], prime) <= 0)
620 break;
623 if (i < 0 || mpz_cmp (p[i], prime) != 0)
625 p = xrealloc (p, (nfactors + 1) * sizeof p[0]);
626 e = xrealloc (e, (nfactors + 1) * sizeof e[0]);
628 mpz_init (p[nfactors]);
629 for (long j = nfactors - 1; j > i; j--)
631 mpz_set (p[j + 1], p[j]);
632 e[j + 1] = e[j];
634 mpz_set (p[i + 1], prime);
635 e[i + 1] = 1;
637 factors->p = p;
638 factors->e = e;
639 factors->nfactors = nfactors + 1;
641 else
643 e[i] += 1;
647 static void
648 mp_factor_insert_ui (struct mp_factors *factors, unsigned long int prime)
650 mpz_t pz;
652 mpz_init_set_ui (pz, prime);
653 mp_factor_insert (factors, pz);
654 mpz_clear (pz);
656 #endif /* HAVE_GMP */
659 /* Number of bits in an uintmax_t. */
660 enum { W = sizeof (uintmax_t) * CHAR_BIT };
662 /* Verify that uintmax_t does not have holes in its representation. */
663 verify (UINTMAX_MAX >> (W - 1) == 1);
665 #define P(a,b,c,d) a,
666 static const unsigned char primes_diff[] = {
667 #include "primes.h"
668 0,0,0,0,0,0,0 /* 7 sentinels for 8-way loop */
670 #undef P
672 #define PRIMES_PTAB_ENTRIES \
673 (sizeof (primes_diff) / sizeof (primes_diff[0]) - 8 + 1)
675 #define P(a,b,c,d) b,
676 static const unsigned char primes_diff8[] = {
677 #include "primes.h"
678 0,0,0,0,0,0,0 /* 7 sentinels for 8-way loop */
680 #undef P
682 struct primes_dtab
684 uintmax_t binv, lim;
687 #define P(a,b,c,d) {c,d},
688 static const struct primes_dtab primes_dtab[] = {
689 #include "primes.h"
690 {1,0},{1,0},{1,0},{1,0},{1,0},{1,0},{1,0} /* 7 sentinels for 8-way loop */
692 #undef P
694 /* Verify that uintmax_t is not wider than
695 the integers used to generate primes.h. */
696 verify (W <= WIDE_UINT_BITS);
698 /* debugging for developers. Enables devmsg().
699 This flag is used only in the GMP code. */
700 static bool dev_debug = false;
702 /* Prove primality or run probabilistic tests. */
703 static bool flag_prove_primality = PROVE_PRIMALITY;
705 /* Number of Miller-Rabin tests to run when not proving primality. */
706 #define MR_REPS 25
708 static void
709 factor_insert_refind (struct factors *factors, uintmax_t p, unsigned int i,
710 unsigned int off)
712 for (unsigned int j = 0; j < off; j++)
713 p += primes_diff[i + j];
714 factor_insert (factors, p);
717 /* Trial division with odd primes uses the following trick.
719 Let p be an odd prime, and B = 2^{W_TYPE_SIZE}. For simplicity,
720 consider the case t < B (this is the second loop below).
722 From our tables we get
724 binv = p^{-1} (mod B)
725 lim = floor ( (B-1) / p ).
727 First assume that t is a multiple of p, t = q * p. Then 0 <= q <= lim
728 (and all quotients in this range occur for some t).
730 Then t = q * p is true also (mod B), and p is invertible we get
732 q = t * binv (mod B).
734 Next, assume that t is *not* divisible by p. Since multiplication
735 by binv (mod B) is a one-to-one mapping,
737 t * binv (mod B) > lim,
739 because all the smaller values are already taken.
741 This can be summed up by saying that the function
743 q(t) = binv * t (mod B)
745 is a permutation of the range 0 <= t < B, with the curious property
746 that it maps the multiples of p onto the range 0 <= q <= lim, in
747 order, and the non-multiples of p onto the range lim < q < B.
750 static uintmax_t
751 factor_using_division (uintmax_t *t1p, uintmax_t t1, uintmax_t t0,
752 struct factors *factors)
754 if (t0 % 2 == 0)
756 unsigned int cnt;
758 if (t0 == 0)
760 count_trailing_zeros (cnt, t1);
761 t0 = t1 >> cnt;
762 t1 = 0;
763 cnt += W_TYPE_SIZE;
765 else
767 count_trailing_zeros (cnt, t0);
768 rsh2 (t1, t0, t1, t0, cnt);
771 factor_insert_multiplicity (factors, 2, cnt);
774 uintmax_t p = 3;
775 unsigned int i;
776 for (i = 0; t1 > 0 && i < PRIMES_PTAB_ENTRIES; i++)
778 for (;;)
780 uintmax_t q1, q0, hi, lo _GL_UNUSED;
782 q0 = t0 * primes_dtab[i].binv;
783 umul_ppmm (hi, lo, q0, p);
784 if (hi > t1)
785 break;
786 hi = t1 - hi;
787 q1 = hi * primes_dtab[i].binv;
788 if (LIKELY (q1 > primes_dtab[i].lim))
789 break;
790 t1 = q1; t0 = q0;
791 factor_insert (factors, p);
793 p += primes_diff[i + 1];
795 if (t1p)
796 *t1p = t1;
798 #define DIVBLOCK(I) \
799 do { \
800 for (;;) \
802 q = t0 * pd[I].binv; \
803 if (LIKELY (q > pd[I].lim)) \
804 break; \
805 t0 = q; \
806 factor_insert_refind (factors, p, i + 1, I); \
808 } while (0)
810 for (; i < PRIMES_PTAB_ENTRIES; i += 8)
812 uintmax_t q;
813 const struct primes_dtab *pd = &primes_dtab[i];
814 DIVBLOCK (0);
815 DIVBLOCK (1);
816 DIVBLOCK (2);
817 DIVBLOCK (3);
818 DIVBLOCK (4);
819 DIVBLOCK (5);
820 DIVBLOCK (6);
821 DIVBLOCK (7);
823 p += primes_diff8[i];
824 if (p * p > t0)
825 break;
828 return t0;
831 #if HAVE_GMP
832 static void
833 mp_factor_using_division (mpz_t t, struct mp_factors *factors)
835 mpz_t q;
836 unsigned long int p;
838 devmsg ("[trial division] ");
840 mpz_init (q);
842 p = mpz_scan1 (t, 0);
843 mpz_div_2exp (t, t, p);
844 while (p)
846 mp_factor_insert_ui (factors, 2);
847 --p;
850 p = 3;
851 for (unsigned int i = 1; i <= PRIMES_PTAB_ENTRIES;)
853 if (! mpz_divisible_ui_p (t, p))
855 p += primes_diff[i++];
856 if (mpz_cmp_ui (t, p * p) < 0)
857 break;
859 else
861 mpz_tdiv_q_ui (t, t, p);
862 mp_factor_insert_ui (factors, p);
866 mpz_clear (q);
868 #endif
870 /* Entry i contains (2i+1)^(-1) mod 2^8. */
871 static const unsigned char binvert_table[128] =
873 0x01, 0xAB, 0xCD, 0xB7, 0x39, 0xA3, 0xC5, 0xEF,
874 0xF1, 0x1B, 0x3D, 0xA7, 0x29, 0x13, 0x35, 0xDF,
875 0xE1, 0x8B, 0xAD, 0x97, 0x19, 0x83, 0xA5, 0xCF,
876 0xD1, 0xFB, 0x1D, 0x87, 0x09, 0xF3, 0x15, 0xBF,
877 0xC1, 0x6B, 0x8D, 0x77, 0xF9, 0x63, 0x85, 0xAF,
878 0xB1, 0xDB, 0xFD, 0x67, 0xE9, 0xD3, 0xF5, 0x9F,
879 0xA1, 0x4B, 0x6D, 0x57, 0xD9, 0x43, 0x65, 0x8F,
880 0x91, 0xBB, 0xDD, 0x47, 0xC9, 0xB3, 0xD5, 0x7F,
881 0x81, 0x2B, 0x4D, 0x37, 0xB9, 0x23, 0x45, 0x6F,
882 0x71, 0x9B, 0xBD, 0x27, 0xA9, 0x93, 0xB5, 0x5F,
883 0x61, 0x0B, 0x2D, 0x17, 0x99, 0x03, 0x25, 0x4F,
884 0x51, 0x7B, 0x9D, 0x07, 0x89, 0x73, 0x95, 0x3F,
885 0x41, 0xEB, 0x0D, 0xF7, 0x79, 0xE3, 0x05, 0x2F,
886 0x31, 0x5B, 0x7D, 0xE7, 0x69, 0x53, 0x75, 0x1F,
887 0x21, 0xCB, 0xED, 0xD7, 0x59, 0xC3, 0xE5, 0x0F,
888 0x11, 0x3B, 0x5D, 0xC7, 0x49, 0x33, 0x55, 0xFF
891 /* Compute n^(-1) mod B, using a Newton iteration. */
892 #define binv(inv,n) \
893 do { \
894 uintmax_t __n = (n); \
895 uintmax_t __inv; \
897 __inv = binvert_table[(__n / 2) & 0x7F]; /* 8 */ \
898 if (W_TYPE_SIZE > 8) __inv = 2 * __inv - __inv * __inv * __n; \
899 if (W_TYPE_SIZE > 16) __inv = 2 * __inv - __inv * __inv * __n; \
900 if (W_TYPE_SIZE > 32) __inv = 2 * __inv - __inv * __inv * __n; \
902 if (W_TYPE_SIZE > 64) \
904 int __invbits = 64; \
905 do { \
906 __inv = 2 * __inv - __inv * __inv * __n; \
907 __invbits *= 2; \
908 } while (__invbits < W_TYPE_SIZE); \
911 (inv) = __inv; \
912 } while (0)
914 /* q = u / d, assuming d|u. */
915 #define divexact_21(q1, q0, u1, u0, d) \
916 do { \
917 uintmax_t _di, _q0; \
918 binv (_di, (d)); \
919 _q0 = (u0) * _di; \
920 if ((u1) >= (d)) \
922 uintmax_t _p1, _p0 _GL_UNUSED; \
923 umul_ppmm (_p1, _p0, _q0, d); \
924 (q1) = ((u1) - _p1) * _di; \
925 (q0) = _q0; \
927 else \
929 (q0) = _q0; \
930 (q1) = 0; \
932 } while (0)
934 /* x B (mod n). */
935 #define redcify(r_prim, r, n) \
936 do { \
937 uintmax_t _redcify_q _GL_UNUSED; \
938 udiv_qrnnd (_redcify_q, r_prim, r, 0, n); \
939 } while (0)
941 /* x B^2 (mod n). Requires x > 0, n1 < B/2 */
942 #define redcify2(r1, r0, x, n1, n0) \
943 do { \
944 uintmax_t _r1, _r0, _i; \
945 if ((x) < (n1)) \
947 _r1 = (x); _r0 = 0; \
948 _i = W_TYPE_SIZE; \
950 else \
952 _r1 = 0; _r0 = (x); \
953 _i = 2*W_TYPE_SIZE; \
955 while (_i-- > 0) \
957 lsh2 (_r1, _r0, _r1, _r0, 1); \
958 if (ge2 (_r1, _r0, (n1), (n0))) \
959 sub_ddmmss (_r1, _r0, _r1, _r0, (n1), (n0)); \
961 (r1) = _r1; \
962 (r0) = _r0; \
963 } while (0)
965 /* Modular two-word multiplication, r = a * b mod m, with mi = m^(-1) mod B.
966 Both a and b must be in redc form, the result will be in redc form too. */
967 static inline uintmax_t
968 mulredc (uintmax_t a, uintmax_t b, uintmax_t m, uintmax_t mi)
970 uintmax_t rh, rl, q, th, tl _GL_UNUSED, xh;
972 umul_ppmm (rh, rl, a, b);
973 q = rl * mi;
974 umul_ppmm (th, tl, q, m);
975 xh = rh - th;
976 if (rh < th)
977 xh += m;
979 return xh;
982 /* Modular two-word multiplication, r = a * b mod m, with mi = m^(-1) mod B.
983 Both a and b must be in redc form, the result will be in redc form too.
984 For performance reasons, the most significant bit of m must be clear. */
985 static uintmax_t
986 mulredc2 (uintmax_t *r1p,
987 uintmax_t a1, uintmax_t a0, uintmax_t b1, uintmax_t b0,
988 uintmax_t m1, uintmax_t m0, uintmax_t mi)
990 uintmax_t r1, r0, q, p1, p0 _GL_UNUSED, t1, t0, s1, s0;
991 mi = -mi;
992 assert ( (a1 >> (W_TYPE_SIZE - 1)) == 0);
993 assert ( (b1 >> (W_TYPE_SIZE - 1)) == 0);
994 assert ( (m1 >> (W_TYPE_SIZE - 1)) == 0);
996 /* First compute a0 * <b1, b0> B^{-1}
997 +-----+
998 |a0 b0|
999 +--+--+--+
1000 |a0 b1|
1001 +--+--+--+
1002 |q0 m0|
1003 +--+--+--+
1004 |q0 m1|
1005 -+--+--+--+
1006 |r1|r0| 0|
1007 +--+--+--+
1009 umul_ppmm (t1, t0, a0, b0);
1010 umul_ppmm (r1, r0, a0, b1);
1011 q = mi * t0;
1012 umul_ppmm (p1, p0, q, m0);
1013 umul_ppmm (s1, s0, q, m1);
1014 r0 += (t0 != 0); /* Carry */
1015 add_ssaaaa (r1, r0, r1, r0, 0, p1);
1016 add_ssaaaa (r1, r0, r1, r0, 0, t1);
1017 add_ssaaaa (r1, r0, r1, r0, s1, s0);
1019 /* Next, (a1 * <b1, b0> + <r1, r0> B^{-1}
1020 +-----+
1021 |a1 b0|
1022 +--+--+
1023 |r1|r0|
1024 +--+--+--+
1025 |a1 b1|
1026 +--+--+--+
1027 |q1 m0|
1028 +--+--+--+
1029 |q1 m1|
1030 -+--+--+--+
1031 |r1|r0| 0|
1032 +--+--+--+
1034 umul_ppmm (t1, t0, a1, b0);
1035 umul_ppmm (s1, s0, a1, b1);
1036 add_ssaaaa (t1, t0, t1, t0, 0, r0);
1037 q = mi * t0;
1038 add_ssaaaa (r1, r0, s1, s0, 0, r1);
1039 umul_ppmm (p1, p0, q, m0);
1040 umul_ppmm (s1, s0, q, m1);
1041 r0 += (t0 != 0); /* Carry */
1042 add_ssaaaa (r1, r0, r1, r0, 0, p1);
1043 add_ssaaaa (r1, r0, r1, r0, 0, t1);
1044 add_ssaaaa (r1, r0, r1, r0, s1, s0);
1046 if (ge2 (r1, r0, m1, m0))
1047 sub_ddmmss (r1, r0, r1, r0, m1, m0);
1049 *r1p = r1;
1050 return r0;
1053 static uintmax_t _GL_ATTRIBUTE_CONST
1054 powm (uintmax_t b, uintmax_t e, uintmax_t n, uintmax_t ni, uintmax_t one)
1056 uintmax_t y = one;
1058 if (e & 1)
1059 y = b;
1061 while (e != 0)
1063 b = mulredc (b, b, n, ni);
1064 e >>= 1;
1066 if (e & 1)
1067 y = mulredc (y, b, n, ni);
1070 return y;
1073 static uintmax_t
1074 powm2 (uintmax_t *r1m,
1075 const uintmax_t *bp, const uintmax_t *ep, const uintmax_t *np,
1076 uintmax_t ni, const uintmax_t *one)
1078 uintmax_t r1, r0, b1, b0, n1, n0;
1079 unsigned int i;
1080 uintmax_t e;
1082 b0 = bp[0];
1083 b1 = bp[1];
1084 n0 = np[0];
1085 n1 = np[1];
1087 r0 = one[0];
1088 r1 = one[1];
1090 for (e = ep[0], i = W_TYPE_SIZE; i > 0; i--, e >>= 1)
1092 if (e & 1)
1094 r0 = mulredc2 (r1m, r1, r0, b1, b0, n1, n0, ni);
1095 r1 = *r1m;
1097 b0 = mulredc2 (r1m, b1, b0, b1, b0, n1, n0, ni);
1098 b1 = *r1m;
1100 for (e = ep[1]; e > 0; e >>= 1)
1102 if (e & 1)
1104 r0 = mulredc2 (r1m, r1, r0, b1, b0, n1, n0, ni);
1105 r1 = *r1m;
1107 b0 = mulredc2 (r1m, b1, b0, b1, b0, n1, n0, ni);
1108 b1 = *r1m;
1110 *r1m = r1;
1111 return r0;
1114 static bool _GL_ATTRIBUTE_CONST
1115 millerrabin (uintmax_t n, uintmax_t ni, uintmax_t b, uintmax_t q,
1116 unsigned int k, uintmax_t one)
1118 uintmax_t y = powm (b, q, n, ni, one);
1120 uintmax_t nm1 = n - one; /* -1, but in redc representation. */
1122 if (y == one || y == nm1)
1123 return true;
1125 for (unsigned int i = 1; i < k; i++)
1127 y = mulredc (y, y, n, ni);
1129 if (y == nm1)
1130 return true;
1131 if (y == one)
1132 return false;
1134 return false;
1137 static bool
1138 millerrabin2 (const uintmax_t *np, uintmax_t ni, const uintmax_t *bp,
1139 const uintmax_t *qp, unsigned int k, const uintmax_t *one)
1141 uintmax_t y1, y0, nm1_1, nm1_0, r1m;
1143 y0 = powm2 (&r1m, bp, qp, np, ni, one);
1144 y1 = r1m;
1146 if (y0 == one[0] && y1 == one[1])
1147 return true;
1149 sub_ddmmss (nm1_1, nm1_0, np[1], np[0], one[1], one[0]);
1151 if (y0 == nm1_0 && y1 == nm1_1)
1152 return true;
1154 for (unsigned int i = 1; i < k; i++)
1156 y0 = mulredc2 (&r1m, y1, y0, y1, y0, np[1], np[0], ni);
1157 y1 = r1m;
1159 if (y0 == nm1_0 && y1 == nm1_1)
1160 return true;
1161 if (y0 == one[0] && y1 == one[1])
1162 return false;
1164 return false;
1167 #if HAVE_GMP
1168 static bool
1169 mp_millerrabin (mpz_srcptr n, mpz_srcptr nm1, mpz_ptr x, mpz_ptr y,
1170 mpz_srcptr q, unsigned long int k)
1172 mpz_powm (y, x, q, n);
1174 if (mpz_cmp_ui (y, 1) == 0 || mpz_cmp (y, nm1) == 0)
1175 return true;
1177 for (unsigned long int i = 1; i < k; i++)
1179 mpz_powm_ui (y, y, 2, n);
1180 if (mpz_cmp (y, nm1) == 0)
1181 return true;
1182 if (mpz_cmp_ui (y, 1) == 0)
1183 return false;
1185 return false;
1187 #endif
1189 /* Lucas' prime test. The number of iterations vary greatly, up to a few dozen
1190 have been observed. The average seem to be about 2. */
1191 static bool
1192 prime_p (uintmax_t n)
1194 int k;
1195 bool is_prime;
1196 uintmax_t a_prim, one, ni;
1197 struct factors factors;
1199 if (n <= 1)
1200 return false;
1202 /* We have already casted out small primes. */
1203 if (n < (uintmax_t) FIRST_OMITTED_PRIME * FIRST_OMITTED_PRIME)
1204 return true;
1206 /* Precomputation for Miller-Rabin. */
1207 uintmax_t q = n - 1;
1208 for (k = 0; (q & 1) == 0; k++)
1209 q >>= 1;
1211 uintmax_t a = 2;
1212 binv (ni, n); /* ni <- 1/n mod B */
1213 redcify (one, 1, n);
1214 addmod (a_prim, one, one, n); /* i.e., redcify a = 2 */
1216 /* Perform a Miller-Rabin test, finds most composites quickly. */
1217 if (!millerrabin (n, ni, a_prim, q, k, one))
1218 return false;
1220 if (flag_prove_primality)
1222 /* Factor n-1 for Lucas. */
1223 factor (0, n - 1, &factors);
1226 /* Loop until Lucas proves our number prime, or Miller-Rabin proves our
1227 number composite. */
1228 for (unsigned int r = 0; r < PRIMES_PTAB_ENTRIES; r++)
1230 if (flag_prove_primality)
1232 is_prime = true;
1233 for (unsigned int i = 0; i < factors.nfactors && is_prime; i++)
1235 is_prime
1236 = powm (a_prim, (n - 1) / factors.p[i], n, ni, one) != one;
1239 else
1241 /* After enough Miller-Rabin runs, be content. */
1242 is_prime = (r == MR_REPS - 1);
1245 if (is_prime)
1246 return true;
1248 a += primes_diff[r]; /* Establish new base. */
1250 /* The following is equivalent to redcify (a_prim, a, n). It runs faster
1251 on most processors, since it avoids udiv_qrnnd. If we go down the
1252 udiv_qrnnd_preinv path, this code should be replaced. */
1254 uintmax_t s1, s0;
1255 umul_ppmm (s1, s0, one, a);
1256 if (LIKELY (s1 == 0))
1257 a_prim = s0 % n;
1258 else
1260 uintmax_t dummy _GL_UNUSED;
1261 udiv_qrnnd (dummy, a_prim, s1, s0, n);
1265 if (!millerrabin (n, ni, a_prim, q, k, one))
1266 return false;
1269 error (0, 0, _("Lucas prime test failure. This should not happen"));
1270 abort ();
1273 static bool
1274 prime2_p (uintmax_t n1, uintmax_t n0)
1276 uintmax_t q[2], nm1[2];
1277 uintmax_t a_prim[2];
1278 uintmax_t one[2];
1279 uintmax_t na[2];
1280 uintmax_t ni;
1281 unsigned int k;
1282 struct factors factors;
1284 if (n1 == 0)
1285 return prime_p (n0);
1287 nm1[1] = n1 - (n0 == 0);
1288 nm1[0] = n0 - 1;
1289 if (nm1[0] == 0)
1291 count_trailing_zeros (k, nm1[1]);
1293 q[0] = nm1[1] >> k;
1294 q[1] = 0;
1295 k += W_TYPE_SIZE;
1297 else
1299 count_trailing_zeros (k, nm1[0]);
1300 rsh2 (q[1], q[0], nm1[1], nm1[0], k);
1303 uintmax_t a = 2;
1304 binv (ni, n0);
1305 redcify2 (one[1], one[0], 1, n1, n0);
1306 addmod2 (a_prim[1], a_prim[0], one[1], one[0], one[1], one[0], n1, n0);
1308 /* FIXME: Use scalars or pointers in arguments? Some consistency needed. */
1309 na[0] = n0;
1310 na[1] = n1;
1312 if (!millerrabin2 (na, ni, a_prim, q, k, one))
1313 return false;
1315 if (flag_prove_primality)
1317 /* Factor n-1 for Lucas. */
1318 factor (nm1[1], nm1[0], &factors);
1321 /* Loop until Lucas proves our number prime, or Miller-Rabin proves our
1322 number composite. */
1323 for (unsigned int r = 0; r < PRIMES_PTAB_ENTRIES; r++)
1325 bool is_prime;
1326 uintmax_t e[2], y[2];
1328 if (flag_prove_primality)
1330 is_prime = true;
1331 if (factors.plarge[1])
1333 uintmax_t pi;
1334 binv (pi, factors.plarge[0]);
1335 e[0] = pi * nm1[0];
1336 e[1] = 0;
1337 y[0] = powm2 (&y[1], a_prim, e, na, ni, one);
1338 is_prime = (y[0] != one[0] || y[1] != one[1]);
1340 for (unsigned int i = 0; i < factors.nfactors && is_prime; i++)
1342 /* FIXME: We always have the factor 2. Do we really need to
1343 handle it here? We have done the same powering as part
1344 of millerrabin. */
1345 if (factors.p[i] == 2)
1346 rsh2 (e[1], e[0], nm1[1], nm1[0], 1);
1347 else
1348 divexact_21 (e[1], e[0], nm1[1], nm1[0], factors.p[i]);
1349 y[0] = powm2 (&y[1], a_prim, e, na, ni, one);
1350 is_prime = (y[0] != one[0] || y[1] != one[1]);
1353 else
1355 /* After enough Miller-Rabin runs, be content. */
1356 is_prime = (r == MR_REPS - 1);
1359 if (is_prime)
1360 return true;
1362 a += primes_diff[r]; /* Establish new base. */
1363 redcify2 (a_prim[1], a_prim[0], a, n1, n0);
1365 if (!millerrabin2 (na, ni, a_prim, q, k, one))
1366 return false;
1369 error (0, 0, _("Lucas prime test failure. This should not happen"));
1370 abort ();
1373 #if HAVE_GMP
1374 static bool
1375 mp_prime_p (mpz_t n)
1377 bool is_prime;
1378 mpz_t q, a, nm1, tmp;
1379 struct mp_factors factors;
1381 if (mpz_cmp_ui (n, 1) <= 0)
1382 return false;
1384 /* We have already casted out small primes. */
1385 if (mpz_cmp_ui (n, (long) FIRST_OMITTED_PRIME * FIRST_OMITTED_PRIME) < 0)
1386 return true;
1388 mpz_inits (q, a, nm1, tmp, NULL);
1390 /* Precomputation for Miller-Rabin. */
1391 mpz_sub_ui (nm1, n, 1);
1393 /* Find q and k, where q is odd and n = 1 + 2**k * q. */
1394 unsigned long int k = mpz_scan1 (nm1, 0);
1395 mpz_tdiv_q_2exp (q, nm1, k);
1397 mpz_set_ui (a, 2);
1399 /* Perform a Miller-Rabin test, finds most composites quickly. */
1400 if (!mp_millerrabin (n, nm1, a, tmp, q, k))
1402 is_prime = false;
1403 goto ret2;
1406 if (flag_prove_primality)
1408 /* Factor n-1 for Lucas. */
1409 mpz_set (tmp, nm1);
1410 mp_factor (tmp, &factors);
1413 /* Loop until Lucas proves our number prime, or Miller-Rabin proves our
1414 number composite. */
1415 for (unsigned int r = 0; r < PRIMES_PTAB_ENTRIES; r++)
1417 if (flag_prove_primality)
1419 is_prime = true;
1420 for (unsigned long int i = 0; i < factors.nfactors && is_prime; i++)
1422 mpz_divexact (tmp, nm1, factors.p[i]);
1423 mpz_powm (tmp, a, tmp, n);
1424 is_prime = mpz_cmp_ui (tmp, 1) != 0;
1427 else
1429 /* After enough Miller-Rabin runs, be content. */
1430 is_prime = (r == MR_REPS - 1);
1433 if (is_prime)
1434 goto ret1;
1436 mpz_add_ui (a, a, primes_diff[r]); /* Establish new base. */
1438 if (!mp_millerrabin (n, nm1, a, tmp, q, k))
1440 is_prime = false;
1441 goto ret1;
1445 error (0, 0, _("Lucas prime test failure. This should not happen"));
1446 abort ();
1448 ret1:
1449 if (flag_prove_primality)
1450 mp_factor_clear (&factors);
1451 ret2:
1452 mpz_clears (q, a, nm1, tmp, NULL);
1454 return is_prime;
1456 #endif
1458 static void
1459 factor_using_pollard_rho (uintmax_t n, unsigned long int a,
1460 struct factors *factors)
1462 uintmax_t x, z, y, P, t, ni, g;
1464 unsigned long int k = 1;
1465 unsigned long int l = 1;
1467 redcify (P, 1, n);
1468 addmod (x, P, P, n); /* i.e., redcify(2) */
1469 y = z = x;
1471 while (n != 1)
1473 assert (a < n);
1475 binv (ni, n); /* FIXME: when could we use old 'ni' value? */
1477 for (;;)
1481 x = mulredc (x, x, n, ni);
1482 addmod (x, x, a, n);
1484 submod (t, z, x, n);
1485 P = mulredc (P, t, n, ni);
1487 if (k % 32 == 1)
1489 if (gcd_odd (P, n) != 1)
1490 goto factor_found;
1491 y = x;
1494 while (--k != 0);
1496 z = x;
1497 k = l;
1498 l = 2 * l;
1499 for (unsigned long int i = 0; i < k; i++)
1501 x = mulredc (x, x, n, ni);
1502 addmod (x, x, a, n);
1504 y = x;
1507 factor_found:
1510 y = mulredc (y, y, n, ni);
1511 addmod (y, y, a, n);
1513 submod (t, z, y, n);
1514 g = gcd_odd (t, n);
1516 while (g == 1);
1518 n = n / g;
1520 if (!prime_p (g))
1521 factor_using_pollard_rho (g, a + 1, factors);
1522 else
1523 factor_insert (factors, g);
1525 if (prime_p (n))
1527 factor_insert (factors, n);
1528 break;
1531 x = x % n;
1532 z = z % n;
1533 y = y % n;
1537 static void
1538 factor_using_pollard_rho2 (uintmax_t n1, uintmax_t n0, unsigned long int a,
1539 struct factors *factors)
1541 uintmax_t x1, x0, z1, z0, y1, y0, P1, P0, t1, t0, ni, g1, g0, r1m;
1543 unsigned long int k = 1;
1544 unsigned long int l = 1;
1546 redcify2 (P1, P0, 1, n1, n0);
1547 addmod2 (x1, x0, P1, P0, P1, P0, n1, n0); /* i.e., redcify(2) */
1548 y1 = z1 = x1;
1549 y0 = z0 = x0;
1551 while (n1 != 0 || n0 != 1)
1553 binv (ni, n0);
1555 for (;;)
1559 x0 = mulredc2 (&r1m, x1, x0, x1, x0, n1, n0, ni);
1560 x1 = r1m;
1561 addmod2 (x1, x0, x1, x0, 0, (uintmax_t) a, n1, n0);
1563 submod2 (t1, t0, z1, z0, x1, x0, n1, n0);
1564 P0 = mulredc2 (&r1m, P1, P0, t1, t0, n1, n0, ni);
1565 P1 = r1m;
1567 if (k % 32 == 1)
1569 g0 = gcd2_odd (&g1, P1, P0, n1, n0);
1570 if (g1 != 0 || g0 != 1)
1571 goto factor_found;
1572 y1 = x1; y0 = x0;
1575 while (--k != 0);
1577 z1 = x1; z0 = x0;
1578 k = l;
1579 l = 2 * l;
1580 for (unsigned long int i = 0; i < k; i++)
1582 x0 = mulredc2 (&r1m, x1, x0, x1, x0, n1, n0, ni);
1583 x1 = r1m;
1584 addmod2 (x1, x0, x1, x0, 0, (uintmax_t) a, n1, n0);
1586 y1 = x1; y0 = x0;
1589 factor_found:
1592 y0 = mulredc2 (&r1m, y1, y0, y1, y0, n1, n0, ni);
1593 y1 = r1m;
1594 addmod2 (y1, y0, y1, y0, 0, (uintmax_t) a, n1, n0);
1596 submod2 (t1, t0, z1, z0, y1, y0, n1, n0);
1597 g0 = gcd2_odd (&g1, t1, t0, n1, n0);
1599 while (g1 == 0 && g0 == 1);
1601 if (g1 == 0)
1603 /* The found factor is one word. */
1604 divexact_21 (n1, n0, n1, n0, g0); /* n = n / g */
1606 if (!prime_p (g0))
1607 factor_using_pollard_rho (g0, a + 1, factors);
1608 else
1609 factor_insert (factors, g0);
1611 else
1613 /* The found factor is two words. This is highly unlikely, thus hard
1614 to trigger. Please be careful before you change this code! */
1615 uintmax_t ginv;
1617 binv (ginv, g0); /* Compute n = n / g. Since the result will */
1618 n0 = ginv * n0; /* fit one word, we can compute the quotient */
1619 n1 = 0; /* modulo B, ignoring the high divisor word. */
1621 if (!prime2_p (g1, g0))
1622 factor_using_pollard_rho2 (g1, g0, a + 1, factors);
1623 else
1624 factor_insert_large (factors, g1, g0);
1627 if (n1 == 0)
1629 if (prime_p (n0))
1631 factor_insert (factors, n0);
1632 break;
1635 factor_using_pollard_rho (n0, a, factors);
1636 return;
1639 if (prime2_p (n1, n0))
1641 factor_insert_large (factors, n1, n0);
1642 break;
1645 x0 = mod2 (&x1, x1, x0, n1, n0);
1646 z0 = mod2 (&z1, z1, z0, n1, n0);
1647 y0 = mod2 (&y1, y1, y0, n1, n0);
1651 #if HAVE_GMP
1652 static void
1653 mp_factor_using_pollard_rho (mpz_t n, unsigned long int a,
1654 struct mp_factors *factors)
1656 mpz_t x, z, y, P;
1657 mpz_t t, t2;
1659 devmsg ("[pollard-rho (%lu)] ", a);
1661 mpz_inits (t, t2, NULL);
1662 mpz_init_set_si (y, 2);
1663 mpz_init_set_si (x, 2);
1664 mpz_init_set_si (z, 2);
1665 mpz_init_set_ui (P, 1);
1667 unsigned long long int k = 1;
1668 unsigned long long int l = 1;
1670 while (mpz_cmp_ui (n, 1) != 0)
1672 for (;;)
1676 mpz_mul (t, x, x);
1677 mpz_mod (x, t, n);
1678 mpz_add_ui (x, x, a);
1680 mpz_sub (t, z, x);
1681 mpz_mul (t2, P, t);
1682 mpz_mod (P, t2, n);
1684 if (k % 32 == 1)
1686 mpz_gcd (t, P, n);
1687 if (mpz_cmp_ui (t, 1) != 0)
1688 goto factor_found;
1689 mpz_set (y, x);
1692 while (--k != 0);
1694 mpz_set (z, x);
1695 k = l;
1696 l = 2 * l;
1697 for (unsigned long long int i = 0; i < k; i++)
1699 mpz_mul (t, x, x);
1700 mpz_mod (x, t, n);
1701 mpz_add_ui (x, x, a);
1703 mpz_set (y, x);
1706 factor_found:
1709 mpz_mul (t, y, y);
1710 mpz_mod (y, t, n);
1711 mpz_add_ui (y, y, a);
1713 mpz_sub (t, z, y);
1714 mpz_gcd (t, t, n);
1716 while (mpz_cmp_ui (t, 1) == 0);
1718 mpz_divexact (n, n, t); /* divide by t, before t is overwritten */
1720 if (!mp_prime_p (t))
1722 devmsg ("[composite factor--restarting pollard-rho] ");
1723 mp_factor_using_pollard_rho (t, a + 1, factors);
1725 else
1727 mp_factor_insert (factors, t);
1730 if (mp_prime_p (n))
1732 mp_factor_insert (factors, n);
1733 break;
1736 mpz_mod (x, x, n);
1737 mpz_mod (z, z, n);
1738 mpz_mod (y, y, n);
1741 mpz_clears (P, t2, t, z, x, y, NULL);
1743 #endif
1745 #if USE_SQUFOF
1746 /* FIXME: Maybe better to use an iteration converging to 1/sqrt(n)? If
1747 algorithm is replaced, consider also returning the remainder. */
1748 static uintmax_t _GL_ATTRIBUTE_CONST
1749 isqrt (uintmax_t n)
1751 uintmax_t x;
1752 unsigned c;
1753 if (n == 0)
1754 return 0;
1756 count_leading_zeros (c, n);
1758 /* Make x > sqrt(n). This will be invariant through the loop. */
1759 x = (uintmax_t) 1 << ((W_TYPE_SIZE + 1 - c) / 2);
1761 for (;;)
1763 uintmax_t y = (x + n/x) / 2;
1764 if (y >= x)
1765 return x;
1767 x = y;
1771 static uintmax_t _GL_ATTRIBUTE_CONST
1772 isqrt2 (uintmax_t nh, uintmax_t nl)
1774 unsigned int shift;
1775 uintmax_t x;
1777 /* Ensures the remainder fits in an uintmax_t. */
1778 assert (nh < ((uintmax_t) 1 << (W_TYPE_SIZE - 2)));
1780 if (nh == 0)
1781 return isqrt (nl);
1783 count_leading_zeros (shift, nh);
1784 shift &= ~1;
1786 /* Make x > sqrt(n) */
1787 x = isqrt ( (nh << shift) + (nl >> (W_TYPE_SIZE - shift))) + 1;
1788 x <<= (W_TYPE_SIZE - shift) / 2;
1790 /* Do we need more than one iteration? */
1791 for (;;)
1793 uintmax_t r _GL_UNUSED;
1794 uintmax_t q, y;
1795 udiv_qrnnd (q, r, nh, nl, x);
1796 y = (x + q) / 2;
1798 if (y >= x)
1800 uintmax_t hi, lo;
1801 umul_ppmm (hi, lo, x + 1, x + 1);
1802 assert (gt2 (hi, lo, nh, nl));
1804 umul_ppmm (hi, lo, x, x);
1805 assert (ge2 (nh, nl, hi, lo));
1806 sub_ddmmss (hi, lo, nh, nl, hi, lo);
1807 assert (hi == 0);
1809 return x;
1812 x = y;
1816 /* MAGIC[N] has a bit i set iff i is a quadratic residue mod N. */
1817 # define MAGIC64 0x0202021202030213ULL
1818 # define MAGIC63 0x0402483012450293ULL
1819 # define MAGIC65 0x218a019866014613ULL
1820 # define MAGIC11 0x23b
1822 /* Return the square root if the input is a square, otherwise 0. */
1823 static uintmax_t _GL_ATTRIBUTE_CONST
1824 is_square (uintmax_t x)
1826 /* Uses the tests suggested by Cohen. Excludes 99% of the non-squares before
1827 computing the square root. */
1828 if (((MAGIC64 >> (x & 63)) & 1)
1829 && ((MAGIC63 >> (x % 63)) & 1)
1830 /* Both 0 and 64 are squares mod (65) */
1831 && ((MAGIC65 >> ((x % 65) & 63)) & 1)
1832 && ((MAGIC11 >> (x % 11) & 1)))
1834 uintmax_t r = isqrt (x);
1835 if (r*r == x)
1836 return r;
1838 return 0;
1841 /* invtab[i] = floor(0x10000 / (0x100 + i) */
1842 static const unsigned short invtab[0x81] =
1844 0x200,
1845 0x1fc, 0x1f8, 0x1f4, 0x1f0, 0x1ec, 0x1e9, 0x1e5, 0x1e1,
1846 0x1de, 0x1da, 0x1d7, 0x1d4, 0x1d0, 0x1cd, 0x1ca, 0x1c7,
1847 0x1c3, 0x1c0, 0x1bd, 0x1ba, 0x1b7, 0x1b4, 0x1b2, 0x1af,
1848 0x1ac, 0x1a9, 0x1a6, 0x1a4, 0x1a1, 0x19e, 0x19c, 0x199,
1849 0x197, 0x194, 0x192, 0x18f, 0x18d, 0x18a, 0x188, 0x186,
1850 0x183, 0x181, 0x17f, 0x17d, 0x17a, 0x178, 0x176, 0x174,
1851 0x172, 0x170, 0x16e, 0x16c, 0x16a, 0x168, 0x166, 0x164,
1852 0x162, 0x160, 0x15e, 0x15c, 0x15a, 0x158, 0x157, 0x155,
1853 0x153, 0x151, 0x150, 0x14e, 0x14c, 0x14a, 0x149, 0x147,
1854 0x146, 0x144, 0x142, 0x141, 0x13f, 0x13e, 0x13c, 0x13b,
1855 0x139, 0x138, 0x136, 0x135, 0x133, 0x132, 0x130, 0x12f,
1856 0x12e, 0x12c, 0x12b, 0x129, 0x128, 0x127, 0x125, 0x124,
1857 0x123, 0x121, 0x120, 0x11f, 0x11e, 0x11c, 0x11b, 0x11a,
1858 0x119, 0x118, 0x116, 0x115, 0x114, 0x113, 0x112, 0x111,
1859 0x10f, 0x10e, 0x10d, 0x10c, 0x10b, 0x10a, 0x109, 0x108,
1860 0x107, 0x106, 0x105, 0x104, 0x103, 0x102, 0x101, 0x100,
1863 /* Compute q = [u/d], r = u mod d. Avoids slow hardware division for the case
1864 that q < 0x40; here it instead uses a table of (Euclidian) inverses. */
1865 # define div_smallq(q, r, u, d) \
1866 do { \
1867 if ((u) / 0x40 < (d)) \
1869 int _cnt; \
1870 uintmax_t _dinv, _mask, _q, _r; \
1871 count_leading_zeros (_cnt, (d)); \
1872 _r = (u); \
1873 if (UNLIKELY (_cnt > (W_TYPE_SIZE - 8))) \
1875 _dinv = invtab[((d) << (_cnt + 8 - W_TYPE_SIZE)) - 0x80]; \
1876 _q = _dinv * _r >> (8 + W_TYPE_SIZE - _cnt); \
1878 else \
1880 _dinv = invtab[((d) >> (W_TYPE_SIZE - 8 - _cnt)) - 0x7f]; \
1881 _q = _dinv * (_r >> (W_TYPE_SIZE - 3 - _cnt)) >> 11; \
1883 _r -= _q*(d); \
1885 _mask = -(uintmax_t) (_r >= (d)); \
1886 (r) = _r - (_mask & (d)); \
1887 (q) = _q - _mask; \
1888 assert ( (q) * (d) + (r) == u); \
1890 else \
1892 uintmax_t _q = (u) / (d); \
1893 (r) = (u) - _q * (d); \
1894 (q) = _q; \
1896 } while (0)
1898 /* Notes: Example N = 22117019. After first phase we find Q1 = 6314, Q
1899 = 3025, P = 1737, representing F_{18} = (-6314, 2* 1737, 3025),
1900 with 3025 = 55^2.
1902 Constructing the square root, we get Q1 = 55, Q = 8653, P = 4652,
1903 representing G_0 = (-55, 2*4652, 8653).
1905 In the notation of the paper:
1907 S_{-1} = 55, S_0 = 8653, R_0 = 4652
1911 t_0 = floor([q_0 + R_0] / S0) = 1
1912 R_1 = t_0 * S_0 - R_0 = 4001
1913 S_1 = S_{-1} +t_0 (R_0 - R_1) = 706
1916 /* Multipliers, in order of efficiency:
1917 0.7268 3*5*7*11 = 1155 = 3 (mod 4)
1918 0.7317 3*5*7 = 105 = 1
1919 0.7820 3*5*11 = 165 = 1
1920 0.7872 3*5 = 15 = 3
1921 0.8101 3*7*11 = 231 = 3
1922 0.8155 3*7 = 21 = 1
1923 0.8284 5*7*11 = 385 = 1
1924 0.8339 5*7 = 35 = 3
1925 0.8716 3*11 = 33 = 1
1926 0.8774 3 = 3 = 3
1927 0.8913 5*11 = 55 = 3
1928 0.8972 5 = 5 = 1
1929 0.9233 7*11 = 77 = 1
1930 0.9295 7 = 7 = 3
1931 0.9934 11 = 11 = 3
1933 # define QUEUE_SIZE 50
1934 #endif
1936 #if STAT_SQUFOF
1937 # define Q_FREQ_SIZE 50
1938 /* Element 0 keeps the total */
1939 static unsigned int q_freq[Q_FREQ_SIZE + 1];
1940 # define MIN(a,b) ((a) < (b) ? (a) : (b))
1941 #endif
1943 #if USE_SQUFOF
1944 /* Return true on success. Expected to fail only for numbers
1945 >= 2^{2*W_TYPE_SIZE - 2}, or close to that limit. */
1946 static bool
1947 factor_using_squfof (uintmax_t n1, uintmax_t n0, struct factors *factors)
1949 /* Uses algorithm and notation from
1951 SQUARE FORM FACTORIZATION
1952 JASON E. GOWER AND SAMUEL S. WAGSTAFF, JR.
1954 http://homes.cerias.purdue.edu/~ssw/squfof.pdf
1957 static const unsigned int multipliers_1[] =
1958 { /* = 1 (mod 4) */
1959 105, 165, 21, 385, 33, 5, 77, 1, 0
1961 static const unsigned int multipliers_3[] =
1962 { /* = 3 (mod 4) */
1963 1155, 15, 231, 35, 3, 55, 7, 11, 0
1966 const unsigned int *m;
1968 struct { uintmax_t Q; uintmax_t P; } queue[QUEUE_SIZE];
1970 if (n1 >= ((uintmax_t) 1 << (W_TYPE_SIZE - 2)))
1971 return false;
1973 uintmax_t sqrt_n = isqrt2 (n1, n0);
1975 if (n0 == sqrt_n * sqrt_n)
1977 uintmax_t p1, p0;
1979 umul_ppmm (p1, p0, sqrt_n, sqrt_n);
1980 assert (p0 == n0);
1982 if (n1 == p1)
1984 if (prime_p (sqrt_n))
1985 factor_insert_multiplicity (factors, sqrt_n, 2);
1986 else
1988 struct factors f;
1990 f.nfactors = 0;
1991 if (!factor_using_squfof (0, sqrt_n, &f))
1993 /* Try pollard rho instead */
1994 factor_using_pollard_rho (sqrt_n, 1, &f);
1996 /* Duplicate the new factors */
1997 for (unsigned int i = 0; i < f.nfactors; i++)
1998 factor_insert_multiplicity (factors, f.p[i], 2*f.e[i]);
2000 return true;
2004 /* Select multipliers so we always get n * mu = 3 (mod 4) */
2005 for (m = (n0 % 4 == 1) ? multipliers_3 : multipliers_1;
2006 *m; m++)
2008 uintmax_t S, Dh, Dl, Q1, Q, P, L, L1, B;
2009 unsigned int i;
2010 unsigned int mu = *m;
2011 unsigned int qpos = 0;
2013 assert (mu * n0 % 4 == 3);
2015 /* In the notation of the paper, with mu * n == 3 (mod 4), we
2016 get \Delta = 4 mu * n, and the paper's \mu is 2 mu. As far as
2017 I understand it, the necessary bound is 4 \mu^3 < n, or 32
2018 mu^3 < n.
2020 However, this seems insufficient: With n = 37243139 and mu =
2021 105, we get a trivial factor, from the square 38809 = 197^2,
2022 without any corresponding Q earlier in the iteration.
2024 Requiring 64 mu^3 < n seems sufficient. */
2025 if (n1 == 0)
2027 if ((uintmax_t) mu*mu*mu >= n0 / 64)
2028 continue;
2030 else
2032 if (n1 > ((uintmax_t) 1 << (W_TYPE_SIZE - 2)) / mu)
2033 continue;
2035 umul_ppmm (Dh, Dl, n0, mu);
2036 Dh += n1 * mu;
2038 assert (Dl % 4 != 1);
2039 assert (Dh < (uintmax_t) 1 << (W_TYPE_SIZE - 2));
2041 S = isqrt2 (Dh, Dl);
2043 Q1 = 1;
2044 P = S;
2046 /* Square root remainder fits in one word, so ignore high part. */
2047 Q = Dl - P*P;
2048 /* FIXME: When can this differ from floor(sqrt(2 sqrt(D)))? */
2049 L = isqrt (2*S);
2050 B = 2*L;
2051 L1 = mu * 2 * L;
2053 /* The form is (+/- Q1, 2P, -/+ Q), of discriminant 4 (P^2 + Q Q1) =
2054 4 D. */
2056 for (i = 0; i <= B; i++)
2058 uintmax_t q, P1, t, rem;
2060 div_smallq (q, rem, S+P, Q);
2061 P1 = S - rem; /* P1 = q*Q - P */
2063 IF_LINT (assert (q > 0 && Q > 0));
2065 # if STAT_SQUFOF
2066 q_freq[0]++;
2067 q_freq[MIN (q, Q_FREQ_SIZE)]++;
2068 # endif
2070 if (Q <= L1)
2072 uintmax_t g = Q;
2074 if ( (Q & 1) == 0)
2075 g /= 2;
2077 g /= gcd_odd (g, mu);
2079 if (g <= L)
2081 if (qpos >= QUEUE_SIZE)
2082 error (EXIT_FAILURE, 0, _("squfof queue overflow"));
2083 queue[qpos].Q = g;
2084 queue[qpos].P = P % g;
2085 qpos++;
2089 /* I think the difference can be either sign, but mod
2090 2^W_TYPE_SIZE arithmetic should be fine. */
2091 t = Q1 + q * (P - P1);
2092 Q1 = Q;
2093 Q = t;
2094 P = P1;
2096 if ( (i & 1) == 0)
2098 uintmax_t r = is_square (Q);
2099 if (r)
2101 for (unsigned int j = 0; j < qpos; j++)
2103 if (queue[j].Q == r)
2105 if (r == 1)
2106 /* Traversed entire cycle. */
2107 goto next_multiplier;
2109 /* Need the absolute value for divisibility test. */
2110 if (P >= queue[j].P)
2111 t = P - queue[j].P;
2112 else
2113 t = queue[j].P - P;
2114 if (t % r == 0)
2116 /* Delete entries up to and including entry
2117 j, which matched. */
2118 memmove (queue, queue + j + 1,
2119 (qpos - j - 1) * sizeof (queue[0]));
2120 qpos -= (j + 1);
2122 goto next_i;
2126 /* We have found a square form, which should give a
2127 factor. */
2128 Q1 = r;
2129 assert (S >= P); /* What signs are possible? */
2130 P += r * ((S - P) / r);
2132 /* Note: Paper says (N - P*P) / Q1, that seems incorrect
2133 for the case D = 2N. */
2134 /* Compute Q = (D - P*P) / Q1, but we need double
2135 precision. */
2136 uintmax_t hi, lo;
2137 umul_ppmm (hi, lo, P, P);
2138 sub_ddmmss (hi, lo, Dh, Dl, hi, lo);
2139 udiv_qrnnd (Q, rem, hi, lo, Q1);
2140 assert (rem == 0);
2142 for (;;)
2144 /* Note: There appears to by a typo in the paper,
2145 Step 4a in the algorithm description says q <--
2146 floor([S+P]/\hat Q), but looking at the equations
2147 in Sec. 3.1, it should be q <-- floor([S+P] / Q).
2148 (In this code, \hat Q is Q1). */
2149 div_smallq (q, rem, S+P, Q);
2150 P1 = S - rem; /* P1 = q*Q - P */
2152 # if STAT_SQUFOF
2153 q_freq[0]++;
2154 q_freq[MIN (q, Q_FREQ_SIZE)]++;
2155 # endif
2156 if (P == P1)
2157 break;
2158 t = Q1 + q * (P - P1);
2159 Q1 = Q;
2160 Q = t;
2161 P = P1;
2164 if ( (Q & 1) == 0)
2165 Q /= 2;
2166 Q /= gcd_odd (Q, mu);
2168 assert (Q > 1 && (n1 || Q < n0));
2170 if (prime_p (Q))
2171 factor_insert (factors, Q);
2172 else if (!factor_using_squfof (0, Q, factors))
2173 factor_using_pollard_rho (Q, 2, factors);
2175 divexact_21 (n1, n0, n1, n0, Q);
2177 if (prime2_p (n1, n0))
2178 factor_insert_large (factors, n1, n0);
2179 else
2181 if (!factor_using_squfof (n1, n0, factors))
2183 if (n1 == 0)
2184 factor_using_pollard_rho (n0, 1, factors);
2185 else
2186 factor_using_pollard_rho2 (n1, n0, 1, factors);
2190 return true;
2193 next_i:;
2195 next_multiplier:;
2197 return false;
2199 #endif
2201 /* Compute the prime factors of the 128-bit number (T1,T0), and put the
2202 results in FACTORS. */
2203 static void
2204 factor (uintmax_t t1, uintmax_t t0, struct factors *factors)
2206 factors->nfactors = 0;
2207 factors->plarge[1] = 0;
2209 if (t1 == 0 && t0 < 2)
2210 return;
2212 t0 = factor_using_division (&t1, t1, t0, factors);
2214 if (t1 == 0 && t0 < 2)
2215 return;
2217 if (prime2_p (t1, t0))
2218 factor_insert_large (factors, t1, t0);
2219 else
2221 #if USE_SQUFOF
2222 if (factor_using_squfof (t1, t0, factors))
2223 return;
2224 #endif
2226 if (t1 == 0)
2227 factor_using_pollard_rho (t0, 1, factors);
2228 else
2229 factor_using_pollard_rho2 (t1, t0, 1, factors);
2233 #if HAVE_GMP
2234 /* Use Pollard-rho to compute the prime factors of
2235 arbitrary-precision T, and put the results in FACTORS. */
2236 static void
2237 mp_factor (mpz_t t, struct mp_factors *factors)
2239 mp_factor_init (factors);
2241 if (mpz_sgn (t) != 0)
2243 mp_factor_using_division (t, factors);
2245 if (mpz_cmp_ui (t, 1) != 0)
2247 devmsg ("[is number prime?] ");
2248 if (mp_prime_p (t))
2249 mp_factor_insert (factors, t);
2250 else
2251 mp_factor_using_pollard_rho (t, 1, factors);
2255 #endif
2257 static strtol_error
2258 strto2uintmax (uintmax_t *hip, uintmax_t *lop, const char *s)
2260 unsigned int lo_carry;
2261 uintmax_t hi = 0, lo = 0;
2263 strtol_error err = LONGINT_INVALID;
2265 /* Skip initial spaces and '+'. */
2266 for (;;)
2268 char c = *s;
2269 if (c == ' ')
2270 s++;
2271 else if (c == '+')
2273 s++;
2274 break;
2276 else
2277 break;
2280 /* Initial scan for invalid digits. */
2281 const char *p = s;
2282 for (;;)
2284 unsigned int c = *p++;
2285 if (c == 0)
2286 break;
2288 if (UNLIKELY (!ISDIGIT (c)))
2290 err = LONGINT_INVALID;
2291 break;
2294 err = LONGINT_OK; /* we've seen at least one valid digit */
2297 for (;err == LONGINT_OK;)
2299 unsigned int c = *s++;
2300 if (c == 0)
2301 break;
2303 c -= '0';
2305 if (UNLIKELY (hi > ~(uintmax_t)0 / 10))
2307 err = LONGINT_OVERFLOW;
2308 break;
2310 hi = 10 * hi;
2312 lo_carry = (lo >> (W_TYPE_SIZE - 3)) + (lo >> (W_TYPE_SIZE - 1));
2313 lo_carry += 10 * lo < 2 * lo;
2315 lo = 10 * lo;
2316 lo += c;
2318 lo_carry += lo < c;
2319 hi += lo_carry;
2320 if (UNLIKELY (hi < lo_carry))
2322 err = LONGINT_OVERFLOW;
2323 break;
2327 *hip = hi;
2328 *lop = lo;
2330 return err;
2333 /* Structure and routines for buffering and outputting full lines,
2334 to support parallel operation efficiently. */
2335 static struct lbuf_
2337 char *buf;
2338 char *end;
2339 } lbuf;
2341 /* 512 is chosen to give good performance,
2342 and also is the max guaranteed size that
2343 consumers can read atomically through pipes.
2344 Also it's big enough to cater for max line length
2345 even with 128 bit uintmax_t. */
2346 #define FACTOR_PIPE_BUF 512
2348 static void
2349 lbuf_alloc (void)
2351 if (lbuf.buf)
2352 return;
2354 /* Double to ensure enough space for
2355 previous numbers + next number. */
2356 lbuf.buf = xmalloc (FACTOR_PIPE_BUF * 2);
2357 lbuf.end = lbuf.buf;
2360 /* Write complete LBUF to standard output. */
2361 static void
2362 lbuf_flush (void)
2364 size_t size = lbuf.end - lbuf.buf;
2365 if (full_write (STDOUT_FILENO, lbuf.buf, size) != size)
2366 error (EXIT_FAILURE, errno, "%s", _("write error"));
2367 lbuf.end = lbuf.buf;
2370 /* Add a character C to LBUF and if it's a newline
2371 and enough bytes are already buffered,
2372 then write atomically to standard output. */
2373 static void
2374 lbuf_putc (char c)
2376 *lbuf.end++ = c;
2378 if (c == '\n')
2380 size_t buffered = lbuf.end - lbuf.buf;
2382 if (buffered >= FACTOR_PIPE_BUF)
2384 /* Write output in <= PIPE_BUF chunks
2385 so consumers can read atomically. */
2386 char const *tend = lbuf.end;
2388 /* Since a umaxint_t's factors must fit in 512
2389 we're guaranteed to find a newline here. */
2390 char *tlend = lbuf.buf + FACTOR_PIPE_BUF;
2391 while (*--tlend != '\n');
2392 tlend++;
2394 lbuf.end = tlend;
2395 lbuf_flush ();
2397 /* Buffer the remainder. */
2398 memcpy (lbuf.buf, tlend, tend - tlend);
2399 lbuf.end = lbuf.buf + (tend - tlend);
2404 /* Buffer an int to the internal LBUF. */
2405 static void
2406 lbuf_putint (uintmax_t i, size_t min_width)
2408 char buf[INT_BUFSIZE_BOUND (uintmax_t)];
2409 char const *umaxstr = umaxtostr (i, buf);
2410 size_t width = sizeof (buf) - (umaxstr - buf) - 1;
2411 size_t z = width;
2413 for (; z < min_width; z++)
2414 *lbuf.end++ = '0';
2416 memcpy (lbuf.end, umaxstr, width);
2417 lbuf.end += width;
2420 static void
2421 print_uintmaxes (uintmax_t t1, uintmax_t t0)
2423 uintmax_t q, r;
2425 if (t1 == 0)
2426 lbuf_putint (t0, 0);
2427 else
2429 /* Use very plain code here since it seems hard to write fast code
2430 without assuming a specific word size. */
2431 q = t1 / 1000000000;
2432 r = t1 % 1000000000;
2433 udiv_qrnnd (t0, r, r, t0, 1000000000);
2434 print_uintmaxes (q, t0);
2435 lbuf_putint (r, 9);
2439 /* Single-precision factoring */
2440 static void
2441 print_factors_single (uintmax_t t1, uintmax_t t0)
2443 struct factors factors;
2445 print_uintmaxes (t1, t0);
2446 lbuf_putc (':');
2448 factor (t1, t0, &factors);
2450 for (unsigned int j = 0; j < factors.nfactors; j++)
2451 for (unsigned int k = 0; k < factors.e[j]; k++)
2453 lbuf_putc (' ');
2454 print_uintmaxes (0, factors.p[j]);
2457 if (factors.plarge[1])
2459 lbuf_putc (' ');
2460 print_uintmaxes (factors.plarge[1], factors.plarge[0]);
2463 lbuf_putc ('\n');
2466 /* Emit the factors of the indicated number. If we have the option of using
2467 either algorithm, we select on the basis of the length of the number.
2468 For longer numbers, we prefer the MP algorithm even if the native algorithm
2469 has enough digits, because the algorithm is better. The turnover point
2470 depends on the value. */
2471 static bool
2472 print_factors (const char *input)
2474 uintmax_t t1, t0;
2476 /* Try converting the number to one or two words. If it fails, use GMP or
2477 print an error message. The 2nd condition checks that the most
2478 significant bit of the two-word number is clear, in a typesize neutral
2479 way. */
2480 strtol_error err = strto2uintmax (&t1, &t0, input);
2482 switch (err)
2484 case LONGINT_OK:
2485 if (((t1 << 1) >> 1) == t1)
2487 devmsg ("[using single-precision arithmetic] ");
2488 print_factors_single (t1, t0);
2489 return true;
2491 break;
2493 case LONGINT_OVERFLOW:
2494 /* Try GMP. */
2495 break;
2497 default:
2498 error (0, 0, _("%s is not a valid positive integer"), quote (input));
2499 return false;
2502 #if HAVE_GMP
2503 devmsg ("[using arbitrary-precision arithmetic] ");
2504 mpz_t t;
2505 struct mp_factors factors;
2507 mpz_init_set_str (t, input, 10);
2509 gmp_printf ("%Zd:", t);
2510 mp_factor (t, &factors);
2512 for (unsigned int j = 0; j < factors.nfactors; j++)
2513 for (unsigned int k = 0; k < factors.e[j]; k++)
2514 gmp_printf (" %Zd", factors.p[j]);
2516 mp_factor_clear (&factors);
2517 mpz_clear (t);
2518 putchar ('\n');
2519 fflush (stdout);
2520 return true;
2521 #else
2522 error (0, 0, _("%s is too large"), quote (input));
2523 return false;
2524 #endif
2527 void
2528 usage (int status)
2530 if (status != EXIT_SUCCESS)
2531 emit_try_help ();
2532 else
2534 printf (_("\
2535 Usage: %s [NUMBER]...\n\
2536 or: %s OPTION\n\
2538 program_name, program_name);
2539 fputs (_("\
2540 Print the prime factors of each specified integer NUMBER. If none\n\
2541 are specified on the command line, read them from standard input.\n\
2543 "), stdout);
2544 fputs (HELP_OPTION_DESCRIPTION, stdout);
2545 fputs (VERSION_OPTION_DESCRIPTION, stdout);
2546 emit_ancillary_info (PROGRAM_NAME);
2548 exit (status);
2551 static bool
2552 do_stdin (void)
2554 bool ok = true;
2555 token_buffer tokenbuffer;
2557 init_tokenbuffer (&tokenbuffer);
2559 while (true)
2561 size_t token_length = readtoken (stdin, DELIM, sizeof (DELIM) - 1,
2562 &tokenbuffer);
2563 if (token_length == (size_t) -1)
2564 break;
2565 ok &= print_factors (tokenbuffer.buffer);
2567 free (tokenbuffer.buffer);
2569 return ok;
2573 main (int argc, char **argv)
2575 initialize_main (&argc, &argv);
2576 set_program_name (argv[0]);
2577 setlocale (LC_ALL, "");
2578 bindtextdomain (PACKAGE, LOCALEDIR);
2579 textdomain (PACKAGE);
2581 lbuf_alloc ();
2582 atexit (close_stdout);
2583 atexit (lbuf_flush);
2585 int c;
2586 while ((c = getopt_long (argc, argv, "", long_options, NULL)) != -1)
2588 switch (c)
2590 case DEV_DEBUG_OPTION:
2591 dev_debug = true;
2592 break;
2594 case_GETOPT_HELP_CHAR;
2596 case_GETOPT_VERSION_CHAR (PROGRAM_NAME, AUTHORS);
2598 default:
2599 usage (EXIT_FAILURE);
2603 #if STAT_SQUFOF
2604 memset (q_freq, 0, sizeof (q_freq));
2605 #endif
2607 bool ok;
2608 if (argc <= optind)
2609 ok = do_stdin ();
2610 else
2612 ok = true;
2613 for (int i = optind; i < argc; i++)
2614 if (! print_factors (argv[i]))
2615 ok = false;
2618 #if STAT_SQUFOF
2619 if (q_freq[0] > 0)
2621 double acc_f;
2622 printf ("q freq. cum. freq.(total: %d)\n", q_freq[0]);
2623 for (unsigned int i = 1, acc_f = 0.0; i <= Q_FREQ_SIZE; i++)
2625 double f = (double) q_freq[i] / q_freq[0];
2626 acc_f += f;
2627 printf ("%s%d %.2f%% %.2f%%\n", i == Q_FREQ_SIZE ? ">=" : "", i,
2628 100.0 * f, 100.0 * acc_f);
2631 #endif
2633 return ok ? EXIT_SUCCESS : EXIT_FAILURE;