tests: fix false failure on new ls test
[coreutils.git] / src / factor.c
blobd271de907c5cc7e76d15fdf64dd9247f6d0cd2d6
1 /* factor -- print prime factors of n.
2 Copyright (C) 1986-2016 Free Software Foundation, Inc.
4 This program is free software: you can redistribute it and/or modify
5 it under the terms of the GNU General Public License as published by
6 the Free Software Foundation, either version 3 of the License, or
7 (at your option) any later version.
9 This program is distributed in the hope that it will be useful,
10 but WITHOUT ANY WARRANTY; without even the implied warranty of
11 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 GNU General Public License for more details.
14 You should have received a copy of the GNU General Public License
15 along with this program. If not, see <http://www.gnu.org/licenses/>. */
17 /* Originally written by Paul Rubin <phr@ocf.berkeley.edu>.
18 Adapted for GNU, fixed to factor UINT_MAX by Jim Meyering.
19 Arbitrary-precision code adapted by James Youngman from Torbjörn
20 Granlund's factorize.c, from GNU MP version 4.2.2.
21 In 2012, the core was rewritten by Torbjörn Granlund and Niels Möller.
22 Contains code from GNU MP. */
24 /* Efficiently factor numbers that fit in one or two words (word = uintmax_t),
25 or, with GMP, numbers of any size.
27 Code organisation:
29 There are several variants of many functions, for handling one word, two
30 words, and GMP's mpz_t type. If the one-word variant is called foo, the
31 two-word variant will be foo2, and the one for mpz_t will be mp_foo. In
32 some cases, the plain function variants will handle both one-word and
33 two-word numbers, evidenced by function arguments.
35 The factoring code for two words will fall into the code for one word when
36 progress allows that.
38 Using GMP is optional. Define HAVE_GMP to make this code include GMP
39 factoring code. The GMP factoring code is based on GMP's demos/factorize.c
40 (last synced 2012-09-07). The GMP-based factoring code will stay in GMP
41 factoring code even if numbers get small enough for using the two-word
42 code.
44 Algorithm:
46 (1) Perform trial division using a small primes table, but without hardware
47 division since the primes table store inverses modulo the word base.
48 (The GMP variant of this code doesn't make use of the precomputed
49 inverses, but instead relies on GMP for fast divisibility testing.)
50 (2) Check the nature of any non-factored part using Miller-Rabin for
51 detecting composites, and Lucas for detecting primes.
52 (3) Factor any remaining composite part using the Pollard-Brent rho
53 algorithm or if USE_SQUFOF is defined to 1, try that first.
54 Status of found factors are checked again using Miller-Rabin and Lucas.
56 We prefer using Hensel norm in the divisions, not the more familiar
57 Euclidian norm, since the former leads to much faster code. In the
58 Pollard-Brent rho code and the prime testing code, we use Montgomery's
59 trick of multiplying all n-residues by the word base, allowing cheap Hensel
60 reductions mod n.
62 Improvements:
64 * Use modular inverses also for exact division in the Lucas code, and
65 elsewhere. A problem is to locate the inverses not from an index, but
66 from a prime. We might instead compute the inverse on-the-fly.
68 * Tune trial division table size (not forgetting that this is a standalone
69 program where the table will be read from disk for each invocation).
71 * Implement less naive powm, using k-ary exponentiation for k = 3 or
72 perhaps k = 4.
74 * Try to speed trial division code for single uintmax_t numbers, i.e., the
75 code using DIVBLOCK. It currently runs at 2 cycles per prime (Intel SBR,
76 IBR), 3 cycles per prime (AMD Stars) and 5 cycles per prime (AMD BD) when
77 using gcc 4.6 and 4.7. Some software pipelining should help; 1, 2, and 4
78 respectively cycles ought to be possible.
80 * The redcify function could be vastly improved by using (plain Euclidian)
81 pre-inversion (such as GMP's invert_limb) and udiv_qrnnd_preinv (from
82 GMP's gmp-impl.h). The redcify2 function could be vastly improved using
83 similar methoods. These functions currently dominate run time when using
84 the -w option.
87 /* Whether to recursively factor to prove primality,
88 or run faster probabilistic tests. */
89 #ifndef PROVE_PRIMALITY
90 # define PROVE_PRIMALITY 1
91 #endif
93 /* Faster for certain ranges but less general. */
94 #ifndef USE_SQUFOF
95 # define USE_SQUFOF 0
96 #endif
98 /* Output SQUFOF statistics. */
99 #ifndef STAT_SQUFOF
100 # define STAT_SQUFOF 0
101 #endif
104 #include <config.h>
105 #include <getopt.h>
106 #include <stdio.h>
107 #if HAVE_GMP
108 # include <gmp.h>
109 # if !HAVE_DECL_MPZ_INITS
110 # include <stdarg.h>
111 # endif
112 #endif
114 #include <assert.h>
116 #include "system.h"
117 #include "die.h"
118 #include "error.h"
119 #include "full-write.h"
120 #include "quote.h"
121 #include "readtokens.h"
122 #include "xstrtol.h"
124 /* The official name of this program (e.g., no 'g' prefix). */
125 #define PROGRAM_NAME "factor"
127 #define AUTHORS \
128 proper_name ("Paul Rubin"), \
129 proper_name_utf8 ("Torbjorn Granlund", "Torbj\303\266rn Granlund"), \
130 proper_name_utf8 ("Niels Moller", "Niels M\303\266ller")
132 /* Token delimiters when reading from a file. */
133 #define DELIM "\n\t "
135 #ifndef USE_LONGLONG_H
136 /* With the way we use longlong.h, it's only safe to use
137 when UWtype = UHWtype, as there were various cases
138 (as can be seen in the history for longlong.h) where
139 for example, _LP64 was required to enable W_TYPE_SIZE==64 code,
140 to avoid compile time or run time issues. */
141 # if LONG_MAX == INTMAX_MAX
142 # define USE_LONGLONG_H 1
143 # endif
144 #endif
146 #if USE_LONGLONG_H
148 /* Make definitions for longlong.h to make it do what it can do for us */
150 /* bitcount for uintmax_t */
151 # if UINTMAX_MAX == UINT32_MAX
152 # define W_TYPE_SIZE 32
153 # elif UINTMAX_MAX == UINT64_MAX
154 # define W_TYPE_SIZE 64
155 # elif UINTMAX_MAX == UINT128_MAX
156 # define W_TYPE_SIZE 128
157 # endif
159 # define UWtype uintmax_t
160 # define UHWtype unsigned long int
161 # undef UDWtype
162 # if HAVE_ATTRIBUTE_MODE
163 typedef unsigned int UQItype __attribute__ ((mode (QI)));
164 typedef int SItype __attribute__ ((mode (SI)));
165 typedef unsigned int USItype __attribute__ ((mode (SI)));
166 typedef int DItype __attribute__ ((mode (DI)));
167 typedef unsigned int UDItype __attribute__ ((mode (DI)));
168 # else
169 typedef unsigned char UQItype;
170 typedef long SItype;
171 typedef unsigned long int USItype;
172 # if HAVE_LONG_LONG_INT
173 typedef long long int DItype;
174 typedef unsigned long long int UDItype;
175 # else /* Assume `long' gives us a wide enough type. Needed for hppa2.0w. */
176 typedef long int DItype;
177 typedef unsigned long int UDItype;
178 # endif
179 # endif
180 # define LONGLONG_STANDALONE /* Don't require GMP's longlong.h mdep files */
181 # define ASSERT(x) /* FIXME make longlong.h really standalone */
182 # define __GMP_DECLSPEC /* FIXME make longlong.h really standalone */
183 # define __clz_tab factor_clz_tab /* Rename to avoid glibc collision */
184 # ifndef __GMP_GNUC_PREREQ
185 # define __GMP_GNUC_PREREQ(a,b) 1
186 # endif
188 /* These stub macros are only used in longlong.h in certain system compiler
189 combinations, so ensure usage to avoid -Wunused-macros warnings. */
190 # if __GMP_GNUC_PREREQ (1,1) && defined __clz_tab
191 ASSERT (1)
192 __GMP_DECLSPEC
193 # endif
195 # if _ARCH_PPC
196 # define HAVE_HOST_CPU_FAMILY_powerpc 1
197 # endif
198 # include "longlong.h"
199 # ifdef COUNT_LEADING_ZEROS_NEED_CLZ_TAB
200 const unsigned char factor_clz_tab[129] =
202 1,2,3,3,4,4,4,4,5,5,5,5,5,5,5,5,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,
203 7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,
204 8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,
205 8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,
208 # endif
210 #else /* not USE_LONGLONG_H */
212 # define W_TYPE_SIZE (8 * sizeof (uintmax_t))
213 # define __ll_B ((uintmax_t) 1 << (W_TYPE_SIZE / 2))
214 # define __ll_lowpart(t) ((uintmax_t) (t) & (__ll_B - 1))
215 # define __ll_highpart(t) ((uintmax_t) (t) >> (W_TYPE_SIZE / 2))
217 #endif
219 #if !defined __clz_tab && !defined UHWtype
220 /* Without this seemingly useless conditional, gcc -Wunused-macros
221 warns that each of the two tested macros is unused on Fedora 18.
222 FIXME: this is just an ugly band-aid. Fix it properly. */
223 #endif
225 /* 2*3*5*7*11...*101 is 128 bits, and has 26 prime factors */
226 #define MAX_NFACTS 26
228 enum
230 DEV_DEBUG_OPTION = CHAR_MAX + 1
233 static struct option const long_options[] =
235 {"-debug", no_argument, NULL, DEV_DEBUG_OPTION},
236 {GETOPT_HELP_OPTION_DECL},
237 {GETOPT_VERSION_OPTION_DECL},
238 {NULL, 0, NULL, 0}
241 struct factors
243 uintmax_t plarge[2]; /* Can have a single large factor */
244 uintmax_t p[MAX_NFACTS];
245 unsigned char e[MAX_NFACTS];
246 unsigned char nfactors;
249 #if HAVE_GMP
250 struct mp_factors
252 mpz_t *p;
253 unsigned long int *e;
254 unsigned long int nfactors;
256 #endif
258 static void factor (uintmax_t, uintmax_t, struct factors *);
260 #ifndef umul_ppmm
261 # define umul_ppmm(w1, w0, u, v) \
262 do { \
263 uintmax_t __x0, __x1, __x2, __x3; \
264 unsigned long int __ul, __vl, __uh, __vh; \
265 uintmax_t __u = (u), __v = (v); \
267 __ul = __ll_lowpart (__u); \
268 __uh = __ll_highpart (__u); \
269 __vl = __ll_lowpart (__v); \
270 __vh = __ll_highpart (__v); \
272 __x0 = (uintmax_t) __ul * __vl; \
273 __x1 = (uintmax_t) __ul * __vh; \
274 __x2 = (uintmax_t) __uh * __vl; \
275 __x3 = (uintmax_t) __uh * __vh; \
277 __x1 += __ll_highpart (__x0);/* this can't give carry */ \
278 __x1 += __x2; /* but this indeed can */ \
279 if (__x1 < __x2) /* did we get it? */ \
280 __x3 += __ll_B; /* yes, add it in the proper pos. */ \
282 (w1) = __x3 + __ll_highpart (__x1); \
283 (w0) = (__x1 << W_TYPE_SIZE / 2) + __ll_lowpart (__x0); \
284 } while (0)
285 #endif
287 #if !defined udiv_qrnnd || defined UDIV_NEEDS_NORMALIZATION
288 /* Define our own, not needing normalization. This function is
289 currently not performance critical, so keep it simple. Similar to
290 the mod macro below. */
291 # undef udiv_qrnnd
292 # define udiv_qrnnd(q, r, n1, n0, d) \
293 do { \
294 uintmax_t __d1, __d0, __q, __r1, __r0; \
296 assert ((n1) < (d)); \
297 __d1 = (d); __d0 = 0; \
298 __r1 = (n1); __r0 = (n0); \
299 __q = 0; \
300 for (unsigned int __i = W_TYPE_SIZE; __i > 0; __i--) \
302 rsh2 (__d1, __d0, __d1, __d0, 1); \
303 __q <<= 1; \
304 if (ge2 (__r1, __r0, __d1, __d0)) \
306 __q++; \
307 sub_ddmmss (__r1, __r0, __r1, __r0, __d1, __d0); \
310 (r) = __r0; \
311 (q) = __q; \
312 } while (0)
313 #endif
315 #if !defined add_ssaaaa
316 # define add_ssaaaa(sh, sl, ah, al, bh, bl) \
317 do { \
318 uintmax_t _add_x; \
319 _add_x = (al) + (bl); \
320 (sh) = (ah) + (bh) + (_add_x < (al)); \
321 (sl) = _add_x; \
322 } while (0)
323 #endif
325 #define rsh2(rh, rl, ah, al, cnt) \
326 do { \
327 (rl) = ((ah) << (W_TYPE_SIZE - (cnt))) | ((al) >> (cnt)); \
328 (rh) = (ah) >> (cnt); \
329 } while (0)
331 #define lsh2(rh, rl, ah, al, cnt) \
332 do { \
333 (rh) = ((ah) << cnt) | ((al) >> (W_TYPE_SIZE - (cnt))); \
334 (rl) = (al) << (cnt); \
335 } while (0)
337 #define ge2(ah, al, bh, bl) \
338 ((ah) > (bh) || ((ah) == (bh) && (al) >= (bl)))
340 #define gt2(ah, al, bh, bl) \
341 ((ah) > (bh) || ((ah) == (bh) && (al) > (bl)))
343 #ifndef sub_ddmmss
344 # define sub_ddmmss(rh, rl, ah, al, bh, bl) \
345 do { \
346 uintmax_t _cy; \
347 _cy = (al) < (bl); \
348 (rl) = (al) - (bl); \
349 (rh) = (ah) - (bh) - _cy; \
350 } while (0)
351 #endif
353 #ifndef count_leading_zeros
354 # define count_leading_zeros(count, x) do { \
355 uintmax_t __clz_x = (x); \
356 unsigned int __clz_c; \
357 for (__clz_c = 0; \
358 (__clz_x & ((uintmax_t) 0xff << (W_TYPE_SIZE - 8))) == 0; \
359 __clz_c += 8) \
360 __clz_x <<= 8; \
361 for (; (intmax_t)__clz_x >= 0; __clz_c++) \
362 __clz_x <<= 1; \
363 (count) = __clz_c; \
364 } while (0)
365 #endif
367 #ifndef count_trailing_zeros
368 # define count_trailing_zeros(count, x) do { \
369 uintmax_t __ctz_x = (x); \
370 unsigned int __ctz_c = 0; \
371 while ((__ctz_x & 1) == 0) \
373 __ctz_x >>= 1; \
374 __ctz_c++; \
376 (count) = __ctz_c; \
377 } while (0)
378 #endif
380 /* Requires that a < n and b <= n */
381 #define submod(r,a,b,n) \
382 do { \
383 uintmax_t _t = - (uintmax_t) (a < b); \
384 (r) = ((n) & _t) + (a) - (b); \
385 } while (0)
387 #define addmod(r,a,b,n) \
388 submod ((r), (a), ((n) - (b)), (n))
390 /* Modular two-word addition and subtraction. For performance reasons, the
391 most significant bit of n1 must be clear. The destination variables must be
392 distinct from the mod operand. */
393 #define addmod2(r1, r0, a1, a0, b1, b0, n1, n0) \
394 do { \
395 add_ssaaaa ((r1), (r0), (a1), (a0), (b1), (b0)); \
396 if (ge2 ((r1), (r0), (n1), (n0))) \
397 sub_ddmmss ((r1), (r0), (r1), (r0), (n1), (n0)); \
398 } while (0)
399 #define submod2(r1, r0, a1, a0, b1, b0, n1, n0) \
400 do { \
401 sub_ddmmss ((r1), (r0), (a1), (a0), (b1), (b0)); \
402 if ((intmax_t) (r1) < 0) \
403 add_ssaaaa ((r1), (r0), (r1), (r0), (n1), (n0)); \
404 } while (0)
406 #define HIGHBIT_TO_MASK(x) \
407 (((intmax_t)-1 >> 1) < 0 \
408 ? (uintmax_t)((intmax_t)(x) >> (W_TYPE_SIZE - 1)) \
409 : ((x) & ((uintmax_t) 1 << (W_TYPE_SIZE - 1)) \
410 ? UINTMAX_MAX : (uintmax_t) 0))
412 /* Compute r = a mod d, where r = <*t1,retval>, a = <a1,a0>, d = <d1,d0>.
413 Requires that d1 != 0. */
414 static uintmax_t
415 mod2 (uintmax_t *r1, uintmax_t a1, uintmax_t a0, uintmax_t d1, uintmax_t d0)
417 int cntd, cnta;
419 assert (d1 != 0);
421 if (a1 == 0)
423 *r1 = 0;
424 return a0;
427 count_leading_zeros (cntd, d1);
428 count_leading_zeros (cnta, a1);
429 int cnt = cntd - cnta;
430 lsh2 (d1, d0, d1, d0, cnt);
431 for (int i = 0; i < cnt; i++)
433 if (ge2 (a1, a0, d1, d0))
434 sub_ddmmss (a1, a0, a1, a0, d1, d0);
435 rsh2 (d1, d0, d1, d0, 1);
438 *r1 = a1;
439 return a0;
442 static uintmax_t _GL_ATTRIBUTE_CONST
443 gcd_odd (uintmax_t a, uintmax_t b)
445 if ( (b & 1) == 0)
447 uintmax_t t = b;
448 b = a;
449 a = t;
451 if (a == 0)
452 return b;
454 /* Take out least significant one bit, to make room for sign */
455 b >>= 1;
457 for (;;)
459 uintmax_t t;
460 uintmax_t bgta;
462 while ((a & 1) == 0)
463 a >>= 1;
464 a >>= 1;
466 t = a - b;
467 if (t == 0)
468 return (a << 1) + 1;
470 bgta = HIGHBIT_TO_MASK (t);
472 /* b <-- min (a, b) */
473 b += (bgta & t);
475 /* a <-- |a - b| */
476 a = (t ^ bgta) - bgta;
480 static uintmax_t
481 gcd2_odd (uintmax_t *r1, uintmax_t a1, uintmax_t a0, uintmax_t b1, uintmax_t b0)
483 while ((a0 & 1) == 0)
484 rsh2 (a1, a0, a1, a0, 1);
485 while ((b0 & 1) == 0)
486 rsh2 (b1, b0, b1, b0, 1);
488 for (;;)
490 if ((b1 | a1) == 0)
492 *r1 = 0;
493 return gcd_odd (b0, a0);
496 if (gt2 (a1, a0, b1, b0))
498 sub_ddmmss (a1, a0, a1, a0, b1, b0);
500 rsh2 (a1, a0, a1, a0, 1);
501 while ((a0 & 1) == 0);
503 else if (gt2 (b1, b0, a1, a0))
505 sub_ddmmss (b1, b0, b1, b0, a1, a0);
507 rsh2 (b1, b0, b1, b0, 1);
508 while ((b0 & 1) == 0);
510 else
511 break;
514 *r1 = a1;
515 return a0;
518 static void
519 factor_insert_multiplicity (struct factors *factors,
520 uintmax_t prime, unsigned int m)
522 unsigned int nfactors = factors->nfactors;
523 uintmax_t *p = factors->p;
524 unsigned char *e = factors->e;
526 /* Locate position for insert new or increment e. */
527 int i;
528 for (i = nfactors - 1; i >= 0; i--)
530 if (p[i] <= prime)
531 break;
534 if (i < 0 || p[i] != prime)
536 for (int j = nfactors - 1; j > i; j--)
538 p[j + 1] = p[j];
539 e[j + 1] = e[j];
541 p[i + 1] = prime;
542 e[i + 1] = m;
543 factors->nfactors = nfactors + 1;
545 else
547 e[i] += m;
551 #define factor_insert(f, p) factor_insert_multiplicity (f, p, 1)
553 static void
554 factor_insert_large (struct factors *factors,
555 uintmax_t p1, uintmax_t p0)
557 if (p1 > 0)
559 assert (factors->plarge[1] == 0);
560 factors->plarge[0] = p0;
561 factors->plarge[1] = p1;
563 else
564 factor_insert (factors, p0);
567 #if HAVE_GMP
569 # if !HAVE_DECL_MPZ_INITS
571 # define mpz_inits(...) mpz_va_init (mpz_init, __VA_ARGS__)
572 # define mpz_clears(...) mpz_va_init (mpz_clear, __VA_ARGS__)
574 static void
575 mpz_va_init (void (*mpz_single_init)(mpz_t), ...)
577 va_list ap;
579 va_start (ap, mpz_single_init);
581 mpz_t *mpz;
582 while ((mpz = va_arg (ap, mpz_t *)))
583 mpz_single_init (*mpz);
585 va_end (ap);
587 # endif
589 static void mp_factor (mpz_t, struct mp_factors *);
591 static void
592 mp_factor_init (struct mp_factors *factors)
594 factors->p = NULL;
595 factors->e = NULL;
596 factors->nfactors = 0;
599 static void
600 mp_factor_clear (struct mp_factors *factors)
602 for (unsigned int i = 0; i < factors->nfactors; i++)
603 mpz_clear (factors->p[i]);
605 free (factors->p);
606 free (factors->e);
609 static void
610 mp_factor_insert (struct mp_factors *factors, mpz_t prime)
612 unsigned long int nfactors = factors->nfactors;
613 mpz_t *p = factors->p;
614 unsigned long int *e = factors->e;
615 long i;
617 /* Locate position for insert new or increment e. */
618 for (i = nfactors - 1; i >= 0; i--)
620 if (mpz_cmp (p[i], prime) <= 0)
621 break;
624 if (i < 0 || mpz_cmp (p[i], prime) != 0)
626 p = xrealloc (p, (nfactors + 1) * sizeof p[0]);
627 e = xrealloc (e, (nfactors + 1) * sizeof e[0]);
629 mpz_init (p[nfactors]);
630 for (long j = nfactors - 1; j > i; j--)
632 mpz_set (p[j + 1], p[j]);
633 e[j + 1] = e[j];
635 mpz_set (p[i + 1], prime);
636 e[i + 1] = 1;
638 factors->p = p;
639 factors->e = e;
640 factors->nfactors = nfactors + 1;
642 else
644 e[i] += 1;
648 static void
649 mp_factor_insert_ui (struct mp_factors *factors, unsigned long int prime)
651 mpz_t pz;
653 mpz_init_set_ui (pz, prime);
654 mp_factor_insert (factors, pz);
655 mpz_clear (pz);
657 #endif /* HAVE_GMP */
660 /* Number of bits in an uintmax_t. */
661 enum { W = sizeof (uintmax_t) * CHAR_BIT };
663 /* Verify that uintmax_t does not have holes in its representation. */
664 verify (UINTMAX_MAX >> (W - 1) == 1);
666 #define P(a,b,c,d) a,
667 static const unsigned char primes_diff[] = {
668 #include "primes.h"
669 0,0,0,0,0,0,0 /* 7 sentinels for 8-way loop */
671 #undef P
673 #define PRIMES_PTAB_ENTRIES \
674 (sizeof (primes_diff) / sizeof (primes_diff[0]) - 8 + 1)
676 #define P(a,b,c,d) b,
677 static const unsigned char primes_diff8[] = {
678 #include "primes.h"
679 0,0,0,0,0,0,0 /* 7 sentinels for 8-way loop */
681 #undef P
683 struct primes_dtab
685 uintmax_t binv, lim;
688 #define P(a,b,c,d) {c,d},
689 static const struct primes_dtab primes_dtab[] = {
690 #include "primes.h"
691 {1,0},{1,0},{1,0},{1,0},{1,0},{1,0},{1,0} /* 7 sentinels for 8-way loop */
693 #undef P
695 /* Verify that uintmax_t is not wider than
696 the integers used to generate primes.h. */
697 verify (W <= WIDE_UINT_BITS);
699 /* debugging for developers. Enables devmsg().
700 This flag is used only in the GMP code. */
701 static bool dev_debug = false;
703 /* Prove primality or run probabilistic tests. */
704 static bool flag_prove_primality = PROVE_PRIMALITY;
706 /* Number of Miller-Rabin tests to run when not proving primality. */
707 #define MR_REPS 25
709 static void
710 factor_insert_refind (struct factors *factors, uintmax_t p, unsigned int i,
711 unsigned int off)
713 for (unsigned int j = 0; j < off; j++)
714 p += primes_diff[i + j];
715 factor_insert (factors, p);
718 /* Trial division with odd primes uses the following trick.
720 Let p be an odd prime, and B = 2^{W_TYPE_SIZE}. For simplicity,
721 consider the case t < B (this is the second loop below).
723 From our tables we get
725 binv = p^{-1} (mod B)
726 lim = floor ( (B-1) / p ).
728 First assume that t is a multiple of p, t = q * p. Then 0 <= q <= lim
729 (and all quotients in this range occur for some t).
731 Then t = q * p is true also (mod B), and p is invertible we get
733 q = t * binv (mod B).
735 Next, assume that t is *not* divisible by p. Since multiplication
736 by binv (mod B) is a one-to-one mapping,
738 t * binv (mod B) > lim,
740 because all the smaller values are already taken.
742 This can be summed up by saying that the function
744 q(t) = binv * t (mod B)
746 is a permutation of the range 0 <= t < B, with the curious property
747 that it maps the multiples of p onto the range 0 <= q <= lim, in
748 order, and the non-multiples of p onto the range lim < q < B.
751 static uintmax_t
752 factor_using_division (uintmax_t *t1p, uintmax_t t1, uintmax_t t0,
753 struct factors *factors)
755 if (t0 % 2 == 0)
757 unsigned int cnt;
759 if (t0 == 0)
761 count_trailing_zeros (cnt, t1);
762 t0 = t1 >> cnt;
763 t1 = 0;
764 cnt += W_TYPE_SIZE;
766 else
768 count_trailing_zeros (cnt, t0);
769 rsh2 (t1, t0, t1, t0, cnt);
772 factor_insert_multiplicity (factors, 2, cnt);
775 uintmax_t p = 3;
776 unsigned int i;
777 for (i = 0; t1 > 0 && i < PRIMES_PTAB_ENTRIES; i++)
779 for (;;)
781 uintmax_t q1, q0, hi, lo _GL_UNUSED;
783 q0 = t0 * primes_dtab[i].binv;
784 umul_ppmm (hi, lo, q0, p);
785 if (hi > t1)
786 break;
787 hi = t1 - hi;
788 q1 = hi * primes_dtab[i].binv;
789 if (LIKELY (q1 > primes_dtab[i].lim))
790 break;
791 t1 = q1; t0 = q0;
792 factor_insert (factors, p);
794 p += primes_diff[i + 1];
796 if (t1p)
797 *t1p = t1;
799 #define DIVBLOCK(I) \
800 do { \
801 for (;;) \
803 q = t0 * pd[I].binv; \
804 if (LIKELY (q > pd[I].lim)) \
805 break; \
806 t0 = q; \
807 factor_insert_refind (factors, p, i + 1, I); \
809 } while (0)
811 for (; i < PRIMES_PTAB_ENTRIES; i += 8)
813 uintmax_t q;
814 const struct primes_dtab *pd = &primes_dtab[i];
815 DIVBLOCK (0);
816 DIVBLOCK (1);
817 DIVBLOCK (2);
818 DIVBLOCK (3);
819 DIVBLOCK (4);
820 DIVBLOCK (5);
821 DIVBLOCK (6);
822 DIVBLOCK (7);
824 p += primes_diff8[i];
825 if (p * p > t0)
826 break;
829 return t0;
832 #if HAVE_GMP
833 static void
834 mp_factor_using_division (mpz_t t, struct mp_factors *factors)
836 mpz_t q;
837 unsigned long int p;
839 devmsg ("[trial division] ");
841 mpz_init (q);
843 p = mpz_scan1 (t, 0);
844 mpz_div_2exp (t, t, p);
845 while (p)
847 mp_factor_insert_ui (factors, 2);
848 --p;
851 p = 3;
852 for (unsigned int i = 1; i <= PRIMES_PTAB_ENTRIES;)
854 if (! mpz_divisible_ui_p (t, p))
856 p += primes_diff[i++];
857 if (mpz_cmp_ui (t, p * p) < 0)
858 break;
860 else
862 mpz_tdiv_q_ui (t, t, p);
863 mp_factor_insert_ui (factors, p);
867 mpz_clear (q);
869 #endif
871 /* Entry i contains (2i+1)^(-1) mod 2^8. */
872 static const unsigned char binvert_table[128] =
874 0x01, 0xAB, 0xCD, 0xB7, 0x39, 0xA3, 0xC5, 0xEF,
875 0xF1, 0x1B, 0x3D, 0xA7, 0x29, 0x13, 0x35, 0xDF,
876 0xE1, 0x8B, 0xAD, 0x97, 0x19, 0x83, 0xA5, 0xCF,
877 0xD1, 0xFB, 0x1D, 0x87, 0x09, 0xF3, 0x15, 0xBF,
878 0xC1, 0x6B, 0x8D, 0x77, 0xF9, 0x63, 0x85, 0xAF,
879 0xB1, 0xDB, 0xFD, 0x67, 0xE9, 0xD3, 0xF5, 0x9F,
880 0xA1, 0x4B, 0x6D, 0x57, 0xD9, 0x43, 0x65, 0x8F,
881 0x91, 0xBB, 0xDD, 0x47, 0xC9, 0xB3, 0xD5, 0x7F,
882 0x81, 0x2B, 0x4D, 0x37, 0xB9, 0x23, 0x45, 0x6F,
883 0x71, 0x9B, 0xBD, 0x27, 0xA9, 0x93, 0xB5, 0x5F,
884 0x61, 0x0B, 0x2D, 0x17, 0x99, 0x03, 0x25, 0x4F,
885 0x51, 0x7B, 0x9D, 0x07, 0x89, 0x73, 0x95, 0x3F,
886 0x41, 0xEB, 0x0D, 0xF7, 0x79, 0xE3, 0x05, 0x2F,
887 0x31, 0x5B, 0x7D, 0xE7, 0x69, 0x53, 0x75, 0x1F,
888 0x21, 0xCB, 0xED, 0xD7, 0x59, 0xC3, 0xE5, 0x0F,
889 0x11, 0x3B, 0x5D, 0xC7, 0x49, 0x33, 0x55, 0xFF
892 /* Compute n^(-1) mod B, using a Newton iteration. */
893 #define binv(inv,n) \
894 do { \
895 uintmax_t __n = (n); \
896 uintmax_t __inv; \
898 __inv = binvert_table[(__n / 2) & 0x7F]; /* 8 */ \
899 if (W_TYPE_SIZE > 8) __inv = 2 * __inv - __inv * __inv * __n; \
900 if (W_TYPE_SIZE > 16) __inv = 2 * __inv - __inv * __inv * __n; \
901 if (W_TYPE_SIZE > 32) __inv = 2 * __inv - __inv * __inv * __n; \
903 if (W_TYPE_SIZE > 64) \
905 int __invbits = 64; \
906 do { \
907 __inv = 2 * __inv - __inv * __inv * __n; \
908 __invbits *= 2; \
909 } while (__invbits < W_TYPE_SIZE); \
912 (inv) = __inv; \
913 } while (0)
915 /* q = u / d, assuming d|u. */
916 #define divexact_21(q1, q0, u1, u0, d) \
917 do { \
918 uintmax_t _di, _q0; \
919 binv (_di, (d)); \
920 _q0 = (u0) * _di; \
921 if ((u1) >= (d)) \
923 uintmax_t _p1, _p0 _GL_UNUSED; \
924 umul_ppmm (_p1, _p0, _q0, d); \
925 (q1) = ((u1) - _p1) * _di; \
926 (q0) = _q0; \
928 else \
930 (q0) = _q0; \
931 (q1) = 0; \
933 } while (0)
935 /* x B (mod n). */
936 #define redcify(r_prim, r, n) \
937 do { \
938 uintmax_t _redcify_q _GL_UNUSED; \
939 udiv_qrnnd (_redcify_q, r_prim, r, 0, n); \
940 } while (0)
942 /* x B^2 (mod n). Requires x > 0, n1 < B/2 */
943 #define redcify2(r1, r0, x, n1, n0) \
944 do { \
945 uintmax_t _r1, _r0, _i; \
946 if ((x) < (n1)) \
948 _r1 = (x); _r0 = 0; \
949 _i = W_TYPE_SIZE; \
951 else \
953 _r1 = 0; _r0 = (x); \
954 _i = 2*W_TYPE_SIZE; \
956 while (_i-- > 0) \
958 lsh2 (_r1, _r0, _r1, _r0, 1); \
959 if (ge2 (_r1, _r0, (n1), (n0))) \
960 sub_ddmmss (_r1, _r0, _r1, _r0, (n1), (n0)); \
962 (r1) = _r1; \
963 (r0) = _r0; \
964 } while (0)
966 /* Modular two-word multiplication, r = a * b mod m, with mi = m^(-1) mod B.
967 Both a and b must be in redc form, the result will be in redc form too. */
968 static inline uintmax_t
969 mulredc (uintmax_t a, uintmax_t b, uintmax_t m, uintmax_t mi)
971 uintmax_t rh, rl, q, th, tl _GL_UNUSED, xh;
973 umul_ppmm (rh, rl, a, b);
974 q = rl * mi;
975 umul_ppmm (th, tl, q, m);
976 xh = rh - th;
977 if (rh < th)
978 xh += m;
980 return xh;
983 /* Modular two-word multiplication, r = a * b mod m, with mi = m^(-1) mod B.
984 Both a and b must be in redc form, the result will be in redc form too.
985 For performance reasons, the most significant bit of m must be clear. */
986 static uintmax_t
987 mulredc2 (uintmax_t *r1p,
988 uintmax_t a1, uintmax_t a0, uintmax_t b1, uintmax_t b0,
989 uintmax_t m1, uintmax_t m0, uintmax_t mi)
991 uintmax_t r1, r0, q, p1, p0 _GL_UNUSED, t1, t0, s1, s0;
992 mi = -mi;
993 assert ( (a1 >> (W_TYPE_SIZE - 1)) == 0);
994 assert ( (b1 >> (W_TYPE_SIZE - 1)) == 0);
995 assert ( (m1 >> (W_TYPE_SIZE - 1)) == 0);
997 /* First compute a0 * <b1, b0> B^{-1}
998 +-----+
999 |a0 b0|
1000 +--+--+--+
1001 |a0 b1|
1002 +--+--+--+
1003 |q0 m0|
1004 +--+--+--+
1005 |q0 m1|
1006 -+--+--+--+
1007 |r1|r0| 0|
1008 +--+--+--+
1010 umul_ppmm (t1, t0, a0, b0);
1011 umul_ppmm (r1, r0, a0, b1);
1012 q = mi * t0;
1013 umul_ppmm (p1, p0, q, m0);
1014 umul_ppmm (s1, s0, q, m1);
1015 r0 += (t0 != 0); /* Carry */
1016 add_ssaaaa (r1, r0, r1, r0, 0, p1);
1017 add_ssaaaa (r1, r0, r1, r0, 0, t1);
1018 add_ssaaaa (r1, r0, r1, r0, s1, s0);
1020 /* Next, (a1 * <b1, b0> + <r1, r0> B^{-1}
1021 +-----+
1022 |a1 b0|
1023 +--+--+
1024 |r1|r0|
1025 +--+--+--+
1026 |a1 b1|
1027 +--+--+--+
1028 |q1 m0|
1029 +--+--+--+
1030 |q1 m1|
1031 -+--+--+--+
1032 |r1|r0| 0|
1033 +--+--+--+
1035 umul_ppmm (t1, t0, a1, b0);
1036 umul_ppmm (s1, s0, a1, b1);
1037 add_ssaaaa (t1, t0, t1, t0, 0, r0);
1038 q = mi * t0;
1039 add_ssaaaa (r1, r0, s1, s0, 0, r1);
1040 umul_ppmm (p1, p0, q, m0);
1041 umul_ppmm (s1, s0, q, m1);
1042 r0 += (t0 != 0); /* Carry */
1043 add_ssaaaa (r1, r0, r1, r0, 0, p1);
1044 add_ssaaaa (r1, r0, r1, r0, 0, t1);
1045 add_ssaaaa (r1, r0, r1, r0, s1, s0);
1047 if (ge2 (r1, r0, m1, m0))
1048 sub_ddmmss (r1, r0, r1, r0, m1, m0);
1050 *r1p = r1;
1051 return r0;
1054 static uintmax_t _GL_ATTRIBUTE_CONST
1055 powm (uintmax_t b, uintmax_t e, uintmax_t n, uintmax_t ni, uintmax_t one)
1057 uintmax_t y = one;
1059 if (e & 1)
1060 y = b;
1062 while (e != 0)
1064 b = mulredc (b, b, n, ni);
1065 e >>= 1;
1067 if (e & 1)
1068 y = mulredc (y, b, n, ni);
1071 return y;
1074 static uintmax_t
1075 powm2 (uintmax_t *r1m,
1076 const uintmax_t *bp, const uintmax_t *ep, const uintmax_t *np,
1077 uintmax_t ni, const uintmax_t *one)
1079 uintmax_t r1, r0, b1, b0, n1, n0;
1080 unsigned int i;
1081 uintmax_t e;
1083 b0 = bp[0];
1084 b1 = bp[1];
1085 n0 = np[0];
1086 n1 = np[1];
1088 r0 = one[0];
1089 r1 = one[1];
1091 for (e = ep[0], i = W_TYPE_SIZE; i > 0; i--, e >>= 1)
1093 if (e & 1)
1095 r0 = mulredc2 (r1m, r1, r0, b1, b0, n1, n0, ni);
1096 r1 = *r1m;
1098 b0 = mulredc2 (r1m, b1, b0, b1, b0, n1, n0, ni);
1099 b1 = *r1m;
1101 for (e = ep[1]; e > 0; e >>= 1)
1103 if (e & 1)
1105 r0 = mulredc2 (r1m, r1, r0, b1, b0, n1, n0, ni);
1106 r1 = *r1m;
1108 b0 = mulredc2 (r1m, b1, b0, b1, b0, n1, n0, ni);
1109 b1 = *r1m;
1111 *r1m = r1;
1112 return r0;
1115 static bool _GL_ATTRIBUTE_CONST
1116 millerrabin (uintmax_t n, uintmax_t ni, uintmax_t b, uintmax_t q,
1117 unsigned int k, uintmax_t one)
1119 uintmax_t y = powm (b, q, n, ni, one);
1121 uintmax_t nm1 = n - one; /* -1, but in redc representation. */
1123 if (y == one || y == nm1)
1124 return true;
1126 for (unsigned int i = 1; i < k; i++)
1128 y = mulredc (y, y, n, ni);
1130 if (y == nm1)
1131 return true;
1132 if (y == one)
1133 return false;
1135 return false;
1138 static bool
1139 millerrabin2 (const uintmax_t *np, uintmax_t ni, const uintmax_t *bp,
1140 const uintmax_t *qp, unsigned int k, const uintmax_t *one)
1142 uintmax_t y1, y0, nm1_1, nm1_0, r1m;
1144 y0 = powm2 (&r1m, bp, qp, np, ni, one);
1145 y1 = r1m;
1147 if (y0 == one[0] && y1 == one[1])
1148 return true;
1150 sub_ddmmss (nm1_1, nm1_0, np[1], np[0], one[1], one[0]);
1152 if (y0 == nm1_0 && y1 == nm1_1)
1153 return true;
1155 for (unsigned int i = 1; i < k; i++)
1157 y0 = mulredc2 (&r1m, y1, y0, y1, y0, np[1], np[0], ni);
1158 y1 = r1m;
1160 if (y0 == nm1_0 && y1 == nm1_1)
1161 return true;
1162 if (y0 == one[0] && y1 == one[1])
1163 return false;
1165 return false;
1168 #if HAVE_GMP
1169 static bool
1170 mp_millerrabin (mpz_srcptr n, mpz_srcptr nm1, mpz_ptr x, mpz_ptr y,
1171 mpz_srcptr q, unsigned long int k)
1173 mpz_powm (y, x, q, n);
1175 if (mpz_cmp_ui (y, 1) == 0 || mpz_cmp (y, nm1) == 0)
1176 return true;
1178 for (unsigned long int i = 1; i < k; i++)
1180 mpz_powm_ui (y, y, 2, n);
1181 if (mpz_cmp (y, nm1) == 0)
1182 return true;
1183 if (mpz_cmp_ui (y, 1) == 0)
1184 return false;
1186 return false;
1188 #endif
1190 /* Lucas' prime test. The number of iterations vary greatly, up to a few dozen
1191 have been observed. The average seem to be about 2. */
1192 static bool
1193 prime_p (uintmax_t n)
1195 int k;
1196 bool is_prime;
1197 uintmax_t a_prim, one, ni;
1198 struct factors factors;
1200 if (n <= 1)
1201 return false;
1203 /* We have already casted out small primes. */
1204 if (n < (uintmax_t) FIRST_OMITTED_PRIME * FIRST_OMITTED_PRIME)
1205 return true;
1207 /* Precomputation for Miller-Rabin. */
1208 uintmax_t q = n - 1;
1209 for (k = 0; (q & 1) == 0; k++)
1210 q >>= 1;
1212 uintmax_t a = 2;
1213 binv (ni, n); /* ni <- 1/n mod B */
1214 redcify (one, 1, n);
1215 addmod (a_prim, one, one, n); /* i.e., redcify a = 2 */
1217 /* Perform a Miller-Rabin test, finds most composites quickly. */
1218 if (!millerrabin (n, ni, a_prim, q, k, one))
1219 return false;
1221 if (flag_prove_primality)
1223 /* Factor n-1 for Lucas. */
1224 factor (0, n - 1, &factors);
1227 /* Loop until Lucas proves our number prime, or Miller-Rabin proves our
1228 number composite. */
1229 for (unsigned int r = 0; r < PRIMES_PTAB_ENTRIES; r++)
1231 if (flag_prove_primality)
1233 is_prime = true;
1234 for (unsigned int i = 0; i < factors.nfactors && is_prime; i++)
1236 is_prime
1237 = powm (a_prim, (n - 1) / factors.p[i], n, ni, one) != one;
1240 else
1242 /* After enough Miller-Rabin runs, be content. */
1243 is_prime = (r == MR_REPS - 1);
1246 if (is_prime)
1247 return true;
1249 a += primes_diff[r]; /* Establish new base. */
1251 /* The following is equivalent to redcify (a_prim, a, n). It runs faster
1252 on most processors, since it avoids udiv_qrnnd. If we go down the
1253 udiv_qrnnd_preinv path, this code should be replaced. */
1255 uintmax_t s1, s0;
1256 umul_ppmm (s1, s0, one, a);
1257 if (LIKELY (s1 == 0))
1258 a_prim = s0 % n;
1259 else
1261 uintmax_t dummy _GL_UNUSED;
1262 udiv_qrnnd (dummy, a_prim, s1, s0, n);
1266 if (!millerrabin (n, ni, a_prim, q, k, one))
1267 return false;
1270 error (0, 0, _("Lucas prime test failure. This should not happen"));
1271 abort ();
1274 static bool
1275 prime2_p (uintmax_t n1, uintmax_t n0)
1277 uintmax_t q[2], nm1[2];
1278 uintmax_t a_prim[2];
1279 uintmax_t one[2];
1280 uintmax_t na[2];
1281 uintmax_t ni;
1282 unsigned int k;
1283 struct factors factors;
1285 if (n1 == 0)
1286 return prime_p (n0);
1288 nm1[1] = n1 - (n0 == 0);
1289 nm1[0] = n0 - 1;
1290 if (nm1[0] == 0)
1292 count_trailing_zeros (k, nm1[1]);
1294 q[0] = nm1[1] >> k;
1295 q[1] = 0;
1296 k += W_TYPE_SIZE;
1298 else
1300 count_trailing_zeros (k, nm1[0]);
1301 rsh2 (q[1], q[0], nm1[1], nm1[0], k);
1304 uintmax_t a = 2;
1305 binv (ni, n0);
1306 redcify2 (one[1], one[0], 1, n1, n0);
1307 addmod2 (a_prim[1], a_prim[0], one[1], one[0], one[1], one[0], n1, n0);
1309 /* FIXME: Use scalars or pointers in arguments? Some consistency needed. */
1310 na[0] = n0;
1311 na[1] = n1;
1313 if (!millerrabin2 (na, ni, a_prim, q, k, one))
1314 return false;
1316 if (flag_prove_primality)
1318 /* Factor n-1 for Lucas. */
1319 factor (nm1[1], nm1[0], &factors);
1322 /* Loop until Lucas proves our number prime, or Miller-Rabin proves our
1323 number composite. */
1324 for (unsigned int r = 0; r < PRIMES_PTAB_ENTRIES; r++)
1326 bool is_prime;
1327 uintmax_t e[2], y[2];
1329 if (flag_prove_primality)
1331 is_prime = true;
1332 if (factors.plarge[1])
1334 uintmax_t pi;
1335 binv (pi, factors.plarge[0]);
1336 e[0] = pi * nm1[0];
1337 e[1] = 0;
1338 y[0] = powm2 (&y[1], a_prim, e, na, ni, one);
1339 is_prime = (y[0] != one[0] || y[1] != one[1]);
1341 for (unsigned int i = 0; i < factors.nfactors && is_prime; i++)
1343 /* FIXME: We always have the factor 2. Do we really need to
1344 handle it here? We have done the same powering as part
1345 of millerrabin. */
1346 if (factors.p[i] == 2)
1347 rsh2 (e[1], e[0], nm1[1], nm1[0], 1);
1348 else
1349 divexact_21 (e[1], e[0], nm1[1], nm1[0], factors.p[i]);
1350 y[0] = powm2 (&y[1], a_prim, e, na, ni, one);
1351 is_prime = (y[0] != one[0] || y[1] != one[1]);
1354 else
1356 /* After enough Miller-Rabin runs, be content. */
1357 is_prime = (r == MR_REPS - 1);
1360 if (is_prime)
1361 return true;
1363 a += primes_diff[r]; /* Establish new base. */
1364 redcify2 (a_prim[1], a_prim[0], a, n1, n0);
1366 if (!millerrabin2 (na, ni, a_prim, q, k, one))
1367 return false;
1370 error (0, 0, _("Lucas prime test failure. This should not happen"));
1371 abort ();
1374 #if HAVE_GMP
1375 static bool
1376 mp_prime_p (mpz_t n)
1378 bool is_prime;
1379 mpz_t q, a, nm1, tmp;
1380 struct mp_factors factors;
1382 if (mpz_cmp_ui (n, 1) <= 0)
1383 return false;
1385 /* We have already casted out small primes. */
1386 if (mpz_cmp_ui (n, (long) FIRST_OMITTED_PRIME * FIRST_OMITTED_PRIME) < 0)
1387 return true;
1389 mpz_inits (q, a, nm1, tmp, NULL);
1391 /* Precomputation for Miller-Rabin. */
1392 mpz_sub_ui (nm1, n, 1);
1394 /* Find q and k, where q is odd and n = 1 + 2**k * q. */
1395 unsigned long int k = mpz_scan1 (nm1, 0);
1396 mpz_tdiv_q_2exp (q, nm1, k);
1398 mpz_set_ui (a, 2);
1400 /* Perform a Miller-Rabin test, finds most composites quickly. */
1401 if (!mp_millerrabin (n, nm1, a, tmp, q, k))
1403 is_prime = false;
1404 goto ret2;
1407 if (flag_prove_primality)
1409 /* Factor n-1 for Lucas. */
1410 mpz_set (tmp, nm1);
1411 mp_factor (tmp, &factors);
1414 /* Loop until Lucas proves our number prime, or Miller-Rabin proves our
1415 number composite. */
1416 for (unsigned int r = 0; r < PRIMES_PTAB_ENTRIES; r++)
1418 if (flag_prove_primality)
1420 is_prime = true;
1421 for (unsigned long int i = 0; i < factors.nfactors && is_prime; i++)
1423 mpz_divexact (tmp, nm1, factors.p[i]);
1424 mpz_powm (tmp, a, tmp, n);
1425 is_prime = mpz_cmp_ui (tmp, 1) != 0;
1428 else
1430 /* After enough Miller-Rabin runs, be content. */
1431 is_prime = (r == MR_REPS - 1);
1434 if (is_prime)
1435 goto ret1;
1437 mpz_add_ui (a, a, primes_diff[r]); /* Establish new base. */
1439 if (!mp_millerrabin (n, nm1, a, tmp, q, k))
1441 is_prime = false;
1442 goto ret1;
1446 error (0, 0, _("Lucas prime test failure. This should not happen"));
1447 abort ();
1449 ret1:
1450 if (flag_prove_primality)
1451 mp_factor_clear (&factors);
1452 ret2:
1453 mpz_clears (q, a, nm1, tmp, NULL);
1455 return is_prime;
1457 #endif
1459 static void
1460 factor_using_pollard_rho (uintmax_t n, unsigned long int a,
1461 struct factors *factors)
1463 uintmax_t x, z, y, P, t, ni, g;
1465 unsigned long int k = 1;
1466 unsigned long int l = 1;
1468 redcify (P, 1, n);
1469 addmod (x, P, P, n); /* i.e., redcify(2) */
1470 y = z = x;
1472 while (n != 1)
1474 assert (a < n);
1476 binv (ni, n); /* FIXME: when could we use old 'ni' value? */
1478 for (;;)
1482 x = mulredc (x, x, n, ni);
1483 addmod (x, x, a, n);
1485 submod (t, z, x, n);
1486 P = mulredc (P, t, n, ni);
1488 if (k % 32 == 1)
1490 if (gcd_odd (P, n) != 1)
1491 goto factor_found;
1492 y = x;
1495 while (--k != 0);
1497 z = x;
1498 k = l;
1499 l = 2 * l;
1500 for (unsigned long int i = 0; i < k; i++)
1502 x = mulredc (x, x, n, ni);
1503 addmod (x, x, a, n);
1505 y = x;
1508 factor_found:
1511 y = mulredc (y, y, n, ni);
1512 addmod (y, y, a, n);
1514 submod (t, z, y, n);
1515 g = gcd_odd (t, n);
1517 while (g == 1);
1519 n = n / g;
1521 if (!prime_p (g))
1522 factor_using_pollard_rho (g, a + 1, factors);
1523 else
1524 factor_insert (factors, g);
1526 if (prime_p (n))
1528 factor_insert (factors, n);
1529 break;
1532 x = x % n;
1533 z = z % n;
1534 y = y % n;
1538 static void
1539 factor_using_pollard_rho2 (uintmax_t n1, uintmax_t n0, unsigned long int a,
1540 struct factors *factors)
1542 uintmax_t x1, x0, z1, z0, y1, y0, P1, P0, t1, t0, ni, g1, g0, r1m;
1544 unsigned long int k = 1;
1545 unsigned long int l = 1;
1547 redcify2 (P1, P0, 1, n1, n0);
1548 addmod2 (x1, x0, P1, P0, P1, P0, n1, n0); /* i.e., redcify(2) */
1549 y1 = z1 = x1;
1550 y0 = z0 = x0;
1552 while (n1 != 0 || n0 != 1)
1554 binv (ni, n0);
1556 for (;;)
1560 x0 = mulredc2 (&r1m, x1, x0, x1, x0, n1, n0, ni);
1561 x1 = r1m;
1562 addmod2 (x1, x0, x1, x0, 0, (uintmax_t) a, n1, n0);
1564 submod2 (t1, t0, z1, z0, x1, x0, n1, n0);
1565 P0 = mulredc2 (&r1m, P1, P0, t1, t0, n1, n0, ni);
1566 P1 = r1m;
1568 if (k % 32 == 1)
1570 g0 = gcd2_odd (&g1, P1, P0, n1, n0);
1571 if (g1 != 0 || g0 != 1)
1572 goto factor_found;
1573 y1 = x1; y0 = x0;
1576 while (--k != 0);
1578 z1 = x1; z0 = x0;
1579 k = l;
1580 l = 2 * l;
1581 for (unsigned long int i = 0; i < k; i++)
1583 x0 = mulredc2 (&r1m, x1, x0, x1, x0, n1, n0, ni);
1584 x1 = r1m;
1585 addmod2 (x1, x0, x1, x0, 0, (uintmax_t) a, n1, n0);
1587 y1 = x1; y0 = x0;
1590 factor_found:
1593 y0 = mulredc2 (&r1m, y1, y0, y1, y0, n1, n0, ni);
1594 y1 = r1m;
1595 addmod2 (y1, y0, y1, y0, 0, (uintmax_t) a, n1, n0);
1597 submod2 (t1, t0, z1, z0, y1, y0, n1, n0);
1598 g0 = gcd2_odd (&g1, t1, t0, n1, n0);
1600 while (g1 == 0 && g0 == 1);
1602 if (g1 == 0)
1604 /* The found factor is one word. */
1605 divexact_21 (n1, n0, n1, n0, g0); /* n = n / g */
1607 if (!prime_p (g0))
1608 factor_using_pollard_rho (g0, a + 1, factors);
1609 else
1610 factor_insert (factors, g0);
1612 else
1614 /* The found factor is two words. This is highly unlikely, thus hard
1615 to trigger. Please be careful before you change this code! */
1616 uintmax_t ginv;
1618 binv (ginv, g0); /* Compute n = n / g. Since the result will */
1619 n0 = ginv * n0; /* fit one word, we can compute the quotient */
1620 n1 = 0; /* modulo B, ignoring the high divisor word. */
1622 if (!prime2_p (g1, g0))
1623 factor_using_pollard_rho2 (g1, g0, a + 1, factors);
1624 else
1625 factor_insert_large (factors, g1, g0);
1628 if (n1 == 0)
1630 if (prime_p (n0))
1632 factor_insert (factors, n0);
1633 break;
1636 factor_using_pollard_rho (n0, a, factors);
1637 return;
1640 if (prime2_p (n1, n0))
1642 factor_insert_large (factors, n1, n0);
1643 break;
1646 x0 = mod2 (&x1, x1, x0, n1, n0);
1647 z0 = mod2 (&z1, z1, z0, n1, n0);
1648 y0 = mod2 (&y1, y1, y0, n1, n0);
1652 #if HAVE_GMP
1653 static void
1654 mp_factor_using_pollard_rho (mpz_t n, unsigned long int a,
1655 struct mp_factors *factors)
1657 mpz_t x, z, y, P;
1658 mpz_t t, t2;
1660 devmsg ("[pollard-rho (%lu)] ", a);
1662 mpz_inits (t, t2, NULL);
1663 mpz_init_set_si (y, 2);
1664 mpz_init_set_si (x, 2);
1665 mpz_init_set_si (z, 2);
1666 mpz_init_set_ui (P, 1);
1668 unsigned long long int k = 1;
1669 unsigned long long int l = 1;
1671 while (mpz_cmp_ui (n, 1) != 0)
1673 for (;;)
1677 mpz_mul (t, x, x);
1678 mpz_mod (x, t, n);
1679 mpz_add_ui (x, x, a);
1681 mpz_sub (t, z, x);
1682 mpz_mul (t2, P, t);
1683 mpz_mod (P, t2, n);
1685 if (k % 32 == 1)
1687 mpz_gcd (t, P, n);
1688 if (mpz_cmp_ui (t, 1) != 0)
1689 goto factor_found;
1690 mpz_set (y, x);
1693 while (--k != 0);
1695 mpz_set (z, x);
1696 k = l;
1697 l = 2 * l;
1698 for (unsigned long long int i = 0; i < k; i++)
1700 mpz_mul (t, x, x);
1701 mpz_mod (x, t, n);
1702 mpz_add_ui (x, x, a);
1704 mpz_set (y, x);
1707 factor_found:
1710 mpz_mul (t, y, y);
1711 mpz_mod (y, t, n);
1712 mpz_add_ui (y, y, a);
1714 mpz_sub (t, z, y);
1715 mpz_gcd (t, t, n);
1717 while (mpz_cmp_ui (t, 1) == 0);
1719 mpz_divexact (n, n, t); /* divide by t, before t is overwritten */
1721 if (!mp_prime_p (t))
1723 devmsg ("[composite factor--restarting pollard-rho] ");
1724 mp_factor_using_pollard_rho (t, a + 1, factors);
1726 else
1728 mp_factor_insert (factors, t);
1731 if (mp_prime_p (n))
1733 mp_factor_insert (factors, n);
1734 break;
1737 mpz_mod (x, x, n);
1738 mpz_mod (z, z, n);
1739 mpz_mod (y, y, n);
1742 mpz_clears (P, t2, t, z, x, y, NULL);
1744 #endif
1746 #if USE_SQUFOF
1747 /* FIXME: Maybe better to use an iteration converging to 1/sqrt(n)? If
1748 algorithm is replaced, consider also returning the remainder. */
1749 static uintmax_t _GL_ATTRIBUTE_CONST
1750 isqrt (uintmax_t n)
1752 uintmax_t x;
1753 unsigned c;
1754 if (n == 0)
1755 return 0;
1757 count_leading_zeros (c, n);
1759 /* Make x > sqrt(n). This will be invariant through the loop. */
1760 x = (uintmax_t) 1 << ((W_TYPE_SIZE + 1 - c) / 2);
1762 for (;;)
1764 uintmax_t y = (x + n/x) / 2;
1765 if (y >= x)
1766 return x;
1768 x = y;
1772 static uintmax_t _GL_ATTRIBUTE_CONST
1773 isqrt2 (uintmax_t nh, uintmax_t nl)
1775 unsigned int shift;
1776 uintmax_t x;
1778 /* Ensures the remainder fits in an uintmax_t. */
1779 assert (nh < ((uintmax_t) 1 << (W_TYPE_SIZE - 2)));
1781 if (nh == 0)
1782 return isqrt (nl);
1784 count_leading_zeros (shift, nh);
1785 shift &= ~1;
1787 /* Make x > sqrt(n) */
1788 x = isqrt ( (nh << shift) + (nl >> (W_TYPE_SIZE - shift))) + 1;
1789 x <<= (W_TYPE_SIZE - shift) / 2;
1791 /* Do we need more than one iteration? */
1792 for (;;)
1794 uintmax_t r _GL_UNUSED;
1795 uintmax_t q, y;
1796 udiv_qrnnd (q, r, nh, nl, x);
1797 y = (x + q) / 2;
1799 if (y >= x)
1801 uintmax_t hi, lo;
1802 umul_ppmm (hi, lo, x + 1, x + 1);
1803 assert (gt2 (hi, lo, nh, nl));
1805 umul_ppmm (hi, lo, x, x);
1806 assert (ge2 (nh, nl, hi, lo));
1807 sub_ddmmss (hi, lo, nh, nl, hi, lo);
1808 assert (hi == 0);
1810 return x;
1813 x = y;
1817 /* MAGIC[N] has a bit i set iff i is a quadratic residue mod N. */
1818 # define MAGIC64 0x0202021202030213ULL
1819 # define MAGIC63 0x0402483012450293ULL
1820 # define MAGIC65 0x218a019866014613ULL
1821 # define MAGIC11 0x23b
1823 /* Return the square root if the input is a square, otherwise 0. */
1824 static uintmax_t _GL_ATTRIBUTE_CONST
1825 is_square (uintmax_t x)
1827 /* Uses the tests suggested by Cohen. Excludes 99% of the non-squares before
1828 computing the square root. */
1829 if (((MAGIC64 >> (x & 63)) & 1)
1830 && ((MAGIC63 >> (x % 63)) & 1)
1831 /* Both 0 and 64 are squares mod (65) */
1832 && ((MAGIC65 >> ((x % 65) & 63)) & 1)
1833 && ((MAGIC11 >> (x % 11) & 1)))
1835 uintmax_t r = isqrt (x);
1836 if (r*r == x)
1837 return r;
1839 return 0;
1842 /* invtab[i] = floor(0x10000 / (0x100 + i) */
1843 static const unsigned short invtab[0x81] =
1845 0x200,
1846 0x1fc, 0x1f8, 0x1f4, 0x1f0, 0x1ec, 0x1e9, 0x1e5, 0x1e1,
1847 0x1de, 0x1da, 0x1d7, 0x1d4, 0x1d0, 0x1cd, 0x1ca, 0x1c7,
1848 0x1c3, 0x1c0, 0x1bd, 0x1ba, 0x1b7, 0x1b4, 0x1b2, 0x1af,
1849 0x1ac, 0x1a9, 0x1a6, 0x1a4, 0x1a1, 0x19e, 0x19c, 0x199,
1850 0x197, 0x194, 0x192, 0x18f, 0x18d, 0x18a, 0x188, 0x186,
1851 0x183, 0x181, 0x17f, 0x17d, 0x17a, 0x178, 0x176, 0x174,
1852 0x172, 0x170, 0x16e, 0x16c, 0x16a, 0x168, 0x166, 0x164,
1853 0x162, 0x160, 0x15e, 0x15c, 0x15a, 0x158, 0x157, 0x155,
1854 0x153, 0x151, 0x150, 0x14e, 0x14c, 0x14a, 0x149, 0x147,
1855 0x146, 0x144, 0x142, 0x141, 0x13f, 0x13e, 0x13c, 0x13b,
1856 0x139, 0x138, 0x136, 0x135, 0x133, 0x132, 0x130, 0x12f,
1857 0x12e, 0x12c, 0x12b, 0x129, 0x128, 0x127, 0x125, 0x124,
1858 0x123, 0x121, 0x120, 0x11f, 0x11e, 0x11c, 0x11b, 0x11a,
1859 0x119, 0x118, 0x116, 0x115, 0x114, 0x113, 0x112, 0x111,
1860 0x10f, 0x10e, 0x10d, 0x10c, 0x10b, 0x10a, 0x109, 0x108,
1861 0x107, 0x106, 0x105, 0x104, 0x103, 0x102, 0x101, 0x100,
1864 /* Compute q = [u/d], r = u mod d. Avoids slow hardware division for the case
1865 that q < 0x40; here it instead uses a table of (Euclidian) inverses. */
1866 # define div_smallq(q, r, u, d) \
1867 do { \
1868 if ((u) / 0x40 < (d)) \
1870 int _cnt; \
1871 uintmax_t _dinv, _mask, _q, _r; \
1872 count_leading_zeros (_cnt, (d)); \
1873 _r = (u); \
1874 if (UNLIKELY (_cnt > (W_TYPE_SIZE - 8))) \
1876 _dinv = invtab[((d) << (_cnt + 8 - W_TYPE_SIZE)) - 0x80]; \
1877 _q = _dinv * _r >> (8 + W_TYPE_SIZE - _cnt); \
1879 else \
1881 _dinv = invtab[((d) >> (W_TYPE_SIZE - 8 - _cnt)) - 0x7f]; \
1882 _q = _dinv * (_r >> (W_TYPE_SIZE - 3 - _cnt)) >> 11; \
1884 _r -= _q*(d); \
1886 _mask = -(uintmax_t) (_r >= (d)); \
1887 (r) = _r - (_mask & (d)); \
1888 (q) = _q - _mask; \
1889 assert ( (q) * (d) + (r) == u); \
1891 else \
1893 uintmax_t _q = (u) / (d); \
1894 (r) = (u) - _q * (d); \
1895 (q) = _q; \
1897 } while (0)
1899 /* Notes: Example N = 22117019. After first phase we find Q1 = 6314, Q
1900 = 3025, P = 1737, representing F_{18} = (-6314, 2* 1737, 3025),
1901 with 3025 = 55^2.
1903 Constructing the square root, we get Q1 = 55, Q = 8653, P = 4652,
1904 representing G_0 = (-55, 2*4652, 8653).
1906 In the notation of the paper:
1908 S_{-1} = 55, S_0 = 8653, R_0 = 4652
1912 t_0 = floor([q_0 + R_0] / S0) = 1
1913 R_1 = t_0 * S_0 - R_0 = 4001
1914 S_1 = S_{-1} +t_0 (R_0 - R_1) = 706
1917 /* Multipliers, in order of efficiency:
1918 0.7268 3*5*7*11 = 1155 = 3 (mod 4)
1919 0.7317 3*5*7 = 105 = 1
1920 0.7820 3*5*11 = 165 = 1
1921 0.7872 3*5 = 15 = 3
1922 0.8101 3*7*11 = 231 = 3
1923 0.8155 3*7 = 21 = 1
1924 0.8284 5*7*11 = 385 = 1
1925 0.8339 5*7 = 35 = 3
1926 0.8716 3*11 = 33 = 1
1927 0.8774 3 = 3 = 3
1928 0.8913 5*11 = 55 = 3
1929 0.8972 5 = 5 = 1
1930 0.9233 7*11 = 77 = 1
1931 0.9295 7 = 7 = 3
1932 0.9934 11 = 11 = 3
1934 # define QUEUE_SIZE 50
1935 #endif
1937 #if STAT_SQUFOF
1938 # define Q_FREQ_SIZE 50
1939 /* Element 0 keeps the total */
1940 static unsigned int q_freq[Q_FREQ_SIZE + 1];
1941 # define MIN(a,b) ((a) < (b) ? (a) : (b))
1942 #endif
1944 #if USE_SQUFOF
1945 /* Return true on success. Expected to fail only for numbers
1946 >= 2^{2*W_TYPE_SIZE - 2}, or close to that limit. */
1947 static bool
1948 factor_using_squfof (uintmax_t n1, uintmax_t n0, struct factors *factors)
1950 /* Uses algorithm and notation from
1952 SQUARE FORM FACTORIZATION
1953 JASON E. GOWER AND SAMUEL S. WAGSTAFF, JR.
1955 http://homes.cerias.purdue.edu/~ssw/squfof.pdf
1958 static const unsigned int multipliers_1[] =
1959 { /* = 1 (mod 4) */
1960 105, 165, 21, 385, 33, 5, 77, 1, 0
1962 static const unsigned int multipliers_3[] =
1963 { /* = 3 (mod 4) */
1964 1155, 15, 231, 35, 3, 55, 7, 11, 0
1967 const unsigned int *m;
1969 struct { uintmax_t Q; uintmax_t P; } queue[QUEUE_SIZE];
1971 if (n1 >= ((uintmax_t) 1 << (W_TYPE_SIZE - 2)))
1972 return false;
1974 uintmax_t sqrt_n = isqrt2 (n1, n0);
1976 if (n0 == sqrt_n * sqrt_n)
1978 uintmax_t p1, p0;
1980 umul_ppmm (p1, p0, sqrt_n, sqrt_n);
1981 assert (p0 == n0);
1983 if (n1 == p1)
1985 if (prime_p (sqrt_n))
1986 factor_insert_multiplicity (factors, sqrt_n, 2);
1987 else
1989 struct factors f;
1991 f.nfactors = 0;
1992 if (!factor_using_squfof (0, sqrt_n, &f))
1994 /* Try pollard rho instead */
1995 factor_using_pollard_rho (sqrt_n, 1, &f);
1997 /* Duplicate the new factors */
1998 for (unsigned int i = 0; i < f.nfactors; i++)
1999 factor_insert_multiplicity (factors, f.p[i], 2*f.e[i]);
2001 return true;
2005 /* Select multipliers so we always get n * mu = 3 (mod 4) */
2006 for (m = (n0 % 4 == 1) ? multipliers_3 : multipliers_1;
2007 *m; m++)
2009 uintmax_t S, Dh, Dl, Q1, Q, P, L, L1, B;
2010 unsigned int i;
2011 unsigned int mu = *m;
2012 unsigned int qpos = 0;
2014 assert (mu * n0 % 4 == 3);
2016 /* In the notation of the paper, with mu * n == 3 (mod 4), we
2017 get \Delta = 4 mu * n, and the paper's \mu is 2 mu. As far as
2018 I understand it, the necessary bound is 4 \mu^3 < n, or 32
2019 mu^3 < n.
2021 However, this seems insufficient: With n = 37243139 and mu =
2022 105, we get a trivial factor, from the square 38809 = 197^2,
2023 without any corresponding Q earlier in the iteration.
2025 Requiring 64 mu^3 < n seems sufficient. */
2026 if (n1 == 0)
2028 if ((uintmax_t) mu*mu*mu >= n0 / 64)
2029 continue;
2031 else
2033 if (n1 > ((uintmax_t) 1 << (W_TYPE_SIZE - 2)) / mu)
2034 continue;
2036 umul_ppmm (Dh, Dl, n0, mu);
2037 Dh += n1 * mu;
2039 assert (Dl % 4 != 1);
2040 assert (Dh < (uintmax_t) 1 << (W_TYPE_SIZE - 2));
2042 S = isqrt2 (Dh, Dl);
2044 Q1 = 1;
2045 P = S;
2047 /* Square root remainder fits in one word, so ignore high part. */
2048 Q = Dl - P*P;
2049 /* FIXME: When can this differ from floor(sqrt(2 sqrt(D)))? */
2050 L = isqrt (2*S);
2051 B = 2*L;
2052 L1 = mu * 2 * L;
2054 /* The form is (+/- Q1, 2P, -/+ Q), of discriminant 4 (P^2 + Q Q1) =
2055 4 D. */
2057 for (i = 0; i <= B; i++)
2059 uintmax_t q, P1, t, rem;
2061 div_smallq (q, rem, S+P, Q);
2062 P1 = S - rem; /* P1 = q*Q - P */
2064 IF_LINT (assert (q > 0 && Q > 0));
2066 # if STAT_SQUFOF
2067 q_freq[0]++;
2068 q_freq[MIN (q, Q_FREQ_SIZE)]++;
2069 # endif
2071 if (Q <= L1)
2073 uintmax_t g = Q;
2075 if ( (Q & 1) == 0)
2076 g /= 2;
2078 g /= gcd_odd (g, mu);
2080 if (g <= L)
2082 if (qpos >= QUEUE_SIZE)
2083 die (EXIT_FAILURE, 0, _("squfof queue overflow"));
2084 queue[qpos].Q = g;
2085 queue[qpos].P = P % g;
2086 qpos++;
2090 /* I think the difference can be either sign, but mod
2091 2^W_TYPE_SIZE arithmetic should be fine. */
2092 t = Q1 + q * (P - P1);
2093 Q1 = Q;
2094 Q = t;
2095 P = P1;
2097 if ( (i & 1) == 0)
2099 uintmax_t r = is_square (Q);
2100 if (r)
2102 for (unsigned int j = 0; j < qpos; j++)
2104 if (queue[j].Q == r)
2106 if (r == 1)
2107 /* Traversed entire cycle. */
2108 goto next_multiplier;
2110 /* Need the absolute value for divisibility test. */
2111 if (P >= queue[j].P)
2112 t = P - queue[j].P;
2113 else
2114 t = queue[j].P - P;
2115 if (t % r == 0)
2117 /* Delete entries up to and including entry
2118 j, which matched. */
2119 memmove (queue, queue + j + 1,
2120 (qpos - j - 1) * sizeof (queue[0]));
2121 qpos -= (j + 1);
2123 goto next_i;
2127 /* We have found a square form, which should give a
2128 factor. */
2129 Q1 = r;
2130 assert (S >= P); /* What signs are possible? */
2131 P += r * ((S - P) / r);
2133 /* Note: Paper says (N - P*P) / Q1, that seems incorrect
2134 for the case D = 2N. */
2135 /* Compute Q = (D - P*P) / Q1, but we need double
2136 precision. */
2137 uintmax_t hi, lo;
2138 umul_ppmm (hi, lo, P, P);
2139 sub_ddmmss (hi, lo, Dh, Dl, hi, lo);
2140 udiv_qrnnd (Q, rem, hi, lo, Q1);
2141 assert (rem == 0);
2143 for (;;)
2145 /* Note: There appears to by a typo in the paper,
2146 Step 4a in the algorithm description says q <--
2147 floor([S+P]/\hat Q), but looking at the equations
2148 in Sec. 3.1, it should be q <-- floor([S+P] / Q).
2149 (In this code, \hat Q is Q1). */
2150 div_smallq (q, rem, S+P, Q);
2151 P1 = S - rem; /* P1 = q*Q - P */
2153 # if STAT_SQUFOF
2154 q_freq[0]++;
2155 q_freq[MIN (q, Q_FREQ_SIZE)]++;
2156 # endif
2157 if (P == P1)
2158 break;
2159 t = Q1 + q * (P - P1);
2160 Q1 = Q;
2161 Q = t;
2162 P = P1;
2165 if ( (Q & 1) == 0)
2166 Q /= 2;
2167 Q /= gcd_odd (Q, mu);
2169 assert (Q > 1 && (n1 || Q < n0));
2171 if (prime_p (Q))
2172 factor_insert (factors, Q);
2173 else if (!factor_using_squfof (0, Q, factors))
2174 factor_using_pollard_rho (Q, 2, factors);
2176 divexact_21 (n1, n0, n1, n0, Q);
2178 if (prime2_p (n1, n0))
2179 factor_insert_large (factors, n1, n0);
2180 else
2182 if (!factor_using_squfof (n1, n0, factors))
2184 if (n1 == 0)
2185 factor_using_pollard_rho (n0, 1, factors);
2186 else
2187 factor_using_pollard_rho2 (n1, n0, 1, factors);
2191 return true;
2194 next_i:;
2196 next_multiplier:;
2198 return false;
2200 #endif
2202 /* Compute the prime factors of the 128-bit number (T1,T0), and put the
2203 results in FACTORS. */
2204 static void
2205 factor (uintmax_t t1, uintmax_t t0, struct factors *factors)
2207 factors->nfactors = 0;
2208 factors->plarge[1] = 0;
2210 if (t1 == 0 && t0 < 2)
2211 return;
2213 t0 = factor_using_division (&t1, t1, t0, factors);
2215 if (t1 == 0 && t0 < 2)
2216 return;
2218 if (prime2_p (t1, t0))
2219 factor_insert_large (factors, t1, t0);
2220 else
2222 #if USE_SQUFOF
2223 if (factor_using_squfof (t1, t0, factors))
2224 return;
2225 #endif
2227 if (t1 == 0)
2228 factor_using_pollard_rho (t0, 1, factors);
2229 else
2230 factor_using_pollard_rho2 (t1, t0, 1, factors);
2234 #if HAVE_GMP
2235 /* Use Pollard-rho to compute the prime factors of
2236 arbitrary-precision T, and put the results in FACTORS. */
2237 static void
2238 mp_factor (mpz_t t, struct mp_factors *factors)
2240 mp_factor_init (factors);
2242 if (mpz_sgn (t) != 0)
2244 mp_factor_using_division (t, factors);
2246 if (mpz_cmp_ui (t, 1) != 0)
2248 devmsg ("[is number prime?] ");
2249 if (mp_prime_p (t))
2250 mp_factor_insert (factors, t);
2251 else
2252 mp_factor_using_pollard_rho (t, 1, factors);
2256 #endif
2258 static strtol_error
2259 strto2uintmax (uintmax_t *hip, uintmax_t *lop, const char *s)
2261 unsigned int lo_carry;
2262 uintmax_t hi = 0, lo = 0;
2264 strtol_error err = LONGINT_INVALID;
2266 /* Skip initial spaces and '+'. */
2267 for (;;)
2269 char c = *s;
2270 if (c == ' ')
2271 s++;
2272 else if (c == '+')
2274 s++;
2275 break;
2277 else
2278 break;
2281 /* Initial scan for invalid digits. */
2282 const char *p = s;
2283 for (;;)
2285 unsigned int c = *p++;
2286 if (c == 0)
2287 break;
2289 if (UNLIKELY (!ISDIGIT (c)))
2291 err = LONGINT_INVALID;
2292 break;
2295 err = LONGINT_OK; /* we've seen at least one valid digit */
2298 for (;err == LONGINT_OK;)
2300 unsigned int c = *s++;
2301 if (c == 0)
2302 break;
2304 c -= '0';
2306 if (UNLIKELY (hi > ~(uintmax_t)0 / 10))
2308 err = LONGINT_OVERFLOW;
2309 break;
2311 hi = 10 * hi;
2313 lo_carry = (lo >> (W_TYPE_SIZE - 3)) + (lo >> (W_TYPE_SIZE - 1));
2314 lo_carry += 10 * lo < 2 * lo;
2316 lo = 10 * lo;
2317 lo += c;
2319 lo_carry += lo < c;
2320 hi += lo_carry;
2321 if (UNLIKELY (hi < lo_carry))
2323 err = LONGINT_OVERFLOW;
2324 break;
2328 *hip = hi;
2329 *lop = lo;
2331 return err;
2334 /* Structure and routines for buffering and outputting full lines,
2335 to support parallel operation efficiently. */
2336 static struct lbuf_
2338 char *buf;
2339 char *end;
2340 } lbuf;
2342 /* 512 is chosen to give good performance,
2343 and also is the max guaranteed size that
2344 consumers can read atomically through pipes.
2345 Also it's big enough to cater for max line length
2346 even with 128 bit uintmax_t. */
2347 #define FACTOR_PIPE_BUF 512
2349 static void
2350 lbuf_alloc (void)
2352 if (lbuf.buf)
2353 return;
2355 /* Double to ensure enough space for
2356 previous numbers + next number. */
2357 lbuf.buf = xmalloc (FACTOR_PIPE_BUF * 2);
2358 lbuf.end = lbuf.buf;
2361 /* Write complete LBUF to standard output. */
2362 static void
2363 lbuf_flush (void)
2365 size_t size = lbuf.end - lbuf.buf;
2366 if (full_write (STDOUT_FILENO, lbuf.buf, size) != size)
2367 die (EXIT_FAILURE, errno, "%s", _("write error"));
2368 lbuf.end = lbuf.buf;
2371 /* Add a character C to LBUF and if it's a newline
2372 and enough bytes are already buffered,
2373 then write atomically to standard output. */
2374 static void
2375 lbuf_putc (char c)
2377 *lbuf.end++ = c;
2379 if (c == '\n')
2381 size_t buffered = lbuf.end - lbuf.buf;
2383 /* Provide immediate output for interactive input. */
2384 static int line_buffered = -1;
2385 if (line_buffered == -1)
2386 line_buffered = isatty (STDIN_FILENO);
2387 if (line_buffered)
2388 lbuf_flush ();
2389 else if (buffered >= FACTOR_PIPE_BUF)
2391 /* Write output in <= PIPE_BUF chunks
2392 so consumers can read atomically. */
2393 char const *tend = lbuf.end;
2395 /* Since a umaxint_t's factors must fit in 512
2396 we're guaranteed to find a newline here. */
2397 char *tlend = lbuf.buf + FACTOR_PIPE_BUF;
2398 while (*--tlend != '\n');
2399 tlend++;
2401 lbuf.end = tlend;
2402 lbuf_flush ();
2404 /* Buffer the remainder. */
2405 memcpy (lbuf.buf, tlend, tend - tlend);
2406 lbuf.end = lbuf.buf + (tend - tlend);
2411 /* Buffer an int to the internal LBUF. */
2412 static void
2413 lbuf_putint (uintmax_t i, size_t min_width)
2415 char buf[INT_BUFSIZE_BOUND (uintmax_t)];
2416 char const *umaxstr = umaxtostr (i, buf);
2417 size_t width = sizeof (buf) - (umaxstr - buf) - 1;
2418 size_t z = width;
2420 for (; z < min_width; z++)
2421 *lbuf.end++ = '0';
2423 memcpy (lbuf.end, umaxstr, width);
2424 lbuf.end += width;
2427 static void
2428 print_uintmaxes (uintmax_t t1, uintmax_t t0)
2430 uintmax_t q, r;
2432 if (t1 == 0)
2433 lbuf_putint (t0, 0);
2434 else
2436 /* Use very plain code here since it seems hard to write fast code
2437 without assuming a specific word size. */
2438 q = t1 / 1000000000;
2439 r = t1 % 1000000000;
2440 udiv_qrnnd (t0, r, r, t0, 1000000000);
2441 print_uintmaxes (q, t0);
2442 lbuf_putint (r, 9);
2446 /* Single-precision factoring */
2447 static void
2448 print_factors_single (uintmax_t t1, uintmax_t t0)
2450 struct factors factors;
2452 print_uintmaxes (t1, t0);
2453 lbuf_putc (':');
2455 factor (t1, t0, &factors);
2457 for (unsigned int j = 0; j < factors.nfactors; j++)
2458 for (unsigned int k = 0; k < factors.e[j]; k++)
2460 lbuf_putc (' ');
2461 print_uintmaxes (0, factors.p[j]);
2464 if (factors.plarge[1])
2466 lbuf_putc (' ');
2467 print_uintmaxes (factors.plarge[1], factors.plarge[0]);
2470 lbuf_putc ('\n');
2473 /* Emit the factors of the indicated number. If we have the option of using
2474 either algorithm, we select on the basis of the length of the number.
2475 For longer numbers, we prefer the MP algorithm even if the native algorithm
2476 has enough digits, because the algorithm is better. The turnover point
2477 depends on the value. */
2478 static bool
2479 print_factors (const char *input)
2481 uintmax_t t1, t0;
2483 /* Try converting the number to one or two words. If it fails, use GMP or
2484 print an error message. The 2nd condition checks that the most
2485 significant bit of the two-word number is clear, in a typesize neutral
2486 way. */
2487 strtol_error err = strto2uintmax (&t1, &t0, input);
2489 switch (err)
2491 case LONGINT_OK:
2492 if (((t1 << 1) >> 1) == t1)
2494 devmsg ("[using single-precision arithmetic] ");
2495 print_factors_single (t1, t0);
2496 return true;
2498 break;
2500 case LONGINT_OVERFLOW:
2501 /* Try GMP. */
2502 break;
2504 default:
2505 error (0, 0, _("%s is not a valid positive integer"), quote (input));
2506 return false;
2509 #if HAVE_GMP
2510 devmsg ("[using arbitrary-precision arithmetic] ");
2511 mpz_t t;
2512 struct mp_factors factors;
2514 mpz_init_set_str (t, input, 10);
2516 gmp_printf ("%Zd:", t);
2517 mp_factor (t, &factors);
2519 for (unsigned int j = 0; j < factors.nfactors; j++)
2520 for (unsigned int k = 0; k < factors.e[j]; k++)
2521 gmp_printf (" %Zd", factors.p[j]);
2523 mp_factor_clear (&factors);
2524 mpz_clear (t);
2525 putchar ('\n');
2526 fflush (stdout);
2527 return true;
2528 #else
2529 error (0, 0, _("%s is too large"), quote (input));
2530 return false;
2531 #endif
2534 void
2535 usage (int status)
2537 if (status != EXIT_SUCCESS)
2538 emit_try_help ();
2539 else
2541 printf (_("\
2542 Usage: %s [NUMBER]...\n\
2543 or: %s OPTION\n\
2545 program_name, program_name);
2546 fputs (_("\
2547 Print the prime factors of each specified integer NUMBER. If none\n\
2548 are specified on the command line, read them from standard input.\n\
2550 "), stdout);
2551 fputs (HELP_OPTION_DESCRIPTION, stdout);
2552 fputs (VERSION_OPTION_DESCRIPTION, stdout);
2553 emit_ancillary_info (PROGRAM_NAME);
2555 exit (status);
2558 static bool
2559 do_stdin (void)
2561 bool ok = true;
2562 token_buffer tokenbuffer;
2564 init_tokenbuffer (&tokenbuffer);
2566 while (true)
2568 size_t token_length = readtoken (stdin, DELIM, sizeof (DELIM) - 1,
2569 &tokenbuffer);
2570 if (token_length == (size_t) -1)
2571 break;
2572 ok &= print_factors (tokenbuffer.buffer);
2574 free (tokenbuffer.buffer);
2576 return ok;
2580 main (int argc, char **argv)
2582 initialize_main (&argc, &argv);
2583 set_program_name (argv[0]);
2584 setlocale (LC_ALL, "");
2585 bindtextdomain (PACKAGE, LOCALEDIR);
2586 textdomain (PACKAGE);
2588 lbuf_alloc ();
2589 atexit (close_stdout);
2590 atexit (lbuf_flush);
2592 int c;
2593 while ((c = getopt_long (argc, argv, "", long_options, NULL)) != -1)
2595 switch (c)
2597 case DEV_DEBUG_OPTION:
2598 dev_debug = true;
2599 break;
2601 case_GETOPT_HELP_CHAR;
2603 case_GETOPT_VERSION_CHAR (PROGRAM_NAME, AUTHORS);
2605 default:
2606 usage (EXIT_FAILURE);
2610 #if STAT_SQUFOF
2611 memset (q_freq, 0, sizeof (q_freq));
2612 #endif
2614 bool ok;
2615 if (argc <= optind)
2616 ok = do_stdin ();
2617 else
2619 ok = true;
2620 for (int i = optind; i < argc; i++)
2621 if (! print_factors (argv[i]))
2622 ok = false;
2625 #if STAT_SQUFOF
2626 if (q_freq[0] > 0)
2628 double acc_f;
2629 printf ("q freq. cum. freq.(total: %d)\n", q_freq[0]);
2630 for (unsigned int i = 1, acc_f = 0.0; i <= Q_FREQ_SIZE; i++)
2632 double f = (double) q_freq[i] / q_freq[0];
2633 acc_f += f;
2634 printf ("%s%d %.2f%% %.2f%%\n", i == Q_FREQ_SIZE ? ">=" : "", i,
2635 100.0 * f, 100.0 * acc_f);
2638 #endif
2640 return ok ? EXIT_SUCCESS : EXIT_FAILURE;